[back/refactor] avoid directly modifying Jsoup nodes

This commit is contained in:
SouthFox 2023-09-01 11:00:42 +08:00
parent 075436487e
commit cf88cb41cf

View file

@ -10,7 +10,6 @@
[content]
(generate-string content))
(defn clean-html
[docs hugo]
(let [replace-str (if hugo
@ -25,7 +24,6 @@
(str/replace "https://zhuanlan.zhihu.com/p/" replace-str)
(url-decode))))))
(defn clean-images
[docs]
(-> (.select docs "figure > img")
@ -35,7 +33,6 @@
(doseq [img (.select docs "figure > div > img")]
(.attr img "loading" "lazy")))
(defn render-linkcard
[docs]
(doseq [link-card (.select docs "a.LinkCard > span.LinkCard-contents")]
@ -44,19 +41,17 @@
(defn build-catalog-item
  "Render one heading element as a table-of-contents entry.

  Returns the string `<li><a href=\"#ID\">TEXT</a></li>`, where ID is the
  heading's `id` attribute and TEXT is its text content. The heading node
  is only read here, never modified."
  [catalog-item]
  (let [;; `.attr` and `.text` hand back decoded values, so they must be
        ;; re-escaped before being spliced into hand-built markup.
        escape #(org.jsoup.nodes.Entities/escape ^String %)
        anchor (-> (escape (.attr catalog-item "id"))
                   ;; Entities/escape may leave double quotes untouched
                   ;; depending on the Jsoup version; they would otherwise
                   ;; terminate the href attribute early.
                   (str/replace "\"" "&quot;"))
        label  (escape (.text catalog-item))]
    (str "<li>" "<a href=\"#" anchor "\">" label "</a></li>")))
(defn build-catalog
  "Build the table-of-contents markup for `docs`.

  Selects every h2-h5 heading under `docs`, renders each one with
  `build-catalog-item`, and returns the rendered items concatenated
  into a single string."
  [docs]
  (->> (.select docs "h2, h3, h4, h5")
       (map build-catalog-item)
       (str/join)))
(defn fetch-hu-post
[request & {:keys [hugo]}]
(let [id (-> request :path-params :id)
@ -64,7 +59,8 @@
page (-> (client/get post-url) :body Jsoup/parse)
docs (.getElementsByClass page "Post-RichTextContainer")
title (.getElementsByClass page "Post-Title")
post-time (.getElementsByClass page "ContentItem-time")]
post-time (.getElementsByClass page "ContentItem-time")
catalog (build-catalog docs)]
(clean-html docs hugo)
(clean-images docs)
@ -84,7 +80,7 @@
(let [content {:content (.toString docs)
:title (.text title)
:time (first (str/split (.text post-time) #"・"))
:catalog (.toString (build-catalog docs))}]
:catalog catalog}]
(if hugo
content
{:status 200