[back/refactor] avoid directly modifying Jsoup nodes

This commit is contained in:
SouthFox 2023-09-01 11:00:42 +08:00
parent 075436487e
commit cf88cb41cf

View file

@ -10,7 +10,6 @@
[content] [content]
(generate-string content)) (generate-string content))
(defn clean-html (defn clean-html
[docs hugo] [docs hugo]
(let [replace-str (if hugo (let [replace-str (if hugo
@ -25,7 +24,6 @@
(str/replace "https://zhuanlan.zhihu.com/p/" replace-str) (str/replace "https://zhuanlan.zhihu.com/p/" replace-str)
(url-decode)))))) (url-decode))))))
(defn clean-images (defn clean-images
[docs] [docs]
(-> (.select docs "figure > img") (-> (.select docs "figure > img")
@ -35,7 +33,6 @@
(doseq [img (.select docs "figure > div > img")] (doseq [img (.select docs "figure > div > img")]
(.attr img "loading" "lazy"))) (.attr img "loading" "lazy")))
(defn render-linkcard (defn render-linkcard
[docs] [docs]
(doseq [link-card (.select docs "a.LinkCard > span.LinkCard-contents")] (doseq [link-card (.select docs "a.LinkCard > span.LinkCard-contents")]
@ -44,27 +41,26 @@
(defn build-catalog-item (defn build-catalog-item
[catalog-item] [catalog-item]
(.tagName catalog-item "a") (str "<li>" "<a href=\""
(.attr catalog-item "href" (str/join ["#" (.attr catalog-item "id")])
(str/join ["#" (.attr catalog-item "id")])) "\">"
(.attr catalog-item "id" "") (.text catalog-item)
(str "<li>" (.toString catalog-item) "</li>")) "</a></li>"))
(defn build-catalog (defn build-catalog
[docs] [docs]
(let [catalog (.select docs "h2, h3, h4, h5")] (let [catalog (.select docs "h2, h3, h4, h5")]
(apply str (mapv build-catalog-item catalog)))) (apply str (mapv build-catalog-item catalog))))
(defn fetch-hu-post (defn fetch-hu-post
[request & {:keys [hugo]}] [request & {:keys [hugo]}]
(let [id (-> request :path-params :id) (let [id (-> request :path-params :id)
post-url (str/join ["https://zhuanlan.zhihu.com/p/" id]) post-url (str/join ["https://zhuanlan.zhihu.com/p/" id])
page (-> (client/get post-url) :body Jsoup/parse) page (-> (client/get post-url) :body Jsoup/parse)
docs (.getElementsByClass page "Post-RichTextContainer") docs (.getElementsByClass page "Post-RichTextContainer")
title (.getElementsByClass page "Post-Title") title (.getElementsByClass page "Post-Title")
post-time (.getElementsByClass page "ContentItem-time")] post-time (.getElementsByClass page "ContentItem-time")
catalog (build-catalog docs)]
(clean-html docs hugo) (clean-html docs hugo)
(clean-images docs) (clean-images docs)
@ -84,7 +80,7 @@
(let [content {:content (.toString docs) (let [content {:content (.toString docs)
:title (.text title) :title (.text title)
:time (first (str/split (.text post-time) #"・")) :time (first (str/split (.text post-time) #"・"))
:catalog (.toString (build-catalog docs))}] :catalog catalog}]
(if hugo (if hugo
content content
{:status 200 {:status 200