[back/feat] clean styles, links and show images

This commit is contained in:
SouthFox 2023-08-26 10:42:59 +08:00
parent 83eca1f359
commit c0b4982e33

View file

@ -1,6 +1,7 @@
(ns backend.handlers (ns backend.handlers
(:require [clojure.string :as str] (:require [clojure.string :as str]
[babashka.http-client :as client] [babashka.http-client :as client]
[ring.util.codec :refer [url-decode]]
[cheshire.core :refer [generate-string]]) [cheshire.core :refer [generate-string]])
(:import [org.jsoup Jsoup])) (:import [org.jsoup Jsoup]))
@ -9,16 +10,31 @@
[content] [content]
(generate-string {:content content})) (generate-string {:content content}))
(defn clean-html
  "Tidy the scraped post markup held in `docs`, mutating it in place.

  `docs` is a jsoup node/selection that supports `.select` (the visible
  caller passes the result of `.getElementsByClass`).

  Steps, in order:
    1. remove `<style>` tags whose `data-emotion-css` attribute matches
       `^[a-z0-9]*$`;
    2. remove `<img>` elements that are direct children of a `<figure>`;
    3. rename `<noscript>` direct children of a `<figure>` to `<div>`
       (presumably so the image markup inside them gets rendered --
       TODO confirm against the upstream page structure);
    4. for anchors whose `href` starts with the link.zhihu.com redirect
       prefix, replace the `href` with the percent-decoded target URL.

  Returns a vector containing every `<a>` element found under `docs`,
  in document order."
  [docs]
  (let [redirect-prefix "https://link.zhihu.com/?target="]
    (.remove (.select docs "style[data-emotion-css~=^[a-z0-9]*$]"))
    (.remove (.select docs "figure > img"))
    (.tagName (.select docs "figure > noscript") "div")
    ;; mapv is eager, so the href rewrites happen right here rather than
    ;; depending on a lazy sequence being realized later.
    (mapv (fn [a]
            (let [href (.attr a "href")]
              ;; Only redirect-wrapped links carry a percent-encoded target.
              ;; Decoding any other href would corrupt escapes that are
              ;; meaningful there (e.g. `%26` inside a query string), and
              ;; anchors without an href must not gain an empty one.
              (if (str/starts-with? href redirect-prefix)
                (.attr a "href" (url-decode (subs href (count redirect-prefix))))
                a)))
          (.select docs "a"))))
;; fetch-hu-post: handler that takes a `request` map and returns a response map.
;;
;; NOTE(review): the lines below show the pre-change and post-change text of
;; this function interleaved (several lines carry both versions side by side);
;; the description here follows the newer text.
;;
;; Flow:
;;   - read the post id from (-> request :path-params :id);
;;   - build the URL "https://zhuanlan.zhihu.com/p/<id>";
;;   - fetch it with `client/get`, parse the response `:body` with
;;     `Jsoup/parse`, and keep the elements with class
;;     "Post-RichTextContainer" as `docs`;
;;   - run `clean-html` on `docs` (its return value is not used here);
;;   - respond with :status 200, a JSON content-type header, and a :body built
;;     by passing `(.toString docs)` through `wrap-json`.
;;
;; No handling of a failed fetch or a missing id is visible in this span.
(defn fetch-hu-post [request]
(defn fetch-hu-post
[request]
(let [id (-> request :path-params :id) (let [id (-> request :path-params :id)
post-url (str/join ["https://zhuanlan.zhihu.com/p/" id])] post-url (str/join ["https://zhuanlan.zhihu.com/p/" id])
docs (-> (client/get post-url) :body Jsoup/parse
(.getElementsByClass "Post-RichTextContainer"))]
(clean-html docs)
{:status 200 {:status 200
:headers {"Content-Type" "application/json; charset=utf-8"} :headers {"Content-Type" "application/json; charset=utf-8"}
:body (-> (client/get post-url) :body (-> (.toString docs)
:body
Jsoup/parse
(.getElementsByClass "Post-RichTextContainer")
(.toString)
(wrap-json))})) (wrap-json))}))