[back/feat] clean styles, links and show images
This commit is contained in:
parent
83eca1f359
commit
c0b4982e33
1 changed file with 24 additions and 8 deletions
|
@ -1,6 +1,7 @@
|
||||||
(ns backend.handlers
|
(ns backend.handlers
|
||||||
(:require [clojure.string :as str]
|
(:require [clojure.string :as str]
|
||||||
[babashka.http-client :as client]
|
[babashka.http-client :as client]
|
||||||
|
[ring.util.codec :refer [url-decode]]
|
||||||
[cheshire.core :refer [generate-string]])
|
[cheshire.core :refer [generate-string]])
|
||||||
(:import [org.jsoup Jsoup]))
|
(:import [org.jsoup Jsoup]))
|
||||||
|
|
||||||
|
@ -9,16 +10,31 @@
|
||||||
[content]
|
[content]
|
||||||
(generate-string {:content content}))
|
(generate-string {:content content}))
|
||||||
|
|
||||||
|
(defn clean-html
  "Tidies the Jsoup selection `docs` in place before it is serialised.

  Steps, in order:
    1. removes `style` elements whose `data-emotion-css` attribute matches
       `^[a-z0-9]*$`;
    2. removes `img` elements that are direct children of `figure`;
    3. renames `noscript` elements that are direct children of `figure` to
       `div` (presumably so the fallback image markup inside them is rendered
       -- confirm against the fetched page markup);
    4. for every `a` element whose `href` starts with the
       `https://link.zhihu.com/?target=` redirect prefix, replaces the `href`
       with the URL-decoded target.

  Links that do not start with the redirect prefix are left untouched, so
  percent-escapes in ordinary URLs are preserved and anchors without an
  `href` do not gain an empty one.

  `docs` is mutated. Returns a vector of the `a` elements that were visited."
  [docs]
  (-> (.select docs "style[data-emotion-css~=^[a-z0-9]*$]")
      (.remove))
  (-> (.select docs "figure > img")
      (.remove))
  (-> (.select docs "figure > noscript")
      (.tagName "div"))
  (let [redirect-prefix "https://link.zhihu.com/?target="]
    ;; mapv is eager, so the attribute updates happen immediately and the
    ;; anchors come back as a vector.
    (mapv (fn [a]
            (let [href (.attr a "href")]
              (if (str/starts-with? href redirect-prefix)
                ;; Decode only the wrapped target; decoding an unwrapped URL
                ;; could turn escaped delimiters (%26, %23, ...) into real ones.
                (.attr a "href" (url-decode (subs href (count redirect-prefix))))
                a)))
          (.select docs "a"))))
|
||||||
|
|
||||||
(defn fetch-hu-post
  "Ring-style handler that fetches a Zhihu column post and returns its
  rich-text content.

  Reads the post id from `(:id (:path-params request))`, downloads
  `https://zhuanlan.zhihu.com/p/<id>`, parses the response body with Jsoup and
  selects the elements with class `Post-RichTextContainer`. The selection is
  passed to `clean-html` (which mutates it), then its string form is handed to
  `wrap-json` to build the response body.

  Returns a response map with status 200 and a JSON content type."
  [request]
  (let [post-id  (get-in request [:path-params :id])
        post-url (str "https://zhuanlan.zhihu.com/p/" post-id)
        page     (Jsoup/parse (:body (client/get post-url)))
        docs     (.getElementsByClass page "Post-RichTextContainer")]
    ;; Called for its side effects on `docs`; the return value is not used.
    (clean-html docs)
    {:status  200
     :headers {"Content-Type" "application/json; charset=utf-8"}
     :body    (wrap-json (str docs))}))
|
||||||
|
|
Loading…
Reference in a new issue