[backend/feat] grab zhuanlan html page

This commit is contained in:
SouthFox 2023-08-23 09:44:03 +08:00
parent 5e22c19663
commit a6a8aa0115
2 changed files with 42 additions and 9 deletions

View file

@ -1,12 +1,28 @@
(ns backend.core (ns backend.core
(:use ring.adapter.jetty)) (:require [ring.adapter.jetty :refer [run-jetty]]
[reitit.ring :as ring]
[backend.handlers :as handlers])
(:gen-class))
(defn handler
  "Ring handler that ignores the incoming request and always answers with
  status 200 and the plain-text body \"Hello World\"."
  [request]
  (let [plain-text-headers {"Content-Type" "text/plain"}]
    {:body    "Hello World"
     :headers plain-text-headers
     :status  200}))
(defn -main (def app
[& args] (ring/ring-handler
(run-jetty handler {:port 3000 (ring/router
[["/hp/:id" {:parameters {:path {:id int?}}
:get {:handler handlers/fetch-hu-post}}]
])
(ring/create-default-handler
{:not-found (constantly {:status 404 :body "Not found"})})))
(defn -main [& args]
(run-jetty #'app {:port 3000
:join? false})) :join? false}))
;; Holds the value returned by `run-jetty`, so the running server can be
;; reached from the REPL. The handler is passed as the var `#'app`, not its
;; current value, so re-evaluating `app` is picked up without a restart.
;; `:join? false` is passed so the call returns instead of blocking the
;; calling thread (per the ring-jetty-adapter docs).
;; NOTE(review): this form runs whenever the namespace is loaded, and `-main`
;; also calls `run-jetty` on port 3000 — loading the namespace and then
;; invoking `-main` would try to bind the same port twice. Confirm whether
;; this def is meant to be REPL-only (e.g. moved inside a `comment` form).
(def server (run-jetty #'app {:port 3000
:join? false}))
;; REPL scratch form: calls `app` directly with a minimal Ring request map
;; (GET /hp/431038004) so the route can be exercised without going through
;; Jetty. Being wrapped in `comment`, it is not evaluated on namespace load.
(comment
(app {:request-method :get
:uri "/hp/431038004"
}))

View file

@ -0,0 +1,17 @@
(ns backend.handlers
(:require [clojure.string :as str]
[babashka.http-client :as client])
(:import [org.jsoup Jsoup]))
(defn fetch-hu-post
  "Ring handler: fetch a Zhihu Zhuanlan post and return its rich-text HTML.

  Reads the post id from `:id` in the request's `:path-params`, requests
  https://zhuanlan.zhihu.com/p/<id>, parses the response body with Jsoup and
  answers with the markup of every element carrying the class
  `Post-RichTextContainer`.

  Responses:
    200 - HTML of the matched elements (empty string when nothing matched)
    400 - the id is missing or is not made up solely of ASCII digits
    502 - the upstream request answered with a non-2xx status

  Connection-level failures (DNS, timeouts, ...) still propagate as
  exceptions from the HTTP client."
  [request]
  (let [id (-> request :path-params :id str)]
    ;; The id is caller-controlled and ends up inside the upstream URL.
    ;; Only digits are accepted so that a crafted value (for instance one
    ;; that the router percent-decodes into extra path segments or a query
    ;; string) cannot redirect the fetch to a different resource.
    (if-not (re-matches #"[0-9]+" id)
      {:status  400
       :headers {"Content-Type" "text/plain; charset=utf-8"}
       :body    "Invalid post id"}
      (let [post-url (str "https://zhuanlan.zhihu.com/p/" id)
            ;; `:throw false` makes the client return error responses instead
            ;; of raising, so an unknown post can be reported to the caller.
            upstream (client/get post-url {:throw false})
            status   (:status upstream)]
        (if (and (integer? status) (<= 200 status 299))
          {:status  200
           :headers {"Content-Type" "text/html; charset=utf-8"}
           :body    (-> (:body upstream)
                        Jsoup/parse
                        (.getElementsByClass "Post-RichTextContainer")
                        (.toString))}
          {:status  502
           :headers {"Content-Type" "text/plain; charset=utf-8"}
           :body    (str "Upstream request failed with status " status)})))))