Add scraper rules for version2.dk and ing.dk
This commit is contained in:
parent
d4839b5597
commit
c454f67037
2 changed files with 3 additions and 1 deletion
|
@ -129,7 +129,7 @@ func (c *Client) buildClient() http.Client {
|
|||
func (c *Client) buildHeaders() http.Header {
|
||||
headers := make(http.Header)
|
||||
headers.Add("User-Agent", userAgent)
|
||||
headers.Add("Accept", "text/html,application/xhtml+xml,application/xml,application/json,image/*")
|
||||
headers.Add("Accept", "*/*")
|
||||
|
||||
if c.etagHeader != "" {
|
||||
headers.Add("If-None-Match", c.etagHeader)
|
||||
|
|
|
@ -10,6 +10,7 @@ var predefinedRules = map[string]string{
|
|||
"cbc.ca": ".story-content",
|
||||
"github.com": "article.entry-content",
|
||||
"igen.fr": "section.corps",
|
||||
"ing.dk": "section.body",
|
||||
"lapresse.ca": ".amorce, .entry",
|
||||
"lemonde.fr": "div#articleBody",
|
||||
"lesjoiesducode.fr": ".blog-post-content img",
|
||||
|
@ -20,5 +21,6 @@ var predefinedRules = map[string]string{
|
|||
"phoronix.com": "div.content",
|
||||
"techcrunch.com": "div.article-entry",
|
||||
"theregister.co.uk": "#body",
|
||||
"version2.dk": "section.body",
|
||||
"wired.com": "main figure, article",
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue