Add scraper rules for version2.dk and ing.dk
This commit is contained in:
parent
d4839b5597
commit
c454f67037
2 changed files with 3 additions and 1 deletion
|
@ -129,7 +129,7 @@ func (c *Client) buildClient() http.Client {
|
||||||
func (c *Client) buildHeaders() http.Header {
|
func (c *Client) buildHeaders() http.Header {
|
||||||
headers := make(http.Header)
|
headers := make(http.Header)
|
||||||
headers.Add("User-Agent", userAgent)
|
headers.Add("User-Agent", userAgent)
|
||||||
headers.Add("Accept", "text/html,application/xhtml+xml,application/xml,application/json,image/*")
|
headers.Add("Accept", "*/*")
|
||||||
|
|
||||||
if c.etagHeader != "" {
|
if c.etagHeader != "" {
|
||||||
headers.Add("If-None-Match", c.etagHeader)
|
headers.Add("If-None-Match", c.etagHeader)
|
||||||
|
|
|
@ -10,6 +10,7 @@ var predefinedRules = map[string]string{
|
||||||
"cbc.ca": ".story-content",
|
"cbc.ca": ".story-content",
|
||||||
"github.com": "article.entry-content",
|
"github.com": "article.entry-content",
|
||||||
"igen.fr": "section.corps",
|
"igen.fr": "section.corps",
|
||||||
|
"ing.dk": "section.body",
|
||||||
"lapresse.ca": ".amorce, .entry",
|
"lapresse.ca": ".amorce, .entry",
|
||||||
"lemonde.fr": "div#articleBody",
|
"lemonde.fr": "div#articleBody",
|
||||||
"lesjoiesducode.fr": ".blog-post-content img",
|
"lesjoiesducode.fr": ".blog-post-content img",
|
||||||
|
@ -20,5 +21,6 @@ var predefinedRules = map[string]string{
|
||||||
"phoronix.com": "div.content",
|
"phoronix.com": "div.content",
|
||||||
"techcrunch.com": "div.article-entry",
|
"techcrunch.com": "div.article-entry",
|
||||||
"theregister.co.uk": "#body",
|
"theregister.co.uk": "#body",
|
||||||
|
"version2.dk": "section.body",
|
||||||
"wired.com": "main figure, article",
|
"wired.com": "main figure, article",
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue