diff --git a/locale/translations.go b/locale/translations.go index 7124f1b6..75fe5536 100644 --- a/locale/translations.go +++ b/locale/translations.go @@ -1,5 +1,5 @@ // Code generated by go generate; DO NOT EDIT. -// 2017-12-10 20:08:14.447304303 -0800 PST m=+0.040286758 +// 2017-12-11 22:04:47.860104328 -0800 PST m=+0.042425898 package locale @@ -168,12 +168,13 @@ var translations = map[string]string{ "Fever Username": "Nom d'utilisateur pour l'API de Fever", "Fever Password": "Mot de passe pour l'API de Fever", "Fetch original content": "Récupérer le contenu original", - "Scraper Rules": "Règles pour récupérer le contenu original" + "Scraper Rules": "Règles pour récupérer le contenu original", + "Rewrite Rules": "Règles de réécriture" } `, } var translationsChecksums = map[string]string{ "en_US": "6fe95384260941e8a5a3c695a655a932e0a8a6a572c1e45cb2b1ae8baa01b897", - "fr_FR": "4426cea875ee2c9acb1a2b0619cb82f3a32f71aabe5d07657eaf2f6b7387c5f9", + "fr_FR": "0e14d65f38ca5c5e34f1d84f6837ce8a29a4ae5f8836b384bb098222b724cb5b", } diff --git a/locale/translations/fr_FR.json b/locale/translations/fr_FR.json index 0a51ec3c..a7c34f93 100644 --- a/locale/translations/fr_FR.json +++ b/locale/translations/fr_FR.json @@ -152,5 +152,6 @@ "Fever Username": "Nom d'utilisateur pour l'API de Fever", "Fever Password": "Mot de passe pour l'API de Fever", "Fetch original content": "Récupérer le contenu original", - "Scraper Rules": "Règles pour récupérer le contenu original" + "Scraper Rules": "Règles pour récupérer le contenu original", + "Rewrite Rules": "Règles de réécriture" } diff --git a/miniflux-test b/miniflux-test new file mode 100755 index 00000000..5e0309a3 Binary files /dev/null and b/miniflux-test differ diff --git a/model/feed.go b/model/feed.go index fb2819da..79012e9a 100644 --- a/model/feed.go +++ b/model/feed.go @@ -23,6 +23,7 @@ type Feed struct { ParsingErrorMsg string `json:"parsing_error_message,omitempty"` ParsingErrorCount int `json:"parsing_error_count,omitempty"` ScraperRules string `json:"scraper_rules"` + RewriteRules string `json:"rewrite_rules"` Category *Category `json:"category,omitempty"` Entries Entries `json:"entries,omitempty"` Icon *FeedIcon `json:"icon,omitempty"` diff --git a/reader/atom/atom.go b/reader/atom/atom.go index a2bd2c33..b62f42ac 100644 --- a/reader/atom/atom.go +++ b/reader/atom/atom.go @@ -14,7 +14,6 @@ import ( "github.com/miniflux/miniflux2/helper" "github.com/miniflux/miniflux2/model" "github.com/miniflux/miniflux2/reader/date" - "github.com/miniflux/miniflux2/reader/processor" ) type atomFeed struct { @@ -87,7 +86,7 @@ func (a *atomEntry) Transform() *model.Entry { entry.Date = getDate(a) entry.Author = getAuthor(a.Author) entry.Hash = getHash(a) - entry.Content = processor.ItemContentProcessor(entry.URL, getContent(a)) + entry.Content = getContent(a) entry.Title = strings.TrimSpace(a.Title) entry.Enclosures = getEnclosures(a) diff --git a/reader/feed/handler.go b/reader/feed/handler.go index e978deb1..7a986138 100644 --- a/reader/feed/handler.go +++ b/reader/feed/handler.go @@ -14,6 +14,7 @@ import ( "github.com/miniflux/miniflux2/http" "github.com/miniflux/miniflux2/model" "github.com/miniflux/miniflux2/reader/icon" + "github.com/miniflux/miniflux2/reader/processor" "github.com/miniflux/miniflux2/storage" ) @@ -63,6 +64,9 @@ func (h *Handler) CreateFeed(userID, categoryID int64, url string) (*model.Feed, return nil, err } + feedProcessor := processor.NewFeedProcessor(subscription) + feedProcessor.Process() + subscription.Category = &model.Category{ID: categoryID} subscription.EtagHeader = response.ETag subscription.LastModifiedHeader = response.LastModified @@ -136,6 +140,11 @@ func (h *Handler) RefreshFeed(userID, feedID int64) error { return err } + feedProcessor := processor.NewFeedProcessor(subscription) + feedProcessor.WithScraperRules(originalFeed.ScraperRules) + feedProcessor.WithRewriteRules(originalFeed.RewriteRules) + feedProcessor.Process() + originalFeed.EtagHeader = response.ETag originalFeed.LastModifiedHeader = response.LastModified diff --git a/reader/json/json.go b/reader/json/json.go index 34012321..da6df240 100644 --- a/reader/json/json.go +++ b/reader/json/json.go @@ -9,12 +9,10 @@ import ( "strings" "time" - "github.com/miniflux/miniflux2/reader/sanitizer" - "github.com/miniflux/miniflux2/helper" "github.com/miniflux/miniflux2/model" "github.com/miniflux/miniflux2/reader/date" - "github.com/miniflux/miniflux2/reader/processor" + "github.com/miniflux/miniflux2/reader/sanitizer" ) type jsonFeed struct { @@ -148,7 +146,7 @@ func (j *jsonItem) Transform() *model.Entry { entry.Date = j.GetDate() entry.Author = j.GetAuthor() entry.Hash = j.GetHash() - entry.Content = processor.ItemContentProcessor(entry.URL, j.GetContent()) + entry.Content = j.GetContent() entry.Title = strings.TrimSpace(j.GetTitle()) entry.Enclosures = j.GetEnclosures() return entry diff --git a/reader/json/parser_test.go b/reader/json/parser_test.go index dd680f85..60d7707b 100644 --- a/reader/json/parser_test.go +++ b/reader/json/parser_test.go @@ -148,7 +148,7 @@ func TestParsePodcast(t *testing.T) { t.Errorf(`Incorrect entry title, got: "%s"`, feed.Entries[0].Title) } - if feed.Entries[0].Content != `Chris has worked at Adobe and as a founder of Rogue Sheep, which won an Apple Design Award for Postage. Chris’s new company is Aged & Distilled with Guy English — which shipped Napkin, a Mac app for visual collaboration. Chris is also the co-host of The Record. He lives on Bainbridge Island, a quick ferry ride from Seattle.` { + if feed.Entries[0].Content != `Chris has worked at Adobe and as a founder of Rogue Sheep, which won an Apple Design Award for Postage. Chris’s new company is Aged & Distilled with Guy English — which shipped Napkin, a Mac app for visual collaboration. Chris is also the co-host of The Record. He lives on Bainbridge Island, a quick ferry ride from Seattle.` { t.Errorf(`Incorrect entry content, got: "%s"`, feed.Entries[0].Content) } diff --git a/reader/processor/processor.go b/reader/processor/processor.go index ef93b9a0..06dad43d 100644 --- a/reader/processor/processor.go +++ b/reader/processor/processor.go @@ -5,12 +5,37 @@ package processor import ( + "github.com/miniflux/miniflux2/model" "github.com/miniflux/miniflux2/reader/rewrite" "github.com/miniflux/miniflux2/reader/sanitizer" ) -// ItemContentProcessor executes a set of functions to sanitize and alter item contents. -func ItemContentProcessor(url, content string) string { - content = sanitizer.Sanitize(url, content) - return rewrite.Rewriter(url, content) +// FeedProcessor handles the processing of feed contents. +type FeedProcessor struct { + feed *model.Feed + scraperRules string + rewriteRules string +} + +// WithScraperRules adds scraper rules to the processing. +func (f *FeedProcessor) WithScraperRules(rules string) { + f.scraperRules = rules +} + +// WithRewriteRules adds rewrite rules to the processing. +func (f *FeedProcessor) WithRewriteRules(rules string) { + f.rewriteRules = rules +} + +// Process applies rewrite and scraper rules. +func (f *FeedProcessor) Process() { + for _, entry := range f.feed.Entries { + entry.Content = sanitizer.Sanitize(entry.URL, entry.Content) + entry.Content = rewrite.Rewriter(entry.URL, entry.Content, f.rewriteRules) + } +} + +// NewFeedProcessor returns a new FeedProcessor. +func NewFeedProcessor(feed *model.Feed) *FeedProcessor { + return &FeedProcessor{feed: feed} } diff --git a/reader/rdf/rdf.go b/reader/rdf/rdf.go index 9b8ccdc3..c97085fd 100644 --- a/reader/rdf/rdf.go +++ b/reader/rdf/rdf.go @@ -10,10 +10,8 @@ import ( "time" "github.com/miniflux/miniflux2/helper" - "github.com/miniflux/miniflux2/reader/processor" - "github.com/miniflux/miniflux2/reader/sanitizer" - "github.com/miniflux/miniflux2/model" + "github.com/miniflux/miniflux2/reader/sanitizer" ) type rdfFeed struct { @@ -58,7 +56,7 @@ func (r *rdfItem) Transform() *model.Entry { entry.Title = strings.TrimSpace(r.Title) entry.Author = strings.TrimSpace(r.Creator) entry.URL = r.Link - entry.Content = processor.ItemContentProcessor(entry.URL, r.Description) + entry.Content = r.Description entry.Hash = getHash(r) entry.Date = time.Now() return entry diff --git a/reader/rewrite/rewrite_functions.go b/reader/rewrite/rewrite_functions.go new file mode 100644 index 00000000..f1a5b839 --- /dev/null +++ b/reader/rewrite/rewrite_functions.go @@ -0,0 +1,40 @@ +// Copyright 2017 Frédéric Guillot. All rights reserved. +// Use of this source code is governed by the Apache 2.0 +// license that can be found in the LICENSE file. + +package rewrite + +import ( + "regexp" + "strings" + + "github.com/PuerkitoBio/goquery" +) + +var ( + youtubeRegex = regexp.MustCompile(`youtube\.com/watch\?v=(.*)`) +) + +func addImageTitle(entryURL, entryContent string) string { + doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent)) + if err != nil { + return entryContent + } + + imgTag := doc.Find("img").First() + if titleAttr, found := imgTag.Attr("title"); found { + return entryContent + `
` + titleAttr + "" + } + + return entryContent +} + +func addYoutubeVideo(entryURL, entryContent string) string { + matches := youtubeRegex.FindStringSubmatch(entryURL) + + if len(matches) == 2 { + video := `` + return video + "
" + entryContent + "
" + } + return entryContent +} diff --git a/reader/rewrite/rewriter.go b/reader/rewrite/rewriter.go index d76feeee..0fcd166f 100644 --- a/reader/rewrite/rewriter.go +++ b/reader/rewrite/rewriter.go @@ -5,44 +5,39 @@ package rewrite import ( - "regexp" "strings" - "github.com/PuerkitoBio/goquery" + "github.com/miniflux/miniflux2/url" ) -var rewriteRules = []func(string, string) string{ - func(url, content string) string { - re := regexp.MustCompile(`youtube\.com/watch\?v=(.*)`) - matches := re.FindStringSubmatch(url) - - if len(matches) == 2 { - video := `` - return video + "" + content + "
" - } - return content - }, - func(url, content string) string { - if strings.HasPrefix(url, "https://xkcd.com") { - doc, err := goquery.NewDocumentFromReader(strings.NewReader(content)) - if err != nil { - return content - } - - imgTag := doc.Find("img").First() - if titleAttr, found := imgTag.Attr("title"); found { - return content + `` + titleAttr + "" - } - } - return content - }, -} - // Rewriter modify item contents with a set of rewriting rules. -func Rewriter(url, content string) string { - for _, rewriteRule := range rewriteRules { - content = rewriteRule(url, content) +func Rewriter(entryURL, entryContent, customRewriteRules string) string { + rulesList := getPredefinedRewriteRules(entryURL) + if customRewriteRules != "" { + rulesList = customRewriteRules } - return content + rules := strings.Split(rulesList, ",") + for _, rule := range rules { + switch strings.TrimSpace(rule) { + case "add_image_title": + entryContent = addImageTitle(entryURL, entryContent) + case "add_youtube_video": + entryContent = addYoutubeVideo(entryURL, entryContent) + } + } + + return entryContent +} + +func getPredefinedRewriteRules(entryURL string) string { + urlDomain := url.Domain(entryURL) + + for domain, rules := range predefinedRules { + if strings.Contains(urlDomain, domain) { + return rules + } + } + + return "" } diff --git a/reader/rewrite/rewriter_test.go b/reader/rewrite/rewriter_test.go index a6664067..5f13895e 100644 --- a/reader/rewrite/rewriter_test.go +++ b/reader/rewrite/rewriter_test.go @@ -7,7 +7,7 @@ package rewrite import "testing" func TestRewriteWithNoMatchingRule(t *testing.T) { - output := Rewriter("https://example.org/article", `Some text.`) + output := Rewriter("https://example.org/article", `Some text.`, ``) expected := `Some text.` if expected != output { @@ -16,7 +16,7 @@ func TestRewriteWithNoMatchingRule(t *testing.T) { } func TestRewriteWithYoutubeLink(t *testing.T) { - output := Rewriter("https://www.youtube.com/watch?v=1234", `Video Description`) + output := Rewriter("https://www.youtube.com/watch?v=1234", `Video Description`, ``) expected := `
Video Description
` if expected != output { @@ -24,11 +24,37 @@ func TestRewriteWithYoutubeLink(t *testing.T) { } } +func TestRewriteWithInexistingCustomRule(t *testing.T) { + output := Rewriter("https://www.youtube.com/watch?v=1234", `Video Description`, `some rule`) + expected := `Video Description` + if expected != output { + t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected) + } +} + func TestRewriteWithXkcdLink(t *testing.T) { description := `` - output := Rewriter("https://xkcd.com/1912/", description) + output := Rewriter("https://xkcd.com/1912/", description, ``) expected := description + `Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you.` if expected != output { t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected) } } +func TestRewriteWithXkcdLinkAndNoImage(t *testing.T) { + description := "test" + output := Rewriter("https://xkcd.com/1912/", description, ``) + expected := description + if expected != output { + t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected) + } +} + +func TestRewriteWithXkcdAndNoImage(t *testing.T) { + description := "test" + output := Rewriter("https://xkcd.com/1912/", description, ``) + expected := description + + if expected != output { + t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected) + } +} diff --git a/reader/rewrite/rules.go b/reader/rewrite/rules.go new file mode 100644 index 00000000..76a1c0b1 --- /dev/null +++ b/reader/rewrite/rules.go @@ -0,0 +1,30 @@ +// Copyright 2017 Frédéric Guillot. All rights reserved. +// Use of this source code is governed by the Apache 2.0 +// license that can be found in the LICENSE file. + +package rewrite + +// List of predefined rewrite rules (alphabetically sorted) +// Available rules: "add_image_title", "add_youtube_video" +// domain => rule name +var predefinedRules = map[string]string{ + "abstrusegoose.com": "add_image_title", + "amazingsuperpowers.com": "add_image_title", + "cowbirdsinlove.com": "add_image_title", + "drawingboardcomic.com": "add_image_title", + "exocomics.com": "add_image_title", + "happletea.com": "add_image_title", + "imogenquest.net": "add_image_title", + "lukesurl.com": "add_image_title", + "mercworks.net": "add_image_title", + "mrlovenstein.com": "add_image_title", + "nedroid.com": "add_image_title", + "oglaf.com": "add_image_title", + "optipess.com": "add_image_title", + "peebleslab.com": "add_image_title", + "sentfromthemoon.com": "add_image_title", + "thedoghousediaries.com": "add_image_title", + "treelobsters.com": "add_image_title", + "youtube.com": "add_youtube_video", + "xkcd.com": "add_image_title", +} diff --git a/reader/rss/parser_test.go b/reader/rss/parser_test.go index f92be508..3abdd175 100644 --- a/reader/rss/parser_test.go +++ b/reader/rss/parser_test.go @@ -94,7 +94,7 @@ func TestParseRss2Sample(t *testing.T) { t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title) } - if feed.Entries[0].Content != `How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's Star City.` { + if feed.Entries[0].Content != `How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's Star City.` { t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content) } } @@ -383,7 +383,7 @@ func TestParseEntryWithContentEncoded(t *testing.T) { t.Error(err) } - if feed.Entries[0].Content != `` { + if feed.Entries[0].Content != `` { t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content) } } diff --git a/reader/rss/rss.go b/reader/rss/rss.go index 55739869..e5800b69 100644 --- a/reader/rss/rss.go +++ b/reader/rss/rss.go @@ -15,7 +15,6 @@ import ( "github.com/miniflux/miniflux2/helper" "github.com/miniflux/miniflux2/model" "github.com/miniflux/miniflux2/reader/date" - "github.com/miniflux/miniflux2/reader/processor" ) type rssFeed struct { @@ -211,7 +210,7 @@ func (r *rssItem) Transform() *model.Entry { entry.Date = r.GetDate() entry.Author = r.GetAuthor() entry.Hash = r.GetHash() - entry.Content = processor.ItemContentProcessor(entry.URL, r.GetContent()) + entry.Content = r.GetContent() entry.Title = strings.TrimSpace(r.Title) entry.Enclosures = r.GetEnclosures() diff --git a/server/static/bin.go b/server/static/bin.go index 29664d84..adf44615 100644 --- a/server/static/bin.go +++ b/server/static/bin.go @@ -1,5 +1,5 @@ // Code generated by go generate; DO NOT EDIT. -// 2017-12-10 18:56:24.36887959 -0800 PST m=+0.010858677 +// 2017-12-11 22:04:47.832384663 -0800 PST m=+0.014706233 package static diff --git a/server/static/css.go b/server/static/css.go index dd309308..91db7b90 100644 --- a/server/static/css.go +++ b/server/static/css.go @@ -1,5 +1,5 @@ // Code generated by go generate; DO NOT EDIT. -// 2017-12-10 18:56:24.370410193 -0800 PST m=+0.012389280 +// 2017-12-11 22:04:47.835872498 -0800 PST m=+0.018194068 package static diff --git a/server/static/js.go b/server/static/js.go index be2086c9..e77a8aa9 100644 --- a/server/static/js.go +++ b/server/static/js.go @@ -1,5 +1,5 @@ // Code generated by go generate; DO NOT EDIT. -// 2017-12-10 18:56:24.37299237 -0800 PST m=+0.014971457 +// 2017-12-11 22:04:47.840119593 -0800 PST m=+0.022441163 package static diff --git a/server/template/common.go b/server/template/common.go index 268ae697..88d34bd2 100644 --- a/server/template/common.go +++ b/server/template/common.go @@ -1,5 +1,5 @@ // Code generated by go generate; DO NOT EDIT. -// 2017-12-10 18:56:24.386027486 -0800 PST m=+0.028006573 +// 2017-12-11 22:04:47.859021405 -0800 PST m=+0.041342975 package template diff --git a/server/template/html/edit_feed.html b/server/template/html/edit_feed.html index 04950926..84958284 100644 --- a/server/template/html/edit_feed.html +++ b/server/template/html/edit_feed.html @@ -48,6 +48,9 @@ + + +