diff --git a/internal/reader/rewrite/rewrite_functions.go b/internal/reader/rewrite/rewrite_functions.go index fcccd358..44b15049 100644 --- a/internal/reader/rewrite/rewrite_functions.go +++ b/internal/reader/rewrite/rewrite_functions.go @@ -14,6 +14,8 @@ import ( "miniflux.app/v2/internal/config" + nethtml "golang.org/x/net/html" + "github.com/PuerkitoBio/goquery" "github.com/yuin/goldmark" goldmarkhtml "github.com/yuin/goldmark/renderer/html" @@ -301,10 +303,6 @@ func replaceTextLinks(input string) string { return textLinkRegex.ReplaceAllString(input, `${1}`) } -func replaceLineFeeds(input string) string { - return strings.ReplaceAll(input, "\n", "
") -} - func replaceCustom(entryContent string, searchTerm string, replaceTerm string) string { re, err := regexp.Compile(searchTerm) if err == nil { @@ -334,7 +332,7 @@ func addCastopodEpisode(entryURL, entryContent string) string { func applyFuncOnTextContent(entryContent string, selector string, repl func(string) string) string { var treatChildren func(i int, s *goquery.Selection) treatChildren = func(i int, s *goquery.Selection) { - if s.Nodes[0].Type == 1 { + if s.Nodes[0].Type == nethtml.TextNode { s.ReplaceWithHtml(repl(s.Nodes[0].Data)) } else { s.Contents().Each(treatChildren) @@ -457,17 +455,3 @@ func removeTables(entryContent string) string { output, _ := doc.Find("body").First().Html() return output } - -func removeClickbait(entryTitle string) string { - titleWords := []string{} - for _, word := range strings.Fields(entryTitle) { - runes := []rune(word) - if len(runes) > 1 { - // keep first rune as is to keep the first capital letter - titleWords = append(titleWords, string([]rune{runes[0]})+strings.ToLower(string(runes[1:]))) - } else { - titleWords = append(titleWords, word) - } - } - return strings.Join(titleWords, " ") -} diff --git a/internal/reader/rewrite/rewriter.go b/internal/reader/rewrite/rewriter.go index 08fa1fa9..b577f687 100644 --- a/internal/reader/rewrite/rewriter.go +++ b/internal/reader/rewrite/rewriter.go @@ -11,6 +11,9 @@ import ( "miniflux.app/v2/internal/model" "miniflux.app/v2/internal/urllib" + + "golang.org/x/text/cases" + "golang.org/x/text/language" ) type rule struct { @@ -18,50 +21,7 @@ type rule struct { args []string } -// Rewriter modify item contents with a set of rewriting rules. -func Rewriter(entryURL string, entry *model.Entry, customRewriteRules string) { - rulesList := getPredefinedRewriteRules(entryURL) - if customRewriteRules != "" { - rulesList = customRewriteRules - } - - rules := parseRules(rulesList) - rules = append(rules, rule{name: "add_pdf_download_link"}) - - slog.Debug("Rewrite rules applied", - slog.Any("rules", rules), - slog.String("entry_url", entryURL), - ) - - for _, rule := range rules { - applyRule(entryURL, entry, rule) - } -} - -func parseRules(rulesText string) (rules []rule) { - scan := scanner.Scanner{Mode: scanner.ScanIdents | scanner.ScanStrings} - scan.Init(strings.NewReader(rulesText)) - - for { - switch scan.Scan() { - case scanner.Ident: - rules = append(rules, rule{name: scan.TokenText()}) - - case scanner.String: - if l := len(rules) - 1; l >= 0 { - text := scan.TokenText() - text, _ = strconv.Unquote(text) - - rules[l].args = append(rules[l].args, text) - } - - case scanner.EOF: - return - } - } -} - -func applyRule(entryURL string, entry *model.Entry, rule rule) { +func (rule rule) applyRule(entryURL string, entry *model.Entry) { switch rule.name { case "add_image_title": entry.Content = addImageTitle(entryURL, entry.Content) @@ -82,7 +42,7 @@ func applyRule(entryURL string, entry *model.Entry, rule rule) { case "add_pdf_download_link": entry.Content = addPDFLink(entryURL, entry.Content) case "nl2br": - entry.Content = replaceLineFeeds(entry.Content) + entry.Content = strings.ReplaceAll(entry.Content, "\n", "
") case "convert_text_link", "convert_text_links": entry.Content = replaceTextLinks(entry.Content) case "fix_medium_images": @@ -122,11 +82,11 @@ func applyRule(entryURL string, entry *model.Entry, rule rule) { case "add_castopod_episode": entry.Content = addCastopodEpisode(entryURL, entry.Content) case "base64_decode": + selector := "body" if len(rule.args) >= 1 { - entry.Content = applyFuncOnTextContent(entry.Content, rule.args[0], decodeBase64Content) - } else { - entry.Content = applyFuncOnTextContent(entry.Content, "body", decodeBase64Content) + selector = rule.args[0] } + entry.Content = applyFuncOnTextContent(entry.Content, selector, decodeBase64Content) case "add_hn_links_using_hack": entry.Content = addHackerNewsLinksUsing(entry.Content, "hack") case "add_hn_links_using_opener": @@ -136,7 +96,46 @@ func applyRule(entryURL string, entry *model.Entry, rule rule) { case "remove_tables": entry.Content = removeTables(entry.Content) case "remove_clickbait": - entry.Title = removeClickbait(entry.Title) + entry.Title = cases.Title(language.English).String(strings.ToLower(entry.Title)) + } +} + +// Rewriter modify item contents with a set of rewriting rules. +func Rewriter(entryURL string, entry *model.Entry, customRewriteRules string) { + rulesList := getPredefinedRewriteRules(entryURL) + if customRewriteRules != "" { + rulesList = customRewriteRules + } + + rules := parseRules(rulesList) + rules = append(rules, rule{name: "add_pdf_download_link"}) + + slog.Debug("Rewrite rules applied", + slog.Any("rules", rules), + slog.String("entry_url", entryURL), + ) + + for _, rule := range rules { + rule.applyRule(entryURL, entry) + } +} + +func parseRules(rulesText string) (rules []rule) { + scan := scanner.Scanner{Mode: scanner.ScanIdents | scanner.ScanStrings} + scan.Init(strings.NewReader(rulesText)) + + for { + switch scan.Scan() { + case scanner.Ident: + rules = append(rules, rule{name: scan.TokenText()}) + case scanner.String: + if l := len(rules) - 1; l >= 0 { + text, _ := strconv.Unquote(scan.TokenText()) + rules[l].args = append(rules[l].args, text) + } + case scanner.EOF: + return + } } }