reader/processor: minifier is breaking HTML entry content

This commit is contained in:
Frédéric Guillot 2024-04-23 20:08:09 -07:00
parent 2c4c845cd2
commit fb075b60b5
2 changed files with 27 additions and 16 deletions

View file

@ -38,9 +38,6 @@ var (
func ProcessFeedEntries(store *storage.Storage, feed *model.Feed, user *model.User, forceRefresh bool) { func ProcessFeedEntries(store *storage.Storage, feed *model.Feed, user *model.User, forceRefresh bool) {
var filteredEntries model.Entries var filteredEntries model.Entries
minifier := minify.New()
minifier.AddFunc("text/html", html.Minify)
// Process older entries first // Process older entries first
for i := len(feed.Entries) - 1; i >= 0; i-- { for i := len(feed.Entries) - 1; i >= 0; i-- {
entry := feed.Entries[i] entry := feed.Entries[i]
@ -107,11 +104,7 @@ func ProcessFeedEntries(store *storage.Storage, feed *model.Feed, user *model.Us
) )
} else if content != "" { } else if content != "" {
// We replace the entry content only if the scraper doesn't return any error. // We replace the entry content only if the scraper doesn't return any error.
if minifiedHTML, err := minifier.String("text/html", content); err == nil { entry.Content = minifyEntryContent(content)
entry.Content = minifiedHTML
} else {
entry.Content = content
}
} }
} }
@ -189,9 +182,6 @@ func isAllowedEntry(feed *model.Feed, entry *model.Entry) bool {
// ProcessEntryWebPage downloads the entry web page and apply rewrite rules. // ProcessEntryWebPage downloads the entry web page and apply rewrite rules.
func ProcessEntryWebPage(feed *model.Feed, entry *model.Entry, user *model.User) error { func ProcessEntryWebPage(feed *model.Feed, entry *model.Entry, user *model.User) error {
minifier := minify.New()
minifier.AddFunc("text/html", html.Minify)
startTime := time.Now() startTime := time.Now()
websiteURL := getUrlFromEntry(feed, entry) websiteURL := getUrlFromEntry(feed, entry)
@ -223,11 +213,7 @@ func ProcessEntryWebPage(feed *model.Feed, entry *model.Entry, user *model.User)
} }
if content != "" { if content != "" {
if minifiedHTML, err := minifier.String("text/html", content); err == nil { entry.Content = minifyEntryContent(content)
entry.Content = minifiedHTML
} else {
entry.Content = content
}
if user.ShowReadingTime { if user.ShowReadingTime {
entry.ReadingTime = readingtime.EstimateReadingTime(entry.Content, user.DefaultReadingSpeed, user.CJKReadingSpeed) entry.ReadingTime = readingtime.EstimateReadingTime(entry.Content, user.DefaultReadingSpeed, user.CJKReadingSpeed)
} }
@ -439,3 +425,19 @@ func isRecentEntry(entry *model.Entry) bool {
} }
return false return false
} }
func minifyEntryContent(entryContent string) string {
m := minify.New()
// Options required to avoid breaking the HTML content.
m.Add("text/html", &html.Minifier{
KeepEndTags: true,
KeepQuotes: true,
})
if minifiedHTML, err := m.String("text/html", entryContent); err == nil {
entryContent = minifiedHTML
}
return entryContent
}

View file

@ -117,3 +117,12 @@ func TestIsRecentEntry(t *testing.T) {
} }
} }
} }
// TestMinifyEntryContent checks that minifyEntryContent removes the
// superfluous whitespace from a paragraph containing a link while leaving
// the closing tags and the attribute quotes in place.
func TestMinifyEntryContent(t *testing.T) {
	const (
		rawHTML = `<p> Some text with a <a href="http://example.org/"> link </a> </p>`
		want    = `<p>Some text with a <a href="http://example.org/">link</a></p>`
	)

	if got := minifyEntryContent(rawHTML); got != want {
		t.Errorf(`Unexpected result, got %q`, got)
	}
}