From b205b5aad075dc89040231f87c79bec2a7ea60c7 Mon Sep 17 00:00:00 2001 From: jvoisin Date: Sat, 30 Mar 2024 23:01:02 +0100 Subject: [PATCH] reader/processor: minimize the feed's entries html Compress the html of feed entries before storing it. This should reduce the size of the database a bit, but more importantly, reduce the amount of data sent to clients. Since minify is [stupidly fast](https://github.com/tdewolff/minify/?tab=readme-ov-file#performance), the performance impact should be in the noise level. --- internal/reader/processor/processor.go | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/internal/reader/processor/processor.go b/internal/reader/processor/processor.go index eb2e2f9d..c92550d2 100644 --- a/internal/reader/processor/processor.go +++ b/internal/reader/processor/processor.go @@ -23,6 +23,8 @@ import ( "miniflux.app/v2/internal/storage" "github.com/PuerkitoBio/goquery" + "github.com/tdewolff/minify/v2" + "github.com/tdewolff/minify/v2/html" ) var ( @@ -36,6 +38,9 @@ var ( func ProcessFeedEntries(store *storage.Storage, feed *model.Feed, user *model.User, forceRefresh bool) { var filteredEntries model.Entries + minifier := minify.New() + minifier.AddFunc("text/html", html.Minify) + // Process older entries first for i := len(feed.Entries) - 1; i >= 0; i-- { entry := feed.Entries[i] @@ -102,7 +107,11 @@ func ProcessFeedEntries(store *storage.Storage, feed *model.Feed, user *model.Us ) } else if content != "" { // We replace the entry content only if the scraper doesn't return any error. - entry.Content = content + if minifiedHTML, err := minifier.String("text/html", content); err == nil { + entry.Content = minifiedHTML + } else { + entry.Content = content + } } } @@ -180,6 +189,9 @@ func isAllowedEntry(feed *model.Feed, entry *model.Entry) bool { // ProcessEntryWebPage downloads the entry web page and apply rewrite rules. 
func ProcessEntryWebPage(feed *model.Feed, entry *model.Entry, user *model.User) error { + minifier := minify.New() + minifier.AddFunc("text/html", html.Minify) + startTime := time.Now() websiteURL := getUrlFromEntry(feed, entry) @@ -211,7 +223,11 @@ func ProcessEntryWebPage(feed *model.Feed, entry *model.Entry, user *model.User) } if content != "" { - entry.Content = content + if minifiedHTML, err := minifier.String("text/html", content); err == nil { + entry.Content = minifiedHTML + } else { + entry.Content = content + } if user.ShowReadingTime { entry.ReadingTime = readingtime.EstimateReadingTime(entry.Content, user.DefaultReadingSpeed, user.CJKReadingSpeed) }