reader/processor: minimize the feed's entries html

Compress the html of feed entries before storing it. This should reduce the
size of the database a bit, but more importantly, reduce the amount of data
sent to clients

minify being [stupidly fast](https://github.com/tdewolff/minify/?tab=readme-ov-file#performance), the performance impact should be in the noise level.
This commit is contained in:
jvoisin 2024-03-30 23:01:02 +01:00 committed by Frédéric Guillot
parent 4ab0d9422d
commit b205b5aad0

View file

@ -23,6 +23,8 @@ import (
"miniflux.app/v2/internal/storage"
"github.com/PuerkitoBio/goquery"
"github.com/tdewolff/minify/v2"
"github.com/tdewolff/minify/v2/html"
)
var (
@ -36,6 +38,9 @@ var (
func ProcessFeedEntries(store *storage.Storage, feed *model.Feed, user *model.User, forceRefresh bool) {
var filteredEntries model.Entries
minifier := minify.New()
minifier.AddFunc("text/html", html.Minify)
// Process older entries first
for i := len(feed.Entries) - 1; i >= 0; i-- {
entry := feed.Entries[i]
@ -102,7 +107,11 @@ func ProcessFeedEntries(store *storage.Storage, feed *model.Feed, user *model.Us
)
} else if content != "" {
// We replace the entry content only if the scraper doesn't return any error.
entry.Content = content
if minifiedHTML, err := minifier.String("text/html", content); err == nil {
entry.Content = minifiedHTML
} else {
entry.Content = content
}
}
}
@ -180,6 +189,9 @@ func isAllowedEntry(feed *model.Feed, entry *model.Entry) bool {
// ProcessEntryWebPage downloads the entry web page and apply rewrite rules.
func ProcessEntryWebPage(feed *model.Feed, entry *model.Entry, user *model.User) error {
minifier := minify.New()
minifier.AddFunc("text/html", html.Minify)
startTime := time.Now()
websiteURL := getUrlFromEntry(feed, entry)
@ -211,7 +223,11 @@ func ProcessEntryWebPage(feed *model.Feed, entry *model.Entry, user *model.User)
}
if content != "" {
entry.Content = content
if minifiedHTML, err := minifier.String("text/html", content); err == nil {
entry.Content = minifiedHTML
} else {
entry.Content = content
}
if user.ShowReadingTime {
entry.ReadingTime = readingtime.EstimateReadingTime(entry.Content, user.DefaultReadingSpeed, user.CJKReadingSpeed)
}