From a631bd527d73b5b8d8e85343a1d5fe438e89dad7 Mon Sep 17 00:00:00 2001 From: fin444 Date: Wed, 1 May 2024 19:28:59 -0400 Subject: [PATCH] options: add FETCH_NEBULA_WATCH_TIME --- internal/config/config_test.go | 18 ++++++++ internal/config/options.go | 10 +++++ internal/config/parser.go | 2 + internal/reader/processor/processor.go | 60 ++++++++++++++++++++++++++ miniflux.1 | 6 +++ 5 files changed, 96 insertions(+) diff --git a/internal/config/config_test.go b/internal/config/config_test.go index bcf58da3..bc1db2b5 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -2021,6 +2021,24 @@ func TestAuthProxyUserCreationAdmin(t *testing.T) { } } +func TestFetchNebulaWatchTime(t *testing.T) { + os.Clearenv() + os.Setenv("FETCH_NEBULA_WATCH_TIME", "1") + + parser := NewParser() + opts, err := parser.ParseEnvironmentVariables() + if err != nil { + t.Fatalf(`Parsing failure: %v`, err) + } + + expected := true + result := opts.FetchNebulaWatchTime() + + if result != expected { + t.Fatalf(`Unexpected FETCH_NEBULA_WATCH_TIME value, got %v instead of %v`, result, expected) + } +} + func TestFetchOdyseeWatchTime(t *testing.T) { os.Clearenv() os.Setenv("FETCH_ODYSEE_WATCH_TIME", "1") diff --git a/internal/config/options.go b/internal/config/options.go index 89bff536..d5d793ac 100644 --- a/internal/config/options.go +++ b/internal/config/options.go @@ -56,6 +56,7 @@ const ( defaultMediaResourceTypes = "image" defaultMediaProxyURL = "" defaultFilterEntryMaxAgeDays = 0 + defaultFetchNebulaWatchTime = false defaultFetchOdyseeWatchTime = false defaultFetchYouTubeWatchTime = false defaultYouTubeEmbedUrlOverride = "https://www.youtube-nocookie.com/embed/" @@ -140,6 +141,7 @@ type Options struct { mediaProxyMode string mediaProxyResourceTypes []string mediaProxyCustomURL string + fetchNebulaWatchTime bool fetchOdyseeWatchTime bool fetchYouTubeWatchTime bool filterEntryMaxAgeDays int @@ -216,6 +218,7 @@ func NewOptions() *Options { mediaProxyResourceTypes: 
[]string{defaultMediaResourceTypes}, mediaProxyCustomURL: defaultMediaProxyURL, filterEntryMaxAgeDays: defaultFilterEntryMaxAgeDays, + fetchNebulaWatchTime: defaultFetchNebulaWatchTime, fetchOdyseeWatchTime: defaultFetchOdyseeWatchTime, fetchYouTubeWatchTime: defaultFetchYouTubeWatchTime, youTubeEmbedUrlOverride: defaultYouTubeEmbedUrlOverride, @@ -486,6 +489,12 @@ func (o *Options) YouTubeEmbedUrlOverride() string { return o.youTubeEmbedUrlOverride } +// FetchNebulaWatchTime returns true if the Nebula video duration +// should be fetched and used as a reading time. +func (o *Options) FetchNebulaWatchTime() bool { + return o.fetchNebulaWatchTime +} + // FetchOdyseeWatchTime returns true if the Odysee video duration // should be fetched and used as a reading time. func (o *Options) FetchOdyseeWatchTime() bool { @@ -647,6 +656,7 @@ func (o *Options) SortedOptions(redactSecret bool) []*Option { "DISABLE_SCHEDULER_SERVICE": !o.schedulerService, "FILTER_ENTRY_MAX_AGE_DAYS": o.filterEntryMaxAgeDays, "FETCH_YOUTUBE_WATCH_TIME": o.fetchYouTubeWatchTime, + "FETCH_NEBULA_WATCH_TIME": o.fetchNebulaWatchTime, "FETCH_ODYSEE_WATCH_TIME": o.fetchOdyseeWatchTime, "HTTPS": o.HTTPS, "HTTP_CLIENT_MAX_BODY_SIZE": o.httpClientMaxBodySize, diff --git a/internal/config/parser.go b/internal/config/parser.go index 24704710..f7e58aaa 100644 --- a/internal/config/parser.go +++ b/internal/config/parser.go @@ -259,6 +259,8 @@ func (p *Parser) parseLines(lines []string) (err error) { p.opts.metricsPassword = parseString(value, defaultMetricsPassword) case "METRICS_PASSWORD_FILE": p.opts.metricsPassword = readSecretFile(value, defaultMetricsPassword) + case "FETCH_NEBULA_WATCH_TIME": + p.opts.fetchNebulaWatchTime = parseBool(value, defaultFetchNebulaWatchTime) case "FETCH_ODYSEE_WATCH_TIME": p.opts.fetchOdyseeWatchTime = parseBool(value, defaultFetchOdyseeWatchTime) case "FETCH_YOUTUBE_WATCH_TIME": diff --git a/internal/reader/processor/processor.go b/internal/reader/processor/processor.go index 
fa3e53be..c5d368e4 100644 --- a/internal/reader/processor/processor.go +++ b/internal/reader/processor/processor.go @@ -29,6 +29,7 @@ import ( var ( youtubeRegex = regexp.MustCompile(`youtube\.com/watch\?v=(.*)$`) + nebulaRegex = regexp.MustCompile(`^https://nebula\.tv`) odyseeRegex = regexp.MustCompile(`^https://odysee\.com`) iso8601Regex = regexp.MustCompile(`^P((?P<year>\d+)Y)?((?P<month>\d+)M)?((?P<week>\d+)W)?((?P<day>\d+)D)?(T((?P<hour>\d+)H)?((?P<minute>\d+)M)?((?P<second>\d+)S)?)?$`) customReplaceRuleRegex = regexp.MustCompile(`rewrite\("(.*)"\|"(.*)"\)`) @@ -277,6 +278,25 @@ func updateEntryReadingTime(store *storage.Storage, feed *model.Feed, entry *mod } } + if shouldFetchNebulaWatchTime(entry) { + if entryIsNew { + watchTime, err := fetchNebulaWatchTime(entry.URL) + if err != nil { + slog.Warn("Unable to fetch Nebula watch time", + slog.Int64("user_id", user.ID), + slog.Int64("entry_id", entry.ID), + slog.String("entry_url", entry.URL), + slog.Int64("feed_id", feed.ID), + slog.String("feed_url", feed.FeedURL), + slog.Any("error", err), + ) + } + entry.ReadingTime = watchTime + } else { + entry.ReadingTime = store.GetReadTime(feed.ID, entry.Hash) + } + } + if shouldFetchOdyseeWatchTime(entry) { if entryIsNew { watchTime, err := fetchOdyseeWatchTime(entry.URL) @@ -311,6 +331,14 @@ func shouldFetchYouTubeWatchTime(entry *model.Entry) bool { return urlMatchesYouTubePattern } +func shouldFetchNebulaWatchTime(entry *model.Entry) bool { + if !config.Opts.FetchNebulaWatchTime() { + return false + } + matches := nebulaRegex.FindStringSubmatch(entry.URL) + return matches != nil +} + func shouldFetchOdyseeWatchTime(entry *model.Entry) bool { if !config.Opts.FetchOdyseeWatchTime() { return false @@ -350,6 +378,38 @@ func fetchYouTubeWatchTime(websiteURL string) (int, error) { return int(dur.Minutes()), nil } +func fetchNebulaWatchTime(websiteURL string) (int, error) { + requestBuilder := fetcher.NewRequestBuilder() + requestBuilder.WithTimeout(config.Opts.HTTPClientTimeout()) +
requestBuilder.WithProxy(config.Opts.HTTPClientProxy()) + + responseHandler := fetcher.NewResponseHandler(requestBuilder.ExecuteRequest(websiteURL)) + defer responseHandler.Close() + + if localizedError := responseHandler.LocalizedError(); localizedError != nil { + slog.Warn("Unable to fetch Nebula watch time", slog.String("website_url", websiteURL), slog.Any("error", localizedError.Error())) + return 0, localizedError.Error() + } + + doc, docErr := goquery.NewDocumentFromReader(responseHandler.Body(config.Opts.HTTPClientMaxBodySize())) + if docErr != nil { + return 0, docErr + } + + durs, exists := doc.Find(`meta[property="video:duration"]`).First().Attr("content") + // durs contains video watch time in seconds + if !exists { + return 0, errors.New("duration has not found") + } + + dur, err := strconv.ParseInt(durs, 10, 64) + if err != nil { + return 0, fmt.Errorf("unable to parse duration %s: %v", durs, err) + } + + return int(dur / 60), nil +} + func fetchOdyseeWatchTime(websiteURL string) (int, error) { requestBuilder := fetcher.NewRequestBuilder() requestBuilder.WithTimeout(config.Opts.HTTPClientTimeout()) diff --git a/miniflux.1 b/miniflux.1 index 67132a5d..d0879f09 100644 --- a/miniflux.1 +++ b/miniflux.1 @@ -244,6 +244,12 @@ Set the value to 1 to disable the internal scheduler service\&. .br Default is false (The internal scheduler service is enabled)\&. .TP +.B FETCH_NEBULA_WATCH_TIME +Set the value to 1 to scrape video duration from Nebula website and +use it as a reading time\&. +.br +Disabled by default\&. +.TP .B FETCH_ODYSEE_WATCH_TIME Set the value to 1 to scrape video duration from Odysee website and use it as a reading time\&.