Add feed option to ignore HTTP cache

This commit is contained in:
Frédéric Guillot 2020-06-05 21:50:59 -07:00
parent 57f62b4797
commit 6c6ca69141
23 changed files with 105 additions and 52 deletions

View file

@ -12,7 +12,7 @@ import (
"miniflux.app/logger"
)
const schemaVersion = 30
const schemaVersion = 31
// Migrate executes database migrations.
func Migrate(db *sql.DB) {

View file

@ -182,6 +182,7 @@ create unique index entries_share_code_idx on entries using btree(share_code) wh
"schema_version_30": `alter table feeds add column next_check_at timestamp with time zone default now();
create index entries_user_feed_idx on entries (user_id, feed_id);
`,
"schema_version_31": `alter table feeds add column ignore_http_cache bool default false;`,
"schema_version_4": `create type entry_sorting_direction as enum('asc', 'desc');
alter table users add column entry_direction entry_sorting_direction default 'asc';
`,
@ -235,6 +236,7 @@ var SqlMapChecksums = map[string]string{
"schema_version_29": "527403d951d025b387baf7b1ab80c014752c5429cc0b9851aeb34b7716cf2c68",
"schema_version_3": "a54745dbc1c51c000f74d4e5068f1e2f43e83309f023415b1749a47d5c1e0f12",
"schema_version_30": "3ec48a9b2e7a0fc32c85f31652f723565c34213f5f2d7e5e5076aad8f0b40d23",
"schema_version_31": "9290ef295731b03ddfe32dcaded0be70d41b63572420ad379cf2874a9b54581c",
"schema_version_4": "216ea3a7d3e1704e40c797b5dc47456517c27dbb6ca98bf88812f4f63d74b5d9",
"schema_version_5": "46397e2f5f2c82116786127e9f6a403e975b14d2ca7b652a48cd1ba843e6a27c",
"schema_version_6": "9d05b4fb223f0e60efc716add5048b0ca9c37511cf2041721e20505d6d798ce4",

View file

@ -0,0 +1 @@
alter table feeds add column ignore_http_cache bool default false;

View file

@ -50,12 +50,22 @@ type Client struct {
}
func (c *Client) String() string {
etagHeader := c.etagHeader
if c.etagHeader == "" {
etagHeader = "None"
}
lastModifiedHeader := c.lastModifiedHeader
if c.lastModifiedHeader == "" {
lastModifiedHeader = "None"
}
return fmt.Sprintf(
`InputURL=%q RequestURL=%q ETag=%s LastModified=%q BasicAuth=%v UserAgent=%q`,
`InputURL=%q RequestURL=%q ETag=%s LastModified=%s BasicAuth=%v UserAgent=%q`,
c.inputURL,
c.requestURL,
c.etagHeader,
c.lastModifiedHeader,
etagHeader,
lastModifiedHeader,
c.authorizationHeader != "" || (c.username != "" && c.password != ""),
c.userAgent,
)

View file

@ -247,6 +247,7 @@ var translations = map[string]string{
"form.feed.label.user_agent": "Standardbenutzeragenten überschreiben",
"form.feed.label.scraper_rules": "Extraktionsregeln",
"form.feed.label.rewrite_rules": "Umschreiberegeln",
"form.feed.label.ignore_http_cache": "Negeer HTTP-cache",
"form.feed.label.disabled": "Dieses Abonnement nicht aktualisieren",
"form.category.label.title": "Titel",
"form.user.label.username": "Benutzername",
@ -583,6 +584,7 @@ var translations = map[string]string{
"form.feed.label.user_agent": "Override Default User Agent",
"form.feed.label.scraper_rules": "Scraper Rules",
"form.feed.label.rewrite_rules": "Rewrite Rules",
"form.feed.label.ignore_http_cache": "Ignore HTTP cache",
"form.feed.label.disabled": "Do not refresh this feed",
"form.category.label.title": "Title",
"form.user.label.username": "Username",
@ -899,6 +901,7 @@ var translations = map[string]string{
"form.feed.label.user_agent": "Invalidar el agente de usuario predeterminado",
"form.feed.label.scraper_rules": "Reglas de raspador",
"form.feed.label.rewrite_rules": "Reglas de reescribir",
"form.feed.label.ignore_http_cache": "Ignorar caché HTTP",
"form.feed.label.disabled": "No actualice este feed",
"form.category.label.title": "Título",
"form.user.label.username": "Nombre de usuario",
@ -1215,6 +1218,7 @@ var translations = map[string]string{
"form.feed.label.user_agent": "Remplacer l'agent utilisateur par défaut",
"form.feed.label.scraper_rules": "Règles pour récupérer le contenu original",
"form.feed.label.rewrite_rules": "Règles de réécriture",
"form.feed.label.ignore_http_cache": "Ignore cache HTTP",
"form.feed.label.disabled": "Ne pas actualiser ce flux",
"form.category.label.title": "Titre",
"form.user.label.username": "Nom d'utilisateur",
@ -1551,6 +1555,7 @@ var translations = map[string]string{
"form.feed.label.user_agent": "Usa user agent personalizzato",
"form.feed.label.scraper_rules": "Regole di estrazione del contenuto",
"form.feed.label.rewrite_rules": "Regole di impaginazione del contenuto",
"form.feed.label.ignore_http_cache": "Ignora cache HTTP",
"form.feed.label.disabled": "Non aggiornare questo feed",
"form.category.label.title": "Titolo",
"form.user.label.username": "Nome utente",
@ -1867,6 +1872,7 @@ var translations = map[string]string{
"form.feed.label.user_agent": "ディフォルトの User Agent を上書きする",
"form.feed.label.scraper_rules": "スクラップルール",
"form.feed.label.rewrite_rules": "Rewrite ルール",
"form.feed.label.ignore_http_cache": "HTTPキャッシュを無視",
"form.feed.label.disabled": "このフィードを更新しない",
"form.category.label.title": "タイトル",
"form.user.label.username": "ユーザー名",
@ -2183,6 +2189,7 @@ var translations = map[string]string{
"form.feed.label.user_agent": "Standaard User Agent overschrijven",
"form.feed.label.scraper_rules": "Scraper regels",
"form.feed.label.rewrite_rules": "Rewrite regels",
"form.feed.label.ignore_http_cache": "Negeer HTTP-cache",
"form.feed.label.disabled": "Vernieuw deze feed niet",
"form.category.label.title": "Naam",
"form.user.label.username": "Gebruikersnaam",
@ -2519,6 +2526,7 @@ var translations = map[string]string{
"form.feed.label.user_agent": "Zastąp domyślny agent użytkownika",
"form.feed.label.scraper_rules": "Zasady ekstrakcji",
"form.feed.label.rewrite_rules": "Reguły zapisu",
"form.feed.label.ignore_http_cache": "Zignoruj pamięć podręczną HTTP",
"form.feed.label.disabled": "Не обновлять этот канал",
"form.category.label.title": "Tytuł",
"form.user.label.username": "Nazwa użytkownika",
@ -2861,6 +2869,7 @@ var translations = map[string]string{
"form.feed.label.user_agent": "Переопределить User Agent по умолчанию",
"form.feed.label.scraper_rules": "Правила Scraper",
"form.feed.label.rewrite_rules": "Правила Rewrite",
"form.feed.label.ignore_http_cache": "Игнорировать HTTP-кеш",
"form.feed.label.disabled": "Не обновлять этот канал",
"form.category.label.title": "Название",
"form.user.label.username": "Имя пользователя",
@ -3181,6 +3190,7 @@ var translations = map[string]string{
"form.feed.label.user_agent": "覆盖默认 User-Agent",
"form.feed.label.scraper_rules": "Scraper 规则",
"form.feed.label.rewrite_rules": "重写规则",
"form.feed.label.ignore_http_cache": "忽略HTTP缓存",
"form.feed.label.disabled": "请勿刷新此Feed",
"form.category.label.title": "标题",
"form.user.label.username": "用户名",
@ -3269,14 +3279,14 @@ var translations = map[string]string{
}
var translationsChecksums = map[string]string{
"de_DE": "9dcd00605e1cce53d2e9b5009cf063d5b948022835c488e1ca540a9770a4d857",
"en_US": "144dcd9d68909ee2de603806dbcd378f2bee5b26789bff60dac295659c510497",
"es_ES": "f23e54a0f39ffd724d389fb4c5a5e274d1b4370bdbbb544efc113fc56489a6d7",
"fr_FR": "b9dd548782871317a2496e291b34646218084422d6120d50edc32ec4b69eaa93",
"it_IT": "bb7fd373d0e41b3942709d81213ade3e3a0ca0291d341453e21879dda53ef116",
"ja_JP": "0597183310bee313e70bf80dd2c31df732d8e7f55b02a6689132dc219a39298c",
"nl_NL": "c7de86bd209281f4e20847bae3a3c50c66f56655e16d7fed6bf8dd2844ae034b",
"pl_PL": "b5ea016c2ec6d37f81910d000ca43d732ebedb6bea0c862f94e4ac8787781dc2",
"ru_RU": "fa934cd06e362b782246199a84274aab0c6812437d07f4b112df02946a8c9cb4",
"zh_CN": "7c9069daf0bc611469404046f1902c2cb8f8b7c7dbb2708914b022f71e58d0f6",
"de_DE": "d648712531636d88c3c4d0cc6a509c80afc30f9999a8a591db6cbfcbb084db0b",
"en_US": "3fc6d6aa333363b81b88e33300e62db8878d190bc35e1c16881fded5d3d805d4",
"es_ES": "b441b4bff456bbb40332798f434b08b69e204e6ab7518d18a769df8ab6a56666",
"fr_FR": "1b5710d2803b634bd901611e204bd92a8702c5c4e637e26a0fb8c4061ce2cbb3",
"it_IT": "c543e0803c78b8549702e37c8c259cce43925fa8a949a6b48f89a9223bed9f24",
"ja_JP": "5a11a2a4c632f8df45d107750ab11149eff603eb944233393784d8b53183d086",
"nl_NL": "dd802be5de3a82924eb51f84e565151a1fb68fc13e0d75f81d10de90386b55d0",
"pl_PL": "32f8a9cfd193297cfe9c2747056621a8f57c17c50b816bf665647de05cdb328d",
"ru_RU": "b6664af10cc9c4acf111b196f92b6c1d7c271c764970250d48655057348add77",
"zh_CN": "08deb6dab5a9613a25f693aec1594564de9eae2a3fbccaea5147870499921e46",
}

View file

@ -242,6 +242,7 @@
"form.feed.label.user_agent": "Standardbenutzeragenten überschreiben",
"form.feed.label.scraper_rules": "Extraktionsregeln",
"form.feed.label.rewrite_rules": "Umschreiberegeln",
"form.feed.label.ignore_http_cache": "Negeer HTTP-cache",
"form.feed.label.disabled": "Dieses Abonnement nicht aktualisieren",
"form.category.label.title": "Titel",
"form.user.label.username": "Benutzername",

View file

@ -242,6 +242,7 @@
"form.feed.label.user_agent": "Override Default User Agent",
"form.feed.label.scraper_rules": "Scraper Rules",
"form.feed.label.rewrite_rules": "Rewrite Rules",
"form.feed.label.ignore_http_cache": "Ignore HTTP cache",
"form.feed.label.disabled": "Do not refresh this feed",
"form.category.label.title": "Title",
"form.user.label.username": "Username",

View file

@ -242,6 +242,7 @@
"form.feed.label.user_agent": "Invalidar el agente de usuario predeterminado",
"form.feed.label.scraper_rules": "Reglas de raspador",
"form.feed.label.rewrite_rules": "Reglas de reescribir",
"form.feed.label.ignore_http_cache": "Ignorar caché HTTP",
"form.feed.label.disabled": "No actualice este feed",
"form.category.label.title": "Título",
"form.user.label.username": "Nombre de usuario",

View file

@ -242,6 +242,7 @@
"form.feed.label.user_agent": "Remplacer l'agent utilisateur par défaut",
"form.feed.label.scraper_rules": "Règles pour récupérer le contenu original",
"form.feed.label.rewrite_rules": "Règles de réécriture",
"form.feed.label.ignore_http_cache": "Ignore cache HTTP",
"form.feed.label.disabled": "Ne pas actualiser ce flux",
"form.category.label.title": "Titre",
"form.user.label.username": "Nom d'utilisateur",

View file

@ -242,6 +242,7 @@
"form.feed.label.user_agent": "Usa user agent personalizzato",
"form.feed.label.scraper_rules": "Regole di estrazione del contenuto",
"form.feed.label.rewrite_rules": "Regole di impaginazione del contenuto",
"form.feed.label.ignore_http_cache": "Ignora cache HTTP",
"form.feed.label.disabled": "Non aggiornare questo feed",
"form.category.label.title": "Titolo",
"form.user.label.username": "Nome utente",

View file

@ -242,6 +242,7 @@
"form.feed.label.user_agent": "ディフォルトの User Agent を上書きする",
"form.feed.label.scraper_rules": "スクラップルール",
"form.feed.label.rewrite_rules": "Rewrite ルール",
"form.feed.label.ignore_http_cache": "HTTPキャッシュを無視",
"form.feed.label.disabled": "このフィードを更新しない",
"form.category.label.title": "タイトル",
"form.user.label.username": "ユーザー名",

View file

@ -242,6 +242,7 @@
"form.feed.label.user_agent": "Standaard User Agent overschrijven",
"form.feed.label.scraper_rules": "Scraper regels",
"form.feed.label.rewrite_rules": "Rewrite regels",
"form.feed.label.ignore_http_cache": "Negeer HTTP-cache",
"form.feed.label.disabled": "Vernieuw deze feed niet",
"form.category.label.title": "Naam",
"form.user.label.username": "Gebruikersnaam",

View file

@ -244,6 +244,7 @@
"form.feed.label.user_agent": "Zastąp domyślny agent użytkownika",
"form.feed.label.scraper_rules": "Zasady ekstrakcji",
"form.feed.label.rewrite_rules": "Reguły zapisu",
"form.feed.label.ignore_http_cache": "Zignoruj pamięć podręczną HTTP",
"form.feed.label.disabled": "Не обновлять этот канал",
"form.category.label.title": "Tytuł",
"form.user.label.username": "Nazwa użytkownika",

View file

@ -244,6 +244,7 @@
"form.feed.label.user_agent": "Переопределить User Agent по умолчанию",
"form.feed.label.scraper_rules": "Правила Scraper",
"form.feed.label.rewrite_rules": "Правила Rewrite",
"form.feed.label.ignore_http_cache": "Игнорировать HTTP-кеш",
"form.feed.label.disabled": "Не обновлять этот канал",
"form.category.label.title": "Название",
"form.user.label.username": "Имя пользователя",

View file

@ -240,6 +240,7 @@
"form.feed.label.user_agent": "覆盖默认 User-Agent",
"form.feed.label.scraper_rules": "Scraper 规则",
"form.feed.label.rewrite_rules": "重写规则",
"form.feed.label.ignore_http_cache": "忽略HTTP缓存",
"form.feed.label.disabled": "请勿刷新此Feed",
"form.category.label.title": "标题",
"form.user.label.username": "用户名",

View file

@ -33,6 +33,7 @@ type Feed struct {
Username string `json:"username"`
Password string `json:"password"`
Disabled bool `json:"disabled"`
IgnoreHTTPCache bool `json:"ignore_http_cache"`
Category *Category `json:"category,omitempty"`
Entries Entries `json:"entries,omitempty"`
Icon *FeedIcon `json:"icon"`

View file

@ -105,8 +105,12 @@ func (h *Handler) RefreshFeed(userID, feedID int64) error {
request := client.New(originalFeed.FeedURL)
request.WithCredentials(originalFeed.Username, originalFeed.Password)
request.WithCacheHeaders(originalFeed.EtagHeader, originalFeed.LastModifiedHeader)
request.WithUserAgent(originalFeed.UserAgent)
if !originalFeed.IgnoreHTTPCache {
request.WithCacheHeaders(originalFeed.EtagHeader, originalFeed.LastModifiedHeader)
}
response, requestErr := browser.Exec(request)
if requestErr != nil {
originalFeed.WithError(requestErr.Localize(printer))
@ -114,7 +118,7 @@ func (h *Handler) RefreshFeed(userID, feedID int64) error {
return requestErr
}
if response.IsModified(originalFeed.EtagHeader, originalFeed.LastModifiedHeader) {
if originalFeed.IgnoreHTTPCache || response.IsModified(originalFeed.EtagHeader, originalFeed.LastModifiedHeader) {
logger.Debug("[Handler:RefreshFeed] Feed #%d has been modified", feedID)
updatedFeed, parseErr := parser.ParseFeed(response.BodyAsString())

View file

@ -16,6 +16,8 @@ import (
// ProcessFeedEntries downloads original web page for entries and apply filters.
func ProcessFeedEntries(store *storage.Storage, feed *model.Feed) {
for _, entry := range feed.Entries {
logger.Debug("[Feed #%d] Processing entry %s", feed.ID, entry.URL)
if feed.Crawler {
if !store.EntryURLExists(feed.ID, entry.URL) {
content, err := scraper.Fetch(entry.URL, feed.ScraperRules, feed.UserAgent)

View file

@ -31,6 +31,7 @@ var feedListQuery = `
f.user_agent,
f.username,
f.password,
f.ignore_http_cache,
f.disabled,
f.category_id,
c.title as category_title,
@ -131,6 +132,7 @@ func (s *Storage) FeedsByCategoryWithCounters(userID, categoryID int64) (model.F
f.user_agent,
f.username,
f.password,
f.ignore_http_cache,
f.disabled,
f.category_id,
c.title as category_title,
@ -239,6 +241,7 @@ func (s *Storage) fetchFeeds(feedQuery, counterQuery string, args ...interface{}
&feed.UserAgent,
&feed.Username,
&feed.Password,
&feed.IgnoreHTTPCache,
&feed.Disabled,
&feed.Category.ID,
&feed.Category.Title,
@ -322,6 +325,7 @@ func (s *Storage) FeedByID(userID, feedID int64) (*model.Feed, error) {
f.user_agent,
f.username,
f.password,
f.ignore_http_cache,
f.disabled,
f.category_id,
c.title as category_title,
@ -352,6 +356,7 @@ func (s *Storage) FeedByID(userID, feedID int64) (*model.Feed, error) {
&feed.UserAgent,
&feed.Username,
&feed.Password,
&feed.IgnoreHTTPCache,
&feed.Disabled,
&feed.Category.ID,
&feed.Category.Title,
@ -456,9 +461,10 @@ func (s *Storage) UpdateFeed(feed *model.Feed) (err error) {
username=$14,
password=$15,
disabled=$16,
next_check_at=$17
next_check_at=$17,
ignore_http_cache=$18
WHERE
id=$18 AND user_id=$19
id=$19 AND user_id=$20
`
_, err = s.db.Exec(query,
feed.FeedURL,
@ -478,6 +484,7 @@ func (s *Storage) UpdateFeed(feed *model.Feed) (err error) {
feed.Password,
feed.Disabled,
feed.NextCheckAt,
feed.IgnoreHTTPCache,
feed.ID,
feed.UserID,
)

View file

@ -72,6 +72,7 @@
</select>
<label><input type="checkbox" name="crawler" value="1" {{ if .form.Crawler }}checked{{ end }}> {{ t "form.feed.label.crawler" }}</label>
<label><input type="checkbox" name="ignore_http_cache" value="1" {{ if .form.IgnoreHTTPCache }}checked{{ end }}> {{ t "form.feed.label.ignore_http_cache" }}</label>
<label><input type="checkbox" name="disabled" value="1" {{ if .form.Disabled }}checked{{ end }}> {{ t "form.feed.label.disabled" }}</label>
<div class="buttons">

View file

@ -583,6 +583,7 @@ var templateViewsMap = map[string]string{
</select>
<label><input type="checkbox" name="crawler" value="1" {{ if .form.Crawler }}checked{{ end }}> {{ t "form.feed.label.crawler" }}</label>
<label><input type="checkbox" name="ignore_http_cache" value="1" {{ if .form.IgnoreHTTPCache }}checked{{ end }}> {{ t "form.feed.label.ignore_http_cache" }}</label>
<label><input type="checkbox" name="disabled" value="1" {{ if .form.Disabled }}checked{{ end }}> {{ t "form.feed.label.disabled" }}</label>
<div class="buttons">
@ -1542,7 +1543,7 @@ var templateViewsMapChecksums = map[string]string{
"create_category": "6b22b5ce51abf4e225e23a79f81be09a7fb90acb265e93a8faf9446dff74018d",
"create_user": "9b73a55233615e461d1f07d99ad1d4d3b54532588ab960097ba3e090c85aaf3a",
"edit_category": "b1c0b38f1b714c5d884edcd61e5b5295a5f1c8b71c469b35391e4dcc97cc6d36",
"edit_feed": "cc0b5dbb73f81398410958b41771ed38246bc7ae4bd548228f0d48c49a598c2a",
"edit_feed": "635bb8f7b927f3216ccd35cc6d9edd4b2af96e8677a14766a86fb77fa9fb4e38",
"edit_user": "c692db9de1a084c57b93e95a14b041d39bf489846cbb91fc982a62b72b77062a",
"entry": "d8c30d412d58e14c946ba682166f7c582948e7b0f657d04dcbc3d004267627bb",
"feed_entries": "9c70b82f55e4b311eff20be1641733612e3c1b406ce8010861e4c417d97b6dcc",

View file

@ -41,17 +41,18 @@ func (h *handler) showEditFeedPage(w http.ResponseWriter, r *http.Request) {
}
feedForm := form.FeedForm{
SiteURL: feed.SiteURL,
FeedURL: feed.FeedURL,
Title: feed.Title,
ScraperRules: feed.ScraperRules,
RewriteRules: feed.RewriteRules,
Crawler: feed.Crawler,
UserAgent: feed.UserAgent,
CategoryID: feed.Category.ID,
Username: feed.Username,
Password: feed.Password,
Disabled: feed.Disabled,
SiteURL: feed.SiteURL,
FeedURL: feed.FeedURL,
Title: feed.Title,
ScraperRules: feed.ScraperRules,
RewriteRules: feed.RewriteRules,
Crawler: feed.Crawler,
UserAgent: feed.UserAgent,
CategoryID: feed.Category.ID,
Username: feed.Username,
Password: feed.Password,
IgnoreHTTPCache: feed.IgnoreHTTPCache,
Disabled: feed.Disabled,
}
sess := session.New(h.store, request.SessionID(r))

View file

@ -14,17 +14,18 @@ import (
// FeedForm represents a feed form in the UI
type FeedForm struct {
FeedURL string
SiteURL string
Title string
ScraperRules string
RewriteRules string
Crawler bool
UserAgent string
CategoryID int64
Username string
Password string
Disabled bool
FeedURL string
SiteURL string
Title string
ScraperRules string
RewriteRules string
Crawler bool
UserAgent string
CategoryID int64
Username string
Password string
IgnoreHTTPCache bool
Disabled bool
}
// ValidateModification validates FeedForm fields
@ -49,6 +50,7 @@ func (f FeedForm) Merge(feed *model.Feed) *model.Feed {
feed.ParsingErrorMsg = ""
feed.Username = f.Username
feed.Password = f.Password
feed.IgnoreHTTPCache = f.IgnoreHTTPCache
feed.Disabled = f.Disabled
return feed
}
@ -61,16 +63,17 @@ func NewFeedForm(r *http.Request) *FeedForm {
}
return &FeedForm{
FeedURL: r.FormValue("feed_url"),
SiteURL: r.FormValue("site_url"),
Title: r.FormValue("title"),
ScraperRules: r.FormValue("scraper_rules"),
UserAgent: r.FormValue("user_agent"),
RewriteRules: r.FormValue("rewrite_rules"),
Crawler: r.FormValue("crawler") == "1",
CategoryID: int64(categoryID),
Username: r.FormValue("feed_username"),
Password: r.FormValue("feed_password"),
Disabled: r.FormValue("disabled") == "1",
FeedURL: r.FormValue("feed_url"),
SiteURL: r.FormValue("site_url"),
Title: r.FormValue("title"),
ScraperRules: r.FormValue("scraper_rules"),
UserAgent: r.FormValue("user_agent"),
RewriteRules: r.FormValue("rewrite_rules"),
Crawler: r.FormValue("crawler") == "1",
CategoryID: int64(categoryID),
Username: r.FormValue("feed_username"),
Password: r.FormValue("feed_password"),
IgnoreHTTPCache: r.FormValue("ignore_http_cache") == "1",
Disabled: r.FormValue("disabled") == "1",
}
}