// Copyright 2017 Frédéric Guillot. All rights reserved.
// Use of this source code is governed by the Apache 2.0
// license that can be found in the LICENSE file.

package feed // import "miniflux.app/reader/feed"

import (
	"fmt"
	"time"

	"miniflux.app/config"
	"miniflux.app/errors"
	"miniflux.app/http/client"
	"miniflux.app/locale"
	"miniflux.app/logger"
	"miniflux.app/model"
	"miniflux.app/reader/browser"
	"miniflux.app/reader/icon"
	"miniflux.app/reader/parser"
	"miniflux.app/reader/processor"
	"miniflux.app/storage"
	"miniflux.app/timer"
)

var (
	errDuplicate        = "This feed already exists (%s)"
	errNotFound         = "Feed %d not found"
	errCategoryNotFound = "Category not found for this user"
)

// Handler contains all the logic to create and refresh feeds.
type Handler struct {
	store *storage.Storage
}
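
// Typical usage, from a caller's point of view (a minimal sketch: store is
// assumed to be an initialized *storage.Storage, and userID, categoryID and
// feedURL are assumed to exist; the zero values disable the optional crawler,
// credentials, scraper/rewrite rules and proxy):
//
//	handler := feed.NewFeedHandler(store)
//	newFeed, err := handler.CreateFeed(userID, categoryID, feedURL, false, "", "", "", "", "", false)
//	if err == nil {
//		err = handler.RefreshFeed(userID, newFeed.ID)
//	}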

// CreateFeed fetches, parses, and stores a new feed.
func (h *Handler) CreateFeed(userID, categoryID int64, url string, crawler bool, userAgent, username, password, scraperRules, rewriteRules string, fetchViaProxy bool) (*model.Feed, error) {
	defer timer.ExecutionTime(time.Now(), fmt.Sprintf("[Handler:CreateFeed] feedUrl=%s", url))

	if !h.store.CategoryExists(userID, categoryID) {
		return nil, errors.NewLocalizedError(errCategoryNotFound)
	}
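
	// Fetch the feed with the user-supplied credentials and user agent;
	// the effective URL may differ from the requested one after redirects.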
	request := client.New(url)
	request.WithCredentials(username, password)
	request.WithUserAgent(userAgent)

	if fetchViaProxy {
		request.WithProxy()
	}

	response, requestErr := browser.Exec(request)
	if requestErr != nil {
		return nil, requestErr
	}
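
	// Compare against the effective URL so that a redirect to an
	// already-subscribed feed is detected as a duplicate.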
	if h.store.FeedURLExists(userID, response.EffectiveURL) {
		return nil, errors.NewLocalizedError(errDuplicate, response.EffectiveURL)
	}

	subscription, parseErr := parser.ParseFeed(response.BodyAsString())
	if parseErr != nil {
		return nil, parseErr
	}
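
	// Attach ownership, user preferences, and HTTP response metadata to the
	// new feed before its entries are processed and persisted.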
	subscription.UserID = userID
	subscription.WithCategoryID(categoryID)
	subscription.WithBrowsingParameters(crawler, userAgent, username, password, scraperRules, rewriteRules, fetchViaProxy)
	subscription.WithClientResponse(response)
	subscription.CheckedNow()

	processor.ProcessFeedEntries(h.store, subscription)

	if storeErr := h.store.CreateFeed(subscription); storeErr != nil {
		return nil, storeErr
	}

	logger.Debug("[Handler:CreateFeed] Feed saved with ID: %d", subscription.ID)

	checkFeedIcon(h.store, subscription.ID, subscription.SiteURL, fetchViaProxy)
	return subscription, nil
}

// RefreshFeed fetches and updates a feed if necessary.
func (h *Handler) RefreshFeed(userID, feedID int64) error {
	defer timer.ExecutionTime(time.Now(), fmt.Sprintf("[Handler:RefreshFeed] feedID=%d", feedID))
	userLanguage := h.store.UserLanguage(userID)
	printer := locale.NewPrinter(userLanguage)

	originalFeed, storeErr := h.store.FeedByID(userID, feedID)
	if storeErr != nil {
		return storeErr
	}

	if originalFeed == nil {
		return errors.NewLocalizedError(errNotFound, feedID)
	}
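
	// With the entry-frequency scheduler, the next refresh is scheduled based
	// on how many entries the feed published over the last week.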
	weeklyEntryCount := 0
	if config.Opts.PollingScheduler() == model.SchedulerEntryFrequency {
		var weeklyCountErr error
		weeklyEntryCount, weeklyCountErr = h.store.WeeklyFeedEntryCount(userID, feedID)
		if weeklyCountErr != nil {
			return weeklyCountErr
		}
	}

	originalFeed.CheckedNow()
	originalFeed.ScheduleNextCheck(weeklyEntryCount)

	request := client.New(originalFeed.FeedURL)
	request.WithCredentials(originalFeed.Username, originalFeed.Password)
	request.WithUserAgent(originalFeed.UserAgent)
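
	// Send conditional headers (If-None-Match / If-Modified-Since) so that the
	// server can reply with a 304 when the feed has not changed.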
	if !originalFeed.IgnoreHTTPCache {
		request.WithCacheHeaders(originalFeed.EtagHeader, originalFeed.LastModifiedHeader)
	}

	if originalFeed.FetchViaProxy {
		request.WithProxy()
	}
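
	// Fetch the feed; on failure, record the localized error on the feed so
	// that the UI can surface it, then abort the refresh.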
	response, requestErr := browser.Exec(request)
	if requestErr != nil {
		originalFeed.WithError(requestErr.Localize(printer))
		h.store.UpdateFeedError(originalFeed)
		return requestErr
	}

	if h.store.AnotherFeedURLExists(userID, originalFeed.ID, response.EffectiveURL) {
		storeErr := errors.NewLocalizedError(errDuplicate, response.EffectiveURL)
		originalFeed.WithError(storeErr.Error())
		h.store.UpdateFeedError(originalFeed)
		return storeErr
	}
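
	// Skip parsing entirely when the server indicates that nothing changed
	// (HTTP 304), unless HTTP caching is disabled for this feed.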
	if originalFeed.IgnoreHTTPCache || response.IsModified(originalFeed.EtagHeader, originalFeed.LastModifiedHeader) {
		logger.Debug("[Handler:RefreshFeed] Feed #%d has been modified", feedID)

		updatedFeed, parseErr := parser.ParseFeed(response.BodyAsString())
		if parseErr != nil {
			originalFeed.WithError(parseErr.Localize(printer))
			h.store.UpdateFeedError(originalFeed)
			return parseErr
		}

		originalFeed.Entries = updatedFeed.Entries
		processor.ProcessFeedEntries(h.store, originalFeed)

		// We don't update existing entries when the crawler is enabled
		// (only new entries are crawled).
		if storeErr := h.store.RefreshFeedEntries(originalFeed.UserID, originalFeed.ID, originalFeed.Entries, !originalFeed.Crawler); storeErr != nil {
			originalFeed.WithError(storeErr.Error())
			h.store.UpdateFeedError(originalFeed)
			return storeErr
		}

		// We update caching headers only if the feed has been modified,
		// because some websites don't return the same headers when replying with a 304.
		originalFeed.WithClientResponse(response)
		checkFeedIcon(h.store, originalFeed.ID, originalFeed.SiteURL, originalFeed.FetchViaProxy)
	} else {
		logger.Debug("[Handler:RefreshFeed] Feed #%d not modified", feedID)
	}
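
	// The refresh went through: clear the error counter and persist the updated
	// feed state (checked date, caching headers, next check date).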
	originalFeed.ResetErrorCounter()

	if storeErr := h.store.UpdateFeed(originalFeed); storeErr != nil {
		originalFeed.WithError(storeErr.Error())
		h.store.UpdateFeedError(originalFeed)
		return storeErr
	}

	return nil
}

// NewFeedHandler returns a feed handler.
func NewFeedHandler(store *storage.Storage) *Handler {
	return &Handler{store}
}
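
// checkFeedIcon fetches and stores the feed icon when none is cached yet.
// Icon lookup failures are only logged: a missing icon should not make the
// feed creation or refresh fail.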
func checkFeedIcon(store *storage.Storage, feedID int64, websiteURL string, fetchViaProxy bool) {
	if !store.HasIcon(feedID) {
		icon, err := icon.FindIcon(websiteURL, fetchViaProxy)
		if err != nil {
			logger.Debug("CheckFeedIcon: %v (feedID=%d websiteURL=%s)", err, feedID, websiteURL)
		} else if icon == nil {
			logger.Debug("CheckFeedIcon: No icon found (feedID=%d websiteURL=%s)", feedID, websiteURL)
		} else {
			if err := store.CreateFeedIcon(feedID, icon); err != nil {
				logger.Debug("CheckFeedIcon: %v (feedID=%d websiteURL=%s)", err, feedID, websiteURL)
			}
		}
	}
}