2017-11-20 06:10:04 +01:00
|
|
|
// Copyright 2017 Frédéric Guillot. All rights reserved.
|
|
|
|
// Use of this source code is governed by the Apache 2.0
|
|
|
|
// license that can be found in the LICENSE file.
|
|
|
|
|
|
|
|
package feed
|
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
2017-11-21 02:12:37 +01:00
|
|
|
"time"
|
|
|
|
|
2017-12-13 06:48:13 +01:00
|
|
|
"github.com/miniflux/miniflux/errors"
|
|
|
|
"github.com/miniflux/miniflux/http"
|
2017-12-16 03:55:57 +01:00
|
|
|
"github.com/miniflux/miniflux/logger"
|
2017-12-13 06:48:13 +01:00
|
|
|
"github.com/miniflux/miniflux/model"
|
|
|
|
"github.com/miniflux/miniflux/reader/icon"
|
|
|
|
"github.com/miniflux/miniflux/reader/processor"
|
|
|
|
"github.com/miniflux/miniflux/storage"
|
2018-01-03 04:15:08 +01:00
|
|
|
"github.com/miniflux/miniflux/timer"
|
2017-11-20 06:10:04 +01:00
|
|
|
)
|
|
|
|
|
|
|
|
// Message templates passed to errors.NewLocalizedError by this package.
// The text must stay byte-for-byte stable: it is handed verbatim to the
// localized error constructor (NOTE(review): presumably it doubles as a
// translation key — confirm before rewording any of them).
// They are constants because nothing in this file ever reassigns them.
const (
	// errRequestFailed wraps a low-level HTTP client error (%v).
	errRequestFailed = "Unable to execute request: %v"
	// errServerFailure reports a failing HTTP status code (%d).
	errServerFailure = "Unable to fetch feed (statusCode=%d)"
	// errDuplicate reports that the user already has this feed URL (%s).
	errDuplicate = "This feed already exists (%s)"
	// errNotFound reports an unknown feed ID (%d).
	errNotFound = "Feed %d not found"
	// errEncoding wraps a body encoding normalization error (%v).
	errEncoding = "Unable to normalize encoding: %v"
	// errCategoryNotFound is returned when the category check fails for the user.
	errCategoryNotFound = "Category not found for this user"
	// errEmptyFeed is returned when the response reports a zero Content-Length.
	errEmptyFeed = "This feed is empty"
)
|
|
|
|
|
|
|
|
// Handler contains all the logic to create and refresh feeds.
|
|
|
|
type Handler struct {
|
|
|
|
store *storage.Storage
|
|
|
|
}
|
|
|
|
|
|
|
|
// CreateFeed fetch, parse and store a new feed.
|
2017-12-13 04:19:36 +01:00
|
|
|
func (h *Handler) CreateFeed(userID, categoryID int64, url string, crawler bool) (*model.Feed, error) {
|
2018-01-03 04:15:08 +01:00
|
|
|
defer timer.ExecutionTime(time.Now(), fmt.Sprintf("[Handler:CreateFeed] feedUrl=%s", url))
|
2017-11-20 06:10:04 +01:00
|
|
|
|
2017-11-25 07:29:20 +01:00
|
|
|
if !h.store.CategoryExists(userID, categoryID) {
|
|
|
|
return nil, errors.NewLocalizedError(errCategoryNotFound)
|
|
|
|
}
|
|
|
|
|
2017-11-21 02:12:37 +01:00
|
|
|
client := http.NewClient(url)
|
2017-11-20 06:10:04 +01:00
|
|
|
response, err := client.Get()
|
|
|
|
if err != nil {
|
2018-02-09 03:16:54 +01:00
|
|
|
if _, ok := err.(errors.LocalizedError); ok {
|
|
|
|
return nil, err
|
|
|
|
}
|
2017-11-20 06:10:04 +01:00
|
|
|
return nil, errors.NewLocalizedError(errRequestFailed, err)
|
|
|
|
}
|
|
|
|
|
|
|
|
if response.HasServerFailure() {
|
|
|
|
return nil, errors.NewLocalizedError(errServerFailure, response.StatusCode)
|
|
|
|
}
|
|
|
|
|
2018-01-05 03:32:36 +01:00
|
|
|
// Content-Length = -1 when no Content-Length header is sent
|
|
|
|
if response.ContentLength == 0 {
|
|
|
|
return nil, errors.NewLocalizedError(errEmptyFeed)
|
|
|
|
}
|
|
|
|
|
2017-11-20 06:10:04 +01:00
|
|
|
if h.store.FeedURLExists(userID, response.EffectiveURL) {
|
|
|
|
return nil, errors.NewLocalizedError(errDuplicate, response.EffectiveURL)
|
|
|
|
}
|
|
|
|
|
2017-11-21 02:12:37 +01:00
|
|
|
body, err := response.NormalizeBodyEncoding()
|
|
|
|
if err != nil {
|
|
|
|
return nil, errors.NewLocalizedError(errEncoding, err)
|
|
|
|
}
|
|
|
|
|
|
|
|
subscription, err := parseFeed(body)
|
2017-11-20 06:10:04 +01:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2018-01-20 03:43:27 +01:00
|
|
|
feedProcessor := processor.NewFeedProcessor(userID, h.store, subscription)
|
2017-12-13 04:19:36 +01:00
|
|
|
feedProcessor.WithCrawler(crawler)
|
2017-12-12 07:16:32 +01:00
|
|
|
feedProcessor.Process()
|
|
|
|
|
2017-11-20 06:10:04 +01:00
|
|
|
subscription.Category = &model.Category{ID: categoryID}
|
|
|
|
subscription.EtagHeader = response.ETag
|
|
|
|
subscription.LastModifiedHeader = response.LastModified
|
|
|
|
subscription.FeedURL = response.EffectiveURL
|
|
|
|
subscription.UserID = userID
|
2017-12-13 04:19:36 +01:00
|
|
|
subscription.Crawler = crawler
|
2017-11-20 06:10:04 +01:00
|
|
|
|
2018-01-04 03:23:21 +01:00
|
|
|
if subscription.SiteURL == "" {
|
|
|
|
subscription.SiteURL = subscription.FeedURL
|
|
|
|
}
|
|
|
|
|
2017-11-20 06:10:04 +01:00
|
|
|
err = h.store.CreateFeed(subscription)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2017-12-16 03:55:57 +01:00
|
|
|
logger.Debug("[Handler:CreateFeed] Feed saved with ID: %d", subscription.ID)
|
2017-11-20 06:10:04 +01:00
|
|
|
|
|
|
|
icon, err := icon.FindIcon(subscription.SiteURL)
|
|
|
|
if err != nil {
|
2017-12-16 03:55:57 +01:00
|
|
|
logger.Error("[Handler:CreateFeed] %v", err)
|
2017-11-20 06:10:04 +01:00
|
|
|
} else if icon == nil {
|
2017-12-16 03:55:57 +01:00
|
|
|
logger.Info("No icon found for feedID=%d", subscription.ID)
|
2017-11-20 06:10:04 +01:00
|
|
|
} else {
|
|
|
|
h.store.CreateFeedIcon(subscription, icon)
|
|
|
|
}
|
|
|
|
|
|
|
|
return subscription, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// RefreshFeed fetch and update a feed if necessary.
|
|
|
|
func (h *Handler) RefreshFeed(userID, feedID int64) error {
|
2018-01-03 04:15:08 +01:00
|
|
|
defer timer.ExecutionTime(time.Now(), fmt.Sprintf("[Handler:RefreshFeed] feedID=%d", feedID))
|
2017-11-20 06:10:04 +01:00
|
|
|
|
2017-11-28 06:30:04 +01:00
|
|
|
originalFeed, err := h.store.FeedByID(userID, feedID)
|
2017-11-20 06:10:04 +01:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
if originalFeed == nil {
|
|
|
|
return errors.NewLocalizedError(errNotFound, feedID)
|
|
|
|
}
|
|
|
|
|
2017-11-21 02:12:37 +01:00
|
|
|
client := http.NewClientWithCacheHeaders(originalFeed.FeedURL, originalFeed.EtagHeader, originalFeed.LastModifiedHeader)
|
2017-11-20 06:10:04 +01:00
|
|
|
response, err := client.Get()
|
|
|
|
if err != nil {
|
2018-02-09 03:16:54 +01:00
|
|
|
var customErr errors.LocalizedError
|
|
|
|
if lerr, ok := err.(errors.LocalizedError); ok {
|
|
|
|
customErr = lerr
|
|
|
|
} else {
|
|
|
|
customErr = errors.NewLocalizedError(errRequestFailed, err)
|
|
|
|
}
|
|
|
|
|
2017-11-20 06:10:04 +01:00
|
|
|
originalFeed.ParsingErrorCount++
|
|
|
|
originalFeed.ParsingErrorMsg = customErr.Error()
|
|
|
|
h.store.UpdateFeed(originalFeed)
|
|
|
|
return customErr
|
|
|
|
}
|
|
|
|
|
|
|
|
originalFeed.CheckedAt = time.Now()
|
|
|
|
|
|
|
|
if response.HasServerFailure() {
|
|
|
|
err := errors.NewLocalizedError(errServerFailure, response.StatusCode)
|
|
|
|
originalFeed.ParsingErrorCount++
|
|
|
|
originalFeed.ParsingErrorMsg = err.Error()
|
|
|
|
h.store.UpdateFeed(originalFeed)
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
if response.IsModified(originalFeed.EtagHeader, originalFeed.LastModifiedHeader) {
|
2017-12-16 03:55:57 +01:00
|
|
|
logger.Debug("[Handler:RefreshFeed] Feed #%d has been modified", feedID)
|
2018-01-05 03:32:36 +01:00
|
|
|
|
|
|
|
// Content-Length = -1 when no Content-Length header is sent
|
|
|
|
if response.ContentLength == 0 {
|
|
|
|
err := errors.NewLocalizedError(errEmptyFeed)
|
|
|
|
originalFeed.ParsingErrorCount++
|
|
|
|
originalFeed.ParsingErrorMsg = err.Error()
|
|
|
|
h.store.UpdateFeed(originalFeed)
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2017-11-21 02:12:37 +01:00
|
|
|
body, err := response.NormalizeBodyEncoding()
|
|
|
|
if err != nil {
|
|
|
|
return errors.NewLocalizedError(errEncoding, err)
|
|
|
|
}
|
2017-11-20 06:10:04 +01:00
|
|
|
|
2017-11-21 02:12:37 +01:00
|
|
|
subscription, err := parseFeed(body)
|
2017-11-20 06:10:04 +01:00
|
|
|
if err != nil {
|
|
|
|
originalFeed.ParsingErrorCount++
|
|
|
|
originalFeed.ParsingErrorMsg = err.Error()
|
|
|
|
h.store.UpdateFeed(originalFeed)
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2018-01-20 03:43:27 +01:00
|
|
|
feedProcessor := processor.NewFeedProcessor(userID, h.store, subscription)
|
2017-12-12 07:16:32 +01:00
|
|
|
feedProcessor.WithScraperRules(originalFeed.ScraperRules)
|
|
|
|
feedProcessor.WithRewriteRules(originalFeed.RewriteRules)
|
2017-12-13 04:19:36 +01:00
|
|
|
feedProcessor.WithCrawler(originalFeed.Crawler)
|
2017-12-12 07:16:32 +01:00
|
|
|
feedProcessor.Process()
|
|
|
|
|
2017-11-20 06:10:04 +01:00
|
|
|
originalFeed.EtagHeader = response.ETag
|
|
|
|
originalFeed.LastModifiedHeader = response.LastModified
|
|
|
|
|
2018-01-20 23:04:19 +01:00
|
|
|
// Note: We don't update existing entries when the crawler is enabled (we crawl only inexisting entries).
|
|
|
|
if err := h.store.UpdateEntries(originalFeed.UserID, originalFeed.ID, subscription.Entries, !originalFeed.Crawler); err != nil {
|
2017-11-20 06:10:04 +01:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
if !h.store.HasIcon(originalFeed.ID) {
|
2017-12-16 03:55:57 +01:00
|
|
|
logger.Debug("[Handler:RefreshFeed] Looking for feed icon")
|
2017-11-20 06:10:04 +01:00
|
|
|
icon, err := icon.FindIcon(originalFeed.SiteURL)
|
|
|
|
if err != nil {
|
2018-02-01 06:57:20 +01:00
|
|
|
logger.Debug("[Handler:RefreshFeed] %v", err)
|
2017-11-20 06:10:04 +01:00
|
|
|
} else {
|
|
|
|
h.store.CreateFeedIcon(originalFeed, icon)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
2017-12-16 03:55:57 +01:00
|
|
|
logger.Debug("[Handler:RefreshFeed] Feed #%d not modified", feedID)
|
2017-11-20 06:10:04 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
originalFeed.ParsingErrorCount = 0
|
|
|
|
originalFeed.ParsingErrorMsg = ""
|
|
|
|
|
2018-01-04 03:23:21 +01:00
|
|
|
if originalFeed.SiteURL == "" {
|
|
|
|
originalFeed.SiteURL = originalFeed.FeedURL
|
|
|
|
}
|
|
|
|
|
2017-11-20 06:10:04 +01:00
|
|
|
return h.store.UpdateFeed(originalFeed)
|
|
|
|
}
|
|
|
|
|
|
|
|
// NewFeedHandler returns a feed handler.
|
|
|
|
func NewFeedHandler(store *storage.Storage) *Handler {
|
|
|
|
return &Handler{store: store}
|
|
|
|
}
|