2017-11-20 06:10:04 +01:00
|
|
|
// Copyright 2017 Frédéric Guillot. All rights reserved.
|
|
|
|
// Use of this source code is governed by the Apache 2.0
|
|
|
|
// license that can be found in the LICENSE file.
|
|
|
|
|
|
|
|
package subscription
|
|
|
|
|
|
|
|
import (
|
|
|
|
"bytes"
|
|
|
|
"fmt"
|
2017-11-21 02:12:37 +01:00
|
|
|
"io"
|
|
|
|
"time"
|
|
|
|
|
2017-12-13 06:48:13 +01:00
|
|
|
"github.com/miniflux/miniflux/errors"
|
2018-04-28 19:51:07 +02:00
|
|
|
"github.com/miniflux/miniflux/http/client"
|
2017-12-16 03:55:57 +01:00
|
|
|
"github.com/miniflux/miniflux/logger"
|
2017-12-13 06:48:13 +01:00
|
|
|
"github.com/miniflux/miniflux/reader/feed"
|
2018-01-03 04:15:08 +01:00
|
|
|
"github.com/miniflux/miniflux/timer"
|
2017-12-13 06:48:13 +01:00
|
|
|
"github.com/miniflux/miniflux/url"
|
2017-11-20 06:10:04 +01:00
|
|
|
|
|
|
|
"github.com/PuerkitoBio/goquery"
|
|
|
|
)
|
|
|
|
|
|
|
|
// User-facing messages passed to errors.NewLocalizedError by this package.
// They are constants because nothing in this file reassigns them; messages
// containing a formatting verb expect one matching argument at the call site.
const (
	// errConnectionFailure is used when the HTTP request itself fails (%v: the underlying error).
	errConnectionFailure = "Unable to open this link: %v"

	// errUnreadableDoc is used when the downloaded document cannot be parsed (%v: the underlying error).
	errUnreadableDoc = "Unable to analyze this page: %v"

	// errEmptyBody is used when the response carries no content.
	errEmptyBody = "This web page is empty"

	// errNotAuthorized is used when the response reports an authorization failure.
	errNotAuthorized = "You are not authorized to access this resource (invalid username/password)"

	// errServerFailure is used when the response reports a server failure (%d: the HTTP status code).
	errServerFailure = "Unable to fetch this resource (Status Code = %d)"
)
|
|
|
|
|
|
|
|
// FindSubscriptions downloads and try to find one or more subscriptions from an URL.
|
2018-06-20 07:58:29 +02:00
|
|
|
func FindSubscriptions(websiteURL, username, password string) (Subscriptions, error) {
|
2018-01-03 04:15:08 +01:00
|
|
|
defer timer.ExecutionTime(time.Now(), fmt.Sprintf("[FindSubscriptions] url=%s", websiteURL))
|
2017-11-20 06:10:04 +01:00
|
|
|
|
2018-04-28 19:51:07 +02:00
|
|
|
clt := client.New(websiteURL)
|
2018-06-20 07:58:29 +02:00
|
|
|
clt.WithCredentials(username, password)
|
2018-04-28 19:51:07 +02:00
|
|
|
response, err := clt.Get()
|
2017-11-20 06:10:04 +01:00
|
|
|
if err != nil {
|
2018-02-09 03:16:54 +01:00
|
|
|
if _, ok := err.(errors.LocalizedError); ok {
|
|
|
|
return nil, err
|
|
|
|
}
|
2017-11-20 06:10:04 +01:00
|
|
|
return nil, errors.NewLocalizedError(errConnectionFailure, err)
|
|
|
|
}
|
|
|
|
|
2018-06-30 21:42:12 +02:00
|
|
|
if response.IsNotAuthorized() {
|
|
|
|
return nil, errors.NewLocalizedError(errNotAuthorized)
|
|
|
|
}
|
|
|
|
|
|
|
|
if response.HasServerFailure() {
|
|
|
|
return nil, errors.NewLocalizedError(errServerFailure, response.StatusCode)
|
|
|
|
}
|
|
|
|
|
2018-02-08 03:47:47 +01:00
|
|
|
// Content-Length = -1 when no Content-Length header is sent
|
|
|
|
if response.ContentLength == 0 {
|
|
|
|
return nil, errors.NewLocalizedError(errEmptyBody)
|
|
|
|
}
|
|
|
|
|
2018-01-20 07:42:55 +01:00
|
|
|
body, err := response.NormalizeBodyEncoding()
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2017-11-20 06:10:04 +01:00
|
|
|
var buffer bytes.Buffer
|
2018-02-08 03:47:47 +01:00
|
|
|
size, _ := io.Copy(&buffer, body)
|
|
|
|
if size == 0 {
|
|
|
|
return nil, errors.NewLocalizedError(errEmptyBody)
|
|
|
|
}
|
|
|
|
|
2017-11-20 06:10:04 +01:00
|
|
|
reader := bytes.NewReader(buffer.Bytes())
|
|
|
|
|
|
|
|
if format := feed.DetectFeedFormat(reader); format != feed.FormatUnknown {
|
|
|
|
var subscriptions Subscriptions
|
|
|
|
subscriptions = append(subscriptions, &Subscription{
|
|
|
|
Title: response.EffectiveURL,
|
|
|
|
URL: response.EffectiveURL,
|
|
|
|
Type: format,
|
|
|
|
})
|
|
|
|
|
|
|
|
return subscriptions, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
reader.Seek(0, io.SeekStart)
|
|
|
|
return parseDocument(response.EffectiveURL, bytes.NewReader(buffer.Bytes()))
|
|
|
|
}
|
|
|
|
|
|
|
|
func parseDocument(websiteURL string, data io.Reader) (Subscriptions, error) {
|
|
|
|
var subscriptions Subscriptions
|
|
|
|
queries := map[string]string{
|
|
|
|
"link[type='application/rss+xml']": "rss",
|
|
|
|
"link[type='application/atom+xml']": "atom",
|
|
|
|
"link[type='application/json']": "json",
|
|
|
|
}
|
|
|
|
|
|
|
|
doc, err := goquery.NewDocumentFromReader(data)
|
|
|
|
if err != nil {
|
|
|
|
return nil, errors.NewLocalizedError(errUnreadableDoc, err)
|
|
|
|
}
|
|
|
|
|
|
|
|
for query, kind := range queries {
|
|
|
|
doc.Find(query).Each(func(i int, s *goquery.Selection) {
|
|
|
|
subscription := new(Subscription)
|
|
|
|
subscription.Type = kind
|
|
|
|
|
|
|
|
if title, exists := s.Attr("title"); exists {
|
|
|
|
subscription.Title = title
|
|
|
|
} else {
|
|
|
|
subscription.Title = "Feed"
|
|
|
|
}
|
|
|
|
|
|
|
|
if feedURL, exists := s.Attr("href"); exists {
|
2017-12-02 07:29:18 +01:00
|
|
|
subscription.URL, _ = url.AbsoluteURL(websiteURL, feedURL)
|
2017-11-20 06:10:04 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
if subscription.Title == "" {
|
|
|
|
subscription.Title = subscription.URL
|
|
|
|
}
|
|
|
|
|
|
|
|
if subscription.URL != "" {
|
2017-12-16 03:55:57 +01:00
|
|
|
logger.Debug("[FindSubscriptions] %s", subscription)
|
2017-11-20 06:10:04 +01:00
|
|
|
subscriptions = append(subscriptions, subscription)
|
|
|
|
}
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
return subscriptions, nil
|
|
|
|
}
|