Try known urls if no link alternate

I came across a few blogs that didn't have a <link rel="alternate"> tag
but offered an RSS/Atom feed.
This aims to solve this issue for "well-known" feed URLs, since
these URLs are often the same.
This commit is contained in:
Gabriel Augendre 2020-06-16 22:52:20 +02:00 committed by Frédéric Guillot
parent 248cb38390
commit e44b4b2540

View file

@ -43,7 +43,11 @@ func FindSubscriptions(websiteURL, userAgent, username, password string) (Subscr
return subscriptions, nil
}
return parseDocument(response.EffectiveURL, strings.NewReader(body))
subscriptions, err := parseDocument(response.EffectiveURL, strings.NewReader(body))
if err != nil || subscriptions != nil {
return subscriptions, err
}
return tryWellKnownUrls(websiteURL, userAgent, username, password)
}
func parseDocument(websiteURL string, data io.Reader) (Subscriptions, *errors.LocalizedError) {
@ -86,3 +90,44 @@ func parseDocument(websiteURL string, data io.Reader) (Subscriptions, *errors.Lo
return subscriptions, nil
}
// tryWellKnownUrls probes a fixed list of conventional feed paths
// (/atom.xml, /feed.xml, /feed/, /rss.xml) relative to websiteURL and returns
// one subscription for every path that answers with HTTP 200.
//
// Each request carries the given credentials and user agent. Paths whose URL
// cannot be resolved or whose request fails are skipped silently, because this
// is a best-effort fallback; the returned error is therefore always nil.
// Only the status code is inspected: the response body is not checked to be a
// valid feed, and the subscription type is taken from the table below.
func tryWellKnownUrls(websiteURL, userAgent, username, password string) (Subscriptions, *errors.LocalizedError) {
	var subscriptions Subscriptions

	// An ordered slice rather than a map: map iteration order is randomized,
	// which made the order of the returned subscriptions vary between calls.
	knownURLs := []struct {
		path string
		kind string
	}{
		{"/atom.xml", "atom"},
		{"/feed.xml", "atom"},
		{"/feed/", "atom"},
		{"/rss.xml", "rss"},
	}

	// Drop a single trailing slash before resolving the candidate paths.
	// TrimSuffix is safe on an empty string, whereas slicing the last byte
	// by hand panics when websiteURL is "".
	websiteURL = strings.TrimSuffix(websiteURL, "/")

	for _, known := range knownURLs {
		fullURL, err := url.AbsoluteURL(websiteURL, known.path)
		if err != nil {
			continue
		}

		request := client.New(fullURL)
		request.WithCredentials(username, password)
		request.WithUserAgent(userAgent)

		response, err := request.Get()
		if err != nil {
			continue
		}

		if response != nil && response.StatusCode == 200 && fullURL != "" {
			// No title is known at this point, so the URL doubles as the title.
			subscriptions = append(subscriptions, &Subscription{
				Type:  known.kind,
				Title: fullURL,
				URL:   fullURL,
			})
		}
	}

	return subscriptions, nil
}