Make sure that item URLs are absolute

This commit is contained in:
Frédéric Guillot 2017-12-13 20:16:15 -08:00
parent 84d912c979
commit 827683ab59
8 changed files with 129 additions and 11 deletions

View file

@ -14,6 +14,7 @@ import (
"github.com/miniflux/miniflux/helper"
"github.com/miniflux/miniflux/model"
"github.com/miniflux/miniflux/reader/date"
"github.com/miniflux/miniflux/url"
)
type atomFeed struct {
@ -70,10 +71,19 @@ func (a *atomFeed) Transform() *model.Feed {
for _, entry := range a.Entries {
item := entry.Transform()
entryURL, err := url.AbsoluteURL(feed.SiteURL, item.URL)
if err == nil {
item.URL = entryURL
}
if item.Author == "" {
item.Author = getAuthor(a.Author)
}
if item.Title == "" {
item.Title = item.URL
}
feed.Entries = append(feed.Entries, item)
}
@ -89,11 +99,6 @@ func (a *atomEntry) Transform() *model.Entry {
entry.Content = getContent(a)
entry.Title = strings.TrimSpace(a.Title)
entry.Enclosures = getEnclosures(a)
if entry.Title == "" {
entry.Title = entry.URL
}
return entry
}

View file

@ -152,6 +152,32 @@ func TestParseFeedURL(t *testing.T) {
}
}
// TestParseEntryWithRelativeURL checks that a relative <link href> on an Atom
// entry is resolved against the feed's site URL.
func TestParseEntryWithRelativeURL(t *testing.T) {
	data := `<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<title>Example Feed</title>
<link href="http://example.org/"/>
<entry>
<title>Test</title>
<link href="something.html"/>
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
<updated>2003-12-13T18:30:02Z</updated>
<summary>Some text.</summary>
</entry>
</feed>`

	feed, err := Parse(bytes.NewBufferString(data))
	if err != nil {
		// Stop here: the assertions below dereference feed, so carrying on
		// after a parse error would risk a panic instead of a clean failure.
		t.Fatal(err)
	}

	// Guard the index access so a missing entry is reported as a test
	// failure rather than an index-out-of-range panic.
	if len(feed.Entries) != 1 {
		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
	}

	if feed.Entries[0].URL != "http://example.org/something.html" {
		t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
	}
}
func TestParseEntryTitleWithWhitespaces(t *testing.T) {
data := `<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">

View file

@ -13,6 +13,7 @@ import (
"github.com/miniflux/miniflux/model"
"github.com/miniflux/miniflux/reader/date"
"github.com/miniflux/miniflux/reader/sanitizer"
"github.com/miniflux/miniflux/url"
)
type jsonFeed struct {
@ -66,6 +67,11 @@ func (j *jsonFeed) Transform() *model.Feed {
for _, item := range j.Items {
entry := item.Transform()
entryURL, err := url.AbsoluteURL(feed.SiteURL, entry.URL)
if err == nil {
entry.URL = entryURL
}
if entry.Author == "" {
entry.Author = j.GetAuthor()
}

View file

@ -174,6 +174,31 @@ func TestParsePodcast(t *testing.T) {
}
}
// TestParseFeedWithRelativeURL checks that a relative item "url" in a JSON
// feed is resolved against the feed's "home_page_url".
func TestParseFeedWithRelativeURL(t *testing.T) {
	data := `{
"version": "https://jsonfeed.org/version/1",
"title": "Example",
"home_page_url": "https://example.org/",
"feed_url": "https://example.org/feed.json",
"items": [
{
"id": "2347259",
"url": "something.html",
"date_published": "2016-02-09T14:22:00-07:00"
}
]
}`

	feed, err := Parse(bytes.NewBufferString(data))
	if err != nil {
		// Stop here: the assertions below dereference feed, so carrying on
		// after a parse error would risk a panic instead of a clean failure.
		t.Fatal(err)
	}

	// Guard the index access so a missing entry is reported as a test
	// failure rather than an index-out-of-range panic.
	if len(feed.Entries) != 1 {
		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
	}

	if feed.Entries[0].URL != "https://example.org/something.html" {
		t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
	}
}
func TestParseAuthor(t *testing.T) {
data := `{
"version": "https://jsonfeed.org/version/1",

View file

@ -266,6 +266,31 @@ func TestParseItemWithOnlyFeedAuthor(t *testing.T) {
}
}
// TestParseItemRelativeURL checks that a relative <link> on an RDF item is
// resolved against the channel's link.
func TestParseItemRelativeURL(t *testing.T) {
	data := `<?xml version="1.0" encoding="utf-8"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/">
<channel>
<title>Example</title>
<link>http://example.org</link>
</channel>
<item>
<title>Title</title>
<description>Test</description>
<link>something.html</link>
</item>
</rdf:RDF>`

	feed, err := Parse(bytes.NewBufferString(data))
	if err != nil {
		// Stop here: the assertions below dereference feed, so carrying on
		// after a parse error would risk a panic instead of a clean failure.
		t.Fatal(err)
	}

	// Guard the index access so a missing entry is reported as a test
	// failure rather than an index-out-of-range panic.
	if len(feed.Entries) != 1 {
		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
	}

	if feed.Entries[0].URL != "http://example.org/something.html" {
		t.Errorf("Incorrect entry url, got: %s", feed.Entries[0].URL)
	}
}
func TestParseItemWithoutLink(t *testing.T) {
data := `<?xml version="1.0" encoding="utf-8"?>

View file

@ -12,6 +12,7 @@ import (
"github.com/miniflux/miniflux/helper"
"github.com/miniflux/miniflux/model"
"github.com/miniflux/miniflux/reader/sanitizer"
"github.com/miniflux/miniflux/url"
)
type rdfFeed struct {
@ -29,13 +30,17 @@ func (r *rdfFeed) Transform() *model.Feed {
for _, item := range r.Items {
entry := item.Transform()
if entry.Author == "" && r.Creator != "" {
entry.Author = sanitizer.StripTags(r.Creator)
}
if entry.URL == "" {
entry.URL = feed.SiteURL
} else {
entryURL, err := url.AbsoluteURL(feed.SiteURL, entry.URL)
if err == nil {
entry.URL = entryURL
}
}
feed.Entries = append(feed.Entries, entry)

View file

@ -537,6 +537,27 @@ func TestParseEntryWithFeedBurnerEnclosures(t *testing.T) {
}
}
// TestParseEntryWithRelativeURL checks that a relative <link> on an RSS item
// is resolved against the channel link, and that an item without a title
// falls back to that resolved (absolute) URL as its title.
func TestParseEntryWithRelativeURL(t *testing.T) {
	data := `<?xml version="1.0" encoding="utf-8"?>
<rss version="2.0">
<channel>
<link>https://example.org/</link>
<item>
<link>item.html</link>
</item>
</channel>
</rss>`

	feed, err := Parse(bytes.NewBufferString(data))
	if err != nil {
		// Stop here: the assertions below dereference feed, so carrying on
		// after a parse error would risk a panic instead of a clean failure.
		t.Fatal(err)
	}

	// Guard the index access so a missing entry is reported as a test
	// failure rather than an index-out-of-range panic.
	if len(feed.Entries) != 1 {
		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
	}

	// Assert the URL directly: the test is named after URL resolution, and a
	// failure here pinpoints the resolver rather than the title fallback.
	if feed.Entries[0].URL != "https://example.org/item.html" {
		t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
	}

	// The item has no <title>, so the title is expected to mirror the
	// absolute URL.
	if feed.Entries[0].Title != "https://example.org/item.html" {
		t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
	}
}
func TestParseInvalidXml(t *testing.T) {
data := `garbage`
_, err := Parse(bytes.NewBufferString(data))

View file

@ -15,6 +15,7 @@ import (
"github.com/miniflux/miniflux/helper"
"github.com/miniflux/miniflux/model"
"github.com/miniflux/miniflux/reader/date"
"github.com/miniflux/miniflux/url"
)
type rssFeed struct {
@ -103,6 +104,15 @@ func (r *rssFeed) Transform() *model.Feed {
if entry.URL == "" {
entry.URL = feed.SiteURL
} else {
entryURL, err := url.AbsoluteURL(feed.SiteURL, entry.URL)
if err == nil {
entry.URL = entryURL
}
}
if entry.Title == "" {
entry.Title = entry.URL
}
feed.Entries = append(feed.Entries, entry)
@ -213,11 +223,6 @@ func (r *rssItem) Transform() *model.Entry {
entry.Content = r.GetContent()
entry.Title = strings.TrimSpace(r.Title)
entry.Enclosures = r.GetEnclosures()
if entry.Title == "" {
entry.Title = entry.URL
}
return entry
}