Allow only absolute URLs as the comments URL
Some feeds are using invalid URLs (random text).
This commit is contained in:
parent
8cebd985a2
commit
bf632fad2e
6 changed files with 105 additions and 2 deletions
|
@ -84,7 +84,7 @@ func (a *atom10Entry) Transform() *model.Entry {
|
|||
entry.Content = a.entryContent()
|
||||
entry.Title = a.entryTitle()
|
||||
entry.Enclosures = a.entryEnclosures()
|
||||
entry.CommentsURL = a.Links.firstLinkWithRelationAndType("replies", "text/html")
|
||||
entry.CommentsURL = a.entryCommentsURL()
|
||||
return entry
|
||||
}
|
||||
|
||||
|
@ -194,6 +194,15 @@ func (a *atom10Entry) entryEnclosures() model.EnclosureList {
|
|||
return enclosures
|
||||
}
|
||||
|
||||
// See https://tools.ietf.org/html/rfc4685#section-3
|
||||
func (a *atom10Entry) entryCommentsURL() string {
|
||||
commentsURL := a.Links.firstLinkWithRelationAndType("replies", "text/html")
|
||||
if url.IsAbsoluteURL(commentsURL) {
|
||||
return commentsURL
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
type atom10Text struct {
|
||||
Type string `xml:"type,attr"`
|
||||
Data string `xml:",chardata"`
|
||||
|
|
|
@ -777,3 +777,43 @@ func TestParseRepliesLinkRelation(t *testing.T) {
|
|||
t.Errorf("Incorrect entry comments URL, got: %s", feed.Entries[0].CommentsURL)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAbsoluteCommentsURL(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<feed xmlns="http://www.w3.org/2005/Atom"
|
||||
xmlns:thr="http://purl.org/syndication/thread/1.0">
|
||||
<id>http://www.example.org/myfeed</id>
|
||||
<title>My Example Feed</title>
|
||||
<updated>2005-07-28T12:00:00Z</updated>
|
||||
<link href="http://www.example.org/myfeed" />
|
||||
<author><name>James</name></author>
|
||||
<entry>
|
||||
<id>tag:entries.com,2005:1</id>
|
||||
<title>My original entry</title>
|
||||
<updated>2006-03-01T12:12:12Z</updated>
|
||||
<link href="http://www.example.org/entries/1" />
|
||||
<link rel="replies"
|
||||
type="text/html"
|
||||
href="invalid url"
|
||||
thr:count="10" thr:updated="2005-07-28T12:10:00Z" />
|
||||
<summary>This is my original entry</summary>
|
||||
</entry>
|
||||
</feed>`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if len(feed.Entries) != 1 {
|
||||
t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
|
||||
}
|
||||
|
||||
if feed.Entries[0].URL != "http://www.example.org/entries/1" {
|
||||
t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
|
||||
}
|
||||
|
||||
if feed.Entries[0].CommentsURL != "" {
|
||||
t.Errorf("Incorrect entry comments URL, got: %s", feed.Entries[0].CommentsURL)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -837,6 +837,31 @@ func TestParseEntryWithCommentsURL(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestParseEntryWithInvalidCommentsURL(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<rss version="2.0" xmlns:slash="http://purl.org/rss/1.0/modules/slash/">
|
||||
<channel>
|
||||
<link>https://example.org/</link>
|
||||
<item>
|
||||
<title>Item 1</title>
|
||||
<link>https://example.org/item1</link>
|
||||
<comments>
|
||||
Some text
|
||||
</comments>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if feed.Entries[0].CommentsURL != "" {
|
||||
t.Errorf("Incorrect entry comments URL, got: %q", feed.Entries[0].CommentsURL)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseInvalidXml(t *testing.T) {
|
||||
data := `garbage`
|
||||
_, err := Parse(bytes.NewBufferString(data))
|
||||
|
|
|
@ -317,7 +317,12 @@ func (r *rssItem) entryEnclosures() model.EnclosureList {
|
|||
func (r *rssItem) entryCommentsURL() string {
|
||||
for _, commentLink := range r.CommentLinks {
|
||||
if commentLink.XMLName.Space == "" {
|
||||
return strings.TrimSpace(commentLink.Data)
|
||||
commentsURL := strings.TrimSpace(commentLink.Data)
|
||||
// The comments URL is supposed to be absolute (some feeds publish incorrect comments URLs).
|
||||
// See https://cyber.harvard.edu/rss/rss.html#ltcommentsgtSubelementOfLtitemgt
|
||||
if url.IsAbsoluteURL(commentsURL) {
|
||||
return commentsURL
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -11,6 +11,15 @@ import (
|
|||
"strings"
|
||||
)
|
||||
|
||||
// IsAbsoluteURL reports whether link parses as a URL that is absolute
// according to (*url.URL).IsAbs. A link that fails to parse is reported as
// not absolute.
func IsAbsoluteURL(link string) bool {
	if parsed, parseErr := url.Parse(link); parseErr == nil {
		return parsed.IsAbs()
	}
	return false
}
|
||||
|
||||
// AbsoluteURL converts the input URL to an absolute URL if necessary.
|
||||
func AbsoluteURL(baseURL, input string) (string, error) {
|
||||
if strings.HasPrefix(input, "//") {
|
||||
|
|
|
@ -6,6 +6,21 @@ package url // import "miniflux.app/url"
|
|||
|
||||
import "testing"
|
||||
|
||||
func TestIsAbsoluteURL(t *testing.T) {
|
||||
scenarios := map[string]bool{
|
||||
"https://example.org/file.pdf": true,
|
||||
"magnet:?xt.1=urn:sha1:YNCKHTQCWBTRNJIV4WNAE52SJUQCZO5C&xt.2=urn:sha1:TXGCZQTH26NL6OUQAJJPFALHG2LTGBC7": true,
|
||||
"invalid url": false,
|
||||
}
|
||||
|
||||
for input, expected := range scenarios {
|
||||
actual := IsAbsoluteURL(input)
|
||||
if actual != expected {
|
||||
t.Errorf(`Unexpected result, got %v instead of %v for %q`, actual, expected, input)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestAbsoluteURL(t *testing.T) {
|
||||
scenarios := [][]string{
|
||||
[]string{"https://example.org/path/file.ext", "https://example.org/folder/", "/path/file.ext"},
|
||||
|
|
Loading…
Reference in a new issue