diff --git a/internal/reader/media/media.go b/internal/reader/media/media.go
index 4d9c3661..df84bf03 100644
--- a/internal/reader/media/media.go
+++ b/internal/reader/media/media.go
@@ -12,6 +12,7 @@ import (
var textLinkRegex = regexp.MustCompile(`(?mi)(\bhttps?:\/\/[-A-Z0-9+&@#\/%?=~_|!:,.;]*[-A-Z0-9+&@#\/%=~_|])`)
// Element represents XML media elements.
+// Specs: https://www.rssboard.org/media-rss
type Element struct {
MediaGroups []Group `xml:"http://search.yahoo.com/mrss/ group"`
MediaContents []Content `xml:"http://search.yahoo.com/mrss/ content"`
diff --git a/internal/reader/rss/atom.go b/internal/reader/rss/atom.go
new file mode 100644
index 00000000..e0d66910
--- /dev/null
+++ b/internal/reader/rss/atom.go
@@ -0,0 +1,43 @@
+// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+package rss // import "miniflux.app/v2/internal/reader/rss"
+
+import "strings"
+
+// AtomAuthor maps an author element declared in the
+// http://www.w3.org/2005/Atom name space.
+type AtomAuthor struct {
+ Author AtomPerson `xml:"http://www.w3.org/2005/Atom author"`
+}
+
+// String returns the string form of the wrapped AtomPerson.
+func (a *AtomAuthor) String() string {
+ return a.Author.String()
+}
+
+// AtomPerson holds the name and email child elements of an Atom person.
+type AtomPerson struct {
+	Name  string `xml:"name"`
+	Email string `xml:"email"`
+}
+
+// String returns the person's name with surrounding whitespace removed, or
+// the trimmed email address when no usable name is present. It returns an
+// empty string when both are blank.
+func (a *AtomPerson) String() string {
+	// Trim before testing: a whitespace-only name must not hide the email.
+	if name := strings.TrimSpace(a.Name); name != "" {
+		return name
+	}
+	return strings.TrimSpace(a.Email)
+}
+
+// AtomLink holds the href, type, rel and length attributes of a link element.
+// Length is kept as the raw attribute string.
+type AtomLink struct {
+ URL string `xml:"href,attr"`
+ Type string `xml:"type,attr"`
+ Rel string `xml:"rel,attr"`
+ Length string `xml:"length,attr"`
+}
+
+// AtomLinks collects every link element declared in the
+// http://www.w3.org/2005/Atom name space.
+type AtomLinks struct {
+ Links []*AtomLink `xml:"http://www.w3.org/2005/Atom link"`
+}
diff --git a/internal/reader/rss/parser.go b/internal/reader/rss/parser.go
index a8390dc6..55122ea4 100644
--- a/internal/reader/rss/parser.go
+++ b/internal/reader/rss/parser.go
@@ -14,7 +14,9 @@ import (
// Parse returns a normalized feed struct from a RSS feed.
func Parse(baseURL string, data io.ReadSeeker) (*model.Feed, error) {
feed := new(rssFeed)
- if err := xml.NewXMLDecoder(data).Decode(feed); err != nil {
+ decoder := xml.NewXMLDecoder(data)
+ decoder.DefaultSpace = "rss"
+ if err := decoder.Decode(feed); err != nil {
return nil, fmt.Errorf("rss: unable to parse feed: %w", err)
}
return feed.Transform(baseURL), nil
diff --git a/internal/reader/rss/parser_test.go b/internal/reader/rss/parser_test.go
index b3a46719..a8fbc76f 100644
--- a/internal/reader/rss/parser_test.go
+++ b/internal/reader/rss/parser_test.go
@@ -300,7 +300,7 @@ func TestParseEntryWithMultipleAtomLinks(t *testing.T) {
-
Test
-
+
`
@@ -430,7 +430,7 @@ func TestParseEntryWithAuthorAndCDATA(t *testing.T) {
Test
https://example.org/item
- by
+
@@ -447,38 +447,6 @@ func TestParseEntryWithAuthorAndCDATA(t *testing.T) {
}
}
-func TestParseEntryWithNonStandardAtomAuthor(t *testing.T) {
- data := `
-
-
- Example
- https://example.org/
-
- -
- Test
- https://example.org/item
-
- Foo Bar
- Vice President
-
- FooBar Inc.
-
-
-
- `
-
- feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
- if err != nil {
- t.Fatal(err)
- }
-
- expected := "Foo Bar"
- result := feed.Entries[0].Author
- if result != expected {
- t.Errorf("Incorrect entry author, got %q instead of %q", result, expected)
- }
-}
-
func TestParseEntryWithAtomAuthorEmail(t *testing.T) {
data := `
@@ -508,7 +476,7 @@ func TestParseEntryWithAtomAuthorEmail(t *testing.T) {
}
}
-func TestParseEntryWithAtomAuthor(t *testing.T) {
+func TestParseEntryWithAtomAuthorName(t *testing.T) {
data := `
@@ -1435,6 +1403,37 @@ func TestEntryDescriptionFromGooglePlayDescription(t *testing.T) {
}
}
+// TestParseEntryWithRSSDescriptionAndMediaDescription checks that the RSS
+// description of an item is used as the entry content when the item also
+// carries a Media RSS description.
+func TestParseEntryWithRSSDescriptionAndMediaDescription(t *testing.T) {
+	data := `<?xml version="1.0" encoding="UTF-8"?>
+		<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/">
+			<channel>
+				<title>Podcast Example</title>
+				<link>http://www.example.com/index.html</link>
+				<item>
+					<title>Entry Title</title>
+					<link>http://www.example.com/entries/1</link>
+					<description>Entry Description</description>
+					<media:description>Media Description</media:description>
+				</item>
+			</channel>
+		</rss>`
+
+	feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Fatalf, not Errorf: indexing feed.Entries[0] below would panic on an
+	// empty slice and bury the real failure.
+	if len(feed.Entries) != 1 {
+		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
+	}
+
+	expected := "Entry Description"
+	result := feed.Entries[0].Content
+	if expected != result {
+		t.Errorf(`Unexpected description, got %q instead of %q`, result, expected)
+	}
+}
+
func TestParseEntryWithCategoryAndInnerHTML(t *testing.T) {
data := `
diff --git a/internal/reader/rss/podcast.go b/internal/reader/rss/podcast.go
index b72426cc..867bc03b 100644
--- a/internal/reader/rss/podcast.go
+++ b/internal/reader/rss/podcast.go
@@ -15,21 +15,24 @@ var ErrInvalidDurationFormat = errors.New("rss: invalid duration format")
// PodcastFeedElement represents iTunes and GooglePlay feed XML elements.
// Specs:
// - https://github.com/simplepie/simplepie-ng/wiki/Spec:-iTunes-Podcast-RSS
-// - https://developers.google.com/search/reference/podcast/rss-feed
+// - https://support.google.com/podcast-publishers/answer/9889544
+//
+// The field tags name direct child elements, so the struct is expected to be
+// embedded in the type that decodes the channel element.
type PodcastFeedElement struct {
- ItunesAuthor string `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd channel>author"`
- Subtitle string `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd channel>subtitle"`
- Summary string `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd channel>summary"`
- PodcastOwner PodcastOwner `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd channel>owner"`
- GooglePlayAuthor string `xml:"http://www.google.com/schemas/play-podcasts/1.0 channel>author"`
+ ItunesAuthor string `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd author"`
+ Subtitle string `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd subtitle"`
+ Summary string `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd summary"`
+ PodcastOwner PodcastOwner `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd owner"`
+ GooglePlayAuthor string `xml:"http://www.google.com/schemas/play-podcasts/1.0 author"`
}
// PodcastEntryElement represents iTunes and GooglePlay entry XML elements.
+// It maps the iTunes author, subtitle, summary, duration and owner elements
+// and the GooglePlay author and description elements.
type PodcastEntryElement struct {
- Subtitle string `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd subtitle"`
- Summary string `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd summary"`
- Duration string `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd duration"`
- GooglePlayDescription string `xml:"http://www.google.com/schemas/play-podcasts/1.0 description"`
+ ItunesAuthor string `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd author"`
+ Subtitle string `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd subtitle"`
+ Summary string `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd summary"`
+ Duration string `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd duration"`
+ PodcastOwner PodcastOwner `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd owner"`
+ GooglePlayAuthor string `xml:"http://www.google.com/schemas/play-podcasts/1.0 author"`
+ GooglePlayDescription string `xml:"http://www.google.com/schemas/play-podcasts/1.0 description"`
}
// PodcastOwner represents contact information for the podcast owner.
@@ -38,6 +41,19 @@ type PodcastOwner struct {
Email string `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd email"`
}
+// String returns the owner's name with surrounding whitespace removed, or the
+// trimmed email address when no usable name is present. It returns an empty
+// string when both are blank.
+func (p *PodcastOwner) String() string {
+	// Trim before testing: a whitespace-only name must not hide the email.
+	if name := strings.TrimSpace(p.Name); name != "" {
+		return name
+	}
+	return strings.TrimSpace(p.Email)
+}
+
// Image represents podcast artwork.
type Image struct {
URL string `xml:"href,attr"`
@@ -52,10 +68,8 @@ func (e *PodcastFeedElement) PodcastAuthor() string {
author = e.ItunesAuthor
case e.GooglePlayAuthor != "":
author = e.GooglePlayAuthor
- case e.PodcastOwner.Name != "":
- author = e.PodcastOwner.Name
- case e.PodcastOwner.Email != "":
- author = e.PodcastOwner.Email
+ case e.PodcastOwner.String() != "":
+ author = e.PodcastOwner.String()
}
return strings.TrimSpace(author)
diff --git a/internal/reader/rss/rss.go b/internal/reader/rss/rss.go
index 963b2d10..cb769141 100644
--- a/internal/reader/rss/rss.go
+++ b/internal/reader/rss/rss.go
@@ -21,20 +21,25 @@ import (
"miniflux.app/v2/internal/urllib"
)
-// Specs: https://cyber.harvard.edu/rss/rss.html
+// rssFeed is the decoding target for the root rss element. Child elements are
+// matched in the "rss" name space, which the parser sets as the decoder's
+// DefaultSpace. The version tag deliberately carries no name space: the
+// default name space never applies to attributes, so a prefixed tag would
+// never match the unprefixed version attribute.
+//
+// Specs: https://www.rssboard.org/rss-specification
type rssFeed struct {
- XMLName xml.Name `xml:"rss"`
- Version string `xml:"version,attr"`
- Title string `xml:"channel>title"`
- Links []rssLink `xml:"channel>link"`
- ImageURL string `xml:"channel>image>url"`
- Language string `xml:"channel>language"`
- Description string `xml:"channel>description"`
- PubDate string `xml:"channel>pubDate"`
- ManagingEditor string `xml:"channel>managingEditor"`
- Webmaster string `xml:"channel>webMaster"`
- TimeToLive rssTTL `xml:"channel>ttl"`
- Items []rssItem `xml:"channel>item"`
+	XMLName xml.Name   `xml:"rss"`
+	Version string     `xml:"version,attr"`
+	Channel rssChannel `xml:"rss channel"`
+}
+
+// rssChannel holds the child elements of the channel element, matched in the
+// "rss" name space, together with the embedded Atom links and podcast
+// feed elements.
+type rssChannel struct {
+ Title string `xml:"rss title"`
+ Link string `xml:"rss link"`
+ ImageURL string `xml:"rss image>url"`
+ Language string `xml:"rss language"`
+ Description string `xml:"rss description"`
+ PubDate string `xml:"rss pubDate"`
+ ManagingEditor string `xml:"rss managingEditor"`
+ Webmaster string `xml:"rss webMaster"`
+ TimeToLive rssTTL `xml:"rss ttl"`
+ Items []rssItem `xml:"rss item"`
+ AtomLinks
PodcastFeedElement
}
@@ -72,15 +77,15 @@ func (r *rssFeed) Transform(baseURL string) *model.Feed {
feed.FeedURL = feedURL
}
- feed.Title = html.UnescapeString(strings.TrimSpace(r.Title))
+ feed.Title = html.UnescapeString(strings.TrimSpace(r.Channel.Title))
if feed.Title == "" {
feed.Title = feed.SiteURL
}
- feed.IconURL = strings.TrimSpace(r.ImageURL)
- feed.TTL = r.TimeToLive.Value()
+ feed.IconURL = strings.TrimSpace(r.Channel.ImageURL)
+ feed.TTL = r.Channel.TimeToLive.Value()
- for _, item := range r.Items {
+ for _, item := range r.Channel.Items {
entry := item.Transform()
if entry.Author == "" {
entry.Author = r.feedAuthor()
@@ -110,32 +115,29 @@ func (r *rssFeed) Transform(baseURL string) *model.Feed {
}
+// siteURL returns the channel link value with surrounding whitespace removed.
func (r *rssFeed) siteURL() string {
- for _, element := range r.Links {
- if element.XMLName.Space == "" {
- return strings.TrimSpace(element.Data)
- }
- }
-
- return ""
+ return strings.TrimSpace(r.Channel.Link)
}
+// feedURL returns the trimmed href of the first channel-level Atom link whose
+// rel attribute is "self", or an empty string when there is none.
func (r *rssFeed) feedURL() string {
- for _, element := range r.Links {
- if element.XMLName.Space == "http://www.w3.org/2005/Atom" {
- return strings.TrimSpace(element.Href)
+ for _, atomLink := range r.Channel.AtomLinks.Links {
+ if atomLink.Rel == "self" {
+ return strings.TrimSpace(atomLink.URL)
}
}
-
return ""
}
+// feedAuthor returns the feed-level author, trimmed and passed through
+// sanitizer.StripTags. The channel's managingEditor wins, then webMaster;
+// otherwise the value resolved by PodcastAuthor is used.
func (r rssFeed) feedAuthor() string {
- author := r.PodcastAuthor()
+	// PodcastAuthor already falls back from the iTunes author to the GooglePlay
+	// author and then to the podcast owner. Repeating those cases in the switch
+	// below would let them overwrite an iTunes author it had already selected.
+	author := r.Channel.PodcastAuthor()
switch {
- case r.ManagingEditor != "":
- author = r.ManagingEditor
- case r.Webmaster != "":
- author = r.Webmaster
+	case r.Channel.ManagingEditor != "":
+		author = r.Channel.ManagingEditor
+	case r.Channel.Webmaster != "":
+		author = r.Channel.Webmaster
}
return sanitizer.StripTags(strings.TrimSpace(author))
}
@@ -146,27 +148,7 @@ type rssGUID struct {
IsPermaLink string `xml:"isPermaLink,attr"`
}
-type rssLink struct {
- XMLName xml.Name
- Data string `xml:",chardata"`
- Href string `xml:"href,attr"`
- Rel string `xml:"rel,attr"`
-}
-
-type rssCommentLink struct {
- XMLName xml.Name
- Data string `xml:",chardata"`
-}
-
type rssAuthor struct {
- XMLName xml.Name
- Data string `xml:",chardata"`
- Name string `xml:"name"`
- Email string `xml:"email"`
- Inner string `xml:",innerxml"`
-}
-
-type rssTitle struct {
XMLName xml.Name
Data string `xml:",chardata"`
Inner string `xml:",innerxml"`
@@ -193,19 +175,21 @@ func (enclosure *rssEnclosure) Size() int64 {
}
+// rssItem holds the child elements of an item element, matched in the "rss"
+// name space, together with the embedded Dublin Core, FeedBurner, podcast,
+// media and Atom author/link elements.
type rssItem struct {
- GUID rssGUID `xml:"guid"`
- Title []rssTitle `xml:"title"`
- Links []rssLink `xml:"link"`
- Description string `xml:"description"`
- PubDate string `xml:"pubDate"`
- Authors []rssAuthor `xml:"author"`
- CommentLinks []rssCommentLink `xml:"comments"`
- EnclosureLinks []rssEnclosure `xml:"enclosure"`
- Categories []rssCategory `xml:"category"`
+ GUID rssGUID `xml:"rss guid"`
+ Title string `xml:"rss title"`
+ Link string `xml:"rss link"`
+ Description string `xml:"rss description"`
+ PubDate string `xml:"rss pubDate"`
+ Author rssAuthor `xml:"rss author"`
+ Comments string `xml:"rss comments"`
+ EnclosureLinks []rssEnclosure `xml:"rss enclosure"`
+ Categories []rssCategory `xml:"rss category"`
dublincore.DublinCoreItemElement
FeedBurnerElement
PodcastEntryElement
media.Element
+ AtomAuthor
+ AtomLinks
}
func (r *rssItem) Transform() *model.Entry {
@@ -250,34 +234,26 @@ func (r *rssItem) entryDate() time.Time {
}
func (r *rssItem) entryAuthor() string {
- author := ""
+ var author string
- for _, rssAuthor := range r.Authors {
- switch rssAuthor.XMLName.Space {
- case "http://www.itunes.com/dtds/podcast-1.0.dtd", "http://www.google.com/schemas/play-podcasts/1.0":
- author = rssAuthor.Data
- case "http://www.w3.org/2005/Atom":
- if rssAuthor.Name != "" {
- author = rssAuthor.Name
- } else if rssAuthor.Email != "" {
- author = rssAuthor.Email
- }
- default:
- if rssAuthor.Name != "" {
- author = rssAuthor.Name
- } else if strings.Contains(rssAuthor.Inner, "