From dd4fb660c19fd1f6ce5716f9f5783eb7565fed2d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Guillot?=
Date: Fri, 15 Mar 2024 16:39:32 -0700
Subject: [PATCH] Refactor Atom parser to use an adapter
---
internal/reader/atom/atom_03.go | 216 +++++--------
internal/reader/atom/atom_03_adapter.go | 115 +++++++
internal/reader/atom/atom_03_test.go | 26 +-
internal/reader/atom/atom_10.go | 407 ++++++++++--------------
internal/reader/atom/atom_10_adapter.go | 210 ++++++++++++
internal/reader/atom/atom_10_test.go | 145 ++++-----
internal/reader/atom/atom_common.go | 111 +++++--
internal/reader/atom/parser.go | 28 +-
internal/reader/json/adapter.go | 4 +-
internal/reader/parser/parser_test.go | 30 +-
internal/reader/rss/adapter.go | 3 +-
11 files changed, 795 insertions(+), 500 deletions(-)
create mode 100644 internal/reader/atom/atom_03_adapter.go
create mode 100644 internal/reader/atom/atom_10_adapter.go
diff --git a/internal/reader/atom/atom_03.go b/internal/reader/atom/atom_03.go
index edcb83dc..fb458e91 100644
--- a/internal/reader/atom/atom_03.go
+++ b/internal/reader/atom/atom_03.go
@@ -6,158 +6,114 @@ package atom // import "miniflux.app/v2/internal/reader/atom"
import (
"encoding/base64"
"html"
- "log/slog"
"strings"
- "time"
-
- "miniflux.app/v2/internal/crypto"
- "miniflux.app/v2/internal/model"
- "miniflux.app/v2/internal/reader/date"
- "miniflux.app/v2/internal/reader/sanitizer"
- "miniflux.app/v2/internal/urllib"
)
// Specs: http://web.archive.org/web/20060811235523/http://www.mnot.net/drafts/draft-nottingham-atom-format-02.html
-type atom03Feed struct {
- ID string `xml:"id"`
- Title atom03Text `xml:"title"`
- Author atomPerson `xml:"author"`
- Links atomLinks `xml:"link"`
- Entries []atom03Entry `xml:"entry"`
+// Atom03Feed is the decoded form of an Atom 0.3 feed document element.
+// Its child elements are matched in the http://purl.org/atom/ns# namespace.
+type Atom03Feed struct {
+ // Version holds the value of the feed element's "version" attribute.
+ Version string `xml:"version,attr"`
+
+ // The "atom:id" element's content conveys a permanent, globally unique identifier for the feed.
+ // It MUST NOT change over time, even if the feed is relocated. atom:feed elements MAY contain an atom:id element,
+ // but MUST NOT contain more than one. The content of this element, when present, MUST be a URI.
+ ID string `xml:"http://purl.org/atom/ns# id"`
+
+ // The "atom:title" element is a Content construct that conveys a human-readable title for the feed.
+ // atom:feed elements MUST contain exactly one atom:title element.
+ // If the feed describes a Web resource, its content SHOULD be the same as that resource's title.
+ Title Atom03Content `xml:"http://purl.org/atom/ns# title"`
+
+ // The "atom:link" element is a Link construct that conveys a URI associated with the feed.
+ // The nature of the relationship as well as the link itself is determined by the element's content.
+ // atom:feed elements MUST contain at least one atom:link element with a rel attribute value of "alternate".
+ // atom:feed elements MUST NOT contain more than one atom:link element with a rel attribute value of "alternate" that has the same type attribute value.
+ // atom:feed elements MAY contain additional atom:link elements beyond those described above.
+ Links AtomLinks `xml:"http://purl.org/atom/ns# link"`
+
+ // The "atom:author" element is a Person construct that indicates the default author of the feed.
+ // atom:feed elements MUST contain exactly one atom:author element,
+ // UNLESS all of the atom:feed element's child atom:entry elements contain an atom:author element.
+ // atom:feed elements MUST NOT contain more than one atom:author element.
+ Author AtomPerson `xml:"http://purl.org/atom/ns# author"`
+
+ // The "atom:entry" element represents an individual entry that is contained by the feed.
+ // atom:feed elements MAY contain one or more atom:entry elements.
+ Entries []Atom03Entry `xml:"http://purl.org/atom/ns# entry"`
}
-func (a *atom03Feed) Transform(baseURL string) *model.Feed {
- var err error
+type Atom03Entry struct {
+ // The "atom:id" element's content conveys a permanent, globally unique identifier for the entry.
+ // It MUST NOT change over time, even if other representations of the entry (such as a web representation pointed to by the entry's atom:link element) are relocated.
+ // If the same entry is syndicated in two atom:feeds published by the same entity, the entry's atom:id MUST be the same in both feeds.
+ ID string `xml:"id"`
- feed := new(model.Feed)
+ // The "atom:title" element is a Content construct that conveys a human-readable title for the entry.
+ // atom:entry elements MUST have exactly one "atom:title" element.
+ // If an entry describes a Web resource, its content SHOULD be the same as that resource's title.
+ Title Atom03Content `xml:"title"`
- feedURL := a.Links.firstLinkWithRelation("self")
- feed.FeedURL, err = urllib.AbsoluteURL(baseURL, feedURL)
- if err != nil {
- feed.FeedURL = feedURL
- }
+ // The "atom:modified" element is a Date construct that indicates the time that the entry was last modified.
+ // atom:entry elements MUST contain an atom:modified element, but MUST NOT contain more than one.
+ // The content of an atom:modified element MUST have a time zone whose value SHOULD be "UTC".
+ Modified string `xml:"modified"`
- siteURL := a.Links.originalLink()
- feed.SiteURL, err = urllib.AbsoluteURL(baseURL, siteURL)
- if err != nil {
- feed.SiteURL = siteURL
- }
+ // The "atom:issued" element is a Date construct that indicates the time that the entry was issued.
+ // atom:entry elements MUST contain an atom:issued element, but MUST NOT contain more than one.
+ // The content of an atom:issued element MAY omit a time zone.
+ Issued string `xml:"issued"`
- feed.Title = a.Title.String()
- if feed.Title == "" {
- feed.Title = feed.SiteURL
- }
+ // The "atom:created" element is a Date construct that indicates the time that the entry was created.
+ // atom:entry elements MAY contain an atom:created element, but MUST NOT contain more than one.
+ // The content of an atom:created element MUST have a time zone whose value SHOULD be "UTC".
+ // If atom:created is not present, its content MUST be considered to be the same as that of atom:modified.
+ Created string `xml:"created"`
- for _, entry := range a.Entries {
- item := entry.Transform()
- entryURL, err := urllib.AbsoluteURL(feed.SiteURL, item.URL)
- if err == nil {
- item.URL = entryURL
- }
+ // The "atom:link" element is a Link construct that conveys a URI associated with the entry.
+ // The nature of the relationship as well as the link itself is determined by the element's content.
+ // atom:entry elements MUST contain at least one atom:link element with a rel attribute value of "alternate".
+ // atom:entry elements MUST NOT contain more than one atom:link element with a rel attribute value of "alternate" that has the same type attribute value.
+ // atom:entry elements MAY contain additional atom:link elements beyond those described above.
+ Links AtomLinks `xml:"link"`
- if item.Author == "" {
- item.Author = a.Author.String()
- }
+ // The "atom:summary" element is a Content construct that conveys a short summary, abstract or excerpt of the entry.
+ // atom:entry elements MAY contain an atom:summary element, but MUST NOT contain more than one.
+ Summary Atom03Content `xml:"summary"`
- if item.Title == "" {
- item.Title = sanitizer.TruncateHTML(item.Content, 100)
- }
+ // The "atom:content" element is a Content construct that conveys the content of the entry.
+ // atom:entry elements MAY contain one or more atom:content elements.
+ Content Atom03Content `xml:"content"`
- if item.Title == "" {
- item.Title = item.URL
- }
-
- feed.Entries = append(feed.Entries, item)
- }
-
- return feed
+ // The "atom:author" element is a Person construct that indicates the default author of the entry.
+ // atom:entry elements MUST contain exactly one atom:author element,
+ // UNLESS the atom:feed element containing them contains an atom:author element itself.
+ // atom:entry elements MUST NOT contain more than one atom:author element.
+ Author AtomPerson `xml:"author"`
}
-type atom03Entry struct {
- ID string `xml:"id"`
- Title atom03Text `xml:"title"`
- Modified string `xml:"modified"`
- Issued string `xml:"issued"`
- Created string `xml:"created"`
- Links atomLinks `xml:"link"`
- Summary atom03Text `xml:"summary"`
- Content atom03Text `xml:"content"`
- Author atomPerson `xml:"author"`
-}
+type Atom03Content struct {
+ // Content constructs MAY have a "type" attribute, whose value indicates the media type of the content.
+ // When present, this attribute's value MUST be a registered media type [RFC2045].
+ // If not present, its value MUST be considered to be "text/plain".
+ Type string `xml:"type,attr"`
-func (a *atom03Entry) Transform() *model.Entry {
- entry := model.NewEntry()
- entry.URL = a.Links.originalLink()
- entry.Date = a.entryDate()
- entry.Author = a.Author.String()
- entry.Hash = a.entryHash()
- entry.Content = a.entryContent()
- entry.Title = a.entryTitle()
- return entry
-}
+ // Content constructs MAY have a "mode" attribute, whose value indicates the method used to encode the content.
+ // When present, this attribute's value MUST be listed below.
+ // If not present, its value MUST be considered to be "xml".
+ //
+ // "xml": A mode attribute with the value "xml" indicates that the element's content is inline xml (for example, namespace-qualified XHTML).
+ //
+ // "escaped": A mode attribute with the value "escaped" indicates that the element's content is an escaped string.
+ // Processors MUST unescape the element's content before considering it as content of the indicated media type.
+ //
+ // "base64": A mode attribute with the value "base64" indicates that the element's content is base64-encoded [RFC2045].
+ // Processors MUST decode the element's content before considering it as content of the indicated media type.
+ Mode string `xml:"mode,attr"`
-func (a *atom03Entry) entryTitle() string {
- return sanitizer.StripTags(a.Title.String())
-}
-
-func (a *atom03Entry) entryContent() string {
- content := a.Content.String()
- if content != "" {
- return content
- }
-
- summary := a.Summary.String()
- if summary != "" {
- return summary
- }
-
- return ""
-}
-
-func (a *atom03Entry) entryDate() time.Time {
- dateText := ""
- for _, value := range []string{a.Issued, a.Modified, a.Created} {
- if value != "" {
- dateText = value
- break
- }
- }
-
- if dateText != "" {
- result, err := date.Parse(dateText)
- if err != nil {
- slog.Debug("Unable to parse date from Atom 0.3 feed",
- slog.String("date", dateText),
- slog.String("id", a.ID),
- slog.Any("error", err),
- )
- return time.Now()
- }
-
- return result
- }
-
- return time.Now()
-}
-
-func (a *atom03Entry) entryHash() string {
- for _, value := range []string{a.ID, a.Links.originalLink()} {
- if value != "" {
- return crypto.Hash(value)
- }
- }
-
- return ""
-}
-
-type atom03Text struct {
- Type string `xml:"type,attr"`
- Mode string `xml:"mode,attr"`
CharData string `xml:",chardata"`
InnerXML string `xml:",innerxml"`
}
-func (a *atom03Text) String() string {
+func (a *Atom03Content) Content() string {
content := ""
switch {
diff --git a/internal/reader/atom/atom_03_adapter.go b/internal/reader/atom/atom_03_adapter.go
new file mode 100644
index 00000000..02d78ec8
--- /dev/null
+++ b/internal/reader/atom/atom_03_adapter.go
@@ -0,0 +1,115 @@
+// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+package atom // import "miniflux.app/v2/internal/reader/atom"
+
+import (
+ "log/slog"
+ "time"
+
+ "miniflux.app/v2/internal/crypto"
+ "miniflux.app/v2/internal/model"
+ "miniflux.app/v2/internal/reader/date"
+ "miniflux.app/v2/internal/reader/sanitizer"
+ "miniflux.app/v2/internal/urllib"
+)
+
+// Atom03Adapter wraps a decoded Atom 0.3 feed so that it can be converted
+// into a model.Feed by BuildFeed.
+type Atom03Adapter struct {
+ atomFeed *Atom03Feed
+}
+
+// NewAtom03Adapter returns an adapter bound to the given Atom 0.3 feed.
+func NewAtom03Adapter(atomFeed *Atom03Feed) *Atom03Adapter {
+	adapter := new(Atom03Adapter)
+	adapter.atomFeed = atomFeed
+	return adapter
+}
+
+// BuildFeed converts the wrapped Atom 0.3 feed into a model.Feed.
+//
+// baseURL is used to resolve the feed's "self" and original links, and is
+// the value used for the feed URL or site URL when the corresponding link is
+// missing. Entry links are resolved against the resulting site URL.
+// Entries for which none of issued, modified or created can be parsed are
+// dated with the current time.
+func (a *Atom03Adapter) BuildFeed(baseURL string) *model.Feed {
+	feed := new(model.Feed)
+
+	// Populate the feed URL.
+	// A "self" link that cannot be resolved leaves FeedURL empty.
+	feedURL := a.atomFeed.Links.firstLinkWithRelation("self")
+	if feedURL != "" {
+		if absoluteFeedURL, err := urllib.AbsoluteURL(baseURL, feedURL); err == nil {
+			feed.FeedURL = absoluteFeedURL
+		}
+	} else {
+		feed.FeedURL = baseURL
+	}
+
+	// Populate the site URL.
+	// An original link that cannot be resolved leaves SiteURL empty.
+	siteURL := a.atomFeed.Links.OriginalLink()
+	if siteURL != "" {
+		if absoluteSiteURL, err := urllib.AbsoluteURL(baseURL, siteURL); err == nil {
+			feed.SiteURL = absoluteSiteURL
+		}
+	} else {
+		feed.SiteURL = baseURL
+	}
+
+	// Populate the feed title, falling back to the site URL.
+	feed.Title = a.atomFeed.Title.Content()
+	if feed.Title == "" {
+		feed.Title = feed.SiteURL
+	}
+
+	for _, atomEntry := range a.atomFeed.Entries {
+		entry := model.NewEntry()
+
+		// Populate the entry URL.
+		// The raw link is kept when it cannot be made absolute.
+		entry.URL = atomEntry.Links.OriginalLink()
+		if entry.URL != "" {
+			if absoluteEntryURL, err := urllib.AbsoluteURL(feed.SiteURL, entry.URL); err == nil {
+				entry.URL = absoluteEntryURL
+			}
+		}
+
+		// Populate the entry content, preferring content over summary.
+		entry.Content = atomEntry.Content.Content()
+		if entry.Content == "" {
+			entry.Content = atomEntry.Summary.Content()
+		}
+
+		// Populate the entry title, falling back to a content excerpt and
+		// then to the entry URL.
+		entry.Title = atomEntry.Title.Content()
+		if entry.Title == "" {
+			entry.Title = sanitizer.TruncateHTML(entry.Content, 100)
+		}
+		if entry.Title == "" {
+			entry.Title = entry.URL
+		}
+
+		// Populate the entry author, falling back to the feed author.
+		entry.Author = atomEntry.Author.PersonName()
+		if entry.Author == "" {
+			entry.Author = a.atomFeed.Author.PersonName()
+		}
+
+		// Populate the entry date with the first value that parses, trying
+		// issued, then modified, then created.
+		for _, value := range []string{atomEntry.Issued, atomEntry.Modified, atomEntry.Created} {
+			// An absent element decodes to an empty string: skip it rather
+			// than logging a parse failure for a date that was never provided.
+			if value == "" {
+				continue
+			}
+
+			parsedDate, err := date.Parse(value)
+			if err == nil {
+				entry.Date = parsedDate
+				break
+			}
+
+			slog.Debug("Unable to parse date from Atom 0.3 feed",
+				slog.String("date", value),
+				slog.String("id", atomEntry.ID),
+				slog.Any("error", err),
+			)
+		}
+		if entry.Date.IsZero() {
+			entry.Date = time.Now()
+		}
+
+		// Generate the entry hash from the ID, or from the original link
+		// when the entry has no ID.
+		for _, value := range []string{atomEntry.ID, atomEntry.Links.OriginalLink()} {
+			if value != "" {
+				entry.Hash = crypto.Hash(value)
+				break
+			}
+		}
+
+		feed.Entries = append(feed.Entries, entry)
+	}
+
+	return feed
+}
diff --git a/internal/reader/atom/atom_03_test.go b/internal/reader/atom/atom_03_test.go
index 321c0d82..54662bc9 100644
--- a/internal/reader/atom/atom_03_test.go
+++ b/internal/reader/atom/atom_03_test.go
@@ -27,7 +27,7 @@ func TestParseAtom03(t *testing.T) {
`
- feed, err := Parse("http://diveintomark.org/", bytes.NewReader([]byte(data)), "0.3")
+ feed, err := Parse("http://diveintomark.org/atom.xml", bytes.NewReader([]byte(data)), "0.3")
if err != nil {
t.Fatal(err)
}
@@ -36,7 +36,7 @@ func TestParseAtom03(t *testing.T) {
t.Errorf("Incorrect title, got: %s", feed.Title)
}
- if feed.FeedURL != "http://diveintomark.org/" {
+ if feed.FeedURL != "http://diveintomark.org/atom.xml" {
t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
}
@@ -74,6 +74,28 @@ func TestParseAtom03(t *testing.T) {
}
}
+// TestParseAtom03WithoutSiteURL checks that the site URL reported by Parse
+// equals the base URL passed to it for this fixture.
+func TestParseAtom03WithoutSiteURL(t *testing.T) {
+	data := `
+
+ 2003-12-13T18:30:02Z
+ Mark Pilgrim
+
+ Atom 0.3 snapshot
+
+ tag:diveintomark.org,2003:3.2397
+
+ `
+
+	feed, err := Parse("http://diveintomark.org/atom.xml", bytes.NewReader([]byte(data)), "0.3")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Report the field that is actually being asserted on.
+	if feed.SiteURL != "http://diveintomark.org/atom.xml" {
+		t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
+	}
+}
+
func TestParseAtom03WithoutFeedTitle(t *testing.T) {
data := `
diff --git a/internal/reader/atom/atom_10.go b/internal/reader/atom/atom_10.go
index 798a8748..201d00d1 100644
--- a/internal/reader/atom/atom_10.go
+++ b/internal/reader/atom/atom_10.go
@@ -6,286 +6,199 @@ package atom // import "miniflux.app/v2/internal/reader/atom"
import (
"encoding/xml"
"html"
- "log/slog"
- "strconv"
"strings"
- "time"
- "miniflux.app/v2/internal/crypto"
- "miniflux.app/v2/internal/model"
- "miniflux.app/v2/internal/reader/date"
"miniflux.app/v2/internal/reader/media"
"miniflux.app/v2/internal/reader/sanitizer"
- "miniflux.app/v2/internal/urllib"
)
+// The "atom:feed" element is the document (i.e., top-level) element of
+// an Atom Feed Document, acting as a container for metadata and data
+// associated with the feed. Its element children consist of metadata
+// elements followed by zero or more atom:entry child elements.
+//
// Specs:
// https://tools.ietf.org/html/rfc4287
// https://validator.w3.org/feed/docs/atom.html
-type atom10Feed struct {
- XMLName xml.Name `xml:"http://www.w3.org/2005/Atom feed"`
- ID string `xml:"id"`
- Title atom10Text `xml:"title"`
- Authors atomAuthors `xml:"author"`
- Icon string `xml:"icon"`
- Links atomLinks `xml:"link"`
- Entries []atom10Entry `xml:"entry"`
+type Atom10Feed struct {
+ // XMLName requires the document element to be named "feed" in the
+ // http://www.w3.org/2005/Atom namespace.
+ XMLName xml.Name `xml:"http://www.w3.org/2005/Atom feed"`
+
+ // The "atom:id" element conveys a permanent, universally unique
+ // identifier for an entry or feed.
+ //
+ // Its content MUST be an IRI, as defined by [RFC3987]. Note that the
+ // definition of "IRI" excludes relative references. Though the IRI
+ // might use a dereferencable scheme, Atom Processors MUST NOT assume it
+ // can be dereferenced.
+ //
+ // atom:feed elements MUST contain exactly one atom:id element.
+ ID string `xml:"http://www.w3.org/2005/Atom id"`
+
+ // The "atom:title" element is a Text construct that conveys a human-
+ // readable title for an entry or feed.
+ //
+ // atom:feed elements MUST contain exactly one atom:title element.
+ Title Atom10Text `xml:"http://www.w3.org/2005/Atom title"`
+
+ // The "atom:author" element is a Person construct that indicates the
+ // author of the entry or feed.
+ //
+ // atom:feed elements MUST contain one or more atom:author elements,
+ // unless all of the atom:feed element's child atom:entry elements
+ // contain at least one atom:author element.
+ Authors AtomPersons `xml:"http://www.w3.org/2005/Atom author"`
+
+ // The "atom:icon" element's content is an IRI reference [RFC3987] that
+ // identifies an image that provides iconic visual identification for a
+ // feed.
+ //
+ // atom:feed elements MUST NOT contain more than one atom:icon element.
+ Icon string `xml:"http://www.w3.org/2005/Atom icon"`
+
+ // The "atom:logo" element's content is an IRI reference [RFC3987] that
+ // identifies an image that provides visual identification for a feed.
+ //
+ // atom:feed elements MUST NOT contain more than one atom:logo element.
+ Logo string `xml:"http://www.w3.org/2005/Atom logo"`
+
+ // atom:feed elements SHOULD contain one atom:link element with a rel
+ // attribute value of "self". This is the preferred URI for
+ // retrieving Atom Feed Documents representing this Atom feed.
+ //
+ // atom:feed elements MUST NOT contain more than one atom:link
+ // element with a rel attribute value of "alternate" that has the
+ // same combination of type and hreflang attribute values.
+ Links AtomLinks `xml:"http://www.w3.org/2005/Atom link"`
+
+ // The "atom:category" element conveys information about a category
+ // associated with an entry or feed. This specification assigns no
+ // meaning to the content (if any) of this element.
+ //
+ // atom:feed elements MAY contain any number of atom:category
+ // elements.
+ Categories AtomCategories `xml:"http://www.w3.org/2005/Atom category"`
+
+ // Entries holds the atom:entry child elements of the feed.
+ Entries []Atom10Entry `xml:"http://www.w3.org/2005/Atom entry"`
}
-func (a *atom10Feed) Transform(baseURL string) *model.Feed {
- var err error
+type Atom10Entry struct {
+ // The "atom:id" element conveys a permanent, universally unique
+ // identifier for an entry or feed.
+ //
+ // Its content MUST be an IRI, as defined by [RFC3987]. Note that the
+ // definition of "IRI" excludes relative references. Though the IRI
+ // might use a dereferencable scheme, Atom Processors MUST NOT assume it
+ // can be dereferenced.
+ //
+ // atom:entry elements MUST contain exactly one atom:id element.
+ ID string `xml:"http://www.w3.org/2005/Atom id"`
- feed := new(model.Feed)
+ // The "atom:title" element is a Text construct that conveys a human-
+ // readable title for an entry or feed.
+ //
+ // atom:entry elements MUST contain exactly one atom:title element.
+ Title Atom10Text `xml:"http://www.w3.org/2005/Atom title"`
- feedURL := a.Links.firstLinkWithRelation("self")
- feed.FeedURL, err = urllib.AbsoluteURL(baseURL, feedURL)
- if err != nil {
- feed.FeedURL = feedURL
- }
+ // The "atom:published" element is a Date construct indicating an
+ // instant in time associated with an event early in the life cycle of
+ // the entry.
+ Published string `xml:"http://www.w3.org/2005/Atom published"`
- siteURL := a.Links.originalLink()
- feed.SiteURL, err = urllib.AbsoluteURL(baseURL, siteURL)
- if err != nil {
- feed.SiteURL = siteURL
- }
+ // The "atom:updated" element is a Date construct indicating the most
+ // recent instant in time when an entry or feed was modified in a way
+ // the publisher considers significant. Therefore, not all
+ // modifications necessarily result in a changed atom:updated value.
+ //
+ // atom:entry elements MUST contain exactly one atom:updated element.
+ Updated string `xml:"http://www.w3.org/2005/Atom updated"`
- feed.Title = html.UnescapeString(a.Title.String())
- if feed.Title == "" {
- feed.Title = feed.SiteURL
- }
+ // atom:entry elements MUST NOT contain more than one atom:link
+ // element with a rel attribute value of "alternate" that has the
+ // same combination of type and hreflang attribute values.
+ Links AtomLinks `xml:"http://www.w3.org/2005/Atom link"`
- feed.IconURL = strings.TrimSpace(a.Icon)
+ // atom:entry elements MUST contain an atom:summary element in either
+ // of the following cases:
+ // * the atom:entry contains an atom:content that has a "src"
+ // attribute (and is thus empty).
+ // * the atom:entry contains content that is encoded in Base64;
+ // i.e., the "type" attribute of atom:content is a MIME media type
+ // [MIMEREG], but is not an XML media type [RFC3023], does not
+ // begin with "text/", and does not end with "/xml" or "+xml".
+ //
+ // atom:entry elements MUST NOT contain more than one atom:summary
+ // element.
+ Summary Atom10Text `xml:"http://www.w3.org/2005/Atom summary"`
- for _, entry := range a.Entries {
- item := entry.Transform()
- entryURL, err := urllib.AbsoluteURL(feed.SiteURL, item.URL)
- if err == nil {
- item.URL = entryURL
- }
+ // atom:entry elements MUST NOT contain more than one atom:content
+ // element.
+ Content Atom10Text `xml:"http://www.w3.org/2005/Atom content"`
- if item.Author == "" {
- item.Author = a.Authors.String()
- }
+ // The "atom:author" element is a Person construct that indicates the
+ // author of the entry or feed.
+ //
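+ // atom:entry elements MUST contain one or more atom:author elements,
+ // unless the atom:entry contains an atom:source element that
+ // contains an atom:author element or, in an Atom Feed Document, the
+ // atom:feed element contains an atom:author element itself.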
+ Authors AtomPersons `xml:"http://www.w3.org/2005/Atom author"`
- if item.Title == "" {
- item.Title = sanitizer.TruncateHTML(item.Content, 100)
- }
+ // The "atom:category" element conveys information about a category
+ // associated with an entry or feed. This specification assigns no
+ // meaning to the content (if any) of this element.
+ //
+ // atom:entry elements MAY contain any number of atom:category
+ // elements.
+ Categories AtomCategories `xml:"http://www.w3.org/2005/Atom category"`
- if item.Title == "" {
- item.Title = item.URL
- }
-
- feed.Entries = append(feed.Entries, item)
- }
-
- return feed
-}
-
-type atom10Entry struct {
- ID string `xml:"id"`
- Title atom10Text `xml:"title"`
- Published string `xml:"published"`
- Updated string `xml:"updated"`
- Links atomLinks `xml:"link"`
- Summary atom10Text `xml:"summary"`
- Content atom10Text `xml:"http://www.w3.org/2005/Atom content"`
- Authors atomAuthors `xml:"author"`
- Categories []atom10Category `xml:"category"`
media.MediaItemElement
}
-func (a *atom10Entry) Transform() *model.Entry {
- entry := model.NewEntry()
- entry.URL = a.Links.originalLink()
- entry.Date = a.entryDate()
- entry.Author = a.Authors.String()
- entry.Hash = a.entryHash()
- entry.Content = a.entryContent()
- entry.Title = a.entryTitle()
- entry.Enclosures = a.entryEnclosures()
- entry.CommentsURL = a.entryCommentsURL()
- entry.Tags = a.entryCategories()
- return entry
-}
-
-func (a *atom10Entry) entryTitle() string {
- return html.UnescapeString(a.Title.String())
-}
-
-func (a *atom10Entry) entryContent() string {
- content := a.Content.String()
- if content != "" {
- return content
- }
-
- summary := a.Summary.String()
- if summary != "" {
- return summary
- }
-
- mediaDescription := a.FirstMediaDescription()
- if mediaDescription != "" {
- return mediaDescription
- }
-
- return ""
-}
-
-// Note: The published date represents the original creation date for YouTube feeds.
-// Example:
-// 2019-01-26T08:02:28+00:00
-// 2019-01-29T07:27:27+00:00
-func (a *atom10Entry) entryDate() time.Time {
- dateText := a.Published
- if dateText == "" {
- dateText = a.Updated
- }
-
- if dateText != "" {
- result, err := date.Parse(dateText)
- if err != nil {
- slog.Debug("Unable to parse date from Atom 0.3 feed",
- slog.String("date", dateText),
- slog.String("id", a.ID),
- slog.Any("error", err),
- )
- return time.Now()
- }
-
- return result
- }
-
- return time.Now()
-}
-
-func (a *atom10Entry) entryHash() string {
- for _, value := range []string{a.ID, a.Links.originalLink()} {
- if value != "" {
- return crypto.Hash(value)
- }
- }
-
- return ""
-}
-
-func (a *atom10Entry) entryEnclosures() model.EnclosureList {
- enclosures := make(model.EnclosureList, 0)
- duplicates := make(map[string]bool)
-
- for _, mediaThumbnail := range a.AllMediaThumbnails() {
- if _, found := duplicates[mediaThumbnail.URL]; !found {
- duplicates[mediaThumbnail.URL] = true
- enclosures = append(enclosures, &model.Enclosure{
- URL: mediaThumbnail.URL,
- MimeType: mediaThumbnail.MimeType(),
- Size: mediaThumbnail.Size(),
- })
- }
- }
-
- for _, link := range a.Links {
- if strings.EqualFold(link.Rel, "enclosure") {
- if link.URL == "" {
- continue
- }
-
- if _, found := duplicates[link.URL]; !found {
- duplicates[link.URL] = true
- length, _ := strconv.ParseInt(link.Length, 10, 0)
- enclosures = append(enclosures, &model.Enclosure{URL: link.URL, MimeType: link.Type, Size: length})
- }
- }
- }
-
- for _, mediaContent := range a.AllMediaContents() {
- if _, found := duplicates[mediaContent.URL]; !found {
- duplicates[mediaContent.URL] = true
- enclosures = append(enclosures, &model.Enclosure{
- URL: mediaContent.URL,
- MimeType: mediaContent.MimeType(),
- Size: mediaContent.Size(),
- })
- }
- }
-
- for _, mediaPeerLink := range a.AllMediaPeerLinks() {
- if _, found := duplicates[mediaPeerLink.URL]; !found {
- duplicates[mediaPeerLink.URL] = true
- enclosures = append(enclosures, &model.Enclosure{
- URL: mediaPeerLink.URL,
- MimeType: mediaPeerLink.MimeType(),
- Size: mediaPeerLink.Size(),
- })
- }
- }
-
- return enclosures
-}
-
-func (r *atom10Entry) entryCategories() []string {
- categoryList := make([]string, 0)
-
- for _, atomCategory := range r.Categories {
- if strings.TrimSpace(atomCategory.Label) != "" {
- categoryList = append(categoryList, strings.TrimSpace(atomCategory.Label))
- } else {
- categoryList = append(categoryList, strings.TrimSpace(atomCategory.Term))
- }
- }
-
- return categoryList
-}
-
-// See https://tools.ietf.org/html/rfc4685#section-4
-// If the type attribute of the atom:link is omitted, its value is assumed to be "application/atom+xml".
-// We accept only HTML or XHTML documents for now since the intention is to have the same behavior as RSS.
-func (a *atom10Entry) entryCommentsURL() string {
- commentsURL := a.Links.firstLinkWithRelationAndType("replies", "text/html", "application/xhtml+xml")
- if urllib.IsAbsoluteURL(commentsURL) {
- return commentsURL
- }
- return ""
-}
-
-type atom10Text struct {
- Type string `xml:"type,attr"`
- CharData string `xml:",chardata"`
- InnerXML string `xml:",innerxml"`
- XHTMLRootElement atomXHTMLRootElement `xml:"http://www.w3.org/1999/xhtml div"`
-}
-
-type atom10Category struct {
- Term string `xml:"term,attr"`
- Label string `xml:"label,attr"`
-}
-
+// A Text construct contains human-readable text, usually in small
+// quantities. The content of Text constructs is Language-Sensitive.
+// Specs: https://datatracker.ietf.org/doc/html/rfc4287#section-3.1
// Text: https://datatracker.ietf.org/doc/html/rfc4287#section-3.1.1.1
// HTML: https://datatracker.ietf.org/doc/html/rfc4287#section-3.1.1.2
// XHTML: https://datatracker.ietf.org/doc/html/rfc4287#section-3.1.1.3
-func (a *atom10Text) String() string {
+type Atom10Text struct {
+ // Type holds the value of the element's "type" attribute.
+ Type string `xml:"type,attr"`
+ // CharData receives the element's character data.
+ CharData string `xml:",chardata"`
+ // InnerXML receives the raw XML nested inside the element.
+ InnerXML string `xml:",innerxml"`
+ // XHTMLRootElement receives a child "div" element in the XHTML namespace.
+ XHTMLRootElement AtomXHTMLRootElement `xml:"http://www.w3.org/1999/xhtml div"`
+}
+
+func (a *Atom10Text) Body() string {
var content string
- switch {
- case a.Type == "", a.Type == "text", a.Type == "text/plain":
- if strings.HasPrefix(strings.TrimSpace(a.InnerXML), ` 0 {
+ categories = slices.Compact(categories)
+ sort.Strings(categories)
+ entry.Tags = categories
+ }
+
+ // Populate the commentsURL if defined.
+ // See https://tools.ietf.org/html/rfc4685#section-4
+ // If the type attribute of the atom:link is omitted, its value is assumed to be "application/atom+xml".
+ // We accept only HTML or XHTML documents for now since the intention is to have the same behavior as RSS.
+ commentsURL := atomEntry.Links.firstLinkWithRelationAndType("replies", "text/html", "application/xhtml+xml")
+ if urllib.IsAbsoluteURL(commentsURL) {
+ entry.CommentsURL = commentsURL
+ }
+
+ // Generate the entry hash.
+ for _, value := range []string{atomEntry.ID, atomEntry.Links.OriginalLink()} {
+ if value != "" {
+ entry.Hash = crypto.Hash(value)
+ break
+ }
+ }
+
+ // Populate the entry enclosures.
+ uniqueEnclosuresMap := make(map[string]bool)
+
+ for _, mediaThumbnail := range atomEntry.AllMediaThumbnails() {
+ if _, found := uniqueEnclosuresMap[mediaThumbnail.URL]; !found {
+ uniqueEnclosuresMap[mediaThumbnail.URL] = true
+ entry.Enclosures = append(entry.Enclosures, &model.Enclosure{
+ URL: mediaThumbnail.URL,
+ MimeType: mediaThumbnail.MimeType(),
+ Size: mediaThumbnail.Size(),
+ })
+ }
+ }
+
+ for _, link := range atomEntry.Links {
+ if strings.EqualFold(link.Rel, "enclosure") {
+ if link.Href == "" {
+ continue
+ }
+
+ if _, found := uniqueEnclosuresMap[link.Href]; !found {
+ uniqueEnclosuresMap[link.Href] = true
+ length, _ := strconv.ParseInt(link.Length, 10, 0)
+ entry.Enclosures = append(entry.Enclosures, &model.Enclosure{
+ URL: link.Href,
+ MimeType: link.Type,
+ Size: length,
+ })
+ }
+ }
+ }
+
+ for _, mediaContent := range atomEntry.AllMediaContents() {
+ if _, found := uniqueEnclosuresMap[mediaContent.URL]; !found {
+ uniqueEnclosuresMap[mediaContent.URL] = true
+ entry.Enclosures = append(entry.Enclosures, &model.Enclosure{
+ URL: mediaContent.URL,
+ MimeType: mediaContent.MimeType(),
+ Size: mediaContent.Size(),
+ })
+ }
+ }
+
+ for _, mediaPeerLink := range atomEntry.AllMediaPeerLinks() {
+ if _, found := uniqueEnclosuresMap[mediaPeerLink.URL]; !found {
+ uniqueEnclosuresMap[mediaPeerLink.URL] = true
+ entry.Enclosures = append(entry.Enclosures, &model.Enclosure{
+ URL: mediaPeerLink.URL,
+ MimeType: mediaPeerLink.MimeType(),
+ Size: mediaPeerLink.Size(),
+ })
+ }
+ }
+
+ feed.Entries = append(feed.Entries, entry)
+ }
+
+ return feed
+}
diff --git a/internal/reader/atom/atom_10_test.go b/internal/reader/atom/atom_10_test.go
index f778e8e6..be6e9148 100644
--- a/internal/reader/atom/atom_10_test.go
+++ b/internal/reader/atom/atom_10_test.go
@@ -12,7 +12,6 @@ import (
func TestParseAtomSample(t *testing.T) {
data := `
-
Example Feed2003-12-13T18:30:02Z
@@ -20,7 +19,6 @@ func TestParseAtomSample(t *testing.T) {
John Doeurn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6
-
Atom-Powered Robots Run Amok
@@ -28,7 +26,6 @@ func TestParseAtomSample(t *testing.T) {
2003-12-13T18:30:02ZSome text.
-
`
feed, err := Parse("http://example.org/feed.xml", bytes.NewReader([]byte(data)), "10")
@@ -420,7 +417,7 @@ func TestParseEntryWithPlainTextTitle(t *testing.T) {
expected := `AT&T bought by SBC!`
for i := range 2 {
if feed.Entries[i].Title != expected {
- t.Errorf("Incorrect title for entry #%d, got: %q", i, feed.Entries[i].Title)
+ t.Errorf("Incorrect title for entry #%d, got: %q instead of %q", i, feed.Entries[i].Title, expected)
}
}
}
@@ -430,33 +427,20 @@ func TestParseEntryWithHTMLTitle(t *testing.T) {
Example Feed
-
- <code>Test</code> Test
-
- urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a
- 2003-12-13T18:30:02Z
- Some text.
+ <code>Code</code> Test
+
-
-
-
- urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a
- 2003-12-13T18:30:02Z
- Some text.
+
+
-
-
- urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a
- 2003-12-13T18:30:02Z
- Some text.
+
-
`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
@@ -464,11 +448,11 @@ func TestParseEntryWithHTMLTitle(t *testing.T) {
t.Fatal(err)
}
- if feed.Entries[0].Title != "Test Test" {
+ if feed.Entries[0].Title != "Code Test" {
t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title)
}
- if feed.Entries[1].Title != "Test “Test”" {
+ if feed.Entries[1].Title != "Test with “unicode quote”" {
t.Errorf("Incorrect entry title, got: %q", feed.Entries[1].Title)
}
@@ -502,8 +486,8 @@ func TestParseEntryWithXHTMLTitle(t *testing.T) {
t.Fatal(err)
}
- if feed.Entries[0].Title != `This is XHTML content.` {
- t.Errorf("Incorrect entry title, got: %q", feed.Entries[1].Title)
+ if feed.Entries[0].Title != `This is XHTML content.` {
+ t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title)
}
}
@@ -608,7 +592,7 @@ func TestParseEntryWithDoubleEncodedEntitiesTitle(t *testing.T) {
t.Fatal(err)
}
- if feed.Entries[0].Title != `'AT&T'` {
+ if feed.Entries[0].Title != `'AT&T'` {
t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title)
}
}
@@ -644,31 +628,21 @@ func TestParseEntryWithHTMLSummary(t *testing.T) {
Example Feed
-
- Example
+ Example 1
- urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a
- 2003-12-13T18:30:02Z
- <code>std::unique_ptr<S></code>
+ <code>std::unique_ptr<S> myvar;</code>
-
- Example
+ Example 2
- urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a
- 2003-12-13T18:30:02Z
- <code>std::unique_ptr<S></code>
+ <code>std::unique_ptr<S> myvar;</code>
-
- Example
+ Example 3
- urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a
- 2003-12-13T18:30:02Z
- std::unique_ptr<S>]]>
+ std::unique_ptr<S> myvar;]]>
-
`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
@@ -676,7 +650,11 @@ func TestParseEntryWithHTMLSummary(t *testing.T) {
t.Fatal(err)
}
- expected := `std::unique_ptr<S>`
+ if len(feed.Entries) != 3 {
+ t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
+ }
+
+ expected := `std::unique_ptr<S> myvar;`
for i := range 3 {
if feed.Entries[i].Content != expected {
t.Errorf("Incorrect content for entry #%d, got: %q", i, feed.Entries[i].Content)
@@ -728,7 +706,7 @@ func TestParseEntryWithTextSummary(t *testing.T) {
t.Fatal(err)
}
- expected := `AT&T <S>`
+ expected := `AT&T `
for i := range 4 {
if feed.Entries[i].Content != expected {
t.Errorf("Incorrect content for entry #%d, got: %q", i, feed.Entries[i].Content)
@@ -747,7 +725,7 @@ func TestParseEntryWithTextContent(t *testing.T) {
urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a2003-12-13T18:30:02Z
- AT&T <S>
+ AT&T <strong>Strong Element</strong>
@@ -755,7 +733,7 @@ func TestParseEntryWithTextContent(t *testing.T) {
urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a2003-12-13T18:30:02Z
- AT&T <S>
+ AT&T <strong>Strong Element</strong>
@@ -763,7 +741,7 @@ func TestParseEntryWithTextContent(t *testing.T) {
urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a2003-12-13T18:30:02Z
- AT&T <S>
+ AT&T <strong>Strong Element</strong>
@@ -771,7 +749,7 @@ func TestParseEntryWithTextContent(t *testing.T) {
urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a2003-12-13T18:30:02Z
- ]]>
+ Strong Element]]>`
@@ -781,10 +759,10 @@ func TestParseEntryWithTextContent(t *testing.T) {
t.Fatal(err)
}
- expected := `AT&T <S>`
+ expected := `AT&T Strong Element`
for i := range 4 {
if feed.Entries[i].Content != expected {
- t.Errorf("Incorrect content for entry #%d, got: %q", i, feed.Entries[i].Content)
+ t.Errorf("Incorrect content for entry #%d, got: %q instead of %q", i, feed.Entries[i].Content, expected)
}
}
}
@@ -925,7 +903,6 @@ func TestParseEntryWithMultipleAuthors(t *testing.T) {
Example Feed
-
urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a
@@ -938,7 +915,6 @@ func TestParseEntryWithMultipleAuthors(t *testing.T) {
Bob
-
`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
@@ -951,7 +927,7 @@ func TestParseEntryWithMultipleAuthors(t *testing.T) {
}
}
-func TestParseEntryWithoutAuthor(t *testing.T) {
+func TestParseFeedWithEntryWithoutAuthor(t *testing.T) {
data := `
Example Feed
@@ -959,14 +935,12 @@ func TestParseEntryWithoutAuthor(t *testing.T) {
John Doe
-
urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a2003-12-13T18:30:02ZSome text.
-
`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
@@ -990,14 +964,15 @@ func TestParseFeedWithMultipleAuthors(t *testing.T) {
Bob
-
+
+ Bob
+ urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a2003-12-13T18:30:02ZSome text.
-
`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
@@ -1015,14 +990,12 @@ func TestParseFeedWithoutAuthor(t *testing.T) {
Example Feed
-
urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a2003-12-13T18:30:02ZSome text.
-
`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
@@ -1608,27 +1581,18 @@ func TestAbsoluteCommentsURL(t *testing.T) {
}
}
-func TestParseFeedWithCategories(t *testing.T) {
+func TestParseItemWithCategories(t *testing.T) {
data := `
Example Feed
-
- Alice
-
-
- Bob
-
-
-
- urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a
+
2003-12-13T18:30:02ZSome text.
-
+
-
`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
@@ -1637,22 +1601,53 @@ func TestParseFeedWithCategories(t *testing.T) {
}
if len(feed.Entries[0].Tags) != 2 {
- t.Errorf("Incorrect number of tags, got: %d", len(feed.Entries[0].Tags))
+ t.Fatalf("Incorrect number of tags, got: %d", len(feed.Entries[0].Tags))
}
- expected := "Tech"
+ expected := "Science"
result := feed.Entries[0].Tags[0]
if result != expected {
t.Errorf("Incorrect entry category, got %q instead of %q", result, expected)
}
- expected = "Science"
+ expected = "ZZZZ"
result = feed.Entries[0].Tags[1]
if result != expected {
t.Errorf("Incorrect entry category, got %q instead of %q", result, expected)
}
}
+func TestParseFeedWithCategories(t *testing.T) {
+ data := `
+
+ Example Feed
+
+
+
+
+
+
+ 2003-12-13T18:30:02Z
+ Some text.
+
+ `
+
+ feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ if len(feed.Entries[0].Tags) != 1 {
+ t.Fatalf("Incorrect number of tags, got: %d", len(feed.Entries[0].Tags))
+ }
+
+ expected := "Some Label"
+ result := feed.Entries[0].Tags[0]
+ if result != expected {
+ t.Errorf("Incorrect entry category, got %q instead of %q", result, expected)
+ }
+}
+
func TestParseFeedWithIconURL(t *testing.T) {
data := `
diff --git a/internal/reader/atom/atom_common.go b/internal/reader/atom/atom_common.go
index 4b283d44..debd46f1 100644
--- a/internal/reader/atom/atom_common.go
+++ b/internal/reader/atom/atom_common.go
@@ -3,77 +3,91 @@
package atom // import "miniflux.app/v2/internal/reader/atom"
-import "strings"
+import (
+ "strings"
+)
-type atomPerson struct {
- Name string `xml:"name"`
+// Specs: https://datatracker.ietf.org/doc/html/rfc4287#section-3.2
+type AtomPerson struct {
+ // The "atom:name" element's content conveys a human-readable name for the author.
+ // It MAY be the name of a corporation or other entity if no individual authors can be named.
+ // Person constructs MUST contain exactly one "atom:name" element, whose content MUST be a string.
+ Name string `xml:"name"`
+
+ // The "atom:email" element's content conveys an e-mail address associated with the Person construct.
+ // Person constructs MAY contain an atom:email element, but MUST NOT contain more than one.
+ // Its content MUST be an e-mail address [RFC2822].
+ // Ordering of the element children of Person constructs MUST NOT be considered significant.
Email string `xml:"email"`
}
-func (a *atomPerson) String() string {
- name := ""
-
- switch {
- case a.Name != "":
- name = a.Name
- case a.Email != "":
- name = a.Email
+func (a *AtomPerson) PersonName() string {
+ name := strings.TrimSpace(a.Name)
+ if name != "" {
+ return name
}
- return strings.TrimSpace(name)
+ return strings.TrimSpace(a.Email)
}
-type atomAuthors []*atomPerson
+type AtomPersons []*AtomPerson
-func (a atomAuthors) String() string {
- var authors []string
+func (a AtomPersons) PersonNames() []string {
+ var names []string
+ authorNamesMap := make(map[string]bool)
for _, person := range a {
- authors = append(authors, person.String())
+ personName := person.PersonName()
+ if _, ok := authorNamesMap[personName]; !ok {
+ names = append(names, personName)
+ authorNamesMap[personName] = true
+ }
}
- return strings.Join(authors, ", ")
+ return names
}
-type atomLink struct {
- URL string `xml:"href,attr"`
+// Specs: https://datatracker.ietf.org/doc/html/rfc4287#section-4.2.7
+type AtomLink struct {
+ Href string `xml:"href,attr"`
Type string `xml:"type,attr"`
Rel string `xml:"rel,attr"`
Length string `xml:"length,attr"`
+ Title string `xml:"title,attr"`
}
-type atomLinks []*atomLink
+type AtomLinks []*AtomLink
-func (a atomLinks) originalLink() string {
+func (a AtomLinks) OriginalLink() string {
for _, link := range a {
if strings.EqualFold(link.Rel, "alternate") {
- return strings.TrimSpace(link.URL)
+ return strings.TrimSpace(link.Href)
}
if link.Rel == "" && (link.Type == "" || link.Type == "text/html") {
- return strings.TrimSpace(link.URL)
+ return strings.TrimSpace(link.Href)
}
}
return ""
}
-func (a atomLinks) firstLinkWithRelation(relation string) string {
+func (a AtomLinks) firstLinkWithRelation(relation string) string {
for _, link := range a {
if strings.EqualFold(link.Rel, relation) {
- return strings.TrimSpace(link.URL)
+ return strings.TrimSpace(link.Href)
}
}
return ""
}
-func (a atomLinks) firstLinkWithRelationAndType(relation string, contentTypes ...string) string {
+func (a AtomLinks) firstLinkWithRelationAndType(relation string, contentTypes ...string) string {
for _, link := range a {
if strings.EqualFold(link.Rel, relation) {
for _, contentType := range contentTypes {
if strings.EqualFold(link.Type, contentType) {
- return strings.TrimSpace(link.URL)
+ return strings.TrimSpace(link.Href)
}
}
}
@@ -81,3 +95,46 @@ func (a atomLinks) firstLinkWithRelationAndType(relation string, contentTypes ..
return ""
}
+
+// The "atom:category" element conveys information about a category
+// associated with an entry or feed. This specification assigns no
+// meaning to the content (if any) of this element.
+//
+// Specs: https://datatracker.ietf.org/doc/html/rfc4287#section-4.2.2
+type AtomCategory struct {
+ // The "term" attribute is a string that identifies the category to
+ // which the entry or feed belongs. Category elements MUST have a
+ // "term" attribute.
+ Term string `xml:"term,attr"`
+
+ // The "scheme" attribute is an IRI that identifies a categorization
+ // scheme. Category elements MAY have a "scheme" attribute.
+ Scheme string `xml:"scheme,attr"`
+
+ // The "label" attribute provides a human-readable label for display in
+ // end-user applications. The content of the "label" attribute is
+ // Language-Sensitive. Entities such as "&amp;" and "&lt;" represent
+ // their corresponding characters ("&" and "<", respectively), not
+ // markup. Category elements MAY have a "label" attribute.
+ Label string `xml:"label,attr"`
+}
+
+type AtomCategories []AtomCategory
+
+func (ac AtomCategories) CategoryNames() []string {
+ var categories []string
+
+ for _, category := range ac {
+ label := strings.TrimSpace(category.Label)
+ if label != "" {
+ categories = append(categories, label)
+ } else {
+ term := strings.TrimSpace(category.Term)
+ if term != "" {
+ categories = append(categories, term)
+ }
+ }
+ }
+
+ return categories
+}
diff --git a/internal/reader/atom/parser.go b/internal/reader/atom/parser.go
index 55c17ee5..f97985bc 100644
--- a/internal/reader/atom/parser.go
+++ b/internal/reader/atom/parser.go
@@ -11,22 +11,20 @@ import (
xml_decoder "miniflux.app/v2/internal/reader/xml"
)
-type atomFeed interface {
- Transform(baseURL string) *model.Feed
-}
-
// Parse returns a normalized feed struct from a Atom feed.
func Parse(baseURL string, r io.ReadSeeker, version string) (*model.Feed, error) {
- var rawFeed atomFeed
- if version == "0.3" {
- rawFeed = new(atom03Feed)
- } else {
- rawFeed = new(atom10Feed)
+ switch version {
+ case "0.3":
+ atomFeed := new(Atom03Feed)
+ if err := xml_decoder.NewXMLDecoder(r).Decode(atomFeed); err != nil {
+ return nil, fmt.Errorf("atom: unable to parse Atom 0.3 feed: %w", err)
+ }
+ return NewAtom03Adapter(atomFeed).BuildFeed(baseURL), nil
+ default:
+ atomFeed := new(Atom10Feed)
+ if err := xml_decoder.NewXMLDecoder(r).Decode(atomFeed); err != nil {
+ return nil, fmt.Errorf("atom: unable to parse Atom 1.0 feed: %w", err)
+ }
+ return NewAtom10Adapter(atomFeed).BuildFeed(baseURL), nil
}
-
- if err := xml_decoder.NewXMLDecoder(r).Decode(rawFeed); err != nil {
- return nil, fmt.Errorf("atom: unable to parse feed: %w", err)
- }
-
- return rawFeed.Transform(baseURL), nil
}
diff --git a/internal/reader/json/adapter.go b/internal/reader/json/adapter.go
index d62ff976..9e577d3e 100644
--- a/internal/reader/json/adapter.go
+++ b/internal/reader/json/adapter.go
@@ -98,7 +98,6 @@ func (j *JSONAdapter) BuildFeed(feedURL string) *model.Feed {
}
// Populate the entry date.
- entry.Date = time.Now()
for _, value := range []string{item.DatePublished, item.DateModified} {
value = strings.TrimSpace(value)
if value != "" {
@@ -114,6 +113,9 @@ func (j *JSONAdapter) BuildFeed(feedURL string) *model.Feed {
}
}
}
+ if entry.Date.IsZero() {
+ entry.Date = time.Now()
+ }
// Populate the entry author.
itemAuthors := append(item.Authors, j.jsonFeed.Authors...)
diff --git a/internal/reader/parser/parser_test.go b/internal/reader/parser/parser_test.go
index 447f73d3..9ab55a0c 100644
--- a/internal/reader/parser/parser_test.go
+++ b/internal/reader/parser/parser_test.go
@@ -85,7 +85,35 @@ func FuzzParse(f *testing.F) {
})
}
-func TestParseAtom(t *testing.T) {
+func TestParseAtom03Feed(t *testing.T) {
+ data := `
+
+ dive into mark
+
+ 2003-12-13T18:30:02Z
+ Mark Pilgrim
+
+ Atom 0.3 snapshot
+
+ tag:diveintomark.org,2003:3.2397
+ 2003-12-13T08:29:29-04:00
+ 2003-12-13T18:30:02Z
+ It's a test
+ HTML content