From dd4fb660c19fd1f6ce5716f9f5783eb7565fed2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Guillot?= Date: Fri, 15 Mar 2024 16:39:32 -0700 Subject: [PATCH] Refactor Atom parser to use an adapter --- internal/reader/atom/atom_03.go | 216 +++++-------- internal/reader/atom/atom_03_adapter.go | 115 +++++++ internal/reader/atom/atom_03_test.go | 26 +- internal/reader/atom/atom_10.go | 407 ++++++++++-------------- internal/reader/atom/atom_10_adapter.go | 210 ++++++++++++ internal/reader/atom/atom_10_test.go | 145 ++++----- internal/reader/atom/atom_common.go | 111 +++++-- internal/reader/atom/parser.go | 28 +- internal/reader/json/adapter.go | 4 +- internal/reader/parser/parser_test.go | 30 +- internal/reader/rss/adapter.go | 3 +- 11 files changed, 795 insertions(+), 500 deletions(-) create mode 100644 internal/reader/atom/atom_03_adapter.go create mode 100644 internal/reader/atom/atom_10_adapter.go diff --git a/internal/reader/atom/atom_03.go b/internal/reader/atom/atom_03.go index edcb83dc..fb458e91 100644 --- a/internal/reader/atom/atom_03.go +++ b/internal/reader/atom/atom_03.go @@ -6,158 +6,114 @@ package atom // import "miniflux.app/v2/internal/reader/atom" import ( "encoding/base64" "html" - "log/slog" "strings" - "time" - - "miniflux.app/v2/internal/crypto" - "miniflux.app/v2/internal/model" - "miniflux.app/v2/internal/reader/date" - "miniflux.app/v2/internal/reader/sanitizer" - "miniflux.app/v2/internal/urllib" ) // Specs: http://web.archive.org/web/20060811235523/http://www.mnot.net/drafts/draft-nottingham-atom-format-02.html -type atom03Feed struct { - ID string `xml:"id"` - Title atom03Text `xml:"title"` - Author atomPerson `xml:"author"` - Links atomLinks `xml:"link"` - Entries []atom03Entry `xml:"entry"` +type Atom03Feed struct { + Version string `xml:"version,attr"` + + // The "atom:id" element's content conveys a permanent, globally unique identifier for the feed. + // It MUST NOT change over time, even if the feed is relocated. atom:feed elements MAY contain an atom:id element, + // but MUST NOT contain more than one. The content of this element, when present, MUST be a URI. + ID string `xml:"http://purl.org/atom/ns# id"` + + // The "atom:title" element is a Content construct that conveys a human-readable title for the feed. + // atom:feed elements MUST contain exactly one atom:title element. + // If the feed describes a Web resource, its content SHOULD be the same as that resource's title. + Title Atom03Content `xml:"http://purl.org/atom/ns# title"` + + // The "atom:link" element is a Link construct that conveys a URI associated with the feed. + // The nature of the relationship as well as the link itself is determined by the element's content. + // atom:feed elements MUST contain at least one atom:link element with a rel attribute value of "alternate". + // atom:feed elements MUST NOT contain more than one atom:link element with a rel attribute value of "alternate" that has the same type attribute value. + // atom:feed elements MAY contain additional atom:link elements beyond those described above. + Links AtomLinks `xml:"http://purl.org/atom/ns# link"` + + // The "atom:author" element is a Person construct that indicates the default author of the feed. + // atom:feed elements MUST contain exactly one atom:author element, + // UNLESS all of the atom:feed element's child atom:entry elements contain an atom:author element. + // atom:feed elements MUST NOT contain more than one atom:author element. + Author AtomPerson `xml:"http://purl.org/atom/ns# author"` + + // The "atom:entry" element's represents an individual entry that is contained by the feed. + // atom:feed elements MAY contain one or more atom:entry elements. + Entries []Atom03Entry `xml:"http://purl.org/atom/ns# entry"` } -func (a *atom03Feed) Transform(baseURL string) *model.Feed { - var err error +type Atom03Entry struct { + // The "atom:id" element's content conveys a permanent, globally unique identifier for the entry. + // It MUST NOT change over time, even if other representations of the entry (such as a web representation pointed to by the entry's atom:link element) are relocated. + // If the same entry is syndicated in two atom:feeds published by the same entity, the entry's atom:id MUST be the same in both feeds. + ID string `xml:"id"` - feed := new(model.Feed) + // The "atom:title" element is a Content construct that conveys a human-readable title for the entry. + // atom:entry elements MUST have exactly one "atom:title" element. + // If an entry describes a Web resource, its content SHOULD be the same as that resource's title. + Title Atom03Content `xml:"title"` - feedURL := a.Links.firstLinkWithRelation("self") - feed.FeedURL, err = urllib.AbsoluteURL(baseURL, feedURL) - if err != nil { - feed.FeedURL = feedURL - } + // The "atom:modified" element is a Date construct that indicates the time that the entry was last modified. + // atom:entry elements MUST contain an atom:modified element, but MUST NOT contain more than one. + // The content of an atom:modified element MUST have a time zone whose value SHOULD be "UTC". + Modified string `xml:"modified"` - siteURL := a.Links.originalLink() - feed.SiteURL, err = urllib.AbsoluteURL(baseURL, siteURL) - if err != nil { - feed.SiteURL = siteURL - } + // The "atom:issued" element is a Date construct that indicates the time that the entry was issued. + // atom:entry elements MUST contain an atom:issued element, but MUST NOT contain more than one. + // The content of an atom:issued element MAY omit a time zone. + Issued string `xml:"issued"` - feed.Title = a.Title.String() - if feed.Title == "" { - feed.Title = feed.SiteURL - } + // The "atom:created" element is a Date construct that indicates the time that the entry was created. + // atom:entry elements MAY contain an atom:created element, but MUST NOT contain more than one. + // The content of an atom:created element MUST have a time zone whose value SHOULD be "UTC". + // If atom:created is not present, its content MUST considered to be the same as that of atom:modified. + Created string `xml:"created"` - for _, entry := range a.Entries { - item := entry.Transform() - entryURL, err := urllib.AbsoluteURL(feed.SiteURL, item.URL) - if err == nil { - item.URL = entryURL - } + // The "atom:link" element is a Link construct that conveys a URI associated with the entry. + // The nature of the relationship as well as the link itself is determined by the element's content. + // atom:entry elements MUST contain at least one atom:link element with a rel attribute value of "alternate". + // atom:entry elements MUST NOT contain more than one atom:link element with a rel attribute value of "alternate" that has the same type attribute value. + // atom:entry elements MAY contain additional atom:link elements beyond those described above. + Links AtomLinks `xml:"link"` - if item.Author == "" { - item.Author = a.Author.String() - } + // The "atom:summary" element is a Content construct that conveys a short summary, abstract or excerpt of the entry. + // atom:entry elements MAY contain an atom:created element, but MUST NOT contain more than one. + Summary Atom03Content `xml:"summary"` - if item.Title == "" { - item.Title = sanitizer.TruncateHTML(item.Content, 100) - } + // The "atom:content" element is a Content construct that conveys the content of the entry. + // atom:entry elements MAY contain one or more atom:content elements. + Content Atom03Content `xml:"content"` - if item.Title == "" { - item.Title = item.URL - } - - feed.Entries = append(feed.Entries, item) - } - - return feed + // The "atom:author" element is a Person construct that indicates the default author of the entry. + // atom:entry elements MUST contain exactly one atom:author element, + // UNLESS the atom:feed element containing them contains an atom:author element itself. + // atom:entry elements MUST NOT contain more than one atom:author element. + Author AtomPerson `xml:"author"` } -type atom03Entry struct { - ID string `xml:"id"` - Title atom03Text `xml:"title"` - Modified string `xml:"modified"` - Issued string `xml:"issued"` - Created string `xml:"created"` - Links atomLinks `xml:"link"` - Summary atom03Text `xml:"summary"` - Content atom03Text `xml:"content"` - Author atomPerson `xml:"author"` -} +type Atom03Content struct { + // Content constructs MAY have a "type" attribute, whose value indicates the media type of the content. + // When present, this attribute's value MUST be a registered media type [RFC2045]. + // If not present, its value MUST be considered to be "text/plain". + Type string `xml:"type,attr"` -func (a *atom03Entry) Transform() *model.Entry { - entry := model.NewEntry() - entry.URL = a.Links.originalLink() - entry.Date = a.entryDate() - entry.Author = a.Author.String() - entry.Hash = a.entryHash() - entry.Content = a.entryContent() - entry.Title = a.entryTitle() - return entry -} + // Content constructs MAY have a "mode" attribute, whose value indicates the method used to encode the content. + // When present, this attribute's value MUST be listed below. + // If not present, its value MUST be considered to be "xml". + // + // "xml": A mode attribute with the value "xml" indicates that the element's content is inline xml (for example, namespace-qualified XHTML). + // + // "escaped": A mode attribute with the value "escaped" indicates that the element's content is an escaped string. + // Processors MUST unescape the element's content before considering it as content of the indicated media type. + // + // "base64": A mode attribute with the value "base64" indicates that the element's content is base64-encoded [RFC2045]. + // Processors MUST decode the element's content before considering it as content of the the indicated media type. + Mode string `xml:"mode,attr"` -func (a *atom03Entry) entryTitle() string { - return sanitizer.StripTags(a.Title.String()) -} - -func (a *atom03Entry) entryContent() string { - content := a.Content.String() - if content != "" { - return content - } - - summary := a.Summary.String() - if summary != "" { - return summary - } - - return "" -} - -func (a *atom03Entry) entryDate() time.Time { - dateText := "" - for _, value := range []string{a.Issued, a.Modified, a.Created} { - if value != "" { - dateText = value - break - } - } - - if dateText != "" { - result, err := date.Parse(dateText) - if err != nil { - slog.Debug("Unable to parse date from Atom 0.3 feed", - slog.String("date", dateText), - slog.String("id", a.ID), - slog.Any("error", err), - ) - return time.Now() - } - - return result - } - - return time.Now() -} - -func (a *atom03Entry) entryHash() string { - for _, value := range []string{a.ID, a.Links.originalLink()} { - if value != "" { - return crypto.Hash(value) - } - } - - return "" -} - -type atom03Text struct { - Type string `xml:"type,attr"` - Mode string `xml:"mode,attr"` CharData string `xml:",chardata"` InnerXML string `xml:",innerxml"` } -func (a *atom03Text) String() string { +func (a *Atom03Content) Content() string { content := "" switch { diff --git a/internal/reader/atom/atom_03_adapter.go b/internal/reader/atom/atom_03_adapter.go new file mode 100644 index 00000000..02d78ec8 --- /dev/null +++ b/internal/reader/atom/atom_03_adapter.go @@ -0,0 +1,115 @@ +// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +package atom // import "miniflux.app/v2/internal/reader/atom" + +import ( + "log/slog" + "time" + + "miniflux.app/v2/internal/crypto" + "miniflux.app/v2/internal/model" + "miniflux.app/v2/internal/reader/date" + "miniflux.app/v2/internal/reader/sanitizer" + "miniflux.app/v2/internal/urllib" +) + +type Atom03Adapter struct { + atomFeed *Atom03Feed +} + +func NewAtom03Adapter(atomFeed *Atom03Feed) *Atom03Adapter { + return &Atom03Adapter{atomFeed} +} + +func (a *Atom03Adapter) BuildFeed(baseURL string) *model.Feed { + feed := new(model.Feed) + + // Populate the feed URL. + feedURL := a.atomFeed.Links.firstLinkWithRelation("self") + if feedURL != "" { + if absoluteFeedURL, err := urllib.AbsoluteURL(baseURL, feedURL); err == nil { + feed.FeedURL = absoluteFeedURL + } + } else { + feed.FeedURL = baseURL + } + + // Populate the site URL. + siteURL := a.atomFeed.Links.OriginalLink() + if siteURL != "" { + if absoluteSiteURL, err := urllib.AbsoluteURL(baseURL, siteURL); err == nil { + feed.SiteURL = absoluteSiteURL + } + } else { + feed.SiteURL = baseURL + } + + // Populate the feed title. + feed.Title = a.atomFeed.Title.Content() + if feed.Title == "" { + feed.Title = feed.SiteURL + } + + for _, atomEntry := range a.atomFeed.Entries { + entry := model.NewEntry() + + // Populate the entry URL. + entry.URL = atomEntry.Links.OriginalLink() + if entry.URL != "" { + if absoluteEntryURL, err := urllib.AbsoluteURL(feed.SiteURL, entry.URL); err == nil { + entry.URL = absoluteEntryURL + } + } + + // Populate the entry content. + entry.Content = atomEntry.Content.Content() + if entry.Content == "" { + entry.Content = atomEntry.Summary.Content() + } + + // Populate the entry title. + entry.Title = atomEntry.Title.Content() + if entry.Title == "" { + entry.Title = sanitizer.TruncateHTML(entry.Content, 100) + } + if entry.Title == "" { + entry.Title = entry.URL + } + + // Populate the entry author. + entry.Author = atomEntry.Author.PersonName() + if entry.Author == "" { + entry.Author = a.atomFeed.Author.PersonName() + } + + // Populate the entry date. + for _, value := range []string{atomEntry.Issued, atomEntry.Modified, atomEntry.Created} { + if parsedDate, err := date.Parse(value); err == nil { + entry.Date = parsedDate + break + } else { + slog.Debug("Unable to parse date from Atom 0.3 feed", + slog.String("date", value), + slog.String("id", atomEntry.ID), + slog.Any("error", err), + ) + } + } + if entry.Date.IsZero() { + entry.Date = time.Now() + } + + // Generate the entry hash. + for _, value := range []string{atomEntry.ID, atomEntry.Links.OriginalLink()} { + if value != "" { + entry.Hash = crypto.Hash(value) + break + } + } + + feed.Entries = append(feed.Entries, entry) + } + + return feed +} diff --git a/internal/reader/atom/atom_03_test.go b/internal/reader/atom/atom_03_test.go index 321c0d82..54662bc9 100644 --- a/internal/reader/atom/atom_03_test.go +++ b/internal/reader/atom/atom_03_test.go @@ -27,7 +27,7 @@ func TestParseAtom03(t *testing.T) { ` - feed, err := Parse("http://diveintomark.org/", bytes.NewReader([]byte(data)), "0.3") + feed, err := Parse("http://diveintomark.org/atom.xml", bytes.NewReader([]byte(data)), "0.3") if err != nil { t.Fatal(err) } @@ -36,7 +36,7 @@ func TestParseAtom03(t *testing.T) { t.Errorf("Incorrect title, got: %s", feed.Title) } - if feed.FeedURL != "http://diveintomark.org/" { + if feed.FeedURL != "http://diveintomark.org/atom.xml" { t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL) } @@ -74,6 +74,28 @@ func TestParseAtom03(t *testing.T) { } } +func TestParseAtom03WithoutSiteURL(t *testing.T) { + data := ` + + 2003-12-13T18:30:02Z + Mark Pilgrim + + Atom 0.3 snapshot + + tag:diveintomark.org,2003:3.2397 + + ` + + feed, err := Parse("http://diveintomark.org/atom.xml", bytes.NewReader([]byte(data)), "0.3") + if err != nil { + t.Fatal(err) + } + + if feed.SiteURL != "http://diveintomark.org/atom.xml" { + t.Errorf("Incorrect title, got: %s", feed.Title) + } +} + func TestParseAtom03WithoutFeedTitle(t *testing.T) { data := ` diff --git a/internal/reader/atom/atom_10.go b/internal/reader/atom/atom_10.go index 798a8748..201d00d1 100644 --- a/internal/reader/atom/atom_10.go +++ b/internal/reader/atom/atom_10.go @@ -6,286 +6,199 @@ package atom // import "miniflux.app/v2/internal/reader/atom" import ( "encoding/xml" "html" - "log/slog" - "strconv" "strings" - "time" - "miniflux.app/v2/internal/crypto" - "miniflux.app/v2/internal/model" - "miniflux.app/v2/internal/reader/date" "miniflux.app/v2/internal/reader/media" "miniflux.app/v2/internal/reader/sanitizer" - "miniflux.app/v2/internal/urllib" ) +// The "atom:feed" element is the document (i.e., top-level) element of +// an Atom Feed Document, acting as a container for metadata and data +// associated with the feed. Its element children consist of metadata +// elements followed by zero or more atom:entry child elements. +// // Specs: // https://tools.ietf.org/html/rfc4287 // https://validator.w3.org/feed/docs/atom.html -type atom10Feed struct { - XMLName xml.Name `xml:"http://www.w3.org/2005/Atom feed"` - ID string `xml:"id"` - Title atom10Text `xml:"title"` - Authors atomAuthors `xml:"author"` - Icon string `xml:"icon"` - Links atomLinks `xml:"link"` - Entries []atom10Entry `xml:"entry"` +type Atom10Feed struct { + XMLName xml.Name `xml:"http://www.w3.org/2005/Atom feed"` + + // The "atom:id" element conveys a permanent, universally unique + // identifier for an entry or feed. + // + // Its content MUST be an IRI, as defined by [RFC3987]. Note that the + // definition of "IRI" excludes relative references. Though the IRI + // might use a dereferencable scheme, Atom Processors MUST NOT assume it + // can be dereferenced. + // + // atom:feed elements MUST contain exactly one atom:id element. + ID string `xml:"http://www.w3.org/2005/Atom id"` + + // The "atom:title" element is a Text construct that conveys a human- + // readable title for an entry or feed. + // + // atom:feed elements MUST contain exactly one atom:title element. + Title Atom10Text `xml:"http://www.w3.org/2005/Atom title"` + + // The "atom:author" element is a Person construct that indicates the + // author of the entry or feed. + // + // atom:feed elements MUST contain one or more atom:author elements, + // unless all of the atom:feed element's child atom:entry elements + // contain at least one atom:author element. + Authors AtomPersons `xml:"http://www.w3.org/2005/Atom author"` + + // The "atom:icon" element's content is an IRI reference [RFC3987] that + // identifies an image that provides iconic visual identification for a + // feed. + // + // atom:feed elements MUST NOT contain more than one atom:icon element. + Icon string `xml:"http://www.w3.org/2005/Atom icon"` + + // The "atom:logo" element's content is an IRI reference [RFC3987] that + // identifies an image that provides visual identification for a feed. + // + // atom:feed elements MUST NOT contain more than one atom:logo element. + Logo string `xml:"http://www.w3.org/2005/Atom logo"` + + // atom:feed elements SHOULD contain one atom:link element with a rel + // attribute value of "self". This is the preferred URI for + // retrieving Atom Feed Documents representing this Atom feed. + // + // atom:feed elements MUST NOT contain more than one atom:link + // element with a rel attribute value of "alternate" that has the + // same combination of type and hreflang attribute values. + Links AtomLinks `xml:"http://www.w3.org/2005/Atom link"` + + // The "atom:category" element conveys information about a category + // associated with an entry or feed. This specification assigns no + // meaning to the content (if any) of this element. + // + // atom:feed elements MAY contain any number of atom:category + // elements. + Categories AtomCategories `xml:"http://www.w3.org/2005/Atom category"` + + Entries []Atom10Entry `xml:"http://www.w3.org/2005/Atom entry"` } -func (a *atom10Feed) Transform(baseURL string) *model.Feed { - var err error +type Atom10Entry struct { + // The "atom:id" element conveys a permanent, universally unique + // identifier for an entry or feed. + // + // Its content MUST be an IRI, as defined by [RFC3987]. Note that the + // definition of "IRI" excludes relative references. Though the IRI + // might use a dereferencable scheme, Atom Processors MUST NOT assume it + // can be dereferenced. + // + // atom:entry elements MUST contain exactly one atom:id element. + ID string `xml:"http://www.w3.org/2005/Atom id"` - feed := new(model.Feed) + // The "atom:title" element is a Text construct that conveys a human- + // readable title for an entry or feed. + // + // atom:entry elements MUST contain exactly one atom:title element. + Title Atom10Text `xml:"http://www.w3.org/2005/Atom title"` - feedURL := a.Links.firstLinkWithRelation("self") - feed.FeedURL, err = urllib.AbsoluteURL(baseURL, feedURL) - if err != nil { - feed.FeedURL = feedURL - } + // The "atom:published" element is a Date construct indicating an + // instant in time associated with an event early in the life cycle of + // the entry. + Published string `xml:"http://www.w3.org/2005/Atom published"` - siteURL := a.Links.originalLink() - feed.SiteURL, err = urllib.AbsoluteURL(baseURL, siteURL) - if err != nil { - feed.SiteURL = siteURL - } + // The "atom:updated" element is a Date construct indicating the most + // recent instant in time when an entry or feed was modified in a way + // the publisher considers significant. Therefore, not all + // modifications necessarily result in a changed atom:updated value. + // + // atom:entry elements MUST contain exactly one atom:updated element. + Updated string `xml:"http://www.w3.org/2005/Atom updated"` - feed.Title = html.UnescapeString(a.Title.String()) - if feed.Title == "" { - feed.Title = feed.SiteURL - } + // atom:entry elements MUST NOT contain more than one atom:link + // element with a rel attribute value of "alternate" that has the + // same combination of type and hreflang attribute values. + Links AtomLinks `xml:"http://www.w3.org/2005/Atom link"` - feed.IconURL = strings.TrimSpace(a.Icon) + // atom:entry elements MUST contain an atom:summary element in either + // of the following cases: + // * the atom:entry contains an atom:content that has a "src" + // attribute (and is thus empty). + // * the atom:entry contains content that is encoded in Base64; + // i.e., the "type" attribute of atom:content is a MIME media type + // [MIMEREG], but is not an XML media type [RFC3023], does not + // begin with "text/", and does not end with "/xml" or "+xml". + // + // atom:entry elements MUST NOT contain more than one atom:summary + // element. + Summary Atom10Text `xml:"http://www.w3.org/2005/Atom summary"` - for _, entry := range a.Entries { - item := entry.Transform() - entryURL, err := urllib.AbsoluteURL(feed.SiteURL, item.URL) - if err == nil { - item.URL = entryURL - } + // atom:entry elements MUST NOT contain more than one atom:content + // element. + Content Atom10Text `xml:"http://www.w3.org/2005/Atom content"` - if item.Author == "" { - item.Author = a.Authors.String() - } + // The "atom:author" element is a Person construct that indicates the + // author of the entry or feed. + // + // atom:entry elements MUST contain one or more atom:author elements + Authors AtomPersons `xml:"http://www.w3.org/2005/Atom author"` - if item.Title == "" { - item.Title = sanitizer.TruncateHTML(item.Content, 100) - } + // The "atom:category" element conveys information about a category + // associated with an entry or feed. This specification assigns no + // meaning to the content (if any) of this element. + // + // atom:entry elements MAY contain any number of atom:category + // elements. + Categories AtomCategories `xml:"http://www.w3.org/2005/Atom category"` - if item.Title == "" { - item.Title = item.URL - } - - feed.Entries = append(feed.Entries, item) - } - - return feed -} - -type atom10Entry struct { - ID string `xml:"id"` - Title atom10Text `xml:"title"` - Published string `xml:"published"` - Updated string `xml:"updated"` - Links atomLinks `xml:"link"` - Summary atom10Text `xml:"summary"` - Content atom10Text `xml:"http://www.w3.org/2005/Atom content"` - Authors atomAuthors `xml:"author"` - Categories []atom10Category `xml:"category"` media.MediaItemElement } -func (a *atom10Entry) Transform() *model.Entry { - entry := model.NewEntry() - entry.URL = a.Links.originalLink() - entry.Date = a.entryDate() - entry.Author = a.Authors.String() - entry.Hash = a.entryHash() - entry.Content = a.entryContent() - entry.Title = a.entryTitle() - entry.Enclosures = a.entryEnclosures() - entry.CommentsURL = a.entryCommentsURL() - entry.Tags = a.entryCategories() - return entry -} - -func (a *atom10Entry) entryTitle() string { - return html.UnescapeString(a.Title.String()) -} - -func (a *atom10Entry) entryContent() string { - content := a.Content.String() - if content != "" { - return content - } - - summary := a.Summary.String() - if summary != "" { - return summary - } - - mediaDescription := a.FirstMediaDescription() - if mediaDescription != "" { - return mediaDescription - } - - return "" -} - -// Note: The published date represents the original creation date for YouTube feeds. -// Example: -// 2019-01-26T08:02:28+00:00 -// 2019-01-29T07:27:27+00:00 -func (a *atom10Entry) entryDate() time.Time { - dateText := a.Published - if dateText == "" { - dateText = a.Updated - } - - if dateText != "" { - result, err := date.Parse(dateText) - if err != nil { - slog.Debug("Unable to parse date from Atom 0.3 feed", - slog.String("date", dateText), - slog.String("id", a.ID), - slog.Any("error", err), - ) - return time.Now() - } - - return result - } - - return time.Now() -} - -func (a *atom10Entry) entryHash() string { - for _, value := range []string{a.ID, a.Links.originalLink()} { - if value != "" { - return crypto.Hash(value) - } - } - - return "" -} - -func (a *atom10Entry) entryEnclosures() model.EnclosureList { - enclosures := make(model.EnclosureList, 0) - duplicates := make(map[string]bool) - - for _, mediaThumbnail := range a.AllMediaThumbnails() { - if _, found := duplicates[mediaThumbnail.URL]; !found { - duplicates[mediaThumbnail.URL] = true - enclosures = append(enclosures, &model.Enclosure{ - URL: mediaThumbnail.URL, - MimeType: mediaThumbnail.MimeType(), - Size: mediaThumbnail.Size(), - }) - } - } - - for _, link := range a.Links { - if strings.EqualFold(link.Rel, "enclosure") { - if link.URL == "" { - continue - } - - if _, found := duplicates[link.URL]; !found { - duplicates[link.URL] = true - length, _ := strconv.ParseInt(link.Length, 10, 0) - enclosures = append(enclosures, &model.Enclosure{URL: link.URL, MimeType: link.Type, Size: length}) - } - } - } - - for _, mediaContent := range a.AllMediaContents() { - if _, found := duplicates[mediaContent.URL]; !found { - duplicates[mediaContent.URL] = true - enclosures = append(enclosures, &model.Enclosure{ - URL: mediaContent.URL, - MimeType: mediaContent.MimeType(), - Size: mediaContent.Size(), - }) - } - } - - for _, mediaPeerLink := range a.AllMediaPeerLinks() { - if _, found := duplicates[mediaPeerLink.URL]; !found { - duplicates[mediaPeerLink.URL] = true - enclosures = append(enclosures, &model.Enclosure{ - URL: mediaPeerLink.URL, - MimeType: mediaPeerLink.MimeType(), - Size: mediaPeerLink.Size(), - }) - } - } - - return enclosures -} - -func (r *atom10Entry) entryCategories() []string { - categoryList := make([]string, 0) - - for _, atomCategory := range r.Categories { - if strings.TrimSpace(atomCategory.Label) != "" { - categoryList = append(categoryList, strings.TrimSpace(atomCategory.Label)) - } else { - categoryList = append(categoryList, strings.TrimSpace(atomCategory.Term)) - } - } - - return categoryList -} - -// See https://tools.ietf.org/html/rfc4685#section-4 -// If the type attribute of the atom:link is omitted, its value is assumed to be "application/atom+xml". -// We accept only HTML or XHTML documents for now since the intention is to have the same behavior as RSS. -func (a *atom10Entry) entryCommentsURL() string { - commentsURL := a.Links.firstLinkWithRelationAndType("replies", "text/html", "application/xhtml+xml") - if urllib.IsAbsoluteURL(commentsURL) { - return commentsURL - } - return "" -} - -type atom10Text struct { - Type string `xml:"type,attr"` - CharData string `xml:",chardata"` - InnerXML string `xml:",innerxml"` - XHTMLRootElement atomXHTMLRootElement `xml:"http://www.w3.org/1999/xhtml div"` -} - -type atom10Category struct { - Term string `xml:"term,attr"` - Label string `xml:"label,attr"` -} - +// A Text construct contains human-readable text, usually in small +// quantities. The content of Text constructs is Language-Sensitive. +// Specs: https://datatracker.ietf.org/doc/html/rfc4287#section-3.1 // Text: https://datatracker.ietf.org/doc/html/rfc4287#section-3.1.1.1 // HTML: https://datatracker.ietf.org/doc/html/rfc4287#section-3.1.1.2 // XHTML: https://datatracker.ietf.org/doc/html/rfc4287#section-3.1.1.3 -func (a *atom10Text) String() string { +type Atom10Text struct { + Type string `xml:"type,attr"` + CharData string `xml:",chardata"` + InnerXML string `xml:",innerxml"` + XHTMLRootElement AtomXHTMLRootElement `xml:"http://www.w3.org/1999/xhtml div"` +} + +func (a *Atom10Text) Body() string { var content string - switch { - case a.Type == "", a.Type == "text", a.Type == "text/plain": - if strings.HasPrefix(strings.TrimSpace(a.InnerXML), ` 0 { + categories = slices.Compact(categories) + sort.Strings(categories) + entry.Tags = categories + } + + // Populate the commentsURL if defined. + // See https://tools.ietf.org/html/rfc4685#section-4 + // If the type attribute of the atom:link is omitted, its value is assumed to be "application/atom+xml". + // We accept only HTML or XHTML documents for now since the intention is to have the same behavior as RSS. + commentsURL := atomEntry.Links.firstLinkWithRelationAndType("replies", "text/html", "application/xhtml+xml") + if urllib.IsAbsoluteURL(commentsURL) { + entry.CommentsURL = commentsURL + } + + // Generate the entry hash. + for _, value := range []string{atomEntry.ID, atomEntry.Links.OriginalLink()} { + if value != "" { + entry.Hash = crypto.Hash(value) + break + } + } + + // Populate the entry enclosures. + uniqueEnclosuresMap := make(map[string]bool) + + for _, mediaThumbnail := range atomEntry.AllMediaThumbnails() { + if _, found := uniqueEnclosuresMap[mediaThumbnail.URL]; !found { + uniqueEnclosuresMap[mediaThumbnail.URL] = true + entry.Enclosures = append(entry.Enclosures, &model.Enclosure{ + URL: mediaThumbnail.URL, + MimeType: mediaThumbnail.MimeType(), + Size: mediaThumbnail.Size(), + }) + } + } + + for _, link := range atomEntry.Links { + if strings.EqualFold(link.Rel, "enclosure") { + if link.Href == "" { + continue + } + + if _, found := uniqueEnclosuresMap[link.Href]; !found { + uniqueEnclosuresMap[link.Href] = true + length, _ := strconv.ParseInt(link.Length, 10, 0) + entry.Enclosures = append(entry.Enclosures, &model.Enclosure{ + URL: link.Href, + MimeType: link.Type, + Size: length, + }) + } + } + } + + for _, mediaContent := range atomEntry.AllMediaContents() { + if _, found := uniqueEnclosuresMap[mediaContent.URL]; !found { + uniqueEnclosuresMap[mediaContent.URL] = true + entry.Enclosures = append(entry.Enclosures, &model.Enclosure{ + URL: mediaContent.URL, + MimeType: mediaContent.MimeType(), + Size: mediaContent.Size(), + }) + } + } + + for _, mediaPeerLink := range atomEntry.AllMediaPeerLinks() { + if _, found := uniqueEnclosuresMap[mediaPeerLink.URL]; !found { + uniqueEnclosuresMap[mediaPeerLink.URL] = true + entry.Enclosures = append(entry.Enclosures, &model.Enclosure{ + URL: mediaPeerLink.URL, + MimeType: mediaPeerLink.MimeType(), + Size: mediaPeerLink.Size(), + }) + } + } + + feed.Entries = append(feed.Entries, entry) + } + + return feed +} diff --git a/internal/reader/atom/atom_10_test.go b/internal/reader/atom/atom_10_test.go index f778e8e6..be6e9148 100644 --- a/internal/reader/atom/atom_10_test.go +++ b/internal/reader/atom/atom_10_test.go @@ -12,7 +12,6 @@ import ( func TestParseAtomSample(t *testing.T) { data := ` - Example Feed 2003-12-13T18:30:02Z @@ -20,7 +19,6 @@ func TestParseAtomSample(t *testing.T) { John Doe urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6 - Atom-Powered Robots Run Amok @@ -28,7 +26,6 @@ func TestParseAtomSample(t *testing.T) { 2003-12-13T18:30:02Z Some text. - ` feed, err := Parse("http://example.org/feed.xml", bytes.NewReader([]byte(data)), "10") @@ -420,7 +417,7 @@ func TestParseEntryWithPlainTextTitle(t *testing.T) { expected := `AT&T bought by SBC!` for i := range 2 { if feed.Entries[i].Title != expected { - t.Errorf("Incorrect title for entry #%d, got: %q", i, feed.Entries[i].Title) + t.Errorf("Incorrect title for entry #%d, got: %q instead of %q", i, feed.Entries[i].Title, expected) } } } @@ -430,33 +427,20 @@ func TestParseEntryWithHTMLTitle(t *testing.T) { Example Feed - - <code>Test</code> Test - - urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a - 2003-12-13T18:30:02Z - Some text. + <code>Code</code> Test + - - <![CDATA[Test “Test”]]> - - urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a - 2003-12-13T18:30:02Z - Some text. + <![CDATA[Test with “unicode quote”]]> + - <![CDATA[Entry title with space around CDATA]]> - - urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a - 2003-12-13T18:30:02Z - Some text. + - ` feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10") @@ -464,11 +448,11 @@ func TestParseEntryWithHTMLTitle(t *testing.T) { t.Fatal(err) } - if feed.Entries[0].Title != "Test Test" { + if feed.Entries[0].Title != "Code Test" { t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title) } - if feed.Entries[1].Title != "Test “Test”" { + if feed.Entries[1].Title != "Test with “unicode quote”" { t.Errorf("Incorrect entry title, got: %q", feed.Entries[1].Title) } @@ -502,8 +486,8 @@ func TestParseEntryWithXHTMLTitle(t *testing.T) { t.Fatal(err) } - if feed.Entries[0].Title != `This is XHTML content.` { - t.Errorf("Incorrect entry title, got: %q", feed.Entries[1].Title) + if feed.Entries[0].Title != `This is XHTML content.` { + t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title) } } @@ -608,7 +592,7 @@ func TestParseEntryWithDoubleEncodedEntitiesTitle(t *testing.T) { t.Fatal(err) } - if feed.Entries[0].Title != `'AT&T'` { + if feed.Entries[0].Title != `'AT&T'` { t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title) } } @@ -644,31 +628,21 @@ func TestParseEntryWithHTMLSummary(t *testing.T) { Example Feed - - Example + Example 1 - urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a - 2003-12-13T18:30:02Z - <code>std::unique_ptr&lt;S&gt;</code> + <code>std::unique_ptr&lt;S&gt; myvar;</code> - - Example + Example 2 - urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a - 2003-12-13T18:30:02Z - <code>std::unique_ptr&lt;S&gt;</code> + <code>std::unique_ptr&lt;S&gt; myvar;</code> - - Example + Example 3 - urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a - 2003-12-13T18:30:02Z - std::unique_ptr<S>]]> + std::unique_ptr<S> myvar;]]> - ` feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10") @@ -676,7 +650,11 @@ func TestParseEntryWithHTMLSummary(t *testing.T) { t.Fatal(err) } - expected := `std::unique_ptr<S>` + if len(feed.Entries) != 3 { + t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries)) + } + + expected := `std::unique_ptr<S> myvar;` for i := range 3 { if feed.Entries[i].Content != expected { t.Errorf("Incorrect content for entry #%d, got: %q", i, feed.Entries[i].Content) @@ -728,7 +706,7 @@ func TestParseEntryWithTextSummary(t *testing.T) { t.Fatal(err) } - expected := `AT&T <S>` + expected := `AT&T ` for i := range 4 { if feed.Entries[i].Content != expected { t.Errorf("Incorrect content for entry #%d, got: %q", i, feed.Entries[i].Content) @@ -747,7 +725,7 @@ func TestParseEntryWithTextContent(t *testing.T) { urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a 2003-12-13T18:30:02Z - AT&T <S> + AT&T <strong>Strong Element</strong> @@ -755,7 +733,7 @@ func TestParseEntryWithTextContent(t *testing.T) { urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a 2003-12-13T18:30:02Z - AT&T <S> + AT&T <strong>Strong Element</strong> @@ -763,7 +741,7 @@ func TestParseEntryWithTextContent(t *testing.T) { urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a 2003-12-13T18:30:02Z - AT&T <S> + AT&T <strong>Strong Element</strong> @@ -771,7 +749,7 @@ func TestParseEntryWithTextContent(t *testing.T) { urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a 2003-12-13T18:30:02Z - ]]> + Strong Element]]> ` @@ -781,10 +759,10 @@ func TestParseEntryWithTextContent(t *testing.T) { t.Fatal(err) } - expected := `AT&T <S>` + expected := `AT&T Strong Element` for i := range 4 { if feed.Entries[i].Content != expected { - t.Errorf("Incorrect content for entry #%d, got: %q", i, feed.Entries[i].Content) + t.Errorf("Incorrect content for entry #%d, got: %q instead of %q", i, feed.Entries[i].Content, expected) } } } @@ -925,7 +903,6 @@ func TestParseEntryWithMultipleAuthors(t *testing.T) { Example Feed - urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a @@ -938,7 +915,6 @@ func TestParseEntryWithMultipleAuthors(t *testing.T) { Bob - ` feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10") @@ -951,7 +927,7 @@ func TestParseEntryWithMultipleAuthors(t *testing.T) { } } -func TestParseEntryWithoutAuthor(t *testing.T) { +func TestParseFeedWithEntryWithoutAuthor(t *testing.T) { data := ` Example Feed @@ -959,14 +935,12 @@ func TestParseEntryWithoutAuthor(t *testing.T) { John Doe - urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a 2003-12-13T18:30:02Z Some text. - ` feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10") @@ -990,14 +964,15 @@ func TestParseFeedWithMultipleAuthors(t *testing.T) { Bob - + + Bob + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a 2003-12-13T18:30:02Z Some text. - ` feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10") @@ -1015,14 +990,12 @@ func TestParseFeedWithoutAuthor(t *testing.T) { Example Feed - urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a 2003-12-13T18:30:02Z Some text. - ` feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10") @@ -1608,27 +1581,18 @@ func TestAbsoluteCommentsURL(t *testing.T) { } } -func TestParseFeedWithCategories(t *testing.T) { +func TestParseItemWithCategories(t *testing.T) { data := ` Example Feed - - Alice - - - Bob - - - - urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a + 2003-12-13T18:30:02Z Some text. - + - ` feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10") @@ -1637,22 +1601,53 @@ func TestParseFeedWithCategories(t *testing.T) { } if len(feed.Entries[0].Tags) != 2 { - t.Errorf("Incorrect number of tags, got: %d", len(feed.Entries[0].Tags)) + t.Fatalf("Incorrect number of tags, got: %d", len(feed.Entries[0].Tags)) } - expected := "Tech" + expected := "Science" result := feed.Entries[0].Tags[0] if result != expected { t.Errorf("Incorrect entry category, got %q instead of %q", result, expected) } - expected = "Science" + expected = "ZZZZ" result = feed.Entries[0].Tags[1] if result != expected { t.Errorf("Incorrect entry category, got %q instead of %q", result, expected) } } +func TestParseFeedWithCategories(t *testing.T) { + data := ` + + Example Feed + + + + + + + 2003-12-13T18:30:02Z + Some text. + + ` + + feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10") + if err != nil { + t.Fatal(err) + } + + if len(feed.Entries[0].Tags) != 1 { + t.Fatalf("Incorrect number of tags, got: %d", len(feed.Entries[0].Tags)) + } + + expected := "Some Label" + result := feed.Entries[0].Tags[0] + if result != expected { + t.Errorf("Incorrect entry category, got %q instead of %q", result, expected) + } +} + func TestParseFeedWithIconURL(t *testing.T) { data := ` diff --git a/internal/reader/atom/atom_common.go b/internal/reader/atom/atom_common.go index 4b283d44..debd46f1 100644 --- a/internal/reader/atom/atom_common.go +++ b/internal/reader/atom/atom_common.go @@ -3,77 +3,91 @@ package atom // import "miniflux.app/v2/internal/reader/atom" -import "strings" +import ( + "strings" +) -type atomPerson struct { - Name string `xml:"name"` +// Specs: https://datatracker.ietf.org/doc/html/rfc4287#section-3.2 +type AtomPerson struct { + // The "atom:name" element's content conveys a human-readable name for the author. + // It MAY be the name of a corporation or other entity no individual authors can be named. + // Person constructs MUST contain exactly one "atom:name" element, whose content MUST be a string. + Name string `xml:"name"` + + // The "atom:email" element's content conveys an e-mail address associated with the Person construct. + // Person constructs MAY contain an atom:email element, but MUST NOT contain more than one. + // Its content MUST be an e-mail address [RFC2822]. + // Ordering of the element children of Person constructs MUST NOT be considered significant. Email string `xml:"email"` } -func (a *atomPerson) String() string { - name := "" - - switch { - case a.Name != "": - name = a.Name - case a.Email != "": - name = a.Email +func (a *AtomPerson) PersonName() string { + name := strings.TrimSpace(a.Name) + if name != "" { + return name } - return strings.TrimSpace(name) + return strings.TrimSpace(a.Email) } -type atomAuthors []*atomPerson +type AtomPersons []*AtomPerson -func (a atomAuthors) String() string { - var authors []string +func (a AtomPersons) PersonNames() []string { + var names []string + authorNamesMap := make(map[string]bool) for _, person := range a { - authors = append(authors, person.String()) + personName := person.PersonName() + if _, ok := authorNamesMap[personName]; !ok { + names = append(names, personName) + authorNamesMap[personName] = true + } } - return strings.Join(authors, ", ") + return names } -type atomLink struct { - URL string `xml:"href,attr"` +// Specs: https://datatracker.ietf.org/doc/html/rfc4287#section-4.2.7 +type AtomLink struct { + Href string `xml:"href,attr"` Type string `xml:"type,attr"` Rel string `xml:"rel,attr"` Length string `xml:"length,attr"` + Title string `xml:"title,attr"` } -type atomLinks []*atomLink +type AtomLinks []*AtomLink -func (a atomLinks) originalLink() string { +func (a AtomLinks) OriginalLink() string { for _, link := range a { if strings.EqualFold(link.Rel, "alternate") { - return strings.TrimSpace(link.URL) + return strings.TrimSpace(link.Href) } if link.Rel == "" && (link.Type == "" || link.Type == "text/html") { - return strings.TrimSpace(link.URL) + return strings.TrimSpace(link.Href) } } return "" } -func (a atomLinks) firstLinkWithRelation(relation string) string { +func (a AtomLinks) firstLinkWithRelation(relation string) string { for _, link := range a { if strings.EqualFold(link.Rel, relation) { - return strings.TrimSpace(link.URL) + return strings.TrimSpace(link.Href) } } return "" } -func (a atomLinks) firstLinkWithRelationAndType(relation string, contentTypes ...string) string { +func (a AtomLinks) firstLinkWithRelationAndType(relation string, contentTypes ...string) string { for _, link := range a { if strings.EqualFold(link.Rel, relation) { for _, contentType := range contentTypes { if strings.EqualFold(link.Type, contentType) { - return strings.TrimSpace(link.URL) + return strings.TrimSpace(link.Href) } } } @@ -81,3 +95,46 @@ func (a atomLinks) firstLinkWithRelationAndType(relation string, contentTypes .. return "" } + +// The "atom:category" element conveys information about a category +// associated with an entry or feed. This specification assigns no +// meaning to the content (if any) of this element. +// +// Specs: https://datatracker.ietf.org/doc/html/rfc4287#section-4.2.2 +type AtomCategory struct { + // The "term" attribute is a string that identifies the category to + // which the entry or feed belongs. Category elements MUST have a + // "term" attribute. + Term string `xml:"term,attr"` + + // The "scheme" attribute is an IRI that identifies a categorization + // scheme. Category elements MAY have a "scheme" attribute. + Scheme string `xml:"scheme,attr"` + + // The "label" attribute provides a human-readable label for display in + // end-user applications. The content of the "label" attribute is + // Language-Sensitive. Entities such as "&" and "<" represent + // their corresponding characters ("&" and "<", respectively), not + // markup. Category elements MAY have a "label" attribute. + Label string `xml:"label,attr"` +} + +type AtomCategories []AtomCategory + +func (ac AtomCategories) CategoryNames() []string { + var categories []string + + for _, category := range ac { + label := strings.TrimSpace(category.Label) + if label != "" { + categories = append(categories, label) + } else { + term := strings.TrimSpace(category.Term) + if term != "" { + categories = append(categories, term) + } + } + } + + return categories +} diff --git a/internal/reader/atom/parser.go b/internal/reader/atom/parser.go index 55c17ee5..f97985bc 100644 --- a/internal/reader/atom/parser.go +++ b/internal/reader/atom/parser.go @@ -11,22 +11,20 @@ import ( xml_decoder "miniflux.app/v2/internal/reader/xml" ) -type atomFeed interface { - Transform(baseURL string) *model.Feed -} - // Parse returns a normalized feed struct from a Atom feed. func Parse(baseURL string, r io.ReadSeeker, version string) (*model.Feed, error) { - var rawFeed atomFeed - if version == "0.3" { - rawFeed = new(atom03Feed) - } else { - rawFeed = new(atom10Feed) + switch version { + case "0.3": + atomFeed := new(Atom03Feed) + if err := xml_decoder.NewXMLDecoder(r).Decode(atomFeed); err != nil { + return nil, fmt.Errorf("atom: unable to parse Atom 0.3 feed: %w", err) + } + return NewAtom03Adapter(atomFeed).BuildFeed(baseURL), nil + default: + atomFeed := new(Atom10Feed) + if err := xml_decoder.NewXMLDecoder(r).Decode(atomFeed); err != nil { + return nil, fmt.Errorf("atom: unable to parse Atom 1.0 feed: %w", err) + } + return NewAtom10Adapter(atomFeed).BuildFeed(baseURL), nil } - - if err := xml_decoder.NewXMLDecoder(r).Decode(rawFeed); err != nil { - return nil, fmt.Errorf("atom: unable to parse feed: %w", err) - } - - return rawFeed.Transform(baseURL), nil } diff --git a/internal/reader/json/adapter.go b/internal/reader/json/adapter.go index d62ff976..9e577d3e 100644 --- a/internal/reader/json/adapter.go +++ b/internal/reader/json/adapter.go @@ -98,7 +98,6 @@ func (j *JSONAdapter) BuildFeed(feedURL string) *model.Feed { } // Populate the entry date. - entry.Date = time.Now() for _, value := range []string{item.DatePublished, item.DateModified} { value = strings.TrimSpace(value) if value != "" { @@ -114,6 +113,9 @@ func (j *JSONAdapter) BuildFeed(feedURL string) *model.Feed { } } } + if entry.Date.IsZero() { + entry.Date = time.Now() + } // Populate the entry author. itemAuthors := append(item.Authors, j.jsonFeed.Authors...) diff --git a/internal/reader/parser/parser_test.go b/internal/reader/parser/parser_test.go index 447f73d3..9ab55a0c 100644 --- a/internal/reader/parser/parser_test.go +++ b/internal/reader/parser/parser_test.go @@ -85,7 +85,35 @@ func FuzzParse(f *testing.F) { }) } -func TestParseAtom(t *testing.T) { +func TestParseAtom03Feed(t *testing.T) { + data := ` + + dive into mark + + 2003-12-13T18:30:02Z + Mark Pilgrim + + Atom 0.3 snapshot + + tag:diveintomark.org,2003:3.2397 + 2003-12-13T08:29:29-04:00 + 2003-12-13T18:30:02Z + It's a test + HTML content

]]>
+
+
` + + feed, err := ParseFeed("https://example.org/", strings.NewReader(data)) + if err != nil { + t.Error(err) + } + + if feed.Title != "dive into mark" { + t.Errorf("Incorrect title, got: %s", feed.Title) + } +} + +func TestParseAtom10Feed(t *testing.T) { data := ` diff --git a/internal/reader/rss/adapter.go b/internal/reader/rss/adapter.go index fe1eed80..2909fc6b 100644 --- a/internal/reader/rss/adapter.go +++ b/internal/reader/rss/adapter.go @@ -69,7 +69,6 @@ func (r *RSSAdapter) BuildFeed(feedURL string) *model.Feed { for _, item := range r.rss.Channel.Items { entry := model.NewEntry() - entry.Author = findEntryAuthor(&item) entry.Date = findEntryDate(&item) entry.Content = findEntryContent(&item) entry.Enclosures = findEntryEnclosures(&item) @@ -91,11 +90,11 @@ func (r *RSSAdapter) BuildFeed(feedURL string) *model.Feed { if entry.Title == "" { entry.Title = sanitizer.TruncateHTML(entry.Content, 100) } - if entry.Title == "" { entry.Title = entry.URL } + entry.Author = findEntryAuthor(&item) if entry.Author == "" { entry.Author = findFeedAuthor(&r.rss.Channel) }