diff --git a/internal/reader/dublincore/dublincore.go b/internal/reader/dublincore/dublincore.go index e2e2607c..fd4b4911 100644 --- a/internal/reader/dublincore/dublincore.go +++ b/internal/reader/dublincore/dublincore.go @@ -20,6 +20,7 @@ func (feed *DublinCoreFeedElement) GetSanitizedCreator() string { // DublinCoreItemElement represents Dublin Core entry XML elements. type DublinCoreItemElement struct { + DublinCoreTitle string `xml:"http://purl.org/dc/elements/1.1/ title"` DublinCoreDate string `xml:"http://purl.org/dc/elements/1.1/ date"` DublinCoreCreator string `xml:"http://purl.org/dc/elements/1.1/ creator"` DublinCoreContent string `xml:"http://purl.org/rss/1.0/modules/content/ encoded"` diff --git a/internal/reader/rdf/parser_test.go b/internal/reader/rdf/parser_test.go index 67b8c569..12b4f784 100644 --- a/internal/reader/rdf/parser_test.go +++ b/internal/reader/rdf/parser_test.go @@ -406,7 +406,7 @@ func TestParseItemWithoutDate(t *testing.T) { func TestParseItemWithEncodedHTMLTitle(t *testing.T) { data := ` - + Example http://example.org @@ -425,7 +425,7 @@ func TestParseItemWithEncodedHTMLTitle(t *testing.T) { } if feed.Entries[0].Title != `AT&T` { - t.Errorf("Incorrect entry title, got: %v", feed.Entries[0].Title) + t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title) } } @@ -502,7 +502,7 @@ func TestParseFeedWithURLWrappedInSpaces(t *testing.T) { <![CDATA[ - Microscale Collagen and Fibroblast Interactions Enhance Primary Human Hepatocyte Functions in 3-Dimensional Models + Microscale Collagen and Fibroblast Interactions Enhance Primary Human Hepatocyte Functions in 3-Dimensional Models ]]> @@ -568,7 +568,7 @@ func TestParseRDFWithContentEncoded(t *testing.T) { expected := `

Test

` result := feed.Entries[0].Content if result != expected { - t.Errorf(`Unexpected entry URL, got %q instead of %q`, result, expected) + t.Errorf(`Unexpected entry content, got %q instead of %q`, result, expected) } } @@ -601,6 +601,105 @@ func TestParseRDFWithEncodedHTMLDescription(t *testing.T) { expected := `AT&T ` result := feed.Entries[0].Content if result != expected { - t.Errorf(`Unexpected entry URL, got %v instead of %v`, result, expected) + t.Errorf(`Unexpected entry content, got %v instead of %v`, result, expected) + } +} + +func TestParseRDFItemWithDuplicateTitleElement(t *testing.T) { + data := ` + + + Example Feed + http://example.org/ + + + Item Title + + http://example.org/ + Test + + ` + + feed, err := Parse("http://example.org/", bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if len(feed.Entries) != 1 { + t.Fatalf(`Unexpected number of entries, got %d`, len(feed.Entries)) + } + + expected := `Item Title` + result := feed.Entries[0].Title + if result != expected { + t.Errorf(`Unexpected entry title, got %q instead of %q`, result, expected) + } +} + +func TestParseRDFItemWithDublinCoreTitleElement(t *testing.T) { + data := ` + + + Example Feed + http://example.org/ + + + Dublin Core Title + http://example.org/ + Test + + ` + + feed, err := Parse("http://example.org/", bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if len(feed.Entries) != 1 { + t.Fatalf(`Unexpected number of entries, got %d`, len(feed.Entries)) + } + + expected := `Dublin Core Title` + result := feed.Entries[0].Title + if result != expected { + t.Errorf(`Unexpected entry title, got %q instead of %q`, result, expected) + } +} + +func TestParseRDFItemWitEmptyTitleElement(t *testing.T) { + data := ` + + + Example Feed + http://example.org/ + + + + http://example.org/item + Test + + ` + + feed, err := Parse("http://example.org/", bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if len(feed.Entries) != 1 { + t.Fatalf(`Unexpected number of entries, got %d`, len(feed.Entries)) + } + + expected := `http://example.org/item` + result := feed.Entries[0].Title + if result != expected { + t.Errorf(`Unexpected entry title, got %q instead of %q`, result, expected) } } diff --git a/internal/reader/rdf/rdf.go b/internal/reader/rdf/rdf.go index 5a123a2f..8ce454d7 100644 --- a/internal/reader/rdf/rdf.go +++ b/internal/reader/rdf/rdf.go @@ -58,7 +58,7 @@ func (r *rdfFeed) Transform(baseURL string) *model.Feed { } type rdfItem struct { - Title string `xml:"title"` + Title string `xml:"http://purl.org/rss/1.0/ title"` Link string `xml:"link"` Description string `xml:"description"` dublincore.DublinCoreItemElement @@ -72,11 +72,21 @@ func (r *rdfItem) Transform() *model.Entry { entry.Content = r.entryContent() entry.Hash = r.entryHash() entry.Date = r.entryDate() + + if entry.Title == "" { + entry.Title = entry.URL + } return entry } func (r *rdfItem) entryTitle() string { - return html.UnescapeString(strings.TrimSpace(r.Title)) + for _, title := range []string{r.Title, r.DublinCoreTitle} { + title = strings.TrimSpace(title) + if title != "" { + return html.UnescapeString(title) + } + } + return "" } func (r *rdfItem) entryContent() string {