Handle RSS feed title with encoded Unicode entities

This commit is contained in:
Frédéric Guillot 2021-04-30 22:49:17 -07:00 committed by fguillot
parent 1c9f000576
commit 5b8eb4735c
2 changed files with 20 additions and 1 deletion

View file

@@ -998,6 +998,25 @@ func TestParseFeedTitleWithHTMLEntity(t *testing.T) {
}
}
// TestParseFeedTitleWithUnicodeEntityAndCdata checks that a numeric character
// reference embedded in a CDATA-wrapped channel title is decoded in the
// parsed feed's Title.
func TestParseFeedTitleWithUnicodeEntityAndCdata(t *testing.T) {
	data := `<?xml version="1.0" encoding="utf-8"?>
<rss version="2.0" xmlns:slash="http://purl.org/rss/1.0/modules/slash/">
<channel>
<link>https://example.org/</link>
<title><![CDATA[Jenny&#8217;s Newsletter]]></title>
</channel>
</rss>`

	// &#8217; is the numeric character reference for U+2019 (right single
	// quotation mark). It is written as an escape sequence so the expected
	// value stays intact even if a tool strips non-ASCII characters.
	const want = "Jenny\u2019s Newsletter"

	feed, err := Parse("https://example.org/", bytes.NewBufferString(data))
	if err != nil {
		t.Fatal(err)
	}

	if feed.Title != want {
		t.Errorf(`Incorrect title, got: %q`, feed.Title)
	}
}
func TestParseItemTitleWithHTMLEntity(t *testing.T) {
data := `<?xml version="1.0" encoding="utf-8"?>
<rss version="2.0" xmlns:slash="http://purl.org/rss/1.0/modules/slash/">

View file

@@ -53,7 +53,7 @@ func (r *rssFeed) Transform(baseURL string) *model.Feed {
feed.FeedURL = feedURL
}
feed.Title = strings.TrimSpace(r.Title)
feed.Title = html.UnescapeString(strings.TrimSpace(r.Title))
if feed.Title == "" {
feed.Title = feed.SiteURL
}