Fix incorrect parsing of Atom entry content of type HTML
This commit is contained in:
parent
49171c5e8c
commit
14888f1cb8
2 changed files with 154 additions and 23 deletions
|
@ -48,7 +48,7 @@ func (a *atom10Feed) Transform(baseURL string) *model.Feed {
|
||||||
feed.SiteURL = siteURL
|
feed.SiteURL = siteURL
|
||||||
}
|
}
|
||||||
|
|
||||||
feed.Title = a.Title.String()
|
feed.Title = html.UnescapeString(a.Title.String())
|
||||||
if feed.Title == "" {
|
if feed.Title == "" {
|
||||||
feed.Title = feed.SiteURL
|
feed.Title = feed.SiteURL
|
||||||
}
|
}
|
||||||
|
@ -100,7 +100,7 @@ func (a *atom10Entry) Transform() *model.Entry {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *atom10Entry) entryTitle() string {
|
func (a *atom10Entry) entryTitle() string {
|
||||||
return a.Title.String()
|
return html.UnescapeString(a.Title.String())
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *atom10Entry) entryContent() string {
|
func (a *atom10Entry) entryContent() string {
|
||||||
|
@ -221,20 +221,19 @@ func (a *atom10Entry) entryCommentsURL() string {
|
||||||
}
|
}
|
||||||
|
|
||||||
type atom10Text struct {
|
type atom10Text struct {
|
||||||
Type string `xml:"type,attr"`
|
Type string `xml:"type,attr"`
|
||||||
Data string `xml:",chardata"`
|
CharData string `xml:",chardata"`
|
||||||
XML string `xml:",innerxml"`
|
InnerXML string `xml:",innerxml"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *atom10Text) String() string {
|
func (a *atom10Text) String() string {
|
||||||
content := ""
|
var content string
|
||||||
|
|
||||||
switch {
|
if a.Type == "xhtml" {
|
||||||
case a.Type == "xhtml":
|
content = a.InnerXML
|
||||||
content = a.XML
|
} else {
|
||||||
default:
|
content = a.CharData
|
||||||
content = a.Data
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return html.UnescapeString(strings.TrimSpace(content))
|
return strings.TrimSpace(content)
|
||||||
}
|
}
|
||||||
|
|
|
@ -244,7 +244,33 @@ func TestParseEntryTitleWithWhitespaces(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestParseEntryTitleWithHTMLAndCDATA(t *testing.T) {
|
func TestParseEntryWithPlainTextTitle(t *testing.T) {
|
||||||
|
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||||
|
<title>Example Feed</title>
|
||||||
|
<link href="http://example.org/"/>
|
||||||
|
|
||||||
|
<entry>
|
||||||
|
<title type="text">AT&amp;T bought by SBC!</title>
|
||||||
|
<link href="http://example.org/2003/12/13/atom03"/>
|
||||||
|
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
|
||||||
|
<updated>2003-12-13T18:30:02Z</updated>
|
||||||
|
<summary>Some text.</summary>
|
||||||
|
</entry>
|
||||||
|
|
||||||
|
</feed>`
|
||||||
|
|
||||||
|
feed, err := Parse("https://example.org/", bytes.NewBufferString(data))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if feed.Entries[0].Title != `AT&T bought by SBC!` {
|
||||||
|
t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseEntryWithHTMLAndCDATATitle(t *testing.T) {
|
||||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||||
<title>Example Feed</title>
|
<title>Example Feed</title>
|
||||||
|
@ -270,7 +296,7 @@ func TestParseEntryTitleWithHTMLAndCDATA(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestParseEntryTitleWithHTML(t *testing.T) {
|
func TestParseEntryWithHTMLTitle(t *testing.T) {
|
||||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||||
<title>Example Feed</title>
|
<title>Example Feed</title>
|
||||||
|
@ -296,7 +322,7 @@ func TestParseEntryTitleWithHTML(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestParseEntryTitleWithXHTML(t *testing.T) {
|
func TestParseEntryWithXHTMLTitle(t *testing.T) {
|
||||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||||
<title>Example Feed</title>
|
<title>Example Feed</title>
|
||||||
|
@ -322,7 +348,7 @@ func TestParseEntryTitleWithXHTML(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestParseEntryTitleWithNumericCharacterReference(t *testing.T) {
|
func TestParseEntryWithNumericCharacterReferenceTitle(t *testing.T) {
|
||||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||||
<title>Example Feed</title>
|
<title>Example Feed</title>
|
||||||
|
@ -348,7 +374,7 @@ func TestParseEntryTitleWithNumericCharacterReference(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestParseEntryTitleWithDoubleEncodedEntities(t *testing.T) {
|
func TestParseEntryWithDoubleEncodedEntitiesTitle(t *testing.T) {
|
||||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||||
<title>Example Feed</title>
|
<title>Example Feed</title>
|
||||||
|
@ -374,14 +400,14 @@ func TestParseEntryTitleWithDoubleEncodedEntities(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestParseEntrySummaryWithXHTML(t *testing.T) {
|
func TestParseEntryWithXHTMLSummary(t *testing.T) {
|
||||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||||
<title>Example Feed</title>
|
<title>Example Feed</title>
|
||||||
<link href="http://example.org/"/>
|
<link href="http://example.org/"/>
|
||||||
|
|
||||||
<entry>
|
<entry>
|
||||||
<title type="xhtml"><code>Test</code> Test</title>
|
<title type="xhtml">Example</title>
|
||||||
<link href="http://example.org/2003/12/13/atom03"/>
|
<link href="http://example.org/2003/12/13/atom03"/>
|
||||||
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
|
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
|
||||||
<updated>2003-12-13T18:30:02Z</updated>
|
<updated>2003-12-13T18:30:02Z</updated>
|
||||||
|
@ -400,14 +426,14 @@ func TestParseEntrySummaryWithXHTML(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestParseEntrySummaryWithHTML(t *testing.T) {
|
func TestParseEntryWithHTMLAndCDATASummary(t *testing.T) {
|
||||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||||
<title>Example Feed</title>
|
<title>Example Feed</title>
|
||||||
<link href="http://example.org/"/>
|
<link href="http://example.org/"/>
|
||||||
|
|
||||||
<entry>
|
<entry>
|
||||||
<title type="html"><code>Test</code> Test</title>
|
<title type="html">Example</title>
|
||||||
<link href="http://example.org/2003/12/13/atom03"/>
|
<link href="http://example.org/2003/12/13/atom03"/>
|
||||||
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
|
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
|
||||||
<updated>2003-12-13T18:30:02Z</updated>
|
<updated>2003-12-13T18:30:02Z</updated>
|
||||||
|
@ -426,14 +452,14 @@ func TestParseEntrySummaryWithHTML(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestParseEntrySummaryWithPlainText(t *testing.T) {
|
func TestParseEntryWithPlainTextAndCDATASummary(t *testing.T) {
|
||||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||||
<title>Example Feed</title>
|
<title>Example Feed</title>
|
||||||
<link href="http://example.org/"/>
|
<link href="http://example.org/"/>
|
||||||
|
|
||||||
<entry>
|
<entry>
|
||||||
<title type="html"><code>Test</code> Test</title>
|
<title type="html">Example</title>
|
||||||
<link href="http://example.org/2003/12/13/atom03"/>
|
<link href="http://example.org/2003/12/13/atom03"/>
|
||||||
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
|
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
|
||||||
<updated>2003-12-13T18:30:02Z</updated>
|
<updated>2003-12-13T18:30:02Z</updated>
|
||||||
|
@ -452,6 +478,112 @@ func TestParseEntrySummaryWithPlainText(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestParseEntryWithTextAndCDATAContent(t *testing.T) {
|
||||||
|
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||||
|
<title>Example Feed</title>
|
||||||
|
<link href="http://example.org/"/>
|
||||||
|
|
||||||
|
<entry>
|
||||||
|
<title type="html">Example</title>
|
||||||
|
<link href="http://example.org/2003/12/13/atom03"/>
|
||||||
|
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
|
||||||
|
<updated>2003-12-13T18:30:02Z</updated>
|
||||||
|
<content><![CDATA[AT&T bought by SBC!]]></content>
|
||||||
|
</entry>
|
||||||
|
|
||||||
|
</feed>`
|
||||||
|
|
||||||
|
feed, err := Parse("https://example.org/", bytes.NewBufferString(data))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if feed.Entries[0].Content != "AT&T bought by SBC!" {
|
||||||
|
t.Errorf("Incorrect entry content, got: %q", feed.Entries[0].Content)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseEntryWithTextContent(t *testing.T) {
|
||||||
|
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||||
|
<title>Example Feed</title>
|
||||||
|
<link href="http://example.org/"/>
|
||||||
|
|
||||||
|
<entry>
|
||||||
|
<title type="html">Example</title>
|
||||||
|
<link href="http://example.org/2003/12/13/atom03"/>
|
||||||
|
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
|
||||||
|
<updated>2003-12-13T18:30:02Z</updated>
|
||||||
|
<content>AT&amp;T bought by SBC!</content>
|
||||||
|
</entry>
|
||||||
|
|
||||||
|
</feed>`
|
||||||
|
|
||||||
|
feed, err := Parse("https://example.org/", bytes.NewBufferString(data))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if feed.Entries[0].Content != "AT&T bought by SBC!" {
|
||||||
|
t.Errorf("Incorrect entry content, got: %q", feed.Entries[0].Content)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseEntryWithHTMLContent(t *testing.T) {
|
||||||
|
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||||
|
<title>Example Feed</title>
|
||||||
|
<link href="http://example.org/"/>
|
||||||
|
|
||||||
|
<entry>
|
||||||
|
<title type="html">Example</title>
|
||||||
|
<link href="http://example.org/2003/12/13/atom03"/>
|
||||||
|
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
|
||||||
|
<updated>2003-12-13T18:30:02Z</updated>
|
||||||
|
<content type="html">AT&amp;amp;T bought &lt;b&gt;by SBC&lt;/b&gt;!</content>
|
||||||
|
</entry>
|
||||||
|
|
||||||
|
</feed>`
|
||||||
|
|
||||||
|
feed, err := Parse("https://example.org/", bytes.NewBufferString(data))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if feed.Entries[0].Content != "AT&amp;T bought <b>by SBC</b>!" {
|
||||||
|
t.Errorf("Incorrect entry content, got: %q", feed.Entries[0].Content)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseEntryWithXHTMLContent(t *testing.T) {
|
||||||
|
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||||
|
<title>Example Feed</title>
|
||||||
|
<link href="http://example.org/"/>
|
||||||
|
|
||||||
|
<entry>
|
||||||
|
<title type="html">Example</title>
|
||||||
|
<link href="http://example.org/2003/12/13/atom03"/>
|
||||||
|
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
|
||||||
|
<updated>2003-12-13T18:30:02Z</updated>
|
||||||
|
<content type="xhtml">
|
||||||
|
<div xmlns="http://www.w3.org/1999/xhtml">AT&amp;T bought <b>by SBC</b>!</div>
|
||||||
|
</content>
|
||||||
|
</entry>
|
||||||
|
|
||||||
|
</feed>`
|
||||||
|
|
||||||
|
feed, err := Parse("https://example.org/", bytes.NewBufferString(data))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if feed.Entries[0].Content != `<div xmlns="http://www.w3.org/1999/xhtml">AT&amp;T bought <b>by SBC</b>!</div>` {
|
||||||
|
t.Errorf("Incorrect entry content, got: %q", feed.Entries[0].Content)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestParseEntryWithAuthorName(t *testing.T) {
|
func TestParseEntryWithAuthorName(t *testing.T) {
|
||||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||||
|
|
Loading…
Add table
Reference in a new issue