Fix incorrect parsing of Atom entry content of type HTML
This commit is contained in:
parent
49171c5e8c
commit
14888f1cb8
2 changed files with 154 additions and 23 deletions
|
@ -48,7 +48,7 @@ func (a *atom10Feed) Transform(baseURL string) *model.Feed {
|
|||
feed.SiteURL = siteURL
|
||||
}
|
||||
|
||||
feed.Title = a.Title.String()
|
||||
feed.Title = html.UnescapeString(a.Title.String())
|
||||
if feed.Title == "" {
|
||||
feed.Title = feed.SiteURL
|
||||
}
|
||||
|
@ -100,7 +100,7 @@ func (a *atom10Entry) Transform() *model.Entry {
|
|||
}
|
||||
|
||||
func (a *atom10Entry) entryTitle() string {
|
||||
return a.Title.String()
|
||||
return html.UnescapeString(a.Title.String())
|
||||
}
|
||||
|
||||
func (a *atom10Entry) entryContent() string {
|
||||
|
@ -221,20 +221,19 @@ func (a *atom10Entry) entryCommentsURL() string {
|
|||
}
|
||||
|
||||
type atom10Text struct {
|
||||
Type string `xml:"type,attr"`
|
||||
Data string `xml:",chardata"`
|
||||
XML string `xml:",innerxml"`
|
||||
Type string `xml:"type,attr"`
|
||||
CharData string `xml:",chardata"`
|
||||
InnerXML string `xml:",innerxml"`
|
||||
}
|
||||
|
||||
func (a *atom10Text) String() string {
|
||||
content := ""
|
||||
var content string
|
||||
|
||||
switch {
|
||||
case a.Type == "xhtml":
|
||||
content = a.XML
|
||||
default:
|
||||
content = a.Data
|
||||
if a.Type == "xhtml" {
|
||||
content = a.InnerXML
|
||||
} else {
|
||||
content = a.CharData
|
||||
}
|
||||
|
||||
return html.UnescapeString(strings.TrimSpace(content))
|
||||
return strings.TrimSpace(content)
|
||||
}
|
||||
|
|
|
@ -244,7 +244,33 @@ func TestParseEntryTitleWithWhitespaces(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestParseEntryTitleWithHTMLAndCDATA(t *testing.T) {
|
||||
func TestParseEntryWithPlainTextTitle(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<title>Example Feed</title>
|
||||
<link href="http://example.org/"/>
|
||||
|
||||
<entry>
|
||||
<title type="text">AT&amp;T bought by SBC!</title>
|
||||
<link href="http://example.org/2003/12/13/atom03"/>
|
||||
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
|
||||
<updated>2003-12-13T18:30:02Z</updated>
|
||||
<summary>Some text.</summary>
|
||||
</entry>
|
||||
|
||||
</feed>`
|
||||
|
||||
feed, err := Parse("https://example.org/", bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Title != `AT&T bought by SBC!` {
|
||||
t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseEntryWithHTMLAndCDATATitle(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<title>Example Feed</title>
|
||||
|
@ -270,7 +296,7 @@ func TestParseEntryTitleWithHTMLAndCDATA(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestParseEntryTitleWithHTML(t *testing.T) {
|
||||
func TestParseEntryWithHTMLTitle(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<title>Example Feed</title>
|
||||
|
@ -296,7 +322,7 @@ func TestParseEntryTitleWithHTML(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestParseEntryTitleWithXHTML(t *testing.T) {
|
||||
func TestParseEntryWithXHTMLTitle(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<title>Example Feed</title>
|
||||
|
@ -322,7 +348,7 @@ func TestParseEntryTitleWithXHTML(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestParseEntryTitleWithNumericCharacterReference(t *testing.T) {
|
||||
func TestParseEntryWithNumericCharacterReferenceTitle(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<title>Example Feed</title>
|
||||
|
@ -348,7 +374,7 @@ func TestParseEntryTitleWithNumericCharacterReference(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestParseEntryTitleWithDoubleEncodedEntities(t *testing.T) {
|
||||
func TestParseEntryWithDoubleEncodedEntitiesTitle(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<title>Example Feed</title>
|
||||
|
@ -374,14 +400,14 @@ func TestParseEntryTitleWithDoubleEncodedEntities(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestParseEntrySummaryWithXHTML(t *testing.T) {
|
||||
func TestParseEntryWithXHTMLSummary(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<title>Example Feed</title>
|
||||
<link href="http://example.org/"/>
|
||||
|
||||
<entry>
|
||||
<title type="xhtml"><code>Test</code> Test</title>
|
||||
<title type="xhtml">Example</title>
|
||||
<link href="http://example.org/2003/12/13/atom03"/>
|
||||
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
|
||||
<updated>2003-12-13T18:30:02Z</updated>
|
||||
|
@ -400,14 +426,14 @@ func TestParseEntrySummaryWithXHTML(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestParseEntrySummaryWithHTML(t *testing.T) {
|
||||
func TestParseEntryWithHTMLAndCDATASummary(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<title>Example Feed</title>
|
||||
<link href="http://example.org/"/>
|
||||
|
||||
<entry>
|
||||
<title type="html">&lt;code&gt;Test&lt;/code&gt; Test</title>
|
||||
<title type="html">Example</title>
|
||||
<link href="http://example.org/2003/12/13/atom03"/>
|
||||
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
|
||||
<updated>2003-12-13T18:30:02Z</updated>
|
||||
|
@ -426,14 +452,14 @@ func TestParseEntrySummaryWithHTML(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestParseEntrySummaryWithPlainText(t *testing.T) {
|
||||
func TestParseEntryWithPlainTextAndCDATASummary(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<title>Example Feed</title>
|
||||
<link href="http://example.org/"/>
|
||||
|
||||
<entry>
|
||||
<title type="html">&lt;code&gt;Test&lt;/code&gt; Test</title>
|
||||
<title type="html">Example</title>
|
||||
<link href="http://example.org/2003/12/13/atom03"/>
|
||||
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
|
||||
<updated>2003-12-13T18:30:02Z</updated>
|
||||
|
@ -452,6 +478,112 @@ func TestParseEntrySummaryWithPlainText(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestParseEntryWithTextAndCDATAContent(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<title>Example Feed</title>
|
||||
<link href="http://example.org/"/>
|
||||
|
||||
<entry>
|
||||
<title type="html">Example</title>
|
||||
<link href="http://example.org/2003/12/13/atom03"/>
|
||||
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
|
||||
<updated>2003-12-13T18:30:02Z</updated>
|
||||
<content><![CDATA[AT&T bought by SBC!]]></content>
|
||||
</entry>
|
||||
|
||||
</feed>`
|
||||
|
||||
feed, err := Parse("https://example.org/", bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Content != "AT&T bought by SBC!" {
|
||||
t.Errorf("Incorrect entry content, got: %q", feed.Entries[0].Content)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseEntryWithTextContent(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<title>Example Feed</title>
|
||||
<link href="http://example.org/"/>
|
||||
|
||||
<entry>
|
||||
<title type="html">Example</title>
|
||||
<link href="http://example.org/2003/12/13/atom03"/>
|
||||
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
|
||||
<updated>2003-12-13T18:30:02Z</updated>
|
||||
<content>AT&amp;T bought by SBC!</content>
|
||||
</entry>
|
||||
|
||||
</feed>`
|
||||
|
||||
feed, err := Parse("https://example.org/", bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Content != "AT&T bought by SBC!" {
|
||||
t.Errorf("Incorrect entry content, got: %q", feed.Entries[0].Content)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseEntryWithHTMLContent(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<title>Example Feed</title>
|
||||
<link href="http://example.org/"/>
|
||||
|
||||
<entry>
|
||||
<title type="html">Example</title>
|
||||
<link href="http://example.org/2003/12/13/atom03"/>
|
||||
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
|
||||
<updated>2003-12-13T18:30:02Z</updated>
|
||||
<content type="html">AT&amp;T bought <b>by SBC</b>!</content>
|
||||
</entry>
|
||||
|
||||
</feed>`
|
||||
|
||||
feed, err := Parse("https://example.org/", bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Content != "AT&T bought <b>by SBC</b>!" {
|
||||
t.Errorf("Incorrect entry content, got: %q", feed.Entries[0].Content)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseEntryWithXHTMLContent(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<title>Example Feed</title>
|
||||
<link href="http://example.org/"/>
|
||||
|
||||
<entry>
|
||||
<title type="html">Example</title>
|
||||
<link href="http://example.org/2003/12/13/atom03"/>
|
||||
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
|
||||
<updated>2003-12-13T18:30:02Z</updated>
|
||||
<content type="xhtml">
|
||||
<div xmlns="http://www.w3.org/1999/xhtml">AT&amp;T bought <b>by SBC</b>!</div>
|
||||
</content>
|
||||
</entry>
|
||||
|
||||
</feed>`
|
||||
|
||||
feed, err := Parse("https://example.org/", bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Content != `<div xmlns="http://www.w3.org/1999/xhtml">AT&amp;T bought <b>by SBC</b>!</div>` {
|
||||
t.Errorf("Incorrect entry content, got: %q", feed.Entries[0].Content)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseEntryWithAuthorName(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
|
|
Loading…
Reference in a new issue