miniflux/reader/rdf/parser_test.go

406 lines
12 KiB
Go
Raw Normal View History

2017-11-21 03:34:11 +01:00
// Copyright 2017 Frédéric Guillot. All rights reserved.
// Use of this source code is governed by the Apache 2.0
// license that can be found in the LICENSE file.
2018-08-25 06:51:50 +02:00
package rdf // import "miniflux.app/reader/rdf"
2017-11-21 03:34:11 +01:00
import (
"bytes"
"strings"
"testing"
2017-11-21 04:25:30 +01:00
"time"
2017-11-21 03:34:11 +01:00
)
func TestParseRDFSample(t *testing.T) {
data := `
<?xml version="1.0"?>
<rdf:RDF
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns="http://purl.org/rss/1.0/"
>
<channel rdf:about="http://www.xml.com/xml/news.rss">
<title>XML.com</title>
<link>http://xml.com/pub</link>
<description>
XML.com features a rich mix of information and services
for the XML community.
</description>
<image rdf:resource="http://xml.com/universal/images/xml_tiny.gif" />
<items>
<rdf:Seq>
<rdf:li resource="http://xml.com/pub/2000/08/09/xslt/xslt.html" />
<rdf:li resource="http://xml.com/pub/2000/08/09/rdfdb/index.html" />
</rdf:Seq>
</items>
<textinput rdf:resource="http://search.xml.com" />
</channel>
<image rdf:about="http://xml.com/universal/images/xml_tiny.gif">
<title>XML.com</title>
<link>http://www.xml.com</link>
<url>http://xml.com/universal/images/xml_tiny.gif</url>
</image>
<item rdf:about="http://xml.com/pub/2000/08/09/xslt/xslt.html">
<title>Processing Inclusions with XSLT</title>
<link>http://xml.com/pub/2000/08/09/xslt/xslt.html</link>
<description>
Processing document inclusions with general XML tools can be
problematic. This article proposes a way of preserving inclusion
information through SAX-based processing.
</description>
</item>
<item rdf:about="http://xml.com/pub/2000/08/09/rdfdb/index.html">
<title>Putting RDF to Work</title>
<link>http://xml.com/pub/2000/08/09/rdfdb/index.html</link>
<description>
Tool and API support for the Resource Description Framework
is slowly coming of age. Edd Dumbill takes a look at RDFDB,
one of the most exciting new RDF toolkits.
</description>
</item>
<textinput rdf:about="http://search.xml.com">
<title>Search XML.com</title>
<description>Search XML.com's XML collection</description>
<name>s</name>
<link>http://search.xml.com</link>
</textinput>
</rdf:RDF>`
feed, err := Parse(bytes.NewBufferString(data))
if err != nil {
t.Fatal(err)
2017-11-21 03:34:11 +01:00
}
if feed.Title != "XML.com" {
t.Errorf("Incorrect title, got: %s", feed.Title)
}
if feed.FeedURL != "" {
t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
}
if feed.SiteURL != "http://xml.com/pub" {
t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
}
if len(feed.Entries) != 2 {
t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
}
if feed.Entries[1].Hash != "8aaeee5d3ab50351422fbded41078ee88c73bf1441085b16a8c09fd90a7db321" {
t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
}
if feed.Entries[1].URL != "http://xml.com/pub/2000/08/09/rdfdb/index.html" {
t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
}
if feed.Entries[1].Title != "Putting RDF to Work" {
t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
}
if strings.HasSuffix(feed.Entries[1].Content, "Tool and API support") {
t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content)
}
2017-11-21 04:25:30 +01:00
if feed.Entries[1].Date.Year() != time.Now().Year() {
t.Errorf("Entry date should not be empty")
}
2017-11-21 03:34:11 +01:00
}
func TestParseRDFSampleWithDublinCore(t *testing.T) {
data := `<?xml version="1.0" encoding="utf-8"?>
<rdf:RDF
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
xmlns:co="http://purl.org/rss/1.0/modules/company/"
xmlns:ti="http://purl.org/rss/1.0/modules/textinput/"
xmlns="http://purl.org/rss/1.0/"
>
<channel rdf:about="http://meerkat.oreillynet.com/?_fl=rss1.0">
<title>Meerkat</title>
<link>http://meerkat.oreillynet.com</link>
<description>Meerkat: An Open Wire Service</description>
<dc:publisher>The O'Reilly Network</dc:publisher>
<dc:creator>Rael Dornfest (mailto:rael@oreilly.com)</dc:creator>
<dc:rights>Copyright &#169; 2000 O'Reilly &amp; Associates, Inc.</dc:rights>
<dc:date>2000-01-01T12:00+00:00</dc:date>
<sy:updatePeriod>hourly</sy:updatePeriod>
<sy:updateFrequency>2</sy:updateFrequency>
<sy:updateBase>2000-01-01T12:00+00:00</sy:updateBase>
<image rdf:resource="http://meerkat.oreillynet.com/icons/meerkat-powered.jpg" />
<items>
<rdf:Seq>
<rdf:li resource="http://c.moreover.com/click/here.pl?r123" />
</rdf:Seq>
</items>
<textinput rdf:resource="http://meerkat.oreillynet.com" />
</channel>
<image rdf:about="http://meerkat.oreillynet.com/icons/meerkat-powered.jpg">
<title>Meerkat Powered!</title>
<url>http://meerkat.oreillynet.com/icons/meerkat-powered.jpg</url>
<link>http://meerkat.oreillynet.com</link>
</image>
<item rdf:about="http://c.moreover.com/click/here.pl?r123">
<title>XML: A Disruptive Technology</title>
<link>http://c.moreover.com/click/here.pl?r123</link>
<dc:description>
XML is placing increasingly heavy loads on the existing technical
infrastructure of the Internet.
</dc:description>
<dc:publisher>The O'Reilly Network</dc:publisher>
<dc:creator>Simon St.Laurent (mailto:simonstl@simonstl.com)</dc:creator>
<dc:rights>Copyright &#169; 2000 O'Reilly &amp; Associates, Inc.</dc:rights>
<dc:subject>XML</dc:subject>
<co:name>XML.com</co:name>
<co:market>NASDAQ</co:market>
<co:symbol>XML</co:symbol>
</item>
<textinput rdf:about="http://meerkat.oreillynet.com">
<title>Search Meerkat</title>
<description>Search Meerkat's RSS Database...</description>
<name>s</name>
<link>http://meerkat.oreillynet.com/</link>
<ti:function>search</ti:function>
<ti:inputType>regex</ti:inputType>
</textinput>
</rdf:RDF>`
feed, err := Parse(bytes.NewBufferString(data))
if err != nil {
t.Fatal(err)
2017-11-21 03:34:11 +01:00
}
if feed.Title != "Meerkat" {
t.Errorf("Incorrect title, got: %s", feed.Title)
}
if feed.FeedURL != "" {
t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
}
if feed.SiteURL != "http://meerkat.oreillynet.com" {
t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
}
if len(feed.Entries) != 1 {
t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
}
if feed.Entries[0].Hash != "fa4ef7c300b175ca66f92f226b5dba5caa2a9619f031101bf56e5b884b02cd97" {
t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
}
if feed.Entries[0].URL != "http://c.moreover.com/click/here.pl?r123" {
t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
}
if feed.Entries[0].Title != "XML: A Disruptive Technology" {
t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
}
if strings.HasSuffix(feed.Entries[0].Content, "XML is placing increasingly") {
t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content)
}
if feed.Entries[0].Author != "Simon St.Laurent (mailto:simonstl@simonstl.com)" {
t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
}
}
func TestParseItemWithOnlyFeedAuthor(t *testing.T) {
data := `<?xml version="1.0" encoding="utf-8"?>
<rdf:RDF
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns="http://purl.org/rss/1.0/"
>
<channel rdf:about="http://meerkat.oreillynet.com/?_fl=rss1.0">
<title>Meerkat</title>
<link>http://meerkat.oreillynet.com</link>
<dc:creator>Rael Dornfest (mailto:rael@oreilly.com)</dc:creator>
</channel>
<item rdf:about="http://c.moreover.com/click/here.pl?r123">
<title>XML: A Disruptive Technology</title>
<link>http://c.moreover.com/click/here.pl?r123</link>
<dc:description>
XML is placing increasingly heavy loads on the existing technical
infrastructure of the Internet.
</dc:description>
</item>
</rdf:RDF>`
feed, err := Parse(bytes.NewBufferString(data))
if err != nil {
t.Fatal(err)
2017-11-21 03:34:11 +01:00
}
if feed.Entries[0].Author != "Rael Dornfest (mailto:rael@oreilly.com)" {
t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
}
}
2017-12-14 05:16:15 +01:00
func TestParseItemRelativeURL(t *testing.T) {
data := `<?xml version="1.0" encoding="utf-8"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/">
<channel>
<title>Example</title>
<link>http://example.org</link>
</channel>
<item>
<title>Title</title>
<description>Test</description>
<link>something.html</link>
</item>
</rdf:RDF>`
feed, err := Parse(bytes.NewBufferString(data))
if err != nil {
t.Fatal(err)
2017-12-14 05:16:15 +01:00
}
if feed.Entries[0].URL != "http://example.org/something.html" {
t.Errorf("Incorrect entry url, got: %s", feed.Entries[0].URL)
}
}
2017-11-21 03:34:11 +01:00
func TestParseItemWithoutLink(t *testing.T) {
data := `<?xml version="1.0" encoding="utf-8"?>
<rdf:RDF
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns="http://purl.org/rss/1.0/"
>
<channel rdf:about="http://meerkat.oreillynet.com/?_fl=rss1.0">
<title>Meerkat</title>
<link>http://meerkat.oreillynet.com</link>
</channel>
<item rdf:about="http://c.moreover.com/click/here.pl?r123">
<title>Title</title>
<description>Test</description>
</item>
</rdf:RDF>`
feed, err := Parse(bytes.NewBufferString(data))
if err != nil {
t.Fatal(err)
2017-11-21 03:34:11 +01:00
}
if feed.Entries[0].Hash != "37f5223ebd58639aa62a49afbb61df960efb7dc5db5181dfb3cedd9a49ad34c6" {
t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
}
if feed.Entries[0].URL != "http://meerkat.oreillynet.com" {
t.Errorf("Incorrect entry url, got: %s", feed.Entries[0].URL)
}
}
// TestParseItemWithDublicCoreDate verifies that an item's dc:date element is
// used as the entry date.
func TestParseItemWithDublicCoreDate(t *testing.T) {
	data := `<?xml version="1.0" encoding="utf-8"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:slash="http://purl.org/rss/1.0/modules/slash/">
<channel>
<title>Example</title>
<link>http://example.org</link>
</channel>
<item>
<title>Title</title>
<description>Test</description>
<link>http://example.org/test.html</link>
<dc:creator>Tester</dc:creator>
<dc:date>2018-04-10T05:00:00+00:00</dc:date>
</item>
</rdf:RDF>`

	feed, err := Parse(bytes.NewBufferString(data))
	if err != nil {
		t.Fatal(err)
	}

	// Guard the index below so a regression is reported as a test failure
	// instead of an index-out-of-range panic.
	if len(feed.Entries) != 1 {
		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
	}

	// Equal compares instants, so the assertion is independent of the
	// location attached to the parsed date.
	expectedDate := time.Date(2018, time.April, 10, 5, 0, 0, 0, time.UTC)
	if !feed.Entries[0].Date.Equal(expectedDate) {
		t.Errorf("Incorrect entry date, got: %v, want: %v", feed.Entries[0].Date, expectedDate)
	}
}
// TestParseItemWithoutDate verifies that an item without any date element
// yields an entry dated within one second of the current time.
func TestParseItemWithoutDate(t *testing.T) {
	data := `<?xml version="1.0" encoding="utf-8"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/">
<channel>
<title>Example</title>
<link>http://example.org</link>
</channel>
<item>
<title>Title</title>
<description>Test</description>
<link>http://example.org/test.html</link>
</item>
</rdf:RDF>`

	feed, err := Parse(bytes.NewBufferString(data))
	if err != nil {
		t.Fatal(err)
	}

	// Guard the index below so a regression is reported as a test failure
	// instead of an index-out-of-range panic.
	if len(feed.Entries) != 1 {
		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
	}

	entryDate := feed.Entries[0].Date
	diff := time.Now().Sub(entryDate)

	// Check both directions: a date far in the future gives a negative
	// difference, which a one-sided "diff > time.Second" test would accept.
	if diff < -time.Second || diff > time.Second {
		t.Errorf("Incorrect entry date, got: %v (differs from now by %v)", entryDate, diff)
	}
}
2017-11-21 03:34:11 +01:00
// TestParseInvalidXml checks that Parse reports an error when given input
// that is not an XML document.
func TestParseInvalidXml(t *testing.T) {
	const input = "garbage"

	if _, parseErr := Parse(bytes.NewBufferString(input)); parseErr == nil {
		t.Fatal("Parse should returns an error")
	}
}
func TestParseFeedWithHTMLEntity(t *testing.T) {
data := `<?xml version="1.0" encoding="utf-8"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/">
<channel>
<title>Example &nbsp; Feed</title>
<link>http://example.org</link>
</channel>
</rdf:RDF>`
feed, err := Parse(bytes.NewBufferString(data))
if err != nil {
t.Fatal(err)
}
if feed.Title != "Example \u00a0 Feed" {
t.Errorf(`Incorrect title, got: %q`, feed.Title)
2017-11-21 03:34:11 +01:00
}
}