2023-06-19 23:42:47 +02:00
|
|
|
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0
|
2017-11-20 06:10:04 +01:00
|
|
|
|
2023-08-11 04:46:45 +02:00
|
|
|
package parser // import "miniflux.app/v2/internal/reader/parser"
|
2017-11-20 06:10:04 +01:00
|
|
|
|
|
|
|
import (
|
2023-10-23 01:07:06 +02:00
|
|
|
"strings"
|
2017-11-20 06:10:04 +01:00
|
|
|
"testing"
|
|
|
|
)
|
|
|
|
|
2024-02-26 13:44:42 +01:00
|
|
|
func FuzzParse(f *testing.F) {
|
|
|
|
f.Add("https://z.org", `<?xml version="1.0" encoding="utf-8"?>
|
|
|
|
<feed xmlns="http://www.w3.org/2005/Atom">
|
|
|
|
<title>Example Feed</title>
|
|
|
|
<link href="http://z.org/"/>
|
|
|
|
<link href="/k"/>
|
|
|
|
<updated>2003-12-13T18:30:02Z</updated>
|
|
|
|
<author><name>John Doe</name></author>
|
|
|
|
<id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id>
|
|
|
|
<entry>
|
|
|
|
<title>a</title>
|
|
|
|
<link href="http://example.org/b"/>
|
|
|
|
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
|
|
|
|
<updated>2003-12-13T18:30:02Z</updated>
|
|
|
|
<summary>c</summary>
|
|
|
|
</entry>
|
|
|
|
</feed>`)
|
|
|
|
f.Add("https://z.org", `<?xml version="1.0"?>
|
|
|
|
<rss version="2.0">
|
|
|
|
<channel>
|
|
|
|
<title>a</title>
|
|
|
|
<link>http://z.org</link>
|
|
|
|
<item>
|
|
|
|
<title>a</title>
|
|
|
|
<link>http://z.org</link>
|
|
|
|
<description>d</description>
|
|
|
|
<pubDate>Tue, 03 Jun 2003 09:39:21 GMT</pubDate>
|
|
|
|
<guid>l</guid>
|
|
|
|
</item>
|
|
|
|
</channel>
|
|
|
|
</rss>`)
|
|
|
|
f.Add("https://z.org", `<?xml version="1.0" encoding="utf-8"?>
|
|
|
|
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/">
|
|
|
|
<channel>
|
|
|
|
<title>a</title>
|
|
|
|
<link>http://z.org/</link>
|
|
|
|
</channel>
|
|
|
|
<item>
|
|
|
|
<title>a</title>
|
|
|
|
<link>/</link>
|
|
|
|
<description>c</description>
|
|
|
|
</item>
|
|
|
|
</rdf:RDF>`)
|
|
|
|
f.Add("http://z.org", `{
|
|
|
|
"version": "http://jsonfeed.org/version/1",
|
|
|
|
"title": "a",
|
|
|
|
"home_page_url": "http://z.org/",
|
|
|
|
"feed_url": "http://z.org/a.json",
|
|
|
|
"items": [
|
|
|
|
{"id": "2","content_text": "a","url": "https://z.org/2"},
|
|
|
|
{"id": "1","content_html": "<a","url":"http://z.org/1"}]}`)
|
|
|
|
f.Fuzz(func(t *testing.T, url string, data string) {
|
|
|
|
ParseFeed(url, strings.NewReader(data))
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
2017-11-20 06:10:04 +01:00
|
|
|
func TestParseAtom(t *testing.T) {
|
|
|
|
data := `<?xml version="1.0" encoding="utf-8"?>
|
|
|
|
<feed xmlns="http://www.w3.org/2005/Atom">
|
|
|
|
|
|
|
|
<title>Example Feed</title>
|
|
|
|
<link href="http://example.org/"/>
|
|
|
|
<updated>2003-12-13T18:30:02Z</updated>
|
|
|
|
<author>
|
|
|
|
<name>John Doe</name>
|
|
|
|
</author>
|
|
|
|
<id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id>
|
|
|
|
|
|
|
|
<entry>
|
|
|
|
<title>Atom-Powered Robots Run Amok</title>
|
|
|
|
<link href="http://example.org/2003/12/13/atom03"/>
|
|
|
|
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
|
|
|
|
<updated>2003-12-13T18:30:02Z</updated>
|
|
|
|
<summary>Some text.</summary>
|
|
|
|
</entry>
|
|
|
|
|
|
|
|
</feed>`
|
|
|
|
|
2023-10-23 01:07:06 +02:00
|
|
|
feed, err := ParseFeed("https://example.org/", strings.NewReader(data))
|
2017-11-20 06:10:04 +01:00
|
|
|
if err != nil {
|
|
|
|
t.Error(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
if feed.Title != "Example Feed" {
|
|
|
|
t.Errorf("Incorrect title, got: %s", feed.Title)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-12-03 05:47:11 +01:00
|
|
|
func TestParseAtomFeedWithRelativeURL(t *testing.T) {
|
|
|
|
data := `<?xml version="1.0" encoding="utf-8"?>
|
|
|
|
<feed xmlns="http://www.w3.org/2005/Atom">
|
|
|
|
<title>Example Feed</title>
|
|
|
|
<link href="/blog/atom.xml" rel="self" type="application/atom+xml"/>
|
|
|
|
<link href="/blog"/>
|
|
|
|
|
|
|
|
<entry>
|
|
|
|
<title>Test</title>
|
|
|
|
<link href="/blog/article.html"/>
|
|
|
|
<link href="/blog/article.html" rel="alternate" type="text/html"/>
|
|
|
|
<id>/blog/article.html</id>
|
|
|
|
<updated>2003-12-13T18:30:02Z</updated>
|
|
|
|
<summary>Some text.</summary>
|
|
|
|
</entry>
|
|
|
|
|
|
|
|
</feed>`
|
|
|
|
|
2023-10-23 01:07:06 +02:00
|
|
|
feed, err := ParseFeed("https://example.org/blog/atom.xml", strings.NewReader(data))
|
2020-12-03 05:47:11 +01:00
|
|
|
if err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
if feed.FeedURL != "https://example.org/blog/atom.xml" {
|
|
|
|
t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
|
|
|
|
}
|
|
|
|
|
|
|
|
if feed.SiteURL != "https://example.org/blog" {
|
|
|
|
t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
|
|
|
|
}
|
|
|
|
|
|
|
|
if feed.Entries[0].URL != "https://example.org/blog/article.html" {
|
|
|
|
t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-11-21 03:34:11 +01:00
|
|
|
func TestParseRSS(t *testing.T) {
|
2017-11-20 06:10:04 +01:00
|
|
|
data := `<?xml version="1.0"?>
|
|
|
|
<rss version="2.0">
|
|
|
|
<channel>
|
|
|
|
<title>Liftoff News</title>
|
|
|
|
<link>http://liftoff.msfc.nasa.gov/</link>
|
|
|
|
<item>
|
|
|
|
<title>Star City</title>
|
|
|
|
<link>http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp</link>
|
|
|
|
<description>How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's <a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm">Star City</a>.</description>
|
|
|
|
<pubDate>Tue, 03 Jun 2003 09:39:21 GMT</pubDate>
|
|
|
|
<guid>http://liftoff.msfc.nasa.gov/2003/06/03.html#item573</guid>
|
|
|
|
</item>
|
|
|
|
</channel>
|
|
|
|
</rss>`
|
|
|
|
|
2023-10-23 01:07:06 +02:00
|
|
|
feed, err := ParseFeed("http://liftoff.msfc.nasa.gov/", strings.NewReader(data))
|
2017-11-20 06:10:04 +01:00
|
|
|
if err != nil {
|
|
|
|
t.Error(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
if feed.Title != "Liftoff News" {
|
|
|
|
t.Errorf("Incorrect title, got: %s", feed.Title)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-12-03 05:47:11 +01:00
|
|
|
func TestParseRSSFeedWithRelativeURL(t *testing.T) {
|
|
|
|
data := `<?xml version="1.0"?>
|
|
|
|
<rss version="2.0">
|
|
|
|
<channel>
|
|
|
|
<title>Example Feed</title>
|
|
|
|
<link>/blog</link>
|
|
|
|
<item>
|
|
|
|
<title>Example Entry</title>
|
|
|
|
<link>/blog/article.html</link>
|
|
|
|
<description>Something</description>
|
|
|
|
<pubDate>Tue, 03 Jun 2003 09:39:21 GMT</pubDate>
|
|
|
|
<guid>1234</guid>
|
|
|
|
</item>
|
|
|
|
</channel>
|
|
|
|
</rss>`
|
|
|
|
|
2023-10-23 01:07:06 +02:00
|
|
|
feed, err := ParseFeed("http://example.org/rss.xml", strings.NewReader(data))
|
2020-12-03 05:47:11 +01:00
|
|
|
if err != nil {
|
|
|
|
t.Error(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
if feed.Title != "Example Feed" {
|
|
|
|
t.Errorf("Incorrect title, got: %s", feed.Title)
|
|
|
|
}
|
|
|
|
|
|
|
|
if feed.FeedURL != "http://example.org/rss.xml" {
|
|
|
|
t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
|
|
|
|
}
|
|
|
|
|
|
|
|
if feed.SiteURL != "http://example.org/blog" {
|
|
|
|
t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
|
|
|
|
}
|
|
|
|
|
|
|
|
if feed.Entries[0].URL != "http://example.org/blog/article.html" {
|
|
|
|
t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-11-21 03:34:11 +01:00
|
|
|
func TestParseRDF(t *testing.T) {
|
|
|
|
data := `<?xml version="1.0" encoding="utf-8"?>
|
|
|
|
<rdf:RDF
|
|
|
|
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
|
|
|
xmlns="http://purl.org/rss/1.0/"
|
|
|
|
>
|
|
|
|
|
|
|
|
<channel>
|
|
|
|
<title>RDF Example</title>
|
|
|
|
<link>http://example.org/</link>
|
|
|
|
</channel>
|
|
|
|
|
|
|
|
<item>
|
|
|
|
<title>Title</title>
|
|
|
|
<link>http://example.org/item</link>
|
|
|
|
<description>Test</description>
|
|
|
|
</item>
|
|
|
|
</rdf:RDF>`
|
|
|
|
|
2023-10-23 01:07:06 +02:00
|
|
|
feed, err := ParseFeed("http://example.org/", strings.NewReader(data))
|
2017-11-21 03:34:11 +01:00
|
|
|
if err != nil {
|
|
|
|
t.Error(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
if feed.Title != "RDF Example" {
|
|
|
|
t.Errorf("Incorrect title, got: %s", feed.Title)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-12-03 05:47:11 +01:00
|
|
|
func TestParseRDFWithRelativeURL(t *testing.T) {
|
|
|
|
data := `<?xml version="1.0" encoding="utf-8"?>
|
|
|
|
<rdf:RDF
|
|
|
|
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
|
|
|
xmlns="http://purl.org/rss/1.0/"
|
|
|
|
>
|
|
|
|
|
|
|
|
<channel>
|
|
|
|
<title>RDF Example</title>
|
|
|
|
<link>/blog</link>
|
|
|
|
</channel>
|
|
|
|
|
|
|
|
<item>
|
|
|
|
<title>Title</title>
|
|
|
|
<link>/blog/article.html</link>
|
|
|
|
<description>Test</description>
|
|
|
|
</item>
|
|
|
|
</rdf:RDF>`
|
|
|
|
|
2023-10-23 01:07:06 +02:00
|
|
|
feed, err := ParseFeed("http://example.org/rdf.xml", strings.NewReader(data))
|
2020-12-03 05:47:11 +01:00
|
|
|
if err != nil {
|
|
|
|
t.Error(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
if feed.FeedURL != "http://example.org/rdf.xml" {
|
|
|
|
t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
|
|
|
|
}
|
|
|
|
|
|
|
|
if feed.SiteURL != "http://example.org/blog" {
|
|
|
|
t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
|
|
|
|
}
|
|
|
|
|
|
|
|
if feed.Entries[0].URL != "http://example.org/blog/article.html" {
|
|
|
|
t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-11-20 06:10:04 +01:00
|
|
|
func TestParseJson(t *testing.T) {
|
|
|
|
data := `{
|
|
|
|
"version": "https://jsonfeed.org/version/1",
|
|
|
|
"title": "My Example Feed",
|
|
|
|
"home_page_url": "https://example.org/",
|
|
|
|
"feed_url": "https://example.org/feed.json",
|
|
|
|
"items": [
|
|
|
|
{
|
|
|
|
"id": "2",
|
|
|
|
"content_text": "This is a second item.",
|
|
|
|
"url": "https://example.org/second-item"
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"id": "1",
|
|
|
|
"content_html": "<p>Hello, world!</p>",
|
|
|
|
"url": "https://example.org/initial-post"
|
|
|
|
}
|
|
|
|
]
|
|
|
|
}`
|
|
|
|
|
2023-10-23 01:07:06 +02:00
|
|
|
feed, err := ParseFeed("https://example.org/feed.json", strings.NewReader(data))
|
2017-11-20 06:10:04 +01:00
|
|
|
if err != nil {
|
|
|
|
t.Error(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
if feed.Title != "My Example Feed" {
|
|
|
|
t.Errorf("Incorrect title, got: %s", feed.Title)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-12-03 05:47:11 +01:00
|
|
|
func TestParseJsonFeedWithRelativeURL(t *testing.T) {
|
|
|
|
data := `{
|
|
|
|
"version": "https://jsonfeed.org/version/1",
|
|
|
|
"title": "My Example Feed",
|
|
|
|
"home_page_url": "/blog",
|
|
|
|
"feed_url": "/blog/feed.json",
|
|
|
|
"items": [
|
|
|
|
{
|
|
|
|
"id": "2",
|
|
|
|
"content_text": "This is a second item.",
|
|
|
|
"url": "/blog/article.html"
|
|
|
|
}
|
|
|
|
]
|
|
|
|
}`
|
|
|
|
|
2023-10-23 01:07:06 +02:00
|
|
|
feed, err := ParseFeed("https://example.org/blog/feed.json", strings.NewReader(data))
|
2020-12-03 05:47:11 +01:00
|
|
|
if err != nil {
|
|
|
|
t.Error(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
if feed.Title != "My Example Feed" {
|
|
|
|
t.Errorf("Incorrect title, got: %s", feed.Title)
|
|
|
|
}
|
|
|
|
|
|
|
|
if feed.FeedURL != "https://example.org/blog/feed.json" {
|
|
|
|
t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
|
|
|
|
}
|
|
|
|
|
|
|
|
if feed.SiteURL != "https://example.org/blog" {
|
|
|
|
t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
|
|
|
|
}
|
|
|
|
|
|
|
|
if feed.Entries[0].URL != "https://example.org/blog/article.html" {
|
|
|
|
t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-11-20 06:10:04 +01:00
|
|
|
func TestParseUnknownFeed(t *testing.T) {
|
|
|
|
data := `
|
|
|
|
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
|
|
|
<html xmlns="http://www.w3.org/1999/xhtml">
|
|
|
|
<head>
|
|
|
|
<title>Title of document</title>
|
|
|
|
</head>
|
|
|
|
<body>
|
|
|
|
some content
|
|
|
|
</body>
|
|
|
|
</html>
|
|
|
|
`
|
|
|
|
|
2023-10-23 01:07:06 +02:00
|
|
|
_, err := ParseFeed("https://example.org/", strings.NewReader(data))
|
2017-11-20 06:10:04 +01:00
|
|
|
if err == nil {
|
|
|
|
t.Error("ParseFeed must returns an error")
|
|
|
|
}
|
|
|
|
}
|
2018-02-08 03:47:47 +01:00
|
|
|
|
|
|
|
func TestParseEmptyFeed(t *testing.T) {
|
2023-10-23 01:07:06 +02:00
|
|
|
_, err := ParseFeed("", strings.NewReader(""))
|
2018-02-08 03:47:47 +01:00
|
|
|
if err == nil {
|
|
|
|
t.Error("ParseFeed must returns an error")
|
|
|
|
}
|
|
|
|
}
|