miniflux/internal/reader/parser/parser_test.go

// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package parser // import "miniflux.app/v2/internal/reader/parser"

import (
	"strings"
	"testing"
)

// TestParseAtom feeds a minimal Atom document to ParseFeed and checks that
// the feed title is extracted.
func TestParseAtom(t *testing.T) {
	data := `<?xml version="1.0" encoding="utf-8"?>
	<feed xmlns="http://www.w3.org/2005/Atom">

	  <title>Example Feed</title>
	  <link href="http://example.org/"/>
	  <updated>2003-12-13T18:30:02Z</updated>
	  <author>
		<name>John Doe</name>
	  </author>
	  <id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id>

	  <entry>
		<title>Atom-Powered Robots Run Amok</title>
		<link href="http://example.org/2003/12/13/atom03"/>
		<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
		<updated>2003-12-13T18:30:02Z</updated>
		<summary>Some text.</summary>
	  </entry>

	</feed>`

	feed, err := ParseFeed("https://example.org/", strings.NewReader(data))
	if err != nil {
		// Stop here: the returned feed must not be inspected after a failure.
		t.Fatal(err)
	}

	if feed.Title != "Example Feed" {
		t.Errorf("Incorrect title, got: %s", feed.Title)
	}
}

// TestParseAtomFeedWithRelativeURL parses an Atom document whose feed, site
// and entry links are relative paths, and expects ParseFeed to return them as
// absolute URLs on the host of the feed URL given as first argument.
func TestParseAtomFeedWithRelativeURL(t *testing.T) {
	data := `<?xml version="1.0" encoding="utf-8"?>
	<feed xmlns="http://www.w3.org/2005/Atom">
	  <title>Example Feed</title>
	  <link href="/blog/atom.xml" rel="self" type="application/atom+xml"/>
	  <link href="/blog"/>

	  <entry>
		<title>Test</title>
		<link href="/blog/article.html"/>
		<link href="/blog/article.html" rel="alternate" type="text/html"/>
		<id>/blog/article.html</id>
		<updated>2003-12-13T18:30:02Z</updated>
		<summary>Some text.</summary>
	  </entry>

	</feed>`

	feed, err := ParseFeed("https://example.org/blog/atom.xml", strings.NewReader(data))
	if err != nil {
		t.Fatal(err)
	}

	if feed.FeedURL != "https://example.org/blog/atom.xml" {
		t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
	}

	if feed.SiteURL != "https://example.org/blog" {
		t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
	}

	// Guard the index below so a missing entry is reported as a test failure
	// rather than an out-of-range panic.
	if len(feed.Entries) == 0 {
		t.Fatal("Expected at least one entry, got none")
	}

	if feed.Entries[0].URL != "https://example.org/blog/article.html" {
		t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
	}
}

// TestParseRSS feeds a minimal RSS 2.0 document to ParseFeed and checks that
// the channel title is extracted.
func TestParseRSS(t *testing.T) {
	data := `<?xml version="1.0"?>
	<rss version="2.0">
	<channel>
		<title>Liftoff News</title>
		<link>http://liftoff.msfc.nasa.gov/</link>
		<item>
			<title>Star City</title>
			<link>http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp</link>
			<description>How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's &lt;a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm"&gt;Star City&lt;/a&gt;.</description>
			<pubDate>Tue, 03 Jun 2003 09:39:21 GMT</pubDate>
			<guid>http://liftoff.msfc.nasa.gov/2003/06/03.html#item573</guid>
		</item>
	</channel>
	</rss>`

	feed, err := ParseFeed("http://liftoff.msfc.nasa.gov/", strings.NewReader(data))
	if err != nil {
		// Stop here: the returned feed must not be inspected after a failure.
		t.Fatal(err)
	}

	if feed.Title != "Liftoff News" {
		t.Errorf("Incorrect title, got: %s", feed.Title)
	}
}

// TestParseRSSFeedWithRelativeURL parses an RSS 2.0 document whose channel
// and item links are relative paths, and expects ParseFeed to return them as
// absolute URLs on the host of the feed URL given as first argument.
func TestParseRSSFeedWithRelativeURL(t *testing.T) {
	data := `<?xml version="1.0"?>
	<rss version="2.0">
	<channel>
		<title>Example Feed</title>
		<link>/blog</link>
		<item>
			<title>Example Entry</title>
			<link>/blog/article.html</link>
			<description>Something</description>
			<pubDate>Tue, 03 Jun 2003 09:39:21 GMT</pubDate>
			<guid>1234</guid>
		</item>
	</channel>
	</rss>`

	feed, err := ParseFeed("http://example.org/rss.xml", strings.NewReader(data))
	if err != nil {
		// Stop here: the returned feed must not be inspected after a failure.
		t.Fatal(err)
	}

	if feed.Title != "Example Feed" {
		t.Errorf("Incorrect title, got: %s", feed.Title)
	}

	if feed.FeedURL != "http://example.org/rss.xml" {
		t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
	}

	if feed.SiteURL != "http://example.org/blog" {
		t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
	}

	// Guard the index below so a missing entry is reported as a test failure
	// rather than an out-of-range panic.
	if len(feed.Entries) == 0 {
		t.Fatal("Expected at least one entry, got none")
	}

	if feed.Entries[0].URL != "http://example.org/blog/article.html" {
		t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
	}
}

// TestParseRDF feeds a minimal RDF (RSS 1.0 namespace) document to ParseFeed
// and checks that the channel title is extracted.
func TestParseRDF(t *testing.T) {
	data := `<?xml version="1.0" encoding="utf-8"?>
		<rdf:RDF
		  xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
		  xmlns="http://purl.org/rss/1.0/"
		>

		  <channel>
			<title>RDF Example</title>
			<link>http://example.org/</link>
		  </channel>

		  <item>
			<title>Title</title>
			<link>http://example.org/item</link>
			<description>Test</description>
		  </item>
		</rdf:RDF>`

	feed, err := ParseFeed("http://example.org/", strings.NewReader(data))
	if err != nil {
		// Stop here: the returned feed must not be inspected after a failure.
		t.Fatal(err)
	}

	if feed.Title != "RDF Example" {
		t.Errorf("Incorrect title, got: %s", feed.Title)
	}
}

// TestParseRDFWithRelativeURL parses an RDF document whose channel and item
// links are relative paths, and expects ParseFeed to return them as absolute
// URLs on the host of the feed URL given as first argument.
func TestParseRDFWithRelativeURL(t *testing.T) {
	data := `<?xml version="1.0" encoding="utf-8"?>
		<rdf:RDF
		  xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
		  xmlns="http://purl.org/rss/1.0/"
		>

		  <channel>
			<title>RDF Example</title>
			<link>/blog</link>
		  </channel>

		  <item>
			<title>Title</title>
			<link>/blog/article.html</link>
			<description>Test</description>
		  </item>
		</rdf:RDF>`

	feed, err := ParseFeed("http://example.org/rdf.xml", strings.NewReader(data))
	if err != nil {
		// Stop here: the returned feed must not be inspected after a failure.
		t.Fatal(err)
	}

	if feed.FeedURL != "http://example.org/rdf.xml" {
		t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
	}

	if feed.SiteURL != "http://example.org/blog" {
		t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
	}

	// Guard the index below so a missing entry is reported as a test failure
	// rather than an out-of-range panic.
	if len(feed.Entries) == 0 {
		t.Fatal("Expected at least one entry, got none")
	}

	if feed.Entries[0].URL != "http://example.org/blog/article.html" {
		t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
	}
}

// TestParseJson feeds a minimal JSON Feed version 1 document to ParseFeed and
// checks that the feed title is extracted.
func TestParseJson(t *testing.T) {
	data := `{
		"version": "https://jsonfeed.org/version/1",
		"title": "My Example Feed",
		"home_page_url": "https://example.org/",
		"feed_url": "https://example.org/feed.json",
		"items": [
			{
				"id": "2",
				"content_text": "This is a second item.",
				"url": "https://example.org/second-item"
			},
			{
				"id": "1",
				"content_html": "<p>Hello, world!</p>",
				"url": "https://example.org/initial-post"
			}
		]
	}`

	feed, err := ParseFeed("https://example.org/feed.json", strings.NewReader(data))
	if err != nil {
		// Stop here: the returned feed must not be inspected after a failure.
		t.Fatal(err)
	}

	if feed.Title != "My Example Feed" {
		t.Errorf("Incorrect title, got: %s", feed.Title)
	}
}

// TestParseJsonFeedWithRelativeURL parses a JSON Feed document whose
// home_page_url, feed_url and item url are relative paths, and expects
// ParseFeed to return them as absolute URLs on the host of the feed URL given
// as first argument.
func TestParseJsonFeedWithRelativeURL(t *testing.T) {
	data := `{
		"version": "https://jsonfeed.org/version/1",
		"title": "My Example Feed",
		"home_page_url": "/blog",
		"feed_url": "/blog/feed.json",
		"items": [
			{
				"id": "2",
				"content_text": "This is a second item.",
				"url": "/blog/article.html"
			}
		]
	}`

	feed, err := ParseFeed("https://example.org/blog/feed.json", strings.NewReader(data))
	if err != nil {
		// Stop here: the returned feed must not be inspected after a failure.
		t.Fatal(err)
	}

	if feed.Title != "My Example Feed" {
		t.Errorf("Incorrect title, got: %s", feed.Title)
	}

	if feed.FeedURL != "https://example.org/blog/feed.json" {
		t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
	}

	if feed.SiteURL != "https://example.org/blog" {
		t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
	}

	// Guard the index below so a missing entry is reported as a test failure
	// rather than an out-of-range panic.
	if len(feed.Entries) == 0 {
		t.Fatal("Expected at least one entry, got none")
	}

	if feed.Entries[0].URL != "https://example.org/blog/article.html" {
		t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
	}
}

// TestParseUnknownFeed passes an XHTML page (not a feed document) to
// ParseFeed and requires the call to report an error.
func TestParseUnknownFeed(t *testing.T) {
	htmlPage := `
		<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
		<html xmlns="http://www.w3.org/1999/xhtml">
			<head>
				<title>Title of document</title>
			</head>
			<body>
				some content
			</body>
		</html>
	`

	body := strings.NewReader(htmlPage)
	if _, parseErr := ParseFeed("https://example.org/", body); parseErr == nil {
		t.Error("ParseFeed must returns an error")
	}
}

// TestParseEmptyFeed calls ParseFeed with an empty URL and an empty body and
// requires the call to report an error.
func TestParseEmptyFeed(t *testing.T) {
	emptyBody := strings.NewReader("")
	if _, parseErr := ParseFeed("", emptyBody); parseErr == nil {
		t.Error("ParseFeed must returns an error")
	}
}
Replace copyright header with SPDX identifier 2023-06-19 23:42:47 +02:00			`// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.`
			`// SPDX-License-Identifier: Apache-2.0`
First commit 2017-11-20 06:10:04 +01:00
Move internal packages to an internal folder For reference: https://go.dev/doc/go1.4#internalpackages 2023-08-11 04:46:45 +02:00			`package parser // import "miniflux.app/v2/internal/reader/parser"`
First commit 2017-11-20 06:10:04 +01:00
			`import (`
Refactor feed discovery and avoid an extra HTTP request if the url provided is the feed 2023-10-23 01:07:06 +02:00			`"strings"`
First commit 2017-11-20 06:10:04 +01:00			`"testing"`
			`)`

			`func TestParseAtom(t *testing.T) {`
			data := `<?xml version="1.0" encoding="utf-8"?>
			`<feed xmlns="http://www.w3.org/2005/Atom">`

			`<title>Example Feed</title>`
			`<link href="http://example.org/"/>`
			`<updated>2003-12-13T18:30:02Z</updated>`
			`<author>`
			`<name>John Doe</name>`
			`</author>`
			`<id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id>`

			`<entry>`
			`<title>Atom-Powered Robots Run Amok</title>`
			`<link href="http://example.org/2003/12/13/atom03"/>`
			`<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>`
			`<updated>2003-12-13T18:30:02Z</updated>`
			`<summary>Some text.</summary>`
			`</entry>`

			</feed>`

Refactor feed discovery and avoid an extra HTTP request if the url provided is the feed 2023-10-23 01:07:06 +02:00			`feed, err := ParseFeed("https://example.org/", strings.NewReader(data))`
First commit 2017-11-20 06:10:04 +01:00			`if err != nil {`
			`t.Error(err)`
			`}`

			`if feed.Title != "Example Feed" {`
			`t.Errorf("Incorrect title, got: %s", feed.Title)`
			`}`
			`}`

Handle invalid feeds with relative URLs 2020-12-03 05:47:11 +01:00			`func TestParseAtomFeedWithRelativeURL(t *testing.T) {`
			data := `<?xml version="1.0" encoding="utf-8"?>
			`<feed xmlns="http://www.w3.org/2005/Atom">`
			`<title>Example Feed</title>`
			`<link href="/blog/atom.xml" rel="self" type="application/atom+xml"/>`
			`<link href="/blog"/>`

			`<entry>`
			`<title>Test</title>`
			`<link href="/blog/article.html"/>`
			`<link href="/blog/article.html" rel="alternate" type="text/html"/>`
			`<id>/blog/article.html</id>`
			`<updated>2003-12-13T18:30:02Z</updated>`
			`<summary>Some text.</summary>`
			`</entry>`

			</feed>`

Refactor feed discovery and avoid an extra HTTP request if the url provided is the feed 2023-10-23 01:07:06 +02:00			`feed, err := ParseFeed("https://example.org/blog/atom.xml", strings.NewReader(data))`
Handle invalid feeds with relative URLs 2020-12-03 05:47:11 +01:00			`if err != nil {`
			`t.Fatal(err)`
			`}`

			`if feed.FeedURL != "https://example.org/blog/atom.xml" {`
			`t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)`
			`}`

			`if feed.SiteURL != "https://example.org/blog" {`
			`t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)`
			`}`

			`if feed.Entries[0].URL != "https://example.org/blog/article.html" {`
			`t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)`
			`}`
			`}`

Add parser for RDF feeds 2017-11-21 03:34:11 +01:00			`func TestParseRSS(t *testing.T) {`
First commit 2017-11-20 06:10:04 +01:00			data := `<?xml version="1.0"?>
			`<rss version="2.0">`
			`<channel>`
			`<title>Liftoff News</title>`
			`<link>http://liftoff.msfc.nasa.gov/</link>`
			`<item>`
			`<title>Star City</title>`
			`<link>http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp</link>`
			`<description>How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's <a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm">Star City</a>.</description>`
			`<pubDate>Tue, 03 Jun 2003 09:39:21 GMT</pubDate>`
			`<guid>http://liftoff.msfc.nasa.gov/2003/06/03.html#item573</guid>`
			`</item>`
			`</channel>`
			</rss>`

Refactor feed discovery and avoid an extra HTTP request if the url provided is the feed 2023-10-23 01:07:06 +02:00			`feed, err := ParseFeed("http://liftoff.msfc.nasa.gov/", strings.NewReader(data))`
First commit 2017-11-20 06:10:04 +01:00			`if err != nil {`
			`t.Error(err)`
			`}`

			`if feed.Title != "Liftoff News" {`
			`t.Errorf("Incorrect title, got: %s", feed.Title)`
			`}`
			`}`

Handle invalid feeds with relative URLs 2020-12-03 05:47:11 +01:00			`func TestParseRSSFeedWithRelativeURL(t *testing.T) {`
			data := `<?xml version="1.0"?>
			`<rss version="2.0">`
			`<channel>`
			`<title>Example Feed</title>`
			`<link>/blog</link>`
			`<item>`
			`<title>Example Entry</title>`
			`<link>/blog/article.html</link>`
			`<description>Something</description>`
			`<pubDate>Tue, 03 Jun 2003 09:39:21 GMT</pubDate>`
			`<guid>1234</guid>`
			`</item>`
			`</channel>`
			</rss>`

Refactor feed discovery and avoid an extra HTTP request if the url provided is the feed 2023-10-23 01:07:06 +02:00			`feed, err := ParseFeed("http://example.org/rss.xml", strings.NewReader(data))`
Handle invalid feeds with relative URLs 2020-12-03 05:47:11 +01:00			`if err != nil {`
			`t.Error(err)`
			`}`

			`if feed.Title != "Example Feed" {`
			`t.Errorf("Incorrect title, got: %s", feed.Title)`
			`}`

			`if feed.FeedURL != "http://example.org/rss.xml" {`
			`t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)`
			`}`

			`if feed.SiteURL != "http://example.org/blog" {`
			`t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)`
			`}`

			`if feed.Entries[0].URL != "http://example.org/blog/article.html" {`
			`t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)`
			`}`
			`}`

Add parser for RDF feeds 2017-11-21 03:34:11 +01:00			`func TestParseRDF(t *testing.T) {`
			data := `<?xml version="1.0" encoding="utf-8"?>
			`<rdf:RDF`
			`xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"`
			`xmlns="http://purl.org/rss/1.0/"`
			`>`

			`<channel>`
			`<title>RDF Example</title>`
			`<link>http://example.org/</link>`
			`</channel>`

			`<item>`
			`<title>Title</title>`
			`<link>http://example.org/item</link>`
			`<description>Test</description>`
			`</item>`
			</rdf:RDF>`

Refactor feed discovery and avoid an extra HTTP request if the url provided is the feed 2023-10-23 01:07:06 +02:00			`feed, err := ParseFeed("http://example.org/", strings.NewReader(data))`
Add parser for RDF feeds 2017-11-21 03:34:11 +01:00			`if err != nil {`
			`t.Error(err)`
			`}`

			`if feed.Title != "RDF Example" {`
			`t.Errorf("Incorrect title, got: %s", feed.Title)`
			`}`
			`}`

Handle invalid feeds with relative URLs 2020-12-03 05:47:11 +01:00			`func TestParseRDFWithRelativeURL(t *testing.T) {`
			data := `<?xml version="1.0" encoding="utf-8"?>
			`<rdf:RDF`
			`xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"`
			`xmlns="http://purl.org/rss/1.0/"`
			`>`

			`<channel>`
			`<title>RDF Example</title>`
			`<link>/blog</link>`
			`</channel>`

			`<item>`
			`<title>Title</title>`
			`<link>/blog/article.html</link>`
			`<description>Test</description>`
			`</item>`
			</rdf:RDF>`

Refactor feed discovery and avoid an extra HTTP request if the url provided is the feed 2023-10-23 01:07:06 +02:00			`feed, err := ParseFeed("http://example.org/rdf.xml", strings.NewReader(data))`
Handle invalid feeds with relative URLs 2020-12-03 05:47:11 +01:00			`if err != nil {`
			`t.Error(err)`
			`}`

			`if feed.FeedURL != "http://example.org/rdf.xml" {`
			`t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)`
			`}`

			`if feed.SiteURL != "http://example.org/blog" {`
			`t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)`
			`}`

			`if feed.Entries[0].URL != "http://example.org/blog/article.html" {`
			`t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)`
			`}`
			`}`

First commit 2017-11-20 06:10:04 +01:00			`func TestParseJson(t *testing.T) {`
			data := `{
			`"version": "https://jsonfeed.org/version/1",`
			`"title": "My Example Feed",`
			`"home_page_url": "https://example.org/",`
			`"feed_url": "https://example.org/feed.json",`
			`"items": [`
			`{`
			`"id": "2",`
			`"content_text": "This is a second item.",`
			`"url": "https://example.org/second-item"`
			`},`
			`{`
			`"id": "1",`
			`"content_html": "<p>Hello, world!</p>",`
			`"url": "https://example.org/initial-post"`
			`}`
			`]`
			}`

Refactor feed discovery and avoid an extra HTTP request if the url provided is the feed 2023-10-23 01:07:06 +02:00			`feed, err := ParseFeed("https://example.org/feed.json", strings.NewReader(data))`
First commit 2017-11-20 06:10:04 +01:00			`if err != nil {`
			`t.Error(err)`
			`}`

			`if feed.Title != "My Example Feed" {`
			`t.Errorf("Incorrect title, got: %s", feed.Title)`
			`}`
			`}`

Handle invalid feeds with relative URLs 2020-12-03 05:47:11 +01:00			`func TestParseJsonFeedWithRelativeURL(t *testing.T) {`
			data := `{
			`"version": "https://jsonfeed.org/version/1",`
			`"title": "My Example Feed",`
			`"home_page_url": "/blog",`
			`"feed_url": "/blog/feed.json",`
			`"items": [`
			`{`
			`"id": "2",`
			`"content_text": "This is a second item.",`
			`"url": "/blog/article.html"`
			`}`
			`]`
			}`

Refactor feed discovery and avoid an extra HTTP request if the url provided is the feed 2023-10-23 01:07:06 +02:00			`feed, err := ParseFeed("https://example.org/blog/feed.json", strings.NewReader(data))`
Handle invalid feeds with relative URLs 2020-12-03 05:47:11 +01:00			`if err != nil {`
			`t.Error(err)`
			`}`

			`if feed.Title != "My Example Feed" {`
			`t.Errorf("Incorrect title, got: %s", feed.Title)`
			`}`

			`if feed.FeedURL != "https://example.org/blog/feed.json" {`
			`t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)`
			`}`

			`if feed.SiteURL != "https://example.org/blog" {`
			`t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)`
			`}`

			`if feed.Entries[0].URL != "https://example.org/blog/article.html" {`
			`t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)`
			`}`
			`}`

First commit 2017-11-20 06:10:04 +01:00			`func TestParseUnknownFeed(t *testing.T) {`
			data := `
			`<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">`
			`<html xmlns="http://www.w3.org/1999/xhtml">`
			`<head>`
			`<title>Title of document</title>`
			`</head>`
			`<body>`
			`some content`
			`</body>`
			`</html>`
			`

Refactor feed discovery and avoid an extra HTTP request if the url provided is the feed 2023-10-23 01:07:06 +02:00			`_, err := ParseFeed("https://example.org/", strings.NewReader(data))`
First commit 2017-11-20 06:10:04 +01:00			`if err == nil {`
			`t.Error("ParseFeed must returns an error")`
			`}`
			`}`
Improve error handling when the response is empty 2018-02-08 03:47:47 +01:00
			`func TestParseEmptyFeed(t *testing.T) {`
Refactor feed discovery and avoid an extra HTTP request if the url provided is the feed 2023-10-23 01:07:06 +02:00			`_, err := ParseFeed("", strings.NewReader(""))`
Improve error handling when the response is empty 2018-02-08 03:47:47 +01:00			`if err == nil {`
			`t.Error("ParseFeed must returns an error")`
			`}`
			`}`