Add parser for RDF feeds
This commit is contained in:
parent
c5cd38de83
commit
89307010ad
16 changed files with 491 additions and 36 deletions
|
@ -1,5 +1,5 @@
|
||||||
// Code generated by go generate; DO NOT EDIT.
|
// Code generated by go generate; DO NOT EDIT.
|
||||||
// 2017-11-20 17:09:36.257679981 -0800 PST m=+0.024050336
|
// 2017-11-20 18:31:16.993089344 -0800 PST m=+0.032236726
|
||||||
|
|
||||||
package locale
|
package locale
|
||||||
|
|
||||||
|
@ -126,10 +126,11 @@ var Translations = map[string]string{
|
||||||
"Unable to execute request: %v": "Impossible d'exécuter cette requête: %v",
|
"Unable to execute request: %v": "Impossible d'exécuter cette requête: %v",
|
||||||
"Last Parsing Error": "Dernière erreur d'analyse",
|
"Last Parsing Error": "Dernière erreur d'analyse",
|
||||||
"There is a problem with this feed": "Il y a un problème avec cet abonnement",
|
"There is a problem with this feed": "Il y a un problème avec cet abonnement",
|
||||||
"Unable to parse OPML file: %v": "Impossible de lire le fichier OPML : %v",
|
"Unable to parse OPML file: %v.": "Impossible de lire ce fichier OPML : %v.",
|
||||||
"Unable to parse RSS feed: %v": "Impossible de lire ce flux RSS: %v",
|
"Unable to parse RSS feed: %v.": "Impossible de lire ce flux RSS: %v.",
|
||||||
"Unable to parse Atom feed: %v": "Impossible de lire ce flux Atom: %v",
|
"Unable to parse Atom feed: %v.": "Impossible de lire ce flux Atom: %v.",
|
||||||
"Unable to parse JSON feed: %v": "Impossible de lire ce flux Json: %v",
|
"Unable to parse JSON feed: %v.": "Impossible de lire ce flux JSON: %v.",
|
||||||
|
"Unable to parse RDF feed: %v.": "Impossible de lire ce flux RDF: %v.",
|
||||||
"Unable to normalize encoding: %v.": "Impossible de normaliser l'encodage : %v."
|
"Unable to normalize encoding: %v.": "Impossible de normaliser l'encodage : %v."
|
||||||
}
|
}
|
||||||
`,
|
`,
|
||||||
|
@ -137,5 +138,5 @@ var Translations = map[string]string{
|
||||||
|
|
||||||
var TranslationsChecksums = map[string]string{
|
var TranslationsChecksums = map[string]string{
|
||||||
"en_US": "6fe95384260941e8a5a3c695a655a932e0a8a6a572c1e45cb2b1ae8baa01b897",
|
"en_US": "6fe95384260941e8a5a3c695a655a932e0a8a6a572c1e45cb2b1ae8baa01b897",
|
||||||
"fr_FR": "0ff93081d867ab27a190b5cbe6aaed65dbdcd80079ad667b515428a147cb20ee",
|
"fr_FR": "946d1c30bcb862ef35741786cdb5768900ad0d704e802472e481540f9b6542e5",
|
||||||
}
|
}
|
||||||
|
|
|
@ -110,9 +110,10 @@
|
||||||
"Unable to execute request: %v": "Impossible d'exécuter cette requête: %v",
|
"Unable to execute request: %v": "Impossible d'exécuter cette requête: %v",
|
||||||
"Last Parsing Error": "Dernière erreur d'analyse",
|
"Last Parsing Error": "Dernière erreur d'analyse",
|
||||||
"There is a problem with this feed": "Il y a un problème avec cet abonnement",
|
"There is a problem with this feed": "Il y a un problème avec cet abonnement",
|
||||||
"Unable to parse OPML file: %v": "Impossible de lire le fichier OPML : %v",
|
"Unable to parse OPML file: %v.": "Impossible de lire ce fichier OPML : %v.",
|
||||||
"Unable to parse RSS feed: %v": "Impossible de lire ce flux RSS: %v",
|
"Unable to parse RSS feed: %v.": "Impossible de lire ce flux RSS: %v.",
|
||||||
"Unable to parse Atom feed: %v": "Impossible de lire ce flux Atom: %v",
|
"Unable to parse Atom feed: %v.": "Impossible de lire ce flux Atom: %v.",
|
||||||
"Unable to parse JSON feed: %v": "Impossible de lire ce flux Json: %v",
|
"Unable to parse JSON feed: %v.": "Impossible de lire ce flux JSON: %v.",
|
||||||
|
"Unable to parse RDF feed: %v.": "Impossible de lire ce flux RDF: %v.",
|
||||||
"Unable to normalize encoding: %v.": "Impossible de normaliser l'encodage : %v."
|
"Unable to normalize encoding: %v.": "Impossible de normaliser l'encodage : %v."
|
||||||
}
|
}
|
||||||
|
|
|
@ -14,7 +14,7 @@ import (
|
||||||
"golang.org/x/net/html/charset"
|
"golang.org/x/net/html/charset"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Parse returns a normalized feed struct.
|
// Parse returns a normalized feed struct from an Atom feed.
|
||||||
func Parse(data io.Reader) (*model.Feed, error) {
|
func Parse(data io.Reader) (*model.Feed, error) {
|
||||||
atomFeed := new(AtomFeed)
|
atomFeed := new(AtomFeed)
|
||||||
decoder := xml.NewDecoder(data)
|
decoder := xml.NewDecoder(data)
|
||||||
|
@ -22,7 +22,7 @@ func Parse(data io.Reader) (*model.Feed, error) {
|
||||||
|
|
||||||
err := decoder.Decode(atomFeed)
|
err := decoder.Decode(atomFeed)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, errors.NewLocalizedError("Unable to parse Atom feed: %v", err)
|
return nil, errors.NewLocalizedError("Unable to parse Atom feed: %v.", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
return atomFeed.Transform(), nil
|
return atomFeed.Transform(), nil
|
||||||
|
|
|
@ -8,25 +8,30 @@ import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"encoding/xml"
|
"encoding/xml"
|
||||||
"errors"
|
"errors"
|
||||||
"github.com/miniflux/miniflux2/helper"
|
|
||||||
"github.com/miniflux/miniflux2/model"
|
|
||||||
"github.com/miniflux/miniflux2/reader/feed/atom"
|
|
||||||
"github.com/miniflux/miniflux2/reader/feed/json"
|
|
||||||
"github.com/miniflux/miniflux2/reader/feed/rss"
|
|
||||||
"io"
|
"io"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/miniflux/miniflux2/helper"
|
||||||
|
"github.com/miniflux/miniflux2/model"
|
||||||
|
"github.com/miniflux/miniflux2/reader/feed/atom"
|
||||||
|
"github.com/miniflux/miniflux2/reader/feed/json"
|
||||||
|
"github.com/miniflux/miniflux2/reader/feed/rdf"
|
||||||
|
"github.com/miniflux/miniflux2/reader/feed/rss"
|
||||||
|
|
||||||
"golang.org/x/net/html/charset"
|
"golang.org/x/net/html/charset"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// List of feed formats.
|
||||||
const (
|
const (
|
||||||
FormatRss = "rss"
|
FormatRDF = "rdf"
|
||||||
|
FormatRSS = "rss"
|
||||||
FormatAtom = "atom"
|
FormatAtom = "atom"
|
||||||
FormatJson = "json"
|
FormatJSON = "json"
|
||||||
FormatUnknown = "unknown"
|
FormatUnknown = "unknown"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// DetectFeedFormat detects the feed format from input data.
|
||||||
func DetectFeedFormat(data io.Reader) string {
|
func DetectFeedFormat(data io.Reader) string {
|
||||||
defer helper.ExecutionTime(time.Now(), "[Feed:DetectFeedFormat]")
|
defer helper.ExecutionTime(time.Now(), "[Feed:DetectFeedFormat]")
|
||||||
|
|
||||||
|
@ -45,15 +50,17 @@ func DetectFeedFormat(data io.Reader) string {
|
||||||
if element, ok := token.(xml.StartElement); ok {
|
if element, ok := token.(xml.StartElement); ok {
|
||||||
switch element.Name.Local {
|
switch element.Name.Local {
|
||||||
case "rss":
|
case "rss":
|
||||||
return FormatRss
|
return FormatRSS
|
||||||
case "feed":
|
case "feed":
|
||||||
return FormatAtom
|
return FormatAtom
|
||||||
|
case "RDF":
|
||||||
|
return FormatRDF
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if strings.HasPrefix(strings.TrimSpace(buffer.String()), "{") {
|
if strings.HasPrefix(strings.TrimSpace(buffer.String()), "{") {
|
||||||
return FormatJson
|
return FormatJSON
|
||||||
}
|
}
|
||||||
|
|
||||||
return FormatUnknown
|
return FormatUnknown
|
||||||
|
@ -72,10 +79,12 @@ func parseFeed(data io.Reader) (*model.Feed, error) {
|
||||||
switch format {
|
switch format {
|
||||||
case FormatAtom:
|
case FormatAtom:
|
||||||
return atom.Parse(reader)
|
return atom.Parse(reader)
|
||||||
case FormatRss:
|
case FormatRSS:
|
||||||
return rss.Parse(reader)
|
return rss.Parse(reader)
|
||||||
case FormatJson:
|
case FormatJSON:
|
||||||
return json.Parse(reader)
|
return json.Parse(reader)
|
||||||
|
case FormatRDF:
|
||||||
|
return rdf.Parse(reader)
|
||||||
default:
|
default:
|
||||||
return nil, errors.New("Unsupported feed format")
|
return nil, errors.New("Unsupported feed format")
|
||||||
}
|
}
|
||||||
|
|
|
@ -9,12 +9,21 @@ import (
|
||||||
"testing"
|
"testing"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
func TestDetectRDF(t *testing.T) {
|
||||||
|
data := `<?xml version="1.0"?><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://my.netscape.com/rdf/simple/0.9/"></rdf:RDF>`
|
||||||
|
format := DetectFeedFormat(bytes.NewBufferString(data))
|
||||||
|
|
||||||
|
if format != FormatRDF {
|
||||||
|
t.Errorf("Wrong format detected: %s instead of %s", format, FormatRDF)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestDetectRSS(t *testing.T) {
|
func TestDetectRSS(t *testing.T) {
|
||||||
data := `<?xml version="1.0"?><rss version="2.0"><channel></channel></rss>`
|
data := `<?xml version="1.0"?><rss version="2.0"><channel></channel></rss>`
|
||||||
format := DetectFeedFormat(bytes.NewBufferString(data))
|
format := DetectFeedFormat(bytes.NewBufferString(data))
|
||||||
|
|
||||||
if format != FormatRss {
|
if format != FormatRSS {
|
||||||
t.Errorf("Wrong format detected: %s instead of %s", format, FormatRss)
|
t.Errorf("Wrong format detected: %s instead of %s", format, FormatRSS)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -45,8 +54,8 @@ func TestDetectJSON(t *testing.T) {
|
||||||
`
|
`
|
||||||
format := DetectFeedFormat(bytes.NewBufferString(data))
|
format := DetectFeedFormat(bytes.NewBufferString(data))
|
||||||
|
|
||||||
if format != FormatJson {
|
if format != FormatJSON {
|
||||||
t.Errorf("Wrong format detected: %s instead of %s", format, FormatJson)
|
t.Errorf("Wrong format detected: %s instead of %s", format, FormatJSON)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -93,7 +102,7 @@ func TestParseAtom(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestParseRss(t *testing.T) {
|
func TestParseRSS(t *testing.T) {
|
||||||
data := `<?xml version="1.0"?>
|
data := `<?xml version="1.0"?>
|
||||||
<rss version="2.0">
|
<rss version="2.0">
|
||||||
<channel>
|
<channel>
|
||||||
|
@ -119,6 +128,35 @@ func TestParseRss(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestParseRDF(t *testing.T) {
|
||||||
|
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<rdf:RDF
|
||||||
|
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||||
|
xmlns="http://purl.org/rss/1.0/"
|
||||||
|
>
|
||||||
|
|
||||||
|
<channel>
|
||||||
|
<title>RDF Example</title>
|
||||||
|
<link>http://example.org/</link>
|
||||||
|
</channel>
|
||||||
|
|
||||||
|
<item>
|
||||||
|
<title>Title</title>
|
||||||
|
<link>http://example.org/item</link>
|
||||||
|
<description>Test</description>
|
||||||
|
</item>
|
||||||
|
</rdf:RDF>`
|
||||||
|
|
||||||
|
feed, err := parseFeed(bytes.NewBufferString(data))
|
||||||
|
if err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if feed.Title != "RDF Example" {
|
||||||
|
t.Errorf("Incorrect title, got: %s", feed.Title)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestParseJson(t *testing.T) {
|
func TestParseJson(t *testing.T) {
|
||||||
data := `{
|
data := `{
|
||||||
"version": "https://jsonfeed.org/version/1",
|
"version": "https://jsonfeed.org/version/1",
|
||||||
|
|
28
reader/feed/rdf/parser.go
Normal file
28
reader/feed/rdf/parser.go
Normal file
|
@ -0,0 +1,28 @@
|
||||||
|
// Copyright 2017 Frédéric Guillot. All rights reserved.
|
||||||
|
// Use of this source code is governed by the Apache 2.0
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package rdf
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/xml"
|
||||||
|
"io"
|
||||||
|
|
||||||
|
"github.com/miniflux/miniflux2/errors"
|
||||||
|
"github.com/miniflux/miniflux2/model"
|
||||||
|
"golang.org/x/net/html/charset"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Parse returns a normalized feed struct from a RDF feed.
|
||||||
|
func Parse(data io.Reader) (*model.Feed, error) {
|
||||||
|
feed := new(rdfFeed)
|
||||||
|
decoder := xml.NewDecoder(data)
|
||||||
|
decoder.CharsetReader = charset.NewReaderLabel
|
||||||
|
|
||||||
|
err := decoder.Decode(feed)
|
||||||
|
if err != nil {
|
||||||
|
return nil, errors.NewLocalizedError("Unable to parse RDF feed: %v.", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return feed.Transform(), nil
|
||||||
|
}
|
307
reader/feed/rdf/parser_test.go
Normal file
307
reader/feed/rdf/parser_test.go
Normal file
|
@ -0,0 +1,307 @@
|
||||||
|
// Copyright 2017 Frédéric Guillot. All rights reserved.
|
||||||
|
// Use of this source code is governed by the Apache 2.0
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package rdf
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/miniflux/miniflux2/errors"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestParseRDFSample(t *testing.T) {
|
||||||
|
data := `
|
||||||
|
<?xml version="1.0"?>
|
||||||
|
|
||||||
|
<rdf:RDF
|
||||||
|
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||||
|
xmlns="http://purl.org/rss/1.0/"
|
||||||
|
>
|
||||||
|
|
||||||
|
<channel rdf:about="http://www.xml.com/xml/news.rss">
|
||||||
|
<title>XML.com</title>
|
||||||
|
<link>http://xml.com/pub</link>
|
||||||
|
<description>
|
||||||
|
XML.com features a rich mix of information and services
|
||||||
|
for the XML community.
|
||||||
|
</description>
|
||||||
|
|
||||||
|
<image rdf:resource="http://xml.com/universal/images/xml_tiny.gif" />
|
||||||
|
|
||||||
|
<items>
|
||||||
|
<rdf:Seq>
|
||||||
|
<rdf:li resource="http://xml.com/pub/2000/08/09/xslt/xslt.html" />
|
||||||
|
<rdf:li resource="http://xml.com/pub/2000/08/09/rdfdb/index.html" />
|
||||||
|
</rdf:Seq>
|
||||||
|
</items>
|
||||||
|
|
||||||
|
<textinput rdf:resource="http://search.xml.com" />
|
||||||
|
|
||||||
|
</channel>
|
||||||
|
|
||||||
|
<image rdf:about="http://xml.com/universal/images/xml_tiny.gif">
|
||||||
|
<title>XML.com</title>
|
||||||
|
<link>http://www.xml.com</link>
|
||||||
|
<url>http://xml.com/universal/images/xml_tiny.gif</url>
|
||||||
|
</image>
|
||||||
|
|
||||||
|
<item rdf:about="http://xml.com/pub/2000/08/09/xslt/xslt.html">
|
||||||
|
<title>Processing Inclusions with XSLT</title>
|
||||||
|
<link>http://xml.com/pub/2000/08/09/xslt/xslt.html</link>
|
||||||
|
<description>
|
||||||
|
Processing document inclusions with general XML tools can be
|
||||||
|
problematic. This article proposes a way of preserving inclusion
|
||||||
|
information through SAX-based processing.
|
||||||
|
</description>
|
||||||
|
</item>
|
||||||
|
|
||||||
|
<item rdf:about="http://xml.com/pub/2000/08/09/rdfdb/index.html">
|
||||||
|
<title>Putting RDF to Work</title>
|
||||||
|
<link>http://xml.com/pub/2000/08/09/rdfdb/index.html</link>
|
||||||
|
<description>
|
||||||
|
Tool and API support for the Resource Description Framework
|
||||||
|
is slowly coming of age. Edd Dumbill takes a look at RDFDB,
|
||||||
|
one of the most exciting new RDF toolkits.
|
||||||
|
</description>
|
||||||
|
</item>
|
||||||
|
|
||||||
|
<textinput rdf:about="http://search.xml.com">
|
||||||
|
<title>Search XML.com</title>
|
||||||
|
<description>Search XML.com's XML collection</description>
|
||||||
|
<name>s</name>
|
||||||
|
<link>http://search.xml.com</link>
|
||||||
|
</textinput>
|
||||||
|
|
||||||
|
</rdf:RDF>`
|
||||||
|
|
||||||
|
feed, err := Parse(bytes.NewBufferString(data))
|
||||||
|
if err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if feed.Title != "XML.com" {
|
||||||
|
t.Errorf("Incorrect title, got: %s", feed.Title)
|
||||||
|
}
|
||||||
|
|
||||||
|
if feed.FeedURL != "" {
|
||||||
|
t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
|
||||||
|
}
|
||||||
|
|
||||||
|
if feed.SiteURL != "http://xml.com/pub" {
|
||||||
|
t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(feed.Entries) != 2 {
|
||||||
|
t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
|
||||||
|
}
|
||||||
|
|
||||||
|
if feed.Entries[1].Hash != "8aaeee5d3ab50351422fbded41078ee88c73bf1441085b16a8c09fd90a7db321" {
|
||||||
|
t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
|
||||||
|
}
|
||||||
|
|
||||||
|
if feed.Entries[1].URL != "http://xml.com/pub/2000/08/09/rdfdb/index.html" {
|
||||||
|
t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
|
||||||
|
}
|
||||||
|
|
||||||
|
if feed.Entries[1].Title != "Putting RDF to Work" {
|
||||||
|
t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
|
||||||
|
}
|
||||||
|
|
||||||
|
if strings.HasSuffix(feed.Entries[1].Content, "Tool and API support") {
|
||||||
|
t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseRDFSampleWithDublinCore(t *testing.T) {
|
||||||
|
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
|
||||||
|
<rdf:RDF
|
||||||
|
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||||
|
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||||
|
xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
|
||||||
|
xmlns:co="http://purl.org/rss/1.0/modules/company/"
|
||||||
|
xmlns:ti="http://purl.org/rss/1.0/modules/textinput/"
|
||||||
|
xmlns="http://purl.org/rss/1.0/"
|
||||||
|
>
|
||||||
|
|
||||||
|
<channel rdf:about="http://meerkat.oreillynet.com/?_fl=rss1.0">
|
||||||
|
<title>Meerkat</title>
|
||||||
|
<link>http://meerkat.oreillynet.com</link>
|
||||||
|
<description>Meerkat: An Open Wire Service</description>
|
||||||
|
<dc:publisher>The O'Reilly Network</dc:publisher>
|
||||||
|
<dc:creator>Rael Dornfest (mailto:rael@oreilly.com)</dc:creator>
|
||||||
|
<dc:rights>Copyright © 2000 O'Reilly & Associates, Inc.</dc:rights>
|
||||||
|
<dc:date>2000-01-01T12:00+00:00</dc:date>
|
||||||
|
<sy:updatePeriod>hourly</sy:updatePeriod>
|
||||||
|
<sy:updateFrequency>2</sy:updateFrequency>
|
||||||
|
<sy:updateBase>2000-01-01T12:00+00:00</sy:updateBase>
|
||||||
|
|
||||||
|
<image rdf:resource="http://meerkat.oreillynet.com/icons/meerkat-powered.jpg" />
|
||||||
|
|
||||||
|
<items>
|
||||||
|
<rdf:Seq>
|
||||||
|
<rdf:li resource="http://c.moreover.com/click/here.pl?r123" />
|
||||||
|
</rdf:Seq>
|
||||||
|
</items>
|
||||||
|
|
||||||
|
<textinput rdf:resource="http://meerkat.oreillynet.com" />
|
||||||
|
|
||||||
|
</channel>
|
||||||
|
|
||||||
|
<image rdf:about="http://meerkat.oreillynet.com/icons/meerkat-powered.jpg">
|
||||||
|
<title>Meerkat Powered!</title>
|
||||||
|
<url>http://meerkat.oreillynet.com/icons/meerkat-powered.jpg</url>
|
||||||
|
<link>http://meerkat.oreillynet.com</link>
|
||||||
|
</image>
|
||||||
|
|
||||||
|
<item rdf:about="http://c.moreover.com/click/here.pl?r123">
|
||||||
|
<title>XML: A Disruptive Technology</title>
|
||||||
|
<link>http://c.moreover.com/click/here.pl?r123</link>
|
||||||
|
<dc:description>
|
||||||
|
XML is placing increasingly heavy loads on the existing technical
|
||||||
|
infrastructure of the Internet.
|
||||||
|
</dc:description>
|
||||||
|
<dc:publisher>The O'Reilly Network</dc:publisher>
|
||||||
|
<dc:creator>Simon St.Laurent (mailto:simonstl@simonstl.com)</dc:creator>
|
||||||
|
<dc:rights>Copyright © 2000 O'Reilly & Associates, Inc.</dc:rights>
|
||||||
|
<dc:subject>XML</dc:subject>
|
||||||
|
<co:name>XML.com</co:name>
|
||||||
|
<co:market>NASDAQ</co:market>
|
||||||
|
<co:symbol>XML</co:symbol>
|
||||||
|
</item>
|
||||||
|
|
||||||
|
<textinput rdf:about="http://meerkat.oreillynet.com">
|
||||||
|
<title>Search Meerkat</title>
|
||||||
|
<description>Search Meerkat's RSS Database...</description>
|
||||||
|
<name>s</name>
|
||||||
|
<link>http://meerkat.oreillynet.com/</link>
|
||||||
|
<ti:function>search</ti:function>
|
||||||
|
<ti:inputType>regex</ti:inputType>
|
||||||
|
</textinput>
|
||||||
|
|
||||||
|
</rdf:RDF>`
|
||||||
|
|
||||||
|
feed, err := Parse(bytes.NewBufferString(data))
|
||||||
|
if err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if feed.Title != "Meerkat" {
|
||||||
|
t.Errorf("Incorrect title, got: %s", feed.Title)
|
||||||
|
}
|
||||||
|
|
||||||
|
if feed.FeedURL != "" {
|
||||||
|
t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
|
||||||
|
}
|
||||||
|
|
||||||
|
if feed.SiteURL != "http://meerkat.oreillynet.com" {
|
||||||
|
t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(feed.Entries) != 1 {
|
||||||
|
t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
|
||||||
|
}
|
||||||
|
|
||||||
|
if feed.Entries[0].Hash != "fa4ef7c300b175ca66f92f226b5dba5caa2a9619f031101bf56e5b884b02cd97" {
|
||||||
|
t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
|
||||||
|
}
|
||||||
|
|
||||||
|
if feed.Entries[0].URL != "http://c.moreover.com/click/here.pl?r123" {
|
||||||
|
t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
|
||||||
|
}
|
||||||
|
|
||||||
|
if feed.Entries[0].Title != "XML: A Disruptive Technology" {
|
||||||
|
t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
|
||||||
|
}
|
||||||
|
|
||||||
|
if strings.HasSuffix(feed.Entries[0].Content, "XML is placing increasingly") {
|
||||||
|
t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content)
|
||||||
|
}
|
||||||
|
|
||||||
|
if feed.Entries[0].Author != "Simon St.Laurent (mailto:simonstl@simonstl.com)" {
|
||||||
|
t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseItemWithOnlyFeedAuthor(t *testing.T) {
|
||||||
|
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
|
||||||
|
<rdf:RDF
|
||||||
|
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||||
|
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||||
|
xmlns="http://purl.org/rss/1.0/"
|
||||||
|
>
|
||||||
|
|
||||||
|
<channel rdf:about="http://meerkat.oreillynet.com/?_fl=rss1.0">
|
||||||
|
<title>Meerkat</title>
|
||||||
|
<link>http://meerkat.oreillynet.com</link>
|
||||||
|
<dc:creator>Rael Dornfest (mailto:rael@oreilly.com)</dc:creator>
|
||||||
|
</channel>
|
||||||
|
|
||||||
|
<item rdf:about="http://c.moreover.com/click/here.pl?r123">
|
||||||
|
<title>XML: A Disruptive Technology</title>
|
||||||
|
<link>http://c.moreover.com/click/here.pl?r123</link>
|
||||||
|
<dc:description>
|
||||||
|
XML is placing increasingly heavy loads on the existing technical
|
||||||
|
infrastructure of the Internet.
|
||||||
|
</dc:description>
|
||||||
|
</item>
|
||||||
|
</rdf:RDF>`
|
||||||
|
|
||||||
|
feed, err := Parse(bytes.NewBufferString(data))
|
||||||
|
if err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if feed.Entries[0].Author != "Rael Dornfest (mailto:rael@oreilly.com)" {
|
||||||
|
t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseItemWithoutLink(t *testing.T) {
|
||||||
|
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
|
||||||
|
<rdf:RDF
|
||||||
|
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||||
|
xmlns="http://purl.org/rss/1.0/"
|
||||||
|
>
|
||||||
|
|
||||||
|
<channel rdf:about="http://meerkat.oreillynet.com/?_fl=rss1.0">
|
||||||
|
<title>Meerkat</title>
|
||||||
|
<link>http://meerkat.oreillynet.com</link>
|
||||||
|
</channel>
|
||||||
|
|
||||||
|
<item rdf:about="http://c.moreover.com/click/here.pl?r123">
|
||||||
|
<title>Title</title>
|
||||||
|
<description>Test</description>
|
||||||
|
</item>
|
||||||
|
</rdf:RDF>`
|
||||||
|
|
||||||
|
feed, err := Parse(bytes.NewBufferString(data))
|
||||||
|
if err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if feed.Entries[0].Hash != "37f5223ebd58639aa62a49afbb61df960efb7dc5db5181dfb3cedd9a49ad34c6" {
|
||||||
|
t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
|
||||||
|
}
|
||||||
|
|
||||||
|
if feed.Entries[0].URL != "http://meerkat.oreillynet.com" {
|
||||||
|
t.Errorf("Incorrect entry url, got: %s", feed.Entries[0].URL)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseInvalidXml(t *testing.T) {
|
||||||
|
data := `garbage`
|
||||||
|
_, err := Parse(bytes.NewBufferString(data))
|
||||||
|
if err == nil {
|
||||||
|
t.Error("Parse should returns an error")
|
||||||
|
}
|
||||||
|
|
||||||
|
if _, ok := err.(errors.LocalizedError); !ok {
|
||||||
|
t.Error("The error returned must be a LocalizedError")
|
||||||
|
}
|
||||||
|
}
|
71
reader/feed/rdf/rdf.go
Normal file
71
reader/feed/rdf/rdf.go
Normal file
|
@ -0,0 +1,71 @@
|
||||||
|
// Copyright 2017 Frédéric Guillot. All rights reserved.
|
||||||
|
// Use of this source code is governed by the Apache 2.0
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package rdf
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/xml"
|
||||||
|
|
||||||
|
"github.com/miniflux/miniflux2/helper"
|
||||||
|
"github.com/miniflux/miniflux2/reader/processor"
|
||||||
|
"github.com/miniflux/miniflux2/reader/sanitizer"
|
||||||
|
|
||||||
|
"github.com/miniflux/miniflux2/model"
|
||||||
|
)
|
||||||
|
|
||||||
|
type rdfFeed struct {
|
||||||
|
XMLName xml.Name `xml:"RDF"`
|
||||||
|
Title string `xml:"channel>title"`
|
||||||
|
Link string `xml:"channel>link"`
|
||||||
|
Creator string `xml:"channel>creator"`
|
||||||
|
Items []rdfItem `xml:"item"`
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *rdfFeed) Transform() *model.Feed {
|
||||||
|
feed := new(model.Feed)
|
||||||
|
feed.Title = sanitizer.StripTags(r.Title)
|
||||||
|
feed.SiteURL = r.Link
|
||||||
|
|
||||||
|
for _, item := range r.Items {
|
||||||
|
entry := item.Transform()
|
||||||
|
|
||||||
|
if entry.Author == "" && r.Creator != "" {
|
||||||
|
entry.Author = sanitizer.StripTags(r.Creator)
|
||||||
|
}
|
||||||
|
|
||||||
|
if entry.URL == "" {
|
||||||
|
entry.URL = feed.SiteURL
|
||||||
|
}
|
||||||
|
|
||||||
|
feed.Entries = append(feed.Entries, entry)
|
||||||
|
}
|
||||||
|
|
||||||
|
return feed
|
||||||
|
}
|
||||||
|
|
||||||
|
// rdfItem is the XML decoding target for a single <item> element.
type rdfItem struct {
	Title       string `xml:"title"`
	Link        string `xml:"link"`
	Description string `xml:"description"`
	// Creator is read from the item's <creator> child; the tests in this
	// package supply it as dc:creator.
	Creator string `xml:"creator"`
}
|
||||||
|
|
||||||
|
func (r *rdfItem) Transform() *model.Entry {
|
||||||
|
entry := new(model.Entry)
|
||||||
|
entry.Title = sanitizer.StripTags(r.Title)
|
||||||
|
entry.Author = sanitizer.StripTags(r.Creator)
|
||||||
|
entry.URL = r.Link
|
||||||
|
entry.Content = processor.ItemContentProcessor(entry.URL, r.Description)
|
||||||
|
entry.Hash = getHash(r)
|
||||||
|
return entry
|
||||||
|
}
|
||||||
|
|
||||||
|
func getHash(r *rdfItem) string {
|
||||||
|
value := r.Link
|
||||||
|
if value == "" {
|
||||||
|
value = r.Title + r.Description
|
||||||
|
}
|
||||||
|
|
||||||
|
return helper.Hash(value)
|
||||||
|
}
|
|
@ -22,7 +22,7 @@ func Parse(data io.Reader) (*model.Feed, error) {
|
||||||
|
|
||||||
err := decoder.Decode(feed)
|
err := decoder.Decode(feed)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, errors.NewLocalizedError("Unable to parse RSS feed: %v", err)
|
return nil, errors.NewLocalizedError("Unable to parse RSS feed: %v.", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
return feed.Transform(), nil
|
return feed.Transform(), nil
|
||||||
|
|
|
@ -20,7 +20,7 @@ func Parse(data io.Reader) (SubcriptionList, error) {
|
||||||
|
|
||||||
err := decoder.Decode(opml)
|
err := decoder.Decode(opml)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, errors.NewLocalizedError("Unable to parse OPML file: %v", err)
|
return nil, errors.NewLocalizedError("Unable to parse OPML file: %v.", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
return opml.Transform(), nil
|
return opml.Transform(), nil
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
// Code generated by go generate; DO NOT EDIT.
|
// Code generated by go generate; DO NOT EDIT.
|
||||||
// 2017-11-20 17:09:36.239163817 -0800 PST m=+0.005534172
|
// 2017-11-20 18:31:16.964945842 -0800 PST m=+0.004093224
|
||||||
|
|
||||||
package static
|
package static
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
// Code generated by go generate; DO NOT EDIT.
|
// Code generated by go generate; DO NOT EDIT.
|
||||||
// 2017-11-20 17:09:36.24112331 -0800 PST m=+0.007493665
|
// 2017-11-20 18:31:16.967667594 -0800 PST m=+0.006814976
|
||||||
|
|
||||||
package static
|
package static
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
// Code generated by go generate; DO NOT EDIT.
|
// Code generated by go generate; DO NOT EDIT.
|
||||||
// 2017-11-20 17:09:36.242888415 -0800 PST m=+0.009258770
|
// 2017-11-20 18:31:16.972315949 -0800 PST m=+0.011463331
|
||||||
|
|
||||||
package static
|
package static
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
// Code generated by go generate; DO NOT EDIT.
|
// Code generated by go generate; DO NOT EDIT.
|
||||||
// 2017-11-20 17:09:36.256513528 -0800 PST m=+0.022883883
|
// 2017-11-20 18:31:16.991583598 -0800 PST m=+0.030730980
|
||||||
|
|
||||||
package template
|
package template
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
// Code generated by go generate; DO NOT EDIT.
|
// Code generated by go generate; DO NOT EDIT.
|
||||||
// 2017-11-20 17:09:36.24386504 -0800 PST m=+0.010235395
|
// 2017-11-20 18:31:16.974386894 -0800 PST m=+0.013534276
|
||||||
|
|
||||||
package template
|
package template
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
// Code generated by go generate; DO NOT EDIT.
|
// Code generated by go generate; DO NOT EDIT.
|
||||||
// 2017-11-20 17:09:36.23789781 -0800 PST m=+0.004268165
|
// 2017-11-20 18:31:16.963285699 -0800 PST m=+0.002433081
|
||||||
|
|
||||||
package sql
|
package sql
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue