Strip HTML tags from DublinCore Creator tags
This commit is contained in:
parent
344a237af8
commit
36f013670e
5 changed files with 53 additions and 20 deletions
|
@ -1,16 +1,30 @@
|
||||||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||||
// SPDX-License-Identifier: Apache-2.0
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
package rdf // import "miniflux.app/v2/internal/reader/rdf"
|
package dublincore // import "miniflux.app/v2/internal/reader/dublincore"
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"miniflux.app/v2/internal/reader/sanitizer"
|
||||||
|
)
|
||||||
|
|
||||||
// DublinCoreFeedElement represents Dublin Core feed XML elements.
|
// DublinCoreFeedElement represents Dublin Core feed XML elements.
|
||||||
type DublinCoreFeedElement struct {
|
type DublinCoreFeedElement struct {
|
||||||
DublinCoreCreator string `xml:"http://purl.org/dc/elements/1.1/ channel>creator"`
|
DublinCoreCreator string `xml:"http://purl.org/dc/elements/1.1/ channel>creator"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// DublinCoreEntryElement represents Dublin Core entry XML elements.
|
func (feed *DublinCoreFeedElement) GetSanitizedCreator() string {
|
||||||
type DublinCoreEntryElement struct {
|
return strings.TrimSpace(sanitizer.StripTags(feed.DublinCoreCreator))
|
||||||
|
}
|
||||||
|
|
||||||
|
// DublinCoreItemElement represents Dublin Core entry XML elements.
|
||||||
|
type DublinCoreItemElement struct {
|
||||||
DublinCoreDate string `xml:"http://purl.org/dc/elements/1.1/ date"`
|
DublinCoreDate string `xml:"http://purl.org/dc/elements/1.1/ date"`
|
||||||
DublinCoreCreator string `xml:"http://purl.org/dc/elements/1.1/ creator"`
|
DublinCoreCreator string `xml:"http://purl.org/dc/elements/1.1/ creator"`
|
||||||
DublinCoreContent string `xml:"http://purl.org/rss/1.0/modules/content/ encoded"`
|
DublinCoreContent string `xml:"http://purl.org/rss/1.0/modules/content/ encoded"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (item *DublinCoreItemElement) GetSanitizedCreator() string {
|
||||||
|
return strings.TrimSpace(sanitizer.StripTags(item.DublinCoreCreator))
|
||||||
|
}
|
|
@ -349,6 +349,34 @@ func TestParseItemWithDublicCoreDate(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestParseItemWithEncodedHTMLInDCCreatorField(t *testing.T) {
|
||||||
|
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:slash="http://purl.org/rss/1.0/modules/slash/">
|
||||||
|
<channel>
|
||||||
|
<title>Example</title>
|
||||||
|
<link>http://example.org</link>
|
||||||
|
</channel>
|
||||||
|
|
||||||
|
<item>
|
||||||
|
<title>Title</title>
|
||||||
|
<description>Test</description>
|
||||||
|
<link>http://example.org/test.html</link>
|
||||||
|
<dc:creator>&lt;a href="http://example.org/author1"&gt;Author 1&lt;/a&gt; (University 1), &lt;a href="http://example.org/author2"&gt;Author 2&lt;/a&gt; (University 2)</dc:creator>
|
||||||
|
<dc:date>2018-04-10T05:00:00+00:00</dc:date>
|
||||||
|
</item>
|
||||||
|
</rdf:RDF>`
|
||||||
|
|
||||||
|
feed, err := Parse("http://example.org", bytes.NewBufferString(data))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
expectedAuthor := "Author 1 (University 1), Author 2 (University 2)"
|
||||||
|
if feed.Entries[0].Author != expectedAuthor {
|
||||||
|
t.Errorf("Incorrect entry author, got: %s, want: %s", feed.Entries[0].Author, expectedAuthor)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestParseItemWithoutDate(t *testing.T) {
|
func TestParseItemWithoutDate(t *testing.T) {
|
||||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||||
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/">
|
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/">
|
||||||
|
|
|
@ -13,6 +13,7 @@ import (
|
||||||
"miniflux.app/v2/internal/logger"
|
"miniflux.app/v2/internal/logger"
|
||||||
"miniflux.app/v2/internal/model"
|
"miniflux.app/v2/internal/model"
|
||||||
"miniflux.app/v2/internal/reader/date"
|
"miniflux.app/v2/internal/reader/date"
|
||||||
|
"miniflux.app/v2/internal/reader/dublincore"
|
||||||
"miniflux.app/v2/internal/reader/sanitizer"
|
"miniflux.app/v2/internal/reader/sanitizer"
|
||||||
"miniflux.app/v2/internal/urllib"
|
"miniflux.app/v2/internal/urllib"
|
||||||
)
|
)
|
||||||
|
@ -22,7 +23,7 @@ type rdfFeed struct {
|
||||||
Title string `xml:"channel>title"`
|
Title string `xml:"channel>title"`
|
||||||
Link string `xml:"channel>link"`
|
Link string `xml:"channel>link"`
|
||||||
Items []rdfItem `xml:"item"`
|
Items []rdfItem `xml:"item"`
|
||||||
DublinCoreFeedElement
|
dublincore.DublinCoreFeedElement
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *rdfFeed) Transform(baseURL string) *model.Feed {
|
func (r *rdfFeed) Transform(baseURL string) *model.Feed {
|
||||||
|
@ -38,7 +39,7 @@ func (r *rdfFeed) Transform(baseURL string) *model.Feed {
|
||||||
for _, item := range r.Items {
|
for _, item := range r.Items {
|
||||||
entry := item.Transform()
|
entry := item.Transform()
|
||||||
if entry.Author == "" && r.DublinCoreCreator != "" {
|
if entry.Author == "" && r.DublinCoreCreator != "" {
|
||||||
entry.Author = strings.TrimSpace(r.DublinCoreCreator)
|
entry.Author = r.GetSanitizedCreator()
|
||||||
}
|
}
|
||||||
|
|
||||||
if entry.URL == "" {
|
if entry.URL == "" {
|
||||||
|
@ -60,7 +61,7 @@ type rdfItem struct {
|
||||||
Title string `xml:"title"`
|
Title string `xml:"title"`
|
||||||
Link string `xml:"link"`
|
Link string `xml:"link"`
|
||||||
Description string `xml:"description"`
|
Description string `xml:"description"`
|
||||||
DublinCoreEntryElement
|
dublincore.DublinCoreItemElement
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *rdfItem) Transform() *model.Entry {
|
func (r *rdfItem) Transform() *model.Entry {
|
||||||
|
@ -88,7 +89,7 @@ func (r *rdfItem) entryContent() string {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *rdfItem) entryAuthor() string {
|
func (r *rdfItem) entryAuthor() string {
|
||||||
return strings.TrimSpace(r.DublinCoreCreator)
|
return r.GetSanitizedCreator()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *rdfItem) entryURL() string {
|
func (r *rdfItem) entryURL() string {
|
||||||
|
|
|
@ -1,11 +0,0 @@
|
||||||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
|
||||||
// SPDX-License-Identifier: Apache-2.0
|
|
||||||
|
|
||||||
package rss // import "miniflux.app/v2/internal/reader/rss"
|
|
||||||
|
|
||||||
// DublinCoreElement represents Dublin Core XML elements.
|
|
||||||
type DublinCoreElement struct {
|
|
||||||
DublinCoreDate string `xml:"http://purl.org/dc/elements/1.1/ date"`
|
|
||||||
DublinCoreCreator string `xml:"http://purl.org/dc/elements/1.1/ creator"`
|
|
||||||
DublinCoreContent string `xml:"http://purl.org/rss/1.0/modules/content/ encoded"`
|
|
||||||
}
|
|
|
@ -15,6 +15,7 @@ import (
|
||||||
"miniflux.app/v2/internal/logger"
|
"miniflux.app/v2/internal/logger"
|
||||||
"miniflux.app/v2/internal/model"
|
"miniflux.app/v2/internal/model"
|
||||||
"miniflux.app/v2/internal/reader/date"
|
"miniflux.app/v2/internal/reader/date"
|
||||||
|
"miniflux.app/v2/internal/reader/dublincore"
|
||||||
"miniflux.app/v2/internal/reader/media"
|
"miniflux.app/v2/internal/reader/media"
|
||||||
"miniflux.app/v2/internal/reader/sanitizer"
|
"miniflux.app/v2/internal/reader/sanitizer"
|
||||||
"miniflux.app/v2/internal/urllib"
|
"miniflux.app/v2/internal/urllib"
|
||||||
|
@ -182,7 +183,7 @@ type rssItem struct {
|
||||||
CommentLinks []rssCommentLink `xml:"comments"`
|
CommentLinks []rssCommentLink `xml:"comments"`
|
||||||
EnclosureLinks []rssEnclosure `xml:"enclosure"`
|
EnclosureLinks []rssEnclosure `xml:"enclosure"`
|
||||||
Categories []rssCategory `xml:"category"`
|
Categories []rssCategory `xml:"category"`
|
||||||
DublinCoreElement
|
dublincore.DublinCoreItemElement
|
||||||
FeedBurnerElement
|
FeedBurnerElement
|
||||||
PodcastEntryElement
|
PodcastEntryElement
|
||||||
media.Element
|
media.Element
|
||||||
|
@ -250,7 +251,7 @@ func (r *rssItem) entryAuthor() string {
|
||||||
}
|
}
|
||||||
|
|
||||||
if author == "" {
|
if author == "" {
|
||||||
author = r.DublinCoreCreator
|
author = r.GetSanitizedCreator()
|
||||||
}
|
}
|
||||||
|
|
||||||
return sanitizer.StripTags(strings.TrimSpace(author))
|
return sanitizer.StripTags(strings.TrimSpace(author))
|
||||||
|
|
Loading…
Reference in a new issue