Parse podcast categories
This commit is contained in:
parent
f8e50947f2
commit
6d97f8b458
3 changed files with 113 additions and 40 deletions
|
@ -22,6 +22,17 @@ type ItunesFeedElement struct {
|
|||
ItunesType string `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd type"`
|
||||
}
|
||||
|
||||
func (i *ItunesFeedElement) GetItunesCategories() []string {
|
||||
var categories []string
|
||||
for _, category := range i.ItunesCategories {
|
||||
categories = append(categories, category.Text)
|
||||
if category.SubCategory != nil {
|
||||
categories = append(categories, category.SubCategory.Text)
|
||||
}
|
||||
}
|
||||
return categories
|
||||
}
|
||||
|
||||
type ItunesItemElement struct {
|
||||
ItunesAuthor string `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd author"`
|
||||
ItunesEpisode string `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd episode"`
|
||||
|
|
|
@ -1434,18 +1434,17 @@ func TestParseEntryWithRSSDescriptionAndMediaDescription(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestParseEntryWithCategoryAndInnerHTML(t *testing.T) {
|
||||
func TestParseFeedWithCategories(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<rss xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">
|
||||
<channel>
|
||||
<title>Example</title>
|
||||
<link>https://example.org/</link>
|
||||
<atom:link href="https://example.org/rss" type="application/rss+xml" rel="self"></atom:link>
|
||||
<category>Category 1</category>
|
||||
<category><![CDATA[Category 2]]></category>
|
||||
<item>
|
||||
<title>Test</title>
|
||||
<link>https://example.org/item</link>
|
||||
<category>Category 1</category>
|
||||
<category>Category 2</category>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>`
|
||||
|
@ -1459,27 +1458,99 @@ func TestParseEntryWithCategoryAndInnerHTML(t *testing.T) {
|
|||
t.Errorf("Incorrect number of tags, got: %d", len(feed.Entries[0].Tags))
|
||||
}
|
||||
|
||||
expected := "Category 2"
|
||||
result := feed.Entries[0].Tags[1]
|
||||
if result != expected {
|
||||
t.Errorf("Incorrect entry category, got %q instead of %q", result, expected)
|
||||
expected := []string{"Category 1", "Category 2"}
|
||||
result := feed.Entries[0].Tags
|
||||
|
||||
for i, tag := range result {
|
||||
if tag != expected[i] {
|
||||
t.Errorf("Incorrect tag, got: %q", tag)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseEntryWithCategoryAndCDATA(t *testing.T) {
|
||||
func TestParseEntryWithCategories(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<rss xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">
|
||||
<channel>
|
||||
<title>Example</title>
|
||||
<link>https://example.org/</link>
|
||||
<atom:link href="https://example.org/rss" type="application/rss+xml" rel="self"></atom:link>
|
||||
<category>Category 3</category>
|
||||
<item>
|
||||
<title>Test</title>
|
||||
<link>https://example.org/item</link>
|
||||
<category>Category 1</category>
|
||||
<category><![CDATA[Category 2]]></category>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>`
|
||||
|
||||
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if len(feed.Entries[0].Tags) != 3 {
|
||||
t.Errorf("Incorrect number of tags, got: %d", len(feed.Entries[0].Tags))
|
||||
}
|
||||
|
||||
expected := []string{"Category 1", "Category 2", "Category 3"}
|
||||
result := feed.Entries[0].Tags
|
||||
|
||||
for i, tag := range result {
|
||||
if tag != expected[i] {
|
||||
t.Errorf("Incorrect tag, got: %q", tag)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseFeedWithItunesCategories(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<rss xmlns:atom="http://www.w3.org/2005/Atom" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd" version="2.0">
|
||||
<channel>
|
||||
<title>Example</title>
|
||||
<link>https://example.org/</link>
|
||||
<itunes:category text="Society & Culture">
|
||||
<itunes:category text="Documentary" />
|
||||
</itunes:category>
|
||||
<itunes:category text="Health">
|
||||
<itunes:category text="Mental Health" />
|
||||
</itunes:category>
|
||||
<item>
|
||||
<title>Test</title>
|
||||
<link>https://example.org/item</link>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>`
|
||||
|
||||
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if len(feed.Entries[0].Tags) != 4 {
|
||||
t.Errorf("Incorrect number of tags, got: %d", len(feed.Entries[0].Tags))
|
||||
}
|
||||
|
||||
expected := []string{"Society & Culture", "Documentary", "Health", "Mental Health"}
|
||||
result := feed.Entries[0].Tags
|
||||
|
||||
for i, tag := range result {
|
||||
if tag != expected[i] {
|
||||
t.Errorf("Incorrect tag, got: %q", tag)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseFeedWithGooglePlayCategory(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<rss xmlns:atom="http://www.w3.org/2005/Atom" xmlns:gplay="http://www.google.com/schemas/play-podcasts/1.0" version="2.0">
|
||||
<channel>
|
||||
<title>Example</title>
|
||||
<link>https://example.org/</link>
|
||||
<gplay:category text="Art"></gplay:category>
|
||||
<item>
|
||||
<title>Test</title>
|
||||
<link>https://example.org/item</link>
|
||||
<author>
|
||||
by <![CDATA[Foo Bar]]>
|
||||
</author>
|
||||
<category>Sample Category</category>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>`
|
||||
|
@ -1493,10 +1564,13 @@ func TestParseEntryWithCategoryAndCDATA(t *testing.T) {
|
|||
t.Errorf("Incorrect number of tags, got: %d", len(feed.Entries[0].Tags))
|
||||
}
|
||||
|
||||
expected := "Sample Category"
|
||||
result := feed.Entries[0].Tags[0]
|
||||
if result != expected {
|
||||
t.Errorf("Incorrect entry category, got %q instead of %q", result, expected)
|
||||
expected := []string{"Art"}
|
||||
result := feed.Entries[0].Tags
|
||||
|
||||
for i, tag := range result {
|
||||
if tag != expected[i] {
|
||||
t.Errorf("Incorrect tag, got: %q", tag)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -31,6 +31,7 @@ type rssFeed struct {
|
|||
}
|
||||
|
||||
type rssChannel struct {
|
||||
Categories []string `xml:"rss category"`
|
||||
Title string `xml:"rss title"`
|
||||
Link string `xml:"rss link"`
|
||||
ImageURL string `xml:"rss image>url"`
|
||||
|
@ -111,6 +112,13 @@ func (r *rssFeed) Transform(baseURL string) *model.Feed {
|
|||
entry.Title = entry.URL
|
||||
}
|
||||
|
||||
entry.Tags = append(entry.Tags, r.Channel.Categories...)
|
||||
entry.Tags = append(entry.Tags, r.Channel.GetItunesCategories()...)
|
||||
|
||||
if r.Channel.GooglePlayCategory.Text != "" {
|
||||
entry.Tags = append(entry.Tags, r.Channel.GooglePlayCategory.Text)
|
||||
}
|
||||
|
||||
feed.Entries = append(feed.Entries, entry)
|
||||
}
|
||||
|
||||
|
@ -165,12 +173,6 @@ type rssEnclosure struct {
|
|||
Length string `xml:"length,attr"`
|
||||
}
|
||||
|
||||
type rssCategory struct {
|
||||
XMLName xml.Name
|
||||
Data string `xml:",chardata"`
|
||||
Inner string `xml:",innerxml"`
|
||||
}
|
||||
|
||||
func (enclosure *rssEnclosure) Size() int64 {
|
||||
if enclosure.Length == "" {
|
||||
return 0
|
||||
|
@ -188,7 +190,7 @@ type rssItem struct {
|
|||
Author rssAuthor `xml:"rss author"`
|
||||
Comments string `xml:"rss comments"`
|
||||
EnclosureLinks []rssEnclosure `xml:"rss enclosure"`
|
||||
Categories []rssCategory `xml:"rss category"`
|
||||
Categories []string `xml:"rss category"`
|
||||
dublincore.DublinCoreItemElement
|
||||
FeedBurnerElement
|
||||
media.Element
|
||||
|
@ -208,7 +210,7 @@ func (r *rssItem) Transform() *model.Entry {
|
|||
entry.Content = r.entryContent()
|
||||
entry.Title = r.entryTitle()
|
||||
entry.Enclosures = r.entryEnclosures()
|
||||
entry.Tags = r.entryCategories()
|
||||
entry.Tags = r.Categories
|
||||
if duration, err := normalizeDuration(r.ItunesDuration); err == nil {
|
||||
entry.ReadingTime = duration
|
||||
}
|
||||
|
@ -383,20 +385,6 @@ func (r *rssItem) entryEnclosures() model.EnclosureList {
|
|||
return enclosures
|
||||
}
|
||||
|
||||
func (r *rssItem) entryCategories() []string {
|
||||
categoryList := make([]string, 0)
|
||||
|
||||
for _, rssCategory := range r.Categories {
|
||||
if strings.Contains(rssCategory.Inner, "<![CDATA[") {
|
||||
categoryList = append(categoryList, strings.TrimSpace(rssCategory.Data))
|
||||
} else {
|
||||
categoryList = append(categoryList, strings.TrimSpace(rssCategory.Inner))
|
||||
}
|
||||
}
|
||||
|
||||
return categoryList
|
||||
}
|
||||
|
||||
func (r *rssItem) entryCommentsURL() string {
|
||||
commentsURL := strings.TrimSpace(r.Comments)
|
||||
if commentsURL != "" && urllib.IsAbsoluteURL(commentsURL) {
|
||||
|
|
Loading…
Reference in a new issue