Parse podcast categories
This commit is contained in:
parent
f8e50947f2
commit
6d97f8b458
3 changed files with 113 additions and 40 deletions
|
@ -22,6 +22,17 @@ type ItunesFeedElement struct {
|
||||||
ItunesType string `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd type"`
|
ItunesType string `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd type"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (i *ItunesFeedElement) GetItunesCategories() []string {
|
||||||
|
var categories []string
|
||||||
|
for _, category := range i.ItunesCategories {
|
||||||
|
categories = append(categories, category.Text)
|
||||||
|
if category.SubCategory != nil {
|
||||||
|
categories = append(categories, category.SubCategory.Text)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return categories
|
||||||
|
}
|
||||||
|
|
||||||
type ItunesItemElement struct {
|
type ItunesItemElement struct {
|
||||||
ItunesAuthor string `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd author"`
|
ItunesAuthor string `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd author"`
|
||||||
ItunesEpisode string `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd episode"`
|
ItunesEpisode string `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd episode"`
|
||||||
|
|
|
@ -1434,18 +1434,17 @@ func TestParseEntryWithRSSDescriptionAndMediaDescription(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestParseEntryWithCategoryAndInnerHTML(t *testing.T) {
|
func TestParseFeedWithCategories(t *testing.T) {
|
||||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||||
<rss xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">
|
<rss xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">
|
||||||
<channel>
|
<channel>
|
||||||
<title>Example</title>
|
<title>Example</title>
|
||||||
<link>https://example.org/</link>
|
<link>https://example.org/</link>
|
||||||
<atom:link href="https://example.org/rss" type="application/rss+xml" rel="self"></atom:link>
|
<category>Category 1</category>
|
||||||
|
<category><![CDATA[Category 2]]></category>
|
||||||
<item>
|
<item>
|
||||||
<title>Test</title>
|
<title>Test</title>
|
||||||
<link>https://example.org/item</link>
|
<link>https://example.org/item</link>
|
||||||
<category>Category 1</category>
|
|
||||||
<category>Category 2</category>
|
|
||||||
</item>
|
</item>
|
||||||
</channel>
|
</channel>
|
||||||
</rss>`
|
</rss>`
|
||||||
|
@ -1459,27 +1458,99 @@ func TestParseEntryWithCategoryAndInnerHTML(t *testing.T) {
|
||||||
t.Errorf("Incorrect number of tags, got: %d", len(feed.Entries[0].Tags))
|
t.Errorf("Incorrect number of tags, got: %d", len(feed.Entries[0].Tags))
|
||||||
}
|
}
|
||||||
|
|
||||||
expected := "Category 2"
|
expected := []string{"Category 1", "Category 2"}
|
||||||
result := feed.Entries[0].Tags[1]
|
result := feed.Entries[0].Tags
|
||||||
if result != expected {
|
|
||||||
t.Errorf("Incorrect entry category, got %q instead of %q", result, expected)
|
for i, tag := range result {
|
||||||
|
if tag != expected[i] {
|
||||||
|
t.Errorf("Incorrect tag, got: %q", tag)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestParseEntryWithCategoryAndCDATA(t *testing.T) {
|
func TestParseEntryWithCategories(t *testing.T) {
|
||||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||||
<rss xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">
|
<rss xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">
|
||||||
<channel>
|
<channel>
|
||||||
<title>Example</title>
|
<title>Example</title>
|
||||||
<link>https://example.org/</link>
|
<link>https://example.org/</link>
|
||||||
<atom:link href="https://example.org/rss" type="application/rss+xml" rel="self"></atom:link>
|
<category>Category 3</category>
|
||||||
|
<item>
|
||||||
|
<title>Test</title>
|
||||||
|
<link>https://example.org/item</link>
|
||||||
|
<category>Category 1</category>
|
||||||
|
<category><![CDATA[Category 2]]></category>
|
||||||
|
</item>
|
||||||
|
</channel>
|
||||||
|
</rss>`
|
||||||
|
|
||||||
|
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(feed.Entries[0].Tags) != 3 {
|
||||||
|
t.Errorf("Incorrect number of tags, got: %d", len(feed.Entries[0].Tags))
|
||||||
|
}
|
||||||
|
|
||||||
|
expected := []string{"Category 1", "Category 2", "Category 3"}
|
||||||
|
result := feed.Entries[0].Tags
|
||||||
|
|
||||||
|
for i, tag := range result {
|
||||||
|
if tag != expected[i] {
|
||||||
|
t.Errorf("Incorrect tag, got: %q", tag)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseFeedWithItunesCategories(t *testing.T) {
|
||||||
|
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<rss xmlns:atom="http://www.w3.org/2005/Atom" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd" version="2.0">
|
||||||
|
<channel>
|
||||||
|
<title>Example</title>
|
||||||
|
<link>https://example.org/</link>
|
||||||
|
<itunes:category text="Society & Culture">
|
||||||
|
<itunes:category text="Documentary" />
|
||||||
|
</itunes:category>
|
||||||
|
<itunes:category text="Health">
|
||||||
|
<itunes:category text="Mental Health" />
|
||||||
|
</itunes:category>
|
||||||
|
<item>
|
||||||
|
<title>Test</title>
|
||||||
|
<link>https://example.org/item</link>
|
||||||
|
</item>
|
||||||
|
</channel>
|
||||||
|
</rss>`
|
||||||
|
|
||||||
|
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(feed.Entries[0].Tags) != 4 {
|
||||||
|
t.Errorf("Incorrect number of tags, got: %d", len(feed.Entries[0].Tags))
|
||||||
|
}
|
||||||
|
|
||||||
|
expected := []string{"Society & Culture", "Documentary", "Health", "Mental Health"}
|
||||||
|
result := feed.Entries[0].Tags
|
||||||
|
|
||||||
|
for i, tag := range result {
|
||||||
|
if tag != expected[i] {
|
||||||
|
t.Errorf("Incorrect tag, got: %q", tag)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseFeedWithGooglePlayCategory(t *testing.T) {
|
||||||
|
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<rss xmlns:atom="http://www.w3.org/2005/Atom" xmlns:gplay="http://www.google.com/schemas/play-podcasts/1.0" version="2.0">
|
||||||
|
<channel>
|
||||||
|
<title>Example</title>
|
||||||
|
<link>https://example.org/</link>
|
||||||
|
<gplay:category text="Art"></gplay:category>
|
||||||
<item>
|
<item>
|
||||||
<title>Test</title>
|
<title>Test</title>
|
||||||
<link>https://example.org/item</link>
|
<link>https://example.org/item</link>
|
||||||
<author>
|
|
||||||
by <![CDATA[Foo Bar]]>
|
|
||||||
</author>
|
|
||||||
<category>Sample Category</category>
|
|
||||||
</item>
|
</item>
|
||||||
</channel>
|
</channel>
|
||||||
</rss>`
|
</rss>`
|
||||||
|
@ -1493,10 +1564,13 @@ func TestParseEntryWithCategoryAndCDATA(t *testing.T) {
|
||||||
t.Errorf("Incorrect number of tags, got: %d", len(feed.Entries[0].Tags))
|
t.Errorf("Incorrect number of tags, got: %d", len(feed.Entries[0].Tags))
|
||||||
}
|
}
|
||||||
|
|
||||||
expected := "Sample Category"
|
expected := []string{"Art"}
|
||||||
result := feed.Entries[0].Tags[0]
|
result := feed.Entries[0].Tags
|
||||||
if result != expected {
|
|
||||||
t.Errorf("Incorrect entry category, got %q instead of %q", result, expected)
|
for i, tag := range result {
|
||||||
|
if tag != expected[i] {
|
||||||
|
t.Errorf("Incorrect tag, got: %q", tag)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -31,6 +31,7 @@ type rssFeed struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
type rssChannel struct {
|
type rssChannel struct {
|
||||||
|
Categories []string `xml:"rss category"`
|
||||||
Title string `xml:"rss title"`
|
Title string `xml:"rss title"`
|
||||||
Link string `xml:"rss link"`
|
Link string `xml:"rss link"`
|
||||||
ImageURL string `xml:"rss image>url"`
|
ImageURL string `xml:"rss image>url"`
|
||||||
|
@ -111,6 +112,13 @@ func (r *rssFeed) Transform(baseURL string) *model.Feed {
|
||||||
entry.Title = entry.URL
|
entry.Title = entry.URL
|
||||||
}
|
}
|
||||||
|
|
||||||
|
entry.Tags = append(entry.Tags, r.Channel.Categories...)
|
||||||
|
entry.Tags = append(entry.Tags, r.Channel.GetItunesCategories()...)
|
||||||
|
|
||||||
|
if r.Channel.GooglePlayCategory.Text != "" {
|
||||||
|
entry.Tags = append(entry.Tags, r.Channel.GooglePlayCategory.Text)
|
||||||
|
}
|
||||||
|
|
||||||
feed.Entries = append(feed.Entries, entry)
|
feed.Entries = append(feed.Entries, entry)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -165,12 +173,6 @@ type rssEnclosure struct {
|
||||||
Length string `xml:"length,attr"`
|
Length string `xml:"length,attr"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// rssCategory holds one decoded XML element in two forms: its character data
// and its raw inner XML.
type rssCategory struct {
|
|
||||||
// XMLName records the name of the decoded element.
XMLName xml.Name
|
|
||||||
// Data receives the element's character data (",chardata").
Data string `xml:",chardata"`
|
|
||||||
// Inner receives the element's raw, unparsed inner XML (",innerxml").
Inner string `xml:",innerxml"`
|
|
||||||
}
|
|
||||||
|
|
||||||
func (enclosure *rssEnclosure) Size() int64 {
|
func (enclosure *rssEnclosure) Size() int64 {
|
||||||
if enclosure.Length == "" {
|
if enclosure.Length == "" {
|
||||||
return 0
|
return 0
|
||||||
|
@ -188,7 +190,7 @@ type rssItem struct {
|
||||||
Author rssAuthor `xml:"rss author"`
|
Author rssAuthor `xml:"rss author"`
|
||||||
Comments string `xml:"rss comments"`
|
Comments string `xml:"rss comments"`
|
||||||
EnclosureLinks []rssEnclosure `xml:"rss enclosure"`
|
EnclosureLinks []rssEnclosure `xml:"rss enclosure"`
|
||||||
Categories []rssCategory `xml:"rss category"`
|
Categories []string `xml:"rss category"`
|
||||||
dublincore.DublinCoreItemElement
|
dublincore.DublinCoreItemElement
|
||||||
FeedBurnerElement
|
FeedBurnerElement
|
||||||
media.Element
|
media.Element
|
||||||
|
@ -208,7 +210,7 @@ func (r *rssItem) Transform() *model.Entry {
|
||||||
entry.Content = r.entryContent()
|
entry.Content = r.entryContent()
|
||||||
entry.Title = r.entryTitle()
|
entry.Title = r.entryTitle()
|
||||||
entry.Enclosures = r.entryEnclosures()
|
entry.Enclosures = r.entryEnclosures()
|
||||||
entry.Tags = r.entryCategories()
|
entry.Tags = r.Categories
|
||||||
if duration, err := normalizeDuration(r.ItunesDuration); err == nil {
|
if duration, err := normalizeDuration(r.ItunesDuration); err == nil {
|
||||||
entry.ReadingTime = duration
|
entry.ReadingTime = duration
|
||||||
}
|
}
|
||||||
|
@ -383,20 +385,6 @@ func (r *rssItem) entryEnclosures() model.EnclosureList {
|
||||||
return enclosures
|
return enclosures
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *rssItem) entryCategories() []string {
|
|
||||||
categoryList := make([]string, 0)
|
|
||||||
|
|
||||||
for _, rssCategory := range r.Categories {
|
|
||||||
if strings.Contains(rssCategory.Inner, "<![CDATA[") {
|
|
||||||
categoryList = append(categoryList, strings.TrimSpace(rssCategory.Data))
|
|
||||||
} else {
|
|
||||||
categoryList = append(categoryList, strings.TrimSpace(rssCategory.Inner))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return categoryList
|
|
||||||
}
|
|
||||||
|
|
||||||
func (r *rssItem) entryCommentsURL() string {
|
func (r *rssItem) entryCommentsURL() string {
|
||||||
commentsURL := strings.TrimSpace(r.Comments)
|
commentsURL := strings.TrimSpace(r.Comments)
|
||||||
if commentsURL != "" && urllib.IsAbsoluteURL(commentsURL) {
|
if commentsURL != "" && urllib.IsAbsoluteURL(commentsURL) {
|
||||||
|
|
Loading…
Reference in a new issue