Add support for media elements in RSS 2 feeds

This commit is contained in:
Frédéric Guillot 2019-11-28 21:21:00 -08:00
parent c43c9458a9
commit f90e9dfab0
2 changed files with 221 additions and 19 deletions

View file

@ -652,3 +652,122 @@ func TestParseWithInvalidCharacterEntity(t *testing.T) {
t.Errorf(`Incorrect url, got: %q`, feed.SiteURL)
}
}
// TestParseEntryWithMediaGroup parses an item that carries a plain
// <enclosure>, a <media:group> of five <media:content> elements and a
// <media:thumbnail>, and checks the resulting enclosure list.
//
// The fixture lists file3.torrent both as the <enclosure> and inside the
// media group; the expected list contains it only once (with the enclosure's
// length), which is why seven source elements yield six enclosures. The
// expected order is thumbnail, enclosure, then the remaining media contents.
func TestParseEntryWithMediaGroup(t *testing.T) {
	data := `<?xml version="1.0" encoding="utf-8"?>
<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/">
<channel>
<title>My Example Feed</title>
<link>http://example.org</link>
<item>
<title>Example Item</title>
<link>http://www.example.org/entries/1</link>
<enclosure type="application/x-bittorrent" url="https://example.org/file3.torrent" length="670053113">
</enclosure>
<media:group>
<media:content type="application/x-bittorrent" url="https://example.org/file1.torrent"></media:content>
<media:content type="application/x-bittorrent" url="https://example.org/file2.torrent" isDefault="true"></media:content>
<media:content type="application/x-bittorrent" url="https://example.org/file3.torrent"></media:content>
<media:content type="application/x-bittorrent" url="https://example.org/file4.torrent"></media:content>
<media:content type="application/x-bittorrent" url="https://example.org/file5.torrent" fileSize="42"></media:content>
<media:rating>nonadult</media:rating>
</media:group>
<media:thumbnail url="https://example.org/image.jpg" height="122" width="223"></media:thumbnail>
</item>
</channel>
</rss>`

	feed, err := Parse(bytes.NewBufferString(data))
	if err != nil {
		t.Fatal(err)
	}

	// Fatalf rather than Errorf: everything below indexes feed.Entries[0],
	// which would panic if the parser returned no entries.
	if len(feed.Entries) != 1 {
		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
	}

	// Fatalf here as well: the loop below indexes expectedResults by the
	// enclosure position, so the two lengths must match.
	if len(feed.Entries[0].Enclosures) != 6 {
		t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
	}

	expectedResults := []struct {
		url      string
		mimeType string
		size     int64
	}{
		{"https://example.org/image.jpg", "image/*", 0},
		{"https://example.org/file3.torrent", "application/x-bittorrent", 670053113},
		{"https://example.org/file1.torrent", "application/x-bittorrent", 0},
		{"https://example.org/file2.torrent", "application/x-bittorrent", 0},
		{"https://example.org/file4.torrent", "application/x-bittorrent", 0},
		{"https://example.org/file5.torrent", "application/x-bittorrent", 42},
	}

	for index, enclosure := range feed.Entries[0].Enclosures {
		if expectedResults[index].url != enclosure.URL {
			t.Errorf(`Unexpected enclosure URL, got %q instead of %q`, enclosure.URL, expectedResults[index].url)
		}

		if expectedResults[index].mimeType != enclosure.MimeType {
			t.Errorf(`Unexpected enclosure type, got %q instead of %q`, enclosure.MimeType, expectedResults[index].mimeType)
		}

		if expectedResults[index].size != enclosure.Size {
			t.Errorf(`Unexpected enclosure size, got %d instead of %d`, enclosure.Size, expectedResults[index].size)
		}
	}
}
// TestParseEntryWithMediaContent parses an item whose media elements sit
// directly under <item> (no <media:group>): one <media:thumbnail> and two
// <media:content> elements that declare medium="image" but no type.
//
// All three are expected as enclosures with the "image/*" MIME type and a
// size of 0, thumbnail first.
func TestParseEntryWithMediaContent(t *testing.T) {
	data := `<?xml version="1.0" encoding="utf-8"?>
<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/">
<channel>
<title>My Example Feed</title>
<link>http://example.org</link>
<item>
<title>Example Item</title>
<link>http://www.example.org/entries/1</link>
<media:thumbnail url="https://example.org/thumbnail.jpg" />
<media:content url="https://example.org/media1.jpg" medium="image">
<media:title type="html">Some Title for Media 1</media:title>
</media:content>
<media:content url="https://example.org/media2.jpg" medium="image" />
</item>
</channel>
</rss>`

	feed, err := Parse(bytes.NewBufferString(data))
	if err != nil {
		t.Fatal(err)
	}

	// Fatalf rather than Errorf: everything below indexes feed.Entries[0],
	// which would panic if the parser returned no entries.
	if len(feed.Entries) != 1 {
		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
	}

	// Fatalf here as well: the loop below indexes expectedResults by the
	// enclosure position, so the two lengths must match.
	if len(feed.Entries[0].Enclosures) != 3 {
		t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
	}

	expectedResults := []struct {
		url      string
		mimeType string
		size     int64
	}{
		{"https://example.org/thumbnail.jpg", "image/*", 0},
		{"https://example.org/media1.jpg", "image/*", 0},
		{"https://example.org/media2.jpg", "image/*", 0},
	}

	for index, enclosure := range feed.Entries[0].Enclosures {
		if expectedResults[index].url != enclosure.URL {
			t.Errorf(`Unexpected enclosure URL, got %q instead of %q`, enclosure.URL, expectedResults[index].url)
		}

		if expectedResults[index].mimeType != enclosure.MimeType {
			t.Errorf(`Unexpected enclosure type, got %q instead of %q`, enclosure.MimeType, expectedResults[index].mimeType)
		}

		if expectedResults[index].size != enclosure.Size {
			t.Errorf(`Unexpected enclosure size, got %d instead of %d`, enclosure.Size, expectedResults[index].size)
		}
	}
}

View file

@ -56,20 +56,71 @@ type rssEnclosure struct {
Length string `xml:"length,attr"`
}
// Size returns the enclosure's length attribute parsed as a base-10 integer.
//
// It returns 0 when the attribute is empty or is not a valid integer that
// fits in an int64, so a malformed feed yields an "unknown" size instead of
// a bogus one.
func (enclosure *rssEnclosure) Size() int64 {
	if enclosure.Length == "" {
		return 0
	}

	// bitSize 64 matches the int64 return type. With bitSize 0 the value is
	// parsed as a platform-sized int, which clamps anything above MaxInt32
	// on 32-bit builds.
	size, err := strconv.ParseInt(enclosure.Length, 10, 64)
	if err != nil {
		return 0
	}
	return size
}
// rssItem maps the child elements of an RSS item onto fields through their
// xml struct tags. Untagged-namespace fields match plain RSS elements; the
// others are bound to the feedburner, content, Dublin Core and Media RSS
// namespaces named in their tags.
//
// NOTE(review): the GUID..OrigEnclosureLink field list appears twice in this
// view, which looks like the removed and added sides of a diff rendered
// together — confirm the real struct declares each field only once.
type rssItem struct {
GUID string `xml:"guid"`
Title string `xml:"title"`
Links []rssLink `xml:"link"`
OriginalLink string `xml:"http://rssnamespace.org/feedburner/ext/1.0 origLink"`
CommentLinks []rssCommentLink `xml:"comments"`
Description string `xml:"description"`
EncodedContent string `xml:"http://purl.org/rss/1.0/modules/content/ encoded"`
PubDate string `xml:"pubDate"`
Date string `xml:"http://purl.org/dc/elements/1.1/ date"`
Authors []rssAuthor `xml:"author"`
Creator string `xml:"http://purl.org/dc/elements/1.1/ creator"`
EnclosureLinks []rssEnclosure `xml:"enclosure"`
OrigEnclosureLink string `xml:"http://rssnamespace.org/feedburner/ext/1.0 origEnclosureLink"`
GUID string `xml:"guid"`
Title string `xml:"title"`
Links []rssLink `xml:"link"`
OriginalLink string `xml:"http://rssnamespace.org/feedburner/ext/1.0 origLink"`
CommentLinks []rssCommentLink `xml:"comments"`
Description string `xml:"description"`
EncodedContent string `xml:"http://purl.org/rss/1.0/modules/content/ encoded"`
PubDate string `xml:"pubDate"`
Date string `xml:"http://purl.org/dc/elements/1.1/ date"`
Authors []rssAuthor `xml:"author"`
Creator string `xml:"http://purl.org/dc/elements/1.1/ creator"`
EnclosureLinks []rssEnclosure `xml:"enclosure"`
OrigEnclosureLink string `xml:"http://rssnamespace.org/feedburner/ext/1.0 origEnclosureLink"`
// Elements from the http://search.yahoo.com/mrss/ namespace: groups of
// content elements, content elements placed directly under the item, and
// thumbnails.
MediaGroup []rssMediaGroup `xml:"http://search.yahoo.com/mrss/ group"`
MediaContents []rssMediaContent `xml:"http://search.yahoo.com/mrss/ content"`
MediaThumbnails []rssMediaThumbnails `xml:"http://search.yahoo.com/mrss/ thumbnail"`
}
// rssMediaGroup is a container element whose child elements named "content"
// are collected into MediaList.
type rssMediaGroup struct {
MediaList []rssMediaContent `xml:"content"`
}
// rssMediaContent holds the attributes read from a single media content
// element. Every attribute is kept as the raw string found in the feed.
type rssMediaContent struct {
// URL is the value of the "url" attribute.
URL string `xml:"url,attr"`
// Type is the value of the "type" attribute; empty when absent.
Type string `xml:"type,attr"`
// FileSize is the unparsed value of the "fileSize" attribute.
FileSize string `xml:"fileSize,attr"`
// Medium is the value of the "medium" attribute (e.g. "image").
Medium string `xml:"medium,attr"`
}
// MimeType returns the MIME type to report for this media element.
//
// An explicit type attribute always wins and is returned unchanged. When it
// is missing, the medium attribute is mapped to a wildcard type ("image/*",
// "video/*" or "audio/*"); any other medium, including an empty one, falls
// back to "application/octet-stream".
func (mediaContent *rssMediaContent) MimeType() string {
	if mediaContent.Type != "" {
		return mediaContent.Type
	}

	switch mediaContent.Medium {
	case "image":
		return "image/*"
	case "video":
		return "video/*"
	case "audio":
		return "audio/*"
	default:
		return "application/octet-stream"
	}
}
// Size returns the fileSize attribute parsed as a base-10 integer.
//
// It returns 0 when the attribute is empty or is not a valid integer that
// fits in an int64, so a malformed feed yields an "unknown" size instead of
// a bogus one.
func (mediaContent *rssMediaContent) Size() int64 {
	if mediaContent.FileSize == "" {
		return 0
	}

	// bitSize 64 matches the int64 return type. With bitSize 0 the value is
	// parsed as a platform-sized int, which clamps anything above MaxInt32
	// on 32-bit builds.
	size, err := strconv.ParseInt(mediaContent.FileSize, 10, 64)
	if err != nil {
		return 0
	}
	return size
}
// rssMediaThumbnails holds the "url" attribute of a single thumbnail
// element; no other attribute of the element is kept.
type rssMediaThumbnails struct {
URL string `xml:"url,attr"`
}
func (r *rssFeed) SiteURL() string {
@ -200,9 +251,20 @@ func (r *rssItem) URL() string {
func (r *rssItem) Enclosures() model.EnclosureList {
enclosures := make(model.EnclosureList, 0)
duplicates := make(map[string]bool, 0)
for _, mediaThumbnail := range r.MediaThumbnails {
if _, found := duplicates[mediaThumbnail.URL]; !found {
duplicates[mediaThumbnail.URL] = true
enclosures = append(enclosures, &model.Enclosure{
URL: mediaThumbnail.URL,
MimeType: "image/*",
Size: 0,
})
}
}
for _, enclosure := range r.EnclosureLinks {
length, _ := strconv.ParseInt(enclosure.Length, 10, 0)
enclosureURL := enclosure.URL
if r.OrigEnclosureLink != "" {
@ -212,11 +274,32 @@ func (r *rssItem) Enclosures() model.EnclosureList {
}
}
enclosures = append(enclosures, &model.Enclosure{
URL: enclosureURL,
MimeType: enclosure.Type,
Size: length,
})
if _, found := duplicates[enclosureURL]; !found {
duplicates[enclosureURL] = true
enclosures = append(enclosures, &model.Enclosure{
URL: enclosureURL,
MimeType: enclosure.Type,
Size: enclosure.Size(),
})
}
}
for _, mediaContentItem := range r.MediaGroup {
for _, mediaContent := range mediaContentItem.MediaList {
r.MediaContents = append(r.MediaContents, mediaContent)
}
}
for _, mediaContent := range r.MediaContents {
if _, found := duplicates[mediaContent.URL]; !found {
duplicates[mediaContent.URL] = true
enclosures = append(enclosures, &model.Enclosure{
URL: mediaContent.URL,
MimeType: mediaContent.MimeType(),
Size: mediaContent.Size(),
})
}
}
return enclosures