Parse <category>
from Feeds (RSS, Atom and JSON)
This commit is contained in:
parent
ff8d68c151
commit
8f9ccc6540
12 changed files with 252 additions and 11 deletions
|
@ -132,6 +132,8 @@ func (h *handler) findEntries(w http.ResponseWriter, r *http.Request, feedID int
|
|||
return
|
||||
}
|
||||
|
||||
tags := request.QueryStringParamList(r, "tags")
|
||||
|
||||
builder := h.store.NewEntryQueryBuilder(userID)
|
||||
builder.WithFeedID(feedID)
|
||||
builder.WithCategoryID(categoryID)
|
||||
|
@ -140,6 +142,7 @@ func (h *handler) findEntries(w http.ResponseWriter, r *http.Request, feedID int
|
|||
builder.WithDirection(direction)
|
||||
builder.WithOffset(offset)
|
||||
builder.WithLimit(limit)
|
||||
builder.WithTags(tags)
|
||||
configureFilters(builder, r)
|
||||
|
||||
entries, err := builder.GetEntries()
|
||||
|
|
|
@ -218,6 +218,7 @@ type Entry struct {
|
|||
ReadingTime int `json:"reading_time"`
|
||||
Enclosures Enclosures `json:"enclosures,omitempty"`
|
||||
Feed *Feed `json:"feed,omitempty"`
|
||||
Tags []string `json:"tags"`
|
||||
}
|
||||
|
||||
// Entries represents a list of entries.
|
||||
|
|
|
@ -638,4 +638,10 @@ var migrations = []func(tx *sql.Tx) error{
|
|||
_, err = tx.Exec(sql)
|
||||
return err
|
||||
},
|
||||
func(tx *sql.Tx) (err error) {
|
||||
_, err = tx.Exec(`
|
||||
ALTER TABLE entries ADD COLUMN tags text[] default '{}';
|
||||
`)
|
||||
return
|
||||
},
|
||||
}
|
||||
|
|
|
@ -37,6 +37,7 @@ type Entry struct {
|
|||
ReadingTime int `json:"reading_time"`
|
||||
Enclosures EnclosureList `json:"enclosures"`
|
||||
Feed *Feed `json:"feed,omitempty"`
|
||||
Tags []string `json:"tags"`
|
||||
}
|
||||
|
||||
// Entries represents a list of entries.
|
||||
|
|
|
@ -80,14 +80,15 @@ func (a *atom10Feed) Transform(baseURL string) *model.Feed {
|
|||
}
|
||||
|
||||
type atom10Entry struct {
|
||||
ID string `xml:"id"`
|
||||
Title atom10Text `xml:"title"`
|
||||
Published string `xml:"published"`
|
||||
Updated string `xml:"updated"`
|
||||
Links atomLinks `xml:"link"`
|
||||
Summary atom10Text `xml:"summary"`
|
||||
Content atom10Text `xml:"http://www.w3.org/2005/Atom content"`
|
||||
Authors atomAuthors `xml:"author"`
|
||||
ID string `xml:"id"`
|
||||
Title atom10Text `xml:"title"`
|
||||
Published string `xml:"published"`
|
||||
Updated string `xml:"updated"`
|
||||
Links atomLinks `xml:"link"`
|
||||
Summary atom10Text `xml:"summary"`
|
||||
Content atom10Text `xml:"http://www.w3.org/2005/Atom content"`
|
||||
Authors atomAuthors `xml:"author"`
|
||||
Categories []atom10Category `xml:"category"`
|
||||
media.Element
|
||||
}
|
||||
|
||||
|
@ -101,6 +102,7 @@ func (a *atom10Entry) Transform() *model.Entry {
|
|||
entry.Title = a.entryTitle()
|
||||
entry.Enclosures = a.entryEnclosures()
|
||||
entry.CommentsURL = a.entryCommentsURL()
|
||||
entry.Tags = a.entryCategories()
|
||||
return entry
|
||||
}
|
||||
|
||||
|
@ -214,6 +216,20 @@ func (a *atom10Entry) entryEnclosures() model.EnclosureList {
|
|||
return enclosures
|
||||
}
|
||||
|
||||
func (r *atom10Entry) entryCategories() []string {
|
||||
var categoryList []string
|
||||
|
||||
for _, atomCategory := range r.Categories {
|
||||
if strings.TrimSpace(atomCategory.Label) != "" {
|
||||
categoryList = append(categoryList, strings.TrimSpace(atomCategory.Label))
|
||||
} else {
|
||||
categoryList = append(categoryList, strings.TrimSpace(atomCategory.Term))
|
||||
}
|
||||
}
|
||||
|
||||
return categoryList
|
||||
}
|
||||
|
||||
// See https://tools.ietf.org/html/rfc4685#section-4
|
||||
// If the type attribute of the atom:link is omitted, its value is assumed to be "application/atom+xml".
|
||||
// We accept only HTML or XHTML documents for now since the intention is to have the same behavior as RSS.
|
||||
|
@ -232,6 +248,11 @@ type atom10Text struct {
|
|||
XHTMLRootElement atomXHTMLRootElement `xml:"http://www.w3.org/1999/xhtml div"`
|
||||
}
|
||||
|
||||
// atom10Category maps the attributes of an Atom <category> element.
type atom10Category struct {
	// Term is decoded from the "term" attribute.
	Term string `xml:"term,attr"`
	// Label is decoded from the "label" attribute.
	Label string `xml:"label,attr"`
}
|
||||
|
||||
// Text: https://datatracker.ietf.org/doc/html/rfc4287#section-3.1.1.1
|
||||
// HTML: https://datatracker.ietf.org/doc/html/rfc4287#section-3.1.1.2
|
||||
// XHTML: https://datatracker.ietf.org/doc/html/rfc4287#section-3.1.1.3
|
||||
|
|
|
@ -1604,3 +1604,48 @@ func TestAbsoluteCommentsURL(t *testing.T) {
|
|||
t.Errorf("Incorrect entry comments URL, got: %s", feed.Entries[0].CommentsURL)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseFeedWithCategories(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<title>Example Feed</title>
|
||||
<link href="http://example.org/"/>
|
||||
<author>
|
||||
<name>Alice</name>
|
||||
</author>
|
||||
<author>
|
||||
<name>Bob</name>
|
||||
</author>
|
||||
|
||||
<entry>
|
||||
<link href="http://example.org/2003/12/13/atom03"/>
|
||||
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
|
||||
<updated>2003-12-13T18:30:02Z</updated>
|
||||
<summary>Some text.</summary>
|
||||
<category term='Tech' />
|
||||
<category term='Technology' label='Science' />
|
||||
</entry>
|
||||
|
||||
</feed>`
|
||||
|
||||
feed, err := Parse("https://example.org/", bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if len(feed.Entries[0].Tags) != 2 {
|
||||
t.Errorf("Incorrect number of tags, got: %d", len(feed.Entries[0].Tags))
|
||||
}
|
||||
|
||||
expected := "Tech"
|
||||
result := feed.Entries[0].Tags[0]
|
||||
if result != expected {
|
||||
t.Errorf("Incorrect entry category, got %q instead of %q", result, expected)
|
||||
}
|
||||
|
||||
expected = "Science"
|
||||
result = feed.Entries[0].Tags[1]
|
||||
if result != expected {
|
||||
t.Errorf("Incorrect entry category, got %q instead of %q", result, expected)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -43,6 +43,7 @@ type jsonItem struct {
|
|||
Authors []jsonAuthor `json:"authors"`
|
||||
Author jsonAuthor `json:"author"`
|
||||
Attachments []jsonAttachment `json:"attachments"`
|
||||
Tags []string `json:"tags"`
|
||||
}
|
||||
|
||||
type jsonAttachment struct {
|
||||
|
@ -181,6 +182,7 @@ func (j *jsonItem) Transform() *model.Entry {
|
|||
entry.Content = j.GetContent()
|
||||
entry.Title = strings.TrimSpace(j.GetTitle())
|
||||
entry.Enclosures = j.GetEnclosures()
|
||||
entry.Tags = j.Tags
|
||||
return entry
|
||||
}
|
||||
|
||||
|
|
|
@ -575,3 +575,45 @@ func TestParseInvalidJSON(t *testing.T) {
|
|||
t.Error("Parse should returns an error")
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseTags(t *testing.T) {
|
||||
data := `{
|
||||
"version": "https://jsonfeed.org/version/1",
|
||||
"user_comment": "This is a microblog feed. You can add this to your feed reader using the following URL: https://example.org/feed.json",
|
||||
"title": "Brent Simmons’s Microblog",
|
||||
"home_page_url": "https://example.org/",
|
||||
"feed_url": "https://example.org/feed.json",
|
||||
"author": {
|
||||
"name": "Brent Simmons",
|
||||
"url": "http://example.org/",
|
||||
"avatar": "https://example.org/avatar.png"
|
||||
},
|
||||
"items": [
|
||||
{
|
||||
"id": "2347259",
|
||||
"url": "https://example.org/2347259",
|
||||
"content_text": "Cats are neat. \n\nhttps://example.org/cats",
|
||||
"date_published": "2016-02-09T14:22:00-07:00",
|
||||
"tags": [
|
||||
"tag 1",
|
||||
"tag 2"
|
||||
]
|
||||
}
|
||||
]
|
||||
}`
|
||||
|
||||
feed, err := Parse("https://example.org/feed.json", bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if len(feed.Entries[0].Tags) != 2 {
|
||||
t.Errorf("Incorrect number of Tags, got: %d", len(feed.Entries[0].Tags))
|
||||
}
|
||||
|
||||
expected := "tag 2"
|
||||
result := feed.Entries[0].Tags[1]
|
||||
if result != expected {
|
||||
t.Errorf("Incorrect entry tag, got %q instead of %q", result, expected)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1426,3 +1426,69 @@ func TestEntryDescriptionFromGooglePlayDescription(t *testing.T) {
|
|||
t.Errorf(`Unexpected podcast content, got %q instead of %q`, result, expected)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseEntryWithCategoryAndInnerHTML(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<rss xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">
|
||||
<channel>
|
||||
<title>Example</title>
|
||||
<link>https://example.org/</link>
|
||||
<atom:link href="https://example.org/rss" type="application/rss+xml" rel="self"></atom:link>
|
||||
<item>
|
||||
<title>Test</title>
|
||||
<link>https://example.org/item</link>
|
||||
<category>Category 1</category>
|
||||
<category>Category 2</category>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>`
|
||||
|
||||
feed, err := Parse("https://example.org/", bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if len(feed.Entries[0].Tags) != 2 {
|
||||
t.Errorf("Incorrect number of tags, got: %d", len(feed.Entries[0].Tags))
|
||||
}
|
||||
|
||||
expected := "Category 2"
|
||||
result := feed.Entries[0].Tags[1]
|
||||
if result != expected {
|
||||
t.Errorf("Incorrect entry category, got %q instead of %q", result, expected)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseEntryWithCategoryAndCDATA(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<rss xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">
|
||||
<channel>
|
||||
<title>Example</title>
|
||||
<link>https://example.org/</link>
|
||||
<atom:link href="https://example.org/rss" type="application/rss+xml" rel="self"></atom:link>
|
||||
<item>
|
||||
<title>Test</title>
|
||||
<link>https://example.org/item</link>
|
||||
<author>
|
||||
by <![CDATA[Foo Bar]]>
|
||||
</author>
|
||||
<category>Sample Category</category>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>`
|
||||
|
||||
feed, err := Parse("https://example.org/", bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if len(feed.Entries[0].Tags) != 1 {
|
||||
t.Errorf("Incorrect number of tags, got: %d", len(feed.Entries[0].Tags))
|
||||
}
|
||||
|
||||
expected := "Sample Category"
|
||||
result := feed.Entries[0].Tags[0]
|
||||
if result != expected {
|
||||
t.Errorf("Incorrect entry category, got %q instead of %q", result, expected)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -156,6 +156,12 @@ type rssEnclosure struct {
|
|||
Length string `xml:"length,attr"`
|
||||
}
|
||||
|
||||
// rssCategory captures one RSS <category> element both as character data and
// as raw inner XML.
type rssCategory struct {
	// XMLName records the name of the decoded element.
	XMLName xml.Name
	// Data receives the element's character data.
	Data string `xml:",chardata"`
	// Inner receives the element's raw inner XML.
	Inner string `xml:",innerxml"`
}
|
||||
|
||||
func (enclosure *rssEnclosure) Size() int64 {
|
||||
if enclosure.Length == "" {
|
||||
return 0
|
||||
|
@ -173,6 +179,7 @@ type rssItem struct {
|
|||
Authors []rssAuthor `xml:"author"`
|
||||
CommentLinks []rssCommentLink `xml:"comments"`
|
||||
EnclosureLinks []rssEnclosure `xml:"enclosure"`
|
||||
Categories []rssCategory `xml:"category"`
|
||||
DublinCoreElement
|
||||
FeedBurnerElement
|
||||
PodcastEntryElement
|
||||
|
@ -189,6 +196,8 @@ func (r *rssItem) Transform() *model.Entry {
|
|||
entry.Content = r.entryContent()
|
||||
entry.Title = r.entryTitle()
|
||||
entry.Enclosures = r.entryEnclosures()
|
||||
entry.Tags = r.entryCategories()
|
||||
|
||||
return entry
|
||||
}
|
||||
|
||||
|
@ -372,6 +381,20 @@ func (r *rssItem) entryEnclosures() model.EnclosureList {
|
|||
return enclosures
|
||||
}
|
||||
|
||||
func (r *rssItem) entryCategories() []string {
|
||||
var categoryList []string
|
||||
|
||||
for _, rssCategory := range r.Categories {
|
||||
if strings.Contains(rssCategory.Inner, "<![CDATA[") {
|
||||
categoryList = append(categoryList, strings.TrimSpace(rssCategory.Data))
|
||||
} else {
|
||||
categoryList = append(categoryList, strings.TrimSpace(rssCategory.Inner))
|
||||
}
|
||||
}
|
||||
|
||||
return categoryList
|
||||
}
|
||||
|
||||
func (r *rssItem) entryCommentsURL() string {
|
||||
for _, commentLink := range r.CommentLinks {
|
||||
if commentLink.XMLName.Space == "" {
|
||||
|
|
|
@ -119,7 +119,8 @@ func (s *Storage) createEntry(tx *sql.Tx, entry *model.Entry) error {
|
|||
feed_id,
|
||||
reading_time,
|
||||
changed_at,
|
||||
document_vectors
|
||||
document_vectors,
|
||||
tags
|
||||
)
|
||||
VALUES
|
||||
(
|
||||
|
@ -134,7 +135,8 @@ func (s *Storage) createEntry(tx *sql.Tx, entry *model.Entry) error {
|
|||
$9,
|
||||
$10,
|
||||
now(),
|
||||
setweight(to_tsvector(left(coalesce($1, ''), 500000)), 'A') || setweight(to_tsvector(left(coalesce($6, ''), 500000)), 'B')
|
||||
setweight(to_tsvector(left(coalesce($1, ''), 500000)), 'A') || setweight(to_tsvector(left(coalesce($6, ''), 500000)), 'B'),
|
||||
$11
|
||||
)
|
||||
RETURNING
|
||||
id, status
|
||||
|
@ -151,6 +153,7 @@ func (s *Storage) createEntry(tx *sql.Tx, entry *model.Entry) error {
|
|||
entry.UserID,
|
||||
entry.FeedID,
|
||||
entry.ReadingTime,
|
||||
pq.Array(removeDuplicates(entry.Tags)),
|
||||
).Scan(&entry.ID, &entry.Status)
|
||||
|
||||
if err != nil {
|
||||
|
@ -183,7 +186,8 @@ func (s *Storage) updateEntry(tx *sql.Tx, entry *model.Entry) error {
|
|||
content=$4,
|
||||
author=$5,
|
||||
reading_time=$6,
|
||||
document_vectors = setweight(to_tsvector(left(coalesce($1, ''), 500000)), 'A') || setweight(to_tsvector(left(coalesce($4, ''), 500000)), 'B')
|
||||
document_vectors = setweight(to_tsvector(left(coalesce($1, ''), 500000)), 'A') || setweight(to_tsvector(left(coalesce($4, ''), 500000)), 'B'),
|
||||
tags=$10
|
||||
WHERE
|
||||
user_id=$7 AND feed_id=$8 AND hash=$9
|
||||
RETURNING
|
||||
|
@ -200,6 +204,7 @@ func (s *Storage) updateEntry(tx *sql.Tx, entry *model.Entry) error {
|
|||
entry.UserID,
|
||||
entry.FeedID,
|
||||
entry.Hash,
|
||||
pq.Array(removeDuplicates(entry.Tags)),
|
||||
).Scan(&entry.ID)
|
||||
|
||||
if err != nil {
|
||||
|
@ -535,3 +540,16 @@ func (s *Storage) UnshareEntry(userID int64, entryID int64) (err error) {
|
|||
}
|
||||
return
|
||||
}
|
||||
|
||||
// removeDuplicates returns the distinct items of sliceList, keeping the first
// occurrence of each value and preserving the original order.
//
// The result is never nil: a nil or empty input yields an empty, non-nil
// slice. The input slice is not modified.
func removeDuplicates[T comparable](sliceList []T) []T {
	seen := make(map[T]struct{}, len(sliceList))
	list := make([]T, 0, len(sliceList))
	for _, item := range sliceList {
		if _, duplicate := seen[item]; duplicate {
			continue
		}
		seen[item] = struct{}{}
		list = append(list, item)
	}
	return list
}
|
||||
|
|
|
@ -135,6 +135,17 @@ func (e *EntryQueryBuilder) WithStatuses(statuses []string) *EntryQueryBuilder {
|
|||
return e
|
||||
}
|
||||
|
||||
// WithTags filter by a list of entry tags.
|
||||
func (e *EntryQueryBuilder) WithTags(tags []string) *EntryQueryBuilder {
|
||||
if len(tags) > 0 {
|
||||
for _, cat := range tags {
|
||||
e.conditions = append(e.conditions, fmt.Sprintf("$%d = ANY(e.tags)", len(e.args)+1))
|
||||
e.args = append(e.args, cat)
|
||||
}
|
||||
}
|
||||
return e
|
||||
}
|
||||
|
||||
// WithoutStatus set the entry status that should not be returned.
|
||||
func (e *EntryQueryBuilder) WithoutStatus(status string) *EntryQueryBuilder {
|
||||
if status != "" {
|
||||
|
@ -250,6 +261,7 @@ func (e *EntryQueryBuilder) GetEntries() (model.Entries, error) {
|
|||
e.reading_time,
|
||||
e.created_at,
|
||||
e.changed_at,
|
||||
e.tags,
|
||||
f.title as feed_title,
|
||||
f.feed_url,
|
||||
f.site_url,
|
||||
|
@ -312,6 +324,7 @@ func (e *EntryQueryBuilder) GetEntries() (model.Entries, error) {
|
|||
&entry.ReadingTime,
|
||||
&entry.CreatedAt,
|
||||
&entry.ChangedAt,
|
||||
pq.Array(&entry.Tags),
|
||||
&entry.Feed.Title,
|
||||
&entry.Feed.FeedURL,
|
||||
&entry.Feed.SiteURL,
|
||||
|
|
Loading…
Reference in a new issue