Calculate reading time during feed processing
The goal is to speed up the user interface. Detecting the language based on the content is pretty slow.
This commit is contained in:
parent
b1c9977711
commit
de7a613098
12 changed files with 84 additions and 50 deletions
|
@ -129,20 +129,21 @@ type Feeds []*Feed
|
||||||
|
|
||||||
// Entry represents a subscription item in the system.
|
// Entry represents a subscription item in the system.
|
||||||
type Entry struct {
|
type Entry struct {
|
||||||
ID int64 `json:"id"`
|
ID int64 `json:"id"`
|
||||||
UserID int64 `json:"user_id"`
|
UserID int64 `json:"user_id"`
|
||||||
FeedID int64 `json:"feed_id"`
|
FeedID int64 `json:"feed_id"`
|
||||||
Status string `json:"status"`
|
Status string `json:"status"`
|
||||||
Hash string `json:"hash"`
|
Hash string `json:"hash"`
|
||||||
Title string `json:"title"`
|
Title string `json:"title"`
|
||||||
URL string `json:"url"`
|
URL string `json:"url"`
|
||||||
Date time.Time `json:"published_at"`
|
Date time.Time `json:"published_at"`
|
||||||
Content string `json:"content"`
|
Content string `json:"content"`
|
||||||
Author string `json:"author"`
|
Author string `json:"author"`
|
||||||
ShareCode string `json:"share_code"`
|
ShareCode string `json:"share_code"`
|
||||||
Starred bool `json:"starred"`
|
Starred bool `json:"starred"`
|
||||||
Enclosures Enclosures `json:"enclosures,omitempty"`
|
ReadingTime int `json:"reading_time"`
|
||||||
Feed *Feed `json:"feed,omitempty"`
|
Enclosures Enclosures `json:"enclosures,omitempty"`
|
||||||
|
Feed *Feed `json:"feed,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// Entries represents a list of entries.
|
// Entries represents a list of entries.
|
||||||
|
|
|
@ -12,7 +12,7 @@ import (
|
||||||
"miniflux.app/logger"
|
"miniflux.app/logger"
|
||||||
)
|
)
|
||||||
|
|
||||||
const schemaVersion = 40
|
const schemaVersion = 41
|
||||||
|
|
||||||
// Migrate executes database migrations.
|
// Migrate executes database migrations.
|
||||||
func Migrate(db *sql.DB) {
|
func Migrate(db *sql.DB) {
|
||||||
|
|
|
@ -203,6 +203,7 @@ alter table users add column entry_direction entry_sorting_direction default 'as
|
||||||
add column keeplist_rules text not null default ''
|
add column keeplist_rules text not null default ''
|
||||||
;
|
;
|
||||||
`,
|
`,
|
||||||
|
"schema_version_41": `alter table entries add column reading_time int not null default 0;`,
|
||||||
"schema_version_5": `create table integrations (
|
"schema_version_5": `create table integrations (
|
||||||
user_id int not null,
|
user_id int not null,
|
||||||
pinboard_enabled bool default 'f',
|
pinboard_enabled bool default 'f',
|
||||||
|
@ -264,6 +265,7 @@ var SqlMapChecksums = map[string]string{
|
||||||
"schema_version_39": "b0f90b97502921d4681a07c64d180a91a0b4ccac7d3c1dbe30519ad6f1bf1737",
|
"schema_version_39": "b0f90b97502921d4681a07c64d180a91a0b4ccac7d3c1dbe30519ad6f1bf1737",
|
||||||
"schema_version_4": "216ea3a7d3e1704e40c797b5dc47456517c27dbb6ca98bf88812f4f63d74b5d9",
|
"schema_version_4": "216ea3a7d3e1704e40c797b5dc47456517c27dbb6ca98bf88812f4f63d74b5d9",
|
||||||
"schema_version_40": "6a8fec92399f853ed6817aff4cfa43255dce4c19afad796e41519d09de62105e",
|
"schema_version_40": "6a8fec92399f853ed6817aff4cfa43255dce4c19afad796e41519d09de62105e",
|
||||||
|
"schema_version_41": "128e118ce61267ea1f6ae03b63a6d4734eae87e520b00e309ad083f1f6afdfe5",
|
||||||
"schema_version_5": "46397e2f5f2c82116786127e9f6a403e975b14d2ca7b652a48cd1ba843e6a27c",
|
"schema_version_5": "46397e2f5f2c82116786127e9f6a403e975b14d2ca7b652a48cd1ba843e6a27c",
|
||||||
"schema_version_6": "9d05b4fb223f0e60efc716add5048b0ca9c37511cf2041721e20505d6d798ce4",
|
"schema_version_6": "9d05b4fb223f0e60efc716add5048b0ca9c37511cf2041721e20505d6d798ce4",
|
||||||
"schema_version_7": "33f298c9aa30d6de3ca28e1270df51c2884d7596f1283a75716e2aeb634cd05c",
|
"schema_version_7": "33f298c9aa30d6de3ca28e1270df51c2884d7596f1283a75716e2aeb634cd05c",
|
||||||
|
|
1
database/sql/schema_version_41.sql
Normal file
1
database/sql/schema_version_41.sql
Normal file
|
@ -0,0 +1 @@
|
||||||
|
alter table entries add column reading_time int not null default 0;
|
|
@ -33,6 +33,7 @@ type Entry struct {
|
||||||
Author string `json:"author"`
|
Author string `json:"author"`
|
||||||
ShareCode string `json:"share_code"`
|
ShareCode string `json:"share_code"`
|
||||||
Starred bool `json:"starred"`
|
Starred bool `json:"starred"`
|
||||||
|
ReadingTime int `json:"reading_time"`
|
||||||
Enclosures EnclosureList `json:"enclosures,omitempty"`
|
Enclosures EnclosureList `json:"enclosures,omitempty"`
|
||||||
Feed *Feed `json:"feed,omitempty"`
|
Feed *Feed `json:"feed,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,8 +5,11 @@
|
||||||
package processor
|
package processor
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"math"
|
||||||
"regexp"
|
"regexp"
|
||||||
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
"unicode/utf8"
|
||||||
|
|
||||||
"miniflux.app/config"
|
"miniflux.app/config"
|
||||||
"miniflux.app/logger"
|
"miniflux.app/logger"
|
||||||
|
@ -16,6 +19,8 @@ import (
|
||||||
"miniflux.app/reader/sanitizer"
|
"miniflux.app/reader/sanitizer"
|
||||||
"miniflux.app/reader/scraper"
|
"miniflux.app/reader/scraper"
|
||||||
"miniflux.app/storage"
|
"miniflux.app/storage"
|
||||||
|
|
||||||
|
"github.com/rylans/getlang"
|
||||||
)
|
)
|
||||||
|
|
||||||
// ProcessFeedEntries downloads original web page for entries and apply filters.
|
// ProcessFeedEntries downloads original web page for entries and apply filters.
|
||||||
|
@ -58,6 +63,7 @@ func ProcessFeedEntries(store *storage.Storage, feed *model.Feed) {
|
||||||
// The sanitizer should always run at the end of the process to make sure unsafe HTML is filtered.
|
// The sanitizer should always run at the end of the process to make sure unsafe HTML is filtered.
|
||||||
entry.Content = sanitizer.Sanitize(entry.URL, entry.Content)
|
entry.Content = sanitizer.Sanitize(entry.URL, entry.Content)
|
||||||
|
|
||||||
|
entry.ReadingTime = calculateReadingTime(entry.Content)
|
||||||
filteredEntries = append(filteredEntries, entry)
|
filteredEntries = append(filteredEntries, entry)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -108,7 +114,23 @@ func ProcessEntryWebPage(entry *model.Entry) error {
|
||||||
|
|
||||||
if content != "" {
|
if content != "" {
|
||||||
entry.Content = content
|
entry.Content = content
|
||||||
|
entry.ReadingTime = calculateReadingTime(content)
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func calculateReadingTime(content string) int {
|
||||||
|
sanitizedContent := sanitizer.StripTags(content)
|
||||||
|
languageInfo := getlang.FromString(sanitizedContent)
|
||||||
|
|
||||||
|
var timeToReadInt int
|
||||||
|
if languageInfo.LanguageCode() == "ko" || languageInfo.LanguageCode() == "zh" || languageInfo.LanguageCode() == "jp" {
|
||||||
|
timeToReadInt = int(math.Ceil(float64(utf8.RuneCountInString(sanitizedContent)) / 500))
|
||||||
|
} else {
|
||||||
|
nbOfWords := len(strings.Fields(sanitizedContent))
|
||||||
|
timeToReadInt = int(math.Ceil(float64(nbOfWords) / 265))
|
||||||
|
}
|
||||||
|
|
||||||
|
return timeToReadInt
|
||||||
|
}
|
||||||
|
|
|
@ -75,11 +75,11 @@ func (s *Storage) UpdateEntryContent(entry *model.Entry) error {
|
||||||
UPDATE
|
UPDATE
|
||||||
entries
|
entries
|
||||||
SET
|
SET
|
||||||
content=$1
|
content=$1, reading_time=$2
|
||||||
WHERE
|
WHERE
|
||||||
id=$2 AND user_id=$3
|
id=$3 AND user_id=$4
|
||||||
`
|
`
|
||||||
_, err = tx.Exec(query, entry.Content, entry.ID, entry.UserID)
|
_, err = tx.Exec(query, entry.Content, entry.ReadingTime, entry.ID, entry.UserID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
tx.Rollback()
|
tx.Rollback()
|
||||||
return fmt.Errorf(`store: unable to update content of entry #%d: %v`, entry.ID, err)
|
return fmt.Errorf(`store: unable to update content of entry #%d: %v`, entry.ID, err)
|
||||||
|
@ -106,9 +106,35 @@ func (s *Storage) UpdateEntryContent(entry *model.Entry) error {
|
||||||
func (s *Storage) createEntry(tx *sql.Tx, entry *model.Entry) error {
|
func (s *Storage) createEntry(tx *sql.Tx, entry *model.Entry) error {
|
||||||
query := `
|
query := `
|
||||||
INSERT INTO entries
|
INSERT INTO entries
|
||||||
(title, hash, url, comments_url, published_at, content, author, user_id, feed_id, changed_at, document_vectors)
|
(
|
||||||
|
title,
|
||||||
|
hash,
|
||||||
|
url,
|
||||||
|
comments_url,
|
||||||
|
published_at,
|
||||||
|
content,
|
||||||
|
author,
|
||||||
|
user_id,
|
||||||
|
feed_id,
|
||||||
|
reading_time,
|
||||||
|
changed_at,
|
||||||
|
document_vectors
|
||||||
|
)
|
||||||
VALUES
|
VALUES
|
||||||
($1, $2, $3, $4, $5, $6, $7, $8, $9, now(), setweight(to_tsvector(substring(coalesce($1, '') for 1000000)), 'A') || setweight(to_tsvector(substring(coalesce($6, '') for 1000000)), 'B'))
|
(
|
||||||
|
$1,
|
||||||
|
$2,
|
||||||
|
$3,
|
||||||
|
$4,
|
||||||
|
$5,
|
||||||
|
$6,
|
||||||
|
$7,
|
||||||
|
$8,
|
||||||
|
$9,
|
||||||
|
$10,
|
||||||
|
now(),
|
||||||
|
setweight(to_tsvector(substring(coalesce($1, '') for 1000000)), 'A') || setweight(to_tsvector(substring(coalesce($6, '') for 1000000)), 'B')
|
||||||
|
)
|
||||||
RETURNING
|
RETURNING
|
||||||
id, status
|
id, status
|
||||||
`
|
`
|
||||||
|
@ -123,6 +149,7 @@ func (s *Storage) createEntry(tx *sql.Tx, entry *model.Entry) error {
|
||||||
entry.Author,
|
entry.Author,
|
||||||
entry.UserID,
|
entry.UserID,
|
||||||
entry.FeedID,
|
entry.FeedID,
|
||||||
|
entry.ReadingTime,
|
||||||
).Scan(&entry.ID, &entry.Status)
|
).Scan(&entry.ID, &entry.Status)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -154,9 +181,10 @@ func (s *Storage) updateEntry(tx *sql.Tx, entry *model.Entry) error {
|
||||||
comments_url=$3,
|
comments_url=$3,
|
||||||
content=$4,
|
content=$4,
|
||||||
author=$5,
|
author=$5,
|
||||||
|
reading_time=$6,
|
||||||
document_vectors = setweight(to_tsvector(substring(coalesce($1, '') for 1000000)), 'A') || setweight(to_tsvector(substring(coalesce($4, '') for 1000000)), 'B')
|
document_vectors = setweight(to_tsvector(substring(coalesce($1, '') for 1000000)), 'A') || setweight(to_tsvector(substring(coalesce($4, '') for 1000000)), 'B')
|
||||||
WHERE
|
WHERE
|
||||||
user_id=$6 AND feed_id=$7 AND hash=$8
|
user_id=$7 AND feed_id=$8 AND hash=$9
|
||||||
RETURNING
|
RETURNING
|
||||||
id
|
id
|
||||||
`
|
`
|
||||||
|
@ -167,6 +195,7 @@ func (s *Storage) updateEntry(tx *sql.Tx, entry *model.Entry) error {
|
||||||
entry.CommentsURL,
|
entry.CommentsURL,
|
||||||
entry.Content,
|
entry.Content,
|
||||||
entry.Author,
|
entry.Author,
|
||||||
|
entry.ReadingTime,
|
||||||
entry.UserID,
|
entry.UserID,
|
||||||
entry.FeedID,
|
entry.FeedID,
|
||||||
entry.Hash,
|
entry.Hash,
|
||||||
|
|
|
@ -226,6 +226,7 @@ func (e *EntryQueryBuilder) GetEntries() (model.Entries, error) {
|
||||||
e.content,
|
e.content,
|
||||||
e.status,
|
e.status,
|
||||||
e.starred,
|
e.starred,
|
||||||
|
e.reading_time,
|
||||||
f.title as feed_title,
|
f.title as feed_title,
|
||||||
f.feed_url,
|
f.feed_url,
|
||||||
f.site_url,
|
f.site_url,
|
||||||
|
@ -284,6 +285,7 @@ func (e *EntryQueryBuilder) GetEntries() (model.Entries, error) {
|
||||||
&entry.Content,
|
&entry.Content,
|
||||||
&entry.Status,
|
&entry.Status,
|
||||||
&entry.Starred,
|
&entry.Starred,
|
||||||
|
&entry.ReadingTime,
|
||||||
&entry.Feed.Title,
|
&entry.Feed.Title,
|
||||||
&entry.Feed.FeedURL,
|
&entry.Feed.FeedURL,
|
||||||
&entry.Feed.SiteURL,
|
&entry.Feed.SiteURL,
|
||||||
|
|
|
@ -242,10 +242,10 @@ SOFTWARE.
|
||||||
<li>
|
<li>
|
||||||
<time datetime="{{ isodate .entry.Date }}" title="{{ isodate .entry.Date }}">{{ elapsed .user.Timezone .entry.Date }}</time>
|
<time datetime="{{ isodate .entry.Date }}" title="{{ isodate .entry.Date }}">{{ elapsed .user.Timezone .entry.Date }}</time>
|
||||||
</li>
|
</li>
|
||||||
{{ if .user.ShowReadingTime }}
|
{{ if and .user.ShowReadingTime (gt .entry.ReadingTime 0) }}
|
||||||
<li>
|
<li>
|
||||||
<span>
|
<span>
|
||||||
{{ plural "entry.estimated_reading_time" (timeToRead .entry.Content) (timeToRead .entry.Content) }}
|
{{ plural "entry.estimated_reading_time" .entry.ReadingTime .entry.ReadingTime }}
|
||||||
</span>
|
</span>
|
||||||
</li>
|
</li>
|
||||||
{{ end }}
|
{{ end }}
|
||||||
|
@ -523,7 +523,7 @@ var templateCommonMapChecksums = map[string]string{
|
||||||
"feed_list": "931e43d328a116318c510de5658c688cd940b934c86b6ec82a472e1f81e020ae",
|
"feed_list": "931e43d328a116318c510de5658c688cd940b934c86b6ec82a472e1f81e020ae",
|
||||||
"feed_menu": "318d8662dda5ca9dfc75b909c8461e79c86fb5082df1428f67aaf856f19f4b50",
|
"feed_menu": "318d8662dda5ca9dfc75b909c8461e79c86fb5082df1428f67aaf856f19f4b50",
|
||||||
"icons": "9a41753778072f286216085d8712495e2ccca20c7a24f5c982775436a3d38579",
|
"icons": "9a41753778072f286216085d8712495e2ccca20c7a24f5c982775436a3d38579",
|
||||||
"item_meta": "eb72c6e2a924759af20b8ef41f2ce7495aedc053181c2e5ca1b063f9410c58b0",
|
"item_meta": "56ab09d7dd46eeb2e2ee11ddcec0c157a5832c896dbd2887d9e2b013680b2af6",
|
||||||
"layout": "65767e7dbebe1f7ed42895ecd5a737b0693e4a2ec35e84e3e391f462beb11977",
|
"layout": "65767e7dbebe1f7ed42895ecd5a737b0693e4a2ec35e84e3e391f462beb11977",
|
||||||
"pagination": "7b61288e86283c4cf0dc83bcbf8bf1c00c7cb29e60201c8c0b633b2450d2911f",
|
"pagination": "7b61288e86283c4cf0dc83bcbf8bf1c00c7cb29e60201c8c0b633b2450d2911f",
|
||||||
"settings_menu": "e2b777630c0efdbc529800303c01d6744ed3af80ec505ac5a5b3f99c9b989156",
|
"settings_menu": "e2b777630c0efdbc529800303c01d6744ed3af80ec505ac5a5b3f99c9b989156",
|
||||||
|
|
|
@ -65,9 +65,6 @@ func (e *Engine) Render(name, language string, data interface{}) []byte {
|
||||||
"plural": func(key string, n int, args ...interface{}) string {
|
"plural": func(key string, n int, args ...interface{}) string {
|
||||||
return printer.Plural(key, n, args...)
|
return printer.Plural(key, n, args...)
|
||||||
},
|
},
|
||||||
"timeToRead": func(content string) int {
|
|
||||||
return timeToRead(content)
|
|
||||||
},
|
|
||||||
})
|
})
|
||||||
|
|
||||||
var b bytes.Buffer
|
var b bytes.Buffer
|
||||||
|
|
|
@ -11,19 +11,16 @@ import (
|
||||||
"net/mail"
|
"net/mail"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
"unicode/utf8"
|
|
||||||
|
|
||||||
"miniflux.app/config"
|
"miniflux.app/config"
|
||||||
"miniflux.app/http/route"
|
"miniflux.app/http/route"
|
||||||
"miniflux.app/locale"
|
"miniflux.app/locale"
|
||||||
"miniflux.app/model"
|
"miniflux.app/model"
|
||||||
"miniflux.app/proxy"
|
"miniflux.app/proxy"
|
||||||
"miniflux.app/reader/sanitizer"
|
|
||||||
"miniflux.app/timezone"
|
"miniflux.app/timezone"
|
||||||
"miniflux.app/url"
|
"miniflux.app/url"
|
||||||
|
|
||||||
"github.com/gorilla/mux"
|
"github.com/gorilla/mux"
|
||||||
"github.com/rylans/getlang"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
type funcMap struct {
|
type funcMap struct {
|
||||||
|
@ -94,9 +91,6 @@ func (f *funcMap) Map() template.FuncMap {
|
||||||
"plural": func(key string, n int, args ...interface{}) string {
|
"plural": func(key string, n int, args ...interface{}) string {
|
||||||
return ""
|
return ""
|
||||||
},
|
},
|
||||||
"timeToRead": func(content string) int {
|
|
||||||
return 0
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -195,18 +189,3 @@ func formatFileSize(b int64) string {
|
||||||
return fmt.Sprintf("%.1f %ciB",
|
return fmt.Sprintf("%.1f %ciB",
|
||||||
float64(b)/float64(div), "KMGTPE"[exp])
|
float64(b)/float64(div), "KMGTPE"[exp])
|
||||||
}
|
}
|
||||||
|
|
||||||
func timeToRead(content string) int {
|
|
||||||
sanitizedContent := sanitizer.StripTags(content)
|
|
||||||
languageInfo := getlang.FromString(sanitizedContent)
|
|
||||||
|
|
||||||
var timeToReadInt int
|
|
||||||
if languageInfo.LanguageCode() == "ko" || languageInfo.LanguageCode() == "zh" || languageInfo.LanguageCode() == "jp" {
|
|
||||||
timeToReadInt = int(math.Ceil(float64(utf8.RuneCountInString(sanitizedContent)) / 500))
|
|
||||||
} else {
|
|
||||||
nbOfWords := len(strings.Fields(sanitizedContent))
|
|
||||||
timeToReadInt = int(math.Ceil(float64(nbOfWords) / 265))
|
|
||||||
}
|
|
||||||
|
|
||||||
return timeToReadInt
|
|
||||||
}
|
|
||||||
|
|
|
@ -7,10 +7,10 @@
|
||||||
<li>
|
<li>
|
||||||
<time datetime="{{ isodate .entry.Date }}" title="{{ isodate .entry.Date }}">{{ elapsed .user.Timezone .entry.Date }}</time>
|
<time datetime="{{ isodate .entry.Date }}" title="{{ isodate .entry.Date }}">{{ elapsed .user.Timezone .entry.Date }}</time>
|
||||||
</li>
|
</li>
|
||||||
{{ if .user.ShowReadingTime }}
|
{{ if and .user.ShowReadingTime (gt .entry.ReadingTime 0) }}
|
||||||
<li>
|
<li>
|
||||||
<span>
|
<span>
|
||||||
{{ plural "entry.estimated_reading_time" (timeToRead .entry.Content) (timeToRead .entry.Content) }}
|
{{ plural "entry.estimated_reading_time" .entry.ReadingTime .entry.ReadingTime }}
|
||||||
</span>
|
</span>
|
||||||
</li>
|
</li>
|
||||||
{{ end }}
|
{{ end }}
|
||||||
|
|
Loading…
Reference in a new issue