Calculate reading time during feed processing
The goal is to speed up the user interface: detecting the content language at render time is slow, so the reading time is now computed once during feed processing and stored in a new `reading_time` column instead of being recalculated for every page view.
This commit is contained in:
parent
b1c9977711
commit
de7a613098
12 changed files with 84 additions and 50 deletions
|
@ -129,20 +129,21 @@ type Feeds []*Feed
|
|||
|
||||
// Entry represents a subscription item in the system.
|
||||
type Entry struct {
|
||||
ID int64 `json:"id"`
|
||||
UserID int64 `json:"user_id"`
|
||||
FeedID int64 `json:"feed_id"`
|
||||
Status string `json:"status"`
|
||||
Hash string `json:"hash"`
|
||||
Title string `json:"title"`
|
||||
URL string `json:"url"`
|
||||
Date time.Time `json:"published_at"`
|
||||
Content string `json:"content"`
|
||||
Author string `json:"author"`
|
||||
ShareCode string `json:"share_code"`
|
||||
Starred bool `json:"starred"`
|
||||
Enclosures Enclosures `json:"enclosures,omitempty"`
|
||||
Feed *Feed `json:"feed,omitempty"`
|
||||
ID int64 `json:"id"`
|
||||
UserID int64 `json:"user_id"`
|
||||
FeedID int64 `json:"feed_id"`
|
||||
Status string `json:"status"`
|
||||
Hash string `json:"hash"`
|
||||
Title string `json:"title"`
|
||||
URL string `json:"url"`
|
||||
Date time.Time `json:"published_at"`
|
||||
Content string `json:"content"`
|
||||
Author string `json:"author"`
|
||||
ShareCode string `json:"share_code"`
|
||||
Starred bool `json:"starred"`
|
||||
ReadingTime int `json:"reading_time"`
|
||||
Enclosures Enclosures `json:"enclosures,omitempty"`
|
||||
Feed *Feed `json:"feed,omitempty"`
|
||||
}
|
||||
|
||||
// Entries represents a list of entries.
|
||||
|
|
|
@ -12,7 +12,7 @@ import (
|
|||
"miniflux.app/logger"
|
||||
)
|
||||
|
||||
const schemaVersion = 40
|
||||
const schemaVersion = 41
|
||||
|
||||
// Migrate executes database migrations.
|
||||
func Migrate(db *sql.DB) {
|
||||
|
|
|
@ -203,6 +203,7 @@ alter table users add column entry_direction entry_sorting_direction default 'as
|
|||
add column keeplist_rules text not null default ''
|
||||
;
|
||||
`,
|
||||
"schema_version_41": `alter table entries add column reading_time int not null default 0;`,
|
||||
"schema_version_5": `create table integrations (
|
||||
user_id int not null,
|
||||
pinboard_enabled bool default 'f',
|
||||
|
@ -264,6 +265,7 @@ var SqlMapChecksums = map[string]string{
|
|||
"schema_version_39": "b0f90b97502921d4681a07c64d180a91a0b4ccac7d3c1dbe30519ad6f1bf1737",
|
||||
"schema_version_4": "216ea3a7d3e1704e40c797b5dc47456517c27dbb6ca98bf88812f4f63d74b5d9",
|
||||
"schema_version_40": "6a8fec92399f853ed6817aff4cfa43255dce4c19afad796e41519d09de62105e",
|
||||
"schema_version_41": "128e118ce61267ea1f6ae03b63a6d4734eae87e520b00e309ad083f1f6afdfe5",
|
||||
"schema_version_5": "46397e2f5f2c82116786127e9f6a403e975b14d2ca7b652a48cd1ba843e6a27c",
|
||||
"schema_version_6": "9d05b4fb223f0e60efc716add5048b0ca9c37511cf2041721e20505d6d798ce4",
|
||||
"schema_version_7": "33f298c9aa30d6de3ca28e1270df51c2884d7596f1283a75716e2aeb634cd05c",
|
||||
|
|
1
database/sql/schema_version_41.sql
Normal file
1
database/sql/schema_version_41.sql
Normal file
|
@ -0,0 +1 @@
|
|||
alter table entries add column reading_time int not null default 0;
|
|
@ -33,6 +33,7 @@ type Entry struct {
|
|||
Author string `json:"author"`
|
||||
ShareCode string `json:"share_code"`
|
||||
Starred bool `json:"starred"`
|
||||
ReadingTime int `json:"reading_time"`
|
||||
Enclosures EnclosureList `json:"enclosures,omitempty"`
|
||||
Feed *Feed `json:"feed,omitempty"`
|
||||
}
|
||||
|
|
|
@ -5,8 +5,11 @@
|
|||
package processor
|
||||
|
||||
import (
|
||||
"math"
|
||||
"regexp"
|
||||
"strings"
|
||||
"time"
|
||||
"unicode/utf8"
|
||||
|
||||
"miniflux.app/config"
|
||||
"miniflux.app/logger"
|
||||
|
@ -16,6 +19,8 @@ import (
|
|||
"miniflux.app/reader/sanitizer"
|
||||
"miniflux.app/reader/scraper"
|
||||
"miniflux.app/storage"
|
||||
|
||||
"github.com/rylans/getlang"
|
||||
)
|
||||
|
||||
// ProcessFeedEntries downloads original web page for entries and apply filters.
|
||||
|
@ -58,6 +63,7 @@ func ProcessFeedEntries(store *storage.Storage, feed *model.Feed) {
|
|||
// The sanitizer should always run at the end of the process to make sure unsafe HTML is filtered.
|
||||
entry.Content = sanitizer.Sanitize(entry.URL, entry.Content)
|
||||
|
||||
entry.ReadingTime = calculateReadingTime(entry.Content)
|
||||
filteredEntries = append(filteredEntries, entry)
|
||||
}
|
||||
|
||||
|
@ -108,7 +114,23 @@ func ProcessEntryWebPage(entry *model.Entry) error {
|
|||
|
||||
if content != "" {
|
||||
entry.Content = content
|
||||
entry.ReadingTime = calculateReadingTime(content)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func calculateReadingTime(content string) int {
|
||||
sanitizedContent := sanitizer.StripTags(content)
|
||||
languageInfo := getlang.FromString(sanitizedContent)
|
||||
|
||||
var timeToReadInt int
|
||||
if languageInfo.LanguageCode() == "ko" || languageInfo.LanguageCode() == "zh" || languageInfo.LanguageCode() == "jp" {
|
||||
timeToReadInt = int(math.Ceil(float64(utf8.RuneCountInString(sanitizedContent)) / 500))
|
||||
} else {
|
||||
nbOfWords := len(strings.Fields(sanitizedContent))
|
||||
timeToReadInt = int(math.Ceil(float64(nbOfWords) / 265))
|
||||
}
|
||||
|
||||
return timeToReadInt
|
||||
}
|
||||
|
|
|
@ -75,11 +75,11 @@ func (s *Storage) UpdateEntryContent(entry *model.Entry) error {
|
|||
UPDATE
|
||||
entries
|
||||
SET
|
||||
content=$1
|
||||
content=$1, reading_time=$2
|
||||
WHERE
|
||||
id=$2 AND user_id=$3
|
||||
id=$3 AND user_id=$4
|
||||
`
|
||||
_, err = tx.Exec(query, entry.Content, entry.ID, entry.UserID)
|
||||
_, err = tx.Exec(query, entry.Content, entry.ReadingTime, entry.ID, entry.UserID)
|
||||
if err != nil {
|
||||
tx.Rollback()
|
||||
return fmt.Errorf(`store: unable to update content of entry #%d: %v`, entry.ID, err)
|
||||
|
@ -106,9 +106,35 @@ func (s *Storage) UpdateEntryContent(entry *model.Entry) error {
|
|||
func (s *Storage) createEntry(tx *sql.Tx, entry *model.Entry) error {
|
||||
query := `
|
||||
INSERT INTO entries
|
||||
(title, hash, url, comments_url, published_at, content, author, user_id, feed_id, changed_at, document_vectors)
|
||||
(
|
||||
title,
|
||||
hash,
|
||||
url,
|
||||
comments_url,
|
||||
published_at,
|
||||
content,
|
||||
author,
|
||||
user_id,
|
||||
feed_id,
|
||||
reading_time,
|
||||
changed_at,
|
||||
document_vectors
|
||||
)
|
||||
VALUES
|
||||
($1, $2, $3, $4, $5, $6, $7, $8, $9, now(), setweight(to_tsvector(substring(coalesce($1, '') for 1000000)), 'A') || setweight(to_tsvector(substring(coalesce($6, '') for 1000000)), 'B'))
|
||||
(
|
||||
$1,
|
||||
$2,
|
||||
$3,
|
||||
$4,
|
||||
$5,
|
||||
$6,
|
||||
$7,
|
||||
$8,
|
||||
$9,
|
||||
$10,
|
||||
now(),
|
||||
setweight(to_tsvector(substring(coalesce($1, '') for 1000000)), 'A') || setweight(to_tsvector(substring(coalesce($6, '') for 1000000)), 'B')
|
||||
)
|
||||
RETURNING
|
||||
id, status
|
||||
`
|
||||
|
@ -123,6 +149,7 @@ func (s *Storage) createEntry(tx *sql.Tx, entry *model.Entry) error {
|
|||
entry.Author,
|
||||
entry.UserID,
|
||||
entry.FeedID,
|
||||
entry.ReadingTime,
|
||||
).Scan(&entry.ID, &entry.Status)
|
||||
|
||||
if err != nil {
|
||||
|
@ -154,9 +181,10 @@ func (s *Storage) updateEntry(tx *sql.Tx, entry *model.Entry) error {
|
|||
comments_url=$3,
|
||||
content=$4,
|
||||
author=$5,
|
||||
reading_time=$6,
|
||||
document_vectors = setweight(to_tsvector(substring(coalesce($1, '') for 1000000)), 'A') || setweight(to_tsvector(substring(coalesce($4, '') for 1000000)), 'B')
|
||||
WHERE
|
||||
user_id=$6 AND feed_id=$7 AND hash=$8
|
||||
user_id=$7 AND feed_id=$8 AND hash=$9
|
||||
RETURNING
|
||||
id
|
||||
`
|
||||
|
@ -167,6 +195,7 @@ func (s *Storage) updateEntry(tx *sql.Tx, entry *model.Entry) error {
|
|||
entry.CommentsURL,
|
||||
entry.Content,
|
||||
entry.Author,
|
||||
entry.ReadingTime,
|
||||
entry.UserID,
|
||||
entry.FeedID,
|
||||
entry.Hash,
|
||||
|
|
|
@ -226,6 +226,7 @@ func (e *EntryQueryBuilder) GetEntries() (model.Entries, error) {
|
|||
e.content,
|
||||
e.status,
|
||||
e.starred,
|
||||
e.reading_time,
|
||||
f.title as feed_title,
|
||||
f.feed_url,
|
||||
f.site_url,
|
||||
|
@ -284,6 +285,7 @@ func (e *EntryQueryBuilder) GetEntries() (model.Entries, error) {
|
|||
&entry.Content,
|
||||
&entry.Status,
|
||||
&entry.Starred,
|
||||
&entry.ReadingTime,
|
||||
&entry.Feed.Title,
|
||||
&entry.Feed.FeedURL,
|
||||
&entry.Feed.SiteURL,
|
||||
|
|
|
@ -242,10 +242,10 @@ SOFTWARE.
|
|||
<li>
|
||||
<time datetime="{{ isodate .entry.Date }}" title="{{ isodate .entry.Date }}">{{ elapsed .user.Timezone .entry.Date }}</time>
|
||||
</li>
|
||||
{{ if .user.ShowReadingTime }}
|
||||
{{ if and .user.ShowReadingTime (gt .entry.ReadingTime 0) }}
|
||||
<li>
|
||||
<span>
|
||||
{{ plural "entry.estimated_reading_time" (timeToRead .entry.Content) (timeToRead .entry.Content) }}
|
||||
{{ plural "entry.estimated_reading_time" .entry.ReadingTime .entry.ReadingTime }}
|
||||
</span>
|
||||
</li>
|
||||
{{ end }}
|
||||
|
@ -523,7 +523,7 @@ var templateCommonMapChecksums = map[string]string{
|
|||
"feed_list": "931e43d328a116318c510de5658c688cd940b934c86b6ec82a472e1f81e020ae",
|
||||
"feed_menu": "318d8662dda5ca9dfc75b909c8461e79c86fb5082df1428f67aaf856f19f4b50",
|
||||
"icons": "9a41753778072f286216085d8712495e2ccca20c7a24f5c982775436a3d38579",
|
||||
"item_meta": "eb72c6e2a924759af20b8ef41f2ce7495aedc053181c2e5ca1b063f9410c58b0",
|
||||
"item_meta": "56ab09d7dd46eeb2e2ee11ddcec0c157a5832c896dbd2887d9e2b013680b2af6",
|
||||
"layout": "65767e7dbebe1f7ed42895ecd5a737b0693e4a2ec35e84e3e391f462beb11977",
|
||||
"pagination": "7b61288e86283c4cf0dc83bcbf8bf1c00c7cb29e60201c8c0b633b2450d2911f",
|
||||
"settings_menu": "e2b777630c0efdbc529800303c01d6744ed3af80ec505ac5a5b3f99c9b989156",
|
||||
|
|
|
@ -65,9 +65,6 @@ func (e *Engine) Render(name, language string, data interface{}) []byte {
|
|||
"plural": func(key string, n int, args ...interface{}) string {
|
||||
return printer.Plural(key, n, args...)
|
||||
},
|
||||
"timeToRead": func(content string) int {
|
||||
return timeToRead(content)
|
||||
},
|
||||
})
|
||||
|
||||
var b bytes.Buffer
|
||||
|
|
|
@ -11,19 +11,16 @@ import (
|
|||
"net/mail"
|
||||
"strings"
|
||||
"time"
|
||||
"unicode/utf8"
|
||||
|
||||
"miniflux.app/config"
|
||||
"miniflux.app/http/route"
|
||||
"miniflux.app/locale"
|
||||
"miniflux.app/model"
|
||||
"miniflux.app/proxy"
|
||||
"miniflux.app/reader/sanitizer"
|
||||
"miniflux.app/timezone"
|
||||
"miniflux.app/url"
|
||||
|
||||
"github.com/gorilla/mux"
|
||||
"github.com/rylans/getlang"
|
||||
)
|
||||
|
||||
type funcMap struct {
|
||||
|
@ -94,9 +91,6 @@ func (f *funcMap) Map() template.FuncMap {
|
|||
"plural": func(key string, n int, args ...interface{}) string {
|
||||
return ""
|
||||
},
|
||||
"timeToRead": func(content string) int {
|
||||
return 0
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -195,18 +189,3 @@ func formatFileSize(b int64) string {
|
|||
return fmt.Sprintf("%.1f %ciB",
|
||||
float64(b)/float64(div), "KMGTPE"[exp])
|
||||
}
|
||||
|
||||
func timeToRead(content string) int {
|
||||
sanitizedContent := sanitizer.StripTags(content)
|
||||
languageInfo := getlang.FromString(sanitizedContent)
|
||||
|
||||
var timeToReadInt int
|
||||
if languageInfo.LanguageCode() == "ko" || languageInfo.LanguageCode() == "zh" || languageInfo.LanguageCode() == "jp" {
|
||||
timeToReadInt = int(math.Ceil(float64(utf8.RuneCountInString(sanitizedContent)) / 500))
|
||||
} else {
|
||||
nbOfWords := len(strings.Fields(sanitizedContent))
|
||||
timeToReadInt = int(math.Ceil(float64(nbOfWords) / 265))
|
||||
}
|
||||
|
||||
return timeToReadInt
|
||||
}
|
||||
|
|
|
@ -7,10 +7,10 @@
|
|||
<li>
|
||||
<time datetime="{{ isodate .entry.Date }}" title="{{ isodate .entry.Date }}">{{ elapsed .user.Timezone .entry.Date }}</time>
|
||||
</li>
|
||||
{{ if .user.ShowReadingTime }}
|
||||
{{ if and .user.ShowReadingTime (gt .entry.ReadingTime 0) }}
|
||||
<li>
|
||||
<span>
|
||||
{{ plural "entry.estimated_reading_time" (timeToRead .entry.Content) (timeToRead .entry.Content) }}
|
||||
{{ plural "entry.estimated_reading_time" .entry.ReadingTime .entry.ReadingTime }}
|
||||
</span>
|
||||
</li>
|
||||
{{ end }}
|
||||
|
|
Loading…
Reference in a new issue