Replace github.com/rylans/getlang with github.com/abadojack/whatlanggo
github.com/rylans/getlang doesn't seem to be updated anymore
This commit is contained in:
parent
09e9b0361d
commit
7b541af253
5 changed files with 103 additions and 27 deletions
3
go.mod
3
go.mod
|
@ -4,12 +4,12 @@ module miniflux.app/v2
|
|||
|
||||
require (
|
||||
github.com/PuerkitoBio/goquery v1.8.1
|
||||
github.com/abadojack/whatlanggo v1.0.1
|
||||
github.com/coreos/go-oidc/v3 v3.6.0
|
||||
github.com/gorilla/mux v1.8.0
|
||||
github.com/lib/pq v1.10.9
|
||||
github.com/mccutchen/go-httpbin/v2 v2.11.1
|
||||
github.com/prometheus/client_golang v1.17.0
|
||||
github.com/rylans/getlang v0.0.0-20201227074721-9e7f44ff8aa0
|
||||
github.com/tdewolff/minify/v2 v2.12.9
|
||||
github.com/yuin/goldmark v1.5.6
|
||||
golang.org/x/crypto v0.14.0
|
||||
|
@ -29,6 +29,7 @@ require (
|
|||
github.com/prometheus/client_model v0.4.1-0.20230718164431-9a2bf3000d16 // indirect
|
||||
github.com/prometheus/common v0.44.0 // indirect
|
||||
github.com/prometheus/procfs v0.11.1 // indirect
|
||||
github.com/stretchr/testify v1.8.4 // indirect
|
||||
github.com/tdewolff/parse/v2 v2.6.8 // indirect
|
||||
golang.org/x/sys v0.13.0 // indirect
|
||||
golang.org/x/text v0.13.0 // indirect
|
||||
|
|
11
go.sum
11
go.sum
|
@ -1,5 +1,7 @@
|
|||
github.com/PuerkitoBio/goquery v1.8.1 h1:uQxhNlArOIdbrH1tr0UXwdVFgDcZDrZVdcpygAcwmWM=
|
||||
github.com/PuerkitoBio/goquery v1.8.1/go.mod h1:Q8ICL1kNUJ2sXGoAhPGUdYDJvgQgHzJsnnd3H7Ho5jQ=
|
||||
github.com/abadojack/whatlanggo v1.0.1 h1:19N6YogDnf71CTHm3Mp2qhYfkRdyvbgwWdd2EPxJRG4=
|
||||
github.com/abadojack/whatlanggo v1.0.1/go.mod h1:66WiQbSbJBIlOZMsvbKe5m6pzQovxCH9B/K8tQB2uoc=
|
||||
github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c=
|
||||
github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA=
|
||||
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
|
||||
|
@ -40,12 +42,10 @@ github.com/prometheus/common v0.44.0 h1:+5BrQJwiBB9xsMygAB3TNvpQKOwlkc25LbISbrdO
|
|||
github.com/prometheus/common v0.44.0/go.mod h1:ofAIvZbQ1e/nugmZGz4/qCb9Ap1VoSTIO7x0VV9VvuY=
|
||||
github.com/prometheus/procfs v0.11.1 h1:xRC8Iq1yyca5ypa9n1EZnWZkt7dwcoRPQwX/5gwaUuI=
|
||||
github.com/prometheus/procfs v0.11.1/go.mod h1:eesXgaPo1q7lBpVMoMy0ZOFTth9hBn4W/y0/p/ScXhY=
|
||||
github.com/rylans/getlang v0.0.0-20201227074721-9e7f44ff8aa0 h1:qSaU9YAEIxk/ozcmY1hiauktAYTpbwYIrPdQ0L2E8UM=
|
||||
github.com/rylans/getlang v0.0.0-20201227074721-9e7f44ff8aa0/go.mod h1:3vfmZI6aJd5Rb9W2TQ0Nmupl+qem21R05+hmCscI0Bk=
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
|
||||
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
|
||||
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
|
||||
github.com/tdewolff/minify/v2 v2.12.9 h1:dvn5MtmuQ/DFMwqf5j8QhEVpPX6fi3WGImhv8RUB4zA=
|
||||
github.com/tdewolff/minify/v2 v2.12.9/go.mod h1:qOqdlDfL+7v0/fyymB+OP497nIxJYSvX4MQWA8OoiXU=
|
||||
github.com/tdewolff/parse/v2 v2.6.8 h1:mhNZXYCx//xG7Yq2e/kVLNZw4YfYmeHbhx+Zc0OvFMA=
|
||||
|
@ -110,7 +110,8 @@ google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQ
|
|||
google.golang.org/protobuf v1.31.0 h1:g0LDEJHgrBl9N9r17Ru3sqWhkIx2NB67okBHPwC7hs8=
|
||||
google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo=
|
||||
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
mvdan.cc/xurls/v2 v2.5.0 h1:lyBNOm8Wo71UknhUs4QTFUNNMyxy2JEIaKKo0RWOh+8=
|
||||
mvdan.cc/xurls/v2 v2.5.0/go.mod h1:yQgaGQ1rFtJUzkmKiHYSSfuQxqfYmd//X6PxvholpeE=
|
||||
|
|
|
@ -7,25 +7,22 @@ import (
|
|||
"errors"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"math"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
"unicode/utf8"
|
||||
|
||||
"miniflux.app/v2/internal/config"
|
||||
"miniflux.app/v2/internal/http/client"
|
||||
"miniflux.app/v2/internal/metric"
|
||||
"miniflux.app/v2/internal/model"
|
||||
"miniflux.app/v2/internal/reader/browser"
|
||||
"miniflux.app/v2/internal/reader/readingtime"
|
||||
"miniflux.app/v2/internal/reader/rewrite"
|
||||
"miniflux.app/v2/internal/reader/sanitizer"
|
||||
"miniflux.app/v2/internal/reader/scraper"
|
||||
"miniflux.app/v2/internal/storage"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
"github.com/rylans/getlang"
|
||||
)
|
||||
|
||||
var (
|
||||
|
@ -174,7 +171,7 @@ func ProcessEntryWebPage(feed *model.Feed, entry *model.Entry, user *model.User)
|
|||
|
||||
if content != "" {
|
||||
entry.Content = content
|
||||
entry.ReadingTime = calculateReadingTime(content, user)
|
||||
entry.ReadingTime = readingtime.EstimateReadingTime(entry.Content, user.DefaultReadingSpeed, user.CJKReadingSpeed)
|
||||
}
|
||||
|
||||
rewrite.Rewriter(url, entry, entry.Feed.RewriteRules)
|
||||
|
@ -252,7 +249,7 @@ func updateEntryReadingTime(store *storage.Storage, feed *model.Feed, entry *mod
|
|||
}
|
||||
// Handle YT error case and non-YT entries.
|
||||
if entry.ReadingTime == 0 {
|
||||
entry.ReadingTime = calculateReadingTime(entry.Content, user)
|
||||
entry.ReadingTime = readingtime.EstimateReadingTime(entry.Content, user.DefaultReadingSpeed, user.CJKReadingSpeed)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -360,18 +357,3 @@ func parseISO8601(from string) (time.Duration, error) {
|
|||
|
||||
return d, nil
|
||||
}
|
||||
|
||||
func calculateReadingTime(content string, user *model.User) int {
|
||||
sanitizedContent := sanitizer.StripTags(content)
|
||||
languageInfo := getlang.FromString(sanitizedContent)
|
||||
|
||||
var timeToReadInt int
|
||||
if languageInfo.LanguageCode() == "ko" || languageInfo.LanguageCode() == "zh" || languageInfo.LanguageCode() == "jp" {
|
||||
timeToReadInt = int(math.Ceil(float64(utf8.RuneCountInString(sanitizedContent)) / float64(user.CJKReadingSpeed)))
|
||||
} else {
|
||||
nbOfWords := len(strings.Fields(sanitizedContent))
|
||||
timeToReadInt = int(math.Ceil(float64(nbOfWords) / float64(user.DefaultReadingSpeed)))
|
||||
}
|
||||
|
||||
return timeToReadInt
|
||||
}
|
||||
|
|
31
internal/reader/readingtime/readingtime.go
Normal file
31
internal/reader/readingtime/readingtime.go
Normal file
|
@ -0,0 +1,31 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
// Package readingtime provides a function to estimate the reading time of an article.
|
||||
package readingtime
|
||||
|
||||
import (
|
||||
"math"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
|
||||
"miniflux.app/v2/internal/reader/sanitizer"
|
||||
|
||||
"github.com/abadojack/whatlanggo"
|
||||
)
|
||||
|
||||
// EstimateReadingTime returns the estimated reading time of an article in minute.
|
||||
func EstimateReadingTime(content string, defaultReadingSpeed, cjkReadingSpeed int) int {
|
||||
sanitizedContent := sanitizer.StripTags(content)
|
||||
langInfo := whatlanggo.Detect(sanitizedContent)
|
||||
|
||||
var timeToReadInt int
|
||||
if langInfo.IsReliable() && (langInfo.Lang == whatlanggo.Jpn || langInfo.Lang == whatlanggo.Cmn || langInfo.Lang == whatlanggo.Kor) {
|
||||
timeToReadInt = int(math.Ceil(float64(utf8.RuneCountInString(sanitizedContent)) / float64(cjkReadingSpeed)))
|
||||
} else {
|
||||
nbOfWords := len(strings.Fields(sanitizedContent))
|
||||
timeToReadInt = int(math.Ceil(float64(nbOfWords) / float64(defaultReadingSpeed)))
|
||||
}
|
||||
|
||||
return timeToReadInt
|
||||
}
|
61
internal/reader/readingtime/readingtime_test.go
Normal file
61
internal/reader/readingtime/readingtime_test.go
Normal file
|
@ -0,0 +1,61 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package readingtime
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestEstimateReadingTimeInEnglish(t *testing.T) {
|
||||
sampleText := `
|
||||
In turpis lacus, sollicitudin non accumsan sed, suscipit eget magna. Morbi id
|
||||
neque enim. Aenean ac lacus consectetur, accumsan elit ac, suscipit dui. Donec
|
||||
congue mi et nisl bibendum, venenatis fringilla orci tristique. Nullam ullamcorper
|
||||
cursus justo, ac iaculis ante euismod a. Fusce dapibus lacus arcu, consectetur
|
||||
porttitor odio finibus ac. Integer dictum faucibus egestas. Etiam magna diam, placerat
|
||||
sed velit vitae, lobortis accumsan nisi. Sed viverra dui in odio commodo dapibus.
|
||||
Sed pulvinar metus finibus, hendrerit diam eu, faucibus lectus. Mauris est tellus,
|
||||
convallis et velit sit amet, convallis sagittis nunc. Quisque at ex leo. Donec eget leo
|
||||
vel nibh porta molestie. Aenean pellentesque purus non laoreet aliquam.
|
||||
|
||||
In feugiat eget arcu nec sodales. Nunc rutrum felis in tellus venenatis, sit
|
||||
amet tincidunt augue varius. Nunc nec dignissim quam. In euismod gravida rhoncus.
|
||||
Vivamus eget nibh sed diam malesuada facilisis. Donec ac convallis elit. Fusce
|
||||
fermentum tincidunt est. Nunc viverra, eros in gravida convallis, ex augue vehicula
|
||||
magna, sed tincidunt metus sem et mauris. In pretium purus odio, a auctor tellus
|
||||
ornare vel. Donec ac dolor pulvinar, placerat elit eget, ultrices nisi. Donec
|
||||
tincidunt magna eget pretium sodales. In urna lorem, consectetur in fringilla eget,
|
||||
rutrum et erat. Proin fringilla, lectus eget commodo consequat, est massa lacinia
|
||||
lorem, ut ultricies nunc erat id sapien.
|
||||
|
||||
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce fermentum id
|
||||
sem sed commodo. Ut eget mauris eu lectus mollis aliquam. Fusce convallis, quam
|
||||
vel volutpat aliquet, nunc sem rhoncus magna, a iaculis enim ex nec neque.
|
||||
Suspendisse vel imperdiet leo. Quisque ultrices semper commodo. Pellentesque nec libero et
|
||||
mauris gravida porta vitae id nunc. Fusce sed sem sed augue gravida ultricies at nec
|
||||
turpis. Sed semper eu urna sit amet malesuada. Suspendisse blandit condimentum elit,
|
||||
in scelerisque tellus convallis eu. Nunc eleifend sem et mauris vestibulum
|
||||
mattis. Praesent ultricies pellentesque eros non posuere.
|
||||
`
|
||||
|
||||
readingTime := EstimateReadingTime(sampleText, 200, 500)
|
||||
if readingTime != 2 {
|
||||
t.Errorf(`Wrong reading time, got %d instead of 2`, readingTime)
|
||||
}
|
||||
}
|
||||
|
||||
func TestEstimateReadingTimeInChinese(t *testing.T) {
|
||||
sampleText := `
|
||||
労問委格名町違載式新青脂通由。割止書円画民京般著治登門画拡下。有国同観教田美森素説砂者徴多。上治速相支存色分繰年活元事集遣逆山。身消年森発世財間世変悲原記潟旅好手真今。現通浪口特愛始信川節身方一表著購。郁不使権草定内防並要更一条露加。載交源図訴際属年券重供健三洗。事北残却女鮎朝分要廷込宣政愛無投事。
|
||||
|
||||
問警技亮参沼洗請米物模人。誰探重午局新戦報投性病庭。典向載問千著書故表視新権最石車音端乏大。白僚三掲局係仕表広無旧見要最裁。額寄済生年余講前本次載隊劇。権成観始応泉早高拓了経地本稼室目犯井出。暮載必広傷内校岡公南散広転行別釈。康運行関本掲隠泉傷退報告。独変年換差取予口男旅挑講禁姿。出芳工類胸管払時済潟髪内豊。
|
||||
|
||||
康浴部問玲玉追球化就店岡問画路投。施先太業阪能敏所陸不供探掲方用。手右演社援発示竹育対橋除際愛功旬転好使公。利時改本項輸属嘆員複携者地剤。天政朝戸祝言月接住世黙極者議編連。囲淑覧重弾必治物健賄開頂外称豊開名銀戸院。政稿調励廃演手生告題営味董演何南峰貨。学横公得行提大品回猿齢利込家前役把煎。天代者内身慢作業署間地日。
|
||||
|
||||
中個興本広坂態掲神中能等無滞長対。号処月画界意気様党目購栃欠歌暮。一耳供意盛四俊健必財下画例本判著堺要北王。宮大攻人水一備治首闘振円分建前趣校。目少供午見掲岡安画入情薦続土世始。診読格七久改急目斉実配正。性止月模多様更社発掲雪奇芸量全兵経負。予転済反問止下生買再無旅的。模治明以共会必華浅知館版領送。
|
||||
`
|
||||
|
||||
readingTime := EstimateReadingTime(sampleText, 200, 500)
|
||||
if readingTime != 2 {
|
||||
t.Errorf(`Wrong reading time, got %d instead of 2`, readingTime)
|
||||
}
|
||||
}
|
Loading…
Reference in a new issue