diff --git a/internal/reader/readability/readability.go b/internal/reader/readability/readability.go index 443f2138..ec127bca 100644 --- a/internal/reader/readability/readability.go +++ b/internal/reader/readability/readability.go @@ -21,7 +21,8 @@ const ( ) var ( - divToPElementsRegexp = regexp.MustCompile(`(?i)<(?:a|blockquote|dl|div|img|ol|p|pre|table|ul)`) + divToPElementsRegexp = regexp.MustCompile(`(?i)<(a|blockquote|dl|div|img|ol|p|pre|table|ul)`) + sentenceRegexp = regexp.MustCompile(`\.( |$)`) blacklistCandidatesRegexp = regexp.MustCompile(`(?i)popupbody|-ad|g-plus`) okMaybeItsACandidateRegexp = regexp.MustCompile(`(?i)and|article|body|column|main|shadow`) @@ -113,11 +114,9 @@ func getArticle(topCandidate *candidate, candidates candidateList) string { content := s.Text() contentLength := len(content) - if contentLength >= 80 { - if linkDensity < .25 { - append = true - } - } else if linkDensity == 0 && (content[len(content)-1] == '.' || strings.Contains(content, ". ")) { + if contentLength >= 80 && linkDensity < .25 { + append = true + } else if contentLength < 80 && linkDensity == 0 && sentenceRegexp.MatchString(content) { append = true } }