2017-12-12 07:16:32 +01:00
// Copyright 2017 Frédéric Guillot. All rights reserved.
// Use of this source code is governed by the Apache 2.0
// license that can be found in the LICENSE file.
2018-08-25 06:51:50 +02:00
package rewrite // import "miniflux.app/reader/rewrite"
2017-12-12 07:16:32 +01:00
import (
2017-12-14 06:30:40 +01:00
2019-08-14 09:33:54 +02:00
2019-08-13 17:44:23 +02:00
2017-12-12 07:16:32 +01:00
// Regular expressions shared by the rewrite helpers in this file. They are
// compiled once, when the package is initialised.
var (
	// youtubeRegex captures everything that follows "v=" in a
	// youtube.com watch URL (group 1).
	youtubeRegex = regexp.MustCompile(`youtube\.com/watch\?v=(.*)`)

	// invidioRegex captures, for any http(s) ".../watch?v=..." URL, the part
	// between the scheme and "/watch" (group 1) and everything after "v="
	// (group 2).
	invidioRegex = regexp.MustCompile(`https?:\/\/(.*)\/watch\?v=(.*)`)

	// imgRegex matches one literal <img ...> tag.
	imgRegex = regexp.MustCompile(`<img [^>]+>`)

	// textLinkRegex matches a bare http(s) URL, case-insensitively. The final
	// character class omits sentence punctuation so that a trailing "." or ","
	// is not swallowed into the link.
	textLinkRegex = regexp.MustCompile(`(?mi)(\bhttps?:\/\/[-A-Z0-9+&@#\/%?=~_|!:,.;]*[-A-Z0-9+&@#\/%=~_|])`)
)
2017-12-12 07:16:32 +01:00
func addImageTitle(entryURL, entryContent string) string {
doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
if err != nil {
return entryContent
2018-06-26 23:39:56 +02:00
matches := doc.Find("img[src][title]")
if matches.Length() > 0 {
matches.Each(func(i int, img *goquery.Selection) {
altAttr := img.AttrOr("alt", "")
srcAttr, _ := img.Attr("src")
titleAttr, _ := img.Attr("title")
2019-08-14 09:33:54 +02:00
img.ReplaceWithHtml(`<figure><img src="` + srcAttr + `" alt="` + altAttr + `"/><figcaption><p>` + html.EscapeString(titleAttr) + `</p></figcaption></figure>`)
2018-06-26 23:39:56 +02:00
output, _ := doc.Find("body").First().Html()
return output
2017-12-12 07:16:32 +01:00
return entryContent
2019-08-13 17:44:23 +02:00
func addMailtoSubject(entryURL, entryContent string) string {
doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
if err != nil {
return entryContent
matches := doc.Find(`a[href^="mailto:"]`)
if matches.Length() > 0 {
matches.Each(func(i int, a *goquery.Selection) {
hrefAttr, _ := a.Attr("href")
mailto, err := url.Parse(hrefAttr)
if err != nil {
subject := mailto.Query().Get("subject")
if subject == "" {
a.AppendHtml(" [" + html.EscapeString(subject) + "]")
output, _ := doc.Find("body").First().Html()
return output
return entryContent
2018-07-09 07:22:48 +02:00
// addDynamicImage parses entryContent as HTML and checks every img and div
// element for one of the attributes named in candidateAttrs. On a hit, an img
// gets the attribute value written to its src, while any other element is
// replaced by a new img built from that value and the element's alt attribute.
// When nothing matched, noscript elements are scanned with imgRegex instead.
//
// As far as the visible lines show, the serialized body is returned when
// something changed and entryContent is returned unchanged otherwise, as well
// as when parsing fails. entryURL is not referenced by the visible lines.
//
// NOTE(review): this listing is truncated. Closing braces, the entries of
// candidateAttrs and the statement acting on a noscript match are not
// visible; confirm against the complete file before changing the logic.
// NOTE(review): srcAttr and altAttr are concatenated into markup without
// escaping in the non-img branch.
func addDynamicImage(entryURL, entryContent string) string {
doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
if err != nil {
// Unparseable content is handed back as-is.
return entryContent
// Ordered most preferred to least preferred.
candidateAttrs := []string{
changed := false
doc.Find("img,div").Each(func(i int, img *goquery.Selection) {
for _, candidateAttr := range candidateAttrs {
if srcAttr, found := img.Attr(candidateAttr); found {
changed = true
if img.Is("img") {
2018-08-25 06:51:50 +02:00
img.SetAttr("src", srcAttr)
2018-07-09 07:22:48 +02:00
} else {
altAttr := img.AttrOr("alt", "")
img.ReplaceWithHtml(`<img src="` + srcAttr + `" alt="` + altAttr + `"/>`)
2018-08-25 06:51:50 +02:00
2018-07-09 07:22:48 +02:00
// Fallback, only reached when no candidate attribute was found above.
if !changed {
doc.Find("noscript").Each(func(i int, noscript *goquery.Selection) {
// At most two matches are requested, which is enough to tell "exactly one
// img tag" apart from "several".
matches := imgRegex.FindAllString(noscript.Text(), 2)
if len(matches) == 1 {
changed = true
if changed {
// The serialization error is discarded here.
output, _ := doc.Find("body").First().Html()
return output
return entryContent
2020-09-30 07:22:25 +02:00
// fixMediumImages parses entryContent as HTML and visits every
// figure.paragraph-image element, looking for a noscript descendant in each.
// entryContent is returned unchanged when parsing fails; the visible lines
// otherwise return the inner HTML of the document's first body element.
// entryURL is not referenced by the visible lines.
//
// NOTE(review): this listing is truncated. Closing braces and whatever runs
// when a noscript element is found are not visible; presumably the figure is
// rewritten from the noscript content. Confirm against the complete file.
func fixMediumImages(entryURL, entryContent string) string {
doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
if err != nil {
// Unparseable content is handed back as-is.
return entryContent
doc.Find("figure.paragraph-image").Each(func(i int, paragraphImage *goquery.Selection) {
noscriptElement := paragraphImage.Find("noscript")
2020-10-20 06:04:14 +02:00
if noscriptElement.Length() > 0 {
// The serialization error is discarded here.
output, _ := doc.Find("body").First().Html()
return output
// useNoScriptImages parses entryContent as HTML and visits every figure
// element that contains an img, then looks for a noscript descendant of that
// same figure. entryContent is returned unchanged when parsing fails; the
// visible lines otherwise return the inner HTML of the document's first body
// element. entryURL is not referenced by the visible lines.
//
// NOTE(review): this listing is truncated. Closing braces and whatever runs
// when both an img and a noscript are present are not visible; presumably
// the noscript markup takes the place of the img. Confirm against the
// complete file.
func useNoScriptImages(entryURL, entryContent string) string {
doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
if err != nil {
// Unparseable content is handed back as-is.
return entryContent
doc.Find("figure").Each(func(i int, figureElement *goquery.Selection) {
imgElement := figureElement.Find("img")
if imgElement.Length() > 0 {
noscriptElement := figureElement.Find("noscript")
if noscriptElement.Length() > 0 {
2020-09-30 07:22:25 +02:00
// The serialization error is discarded here.
output, _ := doc.Find("body").First().Html()
return output
2017-12-12 07:16:32 +01:00
func addYoutubeVideo(entryURL, entryContent string) string {
matches := youtubeRegex.FindStringSubmatch(entryURL)
if len(matches) == 2 {
video := `<iframe width="650" height="350" frameborder="0" src="https://www.youtube-nocookie.com/embed/` + matches[1] + `" allowfullscreen></iframe>`
2019-12-01 07:46:12 +01:00
return video + `<br>` + entryContent
2017-12-12 07:16:32 +01:00
return entryContent
2017-12-14 06:30:40 +01:00
2020-03-21 04:45:37 +01:00
func addYoutubeVideoUsingInvidiousPlayer(entryURL, entryContent string) string {
matches := youtubeRegex.FindStringSubmatch(entryURL)
if len(matches) == 2 {
video := `<iframe width="650" height="350" frameborder="0" src="https://invidio.us/embed/` + matches[1] + `" allowfullscreen></iframe>`
return video + `<br>` + entryContent
return entryContent
func addInvidiousVideo(entryURL, entryContent string) string {
matches := invidioRegex.FindStringSubmatch(entryURL)
2020-09-06 22:41:42 +02:00
if len(matches) == 3 {
video := `<iframe width="650" height="350" frameborder="0" src="https://` + matches[1] + `/embed/` + matches[2] + `" allowfullscreen></iframe>`
2020-03-21 04:45:37 +01:00
return video + `<br>` + entryContent
return entryContent
2017-12-14 06:30:40 +01:00
// addPDFLink prepends a "PDF" link pointing at entryURL to entryContent when
// entryURL ends in ".pdf". The suffix check is case-sensitive and looks at the
// whole URL string. For any other URL entryContent is returned unchanged.
func addPDFLink(entryURL, entryContent string) string {
	if !strings.HasSuffix(entryURL, ".pdf") {
		return entryContent
	}

	// Escape the URL so that a quote in it cannot break out of the href
	// attribute.
	return fmt.Sprintf(`<a href="%s">PDF</a><br>%s`, html.EscapeString(entryURL), entryContent)
}
2018-10-09 05:47:10 +02:00
func replaceTextLinks(input string) string {
return textLinkRegex.ReplaceAllString(input, `<a href="${1}">${1}</a>`)
// replaceLineFeeds turns every "\n" in input into a "<br>" tag.
func replaceLineFeeds(input string) string {
	return strings.ReplaceAll(input, "\n", "<br>")
}
2020-11-25 23:51:54 +01:00
// replaceCustom replaces every match of the regular expression searchTerm in
// entryContent with replaceTerm. replaceTerm may refer to capture groups
// using the $1 / ${name} syntax of regexp.ReplaceAllString.
//
// When searchTerm is not a valid regular expression, entryContent is returned
// unchanged.
func replaceCustom(entryContent string, searchTerm string, replaceTerm string) string {
	re, err := regexp.Compile(searchTerm)
	if err != nil {
		// A pattern that does not compile makes this rule a no-op rather
		// than an error.
		return entryContent
	}

	return re.ReplaceAllString(entryContent, replaceTerm)
}