Add rewrite rule to use noscript content for images rendered with JavaScript

This commit is contained in:
Frédéric Guillot 2020-10-19 21:04:14 -07:00 committed by Frédéric Guillot
parent 5ed1d7537a
commit b50778d3eb
3 changed files with 49 additions and 1 deletions

View file

@ -147,7 +147,31 @@ func fixMediumImages(entryURL, entryContent string) string {
doc.Find("figure.paragraph-image").Each(func(i int, paragraphImage *goquery.Selection) {
noscriptElement := paragraphImage.Find("noscript")
if noscriptElement.Length() > 0 {
paragraphImage.ReplaceWithHtml(noscriptElement.Text())
}
})
output, _ := doc.Find("body").First().Html()
return output
}
func useNoScriptImages(entryURL, entryContent string) string {
doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
if err != nil {
return entryContent
}
doc.Find("figure").Each(func(i int, figureElement *goquery.Selection) {
imgElement := figureElement.Find("img")
if imgElement.Length() > 0 {
noscriptElement := figureElement.Find("noscript")
if noscriptElement.Length() > 0 {
figureElement.PrependHtml(noscriptElement.Text())
imgElement.Remove()
noscriptElement.Remove()
}
}
})
output, _ := doc.Find("body").First().Html()

View file

@ -45,6 +45,8 @@ func Rewriter(entryURL, entryContent, customRewriteRules string) string {
entryContent = replaceTextLinks(entryContent)
case "fix_medium_images":
entryContent = fixMediumImages(entryURL, entryContent)
case "use_noscript_figure_images":
entryContent = useNoScriptImages(entryURL, entryContent)
}
}

View file

@ -208,3 +208,25 @@ func TestMediumImage(t *testing.T) {
t.Errorf(`Not expected output: %s`, output)
}
}
// TestRewriteNoScriptImageWithoutNoScriptTag runs the
// "use_noscript_figure_images" rewrite rule on a <figure> that holds an <img>
// and a <figcaption> but no <noscript> element. The expected output is the
// same markup, with the <img> tag written in self-closed form.
func TestRewriteNoScriptImageWithoutNoScriptTag(t *testing.T) {
	const (
		input = `<figure><img src="https://developer.mozilla.org/static/img/favicon144.png" alt="The beautiful MDN logo."><figcaption>MDN Logo</figcaption></figure>`
		want  = `<figure><img src="https://developer.mozilla.org/static/img/favicon144.png" alt="The beautiful MDN logo."/><figcaption>MDN Logo</figcaption></figure>`
	)

	// Surrounding whitespace is not part of the comparison.
	got := strings.TrimSpace(Rewriter("https://example.org/article", input, "use_noscript_figure_images"))
	if got != want {
		t.Errorf(`Not expected output: %s`, got)
	}
}
// TestRewriteNoScriptImageWithNoScriptTag runs the
// "use_noscript_figure_images" rewrite rule on a <figure> that holds both a
// regular <img> and a <noscript> wrapper around a second <img>. The expected
// output keeps only the image taken from the <noscript> content, placed before
// the <figcaption>; the original <img> and the <noscript> wrapper are gone.
func TestRewriteNoScriptImageWithNoScriptTag(t *testing.T) {
	const (
		input = `<figure><img src="https://developer.mozilla.org/static/img/favicon144.png" alt="The beautiful MDN logo."><noscript><img src="http://example.org/logo.svg"></noscript><figcaption>MDN Logo</figcaption></figure>`
		want  = `<figure><img src="http://example.org/logo.svg"/><figcaption>MDN Logo</figcaption></figure>`
	)

	// Surrounding whitespace is not part of the comparison.
	got := strings.TrimSpace(Rewriter("https://example.org/article", input, "use_noscript_figure_images"))
	if got != want {
		t.Errorf(`Not expected output: %s`, got)
	}
}