Improve sanitizer to remove script and noscript contents
These tags were removed, but their content was still rendered as escaped HTML. See #157
This commit is contained in:
parent
7039df9af1
commit
d847b10e32
2 changed files with 33 additions and 0 deletions
|
@ -25,6 +25,7 @@ func Sanitize(baseURL, input string) string {
|
|||
tokenizer := html.NewTokenizer(bytes.NewBufferString(input))
|
||||
var buffer bytes.Buffer
|
||||
var tagStack []string
|
||||
scriptTagDepth := 0
|
||||
|
||||
for {
|
||||
if tokenizer.Next() == html.ErrorToken {
|
||||
|
@ -39,6 +40,10 @@ func Sanitize(baseURL, input string) string {
|
|||
token := tokenizer.Token()
|
||||
switch token.Type {
|
||||
case html.TextToken:
|
||||
if scriptTagDepth > 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
buffer.WriteString(html.EscapeString(token.Data))
|
||||
case html.StartTagToken:
|
||||
tagName := token.DataAtom.String()
|
||||
|
@ -55,11 +60,15 @@ func Sanitize(baseURL, input string) string {
|
|||
|
||||
tagStack = append(tagStack, tagName)
|
||||
}
|
||||
} else if isScriptTag(tagName) {
|
||||
scriptTagDepth++
|
||||
}
|
||||
case html.EndTagToken:
|
||||
tagName := token.DataAtom.String()
|
||||
if isValidTag(tagName) && inList(tagName, tagStack) {
|
||||
buffer.WriteString(fmt.Sprintf("</%s>", tagName))
|
||||
} else if isScriptTag(tagName) {
|
||||
scriptTagDepth--
|
||||
}
|
||||
case html.SelfClosingTagToken:
|
||||
tagName := token.DataAtom.String()
|
||||
|
@ -384,3 +393,7 @@ func rewriteIframeURL(link string) string {
|
|||
|
||||
return link
|
||||
}
|
||||
|
||||
// isScriptTag reports whether tagName is exactly "script" or "noscript".
// The comparison is case-sensitive.
func isScriptTag(tagName string) bool {
	switch tagName {
	case "script", "noscript":
		return true
	default:
		return false
	}
}
|
||||
|
|
|
@ -212,3 +212,23 @@ func TestReplaceIframeURL(t *testing.T) {
|
|||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestReplaceNoScript(t *testing.T) {
|
||||
input := `<p>Before paragraph.</p><noscript>Inside <code>noscript</code> tag with an image: <img src="http://example.org/" alt="Test"></noscript><p>After paragraph.</p>`
|
||||
expected := `<p>Before paragraph.</p><p>After paragraph.</p>`
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestReplaceScript(t *testing.T) {
|
||||
input := `<p>Before paragraph.</p><script type="text/javascript">alert("1");</script><p>After paragraph.</p>`
|
||||
expected := `<p>Before paragraph.</p><p>After paragraph.</p>`
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue