sanitizer: add support for HTML hidden attribute
This commit changes the `Sanitize` function to drop any element that carries the `hidden` attribute, together with its contents, in the same way it already drops blocked tags and their contents.
This commit is contained in:
parent
3ef2522c62
commit
ee5e18ea9f
2 changed files with 20 additions and 9 deletions
|
@ -82,7 +82,7 @@ func Sanitize(baseURL, input string) string {
|
||||||
var buffer strings.Builder
|
var buffer strings.Builder
|
||||||
var tagStack []string
|
var tagStack []string
|
||||||
var parentTag string
|
var parentTag string
|
||||||
blacklistedTagDepth := 0
|
var blockedStack []string
|
||||||
|
|
||||||
tokenizer := html.NewTokenizer(strings.NewReader(input))
|
tokenizer := html.NewTokenizer(strings.NewReader(input))
|
||||||
for {
|
for {
|
||||||
|
@ -98,7 +98,7 @@ func Sanitize(baseURL, input string) string {
|
||||||
token := tokenizer.Token()
|
token := tokenizer.Token()
|
||||||
switch token.Type {
|
switch token.Type {
|
||||||
case html.TextToken:
|
case html.TextToken:
|
||||||
if blacklistedTagDepth > 0 {
|
if len(blockedStack) > 0 {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -116,7 +116,10 @@ func Sanitize(baseURL, input string) string {
|
||||||
if isPixelTracker(tagName, token.Attr) {
|
if isPixelTracker(tagName, token.Attr) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if isValidTag(tagName) {
|
|
||||||
|
if isBlockedTag(tagName) || slices.ContainsFunc(token.Attr, func(attr html.Attribute) bool { return attr.Key == "hidden" }) {
|
||||||
|
blockedStack = append(blockedStack, tagName)
|
||||||
|
} else if len(blockedStack) == 0 && isValidTag(tagName) {
|
||||||
attrNames, htmlAttributes := sanitizeAttributes(baseURL, tagName, token.Attr)
|
attrNames, htmlAttributes := sanitizeAttributes(baseURL, tagName, token.Attr)
|
||||||
|
|
||||||
if hasRequiredAttributes(tagName, attrNames) {
|
if hasRequiredAttributes(tagName, attrNames) {
|
||||||
|
@ -128,22 +131,20 @@ func Sanitize(baseURL, input string) string {
|
||||||
|
|
||||||
tagStack = append(tagStack, tagName)
|
tagStack = append(tagStack, tagName)
|
||||||
}
|
}
|
||||||
} else if isBlockedTag(tagName) {
|
|
||||||
blacklistedTagDepth++
|
|
||||||
}
|
}
|
||||||
case html.EndTagToken:
|
case html.EndTagToken:
|
||||||
tagName := token.DataAtom.String()
|
tagName := token.DataAtom.String()
|
||||||
if isValidTag(tagName) && slices.Contains(tagStack, tagName) {
|
if len(blockedStack) > 0 && blockedStack[len(blockedStack)-1] == tagName {
|
||||||
|
blockedStack = blockedStack[:len(blockedStack)-1]
|
||||||
|
} else if len(blockedStack) == 0 && isValidTag(tagName) && slices.Contains(tagStack, tagName) {
|
||||||
buffer.WriteString("</" + tagName + ">")
|
buffer.WriteString("</" + tagName + ">")
|
||||||
} else if isBlockedTag(tagName) {
|
|
||||||
blacklistedTagDepth--
|
|
||||||
}
|
}
|
||||||
case html.SelfClosingTagToken:
|
case html.SelfClosingTagToken:
|
||||||
tagName := token.DataAtom.String()
|
tagName := token.DataAtom.String()
|
||||||
if isPixelTracker(tagName, token.Attr) {
|
if isPixelTracker(tagName, token.Attr) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if isValidTag(tagName) {
|
if isValidTag(tagName) && len(blockedStack) == 0 {
|
||||||
attrNames, htmlAttributes := sanitizeAttributes(baseURL, tagName, token.Attr)
|
attrNames, htmlAttributes := sanitizeAttributes(baseURL, tagName, token.Attr)
|
||||||
if hasRequiredAttributes(tagName, attrNames) {
|
if hasRequiredAttributes(tagName, attrNames) {
|
||||||
if len(attrNames) > 0 {
|
if len(attrNames) > 0 {
|
||||||
|
|
|
@ -630,3 +630,13 @@ func TestReplaceStyle(t *testing.T) {
|
||||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestHiddenParagraph(t *testing.T) {
|
||||||
|
input := `<p>Before paragraph.</p><p hidden>This should <em>not</em> appear in the <strong>output</strong></p><p>After paragraph.</p>`
|
||||||
|
expected := `<p>Before paragraph.</p><p>After paragraph.</p>`
|
||||||
|
output := Sanitize("http://example.org/", input)
|
||||||
|
|
||||||
|
if expected != output {
|
||||||
|
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in a new issue