package html // import "github.com/tdewolff/parse/html"
import (
"bytes"
"fmt"
"io"
"testing"
"github.com/tdewolff/parse"
"github.com/tdewolff/test"
)
// TTs is a short name for a slice of TokenType; the test tables in this file
// use it to write the expected token sequences as compact literals.
type TTs []TokenType
// TestTokens is a table-driven test: every input string is fed to a new lexer
// and the sequence of token types it produces is compared, position by
// position, with the expected sequence. Only the token type is checked; the
// data returned alongside it is discarded.
func TestTokens(t *testing.T) {
var tokenTests = []struct {
html string
expected []TokenType
}{
// NOTE(review): many inputs in this table are empty strings (or lack any
// markup) although they are expected to yield tag/comment/doctype tokens,
// and one literal below spans several lines, which an interpreted string
// literal cannot do. The HTML markup was presumably stripped from this copy
// of the file — confirm against the upstream source before relying on it.
{"", TTs{StartTagToken, StartTagCloseToken, EndTagToken}},
{"", TTs{StartTagToken, StartTagVoidToken}},
{"", TTs{CommentToken}},
{"", TTs{CommentToken}},
{"
text
", TTs{StartTagToken, StartTagCloseToken, TextToken, EndTagToken}},
{"", TTs{StartTagToken, AttributeToken, StartTagVoidToken}},
{"", TTs{StartTagToken, AttributeToken, AttributeToken, StartTagVoidToken}},
{"", TTs{StartTagToken, AttributeToken, AttributeToken, AttributeToken, AttributeToken, StartTagVoidToken}},
{"", TTs{DoctypeToken}},
{"", TTs{DoctypeToken}},
{"", TTs{CommentToken}},
{"0bogus>", TTs{CommentToken}},
{"", TTs{CommentToken}},
{"< ", TTs{TextToken}},
{"", TTs{TextToken}},
// raw tags
{"", TTs{StartTagToken, StartTagCloseToken, TextToken, EndTagToken}},
{"", TTs{StartTagToken, StartTagCloseToken, TextToken, EndTagToken}},
{"", TTs{StartTagToken, StartTagCloseToken, TextToken}},
{"", TTs{StartTagToken, StartTagCloseToken, EndTagToken}},
{"';", TTs{StartTagToken, StartTagCloseToken, TextToken, EndTagToken, TextToken, EndTagToken}},
{"';-->", TTs{StartTagToken, StartTagCloseToken, TextToken, EndTagToken, TextToken, EndTagToken}},
{"';-->", TTs{StartTagToken, StartTagCloseToken, TextToken, EndTagToken}},
{"", TTs{StartTagToken, StartTagCloseToken, TextToken, EndTagToken}},
{"", TTs{TextToken}},
{"", TTs{SvgToken}},
{"", TTs{MathToken}},
{``, TTs{SvgToken}},
{"", TTs{StartTagToken, StartTagCloseToken, SvgToken, EndTagToken}},
// early endings
{"", TTs{StartTagToken, StartTagCloseToken, TextToken}},
// go-fuzz
{">", TTs{EndTagToken}},
}
for _, tt := range tokenTests {
// Each table entry runs as its own subtest, named after the input string.
t.Run(tt.html, func(t *testing.T) {
l := NewLexer(bytes.NewBufferString(tt.html))
// i counts the tokens seen so far and indexes into tt.expected.
i := 0
for {
token, _ := l.Next()
if token == ErrorToken {
// ErrorToken ends the loop: the lexer's error must be io.EOF and the
// number of tokens consumed must equal the number expected.
test.T(t, l.Err(), io.EOF)
test.T(t, i, len(tt.expected), "when error occurred we must be at the end")
break
}
// Report surplus tokens as a failure, and guard the index below so that a
// surplus token does not cause an out-of-range access.
test.That(t, i < len(tt.expected), "index", i, "must not exceed expected token types size", len(tt.expected))
if i < len(tt.expected) {
test.T(t, token, tt.expected[i], "token types must match")
}
i++
}
})
}
// A TokenType value of 100 is expected to stringify as "Invalid(100)".
test.T(t, TokenType(100).String(), "Invalid(100)")
}
// TestTags declares a table that pairs an input string with a single expected
// string.
//
// NOTE(review): the text of this function is incomplete in this copy. The
// table is never closed or iterated, later entries do not match the declared
// struct, and the closing lines appear to belong to a different function (see
// the notes below). Restore the missing text from the upstream file.
func TestTags(t *testing.T) {
var tagTests = []struct {
html string
expected string
}{
{"", "foo:bar.qux-norf"},
{"", "foo?bar/qux"},
{"", " note SYSTEM \"Note.dtd\""},
{"", "foo"},
// early endings
// NOTE(review): the entries from here on carry []string values, which do not
// fit the `expected string` field declared above; they presumably belong to a
// separate table whose declaration is missing — confirm.
{"", []string{"a", "\"b\""}},
{"\"' />", []string{"checked", "", "value", "'=/>\"'"}},
{"", []string{"bar", "\" a \n\t\r b \""}},
{"", []string{"a", ""}},
{"", []string{"/", "/"}},
// early endings
// NOTE(review): the next line is a truncated fragment; the declaration of `l`
// used below is not present in the visible text.
{"John Doe"))
// Concatenate the data of every token returned by l.Next until ErrorToken is
// returned, then print the accumulated string.
out := ""
for {
tt, data := l.Next()
if tt == ErrorToken {
break
}
out += string(data)
}
fmt.Println(out)
// Output: John Doe
}