193 lines
6 KiB
Go
193 lines
6 KiB
Go
package xml // import "github.com/tdewolff/parse/xml"
|
|
|
|
import (
|
|
"bytes"
|
|
"fmt"
|
|
"io"
|
|
"testing"
|
|
|
|
"github.com/tdewolff/parse"
|
|
"github.com/tdewolff/test"
|
|
)
|
|
|
|
// TTs is a shorthand for a slice of TokenType; the test tables below use it
// to write expected token sequences compactly.
type TTs []TokenType
|
|
|
|
func TestTokens(t *testing.T) {
|
|
var tokenTests = []struct {
|
|
xml string
|
|
expected []TokenType
|
|
}{
|
|
{"", TTs{}},
|
|
{"<!-- comment -->", TTs{CommentToken}},
|
|
{"<!-- comment \n multi \r line -->", TTs{CommentToken}},
|
|
{"<foo/>", TTs{StartTagToken, StartTagCloseVoidToken}},
|
|
{"<foo \t\r\n/>", TTs{StartTagToken, StartTagCloseVoidToken}},
|
|
{"<foo:bar.qux-norf/>", TTs{StartTagToken, StartTagCloseVoidToken}},
|
|
{"<foo></foo>", TTs{StartTagToken, StartTagCloseToken, EndTagToken}},
|
|
{"<foo>text</foo>", TTs{StartTagToken, StartTagCloseToken, TextToken, EndTagToken}},
|
|
{"<foo/> text", TTs{StartTagToken, StartTagCloseVoidToken, TextToken}},
|
|
{"<a> <b> <c>text</c> </b> </a>", TTs{StartTagToken, StartTagCloseToken, TextToken, StartTagToken, StartTagCloseToken, TextToken, StartTagToken, StartTagCloseToken, TextToken, EndTagToken, TextToken, EndTagToken, TextToken, EndTagToken}},
|
|
{"<foo a='a' b=\"b\" c=c/>", TTs{StartTagToken, AttributeToken, AttributeToken, AttributeToken, StartTagCloseVoidToken}},
|
|
{"<foo a=\"\"/>", TTs{StartTagToken, AttributeToken, StartTagCloseVoidToken}},
|
|
{"<foo a-b=\"\"/>", TTs{StartTagToken, AttributeToken, StartTagCloseVoidToken}},
|
|
{"<foo \nchecked \r\n value\r=\t'=/>\"' />", TTs{StartTagToken, AttributeToken, AttributeToken, StartTagCloseVoidToken}},
|
|
{"<?xml?>", TTs{StartTagPIToken, StartTagClosePIToken}},
|
|
{"<?xml a=\"a\" ?>", TTs{StartTagPIToken, AttributeToken, StartTagClosePIToken}},
|
|
{"<?xml a=a?>", TTs{StartTagPIToken, AttributeToken, StartTagClosePIToken}},
|
|
{"<![CDATA[ test ]]>", TTs{CDATAToken}},
|
|
{"<!DOCTYPE>", TTs{DOCTYPEToken}},
|
|
{"<!DOCTYPE note SYSTEM \"Note.dtd\">", TTs{DOCTYPEToken}},
|
|
{`<!DOCTYPE note [<!ENTITY nbsp " "><!ENTITY writer "Writer: Donald Duck."><!ENTITY copyright "Copyright:]> W3Schools.">]>`, TTs{DOCTYPEToken}},
|
|
{"<!foo>", TTs{StartTagToken, StartTagCloseToken}},
|
|
|
|
// early endings
|
|
{"<!-- comment", TTs{CommentToken}},
|
|
{"<foo", TTs{StartTagToken}},
|
|
{"</foo", TTs{EndTagToken}},
|
|
{"<foo x", TTs{StartTagToken, AttributeToken}},
|
|
{"<foo x=", TTs{StartTagToken, AttributeToken}},
|
|
{"<foo x='", TTs{StartTagToken, AttributeToken}},
|
|
{"<foo x=''", TTs{StartTagToken, AttributeToken}},
|
|
{"<?xml", TTs{StartTagPIToken}},
|
|
{"<![CDATA[ test", TTs{CDATAToken}},
|
|
{"<!DOCTYPE note SYSTEM", TTs{DOCTYPEToken}},
|
|
|
|
// go fuzz
|
|
{"</", TTs{EndTagToken}},
|
|
{"</\n", TTs{EndTagToken}},
|
|
}
|
|
for _, tt := range tokenTests {
|
|
t.Run(tt.xml, func(t *testing.T) {
|
|
l := NewLexer(bytes.NewBufferString(tt.xml))
|
|
i := 0
|
|
for {
|
|
token, _ := l.Next()
|
|
if token == ErrorToken {
|
|
test.T(t, l.Err(), io.EOF)
|
|
test.T(t, i, len(tt.expected), "when error occurred we must be at the end")
|
|
break
|
|
}
|
|
test.That(t, i < len(tt.expected), "index", i, "must not exceed expected token types size", len(tt.expected))
|
|
if i < len(tt.expected) {
|
|
test.T(t, token, tt.expected[i], "token types must match")
|
|
}
|
|
i++
|
|
}
|
|
})
|
|
}
|
|
|
|
test.T(t, TokenType(100).String(), "Invalid(100)")
|
|
}
|
|
|
|
func TestTags(t *testing.T) {
|
|
var tagTests = []struct {
|
|
xml string
|
|
expected string
|
|
}{
|
|
{"<foo:bar.qux-norf/>", "foo:bar.qux-norf"},
|
|
{"<?xml?>", "xml"},
|
|
{"<foo?bar/qux>", "foo?bar/qux"},
|
|
{"<!DOCTYPE note SYSTEM \"Note.dtd\">", " note SYSTEM \"Note.dtd\""},
|
|
|
|
// early endings
|
|
{"<foo ", "foo"},
|
|
}
|
|
for _, tt := range tagTests {
|
|
t.Run(tt.xml, func(t *testing.T) {
|
|
l := NewLexer(bytes.NewBufferString(tt.xml))
|
|
for {
|
|
token, _ := l.Next()
|
|
if token == ErrorToken {
|
|
test.T(t, l.Err(), io.EOF)
|
|
test.Fail(t, "when error occurred we must be at the end")
|
|
break
|
|
} else if token == StartTagToken || token == StartTagPIToken || token == EndTagToken || token == DOCTYPEToken {
|
|
test.String(t, string(l.Text()), tt.expected, "tags must match")
|
|
break
|
|
}
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestAttributes(t *testing.T) {
|
|
var attributeTests = []struct {
|
|
attr string
|
|
expected []string
|
|
}{
|
|
{"<foo a=\"b\" />", []string{"a", "\"b\""}},
|
|
{"<foo \nchecked \r\n value\r=\t'=/>\"' />", []string{"checked", "", "value", "'=/>\"'"}},
|
|
{"<foo bar=\" a \n\t\r b \" />", []string{"bar", "\" a b \""}},
|
|
{"<?xml a=b?>", []string{"a", "b"}},
|
|
{"<foo /=? >", []string{"/", "?"}},
|
|
|
|
// early endings
|
|
{"<foo x", []string{"x", ""}},
|
|
{"<foo x=", []string{"x", ""}},
|
|
{"<foo x='", []string{"x", "'"}},
|
|
}
|
|
for _, tt := range attributeTests {
|
|
t.Run(tt.attr, func(t *testing.T) {
|
|
l := NewLexer(bytes.NewBufferString(tt.attr))
|
|
i := 0
|
|
for {
|
|
token, _ := l.Next()
|
|
if token == ErrorToken {
|
|
test.T(t, l.Err(), io.EOF)
|
|
test.T(t, i, len(tt.expected), "when error occurred we must be at the end")
|
|
break
|
|
} else if token == AttributeToken {
|
|
test.That(t, i+1 < len(tt.expected), "index", i+1, "must not exceed expected attributes size", len(tt.expected))
|
|
if i+1 < len(tt.expected) {
|
|
test.String(t, string(l.Text()), tt.expected[i], "attribute keys must match")
|
|
test.String(t, string(l.AttrVal()), tt.expected[i+1], "attribute keys must match")
|
|
i += 2
|
|
}
|
|
}
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestErrors(t *testing.T) {
|
|
var errorTests = []struct {
|
|
xml string
|
|
col int
|
|
}{
|
|
{"a\x00b", 2},
|
|
}
|
|
for _, tt := range errorTests {
|
|
t.Run(tt.xml, func(t *testing.T) {
|
|
l := NewLexer(bytes.NewBufferString(tt.xml))
|
|
for {
|
|
token, _ := l.Next()
|
|
if token == ErrorToken {
|
|
if tt.col == 0 {
|
|
test.T(t, l.Err(), io.EOF)
|
|
} else if perr, ok := l.Err().(*parse.Error); ok {
|
|
test.T(t, perr.Col, tt.col)
|
|
} else {
|
|
test.Fail(t, "bad error:", l.Err())
|
|
}
|
|
break
|
|
}
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////
|
|
|
|
func ExampleNewLexer() {
|
|
l := NewLexer(bytes.NewBufferString("<span class='user'>John Doe</span>"))
|
|
out := ""
|
|
for {
|
|
tt, data := l.Next()
|
|
if tt == ErrorToken {
|
|
break
|
|
}
|
|
out += string(data)
|
|
}
|
|
fmt.Println(out)
|
|
// Output: <span class='user'>John Doe</span>
|
|
}
|