291 lines
9.1 KiB
Go
291 lines
9.1 KiB
Go
// Copyright 2012 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package collate
|
|
|
|
import (
|
|
"testing"
|
|
|
|
"golang.org/x/text/collate/build"
|
|
"golang.org/x/text/internal/colltab"
|
|
"golang.org/x/text/unicode/norm"
|
|
)
|
|
|
|
type ColElems []Weights
|
|
|
|
type input struct {
|
|
str string
|
|
ces [][]int
|
|
}
|
|
|
|
type check struct {
|
|
in string
|
|
n int
|
|
out ColElems
|
|
}
|
|
|
|
type tableTest struct {
|
|
in []input
|
|
chk []check
|
|
}
|
|
|
|
func w(ce ...int) Weights {
|
|
return W(ce...)
|
|
}
|
|
|
|
var defaults = w(0)
|
|
|
|
func pt(p, t int) []int {
|
|
return []int{p, defaults.Secondary, t}
|
|
}
|
|
|
|
func makeTable(in []input) (*Collator, error) {
|
|
b := build.NewBuilder()
|
|
for _, r := range in {
|
|
if e := b.Add([]rune(r.str), r.ces, nil); e != nil {
|
|
panic(e)
|
|
}
|
|
}
|
|
t, err := b.Build()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return NewFromTable(t), nil
|
|
}
|
|
|
|
// modSeq holds a seqeunce of modifiers in increasing order of CCC long enough
|
|
// to cause a segment overflow if not handled correctly. The last rune in this
|
|
// list has a CCC of 214.
|
|
var modSeq = []rune{
|
|
0x05B1, 0x05B2, 0x05B3, 0x05B4, 0x05B5, 0x05B6, 0x05B7, 0x05B8, 0x05B9, 0x05BB,
|
|
0x05BC, 0x05BD, 0x05BF, 0x05C1, 0x05C2, 0xFB1E, 0x064B, 0x064C, 0x064D, 0x064E,
|
|
0x064F, 0x0650, 0x0651, 0x0652, 0x0670, 0x0711, 0x0C55, 0x0C56, 0x0E38, 0x0E48,
|
|
0x0EB8, 0x0EC8, 0x0F71, 0x0F72, 0x0F74, 0x0321, 0x1DCE,
|
|
}
|
|
|
|
var mods []input
|
|
var modW = func() ColElems {
|
|
ws := ColElems{}
|
|
for _, r := range modSeq {
|
|
rune := norm.NFC.PropertiesString(string(r))
|
|
ws = append(ws, w(0, int(rune.CCC())))
|
|
mods = append(mods, input{string(r), [][]int{{0, int(rune.CCC())}}})
|
|
}
|
|
return ws
|
|
}()
|
|
|
|
var appendNextTests = []tableTest{
|
|
{ // test getWeights
|
|
[]input{
|
|
{"a", [][]int{{100}}},
|
|
{"b", [][]int{{105}}},
|
|
{"c", [][]int{{110}}},
|
|
{"ß", [][]int{{120}}},
|
|
},
|
|
[]check{
|
|
{"a", 1, ColElems{w(100)}},
|
|
{"b", 1, ColElems{w(105)}},
|
|
{"c", 1, ColElems{w(110)}},
|
|
{"d", 1, ColElems{w(0x50064)}},
|
|
{"ab", 1, ColElems{w(100)}},
|
|
{"bc", 1, ColElems{w(105)}},
|
|
{"dd", 1, ColElems{w(0x50064)}},
|
|
{"ß", 2, ColElems{w(120)}},
|
|
},
|
|
},
|
|
{ // test expansion
|
|
[]input{
|
|
{"u", [][]int{{100}}},
|
|
{"U", [][]int{{100}, {0, 25}}},
|
|
{"w", [][]int{{100}, {100}}},
|
|
{"W", [][]int{{100}, {0, 25}, {100}, {0, 25}}},
|
|
},
|
|
[]check{
|
|
{"u", 1, ColElems{w(100)}},
|
|
{"U", 1, ColElems{w(100), w(0, 25)}},
|
|
{"w", 1, ColElems{w(100), w(100)}},
|
|
{"W", 1, ColElems{w(100), w(0, 25), w(100), w(0, 25)}},
|
|
},
|
|
},
|
|
{ // test decompose
|
|
[]input{
|
|
{"D", [][]int{pt(104, 8)}},
|
|
{"z", [][]int{pt(130, 8)}},
|
|
{"\u030C", [][]int{{0, 40}}}, // Caron
|
|
{"\u01C5", [][]int{pt(104, 9), pt(130, 4), {0, 40, 0x1F}}}, // Dž = D+z+caron
|
|
},
|
|
[]check{
|
|
{"\u01C5", 2, ColElems{w(pt(104, 9)...), w(pt(130, 4)...), w(0, 40, 0x1F)}},
|
|
},
|
|
},
|
|
{ // test basic contraction
|
|
[]input{
|
|
{"a", [][]int{{100}}},
|
|
{"ab", [][]int{{101}}},
|
|
{"aab", [][]int{{101}, {101}}},
|
|
{"abc", [][]int{{102}}},
|
|
{"b", [][]int{{200}}},
|
|
{"c", [][]int{{300}}},
|
|
{"d", [][]int{{400}}},
|
|
},
|
|
[]check{
|
|
{"a", 1, ColElems{w(100)}},
|
|
{"aa", 1, ColElems{w(100)}},
|
|
{"aac", 1, ColElems{w(100)}},
|
|
{"d", 1, ColElems{w(400)}},
|
|
{"ab", 2, ColElems{w(101)}},
|
|
{"abb", 2, ColElems{w(101)}},
|
|
{"aab", 3, ColElems{w(101), w(101)}},
|
|
{"aaba", 3, ColElems{w(101), w(101)}},
|
|
{"abc", 3, ColElems{w(102)}},
|
|
{"abcd", 3, ColElems{w(102)}},
|
|
},
|
|
},
|
|
{ // test discontinuous contraction
|
|
append(mods, []input{
|
|
// modifiers; secondary weight equals ccc
|
|
{"\u0316", [][]int{{0, 220}}},
|
|
{"\u0317", [][]int{{0, 220}, {0, 220}}},
|
|
{"\u302D", [][]int{{0, 222}}},
|
|
{"\u302E", [][]int{{0, 225}}}, // used as starter
|
|
{"\u302F", [][]int{{0, 224}}}, // used as starter
|
|
{"\u18A9", [][]int{{0, 228}}},
|
|
{"\u0300", [][]int{{0, 230}}},
|
|
{"\u0301", [][]int{{0, 230}}},
|
|
{"\u0315", [][]int{{0, 232}}},
|
|
{"\u031A", [][]int{{0, 232}}},
|
|
{"\u035C", [][]int{{0, 233}}},
|
|
{"\u035F", [][]int{{0, 233}}},
|
|
{"\u035D", [][]int{{0, 234}}},
|
|
{"\u035E", [][]int{{0, 234}}},
|
|
{"\u0345", [][]int{{0, 240}}},
|
|
|
|
// starters
|
|
{"a", [][]int{{100}}},
|
|
{"b", [][]int{{200}}},
|
|
{"c", [][]int{{300}}},
|
|
{"\u03B1", [][]int{{900}}},
|
|
{"\x01", [][]int{{0, 0, 0, 0}}},
|
|
|
|
// contractions
|
|
{"a\u0300", [][]int{{101}}},
|
|
{"a\u0301", [][]int{{102}}},
|
|
{"a\u035E", [][]int{{110}}},
|
|
{"a\u035Eb\u035E", [][]int{{115}}},
|
|
{"ac\u035Eaca\u035E", [][]int{{116}}},
|
|
{"a\u035Db\u035D", [][]int{{117}}},
|
|
{"a\u0301\u035Db", [][]int{{120}}},
|
|
{"a\u0301\u035F", [][]int{{121}}},
|
|
{"a\u0301\u035Fb", [][]int{{119}}},
|
|
{"\u03B1\u0345", [][]int{{901}, {902}}},
|
|
{"\u302E\u302F", [][]int{{0, 131}, {0, 131}}},
|
|
{"\u302F\u18A9", [][]int{{0, 130}}},
|
|
}...),
|
|
[]check{
|
|
{"a\x01\u0300", 1, ColElems{w(100)}},
|
|
{"ab", 1, ColElems{w(100)}}, // closing segment
|
|
{"a\u0316\u0300b", 5, ColElems{w(101), w(0, 220)}}, // closing segment
|
|
{"a\u0316\u0300", 5, ColElems{w(101), w(0, 220)}}, // no closing segment
|
|
{"a\u0316\u0300\u035Cb", 5, ColElems{w(101), w(0, 220)}}, // completes before segment end
|
|
{"a\u0316\u0300\u035C", 5, ColElems{w(101), w(0, 220)}}, // completes before segment end
|
|
|
|
{"a\u0316\u0301b", 5, ColElems{w(102), w(0, 220)}}, // closing segment
|
|
{"a\u0316\u0301", 5, ColElems{w(102), w(0, 220)}}, // no closing segment
|
|
{"a\u0316\u0301\u035Cb", 5, ColElems{w(102), w(0, 220)}}, // completes before segment end
|
|
{"a\u0316\u0301\u035C", 5, ColElems{w(102), w(0, 220)}}, // completes before segment end
|
|
|
|
// match blocked by modifier with same ccc
|
|
{"a\u0301\u0315\u031A\u035Fb", 3, ColElems{w(102)}},
|
|
|
|
// multiple gaps
|
|
{"a\u0301\u035Db", 6, ColElems{w(120)}},
|
|
{"a\u0301\u035F", 5, ColElems{w(121)}},
|
|
{"a\u0301\u035Fb", 6, ColElems{w(119)}},
|
|
{"a\u0316\u0301\u035F", 7, ColElems{w(121), w(0, 220)}},
|
|
{"a\u0301\u0315\u035Fb", 7, ColElems{w(121), w(0, 232)}},
|
|
{"a\u0316\u0301\u0315\u035Db", 5, ColElems{w(102), w(0, 220)}},
|
|
{"a\u0316\u0301\u0315\u035F", 9, ColElems{w(121), w(0, 220), w(0, 232)}},
|
|
{"a\u0316\u0301\u0315\u035Fb", 9, ColElems{w(121), w(0, 220), w(0, 232)}},
|
|
{"a\u0316\u0301\u0315\u035F\u035D", 9, ColElems{w(121), w(0, 220), w(0, 232)}},
|
|
{"a\u0316\u0301\u0315\u035F\u035Db", 9, ColElems{w(121), w(0, 220), w(0, 232)}},
|
|
|
|
// handling of segment overflow
|
|
{ // just fits within segment
|
|
"a" + string(modSeq[:30]) + "\u0301",
|
|
3 + len(string(modSeq[:30])),
|
|
append(ColElems{w(102)}, modW[:30]...),
|
|
},
|
|
{"a" + string(modSeq[:31]) + "\u0301", 1, ColElems{w(100)}}, // overflow
|
|
{"a" + string(modSeq) + "\u0301", 1, ColElems{w(100)}},
|
|
{ // just fits within segment with two interstitial runes
|
|
"a" + string(modSeq[:28]) + "\u0301\u0315\u035F",
|
|
7 + len(string(modSeq[:28])),
|
|
append(append(ColElems{w(121)}, modW[:28]...), w(0, 232)),
|
|
},
|
|
{ // second half does not fit within segment
|
|
"a" + string(modSeq[:29]) + "\u0301\u0315\u035F",
|
|
3 + len(string(modSeq[:29])),
|
|
append(ColElems{w(102)}, modW[:29]...),
|
|
},
|
|
|
|
// discontinuity can only occur in last normalization segment
|
|
{"a\u035Eb\u035E", 6, ColElems{w(115)}},
|
|
{"a\u0316\u035Eb\u035E", 5, ColElems{w(110), w(0, 220)}},
|
|
{"a\u035Db\u035D", 6, ColElems{w(117)}},
|
|
{"a\u0316\u035Db\u035D", 1, ColElems{w(100)}},
|
|
{"a\u035Eb\u0316\u035E", 8, ColElems{w(115), w(0, 220)}},
|
|
{"a\u035Db\u0316\u035D", 8, ColElems{w(117), w(0, 220)}},
|
|
{"ac\u035Eaca\u035E", 9, ColElems{w(116)}},
|
|
{"a\u0316c\u035Eaca\u035E", 1, ColElems{w(100)}},
|
|
{"ac\u035Eac\u0316a\u035E", 1, ColElems{w(100)}},
|
|
|
|
// expanding contraction
|
|
{"\u03B1\u0345", 4, ColElems{w(901), w(902)}},
|
|
|
|
// Theoretical possibilities
|
|
// contraction within a gap
|
|
{"a\u302F\u18A9\u0301", 9, ColElems{w(102), w(0, 130)}},
|
|
// expansion within a gap
|
|
{"a\u0317\u0301", 5, ColElems{w(102), w(0, 220), w(0, 220)}},
|
|
// repeating CCC blocks last modifier
|
|
{"a\u302E\u302F\u0301", 1, ColElems{w(100)}},
|
|
// The trailing combining characters (with lower CCC) should block the first one.
|
|
// TODO: make the following pass.
|
|
// {"a\u035E\u0316\u0316", 1, ColElems{w(100)}},
|
|
{"a\u035F\u035Eb", 5, ColElems{w(110), w(0, 233)}},
|
|
// Last combiner should match after normalization.
|
|
// TODO: make the following pass.
|
|
// {"a\u035D\u0301", 3, ColElems{w(102), w(0, 234)}},
|
|
// The first combiner is blocking the second one as they have the same CCC.
|
|
{"a\u035D\u035Eb", 1, ColElems{w(100)}},
|
|
},
|
|
},
|
|
}
|
|
|
|
func TestAppendNext(t *testing.T) {
|
|
for i, tt := range appendNextTests {
|
|
c, err := makeTable(tt.in)
|
|
if err != nil {
|
|
t.Errorf("%d: error creating table: %v", i, err)
|
|
continue
|
|
}
|
|
for j, chk := range tt.chk {
|
|
ws, n := c.t.AppendNext(nil, []byte(chk.in))
|
|
if n != chk.n {
|
|
t.Errorf("%d:%d: bytes consumed was %d; want %d", i, j, n, chk.n)
|
|
}
|
|
out := convertFromWeights(chk.out)
|
|
if len(ws) != len(out) {
|
|
t.Errorf("%d:%d: len(ws) was %d; want %d (%X vs %X)\n%X", i, j, len(ws), len(out), ws, out, chk.in)
|
|
continue
|
|
}
|
|
for k, w := range ws {
|
|
w, _ = colltab.MakeElem(w.Primary(), w.Secondary(), int(w.Tertiary()), 0)
|
|
if w != out[k] {
|
|
t.Errorf("%d:%d: Weights %d was %X; want %X", i, j, k, w, out[k])
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|