458 lines
13 KiB
Go
458 lines
13 KiB
Go
// Copyright 2016 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
// +build ignore
|
|
|
|
package main
|
|
|
|
import (
|
|
"flag"
|
|
"fmt"
|
|
"log"
|
|
"reflect"
|
|
"strings"
|
|
"unicode/utf8"
|
|
|
|
"golang.org/x/text/internal"
|
|
"golang.org/x/text/internal/gen"
|
|
"golang.org/x/text/internal/number"
|
|
"golang.org/x/text/internal/stringset"
|
|
"golang.org/x/text/language"
|
|
"golang.org/x/text/unicode/cldr"
|
|
)
|
|
|
|
// Command-line flags of the generator.
var (
	// test is registered as -test; see its usage string for the intended
	// purpose.
	test = flag.Bool(
		"test",
		false,
		"test existing tables; can be used to compare web data with package data.",
	)

	// outputFile is the file name handed to the code writer for the
	// generated tables.
	outputFile = flag.String("output", "tables.go", "output file")

	// outputTestFile is registered as -testoutput.
	outputTestFile = flag.String("testoutput", "data_test.go", "output file")

	// draft is the draft level that is parsed with cldr.ParseDraft before
	// CLDR elements are selected.
	draft = flag.String(
		"draft",
		"contributed",
		`Minimal draft requirements (approved, contributed, provisional, unconfirmed).`,
	)
)
|
|
|
|
func main() {
|
|
gen.Init()
|
|
|
|
const pkg = "number"
|
|
|
|
gen.Repackage("gen_common.go", "common.go", pkg)
|
|
// Read the CLDR zip file.
|
|
r := gen.OpenCLDRCoreZip()
|
|
defer r.Close()
|
|
|
|
d := &cldr.Decoder{}
|
|
d.SetDirFilter("supplemental", "main")
|
|
d.SetSectionFilter("numbers", "numberingSystem")
|
|
data, err := d.DecodeZip(r)
|
|
if err != nil {
|
|
log.Fatalf("DecodeZip: %v", err)
|
|
}
|
|
|
|
w := gen.NewCodeWriter()
|
|
defer w.WriteGoFile(*outputFile, pkg)
|
|
|
|
fmt.Fprintln(w, `import "golang.org/x/text/internal/stringset"`)
|
|
|
|
gen.WriteCLDRVersion(w)
|
|
|
|
genNumSystem(w, data)
|
|
genSymbols(w, data)
|
|
genFormats(w, data)
|
|
}
|
|
|
|
var systemMap = map[string]system{"latn": 0}
|
|
|
|
func getNumberSystem(str string) system {
|
|
ns, ok := systemMap[str]
|
|
if !ok {
|
|
log.Fatalf("No index for numbering system %q", str)
|
|
}
|
|
return ns
|
|
}
|
|
|
|
func genNumSystem(w *gen.CodeWriter, data *cldr.CLDR) {
|
|
numSysData := []systemData{
|
|
{digitSize: 1, zero: [4]byte{'0'}},
|
|
}
|
|
|
|
for _, ns := range data.Supplemental().NumberingSystems.NumberingSystem {
|
|
if len(ns.Digits) == 0 {
|
|
continue
|
|
}
|
|
switch ns.Id {
|
|
case "latn":
|
|
// hard-wired
|
|
continue
|
|
case "hanidec":
|
|
// non-consecutive digits: treat as "algorithmic"
|
|
continue
|
|
}
|
|
|
|
zero, sz := utf8.DecodeRuneInString(ns.Digits)
|
|
if ns.Digits[sz-1]+9 > 0xBF { // 1011 1111: highest continuation byte
|
|
log.Fatalf("Last byte of zero value overflows for %s", ns.Id)
|
|
}
|
|
|
|
i := rune(0)
|
|
for _, r := range ns.Digits {
|
|
// Verify that we can do simple math on the UTF-8 byte sequence
|
|
// of zero to get the digit.
|
|
if zero+i != r {
|
|
// Runes not consecutive.
|
|
log.Fatalf("Digit %d of %s (%U) is not offset correctly from zero value", i, ns.Id, r)
|
|
}
|
|
i++
|
|
}
|
|
var x [utf8.UTFMax]byte
|
|
utf8.EncodeRune(x[:], zero)
|
|
id := system(len(numSysData))
|
|
systemMap[ns.Id] = id
|
|
numSysData = append(numSysData, systemData{
|
|
id: id,
|
|
digitSize: byte(sz),
|
|
zero: x,
|
|
})
|
|
}
|
|
w.WriteVar("numSysData", numSysData)
|
|
|
|
algoID := system(len(numSysData))
|
|
fmt.Fprintln(w, "const (")
|
|
for _, ns := range data.Supplemental().NumberingSystems.NumberingSystem {
|
|
id, ok := systemMap[ns.Id]
|
|
if !ok {
|
|
id = algoID
|
|
systemMap[ns.Id] = id
|
|
algoID++
|
|
}
|
|
fmt.Fprintf(w, "num%s = %#x\n", strings.Title(ns.Id), id)
|
|
}
|
|
fmt.Fprintln(w, "numNumberSystems")
|
|
fmt.Fprintln(w, ")")
|
|
|
|
fmt.Fprintln(w, "var systemMap = map[string]system{")
|
|
for _, ns := range data.Supplemental().NumberingSystems.NumberingSystem {
|
|
fmt.Fprintf(w, "%q: num%s,\n", ns.Id, strings.Title(ns.Id))
|
|
w.Size += len(ns.Id) + 16 + 1 // very coarse approximation
|
|
}
|
|
fmt.Fprintln(w, "}")
|
|
}
|
|
|
|
func genSymbols(w *gen.CodeWriter, data *cldr.CLDR) {
|
|
d, err := cldr.ParseDraft(*draft)
|
|
if err != nil {
|
|
log.Fatalf("invalid draft level: %v", err)
|
|
}
|
|
|
|
nNumberSystems := system(len(systemMap))
|
|
|
|
type symbols [NumSymbolTypes]string
|
|
|
|
type key struct {
|
|
tag int // from language.CompactIndex
|
|
system system
|
|
}
|
|
symbolMap := map[key]*symbols{}
|
|
|
|
defaults := map[int]system{}
|
|
|
|
for _, lang := range data.Locales() {
|
|
ldml := data.RawLDML(lang)
|
|
if ldml.Numbers == nil {
|
|
continue
|
|
}
|
|
langIndex, ok := language.CompactIndex(language.MustParse(lang))
|
|
if !ok {
|
|
log.Fatalf("No compact index for language %s", lang)
|
|
}
|
|
if d := ldml.Numbers.DefaultNumberingSystem; len(d) > 0 {
|
|
defaults[langIndex] = getNumberSystem(d[0].Data())
|
|
}
|
|
|
|
syms := cldr.MakeSlice(&ldml.Numbers.Symbols)
|
|
syms.SelectDraft(d)
|
|
|
|
getFirst := func(name string, x interface{}) string {
|
|
v := reflect.ValueOf(x)
|
|
slice := cldr.MakeSlice(x)
|
|
slice.SelectAnyOf("alt", "", "alt")
|
|
if reflect.Indirect(v).Len() == 0 {
|
|
return ""
|
|
} else if reflect.Indirect(v).Len() > 1 {
|
|
log.Fatalf("%s: multiple values of %q within single symbol not supported.", lang, name)
|
|
}
|
|
return reflect.Indirect(v).Index(0).MethodByName("Data").Call(nil)[0].String()
|
|
}
|
|
|
|
for _, sym := range ldml.Numbers.Symbols {
|
|
if sym.NumberSystem == "" {
|
|
// This is just linking the default of root to "latn".
|
|
continue
|
|
}
|
|
symbolMap[key{langIndex, getNumberSystem(sym.NumberSystem)}] = &symbols{
|
|
SymDecimal: getFirst("decimal", &sym.Decimal),
|
|
SymGroup: getFirst("group", &sym.Group),
|
|
SymList: getFirst("list", &sym.List),
|
|
SymPercentSign: getFirst("percentSign", &sym.PercentSign),
|
|
SymPlusSign: getFirst("plusSign", &sym.PlusSign),
|
|
SymMinusSign: getFirst("minusSign", &sym.MinusSign),
|
|
SymExponential: getFirst("exponential", &sym.Exponential),
|
|
SymSuperscriptingExponent: getFirst("superscriptingExponent", &sym.SuperscriptingExponent),
|
|
SymPerMille: getFirst("perMille", &sym.PerMille),
|
|
SymInfinity: getFirst("infinity", &sym.Infinity),
|
|
SymNan: getFirst("nan", &sym.Nan),
|
|
SymTimeSeparator: getFirst("timeSeparator", &sym.TimeSeparator),
|
|
}
|
|
}
|
|
}
|
|
|
|
// Expand all values.
|
|
for k, syms := range symbolMap {
|
|
for t := SymDecimal; t < NumSymbolTypes; t++ {
|
|
p := k.tag
|
|
for syms[t] == "" {
|
|
p = int(internal.Parent[p])
|
|
if pSyms, ok := symbolMap[key{p, k.system}]; ok && (*pSyms)[t] != "" {
|
|
syms[t] = (*pSyms)[t]
|
|
break
|
|
}
|
|
if p == 0 /* und */ {
|
|
// Default to root, latn.
|
|
syms[t] = (*symbolMap[key{}])[t]
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Unique the symbol sets and write the string data.
|
|
m := map[symbols]int{}
|
|
sb := stringset.NewBuilder()
|
|
|
|
symIndex := [][NumSymbolTypes]byte{}
|
|
|
|
for ns := system(0); ns < nNumberSystems; ns++ {
|
|
for _, l := range data.Locales() {
|
|
langIndex, _ := language.CompactIndex(language.MustParse(l))
|
|
s := symbolMap[key{langIndex, ns}]
|
|
if s == nil {
|
|
continue
|
|
}
|
|
if _, ok := m[*s]; !ok {
|
|
m[*s] = len(symIndex)
|
|
sb.Add(s[:]...)
|
|
var x [NumSymbolTypes]byte
|
|
for i := SymDecimal; i < NumSymbolTypes; i++ {
|
|
x[i] = byte(sb.Index((*s)[i]))
|
|
}
|
|
symIndex = append(symIndex, x)
|
|
}
|
|
}
|
|
}
|
|
w.WriteVar("symIndex", symIndex)
|
|
w.WriteVar("symData", sb.Set())
|
|
|
|
// resolveSymbolIndex gets the index from the closest matching locale,
|
|
// including the locale itself.
|
|
resolveSymbolIndex := func(langIndex int, ns system) byte {
|
|
for {
|
|
if sym := symbolMap[key{langIndex, ns}]; sym != nil {
|
|
return byte(m[*sym])
|
|
}
|
|
if langIndex == 0 {
|
|
return 0 // und, latn
|
|
}
|
|
langIndex = int(internal.Parent[langIndex])
|
|
}
|
|
}
|
|
|
|
// Create an index with the symbols for each locale for the latn numbering
|
|
// system. If this is not the default, or the only one, for a locale, we
|
|
// will overwrite the value later.
|
|
var langToDefaults [language.NumCompactTags]byte
|
|
for _, l := range data.Locales() {
|
|
langIndex, _ := language.CompactIndex(language.MustParse(l))
|
|
langToDefaults[langIndex] = resolveSymbolIndex(langIndex, 0)
|
|
}
|
|
|
|
// Delete redundant entries.
|
|
for _, l := range data.Locales() {
|
|
langIndex, _ := language.CompactIndex(language.MustParse(l))
|
|
def := defaults[langIndex]
|
|
syms := symbolMap[key{langIndex, def}]
|
|
if syms == nil {
|
|
continue
|
|
}
|
|
for ns := system(0); ns < nNumberSystems; ns++ {
|
|
if ns == def {
|
|
continue
|
|
}
|
|
if altSyms, ok := symbolMap[key{langIndex, ns}]; ok && *altSyms == *syms {
|
|
delete(symbolMap, key{langIndex, ns})
|
|
}
|
|
}
|
|
}
|
|
|
|
// Create a sorted list of alternatives per language. This will only need to
|
|
// be referenced if a user specified an alternative numbering system.
|
|
var langToAlt []altSymData
|
|
for _, l := range data.Locales() {
|
|
langIndex, _ := language.CompactIndex(language.MustParse(l))
|
|
start := len(langToAlt)
|
|
if start > 0x7F {
|
|
log.Fatal("Number of alternative assignments > 0x7F")
|
|
}
|
|
// Create the entry for the default value.
|
|
def := defaults[langIndex]
|
|
langToAlt = append(langToAlt, altSymData{
|
|
compactTag: uint16(langIndex),
|
|
system: def,
|
|
symIndex: resolveSymbolIndex(langIndex, def),
|
|
})
|
|
|
|
for ns := system(0); ns < nNumberSystems; ns++ {
|
|
if def == ns {
|
|
continue
|
|
}
|
|
if sym := symbolMap[key{langIndex, ns}]; sym != nil {
|
|
langToAlt = append(langToAlt, altSymData{
|
|
compactTag: uint16(langIndex),
|
|
system: ns,
|
|
symIndex: resolveSymbolIndex(langIndex, ns),
|
|
})
|
|
}
|
|
}
|
|
if def == 0 && len(langToAlt) == start+1 {
|
|
// No additional data: erase the entry.
|
|
langToAlt = langToAlt[:start]
|
|
} else {
|
|
// Overwrite the entry in langToDefaults.
|
|
langToDefaults[langIndex] = 0x80 | byte(start)
|
|
}
|
|
}
|
|
w.WriteComment(`
|
|
langToDefaults maps a compact language index to the default numbering system
|
|
and default symbol set`)
|
|
w.WriteVar("langToDefaults", langToDefaults)
|
|
|
|
w.WriteComment(`
|
|
langToAlt is a list of numbering system and symbol set pairs, sorted and
|
|
marked by compact language index.`)
|
|
w.WriteVar("langToAlt", langToAlt)
|
|
}
|
|
|
|
// genFormats generates the lookup table for decimal, scientific and percent
|
|
// patterns.
|
|
//
|
|
// CLDR allows for patterns to be different per language for different numbering
|
|
// systems. In practice the patterns are set to be consistent for a language
|
|
// independent of the numbering system. genFormats verifies that no language
|
|
// deviates from this.
|
|
func genFormats(w *gen.CodeWriter, data *cldr.CLDR) {
|
|
d, err := cldr.ParseDraft(*draft)
|
|
if err != nil {
|
|
log.Fatalf("invalid draft level: %v", err)
|
|
}
|
|
|
|
// Fill the first slot with a dummy so we can identify unspecified tags.
|
|
formats := []number.Pattern{{}}
|
|
patterns := map[string]int{}
|
|
|
|
// TODO: It would be possible to eliminate two of these slices by having
|
|
// another indirection and store a reference to the combination of patterns.
|
|
decimal := make([]byte, language.NumCompactTags)
|
|
scientific := make([]byte, language.NumCompactTags)
|
|
percent := make([]byte, language.NumCompactTags)
|
|
|
|
for _, lang := range data.Locales() {
|
|
ldml := data.RawLDML(lang)
|
|
if ldml.Numbers == nil {
|
|
continue
|
|
}
|
|
langIndex, ok := language.CompactIndex(language.MustParse(lang))
|
|
if !ok {
|
|
log.Fatalf("No compact index for language %s", lang)
|
|
}
|
|
type patternSlice []*struct {
|
|
cldr.Common
|
|
Numbers string `xml:"numbers,attr"`
|
|
Count string `xml:"count,attr"`
|
|
}
|
|
|
|
add := func(name string, tags []byte, ps patternSlice) {
|
|
sl := cldr.MakeSlice(&ps)
|
|
sl.SelectDraft(d)
|
|
if len(ps) == 0 {
|
|
return
|
|
}
|
|
if len(ps) > 2 || len(ps) == 2 && ps[0] != ps[1] {
|
|
log.Fatalf("Inconsistent %d patterns for language %s", name, lang)
|
|
}
|
|
s := ps[0].Data()
|
|
|
|
index, ok := patterns[s]
|
|
if !ok {
|
|
nf, err := number.ParsePattern(s)
|
|
if err != nil {
|
|
log.Fatal(err)
|
|
}
|
|
index = len(formats)
|
|
patterns[s] = index
|
|
formats = append(formats, *nf)
|
|
}
|
|
tags[langIndex] = byte(index)
|
|
}
|
|
|
|
for _, df := range ldml.Numbers.DecimalFormats {
|
|
for _, l := range df.DecimalFormatLength {
|
|
if l.Type != "" {
|
|
continue
|
|
}
|
|
for _, f := range l.DecimalFormat {
|
|
add("decimal", decimal, f.Pattern)
|
|
}
|
|
}
|
|
}
|
|
for _, df := range ldml.Numbers.ScientificFormats {
|
|
for _, l := range df.ScientificFormatLength {
|
|
if l.Type != "" {
|
|
continue
|
|
}
|
|
for _, f := range l.ScientificFormat {
|
|
add("scientific", scientific, f.Pattern)
|
|
}
|
|
}
|
|
}
|
|
for _, df := range ldml.Numbers.PercentFormats {
|
|
for _, l := range df.PercentFormatLength {
|
|
if l.Type != "" {
|
|
continue
|
|
}
|
|
for _, f := range l.PercentFormat {
|
|
add("percent", percent, f.Pattern)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Complete the parent tag array to reflect inheritance. An index of 0
|
|
// indicates an unspecified value.
|
|
for _, data := range [][]byte{decimal, scientific, percent} {
|
|
for i := range data {
|
|
p := uint16(i)
|
|
for ; data[p] == 0; p = internal.Parent[p] {
|
|
}
|
|
data[i] = data[p]
|
|
}
|
|
}
|
|
w.WriteVar("tagToDecimal", decimal)
|
|
w.WriteVar("tagToScientific", scientific)
|
|
w.WriteVar("tagToPercent", percent)
|
|
|
|
value := strings.Replace(fmt.Sprintf("%#v", formats), "number.", "", -1)
|
|
// Break up the lines. This won't give ideal perfect formatting, but it is
|
|
// better than one huge line.
|
|
value = strings.Replace(value, ", ", ",\n", -1)
|
|
fmt.Fprintf(w, "var formats = %s\n", value)
|
|
}
|