vendor: Mega update all dependencies

GitHub-Pull-Request: https://github.com/syncthing/syncthing/pull/4080
This commit is contained in:
Jakob Borg
2017-04-05 14:34:41 +00:00
parent 49c1527724
commit a1bcc15458
1354 changed files with 55066 additions and 797850 deletions

27
vendor/golang.org/x/text/internal/gen/LICENSE generated vendored Normal file
View File

@@ -0,0 +1,27 @@
Copyright (c) 2009 The Go Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

351
vendor/golang.org/x/text/internal/gen/code.go generated vendored Normal file
View File

@@ -0,0 +1,351 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package gen
import (
"bytes"
"encoding/gob"
"fmt"
"hash"
"hash/fnv"
"io"
"log"
"os"
"reflect"
"strings"
"unicode"
"unicode/utf8"
)
// This file contains utilities for generating code.
// TODO: other write methods like:
// - slices, maps, types, etc.
// CodeWriter is a utility for writing structured code. It computes the content
// hash and size of written content. It ensures there are newlines between
// written code blocks.
type CodeWriter struct {
buf bytes.Buffer
Size int
Hash hash.Hash32 // content hash
gob *gob.Encoder
// For comments we skip the usual one-line separator if they are followed by
// a code block.
skipSep bool
}
func (w *CodeWriter) Write(p []byte) (n int, err error) {
return w.buf.Write(p)
}
// NewCodeWriter returns a new CodeWriter.
func NewCodeWriter() *CodeWriter {
h := fnv.New32()
return &CodeWriter{Hash: h, gob: gob.NewEncoder(h)}
}
// WriteGoFile appends the buffer with the total size of all created structures
// and writes it as a Go file to the the given file with the given package name.
func (w *CodeWriter) WriteGoFile(filename, pkg string) {
f, err := os.Create(filename)
if err != nil {
log.Fatalf("Could not create file %s: %v", filename, err)
}
defer f.Close()
if _, err = w.WriteGo(f, pkg); err != nil {
log.Fatalf("Error writing file %s: %v", filename, err)
}
}
// WriteGo appends the buffer with the total size of all created structures and
// writes it as a Go file to the the given writer with the given package name.
func (w *CodeWriter) WriteGo(out io.Writer, pkg string) (n int, err error) {
sz := w.Size
w.WriteComment("Total table size %d bytes (%dKiB); checksum: %X\n", sz, sz/1024, w.Hash.Sum32())
defer w.buf.Reset()
return WriteGo(out, pkg, w.buf.Bytes())
}
func (w *CodeWriter) printf(f string, x ...interface{}) {
fmt.Fprintf(w, f, x...)
}
func (w *CodeWriter) insertSep() {
if w.skipSep {
w.skipSep = false
return
}
// Use at least two newlines to ensure a blank space between the previous
// block. WriteGoFile will remove extraneous newlines.
w.printf("\n\n")
}
// WriteComment writes a comment block. All line starts are prefixed with "//".
// Initial empty lines are gobbled. The indentation for the first line is
// stripped from consecutive lines.
func (w *CodeWriter) WriteComment(comment string, args ...interface{}) {
s := fmt.Sprintf(comment, args...)
s = strings.Trim(s, "\n")
// Use at least two newlines to ensure a blank space between the previous
// block. WriteGoFile will remove extraneous newlines.
w.printf("\n\n// ")
w.skipSep = true
// strip first indent level.
sep := "\n"
for ; len(s) > 0 && (s[0] == '\t' || s[0] == ' '); s = s[1:] {
sep += s[:1]
}
strings.NewReplacer(sep, "\n// ", "\n", "\n// ").WriteString(w, s)
w.printf("\n")
}
func (w *CodeWriter) writeSizeInfo(size int) {
w.printf("// Size: %d bytes\n", size)
}
// WriteConst writes a constant of the given name and value.
func (w *CodeWriter) WriteConst(name string, x interface{}) {
w.insertSep()
v := reflect.ValueOf(x)
switch v.Type().Kind() {
case reflect.String:
w.printf("const %s %s = ", name, typeName(x))
w.WriteString(v.String())
w.printf("\n")
default:
w.printf("const %s = %#v\n", name, x)
}
}
// WriteVar writes a variable of the given name and value.
func (w *CodeWriter) WriteVar(name string, x interface{}) {
w.insertSep()
v := reflect.ValueOf(x)
oldSize := w.Size
sz := int(v.Type().Size())
w.Size += sz
switch v.Type().Kind() {
case reflect.String:
w.printf("var %s %s = ", name, typeName(x))
w.WriteString(v.String())
case reflect.Struct:
w.gob.Encode(x)
fallthrough
case reflect.Slice, reflect.Array:
w.printf("var %s = ", name)
w.writeValue(v)
w.writeSizeInfo(w.Size - oldSize)
default:
w.printf("var %s %s = ", name, typeName(x))
w.gob.Encode(x)
w.writeValue(v)
w.writeSizeInfo(w.Size - oldSize)
}
w.printf("\n")
}
func (w *CodeWriter) writeValue(v reflect.Value) {
x := v.Interface()
switch v.Kind() {
case reflect.String:
w.WriteString(v.String())
case reflect.Array:
// Don't double count: callers of WriteArray count on the size being
// added, so we need to discount it here.
w.Size -= int(v.Type().Size())
w.writeSlice(x, true)
case reflect.Slice:
w.writeSlice(x, false)
case reflect.Struct:
w.printf("%s{\n", typeName(v.Interface()))
t := v.Type()
for i := 0; i < v.NumField(); i++ {
w.printf("%s: ", t.Field(i).Name)
w.writeValue(v.Field(i))
w.printf(",\n")
}
w.printf("}")
default:
w.printf("%#v", x)
}
}
// WriteString writes a string literal.
func (w *CodeWriter) WriteString(s string) {
s = strings.Replace(s, `\`, `\\`, -1)
io.WriteString(w.Hash, s) // content hash
w.Size += len(s)
const maxInline = 40
if len(s) <= maxInline {
w.printf("%q", s)
return
}
// We will render the string as a multi-line string.
const maxWidth = 80 - 4 - len(`"`) - len(`" +`)
// When starting on its own line, go fmt indents line 2+ an extra level.
n, max := maxWidth, maxWidth-4
// As per https://golang.org/issue/18078, the compiler has trouble
// compiling the concatenation of many strings, s0 + s1 + s2 + ... + sN,
// for large N. We insert redundant, explicit parentheses to work around
// that, lowering the N at any given step: (s0 + s1 + ... + s63) + (s64 +
// ... + s127) + etc + (etc + ... + sN).
explicitParens, extraComment := len(s) > 128*1024, ""
if explicitParens {
w.printf(`(`)
extraComment = "; the redundant, explicit parens are for https://golang.org/issue/18078"
}
// Print "" +\n, if a string does not start on its own line.
b := w.buf.Bytes()
if p := len(bytes.TrimRight(b, " \t")); p > 0 && b[p-1] != '\n' {
w.printf("\"\" + // Size: %d bytes%s\n", len(s), extraComment)
n, max = maxWidth, maxWidth
}
w.printf(`"`)
for sz, p, nLines := 0, 0, 0; p < len(s); {
var r rune
r, sz = utf8.DecodeRuneInString(s[p:])
out := s[p : p+sz]
chars := 1
if !unicode.IsPrint(r) || r == utf8.RuneError || r == '"' {
switch sz {
case 1:
out = fmt.Sprintf("\\x%02x", s[p])
case 2, 3:
out = fmt.Sprintf("\\u%04x", r)
case 4:
out = fmt.Sprintf("\\U%08x", r)
}
chars = len(out)
}
if n -= chars; n < 0 {
nLines++
if explicitParens && nLines&63 == 63 {
w.printf("\") + (\"")
}
w.printf("\" +\n\"")
n = max - len(out)
}
w.printf("%s", out)
p += sz
}
w.printf(`"`)
if explicitParens {
w.printf(`)`)
}
}
// WriteSlice writes a slice value.
func (w *CodeWriter) WriteSlice(x interface{}) {
w.writeSlice(x, false)
}
// WriteArray writes an array value.
func (w *CodeWriter) WriteArray(x interface{}) {
w.writeSlice(x, true)
}
func (w *CodeWriter) writeSlice(x interface{}, isArray bool) {
v := reflect.ValueOf(x)
w.gob.Encode(v.Len())
w.Size += v.Len() * int(v.Type().Elem().Size())
name := typeName(x)
if isArray {
name = fmt.Sprintf("[%d]%s", v.Len(), name[strings.Index(name, "]")+1:])
}
if isArray {
w.printf("%s{\n", name)
} else {
w.printf("%s{ // %d elements\n", name, v.Len())
}
switch kind := v.Type().Elem().Kind(); kind {
case reflect.String:
for _, s := range x.([]string) {
w.WriteString(s)
w.printf(",\n")
}
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64,
reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
// nLine and nBlock are the number of elements per line and block.
nLine, nBlock, format := 8, 64, "%d,"
switch kind {
case reflect.Uint8:
format = "%#02x,"
case reflect.Uint16:
format = "%#04x,"
case reflect.Uint32:
nLine, nBlock, format = 4, 32, "%#08x,"
case reflect.Uint, reflect.Uint64:
nLine, nBlock, format = 4, 32, "%#016x,"
case reflect.Int8:
nLine = 16
}
n := nLine
for i := 0; i < v.Len(); i++ {
if i%nBlock == 0 && v.Len() > nBlock {
w.printf("// Entry %X - %X\n", i, i+nBlock-1)
}
x := v.Index(i).Interface()
w.gob.Encode(x)
w.printf(format, x)
if n--; n == 0 {
n = nLine
w.printf("\n")
}
}
w.printf("\n")
case reflect.Struct:
zero := reflect.Zero(v.Type().Elem()).Interface()
for i := 0; i < v.Len(); i++ {
x := v.Index(i).Interface()
w.gob.EncodeValue(v)
if !reflect.DeepEqual(zero, x) {
line := fmt.Sprintf("%#v,\n", x)
line = line[strings.IndexByte(line, '{'):]
w.printf("%d: ", i)
w.printf(line)
}
}
case reflect.Array:
for i := 0; i < v.Len(); i++ {
w.printf("%d: %#v,\n", i, v.Index(i).Interface())
}
default:
panic("gen: slice elem type not supported")
}
w.printf("}")
}
// WriteType writes a definition of the type of the given value and returns the
// type name.
func (w *CodeWriter) WriteType(x interface{}) string {
t := reflect.TypeOf(x)
w.printf("type %s struct {\n", t.Name())
for i := 0; i < t.NumField(); i++ {
w.printf("\t%s %s\n", t.Field(i).Name, t.Field(i).Type)
}
w.printf("}\n")
return t.Name()
}
// typeName returns the name of the go type of x.
func typeName(x interface{}) string {
t := reflect.ValueOf(x).Type()
return strings.Replace(fmt.Sprint(t), "main.", "", 1)
}

281
vendor/golang.org/x/text/internal/gen/gen.go generated vendored Normal file
View File

@@ -0,0 +1,281 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package gen contains common code for the various code generation tools in the
// text repository. Its usage ensures consistency between tools.
//
// This package defines command line flags that are common to most generation
// tools. The flags allow for specifying specific Unicode and CLDR versions
// in the public Unicode data repository (http://www.unicode.org/Public).
//
// A local Unicode data mirror can be set through the flag -local or the
// environment variable UNICODE_DIR. The former takes precedence. The local
// directory should follow the same structure as the public repository.
//
// IANA data can also optionally be mirrored by putting it in the iana directory
// rooted at the top of the local mirror. Beware, though, that IANA data is not
// versioned. So it is up to the developer to use the right version.
package gen // import "golang.org/x/text/internal/gen"
import (
"bytes"
"flag"
"fmt"
"go/build"
"go/format"
"io"
"io/ioutil"
"log"
"net/http"
"os"
"path"
"path/filepath"
"sync"
"unicode"
"golang.org/x/text/unicode/cldr"
)
var (
url = flag.String("url",
"http://www.unicode.org/Public",
"URL of Unicode database directory")
iana = flag.String("iana",
"http://www.iana.org",
"URL of the IANA repository")
unicodeVersion = flag.String("unicode",
getEnv("UNICODE_VERSION", unicode.Version),
"unicode version to use")
cldrVersion = flag.String("cldr",
getEnv("CLDR_VERSION", cldr.Version),
"cldr version to use")
)
func getEnv(name, def string) string {
if v := os.Getenv(name); v != "" {
return v
}
return def
}
// Init performs common initialization for a gen command. It parses the flags
// and sets up the standard logging parameters.
func Init() {
log.SetPrefix("")
log.SetFlags(log.Lshortfile)
flag.Parse()
}
const header = `// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
package %s
`
// UnicodeVersion reports the requested Unicode version.
func UnicodeVersion() string {
return *unicodeVersion
}
// UnicodeVersion reports the requested CLDR version.
func CLDRVersion() string {
return *cldrVersion
}
// IsLocal reports whether data files are available locally.
func IsLocal() bool {
dir, err := localReadmeFile()
if err != nil {
return false
}
if _, err = os.Stat(dir); err != nil {
return false
}
return true
}
// OpenUCDFile opens the requested UCD file. The file is specified relative to
// the public Unicode root directory. It will call log.Fatal if there are any
// errors.
func OpenUCDFile(file string) io.ReadCloser {
return openUnicode(path.Join(*unicodeVersion, "ucd", file))
}
// OpenCLDRCoreZip opens the CLDR core zip file. It will call log.Fatal if there
// are any errors.
func OpenCLDRCoreZip() io.ReadCloser {
return OpenUnicodeFile("cldr", *cldrVersion, "core.zip")
}
// OpenUnicodeFile opens the requested file of the requested category from the
// root of the Unicode data archive. The file is specified relative to the
// public Unicode root directory. If version is "", it will use the default
// Unicode version. It will call log.Fatal if there are any errors.
func OpenUnicodeFile(category, version, file string) io.ReadCloser {
if version == "" {
version = UnicodeVersion()
}
return openUnicode(path.Join(category, version, file))
}
// OpenIANAFile opens the requested IANA file. The file is specified relative
// to the IANA root, which is typically either http://www.iana.org or the
// iana directory in the local mirror. It will call log.Fatal if there are any
// errors.
func OpenIANAFile(path string) io.ReadCloser {
return Open(*iana, "iana", path)
}
var (
dirMutex sync.Mutex
localDir string
)
const permissions = 0755
func localReadmeFile() (string, error) {
p, err := build.Import("golang.org/x/text", "", build.FindOnly)
if err != nil {
return "", fmt.Errorf("Could not locate package: %v", err)
}
return filepath.Join(p.Dir, "DATA", "README"), nil
}
func getLocalDir() string {
dirMutex.Lock()
defer dirMutex.Unlock()
readme, err := localReadmeFile()
if err != nil {
log.Fatal(err)
}
dir := filepath.Dir(readme)
if _, err := os.Stat(readme); err != nil {
if err := os.MkdirAll(dir, permissions); err != nil {
log.Fatalf("Could not create directory: %v", err)
}
ioutil.WriteFile(readme, []byte(readmeTxt), permissions)
}
return dir
}
const readmeTxt = `Generated by golang.org/x/text/internal/gen. DO NOT EDIT.
This directory contains downloaded files used to generate the various tables
in the golang.org/x/text subrepo.
Note that the language subtag repo (iana/assignments/language-subtag-registry)
and all other times in the iana subdirectory are not versioned and will need
to be periodically manually updated. The easiest way to do this is to remove
the entire iana directory. This is mostly of concern when updating the language
package.
`
// Open opens subdir/path if a local directory is specified and the file exists,
// where subdir is a directory relative to the local root, or fetches it from
// urlRoot/path otherwise. It will call log.Fatal if there are any errors.
func Open(urlRoot, subdir, path string) io.ReadCloser {
file := filepath.Join(getLocalDir(), subdir, filepath.FromSlash(path))
return open(file, urlRoot, path)
}
func openUnicode(path string) io.ReadCloser {
file := filepath.Join(getLocalDir(), filepath.FromSlash(path))
return open(file, *url, path)
}
// TODO: automatically periodically update non-versioned files.
func open(file, urlRoot, path string) io.ReadCloser {
if f, err := os.Open(file); err == nil {
return f
}
r := get(urlRoot, path)
defer r.Close()
b, err := ioutil.ReadAll(r)
if err != nil {
log.Fatalf("Could not download file: %v", err)
}
os.MkdirAll(filepath.Dir(file), permissions)
if err := ioutil.WriteFile(file, b, permissions); err != nil {
log.Fatalf("Could not create file: %v", err)
}
return ioutil.NopCloser(bytes.NewReader(b))
}
func get(root, path string) io.ReadCloser {
url := root + "/" + path
fmt.Printf("Fetching %s...", url)
defer fmt.Println(" done.")
resp, err := http.Get(url)
if err != nil {
log.Fatalf("HTTP GET: %v", err)
}
if resp.StatusCode != 200 {
log.Fatalf("Bad GET status for %q: %q", url, resp.Status)
}
return resp.Body
}
// TODO: use Write*Version in all applicable packages.
// WriteUnicodeVersion writes a constant for the Unicode version from which the
// tables are generated.
func WriteUnicodeVersion(w io.Writer) {
fmt.Fprintf(w, "// UnicodeVersion is the Unicode version from which the tables in this package are derived.\n")
fmt.Fprintf(w, "const UnicodeVersion = %q\n\n", UnicodeVersion())
}
// WriteCLDRVersion writes a constant for the CLDR version from which the
// tables are generated.
func WriteCLDRVersion(w io.Writer) {
fmt.Fprintf(w, "// CLDRVersion is the CLDR version from which the tables in this package are derived.\n")
fmt.Fprintf(w, "const CLDRVersion = %q\n\n", CLDRVersion())
}
// WriteGoFile prepends a standard file comment and package statement to the
// given bytes, applies gofmt, and writes them to a file with the given name.
// It will call log.Fatal if there are any errors.
func WriteGoFile(filename, pkg string, b []byte) {
w, err := os.Create(filename)
if err != nil {
log.Fatalf("Could not create file %s: %v", filename, err)
}
defer w.Close()
if _, err = WriteGo(w, pkg, b); err != nil {
log.Fatalf("Error writing file %s: %v", filename, err)
}
}
// WriteGo prepends a standard file comment and package statement to the given
// bytes, applies gofmt, and writes them to w.
func WriteGo(w io.Writer, pkg string, b []byte) (n int, err error) {
src := []byte(fmt.Sprintf(header, pkg))
src = append(src, b...)
formatted, err := format.Source(src)
if err != nil {
// Print the generated code even in case of an error so that the
// returned error can be meaningfully interpreted.
n, _ = w.Write(src)
return n, err
}
return w.Write(formatted)
}
// Repackage rewrites a Go file from belonging to package main to belonging to
// the given package.
func Repackage(inFile, outFile, pkg string) {
src, err := ioutil.ReadFile(inFile)
if err != nil {
log.Fatalf("reading %s: %v", inFile, err)
}
const toDelete = "package main\n\n"
i := bytes.Index(src, []byte(toDelete))
if i < 0 {
log.Fatalf("Could not find %q in %s.", toDelete, inFile)
}
w := &bytes.Buffer{}
w.Write(src[i+len(toDelete):])
WriteGoFile(outFile, pkg, w.Bytes())
}

27
vendor/golang.org/x/text/internal/triegen/LICENSE generated vendored Normal file
View File

@@ -0,0 +1,27 @@
Copyright (c) 2009 The Go Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

58
vendor/golang.org/x/text/internal/triegen/compact.go generated vendored Normal file
View File

@@ -0,0 +1,58 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package triegen
// This file defines Compacter and its implementations.
import "io"
// A Compacter generates an alternative, more space-efficient way to store a
// trie value block. A trie value block holds all possible values for the last
// byte of a UTF-8 encoded rune. Excluding ASCII characters, a trie value block
// always has 64 values, as a UTF-8 encoding ends with a byte in [0x80, 0xC0).
type Compacter interface {
// Size returns whether the Compacter could encode the given block as well
// as its size in case it can. len(v) is always 64.
Size(v []uint64) (sz int, ok bool)
// Store stores the block using the Compacter's compression method.
// It returns a handle with which the block can be retrieved.
// len(v) is always 64.
Store(v []uint64) uint32
// Print writes the data structures associated to the given store to w.
Print(w io.Writer) error
// Handler returns the name of a function that gets called during trie
// lookup for blocks generated by the Compacter. The function should be of
// the form func (n uint32, b byte) uint64, where n is the index returned by
// the Compacter's Store method and b is the last byte of the UTF-8
// encoding, where 0x80 <= b < 0xC0, for which to do the lookup in the
// block.
Handler() string
}
// simpleCompacter is the default Compacter used by builder. It implements a
// normal trie block.
type simpleCompacter builder
func (b *simpleCompacter) Size([]uint64) (sz int, ok bool) {
return blockSize * b.ValueSize, true
}
func (b *simpleCompacter) Store(v []uint64) uint32 {
h := uint32(len(b.ValueBlocks) - blockOffset)
b.ValueBlocks = append(b.ValueBlocks, v)
return h
}
func (b *simpleCompacter) Print(io.Writer) error {
// Structures are printed in print.go.
return nil
}
func (b *simpleCompacter) Handler() string {
panic("Handler should be special-cased for this Compacter")
}

251
vendor/golang.org/x/text/internal/triegen/print.go generated vendored Normal file
View File

@@ -0,0 +1,251 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package triegen
import (
"bytes"
"fmt"
"io"
"strings"
"text/template"
)
// print writes all the data structures as well as the code necessary to use the
// trie to w.
func (b *builder) print(w io.Writer) error {
b.Stats.NValueEntries = len(b.ValueBlocks) * blockSize
b.Stats.NValueBytes = len(b.ValueBlocks) * blockSize * b.ValueSize
b.Stats.NIndexEntries = len(b.IndexBlocks) * blockSize
b.Stats.NIndexBytes = len(b.IndexBlocks) * blockSize * b.IndexSize
b.Stats.NHandleBytes = len(b.Trie) * 2 * b.IndexSize
// If we only have one root trie, all starter blocks are at position 0 and
// we can access the arrays directly.
if len(b.Trie) == 1 {
// At this point we cannot refer to the generated tables directly.
b.ASCIIBlock = b.Name + "Values"
b.StarterBlock = b.Name + "Index"
} else {
// Otherwise we need to have explicit starter indexes in the trie
// structure.
b.ASCIIBlock = "t.ascii"
b.StarterBlock = "t.utf8Start"
}
b.SourceType = "[]byte"
if err := lookupGen.Execute(w, b); err != nil {
return err
}
b.SourceType = "string"
if err := lookupGen.Execute(w, b); err != nil {
return err
}
if err := trieGen.Execute(w, b); err != nil {
return err
}
for _, c := range b.Compactions {
if err := c.c.Print(w); err != nil {
return err
}
}
return nil
}
func printValues(n int, values []uint64) string {
w := &bytes.Buffer{}
boff := n * blockSize
fmt.Fprintf(w, "\t// Block %#x, offset %#x", n, boff)
var newline bool
for i, v := range values {
if i%6 == 0 {
newline = true
}
if v != 0 {
if newline {
fmt.Fprintf(w, "\n")
newline = false
}
fmt.Fprintf(w, "\t%#02x:%#04x, ", boff+i, v)
}
}
return w.String()
}
func printIndex(b *builder, nr int, n *node) string {
w := &bytes.Buffer{}
boff := nr * blockSize
fmt.Fprintf(w, "\t// Block %#x, offset %#x", nr, boff)
var newline bool
for i, c := range n.children {
if i%8 == 0 {
newline = true
}
if c != nil {
v := b.Compactions[c.index.compaction].Offset + uint32(c.index.index)
if v != 0 {
if newline {
fmt.Fprintf(w, "\n")
newline = false
}
fmt.Fprintf(w, "\t%#02x:%#02x, ", boff+i, v)
}
}
}
return w.String()
}
var (
trieGen = template.Must(template.New("trie").Funcs(template.FuncMap{
"printValues": printValues,
"printIndex": printIndex,
"title": strings.Title,
"dec": func(x int) int { return x - 1 },
"psize": func(n int) string {
return fmt.Sprintf("%d bytes (%.2f KiB)", n, float64(n)/1024)
},
}).Parse(trieTemplate))
lookupGen = template.Must(template.New("lookup").Parse(lookupTemplate))
)
// TODO: consider the return type of lookup. It could be uint64, even if the
// internal value type is smaller. We will have to verify this with the
// performance of unicode/norm, which is very sensitive to such changes.
const trieTemplate = `{{$b := .}}{{$multi := gt (len .Trie) 1}}
// {{.Name}}Trie. Total size: {{psize .Size}}. Checksum: {{printf "%08x" .Checksum}}.
type {{.Name}}Trie struct { {{if $multi}}
ascii []{{.ValueType}} // index for ASCII bytes
utf8Start []{{.IndexType}} // index for UTF-8 bytes >= 0xC0
{{end}}}
func new{{title .Name}}Trie(i int) *{{.Name}}Trie { {{if $multi}}
h := {{.Name}}TrieHandles[i]
return &{{.Name}}Trie{ {{.Name}}Values[uint32(h.ascii)<<6:], {{.Name}}Index[uint32(h.multi)<<6:] }
}
type {{.Name}}TrieHandle struct {
ascii, multi {{.IndexType}}
}
// {{.Name}}TrieHandles: {{len .Trie}} handles, {{.Stats.NHandleBytes}} bytes
var {{.Name}}TrieHandles = [{{len .Trie}}]{{.Name}}TrieHandle{
{{range .Trie}} { {{.ASCIIIndex}}, {{.StarterIndex}} }, // {{printf "%08x" .Checksum}}: {{.Name}}
{{end}}}{{else}}
return &{{.Name}}Trie{}
}
{{end}}
// lookupValue determines the type of block n and looks up the value for b.
func (t *{{.Name}}Trie) lookupValue(n uint32, b byte) {{.ValueType}}{{$last := dec (len .Compactions)}} {
switch { {{range $i, $c := .Compactions}}
{{if eq $i $last}}default{{else}}case n < {{$c.Cutoff}}{{end}}:{{if ne $i 0}}
n -= {{$c.Offset}}{{end}}
return {{print $b.ValueType}}({{$c.Handler}}){{end}}
}
}
// {{.Name}}Values: {{len .ValueBlocks}} blocks, {{.Stats.NValueEntries}} entries, {{.Stats.NValueBytes}} bytes
// The third block is the zero block.
var {{.Name}}Values = [{{.Stats.NValueEntries}}]{{.ValueType}} {
{{range $i, $v := .ValueBlocks}}{{printValues $i $v}}
{{end}}}
// {{.Name}}Index: {{len .IndexBlocks}} blocks, {{.Stats.NIndexEntries}} entries, {{.Stats.NIndexBytes}} bytes
// Block 0 is the zero block.
var {{.Name}}Index = [{{.Stats.NIndexEntries}}]{{.IndexType}} {
{{range $i, $v := .IndexBlocks}}{{printIndex $b $i $v}}
{{end}}}
`
// TODO: consider allowing zero-length strings after evaluating performance with
// unicode/norm.
const lookupTemplate = `
// lookup{{if eq .SourceType "string"}}String{{end}} returns the trie value for the first UTF-8 encoding in s and
// the width in bytes of this encoding. The size will be 0 if s does not
// hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *{{.Name}}Trie) lookup{{if eq .SourceType "string"}}String{{end}}(s {{.SourceType}}) (v {{.ValueType}}, sz int) {
c0 := s[0]
switch {
case c0 < 0x80: // is ASCII
return {{.ASCIIBlock}}[c0], 1
case c0 < 0xC2:
return 0, 1 // Illegal UTF-8: not a starter, not ASCII.
case c0 < 0xE0: // 2-byte UTF-8
if len(s) < 2 {
return 0, 0
}
i := {{.StarterBlock}}[c0]
c1 := s[1]
if c1 < 0x80 || 0xC0 <= c1 {
return 0, 1 // Illegal UTF-8: not a continuation byte.
}
return t.lookupValue(uint32(i), c1), 2
case c0 < 0xF0: // 3-byte UTF-8
if len(s) < 3 {
return 0, 0
}
i := {{.StarterBlock}}[c0]
c1 := s[1]
if c1 < 0x80 || 0xC0 <= c1 {
return 0, 1 // Illegal UTF-8: not a continuation byte.
}
o := uint32(i)<<6 + uint32(c1)
i = {{.Name}}Index[o]
c2 := s[2]
if c2 < 0x80 || 0xC0 <= c2 {
return 0, 2 // Illegal UTF-8: not a continuation byte.
}
return t.lookupValue(uint32(i), c2), 3
case c0 < 0xF8: // 4-byte UTF-8
if len(s) < 4 {
return 0, 0
}
i := {{.StarterBlock}}[c0]
c1 := s[1]
if c1 < 0x80 || 0xC0 <= c1 {
return 0, 1 // Illegal UTF-8: not a continuation byte.
}
o := uint32(i)<<6 + uint32(c1)
i = {{.Name}}Index[o]
c2 := s[2]
if c2 < 0x80 || 0xC0 <= c2 {
return 0, 2 // Illegal UTF-8: not a continuation byte.
}
o = uint32(i)<<6 + uint32(c2)
i = {{.Name}}Index[o]
c3 := s[3]
if c3 < 0x80 || 0xC0 <= c3 {
return 0, 3 // Illegal UTF-8: not a continuation byte.
}
return t.lookupValue(uint32(i), c3), 4
}
// Illegal rune
return 0, 1
}
// lookup{{if eq .SourceType "string"}}String{{end}}Unsafe returns the trie value for the first UTF-8 encoding in s.
// s must start with a full and valid UTF-8 encoded rune.
func (t *{{.Name}}Trie) lookup{{if eq .SourceType "string"}}String{{end}}Unsafe(s {{.SourceType}}) {{.ValueType}} {
c0 := s[0]
if c0 < 0x80 { // is ASCII
return {{.ASCIIBlock}}[c0]
}
i := {{.StarterBlock}}[c0]
if c0 < 0xE0 { // 2-byte UTF-8
return t.lookupValue(uint32(i), s[1])
}
i = {{.Name}}Index[uint32(i)<<6+uint32(s[1])]
if c0 < 0xF0 { // 3-byte UTF-8
return t.lookupValue(uint32(i), s[2])
}
i = {{.Name}}Index[uint32(i)<<6+uint32(s[2])]
if c0 < 0xF8 { // 4-byte UTF-8
return t.lookupValue(uint32(i), s[3])
}
return 0
}
`

494
vendor/golang.org/x/text/internal/triegen/triegen.go generated vendored Normal file
View File

@@ -0,0 +1,494 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package triegen implements a code generator for a trie for associating
// unsigned integer values with UTF-8 encoded runes.
//
// Many of the go.text packages use tries for storing per-rune information. A
// trie is especially useful if many of the runes have the same value. If this
// is the case, many blocks can be expected to be shared allowing for
// information on many runes to be stored in little space.
//
// As most of the lookups are done directly on []byte slices, the tries use the
// UTF-8 bytes directly for the lookup. This saves a conversion from UTF-8 to
// runes and contributes a little bit to better performance. It also naturally
// provides a fast path for ASCII.
//
// Space is also an issue. There are many code points defined in Unicode and as
// a result tables can get quite large. So every byte counts. The triegen
// package automatically chooses the smallest integer values to represent the
// tables. Compacters allow further compression of the trie by allowing for
// alternative representations of individual trie blocks.
//
// triegen allows generating multiple tries as a single structure. This is
// useful when, for example, one wants to generate tries for several languages
// that have a lot of values in common. Some existing libraries for
// internationalization store all per-language data as a dynamically loadable
// chunk. The go.text packages are designed with the assumption that the user
// typically wants to compile in support for all supported languages, in line
// with the approach common to Go to create a single standalone binary. The
// multi-root trie approach can give significant storage savings in this
// scenario.
//
// triegen generates both tables and code. The code is optimized to use the
// automatically chosen data types. The following code is generated for a Trie
// or multiple Tries named "foo":
// - type fooTrie
// The trie type.
//
// - func newFooTrie(x int) *fooTrie
// Trie constructor, where x is the index of the trie passed to Gen.
//
// - func (t *fooTrie) lookup(s []byte) (v uintX, sz int)
// The lookup method, where uintX is automatically chosen.
//
// - func lookupString, lookupUnsafe and lookupStringUnsafe
// Variants of the above.
//
// - var fooValues and fooIndex and any tables generated by Compacters.
// The core trie data.
//
// - var fooTrieHandles
// Indexes of starter blocks in case of multiple trie roots.
//
// It is recommended that users test the generated trie by checking the returned
// value for every rune. Such exhaustive tests are possible as the the number of
// runes in Unicode is limited.
package triegen // import "golang.org/x/text/internal/triegen"
// TODO: Arguably, the internally optimized data types would not have to be
// exposed in the generated API. We could also investigate not generating the
// code, but using it through a package. We would have to investigate the impact
// on performance of making such change, though. For packages like unicode/norm,
// small changes like this could tank performance.
import (
"encoding/binary"
"fmt"
"hash/crc64"
"io"
"log"
"unicode/utf8"
)
// builder builds a set of tries for associating values with runes. The set of
// tries can share common index and value blocks.
type builder struct {
Name string
// ValueType is the type of the trie values looked up.
ValueType string
// ValueSize is the byte size of the ValueType.
ValueSize int
// IndexType is the type of trie index values used for all UTF-8 bytes of
// a rune except the last one.
IndexType string
// IndexSize is the byte size of the IndexType.
IndexSize int
// SourceType is used when generating the lookup functions. If the user
// requests StringSupport, all lookup functions will be generated for
// string input as well.
SourceType string
Trie []*Trie
IndexBlocks []*node
ValueBlocks [][]uint64
Compactions []compaction
Checksum uint64
ASCIIBlock string
StarterBlock string
indexBlockIdx map[uint64]int
valueBlockIdx map[uint64]nodeIndex
asciiBlockIdx map[uint64]int
// Stats are used to fill out the template.
Stats struct {
NValueEntries int
NValueBytes int
NIndexEntries int
NIndexBytes int
NHandleBytes int
}
err error
}
// A nodeIndex encodes the index of a node, which is defined by the compaction
// which stores it and an index within the compaction. For internal nodes, the
// compaction is always 0.
type nodeIndex struct {
compaction int
index int
}
// compaction keeps track of stats used for the compaction.
type compaction struct {
c Compacter
blocks []*node
maxHandle uint32
totalSize int
// Used by template-based generator and thus exported.
Cutoff uint32
Offset uint32
Handler string
}
func (b *builder) setError(err error) {
if b.err == nil {
b.err = err
}
}
// An Option can be passed to Gen.
type Option func(b *builder) error
// Compact configures the trie generator to use the given Compacter.
func Compact(c Compacter) Option {
return func(b *builder) error {
b.Compactions = append(b.Compactions, compaction{
c: c,
Handler: c.Handler() + "(n, b)"})
return nil
}
}
// Gen writes Go code for a shared trie lookup structure to w for the given
// Tries. The generated trie type will be called nameTrie. newNameTrie(x) will
// return the *nameTrie for tries[x]. A value can be looked up by using one of
// the various lookup methods defined on nameTrie. It returns the table size of
// the generated trie.
func Gen(w io.Writer, name string, tries []*Trie, opts ...Option) (sz int, err error) {
// The index contains two dummy blocks, followed by the zero block. The zero
// block is at offset 0x80, so that the offset for the zero block for
// continuation bytes is 0.
b := &builder{
Name: name,
Trie: tries,
IndexBlocks: []*node{{}, {}, {}},
Compactions: []compaction{{
Handler: name + "Values[n<<6+uint32(b)]",
}},
// The 0 key in indexBlockIdx and valueBlockIdx is the hash of the zero
// block.
indexBlockIdx: map[uint64]int{0: 0},
valueBlockIdx: map[uint64]nodeIndex{0: {}},
asciiBlockIdx: map[uint64]int{},
}
b.Compactions[0].c = (*simpleCompacter)(b)
for _, f := range opts {
if err := f(b); err != nil {
return 0, err
}
}
b.build()
if b.err != nil {
return 0, b.err
}
if err = b.print(w); err != nil {
return 0, err
}
return b.Size(), nil
}
// A Trie represents a single root node of a trie. A builder may build several
// overlapping tries at once.
type Trie struct {
root *node
hiddenTrie
}
// hiddenTrie contains values we want to be visible to the template generator,
// but hidden from the API documentation.
type hiddenTrie struct {
Name string
Checksum uint64
ASCIIIndex int
StarterIndex int
}
// NewTrie returns a new trie root.
func NewTrie(name string) *Trie {
return &Trie{
&node{
children: make([]*node, blockSize),
values: make([]uint64, utf8.RuneSelf),
},
hiddenTrie{Name: name},
}
}
// Gen is a convenience wrapper around the Gen func passing t as the only trie
// and uses the name passed to NewTrie. It returns the size of the generated
// tables.
func (t *Trie) Gen(w io.Writer, opts ...Option) (sz int, err error) {
return Gen(w, t.Name, []*Trie{t}, opts...)
}
// node is a node of the intermediate trie structure.
type node struct {
// children holds this node's children. It is always of length 64.
// A child node may be nil.
children []*node
// values contains the values of this node. If it is non-nil, this node is
// either a root or leaf node:
// For root nodes, len(values) == 128 and it maps the bytes in [0x00, 0x7F].
// For leaf nodes, len(values) == 64 and it maps the bytes in [0x80, 0xBF].
values []uint64
index nodeIndex
}
// Insert associates value with the given rune. Insert will panic if a non-zero
// value is passed for an invalid rune.
func (t *Trie) Insert(r rune, value uint64) {
if value == 0 {
return
}
s := string(r)
if []rune(s)[0] != r && value != 0 {
// Note: The UCD tables will always assign what amounts to a zero value
// to a surrogate. Allowing a zero value for an illegal rune allows
// users to iterate over [0..MaxRune] without having to explicitly
// exclude surrogates, which would be tedious.
panic(fmt.Sprintf("triegen: non-zero value for invalid rune %U", r))
}
if len(s) == 1 {
// It is a root node value (ASCII).
t.root.values[s[0]] = value
return
}
n := t.root
for ; len(s) > 1; s = s[1:] {
if n.children == nil {
n.children = make([]*node, blockSize)
}
p := s[0] % blockSize
c := n.children[p]
if c == nil {
c = &node{}
n.children[p] = c
}
if len(s) > 2 && c.values != nil {
log.Fatalf("triegen: insert(%U): found internal node with values", r)
}
n = c
}
if n.values == nil {
n.values = make([]uint64, blockSize)
}
if n.children != nil {
log.Fatalf("triegen: insert(%U): found leaf node that also has child nodes", r)
}
n.values[s[0]-0x80] = value
}
// Size returns the number of bytes the generated trie will take to store. It
// needs to be exported as it is used in the templates.
func (b *builder) Size() int {
// Index blocks.
sz := len(b.IndexBlocks) * blockSize * b.IndexSize
// Skip the first compaction, which represents the normal value blocks, as
// its totalSize does not account for the ASCII blocks, which are managed
// separately.
sz += len(b.ValueBlocks) * blockSize * b.ValueSize
for _, c := range b.Compactions[1:] {
sz += c.totalSize
}
// TODO: this computation does not account for the fixed overhead of a using
// a compaction, either code or data. As for data, though, the typical
// overhead of data is in the order of bytes (2 bytes for cases). Further,
// the savings of using a compaction should anyway be substantial for it to
// be worth it.
// For multi-root tries, we also need to account for the handles.
if len(b.Trie) > 1 {
sz += 2 * b.IndexSize * len(b.Trie)
}
return sz
}
func (b *builder) build() {
// Compute the sizes of the values.
var vmax uint64
for _, t := range b.Trie {
vmax = maxValue(t.root, vmax)
}
b.ValueType, b.ValueSize = getIntType(vmax)
// Compute all block allocations.
// TODO: first compute the ASCII blocks for all tries and then the other
// nodes. ASCII blocks are more restricted in placement, as they require two
// blocks to be placed consecutively. Processing them first may improve
// sharing (at least one zero block can be expected to be saved.)
for _, t := range b.Trie {
b.Checksum += b.buildTrie(t)
}
// Compute the offsets for all the Compacters.
offset := uint32(0)
for i := range b.Compactions {
c := &b.Compactions[i]
c.Offset = offset
offset += c.maxHandle + 1
c.Cutoff = offset
}
// Compute the sizes of indexes.
// TODO: different byte positions could have different sizes. So far we have
// not found a case where this is beneficial.
imax := uint64(b.Compactions[len(b.Compactions)-1].Cutoff)
for _, ib := range b.IndexBlocks {
if x := uint64(ib.index.index); x > imax {
imax = x
}
}
b.IndexType, b.IndexSize = getIntType(imax)
}
func maxValue(n *node, max uint64) uint64 {
if n == nil {
return max
}
for _, c := range n.children {
max = maxValue(c, max)
}
for _, v := range n.values {
if max < v {
max = v
}
}
return max
}
func getIntType(v uint64) (string, int) {
switch {
case v < 1<<8:
return "uint8", 1
case v < 1<<16:
return "uint16", 2
case v < 1<<32:
return "uint32", 4
}
return "uint64", 8
}
const (
blockSize = 64
// Subtract two blocks to offset 0x80, the first continuation byte.
blockOffset = 2
// Subtract three blocks to offset 0xC0, the first non-ASCII starter.
rootBlockOffset = 3
)
var crcTable = crc64.MakeTable(crc64.ISO)
func (b *builder) buildTrie(t *Trie) uint64 {
n := t.root
// Get the ASCII offset. For the first trie, the ASCII block will be at
// position 0.
hasher := crc64.New(crcTable)
binary.Write(hasher, binary.BigEndian, n.values)
hash := hasher.Sum64()
v, ok := b.asciiBlockIdx[hash]
if !ok {
v = len(b.ValueBlocks)
b.asciiBlockIdx[hash] = v
b.ValueBlocks = append(b.ValueBlocks, n.values[:blockSize], n.values[blockSize:])
if v == 0 {
// Add the zero block at position 2 so that it will be assigned a
// zero reference in the lookup blocks.
// TODO: always do this? This would allow us to remove a check from
// the trie lookup, but at the expense of extra space. Analyze
// performance for unicode/norm.
b.ValueBlocks = append(b.ValueBlocks, make([]uint64, blockSize))
}
}
t.ASCIIIndex = v
// Compute remaining offsets.
t.Checksum = b.computeOffsets(n, true)
// We already subtracted the normal blockOffset from the index. Subtract the
// difference for starter bytes.
t.StarterIndex = n.index.index - (rootBlockOffset - blockOffset)
return t.Checksum
}
func (b *builder) computeOffsets(n *node, root bool) uint64 {
// For the first trie, the root lookup block will be at position 3, which is
// the offset for UTF-8 non-ASCII starter bytes.
first := len(b.IndexBlocks) == rootBlockOffset
if first {
b.IndexBlocks = append(b.IndexBlocks, n)
}
// We special-case the cases where all values recursively are 0. This allows
// for the use of a zero block to which all such values can be directed.
hash := uint64(0)
if n.children != nil || n.values != nil {
hasher := crc64.New(crcTable)
for _, c := range n.children {
var v uint64
if c != nil {
v = b.computeOffsets(c, false)
}
binary.Write(hasher, binary.BigEndian, v)
}
binary.Write(hasher, binary.BigEndian, n.values)
hash = hasher.Sum64()
}
if first {
b.indexBlockIdx[hash] = rootBlockOffset - blockOffset
}
// Compacters don't apply to internal nodes.
if n.children != nil {
v, ok := b.indexBlockIdx[hash]
if !ok {
v = len(b.IndexBlocks) - blockOffset
b.IndexBlocks = append(b.IndexBlocks, n)
b.indexBlockIdx[hash] = v
}
n.index = nodeIndex{0, v}
} else {
h, ok := b.valueBlockIdx[hash]
if !ok {
bestI, bestSize := 0, blockSize*b.ValueSize
for i, c := range b.Compactions[1:] {
if sz, ok := c.c.Size(n.values); ok && bestSize > sz {
bestI, bestSize = i+1, sz
}
}
c := &b.Compactions[bestI]
c.totalSize += bestSize
v := c.c.Store(n.values)
if c.maxHandle < v {
c.maxHandle = v
}
h = nodeIndex{bestI, int(v)}
b.valueBlockIdx[hash] = h
}
n.index = h
}
return hash
}

27
vendor/golang.org/x/text/internal/ucd/LICENSE generated vendored Normal file
View File

@@ -0,0 +1,27 @@
Copyright (c) 2009 The Go Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

376
vendor/golang.org/x/text/internal/ucd/ucd.go generated vendored Normal file
View File

@@ -0,0 +1,376 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package ucd provides a parser for Unicode Character Database files, the
// format of which is defined in http://www.unicode.org/reports/tr44/. See
// http://www.unicode.org/Public/UCD/latest/ucd/ for example files.
//
// It currently does not support substitutions of missing fields.
package ucd // import "golang.org/x/text/internal/ucd"
import (
"bufio"
"bytes"
"errors"
"io"
"log"
"regexp"
"strconv"
"strings"
)
// UnicodeData.txt fields.
const (
CodePoint = iota
Name
GeneralCategory
CanonicalCombiningClass
BidiClass
DecompMapping
DecimalValue
DigitValue
NumericValue
BidiMirrored
Unicode1Name
ISOComment
SimpleUppercaseMapping
SimpleLowercaseMapping
SimpleTitlecaseMapping
)
// Parse calls f for each entry in the given reader of a UCD file. It will close
// the reader upon return. It will call log.Fatal if any error occurred.
//
// This implements the most common usage pattern of using Parser.
func Parse(r io.ReadCloser, f func(p *Parser)) {
defer r.Close()
p := New(r)
for p.Next() {
f(p)
}
if err := p.Err(); err != nil {
r.Close() // os.Exit will cause defers not to be called.
log.Fatal(err)
}
}
// An Option is used to configure a Parser.
type Option func(p *Parser)
func keepRanges(p *Parser) {
p.keepRanges = true
}
var (
// KeepRanges prevents the expansion of ranges. The raw ranges can be
// obtained by calling Range(0) on the parser.
KeepRanges Option = keepRanges
)
// The Part option register a handler for lines starting with a '@'. The text
// after a '@' is available as the first field. Comments are handled as usual.
func Part(f func(p *Parser)) Option {
return func(p *Parser) {
p.partHandler = f
}
}
// The CommentHandler option passes comments that are on a line by itself to
// a given handler.
func CommentHandler(f func(s string)) Option {
return func(p *Parser) {
p.commentHandler = f
}
}
// A Parser parses Unicode Character Database (UCD) files.
type Parser struct {
scanner *bufio.Scanner
keepRanges bool // Don't expand rune ranges in field 0.
err error
comment []byte
field [][]byte
// parsedRange is needed in case Range(0) is called more than once for one
// field. In some cases this requires scanning ahead.
parsedRange bool
rangeStart, rangeEnd rune
partHandler func(p *Parser)
commentHandler func(s string)
}
func (p *Parser) setError(err error) {
if p.err == nil {
p.err = err
}
}
func (p *Parser) getField(i int) []byte {
if i >= len(p.field) {
return nil
}
return p.field[i]
}
// Err returns a non-nil error if any error occurred during parsing.
func (p *Parser) Err() error {
return p.err
}
// New returns a Parser for the given Reader.
func New(r io.Reader, o ...Option) *Parser {
p := &Parser{
scanner: bufio.NewScanner(r),
}
for _, f := range o {
f(p)
}
return p
}
// Next parses the next line in the file. It returns true if a line was parsed
// and false if it reached the end of the file.
func (p *Parser) Next() bool {
if !p.keepRanges && p.rangeStart < p.rangeEnd {
p.rangeStart++
return true
}
p.comment = nil
p.field = p.field[:0]
p.parsedRange = false
for p.scanner.Scan() {
b := p.scanner.Bytes()
if len(b) == 0 {
continue
}
if b[0] == '#' {
if p.commentHandler != nil {
p.commentHandler(strings.TrimSpace(string(b[1:])))
}
continue
}
// Parse line
if i := bytes.IndexByte(b, '#'); i != -1 {
p.comment = bytes.TrimSpace(b[i+1:])
b = b[:i]
}
if b[0] == '@' {
if p.partHandler != nil {
p.field = append(p.field, bytes.TrimSpace(b[1:]))
p.partHandler(p)
p.field = p.field[:0]
}
p.comment = nil
continue
}
for {
i := bytes.IndexByte(b, ';')
if i == -1 {
p.field = append(p.field, bytes.TrimSpace(b))
break
}
p.field = append(p.field, bytes.TrimSpace(b[:i]))
b = b[i+1:]
}
if !p.keepRanges {
p.rangeStart, p.rangeEnd = p.getRange(0)
}
return true
}
p.setError(p.scanner.Err())
return false
}
func parseRune(b []byte) (rune, error) {
if len(b) > 2 && b[0] == 'U' && b[1] == '+' {
b = b[2:]
}
x, err := strconv.ParseUint(string(b), 16, 32)
return rune(x), err
}
func (p *Parser) parseRune(b []byte) rune {
x, err := parseRune(b)
p.setError(err)
return x
}
// Rune parses and returns field i as a rune.
func (p *Parser) Rune(i int) rune {
if i > 0 || p.keepRanges {
return p.parseRune(p.getField(i))
}
return p.rangeStart
}
// Runes interprets and returns field i as a sequence of runes.
func (p *Parser) Runes(i int) (runes []rune) {
add := func(b []byte) {
if b = bytes.TrimSpace(b); len(b) > 0 {
runes = append(runes, p.parseRune(b))
}
}
for b := p.getField(i); ; {
i := bytes.IndexByte(b, ' ')
if i == -1 {
add(b)
break
}
add(b[:i])
b = b[i+1:]
}
return
}
var (
errIncorrectLegacyRange = errors.New("ucd: unmatched <* First>")
// reRange matches one line of a legacy rune range.
reRange = regexp.MustCompile("^([0-9A-F]*);<([^,]*), ([^>]*)>(.*)$")
)
// Range parses and returns field i as a rune range. A range is inclusive at
// both ends. If the field only has one rune, first and last will be identical.
// It supports the legacy format for ranges used in UnicodeData.txt.
func (p *Parser) Range(i int) (first, last rune) {
if !p.keepRanges {
return p.rangeStart, p.rangeStart
}
return p.getRange(i)
}
func (p *Parser) getRange(i int) (first, last rune) {
b := p.getField(i)
if k := bytes.Index(b, []byte("..")); k != -1 {
return p.parseRune(b[:k]), p.parseRune(b[k+2:])
}
// The first field may not be a rune, in which case we may ignore any error
// and set the range as 0..0.
x, err := parseRune(b)
if err != nil {
// Disable range parsing henceforth. This ensures that an error will be
// returned if the user subsequently will try to parse this field as
// a Rune.
p.keepRanges = true
}
// Special case for UnicodeData that was retained for backwards compatibility.
if i == 0 && len(p.field) > 1 && bytes.HasSuffix(p.field[1], []byte("First>")) {
if p.parsedRange {
return p.rangeStart, p.rangeEnd
}
mf := reRange.FindStringSubmatch(p.scanner.Text())
if mf == nil || !p.scanner.Scan() {
p.setError(errIncorrectLegacyRange)
return x, x
}
// Using Bytes would be more efficient here, but Text is a lot easier
// and this is not a frequent case.
ml := reRange.FindStringSubmatch(p.scanner.Text())
if ml == nil || mf[2] != ml[2] || ml[3] != "Last" || mf[4] != ml[4] {
p.setError(errIncorrectLegacyRange)
return x, x
}
p.rangeStart, p.rangeEnd = x, p.parseRune(p.scanner.Bytes()[:len(ml[1])])
p.parsedRange = true
return p.rangeStart, p.rangeEnd
}
return x, x
}
// bools recognizes all valid UCD boolean values.
var bools = map[string]bool{
"": false,
"N": false,
"No": false,
"F": false,
"False": false,
"Y": true,
"Yes": true,
"T": true,
"True": true,
}
// Bool parses and returns field i as a boolean value.
func (p *Parser) Bool(i int) bool {
b := p.getField(i)
for s, v := range bools {
if bstrEq(b, s) {
return v
}
}
p.setError(strconv.ErrSyntax)
return false
}
// Int parses and returns field i as an integer value.
func (p *Parser) Int(i int) int {
x, err := strconv.ParseInt(string(p.getField(i)), 10, 64)
p.setError(err)
return int(x)
}
// Uint parses and returns field i as an unsigned integer value.
func (p *Parser) Uint(i int) uint {
x, err := strconv.ParseUint(string(p.getField(i)), 10, 64)
p.setError(err)
return uint(x)
}
// Float parses and returns field i as a decimal value.
func (p *Parser) Float(i int) float64 {
x, err := strconv.ParseFloat(string(p.getField(i)), 64)
p.setError(err)
return x
}
// String parses and returns field i as a string value.
func (p *Parser) String(i int) string {
return string(p.getField(i))
}
// Strings parses and returns field i as a space-separated list of strings.
func (p *Parser) Strings(i int) []string {
ss := strings.Split(string(p.getField(i)), " ")
for i, s := range ss {
ss[i] = strings.TrimSpace(s)
}
return ss
}
// Comment returns the comments for the current line.
func (p *Parser) Comment() string {
return string(p.comment)
}
var errUndefinedEnum = errors.New("ucd: undefined enum value")
// Enum interprets and returns field i as a value that must be one of the values
// in enum.
func (p *Parser) Enum(i int, enum ...string) string {
b := p.getField(i)
for _, s := range enum {
if bstrEq(b, s) {
return s
}
}
p.setError(errUndefinedEnum)
return ""
}
func bstrEq(b []byte, s string) bool {
if len(b) != len(s) {
return false
}
for i, c := range b {
if c != s[i] {
return false
}
}
return true
}

27
vendor/golang.org/x/text/transform/LICENSE generated vendored Normal file
View File

@@ -0,0 +1,27 @@
Copyright (c) 2009 The Go Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@@ -1,37 +0,0 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package transform_test
import (
"fmt"
"unicode"
"golang.org/x/text/transform"
"golang.org/x/text/unicode/norm"
)
func ExampleRemoveFunc() {
input := []byte(`tschüß; до свидания`)
b := make([]byte, len(input))
t := transform.RemoveFunc(unicode.IsSpace)
n, _, _ := t.Transform(b, input, true)
fmt.Println(string(b[:n]))
t = transform.RemoveFunc(func(r rune) bool {
return !unicode.Is(unicode.Latin, r)
})
n, _, _ = t.Transform(b, input, true)
fmt.Println(string(b[:n]))
n, _, _ = t.Transform(b, norm.NFD.Bytes(input), true)
fmt.Println(string(b[:n]))
// Output:
// tschüß;досвидания
// tschüß
// tschuß
}

View File

@@ -24,6 +24,10 @@ var (
// complete the transformation.
ErrShortSrc = errors.New("transform: short source buffer")
// ErrEndOfSpan means that the input and output (the transformed input)
// are not identical.
ErrEndOfSpan = errors.New("transform: input and output are not identical")
// errInconsistentByteCount means that Transform returned success (nil
// error) but also returned nSrc inconsistent with the src argument.
errInconsistentByteCount = errors.New("transform: inconsistent byte count returned")
@@ -60,6 +64,41 @@ type Transformer interface {
Reset()
}
// SpanningTransformer extends the Transformer interface with a Span method
// that determines how much of the input already conforms to the Transformer.
type SpanningTransformer interface {
Transformer
// Span returns a position in src such that transforming src[:n] results in
// identical output src[:n] for these bytes. It does not necessarily return
// the largest such n. The atEOF argument tells whether src represents the
// last bytes of the input.
//
// Callers should always account for the n bytes consumed before
// considering the error err.
//
// A nil error means that all input bytes are known to be identical to the
// output produced by the Transformer. A nil error can be be returned
// regardless of whether atEOF is true. If err is nil, then then n must
// equal len(src); the converse is not necessarily true.
//
// ErrEndOfSpan means that the Transformer output may differ from the
// input after n bytes. Note that n may be len(src), meaning that the output
// would contain additional bytes after otherwise identical output.
// ErrShortSrc means that src had insufficient data to determine whether the
// remaining bytes would change. Other than the error conditions listed
// here, implementations are free to report other errors that arise.
//
// Calling Span can modify the Transformer state as a side effect. In
// effect, it does the transformation just as calling Transform would, only
// without copying to a destination buffer and only up to a point it can
// determine the input and output bytes are the same. This is obviously more
// limited than calling Transform, but can be more efficient in terms of
// copying and allocating buffers. Calls to Span and Transform may be
// interleaved.
Span(src []byte, atEOF bool) (n int, err error)
}
// NopResetter can be embedded by implementations of Transformer to add a nop
// Reset method.
type NopResetter struct{}
@@ -207,7 +246,9 @@ func (w *Writer) Write(data []byte) (n int, err error) {
return n, werr
}
src = src[nSrc:]
if w.n > 0 && len(src) <= n {
if w.n == 0 {
n += nSrc
} else if len(src) <= n {
// Enough bytes from w.src have been consumed. We make src point
// to data instead to reduce the copying.
w.n = 0
@@ -216,35 +257,46 @@ func (w *Writer) Write(data []byte) (n int, err error) {
if n < len(data) && (err == nil || err == ErrShortSrc) {
continue
}
} else {
n += nSrc
}
switch {
case err == ErrShortDst && (nDst > 0 || nSrc > 0):
case err == ErrShortSrc && len(src) < len(w.src):
m := copy(w.src, src)
// If w.n > 0, bytes from data were already copied to w.src and n
// was already set to the number of bytes consumed.
if w.n == 0 {
n += m
switch err {
case ErrShortDst:
// This error is okay as long as we are making progress.
if nDst > 0 || nSrc > 0 {
continue
}
case ErrShortSrc:
if len(src) < len(w.src) {
m := copy(w.src, src)
// If w.n > 0, bytes from data were already copied to w.src and n
// was already set to the number of bytes consumed.
if w.n == 0 {
n += m
}
w.n = m
err = nil
} else if nDst > 0 || nSrc > 0 {
// Not enough buffer to store the remainder. Keep processing as
// long as there is progress. Without this case, transforms that
// require a lookahead larger than the buffer may result in an
// error. This is not something one may expect to be common in
// practice, but it may occur when buffers are set to small
// sizes during testing.
continue
}
case nil:
if w.n > 0 {
err = errInconsistentByteCount
}
w.n = m
return n, nil
case err == nil && w.n > 0:
return n, errInconsistentByteCount
default:
return n, err
}
return n, err
}
}
// Close implements the io.Closer interface.
func (w *Writer) Close() error {
for src := w.src[:w.n]; len(src) > 0; {
src := w.src[:w.n]
for {
nDst, nSrc, err := w.t.Transform(w.dst, src, true)
if nDst == 0 {
return err
}
if _, werr := w.w.Write(w.dst[:nDst]); werr != nil {
return werr
}
@@ -253,7 +305,6 @@ func (w *Writer) Close() error {
}
src = src[nSrc:]
}
return nil
}
type nop struct{ NopResetter }
@@ -266,6 +317,10 @@ func (nop) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
return n, n, err
}
func (nop) Span(src []byte, atEOF bool) (n int, err error) {
return len(src), nil
}
type discard struct{ NopResetter }
func (discard) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
@@ -277,8 +332,8 @@ var (
// by consuming all bytes and writing nothing.
Discard Transformer = discard{}
// Nop is a Transformer that copies src to dst.
Nop Transformer = nop{}
// Nop is a SpanningTransformer that copies src to dst.
Nop SpanningTransformer = nop{}
)
// chain is a sequence of links. A chain with N Transformers has N+1 links and
@@ -346,6 +401,8 @@ func (c *chain) Reset() {
}
}
// TODO: make chain use Span (is going to be fun to implement!)
// Transform applies the transformers of c in sequence.
func (c *chain) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
// Set up src and dst in the chain.
@@ -436,8 +493,7 @@ func (c *chain) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err erro
return dstL.n, srcL.p, err
}
// RemoveFunc returns a Transformer that removes from the input all runes r for
// which f(r) is true. Illegal bytes in the input are replaced by RuneError.
// Deprecated: use runes.Remove instead.
func RemoveFunc(f func(r rune) bool) Transformer {
return removeF(f)
}
@@ -493,7 +549,9 @@ func (t removeF) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err err
// of b to the start of the new slice.
func grow(b []byte, n int) []byte {
m := len(b)
if m <= 256 {
if m <= 32 {
m = 64
} else if m <= 256 {
m *= 2
} else {
m += m >> 1
@@ -508,11 +566,14 @@ const initialBufSize = 128
// String returns a string with the result of converting s[:n] using t, where
// n <= len(s). If err == nil, n will be len(s). It calls Reset on t.
func String(t Transformer, s string) (result string, n int, err error) {
if s == "" {
return "", 0, nil
}
t.Reset()
if s == "" {
// Fast path for the common case for empty input. Results in about a
// 86% reduction of running time for BenchmarkStringLowerEmpty.
if _, _, err := t.Transform(nil, nil, true); err == nil {
return "", 0, nil
}
}
// Allocate only once. Note that both dst and src escape when passed to
// Transform.
@@ -520,74 +581,88 @@ func String(t Transformer, s string) (result string, n int, err error) {
dst := buf[:initialBufSize:initialBufSize]
src := buf[initialBufSize : 2*initialBufSize]
// Avoid allocation if the transformed string is identical to the original.
// After this loop, pDst will point to the furthest point in s for which it
// could be detected that t gives equal results, src[:nSrc] will
// indicated the last processed chunk of s for which the output is not equal
// and dst[:nDst] will be the transform of this chunk.
var nDst, nSrc int
pDst := 0 // Used as index in both src and dst in this loop.
// The input string s is transformed in multiple chunks (starting with a
// chunk size of initialBufSize). nDst and nSrc are per-chunk (or
// per-Transform-call) indexes, pDst and pSrc are overall indexes.
nDst, nSrc := 0, 0
pDst, pSrc := 0, 0
// pPrefix is the length of a common prefix: the first pPrefix bytes of the
// result will equal the first pPrefix bytes of s. It is not guaranteed to
// be the largest such value, but if pPrefix, len(result) and len(s) are
// all equal after the final transform (i.e. calling Transform with atEOF
// being true returned nil error) then we don't need to allocate a new
// result string.
pPrefix := 0
for {
n := copy(src, s[pDst:])
nDst, nSrc, err = t.Transform(dst, src[:n], pDst+n == len(s))
// Invariant: pDst == pPrefix && pSrc == pPrefix.
// Note 1: we will not enter the loop with pDst == len(s) and we will
// not end the loop with it either. So if nSrc is 0, this means there is
// some kind of error from which we cannot recover given the current
// buffer sizes. We will give up in this case.
// Note 2: it is not entirely correct to simply do a bytes.Equal as
// a Transformer may buffer internally. It will work in most cases,
// though, and no harm is done if it doesn't work.
// TODO: let transformers implement an optional Spanner interface, akin
// to norm's QuickSpan. This would even allow us to avoid any allocation.
if nSrc == 0 || !bytes.Equal(dst[:nDst], src[:nSrc]) {
break
}
if pDst += nDst; pDst == len(s) {
return s, pDst, nil
}
}
// Move the bytes seen so far to dst.
pSrc := pDst + nSrc
if pDst+nDst <= initialBufSize {
copy(dst[pDst:], dst[:nDst])
} else {
b := make([]byte, len(s)+nDst-nSrc)
copy(b[pDst:], dst[:nDst])
dst = b
}
copy(dst, s[:pDst])
pDst += nDst
if err != nil && err != ErrShortDst && err != ErrShortSrc {
return string(dst[:pDst]), pSrc, err
}
// Complete the string with the remainder.
for {
n := copy(src, s[pSrc:])
nDst, nSrc, err = t.Transform(dst[pDst:], src[:n], pSrc+n == len(s))
nDst, nSrc, err = t.Transform(dst, src[:n], pSrc+n == len(s))
pDst += nDst
pSrc += nSrc
switch err {
case nil:
if pSrc == len(s) {
return string(dst[:pDst]), pSrc, nil
// TODO: let transformers implement an optional Spanner interface, akin
// to norm's QuickSpan. This would even allow us to avoid any allocation.
if !bytes.Equal(dst[:nDst], src[:nSrc]) {
break
}
pPrefix = pSrc
if err == ErrShortDst {
// A buffer can only be short if a transformer modifies its input.
break
} else if err == ErrShortSrc {
if nSrc == 0 {
// No progress was made.
break
}
case ErrShortDst:
// Do not grow as long as we can make progress. This may avoid
// excessive allocations.
// Equal so far and !atEOF, so continue checking.
} else if err != nil || pPrefix == len(s) {
return string(s[:pPrefix]), pPrefix, err
}
}
// Post-condition: pDst == pPrefix + nDst && pSrc == pPrefix + nSrc.
// We have transformed the first pSrc bytes of the input s to become pDst
// transformed bytes. Those transformed bytes are discontiguous: the first
// pPrefix of them equal s[:pPrefix] and the last nDst of them equal
// dst[:nDst]. We copy them around, into a new dst buffer if necessary, so
// that they become one contiguous slice: dst[:pDst].
if pPrefix != 0 {
newDst := dst
if pDst > len(newDst) {
newDst = make([]byte, len(s)+nDst-nSrc)
}
copy(newDst[pPrefix:pDst], dst[:nDst])
copy(newDst[:pPrefix], s[:pPrefix])
dst = newDst
}
// Prevent duplicate Transform calls with atEOF being true at the end of
// the input. Also return if we have an unrecoverable error.
if (err == nil && pSrc == len(s)) ||
(err != nil && err != ErrShortDst && err != ErrShortSrc) {
return string(dst[:pDst]), pSrc, err
}
// Transform the remaining input, growing dst and src buffers as necessary.
for {
n := copy(src, s[pSrc:])
nDst, nSrc, err := t.Transform(dst[pDst:], src[:n], pSrc+n == len(s))
pDst += nDst
pSrc += nSrc
// If we got ErrShortDst or ErrShortSrc, do not grow as long as we can
// make progress. This may avoid excessive allocations.
if err == ErrShortDst {
if nDst == 0 {
dst = grow(dst, pDst)
}
case ErrShortSrc:
} else if err == ErrShortSrc {
if nSrc == 0 {
src = grow(src, 0)
}
default:
} else if err != nil || pSrc == len(s) {
return string(dst[:pDst]), pSrc, err
}
}

File diff suppressed because it is too large Load Diff

27
vendor/golang.org/x/text/unicode/cldr/LICENSE generated vendored Normal file
View File

@@ -0,0 +1,27 @@
Copyright (c) 2009 The Go Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

100
vendor/golang.org/x/text/unicode/cldr/base.go generated vendored Normal file
View File

@@ -0,0 +1,100 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cldr
import (
"encoding/xml"
"regexp"
"strconv"
)
// Elem is implemented by every XML element.
type Elem interface {
setEnclosing(Elem)
setName(string)
enclosing() Elem
GetCommon() *Common
}
type hidden struct {
CharData string `xml:",chardata"`
Alias *struct {
Common
Source string `xml:"source,attr"`
Path string `xml:"path,attr"`
} `xml:"alias"`
Def *struct {
Common
Choice string `xml:"choice,attr,omitempty"`
Type string `xml:"type,attr,omitempty"`
} `xml:"default"`
}
// Common holds several of the most common attributes and sub elements
// of an XML element.
type Common struct {
XMLName xml.Name
name string
enclElem Elem
Type string `xml:"type,attr,omitempty"`
Reference string `xml:"reference,attr,omitempty"`
Alt string `xml:"alt,attr,omitempty"`
ValidSubLocales string `xml:"validSubLocales,attr,omitempty"`
Draft string `xml:"draft,attr,omitempty"`
hidden
}
// Default returns the default type to select from the enclosed list
// or "" if no default value is specified.
func (e *Common) Default() string {
if e.Def == nil {
return ""
}
if e.Def.Choice != "" {
return e.Def.Choice
} else if e.Def.Type != "" {
// Type is still used by the default element in collation.
return e.Def.Type
}
return ""
}
// GetCommon returns e. It is provided such that Common implements Elem.
func (e *Common) GetCommon() *Common {
return e
}
// Data returns the character data accumulated for this element.
func (e *Common) Data() string {
e.CharData = charRe.ReplaceAllStringFunc(e.CharData, replaceUnicode)
return e.CharData
}
func (e *Common) setName(s string) {
e.name = s
}
func (e *Common) enclosing() Elem {
return e.enclElem
}
func (e *Common) setEnclosing(en Elem) {
e.enclElem = en
}
// Escape characters that can be escaped without further escaping the string.
var charRe = regexp.MustCompile(`&#x[0-9a-fA-F]*;|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8}|\\x[0-9a-fA-F]{2}|\\[0-7]{3}|\\[abtnvfr]`)
// replaceUnicode converts hexadecimal Unicode codepoint notations to a one-rune string.
// It assumes the input string is correctly formatted.
func replaceUnicode(s string) string {
if s[1] == '#' {
r, _ := strconv.ParseInt(s[3:len(s)-1], 16, 32)
return string(r)
}
r, _, _, _ := strconv.UnquoteChar(s, 0)
return string(r)
}

130
vendor/golang.org/x/text/unicode/cldr/cldr.go generated vendored Normal file
View File

@@ -0,0 +1,130 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:generate go run makexml.go -output xml.go
// Package cldr provides a parser for LDML and related XML formats.
// This package is intended to be used by the table generation tools
// for the various internationalization-related packages.
// As the XML types are generated from the CLDR DTD, and as the CLDR standard
// is periodically amended, this package may change considerably over time.
// This mostly means that data may appear and disappear between versions.
// That is, old code should keep compiling for newer versions, but data
// may have moved or changed.
// CLDR version 22 is the first version supported by this package.
// Older versions may not work.
package cldr // import "golang.org/x/text/unicode/cldr"
import (
"fmt"
"sort"
)
// CLDR provides access to parsed data of the Unicode Common Locale Data Repository.
type CLDR struct {
parent map[string][]string
locale map[string]*LDML
resolved map[string]*LDML
bcp47 *LDMLBCP47
supp *SupplementalData
}
func makeCLDR() *CLDR {
return &CLDR{
parent: make(map[string][]string),
locale: make(map[string]*LDML),
resolved: make(map[string]*LDML),
bcp47: &LDMLBCP47{},
supp: &SupplementalData{},
}
}
// BCP47 returns the parsed BCP47 LDML data. If no such data was parsed, nil is returned.
func (cldr *CLDR) BCP47() *LDMLBCP47 {
return nil
}
// Draft indicates the draft level of an element.
type Draft int
const (
Approved Draft = iota
Contributed
Provisional
Unconfirmed
)
var drafts = []string{"unconfirmed", "provisional", "contributed", "approved", ""}
// ParseDraft returns the Draft value corresponding to the given string. The
// empty string corresponds to Approved.
func ParseDraft(level string) (Draft, error) {
if level == "" {
return Approved, nil
}
for i, s := range drafts {
if level == s {
return Unconfirmed - Draft(i), nil
}
}
return Approved, fmt.Errorf("cldr: unknown draft level %q", level)
}
func (d Draft) String() string {
return drafts[len(drafts)-1-int(d)]
}
// SetDraftLevel sets which draft levels to include in the evaluated LDML.
// Any draft element for which the draft level is higher than lev will be excluded.
// If multiple draft levels are available for a single element, the one with the
// lowest draft level will be selected, unless preferDraft is true, in which case
// the highest draft will be chosen.
// It is assumed that the underlying LDML is canonicalized.
func (cldr *CLDR) SetDraftLevel(lev Draft, preferDraft bool) {
// TODO: implement
cldr.resolved = make(map[string]*LDML)
}
// RawLDML returns the LDML XML for id in unresolved form.
// id must be one of the strings returned by Locales.
func (cldr *CLDR) RawLDML(loc string) *LDML {
return cldr.locale[loc]
}
// LDML returns the fully resolved LDML XML for loc, which must be one of
// the strings returned by Locales.
func (cldr *CLDR) LDML(loc string) (*LDML, error) {
return cldr.resolve(loc)
}
// Supplemental returns the parsed supplemental data. If no such data was parsed,
// nil is returned.
func (cldr *CLDR) Supplemental() *SupplementalData {
return cldr.supp
}
// Locales returns the locales for which there exist files.
// Valid sublocales for which there is no file are not included.
// The root locale is always sorted first.
func (cldr *CLDR) Locales() []string {
loc := []string{"root"}
hasRoot := false
for l, _ := range cldr.locale {
if l == "root" {
hasRoot = true
continue
}
loc = append(loc, l)
}
sort.Strings(loc[1:])
if !hasRoot {
return loc[1:]
}
return loc
}
// Get fills in the fields of x based on the XPath path.
func Get(e Elem, path string) (res Elem, err error) {
return walkXPath(e, path)
}

359
vendor/golang.org/x/text/unicode/cldr/collate.go generated vendored Normal file
View File

@@ -0,0 +1,359 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cldr
import (
"bufio"
"encoding/xml"
"errors"
"fmt"
"strconv"
"strings"
"unicode"
"unicode/utf8"
)
// RuleProcessor can be passed to Collator's Process method, which
// parses the rules and calls the respective method for each rule found.
type RuleProcessor interface {
Reset(anchor string, before int) error
Insert(level int, str, context, extend string) error
Index(id string)
}
const (
// cldrIndex is a Unicode-reserved sentinel value used to mark the start
// of a grouping within an index.
// We ignore any rule that starts with this rune.
// See http://unicode.org/reports/tr35/#Collation_Elements for details.
cldrIndex = "\uFDD0"
// specialAnchor is the format in which to represent logical reset positions,
// such as "first tertiary ignorable".
specialAnchor = "<%s/>"
)
// Process parses the rules for the tailorings of this collation
// and calls the respective methods of p for each rule found.
func (c Collation) Process(p RuleProcessor) (err error) {
if len(c.Cr) > 0 {
if len(c.Cr) > 1 {
return fmt.Errorf("multiple cr elements, want 0 or 1")
}
return processRules(p, c.Cr[0].Data())
}
if c.Rules.Any != nil {
return c.processXML(p)
}
return errors.New("no tailoring data")
}
// processRules parses rules in the Collation Rule Syntax defined in
// http://www.unicode.org/reports/tr35/tr35-collation.html#Collation_Tailorings.
func processRules(p RuleProcessor, s string) (err error) {
chk := func(s string, e error) string {
if err == nil {
err = e
}
return s
}
i := 0 // Save the line number for use after the loop.
scanner := bufio.NewScanner(strings.NewReader(s))
for ; scanner.Scan() && err == nil; i++ {
for s := skipSpace(scanner.Text()); s != "" && s[0] != '#'; s = skipSpace(s) {
level := 5
var ch byte
switch ch, s = s[0], s[1:]; ch {
case '&': // followed by <anchor> or '[' <key> ']'
if s = skipSpace(s); consume(&s, '[') {
s = chk(parseSpecialAnchor(p, s))
} else {
s = chk(parseAnchor(p, 0, s))
}
case '<': // sort relation '<'{1,4}, optionally followed by '*'.
for level = 1; consume(&s, '<'); level++ {
}
if level > 4 {
err = fmt.Errorf("level %d > 4", level)
}
fallthrough
case '=': // identity relation, optionally followed by *.
if consume(&s, '*') {
s = chk(parseSequence(p, level, s))
} else {
s = chk(parseOrder(p, level, s))
}
default:
chk("", fmt.Errorf("illegal operator %q", ch))
break
}
}
}
if chk("", scanner.Err()); err != nil {
return fmt.Errorf("%d: %v", i, err)
}
return nil
}
// parseSpecialAnchor parses the anchor syntax which is either of the form
// ['before' <level>] <anchor>
// or
// [<label>]
// The starting should already be consumed.
func parseSpecialAnchor(p RuleProcessor, s string) (tail string, err error) {
i := strings.IndexByte(s, ']')
if i == -1 {
return "", errors.New("unmatched bracket")
}
a := strings.TrimSpace(s[:i])
s = s[i+1:]
if strings.HasPrefix(a, "before ") {
l, err := strconv.ParseUint(skipSpace(a[len("before "):]), 10, 3)
if err != nil {
return s, err
}
return parseAnchor(p, int(l), s)
}
return s, p.Reset(fmt.Sprintf(specialAnchor, a), 0)
}
func parseAnchor(p RuleProcessor, level int, s string) (tail string, err error) {
anchor, s, err := scanString(s)
if err != nil {
return s, err
}
return s, p.Reset(anchor, level)
}
func parseOrder(p RuleProcessor, level int, s string) (tail string, err error) {
var value, context, extend string
if value, s, err = scanString(s); err != nil {
return s, err
}
if strings.HasPrefix(value, cldrIndex) {
p.Index(value[len(cldrIndex):])
return
}
if consume(&s, '|') {
if context, s, err = scanString(s); err != nil {
return s, errors.New("missing string after context")
}
}
if consume(&s, '/') {
if extend, s, err = scanString(s); err != nil {
return s, errors.New("missing string after extension")
}
}
return s, p.Insert(level, value, context, extend)
}
// scanString scans a single input string.
func scanString(s string) (str, tail string, err error) {
if s = skipSpace(s); s == "" {
return s, s, errors.New("missing string")
}
buf := [16]byte{} // small but enough to hold most cases.
value := buf[:0]
for s != "" {
if consume(&s, '\'') {
i := strings.IndexByte(s, '\'')
if i == -1 {
return "", "", errors.New(`unmatched single quote`)
}
if i == 0 {
value = append(value, '\'')
} else {
value = append(value, s[:i]...)
}
s = s[i+1:]
continue
}
r, sz := utf8.DecodeRuneInString(s)
if unicode.IsSpace(r) || strings.ContainsRune("&<=#", r) {
break
}
value = append(value, s[:sz]...)
s = s[sz:]
}
return string(value), skipSpace(s), nil
}
func parseSequence(p RuleProcessor, level int, s string) (tail string, err error) {
if s = skipSpace(s); s == "" {
return s, errors.New("empty sequence")
}
last := rune(0)
for s != "" {
r, sz := utf8.DecodeRuneInString(s)
s = s[sz:]
if r == '-' {
// We have a range. The first element was already written.
if last == 0 {
return s, errors.New("range without starter value")
}
r, sz = utf8.DecodeRuneInString(s)
s = s[sz:]
if r == utf8.RuneError || r < last {
return s, fmt.Errorf("invalid range %q-%q", last, r)
}
for i := last + 1; i <= r; i++ {
if err := p.Insert(level, string(i), "", ""); err != nil {
return s, err
}
}
last = 0
continue
}
if unicode.IsSpace(r) || unicode.IsPunct(r) {
break
}
// normal case
if err := p.Insert(level, string(r), "", ""); err != nil {
return s, err
}
last = r
}
return s, nil
}
func skipSpace(s string) string {
return strings.TrimLeftFunc(s, unicode.IsSpace)
}
// consumes returns whether the next byte is ch. If so, it gobbles it by
// updating s.
func consume(s *string, ch byte) (ok bool) {
if *s == "" || (*s)[0] != ch {
return false
}
*s = (*s)[1:]
return true
}
// The following code parses Collation rules of CLDR version 24 and before.
var lmap = map[byte]int{
'p': 1,
's': 2,
't': 3,
'i': 5,
}
type rulesElem struct {
Rules struct {
Common
Any []*struct {
XMLName xml.Name
rule
} `xml:",any"`
} `xml:"rules"`
}
type rule struct {
Value string `xml:",chardata"`
Before string `xml:"before,attr"`
Any []*struct {
XMLName xml.Name
rule
} `xml:",any"`
}
var emptyValueError = errors.New("cldr: empty rule value")
func (r *rule) value() (string, error) {
// Convert hexadecimal Unicode codepoint notation to a string.
s := charRe.ReplaceAllStringFunc(r.Value, replaceUnicode)
r.Value = s
if s == "" {
if len(r.Any) != 1 {
return "", emptyValueError
}
r.Value = fmt.Sprintf(specialAnchor, r.Any[0].XMLName.Local)
r.Any = nil
} else if len(r.Any) != 0 {
return "", fmt.Errorf("cldr: XML elements found in collation rule: %v", r.Any)
}
return r.Value, nil
}
func (r rule) process(p RuleProcessor, name, context, extend string) error {
v, err := r.value()
if err != nil {
return err
}
switch name {
case "p", "s", "t", "i":
if strings.HasPrefix(v, cldrIndex) {
p.Index(v[len(cldrIndex):])
return nil
}
if err := p.Insert(lmap[name[0]], v, context, extend); err != nil {
return err
}
case "pc", "sc", "tc", "ic":
level := lmap[name[0]]
for _, s := range v {
if err := p.Insert(level, string(s), context, extend); err != nil {
return err
}
}
default:
return fmt.Errorf("cldr: unsupported tag: %q", name)
}
return nil
}
// processXML parses the format of CLDR versions 24 and older.
func (c Collation) processXML(p RuleProcessor) (err error) {
// Collation is generated and defined in xml.go.
var v string
for _, r := range c.Rules.Any {
switch r.XMLName.Local {
case "reset":
level := 0
switch r.Before {
case "primary", "1":
level = 1
case "secondary", "2":
level = 2
case "tertiary", "3":
level = 3
case "":
default:
return fmt.Errorf("cldr: unknown level %q", r.Before)
}
v, err = r.value()
if err == nil {
err = p.Reset(v, level)
}
case "x":
var context, extend string
for _, r1 := range r.Any {
v, err = r1.value()
switch r1.XMLName.Local {
case "context":
context = v
case "extend":
extend = v
}
}
for _, r1 := range r.Any {
if t := r1.XMLName.Local; t == "context" || t == "extend" {
continue
}
r1.rule.process(p, r1.XMLName.Local, context, extend)
}
default:
err = r.rule.process(p, r.XMLName.Local, "", "")
}
if err != nil {
return err
}
}
return nil
}

171
vendor/golang.org/x/text/unicode/cldr/decode.go generated vendored Normal file
View File

@@ -0,0 +1,171 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cldr
import (
"archive/zip"
"bytes"
"encoding/xml"
"fmt"
"io"
"io/ioutil"
"log"
"os"
"path/filepath"
"regexp"
)
// A Decoder loads an archive of CLDR data.
type Decoder struct {
dirFilter []string
sectionFilter []string
loader Loader
cldr *CLDR
curLocale string
}
// SetSectionFilter takes a list top-level LDML element names to which
// evaluation of LDML should be limited. It automatically calls SetDirFilter.
func (d *Decoder) SetSectionFilter(filter ...string) {
d.sectionFilter = filter
// TODO: automatically set dir filter
}
// SetDirFilter limits the loading of LDML XML files of the specied directories.
// Note that sections may be split across directories differently for different CLDR versions.
// For more robust code, use SetSectionFilter.
func (d *Decoder) SetDirFilter(dir ...string) {
d.dirFilter = dir
}
// A Loader provides access to the files of a CLDR archive.
type Loader interface {
Len() int
Path(i int) string
Reader(i int) (io.ReadCloser, error)
}
var fileRe = regexp.MustCompile(".*/(.*)/(.*)\\.xml")
// Decode loads and decodes the files represented by l.
func (d *Decoder) Decode(l Loader) (cldr *CLDR, err error) {
d.cldr = makeCLDR()
for i := 0; i < l.Len(); i++ {
fname := l.Path(i)
if m := fileRe.FindStringSubmatch(fname); m != nil {
if len(d.dirFilter) > 0 && !in(d.dirFilter, m[1]) {
continue
}
var r io.Reader
if r, err = l.Reader(i); err == nil {
err = d.decode(m[1], m[2], r)
}
if err != nil {
return nil, err
}
}
}
d.cldr.finalize(d.sectionFilter)
return d.cldr, nil
}
func (d *Decoder) decode(dir, id string, r io.Reader) error {
var v interface{}
var l *LDML
cldr := d.cldr
switch {
case dir == "supplemental":
v = cldr.supp
case dir == "transforms":
return nil
case dir == "bcp47":
v = cldr.bcp47
case dir == "validity":
return nil
default:
ok := false
if v, ok = cldr.locale[id]; !ok {
l = &LDML{}
v, cldr.locale[id] = l, l
}
}
x := xml.NewDecoder(r)
if err := x.Decode(v); err != nil {
log.Printf("%s/%s: %v", dir, id, err)
return err
}
if l != nil {
if l.Identity == nil {
return fmt.Errorf("%s/%s: missing identity element", dir, id)
}
// TODO: verify when CLDR bug http://unicode.org/cldr/trac/ticket/8970
// is resolved.
// path := strings.Split(id, "_")
// if lang := l.Identity.Language.Type; lang != path[0] {
// return fmt.Errorf("%s/%s: language was %s; want %s", dir, id, lang, path[0])
// }
}
return nil
}
type pathLoader []string
func makePathLoader(path string) (pl pathLoader, err error) {
err = filepath.Walk(path, func(path string, _ os.FileInfo, err error) error {
pl = append(pl, path)
return err
})
return pl, err
}
func (pl pathLoader) Len() int {
return len(pl)
}
func (pl pathLoader) Path(i int) string {
return pl[i]
}
func (pl pathLoader) Reader(i int) (io.ReadCloser, error) {
return os.Open(pl[i])
}
// DecodePath loads CLDR data from the given path.
func (d *Decoder) DecodePath(path string) (cldr *CLDR, err error) {
loader, err := makePathLoader(path)
if err != nil {
return nil, err
}
return d.Decode(loader)
}
type zipLoader struct {
r *zip.Reader
}
func (zl zipLoader) Len() int {
return len(zl.r.File)
}
func (zl zipLoader) Path(i int) string {
return zl.r.File[i].Name
}
func (zl zipLoader) Reader(i int) (io.ReadCloser, error) {
return zl.r.File[i].Open()
}
// DecodeZip loads CLDR data from the zip archive for which r is the source.
func (d *Decoder) DecodeZip(r io.Reader) (cldr *CLDR, err error) {
buffer, err := ioutil.ReadAll(r)
if err != nil {
return nil, err
}
archive, err := zip.NewReader(bytes.NewReader(buffer), int64(len(buffer)))
if err != nil {
return nil, err
}
return d.Decode(zipLoader{archive})
}

400
vendor/golang.org/x/text/unicode/cldr/makexml.go generated vendored Normal file
View File

@@ -0,0 +1,400 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
// This tool generates types for the various XML formats of CLDR.
package main
import (
"archive/zip"
"bytes"
"encoding/xml"
"flag"
"fmt"
"io"
"io/ioutil"
"log"
"os"
"regexp"
"strings"
"golang.org/x/text/internal/gen"
)
var outputFile = flag.String("output", "xml.go", "output file name")
func main() {
flag.Parse()
r := gen.OpenCLDRCoreZip()
buffer, err := ioutil.ReadAll(r)
if err != nil {
log.Fatal("Could not read zip file")
}
r.Close()
z, err := zip.NewReader(bytes.NewReader(buffer), int64(len(buffer)))
if err != nil {
log.Fatalf("Could not read zip archive: %v", err)
}
var buf bytes.Buffer
version := gen.CLDRVersion()
for _, dtd := range files {
for _, f := range z.File {
if strings.HasSuffix(f.Name, dtd.file+".dtd") {
r, err := f.Open()
failOnError(err)
b := makeBuilder(&buf, dtd)
b.parseDTD(r)
b.resolve(b.index[dtd.top[0]])
b.write()
if b.version != "" && version != b.version {
println(f.Name)
log.Fatalf("main: inconsistent versions: found %s; want %s", b.version, version)
}
break
}
}
}
fmt.Fprintln(&buf, "// Version is the version of CLDR from which the XML definitions are generated.")
fmt.Fprintf(&buf, "const Version = %q\n", version)
gen.WriteGoFile(*outputFile, "cldr", buf.Bytes())
}
func failOnError(err error) {
if err != nil {
log.New(os.Stderr, "", log.Lshortfile).Output(2, err.Error())
os.Exit(1)
}
}
// configuration data per DTD type
type dtd struct {
file string // base file name
root string // Go name of the root XML element
top []string // create a different type for this section
skipElem []string // hard-coded or deprecated elements
skipAttr []string // attributes to exclude
predefined []string // hard-coded elements exist of the form <name>Elem
forceRepeat []string // elements to make slices despite DTD
}
var files = []dtd{
{
file: "ldmlBCP47",
root: "LDMLBCP47",
top: []string{"ldmlBCP47"},
skipElem: []string{
"cldrVersion", // deprecated, not used
},
},
{
file: "ldmlSupplemental",
root: "SupplementalData",
top: []string{"supplementalData"},
skipElem: []string{
"cldrVersion", // deprecated, not used
},
forceRepeat: []string{
"plurals", // data defined in plurals.xml and ordinals.xml
},
},
{
file: "ldml",
root: "LDML",
top: []string{
"ldml", "collation", "calendar", "timeZoneNames", "localeDisplayNames", "numbers",
},
skipElem: []string{
"cp", // not used anywhere
"special", // not used anywhere
"fallback", // deprecated, not used
"alias", // in Common
"default", // in Common
},
skipAttr: []string{
"hiraganaQuarternary", // typo in DTD, correct version included as well
},
predefined: []string{"rules"},
},
}
var comments = map[string]string{
"ldmlBCP47": `
// LDMLBCP47 holds information on allowable values for various variables in LDML.
`,
"supplementalData": `
// SupplementalData holds information relevant for internationalization
// and proper use of CLDR, but that is not contained in the locale hierarchy.
`,
"ldml": `
// LDML is the top-level type for locale-specific data.
`,
"collation": `
// Collation contains rules that specify a certain sort-order,
// as a tailoring of the root order.
// The parsed rules are obtained by passing a RuleProcessor to Collation's
// Process method.
`,
"calendar": `
// Calendar specifies the fields used for formatting and parsing dates and times.
// The month and quarter names are identified numerically, starting at 1.
// The day (of the week) names are identified with short strings, since there is
// no universally-accepted numeric designation.
`,
"dates": `
// Dates contains information regarding the format and parsing of dates and times.
`,
"localeDisplayNames": `
// LocaleDisplayNames specifies localized display names for for scripts, languages,
// countries, currencies, and variants.
`,
"numbers": `
// Numbers supplies information for formatting and parsing numbers and currencies.
`,
}
type element struct {
name string // XML element name
category string // elements contained by this element
signature string // category + attrKey*
attr []*attribute // attributes supported by this element.
sub []struct { // parsed and evaluated sub elements of this element.
e *element
repeat bool // true if the element needs to be a slice
}
resolved bool // prevent multiple resolutions of this element.
}
type attribute struct {
name string
key string
list []string
tag string // Go tag
}
var (
reHead = regexp.MustCompile(` *(\w+) +([\w\-]+)`)
reAttr = regexp.MustCompile(` *(\w+) *(?:(\w+)|\(([\w\- \|]+)\)) *(?:#([A-Z]*) *(?:\"([\.\d+])\")?)? *("[\w\-:]*")?`)
reElem = regexp.MustCompile(`^ *(EMPTY|ANY|\(.*\)[\*\+\?]?) *$`)
reToken = regexp.MustCompile(`\w\-`)
)
// builder is used to read in the DTD files from CLDR and generate Go code
// to be used with the encoding/xml package.
type builder struct {
w io.Writer
index map[string]*element
elem []*element
info dtd
version string
}
func makeBuilder(w io.Writer, d dtd) builder {
return builder{
w: w,
index: make(map[string]*element),
elem: []*element{},
info: d,
}
}
// parseDTD parses a DTD file.
func (b *builder) parseDTD(r io.Reader) {
for d := xml.NewDecoder(r); ; {
t, err := d.Token()
if t == nil {
break
}
failOnError(err)
dir, ok := t.(xml.Directive)
if !ok {
continue
}
m := reHead.FindSubmatch(dir)
dir = dir[len(m[0]):]
ename := string(m[2])
el, elementFound := b.index[ename]
switch string(m[1]) {
case "ELEMENT":
if elementFound {
log.Fatal("parseDTD: duplicate entry for element %q", ename)
}
m := reElem.FindSubmatch(dir)
if m == nil {
log.Fatalf("parseDTD: invalid element %q", string(dir))
}
if len(m[0]) != len(dir) {
log.Fatal("parseDTD: invalid element %q", string(dir), len(dir), len(m[0]), string(m[0]))
}
s := string(m[1])
el = &element{
name: ename,
category: s,
}
b.index[ename] = el
case "ATTLIST":
if !elementFound {
log.Fatalf("parseDTD: unknown element %q", ename)
}
s := string(dir)
m := reAttr.FindStringSubmatch(s)
if m == nil {
log.Fatal(fmt.Errorf("parseDTD: invalid attribute %q", string(dir)))
}
if m[4] == "FIXED" {
b.version = m[5]
} else {
switch m[1] {
case "draft", "references", "alt", "validSubLocales", "standard" /* in Common */ :
case "type", "choice":
default:
el.attr = append(el.attr, &attribute{
name: m[1],
key: s,
list: reToken.FindAllString(m[3], -1),
})
el.signature = fmt.Sprintf("%s=%s+%s", el.signature, m[1], m[2])
}
}
}
}
}
var reCat = regexp.MustCompile(`[ ,\|]*(?:(\(|\)|\#?[\w_-]+)([\*\+\?]?))?`)
// resolve takes a parsed element and converts it into structured data
// that can be used to generate the XML code.
func (b *builder) resolve(e *element) {
if e.resolved {
return
}
b.elem = append(b.elem, e)
e.resolved = true
s := e.category
found := make(map[string]bool)
sequenceStart := []int{}
for len(s) > 0 {
m := reCat.FindStringSubmatch(s)
if m == nil {
log.Fatalf("%s: invalid category string %q", e.name, s)
}
repeat := m[2] == "*" || m[2] == "+" || in(b.info.forceRepeat, m[1])
switch m[1] {
case "":
case "(":
sequenceStart = append(sequenceStart, len(e.sub))
case ")":
if len(sequenceStart) == 0 {
log.Fatalf("%s: unmatched closing parenthesis", e.name)
}
for i := sequenceStart[len(sequenceStart)-1]; i < len(e.sub); i++ {
e.sub[i].repeat = e.sub[i].repeat || repeat
}
sequenceStart = sequenceStart[:len(sequenceStart)-1]
default:
if in(b.info.skipElem, m[1]) {
} else if sub, ok := b.index[m[1]]; ok {
if !found[sub.name] {
e.sub = append(e.sub, struct {
e *element
repeat bool
}{sub, repeat})
found[sub.name] = true
b.resolve(sub)
}
} else if m[1] == "#PCDATA" || m[1] == "ANY" {
} else if m[1] != "EMPTY" {
log.Fatalf("resolve:%s: element %q not found", e.name, m[1])
}
}
s = s[len(m[0]):]
}
}
// return true if s is contained in set.
func in(set []string, s string) bool {
for _, v := range set {
if v == s {
return true
}
}
return false
}
var repl = strings.NewReplacer("-", " ", "_", " ")
// title puts the first character or each character following '_' in title case and
// removes all occurrences of '_'.
func title(s string) string {
return strings.Replace(strings.Title(repl.Replace(s)), " ", "", -1)
}
// writeElem generates Go code for a single element, recursively.
func (b *builder) writeElem(tab int, e *element) {
p := func(f string, x ...interface{}) {
f = strings.Replace(f, "\n", "\n"+strings.Repeat("\t", tab), -1)
fmt.Fprintf(b.w, f, x...)
}
if len(e.sub) == 0 && len(e.attr) == 0 {
p("Common")
return
}
p("struct {")
tab++
p("\nCommon")
for _, attr := range e.attr {
if !in(b.info.skipAttr, attr.name) {
p("\n%s string `xml:\"%s,attr\"`", title(attr.name), attr.name)
}
}
for _, sub := range e.sub {
if in(b.info.predefined, sub.e.name) {
p("\n%sElem", sub.e.name)
continue
}
if in(b.info.skipElem, sub.e.name) {
continue
}
p("\n%s ", title(sub.e.name))
if sub.repeat {
p("[]")
}
p("*")
if in(b.info.top, sub.e.name) {
p(title(sub.e.name))
} else {
b.writeElem(tab, sub.e)
}
p(" `xml:\"%s\"`", sub.e.name)
}
tab--
p("\n}")
}
// write generates the Go XML code.
func (b *builder) write() {
for i, name := range b.info.top {
e := b.index[name]
if e != nil {
fmt.Fprintf(b.w, comments[name])
name := title(e.name)
if i == 0 {
name = b.info.root
}
fmt.Fprintf(b.w, "type %s ", name)
b.writeElem(0, e)
fmt.Fprint(b.w, "\n")
}
}
}

602
vendor/golang.org/x/text/unicode/cldr/resolve.go generated vendored Normal file
View File

@@ -0,0 +1,602 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cldr
// This file implements the various inheritance constructs defined by LDML.
// See http://www.unicode.org/reports/tr35/#Inheritance_and_Validity
// for more details.
import (
"fmt"
"log"
"reflect"
"regexp"
"sort"
"strings"
)
// fieldIter iterates over fields in a struct. It includes
// fields of embedded structs.
type fieldIter struct {
v reflect.Value
index, n []int
}
func iter(v reflect.Value) fieldIter {
if v.Kind() != reflect.Struct {
log.Panicf("value %v must be a struct", v)
}
i := fieldIter{
v: v,
index: []int{0},
n: []int{v.NumField()},
}
i.descent()
return i
}
func (i *fieldIter) descent() {
for f := i.field(); f.Anonymous && f.Type.NumField() > 0; f = i.field() {
i.index = append(i.index, 0)
i.n = append(i.n, f.Type.NumField())
}
}
func (i *fieldIter) done() bool {
return len(i.index) == 1 && i.index[0] >= i.n[0]
}
func skip(f reflect.StructField) bool {
return !f.Anonymous && (f.Name[0] < 'A' || f.Name[0] > 'Z')
}
func (i *fieldIter) next() {
for {
k := len(i.index) - 1
i.index[k]++
if i.index[k] < i.n[k] {
if !skip(i.field()) {
break
}
} else {
if k == 0 {
return
}
i.index = i.index[:k]
i.n = i.n[:k]
}
}
i.descent()
}
func (i *fieldIter) value() reflect.Value {
return i.v.FieldByIndex(i.index)
}
func (i *fieldIter) field() reflect.StructField {
return i.v.Type().FieldByIndex(i.index)
}
type visitor func(v reflect.Value) error
var stopDescent = fmt.Errorf("do not recurse")
func (f visitor) visit(x interface{}) error {
return f.visitRec(reflect.ValueOf(x))
}
// visit recursively calls f on all nodes in v.
func (f visitor) visitRec(v reflect.Value) error {
if v.Kind() == reflect.Ptr {
if v.IsNil() {
return nil
}
return f.visitRec(v.Elem())
}
if err := f(v); err != nil {
if err == stopDescent {
return nil
}
return err
}
switch v.Kind() {
case reflect.Struct:
for i := iter(v); !i.done(); i.next() {
if err := f.visitRec(i.value()); err != nil {
return err
}
}
case reflect.Slice:
for i := 0; i < v.Len(); i++ {
if err := f.visitRec(v.Index(i)); err != nil {
return err
}
}
}
return nil
}
// getPath is used for error reporting purposes only.
func getPath(e Elem) string {
if e == nil {
return "<nil>"
}
if e.enclosing() == nil {
return e.GetCommon().name
}
if e.GetCommon().Type == "" {
return fmt.Sprintf("%s.%s", getPath(e.enclosing()), e.GetCommon().name)
}
return fmt.Sprintf("%s.%s[type=%s]", getPath(e.enclosing()), e.GetCommon().name, e.GetCommon().Type)
}
// xmlName returns the xml name of the element or attribute
func xmlName(f reflect.StructField) (name string, attr bool) {
tags := strings.Split(f.Tag.Get("xml"), ",")
for _, s := range tags {
attr = attr || s == "attr"
}
return tags[0], attr
}
func findField(v reflect.Value, key string) (reflect.Value, error) {
v = reflect.Indirect(v)
for i := iter(v); !i.done(); i.next() {
if n, _ := xmlName(i.field()); n == key {
return i.value(), nil
}
}
return reflect.Value{}, fmt.Errorf("cldr: no field %q in element %#v", key, v.Interface())
}
var xpathPart = regexp.MustCompile(`(\pL+)(?:\[@(\pL+)='([\w-]+)'\])?`)
func walkXPath(e Elem, path string) (res Elem, err error) {
for _, c := range strings.Split(path, "/") {
if c == ".." {
if e = e.enclosing(); e == nil {
panic("path ..")
return nil, fmt.Errorf(`cldr: ".." moves past root in path %q`, path)
}
continue
} else if c == "" {
continue
}
m := xpathPart.FindStringSubmatch(c)
if len(m) == 0 || len(m[0]) != len(c) {
return nil, fmt.Errorf("cldr: syntax error in path component %q", c)
}
v, err := findField(reflect.ValueOf(e), m[1])
if err != nil {
return nil, err
}
switch v.Kind() {
case reflect.Slice:
i := 0
if m[2] != "" || v.Len() > 1 {
if m[2] == "" {
m[2] = "type"
if m[3] = e.GetCommon().Default(); m[3] == "" {
return nil, fmt.Errorf("cldr: type selector or default value needed for element %s", m[1])
}
}
for ; i < v.Len(); i++ {
vi := v.Index(i)
key, err := findField(vi.Elem(), m[2])
if err != nil {
return nil, err
}
key = reflect.Indirect(key)
if key.Kind() == reflect.String && key.String() == m[3] {
break
}
}
}
if i == v.Len() || v.Index(i).IsNil() {
return nil, fmt.Errorf("no %s found with %s==%s", m[1], m[2], m[3])
}
e = v.Index(i).Interface().(Elem)
case reflect.Ptr:
if v.IsNil() {
return nil, fmt.Errorf("cldr: element %q not found within element %q", m[1], e.GetCommon().name)
}
var ok bool
if e, ok = v.Interface().(Elem); !ok {
return nil, fmt.Errorf("cldr: %q is not an XML element", m[1])
} else if m[2] != "" || m[3] != "" {
return nil, fmt.Errorf("cldr: no type selector allowed for element %s", m[1])
}
default:
return nil, fmt.Errorf("cldr: %q is not an XML element", m[1])
}
}
return e, nil
}
const absPrefix = "//ldml/"
func (cldr *CLDR) resolveAlias(e Elem, src, path string) (res Elem, err error) {
if src != "locale" {
if !strings.HasPrefix(path, absPrefix) {
return nil, fmt.Errorf("cldr: expected absolute path, found %q", path)
}
path = path[len(absPrefix):]
if e, err = cldr.resolve(src); err != nil {
return nil, err
}
}
return walkXPath(e, path)
}
func (cldr *CLDR) resolveAndMergeAlias(e Elem) error {
alias := e.GetCommon().Alias
if alias == nil {
return nil
}
a, err := cldr.resolveAlias(e, alias.Source, alias.Path)
if err != nil {
return fmt.Errorf("%v: error evaluating path %q: %v", getPath(e), alias.Path, err)
}
// Ensure alias node was already evaluated. TODO: avoid double evaluation.
err = cldr.resolveAndMergeAlias(a)
v := reflect.ValueOf(e).Elem()
for i := iter(reflect.ValueOf(a).Elem()); !i.done(); i.next() {
if vv := i.value(); vv.Kind() != reflect.Ptr || !vv.IsNil() {
if _, attr := xmlName(i.field()); !attr {
v.FieldByIndex(i.index).Set(vv)
}
}
}
return err
}
func (cldr *CLDR) aliasResolver() visitor {
return func(v reflect.Value) (err error) {
if e, ok := v.Addr().Interface().(Elem); ok {
err = cldr.resolveAndMergeAlias(e)
if err == nil && blocking[e.GetCommon().name] {
return stopDescent
}
}
return err
}
}
// elements within blocking elements do not inherit.
// Taken from CLDR's supplementalMetaData.xml.
var blocking = map[string]bool{
"identity": true,
"supplementalData": true,
"cldrTest": true,
"collation": true,
"transform": true,
}
// Distinguishing attributes affect inheritance; two elements with different
// distinguishing attributes are treated as different for purposes of inheritance,
// except when such attributes occur in the indicated elements.
// Taken from CLDR's supplementalMetaData.xml.
var distinguishing = map[string][]string{
"key": nil,
"request_id": nil,
"id": nil,
"registry": nil,
"alt": nil,
"iso4217": nil,
"iso3166": nil,
"mzone": nil,
"from": nil,
"to": nil,
"type": []string{
"abbreviationFallback",
"default",
"mapping",
"measurementSystem",
"preferenceOrdering",
},
"numberSystem": nil,
}
func in(set []string, s string) bool {
for _, v := range set {
if v == s {
return true
}
}
return false
}
// attrKey computes a key based on the distinguishable attributes of
// an element and it's values.
func attrKey(v reflect.Value, exclude ...string) string {
parts := []string{}
ename := v.Interface().(Elem).GetCommon().name
v = v.Elem()
for i := iter(v); !i.done(); i.next() {
if name, attr := xmlName(i.field()); attr {
if except, ok := distinguishing[name]; ok && !in(exclude, name) && !in(except, ename) {
v := i.value()
if v.Kind() == reflect.Ptr {
v = v.Elem()
}
if v.IsValid() {
parts = append(parts, fmt.Sprintf("%s=%s", name, v.String()))
}
}
}
}
sort.Strings(parts)
return strings.Join(parts, ";")
}
// Key returns a key for e derived from all distinguishing attributes
// except those specified by exclude.
func Key(e Elem, exclude ...string) string {
return attrKey(reflect.ValueOf(e), exclude...)
}
// linkEnclosing sets the enclosing element as well as the name
// for all sub-elements of child, recursively.
func linkEnclosing(parent, child Elem) {
child.setEnclosing(parent)
v := reflect.ValueOf(child).Elem()
for i := iter(v); !i.done(); i.next() {
vf := i.value()
if vf.Kind() == reflect.Slice {
for j := 0; j < vf.Len(); j++ {
linkEnclosing(child, vf.Index(j).Interface().(Elem))
}
} else if vf.Kind() == reflect.Ptr && !vf.IsNil() && vf.Elem().Kind() == reflect.Struct {
linkEnclosing(child, vf.Interface().(Elem))
}
}
}
func setNames(e Elem, name string) {
e.setName(name)
v := reflect.ValueOf(e).Elem()
for i := iter(v); !i.done(); i.next() {
vf := i.value()
name, _ = xmlName(i.field())
if vf.Kind() == reflect.Slice {
for j := 0; j < vf.Len(); j++ {
setNames(vf.Index(j).Interface().(Elem), name)
}
} else if vf.Kind() == reflect.Ptr && !vf.IsNil() && vf.Elem().Kind() == reflect.Struct {
setNames(vf.Interface().(Elem), name)
}
}
}
// deepCopy copies elements of v recursively. All elements of v that may
// be modified by inheritance are explicitly copied.
func deepCopy(v reflect.Value) reflect.Value {
switch v.Kind() {
case reflect.Ptr:
if v.IsNil() || v.Elem().Kind() != reflect.Struct {
return v
}
nv := reflect.New(v.Elem().Type())
nv.Elem().Set(v.Elem())
deepCopyRec(nv.Elem(), v.Elem())
return nv
case reflect.Slice:
nv := reflect.MakeSlice(v.Type(), v.Len(), v.Len())
for i := 0; i < v.Len(); i++ {
deepCopyRec(nv.Index(i), v.Index(i))
}
return nv
}
panic("deepCopy: must be called with pointer or slice")
}
// deepCopyRec is only called by deepCopy.
func deepCopyRec(nv, v reflect.Value) {
if v.Kind() == reflect.Struct {
t := v.Type()
for i := 0; i < v.NumField(); i++ {
if name, attr := xmlName(t.Field(i)); name != "" && !attr {
deepCopyRec(nv.Field(i), v.Field(i))
}
}
} else {
nv.Set(deepCopy(v))
}
}
// newNode is used to insert a missing node during inheritance.
func (cldr *CLDR) newNode(v, enc reflect.Value) reflect.Value {
n := reflect.New(v.Type())
for i := iter(v); !i.done(); i.next() {
if name, attr := xmlName(i.field()); name == "" || attr {
n.Elem().FieldByIndex(i.index).Set(i.value())
}
}
n.Interface().(Elem).GetCommon().setEnclosing(enc.Addr().Interface().(Elem))
return n
}
// v, parent must be pointers to struct
func (cldr *CLDR) inheritFields(v, parent reflect.Value) (res reflect.Value, err error) {
t := v.Type()
nv := reflect.New(t)
nv.Elem().Set(v)
for i := iter(v); !i.done(); i.next() {
vf := i.value()
f := i.field()
name, attr := xmlName(f)
if name == "" || attr {
continue
}
pf := parent.FieldByIndex(i.index)
if blocking[name] {
if vf.IsNil() {
vf = pf
}
nv.Elem().FieldByIndex(i.index).Set(deepCopy(vf))
continue
}
switch f.Type.Kind() {
case reflect.Ptr:
if f.Type.Elem().Kind() == reflect.Struct {
if !vf.IsNil() {
if vf, err = cldr.inheritStructPtr(vf, pf); err != nil {
return reflect.Value{}, err
}
vf.Interface().(Elem).setEnclosing(nv.Interface().(Elem))
nv.Elem().FieldByIndex(i.index).Set(vf)
} else if !pf.IsNil() {
n := cldr.newNode(pf.Elem(), v)
if vf, err = cldr.inheritStructPtr(n, pf); err != nil {
return reflect.Value{}, err
}
vf.Interface().(Elem).setEnclosing(nv.Interface().(Elem))
nv.Elem().FieldByIndex(i.index).Set(vf)
}
}
case reflect.Slice:
vf, err := cldr.inheritSlice(nv.Elem(), vf, pf)
if err != nil {
return reflect.Zero(t), err
}
nv.Elem().FieldByIndex(i.index).Set(vf)
}
}
return nv, nil
}
func root(e Elem) *LDML {
for ; e.enclosing() != nil; e = e.enclosing() {
}
return e.(*LDML)
}
// inheritStructPtr first merges possible aliases in with v and then inherits
// any underspecified elements from parent.
func (cldr *CLDR) inheritStructPtr(v, parent reflect.Value) (r reflect.Value, err error) {
if !v.IsNil() {
e := v.Interface().(Elem).GetCommon()
alias := e.Alias
if alias == nil && !parent.IsNil() {
alias = parent.Interface().(Elem).GetCommon().Alias
}
if alias != nil {
a, err := cldr.resolveAlias(v.Interface().(Elem), alias.Source, alias.Path)
if a != nil {
if v, err = cldr.inheritFields(v.Elem(), reflect.ValueOf(a).Elem()); err != nil {
return reflect.Value{}, err
}
}
}
if !parent.IsNil() {
return cldr.inheritFields(v.Elem(), parent.Elem())
}
} else if parent.IsNil() {
panic("should not reach here")
}
return v, nil
}
// Must be slice of struct pointers.
func (cldr *CLDR) inheritSlice(enc, v, parent reflect.Value) (res reflect.Value, err error) {
t := v.Type()
index := make(map[string]reflect.Value)
if !v.IsNil() {
for i := 0; i < v.Len(); i++ {
vi := v.Index(i)
key := attrKey(vi)
index[key] = vi
}
}
if !parent.IsNil() {
for i := 0; i < parent.Len(); i++ {
vi := parent.Index(i)
key := attrKey(vi)
if w, ok := index[key]; ok {
index[key], err = cldr.inheritStructPtr(w, vi)
} else {
n := cldr.newNode(vi.Elem(), enc)
index[key], err = cldr.inheritStructPtr(n, vi)
}
index[key].Interface().(Elem).setEnclosing(enc.Addr().Interface().(Elem))
if err != nil {
return v, err
}
}
}
keys := make([]string, 0, len(index))
for k, _ := range index {
keys = append(keys, k)
}
sort.Strings(keys)
sl := reflect.MakeSlice(t, len(index), len(index))
for i, k := range keys {
sl.Index(i).Set(index[k])
}
return sl, nil
}
func parentLocale(loc string) string {
parts := strings.Split(loc, "_")
if len(parts) == 1 {
return "root"
}
parts = parts[:len(parts)-1]
key := strings.Join(parts, "_")
return key
}
func (cldr *CLDR) resolve(loc string) (res *LDML, err error) {
if r := cldr.resolved[loc]; r != nil {
return r, nil
}
x := cldr.RawLDML(loc)
if x == nil {
return nil, fmt.Errorf("cldr: unknown locale %q", loc)
}
var v reflect.Value
if loc == "root" {
x = deepCopy(reflect.ValueOf(x)).Interface().(*LDML)
linkEnclosing(nil, x)
err = cldr.aliasResolver().visit(x)
} else {
key := parentLocale(loc)
var parent *LDML
for ; cldr.locale[key] == nil; key = parentLocale(key) {
}
if parent, err = cldr.resolve(key); err != nil {
return nil, err
}
v, err = cldr.inheritFields(reflect.ValueOf(x).Elem(), reflect.ValueOf(parent).Elem())
x = v.Interface().(*LDML)
linkEnclosing(nil, x)
}
if err != nil {
return nil, err
}
cldr.resolved[loc] = x
return x, err
}
// finalize finalizes the initialization of the raw LDML structs. It also
// removed unwanted fields, as specified by filter, so that they will not
// be unnecessarily evaluated.
func (cldr *CLDR) finalize(filter []string) {
for _, x := range cldr.locale {
if filter != nil {
v := reflect.ValueOf(x).Elem()
t := v.Type()
for i := 0; i < v.NumField(); i++ {
f := t.Field(i)
name, _ := xmlName(f)
if name != "" && name != "identity" && !in(filter, name) {
v.Field(i).Set(reflect.Zero(f.Type))
}
}
}
linkEnclosing(nil, x) // for resolving aliases and paths
setNames(x, "ldml")
}
}

144
vendor/golang.org/x/text/unicode/cldr/slice.go generated vendored Normal file
View File

@@ -0,0 +1,144 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cldr
import (
"fmt"
"reflect"
"sort"
)
// Slice provides utilities for modifying slices of elements.
// It can be wrapped around any slice of which the element type implements
// interface Elem.
type Slice struct {
ptr reflect.Value
typ reflect.Type
}
// Value returns the reflect.Value of the underlying slice.
func (s *Slice) Value() reflect.Value {
return s.ptr.Elem()
}
// MakeSlice wraps a pointer to a slice of Elems.
// It replaces the array pointed to by the slice so that subsequent modifications
// do not alter the data in a CLDR type.
// It panics if an incorrect type is passed.
func MakeSlice(slicePtr interface{}) Slice {
ptr := reflect.ValueOf(slicePtr)
if ptr.Kind() != reflect.Ptr {
panic(fmt.Sprintf("MakeSlice: argument must be pointer to slice, found %v", ptr.Type()))
}
sl := ptr.Elem()
if sl.Kind() != reflect.Slice {
panic(fmt.Sprintf("MakeSlice: argument must point to a slice, found %v", sl.Type()))
}
intf := reflect.TypeOf((*Elem)(nil)).Elem()
if !sl.Type().Elem().Implements(intf) {
panic(fmt.Sprintf("MakeSlice: element type of slice (%v) does not implement Elem", sl.Type().Elem()))
}
nsl := reflect.MakeSlice(sl.Type(), sl.Len(), sl.Len())
reflect.Copy(nsl, sl)
sl.Set(nsl)
return Slice{
ptr: ptr,
typ: sl.Type().Elem().Elem(),
}
}
func (s Slice) indexForAttr(a string) []int {
for i := iter(reflect.Zero(s.typ)); !i.done(); i.next() {
if n, _ := xmlName(i.field()); n == a {
return i.index
}
}
panic(fmt.Sprintf("MakeSlice: no attribute %q for type %v", a, s.typ))
}
// Filter filters s to only include elements for which fn returns true.
func (s Slice) Filter(fn func(e Elem) bool) {
k := 0
sl := s.Value()
for i := 0; i < sl.Len(); i++ {
vi := sl.Index(i)
if fn(vi.Interface().(Elem)) {
sl.Index(k).Set(vi)
k++
}
}
sl.Set(sl.Slice(0, k))
}
// Group finds elements in s for which fn returns the same value and groups
// them in a new Slice.
func (s Slice) Group(fn func(e Elem) string) []Slice {
m := make(map[string][]reflect.Value)
sl := s.Value()
for i := 0; i < sl.Len(); i++ {
vi := sl.Index(i)
key := fn(vi.Interface().(Elem))
m[key] = append(m[key], vi)
}
keys := []string{}
for k, _ := range m {
keys = append(keys, k)
}
sort.Strings(keys)
res := []Slice{}
for _, k := range keys {
nsl := reflect.New(sl.Type())
nsl.Elem().Set(reflect.Append(nsl.Elem(), m[k]...))
res = append(res, MakeSlice(nsl.Interface()))
}
return res
}
// SelectAnyOf filters s to contain only elements for which attr matches
// any of the values.
func (s Slice) SelectAnyOf(attr string, values ...string) {
index := s.indexForAttr(attr)
s.Filter(func(e Elem) bool {
vf := reflect.ValueOf(e).Elem().FieldByIndex(index)
return in(values, vf.String())
})
}
// SelectOnePerGroup filters s to include at most one element e per group of
// elements matching Key(attr), where e has an attribute a that matches any
// the values in v.
// If more than one element in a group matches a value in v preference
// is given to the element that matches the first value in v.
func (s Slice) SelectOnePerGroup(a string, v []string) {
index := s.indexForAttr(a)
grouped := s.Group(func(e Elem) string { return Key(e, a) })
sl := s.Value()
sl.Set(sl.Slice(0, 0))
for _, g := range grouped {
e := reflect.Value{}
found := len(v)
gsl := g.Value()
for i := 0; i < gsl.Len(); i++ {
vi := gsl.Index(i).Elem().FieldByIndex(index)
j := 0
for ; j < len(v) && v[j] != vi.String(); j++ {
}
if j < found {
found = j
e = gsl.Index(i)
}
}
if found < len(v) {
sl.Set(reflect.Append(sl, e))
}
}
}
// SelectDraft drops all elements from the list with a draft level smaller than d
// and selects the highest draft level of the remaining.
// This method assumes that the input CLDR is canonicalized.
func (s Slice) SelectDraft(d Draft) {
s.SelectOnePerGroup("draft", drafts[len(drafts)-2-int(d):])
}

1456
vendor/golang.org/x/text/unicode/cldr/xml.go generated vendored Normal file

File diff suppressed because it is too large Load Diff

27
vendor/golang.org/x/text/unicode/norm/LICENSE generated vendored Normal file
View File

@@ -0,0 +1,27 @@
Copyright (c) 2009 The Go Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@@ -1,130 +0,0 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package norm
import "testing"
// TestCase is used for most tests.
type TestCase struct {
in []rune
out []rune
}
func runTests(t *testing.T, name string, fm Form, tests []TestCase) {
rb := reorderBuffer{}
rb.init(fm, nil)
for i, test := range tests {
rb.setFlusher(nil, appendFlush)
for j, rune := range test.in {
b := []byte(string(rune))
src := inputBytes(b)
info := rb.f.info(src, 0)
if j == 0 {
rb.ss.first(info)
} else {
rb.ss.next(info)
}
if rb.insertFlush(src, 0, info) < 0 {
t.Errorf("%s:%d: insert failed for rune %d", name, i, j)
}
}
rb.doFlush()
was := string(rb.out)
want := string(test.out)
if len(was) != len(want) {
t.Errorf("%s:%d: length = %d; want %d", name, i, len(was), len(want))
}
if was != want {
k, pfx := pidx(was, want)
t.Errorf("%s:%d: \nwas %s%+q; \nwant %s%+q", name, i, pfx, was[k:], pfx, want[k:])
}
}
}
func TestFlush(t *testing.T) {
const (
hello = "Hello "
world = "world!"
)
buf := make([]byte, maxByteBufferSize)
p := copy(buf, hello)
out := buf[p:]
rb := reorderBuffer{}
rb.initString(NFC, world)
if i := rb.flushCopy(out); i != 0 {
t.Errorf("wrote bytes on flush of empty buffer. (len(out) = %d)", i)
}
for i := range world {
// No need to set streamSafe values for this test.
rb.insertFlush(rb.src, i, rb.f.info(rb.src, i))
n := rb.flushCopy(out)
out = out[n:]
p += n
}
was := buf[:p]
want := hello + world
if string(was) != want {
t.Errorf(`output after flush was "%s"; want "%s"`, string(was), want)
}
if rb.nrune != 0 {
t.Errorf("non-null size of info buffer (rb.nrune == %d)", rb.nrune)
}
if rb.nbyte != 0 {
t.Errorf("non-null size of byte buffer (rb.nbyte == %d)", rb.nbyte)
}
}
var insertTests = []TestCase{
{[]rune{'a'}, []rune{'a'}},
{[]rune{0x300}, []rune{0x300}},
{[]rune{0x300, 0x316}, []rune{0x316, 0x300}}, // CCC(0x300)==230; CCC(0x316)==220
{[]rune{0x316, 0x300}, []rune{0x316, 0x300}},
{[]rune{0x41, 0x316, 0x300}, []rune{0x41, 0x316, 0x300}},
{[]rune{0x41, 0x300, 0x316}, []rune{0x41, 0x316, 0x300}},
{[]rune{0x300, 0x316, 0x41}, []rune{0x316, 0x300, 0x41}},
{[]rune{0x41, 0x300, 0x40, 0x316}, []rune{0x41, 0x300, 0x40, 0x316}},
}
func TestInsert(t *testing.T) {
runTests(t, "TestInsert", NFD, insertTests)
}
var decompositionNFDTest = []TestCase{
{[]rune{0xC0}, []rune{0x41, 0x300}},
{[]rune{0xAC00}, []rune{0x1100, 0x1161}},
{[]rune{0x01C4}, []rune{0x01C4}},
{[]rune{0x320E}, []rune{0x320E}},
{[]rune("음ẻ과"), []rune{0x110B, 0x1173, 0x11B7, 0x65, 0x309, 0x1100, 0x116A}},
}
var decompositionNFKDTest = []TestCase{
{[]rune{0xC0}, []rune{0x41, 0x300}},
{[]rune{0xAC00}, []rune{0x1100, 0x1161}},
{[]rune{0x01C4}, []rune{0x44, 0x5A, 0x030C}},
{[]rune{0x320E}, []rune{0x28, 0x1100, 0x1161, 0x29}},
}
func TestDecomposition(t *testing.T) {
runTests(t, "TestDecompositionNFD", NFD, decompositionNFDTest)
runTests(t, "TestDecompositionNFKD", NFKD, decompositionNFKDTest)
}
var compositionTest = []TestCase{
{[]rune{0x41, 0x300}, []rune{0xC0}},
{[]rune{0x41, 0x316}, []rune{0x41, 0x316}},
{[]rune{0x41, 0x300, 0x35D}, []rune{0xC0, 0x35D}},
{[]rune{0x41, 0x316, 0x300}, []rune{0xC0, 0x316}},
// blocking starter
{[]rune{0x41, 0x316, 0x40, 0x300}, []rune{0x41, 0x316, 0x40, 0x300}},
{[]rune{0x1100, 0x1161}, []rune{0xAC00}},
// parenthesized Hangul, alternate between ASCII and Hangul.
{[]rune{0x28, 0x1100, 0x1161, 0x29}, []rune{0x28, 0xAC00, 0x29}},
}
func TestComposition(t *testing.T) {
runTests(t, "TestComposition", NFC, compositionTest)
}

View File

@@ -1,82 +0,0 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package norm_test
import (
"bytes"
"fmt"
"unicode/utf8"
"golang.org/x/text/unicode/norm"
)
// EqualSimple uses a norm.Iter to compare two non-normalized
// strings for equivalence.
func EqualSimple(a, b string) bool {
var ia, ib norm.Iter
ia.InitString(norm.NFKD, a)
ib.InitString(norm.NFKD, b)
for !ia.Done() && !ib.Done() {
if !bytes.Equal(ia.Next(), ib.Next()) {
return false
}
}
return ia.Done() && ib.Done()
}
// FindPrefix finds the longest common prefix of ASCII characters
// of a and b.
func FindPrefix(a, b string) int {
i := 0
for ; i < len(a) && i < len(b) && a[i] < utf8.RuneSelf && a[i] == b[i]; i++ {
}
return i
}
// EqualOpt is like EqualSimple, but optimizes the special
// case for ASCII characters.
func EqualOpt(a, b string) bool {
n := FindPrefix(a, b)
a, b = a[n:], b[n:]
var ia, ib norm.Iter
ia.InitString(norm.NFKD, a)
ib.InitString(norm.NFKD, b)
for !ia.Done() && !ib.Done() {
if !bytes.Equal(ia.Next(), ib.Next()) {
return false
}
if n := int64(FindPrefix(a[ia.Pos():], b[ib.Pos():])); n != 0 {
ia.Seek(n, 1)
ib.Seek(n, 1)
}
}
return ia.Done() && ib.Done()
}
var compareTests = []struct{ a, b string }{
{"aaa", "aaa"},
{"aaa", "aab"},
{"a\u0300a", "\u00E0a"},
{"a\u0300\u0320b", "a\u0320\u0300b"},
{"\u1E0A\u0323", "\x44\u0323\u0307"},
// A character that decomposes into multiple segments
// spans several iterations.
{"\u3304", "\u30A4\u30CB\u30F3\u30AF\u3099"},
}
func ExampleIter() {
for i, t := range compareTests {
r0 := EqualSimple(t.a, t.b)
r1 := EqualOpt(t.a, t.b)
fmt.Printf("%d: %v %v\n", i, r0, r1)
}
// Output:
// 0: true true
// 1: false false
// 2: true true
// 3: true true
// 4: true true
// 5: true true
}

View File

@@ -10,7 +10,7 @@ package norm
// and its corresponding decomposing form share the same trie. Each trie maps
// a rune to a uint16. The values take two forms. For v >= 0x8000:
// bits
// 15: 1 (inverse of NFD_QD bit of qcInfo)
// 15: 1 (inverse of NFD_QC bit of qcInfo)
// 13..7: qcInfo (see below). isYesD is always true (no decompostion).
// 6..0: ccc (compressed CCC value).
// For v < 0x8000, the respective rune has a decomposition and v is an index
@@ -56,28 +56,31 @@ type formInfo struct {
nextMain iterFunc
}
var formTable []*formInfo
func init() {
formTable = make([]*formInfo, 4)
for i := range formTable {
f := &formInfo{}
formTable[i] = f
f.form = Form(i)
if Form(i) == NFKD || Form(i) == NFKC {
f.compatibility = true
f.info = lookupInfoNFKC
} else {
f.info = lookupInfoNFC
}
f.nextMain = nextDecomposed
if Form(i) == NFC || Form(i) == NFKC {
f.nextMain = nextComposed
f.composing = true
}
}
}
var formTable = []*formInfo{{
form: NFC,
composing: true,
compatibility: false,
info: lookupInfoNFC,
nextMain: nextComposed,
}, {
form: NFD,
composing: false,
compatibility: false,
info: lookupInfoNFC,
nextMain: nextDecomposed,
}, {
form: NFKC,
composing: true,
compatibility: true,
info: lookupInfoNFKC,
nextMain: nextComposed,
}, {
form: NFKD,
composing: false,
compatibility: true,
info: lookupInfoNFKC,
nextMain: nextDecomposed,
}}
// We do not distinguish between boundaries for NFC, NFD, etc. to avoid
// unexpected behavior for the user. For example, in NFD, there is a boundary

View File

@@ -1,54 +0,0 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build test
package norm
import "testing"
func TestProperties(t *testing.T) {
var d runeData
CK := [2]string{"C", "K"}
for k, r := 1, rune(0); r < 0x2ffff; r++ {
if k < len(testData) && r == testData[k].r {
d = testData[k]
k++
}
s := string(r)
for j, p := range []Properties{NFC.PropertiesString(s), NFKC.PropertiesString(s)} {
f := d.f[j]
if p.CCC() != d.ccc {
t.Errorf("%U: ccc(%s): was %d; want %d %X", r, CK[j], p.CCC(), d.ccc, p.index)
}
if p.isYesC() != (f.qc == Yes) {
t.Errorf("%U: YesC(%s): was %v; want %v", r, CK[j], p.isYesC(), f.qc == Yes)
}
if p.combinesBackward() != (f.qc == Maybe) {
t.Errorf("%U: combines backwards(%s): was %v; want %v", r, CK[j], p.combinesBackward(), f.qc == Maybe)
}
if p.nLeadingNonStarters() != d.nLead {
t.Errorf("%U: nLead(%s): was %d; want %d %#v %#v", r, CK[j], p.nLeadingNonStarters(), d.nLead, p, d)
}
if p.nTrailingNonStarters() != d.nTrail {
t.Errorf("%U: nTrail(%s): was %d; want %d %#v %#v", r, CK[j], p.nTrailingNonStarters(), d.nTrail, p, d)
}
if p.combinesForward() != f.combinesForward {
t.Errorf("%U: combines forward(%s): was %v; want %v %#v", r, CK[j], p.combinesForward(), f.combinesForward, p)
}
// Skip Hangul as it is algorithmically computed.
if r >= hangulBase && r < hangulEnd {
continue
}
if p.hasDecomposition() {
if has := f.decomposition != ""; !has {
t.Errorf("%U: hasDecomposition(%s): was %v; want %v", r, CK[j], p.hasDecomposition(), has)
}
if string(p.Decomposition()) != f.decomposition {
t.Errorf("%U: decomp(%s): was %+q; want %+q", r, CK[j], p.Decomposition(), f.decomposition)
}
}
}
}
}

View File

@@ -1,98 +0,0 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package norm
import (
"strings"
"testing"
)
func doIterNorm(f Form, s string) []byte {
acc := []byte{}
i := Iter{}
i.InitString(f, s)
for !i.Done() {
acc = append(acc, i.Next()...)
}
return acc
}
func TestIterNext(t *testing.T) {
runNormTests(t, "IterNext", func(f Form, out []byte, s string) []byte {
return doIterNorm(f, string(append(out, s...)))
})
}
type SegmentTest struct {
in string
out []string
}
var segmentTests = []SegmentTest{
{"\u1E0A\u0323a", []string{"\x44\u0323\u0307", "a", ""}},
{rep('a', segSize), append(strings.Split(rep('a', segSize), ""), "")},
{rep('a', segSize+2), append(strings.Split(rep('a', segSize+2), ""), "")},
{rep('a', segSize) + "\u0300aa",
append(strings.Split(rep('a', segSize-1), ""), "a\u0300", "a", "a", "")},
// U+0f73 is NOT treated as a starter as it is a modifier
{"a" + grave(29) + "\u0f73", []string{"a" + grave(29), cgj + "\u0f73"}},
{"a\u0f73", []string{"a\u0f73"}},
// U+ff9e is treated as a non-starter.
// TODO: should we? Note that this will only affect iteration, as whether
// or not we do so does not affect the normalization output and will either
// way result in consistent iteration output.
{"a" + grave(30) + "\uff9e", []string{"a" + grave(30), cgj + "\uff9e"}},
{"a\uff9e", []string{"a\uff9e"}},
}
var segmentTestsK = []SegmentTest{
{"\u3332", []string{"\u30D5", "\u30A1", "\u30E9", "\u30C3", "\u30C8\u3099", ""}},
// last segment of multi-segment decomposition needs normalization
{"\u3332\u093C", []string{"\u30D5", "\u30A1", "\u30E9", "\u30C3", "\u30C8\u093C\u3099", ""}},
{"\u320E", []string{"\x28", "\uAC00", "\x29"}},
// last segment should be copied to start of buffer.
{"\ufdfa", []string{"\u0635", "\u0644", "\u0649", " ", "\u0627", "\u0644", "\u0644", "\u0647", " ", "\u0639", "\u0644", "\u064a", "\u0647", " ", "\u0648", "\u0633", "\u0644", "\u0645", ""}},
{"\ufdfa" + grave(30), []string{"\u0635", "\u0644", "\u0649", " ", "\u0627", "\u0644", "\u0644", "\u0647", " ", "\u0639", "\u0644", "\u064a", "\u0647", " ", "\u0648", "\u0633", "\u0644", "\u0645" + grave(30), ""}},
{"\uFDFA" + grave(64), []string{"\u0635", "\u0644", "\u0649", " ", "\u0627", "\u0644", "\u0644", "\u0647", " ", "\u0639", "\u0644", "\u064a", "\u0647", " ", "\u0648", "\u0633", "\u0644", "\u0645" + grave(30), cgj + grave(30), cgj + grave(4), ""}},
// Hangul and Jamo are grouped togeter.
{"\uAC00", []string{"\u1100\u1161", ""}},
{"\uAC01", []string{"\u1100\u1161\u11A8", ""}},
{"\u1100\u1161", []string{"\u1100\u1161", ""}},
}
// Note that, by design, segmentation is equal for composing and decomposing forms.
func TestIterSegmentation(t *testing.T) {
segmentTest(t, "SegmentTestD", NFD, segmentTests)
segmentTest(t, "SegmentTestC", NFC, segmentTests)
segmentTest(t, "SegmentTestKD", NFKD, segmentTestsK)
segmentTest(t, "SegmentTestKC", NFKC, segmentTestsK)
}
func segmentTest(t *testing.T, name string, f Form, tests []SegmentTest) {
iter := Iter{}
for i, tt := range tests {
iter.InitString(f, tt.in)
for j, seg := range tt.out {
if seg == "" {
if !iter.Done() {
res := string(iter.Next())
t.Errorf(`%s:%d:%d: expected Done()==true, found segment %+q`, name, i, j, res)
}
continue
}
if iter.Done() {
t.Errorf("%s:%d:%d: Done()==true, want false", name, i, j)
}
seg = f.String(seg)
if res := string(iter.Next()); res != seg {
t.Errorf(`%s:%d:%d" segment was %+q (%d); want %+q (%d)`, name, i, j, pc(res), len(res), pc(seg), len(seg))
}
}
}
}

View File

@@ -35,12 +35,9 @@ func main() {
computeNonStarterCounts()
verifyComputed()
printChars()
if *test {
testDerived()
printTestdata()
} else {
makeTables()
}
testDerived()
printTestdata()
makeTables()
}
var (
@@ -602,6 +599,7 @@ func printCharInfoTables(w io.Writer) int {
}
index := normalDecomp
nTrail := chars[r].nTrailingNonStarters
nLead := chars[r].nLeadingNonStarters
if tccc > 0 || lccc > 0 || nTrail > 0 {
tccc <<= 2
tccc |= nTrail
@@ -612,7 +610,7 @@ func printCharInfoTables(w io.Writer) int {
index = firstCCC
}
}
if lccc > 0 {
if lccc > 0 || nLead > 0 {
s += string([]byte{lccc})
if index == firstCCC {
log.Fatalf("%U: multi-segment decomposition not supported for decompositions with leading CCC != 0", r)

View File

@@ -1,14 +0,0 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package norm_test
import (
"testing"
)
func TestPlaceHolder(t *testing.T) {
// Does nothing, just allows the Makefile to be canonical
// while waiting for the package itself to be written.
}

View File

@@ -2,13 +2,18 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Note: the file data_test.go that is generated should not be checked in.
//go:generate go run maketables.go triegen.go
//go:generate go run maketables.go triegen.go -test
//go:generate go test -tags test
// Package norm contains types and functions for normalizing Unicode strings.
package norm // import "golang.org/x/text/unicode/norm"
import "unicode/utf8"
import (
"unicode/utf8"
"golang.org/x/text/transform"
)
// A Form denotes a canonical representation of Unicode code points.
// The Unicode-defined normalization and equivalence forms are:
@@ -263,6 +268,34 @@ func (f Form) QuickSpan(b []byte) int {
return n
}
// Span implements transform.SpanningTransformer. It returns a boundary n such
// that b[0:n] == f(b[0:n]). It is not guaranteed to return the largest such n.
func (f Form) Span(b []byte, atEOF bool) (n int, err error) {
n, ok := formTable[f].quickSpan(inputBytes(b), 0, len(b), atEOF)
if n < len(b) {
if !ok {
err = transform.ErrEndOfSpan
} else {
err = transform.ErrShortSrc
}
}
return n, err
}
// SpanString returns a boundary n such that s[0:n] == f(s[0:n]).
// It is not guaranteed to return the largest such n.
func (f Form) SpanString(s string, atEOF bool) (n int, err error) {
n, ok := formTable[f].quickSpan(inputString(s), 0, len(s), atEOF)
if n < len(s) {
if !ok {
err = transform.ErrEndOfSpan
} else {
err = transform.ErrShortSrc
}
}
return n, err
}
// quickSpan returns a boundary n such that src[0:n] == f(src[0:n]) and
// whether any non-normalized parts were found. If atEOF is false, n will
// not point past the last segment if this segment might be become
@@ -321,7 +354,7 @@ func (f *formInfo) quickSpan(src input, i, end int, atEOF bool) (n int, ok bool)
return lastSegStart, false
}
// QuickSpanString returns a boundary n such that b[0:n] == f(s[0:n]).
// QuickSpanString returns a boundary n such that s[0:n] == f(s[0:n]).
// It is not guaranteed to return the largest such n.
func (f Form) QuickSpanString(s string) int {
n, _ := formTable[f].quickSpan(inputString(s), 0, len(s), true)
@@ -344,7 +377,6 @@ func (f Form) firstBoundary(src input, nsrc int) int {
// We should call ss.first here, but we can't as the first rune is
// skipped already. This means FirstBoundary can't really determine
// CGJ insertion points correctly. Luckily it doesn't have to.
// TODO: consider adding NextBoundary
for {
info := fd.info(src, i)
if info.size == 0 {
@@ -369,6 +401,56 @@ func (f Form) FirstBoundaryInString(s string) int {
return f.firstBoundary(inputString(s), len(s))
}
// NextBoundary reports the index of the boundary between the first and next
// segment in b or -1 if atEOF is false and there are not enough bytes to
// determine this boundary.
func (f Form) NextBoundary(b []byte, atEOF bool) int {
return f.nextBoundary(inputBytes(b), len(b), atEOF)
}
// NextBoundaryInString reports the index of the boundary between the first and
// next segment in b or -1 if atEOF is false and there are not enough bytes to
// determine this boundary.
func (f Form) NextBoundaryInString(s string, atEOF bool) int {
return f.nextBoundary(inputString(s), len(s), atEOF)
}
func (f Form) nextBoundary(src input, nsrc int, atEOF bool) int {
if nsrc == 0 {
if atEOF {
return 0
}
return -1
}
fd := formTable[f]
info := fd.info(src, 0)
if info.size == 0 {
if atEOF {
return 1
}
return -1
}
ss := streamSafe(0)
ss.first(info)
for i := int(info.size); i < nsrc; i += int(info.size) {
info = fd.info(src, i)
if info.size == 0 {
if atEOF {
return i
}
return -1
}
if s := ss.next(info); s != ssSuccess {
return i
}
}
if !atEOF && !info.BoundaryAfter() && !ss.isMax() {
return -1
}
return nsrc
}
// LastBoundary returns the position i of the last boundary in b
// or -1 if b contains no boundary.
func (f Form) LastBoundary(b []byte) int {

File diff suppressed because it is too large Load Diff

View File

@@ -112,7 +112,6 @@ func (r *normReader) Read(p []byte) (int, error) {
}
}
}
panic("should not reach here")
}
// Reader returns a new reader that implements Read

View File

@@ -1,56 +0,0 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package norm
import (
"bytes"
"fmt"
"testing"
)
var bufSizes = []int{1, 2, 3, 4, 5, 6, 7, 8, 100, 101, 102, 103, 4000, 4001, 4002, 4003}
func readFunc(size int) appendFunc {
return func(f Form, out []byte, s string) []byte {
out = append(out, s...)
r := f.Reader(bytes.NewBuffer(out))
buf := make([]byte, size)
result := []byte{}
for n, err := 0, error(nil); err == nil; {
n, err = r.Read(buf)
result = append(result, buf[:n]...)
}
return result
}
}
func TestReader(t *testing.T) {
for _, s := range bufSizes {
name := fmt.Sprintf("TestReader%d", s)
runNormTests(t, name, readFunc(s))
}
}
func writeFunc(size int) appendFunc {
return func(f Form, out []byte, s string) []byte {
in := append(out, s...)
result := new(bytes.Buffer)
w := f.Writer(result)
buf := make([]byte, size)
for n := 0; len(in) > 0; in = in[n:] {
n = copy(buf, in)
_, _ = w.Write(buf[:n])
}
w.Close()
return result.Bytes()
}
}
func TestWriter(t *testing.T) {
for _, s := range bufSizes {
name := fmt.Sprintf("TestWriter%d", s)
runNormTests(t, name, writeFunc(s))
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,101 +0,0 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package norm
import (
"fmt"
"testing"
"golang.org/x/text/transform"
)
func TestTransform(t *testing.T) {
tests := []struct {
f Form
in, out string
eof bool
dstSize int
err error
}{
{NFC, "ab", "ab", true, 2, nil},
{NFC, "qx", "qx", true, 2, nil},
{NFD, "qx", "qx", true, 2, nil},
{NFC, "", "", true, 1, nil},
{NFD, "", "", true, 1, nil},
{NFC, "", "", false, 1, nil},
{NFD, "", "", false, 1, nil},
// Normalized segment does not fit in destination.
{NFD, "ö", "", true, 1, transform.ErrShortDst},
{NFD, "ö", "", true, 2, transform.ErrShortDst},
// As an artifact of the algorithm, only full segments are written.
// This is not strictly required, and some bytes could be written.
// In practice, for Transform to not block, the destination buffer
// should be at least MaxSegmentSize to work anyway and these edge
// conditions will be relatively rare.
{NFC, "ab", "", true, 1, transform.ErrShortDst},
// This is even true for inert runes.
{NFC, "qx", "", true, 1, transform.ErrShortDst},
{NFC, "a\u0300abc", "\u00e0a", true, 4, transform.ErrShortDst},
// We cannot write a segment if succesive runes could still change the result.
{NFD, "ö", "", false, 3, transform.ErrShortSrc},
{NFC, "a\u0300", "", false, 4, transform.ErrShortSrc},
{NFD, "a\u0300", "", false, 4, transform.ErrShortSrc},
{NFC, "ö", "", false, 3, transform.ErrShortSrc},
{NFC, "a\u0300", "", true, 1, transform.ErrShortDst},
// Theoretically could fit, but won't due to simplified checks.
{NFC, "a\u0300", "", true, 2, transform.ErrShortDst},
{NFC, "a\u0300", "", true, 3, transform.ErrShortDst},
{NFC, "a\u0300", "\u00e0", true, 4, nil},
{NFD, "öa\u0300", "o\u0308", false, 8, transform.ErrShortSrc},
{NFD, "öa\u0300ö", "o\u0308a\u0300", true, 8, transform.ErrShortDst},
{NFD, "öa\u0300ö", "o\u0308a\u0300", false, 12, transform.ErrShortSrc},
// Illegal input is copied verbatim.
{NFD, "\xbd\xb2=\xbc ", "\xbd\xb2=\xbc ", true, 8, nil},
}
b := make([]byte, 100)
for i, tt := range tests {
nDst, _, err := tt.f.Transform(b[:tt.dstSize], []byte(tt.in), tt.eof)
out := string(b[:nDst])
if out != tt.out || err != tt.err {
t.Errorf("%d: was %+q (%v); want %+q (%v)", i, out, err, tt.out, tt.err)
}
if want := tt.f.String(tt.in)[:nDst]; want != out {
t.Errorf("%d: incorect normalization: was %+q; want %+q", i, out, want)
}
}
}
var transBufSizes = []int{
MaxTransformChunkSize,
3 * MaxTransformChunkSize / 2,
2 * MaxTransformChunkSize,
3 * MaxTransformChunkSize,
100 * MaxTransformChunkSize,
}
func doTransNorm(f Form, buf []byte, b []byte) []byte {
acc := []byte{}
for p := 0; p < len(b); {
nd, ns, _ := f.Transform(buf[:], b[p:], true)
p += ns
acc = append(acc, buf[:nd]...)
}
return acc
}
func TestTransformNorm(t *testing.T) {
for _, sz := range transBufSizes {
buf := make([]byte, sz)
runNormTests(t, fmt.Sprintf("Transform:%d", sz), func(f Form, out []byte, s string) []byte {
return doTransNorm(f, buf, append(out, s...))
})
}
}

View File

@@ -1,275 +0,0 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package norm
import (
"bufio"
"bytes"
"fmt"
"regexp"
"runtime"
"strconv"
"strings"
"sync"
"testing"
"time"
"unicode/utf8"
"golang.org/x/text/internal/gen"
"golang.org/x/text/internal/testtext"
)
var once sync.Once
func skipShort(t *testing.T) {
testtext.SkipIfNotLong(t)
once.Do(func() { loadTestData(t) })
}
// This regression test runs the test set in NormalizationTest.txt
// (taken from http://www.unicode.org/Public/<unicode.Version>/ucd/).
//
// NormalizationTest.txt has form:
// @Part0 # Specific cases
// #
// 1E0A;1E0A;0044 0307;1E0A;0044 0307; # (Ḋ; Ḋ; D◌̇; Ḋ; D◌̇; ) LATIN CAPITAL LETTER D WITH DOT ABOVE
// 1E0C;1E0C;0044 0323;1E0C;0044 0323; # (Ḍ; Ḍ; D◌̣; Ḍ; D◌̣; ) LATIN CAPITAL LETTER D WITH DOT BELOW
//
// Each test has 5 columns (c1, c2, c3, c4, c5), where
// (c1, c2, c3, c4, c5) == (c1, NFC(c1), NFD(c1), NFKC(c1), NFKD(c1))
//
// CONFORMANCE:
// 1. The following invariants must be true for all conformant implementations
//
// NFC
// c2 == NFC(c1) == NFC(c2) == NFC(c3)
// c4 == NFC(c4) == NFC(c5)
//
// NFD
// c3 == NFD(c1) == NFD(c2) == NFD(c3)
// c5 == NFD(c4) == NFD(c5)
//
// NFKC
// c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5)
//
// NFKD
// c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5)
//
// 2. For every code point X assigned in this version of Unicode that is not
// specifically listed in Part 1, the following invariants must be true
// for all conformant implementations:
//
// X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X)
//
// Column types.
const (
cRaw = iota
cNFC
cNFD
cNFKC
cNFKD
cMaxColumns
)
// Holds data from NormalizationTest.txt
var part []Part
type Part struct {
name string
number int
tests []Test
}
type Test struct {
name string
partnr int
number int
r rune // used for character by character test
cols [cMaxColumns]string // Each has 5 entries, see below.
}
func (t Test) Name() string {
if t.number < 0 {
return part[t.partnr].name
}
return fmt.Sprintf("%s:%d", part[t.partnr].name, t.number)
}
var partRe = regexp.MustCompile(`@Part(\d) # (.*)$`)
var testRe = regexp.MustCompile(`^` + strings.Repeat(`([\dA-F ]+);`, 5) + ` # (.*)$`)
var counter int
// Load the data form NormalizationTest.txt
func loadTestData(t *testing.T) {
f := gen.OpenUCDFile("NormalizationTest.txt")
defer f.Close()
scanner := bufio.NewScanner(f)
for scanner.Scan() {
line := scanner.Text()
if len(line) == 0 || line[0] == '#' {
continue
}
m := partRe.FindStringSubmatch(line)
if m != nil {
if len(m) < 3 {
t.Fatal("Failed to parse Part: ", line)
}
i, err := strconv.Atoi(m[1])
if err != nil {
t.Fatal(err)
}
name := m[2]
part = append(part, Part{name: name[:len(name)-1], number: i})
continue
}
m = testRe.FindStringSubmatch(line)
if m == nil || len(m) < 7 {
t.Fatalf(`Failed to parse: "%s" result: %#v`, line, m)
}
test := Test{name: m[6], partnr: len(part) - 1, number: counter}
counter++
for j := 1; j < len(m)-1; j++ {
for _, split := range strings.Split(m[j], " ") {
r, err := strconv.ParseUint(split, 16, 64)
if err != nil {
t.Fatal(err)
}
if test.r == 0 {
// save for CharacterByCharacterTests
test.r = rune(r)
}
var buf [utf8.UTFMax]byte
sz := utf8.EncodeRune(buf[:], rune(r))
test.cols[j-1] += string(buf[:sz])
}
}
part := &part[len(part)-1]
part.tests = append(part.tests, test)
}
if scanner.Err() != nil {
t.Fatal(scanner.Err())
}
}
func cmpResult(t *testing.T, tc *Test, name string, f Form, gold, test, result string) {
if gold != result {
t.Errorf("%s:%s: %s(%+q)=%+q; want %+q: %s",
tc.Name(), name, fstr[f], test, result, gold, tc.name)
}
}
func cmpIsNormal(t *testing.T, tc *Test, name string, f Form, test string, result, want bool) {
if result != want {
t.Errorf("%s:%s: %s(%+q)=%v; want %v", tc.Name(), name, fstr[f], test, result, want)
}
}
func doTest(t *testing.T, tc *Test, f Form, gold, test string) {
testb := []byte(test)
result := f.Bytes(testb)
cmpResult(t, tc, "Bytes", f, gold, test, string(result))
sresult := f.String(test)
cmpResult(t, tc, "String", f, gold, test, sresult)
acc := []byte{}
i := Iter{}
i.InitString(f, test)
for !i.Done() {
acc = append(acc, i.Next()...)
}
cmpResult(t, tc, "Iter.Next", f, gold, test, string(acc))
buf := make([]byte, 128)
acc = nil
for p := 0; p < len(testb); {
nDst, nSrc, _ := f.Transform(buf, testb[p:], true)
acc = append(acc, buf[:nDst]...)
p += nSrc
}
cmpResult(t, tc, "Transform", f, gold, test, string(acc))
for i := range test {
out := f.Append(f.Bytes([]byte(test[:i])), []byte(test[i:])...)
cmpResult(t, tc, fmt.Sprintf(":Append:%d", i), f, gold, test, string(out))
}
cmpIsNormal(t, tc, "IsNormal", f, test, f.IsNormal([]byte(test)), test == gold)
cmpIsNormal(t, tc, "IsNormalString", f, test, f.IsNormalString(test), test == gold)
}
func doConformanceTests(t *testing.T, tc *Test, partn int) {
for i := 0; i <= 2; i++ {
doTest(t, tc, NFC, tc.cols[1], tc.cols[i])
doTest(t, tc, NFD, tc.cols[2], tc.cols[i])
doTest(t, tc, NFKC, tc.cols[3], tc.cols[i])
doTest(t, tc, NFKD, tc.cols[4], tc.cols[i])
}
for i := 3; i <= 4; i++ {
doTest(t, tc, NFC, tc.cols[3], tc.cols[i])
doTest(t, tc, NFD, tc.cols[4], tc.cols[i])
doTest(t, tc, NFKC, tc.cols[3], tc.cols[i])
doTest(t, tc, NFKD, tc.cols[4], tc.cols[i])
}
}
func TestCharacterByCharacter(t *testing.T) {
skipShort(t)
tests := part[1].tests
var last rune = 0
for i := 0; i <= len(tests); i++ { // last one is special case
var r rune
if i == len(tests) {
r = 0x2FA1E // Don't have to go to 0x10FFFF
} else {
r = tests[i].r
}
for last++; last < r; last++ {
// Check all characters that were not explicitly listed in the test.
tc := &Test{partnr: 1, number: -1}
char := string(last)
doTest(t, tc, NFC, char, char)
doTest(t, tc, NFD, char, char)
doTest(t, tc, NFKC, char, char)
doTest(t, tc, NFKD, char, char)
}
if i < len(tests) {
doConformanceTests(t, &tests[i], 1)
}
}
}
func TestStandardTests(t *testing.T) {
skipShort(t)
for _, j := range []int{0, 2, 3} {
for _, test := range part[j].tests {
doConformanceTests(t, &test, j)
}
}
}
// TestPerformance verifies that normalization is O(n). If any of the
// code does not properly check for maxCombiningChars, normalization
// may exhibit O(n**2) behavior.
func TestPerformance(t *testing.T) {
skipShort(t)
runtime.GOMAXPROCS(2)
success := make(chan bool, 1)
go func() {
buf := bytes.Repeat([]byte("\u035D"), 1024*1024)
buf = append(buf, "\u035B"...)
NFC.Append(nil, buf...)
success <- true
}()
timeout := time.After(1 * time.Second)
select {
case <-success:
// test completed before the timeout
case <-timeout:
t.Errorf(`unexpectedly long time to complete PerformanceTest`)
}
}