Update all deps to latest version

2014-07-23 08:31:36 +02:00
parent 08ca9f9378
commit 544fea51b0
26 changed files with 1197 additions and 808 deletions
--- a/Godeps/_workspace/src/code.google.com/p/go.text/transform/transform.go
+++ b/Godeps/_workspace/src/code.google.com/p/go.text/transform/transform.go
@@ -9,6 +9,7 @@
 package transform

 import (
+	"bytes"
 	"errors"
 	"io"
 	"unicode/utf8"
@@ -127,7 +128,7 @@ func (r *Reader) Read(p []byte) (int, error) {
 				// cannot read more bytes into src.
 				r.transformComplete = r.err != nil
 				continue
-			case err == ErrShortDst && r.dst1 != 0:
+			case err == ErrShortDst && (r.dst1 != 0 || n != 0):
 				// Make room in dst by copying out, and try again.
 				continue
 			case err == ErrShortSrc && r.src1-r.src0 != len(r.src) && r.err == nil:
@@ -210,7 +211,7 @@ func (w *Writer) Write(data []byte) (n int, err error) {
 			n += nSrc
 		}
 		switch {
-		case err == ErrShortDst && nDst > 0:
+		case err == ErrShortDst && (nDst > 0 || nSrc > 0):
 		case err == ErrShortSrc && len(src) < len(w.src):
 			m := copy(w.src, src)
 			// If w.n > 0, bytes from data were already copied to w.src and n
@@ -467,30 +468,125 @@ func (t removeF) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err err
 	return
 }

-// Bytes returns a new byte slice with the result of converting b using t.
-// If any unrecoverable error occurs it returns nil.
-func Bytes(t Transformer, b []byte) []byte {
-	out := make([]byte, len(b))
-	n := 0
-	for {
-		nDst, nSrc, err := t.Transform(out[n:], b, true)
-		n += nDst
-		if err == nil {
-			return out[:n]
-		} else if err != ErrShortDst {
-			return nil
-		}
-		b = b[nSrc:]
+// grow returns a new []byte that is longer than b, and copies the first n bytes
+// of b to the start of the new slice.
+func grow(b []byte, n int) []byte {
+	m := len(b)
+	if m <= 256 {
+		m *= 2
+	} else {
+		m += m >> 1
+	}
+	buf := make([]byte, m)
+	copy(buf, b[:n])
+	return buf
+}

-		// Grow the destination buffer.
-		sz := len(out)
-		if sz <= 256 {
-			sz *= 2
-		} else {
-			sz += sz >> 1
+const initialBufSize = 128
+
+// String returns a string with the result of converting s[:n] using t, where
+// n <= len(s). If err == nil, n will be len(s).
+func String(t Transformer, s string) (result string, n int, err error) {
+	if s == "" {
+		return "", 0, nil
+	}
+
+	// Allocate only once. Note that both dst and src escape when passed to
+	// Transform.
+	buf := [2 * initialBufSize]byte{}
+	dst := buf[:initialBufSize:initialBufSize]
+	src := buf[initialBufSize : 2*initialBufSize]
+
+	// Avoid allocation if the transformed string is identical to the original.
+	// After this loop, pDst will point to the furthest point in s for which it
+	// could be detected that t gives equal results, src[:nSrc] will
+	// indicate the last processed chunk of s for which the output is not equal
+	// and dst[:nDst] will be the transform of this chunk.
+	var nDst, nSrc int
+	pDst := 0 // Used as index in both src and dst in this loop.
+	for {
+		n := copy(src, s[pDst:])
+		nDst, nSrc, err = t.Transform(dst, src[:n], pDst+n == len(s))
+
+		// Note 1: we will not enter the loop with pDst == len(s) and we will
+		// not end the loop with it either. So if nSrc is 0, this means there is
+		// some kind of error from which we cannot recover given the current
+		// buffer sizes. We will give up in this case.
+		// Note 2: it is not entirely correct to simply do a bytes.Equal as
+		// a Transformer may buffer internally. It will work in most cases,
+		// though, and no harm is done if it doesn't work.
+		// TODO: let transformers implement an optional Spanner interface, akin
+		// to norm's QuickSpan. This would even allow us to avoid any allocation.
+		if nSrc == 0 || !bytes.Equal(dst[:nDst], src[:nSrc]) {
+			break
+		}
+
+		if pDst += nDst; pDst == len(s) {
+			return s, pDst, nil
+		}
+	}
+
+	// Move the bytes seen so far to dst.
+	pSrc := pDst + nSrc
+	if pDst+nDst <= initialBufSize {
+		copy(dst[pDst:], dst[:nDst])
+	} else {
+		b := make([]byte, len(s)+nDst-nSrc)
+		copy(b[pDst:], dst[:nDst])
+		dst = b
+	}
+	copy(dst, s[:pDst])
+	pDst += nDst
+
+	if err != nil && err != ErrShortDst && err != ErrShortSrc {
+		return string(dst[:pDst]), pSrc, err
+	}
+
+	// Complete the string with the remainder.
+	for {
+		n := copy(src, s[pSrc:])
+		nDst, nSrc, err = t.Transform(dst[pDst:], src[:n], pSrc+n == len(s))
+		pDst += nDst
+		pSrc += nSrc
+
+		switch err {
+		case nil:
+			if pSrc == len(s) {
+				return string(dst[:pDst]), pSrc, nil
+			}
+		case ErrShortDst:
+			// Do not grow as long as we can make progress. This may avoid
+			// excessive allocations.
+			if nDst == 0 {
+				dst = grow(dst, pDst)
+			}
+		case ErrShortSrc:
+			if nSrc == 0 {
+				src = grow(src, 0)
+			}
+		default:
+			return string(dst[:pDst]), pSrc, err
+		}
+	}
+}
+
+// Bytes returns a new byte slice with the result of converting b[:n] using t,
+// where n <= len(b). If err == nil, n will be len(b).
+func Bytes(t Transformer, b []byte) (result []byte, n int, err error) {
+	dst := make([]byte, len(b))
+	pDst, pSrc := 0, 0
+	for {
+		nDst, nSrc, err := t.Transform(dst[pDst:], b[pSrc:], true)
+		pDst += nDst
+		pSrc += nSrc
+		if err != ErrShortDst {
+			return dst[:pDst], pSrc, err
+		}
+
+		// Grow the destination buffer, but do not grow as long as we can make
+		// progress. This may avoid excessive allocations.
+		if nDst == 0 {
+			dst = grow(dst, pDst)
 		}
-		out2 := make([]byte, sz)
-		copy(out2, out[:n])
-		out = out2
 	}
 }
--- a/Godeps/_workspace/src/code.google.com/p/go.text/transform/transform_test.go
+++ b/Godeps/_workspace/src/code.google.com/p/go.text/transform/transform_test.go
@@ -12,6 +12,7 @@ import (
 	"strconv"
 	"strings"
 	"testing"
+	"time"
 	"unicode/utf8"
 )

@@ -132,6 +133,43 @@ func (e rleEncode) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err e
 	return nDst, nSrc, nil
 }

+// trickler consumes all input bytes, but writes a single byte at a time to dst.
+type trickler []byte
+
+func (t *trickler) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
+	*t = append(*t, src...)
+	if len(*t) == 0 {
+		return 0, 0, nil
+	}
+	if len(dst) == 0 {
+		return 0, len(src), ErrShortDst
+	}
+	dst[0] = (*t)[0]
+	*t = (*t)[1:]
+	if len(*t) > 0 {
+		err = ErrShortDst
+	}
+	return 1, len(src), err
+}
+
+// delayedTrickler is like trickler, but delays writing output to dst. This is
+// highly unlikely to be relevant in practice, but it seems like a good idea
+// to have some tolerance as long as progress can be detected.
+type delayedTrickler []byte
+
+func (t *delayedTrickler) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
+	if len(*t) > 0 && len(dst) > 0 {
+		dst[0] = (*t)[0]
+		*t = (*t)[1:]
+		nDst = 1
+	}
+	*t = append(*t, src...)
+	if len(*t) > 0 {
+		err = ErrShortDst
+	}
+	return nDst, len(src), err
+}
+
 type testCase struct {
 	desc     string
 	t        Transformer
@@ -170,6 +208,15 @@ func (c chain) String() string {
 }

 var testCases = []testCase{
+	{
+		desc:    "empty",
+		t:       lowerCaseASCII{},
+		src:     "",
+		dstSize: 100,
+		srcSize: 100,
+		wantStr: "",
+	},
+
 	{
 		desc:    "basic",
 		t:       lowerCaseASCII{},
@@ -378,6 +425,24 @@ var testCases = []testCase{
 		ioSize:  10,
 		wantStr: "4a6b2b4c4d1d",
 	},
+
+	{
+		desc:    "trickler",
+		t:       &trickler{},
+		src:     "abcdefghijklm",
+		dstSize: 3,
+		srcSize: 15,
+		wantStr: "abcdefghijklm",
+	},
+
+	{
+		desc:    "delayedTrickler",
+		t:       &delayedTrickler{},
+		src:     "abcdefghijklm",
+		dstSize: 3,
+		srcSize: 15,
+		wantStr: "abcdefghijklm",
+	},
 }

 func TestReader(t *testing.T) {
@@ -685,7 +750,7 @@ func doTransform(tc testCase) (res string, iter int, err error) {
 		switch {
 		case err == nil && len(in) != 0:
 		case err == ErrShortSrc && nSrc > 0:
-		case err == ErrShortDst && nDst > 0:
+		case err == ErrShortDst && (nDst > 0 || nSrc > 0):
 		default:
 			return string(out), iter, err
 		}
@@ -875,27 +940,136 @@ func TestRemoveFunc(t *testing.T) {
 	}
 }

-func TestBytes(t *testing.T) {
+func testString(t *testing.T, f func(Transformer, string) (string, int, error)) {
 	for _, tt := range append(testCases, chainTests()...) {
 		if tt.desc == "allowStutter = true" {
 			// We don't have control over the buffer size, so we eliminate tests
 			// that depend on a specific buffer size being set.
 			continue
 		}
-		got := Bytes(tt.t, []byte(tt.src))
-		if tt.wantErr != nil {
-			if tt.wantErr != ErrShortDst && tt.wantErr != ErrShortSrc {
-				// Bytes should return nil for non-recoverable errors.
-				if g, w := (got == nil), (tt.wantErr != nil); g != w {
-					t.Errorf("%s:error: got %v; want %v", tt.desc, g, w)
-				}
-			}
-			// The output strings in the tests that expect an error will
-			// almost certainly not be the same as the result of Bytes.
+		reset(tt.t)
+		if tt.wantErr == ErrShortDst || tt.wantErr == ErrShortSrc {
+			// The result string will be different.
 			continue
 		}
-		if string(got) != tt.wantStr {
+		got, n, err := f(tt.t, tt.src)
+		if tt.wantErr != err {
+			t.Errorf("%s:error: got %v; want %v", tt.desc, err, tt.wantErr)
+		}
+		if got, want := err == nil, n == len(tt.src); got != want {
+			t.Errorf("%s:n: got %v; want %v", tt.desc, got, want)
+		}
+		if got != tt.wantStr {
 			t.Errorf("%s:string: got %q; want %q", tt.desc, got, tt.wantStr)
 		}
 	}
 }
+
+func TestBytes(t *testing.T) {
+	testString(t, func(z Transformer, s string) (string, int, error) {
+		b, n, err := Bytes(z, []byte(s))
+		return string(b), n, err
+	})
+}
+
+func TestString(t *testing.T) {
+	testString(t, String)
+
+	// Overrun the internal destination buffer.
+	for i, s := range []string{
+		strings.Repeat("a", initialBufSize-1),
+		strings.Repeat("a", initialBufSize+0),
+		strings.Repeat("a", initialBufSize+1),
+		strings.Repeat("A", initialBufSize-1),
+		strings.Repeat("A", initialBufSize+0),
+		strings.Repeat("A", initialBufSize+1),
+		strings.Repeat("A", 2*initialBufSize-1),
+		strings.Repeat("A", 2*initialBufSize+0),
+		strings.Repeat("A", 2*initialBufSize+1),
+		strings.Repeat("a", initialBufSize-2) + "A",
+		strings.Repeat("a", initialBufSize-1) + "A",
+		strings.Repeat("a", initialBufSize+0) + "A",
+		strings.Repeat("a", initialBufSize+1) + "A",
+	} {
+		got, _, _ := String(lowerCaseASCII{}, s)
+		if want := strings.ToLower(s); got != want {
+			t.Errorf("%d:dst buffer test: got %s (%d); want %s (%d)", i, got, len(got), want, len(want))
+		}
+	}
+
+	// Overrun the internal source buffer.
+	for i, s := range []string{
+		strings.Repeat("a", initialBufSize-1),
+		strings.Repeat("a", initialBufSize+0),
+		strings.Repeat("a", initialBufSize+1),
+		strings.Repeat("a", 2*initialBufSize+1),
+		strings.Repeat("a", 2*initialBufSize+0),
+		strings.Repeat("a", 2*initialBufSize+1),
+	} {
+		got, _, _ := String(rleEncode{}, s)
+		if want := fmt.Sprintf("%da", len(s)); got != want {
+			t.Errorf("%d:src buffer test: got %s (%d); want %s (%d)", i, got, len(got), want, len(want))
+		}
+	}
+
+	// Test allocations for non-changing strings.
+	// Note we still need to allocate a single buffer.
+	for i, s := range []string{
+		"",
+		"123",
+		"123456789",
+		strings.Repeat("a", initialBufSize),
+		strings.Repeat("a", 10*initialBufSize),
+	} {
+		if n := testing.AllocsPerRun(5, func() { String(&lowerCaseASCII{}, s) }); n > 1 {
+			t.Errorf("%d: #allocs was %f; want 1", i, n)
+		}
+	}
+}
+
+// TestBytesAllocation tests that buffer growth stays limited with the trickler
+// transformer, which behaves oddly but within spec. In case buffer growth is
+// not correctly handled, the test will either panic with a failed allocation or
+// thrash. To ensure the tests terminate under the last condition, we time out
+// after some sufficiently long period of time.
+func TestBytesAllocation(t *testing.T) {
+	done := make(chan bool)
+	go func() {
+		in := bytes.Repeat([]byte{'a'}, 1000)
+		tr := trickler(make([]byte, 1))
+		Bytes(&tr, in)
+		done <- true
+	}()
+	select {
+	case <-done:
+	case <-time.After(3 * time.Second):
+		t.Error("time out, likely due to excessive allocation")
+	}
+}
+
+// TestStringAllocation tests that buffer growth stays limited with the trickler
+// transformer, which behaves oddly but within spec. In case buffer growth is
+// not correctly handled, the test will either panic with a failed allocation or
+// thrash. To ensure the tests terminate under the last condition, we time out
+// after some sufficiently long period of time.
+func TestStringAllocation(t *testing.T) {
+	done := make(chan bool)
+	go func() {
+		in := strings.Repeat("a", 1000)
+		tr := trickler(make([]byte, 1))
+		String(&tr, in)
+		done <- true
+	}()
+	select {
+	case <-done:
+	case <-time.After(3 * time.Second):
+		t.Error("time out, likely due to excessive allocation")
+	}
+}
+
+func BenchmarkStringLower(b *testing.B) {
+	in := strings.Repeat("a", 4096)
+	for i := 0; i < b.N; i++ {
+		String(&lowerCaseASCII{}, in)
+	}
+}