vendor: Mega update all dependencies

GitHub-Pull-Request: https://github.com/syncthing/syncthing/pull/4080
Author: Jakob Borg
Date: 2017-04-05 14:34:41 +00:00
Parent: 49c1527724
Commit: a1bcc15458
1354 changed files with 55066 additions and 797850 deletions


@@ -1,15 +0,0 @@
# This is the official list of Snappy-Go authors for copyright purposes.
# This file is distinct from the CONTRIBUTORS files.
# See the latter for an explanation.
# Names should be added to this file as
# Name or Organization <email address>
# The email address is not required for organizations.
# Please keep the list sorted.
Damian Gryski <dgryski@gmail.com>
Google Inc.
Jan Mercl <0xjnml@gmail.com>
Rodolfo Carvalho <rhcarvalho@gmail.com>
Sebastien Binet <seb.binet@gmail.com>


@@ -1,37 +0,0 @@
# This is the official list of people who can contribute
# (and typically have contributed) code to the Snappy-Go repository.
# The AUTHORS file lists the copyright holders; this file
# lists people. For example, Google employees are listed here
# but not in AUTHORS, because Google holds the copyright.
#
# The submission process automatically checks to make sure
# that people submitting code are listed in this file (by email address).
#
# Names should be added to this file only after verifying that
# the individual or the individual's organization has agreed to
# the appropriate Contributor License Agreement, found here:
#
# http://code.google.com/legal/individual-cla-v1.0.html
# http://code.google.com/legal/corporate-cla-v1.0.html
#
# The agreement for individuals can be filled out on the web.
#
# When adding J Random Contributor's name to this file,
# either J's name or J's organization's name should be
# added to the AUTHORS file, depending on whether the
# individual or corporate CLA was used.
# Names should be added to this file like so:
# Name <email address>
# Please keep the list sorted.
Damian Gryski <dgryski@gmail.com>
Jan Mercl <0xjnml@gmail.com>
Kai Backman <kaib@golang.org>
Marc-Antoine Ruel <maruel@chromium.org>
Nigel Tao <nigeltao@golang.org>
Rob Pike <r@golang.org>
Rodolfo Carvalho <rhcarvalho@gmail.com>
Russ Cox <rsc@golang.org>
Sebastien Binet <seb.binet@gmail.com>


@@ -1,7 +0,0 @@
The Snappy compression format in the Go programming language.
To download and install from source:
$ go get github.com/golang/snappy
Unless otherwise noted, the Snappy-Go source files are distributed
under the BSD-style license found in the LICENSE file.
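
A minimal round-trip sketch of the package's block-format API (snappy.Encode and snappy.Decode, both exercised elsewhere in this diff):

package main

import (
	"fmt"

	"github.com/golang/snappy"
)

func main() {
	src := []byte("hello, snappy")
	enc := snappy.Encode(nil, src) // block format, not the framing format
	dec, err := snappy.Decode(nil, enc)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(dec)) // hello, snappy
}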


@@ -1,6 +1,9 @@
/*
To build the snappytool binary:
g++ main.cpp /usr/lib/libsnappy.a -o snappytool
or, if you have built the C++ snappy library from source:
g++ main.cpp /path/to/your/snappy/.libs/libsnappy.a -o snappytool
after running "make" from your snappy checkout directory.
*/
#include <errno.h>


@@ -18,7 +18,6 @@ var (
// ErrUnsupported reports that the input isn't supported.
ErrUnsupported = errors.New("snappy: unsupported input")
errUnsupportedCopy4Tag = errors.New("snappy: unsupported COPY_4 tag")
errUnsupportedLiteralLength = errors.New("snappy: unsupported literal length")
)
@@ -46,7 +45,6 @@ func decodedLen(src []byte) (blockLen, headerLen int, err error) {
const (
decodeErrCodeCorrupt = 1
decodeErrCodeUnsupportedLiteralLength = 2
decodeErrCodeUnsupportedCopy4Tag = 3
)
// Decode returns the decoded form of src. The returned slice may be a sub-
@@ -69,8 +67,6 @@ func Decode(dst, src []byte) ([]byte, error) {
return dst, nil
case decodeErrCodeUnsupportedLiteralLength:
return nil, errUnsupportedLiteralLength
case decodeErrCodeUnsupportedCopy4Tag:
return nil, errUnsupportedCopy4Tag
}
return nil, ErrCorrupt
}
@@ -108,9 +104,9 @@ func (r *Reader) Reset(reader io.Reader) {
r.readHeader = false
}
func (r *Reader) readFull(p []byte) (ok bool) {
func (r *Reader) readFull(p []byte, allowEOF bool) (ok bool) {
if _, r.err = io.ReadFull(r.r, p); r.err != nil {
if r.err == io.ErrUnexpectedEOF {
if r.err == io.ErrUnexpectedEOF || (r.err == io.EOF && !allowEOF) {
r.err = ErrCorrupt
}
return false
@@ -129,7 +125,7 @@ func (r *Reader) Read(p []byte) (int, error) {
r.i += n
return n, nil
}
if !r.readFull(r.buf[:4]) {
if !r.readFull(r.buf[:4], true) {
return 0, r.err
}
chunkType := r.buf[0]
@@ -156,7 +152,7 @@ func (r *Reader) Read(p []byte) (int, error) {
return 0, r.err
}
buf := r.buf[:chunkLen]
if !r.readFull(buf) {
if !r.readFull(buf, false) {
return 0, r.err
}
checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24
@@ -189,13 +185,17 @@ func (r *Reader) Read(p []byte) (int, error) {
return 0, r.err
}
buf := r.buf[:checksumSize]
if !r.readFull(buf) {
if !r.readFull(buf, false) {
return 0, r.err
}
checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24
// Read directly into r.decoded instead of via r.buf.
n := chunkLen - checksumSize
if !r.readFull(r.decoded[:n]) {
if n > len(r.decoded) {
r.err = ErrCorrupt
return 0, r.err
}
if !r.readFull(r.decoded[:n], false) {
return 0, r.err
}
if crc(r.decoded[:n]) != checksum {
@@ -211,7 +211,7 @@ func (r *Reader) Read(p []byte) (int, error) {
r.err = ErrCorrupt
return 0, r.err
}
if !r.readFull(r.buf[:len(magicBody)]) {
if !r.readFull(r.buf[:len(magicBody)], false) {
return 0, r.err
}
for i := 0; i < len(magicBody); i++ {
@@ -230,7 +230,7 @@ func (r *Reader) Read(p []byte) (int, error) {
}
// Section 4.4 Padding (chunk type 0xfe).
// Section 4.6. Reserved skippable chunks (chunk types 0x80-0xfd).
if !r.readFull(r.buf[:chunkLen]) {
if !r.readFull(r.buf[:chunkLen], false) {
return 0, r.err
}
}
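
The readFull change above threads an allowEOF flag through every call site: a clean io.EOF is acceptable only when reading the 4-byte chunk header, since a stream may validly end between chunks, while EOF anywhere inside a chunk means truncation. A standalone sketch of the same pattern, assuming an ErrCorrupt sentinel like this package's:

func readFull(r io.Reader, p []byte, allowEOF bool) error {
	if _, err := io.ReadFull(r, p); err != nil {
		if err == io.ErrUnexpectedEOF || (err == io.EOF && !allowEOF) {
			return ErrCorrupt // the stream was cut off mid-chunk
		}
		return err // io.EOF at a chunk boundary, or a genuine read error
	}
	return nil
}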


@@ -2,6 +2,10 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !appengine
// +build gc
// +build !noasm
package snappy
// decode has the same semantics as in decode_other.go.
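
A note on the build-tag lines here and in decode_other.go below: separate // +build lines are ANDed, while space-separated terms within one line are ORed, so the two files select complementary configurations:

// +build !appengine
// +build gc
// +build !noasm
//   == !appengine && gc && !noasm    (use the assembly implementation)
//
// +build !amd64 appengine !gc noasm
//   == !amd64 || appengine || !gc || noasm    (use the pure-Go fallback)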


@@ -2,12 +2,16 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !appengine
// +build gc
// +build !noasm
#include "textflag.h"
// func decode(dst, src []byte) int
//
// The asm code generally follows the pure Go code in decode_other.go, except
// where marked with a "!!!".
// func decode(dst, src []byte) int
//
// All local variables fit into registers. The non-zero stack size is only to
// spill registers and push args when issuing a CALL. The register allocation:
@@ -222,6 +226,25 @@ tagLit63:
// ----------------------------------------
// The code below handles copy tags.
tagCopy4:
// case tagCopy4:
// s += 5
ADDQ $5, SI
// if uint(s) > uint(len(src)) { etc }
MOVQ SI, BX
SUBQ R11, BX
CMPQ BX, R12
JA errCorrupt
// length = 1 + int(src[s-5])>>2
SHRQ $2, CX
INCQ CX
// offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
MOVLQZX -4(SI), DX
JMP doCopy
tagCopy2:
// case tagCopy2:
// s += 3
@@ -237,7 +260,7 @@ tagCopy2:
SHRQ $2, CX
INCQ CX
// offset = int(src[s-2]) | int(src[s-1])<<8
// offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)
MOVWQZX -2(SI), DX
JMP doCopy
@@ -247,7 +270,7 @@ tagCopy:
// - CX == src[s]
CMPQ BX, $2
JEQ tagCopy2
JA errUC4T
JA tagCopy4
// case tagCopy1:
// s += 2
@@ -259,7 +282,7 @@ tagCopy:
CMPQ BX, R12
JA errCorrupt
// offset = int(src[s-2])&0xe0<<3 | int(src[s-1])
// offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
MOVQ CX, DX
ANDQ $0xe0, DX
SHLQ $3, DX
@@ -465,8 +488,3 @@ errCorrupt:
// return decodeErrCodeCorrupt
MOVQ $1, ret+48(FP)
RET
errUC4T:
// return decodeErrCodeUnsupportedCopy4Tag
MOVQ $3, ret+48(FP)
RET


@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !amd64
// +build !amd64 appengine !gc noasm
package snappy
@@ -63,7 +63,7 @@ func decode(dst, src []byte) int {
return decodeErrCodeCorrupt
}
length = 4 + int(src[s-2])>>2&0x7
offset = int(src[s-2])&0xe0<<3 | int(src[s-1])
offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
case tagCopy2:
s += 3
@@ -71,10 +71,15 @@ func decode(dst, src []byte) int {
return decodeErrCodeCorrupt
}
length = 1 + int(src[s-3])>>2
offset = int(src[s-2]) | int(src[s-1])<<8
offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)
case tagCopy4:
return decodeErrCodeUnsupportedCopy4Tag
s += 5
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
return decodeErrCodeCorrupt
}
length = 1 + int(src[s-5])>>2
offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
}
if offset <= 0 || d < offset || length > len(dst)-d {
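
The recurring "uint conversions catch overflow" guard is worth one concrete line: converting both sides to uint maps a negative (wrapped) s to a huge unsigned value, so a single comparison rejects both the too-large and the overflowed case:

package main

import "fmt"

func main() {
	s, n := -1, 10                 // pretend s += 5 wrapped past the int maximum
	fmt.Println(uint(s) > uint(n)) // true: uint(-1) is the maximum uint
}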


@@ -10,78 +10,11 @@ import (
"io"
)
// maxOffset limits how far copy back-references can go, the same as the C++
// code.
const maxOffset = 1 << 15
// emitLiteral writes a literal chunk and returns the number of bytes written.
func emitLiteral(dst, lit []byte) int {
i, n := 0, uint(len(lit)-1)
switch {
case n < 60:
dst[0] = uint8(n)<<2 | tagLiteral
i = 1
case n < 1<<8:
dst[0] = 60<<2 | tagLiteral
dst[1] = uint8(n)
i = 2
case n < 1<<16:
dst[0] = 61<<2 | tagLiteral
dst[1] = uint8(n)
dst[2] = uint8(n >> 8)
i = 3
case n < 1<<24:
dst[0] = 62<<2 | tagLiteral
dst[1] = uint8(n)
dst[2] = uint8(n >> 8)
dst[3] = uint8(n >> 16)
i = 4
case int64(n) < 1<<32:
dst[0] = 63<<2 | tagLiteral
dst[1] = uint8(n)
dst[2] = uint8(n >> 8)
dst[3] = uint8(n >> 16)
dst[4] = uint8(n >> 24)
i = 5
default:
panic("snappy: source buffer is too long")
}
if copy(dst[i:], lit) != len(lit) {
panic("snappy: destination buffer is too short")
}
return i + len(lit)
}
// emitCopy writes a copy chunk and returns the number of bytes written.
func emitCopy(dst []byte, offset, length int32) int {
i := 0
for length > 0 {
x := length - 4
if 0 <= x && x < 1<<3 && offset < 1<<11 {
dst[i+0] = uint8(offset>>8)&0x07<<5 | uint8(x)<<2 | tagCopy1
dst[i+1] = uint8(offset)
i += 2
break
}
x = length
if x > 1<<6 {
x = 1 << 6
}
dst[i+0] = uint8(x-1)<<2 | tagCopy2
dst[i+1] = uint8(offset)
dst[i+2] = uint8(offset >> 8)
i += 3
length -= x
}
return i
}
// Encode returns the encoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire encoded block.
// Otherwise, a newly allocated slice will be returned.
//
// It is valid to pass a nil dst.
// The dst and src must not overlap. It is valid to pass a nil dst.
func Encode(dst, src []byte) []byte {
if n := MaxEncodedLen(len(src)); n < 0 {
panic(ErrTooLarge)
@@ -98,94 +31,43 @@ func Encode(dst, src []byte) []byte {
if len(p) > maxBlockSize {
p, src = p[:maxBlockSize], p[maxBlockSize:]
}
d += encodeBlock(dst[d:], p)
if len(p) < minNonLiteralBlockSize {
d += emitLiteral(dst[d:], p)
} else {
d += encodeBlock(dst[d:], p)
}
}
return dst[:d]
}
// encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It
// assumes that the varint-encoded length of the decompressed bytes has already
// been written.
// inputMargin is the minimum number of extra input bytes to keep, inside
// encodeBlock's inner loop. On some architectures, this margin lets us
// implement a fast path for emitLiteral, where the copy of short (<= 16 byte)
// literals can be implemented as a single load to and store from a 16-byte
// register. That literal's actual length can be as short as 1 byte, so this
// can copy up to 15 bytes too much, but that's OK as subsequent iterations of
// the encoding loop will fix up the copy overrun, and this inputMargin ensures
// that we don't overrun the dst and src buffers.
const inputMargin = 16 - 1
// minNonLiteralBlockSize is the minimum size of the input to encodeBlock that
// could be encoded with a copy tag. This is the minimum with respect to the
// algorithm used by encodeBlock, not a minimum enforced by the file format.
//
// It also assumes that:
// len(dst) >= MaxEncodedLen(len(src)) &&
// 0 < len(src) && len(src) <= maxBlockSize
func encodeBlock(dst, src []byte) (d int) {
// Return early if src is short.
if len(src) <= 4 {
return emitLiteral(dst, src)
}
// Initialize the hash table. Its size ranges from 1<<8 to 1<<14 inclusive.
const maxTableSize = 1 << 14
shift, tableSize := uint(32-8), 1<<8
for tableSize < maxTableSize && tableSize < len(src) {
shift--
tableSize *= 2
}
var table [maxTableSize]int32
// Iterate over the source bytes.
var (
s int32 // The iterator position.
t int32 // The last position with the same hash as s.
lit int32 // The start position of any pending literal bytes.
// Copied from the C++ snappy implementation:
//
// Heuristic match skipping: If 32 bytes are scanned with no matches
// found, start looking only at every other byte. If 32 more bytes are
// scanned, look at every third byte, etc.. When a match is found,
// immediately go back to looking at every byte. This is a small loss
// (~5% performance, ~0.1% density) for compressible data due to more
// bookkeeping, but for non-compressible data (such as JPEG) it's a
// huge win since the compressor quickly "realizes" the data is
// incompressible and doesn't bother looking for matches everywhere.
//
// The "skip" variable keeps track of how many bytes there are since
// the last match; dividing it by 32 (ie. right-shifting by five) gives
// the number of bytes to move ahead for each iteration.
skip uint32 = 32
)
for uint32(s+3) < uint32(len(src)) { // The uint32 conversions catch overflow from the +3.
// Update the hash table.
b0, b1, b2, b3 := src[s], src[s+1], src[s+2], src[s+3]
h := uint32(b0) | uint32(b1)<<8 | uint32(b2)<<16 | uint32(b3)<<24
p := &table[(h*0x1e35a7bd)>>shift]
// We need to store values in [-1, inf) in table. To save
// some initialization time, (re)use the table's zero value
// and shift the values against this zero: add 1 on writes,
// subtract 1 on reads.
t, *p = *p-1, s+1
// If t is invalid or src[s:s+4] differs from src[t:t+4], accumulate a literal byte.
if t < 0 || s-t >= maxOffset || b0 != src[t] || b1 != src[t+1] || b2 != src[t+2] || b3 != src[t+3] {
s += int32(skip >> 5)
skip++
continue
}
skip = 32
// Otherwise, we have a match. First, emit any pending literal bytes.
if lit != s {
d += emitLiteral(dst[d:], src[lit:s])
}
// Extend the match to be as long as possible.
s0 := s
s, t = s+4, t+4
for int(s) < len(src) && src[s] == src[t] {
s++
t++
}
// Emit the copied bytes.
d += emitCopy(dst[d:], s-t, s-s0)
lit = s
}
// Emit any final pending literal bytes and return.
if int(lit) != len(src) {
d += emitLiteral(dst[d:], src[lit:])
}
return d
}
// The encoded output must start with at least a 1 byte literal, as there are
// no previous bytes to copy. A minimal (1 byte) copy after that, generated
// from an emitCopy call in encodeBlock's main loop, would require at least
// another inputMargin bytes, for the reason above: we want any emitLiteral
// calls inside encodeBlock's main loop to use the fast path if possible, which
// requires being able to overrun by inputMargin bytes. Thus,
// minNonLiteralBlockSize equals 1 + 1 + inputMargin.
//
// The C++ code doesn't use this exact threshold, but it could, as discussed at
// https://groups.google.com/d/topic/snappy-compression/oGbhsdIJSJ8/discussion
// The difference between Go (2+inputMargin) and C++ (inputMargin) is purely an
// optimization. It should not affect the encoded form. This is tested by
// TestSameEncodingAsCppShortCopies.
const minNonLiteralBlockSize = 1 + 1 + inputMargin
// MaxEncodedLen returns the maximum length of a snappy block, given its
// uncompressed length.
@@ -256,7 +138,7 @@ func NewBufferedWriter(w io.Writer) *Writer {
}
}
// Writer is an io.Writer than can write Snappy-compressed bytes.
// Writer is an io.Writer that can write Snappy-compressed bytes.
type Writer struct {
w io.Writer
err error
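
The match-skipping heuristic described in the comment deleted above (and kept in encode_other.go below) advances the scan by skip>>5 bytes per probe, so the step size grows by one for every 32 fruitless probes. A sketch of the arithmetic over the first 96 probes:

package main

import "fmt"

func main() {
	skip, s := 32, 0
	for i := 0; i < 96; i++ {
		s += skip >> 5
		skip++
	}
	// Probes 0-31 advance 1 byte each, 32-63 advance 2, and 64-95 advance 3,
	// so s == 32*1 + 32*2 + 32*3 after 96 probes without a match.
	fmt.Println(s) // 192
}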

vendor/github.com/golang/snappy/encode_amd64.go (generated, vendored; new file, 29 lines)

@@ -0,0 +1,29 @@
// Copyright 2016 The Snappy-Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !appengine
// +build gc
// +build !noasm
package snappy
// emitLiteral has the same semantics as in encode_other.go.
//
//go:noescape
func emitLiteral(dst, lit []byte) int
// emitCopy has the same semantics as in encode_other.go.
//
//go:noescape
func emitCopy(dst []byte, offset, length int) int
// extendMatch has the same semantics as in encode_other.go.
//
//go:noescape
func extendMatch(src []byte, i, j int) int
// encodeBlock has the same semantics as in encode_other.go.
//
//go:noescape
func encodeBlock(dst, src []byte) (d int)

vendor/github.com/golang/snappy/encode_amd64.s (generated, vendored; new file, 730 lines)

@@ -0,0 +1,730 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !appengine
// +build gc
// +build !noasm
#include "textflag.h"
// The XXX lines assemble on Go 1.4, 1.5 and 1.7, but not 1.6, due to a
// Go toolchain regression. See https://github.com/golang/go/issues/15426 and
// https://github.com/golang/snappy/issues/29
//
// As a workaround, the package was built with a known good assembler, and
// those instructions were disassembled by "objdump -d" to yield the
// 4e 0f b7 7c 5c 78 movzwq 0x78(%rsp,%r11,2),%r15
// style comments, in AT&T asm syntax. Note that rsp here is a physical
// register, not Go/asm's SP pseudo-register (see https://golang.org/doc/asm).
// The instructions were then encoded as "BYTE $0x.." sequences, which assemble
// fine on Go 1.6.
// The asm code generally follows the pure Go code in encode_other.go, except
// where marked with a "!!!".
// ----------------------------------------------------------------------------
// func emitLiteral(dst, lit []byte) int
//
// All local variables fit into registers. The register allocation:
// - AX len(lit)
// - BX n
// - DX return value
// - DI &dst[i]
// - R10 &lit[0]
//
// The 24 bytes of stack space is to call runtime·memmove.
//
// The unusual register allocation of local variables, such as R10 for the
// source pointer, matches the allocation used at the call site in encodeBlock,
// which makes it easier to manually inline this function.
TEXT ·emitLiteral(SB), NOSPLIT, $24-56
MOVQ dst_base+0(FP), DI
MOVQ lit_base+24(FP), R10
MOVQ lit_len+32(FP), AX
MOVQ AX, DX
MOVL AX, BX
SUBL $1, BX
CMPL BX, $60
JLT oneByte
CMPL BX, $256
JLT twoBytes
threeBytes:
MOVB $0xf4, 0(DI)
MOVW BX, 1(DI)
ADDQ $3, DI
ADDQ $3, DX
JMP memmove
twoBytes:
MOVB $0xf0, 0(DI)
MOVB BX, 1(DI)
ADDQ $2, DI
ADDQ $2, DX
JMP memmove
oneByte:
SHLB $2, BX
MOVB BX, 0(DI)
ADDQ $1, DI
ADDQ $1, DX
memmove:
MOVQ DX, ret+48(FP)
// copy(dst[i:], lit)
//
// This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push
// DI, R10 and AX as arguments.
MOVQ DI, 0(SP)
MOVQ R10, 8(SP)
MOVQ AX, 16(SP)
CALL runtime·memmove(SB)
RET
// ----------------------------------------------------------------------------
// func emitCopy(dst []byte, offset, length int) int
//
// All local variables fit into registers. The register allocation:
// - AX length
// - SI &dst[0]
// - DI &dst[i]
// - R11 offset
//
// The unusual register allocation of local variables, such as R11 for the
// offset, matches the allocation used at the call site in encodeBlock, which
// makes it easier to manually inline this function.
TEXT ·emitCopy(SB), NOSPLIT, $0-48
MOVQ dst_base+0(FP), DI
MOVQ DI, SI
MOVQ offset+24(FP), R11
MOVQ length+32(FP), AX
loop0:
// for length >= 68 { etc }
CMPL AX, $68
JLT step1
// Emit a length 64 copy, encoded as 3 bytes.
MOVB $0xfe, 0(DI)
MOVW R11, 1(DI)
ADDQ $3, DI
SUBL $64, AX
JMP loop0
step1:
// if length > 64 { etc }
CMPL AX, $64
JLE step2
// Emit a length 60 copy, encoded as 3 bytes.
MOVB $0xee, 0(DI)
MOVW R11, 1(DI)
ADDQ $3, DI
SUBL $60, AX
step2:
// if length >= 12 || offset >= 2048 { goto step3 }
CMPL AX, $12
JGE step3
CMPL R11, $2048
JGE step3
// Emit the remaining copy, encoded as 2 bytes.
MOVB R11, 1(DI)
SHRL $8, R11
SHLB $5, R11
SUBB $4, AX
SHLB $2, AX
ORB AX, R11
ORB $1, R11
MOVB R11, 0(DI)
ADDQ $2, DI
// Return the number of bytes written.
SUBQ SI, DI
MOVQ DI, ret+40(FP)
RET
step3:
// Emit the remaining copy, encoded as 3 bytes.
SUBL $1, AX
SHLB $2, AX
ORB $2, AX
MOVB AX, 0(DI)
MOVW R11, 1(DI)
ADDQ $3, DI
// Return the number of bytes written.
SUBQ SI, DI
MOVQ DI, ret+40(FP)
RET
// ----------------------------------------------------------------------------
// func extendMatch(src []byte, i, j int) int
//
// All local variables fit into registers. The register allocation:
// - DX &src[0]
// - SI &src[j]
// - R13 &src[len(src) - 8]
// - R14 &src[len(src)]
// - R15 &src[i]
//
// The unusual register allocation of local variables, such as R15 for a source
// pointer, matches the allocation used at the call site in encodeBlock, which
// makes it easier to manually inline this function.
TEXT ·extendMatch(SB), NOSPLIT, $0-48
MOVQ src_base+0(FP), DX
MOVQ src_len+8(FP), R14
MOVQ i+24(FP), R15
MOVQ j+32(FP), SI
ADDQ DX, R14
ADDQ DX, R15
ADDQ DX, SI
MOVQ R14, R13
SUBQ $8, R13
cmp8:
// As long as we are 8 or more bytes before the end of src, we can load and
// compare 8 bytes at a time. If those 8 bytes are equal, repeat.
CMPQ SI, R13
JA cmp1
MOVQ (R15), AX
MOVQ (SI), BX
CMPQ AX, BX
JNE bsf
ADDQ $8, R15
ADDQ $8, SI
JMP cmp8
bsf:
// If those 8 bytes were not equal, XOR the two 8 byte values, and return
// the index of the first byte that differs. The BSF instruction finds the
// least significant 1 bit, the amd64 architecture is little-endian, and
// the shift by 3 converts a bit index to a byte index.
XORQ AX, BX
BSFQ BX, BX
SHRQ $3, BX
ADDQ BX, SI
// Convert from &src[ret] to ret.
SUBQ DX, SI
MOVQ SI, ret+40(FP)
RET
cmp1:
// In src's tail, compare 1 byte at a time.
CMPQ SI, R14
JAE extendMatchEnd
MOVB (R15), AX
MOVB (SI), BX
CMPB AX, BX
JNE extendMatchEnd
ADDQ $1, R15
ADDQ $1, SI
JMP cmp1
extendMatchEnd:
// Convert from &src[ret] to ret.
SUBQ DX, SI
MOVQ SI, ret+40(FP)
RET
// ----------------------------------------------------------------------------
// func encodeBlock(dst, src []byte) (d int)
//
// All local variables fit into registers, other than "var table". The register
// allocation:
// - AX . .
// - BX . .
// - CX 56 shift (note that amd64 shifts by non-immediates must use CX).
// - DX 64 &src[0], tableSize
// - SI 72 &src[s]
// - DI 80 &dst[d]
// - R9 88 sLimit
// - R10 . &src[nextEmit]
// - R11 96 prevHash, currHash, nextHash, offset
// - R12 104 &src[base], skip
// - R13 . &src[nextS], &src[len(src) - 8]
// - R14 . len(src), bytesBetweenHashLookups, &src[len(src)], x
// - R15 112 candidate
//
// The second column (56, 64, etc) is the stack offset to spill the registers
// when calling other functions. We could pack this slightly tighter, but it's
// simpler to have a dedicated spill map independent of the function called.
//
// "var table [maxTableSize]uint16" takes up 32768 bytes of stack space. An
// extra 56 bytes, to call other functions, and an extra 64 bytes, to spill
// local variables (registers) during calls gives 32768 + 56 + 64 = 32888.
TEXT ·encodeBlock(SB), 0, $32888-56
MOVQ dst_base+0(FP), DI
MOVQ src_base+24(FP), SI
MOVQ src_len+32(FP), R14
// shift, tableSize := uint32(32-8), 1<<8
MOVQ $24, CX
MOVQ $256, DX
calcShift:
// for ; tableSize < maxTableSize && tableSize < len(src); tableSize *= 2 {
// shift--
// }
CMPQ DX, $16384
JGE varTable
CMPQ DX, R14
JGE varTable
SUBQ $1, CX
SHLQ $1, DX
JMP calcShift
varTable:
// var table [maxTableSize]uint16
//
// In the asm code, unlike the Go code, we can zero-initialize only the
// first tableSize elements. Each uint16 element is 2 bytes and each MOVOU
// writes 16 bytes, so we can do only tableSize/8 writes instead of the
// 2048 writes that would zero-initialize all of table's 32768 bytes.
SHRQ $3, DX
LEAQ table-32768(SP), BX
PXOR X0, X0
memclr:
MOVOU X0, 0(BX)
ADDQ $16, BX
SUBQ $1, DX
JNZ memclr
// !!! DX = &src[0]
MOVQ SI, DX
// sLimit := len(src) - inputMargin
MOVQ R14, R9
SUBQ $15, R9
// !!! Pre-emptively spill CX, DX and R9 to the stack. Their values don't
// change for the rest of the function.
MOVQ CX, 56(SP)
MOVQ DX, 64(SP)
MOVQ R9, 88(SP)
// nextEmit := 0
MOVQ DX, R10
// s := 1
ADDQ $1, SI
// nextHash := hash(load32(src, s), shift)
MOVL 0(SI), R11
IMULL $0x1e35a7bd, R11
SHRL CX, R11
outer:
// for { etc }
// skip := 32
MOVQ $32, R12
// nextS := s
MOVQ SI, R13
// candidate := 0
MOVQ $0, R15
inner0:
// for { etc }
// s := nextS
MOVQ R13, SI
// bytesBetweenHashLookups := skip >> 5
MOVQ R12, R14
SHRQ $5, R14
// nextS = s + bytesBetweenHashLookups
ADDQ R14, R13
// skip += bytesBetweenHashLookups
ADDQ R14, R12
// if nextS > sLimit { goto emitRemainder }
MOVQ R13, AX
SUBQ DX, AX
CMPQ AX, R9
JA emitRemainder
// candidate = int(table[nextHash])
// XXX: MOVWQZX table-32768(SP)(R11*2), R15
// XXX: 4e 0f b7 7c 5c 78 movzwq 0x78(%rsp,%r11,2),%r15
BYTE $0x4e
BYTE $0x0f
BYTE $0xb7
BYTE $0x7c
BYTE $0x5c
BYTE $0x78
// table[nextHash] = uint16(s)
MOVQ SI, AX
SUBQ DX, AX
// XXX: MOVW AX, table-32768(SP)(R11*2)
// XXX: 66 42 89 44 5c 78 mov %ax,0x78(%rsp,%r11,2)
BYTE $0x66
BYTE $0x42
BYTE $0x89
BYTE $0x44
BYTE $0x5c
BYTE $0x78
// nextHash = hash(load32(src, nextS), shift)
MOVL 0(R13), R11
IMULL $0x1e35a7bd, R11
SHRL CX, R11
// if load32(src, s) != load32(src, candidate) { continue } break
MOVL 0(SI), AX
MOVL (DX)(R15*1), BX
CMPL AX, BX
JNE inner0
fourByteMatch:
// As per the encode_other.go code:
//
// A 4-byte match has been found. We'll later see etc.
// !!! Jump to a fast path for short (<= 16 byte) literals. See the comment
// on inputMargin in encode.go.
MOVQ SI, AX
SUBQ R10, AX
CMPQ AX, $16
JLE emitLiteralFastPath
// ----------------------------------------
// Begin inline of the emitLiteral call.
//
// d += emitLiteral(dst[d:], src[nextEmit:s])
MOVL AX, BX
SUBL $1, BX
CMPL BX, $60
JLT inlineEmitLiteralOneByte
CMPL BX, $256
JLT inlineEmitLiteralTwoBytes
inlineEmitLiteralThreeBytes:
MOVB $0xf4, 0(DI)
MOVW BX, 1(DI)
ADDQ $3, DI
JMP inlineEmitLiteralMemmove
inlineEmitLiteralTwoBytes:
MOVB $0xf0, 0(DI)
MOVB BX, 1(DI)
ADDQ $2, DI
JMP inlineEmitLiteralMemmove
inlineEmitLiteralOneByte:
SHLB $2, BX
MOVB BX, 0(DI)
ADDQ $1, DI
inlineEmitLiteralMemmove:
// Spill local variables (registers) onto the stack; call; unspill.
//
// copy(dst[i:], lit)
//
// This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push
// DI, R10 and AX as arguments.
MOVQ DI, 0(SP)
MOVQ R10, 8(SP)
MOVQ AX, 16(SP)
ADDQ AX, DI // Finish the "d +=" part of "d += emitLiteral(etc)".
MOVQ SI, 72(SP)
MOVQ DI, 80(SP)
MOVQ R15, 112(SP)
CALL runtime·memmove(SB)
MOVQ 56(SP), CX
MOVQ 64(SP), DX
MOVQ 72(SP), SI
MOVQ 80(SP), DI
MOVQ 88(SP), R9
MOVQ 112(SP), R15
JMP inner1
inlineEmitLiteralEnd:
// End inline of the emitLiteral call.
// ----------------------------------------
emitLiteralFastPath:
// !!! Emit the 1-byte encoding "uint8(len(lit)-1)<<2".
MOVB AX, BX
SUBB $1, BX
SHLB $2, BX
MOVB BX, (DI)
ADDQ $1, DI
// !!! Implement the copy from lit to dst as a 16-byte load and store.
// (Encode's documentation says that dst and src must not overlap.)
//
// This always copies 16 bytes, instead of only len(lit) bytes, but that's
// OK. Subsequent iterations will fix up the overrun.
//
// Note that on amd64, it is legal and cheap to issue unaligned 8-byte or
// 16-byte loads and stores. This technique probably wouldn't be as
// effective on architectures that are fussier about alignment.
MOVOU 0(R10), X0
MOVOU X0, 0(DI)
ADDQ AX, DI
inner1:
// for { etc }
// base := s
MOVQ SI, R12
// !!! offset := base - candidate
MOVQ R12, R11
SUBQ R15, R11
SUBQ DX, R11
// ----------------------------------------
// Begin inline of the extendMatch call.
//
// s = extendMatch(src, candidate+4, s+4)
// !!! R14 = &src[len(src)]
MOVQ src_len+32(FP), R14
ADDQ DX, R14
// !!! R13 = &src[len(src) - 8]
MOVQ R14, R13
SUBQ $8, R13
// !!! R15 = &src[candidate + 4]
ADDQ $4, R15
ADDQ DX, R15
// !!! s += 4
ADDQ $4, SI
inlineExtendMatchCmp8:
// As long as we are 8 or more bytes before the end of src, we can load and
// compare 8 bytes at a time. If those 8 bytes are equal, repeat.
CMPQ SI, R13
JA inlineExtendMatchCmp1
MOVQ (R15), AX
MOVQ (SI), BX
CMPQ AX, BX
JNE inlineExtendMatchBSF
ADDQ $8, R15
ADDQ $8, SI
JMP inlineExtendMatchCmp8
inlineExtendMatchBSF:
// If those 8 bytes were not equal, XOR the two 8 byte values, and return
// the index of the first byte that differs. The BSF instruction finds the
// least significant 1 bit, the amd64 architecture is little-endian, and
// the shift by 3 converts a bit index to a byte index.
XORQ AX, BX
BSFQ BX, BX
SHRQ $3, BX
ADDQ BX, SI
JMP inlineExtendMatchEnd
inlineExtendMatchCmp1:
// In src's tail, compare 1 byte at a time.
CMPQ SI, R14
JAE inlineExtendMatchEnd
MOVB (R15), AX
MOVB (SI), BX
CMPB AX, BX
JNE inlineExtendMatchEnd
ADDQ $1, R15
ADDQ $1, SI
JMP inlineExtendMatchCmp1
inlineExtendMatchEnd:
// End inline of the extendMatch call.
// ----------------------------------------
// ----------------------------------------
// Begin inline of the emitCopy call.
//
// d += emitCopy(dst[d:], base-candidate, s-base)
// !!! length := s - base
MOVQ SI, AX
SUBQ R12, AX
inlineEmitCopyLoop0:
// for length >= 68 { etc }
CMPL AX, $68
JLT inlineEmitCopyStep1
// Emit a length 64 copy, encoded as 3 bytes.
MOVB $0xfe, 0(DI)
MOVW R11, 1(DI)
ADDQ $3, DI
SUBL $64, AX
JMP inlineEmitCopyLoop0
inlineEmitCopyStep1:
// if length > 64 { etc }
CMPL AX, $64
JLE inlineEmitCopyStep2
// Emit a length 60 copy, encoded as 3 bytes.
MOVB $0xee, 0(DI)
MOVW R11, 1(DI)
ADDQ $3, DI
SUBL $60, AX
inlineEmitCopyStep2:
// if length >= 12 || offset >= 2048 { goto inlineEmitCopyStep3 }
CMPL AX, $12
JGE inlineEmitCopyStep3
CMPL R11, $2048
JGE inlineEmitCopyStep3
// Emit the remaining copy, encoded as 2 bytes.
MOVB R11, 1(DI)
SHRL $8, R11
SHLB $5, R11
SUBB $4, AX
SHLB $2, AX
ORB AX, R11
ORB $1, R11
MOVB R11, 0(DI)
ADDQ $2, DI
JMP inlineEmitCopyEnd
inlineEmitCopyStep3:
// Emit the remaining copy, encoded as 3 bytes.
SUBL $1, AX
SHLB $2, AX
ORB $2, AX
MOVB AX, 0(DI)
MOVW R11, 1(DI)
ADDQ $3, DI
inlineEmitCopyEnd:
// End inline of the emitCopy call.
// ----------------------------------------
// nextEmit = s
MOVQ SI, R10
// if s >= sLimit { goto emitRemainder }
MOVQ SI, AX
SUBQ DX, AX
CMPQ AX, R9
JAE emitRemainder
// As per the encode_other.go code:
//
// We could immediately etc.
// x := load64(src, s-1)
MOVQ -1(SI), R14
// prevHash := hash(uint32(x>>0), shift)
MOVL R14, R11
IMULL $0x1e35a7bd, R11
SHRL CX, R11
// table[prevHash] = uint16(s-1)
MOVQ SI, AX
SUBQ DX, AX
SUBQ $1, AX
// XXX: MOVW AX, table-32768(SP)(R11*2)
// XXX: 66 42 89 44 5c 78 mov %ax,0x78(%rsp,%r11,2)
BYTE $0x66
BYTE $0x42
BYTE $0x89
BYTE $0x44
BYTE $0x5c
BYTE $0x78
// currHash := hash(uint32(x>>8), shift)
SHRQ $8, R14
MOVL R14, R11
IMULL $0x1e35a7bd, R11
SHRL CX, R11
// candidate = int(table[currHash])
// XXX: MOVWQZX table-32768(SP)(R11*2), R15
// XXX: 4e 0f b7 7c 5c 78 movzwq 0x78(%rsp,%r11,2),%r15
BYTE $0x4e
BYTE $0x0f
BYTE $0xb7
BYTE $0x7c
BYTE $0x5c
BYTE $0x78
// table[currHash] = uint16(s)
ADDQ $1, AX
// XXX: MOVW AX, table-32768(SP)(R11*2)
// XXX: 66 42 89 44 5c 78 mov %ax,0x78(%rsp,%r11,2)
BYTE $0x66
BYTE $0x42
BYTE $0x89
BYTE $0x44
BYTE $0x5c
BYTE $0x78
// if uint32(x>>8) == load32(src, candidate) { continue }
MOVL (DX)(R15*1), BX
CMPL R14, BX
JEQ inner1
// nextHash = hash(uint32(x>>16), shift)
SHRQ $8, R14
MOVL R14, R11
IMULL $0x1e35a7bd, R11
SHRL CX, R11
// s++
ADDQ $1, SI
// break out of the inner1 for loop, i.e. continue the outer loop.
JMP outer
emitRemainder:
// if nextEmit < len(src) { etc }
MOVQ src_len+32(FP), AX
ADDQ DX, AX
CMPQ R10, AX
JEQ encodeBlockEnd
// d += emitLiteral(dst[d:], src[nextEmit:])
//
// Push args.
MOVQ DI, 0(SP)
MOVQ $0, 8(SP) // Unnecessary, as the callee ignores it, but conservative.
MOVQ $0, 16(SP) // Unnecessary, as the callee ignores it, but conservative.
MOVQ R10, 24(SP)
SUBQ R10, AX
MOVQ AX, 32(SP)
MOVQ AX, 40(SP) // Unnecessary, as the callee ignores it, but conservative.
// Spill local variables (registers) onto the stack; call; unspill.
MOVQ DI, 80(SP)
CALL ·emitLiteral(SB)
MOVQ 80(SP), DI
// Finish the "d +=" part of "d += emitLiteral(etc)".
ADDQ 48(SP), DI
encodeBlockEnd:
MOVQ dst_base+0(FP), AX
SUBQ AX, DI
MOVQ DI, d+48(FP)
RET
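
The XOR-and-BSF idiom that extendMatch uses above has a direct Go analogue in math/bits; a minimal sketch, assuming little-endian 8-byte loads a and b:

package main

import (
	"fmt"
	"math/bits"
)

func main() {
	a := uint64(0x1122334455667788)
	b := uint64(0x11223344ff667788) // differs at little-endian byte index 3
	x := a ^ b
	// The trailing-zero count locates the lowest differing bit; shifting
	// right by 3 converts that bit index to a byte index, because the
	// 8-byte loads were little-endian.
	fmt.Println(bits.TrailingZeros64(x) >> 3) // 3
}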

vendor/github.com/golang/snappy/encode_other.go (generated, vendored; new file, 238 lines)

@@ -0,0 +1,238 @@
// Copyright 2016 The Snappy-Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !amd64 appengine !gc noasm
package snappy
func load32(b []byte, i int) uint32 {
b = b[i : i+4 : len(b)] // Help the compiler eliminate bounds checks on the next line.
return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
}
func load64(b []byte, i int) uint64 {
b = b[i : i+8 : len(b)] // Help the compiler eliminate bounds checks on the next line.
return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
}
// emitLiteral writes a literal chunk and returns the number of bytes written.
//
// It assumes that:
// dst is long enough to hold the encoded bytes
// 1 <= len(lit) && len(lit) <= 65536
func emitLiteral(dst, lit []byte) int {
i, n := 0, uint(len(lit)-1)
switch {
case n < 60:
dst[0] = uint8(n)<<2 | tagLiteral
i = 1
case n < 1<<8:
dst[0] = 60<<2 | tagLiteral
dst[1] = uint8(n)
i = 2
default:
dst[0] = 61<<2 | tagLiteral
dst[1] = uint8(n)
dst[2] = uint8(n >> 8)
i = 3
}
return i + copy(dst[i:], lit)
}
// emitCopy writes a copy chunk and returns the number of bytes written.
//
// It assumes that:
// dst is long enough to hold the encoded bytes
// 1 <= offset && offset <= 65535
// 4 <= length && length <= 65535
func emitCopy(dst []byte, offset, length int) int {
i := 0
// The maximum length for a single tagCopy1 or tagCopy2 op is 64 bytes. The
// threshold for this loop is a little higher (at 68 = 64 + 4), and the
// length emitted down below is a little lower (at 60 = 64 - 4), because
// it's shorter to encode a length 67 copy as a length 60 tagCopy2 followed
// by a length 7 tagCopy1 (which encodes as 3+2 bytes) than to encode it as
// a length 64 tagCopy2 followed by a length 3 tagCopy2 (which encodes as
// 3+3 bytes). The magic 4 in the 64±4 is because the minimum length for a
// tagCopy1 op is 4 bytes, which is why a length 3 copy has to be an
// encodes-as-3-bytes tagCopy2 instead of an encodes-as-2-bytes tagCopy1.
for length >= 68 {
// Emit a length 64 copy, encoded as 3 bytes.
dst[i+0] = 63<<2 | tagCopy2
dst[i+1] = uint8(offset)
dst[i+2] = uint8(offset >> 8)
i += 3
length -= 64
}
if length > 64 {
// Emit a length 60 copy, encoded as 3 bytes.
dst[i+0] = 59<<2 | tagCopy2
dst[i+1] = uint8(offset)
dst[i+2] = uint8(offset >> 8)
i += 3
length -= 60
}
if length >= 12 || offset >= 2048 {
// Emit the remaining copy, encoded as 3 bytes.
dst[i+0] = uint8(length-1)<<2 | tagCopy2
dst[i+1] = uint8(offset)
dst[i+2] = uint8(offset >> 8)
return i + 3
}
// Emit the remaining copy, encoded as 2 bytes.
dst[i+0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1
dst[i+1] = uint8(offset)
return i + 2
}
// extendMatch returns the largest k such that k <= len(src) and that
// src[i:i+k-j] and src[j:k] have the same contents.
//
// It assumes that:
// 0 <= i && i < j && j <= len(src)
func extendMatch(src []byte, i, j int) int {
for ; j < len(src) && src[i] == src[j]; i, j = i+1, j+1 {
}
return j
}
func hash(u, shift uint32) uint32 {
return (u * 0x1e35a7bd) >> shift
}
// encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It
// assumes that the varint-encoded length of the decompressed bytes has already
// been written.
//
// It also assumes that:
// len(dst) >= MaxEncodedLen(len(src)) &&
// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlock(dst, src []byte) (d int) {
// Initialize the hash table. Its size ranges from 1<<8 to 1<<14 inclusive.
// The table element type is uint16, as s < sLimit and sLimit < len(src)
// and len(src) <= maxBlockSize and maxBlockSize == 65536.
const (
maxTableSize = 1 << 14
// tableMask is redundant, but helps the compiler eliminate bounds
// checks.
tableMask = maxTableSize - 1
)
shift := uint32(32 - 8)
for tableSize := 1 << 8; tableSize < maxTableSize && tableSize < len(src); tableSize *= 2 {
shift--
}
// In Go, all array elements are zero-initialized, so there is no advantage
// to a smaller tableSize per se. However, it matches the C++ algorithm,
// and in the asm versions of this code, we can get away with zeroing only
// the first tableSize elements.
var table [maxTableSize]uint16
// sLimit is when to stop looking for offset/length copies. The inputMargin
// lets us use a fast path for emitLiteral in the main loop, while we are
// looking for copies.
sLimit := len(src) - inputMargin
// nextEmit is where in src the next emitLiteral should start from.
nextEmit := 0
// The encoded form must start with a literal, as there are no previous
// bytes to copy, so we start looking for hash matches at s == 1.
s := 1
nextHash := hash(load32(src, s), shift)
for {
// Copied from the C++ snappy implementation:
//
// Heuristic match skipping: If 32 bytes are scanned with no matches
// found, start looking only at every other byte. If 32 more bytes are
// scanned (or skipped), look at every third byte, etc.. When a match
// is found, immediately go back to looking at every byte. This is a
// small loss (~5% performance, ~0.1% density) for compressible data
// due to more bookkeeping, but for non-compressible data (such as
// JPEG) it's a huge win since the compressor quickly "realizes" the
// data is incompressible and doesn't bother looking for matches
// everywhere.
//
// The "skip" variable keeps track of how many bytes there are since
// the last match; dividing it by 32 (ie. right-shifting by five) gives
// the number of bytes to move ahead for each iteration.
skip := 32
nextS := s
candidate := 0
for {
s = nextS
bytesBetweenHashLookups := skip >> 5
nextS = s + bytesBetweenHashLookups
skip += bytesBetweenHashLookups
if nextS > sLimit {
goto emitRemainder
}
candidate = int(table[nextHash&tableMask])
table[nextHash&tableMask] = uint16(s)
nextHash = hash(load32(src, nextS), shift)
if load32(src, s) == load32(src, candidate) {
break
}
}
// A 4-byte match has been found. We'll later see if more than 4 bytes
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
// them as literal bytes.
d += emitLiteral(dst[d:], src[nextEmit:s])
// Call emitCopy, and then see if another emitCopy could be our next
// move. Repeat until we find no match for the input immediately after
// what was consumed by the last emitCopy call.
//
// If we exit this loop normally then we need to call emitLiteral next,
// though we don't yet know how big the literal will be. We handle that
// by proceeding to the next iteration of the main loop. We also can
// exit this loop via goto if we get close to exhausting the input.
for {
// Invariant: we have a 4-byte match at s, and no need to emit any
// literal bytes prior to s.
base := s
// Extend the 4-byte match as long as possible.
//
// This is an inlined version of:
// s = extendMatch(src, candidate+4, s+4)
s += 4
for i := candidate + 4; s < len(src) && src[i] == src[s]; i, s = i+1, s+1 {
}
d += emitCopy(dst[d:], base-candidate, s-base)
nextEmit = s
if s >= sLimit {
goto emitRemainder
}
// We could immediately start working at s now, but to improve
// compression we first update the hash table at s-1 and at s. If
// another emitCopy is not our next move, also calculate nextHash
// at s+1. At least on GOARCH=amd64, these three hash calculations
// are faster as one load64 call (with some shifts) instead of
// three load32 calls.
x := load64(src, s-1)
prevHash := hash(uint32(x>>0), shift)
table[prevHash&tableMask] = uint16(s - 1)
currHash := hash(uint32(x>>8), shift)
candidate = int(table[currHash&tableMask])
table[currHash&tableMask] = uint16(s)
if uint32(x>>8) != load32(src, candidate) {
nextHash = hash(uint32(x>>16), shift)
s++
break
}
}
}
emitRemainder:
if nextEmit < len(src) {
d += emitLiteral(dst[d:], src[nextEmit:])
}
return d
}
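
A worked example of emitCopy's 68/60 thresholds, calling the function defined above with offset 10 (below the 2048 tagCopy1 limit):

dst := make([]byte, 8)
n := emitCopy(dst, 10, 67)
// 67 < 68 skips the length-64 loop; 67 > 64 emits a length-60 tagCopy2
// (3 bytes), leaving length 7; then 7 < 12 and 10 < 2048 emits a length-7
// tagCopy1 (2 bytes). So n == 5, versus 3+3 == 6 bytes for a 64+3 split,
// since a length-3 copy cannot be a tagCopy1 (whose minimum length is 4).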


@@ -32,7 +32,10 @@ Lempel-Ziv compression algorithms. In particular:
- For l == 2, the offset ranges in [0, 1<<16) and the length in [1, 65).
The length is 1 + m. The offset is the little-endian unsigned integer
denoted by the next 2 bytes.
- For l == 3, this tag is a legacy format that is no longer supported.
- For l == 3, this tag is a legacy format that is no longer issued by most
encoders. Nonetheless, the offset ranges in [0, 1<<32) and the length in
[1, 65). The length is 1 + m. The offset is the little-endian unsigned
integer denoted by the next 4 bytes.
*/
const (
tagLiteral = 0x00
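
To make the tag layout concrete, here is the decoding of the byte sequence 0x06 0x03 0x00, the same "length=2 offset=3" case exercised by the snappy_test.go diff further down:

package main

import "fmt"

func main() {
	tag := byte(0x06)
	l := tag & 0x03 // 2, so this is a tagCopy2
	m := tag >> 2   // 1
	length := 1 + int(m)
	offset := int(uint32(0x03) | uint32(0x00)<<8) // next two bytes, little-endian
	fmt.Println(l, length, offset)                // 2 2 3
}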


@@ -1,973 +0,0 @@
// Copyright 2011 The Snappy-Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package snappy
import (
"bytes"
"encoding/binary"
"flag"
"fmt"
"io"
"io/ioutil"
"math/rand"
"net/http"
"os"
"path/filepath"
"strings"
"testing"
)
var download = flag.Bool("download", false, "If true, download any missing files before running benchmarks")
func TestMaxEncodedLenOfMaxBlockSize(t *testing.T) {
got := maxEncodedLenOfMaxBlockSize
want := MaxEncodedLen(maxBlockSize)
if got != want {
t.Fatalf("got %d, want %d", got, want)
}
}
func cmp(a, b []byte) error {
if bytes.Equal(a, b) {
return nil
}
if len(a) != len(b) {
return fmt.Errorf("got %d bytes, want %d", len(a), len(b))
}
for i := range a {
if a[i] != b[i] {
return fmt.Errorf("byte #%d: got 0x%02x, want 0x%02x", i, a[i], b[i])
}
}
return nil
}
func roundtrip(b, ebuf, dbuf []byte) error {
d, err := Decode(dbuf, Encode(ebuf, b))
if err != nil {
return fmt.Errorf("decoding error: %v", err)
}
if err := cmp(d, b); err != nil {
return fmt.Errorf("roundtrip mismatch: %v", err)
}
return nil
}
func TestEmpty(t *testing.T) {
if err := roundtrip(nil, nil, nil); err != nil {
t.Fatal(err)
}
}
func TestSmallCopy(t *testing.T) {
for _, ebuf := range [][]byte{nil, make([]byte, 20), make([]byte, 64)} {
for _, dbuf := range [][]byte{nil, make([]byte, 20), make([]byte, 64)} {
for i := 0; i < 32; i++ {
s := "aaaa" + strings.Repeat("b", i) + "aaaabbbb"
if err := roundtrip([]byte(s), ebuf, dbuf); err != nil {
t.Errorf("len(ebuf)=%d, len(dbuf)=%d, i=%d: %v", len(ebuf), len(dbuf), i, err)
}
}
}
}
}
func TestSmallRand(t *testing.T) {
rng := rand.New(rand.NewSource(1))
for n := 1; n < 20000; n += 23 {
b := make([]byte, n)
for i := range b {
b[i] = uint8(rng.Intn(256))
}
if err := roundtrip(b, nil, nil); err != nil {
t.Fatal(err)
}
}
}
func TestSmallRegular(t *testing.T) {
for n := 1; n < 20000; n += 23 {
b := make([]byte, n)
for i := range b {
b[i] = uint8(i%10 + 'a')
}
if err := roundtrip(b, nil, nil); err != nil {
t.Fatal(err)
}
}
}
func TestInvalidVarint(t *testing.T) {
testCases := []struct {
desc string
input string
}{{
"invalid varint, final byte has continuation bit set",
"\xff",
}, {
"invalid varint, value overflows uint64",
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x00",
}, {
// https://github.com/google/snappy/blob/master/format_description.txt
// says that "the stream starts with the uncompressed length [as a
// varint] (up to a maximum of 2^32 - 1)".
"valid varint (as uint64), but value overflows uint32",
"\x80\x80\x80\x80\x10",
}}
for _, tc := range testCases {
input := []byte(tc.input)
if _, err := DecodedLen(input); err != ErrCorrupt {
t.Errorf("%s: DecodedLen: got %v, want ErrCorrupt", tc.desc, err)
}
if _, err := Decode(nil, input); err != ErrCorrupt {
t.Errorf("%s: Decode: got %v, want ErrCorrupt", tc.desc, err)
}
}
}
func TestDecode(t *testing.T) {
lit40Bytes := make([]byte, 40)
for i := range lit40Bytes {
lit40Bytes[i] = byte(i)
}
lit40 := string(lit40Bytes)
testCases := []struct {
desc string
input string
want string
wantErr error
}{{
`decodedLen=0; valid input`,
"\x00",
"",
nil,
}, {
`decodedLen=3; tagLiteral, 0-byte length; length=3; valid input`,
"\x03" + "\x08\xff\xff\xff",
"\xff\xff\xff",
nil,
}, {
`decodedLen=2; tagLiteral, 0-byte length; length=3; not enough dst bytes`,
"\x02" + "\x08\xff\xff\xff",
"",
ErrCorrupt,
}, {
`decodedLen=3; tagLiteral, 0-byte length; length=3; not enough src bytes`,
"\x03" + "\x08\xff\xff",
"",
ErrCorrupt,
}, {
`decodedLen=40; tagLiteral, 0-byte length; length=40; valid input`,
"\x28" + "\x9c" + lit40,
lit40,
nil,
}, {
`decodedLen=1; tagLiteral, 1-byte length; not enough length bytes`,
"\x01" + "\xf0",
"",
ErrCorrupt,
}, {
`decodedLen=3; tagLiteral, 1-byte length; length=3; valid input`,
"\x03" + "\xf0\x02\xff\xff\xff",
"\xff\xff\xff",
nil,
}, {
`decodedLen=1; tagLiteral, 2-byte length; not enough length bytes`,
"\x01" + "\xf4\x00",
"",
ErrCorrupt,
}, {
`decodedLen=3; tagLiteral, 2-byte length; length=3; valid input`,
"\x03" + "\xf4\x02\x00\xff\xff\xff",
"\xff\xff\xff",
nil,
}, {
`decodedLen=1; tagLiteral, 3-byte length; not enough length bytes`,
"\x01" + "\xf8\x00\x00",
"",
ErrCorrupt,
}, {
`decodedLen=3; tagLiteral, 3-byte length; length=3; valid input`,
"\x03" + "\xf8\x02\x00\x00\xff\xff\xff",
"\xff\xff\xff",
nil,
}, {
`decodedLen=1; tagLiteral, 4-byte length; not enough length bytes`,
"\x01" + "\xfc\x00\x00\x00",
"",
ErrCorrupt,
}, {
`decodedLen=1; tagLiteral, 4-byte length; length=3; not enough dst bytes`,
"\x01" + "\xfc\x02\x00\x00\x00\xff\xff\xff",
"",
ErrCorrupt,
}, {
`decodedLen=4; tagLiteral, 4-byte length; length=3; not enough src bytes`,
"\x04" + "\xfc\x02\x00\x00\x00\xff",
"",
ErrCorrupt,
}, {
`decodedLen=3; tagLiteral, 4-byte length; length=3; valid input`,
"\x03" + "\xfc\x02\x00\x00\x00\xff\xff\xff",
"\xff\xff\xff",
nil,
}, {
`decodedLen=4; tagCopy1, 1 extra length|offset byte; not enough extra bytes`,
"\x04" + "\x01",
"",
ErrCorrupt,
}, {
`decodedLen=4; tagCopy2, 2 extra length|offset bytes; not enough extra bytes`,
"\x04" + "\x02\x00",
"",
ErrCorrupt,
}, {
`decodedLen=4; tagCopy4; unsupported COPY_4 tag`,
"\x04" + "\x03\x00\x00\x00\x00",
"",
errUnsupportedCopy4Tag,
}, {
`decodedLen=4; tagLiteral (4 bytes "abcd"); valid input`,
"\x04" + "\x0cabcd",
"abcd",
nil,
}, {
`decodedLen=13; tagLiteral (4 bytes "abcd"); tagCopy1; length=9 offset=4; valid input`,
"\x0d" + "\x0cabcd" + "\x15\x04",
"abcdabcdabcda",
nil,
}, {
`decodedLen=8; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=4; valid input`,
"\x08" + "\x0cabcd" + "\x01\x04",
"abcdabcd",
nil,
}, {
`decodedLen=8; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=2; valid input`,
"\x08" + "\x0cabcd" + "\x01\x02",
"abcdcdcd",
nil,
}, {
`decodedLen=8; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=1; valid input`,
"\x08" + "\x0cabcd" + "\x01\x01",
"abcddddd",
nil,
}, {
`decodedLen=8; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=0; zero offset`,
"\x08" + "\x0cabcd" + "\x01\x00",
"",
ErrCorrupt,
}, {
`decodedLen=9; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=4; inconsistent dLen`,
"\x09" + "\x0cabcd" + "\x01\x04",
"",
ErrCorrupt,
}, {
`decodedLen=8; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=5; offset too large`,
"\x08" + "\x0cabcd" + "\x01\x05",
"",
ErrCorrupt,
}, {
`decodedLen=7; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=4; length too large`,
"\x07" + "\x0cabcd" + "\x01\x04",
"",
ErrCorrupt,
}, {
`decodedLen=6; tagLiteral (4 bytes "abcd"); tagCopy2; length=2 offset=3; valid input`,
"\x06" + "\x0cabcd" + "\x06\x03\x00",
"abcdbc",
nil,
}}
const (
// notPresentXxx defines a range of byte values [0xa0, 0xc5) that are
// not present in either the input or the output. It is written to dBuf
// to check that Decode does not write bytes past the end of
// dBuf[:dLen].
//
// The magic number 37 was chosen because it is prime. A more 'natural'
// number like 32 might lead to a false negative if, for example, a
// byte was incorrectly copied 4*8 bytes later.
notPresentBase = 0xa0
notPresentLen = 37
)
var dBuf [100]byte
loop:
for i, tc := range testCases {
input := []byte(tc.input)
for _, x := range input {
if notPresentBase <= x && x < notPresentBase+notPresentLen {
t.Errorf("#%d (%s): input shouldn't contain %#02x\ninput: % x", i, tc.desc, x, input)
continue loop
}
}
dLen, n := binary.Uvarint(input)
if n <= 0 {
t.Errorf("#%d (%s): invalid varint-encoded dLen", i, tc.desc)
continue
}
if dLen > uint64(len(dBuf)) {
t.Errorf("#%d (%s): dLen %d is too large", i, tc.desc, dLen)
continue
}
for j := range dBuf {
dBuf[j] = byte(notPresentBase + j%notPresentLen)
}
g, gotErr := Decode(dBuf[:], input)
if got := string(g); got != tc.want || gotErr != tc.wantErr {
t.Errorf("#%d (%s):\ngot %q, %v\nwant %q, %v",
i, tc.desc, got, gotErr, tc.want, tc.wantErr)
continue
}
for j, x := range dBuf {
if uint64(j) < dLen {
continue
}
if w := byte(notPresentBase + j%notPresentLen); x != w {
t.Errorf("#%d (%s): Decode overrun: dBuf[%d] was modified: got %#02x, want %#02x\ndBuf: % x",
i, tc.desc, j, x, w, dBuf)
continue loop
}
}
}
}
// TestDecodeLengthOffset tests decoding an encoding of the form literal +
// copy-length-offset + literal. For example: "abcdefghijkl" + "efghij" + "AB".
func TestDecodeLengthOffset(t *testing.T) {
const (
prefix = "abcdefghijklmnopqr"
suffix = "ABCDEFGHIJKLMNOPQR"
// notPresentXxx defines a range of byte values [0xa0, 0xc5) that are
// not present in either the input or the output. It is written to
// gotBuf to check that Decode does not write bytes past the end of
// gotBuf[:totalLen].
//
// The magic number 37 was chosen because it is prime. A more 'natural'
// number like 32 might lead to a false negative if, for example, a
// byte was incorrectly copied 4*8 bytes later.
notPresentBase = 0xa0
notPresentLen = 37
)
var gotBuf, wantBuf, inputBuf [128]byte
for length := 1; length <= 18; length++ {
for offset := 1; offset <= 18; offset++ {
loop:
for suffixLen := 0; suffixLen <= 18; suffixLen++ {
totalLen := len(prefix) + length + suffixLen
inputLen := binary.PutUvarint(inputBuf[:], uint64(totalLen))
inputBuf[inputLen] = tagLiteral + 4*byte(len(prefix)-1)
inputLen++
inputLen += copy(inputBuf[inputLen:], prefix)
inputBuf[inputLen+0] = tagCopy2 + 4*byte(length-1)
inputBuf[inputLen+1] = byte(offset)
inputBuf[inputLen+2] = 0x00
inputLen += 3
if suffixLen > 0 {
inputBuf[inputLen] = tagLiteral + 4*byte(suffixLen-1)
inputLen++
inputLen += copy(inputBuf[inputLen:], suffix[:suffixLen])
}
input := inputBuf[:inputLen]
for i := range gotBuf {
gotBuf[i] = byte(notPresentBase + i%notPresentLen)
}
got, err := Decode(gotBuf[:], input)
if err != nil {
t.Errorf("length=%d, offset=%d; suffixLen=%d: %v", length, offset, suffixLen, err)
continue
}
wantLen := 0
wantLen += copy(wantBuf[wantLen:], prefix)
for i := 0; i < length; i++ {
wantBuf[wantLen] = wantBuf[wantLen-offset]
wantLen++
}
wantLen += copy(wantBuf[wantLen:], suffix[:suffixLen])
want := wantBuf[:wantLen]
for _, x := range input {
if notPresentBase <= x && x < notPresentBase+notPresentLen {
t.Errorf("length=%d, offset=%d; suffixLen=%d: input shouldn't contain %#02x\ninput: % x",
length, offset, suffixLen, x, input)
continue loop
}
}
for i, x := range gotBuf {
if i < totalLen {
continue
}
if w := byte(notPresentBase + i%notPresentLen); x != w {
t.Errorf("length=%d, offset=%d; suffixLen=%d; totalLen=%d: "+
"Decode overrun: gotBuf[%d] was modified: got %#02x, want %#02x\ngotBuf: % x",
length, offset, suffixLen, totalLen, i, x, w, gotBuf)
continue loop
}
}
for _, x := range want {
if notPresentBase <= x && x < notPresentBase+notPresentLen {
t.Errorf("length=%d, offset=%d; suffixLen=%d: want shouldn't contain %#02x\nwant: % x",
length, offset, suffixLen, x, want)
continue loop
}
}
if !bytes.Equal(got, want) {
t.Errorf("length=%d, offset=%d; suffixLen=%d:\ninput % x\ngot % x\nwant % x",
length, offset, suffixLen, input, got, want)
continue
}
}
}
}
}
func TestDecodeGoldenInput(t *testing.T) {
src, err := ioutil.ReadFile("testdata/pi.txt.rawsnappy")
if err != nil {
t.Fatalf("ReadFile: %v", err)
}
got, err := Decode(nil, src)
if err != nil {
t.Fatalf("Decode: %v", err)
}
want, err := ioutil.ReadFile("testdata/pi.txt")
if err != nil {
t.Fatalf("ReadFile: %v", err)
}
if err := cmp(got, want); err != nil {
t.Fatal(err)
}
}
// TestSlowForwardCopyOverrun tests the "expand the pattern" algorithm
// described in decode_amd64.s and its claim of a 10 byte overrun worst case.
func TestSlowForwardCopyOverrun(t *testing.T) {
const base = 100
for length := 1; length < 18; length++ {
for offset := 1; offset < 18; offset++ {
highWaterMark := base
d := base
l := length
o := offset
// makeOffsetAtLeast8
for o < 8 {
if end := d + 8; highWaterMark < end {
highWaterMark = end
}
l -= o
d += o
o += o
}
// fixUpSlowForwardCopy
a := d
d += l
// finishSlowForwardCopy
for l > 0 {
if end := a + 8; highWaterMark < end {
highWaterMark = end
}
a += 8
l -= 8
}
dWant := base + length
overrun := highWaterMark - dWant
if d != dWant || overrun < 0 || 10 < overrun {
t.Errorf("length=%d, offset=%d: d and overrun: got (%d, %d), want (%d, something in [0, 10])",
length, offset, d, overrun, dWant)
}
}
}
}
// TestEncodeNoiseThenRepeats encodes input for which the first half is very
// incompressible and the second half is very compressible. The encoded form's
// length should be closer to 50% of the original length than 100%.
func TestEncodeNoiseThenRepeats(t *testing.T) {
for _, origLen := range []int{32 * 1024, 256 * 1024, 2048 * 1024} {
src := make([]byte, origLen)
rng := rand.New(rand.NewSource(1))
firstHalf, secondHalf := src[:origLen/2], src[origLen/2:]
for i := range firstHalf {
firstHalf[i] = uint8(rng.Intn(256))
}
for i := range secondHalf {
secondHalf[i] = uint8(i >> 8)
}
dst := Encode(nil, src)
if got, want := len(dst), origLen*3/4; got >= want {
t.Errorf("origLen=%d: got %d encoded bytes, want less than %d", origLen, got, want)
}
}
}
func TestFramingFormat(t *testing.T) {
// src is composed of alternating 1e5-sized sequences of random
// (incompressible) bytes and repeated (compressible) bytes. 1e5 was chosen
// because it is larger than maxBlockSize (64k).
src := make([]byte, 1e6)
rng := rand.New(rand.NewSource(1))
for i := 0; i < 10; i++ {
if i%2 == 0 {
for j := 0; j < 1e5; j++ {
src[1e5*i+j] = uint8(rng.Intn(256))
}
} else {
for j := 0; j < 1e5; j++ {
src[1e5*i+j] = uint8(i)
}
}
}
buf := new(bytes.Buffer)
if _, err := NewWriter(buf).Write(src); err != nil {
t.Fatalf("Write: encoding: %v", err)
}
dst, err := ioutil.ReadAll(NewReader(buf))
if err != nil {
t.Fatalf("ReadAll: decoding: %v", err)
}
if err := cmp(dst, src); err != nil {
t.Fatal(err)
}
}
func TestWriterGoldenOutput(t *testing.T) {
buf := new(bytes.Buffer)
w := NewBufferedWriter(buf)
defer w.Close()
w.Write([]byte("abcd")) // Not compressible.
w.Flush()
w.Write(bytes.Repeat([]byte{'A'}, 100)) // Compressible.
w.Flush()
got := buf.String()
want := strings.Join([]string{
magicChunk,
"\x01\x08\x00\x00", // Uncompressed chunk, 8 bytes long (including 4 byte checksum).
"\x68\x10\xe6\xb6", // Checksum.
"\x61\x62\x63\x64", // Uncompressed payload: "abcd".
"\x00\x0d\x00\x00", // Compressed chunk, 13 bytes long (including 4 byte checksum).
"\x37\xcb\xbc\x9d", // Checksum.
"\x64", // Compressed payload: Uncompressed length (varint encoded): 100.
"\x00\x41", // Compressed payload: tagLiteral, length=1, "A".
"\xfe\x01\x00", // Compressed payload: tagCopy2, length=64, offset=1.
"\x8a\x01\x00", // Compressed payload: tagCopy2, length=35, offset=1.
}, "")
if got != want {
t.Fatalf("\ngot: % x\nwant: % x", got, want)
}
}
func TestNewBufferedWriter(t *testing.T) {
// Test all 32 possible sub-sequences of these 5 input slices.
//
// Their lengths sum to 400,000, which is over 6 times the Writer ibuf
// capacity: 6 * maxBlockSize is 393,216.
inputs := [][]byte{
bytes.Repeat([]byte{'a'}, 40000),
bytes.Repeat([]byte{'b'}, 150000),
bytes.Repeat([]byte{'c'}, 60000),
bytes.Repeat([]byte{'d'}, 120000),
bytes.Repeat([]byte{'e'}, 30000),
}
loop:
for i := 0; i < 1<<uint(len(inputs)); i++ {
var want []byte
buf := new(bytes.Buffer)
w := NewBufferedWriter(buf)
for j, input := range inputs {
if i&(1<<uint(j)) == 0 {
continue
}
if _, err := w.Write(input); err != nil {
t.Errorf("i=%#02x: j=%d: Write: %v", i, j, err)
continue loop
}
want = append(want, input...)
}
if err := w.Close(); err != nil {
t.Errorf("i=%#02x: Close: %v", i, err)
continue
}
got, err := ioutil.ReadAll(NewReader(buf))
if err != nil {
t.Errorf("i=%#02x: ReadAll: %v", i, err)
continue
}
if err := cmp(got, want); err != nil {
t.Errorf("i=%#02x: %v", i, err)
continue
}
}
}
func TestFlush(t *testing.T) {
buf := new(bytes.Buffer)
w := NewBufferedWriter(buf)
defer w.Close()
if _, err := w.Write(bytes.Repeat([]byte{'x'}, 20)); err != nil {
t.Fatalf("Write: %v", err)
}
if n := buf.Len(); n != 0 {
t.Fatalf("before Flush: %d bytes were written to the underlying io.Writer, want 0", n)
}
if err := w.Flush(); err != nil {
t.Fatalf("Flush: %v", err)
}
if n := buf.Len(); n == 0 {
t.Fatalf("after Flush: %d bytes were written to the underlying io.Writer, want non-0", n)
}
}
func TestReaderReset(t *testing.T) {
gold := bytes.Repeat([]byte("All that is gold does not glitter,\n"), 10000)
buf := new(bytes.Buffer)
if _, err := NewWriter(buf).Write(gold); err != nil {
t.Fatalf("Write: %v", err)
}
encoded, invalid, partial := buf.String(), "invalid", "partial"
r := NewReader(nil)
for i, s := range []string{encoded, invalid, partial, encoded, partial, invalid, encoded, encoded} {
if s == partial {
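// Read only the first 101 bytes, then abandon the stream mid-chunk;
// the subsequent Reset must discard any buffered state.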
r.Reset(strings.NewReader(encoded))
if _, err := r.Read(make([]byte, 101)); err != nil {
t.Errorf("#%d: %v", i, err)
continue
}
continue
}
r.Reset(strings.NewReader(s))
got, err := ioutil.ReadAll(r)
switch s {
case encoded:
if err != nil {
t.Errorf("#%d: %v", i, err)
continue
}
if err := cmp(got, gold); err != nil {
t.Errorf("#%d: %v", i, err)
continue
}
case invalid:
if err == nil {
t.Errorf("#%d: got nil error, want non-nil", i)
continue
}
}
}
}

func TestWriterReset(t *testing.T) {
gold := bytes.Repeat([]byte("Not all those who wander are lost;\n"), 10000)
const n = 20
for _, buffered := range []bool{false, true} {
var w *Writer
if buffered {
w = NewBufferedWriter(nil)
defer w.Close()
} else {
w = NewWriter(nil)
}
var gots, wants [][]byte
failed := false
for i := 0; i <= n; i++ {
buf := new(bytes.Buffer)
w.Reset(buf)
want := gold[:len(gold)*i/n]
if _, err := w.Write(want); err != nil {
t.Errorf("#%d: Write: %v", i, err)
failed = true
continue
}
if buffered {
if err := w.Flush(); err != nil {
t.Errorf("#%d: Flush: %v", i, err)
failed = true
continue
}
}
got, err := ioutil.ReadAll(NewReader(buf))
if err != nil {
t.Errorf("#%d: ReadAll: %v", i, err)
failed = true
continue
}
gots = append(gots, got)
wants = append(wants, want)
}
if failed {
continue
}
for i := range gots {
if err := cmp(gots[i], wants[i]); err != nil {
t.Errorf("#%d: %v", i, err)
}
}
}
}

func TestWriterResetWithoutFlush(t *testing.T) {
buf0 := new(bytes.Buffer)
buf1 := new(bytes.Buffer)
w := NewBufferedWriter(buf0)
if _, err := w.Write([]byte("xxx")); err != nil {
t.Fatalf("Write #0: %v", err)
}
// Note that we don't Flush the Writer before calling Reset.
w.Reset(buf1)
if _, err := w.Write([]byte("yyy")); err != nil {
t.Fatalf("Write #1: %v", err)
}
if err := w.Flush(); err != nil {
t.Fatalf("Flush: %v", err)
}
got, err := ioutil.ReadAll(NewReader(buf1))
if err != nil {
t.Fatalf("ReadAll: %v", err)
}
if err := cmp(got, []byte("yyy")); err != nil {
t.Fatal(err)
}
}
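
// writeCounter counts the number of Write calls made to it, discarding the
// data itself.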
type writeCounter int

func (c *writeCounter) Write(p []byte) (int, error) {
*c++
return len(p), nil
}

// TestNumUnderlyingWrites tests that each Writer flush only makes one or two
// Write calls on its underlying io.Writer, depending on whether or not the
// flushed buffer was compressible: a compressed chunk is assembled and
// written in one call, while an incompressible chunk's header and raw
// payload are written separately.
func TestNumUnderlyingWrites(t *testing.T) {
testCases := []struct {
input []byte
want int
}{
{bytes.Repeat([]byte{'x'}, 100), 1},
{bytes.Repeat([]byte{'y'}, 100), 1},
{[]byte("ABCDEFGHIJKLMNOPQRST"), 2},
}
var c writeCounter
w := NewBufferedWriter(&c)
defer w.Close()
for i, tc := range testCases {
c = 0
if _, err := w.Write(tc.input); err != nil {
t.Errorf("#%d: Write: %v", i, err)
continue
}
if err := w.Flush(); err != nil {
t.Errorf("#%d: Flush: %v", i, err)
continue
}
if int(c) != tc.want {
t.Errorf("#%d: got %d underlying writes, want %d", i, c, tc.want)
continue
}
}
}

func benchDecode(b *testing.B, src []byte) {
encoded := Encode(nil, src)
// Bandwidth is in amount of uncompressed data.
b.SetBytes(int64(len(src)))
b.ResetTimer()
for i := 0; i < b.N; i++ {
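// Decoding into src itself is safe here: the decoded output is exactly
// len(src) bytes, so src doubles as the destination buffer.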
Decode(src, encoded)
}
}
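
// benchEncode measures encoding throughput. dst is pre-sized with
// MaxEncodedLen so that Encode can reuse it instead of allocating inside
// the loop.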
func benchEncode(b *testing.B, src []byte) {
// Bandwidth is in amount of uncompressed data.
b.SetBytes(int64(len(src)))
dst := make([]byte, MaxEncodedLen(len(src)))
b.ResetTimer()
for i := 0; i < b.N; i++ {
Encode(dst, src)
}
}

func readFile(b testing.TB, filename string) []byte {
src, err := ioutil.ReadFile(filename)
if err != nil {
b.Skipf("skipping benchmark: %v", err)
}
if len(src) == 0 {
b.Fatalf("%s has zero length", filename)
}
return src
}

// expand returns a slice of length n containing repeated copies of src.
func expand(src []byte, n int) []byte {
dst := make([]byte, n)
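// Repeatedly copy src into the remaining tail of dst; copy returns the
// number of bytes written, capped by the shorter of the two slices.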
for x := dst; len(x) > 0; {
i := copy(x, src)
x = x[i:]
}
return dst
}

func benchWords(b *testing.B, n int, decode bool) {
// Note: the file is OS-language dependent so the resulting values are not
// directly comparable for non-US-English OS installations.
data := expand(readFile(b, "/usr/share/dict/words"), n)
if decode {
benchDecode(b, data)
} else {
benchEncode(b, data)
}
}

func BenchmarkWordsDecode1e1(b *testing.B) { benchWords(b, 1e1, true) }
func BenchmarkWordsDecode1e2(b *testing.B) { benchWords(b, 1e2, true) }
func BenchmarkWordsDecode1e3(b *testing.B) { benchWords(b, 1e3, true) }
func BenchmarkWordsDecode1e4(b *testing.B) { benchWords(b, 1e4, true) }
func BenchmarkWordsDecode1e5(b *testing.B) { benchWords(b, 1e5, true) }
func BenchmarkWordsDecode1e6(b *testing.B) { benchWords(b, 1e6, true) }

func BenchmarkWordsEncode1e1(b *testing.B) { benchWords(b, 1e1, false) }
func BenchmarkWordsEncode1e2(b *testing.B) { benchWords(b, 1e2, false) }
func BenchmarkWordsEncode1e3(b *testing.B) { benchWords(b, 1e3, false) }
func BenchmarkWordsEncode1e4(b *testing.B) { benchWords(b, 1e4, false) }
func BenchmarkWordsEncode1e5(b *testing.B) { benchWords(b, 1e5, false) }
func BenchmarkWordsEncode1e6(b *testing.B) { benchWords(b, 1e6, false) }
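
// Random bytes are essentially incompressible, so BenchmarkRandomEncode
// exercises the encoder's worst case: emitting literals with few or no
// back-references.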
func BenchmarkRandomEncode(b *testing.B) {
rng := rand.New(rand.NewSource(1))
data := make([]byte, 1<<20)
for i := range data {
data[i] = uint8(rng.Intn(256))
}
benchEncode(b, data)
}

// testFiles' values are copied directly from
// https://raw.githubusercontent.com/google/snappy/master/snappy_unittest.cc
// The label field is unused in snappy-go.
var testFiles = []struct {
label string
filename string
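// sizeLimit, when non-zero, truncates the file to that many bytes before
// benchmarking (see benchFile).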
sizeLimit int
}{
{"html", "html", 0},
{"urls", "urls.10K", 0},
{"jpg", "fireworks.jpeg", 0},
{"jpg_200", "fireworks.jpeg", 200},
{"pdf", "paper-100k.pdf", 0},
{"html4", "html_x_4", 0},
{"txt1", "alice29.txt", 0},
{"txt2", "asyoulik.txt", 0},
{"txt3", "lcet10.txt", 0},
{"txt4", "plrabn12.txt", 0},
{"pb", "geo.protodata", 0},
{"gaviota", "kppkn.gtb", 0},
}

const (
// The benchmark data files are at this canonical URL.
benchURL = "https://raw.githubusercontent.com/google/snappy/master/testdata/"
// They are copied to this local directory.
benchDir = "testdata/bench"
)

func downloadBenchmarkFiles(b *testing.B, basename string) (errRet error) {
filename := filepath.Join(benchDir, basename)
if stat, err := os.Stat(filename); err == nil && stat.Size() != 0 {
return nil
}
if !*download {
b.Skip("test data not found; skipping benchmark without the -download flag")
}
// Download the official snappy C++ implementation reference test data
// files for benchmarking.
if err := os.MkdirAll(benchDir, 0777); err != nil && !os.IsExist(err) {
return fmt.Errorf("failed to create %s: %s", benchDir, err)
}
f, err := os.Create(filename)
if err != nil {
return fmt.Errorf("failed to create %s: %s", filename, err)
}
defer f.Close()
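// If any later step fails, remove the partially written file so that the
// next run re-downloads it; errRet is the function's named return value.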
defer func() {
if errRet != nil {
os.Remove(filename)
}
}()
url := benchURL + basename
resp, err := http.Get(url)
if err != nil {
return fmt.Errorf("failed to download %s: %s", url, err)
}
defer resp.Body.Close()
if s := resp.StatusCode; s != http.StatusOK {
return fmt.Errorf("downloading %s: HTTP status code %d (%s)", url, s, http.StatusText(s))
}
_, err = io.Copy(f, resp.Body)
if err != nil {
return fmt.Errorf("failed to download %s to %s: %s", url, filename, err)
}
return nil
}

func benchFile(b *testing.B, n int, decode bool) {
if err := downloadBenchmarkFiles(b, testFiles[n].filename); err != nil {
b.Fatalf("failed to download testdata: %s", err)
}
data := readFile(b, filepath.Join(benchDir, testFiles[n].filename))
if limit := testFiles[n].sizeLimit; 0 < limit && limit < len(data) {
data = data[:limit]
}
if decode {
benchDecode(b, data)
} else {
benchEncode(b, data)
}
}

// Naming convention is kept similar to what snappy's C++ implementation uses.
func Benchmark_UFlat0(b *testing.B) { benchFile(b, 0, true) }
func Benchmark_UFlat1(b *testing.B) { benchFile(b, 1, true) }
func Benchmark_UFlat2(b *testing.B) { benchFile(b, 2, true) }
func Benchmark_UFlat3(b *testing.B) { benchFile(b, 3, true) }
func Benchmark_UFlat4(b *testing.B) { benchFile(b, 4, true) }
func Benchmark_UFlat5(b *testing.B) { benchFile(b, 5, true) }
func Benchmark_UFlat6(b *testing.B) { benchFile(b, 6, true) }
func Benchmark_UFlat7(b *testing.B) { benchFile(b, 7, true) }
func Benchmark_UFlat8(b *testing.B) { benchFile(b, 8, true) }
func Benchmark_UFlat9(b *testing.B) { benchFile(b, 9, true) }
func Benchmark_UFlat10(b *testing.B) { benchFile(b, 10, true) }
func Benchmark_UFlat11(b *testing.B) { benchFile(b, 11, true) }

func Benchmark_ZFlat0(b *testing.B) { benchFile(b, 0, false) }
func Benchmark_ZFlat1(b *testing.B) { benchFile(b, 1, false) }
func Benchmark_ZFlat2(b *testing.B) { benchFile(b, 2, false) }
func Benchmark_ZFlat3(b *testing.B) { benchFile(b, 3, false) }
func Benchmark_ZFlat4(b *testing.B) { benchFile(b, 4, false) }
func Benchmark_ZFlat5(b *testing.B) { benchFile(b, 5, false) }
func Benchmark_ZFlat6(b *testing.B) { benchFile(b, 6, false) }
func Benchmark_ZFlat7(b *testing.B) { benchFile(b, 7, false) }
func Benchmark_ZFlat8(b *testing.B) { benchFile(b, 8, false) }
func Benchmark_ZFlat9(b *testing.B) { benchFile(b, 9, false) }
func Benchmark_ZFlat10(b *testing.B) { benchFile(b, 10, false) }
func Benchmark_ZFlat11(b *testing.B) { benchFile(b, 11, false) }
