Use Go 1.5 vendoring instead of Godeps

Change made by: - running "gvt fetch" on each of the packages mentioned in Godeps/Godeps.json - `rm -rf Godeps` - tweaking the build scripts to not mention Godeps - tweaking the build scripts to test `./lib/...`, `./cmd/...` explicitly (to avoid testing vendor) - tweaking the build scripts to not juggle GOPATH for Godeps and instead set GO15VENDOREXPERIMENT. This also results in some updated packages at the same time I bet. Building with Go 1.3 and 1.4 still *works* but won't use our vendored dependencies - the user needs to have the actual packages in their GOPATH then, which they'll get with a normal "go get". Building with Go 1.6+ will get our vendored dependencies by default even when not using our build script, which is nice. By doing this we gain some freedom in that we can pick and choose manually what to include in vendor, as it's not based on just dependency analysis of our own code. This is also a risk as we might pick up dependencies we are unaware of, as the build may work locally with those packages present in GOPATH. On the other hand the build server will detect this as it has no packages in it's GOPATH beyond what is included in the repo. Recommended tool to manage dependencies is github.com/FiloSottile/gvt.
2016-03-05 21:01:58 +01:00
parent 9259425a9a
commit 65aaa607ab
694 changed files with 65763 additions and 3541 deletions
@@ -0,0 +1,15 @@
+# This is the official list of Snappy-Go authors for copyright purposes.
+# This file is distinct from the CONTRIBUTORS files.
+# See the latter for an explanation.
+
+# Names should be added to this file as
+#	Name or Organization <email address>
+# The email address is not required for organizations.
+
+# Please keep the list sorted.
+
+Damian Gryski <dgryski@gmail.com>
+Google Inc.
+Jan Mercl <0xjnml@gmail.com>
+Rodolfo Carvalho <rhcarvalho@gmail.com>
+Sebastien Binet <seb.binet@gmail.com>
@@ -0,0 +1,37 @@
+# This is the official list of people who can contribute
+# (and typically have contributed) code to the Snappy-Go repository.
+# The AUTHORS file lists the copyright holders; this file
+# lists people.  For example, Google employees are listed here
+# but not in AUTHORS, because Google holds the copyright.
+#
+# The submission process automatically checks to make sure
+# that people submitting code are listed in this file (by email address).
+#
+# Names should be added to this file only after verifying that
+# the individual or the individual's organization has agreed to
+# the appropriate Contributor License Agreement, found here:
+#
+#     http://code.google.com/legal/individual-cla-v1.0.html
+#     http://code.google.com/legal/corporate-cla-v1.0.html
+#
+# The agreement for individuals can be filled out on the web.
+#
+# When adding J Random Contributor's name to this file,
+# either J's name or J's organization's name should be
+# added to the AUTHORS file, depending on whether the
+# individual or corporate CLA was used.
+
+# Names should be added to this file like so:
+#     Name <email address>
+
+# Please keep the list sorted.
+
+Damian Gryski <dgryski@gmail.com>
+Jan Mercl <0xjnml@gmail.com>
+Kai Backman <kaib@golang.org>
+Marc-Antoine Ruel <maruel@chromium.org>
+Nigel Tao <nigeltao@golang.org>
+Rob Pike <r@golang.org>
+Rodolfo Carvalho <rhcarvalho@gmail.com>
+Russ Cox <rsc@golang.org>
+Sebastien Binet <seb.binet@gmail.com>
@@ -0,0 +1,27 @@
+Copyright (c) 2011 The Snappy-Go Authors. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+   * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+   * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+   * Neither the name of Google Inc. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -0,0 +1,7 @@
+The Snappy compression format in the Go programming language.
+
+To download and install from source:
+$ go get github.com/golang/snappy
+
+Unless otherwise noted, the Snappy-Go source files are distributed
+under the BSD-style license found in the LICENSE file.
@@ -0,0 +1,74 @@
+/*
+To build the snappytool binary:
+g++ main.cpp /usr/lib/libsnappy.a -o snappytool
+*/
+
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "snappy.h"
+
+#define N 1000000
+
+char dst[N];
+char src[N];
+
+int main(int argc, char** argv) {
+  // Parse args.
+  if (argc != 2) {
+    fprintf(stderr, "exactly one of -d or -e must be given\n");
+    return 1;
+  }
+  bool decode = strcmp(argv[1], "-d") == 0;
+  bool encode = strcmp(argv[1], "-e") == 0;
+  if (decode == encode) {
+    fprintf(stderr, "exactly one of -d or -e must be given\n");
+    return 1;
+  }
+
+  // Read all of stdin into src[:s].
+  size_t s = 0;
+  while (1) {
+    if (s == N) {
+      fprintf(stderr, "input too large\n");
+      return 1;
+    }
+    ssize_t n = read(0, src+s, N-s);
+    if (n == 0) {
+      break;
+    }
+    if (n < 0) {
+      fprintf(stderr, "read error: %s\n", strerror(errno));
+      // TODO: handle EAGAIN, EINTR?
+      return 1;
+    }
+    s += n;
+  }
+
+  // Encode or decode src[:s] to dst[:d], and write to stdout.
+  size_t d = 0;
+  if (encode) {
+    if (N < snappy::MaxCompressedLength(s)) {
+      fprintf(stderr, "input too large after encoding\n");
+      return 1;
+    }
+    snappy::RawCompress(src, s, dst, &d);
+  } else {
+    if (!snappy::GetUncompressedLength(src, s, &d)) {
+      fprintf(stderr, "could not get uncompressed length\n");
+      return 1;
+    }
+    if (N < d) {
+      fprintf(stderr, "input too large after decoding\n");
+      return 1;
+    }
+    if (!snappy::RawUncompress(src, s, dst)) {
+      fprintf(stderr, "input was not valid Snappy-compressed data\n");
+      return 1;
+    }
+  }
+  write(1, dst, d);
+  return 0;
+}
@@ -0,0 +1,237 @@
+// Copyright 2011 The Snappy-Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package snappy
+
+import (
+	"encoding/binary"
+	"errors"
+	"io"
+)
+
+var (
+	// ErrCorrupt reports that the input is invalid.
+	ErrCorrupt = errors.New("snappy: corrupt input")
+	// ErrTooLarge reports that the uncompressed length is too large.
+	ErrTooLarge = errors.New("snappy: decoded block is too large")
+	// ErrUnsupported reports that the input isn't supported.
+	ErrUnsupported = errors.New("snappy: unsupported input")
+
+	errUnsupportedCopy4Tag      = errors.New("snappy: unsupported COPY_4 tag")
+	errUnsupportedLiteralLength = errors.New("snappy: unsupported literal length")
+)
+
+// DecodedLen returns the length of the decoded block.
+func DecodedLen(src []byte) (int, error) {
+	v, _, err := decodedLen(src)
+	return v, err
+}
+
+// decodedLen returns the length of the decoded block and the number of bytes
+// that the length header occupied.
+func decodedLen(src []byte) (blockLen, headerLen int, err error) {
+	v, n := binary.Uvarint(src)
+	if n <= 0 || v > 0xffffffff {
+		return 0, 0, ErrCorrupt
+	}
+
+	const wordSize = 32 << (^uint(0) >> 32 & 1)
+	if wordSize == 32 && v > 0x7fffffff {
+		return 0, 0, ErrTooLarge
+	}
+	return int(v), n, nil
+}
+
+const (
+	decodeErrCodeCorrupt                  = 1
+	decodeErrCodeUnsupportedLiteralLength = 2
+	decodeErrCodeUnsupportedCopy4Tag      = 3
+)
+
+// Decode returns the decoded form of src. The returned slice may be a sub-
+// slice of dst if dst was large enough to hold the entire decoded block.
+// Otherwise, a newly allocated slice will be returned.
+//
+// The dst and src must not overlap. It is valid to pass a nil dst.
+func Decode(dst, src []byte) ([]byte, error) {
+	dLen, s, err := decodedLen(src)
+	if err != nil {
+		return nil, err
+	}
+	if dLen <= len(dst) {
+		dst = dst[:dLen]
+	} else {
+		dst = make([]byte, dLen)
+	}
+	switch decode(dst, src[s:]) {
+	case 0:
+		return dst, nil
+	case decodeErrCodeUnsupportedLiteralLength:
+		return nil, errUnsupportedLiteralLength
+	case decodeErrCodeUnsupportedCopy4Tag:
+		return nil, errUnsupportedCopy4Tag
+	}
+	return nil, ErrCorrupt
+}
+
+// NewReader returns a new Reader that decompresses from r, using the framing
+// format described at
+// https://github.com/google/snappy/blob/master/framing_format.txt
+func NewReader(r io.Reader) *Reader {
+	return &Reader{
+		r:       r,
+		decoded: make([]byte, maxBlockSize),
+		buf:     make([]byte, maxEncodedLenOfMaxBlockSize+checksumSize),
+	}
+}
+
+// Reader is an io.Reader that can read Snappy-compressed bytes.
+type Reader struct {
+	r       io.Reader
+	err     error
+	decoded []byte
+	buf     []byte
+	// decoded[i:j] contains decoded bytes that have not yet been passed on.
+	i, j       int
+	readHeader bool
+}
+
+// Reset discards any buffered data, resets all state, and switches the Snappy
+// reader to read from r. This permits reusing a Reader rather than allocating
+// a new one.
+func (r *Reader) Reset(reader io.Reader) {
+	r.r = reader
+	r.err = nil
+	r.i = 0
+	r.j = 0
+	r.readHeader = false
+}
+
+func (r *Reader) readFull(p []byte) (ok bool) {
+	if _, r.err = io.ReadFull(r.r, p); r.err != nil {
+		if r.err == io.ErrUnexpectedEOF {
+			r.err = ErrCorrupt
+		}
+		return false
+	}
+	return true
+}
+
+// Read satisfies the io.Reader interface.
+func (r *Reader) Read(p []byte) (int, error) {
+	if r.err != nil {
+		return 0, r.err
+	}
+	for {
+		if r.i < r.j {
+			n := copy(p, r.decoded[r.i:r.j])
+			r.i += n
+			return n, nil
+		}
+		if !r.readFull(r.buf[:4]) {
+			return 0, r.err
+		}
+		chunkType := r.buf[0]
+		if !r.readHeader {
+			if chunkType != chunkTypeStreamIdentifier {
+				r.err = ErrCorrupt
+				return 0, r.err
+			}
+			r.readHeader = true
+		}
+		chunkLen := int(r.buf[1]) | int(r.buf[2])<<8 | int(r.buf[3])<<16
+		if chunkLen > len(r.buf) {
+			r.err = ErrUnsupported
+			return 0, r.err
+		}
+
+		// The chunk types are specified at
+		// https://github.com/google/snappy/blob/master/framing_format.txt
+		switch chunkType {
+		case chunkTypeCompressedData:
+			// Section 4.2. Compressed data (chunk type 0x00).
+			if chunkLen < checksumSize {
+				r.err = ErrCorrupt
+				return 0, r.err
+			}
+			buf := r.buf[:chunkLen]
+			if !r.readFull(buf) {
+				return 0, r.err
+			}
+			checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24
+			buf = buf[checksumSize:]
+
+			n, err := DecodedLen(buf)
+			if err != nil {
+				r.err = err
+				return 0, r.err
+			}
+			if n > len(r.decoded) {
+				r.err = ErrCorrupt
+				return 0, r.err
+			}
+			if _, err := Decode(r.decoded, buf); err != nil {
+				r.err = err
+				return 0, r.err
+			}
+			if crc(r.decoded[:n]) != checksum {
+				r.err = ErrCorrupt
+				return 0, r.err
+			}
+			r.i, r.j = 0, n
+			continue
+
+		case chunkTypeUncompressedData:
+			// Section 4.3. Uncompressed data (chunk type 0x01).
+			if chunkLen < checksumSize {
+				r.err = ErrCorrupt
+				return 0, r.err
+			}
+			buf := r.buf[:checksumSize]
+			if !r.readFull(buf) {
+				return 0, r.err
+			}
+			checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24
+			// Read directly into r.decoded instead of via r.buf.
+			n := chunkLen - checksumSize
+			if !r.readFull(r.decoded[:n]) {
+				return 0, r.err
+			}
+			if crc(r.decoded[:n]) != checksum {
+				r.err = ErrCorrupt
+				return 0, r.err
+			}
+			r.i, r.j = 0, n
+			continue
+
+		case chunkTypeStreamIdentifier:
+			// Section 4.1. Stream identifier (chunk type 0xff).
+			if chunkLen != len(magicBody) {
+				r.err = ErrCorrupt
+				return 0, r.err
+			}
+			if !r.readFull(r.buf[:len(magicBody)]) {
+				return 0, r.err
+			}
+			for i := 0; i < len(magicBody); i++ {
+				if r.buf[i] != magicBody[i] {
+					r.err = ErrCorrupt
+					return 0, r.err
+				}
+			}
+			continue
+		}
+
+		if chunkType <= 0x7f {
+			// Section 4.5. Reserved unskippable chunks (chunk types 0x02-0x7f).
+			r.err = ErrUnsupported
+			return 0, r.err
+		}
+		// Section 4.4 Padding (chunk type 0xfe).
+		// Section 4.6. Reserved skippable chunks (chunk types 0x80-0xfd).
+		if !r.readFull(r.buf[:chunkLen]) {
+			return 0, r.err
+		}
+	}
+}
@@ -0,0 +1,10 @@
+// Copyright 2016 The Snappy-Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package snappy
+
+// decode has the same semantics as in decode_other.go.
+//
+//go:noescape
+func decode(dst, src []byte) int
@@ -0,0 +1,472 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// func decode(dst, src []byte) int
+//
+// The asm code generally follows the pure Go code in decode_other.go, except
+// where marked with a "!!!".
+//
+// All local variables fit into registers. The non-zero stack size is only to
+// spill registers and push args when issuing a CALL. The register allocation:
+//	- AX	scratch
+//	- BX	scratch
+//	- CX	length or x
+//	- DX	offset
+//	- SI	&src[s]
+//	- DI	&dst[d]
+//	+ R8	dst_base
+//	+ R9	dst_len
+//	+ R10	dst_base + dst_len
+//	+ R11	src_base
+//	+ R12	src_len
+//	+ R13	src_base + src_len
+//	- R14	used by doCopy
+//	- R15	used by doCopy
+//
+// The registers R8-R13 (marked with a "+") are set at the start of the
+// function, and after a CALL returns, and are not otherwise modified.
+//
+// The d variable is implicitly DI - R8,  and len(dst)-d is R10 - DI.
+// The s variable is implicitly SI - R11, and len(src)-s is R13 - SI.
+TEXT ·decode(SB), NOSPLIT, $48-56
+	// Initialize SI, DI and R8-R13.
+	MOVQ dst_base+0(FP), R8
+	MOVQ dst_len+8(FP), R9
+	MOVQ R8, DI
+	MOVQ R8, R10
+	ADDQ R9, R10
+	MOVQ src_base+24(FP), R11
+	MOVQ src_len+32(FP), R12
+	MOVQ R11, SI
+	MOVQ R11, R13
+	ADDQ R12, R13
+
+loop:
+	// for s < len(src)
+	CMPQ SI, R13
+	JEQ  end
+
+	// CX = uint32(src[s])
+	//
+	// switch src[s] & 0x03
+	MOVBLZX (SI), CX
+	MOVL    CX, BX
+	ANDL    $3, BX
+	CMPL    BX, $1
+	JAE     tagCopy
+
+	// ----------------------------------------
+	// The code below handles literal tags.
+
+	// case tagLiteral:
+	// x := uint32(src[s] >> 2)
+	// switch
+	SHRL $2, CX
+	CMPL CX, $60
+	JAE  tagLit60Plus
+
+	// case x < 60:
+	// s++
+	INCQ SI
+
+doLit:
+	// This is the end of the inner "switch", when we have a literal tag.
+	//
+	// We assume that CX == x and x fits in a uint32, where x is the variable
+	// used in the pure Go decode_other.go code.
+
+	// length = int(x) + 1
+	//
+	// Unlike the pure Go code, we don't need to check if length <= 0 because
+	// CX can hold 64 bits, so the increment cannot overflow.
+	INCQ CX
+
+	// Prepare to check if copying length bytes will run past the end of dst or
+	// src.
+	//
+	// AX = len(dst) - d
+	// BX = len(src) - s
+	MOVQ R10, AX
+	SUBQ DI, AX
+	MOVQ R13, BX
+	SUBQ SI, BX
+
+	// !!! Try a faster technique for short (16 or fewer bytes) copies.
+	//
+	// if length > 16 || len(dst)-d < 16 || len(src)-s < 16 {
+	//   goto callMemmove // Fall back on calling runtime·memmove.
+	// }
+	//
+	// The C++ snappy code calls this TryFastAppend. It also checks len(src)-s
+	// against 21 instead of 16, because it cannot assume that all of its input
+	// is contiguous in memory and so it needs to leave enough source bytes to
+	// read the next tag without refilling buffers, but Go's Decode assumes
+	// contiguousness (the src argument is a []byte).
+	CMPQ CX, $16
+	JGT  callMemmove
+	CMPQ AX, $16
+	JLT  callMemmove
+	CMPQ BX, $16
+	JLT  callMemmove
+
+	// !!! Implement the copy from src to dst as a 16-byte load and store.
+	// (Decode's documentation says that dst and src must not overlap.)
+	//
+	// This always copies 16 bytes, instead of only length bytes, but that's
+	// OK. If the input is a valid Snappy encoding then subsequent iterations
+	// will fix up the overrun. Otherwise, Decode returns a nil []byte (and a
+	// non-nil error), so the overrun will be ignored.
+	//
+	// Note that on amd64, it is legal and cheap to issue unaligned 8-byte or
+	// 16-byte loads and stores. This technique probably wouldn't be as
+	// effective on architectures that are fussier about alignment.
+	MOVOU 0(SI), X0
+	MOVOU X0, 0(DI)
+
+	// d += length
+	// s += length
+	ADDQ CX, DI
+	ADDQ CX, SI
+	JMP  loop
+
+callMemmove:
+	// if length > len(dst)-d || length > len(src)-s { etc }
+	CMPQ CX, AX
+	JGT  errCorrupt
+	CMPQ CX, BX
+	JGT  errCorrupt
+
+	// copy(dst[d:], src[s:s+length])
+	//
+	// This means calling runtime·memmove(&dst[d], &src[s], length), so we push
+	// DI, SI and CX as arguments. Coincidentally, we also need to spill those
+	// three registers to the stack, to save local variables across the CALL.
+	MOVQ DI, 0(SP)
+	MOVQ SI, 8(SP)
+	MOVQ CX, 16(SP)
+	MOVQ DI, 24(SP)
+	MOVQ SI, 32(SP)
+	MOVQ CX, 40(SP)
+	CALL runtime·memmove(SB)
+
+	// Restore local variables: unspill registers from the stack and
+	// re-calculate R8-R13.
+	MOVQ 24(SP), DI
+	MOVQ 32(SP), SI
+	MOVQ 40(SP), CX
+	MOVQ dst_base+0(FP), R8
+	MOVQ dst_len+8(FP), R9
+	MOVQ R8, R10
+	ADDQ R9, R10
+	MOVQ src_base+24(FP), R11
+	MOVQ src_len+32(FP), R12
+	MOVQ R11, R13
+	ADDQ R12, R13
+
+	// d += length
+	// s += length
+	ADDQ CX, DI
+	ADDQ CX, SI
+	JMP  loop
+
+tagLit60Plus:
+	// !!! This fragment does the
+	//
+	// s += x - 58; if uint(s) > uint(len(src)) { etc }
+	//
+	// checks. In the asm version, we code it once instead of once per switch case.
+	ADDQ CX, SI
+	SUBQ $58, SI
+	MOVQ SI, BX
+	SUBQ R11, BX
+	CMPQ BX, R12
+	JA   errCorrupt
+
+	// case x == 60:
+	CMPL CX, $61
+	JEQ  tagLit61
+	JA   tagLit62Plus
+
+	// x = uint32(src[s-1])
+	MOVBLZX -1(SI), CX
+	JMP     doLit
+
+tagLit61:
+	// case x == 61:
+	// x = uint32(src[s-2]) | uint32(src[s-1])<<8
+	MOVWLZX -2(SI), CX
+	JMP     doLit
+
+tagLit62Plus:
+	CMPL CX, $62
+	JA   tagLit63
+
+	// case x == 62:
+	// x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
+	MOVWLZX -3(SI), CX
+	MOVBLZX -1(SI), BX
+	SHLL    $16, BX
+	ORL     BX, CX
+	JMP     doLit
+
+tagLit63:
+	// case x == 63:
+	// x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
+	MOVL -4(SI), CX
+	JMP  doLit
+
+// The code above handles literal tags.
+// ----------------------------------------
+// The code below handles copy tags.
+
+tagCopy2:
+	// case tagCopy2:
+	// s += 3
+	ADDQ $3, SI
+
+	// if uint(s) > uint(len(src)) { etc }
+	MOVQ SI, BX
+	SUBQ R11, BX
+	CMPQ BX, R12
+	JA   errCorrupt
+
+	// length = 1 + int(src[s-3])>>2
+	SHRQ $2, CX
+	INCQ CX
+
+	// offset = int(src[s-2]) | int(src[s-1])<<8
+	MOVWQZX -2(SI), DX
+	JMP     doCopy
+
+tagCopy:
+	// We have a copy tag. We assume that:
+	//	- BX == src[s] & 0x03
+	//	- CX == src[s]
+	CMPQ BX, $2
+	JEQ  tagCopy2
+	JA   errUC4T
+
+	// case tagCopy1:
+	// s += 2
+	ADDQ $2, SI
+
+	// if uint(s) > uint(len(src)) { etc }
+	MOVQ SI, BX
+	SUBQ R11, BX
+	CMPQ BX, R12
+	JA   errCorrupt
+
+	// offset = int(src[s-2])&0xe0<<3 | int(src[s-1])
+	MOVQ    CX, DX
+	ANDQ    $0xe0, DX
+	SHLQ    $3, DX
+	MOVBQZX -1(SI), BX
+	ORQ     BX, DX
+
+	// length = 4 + int(src[s-2])>>2&0x7
+	SHRQ $2, CX
+	ANDQ $7, CX
+	ADDQ $4, CX
+
+doCopy:
+	// This is the end of the outer "switch", when we have a copy tag.
+	//
+	// We assume that:
+	//	- CX == length && CX > 0
+	//	- DX == offset
+
+	// if offset <= 0 { etc }
+	CMPQ DX, $0
+	JLE  errCorrupt
+
+	// if d < offset { etc }
+	MOVQ DI, BX
+	SUBQ R8, BX
+	CMPQ BX, DX
+	JLT  errCorrupt
+
+	// if length > len(dst)-d { etc }
+	MOVQ R10, BX
+	SUBQ DI, BX
+	CMPQ CX, BX
+	JGT  errCorrupt
+
+	// forwardCopy(dst[d:d+length], dst[d-offset:]); d += length
+	//
+	// Set:
+	//	- R14 = len(dst)-d
+	//	- R15 = &dst[d-offset]
+	MOVQ R10, R14
+	SUBQ DI, R14
+	MOVQ DI, R15
+	SUBQ DX, R15
+
+	// !!! Try a faster technique for short (16 or fewer bytes) forward copies.
+	//
+	// First, try using two 8-byte load/stores, similar to the doLit technique
+	// above. Even if dst[d:d+length] and dst[d-offset:] can overlap, this is
+	// still OK if offset >= 8. Note that this has to be two 8-byte load/stores
+	// and not one 16-byte load/store, and the first store has to be before the
+	// second load, due to the overlap if offset is in the range [8, 16).
+	//
+	// if length > 16 || offset < 8 || len(dst)-d < 16 {
+	//   goto slowForwardCopy
+	// }
+	// copy 16 bytes
+	// d += length
+	CMPQ CX, $16
+	JGT  slowForwardCopy
+	CMPQ DX, $8
+	JLT  slowForwardCopy
+	CMPQ R14, $16
+	JLT  slowForwardCopy
+	MOVQ 0(R15), AX
+	MOVQ AX, 0(DI)
+	MOVQ 8(R15), BX
+	MOVQ BX, 8(DI)
+	ADDQ CX, DI
+	JMP  loop
+
+slowForwardCopy:
+	// !!! If the forward copy is longer than 16 bytes, or if offset < 8, we
+	// can still try 8-byte load stores, provided we can overrun up to 10 extra
+	// bytes. As above, the overrun will be fixed up by subsequent iterations
+	// of the outermost loop.
+	//
+	// The C++ snappy code calls this technique IncrementalCopyFastPath. Its
+	// commentary says:
+	//
+	// ----
+	//
+	// The main part of this loop is a simple copy of eight bytes at a time
+	// until we've copied (at least) the requested amount of bytes.  However,
+	// if d and d-offset are less than eight bytes apart (indicating a
+	// repeating pattern of length < 8), we first need to expand the pattern in
+	// order to get the correct results. For instance, if the buffer looks like
+	// this, with the eight-byte <d-offset> and <d> patterns marked as
+	// intervals:
+	//
+	//    abxxxxxxxxxxxx
+	//    [------]           d-offset
+	//      [------]         d
+	//
+	// a single eight-byte copy from <d-offset> to <d> will repeat the pattern
+	// once, after which we can move <d> two bytes without moving <d-offset>:
+	//
+	//    ababxxxxxxxxxx
+	//    [------]           d-offset
+	//        [------]       d
+	//
+	// and repeat the exercise until the two no longer overlap.
+	//
+	// This allows us to do very well in the special case of one single byte
+	// repeated many times, without taking a big hit for more general cases.
+	//
+	// The worst case of extra writing past the end of the match occurs when
+	// offset == 1 and length == 1; the last copy will read from byte positions
+	// [0..7] and write to [4..11], whereas it was only supposed to write to
+	// position 1. Thus, ten excess bytes.
+	//
+	// ----
+	//
+	// That "10 byte overrun" worst case is confirmed by Go's
+	// TestSlowForwardCopyOverrun, which also tests the fixUpSlowForwardCopy
+	// and finishSlowForwardCopy algorithm.
+	//
+	// if length > len(dst)-d-10 {
+	//   goto verySlowForwardCopy
+	// }
+	SUBQ $10, R14
+	CMPQ CX, R14
+	JGT  verySlowForwardCopy
+
+makeOffsetAtLeast8:
+	// !!! As above, expand the pattern so that offset >= 8 and we can use
+	// 8-byte load/stores.
+	//
+	// for offset < 8 {
+	//   copy 8 bytes from dst[d-offset:] to dst[d:]
+	//   length -= offset
+	//   d      += offset
+	//   offset += offset
+	//   // The two previous lines together means that d-offset, and therefore
+	//   // R15, is unchanged.
+	// }
+	CMPQ DX, $8
+	JGE  fixUpSlowForwardCopy
+	MOVQ (R15), BX
+	MOVQ BX, (DI)
+	SUBQ DX, CX
+	ADDQ DX, DI
+	ADDQ DX, DX
+	JMP  makeOffsetAtLeast8
+
+fixUpSlowForwardCopy:
+	// !!! Add length (which might be negative now) to d (implied by DI being
+	// &dst[d]) so that d ends up at the right place when we jump back to the
+	// top of the loop. Before we do that, though, we save DI to AX so that, if
+	// length is positive, copying the remaining length bytes will write to the
+	// right place.
+	MOVQ DI, AX
+	ADDQ CX, DI
+
+finishSlowForwardCopy:
+	// !!! Repeat 8-byte load/stores until length <= 0. Ending with a negative
+	// length means that we overrun, but as above, that will be fixed up by
+	// subsequent iterations of the outermost loop.
+	CMPQ CX, $0
+	JLE  loop
+	MOVQ (R15), BX
+	MOVQ BX, (AX)
+	ADDQ $8, R15
+	ADDQ $8, AX
+	SUBQ $8, CX
+	JMP  finishSlowForwardCopy
+
+verySlowForwardCopy:
+	// verySlowForwardCopy is a simple implementation of forward copy. In C
+	// parlance, this is a do/while loop instead of a while loop, since we know
+	// that length > 0. In Go syntax:
+	//
+	// for {
+	//   dst[d] = dst[d - offset]
+	//   d++
+	//   length--
+	//   if length == 0 {
+	//     break
+	//   }
+	// }
+	MOVB (R15), BX
+	MOVB BX, (DI)
+	INCQ R15
+	INCQ DI
+	DECQ CX
+	JNZ  verySlowForwardCopy
+	JMP  loop
+
+// The code above handles copy tags.
+// ----------------------------------------
+
+end:
+	// This is the end of the "for s < len(src)".
+	//
+	// if d != len(dst) { etc }
+	CMPQ DI, R10
+	JNE  errCorrupt
+
+	// return 0
+	MOVQ $0, ret+48(FP)
+	RET
+
+errCorrupt:
+	// return decodeErrCodeCorrupt
+	MOVQ $1, ret+48(FP)
+	RET
+
+errUC4T:
+	// return decodeErrCodeUnsupportedCopy4Tag
+	MOVQ $3, ret+48(FP)
+	RET
@@ -0,0 +1,96 @@
+// Copyright 2016 The Snappy-Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !amd64
+
+package snappy
+
+// decode writes the decoding of src to dst. It assumes that the varint-encoded
+// length of the decompressed bytes has already been read, and that len(dst)
+// equals that length.
+//
+// It returns 0 on success or a decodeErrCodeXxx error code on failure.
+func decode(dst, src []byte) int {
+	var d, s, offset, length int
+	for s < len(src) {
+		switch src[s] & 0x03 {
+		case tagLiteral:
+			x := uint32(src[s] >> 2)
+			switch {
+			case x < 60:
+				s++
+			case x == 60:
+				s += 2
+				if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
+					return decodeErrCodeCorrupt
+				}
+				x = uint32(src[s-1])
+			case x == 61:
+				s += 3
+				if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
+					return decodeErrCodeCorrupt
+				}
+				x = uint32(src[s-2]) | uint32(src[s-1])<<8
+			case x == 62:
+				s += 4
+				if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
+					return decodeErrCodeCorrupt
+				}
+				x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
+			case x == 63:
+				s += 5
+				if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
+					return decodeErrCodeCorrupt
+				}
+				x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
+			}
+			length = int(x) + 1
+			if length <= 0 {
+				return decodeErrCodeUnsupportedLiteralLength
+			}
+			if length > len(dst)-d || length > len(src)-s {
+				return decodeErrCodeCorrupt
+			}
+			copy(dst[d:], src[s:s+length])
+			d += length
+			s += length
+			continue
+
+		case tagCopy1:
+			s += 2
+			if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
+				return decodeErrCodeCorrupt
+			}
+			length = 4 + int(src[s-2])>>2&0x7
+			offset = int(src[s-2])&0xe0<<3 | int(src[s-1])
+
+		case tagCopy2:
+			s += 3
+			if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
+				return decodeErrCodeCorrupt
+			}
+			length = 1 + int(src[s-3])>>2
+			offset = int(src[s-2]) | int(src[s-1])<<8
+
+		case tagCopy4:
+			return decodeErrCodeUnsupportedCopy4Tag
+		}
+
+		if offset <= 0 || d < offset || length > len(dst)-d {
+			return decodeErrCodeCorrupt
+		}
+		// Copy from an earlier sub-slice of dst to a later sub-slice. Unlike
+		// the built-in copy function, this byte-by-byte copy always runs
+		// forwards, even if the slices overlap. Conceptually, this is:
+		//
+		// d += forwardCopy(dst[d:d+length], dst[d-offset:])
+		for end := d + length; d != end; d++ {
+			dst[d] = dst[d-offset]
+		}
+	}
+	if d != len(dst) {
+		return decodeErrCodeCorrupt
+	}
+	return 0
+}
@@ -0,0 +1,403 @@
+// Copyright 2011 The Snappy-Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package snappy
+
+import (
+	"encoding/binary"
+	"errors"
+	"io"
+)
+
+// maxOffset limits how far copy back-references can go, the same as the C++
+// code.
+const maxOffset = 1 << 15
+
+// emitLiteral writes a literal chunk and returns the number of bytes written.
+func emitLiteral(dst, lit []byte) int {
+	i, n := 0, uint(len(lit)-1)
+	switch {
+	case n < 60:
+		dst[0] = uint8(n)<<2 | tagLiteral
+		i = 1
+	case n < 1<<8:
+		dst[0] = 60<<2 | tagLiteral
+		dst[1] = uint8(n)
+		i = 2
+	case n < 1<<16:
+		dst[0] = 61<<2 | tagLiteral
+		dst[1] = uint8(n)
+		dst[2] = uint8(n >> 8)
+		i = 3
+	case n < 1<<24:
+		dst[0] = 62<<2 | tagLiteral
+		dst[1] = uint8(n)
+		dst[2] = uint8(n >> 8)
+		dst[3] = uint8(n >> 16)
+		i = 4
+	case int64(n) < 1<<32:
+		dst[0] = 63<<2 | tagLiteral
+		dst[1] = uint8(n)
+		dst[2] = uint8(n >> 8)
+		dst[3] = uint8(n >> 16)
+		dst[4] = uint8(n >> 24)
+		i = 5
+	default:
+		panic("snappy: source buffer is too long")
+	}
+	if copy(dst[i:], lit) != len(lit) {
+		panic("snappy: destination buffer is too short")
+	}
+	return i + len(lit)
+}
+
+// emitCopy writes a copy chunk and returns the number of bytes written.
+func emitCopy(dst []byte, offset, length int32) int {
+	i := 0
+	for length > 0 {
+		x := length - 4
+		if 0 <= x && x < 1<<3 && offset < 1<<11 {
+			dst[i+0] = uint8(offset>>8)&0x07<<5 | uint8(x)<<2 | tagCopy1
+			dst[i+1] = uint8(offset)
+			i += 2
+			break
+		}
+
+		x = length
+		if x > 1<<6 {
+			x = 1 << 6
+		}
+		dst[i+0] = uint8(x-1)<<2 | tagCopy2
+		dst[i+1] = uint8(offset)
+		dst[i+2] = uint8(offset >> 8)
+		i += 3
+		length -= x
+	}
+	return i
+}
+
+// Encode returns the encoded form of src. The returned slice may be a sub-
+// slice of dst if dst was large enough to hold the entire encoded block.
+// Otherwise, a newly allocated slice will be returned.
+//
+// It is valid to pass a nil dst.
+func Encode(dst, src []byte) []byte {
+	if n := MaxEncodedLen(len(src)); n < 0 {
+		panic(ErrTooLarge)
+	} else if len(dst) < n {
+		dst = make([]byte, n)
+	}
+
+	// The block starts with the varint-encoded length of the decompressed bytes.
+	d := binary.PutUvarint(dst, uint64(len(src)))
+
+	for len(src) > 0 {
+		p := src
+		src = nil
+		if len(p) > maxBlockSize {
+			p, src = p[:maxBlockSize], p[maxBlockSize:]
+		}
+		d += encodeBlock(dst[d:], p)
+	}
+	return dst[:d]
+}
+
+// encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It
+// assumes that the varint-encoded length of the decompressed bytes has already
+// been written.
+//
+// It also assumes that:
+//	len(dst) >= MaxEncodedLen(len(src)) &&
+// 	0 < len(src) && len(src) <= maxBlockSize
+func encodeBlock(dst, src []byte) (d int) {
+	// Return early if src is short.
+	if len(src) <= 4 {
+		return emitLiteral(dst, src)
+	}
+
+	// Initialize the hash table. Its size ranges from 1<<8 to 1<<14 inclusive.
+	const maxTableSize = 1 << 14
+	shift, tableSize := uint(32-8), 1<<8
+	for tableSize < maxTableSize && tableSize < len(src) {
+		shift--
+		tableSize *= 2
+	}
+	var table [maxTableSize]int32
+
+	// Iterate over the source bytes.
+	var (
+		s   int32 // The iterator position.
+		t   int32 // The last position with the same hash as s.
+		lit int32 // The start position of any pending literal bytes.
+
+		// Copied from the C++ snappy implementation:
+		//
+		// Heuristic match skipping: If 32 bytes are scanned with no matches
+		// found, start looking only at every other byte. If 32 more bytes are
+		// scanned, look at every third byte, etc.. When a match is found,
+		// immediately go back to looking at every byte. This is a small loss
+		// (~5% performance, ~0.1% density) for compressible data due to more
+		// bookkeeping, but for non-compressible data (such as JPEG) it's a
+		// huge win since the compressor quickly "realizes" the data is
+		// incompressible and doesn't bother looking for matches everywhere.
+		//
+		// The "skip" variable keeps track of how many bytes there are since
+		// the last match; dividing it by 32 (ie. right-shifting by five) gives
+		// the number of bytes to move ahead for each iteration.
+		skip uint32 = 32
+	)
+	for uint32(s+3) < uint32(len(src)) { // The uint32 conversions catch overflow from the +3.
+		// Update the hash table.
+		b0, b1, b2, b3 := src[s], src[s+1], src[s+2], src[s+3]
+		h := uint32(b0) | uint32(b1)<<8 | uint32(b2)<<16 | uint32(b3)<<24
+		p := &table[(h*0x1e35a7bd)>>shift]
+		// We need to to store values in [-1, inf) in table. To save
+		// some initialization time, (re)use the table's zero value
+		// and shift the values against this zero: add 1 on writes,
+		// subtract 1 on reads.
+		t, *p = *p-1, s+1
+		// If t is invalid or src[s:s+4] differs from src[t:t+4], accumulate a literal byte.
+		if t < 0 || s-t >= maxOffset || b0 != src[t] || b1 != src[t+1] || b2 != src[t+2] || b3 != src[t+3] {
+			s += int32(skip >> 5)
+			skip++
+			continue
+		}
+		skip = 32
+		// Otherwise, we have a match. First, emit any pending literal bytes.
+		if lit != s {
+			d += emitLiteral(dst[d:], src[lit:s])
+		}
+		// Extend the match to be as long as possible.
+		s0 := s
+		s, t = s+4, t+4
+		for int(s) < len(src) && src[s] == src[t] {
+			s++
+			t++
+		}
+		// Emit the copied bytes.
+		d += emitCopy(dst[d:], s-t, s-s0)
+		lit = s
+	}
+
+	// Emit any final pending literal bytes and return.
+	if int(lit) != len(src) {
+		d += emitLiteral(dst[d:], src[lit:])
+	}
+	return d
+}
+
+// MaxEncodedLen returns the maximum length of a snappy block, given its
+// uncompressed length.
+//
+// It will return a negative value if srcLen is too large to encode.
+func MaxEncodedLen(srcLen int) int {
+	n := uint64(srcLen)
+	if n > 0xffffffff {
+		return -1
+	}
+	// Compressed data can be defined as:
+	//    compressed := item* literal*
+	//    item       := literal* copy
+	//
+	// The trailing literal sequence has a space blowup of at most 62/60
+	// since a literal of length 60 needs one tag byte + one extra byte
+	// for length information.
+	//
+	// Item blowup is trickier to measure. Suppose the "copy" op copies
+	// 4 bytes of data. Because of a special check in the encoding code,
+	// we produce a 4-byte copy only if the offset is < 65536. Therefore
+	// the copy op takes 3 bytes to encode, and this type of item leads
+	// to at most the 62/60 blowup for representing literals.
+	//
+	// Suppose the "copy" op copies 5 bytes of data. If the offset is big
+	// enough, it will take 5 bytes to encode the copy op. Therefore the
+	// worst case here is a one-byte literal followed by a five-byte copy.
+	// That is, 6 bytes of input turn into 7 bytes of "compressed" data.
+	//
+	// This last factor dominates the blowup, so the final estimate is:
+	n = 32 + n + n/6
+	if n > 0xffffffff {
+		return -1
+	}
+	return int(n)
+}
+
+var errClosed = errors.New("snappy: Writer is closed")
+
+// NewWriter returns a new Writer that compresses to w.
+//
+// The Writer returned does not buffer writes. There is no need to Flush or
+// Close such a Writer.
+//
+// Deprecated: the Writer returned is not suitable for many small writes, only
+// for few large writes. Use NewBufferedWriter instead, which is efficient
+// regardless of the frequency and shape of the writes, and remember to Close
+// that Writer when done.
+func NewWriter(w io.Writer) *Writer {
+	return &Writer{
+		w:    w,
+		obuf: make([]byte, obufLen),
+	}
+}
+
+// NewBufferedWriter returns a new Writer that compresses to w, using the
+// framing format described at
+// https://github.com/google/snappy/blob/master/framing_format.txt
+//
+// The Writer returned buffers writes. Users must call Close to guarantee all
+// data has been forwarded to the underlying io.Writer. They may also call
+// Flush zero or more times before calling Close.
+func NewBufferedWriter(w io.Writer) *Writer {
+	return &Writer{
+		w:    w,
+		ibuf: make([]byte, 0, maxBlockSize),
+		obuf: make([]byte, obufLen),
+	}
+}
+
+// Writer is an io.Writer than can write Snappy-compressed bytes.
+type Writer struct {
+	w   io.Writer
+	err error
+
+	// ibuf is a buffer for the incoming (uncompressed) bytes.
+	//
+	// Its use is optional. For backwards compatibility, Writers created by the
+	// NewWriter function have ibuf == nil, do not buffer incoming bytes, and
+	// therefore do not need to be Flush'ed or Close'd.
+	ibuf []byte
+
+	// obuf is a buffer for the outgoing (compressed) bytes.
+	obuf []byte
+
+	// wroteStreamHeader is whether we have written the stream header.
+	wroteStreamHeader bool
+}
+
+// Reset discards the writer's state and switches the Snappy writer to write to
+// w. This permits reusing a Writer rather than allocating a new one.
+func (w *Writer) Reset(writer io.Writer) {
+	w.w = writer
+	w.err = nil
+	if w.ibuf != nil {
+		w.ibuf = w.ibuf[:0]
+	}
+	w.wroteStreamHeader = false
+}
+
+// Write satisfies the io.Writer interface.
+func (w *Writer) Write(p []byte) (nRet int, errRet error) {
+	if w.ibuf == nil {
+		// Do not buffer incoming bytes. This does not perform or compress well
+		// if the caller of Writer.Write writes many small slices. This
+		// behavior is therefore deprecated, but still supported for backwards
+		// compatibility with code that doesn't explicitly Flush or Close.
+		return w.write(p)
+	}
+
+	// The remainder of this method is based on bufio.Writer.Write from the
+	// standard library.
+
+	for len(p) > (cap(w.ibuf)-len(w.ibuf)) && w.err == nil {
+		var n int
+		if len(w.ibuf) == 0 {
+			// Large write, empty buffer.
+			// Write directly from p to avoid copy.
+			n, _ = w.write(p)
+		} else {
+			n = copy(w.ibuf[len(w.ibuf):cap(w.ibuf)], p)
+			w.ibuf = w.ibuf[:len(w.ibuf)+n]
+			w.Flush()
+		}
+		nRet += n
+		p = p[n:]
+	}
+	if w.err != nil {
+		return nRet, w.err
+	}
+	n := copy(w.ibuf[len(w.ibuf):cap(w.ibuf)], p)
+	w.ibuf = w.ibuf[:len(w.ibuf)+n]
+	nRet += n
+	return nRet, nil
+}
+
+func (w *Writer) write(p []byte) (nRet int, errRet error) {
+	if w.err != nil {
+		return 0, w.err
+	}
+	for len(p) > 0 {
+		obufStart := len(magicChunk)
+		if !w.wroteStreamHeader {
+			w.wroteStreamHeader = true
+			copy(w.obuf, magicChunk)
+			obufStart = 0
+		}
+
+		var uncompressed []byte
+		if len(p) > maxBlockSize {
+			uncompressed, p = p[:maxBlockSize], p[maxBlockSize:]
+		} else {
+			uncompressed, p = p, nil
+		}
+		checksum := crc(uncompressed)
+
+		// Compress the buffer, discarding the result if the improvement
+		// isn't at least 12.5%.
+		compressed := Encode(w.obuf[obufHeaderLen:], uncompressed)
+		chunkType := uint8(chunkTypeCompressedData)
+		chunkLen := 4 + len(compressed)
+		obufEnd := obufHeaderLen + len(compressed)
+		if len(compressed) >= len(uncompressed)-len(uncompressed)/8 {
+			chunkType = chunkTypeUncompressedData
+			chunkLen = 4 + len(uncompressed)
+			obufEnd = obufHeaderLen
+		}
+
+		// Fill in the per-chunk header that comes before the body.
+		w.obuf[len(magicChunk)+0] = chunkType
+		w.obuf[len(magicChunk)+1] = uint8(chunkLen >> 0)
+		w.obuf[len(magicChunk)+2] = uint8(chunkLen >> 8)
+		w.obuf[len(magicChunk)+3] = uint8(chunkLen >> 16)
+		w.obuf[len(magicChunk)+4] = uint8(checksum >> 0)
+		w.obuf[len(magicChunk)+5] = uint8(checksum >> 8)
+		w.obuf[len(magicChunk)+6] = uint8(checksum >> 16)
+		w.obuf[len(magicChunk)+7] = uint8(checksum >> 24)
+
+		if _, err := w.w.Write(w.obuf[obufStart:obufEnd]); err != nil {
+			w.err = err
+			return nRet, err
+		}
+		if chunkType == chunkTypeUncompressedData {
+			if _, err := w.w.Write(uncompressed); err != nil {
+				w.err = err
+				return nRet, err
+			}
+		}
+		nRet += len(uncompressed)
+	}
+	return nRet, nil
+}
+
+// Flush flushes the Writer to its underlying io.Writer.
+func (w *Writer) Flush() error {
+	if w.err != nil {
+		return w.err
+	}
+	if len(w.ibuf) == 0 {
+		return nil
+	}
+	w.write(w.ibuf)
+	w.ibuf = w.ibuf[:0]
+	return w.err
+}
+
+// Close calls Flush and then closes the Writer.
+func (w *Writer) Close() error {
+	w.Flush()
+	ret := w.err
+	if w.err == nil {
+		w.err = errClosed
+	}
+	return ret
+}
@@ -0,0 +1,84 @@
+// Copyright 2011 The Snappy-Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package snappy implements the snappy block-based compression format.
+// It aims for very high speeds and reasonable compression.
+//
+// The C++ snappy implementation is at https://github.com/google/snappy
+package snappy // import "github.com/golang/snappy"
+
+import (
+	"hash/crc32"
+)
+
+/*
+Each encoded block begins with the varint-encoded length of the decoded data,
+followed by a sequence of chunks. Chunks begin and end on byte boundaries. The
+first byte of each chunk is broken into its 2 least and 6 most significant bits
+called l and m: l ranges in [0, 4) and m ranges in [0, 64). l is the chunk tag.
+Zero means a literal tag. All other values mean a copy tag.
+
+For literal tags:
+  - If m < 60, the next 1 + m bytes are literal bytes.
+  - Otherwise, let n be the little-endian unsigned integer denoted by the next
+    m - 59 bytes. The next 1 + n bytes after that are literal bytes.
+
+For copy tags, length bytes are copied from offset bytes ago, in the style of
+Lempel-Ziv compression algorithms. In particular:
+  - For l == 1, the offset ranges in [0, 1<<11) and the length in [4, 12).
+    The length is 4 + the low 3 bits of m. The high 3 bits of m form bits 8-10
+    of the offset. The next byte is bits 0-7 of the offset.
+  - For l == 2, the offset ranges in [0, 1<<16) and the length in [1, 65).
+    The length is 1 + m. The offset is the little-endian unsigned integer
+    denoted by the next 2 bytes.
+  - For l == 3, this tag is a legacy format that is no longer supported.
+*/
+const (
+	tagLiteral = 0x00
+	tagCopy1   = 0x01
+	tagCopy2   = 0x02
+	tagCopy4   = 0x03
+)
+
+const (
+	checksumSize    = 4
+	chunkHeaderSize = 4
+	magicChunk      = "\xff\x06\x00\x00" + magicBody
+	magicBody       = "sNaPpY"
+
+	// maxBlockSize is the maximum size of the input to encodeBlock. It is not
+	// part of the wire format per se, but some parts of the encoder assume
+	// that an offset fits into a uint16.
+	//
+	// Also, for the framing format (Writer type instead of Encode function),
+	// https://github.com/google/snappy/blob/master/framing_format.txt says
+	// that "the uncompressed data in a chunk must be no longer than 65536
+	// bytes".
+	maxBlockSize = 65536
+
+	// maxEncodedLenOfMaxBlockSize equals MaxEncodedLen(maxBlockSize), but is
+	// hard coded to be a const instead of a variable, so that obufLen can also
+	// be a const. Their equivalence is confirmed by
+	// TestMaxEncodedLenOfMaxBlockSize.
+	maxEncodedLenOfMaxBlockSize = 76490
+
+	obufHeaderLen = len(magicChunk) + checksumSize + chunkHeaderSize
+	obufLen       = obufHeaderLen + maxEncodedLenOfMaxBlockSize
+)
+
+const (
+	chunkTypeCompressedData   = 0x00
+	chunkTypeUncompressedData = 0x01
+	chunkTypePadding          = 0xfe
+	chunkTypeStreamIdentifier = 0xff
+)
+
+var crcTable = crc32.MakeTable(crc32.Castagnoli)
+
+// crc implements the checksum specified in section 3 of
+// https://github.com/google/snappy/blob/master/framing_format.txt
+func crc(b []byte) uint32 {
+	c := crc32.Update(0, crcTable, b)
+	return uint32(c>>15|c<<17) + 0xa282ead8
+}
@@ -0,0 +1,973 @@
+// Copyright 2011 The Snappy-Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package snappy
+
+import (
+	"bytes"
+	"encoding/binary"
+	"flag"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"math/rand"
+	"net/http"
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+)
+
+var download = flag.Bool("download", false, "If true, download any missing files before running benchmarks")
+
+func TestMaxEncodedLenOfMaxBlockSize(t *testing.T) {
+	got := maxEncodedLenOfMaxBlockSize
+	want := MaxEncodedLen(maxBlockSize)
+	if got != want {
+		t.Fatalf("got %d, want %d", got, want)
+	}
+}
+
+func cmp(a, b []byte) error {
+	if bytes.Equal(a, b) {
+		return nil
+	}
+	if len(a) != len(b) {
+		return fmt.Errorf("got %d bytes, want %d", len(a), len(b))
+	}
+	for i := range a {
+		if a[i] != b[i] {
+			return fmt.Errorf("byte #%d: got 0x%02x, want 0x%02x", i, a[i], b[i])
+		}
+	}
+	return nil
+}
+
+func roundtrip(b, ebuf, dbuf []byte) error {
+	d, err := Decode(dbuf, Encode(ebuf, b))
+	if err != nil {
+		return fmt.Errorf("decoding error: %v", err)
+	}
+	if err := cmp(d, b); err != nil {
+		return fmt.Errorf("roundtrip mismatch: %v", err)
+	}
+	return nil
+}
+
+func TestEmpty(t *testing.T) {
+	if err := roundtrip(nil, nil, nil); err != nil {
+		t.Fatal(err)
+	}
+}
+
+func TestSmallCopy(t *testing.T) {
+	for _, ebuf := range [][]byte{nil, make([]byte, 20), make([]byte, 64)} {
+		for _, dbuf := range [][]byte{nil, make([]byte, 20), make([]byte, 64)} {
+			for i := 0; i < 32; i++ {
+				s := "aaaa" + strings.Repeat("b", i) + "aaaabbbb"
+				if err := roundtrip([]byte(s), ebuf, dbuf); err != nil {
+					t.Errorf("len(ebuf)=%d, len(dbuf)=%d, i=%d: %v", len(ebuf), len(dbuf), i, err)
+				}
+			}
+		}
+	}
+}
+
+func TestSmallRand(t *testing.T) {
+	rng := rand.New(rand.NewSource(1))
+	for n := 1; n < 20000; n += 23 {
+		b := make([]byte, n)
+		for i := range b {
+			b[i] = uint8(rng.Intn(256))
+		}
+		if err := roundtrip(b, nil, nil); err != nil {
+			t.Fatal(err)
+		}
+	}
+}
+
+func TestSmallRegular(t *testing.T) {
+	for n := 1; n < 20000; n += 23 {
+		b := make([]byte, n)
+		for i := range b {
+			b[i] = uint8(i%10 + 'a')
+		}
+		if err := roundtrip(b, nil, nil); err != nil {
+			t.Fatal(err)
+		}
+	}
+}
+
+func TestInvalidVarint(t *testing.T) {
+	testCases := []struct {
+		desc  string
+		input string
+	}{{
+		"invalid varint, final byte has continuation bit set",
+		"\xff",
+	}, {
+		"invalid varint, value overflows uint64",
+		"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x00",
+	}, {
+		// https://github.com/google/snappy/blob/master/format_description.txt
+		// says that "the stream starts with the uncompressed length [as a
+		// varint] (up to a maximum of 2^32 - 1)".
+		"valid varint (as uint64), but value overflows uint32",
+		"\x80\x80\x80\x80\x10",
+	}}
+
+	for _, tc := range testCases {
+		input := []byte(tc.input)
+		if _, err := DecodedLen(input); err != ErrCorrupt {
+			t.Errorf("%s: DecodedLen: got %v, want ErrCorrupt", tc.desc, err)
+		}
+		if _, err := Decode(nil, input); err != ErrCorrupt {
+			t.Errorf("%s: Decode: got %v, want ErrCorrupt", tc.desc, err)
+		}
+	}
+}
+
+func TestDecode(t *testing.T) {
+	lit40Bytes := make([]byte, 40)
+	for i := range lit40Bytes {
+		lit40Bytes[i] = byte(i)
+	}
+	lit40 := string(lit40Bytes)
+
+	testCases := []struct {
+		desc    string
+		input   string
+		want    string
+		wantErr error
+	}{{
+		`decodedLen=0; valid input`,
+		"\x00",
+		"",
+		nil,
+	}, {
+		`decodedLen=3; tagLiteral, 0-byte length; length=3; valid input`,
+		"\x03" + "\x08\xff\xff\xff",
+		"\xff\xff\xff",
+		nil,
+	}, {
+		`decodedLen=2; tagLiteral, 0-byte length; length=3; not enough dst bytes`,
+		"\x02" + "\x08\xff\xff\xff",
+		"",
+		ErrCorrupt,
+	}, {
+		`decodedLen=3; tagLiteral, 0-byte length; length=3; not enough src bytes`,
+		"\x03" + "\x08\xff\xff",
+		"",
+		ErrCorrupt,
+	}, {
+		`decodedLen=40; tagLiteral, 0-byte length; length=40; valid input`,
+		"\x28" + "\x9c" + lit40,
+		lit40,
+		nil,
+	}, {
+		`decodedLen=1; tagLiteral, 1-byte length; not enough length bytes`,
+		"\x01" + "\xf0",
+		"",
+		ErrCorrupt,
+	}, {
+		`decodedLen=3; tagLiteral, 1-byte length; length=3; valid input`,
+		"\x03" + "\xf0\x02\xff\xff\xff",
+		"\xff\xff\xff",
+		nil,
+	}, {
+		`decodedLen=1; tagLiteral, 2-byte length; not enough length bytes`,
+		"\x01" + "\xf4\x00",
+		"",
+		ErrCorrupt,
+	}, {
+		`decodedLen=3; tagLiteral, 2-byte length; length=3; valid input`,
+		"\x03" + "\xf4\x02\x00\xff\xff\xff",
+		"\xff\xff\xff",
+		nil,
+	}, {
+		`decodedLen=1; tagLiteral, 3-byte length; not enough length bytes`,
+		"\x01" + "\xf8\x00\x00",
+		"",
+		ErrCorrupt,
+	}, {
+		`decodedLen=3; tagLiteral, 3-byte length; length=3; valid input`,
+		"\x03" + "\xf8\x02\x00\x00\xff\xff\xff",
+		"\xff\xff\xff",
+		nil,
+	}, {
+		`decodedLen=1; tagLiteral, 4-byte length; not enough length bytes`,
+		"\x01" + "\xfc\x00\x00\x00",
+		"",
+		ErrCorrupt,
+	}, {
+		`decodedLen=1; tagLiteral, 4-byte length; length=3; not enough dst bytes`,
+		"\x01" + "\xfc\x02\x00\x00\x00\xff\xff\xff",
+		"",
+		ErrCorrupt,
+	}, {
+		`decodedLen=4; tagLiteral, 4-byte length; length=3; not enough src bytes`,
+		"\x04" + "\xfc\x02\x00\x00\x00\xff",
+		"",
+		ErrCorrupt,
+	}, {
+		`decodedLen=3; tagLiteral, 4-byte length; length=3; valid input`,
+		"\x03" + "\xfc\x02\x00\x00\x00\xff\xff\xff",
+		"\xff\xff\xff",
+		nil,
+	}, {
+		`decodedLen=4; tagCopy1, 1 extra length|offset byte; not enough extra bytes`,
+		"\x04" + "\x01",
+		"",
+		ErrCorrupt,
+	}, {
+		`decodedLen=4; tagCopy2, 2 extra length|offset bytes; not enough extra bytes`,
+		"\x04" + "\x02\x00",
+		"",
+		ErrCorrupt,
+	}, {
+		`decodedLen=4; tagCopy4; unsupported COPY_4 tag`,
+		"\x04" + "\x03\x00\x00\x00\x00",
+		"",
+		errUnsupportedCopy4Tag,
+	}, {
+		`decodedLen=4; tagLiteral (4 bytes "abcd"); valid input`,
+		"\x04" + "\x0cabcd",
+		"abcd",
+		nil,
+	}, {
+		`decodedLen=13; tagLiteral (4 bytes "abcd"); tagCopy1; length=9 offset=4; valid input`,
+		"\x0d" + "\x0cabcd" + "\x15\x04",
+		"abcdabcdabcda",
+		nil,
+	}, {
+		`decodedLen=8; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=4; valid input`,
+		"\x08" + "\x0cabcd" + "\x01\x04",
+		"abcdabcd",
+		nil,
+	}, {
+		`decodedLen=8; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=2; valid input`,
+		"\x08" + "\x0cabcd" + "\x01\x02",
+		"abcdcdcd",
+		nil,
+	}, {
+		`decodedLen=8; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=1; valid input`,
+		"\x08" + "\x0cabcd" + "\x01\x01",
+		"abcddddd",
+		nil,
+	}, {
+		`decodedLen=8; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=0; zero offset`,
+		"\x08" + "\x0cabcd" + "\x01\x00",
+		"",
+		ErrCorrupt,
+	}, {
+		`decodedLen=9; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=4; inconsistent dLen`,
+		"\x09" + "\x0cabcd" + "\x01\x04",
+		"",
+		ErrCorrupt,
+	}, {
+		`decodedLen=8; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=5; offset too large`,
+		"\x08" + "\x0cabcd" + "\x01\x05",
+		"",
+		ErrCorrupt,
+	}, {
+		`decodedLen=7; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=4; length too large`,
+		"\x07" + "\x0cabcd" + "\x01\x04",
+		"",
+		ErrCorrupt,
+	}, {
+		`decodedLen=6; tagLiteral (4 bytes "abcd"); tagCopy2; length=2 offset=3; valid input`,
+		"\x06" + "\x0cabcd" + "\x06\x03\x00",
+		"abcdbc",
+		nil,
+	}}
+
+	const (
+		// notPresentXxx defines a range of byte values [0xa0, 0xc5) that are
+		// not present in either the input or the output. It is written to dBuf
+		// to check that Decode does not write bytes past the end of
+		// dBuf[:dLen].
+		//
+		// The magic number 37 was chosen because it is prime. A more 'natural'
+		// number like 32 might lead to a false negative if, for example, a
+		// byte was incorrectly copied 4*8 bytes later.
+		notPresentBase = 0xa0
+		notPresentLen  = 37
+	)
+
+	var dBuf [100]byte
+loop:
+	for i, tc := range testCases {
+		input := []byte(tc.input)
+		for _, x := range input {
+			if notPresentBase <= x && x < notPresentBase+notPresentLen {
+				t.Errorf("#%d (%s): input shouldn't contain %#02x\ninput: % x", i, tc.desc, x, input)
+				continue loop
+			}
+		}
+
+		dLen, n := binary.Uvarint(input)
+		if n <= 0 {
+			t.Errorf("#%d (%s): invalid varint-encoded dLen", i, tc.desc)
+			continue
+		}
+		if dLen > uint64(len(dBuf)) {
+			t.Errorf("#%d (%s): dLen %d is too large", i, tc.desc, dLen)
+			continue
+		}
+
+		for j := range dBuf {
+			dBuf[j] = byte(notPresentBase + j%notPresentLen)
+		}
+		g, gotErr := Decode(dBuf[:], input)
+		if got := string(g); got != tc.want || gotErr != tc.wantErr {
+			t.Errorf("#%d (%s):\ngot  %q, %v\nwant %q, %v",
+				i, tc.desc, got, gotErr, tc.want, tc.wantErr)
+			continue
+		}
+		for j, x := range dBuf {
+			if uint64(j) < dLen {
+				continue
+			}
+			if w := byte(notPresentBase + j%notPresentLen); x != w {
+				t.Errorf("#%d (%s): Decode overrun: dBuf[%d] was modified: got %#02x, want %#02x\ndBuf: % x",
+					i, tc.desc, j, x, w, dBuf)
+				continue loop
+			}
+		}
+	}
+}
+
+// TestDecodeLengthOffset tests decoding an encoding of the form literal +
+// copy-length-offset + literal. For example: "abcdefghijkl" + "efghij" + "AB".
+func TestDecodeLengthOffset(t *testing.T) {
+	const (
+		prefix = "abcdefghijklmnopqr"
+		suffix = "ABCDEFGHIJKLMNOPQR"
+
+		// notPresentXxx defines a range of byte values [0xa0, 0xc5) that are
+		// not present in either the input or the output. It is written to
+		// gotBuf to check that Decode does not write bytes past the end of
+		// gotBuf[:totalLen].
+		//
+		// The magic number 37 was chosen because it is prime. A more 'natural'
+		// number like 32 might lead to a false negative if, for example, a
+		// byte was incorrectly copied 4*8 bytes later.
+		notPresentBase = 0xa0
+		notPresentLen  = 37
+	)
+	var gotBuf, wantBuf, inputBuf [128]byte
+	for length := 1; length <= 18; length++ {
+		for offset := 1; offset <= 18; offset++ {
+		loop:
+			for suffixLen := 0; suffixLen <= 18; suffixLen++ {
+				totalLen := len(prefix) + length + suffixLen
+
+				inputLen := binary.PutUvarint(inputBuf[:], uint64(totalLen))
+				inputBuf[inputLen] = tagLiteral + 4*byte(len(prefix)-1)
+				inputLen++
+				inputLen += copy(inputBuf[inputLen:], prefix)
+				inputBuf[inputLen+0] = tagCopy2 + 4*byte(length-1)
+				inputBuf[inputLen+1] = byte(offset)
+				inputBuf[inputLen+2] = 0x00
+				inputLen += 3
+				if suffixLen > 0 {
+					inputBuf[inputLen] = tagLiteral + 4*byte(suffixLen-1)
+					inputLen++
+					inputLen += copy(inputBuf[inputLen:], suffix[:suffixLen])
+				}
+				input := inputBuf[:inputLen]
+
+				for i := range gotBuf {
+					gotBuf[i] = byte(notPresentBase + i%notPresentLen)
+				}
+				got, err := Decode(gotBuf[:], input)
+				if err != nil {
+					t.Errorf("length=%d, offset=%d; suffixLen=%d: %v", length, offset, suffixLen, err)
+					continue
+				}
+
+				wantLen := 0
+				wantLen += copy(wantBuf[wantLen:], prefix)
+				for i := 0; i < length; i++ {
+					wantBuf[wantLen] = wantBuf[wantLen-offset]
+					wantLen++
+				}
+				wantLen += copy(wantBuf[wantLen:], suffix[:suffixLen])
+				want := wantBuf[:wantLen]
+
+				for _, x := range input {
+					if notPresentBase <= x && x < notPresentBase+notPresentLen {
+						t.Errorf("length=%d, offset=%d; suffixLen=%d: input shouldn't contain %#02x\ninput: % x",
+							length, offset, suffixLen, x, input)
+						continue loop
+					}
+				}
+				for i, x := range gotBuf {
+					if i < totalLen {
+						continue
+					}
+					if w := byte(notPresentBase + i%notPresentLen); x != w {
+						t.Errorf("length=%d, offset=%d; suffixLen=%d; totalLen=%d: "+
+							"Decode overrun: gotBuf[%d] was modified: got %#02x, want %#02x\ngotBuf: % x",
+							length, offset, suffixLen, totalLen, i, x, w, gotBuf)
+						continue loop
+					}
+				}
+				for _, x := range want {
+					if notPresentBase <= x && x < notPresentBase+notPresentLen {
+						t.Errorf("length=%d, offset=%d; suffixLen=%d: want shouldn't contain %#02x\nwant: % x",
+							length, offset, suffixLen, x, want)
+						continue loop
+					}
+				}
+
+				if !bytes.Equal(got, want) {
+					t.Errorf("length=%d, offset=%d; suffixLen=%d:\ninput % x\ngot   % x\nwant  % x",
+						length, offset, suffixLen, input, got, want)
+					continue
+				}
+			}
+		}
+	}
+}
+
+func TestDecodeGoldenInput(t *testing.T) {
+	src, err := ioutil.ReadFile("testdata/pi.txt.rawsnappy")
+	if err != nil {
+		t.Fatalf("ReadFile: %v", err)
+	}
+	got, err := Decode(nil, src)
+	if err != nil {
+		t.Fatalf("Decode: %v", err)
+	}
+	want, err := ioutil.ReadFile("testdata/pi.txt")
+	if err != nil {
+		t.Fatalf("ReadFile: %v", err)
+	}
+	if err := cmp(got, want); err != nil {
+		t.Fatal(err)
+	}
+}
+
+// TestSlowForwardCopyOverrun tests the "expand the pattern" algorithm
+// described in decode_amd64.s and its claim of a 10 byte overrun worst case.
+func TestSlowForwardCopyOverrun(t *testing.T) {
+	const base = 100
+
+	for length := 1; length < 18; length++ {
+		for offset := 1; offset < 18; offset++ {
+			highWaterMark := base
+			d := base
+			l := length
+			o := offset
+
+			// makeOffsetAtLeast8
+			for o < 8 {
+				if end := d + 8; highWaterMark < end {
+					highWaterMark = end
+				}
+				l -= o
+				d += o
+				o += o
+			}
+
+			// fixUpSlowForwardCopy
+			a := d
+			d += l
+
+			// finishSlowForwardCopy
+			for l > 0 {
+				if end := a + 8; highWaterMark < end {
+					highWaterMark = end
+				}
+				a += 8
+				l -= 8
+			}
+
+			dWant := base + length
+			overrun := highWaterMark - dWant
+			if d != dWant || overrun < 0 || 10 < overrun {
+				t.Errorf("length=%d, offset=%d: d and overrun: got (%d, %d), want (%d, something in [0, 10])",
+					length, offset, d, overrun, dWant)
+			}
+		}
+	}
+}
+
+// TestEncodeNoiseThenRepeats encodes input for which the first half is very
+// incompressible and the second half is very compressible. The encoded form's
+// length should be closer to 50% of the original length than 100%.
+func TestEncodeNoiseThenRepeats(t *testing.T) {
+	for _, origLen := range []int{32 * 1024, 256 * 1024, 2048 * 1024} {
+		src := make([]byte, origLen)
+		rng := rand.New(rand.NewSource(1))
+		firstHalf, secondHalf := src[:origLen/2], src[origLen/2:]
+		for i := range firstHalf {
+			firstHalf[i] = uint8(rng.Intn(256))
+		}
+		for i := range secondHalf {
+			secondHalf[i] = uint8(i >> 8)
+		}
+		dst := Encode(nil, src)
+		if got, want := len(dst), origLen*3/4; got >= want {
+			t.Errorf("origLen=%d: got %d encoded bytes, want less than %d", origLen, got, want)
+		}
+	}
+}
+
+func TestFramingFormat(t *testing.T) {
+	// src is comprised of alternating 1e5-sized sequences of random
+	// (incompressible) bytes and repeated (compressible) bytes. 1e5 was chosen
+	// because it is larger than maxBlockSize (64k).
+	src := make([]byte, 1e6)
+	rng := rand.New(rand.NewSource(1))
+	for i := 0; i < 10; i++ {
+		if i%2 == 0 {
+			for j := 0; j < 1e5; j++ {
+				src[1e5*i+j] = uint8(rng.Intn(256))
+			}
+		} else {
+			for j := 0; j < 1e5; j++ {
+				src[1e5*i+j] = uint8(i)
+			}
+		}
+	}
+
+	buf := new(bytes.Buffer)
+	if _, err := NewWriter(buf).Write(src); err != nil {
+		t.Fatalf("Write: encoding: %v", err)
+	}
+	dst, err := ioutil.ReadAll(NewReader(buf))
+	if err != nil {
+		t.Fatalf("ReadAll: decoding: %v", err)
+	}
+	if err := cmp(dst, src); err != nil {
+		t.Fatal(err)
+	}
+}
+
+func TestWriterGoldenOutput(t *testing.T) {
+	buf := new(bytes.Buffer)
+	w := NewBufferedWriter(buf)
+	defer w.Close()
+	w.Write([]byte("abcd")) // Not compressible.
+	w.Flush()
+	w.Write(bytes.Repeat([]byte{'A'}, 100)) // Compressible.
+	w.Flush()
+	got := buf.String()
+	want := strings.Join([]string{
+		magicChunk,
+		"\x01\x08\x00\x00", // Uncompressed chunk, 8 bytes long (including 4 byte checksum).
+		"\x68\x10\xe6\xb6", // Checksum.
+		"\x61\x62\x63\x64", // Uncompressed payload: "abcd".
+		"\x00\x0d\x00\x00", // Compressed chunk, 13 bytes long (including 4 byte checksum).
+		"\x37\xcb\xbc\x9d", // Checksum.
+		"\x64",             // Compressed payload: Uncompressed length (varint encoded): 100.
+		"\x00\x41",         // Compressed payload: tagLiteral, length=1, "A".
+		"\xfe\x01\x00",     // Compressed payload: tagCopy2,   length=64, offset=1.
+		"\x8a\x01\x00",     // Compressed payload: tagCopy2,   length=35, offset=1.
+	}, "")
+	if got != want {
+		t.Fatalf("\ngot:  % x\nwant: % x", got, want)
+	}
+}
+
+func TestNewBufferedWriter(t *testing.T) {
+	// Test all 32 possible sub-sequences of these 5 input slices.
+	//
+	// Their lengths sum to 400,000, which is over 6 times the Writer ibuf
+	// capacity: 6 * maxBlockSize is 393,216.
+	inputs := [][]byte{
+		bytes.Repeat([]byte{'a'}, 40000),
+		bytes.Repeat([]byte{'b'}, 150000),
+		bytes.Repeat([]byte{'c'}, 60000),
+		bytes.Repeat([]byte{'d'}, 120000),
+		bytes.Repeat([]byte{'e'}, 30000),
+	}
+loop:
+	for i := 0; i < 1<<uint(len(inputs)); i++ {
+		var want []byte
+		buf := new(bytes.Buffer)
+		w := NewBufferedWriter(buf)
+		for j, input := range inputs {
+			if i&(1<<uint(j)) == 0 {
+				continue
+			}
+			if _, err := w.Write(input); err != nil {
+				t.Errorf("i=%#02x: j=%d: Write: %v", i, j, err)
+				continue loop
+			}
+			want = append(want, input...)
+		}
+		if err := w.Close(); err != nil {
+			t.Errorf("i=%#02x: Close: %v", i, err)
+			continue
+		}
+		got, err := ioutil.ReadAll(NewReader(buf))
+		if err != nil {
+			t.Errorf("i=%#02x: ReadAll: %v", i, err)
+			continue
+		}
+		if err := cmp(got, want); err != nil {
+			t.Errorf("i=%#02x: %v", i, err)
+			continue
+		}
+	}
+}
+
+func TestFlush(t *testing.T) {
+	buf := new(bytes.Buffer)
+	w := NewBufferedWriter(buf)
+	defer w.Close()
+	if _, err := w.Write(bytes.Repeat([]byte{'x'}, 20)); err != nil {
+		t.Fatalf("Write: %v", err)
+	}
+	if n := buf.Len(); n != 0 {
+		t.Fatalf("before Flush: %d bytes were written to the underlying io.Writer, want 0", n)
+	}
+	if err := w.Flush(); err != nil {
+		t.Fatalf("Flush: %v", err)
+	}
+	if n := buf.Len(); n == 0 {
+		t.Fatalf("after Flush: %d bytes were written to the underlying io.Writer, want non-0", n)
+	}
+}
+
+func TestReaderReset(t *testing.T) {
+	gold := bytes.Repeat([]byte("All that is gold does not glitter,\n"), 10000)
+	buf := new(bytes.Buffer)
+	if _, err := NewWriter(buf).Write(gold); err != nil {
+		t.Fatalf("Write: %v", err)
+	}
+	encoded, invalid, partial := buf.String(), "invalid", "partial"
+	r := NewReader(nil)
+	for i, s := range []string{encoded, invalid, partial, encoded, partial, invalid, encoded, encoded} {
+		if s == partial {
+			r.Reset(strings.NewReader(encoded))
+			if _, err := r.Read(make([]byte, 101)); err != nil {
+				t.Errorf("#%d: %v", i, err)
+				continue
+			}
+			continue
+		}
+		r.Reset(strings.NewReader(s))
+		got, err := ioutil.ReadAll(r)
+		switch s {
+		case encoded:
+			if err != nil {
+				t.Errorf("#%d: %v", i, err)
+				continue
+			}
+			if err := cmp(got, gold); err != nil {
+				t.Errorf("#%d: %v", i, err)
+				continue
+			}
+		case invalid:
+			if err == nil {
+				t.Errorf("#%d: got nil error, want non-nil", i)
+				continue
+			}
+		}
+	}
+}
+
+func TestWriterReset(t *testing.T) {
+	gold := bytes.Repeat([]byte("Not all those who wander are lost;\n"), 10000)
+	const n = 20
+	for _, buffered := range []bool{false, true} {
+		var w *Writer
+		if buffered {
+			w = NewBufferedWriter(nil)
+			defer w.Close()
+		} else {
+			w = NewWriter(nil)
+		}
+
+		var gots, wants [][]byte
+		failed := false
+		for i := 0; i <= n; i++ {
+			buf := new(bytes.Buffer)
+			w.Reset(buf)
+			want := gold[:len(gold)*i/n]
+			if _, err := w.Write(want); err != nil {
+				t.Errorf("#%d: Write: %v", i, err)
+				failed = true
+				continue
+			}
+			if buffered {
+				if err := w.Flush(); err != nil {
+					t.Errorf("#%d: Flush: %v", i, err)
+					failed = true
+					continue
+				}
+			}
+			got, err := ioutil.ReadAll(NewReader(buf))
+			if err != nil {
+				t.Errorf("#%d: ReadAll: %v", i, err)
+				failed = true
+				continue
+			}
+			gots = append(gots, got)
+			wants = append(wants, want)
+		}
+		if failed {
+			continue
+		}
+		for i := range gots {
+			if err := cmp(gots[i], wants[i]); err != nil {
+				t.Errorf("#%d: %v", i, err)
+			}
+		}
+	}
+}
+
+func TestWriterResetWithoutFlush(t *testing.T) {
+	buf0 := new(bytes.Buffer)
+	buf1 := new(bytes.Buffer)
+	w := NewBufferedWriter(buf0)
+	if _, err := w.Write([]byte("xxx")); err != nil {
+		t.Fatalf("Write #0: %v", err)
+	}
+	// Note that we don't Flush the Writer before calling Reset.
+	w.Reset(buf1)
+	if _, err := w.Write([]byte("yyy")); err != nil {
+		t.Fatalf("Write #1: %v", err)
+	}
+	if err := w.Flush(); err != nil {
+		t.Fatalf("Flush: %v", err)
+	}
+	got, err := ioutil.ReadAll(NewReader(buf1))
+	if err != nil {
+		t.Fatalf("ReadAll: %v", err)
+	}
+	if err := cmp(got, []byte("yyy")); err != nil {
+		t.Fatal(err)
+	}
+}
+
+type writeCounter int
+
+func (c *writeCounter) Write(p []byte) (int, error) {
+	*c++
+	return len(p), nil
+}
+
+// TestNumUnderlyingWrites tests that each Writer flush only makes one or two
+// Write calls on its underlying io.Writer, depending on whether or not the
+// flushed buffer was compressible.
+func TestNumUnderlyingWrites(t *testing.T) {
+	testCases := []struct {
+		input []byte
+		want  int
+	}{
+		{bytes.Repeat([]byte{'x'}, 100), 1},
+		{bytes.Repeat([]byte{'y'}, 100), 1},
+		{[]byte("ABCDEFGHIJKLMNOPQRST"), 2},
+	}
+
+	var c writeCounter
+	w := NewBufferedWriter(&c)
+	defer w.Close()
+	for i, tc := range testCases {
+		c = 0
+		if _, err := w.Write(tc.input); err != nil {
+			t.Errorf("#%d: Write: %v", i, err)
+			continue
+		}
+		if err := w.Flush(); err != nil {
+			t.Errorf("#%d: Flush: %v", i, err)
+			continue
+		}
+		if int(c) != tc.want {
+			t.Errorf("#%d: got %d underlying writes, want %d", i, c, tc.want)
+			continue
+		}
+	}
+}
+
+func benchDecode(b *testing.B, src []byte) {
+	encoded := Encode(nil, src)
+	// Bandwidth is in amount of uncompressed data.
+	b.SetBytes(int64(len(src)))
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		Decode(src, encoded)
+	}
+}
+
+func benchEncode(b *testing.B, src []byte) {
+	// Bandwidth is in amount of uncompressed data.
+	b.SetBytes(int64(len(src)))
+	dst := make([]byte, MaxEncodedLen(len(src)))
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		Encode(dst, src)
+	}
+}
+
+func readFile(b testing.TB, filename string) []byte {
+	src, err := ioutil.ReadFile(filename)
+	if err != nil {
+		b.Skipf("skipping benchmark: %v", err)
+	}
+	if len(src) == 0 {
+		b.Fatalf("%s has zero length", filename)
+	}
+	return src
+}
+
+// expand returns a slice of length n containing repeated copies of src.
+func expand(src []byte, n int) []byte {
+	dst := make([]byte, n)
+	for x := dst; len(x) > 0; {
+		i := copy(x, src)
+		x = x[i:]
+	}
+	return dst
+}
+
+func benchWords(b *testing.B, n int, decode bool) {
+	// Note: the file is OS-language dependent so the resulting values are not
+	// directly comparable for non-US-English OS installations.
+	data := expand(readFile(b, "/usr/share/dict/words"), n)
+	if decode {
+		benchDecode(b, data)
+	} else {
+		benchEncode(b, data)
+	}
+}
+
+func BenchmarkWordsDecode1e1(b *testing.B) { benchWords(b, 1e1, true) }
+func BenchmarkWordsDecode1e2(b *testing.B) { benchWords(b, 1e2, true) }
+func BenchmarkWordsDecode1e3(b *testing.B) { benchWords(b, 1e3, true) }
+func BenchmarkWordsDecode1e4(b *testing.B) { benchWords(b, 1e4, true) }
+func BenchmarkWordsDecode1e5(b *testing.B) { benchWords(b, 1e5, true) }
+func BenchmarkWordsDecode1e6(b *testing.B) { benchWords(b, 1e6, true) }
+func BenchmarkWordsEncode1e1(b *testing.B) { benchWords(b, 1e1, false) }
+func BenchmarkWordsEncode1e2(b *testing.B) { benchWords(b, 1e2, false) }
+func BenchmarkWordsEncode1e3(b *testing.B) { benchWords(b, 1e3, false) }
+func BenchmarkWordsEncode1e4(b *testing.B) { benchWords(b, 1e4, false) }
+func BenchmarkWordsEncode1e5(b *testing.B) { benchWords(b, 1e5, false) }
+func BenchmarkWordsEncode1e6(b *testing.B) { benchWords(b, 1e6, false) }
+
+func BenchmarkRandomEncode(b *testing.B) {
+	rng := rand.New(rand.NewSource(1))
+	data := make([]byte, 1<<20)
+	for i := range data {
+		data[i] = uint8(rng.Intn(256))
+	}
+	benchEncode(b, data)
+}
+
+// testFiles' values are copied directly from
+// https://raw.githubusercontent.com/google/snappy/master/snappy_unittest.cc
+// The label field is unused in snappy-go.
+var testFiles = []struct {
+	label     string
+	filename  string
+	sizeLimit int
+}{
+	{"html", "html", 0},
+	{"urls", "urls.10K", 0},
+	{"jpg", "fireworks.jpeg", 0},
+	{"jpg_200", "fireworks.jpeg", 200},
+	{"pdf", "paper-100k.pdf", 0},
+	{"html4", "html_x_4", 0},
+	{"txt1", "alice29.txt", 0},
+	{"txt2", "asyoulik.txt", 0},
+	{"txt3", "lcet10.txt", 0},
+	{"txt4", "plrabn12.txt", 0},
+	{"pb", "geo.protodata", 0},
+	{"gaviota", "kppkn.gtb", 0},
+}
+
+const (
+	// The benchmark data files are at this canonical URL.
+	benchURL = "https://raw.githubusercontent.com/google/snappy/master/testdata/"
+
+	// They are copied to this local directory.
+	benchDir = "testdata/bench"
+)
+
+func downloadBenchmarkFiles(b *testing.B, basename string) (errRet error) {
+	filename := filepath.Join(benchDir, basename)
+	if stat, err := os.Stat(filename); err == nil && stat.Size() != 0 {
+		return nil
+	}
+
+	if !*download {
+		b.Skipf("test data not found; skipping benchmark without the -download flag")
+	}
+	// Download the official snappy C++ implementation reference test data
+	// files for benchmarking.
+	if err := os.MkdirAll(benchDir, 0777); err != nil && !os.IsExist(err) {
+		return fmt.Errorf("failed to create %s: %s", benchDir, err)
+	}
+
+	f, err := os.Create(filename)
+	if err != nil {
+		return fmt.Errorf("failed to create %s: %s", filename, err)
+	}
+	defer f.Close()
+	defer func() {
+		if errRet != nil {
+			os.Remove(filename)
+		}
+	}()
+	url := benchURL + basename
+	resp, err := http.Get(url)
+	if err != nil {
+		return fmt.Errorf("failed to download %s: %s", url, err)
+	}
+	defer resp.Body.Close()
+	if s := resp.StatusCode; s != http.StatusOK {
+		return fmt.Errorf("downloading %s: HTTP status code %d (%s)", url, s, http.StatusText(s))
+	}
+	_, err = io.Copy(f, resp.Body)
+	if err != nil {
+		return fmt.Errorf("failed to download %s to %s: %s", url, filename, err)
+	}
+	return nil
+}
+
+func benchFile(b *testing.B, n int, decode bool) {
+	if err := downloadBenchmarkFiles(b, testFiles[n].filename); err != nil {
+		b.Fatalf("failed to download testdata: %s", err)
+	}
+	data := readFile(b, filepath.Join(benchDir, testFiles[n].filename))
+	if n := testFiles[n].sizeLimit; 0 < n && n < len(data) {
+		data = data[:n]
+	}
+	if decode {
+		benchDecode(b, data)
+	} else {
+		benchEncode(b, data)
+	}
+}
+
+// Naming convention is kept similar to what snappy's C++ implementation uses.
+func Benchmark_UFlat0(b *testing.B)  { benchFile(b, 0, true) }
+func Benchmark_UFlat1(b *testing.B)  { benchFile(b, 1, true) }
+func Benchmark_UFlat2(b *testing.B)  { benchFile(b, 2, true) }
+func Benchmark_UFlat3(b *testing.B)  { benchFile(b, 3, true) }
+func Benchmark_UFlat4(b *testing.B)  { benchFile(b, 4, true) }
+func Benchmark_UFlat5(b *testing.B)  { benchFile(b, 5, true) }
+func Benchmark_UFlat6(b *testing.B)  { benchFile(b, 6, true) }
+func Benchmark_UFlat7(b *testing.B)  { benchFile(b, 7, true) }
+func Benchmark_UFlat8(b *testing.B)  { benchFile(b, 8, true) }
+func Benchmark_UFlat9(b *testing.B)  { benchFile(b, 9, true) }
+func Benchmark_UFlat10(b *testing.B) { benchFile(b, 10, true) }
+func Benchmark_UFlat11(b *testing.B) { benchFile(b, 11, true) }
+func Benchmark_ZFlat0(b *testing.B)  { benchFile(b, 0, false) }
+func Benchmark_ZFlat1(b *testing.B)  { benchFile(b, 1, false) }
+func Benchmark_ZFlat2(b *testing.B)  { benchFile(b, 2, false) }
+func Benchmark_ZFlat3(b *testing.B)  { benchFile(b, 3, false) }
+func Benchmark_ZFlat4(b *testing.B)  { benchFile(b, 4, false) }
+func Benchmark_ZFlat5(b *testing.B)  { benchFile(b, 5, false) }
+func Benchmark_ZFlat6(b *testing.B)  { benchFile(b, 6, false) }
+func Benchmark_ZFlat7(b *testing.B)  { benchFile(b, 7, false) }
+func Benchmark_ZFlat8(b *testing.B)  { benchFile(b, 8, false) }
+func Benchmark_ZFlat9(b *testing.B)  { benchFile(b, 9, false) }
+func Benchmark_ZFlat10(b *testing.B) { benchFile(b, 10, false) }
+func Benchmark_ZFlat11(b *testing.B) { benchFile(b, 11, false) }