vendor: Update everything

GitHub-Pull-Request: https://github.com/syncthing/syncthing/pull/4620
This commit is contained in:
Jakob Borg
2017-12-29 11:38:00 +00:00
parent 1296a22069
commit c24bf7ea55
1070 changed files with 294926 additions and 488191 deletions

View File

@@ -14,7 +14,9 @@ const (
Size = 4
)
type digest struct {
// Adler32 is a digest which satisfies the rollinghash.Hash32 interface.
// It implements the adler32 algorithm https://en.wikipedia.org/wiki/Adler-32
type Adler32 struct {
a, b uint32
// window is treated like a circular buffer, where the oldest element
@@ -26,44 +28,43 @@ type digest struct {
vanilla hash.Hash32
}
// Reset resets the Hash to its initial state.
func (d *digest) Reset() {
d.window = d.window[:0] // Reset the size but don't reallocate
// Reset resets the digest to its initial state.
func (d *Adler32) Reset() {
d.window = d.window[:1] // Reset the size but don't reallocate
d.window[0] = 0
d.a = 1
d.b = 0
d.oldest = 0
}
// New returns a new rollinghash.Hash32 computing the rolling Adler-32
// checksum. The window is copied from the last Write(). This window is
// only used to determine which is the oldest element (leaving the
// window). The calls to Roll() do not recompute the whole checksum.
func New() rollinghash.Hash32 {
return &digest{
// New returns a new Adler32 digest
func New() *Adler32 {
return &Adler32{
a: 1,
b: 0,
window: make([]byte, 0),
window: make([]byte, 1, rollinghash.DefaultWindowCap),
oldest: 0,
vanilla: vanilla.New(),
}
}
// Size returns the number of bytes Sum will return.
func (d *digest) Size() int { return Size }
// Size is 4 bytes
func (d *Adler32) Size() int { return Size }
// BlockSize returns the hash's underlying block size.
// The Write method must be able to accept any amount
// of data, but it may operate more efficiently if all
// writes are a multiple of the block size.
func (d *digest) BlockSize() int { return 1 }
// BlockSize is 1 byte
func (d *Adler32) BlockSize() int { return 1 }
// Write (via the embedded io.Writer interface) adds more data to the
// running hash. It never returns an error.
func (d *digest) Write(p []byte) (int, error) {
// Write (re)initializes the rolling window with the input byte slice and
// adds its data to the digest.
func (d *Adler32) Write(p []byte) (int, error) {
// Copy the window, avoiding allocations where possible
if len(d.window) != len(p) {
if cap(d.window) >= len(p) {
d.window = d.window[:len(p)]
l := len(p)
if l == 0 {
l = 1
}
if len(d.window) != l {
if cap(d.window) >= l {
d.window = d.window[:l]
} else {
d.window = make([]byte, len(p))
}
@@ -79,23 +80,20 @@ func (d *digest) Write(p []byte) (int, error) {
return len(d.window), nil
}
func (d *digest) Sum32() uint32 {
// Sum32 returns the hash as a uint32
func (d *Adler32) Sum32() uint32 {
return d.b<<16 | d.a
}
func (d *digest) Sum(b []byte) []byte {
// Sum returns the hash as a byte slice
func (d *Adler32) Sum(b []byte) []byte {
v := d.Sum32()
return append(b, byte(v>>24), byte(v>>16), byte(v>>8), byte(v))
}
// Roll updates the checksum of the window from the leaving byte and the
// entering byte. See
// http://stackoverflow.com/questions/40985080/why-does-my-rolling-adler32-checksum-not-work-in-go-modulo-arithmetic
func (d *digest) Roll(b byte) {
if len(d.window) == 0 {
d.window = make([]byte, 1)
d.window[0] = b
}
// Roll updates the checksum of the window from the entering byte. You
// MUST initialize a window with Write() before calling this method.
func (d *Adler32) Roll(b byte) {
// extract the entering/leaving bytes and update the circular buffer.
enter := uint32(b)
leave := uint32(d.window[d.oldest])
@@ -105,7 +103,7 @@ func (d *digest) Roll(b byte) {
d.oldest = 0
}
// compute
// See http://stackoverflow.com/questions/40985080/why-does-my-rolling-adler32-checksum-not-work-in-go-modulo-arithmetic
d.a = (d.a + Mod + enter - leave) % Mod
d.b = (d.b + (d.n*leave/Mod+1)*Mod + d.a - (d.n * leave) - 1) % Mod
}

View File

@@ -0,0 +1,103 @@
// Package rollinghash/bozo32 is a wrong implementation of the rabinkarp
// checksum. In practice, it works very well and exhibits all the
// properties wanted from a rolling checksum, so after realising that this
// code did not implement the rabinkarp checksum as described in the
// original paper, it was renamed from rabinkarp32 to bozo32 and kept
// in this package.
package bozo32
import rollinghash "github.com/chmduquesne/rollinghash"
// Size is the size of a Bozo32 checksum, in bytes.
const Size = 4
// Bozo32 is a digest which satisfies the rollinghash.Hash32 interface.
type Bozo32 struct {
	a       uint32 // multiplier applied to the hash for every byte
	h       uint32 // current hash value
	aPowerN uint32 // a multiplied by itself once per written byte (wraps on overflow)

	// window is treated like a circular buffer, where the oldest element
	// is indicated by d.oldest
	window []byte
	oldest int
}

// Reset resets the Hash to its initial state. The multiplier a is kept.
//
// The window is left holding a single zero byte (reusing its backing
// array when there is one), which is the state NewFromInt produces. Roll
// indexes the window unconditionally, so leaving it nil here would make a
// Roll issued right after Reset panic.
func (d *Bozo32) Reset() {
	d.h = 0
	d.aPowerN = 1
	// d.window[:0] is valid on a nil slice, so this also covers a zero
	// value Bozo32.
	d.window = append(d.window[:0], 0)
	d.oldest = 0
}
// NewFromInt returns a Bozo32 digest that uses a as its multiplier. The
// digest starts with a zero hash and a one-byte window whose capacity is
// rollinghash.DefaultWindowCap.
func NewFromInt(a uint32) *Bozo32 {
	d := new(Bozo32)
	d.a = a
	d.aPowerN = 1
	d.window = make([]byte, 1, rollinghash.DefaultWindowCap)
	return d
}
// New returns a Bozo32 digest built with the default multiplier 65521.
func New() *Bozo32 {
	const defaultMultiplier = 65521 // largest prime fitting in 16 bits
	return NewFromInt(defaultMultiplier)
}
// Size returns the length of the checksum in bytes, which is 4.
func (d *Bozo32) Size() int {
	return Size
}
// BlockSize returns the block size of the hash, which is 1 byte.
func (d *Bozo32) BlockSize() int {
	return 1
}
// Write (re)initializes the rolling window with the input byte slice and
// adds its data to the digest. It never returns an error.
//
// The running hash is not cleared: bytes are folded into whatever state
// earlier Writes left behind. Call Reset first to hash data from scratch.
// The returned count is len(data), as io.Writer requires.
func (d *Bozo32) Write(data []byte) (int, error) {
	// The window always holds at least one byte so that Roll can index
	// it; an empty write is treated as a window made of one zero byte.
	l := len(data)
	if l == 0 {
		l = 1
	}
	// Reuse the backing array whenever it is large enough. Testing cap
	// rather than len is what lets the preallocated capacity be used.
	if cap(d.window) >= l {
		d.window = d.window[:l]
	} else {
		d.window = make([]byte, l)
	}
	copy(d.window, data)
	if len(data) == 0 {
		// Nothing was copied: do not hash a stale byte from a previous window.
		d.window[0] = 0
	}
	// The window was just rewritten in order, so its oldest byte is at
	// index 0. A stale index could point past the end of a shorter window
	// and make Roll panic.
	d.oldest = 0

	for _, c := range d.window {
		d.h *= d.a
		d.h += uint32(c)
		d.aPowerN *= d.a
	}
	return len(data), nil
}
// Sum32 returns the current value of the hash as a uint32.
func (d *Bozo32) Sum32() uint32 { return d.h }
// Sum appends the current hash to b, most significant byte first, and
// returns the resulting slice.
func (d *Bozo32) Sum(b []byte) []byte {
	sum := d.Sum32()
	var be [4]byte
	for i := range be {
		// be[0] receives bits 31..24, be[3] bits 7..0
		be[i] = byte(sum >> uint(8*(3-i)))
	}
	return append(b, be[:]...)
}
// Roll slides the window by one byte: c enters, the oldest byte leaves,
// and the checksum is updated accordingly. You MUST initialize a window
// with Write() before calling this method.
func (d *Bozo32) Roll(c byte) {
	// swap the leaving byte for the entering one in the circular buffer
	pos := d.oldest
	leaving := uint32(d.window[pos])
	d.window[pos] = c

	// advance the oldest index, wrapping at the end of the window
	pos++
	if pos >= len(d.window) {
		pos = 0
	}
	d.oldest = pos

	d.h = d.h*d.a + uint32(c) - leaving*d.aPowerN
}

View File

@@ -3,69 +3,25 @@
package buzhash32
import rollinghash "github.com/chmduquesne/rollinghash"
import (
"math/rand"
// 256 random integers generated with a dummy python script
var DefaultHash = [256]uint32{
0xa5659a00, 0x2dbfda02, 0xac29a407, 0xce942c08, 0x48513609,
0x325f158, 0xb54e5e13, 0xa9063618, 0xa5793419, 0x554b081a,
0xe5643dac, 0xfb50e41c, 0x2b31661d, 0x335da61f, 0xe702f7b0,
0xe31c1424, 0x6dfed825, 0xd30cf628, 0xba626a2a, 0x74b9c22b,
0xa5d1942d, 0xf364ae2f, 0x70d2e84c, 0x190ad208, 0x92e3b740,
0xd7e9f435, 0x15763836, 0x930ecab4, 0x641ea65e, 0xc0b2eb0a,
0x2675e03e, 0x1a24c63f, 0xeddbcbb7, 0x3ea42bb2, 0x815f5849,
0xa55c284b, 0xbb30964c, 0x6f7acc4e, 0x74538a50, 0x66df9652,
0x2bae8454, 0xfe9d8055, 0x8c866fd4, 0x82f0a63d, 0x8f26365e,
0xe66c3460, 0x6423266, 0x60696abc, 0xf75de6d, 0xd20c86e,
0x69f8c6f, 0x8ac0f470, 0x273aab68, 0x4e044c74, 0xb2ec7875,
0xf642d676, 0xd719e877, 0xee557e78, 0xdd20be7a, 0xd252707e,
0xfa507a7f, 0xee537683, 0x6aac7684, 0x340e3485, 0x1c291288,
0xab89c8c, 0xbe6e6c8d, 0xf99cf2f7, 0x69c65890, 0xd3757491,
0xfeb63895, 0x67067a96, 0xa0089b19, 0x6c449898, 0x4eca749a,
0x1101229b, 0x6b86d29d, 0x9c21be9e, 0xc5904933, 0xe1e820a3,
0x6bd524a6, 0xd4695ea7, 0xc3d007e0, 0xbed8e4a9, 0x1c49d8af,
0xedbae4b1, 0x1d2af6b4, 0x79526b9, 0xbc1d5abb, 0x6a2eb8bc,
0x611b3695, 0x745c3cc4, 0x81005276, 0x5f442c8, 0x42dc30ca,
0x55e460cb, 0x47648cc, 0x20da7122, 0xc4eedccd, 0xc21c14d0,
0x27b5dfa9, 0x7e961fce, 0x8d0296d6, 0xce3684d7, 0x28e96da,
0xedf7dcdc, 0x6817a0df, 0x51caae0, 0x8f226e1, 0xa1a00ce3,
0xf811c6e5, 0x13e96ee6, 0xd4d4e4d1, 0xab160ee9, 0xb2cf06ea,
0xf4ab6eb, 0x998f56f1, 0x16974cf2, 0xd42438f5, 0xe00ba6f7,
0xbf01b8f8, 0x7a8a00f9, 0xdded6a7f, 0xb0ce58fd, 0xe5d81901,
0xcc823b03, 0xc962e704, 0x2b4aff05, 0x5bcb7181, 0xe7207108,
0xf3c93109, 0x1ffb650a, 0x37a31ad7, 0xfe27322d, 0x15b16d11,
0x51a70512, 0xb579d92e, 0x53658284, 0x91fedb1b, 0x2ef0b122,
0x93966523, 0xfa66af26, 0xa7fac32b, 0x7a81692c, 0x4f8d7f2e,
0xf9875730, 0xa5ab2331, 0x79db8333, 0x8be32937, 0xf900af39,
0xd09d4f3a, 0x9b22053d, 0xd2053e1c, 0xd0deaa35, 0x4a975740,
0xcb3706e0, 0x40aea6cd, 0x769fdd44, 0x7e3e4947, 0xc20ac949,
0x3788c34b, 0x9b23f74c, 0xb33e441d, 0x705d8a8d, 0x6a5e3a84,
0xb4f955e3, 0xf681a155, 0x7dec1b56, 0x7bf5df58, 0xd3fa255a,
0x3797c15c, 0xbf511562, 0xb048d65, 0xcd04f367, 0xae3a8368,
0x769c856d, 0xc7bb9d6f, 0xe43e1f71, 0xa24de03e, 0x7f8cb376,
0x618b778, 0x19e02f33, 0x2f810eea, 0x2b1ce595, 0x4f2f7180,
0x72903140, 0x26a44584, 0x6af97e96, 0xb08acb86, 0x4d25cd41,
0x1d74fd89, 0xe0f5b277, 0xbad158c, 0x5fed3b8d, 0x68b26794,
0xcbe58795, 0xc1180797, 0xa1352399, 0x71dacd9c, 0x42b5549a,
0xbf5371a0, 0x7ed41fa1, 0x6fe29a3, 0xa779fba5, 0x48a095a7,
0xc2cad5a8, 0x7d7f15a9, 0xccd195aa, 0x2a9047ac, 0x3ec66ef2,
0x252743ae, 0xdd8827af, 0x85fc5055, 0xb9d5c7b2, 0x5a224fb4,
0xec26e7b6, 0xe4d8f7b7, 0x6e5aa58d, 0xeff753b9, 0x6c391fbb,
0x989f65bc, 0x2fe4a7c1, 0x9d1d9bc3, 0xa09aadc6, 0x2df33fc8,
0x5ec27933, 0x5e7f41cb, 0xb920f7cd, 0xc1a603ce, 0xf0888fcf,
0xdc4ad1d1, 0x34b3dbd4, 0x170981d5, 0x22e5b5d6, 0x13049bd7,
0xf12a8b95, 0xff7e87d9, 0xabb74b84, 0x215cff4f, 0xaf24f7dc,
0xc87461d, 0x41a55e0, 0xfde9b9e1, 0x1d1956fb, 0x13d60de4,
0x435f93e5, 0xe0ab5de6, 0x5c1d3fe7, 0x411a1fe8, 0x55e102a9,
0x3d9b07eb, 0xdd6b8dee, 0x741293f3, 0xa5b10ca9, 0x5abad5fd,
0x22372f55,
rollinghash "github.com/chmduquesne/rollinghash"
)
var defaultHashes [256]uint32
func init() {
defaultHashes = GenerateHashes(1)
}
// The size of the checksum.
const Size = 4
// digest represents the partial evaluation of a checksum.
type digest struct {
// Buzhash32 is a digest which satisfies the rollinghash.Hash32 interface.
// It implements the cyclic polynomial algorithm
// https://en.wikipedia.org/wiki/Rolling_hash#Cyclic_polynomial
type Buzhash32 struct {
sum uint32
nRotate uint
nRotateComplement uint // redundant, but pre-computed to spare an operation
@@ -78,44 +34,62 @@ type digest struct {
}
// Reset resets the Hash to its initial state.
func (d *digest) Reset() {
func (d *Buzhash32) Reset() {
d.window = d.window[:0]
d.oldest = 0
d.sum = 0
}
func New() rollinghash.Hash32 {
return NewFromUint32Array(DefaultHash)
// GenerateHashes builds a table of 256 pairwise distinct uint32 values to
// use with buzhash. The values come from a math/rand generator seeded
// with seed, so a given seed always produces the same table.
func GenerateHashes(seed int64) (res [256]uint32) {
	rng := rand.New(rand.NewSource(seed))
	seen := make(map[uint32]struct{}, len(res))
	for i := range res {
		// draw until a value shows up that is not in the table yet
		for {
			candidate := uint32(rng.Int63())
			if _, dup := seen[candidate]; dup {
				continue
			}
			seen[candidate] = struct{}{}
			res[i] = candidate
			break
		}
	}
	return res
}
// New returns a buzhash based on a list of hashes provided by a call to
// GenerateHashes, seeded with the default value 1.
func New() *Buzhash32 {
return NewFromUint32Array(defaultHashes)
}
// NewFromUint32Array returns a buzhash based on the provided table uint32 values.
func NewFromUint32Array(b [256]uint32) rollinghash.Hash32 {
return &digest{
func NewFromUint32Array(b [256]uint32) *Buzhash32 {
return &Buzhash32{
sum: 0,
window: make([]byte, 0),
window: make([]byte, 1, rollinghash.DefaultWindowCap),
oldest: 0,
bytehash: b,
}
}
// Size returns the number of bytes Sum will return.
func (d *digest) Size() int { return Size }
// Size is 4 bytes
func (d *Buzhash32) Size() int { return Size }
// BlockSize returns the hash's underlying block size.
// The Write method must be able to accept any amount
// of data, but it may operate more efficiently if all
// writes are a multiple of the block size.
func (d *digest) BlockSize() int { return 1 }
// BlockSize is 1 byte
func (d *Buzhash32) BlockSize() int { return 1 }
// Write (via the embedded io.Writer interface) adds more data to the
// running hash. It never returns an error.
func (d *digest) Write(data []byte) (int, error) {
// Write (re)initializes the rolling window with the input byte slice and
// adds its data to the digest.
func (d *Buzhash32) Write(data []byte) (int, error) {
// Copy the window, avoiding allocations where possible
if len(d.window) != len(data) {
if cap(d.window) >= len(data) {
d.window = d.window[:len(data)]
l := len(data)
if l == 0 {
l = 1
}
if len(d.window) != l {
if cap(d.window) >= l {
d.window = d.window[:l]
} else {
d.window = make([]byte, len(data))
d.window = make([]byte, l)
}
}
copy(d.window, data)
@@ -129,22 +103,20 @@ func (d *digest) Write(data []byte) (int, error) {
return len(d.window), nil
}
func (d *digest) Sum32() uint32 {
// Sum32 returns the hash as a uint32
func (d *Buzhash32) Sum32() uint32 {
return d.sum
}
func (d *digest) Sum(b []byte) []byte {
// Sum returns the hash as byte slice
func (d *Buzhash32) Sum(b []byte) []byte {
v := d.Sum32()
return append(b, byte(v>>24), byte(v>>16), byte(v>>8), byte(v))
}
// Roll updates the checksum of the window from the leaving byte and the
// entering byte.
func (d *digest) Roll(c byte) {
if len(d.window) == 0 {
d.window = make([]byte, 1)
d.window[0] = c
}
// Roll updates the checksum of the window from the entering byte. You
// MUST initialize a window with Write() before calling this method.
func (d *Buzhash32) Roll(c byte) {
// extract the entering/leaving bytes and update the circular buffer.
hn := d.bytehash[int(c)]
h0 := d.bytehash[int(d.window[d.oldest])]

View File

@@ -3,103 +3,25 @@
package buzhash64
import rollinghash "github.com/chmduquesne/rollinghash"
import (
"math/rand"
// 256 random integers generated with a dummy python script
var DefaultHash = [256]uint64{
0xd6923700885676e1, 0x2ef758a165917c6c, 0xcac8db9a800db08f,
0x91dfa96019476e5f, 0x61ad4b5c6ec62e4b, 0xbabfc786038a37cb,
0xb68fe9816c09bb98, 0x6dae71ffcf505baf, 0x8f1d5ac180423f59,
0x2ddcaf458c114dae, 0x2975abd372acbb39, 0x620f80a1e7fb8ca0,
0xf8d9b75b40d1fdda, 0x81bff1a297143fab, 0x81935f4d4c31ae6e,
0xf4e0765a732a3a36, 0x0cded3fd708f0f14, 0xa89cb64087b25da9,
0xa69372234eb0602d, 0x773a079265484e2d, 0x8dbc0985c9c4e1cb,
0x000a09a5bc2c80b0, 0xdcaa87a327cead66, 0xd26eaa01fb42ef69,
0x34411456e2c244d7, 0x1082e6fb20af4bea, 0x1e00897e330f3832,
0x4253bef8099f370d, 0x890ce98ec0e8a69c, 0x89eb60e611308754,
0xb39c22caeb5444f1, 0x3e841276d561b022, 0x45292a4e1aaeb117,
0x1a4b1f1d7aeb46d1, 0x7016fc7d7b3114a6, 0x4fc9ea1dfd505a34,
0x97b6013b3739d65e, 0x7fcc6abfae8eb598, 0xff8ec196383c66f2,
0x87ca90161ecaf261, 0xc27ac70e06c9caa3, 0x42c4d7617c362ede,
0xb38656002f3984f0, 0x0520f83a5be24d68, 0x097cdf0f89aa5ad6,
0xcc2c65d8ab0e1e32, 0x8c8ebfd12b2c4fa9, 0x9e99c42db2e8be1d,
0x7bcef376a9003964, 0xbd9bc65dbfebce71, 0xd47a52cea9f0bc02,
0xeadb465977d2d8ca, 0x43065df5caca1a4b, 0x82f5ae94dd2cc349,
0xc4e362ab8614dd84, 0xc8922bf4a4bebf05, 0xb1719f57f9a1ed23,
0xe93a41737e8094ac, 0x33e611a02d4abc93, 0x1dcdb2d07ea310bc,
0xf7a85d96655b03ef, 0x60aafabd410c3180, 0x18c401b08a67ffeb,
0xc1eed3417948c90f, 0x525bfe6ad095d998, 0x2a97938c7fd244c2,
0xbb75ef8569ba728c, 0x53f47ee01b7d1915, 0x51025252faf2890e,
0xf6bd601ee7ad2608, 0x06a07a64f7afbffa, 0x224f41d09b13aed5,
0x9f80d30ece1bcd5c, 0x6ce1076c6780de0c, 0xfd123415c8262763,
0x0d5a643d04d9f438, 0xb92e476b8a36d170, 0x0f533c6c9f196cce,
0x0071ebbeb03d43af, 0x00dcbdee475f482d, 0x3339362a5b7c099c,
0x2f957910672cf39e, 0xd69554bbea71bb60, 0x635dd0f5801c9d13,
0x9832470506cba5cd, 0x77625064508cebba, 0xf428e6bfb38a5d01,
0x4a086e0cf23ab715, 0xb958fe962ca69576, 0x5d0ab146601ee29f,
0x90f0042e06fcc096, 0xba69eaa94dd5cbcc, 0xa821915b9a5fa628,
0xea4f4c03801babed, 0xbc7d5f845d913103, 0xe3cc105d6e4a11ea,
0x251f29b1422b1af5, 0xd700ffdb510d7634, 0x3002ebfda5cc4592,
0xf5614fc379a46223, 0x02cb3e88a92ab123, 0x4dab9392f9075ca5,
0xc8d8c5b39eb3e593, 0x7d6545c168d526df, 0x3cd78f7794445ee4,
0x24e2a4f47772f09a, 0x43be5ca35c81d4ec, 0x77583ba052e5b605,
0x92e07779ea9ccd7f, 0xb9dc8617c0a14ea8, 0x8a2821cb56440f77,
0x15f29e095f8b279e, 0x75c12968e423728c, 0x98cfdf60152b8d2f,
0x3b5a8db5cf80bd68, 0x2356e64e821e3ac4, 0x320b7aef2daff0d4,
0xbae4290e875658bf, 0x3b569a663e0b2445, 0xc494ce552c404288,
0x37a905ddeb550d88, 0x2333bcdc81c0c5c3, 0x8d2682d13259af0c,
0x5ad34026f7e9b8f4, 0x081970325f7f949d, 0xbcf17bf08e61ef19,
0xb3e5da3782fd7f03, 0x8ed53c8ec27635e1, 0x79fca624a1e73b7c,
0xdc9bdb3be0b69b20, 0xc119a348042544cd, 0x1c2408e49ed2a747,
0xe85f0237669d180d, 0x4508bcebda7465f9, 0x5af245c13d3a8ef7,
0xbb8bb6b61f021ed0, 0x48eaa45234935f75, 0x2f78f8fb1695eb65,
0x5dd1e1c8c20a1b76, 0x2f74a22a3159ec45, 0xc64f9c864dfb98cf,
0xf928618091913d32, 0xec08db6828a11873, 0x029ba990fa5cdba6,
0x94b870390499d9ba, 0x1086685fce933b2c, 0x6065be1f390c003a,
0x0f46e9a9d5197803, 0x42833f7327727669, 0xdda6c27eb0d682b3,
0x5ec3a67f39a77d05, 0x818f5646400a80ec, 0xe45c502c1b655c1b,
0xd56ddb4fddd63c56, 0x7ebc81bd9fd90fd1, 0x4f6c111625fb5c8e,
0x6c0fc5f0487dc6ee, 0xc57a12a7159119ed, 0x526bc3b3aadd9dd6,
0xe89f8367962fe1ea, 0x72bac3c1c99d1845, 0x6f56a75582ae96b9,
0x7d23f484a9a317f1, 0xe876956fd23c9f95, 0xdd6411629a0dab0a,
0x827046f4383dad03, 0x36aa4c0e807f9a6d, 0xcfe6ae3f86224a12,
0x84802ff4baf0e073, 0x19d786fe8a6eecd6, 0x38e9f4a7a4ce611a,
0x5442a62e65063565, 0x6a6780a6d0257b82, 0x39af9a8cf5786bd7,
0xe65d071b8fb1c8ee, 0xa63ebe71ad620e4f, 0xdfaaadf4584a0b68,
0x7bb8f20bd9681981, 0xbfa8bbaae1c5db8b, 0xae3a8b06f286932a,
0xe92a89eebe1f3292, 0xf11e1c10444edbd2, 0xaf8308bd4915c7f3,
0x8a1338317833acdc, 0xcec67d8359c7f0e8, 0x3f66a4906e23838a,
0x9e959f9b1c22fef3, 0x8b5404e71735a246, 0xcbddfc7a87347d03,
0x7a0d9bd544622f25, 0x3a78e12aab2f532f, 0xddf89b2aecd51922,
0x38f7465f6d416db4, 0x4349369edbf8ea2a, 0x5e4d38719ad9d621,
0x0ec281878dddca6a, 0x1c92cae74d6b897a, 0xa0c7c7149a8a76b3,
0xc469dca35bf1cb2a, 0x6a902e29fcf0ecd4, 0x8c455620d8f5df32,
0x0b435e9d1c207663, 0x51299e4c5ccbfbd2, 0x365add776bcad536,
0x957aa2746c2bd41e, 0x414ec15efe36e3a1, 0x6faed19dc4940f61,
0x6766d7072a6e1d87, 0x3c01b82ebdff7a2d, 0xbbbe879684ec244c,
0xa425c502184dc5b4, 0x02d77f005bb369ad, 0xb56546c281f8c88f,
0xb49a866ea16fc9e9, 0x93ee62b3965991ec, 0xf03d0958eb9664a9,
0x7e57cce4c6c8d5ab, 0x6ae6f4180ea9c5b1, 0xc45fdb113dfba663,
0x7892fabea1c2d876, 0x7b39106ce2f6d405, 0x12332253ddcff808,
0x877af9766d5147c4, 0xbbfe3ac2eb6e9d3f, 0xd298d13ac6c3c8c4,
0x142bc26ad3606528, 0xb0665de1231f2938, 0xf68498ac39f406ec,
0xc68379a33b570cfe, 0xb43cfe7fcd5d6688, 0x0e18e07f10ee779c,
0xa021ffa7e745086d, 0xa113db9a2c6bdb43, 0xa00e360382ecd221,
0x192dc98cbd494a06, 0xb0c9f52cf0252d86, 0x3efb668bcba50726,
0x114c30f72555d676, 0x99259c3011e85910, 0x5e6c7d80d32133ec,
0xfa445c39db50cb51, 0x14f1d142aac12947, 0x04dcb1a831c0e97a,
0x3102eda0466cb1d7, 0xc57ea8effb8c20f5, 0xa3641775b56361af,
0xaf9608c03cc46398, 0x023b9055ff80b8dc, 0x91965be76eddb8f0,
0xdcdffd182d67712f, 0xe8bf232ef77feef7, 0x0cc8d45930eb0846,
0xef2d62d35924c29a, 0x8a68c569490911e2, 0xc44a865ef922d723,
0xc942fc5e5c343766,
"github.com/chmduquesne/rollinghash"
)
var defaultHashes [256]uint64
func init() {
defaultHashes = GenerateHashes(1)
}
// The size of the checksum.
const Size = 8
// digest represents the partial evaluation of a checksum.
type digest struct {
// Buzhash64 is a digest which satisfies the rollinghash.Hash64 interface.
// It implements the cyclic polynomial algorithm
// https://en.wikipedia.org/wiki/Rolling_hash#Cyclic_polynomial
type Buzhash64 struct {
sum uint64
nRotate uint
nRotateComplement uint // redundant, but pre-computed to spare an operation
@@ -112,44 +34,62 @@ type digest struct {
}
// Reset resets the Hash to its initial state.
func (d *digest) Reset() {
func (d *Buzhash64) Reset() {
d.window = d.window[:0]
d.oldest = 0
d.sum = 0
}
func New() rollinghash.Hash64 {
return NewFromUint64Array(DefaultHash)
// GenerateHashes builds a table of 256 pairwise distinct uint64 values to
// use with buzhash. The values come from a math/rand generator seeded
// with seed, so a given seed always produces the same table. Each value
// is taken from Int63, so its most significant bit is always zero.
func GenerateHashes(seed int64) (res [256]uint64) {
	rng := rand.New(rand.NewSource(seed))
	seen := make(map[uint64]struct{}, len(res))
	for i := range res {
		// draw until a value shows up that is not in the table yet
		for {
			candidate := uint64(rng.Int63())
			if _, dup := seen[candidate]; dup {
				continue
			}
			seen[candidate] = struct{}{}
			res[i] = candidate
			break
		}
	}
	return res
}
// NewFromUint32Array returns a buzhash based on the provided table uint32 values.
func NewFromUint64Array(b [256]uint64) rollinghash.Hash64 {
return &digest{
// New returns a buzhash based on a list of hashes provided by a call to
// GenerateHashes, seeded with the default value 1.
func New() *Buzhash64 {
return NewFromUint64Array(defaultHashes)
}
// NewFromUint64Array returns a buzhash based on the provided table uint64 values.
func NewFromUint64Array(b [256]uint64) *Buzhash64 {
return &Buzhash64{
sum: 0,
window: make([]byte, 0),
window: make([]byte, 1, rollinghash.DefaultWindowCap),
oldest: 0,
bytehash: b,
}
}
// Size returns the number of bytes Sum will return.
func (d *digest) Size() int { return Size }
// Size is 8 bytes
func (d *Buzhash64) Size() int { return Size }
// BlockSize returns the hash's underlying block size.
// The Write method must be able to accept any amount
// of data, but it may operate more efficiently if all
// writes are a multiple of the block size.
func (d *digest) BlockSize() int { return 1 }
// BlockSize is 1 byte
func (d *Buzhash64) BlockSize() int { return 1 }
// Write (via the embedded io.Writer interface) adds more data to the
// running hash. It never returns an error.
func (d *digest) Write(data []byte) (int, error) {
// Write (re)initializes the rolling window with the input byte slice and
// adds its data to the digest.
func (d *Buzhash64) Write(data []byte) (int, error) {
// Copy the window, avoiding allocations where possible
if len(d.window) != len(data) {
if cap(d.window) >= len(data) {
d.window = d.window[:len(data)]
l := len(data)
if l == 0 {
l = 1
}
if len(d.window) != l {
if cap(d.window) >= l {
d.window = d.window[:l]
} else {
d.window = make([]byte, len(data))
d.window = make([]byte, l)
}
}
copy(d.window, data)
@@ -163,22 +103,20 @@ func (d *digest) Write(data []byte) (int, error) {
return len(d.window), nil
}
func (d *digest) Sum64() uint64 {
// Sum64 returns the hash as a uint64
func (d *Buzhash64) Sum64() uint64 {
return d.sum
}
func (d *digest) Sum(b []byte) []byte {
// Sum returns the hash as a byte slice
func (d *Buzhash64) Sum(b []byte) []byte {
v := d.Sum64()
return append(b, byte(v>>56), byte(v>>48), byte(v>>40), byte(v>>32), byte(v>>24), byte(v>>16), byte(v>>8), byte(v))
}
// Roll updates the checksum of the window from the leaving byte and the
// entering byte.
func (d *digest) Roll(c byte) {
if len(d.window) == 0 {
d.window = make([]byte, 1)
d.window[0] = c
}
// Roll updates the checksum of the window from the entering byte. You
// MUST initialize a window with Write() before calling this method.
func (d *Buzhash64) Roll(c byte) {
// extract the entering/leaving bytes and update the circular buffer.
hn := d.bytehash[int(c)]
h0 := d.bytehash[int(d.window[d.oldest])]

View File

@@ -1,89 +0,0 @@
// Package rollinghash/rabinkarp32 implements a particular case of
// rabin-karp where the modulus is 2^32: all arithmetic is done on uint32
// values and simply wraps around on overflow.
package rabinkarp32
import rollinghash "github.com/chmduquesne/rollinghash"
// Size is the size of a rabinkarp32 checksum, in bytes.
const Size = 4
// digest holds the running state of a rabinkarp32 checksum.
type digest struct {
	a       uint32 // multiplier applied to the hash for every byte
	h       uint32 // current hash value
	aPowerN uint32 // a multiplied by itself once per written byte (wraps on overflow)

	// window is a circular buffer whose oldest element sits at index
	// d.oldest
	window []byte
	oldest int
}

// Reset puts the digest back in its initial state. Only the multiplier
// survives: the hash is zeroed, aPowerN goes back to 1 and the window is
// dropped.
func (d *digest) Reset() {
	*d = digest{a: d.a, aPowerN: 1}
}
// NewFromInt returns a rabinkarp32 digest that uses a as its multiplier.
// The digest starts with a zero hash and no window.
func NewFromInt(a uint32) rollinghash.Hash32 {
	d := new(digest)
	d.a = a
	d.aPowerN = 1
	return d
}
// New returns a rabinkarp32 digest built with the default multiplier 65521.
func New() rollinghash.Hash32 {
	const defaultMultiplier = 65521 // largest prime fitting in 16 bits
	return NewFromInt(defaultMultiplier)
}
// Size reports how many bytes Sum appends, which is the package constant
// Size.
func (d *digest) Size() int {
	return Size
}
// BlockSize reports the underlying block size of the hash, which is one
// byte: Write accepts any amount of data and no particular write size is
// favoured.
func (d *digest) BlockSize() int {
	return 1
}
// Write (via the embedded io.Writer interface) adds more data to the
// running hash and makes data the new rolling window. It never returns an
// error.
//
// The running hash is not cleared: bytes are folded into whatever state
// earlier Writes left behind. Call Reset first to hash data from scratch.
func (d *digest) Write(data []byte) (int, error) {
	// Copy the window, reusing the backing array when it is large enough
	if cap(d.window) >= len(data) {
		d.window = d.window[:len(data)]
	} else {
		d.window = make([]byte, len(data))
	}
	copy(d.window, data)
	// The window was just rewritten in order, so its oldest byte is at
	// index 0. A stale index could point past the end of a shorter window
	// and make Roll panic.
	d.oldest = 0

	for _, c := range d.window {
		d.h *= d.a
		d.h += uint32(c)
		d.aPowerN *= d.a
	}
	return len(d.window), nil
}
// Sum32 returns the current value of the hash as a uint32.
func (d *digest) Sum32() uint32 { return d.h }
// Sum appends the current hash to b, most significant byte first, and
// returns the resulting slice.
func (d *digest) Sum(b []byte) []byte {
	sum := d.Sum32()
	var be [4]byte
	for i := range be {
		// be[0] receives bits 31..24, be[3] bits 7..0
		be[i] = byte(sum >> uint(8*(3-i)))
	}
	return append(b, be[:]...)
}
// Roll slides the window by one byte: c enters, the oldest byte leaves,
// and the checksum is updated accordingly. When no window exists yet, a
// one-byte window holding c is created first.
func (d *digest) Roll(c byte) {
	if len(d.window) == 0 {
		d.window = []byte{c}
	}

	// swap the leaving byte for the entering one in the circular buffer
	pos := d.oldest
	leaving := uint32(d.window[pos])
	d.window[pos] = c

	// advance the oldest index, wrapping at the end of the window
	pos++
	if pos >= len(d.window) {
		pos = 0
	}
	d.oldest = pos

	d.h = d.h*d.a + uint32(c) - leaving*d.aPowerN
}

View File

@@ -0,0 +1,318 @@
// Copyright (c) 2014, Alexander Neumann <alexander@bumpern.de>
// Copyright (c) 2017, Christophe-Marie Duquesne <chmd@chmd.fr>
//
// This file was adapted from restic https://github.com/restic/chunker
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package rabinkarp64
import (
"encoding/binary"
"errors"
"fmt"
"io"
"math/rand"
"strconv"
)
// Pol is a polynomial from F_2[X]. Bit i of the value is the coefficient
// of X^i.
type Pol uint64

// Add returns x+y. Coefficients live in F_2, so the sum is the bitwise
// XOR of the two operands.
func (x Pol) Add(y Pol) Pol {
	return x ^ y
}
// mulOverflows reports whether the product a*b does not fit in a uint64.
// The check multiplies, divides the product by b again and verifies that
// a comes back. Code by Rob Pike, see
// https://groups.google.com/d/msg/golang-nuts/h5oSN5t3Au4/KaNQREhZh0QJ
func mulOverflows(a, b Pol) bool {
	// multiplying by 0 or 1 can never overflow
	if a <= 1 || b <= 1 {
		return false
	}
	return a.mul(b).Div(b) != a
}
// mul returns x*y without any overflow check: terms whose degree exceeds
// 63 are silently dropped.
func (x Pol) mul(y Pol) Pol {
	var product Pol
	// Walk the bits of y from the lowest up. Every set bit i contributes
	// x shifted left by i.
	for shift := uint(0); y != 0; shift, y = shift+1, y>>1 {
		if y&1 != 0 {
			product ^= x << shift
		}
	}
	return product
}
// Mul returns x*y. It panics when the product would overflow a uint64.
func (x Pol) Mul(y Pol) Pol {
	if !mulOverflows(x, y) {
		return x.mul(y)
	}
	panic("multiplication would overflow uint64")
}
// Deg returns the degree of the polynomial x, i.e. the index of its
// highest set bit. If x is zero, -1 is returned.
func (x Pol) Deg() int {
	// the degree of 0 is -1
	if x == 0 {
		return -1
	}

	// Binary search for the highest set bit, see
	// https://graphics.stanford.edu/~seander/bithacks.html#IntegerLog
	rest := uint64(x)
	deg := 0
	for step := uint(32); step >= 1; step /= 2 {
		// if anything is set above the low `step` bits, keep only that part
		if upper := rest >> step; upper != 0 {
			rest = upper
			deg += int(step)
		}
	}
	return deg
}
// String returns the coefficients of x as a hexadecimal number prefixed
// with "0x".
func (x Pol) String() string {
	hex := strconv.AppendUint([]byte("0x"), uint64(x), 16)
	return string(hex)
}
// Expand returns the polynomial x written out as a sum of its terms,
// highest degree first, e.g. "x^3+x+1". The zero polynomial gives "0".
func (x Pol) Expand() string {
	if x == 0 {
		return "0"
	}

	terms := ""
	for i := x.Deg(); i >= 0; i-- {
		if x&(1<<uint(i)) == 0 {
			continue
		}
		switch i {
		case 0:
			terms += "+1"
		case 1:
			terms += "+x"
		default:
			terms += fmt.Sprintf("+x^%d", i)
		}
	}

	// drop the leading "+"
	return terms[1:]
}
// DivMod returns the quotient q and the remainder r of x / d, computed
// by polynomial long division, see
// https://en.wikipedia.org/wiki/Division_algorithm
//
// A zero dividend yields (0, 0) whatever d is; otherwise DivMod panics
// when d is zero.
func (x Pol) DivMod(d Pol) (Pol, Pol) {
	switch {
	case x == 0:
		return 0, 0
	case d == 0:
		panic("division by zero")
	}

	var quotient Pol
	degD := d.Deg()
	// While the remainder has at least the degree of d, cancel its
	// leading term by adding d shifted to line up with it.
	for shift := x.Deg() - degD; shift >= 0; shift = x.Deg() - degD {
		quotient |= 1 << uint(shift)
		x = x.Add(d << uint(shift))
	}
	return quotient, x
}
// Div returns the quotient of the polynomial division x / d; the
// remainder is discarded.
func (x Pol) Div(d Pol) Pol {
	quotient, _ := x.DivMod(d)
	return quotient
}
// Mod returns the remainder of the polynomial division x / d; the
// quotient is discarded.
func (x Pol) Mod(d Pol) Pol {
	_, remainder := x.DivMod(d)
	return remainder
}
// randPolMaxTries is the maximum number of candidates DerivePolynomial
// examines before giving up. The bound is deliberately very large: it
// only exists so that the search for an irreducible polynomial always
// terminates, returning an error instead of looping forever.
const randPolMaxTries = 1e6
// RandomPolynomial returns a pseudo-random irreducible polynomial of
// degree 53. The candidates are drawn from a math/rand generator seeded
// with seed, so a given seed always yields the same polynomial. It is
// equivalent to calling DerivePolynomial(rand.New(rand.NewSource(seed))).
func RandomPolynomial(seed int64) (Pol, error) {
	source := rand.New(rand.NewSource(seed))
	return DerivePolynomial(source)
}
// DerivePolynomial returns an irreducible polynomial of degree 53
// (largest prime number below 64-8) by reading bytes from source.
// There are (2^53-2)/53 irreducible polynomials of degree 53 in
// F_2[X], c.f. Michael O. Rabin (1981): "Fingerprinting by Random
// Polynomials", page 4. If no polynomial could be found in one
// million tries, an error is returned. A read error from source is
// returned as is.
func DerivePolynomial(source io.Reader) (Pol, error) {
	// Each candidate is 8 bytes, decoded by hand: binary.Read would go
	// through reflection on every try because Pol is a named type.
	var raw [8]byte

	for i := 0; i < randPolMaxTries; i++ {
		// choose polynomial at (pseudo)random
		if _, err := io.ReadFull(source, raw[:]); err != nil {
			return 0, err
		}
		f := Pol(binary.LittleEndian.Uint64(raw[:]))

		// mask away bits above bit 53
		f &= Pol((1 << 54) - 1)

		// set highest and lowest bit so that the degree is 53 and the
		// polynomial is not trivially reducible
		f |= (1 << 53) | 1

		// test if f is irreducible
		if f.Irreducible() {
			return f, nil
		}
	}

	// If this is reached, we haven't found an irreducible polynomial in
	// randPolMaxTries. This error is very unlikely to occur.
	return 0, errors.New("unable to find new random irreducible polynomial")
}
// GCD computes the greatest common divisor of x and f with Euclid's
// algorithm. When one operand is zero, the other one is returned.
func (x Pol) GCD(f Pol) Pol {
	for {
		if f == 0 {
			return x
		}
		if x == 0 {
			return f
		}
		// keep the operand of higher degree in x
		if x.Deg() < f.Deg() {
			x, f = f, x
		}
		// gcd(x, f) = gcd(f, x mod f)
		x, f = f, x.Mod(f)
	}
}
// Irreducible returns true iff x is irreducible over F_2. This function
// uses Ben Or's reducibility test.
//
// The constant polynomials 0 and 1 are not irreducible and yield false.
//
// For details see "Tests and Constructions of Irreducible Polynomials over
// Finite Fields".
func (x Pol) Irreducible() bool {
	deg := x.Deg()
	// An irreducible polynomial is non-constant by definition. Without
	// this guard the loop below would not run for 0 and 1, and they would
	// be reported as irreducible.
	if deg < 1 {
		return false
	}

	// x is reducible as soon as it shares a factor with X^(2^i)-X for
	// some i up to half its degree.
	for i := 1; i <= deg/2; i++ {
		if x.GCD(qp(uint(i), x)) != 1 {
			return false
		}
	}
	return true
}
// MulMod computes x*f mod g.
//
// The product is accumulated one term of f at a time, reducing modulo g
// after every step. Like Mul and Mod, it panics if an intermediate
// doubling overflows uint64 or if g is zero (unless x or f is zero, in
// which case 0 is returned right away).
func (x Pol) MulMod(f, g Pol) Pol {
	if x == 0 || f == 0 {
		return 0
	}

	var res Pol
	// a holds x*X^i mod g (unreduced x for i == 0). It is carried from
	// one bit to the next instead of being rebuilt from x for every set
	// bit of f, which keeps the loop linear in the degree of f.
	a := x
	for i, top := 0, f.Deg(); i <= top; i++ {
		if i > 0 {
			a = a.Mul(2).Mod(g)
		}
		if f&(1<<uint(i)) != 0 {
			res = res.Add(a).Mod(g)
		}
	}
	return res
}
// qp computes the polynomial (x^(2^p)-x) mod g. This is needed for the
// reducibility test.
func qp(p uint, g Pol) Pol {
	// start with x
	res := Pol(2)

	// Square res p times to get x^(2^p) mod g. The squarings are counted
	// directly rather than by doubling a counter up to 1<<p: that value
	// overflows int for large p (p >= 31 where int is 32 bits wide), and
	// the squarings were then skipped altogether.
	for i := uint(0); i < p; i++ {
		res = res.MulMod(res, g)
	}

	// add x
	return res.Add(2).Mod(g)
}

View File

@@ -0,0 +1,224 @@
// Copyright (c) 2014, Alexander Neumann <alexander@bumpern.de>
// Copyright (c) 2017, Christophe-Marie Duquesne <chmd@chmd.fr>
//
// This file was adapted from restic https://github.com/restic/chunker
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package rabinkarp64
import (
"sync"
"github.com/chmduquesne/rollinghash"
)
// Size is the size, in bytes, of a RabinKarp64 checksum.
const Size = 8
// tables holds the two precomputed lookup tables used by Roll. Both are
// filled in by buildTables.
type tables struct {
// out is indexed by the byte leaving the window; Roll XORs the entry
// into the hash to slide that byte out.
out [256]Pol
// mod is indexed by the hash shifted right by polShift; Roll XORs the
// entry into the shifted hash to reduce it modulo the polynomial.
mod [256]Pol
}
// index is the key of the table cache: tables are cacheable for a given
// pol and windowsize.
type index struct {
pol Pol
windowsize int
}
// RabinKarp64 is a rolling digest computed modulo a polynomial over GF(2).
// Write initializes the window and the hash, Roll then slides the window
// forward by one byte at a time.
type RabinKarp64 struct {
// pol is the polynomial the hash is reduced by.
pol Pol
// tables holds the lookup tables used by Roll. It is nil until Write
// calls buildTables, and is cleared again by Reset.
tables *tables
// polShift is the degree of pol minus 8; Roll shifts value right by
// this amount to obtain its index into tables.mod.
polShift uint
// value is the current hash.
value Pol
// window is treated like a circular buffer, where the oldest element
// is indicated by d.oldest
window []byte
oldest int
}
// cache holds precomputed tables so that they can be reused; these are
// read-only anyway. buildTables takes the embedded mutex around every
// access to entries.
var cache struct {
// For a given polynomial and a given window size, we get a table
entries map[index]*tables
sync.Mutex
}
func init() {
cache.entries = make(map[index]*tables)
}
// buildTables sets d.tables to the lookup tables matching d.pol and the
// current window length. Tables found in the cache are reused; otherwise
// they are computed and stored in the cache for later digests.
func (d *RabinKarp64) buildTables() {
	key := index{pol: d.pol, windowsize: len(d.window)}

	cache.Lock()
	cached, found := cache.entries[key]
	cache.Unlock()
	if found {
		d.tables = cached
		return
	}

	fresh := new(tables)

	// Table for sliding out bytes. The byte to slide out is the index, and
	// the entry is the hash of that byte followed by windowsize-1 zero
	// bytes:
	//   out[b] = Hash(b || 0 || ... || 0)
	// Adding out[b_0] to H(b_0 || b_1 || ... || b_w) cancels b_0 and leaves
	// H(0 || b_1 || ... || b_w), after which a new byte can be shifted in.
	trailingZeros := key.windowsize - 1
	for b := range fresh.out {
		h := Pol(b).Mod(d.pol)
		for n := 0; n < trailingZeros; n++ {
			h = (h << 8).Mod(d.pol)
		}
		fresh.out[b] = h
	}

	// Table for reduction modulo the polynomial:
	//   mod[b] = A | B, with A = (b(x) * x^k mod pol) and B = b(x) * x^k
	// The 8 bits above deg(pol) select the entry. Part A is the result of
	// the modulus operation and part B cancels out the 8 top bits, so a
	// single XOR reduces the hash modulo the polynomial.
	k := uint(d.pol.Deg())
	for b := range fresh.mod {
		fresh.mod[b] = Pol(uint64(b)<<k).Mod(d.pol) | (Pol(b) << k)
	}

	d.tables = fresh

	cache.Lock()
	cache.entries[key] = fresh
	cache.Unlock()
}
// NewFromPol builds a RabinKarp64 digest around the polynomial p over
// GF(2). p is assumed to be irreducible; the RandomPolynomial function
// produces such polynomials. The lookup tables are not built here: they
// are set up by the first call to Write.
//
// NOTE(review): polShift is derived from p.Deg()-8, which is negative for
// a polynomial of degree below 8 - confirm that callers never pass one.
func NewFromPol(p Pol) *RabinKarp64 {
	return &RabinKarp64{
		pol:      p,
		polShift: uint(p.Deg() - 8),
		window:   make([]byte, 0, rollinghash.DefaultWindowCap),
	}
}
// New builds a RabinKarp64 digest around the default polynomial, i.e. the
// one RandomPolynomial yields for the seed 1. It panics if no polynomial
// could be generated.
func New() *RabinKarp64 {
	pol, err := RandomPolynomial(1)
	if err == nil {
		return NewFromPol(pol)
	}
	panic(err)
}
// Reset puts the running hash back into its initial state: the hash is
// zero, the tables are dropped, and the window shrinks to a single zero
// byte without being reallocated.
func (d *RabinKarp64) Reset() {
	d.tables, d.value = nil, 0
	w := d.window[:1]
	w[0] = 0
	d.window, d.oldest = w, 0
}
// Size returns the number of bytes of a checksum, which is the package
// constant Size (8 bytes).
func (d *RabinKarp64) Size() int { return Size }
// BlockSize returns the block size of the digest, which is 1 byte.
func (d *RabinKarp64) BlockSize() int { return 1 }
// Write (re)initializes the rolling window with the input byte slice and
// adds its data to the digest. The window becomes a copy of data, its
// oldest element is the first byte of data, and the lookup tables are
// (re)built for the new window length. An empty data yields a window made
// of a single zero byte.
//
// It returns len(data) and never returns an error.
func (d *RabinKarp64) Write(data []byte) (int, error) {
	// The window always holds at least one byte.
	l := len(data)
	if l == 0 {
		l = 1
	}

	// Reuse the backing array whenever its capacity allows it.
	if cap(d.window) >= l {
		d.window = d.window[:l]
	} else {
		d.window = make([]byte, l)
	}
	if len(data) == 0 {
		// A reused backing array may still hold a byte of the previous
		// window; it must not leak into the digest.
		d.window[0] = 0
	} else {
		copy(d.window, data)
	}
	// The fresh window starts at index 0. A cursor left over from earlier
	// calls to Roll would make Roll evict the wrong byte, or index past
	// the end of a shorter window.
	d.oldest = 0

	for _, b := range d.window {
		d.value <<= 8
		d.value |= Pol(b)
		d.value = d.value.Mod(d.pol)
	}

	d.buildTables()

	// io.Writer requires 0 <= n <= len(data), so report the input length
	// rather than the window length (which is 1 for an empty input).
	return len(data), nil
}
// Sum64 returns the current value of the hash as a uint64. It does not
// modify the digest.
func (d *RabinKarp64) Sum64() uint64 {
return uint64(d.value)
}
// Sum appends the 8 bytes of the current hash to b, most significant byte
// first, and returns the resulting slice.
func (d *RabinKarp64) Sum(b []byte) []byte {
	v := d.Sum64()
	var be [8]byte
	for i := range be {
		be[i] = byte(v >> uint(56-8*i))
	}
	return append(b, be[:]...)
}
// Roll slides the window forward by one byte: c enters the window, the
// oldest byte leaves it, and the checksum is updated accordingly. You MUST
// initialize a window with Write() before calling this method.
func (d *RabinKarp64) Roll(c byte) {
	// Replace the oldest byte of the circular buffer with the entering
	// one, then advance the cursor, wrapping at the end of the buffer.
	leaving := d.window[d.oldest]
	d.window[d.oldest] = c
	if d.oldest++; d.oldest >= len(d.window) {
		d.oldest = 0
	}

	// Slide the leaving byte out of the hash.
	d.value ^= d.tables.out[leaving]

	// Shift the entering byte in; the bits selected by polShift pick the
	// table entry that reduces the shifted hash.
	top := byte(d.value >> d.polShift)
	d.value = d.value<<8 | Pol(c)
	d.value ^= d.tables.mod[top]
}

126
vendor/github.com/chmduquesne/rollinghash/roll/main.go generated vendored Normal file
View File

@@ -0,0 +1,126 @@
package main
import (
"flag"
"fmt"
"io"
"log"
"os"
"time"
"code.cloudfoundry.org/bytefmt"
//rollsum "github.com/chmduquesne/rollinghash/adler32"
//rollsum "github.com/chmduquesne/rollinghash/buzhash32"
rollsum "github.com/chmduquesne/rollinghash/buzhash64"
//rollsum "github.com/chmduquesne/rollinghash/bozo32"
)
// Size units and terminal control sequences used by main.
const (
// Binary size units, in bytes.
KiB = 1024
MiB = 1024 * KiB
GiB = 1024 * MiB
// clearscreen is the ANSI escape sequence that erases the display and
// moves the cursor to row 1, column 1.
clearscreen = "\033[2J\033[1;1H"
// clearline is the ANSI escape sequence that erases the current line.
clearline = "\x1b[2K"
)
// genMasks returns the 64 masks whose i+1 lowest bits are set, ordered
// from the narrowest (0x1) to the widest (0xffffffffffffffff).
func genMasks() (res []uint64) {
	res = make([]uint64, 0, 64)
	var mask uint64
	for len(res) < 64 {
		// Widen the previous mask by one bit.
		mask = mask<<1 | 1
		res = append(res, mask)
	}
	return res
}
// hash2uint64 folds the bytes of s into a uint64, most significant byte
// first. When s is longer than 8 bytes, the leading bytes are shifted out
// and only the last 8 contribute to the result.
func hash2uint64(s []byte) (res uint64) {
	for i := 0; i < len(s); i++ {
		res = res<<8 | uint64(s[i])
	}
	return res
}
// main rolls the selected rolling hash over random data read from
// /dev/urandom and reports the throughput.
//
// Flags:
//
//	-size   how much data to roll over (parsed by bytefmt.ToBytes)
//	-stats  for every mask returned by genMasks, additionally count how
//	        often all the masked bits of the sum are set, and print how
//	        many bytes separate two such sums on average
//
// Any I/O failure terminates the program through log.Fatal.
func main() {
	dostats := flag.Bool("stats", false, "Do some stats about the rolling sum")
	size := flag.String("size", "256M", "How much data to read")
	flag.Parse()

	fileSize, err := bytefmt.ToBytes(*size)
	if err != nil {
		log.Fatal(err)
	}

	bufsize := 16 * MiB
	rbuf := make([]byte, bufsize)
	hbuf := make([]byte, 0, 8)

	t := time.Now()

	f, err := os.Open("/dev/urandom")
	if err != nil {
		log.Fatal(err)
	}
	defer func() {
		if err := f.Close(); err != nil {
			log.Fatal(err)
		}
	}()

	// The rolling window is initialized from this buffer, so a failed
	// read must not go unnoticed.
	if _, err := io.ReadFull(f, rbuf); err != nil {
		log.Fatal(err)
	}

	roll := rollsum.New()
	roll.Write(rbuf[:64])

	masks := genMasks()
	hits := make(map[uint64]uint64)
	for _, m := range masks {
		hits[m] = 0
	}

	n := uint64(0)
	k := 0
	for n < fileSize {
		// The buffer is exhausted: report progress and refill it.
		if k >= bufsize {
			status := fmt.Sprintf("Byte count: %s", bytefmt.ByteSize(n))
			if *dostats {
				fmt.Print(clearscreen)
				fmt.Println(status)
				for i, m := range masks {
					frequency := "NaN"
					if hits[m] != 0 {
						frequency = bytefmt.ByteSize(n / hits[m])
					}
					fmt.Printf("0x%016x (%02d bits): every %s\n", m, i+1, frequency)
				}
			} else {
				// These strings are data, not format strings, hence
				// Print rather than Printf.
				fmt.Print(clearline)
				fmt.Print(status)
				fmt.Print("\r")
			}
			if _, err := io.ReadFull(f, rbuf); err != nil {
				log.Fatal(err)
			}
			k = 0
		}
		roll.Roll(rbuf[k])
		if *dostats {
			s := hash2uint64(roll.Sum(hbuf))
			// The masks grow by one bit each, so the first mask that
			// does not match rules out all the following ones.
			for _, m := range masks {
				if s&m != m {
					break
				}
				hits[m]++
			}
		}
		k++
		n++
	}
	duration := time.Since(t)
	fmt.Printf("Rolled %s of data in %v (%s/s).\n",
		bytefmt.ByteSize(n),
		duration,
		bytefmt.ByteSize(n*1e9/uint64(duration)),
	)
}

View File

@@ -7,11 +7,19 @@ package rollinghash
import "hash"
// DefaultWindowCap is the default capacity, in bytes, of the internal
// window of a new Hash.
const DefaultWindowCap = 64
// A Roller is a type that has the method Roll. Roll updates the hash of a
// rolling window from just the entering byte. You MUST call Write()
// BEFORE using this method and provide it with an initial window of size
// at least 1 byte. You can then call this method for every new byte
// entering the window. The byte leaving the window is automatically
// computed from a copy of the window internally kept in the checksum.
// This window is updated along with the internal state of the checksum
// every time Roll() is called.
type Roller interface {
// Roll updates the hash of a rolling window from the entering byte b.
// A copy of the window is kept internally from the last Write().
// This copy is updated along with the internal state of the checksum,
// so that the new hash can be determined very quickly.
Roll(b byte)
}