Updates the vendored github.com/chmduquesne/rollinghash package and fixes a test that depended on the old behavior of Write() being equivalent to Reset()+Write(), which is no longer the case: Write() now appends to the rolling window. The scanner already did resets after each block write, so this is fine.
This commit is contained in:
parent
d1704d5304
commit
c0a26c918a
@ -144,14 +144,13 @@ func TestAdler32Variants(t *testing.T) {
|
|||||||
|
|
||||||
windowSize := 128
|
windowSize := 128
|
||||||
|
|
||||||
hf2.Reset()
|
|
||||||
|
|
||||||
hf3 := rollingAdler32.New()
|
hf3 := rollingAdler32.New()
|
||||||
hf3.Write(data[:windowSize])
|
hf3.Write(data[:windowSize])
|
||||||
|
|
||||||
for i := windowSize; i < len(data); i++ {
|
for i := windowSize; i < len(data); i++ {
|
||||||
if i%windowSize == 0 {
|
if i%windowSize == 0 {
|
||||||
// let the reference function catch up
|
// let the reference function catch up
|
||||||
|
hf2.Reset()
|
||||||
hf2.Write(data[i-windowSize : i])
|
hf2.Write(data[i-windowSize : i])
|
||||||
|
|
||||||
// verify that they are in sync with the rolling function
|
// verify that they are in sync with the rolling function
|
||||||
|
|||||||
50
vendor/github.com/chmduquesne/rollinghash/adler32/adler32.go
generated
vendored
50
vendor/github.com/chmduquesne/rollinghash/adler32/adler32.go
generated
vendored
@ -18,23 +18,24 @@ const (
|
|||||||
// It implements the adler32 algorithm https://en.wikipedia.org/wiki/Adler-32
|
// It implements the adler32 algorithm https://en.wikipedia.org/wiki/Adler-32
|
||||||
type Adler32 struct {
|
type Adler32 struct {
|
||||||
a, b uint32
|
a, b uint32
|
||||||
|
n uint32
|
||||||
|
|
||||||
// window is treated like a circular buffer, where the oldest element
|
// window is treated like a circular buffer, where the oldest element
|
||||||
// is indicated by d.oldest
|
// is indicated by d.oldest
|
||||||
window []byte
|
window []byte
|
||||||
oldest int
|
oldest int
|
||||||
n uint32
|
|
||||||
|
|
||||||
vanilla hash.Hash32
|
vanilla hash.Hash32
|
||||||
}
|
}
|
||||||
|
|
||||||
// Reset resets the digest to its initial state.
|
// Reset resets the digest to its initial state.
|
||||||
func (d *Adler32) Reset() {
|
func (d *Adler32) Reset() {
|
||||||
d.window = d.window[:1] // Reset the size but don't reallocate
|
d.window = d.window[:0] // Reset the size but don't reallocate
|
||||||
d.window[0] = 0
|
d.oldest = 0
|
||||||
d.a = 1
|
d.a = 1
|
||||||
d.b = 0
|
d.b = 0
|
||||||
d.oldest = 0
|
d.n = 0
|
||||||
|
d.vanilla.Reset()
|
||||||
}
|
}
|
||||||
|
|
||||||
// New returns a new Adler32 digest
|
// New returns a new Adler32 digest
|
||||||
@ -42,7 +43,8 @@ func New() *Adler32 {
|
|||||||
return &Adler32{
|
return &Adler32{
|
||||||
a: 1,
|
a: 1,
|
||||||
b: 0,
|
b: 0,
|
||||||
window: make([]byte, 1, rollinghash.DefaultWindowCap),
|
n: 0,
|
||||||
|
window: make([]byte, 0, rollinghash.DefaultWindowCap),
|
||||||
oldest: 0,
|
oldest: 0,
|
||||||
vanilla: vanilla.New(),
|
vanilla: vanilla.New(),
|
||||||
}
|
}
|
||||||
@ -54,30 +56,30 @@ func (d *Adler32) Size() int { return Size }
|
|||||||
// BlockSize is 1 byte
|
// BlockSize is 1 byte
|
||||||
func (d *Adler32) BlockSize() int { return 1 }
|
func (d *Adler32) BlockSize() int { return 1 }
|
||||||
|
|
||||||
// Write (re)initializes the rolling window with the input byte slice and
|
// Write appends data to the rolling window and updates the digest.
|
||||||
// adds its data to the digest.
|
func (d *Adler32) Write(data []byte) (int, error) {
|
||||||
func (d *Adler32) Write(p []byte) (int, error) {
|
l := len(data)
|
||||||
// Copy the window, avoiding allocations where possible
|
|
||||||
l := len(p)
|
|
||||||
if l == 0 {
|
if l == 0 {
|
||||||
l = 1
|
return 0, nil
|
||||||
}
|
}
|
||||||
if len(d.window) != l {
|
// Re-arrange the window so that the leftmost element is at index 0
|
||||||
if cap(d.window) >= l {
|
n := len(d.window)
|
||||||
d.window = d.window[:l]
|
if d.oldest != 0 {
|
||||||
} else {
|
tmp := make([]byte, d.oldest)
|
||||||
d.window = make([]byte, len(p))
|
copy(tmp, d.window[:d.oldest])
|
||||||
|
copy(d.window, d.window[d.oldest:])
|
||||||
|
copy(d.window[n-d.oldest:], tmp)
|
||||||
|
d.oldest = 0
|
||||||
}
|
}
|
||||||
}
|
d.window = append(d.window, data...)
|
||||||
copy(d.window, p)
|
|
||||||
|
|
||||||
// Piggy-back on the core implementation
|
// Piggy-back on the core implementation
|
||||||
d.vanilla.Reset()
|
d.vanilla.Reset()
|
||||||
d.vanilla.Write(p)
|
d.vanilla.Write(d.window)
|
||||||
s := d.vanilla.Sum32()
|
s := d.vanilla.Sum32()
|
||||||
d.a, d.b = s&0xffff, s>>16
|
d.a, d.b = s&0xffff, s>>16
|
||||||
d.n = uint32(len(p)) % Mod
|
d.n = uint32(len(d.window)) % Mod
|
||||||
return len(d.window), nil
|
return len(data), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Sum32 returns the hash as a uint32
|
// Sum32 returns the hash as a uint32
|
||||||
@ -94,6 +96,12 @@ func (d *Adler32) Sum(b []byte) []byte {
|
|||||||
// Roll updates the checksum of the window from the entering byte. You
|
// Roll updates the checksum of the window from the entering byte. You
|
||||||
// MUST initialize a window with Write() before calling this method.
|
// MUST initialize a window with Write() before calling this method.
|
||||||
func (d *Adler32) Roll(b byte) {
|
func (d *Adler32) Roll(b byte) {
|
||||||
|
// This check costs 10-15% performance. If we disable it, we crash
|
||||||
|
// when the window is empty. If we enable it, we are always correct
|
||||||
|
// (an empty window never changes no matter how much you roll it).
|
||||||
|
//if len(d.window) == 0 {
|
||||||
|
// return
|
||||||
|
//}
|
||||||
// extract the entering/leaving bytes and update the circular buffer.
|
// extract the entering/leaving bytes and update the circular buffer.
|
||||||
enter := uint32(b)
|
enter := uint32(b)
|
||||||
leave := uint32(d.window[d.oldest])
|
leave := uint32(d.window[d.oldest])
|
||||||
|
|||||||
57
vendor/github.com/chmduquesne/rollinghash/bozo32/bozo32.go
generated
vendored
57
vendor/github.com/chmduquesne/rollinghash/bozo32/bozo32.go
generated
vendored
@ -15,8 +15,8 @@ const Size = 4
|
|||||||
// Bozo32 is a digest which satisfies the rollinghash.Hash32 interface.
|
// Bozo32 is a digest which satisfies the rollinghash.Hash32 interface.
|
||||||
type Bozo32 struct {
|
type Bozo32 struct {
|
||||||
a uint32
|
a uint32
|
||||||
h uint32
|
aⁿ uint32
|
||||||
aPowerN uint32
|
value uint32
|
||||||
|
|
||||||
// window is treated like a circular buffer, where the oldest element
|
// window is treated like a circular buffer, where the oldest element
|
||||||
// is indicated by d.oldest
|
// is indicated by d.oldest
|
||||||
@ -26,18 +26,18 @@ type Bozo32 struct {
|
|||||||
|
|
||||||
// Reset resets the Hash to its initial state.
|
// Reset resets the Hash to its initial state.
|
||||||
func (d *Bozo32) Reset() {
|
func (d *Bozo32) Reset() {
|
||||||
d.h = 0
|
d.value = 0
|
||||||
d.aPowerN = 1
|
d.aⁿ = 1
|
||||||
d.window = nil
|
|
||||||
d.oldest = 0
|
d.oldest = 0
|
||||||
|
d.window = d.window[:0]
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewFromInt(a uint32) *Bozo32 {
|
func NewFromInt(a uint32) *Bozo32 {
|
||||||
return &Bozo32{
|
return &Bozo32{
|
||||||
a: a,
|
a: a,
|
||||||
h: 0,
|
value: 0,
|
||||||
aPowerN: 1,
|
aⁿ: 1,
|
||||||
window: make([]byte, 1, rollinghash.DefaultWindowCap),
|
window: make([]byte, 0, rollinghash.DefaultWindowCap),
|
||||||
oldest: 0,
|
oldest: 0,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -52,32 +52,37 @@ func (d *Bozo32) Size() int { return Size }
|
|||||||
// BlockSize is 1 byte
|
// BlockSize is 1 byte
|
||||||
func (d *Bozo32) BlockSize() int { return 1 }
|
func (d *Bozo32) BlockSize() int { return 1 }
|
||||||
|
|
||||||
// Write (re)initializes the rolling window with the input byte slice and
|
// Write appends data to the rolling window and updates the digest. It
|
||||||
// adds its data to the digest. It never returns an error.
|
// never returns an error.
|
||||||
func (d *Bozo32) Write(data []byte) (int, error) {
|
func (d *Bozo32) Write(data []byte) (int, error) {
|
||||||
// Copy the window
|
|
||||||
l := len(data)
|
l := len(data)
|
||||||
if l == 0 {
|
if l == 0 {
|
||||||
l = 1
|
return 0, nil
|
||||||
}
|
}
|
||||||
if len(d.window) >= l {
|
// Re-arrange the window so that the leftmost element is at index 0
|
||||||
d.window = d.window[:l]
|
n := len(d.window)
|
||||||
} else {
|
if d.oldest != 0 {
|
||||||
d.window = make([]byte, l)
|
tmp := make([]byte, d.oldest)
|
||||||
|
copy(tmp, d.window[:d.oldest])
|
||||||
|
copy(d.window, d.window[d.oldest:])
|
||||||
|
copy(d.window[n-d.oldest:], tmp)
|
||||||
|
d.oldest = 0
|
||||||
}
|
}
|
||||||
copy(d.window, data)
|
d.window = append(d.window, data...)
|
||||||
|
|
||||||
|
d.value = 0
|
||||||
|
d.aⁿ = 1
|
||||||
for _, c := range d.window {
|
for _, c := range d.window {
|
||||||
d.h *= d.a
|
d.value *= d.a
|
||||||
d.h += uint32(c)
|
d.value += uint32(c)
|
||||||
d.aPowerN *= d.a
|
d.aⁿ *= d.a
|
||||||
}
|
}
|
||||||
return len(d.window), nil
|
return len(data), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Sum32 returns the hash as a uint32
|
// Sum32 returns the hash as a uint32
|
||||||
func (d *Bozo32) Sum32() uint32 {
|
func (d *Bozo32) Sum32() uint32 {
|
||||||
return d.h
|
return d.value
|
||||||
}
|
}
|
||||||
|
|
||||||
// Sum returns the hash as byte slice
|
// Sum returns the hash as byte slice
|
||||||
@ -89,6 +94,12 @@ func (d *Bozo32) Sum(b []byte) []byte {
|
|||||||
// Roll updates the checksum of the window from the entering byte. You
|
// Roll updates the checksum of the window from the entering byte. You
|
||||||
// MUST initialize a window with Write() before calling this method.
|
// MUST initialize a window with Write() before calling this method.
|
||||||
func (d *Bozo32) Roll(c byte) {
|
func (d *Bozo32) Roll(c byte) {
|
||||||
|
// This check costs 10-15% performance. If we disable it, we crash
|
||||||
|
// when the window is empty. If we enable it, we are always correct
|
||||||
|
// (an empty window never changes no matter how much you roll it).
|
||||||
|
//if len(d.window) == 0 {
|
||||||
|
// return
|
||||||
|
//}
|
||||||
// extract the entering/leaving bytes and update the circular buffer.
|
// extract the entering/leaving bytes and update the circular buffer.
|
||||||
enter := uint32(c)
|
enter := uint32(c)
|
||||||
leave := uint32(d.window[d.oldest])
|
leave := uint32(d.window[d.oldest])
|
||||||
@ -99,5 +110,5 @@ func (d *Bozo32) Roll(c byte) {
|
|||||||
d.oldest = 0
|
d.oldest = 0
|
||||||
}
|
}
|
||||||
|
|
||||||
d.h = d.h*d.a + enter - leave*d.aPowerN
|
d.value = d.value*d.a + enter - leave*d.aⁿ
|
||||||
}
|
}
|
||||||
|
|||||||
34
vendor/github.com/chmduquesne/rollinghash/buzhash32/buzhash32.go
generated
vendored
34
vendor/github.com/chmduquesne/rollinghash/buzhash32/buzhash32.go
generated
vendored
@ -65,7 +65,7 @@ func New() *Buzhash32 {
|
|||||||
func NewFromUint32Array(b [256]uint32) *Buzhash32 {
|
func NewFromUint32Array(b [256]uint32) *Buzhash32 {
|
||||||
return &Buzhash32{
|
return &Buzhash32{
|
||||||
sum: 0,
|
sum: 0,
|
||||||
window: make([]byte, 1, rollinghash.DefaultWindowCap),
|
window: make([]byte, 0, rollinghash.DefaultWindowCap),
|
||||||
oldest: 0,
|
oldest: 0,
|
||||||
bytehash: b,
|
bytehash: b,
|
||||||
}
|
}
|
||||||
@ -77,30 +77,31 @@ func (d *Buzhash32) Size() int { return Size }
|
|||||||
// BlockSize is 1 byte
|
// BlockSize is 1 byte
|
||||||
func (d *Buzhash32) BlockSize() int { return 1 }
|
func (d *Buzhash32) BlockSize() int { return 1 }
|
||||||
|
|
||||||
// Write (re)initializes the rolling window with the input byte slice and
|
// Write appends data to the rolling window and updates the digest.
|
||||||
// adds its data to the digest.
|
|
||||||
func (d *Buzhash32) Write(data []byte) (int, error) {
|
func (d *Buzhash32) Write(data []byte) (int, error) {
|
||||||
// Copy the window, avoiding allocations where possible
|
|
||||||
l := len(data)
|
l := len(data)
|
||||||
if l == 0 {
|
if l == 0 {
|
||||||
l = 1
|
return 0, nil
|
||||||
}
|
}
|
||||||
if len(d.window) != l {
|
// Re-arrange the window so that the leftmost element is at index 0
|
||||||
if cap(d.window) >= l {
|
n := len(d.window)
|
||||||
d.window = d.window[:l]
|
if d.oldest != 0 {
|
||||||
} else {
|
tmp := make([]byte, d.oldest)
|
||||||
d.window = make([]byte, l)
|
copy(tmp, d.window[:d.oldest])
|
||||||
|
copy(d.window, d.window[d.oldest:])
|
||||||
|
copy(d.window[n-d.oldest:], tmp)
|
||||||
|
d.oldest = 0
|
||||||
}
|
}
|
||||||
}
|
d.window = append(d.window, data...)
|
||||||
copy(d.window, data)
|
|
||||||
|
|
||||||
|
d.sum = 0
|
||||||
for _, c := range d.window {
|
for _, c := range d.window {
|
||||||
d.sum = d.sum<<1 | d.sum>>31
|
d.sum = d.sum<<1 | d.sum>>31
|
||||||
d.sum ^= d.bytehash[int(c)]
|
d.sum ^= d.bytehash[int(c)]
|
||||||
}
|
}
|
||||||
d.nRotate = uint(len(d.window)) % 32
|
d.nRotate = uint(len(d.window)) % 32
|
||||||
d.nRotateComplement = 32 - d.nRotate
|
d.nRotateComplement = 32 - d.nRotate
|
||||||
return len(d.window), nil
|
return len(data), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Sum32 returns the hash as a uint32
|
// Sum32 returns the hash as a uint32
|
||||||
@ -117,6 +118,13 @@ func (d *Buzhash32) Sum(b []byte) []byte {
|
|||||||
// Roll updates the checksum of the window from the entering byte. You
|
// Roll updates the checksum of the window from the entering byte. You
|
||||||
// MUST initialize a window with Write() before calling this method.
|
// MUST initialize a window with Write() before calling this method.
|
||||||
func (d *Buzhash32) Roll(c byte) {
|
func (d *Buzhash32) Roll(c byte) {
|
||||||
|
// This check costs 10-15% performance. If we disable it, we crash
|
||||||
|
// when the window is empty. If we enable it, we are always correct
|
||||||
|
// (an empty window never changes no matter how much you roll it).
|
||||||
|
//if len(d.window) == 0 {
|
||||||
|
// return
|
||||||
|
//}
|
||||||
|
|
||||||
// extract the entering/leaving bytes and update the circular buffer.
|
// extract the entering/leaving bytes and update the circular buffer.
|
||||||
hn := d.bytehash[int(c)]
|
hn := d.bytehash[int(c)]
|
||||||
h0 := d.bytehash[int(d.window[d.oldest])]
|
h0 := d.bytehash[int(d.window[d.oldest])]
|
||||||
|
|||||||
35
vendor/github.com/chmduquesne/rollinghash/buzhash64/buzhash64.go
generated
vendored
35
vendor/github.com/chmduquesne/rollinghash/buzhash64/buzhash64.go
generated
vendored
@ -65,7 +65,7 @@ func New() *Buzhash64 {
|
|||||||
func NewFromUint64Array(b [256]uint64) *Buzhash64 {
|
func NewFromUint64Array(b [256]uint64) *Buzhash64 {
|
||||||
return &Buzhash64{
|
return &Buzhash64{
|
||||||
sum: 0,
|
sum: 0,
|
||||||
window: make([]byte, 1, rollinghash.DefaultWindowCap),
|
window: make([]byte, 0, rollinghash.DefaultWindowCap),
|
||||||
oldest: 0,
|
oldest: 0,
|
||||||
bytehash: b,
|
bytehash: b,
|
||||||
}
|
}
|
||||||
@ -77,30 +77,32 @@ func (d *Buzhash64) Size() int { return Size }
|
|||||||
// BlockSize is 1 byte
|
// BlockSize is 1 byte
|
||||||
func (d *Buzhash64) BlockSize() int { return 1 }
|
func (d *Buzhash64) BlockSize() int { return 1 }
|
||||||
|
|
||||||
// Write (re)initializes the rolling window with the input byte slice and
|
// Write appends data to the rolling window and updates the digest. It
|
||||||
// adds its data to the digest.
|
// never returns an error.
|
||||||
func (d *Buzhash64) Write(data []byte) (int, error) {
|
func (d *Buzhash64) Write(data []byte) (int, error) {
|
||||||
// Copy the window, avoiding allocations where possible
|
|
||||||
l := len(data)
|
l := len(data)
|
||||||
if l == 0 {
|
if l == 0 {
|
||||||
l = 1
|
return 0, nil
|
||||||
}
|
}
|
||||||
if len(d.window) != l {
|
// Re-arrange the window so that the leftmost element is at index 0
|
||||||
if cap(d.window) >= l {
|
n := len(d.window)
|
||||||
d.window = d.window[:l]
|
if d.oldest != 0 {
|
||||||
} else {
|
tmp := make([]byte, d.oldest)
|
||||||
d.window = make([]byte, l)
|
copy(tmp, d.window[:d.oldest])
|
||||||
|
copy(d.window, d.window[d.oldest:])
|
||||||
|
copy(d.window[n-d.oldest:], tmp)
|
||||||
|
d.oldest = 0
|
||||||
}
|
}
|
||||||
}
|
d.window = append(d.window, data...)
|
||||||
copy(d.window, data)
|
|
||||||
|
|
||||||
|
d.sum = 0
|
||||||
for _, c := range d.window {
|
for _, c := range d.window {
|
||||||
d.sum = d.sum<<1 | d.sum>>63
|
d.sum = d.sum<<1 | d.sum>>63
|
||||||
d.sum ^= d.bytehash[int(c)]
|
d.sum ^= d.bytehash[int(c)]
|
||||||
}
|
}
|
||||||
d.nRotate = uint(len(d.window)) % 64
|
d.nRotate = uint(len(d.window)) % 64
|
||||||
d.nRotateComplement = 64 - d.nRotate
|
d.nRotateComplement = 64 - d.nRotate
|
||||||
return len(d.window), nil
|
return len(data), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Sum64 returns the hash as a uint64
|
// Sum64 returns the hash as a uint64
|
||||||
@ -117,6 +119,13 @@ func (d *Buzhash64) Sum(b []byte) []byte {
|
|||||||
// Roll updates the checksum of the window from the entering byte. You
|
// Roll updates the checksum of the window from the entering byte. You
|
||||||
// MUST initialize a window with Write() before calling this method.
|
// MUST initialize a window with Write() before calling this method.
|
||||||
func (d *Buzhash64) Roll(c byte) {
|
func (d *Buzhash64) Roll(c byte) {
|
||||||
|
// This check costs 10-15% performance. If we disable it, we crash
|
||||||
|
// when the window is empty. If we enable it, we are always correct
|
||||||
|
// (an empty window never changes no matter how much you roll it).
|
||||||
|
//if len(d.window) == 0 {
|
||||||
|
// return
|
||||||
|
//}
|
||||||
|
|
||||||
// extract the entering/leaving bytes and update the circular buffer.
|
// extract the entering/leaving bytes and update the circular buffer.
|
||||||
hn := d.bytehash[int(c)]
|
hn := d.bytehash[int(c)]
|
||||||
h0 := d.bytehash[int(d.window[d.oldest])]
|
h0 := d.bytehash[int(d.window[d.oldest])]
|
||||||
|
|||||||
62
vendor/github.com/chmduquesne/rollinghash/rabinkarp64/rabinkarp64.go
generated
vendored
62
vendor/github.com/chmduquesne/rollinghash/rabinkarp64/rabinkarp64.go
generated
vendored
@ -70,8 +70,10 @@ func init() {
|
|||||||
cache.entries = make(map[index]*tables)
|
cache.entries = make(map[index]*tables)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (d *RabinKarp64) buildTables() {
|
func (d *RabinKarp64) updateTables() {
|
||||||
windowsize := len(d.window)
|
windowsize := len(d.window)
|
||||||
|
pol := d.pol
|
||||||
|
|
||||||
idx := index{d.pol, windowsize}
|
idx := index{d.pol, windowsize}
|
||||||
|
|
||||||
cache.Lock()
|
cache.Lock()
|
||||||
@ -82,8 +84,15 @@ func (d *RabinKarp64) buildTables() {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
t = &tables{}
|
d.tables = buildTables(pol, windowsize)
|
||||||
|
cache.Lock()
|
||||||
|
cache.entries[idx] = d.tables
|
||||||
|
cache.Unlock()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
func buildTables(pol Pol, windowsize int) (t *tables) {
|
||||||
|
t = &tables{}
|
||||||
// calculate table for sliding out bytes. The byte to slide out is used as
|
// calculate table for sliding out bytes. The byte to slide out is used as
|
||||||
// the index for the table, the value contains the following:
|
// the index for the table, the value contains the following:
|
||||||
// out_table[b] = Hash(b || 0 || ... || 0)
|
// out_table[b] = Hash(b || 0 || ... || 0)
|
||||||
@ -99,17 +108,17 @@ func (d *RabinKarp64) buildTables() {
|
|||||||
var h Pol
|
var h Pol
|
||||||
h <<= 8
|
h <<= 8
|
||||||
h |= Pol(b)
|
h |= Pol(b)
|
||||||
h = h.Mod(d.pol)
|
h = h.Mod(pol)
|
||||||
for i := 0; i < windowsize-1; i++ {
|
for i := 0; i < windowsize-1; i++ {
|
||||||
h <<= 8
|
h <<= 8
|
||||||
h |= Pol(0)
|
h |= Pol(0)
|
||||||
h = h.Mod(d.pol)
|
h = h.Mod(pol)
|
||||||
}
|
}
|
||||||
t.out[b] = h
|
t.out[b] = h
|
||||||
}
|
}
|
||||||
|
|
||||||
// calculate table for reduction mod Polynomial
|
// calculate table for reduction mod Polynomial
|
||||||
k := d.pol.Deg()
|
k := pol.Deg()
|
||||||
for b := 0; b < 256; b++ {
|
for b := 0; b < 256; b++ {
|
||||||
// mod_table[b] = A | B, where A = (b(x) * x^k mod pol) and B = b(x) * x^k
|
// mod_table[b] = A | B, where A = (b(x) * x^k mod pol) and B = b(x) * x^k
|
||||||
//
|
//
|
||||||
@ -118,13 +127,10 @@ func (d *RabinKarp64) buildTables() {
|
|||||||
// two parts: Part A contains the result of the modulus operation, part
|
// two parts: Part A contains the result of the modulus operation, part
|
||||||
// B is used to cancel out the 8 top bits so that one XOR operation is
|
// B is used to cancel out the 8 top bits so that one XOR operation is
|
||||||
// enough to reduce modulo Polynomial
|
// enough to reduce modulo Polynomial
|
||||||
t.mod[b] = Pol(uint64(b)<<uint(k)).Mod(d.pol) | (Pol(b) << uint(k))
|
t.mod[b] = Pol(uint64(b)<<uint(k)).Mod(pol) | (Pol(b) << uint(k))
|
||||||
}
|
}
|
||||||
|
|
||||||
d.tables = t
|
return t
|
||||||
cache.Lock()
|
|
||||||
cache.entries[idx] = d.tables
|
|
||||||
cache.Unlock()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewFromPol returns a RabinKarp64 digest from a polynomial over GF(2).
|
// NewFromPol returns a RabinKarp64 digest from a polynomial over GF(2).
|
||||||
@ -139,6 +145,7 @@ func NewFromPol(p Pol) *RabinKarp64 {
|
|||||||
window: make([]byte, 0, rollinghash.DefaultWindowCap),
|
window: make([]byte, 0, rollinghash.DefaultWindowCap),
|
||||||
oldest: 0,
|
oldest: 0,
|
||||||
}
|
}
|
||||||
|
res.updateTables()
|
||||||
return res
|
return res
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -156,9 +163,9 @@ func New() *RabinKarp64 {
|
|||||||
func (d *RabinKarp64) Reset() {
|
func (d *RabinKarp64) Reset() {
|
||||||
d.tables = nil
|
d.tables = nil
|
||||||
d.value = 0
|
d.value = 0
|
||||||
d.window = d.window[:1]
|
d.window = d.window[:0]
|
||||||
d.window[0] = 0
|
|
||||||
d.oldest = 0
|
d.oldest = 0
|
||||||
|
d.updateTables()
|
||||||
}
|
}
|
||||||
|
|
||||||
// Size is 8 bytes
|
// Size is 8 bytes
|
||||||
@ -167,30 +174,33 @@ func (d *RabinKarp64) Size() int { return Size }
|
|||||||
// BlockSize is 1 byte
|
// BlockSize is 1 byte
|
||||||
func (d *RabinKarp64) BlockSize() int { return 1 }
|
func (d *RabinKarp64) BlockSize() int { return 1 }
|
||||||
|
|
||||||
// Write (re)initializes the rolling window with the input byte slice and
|
// Write appends data to the rolling window and updates the digest.
|
||||||
// adds its data to the digest. It never returns an error.
|
|
||||||
func (d *RabinKarp64) Write(data []byte) (int, error) {
|
func (d *RabinKarp64) Write(data []byte) (int, error) {
|
||||||
// Copy the window
|
|
||||||
l := len(data)
|
l := len(data)
|
||||||
if l == 0 {
|
if l == 0 {
|
||||||
l = 1
|
return 0, nil
|
||||||
}
|
}
|
||||||
if len(d.window) >= l {
|
// Re-arrange the window so that the leftmost element is at index 0
|
||||||
d.window = d.window[:l]
|
n := len(d.window)
|
||||||
} else {
|
if d.oldest != 0 {
|
||||||
d.window = make([]byte, l)
|
tmp := make([]byte, d.oldest)
|
||||||
|
copy(tmp, d.window[:d.oldest])
|
||||||
|
copy(d.window, d.window[d.oldest:])
|
||||||
|
copy(d.window[n-d.oldest:], tmp)
|
||||||
|
d.oldest = 0
|
||||||
}
|
}
|
||||||
copy(d.window, data)
|
d.window = append(d.window, data...)
|
||||||
|
|
||||||
|
d.value = 0
|
||||||
for _, b := range d.window {
|
for _, b := range d.window {
|
||||||
d.value <<= 8
|
d.value <<= 8
|
||||||
d.value |= Pol(b)
|
d.value |= Pol(b)
|
||||||
d.value = d.value.Mod(d.pol)
|
d.value = d.value.Mod(d.pol)
|
||||||
}
|
}
|
||||||
|
|
||||||
d.buildTables()
|
d.updateTables()
|
||||||
|
|
||||||
return len(d.window), nil
|
return len(data), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Sum64 returns the hash as a uint64
|
// Sum64 returns the hash as a uint64
|
||||||
@ -207,6 +217,12 @@ func (d *RabinKarp64) Sum(b []byte) []byte {
|
|||||||
// Roll updates the checksum of the window from the entering byte. You
|
// Roll updates the checksum of the window from the entering byte. You
|
||||||
// MUST initialize a window with Write() before calling this method.
|
// MUST initialize a window with Write() before calling this method.
|
||||||
func (d *RabinKarp64) Roll(c byte) {
|
func (d *RabinKarp64) Roll(c byte) {
|
||||||
|
// This check costs 10-15% performance. If we disable it, we crash
|
||||||
|
// when the window is empty. If we enable it, we are always correct
|
||||||
|
// (an empty window never changes no matter how much you roll it).
|
||||||
|
//if len(d.window) == 0 {
|
||||||
|
// return
|
||||||
|
//}
|
||||||
// extract the entering/leaving bytes and update the circular buffer.
|
// extract the entering/leaving bytes and update the circular buffer.
|
||||||
enter := c
|
enter := c
|
||||||
leave := uint64(d.window[d.oldest])
|
leave := uint64(d.window[d.oldest])
|
||||||
|
|||||||
30
vendor/github.com/chmduquesne/rollinghash/roll/main.go
generated
vendored
30
vendor/github.com/chmduquesne/rollinghash/roll/main.go
generated
vendored
@ -3,9 +3,11 @@ package main
|
|||||||
import (
|
import (
|
||||||
"flag"
|
"flag"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"hash"
|
||||||
"io"
|
"io"
|
||||||
"log"
|
"log"
|
||||||
"os"
|
"os"
|
||||||
|
"runtime/pprof"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"code.cloudfoundry.org/bytefmt"
|
"code.cloudfoundry.org/bytefmt"
|
||||||
@ -33,7 +35,10 @@ func genMasks() (res []uint64) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
func hash2uint64(s []byte) (res uint64) {
|
// Gets the hash sum as a uint64
|
||||||
|
func sum64(h hash.Hash) (res uint64) {
|
||||||
|
buf := make([]byte, 0, 8)
|
||||||
|
s := h.Sum(buf)
|
||||||
for _, b := range s {
|
for _, b := range s {
|
||||||
res <<= 8
|
res <<= 8
|
||||||
res |= uint64(b)
|
res |= uint64(b)
|
||||||
@ -42,18 +47,27 @@ func hash2uint64(s []byte) (res uint64) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
|
cpuprofile := flag.String("cpuprofile", "", "write cpu profile to file")
|
||||||
dostats := flag.Bool("stats", false, "Do some stats about the rolling sum")
|
dostats := flag.Bool("stats", false, "Do some stats about the rolling sum")
|
||||||
size := flag.String("size", "256M", "How much data to read")
|
size := flag.String("size", "256M", "How much data to read")
|
||||||
flag.Parse()
|
flag.Parse()
|
||||||
|
|
||||||
|
if *cpuprofile != "" {
|
||||||
|
f, err := os.Create(*cpuprofile)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
pprof.StartCPUProfile(f)
|
||||||
|
defer pprof.StopCPUProfile()
|
||||||
|
}
|
||||||
|
|
||||||
fileSize, err := bytefmt.ToBytes(*size)
|
fileSize, err := bytefmt.ToBytes(*size)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatal(err)
|
log.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
bufsize := 16 * MiB
|
bufsize := 16 * MiB
|
||||||
rbuf := make([]byte, bufsize)
|
buf := make([]byte, bufsize)
|
||||||
hbuf := make([]byte, 0, 8)
|
|
||||||
t := time.Now()
|
t := time.Now()
|
||||||
|
|
||||||
f, err := os.Open("/dev/urandom")
|
f, err := os.Open("/dev/urandom")
|
||||||
@ -66,10 +80,10 @@ func main() {
|
|||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
io.ReadFull(f, rbuf)
|
io.ReadFull(f, buf)
|
||||||
|
|
||||||
roll := rollsum.New()
|
roll := rollsum.New()
|
||||||
roll.Write(rbuf[:64])
|
roll.Write(buf[:64])
|
||||||
|
|
||||||
masks := genMasks()
|
masks := genMasks()
|
||||||
hits := make(map[uint64]uint64)
|
hits := make(map[uint64]uint64)
|
||||||
@ -97,15 +111,15 @@ func main() {
|
|||||||
fmt.Printf(status)
|
fmt.Printf(status)
|
||||||
fmt.Printf("\r")
|
fmt.Printf("\r")
|
||||||
}
|
}
|
||||||
_, err := io.ReadFull(f, rbuf)
|
_, err := io.ReadFull(f, buf)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
k = 0
|
k = 0
|
||||||
}
|
}
|
||||||
roll.Roll(rbuf[k])
|
roll.Roll(buf[k])
|
||||||
if *dostats {
|
if *dostats {
|
||||||
s := hash2uint64(roll.Sum(hbuf))
|
s := sum64(roll)
|
||||||
for _, m := range masks {
|
for _, m := range masks {
|
||||||
if s&m == m {
|
if s&m == m {
|
||||||
hits[m] += 1
|
hits[m] += 1
|
||||||
|
|||||||
2
vendor/manifest
vendored
2
vendor/manifest
vendored
@ -94,7 +94,7 @@
|
|||||||
"importpath": "github.com/chmduquesne/rollinghash",
|
"importpath": "github.com/chmduquesne/rollinghash",
|
||||||
"repository": "https://github.com/chmduquesne/rollinghash",
|
"repository": "https://github.com/chmduquesne/rollinghash",
|
||||||
"vcs": "git",
|
"vcs": "git",
|
||||||
"revision": "abb8cbaf9915e48ee20cae94bcd94221b61707a2",
|
"revision": "a60f8e7142b536ea61bb5d84014171189eeaaa81",
|
||||||
"branch": "master",
|
"branch": "master",
|
||||||
"notests": true
|
"notests": true
|
||||||
},
|
},
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user