cmd/syncthing: Enable KCP by default

Also, use upstream library, as my changes have been merged.
This commit is contained in:
Audrius Butkevicius
2017-10-17 23:17:10 +01:00
committed by GitHub
parent 889814a1af
commit fb7264a663
55 changed files with 3016 additions and 2798 deletions

vendor/github.com/templexxx/cpufeat/LICENSE generated vendored Normal file

@@ -0,0 +1,27 @@
Copyright (c) 2009 The Go Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

vendor/github.com/templexxx/cpufeat/cpu.go generated vendored Normal file

@@ -0,0 +1,32 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package cpu implements processor feature detection
// used by the Go standard library.
package cpufeat
var X86 x86
// The booleans in x86 contain the correspondingly named cpuid feature bit.
// HasAVX and HasAVX2 are only set if the OS does support XMM and YMM registers
// in addition to the cpuid feature bit being set.
// The struct is padded to avoid false sharing.
type x86 struct {
_ [CacheLineSize]byte
HasAES bool
HasAVX bool
HasAVX2 bool
HasBMI1 bool
HasBMI2 bool
HasERMS bool
HasOSXSAVE bool
HasPCLMULQDQ bool
HasPOPCNT bool
HasSSE2 bool
HasSSE3 bool
HasSSSE3 bool
HasSSE41 bool
HasSSE42 bool
_ [CacheLineSize]byte
}
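Consumers read these flags directly once package init has run; rs_amd64.go later in this diff selects its SIMD path exactly this way. A minimal standalone sketch (using the vendored import path):

package main

import (
    "fmt"

    "github.com/templexxx/cpufeat"
)

func main() {
    // cpufeat's init() has already populated X86 via CPUID by now.
    switch {
    case cpufeat.X86.HasAVX2:
        fmt.Println("using AVX2 kernels")
    case cpufeat.X86.HasSSSE3:
        fmt.Println("using SSSE3 kernels")
    default:
        fmt.Println("falling back to pure Go")
    }
}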

vendor/github.com/templexxx/cpufeat/cpu_arm.go generated vendored Normal file

@@ -0,0 +1,7 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cpufeat
const CacheLineSize = 32

vendor/github.com/templexxx/cpufeat/cpu_arm64.go generated vendored Normal file

@@ -0,0 +1,7 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cpufeat
const CacheLineSize = 32

vendor/github.com/templexxx/cpufeat/cpu_mips.go generated vendored Normal file

@@ -0,0 +1,7 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cpufeat
const CacheLineSize = 32

vendor/github.com/templexxx/cpufeat/cpu_mips64.go generated vendored Normal file

@@ -0,0 +1,7 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cpufeat
const CacheLineSize = 32

vendor/github.com/templexxx/cpufeat/cpu_mips64le.go generated vendored Normal file

@@ -0,0 +1,7 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cpufeat
const CacheLineSize = 32

vendor/github.com/templexxx/cpufeat/cpu_mipsle.go generated vendored Normal file

@@ -0,0 +1,7 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cpufeat
const CacheLineSize = 32

vendor/github.com/templexxx/cpufeat/cpu_ppc64.go generated vendored Normal file

@@ -0,0 +1,7 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cpufeat
const CacheLineSize = 128

vendor/github.com/templexxx/cpufeat/cpu_ppc64le.go generated vendored Normal file

@@ -0,0 +1,7 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cpufeat
const CacheLineSize = 128

vendor/github.com/templexxx/cpufeat/cpu_s390x.go generated vendored Normal file

@@ -0,0 +1,7 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cpufeat
const CacheLineSize = 256

vendor/github.com/templexxx/cpufeat/cpu_x86.go generated vendored Normal file

@@ -0,0 +1,59 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build 386 amd64 amd64p32
package cpufeat
const CacheLineSize = 64
// cpuid is implemented in cpu_x86.s.
func cpuid(eaxArg, ecxArg uint32) (eax, ebx, ecx, edx uint32)
// xgetbv with ecx = 0 is implemented in cpu_x86.s.
func xgetbv() (eax, edx uint32)
func init() {
maxId, _, _, _ := cpuid(0, 0)
if maxId < 1 {
return
}
_, _, ecx1, edx1 := cpuid(1, 0)
X86.HasSSE2 = isSet(26, edx1)
X86.HasSSE3 = isSet(0, ecx1)
X86.HasPCLMULQDQ = isSet(1, ecx1)
X86.HasSSSE3 = isSet(9, ecx1)
X86.HasSSE41 = isSet(19, ecx1)
X86.HasSSE42 = isSet(20, ecx1)
X86.HasPOPCNT = isSet(23, ecx1)
X86.HasAES = isSet(25, ecx1)
X86.HasOSXSAVE = isSet(27, ecx1)
osSupportsAVX := false
// For XGETBV, OSXSAVE bit is required and sufficient.
if X86.HasOSXSAVE {
eax, _ := xgetbv()
// Check if XMM and YMM registers have OS support.
osSupportsAVX = isSet(1, eax) && isSet(2, eax)
}
X86.HasAVX = isSet(28, ecx1) && osSupportsAVX
if maxId < 7 {
return
}
_, ebx7, _, _ := cpuid(7, 0)
X86.HasBMI1 = isSet(3, ebx7)
X86.HasAVX2 = isSet(5, ebx7) && osSupportsAVX
X86.HasBMI2 = isSet(8, ebx7)
X86.HasERMS = isSet(9, ebx7)
}
func isSet(bitpos uint, value uint32) bool {
return value&(1<<bitpos) != 0
}

vendor/github.com/templexxx/cpufeat/cpu_x86.s generated vendored Normal file

@@ -0,0 +1,32 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build 386 amd64 amd64p32
#include "textflag.h"
// func cpuid(eaxArg, ecxArg uint32) (eax, ebx, ecx, edx uint32)
TEXT ·cpuid(SB), NOSPLIT, $0-24
MOVL eaxArg+0(FP), AX
MOVL ecxArg+4(FP), CX
CPUID
MOVL AX, eax+8(FP)
MOVL BX, ebx+12(FP)
MOVL CX, ecx+16(FP)
MOVL DX, edx+20(FP)
RET
// func xgetbv() (eax, edx uint32)
TEXT ·xgetbv(SB),NOSPLIT,$0-8
#ifdef GOOS_nacl
// nacl does not support XGETBV.
MOVL $0, eax+0(FP)
MOVL $0, edx+4(FP)
#else
MOVL $0, CX
WORD $0x010f; BYTE $0xd0 //XGETBV
MOVL AX, eax+0(FP)
MOVL DX, edx+4(FP)
#endif
RET

vendor/github.com/templexxx/reedsolomon/LICENSE generated vendored Normal file

@@ -0,0 +1,23 @@
MIT License
Copyright (c) 2017 Templexxx
Copyright (c) 2015 Klaus Post
Copyright (c) 2015 Backblaze
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.


@@ -0,0 +1,193 @@
package main
import (
"flag"
"fmt"
"os"
)
var vects = flag.Uint64("vects", 20, "number of vects (data+parity)")
var data = flag.Uint64("data", 0, "number of data vects; leave it 0 if you want to "+
"get the max num of inverse matrices")
func init() {
flag.Usage = func() {
fmt.Printf("Usage of %s:\n", os.Args[0])
fmt.Println(" cntinverse [-flags]")
fmt.Println(" Valid flags:")
flag.PrintDefaults()
}
}
func main() {
flag.Parse()
if *vects > 256 {
fmt.Println("Error: vects must <= 256")
os.Exit(1)
}
if *data == 0 {
n := getMAXCCombination(*vects)
fmt.Println("max num of inverse matrix :", n)
os.Exit(0)
}
n := getCCombination(*vects, *data)
fmt.Println("num of inverse matrix:", n)
os.Exit(0)
}
func getMAXCCombination(a uint64) uint64 {
b := a / 2 // proved in mathtool/combination.jpg
return getCCombination(a, b)
}
func getCCombination(a, b uint64) uint64 {
top := make([]uint64, a-b)
bottom := make([]uint64, a-b-1)
for i := b + 1; i <= a; i++ {
top[i-b-1] = i
}
var i uint64
for i = 2; i <= a-b; i++ {
bottom[i-2] = i
}
for j := 0; j <= 5; j++ {
cleanEven(top, bottom)
clean3(top, bottom)
clean5(top, bottom)
}
cleanCoffeRound1(top, bottom)
if maxBottomBigger5more1(bottom) {
top = shuffTop(top)
cleanCoffeRound1(top, bottom)
cleanCoffeRound1(bottom, top)
cleanCoffeRound1(top, bottom)
cleanCoffeRound1(bottom, top)
cleanCoffeRound1(top, bottom)
cleanCoffeRound1(bottom, top)
}
var topV, bottomV uint64 = 1, 1
for _, t := range top {
topV = topV * t
}
for _, b := range bottom {
bottomV = bottomV * b
}
return topV / bottomV
}
func cleanEven(top, bottom []uint64) {
for i, b := range bottom {
if even(b) {
for j, t := range top {
if even(t) {
top[j] = t / 2
bottom[i] = b / 2
break
}
}
}
}
}
func even(a uint64) bool {
return a&1 == 0
}
func clean3(top, bottom []uint64) {
for i, b := range bottom {
if mod3(b) {
for j, t := range top {
if mod3(t) {
top[j] = t / 3
bottom[i] = b / 3
break
}
}
}
}
}
func mod3(a uint64) bool {
c := a / 3
if 3*c == a {
return true
}
return false
}
func clean5(top, bottom []uint64) {
for i, b := range bottom {
if mod5(b) {
for j, t := range top {
if mod5(t) {
top[j] = t / 5
bottom[i] = b / 5
break
}
}
}
}
}
func mod5(a uint64) bool {
c := a / 5
if 5*c == a {
return true
}
return false
}
func maxBottomBigger5more1(bottom []uint64) bool {
cnt := 0
for _, b := range bottom {
if b >= 5 {
cnt++
}
}
if cnt >= 2 {
return true
}
return false
}
func cleanCoffeRound1(top, bottom []uint64) {
for i, b := range bottom {
for j, t := range top {
if isCoffe(b, t) {
top[j] = t / b
bottom[i] = 1
break
}
}
}
}
func isCoffe(b, t uint64) bool {
c := t / b
if c*b == t {
return true
}
return false
}
func shuffTop(top []uint64) []uint64 {
var tmp uint64 = 1
newLen := len(top) + 1
for i, t := range top {
if t <= 5 {
tmp = tmp * t
newLen--
top[i] = 1
}
}
topNew := make([]uint64, newLen)
topNew[0] = tmp
cnt := 1
for _, t := range top {
if t != 1 {
topNew[cnt] = t
cnt++
}
}
return topNew
}
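getCCombination computes the binomial coefficient C(a, b) = a!/(b!(a-b)!) by cancelling factors between numerator and denominator to avoid overflow; getMAXCCombination uses b = a/2 because that is where C(a, b) peaks. As a sanity check against an independent implementation, math/big gives the same count (a hypothetical snippet, not part of this commit); for example, cntinverse -vects 20 -data 10 should report C(20, 10) = 184756:

package main

import (
    "fmt"
    "math/big"
)

func main() {
    // Number of possible inverse matrices for t=20 vects, d=10 data vects.
    n := new(big.Int).Binomial(20, 10)
    fmt.Println("num of inverse matrix:", n) // 184756
}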


@@ -0,0 +1,270 @@
package main
import (
"bufio"
"fmt"
"log"
"os"
"strconv"
"strings"
)
// set deg here
const deg = 8 // <= 8
type polynomial [deg + 1]byte
func main() {
f, err := os.OpenFile("tables", os.O_WRONLY|os.O_CREATE, 0666)
if err != nil {
log.Fatalln(err)
}
defer f.Close()
outputWriter := bufio.NewWriter(f)
ps := genPrimitivePolynomial()
title := strconv.FormatInt(int64(deg), 10) + " degree primitive polynomial\n"
var pss string
for i, p := range ps {
pf := formatPolynomial(p)
pf = strconv.FormatInt(int64(i+1), 10) + ". " + pf + ";\n"
pss = pss + pf
}
body := fmt.Sprintf(title+"%v", pss)
outputWriter.WriteString(body)
// set the primitive polynomial here to generate the tables
// x^8+x^4+x^3+x^2+1
var primitivePolynomial polynomial
primitivePolynomial[0] = 1
primitivePolynomial[2] = 1
primitivePolynomial[3] = 1
primitivePolynomial[4] = 1
primitivePolynomial[8] = 1
lenExpTable := (1 << deg) - 1
expTable := genExpTable(primitivePolynomial, lenExpTable)
body = fmt.Sprintf("expTbl: %#v\n", expTable)
outputWriter.WriteString(body)
logTable := genLogTable(expTable)
body = fmt.Sprintf("logTbl: %#v\n", logTable)
outputWriter.WriteString(body)
mulTable := genMulTable(expTable, logTable)
body = fmt.Sprintf("mulTbl: %#v\n", mulTable)
outputWriter.WriteString(body)
lowTable, highTable := genMulTableHalf(mulTable)
body = fmt.Sprintf("lowTbl: %#v\n", lowTable)
outputWriter.WriteString(body)
body = fmt.Sprintf("highTbl: %#v\n", highTable)
outputWriter.WriteString(body)
var combTable [256][32]byte
for i := range combTable {
l := lowTable[i]
for j := 0; j < 16; j++ {
combTable[i][j] = l[j]
}
h := highTable[i][:]
for k := 16; k < 32; k++ {
combTable[i][k] = h[k-16]
}
}
body = fmt.Sprintf("lowhighTbl: %#v\n", combTable)
outputWriter.WriteString(body)
inverseTable := genInverseTable(mulTable)
body = fmt.Sprintf("inverseTbl: %#v\n", inverseTable)
outputWriter.WriteString(body)
outputWriter.Flush()
}
// generate primitive Polynomial
func genPrimitivePolynomial() []polynomial {
// drop polynomial x, so the constant term must be 1;
// thus there are 2^(deg-1) polynomials
cnt := 1 << (deg - 1)
var polynomials []polynomial
var p polynomial
p[0] = 1
p[deg] = 1
// gen all Polynomials
for i := 0; i < cnt; i++ {
p = genPolynomial(p, 1)
polynomials = append(polynomials, p)
}
// drop polynomials divisible by x+1: keep only those with an odd number of nonzero coefficients
var psRaw []polynomial
for _, p := range polynomials {
var n int
for _, v := range p {
if v == 1 {
n++
}
}
if n&1 != 0 {
psRaw = append(psRaw, p)
}
}
// check: is the order of the primitive element == 2^deg - 1?
var ps []polynomial
for _, p := range psRaw {
lenTable := (1 << deg) - 1
table := genExpTable(p, lenTable)
var numOf1 int
for _, v := range table {
// cnt 1 in ExpTable
if int(v) == 1 {
numOf1++
}
}
if numOf1 == 1 {
ps = append(ps, p)
}
}
return ps
}
func genPolynomial(p polynomial, i int) polynomial {
if p[i] == 0 {
p[i] = 1
} else {
p[i] = 0
i++
if i == deg {
return p
}
p = genPolynomial(p, i)
}
return p
}
func genExpTable(primitivePolynomial polynomial, exp int) []byte {
table := make([]byte, exp)
var rawPolynomial polynomial
rawPolynomial[1] = 1
table[0] = byte(1)
table[1] = byte(2)
for i := 2; i < exp; i++ {
rawPolynomial = expGrowPolynomial(rawPolynomial, primitivePolynomial)
table[i] = byte(getValueOfPolynomial(rawPolynomial))
}
return table
}
func expGrowPolynomial(raw, primitivePolynomial polynomial) polynomial {
var newP polynomial
for i, v := range raw[:deg] {
if v == 1 {
newP[i+1] = 1
}
}
if newP[deg] == 1 {
for i, v := range primitivePolynomial[:deg] {
if v == 1 {
if newP[i] == 1 {
newP[i] = 0
} else {
newP[i] = 1
}
}
}
}
newP[deg] = 0
return newP
}
func getValueOfPolynomial(p polynomial) uint8 {
var v uint8
for i, coefficient := range p[:deg] {
if coefficient != 0 {
add := 1 << uint8(i)
v += uint8(add)
}
}
return v
}
func genLogTable(expTable []byte) []byte {
table := make([]byte, (1 << deg))
// table[0] cannot be obtained as a power of the primitive element
table[0] = 0
for i, v := range expTable {
table[v] = byte(i)
}
return table
}
func genMulTable(expTable, logTable []byte) [256][256]byte {
var result [256][256]byte
for a := range result {
for b := range result[a] {
if a == 0 || b == 0 {
result[a][b] = 0
continue
}
logA := int(logTable[a])
logB := int(logTable[b])
logSum := logA + logB
for logSum >= 255 {
logSum -= 255
}
result[a][b] = expTable[logSum]
}
}
return result
}
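genMulTable reduces GF(2^8) multiplication to addition of discrete logarithms modulo 255 (the order of the multiplicative group): a*b = exp[(log a + log b) mod 255], with zero handled separately since 0 has no logarithm. A small sketch of a lookup using the tables generated above:

// gfMulLog multiplies a and b in GF(2^8) via the exp/log tables.
func gfMulLog(expTable, logTable []byte, a, b byte) byte {
    if a == 0 || b == 0 {
        return 0 // zero has no discrete log
    }
    sum := int(logTable[a]) + int(logTable[b])
    if sum >= 255 {
        sum -= 255 // wrap around the group order
    }
    return expTable[sum]
}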
func genMulTableHalf(mulTable [256][256]byte) (low [256][16]byte, high [256][16]byte) {
for a := range low {
for b := range low {
//result := 0
var result byte
if !(a == 0 || b == 0) {
//result = int(mulTable[a][b])
result = mulTable[a][b]
}
// b & 00001111, [0,15]
if (b & 0xf) == b {
low[a][b] = result
}
// b & 11110000, [240,255]
if (b & 0xf0) == b {
high[a][b>>4] = result
}
}
}
return
}
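genMulTableHalf exploits linearity over XOR: since b = (b & 0x0f) ^ (b & 0xf0), a full 256-entry multiply column can be split into two 16-entry tables, small enough for the PSHUFB-based kernels in rs_amd64.s. The identity the half tables satisfy (a sketch using the tables built above):

// mulViaNibbles reproduces mulTable[a][b] from the two half tables:
// the low table covers b's low nibble, the high table its high nibble.
func mulViaNibbles(low, high *[256][16]byte, a, b byte) byte {
    return low[a][b&0x0f] ^ high[a][b>>4]
}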
func genInverseTable(mulTable [256][256]byte) [256]byte {
var inVerseTable [256]byte
for i, t := range mulTable {
for j, v := range t {
if int(v) == 1 {
inVerseTable[i] = byte(j)
}
}
}
return inVerseTable
}
func formatPolynomial(p polynomial) string {
var ps string
for i := deg; i > 1; i-- {
if p[i] == 1 {
ps = ps + "x^" + strconv.FormatInt(int64(i), 10) + "+"
}
}
if p[1] == 1 {
ps = ps + "x+"
}
if p[0] == 1 {
ps = ps + "1"
} else {
ps = strings.TrimSuffix(ps, "+")
}
return ps
}

vendor/github.com/templexxx/reedsolomon/matrix.go generated vendored Normal file

@@ -0,0 +1,156 @@
package reedsolomon
import "errors"
type matrix []byte
func genEncMatrixCauchy(d, p int) matrix {
t := d + p
m := make([]byte, t*d)
for i := 0; i < d; i++ {
m[i*d+i] = byte(1)
}
d2 := d * d
for i := d; i < t; i++ {
for j := 0; j < d; j++ {
d := i ^ j
a := inverseTbl[d]
m[d2] = byte(a)
d2++
}
}
return m
}
func gfExp(b byte, n int) byte {
if n == 0 {
return 1
}
if b == 0 {
return 0
}
a := logTbl[b]
ret := int(a) * n
for ret >= 255 {
ret -= 255
}
return byte(expTbl[ret])
}
func genVandMatrix(vm []byte, t, d int) {
for i := 0; i < t; i++ {
for j := 0; j < d; j++ {
vm[i*d+j] = gfExp(byte(i), j)
}
}
}
func (m matrix) mul(right matrix, rows, cols int, r []byte) {
for i := 0; i < rows; i++ {
for j := 0; j < cols; j++ {
var v byte
for k := 0; k < cols; k++ {
v ^= gfMul(m[i*cols+k], right[k*cols+j])
}
r[i*cols+j] = v
}
}
}
func genEncMatrixVand(d, p int) (matrix, error) {
t := d + p
buf := make([]byte, (2*t+4*d)*d)
vm := buf[:t*d]
genVandMatrix(vm, t, d)
top := buf[t*d : (t+d)*d]
copy(top, vm[:d*d])
raw := buf[(t+d)*d : (t+3*d)*d]
im := buf[(t+3*d)*d : (t+4*d)*d]
err := matrix(top).invert(raw, d, im)
if err != nil {
return nil, err
}
r := buf[(t+4*d)*d : (2*t+4*d)*d]
matrix(vm).mul(im, t, d, r)
return matrix(r), nil
}
// [I|m'] -> [m']
func (m matrix) subMatrix(n int, r []byte) {
for i := 0; i < n; i++ {
off := i * n
copy(r[off:off+n], m[2*off+n:2*(off+n)])
}
}
func (m matrix) invert(raw matrix, n int, im []byte) error {
// [m] -> [m|I]
for i := 0; i < n; i++ {
t := i * n
copy(raw[2*t:2*t+n], m[t:t+n])
raw[2*t+i+n] = byte(1)
}
err := gauss(raw, n)
if err != nil {
return err
}
raw.subMatrix(n, im)
return nil
}
func (m matrix) swap(i, j, n int) {
for k := 0; k < n; k++ {
m[i*n+k], m[j*n+k] = m[j*n+k], m[i*n+k]
}
}
func gfMul(a, b byte) byte {
return mulTbl[a][b]
}
var errSingular = errors.New("rs.invert: matrix is singular")
// [m|I] -> [I|m']
func gauss(m matrix, n int) error {
n2 := 2 * n
for i := 0; i < n; i++ {
if m[i*n2+i] == 0 {
for j := i + 1; j < n; j++ {
if m[j*n2+i] != 0 {
m.swap(i, j, n2)
break
}
}
}
if m[i*n2+i] == 0 {
return errSingular
}
if m[i*n2+i] != 1 {
d := m[i*n2+i]
scale := inverseTbl[d]
for c := 0; c < n2; c++ {
m[i*n2+c] = gfMul(m[i*n2+c], scale)
}
}
for j := i + 1; j < n; j++ {
if m[j*n2+i] != 0 {
scale := m[j*n2+i]
for c := 0; c < n2; c++ {
m[j*n2+c] ^= gfMul(scale, m[i*n2+c])
}
}
}
}
for k := 0; k < n; k++ {
for j := 0; j < k; j++ {
if m[j*n2+k] != 0 {
scale := m[j*n2+k]
for c := 0; c < n2; c++ {
m[j*n2+c] ^= gfMul(scale, m[k*n2+c])
}
}
}
}
return nil
}

vendor/github.com/templexxx/reedsolomon/rs.go generated vendored Normal file

@@ -0,0 +1,280 @@
/*
Reed-Solomon Codes over GF(2^8)
Primitive Polynomial: x^8+x^4+x^3+x^2+1
Galois Field arithmetic using Intel SIMD instructions (AVX2 or SSSE3)
*/
package reedsolomon
import "errors"
// Encoder implements Reed-Solomon encoding/reconstructing.
type Encoder interface {
// Encode multiplies the generator matrix with the data;
// len(vects) must equal the number of data+parity vects.
Encode(vects [][]byte) error
// Reconstruct repairs lost data & parity.
// Set a vect to nil if it is lost.
// The result is put back into the original position of vects:
// if you lost vects[0], after reconstruction its data is back in vects[0].
Reconstruct(vects [][]byte) error
// ReconstructData repairs lost data only.
// Set a vect to nil if it is lost.
ReconstructData(vects [][]byte) error
// ReconstWithPos repairs lost data & parity given the positions of surviving ("has") and lost vects.
// It saves bandwidth & disk I/O compared with Reconstruct when fewer vects than parity are lost.
// As with any erasure code, we must know which vects are broken,
// so it's necessary to provide such APIs.
// len(has) must equal the number of data vects.
// Example:
// in 3+2, the whole position list is [0,1,2,3,4];
// if vects[0] is lost, "has" could be [1,2,3] or [1,2,4] or ...
// (when "has" is [1,2,3] you must be sure vects[1], vects[2], vects[3] hold correct data),
// and "dLost" will be [0].
// Notes:
// 1. the lists above are in increasing order; TODO: support out-of-order
// 2. each vect has the same length; don't set any to nil,
// so no extra slices need to be allocated
ReconstWithPos(vects [][]byte, has, dLost, pLost []int) error
// ReconstDataWithPos repairs lost data given the positions of surviving and lost vects.
// No need to append positions of lost parity vects to "lost".
ReconstDataWithPos(vects [][]byte, has, dLost []int) error
}
func checkCfg(d, p int) error {
if (d <= 0) || (p <= 0) {
return errors.New("rs.New: data or parity <= 0")
}
if d+p >= 256 {
return errors.New("rs.New: data+parity >= 256")
}
return nil
}
// New creates an Encoder (Vandermonde matrix as the encoding matrix)
func New(data, parity int) (enc Encoder, err error) {
err = checkCfg(data, parity)
if err != nil {
return
}
e, err := genEncMatrixVand(data, parity)
if err != nil {
return
}
return newRS(data, parity, e), nil
}
// NewCauchy creates an Encoder (Cauchy matrix as the generator matrix)
func NewCauchy(data, parity int) (enc Encoder, err error) {
err = checkCfg(data, parity)
if err != nil {
return
}
e := genEncMatrixCauchy(data, parity)
return newRS(data, parity, e), nil
}
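A minimal usage sketch of the API above (3 data + 2 parity; the 2-byte vects are illustrative):

package main

import (
    "fmt"

    "github.com/templexxx/reedsolomon"
)

func main() {
    enc, err := reedsolomon.New(3, 2) // 3 data + 2 parity vects
    if err != nil {
        panic(err)
    }
    vects := [][]byte{{1, 2}, {3, 4}, {5, 6}, make([]byte, 2), make([]byte, 2)}
    if err := enc.Encode(vects); err != nil { // fills the two parity vects
        panic(err)
    }
    vects[0] = nil // simulate a lost data vect
    if err := enc.Reconstruct(vects); err != nil {
        panic(err)
    }
    fmt.Println(vects[0]) // [1 2] again
}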
type encBase struct {
data int
parity int
encode []byte
gen []byte
}
func checkEnc(d, p int, vs [][]byte) (size int, err error) {
total := len(vs)
if d+p != total {
err = errors.New("rs.checkER: vects not match rs args")
return
}
size = len(vs[0])
if size == 0 {
err = errors.New("rs.checkER: vects size = 0")
return
}
for i := 1; i < total; i++ {
if len(vs[i]) != size {
err = errors.New("rs.checkER: vects size mismatch")
return
}
}
return
}
func (e *encBase) Encode(vects [][]byte) (err error) {
d := e.data
p := e.parity
_, err = checkEnc(d, p, vects)
if err != nil {
return
}
dv := vects[:d]
pv := vects[d:]
g := e.gen
for i := 0; i < d; i++ {
for j := 0; j < p; j++ {
if i != 0 {
mulVectAdd(g[j*d+i], dv[i], pv[j])
} else {
mulVect(g[j*d], dv[0], pv[j])
}
}
}
return
}
func mulVect(c byte, a, b []byte) {
t := mulTbl[c]
for i := 0; i < len(a); i++ {
b[i] = t[a[i]]
}
}
func mulVectAdd(c byte, a, b []byte) {
t := mulTbl[c]
for i := 0; i < len(a); i++ {
b[i] ^= t[a[i]]
}
}
func (e *encBase) Reconstruct(vects [][]byte) (err error) {
return e.reconstruct(vects, false)
}
func (e *encBase) ReconstructData(vects [][]byte) (err error) {
return e.reconstruct(vects, true)
}
func (e *encBase) ReconstWithPos(vects [][]byte, has, dLost, pLost []int) error {
return e.reconstWithPos(vects, has, dLost, pLost, false)
}
func (e *encBase) ReconstDataWithPos(vects [][]byte, has, dLost []int) error {
return e.reconstWithPos(vects, has, dLost, nil, true)
}
func (e *encBase) reconst(vects [][]byte, has, dLost, pLost []int, dataOnly bool) (err error) {
d := e.data
em := e.encode
dCnt := len(dLost)
size := len(vects[has[0]])
if dCnt != 0 {
vtmp := make([][]byte, d+dCnt)
for i, p := range has {
vtmp[i] = vects[p]
}
for i, p := range dLost {
if len(vects[p]) == 0 {
vects[p] = make([]byte, size)
}
vtmp[i+d] = vects[p]
}
matrixbuf := make([]byte, 4*d*d+dCnt*d)
m := matrixbuf[:d*d]
for i, l := range has {
copy(m[i*d:i*d+d], em[l*d:l*d+d])
}
raw := matrixbuf[d*d : 3*d*d]
im := matrixbuf[3*d*d : 4*d*d]
err2 := matrix(m).invert(raw, d, im)
if err2 != nil {
return err2
}
g := matrixbuf[4*d*d:]
for i, l := range dLost {
copy(g[i*d:i*d+d], im[l*d:l*d+d])
}
etmp := &encBase{data: d, parity: dCnt, gen: g}
err2 = etmp.Encode(vtmp[:d+dCnt])
if err2 != nil {
return err2
}
}
if dataOnly {
return
}
pCnt := len(pLost)
if pCnt != 0 {
vtmp := make([][]byte, d+pCnt)
g := make([]byte, pCnt*d)
for i, l := range pLost {
copy(g[i*d:i*d+d], em[l*d:l*d+d])
}
for i := 0; i < d; i++ {
vtmp[i] = vects[i]
}
for i, p := range pLost {
if len(vects[p]) == 0 {
vects[p] = make([]byte, size)
}
vtmp[i+d] = vects[p]
}
etmp := &encBase{data: d, parity: pCnt, gen: g}
err2 := etmp.Encode(vtmp[:d+pCnt])
if err2 != nil {
return err2
}
}
return
}
func (e *encBase) reconstWithPos(vects [][]byte, has, dLost, pLost []int, dataOnly bool) (err error) {
d := e.data
p := e.parity
// TODO: check more, e.g. an element of has may also appear in lost; also deal with len(has) > d
if len(has) != d {
return errors.New("rs.Reconst: not enough vects")
}
dCnt := len(dLost)
if dCnt > p {
return errors.New("rs.Reconst: not enough vects")
}
pCnt := len(pLost)
if pCnt > p {
return errors.New("rs.Reconst: not enough vects")
}
return e.reconst(vects, has, dLost, pLost, dataOnly)
}
func (e *encBase) reconstruct(vects [][]byte, dataOnly bool) (err error) {
d := e.data
p := e.parity
t := d + p
listBuf := make([]int, t+p)
has := listBuf[:d]
dLost := listBuf[d:t]
pLost := listBuf[t : t+p]
hasCnt, dCnt, pCnt := 0, 0, 0
for i := 0; i < t; i++ {
if vects[i] != nil {
if hasCnt < d {
has[hasCnt] = i
hasCnt++
}
} else {
if i < d {
if dCnt < p {
dLost[dCnt] = i
dCnt++
} else {
return errors.New("rs.Reconst: not enough vects")
}
} else {
if pCnt < p {
pLost[pCnt] = i
pCnt++
} else {
return errors.New("rs.Reconst: not enough vects")
}
}
}
}
if hasCnt != d {
return errors.New("rs.Reconst: not enough vects")
}
dLost = dLost[:dCnt]
pLost = pLost[:pCnt]
return e.reconst(vects, has, dLost, pLost, dataOnly)
}

vendor/github.com/templexxx/reedsolomon/rs_amd64.go generated vendored Normal file

@@ -0,0 +1,868 @@
package reedsolomon
import (
"errors"
"sync"
"github.com/templexxx/cpufeat"
)
// SIMD Instruction Extensions
const (
none = iota
avx2
ssse3
)
var extension = none
func init() {
getEXT()
}
func getEXT() {
if cpufeat.X86.HasAVX2 {
extension = avx2
return
} else if cpufeat.X86.HasSSSE3 {
extension = ssse3
return
} else {
extension = none
return
}
}
//go:noescape
func copy32B(dst, src []byte) // needs SSE2 (introduced in 2001)
func initTbl(g matrix, rows, cols int, tbl []byte) {
off := 0
for i := 0; i < cols; i++ {
for j := 0; j < rows; j++ {
c := g[j*cols+i]
t := lowhighTbl[c][:]
copy32B(tbl[off:off+32], t)
off += 32
}
}
}
// At most 3060 inverse matrices (data=14, parity=4: C(18,14) = 3060; calculated by mathtool/cntinverse).
// In practice, data is usually below 12 and parity below 5.
func okCache(data, parity int) bool {
if data < 15 && parity < 5 { // you can change this, but data+parity can't exceed 32 (tip: see the cache-key code in makeGen)
return true
}
return false
}
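The 32-position bound exists because makeGen below keys the inverse-matrix cache with a uint32 bitmap of the surviving positions; restated as a hypothetical helper:

// cacheKey mirrors makeGen's key construction: one bit per surviving
// vect position, which is why data+parity must fit in 32.
func cacheKey(has []int) (ikey uint32) {
    for _, p := range has {
        ikey += 1 << uint8(p)
    }
    return
}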
type (
encSSSE3 encSIMD
encAVX2 encSIMD
encSIMD struct {
data int
parity int
encode matrix
gen matrix
tbl []byte
// the inverse-matrix cache is designed for small vect sizes (< 4KB):
// it saves the time spent calculating inverse matrices,
// but it matters less for big vect sizes
enableCache bool
inverseCache iCache
}
iCache struct {
sync.RWMutex
data map[uint32][]byte
}
)
func newRS(d, p int, em matrix) (enc Encoder) {
g := em[d*d:]
if extension == none {
return &encBase{data: d, parity: p, encode: em, gen: g}
}
t := make([]byte, d*p*32)
initTbl(g, p, d, t)
ok := okCache(d, p)
if extension == avx2 {
e := &encAVX2{data: d, parity: p, encode: em, gen: g, tbl: t, enableCache: ok,
inverseCache: iCache{data: make(map[uint32][]byte)}}
return e
}
e := &encSSSE3{data: d, parity: p, encode: em, gen: g, tbl: t, enableCache: ok,
inverseCache: iCache{data: make(map[uint32][]byte)}}
return e
}
// Size of sub-vector: vects are processed in chunks of at most 16KB.
const unit int = 16 * 1024
// getDo returns how many bytes to process in one pass: at most unit,
// and for inputs smaller than unit, n rounded down to a multiple of 16
// (falling back to unit when n < 16; the caller's remain path handles the tail).
func getDo(n int) int {
if n < unit {
c := n >> 4
if c == 0 {
return unit
}
return c << 4
}
return unit
}
func (e *encAVX2) Encode(vects [][]byte) (err error) {
d := e.data
p := e.parity
size, err := checkEnc(d, p, vects)
if err != nil {
return
}
dv := vects[:d]
pv := vects[d:]
start, end := 0, 0
do := getDo(size)
for start < size {
end = start + do
if end <= size {
e.matrixMul(start, end, dv, pv)
start = end
} else {
e.matrixMulRemain(start, size, dv, pv)
start = size
}
}
return
}
//go:noescape
func mulVectAVX2(tbl, d, p []byte)
//go:noescape
func mulVectAddAVX2(tbl, d, p []byte)
func (e *encAVX2) matrixMul(start, end int, dv, pv [][]byte) {
d := e.data
p := e.parity
tbl := e.tbl
off := 0
for i := 0; i < d; i++ {
for j := 0; j < p; j++ {
t := tbl[off : off+32]
if i != 0 {
mulVectAddAVX2(t, dv[i][start:end], pv[j][start:end])
} else {
mulVectAVX2(t, dv[0][start:end], pv[j][start:end])
}
off += 32
}
}
}
func (e *encAVX2) matrixMulRemain(start, end int, dv, pv [][]byte) {
undone := end - start
do := (undone >> 4) << 4
d := e.data
p := e.parity
tbl := e.tbl
if do >= 16 {
end2 := start + do
off := 0
for i := 0; i < d; i++ {
for j := 0; j < p; j++ {
t := tbl[off : off+32]
if i != 0 {
mulVectAddAVX2(t, dv[i][start:end2], pv[j][start:end2])
} else {
mulVectAVX2(t, dv[0][start:end2], pv[j][start:end2])
}
off += 32
}
}
start = end
}
if undone > do {
// may recalculate some data, but still improves performance a lot
start2 := end - 16
if start2 >= 0 {
off := 0
for i := 0; i < d; i++ {
for j := 0; j < p; j++ {
t := tbl[off : off+32]
if i != 0 {
mulVectAddAVX2(t, dv[i][start2:end], pv[j][start2:end])
} else {
mulVectAVX2(t, dv[0][start2:end], pv[j][start2:end])
}
off += 32
}
}
} else {
g := e.gen
for i := 0; i < d; i++ {
for j := 0; j < p; j++ {
if i != 0 {
mulVectAdd(g[j*d+i], dv[i][start:], pv[j][start:])
} else {
mulVect(g[j*d], dv[0][start:], pv[j][start:])
}
}
}
}
}
}
// use the generator matrix rather than tbls for encoding;
// it's designed for reconstructing:
// for small vects, initTbl costs too much time, so drop it;
// for big vects, the tbls can't help much, because the cache will be filled with the vects' data
func (e *encAVX2) encodeGen(vects [][]byte) (err error) {
d := e.data
p := e.parity
size, err := checkEnc(d, p, vects)
if err != nil {
return
}
dv := vects[:d]
pv := vects[d:]
start, end := 0, 0
do := getDo(size)
for start < size {
end = start + do
if end <= size {
e.matrixMulGen(start, end, dv, pv)
start = end
} else {
e.matrixMulRemainGen(start, size, dv, pv)
start = size
}
}
return
}
func (e *encAVX2) matrixMulGen(start, end int, dv, pv [][]byte) {
d := e.data
p := e.parity
g := e.gen
for i := 0; i < d; i++ {
for j := 0; j < p; j++ {
t := lowhighTbl[g[j*d+i]][:]
if i != 0 {
mulVectAddAVX2(t, dv[i][start:end], pv[j][start:end])
} else {
mulVectAVX2(t, dv[0][start:end], pv[j][start:end])
}
}
}
}
func (e *encAVX2) matrixMulRemainGen(start, end int, dv, pv [][]byte) {
undone := end - start
do := (undone >> 4) << 4
d := e.data
p := e.parity
g := e.gen
if do >= 16 {
end2 := start + do
for i := 0; i < d; i++ {
for j := 0; j < p; j++ {
t := lowhighTbl[g[j*d+i]][:]
if i != 0 {
mulVectAddAVX2(t, dv[i][start:end2], pv[j][start:end2])
} else {
mulVectAVX2(t, dv[0][start:end2], pv[j][start:end2])
}
}
}
start = end
}
if undone > do {
start2 := end - 16
if start2 >= 0 {
for i := 0; i < d; i++ {
for j := 0; j < p; j++ {
t := lowhighTbl[g[j*d+i]][:]
if i != 0 {
mulVectAddAVX2(t, dv[i][start2:end], pv[j][start2:end])
} else {
mulVectAVX2(t, dv[0][start2:end], pv[j][start2:end])
}
}
}
} else {
for i := 0; i < d; i++ {
for j := 0; j < p; j++ {
if i != 0 {
mulVectAdd(g[j*d+i], dv[i][start:], pv[j][start:])
} else {
mulVect(g[j*d], dv[0][start:], pv[j][start:])
}
}
}
}
}
}
func (e *encAVX2) Reconstruct(vects [][]byte) (err error) {
return e.reconstruct(vects, false)
}
func (e *encAVX2) ReconstructData(vects [][]byte) (err error) {
return e.reconstruct(vects, true)
}
func (e *encAVX2) ReconstWithPos(vects [][]byte, has, dLost, pLost []int) error {
return e.reconstWithPos(vects, has, dLost, pLost, false)
}
func (e *encAVX2) ReconstDataWithPos(vects [][]byte, has, dLost []int) error {
return e.reconstWithPos(vects, has, dLost, nil, true)
}
func (e *encAVX2) makeGen(has, dLost []int) (gen []byte, err error) {
d := e.data
em := e.encode
cnt := len(dLost)
if !e.enableCache {
matrixbuf := make([]byte, 4*d*d+cnt*d)
m := matrixbuf[:d*d]
for i, l := range has {
copy(m[i*d:i*d+d], em[l*d:l*d+d])
}
raw := matrixbuf[d*d : 3*d*d]
im := matrixbuf[3*d*d : 4*d*d]
err2 := matrix(m).invert(raw, d, im)
if err2 != nil {
return nil, err2
}
g := matrixbuf[4*d*d:]
for i, l := range dLost {
copy(g[i*d:i*d+d], im[l*d:l*d+d])
}
return g, nil
}
var ikey uint32
for _, p := range has {
ikey += 1 << uint8(p)
}
e.inverseCache.RLock()
v, ok := e.inverseCache.data[ikey]
if ok {
im := v
g := make([]byte, cnt*d)
for i, l := range dLost {
copy(g[i*d:i*d+d], im[l*d:l*d+d])
}
e.inverseCache.RUnlock()
return g, nil
}
e.inverseCache.RUnlock()
matrixbuf := make([]byte, 4*d*d+cnt*d)
m := matrixbuf[:d*d]
for i, l := range has {
copy(m[i*d:i*d+d], em[l*d:l*d+d])
}
raw := matrixbuf[d*d : 3*d*d]
im := matrixbuf[3*d*d : 4*d*d]
err2 := matrix(m).invert(raw, d, im)
if err2 != nil {
return nil, err2
}
e.inverseCache.Lock()
e.inverseCache.data[ikey] = im
e.inverseCache.Unlock()
g := matrixbuf[4*d*d:]
for i, l := range dLost {
copy(g[i*d:i*d+d], im[l*d:l*d+d])
}
return g, nil
}
func (e *encAVX2) reconst(vects [][]byte, has, dLost, pLost []int, dataOnly bool) (err error) {
d := e.data
em := e.encode
dCnt := len(dLost)
size := len(vects[has[0]])
if dCnt != 0 {
vtmp := make([][]byte, d+dCnt)
for i, p := range has {
vtmp[i] = vects[p]
}
for i, p := range dLost {
if len(vects[p]) == 0 {
vects[p] = make([]byte, size)
}
vtmp[i+d] = vects[p]
}
g, err2 := e.makeGen(has, dLost)
if err2 != nil {
return err2
}
etmp := &encAVX2{data: d, parity: dCnt, gen: g}
err2 = etmp.encodeGen(vtmp)
if err2 != nil {
return err2
}
}
if dataOnly {
return
}
pCnt := len(pLost)
if pCnt != 0 {
g := make([]byte, pCnt*d)
for i, l := range pLost {
copy(g[i*d:i*d+d], em[l*d:l*d+d])
}
vtmp := make([][]byte, d+pCnt)
for i := 0; i < d; i++ {
vtmp[i] = vects[i]
}
for i, p := range pLost {
if len(vects[p]) == 0 {
vects[p] = make([]byte, size)
}
vtmp[i+d] = vects[p]
}
etmp := &encAVX2{data: d, parity: pCnt, gen: g}
err2 := etmp.encodeGen(vtmp)
if err2 != nil {
return err2
}
}
return
}
func (e *encAVX2) reconstWithPos(vects [][]byte, has, dLost, pLost []int, dataOnly bool) (err error) {
d := e.data
p := e.parity
if len(has) != d {
return errors.New("rs.Reconst: not enough vects")
}
dCnt := len(dLost)
if dCnt > p {
return errors.New("rs.Reconst: not enough vects")
}
pCnt := len(pLost)
if pCnt > p {
return errors.New("rs.Reconst: not enough vects")
}
return e.reconst(vects, has, dLost, pLost, dataOnly)
}
func (e *encAVX2) reconstruct(vects [][]byte, dataOnly bool) (err error) {
d := e.data
p := e.parity
t := d + p
listBuf := make([]int, t+p)
has := listBuf[:d]
dLost := listBuf[d:t]
pLost := listBuf[t : t+p]
hasCnt, dCnt, pCnt := 0, 0, 0
for i := 0; i < t; i++ {
if vects[i] != nil {
if hasCnt < d {
has[hasCnt] = i
hasCnt++
}
} else {
if i < d {
if dCnt < p {
dLost[dCnt] = i
dCnt++
} else {
return errors.New("rs.Reconst: not enough vects")
}
} else {
if pCnt < p {
pLost[pCnt] = i
pCnt++
} else {
return errors.New("rs.Reconst: not enough vects")
}
}
}
}
if hasCnt != d {
return errors.New("rs.Reconst: not enough vects")
}
dLost = dLost[:dCnt]
pLost = pLost[:pCnt]
return e.reconst(vects, has, dLost, pLost, dataOnly)
}
func (e *encSSSE3) Encode(vects [][]byte) (err error) {
d := e.data
p := e.parity
size, err := checkEnc(d, p, vects)
if err != nil {
return
}
dv := vects[:d]
pv := vects[d:]
start, end := 0, 0
do := getDo(size)
for start < size {
end = start + do
if end <= size {
e.matrixMul(start, end, dv, pv)
start = end
} else {
e.matrixMulRemain(start, size, dv, pv)
start = size
}
}
return
}
//go:noescape
func mulVectSSSE3(tbl, d, p []byte)
//go:noescape
func mulVectAddSSSE3(tbl, d, p []byte)
func (e *encSSSE3) matrixMul(start, end int, dv, pv [][]byte) {
d := e.data
p := e.parity
tbl := e.tbl
off := 0
for i := 0; i < d; i++ {
for j := 0; j < p; j++ {
t := tbl[off : off+32]
if i != 0 {
mulVectAddSSSE3(t, dv[i][start:end], pv[j][start:end])
} else {
mulVectSSSE3(t, dv[0][start:end], pv[j][start:end])
}
off += 32
}
}
}
func (e *encSSSE3) matrixMulRemain(start, end int, dv, pv [][]byte) {
undone := end - start
do := (undone >> 4) << 4
d := e.data
p := e.parity
tbl := e.tbl
if do >= 16 {
end2 := start + do
off := 0
for i := 0; i < d; i++ {
for j := 0; j < p; j++ {
t := tbl[off : off+32]
if i != 0 {
mulVectAddSSSE3(t, dv[i][start:end2], pv[j][start:end2])
} else {
mulVectSSSE3(t, dv[0][start:end2], pv[j][start:end2])
}
off += 32
}
}
start = end
}
if undone > do {
start2 := end - 16
if start2 >= 0 {
off := 0
for i := 0; i < d; i++ {
for j := 0; j < p; j++ {
t := tbl[off : off+32]
if i != 0 {
mulVectAddSSSE3(t, dv[i][start2:end], pv[j][start2:end])
} else {
mulVectSSSE3(t, dv[0][start2:end], pv[j][start2:end])
}
off += 32
}
}
} else {
g := e.gen
for i := 0; i < d; i++ {
for j := 0; j < p; j++ {
if i != 0 {
mulVectAdd(g[j*d+i], dv[i][start:], pv[j][start:])
} else {
mulVect(g[j*d], dv[0][start:], pv[j][start:])
}
}
}
}
}
}
// use the generator matrix rather than tbls for encoding;
// it's designed for reconstructing:
// for small vects, initTbl costs too much time, so drop it;
// for big vects, the tbls can't help much, because the cache will be filled with the vects' data
func (e *encSSSE3) encodeGen(vects [][]byte) (err error) {
d := e.data
p := e.parity
size, err := checkEnc(d, p, vects)
if err != nil {
return
}
dv := vects[:d]
pv := vects[d:]
start, end := 0, 0
do := getDo(size)
for start < size {
end = start + do
if end <= size {
e.matrixMulGen(start, end, dv, pv)
start = end
} else {
e.matrixMulRemainGen(start, size, dv, pv)
start = size
}
}
return
}
func (e *encSSSE3) matrixMulGen(start, end int, dv, pv [][]byte) {
d := e.data
p := e.parity
g := e.gen
for i := 0; i < d; i++ {
for j := 0; j < p; j++ {
t := lowhighTbl[g[j*d+i]][:]
if i != 0 {
mulVectAddSSSE3(t, dv[i][start:end], pv[j][start:end])
} else {
mulVectSSSE3(t, dv[0][start:end], pv[j][start:end])
}
}
}
}
func (e *encSSSE3) matrixMulRemainGen(start, end int, dv, pv [][]byte) {
undone := end - start
do := (undone >> 4) << 4
d := e.data
p := e.parity
g := e.gen
if do >= 16 {
end2 := start + do
for i := 0; i < d; i++ {
for j := 0; j < p; j++ {
t := lowhighTbl[g[j*d+i]][:]
if i != 0 {
mulVectAddSSSE3(t, dv[i][start:end2], pv[j][start:end2])
} else {
mulVectSSSE3(t, dv[0][start:end2], pv[j][start:end2])
}
}
}
start = end
}
if undone > do {
start2 := end - 16
if start2 >= 0 {
for i := 0; i < d; i++ {
for j := 0; j < p; j++ {
t := lowhighTbl[g[j*d+i]][:]
if i != 0 {
mulVectAddSSSE3(t, dv[i][start2:end], pv[j][start2:end])
} else {
mulVectSSSE3(t, dv[0][start2:end], pv[j][start2:end])
}
}
}
} else {
for i := 0; i < d; i++ {
for j := 0; j < p; j++ {
if i != 0 {
mulVectAdd(g[j*d+i], dv[i][start:], pv[j][start:])
} else {
mulVect(g[j*d], dv[0][start:], pv[j][start:])
}
}
}
}
}
}
func (e *encSSSE3) Reconstruct(vects [][]byte) (err error) {
return e.reconstruct(vects, false)
}
func (e *encSSSE3) ReconstructData(vects [][]byte) (err error) {
return e.reconstruct(vects, true)
}
func (e *encSSSE3) ReconstWithPos(vects [][]byte, has, dLost, pLost []int) error {
return e.reconstWithPos(vects, has, dLost, pLost, false)
}
func (e *encSSSE3) ReconstDataWithPos(vects [][]byte, has, dLost []int) error {
return e.reconstWithPos(vects, has, dLost, nil, true)
}
func (e *encSSSE3) makeGen(has, dLost []int) (gen []byte, err error) {
d := e.data
em := e.encode
cnt := len(dLost)
if !e.enableCache {
matrixbuf := make([]byte, 4*d*d+cnt*d)
m := matrixbuf[:d*d]
for i, l := range has {
copy(m[i*d:i*d+d], em[l*d:l*d+d])
}
raw := matrixbuf[d*d : 3*d*d]
im := matrixbuf[3*d*d : 4*d*d]
err2 := matrix(m).invert(raw, d, im)
if err2 != nil {
return nil, err2
}
g := matrixbuf[4*d*d:]
for i, l := range dLost {
copy(g[i*d:i*d+d], im[l*d:l*d+d])
}
return g, nil
}
var ikey uint32
for _, p := range has {
ikey += 1 << uint8(p)
}
e.inverseCache.RLock()
v, ok := e.inverseCache.data[ikey]
if ok {
im := v
g := make([]byte, cnt*d)
for i, l := range dLost {
copy(g[i*d:i*d+d], im[l*d:l*d+d])
}
e.inverseCache.RUnlock()
return g, nil
}
e.inverseCache.RUnlock()
matrixbuf := make([]byte, 4*d*d+cnt*d)
m := matrixbuf[:d*d]
for i, l := range has {
copy(m[i*d:i*d+d], em[l*d:l*d+d])
}
raw := matrixbuf[d*d : 3*d*d]
im := matrixbuf[3*d*d : 4*d*d]
err2 := matrix(m).invert(raw, d, im)
if err2 != nil {
return nil, err2
}
e.inverseCache.Lock()
e.inverseCache.data[ikey] = im
e.inverseCache.Unlock()
g := matrixbuf[4*d*d:]
for i, l := range dLost {
copy(g[i*d:i*d+d], im[l*d:l*d+d])
}
return g, nil
}
func (e *encSSSE3) reconst(vects [][]byte, has, dLost, pLost []int, dataOnly bool) (err error) {
d := e.data
em := e.encode
dCnt := len(dLost)
size := len(vects[has[0]])
if dCnt != 0 {
vtmp := make([][]byte, d+dCnt)
for i, p := range has {
vtmp[i] = vects[p]
}
for i, p := range dLost {
if len(vects[p]) == 0 {
vects[p] = make([]byte, size)
}
vtmp[i+d] = vects[p]
}
g, err2 := e.makeGen(has, dLost)
if err2 != nil {
return err2
}
etmp := &encSSSE3{data: d, parity: dCnt, gen: g}
err2 = etmp.encodeGen(vtmp)
if err2 != nil {
return err2
}
}
if dataOnly {
return
}
pCnt := len(pLost)
if pCnt != 0 {
g := make([]byte, pCnt*d)
for i, l := range pLost {
copy(g[i*d:i*d+d], em[l*d:l*d+d])
}
vtmp := make([][]byte, d+pCnt)
for i := 0; i < d; i++ {
vtmp[i] = vects[i]
}
for i, p := range pLost {
if len(vects[p]) == 0 {
vects[p] = make([]byte, size)
}
vtmp[i+d] = vects[p]
}
etmp := &encSSSE3{data: d, parity: pCnt, gen: g}
err2 := etmp.encodeGen(vtmp)
if err2 != nil {
return err2
}
}
return
}
func (e *encSSSE3) reconstWithPos(vects [][]byte, has, dLost, pLost []int, dataOnly bool) (err error) {
d := e.data
p := e.parity
if len(has) != d {
return errors.New("rs.Reconst: not enough vects")
}
dCnt := len(dLost)
if dCnt > p {
return errors.New("rs.Reconst: not enough vects")
}
pCnt := len(pLost)
if pCnt > p {
return errors.New("rs.Reconst: not enough vects")
}
return e.reconst(vects, has, dLost, pLost, dataOnly)
}
func (e *encSSSE3) reconstruct(vects [][]byte, dataOnly bool) (err error) {
d := e.data
p := e.parity
t := d + p
listBuf := make([]int, t+p)
has := listBuf[:d]
dLost := listBuf[d:t]
pLost := listBuf[t : t+p]
hasCnt, dCnt, pCnt := 0, 0, 0
for i := 0; i < t; i++ {
if vects[i] != nil {
if hasCnt < d {
has[hasCnt] = i
hasCnt++
}
} else {
if i < d {
if dCnt < p {
dLost[dCnt] = i
dCnt++
} else {
return errors.New("rs.Reconst: not enough vects")
}
} else {
if pCnt < p {
pLost[pCnt] = i
pCnt++
} else {
return errors.New("rs.Reconst: not enough vects")
}
}
}
}
if hasCnt != d {
return errors.New("rs.Reconst: not enough vects")
}
dLost = dLost[:dCnt]
pLost = pLost[:pCnt]
return e.reconst(vects, has, dLost, pLost, dataOnly)
}

vendor/github.com/templexxx/reedsolomon/rs_amd64.s generated vendored Normal file

@@ -0,0 +1,401 @@
// Reference: www.ssrc.ucsc.edu/Papers/plank-fast13.pdf
#include "textflag.h"
#define low_tbl Y0
#define high_tbl Y1
#define mask Y2
#define in0 Y3
#define in1 Y4
#define in2 Y5
#define in3 Y6
#define in4 Y7
#define in5 Y8
#define in0_h Y10
#define in1_h Y11
#define in2_h Y12
#define in3_h Y13
#define in4_h Y14
#define in5_h Y15
#define in BX
#define out DI
#define len R8
#define pos R9
#define tmp0 R10
#define low_tblx X0
#define high_tblx X1
#define maskx X2
#define in0x X3
#define in0_hx X10
#define tmp0x X9
#define tmp1x X11
#define tmp2x X12
#define tmp3x X13
// func mulVectAVX2(tbl, d, p []byte)
TEXT ·mulVectAVX2(SB), NOSPLIT, $0
MOVQ i+24(FP), in
MOVQ o+48(FP), out
MOVQ tbl+0(FP), tmp0
VMOVDQU (tmp0), low_tblx
VMOVDQU 16(tmp0), high_tblx
MOVB $0x0f, DX
LONG $0x2069e3c4; WORD $0x00d2 // VPINSRB $0x00, EDX, XMM2, XMM2
VPBROADCASTB maskx, maskx
MOVQ in_len+32(FP), len
TESTQ $31, len
JNZ one16b
ymm:
VINSERTI128 $1, low_tblx, low_tbl, low_tbl
VINSERTI128 $1, high_tblx, high_tbl, high_tbl
VINSERTI128 $1, maskx, mask, mask
TESTQ $255, len
JNZ not_aligned
// 256bytes/loop
aligned:
MOVQ $0, pos
loop256b:
VMOVDQU (in)(pos*1), in0
VPSRLQ $4, in0, in0_h
VPAND mask, in0_h, in0_h
VPAND mask, in0, in0
VPSHUFB in0_h, high_tbl, in0_h
VPSHUFB in0, low_tbl, in0
VPXOR in0, in0_h, in0
VMOVDQU in0, (out)(pos*1)
VMOVDQU 32(in)(pos*1), in1
VPSRLQ $4, in1, in1_h
VPAND mask, in1_h, in1_h
VPAND mask, in1, in1
VPSHUFB in1_h, high_tbl, in1_h
VPSHUFB in1, low_tbl, in1
VPXOR in1, in1_h, in1
VMOVDQU in1, 32(out)(pos*1)
VMOVDQU 64(in)(pos*1), in2
VPSRLQ $4, in2, in2_h
VPAND mask, in2_h, in2_h
VPAND mask, in2, in2
VPSHUFB in2_h, high_tbl, in2_h
VPSHUFB in2, low_tbl, in2
VPXOR in2, in2_h, in2
VMOVDQU in2, 64(out)(pos*1)
VMOVDQU 96(in)(pos*1), in3
VPSRLQ $4, in3, in3_h
VPAND mask, in3_h, in3_h
VPAND mask, in3, in3
VPSHUFB in3_h, high_tbl, in3_h
VPSHUFB in3, low_tbl, in3
VPXOR in3, in3_h, in3
VMOVDQU in3, 96(out)(pos*1)
VMOVDQU 128(in)(pos*1), in4
VPSRLQ $4, in4, in4_h
VPAND mask, in4_h, in4_h
VPAND mask, in4, in4
VPSHUFB in4_h, high_tbl, in4_h
VPSHUFB in4, low_tbl, in4
VPXOR in4, in4_h, in4
VMOVDQU in4, 128(out)(pos*1)
VMOVDQU 160(in)(pos*1), in5
VPSRLQ $4, in5, in5_h
VPAND mask, in5_h, in5_h
VPAND mask, in5, in5
VPSHUFB in5_h, high_tbl, in5_h
VPSHUFB in5, low_tbl, in5
VPXOR in5, in5_h, in5
VMOVDQU in5, 160(out)(pos*1)
VMOVDQU 192(in)(pos*1), in0
VPSRLQ $4, in0, in0_h
VPAND mask, in0_h, in0_h
VPAND mask, in0, in0
VPSHUFB in0_h, high_tbl, in0_h
VPSHUFB in0, low_tbl, in0
VPXOR in0, in0_h, in0
VMOVDQU in0, 192(out)(pos*1)
VMOVDQU 224(in)(pos*1), in1
VPSRLQ $4, in1, in1_h
VPAND mask, in1_h, in1_h
VPAND mask, in1, in1
VPSHUFB in1_h, high_tbl, in1_h
VPSHUFB in1, low_tbl, in1
VPXOR in1, in1_h, in1
VMOVDQU in1, 224(out)(pos*1)
ADDQ $256, pos
CMPQ len, pos
JNE loop256b
VZEROUPPER
RET
not_aligned:
MOVQ len, tmp0
ANDQ $255, tmp0
loop32b:
VMOVDQU -32(in)(len*1), in0
VPSRLQ $4, in0, in0_h
VPAND mask, in0_h, in0_h
VPAND mask, in0, in0
VPSHUFB in0_h, high_tbl, in0_h
VPSHUFB in0, low_tbl, in0
VPXOR in0, in0_h, in0
VMOVDQU in0, -32(out)(len*1)
SUBQ $32, len
SUBQ $32, tmp0
JG loop32b
CMPQ len, $256
JGE aligned
VZEROUPPER
RET
one16b:
VMOVDQU -16(in)(len*1), in0x
VPSRLQ $4, in0x, in0_hx
VPAND maskx, in0x, in0x
VPAND maskx, in0_hx, in0_hx
VPSHUFB in0_hx, high_tblx, in0_hx
VPSHUFB in0x, low_tblx, in0x
VPXOR in0x, in0_hx, in0x
VMOVDQU in0x, -16(out)(len*1)
SUBQ $16, len
CMPQ len, $0
JNE ymm
RET
// func mulVectAddAVX2(tbl, d, p []byte)
TEXT ·mulVectAddAVX2(SB), NOSPLIT, $0
MOVQ i+24(FP), in
MOVQ o+48(FP), out
MOVQ tbl+0(FP), tmp0
VMOVDQU (tmp0), low_tblx
VMOVDQU 16(tmp0), high_tblx
MOVB $0x0f, DX
LONG $0x2069e3c4; WORD $0x00d2
VPBROADCASTB maskx, maskx
MOVQ in_len+32(FP), len
TESTQ $31, len
JNZ one16b
ymm:
VINSERTI128 $1, low_tblx, low_tbl, low_tbl
VINSERTI128 $1, high_tblx, high_tbl, high_tbl
VINSERTI128 $1, maskx, mask, mask
TESTQ $255, len
JNZ not_aligned
aligned:
MOVQ $0, pos
loop256b:
VMOVDQU (in)(pos*1), in0
VPSRLQ $4, in0, in0_h
VPAND mask, in0_h, in0_h
VPAND mask, in0, in0
VPSHUFB in0_h, high_tbl, in0_h
VPSHUFB in0, low_tbl, in0
VPXOR in0, in0_h, in0
VPXOR (out)(pos*1), in0, in0
VMOVDQU in0, (out)(pos*1)
VMOVDQU 32(in)(pos*1), in1
VPSRLQ $4, in1, in1_h
VPAND mask, in1_h, in1_h
VPAND mask, in1, in1
VPSHUFB in1_h, high_tbl, in1_h
VPSHUFB in1, low_tbl, in1
VPXOR in1, in1_h, in1
VPXOR 32(out)(pos*1), in1, in1
VMOVDQU in1, 32(out)(pos*1)
VMOVDQU 64(in)(pos*1), in2
VPSRLQ $4, in2, in2_h
VPAND mask, in2_h, in2_h
VPAND mask, in2, in2
VPSHUFB in2_h, high_tbl, in2_h
VPSHUFB in2, low_tbl, in2
VPXOR in2, in2_h, in2
VPXOR 64(out)(pos*1), in2, in2
VMOVDQU in2, 64(out)(pos*1)
VMOVDQU 96(in)(pos*1), in3
VPSRLQ $4, in3, in3_h
VPAND mask, in3_h, in3_h
VPAND mask, in3, in3
VPSHUFB in3_h, high_tbl, in3_h
VPSHUFB in3, low_tbl, in3
VPXOR in3, in3_h, in3
VPXOR 96(out)(pos*1), in3, in3
VMOVDQU in3, 96(out)(pos*1)
VMOVDQU 128(in)(pos*1), in4
VPSRLQ $4, in4, in4_h
VPAND mask, in4_h, in4_h
VPAND mask, in4, in4
VPSHUFB in4_h, high_tbl, in4_h
VPSHUFB in4, low_tbl, in4
VPXOR in4, in4_h, in4
VPXOR 128(out)(pos*1), in4, in4
VMOVDQU in4, 128(out)(pos*1)
VMOVDQU 160(in)(pos*1), in5
VPSRLQ $4, in5, in5_h
VPAND mask, in5_h, in5_h
VPAND mask, in5, in5
VPSHUFB in5_h, high_tbl, in5_h
VPSHUFB in5, low_tbl, in5
VPXOR in5, in5_h, in5
VPXOR 160(out)(pos*1), in5, in5
VMOVDQU in5, 160(out)(pos*1)
VMOVDQU 192(in)(pos*1), in0
VPSRLQ $4, in0, in0_h
VPAND mask, in0_h, in0_h
VPAND mask, in0, in0
VPSHUFB in0_h, high_tbl, in0_h
VPSHUFB in0, low_tbl, in0
VPXOR in0, in0_h, in0
VPXOR 192(out)(pos*1), in0, in0
VMOVDQU in0, 192(out)(pos*1)
VMOVDQU 224(in)(pos*1), in1
VPSRLQ $4, in1, in1_h
VPAND mask, in1_h, in1_h
VPAND mask, in1, in1
VPSHUFB in1_h, high_tbl, in1_h
VPSHUFB in1, low_tbl, in1
VPXOR in1, in1_h, in1
VPXOR 224(out)(pos*1), in1, in1
VMOVDQU in1, 224(out)(pos*1)
ADDQ $256, pos
CMPQ len, pos
JNE loop256b
VZEROUPPER
RET
not_aligned:
MOVQ len, tmp0
ANDQ $255, tmp0
loop32b:
VMOVDQU -32(in)(len*1), in0
VPSRLQ $4, in0, in0_h
VPAND mask, in0_h, in0_h
VPAND mask, in0, in0
VPSHUFB in0_h, high_tbl, in0_h
VPSHUFB in0, low_tbl, in0
VPXOR in0, in0_h, in0
VPXOR -32(out)(len*1), in0, in0
VMOVDQU in0, -32(out)(len*1)
SUBQ $32, len
SUBQ $32, tmp0
JG loop32b
CMPQ len, $256
JGE aligned
VZEROUPPER
RET
one16b:
VMOVDQU -16(in)(len*1), in0x
VPSRLQ $4, in0x, in0_hx
VPAND maskx, in0x, in0x
VPAND maskx, in0_hx, in0_hx
VPSHUFB in0_hx, high_tblx, in0_hx
VPSHUFB in0x, low_tblx, in0x
VPXOR in0x, in0_hx, in0x
VPXOR -16(out)(len*1), in0x, in0x
VMOVDQU in0x, -16(out)(len*1)
SUBQ $16, len
CMPQ len, $0
JNE ymm
RET
// func mulVectSSSE3(tbl, d, p []byte)
TEXT ·mulVectSSSE3(SB), NOSPLIT, $0
MOVQ i+24(FP), in
MOVQ o+48(FP), out
MOVQ tbl+0(FP), tmp0
MOVOU (tmp0), low_tblx
MOVOU 16(tmp0), high_tblx
MOVB $15, tmp0
MOVQ tmp0, maskx
PXOR tmp0x, tmp0x
PSHUFB tmp0x, maskx
MOVQ in_len+32(FP), len
SHRQ $4, len
loop:
MOVOU (in), in0x
MOVOU in0x, in0_hx
PSRLQ $4, in0_hx
PAND maskx, in0x
PAND maskx, in0_hx
MOVOU low_tblx, tmp1x
MOVOU high_tblx, tmp2x
PSHUFB in0x, tmp1x
PSHUFB in0_hx, tmp2x
PXOR tmp1x, tmp2x
MOVOU tmp2x, (out)
ADDQ $16, in
ADDQ $16, out
SUBQ $1, len
JNZ loop
RET
// func mulVectAddSSSE3(tbl, d, p []byte)
TEXT ·mulVectAddSSSE3(SB), NOSPLIT, $0
MOVQ i+24(FP), in
MOVQ o+48(FP), out
MOVQ tbl+0(FP), tmp0
MOVOU (tmp0), low_tblx
MOVOU 16(tmp0), high_tblx
MOVB $15, tmp0
MOVQ tmp0, maskx
PXOR tmp0x, tmp0x
PSHUFB tmp0x, maskx
MOVQ in_len+32(FP), len
SHRQ $4, len
loop:
MOVOU (in), in0x
MOVOU in0x, in0_hx
PSRLQ $4, in0_hx
PAND maskx, in0x
PAND maskx, in0_hx
MOVOU low_tblx, tmp1x
MOVOU high_tblx, tmp2x
PSHUFB in0x, tmp1x
PSHUFB in0_hx, tmp2x
PXOR tmp1x, tmp2x
MOVOU (out), tmp3x
PXOR tmp3x, tmp2x
MOVOU tmp2x, (out)
ADDQ $16, in
ADDQ $16, out
SUBQ $1, len
JNZ loop
RET
// func copy32B(dst, src []byte)
TEXT ·copy32B(SB), NOSPLIT, $0
MOVQ dst+0(FP), SI
MOVQ src+24(FP), DX
MOVOU (DX), X0
MOVOU 16(DX), X1
MOVOU X0, (SI)
MOVOU X1, 16(SI)
RET
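For reference, both SIMD kernels implement the same byte-level operation as mulVect/mulVectAdd in rs.go, only using the split-nibble tables so each 16-byte lane needs just two PSHUFB lookups. A scalar restatement (a sketch assuming lowhighTbl's layout: 16 low-nibble products followed by 16 high-nibble products per coefficient):

// mulVectRef is what mulVectAVX2 computes, one byte at a time.
// tbl is one 32-byte lowhighTbl entry: tbl[0:16] are low-nibble
// products, tbl[16:32] are high-nibble products.
func mulVectRef(tbl, d, p []byte) {
    for i, b := range d {
        p[i] = tbl[b&0x0f] ^ tbl[16+(b>>4)]
    }
}

// mulVectAddRef likewise mirrors mulVectAddAVX2: it XORs the product
// into the existing parity instead of overwriting it.
func mulVectAddRef(tbl, d, p []byte) {
    for i, b := range d {
        p[i] ^= tbl[b&0x0f] ^ tbl[16+(b>>4)]
    }
}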

vendor/github.com/templexxx/reedsolomon/rs_other.go generated vendored Normal file

@@ -0,0 +1,8 @@
// +build !amd64
package reedsolomon
func newRS(d, p int, em matrix) (enc Encoder) {
g := em[d*d:]
return &encBase{data: d, parity: p, encode: em, gen: g}
}

vendor/github.com/templexxx/reedsolomon/tbl.go generated vendored Normal file

File diff suppressed because one or more lines are too long