vendor: Update everything
GitHub-Pull-Request: https://github.com/syncthing/syncthing/pull/4620
This commit is contained in:
23
vendor/github.com/templexxx/reedsolomon/LICENSE
generated
vendored
23
vendor/github.com/templexxx/reedsolomon/LICENSE
generated
vendored
@@ -1,23 +0,0 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2017 Templexxx
|
||||
Copyright (c) 2015 Klaus Post
|
||||
Copyright (c) 2015 Backblaze
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
193
vendor/github.com/templexxx/reedsolomon/mathtool/cntinverse.go
generated
vendored
193
vendor/github.com/templexxx/reedsolomon/mathtool/cntinverse.go
generated
vendored
@@ -1,193 +0,0 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"os"
|
||||
)
|
||||
|
||||
// Command-line flags of the cntinverse tool.
var (
	// vects is the total number of vectors (data plus parity).
	vects = flag.Uint64("vects", 20, "number of vects (data+parity)")

	// data is the number of data vectors. main treats the default 0 as a
	// request for the maximum count over every possible split.
	data = flag.Uint64("data", 0, "number of data vects; keep it empty if you want to "+
		"get the max num of inverse matrix")
)
|
||||
|
||||
// init replaces the default flag usage text with a tool-specific banner
// followed by the list of registered flags.
func init() {
	usage := func() {
		fmt.Printf("Usage of %s:\n", os.Args[0])
		fmt.Println(" cntinverse [-flags]")
		fmt.Println(" Valid flags:")
		flag.PrintDefaults()
	}
	flag.Usage = usage
}
|
||||
|
||||
func main() {
|
||||
flag.Parse()
|
||||
if *vects > 256 {
|
||||
fmt.Println("Error: vects must <= 256")
|
||||
os.Exit(1)
|
||||
}
|
||||
if *data == 0 {
|
||||
n := getMAXCCombination(*vects)
|
||||
fmt.Println("max num of inverse matrix :", n)
|
||||
os.Exit(0)
|
||||
}
|
||||
n := getCCombination(*vects, *data)
|
||||
fmt.Println("num of inverse matrix:", n)
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
func getMAXCCombination(a uint64) uint64 {
|
||||
b := a / 2 // proved in mathtool/combination.jpg
|
||||
return getCCombination(a, b)
|
||||
}
|
||||
|
||||
// getCCombination returns the binomial coefficient C(a, b): the number of
// ways to choose b items out of a.
//
// It returns 1 when b == 0 or b == a, and 0 when b > a. The earlier version
// sized its scratch slices with a-b-1, which underflowed and panicked for
// a == b (and for b > a).
//
// The result is exact whenever C(a, b) fits in a uint64. Larger values wrap
// around silently, as any uint64 arithmetic does.
func getCCombination(a, b uint64) uint64 {
	if b > a {
		return 0
	}
	// C(a, b) == C(a, a-b); walk the shorter side.
	k := b
	if a-b < k {
		k = a - b
	}
	result := uint64(1)
	for i := uint64(1); i <= k; i++ {
		// Invariant: result == C(a-k+i-1, i-1). The next value is
		// result*num/i, which is always an integer. Cancelling the common
		// factor of result and i first keeps every intermediate value no
		// larger than the new result, so nothing overflows early.
		num := a - k + i
		g := gcdUint64(result, i)
		result = (result / g) * (num / (i / g))
	}
	return result
}

// gcdUint64 returns the greatest common divisor of x and y (Euclid).
func gcdUint64(x, y uint64) uint64 {
	for y != 0 {
		x, y = y, x%y
	}
	return x
}
|
||||
|
||||
// cleanEven cancels one factor of two per even denominator: each even entry
// of bottom is halved together with the first even entry found in top. A
// denominator with no even partner is left untouched. Both slices are
// modified in place.
func cleanEven(top, bottom []uint64) {
	for bi := range bottom {
		den := bottom[bi]
		if den&1 != 0 {
			continue
		}
		for ti := range top {
			if num := top[ti]; num&1 == 0 {
				top[ti] = num / 2
				bottom[bi] = den / 2
				break
			}
		}
	}
}
|
||||
|
||||
// even reports whether a is divisible by two.
func even(a uint64) bool {
	return a%2 == 0
}
|
||||
|
||||
// clean3 cancels one factor of three per denominator: each entry of bottom
// that is a multiple of three is divided by three together with the first
// multiple of three found in top. Both slices are modified in place.
func clean3(top, bottom []uint64) {
	for bi := range bottom {
		den := bottom[bi]
		if den%3 != 0 {
			continue
		}
		for ti := range top {
			if num := top[ti]; num%3 == 0 {
				top[ti] = num / 3
				bottom[bi] = den / 3
				break
			}
		}
	}
}
|
||||
|
||||
// mod3 reports whether a is a multiple of three.
func mod3(a uint64) bool {
	return a%3 == 0
}
|
||||
|
||||
// clean5 cancels one factor of five per denominator: each entry of bottom
// that is a multiple of five is divided by five together with the first
// multiple of five found in top. Both slices are modified in place.
func clean5(top, bottom []uint64) {
	for bi := range bottom {
		den := bottom[bi]
		if den%5 != 0 {
			continue
		}
		for ti := range top {
			if num := top[ti]; num%5 == 0 {
				top[ti] = num / 5
				bottom[bi] = den / 5
				break
			}
		}
	}
}
|
||||
|
||||
// mod5 reports whether a is a multiple of five.
func mod5(a uint64) bool {
	return a%5 == 0
}
|
||||
|
||||
// maxBottomBigger5more1 reports whether at least two entries of bottom are
// greater than or equal to five.
func maxBottomBigger5more1(bottom []uint64) bool {
	big := 0
	for i := range bottom {
		if bottom[i] >= 5 {
			big++
		}
	}
	return big >= 2
}
|
||||
|
||||
// cleanCoffeRound1 cancels whole denominators: for each entry of bottom it
// looks for the first entry of top that it divides evenly, replaces that top
// entry by the quotient and sets the bottom entry to 1. Entries of bottom
// with no such partner are left unchanged. Both slices are modified in place.
//
// A zero in bottom causes a division-by-zero panic as soon as it is tested
// against any top entry.
func cleanCoffeRound1(top, bottom []uint64) {
	for bi := range bottom {
		den := bottom[bi]
		for ti := range top {
			if num := top[ti]; num%den == 0 {
				top[ti] = num / den
				bottom[bi] = 1
				break
			}
		}
	}
}
|
||||
|
||||
// isCoffe reports whether b divides t without remainder. It panics with a
// division by zero when b is 0.
func isCoffe(b, t uint64) bool {
	return t%b == 0
}
|
||||
|
||||
// shuffTop folds every entry of top that is <= 5 into a single product and
// returns a new slice holding that product first, followed by the remaining
// (> 5) entries in their original order.
//
// Side effect: the folded entries of the input slice are overwritten with 1.
func shuffTop(top []uint64) []uint64 {
	small := uint64(1)
	kept := 0
	for i := range top {
		if top[i] <= 5 {
			small *= top[i]
			top[i] = 1
		} else {
			kept++
		}
	}

	merged := make([]uint64, 1, kept+1)
	merged[0] = small
	for _, v := range top {
		if v != 1 {
			merged = append(merged, v)
		}
	}
	return merged
}
|
||||
270
vendor/github.com/templexxx/reedsolomon/mathtool/gentbls.go
generated
vendored
270
vendor/github.com/templexxx/reedsolomon/mathtool/gentbls.go
generated
vendored
@@ -1,270 +0,0 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// deg is the degree of the polynomials handled by this tool. Field values
// are packed into a single byte, so it must not exceed 8.
const deg = 8 // <= 8

// polynomial stores one coefficient per slot; index i holds the coefficient
// of x^i, for i from 0 up to and including deg.
type polynomial [deg + 1]byte
|
||||
|
||||
func main() {
|
||||
f, err := os.OpenFile("tables", os.O_WRONLY|os.O_CREATE, 0666)
|
||||
if err != nil {
|
||||
log.Fatalln(err)
|
||||
}
|
||||
defer f.Close()
|
||||
outputWriter := bufio.NewWriter(f)
|
||||
ps := genPrimitivePolynomial()
|
||||
title := strconv.FormatInt(int64(deg), 10) + " degree primitive polynomial:\n"
|
||||
var pss string
|
||||
for i, p := range ps {
|
||||
pf := formatPolynomial(p)
|
||||
pf = strconv.FormatInt(int64(i+1), 10) + ". " + pf + ";\n"
|
||||
pss = pss + pf
|
||||
}
|
||||
body := fmt.Sprintf(title+"%v", pss)
|
||||
outputWriter.WriteString(body)
|
||||
|
||||
//set primitive polynomial here to generator tables
|
||||
//x^8+x^4+x^3+x^2+1
|
||||
var primitivePolynomial polynomial
|
||||
primitivePolynomial[0] = 1
|
||||
primitivePolynomial[2] = 1
|
||||
primitivePolynomial[3] = 1
|
||||
primitivePolynomial[4] = 1
|
||||
primitivePolynomial[8] = 1
|
||||
|
||||
lenExpTable := (1 << deg) - 1
|
||||
expTable := genExpTable(primitivePolynomial, lenExpTable)
|
||||
body = fmt.Sprintf("expTbl: %#v\n", expTable)
|
||||
outputWriter.WriteString(body)
|
||||
|
||||
logTable := genLogTable(expTable)
|
||||
body = fmt.Sprintf("logTbl: %#v\n", logTable)
|
||||
outputWriter.WriteString(body)
|
||||
|
||||
mulTable := genMulTable(expTable, logTable)
|
||||
body = fmt.Sprintf("mulTbl: %#v\n", mulTable)
|
||||
outputWriter.WriteString(body)
|
||||
|
||||
lowTable, highTable := genMulTableHalf(mulTable)
|
||||
body = fmt.Sprintf("lowTbl: %#v\n", lowTable)
|
||||
outputWriter.WriteString(body)
|
||||
body = fmt.Sprintf("highTbl: %#v\n", highTable)
|
||||
outputWriter.WriteString(body)
|
||||
|
||||
var combTable [256][32]byte
|
||||
for i := range combTable {
|
||||
l := lowTable[i]
|
||||
for j := 0; j < 16; j++ {
|
||||
combTable[i][j] = l[j]
|
||||
}
|
||||
h := highTable[i][:]
|
||||
for k := 16; k < 32; k++ {
|
||||
combTable[i][k] = h[k-16]
|
||||
}
|
||||
}
|
||||
body = fmt.Sprintf("lowhighTbl: %#v\n", combTable)
|
||||
outputWriter.WriteString(body)
|
||||
|
||||
inverseTable := genInverseTable(mulTable)
|
||||
body = fmt.Sprintf("inverseTbl: %#v\n", inverseTable)
|
||||
outputWriter.WriteString(body)
|
||||
outputWriter.Flush()
|
||||
}
|
||||
|
||||
// generate primitive Polynomial
|
||||
func genPrimitivePolynomial() []polynomial {
|
||||
// drop Polynomial x,so the constant term must be 1
|
||||
// so there are 2^(deg-1) Polynomials
|
||||
cnt := 1 << (deg - 1)
|
||||
var polynomials []polynomial
|
||||
var p polynomial
|
||||
p[0] = 1
|
||||
p[deg] = 1
|
||||
// gen all Polynomials
|
||||
for i := 0; i < cnt; i++ {
|
||||
p = genPolynomial(p, 1)
|
||||
polynomials = append(polynomials, p)
|
||||
}
|
||||
// drop Polynomial x+1, so the cnt of Polynomials is odd
|
||||
var psRaw []polynomial
|
||||
for _, p := range polynomials {
|
||||
var n int
|
||||
for _, v := range p {
|
||||
if v == 1 {
|
||||
n++
|
||||
}
|
||||
}
|
||||
if n&1 != 0 {
|
||||
psRaw = append(psRaw, p)
|
||||
}
|
||||
}
|
||||
// order of primitive element == 2^deg -1 ?
|
||||
var ps []polynomial
|
||||
for _, p := range psRaw {
|
||||
lenTable := (1 << deg) - 1
|
||||
table := genExpTable(p, lenTable)
|
||||
var numOf1 int
|
||||
for _, v := range table {
|
||||
// cnt 1 in ExpTable
|
||||
if int(v) == 1 {
|
||||
numOf1++
|
||||
}
|
||||
}
|
||||
if numOf1 == 1 {
|
||||
ps = append(ps, p)
|
||||
}
|
||||
}
|
||||
return ps
|
||||
}
|
||||
|
||||
func genPolynomial(p polynomial, i int) polynomial {
|
||||
if p[i] == 0 {
|
||||
p[i] = 1
|
||||
} else {
|
||||
p[i] = 0
|
||||
i++
|
||||
if i == deg {
|
||||
return p
|
||||
}
|
||||
p = genPolynomial(p, i)
|
||||
}
|
||||
return p
|
||||
}
|
||||
|
||||
func genExpTable(primitivePolynomial polynomial, exp int) []byte {
|
||||
table := make([]byte, exp)
|
||||
var rawPolynomial polynomial
|
||||
rawPolynomial[1] = 1
|
||||
table[0] = byte(1)
|
||||
table[1] = byte(2)
|
||||
for i := 2; i < exp; i++ {
|
||||
rawPolynomial = expGrowPolynomial(rawPolynomial, primitivePolynomial)
|
||||
table[i] = byte(getValueOfPolynomial(rawPolynomial))
|
||||
}
|
||||
return table
|
||||
}
|
||||
|
||||
func expGrowPolynomial(raw, primitivePolynomial polynomial) polynomial {
|
||||
var newP polynomial
|
||||
for i, v := range raw[:deg] {
|
||||
if v == 1 {
|
||||
newP[i+1] = 1
|
||||
}
|
||||
}
|
||||
if newP[deg] == 1 {
|
||||
for i, v := range primitivePolynomial[:deg] {
|
||||
if v == 1 {
|
||||
if newP[i] == 1 {
|
||||
newP[i] = 0
|
||||
} else {
|
||||
newP[i] = 1
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
newP[deg] = 0
|
||||
return newP
|
||||
}
|
||||
|
||||
func getValueOfPolynomial(p polynomial) uint8 {
|
||||
var v uint8
|
||||
for i, coefficient := range p[:deg] {
|
||||
if coefficient != 0 {
|
||||
add := 1 << uint8(i)
|
||||
v += uint8(add)
|
||||
}
|
||||
}
|
||||
return v
|
||||
}
|
||||
|
||||
func genLogTable(expTable []byte) []byte {
|
||||
table := make([]byte, (1 << deg))
|
||||
//table[0] 无法由本原元的幂得到
|
||||
table[0] = 0
|
||||
for i, v := range expTable {
|
||||
table[v] = byte(i)
|
||||
}
|
||||
return table
|
||||
}
|
||||
|
||||
// genMulTable builds the full 256x256 multiplication table from the exponent
// and logarithm tables: for non-zero a and b the product is
// expTable[(logTable[a]+logTable[b]) mod 255]. Every product with a zero
// operand is 0.
func genMulTable(expTable, logTable []byte) [256][256]byte {
	var products [256][256]byte
	// Row 0 and column 0 keep their zero value.
	for a := 1; a < 256; a++ {
		for b := 1; b < 256; b++ {
			sum := (int(logTable[a]) + int(logTable[b])) % 255
			products[a][b] = expTable[sum]
		}
	}
	return products
}
|
||||
|
||||
// genMulTableHalf splits every row of mulTable into two 16-entry tables:
//
//	low[a][n]  = mulTable[a][n]      (operand has only its low nibble set)
//	high[a][n] = mulTable[a][n<<4]   (operand has only its high nibble set)
//
// Entries where a or the operand is zero are 0 regardless of mulTable.
func genMulTableHalf(mulTable [256][256]byte) (low [256][16]byte, high [256][16]byte) {
	// Row 0 and nibble 0 keep their zero value.
	for a := 1; a < 256; a++ {
		for nibble := 1; nibble < 16; nibble++ {
			low[a][nibble] = mulTable[a][nibble]
			high[a][nibble] = mulTable[a][nibble<<4]
		}
	}
	return low, high
}
|
||||
|
||||
// genInverseTable returns, for every a, the largest b with mulTable[a][b] == 1.
// Rows that contain no 1 keep the value 0.
func genInverseTable(mulTable [256][256]byte) [256]byte {
	var inverses [256]byte
	for a := range mulTable {
		// Scan from the top so the first hit is the last match of a
		// forward scan.
		for b := 255; b >= 0; b-- {
			if mulTable[a][b] == 1 {
				inverses[a] = byte(b)
				break
			}
		}
	}
	return inverses
}
|
||||
|
||||
func formatPolynomial(p polynomial) string {
|
||||
var ps string
|
||||
for i := deg; i > 1; i-- {
|
||||
if p[i] == 1 {
|
||||
ps = ps + "x^" + strconv.FormatInt(int64(i), 10) + "+"
|
||||
}
|
||||
}
|
||||
if p[1] == 1 {
|
||||
ps = ps + "x+"
|
||||
}
|
||||
if p[0] == 1 {
|
||||
ps = ps + "1"
|
||||
} else {
|
||||
strings.TrimSuffix(ps, "+")
|
||||
}
|
||||
return ps
|
||||
}
|
||||
156
vendor/github.com/templexxx/reedsolomon/matrix.go
generated
vendored
156
vendor/github.com/templexxx/reedsolomon/matrix.go
generated
vendored
@@ -1,156 +0,0 @@
|
||||
package reedsolomon
|
||||
|
||||
import "errors"
|
||||
|
||||
// matrix is a byte matrix stored row after row in one flat slice. The slice
// carries no dimensions; every function that works on it takes the row and
// column counts as separate arguments.
type matrix []byte
|
||||
|
||||
func genEncMatrixCauchy(d, p int) matrix {
|
||||
t := d + p
|
||||
m := make([]byte, t*d)
|
||||
for i := 0; i < d; i++ {
|
||||
m[i*d+i] = byte(1)
|
||||
}
|
||||
|
||||
d2 := d * d
|
||||
for i := d; i < t; i++ {
|
||||
for j := 0; j < d; j++ {
|
||||
d := i ^ j
|
||||
a := inverseTbl[d]
|
||||
m[d2] = byte(a)
|
||||
d2++
|
||||
}
|
||||
}
|
||||
return m
|
||||
}
|
||||
|
||||
func gfExp(b byte, n int) byte {
|
||||
if n == 0 {
|
||||
return 1
|
||||
}
|
||||
if b == 0 {
|
||||
return 0
|
||||
}
|
||||
a := logTbl[b]
|
||||
ret := int(a) * n
|
||||
for ret >= 255 {
|
||||
ret -= 255
|
||||
}
|
||||
return byte(expTbl[ret])
|
||||
}
|
||||
|
||||
func genVandMatrix(vm []byte, t, d int) {
|
||||
for i := 0; i < t; i++ {
|
||||
for j := 0; j < d; j++ {
|
||||
vm[i*d+j] = gfExp(byte(i), j)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (m matrix) mul(right matrix, rows, cols int, r []byte) {
|
||||
for i := 0; i < rows; i++ {
|
||||
for j := 0; j < cols; j++ {
|
||||
var v byte
|
||||
for k := 0; k < cols; k++ {
|
||||
v ^= gfMul(m[i*cols+k], right[k*cols+j])
|
||||
}
|
||||
r[i*cols+j] = v
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func genEncMatrixVand(d, p int) (matrix, error) {
|
||||
t := d + p
|
||||
buf := make([]byte, (2*t+4*d)*d)
|
||||
vm := buf[:t*d]
|
||||
genVandMatrix(vm, t, d)
|
||||
top := buf[t*d : (t+d)*d]
|
||||
copy(top, vm[:d*d])
|
||||
raw := buf[(t+d)*d : (t+3*d)*d]
|
||||
im := buf[(t+3*d)*d : (t+4*d)*d]
|
||||
err := matrix(top).invert(raw, d, im)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
r := buf[(t+4*d)*d : (2*t+4*d)*d]
|
||||
matrix(vm).mul(im, t, d, r)
|
||||
return matrix(r), nil
|
||||
}
|
||||
|
||||
// [I|m'] -> [m']
|
||||
func (m matrix) subMatrix(n int, r []byte) {
|
||||
for i := 0; i < n; i++ {
|
||||
off := i * n
|
||||
copy(r[off:off+n], m[2*off+n:2*(off+n)])
|
||||
}
|
||||
}
|
||||
|
||||
func (m matrix) invert(raw matrix, n int, im []byte) error {
|
||||
// [m] -> [m|I]
|
||||
for i := 0; i < n; i++ {
|
||||
t := i * n
|
||||
copy(raw[2*t:2*t+n], m[t:t+n])
|
||||
raw[2*t+i+n] = byte(1)
|
||||
}
|
||||
err := gauss(raw, n)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
raw.subMatrix(n, im)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m matrix) swap(i, j, n int) {
|
||||
for k := 0; k < n; k++ {
|
||||
m[i*n+k], m[j*n+k] = m[j*n+k], m[i*n+k]
|
||||
}
|
||||
}
|
||||
|
||||
func gfMul(a, b byte) byte {
|
||||
return mulTbl[a][b]
|
||||
}
|
||||
|
||||
var errSingular = errors.New("rs.invert: matrix is singular")
|
||||
|
||||
// [m|I] -> [I|m']
|
||||
func gauss(m matrix, n int) error {
|
||||
n2 := 2 * n
|
||||
for i := 0; i < n; i++ {
|
||||
if m[i*n2+i] == 0 {
|
||||
for j := i + 1; j < n; j++ {
|
||||
if m[j*n2+i] != 0 {
|
||||
m.swap(i, j, n2)
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
if m[i*n2+i] == 0 {
|
||||
return errSingular
|
||||
}
|
||||
if m[i*n2+i] != 1 {
|
||||
d := m[i*n2+i]
|
||||
scale := inverseTbl[d]
|
||||
for c := 0; c < n2; c++ {
|
||||
m[i*n2+c] = gfMul(m[i*n2+c], scale)
|
||||
}
|
||||
}
|
||||
for j := i + 1; j < n; j++ {
|
||||
if m[j*n2+i] != 0 {
|
||||
scale := m[j*n2+i]
|
||||
for c := 0; c < n2; c++ {
|
||||
m[j*n2+c] ^= gfMul(scale, m[i*n2+c])
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
for k := 0; k < n; k++ {
|
||||
for j := 0; j < k; j++ {
|
||||
if m[j*n2+k] != 0 {
|
||||
scale := m[j*n2+k]
|
||||
for c := 0; c < n2; c++ {
|
||||
m[j*n2+c] ^= gfMul(scale, m[k*n2+c])
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
280
vendor/github.com/templexxx/reedsolomon/rs.go
generated
vendored
280
vendor/github.com/templexxx/reedsolomon/rs.go
generated
vendored
@@ -1,280 +0,0 @@
|
||||
/*
|
||||
Reed-Solomon Codes over GF(2^8)
|
||||
Primitive Polynomial: x^8+x^4+x^3+x^2+1
|
||||
Galois Field arithmetic using Intel SIMD instructions (AVX2 or SSSE3)
|
||||
*/
|
||||
|
||||
package reedsolomon
|
||||
|
||||
import "errors"
|
||||
|
||||
// Encoder is the Reed-Solomon encoding and reconstruction interface.
//
// Reconstruction results are written back to the original position in
// vects: if vects[0] was lost, the repaired content ends up in vects[0].
type Encoder interface {
	// Encode multiplies the generator matrix with the data vectors.
	// len(vects) must equal the number of data plus parity vectors.
	Encode(vects [][]byte) error

	// Reconstruct repairs lost data and parity vectors.
	// Set a vector to nil to mark it as lost.
	Reconstruct(vects [][]byte) error

	// ReconstructData repairs lost data vectors only.
	// Set a vector to nil to mark it as lost.
	ReconstructData(vects [][]byte) error

	// ReconstWithPos repairs lost data and parity vectors, given the
	// positions of the vectors to read ("has") and of the lost ones.
	// Compared with Reconstruct it saves bandwidth and disk I/O when fewer
	// vectors are lost than there are parity vectors. With erasure codes the
	// caller has to know which vectors are broken anyway, so this kind of
	// API is needed.
	//
	// len(has) must equal the number of data vectors.
	//
	// Example, for 3+2 with positions [0,1,2,3,4] and vects[0] lost:
	// "has" may be [1,2,3] or [1,2,4] or ...; if it is [1,2,3] then
	// vects[1], vects[2] and vects[3] must hold correct data, and "dLost"
	// is [0].
	//
	// Notes:
	//  1. the position lists are in increasing order
	//     (TODO support out-of-order)
	//  2. every vector has the same length; do not set lost ones to nil,
	//     so no slice has to be allocated
	ReconstWithPos(vects [][]byte, has, dLost, pLost []int) error

	// ReconstDataWithPos repairs lost data vectors, given the positions of
	// the surviving and the lost vectors. Positions of lost parity vectors
	// do not need to be passed.
	ReconstDataWithPos(vects [][]byte, has, dLost []int) error
}
|
||||
|
||||
// checkCfg validates an encoder layout of d data and p parity vectors. Both
// counts must be positive and their sum must stay below 256.
func checkCfg(d, p int) error {
	switch {
	case d <= 0 || p <= 0:
		return errors.New("rs.New: data or parity <= 0")
	case d+p >= 256:
		return errors.New("rs.New: data+parity >= 256")
	default:
		return nil
	}
}
|
||||
|
||||
// New create an Encoder (vandermonde matrix as Encoding matrix)
|
||||
func New(data, parity int) (enc Encoder, err error) {
|
||||
err = checkCfg(data, parity)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
e, err := genEncMatrixVand(data, parity)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
return newRS(data, parity, e), nil
|
||||
}
|
||||
|
||||
// NewCauchy create an Encoder (cauchy matrix as Generator Matrix)
|
||||
func NewCauchy(data, parity int) (enc Encoder, err error) {
|
||||
err = checkCfg(data, parity)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
e := genEncMatrixCauchy(data, parity)
|
||||
return newRS(data, parity, e), nil
|
||||
}
|
||||
|
||||
// encBase is the table-lookup encoder that needs no SIMD support.
type encBase struct {
	data   int    // number of data vectors
	parity int    // number of parity vectors
	encode []byte // encoding matrix; reconstruction reads rows of it by vector position
	gen    []byte // generator rows used by Encode, parity x data
}
|
||||
|
||||
// checkEnc verifies that vs holds exactly d+p vectors of one common,
// non-zero length and returns that length.
//
// On a count mismatch or an empty first vector the returned size is 0; on a
// length mismatch between vectors it is the length of vs[0].
func checkEnc(d, p int, vs [][]byte) (size int, err error) {
	if len(vs) != d+p {
		return 0, errors.New("rs.checkER: vects not match rs args")
	}
	size = len(vs[0])
	if size == 0 {
		return 0, errors.New("rs.checkER: vects size = 0")
	}
	for _, v := range vs[1:] {
		if len(v) != size {
			return size, errors.New("rs.checkER: vects size mismatch")
		}
	}
	return size, nil
}
|
||||
|
||||
func (e *encBase) Encode(vects [][]byte) (err error) {
|
||||
d := e.data
|
||||
p := e.parity
|
||||
_, err = checkEnc(d, p, vects)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
dv := vects[:d]
|
||||
pv := vects[d:]
|
||||
g := e.gen
|
||||
for i := 0; i < d; i++ {
|
||||
for j := 0; j < p; j++ {
|
||||
if i != 0 {
|
||||
mulVectAdd(g[j*d+i], dv[i], pv[j])
|
||||
} else {
|
||||
mulVect(g[j*d], dv[0], pv[j])
|
||||
}
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func mulVect(c byte, a, b []byte) {
|
||||
t := mulTbl[c]
|
||||
for i := 0; i < len(a); i++ {
|
||||
b[i] = t[a[i]]
|
||||
}
|
||||
}
|
||||
|
||||
func mulVectAdd(c byte, a, b []byte) {
|
||||
t := mulTbl[c]
|
||||
for i := 0; i < len(a); i++ {
|
||||
b[i] ^= t[a[i]]
|
||||
}
|
||||
}
|
||||
|
||||
func (e *encBase) Reconstruct(vects [][]byte) (err error) {
|
||||
return e.reconstruct(vects, false)
|
||||
}
|
||||
|
||||
func (e *encBase) ReconstructData(vects [][]byte) (err error) {
|
||||
return e.reconstruct(vects, true)
|
||||
}
|
||||
|
||||
func (e *encBase) ReconstWithPos(vects [][]byte, has, dLost, pLost []int) error {
|
||||
return e.reconstWithPos(vects, has, dLost, pLost, false)
|
||||
}
|
||||
|
||||
func (e *encBase) ReconstDataWithPos(vects [][]byte, has, dLost []int) error {
|
||||
return e.reconstWithPos(vects, has, dLost, nil, true)
|
||||
}
|
||||
|
||||
func (e *encBase) reconst(vects [][]byte, has, dLost, pLost []int, dataOnly bool) (err error) {
|
||||
d := e.data
|
||||
em := e.encode
|
||||
dCnt := len(dLost)
|
||||
size := len(vects[has[0]])
|
||||
if dCnt != 0 {
|
||||
vtmp := make([][]byte, d+dCnt)
|
||||
for i, p := range has {
|
||||
vtmp[i] = vects[p]
|
||||
}
|
||||
for i, p := range dLost {
|
||||
if len(vects[p]) == 0 {
|
||||
vects[p] = make([]byte, size)
|
||||
}
|
||||
vtmp[i+d] = vects[p]
|
||||
}
|
||||
matrixbuf := make([]byte, 4*d*d+dCnt*d)
|
||||
m := matrixbuf[:d*d]
|
||||
for i, l := range has {
|
||||
copy(m[i*d:i*d+d], em[l*d:l*d+d])
|
||||
}
|
||||
raw := matrixbuf[d*d : 3*d*d]
|
||||
im := matrixbuf[3*d*d : 4*d*d]
|
||||
err2 := matrix(m).invert(raw, d, im)
|
||||
if err2 != nil {
|
||||
return err2
|
||||
}
|
||||
g := matrixbuf[4*d*d:]
|
||||
for i, l := range dLost {
|
||||
copy(g[i*d:i*d+d], im[l*d:l*d+d])
|
||||
}
|
||||
etmp := &encBase{data: d, parity: dCnt, gen: g}
|
||||
err2 = etmp.Encode(vtmp[:d+dCnt])
|
||||
if err2 != nil {
|
||||
return err2
|
||||
}
|
||||
}
|
||||
if dataOnly {
|
||||
return
|
||||
}
|
||||
pCnt := len(pLost)
|
||||
if pCnt != 0 {
|
||||
vtmp := make([][]byte, d+pCnt)
|
||||
g := make([]byte, pCnt*d)
|
||||
for i, l := range pLost {
|
||||
copy(g[i*d:i*d+d], em[l*d:l*d+d])
|
||||
}
|
||||
for i := 0; i < d; i++ {
|
||||
vtmp[i] = vects[i]
|
||||
}
|
||||
for i, p := range pLost {
|
||||
if len(vects[p]) == 0 {
|
||||
vects[p] = make([]byte, size)
|
||||
}
|
||||
vtmp[i+d] = vects[p]
|
||||
}
|
||||
etmp := &encBase{data: d, parity: pCnt, gen: g}
|
||||
err2 := etmp.Encode(vtmp[:d+pCnt])
|
||||
if err2 != nil {
|
||||
return err2
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (e *encBase) reconstWithPos(vects [][]byte, has, dLost, pLost []int, dataOnly bool) (err error) {
|
||||
d := e.data
|
||||
p := e.parity
|
||||
// TODO check more, maybe element in has show in lost & deal with len(has) > d
|
||||
if len(has) != d {
|
||||
return errors.New("rs.Reconst: not enough vects")
|
||||
}
|
||||
dCnt := len(dLost)
|
||||
if dCnt > p {
|
||||
return errors.New("rs.Reconst: not enough vects")
|
||||
}
|
||||
pCnt := len(pLost)
|
||||
if pCnt > p {
|
||||
return errors.New("rs.Reconst: not enough vects")
|
||||
}
|
||||
return e.reconst(vects, has, dLost, pLost, dataOnly)
|
||||
}
|
||||
|
||||
func (e *encBase) reconstruct(vects [][]byte, dataOnly bool) (err error) {
|
||||
d := e.data
|
||||
p := e.parity
|
||||
t := d + p
|
||||
listBuf := make([]int, t+p)
|
||||
has := listBuf[:d]
|
||||
dLost := listBuf[d:t]
|
||||
pLost := listBuf[t : t+p]
|
||||
hasCnt, dCnt, pCnt := 0, 0, 0
|
||||
for i := 0; i < t; i++ {
|
||||
if vects[i] != nil {
|
||||
if hasCnt < d {
|
||||
has[hasCnt] = i
|
||||
hasCnt++
|
||||
}
|
||||
} else {
|
||||
if i < d {
|
||||
if dCnt < p {
|
||||
dLost[dCnt] = i
|
||||
dCnt++
|
||||
} else {
|
||||
return errors.New("rs.Reconst: not enough vects")
|
||||
}
|
||||
} else {
|
||||
if pCnt < p {
|
||||
pLost[pCnt] = i
|
||||
pCnt++
|
||||
} else {
|
||||
return errors.New("rs.Reconst: not enough vects")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if hasCnt != d {
|
||||
return errors.New("rs.Reconst: not enough vects")
|
||||
}
|
||||
dLost = dLost[:dCnt]
|
||||
pLost = pLost[:pCnt]
|
||||
return e.reconst(vects, has, dLost, pLost, dataOnly)
|
||||
}
|
||||
868
vendor/github.com/templexxx/reedsolomon/rs_amd64.go
generated
vendored
868
vendor/github.com/templexxx/reedsolomon/rs_amd64.go
generated
vendored
@@ -1,868 +0,0 @@
|
||||
package reedsolomon
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"sync"
|
||||
|
||||
"github.com/templexxx/cpufeat"
|
||||
)
|
||||
|
||||
// SIMD instruction set extensions the encoder can be built on.
const (
	none  = iota // no usable extension; newRS falls back to encBase
	avx2         // AVX2
	ssse3        // SSSE3
)

// extension is the instruction set chosen for this process. getEXT, called
// from init, sets it from the detected CPU features.
var extension = none
|
||||
|
||||
func init() {
|
||||
getEXT()
|
||||
}
|
||||
|
||||
func getEXT() {
|
||||
if cpufeat.X86.HasAVX2 {
|
||||
extension = avx2
|
||||
return
|
||||
} else if cpufeat.X86.HasSSSE3 {
|
||||
extension = ssse3
|
||||
return
|
||||
} else {
|
||||
extension = none
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
//go:noescape
|
||||
func copy32B(dst, src []byte) // Need SSE2(introduced in 2001)
|
||||
|
||||
func initTbl(g matrix, rows, cols int, tbl []byte) {
|
||||
off := 0
|
||||
for i := 0; i < cols; i++ {
|
||||
for j := 0; j < rows; j++ {
|
||||
c := g[j*cols+i]
|
||||
t := lowhighTbl[c][:]
|
||||
copy32B(tbl[off:off+32], t)
|
||||
off += 32
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// okCache reports whether the inverse-matrix cache should be enabled for a
// layout, which is the case for fewer than 15 data and fewer than 5 parity
// vectors.
//
// Within these limits there are at most 3060 inverse matrices (data=14,
// parity=4, calculated with mathtool/cntinverse). In practice data is
// usually below 12 and parity below 5. The limits may be changed, but
// data+parity must not exceed 32 (see the code that makes the inverse
// matrix).
func okCache(data, parity int) bool {
	return data < 15 && parity < 5
}
|
||||
|
||||
type (
|
||||
encSSSE3 encSIMD
|
||||
encAVX2 encSIMD
|
||||
encSIMD struct {
|
||||
data int
|
||||
parity int
|
||||
encode matrix
|
||||
gen matrix
|
||||
tbl []byte
|
||||
// inverse matrix cache is design for small vect size ( < 4KB )
|
||||
// it will save time for calculating inverse matrix
|
||||
// but it's not so important for big vect size
|
||||
enableCache bool
|
||||
inverseCache iCache
|
||||
}
|
||||
iCache struct {
|
||||
sync.RWMutex
|
||||
data map[uint32][]byte
|
||||
}
|
||||
)
|
||||
|
||||
func newRS(d, p int, em matrix) (enc Encoder) {
|
||||
g := em[d*d:]
|
||||
if extension == none {
|
||||
return &encBase{data: d, parity: p, encode: em, gen: g}
|
||||
}
|
||||
t := make([]byte, d*p*32)
|
||||
initTbl(g, p, d, t)
|
||||
ok := okCache(d, p)
|
||||
if extension == avx2 {
|
||||
e := &encAVX2{data: d, parity: p, encode: em, gen: g, tbl: t, enableCache: ok,
|
||||
inverseCache: iCache{data: make(map[uint32][]byte)}}
|
||||
return e
|
||||
}
|
||||
e := &encSSSE3{data: d, parity: p, encode: em, gen: g, tbl: t, enableCache: ok,
|
||||
inverseCache: iCache{data: make(map[uint32][]byte)}}
|
||||
return e
|
||||
}
|
||||
|
||||
// unit is the size of a sub-vector: vectors are processed in pieces of at
// most this many bytes.
const unit int = 16 * 1024

// getDo returns the piece size to use for vectors of n bytes: unit when n is
// at least unit or smaller than 16, otherwise n rounded down to a multiple
// of 16.
func getDo(n int) int {
	if n >= unit {
		return unit
	}
	if aligned := n &^ 15; aligned != 0 {
		return aligned
	}
	return unit
}
|
||||
|
||||
func (e *encAVX2) Encode(vects [][]byte) (err error) {
|
||||
d := e.data
|
||||
p := e.parity
|
||||
size, err := checkEnc(d, p, vects)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
dv := vects[:d]
|
||||
pv := vects[d:]
|
||||
start, end := 0, 0
|
||||
do := getDo(size)
|
||||
for start < size {
|
||||
end = start + do
|
||||
if end <= size {
|
||||
e.matrixMul(start, end, dv, pv)
|
||||
start = end
|
||||
} else {
|
||||
e.matrixMulRemain(start, size, dv, pv)
|
||||
start = size
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
//go:noescape
|
||||
func mulVectAVX2(tbl, d, p []byte)
|
||||
|
||||
//go:noescape
|
||||
func mulVectAddAVX2(tbl, d, p []byte)
|
||||
|
||||
func (e *encAVX2) matrixMul(start, end int, dv, pv [][]byte) {
|
||||
d := e.data
|
||||
p := e.parity
|
||||
tbl := e.tbl
|
||||
off := 0
|
||||
for i := 0; i < d; i++ {
|
||||
for j := 0; j < p; j++ {
|
||||
t := tbl[off : off+32]
|
||||
if i != 0 {
|
||||
mulVectAddAVX2(t, dv[i][start:end], pv[j][start:end])
|
||||
} else {
|
||||
mulVectAVX2(t, dv[0][start:end], pv[j][start:end])
|
||||
}
|
||||
off += 32
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (e *encAVX2) matrixMulRemain(start, end int, dv, pv [][]byte) {
|
||||
undone := end - start
|
||||
do := (undone >> 4) << 4
|
||||
d := e.data
|
||||
p := e.parity
|
||||
tbl := e.tbl
|
||||
if do >= 16 {
|
||||
end2 := start + do
|
||||
off := 0
|
||||
for i := 0; i < d; i++ {
|
||||
for j := 0; j < p; j++ {
|
||||
t := tbl[off : off+32]
|
||||
if i != 0 {
|
||||
mulVectAddAVX2(t, dv[i][start:end2], pv[j][start:end2])
|
||||
} else {
|
||||
mulVectAVX2(t, dv[0][start:end2], pv[j][start:end2])
|
||||
}
|
||||
off += 32
|
||||
}
|
||||
}
|
||||
start = end
|
||||
}
|
||||
if undone > do {
|
||||
// may recalculate some data, but still improve a lot
|
||||
start2 := end - 16
|
||||
if start2 >= 0 {
|
||||
off := 0
|
||||
for i := 0; i < d; i++ {
|
||||
for j := 0; j < p; j++ {
|
||||
t := tbl[off : off+32]
|
||||
if i != 0 {
|
||||
mulVectAddAVX2(t, dv[i][start2:end], pv[j][start2:end])
|
||||
} else {
|
||||
mulVectAVX2(t, dv[0][start2:end], pv[j][start2:end])
|
||||
}
|
||||
off += 32
|
||||
}
|
||||
}
|
||||
} else {
|
||||
g := e.gen
|
||||
for i := 0; i < d; i++ {
|
||||
for j := 0; j < p; j++ {
|
||||
if i != 0 {
|
||||
mulVectAdd(g[j*d+i], dv[i][start:], pv[j][start:])
|
||||
} else {
|
||||
mulVect(g[j*d], dv[0][start:], pv[j][start:])
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// use generator-matrix but not tbls for encoding
|
||||
// it's design for reconstructing
|
||||
// for small vects, it cost to much time on initTbl, so drop it
|
||||
// and for big vects, the tbls can't impact much, because the cache will be filled with vects' data
|
||||
func (e *encAVX2) encodeGen(vects [][]byte) (err error) {
|
||||
d := e.data
|
||||
p := e.parity
|
||||
size, err := checkEnc(d, p, vects)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
dv := vects[:d]
|
||||
pv := vects[d:]
|
||||
start, end := 0, 0
|
||||
do := getDo(size)
|
||||
for start < size {
|
||||
end = start + do
|
||||
if end <= size {
|
||||
e.matrixMulGen(start, end, dv, pv)
|
||||
start = end
|
||||
} else {
|
||||
e.matrixMulRemainGen(start, size, dv, pv)
|
||||
start = size
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (e *encAVX2) matrixMulGen(start, end int, dv, pv [][]byte) {
|
||||
d := e.data
|
||||
p := e.parity
|
||||
g := e.gen
|
||||
for i := 0; i < d; i++ {
|
||||
for j := 0; j < p; j++ {
|
||||
t := lowhighTbl[g[j*d+i]][:]
|
||||
if i != 0 {
|
||||
mulVectAddAVX2(t, dv[i][start:end], pv[j][start:end])
|
||||
} else {
|
||||
mulVectAVX2(t, dv[0][start:end], pv[j][start:end])
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (e *encAVX2) matrixMulRemainGen(start, end int, dv, pv [][]byte) {
|
||||
undone := end - start
|
||||
do := (undone >> 4) << 4
|
||||
d := e.data
|
||||
p := e.parity
|
||||
g := e.gen
|
||||
if do >= 16 {
|
||||
end2 := start + do
|
||||
for i := 0; i < d; i++ {
|
||||
for j := 0; j < p; j++ {
|
||||
t := lowhighTbl[g[j*d+i]][:]
|
||||
if i != 0 {
|
||||
mulVectAddAVX2(t, dv[i][start:end2], pv[j][start:end2])
|
||||
} else {
|
||||
mulVectAVX2(t, dv[0][start:end2], pv[j][start:end2])
|
||||
}
|
||||
}
|
||||
}
|
||||
start = end
|
||||
}
|
||||
if undone > do {
|
||||
start2 := end - 16
|
||||
if start2 >= 0 {
|
||||
for i := 0; i < d; i++ {
|
||||
for j := 0; j < p; j++ {
|
||||
t := lowhighTbl[g[j*d+i]][:]
|
||||
if i != 0 {
|
||||
mulVectAddAVX2(t, dv[i][start2:end], pv[j][start2:end])
|
||||
} else {
|
||||
mulVectAVX2(t, dv[0][start2:end], pv[j][start2:end])
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for i := 0; i < d; i++ {
|
||||
for j := 0; j < p; j++ {
|
||||
if i != 0 {
|
||||
mulVectAdd(g[j*d+i], dv[i][start:], pv[j][start:])
|
||||
} else {
|
||||
mulVect(g[j*d], dv[0][start:], pv[j][start:])
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (e *encAVX2) Reconstruct(vects [][]byte) (err error) {
|
||||
return e.reconstruct(vects, false)
|
||||
}
|
||||
|
||||
func (e *encAVX2) ReconstructData(vects [][]byte) (err error) {
|
||||
return e.reconstruct(vects, true)
|
||||
}
|
||||
|
||||
func (e *encAVX2) ReconstWithPos(vects [][]byte, has, dLost, pLost []int) error {
|
||||
return e.reconstWithPos(vects, has, dLost, pLost, false)
|
||||
}
|
||||
|
||||
func (e *encAVX2) ReconstDataWithPos(vects [][]byte, has, dLost []int) error {
|
||||
return e.reconstWithPos(vects, has, dLost, nil, true)
|
||||
}
|
||||
|
||||
func (e *encAVX2) makeGen(has, dLost []int) (gen []byte, err error) {
|
||||
d := e.data
|
||||
em := e.encode
|
||||
cnt := len(dLost)
|
||||
if !e.enableCache {
|
||||
matrixbuf := make([]byte, 4*d*d+cnt*d)
|
||||
m := matrixbuf[:d*d]
|
||||
for i, l := range has {
|
||||
copy(m[i*d:i*d+d], em[l*d:l*d+d])
|
||||
}
|
||||
raw := matrixbuf[d*d : 3*d*d]
|
||||
im := matrixbuf[3*d*d : 4*d*d]
|
||||
err2 := matrix(m).invert(raw, d, im)
|
||||
if err2 != nil {
|
||||
return nil, err2
|
||||
}
|
||||
g := matrixbuf[4*d*d:]
|
||||
for i, l := range dLost {
|
||||
copy(g[i*d:i*d+d], im[l*d:l*d+d])
|
||||
}
|
||||
return g, nil
|
||||
}
|
||||
var ikey uint32
|
||||
for _, p := range has {
|
||||
ikey += 1 << uint8(p)
|
||||
}
|
||||
e.inverseCache.RLock()
|
||||
v, ok := e.inverseCache.data[ikey]
|
||||
if ok {
|
||||
im := v
|
||||
g := make([]byte, cnt*d)
|
||||
for i, l := range dLost {
|
||||
copy(g[i*d:i*d+d], im[l*d:l*d+d])
|
||||
}
|
||||
e.inverseCache.RUnlock()
|
||||
return g, nil
|
||||
}
|
||||
e.inverseCache.RUnlock()
|
||||
matrixbuf := make([]byte, 4*d*d+cnt*d)
|
||||
m := matrixbuf[:d*d]
|
||||
for i, l := range has {
|
||||
copy(m[i*d:i*d+d], em[l*d:l*d+d])
|
||||
}
|
||||
raw := matrixbuf[d*d : 3*d*d]
|
||||
im := matrixbuf[3*d*d : 4*d*d]
|
||||
err2 := matrix(m).invert(raw, d, im)
|
||||
if err2 != nil {
|
||||
return nil, err2
|
||||
}
|
||||
e.inverseCache.Lock()
|
||||
e.inverseCache.data[ikey] = im
|
||||
e.inverseCache.Unlock()
|
||||
g := matrixbuf[4*d*d:]
|
||||
for i, l := range dLost {
|
||||
copy(g[i*d:i*d+d], im[l*d:l*d+d])
|
||||
}
|
||||
return g, nil
|
||||
}
|
||||
|
||||
func (e *encAVX2) reconst(vects [][]byte, has, dLost, pLost []int, dataOnly bool) (err error) {
|
||||
d := e.data
|
||||
em := e.encode
|
||||
dCnt := len(dLost)
|
||||
size := len(vects[has[0]])
|
||||
if dCnt != 0 {
|
||||
vtmp := make([][]byte, d+dCnt)
|
||||
for i, p := range has {
|
||||
vtmp[i] = vects[p]
|
||||
}
|
||||
for i, p := range dLost {
|
||||
if len(vects[p]) == 0 {
|
||||
vects[p] = make([]byte, size)
|
||||
}
|
||||
vtmp[i+d] = vects[p]
|
||||
}
|
||||
g, err2 := e.makeGen(has, dLost)
|
||||
if err2 != nil {
|
||||
return
|
||||
}
|
||||
etmp := &encAVX2{data: d, parity: dCnt, gen: g}
|
||||
err2 = etmp.encodeGen(vtmp)
|
||||
if err2 != nil {
|
||||
return err2
|
||||
}
|
||||
}
|
||||
if dataOnly {
|
||||
return
|
||||
}
|
||||
pCnt := len(pLost)
|
||||
if pCnt != 0 {
|
||||
g := make([]byte, pCnt*d)
|
||||
for i, l := range pLost {
|
||||
copy(g[i*d:i*d+d], em[l*d:l*d+d])
|
||||
}
|
||||
vtmp := make([][]byte, d+pCnt)
|
||||
for i := 0; i < d; i++ {
|
||||
vtmp[i] = vects[i]
|
||||
}
|
||||
for i, p := range pLost {
|
||||
if len(vects[p]) == 0 {
|
||||
vects[p] = make([]byte, size)
|
||||
}
|
||||
vtmp[i+d] = vects[p]
|
||||
}
|
||||
etmp := &encAVX2{data: d, parity: pCnt, gen: g}
|
||||
err2 := etmp.encodeGen(vtmp)
|
||||
if err2 != nil {
|
||||
return err2
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (e *encAVX2) reconstWithPos(vects [][]byte, has, dLost, pLost []int, dataOnly bool) (err error) {
|
||||
d := e.data
|
||||
p := e.parity
|
||||
if len(has) != d {
|
||||
return errors.New("rs.Reconst: not enough vects")
|
||||
}
|
||||
dCnt := len(dLost)
|
||||
if dCnt > p {
|
||||
return errors.New("rs.Reconst: not enough vects")
|
||||
}
|
||||
pCnt := len(pLost)
|
||||
if pCnt > p {
|
||||
return errors.New("rs.Reconst: not enough vects")
|
||||
}
|
||||
return e.reconst(vects, has, dLost, pLost, dataOnly)
|
||||
}
|
||||
|
||||
func (e *encAVX2) reconstruct(vects [][]byte, dataOnly bool) (err error) {
|
||||
d := e.data
|
||||
p := e.parity
|
||||
t := d + p
|
||||
listBuf := make([]int, t+p)
|
||||
has := listBuf[:d]
|
||||
dLost := listBuf[d:t]
|
||||
pLost := listBuf[t : t+p]
|
||||
hasCnt, dCnt, pCnt := 0, 0, 0
|
||||
for i := 0; i < t; i++ {
|
||||
if vects[i] != nil {
|
||||
if hasCnt < d {
|
||||
has[hasCnt] = i
|
||||
hasCnt++
|
||||
}
|
||||
} else {
|
||||
if i < d {
|
||||
if dCnt < p {
|
||||
dLost[dCnt] = i
|
||||
dCnt++
|
||||
} else {
|
||||
return errors.New("rs.Reconst: not enough vects")
|
||||
}
|
||||
} else {
|
||||
if pCnt < p {
|
||||
pLost[pCnt] = i
|
||||
pCnt++
|
||||
} else {
|
||||
return errors.New("rs.Reconst: not enough vects")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if hasCnt != d {
|
||||
return errors.New("rs.Reconst: not enough vects")
|
||||
}
|
||||
dLost = dLost[:dCnt]
|
||||
pLost = pLost[:pCnt]
|
||||
return e.reconst(vects, has, dLost, pLost, dataOnly)
|
||||
}
|
||||
|
||||
func (e *encSSSE3) Encode(vects [][]byte) (err error) {
|
||||
d := e.data
|
||||
p := e.parity
|
||||
size, err := checkEnc(d, p, vects)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
dv := vects[:d]
|
||||
pv := vects[d:]
|
||||
start, end := 0, 0
|
||||
do := getDo(size)
|
||||
for start < size {
|
||||
end = start + do
|
||||
if end <= size {
|
||||
e.matrixMul(start, end, dv, pv)
|
||||
start = end
|
||||
} else {
|
||||
e.matrixMulRemain(start, size, dv, pv)
|
||||
start = size
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// mulVectSSSE3 is implemented in rs_amd64.s. For every 16-byte block of d it
// looks each byte up in the two 16-entry tables held in tbl (low nibble in
// tbl[0:16], high nibble in tbl[16:32]) and stores the XOR of both lookups in
// p. The assembly loop runs len(d)>>4 times and tests its counter only after
// the first pass, so len(d) must be at least 16; a trailing len(d)%16 bytes
// are not processed.
//go:noescape
|
||||
func mulVectSSSE3(tbl, d, p []byte)
|
||||
|
||||
// mulVectAddSSSE3 is implemented in rs_amd64.s. It performs the same table
// lookup as mulVectSSSE3 but XORs the result into the existing contents of p
// instead of overwriting them. The same length requirement applies: len(d)
// must be at least 16, and a trailing len(d)%16 bytes are not processed.
//go:noescape
|
||||
func mulVectAddSSSE3(tbl, d, p []byte)
|
||||
|
||||
func (e *encSSSE3) matrixMul(start, end int, dv, pv [][]byte) {
|
||||
d := e.data
|
||||
p := e.parity
|
||||
tbl := e.tbl
|
||||
off := 0
|
||||
for i := 0; i < d; i++ {
|
||||
for j := 0; j < p; j++ {
|
||||
t := tbl[off : off+32]
|
||||
if i != 0 {
|
||||
mulVectAddSSSE3(t, dv[i][start:end], pv[j][start:end])
|
||||
} else {
|
||||
mulVectSSSE3(t, dv[0][start:end], pv[j][start:end])
|
||||
}
|
||||
off += 32
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (e *encSSSE3) matrixMulRemain(start, end int, dv, pv [][]byte) {
|
||||
undone := end - start
|
||||
do := (undone >> 4) << 4
|
||||
d := e.data
|
||||
p := e.parity
|
||||
tbl := e.tbl
|
||||
if do >= 16 {
|
||||
end2 := start + do
|
||||
off := 0
|
||||
for i := 0; i < d; i++ {
|
||||
for j := 0; j < p; j++ {
|
||||
t := tbl[off : off+32]
|
||||
if i != 0 {
|
||||
mulVectAddSSSE3(t, dv[i][start:end2], pv[j][start:end2])
|
||||
} else {
|
||||
mulVectSSSE3(t, dv[0][start:end2], pv[j][start:end2])
|
||||
}
|
||||
off += 32
|
||||
}
|
||||
}
|
||||
start = end
|
||||
}
|
||||
if undone > do {
|
||||
start2 := end - 16
|
||||
if start2 >= 0 {
|
||||
off := 0
|
||||
for i := 0; i < d; i++ {
|
||||
for j := 0; j < p; j++ {
|
||||
t := tbl[off : off+32]
|
||||
if i != 0 {
|
||||
mulVectAddSSSE3(t, dv[i][start2:end], pv[j][start2:end])
|
||||
} else {
|
||||
mulVectSSSE3(t, dv[0][start2:end], pv[j][start2:end])
|
||||
}
|
||||
off += 32
|
||||
}
|
||||
}
|
||||
} else {
|
||||
g := e.gen
|
||||
for i := 0; i < d; i++ {
|
||||
for j := 0; j < p; j++ {
|
||||
if i != 0 {
|
||||
mulVectAdd(g[j*d+i], dv[i][start:], pv[j][start:])
|
||||
} else {
|
||||
mulVect(g[j*d], dv[0][start:], pv[j][start:])
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// use generator-matrix but not tbls for encoding
|
||||
// it's design for reconstructing
|
||||
// for small vects, it cost to much time on initTbl, so drop it
|
||||
// and for big vects, the tbls can't impact much, because the cache will be filled with vects' data
|
||||
func (e *encSSSE3) encodeGen(vects [][]byte) (err error) {
|
||||
d := e.data
|
||||
p := e.parity
|
||||
size, err := checkEnc(d, p, vects)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
dv := vects[:d]
|
||||
pv := vects[d:]
|
||||
start, end := 0, 0
|
||||
do := getDo(size)
|
||||
for start < size {
|
||||
end = start + do
|
||||
if end <= size {
|
||||
e.matrixMulGen(start, end, dv, pv)
|
||||
start = end
|
||||
} else {
|
||||
e.matrixMulRemainGen(start, size, dv, pv)
|
||||
start = size
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (e *encSSSE3) matrixMulGen(start, end int, dv, pv [][]byte) {
|
||||
d := e.data
|
||||
p := e.parity
|
||||
g := e.gen
|
||||
for i := 0; i < d; i++ {
|
||||
for j := 0; j < p; j++ {
|
||||
t := lowhighTbl[g[j*d+i]][:]
|
||||
if i != 0 {
|
||||
mulVectAddSSSE3(t, dv[i][start:end], pv[j][start:end])
|
||||
} else {
|
||||
mulVectSSSE3(t, dv[0][start:end], pv[j][start:end])
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (e *encSSSE3) matrixMulRemainGen(start, end int, dv, pv [][]byte) {
|
||||
undone := end - start
|
||||
do := (undone >> 4) << 4
|
||||
d := e.data
|
||||
p := e.parity
|
||||
g := e.gen
|
||||
if do >= 16 {
|
||||
end2 := start + do
|
||||
for i := 0; i < d; i++ {
|
||||
for j := 0; j < p; j++ {
|
||||
t := lowhighTbl[g[j*d+i]][:]
|
||||
if i != 0 {
|
||||
mulVectAddSSSE3(t, dv[i][start:end2], pv[j][start:end2])
|
||||
} else {
|
||||
mulVectSSSE3(t, dv[0][start:end2], pv[j][start:end2])
|
||||
}
|
||||
}
|
||||
}
|
||||
start = end
|
||||
}
|
||||
if undone > do {
|
||||
start2 := end - 16
|
||||
if start2 >= 0 {
|
||||
for i := 0; i < d; i++ {
|
||||
for j := 0; j < p; j++ {
|
||||
t := lowhighTbl[g[j*d+i]][:]
|
||||
if i != 0 {
|
||||
mulVectAddSSSE3(t, dv[i][start2:end], pv[j][start2:end])
|
||||
} else {
|
||||
mulVectSSSE3(t, dv[0][start2:end], pv[j][start2:end])
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for i := 0; i < d; i++ {
|
||||
for j := 0; j < p; j++ {
|
||||
if i != 0 {
|
||||
mulVectAdd(g[j*d+i], dv[i][start:], pv[j][start:])
|
||||
} else {
|
||||
mulVect(g[j*d], dv[0][start:], pv[j][start:])
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (e *encSSSE3) Reconstruct(vects [][]byte) (err error) {
|
||||
return e.reconstruct(vects, false)
|
||||
}
|
||||
|
||||
func (e *encSSSE3) ReconstructData(vects [][]byte) (err error) {
|
||||
return e.reconstruct(vects, true)
|
||||
}
|
||||
|
||||
func (e *encSSSE3) ReconstWithPos(vects [][]byte, has, dLost, pLost []int) error {
|
||||
return e.reconstWithPos(vects, has, dLost, pLost, false)
|
||||
}
|
||||
|
||||
func (e *encSSSE3) ReconstDataWithPos(vects [][]byte, has, dLost []int) error {
|
||||
return e.reconstWithPos(vects, has, dLost, nil, true)
|
||||
}
|
||||
|
||||
func (e *encSSSE3) makeGen(has, dLost []int) (gen []byte, err error) {
|
||||
d := e.data
|
||||
em := e.encode
|
||||
cnt := len(dLost)
|
||||
if !e.enableCache {
|
||||
matrixbuf := make([]byte, 4*d*d+cnt*d)
|
||||
m := matrixbuf[:d*d]
|
||||
for i, l := range has {
|
||||
copy(m[i*d:i*d+d], em[l*d:l*d+d])
|
||||
}
|
||||
raw := matrixbuf[d*d : 3*d*d]
|
||||
im := matrixbuf[3*d*d : 4*d*d]
|
||||
err2 := matrix(m).invert(raw, d, im)
|
||||
if err2 != nil {
|
||||
return nil, err2
|
||||
}
|
||||
g := matrixbuf[4*d*d:]
|
||||
for i, l := range dLost {
|
||||
copy(g[i*d:i*d+d], im[l*d:l*d+d])
|
||||
}
|
||||
return g, nil
|
||||
}
|
||||
var ikey uint32
|
||||
for _, p := range has {
|
||||
ikey += 1 << uint8(p)
|
||||
}
|
||||
e.inverseCache.RLock()
|
||||
v, ok := e.inverseCache.data[ikey]
|
||||
if ok {
|
||||
im := v
|
||||
g := make([]byte, cnt*d)
|
||||
for i, l := range dLost {
|
||||
copy(g[i*d:i*d+d], im[l*d:l*d+d])
|
||||
}
|
||||
e.inverseCache.RUnlock()
|
||||
return g, nil
|
||||
}
|
||||
e.inverseCache.RUnlock()
|
||||
matrixbuf := make([]byte, 4*d*d+cnt*d)
|
||||
m := matrixbuf[:d*d]
|
||||
for i, l := range has {
|
||||
copy(m[i*d:i*d+d], em[l*d:l*d+d])
|
||||
}
|
||||
raw := matrixbuf[d*d : 3*d*d]
|
||||
im := matrixbuf[3*d*d : 4*d*d]
|
||||
err2 := matrix(m).invert(raw, d, im)
|
||||
if err2 != nil {
|
||||
return nil, err2
|
||||
}
|
||||
e.inverseCache.Lock()
|
||||
e.inverseCache.data[ikey] = im
|
||||
e.inverseCache.Unlock()
|
||||
g := matrixbuf[4*d*d:]
|
||||
for i, l := range dLost {
|
||||
copy(g[i*d:i*d+d], im[l*d:l*d+d])
|
||||
}
|
||||
return g, nil
|
||||
}
|
||||
|
||||
func (e *encSSSE3) reconst(vects [][]byte, has, dLost, pLost []int, dataOnly bool) (err error) {
|
||||
d := e.data
|
||||
em := e.encode
|
||||
dCnt := len(dLost)
|
||||
size := len(vects[has[0]])
|
||||
if dCnt != 0 {
|
||||
vtmp := make([][]byte, d+dCnt)
|
||||
for i, p := range has {
|
||||
vtmp[i] = vects[p]
|
||||
}
|
||||
for i, p := range dLost {
|
||||
if len(vects[p]) == 0 {
|
||||
vects[p] = make([]byte, size)
|
||||
}
|
||||
vtmp[i+d] = vects[p]
|
||||
}
|
||||
g, err2 := e.makeGen(has, dLost)
|
||||
if err2 != nil {
|
||||
return
|
||||
}
|
||||
etmp := &encSSSE3{data: d, parity: dCnt, gen: g}
|
||||
err2 = etmp.encodeGen(vtmp)
|
||||
if err2 != nil {
|
||||
return err2
|
||||
}
|
||||
}
|
||||
if dataOnly {
|
||||
return
|
||||
}
|
||||
pCnt := len(pLost)
|
||||
if pCnt != 0 {
|
||||
g := make([]byte, pCnt*d)
|
||||
for i, l := range pLost {
|
||||
copy(g[i*d:i*d+d], em[l*d:l*d+d])
|
||||
}
|
||||
vtmp := make([][]byte, d+pCnt)
|
||||
for i := 0; i < d; i++ {
|
||||
vtmp[i] = vects[i]
|
||||
}
|
||||
for i, p := range pLost {
|
||||
if len(vects[p]) == 0 {
|
||||
vects[p] = make([]byte, size)
|
||||
}
|
||||
vtmp[i+d] = vects[p]
|
||||
}
|
||||
etmp := &encSSSE3{data: d, parity: pCnt, gen: g}
|
||||
err2 := etmp.encodeGen(vtmp)
|
||||
if err2 != nil {
|
||||
return err2
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (e *encSSSE3) reconstWithPos(vects [][]byte, has, dLost, pLost []int, dataOnly bool) (err error) {
|
||||
d := e.data
|
||||
p := e.parity
|
||||
if len(has) != d {
|
||||
return errors.New("rs.Reconst: not enough vects")
|
||||
}
|
||||
dCnt := len(dLost)
|
||||
if dCnt > p {
|
||||
return errors.New("rs.Reconst: not enough vects")
|
||||
}
|
||||
pCnt := len(pLost)
|
||||
if pCnt > p {
|
||||
return errors.New("rs.Reconst: not enough vects")
|
||||
}
|
||||
return e.reconst(vects, has, dLost, pLost, dataOnly)
|
||||
}
|
||||
|
||||
func (e *encSSSE3) reconstruct(vects [][]byte, dataOnly bool) (err error) {
|
||||
d := e.data
|
||||
p := e.parity
|
||||
t := d + p
|
||||
listBuf := make([]int, t+p)
|
||||
has := listBuf[:d]
|
||||
dLost := listBuf[d:t]
|
||||
pLost := listBuf[t : t+p]
|
||||
hasCnt, dCnt, pCnt := 0, 0, 0
|
||||
for i := 0; i < t; i++ {
|
||||
if vects[i] != nil {
|
||||
if hasCnt < d {
|
||||
has[hasCnt] = i
|
||||
hasCnt++
|
||||
}
|
||||
} else {
|
||||
if i < d {
|
||||
if dCnt < p {
|
||||
dLost[dCnt] = i
|
||||
dCnt++
|
||||
} else {
|
||||
return errors.New("rs.Reconst: not enough vects")
|
||||
}
|
||||
} else {
|
||||
if pCnt < p {
|
||||
pLost[pCnt] = i
|
||||
pCnt++
|
||||
} else {
|
||||
return errors.New("rs.Reconst: not enough vects")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if hasCnt != d {
|
||||
return errors.New("rs.Reconst: not enough vects")
|
||||
}
|
||||
dLost = dLost[:dCnt]
|
||||
pLost = pLost[:pCnt]
|
||||
return e.reconst(vects, has, dLost, pLost, dataOnly)
|
||||
}
|
||||
401
vendor/github.com/templexxx/reedsolomon/rs_amd64.s
generated
vendored
401
vendor/github.com/templexxx/reedsolomon/rs_amd64.s
generated
vendored
@@ -1,401 +0,0 @@
|
||||
// Reference: www.ssrc.ucsc.edu/Papers/plank-fast13.pdf
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
#define low_tbl Y0
|
||||
#define high_tbl Y1
|
||||
#define mask Y2
|
||||
#define in0 Y3
|
||||
#define in1 Y4
|
||||
#define in2 Y5
|
||||
#define in3 Y6
|
||||
#define in4 Y7
|
||||
#define in5 Y8
|
||||
#define in0_h Y10
|
||||
#define in1_h Y11
|
||||
#define in2_h Y12
|
||||
#define in3_h Y13
|
||||
#define in4_h Y14
|
||||
#define in5_h Y15
|
||||
|
||||
#define in BX
|
||||
#define out DI
|
||||
#define len R8
|
||||
#define pos R9
|
||||
|
||||
#define tmp0 R10
|
||||
|
||||
#define low_tblx X0
|
||||
#define high_tblx X1
|
||||
#define maskx X2
|
||||
#define in0x X3
|
||||
#define in0_hx X10
|
||||
#define tmp0x X9
|
||||
#define tmp1x X11
|
||||
#define tmp2x X12
|
||||
#define tmp3x X13
|
||||
|
||||
|
||||
// mulVectAVX2 writes, for every input byte, the XOR of two 16-entry table
// lookups (low nibble in tbl[0:16], high nibble in tbl[16:32]) to the output.
// Arguments are three slice headers on the frame: tbl at 0(FP), the input at
// 24(FP) (length at 32(FP)) and the output at 48(FP).
// The length is consumed from the end: an odd 16-byte piece first (one16b),
// then 32-byte pieces until a multiple of 256 remains (loop32b), then
// 256-byte iterations from the start (loop256b).
// NOTE(review): the control flow assumes the length is a non-zero multiple of
// 16 -- confirm against the Go callers.
// func mulVectAVX2(tbl, d, p []byte)
|
||||
TEXT ·mulVectAVX2(SB), NOSPLIT, $0
|
||||
MOVQ i+24(FP), in
|
||||
||||
MOVQ o+48(FP), out
|
||||
MOVQ tbl+0(FP), tmp0
|
||||
VMOVDQU (tmp0), low_tblx
|
||||
VMOVDQU 16(tmp0), high_tblx
|
||||
// build the 0x0f nibble mask: insert the byte, then broadcast it
MOVB $0x0f, DX
|
||||
LONG $0x2069e3c4; WORD $0x00d2 // VPINSRB $0x00, EDX, XMM2, XMM2
|
||||
VPBROADCASTB maskx, maskx
|
||||
MOVQ in_len+32(FP), len
|
||||
// length not a multiple of 32: peel one 16-byte block off the end first
TESTQ $31, len
|
||||
JNZ one16b
|
||||
|
||||
ymm:
|
||||
// copy the 128-bit tables and mask into the upper lane of the YMM registers
VINSERTI128 $1, low_tblx, low_tbl, low_tbl
|
||||
VINSERTI128 $1, high_tblx, high_tbl, high_tbl
|
||||
VINSERTI128 $1, maskx, mask, mask
|
||||
TESTQ $255, len
|
||||
JNZ not_aligned
|
||||
|
||||
// 256bytes/loop
|
||||
aligned:
|
||||
MOVQ $0, pos
|
||||
|
||||
loop256b:
|
||||
VMOVDQU (in)(pos*1), in0
|
||||
VPSRLQ $4, in0, in0_h
|
||||
VPAND mask, in0_h, in0_h
|
||||
VPAND mask, in0, in0
|
||||
VPSHUFB in0_h, high_tbl, in0_h
|
||||
VPSHUFB in0, low_tbl, in0
|
||||
VPXOR in0, in0_h, in0
|
||||
VMOVDQU in0, (out)(pos*1)
|
||||
|
||||
VMOVDQU 32(in)(pos*1), in1
|
||||
VPSRLQ $4, in1, in1_h
|
||||
VPAND mask, in1_h, in1_h
|
||||
VPAND mask, in1, in1
|
||||
VPSHUFB in1_h, high_tbl, in1_h
|
||||
VPSHUFB in1, low_tbl, in1
|
||||
VPXOR in1, in1_h, in1
|
||||
VMOVDQU in1, 32(out)(pos*1)
|
||||
|
||||
VMOVDQU 64(in)(pos*1), in2
|
||||
VPSRLQ $4, in2, in2_h
|
||||
VPAND mask, in2_h, in2_h
|
||||
VPAND mask, in2, in2
|
||||
VPSHUFB in2_h, high_tbl, in2_h
|
||||
VPSHUFB in2, low_tbl, in2
|
||||
VPXOR in2, in2_h, in2
|
||||
VMOVDQU in2, 64(out)(pos*1)
|
||||
|
||||
VMOVDQU 96(in)(pos*1), in3
|
||||
VPSRLQ $4, in3, in3_h
|
||||
VPAND mask, in3_h, in3_h
|
||||
VPAND mask, in3, in3
|
||||
VPSHUFB in3_h, high_tbl, in3_h
|
||||
VPSHUFB in3, low_tbl, in3
|
||||
VPXOR in3, in3_h, in3
|
||||
VMOVDQU in3, 96(out)(pos*1)
|
||||
|
||||
VMOVDQU 128(in)(pos*1), in4
|
||||
VPSRLQ $4, in4, in4_h
|
||||
VPAND mask, in4_h, in4_h
|
||||
VPAND mask, in4, in4
|
||||
VPSHUFB in4_h, high_tbl, in4_h
|
||||
VPSHUFB in4, low_tbl, in4
|
||||
VPXOR in4, in4_h, in4
|
||||
VMOVDQU in4, 128(out)(pos*1)
|
||||
|
||||
VMOVDQU 160(in)(pos*1), in5
|
||||
VPSRLQ $4, in5, in5_h
|
||||
VPAND mask, in5_h, in5_h
|
||||
VPAND mask, in5, in5
|
||||
VPSHUFB in5_h, high_tbl, in5_h
|
||||
VPSHUFB in5, low_tbl, in5
|
||||
VPXOR in5, in5_h, in5
|
||||
VMOVDQU in5, 160(out)(pos*1)
|
||||
|
||||
VMOVDQU 192(in)(pos*1), in0
|
||||
VPSRLQ $4, in0, in0_h
|
||||
VPAND mask, in0_h, in0_h
|
||||
VPAND mask, in0, in0
|
||||
VPSHUFB in0_h, high_tbl, in0_h
|
||||
VPSHUFB in0, low_tbl, in0
|
||||
VPXOR in0, in0_h, in0
|
||||
VMOVDQU in0, 192(out)(pos*1)
|
||||
|
||||
VMOVDQU 224(in)(pos*1), in1
|
||||
VPSRLQ $4, in1, in1_h
|
||||
VPAND mask, in1_h, in1_h
|
||||
VPAND mask, in1, in1
|
||||
VPSHUFB in1_h, high_tbl, in1_h
|
||||
VPSHUFB in1, low_tbl, in1
|
||||
VPXOR in1, in1_h, in1
|
||||
VMOVDQU in1, 224(out)(pos*1)
|
||||
|
||||
ADDQ $256, pos
|
||||
CMPQ len, pos
|
||||
JNE loop256b
|
||||
VZEROUPPER
|
||||
RET
|
||||
|
||||
not_aligned:
|
||||
// tmp0 = number of bytes beyond the last multiple of 256
MOVQ len, tmp0
|
||||
ANDQ $255, tmp0
|
||||
|
||||
loop32b:
|
||||
// process the last 32 bytes of the remaining range, then shrink it
VMOVDQU -32(in)(len*1), in0
|
||||
VPSRLQ $4, in0, in0_h
|
||||
VPAND mask, in0_h, in0_h
|
||||
VPAND mask, in0, in0
|
||||
VPSHUFB in0_h, high_tbl, in0_h
|
||||
VPSHUFB in0, low_tbl, in0
|
||||
VPXOR in0, in0_h, in0
|
||||
VMOVDQU in0, -32(out)(len*1)
|
||||
SUBQ $32, len
|
||||
SUBQ $32, tmp0
|
||||
JG loop32b
|
||||
// at least one full 256-byte block left: continue with the unrolled loop
CMPQ len, $256
|
||||
JGE aligned
|
||||
VZEROUPPER
|
||||
RET
|
||||
|
||||
one16b:
|
||||
// process the last 16 bytes with 128-bit registers only
VMOVDQU -16(in)(len*1), in0x
|
||||
VPSRLQ $4, in0x, in0_hx
|
||||
VPAND maskx, in0x, in0x
|
||||
VPAND maskx, in0_hx, in0_hx
|
||||
VPSHUFB in0_hx, high_tblx, in0_hx
|
||||
VPSHUFB in0x, low_tblx, in0x
|
||||
VPXOR in0x, in0_hx, in0x
|
||||
VMOVDQU in0x, -16(out)(len*1)
|
||||
SUBQ $16, len
|
||||
CMPQ len, $0
|
||||
JNE ymm
|
||||
RET
|
||||
|
||||
// mulVectAddAVX2 performs the same nibble-table lookup as mulVectAVX2 but
// XORs each result into the bytes already present in the output instead of
// overwriting them. Arguments are three slice headers on the frame: tbl at
// 0(FP), the input at 24(FP) (length at 32(FP)) and the output at 48(FP).
// The length is consumed from the end: an odd 16-byte piece first (one16b),
// then 32-byte pieces until a multiple of 256 remains (loop32b), then
// 256-byte iterations from the start (loop256b).
// NOTE(review): the control flow assumes the length is a non-zero multiple of
// 16 -- confirm against the Go callers.
// func mulVectAddAVX2(tbl, d, p []byte)
|
||||
TEXT ·mulVectAddAVX2(SB), NOSPLIT, $0
|
||||
MOVQ i+24(FP), in
|
||||
MOVQ o+48(FP), out
|
||||
MOVQ tbl+0(FP), tmp0
|
||||
VMOVDQU (tmp0), low_tblx
|
||||
VMOVDQU 16(tmp0), high_tblx
|
||||
// build the 0x0f nibble mask; the LONG/WORD pair is the same hand-encoded
// instruction bytes that mulVectAVX2 annotates as VPINSRB $0x00, EDX, XMM2, XMM2
MOVB $0x0f, DX
|
||||
LONG $0x2069e3c4; WORD $0x00d2
|
||||
VPBROADCASTB maskx, maskx
|
||||
MOVQ in_len+32(FP), len
|
||||
// length not a multiple of 32: peel one 16-byte block off the end first
TESTQ $31, len
|
||||
JNZ one16b
|
||||
|
||||
ymm:
|
||||
// copy the 128-bit tables and mask into the upper lane of the YMM registers
VINSERTI128 $1, low_tblx, low_tbl, low_tbl
|
||||
VINSERTI128 $1, high_tblx, high_tbl, high_tbl
|
||||
VINSERTI128 $1, maskx, mask, mask
|
||||
TESTQ $255, len
|
||||
JNZ not_aligned
|
||||
|
||||
aligned:
|
||||
MOVQ $0, pos
|
||||
|
||||
loop256b:
|
||||
VMOVDQU (in)(pos*1), in0
|
||||
VPSRLQ $4, in0, in0_h
|
||||
VPAND mask, in0_h, in0_h
|
||||
VPAND mask, in0, in0
|
||||
VPSHUFB in0_h, high_tbl, in0_h
|
||||
VPSHUFB in0, low_tbl, in0
|
||||
VPXOR in0, in0_h, in0
|
||||
VPXOR (out)(pos*1), in0, in0
|
||||
VMOVDQU in0, (out)(pos*1)
|
||||
|
||||
VMOVDQU 32(in)(pos*1), in1
|
||||
VPSRLQ $4, in1, in1_h
|
||||
VPAND mask, in1_h, in1_h
|
||||
VPAND mask, in1, in1
|
||||
VPSHUFB in1_h, high_tbl, in1_h
|
||||
VPSHUFB in1, low_tbl, in1
|
||||
VPXOR in1, in1_h, in1
|
||||
VPXOR 32(out)(pos*1), in1, in1
|
||||
VMOVDQU in1, 32(out)(pos*1)
|
||||
|
||||
VMOVDQU 64(in)(pos*1), in2
|
||||
VPSRLQ $4, in2, in2_h
|
||||
VPAND mask, in2_h, in2_h
|
||||
VPAND mask, in2, in2
|
||||
VPSHUFB in2_h, high_tbl, in2_h
|
||||
VPSHUFB in2, low_tbl, in2
|
||||
VPXOR in2, in2_h, in2
|
||||
VPXOR 64(out)(pos*1), in2, in2
|
||||
VMOVDQU in2, 64(out)(pos*1)
|
||||
|
||||
VMOVDQU 96(in)(pos*1), in3
|
||||
VPSRLQ $4, in3, in3_h
|
||||
VPAND mask, in3_h, in3_h
|
||||
VPAND mask, in3, in3
|
||||
VPSHUFB in3_h, high_tbl, in3_h
|
||||
VPSHUFB in3, low_tbl, in3
|
||||
VPXOR in3, in3_h, in3
|
||||
VPXOR 96(out)(pos*1), in3, in3
|
||||
VMOVDQU in3, 96(out)(pos*1)
|
||||
|
||||
VMOVDQU 128(in)(pos*1), in4
|
||||
VPSRLQ $4, in4, in4_h
|
||||
VPAND mask, in4_h, in4_h
|
||||
VPAND mask, in4, in4
|
||||
VPSHUFB in4_h, high_tbl, in4_h
|
||||
VPSHUFB in4, low_tbl, in4
|
||||
VPXOR in4, in4_h, in4
|
||||
VPXOR 128(out)(pos*1), in4, in4
|
||||
VMOVDQU in4, 128(out)(pos*1)
|
||||
|
||||
VMOVDQU 160(in)(pos*1), in5
|
||||
VPSRLQ $4, in5, in5_h
|
||||
VPAND mask, in5_h, in5_h
|
||||
VPAND mask, in5, in5
|
||||
VPSHUFB in5_h, high_tbl, in5_h
|
||||
VPSHUFB in5, low_tbl, in5
|
||||
VPXOR in5, in5_h, in5
|
||||
VPXOR 160(out)(pos*1), in5, in5
|
||||
VMOVDQU in5, 160(out)(pos*1)
|
||||
|
||||
VMOVDQU 192(in)(pos*1), in0
|
||||
VPSRLQ $4, in0, in0_h
|
||||
VPAND mask, in0_h, in0_h
|
||||
VPAND mask, in0, in0
|
||||
VPSHUFB in0_h, high_tbl, in0_h
|
||||
VPSHUFB in0, low_tbl, in0
|
||||
VPXOR in0, in0_h, in0
|
||||
VPXOR 192(out)(pos*1), in0, in0
|
||||
VMOVDQU in0, 192(out)(pos*1)
|
||||
|
||||
VMOVDQU 224(in)(pos*1), in1
|
||||
VPSRLQ $4, in1, in1_h
|
||||
VPAND mask, in1_h, in1_h
|
||||
VPAND mask, in1, in1
|
||||
VPSHUFB in1_h, high_tbl, in1_h
|
||||
VPSHUFB in1, low_tbl, in1
|
||||
VPXOR in1, in1_h, in1
|
||||
VPXOR 224(out)(pos*1), in1, in1
|
||||
VMOVDQU in1, 224(out)(pos*1)
|
||||
|
||||
ADDQ $256, pos
|
||||
CMPQ len, pos
|
||||
JNE loop256b
|
||||
VZEROUPPER
|
||||
RET
|
||||
|
||||
not_aligned:
|
||||
// tmp0 = number of bytes beyond the last multiple of 256
MOVQ len, tmp0
|
||||
ANDQ $255, tmp0
|
||||
|
||||
loop32b:
|
||||
// process the last 32 bytes of the remaining range, then shrink it
VMOVDQU -32(in)(len*1), in0
|
||||
VPSRLQ $4, in0, in0_h
|
||||
VPAND mask, in0_h, in0_h
|
||||
VPAND mask, in0, in0
|
||||
VPSHUFB in0_h, high_tbl, in0_h
|
||||
VPSHUFB in0, low_tbl, in0
|
||||
VPXOR in0, in0_h, in0
|
||||
VPXOR -32(out)(len*1), in0, in0
|
||||
VMOVDQU in0, -32(out)(len*1)
|
||||
SUBQ $32, len
|
||||
SUBQ $32, tmp0
|
||||
JG loop32b
|
||||
// at least one full 256-byte block left: continue with the unrolled loop
CMPQ len, $256
|
||||
JGE aligned
|
||||
VZEROUPPER
|
||||
RET
|
||||
|
||||
one16b:
|
||||
// process the last 16 bytes with 128-bit registers only
VMOVDQU -16(in)(len*1), in0x
|
||||
VPSRLQ $4, in0x, in0_hx
|
||||
VPAND maskx, in0x, in0x
|
||||
VPAND maskx, in0_hx, in0_hx
|
||||
VPSHUFB in0_hx, high_tblx, in0_hx
|
||||
VPSHUFB in0x, low_tblx, in0x
|
||||
VPXOR in0x, in0_hx, in0x
|
||||
VPXOR -16(out)(len*1), in0x, in0x
|
||||
VMOVDQU in0x, -16(out)(len*1)
|
||||
SUBQ $16, len
|
||||
CMPQ len, $0
|
||||
JNE ymm
|
||||
RET
|
||||
|
||||
// mulVectSSSE3 writes, for every 16-byte block of the input, the XOR of two
// 16-entry table lookups (low nibble in tbl[0:16], high nibble in tbl[16:32])
// to the output. The loop counter is length>>4 and is tested only after the
// body has run, so the length must be at least 16; a trailing length%16 bytes
// are not processed.
// func mulVectSSSE3(tbl, d, p []byte)
|
||||
TEXT ·mulVectSSSE3(SB), NOSPLIT, $0
|
||||
MOVQ i+24(FP), in
|
||||
MOVQ o+48(FP), out
|
||||
MOVQ tbl+0(FP), tmp0
|
||||
MOVOU (tmp0), low_tblx
|
||||
MOVOU 16(tmp0), high_tblx
|
||||
// build the 0x0f mask: only byte 0 of tmp0 matters, PSHUFB with an all-zero
// index register replicates it into every lane
MOVB $15, tmp0
|
||||
MOVQ tmp0, maskx
|
||||
PXOR tmp0x, tmp0x
|
||||
PSHUFB tmp0x, maskx
|
||||
MOVQ in_len+32(FP), len
|
||||
// len now counts 16-byte blocks
SHRQ $4, len
|
||||
|
||||
loop:
|
||||
MOVOU (in), in0x
|
||||
MOVOU in0x, in0_hx
|
||||
PSRLQ $4, in0_hx
|
||||
PAND maskx, in0x
|
||||
PAND maskx, in0_hx
|
||||
// PSHUFB overwrites its destination, so look up in copies of the tables
MOVOU low_tblx, tmp1x
|
||||
MOVOU high_tblx, tmp2x
|
||||
PSHUFB in0x, tmp1x
|
||||
PSHUFB in0_hx, tmp2x
|
||||
PXOR tmp1x, tmp2x
|
||||
MOVOU tmp2x, (out)
|
||||
ADDQ $16, in
|
||||
ADDQ $16, out
|
||||
SUBQ $1, len
|
||||
JNZ loop
|
||||
RET
|
||||
|
||||
// mulVectAddSSSE3 performs the same nibble-table lookup as mulVectSSSE3 but
// XORs each 16-byte result into the bytes already present in the output. The
// loop counter is length>>4 and is tested only after the body has run, so the
// length must be at least 16; a trailing length%16 bytes are not processed.
// func mulVectAddSSSE3(tbl, d, p []byte)
|
||||
TEXT ·mulVectAddSSSE3(SB), NOSPLIT, $0
|
||||
MOVQ i+24(FP), in
|
||||
MOVQ o+48(FP), out
|
||||
MOVQ tbl+0(FP), tmp0
|
||||
MOVOU (tmp0), low_tblx
|
||||
MOVOU 16(tmp0), high_tblx
|
||||
// build the 0x0f mask: only byte 0 of tmp0 matters, PSHUFB with an all-zero
// index register replicates it into every lane
MOVB $15, tmp0
|
||||
MOVQ tmp0, maskx
|
||||
PXOR tmp0x, tmp0x
|
||||
PSHUFB tmp0x, maskx
|
||||
MOVQ in_len+32(FP), len
|
||||
// len now counts 16-byte blocks
SHRQ $4, len
|
||||
|
||||
loop:
|
||||
MOVOU (in), in0x
|
||||
MOVOU in0x, in0_hx
|
||||
PSRLQ $4, in0_hx
|
||||
PAND maskx, in0x
|
||||
PAND maskx, in0_hx
|
||||
// PSHUFB overwrites its destination, so look up in copies of the tables
MOVOU low_tblx, tmp1x
|
||||
MOVOU high_tblx, tmp2x
|
||||
PSHUFB in0x, tmp1x
|
||||
PSHUFB in0_hx, tmp2x
|
||||
PXOR tmp1x, tmp2x
|
||||
// fold the product into the existing output bytes
MOVOU (out), tmp3x
|
||||
PXOR tmp3x, tmp2x
|
||||
MOVOU tmp2x, (out)
|
||||
ADDQ $16, in
|
||||
ADDQ $16, out
|
||||
SUBQ $1, len
|
||||
JNZ loop
|
||||
RET
|
||||
|
||||
// copy32B copies exactly 32 bytes from src to dst with two unaligned 16-byte
// loads followed by two unaligned 16-byte stores. The slice lengths are never
// read, so both slices must hold at least 32 bytes.
// func copy32B(dst, src []byte)
|
||||
TEXT ·copy32B(SB), NOSPLIT, $0
|
||||
MOVQ dst+0(FP), SI
|
||||
MOVQ src+24(FP), DX
|
||||
MOVOU (DX), X0
|
||||
MOVOU 16(DX), X1
|
||||
MOVOU X0, (SI)
|
||||
MOVOU X1, 16(SI)
|
||||
RET
|
||||
|
||||
8
vendor/github.com/templexxx/reedsolomon/rs_other.go
generated
vendored
8
vendor/github.com/templexxx/reedsolomon/rs_other.go
generated
vendored
@@ -1,8 +0,0 @@
|
||||
// +build !amd64
|
||||
|
||||
package reedsolomon
|
||||
|
||||
func newRS(d, p int, em matrix) (enc Encoder) {
|
||||
g := em[d*d:]
|
||||
return &encBase{data: d, parity: p, encode: em, gen: g}
|
||||
}
|
||||
44
vendor/github.com/templexxx/reedsolomon/tbl.go
generated
vendored
44
vendor/github.com/templexxx/reedsolomon/tbl.go
generated
vendored
File diff suppressed because one or more lines are too long
14
vendor/github.com/templexxx/xor/avx2_amd64.s
generated
vendored
14
vendor/github.com/templexxx/xor/avx2_amd64.s
generated
vendored
@@ -45,6 +45,7 @@ loop32b:
|
||||
ADDQ $32, POS
|
||||
CMPQ LEN, POS
|
||||
JNE loop32b
|
||||
VZEROUPPER
|
||||
RET
|
||||
|
||||
loop_1b:
|
||||
@@ -113,6 +114,7 @@ loop128b:
|
||||
ADDQ $128, POS
|
||||
CMPQ LEN, POS
|
||||
JNE loop128b
|
||||
VZEROUPPER
|
||||
RET
|
||||
|
||||
loop_1b:
|
||||
@@ -182,6 +184,7 @@ loop128b:
|
||||
CMPQ LEN, POS
|
||||
JNE loop128b
|
||||
SFENCE
|
||||
VZEROUPPER
|
||||
RET
|
||||
|
||||
loop_1b:
|
||||
@@ -265,6 +268,7 @@ next_vect:
|
||||
ADDQ $128, POS
|
||||
CMPQ LEN, POS
|
||||
JNE loop128b
|
||||
VZEROUPPER
|
||||
RET
|
||||
|
||||
loop_1b:
|
||||
@@ -371,6 +375,7 @@ next_vect:
|
||||
ADDQ $128, POS
|
||||
CMPQ LEN, POS
|
||||
JNE loop128b
|
||||
VZEROUPPER
|
||||
RET
|
||||
|
||||
loop_1b:
|
||||
@@ -431,12 +436,3 @@ next_vect_8b:
|
||||
ret:
|
||||
RET
|
||||
|
||||
// hasAVX2 executes CPUID with EAX=7, ECX=0 and returns bit 5 of EBX, the
// AVX2 feature flag.
// NOTE(review): only the CPU flag is examined here; nothing in this routine
// checks that the operating system has enabled YMM state.
TEXT ·hasAVX2(SB), NOSPLIT, $0
|
||||
XORQ AX, AX
|
||||
XORQ CX, CX
|
||||
ADDL $7, AX
|
||||
CPUID
|
||||
SHRQ $5, BX
|
||||
ANDQ $1, BX
|
||||
MOVB BX, ret+0(FP)
|
||||
RET
|
||||
|
||||
4
vendor/github.com/templexxx/xor/xor_amd64.go
generated
vendored
4
vendor/github.com/templexxx/xor/xor_amd64.go
generated
vendored
@@ -1,11 +1,13 @@
|
||||
package xor
|
||||
|
||||
import "github.com/templexxx/cpufeat"
|
||||
|
||||
// init runs getEXT once at package initialization.
func init() {
|
||||
getEXT()
|
||||
}
|
||||
|
||||
func getEXT() {
|
||||
if hasAVX2() {
|
||||
if cpufeat.X86.HasAVX2 {
|
||||
extension = avx2
|
||||
} else {
|
||||
extension = sse2
|
||||
|
||||
Reference in New Issue
Block a user