vendor: Update everything

GitHub-Pull-Request: https://github.com/syncthing/syncthing/pull/4620
This commit is contained in:
Jakob Borg
2017-12-29 11:38:00 +00:00
parent 1296a22069
commit c24bf7ea55
1070 changed files with 294926 additions and 488191 deletions

View File

@@ -1,23 +0,0 @@
MIT License
Copyright (c) 2017 Templexxx
Copyright (c) 2015 Klaus Post
Copyright (c) 2015 Backblaze
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@@ -1,193 +0,0 @@
package main
import (
"flag"
"fmt"
"os"
)
var vects = flag.Uint64("vects", 20, "number of vects (data+parity)")
var data = flag.Uint64("data", 0, "number of data vects; keep it empty if you want to "+
"get the max num of inverse matrix")
func init() {
flag.Usage = func() {
fmt.Printf("Usage of %s:\n", os.Args[0])
fmt.Println(" cntinverse [-flags]")
fmt.Println(" Valid flags:")
flag.PrintDefaults()
}
}
func main() {
flag.Parse()
if *vects > 256 {
fmt.Println("Error: vects must <= 256")
os.Exit(1)
}
if *data == 0 {
n := getMAXCCombination(*vects)
fmt.Println("max num of inverse matrix :", n)
os.Exit(0)
}
n := getCCombination(*vects, *data)
fmt.Println("num of inverse matrix:", n)
os.Exit(0)
}
func getMAXCCombination(a uint64) uint64 {
b := a / 2 // proved in mathtool/combination.jpg
return getCCombination(a, b)
}
func getCCombination(a, b uint64) uint64 {
top := make([]uint64, a-b)
bottom := make([]uint64, a-b-1)
for i := b + 1; i <= a; i++ {
top[i-b-1] = i
}
var i uint64
for i = 2; i <= a-b; i++ {
bottom[i-2] = i
}
for j := 0; j <= 5; j++ {
cleanEven(top, bottom)
clean3(top, bottom)
clean5(top, bottom)
}
cleanCoffeRound1(top, bottom)
if maxBottomBigger5more1(bottom) {
top = shuffTop(top)
cleanCoffeRound1(top, bottom)
cleanCoffeRound1(bottom, top)
cleanCoffeRound1(top, bottom)
cleanCoffeRound1(bottom, top)
cleanCoffeRound1(top, bottom)
cleanCoffeRound1(bottom, top)
}
var topV, bottomV uint64 = 1, 1
for _, t := range top {
topV = topV * t
}
for _, b := range bottom {
bottomV = bottomV * b
}
return topV / bottomV
}
func cleanEven(top, bottom []uint64) {
for i, b := range bottom {
if even(b) {
for j, t := range top {
if even(t) {
top[j] = t / 2
bottom[i] = b / 2
break
}
}
}
}
}
func even(a uint64) bool {
return a&1 == 0
}
func clean3(top, bottom []uint64) {
for i, b := range bottom {
if mod3(b) {
for j, t := range top {
if mod3(t) {
top[j] = t / 3
bottom[i] = b / 3
break
}
}
}
}
}
func mod3(a uint64) bool {
c := a / 3
if 3*c == a {
return true
}
return false
}
func clean5(top, bottom []uint64) {
for i, b := range bottom {
if mod5(b) {
for j, t := range top {
if mod5(t) {
top[j] = t / 5
bottom[i] = b / 5
break
}
}
}
}
}
func mod5(a uint64) bool {
c := a / 5
if 5*c == a {
return true
}
return false
}
func maxBottomBigger5more1(bottom []uint64) bool {
cnt := 0
for _, b := range bottom {
if b >= 5 {
cnt++
}
}
if cnt >= 2 {
return true
}
return false
}
func cleanCoffeRound1(top, bottom []uint64) {
for i, b := range bottom {
for j, t := range top {
if isCoffe(b, t) {
top[j] = t / b
bottom[i] = 1
break
}
}
}
}
func isCoffe(b, t uint64) bool {
c := t / b
if c*b == t {
return true
}
return false
}
func shuffTop(top []uint64) []uint64 {
var tmp uint64 = 1
newLen := len(top) + 1
for i, t := range top {
if t <= 5 {
tmp = tmp * t
newLen--
top[i] = 1
}
}
topNew := make([]uint64, newLen)
topNew[0] = tmp
cnt := 1
for _, t := range top {
if t != 1 {
topNew[cnt] = t
cnt++
}
}
return topNew
}

View File

@@ -1,270 +0,0 @@
package main
import (
"bufio"
"fmt"
"log"
"os"
"strconv"
"strings"
)
// set deg here
const deg = 8 // <= 8
type polynomial [deg + 1]byte
func main() {
f, err := os.OpenFile("tables", os.O_WRONLY|os.O_CREATE, 0666)
if err != nil {
log.Fatalln(err)
}
defer f.Close()
outputWriter := bufio.NewWriter(f)
ps := genPrimitivePolynomial()
title := strconv.FormatInt(int64(deg), 10) + " degree primitive polynomial\n"
var pss string
for i, p := range ps {
pf := formatPolynomial(p)
pf = strconv.FormatInt(int64(i+1), 10) + ". " + pf + ";\n"
pss = pss + pf
}
body := fmt.Sprintf(title+"%v", pss)
outputWriter.WriteString(body)
//set primitive polynomial here to generator tables
//x^8+x^4+x^3+x^2+1
var primitivePolynomial polynomial
primitivePolynomial[0] = 1
primitivePolynomial[2] = 1
primitivePolynomial[3] = 1
primitivePolynomial[4] = 1
primitivePolynomial[8] = 1
lenExpTable := (1 << deg) - 1
expTable := genExpTable(primitivePolynomial, lenExpTable)
body = fmt.Sprintf("expTbl: %#v\n", expTable)
outputWriter.WriteString(body)
logTable := genLogTable(expTable)
body = fmt.Sprintf("logTbl: %#v\n", logTable)
outputWriter.WriteString(body)
mulTable := genMulTable(expTable, logTable)
body = fmt.Sprintf("mulTbl: %#v\n", mulTable)
outputWriter.WriteString(body)
lowTable, highTable := genMulTableHalf(mulTable)
body = fmt.Sprintf("lowTbl: %#v\n", lowTable)
outputWriter.WriteString(body)
body = fmt.Sprintf("highTbl: %#v\n", highTable)
outputWriter.WriteString(body)
var combTable [256][32]byte
for i := range combTable {
l := lowTable[i]
for j := 0; j < 16; j++ {
combTable[i][j] = l[j]
}
h := highTable[i][:]
for k := 16; k < 32; k++ {
combTable[i][k] = h[k-16]
}
}
body = fmt.Sprintf("lowhighTbl: %#v\n", combTable)
outputWriter.WriteString(body)
inverseTable := genInverseTable(mulTable)
body = fmt.Sprintf("inverseTbl: %#v\n", inverseTable)
outputWriter.WriteString(body)
outputWriter.Flush()
}
// generate primitive Polynomial
func genPrimitivePolynomial() []polynomial {
// drop Polynomial xso the constant term must be 1
// so there are 2^(deg-1) Polynomials
cnt := 1 << (deg - 1)
var polynomials []polynomial
var p polynomial
p[0] = 1
p[deg] = 1
// gen all Polynomials
for i := 0; i < cnt; i++ {
p = genPolynomial(p, 1)
polynomials = append(polynomials, p)
}
// drop Polynomial x+1, so the cnt of Polynomials is odd
var psRaw []polynomial
for _, p := range polynomials {
var n int
for _, v := range p {
if v == 1 {
n++
}
}
if n&1 != 0 {
psRaw = append(psRaw, p)
}
}
// order of primitive element == 2^deg -1 ?
var ps []polynomial
for _, p := range psRaw {
lenTable := (1 << deg) - 1
table := genExpTable(p, lenTable)
var numOf1 int
for _, v := range table {
// cnt 1 in ExpTable
if int(v) == 1 {
numOf1++
}
}
if numOf1 == 1 {
ps = append(ps, p)
}
}
return ps
}
func genPolynomial(p polynomial, i int) polynomial {
if p[i] == 0 {
p[i] = 1
} else {
p[i] = 0
i++
if i == deg {
return p
}
p = genPolynomial(p, i)
}
return p
}
func genExpTable(primitivePolynomial polynomial, exp int) []byte {
table := make([]byte, exp)
var rawPolynomial polynomial
rawPolynomial[1] = 1
table[0] = byte(1)
table[1] = byte(2)
for i := 2; i < exp; i++ {
rawPolynomial = expGrowPolynomial(rawPolynomial, primitivePolynomial)
table[i] = byte(getValueOfPolynomial(rawPolynomial))
}
return table
}
func expGrowPolynomial(raw, primitivePolynomial polynomial) polynomial {
var newP polynomial
for i, v := range raw[:deg] {
if v == 1 {
newP[i+1] = 1
}
}
if newP[deg] == 1 {
for i, v := range primitivePolynomial[:deg] {
if v == 1 {
if newP[i] == 1 {
newP[i] = 0
} else {
newP[i] = 1
}
}
}
}
newP[deg] = 0
return newP
}
func getValueOfPolynomial(p polynomial) uint8 {
var v uint8
for i, coefficient := range p[:deg] {
if coefficient != 0 {
add := 1 << uint8(i)
v += uint8(add)
}
}
return v
}
func genLogTable(expTable []byte) []byte {
table := make([]byte, (1 << deg))
//table[0] 无法由本原元的幂得到
table[0] = 0
for i, v := range expTable {
table[v] = byte(i)
}
return table
}
func genMulTable(expTable, logTable []byte) [256][256]byte {
var result [256][256]byte
for a := range result {
for b := range result[a] {
if a == 0 || b == 0 {
result[a][b] = 0
continue
}
logA := int(logTable[a])
logB := int(logTable[b])
logSum := logA + logB
for logSum >= 255 {
logSum -= 255
}
result[a][b] = expTable[logSum]
}
}
return result
}
func genMulTableHalf(mulTable [256][256]byte) (low [256][16]byte, high [256][16]byte) {
for a := range low {
for b := range low {
//result := 0
var result byte
if !(a == 0 || b == 0) {
//result = int(mulTable[a][b])
result = mulTable[a][b]
}
// b & 00001111, [0,15]
if (b & 0xf) == b {
low[a][b] = result
}
// b & 11110000, [240,255]
if (b & 0xf0) == b {
high[a][b>>4] = result
}
}
}
return
}
func genInverseTable(mulTable [256][256]byte) [256]byte {
var inVerseTable [256]byte
for i, t := range mulTable {
for j, v := range t {
if int(v) == 1 {
inVerseTable[i] = byte(j)
}
}
}
return inVerseTable
}
func formatPolynomial(p polynomial) string {
var ps string
for i := deg; i > 1; i-- {
if p[i] == 1 {
ps = ps + "x^" + strconv.FormatInt(int64(i), 10) + "+"
}
}
if p[1] == 1 {
ps = ps + "x+"
}
if p[0] == 1 {
ps = ps + "1"
} else {
strings.TrimSuffix(ps, "+")
}
return ps
}

View File

@@ -1,156 +0,0 @@
package reedsolomon
import "errors"
type matrix []byte
func genEncMatrixCauchy(d, p int) matrix {
t := d + p
m := make([]byte, t*d)
for i := 0; i < d; i++ {
m[i*d+i] = byte(1)
}
d2 := d * d
for i := d; i < t; i++ {
for j := 0; j < d; j++ {
d := i ^ j
a := inverseTbl[d]
m[d2] = byte(a)
d2++
}
}
return m
}
func gfExp(b byte, n int) byte {
if n == 0 {
return 1
}
if b == 0 {
return 0
}
a := logTbl[b]
ret := int(a) * n
for ret >= 255 {
ret -= 255
}
return byte(expTbl[ret])
}
func genVandMatrix(vm []byte, t, d int) {
for i := 0; i < t; i++ {
for j := 0; j < d; j++ {
vm[i*d+j] = gfExp(byte(i), j)
}
}
}
func (m matrix) mul(right matrix, rows, cols int, r []byte) {
for i := 0; i < rows; i++ {
for j := 0; j < cols; j++ {
var v byte
for k := 0; k < cols; k++ {
v ^= gfMul(m[i*cols+k], right[k*cols+j])
}
r[i*cols+j] = v
}
}
}
func genEncMatrixVand(d, p int) (matrix, error) {
t := d + p
buf := make([]byte, (2*t+4*d)*d)
vm := buf[:t*d]
genVandMatrix(vm, t, d)
top := buf[t*d : (t+d)*d]
copy(top, vm[:d*d])
raw := buf[(t+d)*d : (t+3*d)*d]
im := buf[(t+3*d)*d : (t+4*d)*d]
err := matrix(top).invert(raw, d, im)
if err != nil {
return nil, err
}
r := buf[(t+4*d)*d : (2*t+4*d)*d]
matrix(vm).mul(im, t, d, r)
return matrix(r), nil
}
// [I|m'] -> [m']
func (m matrix) subMatrix(n int, r []byte) {
for i := 0; i < n; i++ {
off := i * n
copy(r[off:off+n], m[2*off+n:2*(off+n)])
}
}
func (m matrix) invert(raw matrix, n int, im []byte) error {
// [m] -> [m|I]
for i := 0; i < n; i++ {
t := i * n
copy(raw[2*t:2*t+n], m[t:t+n])
raw[2*t+i+n] = byte(1)
}
err := gauss(raw, n)
if err != nil {
return err
}
raw.subMatrix(n, im)
return nil
}
func (m matrix) swap(i, j, n int) {
for k := 0; k < n; k++ {
m[i*n+k], m[j*n+k] = m[j*n+k], m[i*n+k]
}
}
func gfMul(a, b byte) byte {
return mulTbl[a][b]
}
var errSingular = errors.New("rs.invert: matrix is singular")
// [m|I] -> [I|m']
func gauss(m matrix, n int) error {
n2 := 2 * n
for i := 0; i < n; i++ {
if m[i*n2+i] == 0 {
for j := i + 1; j < n; j++ {
if m[j*n2+i] != 0 {
m.swap(i, j, n2)
break
}
}
}
if m[i*n2+i] == 0 {
return errSingular
}
if m[i*n2+i] != 1 {
d := m[i*n2+i]
scale := inverseTbl[d]
for c := 0; c < n2; c++ {
m[i*n2+c] = gfMul(m[i*n2+c], scale)
}
}
for j := i + 1; j < n; j++ {
if m[j*n2+i] != 0 {
scale := m[j*n2+i]
for c := 0; c < n2; c++ {
m[j*n2+c] ^= gfMul(scale, m[i*n2+c])
}
}
}
}
for k := 0; k < n; k++ {
for j := 0; j < k; j++ {
if m[j*n2+k] != 0 {
scale := m[j*n2+k]
for c := 0; c < n2; c++ {
m[j*n2+c] ^= gfMul(scale, m[k*n2+c])
}
}
}
}
return nil
}

View File

@@ -1,280 +0,0 @@
/*
Reed-Solomon Codes over GF(2^8)
Primitive Polynomial: x^8+x^4+x^3+x^2+1
Galois Filed arithmetic using Intel SIMD instructions (AVX2 or SSSE3)
*/
package reedsolomon
import "errors"
// Encoder implements for Reed-Solomon Encoding/Reconstructing
type Encoder interface {
// Encode multiply generator-matrix with data
// len(vects) must be equal with num of data+parity
Encode(vects [][]byte) error
// Result of reconst will be put into origin position of vects
// it means if you lost vects[0], after reconst the vects[0]'s data will be back in vects[0]
// Reconstruct repair lost data & parity
// Set vect nil if lost
Reconstruct(vects [][]byte) error
// Reconstruct repair lost data
// Set vect nil if lost
ReconstructData(vects [][]byte) error
// ReconstWithPos repair lost data&parity with has&lost vects position
// Save bandwidth&disk I/O (cmp with Reconstruct, if the lost is less than num of parity)
// As erasure codes, we must know which vect is broken,
// so it's necessary to provide such APIs
// len(has) must equal num of data vects
// Example:
// in 3+2, the whole position: [0,1,2,3,4]
// if lost vects[0]
// the "has" could be [1,2,3] or [1,2,4] or ...
// then you must be sure that vects[1] vects[2] vects[3] have correct data (if the "has" is [1,2,3])
// the "dLost" will be [0]
// ps:
// 1. the above lists are in increasing orders TODO support out-of-order
// 2. each vect has same len, don't set it nil
// so we don't need to make slice
ReconstWithPos(vects [][]byte, has, dLost, pLost []int) error
//// ReconstWithPos repair lost data with survived&lost vects position
//// Don't need to append position of parity lost into "lost"
ReconstDataWithPos(vects [][]byte, has, dLost []int) error
}
func checkCfg(d, p int) error {
if (d <= 0) || (p <= 0) {
return errors.New("rs.New: data or parity <= 0")
}
if d+p >= 256 {
return errors.New("rs.New: data+parity >= 256")
}
return nil
}
// New create an Encoder (vandermonde matrix as Encoding matrix)
func New(data, parity int) (enc Encoder, err error) {
err = checkCfg(data, parity)
if err != nil {
return
}
e, err := genEncMatrixVand(data, parity)
if err != nil {
return
}
return newRS(data, parity, e), nil
}
// NewCauchy create an Encoder (cauchy matrix as Generator Matrix)
func NewCauchy(data, parity int) (enc Encoder, err error) {
err = checkCfg(data, parity)
if err != nil {
return
}
e := genEncMatrixCauchy(data, parity)
return newRS(data, parity, e), nil
}
type encBase struct {
data int
parity int
encode []byte
gen []byte
}
func checkEnc(d, p int, vs [][]byte) (size int, err error) {
total := len(vs)
if d+p != total {
err = errors.New("rs.checkER: vects not match rs args")
return
}
size = len(vs[0])
if size == 0 {
err = errors.New("rs.checkER: vects size = 0")
return
}
for i := 1; i < total; i++ {
if len(vs[i]) != size {
err = errors.New("rs.checkER: vects size mismatch")
return
}
}
return
}
func (e *encBase) Encode(vects [][]byte) (err error) {
d := e.data
p := e.parity
_, err = checkEnc(d, p, vects)
if err != nil {
return
}
dv := vects[:d]
pv := vects[d:]
g := e.gen
for i := 0; i < d; i++ {
for j := 0; j < p; j++ {
if i != 0 {
mulVectAdd(g[j*d+i], dv[i], pv[j])
} else {
mulVect(g[j*d], dv[0], pv[j])
}
}
}
return
}
func mulVect(c byte, a, b []byte) {
t := mulTbl[c]
for i := 0; i < len(a); i++ {
b[i] = t[a[i]]
}
}
func mulVectAdd(c byte, a, b []byte) {
t := mulTbl[c]
for i := 0; i < len(a); i++ {
b[i] ^= t[a[i]]
}
}
func (e *encBase) Reconstruct(vects [][]byte) (err error) {
return e.reconstruct(vects, false)
}
func (e *encBase) ReconstructData(vects [][]byte) (err error) {
return e.reconstruct(vects, true)
}
func (e *encBase) ReconstWithPos(vects [][]byte, has, dLost, pLost []int) error {
return e.reconstWithPos(vects, has, dLost, pLost, false)
}
func (e *encBase) ReconstDataWithPos(vects [][]byte, has, dLost []int) error {
return e.reconstWithPos(vects, has, dLost, nil, true)
}
func (e *encBase) reconst(vects [][]byte, has, dLost, pLost []int, dataOnly bool) (err error) {
d := e.data
em := e.encode
dCnt := len(dLost)
size := len(vects[has[0]])
if dCnt != 0 {
vtmp := make([][]byte, d+dCnt)
for i, p := range has {
vtmp[i] = vects[p]
}
for i, p := range dLost {
if len(vects[p]) == 0 {
vects[p] = make([]byte, size)
}
vtmp[i+d] = vects[p]
}
matrixbuf := make([]byte, 4*d*d+dCnt*d)
m := matrixbuf[:d*d]
for i, l := range has {
copy(m[i*d:i*d+d], em[l*d:l*d+d])
}
raw := matrixbuf[d*d : 3*d*d]
im := matrixbuf[3*d*d : 4*d*d]
err2 := matrix(m).invert(raw, d, im)
if err2 != nil {
return err2
}
g := matrixbuf[4*d*d:]
for i, l := range dLost {
copy(g[i*d:i*d+d], im[l*d:l*d+d])
}
etmp := &encBase{data: d, parity: dCnt, gen: g}
err2 = etmp.Encode(vtmp[:d+dCnt])
if err2 != nil {
return err2
}
}
if dataOnly {
return
}
pCnt := len(pLost)
if pCnt != 0 {
vtmp := make([][]byte, d+pCnt)
g := make([]byte, pCnt*d)
for i, l := range pLost {
copy(g[i*d:i*d+d], em[l*d:l*d+d])
}
for i := 0; i < d; i++ {
vtmp[i] = vects[i]
}
for i, p := range pLost {
if len(vects[p]) == 0 {
vects[p] = make([]byte, size)
}
vtmp[i+d] = vects[p]
}
etmp := &encBase{data: d, parity: pCnt, gen: g}
err2 := etmp.Encode(vtmp[:d+pCnt])
if err2 != nil {
return err2
}
}
return
}
func (e *encBase) reconstWithPos(vects [][]byte, has, dLost, pLost []int, dataOnly bool) (err error) {
d := e.data
p := e.parity
// TODO check more, maybe element in has show in lost & deal with len(has) > d
if len(has) != d {
return errors.New("rs.Reconst: not enough vects")
}
dCnt := len(dLost)
if dCnt > p {
return errors.New("rs.Reconst: not enough vects")
}
pCnt := len(pLost)
if pCnt > p {
return errors.New("rs.Reconst: not enough vects")
}
return e.reconst(vects, has, dLost, pLost, dataOnly)
}
func (e *encBase) reconstruct(vects [][]byte, dataOnly bool) (err error) {
d := e.data
p := e.parity
t := d + p
listBuf := make([]int, t+p)
has := listBuf[:d]
dLost := listBuf[d:t]
pLost := listBuf[t : t+p]
hasCnt, dCnt, pCnt := 0, 0, 0
for i := 0; i < t; i++ {
if vects[i] != nil {
if hasCnt < d {
has[hasCnt] = i
hasCnt++
}
} else {
if i < d {
if dCnt < p {
dLost[dCnt] = i
dCnt++
} else {
return errors.New("rs.Reconst: not enough vects")
}
} else {
if pCnt < p {
pLost[pCnt] = i
pCnt++
} else {
return errors.New("rs.Reconst: not enough vects")
}
}
}
}
if hasCnt != d {
return errors.New("rs.Reconst: not enough vects")
}
dLost = dLost[:dCnt]
pLost = pLost[:pCnt]
return e.reconst(vects, has, dLost, pLost, dataOnly)
}

View File

@@ -1,868 +0,0 @@
package reedsolomon
import (
"errors"
"sync"
"github.com/templexxx/cpufeat"
)
// SIMD Instruction Extensions
const (
none = iota
avx2
ssse3
)
var extension = none
func init() {
getEXT()
}
func getEXT() {
if cpufeat.X86.HasAVX2 {
extension = avx2
return
} else if cpufeat.X86.HasSSSE3 {
extension = ssse3
return
} else {
extension = none
return
}
}
//go:noescape
func copy32B(dst, src []byte) // Need SSE2(introduced in 2001)
func initTbl(g matrix, rows, cols int, tbl []byte) {
off := 0
for i := 0; i < cols; i++ {
for j := 0; j < rows; j++ {
c := g[j*cols+i]
t := lowhighTbl[c][:]
copy32B(tbl[off:off+32], t)
off += 32
}
}
}
// At most 3060 inverse matrix (when data=14, parity=4, calc by mathtool/cntinverse)
// In practice, data usually below 12, parity below 5
func okCache(data, parity int) bool {
if data < 15 && parity < 5 { // you can change it, but the data+parity can't be bigger than 32 (tips: see the codes about make inverse matrix)
return true
}
return false
}
type (
encSSSE3 encSIMD
encAVX2 encSIMD
encSIMD struct {
data int
parity int
encode matrix
gen matrix
tbl []byte
// inverse matrix cache is design for small vect size ( < 4KB )
// it will save time for calculating inverse matrix
// but it's not so important for big vect size
enableCache bool
inverseCache iCache
}
iCache struct {
sync.RWMutex
data map[uint32][]byte
}
)
func newRS(d, p int, em matrix) (enc Encoder) {
g := em[d*d:]
if extension == none {
return &encBase{data: d, parity: p, encode: em, gen: g}
}
t := make([]byte, d*p*32)
initTbl(g, p, d, t)
ok := okCache(d, p)
if extension == avx2 {
e := &encAVX2{data: d, parity: p, encode: em, gen: g, tbl: t, enableCache: ok,
inverseCache: iCache{data: make(map[uint32][]byte)}}
return e
}
e := &encSSSE3{data: d, parity: p, encode: em, gen: g, tbl: t, enableCache: ok,
inverseCache: iCache{data: make(map[uint32][]byte)}}
return e
}
// Size of sub-vector
const unit int = 16 * 1024
func getDo(n int) int {
if n < unit {
c := n >> 4
if c == 0 {
return unit
}
return c << 4
}
return unit
}
func (e *encAVX2) Encode(vects [][]byte) (err error) {
d := e.data
p := e.parity
size, err := checkEnc(d, p, vects)
if err != nil {
return
}
dv := vects[:d]
pv := vects[d:]
start, end := 0, 0
do := getDo(size)
for start < size {
end = start + do
if end <= size {
e.matrixMul(start, end, dv, pv)
start = end
} else {
e.matrixMulRemain(start, size, dv, pv)
start = size
}
}
return
}
//go:noescape
func mulVectAVX2(tbl, d, p []byte)
//go:noescape
func mulVectAddAVX2(tbl, d, p []byte)
func (e *encAVX2) matrixMul(start, end int, dv, pv [][]byte) {
d := e.data
p := e.parity
tbl := e.tbl
off := 0
for i := 0; i < d; i++ {
for j := 0; j < p; j++ {
t := tbl[off : off+32]
if i != 0 {
mulVectAddAVX2(t, dv[i][start:end], pv[j][start:end])
} else {
mulVectAVX2(t, dv[0][start:end], pv[j][start:end])
}
off += 32
}
}
}
func (e *encAVX2) matrixMulRemain(start, end int, dv, pv [][]byte) {
undone := end - start
do := (undone >> 4) << 4
d := e.data
p := e.parity
tbl := e.tbl
if do >= 16 {
end2 := start + do
off := 0
for i := 0; i < d; i++ {
for j := 0; j < p; j++ {
t := tbl[off : off+32]
if i != 0 {
mulVectAddAVX2(t, dv[i][start:end2], pv[j][start:end2])
} else {
mulVectAVX2(t, dv[0][start:end2], pv[j][start:end2])
}
off += 32
}
}
start = end
}
if undone > do {
// may recalculate some data, but still improve a lot
start2 := end - 16
if start2 >= 0 {
off := 0
for i := 0; i < d; i++ {
for j := 0; j < p; j++ {
t := tbl[off : off+32]
if i != 0 {
mulVectAddAVX2(t, dv[i][start2:end], pv[j][start2:end])
} else {
mulVectAVX2(t, dv[0][start2:end], pv[j][start2:end])
}
off += 32
}
}
} else {
g := e.gen
for i := 0; i < d; i++ {
for j := 0; j < p; j++ {
if i != 0 {
mulVectAdd(g[j*d+i], dv[i][start:], pv[j][start:])
} else {
mulVect(g[j*d], dv[0][start:], pv[j][start:])
}
}
}
}
}
}
// use generator-matrix but not tbls for encoding
// it's design for reconstructing
// for small vects, it cost to much time on initTbl, so drop it
// and for big vects, the tbls can't impact much, because the cache will be filled with vects' data
func (e *encAVX2) encodeGen(vects [][]byte) (err error) {
d := e.data
p := e.parity
size, err := checkEnc(d, p, vects)
if err != nil {
return
}
dv := vects[:d]
pv := vects[d:]
start, end := 0, 0
do := getDo(size)
for start < size {
end = start + do
if end <= size {
e.matrixMulGen(start, end, dv, pv)
start = end
} else {
e.matrixMulRemainGen(start, size, dv, pv)
start = size
}
}
return
}
func (e *encAVX2) matrixMulGen(start, end int, dv, pv [][]byte) {
d := e.data
p := e.parity
g := e.gen
for i := 0; i < d; i++ {
for j := 0; j < p; j++ {
t := lowhighTbl[g[j*d+i]][:]
if i != 0 {
mulVectAddAVX2(t, dv[i][start:end], pv[j][start:end])
} else {
mulVectAVX2(t, dv[0][start:end], pv[j][start:end])
}
}
}
}
func (e *encAVX2) matrixMulRemainGen(start, end int, dv, pv [][]byte) {
undone := end - start
do := (undone >> 4) << 4
d := e.data
p := e.parity
g := e.gen
if do >= 16 {
end2 := start + do
for i := 0; i < d; i++ {
for j := 0; j < p; j++ {
t := lowhighTbl[g[j*d+i]][:]
if i != 0 {
mulVectAddAVX2(t, dv[i][start:end2], pv[j][start:end2])
} else {
mulVectAVX2(t, dv[0][start:end2], pv[j][start:end2])
}
}
}
start = end
}
if undone > do {
start2 := end - 16
if start2 >= 0 {
for i := 0; i < d; i++ {
for j := 0; j < p; j++ {
t := lowhighTbl[g[j*d+i]][:]
if i != 0 {
mulVectAddAVX2(t, dv[i][start2:end], pv[j][start2:end])
} else {
mulVectAVX2(t, dv[0][start2:end], pv[j][start2:end])
}
}
}
} else {
for i := 0; i < d; i++ {
for j := 0; j < p; j++ {
if i != 0 {
mulVectAdd(g[j*d+i], dv[i][start:], pv[j][start:])
} else {
mulVect(g[j*d], dv[0][start:], pv[j][start:])
}
}
}
}
}
}
func (e *encAVX2) Reconstruct(vects [][]byte) (err error) {
return e.reconstruct(vects, false)
}
func (e *encAVX2) ReconstructData(vects [][]byte) (err error) {
return e.reconstruct(vects, true)
}
func (e *encAVX2) ReconstWithPos(vects [][]byte, has, dLost, pLost []int) error {
return e.reconstWithPos(vects, has, dLost, pLost, false)
}
func (e *encAVX2) ReconstDataWithPos(vects [][]byte, has, dLost []int) error {
return e.reconstWithPos(vects, has, dLost, nil, true)
}
func (e *encAVX2) makeGen(has, dLost []int) (gen []byte, err error) {
d := e.data
em := e.encode
cnt := len(dLost)
if !e.enableCache {
matrixbuf := make([]byte, 4*d*d+cnt*d)
m := matrixbuf[:d*d]
for i, l := range has {
copy(m[i*d:i*d+d], em[l*d:l*d+d])
}
raw := matrixbuf[d*d : 3*d*d]
im := matrixbuf[3*d*d : 4*d*d]
err2 := matrix(m).invert(raw, d, im)
if err2 != nil {
return nil, err2
}
g := matrixbuf[4*d*d:]
for i, l := range dLost {
copy(g[i*d:i*d+d], im[l*d:l*d+d])
}
return g, nil
}
var ikey uint32
for _, p := range has {
ikey += 1 << uint8(p)
}
e.inverseCache.RLock()
v, ok := e.inverseCache.data[ikey]
if ok {
im := v
g := make([]byte, cnt*d)
for i, l := range dLost {
copy(g[i*d:i*d+d], im[l*d:l*d+d])
}
e.inverseCache.RUnlock()
return g, nil
}
e.inverseCache.RUnlock()
matrixbuf := make([]byte, 4*d*d+cnt*d)
m := matrixbuf[:d*d]
for i, l := range has {
copy(m[i*d:i*d+d], em[l*d:l*d+d])
}
raw := matrixbuf[d*d : 3*d*d]
im := matrixbuf[3*d*d : 4*d*d]
err2 := matrix(m).invert(raw, d, im)
if err2 != nil {
return nil, err2
}
e.inverseCache.Lock()
e.inverseCache.data[ikey] = im
e.inverseCache.Unlock()
g := matrixbuf[4*d*d:]
for i, l := range dLost {
copy(g[i*d:i*d+d], im[l*d:l*d+d])
}
return g, nil
}
func (e *encAVX2) reconst(vects [][]byte, has, dLost, pLost []int, dataOnly bool) (err error) {
d := e.data
em := e.encode
dCnt := len(dLost)
size := len(vects[has[0]])
if dCnt != 0 {
vtmp := make([][]byte, d+dCnt)
for i, p := range has {
vtmp[i] = vects[p]
}
for i, p := range dLost {
if len(vects[p]) == 0 {
vects[p] = make([]byte, size)
}
vtmp[i+d] = vects[p]
}
g, err2 := e.makeGen(has, dLost)
if err2 != nil {
return
}
etmp := &encAVX2{data: d, parity: dCnt, gen: g}
err2 = etmp.encodeGen(vtmp)
if err2 != nil {
return err2
}
}
if dataOnly {
return
}
pCnt := len(pLost)
if pCnt != 0 {
g := make([]byte, pCnt*d)
for i, l := range pLost {
copy(g[i*d:i*d+d], em[l*d:l*d+d])
}
vtmp := make([][]byte, d+pCnt)
for i := 0; i < d; i++ {
vtmp[i] = vects[i]
}
for i, p := range pLost {
if len(vects[p]) == 0 {
vects[p] = make([]byte, size)
}
vtmp[i+d] = vects[p]
}
etmp := &encAVX2{data: d, parity: pCnt, gen: g}
err2 := etmp.encodeGen(vtmp)
if err2 != nil {
return err2
}
}
return
}
func (e *encAVX2) reconstWithPos(vects [][]byte, has, dLost, pLost []int, dataOnly bool) (err error) {
d := e.data
p := e.parity
if len(has) != d {
return errors.New("rs.Reconst: not enough vects")
}
dCnt := len(dLost)
if dCnt > p {
return errors.New("rs.Reconst: not enough vects")
}
pCnt := len(pLost)
if pCnt > p {
return errors.New("rs.Reconst: not enough vects")
}
return e.reconst(vects, has, dLost, pLost, dataOnly)
}
func (e *encAVX2) reconstruct(vects [][]byte, dataOnly bool) (err error) {
d := e.data
p := e.parity
t := d + p
listBuf := make([]int, t+p)
has := listBuf[:d]
dLost := listBuf[d:t]
pLost := listBuf[t : t+p]
hasCnt, dCnt, pCnt := 0, 0, 0
for i := 0; i < t; i++ {
if vects[i] != nil {
if hasCnt < d {
has[hasCnt] = i
hasCnt++
}
} else {
if i < d {
if dCnt < p {
dLost[dCnt] = i
dCnt++
} else {
return errors.New("rs.Reconst: not enough vects")
}
} else {
if pCnt < p {
pLost[pCnt] = i
pCnt++
} else {
return errors.New("rs.Reconst: not enough vects")
}
}
}
}
if hasCnt != d {
return errors.New("rs.Reconst: not enough vects")
}
dLost = dLost[:dCnt]
pLost = pLost[:pCnt]
return e.reconst(vects, has, dLost, pLost, dataOnly)
}
func (e *encSSSE3) Encode(vects [][]byte) (err error) {
d := e.data
p := e.parity
size, err := checkEnc(d, p, vects)
if err != nil {
return
}
dv := vects[:d]
pv := vects[d:]
start, end := 0, 0
do := getDo(size)
for start < size {
end = start + do
if end <= size {
e.matrixMul(start, end, dv, pv)
start = end
} else {
e.matrixMulRemain(start, size, dv, pv)
start = size
}
}
return
}
//go:noescape
func mulVectSSSE3(tbl, d, p []byte)
//go:noescape
func mulVectAddSSSE3(tbl, d, p []byte)
func (e *encSSSE3) matrixMul(start, end int, dv, pv [][]byte) {
d := e.data
p := e.parity
tbl := e.tbl
off := 0
for i := 0; i < d; i++ {
for j := 0; j < p; j++ {
t := tbl[off : off+32]
if i != 0 {
mulVectAddSSSE3(t, dv[i][start:end], pv[j][start:end])
} else {
mulVectSSSE3(t, dv[0][start:end], pv[j][start:end])
}
off += 32
}
}
}
func (e *encSSSE3) matrixMulRemain(start, end int, dv, pv [][]byte) {
undone := end - start
do := (undone >> 4) << 4
d := e.data
p := e.parity
tbl := e.tbl
if do >= 16 {
end2 := start + do
off := 0
for i := 0; i < d; i++ {
for j := 0; j < p; j++ {
t := tbl[off : off+32]
if i != 0 {
mulVectAddSSSE3(t, dv[i][start:end2], pv[j][start:end2])
} else {
mulVectSSSE3(t, dv[0][start:end2], pv[j][start:end2])
}
off += 32
}
}
start = end
}
if undone > do {
start2 := end - 16
if start2 >= 0 {
off := 0
for i := 0; i < d; i++ {
for j := 0; j < p; j++ {
t := tbl[off : off+32]
if i != 0 {
mulVectAddSSSE3(t, dv[i][start2:end], pv[j][start2:end])
} else {
mulVectSSSE3(t, dv[0][start2:end], pv[j][start2:end])
}
off += 32
}
}
} else {
g := e.gen
for i := 0; i < d; i++ {
for j := 0; j < p; j++ {
if i != 0 {
mulVectAdd(g[j*d+i], dv[i][start:], pv[j][start:])
} else {
mulVect(g[j*d], dv[0][start:], pv[j][start:])
}
}
}
}
}
}
// use generator-matrix but not tbls for encoding
// it's design for reconstructing
// for small vects, it cost to much time on initTbl, so drop it
// and for big vects, the tbls can't impact much, because the cache will be filled with vects' data
func (e *encSSSE3) encodeGen(vects [][]byte) (err error) {
d := e.data
p := e.parity
size, err := checkEnc(d, p, vects)
if err != nil {
return
}
dv := vects[:d]
pv := vects[d:]
start, end := 0, 0
do := getDo(size)
for start < size {
end = start + do
if end <= size {
e.matrixMulGen(start, end, dv, pv)
start = end
} else {
e.matrixMulRemainGen(start, size, dv, pv)
start = size
}
}
return
}
func (e *encSSSE3) matrixMulGen(start, end int, dv, pv [][]byte) {
d := e.data
p := e.parity
g := e.gen
for i := 0; i < d; i++ {
for j := 0; j < p; j++ {
t := lowhighTbl[g[j*d+i]][:]
if i != 0 {
mulVectAddSSSE3(t, dv[i][start:end], pv[j][start:end])
} else {
mulVectSSSE3(t, dv[0][start:end], pv[j][start:end])
}
}
}
}
func (e *encSSSE3) matrixMulRemainGen(start, end int, dv, pv [][]byte) {
undone := end - start
do := (undone >> 4) << 4
d := e.data
p := e.parity
g := e.gen
if do >= 16 {
end2 := start + do
for i := 0; i < d; i++ {
for j := 0; j < p; j++ {
t := lowhighTbl[g[j*d+i]][:]
if i != 0 {
mulVectAddSSSE3(t, dv[i][start:end2], pv[j][start:end2])
} else {
mulVectSSSE3(t, dv[0][start:end2], pv[j][start:end2])
}
}
}
start = end
}
if undone > do {
start2 := end - 16
if start2 >= 0 {
for i := 0; i < d; i++ {
for j := 0; j < p; j++ {
t := lowhighTbl[g[j*d+i]][:]
if i != 0 {
mulVectAddSSSE3(t, dv[i][start2:end], pv[j][start2:end])
} else {
mulVectSSSE3(t, dv[0][start2:end], pv[j][start2:end])
}
}
}
} else {
for i := 0; i < d; i++ {
for j := 0; j < p; j++ {
if i != 0 {
mulVectAdd(g[j*d+i], dv[i][start:], pv[j][start:])
} else {
mulVect(g[j*d], dv[0][start:], pv[j][start:])
}
}
}
}
}
}
func (e *encSSSE3) Reconstruct(vects [][]byte) (err error) {
return e.reconstruct(vects, false)
}
func (e *encSSSE3) ReconstructData(vects [][]byte) (err error) {
return e.reconstruct(vects, true)
}
func (e *encSSSE3) ReconstWithPos(vects [][]byte, has, dLost, pLost []int) error {
return e.reconstWithPos(vects, has, dLost, pLost, false)
}
func (e *encSSSE3) ReconstDataWithPos(vects [][]byte, has, dLost []int) error {
return e.reconstWithPos(vects, has, dLost, nil, true)
}
func (e *encSSSE3) makeGen(has, dLost []int) (gen []byte, err error) {
d := e.data
em := e.encode
cnt := len(dLost)
if !e.enableCache {
matrixbuf := make([]byte, 4*d*d+cnt*d)
m := matrixbuf[:d*d]
for i, l := range has {
copy(m[i*d:i*d+d], em[l*d:l*d+d])
}
raw := matrixbuf[d*d : 3*d*d]
im := matrixbuf[3*d*d : 4*d*d]
err2 := matrix(m).invert(raw, d, im)
if err2 != nil {
return nil, err2
}
g := matrixbuf[4*d*d:]
for i, l := range dLost {
copy(g[i*d:i*d+d], im[l*d:l*d+d])
}
return g, nil
}
var ikey uint32
for _, p := range has {
ikey += 1 << uint8(p)
}
e.inverseCache.RLock()
v, ok := e.inverseCache.data[ikey]
if ok {
im := v
g := make([]byte, cnt*d)
for i, l := range dLost {
copy(g[i*d:i*d+d], im[l*d:l*d+d])
}
e.inverseCache.RUnlock()
return g, nil
}
e.inverseCache.RUnlock()
matrixbuf := make([]byte, 4*d*d+cnt*d)
m := matrixbuf[:d*d]
for i, l := range has {
copy(m[i*d:i*d+d], em[l*d:l*d+d])
}
raw := matrixbuf[d*d : 3*d*d]
im := matrixbuf[3*d*d : 4*d*d]
err2 := matrix(m).invert(raw, d, im)
if err2 != nil {
return nil, err2
}
e.inverseCache.Lock()
e.inverseCache.data[ikey] = im
e.inverseCache.Unlock()
g := matrixbuf[4*d*d:]
for i, l := range dLost {
copy(g[i*d:i*d+d], im[l*d:l*d+d])
}
return g, nil
}
func (e *encSSSE3) reconst(vects [][]byte, has, dLost, pLost []int, dataOnly bool) (err error) {
d := e.data
em := e.encode
dCnt := len(dLost)
size := len(vects[has[0]])
if dCnt != 0 {
vtmp := make([][]byte, d+dCnt)
for i, p := range has {
vtmp[i] = vects[p]
}
for i, p := range dLost {
if len(vects[p]) == 0 {
vects[p] = make([]byte, size)
}
vtmp[i+d] = vects[p]
}
g, err2 := e.makeGen(has, dLost)
if err2 != nil {
return
}
etmp := &encSSSE3{data: d, parity: dCnt, gen: g}
err2 = etmp.encodeGen(vtmp)
if err2 != nil {
return err2
}
}
if dataOnly {
return
}
pCnt := len(pLost)
if pCnt != 0 {
g := make([]byte, pCnt*d)
for i, l := range pLost {
copy(g[i*d:i*d+d], em[l*d:l*d+d])
}
vtmp := make([][]byte, d+pCnt)
for i := 0; i < d; i++ {
vtmp[i] = vects[i]
}
for i, p := range pLost {
if len(vects[p]) == 0 {
vects[p] = make([]byte, size)
}
vtmp[i+d] = vects[p]
}
etmp := &encSSSE3{data: d, parity: pCnt, gen: g}
err2 := etmp.encodeGen(vtmp)
if err2 != nil {
return err2
}
}
return
}
func (e *encSSSE3) reconstWithPos(vects [][]byte, has, dLost, pLost []int, dataOnly bool) (err error) {
d := e.data
p := e.parity
if len(has) != d {
return errors.New("rs.Reconst: not enough vects")
}
dCnt := len(dLost)
if dCnt > p {
return errors.New("rs.Reconst: not enough vects")
}
pCnt := len(pLost)
if pCnt > p {
return errors.New("rs.Reconst: not enough vects")
}
return e.reconst(vects, has, dLost, pLost, dataOnly)
}
func (e *encSSSE3) reconstruct(vects [][]byte, dataOnly bool) (err error) {
d := e.data
p := e.parity
t := d + p
listBuf := make([]int, t+p)
has := listBuf[:d]
dLost := listBuf[d:t]
pLost := listBuf[t : t+p]
hasCnt, dCnt, pCnt := 0, 0, 0
for i := 0; i < t; i++ {
if vects[i] != nil {
if hasCnt < d {
has[hasCnt] = i
hasCnt++
}
} else {
if i < d {
if dCnt < p {
dLost[dCnt] = i
dCnt++
} else {
return errors.New("rs.Reconst: not enough vects")
}
} else {
if pCnt < p {
pLost[pCnt] = i
pCnt++
} else {
return errors.New("rs.Reconst: not enough vects")
}
}
}
}
if hasCnt != d {
return errors.New("rs.Reconst: not enough vects")
}
dLost = dLost[:dCnt]
pLost = pLost[:pCnt]
return e.reconst(vects, has, dLost, pLost, dataOnly)
}

View File

@@ -1,401 +0,0 @@
// Reference: www.ssrc.ucsc.edu/Papers/plank-fast13.pdf
#include "textflag.h"
#define low_tbl Y0
#define high_tbl Y1
#define mask Y2
#define in0 Y3
#define in1 Y4
#define in2 Y5
#define in3 Y6
#define in4 Y7
#define in5 Y8
#define in0_h Y10
#define in1_h Y11
#define in2_h Y12
#define in3_h Y13
#define in4_h Y14
#define in5_h Y15
#define in BX
#define out DI
#define len R8
#define pos R9
#define tmp0 R10
#define low_tblx X0
#define high_tblx X1
#define maskx X2
#define in0x X3
#define in0_hx X10
#define tmp0x X9
#define tmp1x X11
#define tmp2x X12
#define tmp3x X13
// func mulVectAVX2(tbl, d, p []byte)
TEXT ·mulVectAVX2(SB), NOSPLIT, $0
MOVQ i+24(FP), in
MOVQ o+48(FP), out
MOVQ tbl+0(FP), tmp0
VMOVDQU (tmp0), low_tblx
VMOVDQU 16(tmp0), high_tblx
MOVB $0x0f, DX
LONG $0x2069e3c4; WORD $0x00d2 // VPINSRB $0x00, EDX, XMM2, XMM2
VPBROADCASTB maskx, maskx
MOVQ in_len+32(FP), len
TESTQ $31, len
JNZ one16b
ymm:
VINSERTI128 $1, low_tblx, low_tbl, low_tbl
VINSERTI128 $1, high_tblx, high_tbl, high_tbl
VINSERTI128 $1, maskx, mask, mask
TESTQ $255, len
JNZ not_aligned
// 256bytes/loop
aligned:
MOVQ $0, pos
loop256b:
VMOVDQU (in)(pos*1), in0
VPSRLQ $4, in0, in0_h
VPAND mask, in0_h, in0_h
VPAND mask, in0, in0
VPSHUFB in0_h, high_tbl, in0_h
VPSHUFB in0, low_tbl, in0
VPXOR in0, in0_h, in0
VMOVDQU in0, (out)(pos*1)
VMOVDQU 32(in)(pos*1), in1
VPSRLQ $4, in1, in1_h
VPAND mask, in1_h, in1_h
VPAND mask, in1, in1
VPSHUFB in1_h, high_tbl, in1_h
VPSHUFB in1, low_tbl, in1
VPXOR in1, in1_h, in1
VMOVDQU in1, 32(out)(pos*1)
VMOVDQU 64(in)(pos*1), in2
VPSRLQ $4, in2, in2_h
VPAND mask, in2_h, in2_h
VPAND mask, in2, in2
VPSHUFB in2_h, high_tbl, in2_h
VPSHUFB in2, low_tbl, in2
VPXOR in2, in2_h, in2
VMOVDQU in2, 64(out)(pos*1)
VMOVDQU 96(in)(pos*1), in3
VPSRLQ $4, in3, in3_h
VPAND mask, in3_h, in3_h
VPAND mask, in3, in3
VPSHUFB in3_h, high_tbl, in3_h
VPSHUFB in3, low_tbl, in3
VPXOR in3, in3_h, in3
VMOVDQU in3, 96(out)(pos*1)
VMOVDQU 128(in)(pos*1), in4
VPSRLQ $4, in4, in4_h
VPAND mask, in4_h, in4_h
VPAND mask, in4, in4
VPSHUFB in4_h, high_tbl, in4_h
VPSHUFB in4, low_tbl, in4
VPXOR in4, in4_h, in4
VMOVDQU in4, 128(out)(pos*1)
VMOVDQU 160(in)(pos*1), in5
VPSRLQ $4, in5, in5_h
VPAND mask, in5_h, in5_h
VPAND mask, in5, in5
VPSHUFB in5_h, high_tbl, in5_h
VPSHUFB in5, low_tbl, in5
VPXOR in5, in5_h, in5
VMOVDQU in5, 160(out)(pos*1)
VMOVDQU 192(in)(pos*1), in0
VPSRLQ $4, in0, in0_h
VPAND mask, in0_h, in0_h
VPAND mask, in0, in0
VPSHUFB in0_h, high_tbl, in0_h
VPSHUFB in0, low_tbl, in0
VPXOR in0, in0_h, in0
VMOVDQU in0, 192(out)(pos*1)
VMOVDQU 224(in)(pos*1), in1
VPSRLQ $4, in1, in1_h
VPAND mask, in1_h, in1_h
VPAND mask, in1, in1
VPSHUFB in1_h, high_tbl, in1_h
VPSHUFB in1, low_tbl, in1
VPXOR in1, in1_h, in1
VMOVDQU in1, 224(out)(pos*1)
ADDQ $256, pos
CMPQ len, pos
JNE loop256b
VZEROUPPER
RET
not_aligned:
MOVQ len, tmp0
ANDQ $255, tmp0
loop32b:
VMOVDQU -32(in)(len*1), in0
VPSRLQ $4, in0, in0_h
VPAND mask, in0_h, in0_h
VPAND mask, in0, in0
VPSHUFB in0_h, high_tbl, in0_h
VPSHUFB in0, low_tbl, in0
VPXOR in0, in0_h, in0
VMOVDQU in0, -32(out)(len*1)
SUBQ $32, len
SUBQ $32, tmp0
JG loop32b
CMPQ len, $256
JGE aligned
VZEROUPPER
RET
one16b:
VMOVDQU -16(in)(len*1), in0x
VPSRLQ $4, in0x, in0_hx
VPAND maskx, in0x, in0x
VPAND maskx, in0_hx, in0_hx
VPSHUFB in0_hx, high_tblx, in0_hx
VPSHUFB in0x, low_tblx, in0x
VPXOR in0x, in0_hx, in0x
VMOVDQU in0x, -16(out)(len*1)
SUBQ $16, len
CMPQ len, $0
JNE ymm
RET
// func mulVectAddAVX2(tbl, d, p []byte)
TEXT ·mulVectAddAVX2(SB), NOSPLIT, $0
MOVQ i+24(FP), in
MOVQ o+48(FP), out
MOVQ tbl+0(FP), tmp0
VMOVDQU (tmp0), low_tblx
VMOVDQU 16(tmp0), high_tblx
MOVB $0x0f, DX
LONG $0x2069e3c4; WORD $0x00d2
VPBROADCASTB maskx, maskx
MOVQ in_len+32(FP), len
TESTQ $31, len
JNZ one16b
ymm:
VINSERTI128 $1, low_tblx, low_tbl, low_tbl
VINSERTI128 $1, high_tblx, high_tbl, high_tbl
VINSERTI128 $1, maskx, mask, mask
TESTQ $255, len
JNZ not_aligned
aligned:
MOVQ $0, pos
loop256b:
VMOVDQU (in)(pos*1), in0
VPSRLQ $4, in0, in0_h
VPAND mask, in0_h, in0_h
VPAND mask, in0, in0
VPSHUFB in0_h, high_tbl, in0_h
VPSHUFB in0, low_tbl, in0
VPXOR in0, in0_h, in0
VPXOR (out)(pos*1), in0, in0
VMOVDQU in0, (out)(pos*1)
VMOVDQU 32(in)(pos*1), in1
VPSRLQ $4, in1, in1_h
VPAND mask, in1_h, in1_h
VPAND mask, in1, in1
VPSHUFB in1_h, high_tbl, in1_h
VPSHUFB in1, low_tbl, in1
VPXOR in1, in1_h, in1
VPXOR 32(out)(pos*1), in1, in1
VMOVDQU in1, 32(out)(pos*1)
VMOVDQU 64(in)(pos*1), in2
VPSRLQ $4, in2, in2_h
VPAND mask, in2_h, in2_h
VPAND mask, in2, in2
VPSHUFB in2_h, high_tbl, in2_h
VPSHUFB in2, low_tbl, in2
VPXOR in2, in2_h, in2
VPXOR 64(out)(pos*1), in2, in2
VMOVDQU in2, 64(out)(pos*1)
VMOVDQU 96(in)(pos*1), in3
VPSRLQ $4, in3, in3_h
VPAND mask, in3_h, in3_h
VPAND mask, in3, in3
VPSHUFB in3_h, high_tbl, in3_h
VPSHUFB in3, low_tbl, in3
VPXOR in3, in3_h, in3
VPXOR 96(out)(pos*1), in3, in3
VMOVDQU in3, 96(out)(pos*1)
VMOVDQU 128(in)(pos*1), in4
VPSRLQ $4, in4, in4_h
VPAND mask, in4_h, in4_h
VPAND mask, in4, in4
VPSHUFB in4_h, high_tbl, in4_h
VPSHUFB in4, low_tbl, in4
VPXOR in4, in4_h, in4
VPXOR 128(out)(pos*1), in4, in4
VMOVDQU in4, 128(out)(pos*1)
VMOVDQU 160(in)(pos*1), in5
VPSRLQ $4, in5, in5_h
VPAND mask, in5_h, in5_h
VPAND mask, in5, in5
VPSHUFB in5_h, high_tbl, in5_h
VPSHUFB in5, low_tbl, in5
VPXOR in5, in5_h, in5
VPXOR 160(out)(pos*1), in5, in5
VMOVDQU in5, 160(out)(pos*1)
VMOVDQU 192(in)(pos*1), in0
VPSRLQ $4, in0, in0_h
VPAND mask, in0_h, in0_h
VPAND mask, in0, in0
VPSHUFB in0_h, high_tbl, in0_h
VPSHUFB in0, low_tbl, in0
VPXOR in0, in0_h, in0
VPXOR 192(out)(pos*1), in0, in0
VMOVDQU in0, 192(out)(pos*1)
VMOVDQU 224(in)(pos*1), in1
VPSRLQ $4, in1, in1_h
VPAND mask, in1_h, in1_h
VPAND mask, in1, in1
VPSHUFB in1_h, high_tbl, in1_h
VPSHUFB in1, low_tbl, in1
VPXOR in1, in1_h, in1
VPXOR 224(out)(pos*1), in1, in1
VMOVDQU in1, 224(out)(pos*1)
ADDQ $256, pos
CMPQ len, pos
JNE loop256b
VZEROUPPER
RET
not_aligned:
MOVQ len, tmp0
ANDQ $255, tmp0
loop32b:
VMOVDQU -32(in)(len*1), in0
VPSRLQ $4, in0, in0_h
VPAND mask, in0_h, in0_h
VPAND mask, in0, in0
VPSHUFB in0_h, high_tbl, in0_h
VPSHUFB in0, low_tbl, in0
VPXOR in0, in0_h, in0
VPXOR -32(out)(len*1), in0, in0
VMOVDQU in0, -32(out)(len*1)
SUBQ $32, len
SUBQ $32, tmp0
JG loop32b
CMPQ len, $256
JGE aligned
VZEROUPPER
RET
one16b:
VMOVDQU -16(in)(len*1), in0x
VPSRLQ $4, in0x, in0_hx
VPAND maskx, in0x, in0x
VPAND maskx, in0_hx, in0_hx
VPSHUFB in0_hx, high_tblx, in0_hx
VPSHUFB in0x, low_tblx, in0x
VPXOR in0x, in0_hx, in0x
VPXOR -16(out)(len*1), in0x, in0x
VMOVDQU in0x, -16(out)(len*1)
SUBQ $16, len
CMPQ len, $0
JNE ymm
RET
// func mulVectSSSE3(tbl, d, p []byte)
TEXT ·mulVectSSSE3(SB), NOSPLIT, $0
MOVQ i+24(FP), in
MOVQ o+48(FP), out
MOVQ tbl+0(FP), tmp0
MOVOU (tmp0), low_tblx
MOVOU 16(tmp0), high_tblx
MOVB $15, tmp0
MOVQ tmp0, maskx
PXOR tmp0x, tmp0x
PSHUFB tmp0x, maskx
MOVQ in_len+32(FP), len
SHRQ $4, len
loop:
MOVOU (in), in0x
MOVOU in0x, in0_hx
PSRLQ $4, in0_hx
PAND maskx, in0x
PAND maskx, in0_hx
MOVOU low_tblx, tmp1x
MOVOU high_tblx, tmp2x
PSHUFB in0x, tmp1x
PSHUFB in0_hx, tmp2x
PXOR tmp1x, tmp2x
MOVOU tmp2x, (out)
ADDQ $16, in
ADDQ $16, out
SUBQ $1, len
JNZ loop
RET
// func mulVectAddSSSE3(tbl, d, p []byte)
TEXT ·mulVectAddSSSE3(SB), NOSPLIT, $0
MOVQ i+24(FP), in
MOVQ o+48(FP), out
MOVQ tbl+0(FP), tmp0
MOVOU (tmp0), low_tblx
MOVOU 16(tmp0), high_tblx
MOVB $15, tmp0
MOVQ tmp0, maskx
PXOR tmp0x, tmp0x
PSHUFB tmp0x, maskx
MOVQ in_len+32(FP), len
SHRQ $4, len
loop:
MOVOU (in), in0x
MOVOU in0x, in0_hx
PSRLQ $4, in0_hx
PAND maskx, in0x
PAND maskx, in0_hx
MOVOU low_tblx, tmp1x
MOVOU high_tblx, tmp2x
PSHUFB in0x, tmp1x
PSHUFB in0_hx, tmp2x
PXOR tmp1x, tmp2x
MOVOU (out), tmp3x
PXOR tmp3x, tmp2x
MOVOU tmp2x, (out)
ADDQ $16, in
ADDQ $16, out
SUBQ $1, len
JNZ loop
RET
// func copy32B(dst, src []byte)
TEXT ·copy32B(SB), NOSPLIT, $0
MOVQ dst+0(FP), SI
MOVQ src+24(FP), DX
MOVOU (DX), X0
MOVOU 16(DX), X1
MOVOU X0, (SI)
MOVOU X1, 16(SI)
RET

View File

@@ -1,8 +0,0 @@
// +build !amd64
package reedsolomon
func newRS(d, p int, em matrix) (enc Encoder) {
g := em[d*d:]
return &encBase{data: d, parity: p, encode: em, gen: g}
}

File diff suppressed because one or more lines are too long

View File

@@ -45,6 +45,7 @@ loop32b:
ADDQ $32, POS
CMPQ LEN, POS
JNE loop32b
VZEROUPPER
RET
loop_1b:
@@ -113,6 +114,7 @@ loop128b:
ADDQ $128, POS
CMPQ LEN, POS
JNE loop128b
VZEROUPPER
RET
loop_1b:
@@ -182,6 +184,7 @@ loop128b:
CMPQ LEN, POS
JNE loop128b
SFENCE
VZEROUPPER
RET
loop_1b:
@@ -265,6 +268,7 @@ next_vect:
ADDQ $128, POS
CMPQ LEN, POS
JNE loop128b
VZEROUPPER
RET
loop_1b:
@@ -371,6 +375,7 @@ next_vect:
ADDQ $128, POS
CMPQ LEN, POS
JNE loop128b
VZEROUPPER
RET
loop_1b:
@@ -431,12 +436,3 @@ next_vect_8b:
ret:
RET
TEXT ·hasAVX2(SB), NOSPLIT, $0
XORQ AX, AX
XORQ CX, CX
ADDL $7, AX
CPUID
SHRQ $5, BX
ANDQ $1, BX
MOVB BX, ret+0(FP)
RET

View File

@@ -1,11 +1,13 @@
package xor
import "github.com/templexxx/cpufeat"
func init() {
getEXT()
}
func getEXT() {
if hasAVX2() {
if cpufeat.X86.HasAVX2 {
extension = avx2
} else {
extension = sse2