vendor: Mega update all dependencies

GitHub-Pull-Request: https://github.com/syncthing/syncthing/pull/4080
This commit is contained in:
Jakob Borg
2017-04-05 14:34:41 +00:00
parent 49c1527724
commit a1bcc15458
1354 changed files with 55066 additions and 797850 deletions

27
vendor/github.com/remyoudompheng/bigfft/LICENSE generated vendored Normal file
View File

@@ -0,0 +1,27 @@
Copyright (c) 2012 The Go Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

276
vendor/github.com/remyoudompheng/bigfft/arith_386.s generated vendored Normal file
View File

@@ -0,0 +1,276 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// This file provides fast assembly versions for the elementary
// arithmetic operations on vectors implemented in arith.go.
// func mulWW(x, y Word) (z1, z0 Word)
// mulWW multiplies x by y and returns the double-word product:
// z1 is taken from DX (high half) and z0 from AX (low half).
TEXT ·mulWW(SB),7,$0
MOVL x+0(FP), AX
MULL y+4(FP) // DX:AX = x*y
MOVL DX, z1+8(FP)
MOVL AX, z0+12(FP)
RET
// func divWW(x1, x0, y Word) (q, r Word)
// divWW divides the double word x1:x0 (loaded into DX:AX) by y and
// returns the quotient q and the remainder r.
TEXT ·divWW(SB),7,$0
MOVL x1+0(FP), DX
MOVL x0+4(FP), AX
DIVL y+8(FP) // AX = quotient, DX = remainder
MOVL AX, q+12(FP)
MOVL DX, r+16(FP)
RET
// func addVV(z, x, y []Word) (c Word)
// addVV stores x[i] + y[i] + carry into z[i] for i in [0, n), where n is
// the length word of z, and returns the final carry in c.
// The carry is kept in DX between iterations and moved in and out of CF
// with RCRL/RCLL because the loop compare overwrites the flags.
TEXT ·addVV(SB),7,$0
MOVL z+0(FP), DI
MOVL x+12(FP), SI
MOVL y+24(FP), CX
MOVL n+4(FP), BP
MOVL $0, BX // i = 0
MOVL $0, DX // c = 0
JMP E1
L1: MOVL (SI)(BX*4), AX
RCRL $1, DX // CF = c
ADCL (CX)(BX*4), AX
RCLL $1, DX // c = CF
MOVL AX, (DI)(BX*4)
ADDL $1, BX // i++
E1: CMPL BX, BP // i < n
JL L1
MOVL DX, c+36(FP)
RET
// func subVV(z, x, y []Word) (c Word)
// (same as addVV except for SBBL instead of ADCL and label names)
// subVV stores x[i] - y[i] - borrow into z[i] for i in [0, n), where n is
// the length word of z, and returns the final borrow in c.
TEXT ·subVV(SB),7,$0
MOVL z+0(FP), DI
MOVL x+12(FP), SI
MOVL y+24(FP), CX
MOVL n+4(FP), BP
MOVL $0, BX // i = 0
MOVL $0, DX // c = 0
JMP E2
L2: MOVL (SI)(BX*4), AX
RCRL $1, DX // CF = c
SBBL (CX)(BX*4), AX
RCLL $1, DX // c = CF
MOVL AX, (DI)(BX*4)
ADDL $1, BX // i++
E2: CMPL BX, BP // i < n
JL L2
MOVL DX, c+36(FP)
RET
// func addVW(z, x []Word, y Word) (c Word)
// addVW adds the single word y to the vector x, storing the result in z,
// and returns the carry out of the last word. AX holds the running carry
// (initially y); with n == 0 the loop is skipped and c == y.
TEXT ·addVW(SB),7,$0
MOVL z+0(FP), DI
MOVL x+12(FP), SI
MOVL y+24(FP), AX // c = y
MOVL n+4(FP), BP
MOVL $0, BX // i = 0
JMP E3
L3: ADDL (SI)(BX*4), AX // AX = x[i] + c
MOVL AX, (DI)(BX*4) // z[i] = AX
RCLL $1, AX // bit 0 of AX = CF
ANDL $1, AX // c = CF
ADDL $1, BX // i++
E3: CMPL BX, BP // i < n
JL L3
MOVL AX, c+28(FP)
RET
// func subVW(z, x []Word, y Word) (c Word)
// subVW subtracts the single word y from the vector x, storing the result
// in z, and returns the borrow out of the last word. AX holds the running
// borrow (initially y); with n == 0 the loop is skipped and c == y.
TEXT ·subVW(SB),7,$0
MOVL z+0(FP), DI
MOVL x+12(FP), SI
MOVL y+24(FP), AX // c = y
MOVL n+4(FP), BP
MOVL $0, BX // i = 0
JMP E4
L4: MOVL (SI)(BX*4), DX // TODO(gri) is there a reverse SUBL?
SUBL AX, DX // DX = x[i] - c
MOVL DX, (DI)(BX*4) // z[i] = DX
RCLL $1, AX // bit 0 of AX = CF
ANDL $1, AX // c = CF
ADDL $1, BX // i++
E4: CMPL BX, BP // i < n
JL L4
MOVL AX, c+28(FP)
RET
// func shlVU(z, x []Word, s uint) (c Word)
// shlVU shifts the n-word vector x left by s bits into z, walking from
// the most significant word down to word 0, and returns in c the bits
// shifted out of x[n-1]. For n <= 0 nothing is written and c is 0.
// In the comments below ŝ presumably denotes the complementary shift
// count (word size - s) -- TODO confirm.
TEXT ·shlVU(SB),7,$0
MOVL n+4(FP), BX // i = n
SUBL $1, BX // i--
JL X8b // i < 0 (n <= 0)
// n > 0
MOVL z+0(FP), DI
MOVL x+12(FP), SI
MOVL s+24(FP), CX
MOVL (SI)(BX*4), AX // w1 = x[n-1]
MOVL $0, DX
SHLL CX, DX:AX // w1>>ŝ
MOVL DX, c+28(FP)
CMPL BX, $0
JLE X8a // i <= 0
// i > 0
L8: MOVL AX, DX // w = w1
MOVL -4(SI)(BX*4), AX // w1 = x[i-1]
SHLL CX, DX:AX // w<<s | w1>>ŝ
MOVL DX, (DI)(BX*4) // z[i] = w<<s | w1>>ŝ
SUBL $1, BX // i--
JG L8 // i > 0
// i <= 0
X8a: SHLL CX, AX // w1<<s
MOVL AX, (DI) // z[0] = w1<<s
RET
X8b: MOVL $0, c+28(FP)
RET
// func shrVU(z, x []Word, s uint) (c Word)
// shrVU shifts the n-word vector x right by s bits into z, walking from
// word 0 up to word n-1, and returns in c the bits shifted out of x[0].
// For n <= 0 nothing is written and c is 0.
// In the comments below ŝ presumably denotes the complementary shift
// count (word size - s) -- TODO confirm.
TEXT ·shrVU(SB),7,$0
MOVL n+4(FP), BP
SUBL $1, BP // n--
JL X9b // n < 0 (n <= 0)
// n > 0
MOVL z+0(FP), DI
MOVL x+12(FP), SI
MOVL s+24(FP), CX
MOVL (SI), AX // w1 = x[0]
MOVL $0, DX
SHRL CX, DX:AX // w1<<ŝ
MOVL DX, c+28(FP)
MOVL $0, BX // i = 0
JMP E9
// i < n-1
L9: MOVL AX, DX // w = w1
MOVL 4(SI)(BX*4), AX // w1 = x[i+1]
SHRL CX, DX:AX // w>>s | w1<<ŝ
MOVL DX, (DI)(BX*4) // z[i] = w>>s | w1<<ŝ
ADDL $1, BX // i++
E9: CMPL BX, BP
JL L9 // i < n-1
// i >= n-1
X9a: SHRL CX, AX // w1>>s
MOVL AX, (DI)(BP*4) // z[n-1] = w1>>s
RET
X9b: MOVL $0, c+28(FP)
RET
// func mulAddVWW(z, x []Word, y, r Word) (c Word)
// mulAddVWW computes z = x*y + r word by word: for each i the low word of
// x[i]*y + c is stored in z[i] and the high word becomes the next c
// (c starts as r). The final c is returned.
// DI and SI are advanced to the end of z and x so that the negative index
// BX can count up from -n to 0.
TEXT ·mulAddVWW(SB),7,$0
MOVL z+0(FP), DI
MOVL x+12(FP), SI
MOVL y+24(FP), BP
MOVL r+28(FP), CX // c = r
MOVL n+4(FP), BX
LEAL (DI)(BX*4), DI
LEAL (SI)(BX*4), SI
NEGL BX // i = -n
JMP E5
L5: MOVL (SI)(BX*4), AX
MULL BP // DX:AX = x[i]*y
ADDL CX, AX // low word += c
ADCL $0, DX // propagate carry into the high word
MOVL AX, (DI)(BX*4)
MOVL DX, CX // c = high word
ADDL $1, BX // i++
E5: CMPL BX, $0 // i < 0
JL L5
MOVL CX, c+32(FP)
RET
// func addMulVVW(z, x []Word, y Word) (c Word)
// addMulVVW computes z += x*y word by word: for each i the low word of
// x[i]*y + c is added into z[i] and the high word (plus both carries)
// becomes the next c. c starts at 0 and the final c is returned.
// DI and SI are advanced to the end of z and x so that the negative index
// BX can count up from -n to 0.
TEXT ·addMulVVW(SB),7,$0
MOVL z+0(FP), DI
MOVL x+12(FP), SI
MOVL y+24(FP), BP
MOVL n+4(FP), BX
LEAL (DI)(BX*4), DI
LEAL (SI)(BX*4), SI
NEGL BX // i = -n
MOVL $0, CX // c = 0
JMP E6
L6: MOVL (SI)(BX*4), AX
MULL BP // DX:AX = x[i]*y
ADDL CX, AX // low word += c
ADCL $0, DX
ADDL AX, (DI)(BX*4) // z[i] += low word
ADCL $0, DX
MOVL DX, CX // c = high word
ADDL $1, BX // i++
E6: CMPL BX, $0 // i < 0
JL L6
MOVL CX, c+28(FP)
RET
// func divWVW(z []Word, xn Word, x []Word, y Word) (r Word)
// divWVW divides the vector x, extended at the top by the word xn, by the
// single word y. It walks from the most significant word down: each step
// divides r:x[i] (DX:AX) by y, stores the quotient in z[i] and keeps the
// remainder in DX. The final remainder is returned in r.
// The frame offsets used below (xn at 12, x at 16, y at 28) correspond to
// z being a slice, matching the Go declaration.
TEXT ·divWVW(SB),7,$0
MOVL z+0(FP), DI
MOVL xn+12(FP), DX // r = xn
MOVL x+16(FP), SI
MOVL y+28(FP), CX
MOVL n+4(FP), BX // i = n
JMP E7
L7: MOVL (SI)(BX*4), AX
DIVL CX // AX = quotient, DX = remainder
MOVL AX, (DI)(BX*4)
E7: SUBL $1, BX // i--
JGE L7 // i >= 0
MOVL DX, r+32(FP)
RET
// func bitLen(x Word) (n int)
// bitLen returns the index of the highest set bit of x plus one, or 0
// when x is zero (BSRL sets ZF for a zero source, taking the Z1 branch).
TEXT ·bitLen(SB),7,$0
BSRL x+0(FP), AX
JZ Z1
INCL AX
MOVL AX, n+4(FP)
RET
Z1: MOVL $0, n+4(FP)
RET

399
vendor/github.com/remyoudompheng/bigfft/arith_amd64.s generated vendored Normal file
View File

@@ -0,0 +1,399 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// This file provides fast assembly versions for the elementary
// arithmetic operations on vectors implemented in arith.go.
// Literal instruction for MOVQ $0, CX.
// (MOVQ $0, reg is translated to XORQ reg, reg and clears CF.)
// addVW and subVW place ZERO_CX between an ADDQ/SUBQ and the instruction
// that consumes its carry flag, so CX has to be cleared without touching
// CF; the instruction is therefore emitted as raw bytes.
#define ZERO_CX BYTE $0x48; \
BYTE $0xc7; \
BYTE $0xc1; \
BYTE $0x00; \
BYTE $0x00; \
BYTE $0x00; \
BYTE $0x00
// func mulWW(x, y Word) (z1, z0 Word)
// mulWW multiplies x by y and returns the double-word product:
// z1 is taken from DX (high half) and z0 from AX (low half).
TEXT ·mulWW(SB),7,$0
MOVQ x+0(FP), AX
MULQ y+8(FP) // DX:AX = x*y
MOVQ DX, z1+16(FP)
MOVQ AX, z0+24(FP)
RET
// func divWW(x1, x0, y Word) (q, r Word)
// divWW divides the double word x1:x0 (loaded into DX:AX) by y and
// returns the quotient q and the remainder r.
TEXT ·divWW(SB),7,$0
MOVQ x1+0(FP), DX
MOVQ x0+8(FP), AX
DIVQ y+16(FP) // AX = quotient, DX = remainder
MOVQ AX, q+24(FP)
MOVQ DX, r+32(FP)
RET
// func addVV(z, x, y []Word) (c Word)
// addVV stores x[i] + y[i] + carry into z[i] for i in [0, len(z)) and
// returns the final carry in c. Four words are processed per iteration
// while at least four remain (U1); the rest are handled one at a time
// (L1). The carry lives in CX between iterations and is moved in and out
// of CF with RCRQ/RCLQ because the index and counter updates overwrite
// the flags.
TEXT ·addVV(SB),7,$0
MOVQ z_len+8(FP), DI
MOVQ x+24(FP), R8
MOVQ y+48(FP), R9
MOVQ z+0(FP), R10
MOVQ $0, CX // c = 0
MOVQ $0, SI // i = 0
// s/JL/JMP/ below to disable the unrolled loop
SUBQ $4, DI // n -= 4
JL V1 // if n < 0 goto V1
U1: // n >= 0
// regular loop body unrolled 4x
RCRQ $1, CX // CF = c
MOVQ 0(R8)(SI*8), R11
MOVQ 8(R8)(SI*8), R12
MOVQ 16(R8)(SI*8), R13
MOVQ 24(R8)(SI*8), R14
ADCQ 0(R9)(SI*8), R11
ADCQ 8(R9)(SI*8), R12
ADCQ 16(R9)(SI*8), R13
ADCQ 24(R9)(SI*8), R14
MOVQ R11, 0(R10)(SI*8)
MOVQ R12, 8(R10)(SI*8)
MOVQ R13, 16(R10)(SI*8)
MOVQ R14, 24(R10)(SI*8)
RCLQ $1, CX // c = CF
ADDQ $4, SI // i += 4
SUBQ $4, DI // n -= 4
JGE U1 // if n >= 0 goto U1
V1: ADDQ $4, DI // n += 4
JLE E1 // if n <= 0 goto E1
L1: // n > 0
RCRQ $1, CX // CF = c
MOVQ 0(R8)(SI*8), R11
ADCQ 0(R9)(SI*8), R11
MOVQ R11, 0(R10)(SI*8)
RCLQ $1, CX // c = CF
ADDQ $1, SI // i++
SUBQ $1, DI // n--
JG L1 // if n > 0 goto L1
E1: MOVQ CX, c+72(FP) // return c
RET
// func subVV(z, x, y []Word) (c Word)
// (same as addVV except for SBBQ instead of ADCQ and label names)
// subVV stores x[i] - y[i] - borrow into z[i] for i in [0, len(z)) and
// returns the final borrow in c. Four words are processed per iteration
// while at least four remain (U2); the rest are handled one at a time (L2).
TEXT ·subVV(SB),7,$0
MOVQ z_len+8(FP), DI
MOVQ x+24(FP), R8
MOVQ y+48(FP), R9
MOVQ z+0(FP), R10
MOVQ $0, CX // c = 0
MOVQ $0, SI // i = 0
// s/JL/JMP/ below to disable the unrolled loop
SUBQ $4, DI // n -= 4
JL V2 // if n < 0 goto V2
U2: // n >= 0
// regular loop body unrolled 4x
RCRQ $1, CX // CF = c
MOVQ 0(R8)(SI*8), R11
MOVQ 8(R8)(SI*8), R12
MOVQ 16(R8)(SI*8), R13
MOVQ 24(R8)(SI*8), R14
SBBQ 0(R9)(SI*8), R11
SBBQ 8(R9)(SI*8), R12
SBBQ 16(R9)(SI*8), R13
SBBQ 24(R9)(SI*8), R14
MOVQ R11, 0(R10)(SI*8)
MOVQ R12, 8(R10)(SI*8)
MOVQ R13, 16(R10)(SI*8)
MOVQ R14, 24(R10)(SI*8)
RCLQ $1, CX // c = CF
ADDQ $4, SI // i += 4
SUBQ $4, DI // n -= 4
JGE U2 // if n >= 0 goto U2
V2: ADDQ $4, DI // n += 4
JLE E2 // if n <= 0 goto E2
L2: // n > 0
RCRQ $1, CX // CF = c
MOVQ 0(R8)(SI*8), R11
SBBQ 0(R9)(SI*8), R11
MOVQ R11, 0(R10)(SI*8)
RCLQ $1, CX // c = CF
ADDQ $1, SI // i++
SUBQ $1, DI // n--
JG L2 // if n > 0 goto L2
E2: MOVQ CX, c+72(FP) // return c
RET
// func addVW(z, x []Word, y Word) (c Word)
// addVW adds the single word y to the vector x, storing the result in z,
// and returns the carry out of the last word. CX holds the running carry
// (initially y). Four words are processed per iteration while at least
// four remain (U3); the rest are handled one at a time (L3).
// ZERO_CX clears CX without disturbing CF so the carry of the preceding
// ADDQ can still be consumed.
TEXT ·addVW(SB),7,$0
MOVQ z_len+8(FP), DI
MOVQ x+24(FP), R8
MOVQ y+48(FP), CX // c = y
MOVQ z+0(FP), R10
MOVQ $0, SI // i = 0
// s/JL/JMP/ below to disable the unrolled loop
SUBQ $4, DI // n -= 4
JL V3 // if n < 4 goto V3
U3: // n >= 0
// regular loop body unrolled 4x
MOVQ 0(R8)(SI*8), R11
MOVQ 8(R8)(SI*8), R12
MOVQ 16(R8)(SI*8), R13
MOVQ 24(R8)(SI*8), R14
ADDQ CX, R11
ZERO_CX
ADCQ $0, R12
ADCQ $0, R13
ADCQ $0, R14
SETCS CX // c = CF
MOVQ R11, 0(R10)(SI*8)
MOVQ R12, 8(R10)(SI*8)
MOVQ R13, 16(R10)(SI*8)
MOVQ R14, 24(R10)(SI*8)
ADDQ $4, SI // i += 4
SUBQ $4, DI // n -= 4
JGE U3 // if n >= 0 goto U3
V3: ADDQ $4, DI // n += 4
JLE E3 // if n <= 0 goto E3
L3: // n > 0
ADDQ 0(R8)(SI*8), CX
MOVQ CX, 0(R10)(SI*8)
ZERO_CX
RCLQ $1, CX // c = CF
ADDQ $1, SI // i++
SUBQ $1, DI // n--
JG L3 // if n > 0 goto L3
E3: MOVQ CX, c+56(FP) // return c
RET
// func subVW(z, x []Word, y Word) (c Word)
// (same as addVW except for SUBQ/SBBQ instead of ADDQ/ADCQ and label names)
// subVW subtracts the single word y from the vector x, storing the result
// in z, and returns the borrow out of the last word. CX holds the running
// borrow (initially y). ZERO_CX clears CX without disturbing CF so the
// borrow of the preceding SUBQ can still be consumed.
TEXT ·subVW(SB),7,$0
MOVQ z_len+8(FP), DI
MOVQ x+24(FP), R8
MOVQ y+48(FP), CX // c = y
MOVQ z+0(FP), R10
MOVQ $0, SI // i = 0
// s/JL/JMP/ below to disable the unrolled loop
SUBQ $4, DI // n -= 4
JL V4 // if n < 4 goto V4
U4: // n >= 0
// regular loop body unrolled 4x
MOVQ 0(R8)(SI*8), R11
MOVQ 8(R8)(SI*8), R12
MOVQ 16(R8)(SI*8), R13
MOVQ 24(R8)(SI*8), R14
SUBQ CX, R11
ZERO_CX
SBBQ $0, R12
SBBQ $0, R13
SBBQ $0, R14
SETCS CX // c = CF
MOVQ R11, 0(R10)(SI*8)
MOVQ R12, 8(R10)(SI*8)
MOVQ R13, 16(R10)(SI*8)
MOVQ R14, 24(R10)(SI*8)
ADDQ $4, SI // i += 4
SUBQ $4, DI // n -= 4
JGE U4 // if n >= 0 goto U4
V4: ADDQ $4, DI // n += 4
JLE E4 // if n <= 0 goto E4
L4: // n > 0
MOVQ 0(R8)(SI*8), R11
SUBQ CX, R11
MOVQ R11, 0(R10)(SI*8)
ZERO_CX
RCLQ $1, CX // c = CF
ADDQ $1, SI // i++
SUBQ $1, DI // n--
JG L4 // if n > 0 goto L4
E4: MOVQ CX, c+56(FP) // return c
RET
// func shlVU(z, x []Word, s uint) (c Word)
// shlVU shifts the len(z)-word vector x left by s bits into z, walking
// from the most significant word down to word 0, and returns in c the
// bits shifted out of the top word. For an empty z nothing is written
// and c is 0.
// In the comments below ŝ presumably denotes the complementary shift
// count (word size - s) -- TODO confirm.
TEXT ·shlVU(SB),7,$0
MOVQ z_len+8(FP), BX // i = n (len(z))
SUBQ $1, BX // i--
JL X8b // i < 0 (n <= 0)
// n > 0
MOVQ z+0(FP), R10
MOVQ x+24(FP), R8
MOVQ s+48(FP), CX
MOVQ (R8)(BX*8), AX // w1 = x[n-1]
MOVQ $0, DX
SHLQ CX, DX:AX // w1>>ŝ
MOVQ DX, c+56(FP)
CMPQ BX, $0
JLE X8a // i <= 0
// i > 0
L8: MOVQ AX, DX // w = w1
MOVQ -8(R8)(BX*8), AX // w1 = x[i-1]
SHLQ CX, DX:AX // w<<s | w1>>ŝ
MOVQ DX, (R10)(BX*8) // z[i] = w<<s | w1>>ŝ
SUBQ $1, BX // i--
JG L8 // i > 0
// i <= 0
X8a: SHLQ CX, AX // w1<<s
MOVQ AX, (R10) // z[0] = w1<<s
RET
X8b: MOVQ $0, c+56(FP)
RET
// func shrVU(z, x []Word, s uint) (c Word)
// shrVU shifts the len(z)-word vector x right by s bits into z, walking
// from word 0 up to the top word, and returns in c the bits shifted out
// of x[0]. For an empty z nothing is written and c is 0.
// In the comments below ŝ presumably denotes the complementary shift
// count (word size - s) -- TODO confirm.
TEXT ·shrVU(SB),7,$0
MOVQ z_len+8(FP), R11
SUBQ $1, R11 // n--
JL X9b // n < 0 (n <= 0)
// n > 0
MOVQ z+0(FP), R10
MOVQ x+24(FP), R8
MOVQ s+48(FP), CX
MOVQ (R8), AX // w1 = x[0]
MOVQ $0, DX
SHRQ CX, DX:AX // w1<<ŝ
MOVQ DX, c+56(FP)
MOVQ $0, BX // i = 0
JMP E9
// i < n-1
L9: MOVQ AX, DX // w = w1
MOVQ 8(R8)(BX*8), AX // w1 = x[i+1]
SHRQ CX, DX:AX // w>>s | w1<<ŝ
MOVQ DX, (R10)(BX*8) // z[i] = w>>s | w1<<ŝ
ADDQ $1, BX // i++
E9: CMPQ BX, R11
JL L9 // i < n-1
// i >= n-1
X9a: SHRQ CX, AX // w1>>s
MOVQ AX, (R10)(R11*8) // z[n-1] = w1>>s
RET
X9b: MOVQ $0, c+56(FP)
RET
// func mulAddVWW(z, x []Word, y, r Word) (c Word)
// mulAddVWW computes z = x*y + r word by word: for each i the low word of
// x[i]*y + c is stored in z[i] and the high word becomes the next c
// (c starts as r). The final c is returned.
TEXT ·mulAddVWW(SB),7,$0
MOVQ z+0(FP), R10
MOVQ x+24(FP), R8
MOVQ y+48(FP), R9
MOVQ r+56(FP), CX // c = r
MOVQ z_len+8(FP), R11
MOVQ $0, BX // i = 0
JMP E5
L5: MOVQ (R8)(BX*8), AX
MULQ R9 // DX:AX = x[i]*y
ADDQ CX, AX // low word += c
ADCQ $0, DX // propagate carry into the high word
MOVQ AX, (R10)(BX*8)
MOVQ DX, CX // c = high word
ADDQ $1, BX // i++
E5: CMPQ BX, R11 // i < n
JL L5
MOVQ CX, c+64(FP)
RET
// func addMulVVW(z, x []Word, y Word) (c Word)
// addMulVVW computes z += x*y word by word: for each i the low word of
// x[i]*y + c is added into z[i] and the high word (plus both carries)
// becomes the next c. c starts at 0 and the final c is returned.
TEXT ·addMulVVW(SB),7,$0
MOVQ z+0(FP), R10
MOVQ x+24(FP), R8
MOVQ y+48(FP), R9
MOVQ z_len+8(FP), R11
MOVQ $0, BX // i = 0
MOVQ $0, CX // c = 0
JMP E6
L6: MOVQ (R8)(BX*8), AX
MULQ R9 // DX:AX = x[i]*y
ADDQ CX, AX // low word += c
ADCQ $0, DX
ADDQ AX, (R10)(BX*8) // z[i] += low word
ADCQ $0, DX
MOVQ DX, CX // c = high word
ADDQ $1, BX // i++
E6: CMPQ BX, R11 // i < n
JL L6
MOVQ CX, c+56(FP)
RET
// func divWVW(z []Word, xn Word, x []Word, y Word) (r Word)
// divWVW divides the vector x, extended at the top by the word xn, by the
// single word y. It walks from the most significant word down: each step
// divides r:x[i] (DX:AX) by y, stores the quotient in z[i] and keeps the
// remainder in DX. The final remainder is returned in r.
TEXT ·divWVW(SB),7,$0
MOVQ z+0(FP), R10
MOVQ xn+24(FP), DX // r = xn
MOVQ x+32(FP), R8
MOVQ y+56(FP), R9
MOVQ z_len+8(FP), BX // i = n (len(z))
JMP E7
L7: MOVQ (R8)(BX*8), AX
DIVQ R9 // AX = quotient, DX = remainder
MOVQ AX, (R10)(BX*8)
E7: SUBQ $1, BX // i--
JGE L7 // i >= 0
MOVQ DX, r+64(FP)
RET
// func bitLen(x Word) (n int)
// bitLen returns the index of the highest set bit of x plus one, or 0
// when x is zero (BSRQ sets ZF for a zero source, taking the Z1 branch).
TEXT ·bitLen(SB),7,$0
BSRQ x+0(FP), AX
JZ Z1
ADDQ $1, AX
MOVQ AX, n+8(FP)
RET
Z1: MOVQ $0, n+8(FP)
RET

321
vendor/github.com/remyoudompheng/bigfft/arith_arm.s generated vendored Normal file
View File

@@ -0,0 +1,321 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// This file provides fast assembly versions for the elementary
// arithmetic operations on vectors implemented in arith.go.
// CFLAG is the shift used below to extract the carry bit from a saved
// copy of CPSR (MOVW R0>>CFLAG, R0 followed by AND $1, R0).
#define CFLAG 29 // bit position of carry flag
// func addVV(z, x, y []Word) (c Word)
// addVV stores x[i] + y[i] + carry into z[i] for each of the n words of z
// and returns the final carry in c.
// R1/R2/R3 walk z/x/y; R4 is the end pointer z + 4*n. The flags are saved
// in R0 across the loop compare and restored before each ADC.S.
TEXT ·addVV(SB),7,$0
MOVW $0, R0 // saved flags: carry clear
MOVW z+0(FP), R1
MOVW x+12(FP), R2
MOVW y+24(FP), R3
MOVW n+4(FP), R4
MOVW R4<<2, R4
ADD R1, R4 // R4 = end of z
B E1
L1:
MOVW.P 4(R2), R5
MOVW.P 4(R3), R6
MOVW R0, CPSR // restore carry
ADC.S R6, R5
MOVW.P R5, 4(R1)
MOVW CPSR, R0 // save carry
E1:
CMP R1, R4
BNE L1
MOVW R0>>CFLAG, R0
AND $1, R0 // c = carry bit
MOVW R0, c+36(FP)
RET
// func subVV(z, x, y []Word) (c Word)
// (same as addVV except for SBC instead of ADC and label names)
// subVV stores x[i] - y[i] - borrow into z[i] for each of the n words of
// z and returns the final borrow in c.
// The saved flags start with the carry bit set and the extracted carry
// bit is inverted (EOR $1) before being returned as the borrow.
TEXT ·subVV(SB),7,$0
MOVW $(1<<CFLAG), R0 // saved flags: carry set
MOVW z+0(FP), R1
MOVW x+12(FP), R2
MOVW y+24(FP), R3
MOVW n+4(FP), R4
MOVW R4<<2, R4
ADD R1, R4 // R4 = end of z
B E2
L2:
MOVW.P 4(R2), R5
MOVW.P 4(R3), R6
MOVW R0, CPSR // restore carry
SBC.S R6, R5
MOVW.P R5, 4(R1)
MOVW CPSR, R0 // save carry
E2:
CMP R1, R4
BNE L2
MOVW R0>>CFLAG, R0
AND $1, R0
EOR $1, R0 // c = inverted carry bit
MOVW R0, c+36(FP)
RET
// func addVW(z, x []Word, y Word) (c Word)
// addVW adds the single word y to the vector x, storing the result in z,
// and returns the carry out of the last word. When n == 0 it returns y.
// The first word is computed as x[0] + y (L3a); the remaining words only
// absorb the carry (L3).
TEXT ·addVW(SB),7,$0
MOVW z+0(FP), R1
MOVW x+12(FP), R2
MOVW y+24(FP), R3
MOVW n+4(FP), R4
MOVW R4<<2, R4
ADD R1, R4 // R4 = end of z
CMP R1, R4
BNE L3a
MOVW R3, c+28(FP) // n == 0: c = y
RET
L3a:
MOVW.P 4(R2), R5
ADD.S R3, R5
MOVW.P R5, 4(R1)
MOVW CPSR, R0 // save carry
B E3
L3:
MOVW.P 4(R2), R5
MOVW R0, CPSR // restore carry
ADC.S $0, R5
MOVW.P R5, 4(R1)
MOVW CPSR, R0 // save carry
E3:
CMP R1, R4
BNE L3
MOVW R0>>CFLAG, R0
AND $1, R0 // c = carry bit
MOVW R0, c+28(FP)
RET
// func subVW(z, x []Word, y Word) (c Word)
// subVW subtracts the single word y from the vector x, storing the result
// in z, and returns the borrow out of the last word. When n == 0 it
// returns y. The first word is computed as x[0] - y (L4a); the remaining
// words only absorb the borrow (L4). The extracted carry bit is inverted
// (EOR $1) before being returned as the borrow.
TEXT ·subVW(SB),7,$0
MOVW z+0(FP), R1
MOVW x+12(FP), R2
MOVW y+24(FP), R3
MOVW n+4(FP), R4
MOVW R4<<2, R4
ADD R1, R4 // R4 = end of z
CMP R1, R4
BNE L4a
MOVW R3, c+28(FP) // n == 0: c = y
RET
L4a:
MOVW.P 4(R2), R5
SUB.S R3, R5
MOVW.P R5, 4(R1)
MOVW CPSR, R0 // save carry
B E4
L4:
MOVW.P 4(R2), R5
MOVW R0, CPSR // restore carry
SBC.S $0, R5
MOVW.P R5, 4(R1)
MOVW CPSR, R0 // save carry
E4:
CMP R1, R4
BNE L4
MOVW R0>>CFLAG, R0
AND $1, R0
EOR $1, R0 // c = inverted carry bit
MOVW R0, c+28(FP)
RET
// func shlVU(z, x []Word, s uint) (c Word)
// shlVU shifts the n-word vector x left by s bits into z, walking from
// the top word down, and returns in c the bits shifted out of the top
// word. With n == 0 nothing is written and c is 0. A shift of 0 is
// handled by a plain top-down copy (Y7) that also returns c = 0.
// R2 and R5 start at the ends of x and z; R4 holds 32 - s.
TEXT ·shlVU(SB),7,$0
MOVW n+4(FP), R5
CMP $0, R5
BEQ X7
MOVW z+0(FP), R1
MOVW x+12(FP), R2
MOVW R5<<2, R5
ADD R5, R2 // R2 = end of x
ADD R1, R5 // R5 = end of z
MOVW s+24(FP), R3
CMP $0, R3 // shift 0 is special
BEQ Y7
ADD $4, R1 // stop one word early
MOVW $32, R4
SUB R3, R4 // R4 = 32 - s
MOVW $0, R7
MOVW.W -4(R2), R6
MOVW R6<<R3, R7
MOVW R6>>R4, R6
MOVW R6, c+28(FP) // c = bits shifted out of the top word
B E7
L7:
MOVW.W -4(R2), R6
ORR R6>>R4, R7
MOVW.W R7, -4(R5)
MOVW R6<<R3, R7
E7:
CMP R1, R5
BNE L7
MOVW R7, -4(R5) // lowest word
RET
Y7: // copy loop, because shift 0 == shift 32
MOVW.W -4(R2), R6
MOVW.W R6, -4(R5)
CMP R1, R5
BNE Y7
X7:
MOVW $0, R1
MOVW R1, c+28(FP)
RET
// func shrVU(z, x []Word, s uint) (c Word)
// shrVU shifts the n-word vector x right by s bits into z, walking from
// word 0 upwards, and returns in c the bits shifted out of x[0]. With
// n == 0 nothing is written and c is 0. A shift of 0 is handled by a
// plain bottom-up copy (Y6) that also returns c = 0.
// R5 is the end pointer of z; R4 holds 32 - s.
TEXT ·shrVU(SB),7,$0
MOVW n+4(FP), R5
CMP $0, R5
BEQ X6
MOVW z+0(FP), R1
MOVW x+12(FP), R2
MOVW R5<<2, R5
ADD R1, R5 // R5 = end of z
MOVW s+24(FP), R3
CMP $0, R3 // shift 0 is special
BEQ Y6
SUB $4, R5 // stop one word early
MOVW $32, R4
SUB R3, R4 // R4 = 32 - s
MOVW $0, R7
// first word
MOVW.P 4(R2), R6
MOVW R6>>R3, R7
MOVW R6<<R4, R6
MOVW R6, c+28(FP) // c = bits shifted out of x[0]
B E6
// word loop
L6:
MOVW.P 4(R2), R6
ORR R6<<R4, R7
MOVW.P R7, 4(R1)
MOVW R6>>R3, R7
E6:
CMP R1, R5
BNE L6
MOVW R7, 0(R1) // top word
RET
Y6: // copy loop, because shift 0 == shift 32
MOVW.P 4(R2), R6
MOVW.P R6, 4(R1)
CMP R1, R5
BNE Y6
X6:
MOVW $0, R1
MOVW R1, c+28(FP)
RET
// func mulAddVWW(z, x []Word, y, r Word) (c Word)
// mulAddVWW computes z = x*y + r word by word: for each word the low half
// of x[i]*y + c is stored in z[i] and the high half becomes the next c
// (c starts as r). The final c is returned.
// R0 is kept at zero so that ADC R0, R7 only adds the carry flag.
TEXT ·mulAddVWW(SB),7,$0
MOVW $0, R0
MOVW z+0(FP), R1
MOVW x+12(FP), R2
MOVW y+24(FP), R3
MOVW r+28(FP), R4 // c = r
MOVW n+4(FP), R5
MOVW R5<<2, R5
ADD R1, R5 // R5 = end of z
B E8
// word loop
L8:
MOVW.P 4(R2), R6
MULLU R6, R3, (R7, R6) // R7:R6 = x[i]*y
ADD.S R4, R6 // low word += c
ADC R0, R7 // high word += carry
MOVW.P R6, 4(R1)
MOVW R7, R4 // c = high word
E8:
CMP R1, R5
BNE L8
MOVW R4, c+32(FP)
RET
// func addMulVVW(z, x []Word, y Word) (c Word)
// addMulVVW computes z += x*y word by word: for each word the low half of
// x[i]*y + c + z[i] is stored back in z[i] and the high half (plus both
// carries) becomes the next c. c starts at 0 and the final c is returned.
// R0 is kept at zero so that ADC R0, R7 only adds the carry flag.
TEXT ·addMulVVW(SB),7,$0
MOVW $0, R0
MOVW z+0(FP), R1
MOVW x+12(FP), R2
MOVW y+24(FP), R3
MOVW n+4(FP), R5
MOVW R5<<2, R5
ADD R1, R5 // R5 = end of z
MOVW $0, R4 // c = 0
B E9
// word loop
L9:
MOVW.P 4(R2), R6
MULLU R6, R3, (R7, R6) // R7:R6 = x[i]*y
ADD.S R4, R6 // low word += c
ADC R0, R7
MOVW 0(R1), R4
ADD.S R4, R6 // low word += z[i]
ADC R0, R7
MOVW.P R6, 4(R1)
MOVW R7, R4 // c = high word
E9:
CMP R1, R5
BNE L9
MOVW R4, c+28(FP)
RET
// func divWVW(z []Word, xn Word, x []Word, y Word) (r Word)
// divWVW has no assembly implementation on ARM: it branches straight to
// divWVW_g, which is defined outside this file.
TEXT ·divWVW(SB),7,$0
// ARM has no multiword division, so use portable code.
B ·divWVW_g(SB)
// func divWW(x1, x0, y Word) (q, r Word)
// divWW has no assembly implementation on ARM: it branches straight to
// divWW_g, which is defined outside this file.
TEXT ·divWW(SB),7,$0
// ARM has no multiword division, so use portable code.
B ·divWW_g(SB)
// func mulWW(x, y Word) (z1, z0 Word)
// mulWW multiplies x by y and returns the double-word product:
// z1 is the high word (R4) and z0 the low word (R3).
TEXT ·mulWW(SB),7,$0
MOVW x+0(FP), R1
MOVW y+4(FP), R2
MULLU R1, R2, (R4, R3) // R4:R3 = x*y
MOVW R4, z1+8(FP)
MOVW R3, z0+12(FP)
RET
// func bitLen(x Word) (n int)
// bitLen returns 32 minus the number of leading zero bits of x.
TEXT ·bitLen(SB),7,$0
MOVW x+0(FP), R0
CLZ R0, R0 // R0 = leading zero count
MOVW $32, R1
SUB.S R0, R1 // n = 32 - R0
MOVW R1, n+4(FP)
RET

21
vendor/github.com/remyoudompheng/bigfft/arith_decl.go generated vendored Normal file
View File

@@ -0,0 +1,21 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package bigfft
import . "math/big"
// implemented in arith_$GOARCH.s
//
// The declarations below have no Go bodies. The one-line summaries
// describe the behavior of the assembly implementations.

// mulWW returns the double-word product x*y (z1 high word, z0 low word).
func mulWW(x, y Word) (z1, z0 Word)
// divWW divides the double word x1:x0 by y, returning quotient and remainder.
func divWW(x1, x0, y Word) (q, r Word)
// addVV sets z = x + y over len(z) words and returns the carry.
func addVV(z, x, y []Word) (c Word)
// subVV sets z = x - y over len(z) words and returns the borrow.
func subVV(z, x, y []Word) (c Word)
// addVW sets z = x + y for a single word y and returns the carry.
func addVW(z, x []Word, y Word) (c Word)
// subVW sets z = x - y for a single word y and returns the borrow.
func subVW(z, x []Word, y Word) (c Word)
// shlVU sets z = x << s and returns the bits shifted out of the top word.
func shlVU(z, x []Word, s uint) (c Word)
// shrVU sets z = x >> s and returns the bits shifted out of the bottom word.
func shrVU(z, x []Word, s uint) (c Word)
// mulAddVWW sets z = x*y + r and returns the high word carried out.
func mulAddVWW(z, x []Word, y, r Word) (c Word)
// addMulVVW sets z += x*y and returns the high word carried out.
func addMulVVW(z, x []Word, y Word) (c Word)
// divWVW divides x (extended at the top by xn) by y, stores the quotient
// words in z and returns the remainder.
func divWVW(z []Word, xn Word, x []Word, y Word) (r Word)
// bitLen returns the position of the highest set bit of x plus one (0 for x == 0).
func bitLen(x Word) (n int)

198
vendor/github.com/remyoudompheng/bigfft/fermat.go generated vendored Normal file
View File

@@ -0,0 +1,198 @@
package bigfft
import (
"math/big"
)
// Arithmetic modulo 2^n+1.

// A fermat of length w+1 represents a number modulo 2^(w*_W) + 1. The last
// word is zero or one. A number has at most two representatives satisfying
// the 0-1 last word constraint.
type fermat nat

// String formats z by converting it to a nat and delegating to nat.String.
func (z fermat) String() string {
	return nat(z).String()
}
// norm folds the value held in the last word of z back into the lower
// words, in place. With n = len(z)-1, the last word c = z[n] is subtracted
// from the low words; the result is left with z[n] equal to 0 or 1, or
// with one added back across the whole vector.
func (z fermat) norm() {
	top := len(z) - 1
	excess := z[top]
	switch {
	case excess == 0:
		// Nothing to fold.
		return
	case z[0] >= excess:
		// The subtraction fits in the lowest word: no borrow to propagate.
		z[top] = 0
		z[0] -= excess
		return
	}
	// z[0] < z[top]: subtract the excess across the whole vector.
	subVW(z, z, excess)
	if excess > 1 {
		z[top] -= excess - 1
	}
	// Add back one.
	if z[top] == 1 {
		z[top] = 0
		return
	}
	addVW(z, z, 1)
}
// Shift computes (x << k) mod (2^n+1) and stores the result in z.
//
// z and x must have the same length; otherwise Shift panics. The shift
// count k may be negative: it is first reduced modulo 2*n*_W (n being
// len(x)-1), since shifting by n*_W negates the value and shifting by
// 2*n*_W is therefore the identity. The result is normalized with norm.
func (z fermat) Shift(x fermat, k int) {
	if len(z) != len(x) {
		panic("len(z) != len(x) in Shift")
	}
	n := len(x) - 1
	// Shift by n*_W is taking the opposite.
	k %= 2 * n * _W
	if k < 0 {
		k += 2 * n * _W
	}
	neg := false
	if k >= n*_W {
		k -= n * _W
		neg = true
	}

	// Split the shift into whole words (kw) and leftover bits (kb).
	kw, kb := k/_W, k%_W

	z[n] = 1 // Add (-1)
	if !neg {
		for i := 0; i < kw; i++ {
			z[i] = 0
		}
		// Shift left by kw words.
		// x = a·2^(n-k) + b
		// x<<k = (b<<k) - a
		copy(z[kw:], x[:n-kw])
		b := subVV(z[:kw+1], z[:kw+1], x[n-kw:])
		if z[kw+1] > 0 {
			z[kw+1] -= b
		} else {
			subVW(z[kw+1:], z[kw+1:], b)
		}
	} else {
		for i := kw + 1; i < n; i++ {
			z[i] = 0
		}
		// Shift left and negate, by kw words.
		copy(z[:kw+1], x[n-kw:n+1])            // z_low = x_high
		b := subVV(z[kw:n], z[kw:n], x[:n-kw]) // z_high -= x_low
		z[n] -= b
	}
	// Add back 1.
	if z[0] < ^big.Word(0) {
		z[0]++
	} else {
		addVW(z, z, 1)
	}
	// Shift left by kb bits
	shlVU(z, z, uint(kb))
	z.norm()
}
// ShiftHalf shifts x by k/2 bits to the left and stores the result in z.
// Shifting by 1/2 bit is multiplication by sqrt(2) mod 2^n+1, which is
// 2^(3n/4) - 2^(n/4). For even k this is a plain Shift by k/2; for odd k
// the result is the difference of two shifts.
// A temporary buffer must be provided in tmp; it is overwritten.
func (z fermat) ShiftHalf(x fermat, k int, tmp fermat) {
	if k%2 == 0 {
		z.Shift(x, k/2)
		return
	}
	n := len(z) - 1
	whole := (k - 1) / 2
	z.Shift(x, whole+(3*_W/4)*n)
	tmp.Shift(x, whole+(_W/4)*n)
	z.Sub(z, tmp)
}
// Add computes x + y mod 2^n+1, stores the normalized sum in z and returns
// z. It panics when z and x differ in length.
func (z fermat) Add(x, y fermat) fermat {
	if len(x) != len(z) {
		panic("Add: len(z) != len(x)")
	}
	// The carry returned by addVV is ignored: there cannot be one here.
	addVV(z, x, y)
	z.norm()
	return z
}
// Sub computes x - y mod 2^n+1, stores the normalized difference in z and
// returns z. It panics when z and x differ in length.
func (z fermat) Sub(x, y fermat) fermat {
	if len(z) != len(x) {
		panic("Sub: len(z) != len(x)")
	}
	n := len(y) - 1
	// Subtract the low n words; b collects the borrow plus y's last word.
	b := subVV(z[:n], x[:n], y[:n])
	b += y[n]
	// If b > 0, we need to subtract b<<n, which is the same as adding b.
	z[n] = x[n]
	if z[0] <= ^big.Word(0)-b {
		// The addition fits in the lowest word: no carry to propagate.
		z[0] += b
	} else {
		addVW(z, z, b)
	}
	z.norm()
	return z
}
// Mul computes x*y mod 2^(n*_W)+1, where n = len(x)-1, and returns the
// normalized result as a fermat of n+1 words.
// z is used as scratch space and must have room for at least 2n+2 words
// on the basicMul path (n < 30). On the big.Int path the returned slice
// may instead be backed by the product's own storage, so callers should
// use the return value rather than z.
func (z fermat) Mul(x, y fermat) fermat {
n := len(x) - 1
if n < 30 {
// Small operands: schoolbook multiplication into z.
z = z[:2*n+2]
basicMul(z, x, y)
z = z[:2*n+1]
} else {
// Larger operands: delegate the full product to math/big.
var xi, yi, zi big.Int
xi.SetBits(x)
yi.SetBits(y)
zi.SetBits(z)
zb := zi.Mul(&xi, &yi).Bits()
if len(zb) <= n {
// Short product.
copy(z, zb)
for i := len(zb); i < len(z); i++ {
z[i] = 0
}
return z
}
z = zb
}
// len(z) is at most 2n+1.
if len(z) > 2*n+1 {
panic("len(z) > 2n+1")
}
// Fold the words above index n back into the low part by subtraction.
i := len(z) - (n + 1) // i <= n
c := subVV(z[1:i+1], z[1:i+1], z[n+1:])
z = z[:n+1]
z[n]++ // Add -1.
subVW(z[i+1:], z[i+1:], c)
// Add 1.
if z[n] == 1 {
z[n] = 0
} else {
addVW(z, z, 1)
}
z.norm()
return z
}
// copied from math/big
//
// basicMul computes the schoolbook product of x and y into z.
// z is cleared first; the (non-normalized) result then occupies
// z[0 : len(x) + len(y)]. Zero words of y are skipped.
func basicMul(z, x, y fermat) {
	// Clear the accumulator.
	for i := range z {
		z[i] = 0
	}
	xlen := len(x)
	for i, d := range y {
		if d == 0 {
			continue
		}
		// Accumulate x*d at word offset i; the carry lands just above.
		z[xlen+i] = addMulVVW(z[i:i+xlen], x, d)
	}
}

366
vendor/github.com/remyoudompheng/bigfft/fft.go generated vendored Normal file
View File

@@ -0,0 +1,366 @@
// Package bigfft implements multiplication of big.Int using FFT.
//
// The implementation is based on the Schönhage-Strassen method
// using integer FFT modulo 2^n+1.
package bigfft
import (
"math/big"
"unsafe"
)
// _W is the size of a big.Word in bits.
const _W = int(unsafe.Sizeof(big.Word(0)) * 8)
// nat is a natural number stored as a slice of big.Word, in the layout
// accepted by (*big.Int).SetBits.
type nat []big.Word

// String returns the value of n as formatted by (*big.Int).String.
func (n nat) String() string {
	return new(big.Int).SetBits(n).String()
}
// fftThreshold is the size (in words) above which FFT is used over
// Karatsuba from math/big. Mul only takes the FFT path when both
// operands are longer than this many words.
//
// TestCalibrate seems to indicate a threshold of 60kbits on 32-bit
// arches and 110kbits on 64-bit arches.
var fftThreshold = 1800
// Mul computes the product x*y and returns it as a new *big.Int.
// It can be used instead of the Mul method of *big.Int from the math/big
// package. The FFT path is taken only when both operands are longer than
// fftThreshold words; otherwise the work is delegated to math/big.
func Mul(x, y *big.Int) *big.Int {
	xwords, ywords := len(x.Bits()), len(y.Bits())
	if xwords <= fftThreshold || ywords <= fftThreshold {
		return new(big.Int).Mul(x, y)
	}
	return mulFFT(x, y)
}
func mulFFT(x, y *big.Int) *big.Int {
var xb, yb nat = x.Bits(), y.Bits()
zb := fftmul(xb, yb)
z := new(big.Int)
z.SetBits(zb)
if x.Sign()*y.Sign() < 0 {
z.Neg(z)
}
return z
}
// fftmul returns the product of the natural numbers x and y.
// Both operands are cut into polynomials with the parameters chosen by
// fftSize, the polynomials are multiplied, and the product polynomial is
// evaluated back to a nat.
//
// An FFT size of K=1<<k is adequate when K is about 2*sqrt(N) where
// N = x.Bitlen() + y.Bitlen().
func fftmul(x, y nat) nat {
	k, m := fftSize(x, y)
	xp, yp := polyFromNat(x, k, m), polyFromNat(y, k, m)
	product := xp.Mul(&yp)
	return product.Int()
}
// fftSizeThreshold[i] is the maximal size (in bits) where we should use
// fft size i.
// fftSize picks the first index whose entry exceeds the combined operand
// size in bits; sizes beyond the last entry use len(fftSizeThreshold).
var fftSizeThreshold = [...]int64{0, 0, 0,
4 << 10, 8 << 10, 16 << 10, // 5
32 << 10, 64 << 10, 1 << 18, 1 << 20, 3 << 20, // 10
8 << 20, 30 << 20, 100 << 20, 300 << 20, 600 << 20,
}
// fftSize returns the FFT length exponent k and the number of words per
// chunk m, such that m << k is larger than the number of words in x*y.
// k is the first index of fftSizeThreshold whose entry exceeds the
// combined size of x and y in bits, or len(fftSizeThreshold) when no
// entry does.
func fftSize(x, y nat) (k uint, m int) {
	words := len(x) + len(y)
	bits := int64(words) * int64(_W)
	k = uint(len(fftSizeThreshold))
	for i, limit := range fftSizeThreshold {
		if limit > bits {
			k = uint(i)
			break
		}
	}
	// The 1<<k chunks of m words must have N bits so that
	// 2^N-1 is larger than x*y. That is, m<<k > words
	m = words>>k + 1
	return k, m
}
// valueSize returns, in words, the smallest multiple of 1<<k greater than
// 2*m*_W + k bits that is also a multiple of _W. If extra > 0, the
// returned value is only required to be a multiple of 1<<(k-extra).
func valueSize(k uint, m int, extra uint) int {
	// The rounding step is never smaller than one word.
	step := 1 << (k - extra)
	if step < _W {
		step = _W
	}
	bits := 2*m*_W + int(k)
	// Round up to the next multiple of step, then convert bits to words.
	return (bits/step + 1) * step / _W
}
// poly represents an integer via a polynomial in Z[x]/(x^K+1)
// where K is the FFT length and b is the computation basis 1<<(m*_W).
// If P = a[0] + a[1] x + ... a[n] x^(K-1), the associated natural number
// is P(b^m).
//
// polyFromNat builds a poly by cutting a nat into m-word chunks and Int
// converts one back. Polys returned by InvTransform and InvNTransform
// carry (n+1)-word coefficients and m == 0 until the caller sets m.
type poly struct {
k uint // k is such that K = 1<<k.
m int // the m such that P(b^m) is the original number.
a []nat // a slice of at most K m-word coefficients.
}
// polyFromNat slices the number x into a polynomial
// with 1<<k coefficients made of m words.
// Full m-word coefficients alias the storage of x; the final, shorter
// remainder is copied into a freshly allocated zero-padded m-word slice.
func polyFromNat(x nat, k uint, m int) poly {
	p := poly{k: k, m: m, a: make([]nat, len(x)/m+1)}
	for i := range p.a {
		if len(x) < m {
			// Last chunk: pad with zeros up to m words.
			tail := make(nat, m)
			copy(tail, x)
			p.a[i] = tail
			break
		}
		p.a[i], x = x[:m], x[m:]
	}
	return p
}
// Int evaluates back a poly to its integer value.
// Coefficient i is added into the result at word offset i*p.m, with
// carries propagated upwards, and trailing zero words are trimmed.
func (p *poly) Int() nat {
	size := len(p.a)*p.m + 1
	if count := len(p.a); count > 0 {
		size += len(p.a[count-1])
	}
	sum := make(nat, size)
	window := sum // sliding view starting at the current coefficient offset
	for _, coeff := range p.a {
		l := len(coeff)
		carry := addVV(window[:l], window[:l], coeff)
		if window[l] == ^big.Word(0) {
			// The next word is saturated: propagate the carry further up.
			addVW(window[l:], window[l:], carry)
		} else {
			window[l] += carry
		}
		window = window[p.m:]
	}
	return trim(sum)
}
// trim returns n without its trailing (most significant) zero words.
// It returns nil when n is empty or consists only of zero words.
func trim(n nat) nat {
	for end := len(n); end > 0; end-- {
		if n[end-1] != 0 {
			return n[:end]
		}
	}
	return nil
}
// Mul multiplies p and q modulo X^K-1, where K = 1<<p.k.
// The product is done via a Fourier transform: both operands are
// transformed, multiplied pointwise and transformed back. The result
// carries p's chunk size m.
func (p *poly) Mul(q *poly) poly {
	// extra=2 because:
	// * some power of 2 is a K-th root of unity when n is a multiple of K/2.
	// * 2 itself is a square (see fermat.ShiftHalf)
	n := valueSize(p.k, p.m, 2)

	pv := p.Transform(n)
	qv := q.Transform(n)
	rv := pv.Mul(&qv)
	result := rv.InvTransform()
	result.m = p.m
	return result
}
// A polValues represents the value of a poly at the odd powers of a
// (2K)-th root of unity θ=2^l in Z/(b^n+1)Z, where b^n = 2^Kl.
// Values are produced by poly.Transform / poly.NTransform and converted
// back with InvTransform / InvNTransform.
type polValues struct {
k uint // k is such that K = 1<<k.
n int // the length of coefficients, n*_W a multiple of 1<<k.
values []fermat // a slice of K (n+1)-word values
}
// Transform evaluates p at θ^i for i = 0...K-1, where
// θ is a K-th primitive root of unity in Z/(b^n+1)Z.
// Each coefficient of p is copied into a zero-padded (n+1)-word fermat
// before the forward Fourier transform is applied.
func (p *poly) Transform(n int) polValues {
	k := p.k
	count, width := 1<<k, n+1

	// One backing array per vector, carved into (n+1)-word slots.
	inBits := make([]big.Word, width<<k)
	outBits := make([]big.Word, width<<k)
	input := make([]fermat, count)
	values := make([]fermat, count)
	for i := 0; i < count; i++ {
		lo, hi := i*width, (i+1)*width
		input[i] = fermat(inBits[lo:hi])
		values[i] = fermat(outBits[lo:hi])
		if i < len(p.a) {
			copy(input[i], p.a[i])
		}
	}
	// Now compute p(θ^i) for i = 0 ... K-1
	fourier(values, input, false, n, k)
	return polValues{k: k, n: n, values: values}
}
// InvTransform reconstructs p (modulo X^K - 1) from its
// values at θ^i for i = 0..K-1.
// The m field of the returned polynomial is left at zero.
func (v *polValues) InvTransform() poly {
	k, n := v.k, v.n
	width := n + 1

	// Perform an inverse Fourier transform to recover p.
	backing := make([]big.Word, width<<k)
	p := make([]fermat, 1<<k)
	for i := range p {
		p[i] = fermat(backing[i*width : (i+1)*width])
	}
	fourier(p, v.values, true, n, k)

	// Divide by K (a shift by -k bits).
	scratch := make(fermat, width)
	coeffs := make([]nat, len(p))
	for i, pi := range p {
		scratch.Shift(pi, -int(k))
		copy(pi, scratch)
		coeffs[i] = nat(pi)
	}
	return poly{k: k, m: 0, a: coeffs}
}
// NTransform evaluates p at θω^i for i = 0...K-1, where
// θ is a (2K)-th primitive root of unity in Z/(b^n+1)Z
// and ω = θ².
//
// It panics when p has 1<<k or more coefficients.
func (p *poly) NTransform(n int) polValues {
	k := p.k
	if len(p.a) >= 1<<k {
		panic("NTransform: len(p.a) >= 1<<k")
	}
	// θ is represented as a shift.
	θshift := (n * _W) >> k
	// p(x) = a_0 + a_1 x + ... + a_{K-1} x^(K-1)
	// p(θx) = q(x) where
	// q(x) = a_0 + θa_1 x + ... + θ^(K-1) a_{K-1} x^(K-1)
	//
	// Twist p by θ to obtain q.
	tbits := make([]big.Word, (n+1)<<k)
	twisted := make([]fermat, 1<<k)
	src := make(fermat, n+1) // zero-padded copy of the current coefficient
	for i := range twisted {
		twisted[i] = fermat(tbits[i*(n+1) : (i+1)*(n+1)])
		if i < len(p.a) {
			for j := range src {
				src[j] = 0
			}
			copy(src, p.a[i])
			twisted[i].Shift(src, θshift*i)
		}
	}

	// Now compute q(ω^i) for i = 0 ... K-1
	valbits := make([]big.Word, (n+1)<<k)
	values := make([]fermat, 1<<k)
	for i := range values {
		values[i] = fermat(valbits[i*(n+1) : (i+1)*(n+1)])
	}
	fourier(values, twisted, false, n, k)
	return polValues{k, n, values}
}
// InvNTransform reconstructs a polynomial from its values at
// roots of x^K+1. The m field of the returned polynomial
// is unspecified (it is left at zero).
func (v *polValues) InvNTransform() poly {
	k, n := v.k, v.n
	width := n + 1
	thetaShift := (n * _W) >> k

	// Perform an inverse Fourier transform to recover q.
	backing := make([]big.Word, width<<k)
	q := make([]fermat, 1<<k)
	for i := range q {
		q[i] = fermat(backing[i*width : (i+1)*width])
	}
	fourier(q, v.values, true, n, k)

	// Divide by K, and untwist q to recover p.
	scratch := make(fermat, width)
	coeffs := make([]nat, len(q))
	for i, qi := range q {
		scratch.Shift(qi, -int(k)-i*thetaShift)
		copy(qi, scratch)
		coeffs[i] = nat(qi)
	}
	return poly{k: k, m: 0, a: coeffs}
}
// fourier performs an unnormalized Fourier transform
// of src, a length 1<<k vector of numbers modulo b^n+1
// where b = 1<<_W.
//
// The result is written to dst, which must also hold 1<<k values of n+1
// words each. When backward is true the root-of-unity shifts are negated,
// giving the inverse transform up to a scale factor (callers divide by K
// themselves). The function panics if the first element of src or dst
// does not have n+1 words.
func fourier(dst []fermat, src []fermat, backward bool, n int, k uint) {
var rec func(dst, src []fermat, size uint)
tmp := make(fermat, n+1) // pre-allocate temporary variables.
tmp2 := make(fermat, n+1) // pre-allocate temporary variables.
// The recursion function of the FFT.
// The root of unity used in the transform is ω=1<<(ω2shift/2).
// The source array may use shifted indices (i.e. the i-th
// element is src[i << idxShift]).
rec = func(dst, src []fermat, size uint) {
idxShift := k - size
ω2shift := (4 * n * _W) >> size
if backward {
ω2shift = -ω2shift
}
// Easy cases.
if len(src[0]) != n+1 || len(dst[0]) != n+1 {
panic("len(src[0]) != n+1 || len(dst[0]) != n+1")
}
switch size {
case 0:
copy(dst[0], src[0])
return
case 1:
dst[0].Add(src[0], src[1<<idxShift]) // dst[0] = src[0] + src[1]
dst[1].Sub(src[0], src[1<<idxShift]) // dst[1] = src[0] - src[1]
return
}
// Let P(x) = src[0] + src[1<<idxShift] * x + ... + src[K-1 << idxShift] * x^(K-1)
// The P(x) = Q1(x²) + x*Q2(x²)
// where Q1's coefficients are src with indices shifted by 1
// where Q2's coefficients are src[1<<idxShift:] with indices shifted by 1
// Split destination vectors in halves.
dst1 := dst[:1<<(size-1)]
dst2 := dst[1<<(size-1):]
// Transform Q1 and Q2 in the halves.
rec(dst1, src, size-1)
rec(dst2, src[1<<idxShift:], size-1)
// Reconstruct P's transform from transforms of Q1 and Q2.
// dst[i] is dst1[i] + ω^i * dst2[i]
// dst[i + 1<<(k-1)] is dst1[i] + ω^(i+K/2) * dst2[i]
//
// dst2[i] must be written before dst1[i]: both results read the old
// dst1[i], and Add overwrites it in place.
for i := range dst1 {
tmp.ShiftHalf(dst2[i], i*ω2shift, tmp2) // ω^i * dst2[i]
dst2[i].Sub(dst1[i], tmp)
dst1[i].Add(dst1[i], tmp)
}
}
rec(dst, src, k)
}
// Mul returns the pointwise product of p and q.
// The result has the same k and n as p. Each product is computed with
// fermat.Mul into a shared scratch buffer of 8*n words and then copied
// into the result's own storage.
func (p *polValues) Mul(q *polValues) (r polValues) {
	width := p.n + 1
	count := len(p.values)

	backing := make([]big.Word, count*width)
	scratch := make(fermat, 8*p.n)
	values := make([]fermat, count)
	for i := range values {
		values[i] = fermat(backing[i*width : (i+1)*width])
		copy(values[i], scratch.Mul(p.values[i], q.values[i]))
	}
	return polValues{k: p.k, n: p.n, values: values}
}