vendor: Mega update all dependencies
GitHub-Pull-Request: https://github.com/syncthing/syncthing/pull/4080
This commit is contained in:
27
vendor/github.com/remyoudompheng/bigfft/LICENSE
generated
vendored
Normal file
27
vendor/github.com/remyoudompheng/bigfft/LICENSE
generated
vendored
Normal file
@@ -0,0 +1,27 @@
|
||||
Copyright (c) 2012 The Go Authors. All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

   * Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
   * Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
   * Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
276
vendor/github.com/remyoudompheng/bigfft/arith_386.s
generated
vendored
Normal file
276
vendor/github.com/remyoudompheng/bigfft/arith_386.s
generated
vendored
Normal file
@@ -0,0 +1,276 @@
|
||||
// Copyright 2009 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// This file provides fast assembly versions for the elementary
|
||||
// arithmetic operations on vectors implemented in arith.go.
|
||||
|
||||
// func mulWW(x, y Word) (z1, z0 Word)
// Multiplies x by y with MULL and returns the double-word product:
// DX is stored as the high word z1 and AX as the low word z0.
TEXT ·mulWW(SB),7,$0
	MOVL x+0(FP), AX
	MULL y+4(FP)
	MOVL DX, z1+8(FP)
	MOVL AX, z0+12(FP)
	RET
|
||||
|
||||
|
||||
// func divWW(x1, x0, y Word) (q, r Word)
// Divides the two-word value x1:x0 (loaded into DX:AX) by y with DIVL.
// AX is returned as the quotient q and DX as the remainder r.
TEXT ·divWW(SB),7,$0
	MOVL x1+0(FP), DX
	MOVL x0+4(FP), AX
	DIVL y+8(FP)
	MOVL AX, q+12(FP)
	MOVL DX, r+16(FP)
	RET
|
||||
|
||||
|
||||
// func addVV(z, x, y []Word) (c Word)
// Computes z[i] = x[i] + y[i] + carry for i in [0, n), where n is the length
// word of z (offset 4), and returns the final carry in c.
// The carry is kept in DX between iterations: RCRL moves it into CF before
// the ADCL and RCLL captures the new CF back into DX afterwards.
TEXT ·addVV(SB),7,$0
	MOVL z+0(FP), DI
	MOVL x+12(FP), SI
	MOVL y+24(FP), CX
	MOVL n+4(FP), BP
	MOVL $0, BX // i = 0
	MOVL $0, DX // c = 0
	JMP E1

L1:	MOVL (SI)(BX*4), AX
	RCRL $1, DX
	ADCL (CX)(BX*4), AX
	RCLL $1, DX
	MOVL AX, (DI)(BX*4)
	ADDL $1, BX // i++

E1:	CMPL BX, BP // i < n
	JL L1

	MOVL DX, c+36(FP)
	RET
|
||||
|
||||
|
||||
// func subVV(z, x, y []Word) (c Word)
// (same as addVV except for SBBL instead of ADCL and label names)
// Computes z[i] = x[i] - y[i] - borrow for i in [0, n) and returns the final
// borrow in c. The borrow is kept in DX and shuttled through CF with
// RCRL/RCLL around the SBBL.
TEXT ·subVV(SB),7,$0
	MOVL z+0(FP), DI
	MOVL x+12(FP), SI
	MOVL y+24(FP), CX
	MOVL n+4(FP), BP
	MOVL $0, BX // i = 0
	MOVL $0, DX // c = 0
	JMP E2

L2:	MOVL (SI)(BX*4), AX
	RCRL $1, DX
	SBBL (CX)(BX*4), AX
	RCLL $1, DX
	MOVL AX, (DI)(BX*4)
	ADDL $1, BX // i++

E2:	CMPL BX, BP // i < n
	JL L2

	MOVL DX, c+36(FP)
	RET
|
||||
|
||||
|
||||
// func addVW(z, x []Word, y Word) (c Word)
// Adds the single word y to the vector x, storing the result in z.
// AX holds the running carry, seeded with y: each iteration stores
// x[i] + AX in z[i] and then reduces AX to the carry bit (RCLL + ANDL $1).
// With n == 0 the loop is skipped and y itself is returned in c.
TEXT ·addVW(SB),7,$0
	MOVL z+0(FP), DI
	MOVL x+12(FP), SI
	MOVL y+24(FP), AX // c = y
	MOVL n+4(FP), BP
	MOVL $0, BX // i = 0
	JMP E3

L3:	ADDL (SI)(BX*4), AX
	MOVL AX, (DI)(BX*4)
	RCLL $1, AX
	ANDL $1, AX
	ADDL $1, BX // i++

E3:	CMPL BX, BP // i < n
	JL L3

	MOVL AX, c+28(FP)
	RET
|
||||
|
||||
|
||||
// func subVW(z, x []Word, y Word) (c Word)
// Subtracts the single word y from the vector x, storing the result in z.
// AX holds the running borrow, seeded with y: each iteration stores
// x[i] - AX in z[i] and then reduces AX to the borrow bit (RCLL + ANDL $1).
// With n == 0 the loop is skipped and y itself is returned in c.
TEXT ·subVW(SB),7,$0
	MOVL z+0(FP), DI
	MOVL x+12(FP), SI
	MOVL y+24(FP), AX // c = y
	MOVL n+4(FP), BP
	MOVL $0, BX // i = 0
	JMP E4

L4:	MOVL (SI)(BX*4), DX // TODO(gri) is there a reverse SUBL?
	SUBL AX, DX
	MOVL DX, (DI)(BX*4)
	RCLL $1, AX
	ANDL $1, AX
	ADDL $1, BX // i++

E4:	CMPL BX, BP // i < n
	JL L4

	MOVL AX, c+28(FP)
	RET
|
||||
|
||||
|
||||
// func shlVU(z, x []Word, s uint) (c Word)
// Shifts the vector x left by s bits into z, walking from the top word down.
// c receives the bits shifted out of x[n-1]; for n <= 0 nothing is written
// and c is 0. In the comments below ŝ denotes the complementary shift count.
TEXT ·shlVU(SB),7,$0
	MOVL n+4(FP), BX // i = n
	SUBL $1, BX // i--
	JL X8b // i < 0 (n <= 0)

	// n > 0
	MOVL z+0(FP), DI
	MOVL x+12(FP), SI
	MOVL s+24(FP), CX
	MOVL (SI)(BX*4), AX // w1 = x[n-1]
	MOVL $0, DX
	SHLL CX, DX:AX // w1>>ŝ
	MOVL DX, c+28(FP)

	CMPL BX, $0
	JLE X8a // i <= 0

	// i > 0
L8:	MOVL AX, DX // w = w1
	MOVL -4(SI)(BX*4), AX // w1 = x[i-1]
	SHLL CX, DX:AX // w<<s | w1>>ŝ
	MOVL DX, (DI)(BX*4) // z[i] = w<<s | w1>>ŝ
	SUBL $1, BX // i--
	JG L8 // i > 0

	// i <= 0
X8a:	SHLL CX, AX // w1<<s
	MOVL AX, (DI) // z[0] = w1<<s
	RET

X8b:	MOVL $0, c+28(FP)
	RET
|
||||
|
||||
|
||||
// func shrVU(z, x []Word, s uint) (c Word)
// Shifts the vector x right by s bits into z, walking from word 0 upwards.
// c receives the bits shifted out of x[0]; for n <= 0 nothing is written
// and c is 0. In the comments below ŝ denotes the complementary shift count.
TEXT ·shrVU(SB),7,$0
	MOVL n+4(FP), BP
	SUBL $1, BP // n--
	JL X9b // n < 0 (n <= 0)

	// n > 0
	MOVL z+0(FP), DI
	MOVL x+12(FP), SI
	MOVL s+24(FP), CX
	MOVL (SI), AX // w1 = x[0]
	MOVL $0, DX
	SHRL CX, DX:AX // w1<<ŝ
	MOVL DX, c+28(FP)

	MOVL $0, BX // i = 0
	JMP E9

	// i < n-1
L9:	MOVL AX, DX // w = w1
	MOVL 4(SI)(BX*4), AX // w1 = x[i+1]
	SHRL CX, DX:AX // w>>s | w1<<ŝ
	MOVL DX, (DI)(BX*4) // z[i] = w>>s | w1<<ŝ
	ADDL $1, BX // i++

E9:	CMPL BX, BP
	JL L9 // i < n-1

	// i >= n-1
X9a:	SHRL CX, AX // w1>>s
	MOVL AX, (DI)(BP*4) // z[n-1] = w1>>s
	RET

X9b:	MOVL $0, c+28(FP)
	RET
|
||||
|
||||
|
||||
// func mulAddVWW(z, x []Word, y, r Word) (c Word)
// Computes z = x*y + r word by word: each iteration stores the low word of
// x[i]*y + c in z[i] and carries the high word into the next iteration,
// with c seeded from r. The final carry is returned in c.
// DI and SI are advanced to the end of z and x so that the negative index
// BX can count up from -n to 0.
TEXT ·mulAddVWW(SB),7,$0
	MOVL z+0(FP), DI
	MOVL x+12(FP), SI
	MOVL y+24(FP), BP
	MOVL r+28(FP), CX // c = r
	MOVL n+4(FP), BX
	LEAL (DI)(BX*4), DI
	LEAL (SI)(BX*4), SI
	NEGL BX // i = -n
	JMP E5

L5:	MOVL (SI)(BX*4), AX
	MULL BP
	ADDL CX, AX
	ADCL $0, DX
	MOVL AX, (DI)(BX*4)
	MOVL DX, CX
	ADDL $1, BX // i++

E5:	CMPL BX, $0 // i < 0
	JL L5

	MOVL CX, c+32(FP)
	RET
|
||||
|
||||
|
||||
// func addMulVVW(z, x []Word, y Word) (c Word)
// Computes z += x*y word by word: each iteration adds the low word of
// x[i]*y + c into z[i] and carries the high word (plus both carry-outs)
// into the next iteration. The final carry is returned in c.
// DI and SI are advanced to the end of z and x so that the negative index
// BX can count up from -n to 0.
TEXT ·addMulVVW(SB),7,$0
	MOVL z+0(FP), DI
	MOVL x+12(FP), SI
	MOVL y+24(FP), BP
	MOVL n+4(FP), BX
	LEAL (DI)(BX*4), DI
	LEAL (SI)(BX*4), SI
	NEGL BX // i = -n
	MOVL $0, CX // c = 0
	JMP E6

L6:	MOVL (SI)(BX*4), AX
	MULL BP
	ADDL CX, AX
	ADCL $0, DX
	ADDL AX, (DI)(BX*4)
	ADCL $0, DX
	MOVL DX, CX
	ADDL $1, BX // i++

E6:	CMPL BX, $0 // i < 0
	JL L6

	MOVL CX, c+28(FP)
	RET
|
||||
|
||||
|
||||
// func divWVW(z []Word, xn Word, x []Word, y Word) (r Word)
// Divides the vector x, extended on top by the word xn, by the single word y.
// Working from the highest index down, each step divides DX:AX = r:x[i] by y,
// stores the quotient in z[i] and keeps the remainder in DX for the next
// step. The final remainder is returned in r.
TEXT ·divWVW(SB),7,$0
	MOVL z+0(FP), DI
	MOVL xn+12(FP), DX // r = xn
	MOVL x+16(FP), SI
	MOVL y+28(FP), CX
	MOVL n+4(FP), BX // i = n
	JMP E7

L7:	MOVL (SI)(BX*4), AX
	DIVL CX
	MOVL AX, (DI)(BX*4)

E7:	SUBL $1, BX // i--
	JGE L7 // i >= 0

	MOVL DX, r+32(FP)
	RET
|
||||
|
||||
// func bitLen(x Word) (n int)
// Returns the index of the highest set bit of x plus one (via BSRL),
// or 0 when x is zero (BSRL sets ZF and the Z1 path is taken).
TEXT ·bitLen(SB),7,$0
	BSRL x+0(FP), AX
	JZ Z1
	INCL AX
	MOVL AX, n+4(FP)
	RET

Z1:	MOVL $0, n+4(FP)
	RET
|
||||
399
vendor/github.com/remyoudompheng/bigfft/arith_amd64.s
generated
vendored
Normal file
399
vendor/github.com/remyoudompheng/bigfft/arith_amd64.s
generated
vendored
Normal file
@@ -0,0 +1,399 @@
|
||||
// Copyright 2009 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// This file provides fast assembly versions for the elementary
|
||||
// arithmetic operations on vectors implemented in arith.go.
|
||||
|
||||
// Literal instruction for MOVQ $0, CX.
// (MOVQ $0, reg is translated to XORQ reg, reg and clears CF.)
// addVW and subVW use this macro between an ADDQ/SUBQ and the following
// ADCQ/SBBQ or RCLQ, where CF must survive the zeroing of CX.
#define ZERO_CX BYTE $0x48; \
		BYTE $0xc7; \
		BYTE $0xc1; \
		BYTE $0x00; \
		BYTE $0x00; \
		BYTE $0x00; \
		BYTE $0x00
|
||||
|
||||
// func mulWW(x, y Word) (z1, z0 Word)
// Multiplies x by y with MULQ and returns the double-word product:
// DX is stored as the high word z1 and AX as the low word z0.
TEXT ·mulWW(SB),7,$0
	MOVQ x+0(FP), AX
	MULQ y+8(FP)
	MOVQ DX, z1+16(FP)
	MOVQ AX, z0+24(FP)
	RET
|
||||
|
||||
|
||||
// func divWW(x1, x0, y Word) (q, r Word)
// Divides the two-word value x1:x0 (loaded into DX:AX) by y with DIVQ.
// AX is returned as the quotient q and DX as the remainder r.
TEXT ·divWW(SB),7,$0
	MOVQ x1+0(FP), DX
	MOVQ x0+8(FP), AX
	DIVQ y+16(FP)
	MOVQ AX, q+24(FP)
	MOVQ DX, r+32(FP)
	RET
|
||||
|
||||
|
||||
// func addVV(z, x, y []Word) (c Word)
// Computes z[i] = x[i] + y[i] + carry for i in [0, len(z)) and returns the
// final carry in c. Words are processed four at a time in the unrolled loop
// U1, then one at a time in L1 for the remainder. The carry is parked in CX
// and moved to/from CF with RCRQ/RCLQ around each ADCQ chain.
TEXT ·addVV(SB),7,$0
	MOVQ z_len+8(FP), DI
	MOVQ x+24(FP), R8
	MOVQ y+48(FP), R9
	MOVQ z+0(FP), R10

	MOVQ $0, CX // c = 0
	MOVQ $0, SI // i = 0

	// s/JL/JMP/ below to disable the unrolled loop
	SUBQ $4, DI // n -= 4
	JL V1 // if n < 0 goto V1

U1:	// n >= 0
	// regular loop body unrolled 4x
	RCRQ $1, CX // CF = c
	MOVQ 0(R8)(SI*8), R11
	MOVQ 8(R8)(SI*8), R12
	MOVQ 16(R8)(SI*8), R13
	MOVQ 24(R8)(SI*8), R14
	ADCQ 0(R9)(SI*8), R11
	ADCQ 8(R9)(SI*8), R12
	ADCQ 16(R9)(SI*8), R13
	ADCQ 24(R9)(SI*8), R14
	MOVQ R11, 0(R10)(SI*8)
	MOVQ R12, 8(R10)(SI*8)
	MOVQ R13, 16(R10)(SI*8)
	MOVQ R14, 24(R10)(SI*8)
	RCLQ $1, CX // c = CF

	ADDQ $4, SI // i += 4
	SUBQ $4, DI // n -= 4
	JGE U1 // if n >= 0 goto U1

V1:	ADDQ $4, DI // n += 4
	JLE E1 // if n <= 0 goto E1

L1:	// n > 0
	RCRQ $1, CX // CF = c
	MOVQ 0(R8)(SI*8), R11
	ADCQ 0(R9)(SI*8), R11
	MOVQ R11, 0(R10)(SI*8)
	RCLQ $1, CX // c = CF

	ADDQ $1, SI // i++
	SUBQ $1, DI // n--
	JG L1 // if n > 0 goto L1

E1:	MOVQ CX, c+72(FP) // return c
	RET
|
||||
|
||||
|
||||
// func subVV(z, x, y []Word) (c Word)
// (same as addVV except for SBBQ instead of ADCQ and label names)
// Computes z[i] = x[i] - y[i] - borrow for i in [0, len(z)) and returns the
// final borrow in c. Words are processed four at a time in U2, then one at a
// time in L2; the borrow is parked in CX and moved to/from CF with RCRQ/RCLQ.
TEXT ·subVV(SB),7,$0
	MOVQ z_len+8(FP), DI
	MOVQ x+24(FP), R8
	MOVQ y+48(FP), R9
	MOVQ z+0(FP), R10

	MOVQ $0, CX // c = 0
	MOVQ $0, SI // i = 0

	// s/JL/JMP/ below to disable the unrolled loop
	SUBQ $4, DI // n -= 4
	JL V2 // if n < 0 goto V2

U2:	// n >= 0
	// regular loop body unrolled 4x
	RCRQ $1, CX // CF = c
	MOVQ 0(R8)(SI*8), R11
	MOVQ 8(R8)(SI*8), R12
	MOVQ 16(R8)(SI*8), R13
	MOVQ 24(R8)(SI*8), R14
	SBBQ 0(R9)(SI*8), R11
	SBBQ 8(R9)(SI*8), R12
	SBBQ 16(R9)(SI*8), R13
	SBBQ 24(R9)(SI*8), R14
	MOVQ R11, 0(R10)(SI*8)
	MOVQ R12, 8(R10)(SI*8)
	MOVQ R13, 16(R10)(SI*8)
	MOVQ R14, 24(R10)(SI*8)
	RCLQ $1, CX // c = CF

	ADDQ $4, SI // i += 4
	SUBQ $4, DI // n -= 4
	JGE U2 // if n >= 0 goto U2

V2:	ADDQ $4, DI // n += 4
	JLE E2 // if n <= 0 goto E2

L2:	// n > 0
	RCRQ $1, CX // CF = c
	MOVQ 0(R8)(SI*8), R11
	SBBQ 0(R9)(SI*8), R11
	MOVQ R11, 0(R10)(SI*8)
	RCLQ $1, CX // c = CF

	ADDQ $1, SI // i++
	SUBQ $1, DI // n--
	JG L2 // if n > 0 goto L2

E2:	MOVQ CX, c+72(FP) // return c
	RET
|
||||
|
||||
|
||||
// func addVW(z, x []Word, y Word) (c Word)
// Adds the single word y to the vector x, storing the result in z, and
// returns the final carry in c. CX holds the running carry, seeded with y.
// The unrolled loop U3 adds CX into the first of four words and ripples the
// carry through the other three with ADCQ $0; ZERO_CX clears CX without
// touching CF so that SETCS/RCLQ can then capture the carry. With
// len(z) == 0 both loops are skipped and y itself is returned.
TEXT ·addVW(SB),7,$0
	MOVQ z_len+8(FP), DI
	MOVQ x+24(FP), R8
	MOVQ y+48(FP), CX // c = y
	MOVQ z+0(FP), R10

	MOVQ $0, SI // i = 0

	// s/JL/JMP/ below to disable the unrolled loop
	SUBQ $4, DI // n -= 4
	JL V3 // if n < 4 goto V3

U3:	// n >= 0
	// regular loop body unrolled 4x
	MOVQ 0(R8)(SI*8), R11
	MOVQ 8(R8)(SI*8), R12
	MOVQ 16(R8)(SI*8), R13
	MOVQ 24(R8)(SI*8), R14
	ADDQ CX, R11
	ZERO_CX
	ADCQ $0, R12
	ADCQ $0, R13
	ADCQ $0, R14
	SETCS CX // c = CF
	MOVQ R11, 0(R10)(SI*8)
	MOVQ R12, 8(R10)(SI*8)
	MOVQ R13, 16(R10)(SI*8)
	MOVQ R14, 24(R10)(SI*8)

	ADDQ $4, SI // i += 4
	SUBQ $4, DI // n -= 4
	JGE U3 // if n >= 0 goto U3

V3:	ADDQ $4, DI // n += 4
	JLE E3 // if n <= 0 goto E3

L3:	// n > 0
	ADDQ 0(R8)(SI*8), CX
	MOVQ CX, 0(R10)(SI*8)
	ZERO_CX
	RCLQ $1, CX // c = CF

	ADDQ $1, SI // i++
	SUBQ $1, DI // n--
	JG L3 // if n > 0 goto L3

E3:	MOVQ CX, c+56(FP) // return c
	RET
|
||||
|
||||
|
||||
// func subVW(z, x []Word, y Word) (c Word)
// (same as addVW except for SUBQ/SBBQ instead of ADDQ/ADCQ and label names)
// Subtracts the single word y from the vector x, storing the result in z,
// and returns the final borrow in c. CX holds the running borrow, seeded
// with y; ZERO_CX clears CX without touching CF so that SETCS/RCLQ can then
// capture the borrow. With len(z) == 0 both loops are skipped and y itself
// is returned.
TEXT ·subVW(SB),7,$0
	MOVQ z_len+8(FP), DI
	MOVQ x+24(FP), R8
	MOVQ y+48(FP), CX // c = y
	MOVQ z+0(FP), R10

	MOVQ $0, SI // i = 0

	// s/JL/JMP/ below to disable the unrolled loop
	SUBQ $4, DI // n -= 4
	JL V4 // if n < 4 goto V4

U4:	// n >= 0
	// regular loop body unrolled 4x
	MOVQ 0(R8)(SI*8), R11
	MOVQ 8(R8)(SI*8), R12
	MOVQ 16(R8)(SI*8), R13
	MOVQ 24(R8)(SI*8), R14
	SUBQ CX, R11
	ZERO_CX
	SBBQ $0, R12
	SBBQ $0, R13
	SBBQ $0, R14
	SETCS CX // c = CF
	MOVQ R11, 0(R10)(SI*8)
	MOVQ R12, 8(R10)(SI*8)
	MOVQ R13, 16(R10)(SI*8)
	MOVQ R14, 24(R10)(SI*8)

	ADDQ $4, SI // i += 4
	SUBQ $4, DI // n -= 4
	JGE U4 // if n >= 0 goto U4

V4:	ADDQ $4, DI // n += 4
	JLE E4 // if n <= 0 goto E4

L4:	// n > 0
	MOVQ 0(R8)(SI*8), R11
	SUBQ CX, R11
	MOVQ R11, 0(R10)(SI*8)
	ZERO_CX
	RCLQ $1, CX // c = CF

	ADDQ $1, SI // i++
	SUBQ $1, DI // n--
	JG L4 // if n > 0 goto L4

E4:	MOVQ CX, c+56(FP) // return c
	RET
|
||||
|
||||
|
||||
// func shlVU(z, x []Word, s uint) (c Word)
// Shifts the vector x left by s bits into z, walking from the top word down.
// c receives the bits shifted out of x[n-1], where n = len(z); for n <= 0
// nothing is written and c is 0. In the comments below ŝ denotes the
// complementary shift count.
TEXT ·shlVU(SB),7,$0
	MOVQ z_len+8(FP), BX // i = n
	SUBQ $1, BX // i--
	JL X8b // i < 0 (n <= 0)

	// n > 0
	MOVQ z+0(FP), R10
	MOVQ x+24(FP), R8
	MOVQ s+48(FP), CX
	MOVQ (R8)(BX*8), AX // w1 = x[n-1]
	MOVQ $0, DX
	SHLQ CX, DX:AX // w1>>ŝ
	MOVQ DX, c+56(FP)

	CMPQ BX, $0
	JLE X8a // i <= 0

	// i > 0
L8:	MOVQ AX, DX // w = w1
	MOVQ -8(R8)(BX*8), AX // w1 = x[i-1]
	SHLQ CX, DX:AX // w<<s | w1>>ŝ
	MOVQ DX, (R10)(BX*8) // z[i] = w<<s | w1>>ŝ
	SUBQ $1, BX // i--
	JG L8 // i > 0

	// i <= 0
X8a:	SHLQ CX, AX // w1<<s
	MOVQ AX, (R10) // z[0] = w1<<s
	RET

X8b:	MOVQ $0, c+56(FP)
	RET
|
||||
|
||||
|
||||
// func shrVU(z, x []Word, s uint) (c Word)
// Shifts the vector x right by s bits into z, walking from word 0 upwards.
// c receives the bits shifted out of x[0]; for len(z) <= 0 nothing is
// written and c is 0. In the comments below ŝ denotes the complementary
// shift count.
TEXT ·shrVU(SB),7,$0
	MOVQ z_len+8(FP), R11
	SUBQ $1, R11 // n--
	JL X9b // n < 0 (n <= 0)

	// n > 0
	MOVQ z+0(FP), R10
	MOVQ x+24(FP), R8
	MOVQ s+48(FP), CX
	MOVQ (R8), AX // w1 = x[0]
	MOVQ $0, DX
	SHRQ CX, DX:AX // w1<<ŝ
	MOVQ DX, c+56(FP)

	MOVQ $0, BX // i = 0
	JMP E9

	// i < n-1
L9:	MOVQ AX, DX // w = w1
	MOVQ 8(R8)(BX*8), AX // w1 = x[i+1]
	SHRQ CX, DX:AX // w>>s | w1<<ŝ
	MOVQ DX, (R10)(BX*8) // z[i] = w>>s | w1<<ŝ
	ADDQ $1, BX // i++

E9:	CMPQ BX, R11
	JL L9 // i < n-1

	// i >= n-1
X9a:	SHRQ CX, AX // w1>>s
	MOVQ AX, (R10)(R11*8) // z[n-1] = w1>>s
	RET

X9b:	MOVQ $0, c+56(FP)
	RET
|
||||
|
||||
|
||||
// func mulAddVWW(z, x []Word, y, r Word) (c Word)
// Computes z = x*y + r word by word for i in [0, len(z)): each iteration
// stores the low word of x[i]*y + c in z[i] and carries the high word into
// the next iteration, with c seeded from r. The final carry is returned in c.
TEXT ·mulAddVWW(SB),7,$0
	MOVQ z+0(FP), R10
	MOVQ x+24(FP), R8
	MOVQ y+48(FP), R9
	MOVQ r+56(FP), CX // c = r
	MOVQ z_len+8(FP), R11
	MOVQ $0, BX // i = 0
	JMP E5

L5:	MOVQ (R8)(BX*8), AX
	MULQ R9
	ADDQ CX, AX
	ADCQ $0, DX
	MOVQ AX, (R10)(BX*8)
	MOVQ DX, CX
	ADDQ $1, BX // i++

E5:	CMPQ BX, R11 // i < n
	JL L5

	MOVQ CX, c+64(FP)
	RET
|
||||
|
||||
|
||||
// func addMulVVW(z, x []Word, y Word) (c Word)
// Computes z += x*y word by word for i in [0, len(z)): each iteration adds
// the low word of x[i]*y + c into z[i] and carries the high word (plus both
// carry-outs) into the next iteration. The final carry is returned in c.
TEXT ·addMulVVW(SB),7,$0
	MOVQ z+0(FP), R10
	MOVQ x+24(FP), R8
	MOVQ y+48(FP), R9
	MOVQ z_len+8(FP), R11
	MOVQ $0, BX // i = 0
	MOVQ $0, CX // c = 0
	JMP E6

L6:	MOVQ (R8)(BX*8), AX
	MULQ R9
	ADDQ CX, AX
	ADCQ $0, DX
	ADDQ AX, (R10)(BX*8)
	ADCQ $0, DX
	MOVQ DX, CX
	ADDQ $1, BX // i++

E6:	CMPQ BX, R11 // i < n
	JL L6

	MOVQ CX, c+56(FP)
	RET
|
||||
|
||||
|
||||
// func divWVW(z []Word, xn Word, x []Word, y Word) (r Word)
// Divides the vector x, extended on top by the word xn, by the single word y.
// Working from index len(z)-1 down to 0, each step divides DX:AX = r:x[i]
// by y, stores the quotient in z[i] and keeps the remainder in DX for the
// next step. The final remainder is returned in r.
TEXT ·divWVW(SB),7,$0
	MOVQ z+0(FP), R10
	MOVQ xn+24(FP), DX // r = xn
	MOVQ x+32(FP), R8
	MOVQ y+56(FP), R9
	MOVQ z_len+8(FP), BX // i = n
	JMP E7

L7:	MOVQ (R8)(BX*8), AX
	DIVQ R9
	MOVQ AX, (R10)(BX*8)

E7:	SUBQ $1, BX // i--
	JGE L7 // i >= 0

	MOVQ DX, r+64(FP)
	RET
|
||||
|
||||
// func bitLen(x Word) (n int)
// Returns the index of the highest set bit of x plus one (via BSRQ),
// or 0 when x is zero (BSRQ sets ZF and the Z1 path is taken).
TEXT ·bitLen(SB),7,$0
	BSRQ x+0(FP), AX
	JZ Z1
	ADDQ $1, AX
	MOVQ AX, n+8(FP)
	RET

Z1:	MOVQ $0, n+8(FP)
	RET
|
||||
321
vendor/github.com/remyoudompheng/bigfft/arith_arm.s
generated
vendored
Normal file
321
vendor/github.com/remyoudompheng/bigfft/arith_arm.s
generated
vendored
Normal file
@@ -0,0 +1,321 @@
|
||||
// Copyright 2009 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// This file provides fast assembly versions for the elementary
|
||||
// arithmetic operations on vectors implemented in arith.go.
|
||||
|
||||
// CFLAG is the shift count the routines below apply to a saved CPSR value
// (followed by AND $1) to extract the carry bit.
#define CFLAG 29 // bit position of carry flag
|
||||
|
||||
// func addVV(z, x, y []Word) (c Word)
// Computes z[i] = x[i] + y[i] + carry over the n words of z and returns the
// final carry in c. R4 is the end pointer z + 4*n; the loop runs until the
// post-incremented z pointer R1 reaches it. The flags are saved in R0 after
// each ADC.S and restored before the next one, because the loop compare
// overwrites them; the carry is extracted from R0 at the end.
TEXT ·addVV(SB),7,$0
	MOVW $0, R0
	MOVW z+0(FP), R1
	MOVW x+12(FP), R2
	MOVW y+24(FP), R3
	MOVW n+4(FP), R4
	MOVW R4<<2, R4
	ADD R1, R4
	B E1
L1:
	MOVW.P 4(R2), R5
	MOVW.P 4(R3), R6
	MOVW R0, CPSR
	ADC.S R6, R5
	MOVW.P R5, 4(R1)
	MOVW CPSR, R0
E1:
	CMP R1, R4
	BNE L1

	MOVW R0>>CFLAG, R0
	AND $1, R0
	MOVW R0, c+36(FP)
	RET
|
||||
|
||||
|
||||
// func subVV(z, x, y []Word) (c Word)
// (same as addVV except for SBC instead of ADC and label names)
// Computes z[i] = x[i] - y[i] - borrow over the n words of z and returns the
// final borrow in c. The saved flags in R0 start with the carry bit set, and
// the extracted carry is inverted (EOR $1) to produce the borrow.
TEXT ·subVV(SB),7,$0
	MOVW $(1<<CFLAG), R0
	MOVW z+0(FP), R1
	MOVW x+12(FP), R2
	MOVW y+24(FP), R3
	MOVW n+4(FP), R4
	MOVW R4<<2, R4
	ADD R1, R4
	B E2
L2:
	MOVW.P 4(R2), R5
	MOVW.P 4(R3), R6
	MOVW R0, CPSR
	SBC.S R6, R5
	MOVW.P R5, 4(R1)
	MOVW CPSR, R0
E2:
	CMP R1, R4
	BNE L2

	MOVW R0>>CFLAG, R0
	AND $1, R0
	EOR $1, R0
	MOVW R0, c+36(FP)
	RET
|
||||
|
||||
|
||||
// func addVW(z, x []Word, y Word) (c Word)
// Adds the single word y to the vector x, storing the result in z, and
// returns the final carry in c. For an empty z, y itself is returned.
// The first word is handled separately (L3a) with ADD.S y; the remaining
// words only propagate the carry with ADC.S $0, with the flags saved in R0
// across the loop compare.
TEXT ·addVW(SB),7,$0
	MOVW z+0(FP), R1
	MOVW x+12(FP), R2
	MOVW y+24(FP), R3
	MOVW n+4(FP), R4
	MOVW R4<<2, R4
	ADD R1, R4
	CMP R1, R4
	BNE L3a
	MOVW R3, c+28(FP)
	RET
L3a:
	MOVW.P 4(R2), R5
	ADD.S R3, R5
	MOVW.P R5, 4(R1)
	MOVW CPSR, R0
	B E3
L3:
	MOVW.P 4(R2), R5
	MOVW R0, CPSR
	ADC.S $0, R5
	MOVW.P R5, 4(R1)
	MOVW CPSR, R0
E3:
	CMP R1, R4
	BNE L3

	MOVW R0>>CFLAG, R0
	AND $1, R0
	MOVW R0, c+28(FP)
	RET
|
||||
|
||||
|
||||
// func subVW(z, x []Word, y Word) (c Word)
// Subtracts the single word y from the vector x, storing the result in z,
// and returns the final borrow in c. For an empty z, y itself is returned.
// The first word is handled separately (L4a) with SUB.S y; the remaining
// words only propagate the borrow with SBC.S $0. The extracted carry is
// inverted (EOR $1) to produce the borrow.
TEXT ·subVW(SB),7,$0
	MOVW z+0(FP), R1
	MOVW x+12(FP), R2
	MOVW y+24(FP), R3
	MOVW n+4(FP), R4
	MOVW R4<<2, R4
	ADD R1, R4
	CMP R1, R4
	BNE L4a
	MOVW R3, c+28(FP)
	RET
L4a:
	MOVW.P 4(R2), R5
	SUB.S R3, R5
	MOVW.P R5, 4(R1)
	MOVW CPSR, R0
	B E4
L4:
	MOVW.P 4(R2), R5
	MOVW R0, CPSR
	SBC.S $0, R5
	MOVW.P R5, 4(R1)
	MOVW CPSR, R0
E4:
	CMP R1, R4
	BNE L4

	MOVW R0>>CFLAG, R0
	AND $1, R0
	EOR $1, R0
	MOVW R0, c+28(FP)
	RET
|
||||
|
||||
|
||||
// func shlVU(z, x []Word, s uint) (c Word)
// Shifts the vector x left by s bits into z, walking from the top word down
// with pre-decremented pointers. c receives the bits shifted out of the top
// word (x's top word >> (32-s)). Two cases return c = 0: an empty z (X7),
// and s == 0, which is handled by the plain copy loop Y7 that then falls
// through into X7.
TEXT ·shlVU(SB),7,$0
	MOVW n+4(FP), R5
	CMP $0, R5
	BEQ X7

	MOVW z+0(FP), R1
	MOVW x+12(FP), R2
	MOVW R5<<2, R5
	ADD R5, R2
	ADD R1, R5
	MOVW s+24(FP), R3
	CMP $0, R3 // shift 0 is special
	BEQ Y7
	ADD $4, R1 // stop one word early
	MOVW $32, R4
	SUB R3, R4
	MOVW $0, R7

	MOVW.W -4(R2), R6
	MOVW R6<<R3, R7
	MOVW R6>>R4, R6
	MOVW R6, c+28(FP)
	B E7

L7:
	MOVW.W -4(R2), R6
	ORR R6>>R4, R7
	MOVW.W R7, -4(R5)
	MOVW R6<<R3, R7
E7:
	CMP R1, R5
	BNE L7

	MOVW R7, -4(R5)
	RET

Y7:	// copy loop, because shift 0 == shift 32
	MOVW.W -4(R2), R6
	MOVW.W R6, -4(R5)
	CMP R1, R5
	BNE Y7

X7:
	MOVW $0, R1
	MOVW R1, c+28(FP)
	RET
|
||||
|
||||
|
||||
// func shrVU(z, x []Word, s uint) (c Word)
// Shifts the vector x right by s bits into z, walking from word 0 upwards
// with post-incremented pointers. c receives the bits shifted out of x[0]
// (x[0] << (32-s)). Two cases return c = 0: an empty z (X6), and s == 0,
// which is handled by the plain copy loop Y6 that then falls through into X6.
TEXT ·shrVU(SB),7,$0
	MOVW n+4(FP), R5
	CMP $0, R5
	BEQ X6

	MOVW z+0(FP), R1
	MOVW x+12(FP), R2
	MOVW R5<<2, R5
	ADD R1, R5
	MOVW s+24(FP), R3
	CMP $0, R3 // shift 0 is special
	BEQ Y6
	SUB $4, R5 // stop one word early
	MOVW $32, R4
	SUB R3, R4
	MOVW $0, R7

	// first word
	MOVW.P 4(R2), R6
	MOVW R6>>R3, R7
	MOVW R6<<R4, R6
	MOVW R6, c+28(FP)
	B E6

	// word loop
L6:
	MOVW.P 4(R2), R6
	ORR R6<<R4, R7
	MOVW.P R7, 4(R1)
	MOVW R6>>R3, R7
E6:
	CMP R1, R5
	BNE L6

	MOVW R7, 0(R1)
	RET

Y6:	// copy loop, because shift 0 == shift 32
	MOVW.P 4(R2), R6
	MOVW.P R6, 4(R1)
	CMP R1, R5
	BNE Y6

X6:
	MOVW $0, R1
	MOVW R1, c+28(FP)
	RET
|
||||
|
||||
|
||||
// func mulAddVWW(z, x []Word, y, r Word) (c Word)
// Computes z = x*y + r word by word: each iteration stores the low word of
// x[i]*y + c in z[i] and carries the high word into the next iteration,
// with c (R4) seeded from r. R0 is held at zero so that ADC R0, R7 adds only
// the carry. The final carry is returned in c.
TEXT ·mulAddVWW(SB),7,$0
	MOVW $0, R0
	MOVW z+0(FP), R1
	MOVW x+12(FP), R2
	MOVW y+24(FP), R3
	MOVW r+28(FP), R4
	MOVW n+4(FP), R5
	MOVW R5<<2, R5
	ADD R1, R5
	B E8

	// word loop
L8:
	MOVW.P 4(R2), R6
	MULLU R6, R3, (R7, R6)
	ADD.S R4, R6
	ADC R0, R7
	MOVW.P R6, 4(R1)
	MOVW R7, R4
E8:
	CMP R1, R5
	BNE L8

	MOVW R4, c+32(FP)
	RET
|
||||
|
||||
|
||||
// func addMulVVW(z, x []Word, y Word) (c Word)
// Computes z += x*y word by word: each iteration adds the low word of
// x[i]*y + c into z[i] and carries the high word (plus both carry-outs)
// into the next iteration. R0 is held at zero so that ADC R0, R7 adds only
// the carry. The final carry is returned in c.
TEXT ·addMulVVW(SB),7,$0
	MOVW $0, R0
	MOVW z+0(FP), R1
	MOVW x+12(FP), R2
	MOVW y+24(FP), R3
	MOVW n+4(FP), R5
	MOVW R5<<2, R5
	ADD R1, R5
	MOVW $0, R4
	B E9

	// word loop
L9:
	MOVW.P 4(R2), R6
	MULLU R6, R3, (R7, R6)
	ADD.S R4, R6
	ADC R0, R7
	MOVW 0(R1), R4
	ADD.S R4, R6
	ADC R0, R7
	MOVW.P R6, 4(R1)
	MOVW R7, R4
E9:
	CMP R1, R5
	BNE L9

	MOVW R4, c+28(FP)
	RET
|
||||
|
||||
|
||||
// func divWVW(z []Word, xn Word, x []Word, y Word) (r Word)
// Tail-branches to the Go implementation divWVW_g.
TEXT ·divWVW(SB),7,$0
	// ARM has no multiword division, so use portable code.
	B ·divWVW_g(SB)
|
||||
|
||||
|
||||
// func divWW(x1, x0, y Word) (q, r Word)
// Tail-branches to the Go implementation divWW_g.
TEXT ·divWW(SB),7,$0
	// ARM has no multiword division, so use portable code.
	B ·divWW_g(SB)
|
||||
|
||||
|
||||
// func mulWW(x, y Word) (z1, z0 Word)
// Multiplies x by y with MULLU and returns the double-word product:
// R4 is stored as the high word z1 and R3 as the low word z0.
TEXT ·mulWW(SB),7,$0
	MOVW x+0(FP), R1
	MOVW y+4(FP), R2
	MULLU R1, R2, (R4, R3)
	MOVW R4, z1+8(FP)
	MOVW R3, z0+12(FP)
	RET
|
||||
|
||||
// func bitLen(x Word) (n int)
// Returns 32 minus the number of leading zero bits of x (CLZ).
TEXT ·bitLen(SB),7,$0
	MOVW x+0(FP), R0
	CLZ R0, R0
	MOVW $32, R1
	SUB.S R0, R1
	MOVW R1, n+4(FP)
	RET
|
||||
21
vendor/github.com/remyoudompheng/bigfft/arith_decl.go
generated
vendored
Normal file
21
vendor/github.com/remyoudompheng/bigfft/arith_decl.go
generated
vendored
Normal file
@@ -0,0 +1,21 @@
|
||||
// Copyright 2010 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package bigfft
|
||||
|
||||
import . "math/big"
|
||||
|
||||
// implemented in arith_$GOARCH.s

// mulWW returns the double-word product of x and y (z1 high, z0 low).
func mulWW(x, y Word) (z1, z0 Word)

// divWW divides the double word x1:x0 by y, returning quotient and remainder.
func divWW(x1, x0, y Word) (q, r Word)

// addVV sets z = x + y over len(z) words and returns the carry out.
func addVV(z, x, y []Word) (c Word)

// subVV sets z = x - y over len(z) words and returns the borrow out.
func subVV(z, x, y []Word) (c Word)

// addVW sets z = x + y over len(z) words and returns the carry out.
func addVW(z, x []Word, y Word) (c Word)

// subVW sets z = x - y over len(z) words and returns the borrow out.
func subVW(z, x []Word, y Word) (c Word)

// shlVU sets z = x << s and returns the bits shifted out of the top word.
func shlVU(z, x []Word, s uint) (c Word)

// shrVU sets z = x >> s and returns the bits shifted out of the bottom word.
func shrVU(z, x []Word, s uint) (c Word)

// mulAddVWW sets z = x*y + r over len(z) words and returns the carry out.
func mulAddVWW(z, x []Word, y, r Word) (c Word)

// addMulVVW sets z += x*y over len(z) words and returns the carry out.
func addMulVVW(z, x []Word, y Word) (c Word)

// divWVW divides the vector x, extended on top by xn, by y; the quotient
// words are stored in z and the remainder is returned.
func divWVW(z []Word, xn Word, x []Word, y Word) (r Word)

// bitLen returns the number of bits needed to represent x (0 for x == 0).
func bitLen(x Word) (n int)
|
||||
198
vendor/github.com/remyoudompheng/bigfft/fermat.go
generated
vendored
Normal file
198
vendor/github.com/remyoudompheng/bigfft/fermat.go
generated
vendored
Normal file
@@ -0,0 +1,198 @@
|
||||
package bigfft
|
||||
|
||||
import (
|
||||
"math/big"
|
||||
)
|
||||
|
||||
// Arithmetic modulo 2^n+1.
|
||||
|
||||
// A fermat of length w+1 represents a number modulo 2^(w*_W) + 1. The last
|
||||
// word is zero or one. A number has at most two representatives satisfying the
|
||||
// 0-1 last word constraint.
|
||||
type fermat nat
|
||||
|
||||
func (n fermat) String() string { return nat(n).String() }
|
||||
|
||||
func (z fermat) norm() {
|
||||
n := len(z) - 1
|
||||
c := z[n]
|
||||
if c == 0 {
|
||||
return
|
||||
}
|
||||
if z[0] >= c {
|
||||
z[n] = 0
|
||||
z[0] -= c
|
||||
return
|
||||
}
|
||||
// z[0] < z[n].
|
||||
subVW(z, z, c) // Substract c
|
||||
if c > 1 {
|
||||
z[n] -= c - 1
|
||||
c = 1
|
||||
}
|
||||
// Add back c.
|
||||
if z[n] == 1 {
|
||||
z[n] = 0
|
||||
return
|
||||
} else {
|
||||
addVW(z, z, 1)
|
||||
}
|
||||
}
|
||||
|
||||
// Shift computes (x << k) mod (2^n+1).
|
||||
func (z fermat) Shift(x fermat, k int) {
|
||||
if len(z) != len(x) {
|
||||
println(len(z), len(x))
|
||||
panic("len(z) != len(x) in Shift")
|
||||
}
|
||||
n := len(x) - 1
|
||||
// Shift by n*_W is taking the opposite.
|
||||
k %= 2 * n * _W
|
||||
if k < 0 {
|
||||
k += 2 * n * _W
|
||||
}
|
||||
neg := false
|
||||
if k >= n*_W {
|
||||
k -= n * _W
|
||||
neg = true
|
||||
}
|
||||
|
||||
kw, kb := k/_W, k%_W
|
||||
|
||||
z[n] = 1 // Add (-1)
|
||||
if !neg {
|
||||
for i := 0; i < kw; i++ {
|
||||
z[i] = 0
|
||||
}
|
||||
// Shift left by kw words.
|
||||
// x = a·2^(n-k) + b
|
||||
// x<<k = (b<<k) - a
|
||||
copy(z[kw:], x[:n-kw])
|
||||
b := subVV(z[:kw+1], z[:kw+1], x[n-kw:])
|
||||
if z[kw+1] > 0 {
|
||||
z[kw+1] -= b
|
||||
} else {
|
||||
subVW(z[kw+1:], z[kw+1:], b)
|
||||
}
|
||||
} else {
|
||||
for i := kw + 1; i < n; i++ {
|
||||
z[i] = 0
|
||||
}
|
||||
// Shift left and negate, by kw words.
|
||||
copy(z[:kw+1], x[n-kw:n+1]) // z_low = x_high
|
||||
b := subVV(z[kw:n], z[kw:n], x[:n-kw]) // z_high -= x_low
|
||||
z[n] -= b
|
||||
}
|
||||
// Add back 1.
|
||||
if z[0] < ^big.Word(0) {
|
||||
z[0]++
|
||||
} else {
|
||||
addVW(z, z, 1)
|
||||
}
|
||||
// Shift left by kb bits
|
||||
shlVU(z, z, uint(kb))
|
||||
z.norm()
|
||||
}
|
||||
|
||||
// ShiftHalf shifts x by k/2 bits the left. Shifting by 1/2 bit
|
||||
// is multiplication by sqrt(2) mod 2^n+1 which is 2^(3n/4) - 2^(n/4).
|
||||
// A temporary buffer must be provided in tmp.
|
||||
func (z fermat) ShiftHalf(x fermat, k int, tmp fermat) {
|
||||
n := len(z) - 1
|
||||
if k%2 == 0 {
|
||||
z.Shift(x, k/2)
|
||||
return
|
||||
}
|
||||
u := (k - 1) / 2
|
||||
a := u + (3*_W/4)*n
|
||||
b := u + (_W/4)*n
|
||||
z.Shift(x, a)
|
||||
tmp.Shift(x, b)
|
||||
z.Sub(z, tmp)
|
||||
}
|
||||
|
||||
// Add computes addition mod 2^n+1.
|
||||
func (z fermat) Add(x, y fermat) fermat {
|
||||
if len(z) != len(x) {
|
||||
panic("Add: len(z) != len(x)")
|
||||
}
|
||||
addVV(z, x, y) // there cannot be a carry here.
|
||||
z.norm()
|
||||
return z
|
||||
}
|
||||
|
||||
// Sub computes substraction mod 2^n+1.
|
||||
func (z fermat) Sub(x, y fermat) fermat {
|
||||
if len(z) != len(x) {
|
||||
panic("Add: len(z) != len(x)")
|
||||
}
|
||||
n := len(y) - 1
|
||||
b := subVV(z[:n], x[:n], y[:n])
|
||||
b += y[n]
|
||||
// If b > 0, we need to subtract b<<n, which is the same as adding b.
|
||||
z[n] = x[n]
|
||||
if z[0] <= ^big.Word(0)-b {
|
||||
z[0] += b
|
||||
} else {
|
||||
addVW(z, z, b)
|
||||
}
|
||||
z.norm()
|
||||
return z
|
||||
}
|
||||
|
||||
func (z fermat) Mul(x, y fermat) fermat {
|
||||
n := len(x) - 1
|
||||
if n < 30 {
|
||||
z = z[:2*n+2]
|
||||
basicMul(z, x, y)
|
||||
z = z[:2*n+1]
|
||||
} else {
|
||||
var xi, yi, zi big.Int
|
||||
xi.SetBits(x)
|
||||
yi.SetBits(y)
|
||||
zi.SetBits(z)
|
||||
zb := zi.Mul(&xi, &yi).Bits()
|
||||
if len(zb) <= n {
|
||||
// Short product.
|
||||
copy(z, zb)
|
||||
for i := len(zb); i < len(z); i++ {
|
||||
z[i] = 0
|
||||
}
|
||||
return z
|
||||
}
|
||||
z = zb
|
||||
}
|
||||
// len(z) is at most 2n+1.
|
||||
if len(z) > 2*n+1 {
|
||||
panic("len(z) > 2n+1")
|
||||
}
|
||||
i := len(z) - (n + 1) // i <= n
|
||||
c := subVV(z[1:i+1], z[1:i+1], z[n+1:])
|
||||
z = z[:n+1]
|
||||
z[n]++ // Add -1.
|
||||
subVW(z[i+1:], z[i+1:], c)
|
||||
// Add 1.
|
||||
if z[n] == 1 {
|
||||
z[n] = 0
|
||||
} else {
|
||||
addVW(z, z, 1)
|
||||
}
|
||||
z.norm()
|
||||
return z
|
||||
}
|
||||
|
||||
// copied from math/big
|
||||
//
|
||||
// basicMul multiplies x and y and leaves the result in z.
|
||||
// The (non-normalized) result is placed in z[0 : len(x) + len(y)].
|
||||
func basicMul(z, x, y fermat) {
|
||||
// initialize z
|
||||
for i := 0; i < len(z); i++ {
|
||||
z[i] = 0
|
||||
}
|
||||
for i, d := range y {
|
||||
if d != 0 {
|
||||
z[len(x)+i] = addMulVVW(z[i:i+len(x)], x, d)
|
||||
}
|
||||
}
|
||||
}
|
||||
366
vendor/github.com/remyoudompheng/bigfft/fft.go
generated
vendored
Normal file
366
vendor/github.com/remyoudompheng/bigfft/fft.go
generated
vendored
Normal file
@@ -0,0 +1,366 @@
|
||||
// Package bigfft implements multiplication of big.Int using FFT.
|
||||
//
|
||||
// The implementation is based on the Schönhage-Strassen method
|
||||
// using integer FFT modulo 2^n+1.
|
||||
package bigfft
|
||||
|
||||
import (
|
||||
"math/big"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// _W is the size of a big.Word in bits.
const _W = 8 * int(unsafe.Sizeof(big.Word(0)))

// nat is an unsigned integer stored as a slice of big.Word, in the layout
// accepted by (*big.Int).SetBits.
type nat []big.Word

// String formats n the way (*big.Int).String does.
func (n nat) String() string {
	return new(big.Int).SetBits(n).String()
}
|
||||
|
||||
// fftThreshold is the size (in words) above which FFT is used over
// Karatsuba from math/big. Mul only takes the FFT path when both operands
// have more than fftThreshold words.
//
// TestCalibrate seems to indicate a threshold of 60kbits on 32-bit
// arches and 110kbits on 64-bit arches. (1800 words is 57.6 kbits with
// 32-bit words and 115.2 kbits with 64-bit words.)
var fftThreshold = 1800
|
||||
|
||||
// Mul computes the product x*y and returns z.
|
||||
// It can be used instead of the Mul method of
|
||||
// *big.Int from math/big package.
|
||||
func Mul(x, y *big.Int) *big.Int {
|
||||
xwords := len(x.Bits())
|
||||
ywords := len(y.Bits())
|
||||
if xwords > fftThreshold && ywords > fftThreshold {
|
||||
return mulFFT(x, y)
|
||||
}
|
||||
return new(big.Int).Mul(x, y)
|
||||
}
|
||||
|
||||
func mulFFT(x, y *big.Int) *big.Int {
|
||||
var xb, yb nat = x.Bits(), y.Bits()
|
||||
zb := fftmul(xb, yb)
|
||||
z := new(big.Int)
|
||||
z.SetBits(zb)
|
||||
if x.Sign()*y.Sign() < 0 {
|
||||
z.Neg(z)
|
||||
}
|
||||
return z
|
||||
}
|
||||
|
||||
// A FFT size of K=1<<k is adequate when K is about 2*sqrt(N) where
|
||||
// N = x.Bitlen() + y.Bitlen().
|
||||
|
||||
func fftmul(x, y nat) nat {
|
||||
k, m := fftSize(x, y)
|
||||
xp := polyFromNat(x, k, m)
|
||||
yp := polyFromNat(y, k, m)
|
||||
rp := xp.Mul(&yp)
|
||||
return rp.Int()
|
||||
}
|
||||
|
||||
// fftSizeThreshold[i] is the maximal size (in bits) where we should use
// fft size i.
//
// fftSize scans the table from the start and picks the first index whose
// entry is strictly greater than the combined size of the operands in
// bits. The leading zeros therefore mean that sizes 0, 1 and 2 are never
// selected.
var fftSizeThreshold = [...]int64{0, 0, 0,
	4 << 10, 8 << 10, 16 << 10, // up to index 5
	32 << 10, 64 << 10, 1 << 18, 1 << 20, 3 << 20, // up to index 10
	8 << 20, 30 << 20, 100 << 20, 300 << 20, 600 << 20,
}
|
||||
|
||||
// returns the FFT length k, m the number of words per chunk
|
||||
// such that m << k is larger than the number of words
|
||||
// in x*y.
|
||||
func fftSize(x, y nat) (k uint, m int) {
|
||||
words := len(x) + len(y)
|
||||
bits := int64(words) * int64(_W)
|
||||
k = uint(len(fftSizeThreshold))
|
||||
for i := range fftSizeThreshold {
|
||||
if fftSizeThreshold[i] > bits {
|
||||
k = uint(i)
|
||||
break
|
||||
}
|
||||
}
|
||||
// The 1<<k chunks of m words must have N bits so that
|
||||
// 2^N-1 is larger than x*y. That is, m<<k > words
|
||||
m = words>>k + 1
|
||||
return
|
||||
}
|
||||
|
||||
// valueSize returns the smallest multiple of 1<<k greater than
|
||||
// 2*m*_W + k, that is also a multiple of _W. If extra > 0, the
|
||||
// returned value is only required to be a multiple of 1<<(k-extra)
|
||||
func valueSize(k uint, m int, extra uint) int {
|
||||
n := 2*m*_W + int(k)
|
||||
K := 1 << (k - extra)
|
||||
if K < _W {
|
||||
K = _W
|
||||
}
|
||||
n = ((n / K) + 1) * K
|
||||
return n / _W
|
||||
}
|
||||
|
||||
// poly represents an integer via a polynomial in Z[x]/(x^K+1)
// where K is the FFT length and b is the computation basis 1<<(m*_W).
// If P = a[0] + a[1] x + ... + a[K-1] x^(K-1), the associated natural
// number is P(b): Int adds coefficient i at an offset of i*m words.
type poly struct {
	k uint  // k is such that K = 1<<k.
	m int   // the number of words per coefficient; P(1<<(m*_W)) is the original number.
	a []nat // a slice of at most K m-word coefficients.
}
|
||||
|
||||
// polyFromNat slices the number x into a polynomial
|
||||
// with 1<<k coefficients made of m words.
|
||||
func polyFromNat(x nat, k uint, m int) poly {
|
||||
p := poly{k: k, m: m}
|
||||
length := len(x)/m + 1
|
||||
p.a = make([]nat, length)
|
||||
for i := range p.a {
|
||||
if len(x) < m {
|
||||
p.a[i] = make(nat, m)
|
||||
copy(p.a[i], x)
|
||||
break
|
||||
}
|
||||
p.a[i] = x[:m]
|
||||
x = x[m:]
|
||||
}
|
||||
return p
|
||||
}
|
||||
|
||||
// Int evaluates back a poly to its integer value.
|
||||
func (p *poly) Int() nat {
|
||||
length := len(p.a)*p.m + 1
|
||||
if na := len(p.a); na > 0 {
|
||||
length += len(p.a[na-1])
|
||||
}
|
||||
n := make(nat, length)
|
||||
m := p.m
|
||||
np := n
|
||||
for i := range p.a {
|
||||
l := len(p.a[i])
|
||||
c := addVV(np[:l], np[:l], p.a[i])
|
||||
if np[l] < ^big.Word(0) {
|
||||
np[l] += c
|
||||
} else {
|
||||
addVW(np[l:], np[l:], c)
|
||||
}
|
||||
np = np[m:]
|
||||
}
|
||||
n = trim(n)
|
||||
return n
|
||||
}
|
||||
|
||||
func trim(n nat) nat {
|
||||
for i := range n {
|
||||
if n[len(n)-1-i] != 0 {
|
||||
return n[:len(n)-i]
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Mul multiplies p and q modulo X^K-1, where K = 1<<p.k.
|
||||
// The product is done via a Fourier transform.
|
||||
func (p *poly) Mul(q *poly) poly {
|
||||
// extra=2 because:
|
||||
// * some power of 2 is a K-th root of unity when n is a multiple of K/2.
|
||||
// * 2 itself is a square (see fermat.ShiftHalf)
|
||||
n := valueSize(p.k, p.m, 2)
|
||||
|
||||
pv, qv := p.Transform(n), q.Transform(n)
|
||||
rv := pv.Mul(&qv)
|
||||
r := rv.InvTransform()
|
||||
r.m = p.m
|
||||
return r
|
||||
}
|
||||
|
||||
// A polValues represents the values of a poly at the odd powers of a
// (2K)-th root of unity θ=2^l in Z/(b^n+1)Z, where b^n = 2^Kl.
//
// Transform and NTransform build values as K slices of n+1 words each,
// cut from one contiguous allocation.
type polValues struct {
	k      uint     // k is such that K = 1<<k.
	n      int      // the length of coefficients, n*_W a multiple of 1<<k.
	values []fermat // a slice of K (n+1)-word values
}
|
||||
|
||||
// Transform evaluates p at θ^i for i = 0...K-1, where
|
||||
// θ is a K-th primitive root of unity in Z/(b^n+1)Z.
|
||||
func (p *poly) Transform(n int) polValues {
|
||||
k := p.k
|
||||
inputbits := make([]big.Word, (n+1)<<k)
|
||||
input := make([]fermat, 1<<k)
|
||||
// Now computed q(ω^i) for i = 0 ... K-1
|
||||
valbits := make([]big.Word, (n+1)<<k)
|
||||
values := make([]fermat, 1<<k)
|
||||
for i := range values {
|
||||
input[i] = inputbits[i*(n+1) : (i+1)*(n+1)]
|
||||
if i < len(p.a) {
|
||||
copy(input[i], p.a[i])
|
||||
}
|
||||
values[i] = fermat(valbits[i*(n+1) : (i+1)*(n+1)])
|
||||
}
|
||||
fourier(values, input, false, n, k)
|
||||
return polValues{k, n, values}
|
||||
}
|
||||
|
||||
// InvTransform reconstructs p (modulo X^K - 1) from its
|
||||
// values at θ^i for i = 0..K-1.
|
||||
func (v *polValues) InvTransform() poly {
|
||||
k, n := v.k, v.n
|
||||
|
||||
// Perform an inverse Fourier transform to recover p.
|
||||
pbits := make([]big.Word, (n+1)<<k)
|
||||
p := make([]fermat, 1<<k)
|
||||
for i := range p {
|
||||
p[i] = fermat(pbits[i*(n+1) : (i+1)*(n+1)])
|
||||
}
|
||||
fourier(p, v.values, true, n, k)
|
||||
// Divide by K, and untwist q to recover p.
|
||||
u := make(fermat, n+1)
|
||||
a := make([]nat, 1<<k)
|
||||
for i := range p {
|
||||
u.Shift(p[i], -int(k))
|
||||
copy(p[i], u)
|
||||
a[i] = nat(p[i])
|
||||
}
|
||||
return poly{k: k, m: 0, a: a}
|
||||
}
|
||||
|
||||
// NTransform evaluates p at θω^i for i = 0...K-1, where
|
||||
// θ is a (2K)-th primitive root of unity in Z/(b^n+1)Z
|
||||
// and ω = θ².
|
||||
func (p *poly) NTransform(n int) polValues {
|
||||
k := p.k
|
||||
if len(p.a) >= 1<<k {
|
||||
panic("Transform: len(p.a) >= 1<<k")
|
||||
}
|
||||
// θ is represented as a shift.
|
||||
θshift := (n * _W) >> k
|
||||
// p(x) = a_0 + a_1 x + ... + a_{K-1} x^(K-1)
|
||||
// p(θx) = q(x) where
|
||||
// q(x) = a_0 + θa_1 x + ... + θ^(K-1) a_{K-1} x^(K-1)
|
||||
//
|
||||
// Twist p by θ to obtain q.
|
||||
tbits := make([]big.Word, (n+1)<<k)
|
||||
twisted := make([]fermat, 1<<k)
|
||||
src := make(fermat, n+1)
|
||||
for i := range twisted {
|
||||
twisted[i] = fermat(tbits[i*(n+1) : (i+1)*(n+1)])
|
||||
if i < len(p.a) {
|
||||
for i := range src {
|
||||
src[i] = 0
|
||||
}
|
||||
copy(src, p.a[i])
|
||||
twisted[i].Shift(src, θshift*i)
|
||||
}
|
||||
}
|
||||
|
||||
// Now computed q(ω^i) for i = 0 ... K-1
|
||||
valbits := make([]big.Word, (n+1)<<k)
|
||||
values := make([]fermat, 1<<k)
|
||||
for i := range values {
|
||||
values[i] = fermat(valbits[i*(n+1) : (i+1)*(n+1)])
|
||||
}
|
||||
fourier(values, twisted, false, n, k)
|
||||
return polValues{k, n, values}
|
||||
}
|
||||
|
||||
// InvTransform reconstructs a polynomial from its values at
|
||||
// roots of x^K+1. The m field of the returned polynomial
|
||||
// is unspecified.
|
||||
func (v *polValues) InvNTransform() poly {
|
||||
k := v.k
|
||||
n := v.n
|
||||
θshift := (n * _W) >> k
|
||||
|
||||
// Perform an inverse Fourier transform to recover q.
|
||||
qbits := make([]big.Word, (n+1)<<k)
|
||||
q := make([]fermat, 1<<k)
|
||||
for i := range q {
|
||||
q[i] = fermat(qbits[i*(n+1) : (i+1)*(n+1)])
|
||||
}
|
||||
fourier(q, v.values, true, n, k)
|
||||
|
||||
// Divide by K, and untwist q to recover p.
|
||||
u := make(fermat, n+1)
|
||||
a := make([]nat, 1<<k)
|
||||
for i := range q {
|
||||
u.Shift(q[i], -int(k)-i*θshift)
|
||||
copy(q[i], u)
|
||||
a[i] = nat(q[i])
|
||||
}
|
||||
return poly{k: k, m: 0, a: a}
|
||||
}
|
||||
|
||||
// fourier performs an unnormalized Fourier transform
|
||||
// of src, a length 1<<k vector of numbers modulo b^n+1
|
||||
// where b = 1<<_W.
|
||||
func fourier(dst []fermat, src []fermat, backward bool, n int, k uint) {
|
||||
var rec func(dst, src []fermat, size uint)
|
||||
tmp := make(fermat, n+1) // pre-allocate temporary variables.
|
||||
tmp2 := make(fermat, n+1) // pre-allocate temporary variables.
|
||||
|
||||
// The recursion function of the FFT.
|
||||
// The root of unity used in the transform is ω=1<<(ω2shift/2).
|
||||
// The source array may use shifted indices (i.e. the i-th
|
||||
// element is src[i << idxShift]).
|
||||
rec = func(dst, src []fermat, size uint) {
|
||||
idxShift := k - size
|
||||
ω2shift := (4 * n * _W) >> size
|
||||
if backward {
|
||||
ω2shift = -ω2shift
|
||||
}
|
||||
|
||||
// Easy cases.
|
||||
if len(src[0]) != n+1 || len(dst[0]) != n+1 {
|
||||
panic("len(src[0]) != n+1 || len(dst[0]) != n+1")
|
||||
}
|
||||
switch size {
|
||||
case 0:
|
||||
copy(dst[0], src[0])
|
||||
return
|
||||
case 1:
|
||||
dst[0].Add(src[0], src[1<<idxShift]) // dst[0] = src[0] + src[1]
|
||||
dst[1].Sub(src[0], src[1<<idxShift]) // dst[1] = src[0] - src[1]
|
||||
return
|
||||
}
|
||||
|
||||
// Let P(x) = src[0] + src[1<<idxShift] * x + ... + src[K-1 << idxShift] * x^(K-1)
|
||||
// The P(x) = Q1(x²) + x*Q2(x²)
|
||||
// where Q1's coefficients are src with indices shifted by 1
|
||||
// where Q2's coefficients are src[1<<idxShift:] with indices shifted by 1
|
||||
|
||||
// Split destination vectors in halves.
|
||||
dst1 := dst[:1<<(size-1)]
|
||||
dst2 := dst[1<<(size-1):]
|
||||
// Transform Q1 and Q2 in the halves.
|
||||
rec(dst1, src, size-1)
|
||||
rec(dst2, src[1<<idxShift:], size-1)
|
||||
|
||||
// Reconstruct P's transform from transforms of Q1 and Q2.
|
||||
// dst[i] is dst1[i] + ω^i * dst2[i]
|
||||
// dst[i + 1<<(k-1)] is dst1[i] + ω^(i+K/2) * dst2[i]
|
||||
//
|
||||
for i := range dst1 {
|
||||
tmp.ShiftHalf(dst2[i], i*ω2shift, tmp2) // ω^i * dst2[i]
|
||||
dst2[i].Sub(dst1[i], tmp)
|
||||
dst1[i].Add(dst1[i], tmp)
|
||||
}
|
||||
}
|
||||
rec(dst, src, k)
|
||||
}
|
||||
|
||||
// Mul returns the pointwise product of p and q.
|
||||
func (p *polValues) Mul(q *polValues) (r polValues) {
|
||||
n := p.n
|
||||
r.k, r.n = p.k, p.n
|
||||
r.values = make([]fermat, len(p.values))
|
||||
bits := make([]big.Word, len(p.values)*(n+1))
|
||||
buf := make(fermat, 8*n)
|
||||
for i := range r.values {
|
||||
r.values[i] = bits[i*(n+1) : (i+1)*(n+1)]
|
||||
z := buf.Mul(p.values[i], q.values[i])
|
||||
copy(r.values[i], z)
|
||||
}
|
||||
return
|
||||
}
|
||||
Reference in New Issue
Block a user