vendor: Update everything
GitHub-Pull-Request: https://github.com/syncthing/syncthing/pull/4620
This commit is contained in:
10
vendor/github.com/remyoudompheng/bigfft/arith_386.s
generated
vendored
10
vendor/github.com/remyoudompheng/bigfft/arith_386.s
generated
vendored
@@ -264,13 +264,3 @@ E7: SUBL $1, BX // i--
|
||||
MOVL DX, r+32(FP)
|
||||
RET
|
||||
|
||||
// func bitLen(x Word) (n int)
|
||||
TEXT ·bitLen(SB),7,$0
|
||||
BSRL x+0(FP), AX
|
||||
JZ Z1
|
||||
INCL AX
|
||||
MOVL AX, n+4(FP)
|
||||
RET
|
||||
|
||||
Z1: MOVL $0, n+4(FP)
|
||||
RET
|
||||
|
||||
130
vendor/github.com/remyoudompheng/bigfft/arith_amd64.s
generated
vendored
130
vendor/github.com/remyoudompheng/bigfft/arith_amd64.s
generated
vendored
@@ -5,16 +5,6 @@
|
||||
// This file provides fast assembly versions for the elementary
|
||||
// arithmetic operations on vectors implemented in arith.go.
|
||||
|
||||
// Literal instruction for MOVQ $0, CX.
|
||||
// (MOVQ $0, reg is translated to XORQ reg, reg and clears CF.)
|
||||
#define ZERO_CX BYTE $0x48; \
|
||||
BYTE $0xc7; \
|
||||
BYTE $0xc1; \
|
||||
BYTE $0x00; \
|
||||
BYTE $0x00; \
|
||||
BYTE $0x00; \
|
||||
BYTE $0x00
|
||||
|
||||
// func mulWW(x, y Word) (z1, z0 Word)
|
||||
TEXT ·mulWW(SB),7,$0
|
||||
MOVQ x+0(FP), AX
|
||||
@@ -33,6 +23,11 @@ TEXT ·divWW(SB),7,$0
|
||||
MOVQ DX, r+32(FP)
|
||||
RET
|
||||
|
||||
// The carry bit is saved with SBBQ Rx, Rx: if the carry was set, Rx is -1, otherwise it is 0.
|
||||
// It is restored with ADDQ Rx, Rx: if Rx was -1 the carry is set, otherwise it is cleared.
|
||||
// This is faster than using rotate instructions.
|
||||
//
|
||||
// CAUTION: Note that MOVQ $0, Rx is translated to XORQ Rx, Rx which clears the carry bit!
|
||||
|
||||
// func addVV(z, x, y []Word) (c Word)
|
||||
TEXT ·addVV(SB),7,$0
|
||||
@@ -50,7 +45,7 @@ TEXT ·addVV(SB),7,$0
|
||||
|
||||
U1: // n >= 0
|
||||
// regular loop body unrolled 4x
|
||||
RCRQ $1, CX // CF = c
|
||||
ADDQ CX, CX // restore CF
|
||||
MOVQ 0(R8)(SI*8), R11
|
||||
MOVQ 8(R8)(SI*8), R12
|
||||
MOVQ 16(R8)(SI*8), R13
|
||||
@@ -63,7 +58,7 @@ U1: // n >= 0
|
||||
MOVQ R12, 8(R10)(SI*8)
|
||||
MOVQ R13, 16(R10)(SI*8)
|
||||
MOVQ R14, 24(R10)(SI*8)
|
||||
RCLQ $1, CX // c = CF
|
||||
SBBQ CX, CX // save CF
|
||||
|
||||
ADDQ $4, SI // i += 4
|
||||
SUBQ $4, DI // n -= 4
|
||||
@@ -73,17 +68,18 @@ V1: ADDQ $4, DI // n += 4
|
||||
JLE E1 // if n <= 0 goto E1
|
||||
|
||||
L1: // n > 0
|
||||
RCRQ $1, CX // CF = c
|
||||
ADDQ CX, CX // restore CF
|
||||
MOVQ 0(R8)(SI*8), R11
|
||||
ADCQ 0(R9)(SI*8), R11
|
||||
MOVQ R11, 0(R10)(SI*8)
|
||||
RCLQ $1, CX // c = CF
|
||||
SBBQ CX, CX // save CF
|
||||
|
||||
ADDQ $1, SI // i++
|
||||
SUBQ $1, DI // n--
|
||||
JG L1 // if n > 0 goto L1
|
||||
|
||||
E1: MOVQ CX, c+72(FP) // return c
|
||||
E1: NEGQ CX
|
||||
MOVQ CX, c+72(FP) // return c
|
||||
RET
|
||||
|
||||
|
||||
@@ -104,7 +100,7 @@ TEXT ·subVV(SB),7,$0
|
||||
|
||||
U2: // n >= 0
|
||||
// regular loop body unrolled 4x
|
||||
RCRQ $1, CX // CF = c
|
||||
ADDQ CX, CX // restore CF
|
||||
MOVQ 0(R8)(SI*8), R11
|
||||
MOVQ 8(R8)(SI*8), R12
|
||||
MOVQ 16(R8)(SI*8), R13
|
||||
@@ -117,7 +113,7 @@ U2: // n >= 0
|
||||
MOVQ R12, 8(R10)(SI*8)
|
||||
MOVQ R13, 16(R10)(SI*8)
|
||||
MOVQ R14, 24(R10)(SI*8)
|
||||
RCLQ $1, CX // c = CF
|
||||
SBBQ CX, CX // save CF
|
||||
|
||||
ADDQ $4, SI // i += 4
|
||||
SUBQ $4, DI // n -= 4
|
||||
@@ -127,17 +123,18 @@ V2: ADDQ $4, DI // n += 4
|
||||
JLE E2 // if n <= 0 goto E2
|
||||
|
||||
L2: // n > 0
|
||||
RCRQ $1, CX // CF = c
|
||||
ADDQ CX, CX // restore CF
|
||||
MOVQ 0(R8)(SI*8), R11
|
||||
SBBQ 0(R9)(SI*8), R11
|
||||
MOVQ R11, 0(R10)(SI*8)
|
||||
RCLQ $1, CX // c = CF
|
||||
SBBQ CX, CX // save CF
|
||||
|
||||
ADDQ $1, SI // i++
|
||||
SUBQ $1, DI // n--
|
||||
JG L2 // if n > 0 goto L2
|
||||
|
||||
E2: MOVQ CX, c+72(FP) // return c
|
||||
E2: NEGQ CX
|
||||
MOVQ CX, c+72(FP) // return c
|
||||
RET
|
||||
|
||||
|
||||
@@ -161,11 +158,11 @@ U3: // n >= 0
|
||||
MOVQ 16(R8)(SI*8), R13
|
||||
MOVQ 24(R8)(SI*8), R14
|
||||
ADDQ CX, R11
|
||||
ZERO_CX
|
||||
ADCQ $0, R12
|
||||
ADCQ $0, R13
|
||||
ADCQ $0, R14
|
||||
SETCS CX // c = CF
|
||||
SBBQ CX, CX // save CF
|
||||
NEGQ CX
|
||||
MOVQ R11, 0(R10)(SI*8)
|
||||
MOVQ R12, 8(R10)(SI*8)
|
||||
MOVQ R13, 16(R10)(SI*8)
|
||||
@@ -181,8 +178,8 @@ V3: ADDQ $4, DI // n += 4
|
||||
L3: // n > 0
|
||||
ADDQ 0(R8)(SI*8), CX
|
||||
MOVQ CX, 0(R10)(SI*8)
|
||||
ZERO_CX
|
||||
RCLQ $1, CX // c = CF
|
||||
SBBQ CX, CX // save CF
|
||||
NEGQ CX
|
||||
|
||||
ADDQ $1, SI // i++
|
||||
SUBQ $1, DI // n--
|
||||
@@ -199,7 +196,7 @@ TEXT ·subVW(SB),7,$0
|
||||
MOVQ x+24(FP), R8
|
||||
MOVQ y+48(FP), CX // c = y
|
||||
MOVQ z+0(FP), R10
|
||||
|
||||
|
||||
MOVQ $0, SI // i = 0
|
||||
|
||||
// s/JL/JMP/ below to disable the unrolled loop
|
||||
@@ -213,11 +210,11 @@ U4: // n >= 0
|
||||
MOVQ 16(R8)(SI*8), R13
|
||||
MOVQ 24(R8)(SI*8), R14
|
||||
SUBQ CX, R11
|
||||
ZERO_CX
|
||||
SBBQ $0, R12
|
||||
SBBQ $0, R13
|
||||
SBBQ $0, R14
|
||||
SETCS CX // c = CF
|
||||
SBBQ CX, CX // save CF
|
||||
NEGQ CX
|
||||
MOVQ R11, 0(R10)(SI*8)
|
||||
MOVQ R12, 8(R10)(SI*8)
|
||||
MOVQ R13, 16(R10)(SI*8)
|
||||
@@ -234,8 +231,8 @@ L4: // n > 0
|
||||
MOVQ 0(R8)(SI*8), R11
|
||||
SUBQ CX, R11
|
||||
MOVQ R11, 0(R10)(SI*8)
|
||||
ZERO_CX
|
||||
RCLQ $1, CX // c = CF
|
||||
SBBQ CX, CX // save CF
|
||||
NEGQ CX
|
||||
|
||||
ADDQ $1, SI // i++
|
||||
SUBQ $1, DI // n--
|
||||
@@ -304,7 +301,7 @@ L9: MOVQ AX, DX // w = w1
|
||||
SHRQ CX, DX:AX // w>>s | w1<<ŝ
|
||||
MOVQ DX, (R10)(BX*8) // z[i] = w>>s | w1<<ŝ
|
||||
ADDQ $1, BX // i++
|
||||
|
||||
|
||||
E9: CMPQ BX, R11
|
||||
JL L9 // i < n-1
|
||||
|
||||
@@ -325,6 +322,41 @@ TEXT ·mulAddVWW(SB),7,$0
|
||||
MOVQ r+56(FP), CX // c = r
|
||||
MOVQ z_len+8(FP), R11
|
||||
MOVQ $0, BX // i = 0
|
||||
|
||||
CMPQ R11, $4
|
||||
JL E5
|
||||
|
||||
U5: // i+4 <= n
|
||||
// regular loop body unrolled 4x
|
||||
MOVQ (0*8)(R8)(BX*8), AX
|
||||
MULQ R9
|
||||
ADDQ CX, AX
|
||||
ADCQ $0, DX
|
||||
MOVQ AX, (0*8)(R10)(BX*8)
|
||||
MOVQ DX, CX
|
||||
MOVQ (1*8)(R8)(BX*8), AX
|
||||
MULQ R9
|
||||
ADDQ CX, AX
|
||||
ADCQ $0, DX
|
||||
MOVQ AX, (1*8)(R10)(BX*8)
|
||||
MOVQ DX, CX
|
||||
MOVQ (2*8)(R8)(BX*8), AX
|
||||
MULQ R9
|
||||
ADDQ CX, AX
|
||||
ADCQ $0, DX
|
||||
MOVQ AX, (2*8)(R10)(BX*8)
|
||||
MOVQ DX, CX
|
||||
MOVQ (3*8)(R8)(BX*8), AX
|
||||
MULQ R9
|
||||
ADDQ CX, AX
|
||||
ADCQ $0, DX
|
||||
MOVQ AX, (3*8)(R10)(BX*8)
|
||||
MOVQ DX, CX
|
||||
ADDQ $4, BX // i += 4
|
||||
|
||||
LEAQ 4(BX), DX
|
||||
CMPQ DX, R11
|
||||
JLE U5
|
||||
JMP E5
|
||||
|
||||
L5: MOVQ (R8)(BX*8), AX
|
||||
@@ -350,6 +382,34 @@ TEXT ·addMulVVW(SB),7,$0
|
||||
MOVQ z_len+8(FP), R11
|
||||
MOVQ $0, BX // i = 0
|
||||
MOVQ $0, CX // c = 0
|
||||
MOVQ R11, R12
|
||||
ANDQ $-2, R12
|
||||
CMPQ R11, $2
|
||||
JAE A6
|
||||
JMP E6
|
||||
|
||||
A6:
|
||||
MOVQ (R8)(BX*8), AX
|
||||
MULQ R9
|
||||
ADDQ (R10)(BX*8), AX
|
||||
ADCQ $0, DX
|
||||
ADDQ CX, AX
|
||||
ADCQ $0, DX
|
||||
MOVQ DX, CX
|
||||
MOVQ AX, (R10)(BX*8)
|
||||
|
||||
MOVQ (8)(R8)(BX*8), AX
|
||||
MULQ R9
|
||||
ADDQ (8)(R10)(BX*8), AX
|
||||
ADCQ $0, DX
|
||||
ADDQ CX, AX
|
||||
ADCQ $0, DX
|
||||
MOVQ DX, CX
|
||||
MOVQ AX, (8)(R10)(BX*8)
|
||||
|
||||
ADDQ $2, BX
|
||||
CMPQ BX, R12
|
||||
JL A6
|
||||
JMP E6
|
||||
|
||||
L6: MOVQ (R8)(BX*8), AX
|
||||
@@ -387,13 +447,3 @@ E7: SUBQ $1, BX // i--
|
||||
MOVQ DX, r+64(FP)
|
||||
RET
|
||||
|
||||
// func bitLen(x Word) (n int)
|
||||
TEXT ·bitLen(SB),7,$0
|
||||
BSRQ x+0(FP), AX
|
||||
JZ Z1
|
||||
ADDQ $1, AX
|
||||
MOVQ AX, n+8(FP)
|
||||
RET
|
||||
|
||||
Z1: MOVQ $0, n+8(FP)
|
||||
RET
|
||||
|
||||
141
vendor/github.com/remyoudompheng/bigfft/arith_arm.s
generated
vendored
141
vendor/github.com/remyoudompheng/bigfft/arith_arm.s
generated
vendored
@@ -2,76 +2,68 @@
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// This file provides fast assembly versions for the elementary
|
||||
// arithmetic operations on vectors implemented in arith.go.
|
||||
|
||||
#define CFLAG 29 // bit position of carry flag
|
||||
|
||||
// func addVV(z, x, y []Word) (c Word)
|
||||
TEXT ·addVV(SB),7,$0
|
||||
MOVW $0, R0
|
||||
TEXT ·addVV(SB),NOSPLIT,$0
|
||||
ADD.S $0, R0 // clear carry flag
|
||||
MOVW z+0(FP), R1
|
||||
MOVW z_len+4(FP), R4
|
||||
MOVW x+12(FP), R2
|
||||
MOVW y+24(FP), R3
|
||||
MOVW n+4(FP), R4
|
||||
MOVW R4<<2, R4
|
||||
ADD R1, R4
|
||||
ADD R4<<2, R1, R4
|
||||
B E1
|
||||
L1:
|
||||
MOVW.P 4(R2), R5
|
||||
MOVW.P 4(R3), R6
|
||||
MOVW R0, CPSR
|
||||
ADC.S R6, R5
|
||||
MOVW.P R5, 4(R1)
|
||||
MOVW CPSR, R0
|
||||
E1:
|
||||
CMP R1, R4
|
||||
TEQ R1, R4
|
||||
BNE L1
|
||||
|
||||
MOVW R0>>CFLAG, R0
|
||||
AND $1, R0
|
||||
MOVW $0, R0
|
||||
MOVW.CS $1, R0
|
||||
MOVW R0, c+36(FP)
|
||||
RET
|
||||
|
||||
|
||||
// func subVV(z, x, y []Word) (c Word)
|
||||
// (same as addVV except for SBC instead of ADC and label names)
|
||||
TEXT ·subVV(SB),7,$0
|
||||
MOVW $(1<<CFLAG), R0
|
||||
TEXT ·subVV(SB),NOSPLIT,$0
|
||||
SUB.S $0, R0 // clear borrow flag
|
||||
MOVW z+0(FP), R1
|
||||
MOVW z_len+4(FP), R4
|
||||
MOVW x+12(FP), R2
|
||||
MOVW y+24(FP), R3
|
||||
MOVW n+4(FP), R4
|
||||
MOVW R4<<2, R4
|
||||
ADD R1, R4
|
||||
ADD R4<<2, R1, R4
|
||||
B E2
|
||||
L2:
|
||||
MOVW.P 4(R2), R5
|
||||
MOVW.P 4(R3), R6
|
||||
MOVW R0, CPSR
|
||||
SBC.S R6, R5
|
||||
MOVW.P R5, 4(R1)
|
||||
MOVW CPSR, R0
|
||||
E2:
|
||||
CMP R1, R4
|
||||
TEQ R1, R4
|
||||
BNE L2
|
||||
|
||||
MOVW R0>>CFLAG, R0
|
||||
AND $1, R0
|
||||
EOR $1, R0
|
||||
MOVW $0, R0
|
||||
MOVW.CC $1, R0
|
||||
MOVW R0, c+36(FP)
|
||||
RET
|
||||
|
||||
|
||||
// func addVW(z, x []Word, y Word) (c Word)
|
||||
TEXT ·addVW(SB),7,$0
|
||||
TEXT ·addVW(SB),NOSPLIT,$0
|
||||
MOVW z+0(FP), R1
|
||||
MOVW z_len+4(FP), R4
|
||||
MOVW x+12(FP), R2
|
||||
MOVW y+24(FP), R3
|
||||
MOVW n+4(FP), R4
|
||||
MOVW R4<<2, R4
|
||||
ADD R1, R4
|
||||
CMP R1, R4
|
||||
ADD R4<<2, R1, R4
|
||||
TEQ R1, R4
|
||||
BNE L3a
|
||||
MOVW R3, c+28(FP)
|
||||
RET
|
||||
@@ -79,33 +71,29 @@ L3a:
|
||||
MOVW.P 4(R2), R5
|
||||
ADD.S R3, R5
|
||||
MOVW.P R5, 4(R1)
|
||||
MOVW CPSR, R0
|
||||
B E3
|
||||
L3:
|
||||
MOVW.P 4(R2), R5
|
||||
MOVW R0, CPSR
|
||||
ADC.S $0, R5
|
||||
MOVW.P R5, 4(R1)
|
||||
MOVW CPSR, R0
|
||||
E3:
|
||||
CMP R1, R4
|
||||
TEQ R1, R4
|
||||
BNE L3
|
||||
|
||||
MOVW R0>>CFLAG, R0
|
||||
AND $1, R0
|
||||
MOVW $0, R0
|
||||
MOVW.CS $1, R0
|
||||
MOVW R0, c+28(FP)
|
||||
RET
|
||||
|
||||
|
||||
// func subVW(z, x []Word, y Word) (c Word)
|
||||
TEXT ·subVW(SB),7,$0
|
||||
TEXT ·subVW(SB),NOSPLIT,$0
|
||||
MOVW z+0(FP), R1
|
||||
MOVW z_len+4(FP), R4
|
||||
MOVW x+12(FP), R2
|
||||
MOVW y+24(FP), R3
|
||||
MOVW n+4(FP), R4
|
||||
MOVW R4<<2, R4
|
||||
ADD R1, R4
|
||||
CMP R1, R4
|
||||
ADD R4<<2, R1, R4
|
||||
TEQ R1, R4
|
||||
BNE L4a
|
||||
MOVW R3, c+28(FP)
|
||||
RET
|
||||
@@ -113,38 +101,33 @@ L4a:
|
||||
MOVW.P 4(R2), R5
|
||||
SUB.S R3, R5
|
||||
MOVW.P R5, 4(R1)
|
||||
MOVW CPSR, R0
|
||||
B E4
|
||||
L4:
|
||||
MOVW.P 4(R2), R5
|
||||
MOVW R0, CPSR
|
||||
SBC.S $0, R5
|
||||
MOVW.P R5, 4(R1)
|
||||
MOVW CPSR, R0
|
||||
E4:
|
||||
CMP R1, R4
|
||||
TEQ R1, R4
|
||||
BNE L4
|
||||
|
||||
MOVW R0>>CFLAG, R0
|
||||
AND $1, R0
|
||||
EOR $1, R0
|
||||
MOVW $0, R0
|
||||
MOVW.CC $1, R0
|
||||
MOVW R0, c+28(FP)
|
||||
RET
|
||||
|
||||
|
||||
// func shlVU(z, x []Word, s uint) (c Word)
|
||||
TEXT ·shlVU(SB),7,$0
|
||||
MOVW n+4(FP), R5
|
||||
CMP $0, R5
|
||||
TEXT ·shlVU(SB),NOSPLIT,$0
|
||||
MOVW z_len+4(FP), R5
|
||||
TEQ $0, R5
|
||||
BEQ X7
|
||||
|
||||
MOVW z+0(FP), R1
|
||||
MOVW x+12(FP), R2
|
||||
MOVW R5<<2, R5
|
||||
ADD R5, R2
|
||||
ADD R1, R5
|
||||
ADD R5<<2, R2, R2
|
||||
ADD R5<<2, R1, R5
|
||||
MOVW s+24(FP), R3
|
||||
CMP $0, R3 // shift 0 is special
|
||||
TEQ $0, R3 // shift 0 is special
|
||||
BEQ Y7
|
||||
ADD $4, R1 // stop one word early
|
||||
MOVW $32, R4
|
||||
@@ -163,7 +146,7 @@ L7:
|
||||
MOVW.W R7, -4(R5)
|
||||
MOVW R6<<R3, R7
|
||||
E7:
|
||||
CMP R1, R5
|
||||
TEQ R1, R5
|
||||
BNE L7
|
||||
|
||||
MOVW R7, -4(R5)
|
||||
@@ -172,7 +155,7 @@ E7:
|
||||
Y7: // copy loop, because shift 0 == shift 32
|
||||
MOVW.W -4(R2), R6
|
||||
MOVW.W R6, -4(R5)
|
||||
CMP R1, R5
|
||||
TEQ R1, R5
|
||||
BNE Y7
|
||||
|
||||
X7:
|
||||
@@ -182,17 +165,16 @@ X7:
|
||||
|
||||
|
||||
// func shrVU(z, x []Word, s uint) (c Word)
|
||||
TEXT ·shrVU(SB),7,$0
|
||||
MOVW n+4(FP), R5
|
||||
CMP $0, R5
|
||||
TEXT ·shrVU(SB),NOSPLIT,$0
|
||||
MOVW z_len+4(FP), R5
|
||||
TEQ $0, R5
|
||||
BEQ X6
|
||||
|
||||
MOVW z+0(FP), R1
|
||||
MOVW x+12(FP), R2
|
||||
MOVW R5<<2, R5
|
||||
ADD R1, R5
|
||||
ADD R5<<2, R1, R5
|
||||
MOVW s+24(FP), R3
|
||||
CMP $0, R3 // shift 0 is special
|
||||
TEQ $0, R3 // shift 0 is special
|
||||
BEQ Y6
|
||||
SUB $4, R5 // stop one word early
|
||||
MOVW $32, R4
|
||||
@@ -213,7 +195,7 @@ L6:
|
||||
MOVW.P R7, 4(R1)
|
||||
MOVW R6>>R3, R7
|
||||
E6:
|
||||
CMP R1, R5
|
||||
TEQ R1, R5
|
||||
BNE L6
|
||||
|
||||
MOVW R7, 0(R1)
|
||||
@@ -222,7 +204,7 @@ E6:
|
||||
Y6: // copy loop, because shift 0 == shift 32
|
||||
MOVW.P 4(R2), R6
|
||||
MOVW.P R6, 4(R1)
|
||||
CMP R1, R5
|
||||
TEQ R1, R5
|
||||
BNE Y6
|
||||
|
||||
X6:
|
||||
@@ -232,15 +214,14 @@ X6:
|
||||
|
||||
|
||||
// func mulAddVWW(z, x []Word, y, r Word) (c Word)
|
||||
TEXT ·mulAddVWW(SB),7,$0
|
||||
TEXT ·mulAddVWW(SB),NOSPLIT,$0
|
||||
MOVW $0, R0
|
||||
MOVW z+0(FP), R1
|
||||
MOVW z_len+4(FP), R5
|
||||
MOVW x+12(FP), R2
|
||||
MOVW y+24(FP), R3
|
||||
MOVW r+28(FP), R4
|
||||
MOVW n+4(FP), R5
|
||||
MOVW R5<<2, R5
|
||||
ADD R1, R5
|
||||
ADD R5<<2, R1, R5
|
||||
B E8
|
||||
|
||||
// word loop
|
||||
@@ -252,7 +233,7 @@ L8:
|
||||
MOVW.P R6, 4(R1)
|
||||
MOVW R7, R4
|
||||
E8:
|
||||
CMP R1, R5
|
||||
TEQ R1, R5
|
||||
BNE L8
|
||||
|
||||
MOVW R4, c+32(FP)
|
||||
@@ -260,14 +241,13 @@ E8:
|
||||
|
||||
|
||||
// func addMulVVW(z, x []Word, y Word) (c Word)
|
||||
TEXT ·addMulVVW(SB),7,$0
|
||||
TEXT ·addMulVVW(SB),NOSPLIT,$0
|
||||
MOVW $0, R0
|
||||
MOVW z+0(FP), R1
|
||||
MOVW z_len+4(FP), R5
|
||||
MOVW x+12(FP), R2
|
||||
MOVW y+24(FP), R3
|
||||
MOVW n+4(FP), R5
|
||||
MOVW R5<<2, R5
|
||||
ADD R1, R5
|
||||
ADD R5<<2, R1, R5
|
||||
MOVW $0, R4
|
||||
B E9
|
||||
|
||||
@@ -283,7 +263,7 @@ L9:
|
||||
MOVW.P R6, 4(R1)
|
||||
MOVW R7, R4
|
||||
E9:
|
||||
CMP R1, R5
|
||||
TEQ R1, R5
|
||||
BNE L9
|
||||
|
||||
MOVW R4, c+28(FP)
|
||||
@@ -291,31 +271,22 @@ E9:
|
||||
|
||||
|
||||
// func divWVW(z []Word, xn Word, x []Word, y Word) (r Word)
|
||||
TEXT ·divWVW(SB),7,$0
|
||||
TEXT ·divWVW(SB),NOSPLIT,$0
|
||||
// ARM has no multiword division, so use portable code.
|
||||
B ·divWVW_g(SB)
|
||||
|
||||
|
||||
// func divWW(x1, x0, y Word) (q, r Word)
|
||||
TEXT ·divWW(SB),7,$0
|
||||
TEXT ·divWW(SB),NOSPLIT,$0
|
||||
// ARM has no multiword division, so use portable code.
|
||||
B ·divWW_g(SB)
|
||||
|
||||
|
||||
// func mulWW(x, y Word) (z1, z0 Word)
|
||||
TEXT ·mulWW(SB),7,$0
|
||||
TEXT ·mulWW(SB),NOSPLIT,$0
|
||||
MOVW x+0(FP), R1
|
||||
MOVW y+4(FP), R2
|
||||
MULLU R1, R2, (R4, R3)
|
||||
MOVW R4, z1+8(FP)
|
||||
MOVW R3, z0+12(FP)
|
||||
RET
|
||||
|
||||
// func bitLen(x Word) (n int)
|
||||
TEXT ·bitLen(SB),7,$0
|
||||
MOVW x+0(FP), R0
|
||||
CLZ R0, R0
|
||||
MOVW $32, R1
|
||||
SUB.S R0, R1
|
||||
MOVW R1, n+4(FP)
|
||||
RET
|
||||
|
||||
1
vendor/github.com/remyoudompheng/bigfft/arith_decl.go
generated
vendored
1
vendor/github.com/remyoudompheng/bigfft/arith_decl.go
generated
vendored
@@ -18,4 +18,3 @@ func shrVU(z, x []Word, s uint) (c Word)
|
||||
func mulAddVWW(z, x []Word, y, r Word) (c Word)
|
||||
func addMulVVW(z, x []Word, y Word) (c Word)
|
||||
func divWVW(z []Word, xn Word, x []Word, y Word) (r Word)
|
||||
func bitLen(x Word) (n int)
|
||||
|
||||
40
vendor/github.com/remyoudompheng/bigfft/fermat.go
generated
vendored
40
vendor/github.com/remyoudompheng/bigfft/fermat.go
generated
vendored
@@ -42,7 +42,6 @@ func (z fermat) norm() {
|
||||
// Shift computes (x << k) mod (2^n+1).
|
||||
func (z fermat) Shift(x fermat, k int) {
|
||||
if len(z) != len(x) {
|
||||
println(len(z), len(x))
|
||||
panic("len(z) != len(x) in Shift")
|
||||
}
|
||||
n := len(x) - 1
|
||||
@@ -84,7 +83,9 @@ func (z fermat) Shift(x fermat, k int) {
|
||||
z[n] -= b
|
||||
}
|
||||
// Add back 1.
|
||||
if z[0] < ^big.Word(0) {
|
||||
if z[n] > 0 {
|
||||
z[n]--
|
||||
} else if z[0] < ^big.Word(0) {
|
||||
z[0]++
|
||||
} else {
|
||||
addVW(z, z, 1)
|
||||
@@ -141,6 +142,9 @@ func (z fermat) Sub(x, y fermat) fermat {
|
||||
}
|
||||
|
||||
func (z fermat) Mul(x, y fermat) fermat {
|
||||
if len(x) != len(y) {
|
||||
panic("Mul: len(x) != len(y)")
|
||||
}
|
||||
n := len(x) - 1
|
||||
if n < 30 {
|
||||
z = z[:2*n+2]
|
||||
@@ -166,16 +170,30 @@ func (z fermat) Mul(x, y fermat) fermat {
|
||||
if len(z) > 2*n+1 {
|
||||
panic("len(z) > 2n+1")
|
||||
}
|
||||
i := len(z) - (n + 1) // i <= n
|
||||
c := subVV(z[1:i+1], z[1:i+1], z[n+1:])
|
||||
z = z[:n+1]
|
||||
z[n]++ // Add -1.
|
||||
subVW(z[i+1:], z[i+1:], c)
|
||||
// Add 1.
|
||||
if z[n] == 1 {
|
||||
z[n] = 0
|
||||
// We now have
|
||||
// z = z[:n] + 1<<(n*W) * z[n:2n+1]
|
||||
// which normalizes to:
|
||||
// z = z[:n] - z[n:2n] + z[2n]
|
||||
c1 := big.Word(0)
|
||||
if len(z) > 2*n {
|
||||
c1 = addVW(z[:n], z[:n], z[2*n])
|
||||
}
|
||||
c2 := big.Word(0)
|
||||
if len(z) >= 2*n {
|
||||
c2 = subVV(z[:n], z[:n], z[n:2*n])
|
||||
} else {
|
||||
addVW(z, z, 1)
|
||||
m := len(z) - n
|
||||
c2 = subVV(z[:m], z[:m], z[n:])
|
||||
c2 = subVW(z[m:n], z[m:n], c2)
|
||||
}
|
||||
// Restore carries.
|
||||
// Subtracting z[n] -= c2 is the same
|
||||
// as z[0] += c2
|
||||
z = z[:n+1]
|
||||
z[n] = c1
|
||||
c := addVW(z, z, c2)
|
||||
if c != 0 {
|
||||
panic("impossible")
|
||||
}
|
||||
z.norm()
|
||||
return z
|
||||
|
||||
22
vendor/github.com/remyoudompheng/bigfft/fft.go
generated
vendored
22
vendor/github.com/remyoudompheng/bigfft/fft.go
generated
vendored
@@ -87,21 +87,25 @@ func fftSize(x, y nat) (k uint, m int) {
|
||||
return
|
||||
}
|
||||
|
||||
// valueSize returns the smallest multiple of 1<<k greater than
|
||||
// 2*m*_W + k, that is also a multiple of _W. If extra > 0, the
|
||||
// returned value is only required to be a multiple of 1<<(k-extra)
|
||||
// valueSize returns the length (in words) to use for polynomial
|
||||
// coefficients, to compute a correct product of polynomials P*Q
|
||||
// where deg(P*Q) < K (== 1<<k) and where coefficients of P and Q are
|
||||
// less than b^m (== 1 << (m*_W)).
|
||||
// The chosen length (in bits) must be a multiple of 1 << (k-extra).
|
||||
func valueSize(k uint, m int, extra uint) int {
|
||||
n := 2*m*_W + int(k)
|
||||
// The coefficients of P*Q are less than b^(2m)*K
|
||||
// so we need W * valueSize >= 2*m*W+K
|
||||
n := 2*m*_W + int(k) // necessary bits
|
||||
K := 1 << (k - extra)
|
||||
if K < _W {
|
||||
K = _W
|
||||
}
|
||||
n = ((n / K) + 1) * K
|
||||
n = ((n / K) + 1) * K // round to a multiple of K
|
||||
return n / _W
|
||||
}
|
||||
|
||||
// poly represents an integer via a polynomial in Z[x]/(x^K+1)
|
||||
// where K is the FFT length and b is the computation basis 1<<(m*_W).
|
||||
// where K is the FFT length and b^m is the computation basis 1<<(m*_W).
|
||||
// If P = a[0] + a[1] x + ... a[n] x^(K-1), the associated natural number
|
||||
// is P(b^m).
|
||||
type poly struct {
|
||||
@@ -175,11 +179,11 @@ func (p *poly) Mul(q *poly) poly {
|
||||
return r
|
||||
}
|
||||
|
||||
// A polValues represents the value of a poly at the odd powers of a
|
||||
// (2K)-th root of unity θ=2^l in Z/(b^n+1)Z, where b^n = 2^Kl.
|
||||
// A polValues represents the value of a poly at the powers of a
|
||||
// K-th root of unity θ=2^(l/2) in Z/(b^n+1)Z, where b^n = 2^(K/4*l).
|
||||
type polValues struct {
|
||||
k uint // k is such that K = 1<<k.
|
||||
n int // the length of coefficients, n*_W a multiple of 1<<k.
|
||||
n int // the length of coefficients, n*_W a multiple of K/4.
|
||||
values []fermat // a slice of K (n+1)-word values
|
||||
}
|
||||
|
||||
|
||||
70
vendor/github.com/remyoudompheng/bigfft/scan.go
generated
vendored
Normal file
70
vendor/github.com/remyoudompheng/bigfft/scan.go
generated
vendored
Normal file
@@ -0,0 +1,70 @@
|
||||
package bigfft
|
||||
|
||||
import (
|
||||
"math/big"
|
||||
)
|
||||
|
||||
// FromDecimalString converts the base 10 string
|
||||
// representation of a natural (non-negative) number
|
||||
// into a *big.Int.
|
||||
// Its asymptotic complexity is less than quadratic.
|
||||
func FromDecimalString(s string) *big.Int {
|
||||
var sc scanner
|
||||
z := new(big.Int)
|
||||
sc.scan(z, s)
|
||||
return z
|
||||
}
|
||||
|
||||
type scanner struct {
|
||||
// powers[i] is 10^(2^i * quadraticScanThreshold).
|
||||
powers []*big.Int
|
||||
}
|
||||
|
||||
func (s *scanner) chunkSize(size int) (int, *big.Int) {
|
||||
if size <= quadraticScanThreshold {
|
||||
panic("size < quadraticScanThreshold")
|
||||
}
|
||||
pow := uint(0)
|
||||
for n := size; n > quadraticScanThreshold; n /= 2 {
|
||||
pow++
|
||||
}
|
||||
// threshold * 2^(pow-1) < size < (threshold+1) * 2^pow
|
||||
return quadraticScanThreshold << (pow - 1), s.power(pow - 1)
|
||||
}
|
||||
|
||||
func (s *scanner) power(k uint) *big.Int {
|
||||
for i := len(s.powers); i <= int(k); i++ {
|
||||
z := new(big.Int)
|
||||
if i == 0 {
|
||||
if quadraticScanThreshold%14 != 0 {
|
||||
panic("quadraticScanThreshold % 14 != 0")
|
||||
}
|
||||
z.Exp(big.NewInt(1e14), big.NewInt(quadraticScanThreshold/14), nil)
|
||||
} else {
|
||||
z.Mul(s.powers[i-1], s.powers[i-1])
|
||||
}
|
||||
s.powers = append(s.powers, z)
|
||||
}
|
||||
return s.powers[k]
|
||||
}
|
||||
|
||||
func (s *scanner) scan(z *big.Int, str string) {
|
||||
if len(str) <= quadraticScanThreshold {
|
||||
z.SetString(str, 10)
|
||||
return
|
||||
}
|
||||
sz, pow := s.chunkSize(len(str))
|
||||
// Scan the left half.
|
||||
s.scan(z, str[:len(str)-sz])
|
||||
// FIXME: reuse temporaries.
|
||||
left := Mul(z, pow)
|
||||
// Scan the right half
|
||||
s.scan(z, str[len(str)-sz:])
|
||||
z.Add(z, left)
|
||||
}
|
||||
|
||||
// quadraticScanThreshold is the number of digits
|
||||
// below which big.Int.SetString is more efficient
|
||||
// than subquadratic algorithms.
|
||||
// 1232 digits fit in 4096 bits.
|
||||
const quadraticScanThreshold = 1232
|
||||
Reference in New Issue
Block a user