vendor: Update everything
GitHub-Pull-Request: https://github.com/syncthing/syncthing/pull/4620
This commit is contained in:
130
vendor/github.com/remyoudompheng/bigfft/arith_amd64.s
generated
vendored
130
vendor/github.com/remyoudompheng/bigfft/arith_amd64.s
generated
vendored
@@ -5,16 +5,6 @@
|
||||
// This file provides fast assembly versions for the elementary
|
||||
// arithmetic operations on vectors implemented in arith.go.
|
||||
|
||||
// Literal instruction for MOVQ $0, CX.
|
||||
// (MOVQ $0, reg is translated to XORQ reg, reg and clears CF.)
|
||||
#define ZERO_CX BYTE $0x48; \
|
||||
BYTE $0xc7; \
|
||||
BYTE $0xc1; \
|
||||
BYTE $0x00; \
|
||||
BYTE $0x00; \
|
||||
BYTE $0x00; \
|
||||
BYTE $0x00
|
||||
|
||||
// func mulWW(x, y Word) (z1, z0 Word)
|
||||
TEXT ·mulWW(SB),7,$0
|
||||
MOVQ x+0(FP), AX
|
||||
@@ -33,6 +23,11 @@ TEXT ·divWW(SB),7,$0
|
||||
MOVQ DX, r+32(FP)
|
||||
RET
|
||||
|
||||
// The carry bit is saved with SBBQ Rx, Rx: if the carry was set, Rx is -1, otherwise it is 0.
|
||||
// It is restored with ADDQ Rx, Rx: if Rx was -1 the carry is set, otherwise it is cleared.
|
||||
// This is faster than using rotate instructions.
|
||||
//
|
||||
// CAUTION: Note that MOVQ $0, Rx is translated to XORQ Rx, Rx which clears the carry bit!
|
||||
|
||||
// func addVV(z, x, y []Word) (c Word)
|
||||
TEXT ·addVV(SB),7,$0
|
||||
@@ -50,7 +45,7 @@ TEXT ·addVV(SB),7,$0
|
||||
|
||||
U1: // n >= 0
|
||||
// regular loop body unrolled 4x
|
||||
RCRQ $1, CX // CF = c
|
||||
ADDQ CX, CX // restore CF
|
||||
MOVQ 0(R8)(SI*8), R11
|
||||
MOVQ 8(R8)(SI*8), R12
|
||||
MOVQ 16(R8)(SI*8), R13
|
||||
@@ -63,7 +58,7 @@ U1: // n >= 0
|
||||
MOVQ R12, 8(R10)(SI*8)
|
||||
MOVQ R13, 16(R10)(SI*8)
|
||||
MOVQ R14, 24(R10)(SI*8)
|
||||
RCLQ $1, CX // c = CF
|
||||
SBBQ CX, CX // save CF
|
||||
|
||||
ADDQ $4, SI // i += 4
|
||||
SUBQ $4, DI // n -= 4
|
||||
@@ -73,17 +68,18 @@ V1: ADDQ $4, DI // n += 4
|
||||
JLE E1 // if n <= 0 goto E1
|
||||
|
||||
L1: // n > 0
|
||||
RCRQ $1, CX // CF = c
|
||||
ADDQ CX, CX // restore CF
|
||||
MOVQ 0(R8)(SI*8), R11
|
||||
ADCQ 0(R9)(SI*8), R11
|
||||
MOVQ R11, 0(R10)(SI*8)
|
||||
RCLQ $1, CX // c = CF
|
||||
SBBQ CX, CX // save CF
|
||||
|
||||
ADDQ $1, SI // i++
|
||||
SUBQ $1, DI // n--
|
||||
JG L1 // if n > 0 goto L1
|
||||
|
||||
E1: MOVQ CX, c+72(FP) // return c
|
||||
E1: NEGQ CX
|
||||
MOVQ CX, c+72(FP) // return c
|
||||
RET
|
||||
|
||||
|
||||
@@ -104,7 +100,7 @@ TEXT ·subVV(SB),7,$0
|
||||
|
||||
U2: // n >= 0
|
||||
// regular loop body unrolled 4x
|
||||
RCRQ $1, CX // CF = c
|
||||
ADDQ CX, CX // restore CF
|
||||
MOVQ 0(R8)(SI*8), R11
|
||||
MOVQ 8(R8)(SI*8), R12
|
||||
MOVQ 16(R8)(SI*8), R13
|
||||
@@ -117,7 +113,7 @@ U2: // n >= 0
|
||||
MOVQ R12, 8(R10)(SI*8)
|
||||
MOVQ R13, 16(R10)(SI*8)
|
||||
MOVQ R14, 24(R10)(SI*8)
|
||||
RCLQ $1, CX // c = CF
|
||||
SBBQ CX, CX // save CF
|
||||
|
||||
ADDQ $4, SI // i += 4
|
||||
SUBQ $4, DI // n -= 4
|
||||
@@ -127,17 +123,18 @@ V2: ADDQ $4, DI // n += 4
|
||||
JLE E2 // if n <= 0 goto E2
|
||||
|
||||
L2: // n > 0
|
||||
RCRQ $1, CX // CF = c
|
||||
ADDQ CX, CX // restore CF
|
||||
MOVQ 0(R8)(SI*8), R11
|
||||
SBBQ 0(R9)(SI*8), R11
|
||||
MOVQ R11, 0(R10)(SI*8)
|
||||
RCLQ $1, CX // c = CF
|
||||
SBBQ CX, CX // save CF
|
||||
|
||||
ADDQ $1, SI // i++
|
||||
SUBQ $1, DI // n--
|
||||
JG L2 // if n > 0 goto L2
|
||||
|
||||
E2: MOVQ CX, c+72(FP) // return c
|
||||
E2: NEGQ CX
|
||||
MOVQ CX, c+72(FP) // return c
|
||||
RET
|
||||
|
||||
|
||||
@@ -161,11 +158,11 @@ U3: // n >= 0
|
||||
MOVQ 16(R8)(SI*8), R13
|
||||
MOVQ 24(R8)(SI*8), R14
|
||||
ADDQ CX, R11
|
||||
ZERO_CX
|
||||
ADCQ $0, R12
|
||||
ADCQ $0, R13
|
||||
ADCQ $0, R14
|
||||
SETCS CX // c = CF
|
||||
SBBQ CX, CX // save CF
|
||||
NEGQ CX
|
||||
MOVQ R11, 0(R10)(SI*8)
|
||||
MOVQ R12, 8(R10)(SI*8)
|
||||
MOVQ R13, 16(R10)(SI*8)
|
||||
@@ -181,8 +178,8 @@ V3: ADDQ $4, DI // n += 4
|
||||
L3: // n > 0
|
||||
ADDQ 0(R8)(SI*8), CX
|
||||
MOVQ CX, 0(R10)(SI*8)
|
||||
ZERO_CX
|
||||
RCLQ $1, CX // c = CF
|
||||
SBBQ CX, CX // save CF
|
||||
NEGQ CX
|
||||
|
||||
ADDQ $1, SI // i++
|
||||
SUBQ $1, DI // n--
|
||||
@@ -199,7 +196,7 @@ TEXT ·subVW(SB),7,$0
|
||||
MOVQ x+24(FP), R8
|
||||
MOVQ y+48(FP), CX // c = y
|
||||
MOVQ z+0(FP), R10
|
||||
|
||||
|
||||
MOVQ $0, SI // i = 0
|
||||
|
||||
// s/JL/JMP/ below to disable the unrolled loop
|
||||
@@ -213,11 +210,11 @@ U4: // n >= 0
|
||||
MOVQ 16(R8)(SI*8), R13
|
||||
MOVQ 24(R8)(SI*8), R14
|
||||
SUBQ CX, R11
|
||||
ZERO_CX
|
||||
SBBQ $0, R12
|
||||
SBBQ $0, R13
|
||||
SBBQ $0, R14
|
||||
SETCS CX // c = CF
|
||||
SBBQ CX, CX // save CF
|
||||
NEGQ CX
|
||||
MOVQ R11, 0(R10)(SI*8)
|
||||
MOVQ R12, 8(R10)(SI*8)
|
||||
MOVQ R13, 16(R10)(SI*8)
|
||||
@@ -234,8 +231,8 @@ L4: // n > 0
|
||||
MOVQ 0(R8)(SI*8), R11
|
||||
SUBQ CX, R11
|
||||
MOVQ R11, 0(R10)(SI*8)
|
||||
ZERO_CX
|
||||
RCLQ $1, CX // c = CF
|
||||
SBBQ CX, CX // save CF
|
||||
NEGQ CX
|
||||
|
||||
ADDQ $1, SI // i++
|
||||
SUBQ $1, DI // n--
|
||||
@@ -304,7 +301,7 @@ L9: MOVQ AX, DX // w = w1
|
||||
SHRQ CX, DX:AX // w>>s | w1<<ŝ
|
||||
MOVQ DX, (R10)(BX*8) // z[i] = w>>s | w1<<ŝ
|
||||
ADDQ $1, BX // i++
|
||||
|
||||
|
||||
E9: CMPQ BX, R11
|
||||
JL L9 // i < n-1
|
||||
|
||||
@@ -325,6 +322,41 @@ TEXT ·mulAddVWW(SB),7,$0
|
||||
MOVQ r+56(FP), CX // c = r
|
||||
MOVQ z_len+8(FP), R11
|
||||
MOVQ $0, BX // i = 0
|
||||
|
||||
CMPQ R11, $4
|
||||
JL E5
|
||||
|
||||
U5: // i+4 <= n
|
||||
// regular loop body unrolled 4x
|
||||
MOVQ (0*8)(R8)(BX*8), AX
|
||||
MULQ R9
|
||||
ADDQ CX, AX
|
||||
ADCQ $0, DX
|
||||
MOVQ AX, (0*8)(R10)(BX*8)
|
||||
MOVQ DX, CX
|
||||
MOVQ (1*8)(R8)(BX*8), AX
|
||||
MULQ R9
|
||||
ADDQ CX, AX
|
||||
ADCQ $0, DX
|
||||
MOVQ AX, (1*8)(R10)(BX*8)
|
||||
MOVQ DX, CX
|
||||
MOVQ (2*8)(R8)(BX*8), AX
|
||||
MULQ R9
|
||||
ADDQ CX, AX
|
||||
ADCQ $0, DX
|
||||
MOVQ AX, (2*8)(R10)(BX*8)
|
||||
MOVQ DX, CX
|
||||
MOVQ (3*8)(R8)(BX*8), AX
|
||||
MULQ R9
|
||||
ADDQ CX, AX
|
||||
ADCQ $0, DX
|
||||
MOVQ AX, (3*8)(R10)(BX*8)
|
||||
MOVQ DX, CX
|
||||
ADDQ $4, BX // i += 4
|
||||
|
||||
LEAQ 4(BX), DX
|
||||
CMPQ DX, R11
|
||||
JLE U5
|
||||
JMP E5
|
||||
|
||||
L5: MOVQ (R8)(BX*8), AX
|
||||
@@ -350,6 +382,34 @@ TEXT ·addMulVVW(SB),7,$0
|
||||
MOVQ z_len+8(FP), R11
|
||||
MOVQ $0, BX // i = 0
|
||||
MOVQ $0, CX // c = 0
|
||||
MOVQ R11, R12
|
||||
ANDQ $-2, R12
|
||||
CMPQ R11, $2
|
||||
JAE A6
|
||||
JMP E6
|
||||
|
||||
A6:
|
||||
MOVQ (R8)(BX*8), AX
|
||||
MULQ R9
|
||||
ADDQ (R10)(BX*8), AX
|
||||
ADCQ $0, DX
|
||||
ADDQ CX, AX
|
||||
ADCQ $0, DX
|
||||
MOVQ DX, CX
|
||||
MOVQ AX, (R10)(BX*8)
|
||||
|
||||
MOVQ (8)(R8)(BX*8), AX
|
||||
MULQ R9
|
||||
ADDQ (8)(R10)(BX*8), AX
|
||||
ADCQ $0, DX
|
||||
ADDQ CX, AX
|
||||
ADCQ $0, DX
|
||||
MOVQ DX, CX
|
||||
MOVQ AX, (8)(R10)(BX*8)
|
||||
|
||||
ADDQ $2, BX
|
||||
CMPQ BX, R12
|
||||
JL A6
|
||||
JMP E6
|
||||
|
||||
L6: MOVQ (R8)(BX*8), AX
|
||||
@@ -387,13 +447,3 @@ E7: SUBQ $1, BX // i--
|
||||
MOVQ DX, r+64(FP)
|
||||
RET
|
||||
|
||||
// func bitLen(x Word) (n int)
|
||||
TEXT ·bitLen(SB),7,$0
|
||||
BSRQ x+0(FP), AX
|
||||
JZ Z1
|
||||
ADDQ $1, AX
|
||||
MOVQ AX, n+8(FP)
|
||||
RET
|
||||
|
||||
Z1: MOVQ $0, n+8(FP)
|
||||
RET
|
||||
|
||||
Reference in New Issue
Block a user