From ec64358ac999cb737277b92ee018f7bb782c724f Mon Sep 17 00:00:00 2001 From: uji <49834542+uji@users.noreply.github.com> Date: Wed, 9 Mar 2022 08:23:13 +0900 Subject: [PATCH] crypto/bn256/cloudflare: fix asm for dynamic linking (#24476) When using -buildmode=shared, R15 is clobbered by a global variable access; use a different register instead. Fixes: #24439 --- crypto/bn256/cloudflare/gfp_amd64.s | 14 +++++++------- crypto/bn256/cloudflare/mul_amd64.h | 6 +++--- crypto/bn256/cloudflare/mul_bmi2_amd64.h | 12 ++++++------ 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/crypto/bn256/cloudflare/gfp_amd64.s b/crypto/bn256/cloudflare/gfp_amd64.s index bdb4ffb78..64c97eaed 100644 --- a/crypto/bn256/cloudflare/gfp_amd64.s +++ b/crypto/bn256/cloudflare/gfp_amd64.s @@ -49,7 +49,7 @@ TEXT ·gfpNeg(SB),0,$0-16 SBBQ 24(DI), R11 MOVQ $0, AX - gfpCarry(R8,R9,R10,R11,AX, R12,R13,R14,R15,BX) + gfpCarry(R8,R9,R10,R11,AX, R12,R13,R14,CX,BX) MOVQ c+0(FP), DI storeBlock(R8,R9,R10,R11, 0(DI)) @@ -68,7 +68,7 @@ TEXT ·gfpAdd(SB),0,$0-24 ADCQ 24(SI), R11 ADCQ $0, R12 - gfpCarry(R8,R9,R10,R11,R12, R13,R14,R15,AX,BX) + gfpCarry(R8,R9,R10,R11,R12, R13,R14,CX,AX,BX) MOVQ c+0(FP), DI storeBlock(R8,R9,R10,R11, 0(DI)) @@ -83,7 +83,7 @@ TEXT ·gfpSub(SB),0,$0-24 MOVQ ·p2+0(SB), R12 MOVQ ·p2+8(SB), R13 MOVQ ·p2+16(SB), R14 - MOVQ ·p2+24(SB), R15 + MOVQ ·p2+24(SB), CX MOVQ $0, AX SUBQ 0(SI), R8 @@ -94,12 +94,12 @@ TEXT ·gfpSub(SB),0,$0-24 CMOVQCC AX, R12 CMOVQCC AX, R13 CMOVQCC AX, R14 - CMOVQCC AX, R15 + CMOVQCC AX, CX ADDQ R12, R8 ADCQ R13, R9 ADCQ R14, R10 - ADCQ R15, R11 + ADCQ CX, R11 MOVQ c+0(FP), DI storeBlock(R8,R9,R10,R11, 0(DI)) @@ -115,7 +115,7 @@ TEXT ·gfpMul(SB),0,$160-24 mulBMI2(0(DI),8(DI),16(DI),24(DI), 0(SI)) storeBlock( R8, R9,R10,R11, 0(SP)) - storeBlock(R12,R13,R14,R15, 32(SP)) + storeBlock(R12,R13,R14,CX, 32(SP)) gfpReduceBMI2() JMP end @@ -125,5 +125,5 @@ nobmi2Mul: end: MOVQ c+0(FP), DI - storeBlock(R12,R13,R14,R15, 0(DI)) + storeBlock(R12,R13,R14,CX, 0(DI)) RET diff --git a/crypto/bn256/cloudflare/mul_amd64.h b/crypto/bn256/cloudflare/mul_amd64.h index bab5da831..9d8e4b37d 100644 --- a/crypto/bn256/cloudflare/mul_amd64.h +++ b/crypto/bn256/cloudflare/mul_amd64.h @@ -165,7 +165,7 @@ \ \ // Add the 512-bit intermediate to m*N loadBlock(96+stack, R8,R9,R10,R11) \ - loadBlock(128+stack, R12,R13,R14,R15) \ + loadBlock(128+stack, R12,R13,R14,CX) \ \ MOVQ $0, AX \ ADDQ 0+stack, R8 \ @@ -175,7 +175,7 @@ ADCQ 32+stack, R12 \ ADCQ 40+stack, R13 \ ADCQ 48+stack, R14 \ - ADCQ 56+stack, R15 \ + ADCQ 56+stack, CX \ ADCQ $0, AX \ \ - gfpCarry(R12,R13,R14,R15,AX, R8,R9,R10,R11,BX) + gfpCarry(R12,R13,R14,CX,AX, R8,R9,R10,R11,BX) diff --git a/crypto/bn256/cloudflare/mul_bmi2_amd64.h b/crypto/bn256/cloudflare/mul_bmi2_amd64.h index 71ad0499a..403566c6f 100644 --- a/crypto/bn256/cloudflare/mul_bmi2_amd64.h +++ b/crypto/bn256/cloudflare/mul_bmi2_amd64.h @@ -29,7 +29,7 @@ ADCQ $0, R14 \ \ MOVQ a2, DX \ - MOVQ $0, R15 \ + MOVQ $0, CX \ MULXQ 0+rb, AX, BX \ ADDQ AX, R10 \ ADCQ BX, R11 \ @@ -43,7 +43,7 @@ MULXQ 24+rb, AX, BX \ ADCQ AX, R13 \ ADCQ BX, R14 \ - ADCQ $0, R15 \ + ADCQ $0, CX \ \ MOVQ a3, DX \ MULXQ 0+rb, AX, BX \ @@ -52,13 +52,13 @@ MULXQ 16+rb, AX, BX \ ADCQ AX, R13 \ ADCQ BX, R14 \ - ADCQ $0, R15 \ + ADCQ $0, CX \ MULXQ 8+rb, AX, BX \ ADDQ AX, R12 \ ADCQ BX, R13 \ MULXQ 24+rb, AX, BX \ ADCQ AX, R14 \ - ADCQ BX, R15 + ADCQ BX, CX #define gfpReduceBMI2() \ \ // m = (T * N') mod R, store m in R8:R9:R10:R11 @@ -106,7 +106,7 @@ ADCQ 32(SP), R12 \ ADCQ 40(SP), R13 \ ADCQ 48(SP), R14 \ - ADCQ 56(SP), R15 \ + ADCQ 56(SP), CX \ ADCQ $0, AX \ \ - gfpCarry(R12,R13,R14,R15,AX, R8,R9,R10,R11,BX) + gfpCarry(R12,R13,R14,CX,AX, R8,R9,R10,R11,BX)