ring/crypto/fipsmodule/ec/asm/p256-x86_64-asm.pl [3221:3272]: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - lea $U2(%rsp), $r_ptr # U1*H^2 call __ecp_nistz256_mul_mont$x # p256_mul_mont(U2, U1, Hsqr); ___ { ####################################################################### # operate in 4-5-0-1 "name space" that matches multiplication output # my ($acc0,$acc1,$acc2,$acc3,$t3,$t4)=($acc4,$acc5,$acc0,$acc1,$acc2,$acc3); my ($poly1, $poly3)=($acc6,$acc7); $code.=<<___; #lea $U2(%rsp), $a_ptr #lea $Hsqr(%rsp), $r_ptr # 2*U1*H^2 #call __ecp_nistz256_mul_by_2 # ecp_nistz256_mul_by_2(Hsqr, U2); xor $t4, $t4 add $acc0, $acc0 # a0:a3+a0:a3 lea $Rsqr(%rsp), $a_ptr adc $acc1, $acc1 mov $acc0, $t0 adc $acc2, $acc2 adc $acc3, $acc3 mov $acc1, $t1 adc \$0, $t4 sub \$-1, $acc0 mov $acc2, $t2 sbb $poly1, $acc1 sbb \$0, $acc2 mov $acc3, $t3 sbb $poly3, $acc3 sbb \$0, $t4 cmovc $t0, $acc0 mov 8*0($a_ptr), $t0 cmovc $t1, $acc1 mov 8*1($a_ptr), $t1 cmovc $t2, $acc2 mov 8*2($a_ptr), $t2 cmovc $t3, $acc3 mov 8*3($a_ptr), $t3 call __ecp_nistz256_sub$x # p256_sub(res_x, Rsqr, Hsqr); lea $Hcub(%rsp), $b_ptr lea $res_x(%rsp), $r_ptr call __ecp_nistz256_sub_from$x # p256_sub(res_x, res_x, Hcub); mov $U2+8*0(%rsp), $t0 mov $U2+8*1(%rsp), $t1 mov $U2+8*2(%rsp), $t2 mov $U2+8*3(%rsp), $t3 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ring/crypto/fipsmodule/ec/asm/p256-x86_64-asm.pl [3547:3598]: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - lea $U2(%rsp), $r_ptr # U1*H^2 call __ecp_nistz256_mul_mont$x # p256_mul_mont(U2, in1_x, Hsqr); ___ { ####################################################################### # operate in 4-5-0-1 "name space" that matches multiplication output # my ($acc0,$acc1,$acc2,$acc3,$t3,$t4)=($acc4,$acc5,$acc0,$acc1,$acc2,$acc3); my ($poly1, $poly3)=($acc6,$acc7); $code.=<<___; #lea $U2(%rsp), $a_ptr #lea $Hsqr(%rsp), $r_ptr # 2*U1*H^2 #call __ecp_nistz256_mul_by_2 # ecp_nistz256_mul_by_2(Hsqr, U2); xor $t4, $t4 add $acc0, $acc0 # a0:a3+a0:a3 lea $Rsqr(%rsp), $a_ptr adc $acc1, $acc1 mov $acc0, $t0 adc $acc2, $acc2 adc $acc3, $acc3 mov $acc1, $t1 adc \$0, $t4 sub \$-1, $acc0 mov $acc2, $t2 sbb $poly1, $acc1 sbb \$0, $acc2 mov $acc3, $t3 sbb $poly3, $acc3 sbb \$0, $t4 cmovc $t0, $acc0 mov 8*0($a_ptr), $t0 cmovc $t1, $acc1 mov 8*1($a_ptr), $t1 cmovc $t2, $acc2 mov 8*2($a_ptr), $t2 cmovc $t3, $acc3 mov 8*3($a_ptr), $t3 call __ecp_nistz256_sub$x # p256_sub(res_x, Rsqr, Hsqr); lea $Hcub(%rsp), $b_ptr lea $res_x(%rsp), $r_ptr call __ecp_nistz256_sub_from$x # p256_sub(res_x, res_x, Hcub); mov $U2+8*0(%rsp), $t0 mov $U2+8*1(%rsp), $t1 mov $U2+8*2(%rsp), $t2 mov $U2+8*3(%rsp), $t3 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -