in ring/crypto/fipsmodule/ec/asm/ecp_nistz256-x86.pl [121:412]
&sub ("edx","ebp");
&add ("eax","edx");
&adc ("ebx","edx");
&mov (&DWP(0,"edi"),"eax");
&adc ("ecx","edx");
&mov (&DWP(4,"edi"),"ebx");
&mov (&DWP(8,"edi"),"ecx");
&mov ("eax",&DWP(12,"esi"));
&mov ("ebx",&DWP(16,"esi"));
&adc ("eax",0);
&mov ("ecx",&DWP(20,"esi"));
&adc ("ebx",0);
&mov (&DWP(12,"edi"),"eax");
&adc ("ecx",0);
&mov (&DWP(16,"edi"),"ebx");
&mov (&DWP(20,"edi"),"ecx");
&mov ("eax",&DWP(24,"esi"));
&mov ("ebx",&DWP(28,"esi"));
&adc ("eax","ebp");
&adc ("ebx","edx");
&mov (&DWP(24,"edi"),"eax");
&sbb ("esi","esi"); # broadcast carry bit
&mov (&DWP(28,"edi"),"ebx");
# ret = tmp >> 1
&mov ("eax",&DWP(0,"edi"));
&mov ("ebx",&DWP(4,"edi"));
&mov ("ecx",&DWP(8,"edi"));
&mov ("edx",&DWP(12,"edi"));
&shr ("eax",1);
&mov ("ebp","ebx");
&shl ("ebx",31);
&or ("eax","ebx");
&shr ("ebp",1);
&mov ("ebx","ecx");
&shl ("ecx",31);
&mov (&DWP(0,"edi"),"eax");
&or ("ebp","ecx");
&mov ("eax",&DWP(16,"edi"));
&shr ("ebx",1);
&mov ("ecx","edx");
&shl ("edx",31);
&mov (&DWP(4,"edi"),"ebp");
&or ("ebx","edx");
&mov ("ebp",&DWP(20,"edi"));
&shr ("ecx",1);
&mov ("edx","eax");
&shl ("eax",31);
&mov (&DWP(8,"edi"),"ebx");
&or ("ecx","eax");
&mov ("ebx",&DWP(24,"edi"));
&shr ("edx",1);
&mov ("eax","ebp");
&shl ("ebp",31);
&mov (&DWP(12,"edi"),"ecx");
&or ("edx","ebp");
&mov ("ecx",&DWP(28,"edi"));
&shr ("eax",1);
&mov ("ebp","ebx");
&shl ("ebx",31);
&mov (&DWP(16,"edi"),"edx");
&or ("eax","ebx");
&shr ("ebp",1);
&mov ("ebx","ecx");
&shl ("ecx",31);
&mov (&DWP(20,"edi"),"eax");
&or ("ebp","ecx");
&shr ("ebx",1);
&shl ("esi",31);
&mov (&DWP(24,"edi"),"ebp");
&or ("ebx","esi"); # handle top-most carry bit
&mov (&DWP(28,"edi"),"ebx");
&ret ();
&function_end_B("_ecp_nistz256_div_by_2");
########################################################################
# void GFp_nistz256_add(BN_ULONG edi[8],const BN_ULONG esi[8],
# const BN_ULONG ebp[8]);
#
# Exported entry point for modular addition. It only moves the three
# stack arguments into the registers named in the prototype above and
# hands over to _ecp_nistz256_add, which does the arithmetic and takes
# its operands in exactly those registers.
&function_begin("GFp_nistz256_add");
&mov ("edi",&wparam(0)); # edi = result buffer
&mov ("esi",&wparam(1)); # esi = first addend
&mov ("ebp",&wparam(2)); # ebp = second addend
&call ("_ecp_nistz256_add");
&function_end("GFp_nistz256_add");
# _ecp_nistz256_add: internal modular addition,
# edi[0..7] = (esi[0..7] + ebp[0..7]) mod p.
#
# Register interface (nothing is read from the stack):
# esi - first addend, eight 32-bit words, least significant first
# ebp - second addend, same layout
# edi - result buffer, same layout
# Clobbers eax, ebx, ecx, edx, esi, ebp and the flags; edi is not
# modified.
#
# p is the modulus whose words are, least significant first,
# 0xffffffff,0xffffffff,0xffffffff,0,0,0,1,0xffffffff (these are the
# constants used by the trial subtraction below).
#
# The reduction is branch-free: a single conditional subtraction of p,
# selected by a mask. That is a full reduction provided both inputs
# are already below p (presumably guaranteed by callers - verify).
&function_begin_B("_ecp_nistz256_add");

# Raw sum, words 0..3. The interleaved mov instructions do not touch
# the flags, so the carry chain runs unbroken through add/adc.
&mov ("eax",&DWP(0,"esi"));
&mov ("ebx",&DWP(4,"esi"));
&mov ("ecx",&DWP(8,"esi"));
&add ("eax",&DWP(0,"ebp"));
&mov ("edx",&DWP(12,"esi"));
&adc ("ebx",&DWP(4,"ebp"));
&mov (&DWP(0,"edi"),"eax");
&adc ("ecx",&DWP(8,"ebp"));
&mov (&DWP(4,"edi"),"ebx");
&adc ("edx",&DWP(12,"ebp"));
&mov (&DWP(8,"edi"),"ecx");
&mov (&DWP(12,"edi"),"edx");

# Raw sum, words 4..7, continuing the same carry chain.
&mov ("eax",&DWP(16,"esi"));
&mov ("ebx",&DWP(20,"esi"));
&mov ("ecx",&DWP(24,"esi"));
&adc ("eax",&DWP(16,"ebp"));
&mov ("edx",&DWP(28,"esi"));
&adc ("ebx",&DWP(20,"ebp"));
&mov (&DWP(16,"edi"),"eax");
&adc ("ecx",&DWP(24,"ebp"));
&mov (&DWP(20,"edi"),"ebx");
# esi is zeroed with mov rather than xor: xor would clear the carry
# flag in the middle of the chain. All loads through esi are done.
&mov ("esi",0);
&adc ("edx",&DWP(28,"ebp"));
&mov (&DWP(24,"edi"),"ecx");
&adc ("esi",0); # esi = carry out of the 256-bit sum
&mov (&DWP(28,"edi"),"edx");

# if a+b >= modulus, subtract modulus.
#
# But since comparison implies subtraction, we subtract modulus
# to see if it borrows, and then subtract it for real if
# subtraction didn't borrow.
#
# The differences computed here are thrown away (each register is
# reloaded before its result could be used); only the borrow that
# finally lands in esi matters.
&mov ("eax",&DWP(0,"edi"));
&mov ("ebx",&DWP(4,"edi"));
&mov ("ecx",&DWP(8,"edi"));
&sub ("eax",-1);
&mov ("edx",&DWP(12,"edi"));
&sbb ("ebx",-1);
&mov ("eax",&DWP(16,"edi"));
&sbb ("ecx",-1);
&mov ("ebx",&DWP(20,"edi"));
&sbb ("edx",0);
&mov ("ecx",&DWP(24,"edi"));
&sbb ("eax",0);
&mov ("edx",&DWP(28,"edi"));
&sbb ("ebx",0);
&sbb ("ecx",1);
&sbb ("edx",-1);
&sbb ("esi",0); # esi = 0 if sum >= modulus, 0xffffffff otherwise

# Note that because mod has special form, i.e. consists of
# 0xffffffff, 1 and 0s, we can conditionally synthesize it
# by using borrow.
#
# After the inversion esi is 0xffffffff when the modulus has to be
# subtracted and 0 otherwise; ebp is the same condition as 0/1.
# Words 0..2 and 7 subtract esi, words 3..5 subtract 0 and word 6
# subtracts ebp, i.e. either the modulus or zero.
&not ("esi");
&mov ("eax",&DWP(0,"edi"));
&mov ("ebp","esi");
&mov ("ebx",&DWP(4,"edi"));
&shr ("ebp",31);
&mov ("ecx",&DWP(8,"edi"));
&sub ("eax","esi");
&mov ("edx",&DWP(12,"edi"));
&sbb ("ebx","esi");
&mov (&DWP(0,"edi"),"eax");
&sbb ("ecx","esi");
&mov (&DWP(4,"edi"),"ebx");
&sbb ("edx",0);
&mov (&DWP(8,"edi"),"ecx");
&mov (&DWP(12,"edi"),"edx");

&mov ("eax",&DWP(16,"edi"));
&mov ("ebx",&DWP(20,"edi"));
&mov ("ecx",&DWP(24,"edi"));
&sbb ("eax",0);
&mov ("edx",&DWP(28,"edi"));
&sbb ("ebx",0);
&mov (&DWP(16,"edi"),"eax");
&sbb ("ecx","ebp");
&mov (&DWP(20,"edi"),"ebx");
&sbb ("edx","esi");
&mov (&DWP(24,"edi"),"ecx");
&mov (&DWP(28,"edi"),"edx");

&ret ();
&function_end_B("_ecp_nistz256_add");
# _ecp_nistz256_sub: internal modular subtraction,
# edi[0..7] = (esi[0..7] - ebp[0..7]) mod p.
#
# Register interface (nothing is read from the stack):
# esi - minuend, eight 32-bit words, least significant first
# ebp - subtrahend, same layout
# edi - result buffer, same layout
# Clobbers eax, ebx, ecx, edx, esi, ebp and the flags; edi is not
# modified.
#
# The correction is branch-free: the modulus is added back exactly
# once, selected by a mask derived from the final borrow. That yields
# a fully reduced result provided both inputs are already below the
# modulus (presumably guaranteed by callers - verify).
&function_begin_B("_ecp_nistz256_sub");
# Raw difference, words 0..3. The interleaved mov instructions do not
# touch the flags, so the borrow chain runs unbroken through sub/sbb.
&mov ("eax",&DWP(0,"esi"));
&mov ("ebx",&DWP(4,"esi"));
&mov ("ecx",&DWP(8,"esi"));
&sub ("eax",&DWP(0,"ebp"));
&mov ("edx",&DWP(12,"esi"));
&sbb ("ebx",&DWP(4,"ebp"));
&mov (&DWP(0,"edi"),"eax");
&sbb ("ecx",&DWP(8,"ebp"));
&mov (&DWP(4,"edi"),"ebx");
&sbb ("edx",&DWP(12,"ebp"));
&mov (&DWP(8,"edi"),"ecx");
&mov (&DWP(12,"edi"),"edx");
# Raw difference, words 4..7, continuing the same borrow chain.
&mov ("eax",&DWP(16,"esi"));
&mov ("ebx",&DWP(20,"esi"));
&mov ("ecx",&DWP(24,"esi"));
&sbb ("eax",&DWP(16,"ebp"));
&mov ("edx",&DWP(28,"esi"));
&sbb ("ebx",&DWP(20,"ebp"));
&sbb ("ecx",&DWP(24,"ebp"));
&mov (&DWP(16,"edi"),"eax");
&sbb ("edx",&DWP(28,"ebp"));
&mov (&DWP(20,"edi"),"ebx");
# esi is no longer needed as a pointer; it becomes 0xffffffff if the
# subtraction borrowed and 0 if it did not.
&sbb ("esi","esi"); # broadcast borrow bit
&mov (&DWP(24,"edi"),"ecx");
&mov (&DWP(28,"edi"),"edx");
# if a-b borrows, add modulus.
#
# Note that because mod has special form, i.e. consists of
# 0xffffffff, 1 and 0s, we can conditionally synthesize it by
# assigning borrow bit to one register, %ebp, and its negative
# to another, %esi. But we started by calculating %esi...
#
# Words 0..2 and 7 add esi (all-ones or 0), words 3..5 add 0 and
# word 6 adds ebp (1 or 0), so the value added is either the modulus
# or zero. The carry out of the last adc is discarded.
&mov ("eax",&DWP(0,"edi"));
&mov ("ebp","esi");
&mov ("ebx",&DWP(4,"edi"));
&shr ("ebp",31); # ebp = borrow as 0/1
&mov ("ecx",&DWP(8,"edi"));
&add ("eax","esi");
&mov ("edx",&DWP(12,"edi"));
&adc ("ebx","esi");
&mov (&DWP(0,"edi"),"eax");
&adc ("ecx","esi");
&mov (&DWP(4,"edi"),"ebx");
&adc ("edx",0);
&mov (&DWP(8,"edi"),"ecx");
&mov (&DWP(12,"edi"),"edx");
# Upper half of the conditional add-back, same carry chain.
&mov ("eax",&DWP(16,"edi"));
&mov ("ebx",&DWP(20,"edi"));
&mov ("ecx",&DWP(24,"edi"));
&adc ("eax",0);
&mov ("edx",&DWP(28,"edi"));
&adc ("ebx",0);
&mov (&DWP(16,"edi"),"eax");
&adc ("ecx","ebp");
&mov (&DWP(20,"edi"),"ebx");
&adc ("edx","esi");
&mov (&DWP(24,"edi"),"ecx");
&mov (&DWP(28,"edi"),"edx");
&ret ();
&function_end_B("_ecp_nistz256_sub");
########################################################################
# void GFp_nistz256_neg(BN_ULONG edi[8],const BN_ULONG ebp[8]);
#
# Modular negation, implemented as 0 - a through _ecp_nistz256_sub.
# The caller's input is loaded into ebp (the subtrahend); esi (the
# minuend) is pointed at an eight-word scratch buffer on the stack
# that is filled with zeros first.
&function_begin("GFp_nistz256_neg");
&mov ("edi",&wparam(0)); # edi = result buffer
&mov ("ebp",&wparam(1)); # ebp = value to negate

&xor ("eax","eax");
&stack_push(8); # reserve 8 dwords of scratch
&mov ("esi","esp"); # esi = address of the zero operand
for (my $off=0; $off<32; $off+=4) {
	&mov (&DWP($off,"esp"),"eax"); # clear one word of scratch
}

&call ("_ecp_nistz256_sub"); # edi[] = 0 - ebp[] mod p

&stack_pop(8);
&function_end("GFp_nistz256_neg");
# _picup_eax: helper for position-independent addressing.
# Copies the dword at the top of the stack into eax and returns. On
# entry to a called routine that dword is the return address, so the
# caller gets back the address of the instruction following its call.
# Only eax is written.
&function_begin_B("_picup_eax");
&mov ("eax",&DWP(0,"esp")); # eax = return address of this call
&ret ();
&function_end_B("_picup_eax");
########################################################################
# void GFp_nistz256_mul_mont(BN_ULONG edi[8],const BN_ULONG esi[8],
# const BN_ULONG ebp[8]);
&function_begin("GFp_nistz256_mul_mont");
&mov ("esi",&wparam(1));
&mov ("ebp",&wparam(2));
if ($sse2) {
&call ("_picup_eax");
&set_label("pic");
&picmeup("eax","GFp_ia32cap_P","eax",&label("pic"));
&mov ("eax",&DWP(0,"eax")); }