&sub()

in ring/crypto/fipsmodule/ec/asm/ecp_nistz256-x86.pl [121:412]


	&sub	("edx","ebp");

	&add	("eax","edx");
	&adc	("ebx","edx");
	&mov	(&DWP(0,"edi"),"eax");
	&adc	("ecx","edx");
	&mov	(&DWP(4,"edi"),"ebx");
	&mov	(&DWP(8,"edi"),"ecx");

	&mov	("eax",&DWP(12,"esi"));
	&mov	("ebx",&DWP(16,"esi"));
	&adc	("eax",0);
	&mov	("ecx",&DWP(20,"esi"));
	&adc	("ebx",0);
	&mov	(&DWP(12,"edi"),"eax");
	&adc	("ecx",0);
	&mov	(&DWP(16,"edi"),"ebx");
	&mov	(&DWP(20,"edi"),"ecx");

	&mov	("eax",&DWP(24,"esi"));
	&mov	("ebx",&DWP(28,"esi"));
	&adc	("eax","ebp");
	&adc	("ebx","edx");
	&mov	(&DWP(24,"edi"),"eax");
	&sbb	("esi","esi");			# broadcast carry bit
	&mov	(&DWP(28,"edi"),"ebx");

	# ret = tmp >> 1

	&mov	("eax",&DWP(0,"edi"));
	&mov	("ebx",&DWP(4,"edi"));
	&mov	("ecx",&DWP(8,"edi"));
	&mov	("edx",&DWP(12,"edi"));

	&shr	("eax",1);
	&mov	("ebp","ebx");
	&shl	("ebx",31);
	&or	("eax","ebx");

	&shr	("ebp",1);
	&mov	("ebx","ecx");
	&shl	("ecx",31);
	&mov	(&DWP(0,"edi"),"eax");
	&or	("ebp","ecx");
	&mov	("eax",&DWP(16,"edi"));

	&shr	("ebx",1);
	&mov	("ecx","edx");
	&shl	("edx",31);
	&mov	(&DWP(4,"edi"),"ebp");
	&or	("ebx","edx");
	&mov	("ebp",&DWP(20,"edi"));

	&shr	("ecx",1);
	&mov	("edx","eax");
	&shl	("eax",31);
	&mov	(&DWP(8,"edi"),"ebx");
	&or	("ecx","eax");
	&mov	("ebx",&DWP(24,"edi"));

	&shr	("edx",1);
	&mov	("eax","ebp");
	&shl	("ebp",31);
	&mov	(&DWP(12,"edi"),"ecx");
	&or	("edx","ebp");
	&mov	("ecx",&DWP(28,"edi"));

	&shr	("eax",1);
	&mov	("ebp","ebx");
	&shl	("ebx",31);
	&mov	(&DWP(16,"edi"),"edx");
	&or	("eax","ebx");

	&shr	("ebp",1);
	&mov	("ebx","ecx");
	&shl	("ecx",31);
	&mov	(&DWP(20,"edi"),"eax");
	&or	("ebp","ecx");

	&shr	("ebx",1);
	&shl	("esi",31);
	&mov	(&DWP(24,"edi"),"ebp");
	&or	("ebx","esi");			# handle top-most carry bit
	&mov	(&DWP(28,"edi"),"ebx");

	&ret	();
&function_end_B("_ecp_nistz256_div_by_2");

########################################################################
# void GFp_nistz256_add(BN_ULONG edi[8],const BN_ULONG esi[8],
#					const BN_ULONG ebp[8]);
&function_begin("GFp_nistz256_add");
	# Move the three stack arguments into the registers that the
	# register-based helper _ecp_nistz256_add reads its pointers from.
	# Emission order is kept as: esi <- arg 1, ebp <- arg 2, edi <- arg 0.
	foreach my $load (["esi",1],["ebp",2],["edi",0]) {
		my ($reg,$argno) = @$load;
		&mov	($reg,&wparam($argno));
	}

	# The helper writes the eight result limbs through edi.
	&call	("_ecp_nistz256_add");
&function_end("GFp_nistz256_add");

# _ecp_nistz256_add -- internal helper: result = (a + b) mod p.
#
# Register interface (nothing is read from the stack):
#   esi - pointer to a[8]       (32-bit limbs, least significant first)
#   ebp - pointer to b[8]
#   edi - pointer to result[8]
# Overwrites eax, ebx, ecx, edx, esi, ebp and the flags; edi itself is
# not modified. The routine contains no branches: whether the modulus is
# subtracted is decided by a mask, not by a jump.
#
# The modulus used below has limbs (low to high)
#   ffffffff ffffffff ffffffff 00000000 00000000 00000000 00000001 ffffffff
# i.e. 2^256 - 2^224 + 2^192 + 2^96 - 1.
&function_begin_B("_ecp_nistz256_add");
	# Limbs 0..3 of a + b. The carry is chained through add/adc; the
	# interleaved mov instructions leave the flags untouched, so loads
	# and stores can be scheduled between the arithmetic steps.
	&mov	("eax",&DWP(0,"esi"));
	&mov	("ebx",&DWP(4,"esi"));
	&mov	("ecx",&DWP(8,"esi"));
	&add	("eax",&DWP(0,"ebp"));
	&mov	("edx",&DWP(12,"esi"));
	&adc	("ebx",&DWP(4,"ebp"));
	&mov	(&DWP(0,"edi"),"eax");
	&adc	("ecx",&DWP(8,"ebp"));
	&mov	(&DWP(4,"edi"),"ebx");
	&adc	("edx",&DWP(12,"ebp"));
	&mov	(&DWP(8,"edi"),"ecx");
	&mov	(&DWP(12,"edi"),"edx");

	# Limbs 4..7, continuing the same carry chain. esi is no longer
	# needed as a pointer once limb 7 of a has been loaded, so it is
	# reused to capture the carry out of the 256-bit sum. It is cleared
	# with mov (not xor) because the pending carry flag must survive.
	&mov	("eax",&DWP(16,"esi"));
	&mov	("ebx",&DWP(20,"esi"));
	&mov	("ecx",&DWP(24,"esi"));
	&adc	("eax",&DWP(16,"ebp"));
	&mov	("edx",&DWP(28,"esi"));
	&adc	("ebx",&DWP(20,"ebp"));
	&mov	(&DWP(16,"edi"),"eax");
	&adc	("ecx",&DWP(24,"ebp"));
	&mov	(&DWP(20,"edi"),"ebx");
	&mov	("esi",0);
	&adc	("edx",&DWP(28,"ebp"));
	&mov	(&DWP(24,"edi"),"ecx");
	&adc	("esi",0);
	&mov	(&DWP(28,"edi"),"edx");

	# if a+b >= modulus, subtract modulus.
	#
	# But since comparison implies subtraction, we subtract modulus
	# to see if it borrows, and then subtract it for real if
	# subtraction didn't borrow.
	#
	# This first pass is a trial only: each register is overwritten by
	# the next load right after its sub/sbb, so the differences are
	# discarded and only the borrow flag is carried forward. The final
	# sbb folds that borrow into the saved carry, leaving
	# esi = carry - borrow.
	# NOTE(review): for inputs already reduced below the modulus this is
	# 0 (sum >= modulus) or all-ones (sum < modulus); that precondition
	# is not visible here -- confirm with callers.

	&mov	("eax",&DWP(0,"edi"));
	&mov	("ebx",&DWP(4,"edi"));
	&mov	("ecx",&DWP(8,"edi"));
	&sub	("eax",-1);
	&mov	("edx",&DWP(12,"edi"));
	&sbb	("ebx",-1);
	&mov	("eax",&DWP(16,"edi"));
	&sbb	("ecx",-1);
	&mov	("ebx",&DWP(20,"edi"));
	&sbb	("edx",0);
	&mov	("ecx",&DWP(24,"edi"));
	&sbb	("eax",0);
	&mov	("edx",&DWP(28,"edi"));
	&sbb	("ebx",0);
	&sbb	("ecx",1);
	&sbb	("edx",-1);
	&sbb	("esi",0);

	# Note that because mod has special form, i.e. consists of
	# 0xffffffff, 1 and 0s, we can conditionally synthesize it
	# by using borrow.
	#
	# After the not, esi is the all-ones/zero mask for "subtract the
	# modulus", and ebp = esi >> 31 is the matching 1/0. The value
	# subtracted limb by limb is therefore
	#   esi, esi, esi, 0, 0, 0, ebp, esi
	# which is either the modulus or zero.

	&not	("esi");
	&mov	("eax",&DWP(0,"edi"));
	&mov	("ebp","esi");
	&mov	("ebx",&DWP(4,"edi"));
	&shr	("ebp",31);
	&mov	("ecx",&DWP(8,"edi"));
	&sub	("eax","esi");
	&mov	("edx",&DWP(12,"edi"));
	&sbb	("ebx","esi");
	&mov	(&DWP(0,"edi"),"eax");
	&sbb	("ecx","esi");
	&mov	(&DWP(4,"edi"),"ebx");
	&sbb	("edx",0);
	&mov	(&DWP(8,"edi"),"ecx");
	&mov	(&DWP(12,"edi"),"edx");

	# Upper four limbs of the conditional subtraction; the borrow from
	# the lower half is still live in the flags at this point.
	&mov	("eax",&DWP(16,"edi"));
	&mov	("ebx",&DWP(20,"edi"));
	&mov	("ecx",&DWP(24,"edi"));
	&sbb	("eax",0);
	&mov	("edx",&DWP(28,"edi"));
	&sbb	("ebx",0);
	&mov	(&DWP(16,"edi"),"eax");
	&sbb	("ecx","ebp");
	&mov	(&DWP(20,"edi"),"ebx");
	&sbb	("edx","esi");
	&mov	(&DWP(24,"edi"),"ecx");
	&mov	(&DWP(28,"edi"),"edx");

	&ret	();
&function_end_B("_ecp_nistz256_add");

# _ecp_nistz256_sub -- internal helper: result = (a - b) mod p.
#
# Register interface (nothing is read from the stack):
#   esi - pointer to a[8]       (32-bit limbs, least significant first)
#   ebp - pointer to b[8]
#   edi - pointer to result[8]
# Overwrites eax, ebx, ecx, edx, esi, ebp and the flags; edi itself is
# not modified. The routine contains no branches: the modulus is added
# back under a mask derived from the final borrow.
&function_begin_B("_ecp_nistz256_sub");
	# Limbs 0..3 of a - b. The borrow is chained through sub/sbb; the
	# interleaved mov instructions leave the flags untouched.
	&mov	("eax",&DWP(0,"esi"));
	&mov	("ebx",&DWP(4,"esi"));
	&mov	("ecx",&DWP(8,"esi"));
	&sub	("eax",&DWP(0,"ebp"));
	&mov	("edx",&DWP(12,"esi"));
	&sbb	("ebx",&DWP(4,"ebp"));
	&mov	(&DWP(0,"edi"),"eax");
	&sbb	("ecx",&DWP(8,"ebp"));
	&mov	(&DWP(4,"edi"),"ebx");
	&sbb	("edx",&DWP(12,"ebp"));
	&mov	(&DWP(8,"edi"),"ecx");
	&mov	(&DWP(12,"edi"),"edx");

	# Limbs 4..7, continuing the same borrow chain. Once limb 7 of a
	# has been loaded esi is free, and "sbb esi,esi" turns the final
	# borrow into a mask: all-ones if a - b borrowed, zero otherwise.
	&mov	("eax",&DWP(16,"esi"));
	&mov	("ebx",&DWP(20,"esi"));
	&mov	("ecx",&DWP(24,"esi"));
	&sbb	("eax",&DWP(16,"ebp"));
	&mov	("edx",&DWP(28,"esi"));
	&sbb	("ebx",&DWP(20,"ebp"));
	&sbb	("ecx",&DWP(24,"ebp"));
	&mov	(&DWP(16,"edi"),"eax");
	&sbb	("edx",&DWP(28,"ebp"));
	&mov	(&DWP(20,"edi"),"ebx");
	&sbb	("esi","esi");			# broadcast borrow bit
	&mov	(&DWP(24,"edi"),"ecx");
	&mov	(&DWP(28,"edi"),"edx");

	# if a-b borrows, add modulus.
	#
	# Note that because mod has special form, i.e. consists of
	# 0xffffffff, 1 and 0s, we can conditionally synthesize it by
	# assigning borrow bit to one register, %ebp, and its negative
	# to another, %esi. But we started by calculating %esi...
	#
	# Concretely ebp = esi >> 31 (1 or 0), and the value added limb by
	# limb is
	#   esi, esi, esi, 0, 0, 0, ebp, esi
	# which is either the modulus or zero.

	&mov	("eax",&DWP(0,"edi"));
	&mov	("ebp","esi");
	&mov	("ebx",&DWP(4,"edi"));
	&shr	("ebp",31);
	&mov	("ecx",&DWP(8,"edi"));
	&add	("eax","esi");
	&mov	("edx",&DWP(12,"edi"));
	&adc	("ebx","esi");
	&mov	(&DWP(0,"edi"),"eax");
	&adc	("ecx","esi");
	&mov	(&DWP(4,"edi"),"ebx");
	&adc	("edx",0);
	&mov	(&DWP(8,"edi"),"ecx");
	&mov	(&DWP(12,"edi"),"edx");

	# Upper four limbs of the conditional addition; the carry from the
	# lower half is still live in the flags at this point.
	&mov	("eax",&DWP(16,"edi"));
	&mov	("ebx",&DWP(20,"edi"));
	&mov	("ecx",&DWP(24,"edi"));
	&adc	("eax",0);
	&mov	("edx",&DWP(28,"edi"));
	&adc	("ebx",0);
	&mov	(&DWP(16,"edi"),"eax");
	&adc	("ecx","ebp");
	&mov	(&DWP(20,"edi"),"ebx");
	&adc	("edx","esi");
	&mov	(&DWP(24,"edi"),"ecx");
	&mov	(&DWP(28,"edi"),"edx");

	&ret	();
&function_end_B("_ecp_nistz256_sub");

########################################################################
# void GFp_nistz256_neg(BN_ULONG edi[8],const BN_ULONG esi[8]);
&function_begin("GFp_nistz256_neg");
	# Negation is computed as 0 - a by the shared subtraction helper,
	# which reads the minuend through esi, the subtrahend through ebp
	# and writes the result through edi.
	&mov	("ebp",&wparam(1));		# ebp -> input a
	&mov	("edi",&wparam(0));		# edi -> result

	# Reserve eight dwords on the stack and fill them with zero to act
	# as the minuend.
	&xor	("eax","eax");
	&stack_push(8);
	foreach my $limb (0..7) {
		&mov	(&DWP(4*$limb,"esp"),"eax");
		# Point esi at the buffer right after the first store, the
		# same slot this instruction occupied when fully unrolled.
		&mov	("esi","esp")	if ($limb == 0);
	}

	&call	("_ecp_nistz256_sub");		# result = (0 - a) mod p

	&stack_pop(8);				# release the zero buffer
&function_end("GFp_nistz256_neg");

# _picup_eax -- load the dword at the top of the stack into eax.
#
# When reached via "call", that dword is the caller's return address,
# so the caller learns the address of the instruction following its
# call; GFp_nistz256_mul_mont places its "pic" label there and hands
# eax to picmeup.
# NOTE(review): this relies on function_begin_B emitting no prologue
# pushes before the mov -- confirm in x86asm.pl.
# Only eax is written; no flags are changed by mov/ret.
&function_begin_B("_picup_eax");
	&mov	("eax",&DWP(0,"esp"));
	&ret	();
&function_end_B("_picup_eax");

########################################################################
# void GFp_nistz256_mul_mont(BN_ULONG edi[8],const BN_ULONG esi[8],
#					     const BN_ULONG ebp[8]);
&function_begin("GFp_nistz256_mul_mont");
	&mov	("esi",&wparam(1));
	&mov	("ebp",&wparam(2));
						if ($sse2) {
	&call	("_picup_eax");
    &set_label("pic");
	&picmeup("eax","GFp_ia32cap_P","eax",&label("pic"));
	&mov	("eax",&DWP(0,"eax"));		}