in ring/crypto/fipsmodule/aes/asm/aesni-x86_64.pl [968:1154]
sub \$8,$len
jnc .Lctr32_loop8 # loop if $len-=8 didn't borrow
add \$8,$len # restore real remaining $len
jz .Lctr32_done # done if ($len==0)
lea -0x80($key),$key
.Lctr32_tail:
# Final partial batch. The branch and store comments below cover every
# remaining block count from 1 through 7.
# note that at this point $inout0..5 are populated with
# counter values xor-ed with 0-round key
lea 16($key),$key
cmp \$4,$len
jb .Lctr32_loop3 # $len below 4 (unsigned compare): three-block path
je .Lctr32_loop4 # $len is exactly 4: four-block path
# if ($len>4) compute 7 E(counter)
shl \$4,$rounds # scale $rounds by 16 so it can serve as a byte index in the lea below
movdqa 0x60(%rsp),$inout6 # seventh block state is read from the stack slot at 0x60(%rsp)
pxor $inout7,$inout7 # eighth register is zeroed; its result is never stored on this path
$movkey 16($key),$rndkey0
# One aesenc round with $rndkey1 is applied to $inout0..6 below,
# interleaved with the input pre-loads and the pointer setup for the call.
aesenc $rndkey1,$inout0
aesenc $rndkey1,$inout1
lea 32-16($key,$rounds),$key# prepare for .Lenc_loop8_enter
# NOTE(review): %rax is presumably the 64-bit view of $rounds - confirm
# against the register assignments made earlier in the file.
neg %rax
aesenc $rndkey1,$inout2
add \$16,%rax # prepare for .Lenc_loop8_enter
movups ($inp),$in0
aesenc $rndkey1,$inout3
aesenc $rndkey1,$inout4
movups 0x10($inp),$in1 # pre-load input
movups 0x20($inp),$in2
aesenc $rndkey1,$inout5
aesenc $rndkey1,$inout6
# .Lenc_loop8_enter is defined elsewhere in the file; presumably it runs
# the remaining rounds on $inout0..7 - confirm at its definition.
call .Lenc_loop8_enter
# XOR the results in $inout0..6 with the input blocks and store them.
# The first five blocks are stored unconditionally because $len is above 4
# on this path. $in0 is reloaded with the fifth input block as soon as the
# first block has been consumed.
movdqu 0x30($inp),$in3
pxor $in0,$inout0
movdqu 0x40($inp),$in0
pxor $in1,$inout1
movdqu $inout0,($out) # store output
pxor $in2,$inout2
movdqu $inout1,0x10($out)
pxor $in3,$inout3
movdqu $inout2,0x20($out)
pxor $in0,$inout4
movdqu $inout3,0x30($out)
movdqu $inout4,0x40($out)
# The flags from this single compare drive both the jb and the later je;
# the SSE loads, xorps and stores in between do not write flags.
cmp \$6,$len
jb .Lctr32_done # $len was 5, stop store
movups 0x50($inp),$in1
xorps $in1,$inout5
movups $inout5,0x50($out)
je .Lctr32_done # $len was 6, stop store
movups 0x60($inp),$in2
xorps $in2,$inout6
movups $inout6,0x60($out)
jmp .Lctr32_done # $len was 7, stop store
.align 32
.Lctr32_loop4:
# Four-block path ($len is 4). Each pass applies one aesenc round with
# $rndkey1 to $inout0..3, advances $key by 16 bytes to the next round key
# and counts $rounds down by one.
aesenc $rndkey1,$inout0
lea 16($key),$key
dec $rounds
aesenc $rndkey1,$inout1
aesenc $rndkey1,$inout2
aesenc $rndkey1,$inout3
$movkey ($key),$rndkey1
# jnz tests the zero flag left by the dec above; this relies on the
# instructions in between not writing flags.
jnz .Lctr32_loop4
# $rounds reached zero: the key just loaded into $rndkey1 is used for the
# final round, interleaved with loading the four input blocks.
aesenclast $rndkey1,$inout0
aesenclast $rndkey1,$inout1
movups ($inp),$in0 # load input
movups 0x10($inp),$in1
aesenclast $rndkey1,$inout2
aesenclast $rndkey1,$inout3
movups 0x20($inp),$in2
movups 0x30($inp),$in3
# XOR each result with its input block and write all four blocks out.
xorps $in0,$inout0
movups $inout0,($out) # store output
xorps $in1,$inout1
movups $inout1,0x10($out)
pxor $in2,$inout2
movdqu $inout2,0x20($out)
pxor $in3,$inout3
movdqu $inout3,0x30($out)
jmp .Lctr32_done # $len was 4, stop store
.align 32
.Lctr32_loop3:
# Path for $len of 1, 2 or 3. Three blocks are always run through the
# rounds; only the first $len results are XOR-ed with input and stored.
# Each pass applies one aesenc round with $rndkey1, advances $key by
# 16 bytes and counts $rounds down by one.
aesenc $rndkey1,$inout0
lea 16($key),$key
dec $rounds
aesenc $rndkey1,$inout1
aesenc $rndkey1,$inout2
$movkey ($key),$rndkey1
# jnz tests the zero flag left by the dec above; this relies on the
# instructions in between not writing flags.
jnz .Lctr32_loop3
# $rounds reached zero: the key just loaded into $rndkey1 is the one used
# for the final round.
aesenclast $rndkey1,$inout0
aesenclast $rndkey1,$inout1
aesenclast $rndkey1,$inout2
movups ($inp),$in0 # load input
xorps $in0,$inout0
movups $inout0,($out) # store output
# The flags from this single compare drive both the jb and the later je;
# the SSE loads, xorps and stores in between do not write flags.
cmp \$2,$len
jb .Lctr32_done # $len was 1, stop store
movups 0x10($inp),$in1
xorps $in1,$inout1
movups $inout1,0x10($out)
je .Lctr32_done # $len was 2, stop store
movups 0x20($inp),$in2
xorps $in2,$inout2
movups $inout2,0x20($out) # $len was 3, stop store
.Lctr32_done:
# Common exit for every path above: zero the vector registers and the
# 128-byte scratch area at 0x00..0x70(%rsp) before the frame is torn down.
# %xmm0 is zeroed first because it is the source for all the stack stores.
xorps %xmm0,%xmm0 # clear register bank
xor $key0,$key0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
___
# Emitted only when win64 is false: %xmm6..15 are simply zeroed, with the
# stack-clearing stores interleaved between the register clears.
$code.=<<___ if (!$win64);
pxor %xmm6,%xmm6
pxor %xmm7,%xmm7
movaps %xmm0,0x00(%rsp) # clear stack
pxor %xmm8,%xmm8
movaps %xmm0,0x10(%rsp)
pxor %xmm9,%xmm9
movaps %xmm0,0x20(%rsp)
pxor %xmm10,%xmm10
movaps %xmm0,0x30(%rsp)
pxor %xmm11,%xmm11
movaps %xmm0,0x40(%rsp)
pxor %xmm12,%xmm12
movaps %xmm0,0x50(%rsp)
pxor %xmm13,%xmm13
movaps %xmm0,0x60(%rsp)
pxor %xmm14,%xmm14
movaps %xmm0,0x70(%rsp)
pxor %xmm15,%xmm15
___
# Emitted only when win64 is true: %xmm6..15 are reloaded from the slots at
# -0xa8..-0x18 relative to $key_, and each slot is overwritten with zero
# right after it is read. The scratch area at (%rsp) is then cleared too.
# NOTE(review): presumably the prologue saved %xmm6..15 in those slots
# because they are callee-saved under the Microsoft x64 ABI - confirm there.
$code.=<<___ if ($win64);
movaps -0xa8($key_),%xmm6
movaps %xmm0,-0xa8($key_) # clear stack
movaps -0x98($key_),%xmm7
movaps %xmm0,-0x98($key_)
movaps -0x88($key_),%xmm8
movaps %xmm0,-0x88($key_)
movaps -0x78($key_),%xmm9
movaps %xmm0,-0x78($key_)
movaps -0x68($key_),%xmm10
movaps %xmm0,-0x68($key_)
movaps -0x58($key_),%xmm11
movaps %xmm0,-0x58($key_)
movaps -0x48($key_),%xmm12
movaps %xmm0,-0x48($key_)
movaps -0x38($key_),%xmm13
movaps %xmm0,-0x38($key_)
movaps -0x28($key_),%xmm14
movaps %xmm0,-0x28($key_)
movaps -0x18($key_),%xmm15
movaps %xmm0,-0x18($key_)
movaps %xmm0,0x00(%rsp)
movaps %xmm0,0x10(%rsp)
movaps %xmm0,0x20(%rsp)
movaps %xmm0,0x30(%rsp)
movaps %xmm0,0x40(%rsp)
movaps %xmm0,0x50(%rsp)
movaps %xmm0,0x60(%rsp)
movaps %xmm0,0x70(%rsp)
___
$code.=<<___;
# Frame teardown: reload %rbp from the 8 bytes just below the address held
# in $key_, then make that address the stack pointer again. The CFI
# directives keep the unwind description in step with each restore.
# NOTE(review): $key_ presumably holds the stack pointer captured by the
# prologue - confirm at the function entry.
mov -8($key_),%rbp
.cfi_restore %rbp
lea ($key_),%rsp
.cfi_def_cfa_register %rsp
.Lctr32_epilogue:
ret
.cfi_endproc
.size GFp_${PREFIX}_ctr32_encrypt_blocks,.-GFp_${PREFIX}_ctr32_encrypt_blocks