generated-src/win-x86_64/crypto/cipher_extra/aesni-sha256-x86_64.asm (4,556 lines of code) (raw):
; This file is generated from a similarly-named Perl script in the BoringSSL
; source tree. Do not edit by hand.
%ifidn __OUTPUT_FORMAT__, win64
default rel
%define XMMWORD
%define YMMWORD
%define ZMMWORD
%define _CET_ENDBR
%include "openssl/boringssl_prefix_symbols_nasm.inc"
section .text code align=64
EXTERN OPENSSL_ia32cap_P
global aesni_cbc_sha256_enc
ALIGN 16
;-----------------------------------------------------------------------
; aesni_cbc_sha256_enc -- run-time dispatcher (Win64: first argument in rcx)
;
; In:   rcx = first argument; a zero value means "probe only".
; Out:  probe call  -> eax = 1, returns immediately.
;       normal call -> tail-jumps (argument registers untouched) to one of
;                      the _shaext / _xop / _avx2 / _avx implementations,
;                      chosen from the capability words in OPENSSL_ia32cap_P.
;                      Traps with ud2 if none of the tested bits is set.
; Clobb: rax, r10, r11, flags.
;-----------------------------------------------------------------------
aesni_cbc_sha256_enc:
mov eax,1                       ; answer for the probe path
lea r11,[OPENSSL_ia32cap_P]
test rcx,rcx                    ; first argument zero?
jz NEAR $L$probe
mov eax,DWORD[r11]              ; capability word 0 (not consulted below)
mov r10,QWORD[4+r11]            ; r10 = capability words 1 (low) and 2 (high)
bt r10,61                       ; bit 29 of word 2 selects the shaext variant
jc NEAR aesni_cbc_sha256_enc_shaext
mov r11,r10
shr r11,32                      ; r11d = capability word 2
test r10d,1<<11                 ; bit 11 of word 1 selects the xop variant
jnz NEAR aesni_cbc_sha256_enc_xop
and r11d,(1<<8)|(1<<5)|(1<<3)   ; avx2 variant needs word-2 bits 8, 5 and 3 together
cmp r11d,(1<<8)|(1<<5)|(1<<3)
je NEAR aesni_cbc_sha256_enc_avx2
and r10d,1<<28                  ; bit 28 of word 1 selects the avx variant
jnz NEAR aesni_cbc_sha256_enc_avx
ud2                             ; no usable implementation: trap
; Nothing branches past the ud2 above, so the next four instructions are
; unreachable; they are kept exactly as the generator emitted them.
xor eax,eax
cmp rcx,0
je NEAR $L$probe
ud2
$L$probe:
DB 0F3h,0C3h ;repret
section .rdata rdata align=8
ALIGN 64
; K256 -- read-only constants shared by the code paths in this file.
;
; Layout (byte offsets from K256):
;   +0   .. +511  the 64 SHA-256 round constants, four per 16-byte row, with
;                 every row stored twice in a row (so each group of four
;                 constants occupies 32 bytes; the code steps by 32).
;   +512 .. +543  byte-shuffle mask used with vpshufb to byte-swap each
;                 32-bit word of the message (also stored twice).  The
;                 round loop also uses the zero byte at +515 as its
;                 end-of-table sentinel.
;   +544 .. +623  dword masks (eight 0, four -1, eight 0).  The code loads
;                 three consecutive 16-byte masks starting at
;                 +544 + 8*(DWORD[key+240] - 9); exactly one of the three is
;                 all-ones and picks which vaesenclast result is kept.
;   +624 ..       NUL-terminated ASCII identification string.
K256:
DD 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
DD 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
DD 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
DD 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
DD 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
DD 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
DD 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
DD 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
DD 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
DD 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
DD 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
DD 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
DD 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
DD 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
DD 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
DD 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
DD 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
DD 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
DD 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
DD 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
DD 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
DD 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
DD 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
DD 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
DD 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
DD 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
DD 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
DD 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
DD 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
DD 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
DD 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
DD 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
; +512: vpshufb control that reverses the bytes inside each dword.
DD 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
DD 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
; +544: select masks for the three candidate final AES rounds.
DD 0,0,0,0,0,0,0,0,-1,-1,-1,-1
DD 0,0,0,0,0,0,0,0
; "AESNI-CBC+SHA256 stitch for x86_64, CRYPTOGAMS by <appro@openssl.org>", NUL
DB 65,69,83,78,73,45,67,66,67,43,83,72,65,50,53,54
DB 32,115,116,105,116,99,104,32,102,111,114,32,120,56,54,95
DB 54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98
DB 121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108
DB 46,111,114,103,62,0
section .text
ALIGN 64
ALIGN 64
;-----------------------------------------------------------------------
; aesni_cbc_sha256_enc_xop -- entry, Win64 prologue and frame set-up.
;
; Incoming Win64 arguments are moved into the register assignment the body
; uses (names below describe how the body uses each value):
;   arg1 rcx       -> rdi  AES input pointer (16-byte blocks read from it)
;   arg2 rdx       -> rsi  AES output pointer (kept as the offset out - in)
;   arg3 r8        -> rdx  count of 64-byte blocks (turned into an end pointer)
;   arg4 r9        -> rcx  AES key schedule (round count read at +240)
;   arg5 [40+rsp]  -> r8   16-byte chaining value (read at start, written at exit)
;   arg6 [48+rsp]  -> r9   eight-dword hash state (read and updated)
;   arg7 [56+rsp]  -> r10  hash input pointer (kept as the offset from arg1)
;
; Frame after "and rsp,-64" (offsets from the aligned rsp):
;   0..63    sixteen pre-added (message word + round constant) dwords
;   64       current AES input pointer      72   out - in
;   80       end-of-input pointer           96   chaining-value pointer
;   104      hash-state pointer             112  hash input - in
;   120      caller's rsp (value at entry)
;   128..287 saved xmm6..xmm15 (callee-saved under the Win64 ABI)
;-----------------------------------------------------------------------
aesni_cbc_sha256_enc_xop:
; rdi/rsi are callee-saved on Win64: park them in the caller's home space.
mov QWORD[8+rsp],rdi ;WIN64 prologue
mov QWORD[16+rsp],rsi
mov rax,rsp
$L$SEH_begin_aesni_cbc_sha256_enc_xop:
mov rdi,rcx
mov rsi,rdx
mov rdx,r8
mov rcx,r9
mov r8,QWORD[40+rsp]
mov r9,QWORD[48+rsp]
$L$xop_shortcut:
mov r10,QWORD[56+rsp]
; rax = rsp at entry; the epilogue restores the pushed registers relative to it.
mov rax,rsp
push rbx
push rbp
push r12
push r13
push r14
push r15
sub rsp,288
and rsp,-64
shl rdx,6                       ; block count -> byte count
sub rsi,rdi                     ; rsi = out - in
sub r10,rdi                     ; r10 = hash input - in
add rdx,rdi                     ; rdx = in + byte count (end pointer)
mov QWORD[((64+8))+rsp],rsi
mov QWORD[((64+16))+rsp],rdx
mov QWORD[((64+32))+rsp],r8
mov QWORD[((64+40))+rsp],r9
mov QWORD[((64+48))+rsp],r10
mov QWORD[120+rsp],rax
movaps XMMWORD[128+rsp],xmm6
movaps XMMWORD[144+rsp],xmm7
movaps XMMWORD[160+rsp],xmm8
movaps XMMWORD[176+rsp],xmm9
movaps XMMWORD[192+rsp],xmm10
movaps XMMWORD[208+rsp],xmm11
movaps XMMWORD[224+rsp],xmm12
movaps XMMWORD[240+rsp],xmm13
movaps XMMWORD[256+rsp],xmm14
movaps XMMWORD[272+rsp],xmm15
; One-time register set-up for the XOP path.  On exit from this block:
;   r12      = AES input pointer          rdi = key schedule + 128
;   rsi      = hash input - AES input     r15 = hash-state pointer
;   eax,ebx,ecx,edx,r8d,r9d,r10d,r11d = the eight hash-state words
;   xmm8     = chaining value             xmm10 = first round key
;   xmm12/13/14 = select masks for the three candidate final AES rounds
$L$prologue_xop:
vzeroall
mov r12,rdi
lea rdi,[128+rcx]               ; bias by 128 so round-key displacements stay small
lea r13,[((K256+544))]          ; base of the select-mask area
mov r14d,DWORD[((240-128))+rdi] ; dword at key+240
mov r15,r9
mov rsi,r10
vmovdqu xmm8,XMMWORD[r8]
sub r14,9                       ; mask index = (key+240 value) - 9, scaled by 8 below
mov eax,DWORD[r15]
mov ebx,DWORD[4+r15]
mov ecx,DWORD[8+r15]
mov edx,DWORD[12+r15]
mov r8d,DWORD[16+r15]
mov r9d,DWORD[20+r15]
mov r10d,DWORD[24+r15]
mov r11d,DWORD[28+r15]
vmovdqa xmm14,XMMWORD[r14*8+r13]
vmovdqa xmm13,XMMWORD[16+r14*8+r13]
vmovdqa xmm12,XMMWORD[32+r14*8+r13]
vmovdqu xmm10,XMMWORD[((0-128))+rdi]
jmp NEAR $L$loop_xop
ALIGN 16
; Head of the per-64-byte-block loop: load 64 bytes of hash input from
; [r12+rsi], byte-swap every dword with the mask at K256+512, pre-add the
; first sixteen round constants and stage the sums at [rsp..rsp+63].
; xmm0..xmm3 keep the sixteen message words for the schedule.
$L$loop_xop:
vmovdqa xmm7,XMMWORD[((K256+512))]
vmovdqu xmm0,XMMWORD[r12*1+rsi]
vmovdqu xmm1,XMMWORD[16+r12*1+rsi]
vmovdqu xmm2,XMMWORD[32+r12*1+rsi]
vmovdqu xmm3,XMMWORD[48+r12*1+rsi]
vpshufb xmm0,xmm0,xmm7
lea rbp,[K256]                  ; rbp walks the constant table (32 bytes per four constants)
vpshufb xmm1,xmm1,xmm7
vpshufb xmm2,xmm2,xmm7
vpaddd xmm4,xmm0,XMMWORD[rbp]
vpshufb xmm3,xmm3,xmm7
vpaddd xmm5,xmm1,XMMWORD[32+rbp]
vpaddd xmm6,xmm2,XMMWORD[64+rbp]
vpaddd xmm7,xmm3,XMMWORD[96+rbp]
vmovdqa XMMWORD[rsp],xmm4
mov r14d,eax                    ; r14d carries the first state word between rounds
vmovdqa XMMWORD[16+rsp],xmm5
mov esi,ebx
vmovdqa XMMWORD[32+rsp],xmm6
xor esi,ecx                     ; esi = b ^ c, seed for the majority computation
vmovdqa XMMWORD[48+rsp],xmm7
mov r13d,r8d                    ; r13d = e, seed for the first round's rotate chain
jmp NEAR $L$xop_00_47
ALIGN 16
; $L$xop_00_47 -- sixteen hash rounds per pass, interleaved with (a) the
; message schedule for the following sixteen words and (b) the AES-CBC
; encryption of one 16-byte block.  The pass runs three times per 64-byte
; block: rbp advances 128 bytes per pass and the loop ends when the byte at
; [131+rbp] is zero, which first happens when it reaches the byte-swap mask
; that follows the constant table.
;
; Scalar side: the eight state words rotate through
; eax,ebx,ecx,edx,r8d,r9d,r10d,r11d; r12d, r13d, r14d, r15d and esi are
; scratch.  Each round adds one pre-summed (W+K) dword from [rsp+4*i].
; The ror 14/5/6 chain on r13d yields ror6^ror11^ror25 of the "e" word and
; the ror 9/11/2 chain on r14d yields ror2^ror13^ror22 of the "a" word.
;
; Vector side: the DB 143,232,120,194,... sequences are hand-encoded XOP
; instructions (decoded: vprotd, rotate dwords left by the last byte).
; Each quarter extends one of xmm0..xmm3 by four words, adds the next
; round constants from rbp and restages the sums on the stack.
;
; AES side: xmm9 = input block ^ round key 0 ^ xmm8 (chaining value), then
; one vaesenc per round key.  vaesenclast is issued at three different
; depths and xmm12/xmm13/xmm14 select the one to keep in xmm8.
$L$xop_00_47:
sub rbp,-16*2*4
vmovdqu xmm9,XMMWORD[r12]
mov QWORD[((64+0))+rsp],r12
; ---- quarter 0: rounds using [rsp+0..12]; schedule extends xmm0 ----
vpalignr xmm4,xmm1,xmm0,4
ror r13d,14
mov eax,r14d
vpalignr xmm7,xmm3,xmm2,4
mov r12d,r9d
xor r13d,r8d
DB 143,232,120,194,236,14
ror r14d,9
xor r12d,r10d
vpsrld xmm4,xmm4,3
ror r13d,5
xor r14d,eax
vpaddd xmm0,xmm0,xmm7
and r12d,r8d
vpxor xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((16-128))+rdi]
xor r13d,r8d
add r11d,DWORD[rsp]
mov r15d,eax
DB 143,232,120,194,245,11
ror r14d,11
xor r12d,r10d
vpxor xmm4,xmm4,xmm5
xor r15d,ebx
ror r13d,6
add r11d,r12d
and esi,r15d
DB 143,232,120,194,251,13
xor r14d,eax
add r11d,r13d
vpxor xmm4,xmm4,xmm6
xor esi,ebx
add edx,r11d
vpsrld xmm6,xmm3,10
ror r14d,2
add r11d,esi
vpaddd xmm0,xmm0,xmm4
mov r13d,edx
add r14d,r11d
DB 143,232,120,194,239,2
ror r13d,14
mov r11d,r14d
vpxor xmm7,xmm7,xmm6
mov r12d,r8d
xor r13d,edx
ror r14d,9
xor r12d,r9d
vpxor xmm7,xmm7,xmm5
ror r13d,5
xor r14d,r11d
and r12d,edx
vpxor xmm9,xmm9,xmm8
xor r13d,edx
vpsrldq xmm7,xmm7,8
add r10d,DWORD[4+rsp]
mov esi,r11d
ror r14d,11
xor r12d,r9d
vpaddd xmm0,xmm0,xmm7
xor esi,eax
ror r13d,6
add r10d,r12d
and r15d,esi
DB 143,232,120,194,248,13
xor r14d,r11d
add r10d,r13d
vpsrld xmm6,xmm0,10
xor r15d,eax
add ecx,r10d
DB 143,232,120,194,239,2
ror r14d,2
add r10d,r15d
vpxor xmm7,xmm7,xmm6
mov r13d,ecx
add r14d,r10d
ror r13d,14
mov r10d,r14d
vpxor xmm7,xmm7,xmm5
mov r12d,edx
xor r13d,ecx
ror r14d,9
xor r12d,r8d
vpslldq xmm7,xmm7,8
ror r13d,5
xor r14d,r10d
and r12d,ecx
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((32-128))+rdi]
xor r13d,ecx
vpaddd xmm0,xmm0,xmm7
add r9d,DWORD[8+rsp]
mov r15d,r10d
ror r14d,11
xor r12d,r8d
vpaddd xmm6,xmm0,XMMWORD[rbp]
xor r15d,r11d
ror r13d,6
add r9d,r12d
and esi,r15d
xor r14d,r10d
add r9d,r13d
xor esi,r11d
add ebx,r9d
ror r14d,2
add r9d,esi
mov r13d,ebx
add r14d,r9d
ror r13d,14
mov r9d,r14d
mov r12d,ecx
xor r13d,ebx
ror r14d,9
xor r12d,edx
ror r13d,5
xor r14d,r9d
and r12d,ebx
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((48-128))+rdi]
xor r13d,ebx
add r8d,DWORD[12+rsp]
mov esi,r9d
ror r14d,11
xor r12d,edx
xor esi,r10d
ror r13d,6
add r8d,r12d
and r15d,esi
xor r14d,r9d
add r8d,r13d
xor r15d,r10d
add eax,r8d
ror r14d,2
add r8d,r15d
mov r13d,eax
add r14d,r8d
vmovdqa XMMWORD[rsp],xmm6
; ---- quarter 1: rounds using [rsp+16..28]; schedule extends xmm1 ----
vpalignr xmm4,xmm2,xmm1,4
ror r13d,14
mov r8d,r14d
vpalignr xmm7,xmm0,xmm3,4
mov r12d,ebx
xor r13d,eax
DB 143,232,120,194,236,14
ror r14d,9
xor r12d,ecx
vpsrld xmm4,xmm4,3
ror r13d,5
xor r14d,r8d
vpaddd xmm1,xmm1,xmm7
and r12d,eax
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((64-128))+rdi]
xor r13d,eax
add edx,DWORD[16+rsp]
mov r15d,r8d
DB 143,232,120,194,245,11
ror r14d,11
xor r12d,ecx
vpxor xmm4,xmm4,xmm5
xor r15d,r9d
ror r13d,6
add edx,r12d
and esi,r15d
DB 143,232,120,194,248,13
xor r14d,r8d
add edx,r13d
vpxor xmm4,xmm4,xmm6
xor esi,r9d
add r11d,edx
vpsrld xmm6,xmm0,10
ror r14d,2
add edx,esi
vpaddd xmm1,xmm1,xmm4
mov r13d,r11d
add r14d,edx
DB 143,232,120,194,239,2
ror r13d,14
mov edx,r14d
vpxor xmm7,xmm7,xmm6
mov r12d,eax
xor r13d,r11d
ror r14d,9
xor r12d,ebx
vpxor xmm7,xmm7,xmm5
ror r13d,5
xor r14d,edx
and r12d,r11d
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((80-128))+rdi]
xor r13d,r11d
vpsrldq xmm7,xmm7,8
add ecx,DWORD[20+rsp]
mov esi,edx
ror r14d,11
xor r12d,ebx
vpaddd xmm1,xmm1,xmm7
xor esi,r8d
ror r13d,6
add ecx,r12d
and r15d,esi
DB 143,232,120,194,249,13
xor r14d,edx
add ecx,r13d
vpsrld xmm6,xmm1,10
xor r15d,r8d
add r10d,ecx
DB 143,232,120,194,239,2
ror r14d,2
add ecx,r15d
vpxor xmm7,xmm7,xmm6
mov r13d,r10d
add r14d,ecx
ror r13d,14
mov ecx,r14d
vpxor xmm7,xmm7,xmm5
mov r12d,r11d
xor r13d,r10d
ror r14d,9
xor r12d,eax
vpslldq xmm7,xmm7,8
ror r13d,5
xor r14d,ecx
and r12d,r10d
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((96-128))+rdi]
xor r13d,r10d
vpaddd xmm1,xmm1,xmm7
add ebx,DWORD[24+rsp]
mov r15d,ecx
ror r14d,11
xor r12d,eax
vpaddd xmm6,xmm1,XMMWORD[32+rbp]
xor r15d,edx
ror r13d,6
add ebx,r12d
and esi,r15d
xor r14d,ecx
add ebx,r13d
xor esi,edx
add r9d,ebx
ror r14d,2
add ebx,esi
mov r13d,r9d
add r14d,ebx
ror r13d,14
mov ebx,r14d
mov r12d,r10d
xor r13d,r9d
ror r14d,9
xor r12d,r11d
ror r13d,5
xor r14d,ebx
and r12d,r9d
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((112-128))+rdi]
xor r13d,r9d
add eax,DWORD[28+rsp]
mov esi,ebx
ror r14d,11
xor r12d,r11d
xor esi,ecx
ror r13d,6
add eax,r12d
and r15d,esi
xor r14d,ebx
add eax,r13d
xor r15d,ecx
add r8d,eax
ror r14d,2
add eax,r15d
mov r13d,r8d
add r14d,eax
vmovdqa XMMWORD[16+rsp],xmm6
; ---- quarter 2: rounds using [rsp+32..44]; schedule extends xmm2 ----
vpalignr xmm4,xmm3,xmm2,4
ror r13d,14
mov eax,r14d
vpalignr xmm7,xmm1,xmm0,4
mov r12d,r9d
xor r13d,r8d
DB 143,232,120,194,236,14
ror r14d,9
xor r12d,r10d
vpsrld xmm4,xmm4,3
ror r13d,5
xor r14d,eax
vpaddd xmm2,xmm2,xmm7
and r12d,r8d
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((128-128))+rdi]
xor r13d,r8d
add r11d,DWORD[32+rsp]
mov r15d,eax
DB 143,232,120,194,245,11
ror r14d,11
xor r12d,r10d
vpxor xmm4,xmm4,xmm5
xor r15d,ebx
ror r13d,6
add r11d,r12d
and esi,r15d
DB 143,232,120,194,249,13
xor r14d,eax
add r11d,r13d
vpxor xmm4,xmm4,xmm6
xor esi,ebx
add edx,r11d
vpsrld xmm6,xmm1,10
ror r14d,2
add r11d,esi
vpaddd xmm2,xmm2,xmm4
mov r13d,edx
add r14d,r11d
DB 143,232,120,194,239,2
ror r13d,14
mov r11d,r14d
vpxor xmm7,xmm7,xmm6
mov r12d,r8d
xor r13d,edx
ror r14d,9
xor r12d,r9d
vpxor xmm7,xmm7,xmm5
ror r13d,5
xor r14d,r11d
and r12d,edx
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((144-128))+rdi]
xor r13d,edx
vpsrldq xmm7,xmm7,8
add r10d,DWORD[36+rsp]
mov esi,r11d
ror r14d,11
xor r12d,r9d
vpaddd xmm2,xmm2,xmm7
xor esi,eax
ror r13d,6
add r10d,r12d
and r15d,esi
DB 143,232,120,194,250,13
xor r14d,r11d
add r10d,r13d
vpsrld xmm6,xmm2,10
xor r15d,eax
add ecx,r10d
DB 143,232,120,194,239,2
ror r14d,2
add r10d,r15d
vpxor xmm7,xmm7,xmm6
mov r13d,ecx
add r14d,r10d
ror r13d,14
mov r10d,r14d
vpxor xmm7,xmm7,xmm5
mov r12d,edx
xor r13d,ecx
ror r14d,9
xor r12d,r8d
vpslldq xmm7,xmm7,8
ror r13d,5
xor r14d,r10d
and r12d,ecx
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((160-128))+rdi]
xor r13d,ecx
vpaddd xmm2,xmm2,xmm7
add r9d,DWORD[40+rsp]
mov r15d,r10d
ror r14d,11
xor r12d,r8d
vpaddd xmm6,xmm2,XMMWORD[64+rbp]
xor r15d,r11d
ror r13d,6
add r9d,r12d
and esi,r15d
xor r14d,r10d
add r9d,r13d
xor esi,r11d
add ebx,r9d
ror r14d,2
add r9d,esi
mov r13d,ebx
add r14d,r9d
ror r13d,14
mov r9d,r14d
mov r12d,ecx
xor r13d,ebx
ror r14d,9
xor r12d,edx
ror r13d,5
xor r14d,r9d
and r12d,ebx
; first candidate final round (key at +160), then keep going for longer keys
vaesenclast xmm11,xmm9,xmm10
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((176-128))+rdi]
xor r13d,ebx
add r8d,DWORD[44+rsp]
mov esi,r9d
ror r14d,11
xor r12d,edx
xor esi,r10d
ror r13d,6
add r8d,r12d
and r15d,esi
xor r14d,r9d
add r8d,r13d
xor r15d,r10d
add eax,r8d
ror r14d,2
add r8d,r15d
mov r13d,eax
add r14d,r8d
vmovdqa XMMWORD[32+rsp],xmm6
; ---- quarter 3: rounds using [rsp+48..60]; schedule extends xmm3 ----
vpalignr xmm4,xmm0,xmm3,4
ror r13d,14
mov r8d,r14d
vpalignr xmm7,xmm2,xmm1,4
mov r12d,ebx
xor r13d,eax
DB 143,232,120,194,236,14
ror r14d,9
xor r12d,ecx
vpsrld xmm4,xmm4,3
ror r13d,5
xor r14d,r8d
vpaddd xmm3,xmm3,xmm7
and r12d,eax
vpand xmm8,xmm11,xmm12
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((192-128))+rdi]
xor r13d,eax
add edx,DWORD[48+rsp]
mov r15d,r8d
DB 143,232,120,194,245,11
ror r14d,11
xor r12d,ecx
vpxor xmm4,xmm4,xmm5
xor r15d,r9d
ror r13d,6
add edx,r12d
and esi,r15d
DB 143,232,120,194,250,13
xor r14d,r8d
add edx,r13d
vpxor xmm4,xmm4,xmm6
xor esi,r9d
add r11d,edx
vpsrld xmm6,xmm2,10
ror r14d,2
add edx,esi
vpaddd xmm3,xmm3,xmm4
mov r13d,r11d
add r14d,edx
DB 143,232,120,194,239,2
ror r13d,14
mov edx,r14d
vpxor xmm7,xmm7,xmm6
mov r12d,eax
xor r13d,r11d
ror r14d,9
xor r12d,ebx
vpxor xmm7,xmm7,xmm5
ror r13d,5
xor r14d,edx
and r12d,r11d
; second candidate final round (key at +192)
vaesenclast xmm11,xmm9,xmm10
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((208-128))+rdi]
xor r13d,r11d
vpsrldq xmm7,xmm7,8
add ecx,DWORD[52+rsp]
mov esi,edx
ror r14d,11
xor r12d,ebx
vpaddd xmm3,xmm3,xmm7
xor esi,r8d
ror r13d,6
add ecx,r12d
and r15d,esi
DB 143,232,120,194,251,13
xor r14d,edx
add ecx,r13d
vpsrld xmm6,xmm3,10
xor r15d,r8d
add r10d,ecx
DB 143,232,120,194,239,2
ror r14d,2
add ecx,r15d
vpxor xmm7,xmm7,xmm6
mov r13d,r10d
add r14d,ecx
ror r13d,14
mov ecx,r14d
vpxor xmm7,xmm7,xmm5
mov r12d,r11d
xor r13d,r10d
ror r14d,9
xor r12d,eax
vpslldq xmm7,xmm7,8
ror r13d,5
xor r14d,ecx
and r12d,r10d
vpand xmm11,xmm11,xmm13
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((224-128))+rdi]
xor r13d,r10d
vpaddd xmm3,xmm3,xmm7
add ebx,DWORD[56+rsp]
mov r15d,ecx
ror r14d,11
xor r12d,eax
vpaddd xmm6,xmm3,XMMWORD[96+rbp]
xor r15d,edx
ror r13d,6
add ebx,r12d
and esi,r15d
xor r14d,ecx
add ebx,r13d
xor esi,edx
add r9d,ebx
ror r14d,2
add ebx,esi
mov r13d,r9d
add r14d,ebx
ror r13d,14
mov ebx,r14d
mov r12d,r10d
xor r13d,r9d
ror r14d,9
xor r12d,r11d
ror r13d,5
xor r14d,ebx
and r12d,r9d
vpor xmm8,xmm8,xmm11
; third candidate final round (key at +224); reload round key 0 for the next block
vaesenclast xmm11,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((0-128))+rdi]
xor r13d,r9d
add eax,DWORD[60+rsp]
mov esi,ebx
ror r14d,11
xor r12d,r11d
xor esi,ecx
ror r13d,6
add eax,r12d
and r15d,esi
xor r14d,ebx
add eax,r13d
xor r15d,ecx
add r8d,eax
ror r14d,2
add eax,r15d
mov r13d,r8d
add r14d,eax
vmovdqa XMMWORD[48+rsp],xmm6
; ---- merge the selected final round into xmm8 and store it at in + (out - in) ----
mov r12,QWORD[((64+0))+rsp]
vpand xmm11,xmm11,xmm14
mov r15,QWORD[((64+8))+rsp]
vpor xmm8,xmm8,xmm11
vmovdqu XMMWORD[r12*1+r15],xmm8
lea r12,[16+r12]
; [131+rbp] is the top byte of the next table row; zero only at the byte-swap mask
cmp BYTE[131+rbp],0
jne NEAR $L$xop_00_47
; Final sixteen hash rounds of the current 64-byte block.  They have the same
; scalar round shape as $L$xop_00_47 but no message schedule: the
; (W+K) dwords were already staged at [rsp..rsp+63] by the last loop pass.
; The fourth 16-byte AES-CBC block is encrypted alongside.  Afterwards the
; working variables are added into the hash state at r15 and the outer
; loop repeats while r12 is below the end pointer saved at [64+16+rsp].
vmovdqu xmm9,XMMWORD[r12]
mov QWORD[((64+0))+rsp],r12
; ---- rounds using [rsp+0..12] ----
ror r13d,14
mov eax,r14d
mov r12d,r9d
xor r13d,r8d
ror r14d,9
xor r12d,r10d
ror r13d,5
xor r14d,eax
and r12d,r8d
vpxor xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((16-128))+rdi]
xor r13d,r8d
add r11d,DWORD[rsp]
mov r15d,eax
ror r14d,11
xor r12d,r10d
xor r15d,ebx
ror r13d,6
add r11d,r12d
and esi,r15d
xor r14d,eax
add r11d,r13d
xor esi,ebx
add edx,r11d
ror r14d,2
add r11d,esi
mov r13d,edx
add r14d,r11d
ror r13d,14
mov r11d,r14d
mov r12d,r8d
xor r13d,edx
ror r14d,9
xor r12d,r9d
ror r13d,5
xor r14d,r11d
and r12d,edx
vpxor xmm9,xmm9,xmm8
xor r13d,edx
add r10d,DWORD[4+rsp]
mov esi,r11d
ror r14d,11
xor r12d,r9d
xor esi,eax
ror r13d,6
add r10d,r12d
and r15d,esi
xor r14d,r11d
add r10d,r13d
xor r15d,eax
add ecx,r10d
ror r14d,2
add r10d,r15d
mov r13d,ecx
add r14d,r10d
ror r13d,14
mov r10d,r14d
mov r12d,edx
xor r13d,ecx
ror r14d,9
xor r12d,r8d
ror r13d,5
xor r14d,r10d
and r12d,ecx
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((32-128))+rdi]
xor r13d,ecx
add r9d,DWORD[8+rsp]
mov r15d,r10d
ror r14d,11
xor r12d,r8d
xor r15d,r11d
ror r13d,6
add r9d,r12d
and esi,r15d
xor r14d,r10d
add r9d,r13d
xor esi,r11d
add ebx,r9d
ror r14d,2
add r9d,esi
mov r13d,ebx
add r14d,r9d
ror r13d,14
mov r9d,r14d
mov r12d,ecx
xor r13d,ebx
ror r14d,9
xor r12d,edx
ror r13d,5
xor r14d,r9d
and r12d,ebx
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((48-128))+rdi]
xor r13d,ebx
add r8d,DWORD[12+rsp]
mov esi,r9d
ror r14d,11
xor r12d,edx
xor esi,r10d
ror r13d,6
add r8d,r12d
and r15d,esi
xor r14d,r9d
add r8d,r13d
xor r15d,r10d
add eax,r8d
ror r14d,2
add r8d,r15d
mov r13d,eax
add r14d,r8d
; ---- rounds using [rsp+16..28] ----
ror r13d,14
mov r8d,r14d
mov r12d,ebx
xor r13d,eax
ror r14d,9
xor r12d,ecx
ror r13d,5
xor r14d,r8d
and r12d,eax
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((64-128))+rdi]
xor r13d,eax
add edx,DWORD[16+rsp]
mov r15d,r8d
ror r14d,11
xor r12d,ecx
xor r15d,r9d
ror r13d,6
add edx,r12d
and esi,r15d
xor r14d,r8d
add edx,r13d
xor esi,r9d
add r11d,edx
ror r14d,2
add edx,esi
mov r13d,r11d
add r14d,edx
ror r13d,14
mov edx,r14d
mov r12d,eax
xor r13d,r11d
ror r14d,9
xor r12d,ebx
ror r13d,5
xor r14d,edx
and r12d,r11d
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((80-128))+rdi]
xor r13d,r11d
add ecx,DWORD[20+rsp]
mov esi,edx
ror r14d,11
xor r12d,ebx
xor esi,r8d
ror r13d,6
add ecx,r12d
and r15d,esi
xor r14d,edx
add ecx,r13d
xor r15d,r8d
add r10d,ecx
ror r14d,2
add ecx,r15d
mov r13d,r10d
add r14d,ecx
ror r13d,14
mov ecx,r14d
mov r12d,r11d
xor r13d,r10d
ror r14d,9
xor r12d,eax
ror r13d,5
xor r14d,ecx
and r12d,r10d
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((96-128))+rdi]
xor r13d,r10d
add ebx,DWORD[24+rsp]
mov r15d,ecx
ror r14d,11
xor r12d,eax
xor r15d,edx
ror r13d,6
add ebx,r12d
and esi,r15d
xor r14d,ecx
add ebx,r13d
xor esi,edx
add r9d,ebx
ror r14d,2
add ebx,esi
mov r13d,r9d
add r14d,ebx
ror r13d,14
mov ebx,r14d
mov r12d,r10d
xor r13d,r9d
ror r14d,9
xor r12d,r11d
ror r13d,5
xor r14d,ebx
and r12d,r9d
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((112-128))+rdi]
xor r13d,r9d
add eax,DWORD[28+rsp]
mov esi,ebx
ror r14d,11
xor r12d,r11d
xor esi,ecx
ror r13d,6
add eax,r12d
and r15d,esi
xor r14d,ebx
add eax,r13d
xor r15d,ecx
add r8d,eax
ror r14d,2
add eax,r15d
mov r13d,r8d
add r14d,eax
; ---- rounds using [rsp+32..44] ----
ror r13d,14
mov eax,r14d
mov r12d,r9d
xor r13d,r8d
ror r14d,9
xor r12d,r10d
ror r13d,5
xor r14d,eax
and r12d,r8d
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((128-128))+rdi]
xor r13d,r8d
add r11d,DWORD[32+rsp]
mov r15d,eax
ror r14d,11
xor r12d,r10d
xor r15d,ebx
ror r13d,6
add r11d,r12d
and esi,r15d
xor r14d,eax
add r11d,r13d
xor esi,ebx
add edx,r11d
ror r14d,2
add r11d,esi
mov r13d,edx
add r14d,r11d
ror r13d,14
mov r11d,r14d
mov r12d,r8d
xor r13d,edx
ror r14d,9
xor r12d,r9d
ror r13d,5
xor r14d,r11d
and r12d,edx
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((144-128))+rdi]
xor r13d,edx
add r10d,DWORD[36+rsp]
mov esi,r11d
ror r14d,11
xor r12d,r9d
xor esi,eax
ror r13d,6
add r10d,r12d
and r15d,esi
xor r14d,r11d
add r10d,r13d
xor r15d,eax
add ecx,r10d
ror r14d,2
add r10d,r15d
mov r13d,ecx
add r14d,r10d
ror r13d,14
mov r10d,r14d
mov r12d,edx
xor r13d,ecx
ror r14d,9
xor r12d,r8d
ror r13d,5
xor r14d,r10d
and r12d,ecx
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((160-128))+rdi]
xor r13d,ecx
add r9d,DWORD[40+rsp]
mov r15d,r10d
ror r14d,11
xor r12d,r8d
xor r15d,r11d
ror r13d,6
add r9d,r12d
and esi,r15d
xor r14d,r10d
add r9d,r13d
xor esi,r11d
add ebx,r9d
ror r14d,2
add r9d,esi
mov r13d,ebx
add r14d,r9d
ror r13d,14
mov r9d,r14d
mov r12d,ecx
xor r13d,ebx
ror r14d,9
xor r12d,edx
ror r13d,5
xor r14d,r9d
and r12d,ebx
; first candidate final round (key at +160)
vaesenclast xmm11,xmm9,xmm10
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((176-128))+rdi]
xor r13d,ebx
add r8d,DWORD[44+rsp]
mov esi,r9d
ror r14d,11
xor r12d,edx
xor esi,r10d
ror r13d,6
add r8d,r12d
and r15d,esi
xor r14d,r9d
add r8d,r13d
xor r15d,r10d
add eax,r8d
ror r14d,2
add r8d,r15d
mov r13d,eax
add r14d,r8d
; ---- rounds using [rsp+48..60] ----
ror r13d,14
mov r8d,r14d
mov r12d,ebx
xor r13d,eax
ror r14d,9
xor r12d,ecx
ror r13d,5
xor r14d,r8d
and r12d,eax
vpand xmm8,xmm11,xmm12
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((192-128))+rdi]
xor r13d,eax
add edx,DWORD[48+rsp]
mov r15d,r8d
ror r14d,11
xor r12d,ecx
xor r15d,r9d
ror r13d,6
add edx,r12d
and esi,r15d
xor r14d,r8d
add edx,r13d
xor esi,r9d
add r11d,edx
ror r14d,2
add edx,esi
mov r13d,r11d
add r14d,edx
ror r13d,14
mov edx,r14d
mov r12d,eax
xor r13d,r11d
ror r14d,9
xor r12d,ebx
ror r13d,5
xor r14d,edx
and r12d,r11d
; second candidate final round (key at +192)
vaesenclast xmm11,xmm9,xmm10
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((208-128))+rdi]
xor r13d,r11d
add ecx,DWORD[52+rsp]
mov esi,edx
ror r14d,11
xor r12d,ebx
xor esi,r8d
ror r13d,6
add ecx,r12d
and r15d,esi
xor r14d,edx
add ecx,r13d
xor r15d,r8d
add r10d,ecx
ror r14d,2
add ecx,r15d
mov r13d,r10d
add r14d,ecx
ror r13d,14
mov ecx,r14d
mov r12d,r11d
xor r13d,r10d
ror r14d,9
xor r12d,eax
ror r13d,5
xor r14d,ecx
and r12d,r10d
vpand xmm11,xmm11,xmm13
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((224-128))+rdi]
xor r13d,r10d
add ebx,DWORD[56+rsp]
mov r15d,ecx
ror r14d,11
xor r12d,eax
xor r15d,edx
ror r13d,6
add ebx,r12d
and esi,r15d
xor r14d,ecx
add ebx,r13d
xor esi,edx
add r9d,ebx
ror r14d,2
add ebx,esi
mov r13d,r9d
add r14d,ebx
ror r13d,14
mov ebx,r14d
mov r12d,r10d
xor r13d,r9d
ror r14d,9
xor r12d,r11d
ror r13d,5
xor r14d,ebx
and r12d,r9d
vpor xmm8,xmm8,xmm11
; third candidate final round (key at +224); reload round key 0 for the next block
vaesenclast xmm11,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((0-128))+rdi]
xor r13d,r9d
add eax,DWORD[60+rsp]
mov esi,ebx
ror r14d,11
xor r12d,r11d
xor esi,ecx
ror r13d,6
add eax,r12d
and r15d,esi
xor r14d,ebx
add eax,r13d
xor r15d,ecx
add r8d,eax
ror r14d,2
add eax,r15d
mov r13d,r8d
add r14d,eax
; ---- reload spilled pointers (r12/r13/r15/rsi were used as scratch above) ----
mov r12,QWORD[((64+0))+rsp]
mov r13,QWORD[((64+8))+rsp]
mov r15,QWORD[((64+40))+rsp]
mov rsi,QWORD[((64+48))+rsp]
vpand xmm11,xmm11,xmm14
mov eax,r14d                    ; the first state word was being carried in r14d
vpor xmm8,xmm8,xmm11
vmovdqu XMMWORD[r13*1+r12],xmm8 ; store the encrypted block at in + (out - in)
lea r12,[16+r12]
; ---- add the working variables into the hash state and write it back ----
add eax,DWORD[r15]
add ebx,DWORD[4+r15]
add ecx,DWORD[8+r15]
add edx,DWORD[12+r15]
add r8d,DWORD[16+r15]
add r9d,DWORD[20+r15]
add r10d,DWORD[24+r15]
add r11d,DWORD[28+r15]
cmp r12,QWORD[((64+16))+rsp]    ; flags survive the mov stores below
mov DWORD[r15],eax
mov DWORD[4+r15],ebx
mov DWORD[8+r15],ecx
mov DWORD[12+r15],edx
mov DWORD[16+r15],r8d
mov DWORD[20+r15],r9d
mov DWORD[24+r15],r10d
mov DWORD[28+r15],r11d
jb NEAR $L$loop_xop
; Exit path of the XOP implementation: write the last chaining value back
; through the pointer saved at [64+32+rsp], clear all vector registers,
; then restore the Win64 callee-saved state (xmm6..xmm15, r12..r15, rbp,
; rbx, rsp, rdi, rsi) from where the prologue saved it.
mov r8,QWORD[((64+32))+rsp]
mov rsi,QWORD[120+rsp]          ; rsi = rsp as it was before the pushes
vmovdqu XMMWORD[r8],xmm8
vzeroall
movaps xmm6,XMMWORD[128+rsp]
movaps xmm7,XMMWORD[144+rsp]
movaps xmm8,XMMWORD[160+rsp]
movaps xmm9,XMMWORD[176+rsp]
movaps xmm10,XMMWORD[192+rsp]
movaps xmm11,XMMWORD[208+rsp]
movaps xmm12,XMMWORD[224+rsp]
movaps xmm13,XMMWORD[240+rsp]
movaps xmm14,XMMWORD[256+rsp]
movaps xmm15,XMMWORD[272+rsp]
; The six pushes sit directly below the saved rsp value, in push order.
mov r15,QWORD[((-48))+rsi]
mov r14,QWORD[((-40))+rsi]
mov r13,QWORD[((-32))+rsi]
mov r12,QWORD[((-24))+rsi]
mov rbp,QWORD[((-16))+rsi]
mov rbx,QWORD[((-8))+rsi]
lea rsp,[rsi]
$L$epilogue_xop:
; rdi/rsi were parked in the caller's home space at function entry.
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
$L$SEH_end_aesni_cbc_sha256_enc_xop:
ALIGN 64
aesni_cbc_sha256_enc_avx:
mov QWORD[8+rsp],rdi ;WIN64 prologue
mov QWORD[16+rsp],rsi
mov rax,rsp
$L$SEH_begin_aesni_cbc_sha256_enc_avx:
mov rdi,rcx
mov rsi,rdx
mov rdx,r8
mov rcx,r9
mov r8,QWORD[40+rsp]
mov r9,QWORD[48+rsp]
$L$avx_shortcut:
mov r10,QWORD[56+rsp]
mov rax,rsp
push rbx
push rbp
push r12
push r13
push r14
push r15
sub rsp,288
and rsp,-64
shl rdx,6
sub rsi,rdi
sub r10,rdi
add rdx,rdi
mov QWORD[((64+8))+rsp],rsi
mov QWORD[((64+16))+rsp],rdx
mov QWORD[((64+32))+rsp],r8
mov QWORD[((64+40))+rsp],r9
mov QWORD[((64+48))+rsp],r10
mov QWORD[120+rsp],rax
movaps XMMWORD[128+rsp],xmm6
movaps XMMWORD[144+rsp],xmm7
movaps XMMWORD[160+rsp],xmm8
movaps XMMWORD[176+rsp],xmm9
movaps XMMWORD[192+rsp],xmm10
movaps XMMWORD[208+rsp],xmm11
movaps XMMWORD[224+rsp],xmm12
movaps XMMWORD[240+rsp],xmm13
movaps XMMWORD[256+rsp],xmm14
movaps XMMWORD[272+rsp],xmm15
$L$prologue_avx:
vzeroall
mov r12,rdi
lea rdi,[128+rcx]
lea r13,[((K256+544))]
mov r14d,DWORD[((240-128))+rdi]
mov r15,r9
mov rsi,r10
vmovdqu xmm8,XMMWORD[r8]
sub r14,9
mov eax,DWORD[r15]
mov ebx,DWORD[4+r15]
mov ecx,DWORD[8+r15]
mov edx,DWORD[12+r15]
mov r8d,DWORD[16+r15]
mov r9d,DWORD[20+r15]
mov r10d,DWORD[24+r15]
mov r11d,DWORD[28+r15]
vmovdqa xmm14,XMMWORD[r14*8+r13]
vmovdqa xmm13,XMMWORD[16+r14*8+r13]
vmovdqa xmm12,XMMWORD[32+r14*8+r13]
vmovdqu xmm10,XMMWORD[((0-128))+rdi]
jmp NEAR $L$loop_avx
ALIGN 16
$L$loop_avx:
vmovdqa xmm7,XMMWORD[((K256+512))]
vmovdqu xmm0,XMMWORD[r12*1+rsi]
vmovdqu xmm1,XMMWORD[16+r12*1+rsi]
vmovdqu xmm2,XMMWORD[32+r12*1+rsi]
vmovdqu xmm3,XMMWORD[48+r12*1+rsi]
vpshufb xmm0,xmm0,xmm7
lea rbp,[K256]
vpshufb xmm1,xmm1,xmm7
vpshufb xmm2,xmm2,xmm7
vpaddd xmm4,xmm0,XMMWORD[rbp]
vpshufb xmm3,xmm3,xmm7
vpaddd xmm5,xmm1,XMMWORD[32+rbp]
vpaddd xmm6,xmm2,XMMWORD[64+rbp]
vpaddd xmm7,xmm3,XMMWORD[96+rbp]
vmovdqa XMMWORD[rsp],xmm4
mov r14d,eax
vmovdqa XMMWORD[16+rsp],xmm5
mov esi,ebx
vmovdqa XMMWORD[32+rsp],xmm6
xor esi,ecx
vmovdqa XMMWORD[48+rsp],xmm7
mov r13d,r8d
jmp NEAR $L$avx_00_47
ALIGN 16
$L$avx_00_47:
sub rbp,-16*2*4
vmovdqu xmm9,XMMWORD[r12]
mov QWORD[((64+0))+rsp],r12
vpalignr xmm4,xmm1,xmm0,4
shrd r13d,r13d,14
mov eax,r14d
mov r12d,r9d
vpalignr xmm7,xmm3,xmm2,4
xor r13d,r8d
shrd r14d,r14d,9
xor r12d,r10d
vpsrld xmm6,xmm4,7
shrd r13d,r13d,5
xor r14d,eax
and r12d,r8d
vpaddd xmm0,xmm0,xmm7
vpxor xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((16-128))+rdi]
xor r13d,r8d
add r11d,DWORD[rsp]
mov r15d,eax
vpsrld xmm7,xmm4,3
shrd r14d,r14d,11
xor r12d,r10d
xor r15d,ebx
vpslld xmm5,xmm4,14
shrd r13d,r13d,6
add r11d,r12d
and esi,r15d
vpxor xmm4,xmm7,xmm6
xor r14d,eax
add r11d,r13d
xor esi,ebx
vpshufd xmm7,xmm3,250
add edx,r11d
shrd r14d,r14d,2
add r11d,esi
vpsrld xmm6,xmm6,11
mov r13d,edx
add r14d,r11d
shrd r13d,r13d,14
vpxor xmm4,xmm4,xmm5
mov r11d,r14d
mov r12d,r8d
xor r13d,edx
vpslld xmm5,xmm5,11
shrd r14d,r14d,9
xor r12d,r9d
shrd r13d,r13d,5
vpxor xmm4,xmm4,xmm6
xor r14d,r11d
and r12d,edx
vpxor xmm9,xmm9,xmm8
xor r13d,edx
vpsrld xmm6,xmm7,10
add r10d,DWORD[4+rsp]
mov esi,r11d
shrd r14d,r14d,11
vpxor xmm4,xmm4,xmm5
xor r12d,r9d
xor esi,eax
shrd r13d,r13d,6
vpsrlq xmm7,xmm7,17
add r10d,r12d
and r15d,esi
xor r14d,r11d
vpaddd xmm0,xmm0,xmm4
add r10d,r13d
xor r15d,eax
add ecx,r10d
vpxor xmm6,xmm6,xmm7
shrd r14d,r14d,2
add r10d,r15d
mov r13d,ecx
vpsrlq xmm7,xmm7,2
add r14d,r10d
shrd r13d,r13d,14
mov r10d,r14d
vpxor xmm6,xmm6,xmm7
mov r12d,edx
xor r13d,ecx
shrd r14d,r14d,9
vpshufd xmm6,xmm6,132
xor r12d,r8d
shrd r13d,r13d,5
xor r14d,r10d
vpsrldq xmm6,xmm6,8
and r12d,ecx
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((32-128))+rdi]
xor r13d,ecx
add r9d,DWORD[8+rsp]
vpaddd xmm0,xmm0,xmm6
mov r15d,r10d
shrd r14d,r14d,11
xor r12d,r8d
vpshufd xmm7,xmm0,80
xor r15d,r11d
shrd r13d,r13d,6
add r9d,r12d
vpsrld xmm6,xmm7,10
and esi,r15d
xor r14d,r10d
add r9d,r13d
vpsrlq xmm7,xmm7,17
xor esi,r11d
add ebx,r9d
shrd r14d,r14d,2
vpxor xmm6,xmm6,xmm7
add r9d,esi
mov r13d,ebx
add r14d,r9d
vpsrlq xmm7,xmm7,2
shrd r13d,r13d,14
mov r9d,r14d
mov r12d,ecx
vpxor xmm6,xmm6,xmm7
xor r13d,ebx
shrd r14d,r14d,9
xor r12d,edx
vpshufd xmm6,xmm6,232
shrd r13d,r13d,5
xor r14d,r9d
and r12d,ebx
vpslldq xmm6,xmm6,8
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((48-128))+rdi]
xor r13d,ebx
add r8d,DWORD[12+rsp]
mov esi,r9d
vpaddd xmm0,xmm0,xmm6
shrd r14d,r14d,11
xor r12d,edx
xor esi,r10d
vpaddd xmm6,xmm0,XMMWORD[rbp]
shrd r13d,r13d,6
add r8d,r12d
and r15d,esi
xor r14d,r9d
add r8d,r13d
xor r15d,r10d
add eax,r8d
shrd r14d,r14d,2
add r8d,r15d
mov r13d,eax
add r14d,r8d
vmovdqa XMMWORD[rsp],xmm6
vpalignr xmm4,xmm2,xmm1,4
shrd r13d,r13d,14
mov r8d,r14d
mov r12d,ebx
vpalignr xmm7,xmm0,xmm3,4
xor r13d,eax
shrd r14d,r14d,9
xor r12d,ecx
vpsrld xmm6,xmm4,7
shrd r13d,r13d,5
xor r14d,r8d
and r12d,eax
vpaddd xmm1,xmm1,xmm7
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((64-128))+rdi]
xor r13d,eax
add edx,DWORD[16+rsp]
mov r15d,r8d
vpsrld xmm7,xmm4,3
shrd r14d,r14d,11
xor r12d,ecx
xor r15d,r9d
vpslld xmm5,xmm4,14
shrd r13d,r13d,6
add edx,r12d
and esi,r15d
vpxor xmm4,xmm7,xmm6
xor r14d,r8d
add edx,r13d
xor esi,r9d
vpshufd xmm7,xmm0,250
add r11d,edx
shrd r14d,r14d,2
add edx,esi
vpsrld xmm6,xmm6,11
mov r13d,r11d
add r14d,edx
shrd r13d,r13d,14
vpxor xmm4,xmm4,xmm5
mov edx,r14d
mov r12d,eax
xor r13d,r11d
vpslld xmm5,xmm5,11
shrd r14d,r14d,9
xor r12d,ebx
shrd r13d,r13d,5
vpxor xmm4,xmm4,xmm6
xor r14d,edx
and r12d,r11d
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((80-128))+rdi]
xor r13d,r11d
vpsrld xmm6,xmm7,10
add ecx,DWORD[20+rsp]
mov esi,edx
shrd r14d,r14d,11
vpxor xmm4,xmm4,xmm5
xor r12d,ebx
xor esi,r8d
shrd r13d,r13d,6
vpsrlq xmm7,xmm7,17
add ecx,r12d
and r15d,esi
xor r14d,edx
vpaddd xmm1,xmm1,xmm4
add ecx,r13d
xor r15d,r8d
add r10d,ecx
vpxor xmm6,xmm6,xmm7
shrd r14d,r14d,2
add ecx,r15d
mov r13d,r10d
vpsrlq xmm7,xmm7,2
add r14d,ecx
shrd r13d,r13d,14
mov ecx,r14d
vpxor xmm6,xmm6,xmm7
mov r12d,r11d
xor r13d,r10d
shrd r14d,r14d,9
vpshufd xmm6,xmm6,132
xor r12d,eax
shrd r13d,r13d,5
xor r14d,ecx
vpsrldq xmm6,xmm6,8
and r12d,r10d
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((96-128))+rdi]
xor r13d,r10d
add ebx,DWORD[24+rsp]
vpaddd xmm1,xmm1,xmm6
mov r15d,ecx
shrd r14d,r14d,11
xor r12d,eax
vpshufd xmm7,xmm1,80
xor r15d,edx
shrd r13d,r13d,6
add ebx,r12d
vpsrld xmm6,xmm7,10
and esi,r15d
xor r14d,ecx
add ebx,r13d
vpsrlq xmm7,xmm7,17
xor esi,edx
add r9d,ebx
shrd r14d,r14d,2
vpxor xmm6,xmm6,xmm7
add ebx,esi
mov r13d,r9d
add r14d,ebx
vpsrlq xmm7,xmm7,2
shrd r13d,r13d,14
mov ebx,r14d
mov r12d,r10d
vpxor xmm6,xmm6,xmm7
xor r13d,r9d
shrd r14d,r14d,9
xor r12d,r11d
vpshufd xmm6,xmm6,232
shrd r13d,r13d,5
xor r14d,ebx
and r12d,r9d
vpslldq xmm6,xmm6,8
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((112-128))+rdi]
xor r13d,r9d
add eax,DWORD[28+rsp]
mov esi,ebx
vpaddd xmm1,xmm1,xmm6
shrd r14d,r14d,11
xor r12d,r11d
xor esi,ecx
vpaddd xmm6,xmm1,XMMWORD[32+rbp]
shrd r13d,r13d,6
add eax,r12d
and r15d,esi
xor r14d,ebx
add eax,r13d
xor r15d,ecx
add r8d,eax
shrd r14d,r14d,2
add eax,r15d
mov r13d,r8d
add r14d,eax
vmovdqa XMMWORD[16+rsp],xmm6
vpalignr xmm4,xmm3,xmm2,4
shrd r13d,r13d,14
mov eax,r14d
mov r12d,r9d
vpalignr xmm7,xmm1,xmm0,4
xor r13d,r8d
shrd r14d,r14d,9
xor r12d,r10d
vpsrld xmm6,xmm4,7
shrd r13d,r13d,5
xor r14d,eax
and r12d,r8d
vpaddd xmm2,xmm2,xmm7
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((128-128))+rdi]
xor r13d,r8d
add r11d,DWORD[32+rsp]
mov r15d,eax
vpsrld xmm7,xmm4,3
shrd r14d,r14d,11
xor r12d,r10d
xor r15d,ebx
vpslld xmm5,xmm4,14
shrd r13d,r13d,6
add r11d,r12d
and esi,r15d
vpxor xmm4,xmm7,xmm6
xor r14d,eax
add r11d,r13d
xor esi,ebx
vpshufd xmm7,xmm1,250
add edx,r11d
shrd r14d,r14d,2
add r11d,esi
vpsrld xmm6,xmm6,11
mov r13d,edx
add r14d,r11d
shrd r13d,r13d,14
vpxor xmm4,xmm4,xmm5
mov r11d,r14d
mov r12d,r8d
xor r13d,edx
vpslld xmm5,xmm5,11
shrd r14d,r14d,9
xor r12d,r9d
shrd r13d,r13d,5
vpxor xmm4,xmm4,xmm6
xor r14d,r11d
and r12d,edx
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((144-128))+rdi]
xor r13d,edx
vpsrld xmm6,xmm7,10
add r10d,DWORD[36+rsp]
mov esi,r11d
shrd r14d,r14d,11
vpxor xmm4,xmm4,xmm5
xor r12d,r9d
xor esi,eax
shrd r13d,r13d,6
vpsrlq xmm7,xmm7,17
add r10d,r12d
and r15d,esi
xor r14d,r11d
vpaddd xmm2,xmm2,xmm4
add r10d,r13d
xor r15d,eax
add ecx,r10d
vpxor xmm6,xmm6,xmm7
shrd r14d,r14d,2
add r10d,r15d
mov r13d,ecx
vpsrlq xmm7,xmm7,2
add r14d,r10d
shrd r13d,r13d,14
mov r10d,r14d
vpxor xmm6,xmm6,xmm7
mov r12d,edx
xor r13d,ecx
shrd r14d,r14d,9
vpshufd xmm6,xmm6,132
xor r12d,r8d
shrd r13d,r13d,5
xor r14d,r10d
vpsrldq xmm6,xmm6,8
and r12d,ecx
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((160-128))+rdi]
xor r13d,ecx
add r9d,DWORD[40+rsp]
vpaddd xmm2,xmm2,xmm6
mov r15d,r10d
shrd r14d,r14d,11
xor r12d,r8d
vpshufd xmm7,xmm2,80
xor r15d,r11d
shrd r13d,r13d,6
add r9d,r12d
vpsrld xmm6,xmm7,10
and esi,r15d
xor r14d,r10d
add r9d,r13d
vpsrlq xmm7,xmm7,17
xor esi,r11d
add ebx,r9d
shrd r14d,r14d,2
vpxor xmm6,xmm6,xmm7
add r9d,esi
mov r13d,ebx
add r14d,r9d
vpsrlq xmm7,xmm7,2
shrd r13d,r13d,14
mov r9d,r14d
mov r12d,ecx
vpxor xmm6,xmm6,xmm7
xor r13d,ebx
shrd r14d,r14d,9
xor r12d,edx
vpshufd xmm6,xmm6,232
shrd r13d,r13d,5
xor r14d,r9d
and r12d,ebx
vpslldq xmm6,xmm6,8
vaesenclast xmm11,xmm9,xmm10
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((176-128))+rdi]
xor r13d,ebx
add r8d,DWORD[44+rsp]
mov esi,r9d
vpaddd xmm2,xmm2,xmm6
shrd r14d,r14d,11
xor r12d,edx
xor esi,r10d
vpaddd xmm6,xmm2,XMMWORD[64+rbp]
shrd r13d,r13d,6
add r8d,r12d
and r15d,esi
xor r14d,r9d
add r8d,r13d
xor r15d,r10d
add eax,r8d
shrd r14d,r14d,2
add r8d,r15d
mov r13d,eax
add r14d,r8d
vmovdqa XMMWORD[32+rsp],xmm6
vpalignr xmm4,xmm0,xmm3,4
shrd r13d,r13d,14
mov r8d,r14d
mov r12d,ebx
vpalignr xmm7,xmm2,xmm1,4
xor r13d,eax
shrd r14d,r14d,9
xor r12d,ecx
vpsrld xmm6,xmm4,7
shrd r13d,r13d,5
xor r14d,r8d
and r12d,eax
vpaddd xmm3,xmm3,xmm7
vpand xmm8,xmm11,xmm12
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((192-128))+rdi]
xor r13d,eax
add edx,DWORD[48+rsp]
mov r15d,r8d
vpsrld xmm7,xmm4,3
shrd r14d,r14d,11
xor r12d,ecx
xor r15d,r9d
vpslld xmm5,xmm4,14
shrd r13d,r13d,6
add edx,r12d
and esi,r15d
vpxor xmm4,xmm7,xmm6
xor r14d,r8d
add edx,r13d
xor esi,r9d
vpshufd xmm7,xmm2,250
add r11d,edx
shrd r14d,r14d,2
add edx,esi
vpsrld xmm6,xmm6,11
mov r13d,r11d
add r14d,edx
shrd r13d,r13d,14
vpxor xmm4,xmm4,xmm5
mov edx,r14d
mov r12d,eax
xor r13d,r11d
vpslld xmm5,xmm5,11
shrd r14d,r14d,9
xor r12d,ebx
shrd r13d,r13d,5
vpxor xmm4,xmm4,xmm6
xor r14d,edx
and r12d,r11d
vaesenclast xmm11,xmm9,xmm10
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((208-128))+rdi]
xor r13d,r11d
vpsrld xmm6,xmm7,10
add ecx,DWORD[52+rsp]
mov esi,edx
shrd r14d,r14d,11
vpxor xmm4,xmm4,xmm5
xor r12d,ebx
xor esi,r8d
shrd r13d,r13d,6
vpsrlq xmm7,xmm7,17
add ecx,r12d
and r15d,esi
xor r14d,edx
vpaddd xmm3,xmm3,xmm4
add ecx,r13d
xor r15d,r8d
add r10d,ecx
vpxor xmm6,xmm6,xmm7
shrd r14d,r14d,2
add ecx,r15d
mov r13d,r10d
vpsrlq xmm7,xmm7,2
add r14d,ecx
shrd r13d,r13d,14
mov ecx,r14d
vpxor xmm6,xmm6,xmm7
mov r12d,r11d
xor r13d,r10d
shrd r14d,r14d,9
vpshufd xmm6,xmm6,132
xor r12d,eax
shrd r13d,r13d,5
xor r14d,ecx
vpsrldq xmm6,xmm6,8
and r12d,r10d
vpand xmm11,xmm11,xmm13
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((224-128))+rdi]
xor r13d,r10d
add ebx,DWORD[56+rsp]
vpaddd xmm3,xmm3,xmm6
mov r15d,ecx
shrd r14d,r14d,11
xor r12d,eax
vpshufd xmm7,xmm3,80
xor r15d,edx
shrd r13d,r13d,6
add ebx,r12d
vpsrld xmm6,xmm7,10
and esi,r15d
xor r14d,ecx
add ebx,r13d
vpsrlq xmm7,xmm7,17
xor esi,edx
add r9d,ebx
shrd r14d,r14d,2
vpxor xmm6,xmm6,xmm7
add ebx,esi
mov r13d,r9d
add r14d,ebx
vpsrlq xmm7,xmm7,2
shrd r13d,r13d,14
mov ebx,r14d
mov r12d,r10d
vpxor xmm6,xmm6,xmm7
xor r13d,r9d
shrd r14d,r14d,9
xor r12d,r11d
vpshufd xmm6,xmm6,232
shrd r13d,r13d,5
xor r14d,ebx
and r12d,r9d
vpslldq xmm6,xmm6,8
vpor xmm8,xmm8,xmm11
vaesenclast xmm11,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((0-128))+rdi]
xor r13d,r9d
add eax,DWORD[60+rsp]
mov esi,ebx
vpaddd xmm3,xmm3,xmm6
shrd r14d,r14d,11
xor r12d,r11d
xor esi,ecx
vpaddd xmm6,xmm3,XMMWORD[96+rbp]
shrd r13d,r13d,6
add eax,r12d
and r15d,esi
xor r14d,ebx
add eax,r13d
xor r15d,ecx
add r8d,eax
shrd r14d,r14d,2
add eax,r15d
mov r13d,r8d
add r14d,eax
vmovdqa XMMWORD[48+rsp],xmm6
; ---- End of one $L$avx_00_47 pass: emit a ciphertext block, test for loop exit ----
; Frame slot 64+0 holds a pointer; slot 64+8 holds a value that is added to
; it to form the store address below.
; NOTE(review): presumably slot 64+0 is the current input pointer and
; slot 64+8 is (output - input); the prologue that fills them is not visible here.
mov r12,QWORD[((64+0))+rsp]
; Keep the most recent vaesenclast result only where mask xmm14 is set ...
vpand xmm11,xmm11,xmm14
mov r15,QWORD[((64+8))+rsp]
; ... and merge it with the results already masked by xmm12/xmm13 into xmm8.
; NOTE(review): the three masks presumably select the final round that matches
; the key length; confirm against the mask table.
vpor xmm8,xmm8,xmm11
; Write the 16-byte block in xmm8 to [r12 + r15], then advance r12 by 16.
vmovdqu XMMWORD[r12*1+r15],xmm8
lea r12,[16+r12]
; Repeat the pass while the byte at rbp+131 is non-zero.
; NOTE(review): rbp indexes the constant table (see the XMMWORD[..+rbp] adds
; above); a zero byte here presumably marks the end of the round constants.
cmp BYTE[131+rbp],0
jne NEAR $L$avx_00_47
; Fall-through: load the next 16 input bytes into xmm9 and save the advanced
; pointer back to slot 64+0.
vmovdqu xmm9,XMMWORD[r12]
mov QWORD[((64+0))+rsp],r12
shrd r13d,r13d,14
mov eax,r14d
mov r12d,r9d
xor r13d,r8d
shrd r14d,r14d,9
xor r12d,r10d
shrd r13d,r13d,5
xor r14d,eax
and r12d,r8d
vpxor xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((16-128))+rdi]
xor r13d,r8d
add r11d,DWORD[rsp]
mov r15d,eax
shrd r14d,r14d,11
xor r12d,r10d
xor r15d,ebx
shrd r13d,r13d,6
add r11d,r12d
and esi,r15d
xor r14d,eax
add r11d,r13d
xor esi,ebx
add edx,r11d
shrd r14d,r14d,2
add r11d,esi
mov r13d,edx
add r14d,r11d
shrd r13d,r13d,14
mov r11d,r14d
mov r12d,r8d
xor r13d,edx
shrd r14d,r14d,9
xor r12d,r9d
shrd r13d,r13d,5
xor r14d,r11d
and r12d,edx
vpxor xmm9,xmm9,xmm8
xor r13d,edx
add r10d,DWORD[4+rsp]
mov esi,r11d
shrd r14d,r14d,11
xor r12d,r9d
xor esi,eax
shrd r13d,r13d,6
add r10d,r12d
and r15d,esi
xor r14d,r11d
add r10d,r13d
xor r15d,eax
add ecx,r10d
shrd r14d,r14d,2
add r10d,r15d
mov r13d,ecx
add r14d,r10d
shrd r13d,r13d,14
mov r10d,r14d
mov r12d,edx
xor r13d,ecx
shrd r14d,r14d,9
xor r12d,r8d
shrd r13d,r13d,5
xor r14d,r10d
and r12d,ecx
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((32-128))+rdi]
xor r13d,ecx
add r9d,DWORD[8+rsp]
mov r15d,r10d
shrd r14d,r14d,11
xor r12d,r8d
xor r15d,r11d
shrd r13d,r13d,6
add r9d,r12d
and esi,r15d
xor r14d,r10d
add r9d,r13d
xor esi,r11d
add ebx,r9d
shrd r14d,r14d,2
add r9d,esi
mov r13d,ebx
add r14d,r9d
shrd r13d,r13d,14
mov r9d,r14d
mov r12d,ecx
xor r13d,ebx
shrd r14d,r14d,9
xor r12d,edx
shrd r13d,r13d,5
xor r14d,r9d
and r12d,ebx
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((48-128))+rdi]
xor r13d,ebx
add r8d,DWORD[12+rsp]
mov esi,r9d
shrd r14d,r14d,11
xor r12d,edx
xor esi,r10d
shrd r13d,r13d,6
add r8d,r12d
and r15d,esi
xor r14d,r9d
add r8d,r13d
xor r15d,r10d
add eax,r8d
shrd r14d,r14d,2
add r8d,r15d
mov r13d,eax
add r14d,r8d
shrd r13d,r13d,14
mov r8d,r14d
mov r12d,ebx
xor r13d,eax
shrd r14d,r14d,9
xor r12d,ecx
shrd r13d,r13d,5
xor r14d,r8d
and r12d,eax
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((64-128))+rdi]
xor r13d,eax
add edx,DWORD[16+rsp]
mov r15d,r8d
shrd r14d,r14d,11
xor r12d,ecx
xor r15d,r9d
shrd r13d,r13d,6
add edx,r12d
and esi,r15d
xor r14d,r8d
add edx,r13d
xor esi,r9d
add r11d,edx
shrd r14d,r14d,2
add edx,esi
mov r13d,r11d
add r14d,edx
shrd r13d,r13d,14
mov edx,r14d
mov r12d,eax
xor r13d,r11d
shrd r14d,r14d,9
xor r12d,ebx
shrd r13d,r13d,5
xor r14d,edx
and r12d,r11d
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((80-128))+rdi]
xor r13d,r11d
add ecx,DWORD[20+rsp]
mov esi,edx
shrd r14d,r14d,11
xor r12d,ebx
xor esi,r8d
shrd r13d,r13d,6
add ecx,r12d
and r15d,esi
xor r14d,edx
add ecx,r13d
xor r15d,r8d
add r10d,ecx
shrd r14d,r14d,2
add ecx,r15d
mov r13d,r10d
add r14d,ecx
shrd r13d,r13d,14
mov ecx,r14d
mov r12d,r11d
xor r13d,r10d
shrd r14d,r14d,9
xor r12d,eax
shrd r13d,r13d,5
xor r14d,ecx
and r12d,r10d
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((96-128))+rdi]
xor r13d,r10d
add ebx,DWORD[24+rsp]
mov r15d,ecx
shrd r14d,r14d,11
xor r12d,eax
xor r15d,edx
shrd r13d,r13d,6
add ebx,r12d
and esi,r15d
xor r14d,ecx
add ebx,r13d
xor esi,edx
add r9d,ebx
shrd r14d,r14d,2
add ebx,esi
mov r13d,r9d
add r14d,ebx
shrd r13d,r13d,14
mov ebx,r14d
mov r12d,r10d
xor r13d,r9d
shrd r14d,r14d,9
xor r12d,r11d
shrd r13d,r13d,5
xor r14d,ebx
and r12d,r9d
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((112-128))+rdi]
xor r13d,r9d
add eax,DWORD[28+rsp]
mov esi,ebx
shrd r14d,r14d,11
xor r12d,r11d
xor esi,ecx
shrd r13d,r13d,6
add eax,r12d
and r15d,esi
xor r14d,ebx
add eax,r13d
xor r15d,ecx
add r8d,eax
shrd r14d,r14d,2
add eax,r15d
mov r13d,r8d
add r14d,eax
shrd r13d,r13d,14
mov eax,r14d
mov r12d,r9d
xor r13d,r8d
shrd r14d,r14d,9
xor r12d,r10d
shrd r13d,r13d,5
xor r14d,eax
and r12d,r8d
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((128-128))+rdi]
xor r13d,r8d
add r11d,DWORD[32+rsp]
mov r15d,eax
shrd r14d,r14d,11
xor r12d,r10d
xor r15d,ebx
shrd r13d,r13d,6
add r11d,r12d
and esi,r15d
xor r14d,eax
add r11d,r13d
xor esi,ebx
add edx,r11d
shrd r14d,r14d,2
add r11d,esi
mov r13d,edx
add r14d,r11d
shrd r13d,r13d,14
mov r11d,r14d
mov r12d,r8d
xor r13d,edx
shrd r14d,r14d,9
xor r12d,r9d
shrd r13d,r13d,5
xor r14d,r11d
and r12d,edx
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((144-128))+rdi]
xor r13d,edx
add r10d,DWORD[36+rsp]
mov esi,r11d
shrd r14d,r14d,11
xor r12d,r9d
xor esi,eax
shrd r13d,r13d,6
add r10d,r12d
and r15d,esi
xor r14d,r11d
add r10d,r13d
xor r15d,eax
add ecx,r10d
shrd r14d,r14d,2
add r10d,r15d
mov r13d,ecx
add r14d,r10d
shrd r13d,r13d,14
mov r10d,r14d
mov r12d,edx
xor r13d,ecx
shrd r14d,r14d,9
xor r12d,r8d
shrd r13d,r13d,5
xor r14d,r10d
and r12d,ecx
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((160-128))+rdi]
xor r13d,ecx
add r9d,DWORD[40+rsp]
mov r15d,r10d
shrd r14d,r14d,11
xor r12d,r8d
xor r15d,r11d
shrd r13d,r13d,6
add r9d,r12d
and esi,r15d
xor r14d,r10d
add r9d,r13d
xor esi,r11d
add ebx,r9d
shrd r14d,r14d,2
add r9d,esi
mov r13d,ebx
add r14d,r9d
shrd r13d,r13d,14
mov r9d,r14d
mov r12d,ecx
xor r13d,ebx
shrd r14d,r14d,9
xor r12d,edx
shrd r13d,r13d,5
xor r14d,r9d
and r12d,ebx
vaesenclast xmm11,xmm9,xmm10
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((176-128))+rdi]
xor r13d,ebx
add r8d,DWORD[44+rsp]
mov esi,r9d
shrd r14d,r14d,11
xor r12d,edx
xor esi,r10d
shrd r13d,r13d,6
add r8d,r12d
and r15d,esi
xor r14d,r9d
add r8d,r13d
xor r15d,r10d
add eax,r8d
shrd r14d,r14d,2
add r8d,r15d
mov r13d,eax
add r14d,r8d
shrd r13d,r13d,14
mov r8d,r14d
mov r12d,ebx
xor r13d,eax
shrd r14d,r14d,9
xor r12d,ecx
shrd r13d,r13d,5
xor r14d,r8d
and r12d,eax
vpand xmm8,xmm11,xmm12
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((192-128))+rdi]
xor r13d,eax
add edx,DWORD[48+rsp]
mov r15d,r8d
shrd r14d,r14d,11
xor r12d,ecx
xor r15d,r9d
shrd r13d,r13d,6
add edx,r12d
and esi,r15d
xor r14d,r8d
add edx,r13d
xor esi,r9d
add r11d,edx
shrd r14d,r14d,2
add edx,esi
mov r13d,r11d
add r14d,edx
shrd r13d,r13d,14
mov edx,r14d
mov r12d,eax
xor r13d,r11d
shrd r14d,r14d,9
xor r12d,ebx
shrd r13d,r13d,5
xor r14d,edx
and r12d,r11d
vaesenclast xmm11,xmm9,xmm10
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((208-128))+rdi]
xor r13d,r11d
add ecx,DWORD[52+rsp]
mov esi,edx
shrd r14d,r14d,11
xor r12d,ebx
xor esi,r8d
shrd r13d,r13d,6
add ecx,r12d
and r15d,esi
xor r14d,edx
add ecx,r13d
xor r15d,r8d
add r10d,ecx
shrd r14d,r14d,2
add ecx,r15d
mov r13d,r10d
add r14d,ecx
shrd r13d,r13d,14
mov ecx,r14d
mov r12d,r11d
xor r13d,r10d
shrd r14d,r14d,9
xor r12d,eax
shrd r13d,r13d,5
xor r14d,ecx
and r12d,r10d
vpand xmm11,xmm11,xmm13
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((224-128))+rdi]
xor r13d,r10d
add ebx,DWORD[56+rsp]
mov r15d,ecx
shrd r14d,r14d,11
xor r12d,eax
xor r15d,edx
shrd r13d,r13d,6
add ebx,r12d
and esi,r15d
xor r14d,ecx
add ebx,r13d
xor esi,edx
add r9d,ebx
shrd r14d,r14d,2
add ebx,esi
mov r13d,r9d
add r14d,ebx
shrd r13d,r13d,14
mov ebx,r14d
mov r12d,r10d
xor r13d,r9d
shrd r14d,r14d,9
xor r12d,r11d
shrd r13d,r13d,5
xor r14d,ebx
and r12d,r9d
vpor xmm8,xmm8,xmm11
vaesenclast xmm11,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((0-128))+rdi]
xor r13d,r9d
add eax,DWORD[60+rsp]
mov esi,ebx
shrd r14d,r14d,11
xor r12d,r11d
xor esi,ecx
shrd r13d,r13d,6
add eax,r12d
and r15d,esi
xor r14d,ebx
add eax,r13d
xor r15d,ecx
add r8d,eax
shrd r14d,r14d,2
add eax,r15d
mov r13d,r8d
add r14d,eax
; ---- AVX path: finish one block, update the hash state, loop or return ----
; Reload frame slots; the round code above used r12/r13/r15/rsi as scratch.
; NOTE(review): slot meanings are inferred from how they are used below; the
; prologue of this function is outside the visible chunk.
mov r12,QWORD[((64+0))+rsp]
mov r13,QWORD[((64+8))+rsp]
mov r15,QWORD[((64+40))+rsp]
mov rsi,QWORD[((64+48))+rsp]
; Merge the last masked vaesenclast result into xmm8.
vpand xmm11,xmm11,xmm14
; r14d carries the newest working value produced by the last round; move it to eax.
mov eax,r14d
vpor xmm8,xmm8,xmm11
; Store the 16-byte block in xmm8 at [r13 + r12] and advance r12 by 16.
vmovdqu XMMWORD[r13*1+r12],xmm8
lea r12,[16+r12]
; Add the eight 32-bit words at [r15] into the working registers
; eax, ebx, ecx, edx, r8d..r11d.
add eax,DWORD[r15]
add ebx,DWORD[4+r15]
add ecx,DWORD[8+r15]
add edx,DWORD[12+r15]
add r8d,DWORD[16+r15]
add r9d,DWORD[20+r15]
add r10d,DWORD[24+r15]
add r11d,DWORD[28+r15]
; Compare the advanced pointer with the limit in slot 64+16. The mov stores
; below do not modify flags, so the jb still sees this comparison.
cmp r12,QWORD[((64+16))+rsp]
; Write the updated eight words back to [r15].
mov DWORD[r15],eax
mov DWORD[4+r15],ebx
mov DWORD[8+r15],ecx
mov DWORD[12+r15],edx
mov DWORD[16+r15],r8d
mov DWORD[20+r15],r9d
mov DWORD[24+r15],r10d
mov DWORD[28+r15],r11d
; Unsigned compare: keep looping while r12 is below the limit.
jb NEAR $L$loop_avx
; ---- Exit path ----
; r8 = pointer from slot 64+32; rsi = stack pointer value saved in slot 120.
mov r8,QWORD[((64+32))+rsp]
mov rsi,QWORD[120+rsp]
; Write xmm8 (the last block stored above) to [r8].
; NOTE(review): presumably this hands the CBC chaining value back to the caller.
vmovdqu XMMWORD[r8],xmm8
; Zero every vector register before the callee-saved ones are restored.
vzeroall
; xmm6-xmm15 are callee-saved in the Microsoft x64 ABI; restore them from the frame.
movaps xmm6,XMMWORD[128+rsp]
movaps xmm7,XMMWORD[144+rsp]
movaps xmm8,XMMWORD[160+rsp]
movaps xmm9,XMMWORD[176+rsp]
movaps xmm10,XMMWORD[192+rsp]
movaps xmm11,XMMWORD[208+rsp]
movaps xmm12,XMMWORD[224+rsp]
movaps xmm13,XMMWORD[240+rsp]
movaps xmm14,XMMWORD[256+rsp]
movaps xmm15,XMMWORD[272+rsp]
; Restore the callee-saved general-purpose registers from just below the
; saved stack pointer, then switch rsp back to it.
mov r15,QWORD[((-48))+rsi]
mov r14,QWORD[((-40))+rsi]
mov r13,QWORD[((-32))+rsi]
mov r12,QWORD[((-24))+rsi]
mov rbp,QWORD[((-16))+rsi]
mov rbx,QWORD[((-8))+rsi]
lea rsp,[rsi]
$L$epilogue_avx:
; Reload rdi/rsi (callee-saved on Win64) from the 8+rsp / 16+rsp slots and return.
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
$L$SEH_end_aesni_cbc_sha256_enc_avx:
ALIGN 64
; ---- aesni_cbc_sha256_enc_avx2: entry and prologue (Win64) ----
; Takes seven arguments in the Microsoft x64 convention (rcx, rdx, r8, r9,
; then three stack slots) and moves them into rdi, rsi, rdx, rcx, r8, r9, r10
; for the body that follows.
aesni_cbc_sha256_enc_avx2:
; rdi/rsi are callee-saved on Win64; park them in the caller-provided slots
; above the return address.
mov QWORD[8+rsp],rdi ;WIN64 prologue
mov QWORD[16+rsp],rsi
mov rax,rsp
$L$SEH_begin_aesni_cbc_sha256_enc_avx2:
; arg1..arg4: rcx, rdx, r8, r9  ->  rdi, rsi, rdx, rcx
mov rdi,rcx
mov rsi,rdx
mov rdx,r8
mov rcx,r9
; arg5/arg6 come from the stack (return address + 32-byte shadow space = 40).
mov r8,QWORD[40+rsp]
mov r9,QWORD[48+rsp]
$L$avx2_shortcut:
; arg7 from the stack; rax = stack pointer at entry, saved in the frame below.
mov r10,QWORD[56+rsp]
mov rax,rsp
; Save the callee-saved general-purpose registers (restored via rax-relative
; loads on the exit path).
push rbx
push rbp
push r12
push r13
push r14
push r15
; Carve out the frame: reserve 736 bytes, round rsp down to a 1024-byte
; boundary, then move up 448, so rsp ends 448 bytes above a 1024-byte boundary.
sub rsp,736
and rsp,-256*4
add rsp,448
; rdx = rdi + rdx*64: a limit pointer, treating arg3 as a count of 64-byte units.
; rsi and r10 become offsets relative to rdi (arg2 - arg1, arg7 - arg1).
shl rdx,6
sub rsi,rdi
sub r10,rdi
add rdx,rdi
; Frame slots: 64+16 = limit pointer, 64+32 = arg5, 64+40 = arg6,
; 64+48 = (arg7 - arg1), 120 = stack pointer at entry.
mov QWORD[((64+16))+rsp],rdx
mov QWORD[((64+32))+rsp],r8
mov QWORD[((64+40))+rsp],r9
mov QWORD[((64+48))+rsp],r10
mov QWORD[120+rsp],rax
; xmm6-xmm15 are callee-saved in the Microsoft x64 ABI; spill them to the frame.
movaps XMMWORD[128+rsp],xmm6
movaps XMMWORD[144+rsp],xmm7
movaps XMMWORD[160+rsp],xmm8
movaps XMMWORD[176+rsp],xmm9
movaps XMMWORD[192+rsp],xmm10
movaps XMMWORD[208+rsp],xmm11
movaps XMMWORD[224+rsp],xmm12
movaps XMMWORD[240+rsp],xmm13
movaps XMMWORD[256+rsp],xmm14
movaps XMMWORD[272+rsp],xmm15
; ---- Register setup before the main AVX2 loop ----
$L$prologue_avx2:
; Start with all vector registers zeroed.
vzeroall
; r13 = arg1 pointer; high qword of xmm15 = (arg2 - arg1) offset, kept there
; because the round code uses rsi as scratch.
mov r13,rdi
vpinsrq xmm15,xmm15,rsi,1
; rdi = rcx + 128: every later access to this structure is written as
; (offset-128)+rdi.
lea rdi,[128+rcx]
; r12 = base of a table located 544 bytes into K256.
lea r12,[((K256+544))]
; r14d = dword at original rcx + 240.
; NOTE(review): presumably the AES round count stored in the key schedule; confirm.
mov r14d,DWORD[((240-128))+rdi]
; r15 = arg6 pointer (the eight 32-bit words loaded below); rsi = (arg7 - arg1).
mov r15,r9
mov rsi,r10
; xmm8 = 16 bytes at arg5.
; NOTE(review): presumably the CBC IV / previous ciphertext block.
vmovdqu xmm8,XMMWORD[r8]
; Index the table at r12 with (r14 - 9) * 8 and load three consecutive
; 16-byte masks into xmm14, xmm13, xmm12. They gate the three vaesenclast
; results in the round code.
lea r14,[((-9))+r14]
vmovdqa xmm14,XMMWORD[r14*8+r12]
vmovdqa xmm13,XMMWORD[16+r14*8+r12]
vmovdqa xmm12,XMMWORD[32+r14*8+r12]
; r13 += 64 (written as subtracting -64).
sub r13,-16*4
; Load the eight working words from [r15] into eax, ebx, ecx, edx, r8d..r11d,
; interleaved with the pointer set-up for the second 64-byte chunk.
mov eax,DWORD[r15]
; r12 = r13 + rsi: address of the 64 bytes that feed the upper ymm lanes.
lea r12,[r13*1+rsi]
mov ebx,DWORD[4+r15]
; If r13 already equals the limit in rdx there is no following chunk: point
; r12 at the stack instead (cmove below; the mov in between leaves flags intact).
cmp r13,rdx
mov ecx,DWORD[8+r15]
cmove r12,rsp
mov edx,DWORD[12+r15]
mov r8d,DWORD[16+r15]
mov r9d,DWORD[20+r15]
mov r10d,DWORD[24+r15]
mov r11d,DWORD[28+r15]
; xmm10 = first 16 bytes of the structure originally pointed to by rcx.
vmovdqu xmm10,XMMWORD[((0-128))+rdi]
jmp NEAR $L$oop_avx2
ALIGN 16
; ---- Main loop head: load message data and stage the first X+K values ----
$L$oop_avx2:
; ymm7 = byte-shuffle control stored 512 bytes into K256.
; NOTE(review): presumably a byte-swap mask converting to big-endian words; confirm.
vmovdqa ymm7,YMMWORD[((K256+512))]
; Lower 128-bit lanes: the 64 bytes ending at r13+rsi.
vmovdqu xmm0,XMMWORD[((-64+0))+r13*1+rsi]
vmovdqu xmm1,XMMWORD[((-64+16))+r13*1+rsi]
vmovdqu xmm2,XMMWORD[((-64+32))+r13*1+rsi]
vmovdqu xmm3,XMMWORD[((-64+48))+r13*1+rsi]
; Upper 128-bit lanes: 64 bytes at r12 (following chunk, or the stack when
; r12 was redirected), shuffled with ymm7 as each register completes.
vinserti128 ymm0,ymm0,XMMWORD[r12],1
vinserti128 ymm1,ymm1,XMMWORD[16+r12],1
vpshufb ymm0,ymm0,ymm7
vinserti128 ymm2,ymm2,XMMWORD[32+r12],1
vpshufb ymm1,ymm1,ymm7
vinserti128 ymm3,ymm3,XMMWORD[48+r12],1
; rbp = start of the K256 constant table.
lea rbp,[K256]
vpshufb ymm2,ymm2,ymm7
; Step r13 back by 64; the round code below reads 16-byte blocks from [r13].
lea r13,[((-64))+r13]
; Add the first 128 bytes of table constants to the shuffled message words ...
vpaddd ymm4,ymm0,YMMWORD[rbp]
vpshufb ymm3,ymm3,ymm7
vpaddd ymm5,ymm1,YMMWORD[32+rbp]
vpaddd ymm6,ymm2,YMMWORD[64+rbp]
vpaddd ymm7,ymm3,YMMWORD[96+rbp]
; ... and spill the sums to the stack, moving rsp down by 64 between the two
; pairs. Scalar round state is seeded in between: r14d = 0, esi = ebx ^ ecx,
; r12d = r9d.
vmovdqa YMMWORD[rsp],ymm4
xor r14d,r14d
vmovdqa YMMWORD[32+rsp],ymm5
lea rsp,[((-64))+rsp]
mov esi,ebx
vmovdqa YMMWORD[rsp],ymm6
xor esi,ecx
vmovdqa YMMWORD[32+rsp],ymm7
mov r12d,r9d
; rbp += 128 (written as subtracting -128): skip the constants just consumed.
sub rbp,-16*2*4
jmp NEAR $L$avx2_00_47
ALIGN 16
$L$avx2_00_47:
vmovdqu xmm9,XMMWORD[r13]
vpinsrq xmm15,xmm15,r13,0
lea rsp,[((-64))+rsp]
vpalignr ymm4,ymm1,ymm0,4
add r11d,DWORD[((0+128))+rsp]
and r12d,r8d
rorx r13d,r8d,25
vpalignr ymm7,ymm3,ymm2,4
rorx r15d,r8d,11
lea eax,[r14*1+rax]
lea r11d,[r12*1+r11]
vpsrld ymm6,ymm4,7
andn r12d,r8d,r10d
xor r13d,r15d
rorx r14d,r8d,6
vpaddd ymm0,ymm0,ymm7
lea r11d,[r12*1+r11]
xor r13d,r14d
mov r15d,eax
vpsrld ymm7,ymm4,3
rorx r12d,eax,22
lea r11d,[r13*1+r11]
xor r15d,ebx
vpslld ymm5,ymm4,14
rorx r14d,eax,13
rorx r13d,eax,2
lea edx,[r11*1+rdx]
vpxor ymm4,ymm7,ymm6
and esi,r15d
vpxor xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((16-128))+rdi]
xor r14d,r12d
xor esi,ebx
vpshufd ymm7,ymm3,250
xor r14d,r13d
lea r11d,[rsi*1+r11]
mov r12d,r8d
vpsrld ymm6,ymm6,11
add r10d,DWORD[((4+128))+rsp]
and r12d,edx
rorx r13d,edx,25
vpxor ymm4,ymm4,ymm5
rorx esi,edx,11
lea r11d,[r14*1+r11]
lea r10d,[r12*1+r10]
vpslld ymm5,ymm5,11
andn r12d,edx,r9d
xor r13d,esi
rorx r14d,edx,6
vpxor ymm4,ymm4,ymm6
lea r10d,[r12*1+r10]
xor r13d,r14d
mov esi,r11d
vpsrld ymm6,ymm7,10
rorx r12d,r11d,22
lea r10d,[r13*1+r10]
xor esi,eax
vpxor ymm4,ymm4,ymm5
rorx r14d,r11d,13
rorx r13d,r11d,2
lea ecx,[r10*1+rcx]
vpsrlq ymm7,ymm7,17
and r15d,esi
vpxor xmm9,xmm9,xmm8
xor r14d,r12d
xor r15d,eax
vpaddd ymm0,ymm0,ymm4
xor r14d,r13d
lea r10d,[r15*1+r10]
mov r12d,edx
vpxor ymm6,ymm6,ymm7
add r9d,DWORD[((8+128))+rsp]
and r12d,ecx
rorx r13d,ecx,25
vpsrlq ymm7,ymm7,2
rorx r15d,ecx,11
lea r10d,[r14*1+r10]
lea r9d,[r12*1+r9]
vpxor ymm6,ymm6,ymm7
andn r12d,ecx,r8d
xor r13d,r15d
rorx r14d,ecx,6
vpshufd ymm6,ymm6,132
lea r9d,[r12*1+r9]
xor r13d,r14d
mov r15d,r10d
vpsrldq ymm6,ymm6,8
rorx r12d,r10d,22
lea r9d,[r13*1+r9]
xor r15d,r11d
vpaddd ymm0,ymm0,ymm6
rorx r14d,r10d,13
rorx r13d,r10d,2
lea ebx,[r9*1+rbx]
vpshufd ymm7,ymm0,80
and esi,r15d
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((32-128))+rdi]
xor r14d,r12d
xor esi,r11d
vpsrld ymm6,ymm7,10
xor r14d,r13d
lea r9d,[rsi*1+r9]
mov r12d,ecx
vpsrlq ymm7,ymm7,17
add r8d,DWORD[((12+128))+rsp]
and r12d,ebx
rorx r13d,ebx,25
vpxor ymm6,ymm6,ymm7
rorx esi,ebx,11
lea r9d,[r14*1+r9]
lea r8d,[r12*1+r8]
vpsrlq ymm7,ymm7,2
andn r12d,ebx,edx
xor r13d,esi
rorx r14d,ebx,6
vpxor ymm6,ymm6,ymm7
lea r8d,[r12*1+r8]
xor r13d,r14d
mov esi,r9d
vpshufd ymm6,ymm6,232
rorx r12d,r9d,22
lea r8d,[r13*1+r8]
xor esi,r10d
vpslldq ymm6,ymm6,8
rorx r14d,r9d,13
rorx r13d,r9d,2
lea eax,[r8*1+rax]
vpaddd ymm0,ymm0,ymm6
and r15d,esi
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((48-128))+rdi]
xor r14d,r12d
xor r15d,r10d
vpaddd ymm6,ymm0,YMMWORD[rbp]
xor r14d,r13d
lea r8d,[r15*1+r8]
mov r12d,ebx
vmovdqa YMMWORD[rsp],ymm6
vpalignr ymm4,ymm2,ymm1,4
add edx,DWORD[((32+128))+rsp]
and r12d,eax
rorx r13d,eax,25
vpalignr ymm7,ymm0,ymm3,4
rorx r15d,eax,11
lea r8d,[r14*1+r8]
lea edx,[r12*1+rdx]
vpsrld ymm6,ymm4,7
andn r12d,eax,ecx
xor r13d,r15d
rorx r14d,eax,6
vpaddd ymm1,ymm1,ymm7
lea edx,[r12*1+rdx]
xor r13d,r14d
mov r15d,r8d
vpsrld ymm7,ymm4,3
rorx r12d,r8d,22
lea edx,[r13*1+rdx]
xor r15d,r9d
vpslld ymm5,ymm4,14
rorx r14d,r8d,13
rorx r13d,r8d,2
lea r11d,[rdx*1+r11]
vpxor ymm4,ymm7,ymm6
and esi,r15d
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((64-128))+rdi]
xor r14d,r12d
xor esi,r9d
vpshufd ymm7,ymm0,250
xor r14d,r13d
lea edx,[rsi*1+rdx]
mov r12d,eax
vpsrld ymm6,ymm6,11
add ecx,DWORD[((36+128))+rsp]
and r12d,r11d
rorx r13d,r11d,25
vpxor ymm4,ymm4,ymm5
rorx esi,r11d,11
lea edx,[r14*1+rdx]
lea ecx,[r12*1+rcx]
vpslld ymm5,ymm5,11
andn r12d,r11d,ebx
xor r13d,esi
rorx r14d,r11d,6
vpxor ymm4,ymm4,ymm6
lea ecx,[r12*1+rcx]
xor r13d,r14d
mov esi,edx
vpsrld ymm6,ymm7,10
rorx r12d,edx,22
lea ecx,[r13*1+rcx]
xor esi,r8d
vpxor ymm4,ymm4,ymm5
rorx r14d,edx,13
rorx r13d,edx,2
lea r10d,[rcx*1+r10]
vpsrlq ymm7,ymm7,17
and r15d,esi
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((80-128))+rdi]
xor r14d,r12d
xor r15d,r8d
vpaddd ymm1,ymm1,ymm4
xor r14d,r13d
lea ecx,[r15*1+rcx]
mov r12d,r11d
vpxor ymm6,ymm6,ymm7
add ebx,DWORD[((40+128))+rsp]
and r12d,r10d
rorx r13d,r10d,25
vpsrlq ymm7,ymm7,2
rorx r15d,r10d,11
lea ecx,[r14*1+rcx]
lea ebx,[r12*1+rbx]
vpxor ymm6,ymm6,ymm7
andn r12d,r10d,eax
xor r13d,r15d
rorx r14d,r10d,6
vpshufd ymm6,ymm6,132
lea ebx,[r12*1+rbx]
xor r13d,r14d
mov r15d,ecx
vpsrldq ymm6,ymm6,8
rorx r12d,ecx,22
lea ebx,[r13*1+rbx]
xor r15d,edx
vpaddd ymm1,ymm1,ymm6
rorx r14d,ecx,13
rorx r13d,ecx,2
lea r9d,[rbx*1+r9]
vpshufd ymm7,ymm1,80
and esi,r15d
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((96-128))+rdi]
xor r14d,r12d
xor esi,edx
vpsrld ymm6,ymm7,10
xor r14d,r13d
lea ebx,[rsi*1+rbx]
mov r12d,r10d
vpsrlq ymm7,ymm7,17
add eax,DWORD[((44+128))+rsp]
and r12d,r9d
rorx r13d,r9d,25
vpxor ymm6,ymm6,ymm7
rorx esi,r9d,11
lea ebx,[r14*1+rbx]
lea eax,[r12*1+rax]
vpsrlq ymm7,ymm7,2
andn r12d,r9d,r11d
xor r13d,esi
rorx r14d,r9d,6
vpxor ymm6,ymm6,ymm7
lea eax,[r12*1+rax]
xor r13d,r14d
mov esi,ebx
vpshufd ymm6,ymm6,232
rorx r12d,ebx,22
lea eax,[r13*1+rax]
xor esi,ecx
vpslldq ymm6,ymm6,8
rorx r14d,ebx,13
rorx r13d,ebx,2
lea r8d,[rax*1+r8]
vpaddd ymm1,ymm1,ymm6
and r15d,esi
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((112-128))+rdi]
xor r14d,r12d
xor r15d,ecx
vpaddd ymm6,ymm1,YMMWORD[32+rbp]
xor r14d,r13d
lea eax,[r15*1+rax]
mov r12d,r9d
vmovdqa YMMWORD[32+rsp],ymm6
lea rsp,[((-64))+rsp]
vpalignr ymm4,ymm3,ymm2,4
add r11d,DWORD[((0+128))+rsp]
and r12d,r8d
rorx r13d,r8d,25
vpalignr ymm7,ymm1,ymm0,4
rorx r15d,r8d,11
lea eax,[r14*1+rax]
lea r11d,[r12*1+r11]
vpsrld ymm6,ymm4,7
andn r12d,r8d,r10d
xor r13d,r15d
rorx r14d,r8d,6
vpaddd ymm2,ymm2,ymm7
lea r11d,[r12*1+r11]
xor r13d,r14d
mov r15d,eax
vpsrld ymm7,ymm4,3
rorx r12d,eax,22
lea r11d,[r13*1+r11]
xor r15d,ebx
vpslld ymm5,ymm4,14
rorx r14d,eax,13
rorx r13d,eax,2
lea edx,[r11*1+rdx]
vpxor ymm4,ymm7,ymm6
and esi,r15d
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((128-128))+rdi]
xor r14d,r12d
xor esi,ebx
vpshufd ymm7,ymm1,250
xor r14d,r13d
lea r11d,[rsi*1+r11]
mov r12d,r8d
vpsrld ymm6,ymm6,11
add r10d,DWORD[((4+128))+rsp]
and r12d,edx
rorx r13d,edx,25
vpxor ymm4,ymm4,ymm5
rorx esi,edx,11
lea r11d,[r14*1+r11]
lea r10d,[r12*1+r10]
vpslld ymm5,ymm5,11
andn r12d,edx,r9d
xor r13d,esi
rorx r14d,edx,6
vpxor ymm4,ymm4,ymm6
lea r10d,[r12*1+r10]
xor r13d,r14d
mov esi,r11d
vpsrld ymm6,ymm7,10
rorx r12d,r11d,22
lea r10d,[r13*1+r10]
xor esi,eax
vpxor ymm4,ymm4,ymm5
rorx r14d,r11d,13
rorx r13d,r11d,2
lea ecx,[r10*1+rcx]
vpsrlq ymm7,ymm7,17
and r15d,esi
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((144-128))+rdi]
xor r14d,r12d
xor r15d,eax
vpaddd ymm2,ymm2,ymm4
xor r14d,r13d
lea r10d,[r15*1+r10]
mov r12d,edx
vpxor ymm6,ymm6,ymm7
add r9d,DWORD[((8+128))+rsp]
and r12d,ecx
rorx r13d,ecx,25
vpsrlq ymm7,ymm7,2
rorx r15d,ecx,11
lea r10d,[r14*1+r10]
lea r9d,[r12*1+r9]
vpxor ymm6,ymm6,ymm7
andn r12d,ecx,r8d
xor r13d,r15d
rorx r14d,ecx,6
vpshufd ymm6,ymm6,132
lea r9d,[r12*1+r9]
xor r13d,r14d
mov r15d,r10d
vpsrldq ymm6,ymm6,8
rorx r12d,r10d,22
lea r9d,[r13*1+r9]
xor r15d,r11d
vpaddd ymm2,ymm2,ymm6
rorx r14d,r10d,13
rorx r13d,r10d,2
lea ebx,[r9*1+rbx]
vpshufd ymm7,ymm2,80
and esi,r15d
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((160-128))+rdi]
xor r14d,r12d
xor esi,r11d
vpsrld ymm6,ymm7,10
xor r14d,r13d
lea r9d,[rsi*1+r9]
mov r12d,ecx
vpsrlq ymm7,ymm7,17
add r8d,DWORD[((12+128))+rsp]
and r12d,ebx
rorx r13d,ebx,25
vpxor ymm6,ymm6,ymm7
rorx esi,ebx,11
lea r9d,[r14*1+r9]
lea r8d,[r12*1+r8]
vpsrlq ymm7,ymm7,2
andn r12d,ebx,edx
xor r13d,esi
rorx r14d,ebx,6
vpxor ymm6,ymm6,ymm7
lea r8d,[r12*1+r8]
xor r13d,r14d
mov esi,r9d
vpshufd ymm6,ymm6,232
rorx r12d,r9d,22
lea r8d,[r13*1+r8]
xor esi,r10d
vpslldq ymm6,ymm6,8
rorx r14d,r9d,13
rorx r13d,r9d,2
lea eax,[r8*1+rax]
vpaddd ymm2,ymm2,ymm6
and r15d,esi
vaesenclast xmm11,xmm9,xmm10
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((176-128))+rdi]
xor r14d,r12d
xor r15d,r10d
vpaddd ymm6,ymm2,YMMWORD[64+rbp]
xor r14d,r13d
lea r8d,[r15*1+r8]
mov r12d,ebx
vmovdqa YMMWORD[rsp],ymm6
vpalignr ymm4,ymm0,ymm3,4
add edx,DWORD[((32+128))+rsp]
and r12d,eax
rorx r13d,eax,25
vpalignr ymm7,ymm2,ymm1,4
rorx r15d,eax,11
lea r8d,[r14*1+r8]
lea edx,[r12*1+rdx]
vpsrld ymm6,ymm4,7
andn r12d,eax,ecx
xor r13d,r15d
rorx r14d,eax,6
vpaddd ymm3,ymm3,ymm7
lea edx,[r12*1+rdx]
xor r13d,r14d
mov r15d,r8d
vpsrld ymm7,ymm4,3
rorx r12d,r8d,22
lea edx,[r13*1+rdx]
xor r15d,r9d
vpslld ymm5,ymm4,14
rorx r14d,r8d,13
rorx r13d,r8d,2
lea r11d,[rdx*1+r11]
vpxor ymm4,ymm7,ymm6
and esi,r15d
vpand xmm8,xmm11,xmm12
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((192-128))+rdi]
xor r14d,r12d
xor esi,r9d
vpshufd ymm7,ymm2,250
xor r14d,r13d
lea edx,[rsi*1+rdx]
mov r12d,eax
vpsrld ymm6,ymm6,11
add ecx,DWORD[((36+128))+rsp]
and r12d,r11d
rorx r13d,r11d,25
vpxor ymm4,ymm4,ymm5
rorx esi,r11d,11
lea edx,[r14*1+rdx]
lea ecx,[r12*1+rcx]
vpslld ymm5,ymm5,11
andn r12d,r11d,ebx
xor r13d,esi
rorx r14d,r11d,6
vpxor ymm4,ymm4,ymm6
lea ecx,[r12*1+rcx]
xor r13d,r14d
mov esi,edx
vpsrld ymm6,ymm7,10
rorx r12d,edx,22
lea ecx,[r13*1+rcx]
xor esi,r8d
vpxor ymm4,ymm4,ymm5
rorx r14d,edx,13
rorx r13d,edx,2
lea r10d,[rcx*1+r10]
vpsrlq ymm7,ymm7,17
and r15d,esi
vaesenclast xmm11,xmm9,xmm10
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((208-128))+rdi]
xor r14d,r12d
xor r15d,r8d
vpaddd ymm3,ymm3,ymm4
xor r14d,r13d
lea ecx,[r15*1+rcx]
mov r12d,r11d
vpxor ymm6,ymm6,ymm7
add ebx,DWORD[((40+128))+rsp]
and r12d,r10d
rorx r13d,r10d,25
vpsrlq ymm7,ymm7,2
rorx r15d,r10d,11
lea ecx,[r14*1+rcx]
lea ebx,[r12*1+rbx]
vpxor ymm6,ymm6,ymm7
andn r12d,r10d,eax
xor r13d,r15d
rorx r14d,r10d,6
vpshufd ymm6,ymm6,132
lea ebx,[r12*1+rbx]
xor r13d,r14d
mov r15d,ecx
vpsrldq ymm6,ymm6,8
rorx r12d,ecx,22
lea ebx,[r13*1+rbx]
xor r15d,edx
vpaddd ymm3,ymm3,ymm6
rorx r14d,ecx,13
rorx r13d,ecx,2
lea r9d,[rbx*1+r9]
vpshufd ymm7,ymm3,80
and esi,r15d
vpand xmm11,xmm11,xmm13
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((224-128))+rdi]
xor r14d,r12d
xor esi,edx
vpsrld ymm6,ymm7,10
xor r14d,r13d
lea ebx,[rsi*1+rbx]
mov r12d,r10d
vpsrlq ymm7,ymm7,17
add eax,DWORD[((44+128))+rsp]
and r12d,r9d
rorx r13d,r9d,25
vpxor ymm6,ymm6,ymm7
rorx esi,r9d,11
lea ebx,[r14*1+rbx]
lea eax,[r12*1+rax]
vpsrlq ymm7,ymm7,2
andn r12d,r9d,r11d
xor r13d,esi
rorx r14d,r9d,6
vpxor ymm6,ymm6,ymm7
lea eax,[r12*1+rax]
xor r13d,r14d
mov esi,ebx
vpshufd ymm6,ymm6,232
rorx r12d,ebx,22
lea eax,[r13*1+rax]
xor esi,ecx
vpslldq ymm6,ymm6,8
rorx r14d,ebx,13
rorx r13d,ebx,2
lea r8d,[rax*1+r8]
vpaddd ymm3,ymm3,ymm6
and r15d,esi
vpor xmm8,xmm8,xmm11
vaesenclast xmm11,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((0-128))+rdi]
xor r14d,r12d
xor r15d,ecx
vpaddd ymm6,ymm3,YMMWORD[96+rbp]
xor r14d,r13d
lea eax,[r15*1+rax]
mov r12d,r9d
vmovdqa YMMWORD[32+rsp],ymm6
; ---- End of one $L$avx2_00_47 pass: emit a ciphertext block, test for loop exit ----
; Recover the two values parked in xmm15: low qword = pointer saved at the top
; of the pass, high qword = offset inserted during register setup.
vmovq r13,xmm15
vpextrq r15,xmm15,1
; Merge the last masked vaesenclast result into xmm8.
vpand xmm11,xmm11,xmm14
vpor xmm8,xmm8,xmm11
; Write the 16-byte block in xmm8 to [r13 + r15], then advance r13 by 16.
vmovdqu XMMWORD[r13*1+r15],xmm8
lea r13,[16+r13]
; Advance the constant-table pointer by 128 bytes and repeat the pass while
; byte 3 of the next table entry is non-zero.
; NOTE(review): a zero byte there presumably marks the end of the round
; constants; confirm against the K256 layout.
lea rbp,[128+rbp]
cmp BYTE[3+rbp],0
jne NEAR $L$avx2_00_47
vmovdqu xmm9,XMMWORD[r13]
vpinsrq xmm15,xmm15,r13,0
add r11d,DWORD[((0+64))+rsp]
and r12d,r8d
rorx r13d,r8d,25
rorx r15d,r8d,11
lea eax,[r14*1+rax]
lea r11d,[r12*1+r11]
andn r12d,r8d,r10d
xor r13d,r15d
rorx r14d,r8d,6
lea r11d,[r12*1+r11]
xor r13d,r14d
mov r15d,eax
rorx r12d,eax,22
lea r11d,[r13*1+r11]
xor r15d,ebx
rorx r14d,eax,13
rorx r13d,eax,2
lea edx,[r11*1+rdx]
and esi,r15d
vpxor xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((16-128))+rdi]
xor r14d,r12d
xor esi,ebx
xor r14d,r13d
lea r11d,[rsi*1+r11]
mov r12d,r8d
add r10d,DWORD[((4+64))+rsp]
and r12d,edx
rorx r13d,edx,25
rorx esi,edx,11
lea r11d,[r14*1+r11]
lea r10d,[r12*1+r10]
andn r12d,edx,r9d
xor r13d,esi
rorx r14d,edx,6
lea r10d,[r12*1+r10]
xor r13d,r14d
mov esi,r11d
rorx r12d,r11d,22
lea r10d,[r13*1+r10]
xor esi,eax
rorx r14d,r11d,13
rorx r13d,r11d,2
lea ecx,[r10*1+rcx]
and r15d,esi
vpxor xmm9,xmm9,xmm8
xor r14d,r12d
xor r15d,eax
xor r14d,r13d
lea r10d,[r15*1+r10]
mov r12d,edx
add r9d,DWORD[((8+64))+rsp]
and r12d,ecx
rorx r13d,ecx,25
rorx r15d,ecx,11
lea r10d,[r14*1+r10]
lea r9d,[r12*1+r9]
andn r12d,ecx,r8d
xor r13d,r15d
rorx r14d,ecx,6
lea r9d,[r12*1+r9]
xor r13d,r14d
mov r15d,r10d
rorx r12d,r10d,22
lea r9d,[r13*1+r9]
xor r15d,r11d
rorx r14d,r10d,13
rorx r13d,r10d,2
lea ebx,[r9*1+rbx]
and esi,r15d
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((32-128))+rdi]
xor r14d,r12d
xor esi,r11d
xor r14d,r13d
lea r9d,[rsi*1+r9]
mov r12d,ecx
add r8d,DWORD[((12+64))+rsp]
and r12d,ebx
rorx r13d,ebx,25
rorx esi,ebx,11
lea r9d,[r14*1+r9]
lea r8d,[r12*1+r8]
andn r12d,ebx,edx
xor r13d,esi
rorx r14d,ebx,6
lea r8d,[r12*1+r8]
xor r13d,r14d
mov esi,r9d
rorx r12d,r9d,22
lea r8d,[r13*1+r8]
xor esi,r10d
rorx r14d,r9d,13
rorx r13d,r9d,2
lea eax,[r8*1+rax]
and r15d,esi
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((48-128))+rdi]
xor r14d,r12d
xor r15d,r10d
xor r14d,r13d
lea r8d,[r15*1+r8]
mov r12d,ebx
add edx,DWORD[((32+64))+rsp]
and r12d,eax
rorx r13d,eax,25
rorx r15d,eax,11
lea r8d,[r14*1+r8]
lea edx,[r12*1+rdx]
andn r12d,eax,ecx
xor r13d,r15d
rorx r14d,eax,6
lea edx,[r12*1+rdx]
xor r13d,r14d
mov r15d,r8d
rorx r12d,r8d,22
lea edx,[r13*1+rdx]
xor r15d,r9d
rorx r14d,r8d,13
rorx r13d,r8d,2
lea r11d,[rdx*1+r11]
and esi,r15d
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((64-128))+rdi]
xor r14d,r12d
xor esi,r9d
xor r14d,r13d
lea edx,[rsi*1+rdx]
mov r12d,eax
add ecx,DWORD[((36+64))+rsp]
and r12d,r11d
rorx r13d,r11d,25
rorx esi,r11d,11
lea edx,[r14*1+rdx]
lea ecx,[r12*1+rcx]
andn r12d,r11d,ebx
xor r13d,esi
rorx r14d,r11d,6
lea ecx,[r12*1+rcx]
xor r13d,r14d
mov esi,edx
rorx r12d,edx,22
lea ecx,[r13*1+rcx]
xor esi,r8d
rorx r14d,edx,13
rorx r13d,edx,2
lea r10d,[rcx*1+r10]
and r15d,esi
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((80-128))+rdi]
xor r14d,r12d
xor r15d,r8d
xor r14d,r13d
lea ecx,[r15*1+rcx]
mov r12d,r11d
add ebx,DWORD[((40+64))+rsp]
and r12d,r10d
rorx r13d,r10d,25
rorx r15d,r10d,11
lea ecx,[r14*1+rcx]
lea ebx,[r12*1+rbx]
andn r12d,r10d,eax
xor r13d,r15d
rorx r14d,r10d,6
lea ebx,[r12*1+rbx]
xor r13d,r14d
mov r15d,ecx
rorx r12d,ecx,22
lea ebx,[r13*1+rbx]
xor r15d,edx
rorx r14d,ecx,13
rorx r13d,ecx,2
lea r9d,[rbx*1+r9]
and esi,r15d
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((96-128))+rdi]
xor r14d,r12d
xor esi,edx
xor r14d,r13d
lea ebx,[rsi*1+rbx]
mov r12d,r10d
add eax,DWORD[((44+64))+rsp]
and r12d,r9d
rorx r13d,r9d,25
rorx esi,r9d,11
lea ebx,[r14*1+rbx]
lea eax,[r12*1+rax]
andn r12d,r9d,r11d
xor r13d,esi
rorx r14d,r9d,6
lea eax,[r12*1+rax]
xor r13d,r14d
mov esi,ebx
rorx r12d,ebx,22
lea eax,[r13*1+rax]
xor esi,ecx
rorx r14d,ebx,13
rorx r13d,ebx,2
lea r8d,[rax*1+r8]
and r15d,esi
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((112-128))+rdi]
xor r14d,r12d
xor r15d,ecx
xor r14d,r13d
lea eax,[r15*1+rax]
mov r12d,r9d
add r11d,DWORD[rsp]
and r12d,r8d
rorx r13d,r8d,25
rorx r15d,r8d,11
lea eax,[r14*1+rax]
lea r11d,[r12*1+r11]
andn r12d,r8d,r10d
xor r13d,r15d
rorx r14d,r8d,6
lea r11d,[r12*1+r11]
xor r13d,r14d
mov r15d,eax
rorx r12d,eax,22
lea r11d,[r13*1+r11]
xor r15d,ebx
rorx r14d,eax,13
rorx r13d,eax,2
lea edx,[r11*1+rdx]
and esi,r15d
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((128-128))+rdi]
xor r14d,r12d
xor esi,ebx
xor r14d,r13d
lea r11d,[rsi*1+r11]
mov r12d,r8d
add r10d,DWORD[4+rsp]
and r12d,edx
rorx r13d,edx,25
rorx esi,edx,11
lea r11d,[r14*1+r11]
lea r10d,[r12*1+r10]
andn r12d,edx,r9d
xor r13d,esi
rorx r14d,edx,6
lea r10d,[r12*1+r10]
xor r13d,r14d
mov esi,r11d
rorx r12d,r11d,22
lea r10d,[r13*1+r10]
xor esi,eax
rorx r14d,r11d,13
rorx r13d,r11d,2
lea ecx,[r10*1+rcx]
and r15d,esi
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((144-128))+rdi]
xor r14d,r12d
xor r15d,eax
xor r14d,r13d
lea r10d,[r15*1+r10]
mov r12d,edx
add r9d,DWORD[8+rsp]
and r12d,ecx
rorx r13d,ecx,25
rorx r15d,ecx,11
lea r10d,[r14*1+r10]
lea r9d,[r12*1+r9]
andn r12d,ecx,r8d
xor r13d,r15d
rorx r14d,ecx,6
lea r9d,[r12*1+r9]
xor r13d,r14d
mov r15d,r10d
rorx r12d,r10d,22
lea r9d,[r13*1+r9]
xor r15d,r11d
rorx r14d,r10d,13
rorx r13d,r10d,2
lea ebx,[r9*1+rbx]
and esi,r15d
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((160-128))+rdi]
xor r14d,r12d
xor esi,r11d
xor r14d,r13d
lea r9d,[rsi*1+r9]
mov r12d,ecx
add r8d,DWORD[12+rsp]
and r12d,ebx
rorx r13d,ebx,25
rorx esi,ebx,11
lea r9d,[r14*1+r9]
lea r8d,[r12*1+r8]
andn r12d,ebx,edx
xor r13d,esi
rorx r14d,ebx,6
lea r8d,[r12*1+r8]
xor r13d,r14d
mov esi,r9d
rorx r12d,r9d,22
lea r8d,[r13*1+r8]
xor esi,r10d
rorx r14d,r9d,13
rorx r13d,r9d,2
lea eax,[r8*1+rax]
and r15d,esi
vaesenclast xmm11,xmm9,xmm10
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((176-128))+rdi]
xor r14d,r12d
xor r15d,r10d
xor r14d,r13d
lea r8d,[r15*1+r8]
mov r12d,ebx
add edx,DWORD[32+rsp]
and r12d,eax
rorx r13d,eax,25
rorx r15d,eax,11
lea r8d,[r14*1+r8]
lea edx,[r12*1+rdx]
andn r12d,eax,ecx
xor r13d,r15d
rorx r14d,eax,6
lea edx,[r12*1+rdx]
xor r13d,r14d
mov r15d,r8d
rorx r12d,r8d,22
lea edx,[r13*1+rdx]
xor r15d,r9d
rorx r14d,r8d,13
rorx r13d,r8d,2
lea r11d,[rdx*1+r11]
and esi,r15d
vpand xmm8,xmm11,xmm12
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((192-128))+rdi]
xor r14d,r12d
xor esi,r9d
xor r14d,r13d
lea edx,[rsi*1+rdx]
mov r12d,eax
add ecx,DWORD[36+rsp]
and r12d,r11d
rorx r13d,r11d,25
rorx esi,r11d,11
lea edx,[r14*1+rdx]
lea ecx,[r12*1+rcx]
andn r12d,r11d,ebx
xor r13d,esi
rorx r14d,r11d,6
lea ecx,[r12*1+rcx]
xor r13d,r14d
mov esi,edx
rorx r12d,edx,22
lea ecx,[r13*1+rcx]
xor esi,r8d
rorx r14d,edx,13
rorx r13d,edx,2
lea r10d,[rcx*1+r10]
and r15d,esi
vaesenclast xmm11,xmm9,xmm10
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((208-128))+rdi]
xor r14d,r12d
xor r15d,r8d
xor r14d,r13d
lea ecx,[r15*1+rcx]
mov r12d,r11d
add ebx,DWORD[40+rsp]
and r12d,r10d
rorx r13d,r10d,25
rorx r15d,r10d,11
lea ecx,[r14*1+rcx]
lea ebx,[r12*1+rbx]
andn r12d,r10d,eax
xor r13d,r15d
rorx r14d,r10d,6
lea ebx,[r12*1+rbx]
xor r13d,r14d
mov r15d,ecx
rorx r12d,ecx,22
lea ebx,[r13*1+rbx]
xor r15d,edx
rorx r14d,ecx,13
rorx r13d,ecx,2
lea r9d,[rbx*1+r9]
and esi,r15d
vpand xmm11,xmm11,xmm13
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((224-128))+rdi]
xor r14d,r12d
xor esi,edx
xor r14d,r13d
lea ebx,[rsi*1+rbx]
mov r12d,r10d
add eax,DWORD[44+rsp]
and r12d,r9d
rorx r13d,r9d,25
rorx esi,r9d,11
lea ebx,[r14*1+rbx]
lea eax,[r12*1+rax]
andn r12d,r9d,r11d
xor r13d,esi
rorx r14d,r9d,6
lea eax,[r12*1+rax]
xor r13d,r14d
mov esi,ebx
rorx r12d,ebx,22
lea eax,[r13*1+rax]
xor esi,ecx
rorx r14d,ebx,13
rorx r13d,ebx,2
lea r8d,[rax*1+r8]
and r15d,esi
vpor xmm8,xmm8,xmm11
vaesenclast xmm11,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((0-128))+rdi]
xor r14d,r12d
xor r15d,ecx
xor r14d,r13d
lea eax,[r15*1+rax]
mov r12d,r9d
; ---- AVX2 path: finish the first block, update the hash state, dispatch ----
; Recover the offset (high qword) and pointer (low qword) parked in xmm15.
vpextrq r12,xmm15,1
vmovq r13,xmm15
; r15 = frame slot at 552+rsp; rbp = 448+rsp.
; NOTE(review): rsp has been moved down by the loop code, so 448+rsp is
; presumably the frame base set up in the prologue, making 552 = 448+(64+40)
; the saved arg6 pointer and 80+rbp the saved limit pointer; confirm the total
; rsp adjustment.
mov r15,QWORD[552+rsp]
; Fold the pending r14d term from the last round into eax.
add eax,r14d
lea rbp,[448+rsp]
; Merge the last masked vaesenclast result into xmm8.
vpand xmm11,xmm11,xmm14
vpor xmm8,xmm8,xmm11
; Write the 16-byte block in xmm8 to [r13 + r12], then advance r13 by 16.
vmovdqu XMMWORD[r13*1+r12],xmm8
lea r13,[16+r13]
; Add the eight 32-bit words at [r15] into the working registers ...
add eax,DWORD[r15]
add ebx,DWORD[4+r15]
add ecx,DWORD[8+r15]
add edx,DWORD[12+r15]
add r8d,DWORD[16+r15]
add r9d,DWORD[20+r15]
add r10d,DWORD[24+r15]
add r11d,DWORD[28+r15]
; ... and write the sums back.
mov DWORD[r15],eax
mov DWORD[4+r15],ebx
mov DWORD[8+r15],ecx
mov DWORD[12+r15],edx
mov DWORD[16+r15],r8d
mov DWORD[20+r15],r9d
mov DWORD[24+r15],r10d
mov DWORD[28+r15],r11d
; Finished if r13 has reached the value stored at 80+rbp.
cmp r13,QWORD[80+rbp]
je NEAR $L$done_avx2
; Otherwise re-seed the scalar round state (r14d = 0, esi = ebx ^ ecx,
; r12d = r9d) and continue at $L$ower_avx2.
xor r14d,r14d
mov esi,ebx
mov r12d,r9d
xor esi,ecx
jmp NEAR $L$ower_avx2
ALIGN 16
$L$ower_avx2:
vmovdqu xmm9,XMMWORD[r13]
vpinsrq xmm15,xmm15,r13,0
add r11d,DWORD[((0+16))+rbp]
and r12d,r8d
rorx r13d,r8d,25
rorx r15d,r8d,11
lea eax,[r14*1+rax]
lea r11d,[r12*1+r11]
andn r12d,r8d,r10d
xor r13d,r15d
rorx r14d,r8d,6
lea r11d,[r12*1+r11]
xor r13d,r14d
mov r15d,eax
rorx r12d,eax,22
lea r11d,[r13*1+r11]
xor r15d,ebx
rorx r14d,eax,13
rorx r13d,eax,2
lea edx,[r11*1+rdx]
and esi,r15d
vpxor xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((16-128))+rdi]
xor r14d,r12d
xor esi,ebx
xor r14d,r13d
lea r11d,[rsi*1+r11]
mov r12d,r8d
add r10d,DWORD[((4+16))+rbp]
and r12d,edx
rorx r13d,edx,25
rorx esi,edx,11
lea r11d,[r14*1+r11]
lea r10d,[r12*1+r10]
andn r12d,edx,r9d
xor r13d,esi
rorx r14d,edx,6
lea r10d,[r12*1+r10]
xor r13d,r14d
mov esi,r11d
rorx r12d,r11d,22
lea r10d,[r13*1+r10]
xor esi,eax
rorx r14d,r11d,13
rorx r13d,r11d,2
lea ecx,[r10*1+rcx]
and r15d,esi
vpxor xmm9,xmm9,xmm8
xor r14d,r12d
xor r15d,eax
xor r14d,r13d
lea r10d,[r15*1+r10]
mov r12d,edx
add r9d,DWORD[((8+16))+rbp]
and r12d,ecx
rorx r13d,ecx,25
rorx r15d,ecx,11
lea r10d,[r14*1+r10]
lea r9d,[r12*1+r9]
andn r12d,ecx,r8d
xor r13d,r15d
rorx r14d,ecx,6
lea r9d,[r12*1+r9]
xor r13d,r14d
mov r15d,r10d
rorx r12d,r10d,22
lea r9d,[r13*1+r9]
xor r15d,r11d
rorx r14d,r10d,13
rorx r13d,r10d,2
lea ebx,[r9*1+rbx]
and esi,r15d
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((32-128))+rdi]
xor r14d,r12d
xor esi,r11d
xor r14d,r13d
lea r9d,[rsi*1+r9]
mov r12d,ecx
add r8d,DWORD[((12+16))+rbp]
and r12d,ebx
rorx r13d,ebx,25
rorx esi,ebx,11
lea r9d,[r14*1+r9]
lea r8d,[r12*1+r8]
andn r12d,ebx,edx
xor r13d,esi
rorx r14d,ebx,6
lea r8d,[r12*1+r8]
xor r13d,r14d
mov esi,r9d
rorx r12d,r9d,22
lea r8d,[r13*1+r8]
xor esi,r10d
rorx r14d,r9d,13
rorx r13d,r9d,2
lea eax,[r8*1+rax]
and r15d,esi
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((48-128))+rdi]
xor r14d,r12d
xor r15d,r10d
xor r14d,r13d
lea r8d,[r15*1+r8]
mov r12d,ebx
add edx,DWORD[((32+16))+rbp]
and r12d,eax
rorx r13d,eax,25
rorx r15d,eax,11
lea r8d,[r14*1+r8]
lea edx,[r12*1+rdx]
andn r12d,eax,ecx
xor r13d,r15d
rorx r14d,eax,6
lea edx,[r12*1+rdx]
xor r13d,r14d
mov r15d,r8d
rorx r12d,r8d,22
lea edx,[r13*1+rdx]
xor r15d,r9d
rorx r14d,r8d,13
rorx r13d,r8d,2
lea r11d,[rdx*1+r11]
and esi,r15d
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((64-128))+rdi]
xor r14d,r12d
xor esi,r9d
xor r14d,r13d
lea edx,[rsi*1+rdx]
mov r12d,eax
add ecx,DWORD[((36+16))+rbp]
and r12d,r11d
rorx r13d,r11d,25
rorx esi,r11d,11
lea edx,[r14*1+rdx]
lea ecx,[r12*1+rcx]
andn r12d,r11d,ebx
xor r13d,esi
rorx r14d,r11d,6
lea ecx,[r12*1+rcx]
xor r13d,r14d
mov esi,edx
rorx r12d,edx,22
lea ecx,[r13*1+rcx]
xor esi,r8d
rorx r14d,edx,13
rorx r13d,edx,2
lea r10d,[rcx*1+r10]
and r15d,esi
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((80-128))+rdi]
xor r14d,r12d
xor r15d,r8d
xor r14d,r13d
lea ecx,[r15*1+rcx]
mov r12d,r11d
add ebx,DWORD[((40+16))+rbp]
and r12d,r10d
rorx r13d,r10d,25
rorx r15d,r10d,11
lea ecx,[r14*1+rcx]
lea ebx,[r12*1+rbx]
andn r12d,r10d,eax
xor r13d,r15d
rorx r14d,r10d,6
lea ebx,[r12*1+rbx]
xor r13d,r14d
mov r15d,ecx
rorx r12d,ecx,22
lea ebx,[r13*1+rbx]
xor r15d,edx
rorx r14d,ecx,13
rorx r13d,ecx,2
lea r9d,[rbx*1+r9]
and esi,r15d
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((96-128))+rdi]
xor r14d,r12d
xor esi,edx
xor r14d,r13d
lea ebx,[rsi*1+rbx]
mov r12d,r10d
add eax,DWORD[((44+16))+rbp]
and r12d,r9d
rorx r13d,r9d,25
rorx esi,r9d,11
lea ebx,[r14*1+rbx]
lea eax,[r12*1+rax]
andn r12d,r9d,r11d
xor r13d,esi
rorx r14d,r9d,6
lea eax,[r12*1+rax]
xor r13d,r14d
mov esi,ebx
rorx r12d,ebx,22
lea eax,[r13*1+rax]
xor esi,ecx
rorx r14d,ebx,13
rorx r13d,ebx,2
lea r8d,[rax*1+r8]
and r15d,esi
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((112-128))+rdi]
xor r14d,r12d
xor r15d,ecx
xor r14d,r13d
lea eax,[r15*1+rax]
mov r12d,r9d
lea rbp,[((-64))+rbp]
add r11d,DWORD[((0+16))+rbp]
and r12d,r8d
rorx r13d,r8d,25
rorx r15d,r8d,11
lea eax,[r14*1+rax]
lea r11d,[r12*1+r11]
andn r12d,r8d,r10d
xor r13d,r15d
rorx r14d,r8d,6
lea r11d,[r12*1+r11]
xor r13d,r14d
mov r15d,eax
rorx r12d,eax,22
lea r11d,[r13*1+r11]
xor r15d,ebx
rorx r14d,eax,13
rorx r13d,eax,2
lea edx,[r11*1+rdx]
and esi,r15d
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((128-128))+rdi]
xor r14d,r12d
xor esi,ebx
xor r14d,r13d
lea r11d,[rsi*1+r11]
mov r12d,r8d
add r10d,DWORD[((4+16))+rbp]
and r12d,edx
rorx r13d,edx,25
rorx esi,edx,11
lea r11d,[r14*1+r11]
lea r10d,[r12*1+r10]
andn r12d,edx,r9d
xor r13d,esi
rorx r14d,edx,6
lea r10d,[r12*1+r10]
xor r13d,r14d
mov esi,r11d
rorx r12d,r11d,22
lea r10d,[r13*1+r10]
xor esi,eax
rorx r14d,r11d,13
rorx r13d,r11d,2
lea ecx,[r10*1+rcx]
and r15d,esi
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((144-128))+rdi]
xor r14d,r12d
xor r15d,eax
xor r14d,r13d
lea r10d,[r15*1+r10]
mov r12d,edx
add r9d,DWORD[((8+16))+rbp]
and r12d,ecx
rorx r13d,ecx,25
rorx r15d,ecx,11
lea r10d,[r14*1+r10]
lea r9d,[r12*1+r9]
andn r12d,ecx,r8d
xor r13d,r15d
rorx r14d,ecx,6
lea r9d,[r12*1+r9]
xor r13d,r14d
mov r15d,r10d
rorx r12d,r10d,22
lea r9d,[r13*1+r9]
xor r15d,r11d
rorx r14d,r10d,13
rorx r13d,r10d,2
lea ebx,[r9*1+rbx]
and esi,r15d
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((160-128))+rdi]
xor r14d,r12d
xor esi,r11d
xor r14d,r13d
lea r9d,[rsi*1+r9]
mov r12d,ecx
add r8d,DWORD[((12+16))+rbp]
and r12d,ebx
rorx r13d,ebx,25
rorx esi,ebx,11
lea r9d,[r14*1+r9]
lea r8d,[r12*1+r8]
andn r12d,ebx,edx
xor r13d,esi
rorx r14d,ebx,6
lea r8d,[r12*1+r8]
xor r13d,r14d
mov esi,r9d
rorx r12d,r9d,22
lea r8d,[r13*1+r8]
xor esi,r10d
rorx r14d,r9d,13
rorx r13d,r9d,2
lea eax,[r8*1+rax]
and r15d,esi
vaesenclast xmm11,xmm9,xmm10
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((176-128))+rdi]
xor r14d,r12d
xor r15d,r10d
xor r14d,r13d
lea r8d,[r15*1+r8]
mov r12d,ebx
add edx,DWORD[((32+16))+rbp]
and r12d,eax
rorx r13d,eax,25
rorx r15d,eax,11
lea r8d,[r14*1+r8]
lea edx,[r12*1+rdx]
andn r12d,eax,ecx
xor r13d,r15d
rorx r14d,eax,6
lea edx,[r12*1+rdx]
xor r13d,r14d
mov r15d,r8d
rorx r12d,r8d,22
lea edx,[r13*1+rdx]
xor r15d,r9d
rorx r14d,r8d,13
rorx r13d,r8d,2
lea r11d,[rdx*1+r11]
and esi,r15d
vpand xmm8,xmm11,xmm12
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((192-128))+rdi]
xor r14d,r12d
xor esi,r9d
xor r14d,r13d
lea edx,[rsi*1+rdx]
mov r12d,eax
add ecx,DWORD[((36+16))+rbp]
and r12d,r11d
rorx r13d,r11d,25
rorx esi,r11d,11
lea edx,[r14*1+rdx]
lea ecx,[r12*1+rcx]
andn r12d,r11d,ebx
xor r13d,esi
rorx r14d,r11d,6
lea ecx,[r12*1+rcx]
xor r13d,r14d
mov esi,edx
rorx r12d,edx,22
lea ecx,[r13*1+rcx]
xor esi,r8d
rorx r14d,edx,13
rorx r13d,edx,2
lea r10d,[rcx*1+r10]
and r15d,esi
vaesenclast xmm11,xmm9,xmm10
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((208-128))+rdi]
xor r14d,r12d
xor r15d,r8d
xor r14d,r13d
lea ecx,[r15*1+rcx]
mov r12d,r11d
add ebx,DWORD[((40+16))+rbp]
and r12d,r10d
rorx r13d,r10d,25
rorx r15d,r10d,11
lea ecx,[r14*1+rcx]
lea ebx,[r12*1+rbx]
andn r12d,r10d,eax
xor r13d,r15d
rorx r14d,r10d,6
lea ebx,[r12*1+rbx]
xor r13d,r14d
mov r15d,ecx
rorx r12d,ecx,22
lea ebx,[r13*1+rbx]
xor r15d,edx
rorx r14d,ecx,13
rorx r13d,ecx,2
lea r9d,[rbx*1+r9]
and esi,r15d
vpand xmm11,xmm11,xmm13
vaesenc xmm9,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((224-128))+rdi]
xor r14d,r12d
xor esi,edx
xor r14d,r13d
lea ebx,[rsi*1+rbx]
mov r12d,r10d
add eax,DWORD[((44+16))+rbp]
and r12d,r9d
rorx r13d,r9d,25
rorx esi,r9d,11
lea ebx,[r14*1+rbx]
lea eax,[r12*1+rax]
andn r12d,r9d,r11d
xor r13d,esi
rorx r14d,r9d,6
lea eax,[r12*1+rax]
xor r13d,r14d
mov esi,ebx
rorx r12d,ebx,22
lea eax,[r13*1+rax]
xor esi,ecx
rorx r14d,ebx,13
rorx r13d,ebx,2
lea r8d,[rax*1+r8]
and r15d,esi
vpor xmm8,xmm8,xmm11
vaesenclast xmm11,xmm9,xmm10
vmovdqu xmm10,XMMWORD[((0-128))+rdi]
xor r14d,r12d
xor r15d,ecx
xor r14d,r13d
lea eax,[r15*1+rax]
mov r12d,r9d
vmovq r13,xmm15
vpextrq r15,xmm15,1
vpand xmm11,xmm11,xmm14
vpor xmm8,xmm8,xmm11
lea rbp,[((-64))+rbp]
vmovdqu XMMWORD[r13*1+r15],xmm8
lea r13,[16+r13]
cmp rbp,rsp
jae NEAR $L$ower_avx2
mov r15,QWORD[552+rsp]
lea r13,[64+r13]
mov rsi,QWORD[560+rsp]
add eax,r14d
lea rsp,[448+rsp]
add eax,DWORD[r15]
add ebx,DWORD[4+r15]
add ecx,DWORD[8+r15]
add edx,DWORD[12+r15]
add r8d,DWORD[16+r15]
add r9d,DWORD[20+r15]
add r10d,DWORD[24+r15]
lea r12,[r13*1+rsi]
add r11d,DWORD[28+r15]
cmp r13,QWORD[((64+16))+rsp]
mov DWORD[r15],eax
cmove r12,rsp
mov DWORD[4+r15],ebx
mov DWORD[8+r15],ecx
mov DWORD[12+r15],edx
mov DWORD[16+r15],r8d
mov DWORD[20+r15],r9d
mov DWORD[24+r15],r10d
mov DWORD[28+r15],r11d
jbe NEAR $L$oop_avx2
lea rbp,[rsp]
$L$done_avx2:
mov r8,QWORD[((64+32))+rbp]
mov rsi,QWORD[((64+56))+rbp]
vmovdqu XMMWORD[r8],xmm8
vzeroall
movaps xmm6,XMMWORD[128+rbp]
movaps xmm7,XMMWORD[144+rbp]
movaps xmm8,XMMWORD[160+rbp]
movaps xmm9,XMMWORD[176+rbp]
movaps xmm10,XMMWORD[192+rbp]
movaps xmm11,XMMWORD[208+rbp]
movaps xmm12,XMMWORD[224+rbp]
movaps xmm13,XMMWORD[240+rbp]
movaps xmm14,XMMWORD[256+rbp]
movaps xmm15,XMMWORD[272+rbp]
mov r15,QWORD[((-48))+rsi]
mov r14,QWORD[((-40))+rsi]
mov r13,QWORD[((-32))+rsi]
mov r12,QWORD[((-24))+rsi]
mov rbp,QWORD[((-16))+rsi]
mov rbx,QWORD[((-8))+rsi]
lea rsp,[rsi]
$L$epilogue_avx2:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
$L$SEH_end_aesni_cbc_sha256_enc_avx2:
ALIGN 32
;-----------------------------------------------------------------------
; aesni_cbc_sha256_enc_shaext
;
; Stitched AES-CBC encryption + SHA-256 compression. The SHA-256 work is
; done with the SHA extension instructions, which (like pshufb/palignr)
; are emitted as raw DB bytes; each DB line is annotated with the
; instruction it encodes.
;
; ABI: Microsoft x64. Arguments arrive in rcx, rdx, r8, r9 and on the
; stack at [40+rsp], [48+rsp], [56+rsp]; the prologue moves them into the
; registers used by the body:
;   rdi  (arg1) AES input; four 16-byte blocks are read per iteration
;   rsi  (arg2) AES output; converted to (output - input) so that stores
;               can be addressed as [rdi+rsi]
;   rdx  (arg3) iteration count; one iteration consumes 64 bytes from
;               both rdi and r10. The count is only tested after the
;               body (dec/jnz), so a count of zero is not handled here.
;   rcx  (arg4) AES round keys; the DWORD at +240 selects how many
;               rounds are run (presumably the key's round-count field)
;   r8   (arg5) 16-byte CBC chaining value, read on entry, updated on exit
;   r9   (arg6) 32-byte SHA-256 state, read on entry, updated on exit
;   r10  (arg7) data to hash, 64 bytes per iteration
;
; Callee-saved rdi/rsi are kept in the caller's home space and
; xmm6-xmm15 in a 168-byte frame; se_handler relies on that layout.
;-----------------------------------------------------------------------
aesni_cbc_sha256_enc_shaext:
mov QWORD[8+rsp],rdi ;WIN64 prologue
mov QWORD[16+rsp],rsi
; rax keeps the entry rsp; the xmm saves below are addressed from it.
mov rax,rsp
$L$SEH_begin_aesni_cbc_sha256_enc_shaext:
mov rdi,rcx
mov rsi,rdx
mov rdx,r8
mov rcx,r9
mov r8,QWORD[40+rsp]
mov r9,QWORD[48+rsp]
mov r10,QWORD[56+rsp]
; Frame: 10*16 bytes for xmm6-xmm15 plus 8. (-8-160)+rax equals the new
; rsp, so xmm6 lands at [rsp] and xmm15 at [144+rsp].
lea rsp,[((-168))+rsp]
movaps XMMWORD[(-8-160)+rax],xmm6
movaps XMMWORD[(-8-144)+rax],xmm7
movaps XMMWORD[(-8-128)+rax],xmm8
movaps XMMWORD[(-8-112)+rax],xmm9
movaps XMMWORD[(-8-96)+rax],xmm10
movaps XMMWORD[(-8-80)+rax],xmm11
movaps XMMWORD[(-8-64)+rax],xmm12
movaps XMMWORD[(-8-48)+rax],xmm13
movaps XMMWORD[(-8-32)+rax],xmm14
movaps XMMWORD[(-8-16)+rax],xmm15
$L$prologue_shaext:
; rax = K256+128, so constants are addressed as [(off-128)+rax].
; The K256 table stores every 16-byte row twice, hence the stride of 32
; between consecutive groups of four round constants.
lea rax,[((K256+128))]
; xmm1/xmm2 = the two halves of the SHA-256 state.
movdqu xmm1,XMMWORD[r9]
movdqu xmm2,XMMWORD[16+r9]
; xmm3 = 16 bytes at K256+512, used as the pshufb control for the message
; words (presumably a byte-order swap mask -- the data is outside this view).
movdqa xmm3,XMMWORD[((512-128))+rax]
; r11d is compared against 11 before every aesenclast to pick the number
; of extra AES rounds.
mov r11d,DWORD[240+rcx]
; rsi = output - input.
sub rsi,rdi
; xmm15 = round key 0, xmm6 = CBC chaining value, xmm4 = round key 1.
movups xmm15,XMMWORD[rcx]
movups xmm6,XMMWORD[r8]
movups xmm4,XMMWORD[16+rcx]
; Bias the key pointer by 112: round key n is now at [(16*n-112)+rcx],
; i.e. [-80+rcx] is key 2, [48+rcx] is key 10 and [112+rcx] is key 14.
lea rcx,[112+rcx]
; Rearrange the state dwords into the register layout sha256rnds2 uses.
pshufd xmm0,xmm1,0x1b
pshufd xmm1,xmm1,0xb1
pshufd xmm2,xmm2,0x1b
; xmm3 doubles as a scratch register inside the loop; xmm7 keeps a copy
; of the pshufb control so it can be reloaded at the end of each pass.
movdqa xmm7,xmm3
DB 102,15,58,15,202,8 ; palignr xmm1,xmm2,8
punpcklqdq xmm2,xmm0
jmp NEAR $L$oop_shaext
ALIGN 16
; One pass = one 64-byte SHA-256 block (64 rounds) interleaved with the
; CBC encryption of four 16-byte AES blocks.
$L$oop_shaext:
; xmm10-xmm13 = the 64 bytes to hash, shuffled through the xmm3 control.
movdqu xmm10,XMMWORD[r10]
movdqu xmm11,XMMWORD[16+r10]
movdqu xmm12,XMMWORD[32+r10]
DB 102,68,15,56,0,211 ; pshufb xmm10,xmm3
movdqu xmm13,XMMWORD[48+r10]
; SHA rounds 0-3: xmm0 = K + W; each sha256rnds2 performs two rounds and
; pshufd 0x0e moves the upper two K+W dwords into place for the second.
movdqa xmm0,XMMWORD[((0-128))+rax]
paddd xmm0,xmm10
DB 102,68,15,56,0,219 ; pshufb xmm11,xmm3
; Keep the incoming state; it is added back after round 63.
movdqa xmm9,xmm2
movdqa xmm8,xmm1
; AES block 0: xmm6 = chaining value ^ input ^ round key 0.
movups xmm14,XMMWORD[rdi]
xorps xmm14,xmm15
xorps xmm6,xmm14
movups xmm5,XMMWORD[((-80))+rcx]
aesenc xmm6,xmm4
DB 15,56,203,209 ; sha256rnds2 xmm2,xmm1 (implicit xmm0)
pshufd xmm0,xmm0,0x0e
movups xmm4,XMMWORD[((-64))+rcx]
aesenc xmm6,xmm5
DB 15,56,203,202 ; sha256rnds2 xmm1,xmm2 (implicit xmm0)
; SHA rounds 4-7.
movdqa xmm0,XMMWORD[((32-128))+rax]
paddd xmm0,xmm11
DB 102,68,15,56,0,227 ; pshufb xmm12,xmm3
lea r10,[64+r10]
movups xmm5,XMMWORD[((-48))+rcx]
aesenc xmm6,xmm4
DB 15,56,203,209 ; sha256rnds2 xmm2,xmm1
pshufd xmm0,xmm0,0x0e
movups xmm4,XMMWORD[((-32))+rcx]
aesenc xmm6,xmm5
DB 15,56,203,202 ; sha256rnds2 xmm1,xmm2
; SHA rounds 8-11; message-schedule expansion (sha256msg1/sha256msg2 plus
; a palignr/paddd pair through scratch xmm3) starts here.
movdqa xmm0,XMMWORD[((64-128))+rax]
paddd xmm0,xmm12
DB 102,68,15,56,0,235 ; pshufb xmm13,xmm3
DB 69,15,56,204,211 ; sha256msg1 xmm10,xmm11
movups xmm5,XMMWORD[((-16))+rcx]
aesenc xmm6,xmm4
DB 15,56,203,209 ; sha256rnds2 xmm2,xmm1
pshufd xmm0,xmm0,0x0e
movdqa xmm3,xmm13
DB 102,65,15,58,15,220,4 ; palignr xmm3,xmm12,4
paddd xmm10,xmm3
movups xmm4,XMMWORD[rcx]
aesenc xmm6,xmm5
DB 15,56,203,202 ; sha256rnds2 xmm1,xmm2
; SHA rounds 12-15.
movdqa xmm0,XMMWORD[((96-128))+rax]
paddd xmm0,xmm13
DB 69,15,56,205,213 ; sha256msg2 xmm10,xmm13
DB 69,15,56,204,220 ; sha256msg1 xmm11,xmm12
movups xmm5,XMMWORD[16+rcx]
aesenc xmm6,xmm4
DB 15,56,203,209 ; sha256rnds2 xmm2,xmm1
pshufd xmm0,xmm0,0x0e
movups xmm4,XMMWORD[32+rcx]
aesenc xmm6,xmm5
movdqa xmm3,xmm10
DB 102,65,15,58,15,221,4 ; palignr xmm3,xmm13,4
paddd xmm11,xmm3
DB 15,56,203,202 ; sha256rnds2 xmm1,xmm2
; SHA rounds 16-19.
movdqa xmm0,XMMWORD[((128-128))+rax]
paddd xmm0,xmm10
DB 69,15,56,205,218 ; sha256msg2 xmm11,xmm10
DB 69,15,56,204,229 ; sha256msg1 xmm12,xmm13
movups xmm5,XMMWORD[48+rcx]
aesenc xmm6,xmm4
DB 15,56,203,209 ; sha256rnds2 xmm2,xmm1
pshufd xmm0,xmm0,0x0e
movdqa xmm3,xmm11
DB 102,65,15,58,15,218,4 ; palignr xmm3,xmm10,4
paddd xmm12,xmm3
; Finish AES block 0. r11d < 11: key 10 (already in xmm5) is the last
; round key. r11d == 11: two more rounds. Otherwise four more rounds.
; movups/aesenc do not modify flags, so the je below still tests this cmp.
cmp r11d,11
jb NEAR $L$aesenclast1
movups xmm4,XMMWORD[64+rcx]
aesenc xmm6,xmm5
movups xmm5,XMMWORD[80+rcx]
aesenc xmm6,xmm4
je NEAR $L$aesenclast1
movups xmm4,XMMWORD[96+rcx]
aesenc xmm6,xmm5
movups xmm5,XMMWORD[112+rcx]
aesenc xmm6,xmm4
$L$aesenclast1:
aesenclast xmm6,xmm5
; Reload round key 1 for the next AES block.
movups xmm4,XMMWORD[((16-112))+rcx]
nop
DB 15,56,203,202 ; sha256rnds2 xmm1,xmm2
; AES block 1: store ciphertext 0 and chain it into the next input block.
movups xmm14,XMMWORD[16+rdi]
xorps xmm14,xmm15
movups XMMWORD[rdi*1+rsi],xmm6
xorps xmm6,xmm14
movups xmm5,XMMWORD[((-80))+rcx]
aesenc xmm6,xmm4
; SHA rounds 20-23.
movdqa xmm0,XMMWORD[((160-128))+rax]
paddd xmm0,xmm11
DB 69,15,56,205,227 ; sha256msg2 xmm12,xmm11
DB 69,15,56,204,234 ; sha256msg1 xmm13,xmm10
movups xmm4,XMMWORD[((-64))+rcx]
aesenc xmm6,xmm5
DB 15,56,203,209 ; sha256rnds2 xmm2,xmm1
pshufd xmm0,xmm0,0x0e
movdqa xmm3,xmm12
DB 102,65,15,58,15,219,4 ; palignr xmm3,xmm11,4
paddd xmm13,xmm3
movups xmm5,XMMWORD[((-48))+rcx]
aesenc xmm6,xmm4
DB 15,56,203,202 ; sha256rnds2 xmm1,xmm2
; SHA rounds 24-27.
movdqa xmm0,XMMWORD[((192-128))+rax]
paddd xmm0,xmm12
DB 69,15,56,205,236 ; sha256msg2 xmm13,xmm12
DB 69,15,56,204,211 ; sha256msg1 xmm10,xmm11
movups xmm4,XMMWORD[((-32))+rcx]
aesenc xmm6,xmm5
DB 15,56,203,209 ; sha256rnds2 xmm2,xmm1
pshufd xmm0,xmm0,0x0e
movdqa xmm3,xmm13
DB 102,65,15,58,15,220,4 ; palignr xmm3,xmm12,4
paddd xmm10,xmm3
movups xmm5,XMMWORD[((-16))+rcx]
aesenc xmm6,xmm4
DB 15,56,203,202 ; sha256rnds2 xmm1,xmm2
; SHA rounds 28-31.
movdqa xmm0,XMMWORD[((224-128))+rax]
paddd xmm0,xmm13
DB 69,15,56,205,213 ; sha256msg2 xmm10,xmm13
DB 69,15,56,204,220 ; sha256msg1 xmm11,xmm12
movups xmm4,XMMWORD[rcx]
aesenc xmm6,xmm5
DB 15,56,203,209 ; sha256rnds2 xmm2,xmm1
pshufd xmm0,xmm0,0x0e
movdqa xmm3,xmm10
DB 102,65,15,58,15,221,4 ; palignr xmm3,xmm13,4
paddd xmm11,xmm3
movups xmm5,XMMWORD[16+rcx]
aesenc xmm6,xmm4
DB 15,56,203,202 ; sha256rnds2 xmm1,xmm2
; SHA rounds 32-35.
movdqa xmm0,XMMWORD[((256-128))+rax]
paddd xmm0,xmm10
DB 69,15,56,205,218 ; sha256msg2 xmm11,xmm10
DB 69,15,56,204,229 ; sha256msg1 xmm12,xmm13
movups xmm4,XMMWORD[32+rcx]
aesenc xmm6,xmm5
DB 15,56,203,209 ; sha256rnds2 xmm2,xmm1
pshufd xmm0,xmm0,0x0e
movdqa xmm3,xmm11
DB 102,65,15,58,15,218,4 ; palignr xmm3,xmm10,4
paddd xmm12,xmm3
movups xmm5,XMMWORD[48+rcx]
aesenc xmm6,xmm4
; Finish AES block 1 (same round-count selection as for block 0).
cmp r11d,11
jb NEAR $L$aesenclast2
movups xmm4,XMMWORD[64+rcx]
aesenc xmm6,xmm5
movups xmm5,XMMWORD[80+rcx]
aesenc xmm6,xmm4
je NEAR $L$aesenclast2
movups xmm4,XMMWORD[96+rcx]
aesenc xmm6,xmm5
movups xmm5,XMMWORD[112+rcx]
aesenc xmm6,xmm4
$L$aesenclast2:
aesenclast xmm6,xmm5
movups xmm4,XMMWORD[((16-112))+rcx]
nop
DB 15,56,203,202 ; sha256rnds2 xmm1,xmm2
; AES block 2: store ciphertext 1 and chain it into the next input block.
movups xmm14,XMMWORD[32+rdi]
xorps xmm14,xmm15
movups XMMWORD[16+rdi*1+rsi],xmm6
xorps xmm6,xmm14
movups xmm5,XMMWORD[((-80))+rcx]
aesenc xmm6,xmm4
; SHA rounds 36-39.
movdqa xmm0,XMMWORD[((288-128))+rax]
paddd xmm0,xmm11
DB 69,15,56,205,227 ; sha256msg2 xmm12,xmm11
DB 69,15,56,204,234 ; sha256msg1 xmm13,xmm10
movups xmm4,XMMWORD[((-64))+rcx]
aesenc xmm6,xmm5
DB 15,56,203,209 ; sha256rnds2 xmm2,xmm1
pshufd xmm0,xmm0,0x0e
movdqa xmm3,xmm12
DB 102,65,15,58,15,219,4 ; palignr xmm3,xmm11,4
paddd xmm13,xmm3
movups xmm5,XMMWORD[((-48))+rcx]
aesenc xmm6,xmm4
DB 15,56,203,202 ; sha256rnds2 xmm1,xmm2
; SHA rounds 40-43.
movdqa xmm0,XMMWORD[((320-128))+rax]
paddd xmm0,xmm12
DB 69,15,56,205,236 ; sha256msg2 xmm13,xmm12
DB 69,15,56,204,211 ; sha256msg1 xmm10,xmm11
movups xmm4,XMMWORD[((-32))+rcx]
aesenc xmm6,xmm5
DB 15,56,203,209 ; sha256rnds2 xmm2,xmm1
pshufd xmm0,xmm0,0x0e
movdqa xmm3,xmm13
DB 102,65,15,58,15,220,4 ; palignr xmm3,xmm12,4
paddd xmm10,xmm3
movups xmm5,XMMWORD[((-16))+rcx]
aesenc xmm6,xmm4
DB 15,56,203,202 ; sha256rnds2 xmm1,xmm2
; SHA rounds 44-47.
movdqa xmm0,XMMWORD[((352-128))+rax]
paddd xmm0,xmm13
DB 69,15,56,205,213 ; sha256msg2 xmm10,xmm13
DB 69,15,56,204,220 ; sha256msg1 xmm11,xmm12
movups xmm4,XMMWORD[rcx]
aesenc xmm6,xmm5
DB 15,56,203,209 ; sha256rnds2 xmm2,xmm1
pshufd xmm0,xmm0,0x0e
movdqa xmm3,xmm10
DB 102,65,15,58,15,221,4 ; palignr xmm3,xmm13,4
paddd xmm11,xmm3
movups xmm5,XMMWORD[16+rcx]
aesenc xmm6,xmm4
DB 15,56,203,202 ; sha256rnds2 xmm1,xmm2
; SHA rounds 48-51.
movdqa xmm0,XMMWORD[((384-128))+rax]
paddd xmm0,xmm10
DB 69,15,56,205,218 ; sha256msg2 xmm11,xmm10
DB 69,15,56,204,229 ; sha256msg1 xmm12,xmm13
movups xmm4,XMMWORD[32+rcx]
aesenc xmm6,xmm5
DB 15,56,203,209 ; sha256rnds2 xmm2,xmm1
pshufd xmm0,xmm0,0x0e
movdqa xmm3,xmm11
DB 102,65,15,58,15,218,4 ; palignr xmm3,xmm10,4
paddd xmm12,xmm3
movups xmm5,XMMWORD[48+rcx]
aesenc xmm6,xmm4
DB 15,56,203,202 ; sha256rnds2 xmm1,xmm2
; SHA rounds 52-55 (their two sha256rnds2 straddle the AES tail below).
movdqa xmm0,XMMWORD[((416-128))+rax]
paddd xmm0,xmm11
DB 69,15,56,205,227 ; sha256msg2 xmm12,xmm11
DB 69,15,56,204,234 ; sha256msg1 xmm13,xmm10
; Finish AES block 2 (same round-count selection as for block 0).
cmp r11d,11
jb NEAR $L$aesenclast3
movups xmm4,XMMWORD[64+rcx]
aesenc xmm6,xmm5
movups xmm5,XMMWORD[80+rcx]
aesenc xmm6,xmm4
je NEAR $L$aesenclast3
movups xmm4,XMMWORD[96+rcx]
aesenc xmm6,xmm5
movups xmm5,XMMWORD[112+rcx]
aesenc xmm6,xmm4
$L$aesenclast3:
aesenclast xmm6,xmm5
movups xmm4,XMMWORD[((16-112))+rcx]
nop
DB 15,56,203,209 ; sha256rnds2 xmm2,xmm1
pshufd xmm0,xmm0,0x0e
movdqa xmm3,xmm12
DB 102,65,15,58,15,219,4 ; palignr xmm3,xmm11,4
paddd xmm13,xmm3
; AES block 3: store ciphertext 2 and chain it into the next input block.
movups xmm14,XMMWORD[48+rdi]
xorps xmm14,xmm15
movups XMMWORD[32+rdi*1+rsi],xmm6
xorps xmm6,xmm14
movups xmm5,XMMWORD[((-80))+rcx]
aesenc xmm6,xmm4
movups xmm4,XMMWORD[((-64))+rcx]
aesenc xmm6,xmm5
DB 15,56,203,202 ; sha256rnds2 xmm1,xmm2
; SHA rounds 56-59.
movdqa xmm0,XMMWORD[((448-128))+rax]
paddd xmm0,xmm12
DB 69,15,56,205,236 ; sha256msg2 xmm13,xmm12
; Message expansion is complete: put the pshufb control back in xmm3 for
; the next pass.
movdqa xmm3,xmm7
movups xmm5,XMMWORD[((-48))+rcx]
aesenc xmm6,xmm4
DB 15,56,203,209 ; sha256rnds2 xmm2,xmm1
pshufd xmm0,xmm0,0x0e
movups xmm4,XMMWORD[((-32))+rcx]
aesenc xmm6,xmm5
DB 15,56,203,202 ; sha256rnds2 xmm1,xmm2
; SHA rounds 60-63.
movdqa xmm0,XMMWORD[((480-128))+rax]
paddd xmm0,xmm13
movups xmm5,XMMWORD[((-16))+rcx]
aesenc xmm6,xmm4
movups xmm4,XMMWORD[rcx]
aesenc xmm6,xmm5
DB 15,56,203,209 ; sha256rnds2 xmm2,xmm1
pshufd xmm0,xmm0,0x0e
movups xmm5,XMMWORD[16+rcx]
aesenc xmm6,xmm4
DB 15,56,203,202 ; sha256rnds2 xmm1,xmm2
movups xmm4,XMMWORD[32+rcx]
aesenc xmm6,xmm5
movups xmm5,XMMWORD[48+rcx]
aesenc xmm6,xmm4
; Finish AES block 3 (same round-count selection as for block 0).
cmp r11d,11
jb NEAR $L$aesenclast4
movups xmm4,XMMWORD[64+rcx]
aesenc xmm6,xmm5
movups xmm5,XMMWORD[80+rcx]
aesenc xmm6,xmm4
je NEAR $L$aesenclast4
movups xmm4,XMMWORD[96+rcx]
aesenc xmm6,xmm5
movups xmm5,XMMWORD[112+rcx]
aesenc xmm6,xmm4
$L$aesenclast4:
aesenclast xmm6,xmm5
movups xmm4,XMMWORD[((16-112))+rcx]
nop
; Add the state saved at the top of the pass to the round output.
paddd xmm2,xmm9
paddd xmm1,xmm8
; dec sets ZF for the jnz below; the movups and lea in between leave the
; flags untouched.
dec rdx
movups XMMWORD[48+rdi*1+rsi],xmm6
lea rdi,[64+rdi]
jnz NEAR $L$oop_shaext
; Undo the entry-time dword rearrangement before storing the state.
pshufd xmm2,xmm2,0xb1
pshufd xmm3,xmm1,0x1b
pshufd xmm1,xmm1,0xb1
punpckhqdq xmm1,xmm2
DB 102,15,58,15,211,8 ; palignr xmm2,xmm3,8
; The last ciphertext block becomes the new chaining value at [r8].
movups XMMWORD[r8],xmm6
movdqu XMMWORD[r9],xmm1
movdqu XMMWORD[16+r9],xmm2
; Restore the callee-saved xmm registers and release the 168-byte frame.
movaps xmm6,XMMWORD[rsp]
movaps xmm7,XMMWORD[16+rsp]
movaps xmm8,XMMWORD[32+rsp]
movaps xmm9,XMMWORD[48+rsp]
movaps xmm10,XMMWORD[64+rsp]
movaps xmm11,XMMWORD[80+rsp]
movaps xmm12,XMMWORD[96+rsp]
movaps xmm13,XMMWORD[112+rsp]
movaps xmm14,XMMWORD[128+rsp]
movaps xmm15,XMMWORD[144+rsp]
lea rsp,[((8+160))+rsp]
$L$epilogue_shaext:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
$L$SEH_end_aesni_cbc_sha256_enc_shaext:
EXTERN __imp_RtlVirtualUnwind
ALIGN 16
;-----------------------------------------------------------------------
; se_handler
;
; Win64 language-specific exception handler shared by every function
; listed in the .xdata section of this file. It rewrites the CONTEXT
; record so that it describes the caller of the interrupted function,
; then lets RtlVirtualUnwind continue the unwind.
;
; In:  r8 = CONTEXT record, r9 = DISPATCHER_CONTEXT (rcx and rdx, the
;      other two handler arguments, are not read).
; Out: eax = 1 (ExceptionContinueSearch).
;
; CONTEXT offsets used: 120 Rax, 144 Rbx, 152 Rsp, 160 Rbp, 168 Rsi,
; 176 Rdi, 216-240 R12-R15, 248 Rip, 512 Xmm6.
; DISPATCHER_CONTEXT offsets used: 0 ControlPc, 8 ImageBase,
; 16 FunctionEntry, 24 EstablisherFrame, 40 ContextRecord, 56 HandlerData.
; HandlerData points at the two image-relative labels stored after the
; handler address in .xdata: [0] end of prologue, [4] start of epilogue.
;-----------------------------------------------------------------------
se_handler:
push rsi
push rdi
push rbx
push rbp
push r12
push r13
push r14
push r15
pushfq
; 32 bytes of shadow space + 4 stack arguments for RtlVirtualUnwind.
sub rsp,64
mov rax,QWORD[120+r8]
mov rbx,QWORD[248+r8]
mov rsi,QWORD[8+r9]
mov r11,QWORD[56+r9]
; Rip before the end-of-prologue label: rax = context->Rax, the register
; the prologues use to hold the entry rsp (see `mov rax,rsp` in shaext).
mov r10d,DWORD[r11]
lea r10,[r10*1+rsi]
cmp rbx,r10
jb NEAR $L$in_prologue
; Rip at or past the epilogue label: rsp has already been restored, so
; context->Rsp is used as the frame address.
mov rax,QWORD[152+r8]
mov r10d,DWORD[4+r11]
lea r10,[r10*1+rsi]
cmp rbx,r10
jae NEAR $L$in_prologue
; Function body. Frame layouts differ: the shaext routine is told apart
; from the functions located before it by comparing Rip to its address.
lea r10,[aesni_cbc_sha256_enc_shaext]
cmp rbx,r10
jb NEAR $L$not_in_shaext
; shaext frame: xmm6-xmm15 are stored at [rsp]. Copy 20 qwords (160
; bytes) into context->Xmm6.., then step over the 168-byte frame to get
; the entry rsp.
lea rsi,[rax]
lea rdi,[512+r8]
mov ecx,20
DD 0xa548f3fc ; cld; rep movsq
lea rax,[168+rax]
jmp NEAR $L$in_prologue
$L$not_in_shaext:
; At or past $L$avx2_shortcut the frame base is recomputed from the
; current rsp (mask to a 1024-byte boundary, then add 448) -- presumably
; because the AVX2 body moves rsp while it runs; confirm against the
; AVX2 prologue, which is outside this view.
lea r10,[$L$avx2_shortcut]
cmp rbx,r10
jb NEAR $L$not_in_avx2
and rax,-256*4
add rax,448
$L$not_in_avx2:
; rsi = frame base; the qword at frame+64+56 is loaded as the stack
; pointer to unwind to, with rbx/rbp/r12-r15 stored just below it.
mov rsi,rax
mov rax,QWORD[((64+56))+rax]
mov rbx,QWORD[((-8))+rax]
mov rbp,QWORD[((-16))+rax]
mov r12,QWORD[((-24))+rax]
mov r13,QWORD[((-32))+rax]
mov r14,QWORD[((-40))+rax]
mov r15,QWORD[((-48))+rax]
mov QWORD[144+r8],rbx
mov QWORD[160+r8],rbp
mov QWORD[216+r8],r12
mov QWORD[224+r8],r13
mov QWORD[232+r8],r14
mov QWORD[240+r8],r15
; xmm6-xmm15 are stored at frame+128: copy 20 qwords into context->Xmm6..
lea rsi,[((64+64))+rsi]
lea rdi,[512+r8]
mov ecx,20
DD 0xa548f3fc ; cld; rep movsq
$L$in_prologue:
; rax = stack pointer as it was on function entry. rdi/rsi were saved by
; the WIN64 prologue in the caller-provided home space at [8]/[16].
mov rdi,QWORD[8+rax]
mov rsi,QWORD[16+rax]
mov QWORD[152+r8],rax
mov QWORD[168+r8],rsi
mov QWORD[176+r8],rdi
; Copy the updated CONTEXT (154 qwords = 1232 bytes) into
; disp->ContextRecord.
mov rdi,QWORD[40+r9]
mov rsi,r8
mov ecx,154
DD 0xa548f3fc ; cld; rep movsq
; RtlVirtualUnwind(0 (UNW_FLAG_NHANDLER), disp->ImageBase, disp->ControlPc,
;                  disp->FunctionEntry, disp->ContextRecord,
;                  &disp->HandlerData, &disp->EstablisherFrame, NULL)
mov rsi,r9
xor rcx,rcx
mov rdx,QWORD[8+rsi]
mov r8,QWORD[rsi]
mov r9,QWORD[16+rsi]
mov r10,QWORD[40+rsi]
lea r11,[56+rsi]
lea r12,[24+rsi]
mov QWORD[32+rsp],r10
mov QWORD[40+rsp],r11
mov QWORD[48+rsp],r12
mov QWORD[56+rsp],rcx
call QWORD[__imp_RtlVirtualUnwind]
; ExceptionContinueSearch
mov eax,1
add rsp,64
popfq
pop r15
pop r14
pop r13
pop r12
pop rbp
pop rbx
pop rdi
pop rsi
DB 0F3h,0C3h ;repret
; Function table: one three-DWORD entry per function, all image-relative:
; start of function, end of function, address of its unwind info in .xdata.
section .pdata rdata align=4
DD $L$SEH_begin_aesni_cbc_sha256_enc_xop wrt ..imagebase
DD $L$SEH_end_aesni_cbc_sha256_enc_xop wrt ..imagebase
DD $L$SEH_info_aesni_cbc_sha256_enc_xop wrt ..imagebase
DD $L$SEH_begin_aesni_cbc_sha256_enc_avx wrt ..imagebase
DD $L$SEH_end_aesni_cbc_sha256_enc_avx wrt ..imagebase
DD $L$SEH_info_aesni_cbc_sha256_enc_avx wrt ..imagebase
DD $L$SEH_begin_aesni_cbc_sha256_enc_avx2 wrt ..imagebase
DD $L$SEH_end_aesni_cbc_sha256_enc_avx2 wrt ..imagebase
DD $L$SEH_info_aesni_cbc_sha256_enc_avx2 wrt ..imagebase
DD $L$SEH_begin_aesni_cbc_sha256_enc_shaext wrt ..imagebase
DD $L$SEH_end_aesni_cbc_sha256_enc_shaext wrt ..imagebase
DD $L$SEH_info_aesni_cbc_sha256_enc_shaext wrt ..imagebase
; Unwind info, one record per function. Each record is:
;   DB 9,0,0,0  header: version 1 with the exception-handler flag set
;               (1 | 1<<3); prologue size, unwind-code count and frame
;               register are all zero, so unwinding is left entirely to
;               the handler
;   DD handler  image-relative address of se_handler
;   DD a,b      handler data read by se_handler through HandlerData:
;               a = end-of-prologue label, b = start-of-epilogue label
section .xdata rdata align=8
ALIGN 8
$L$SEH_info_aesni_cbc_sha256_enc_xop:
DB 9,0,0,0
DD se_handler wrt ..imagebase
DD $L$prologue_xop wrt ..imagebase,$L$epilogue_xop wrt ..imagebase
$L$SEH_info_aesni_cbc_sha256_enc_avx:
DB 9,0,0,0
DD se_handler wrt ..imagebase
DD $L$prologue_avx wrt ..imagebase,$L$epilogue_avx wrt ..imagebase
$L$SEH_info_aesni_cbc_sha256_enc_avx2:
DB 9,0,0,0
DD se_handler wrt ..imagebase
DD $L$prologue_avx2 wrt ..imagebase,$L$epilogue_avx2 wrt ..imagebase
$L$SEH_info_aesni_cbc_sha256_enc_shaext:
DB 9,0,0,0
DD se_handler wrt ..imagebase
DD $L$prologue_shaext wrt ..imagebase,$L$epilogue_shaext wrt ..imagebase
%else
; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738
ret
%endif