parquet/internal/utils/unpack_bool_avx2_amd64.s (82 lines of code) (raw):
//+build !noasm !appengine
// AUTO-GENERATED BY C2GOASM -- DO NOT EDIT
// _bytes_to_bools_avx2 expands a packed bitmap into one byte per bit.
//
// For every input byte i in [0, len) and every bit j in [0, 8), the value
// (in[i] >> j) & 1 is stored to out[8*i + j], least-significant bit first,
// provided that 8*i + j < outlen. Bits whose output index is >= outlen are
// skipped, so at most outlen bytes of out are written. Each stored byte is
// 0 or 1.
//
// Arguments (Go stack frame, 32 bytes of arguments, no locals, no result):
//   in+0(FP)      pointer to the packed input bytes
//   len+8(FP)     number of input bytes
//   out+16(FP)    pointer to the output buffer
//   outlen+24(FP) capacity of the output buffer, in bytes
// NOTE(review): the Go-side declaration is not in this file; the pointer and
// integer types above are inferred from how the values are used -- confirm
// against the Go stub.
//
// Only the low 32 bits of len and outlen are examined (ESI / ECX), and the
// comparisons against them are signed (JLE / JGE), i.e. both are treated as
// signed 32-bit integers.
//
// Register roles inside the loop:
//   DI  (rdi) address of the current input byte
//   DX  (rdx) base address of out
//   CX  (ecx) outlen
//   R8        len * 8 = total number of input bits (loop end value)
//   R10       output index of bit 0 of the current input byte (multiple of 8)
//   R9        zero-extended copy of r10d, used as the base output index
//   SI  (rsi) len on entry; afterwards scratch for the output index r9 | j
//   AX  (al)  scratch for the extracted bit
//
// NOTE: despite the _avx2 suffix, the instructions below are all scalar
// general-purpose instructions; no vector registers are touched.
TEXT ·_bytes_to_bools_avx2(SB), $0-32
// Load the four arguments from the Go frame into the registers the
// generated body expects.
MOVQ in+0(FP), DI
MOVQ len+8(FP), SI
MOVQ out+16(FP), DX
MOVQ outlen+24(FP), CX
// Nothing to do when len <= 0 (signed 32-bit test).
WORD $0xf685 // test esi, esi
JLE LBB0_5
// r8 = (zero-extended len) * 8: the bit index at which the loop stops.
WORD $0x8941; BYTE $0xf0 // mov r8d, esi
LONG $0x03e0c149 // shl r8, 3
// r10 = 0: output index of the first bit of the first input byte.
WORD $0x3145; BYTE $0xd2 // xor r10d, r10d
JMP LBB0_2
// Loop tail: advance to the next input byte (8 output slots further on) and
// stop once every input bit has been visited. This tail is also the target
// of every bounds-check branch below, so a full output buffer skips the
// remaining bits of the current byte but the loop still walks all of in.
LBB0_4:
LONG $0x08c28349 // add r10, 8
LONG $0x01c78348 // add rdi, 1
WORD $0x394d; BYTE $0xd0 // cmp r8, r10
JE LBB0_5
// Loop body: unpack the eight bits of the byte at [rdi].
LBB0_2:
// Bit 0 -> out[r10], unless r10d >= outlen.
WORD $0x3941; BYTE $0xca // cmp r10d, ecx
JGE LBB0_4
// r9 = zero-extended r10d. Because r10 is always a multiple of 8, the
// "or rsi, j" instructions below compute r9 + j for j in 1..7.
WORD $0x8945; BYTE $0xd1 // mov r9d, r10d
WORD $0xb60f; BYTE $0x07 // movzx eax, byte [rdi]
WORD $0x0124 // and al, 1
LONG $0x0a048842 // mov byte [rdx + r9], al
// Bit 1 -> out[r9 | 1], unless that index >= outlen.
WORD $0x894c; BYTE $0xce // mov rsi, r9
LONG $0x01ce8348 // or rsi, 1
WORD $0xce39 // cmp esi, ecx
JGE LBB0_4
WORD $0xb60f; BYTE $0x07 // movzx eax, byte [rdi]
WORD $0xe8d0 // shr al, 1
WORD $0x0124 // and al, 1
WORD $0x0488; BYTE $0x32 // mov byte [rdx + rsi], al
// Bit 2 -> out[r9 | 2], unless that index >= outlen.
WORD $0x894c; BYTE $0xce // mov rsi, r9
LONG $0x02ce8348 // or rsi, 2
WORD $0xce39 // cmp esi, ecx
JGE LBB0_4
WORD $0xb60f; BYTE $0x07 // movzx eax, byte [rdi]
WORD $0xe8c0; BYTE $0x02 // shr al, 2
WORD $0x0124 // and al, 1
WORD $0x0488; BYTE $0x32 // mov byte [rdx + rsi], al
// Bit 3 -> out[r9 | 3], unless that index >= outlen.
WORD $0x894c; BYTE $0xce // mov rsi, r9
LONG $0x03ce8348 // or rsi, 3
WORD $0xce39 // cmp esi, ecx
JGE LBB0_4
WORD $0xb60f; BYTE $0x07 // movzx eax, byte [rdi]
WORD $0xe8c0; BYTE $0x03 // shr al, 3
WORD $0x0124 // and al, 1
WORD $0x0488; BYTE $0x32 // mov byte [rdx + rsi], al
// Bit 4 -> out[r9 | 4], unless that index >= outlen.
WORD $0x894c; BYTE $0xce // mov rsi, r9
LONG $0x04ce8348 // or rsi, 4
WORD $0xce39 // cmp esi, ecx
JGE LBB0_4
WORD $0xb60f; BYTE $0x07 // movzx eax, byte [rdi]
WORD $0xe8c0; BYTE $0x04 // shr al, 4
WORD $0x0124 // and al, 1
WORD $0x0488; BYTE $0x32 // mov byte [rdx + rsi], al
// Bit 5 -> out[r9 | 5], unless that index >= outlen.
WORD $0x894c; BYTE $0xce // mov rsi, r9
LONG $0x05ce8348 // or rsi, 5
WORD $0xce39 // cmp esi, ecx
JGE LBB0_4
WORD $0xb60f; BYTE $0x07 // movzx eax, byte [rdi]
WORD $0xe8c0; BYTE $0x05 // shr al, 5
WORD $0x0124 // and al, 1
WORD $0x0488; BYTE $0x32 // mov byte [rdx + rsi], al
// Bit 6 -> out[r9 | 6], unless that index >= outlen.
WORD $0x894c; BYTE $0xce // mov rsi, r9
LONG $0x06ce8348 // or rsi, 6
WORD $0xce39 // cmp esi, ecx
JGE LBB0_4
WORD $0xb60f; BYTE $0x07 // movzx eax, byte [rdi]
WORD $0xe8c0; BYTE $0x06 // shr al, 6
WORD $0x0124 // and al, 1
WORD $0x0488; BYTE $0x32 // mov byte [rdx + rsi], al
// Bit 7 -> out[r9 | 7], unless that index >= outlen. r9 itself is modified
// here because the base index is not needed again for this byte. No
// "and al, 1" is required: shifting the 8-bit AL right by 7 leaves only the
// top bit.
LONG $0x07c98349 // or r9, 7
WORD $0x3941; BYTE $0xc9 // cmp r9d, ecx
JGE LBB0_4
WORD $0xb60f; BYTE $0x07 // movzx eax, byte [rdi]
WORD $0xe8c0; BYTE $0x07 // shr al, 7
LONG $0x0a048842 // mov byte [rdx + r9], al
JMP LBB0_4
// Common exit: reached when len <= 0 or after the last input byte.
LBB0_5:
RET