parquet/internal/utils/unpack_bool_sse4_amd64.s (82 lines of code) (raw):

//+build !noasm !appengine
// AUTO-GENERATED BY C2GOASM -- DO NOT EDIT

// _bytes_to_bools_sse4(in, len, out, outlen)
//
// Expands a packed bitmap into one byte per bit, least-significant bit first.
// Every byte stored to out is 0 or 1.
//
// Syntax: Go (Plan 9) assembler, amd64. The body is raw machine code emitted
// through WORD/LONG/BYTE; the trailing comment on each such line is the
// Intel-syntax disassembly of those bytes. Only the branches, the argument
// loads and RET are real assembler mnemonics.
//
// Arguments (stack based, 32 bytes total, no local frame):
//   in     +0(FP)   address of the packed input bytes
//   len    +8(FP)   number of input bytes to consume
//   out    +16(FP)  address of the output bytes
//   outlen +24(FP)  number of output bytes that may be written
//
// Equivalent logic:
//   for i := 0; i < len; i++ {
//       for k := 0; k < 8; k++ {
//           if 8*i+k < outlen { out[8*i+k] = (in[i] >> k) & 1 }
//       }
//   }
//
// Register roles:
//   DI  (rdi)  cursor into in, advanced by 1 per outer iteration
//   DX  (rdx)  base of out
//   CX  (ecx)  outlen (compared as a signed 32-bit value)
//   R8  (r8)   len*8, the bit index at which the loop stops
//   R10 (r10)  bit index of bit 0 of the current input byte (multiple of 8)
//   R9  (r9)   zero-extended 32-bit copy of R10 used for addressing
//   SI  (rsi)  holds len on entry, then reused as the per-bit output index
//   AX  (al)   scratch for the extracted bit
//
// Notes:
//   - len and outlen are only inspected through their low 32 bits
//     (test esi,esi / cmp r10d,ecx) and compared as signed values.
//   - "or rsi, k" is used as an addition: R9 is a multiple of 8, so its low
//     three bits are clear.
//   - Once the bit index reaches outlen the loop does not exit early; it keeps
//     stepping through the remaining input bytes without storing anything.
//   - The body contains only scalar integer instructions; no SSE instruction
//     is encoded here despite the symbol name.
//   - NOTE(review): space-separated terms on a +build line are OR-ed, so this
//     file is excluded only when both noasm and appengine are set. Confirm
//     that this matches the constraint on the Go declaration of this symbol.
TEXT ·_bytes_to_bools_sse4(SB), $0-32

	MOVQ in+0(FP), DI
	MOVQ len+8(FP), SI
	MOVQ out+16(FP), DX
	MOVQ outlen+24(FP), CX

	// Nothing to do when len <= 0 (signed 32-bit test).
	WORD $0xf685             // test esi, esi
	JLE  LBB0_5

	// r8 = len * 8 (total number of input bits); r10 = 0 (current bit index).
	WORD $0x8941; BYTE $0xf0 // mov r8d, esi
	LONG $0x03e0c149         // shl r8, 3
	WORD $0x3145; BYTE $0xd2 // xor r10d, r10d
	JMP  LBB0_2

LBB0_4:
	// Advance to the next input byte; stop once all len*8 bits are consumed.
	LONG $0x08c28349         // add r10, 8
	LONG $0x01c78348         // add rdi, 1
	WORD $0x394d; BYTE $0xd0 // cmp r8, r10
	JE   LBB0_5

LBB0_2:
	// Bit 0: skip the whole byte if its first bit index is already >= outlen.
	WORD $0x3941; BYTE $0xca // cmp r10d, ecx
	JGE  LBB0_4
	WORD $0x8945; BYTE $0xd1 // mov r9d, r10d
	WORD $0xb60f; BYTE $0x07 // movzx eax, byte [rdi]
	WORD $0x0124             // and al, 1
	LONG $0x0a048842         // mov byte [rdx + r9], al

	// Bit 1
	WORD $0x894c; BYTE $0xce // mov rsi, r9
	LONG $0x01ce8348         // or rsi, 1
	WORD $0xce39             // cmp esi, ecx
	JGE  LBB0_4
	WORD $0xb60f; BYTE $0x07 // movzx eax, byte [rdi]
	WORD $0xe8d0             // shr al, 1
	WORD $0x0124             // and al, 1
	WORD $0x0488; BYTE $0x32 // mov byte [rdx + rsi], al

	// Bit 2
	WORD $0x894c; BYTE $0xce // mov rsi, r9
	LONG $0x02ce8348         // or rsi, 2
	WORD $0xce39             // cmp esi, ecx
	JGE  LBB0_4
	WORD $0xb60f; BYTE $0x07 // movzx eax, byte [rdi]
	WORD $0xe8c0; BYTE $0x02 // shr al, 2
	WORD $0x0124             // and al, 1
	WORD $0x0488; BYTE $0x32 // mov byte [rdx + rsi], al

	// Bit 3
	WORD $0x894c; BYTE $0xce // mov rsi, r9
	LONG $0x03ce8348         // or rsi, 3
	WORD $0xce39             // cmp esi, ecx
	JGE  LBB0_4
	WORD $0xb60f; BYTE $0x07 // movzx eax, byte [rdi]
	WORD $0xe8c0; BYTE $0x03 // shr al, 3
	WORD $0x0124             // and al, 1
	WORD $0x0488; BYTE $0x32 // mov byte [rdx + rsi], al

	// Bit 4
	WORD $0x894c; BYTE $0xce // mov rsi, r9
	LONG $0x04ce8348         // or rsi, 4
	WORD $0xce39             // cmp esi, ecx
	JGE  LBB0_4
	WORD $0xb60f; BYTE $0x07 // movzx eax, byte [rdi]
	WORD $0xe8c0; BYTE $0x04 // shr al, 4
	WORD $0x0124             // and al, 1
	WORD $0x0488; BYTE $0x32 // mov byte [rdx + rsi], al

	// Bit 5
	WORD $0x894c; BYTE $0xce // mov rsi, r9
	LONG $0x05ce8348         // or rsi, 5
	WORD $0xce39             // cmp esi, ecx
	JGE  LBB0_4
	WORD $0xb60f; BYTE $0x07 // movzx eax, byte [rdi]
	WORD $0xe8c0; BYTE $0x05 // shr al, 5
	WORD $0x0124             // and al, 1
	WORD $0x0488; BYTE $0x32 // mov byte [rdx + rsi], al

	// Bit 6
	WORD $0x894c; BYTE $0xce // mov rsi, r9
	LONG $0x06ce8348         // or rsi, 6
	WORD $0xce39             // cmp esi, ecx
	JGE  LBB0_4
	WORD $0xb60f; BYTE $0x07 // movzx eax, byte [rdi]
	WORD $0xe8c0; BYTE $0x06 // shr al, 6
	WORD $0x0124             // and al, 1
	WORD $0x0488; BYTE $0x32 // mov byte [rdx + rsi], al

	// Bit 7: r9 itself becomes the index; no mask needed because a logical
	// right shift of an 8-bit value by 7 leaves only bit 0.
	LONG $0x07c98349         // or r9, 7
	WORD $0x3941; BYTE $0xc9 // cmp r9d, ecx
	JGE  LBB0_4
	WORD $0xb60f; BYTE $0x07 // movzx eax, byte [rdi]
	WORD $0xe8c0; BYTE $0x07 // shr al, 7
	LONG $0x0a048842         // mov byte [rdx + r9], al
	JMP  LBB0_4

LBB0_5:
	RET