internal/utils/min_max_avx2_amd64.s (846 lines of code) (raw):

//+build !noasm !appengine
// AUTO-GENERATED BY C2GOASM -- DO NOT EDIT

// AVX2 min/max kernels for the eight fixed-width integer element types
// (Go assembler syntax, amd64).
//
// Calling convention (identical for every TEXT symbol below; 32-byte argument
// area, loaded by the MOVQ prologue of each function):
//
//   values+0(FP)   DI  address of the first element
//   length+8(FP)   SI  element count; the code only ever tests/copies ESI,
//                      so just the low 32 bits are honoured, and a value
//                      that is <= 0 as a signed 32-bit number is "empty"
//   minout+16(FP)  DX  the minimum is stored through this address
//   maxout+24(FP)  CX  the maximum is stored through this address
//
// NOTE(review): the matching Go declarations are not in this file; the Go
// types behind the four 8-byte slots are assumed, not shown - confirm
// against the stub file.
//
// Shape shared by all eight routines:
//   1. empty input: store the reduction identities (min = largest value of
//      the type, max = smallest value of the type) and return;
//   2. short input (at or below the per-type threshold): scalar loop only;
//   3. otherwise: a vector loop over the largest prefix that is a multiple
//      of the block size, a horizontal reduction of the vector accumulators,
//      then the scalar loop over whatever elements remain.
//
// Instructions are emitted as raw WORD/LONG/QUAD byte sequences; the
// Intel-syntax text after each "//" is the instruction those bytes encode.
// Treat the two as a pair when changing anything.
//
// TEXT/GLOBL directive conventions used in this file:
//   * No symbolic flags (NOSPLIT, RODATA, ...): this file does not include
//     textflag.h, so those names are not defined here. GLOBL passes its flag
//     as the literal 8 (the value textflag.h names RODATA).
//   * Routines that overwrite BP with a constant-table address declare an
//     8-byte frame ($8-32); routines that leave BP alone declare $0-32.
//     NOTE(review): the non-zero frame is presumably what makes the
//     assembler save and restore BP around the body - confirm (e.g. with
//     the go vet framepointer check) before changing a frame size.

// LCDATA1: constants for the int8 kernel.
//   +0x00  32 x 0x80  seed for the running maxima (smallest int8)
//   +0x20  32 x 0x7f  seed for the running minima (largest int8)
//   +0x40  16 x 0x7f  XOR mask used while reducing the maximum
//   +0x50  16 x 0x80  XOR mask used while reducing the minimum
// NOTE(review): the two seeds are read with VMOVDQA, which needs a 32-byte
// aligned address; this relies on where the linker places the table - confirm.
DATA LCDATA1<>+0x000(SB)/8, $0x8080808080808080
DATA LCDATA1<>+0x008(SB)/8, $0x8080808080808080
DATA LCDATA1<>+0x010(SB)/8, $0x8080808080808080
DATA LCDATA1<>+0x018(SB)/8, $0x8080808080808080
DATA LCDATA1<>+0x020(SB)/8, $0x7f7f7f7f7f7f7f7f
DATA LCDATA1<>+0x028(SB)/8, $0x7f7f7f7f7f7f7f7f
DATA LCDATA1<>+0x030(SB)/8, $0x7f7f7f7f7f7f7f7f
DATA LCDATA1<>+0x038(SB)/8, $0x7f7f7f7f7f7f7f7f
DATA LCDATA1<>+0x040(SB)/8, $0x7f7f7f7f7f7f7f7f
DATA LCDATA1<>+0x048(SB)/8, $0x7f7f7f7f7f7f7f7f
DATA LCDATA1<>+0x050(SB)/8, $0x8080808080808080
DATA LCDATA1<>+0x058(SB)/8, $0x8080808080808080
GLOBL LCDATA1<>(SB), 8, $96

// _int8_max_min_avx2: minimum and maximum of signed 8-bit elements.
//
//   block size    64 elements (two 32-byte vectors); vector path if length > 63
//   empty input   *minout = 127, *maxout = -128
//
// Register roles:
//   R9         length (zero-extended from ESI)
//   R10        length rounded down to a multiple of 64, then scalar index
//   RAX        element index inside the vector loops
//   R8         number of 64-element blocks; R8B = maximum after the reduction
//   RSI        negated even part of the block count, stepped by 2 up to 0;
//              SIL = minimum after the reduction
//   YMM0/YMM2  running minima      YMM1/YMM3  running maxima
//   BP         address of LCDATA1
TEXT ·_int8_max_min_avx2(SB), $8-32

	MOVQ values+0(FP), DI
	MOVQ length+8(FP), SI
	MOVQ minout+16(FP), DX
	MOVQ maxout+24(FP), CX
	LEAQ LCDATA1<>(SB), BP

	WORD $0xf685 // test esi, esi
	JLE LBB0_1
	WORD $0x8941; BYTE $0xf1 // mov r9d, esi
	WORD $0xfe83; BYTE $0x3f // cmp esi, 63
	JA LBB0_4

	// Short input: seed max/min with the identities and run the scalar loop
	// from index 0.
	WORD $0xb041; BYTE $0x80 // mov r8b, -128
	WORD $0xb640; BYTE $0x7f // mov sil, 127
	WORD $0x3145; BYTE $0xd2 // xor r10d, r10d
	JMP LBB0_11

// Empty input: store the identities.
LBB0_1:
	WORD $0xb640; BYTE $0x7f // mov sil, 127
	WORD $0xb041; BYTE $0x80 // mov r8b, -128
	JMP LBB0_12

// Vector setup: R10 = length & -64, R8 = R10 / 64 (block count).
LBB0_4:
	WORD $0x8945; BYTE $0xca // mov r10d, r9d
	LONG $0xc0e28341 // and r10d, -64
	LONG $0xc0428d49 // lea rax, [r10 - 64]
	WORD $0x8949; BYTE $0xc0 // mov r8, rax
	LONG $0x06e8c149 // shr r8, 6
	LONG $0x01c08349 // add r8, 1
	WORD $0x8548; BYTE $0xc0 // test rax, rax
	JE LBB0_5
	WORD $0x894c; BYTE $0xc6 // mov rsi, r8
	LONG $0xfee68348 // and rsi, -2
	WORD $0xf748; BYTE $0xde // neg rsi
	LONG $0x4d6ffdc5; BYTE $0x00 // vmovdqa ymm1, yword 0[rbp] /* [rip + .LCPI0_0] */
	LONG $0x456ffdc5; BYTE $0x20 // vmovdqa ymm0, yword 32[rbp] /* [rip + .LCPI0_1] */
	WORD $0xc031 // xor eax, eax
	LONG $0xd06ffdc5 // vmovdqa ymm2, ymm0
	LONG $0xd96ffdc5 // vmovdqa ymm3, ymm1

// Unrolled vector loop: two blocks (128 elements) per iteration.
LBB0_7:
	LONG $0x246ffec5; BYTE $0x07 // vmovdqu ymm4, yword [rdi + rax]
	LONG $0x6c6ffec5; WORD $0x2007 // vmovdqu ymm5, yword [rdi + rax + 32]
	LONG $0x746ffec5; WORD $0x4007 // vmovdqu ymm6, yword [rdi + rax + 64]
	LONG $0x7c6ffec5; WORD $0x6007 // vmovdqu ymm7, yword [rdi + rax + 96]
	LONG $0x387de2c4; BYTE $0xc4 // vpminsb ymm0, ymm0, ymm4
	LONG $0x386de2c4; BYTE $0xd5 // vpminsb ymm2, ymm2, ymm5
	LONG $0x3c75e2c4; BYTE $0xcc // vpmaxsb ymm1, ymm1, ymm4
	LONG $0x3c65e2c4; BYTE $0xdd // vpmaxsb ymm3, ymm3, ymm5
	LONG $0x387de2c4; BYTE $0xc6 // vpminsb ymm0, ymm0, ymm6
	LONG $0x386de2c4; BYTE $0xd7 // vpminsb ymm2, ymm2, ymm7
	LONG $0x3c75e2c4; BYTE $0xce // vpmaxsb ymm1, ymm1, ymm6
	LONG $0x3c65e2c4; BYTE $0xdf // vpmaxsb ymm3, ymm3, ymm7
	LONG $0x80e88348 // sub rax, -128
	LONG $0x02c68348 // add rsi, 2
	JNE LBB0_7
	LONG $0x01c0f641 // test r8b, 1
	JE LBB0_10

// Odd block count: fold in the one remaining 64-element block.
LBB0_9:
	LONG $0x246ffec5; BYTE $0x07 // vmovdqu ymm4, yword [rdi + rax]
	LONG $0x6c6ffec5; WORD $0x2007 // vmovdqu ymm5, yword [rdi + rax + 32]
	LONG $0x3c65e2c4; BYTE $0xdd // vpmaxsb ymm3, ymm3, ymm5
	LONG $0x3c75e2c4; BYTE $0xcc // vpmaxsb ymm1, ymm1, ymm4
	LONG $0x386de2c4; BYTE $0xd5 // vpminsb ymm2, ymm2, ymm5
	LONG $0x387de2c4; BYTE $0xc4 // vpminsb ymm0, ymm0, ymm4

// Horizontal reduction. The final step uses vphminposuw (unsigned 16-bit
// minimum), so the signed maximum is XORed with 0x7f and the signed minimum
// with 0x80 first, the high byte of every word is folded into the low byte
// (vpsrlw 8 + vpminub), and the scalar result is XORed back afterwards.
LBB0_10:
	LONG $0x3c75e2c4; BYTE $0xcb // vpmaxsb ymm1, ymm1, ymm3
	LONG $0x397de3c4; WORD $0x01cb // vextracti128 xmm3, ymm1, 1
	LONG $0x3c71e2c4; BYTE $0xcb // vpmaxsb xmm1, xmm1, xmm3
	LONG $0x4deff1c5; BYTE $0x40 // vpxor xmm1, xmm1, oword 64[rbp] /* [rip + .LCPI0_2] */
	LONG $0x387de2c4; BYTE $0xc2 // vpminsb ymm0, ymm0, ymm2
	LONG $0xd171e9c5; BYTE $0x08 // vpsrlw xmm2, xmm1, 8
	LONG $0xcadaf1c5 // vpminub xmm1, xmm1, xmm2
	LONG $0x4179e2c4; BYTE $0xc9 // vphminposuw xmm1, xmm1
	LONG $0x7e79c1c4; BYTE $0xc8 // vmovd r8d, xmm1
	LONG $0x7ff08041 // xor r8b, 127
	LONG $0x397de3c4; WORD $0x01c1 // vextracti128 xmm1, ymm0, 1
	LONG $0x3879e2c4; BYTE $0xc1 // vpminsb xmm0, xmm0, xmm1
	LONG $0x45eff9c5; BYTE $0x50 // vpxor xmm0, xmm0, oword 80[rbp] /* [rip + .LCPI0_3] */
	LONG $0xd071f1c5; BYTE $0x08 // vpsrlw xmm1, xmm0, 8
	LONG $0xc1daf9c5 // vpminub xmm0, xmm0, xmm1
	LONG $0x4179e2c4; BYTE $0xc0 // vphminposuw xmm0, xmm0
	LONG $0xc67ef9c5 // vmovd esi, xmm0
	LONG $0x80f68040 // xor sil, -128
	WORD $0x394d; BYTE $0xca // cmp r10, r9
	JE LBB0_12

// Scalar loop over elements R10 .. R9-1 (signed compares).
LBB0_11:
	LONG $0x04b60f42; BYTE $0x17 // movzx eax, byte [rdi + r10]
	WORD $0x3840; BYTE $0xc6 // cmp sil, al
	LONG $0xf6b60f40 // movzx esi, sil
	WORD $0x4f0f; BYTE $0xf0 // cmovg esi, eax
	WORD $0x3841; BYTE $0xc0 // cmp r8b, al
	LONG $0xc0b60f45 // movzx r8d, r8b
	LONG $0xc04c0f44 // cmovl r8d, eax
	LONG $0x01c28349 // add r10, 1
	WORD $0x394d; BYTE $0xd1 // cmp r9, r10
	JNE LBB0_11

// Store: *maxout = R8B, *minout = SIL.
LBB0_12:
	WORD $0x8844; BYTE $0x01 // mov byte [rcx], r8b
	WORD $0x8840; BYTE $0x32 // mov byte [rdx], sil
	VZEROUPPER
	RET

// Exactly one full block: seed the accumulators and skip the unrolled loop.
LBB0_5:
	LONG $0x4d6ffdc5; BYTE $0x00 // vmovdqa ymm1, yword 0[rbp] /* [rip + .LCPI0_0] */
	LONG $0x456ffdc5; BYTE $0x20 // vmovdqa ymm0, yword 32[rbp] /* [rip + .LCPI0_1] */
	WORD $0xc031 // xor eax, eax
	LONG $0xd06ffdc5 // vmovdqa ymm2, ymm0
	LONG $0xd96ffdc5 // vmovdqa ymm3, ymm1
	LONG $0x01c0f641 // test r8b, 1
	JNE LBB0_9
	JMP LBB0_10

// _uint8_max_min_avx2: minimum and maximum of unsigned 8-bit elements.
//
//   block size    64 elements (two 32-byte vectors); vector path if length > 63
//   empty input   *minout = 0xff, *maxout = 0
//
// Register roles:
//   R9         length (zero-extended from ESI)
//   R10        length rounded down to a multiple of 64, then scalar index
//   RAX        element index inside the vector loops; AL = maximum afterwards
//   R8         number of 64-element blocks; R8D = current element in the
//              scalar loop
//   RSI        negated even part of the block count, stepped by 2 up to 0;
//              SIL = minimum after the reduction
//   YMM1/YMM2  running minima (seeded all-ones)
//   YMM0/YMM3  running maxima (seeded zero)
TEXT ·_uint8_max_min_avx2(SB), $0-32

	MOVQ values+0(FP), DI
	MOVQ length+8(FP), SI
	MOVQ minout+16(FP), DX
	MOVQ maxout+24(FP), CX

	WORD $0xf685 // test esi, esi
	JLE LBB1_1
	WORD $0x8941; BYTE $0xf1 // mov r9d, esi
	WORD $0xfe83; BYTE $0x3f // cmp esi, 63
	JA LBB1_4

	// Short input: seed min/max with the identities, scalar loop from index 0.
	WORD $0xb640; BYTE $0xff // mov sil, -1
	WORD $0x3145; BYTE $0xd2 // xor r10d, r10d
	WORD $0xc031 // xor eax, eax
	JMP LBB1_11

// Empty input: store the identities.
LBB1_1:
	WORD $0xb640; BYTE $0xff // mov sil, -1
	WORD $0xc031 // xor eax, eax
	JMP LBB1_12

// Vector setup: R10 = length & -64, R8 = R10 / 64 (block count).
LBB1_4:
	WORD $0x8945; BYTE $0xca // mov r10d, r9d
	LONG $0xc0e28341 // and r10d, -64
	LONG $0xc0428d49 // lea rax, [r10 - 64]
	WORD $0x8949; BYTE $0xc0 // mov r8, rax
	LONG $0x06e8c149 // shr r8, 6
	LONG $0x01c08349 // add r8, 1
	WORD $0x8548; BYTE $0xc0 // test rax, rax
	JE LBB1_5
	WORD $0x894c; BYTE $0xc6 // mov rsi, r8
	LONG $0xfee68348 // and rsi, -2
	WORD $0xf748; BYTE $0xde // neg rsi
	LONG $0xc0eff9c5 // vpxor xmm0, xmm0, xmm0
	LONG $0xc976f5c5 // vpcmpeqd ymm1, ymm1, ymm1
	WORD $0xc031 // xor eax, eax
	LONG $0xd276edc5 // vpcmpeqd ymm2, ymm2, ymm2
	LONG $0xdbefe1c5 // vpxor xmm3, xmm3, xmm3

// Unrolled vector loop: two blocks (128 elements) per iteration.
LBB1_7:
	LONG $0x246ffec5; BYTE $0x07 // vmovdqu ymm4, yword [rdi + rax]
	LONG $0x6c6ffec5; WORD $0x2007 // vmovdqu ymm5, yword [rdi + rax + 32]
	LONG $0x746ffec5; WORD $0x4007 // vmovdqu ymm6, yword [rdi + rax + 64]
	LONG $0x7c6ffec5; WORD $0x6007 // vmovdqu ymm7, yword [rdi + rax + 96]
	LONG $0xccdaf5c5 // vpminub ymm1, ymm1, ymm4
	LONG $0xd5daedc5 // vpminub ymm2, ymm2, ymm5
	LONG $0xc4defdc5 // vpmaxub ymm0, ymm0, ymm4
	LONG $0xdddee5c5 // vpmaxub ymm3, ymm3, ymm5
	LONG $0xcedaf5c5 // vpminub ymm1, ymm1, ymm6
	LONG $0xd7daedc5 // vpminub ymm2, ymm2, ymm7
	LONG $0xc6defdc5 // vpmaxub ymm0, ymm0, ymm6
	LONG $0xdfdee5c5 // vpmaxub ymm3, ymm3, ymm7
	LONG $0x80e88348 // sub rax, -128
	LONG $0x02c68348 // add rsi, 2
	JNE LBB1_7
	LONG $0x01c0f641 // test r8b, 1
	JE LBB1_10

// Odd block count: fold in the one remaining 64-element block.
LBB1_9:
	LONG $0x246ffec5; BYTE $0x07 // vmovdqu ymm4, yword [rdi + rax]
	LONG $0x6c6ffec5; WORD $0x2007 // vmovdqu ymm5, yword [rdi + rax + 32]
	LONG $0xdddee5c5 // vpmaxub ymm3, ymm3, ymm5
	LONG $0xc4defdc5 // vpmaxub ymm0, ymm0, ymm4
	LONG $0xd5daedc5 // vpminub ymm2, ymm2, ymm5
	LONG $0xccdaf5c5 // vpminub ymm1, ymm1, ymm4

// Horizontal reduction. The maximum is complemented so that vphminposuw
// (an unsigned minimum) can find it, then complemented back (not al).
LBB1_10:
	LONG $0xcadaf5c5 // vpminub ymm1, ymm1, ymm2
	LONG $0xc3defdc5 // vpmaxub ymm0, ymm0, ymm3
	LONG $0x397de3c4; WORD $0x01c2 // vextracti128 xmm2, ymm0, 1
	LONG $0xc2def9c5 // vpmaxub xmm0, xmm0, xmm2
	LONG $0xd276e9c5 // vpcmpeqd xmm2, xmm2, xmm2
	LONG $0xc2eff9c5 // vpxor xmm0, xmm0, xmm2
	LONG $0xd071e9c5; BYTE $0x08 // vpsrlw xmm2, xmm0, 8
	LONG $0xc2daf9c5 // vpminub xmm0, xmm0, xmm2
	LONG $0x4179e2c4; BYTE $0xc0 // vphminposuw xmm0, xmm0
	LONG $0xc07ef9c5 // vmovd eax, xmm0
	WORD $0xd0f6 // not al
	LONG $0x397de3c4; WORD $0x01c8 // vextracti128 xmm0, ymm1, 1
	LONG $0xc0daf1c5 // vpminub xmm0, xmm1, xmm0
	LONG $0xd071f1c5; BYTE $0x08 // vpsrlw xmm1, xmm0, 8
	LONG $0xc1daf9c5 // vpminub xmm0, xmm0, xmm1
	LONG $0x4179e2c4; BYTE $0xc0 // vphminposuw xmm0, xmm0
	LONG $0xc67ef9c5 // vmovd esi, xmm0
	WORD $0x394d; BYTE $0xca // cmp r10, r9
	JE LBB1_12

// Scalar loop over elements R10 .. R9-1 (unsigned compares).
LBB1_11:
	LONG $0x04b60f46; BYTE $0x17 // movzx r8d, byte [rdi + r10]
	WORD $0x3844; BYTE $0xc6 // cmp sil, r8b
	LONG $0xf6b60f40 // movzx esi, sil
	LONG $0xf0430f41 // cmovae esi, r8d
	WORD $0x3844; BYTE $0xc0 // cmp al, r8b
	WORD $0xb60f; BYTE $0xc0 // movzx eax, al
	LONG $0xc0460f41 // cmovbe eax, r8d
	LONG $0x01c28349 // add r10, 1
	WORD $0x394d; BYTE $0xd1 // cmp r9, r10
	JNE LBB1_11

// Store: *maxout = AL, *minout = SIL.
LBB1_12:
	WORD $0x0188 // mov byte [rcx], al
	WORD $0x8840; BYTE $0x32 // mov byte [rdx], sil
	VZEROUPPER
	RET

// Exactly one full block: seed the accumulators and skip the unrolled loop.
LBB1_5:
	LONG $0xc0eff9c5 // vpxor xmm0, xmm0, xmm0
	LONG $0xc976f5c5 // vpcmpeqd ymm1, ymm1, ymm1
	WORD $0xc031 // xor eax, eax
	LONG $0xd276edc5 // vpcmpeqd ymm2, ymm2, ymm2
	LONG $0xdbefe1c5 // vpxor xmm3, xmm3, xmm3
	LONG $0x01c0f641 // test r8b, 1
	JNE LBB1_9
	JMP LBB1_10

// LCDATA2: constants for the int16 kernel.
//   +0x00  16 x 0x8000  seed for the running maxima (smallest int16)
//   +0x20  16 x 0x7fff  seed for the running minima (largest int16)
//   +0x40   8 x 0x7fff  XOR mask used while reducing the maximum
//   +0x50   8 x 0x8000  XOR mask used while reducing the minimum
// NOTE(review): the two seeds are read with VMOVDQA, which needs a 32-byte
// aligned address; this relies on where the linker places the table - confirm.
DATA LCDATA2<>+0x000(SB)/8, $0x8000800080008000
DATA LCDATA2<>+0x008(SB)/8, $0x8000800080008000
DATA LCDATA2<>+0x010(SB)/8, $0x8000800080008000
DATA LCDATA2<>+0x018(SB)/8, $0x8000800080008000
DATA LCDATA2<>+0x020(SB)/8, $0x7fff7fff7fff7fff
DATA LCDATA2<>+0x028(SB)/8, $0x7fff7fff7fff7fff
DATA LCDATA2<>+0x030(SB)/8, $0x7fff7fff7fff7fff
DATA LCDATA2<>+0x038(SB)/8, $0x7fff7fff7fff7fff
DATA LCDATA2<>+0x040(SB)/8, $0x7fff7fff7fff7fff
DATA LCDATA2<>+0x048(SB)/8, $0x7fff7fff7fff7fff
DATA LCDATA2<>+0x050(SB)/8, $0x8000800080008000
DATA LCDATA2<>+0x058(SB)/8, $0x8000800080008000
GLOBL LCDATA2<>(SB), 8, $96

// _int16_max_min_avx2: minimum and maximum of signed 16-bit elements.
//
//   block size    32 elements (two 32-byte vectors); vector path if length > 31
//   empty input   *minout = 32767, *maxout = -32768
//
// Register roles:
//   R9         length (zero-extended from ESI)
//   R10        length rounded down to a multiple of 32, then scalar index
//   RAX        element index inside the vector loops (scaled by 2 on access)
//   R8         number of 32-element blocks; R8W = maximum after the reduction
//   RSI        negated even part of the block count, stepped by 2 up to 0;
//              SI = minimum after the reduction
//   YMM0/YMM2  running minima      YMM1/YMM3  running maxima
//   BP         address of LCDATA2
TEXT ·_int16_max_min_avx2(SB), $8-32

	MOVQ values+0(FP), DI
	MOVQ length+8(FP), SI
	MOVQ minout+16(FP), DX
	MOVQ maxout+24(FP), CX
	LEAQ LCDATA2<>(SB), BP

	WORD $0xf685 // test esi, esi
	JLE LBB2_1
	WORD $0x8941; BYTE $0xf1 // mov r9d, esi
	WORD $0xfe83; BYTE $0x1f // cmp esi, 31
	JA LBB2_4

	// Short input: seed max/min with the identities, scalar loop from index 0.
	LONG $0x00b84166; BYTE $0x80 // mov r8w, -32768
	LONG $0x7fffbe66 // mov si, 32767
	WORD $0x3145; BYTE $0xd2 // xor r10d, r10d
	JMP LBB2_11

// Empty input: store the identities.
LBB2_1:
	LONG $0x7fffbe66 // mov si, 32767
	LONG $0x00b84166; BYTE $0x80 // mov r8w, -32768
	JMP LBB2_12

// Vector setup: R10 = length & -32, R8 = R10 / 32 (block count).
LBB2_4:
	WORD $0x8945; BYTE $0xca // mov r10d, r9d
	LONG $0xe0e28341 // and r10d, -32
	LONG $0xe0428d49 // lea rax, [r10 - 32]
	WORD $0x8949; BYTE $0xc0 // mov r8, rax
	LONG $0x05e8c149 // shr r8, 5
	LONG $0x01c08349 // add r8, 1
	WORD $0x8548; BYTE $0xc0 // test rax, rax
	JE LBB2_5
	WORD $0x894c; BYTE $0xc6 // mov rsi, r8
	LONG $0xfee68348 // and rsi, -2
	WORD $0xf748; BYTE $0xde // neg rsi
	LONG $0x4d6ffdc5; BYTE $0x00 // vmovdqa ymm1, yword 0[rbp] /* [rip + .LCPI2_0] */
	LONG $0x456ffdc5; BYTE $0x20 // vmovdqa ymm0, yword 32[rbp] /* [rip + .LCPI2_1] */
	WORD $0xc031 // xor eax, eax
	LONG $0xd06ffdc5 // vmovdqa ymm2, ymm0
	LONG $0xd96ffdc5 // vmovdqa ymm3, ymm1

// Unrolled vector loop: two blocks (64 elements) per iteration.
LBB2_7:
	LONG $0x246ffec5; BYTE $0x47 // vmovdqu ymm4, yword [rdi + 2*rax]
	LONG $0x6c6ffec5; WORD $0x2047 // vmovdqu ymm5, yword [rdi + 2*rax + 32]
	LONG $0x746ffec5; WORD $0x4047 // vmovdqu ymm6, yword [rdi + 2*rax + 64]
	LONG $0x7c6ffec5; WORD $0x6047 // vmovdqu ymm7, yword [rdi + 2*rax + 96]
	LONG $0xc4eafdc5 // vpminsw ymm0, ymm0, ymm4
	LONG $0xd5eaedc5 // vpminsw ymm2, ymm2, ymm5
	LONG $0xcceef5c5 // vpmaxsw ymm1, ymm1, ymm4
	LONG $0xddeee5c5 // vpmaxsw ymm3, ymm3, ymm5
	LONG $0xc6eafdc5 // vpminsw ymm0, ymm0, ymm6
	LONG $0xd7eaedc5 // vpminsw ymm2, ymm2, ymm7
	LONG $0xceeef5c5 // vpmaxsw ymm1, ymm1, ymm6
	LONG $0xdfeee5c5 // vpmaxsw ymm3, ymm3, ymm7
	LONG $0x40c08348 // add rax, 64
	LONG $0x02c68348 // add rsi, 2
	JNE LBB2_7
	LONG $0x01c0f641 // test r8b, 1
	JE LBB2_10

// Odd block count: fold in the one remaining 32-element block.
LBB2_9:
	LONG $0x246ffec5; BYTE $0x47 // vmovdqu ymm4, yword [rdi + 2*rax]
	LONG $0x6c6ffec5; WORD $0x2047 // vmovdqu ymm5, yword [rdi + 2*rax + 32]
	LONG $0xddeee5c5 // vpmaxsw ymm3, ymm3, ymm5
	LONG $0xcceef5c5 // vpmaxsw ymm1, ymm1, ymm4
	LONG $0xd5eaedc5 // vpminsw ymm2, ymm2, ymm5
	LONG $0xc4eafdc5 // vpminsw ymm0, ymm0, ymm4

// Horizontal reduction. vphminposuw is an unsigned minimum, so the signed
// maximum is XORed with 0x7fff and the signed minimum with 0x8000 before it,
// and the scalar result is XORed back afterwards.
LBB2_10:
	LONG $0xcbeef5c5 // vpmaxsw ymm1, ymm1, ymm3
	LONG $0x397de3c4; WORD $0x01cb // vextracti128 xmm3, ymm1, 1
	LONG $0xcbeef1c5 // vpmaxsw xmm1, xmm1, xmm3
	LONG $0x4deff1c5; BYTE $0x40 // vpxor xmm1, xmm1, oword 64[rbp] /* [rip + .LCPI2_2] */
	LONG $0xc2eafdc5 // vpminsw ymm0, ymm0, ymm2
	LONG $0x4179e2c4; BYTE $0xc9 // vphminposuw xmm1, xmm1
	LONG $0x7e79c1c4; BYTE $0xc8 // vmovd r8d, xmm1
	LONG $0xfff08141; WORD $0x007f; BYTE $0x00 // xor r8d, 32767
	LONG $0x397de3c4; WORD $0x01c1 // vextracti128 xmm1, ymm0, 1
	LONG $0xc1eaf9c5 // vpminsw xmm0, xmm0, xmm1
	LONG $0x45eff9c5; BYTE $0x50 // vpxor xmm0, xmm0, oword 80[rbp] /* [rip + .LCPI2_3] */
	LONG $0x4179e2c4; BYTE $0xc0 // vphminposuw xmm0, xmm0
	LONG $0xc67ef9c5 // vmovd esi, xmm0
	LONG $0x8000f681; WORD $0x0000 // xor esi, 32768
	WORD $0x394d; BYTE $0xca // cmp r10, r9
	JE LBB2_12

// Scalar loop over elements R10 .. R9-1 (signed compares).
LBB2_11:
	LONG $0x04b70f42; BYTE $0x57 // movzx eax, word [rdi + 2*r10]
	WORD $0x3966; BYTE $0xc6 // cmp si, ax
	WORD $0x4f0f; BYTE $0xf0 // cmovg esi, eax
	LONG $0xc0394166 // cmp r8w, ax
	LONG $0xc04c0f44 // cmovl r8d, eax
	LONG $0x01c28349 // add r10, 1
	WORD $0x394d; BYTE $0xd1 // cmp r9, r10
	JNE LBB2_11

// Store: *maxout = R8W, *minout = SI.
LBB2_12:
	LONG $0x01894466 // mov word [rcx], r8w
	WORD $0x8966; BYTE $0x32 // mov word [rdx], si
	VZEROUPPER
	RET

// Exactly one full block: seed the accumulators and skip the unrolled loop.
LBB2_5:
	LONG $0x4d6ffdc5; BYTE $0x00 // vmovdqa ymm1, yword 0[rbp] /* [rip + .LCPI2_0] */
	LONG $0x456ffdc5; BYTE $0x20 // vmovdqa ymm0, yword 32[rbp] /* [rip + .LCPI2_1] */
	WORD $0xc031 // xor eax, eax
	LONG $0xd06ffdc5 // vmovdqa ymm2, ymm0
	LONG $0xd96ffdc5 // vmovdqa ymm3, ymm1
	LONG $0x01c0f641 // test r8b, 1
	JNE LBB2_9
	JMP LBB2_10

// _uint16_max_min_avx2: minimum and maximum of unsigned 16-bit elements.
//
//   block size    32 elements (two 32-byte vectors); vector path if length > 31
//   empty input   *minout = 0xffff, *maxout = 0
//
// Register roles:
//   R9         length (zero-extended from ESI)
//   R10        length rounded down to a multiple of 32, then scalar index
//   RAX        element index inside the vector loops; current element in the
//              scalar loop
//   R8         number of 32-element blocks; R8W = minimum after the reduction
//   RSI        negated even part of the block count, stepped by 2 up to 0;
//              SI = maximum after the reduction
//   YMM1/YMM2  running minima (seeded all-ones)
//   YMM0/YMM3  running maxima (seeded zero)
TEXT ·_uint16_max_min_avx2(SB), $0-32

	MOVQ values+0(FP), DI
	MOVQ length+8(FP), SI
	MOVQ minout+16(FP), DX
	MOVQ maxout+24(FP), CX

	WORD $0xf685 // test esi, esi
	JLE LBB3_1
	WORD $0x8941; BYTE $0xf1 // mov r9d, esi
	WORD $0xfe83; BYTE $0x1f // cmp esi, 31
	JA LBB3_4

	// Short input: seed min/max with the identities, scalar loop from index 0.
	LONG $0xffb84166; BYTE $0xff // mov r8w, -1
	WORD $0x3145; BYTE $0xd2 // xor r10d, r10d
	WORD $0xf631 // xor esi, esi
	JMP LBB3_11

// Empty input: store the identities.
LBB3_1:
	LONG $0xffb84166; BYTE $0xff // mov r8w, -1
	WORD $0xf631 // xor esi, esi
	JMP LBB3_12

// Vector setup: R10 = length & -32, R8 = R10 / 32 (block count).
LBB3_4:
	WORD $0x8945; BYTE $0xca // mov r10d, r9d
	LONG $0xe0e28341 // and r10d, -32
	LONG $0xe0428d49 // lea rax, [r10 - 32]
	WORD $0x8949; BYTE $0xc0 // mov r8, rax
	LONG $0x05e8c149 // shr r8, 5
	LONG $0x01c08349 // add r8, 1
	WORD $0x8548; BYTE $0xc0 // test rax, rax
	JE LBB3_5
	WORD $0x894c; BYTE $0xc6 // mov rsi, r8
	LONG $0xfee68348 // and rsi, -2
	WORD $0xf748; BYTE $0xde // neg rsi
	LONG $0xc0eff9c5 // vpxor xmm0, xmm0, xmm0
	LONG $0xc976f5c5 // vpcmpeqd ymm1, ymm1, ymm1
	WORD $0xc031 // xor eax, eax
	LONG $0xd276edc5 // vpcmpeqd ymm2, ymm2, ymm2
	LONG $0xdbefe1c5 // vpxor xmm3, xmm3, xmm3

// Unrolled vector loop: two blocks (64 elements) per iteration.
LBB3_7:
	LONG $0x246ffec5; BYTE $0x47 // vmovdqu ymm4, yword [rdi + 2*rax]
	LONG $0x6c6ffec5; WORD $0x2047 // vmovdqu ymm5, yword [rdi + 2*rax + 32]
	LONG $0x746ffec5; WORD $0x4047 // vmovdqu ymm6, yword [rdi + 2*rax + 64]
	LONG $0x7c6ffec5; WORD $0x6047 // vmovdqu ymm7, yword [rdi + 2*rax + 96]
	LONG $0x3a75e2c4; BYTE $0xcc // vpminuw ymm1, ymm1, ymm4
	LONG $0x3a6de2c4; BYTE $0xd5 // vpminuw ymm2, ymm2, ymm5
	LONG $0x3e7de2c4; BYTE $0xc4 // vpmaxuw ymm0, ymm0, ymm4
	LONG $0x3e65e2c4; BYTE $0xdd // vpmaxuw ymm3, ymm3, ymm5
	LONG $0x3a75e2c4; BYTE $0xce // vpminuw ymm1, ymm1, ymm6
	LONG $0x3a6de2c4; BYTE $0xd7 // vpminuw ymm2, ymm2, ymm7
	LONG $0x3e7de2c4; BYTE $0xc6 // vpmaxuw ymm0, ymm0, ymm6
	LONG $0x3e65e2c4; BYTE $0xdf // vpmaxuw ymm3, ymm3, ymm7
	LONG $0x40c08348 // add rax, 64
	LONG $0x02c68348 // add rsi, 2
	JNE LBB3_7
	LONG $0x01c0f641 // test r8b, 1
	JE LBB3_10

// Odd block count: fold in the one remaining 32-element block.
LBB3_9:
	LONG $0x246ffec5; BYTE $0x47 // vmovdqu ymm4, yword [rdi + 2*rax]
	LONG $0x6c6ffec5; WORD $0x2047 // vmovdqu ymm5, yword [rdi + 2*rax + 32]
	LONG $0x3e65e2c4; BYTE $0xdd // vpmaxuw ymm3, ymm3, ymm5
	LONG $0x3e7de2c4; BYTE $0xc4 // vpmaxuw ymm0, ymm0, ymm4
	LONG $0x3a6de2c4; BYTE $0xd5 // vpminuw ymm2, ymm2, ymm5
	LONG $0x3a75e2c4; BYTE $0xcc // vpminuw ymm1, ymm1, ymm4

// Horizontal reduction. The maximum is complemented so that vphminposuw
// (an unsigned minimum) can find it, then complemented back (not esi).
LBB3_10:
	LONG $0x3a75e2c4; BYTE $0xca // vpminuw ymm1, ymm1, ymm2
	LONG $0x3e7de2c4; BYTE $0xc3 // vpmaxuw ymm0, ymm0, ymm3
	LONG $0x397de3c4; WORD $0x01c2 // vextracti128 xmm2, ymm0, 1
	LONG $0x3e79e2c4; BYTE $0xc2 // vpmaxuw xmm0, xmm0, xmm2
	LONG $0xd276e9c5 // vpcmpeqd xmm2, xmm2, xmm2
	LONG $0xc2eff9c5 // vpxor xmm0, xmm0, xmm2
	LONG $0x4179e2c4; BYTE $0xc0 // vphminposuw xmm0, xmm0
	LONG $0xc67ef9c5 // vmovd esi, xmm0
	WORD $0xd6f7 // not esi
	LONG $0x397de3c4; WORD $0x01c8 // vextracti128 xmm0, ymm1, 1
	LONG $0x3a71e2c4; BYTE $0xc0 // vpminuw xmm0, xmm1, xmm0
	LONG $0x4179e2c4; BYTE $0xc0 // vphminposuw xmm0, xmm0
	LONG $0x7e79c1c4; BYTE $0xc0 // vmovd r8d, xmm0
	WORD $0x394d; BYTE $0xca // cmp r10, r9
	JE LBB3_12

// Scalar loop over elements R10 .. R9-1 (unsigned compares).
LBB3_11:
	LONG $0x04b70f42; BYTE $0x57 // movzx eax, word [rdi + 2*r10]
	LONG $0xc0394166 // cmp r8w, ax
	LONG $0xc0430f44 // cmovae r8d, eax
	WORD $0x3966; BYTE $0xc6 // cmp si, ax
	WORD $0x460f; BYTE $0xf0 // cmovbe esi, eax
	LONG $0x01c28349 // add r10, 1
	WORD $0x394d; BYTE $0xd1 // cmp r9, r10
	JNE LBB3_11

// Store: *maxout = SI, *minout = R8W.
LBB3_12:
	WORD $0x8966; BYTE $0x31 // mov word [rcx], si
	LONG $0x02894466 // mov word [rdx], r8w
	VZEROUPPER
	RET

// Exactly one full block: seed the accumulators and skip the unrolled loop.
LBB3_5:
	LONG $0xc0eff9c5 // vpxor xmm0, xmm0, xmm0
	LONG $0xc976f5c5 // vpcmpeqd ymm1, ymm1, ymm1
	WORD $0xc031 // xor eax, eax
	LONG $0xd276edc5 // vpcmpeqd ymm2, ymm2, ymm2
	LONG $0xdbefe1c5 // vpxor xmm3, xmm3, xmm3
	LONG $0x01c0f641 // test r8b, 1
	JNE LBB3_9
	JMP LBB3_10

// LCDATA3: constants for the int32 kernel (one 8-byte word).
//   +0  dword 0x80000000  seed for the running maxima (smallest int32)
//   +4  dword 0x7fffffff  seed for the running minima (largest int32)
DATA LCDATA3<>+0x000(SB)/8, $0x7fffffff80000000
GLOBL LCDATA3<>(SB), 8, $8

// _int32_max_min_avx2: minimum and maximum of signed 32-bit elements.
//
//   block size    32 elements (four 32-byte vectors); vector path if length > 31
//   empty input   *minout = 2147483647, *maxout = -2147483648
//
// Register roles:
//   R8         length (zero-extended from ESI)
//   R9         length rounded down to a multiple of 32, then scalar index
//   RAX        element index inside the vector loop; EAX = minimum afterwards
//   R10D       maximum carried between scalar iterations
//   ESI        current element, then the updated maximum
//   YMM0-YMM3  running minima      YMM4-YMM7  running maxima
//   BP         address of LCDATA3
TEXT ·_int32_max_min_avx2(SB), $8-32

	MOVQ values+0(FP), DI
	MOVQ length+8(FP), SI
	MOVQ minout+16(FP), DX
	MOVQ maxout+24(FP), CX
	LEAQ LCDATA3<>(SB), BP

	WORD $0xf685 // test esi, esi
	JLE LBB4_1
	WORD $0x8941; BYTE $0xf0 // mov r8d, esi
	WORD $0xfe83; BYTE $0x1f // cmp esi, 31
	JA LBB4_4

	// Short input: seed max/min with the identities, scalar loop from index 0.
	LONG $0x0000ba41; WORD $0x8000 // mov r10d, -2147483648
	LONG $0xffffffb8; BYTE $0x7f // mov eax, 2147483647
	WORD $0x3145; BYTE $0xc9 // xor r9d, r9d
	JMP LBB4_7

// Empty input: store the identities.
LBB4_1:
	LONG $0xffffffb8; BYTE $0x7f // mov eax, 2147483647
	LONG $0x000000be; BYTE $0x80 // mov esi, -2147483648
	JMP LBB4_8

// Vector setup: R9 = length & -32; broadcast the seeds into the accumulators.
LBB4_4:
	WORD $0x8945; BYTE $0xc1 // mov r9d, r8d
	LONG $0x587de2c4; WORD $0x0065 // vpbroadcastd ymm4, dword 0[rbp] /* [rip + .LCPI4_0] */
	LONG $0xe0e18341 // and r9d, -32
	LONG $0x587de2c4; WORD $0x0445 // vpbroadcastd ymm0, dword 4[rbp] /* [rip + .LCPI4_1] */
	WORD $0xc031 // xor eax, eax
	LONG $0xc86ffdc5 // vmovdqa ymm1, ymm0
	LONG $0xd06ffdc5 // vmovdqa ymm2, ymm0
	LONG $0xd86ffdc5 // vmovdqa ymm3, ymm0
	LONG $0xec6ffdc5 // vmovdqa ymm5, ymm4
	LONG $0xf46ffdc5 // vmovdqa ymm6, ymm4
	LONG $0xfc6ffdc5 // vmovdqa ymm7, ymm4

// Vector loop: one block (32 elements) per iteration.
LBB4_5:
	LONG $0x046f7ec5; BYTE $0x87 // vmovdqu ymm8, yword [rdi + 4*rax]
	LONG $0x4c6f7ec5; WORD $0x2087 // vmovdqu ymm9, yword [rdi + 4*rax + 32]
	LONG $0x546f7ec5; WORD $0x4087 // vmovdqu ymm10, yword [rdi + 4*rax + 64]
	LONG $0x5c6f7ec5; WORD $0x6087 // vmovdqu ymm11, yword [rdi + 4*rax + 96]
	LONG $0x397dc2c4; BYTE $0xc0 // vpminsd ymm0, ymm0, ymm8
	LONG $0x3975c2c4; BYTE $0xc9 // vpminsd ymm1, ymm1, ymm9
	LONG $0x396dc2c4; BYTE $0xd2 // vpminsd ymm2, ymm2, ymm10
	LONG $0x3965c2c4; BYTE $0xdb // vpminsd ymm3, ymm3, ymm11
	LONG $0x3d5dc2c4; BYTE $0xe0 // vpmaxsd ymm4, ymm4, ymm8
	LONG $0x3d55c2c4; BYTE $0xe9 // vpmaxsd ymm5, ymm5, ymm9
	LONG $0x3d4dc2c4; BYTE $0xf2 // vpmaxsd ymm6, ymm6, ymm10
	LONG $0x3d45c2c4; BYTE $0xfb // vpmaxsd ymm7, ymm7, ymm11
	LONG $0x20c08348 // add rax, 32
	WORD $0x3949; BYTE $0xc1 // cmp r9, rax
	JNE LBB4_5

	// Horizontal reduction of the maxima into R10D.
	LONG $0x3d5de2c4; BYTE $0xe5 // vpmaxsd ymm4, ymm4, ymm5
	LONG $0x3d5de2c4; BYTE $0xe6 // vpmaxsd ymm4, ymm4, ymm6
	LONG $0x3d5de2c4; BYTE $0xe7 // vpmaxsd ymm4, ymm4, ymm7
	LONG $0x397de3c4; WORD $0x01e5 // vextracti128 xmm5, ymm4, 1
	LONG $0x3d59e2c4; BYTE $0xe5 // vpmaxsd xmm4, xmm4, xmm5
	LONG $0xec70f9c5; BYTE $0x4e // vpshufd xmm5, xmm4, 78
	LONG $0x3d59e2c4; BYTE $0xe5 // vpmaxsd xmm4, xmm4, xmm5
	LONG $0xec70f9c5; BYTE $0xe5 // vpshufd xmm5, xmm4, 229
	LONG $0x3d59e2c4; BYTE $0xe5 // vpmaxsd xmm4, xmm4, xmm5
	LONG $0x7e79c1c4; BYTE $0xe2 // vmovd r10d, xmm4

	// Horizontal reduction of the minima into EAX.
	LONG $0x397de2c4; BYTE $0xc1 // vpminsd ymm0, ymm0, ymm1
	LONG $0x397de2c4; BYTE $0xc2 // vpminsd ymm0, ymm0, ymm2
	LONG $0x397de2c4; BYTE $0xc3 // vpminsd ymm0, ymm0, ymm3
	LONG $0x397de3c4; WORD $0x01c1 // vextracti128 xmm1, ymm0, 1
	LONG $0x3979e2c4; BYTE $0xc1 // vpminsd xmm0, xmm0, xmm1
	LONG $0xc870f9c5; BYTE $0x4e // vpshufd xmm1, xmm0, 78
	LONG $0x3979e2c4; BYTE $0xc1 // vpminsd xmm0, xmm0, xmm1
	LONG $0xc870f9c5; BYTE $0xe5 // vpshufd xmm1, xmm0, 229
	LONG $0x3979e2c4; BYTE $0xc1 // vpminsd xmm0, xmm0, xmm1
	LONG $0xc07ef9c5 // vmovd eax, xmm0
	WORD $0x8944; BYTE $0xd6 // mov esi, r10d
	WORD $0x394d; BYTE $0xc1 // cmp r9, r8
	JE LBB4_8

// Scalar loop over elements R9 .. R8-1 (signed compares).
LBB4_7:
	LONG $0x8f348b42 // mov esi, dword [rdi + 4*r9]
	WORD $0xf039 // cmp eax, esi
	WORD $0x4f0f; BYTE $0xc6 // cmovg eax, esi
	WORD $0x3941; BYTE $0xf2 // cmp r10d, esi
	LONG $0xf24d0f41 // cmovge esi, r10d
	LONG $0x01c18349 // add r9, 1
	WORD $0x8941; BYTE $0xf2 // mov r10d, esi
	WORD $0x394d; BYTE $0xc8 // cmp r8, r9
	JNE LBB4_7

// Store: *maxout = ESI, *minout = EAX.
LBB4_8:
	WORD $0x3189 // mov dword [rcx], esi
	WORD $0x0289 // mov dword [rdx], eax
	VZEROUPPER
	RET

// _uint32_max_min_avx2: minimum and maximum of unsigned 32-bit elements.
//
//   block size    32 elements (four 32-byte vectors); vector path if length > 31
//   empty input   *minout = 0xffffffff, *maxout = 0
//
// Register roles:
//   R8         length (zero-extended from ESI)
//   R9         length rounded down to a multiple of 32, then scalar index
//   RAX        element index inside the vector loop; EAX = minimum afterwards
//   R10D       maximum carried between scalar iterations
//   ESI        current element, then the updated maximum
//   YMM0-YMM3  running minima (seeded all-ones)
//   YMM4-YMM7  running maxima (seeded zero)
TEXT ·_uint32_max_min_avx2(SB), $0-32

	MOVQ values+0(FP), DI
	MOVQ length+8(FP), SI
	MOVQ minout+16(FP), DX
	MOVQ maxout+24(FP), CX

	WORD $0xf685 // test esi, esi
	JLE LBB5_1
	WORD $0x8941; BYTE $0xf0 // mov r8d, esi
	WORD $0xfe83; BYTE $0x1f // cmp esi, 31
	JA LBB5_4

	// Short input: seed min/max with the identities, scalar loop from index 0.
	WORD $0x3145; BYTE $0xc9 // xor r9d, r9d
	LONG $0xffffffb8; BYTE $0xff // mov eax, -1
	WORD $0x3145; BYTE $0xd2 // xor r10d, r10d
	JMP LBB5_7

// Empty input: store the identities.
LBB5_1:
	LONG $0xffffffb8; BYTE $0xff // mov eax, -1
	WORD $0xf631 // xor esi, esi
	JMP LBB5_8

// Vector setup: R9 = length & -32; seed the accumulators.
LBB5_4:
	WORD $0x8945; BYTE $0xc1 // mov r9d, r8d
	LONG $0xe0e18341 // and r9d, -32
	LONG $0xe4efd9c5 // vpxor xmm4, xmm4, xmm4
	LONG $0xc076fdc5 // vpcmpeqd ymm0, ymm0, ymm0
	WORD $0xc031 // xor eax, eax
	LONG $0xc976f5c5 // vpcmpeqd ymm1, ymm1, ymm1
	LONG $0xd276edc5 // vpcmpeqd ymm2, ymm2, ymm2
	LONG $0xdb76e5c5 // vpcmpeqd ymm3, ymm3, ymm3
	LONG $0xedefd1c5 // vpxor xmm5, xmm5, xmm5
	LONG $0xf6efc9c5 // vpxor xmm6, xmm6, xmm6
	LONG $0xffefc1c5 // vpxor xmm7, xmm7, xmm7

// Vector loop: one block (32 elements) per iteration.
LBB5_5:
	LONG $0x046f7ec5; BYTE $0x87 // vmovdqu ymm8, yword [rdi + 4*rax]
	LONG $0x4c6f7ec5; WORD $0x2087 // vmovdqu ymm9, yword [rdi + 4*rax + 32]
	LONG $0x546f7ec5; WORD $0x4087 // vmovdqu ymm10, yword [rdi + 4*rax + 64]
	LONG $0x5c6f7ec5; WORD $0x6087 // vmovdqu ymm11, yword [rdi + 4*rax + 96]
	LONG $0x3b7dc2c4; BYTE $0xc0 // vpminud ymm0, ymm0, ymm8
	LONG $0x3b75c2c4; BYTE $0xc9 // vpminud ymm1, ymm1, ymm9
	LONG $0x3b6dc2c4; BYTE $0xd2 // vpminud ymm2, ymm2, ymm10
	LONG $0x3b65c2c4; BYTE $0xdb // vpminud ymm3, ymm3, ymm11
	LONG $0x3f5dc2c4; BYTE $0xe0 // vpmaxud ymm4, ymm4, ymm8
	LONG $0x3f55c2c4; BYTE $0xe9 // vpmaxud ymm5, ymm5, ymm9
	LONG $0x3f4dc2c4; BYTE $0xf2 // vpmaxud ymm6, ymm6, ymm10
	LONG $0x3f45c2c4; BYTE $0xfb // vpmaxud ymm7, ymm7, ymm11
	LONG $0x20c08348 // add rax, 32
	WORD $0x3949; BYTE $0xc1 // cmp r9, rax
	JNE LBB5_5

	// Horizontal reduction of the maxima into R10D.
	LONG $0x3f5de2c4; BYTE $0xe5 // vpmaxud ymm4, ymm4, ymm5
	LONG $0x3f5de2c4; BYTE $0xe6 // vpmaxud ymm4, ymm4, ymm6
	LONG $0x3f5de2c4; BYTE $0xe7 // vpmaxud ymm4, ymm4, ymm7
	LONG $0x397de3c4; WORD $0x01e5 // vextracti128 xmm5, ymm4, 1
	LONG $0x3f59e2c4; BYTE $0xe5 // vpmaxud xmm4, xmm4, xmm5
	LONG $0xec70f9c5; BYTE $0x4e // vpshufd xmm5, xmm4, 78
	LONG $0x3f59e2c4; BYTE $0xe5 // vpmaxud xmm4, xmm4, xmm5
	LONG $0xec70f9c5; BYTE $0xe5 // vpshufd xmm5, xmm4, 229
	LONG $0x3f59e2c4; BYTE $0xe5 // vpmaxud xmm4, xmm4, xmm5
	LONG $0x7e79c1c4; BYTE $0xe2 // vmovd r10d, xmm4

	// Horizontal reduction of the minima into EAX.
	LONG $0x3b7de2c4; BYTE $0xc1 // vpminud ymm0, ymm0, ymm1
	LONG $0x3b7de2c4; BYTE $0xc2 // vpminud ymm0, ymm0, ymm2
	LONG $0x3b7de2c4; BYTE $0xc3 // vpminud ymm0, ymm0, ymm3
	LONG $0x397de3c4; WORD $0x01c1 // vextracti128 xmm1, ymm0, 1
	LONG $0x3b79e2c4; BYTE $0xc1 // vpminud xmm0, xmm0, xmm1
	LONG $0xc870f9c5; BYTE $0x4e // vpshufd xmm1, xmm0, 78
	LONG $0x3b79e2c4; BYTE $0xc1 // vpminud xmm0, xmm0, xmm1
	LONG $0xc870f9c5; BYTE $0xe5 // vpshufd xmm1, xmm0, 229
	LONG $0x3b79e2c4; BYTE $0xc1 // vpminud xmm0, xmm0, xmm1
	LONG $0xc07ef9c5 // vmovd eax, xmm0
	WORD $0x8944; BYTE $0xd6 // mov esi, r10d
	WORD $0x394d; BYTE $0xc1 // cmp r9, r8
	JE LBB5_8

// Scalar loop over elements R9 .. R8-1 (unsigned compares).
LBB5_7:
	LONG $0x8f348b42 // mov esi, dword [rdi + 4*r9]
	WORD $0xf039 // cmp eax, esi
	WORD $0x430f; BYTE $0xc6 // cmovae eax, esi
	WORD $0x3941; BYTE $0xf2 // cmp r10d, esi
	LONG $0xf2470f41 // cmova esi, r10d
	LONG $0x01c18349 // add r9, 1
	WORD $0x8941; BYTE $0xf2 // mov r10d, esi
	WORD $0x394d; BYTE $0xc8 // cmp r8, r9
	JNE LBB5_7

// Store: *maxout = ESI, *minout = EAX.
LBB5_8:
	WORD $0x3189 // mov dword [rcx], esi
	WORD $0x0289 // mov dword [rdx], eax
	VZEROUPPER
	RET

// LCDATA4: constants for the int64 kernel.
//   +0  qword 0x8000000000000000  seed for the running maxima (smallest int64)
//   +8  qword 0x7fffffffffffffff  seed for the running minima (largest int64)
DATA LCDATA4<>+0x000(SB)/8, $0x8000000000000000
DATA LCDATA4<>+0x008(SB)/8, $0x7fffffffffffffff
GLOBL LCDATA4<>(SB), 8, $16

// _int64_max_min_avx2: minimum and maximum of signed 64-bit elements.
//
//   block size    16 elements (four 32-byte vectors); vector path if length > 15
//   empty input   *minout = 9223372036854775807, *maxout = that value + 1
//                 wrapped, i.e. the smallest int64
//
// There is no packed 64-bit min/max instruction in this code; each update is
// a vpcmpgtq (signed compare) followed by a vblendvpd select.
//
// Register roles:
//   R8         length (zero-extended from ESI)
//   R9         length rounded down to a multiple of 16, then scalar index
//   RAX        seeded with the largest int64 before any branch; element index
//              inside the vector loop; minimum afterwards
//   R10        maximum carried between scalar iterations
//   RSI        current element, then the updated maximum
//   YMM0,YMM3,YMM2,YMM1  running minima (vectors 0..3 of each block)
//   YMM4,YMM7,YMM6,YMM5  running maxima (vectors 0..3 of each block)
//   BP         address of LCDATA4
TEXT ·_int64_max_min_avx2(SB), $8-32

	MOVQ values+0(FP), DI
	MOVQ length+8(FP), SI
	MOVQ minout+16(FP), DX
	MOVQ maxout+24(FP), CX
	LEAQ LCDATA4<>(SB), BP

	QUAD $0xffffffffffffb848; WORD $0x7fff // mov rax, 9223372036854775807
	WORD $0xf685 // test esi, esi
	JLE LBB6_1
	WORD $0x8941; BYTE $0xf0 // mov r8d, esi
	WORD $0xfe83; BYTE $0x0f // cmp esi, 15
	JA LBB6_4

	// Short input: R10 = RAX + 1 wraps to the smallest int64 (max identity).
	LONG $0x01508d4c // lea r10, [rax + 1]
	WORD $0x3145; BYTE $0xc9 // xor r9d, r9d
	JMP LBB6_7

// Empty input: store the identities.
LBB6_1:
	LONG $0x01708d48 // lea rsi, [rax + 1]
	JMP LBB6_8

// Vector setup: R9 = length & -16; broadcast the seeds into the accumulators.
LBB6_4:
	WORD $0x8945; BYTE $0xc1 // mov r9d, r8d
	LONG $0x597de2c4; WORD $0x0065 // vpbroadcastq ymm4, qword 0[rbp] /* [rip + .LCPI6_0] */
	LONG $0xf0e18341 // and r9d, -16
	LONG $0x597de2c4; WORD $0x0845 // vpbroadcastq ymm0, qword 8[rbp] /* [rip + .LCPI6_1] */
	WORD $0xc031 // xor eax, eax
	LONG $0xd86ffdc5 // vmovdqa ymm3, ymm0
	LONG $0xd06ffdc5 // vmovdqa ymm2, ymm0
	LONG $0xc86ffdc5 // vmovdqa ymm1, ymm0
	LONG $0xfc6ffdc5 // vmovdqa ymm7, ymm4
	LONG $0xf46ffdc5 // vmovdqa ymm6, ymm4
	LONG $0xec6ffdc5 // vmovdqa ymm5, ymm4

// Vector loop: one block (16 elements) per iteration.
LBB6_5:
	LONG $0x046f7ec5; BYTE $0xc7 // vmovdqu ymm8, yword [rdi + 8*rax]
	LONG $0x373d62c4; BYTE $0xc8 // vpcmpgtq ymm9, ymm8, ymm0
	LONG $0x4b3de3c4; WORD $0x90c0 // vblendvpd ymm0, ymm8, ymm0, ymm9
	LONG $0x4c6f7ec5; WORD $0x20c7 // vmovdqu ymm9, yword [rdi + 8*rax + 32]
	LONG $0x373562c4; BYTE $0xd3 // vpcmpgtq ymm10, ymm9, ymm3
	LONG $0x4b35e3c4; WORD $0xa0db // vblendvpd ymm3, ymm9, ymm3, ymm10
	LONG $0x546f7ec5; WORD $0x40c7 // vmovdqu ymm10, yword [rdi + 8*rax + 64]
	LONG $0x372d62c4; BYTE $0xda // vpcmpgtq ymm11, ymm10, ymm2
	LONG $0x4b2de3c4; WORD $0xb0d2 // vblendvpd ymm2, ymm10, ymm2, ymm11
	LONG $0x5c6f7ec5; WORD $0x60c7 // vmovdqu ymm11, yword [rdi + 8*rax + 96]
	LONG $0x372562c4; BYTE $0xe1 // vpcmpgtq ymm12, ymm11, ymm1
	LONG $0x4b25e3c4; WORD $0xc0c9 // vblendvpd ymm1, ymm11, ymm1, ymm12
	LONG $0x375d42c4; BYTE $0xe0 // vpcmpgtq ymm12, ymm4, ymm8
	LONG $0x4b3de3c4; WORD $0xc0e4 // vblendvpd ymm4, ymm8, ymm4, ymm12
	LONG $0x374542c4; BYTE $0xc1 // vpcmpgtq ymm8, ymm7, ymm9
	LONG $0x4b35e3c4; WORD $0x80ff // vblendvpd ymm7, ymm9, ymm7, ymm8
	LONG $0x374d42c4; BYTE $0xc2 // vpcmpgtq ymm8, ymm6, ymm10
	LONG $0x4b2de3c4; WORD $0x80f6 // vblendvpd ymm6, ymm10, ymm6, ymm8
	LONG $0x375542c4; BYTE $0xc3 // vpcmpgtq ymm8, ymm5, ymm11
	LONG $0x4b25e3c4; WORD $0x80ed // vblendvpd ymm5, ymm11, ymm5, ymm8
	LONG $0x10c08348 // add rax, 16
	WORD $0x3949; BYTE $0xc1 // cmp r9, rax
	JNE LBB6_5

	// Horizontal reduction of the maxima into R10.
	LONG $0x375d62c4; BYTE $0xc7 // vpcmpgtq ymm8, ymm4, ymm7
	LONG $0x4b45e3c4; WORD $0x80e4 // vblendvpd ymm4, ymm7, ymm4, ymm8
	LONG $0x375de2c4; BYTE $0xfe // vpcmpgtq ymm7, ymm4, ymm6
	LONG $0x4b4de3c4; WORD $0x70e4 // vblendvpd ymm4, ymm6, ymm4, ymm7
	LONG $0x375de2c4; BYTE $0xf5 // vpcmpgtq ymm6, ymm4, ymm5
	LONG $0x4b55e3c4; WORD $0x60e4 // vblendvpd ymm4, ymm5, ymm4, ymm6
	LONG $0x197de3c4; WORD $0x01e5 // vextractf128 xmm5, ymm4, 1
	LONG $0x3759e2c4; BYTE $0xf5 // vpcmpgtq xmm6, xmm4, xmm5
	LONG $0x4b51e3c4; WORD $0x60e4 // vblendvpd xmm4, xmm5, xmm4, xmm6
	LONG $0x0479e3c4; WORD $0x4eec // vpermilps xmm5, xmm4, 78
	LONG $0x3759e2c4; BYTE $0xf5 // vpcmpgtq xmm6, xmm4, xmm5
	LONG $0x4b51e3c4; WORD $0x60e4 // vblendvpd xmm4, xmm5, xmm4, xmm6
	LONG $0x7ef9c1c4; BYTE $0xe2 // vmovq r10, xmm4

	// Horizontal reduction of the minima into RAX.
	LONG $0x3765e2c4; BYTE $0xe0 // vpcmpgtq ymm4, ymm3, ymm0
	LONG $0x4b65e3c4; WORD $0x40c0 // vblendvpd ymm0, ymm3, ymm0, ymm4
	LONG $0x376de2c4; BYTE $0xd8 // vpcmpgtq ymm3, ymm2, ymm0
	LONG $0x4b6de3c4; WORD $0x30c0 // vblendvpd ymm0, ymm2, ymm0, ymm3
	LONG $0x3775e2c4; BYTE $0xd0 // vpcmpgtq ymm2, ymm1, ymm0
	LONG $0x4b75e3c4; WORD $0x20c0 // vblendvpd ymm0, ymm1, ymm0, ymm2
	LONG $0x197de3c4; WORD $0x01c1 // vextractf128 xmm1, ymm0, 1
	LONG $0x3771e2c4; BYTE $0xd0 // vpcmpgtq xmm2, xmm1, xmm0
	LONG $0x4b71e3c4; WORD $0x20c0 // vblendvpd xmm0, xmm1, xmm0, xmm2
	LONG $0x0479e3c4; WORD $0x4ec8 // vpermilps xmm1, xmm0, 78
	LONG $0x3771e2c4; BYTE $0xd0 // vpcmpgtq xmm2, xmm1, xmm0
	LONG $0x4b71e3c4; WORD $0x20c0 // vblendvpd xmm0, xmm1, xmm0, xmm2
	LONG $0x7ef9e1c4; BYTE $0xc0 // vmovq rax, xmm0
	WORD $0x894c; BYTE $0xd6 // mov rsi, r10
	WORD $0x394d; BYTE $0xc1 // cmp r9, r8
	JE LBB6_8

// Scalar loop over elements R9 .. R8-1 (signed compares).
LBB6_7:
	LONG $0xcf348b4a // mov rsi, qword [rdi + 8*r9]
	WORD $0x3948; BYTE $0xf0 // cmp rax, rsi
	LONG $0xc64f0f48 // cmovg rax, rsi
	WORD $0x3949; BYTE $0xf2 // cmp r10, rsi
	LONG $0xf24d0f49 // cmovge rsi, r10
	LONG $0x01c18349 // add r9, 1
	WORD $0x8949; BYTE $0xf2 // mov r10, rsi
	WORD $0x394d; BYTE $0xc8 // cmp r8, r9
	JNE LBB6_7

// Store: *maxout = RSI, *minout = RAX.
LBB6_8:
	WORD $0x8948; BYTE $0x31 // mov qword [rcx], rsi
	WORD $0x8948; BYTE $0x02 // mov qword [rdx], rax
	VZEROUPPER
	RET

// LCDATA5: constant for the uint64 kernel.
//   +0  qword 0x8000000000000000  sign-bit mask, broadcast into YMM0
DATA LCDATA5<>+0x000(SB)/8, $0x8000000000000000
GLOBL LCDATA5<>(SB), 8, $8

// _uint64_max_min_avx2: minimum and maximum of unsigned 64-bit elements.
//
//   block size    16 elements (four 32-byte vectors); vector path if length > 15
//   empty input   *minout = 0xffffffffffffffff, *maxout = 0
//
// The only 64-bit vector compare used here is vpcmpgtq, which is signed.
// Both operands are XORed with the sign-bit mask in YMM0 before every
// compare so that the signed result matches the unsigned ordering; the
// blend then selects from the unmodified values.
//
// Register roles:
//   R8         length (zero-extended from ESI)
//   R9         length rounded down to a multiple of 16, then scalar index
//   RAX        element index inside the vector loop; minimum afterwards
//   R10        maximum carried between scalar iterations
//   RSI        current element, then the updated maximum
//   YMM0       sign-bit mask (0x8000000000000000 in every lane)
//   YMM1,YMM4,YMM3,YMM2  running minima (vectors 0..3), seeded all-ones
//   YMM5,YMM8,YMM7,YMM6  running maxima (vectors 0..3), seeded zero
//   BP         address of LCDATA5
TEXT ·_uint64_max_min_avx2(SB), $8-32

	MOVQ values+0(FP), DI
	MOVQ length+8(FP), SI
	MOVQ minout+16(FP), DX
	MOVQ maxout+24(FP), CX
	LEAQ LCDATA5<>(SB), BP

	WORD $0xf685 // test esi, esi
	JLE LBB7_1
	WORD $0x8941; BYTE $0xf0 // mov r8d, esi
	WORD $0xfe83; BYTE $0x0f // cmp esi, 15
	JA LBB7_4

	// Short input: seed min/max with the identities, scalar loop from index 0.
	LONG $0xffc0c748; WORD $0xffff; BYTE $0xff // mov rax, -1
	WORD $0x3145; BYTE $0xc9 // xor r9d, r9d
	WORD $0x3145; BYTE $0xd2 // xor r10d, r10d
	JMP LBB7_7

// Empty input: store the identities.
LBB7_1:
	LONG $0xffc0c748; WORD $0xffff; BYTE $0xff // mov rax, -1
	WORD $0xf631 // xor esi, esi
	JMP LBB7_8

// Vector setup: R9 = length & -16; seed the accumulators, load the mask.
LBB7_4:
	WORD $0x8945; BYTE $0xc1 // mov r9d, r8d
	LONG $0xf0e18341 // and r9d, -16
	LONG $0xedefd1c5 // vpxor xmm5, xmm5, xmm5
	LONG $0xc976f5c5 // vpcmpeqd ymm1, ymm1, ymm1
	WORD $0xc031 // xor eax, eax
	LONG $0x597de2c4; WORD $0x0045 // vpbroadcastq ymm0, qword 0[rbp] /* [rip + .LCPI7_0] */
	LONG $0xe476ddc5 // vpcmpeqd ymm4, ymm4, ymm4
	LONG $0xdb76e5c5 // vpcmpeqd ymm3, ymm3, ymm3
	LONG $0xd276edc5 // vpcmpeqd ymm2, ymm2, ymm2
	LONG $0xef3941c4; BYTE $0xc0 // vpxor xmm8, xmm8, xmm8
	LONG $0xffefc1c5 // vpxor xmm7, xmm7, xmm7
	LONG $0xf6efc9c5 // vpxor xmm6, xmm6, xmm6

// Vector loop: one block (16 elements) per iteration.
LBB7_5:
	LONG $0x0c6f7ec5; BYTE $0xc7 // vmovdqu ymm9, yword [rdi + 8*rax]
	LONG $0xd0ef75c5 // vpxor ymm10, ymm1, ymm0
	LONG $0xd8ef35c5 // vpxor ymm11, ymm9, ymm0
	LONG $0x372542c4; BYTE $0xd2 // vpcmpgtq ymm10, ymm11, ymm10
	LONG $0x4b35e3c4; WORD $0xa0c9 // vblendvpd ymm1, ymm9, ymm1, ymm10
	LONG $0xd0ef55c5 // vpxor ymm10, ymm5, ymm0
	LONG $0x372d42c4; BYTE $0xd3 // vpcmpgtq ymm10, ymm10, ymm11
	LONG $0x4b35e3c4; WORD $0xa0ed // vblendvpd ymm5, ymm9, ymm5, ymm10
	LONG $0x4c6f7ec5; WORD $0x20c7 // vmovdqu ymm9, yword [rdi + 8*rax + 32]
	LONG $0xd0ef5dc5 // vpxor ymm10, ymm4, ymm0
	LONG $0xd8ef35c5 // vpxor ymm11, ymm9, ymm0
	LONG $0x372542c4; BYTE $0xd2 // vpcmpgtq ymm10, ymm11, ymm10
	LONG $0x4b35e3c4; WORD $0xa0e4 // vblendvpd ymm4, ymm9, ymm4, ymm10
	LONG $0xd0ef3dc5 // vpxor ymm10, ymm8, ymm0
	LONG $0x372d42c4; BYTE $0xd3 // vpcmpgtq ymm10, ymm10, ymm11
	LONG $0x5c6f7ec5; WORD $0x40c7 // vmovdqu ymm11, yword [rdi + 8*rax + 64]
	LONG $0x4b3543c4; WORD $0xa0c0 // vblendvpd ymm8, ymm9, ymm8, ymm10
	LONG $0xc8ef65c5 // vpxor ymm9, ymm3, ymm0
	LONG $0xd0ef25c5 // vpxor ymm10, ymm11, ymm0
	LONG $0x372d42c4; BYTE $0xc9 // vpcmpgtq ymm9, ymm10, ymm9
	LONG $0x4b25e3c4; WORD $0x90db // vblendvpd ymm3, ymm11, ymm3, ymm9
	LONG $0xc8ef45c5 // vpxor ymm9, ymm7, ymm0
	LONG $0x373542c4; BYTE $0xca // vpcmpgtq ymm9, ymm9, ymm10
	LONG $0x4b25e3c4; WORD $0x90ff // vblendvpd ymm7, ymm11, ymm7, ymm9
	LONG $0x4c6f7ec5; WORD $0x60c7 // vmovdqu ymm9, yword [rdi + 8*rax + 96]
	LONG $0xd0ef6dc5 // vpxor ymm10, ymm2, ymm0
	LONG $0xd8ef35c5 // vpxor ymm11, ymm9, ymm0
	LONG $0x372542c4; BYTE $0xd2 // vpcmpgtq ymm10, ymm11, ymm10
	LONG $0x4b35e3c4; WORD $0xa0d2 // vblendvpd ymm2, ymm9, ymm2, ymm10
	LONG $0xd0ef4dc5 // vpxor ymm10, ymm6, ymm0
	LONG $0x372d42c4; BYTE $0xd3 // vpcmpgtq ymm10, ymm10, ymm11
	LONG $0x4b35e3c4; WORD $0xa0f6 // vblendvpd ymm6, ymm9, ymm6, ymm10
	LONG $0x10c08348 // add rax, 16
	WORD $0x3949; BYTE $0xc1 // cmp r9, rax
	JNE LBB7_5

	// Horizontal reduction of the maxima (YMM5/8/7/6) into XMM5.
	LONG $0xc8ef3dc5 // vpxor ymm9, ymm8, ymm0
	LONG $0xd0ef55c5 // vpxor ymm10, ymm5, ymm0
	LONG $0x372d42c4; BYTE $0xc9 // vpcmpgtq ymm9, ymm10, ymm9
	LONG $0x4b3de3c4; WORD $0x90ed // vblendvpd ymm5, ymm8, ymm5, ymm9
	LONG $0xc05755c5 // vxorpd ymm8, ymm5, ymm0
	LONG $0xc8ef45c5 // vpxor ymm9, ymm7, ymm0
	LONG $0x373d42c4; BYTE $0xc1 // vpcmpgtq ymm8, ymm8, ymm9
	LONG $0x4b45e3c4; WORD $0x80ed // vblendvpd ymm5, ymm7, ymm5, ymm8
	LONG $0xf857d5c5 // vxorpd ymm7, ymm5, ymm0
	LONG $0xc0ef4dc5 // vpxor ymm8, ymm6, ymm0
	LONG $0x3745c2c4; BYTE $0xf8 // vpcmpgtq ymm7, ymm7, ymm8
	LONG $0x4b4de3c4; WORD $0x70ed // vblendvpd ymm5, ymm6, ymm5, ymm7
	LONG $0x197de3c4; WORD $0x01ee // vextractf128 xmm6, ymm5, 1
	LONG $0xc05749c5 // vxorpd xmm8, xmm6, xmm0
	LONG $0xf857d1c5 // vxorpd xmm7, xmm5, xmm0
	LONG $0x3741c2c4; BYTE $0xf8 // vpcmpgtq xmm7, xmm7, xmm8
	LONG $0x4b49e3c4; WORD $0x70ed // vblendvpd xmm5, xmm6, xmm5, xmm7
	LONG $0x0479e3c4; WORD $0x4ef5 // vpermilps xmm6, xmm5, 78
	LONG $0xc05751c5 // vxorpd xmm8, xmm5, xmm0
	LONG $0xf857c9c5 // vxorpd xmm7, xmm6, xmm0
	LONG $0x3739e2c4; BYTE $0xff // vpcmpgtq xmm7, xmm8, xmm7
	LONG $0x4b49e3c4; WORD $0x70ed // vblendvpd xmm5, xmm6, xmm5, xmm7

	// Horizontal reduction of the minima (YMM1/4/3/2) into RAX; the maximum
	// is moved from XMM5 to R10 part-way through.
	LONG $0xf0eff5c5 // vpxor ymm6, ymm1, ymm0
	LONG $0xf8efddc5 // vpxor ymm7, ymm4, ymm0
	LONG $0x3745e2c4; BYTE $0xf6 // vpcmpgtq ymm6, ymm7, ymm6
	LONG $0x4b5de3c4; WORD $0x60c9 // vblendvpd ymm1, ymm4, ymm1, ymm6
	LONG $0xe057f5c5 // vxorpd ymm4, ymm1, ymm0
	LONG $0xf0efe5c5 // vpxor ymm6, ymm3, ymm0
	LONG $0x374de2c4; BYTE $0xe4 // vpcmpgtq ymm4, ymm6, ymm4
	LONG $0x4b65e3c4; WORD $0x40c9 // vblendvpd ymm1, ymm3, ymm1, ymm4
	LONG $0x7ef9c1c4; BYTE $0xea // vmovq r10, xmm5
	LONG $0xd857f5c5 // vxorpd ymm3, ymm1, ymm0
	LONG $0xe0efedc5 // vpxor ymm4, ymm2, ymm0
	LONG $0x375de2c4; BYTE $0xdb // vpcmpgtq ymm3, ymm4, ymm3
	LONG $0x4b6de3c4; WORD $0x30c9 // vblendvpd ymm1, ymm2, ymm1, ymm3
	LONG $0x197de3c4; WORD $0x01ca // vextractf128 xmm2, ymm1, 1
	LONG $0xd857f1c5 // vxorpd xmm3, xmm1, xmm0
	LONG $0xe057e9c5 // vxorpd xmm4, xmm2, xmm0
	LONG $0x3759e2c4; BYTE $0xdb // vpcmpgtq xmm3, xmm4, xmm3
	LONG $0x4b69e3c4; WORD $0x30c9 // vblendvpd xmm1, xmm2, xmm1, xmm3
	LONG $0x0479e3c4; WORD $0x4ed1 // vpermilps xmm2, xmm1, 78
	LONG $0xd857f1c5 // vxorpd xmm3, xmm1, xmm0
	LONG $0xc057e9c5 // vxorpd xmm0, xmm2, xmm0
	LONG $0x3779e2c4; BYTE $0xc3 // vpcmpgtq xmm0, xmm0, xmm3
	LONG $0x4b69e3c4; WORD $0x00c1 // vblendvpd xmm0, xmm2, xmm1, xmm0
	LONG $0x7ef9e1c4; BYTE $0xc0 // vmovq rax, xmm0
	WORD $0x894c; BYTE $0xd6 // mov rsi, r10
	WORD $0x394d; BYTE $0xc1 // cmp r9, r8
	JE LBB7_8

// Scalar loop over elements R9 .. R8-1 (unsigned compares).
LBB7_7:
	LONG $0xcf348b4a // mov rsi, qword [rdi + 8*r9]
	WORD $0x3948; BYTE $0xf0 // cmp rax, rsi
	LONG $0xc6430f48 // cmovae rax, rsi
	WORD $0x3949; BYTE $0xf2 // cmp r10, rsi
	LONG $0xf2470f49 // cmova rsi, r10
	LONG $0x01c18349 // add r9, 1
	WORD $0x8949; BYTE $0xf2 // mov r10, rsi
	WORD $0x394d; BYTE $0xc8 // cmp r8, r9
	JNE LBB7_7

// Store: *maxout = RSI, *minout = RAX.
LBB7_8:
	WORD $0x8948; BYTE $0x31 // mov qword [rcx], rsi
	WORD $0x8948; BYTE $0x02 // mov qword [rdx], rax
	VZEROUPPER
	RET