//+build !noasm !appengine
// AUTO-GENERATED BY C2GOASM -- DO NOT EDIT
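//
// AVX2 min/max kernels: each routine below scans a contiguous array of
// integers and stores the smallest element through minout and the largest
// element through maxout. The argument layout is values+0(FP), length+8(FP),
// minout+16(FP), maxout+24(FP); the companion Go stubs are assumed (they are
// not part of this file) to be declared roughly as:
//
//	//go:noescape
//	func _int8_max_min_avx2(values unsafe.Pointer, length int, minout, maxout unsafe.Pointer)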
DATA LCDATA1<>+0x000(SB)/8, $0x8080808080808080
DATA LCDATA1<>+0x008(SB)/8, $0x8080808080808080
DATA LCDATA1<>+0x010(SB)/8, $0x8080808080808080
DATA LCDATA1<>+0x018(SB)/8, $0x8080808080808080
DATA LCDATA1<>+0x020(SB)/8, $0x7f7f7f7f7f7f7f7f
DATA LCDATA1<>+0x028(SB)/8, $0x7f7f7f7f7f7f7f7f
DATA LCDATA1<>+0x030(SB)/8, $0x7f7f7f7f7f7f7f7f
DATA LCDATA1<>+0x038(SB)/8, $0x7f7f7f7f7f7f7f7f
DATA LCDATA1<>+0x040(SB)/8, $0x7f7f7f7f7f7f7f7f
DATA LCDATA1<>+0x048(SB)/8, $0x7f7f7f7f7f7f7f7f
DATA LCDATA1<>+0x050(SB)/8, $0x8080808080808080
DATA LCDATA1<>+0x058(SB)/8, $0x8080808080808080
GLOBL LCDATA1<>(SB), 8, $96
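// _int8_max_min_avx2: min/max of length int8 values starting at values.
// Accumulators are seeded from LCDATA1 (0x80 = INT8_MIN for the max lanes,
// 0x7f = INT8_MAX for the min lanes). Blocks of 64 elements are folded with
// VPMINSB/VPMAXSB; the horizontal reduction XORs the lanes with 0x7f (max) or
// 0x80 (min) to map signed order onto unsigned order so VPHMINPOSUW can pick
// the extreme byte, and any remaining elements run through the scalar loop at
// LBB0_11.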
TEXT ·_int8_max_min_avx2(SB), $8-32
MOVQ values+0(FP), DI
MOVQ length+8(FP), SI
MOVQ minout+16(FP), DX
MOVQ maxout+24(FP), CX
LEAQ LCDATA1<>(SB), BP
WORD $0xf685 // test esi, esi
JLE LBB0_1
WORD $0x8941; BYTE $0xf1 // mov r9d, esi
WORD $0xfe83; BYTE $0x3f // cmp esi, 63
JA LBB0_4
WORD $0xb041; BYTE $0x80 // mov r8b, -128
WORD $0xb640; BYTE $0x7f // mov sil, 127
WORD $0x3145; BYTE $0xd2 // xor r10d, r10d
JMP LBB0_11
LBB0_1:
WORD $0xb640; BYTE $0x7f // mov sil, 127
WORD $0xb041; BYTE $0x80 // mov r8b, -128
JMP LBB0_12
LBB0_4:
WORD $0x8945; BYTE $0xca // mov r10d, r9d
LONG $0xc0e28341 // and r10d, -64
LONG $0xc0428d49 // lea rax, [r10 - 64]
WORD $0x8949; BYTE $0xc0 // mov r8, rax
LONG $0x06e8c149 // shr r8, 6
LONG $0x01c08349 // add r8, 1
WORD $0x8548; BYTE $0xc0 // test rax, rax
JE LBB0_5
WORD $0x894c; BYTE $0xc6 // mov rsi, r8
LONG $0xfee68348 // and rsi, -2
WORD $0xf748; BYTE $0xde // neg rsi
LONG $0x4d6ffdc5; BYTE $0x00 // vmovdqa ymm1, yword 0[rbp] /* [rip + .LCPI0_0] */
LONG $0x456ffdc5; BYTE $0x20 // vmovdqa ymm0, yword 32[rbp] /* [rip + .LCPI0_1] */
WORD $0xc031 // xor eax, eax
LONG $0xd06ffdc5 // vmovdqa ymm2, ymm0
LONG $0xd96ffdc5 // vmovdqa ymm3, ymm1
LBB0_7:
LONG $0x246ffec5; BYTE $0x07 // vmovdqu ymm4, yword [rdi + rax]
LONG $0x6c6ffec5; WORD $0x2007 // vmovdqu ymm5, yword [rdi + rax + 32]
LONG $0x746ffec5; WORD $0x4007 // vmovdqu ymm6, yword [rdi + rax + 64]
LONG $0x7c6ffec5; WORD $0x6007 // vmovdqu ymm7, yword [rdi + rax + 96]
LONG $0x387de2c4; BYTE $0xc4 // vpminsb ymm0, ymm0, ymm4
LONG $0x386de2c4; BYTE $0xd5 // vpminsb ymm2, ymm2, ymm5
LONG $0x3c75e2c4; BYTE $0xcc // vpmaxsb ymm1, ymm1, ymm4
LONG $0x3c65e2c4; BYTE $0xdd // vpmaxsb ymm3, ymm3, ymm5
LONG $0x387de2c4; BYTE $0xc6 // vpminsb ymm0, ymm0, ymm6
LONG $0x386de2c4; BYTE $0xd7 // vpminsb ymm2, ymm2, ymm7
LONG $0x3c75e2c4; BYTE $0xce // vpmaxsb ymm1, ymm1, ymm6
LONG $0x3c65e2c4; BYTE $0xdf // vpmaxsb ymm3, ymm3, ymm7
LONG $0x80e88348 // sub rax, -128
LONG $0x02c68348 // add rsi, 2
JNE LBB0_7
LONG $0x01c0f641 // test r8b, 1
JE LBB0_10
LBB0_9:
LONG $0x246ffec5; BYTE $0x07 // vmovdqu ymm4, yword [rdi + rax]
LONG $0x6c6ffec5; WORD $0x2007 // vmovdqu ymm5, yword [rdi + rax + 32]
LONG $0x3c65e2c4; BYTE $0xdd // vpmaxsb ymm3, ymm3, ymm5
LONG $0x3c75e2c4; BYTE $0xcc // vpmaxsb ymm1, ymm1, ymm4
LONG $0x386de2c4; BYTE $0xd5 // vpminsb ymm2, ymm2, ymm5
LONG $0x387de2c4; BYTE $0xc4 // vpminsb ymm0, ymm0, ymm4
LBB0_10:
LONG $0x3c75e2c4; BYTE $0xcb // vpmaxsb ymm1, ymm1, ymm3
LONG $0x397de3c4; WORD $0x01cb // vextracti128 xmm3, ymm1, 1
LONG $0x3c71e2c4; BYTE $0xcb // vpmaxsb xmm1, xmm1, xmm3
LONG $0x4deff1c5; BYTE $0x40 // vpxor xmm1, xmm1, oword 64[rbp] /* [rip + .LCPI0_2] */
LONG $0x387de2c4; BYTE $0xc2 // vpminsb ymm0, ymm0, ymm2
LONG $0xd171e9c5; BYTE $0x08 // vpsrlw xmm2, xmm1, 8
LONG $0xcadaf1c5 // vpminub xmm1, xmm1, xmm2
LONG $0x4179e2c4; BYTE $0xc9 // vphminposuw xmm1, xmm1
LONG $0x7e79c1c4; BYTE $0xc8 // vmovd r8d, xmm1
LONG $0x7ff08041 // xor r8b, 127
LONG $0x397de3c4; WORD $0x01c1 // vextracti128 xmm1, ymm0, 1
LONG $0x3879e2c4; BYTE $0xc1 // vpminsb xmm0, xmm0, xmm1
LONG $0x45eff9c5; BYTE $0x50 // vpxor xmm0, xmm0, oword 80[rbp] /* [rip + .LCPI0_3] */
LONG $0xd071f1c5; BYTE $0x08 // vpsrlw xmm1, xmm0, 8
LONG $0xc1daf9c5 // vpminub xmm0, xmm0, xmm1
LONG $0x4179e2c4; BYTE $0xc0 // vphminposuw xmm0, xmm0
LONG $0xc67ef9c5 // vmovd esi, xmm0
LONG $0x80f68040 // xor sil, -128
WORD $0x394d; BYTE $0xca // cmp r10, r9
JE LBB0_12
LBB0_11:
LONG $0x04b60f42; BYTE $0x17 // movzx eax, byte [rdi + r10]
WORD $0x3840; BYTE $0xc6 // cmp sil, al
LONG $0xf6b60f40 // movzx esi, sil
WORD $0x4f0f; BYTE $0xf0 // cmovg esi, eax
WORD $0x3841; BYTE $0xc0 // cmp r8b, al
LONG $0xc0b60f45 // movzx r8d, r8b
LONG $0xc04c0f44 // cmovl r8d, eax
LONG $0x01c28349 // add r10, 1
WORD $0x394d; BYTE $0xd1 // cmp r9, r10
JNE LBB0_11
LBB0_12:
WORD $0x8844; BYTE $0x01 // mov byte [rcx], r8b
WORD $0x8840; BYTE $0x32 // mov byte [rdx], sil
VZEROUPPER
RET
LBB0_5:
LONG $0x4d6ffdc5; BYTE $0x00 // vmovdqa ymm1, yword 0[rbp] /* [rip + .LCPI0_0] */
LONG $0x456ffdc5; BYTE $0x20 // vmovdqa ymm0, yword 32[rbp] /* [rip + .LCPI0_1] */
WORD $0xc031 // xor eax, eax
LONG $0xd06ffdc5 // vmovdqa ymm2, ymm0
LONG $0xd96ffdc5 // vmovdqa ymm3, ymm1
LONG $0x01c0f641 // test r8b, 1
JNE LBB0_9
JMP LBB0_10
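// _uint8_max_min_avx2: min/max of length uint8 values starting at values.
// Max accumulators start at 0x00 and min accumulators at 0xff. Blocks of 64
// elements are folded with VPMINUB/VPMAXUB; VPHMINPOSUW performs the final
// reduction (the max through a bitwise complement), and leftover elements run
// through the scalar loop at LBB1_11.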
TEXT ·_uint8_max_min_avx2(SB), $0-32
MOVQ values+0(FP), DI
MOVQ length+8(FP), SI
MOVQ minout+16(FP), DX
MOVQ maxout+24(FP), CX
WORD $0xf685 // test esi, esi
JLE LBB1_1
WORD $0x8941; BYTE $0xf1 // mov r9d, esi
WORD $0xfe83; BYTE $0x3f // cmp esi, 63
JA LBB1_4
WORD $0xb640; BYTE $0xff // mov sil, -1
WORD $0x3145; BYTE $0xd2 // xor r10d, r10d
WORD $0xc031 // xor eax, eax
JMP LBB1_11
LBB1_1:
WORD $0xb640; BYTE $0xff // mov sil, -1
WORD $0xc031 // xor eax, eax
JMP LBB1_12
LBB1_4:
WORD $0x8945; BYTE $0xca // mov r10d, r9d
LONG $0xc0e28341 // and r10d, -64
LONG $0xc0428d49 // lea rax, [r10 - 64]
WORD $0x8949; BYTE $0xc0 // mov r8, rax
LONG $0x06e8c149 // shr r8, 6
LONG $0x01c08349 // add r8, 1
WORD $0x8548; BYTE $0xc0 // test rax, rax
JE LBB1_5
WORD $0x894c; BYTE $0xc6 // mov rsi, r8
LONG $0xfee68348 // and rsi, -2
WORD $0xf748; BYTE $0xde // neg rsi
LONG $0xc0eff9c5 // vpxor xmm0, xmm0, xmm0
LONG $0xc976f5c5 // vpcmpeqd ymm1, ymm1, ymm1
WORD $0xc031 // xor eax, eax
LONG $0xd276edc5 // vpcmpeqd ymm2, ymm2, ymm2
LONG $0xdbefe1c5 // vpxor xmm3, xmm3, xmm3
LBB1_7:
LONG $0x246ffec5; BYTE $0x07 // vmovdqu ymm4, yword [rdi + rax]
LONG $0x6c6ffec5; WORD $0x2007 // vmovdqu ymm5, yword [rdi + rax + 32]
LONG $0x746ffec5; WORD $0x4007 // vmovdqu ymm6, yword [rdi + rax + 64]
LONG $0x7c6ffec5; WORD $0x6007 // vmovdqu ymm7, yword [rdi + rax + 96]
LONG $0xccdaf5c5 // vpminub ymm1, ymm1, ymm4
LONG $0xd5daedc5 // vpminub ymm2, ymm2, ymm5
LONG $0xc4defdc5 // vpmaxub ymm0, ymm0, ymm4
LONG $0xdddee5c5 // vpmaxub ymm3, ymm3, ymm5
LONG $0xcedaf5c5 // vpminub ymm1, ymm1, ymm6
LONG $0xd7daedc5 // vpminub ymm2, ymm2, ymm7
LONG $0xc6defdc5 // vpmaxub ymm0, ymm0, ymm6
LONG $0xdfdee5c5 // vpmaxub ymm3, ymm3, ymm7
LONG $0x80e88348 // sub rax, -128
LONG $0x02c68348 // add rsi, 2
JNE LBB1_7
LONG $0x01c0f641 // test r8b, 1
JE LBB1_10
LBB1_9:
LONG $0x246ffec5; BYTE $0x07 // vmovdqu ymm4, yword [rdi + rax]
LONG $0x6c6ffec5; WORD $0x2007 // vmovdqu ymm5, yword [rdi + rax + 32]
LONG $0xdddee5c5 // vpmaxub ymm3, ymm3, ymm5
LONG $0xc4defdc5 // vpmaxub ymm0, ymm0, ymm4
LONG $0xd5daedc5 // vpminub ymm2, ymm2, ymm5
LONG $0xccdaf5c5 // vpminub ymm1, ymm1, ymm4
LBB1_10:
LONG $0xcadaf5c5 // vpminub ymm1, ymm1, ymm2
LONG $0xc3defdc5 // vpmaxub ymm0, ymm0, ymm3
LONG $0x397de3c4; WORD $0x01c2 // vextracti128 xmm2, ymm0, 1
LONG $0xc2def9c5 // vpmaxub xmm0, xmm0, xmm2
LONG $0xd276e9c5 // vpcmpeqd xmm2, xmm2, xmm2
LONG $0xc2eff9c5 // vpxor xmm0, xmm0, xmm2
LONG $0xd071e9c5; BYTE $0x08 // vpsrlw xmm2, xmm0, 8
LONG $0xc2daf9c5 // vpminub xmm0, xmm0, xmm2
LONG $0x4179e2c4; BYTE $0xc0 // vphminposuw xmm0, xmm0
LONG $0xc07ef9c5 // vmovd eax, xmm0
WORD $0xd0f6 // not al
LONG $0x397de3c4; WORD $0x01c8 // vextracti128 xmm0, ymm1, 1
LONG $0xc0daf1c5 // vpminub xmm0, xmm1, xmm0
LONG $0xd071f1c5; BYTE $0x08 // vpsrlw xmm1, xmm0, 8
LONG $0xc1daf9c5 // vpminub xmm0, xmm0, xmm1
LONG $0x4179e2c4; BYTE $0xc0 // vphminposuw xmm0, xmm0
LONG $0xc67ef9c5 // vmovd esi, xmm0
WORD $0x394d; BYTE $0xca // cmp r10, r9
JE LBB1_12
LBB1_11:
LONG $0x04b60f46; BYTE $0x17 // movzx r8d, byte [rdi + r10]
WORD $0x3844; BYTE $0xc6 // cmp sil, r8b
LONG $0xf6b60f40 // movzx esi, sil
LONG $0xf0430f41 // cmovae esi, r8d
WORD $0x3844; BYTE $0xc0 // cmp al, r8b
WORD $0xb60f; BYTE $0xc0 // movzx eax, al
LONG $0xc0460f41 // cmovbe eax, r8d
LONG $0x01c28349 // add r10, 1
WORD $0x394d; BYTE $0xd1 // cmp r9, r10
JNE LBB1_11
LBB1_12:
WORD $0x0188 // mov byte [rcx], al
WORD $0x8840; BYTE $0x32 // mov byte [rdx], sil
VZEROUPPER
RET
LBB1_5:
LONG $0xc0eff9c5 // vpxor xmm0, xmm0, xmm0
LONG $0xc976f5c5 // vpcmpeqd ymm1, ymm1, ymm1
WORD $0xc031 // xor eax, eax
LONG $0xd276edc5 // vpcmpeqd ymm2, ymm2, ymm2
LONG $0xdbefe1c5 // vpxor xmm3, xmm3, xmm3
LONG $0x01c0f641 // test r8b, 1
JNE LBB1_9
JMP LBB1_10
DATA LCDATA2<>+0x000(SB)/8, $0x8000800080008000
DATA LCDATA2<>+0x008(SB)/8, $0x8000800080008000
DATA LCDATA2<>+0x010(SB)/8, $0x8000800080008000
DATA LCDATA2<>+0x018(SB)/8, $0x8000800080008000
DATA LCDATA2<>+0x020(SB)/8, $0x7fff7fff7fff7fff
DATA LCDATA2<>+0x028(SB)/8, $0x7fff7fff7fff7fff
DATA LCDATA2<>+0x030(SB)/8, $0x7fff7fff7fff7fff
DATA LCDATA2<>+0x038(SB)/8, $0x7fff7fff7fff7fff
DATA LCDATA2<>+0x040(SB)/8, $0x7fff7fff7fff7fff
DATA LCDATA2<>+0x048(SB)/8, $0x7fff7fff7fff7fff
DATA LCDATA2<>+0x050(SB)/8, $0x8000800080008000
DATA LCDATA2<>+0x058(SB)/8, $0x8000800080008000
GLOBL LCDATA2<>(SB), 8, $96
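// _int16_max_min_avx2: min/max of length int16 values starting at values.
// Accumulators are seeded from LCDATA2 (0x8000 = INT16_MIN, 0x7fff = INT16_MAX).
// Blocks of 32 elements are folded with VPMINSW/VPMAXSW; the horizontal step
// XORs the lanes with 0x7fff (max) or 0x8000 (min) so VPHMINPOSUW can pick the
// extreme word, and the tail runs through the scalar loop at LBB2_11.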
TEXT ·_int16_max_min_avx2(SB), $8-32
MOVQ values+0(FP), DI
MOVQ length+8(FP), SI
MOVQ minout+16(FP), DX
MOVQ maxout+24(FP), CX
LEAQ LCDATA2<>(SB), BP
WORD $0xf685 // test esi, esi
JLE LBB2_1
WORD $0x8941; BYTE $0xf1 // mov r9d, esi
WORD $0xfe83; BYTE $0x1f // cmp esi, 31
JA LBB2_4
LONG $0x00b84166; BYTE $0x80 // mov r8w, -32768
LONG $0x7fffbe66 // mov si, 32767
WORD $0x3145; BYTE $0xd2 // xor r10d, r10d
JMP LBB2_11
LBB2_1:
LONG $0x7fffbe66 // mov si, 32767
LONG $0x00b84166; BYTE $0x80 // mov r8w, -32768
JMP LBB2_12
LBB2_4:
WORD $0x8945; BYTE $0xca // mov r10d, r9d
LONG $0xe0e28341 // and r10d, -32
LONG $0xe0428d49 // lea rax, [r10 - 32]
WORD $0x8949; BYTE $0xc0 // mov r8, rax
LONG $0x05e8c149 // shr r8, 5
LONG $0x01c08349 // add r8, 1
WORD $0x8548; BYTE $0xc0 // test rax, rax
JE LBB2_5
WORD $0x894c; BYTE $0xc6 // mov rsi, r8
LONG $0xfee68348 // and rsi, -2
WORD $0xf748; BYTE $0xde // neg rsi
LONG $0x4d6ffdc5; BYTE $0x00 // vmovdqa ymm1, yword 0[rbp] /* [rip + .LCPI2_0] */
LONG $0x456ffdc5; BYTE $0x20 // vmovdqa ymm0, yword 32[rbp] /* [rip + .LCPI2_1] */
WORD $0xc031 // xor eax, eax
LONG $0xd06ffdc5 // vmovdqa ymm2, ymm0
LONG $0xd96ffdc5 // vmovdqa ymm3, ymm1
LBB2_7:
LONG $0x246ffec5; BYTE $0x47 // vmovdqu ymm4, yword [rdi + 2*rax]
LONG $0x6c6ffec5; WORD $0x2047 // vmovdqu ymm5, yword [rdi + 2*rax + 32]
LONG $0x746ffec5; WORD $0x4047 // vmovdqu ymm6, yword [rdi + 2*rax + 64]
LONG $0x7c6ffec5; WORD $0x6047 // vmovdqu ymm7, yword [rdi + 2*rax + 96]
LONG $0xc4eafdc5 // vpminsw ymm0, ymm0, ymm4
LONG $0xd5eaedc5 // vpminsw ymm2, ymm2, ymm5
LONG $0xcceef5c5 // vpmaxsw ymm1, ymm1, ymm4
LONG $0xddeee5c5 // vpmaxsw ymm3, ymm3, ymm5
LONG $0xc6eafdc5 // vpminsw ymm0, ymm0, ymm6
LONG $0xd7eaedc5 // vpminsw ymm2, ymm2, ymm7
LONG $0xceeef5c5 // vpmaxsw ymm1, ymm1, ymm6
LONG $0xdfeee5c5 // vpmaxsw ymm3, ymm3, ymm7
LONG $0x40c08348 // add rax, 64
LONG $0x02c68348 // add rsi, 2
JNE LBB2_7
LONG $0x01c0f641 // test r8b, 1
JE LBB2_10
LBB2_9:
LONG $0x246ffec5; BYTE $0x47 // vmovdqu ymm4, yword [rdi + 2*rax]
LONG $0x6c6ffec5; WORD $0x2047 // vmovdqu ymm5, yword [rdi + 2*rax + 32]
LONG $0xddeee5c5 // vpmaxsw ymm3, ymm3, ymm5
LONG $0xcceef5c5 // vpmaxsw ymm1, ymm1, ymm4
LONG $0xd5eaedc5 // vpminsw ymm2, ymm2, ymm5
LONG $0xc4eafdc5 // vpminsw ymm0, ymm0, ymm4
LBB2_10:
LONG $0xcbeef5c5 // vpmaxsw ymm1, ymm1, ymm3
LONG $0x397de3c4; WORD $0x01cb // vextracti128 xmm3, ymm1, 1
LONG $0xcbeef1c5 // vpmaxsw xmm1, xmm1, xmm3
LONG $0x4deff1c5; BYTE $0x40 // vpxor xmm1, xmm1, oword 64[rbp] /* [rip + .LCPI2_2] */
LONG $0xc2eafdc5 // vpminsw ymm0, ymm0, ymm2
LONG $0x4179e2c4; BYTE $0xc9 // vphminposuw xmm1, xmm1
LONG $0x7e79c1c4; BYTE $0xc8 // vmovd r8d, xmm1
LONG $0xfff08141; WORD $0x007f; BYTE $0x00 // xor r8d, 32767
LONG $0x397de3c4; WORD $0x01c1 // vextracti128 xmm1, ymm0, 1
LONG $0xc1eaf9c5 // vpminsw xmm0, xmm0, xmm1
LONG $0x45eff9c5; BYTE $0x50 // vpxor xmm0, xmm0, oword 80[rbp] /* [rip + .LCPI2_3] */
LONG $0x4179e2c4; BYTE $0xc0 // vphminposuw xmm0, xmm0
LONG $0xc67ef9c5 // vmovd esi, xmm0
LONG $0x8000f681; WORD $0x0000 // xor esi, 32768
WORD $0x394d; BYTE $0xca // cmp r10, r9
JE LBB2_12
LBB2_11:
LONG $0x04b70f42; BYTE $0x57 // movzx eax, word [rdi + 2*r10]
WORD $0x3966; BYTE $0xc6 // cmp si, ax
WORD $0x4f0f; BYTE $0xf0 // cmovg esi, eax
LONG $0xc0394166 // cmp r8w, ax
LONG $0xc04c0f44 // cmovl r8d, eax
LONG $0x01c28349 // add r10, 1
WORD $0x394d; BYTE $0xd1 // cmp r9, r10
JNE LBB2_11
LBB2_12:
LONG $0x01894466 // mov word [rcx], r8w
WORD $0x8966; BYTE $0x32 // mov word [rdx], si
VZEROUPPER
RET
LBB2_5:
LONG $0x4d6ffdc5; BYTE $0x00 // vmovdqa ymm1, yword 0[rbp] /* [rip + .LCPI2_0] */
LONG $0x456ffdc5; BYTE $0x20 // vmovdqa ymm0, yword 32[rbp] /* [rip + .LCPI2_1] */
WORD $0xc031 // xor eax, eax
LONG $0xd06ffdc5 // vmovdqa ymm2, ymm0
LONG $0xd96ffdc5 // vmovdqa ymm3, ymm1
LONG $0x01c0f641 // test r8b, 1
JNE LBB2_9
JMP LBB2_10
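// _uint16_max_min_avx2: min/max of length uint16 values starting at values.
// Max accumulators start at 0x0000 and min accumulators at 0xffff. Blocks of
// 32 elements are folded with VPMINUW/VPMAXUW; VPHMINPOSUW performs the final
// reduction (the max through a bitwise complement), and the tail runs through
// the scalar loop at LBB3_11.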
TEXT ·_uint16_max_min_avx2(SB), $0-32
MOVQ values+0(FP), DI
MOVQ length+8(FP), SI
MOVQ minout+16(FP), DX
MOVQ maxout+24(FP), CX
WORD $0xf685 // test esi, esi
JLE LBB3_1
WORD $0x8941; BYTE $0xf1 // mov r9d, esi
WORD $0xfe83; BYTE $0x1f // cmp esi, 31
JA LBB3_4
LONG $0xffb84166; BYTE $0xff // mov r8w, -1
WORD $0x3145; BYTE $0xd2 // xor r10d, r10d
WORD $0xf631 // xor esi, esi
JMP LBB3_11
LBB3_1:
LONG $0xffb84166; BYTE $0xff // mov r8w, -1
WORD $0xf631 // xor esi, esi
JMP LBB3_12
LBB3_4:
WORD $0x8945; BYTE $0xca // mov r10d, r9d
LONG $0xe0e28341 // and r10d, -32
LONG $0xe0428d49 // lea rax, [r10 - 32]
WORD $0x8949; BYTE $0xc0 // mov r8, rax
LONG $0x05e8c149 // shr r8, 5
LONG $0x01c08349 // add r8, 1
WORD $0x8548; BYTE $0xc0 // test rax, rax
JE LBB3_5
WORD $0x894c; BYTE $0xc6 // mov rsi, r8
LONG $0xfee68348 // and rsi, -2
WORD $0xf748; BYTE $0xde // neg rsi
LONG $0xc0eff9c5 // vpxor xmm0, xmm0, xmm0
LONG $0xc976f5c5 // vpcmpeqd ymm1, ymm1, ymm1
WORD $0xc031 // xor eax, eax
LONG $0xd276edc5 // vpcmpeqd ymm2, ymm2, ymm2
LONG $0xdbefe1c5 // vpxor xmm3, xmm3, xmm3
LBB3_7:
LONG $0x246ffec5; BYTE $0x47 // vmovdqu ymm4, yword [rdi + 2*rax]
LONG $0x6c6ffec5; WORD $0x2047 // vmovdqu ymm5, yword [rdi + 2*rax + 32]
LONG $0x746ffec5; WORD $0x4047 // vmovdqu ymm6, yword [rdi + 2*rax + 64]
LONG $0x7c6ffec5; WORD $0x6047 // vmovdqu ymm7, yword [rdi + 2*rax + 96]
LONG $0x3a75e2c4; BYTE $0xcc // vpminuw ymm1, ymm1, ymm4
LONG $0x3a6de2c4; BYTE $0xd5 // vpminuw ymm2, ymm2, ymm5
LONG $0x3e7de2c4; BYTE $0xc4 // vpmaxuw ymm0, ymm0, ymm4
LONG $0x3e65e2c4; BYTE $0xdd // vpmaxuw ymm3, ymm3, ymm5
LONG $0x3a75e2c4; BYTE $0xce // vpminuw ymm1, ymm1, ymm6
LONG $0x3a6de2c4; BYTE $0xd7 // vpminuw ymm2, ymm2, ymm7
LONG $0x3e7de2c4; BYTE $0xc6 // vpmaxuw ymm0, ymm0, ymm6
LONG $0x3e65e2c4; BYTE $0xdf // vpmaxuw ymm3, ymm3, ymm7
LONG $0x40c08348 // add rax, 64
LONG $0x02c68348 // add rsi, 2
JNE LBB3_7
LONG $0x01c0f641 // test r8b, 1
JE LBB3_10
LBB3_9:
LONG $0x246ffec5; BYTE $0x47 // vmovdqu ymm4, yword [rdi + 2*rax]
LONG $0x6c6ffec5; WORD $0x2047 // vmovdqu ymm5, yword [rdi + 2*rax + 32]
LONG $0x3e65e2c4; BYTE $0xdd // vpmaxuw ymm3, ymm3, ymm5
LONG $0x3e7de2c4; BYTE $0xc4 // vpmaxuw ymm0, ymm0, ymm4
LONG $0x3a6de2c4; BYTE $0xd5 // vpminuw ymm2, ymm2, ymm5
LONG $0x3a75e2c4; BYTE $0xcc // vpminuw ymm1, ymm1, ymm4
LBB3_10:
LONG $0x3a75e2c4; BYTE $0xca // vpminuw ymm1, ymm1, ymm2
LONG $0x3e7de2c4; BYTE $0xc3 // vpmaxuw ymm0, ymm0, ymm3
LONG $0x397de3c4; WORD $0x01c2 // vextracti128 xmm2, ymm0, 1
LONG $0x3e79e2c4; BYTE $0xc2 // vpmaxuw xmm0, xmm0, xmm2
LONG $0xd276e9c5 // vpcmpeqd xmm2, xmm2, xmm2
LONG $0xc2eff9c5 // vpxor xmm0, xmm0, xmm2
LONG $0x4179e2c4; BYTE $0xc0 // vphminposuw xmm0, xmm0
LONG $0xc67ef9c5 // vmovd esi, xmm0
WORD $0xd6f7 // not esi
LONG $0x397de3c4; WORD $0x01c8 // vextracti128 xmm0, ymm1, 1
LONG $0x3a71e2c4; BYTE $0xc0 // vpminuw xmm0, xmm1, xmm0
LONG $0x4179e2c4; BYTE $0xc0 // vphminposuw xmm0, xmm0
LONG $0x7e79c1c4; BYTE $0xc0 // vmovd r8d, xmm0
WORD $0x394d; BYTE $0xca // cmp r10, r9
JE LBB3_12
LBB3_11:
LONG $0x04b70f42; BYTE $0x57 // movzx eax, word [rdi + 2*r10]
LONG $0xc0394166 // cmp r8w, ax
LONG $0xc0430f44 // cmovae r8d, eax
WORD $0x3966; BYTE $0xc6 // cmp si, ax
WORD $0x460f; BYTE $0xf0 // cmovbe esi, eax
LONG $0x01c28349 // add r10, 1
WORD $0x394d; BYTE $0xd1 // cmp r9, r10
JNE LBB3_11
LBB3_12:
WORD $0x8966; BYTE $0x31 // mov word [rcx], si
LONG $0x02894466 // mov word [rdx], r8w
VZEROUPPER
RET
LBB3_5:
LONG $0xc0eff9c5 // vpxor xmm0, xmm0, xmm0
LONG $0xc976f5c5 // vpcmpeqd ymm1, ymm1, ymm1
WORD $0xc031 // xor eax, eax
LONG $0xd276edc5 // vpcmpeqd ymm2, ymm2, ymm2
LONG $0xdbefe1c5 // vpxor xmm3, xmm3, xmm3
LONG $0x01c0f641 // test r8b, 1
JNE LBB3_9
JMP LBB3_10
DATA LCDATA3<>+0x000(SB)/8, $0x7fffffff80000000
GLOBL LCDATA3<>(SB), 8, $8
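// _int32_max_min_avx2: min/max of length int32 values starting at values.
// LCDATA3 holds INT32_MIN and INT32_MAX, broadcast to seed four max and four
// min accumulators. Blocks of 32 elements are folded with VPMINSD/VPMAXSD, the
// horizontal reduction uses VEXTRACTI128/VPSHUFD, and leftover elements run
// through the scalar loop at LBB4_7.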
TEXT ·_int32_max_min_avx2(SB), $8-32
MOVQ values+0(FP), DI
MOVQ length+8(FP), SI
MOVQ minout+16(FP), DX
MOVQ maxout+24(FP), CX
LEAQ LCDATA3<>(SB), BP
WORD $0xf685 // test esi, esi
JLE LBB4_1
WORD $0x8941; BYTE $0xf0 // mov r8d, esi
WORD $0xfe83; BYTE $0x1f // cmp esi, 31
JA LBB4_4
LONG $0x0000ba41; WORD $0x8000 // mov r10d, -2147483648
LONG $0xffffffb8; BYTE $0x7f // mov eax, 2147483647
WORD $0x3145; BYTE $0xc9 // xor r9d, r9d
JMP LBB4_7
LBB4_1:
LONG $0xffffffb8; BYTE $0x7f // mov eax, 2147483647
LONG $0x000000be; BYTE $0x80 // mov esi, -2147483648
JMP LBB4_8
LBB4_4:
WORD $0x8945; BYTE $0xc1 // mov r9d, r8d
LONG $0x587de2c4; WORD $0x0065 // vpbroadcastd ymm4, dword 0[rbp] /* [rip + .LCPI4_0] */
LONG $0xe0e18341 // and r9d, -32
LONG $0x587de2c4; WORD $0x0445 // vpbroadcastd ymm0, dword 4[rbp] /* [rip + .LCPI4_1] */
WORD $0xc031 // xor eax, eax
LONG $0xc86ffdc5 // vmovdqa ymm1, ymm0
LONG $0xd06ffdc5 // vmovdqa ymm2, ymm0
LONG $0xd86ffdc5 // vmovdqa ymm3, ymm0
LONG $0xec6ffdc5 // vmovdqa ymm5, ymm4
LONG $0xf46ffdc5 // vmovdqa ymm6, ymm4
LONG $0xfc6ffdc5 // vmovdqa ymm7, ymm4
LBB4_5:
LONG $0x046f7ec5; BYTE $0x87 // vmovdqu ymm8, yword [rdi + 4*rax]
LONG $0x4c6f7ec5; WORD $0x2087 // vmovdqu ymm9, yword [rdi + 4*rax + 32]
LONG $0x546f7ec5; WORD $0x4087 // vmovdqu ymm10, yword [rdi + 4*rax + 64]
LONG $0x5c6f7ec5; WORD $0x6087 // vmovdqu ymm11, yword [rdi + 4*rax + 96]
LONG $0x397dc2c4; BYTE $0xc0 // vpminsd ymm0, ymm0, ymm8
LONG $0x3975c2c4; BYTE $0xc9 // vpminsd ymm1, ymm1, ymm9
LONG $0x396dc2c4; BYTE $0xd2 // vpminsd ymm2, ymm2, ymm10
LONG $0x3965c2c4; BYTE $0xdb // vpminsd ymm3, ymm3, ymm11
LONG $0x3d5dc2c4; BYTE $0xe0 // vpmaxsd ymm4, ymm4, ymm8
LONG $0x3d55c2c4; BYTE $0xe9 // vpmaxsd ymm5, ymm5, ymm9
LONG $0x3d4dc2c4; BYTE $0xf2 // vpmaxsd ymm6, ymm6, ymm10
LONG $0x3d45c2c4; BYTE $0xfb // vpmaxsd ymm7, ymm7, ymm11
LONG $0x20c08348 // add rax, 32
WORD $0x3949; BYTE $0xc1 // cmp r9, rax
JNE LBB4_5
LONG $0x3d5de2c4; BYTE $0xe5 // vpmaxsd ymm4, ymm4, ymm5
LONG $0x3d5de2c4; BYTE $0xe6 // vpmaxsd ymm4, ymm4, ymm6
LONG $0x3d5de2c4; BYTE $0xe7 // vpmaxsd ymm4, ymm4, ymm7
LONG $0x397de3c4; WORD $0x01e5 // vextracti128 xmm5, ymm4, 1
LONG $0x3d59e2c4; BYTE $0xe5 // vpmaxsd xmm4, xmm4, xmm5
LONG $0xec70f9c5; BYTE $0x4e // vpshufd xmm5, xmm4, 78
LONG $0x3d59e2c4; BYTE $0xe5 // vpmaxsd xmm4, xmm4, xmm5
LONG $0xec70f9c5; BYTE $0xe5 // vpshufd xmm5, xmm4, 229
LONG $0x3d59e2c4; BYTE $0xe5 // vpmaxsd xmm4, xmm4, xmm5
LONG $0x7e79c1c4; BYTE $0xe2 // vmovd r10d, xmm4
LONG $0x397de2c4; BYTE $0xc1 // vpminsd ymm0, ymm0, ymm1
LONG $0x397de2c4; BYTE $0xc2 // vpminsd ymm0, ymm0, ymm2
LONG $0x397de2c4; BYTE $0xc3 // vpminsd ymm0, ymm0, ymm3
LONG $0x397de3c4; WORD $0x01c1 // vextracti128 xmm1, ymm0, 1
LONG $0x3979e2c4; BYTE $0xc1 // vpminsd xmm0, xmm0, xmm1
LONG $0xc870f9c5; BYTE $0x4e // vpshufd xmm1, xmm0, 78
LONG $0x3979e2c4; BYTE $0xc1 // vpminsd xmm0, xmm0, xmm1
LONG $0xc870f9c5; BYTE $0xe5 // vpshufd xmm1, xmm0, 229
LONG $0x3979e2c4; BYTE $0xc1 // vpminsd xmm0, xmm0, xmm1
LONG $0xc07ef9c5 // vmovd eax, xmm0
WORD $0x8944; BYTE $0xd6 // mov esi, r10d
WORD $0x394d; BYTE $0xc1 // cmp r9, r8
JE LBB4_8
LBB4_7:
LONG $0x8f348b42 // mov esi, dword [rdi + 4*r9]
WORD $0xf039 // cmp eax, esi
WORD $0x4f0f; BYTE $0xc6 // cmovg eax, esi
WORD $0x3941; BYTE $0xf2 // cmp r10d, esi
LONG $0xf24d0f41 // cmovge esi, r10d
LONG $0x01c18349 // add r9, 1
WORD $0x8941; BYTE $0xf2 // mov r10d, esi
WORD $0x394d; BYTE $0xc8 // cmp r8, r9
JNE LBB4_7
LBB4_8:
WORD $0x3189 // mov dword [rcx], esi
WORD $0x0289 // mov dword [rdx], eax
VZEROUPPER
RET
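// _uint32_max_min_avx2: min/max of length uint32 values starting at values.
// Max accumulators start at 0 and min accumulators at 0xffffffff. Blocks of 32
// elements are folded with VPMINUD/VPMAXUD, the horizontal reduction uses
// VEXTRACTI128/VPSHUFD, and the tail runs through the scalar loop at LBB5_7.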
TEXT ·_uint32_max_min_avx2(SB), $0-32
MOVQ values+0(FP), DI
MOVQ length+8(FP), SI
MOVQ minout+16(FP), DX
MOVQ maxout+24(FP), CX
WORD $0xf685 // test esi, esi
JLE LBB5_1
WORD $0x8941; BYTE $0xf0 // mov r8d, esi
WORD $0xfe83; BYTE $0x1f // cmp esi, 31
JA LBB5_4
WORD $0x3145; BYTE $0xc9 // xor r9d, r9d
LONG $0xffffffb8; BYTE $0xff // mov eax, -1
WORD $0x3145; BYTE $0xd2 // xor r10d, r10d
JMP LBB5_7
LBB5_1:
LONG $0xffffffb8; BYTE $0xff // mov eax, -1
WORD $0xf631 // xor esi, esi
JMP LBB5_8
LBB5_4:
WORD $0x8945; BYTE $0xc1 // mov r9d, r8d
LONG $0xe0e18341 // and r9d, -32
LONG $0xe4efd9c5 // vpxor xmm4, xmm4, xmm4
LONG $0xc076fdc5 // vpcmpeqd ymm0, ymm0, ymm0
WORD $0xc031 // xor eax, eax
LONG $0xc976f5c5 // vpcmpeqd ymm1, ymm1, ymm1
LONG $0xd276edc5 // vpcmpeqd ymm2, ymm2, ymm2
LONG $0xdb76e5c5 // vpcmpeqd ymm3, ymm3, ymm3
LONG $0xedefd1c5 // vpxor xmm5, xmm5, xmm5
LONG $0xf6efc9c5 // vpxor xmm6, xmm6, xmm6
LONG $0xffefc1c5 // vpxor xmm7, xmm7, xmm7
LBB5_5:
LONG $0x046f7ec5; BYTE $0x87 // vmovdqu ymm8, yword [rdi + 4*rax]
LONG $0x4c6f7ec5; WORD $0x2087 // vmovdqu ymm9, yword [rdi + 4*rax + 32]
LONG $0x546f7ec5; WORD $0x4087 // vmovdqu ymm10, yword [rdi + 4*rax + 64]
LONG $0x5c6f7ec5; WORD $0x6087 // vmovdqu ymm11, yword [rdi + 4*rax + 96]
LONG $0x3b7dc2c4; BYTE $0xc0 // vpminud ymm0, ymm0, ymm8
LONG $0x3b75c2c4; BYTE $0xc9 // vpminud ymm1, ymm1, ymm9
LONG $0x3b6dc2c4; BYTE $0xd2 // vpminud ymm2, ymm2, ymm10
LONG $0x3b65c2c4; BYTE $0xdb // vpminud ymm3, ymm3, ymm11
LONG $0x3f5dc2c4; BYTE $0xe0 // vpmaxud ymm4, ymm4, ymm8
LONG $0x3f55c2c4; BYTE $0xe9 // vpmaxud ymm5, ymm5, ymm9
LONG $0x3f4dc2c4; BYTE $0xf2 // vpmaxud ymm6, ymm6, ymm10
LONG $0x3f45c2c4; BYTE $0xfb // vpmaxud ymm7, ymm7, ymm11
LONG $0x20c08348 // add rax, 32
WORD $0x3949; BYTE $0xc1 // cmp r9, rax
JNE LBB5_5
LONG $0x3f5de2c4; BYTE $0xe5 // vpmaxud ymm4, ymm4, ymm5
LONG $0x3f5de2c4; BYTE $0xe6 // vpmaxud ymm4, ymm4, ymm6
LONG $0x3f5de2c4; BYTE $0xe7 // vpmaxud ymm4, ymm4, ymm7
LONG $0x397de3c4; WORD $0x01e5 // vextracti128 xmm5, ymm4, 1
LONG $0x3f59e2c4; BYTE $0xe5 // vpmaxud xmm4, xmm4, xmm5
LONG $0xec70f9c5; BYTE $0x4e // vpshufd xmm5, xmm4, 78
LONG $0x3f59e2c4; BYTE $0xe5 // vpmaxud xmm4, xmm4, xmm5
LONG $0xec70f9c5; BYTE $0xe5 // vpshufd xmm5, xmm4, 229
LONG $0x3f59e2c4; BYTE $0xe5 // vpmaxud xmm4, xmm4, xmm5
LONG $0x7e79c1c4; BYTE $0xe2 // vmovd r10d, xmm4
LONG $0x3b7de2c4; BYTE $0xc1 // vpminud ymm0, ymm0, ymm1
LONG $0x3b7de2c4; BYTE $0xc2 // vpminud ymm0, ymm0, ymm2
LONG $0x3b7de2c4; BYTE $0xc3 // vpminud ymm0, ymm0, ymm3
LONG $0x397de3c4; WORD $0x01c1 // vextracti128 xmm1, ymm0, 1
LONG $0x3b79e2c4; BYTE $0xc1 // vpminud xmm0, xmm0, xmm1
LONG $0xc870f9c5; BYTE $0x4e // vpshufd xmm1, xmm0, 78
LONG $0x3b79e2c4; BYTE $0xc1 // vpminud xmm0, xmm0, xmm1
LONG $0xc870f9c5; BYTE $0xe5 // vpshufd xmm1, xmm0, 229
LONG $0x3b79e2c4; BYTE $0xc1 // vpminud xmm0, xmm0, xmm1
LONG $0xc07ef9c5 // vmovd eax, xmm0
WORD $0x8944; BYTE $0xd6 // mov esi, r10d
WORD $0x394d; BYTE $0xc1 // cmp r9, r8
JE LBB5_8
LBB5_7:
LONG $0x8f348b42 // mov esi, dword [rdi + 4*r9]
WORD $0xf039 // cmp eax, esi
WORD $0x430f; BYTE $0xc6 // cmovae eax, esi
WORD $0x3941; BYTE $0xf2 // cmp r10d, esi
LONG $0xf2470f41 // cmova esi, r10d
LONG $0x01c18349 // add r9, 1
WORD $0x8941; BYTE $0xf2 // mov r10d, esi
WORD $0x394d; BYTE $0xc8 // cmp r8, r9
JNE LBB5_7
LBB5_8:
WORD $0x3189 // mov dword [rcx], esi
WORD $0x0289 // mov dword [rdx], eax
VZEROUPPER
RET
DATA LCDATA4<>+0x000(SB)/8, $0x8000000000000000
DATA LCDATA4<>+0x008(SB)/8, $0x7fffffffffffffff
GLOBL LCDATA4<>(SB), 8, $16
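// _int64_max_min_avx2: min/max of length int64 values starting at values.
// LCDATA4 holds INT64_MIN and INT64_MAX, broadcast to seed the accumulators.
// AVX2 has no packed 64-bit min/max, so each step compares with VPCMPGTQ and
// selects with VBLENDVPD; 16 elements are folded per iteration and the
// remainder runs through the scalar loop at LBB6_7.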
TEXT ·_int64_max_min_avx2(SB), $8-32
MOVQ values+0(FP), DI
MOVQ length+8(FP), SI
MOVQ minout+16(FP), DX
MOVQ maxout+24(FP), CX
LEAQ LCDATA4<>(SB), BP
QUAD $0xffffffffffffb848; WORD $0x7fff // mov rax, 9223372036854775807
WORD $0xf685 // test esi, esi
JLE LBB6_1
WORD $0x8941; BYTE $0xf0 // mov r8d, esi
WORD $0xfe83; BYTE $0x0f // cmp esi, 15
JA LBB6_4
LONG $0x01508d4c // lea r10, [rax + 1]
WORD $0x3145; BYTE $0xc9 // xor r9d, r9d
JMP LBB6_7
LBB6_1:
LONG $0x01708d48 // lea rsi, [rax + 1]
JMP LBB6_8
LBB6_4:
WORD $0x8945; BYTE $0xc1 // mov r9d, r8d
LONG $0x597de2c4; WORD $0x0065 // vpbroadcastq ymm4, qword 0[rbp] /* [rip + .LCPI6_0] */
LONG $0xf0e18341 // and r9d, -16
LONG $0x597de2c4; WORD $0x0845 // vpbroadcastq ymm0, qword 8[rbp] /* [rip + .LCPI6_1] */
WORD $0xc031 // xor eax, eax
LONG $0xd86ffdc5 // vmovdqa ymm3, ymm0
LONG $0xd06ffdc5 // vmovdqa ymm2, ymm0
LONG $0xc86ffdc5 // vmovdqa ymm1, ymm0
LONG $0xfc6ffdc5 // vmovdqa ymm7, ymm4
LONG $0xf46ffdc5 // vmovdqa ymm6, ymm4
LONG $0xec6ffdc5 // vmovdqa ymm5, ymm4
LBB6_5:
LONG $0x046f7ec5; BYTE $0xc7 // vmovdqu ymm8, yword [rdi + 8*rax]
LONG $0x373d62c4; BYTE $0xc8 // vpcmpgtq ymm9, ymm8, ymm0
LONG $0x4b3de3c4; WORD $0x90c0 // vblendvpd ymm0, ymm8, ymm0, ymm9
LONG $0x4c6f7ec5; WORD $0x20c7 // vmovdqu ymm9, yword [rdi + 8*rax + 32]
LONG $0x373562c4; BYTE $0xd3 // vpcmpgtq ymm10, ymm9, ymm3
LONG $0x4b35e3c4; WORD $0xa0db // vblendvpd ymm3, ymm9, ymm3, ymm10
LONG $0x546f7ec5; WORD $0x40c7 // vmovdqu ymm10, yword [rdi + 8*rax + 64]
LONG $0x372d62c4; BYTE $0xda // vpcmpgtq ymm11, ymm10, ymm2
LONG $0x4b2de3c4; WORD $0xb0d2 // vblendvpd ymm2, ymm10, ymm2, ymm11
LONG $0x5c6f7ec5; WORD $0x60c7 // vmovdqu ymm11, yword [rdi + 8*rax + 96]
LONG $0x372562c4; BYTE $0xe1 // vpcmpgtq ymm12, ymm11, ymm1
LONG $0x4b25e3c4; WORD $0xc0c9 // vblendvpd ymm1, ymm11, ymm1, ymm12
LONG $0x375d42c4; BYTE $0xe0 // vpcmpgtq ymm12, ymm4, ymm8
LONG $0x4b3de3c4; WORD $0xc0e4 // vblendvpd ymm4, ymm8, ymm4, ymm12
LONG $0x374542c4; BYTE $0xc1 // vpcmpgtq ymm8, ymm7, ymm9
LONG $0x4b35e3c4; WORD $0x80ff // vblendvpd ymm7, ymm9, ymm7, ymm8
LONG $0x374d42c4; BYTE $0xc2 // vpcmpgtq ymm8, ymm6, ymm10
LONG $0x4b2de3c4; WORD $0x80f6 // vblendvpd ymm6, ymm10, ymm6, ymm8
LONG $0x375542c4; BYTE $0xc3 // vpcmpgtq ymm8, ymm5, ymm11
LONG $0x4b25e3c4; WORD $0x80ed // vblendvpd ymm5, ymm11, ymm5, ymm8
LONG $0x10c08348 // add rax, 16
WORD $0x3949; BYTE $0xc1 // cmp r9, rax
JNE LBB6_5
LONG $0x375d62c4; BYTE $0xc7 // vpcmpgtq ymm8, ymm4, ymm7
LONG $0x4b45e3c4; WORD $0x80e4 // vblendvpd ymm4, ymm7, ymm4, ymm8
LONG $0x375de2c4; BYTE $0xfe // vpcmpgtq ymm7, ymm4, ymm6
LONG $0x4b4de3c4; WORD $0x70e4 // vblendvpd ymm4, ymm6, ymm4, ymm7
LONG $0x375de2c4; BYTE $0xf5 // vpcmpgtq ymm6, ymm4, ymm5
LONG $0x4b55e3c4; WORD $0x60e4 // vblendvpd ymm4, ymm5, ymm4, ymm6
LONG $0x197de3c4; WORD $0x01e5 // vextractf128 xmm5, ymm4, 1
LONG $0x3759e2c4; BYTE $0xf5 // vpcmpgtq xmm6, xmm4, xmm5
LONG $0x4b51e3c4; WORD $0x60e4 // vblendvpd xmm4, xmm5, xmm4, xmm6
LONG $0x0479e3c4; WORD $0x4eec // vpermilps xmm5, xmm4, 78
LONG $0x3759e2c4; BYTE $0xf5 // vpcmpgtq xmm6, xmm4, xmm5
LONG $0x4b51e3c4; WORD $0x60e4 // vblendvpd xmm4, xmm5, xmm4, xmm6
LONG $0x7ef9c1c4; BYTE $0xe2 // vmovq r10, xmm4
LONG $0x3765e2c4; BYTE $0xe0 // vpcmpgtq ymm4, ymm3, ymm0
LONG $0x4b65e3c4; WORD $0x40c0 // vblendvpd ymm0, ymm3, ymm0, ymm4
LONG $0x376de2c4; BYTE $0xd8 // vpcmpgtq ymm3, ymm2, ymm0
LONG $0x4b6de3c4; WORD $0x30c0 // vblendvpd ymm0, ymm2, ymm0, ymm3
LONG $0x3775e2c4; BYTE $0xd0 // vpcmpgtq ymm2, ymm1, ymm0
LONG $0x4b75e3c4; WORD $0x20c0 // vblendvpd ymm0, ymm1, ymm0, ymm2
LONG $0x197de3c4; WORD $0x01c1 // vextractf128 xmm1, ymm0, 1
LONG $0x3771e2c4; BYTE $0xd0 // vpcmpgtq xmm2, xmm1, xmm0
LONG $0x4b71e3c4; WORD $0x20c0 // vblendvpd xmm0, xmm1, xmm0, xmm2
LONG $0x0479e3c4; WORD $0x4ec8 // vpermilps xmm1, xmm0, 78
LONG $0x3771e2c4; BYTE $0xd0 // vpcmpgtq xmm2, xmm1, xmm0
LONG $0x4b71e3c4; WORD $0x20c0 // vblendvpd xmm0, xmm1, xmm0, xmm2
LONG $0x7ef9e1c4; BYTE $0xc0 // vmovq rax, xmm0
WORD $0x894c; BYTE $0xd6 // mov rsi, r10
WORD $0x394d; BYTE $0xc1 // cmp r9, r8
JE LBB6_8
LBB6_7:
LONG $0xcf348b4a // mov rsi, qword [rdi + 8*r9]
WORD $0x3948; BYTE $0xf0 // cmp rax, rsi
LONG $0xc64f0f48 // cmovg rax, rsi
WORD $0x3949; BYTE $0xf2 // cmp r10, rsi
LONG $0xf24d0f49 // cmovge rsi, r10
LONG $0x01c18349 // add r9, 1
WORD $0x8949; BYTE $0xf2 // mov r10, rsi
WORD $0x394d; BYTE $0xc8 // cmp r8, r9
JNE LBB6_7
LBB6_8:
WORD $0x8948; BYTE $0x31 // mov qword [rcx], rsi
WORD $0x8948; BYTE $0x02 // mov qword [rdx], rax
VZEROUPPER
RET
DATA LCDATA5<>+0x000(SB)/8, $0x8000000000000000
GLOBL LCDATA5<>(SB), 8, $8
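// _uint64_max_min_avx2: min/max of length uint64 values starting at values.
// Max accumulators start at 0 and min accumulators at all-ones. Unsigned
// 64-bit comparison is emulated by XORing both operands with the sign bit from
// LCDATA5 before VPCMPGTQ, then selecting with VBLENDVPD; 16 elements are
// folded per iteration and the tail runs through the scalar loop at LBB7_7.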
TEXT ·_uint64_max_min_avx2(SB), $8-32
MOVQ values+0(FP), DI
MOVQ length+8(FP), SI
MOVQ minout+16(FP), DX
MOVQ maxout+24(FP), CX
LEAQ LCDATA5<>(SB), BP
WORD $0xf685 // test esi, esi
JLE LBB7_1
WORD $0x8941; BYTE $0xf0 // mov r8d, esi
WORD $0xfe83; BYTE $0x0f // cmp esi, 15
JA LBB7_4
LONG $0xffc0c748; WORD $0xffff; BYTE $0xff // mov rax, -1
WORD $0x3145; BYTE $0xc9 // xor r9d, r9d
WORD $0x3145; BYTE $0xd2 // xor r10d, r10d
JMP LBB7_7
LBB7_1:
LONG $0xffc0c748; WORD $0xffff; BYTE $0xff // mov rax, -1
WORD $0xf631 // xor esi, esi
JMP LBB7_8
LBB7_4:
WORD $0x8945; BYTE $0xc1 // mov r9d, r8d
LONG $0xf0e18341 // and r9d, -16
LONG $0xedefd1c5 // vpxor xmm5, xmm5, xmm5
LONG $0xc976f5c5 // vpcmpeqd ymm1, ymm1, ymm1
WORD $0xc031 // xor eax, eax
LONG $0x597de2c4; WORD $0x0045 // vpbroadcastq ymm0, qword 0[rbp] /* [rip + .LCPI7_0] */
LONG $0xe476ddc5 // vpcmpeqd ymm4, ymm4, ymm4
LONG $0xdb76e5c5 // vpcmpeqd ymm3, ymm3, ymm3
LONG $0xd276edc5 // vpcmpeqd ymm2, ymm2, ymm2
LONG $0xef3941c4; BYTE $0xc0 // vpxor xmm8, xmm8, xmm8
LONG $0xffefc1c5 // vpxor xmm7, xmm7, xmm7
LONG $0xf6efc9c5 // vpxor xmm6, xmm6, xmm6
LBB7_5:
LONG $0x0c6f7ec5; BYTE $0xc7 // vmovdqu ymm9, yword [rdi + 8*rax]
LONG $0xd0ef75c5 // vpxor ymm10, ymm1, ymm0
LONG $0xd8ef35c5 // vpxor ymm11, ymm9, ymm0
LONG $0x372542c4; BYTE $0xd2 // vpcmpgtq ymm10, ymm11, ymm10
LONG $0x4b35e3c4; WORD $0xa0c9 // vblendvpd ymm1, ymm9, ymm1, ymm10
LONG $0xd0ef55c5 // vpxor ymm10, ymm5, ymm0
LONG $0x372d42c4; BYTE $0xd3 // vpcmpgtq ymm10, ymm10, ymm11
LONG $0x4b35e3c4; WORD $0xa0ed // vblendvpd ymm5, ymm9, ymm5, ymm10
LONG $0x4c6f7ec5; WORD $0x20c7 // vmovdqu ymm9, yword [rdi + 8*rax + 32]
LONG $0xd0ef5dc5 // vpxor ymm10, ymm4, ymm0
LONG $0xd8ef35c5 // vpxor ymm11, ymm9, ymm0
LONG $0x372542c4; BYTE $0xd2 // vpcmpgtq ymm10, ymm11, ymm10
LONG $0x4b35e3c4; WORD $0xa0e4 // vblendvpd ymm4, ymm9, ymm4, ymm10
LONG $0xd0ef3dc5 // vpxor ymm10, ymm8, ymm0
LONG $0x372d42c4; BYTE $0xd3 // vpcmpgtq ymm10, ymm10, ymm11
LONG $0x5c6f7ec5; WORD $0x40c7 // vmovdqu ymm11, yword [rdi + 8*rax + 64]
LONG $0x4b3543c4; WORD $0xa0c0 // vblendvpd ymm8, ymm9, ymm8, ymm10
LONG $0xc8ef65c5 // vpxor ymm9, ymm3, ymm0
LONG $0xd0ef25c5 // vpxor ymm10, ymm11, ymm0
LONG $0x372d42c4; BYTE $0xc9 // vpcmpgtq ymm9, ymm10, ymm9
LONG $0x4b25e3c4; WORD $0x90db // vblendvpd ymm3, ymm11, ymm3, ymm9
LONG $0xc8ef45c5 // vpxor ymm9, ymm7, ymm0
LONG $0x373542c4; BYTE $0xca // vpcmpgtq ymm9, ymm9, ymm10
LONG $0x4b25e3c4; WORD $0x90ff // vblendvpd ymm7, ymm11, ymm7, ymm9
LONG $0x4c6f7ec5; WORD $0x60c7 // vmovdqu ymm9, yword [rdi + 8*rax + 96]
LONG $0xd0ef6dc5 // vpxor ymm10, ymm2, ymm0
LONG $0xd8ef35c5 // vpxor ymm11, ymm9, ymm0
LONG $0x372542c4; BYTE $0xd2 // vpcmpgtq ymm10, ymm11, ymm10
LONG $0x4b35e3c4; WORD $0xa0d2 // vblendvpd ymm2, ymm9, ymm2, ymm10
LONG $0xd0ef4dc5 // vpxor ymm10, ymm6, ymm0
LONG $0x372d42c4; BYTE $0xd3 // vpcmpgtq ymm10, ymm10, ymm11
LONG $0x4b35e3c4; WORD $0xa0f6 // vblendvpd ymm6, ymm9, ymm6, ymm10
LONG $0x10c08348 // add rax, 16
WORD $0x3949; BYTE $0xc1 // cmp r9, rax
JNE LBB7_5
LONG $0xc8ef3dc5 // vpxor ymm9, ymm8, ymm0
LONG $0xd0ef55c5 // vpxor ymm10, ymm5, ymm0
LONG $0x372d42c4; BYTE $0xc9 // vpcmpgtq ymm9, ymm10, ymm9
LONG $0x4b3de3c4; WORD $0x90ed // vblendvpd ymm5, ymm8, ymm5, ymm9
LONG $0xc05755c5 // vxorpd ymm8, ymm5, ymm0
LONG $0xc8ef45c5 // vpxor ymm9, ymm7, ymm0
LONG $0x373d42c4; BYTE $0xc1 // vpcmpgtq ymm8, ymm8, ymm9
LONG $0x4b45e3c4; WORD $0x80ed // vblendvpd ymm5, ymm7, ymm5, ymm8
LONG $0xf857d5c5 // vxorpd ymm7, ymm5, ymm0
LONG $0xc0ef4dc5 // vpxor ymm8, ymm6, ymm0
LONG $0x3745c2c4; BYTE $0xf8 // vpcmpgtq ymm7, ymm7, ymm8
LONG $0x4b4de3c4; WORD $0x70ed // vblendvpd ymm5, ymm6, ymm5, ymm7
LONG $0x197de3c4; WORD $0x01ee // vextractf128 xmm6, ymm5, 1
LONG $0xc05749c5 // vxorpd xmm8, xmm6, xmm0
LONG $0xf857d1c5 // vxorpd xmm7, xmm5, xmm0
LONG $0x3741c2c4; BYTE $0xf8 // vpcmpgtq xmm7, xmm7, xmm8
LONG $0x4b49e3c4; WORD $0x70ed // vblendvpd xmm5, xmm6, xmm5, xmm7
LONG $0x0479e3c4; WORD $0x4ef5 // vpermilps xmm6, xmm5, 78
LONG $0xc05751c5 // vxorpd xmm8, xmm5, xmm0
LONG $0xf857c9c5 // vxorpd xmm7, xmm6, xmm0
LONG $0x3739e2c4; BYTE $0xff // vpcmpgtq xmm7, xmm8, xmm7
LONG $0x4b49e3c4; WORD $0x70ed // vblendvpd xmm5, xmm6, xmm5, xmm7
LONG $0xf0eff5c5 // vpxor ymm6, ymm1, ymm0
LONG $0xf8efddc5 // vpxor ymm7, ymm4, ymm0
LONG $0x3745e2c4; BYTE $0xf6 // vpcmpgtq ymm6, ymm7, ymm6
LONG $0x4b5de3c4; WORD $0x60c9 // vblendvpd ymm1, ymm4, ymm1, ymm6
LONG $0xe057f5c5 // vxorpd ymm4, ymm1, ymm0
LONG $0xf0efe5c5 // vpxor ymm6, ymm3, ymm0
LONG $0x374de2c4; BYTE $0xe4 // vpcmpgtq ymm4, ymm6, ymm4
LONG $0x4b65e3c4; WORD $0x40c9 // vblendvpd ymm1, ymm3, ymm1, ymm4
LONG $0x7ef9c1c4; BYTE $0xea // vmovq r10, xmm5
LONG $0xd857f5c5 // vxorpd ymm3, ymm1, ymm0
LONG $0xe0efedc5 // vpxor ymm4, ymm2, ymm0
LONG $0x375de2c4; BYTE $0xdb // vpcmpgtq ymm3, ymm4, ymm3
LONG $0x4b6de3c4; WORD $0x30c9 // vblendvpd ymm1, ymm2, ymm1, ymm3
LONG $0x197de3c4; WORD $0x01ca // vextractf128 xmm2, ymm1, 1
LONG $0xd857f1c5 // vxorpd xmm3, xmm1, xmm0
LONG $0xe057e9c5 // vxorpd xmm4, xmm2, xmm0
LONG $0x3759e2c4; BYTE $0xdb // vpcmpgtq xmm3, xmm4, xmm3
LONG $0x4b69e3c4; WORD $0x30c9 // vblendvpd xmm1, xmm2, xmm1, xmm3
LONG $0x0479e3c4; WORD $0x4ed1 // vpermilps xmm2, xmm1, 78
LONG $0xd857f1c5 // vxorpd xmm3, xmm1, xmm0
LONG $0xc057e9c5 // vxorpd xmm0, xmm2, xmm0
LONG $0x3779e2c4; BYTE $0xc3 // vpcmpgtq xmm0, xmm0, xmm3
LONG $0x4b69e3c4; WORD $0x00c1 // vblendvpd xmm0, xmm2, xmm1, xmm0
LONG $0x7ef9e1c4; BYTE $0xc0 // vmovq rax, xmm0
WORD $0x894c; BYTE $0xd6 // mov rsi, r10
WORD $0x394d; BYTE $0xc1 // cmp r9, r8
JE LBB7_8
LBB7_7:
LONG $0xcf348b4a // mov rsi, qword [rdi + 8*r9]
WORD $0x3948; BYTE $0xf0 // cmp rax, rsi
LONG $0xc6430f48 // cmovae rax, rsi
WORD $0x3949; BYTE $0xf2 // cmp r10, rsi
LONG $0xf2470f49 // cmova rsi, r10
LONG $0x01c18349 // add r9, 1
WORD $0x8949; BYTE $0xf2 // mov r10, rsi
WORD $0x394d; BYTE $0xc8 // cmp r8, r9
JNE LBB7_7
LBB7_8:
WORD $0x8948; BYTE $0x31 // mov qword [rcx], rsi
WORD $0x8948; BYTE $0x02 // mov qword [rdx], rax
VZEROUPPER
RET