in tcg/i386/tcg-target.c.inc [2632:2914]
/*
 * Emit the host (x86 SSE/AVX) code for a single TCG vector opcode.
 *
 * @s:          TCG context that receives the emitted instruction bytes.
 * @opc:        TCG vector opcode to expand.
 * @vecl:       vector length selector; vecl + TCG_TYPE_V64 yields the
 *              vector type (TCG_TYPE_V64 / V128 / V256).
 * @vece:       log2 of the element size (MO_8 .. MO_64); used to index
 *              the per-element-size opcode tables below.
 * @args:       operand array (destination first, then sources/immediates).
 * @const_args: per-operand constant flags (not consulted here).
 *
 * Each table below is indexed by vece.  OPC_UD2 entries mark element
 * sizes the host ISA cannot do directly; reaching one is a programming
 * error caught by the assertion at the gen_simd label.
 */
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
unsigned vecl, unsigned vece,
const TCGArg args[TCG_MAX_OP_ARGS],
const int const_args[TCG_MAX_OP_ARGS])
{
static int const add_insn[4] = {
OPC_PADDB, OPC_PADDW, OPC_PADDD, OPC_PADDQ
};
static int const ssadd_insn[4] = {
OPC_PADDSB, OPC_PADDSW, OPC_UD2, OPC_UD2
};
static int const usadd_insn[4] = {
OPC_PADDUB, OPC_PADDUW, OPC_UD2, OPC_UD2
};
static int const sub_insn[4] = {
OPC_PSUBB, OPC_PSUBW, OPC_PSUBD, OPC_PSUBQ
};
static int const sssub_insn[4] = {
OPC_PSUBSB, OPC_PSUBSW, OPC_UD2, OPC_UD2
};
static int const ussub_insn[4] = {
OPC_PSUBUB, OPC_PSUBUW, OPC_UD2, OPC_UD2
};
static int const mul_insn[4] = {
OPC_UD2, OPC_PMULLW, OPC_PMULLD, OPC_UD2
};
static int const shift_imm_insn[4] = {
OPC_UD2, OPC_PSHIFTW_Ib, OPC_PSHIFTD_Ib, OPC_PSHIFTQ_Ib
};
static int const cmpeq_insn[4] = {
OPC_PCMPEQB, OPC_PCMPEQW, OPC_PCMPEQD, OPC_PCMPEQQ
};
static int const cmpgt_insn[4] = {
OPC_PCMPGTB, OPC_PCMPGTW, OPC_PCMPGTD, OPC_PCMPGTQ
};
static int const punpckl_insn[4] = {
OPC_PUNPCKLBW, OPC_PUNPCKLWD, OPC_PUNPCKLDQ, OPC_PUNPCKLQDQ
};
static int const punpckh_insn[4] = {
OPC_PUNPCKHBW, OPC_PUNPCKHWD, OPC_PUNPCKHDQ, OPC_PUNPCKHQDQ
};
static int const packss_insn[4] = {
OPC_PACKSSWB, OPC_PACKSSDW, OPC_UD2, OPC_UD2
};
static int const packus_insn[4] = {
OPC_PACKUSWB, OPC_PACKUSDW, OPC_UD2, OPC_UD2
};
static int const smin_insn[4] = {
OPC_PMINSB, OPC_PMINSW, OPC_PMINSD, OPC_UD2
};
static int const smax_insn[4] = {
OPC_PMAXSB, OPC_PMAXSW, OPC_PMAXSD, OPC_UD2
};
static int const umin_insn[4] = {
OPC_PMINUB, OPC_PMINUW, OPC_PMINUD, OPC_UD2
};
static int const umax_insn[4] = {
OPC_PMAXUB, OPC_PMAXUW, OPC_PMAXUD, OPC_UD2
};
static int const shlv_insn[4] = {
/* TODO: AVX512 adds support for MO_16. */
OPC_UD2, OPC_UD2, OPC_VPSLLVD, OPC_VPSLLVQ
};
static int const shrv_insn[4] = {
/* TODO: AVX512 adds support for MO_16. */
OPC_UD2, OPC_UD2, OPC_VPSRLVD, OPC_VPSRLVQ
};
static int const sarv_insn[4] = {
/* TODO: AVX512 adds support for MO_16, MO_64. */
OPC_UD2, OPC_UD2, OPC_VPSRAVD, OPC_UD2
};
static int const shls_insn[4] = {
OPC_UD2, OPC_PSLLW, OPC_PSLLD, OPC_PSLLQ
};
static int const shrs_insn[4] = {
OPC_UD2, OPC_PSRLW, OPC_PSRLD, OPC_PSRLQ
};
static int const sars_insn[4] = {
OPC_UD2, OPC_PSRAW, OPC_PSRAD, OPC_UD2
};
static int const abs_insn[4] = {
/* TODO: AVX512 adds support for MO_64. */
OPC_PABSB, OPC_PABSW, OPC_PABSD, OPC_UD2
};
TCGType type = vecl + TCG_TYPE_V64;
int insn, sub;
TCGArg a0, a1, a2;
a0 = args[0];
a1 = args[1];
a2 = args[2];
switch (opc) {
case INDEX_op_add_vec:
insn = add_insn[vece];
goto gen_simd;
case INDEX_op_ssadd_vec:
insn = ssadd_insn[vece];
goto gen_simd;
case INDEX_op_usadd_vec:
insn = usadd_insn[vece];
goto gen_simd;
case INDEX_op_sub_vec:
insn = sub_insn[vece];
goto gen_simd;
case INDEX_op_sssub_vec:
insn = sssub_insn[vece];
goto gen_simd;
case INDEX_op_ussub_vec:
insn = ussub_insn[vece];
goto gen_simd;
case INDEX_op_mul_vec:
insn = mul_insn[vece];
goto gen_simd;
case INDEX_op_and_vec:
insn = OPC_PAND;
goto gen_simd;
case INDEX_op_or_vec:
insn = OPC_POR;
goto gen_simd;
case INDEX_op_xor_vec:
insn = OPC_PXOR;
goto gen_simd;
case INDEX_op_smin_vec:
insn = smin_insn[vece];
goto gen_simd;
case INDEX_op_umin_vec:
insn = umin_insn[vece];
goto gen_simd;
case INDEX_op_smax_vec:
insn = smax_insn[vece];
goto gen_simd;
case INDEX_op_umax_vec:
insn = umax_insn[vece];
goto gen_simd;
case INDEX_op_shlv_vec:
insn = shlv_insn[vece];
goto gen_simd;
case INDEX_op_shrv_vec:
insn = shrv_insn[vece];
goto gen_simd;
case INDEX_op_sarv_vec:
insn = sarv_insn[vece];
goto gen_simd;
case INDEX_op_shls_vec:
insn = shls_insn[vece];
goto gen_simd;
case INDEX_op_shrs_vec:
insn = shrs_insn[vece];
goto gen_simd;
case INDEX_op_sars_vec:
insn = sars_insn[vece];
goto gen_simd;
case INDEX_op_x86_punpckl_vec:
insn = punpckl_insn[vece];
goto gen_simd;
case INDEX_op_x86_punpckh_vec:
insn = punpckh_insn[vece];
goto gen_simd;
case INDEX_op_x86_packss_vec:
insn = packss_insn[vece];
goto gen_simd;
case INDEX_op_x86_packus_vec:
insn = packus_insn[vece];
goto gen_simd;
#if TCG_TARGET_REG_BITS == 32
case INDEX_op_dup2_vec:
/* First merge the two 32-bit inputs to a single 64-bit element. */
tcg_out_vex_modrm(s, OPC_PUNPCKLDQ, a0, a1, a2);
/* Then replicate the 64-bit elements across the rest of the vector. */
if (type != TCG_TYPE_V64) {
tcg_out_dup_vec(s, type, MO_64, a0, a0);
}
break;
#endif
case INDEX_op_abs_vec:
insn = abs_insn[vece];
/*
 * Unary op routed through the shared two-source emitter: the single
 * input moves to the a2 (r/m) slot and a1 = 0 leaves the
 * second-source field unused.
 */
a2 = a1;
a1 = 0;
goto gen_simd;
gen_simd:
/*
 * Common tail for SIMD ops: reject element sizes the table marked
 * unsupported, then widen the encoding to 256-bit via VEX.L when
 * operating on a V256 type.
 */
tcg_debug_assert(insn != OPC_UD2);
if (type == TCG_TYPE_V256) {
insn |= P_VEXL;
}
tcg_out_vex_modrm(s, insn, a0, a1, a2);
break;
case INDEX_op_cmp_vec:
/* Only EQ and GT are expanded here; other conditions must have
   been lowered before reaching the backend. */
sub = args[3];
if (sub == TCG_COND_EQ) {
insn = cmpeq_insn[vece];
} else if (sub == TCG_COND_GT) {
insn = cmpgt_insn[vece];
} else {
g_assert_not_reached();
}
goto gen_simd;
case INDEX_op_andc_vec:
/* x86 PANDN computes ~src1 & src2, so swap a1/a2 to get
   the TCG semantics a1 & ~a2. */
insn = OPC_PANDN;
if (type == TCG_TYPE_V256) {
insn |= P_VEXL;
}
tcg_out_vex_modrm(s, insn, a0, a2, a1);
break;
case INDEX_op_shli_vec:
sub = 6;            /* /6 = PSLL* opcode extension */
goto gen_shift;
case INDEX_op_shri_vec:
sub = 2;            /* /2 = PSRL* opcode extension */
goto gen_shift;
case INDEX_op_sari_vec:
/* No PSRAQ before AVX512; MO_64 arithmetic shift unsupported. */
tcg_debug_assert(vece != MO_64);
sub = 4;            /* /4 = PSRA* opcode extension */
gen_shift:
/* No byte-element immediate shifts exist on x86. */
tcg_debug_assert(vece != MO_8);
insn = shift_imm_insn[vece];
if (type == TCG_TYPE_V256) {
insn |= P_VEXL;
}
tcg_out_vex_modrm(s, insn, sub, a0, a1);
tcg_out8(s, a2);    /* trailing imm8 shift count */
break;
case INDEX_op_ld_vec:
tcg_out_ld(s, type, a0, a1, a2);
break;
case INDEX_op_st_vec:
tcg_out_st(s, type, a0, a1, a2);
break;
case INDEX_op_dupm_vec:
tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
break;
case INDEX_op_x86_shufps_vec:
insn = OPC_SHUFPS;
sub = args[3];
goto gen_simd_imm8;
case INDEX_op_x86_blend_vec:
if (vece == MO_16) {
insn = OPC_PBLENDW;
} else if (vece == MO_32) {
insn = (have_avx2 ? OPC_VPBLENDD : OPC_BLENDPS);
} else {
g_assert_not_reached();
}
sub = args[3];
goto gen_simd_imm8;
case INDEX_op_x86_vperm2i128_vec:
insn = OPC_VPERM2I128;
sub = args[3];
goto gen_simd_imm8;
gen_simd_imm8:
/* Shared tail: emit the op, then its trailing imm8 selector. */
if (type == TCG_TYPE_V256) {
insn |= P_VEXL;
}
tcg_out_vex_modrm(s, insn, a0, a1, a2);
tcg_out8(s, sub);
break;
case INDEX_op_x86_vpblendvb_vec:
insn = OPC_VPBLENDVB;
if (type == TCG_TYPE_V256) {
insn |= P_VEXL;
}
tcg_out_vex_modrm(s, insn, a0, a1, a2);
/* The blend mask register is encoded in imm8 bits [7:4]. */
tcg_out8(s, args[3] << 4);
break;
case INDEX_op_x86_psrldq_vec:
/* PSRLDQ: group-14 encoding with /3 extension; imm8 byte count. */
tcg_out_vex_modrm(s, OPC_GRP14, 3, a0, a1);
tcg_out8(s, a2);
break;
case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */
case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */
default:
g_assert_not_reached();
}
}