static void tcg_out_vec_op()

in tcg/i386/tcg-target.c.inc [2632:2914]


/*
 * Emit host code for a single vector TCG opcode.
 *
 *   s          - code generation context, passed to every emitter call.
 *   opc        - the vector opcode to emit; selects the switch case below.
 *   vecl       - vector length selector; added to TCG_TYPE_V64 to obtain
 *                the TCGType used for loads/stores and the P_VEXL decision.
 *   vece       - element size; used directly as the index into the
 *                four-entry opcode tables and compared against
 *                MO_8/MO_16/MO_32/MO_64 in a few cases.
 *   args       - operands; args[0..2] are cached in a0/a1/a2, and args[3]
 *                is read by the compare, shuffle/blend/permute and
 *                vpblendvb cases.
 *   const_args - not referenced in this function.
 *
 * mov_vec, dup_vec and any opcode without a case end in
 * g_assert_not_reached().
 */
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS])
{
    /*
     * Opcode tables indexed by vece.  OPC_UD2 is a placeholder for an
     * element size that must not be requested from that table; the
     * gen_simd path asserts that it is never selected.
     */
    static int const add_insn[4] = {
        OPC_PADDB, OPC_PADDW, OPC_PADDD, OPC_PADDQ
    };
    static int const ssadd_insn[4] = {
        OPC_PADDSB, OPC_PADDSW, OPC_UD2, OPC_UD2
    };
    static int const usadd_insn[4] = {
        OPC_PADDUB, OPC_PADDUW, OPC_UD2, OPC_UD2
    };
    static int const sub_insn[4] = {
        OPC_PSUBB, OPC_PSUBW, OPC_PSUBD, OPC_PSUBQ
    };
    static int const sssub_insn[4] = {
        OPC_PSUBSB, OPC_PSUBSW, OPC_UD2, OPC_UD2
    };
    static int const ussub_insn[4] = {
        OPC_PSUBUB, OPC_PSUBUW, OPC_UD2, OPC_UD2
    };
    static int const mul_insn[4] = {
        OPC_UD2, OPC_PMULLW, OPC_PMULLD, OPC_UD2
    };
    /* Used only by gen_shift, which asserts vece != MO_8 before indexing. */
    static int const shift_imm_insn[4] = {
        OPC_UD2, OPC_PSHIFTW_Ib, OPC_PSHIFTD_Ib, OPC_PSHIFTQ_Ib
    };
    static int const cmpeq_insn[4] = {
        OPC_PCMPEQB, OPC_PCMPEQW, OPC_PCMPEQD, OPC_PCMPEQQ
    };
    static int const cmpgt_insn[4] = {
        OPC_PCMPGTB, OPC_PCMPGTW, OPC_PCMPGTD, OPC_PCMPGTQ
    };
    static int const punpckl_insn[4] = {
        OPC_PUNPCKLBW, OPC_PUNPCKLWD, OPC_PUNPCKLDQ, OPC_PUNPCKLQDQ
    };
    static int const punpckh_insn[4] = {
        OPC_PUNPCKHBW, OPC_PUNPCKHWD, OPC_PUNPCKHDQ, OPC_PUNPCKHQDQ
    };
    static int const packss_insn[4] = {
        OPC_PACKSSWB, OPC_PACKSSDW, OPC_UD2, OPC_UD2
    };
    static int const packus_insn[4] = {
        OPC_PACKUSWB, OPC_PACKUSDW, OPC_UD2, OPC_UD2
    };
    static int const smin_insn[4] = {
        OPC_PMINSB, OPC_PMINSW, OPC_PMINSD, OPC_UD2
    };
    static int const smax_insn[4] = {
        OPC_PMAXSB, OPC_PMAXSW, OPC_PMAXSD, OPC_UD2
    };
    static int const umin_insn[4] = {
        OPC_PMINUB, OPC_PMINUW, OPC_PMINUD, OPC_UD2
    };
    static int const umax_insn[4] = {
        OPC_PMAXUB, OPC_PMAXUW, OPC_PMAXUD, OPC_UD2
    };
    static int const shlv_insn[4] = {
        /* TODO: AVX512 adds support for MO_16.  */
        OPC_UD2, OPC_UD2, OPC_VPSLLVD, OPC_VPSLLVQ
    };
    static int const shrv_insn[4] = {
        /* TODO: AVX512 adds support for MO_16.  */
        OPC_UD2, OPC_UD2, OPC_VPSRLVD, OPC_VPSRLVQ
    };
    static int const sarv_insn[4] = {
        /* TODO: AVX512 adds support for MO_16, MO_64.  */
        OPC_UD2, OPC_UD2, OPC_VPSRAVD, OPC_UD2
    };
    static int const shls_insn[4] = {
        OPC_UD2, OPC_PSLLW, OPC_PSLLD, OPC_PSLLQ
    };
    static int const shrs_insn[4] = {
        OPC_UD2, OPC_PSRLW, OPC_PSRLD, OPC_PSRLQ
    };
    static int const sars_insn[4] = {
        OPC_UD2, OPC_PSRAW, OPC_PSRAD, OPC_UD2
    };
    static int const abs_insn[4] = {
        /* TODO: AVX512 adds support for MO_64.  */
        OPC_PABSB, OPC_PABSW, OPC_PABSD, OPC_UD2
    };

    /* vecl is an offset from TCG_TYPE_V64. */
    TCGType type = vecl + TCG_TYPE_V64;
    int insn, sub;
    TCGArg a0, a1, a2;

    /*
     * The first three operands are fetched unconditionally; what each
     * one means (register, offset, immediate) depends on opc.
     */
    a0 = args[0];
    a1 = args[1];
    a2 = args[2];

    switch (opc) {
    /*
     * Table-driven cases: pick the opcode for this element size (or a
     * size-independent opcode for the bitwise ops) and emit it through
     * the shared gen_simd tail with operands (a0, a1, a2).
     */
    case INDEX_op_add_vec:
        insn = add_insn[vece];
        goto gen_simd;
    case INDEX_op_ssadd_vec:
        insn = ssadd_insn[vece];
        goto gen_simd;
    case INDEX_op_usadd_vec:
        insn = usadd_insn[vece];
        goto gen_simd;
    case INDEX_op_sub_vec:
        insn = sub_insn[vece];
        goto gen_simd;
    case INDEX_op_sssub_vec:
        insn = sssub_insn[vece];
        goto gen_simd;
    case INDEX_op_ussub_vec:
        insn = ussub_insn[vece];
        goto gen_simd;
    case INDEX_op_mul_vec:
        insn = mul_insn[vece];
        goto gen_simd;
    case INDEX_op_and_vec:
        insn = OPC_PAND;
        goto gen_simd;
    case INDEX_op_or_vec:
        insn = OPC_POR;
        goto gen_simd;
    case INDEX_op_xor_vec:
        insn = OPC_PXOR;
        goto gen_simd;
    case INDEX_op_smin_vec:
        insn = smin_insn[vece];
        goto gen_simd;
    case INDEX_op_umin_vec:
        insn = umin_insn[vece];
        goto gen_simd;
    case INDEX_op_smax_vec:
        insn = smax_insn[vece];
        goto gen_simd;
    case INDEX_op_umax_vec:
        insn = umax_insn[vece];
        goto gen_simd;
    case INDEX_op_shlv_vec:
        insn = shlv_insn[vece];
        goto gen_simd;
    case INDEX_op_shrv_vec:
        insn = shrv_insn[vece];
        goto gen_simd;
    case INDEX_op_sarv_vec:
        insn = sarv_insn[vece];
        goto gen_simd;
    case INDEX_op_shls_vec:
        insn = shls_insn[vece];
        goto gen_simd;
    case INDEX_op_shrs_vec:
        insn = shrs_insn[vece];
        goto gen_simd;
    case INDEX_op_sars_vec:
        insn = sars_insn[vece];
        goto gen_simd;
    case INDEX_op_x86_punpckl_vec:
        insn = punpckl_insn[vece];
        goto gen_simd;
    case INDEX_op_x86_punpckh_vec:
        insn = punpckh_insn[vece];
        goto gen_simd;
    case INDEX_op_x86_packss_vec:
        insn = packss_insn[vece];
        goto gen_simd;
    case INDEX_op_x86_packus_vec:
        insn = packus_insn[vece];
        goto gen_simd;
    /* This case is only compiled when TCG_TARGET_REG_BITS is 32. */
#if TCG_TARGET_REG_BITS == 32
    case INDEX_op_dup2_vec:
        /* First merge the two 32-bit inputs to a single 64-bit element. */
        tcg_out_vex_modrm(s, OPC_PUNPCKLDQ, a0, a1, a2);
        /* Then replicate the 64-bit elements across the rest of the vector. */
        if (type != TCG_TYPE_V64) {
            tcg_out_dup_vec(s, type, MO_64, a0, a0);
        }
        break;
#endif
    case INDEX_op_abs_vec:
        insn = abs_insn[vece];
        /*
         * Single-source op: the source moves to the third emitter operand
         * and 0 is passed as the second.
         * NOTE(review): presumably the second slot is the unused VEX.vvvv
         * operand -- confirm against tcg_out_vex_modrm.
         */
        a2 = a1;
        a1 = 0;
        goto gen_simd;
    /*
     * Shared tail for the cases above and for cmp_vec below: reject
     * placeholder opcodes, add P_VEXL for 256-bit vectors, then emit.
     */
    gen_simd:
        tcg_debug_assert(insn != OPC_UD2);
        if (type == TCG_TYPE_V256) {
            insn |= P_VEXL;
        }
        tcg_out_vex_modrm(s, insn, a0, a1, a2);
        break;

    case INDEX_op_cmp_vec:
        /*
         * args[3] carries the condition.  Only TCG_COND_EQ and
         * TCG_COND_GT are accepted here; anything else asserts.
         */
        sub = args[3];
        if (sub == TCG_COND_EQ) {
            insn = cmpeq_insn[vece];
        } else if (sub == TCG_COND_GT) {
            insn = cmpgt_insn[vece];
        } else {
            g_assert_not_reached();
        }
        /* Jumps back to the gen_simd label defined above. */
        goto gen_simd;

    case INDEX_op_andc_vec:
        insn = OPC_PANDN;
        if (type == TCG_TYPE_V256) {
            insn |= P_VEXL;
        }
        /*
         * Operands are deliberately swapped relative to gen_simd: a2 is
         * passed before a1.
         * NOTE(review): presumably because PANDN complements its first
         * source while andc complements its second -- confirm.
         */
        tcg_out_vex_modrm(s, insn, a0, a2, a1);
        break;

    /*
     * Shift by immediate.  'sub' is passed in the first register slot of
     * tcg_out_vex_modrm, a0 in the second and a1 in the third; a2 is
     * emitted afterwards as the immediate byte.
     * NOTE(review): 6/2/4 look like the ModRM /r opcode extensions for
     * shift-left, logical shift-right and arithmetic shift-right --
     * confirm against the Intel SDM.
     */
    case INDEX_op_shli_vec:
        sub = 6;
        goto gen_shift;
    case INDEX_op_shri_vec:
        sub = 2;
        goto gen_shift;
    case INDEX_op_sari_vec:
        tcg_debug_assert(vece != MO_64);
        sub = 4;
        /* Falls through into gen_shift. */
    gen_shift:
        tcg_debug_assert(vece != MO_8);
        insn = shift_imm_insn[vece];
        if (type == TCG_TYPE_V256) {
            insn |= P_VEXL;
        }
        tcg_out_vex_modrm(s, insn, sub, a0, a1);
        tcg_out8(s, a2);
        break;

    /* Memory operations are delegated to the dedicated helpers. */
    case INDEX_op_ld_vec:
        tcg_out_ld(s, type, a0, a1, a2);
        break;
    case INDEX_op_st_vec:
        tcg_out_st(s, type, a0, a1, a2);
        break;
    case INDEX_op_dupm_vec:
        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
        break;

    /*
     * Three-operand instructions followed by an immediate byte taken
     * from args[3].  Unlike gen_simd there is no OPC_UD2 check here.
     */
    case INDEX_op_x86_shufps_vec:
        insn = OPC_SHUFPS;
        sub = args[3];
        goto gen_simd_imm8;
    case INDEX_op_x86_blend_vec:
        /* Only MO_16 and MO_32 are handled; other sizes assert. */
        if (vece == MO_16) {
            insn = OPC_PBLENDW;
        } else if (vece == MO_32) {
            insn = (have_avx2 ? OPC_VPBLENDD : OPC_BLENDPS);
        } else {
            g_assert_not_reached();
        }
        sub = args[3];
        goto gen_simd_imm8;
    case INDEX_op_x86_vperm2i128_vec:
        insn = OPC_VPERM2I128;
        sub = args[3];
        goto gen_simd_imm8;
    gen_simd_imm8:
        if (type == TCG_TYPE_V256) {
            insn |= P_VEXL;
        }
        tcg_out_vex_modrm(s, insn, a0, a1, a2);
        tcg_out8(s, sub);
        break;

    case INDEX_op_x86_vpblendvb_vec:
        insn = OPC_VPBLENDVB;
        if (type == TCG_TYPE_V256) {
            insn |= P_VEXL;
        }
        tcg_out_vex_modrm(s, insn, a0, a1, a2);
        /*
         * The trailing byte is args[3] shifted left by four.
         * NOTE(review): presumably the fourth register operand encoded
         * in imm8[7:4] -- confirm against the Intel SDM.
         */
        tcg_out8(s, args[3] << 4);
        break;

    case INDEX_op_x86_psrldq_vec:
        /*
         * Emitted with a fixed 3 in the first register slot and a2 as the
         * immediate byte; no P_VEXL adjustment is applied in this case.
         * NOTE(review): 3 is presumably the /3 extension within group 14
         * -- confirm.
         */
        tcg_out_vex_modrm(s, OPC_GRP14, 3, a0, a1);
        tcg_out8(s, a2);
        break;

    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
    default:
        g_assert_not_reached();
    }
}