void CodeGen::genAvxFamilyIntrinsic()

in src/coreclr/jit/hwintrinsiccodegenxarch.cpp [2265:2966]


void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions)
{
    NamedIntrinsic intrinsicId = node->GetHWIntrinsicId();

    if (HWIntrinsicInfo::IsFmaIntrinsic(intrinsicId))
    {
        genFMAIntrinsic(node, instOptions);
        return;
    }

    if (HWIntrinsicInfo::IsPermuteVar2x(intrinsicId))
    {
        genPermuteVar2x(node, instOptions);
        return;
    }

    var_types   baseType   = node->GetSimdBaseType();
    emitAttr    attr       = emitActualTypeSize(Compiler::getSIMDTypeForSize(node->GetSimdSize()));
    var_types   targetType = node->TypeGet();
    instruction ins        = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);
    size_t      numArgs    = node->GetOperandCount();
    GenTree*    op1        = node->Op(1);
    regNumber   op1Reg     = REG_NA;
    regNumber   targetReg  = node->GetRegNum();
    emitter*    emit       = GetEmitter();

    genConsumeMultiOpOperands(node);

    switch (intrinsicId)
    {
        case NI_AVX2_ConvertToInt32:
        case NI_AVX2_ConvertToUInt32:
        {
            assert(instOptions == INS_OPTS_NONE);

            op1Reg = op1->GetRegNum();
            assert((baseType == TYP_INT) || (baseType == TYP_UINT));
            instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);
            emit->emitIns_Mov(ins, emitActualTypeSize(baseType), targetReg, op1Reg, /* canSkip */ false);
            break;
        }

        case NI_AVX2_ConvertToVector256Int16:
        case NI_AVX2_ConvertToVector256Int32:
        case NI_AVX2_ConvertToVector256Int64:
        {
            instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);

            if (!varTypeIsSIMD(op1->gtType))
            {
                // Until we improve the handling of addressing modes in the emitter, we'll create a
                // temporary GT_IND to generate code with.
                GenTreeIndir load = indirForm(node->TypeGet(), op1);
                emit->emitInsLoadInd(ins, emitTypeSize(TYP_SIMD32), node->GetRegNum(), &load);
            }
            else
            {
                genHWIntrinsic_R_RM(node, ins, EA_32BYTE, targetReg, op1, instOptions);
            }
            break;
        }

        case NI_AVX2_GatherVector128:
        case NI_AVX2_GatherVector256:
        case NI_AVX2_GatherMaskVector128:
        case NI_AVX2_GatherMaskVector256:
        {
            assert(instOptions == INS_OPTS_NONE);

            GenTree* op2     = node->Op(2);
            GenTree* op3     = node->Op(3);
            GenTree* lastOp  = nullptr;
            GenTree* indexOp = nullptr;

            op1Reg                 = op1->GetRegNum();
            regNumber op2Reg       = op2->GetRegNum();
            regNumber addrBaseReg  = REG_NA;
            regNumber addrIndexReg = REG_NA;
            regNumber maskReg      = internalRegisters.Extract(node, RBM_ALLFLOAT);

            if (numArgs == 5)
            {
                assert(intrinsicId == NI_AVX2_GatherMaskVector128 || intrinsicId == NI_AVX2_GatherMaskVector256);

                GenTree* op4 = node->Op(4);
                lastOp       = node->Op(5);

                regNumber op3Reg = op3->GetRegNum();
                regNumber op4Reg = op4->GetRegNum();

                addrBaseReg  = op2Reg;
                addrIndexReg = op3Reg;
                indexOp      = op3;

                // copy op4Reg into the tmp mask register,
                // the mask register will be cleared by gather instructions
                emit->emitIns_Mov(INS_movaps, attr, maskReg, op4Reg, /* canSkip */ false);

                // copy source vector to the target register for masking merge
                emit->emitIns_Mov(INS_movaps, attr, targetReg, op1Reg, /* canSkip */ true);
            }
            else
            {
                assert(intrinsicId == NI_AVX2_GatherVector128 || intrinsicId == NI_AVX2_GatherVector256);
                addrBaseReg  = op1Reg;
                addrIndexReg = op2Reg;
                indexOp      = op2;
                lastOp       = op3;

                // generate all-one mask vector
                assert(!emitter::isHighSimdReg(targetReg));
                emit->emitIns_SIMD_R_R_R(INS_pcmpeqd, attr, maskReg, maskReg, maskReg, instOptions);
            }

            bool isVector128GatherWithVector256Index = (targetType == TYP_SIMD16) && (indexOp->TypeGet() == TYP_SIMD32);

            // hwintrinsiclistxarch.h uses Dword index instructions in default
            if (varTypeIsLong(node->GetAuxiliaryType()))
            {
                switch (ins)
                {
                    case INS_vpgatherdd:
                        ins = INS_vpgatherqd;
                        if (isVector128GatherWithVector256Index)
                        {
                            // YMM index in address mode
                            attr = emitTypeSize(TYP_SIMD32);
                        }
                        break;
                    case INS_vpgatherdq:
                        ins = INS_vpgatherqq;
                        break;
                    case INS_vgatherdps:
                        ins = INS_vgatherqps;
                        if (isVector128GatherWithVector256Index)
                        {
                            // YMM index in address mode
                            attr = emitTypeSize(TYP_SIMD32);
                        }
                        break;
                    case INS_vgatherdpd:
                        ins = INS_vgatherqpd;
                        break;
                    default:
                        unreached();
                }
            }

            assert(lastOp->IsCnsIntOrI());
            ssize_t ival = lastOp->AsIntCon()->IconValue();
            assert((ival >= 0) && (ival <= 255));

            assert(targetReg != maskReg);
            assert(targetReg != addrIndexReg);
            assert(maskReg != addrIndexReg);
            emit->emitIns_R_AR_R(ins, attr, targetReg, maskReg, addrBaseReg, addrIndexReg, (int8_t)ival, 0);

            break;
        }

        case NI_EVEX_AddMask:
        {
            assert(instOptions == INS_OPTS_NONE);

            uint32_t simdSize = node->GetSimdSize();
            uint32_t count    = simdSize / genTypeSize(baseType);

            if (count <= 8)
            {
                assert((count == 2) || (count == 4) || (count == 8));
                ins = INS_kaddb;
            }
            else if (count == 16)
            {
                ins = INS_kaddw;
            }
            else if (count == 32)
            {
                ins = INS_kaddd;
            }
            else
            {
                assert(count == 64);
                ins = INS_kaddq;
            }

            op1Reg = op1->GetRegNum();

            GenTree*  op2    = node->Op(2);
            regNumber op2Reg = op2->GetRegNum();

            assert(emitter::isMaskReg(targetReg));
            assert(emitter::isMaskReg(op1Reg));
            assert(emitter::isMaskReg(op2Reg));

            // Use EA_32BYTE to ensure the VEX.L bit gets set
            emit->emitIns_R_R_R(ins, EA_32BYTE, targetReg, op1Reg, op2Reg);
            break;
        }

        case NI_EVEX_AndMask:
        {
            assert(instOptions == INS_OPTS_NONE);

            uint32_t simdSize = node->GetSimdSize();
            uint32_t count    = simdSize / genTypeSize(baseType);

            if (count <= 8)
            {
                assert((count == 2) || (count == 4) || (count == 8));
                ins = INS_kandb;
            }
            else if (count == 16)
            {
                ins = INS_kandw;
            }
            else if (count == 32)
            {
                ins = INS_kandd;
            }
            else
            {
                assert(count == 64);
                ins = INS_kandq;
            }

            op1Reg = op1->GetRegNum();

            GenTree*  op2    = node->Op(2);
            regNumber op2Reg = op2->GetRegNum();

            assert(emitter::isMaskReg(targetReg));
            assert(emitter::isMaskReg(op1Reg));
            assert(emitter::isMaskReg(op2Reg));

            // Use EA_32BYTE to ensure the VEX.L bit gets set
            emit->emitIns_R_R_R(ins, EA_32BYTE, targetReg, op1Reg, op2Reg);
            break;
        }

        case NI_EVEX_AndNotMask:
        {
            assert(instOptions == INS_OPTS_NONE);

            uint32_t simdSize = node->GetSimdSize();
            uint32_t count    = simdSize / genTypeSize(baseType);

            if (count <= 8)
            {
                assert((count == 2) || (count == 4) || (count == 8));
                ins = INS_kandnb;
            }
            else if (count == 16)
            {
                ins = INS_kandnw;
            }
            else if (count == 32)
            {
                ins = INS_kandnd;
            }
            else
            {
                assert(count == 64);
                ins = INS_kandnq;
            }

            op1Reg = op1->GetRegNum();

            GenTree*  op2    = node->Op(2);
            regNumber op2Reg = op2->GetRegNum();

            assert(emitter::isMaskReg(targetReg));
            assert(emitter::isMaskReg(op1Reg));
            assert(emitter::isMaskReg(op2Reg));

            // Use EA_32BYTE to ensure the VEX.L bit gets set
            emit->emitIns_R_R_R(ins, EA_32BYTE, targetReg, op1Reg, op2Reg);
            break;
        }

        case NI_EVEX_MoveMask:
        {
            assert(instOptions == INS_OPTS_NONE);

            uint32_t simdSize = node->GetSimdSize();
            uint32_t count    = simdSize / genTypeSize(baseType);

            if (count <= 8)
            {
                assert((count == 2) || (count == 4) || (count == 8));
                ins  = INS_kmovb_gpr;
                attr = EA_4BYTE;
            }
            else if (count == 16)
            {
                ins  = INS_kmovw_gpr;
                attr = EA_4BYTE;
            }
            else if (count == 32)
            {
                ins  = INS_kmovd_gpr;
                attr = EA_4BYTE;
            }
            else
            {
                assert(count == 64);
                ins  = INS_kmovq_gpr;
                attr = EA_8BYTE;
            }

            op1Reg = op1->GetRegNum();
            assert(emitter::isMaskReg(op1Reg));

            emit->emitIns_Mov(ins, attr, targetReg, op1Reg, INS_FLAGS_DONT_CARE);
            break;
        }

        case NI_EVEX_KORTEST:
        {
            assert(instOptions == INS_OPTS_NONE);

            uint32_t simdSize = node->GetSimdSize();
            uint32_t count    = simdSize / genTypeSize(baseType);

            if (count <= 8)
            {
                assert((count == 2) || (count == 4) || (count == 8));
                ins = INS_kortestb;
            }
            else if (count == 16)
            {
                ins = INS_kortestw;
            }
            else if (count == 32)
            {
                ins = INS_kortestd;
            }
            else
            {
                assert(count == 64);
                ins = INS_kortestq;
            }

            op1Reg           = op1->GetRegNum();
            regNumber op2Reg = op1Reg;

            if (node->GetOperandCount() == 2)
            {
                GenTree* op2 = node->Op(2);
                op2Reg       = op2->GetRegNum();
            }

            assert(emitter::isMaskReg(op1Reg));
            assert(emitter::isMaskReg(op2Reg));

            emit->emitIns_R_R(ins, EA_8BYTE, op1Reg, op1Reg);
            break;
        }

        case NI_EVEX_KTEST:
        {
            assert(instOptions == INS_OPTS_NONE);

            uint32_t simdSize = node->GetSimdSize();
            uint32_t count    = simdSize / genTypeSize(baseType);

            if (count <= 8)
            {
                assert((count == 2) || (count == 4) || (count == 8));
                ins = INS_ktestb;
            }
            else if (count == 16)
            {
                ins = INS_ktestw;
            }
            else if (count == 32)
            {
                ins = INS_ktestd;
            }
            else
            {
                assert(count == 64);
                ins = INS_ktestq;
            }

            op1Reg = op1->GetRegNum();

            GenTree*  op2    = node->Op(2);
            regNumber op2Reg = op2->GetRegNum();

            assert(emitter::isMaskReg(op1Reg));
            assert(emitter::isMaskReg(op2Reg));

            emit->emitIns_R_R(ins, EA_8BYTE, op1Reg, op1Reg);
            break;
        }

        case NI_EVEX_NotMask:
        {
            assert(instOptions == INS_OPTS_NONE);

            uint32_t simdSize = node->GetSimdSize();
            uint32_t count    = simdSize / genTypeSize(baseType);

            if (count <= 8)
            {
                assert((count == 2) || (count == 4) || (count == 8));
                ins = INS_knotb;
            }
            else if (count == 16)
            {
                ins = INS_knotw;
            }
            else if (count == 32)
            {
                ins = INS_knotd;
            }
            else
            {
                assert(count == 64);
                ins = INS_knotq;
            }

            op1Reg = op1->GetRegNum();

            assert(emitter::isMaskReg(targetReg));
            assert(emitter::isMaskReg(op1Reg));

            emit->emitIns_R_R(ins, EA_8BYTE, targetReg, op1Reg);
            break;
        }

        case NI_EVEX_OrMask:
        {
            assert(instOptions == INS_OPTS_NONE);

            uint32_t simdSize = node->GetSimdSize();
            uint32_t count    = simdSize / genTypeSize(baseType);

            if (count <= 8)
            {
                assert((count == 2) || (count == 4) || (count == 8));
                ins = INS_korb;
            }
            else if (count == 16)
            {
                ins = INS_korw;
            }
            else if (count == 32)
            {
                ins = INS_kord;
            }
            else
            {
                assert(count == 64);
                ins = INS_korq;
            }

            op1Reg = op1->GetRegNum();

            GenTree*  op2    = node->Op(2);
            regNumber op2Reg = op2->GetRegNum();

            assert(emitter::isMaskReg(targetReg));
            assert(emitter::isMaskReg(op1Reg));
            assert(emitter::isMaskReg(op2Reg));

            // Use EA_32BYTE to ensure the VEX.L bit gets set
            emit->emitIns_R_R_R(ins, EA_32BYTE, targetReg, op1Reg, op2Reg);
            break;
        }

        case NI_EVEX_ShiftLeftMask:
        {
            assert(instOptions == INS_OPTS_NONE);

            uint32_t simdSize = node->GetSimdSize();
            uint32_t count    = simdSize / genTypeSize(baseType);

            if (count <= 8)
            {
                assert((count == 2) || (count == 4) || (count == 8));
                ins = INS_kshiftlb;
            }
            else if (count == 16)
            {
                ins = INS_kshiftlw;
            }
            else if (count == 32)
            {
                ins = INS_kshiftld;
            }
            else
            {
                assert(count == 64);
                ins = INS_kshiftlq;
            }

            op1Reg = op1->GetRegNum();

            GenTree* op2 = node->Op(2);
            assert(op2->IsCnsIntOrI() && op2->isContained());

            assert(emitter::isMaskReg(targetReg));
            assert(emitter::isMaskReg(op1Reg));

            ssize_t ival = op2->AsIntCon()->IconValue();
            assert((ival >= 0) && (ival <= 255));

            emit->emitIns_R_R_I(ins, EA_8BYTE, targetReg, op1Reg, (int8_t)ival);
            break;
        }

        case NI_EVEX_ShiftRightMask:
        {
            assert(instOptions == INS_OPTS_NONE);

            uint32_t simdSize = node->GetSimdSize();
            uint32_t count    = simdSize / genTypeSize(baseType);

            if (count <= 8)
            {
                assert((count == 2) || (count == 4) || (count == 8));
                ins = INS_kshiftrb;
            }
            else if (count == 16)
            {
                ins = INS_kshiftrw;
            }
            else if (count == 32)
            {
                ins = INS_kshiftrd;
            }
            else
            {
                assert(count == 64);
                ins = INS_kshiftrq;
            }

            op1Reg = op1->GetRegNum();

            GenTree* op2 = node->Op(2);
            assert(op2->IsCnsIntOrI() && op2->isContained());

            assert(emitter::isMaskReg(targetReg));
            assert(emitter::isMaskReg(op1Reg));

            ssize_t ival = op2->AsIntCon()->IconValue();
            assert((ival >= 0) && (ival <= 255));

            emit->emitIns_R_R_I(ins, EA_8BYTE, targetReg, op1Reg, (int8_t)ival);
            break;
        }

        case NI_EVEX_XorMask:
        {
            assert(instOptions == INS_OPTS_NONE);

            uint32_t simdSize = node->GetSimdSize();
            uint32_t count    = simdSize / genTypeSize(baseType);

            if (count <= 8)
            {
                assert((count == 2) || (count == 4) || (count == 8));
                ins = INS_kxorb;
            }
            else if (count == 16)
            {
                ins = INS_kxorw;
            }
            else if (count == 32)
            {
                ins = INS_kxord;
            }
            else
            {
                assert(count == 64);
                ins = INS_kxorq;
            }

            op1Reg = op1->GetRegNum();

            GenTree*  op2    = node->Op(2);
            regNumber op2Reg = op2->GetRegNum();

            assert(emitter::isMaskReg(targetReg));
            assert(emitter::isMaskReg(op1Reg));
            assert(emitter::isMaskReg(op2Reg));

            // Use EA_32BYTE to ensure the VEX.L bit gets set
            emit->emitIns_R_R_R(ins, EA_32BYTE, targetReg, op1Reg, op2Reg);
            break;
        }

        case NI_AVX512F_ConvertToInt32:
        case NI_AVX512F_ConvertToUInt32:
        case NI_AVX512F_ConvertToUInt32WithTruncation:
        case NI_AVX512F_X64_ConvertToInt64:
        case NI_AVX512F_X64_ConvertToUInt64:
        case NI_AVX512F_X64_ConvertToUInt64WithTruncation:
        case NI_AVX10v1_X64_ConvertToInt64:
        case NI_AVX10v1_X64_ConvertToUInt64:
        case NI_AVX10v1_X64_ConvertToUInt64WithTruncation:
        case NI_AVX10v1_ConvertToInt32:
        case NI_AVX10v1_ConvertToUInt32:
        case NI_AVX10v1_ConvertToUInt32WithTruncation:
        {
            assert(baseType == TYP_DOUBLE || baseType == TYP_FLOAT);
            emitAttr attr = emitTypeSize(targetType);

            instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);
            genHWIntrinsic_R_RM(node, ins, attr, targetReg, op1, instOptions);
            break;
        }

        case NI_AVX512F_ConvertToVector256Int32:
        case NI_AVX512F_ConvertToVector256UInt32:
        case NI_AVX512F_VL_ConvertToVector128UInt32:
        case NI_AVX512F_VL_ConvertToVector128UInt32WithSaturation:
        case NI_AVX10v1_ConvertToVector128UInt32:
        case NI_AVX10v1_ConvertToVector128UInt32WithSaturation:
        {
            if (varTypeIsFloating(baseType))
            {
                instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);
                genHWIntrinsic_R_RM(node, ins, attr, targetReg, op1, instOptions);
                break;
            }
            FALLTHROUGH;
        }

        case NI_AVX512F_ConvertToVector128Byte:
        case NI_AVX512F_ConvertToVector128ByteWithSaturation:
        case NI_AVX512F_ConvertToVector128Int16:
        case NI_AVX512F_ConvertToVector128Int16WithSaturation:
        case NI_AVX512F_ConvertToVector128SByte:
        case NI_AVX512F_ConvertToVector128SByteWithSaturation:
        case NI_AVX512F_ConvertToVector128UInt16:
        case NI_AVX512F_ConvertToVector128UInt16WithSaturation:
        case NI_AVX512F_ConvertToVector256Int16:
        case NI_AVX512F_ConvertToVector256Int16WithSaturation:
        case NI_AVX512F_ConvertToVector256Int32WithSaturation:
        case NI_AVX512F_ConvertToVector256UInt16:
        case NI_AVX512F_ConvertToVector256UInt16WithSaturation:
        case NI_AVX512F_ConvertToVector256UInt32WithSaturation:
        case NI_AVX512F_VL_ConvertToVector128Byte:
        case NI_AVX512F_VL_ConvertToVector128ByteWithSaturation:
        case NI_AVX512F_VL_ConvertToVector128Int16:
        case NI_AVX512F_VL_ConvertToVector128Int16WithSaturation:
        case NI_AVX512F_VL_ConvertToVector128Int32:
        case NI_AVX512F_VL_ConvertToVector128Int32WithSaturation:
        case NI_AVX512F_VL_ConvertToVector128SByte:
        case NI_AVX512F_VL_ConvertToVector128SByteWithSaturation:
        case NI_AVX512F_VL_ConvertToVector128UInt16:
        case NI_AVX512F_VL_ConvertToVector128UInt16WithSaturation:
        case NI_AVX512BW_ConvertToVector256Byte:
        case NI_AVX512BW_ConvertToVector256ByteWithSaturation:
        case NI_AVX512BW_ConvertToVector256SByte:
        case NI_AVX512BW_ConvertToVector256SByteWithSaturation:
        case NI_AVX512BW_VL_ConvertToVector128Byte:
        case NI_AVX512BW_VL_ConvertToVector128ByteWithSaturation:
        case NI_AVX512BW_VL_ConvertToVector128SByte:
        case NI_AVX512BW_VL_ConvertToVector128SByteWithSaturation:
        case NI_AVX10v1_ConvertToVector128Byte:
        case NI_AVX10v1_ConvertToVector128ByteWithSaturation:
        case NI_AVX10v1_ConvertToVector128Int16:
        case NI_AVX10v1_ConvertToVector128Int16WithSaturation:
        case NI_AVX10v1_ConvertToVector128Int32:
        case NI_AVX10v1_ConvertToVector128Int32WithSaturation:
        case NI_AVX10v1_ConvertToVector128SByte:
        case NI_AVX10v1_ConvertToVector128SByteWithSaturation:
        case NI_AVX10v1_ConvertToVector128UInt16:
        case NI_AVX10v1_ConvertToVector128UInt16WithSaturation:
        {
            instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);

            // These instructions are RM_R and so we need to ensure the targetReg
            // is passed in as the RM register and op1 is passed as the R register

            op1Reg = op1->GetRegNum();
            emit->emitIns_R_R(ins, attr, op1Reg, targetReg, instOptions);
            break;
        }

        case NI_AVX512F_X64_ConvertScalarToVector128Double:
        case NI_AVX512F_X64_ConvertScalarToVector128Single:
        case NI_AVX10v1_X64_ConvertScalarToVector128Double:
        case NI_AVX10v1_X64_ConvertScalarToVector128Single:
        {
            assert(baseType == TYP_ULONG || baseType == TYP_LONG);
            instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);
            genHWIntrinsic_R_R_RM(node, ins, EA_8BYTE, instOptions);
            break;
        }

        default:
            unreached();
            break;
    }

    genProduceReg(node);
}