in src/coreclr/jit/hwintrinsiccodegenxarch.cpp [2265:2966]
//------------------------------------------------------------------------
// genAvxFamilyIntrinsic: Emits code for the hardware intrinsics handled by this function:
//    FMA and PermuteVar2x intrinsics (delegated to their dedicated generators) and the
//    AVX2, EVEX mask, AVX512 and AVX10v1 intrinsics listed in the switch below.
//
// Arguments:
//    node        - The hardware intrinsic node to generate code for
//    instOptions - Instruction options forwarded to the emitter; several cases assert
//                  that this is INS_OPTS_NONE
//
// Notes:
//    Any intrinsic id that is not FMA/PermuteVar2x and is not listed in the switch
//    reaches unreached().
//
void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions)
{
    NamedIntrinsic intrinsicId = node->GetHWIntrinsicId();

    if (HWIntrinsicInfo::IsFmaIntrinsic(intrinsicId))
    {
        genFMAIntrinsic(node, instOptions);
        return;
    }

    if (HWIntrinsicInfo::IsPermuteVar2x(intrinsicId))
    {
        genPermuteVar2x(node, instOptions);
        return;
    }

    var_types   baseType   = node->GetSimdBaseType();
    emitAttr    attr       = emitActualTypeSize(Compiler::getSIMDTypeForSize(node->GetSimdSize()));
    var_types   targetType = node->TypeGet();
    instruction ins        = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);
    size_t      numArgs    = node->GetOperandCount();
    GenTree*    op1        = node->Op(1);
    regNumber   op1Reg     = REG_NA;
    regNumber   targetReg  = node->GetRegNum();
    emitter*    emit       = GetEmitter();

    // Picks the byte/word/dword/qword form of a mask instruction from the element count
    // (SIMD size divided by the size of the base type): up to 8 elements use the byte
    // form, 16 the word form, 32 the dword form and 64 the qword form.
    // Only invoked from the mask cases below.
    auto selectMaskIns = [&](instruction insByte, instruction insWord, instruction insDword,
                             instruction insQword) -> instruction {
        uint32_t simdSize = node->GetSimdSize();
        uint32_t count    = simdSize / genTypeSize(baseType);

        if (count <= 8)
        {
            assert((count == 2) || (count == 4) || (count == 8));
            return insByte;
        }

        if (count == 16)
        {
            return insWord;
        }

        if (count == 32)
        {
            return insDword;
        }

        assert(count == 64);
        return insQword;
    };

    genConsumeMultiOpOperands(node);

    switch (intrinsicId)
    {
        case NI_AVX2_ConvertToInt32:
        case NI_AVX2_ConvertToUInt32:
        {
            assert(instOptions == INS_OPTS_NONE);
            assert((baseType == TYP_INT) || (baseType == TYP_UINT));

            op1Reg = op1->GetRegNum();

            // The move is always emitted (canSkip is false), even if source and target match.
            emit->emitIns_Mov(ins, emitActualTypeSize(baseType), targetReg, op1Reg, /* canSkip */ false);
            break;
        }

        case NI_AVX2_ConvertToVector256Int16:
        case NI_AVX2_ConvertToVector256Int32:
        case NI_AVX2_ConvertToVector256Int64:
        {
            if (!varTypeIsSIMD(op1->gtType))
            {
                // Until we improve the handling of addressing modes in the emitter, we'll create a
                // temporary GT_IND to generate code with.
                GenTreeIndir load = indirForm(node->TypeGet(), op1);
                emit->emitInsLoadInd(ins, emitTypeSize(TYP_SIMD32), node->GetRegNum(), &load);
            }
            else
            {
                genHWIntrinsic_R_RM(node, ins, EA_32BYTE, targetReg, op1, instOptions);
            }
            break;
        }

        case NI_AVX2_GatherVector128:
        case NI_AVX2_GatherVector256:
        case NI_AVX2_GatherMaskVector128:
        case NI_AVX2_GatherMaskVector256:
        {
            assert(instOptions == INS_OPTS_NONE);

            GenTree* op2     = node->Op(2);
            GenTree* op3     = node->Op(3);
            GenTree* lastOp  = nullptr;
            GenTree* indexOp = nullptr;

            op1Reg                 = op1->GetRegNum();
            regNumber op2Reg       = op2->GetRegNum();
            regNumber addrBaseReg  = REG_NA;
            regNumber addrIndexReg = REG_NA;
            regNumber maskReg      = internalRegisters.Extract(node, RBM_ALLFLOAT);

            if (numArgs == 5)
            {
                // Masked form: (source, base, index, mask, scale)
                assert(intrinsicId == NI_AVX2_GatherMaskVector128 || intrinsicId == NI_AVX2_GatherMaskVector256);

                GenTree* op4 = node->Op(4);
                lastOp       = node->Op(5);

                regNumber op3Reg = op3->GetRegNum();
                regNumber op4Reg = op4->GetRegNum();

                addrBaseReg  = op2Reg;
                addrIndexReg = op3Reg;
                indexOp      = op3;

                // copy op4Reg into the tmp mask register,
                // the mask register will be cleared by gather instructions
                emit->emitIns_Mov(INS_movaps, attr, maskReg, op4Reg, /* canSkip */ false);

                // copy source vector to the target register for masking merge
                emit->emitIns_Mov(INS_movaps, attr, targetReg, op1Reg, /* canSkip */ true);
            }
            else
            {
                // Unmasked form: (base, index, scale)
                assert(intrinsicId == NI_AVX2_GatherVector128 || intrinsicId == NI_AVX2_GatherVector256);

                addrBaseReg  = op1Reg;
                addrIndexReg = op2Reg;
                indexOp      = op2;
                lastOp       = op3;

                // generate all-one mask vector
                assert(!emitter::isHighSimdReg(targetReg));
                emit->emitIns_SIMD_R_R_R(INS_pcmpeqd, attr, maskReg, maskReg, maskReg, instOptions);
            }

            bool isVector128GatherWithVector256Index = (targetType == TYP_SIMD16) && (indexOp->TypeGet() == TYP_SIMD32);

            // hwintrinsiclistxarch.h uses Dword index instructions in default
            if (varTypeIsLong(node->GetAuxiliaryType()))
            {
                switch (ins)
                {
                    case INS_vpgatherdd:
                        ins = INS_vpgatherqd;
                        if (isVector128GatherWithVector256Index)
                        {
                            // YMM index in address mode
                            attr = emitTypeSize(TYP_SIMD32);
                        }
                        break;

                    case INS_vpgatherdq:
                        ins = INS_vpgatherqq;
                        break;

                    case INS_vgatherdps:
                        ins = INS_vgatherqps;
                        if (isVector128GatherWithVector256Index)
                        {
                            // YMM index in address mode
                            attr = emitTypeSize(TYP_SIMD32);
                        }
                        break;

                    case INS_vgatherdpd:
                        ins = INS_vgatherqpd;
                        break;

                    default:
                        unreached();
                }
            }

            // The last operand is a constant in [0, 255] that is passed to the emitter
            // as an 8-bit value.
            assert(lastOp->IsCnsIntOrI());
            ssize_t ival = lastOp->AsIntCon()->IconValue();
            assert((ival >= 0) && (ival <= 255));

            // Target, mask and index registers must all be distinct.
            assert(targetReg != maskReg);
            assert(targetReg != addrIndexReg);
            assert(maskReg != addrIndexReg);

            emit->emitIns_R_AR_R(ins, attr, targetReg, maskReg, addrBaseReg, addrIndexReg, (int8_t)ival, 0);
            break;
        }

        case NI_EVEX_AddMask:
        case NI_EVEX_AndMask:
        case NI_EVEX_AndNotMask:
        case NI_EVEX_OrMask:
        case NI_EVEX_XorMask:
        {
            // Binary mask operations: targetReg = op1Reg <op> op2Reg, all mask registers.
            assert(instOptions == INS_OPTS_NONE);

            switch (intrinsicId)
            {
                case NI_EVEX_AddMask:
                    ins = selectMaskIns(INS_kaddb, INS_kaddw, INS_kaddd, INS_kaddq);
                    break;

                case NI_EVEX_AndMask:
                    ins = selectMaskIns(INS_kandb, INS_kandw, INS_kandd, INS_kandq);
                    break;

                case NI_EVEX_AndNotMask:
                    ins = selectMaskIns(INS_kandnb, INS_kandnw, INS_kandnd, INS_kandnq);
                    break;

                case NI_EVEX_OrMask:
                    ins = selectMaskIns(INS_korb, INS_korw, INS_kord, INS_korq);
                    break;

                default:
                    assert(intrinsicId == NI_EVEX_XorMask);
                    ins = selectMaskIns(INS_kxorb, INS_kxorw, INS_kxord, INS_kxorq);
                    break;
            }

            op1Reg = op1->GetRegNum();

            GenTree*  op2    = node->Op(2);
            regNumber op2Reg = op2->GetRegNum();

            assert(emitter::isMaskReg(targetReg));
            assert(emitter::isMaskReg(op1Reg));
            assert(emitter::isMaskReg(op2Reg));

            // Use EA_32BYTE to ensure the VEX.L bit gets set
            emit->emitIns_R_R_R(ins, EA_32BYTE, targetReg, op1Reg, op2Reg);
            break;
        }

        case NI_EVEX_MoveMask:
        {
            assert(instOptions == INS_OPTS_NONE);

            ins = selectMaskIns(INS_kmovb_gpr, INS_kmovw_gpr, INS_kmovd_gpr, INS_kmovq_gpr);

            // Only the qword form uses an 8-byte operand size; the others use 4 bytes.
            attr = (ins == INS_kmovq_gpr) ? EA_8BYTE : EA_4BYTE;

            op1Reg = op1->GetRegNum();
            assert(emitter::isMaskReg(op1Reg));

            // NOTE(review): INS_FLAGS_DONT_CARE is passed in the position where the other
            // emitIns_Mov calls in this function pass canSkip -- confirm this is intended.
            emit->emitIns_Mov(ins, attr, targetReg, op1Reg, INS_FLAGS_DONT_CARE);
            break;
        }

        case NI_EVEX_KORTEST:
        {
            assert(instOptions == INS_OPTS_NONE);

            ins = selectMaskIns(INS_kortestb, INS_kortestw, INS_kortestd, INS_kortestq);

            // With a single operand the mask is tested against itself.
            op1Reg           = op1->GetRegNum();
            regNumber op2Reg = op1Reg;

            if (node->GetOperandCount() == 2)
            {
                GenTree* op2 = node->Op(2);
                op2Reg       = op2->GetRegNum();
            }

            assert(emitter::isMaskReg(op1Reg));
            assert(emitter::isMaskReg(op2Reg));

            // Pass op2Reg as the second source so that a two-operand node tests
            // op1 against op2 rather than op1 against itself.
            emit->emitIns_R_R(ins, EA_8BYTE, op1Reg, op2Reg);
            break;
        }

        case NI_EVEX_KTEST:
        {
            assert(instOptions == INS_OPTS_NONE);

            ins = selectMaskIns(INS_ktestb, INS_ktestw, INS_ktestd, INS_ktestq);

            op1Reg = op1->GetRegNum();

            GenTree*  op2    = node->Op(2);
            regNumber op2Reg = op2->GetRegNum();

            assert(emitter::isMaskReg(op1Reg));
            assert(emitter::isMaskReg(op2Reg));

            // Test op1 against op2; both operands are required for this intrinsic.
            emit->emitIns_R_R(ins, EA_8BYTE, op1Reg, op2Reg);
            break;
        }

        case NI_EVEX_NotMask:
        {
            assert(instOptions == INS_OPTS_NONE);

            ins = selectMaskIns(INS_knotb, INS_knotw, INS_knotd, INS_knotq);

            op1Reg = op1->GetRegNum();

            assert(emitter::isMaskReg(targetReg));
            assert(emitter::isMaskReg(op1Reg));

            emit->emitIns_R_R(ins, EA_8BYTE, targetReg, op1Reg);
            break;
        }

        case NI_EVEX_ShiftLeftMask:
        case NI_EVEX_ShiftRightMask:
        {
            assert(instOptions == INS_OPTS_NONE);

            if (intrinsicId == NI_EVEX_ShiftLeftMask)
            {
                ins = selectMaskIns(INS_kshiftlb, INS_kshiftlw, INS_kshiftld, INS_kshiftlq);
            }
            else
            {
                ins = selectMaskIns(INS_kshiftrb, INS_kshiftrw, INS_kshiftrd, INS_kshiftrq);
            }

            op1Reg = op1->GetRegNum();

            // The shift amount is a contained constant in [0, 255], emitted as an 8-bit immediate.
            GenTree* op2 = node->Op(2);
            assert(op2->IsCnsIntOrI() && op2->isContained());

            assert(emitter::isMaskReg(targetReg));
            assert(emitter::isMaskReg(op1Reg));

            ssize_t ival = op2->AsIntCon()->IconValue();
            assert((ival >= 0) && (ival <= 255));

            emit->emitIns_R_R_I(ins, EA_8BYTE, targetReg, op1Reg, (int8_t)ival);
            break;
        }

        case NI_AVX512F_ConvertToInt32:
        case NI_AVX512F_ConvertToUInt32:
        case NI_AVX512F_ConvertToUInt32WithTruncation:
        case NI_AVX512F_X64_ConvertToInt64:
        case NI_AVX512F_X64_ConvertToUInt64:
        case NI_AVX512F_X64_ConvertToUInt64WithTruncation:
        case NI_AVX10v1_X64_ConvertToInt64:
        case NI_AVX10v1_X64_ConvertToUInt64:
        case NI_AVX10v1_X64_ConvertToUInt64WithTruncation:
        case NI_AVX10v1_ConvertToInt32:
        case NI_AVX10v1_ConvertToUInt32:
        case NI_AVX10v1_ConvertToUInt32WithTruncation:
        {
            assert(baseType == TYP_DOUBLE || baseType == TYP_FLOAT);

            // Size the instruction by the node's own type rather than by its SIMD size.
            attr = emitTypeSize(targetType);
            genHWIntrinsic_R_RM(node, ins, attr, targetReg, op1, instOptions);
            break;
        }

        case NI_AVX512F_ConvertToVector256Int32:
        case NI_AVX512F_ConvertToVector256UInt32:
        case NI_AVX512F_VL_ConvertToVector128UInt32:
        case NI_AVX512F_VL_ConvertToVector128UInt32WithSaturation:
        case NI_AVX10v1_ConvertToVector128UInt32:
        case NI_AVX10v1_ConvertToVector128UInt32WithSaturation:
        {
            // Floating-point sources use the regular R_RM form; all other base types
            // share the RM_R handling below.
            if (varTypeIsFloating(baseType))
            {
                genHWIntrinsic_R_RM(node, ins, attr, targetReg, op1, instOptions);
                break;
            }
            FALLTHROUGH;
        }

        case NI_AVX512F_ConvertToVector128Byte:
        case NI_AVX512F_ConvertToVector128ByteWithSaturation:
        case NI_AVX512F_ConvertToVector128Int16:
        case NI_AVX512F_ConvertToVector128Int16WithSaturation:
        case NI_AVX512F_ConvertToVector128SByte:
        case NI_AVX512F_ConvertToVector128SByteWithSaturation:
        case NI_AVX512F_ConvertToVector128UInt16:
        case NI_AVX512F_ConvertToVector128UInt16WithSaturation:
        case NI_AVX512F_ConvertToVector256Int16:
        case NI_AVX512F_ConvertToVector256Int16WithSaturation:
        case NI_AVX512F_ConvertToVector256Int32WithSaturation:
        case NI_AVX512F_ConvertToVector256UInt16:
        case NI_AVX512F_ConvertToVector256UInt16WithSaturation:
        case NI_AVX512F_ConvertToVector256UInt32WithSaturation:
        case NI_AVX512F_VL_ConvertToVector128Byte:
        case NI_AVX512F_VL_ConvertToVector128ByteWithSaturation:
        case NI_AVX512F_VL_ConvertToVector128Int16:
        case NI_AVX512F_VL_ConvertToVector128Int16WithSaturation:
        case NI_AVX512F_VL_ConvertToVector128Int32:
        case NI_AVX512F_VL_ConvertToVector128Int32WithSaturation:
        case NI_AVX512F_VL_ConvertToVector128SByte:
        case NI_AVX512F_VL_ConvertToVector128SByteWithSaturation:
        case NI_AVX512F_VL_ConvertToVector128UInt16:
        case NI_AVX512F_VL_ConvertToVector128UInt16WithSaturation:
        case NI_AVX512BW_ConvertToVector256Byte:
        case NI_AVX512BW_ConvertToVector256ByteWithSaturation:
        case NI_AVX512BW_ConvertToVector256SByte:
        case NI_AVX512BW_ConvertToVector256SByteWithSaturation:
        case NI_AVX512BW_VL_ConvertToVector128Byte:
        case NI_AVX512BW_VL_ConvertToVector128ByteWithSaturation:
        case NI_AVX512BW_VL_ConvertToVector128SByte:
        case NI_AVX512BW_VL_ConvertToVector128SByteWithSaturation:
        case NI_AVX10v1_ConvertToVector128Byte:
        case NI_AVX10v1_ConvertToVector128ByteWithSaturation:
        case NI_AVX10v1_ConvertToVector128Int16:
        case NI_AVX10v1_ConvertToVector128Int16WithSaturation:
        case NI_AVX10v1_ConvertToVector128Int32:
        case NI_AVX10v1_ConvertToVector128Int32WithSaturation:
        case NI_AVX10v1_ConvertToVector128SByte:
        case NI_AVX10v1_ConvertToVector128SByteWithSaturation:
        case NI_AVX10v1_ConvertToVector128UInt16:
        case NI_AVX10v1_ConvertToVector128UInt16WithSaturation:
        {
            // These instructions are RM_R and so we need to ensure the targetReg
            // is passed in as the RM register and op1 is passed as the R register
            op1Reg = op1->GetRegNum();
            emit->emitIns_R_R(ins, attr, op1Reg, targetReg, instOptions);
            break;
        }

        case NI_AVX512F_X64_ConvertScalarToVector128Double:
        case NI_AVX512F_X64_ConvertScalarToVector128Single:
        case NI_AVX10v1_X64_ConvertScalarToVector128Double:
        case NI_AVX10v1_X64_ConvertScalarToVector128Single:
        {
            assert(baseType == TYP_ULONG || baseType == TYP_LONG);
            genHWIntrinsic_R_R_RM(node, ins, EA_8BYTE, instOptions);
            break;
        }

        default:
            unreached();
            break;
    }

    genProduceReg(node);
}