in src/coreclr/jit/lsraarm64.cpp [582:1321]
//------------------------------------------------------------------------
// BuildNode: Build the register requirements (uses, defs, internal
//            registers and kills) for a single non-contained node.
//
// Arguments:
//    tree - the node to process; must not be contained (asserted on entry).
//
// Return Value:
//    The number of sources consumed by the node (srcCount).
//    Returns 0 early, without building any uses or defs, for a GT_LCL_VAR
//    for which checkContainedOrCandidateLclVar() returns true.
//
// Notes:
//    dstCount defaults to 1 for a node that is a value and 0 otherwise. Some
//    cases below override it (multi-reg local stores, multi-reg calls,
//    GT_PUTARG_SPLIT and hardware intrinsics). Before returning, dstCount is
//    asserted to equal tree->GetRegisterDstCount(compiler).
//
//    The order in which internal register defs, uses, internal register uses
//    and the def are built differs between cases; keep the statement order
//    within each case intact when editing.
//
int LinearScan::BuildNode(GenTree* tree)
{
assert(!tree->isContained());
// srcCount is assigned by the switch below; dstCount by the default logic that follows.
int srcCount;
int dstCount;
regMaskTP killMask = RBM_NONE;
bool isLocalDefUse = false;
// Reset the build-related members of LinearScan.
clearBuildState();
// Set the default dstCount. This may be modified below.
if (tree->IsValue())
{
dstCount = 1;
if (tree->IsUnusedValue())
{
isLocalDefUse = true;
}
}
else
{
dstCount = 0;
}
switch (tree->OperGet())
{
// Any operator without a dedicated case is handled by BuildSimple.
default:
srcCount = BuildSimple(tree);
break;
case GT_LCL_VAR:
// We make a final determination about whether a GT_LCL_VAR is a candidate or contained
// after liveness. In either case we don't build any uses or defs. Otherwise, this is a
// load of a stack-based local into a register and we'll fall through to the general
// local case below.
if (checkContainedOrCandidateLclVar(tree->AsLclVar()))
{
return 0;
}
FALLTHROUGH;
case GT_LCL_FLD:
{
// A local load has no register sources; it only defines its result.
srcCount = 0;
#ifdef FEATURE_SIMD
// Need an additional register to read upper 4 bytes of Vector3.
if (tree->TypeGet() == TYP_SIMD12)
{
// We need an internal register different from targetReg in which 'tree' produces its result
// because both targetReg and internal reg will be in use at the same time.
buildInternalFloatRegisterDefForNode(tree, allSIMDRegs());
setInternalRegsDelayFree = true;
buildInternalRegisterUses();
}
#endif
BuildDef(tree);
}
break;
case GT_STORE_LCL_VAR:
// A candidate multi-reg local store defines one register per field of the local.
if (tree->IsMultiRegLclVar() && isCandidateMultiRegLclVar(tree->AsLclVar()))
{
dstCount = compiler->lvaGetDesc(tree->AsLclVar())->lvFieldCnt;
}
FALLTHROUGH;
case GT_STORE_LCL_FLD:
srcCount = BuildStoreLoc(tree->AsLclVarCommon());
break;
case GT_FIELD_LIST:
// These should always be contained. We don't correctly allocate or
// generate code for a non-contained GT_FIELD_LIST.
noway_assert(!"Non-contained GT_FIELD_LIST");
srcCount = 0;
break;
case GT_NO_OP:
case GT_START_NONGC:
// No sources, no result, no kills.
srcCount = 0;
assert(dstCount == 0);
break;
case GT_PROF_HOOK:
// No sources or result; only the profiler hook's kill set is recorded.
srcCount = 0;
assert(dstCount == 0);
killMask = getKillSetForProfilerHook();
BuildKills(tree, killMask);
break;
case GT_START_PREEMPTGC:
// This kills GC refs in callee save regs
// NOTE(review): the mask passed to BuildKills here is RBM_NONE; presumably the GC-ref kill
// described above is handled inside BuildKills - confirm.
srcCount = 0;
assert(dstCount == 0);
BuildKills(tree, RBM_NONE);
break;
case GT_CNS_DBL:
{
// NOTE(review): dblConst is already a GenTreeDblCon*, so the second AsDblCon() call
// looks redundant - confirm before simplifying.
GenTreeDblCon* dblConst = tree->AsDblCon();
double constValue = dblConst->AsDblCon()->DconValue();
if (emitter::emitIns_valid_imm_for_fmov(constValue))
{
// Directly encode constant to instructions.
}
else
{
// Reserve int to load constant from memory (IF_LARGELDC)
buildInternalIntRegisterDefForNode(tree);
buildInternalRegisterUses();
}
}
// The def itself is built by the shared constant handling below.
FALLTHROUGH;
case GT_CNS_INT:
{
// Constants have no sources; the defined interval is flagged as holding a constant.
srcCount = 0;
assert(dstCount == 1);
RefPosition* def = BuildDef(tree);
def->getInterval()->isConstant = true;
}
break;
case GT_CNS_VEC:
{
GenTreeVecCon* vecCon = tree->AsVecCon();
if (vecCon->IsAllBitsSet() || vecCon->IsZero())
{
// Directly encode constant to instructions.
}
else
{
// Reserve int to load constant from memory (IF_LARGELDC)
buildInternalIntRegisterDefForNode(tree);
buildInternalRegisterUses();
}
// Same def handling as the scalar constants above.
srcCount = 0;
assert(dstCount == 1);
RefPosition* def = BuildDef(tree);
def->getInterval()->isConstant = true;
break;
}
case GT_BOX:
case GT_COMMA:
case GT_QMARK:
case GT_COLON:
// These operators are not expected to reach this function (see unreached() below).
srcCount = 0;
assert(dstCount == 0);
unreached();
break;
case GT_RETURN:
// Uses are built by BuildReturn; the return kill set is recorded afterwards.
srcCount = BuildReturn(tree);
killMask = getKillSetForReturn();
BuildKills(tree, killMask);
break;
#ifdef SWIFT_SUPPORT
case GT_SWIFT_ERROR_RET:
// The first operand is constrained to the Swift error register; the remaining
// uses are built by BuildReturn.
BuildUse(tree->gtGetOp1(), RBM_SWIFT_ERROR.GetIntRegSet());
// Plus one for error register
srcCount = BuildReturn(tree) + 1;
killMask = getKillSetForReturn();
BuildKills(tree, killMask);
break;
#endif // SWIFT_SUPPORT
case GT_RETFILT:
// A void filter return has no operand; otherwise the TYP_INT operand is
// constrained to RBM_INTRET.
assert(dstCount == 0);
if (tree->TypeGet() == TYP_VOID)
{
srcCount = 0;
}
else
{
assert(tree->TypeGet() == TYP_INT);
srcCount = 1;
BuildUse(tree->gtGetOp1(), RBM_INTRET.GetIntRegSet());
}
break;
case GT_NOP:
// Expected to be void here: no sources and no result.
srcCount = 0;
assert(tree->TypeIs(TYP_VOID));
assert(dstCount == 0);
break;
case GT_KEEPALIVE:
// Consumes its operand and produces no value.
assert(dstCount == 0);
srcCount = BuildOperandUses(tree->gtGetOp1());
break;
case GT_JMP:
// No uses or defs are built for GT_JMP here.
srcCount = 0;
assert(dstCount == 0);
break;
case GT_SWITCH:
// This should never occur since switch nodes must not be visible at this
// point in the JIT.
srcCount = 0;
noway_assert(!"Switch must be lowered at this point");
break;
case GT_JMPTABLE:
// No sources; defines a single result register.
srcCount = 0;
assert(dstCount == 1);
BuildDef(tree);
break;
case GT_SWITCH_TABLE:
// One internal integer register plus the two operands; no result.
// NOTE(review): unlike the other cases that define an internal register, this case does
// not call buildInternalRegisterUses() - presumably intentional; confirm.
buildInternalIntRegisterDefForNode(tree);
srcCount = BuildBinaryUses(tree->AsOp());
assert(dstCount == 0);
break;
case GT_ADD:
case GT_SUB:
if (varTypeIsFloating(tree->TypeGet()))
{
// overflow operations aren't supported on float/double types.
assert(!tree->gtOverflow());
// No implicit conversions at this stage as the expectation is that
// everything is made explicit by adding casts.
assert(tree->gtGetOp1()->TypeGet() == tree->gtGetOp2()->TypeGet());
}
FALLTHROUGH;
case GT_AND:
case GT_AND_NOT:
case GT_OR:
case GT_XOR:
case GT_LSH:
case GT_RSH:
case GT_RSZ:
case GT_ROR:
// Plain binary operators: uses for both operands, then a single def.
srcCount = BuildBinaryUses(tree->AsOp());
assert(dstCount == 1);
BuildDef(tree);
break;
case GT_BFIZ:
// The operand must be a cast; uses are built for the cast's own operand rather than
// for the cast node itself.
assert(tree->gtGetOp1()->OperIs(GT_CAST));
srcCount = BuildOperandUses(tree->gtGetOp1()->gtGetOp1());
BuildDef(tree);
break;
case GT_RETURNTRAP:
// this just turns into a compare of its child with an int
// + a conditional call
// The kill set is that of the CORINFO_HELP_STOP_FOR_GC helper call.
BuildUse(tree->gtGetOp1());
srcCount = 1;
assert(dstCount == 0);
killMask = compiler->compHelperCallKillSet(CORINFO_HELP_STOP_FOR_GC);
BuildKills(tree, killMask);
break;
case GT_MOD:
case GT_UMOD:
// Neither floating-point nor integer remainder nodes are expected here.
NYI_IF(varTypeIsFloating(tree->TypeGet()), "FP Remainder in ARM64");
assert(!"Shouldn't see an integer typed GT_MOD node in ARM64");
srcCount = 0;
break;
case GT_MUL:
if (tree->gtOverflow())
{
// Need a register different from target reg to check for overflow.
buildInternalIntRegisterDefForNode(tree);
setInternalRegsDelayFree = true;
}
FALLTHROUGH;
case GT_DIV:
case GT_MULHI:
case GT_MUL_LONG:
case GT_UDIV:
{
// Operand uses, then uses of any internal register defined above, then the def.
srcCount = BuildBinaryUses(tree->AsOp());
buildInternalRegisterUses();
assert(dstCount == 1);
BuildDef(tree);
}
break;
case GT_INTRINSIC:
{
// Only the math intrinsics listed below are expected; any other name is unreached.
switch (tree->AsIntrinsic()->gtIntrinsicName)
{
case NI_System_Math_Max:
case NI_System_Math_Min:
case NI_System_Math_MaxNumber:
case NI_System_Math_MinNumber:
{
// Binary floating-point intrinsics: both operands are floating and op1 has the node's type.
assert(varTypeIsFloating(tree->gtGetOp1()));
assert(varTypeIsFloating(tree->gtGetOp2()));
assert(tree->gtGetOp1()->TypeIs(tree->TypeGet()));
srcCount = BuildBinaryUses(tree->AsOp());
assert(dstCount == 1);
BuildDef(tree);
break;
}
case NI_System_Math_Abs:
case NI_System_Math_Ceiling:
case NI_System_Math_Floor:
case NI_System_Math_Truncate:
case NI_System_Math_Round:
case NI_System_Math_Sqrt:
{
// Unary floating-point intrinsics: a single use and a single def.
assert(varTypeIsFloating(tree->gtGetOp1()));
assert(tree->gtGetOp1()->TypeIs(tree->TypeGet()));
BuildUse(tree->gtGetOp1());
srcCount = 1;
assert(dstCount == 1);
BuildDef(tree);
break;
}
default:
unreached();
}
}
break;
#ifdef FEATURE_HW_INTRINSICS
case GT_HWINTRINSIC:
// BuildHWIntrinsic may update dstCount through the pointer it is given.
srcCount = BuildHWIntrinsic(tree->AsHWIntrinsic(), &dstCount);
break;
#endif // FEATURE_HW_INTRINSICS
case GT_CAST:
assert(dstCount == 1);
srcCount = BuildCast(tree->AsCast());
break;
case GT_NEG:
case GT_NOT:
// Unary operators: operand uses with no register constraint (RBM_NONE), then the def.
srcCount = BuildOperandUses(tree->gtGetOp1(), RBM_NONE);
assert(dstCount == 1);
BuildDef(tree);
break;
case GT_EQ:
case GT_NE:
case GT_LT:
case GT_LE:
case GT_GE:
case GT_GT:
case GT_TEST_EQ:
case GT_TEST_NE:
case GT_CMP:
case GT_TEST:
case GT_CCMP:
case GT_JCMP:
case GT_JTEST:
// All compare-like operators are delegated to BuildCmp.
srcCount = BuildCmp(tree);
break;
case GT_JTRUE:
// NOTE(review): the count returned by BuildOperandUses is discarded and srcCount is
// fixed at 1 - confirm that the operand always yields exactly one use.
BuildOperandUses(tree->gtGetOp1(), RBM_NONE);
srcCount = 1;
break;
case GT_CKFINITE:
// One operand, one result, and one internal integer register.
// NOTE(review): here BuildDef precedes buildInternalRegisterUses(), the reverse of the
// order used by most other cases in this function - confirm this is intended.
srcCount = 1;
assert(dstCount == 1);
buildInternalIntRegisterDefForNode(tree);
BuildUse(tree->gtGetOp1());
BuildDef(tree);
buildInternalRegisterUses();
break;
case GT_CMPXCHG:
{
GenTreeCmpXchg* cmpXchgNode = tree->AsCmpXchg();
// Address and data are always sources; the comparand only when it is not contained.
srcCount = cmpXchgNode->Comparand()->isContained() ? 2 : 3;
assert(dstCount == 1);
if (!compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics))
{
// For ARMv8 exclusives requires a single internal register
buildInternalIntRegisterDefForNode(tree);
}
// For ARMv8 exclusives the lifetime of the addr and data must be extended because
// they may be used multiple times during retries
// For ARMv8.1 atomic cas the lifetime of the addr and data must be extended to prevent
// them being reused as the target register which must be destroyed early
RefPosition* locationUse = BuildUse(tree->AsCmpXchg()->Addr());
setDelayFree(locationUse);
RefPosition* valueUse = BuildUse(tree->AsCmpXchg()->Data());
setDelayFree(valueUse);
if (!cmpXchgNode->Comparand()->isContained())
{
RefPosition* comparandUse = BuildUse(tree->AsCmpXchg()->Comparand());
// For ARMv8 exclusives the lifetime of the comparand must be extended because
// it may be used multiple times during retries
if (!compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics))
{
setDelayFree(comparandUse);
}
}
// Internals may not collide with target
setInternalRegsDelayFree = true;
buildInternalRegisterUses();
BuildDef(tree);
}
break;
case GT_LOCKADD:
case GT_XORR:
case GT_XAND:
case GT_XADD:
case GT_XCHG:
{
// A void-typed atomic produces no result; otherwise exactly one.
assert(dstCount == (tree->TypeIs(TYP_VOID) ? 0 : 1));
// op1 is always a source; op2 only when it is not contained.
srcCount = tree->gtGetOp2()->isContained() ? 1 : 2;
if (!compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics))
{
// GT_XCHG requires a single internal register; the others require two.
buildInternalIntRegisterDefForNode(tree);
if (tree->OperGet() != GT_XCHG)
{
buildInternalIntRegisterDefForNode(tree);
}
}
else if (tree->OperIs(GT_XAND))
{
// for ldclral we need an internal register.
buildInternalIntRegisterDefForNode(tree);
}
assert(!tree->gtGetOp1()->isContained());
RefPosition* op1Use = BuildUse(tree->gtGetOp1());
RefPosition* op2Use = nullptr;
if (!tree->gtGetOp2()->isContained())
{
op2Use = BuildUse(tree->gtGetOp2());
}
// For ARMv8 exclusives the lifetime of the addr and data must be extended because
// they may be used multiple times during retries
if (!compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics))
{
// Internals may not collide with target
// (the delay-free marking is applied only when the node produces a result)
if (dstCount == 1)
{
setDelayFree(op1Use);
if (op2Use != nullptr)
{
setDelayFree(op2Use);
}
setInternalRegsDelayFree = true;
}
}
buildInternalRegisterUses();
if (dstCount == 1)
{
BuildDef(tree);
}
}
break;
#if FEATURE_ARG_SPLIT
case GT_PUTARG_SPLIT:
// The destination count is the number of registers recorded on the node.
srcCount = BuildPutArgSplit(tree->AsPutArgSplit());
dstCount = tree->AsPutArgSplit()->gtNumRegs;
break;
#endif // FEATURE_ARG_SPLIT
case GT_PUTARG_STK:
srcCount = BuildPutArgStk(tree->AsPutArgStk());
break;
case GT_PUTARG_REG:
srcCount = BuildPutArgReg(tree->AsUnOp());
break;
case GT_CALL:
srcCount = BuildCall(tree->AsCall());
// A multi-reg return defines one register per entry in the return type descriptor.
if (tree->AsCall()->HasMultiRegRetVal())
{
dstCount = tree->AsCall()->GetReturnTypeDesc()->GetReturnRegCount();
}
break;
case GT_BLK:
// These should all be eliminated prior to Lowering.
assert(!"Non-store block node in Lowering");
srcCount = 0;
break;
case GT_STORE_BLK:
srcCount = BuildBlockStore(tree->AsBlk());
break;
case GT_INIT_VAL:
// Always a passthrough of its child's value.
assert(!"INIT_VAL should always be contained");
srcCount = 0;
break;
case GT_LCLHEAP:
{
assert(dstCount == 1);
// Need a variable number of temp regs (see genLclHeap() in codegenarm64.cpp):
// Here '-' means don't care.
//
//  Size?                       Init Memory?    # temp regs
//   0                              -               0
//   const and <=UnrollLimit        -               0
//   const and <PageSize            No              0
//   >UnrollLimit                   Yes             0
//   Non-const                      Yes             0
//   Non-const                      No              2
//
GenTree* size = tree->gtGetOp1();
if (size->IsCnsIntOrI())
{
// A constant size is expected to be contained, so it contributes no source.
assert(size->isContained());
srcCount = 0;
size_t sizeVal = size->AsIntCon()->gtIconVal;
if (sizeVal != 0)
{
// Compute the amount of memory to properly STACK_ALIGN.
// Note: The GenTree node is not updated here as it is cheap to recompute stack aligned size.
// This should also help in debugging as we can examine the original size specified with
// localloc.
sizeVal = AlignUp(sizeVal, STACK_ALIGN);
if (sizeVal <= compiler->getUnrollThreshold(Compiler::UnrollKind::Memset))
{
// Need no internal registers
}
else if (!compiler->info.compInitMem)
{
// No need to initialize allocated stack space.
if (sizeVal < compiler->eeGetPageSize())
{
// Need no internal registers
}
else
{
// We need two registers: regCnt and RegTmp
buildInternalIntRegisterDefForNode(tree);
buildInternalIntRegisterDefForNode(tree);
}
}
}
}
else
{
// Non-constant size: one source, plus two temps when memory is not initialized.
srcCount = 1;
if (!compiler->info.compInitMem)
{
buildInternalIntRegisterDefForNode(tree);
buildInternalIntRegisterDefForNode(tree);
}
}
if (!size->isContained())
{
BuildUse(size);
}
buildInternalRegisterUses();
BuildDef(tree);
}
break;
case GT_BOUNDS_CHECK:
{
GenTreeBoundsChk* node = tree->AsBoundsChk();
// Consumes arrLen & index - has no result
assert(dstCount == 0);
// Uses are built for the index first, then for the array length.
srcCount = BuildOperandUses(node->GetIndex());
srcCount += BuildOperandUses(node->GetArrayLength());
}
break;
case GT_ARR_ELEM:
// These must have been lowered
noway_assert(!"We should never see a GT_ARR_ELEM in lowering");
srcCount = 0;
assert(dstCount == 0);
break;
case GT_LEA:
{
GenTreeAddrMode* lea = tree->AsAddrMode();
GenTree* base = lea->Base();
GenTree* index = lea->Index();
int cns = lea->Offset();
// This LEA is instantiating an address, so we set up the srcCount here.
srcCount = 0;
if (base != nullptr)
{
srcCount++;
BuildUse(base);
}
if (index != nullptr)
{
srcCount++;
// For a contained BFIZ or CAST index, the use is built on the underlying cast
// operand; both forms require a zero offset (asserted).
if (index->OperIs(GT_BFIZ) && index->isContained())
{
GenTreeCast* cast = index->gtGetOp1()->AsCast();
assert(cast->isContained() && (cns == 0));
BuildUse(cast->CastOp());
}
else if (index->OperIs(GT_CAST) && index->isContained())
{
GenTreeCast* cast = index->AsCast();
assert(cast->isContained() && (cns == 0));
BuildUse(cast->CastOp());
}
else
{
BuildUse(index);
}
}
assert(dstCount == 1);
// On ARM64 we may need a single internal register
// (when both conditions are true then we still only need a single internal register)
if ((index != nullptr) && (cns != 0))
{
// ARM64 does not support both Index and offset so we need an internal register
buildInternalIntRegisterDefForNode(tree);
}
else if (!emitter::emitIns_valid_imm_for_add(cns, EA_8BYTE))
{
// This offset can't be contained in the add instruction, so we need an internal register
buildInternalIntRegisterDefForNode(tree);
}
buildInternalRegisterUses();
BuildDef(tree);
}
break;
case GT_STOREIND:
{
assert(dstCount == 0);
// Stores that need a GC write barrier are handled entirely by BuildGCWriteBarrier.
if (compiler->codeGen->gcInfo.gcIsWriteBarrierStoreIndNode(tree->AsStoreInd()))
{
srcCount = BuildGCWriteBarrier(tree);
break;
}
// Otherwise: the address uses from BuildIndir, plus the stored value when it is not contained.
srcCount = BuildIndir(tree->AsIndir());
if (!tree->gtGetOp2()->isContained())
{
BuildUse(tree->gtGetOp2());
srcCount++;
}
}
break;
case GT_NULLCHECK:
case GT_IND:
// GT_NULLCHECK produces no value; GT_IND produces one.
assert(dstCount == (tree->OperIs(GT_NULLCHECK) ? 0 : 1));
srcCount = BuildIndir(tree->AsIndir());
break;
case GT_CATCH_ARG:
// No sources; the result is constrained to the exception object register.
srcCount = 0;
assert(dstCount == 1);
BuildDef(tree, RBM_EXCEPTION_OBJECT.GetIntRegSet());
break;
case GT_INDEX_ADDR:
assert(dstCount == 1);
srcCount = BuildBinaryUses(tree->AsOp());
// One internal integer register is always requested.
buildInternalIntRegisterDefForNode(tree);
// A second one is requested when the index is not TYP_I_IMPL and the element size is
// not a power of two that is <= 32768.
if (!tree->AsIndexAddr()->Index()->TypeIs(TYP_I_IMPL) &&
!(isPow2(tree->AsIndexAddr()->gtElemSize) && (tree->AsIndexAddr()->gtElemSize <= 32768)))
{
// We're going to need a temp reg to widen the index.
buildInternalIntRegisterDefForNode(tree);
}
buildInternalRegisterUses();
BuildDef(tree);
break;
case GT_SELECT:
assert(dstCount == 1);
srcCount = BuildSelect(tree->AsConditional());
break;
case GT_SELECTCC:
assert(dstCount == 1);
srcCount = BuildSelect(tree->AsOp());
break;
#ifdef SWIFT_SUPPORT
case GT_SWIFT_ERROR:
srcCount = 0;
assert(dstCount == 1);
// Any register should do here, but the error register value should immediately
// be moved from GT_SWIFT_ERROR's destination register to the SwiftError struct,
// and we know REG_SWIFT_ERROR should be busy up to this point, anyway.
// By forcing LSRA to use REG_SWIFT_ERROR as both the source and destination register,
// we can ensure the redundant move is elided.
BuildDef(tree, RBM_SWIFT_ERROR.GetIntRegSet());
break;
#endif // SWIFT_SUPPORT
} // end switch (tree->OperGet())
// dstCount may have been changed by the switch, so re-derive isLocalDefUse for an unused value.
if (tree->IsUnusedValue() && (dstCount != 0))
{
isLocalDefUse = true;
}
// We need to be sure that we've set srcCount and dstCount appropriately
// Only multi-reg nodes may define more than one register.
assert((dstCount < 2) || tree->IsMultiRegNode());
assert(isLocalDefUse == (tree->IsValue() && tree->IsUnusedValue()));
// A value-producing node must define at least one register.
assert(!tree->IsValue() || (dstCount != 0));
assert(dstCount == tree->GetRegisterDstCount(compiler));
return srcCount;
}