in llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp [2274:3431]
bool AArch64InstructionSelector::select(MachineInstr &I) {
assert(I.getParent() && "Instruction should be in a basic block!");
assert(I.getParent()->getParent() && "Instruction should be in a function!");
MachineBasicBlock &MBB = *I.getParent();
MachineFunction &MF = *MBB.getParent();
MachineRegisterInfo &MRI = MF.getRegInfo();
const AArch64Subtarget *Subtarget =
&static_cast<const AArch64Subtarget &>(MF.getSubtarget());
if (Subtarget->requiresStrictAlign()) {
// We don't support this feature yet.
LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
return false;
}
MIB.setInstrAndDebugLoc(I);
unsigned Opcode = I.getOpcode();
// G_PHI requires same handling as PHI
if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
// Certain non-generic instructions also need some special handling.
if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
const Register DefReg = I.getOperand(0).getReg();
const LLT DefTy = MRI.getType(DefReg);
const RegClassOrRegBank &RegClassOrBank =
MRI.getRegClassOrRegBank(DefReg);
const TargetRegisterClass *DefRC
= RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
if (!DefRC) {
if (!DefTy.isValid()) {
LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
return false;
}
const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI);
if (!DefRC) {
LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
return false;
}
}
I.setDesc(TII.get(TargetOpcode::PHI));
return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
}
if (I.isCopy())
return selectCopy(I, TII, MRI, TRI, RBI);
return true;
}
if (I.getNumOperands() != I.getNumExplicitOperands()) {
LLVM_DEBUG(
dbgs() << "Generic instruction has unexpected implicit operands\n");
return false;
}
// Try to do some lowering before we start instruction selecting. These
// lowerings are purely transformations on the input G_MIR and so selection
// must continue after any modification of the instruction.
if (preISelLower(I)) {
Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
}
// There may be patterns that the importer can't handle optimally but still
// selects into a suboptimal sequence, so our custom C++ selection code later
// never gets a chance to work on them. Therefore, we have an early selection
// attempt here to give priority to certain selection routines over the
// imported ones.
if (earlySelect(I))
return true;
if (selectImpl(I, *CoverageInfo))
return true;
LLT Ty =
I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
switch (Opcode) {
case TargetOpcode::G_SBFX:
case TargetOpcode::G_UBFX: {
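// Lower the bitfield extract to UBFM/SBFM; the two immediates are the LSB
// of the field and its MSB (LSB + Width - 1).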
static const unsigned OpcTable[2][2] = {
{AArch64::UBFMWri, AArch64::UBFMXri},
{AArch64::SBFMWri, AArch64::SBFMXri}};
bool IsSigned = Opcode == TargetOpcode::G_SBFX;
unsigned Size = Ty.getSizeInBits();
unsigned Opc = OpcTable[IsSigned][Size == 64];
auto Cst1 =
getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI);
assert(Cst1 && "Should have gotten a constant for src 1?");
auto Cst2 =
getIConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI);
assert(Cst2 && "Should have gotten a constant for src 2?");
auto LSB = Cst1->Value.getZExtValue();
auto Width = Cst2->Value.getZExtValue();
auto BitfieldInst =
MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)})
.addImm(LSB)
.addImm(LSB + Width - 1);
I.eraseFromParent();
return constrainSelectedInstRegOperands(*BitfieldInst, TII, TRI, RBI);
}
case TargetOpcode::G_BRCOND:
return selectCompareBranch(I, MF, MRI);
case TargetOpcode::G_BRINDIRECT: {
I.setDesc(TII.get(AArch64::BR));
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
case TargetOpcode::G_BRJT:
return selectBrJT(I, MRI);
case AArch64::G_ADD_LOW: {
// This op may have been separated from its ADRP companion by the localizer
// or some other code motion pass. Given that many CPUs will try to
// macro fuse these operations anyway, select this into a MOVaddr pseudo
// which will later be expanded into an ADRP+ADD pair after scheduling.
MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
if (BaseMI->getOpcode() != AArch64::ADRP) {
I.setDesc(TII.get(AArch64::ADDXri));
I.addOperand(MachineOperand::CreateImm(0));
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
assert(TM.getCodeModel() == CodeModel::Small &&
"Expected small code model");
auto Op1 = BaseMI->getOperand(1);
auto Op2 = I.getOperand(2);
auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
.addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
Op1.getTargetFlags())
.addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
Op2.getTargetFlags());
I.eraseFromParent();
return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
}
case TargetOpcode::G_BSWAP: {
// Handle vector types for G_BSWAP directly.
Register DstReg = I.getOperand(0).getReg();
LLT DstTy = MRI.getType(DstReg);
// We should only get vector types here; everything else is handled by the
// importer right now.
if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
return false;
}
// Only handle 4 and 2 element vectors for now.
// TODO: 16-bit elements.
unsigned NumElts = DstTy.getNumElements();
if (NumElts != 4 && NumElts != 2) {
LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
return false;
}
// Choose the correct opcode for the supported types. Right now, that's
// v2s32, v4s32, and v2s64.
unsigned Opc = 0;
unsigned EltSize = DstTy.getElementType().getSizeInBits();
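// A vector bswap is a byte reversal within each element, which maps onto
// REV32/REV64 applied to the vector reinterpreted as bytes.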
if (EltSize == 32)
Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
: AArch64::REV32v16i8;
else if (EltSize == 64)
Opc = AArch64::REV64v16i8;
// We should always get something by the time we get here...
assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");
I.setDesc(TII.get(Opc));
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
case TargetOpcode::G_FCONSTANT:
case TargetOpcode::G_CONSTANT: {
const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
const LLT s8 = LLT::scalar(8);
const LLT s16 = LLT::scalar(16);
const LLT s32 = LLT::scalar(32);
const LLT s64 = LLT::scalar(64);
const LLT s128 = LLT::scalar(128);
const LLT p0 = LLT::pointer(0, 64);
const Register DefReg = I.getOperand(0).getReg();
const LLT DefTy = MRI.getType(DefReg);
const unsigned DefSize = DefTy.getSizeInBits();
const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
// FIXME: Redundant check, but even less readable when factored out.
if (isFP) {
if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {
LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
<< " constant, expected: " << s16 << " or " << s32
<< " or " << s64 << " or " << s128 << '\n');
return false;
}
if (RB.getID() != AArch64::FPRRegBankID) {
LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
<< " constant on bank: " << RB
<< ", expected: FPR\n");
return false;
}
// The 0.0 case is covered by tablegen for every size except FP128. Reject
// it here so we can be sure tablegen works correctly and isn't rescued by
// this code; the FP128 0.0 case is handled below.
if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0))
return false;
} else {
// s32 and s64 are covered by tablegen.
if (Ty != p0 && Ty != s8 && Ty != s16) {
LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
<< " constant, expected: " << s32 << ", " << s64
<< ", or " << p0 << '\n');
return false;
}
if (RB.getID() != AArch64::GPRRegBankID) {
LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
<< " constant on bank: " << RB
<< ", expected: GPR\n");
return false;
}
}
if (isFP) {
const TargetRegisterClass &FPRRC = *getMinClassForRegBank(RB, DefSize);
// For 16, 64, and 128b values, emit a constant pool load.
switch (DefSize) {
default:
llvm_unreachable("Unexpected destination size for G_FCONSTANT?");
case 32:
// For s32, use a cp load if we have optsize/minsize.
if (!shouldOptForSize(&MF))
break;
LLVM_FALLTHROUGH;
case 16:
case 64:
case 128: {
auto *FPImm = I.getOperand(1).getFPImm();
auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
if (!LoadMI) {
LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
return false;
}
MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
I.eraseFromParent();
return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
}
}
// Otherwise, materialize the constant into a GPR with a normal integer mov
// and copy the result into the FPR def.
assert(DefSize == 32 &&
"Expected constant pool loads for all sizes other than 32!");
const Register DefGPRReg =
MRI.createVirtualRegister(&AArch64::GPR32RegClass);
MachineOperand &RegOp = I.getOperand(0);
RegOp.setReg(DefGPRReg);
MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
MIB.buildCopy({DefReg}, {DefGPRReg});
if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
return false;
}
MachineOperand &ImmOp = I.getOperand(1);
// FIXME: Is going through int64_t always correct?
ImmOp.ChangeToImmediate(
ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
} else if (I.getOperand(1).isCImm()) {
uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
I.getOperand(1).ChangeToImmediate(Val);
} else if (I.getOperand(1).isImm()) {
uint64_t Val = I.getOperand(1).getImm();
I.getOperand(1).ChangeToImmediate(Val);
}
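// MOVi32imm/MOVi64imm are pseudos; they are expanded into the best
// MOVZ/MOVN/MOVK (or ORR) sequence after selection.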
const unsigned MovOpc =
DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
I.setDesc(TII.get(MovOpc));
constrainSelectedInstRegOperands(I, TII, TRI, RBI);
return true;
}
case TargetOpcode::G_EXTRACT: {
Register DstReg = I.getOperand(0).getReg();
Register SrcReg = I.getOperand(1).getReg();
LLT SrcTy = MRI.getType(SrcReg);
LLT DstTy = MRI.getType(DstReg);
(void)DstTy;
unsigned SrcSize = SrcTy.getSizeInBits();
if (SrcTy.getSizeInBits() > 64) {
// This should be an extract of an s128, which is like a vector extract.
if (SrcTy.getSizeInBits() != 128)
return false;
// Only support extracting 64 bits from an s128 at the moment.
if (DstTy.getSizeInBits() != 64)
return false;
unsigned Offset = I.getOperand(2).getImm();
if (Offset % 64 != 0)
return false;
// Always check that we have the right regbank.
const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!");
if (SrcRB.getID() == AArch64::GPRRegBankID) {
MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
.addUse(SrcReg, 0, Offset == 0 ? AArch64::sube64 : AArch64::subo64);
I.eraseFromParent();
return true;
}
// Emit the same code as a vector extract.
// Offset must be a multiple of 64.
unsigned LaneIdx = Offset / 64;
MachineInstr *Extract = emitExtractVectorElt(
DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
if (!Extract)
return false;
I.eraseFromParent();
return true;
}
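// Scalar case: select to UBFM, reusing the existing offset operand as immr
// and appending imms = Offset + DstSize - 1.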
I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
Ty.getSizeInBits() - 1);
if (SrcSize < 64) {
assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
"unexpected G_EXTRACT types");
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
.addReg(DstReg, 0, AArch64::sub_32);
RBI.constrainGenericRegister(I.getOperand(0).getReg(),
AArch64::GPR32RegClass, MRI);
I.getOperand(0).setReg(DstReg);
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
case TargetOpcode::G_INSERT: {
LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
LLT DstTy = MRI.getType(I.getOperand(0).getReg());
unsigned DstSize = DstTy.getSizeInBits();
// Larger inserts are vectors, same-size ones should be something else by
// now (split up or turned into COPYs).
if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
return false;
I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
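// BFM encodes the insert as a rotate: immr = (DstSize - LSB) % DstSize and
// imms = Width - 1.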
unsigned LSB = I.getOperand(3).getImm();
unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
I.getOperand(3).setImm((DstSize - LSB) % DstSize);
MachineInstrBuilder(MF, I).addImm(Width - 1);
if (DstSize < 64) {
assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
"unexpected G_INSERT types");
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
TII.get(AArch64::SUBREG_TO_REG))
.addDef(SrcReg)
.addImm(0)
.addUse(I.getOperand(2).getReg())
.addImm(AArch64::sub_32);
RBI.constrainGenericRegister(I.getOperand(2).getReg(),
AArch64::GPR32RegClass, MRI);
I.getOperand(2).setReg(SrcReg);
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
case TargetOpcode::G_FRAME_INDEX: {
// allocas and G_FRAME_INDEX are only supported in addrspace(0).
if (Ty != LLT::pointer(0, 64)) {
LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
<< ", expected: " << LLT::pointer(0, 64) << '\n');
return false;
}
I.setDesc(TII.get(AArch64::ADDXri));
// MOs for a #0 shifted immediate.
I.addOperand(MachineOperand::CreateImm(0));
I.addOperand(MachineOperand::CreateImm(0));
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
case TargetOpcode::G_GLOBAL_VALUE: {
auto GV = I.getOperand(1).getGlobal();
if (GV->isThreadLocal())
return selectTLSGlobalValue(I, MRI);
unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
if (OpFlags & AArch64II::MO_GOT) {
I.setDesc(TII.get(AArch64::LOADgot));
I.getOperand(1).setTargetFlags(OpFlags);
} else if (TM.getCodeModel() == CodeModel::Large) {
// Materialize the global using movz/movk instructions.
materializeLargeCMVal(I, GV, OpFlags);
I.eraseFromParent();
return true;
} else if (TM.getCodeModel() == CodeModel::Tiny) {
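// With the tiny code model everything is assumed to be within +/- 1MB, so
// a single ADR is enough to form the address.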
I.setDesc(TII.get(AArch64::ADR));
I.getOperand(1).setTargetFlags(OpFlags);
} else {
I.setDesc(TII.get(AArch64::MOVaddr));
I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
MachineInstrBuilder MIB(MF, I);
MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
}
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
case TargetOpcode::G_ZEXTLOAD:
case TargetOpcode::G_LOAD:
case TargetOpcode::G_STORE: {
GLoadStore &LdSt = cast<GLoadStore>(I);
bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
LLT PtrTy = MRI.getType(LdSt.getPointerReg());
if (PtrTy != LLT::pointer(0, 64)) {
LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
<< ", expected: " << LLT::pointer(0, 64) << '\n');
return false;
}
uint64_t MemSizeInBytes = LdSt.getMemSize();
unsigned MemSizeInBits = LdSt.getMemSizeInBits();
AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();
// Need special instructions for atomics that affect ordering.
if (Order != AtomicOrdering::NotAtomic &&
Order != AtomicOrdering::Unordered &&
Order != AtomicOrdering::Monotonic) {
assert(!isa<GZExtLoad>(LdSt));
if (MemSizeInBytes > 64)
return false;
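// Use load-acquire / store-release instructions. The opcode tables below
// are indexed by the log2 of the access size in bytes.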
if (isa<GLoad>(LdSt)) {
static unsigned Opcodes[] = {AArch64::LDARB, AArch64::LDARH,
AArch64::LDARW, AArch64::LDARX};
I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
} else {
static unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
AArch64::STLRW, AArch64::STLRX};
Register ValReg = LdSt.getReg(0);
if (MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
// Emit a subreg copy of 32 bits.
Register NewVal = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
MIB.buildInstr(TargetOpcode::COPY, {NewVal}, {})
.addReg(I.getOperand(0).getReg(), 0, AArch64::sub_32);
I.getOperand(0).setReg(NewVal);
}
I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
}
constrainSelectedInstRegOperands(I, TII, TRI, RBI);
return true;
}
#ifndef NDEBUG
const Register PtrReg = LdSt.getPointerReg();
const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
// Check that the pointer register is valid.
assert(PtrRB.getID() == AArch64::GPRRegBankID &&
"Load/Store pointer operand isn't a GPR");
assert(MRI.getType(PtrReg).isPointer() &&
"Load/Store pointer operand isn't a pointer");
#endif
const Register ValReg = LdSt.getReg(0);
const LLT ValTy = MRI.getType(ValReg);
const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
// The code below doesn't support truncating stores, so narrow the stored
// value to the memory type with a subreg copy first.
if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
unsigned SubReg;
LLT MemTy = LdSt.getMMO().getMemoryType();
auto *RC = getRegClassForTypeOnBank(MemTy, RB, RBI);
if (!getSubRegForClass(RC, TRI, SubReg))
return false;
// Generate a subreg copy.
auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {})
.addReg(ValReg, 0, SubReg)
.getReg(0);
RBI.constrainGenericRegister(Copy, *RC, MRI);
LdSt.getOperand(0).setReg(Copy);
} else if (isa<GLoad>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
// If this is an any-extending load from the FPR bank, split it into a regular
// load + extend.
if (RB.getID() == AArch64::FPRRegBankID) {
unsigned SubReg;
LLT MemTy = LdSt.getMMO().getMemoryType();
auto *RC = getRegClassForTypeOnBank(MemTy, RB, RBI);
if (!getSubRegForClass(RC, TRI, SubReg))
return false;
Register OldDst = LdSt.getReg(0);
Register NewDst =
MRI.createGenericVirtualRegister(LdSt.getMMO().getMemoryType());
LdSt.getOperand(0).setReg(NewDst);
MRI.setRegBank(NewDst, RB);
// Generate a SUBREG_TO_REG to extend it.
MIB.setInsertPt(MIB.getMBB(), std::next(LdSt.getIterator()));
MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
.addImm(0)
.addUse(NewDst)
.addImm(SubReg);
auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB, RBI);
RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI);
MIB.setInstr(LdSt);
}
}
// Helper lambda for partially selecting I. Either returns the original
// instruction with an updated opcode, or a new instruction.
auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
bool IsStore = isa<GStore>(I);
const unsigned NewOpc =
selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
if (NewOpc == I.getOpcode())
return nullptr;
// Check if we can fold anything into the addressing mode.
auto AddrModeFns =
selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
if (!AddrModeFns) {
// Can't fold anything. Use the original instruction.
I.setDesc(TII.get(NewOpc));
I.addOperand(MachineOperand::CreateImm(0));
return &I;
}
// Folded something. Create a new instruction and return it.
auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
Register CurValReg = I.getOperand(0).getReg();
IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
NewInst.cloneMemRefs(I);
for (auto &Fn : *AddrModeFns)
Fn(NewInst);
I.eraseFromParent();
return &*NewInst;
};
MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
if (!LoadStore)
return false;
// If we're storing a 0, use WZR/XZR.
if (Opcode == TargetOpcode::G_STORE) {
auto CVal = getIConstantVRegValWithLookThrough(
LoadStore->getOperand(0).getReg(), MRI);
if (CVal && CVal->Value == 0) {
switch (LoadStore->getOpcode()) {
case AArch64::STRWui:
case AArch64::STRHHui:
case AArch64::STRBBui:
LoadStore->getOperand(0).setReg(AArch64::WZR);
break;
case AArch64::STRXui:
LoadStore->getOperand(0).setReg(AArch64::XZR);
break;
}
}
}
if (IsZExtLoad) {
// The zextload from a smaller type to i32 should be handled by the
// importer.
if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
return false;
// For a ZEXTLOAD, change the load to define a narrower 32-bit register and
// zero-extend it to the full 64 bits with SUBREG_TO_REG.
Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
Register DstReg = LoadStore->getOperand(0).getReg();
LoadStore->getOperand(0).setReg(LdReg);
MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
.addImm(0)
.addUse(LdReg)
.addImm(AArch64::sub_32);
constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
MRI);
}
return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
}
case TargetOpcode::G_SMULH:
case TargetOpcode::G_UMULH: {
// Reject the various things we don't support yet.
if (unsupportedBinOp(I, RBI, MRI, TRI))
return false;
const Register DefReg = I.getOperand(0).getReg();
const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
if (RB.getID() != AArch64::GPRRegBankID) {
LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
return false;
}
if (Ty != LLT::scalar(64)) {
LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
<< ", expected: " << LLT::scalar(64) << '\n');
return false;
}
unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
: AArch64::UMULHrr;
I.setDesc(TII.get(NewOpc));
// Now that we selected an opcode, we need to constrain the register
// operands to use appropriate classes.
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
case TargetOpcode::G_LSHR:
case TargetOpcode::G_ASHR:
if (MRI.getType(I.getOperand(0).getReg()).isVector())
return selectVectorAshrLshr(I, MRI);
LLVM_FALLTHROUGH;
case TargetOpcode::G_SHL:
if (Opcode == TargetOpcode::G_SHL &&
MRI.getType(I.getOperand(0).getReg()).isVector())
return selectVectorSHL(I, MRI);
// These shifts were legalized to have 64-bit shift amounts because we want
// to take advantage of the selection patterns that assume the immediates
// are s64s. However, selectBinaryOp will assume both operands have the same
// bit size.
{
Register SrcReg = I.getOperand(1).getReg();
Register ShiftReg = I.getOperand(2).getReg();
const LLT ShiftTy = MRI.getType(ShiftReg);
const LLT SrcTy = MRI.getType(SrcReg);
if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
ShiftTy.getSizeInBits() == 64) {
assert(!ShiftTy.isVector() && "unexpected vector shift ty");
assert(MRI.getVRegDef(ShiftReg) &&
"could not find a vreg definition for shift amount");
// Insert a subregister copy to implement a 64->32 trunc
auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
.addReg(ShiftReg, 0, AArch64::sub_32);
MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
I.getOperand(2).setReg(Trunc.getReg(0));
}
}
LLVM_FALLTHROUGH;
case TargetOpcode::G_FADD:
case TargetOpcode::G_FSUB:
case TargetOpcode::G_FMUL:
case TargetOpcode::G_FDIV:
case TargetOpcode::G_OR: {
// Reject the various things we don't support yet.
if (unsupportedBinOp(I, RBI, MRI, TRI))
return false;
const unsigned OpSize = Ty.getSizeInBits();
const Register DefReg = I.getOperand(0).getReg();
const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
if (NewOpc == I.getOpcode())
return false;
I.setDesc(TII.get(NewOpc));
// FIXME: Should the type be always reset in setDesc?
// Now that we selected an opcode, we need to constrain the register
// operands to use appropriate classes.
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
case TargetOpcode::G_PTR_ADD: {
emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), MIB);
I.eraseFromParent();
return true;
}
case TargetOpcode::G_SADDO:
case TargetOpcode::G_UADDO:
case TargetOpcode::G_SSUBO:
case TargetOpcode::G_USUBO: {
// Emit the operation and get the correct condition code.
auto OpAndCC = emitOverflowOp(Opcode, I.getOperand(0).getReg(),
I.getOperand(2), I.getOperand(3), MIB);
// Now, put the overflow result in the register given by the second def of
// the overflow op (operand 1). CSINC increments the result when the
// predicate is false, so to get the increment when it's true, we need to
// use the inverse. In this case, we want to increment when carry is set.
Register ZReg = AArch64::WZR;
emitCSINC(/*Dst=*/I.getOperand(1).getReg(), /*Src1=*/ZReg, /*Src2=*/ZReg,
getInvertedCondCode(OpAndCC.second), MIB);
I.eraseFromParent();
return true;
}
case TargetOpcode::G_PTRMASK: {
Register MaskReg = I.getOperand(2).getReg();
Optional<int64_t> MaskVal = getIConstantVRegSExtVal(MaskReg, MRI);
// TODO: Implement arbitrary cases
if (!MaskVal || !isShiftedMask_64(*MaskVal))
return false;
uint64_t Mask = *MaskVal;
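// A shifted mask of ones can be encoded directly as an AND logical
// immediate.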
I.setDesc(TII.get(AArch64::ANDXri));
I.getOperand(2).ChangeToImmediate(
AArch64_AM::encodeLogicalImmediate(Mask, 64));
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
case TargetOpcode::G_PTRTOINT:
case TargetOpcode::G_TRUNC: {
const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
const Register DstReg = I.getOperand(0).getReg();
const Register SrcReg = I.getOperand(1).getReg();
const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
if (DstRB.getID() != SrcRB.getID()) {
LLVM_DEBUG(
dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
return false;
}
if (DstRB.getID() == AArch64::GPRRegBankID) {
const TargetRegisterClass *DstRC =
getRegClassForTypeOnBank(DstTy, DstRB, RBI);
if (!DstRC)
return false;
const TargetRegisterClass *SrcRC =
getRegClassForTypeOnBank(SrcTy, SrcRB, RBI);
if (!SrcRC)
return false;
if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
return false;
}
if (DstRC == SrcRC) {
// Nothing to be done
} else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
SrcTy == LLT::scalar(64)) {
llvm_unreachable("TableGen can import this case");
return false;
} else if (DstRC == &AArch64::GPR32RegClass &&
SrcRC == &AArch64::GPR64RegClass) {
I.getOperand(1).setSubReg(AArch64::sub_32);
} else {
LLVM_DEBUG(
dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
return false;
}
I.setDesc(TII.get(TargetOpcode::COPY));
return true;
} else if (DstRB.getID() == AArch64::FPRRegBankID) {
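// On the FPR bank, a v4s32 -> v4s16 truncate is an XTN, and a scalar
// truncate of a 128-bit value is an extract of the low lane.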
if (DstTy == LLT::fixed_vector(4, 16) &&
SrcTy == LLT::fixed_vector(4, 32)) {
I.setDesc(TII.get(AArch64::XTNv4i16));
constrainSelectedInstRegOperands(I, TII, TRI, RBI);
return true;
}
if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
MachineInstr *Extract = emitExtractVectorElt(
DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
if (!Extract)
return false;
I.eraseFromParent();
return true;
}
// We might have a vector G_PTRTOINT, in which case just emit a COPY.
if (Opcode == TargetOpcode::G_PTRTOINT) {
assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
I.setDesc(TII.get(TargetOpcode::COPY));
return selectCopy(I, TII, MRI, TRI, RBI);
}
}
return false;
}
case TargetOpcode::G_ANYEXT: {
if (selectUSMovFromExtend(I, MRI))
return true;
const Register DstReg = I.getOperand(0).getReg();
const Register SrcReg = I.getOperand(1).getReg();
const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
if (RBDst.getID() != AArch64::GPRRegBankID) {
LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
<< ", expected: GPR\n");
return false;
}
const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
if (RBSrc.getID() != AArch64::GPRRegBankID) {
LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
<< ", expected: GPR\n");
return false;
}
const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
if (DstSize == 0) {
LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
return false;
}
if (DstSize != 64 && DstSize > 32) {
LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
<< ", expected: 32 or 64\n");
return false;
}
// At this point G_ANYEXT is just like a plain COPY, but we need
// to explicitly form the 64-bit value when the destination is 64 bits.
if (DstSize > 32) {
Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
.addDef(ExtSrc)
.addImm(0)
.addUse(SrcReg)
.addImm(AArch64::sub_32);
I.getOperand(1).setReg(ExtSrc);
}
return selectCopy(I, TII, MRI, TRI, RBI);
}
case TargetOpcode::G_ZEXT:
case TargetOpcode::G_SEXT_INREG:
case TargetOpcode::G_SEXT: {
if (selectUSMovFromExtend(I, MRI))
return true;
unsigned Opcode = I.getOpcode();
const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
const Register DefReg = I.getOperand(0).getReg();
Register SrcReg = I.getOperand(1).getReg();
const LLT DstTy = MRI.getType(DefReg);
const LLT SrcTy = MRI.getType(SrcReg);
unsigned DstSize = DstTy.getSizeInBits();
unsigned SrcSize = SrcTy.getSizeInBits();
// SEXT_INREG has the same src reg size as dst; the size of the value to be
// extended is encoded in the imm.
if (Opcode == TargetOpcode::G_SEXT_INREG)
SrcSize = I.getOperand(2).getImm();
if (DstTy.isVector())
return false; // Should be handled by imported patterns.
assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
AArch64::GPRRegBankID &&
"Unexpected ext regbank");
MachineInstr *ExtI;
// First check if we're extending the result of a load which has a dest type
// smaller than 32 bits; in that case this zext is redundant. GPR32 is the
// smallest GPR register on AArch64 and all loads which are smaller
// automatically zero-extend the upper bits. E.g.
// %v(s8) = G_LOAD %p, :: (load 1)
// %v2(s32) = G_ZEXT %v(s8)
if (!IsSigned) {
auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
bool IsGPR =
RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
if (LoadMI && IsGPR) {
const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
unsigned BytesLoaded = MemOp->getSize();
if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
return selectCopy(I, TII, MRI, TRI, RBI);
}
// For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs)
// + SUBREG_TO_REG.
//
// If we are zero extending from 32 bits to 64 bits, it's possible that
// the instruction implicitly does the zero extend for us. In that case,
// we only need the SUBREG_TO_REG.
if (IsGPR && SrcSize == 32 && DstSize == 64) {
// Unlike with the G_LOAD case, we don't want to look through copies
// here. (See isDef32.)
MachineInstr *Def = MRI.getVRegDef(SrcReg);
Register SubregToRegSrc = SrcReg;
// Does the instruction implicitly zero extend?
if (!Def || !isDef32(*Def)) {
// No. Zero out using an OR.
Register OrDst = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
const Register ZReg = AArch64::WZR;
MIB.buildInstr(AArch64::ORRWrs, {OrDst}, {ZReg, SrcReg}).addImm(0);
SubregToRegSrc = OrDst;
}
MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
.addImm(0)
.addUse(SubregToRegSrc)
.addImm(AArch64::sub_32);
if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
MRI)) {
LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
return false;
}
if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
MRI)) {
LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
return false;
}
I.eraseFromParent();
return true;
}
}
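// Otherwise, implement the extension with a bitfield move ([SU]BFM) using
// immr = 0 and imms = SrcSize - 1. For a 64-bit destination the source is
// first widened to 64 bits with SUBREG_TO_REG.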
if (DstSize == 64) {
if (Opcode != TargetOpcode::G_SEXT_INREG) {
// FIXME: Can we avoid manually doing this?
if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
MRI)) {
LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
<< " operand\n");
return false;
}
SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
{&AArch64::GPR64RegClass}, {})
.addImm(0)
.addUse(SrcReg)
.addImm(AArch64::sub_32)
.getReg(0);
}
ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
{DefReg}, {SrcReg})
.addImm(0)
.addImm(SrcSize - 1);
} else if (DstSize <= 32) {
ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
{DefReg}, {SrcReg})
.addImm(0)
.addImm(SrcSize - 1);
} else {
return false;
}
constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
I.eraseFromParent();
return true;
}
case TargetOpcode::G_SITOFP:
case TargetOpcode::G_UITOFP:
case TargetOpcode::G_FPTOSI:
case TargetOpcode::G_FPTOUI: {
const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
SrcTy = MRI.getType(I.getOperand(1).getReg());
const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
if (NewOpc == Opcode)
return false;
I.setDesc(TII.get(NewOpc));
constrainSelectedInstRegOperands(I, TII, TRI, RBI);
return true;
}
case TargetOpcode::G_FREEZE:
return selectCopy(I, TII, MRI, TRI, RBI);
case TargetOpcode::G_INTTOPTR:
// The importer is currently unable to import pointer types since they
// didn't exist in SelectionDAG.
return selectCopy(I, TII, MRI, TRI, RBI);
case TargetOpcode::G_BITCAST:
// Imported SelectionDAG rules can handle every bitcast except those that
// bitcast from a type to the same type. Ideally, these shouldn't occur
// but we might not run an optimizer that deletes them. The other exception
// is bitcasts involving pointer types, as SelectionDAG has no knowledge
// of them.
return selectCopy(I, TII, MRI, TRI, RBI);
case TargetOpcode::G_SELECT: {
if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) {
LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Ty
<< ", expected: " << LLT::scalar(1) << '\n');
return false;
}
const Register CondReg = I.getOperand(1).getReg();
const Register TReg = I.getOperand(2).getReg();
const Register FReg = I.getOperand(3).getReg();
if (tryOptSelect(I))
return true;
// Make sure to use an unused vreg instead of wzr, so that the peephole
// optimizations will be able to optimize these.
Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
.addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
if (!emitSelect(I.getOperand(0).getReg(), TReg, FReg, AArch64CC::NE, MIB))
return false;
I.eraseFromParent();
return true;
}
case TargetOpcode::G_ICMP: {
if (Ty.isVector())
return selectVectorICmp(I, MRI);
if (Ty != LLT::scalar(32)) {
LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
<< ", expected: " << LLT::scalar(32) << '\n');
return false;
}
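// Emit the compare, then materialize the boolean with a CSINC against WZR.
// CSINC produces 1 when its condition is false, so use the inverted
// predicate to get 1 when the original predicate holds.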
auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
const AArch64CC::CondCode InvCC =
changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1), MIB);
emitCSINC(/*Dst=*/I.getOperand(0).getReg(), /*Src1=*/AArch64::WZR,
/*Src2=*/AArch64::WZR, InvCC, MIB);
I.eraseFromParent();
return true;
}
case TargetOpcode::G_FCMP: {
CmpInst::Predicate Pred =
static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), MIB,
Pred) ||
!emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIB))
return false;
I.eraseFromParent();
return true;
}
case TargetOpcode::G_VASTART:
return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
: selectVaStartAAPCS(I, MF, MRI);
case TargetOpcode::G_INTRINSIC:
return selectIntrinsic(I, MRI);
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
return selectIntrinsicWithSideEffects(I, MRI);
case TargetOpcode::G_IMPLICIT_DEF: {
I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
const Register DstReg = I.getOperand(0).getReg();
const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
const TargetRegisterClass *DstRC =
getRegClassForTypeOnBank(DstTy, DstRB, RBI);
RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
return true;
}
case TargetOpcode::G_BLOCK_ADDR: {
if (TM.getCodeModel() == CodeModel::Large) {
materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
I.eraseFromParent();
return true;
} else {
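// Otherwise, use MOVaddrBA, a pseudo that is later expanded into an
// ADRP + ADD addressing the block address.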
I.setDesc(TII.get(AArch64::MOVaddrBA));
auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
I.getOperand(0).getReg())
.addBlockAddress(I.getOperand(1).getBlockAddress(),
/* Offset */ 0, AArch64II::MO_PAGE)
.addBlockAddress(
I.getOperand(1).getBlockAddress(), /* Offset */ 0,
AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
I.eraseFromParent();
return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
}
}
case AArch64::G_DUP: {
// When the scalar of G_DUP is an s8/s16 gpr, it can't be selected by
// imported patterns, so do it manually here. Avoiding an s16 gpr in the
// first place is difficult because at RBS we may end up pessimizing the fpr
// case if we decided to add an anyextend to fix this. Manual selection is
// the most robust solution for now.
if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
AArch64::GPRRegBankID)
return false; // We expect the fpr regbank case to be imported.
LLT VecTy = MRI.getType(I.getOperand(0).getReg());
if (VecTy == LLT::fixed_vector(8, 8))
I.setDesc(TII.get(AArch64::DUPv8i8gpr));
else if (VecTy == LLT::fixed_vector(16, 8))
I.setDesc(TII.get(AArch64::DUPv16i8gpr));
else if (VecTy == LLT::fixed_vector(4, 16))
I.setDesc(TII.get(AArch64::DUPv4i16gpr));
else if (VecTy == LLT::fixed_vector(8, 16))
I.setDesc(TII.get(AArch64::DUPv8i16gpr));
else
return false;
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
case TargetOpcode::G_INTRINSIC_TRUNC:
return selectIntrinsicTrunc(I, MRI);
case TargetOpcode::G_INTRINSIC_ROUND:
return selectIntrinsicRound(I, MRI);
case TargetOpcode::G_BUILD_VECTOR:
return selectBuildVector(I, MRI);
case TargetOpcode::G_MERGE_VALUES:
return selectMergeValues(I, MRI);
case TargetOpcode::G_UNMERGE_VALUES:
return selectUnmergeValues(I, MRI);
case TargetOpcode::G_SHUFFLE_VECTOR:
return selectShuffleVector(I, MRI);
case TargetOpcode::G_EXTRACT_VECTOR_ELT:
return selectExtractElt(I, MRI);
case TargetOpcode::G_INSERT_VECTOR_ELT:
return selectInsertElt(I, MRI);
case TargetOpcode::G_CONCAT_VECTORS:
return selectConcatVectors(I, MRI);
case TargetOpcode::G_JUMP_TABLE:
return selectJumpTable(I, MRI);
case TargetOpcode::G_VECREDUCE_FADD:
case TargetOpcode::G_VECREDUCE_ADD:
return selectReduction(I, MRI);
}
return false;
}