in llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp [3615:5382]
void ARMDAGToDAGISel::Select(SDNode *N) {
SDLoc dl(N);
if (N->isMachineOpcode()) {
N->setNodeId(-1);
return; // Already selected.
}
switch (N->getOpcode()) {
default: break;
case ISD::STORE: {
// For Thumb1, match an sp-relative store in C++. This is a little
// unfortunate, but I don't think I can make the chain check work
// otherwise. (The chain of the store has to be the same as the chain
// of the CopyFromReg, or else we can't replace the CopyFromReg with
// a direct reference to "SP".)
//
// This is only necessary on Thumb1 because Thumb1 sp-relative stores use
// a different addressing mode from other four-byte stores.
//
// This pattern usually comes up with call arguments.
StoreSDNode *ST = cast<StoreSDNode>(N);
SDValue Ptr = ST->getBasePtr();
if (Subtarget->isThumb1Only() && ST->isUnindexed()) {
int RHSC = 0;
if (Ptr.getOpcode() == ISD::ADD &&
isScaledConstantInRange(Ptr.getOperand(1), /*Scale=*/4, 0, 256, RHSC))
Ptr = Ptr.getOperand(0);
if (Ptr.getOpcode() == ISD::CopyFromReg &&
cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP &&
Ptr.getOperand(0) == ST->getChain()) {
SDValue Ops[] = {ST->getValue(),
CurDAG->getRegister(ARM::SP, MVT::i32),
CurDAG->getTargetConstant(RHSC, dl, MVT::i32),
getAL(CurDAG, dl),
CurDAG->getRegister(0, MVT::i32),
ST->getChain()};
MachineSDNode *ResNode =
CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops);
MachineMemOperand *MemOp = ST->getMemOperand();
CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
ReplaceNode(N, ResNode);
return;
}
}
break;
}
case ISD::WRITE_REGISTER:
if (tryWriteRegister(N))
return;
break;
case ISD::READ_REGISTER:
if (tryReadRegister(N))
return;
break;
case ISD::INLINEASM:
case ISD::INLINEASM_BR:
if (tryInlineAsm(N))
return;
break;
case ISD::XOR:
// Select special operations if XOR node forms integer ABS pattern
if (tryABSOp(N))
return;
// Other cases are autogenerated.
break;
case ISD::Constant: {
unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
// If we can't materialize the constant we need to use a literal pool
if (ConstantMaterializationCost(Val, Subtarget) > 2) {
SDValue CPIdx = CurDAG->getTargetConstantPool(
ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
TLI->getPointerTy(CurDAG->getDataLayout()));
SDNode *ResNode;
if (Subtarget->isThumb()) {
SDValue Ops[] = {
CPIdx,
getAL(CurDAG, dl),
CurDAG->getRegister(0, MVT::i32),
CurDAG->getEntryNode()
};
ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
Ops);
} else {
SDValue Ops[] = {
CPIdx,
CurDAG->getTargetConstant(0, dl, MVT::i32),
getAL(CurDAG, dl),
CurDAG->getRegister(0, MVT::i32),
CurDAG->getEntryNode()
};
ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
Ops);
}
// Annotate the Node with memory operand information so that MachineInstr
// queries work properly. This e.g. gives the register allocation the
// required information for rematerialization.
MachineFunction& MF = CurDAG->getMachineFunction();
MachineMemOperand *MemOp =
MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
MachineMemOperand::MOLoad, 4, Align(4));
CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
ReplaceNode(N, ResNode);
return;
}
// Other cases are autogenerated.
break;
}
case ISD::FrameIndex: {
// Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
int FI = cast<FrameIndexSDNode>(N)->getIndex();
SDValue TFI = CurDAG->getTargetFrameIndex(
FI, TLI->getPointerTy(CurDAG->getDataLayout()));
if (Subtarget->isThumb1Only()) {
// Set the alignment of the frame object to 4, to avoid having to generate
// more than one ADD
MachineFrameInfo &MFI = MF->getFrameInfo();
if (MFI.getObjectAlign(FI) < Align(4))
MFI.setObjectAlignment(FI, Align(4));
CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
CurDAG->getTargetConstant(0, dl, MVT::i32));
return;
} else {
unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
ARM::t2ADDri : ARM::ADDri);
SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
CurDAG->getRegister(0, MVT::i32) };
CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
return;
}
}
case ISD::INSERT_VECTOR_ELT: {
if (tryInsertVectorElt(N))
return;
break;
}
case ISD::SRL:
if (tryV6T2BitfieldExtractOp(N, false))
return;
break;
case ISD::SIGN_EXTEND_INREG:
case ISD::SRA:
if (tryV6T2BitfieldExtractOp(N, true))
return;
break;
case ISD::FP_TO_UINT:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT_SAT:
case ISD::FP_TO_SINT_SAT:
if (tryFP_TO_INT(N, dl))
return;
break;
case ISD::FMUL:
if (tryFMULFixed(N, dl))
return;
break;
case ISD::MUL:
if (Subtarget->isThumb1Only())
break;
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
unsigned RHSV = C->getZExtValue();
if (!RHSV) break;
if (isPowerOf2_32(RHSV-1)) { // 2^n+1?
unsigned ShImm = Log2_32(RHSV-1);
if (ShImm >= 32)
break;
SDValue V = N->getOperand(0);
ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
if (Subtarget->isThumb()) {
SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
return;
} else {
SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
Reg0 };
CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
return;
}
}
if (isPowerOf2_32(RHSV+1)) { // 2^n-1?
unsigned ShImm = Log2_32(RHSV+1);
if (ShImm >= 32)
break;
SDValue V = N->getOperand(0);
ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
if (Subtarget->isThumb()) {
SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
return;
} else {
SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
Reg0 };
CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
return;
}
}
}
break;
case ISD::AND: {
// Check for unsigned bitfield extract
if (tryV6T2BitfieldExtractOp(N, false))
return;
// If an immediate is used in an AND node, it is possible that the immediate
// can be more optimally materialized when negated. If this is the case we
// can negate the immediate and use a BIC instead.
auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
uint32_t Imm = (uint32_t) N1C->getZExtValue();
// In Thumb2 mode, an AND can take a 12-bit immediate. If this
// immediate can be negated and fit in the immediate operand of
// a t2BIC, don't do any manual transform here as this can be
// handled by the generic ISel machinery.
bool PreferImmediateEncoding =
Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
if (!PreferImmediateEncoding &&
ConstantMaterializationCost(Imm, Subtarget) >
ConstantMaterializationCost(~Imm, Subtarget)) {
// The current immediate costs more to materialize than a negated
// immediate, so negate the immediate and use a BIC.
SDValue NewImm =
CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32);
// If the new constant didn't exist before, reposition it in the topological
// ordering so it is just before N. Otherwise, don't touch its location.
if (NewImm->getNodeId() == -1)
CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());
if (!Subtarget->hasThumb2()) {
SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
N->getOperand(0), NewImm, getAL(CurDAG, dl),
CurDAG->getRegister(0, MVT::i32)};
ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
return;
} else {
SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
CurDAG->getRegister(0, MVT::i32),
CurDAG->getRegister(0, MVT::i32)};
ReplaceNode(N,
CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));
return;
}
}
}
// (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
// of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
// are entirely contributed by c2 and lower 16-bits are entirely contributed
// by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
// Select it to: "movt x, ((c1 & 0xffff0000) >> 16)"
EVT VT = N->getValueType(0);
if (VT != MVT::i32)
break;
unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
? ARM::t2MOVTi16
: (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
if (!Opc)
break;
SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
N1C = dyn_cast<ConstantSDNode>(N1);
if (!N1C)
break;
if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
SDValue N2 = N0.getOperand(1);
ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
if (!N2C)
break;
unsigned N1CVal = N1C->getZExtValue();
unsigned N2CVal = N2C->getZExtValue();
if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
(N1CVal & 0xffffU) == 0xffffU &&
(N2CVal & 0xffffU) == 0x0U) {
SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
dl, MVT::i32);
SDValue Ops[] = { N0.getOperand(0), Imm16,
getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
return;
}
}
break;
}
case ARMISD::UMAAL: {
unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
N->getOperand(2), N->getOperand(3),
getAL(CurDAG, dl),
CurDAG->getRegister(0, MVT::i32) };
ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
return;
}
case ARMISD::UMLAL:{
if (Subtarget->isThumb()) {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), getAL(CurDAG, dl),
CurDAG->getRegister(0, MVT::i32)};
ReplaceNode(
N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops));
return;
}else{
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), getAL(CurDAG, dl),
CurDAG->getRegister(0, MVT::i32),
CurDAG->getRegister(0, MVT::i32) };
ReplaceNode(N, CurDAG->getMachineNode(
Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
MVT::i32, MVT::i32, Ops));
return;
}
}
case ARMISD::SMLAL:{
if (Subtarget->isThumb()) {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), getAL(CurDAG, dl),
CurDAG->getRegister(0, MVT::i32)};
ReplaceNode(
N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));
return;
}else{
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), getAL(CurDAG, dl),
CurDAG->getRegister(0, MVT::i32),
CurDAG->getRegister(0, MVT::i32) };
ReplaceNode(N, CurDAG->getMachineNode(
Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
MVT::i32, MVT::i32, Ops));
return;
}
}
case ARMISD::SUBE: {
if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
break;
// Look for a pattern to match SMMLS
// (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b))))
if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI ||
N->getOperand(2).getOpcode() != ARMISD::SUBC ||
!SDValue(N, 1).use_empty())
break;
if (Subtarget->isThumb())
assert(Subtarget->hasThumb2() &&
"This pattern should not be generated for Thumb");
SDValue SmulLoHi = N->getOperand(1);
SDValue Subc = N->getOperand(2);
auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0));
if (!Zero || Zero->getZExtValue() != 0 ||
Subc.getOperand(1) != SmulLoHi.getValue(0) ||
N->getOperand(1) != SmulLoHi.getValue(1) ||
N->getOperand(2) != Subc.getValue(1))
break;
unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1),
N->getOperand(0), getAL(CurDAG, dl),
CurDAG->getRegister(0, MVT::i32) };
ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops));
return;
}
case ISD::LOAD: {
if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
return;
if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
if (tryT2IndexedLoad(N))
return;
} else if (Subtarget->isThumb()) {
if (tryT1IndexedLoad(N))
return;
} else if (tryARMIndexedLoad(N))
return;
// Other cases are autogenerated.
break;
}
case ISD::MLOAD:
if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
return;
// Other cases are autogenerated.
break;
case ARMISD::WLSSETUP: {
SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopSetup, dl, MVT::i32,
N->getOperand(0));
ReplaceUses(N, New);
CurDAG->RemoveDeadNode(N);
return;
}
case ARMISD::WLS: {
SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopStart, dl, MVT::Other,
N->getOperand(1), N->getOperand(2),
N->getOperand(0));
ReplaceUses(N, New);
CurDAG->RemoveDeadNode(N);
return;
}
case ARMISD::LE: {
SDValue Ops[] = { N->getOperand(1),
N->getOperand(2),
N->getOperand(0) };
unsigned Opc = ARM::t2LoopEnd;
SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
ReplaceUses(N, New);
CurDAG->RemoveDeadNode(N);
return;
}
case ARMISD::LDRD: {
if (Subtarget->isThumb2())
break; // TableGen handles isel in this case.
SDValue Base, RegOffset, ImmOffset;
const SDValue &Chain = N->getOperand(0);
const SDValue &Addr = N->getOperand(1);
SelectAddrMode3(Addr, Base, RegOffset, ImmOffset);
if (RegOffset != CurDAG->getRegister(0, MVT::i32)) {
// The register-offset variant of LDRD mandates that the register
// allocated to RegOffset is not reused in any of the remaining operands.
// This restriction is currently not enforced. Therefore emitting this
// variant is explicitly avoided.
Base = Addr;
RegOffset = CurDAG->getRegister(0, MVT::i32);
}
SDValue Ops[] = {Base, RegOffset, ImmOffset, Chain};
SDNode *New = CurDAG->getMachineNode(ARM::LOADDUAL, dl,
{MVT::Untyped, MVT::Other}, Ops);
SDValue Lo = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
SDValue(New, 0));
SDValue Hi = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
SDValue(New, 0));
transferMemOperands(N, New);
ReplaceUses(SDValue(N, 0), Lo);
ReplaceUses(SDValue(N, 1), Hi);
ReplaceUses(SDValue(N, 2), SDValue(New, 1));
CurDAG->RemoveDeadNode(N);
return;
}
case ARMISD::STRD: {
if (Subtarget->isThumb2())
break; // TableGen handles isel in this case.
SDValue Base, RegOffset, ImmOffset;
const SDValue &Chain = N->getOperand(0);
const SDValue &Addr = N->getOperand(3);
SelectAddrMode3(Addr, Base, RegOffset, ImmOffset);
if (RegOffset != CurDAG->getRegister(0, MVT::i32)) {
// The register-offset variant of STRD mandates that the register
// allocated to RegOffset is not reused in any of the remaining operands.
// This restriction is currently not enforced. Therefore emitting this
// variant is explicitly avoided.
Base = Addr;
RegOffset = CurDAG->getRegister(0, MVT::i32);
}
SDNode *RegPair =
createGPRPairNode(MVT::Untyped, N->getOperand(1), N->getOperand(2));
SDValue Ops[] = {SDValue(RegPair, 0), Base, RegOffset, ImmOffset, Chain};
SDNode *New = CurDAG->getMachineNode(ARM::STOREDUAL, dl, MVT::Other, Ops);
transferMemOperands(N, New);
ReplaceUses(SDValue(N, 0), SDValue(New, 0));
CurDAG->RemoveDeadNode(N);
return;
}
case ARMISD::LOOP_DEC: {
SDValue Ops[] = { N->getOperand(1),
N->getOperand(2),
N->getOperand(0) };
SDNode *Dec =
CurDAG->getMachineNode(ARM::t2LoopDec, dl,
CurDAG->getVTList(MVT::i32, MVT::Other), Ops);
ReplaceUses(N, Dec);
CurDAG->RemoveDeadNode(N);
return;
}
case ARMISD::BRCOND: {
// Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
// Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
// Pattern complexity = 6 cost = 1 size = 0
// Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
// Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
// Pattern complexity = 6 cost = 1 size = 0
// Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
// Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
// Pattern complexity = 6 cost = 1 size = 0
unsigned Opc = Subtarget->isThumb() ?
((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
SDValue Chain = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
SDValue N3 = N->getOperand(3);
SDValue InFlag = N->getOperand(4);
assert(N1.getOpcode() == ISD::BasicBlock);
assert(N2.getOpcode() == ISD::Constant);
assert(N3.getOpcode() == ISD::Register);
unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue();
if (InFlag.getOpcode() == ARMISD::CMPZ) {
if (InFlag.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) {
SDValue Int = InFlag.getOperand(0);
uint64_t ID = cast<ConstantSDNode>(Int->getOperand(1))->getZExtValue();
// Handle low-overhead loops.
if (ID == Intrinsic::loop_decrement_reg) {
SDValue Elements = Int.getOperand(2);
SDValue Size = CurDAG->getTargetConstant(
cast<ConstantSDNode>(Int.getOperand(3))->getZExtValue(), dl,
MVT::i32);
SDValue Args[] = { Elements, Size, Int.getOperand(0) };
SDNode *LoopDec =
CurDAG->getMachineNode(ARM::t2LoopDec, dl,
CurDAG->getVTList(MVT::i32, MVT::Other),
Args);
ReplaceUses(Int.getNode(), LoopDec);
SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain };
SDNode *LoopEnd =
CurDAG->getMachineNode(ARM::t2LoopEnd, dl, MVT::Other, EndArgs);
ReplaceUses(N, LoopEnd);
CurDAG->RemoveDeadNode(N);
CurDAG->RemoveDeadNode(InFlag.getNode());
CurDAG->RemoveDeadNode(Int.getNode());
return;
}
}
bool SwitchEQNEToPLMI;
SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
InFlag = N->getOperand(4);
if (SwitchEQNEToPLMI) {
switch ((ARMCC::CondCodes)CC) {
default: llvm_unreachable("CMPZ must be either NE or EQ!");
case ARMCC::NE:
CC = (unsigned)ARMCC::MI;
break;
case ARMCC::EQ:
CC = (unsigned)ARMCC::PL;
break;
}
}
}
SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32);
SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
MVT::Glue, Ops);
Chain = SDValue(ResNode, 0);
if (N->getNumValues() == 2) {
InFlag = SDValue(ResNode, 1);
ReplaceUses(SDValue(N, 1), InFlag);
}
ReplaceUses(SDValue(N, 0),
SDValue(Chain.getNode(), Chain.getResNo()));
CurDAG->RemoveDeadNode(N);
return;
}
case ARMISD::CMPZ: {
// select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
// This allows us to avoid materializing the expensive negative constant.
// The CMPZ #0 is useless and will be peepholed away but we need to keep it
// for its glue output.
SDValue X = N->getOperand(0);
auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
int64_t Addend = -C->getSExtValue();
SDNode *Add = nullptr;
// ADDS can be better than CMN if the immediate fits in a
// 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
// Outside that range we can just use a CMN which is 32-bit but has a
// 12-bit immediate range.
if (Addend < 1<<8) {
if (Subtarget->isThumb2()) {
SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
CurDAG->getRegister(0, MVT::i32) };
Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
} else {
unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8;
SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
CurDAG->getTargetConstant(Addend, dl, MVT::i32),
getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
}
}
if (Add) {
SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)};
CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2);
}
}
// Other cases are autogenerated.
break;
}
case ARMISD::CMOV: {
SDValue InFlag = N->getOperand(4);
if (InFlag.getOpcode() == ARMISD::CMPZ) {
bool SwitchEQNEToPLMI;
SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
if (SwitchEQNEToPLMI) {
SDValue ARMcc = N->getOperand(2);
ARMCC::CondCodes CC =
(ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
switch (CC) {
default: llvm_unreachable("CMPZ must be either NE or EQ!");
case ARMCC::NE:
CC = ARMCC::MI;
break;
case ARMCC::EQ:
CC = ARMCC::PL;
break;
}
SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32);
SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc,
N->getOperand(3), N->getOperand(4)};
CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops);
}
}
// Other cases are autogenerated.
break;
}
case ARMISD::VZIP: {
unsigned Opc = 0;
EVT VT = N->getValueType(0);
switch (VT.getSimpleVT().SimpleTy) {
default: return;
case MVT::v8i8: Opc = ARM::VZIPd8; break;
case MVT::v4f16:
case MVT::v4i16: Opc = ARM::VZIPd16; break;
case MVT::v2f32:
// vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
case MVT::v2i32: Opc = ARM::VTRNd32; break;
case MVT::v16i8: Opc = ARM::VZIPq8; break;
case MVT::v8f16:
case MVT::v8i16: Opc = ARM::VZIPq16; break;
case MVT::v4f32:
case MVT::v4i32: Opc = ARM::VZIPq32; break;
}
SDValue Pred = getAL(CurDAG, dl);
SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
return;
}
case ARMISD::VUZP: {
unsigned Opc = 0;
EVT VT = N->getValueType(0);
switch (VT.getSimpleVT().SimpleTy) {
default: return;
case MVT::v8i8: Opc = ARM::VUZPd8; break;
case MVT::v4f16:
case MVT::v4i16: Opc = ARM::VUZPd16; break;
case MVT::v2f32:
// vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
case MVT::v2i32: Opc = ARM::VTRNd32; break;
case MVT::v16i8: Opc = ARM::VUZPq8; break;
case MVT::v8f16:
case MVT::v8i16: Opc = ARM::VUZPq16; break;
case MVT::v4f32:
case MVT::v4i32: Opc = ARM::VUZPq32; break;
}
SDValue Pred = getAL(CurDAG, dl);
SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
return;
}
case ARMISD::VTRN: {
unsigned Opc = 0;
EVT VT = N->getValueType(0);
switch (VT.getSimpleVT().SimpleTy) {
default: return;
case MVT::v8i8: Opc = ARM::VTRNd8; break;
case MVT::v4f16:
case MVT::v4i16: Opc = ARM::VTRNd16; break;
case MVT::v2f32:
case MVT::v2i32: Opc = ARM::VTRNd32; break;
case MVT::v16i8: Opc = ARM::VTRNq8; break;
case MVT::v8f16:
case MVT::v8i16: Opc = ARM::VTRNq16; break;
case MVT::v4f32:
case MVT::v4i32: Opc = ARM::VTRNq32; break;
}
SDValue Pred = getAL(CurDAG, dl);
SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
return;
}
case ARMISD::BUILD_VECTOR: {
EVT VecVT = N->getValueType(0);
EVT EltVT = VecVT.getVectorElementType();
unsigned NumElts = VecVT.getVectorNumElements();
if (EltVT == MVT::f64) {
assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
ReplaceNode(
N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
return;
}
assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
if (NumElts == 2) {
ReplaceNode(
N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
return;
}
assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
ReplaceNode(N,
createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
N->getOperand(2), N->getOperand(3)));
return;
}
case ARMISD::VLD1DUP: {
static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,
ARM::VLD1DUPd32 };
static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
ARM::VLD1DUPq32 };
SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes);
return;
}
case ARMISD::VLD2DUP: {
static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
ARM::VLD2DUPd32 };
SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes);
return;
}
case ARMISD::VLD3DUP: {
static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
ARM::VLD3DUPd16Pseudo,
ARM::VLD3DUPd32Pseudo };
SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes);
return;
}
case ARMISD::VLD4DUP: {
static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
ARM::VLD4DUPd16Pseudo,
ARM::VLD4DUPd32Pseudo };
SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes);
return;
}
case ARMISD::VLD1DUP_UPD: {
static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,
ARM::VLD1DUPd16wb_fixed,
ARM::VLD1DUPd32wb_fixed };
static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
ARM::VLD1DUPq16wb_fixed,
ARM::VLD1DUPq32wb_fixed };
SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes);
return;
}
case ARMISD::VLD2DUP_UPD: {
static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8wb_fixed,
ARM::VLD2DUPd16wb_fixed,
ARM::VLD2DUPd32wb_fixed,
ARM::VLD1q64wb_fixed };
static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
ARM::VLD2DUPq16EvenPseudo,
ARM::VLD2DUPq32EvenPseudo };
static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudoWB_fixed,
ARM::VLD2DUPq16OddPseudoWB_fixed,
ARM::VLD2DUPq32OddPseudoWB_fixed };
SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, DOpcodes, QOpcodes0, QOpcodes1);
return;
}
case ARMISD::VLD3DUP_UPD: {
static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
ARM::VLD3DUPd16Pseudo_UPD,
ARM::VLD3DUPd32Pseudo_UPD,
ARM::VLD1d64TPseudoWB_fixed };
static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
ARM::VLD3DUPq16EvenPseudo,
ARM::VLD3DUPq32EvenPseudo };
static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo_UPD,
ARM::VLD3DUPq16OddPseudo_UPD,
ARM::VLD3DUPq32OddPseudo_UPD };
SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
return;
}
case ARMISD::VLD4DUP_UPD: {
static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
ARM::VLD4DUPd16Pseudo_UPD,
ARM::VLD4DUPd32Pseudo_UPD,
ARM::VLD1d64QPseudoWB_fixed };
static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
ARM::VLD4DUPq16EvenPseudo,
ARM::VLD4DUPq32EvenPseudo };
static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo_UPD,
ARM::VLD4DUPq16OddPseudo_UPD,
ARM::VLD4DUPq32OddPseudo_UPD };
SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
return;
}
case ARMISD::VLD1_UPD: {
static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
ARM::VLD1d16wb_fixed,
ARM::VLD1d32wb_fixed,
ARM::VLD1d64wb_fixed };
static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
ARM::VLD1q16wb_fixed,
ARM::VLD1q32wb_fixed,
ARM::VLD1q64wb_fixed };
SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
return;
}
case ARMISD::VLD2_UPD: {
if (Subtarget->hasNEON()) {
static const uint16_t DOpcodes[] = {
ARM::VLD2d8wb_fixed, ARM::VLD2d16wb_fixed, ARM::VLD2d32wb_fixed,
ARM::VLD1q64wb_fixed};
static const uint16_t QOpcodes[] = {ARM::VLD2q8PseudoWB_fixed,
ARM::VLD2q16PseudoWB_fixed,
ARM::VLD2q32PseudoWB_fixed};
SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
} else {
static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8,
ARM::MVE_VLD21_8_wb};
static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,
ARM::MVE_VLD21_16_wb};
static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
ARM::MVE_VLD21_32_wb};
static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
SelectMVE_VLD(N, 2, Opcodes, true);
}
return;
}
case ARMISD::VLD3_UPD: {
static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
ARM::VLD3d16Pseudo_UPD,
ARM::VLD3d32Pseudo_UPD,
ARM::VLD1d64TPseudoWB_fixed};
static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
ARM::VLD3q16Pseudo_UPD,
ARM::VLD3q32Pseudo_UPD };
static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
ARM::VLD3q16oddPseudo_UPD,
ARM::VLD3q32oddPseudo_UPD };
SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
return;
}
case ARMISD::VLD4_UPD: {
if (Subtarget->hasNEON()) {
static const uint16_t DOpcodes[] = {
ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD, ARM::VLD4d32Pseudo_UPD,
ARM::VLD1d64QPseudoWB_fixed};
static const uint16_t QOpcodes0[] = {ARM::VLD4q8Pseudo_UPD,
ARM::VLD4q16Pseudo_UPD,
ARM::VLD4q32Pseudo_UPD};
static const uint16_t QOpcodes1[] = {ARM::VLD4q8oddPseudo_UPD,
ARM::VLD4q16oddPseudo_UPD,
ARM::VLD4q32oddPseudo_UPD};
SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
} else {
static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,
ARM::MVE_VLD42_8,
ARM::MVE_VLD43_8_wb};
static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16,
ARM::MVE_VLD42_16,
ARM::MVE_VLD43_16_wb};
static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32,
ARM::MVE_VLD42_32,
ARM::MVE_VLD43_32_wb};
static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
SelectMVE_VLD(N, 4, Opcodes, true);
}
return;
}
case ARMISD::VLD1x2_UPD: {
if (Subtarget->hasNEON()) {
static const uint16_t DOpcodes[] = {
ARM::VLD1q8wb_fixed, ARM::VLD1q16wb_fixed, ARM::VLD1q32wb_fixed,
ARM::VLD1q64wb_fixed};
static const uint16_t QOpcodes[] = {
ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed,
ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed};
SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
return;
}
break;
}
case ARMISD::VLD1x3_UPD: {
if (Subtarget->hasNEON()) {
static const uint16_t DOpcodes[] = {
ARM::VLD1d8TPseudoWB_fixed, ARM::VLD1d16TPseudoWB_fixed,
ARM::VLD1d32TPseudoWB_fixed, ARM::VLD1d64TPseudoWB_fixed};
static const uint16_t QOpcodes0[] = {
ARM::VLD1q8LowTPseudo_UPD, ARM::VLD1q16LowTPseudo_UPD,
ARM::VLD1q32LowTPseudo_UPD, ARM::VLD1q64LowTPseudo_UPD};
static const uint16_t QOpcodes1[] = {
ARM::VLD1q8HighTPseudo_UPD, ARM::VLD1q16HighTPseudo_UPD,
ARM::VLD1q32HighTPseudo_UPD, ARM::VLD1q64HighTPseudo_UPD};
SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
return;
}
break;
}
case ARMISD::VLD1x4_UPD: {
if (Subtarget->hasNEON()) {
static const uint16_t DOpcodes[] = {
ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed,
ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed};
static const uint16_t QOpcodes0[] = {
ARM::VLD1q8LowQPseudo_UPD, ARM::VLD1q16LowQPseudo_UPD,
ARM::VLD1q32LowQPseudo_UPD, ARM::VLD1q64LowQPseudo_UPD};
static const uint16_t QOpcodes1[] = {
ARM::VLD1q8HighQPseudo_UPD, ARM::VLD1q16HighQPseudo_UPD,
ARM::VLD1q32HighQPseudo_UPD, ARM::VLD1q64HighQPseudo_UPD};
SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
return;
}
break;
}
case ARMISD::VLD2LN_UPD: {
static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
ARM::VLD2LNd16Pseudo_UPD,
ARM::VLD2LNd32Pseudo_UPD };
static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
ARM::VLD2LNq32Pseudo_UPD };
SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
return;
}
case ARMISD::VLD3LN_UPD: {
static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
ARM::VLD3LNd16Pseudo_UPD,
ARM::VLD3LNd32Pseudo_UPD };
static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
ARM::VLD3LNq32Pseudo_UPD };
SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
return;
}
case ARMISD::VLD4LN_UPD: {
static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
ARM::VLD4LNd16Pseudo_UPD,
ARM::VLD4LNd32Pseudo_UPD };
static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
ARM::VLD4LNq32Pseudo_UPD };
SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
return;
}
case ARMISD::VST1_UPD: {
static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
ARM::VST1d16wb_fixed,
ARM::VST1d32wb_fixed,
ARM::VST1d64wb_fixed };
static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
ARM::VST1q16wb_fixed,
ARM::VST1q32wb_fixed,
ARM::VST1q64wb_fixed };
SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
return;
}
case ARMISD::VST2_UPD: {
if (Subtarget->hasNEON()) {
static const uint16_t DOpcodes[] = {
ARM::VST2d8wb_fixed, ARM::VST2d16wb_fixed, ARM::VST2d32wb_fixed,
ARM::VST1q64wb_fixed};
static const uint16_t QOpcodes[] = {ARM::VST2q8PseudoWB_fixed,
ARM::VST2q16PseudoWB_fixed,
ARM::VST2q32PseudoWB_fixed};
SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
return;
}
break;
}
case ARMISD::VST3_UPD: {
static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
ARM::VST3d16Pseudo_UPD,
ARM::VST3d32Pseudo_UPD,
ARM::VST1d64TPseudoWB_fixed};
static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
ARM::VST3q16Pseudo_UPD,
ARM::VST3q32Pseudo_UPD };
static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
ARM::VST3q16oddPseudo_UPD,
ARM::VST3q32oddPseudo_UPD };
SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
return;
}
case ARMISD::VST4_UPD: {
if (Subtarget->hasNEON()) {
static const uint16_t DOpcodes[] = {
ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD, ARM::VST4d32Pseudo_UPD,
ARM::VST1d64QPseudoWB_fixed};
static const uint16_t QOpcodes0[] = {ARM::VST4q8Pseudo_UPD,
ARM::VST4q16Pseudo_UPD,
ARM::VST4q32Pseudo_UPD};
static const uint16_t QOpcodes1[] = {ARM::VST4q8oddPseudo_UPD,
ARM::VST4q16oddPseudo_UPD,
ARM::VST4q32oddPseudo_UPD};
SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
return;
}
break;
}
case ARMISD::VST1x2_UPD: {
if (Subtarget->hasNEON()) {
static const uint16_t DOpcodes[] = { ARM::VST1q8wb_fixed,
ARM::VST1q16wb_fixed,
ARM::VST1q32wb_fixed,
ARM::VST1q64wb_fixed};
static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudoWB_fixed,
ARM::VST1d16QPseudoWB_fixed,
ARM::VST1d32QPseudoWB_fixed,
ARM::VST1d64QPseudoWB_fixed };
SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
return;
}
break;
}
case ARMISD::VST1x3_UPD: {
if (Subtarget->hasNEON()) {
static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudoWB_fixed,
ARM::VST1d16TPseudoWB_fixed,
ARM::VST1d32TPseudoWB_fixed,
ARM::VST1d64TPseudoWB_fixed };
static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
ARM::VST1q16LowTPseudo_UPD,
ARM::VST1q32LowTPseudo_UPD,
ARM::VST1q64LowTPseudo_UPD };
static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo_UPD,
ARM::VST1q16HighTPseudo_UPD,
ARM::VST1q32HighTPseudo_UPD,
ARM::VST1q64HighTPseudo_UPD };
SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
return;
}
break;
}
case ARMISD::VST1x4_UPD: {
if (Subtarget->hasNEON()) {
static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudoWB_fixed,
ARM::VST1d16QPseudoWB_fixed,
ARM::VST1d32QPseudoWB_fixed,
ARM::VST1d64QPseudoWB_fixed };
static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
ARM::VST1q16LowQPseudo_UPD,
ARM::VST1q32LowQPseudo_UPD,
ARM::VST1q64LowQPseudo_UPD };
static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo_UPD,
ARM::VST1q16HighQPseudo_UPD,
ARM::VST1q32HighQPseudo_UPD,
ARM::VST1q64HighQPseudo_UPD };
SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
return;
}
break;
}
case ARMISD::VST2LN_UPD: {
static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
ARM::VST2LNd16Pseudo_UPD,
ARM::VST2LNd32Pseudo_UPD };
static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
ARM::VST2LNq32Pseudo_UPD };
SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
return;
}
case ARMISD::VST3LN_UPD: {
static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
ARM::VST3LNd16Pseudo_UPD,
ARM::VST3LNd32Pseudo_UPD };
static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
ARM::VST3LNq32Pseudo_UPD };
SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
return;
}
case ARMISD::VST4LN_UPD: {
static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
ARM::VST4LNd16Pseudo_UPD,
ARM::VST4LNd32Pseudo_UPD };
static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
ARM::VST4LNq32Pseudo_UPD };
SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
return;
}
case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN: {
unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
switch (IntNo) {
default:
break;
case Intrinsic::arm_mrrc:
case Intrinsic::arm_mrrc2: {
SDLoc dl(N);
SDValue Chain = N->getOperand(0);
unsigned Opc;
if (Subtarget->isThumb())
Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
else
Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);
SmallVector<SDValue, 5> Ops;
Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */
Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */
Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */
// The ARM-mode mrrc2 instruction cannot be predicated: the top 4 bits of its
// encoding are always '1111'. Assembly syntax still accepts AL as a predicate
// on mrrc2, but that makes no difference to the encoded instruction, so no
// predicate operands are added for it.
if (Opc != ARM::MRRC2) {
Ops.push_back(getAL(CurDAG, dl));
Ops.push_back(CurDAG->getRegister(0, MVT::i32));
}
Ops.push_back(Chain);
// Writes to two registers.
const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};
ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
return;
}
case Intrinsic::arm_ldaexd:
case Intrinsic::arm_ldrexd: {
SDLoc dl(N);
SDValue Chain = N->getOperand(0);
SDValue MemAddr = N->getOperand(2);
bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();
bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
: (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);
// arm_ldrexd returns an i64 value in {i32, i32}
std::vector<EVT> ResTys;
if (isThumb) {
ResTys.push_back(MVT::i32);
ResTys.push_back(MVT::i32);
} else
ResTys.push_back(MVT::Untyped);
ResTys.push_back(MVT::Other);
// Place arguments in the right order.
SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
CurDAG->getRegister(0, MVT::i32), Chain};
SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
// Transfer memoperands.
MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
// Remap uses.
SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
if (!SDValue(N, 0).use_empty()) {
SDValue Result;
if (isThumb)
Result = SDValue(Ld, 0);
else {
SDValue SubRegIdx =
CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
Result = SDValue(ResNode,0);
}
ReplaceUses(SDValue(N, 0), Result);
}
if (!SDValue(N, 1).use_empty()) {
SDValue Result;
if (isThumb)
Result = SDValue(Ld, 1);
else {
SDValue SubRegIdx =
CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
Result = SDValue(ResNode,0);
}
ReplaceUses(SDValue(N, 1), Result);
}
ReplaceUses(SDValue(N, 2), OutChain);
CurDAG->RemoveDeadNode(N);
return;
}
case Intrinsic::arm_stlexd:
case Intrinsic::arm_strexd: {
SDLoc dl(N);
SDValue Chain = N->getOperand(0);
SDValue Val0 = N->getOperand(2);
SDValue Val1 = N->getOperand(3);
SDValue MemAddr = N->getOperand(4);
// Store-exclusive double returns an i32 value, which is the return status
// of the issued store.
const EVT ResTys[] = {MVT::i32, MVT::Other};
bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
// Place arguments in the right order.
SmallVector<SDValue, 7> Ops;
if (isThumb) {
Ops.push_back(Val0);
Ops.push_back(Val1);
} else
// arm_strexd uses GPRPair.
Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
Ops.push_back(MemAddr);
Ops.push_back(getAL(CurDAG, dl));
Ops.push_back(CurDAG->getRegister(0, MVT::i32));
Ops.push_back(Chain);
bool IsRelease = IntNo == Intrinsic::arm_stlexd;
unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
: (IsRelease ? ARM::STLEXD : ARM::STREXD);
SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
// Transfer memoperands.
MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
ReplaceNode(N, St);
return;
}
case Intrinsic::arm_neon_vld1: {
static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
ARM::VLD1d32, ARM::VLD1d64 };
static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
ARM::VLD1q32, ARM::VLD1q64};
SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
return;
}
case Intrinsic::arm_neon_vld1x2: {
static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
ARM::VLD1q32, ARM::VLD1q64 };
static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo,
ARM::VLD1d16QPseudo,
ARM::VLD1d32QPseudo,
ARM::VLD1d64QPseudo };
SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
return;
}
case Intrinsic::arm_neon_vld1x3: {
static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo,
ARM::VLD1d16TPseudo,
ARM::VLD1d32TPseudo,
ARM::VLD1d64TPseudo };
static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD,
ARM::VLD1q16LowTPseudo_UPD,
ARM::VLD1q32LowTPseudo_UPD,
ARM::VLD1q64LowTPseudo_UPD };
static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo,
ARM::VLD1q16HighTPseudo,
ARM::VLD1q32HighTPseudo,
ARM::VLD1q64HighTPseudo };
SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
return;
}
case Intrinsic::arm_neon_vld1x4: {
static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo,
ARM::VLD1d16QPseudo,
ARM::VLD1d32QPseudo,
ARM::VLD1d64QPseudo };
static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD,
ARM::VLD1q16LowQPseudo_UPD,
ARM::VLD1q32LowQPseudo_UPD,
ARM::VLD1q64LowQPseudo_UPD };
static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo,
ARM::VLD1q16HighQPseudo,
ARM::VLD1q32HighQPseudo,
ARM::VLD1q64HighQPseudo };
SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
return;
}
case Intrinsic::arm_neon_vld2: {
static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
ARM::VLD2d32, ARM::VLD1q64 };
static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
ARM::VLD2q32Pseudo };
SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
return;
}
case Intrinsic::arm_neon_vld3: {
static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
ARM::VLD3d16Pseudo,
ARM::VLD3d32Pseudo,
ARM::VLD1d64TPseudo };
static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
ARM::VLD3q16Pseudo_UPD,
ARM::VLD3q32Pseudo_UPD };
static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
ARM::VLD3q16oddPseudo,
ARM::VLD3q32oddPseudo };
SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
return;
}
case Intrinsic::arm_neon_vld4: {
static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
ARM::VLD4d16Pseudo,
ARM::VLD4d32Pseudo,
ARM::VLD1d64QPseudo };
static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
ARM::VLD4q16Pseudo_UPD,
ARM::VLD4q32Pseudo_UPD };
static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
ARM::VLD4q16oddPseudo,
ARM::VLD4q32oddPseudo };
SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
return;
}
case Intrinsic::arm_neon_vld2dup: {
static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
ARM::VLD2DUPd32, ARM::VLD1q64 };
static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
ARM::VLD2DUPq16EvenPseudo,
ARM::VLD2DUPq32EvenPseudo };
static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo,
ARM::VLD2DUPq16OddPseudo,
ARM::VLD2DUPq32OddPseudo };
SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2,
DOpcodes, QOpcodes0, QOpcodes1);
return;
}
case Intrinsic::arm_neon_vld3dup: {
static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo,
ARM::VLD3DUPd16Pseudo,
ARM::VLD3DUPd32Pseudo,
ARM::VLD1d64TPseudo };
static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
ARM::VLD3DUPq16EvenPseudo,
ARM::VLD3DUPq32EvenPseudo };
static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo,
ARM::VLD3DUPq16OddPseudo,
ARM::VLD3DUPq32OddPseudo };
SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3,
DOpcodes, QOpcodes0, QOpcodes1);
return;
}
case Intrinsic::arm_neon_vld4dup: {
static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo,
ARM::VLD4DUPd16Pseudo,
ARM::VLD4DUPd32Pseudo,
ARM::VLD1d64QPseudo };
static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
ARM::VLD4DUPq16EvenPseudo,
ARM::VLD4DUPq32EvenPseudo };
static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo,
ARM::VLD4DUPq16OddPseudo,
ARM::VLD4DUPq32OddPseudo };
SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4,
DOpcodes, QOpcodes0, QOpcodes1);
return;
}
case Intrinsic::arm_neon_vld2lane: {
static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
ARM::VLD2LNd16Pseudo,
ARM::VLD2LNd32Pseudo };
static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
ARM::VLD2LNq32Pseudo };
SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
return;
}
case Intrinsic::arm_neon_vld3lane: {
static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
ARM::VLD3LNd16Pseudo,
ARM::VLD3LNd32Pseudo };
static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
ARM::VLD3LNq32Pseudo };
SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
return;
}
case Intrinsic::arm_neon_vld4lane: {
static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
ARM::VLD4LNd16Pseudo,
ARM::VLD4LNd32Pseudo };
static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
ARM::VLD4LNq32Pseudo };
SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
return;
}
case Intrinsic::arm_neon_vst1: {
static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
ARM::VST1d32, ARM::VST1d64 };
static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
ARM::VST1q32, ARM::VST1q64 };
SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
return;
}
case Intrinsic::arm_neon_vst1x2: {
static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
ARM::VST1q32, ARM::VST1q64 };
static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo,
ARM::VST1d16QPseudo,
ARM::VST1d32QPseudo,
ARM::VST1d64QPseudo };
SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
return;
}
case Intrinsic::arm_neon_vst1x3: {
static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo,
ARM::VST1d16TPseudo,
ARM::VST1d32TPseudo,
ARM::VST1d64TPseudo };
static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
ARM::VST1q16LowTPseudo_UPD,
ARM::VST1q32LowTPseudo_UPD,
ARM::VST1q64LowTPseudo_UPD };
static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo,
ARM::VST1q16HighTPseudo,
ARM::VST1q32HighTPseudo,
ARM::VST1q64HighTPseudo };
SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
return;
}
case Intrinsic::arm_neon_vst1x4: {
static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo,
ARM::VST1d16QPseudo,
ARM::VST1d32QPseudo,
ARM::VST1d64QPseudo };
static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
ARM::VST1q16LowQPseudo_UPD,
ARM::VST1q32LowQPseudo_UPD,
ARM::VST1q64LowQPseudo_UPD };
static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo,
ARM::VST1q16HighQPseudo,
ARM::VST1q32HighQPseudo,
ARM::VST1q64HighQPseudo };
SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
return;
}
case Intrinsic::arm_neon_vst2: {
static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
ARM::VST2d32, ARM::VST1q64 };
static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
ARM::VST2q32Pseudo };
SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
return;
}
case Intrinsic::arm_neon_vst3: {
static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
ARM::VST3d16Pseudo,
ARM::VST3d32Pseudo,
ARM::VST1d64TPseudo };
static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
ARM::VST3q16Pseudo_UPD,
ARM::VST3q32Pseudo_UPD };
static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
ARM::VST3q16oddPseudo,
ARM::VST3q32oddPseudo };
SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
return;
}
case Intrinsic::arm_neon_vst4: {
static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
ARM::VST4d16Pseudo,
ARM::VST4d32Pseudo,
ARM::VST1d64QPseudo };
static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
ARM::VST4q16Pseudo_UPD,
ARM::VST4q32Pseudo_UPD };
static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
ARM::VST4q16oddPseudo,
ARM::VST4q32oddPseudo };
SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
return;
}
case Intrinsic::arm_neon_vst2lane: {
static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
ARM::VST2LNd16Pseudo,
ARM::VST2LNd32Pseudo };
static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
ARM::VST2LNq32Pseudo };
SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
return;
}
case Intrinsic::arm_neon_vst3lane: {
static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
ARM::VST3LNd16Pseudo,
ARM::VST3LNd32Pseudo };
static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
ARM::VST3LNq32Pseudo };
SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
return;
}
case Intrinsic::arm_neon_vst4lane: {
static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
ARM::VST4LNd16Pseudo,
ARM::VST4LNd32Pseudo };
static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
ARM::VST4LNq32Pseudo };
SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
return;
}
case Intrinsic::arm_mve_vldr_gather_base_wb:
case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: {
static const uint16_t Opcodes[] = {ARM::MVE_VLDRWU32_qi_pre,
ARM::MVE_VLDRDU64_qi_pre};
SelectMVE_WB(N, Opcodes,
IntNo == Intrinsic::arm_mve_vldr_gather_base_wb_predicated);
return;
}
case Intrinsic::arm_mve_vld2q: {
static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8, ARM::MVE_VLD21_8};
static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,
ARM::MVE_VLD21_16};
static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
ARM::MVE_VLD21_32};
static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
SelectMVE_VLD(N, 2, Opcodes, false);
return;
}
case Intrinsic::arm_mve_vld4q: {
static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,
ARM::MVE_VLD42_8, ARM::MVE_VLD43_8};
static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16,
ARM::MVE_VLD42_16,
ARM::MVE_VLD43_16};
static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32,
ARM::MVE_VLD42_32,
ARM::MVE_VLD43_32};
static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
SelectMVE_VLD(N, 4, Opcodes, false);
return;
}
}
break;
}
case ISD::INTRINSIC_WO_CHAIN: {
unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
switch (IntNo) {
default:
break;
// Scalar f32 -> bf16
case Intrinsic::arm_neon_vcvtbfp2bf: {
SDLoc dl(N);
const SDValue &Src = N->getOperand(1);
llvm::EVT DestTy = N->getValueType(0);
SDValue Pred = getAL(CurDAG, dl);
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
SDValue Ops[] = { Src, Src, Pred, Reg0 };
CurDAG->SelectNodeTo(N, ARM::BF16_VCVTB, DestTy, Ops);
return;
}
// Vector v4f32 -> v4bf16
case Intrinsic::arm_neon_vcvtfp2bf: {
SDLoc dl(N);
const SDValue &Src = N->getOperand(1);
SDValue Pred = getAL(CurDAG, dl);
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
SDValue Ops[] = { Src, Pred, Reg0 };
CurDAG->SelectNodeTo(N, ARM::BF16_VCVT, MVT::v4bf16, Ops);
return;
}
case Intrinsic::arm_mve_urshrl:
SelectMVE_LongShift(N, ARM::MVE_URSHRL, true, false);
return;
case Intrinsic::arm_mve_uqshll:
SelectMVE_LongShift(N, ARM::MVE_UQSHLL, true, false);
return;
case Intrinsic::arm_mve_srshrl:
SelectMVE_LongShift(N, ARM::MVE_SRSHRL, true, false);
return;
case Intrinsic::arm_mve_sqshll:
SelectMVE_LongShift(N, ARM::MVE_SQSHLL, true, false);
return;
case Intrinsic::arm_mve_uqrshll:
SelectMVE_LongShift(N, ARM::MVE_UQRSHLL, false, true);
return;
case Intrinsic::arm_mve_sqrshrl:
SelectMVE_LongShift(N, ARM::MVE_SQRSHRL, false, true);
return;
case Intrinsic::arm_mve_vadc:
case Intrinsic::arm_mve_vadc_predicated:
SelectMVE_VADCSBC(N, ARM::MVE_VADC, ARM::MVE_VADCI, true,
IntNo == Intrinsic::arm_mve_vadc_predicated);
return;
case Intrinsic::arm_mve_vsbc:
case Intrinsic::arm_mve_vsbc_predicated:
SelectMVE_VADCSBC(N, ARM::MVE_VSBC, ARM::MVE_VSBCI, true,
IntNo == Intrinsic::arm_mve_vsbc_predicated);
return;
case Intrinsic::arm_mve_vshlc:
case Intrinsic::arm_mve_vshlc_predicated:
SelectMVE_VSHLC(N, IntNo == Intrinsic::arm_mve_vshlc_predicated);
return;
case Intrinsic::arm_mve_vmlldava:
case Intrinsic::arm_mve_vmlldava_predicated: {
static const uint16_t OpcodesU[] = {
ARM::MVE_VMLALDAVu16, ARM::MVE_VMLALDAVu32,
ARM::MVE_VMLALDAVau16, ARM::MVE_VMLALDAVau32,
};
static const uint16_t OpcodesS[] = {
ARM::MVE_VMLALDAVs16, ARM::MVE_VMLALDAVs32,
ARM::MVE_VMLALDAVas16, ARM::MVE_VMLALDAVas32,
ARM::MVE_VMLALDAVxs16, ARM::MVE_VMLALDAVxs32,
ARM::MVE_VMLALDAVaxs16, ARM::MVE_VMLALDAVaxs32,
ARM::MVE_VMLSLDAVs16, ARM::MVE_VMLSLDAVs32,
ARM::MVE_VMLSLDAVas16, ARM::MVE_VMLSLDAVas32,
ARM::MVE_VMLSLDAVxs16, ARM::MVE_VMLSLDAVxs32,
ARM::MVE_VMLSLDAVaxs16, ARM::MVE_VMLSLDAVaxs32,
};
SelectMVE_VMLLDAV(N, IntNo == Intrinsic::arm_mve_vmlldava_predicated,
OpcodesS, OpcodesU);
return;
}
case Intrinsic::arm_mve_vrmlldavha:
case Intrinsic::arm_mve_vrmlldavha_predicated: {
static const uint16_t OpcodesU[] = {
ARM::MVE_VRMLALDAVHu32, ARM::MVE_VRMLALDAVHau32,
};
static const uint16_t OpcodesS[] = {
ARM::MVE_VRMLALDAVHs32, ARM::MVE_VRMLALDAVHas32,
ARM::MVE_VRMLALDAVHxs32, ARM::MVE_VRMLALDAVHaxs32,
ARM::MVE_VRMLSLDAVHs32, ARM::MVE_VRMLSLDAVHas32,
ARM::MVE_VRMLSLDAVHxs32, ARM::MVE_VRMLSLDAVHaxs32,
};
SelectMVE_VRMLLDAVH(N, IntNo == Intrinsic::arm_mve_vrmlldavha_predicated,
OpcodesS, OpcodesU);
return;
}
case Intrinsic::arm_mve_vidup:
case Intrinsic::arm_mve_vidup_predicated: {
static const uint16_t Opcodes[] = {
ARM::MVE_VIDUPu8, ARM::MVE_VIDUPu16, ARM::MVE_VIDUPu32,
};
SelectMVE_VxDUP(N, Opcodes, false,
IntNo == Intrinsic::arm_mve_vidup_predicated);
return;
}
case Intrinsic::arm_mve_vddup:
case Intrinsic::arm_mve_vddup_predicated: {
static const uint16_t Opcodes[] = {
ARM::MVE_VDDUPu8, ARM::MVE_VDDUPu16, ARM::MVE_VDDUPu32,
};
SelectMVE_VxDUP(N, Opcodes, false,
IntNo == Intrinsic::arm_mve_vddup_predicated);
return;
}
case Intrinsic::arm_mve_viwdup:
case Intrinsic::arm_mve_viwdup_predicated: {
static const uint16_t Opcodes[] = {
ARM::MVE_VIWDUPu8, ARM::MVE_VIWDUPu16, ARM::MVE_VIWDUPu32,
};
SelectMVE_VxDUP(N, Opcodes, true,
IntNo == Intrinsic::arm_mve_viwdup_predicated);
return;
}
case Intrinsic::arm_mve_vdwdup:
case Intrinsic::arm_mve_vdwdup_predicated: {
static const uint16_t Opcodes[] = {
ARM::MVE_VDWDUPu8, ARM::MVE_VDWDUPu16, ARM::MVE_VDWDUPu32,
};
SelectMVE_VxDUP(N, Opcodes, true,
IntNo == Intrinsic::arm_mve_vdwdup_predicated);
return;
}
case Intrinsic::arm_cde_cx1d:
case Intrinsic::arm_cde_cx1da:
case Intrinsic::arm_cde_cx2d:
case Intrinsic::arm_cde_cx2da:
case Intrinsic::arm_cde_cx3d:
case Intrinsic::arm_cde_cx3da: {
bool HasAccum = IntNo == Intrinsic::arm_cde_cx1da ||
IntNo == Intrinsic::arm_cde_cx2da ||
IntNo == Intrinsic::arm_cde_cx3da;
size_t NumExtraOps;
uint16_t Opcode;
switch (IntNo) {
case Intrinsic::arm_cde_cx1d:
case Intrinsic::arm_cde_cx1da:
NumExtraOps = 0;
Opcode = HasAccum ? ARM::CDE_CX1DA : ARM::CDE_CX1D;
break;
case Intrinsic::arm_cde_cx2d:
case Intrinsic::arm_cde_cx2da:
NumExtraOps = 1;
Opcode = HasAccum ? ARM::CDE_CX2DA : ARM::CDE_CX2D;
break;
case Intrinsic::arm_cde_cx3d:
case Intrinsic::arm_cde_cx3da:
NumExtraOps = 2;
Opcode = HasAccum ? ARM::CDE_CX3DA : ARM::CDE_CX3D;
break;
default:
llvm_unreachable("Unexpected opcode");
}
SelectCDE_CXxD(N, Opcode, NumExtraOps, HasAccum);
return;
}
}
break;
}
case ISD::ATOMIC_CMP_SWAP:
SelectCMP_SWAP(N);
return;
}
SelectCode(N);
}