in llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp [4966:6040]
/// Custom selection hook: selects the single DAG node \p N into PowerPC
/// machine nodes.
///
/// Order of attempts, as implemented below:
///  1. A node that already carries a machine opcode only has its node id set
///     to -1 and is otherwise left alone.
///  2. tryBitPermutation and tryIntCompareInGPR get the first chance.
///  3. The switch handles opcodes that need hand-written selection. A case
///     that returns has fully handled N; a case that breaks leaves N
///     unselected.
///  4. Whatever falls out of the switch is handed to SelectCode (the comments
///     below refer to this as the autogenerated / .td-driven selection).
void PPCDAGToDAGISel::Select(SDNode *N) {
SDLoc dl(N);
if (N->isMachineOpcode()) {
N->setNodeId(-1);
return; // Already selected.
}
// In case any misguided DAG-level optimizations form an ADD with a
// TargetConstant operand, crash here instead of miscompiling (by selecting
// an r+r add instead of some kind of r+i add).
if (N->getOpcode() == ISD::ADD &&
N->getOperand(1).getOpcode() == ISD::TargetConstant)
llvm_unreachable("Invalid ADD with TargetConstant operand");
// Try matching complex bit permutations before doing anything else.
if (tryBitPermutation(N))
return;
// Try to emit integer compares as GPR-only sequences (i.e. no use of CR).
if (tryIntCompareInGPR(N))
return;
switch (N->getOpcode()) {
default: break;
// 64-bit integer constants are materialized by selectI64Imm; constants of any
// other type are left to SelectCode.
case ISD::Constant:
if (N->getValueType(0) == MVT::i64) {
ReplaceNode(N, selectI64Imm(CurDAG, N));
return;
}
break;
// Trap intrinsics (ppc_tdw / ppc_tw). Operand 1 is the intrinsic ID, operands
// 2 and 3 are the two values being compared and operand 4 is the TO condition.
// Every other void intrinsic is left to SelectCode.
case ISD::INTRINSIC_VOID: {
auto IntrinsicID = N->getConstantOperandVal(1);
if (IntrinsicID == Intrinsic::ppc_tdw || IntrinsicID == Intrinsic::ppc_tw) {
// Start from the register + immediate form (TDI/TWI); this is replaced by
// the register + register form below when it does not apply.
unsigned Opcode = IntrinsicID == Intrinsic::ppc_tdw ? PPC::TDI : PPC::TWI;
// Machine operand order is {TO, first compared value, second compared value}.
SDValue Ops[] = {N->getOperand(4), N->getOperand(2), N->getOperand(3)};
int16_t SImmOperand2;
int16_t SImmOperand3;
int16_t SImmOperand4;
bool isOperand2IntS16Immediate =
isIntS16Immediate(N->getOperand(2), SImmOperand2);
bool isOperand3IntS16Immediate =
isIntS16Immediate(N->getOperand(3), SImmOperand3);
// We will emit PPC::TD or PPC::TW if the 2nd and 3rd operands are reg +
// reg or imm + imm. The imm + imm form will be optimized to either an
// unconditional trap or a nop in a later pass.
if (isOperand2IntS16Immediate == isOperand3IntS16Immediate)
Opcode = IntrinsicID == Intrinsic::ppc_tdw ? PPC::TD : PPC::TW;
else if (isOperand3IntS16Immediate)
// The 2nd and 3rd operands are reg + imm.
Ops[2] = getI32Imm(int(SImmOperand3) & 0xFFFF, dl);
else {
// The 2nd and 3rd operands are imm + reg.
// The immediate has to end up as the last machine operand, so the two
// compared values are exchanged and the TO condition is mirrored to match.
bool isOperand4IntS16Immediate =
isIntS16Immediate(N->getOperand(4), SImmOperand4);
(void)isOperand4IntS16Immediate;
assert(isOperand4IntS16Immediate &&
"The 4th operand is not an Immediate");
// We need to flip the condition immediate TO.
// Only the low five bits of the condition are kept.
int16_t TO = int(SImmOperand4) & 0x1F;
// We swap the first and second bit of TO if they are not same.
if ((TO & 0x1) != ((TO & 0x2) >> 1))
TO = (TO & 0x1) ? TO + 1 : TO - 1;
// We swap the fourth and fifth bit of TO if they are not same.
if ((TO & 0x8) != ((TO & 0x10) >> 1))
TO = (TO & 0x8) ? TO + 8 : TO - 8;
Ops[0] = getI32Imm(TO, dl);
Ops[1] = N->getOperand(3);
Ops[2] = getI32Imm(int(SImmOperand2) & 0xFFFF, dl);
}
CurDAG->SelectNodeTo(N, Opcode, MVT::Other, Ops);
return;
}
break;
}
// Chainless intrinsics; operand 0 is the intrinsic ID. Handled here: ppc_fsels,
// the BCD add/subtract predicates and (ISA 3.1 only) the vector string isolate
// predicates. Anything else is left to SelectCode.
case ISD::INTRINSIC_WO_CHAIN: {
// We emit the PPC::FSELS instruction here because of type conflicts with
// the comparison operand. The FSELS instruction is defined to use an 8-byte
// comparison like the FSELD version. The fsels intrinsic takes a 4-byte
// value for the comparison. When selecting through a .td file, a type
// error is raised. Must check this first so we never break on the
// !Subtarget->isISA3_1() check.
auto IntID = N->getConstantOperandVal(0);
if (IntID == Intrinsic::ppc_fsels) {
SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3)};
CurDAG->SelectNodeTo(N, PPC::FSELS, MVT::f32, Ops);
return;
}
if (IntID == Intrinsic::ppc_bcdadd_p || IntID == Intrinsic::ppc_bcdsub_p) {
// Operand 1 is a constant predicate selector and operands 2 and 3 are the
// inputs of the record-form BCD instruction. The selector picks which CR6
// bit is read; odd selectors set Reverse, which inverts the extracted bit.
auto Pred = N->getConstantOperandVal(1);
unsigned Opcode =
IntID == Intrinsic::ppc_bcdadd_p ? PPC::BCDADD_rec : PPC::BCDSUB_rec;
unsigned SubReg = 0;
unsigned ShiftVal = 0;
bool Reverse = false;
// There is no default case: a selector outside 0-7 leaves SubReg and
// ShiftVal at 0 and Reverse at false.
switch (Pred) {
case 0:
SubReg = PPC::sub_eq;
ShiftVal = 1;
break;
case 1:
SubReg = PPC::sub_eq;
ShiftVal = 1;
Reverse = true;
break;
case 2:
SubReg = PPC::sub_lt;
ShiftVal = 3;
break;
case 3:
SubReg = PPC::sub_lt;
ShiftVal = 3;
Reverse = true;
break;
case 4:
SubReg = PPC::sub_gt;
ShiftVal = 2;
break;
case 5:
SubReg = PPC::sub_gt;
ShiftVal = 2;
Reverse = true;
break;
case 6:
SubReg = PPC::sub_un;
break;
case 7:
SubReg = PPC::sub_un;
Reverse = true;
break;
}
// The BCD node yields the v16i8 result plus a glue value; the glue ties the
// CR6 read below to this instruction. The third machine operand is always
// the constant 0.
EVT VTs[] = {MVT::v16i8, MVT::Glue};
SDValue Ops[] = {N->getOperand(2), N->getOperand(3),
CurDAG->getTargetConstant(0, dl, MVT::i32)};
SDValue BCDOp = SDValue(CurDAG->getMachineNode(Opcode, dl, VTs, Ops), 0);
SDValue CR6Reg = CurDAG->getRegister(PPC::CR6, MVT::i32);
// On Power10, we can use SETBC[R]. On prior architectures, we have to use
// MFOCRF and shift/negate the value.
if (Subtarget->isISA3_1()) {
SDValue SubRegIdx = CurDAG->getTargetConstant(SubReg, dl, MVT::i32);
SDValue CRBit = SDValue(
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
CR6Reg, SubRegIdx, BCDOp.getValue(1)),
0);
CurDAG->SelectNodeTo(N, Reverse ? PPC::SETBCR : PPC::SETBC, MVT::i32,
CRBit);
} else {
SDValue Move =
SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR6Reg,
BCDOp.getValue(1)),
0);
// RLWINM operands are {source, SH, MB, ME}. With MB == ME == 31 the mask
// keeps a single bit, and the rotate amount derived from ShiftVal chooses
// which bit of the moved CR value lands there.
SDValue Ops[] = {Move, getI32Imm((32 - (4 + ShiftVal)) & 31, dl),
getI32Imm(31, dl), getI32Imm(31, dl)};
if (!Reverse)
CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
else {
// Reversed predicate: invert the extracted bit with XORI 1.
SDValue Shift = SDValue(
CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Shift, getI32Imm(1, dl));
}
}
return;
}
// Everything below in this case is only selected when isISA3_1() is true.
if (!Subtarget->isISA3_1())
break;
// Map the vector string isolate predicate intrinsics to their record-form
// instructions; Opcode stays 0 for any other intrinsic.
unsigned Opcode = 0;
switch (IntID) {
default:
break;
case Intrinsic::ppc_altivec_vstribr_p:
Opcode = PPC::VSTRIBR_rec;
break;
case Intrinsic::ppc_altivec_vstribl_p:
Opcode = PPC::VSTRIBL_rec;
break;
case Intrinsic::ppc_altivec_vstrihr_p:
Opcode = PPC::VSTRIHR_rec;
break;
case Intrinsic::ppc_altivec_vstrihl_p:
Opcode = PPC::VSTRIHL_rec;
break;
}
if (!Opcode)
break;
// Generate the appropriate vector string isolate intrinsic to match.
// Only operand 2 is passed to the instruction; operand 1 is not read here.
EVT VTs[] = {MVT::v16i8, MVT::Glue};
SDValue VecStrOp =
SDValue(CurDAG->getMachineNode(Opcode, dl, VTs, N->getOperand(2)), 0);
// Vector string isolate instructions update the EQ bit of CR6.
// Generate a SETBC instruction to extract the bit and place it in a GPR.
SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_eq, dl, MVT::i32);
SDValue CR6Reg = CurDAG->getRegister(PPC::CR6, MVT::i32);
SDValue CRBit = SDValue(
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
CR6Reg, SubRegIdx, VecStrOp.getValue(1)),
0);
CurDAG->SelectNodeTo(N, PPC::SETBC, MVT::i32, CRBit);
return;
}
// Plain and strict-FP setcc share trySETCC; if it declines, SelectCode runs.
case ISD::SETCC:
case ISD::STRICT_FSETCC:
case ISD::STRICT_FSETCCS:
if (trySETCC(N))
return;
break;
// These nodes will be transformed into GETtlsADDR32 node, which
// later becomes BL_TLS __tls_get_addr(sym at tlsgd)@PLT
case PPCISD::ADDI_TLSLD_L_ADDR:
case PPCISD::ADDI_TLSGD_L_ADDR: {
const Module *Mod = MF->getFunction().getParent();
// Nothing extra to do unless pointers are 32-bit, the subtarget uses secure
// PLT on ELF, and the module's PIC level is not SmallPIC.
if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 ||
!Subtarget->isSecurePlt() || !Subtarget->isTargetELF() ||
Mod->getPICLevel() == PICLevel::SmallPIC)
break;
// Attach global base pointer on GETtlsADDR32 node in order to
// generate secure plt code for TLS symbols.
// The returned node is discarded; N itself is still selected by SelectCode.
getGlobalBaseReg();
} break;
// 32-bit position-independent secure-PLT ELF calls: request the global base
// register when the callee (operand 1) carries the MO_PLT target flag. The
// call node itself is always selected by SelectCode.
case PPCISD::CALL: {
if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 ||
!TM.isPositionIndependent() || !Subtarget->isSecurePlt() ||
!Subtarget->isTargetELF())
break;
SDValue Op = N->getOperand(1);
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
if (GA->getTargetFlags() == PPCII::MO_PLT)
getGlobalBaseReg();
}
else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) {
if (ES->getTargetFlags() == PPCII::MO_PLT)
getGlobalBaseReg();
}
}
break;
case PPCISD::GlobalBaseReg:
ReplaceNode(N, getGlobalBaseReg());
return;
// A bare frame index: N is both the node to replace and the frame-index node.
case ISD::FrameIndex:
selectFrameIndex(N, N);
return;
// Operand 0 is forwarded as the MFOCRF input; operand 1 is the incoming glue.
case PPCISD::MFOCRF: {
SDValue InFlag = N->getOperand(1);
ReplaceNode(N, CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32,
N->getOperand(0), InFlag));
return;
}
// ReadTB produces two i32 results and a chain; operand 0 is the input chain.
case PPCISD::READ_TIME_BASE:
ReplaceNode(N, CurDAG->getMachineNode(PPC::ReadTB, dl, MVT::i32, MVT::i32,
MVT::Other, N->getOperand(0)));
return;
// Emitted as SRADI/SRAWI by the constant amount in operand 1, followed by
// ADDZE8/ADDZE, which takes the shift's value and its glue result.
case PPCISD::SRA_ADDZE: {
SDValue N0 = N->getOperand(0);
SDValue ShiftAmt =
CurDAG->getTargetConstant(*cast<ConstantSDNode>(N->getOperand(1))->
getConstantIntValue(), dl,
N->getValueType(0));
if (N->getValueType(0) == MVT::i64) {
SDNode *Op =
CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, MVT::Glue,
N0, ShiftAmt);
CurDAG->SelectNodeTo(N, PPC::ADDZE8, MVT::i64, SDValue(Op, 0),
SDValue(Op, 1));
return;
} else {
assert(N->getValueType(0) == MVT::i32 &&
"Expecting i64 or i32 in PPCISD::SRA_ADDZE");
SDNode *Op =
CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,
N0, ShiftAmt);
CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, SDValue(Op, 0),
SDValue(Op, 1));
return;
}
}
case ISD::STORE: {
// Change TLS initial-exec D-form stores to X-form stores.
// Only attempted when EnableTLSOpt is set, the ABI is ELFv2 and the store
// is not pre-increment; otherwise the store is left to SelectCode.
StoreSDNode *ST = cast<StoreSDNode>(N);
if (EnableTLSOpt && Subtarget->isELFv2ABI() &&
ST->getAddressingMode() != ISD::PRE_INC)
if (tryTLSXFormStore(ST))
return;
break;
}
case ISD::LOAD: {
// Handle preincrement loads.
LoadSDNode *LD = cast<LoadSDNode>(N);
EVT LoadedVT = LD->getMemoryVT();
// Normal loads are handled by code generated from the .td file.
if (LD->getAddressingMode() != ISD::PRE_INC) {
// Change TLS initial-exec D-form loads to X-form loads.
if (EnableTLSOpt && Subtarget->isELFv2ABI())
if (tryTLSXFormLoad(LD))
return;
break;
}
// Pre-increment load. The opcode family depends on the offset: a
// TargetConstant or TargetGlobalAddress offset picks the ...U opcodes,
// anything else picks the ...UX opcodes. Both paths create a node with three
// results: the loaded value, a pointer-typed value and the chain.
SDValue Offset = LD->getOffset();
if (Offset.getOpcode() == ISD::TargetConstant ||
Offset.getOpcode() == ISD::TargetGlobalAddress) {
unsigned Opcode;
bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
if (LD->getValueType(0) != MVT::i64) {
// Handle PPC32 integer and normal FP loads.
assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
switch (LoadedVT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Invalid PPC load type!");
case MVT::f64: Opcode = PPC::LFDU; break;
case MVT::f32: Opcode = PPC::LFSU; break;
case MVT::i32: Opcode = PPC::LWZU; break;
case MVT::i16: Opcode = isSExt ? PPC::LHAU : PPC::LHZU; break;
case MVT::i1:
case MVT::i8: Opcode = PPC::LBZU; break;
}
} else {
// i64 result. In this path a sign-extending load is only accepted for
// i16 memory types (see the assert).
assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
switch (LoadedVT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Invalid PPC load type!");
case MVT::i64: Opcode = PPC::LDU; break;
case MVT::i32: Opcode = PPC::LWZU8; break;
case MVT::i16: Opcode = isSExt ? PPC::LHAU8 : PPC::LHZU8; break;
case MVT::i1:
case MVT::i8: Opcode = PPC::LBZU8; break;
}
}
SDValue Chain = LD->getChain();
SDValue Base = LD->getBasePtr();
// Operand order for these opcodes is {offset, base, chain}.
SDValue Ops[] = { Offset, Base, Chain };
SDNode *MN = CurDAG->getMachineNode(
Opcode, dl, LD->getValueType(0),
PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops);
transferMemOperands(N, MN);
ReplaceNode(N, MN);
return;
} else {
// Any other kind of offset uses the ...UX opcodes.
unsigned Opcode;
bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
if (LD->getValueType(0) != MVT::i64) {
// Handle PPC32 integer and normal FP loads.
assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
switch (LoadedVT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Invalid PPC load type!");
case MVT::f64: Opcode = PPC::LFDUX; break;
case MVT::f32: Opcode = PPC::LFSUX; break;
case MVT::i32: Opcode = PPC::LWZUX; break;
case MVT::i16: Opcode = isSExt ? PPC::LHAUX : PPC::LHZUX; break;
case MVT::i1:
case MVT::i8: Opcode = PPC::LBZUX; break;
}
} else {
// i64 result. Unlike the path above, a sign-extending i32 load is also
// accepted here and selects LWAUX.
assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
assert((!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) &&
"Invalid sext update load");
switch (LoadedVT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Invalid PPC load type!");
case MVT::i64: Opcode = PPC::LDUX; break;
case MVT::i32: Opcode = isSExt ? PPC::LWAUX : PPC::LWZUX8; break;
case MVT::i16: Opcode = isSExt ? PPC::LHAUX8 : PPC::LHZUX8; break;
case MVT::i1:
case MVT::i8: Opcode = PPC::LBZUX8; break;
}
}
SDValue Chain = LD->getChain();
SDValue Base = LD->getBasePtr();
// Operand order for these opcodes is {base, offset, chain}.
SDValue Ops[] = { Base, Offset, Chain };
SDNode *MN = CurDAG->getMachineNode(
Opcode, dl, LD->getValueType(0),
PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops);
transferMemOperands(N, MN);
ReplaceNode(N, MN);
return;
}
}
case ISD::AND:
// If this is an 'and' with a mask, try to emit rlwinm/rldicl/rldicr
// The helpers are tried in the order written; the first to succeed wins.
if (tryAsSingleRLWINM(N) || tryAsSingleRLWIMI(N) || tryAsSingleRLDICL(N) ||
tryAsSingleRLDICR(N) || tryAsSingleRLWINM8(N) || tryAsPairOfRLDICL(N))
return;
// Other cases are autogenerated.
break;
case ISD::OR: {
// For i32, try a bitfield insert first.
if (N->getValueType(0) == MVT::i32)
if (tryBitfieldInsert(N))
return;
int16_t Imm;
if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
isIntS16Immediate(N->getOperand(1), Imm)) {
KnownBits LHSKnown = CurDAG->computeKnownBits(N->getOperand(0));
// If this is equivalent to an add, then we can fold it with the
// FrameIndex calculation.
// That holds when every bit set in Imm (widened to 64 bits) is known to be
// zero in the frame-index operand.
if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)Imm) == ~0ULL) {
selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm);
return;
}
}
// If this is 'or' against an imm with consecutive ones and both sides zero,
// try to emit rldimi
if (tryAsSingleRLDIMI(N))
return;
// OR with a 32-bit immediate can be handled by ori + oris
// without creating an immediate in a GPR.
uint64_t Imm64 = 0;
bool IsPPC64 = Subtarget->isPPC64();
if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) &&
(Imm64 & ~0xFFFFFFFFuLL) == 0) {
// If ImmHi (ImmLo) is zero, only one ori (oris) is generated later.
// The two-instruction sequence is built here only when both halves are
// non-zero.
uint64_t ImmHi = Imm64 >> 16;
uint64_t ImmLo = Imm64 & 0xFFFF;
if (ImmHi != 0 && ImmLo != 0) {
SDNode *Lo = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
N->getOperand(0),
getI16Imm(ImmLo, dl));
SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)};
CurDAG->SelectNodeTo(N, PPC::ORIS8, MVT::i64, Ops1);
return;
}
}
// Other cases are autogenerated.
break;
}
case ISD::XOR: {
// XOR with a 32-bit immediate can be handled by xori + xoris
// without creating an immediate in a GPR.
uint64_t Imm64 = 0;
bool IsPPC64 = Subtarget->isPPC64();
if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) &&
(Imm64 & ~0xFFFFFFFFuLL) == 0) {
// If ImmHi (ImmLo) is zero, only one xori (xoris) is generated later.
// The two-instruction sequence is built here only when both halves are
// non-zero.
uint64_t ImmHi = Imm64 >> 16;
uint64_t ImmLo = Imm64 & 0xFFFF;
if (ImmHi != 0 && ImmLo != 0) {
SDNode *Lo = CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
N->getOperand(0),
getI16Imm(ImmLo, dl));
SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)};
CurDAG->SelectNodeTo(N, PPC::XORIS8, MVT::i64, Ops1);
return;
}
}
break;
}
// (add FrameIndex, simm16) is folded into the frame-index selection.
case ISD::ADD: {
int16_t Imm;
if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
isIntS16Immediate(N->getOperand(1), Imm)) {
selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm);
return;
}
break;
}
// A shift whose operand 0 is (and x, imm) becomes a single RLWINM on x when
// isRotateAndMask accepts the combination and fills in SH/MB/ME.
case ISD::SHL: {
unsigned Imm, SH, MB, ME;
if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
isRotateAndMask(N, Imm, true, SH, MB, ME)) {
SDValue Ops[] = { N->getOperand(0).getOperand(0),
getI32Imm(SH, dl), getI32Imm(MB, dl),
getI32Imm(ME, dl) };
CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
return;
}
// Other cases are autogenerated.
break;
}
// Same folding as ISD::SHL above, applied to a logical right shift.
case ISD::SRL: {
unsigned Imm, SH, MB, ME;
if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
isRotateAndMask(N, Imm, true, SH, MB, ME)) {
SDValue Ops[] = { N->getOperand(0).getOperand(0),
getI32Imm(SH, dl), getI32Imm(MB, dl),
getI32Imm(ME, dl) };
CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
return;
}
// Other cases are autogenerated.
break;
}
case ISD::MUL: {
// Only multiplications by an i64 constant are considered here.
SDValue Op1 = N->getOperand(1);
if (Op1.getOpcode() != ISD::Constant || Op1.getValueType() != MVT::i64)
break;
// If the multiplier fits int16, we can handle it with mulli.
// A multiplier with no trailing zero bits (Shift == 0) is also left alone.
int64_t Imm = cast<ConstantSDNode>(Op1)->getZExtValue();
unsigned Shift = countTrailingZeros<uint64_t>(Imm);
if (isInt<16>(Imm) || !Shift)
break;
// If the shifted value fits int16, we can do this transformation:
// (mul X, c1 << c2) -> (rldicr (mulli X, c1) c2). We do this in ISEL due to
// DAGCombiner prefers (shl (mul X, c1), c2) -> (mul X, c1 << c2).
uint64_t ImmSh = Imm >> Shift;
if (isInt<16>(ImmSh)) {
uint64_t SextImm = SignExtend64(ImmSh & 0xFFFF, 16);
SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64);
SDNode *MulNode = CurDAG->getMachineNode(PPC::MULLI8, dl, MVT::i64,
N->getOperand(0), SDImm);
// RLDICR is given Shift and 63 - Shift as its two immediates.
CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, SDValue(MulNode, 0),
getI32Imm(Shift, dl), getI32Imm(63 - Shift, dl));
return;
}
break;
}
// FIXME: Remove this once the ANDI glue bug is fixed:
// Manual selection is used only while ANDIGlueBug is set: emit the record-form
// ANDI with immediate 1 and read the EQ or GT bit of CR0 through the glue.
case PPCISD::ANDI_rec_1_EQ_BIT:
case PPCISD::ANDI_rec_1_GT_BIT: {
if (!ANDIGlueBug)
break;
EVT InVT = N->getOperand(0).getValueType();
assert((InVT == MVT::i64 || InVT == MVT::i32) &&
"Invalid input type for ANDI_rec_1_EQ_BIT");
unsigned Opcode = (InVT == MVT::i64) ? PPC::ANDI8_rec : PPC::ANDI_rec;
SDValue AndI(CurDAG->getMachineNode(Opcode, dl, InVT, MVT::Glue,
N->getOperand(0),
CurDAG->getTargetConstant(1, dl, InVT)),
0);
SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
SDValue SRIdxVal = CurDAG->getTargetConstant(
N->getOpcode() == PPCISD::ANDI_rec_1_EQ_BIT ? PPC::sub_eq : PPC::sub_gt,
dl, MVT::i32);
CurDAG->SelectNodeTo(N, TargetOpcode::EXTRACT_SUBREG, MVT::i1, CR0Reg,
SRIdxVal, SDValue(AndI.getNode(), 1) /* glue */);
return;
}
// select_cc: operands 0 and 1 are compared using the condition code in
// operand 4; the result is operand 2 when the condition holds, else operand 3.
case ISD::SELECT_CC: {
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
EVT PtrVT =
CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout());
bool isPPC64 = (PtrVT == MVT::i64);
// If this is a select of i1 operands, we'll pattern match it.
if (Subtarget->useCRBits() && N->getOperand(0).getValueType() == MVT::i1)
break;
// With ISA 3.0 on 64-bit, select_cc nodes accepted by mayUseP9Setb are
// emitted as a compare followed by SETB/SETB8.
if (Subtarget->isISA3_0() && Subtarget->isPPC64()) {
bool NeedSwapOps = false;
bool IsUnCmp = false;
if (mayUseP9Setb(N, CC, CurDAG, NeedSwapOps, IsUnCmp)) {
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
if (NeedSwapOps)
std::swap(LHS, RHS);
// Make use of SelectCC to generate the comparison to set CR bits, for
// equality comparisons having one literal operand, SelectCC probably
// doesn't need to materialize the whole literal and just use xoris to
// check it first, it leads the following comparison result can't
// exactly represent GT/LT relationship. So to avoid this we specify
// SETGT/SETUGT here instead of SETEQ.
SDValue GenCC =
SelectCC(LHS, RHS, IsUnCmp ? ISD::SETUGT : ISD::SETGT, dl);
CurDAG->SelectNodeTo(
N, N->getSimpleValueType(0) == MVT::i64 ? PPC::SETB8 : PPC::SETB,
N->getValueType(0), GenCC);
NumP9Setb++;
return;
}
}
// Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc
// With 32-bit pointers, an i32 (select_cc lhs, 0, 1, 0, setne) becomes ADDIC
// lhs, -1 followed by SUBFE, which takes the ADDIC value, lhs and the ADDIC
// glue result.
if (!isPPC64)
if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
if (ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
if (N1C->isZero() && N3C->isZero() && N2C->getZExtValue() == 1ULL &&
CC == ISD::SETNE &&
// FIXME: Implement this optzn for PPC64.
N->getValueType(0) == MVT::i32) {
SDNode *Tmp =
CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
N->getOperand(0), getI32Imm(~0U, dl));
CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(Tmp, 0),
N->getOperand(0), SDValue(Tmp, 1));
return;
}
// General case: materialize the comparison into a CR register first.
SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl);
if (N->getValueType(0) == MVT::i1) {
// An i1 select is: (c & t) | (!c & f).
// getCRIdxForSetCC returns the CR bit index to test and sets Inv when the
// condition is the complement of that bit.
bool Inv;
unsigned Idx = getCRIdxForSetCC(CC, Inv);
unsigned SRI;
switch (Idx) {
default: llvm_unreachable("Invalid CC index");
case 0: SRI = PPC::sub_lt; break;
case 1: SRI = PPC::sub_gt; break;
case 2: SRI = PPC::sub_eq; break;
case 3: SRI = PPC::sub_un; break;
}
SDValue CCBit = CurDAG->getTargetExtractSubreg(SRI, dl, MVT::i1, CCReg);
// CRNOR of a bit with itself yields its complement.
SDValue NotCCBit(CurDAG->getMachineNode(PPC::CRNOR, dl, MVT::i1,
CCBit, CCBit), 0);
SDValue C = Inv ? NotCCBit : CCBit,
NotC = Inv ? CCBit : NotCCBit;
SDValue CAndT(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
C, N->getOperand(2)), 0);
SDValue NotCAndF(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
NotC, N->getOperand(3)), 0);
CurDAG->SelectNodeTo(N, PPC::CROR, MVT::i1, CAndT, NotCAndF);
return;
}
// Non-i1 result: emit a SELECT_CC_* pseudo chosen from the result type and
// subtarget features, with the branch predicate as its last operand.
unsigned BROpc =
getPredicateForSetCC(CC, N->getOperand(0).getValueType(), Subtarget);
unsigned SelectCCOp;
if (N->getValueType(0) == MVT::i32)
SelectCCOp = PPC::SELECT_CC_I4;
else if (N->getValueType(0) == MVT::i64)
SelectCCOp = PPC::SELECT_CC_I8;
else if (N->getValueType(0) == MVT::f32) {
if (Subtarget->hasP8Vector())
SelectCCOp = PPC::SELECT_CC_VSSRC;
else if (Subtarget->hasSPE())
SelectCCOp = PPC::SELECT_CC_SPE4;
else
SelectCCOp = PPC::SELECT_CC_F4;
} else if (N->getValueType(0) == MVT::f64) {
if (Subtarget->hasVSX())
SelectCCOp = PPC::SELECT_CC_VSFRC;
else if (Subtarget->hasSPE())
SelectCCOp = PPC::SELECT_CC_SPE;
else
SelectCCOp = PPC::SELECT_CC_F8;
} else if (N->getValueType(0) == MVT::f128)
SelectCCOp = PPC::SELECT_CC_F16;
else if (Subtarget->hasSPE())
SelectCCOp = PPC::SELECT_CC_SPE;
else if (N->getValueType(0) == MVT::v2f64 ||
N->getValueType(0) == MVT::v2i64)
SelectCCOp = PPC::SELECT_CC_VSRC;
else
SelectCCOp = PPC::SELECT_CC_VRRC;
SDValue Ops[] = { CCReg, N->getOperand(2), N->getOperand(3),
getI32Imm(BROpc, dl) };
CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops);
return;
}
// Two-element (v2f64 / v2i64) shuffles with VSX become LXVDSX or XXPERMDI.
case ISD::VECTOR_SHUFFLE:
if (Subtarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 ||
N->getValueType(0) == MVT::v2i64)) {
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
// Mask elements below 2 select from operand 0, the rest from operand 1.
SDValue Op1 = N->getOperand(SVN->getMaskElt(0) < 2 ? 0 : 1),
Op2 = N->getOperand(SVN->getMaskElt(1) < 2 ? 0 : 1);
// DM[i] is the element index within the chosen source vector. Mask values
// <= 0 and 2 map to element 0, all others to element 1.
unsigned DM[2];
for (int i = 0; i < 2; ++i)
if (SVN->getMaskElt(i) <= 0 || SVN->getMaskElt(i) == 2)
DM[i] = 0;
else
DM[i] = 1;
// A splat of element 0 of a scalar_to_vector of a loaded 64-bit value can
// be done by a single LXVDSX, provided the load is unindexed, both the load
// and the scalar_to_vector have one use, and the address can be matched by
// SelectAddrIdxOnly.
if (Op1 == Op2 && DM[0] == 0 && DM[1] == 0 &&
Op1.getOpcode() == ISD::SCALAR_TO_VECTOR &&
isa<LoadSDNode>(Op1.getOperand(0))) {
LoadSDNode *LD = cast<LoadSDNode>(Op1.getOperand(0));
SDValue Base, Offset;
if (LD->isUnindexed() && LD->hasOneUse() && Op1.hasOneUse() &&
(LD->getMemoryVT() == MVT::f64 ||
LD->getMemoryVT() == MVT::i64) &&
SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) {
SDValue Chain = LD->getChain();
SDValue Ops[] = { Base, Offset, Chain };
MachineMemOperand *MemOp = LD->getMemOperand();
SDNode *NewN = CurDAG->SelectNodeTo(N, PPC::LXVDSX,
N->getValueType(0), Ops);
CurDAG->setNodeMemRefs(cast<MachineSDNode>(NewN), {MemOp});
return;
}
}
// For little endian, we must swap the input operands and adjust
// the mask elements (reverse and invert them).
if (Subtarget->isLittleEndian()) {
std::swap(Op1, Op2);
unsigned tmp = DM[0];
DM[0] = 1 - DM[1];
DM[1] = 1 - tmp;
}
// The XXPERMDI immediate packs DM[0] into bit 1 and DM[1] into bit 0.
SDValue DMV = CurDAG->getTargetConstant(DM[1] | (DM[0] << 1), dl,
MVT::i32);
SDValue Ops[] = { Op1, Op2, DMV };
CurDAG->SelectNodeTo(N, PPC::XXPERMDI, N->getValueType(0), Ops);
return;
}
break;
// The machine node takes operand 1 followed by operand 0; the 8-suffixed
// opcodes are used on PPC64.
case PPCISD::BDNZ:
case PPCISD::BDZ: {
bool IsPPC64 = Subtarget->isPPC64();
SDValue Ops[] = { N->getOperand(1), N->getOperand(0) };
CurDAG->SelectNodeTo(N, N->getOpcode() == PPCISD::BDNZ
? (IsPPC64 ? PPC::BDNZ8 : PPC::BDNZ)
: (IsPPC64 ? PPC::BDZ8 : PPC::BDZ),
MVT::Other, Ops);
return;
}
case PPCISD::COND_BRANCH: {
// Op #0 is the Chain.
// Op #1 is the PPC::PRED_* number.
// Op #2 is the CR#
// Op #3 is the Dest MBB
// Op #4 is the Flag.
// Prevent PPC::PRED_* from being selected into LI.
unsigned PCC = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
// When EnableBranchHint is set, OR the hint bits for the destination block
// into the predicate.
if (EnableBranchHint)
PCC |= getBranchHint(PCC, *FuncInfo, N->getOperand(3));
SDValue Pred = getI32Imm(PCC, dl);
SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3),
N->getOperand(0), N->getOperand(4) };
CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
return;
}
// br_cc as used below: operand 0 is the chain, operand 1 the condition code,
// operands 2 and 3 the compared values and operand 4 the branch destination.
case ISD::BR_CC: {
if (tryFoldSWTestBRCC(N))
return;
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
unsigned PCC =
getPredicateForSetCC(CC, N->getOperand(2).getValueType(), Subtarget);
if (N->getOperand(2).getValueType() == MVT::i1) {
// i1 operands: compute the comparison with a single CR-logical operation
// on the two bits and branch on the resulting bit with BC.
unsigned Opc;
bool Swap;
switch (PCC) {
default: llvm_unreachable("Unexpected Boolean-operand predicate");
case PPC::PRED_LT: Opc = PPC::CRANDC; Swap = true; break;
case PPC::PRED_LE: Opc = PPC::CRORC; Swap = true; break;
case PPC::PRED_EQ: Opc = PPC::CREQV; Swap = false; break;
case PPC::PRED_GE: Opc = PPC::CRORC; Swap = false; break;
case PPC::PRED_GT: Opc = PPC::CRANDC; Swap = false; break;
case PPC::PRED_NE: Opc = PPC::CRXOR; Swap = false; break;
}
// A signed comparison of i1 values produces the opposite result to an
// unsigned one if the condition code includes less-than or greater-than.
// This is because 1 is the most negative signed i1 number and the most
// positive unsigned i1 number. The CR-logical operations used for such
// comparisons are non-commutative so for signed comparisons vs. unsigned
// ones, the input operands just need to be swapped.
if (ISD::isSignedIntSetCC(CC))
Swap = !Swap;
SDValue BitComp(CurDAG->getMachineNode(Opc, dl, MVT::i1,
N->getOperand(Swap ? 3 : 2),
N->getOperand(Swap ? 2 : 3)), 0);
CurDAG->SelectNodeTo(N, PPC::BC, MVT::Other, BitComp, N->getOperand(4),
N->getOperand(0));
return;
}
// Non-i1 operands: emit the compare via SelectCC and a BCC on its result.
if (EnableBranchHint)
PCC |= getBranchHint(PCC, *FuncInfo, N->getOperand(4));
SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl);
SDValue Ops[] = { getI32Imm(PCC, dl), CondCode,
N->getOperand(4), N->getOperand(0) };
CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
return;
}
// Indirect branch: MTCTR/MTCTR8 on the target, then BCTR/BCTR8.
case ISD::BRIND: {
// FIXME: Should custom lower this.
SDValue Chain = N->getOperand(0);
SDValue Target = N->getOperand(1);
unsigned Opc = Target.getValueType() == MVT::i32 ? PPC::MTCTR : PPC::MTCTR8;
// Despite its name, Reg holds the branch opcode (BCTR or BCTR8).
unsigned Reg = Target.getValueType() == MVT::i32 ? PPC::BCTR : PPC::BCTR8;
// The MTCTR node's only result is glue, which is what the branch consumes.
Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Target,
Chain), 0);
CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain);
return;
}
// TOC entry access; operand 0 is the symbol and operand 1 the TOC base. The
// sequence depends on pointer width, ABI (ELF vs. AIX) and code model.
case PPCISD::TOC_ENTRY: {
const bool isPPC64 = Subtarget->isPPC64();
const bool isELFABI = Subtarget->isSVR4ABI();
const bool isAIXABI = Subtarget->isAIXABI();
// PowerPC only supports small, medium and large code model.
const CodeModel::Model CModel = TM.getCodeModel();
assert(!(CModel == CodeModel::Tiny || CModel == CodeModel::Kernel) &&
"PowerPC doesn't support tiny or kernel code models.");
if (isAIXABI && CModel == CodeModel::Medium)
report_fatal_error("Medium code model is not supported on AIX.");
// For 64-bit ELF small code model, we allow SelectCodeCommon to handle
// this, selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA. For AIX
// small code model, we need to check for a toc-data attribute.
if (isPPC64 && !isAIXABI && CModel == CodeModel::Small)
break;
// Helper: replace TocEntry with a single machine node OpCode(GA, TocBase)
// of type OperandTy, carrying over the memory operands.
auto replaceWith = [this, &dl](unsigned OpCode, SDNode *TocEntry,
EVT OperandTy) {
SDValue GA = TocEntry->getOperand(0);
SDValue TocBase = TocEntry->getOperand(1);
SDNode *MN = CurDAG->getMachineNode(OpCode, dl, OperandTy, GA, TocBase);
transferMemOperands(TocEntry, MN);
ReplaceNode(TocEntry, MN);
};
// Handle 32-bit small code model.
if (!isPPC64 && CModel == CodeModel::Small) {
// Transforms the ISD::TOC_ENTRY node to passed in Opcode, either
// PPC::ADDItoc, or PPC::LWZtoc
if (isELFABI) {
assert(TM.isPositionIndependent() &&
"32-bit ELF can only have TOC entries in position independent"
" code.");
// 32-bit ELF always uses a small code model toc access.
replaceWith(PPC::LWZtoc, N, MVT::i32);
return;
}
assert(isAIXABI && "ELF ABI already handled");
// 32-bit AIX: ADDItoc when hasTocDataAttr reports the toc-data attribute,
// otherwise LWZtoc.
if (hasTocDataAttr(N->getOperand(0),
CurDAG->getDataLayout().getPointerSize())) {
replaceWith(PPC::ADDItoc, N, MVT::i32);
return;
}
replaceWith(PPC::LWZtoc, N, MVT::i32);
return;
}
// 64-bit small code model; only AIX reaches this point (see the assert).
if (isPPC64 && CModel == CodeModel::Small) {
assert(isAIXABI && "ELF ABI handled in common SelectCode");
if (hasTocDataAttr(N->getOperand(0),
CurDAG->getDataLayout().getPointerSize())) {
replaceWith(PPC::ADDItoc8, N, MVT::i64);
return;
}
// Break if it doesn't have toc data attribute. Proceed with common
// SelectCode.
break;
}
assert(CModel != CodeModel::Small && "All small code models handled.");
assert((isPPC64 || (isAIXABI && !isPPC64)) && "We are dealing with 64-bit"
" ELF/AIX or 32-bit AIX in the following.");
// Transforms the ISD::TOC_ENTRY node for 32-bit AIX large code model mode
// or 64-bit medium (ELF-only) or large (ELF and AIX) code model code. We
// generate two instructions as described below. The first source operand
// is a symbol reference. If it must be toc-referenced according to
// Subtarget, we generate:
// [32-bit AIX]
// LWZtocL(@sym, ADDIStocHA(%r2, @sym))
// [64-bit ELF/AIX]
// LDtocL(@sym, ADDIStocHA8(%x2, @sym))
// Otherwise we generate:
// ADDItocL(ADDIStocHA8(%x2, @sym), @sym)
SDValue GA = N->getOperand(0);
SDValue TOCbase = N->getOperand(1);
EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
SDNode *Tmp = CurDAG->getMachineNode(
isPPC64 ? PPC::ADDIStocHA8 : PPC::ADDIStocHA, dl, VT, TOCbase, GA);
if (PPCLowering->isAccessedAsGotIndirect(GA)) {
// If it is accessed as got-indirect, we need an extra LWZ/LD to load
// the address.
SDNode *MN = CurDAG->getMachineNode(
isPPC64 ? PPC::LDtocL : PPC::LWZtocL, dl, VT, GA, SDValue(Tmp, 0));
transferMemOperands(N, MN);
ReplaceNode(N, MN);
return;
}
// Build the address relative to the TOC-pointer.
// NOTE(review): this path always uses ADDItocL with MVT::i64, independent of
// isPPC64 -- presumably 32-bit AIX never gets here; confirm.
ReplaceNode(N, CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64,
SDValue(Tmp, 0), GA));
return;
}
case PPCISD::PPC32_PICGOT:
// Generate a PIC-safe GOT reference.
assert(Subtarget->is32BitELFABI() &&
"PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4");
CurDAG->SelectNodeTo(N, PPC::PPC32PICGOT,
PPCLowering->getPointerTy(CurDAG->getDataLayout()),
MVT::i32);
return;
case PPCISD::VADD_SPLAT: {
// This expands into one of three sequences, depending on whether
// the first operand is odd or even, positive or negative.
// Operand 0 is the constant element value and operand 1 the element size
// in bytes (1, 2 or 4).
assert(isa<ConstantSDNode>(N->getOperand(0)) &&
isa<ConstantSDNode>(N->getOperand(1)) &&
"Invalid operand on VADD_SPLAT!");
int Elt = N->getConstantOperandVal(0);
int EltSize = N->getConstantOperandVal(1);
// Opc1 is the splat-immediate opcode, Opc2 the add and Opc3 the subtract for
// the chosen element size.
unsigned Opc1, Opc2, Opc3;
EVT VT;
if (EltSize == 1) {
Opc1 = PPC::VSPLTISB;
Opc2 = PPC::VADDUBM;
Opc3 = PPC::VSUBUBM;
VT = MVT::v16i8;
} else if (EltSize == 2) {
Opc1 = PPC::VSPLTISH;
Opc2 = PPC::VADDUHM;
Opc3 = PPC::VSUBUHM;
VT = MVT::v8i16;
} else {
assert(EltSize == 4 && "Invalid element size on VADD_SPLAT!");
Opc1 = PPC::VSPLTISW;
Opc2 = PPC::VADDUWM;
Opc3 = PPC::VSUBUWM;
VT = MVT::v4i32;
}
if ((Elt & 1) == 0) {
// Elt is even, in the range [-32,-18] + [16,30].
//
// Convert: VADD_SPLAT elt, size
// Into: tmp = VSPLTIS[BHW] elt
// VADDU[BHW]M tmp, tmp
// Where: [BHW] = B for size = 1, H for size = 2, W for size = 4
// The value actually splatted is Elt >> 1 (half of Elt); adding the splat to
// itself restores Elt.
SDValue EltVal = getI32Imm(Elt >> 1, dl);
SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
SDValue TmpVal = SDValue(Tmp, 0);
ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal));
return;
} else if (Elt > 0) {
// Elt is odd and positive, in the range [17,31].
//
// Convert: VADD_SPLAT elt, size
// Into: tmp1 = VSPLTIS[BHW] elt-16
// tmp2 = VSPLTIS[BHW] -16
// VSUBU[BHW]M tmp1, tmp2
SDValue EltVal = getI32Imm(Elt - 16, dl);
SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
EltVal = getI32Imm(-16, dl);
SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
ReplaceNode(N, CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0),
SDValue(Tmp2, 0)));
return;
} else {
// Elt is odd and negative, in the range [-31,-17].
//
// Convert: VADD_SPLAT elt, size
// Into: tmp1 = VSPLTIS[BHW] elt+16
// tmp2 = VSPLTIS[BHW] -16
// VADDU[BHW]M tmp1, tmp2
SDValue EltVal = getI32Imm(Elt + 16, dl);
SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
EltVal = getI32Imm(-16, dl);
SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, SDValue(Tmp1, 0),
SDValue(Tmp2, 0)));
return;
}
}
case PPCISD::LD_SPLAT: {
// Here we want to handle splat load for type v16i8 and v8i16 when there is
// no direct move, we don't need to use stack for this case. If target has
// direct move, we should be able to get the best selection in the .td file.
if (!Subtarget->hasAltivec() || Subtarget->hasDirectMove())
break;
EVT Type = N->getValueType(0);
if (Type != MVT::v16i8 && Type != MVT::v8i16)
break;
// If the alignment for the load is 16 or bigger, we don't need the
// permutated mask to get the required value. The value must be the 0
// element in big endian target or 7/15 in little endian target in the
// result vsx register of lvx instruction.
// Select the instruction in the .td file.
if (cast<MemIntrinsicSDNode>(N)->getAlign() >= Align(16) &&
isOffsetMultipleOf(N, 16))
break;
// Operand 0 is the chain and operand 1 the address, as used by the LVX
// nodes below.
SDValue ZeroReg =
CurDAG->getRegister(Subtarget->isPPC64() ? PPC::ZERO8 : PPC::ZERO,
Subtarget->isPPC64() ? MVT::i64 : MVT::i32);
unsigned LIOpcode = Subtarget->isPPC64() ? PPC::LI8 : PPC::LI;
// v16i8 LD_SPLAT addr
// ======>
// Mask = LVSR/LVSL 0, addr
// LoadLow = LVX 0, addr
// Perm = VPERM LoadLow, LoadLow, Mask
// Splat = VSPLTB 15/0, Perm
//
// v8i16 LD_SPLAT addr
// ======>
// Mask = LVSR/LVSL 0, addr
// LoadLow = LVX 0, addr
// LoadHigh = LVX (LI, 1), addr
// Perm = VPERM LoadLow, LoadHigh, Mask
// Splat = VSPLTH 7/0, Perm
unsigned SplatOp = (Type == MVT::v16i8) ? PPC::VSPLTB : PPC::VSPLTH;
unsigned SplatElemIndex =
Subtarget->isLittleEndian() ? ((Type == MVT::v16i8) ? 15 : 7) : 0;
SDNode *Mask = CurDAG->getMachineNode(
Subtarget->isLittleEndian() ? PPC::LVSR : PPC::LVSL, dl, Type, ZeroReg,
N->getOperand(1));
SDNode *LoadLow =
CurDAG->getMachineNode(PPC::LVX, dl, MVT::v16i8, MVT::Other,
{ZeroReg, N->getOperand(1), N->getOperand(0)});
// For v16i8 one load is enough, so LoadHigh aliases LoadLow. For v8i16 a
// second LVX at index 1 is chained after the first.
// NOTE(review): the LI/LI8 node is typed MVT::i32 even when LIOpcode is LI8
// -- confirm this is intended on PPC64.
SDNode *LoadHigh = LoadLow;
if (Type == MVT::v8i16) {
LoadHigh = CurDAG->getMachineNode(
PPC::LVX, dl, MVT::v16i8, MVT::Other,
{SDValue(CurDAG->getMachineNode(
LIOpcode, dl, MVT::i32,
CurDAG->getTargetConstant(1, dl, MVT::i8)),
0),
N->getOperand(1), SDValue(LoadLow, 1)});
}
// Redirect users of N's chain result to the chain of the last load, and
// attach N's memory operands to that load.
CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(LoadHigh, 1));
transferMemOperands(N, LoadHigh);
SDNode *Perm =
CurDAG->getMachineNode(PPC::VPERM, dl, Type, SDValue(LoadLow, 0),
SDValue(LoadHigh, 0), SDValue(Mask, 0));
CurDAG->SelectNodeTo(N, SplatOp, Type,
CurDAG->getTargetConstant(SplatElemIndex, dl, MVT::i8),
SDValue(Perm, 0));
return;
}
}
// No custom selection applied (or a case chose to break): hand the node to
// SelectCode.
SelectCode(N);
}