void PPCDAGToDAGISel::Select()

in llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp [4966:6040]


void PPCDAGToDAGISel::Select(SDNode *N) {
  SDLoc dl(N);
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return;   // Already selected.
  }

  // In case any misguided DAG-level optimizations form an ADD with a
  // TargetConstant operand, crash here instead of miscompiling (by selecting
  // an r+r add instead of some kind of r+i add).
  if (N->getOpcode() == ISD::ADD &&
      N->getOperand(1).getOpcode() == ISD::TargetConstant)
    llvm_unreachable("Invalid ADD with TargetConstant operand");

  // Try matching complex bit permutations before doing anything else.
  if (tryBitPermutation(N))
    return;

  // Try to emit integer compares as GPR-only sequences (i.e. no use of CR).
  if (tryIntCompareInGPR(N))
    return;

  switch (N->getOpcode()) {
  default: break;

  case ISD::Constant:
    if (N->getValueType(0) == MVT::i64) {
      ReplaceNode(N, selectI64Imm(CurDAG, N));
      return;
    }
    break;

  case ISD::INTRINSIC_VOID: {
    auto IntrinsicID = N->getConstantOperandVal(1);
    if (IntrinsicID == Intrinsic::ppc_tdw || IntrinsicID == Intrinsic::ppc_tw) {
      unsigned Opcode = IntrinsicID == Intrinsic::ppc_tdw ? PPC::TDI : PPC::TWI;
      SDValue Ops[] = {N->getOperand(4), N->getOperand(2), N->getOperand(3)};
      int16_t SImmOperand2;
      int16_t SImmOperand3;
      int16_t SImmOperand4;
      bool isOperand2IntS16Immediate =
          isIntS16Immediate(N->getOperand(2), SImmOperand2);
      bool isOperand3IntS16Immediate =
          isIntS16Immediate(N->getOperand(3), SImmOperand3);
      // We will emit PPC::TD or PPC::TW if the 2nd and 3rd operands are reg +
      // reg or imm + imm. The imm + imm form will be optimized to either an
      // unconditional trap or a nop in a later pass.
      if (isOperand2IntS16Immediate == isOperand3IntS16Immediate)
        Opcode = IntrinsicID == Intrinsic::ppc_tdw ? PPC::TD : PPC::TW;
      else if (isOperand3IntS16Immediate)
        // The 2nd and 3rd operands are reg + imm.
        Ops[2] = getI32Imm(int(SImmOperand3) & 0xFFFF, dl);
      else {
        // The 2nd and 3rd operands are imm + reg.
        bool isOperand4IntS16Immediate =
            isIntS16Immediate(N->getOperand(4), SImmOperand4);
        (void)isOperand4IntS16Immediate;
        assert(isOperand4IntS16Immediate &&
               "The 4th operand is not an Immediate");
        // We need to flip the condition immediate TO.
        int16_t TO = int(SImmOperand4) & 0x1F;
        // We swap the first and second bit of TO if they are not same.
        if ((TO & 0x1) != ((TO & 0x2) >> 1))
          TO = (TO & 0x1) ? TO + 1 : TO - 1;
        // We swap the fourth and fifth bit of TO if they are not same.
        if ((TO & 0x8) != ((TO & 0x10) >> 1))
          TO = (TO & 0x8) ? TO + 8 : TO - 8;
        Ops[0] = getI32Imm(TO, dl);
        Ops[1] = N->getOperand(3);
        Ops[2] = getI32Imm(int(SImmOperand2) & 0xFFFF, dl);
      }
      CurDAG->SelectNodeTo(N, Opcode, MVT::Other, Ops);
      return;
    }
    break;
  }

  case ISD::INTRINSIC_WO_CHAIN: {
    // We emit the PPC::FSELS instruction here because of type conflicts with
    // the comparison operand. The FSELS instruction is defined to use an 8-byte
    // comparison like the FSELD version. The fsels intrinsic takes a 4-byte
    // value for the comparison. When selecting through a .td file, a type
    // error is raised. Must check this first so we never break on the
    // !Subtarget->isISA3_1() check.
    auto IntID = N->getConstantOperandVal(0);
    if (IntID == Intrinsic::ppc_fsels) {
      SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3)};
      CurDAG->SelectNodeTo(N, PPC::FSELS, MVT::f32, Ops);
      return;
    }

    if (IntID == Intrinsic::ppc_bcdadd_p || IntID == Intrinsic::ppc_bcdsub_p) {
      auto Pred = N->getConstantOperandVal(1);
      unsigned Opcode =
          IntID == Intrinsic::ppc_bcdadd_p ? PPC::BCDADD_rec : PPC::BCDSUB_rec;
      unsigned SubReg = 0;
      unsigned ShiftVal = 0;
      bool Reverse = false;
      switch (Pred) {
      case 0:
        SubReg = PPC::sub_eq;
        ShiftVal = 1;
        break;
      case 1:
        SubReg = PPC::sub_eq;
        ShiftVal = 1;
        Reverse = true;
        break;
      case 2:
        SubReg = PPC::sub_lt;
        ShiftVal = 3;
        break;
      case 3:
        SubReg = PPC::sub_lt;
        ShiftVal = 3;
        Reverse = true;
        break;
      case 4:
        SubReg = PPC::sub_gt;
        ShiftVal = 2;
        break;
      case 5:
        SubReg = PPC::sub_gt;
        ShiftVal = 2;
        Reverse = true;
        break;
      case 6:
        SubReg = PPC::sub_un;
        break;
      case 7:
        SubReg = PPC::sub_un;
        Reverse = true;
        break;
      }

      EVT VTs[] = {MVT::v16i8, MVT::Glue};
      SDValue Ops[] = {N->getOperand(2), N->getOperand(3),
                       CurDAG->getTargetConstant(0, dl, MVT::i32)};
      SDValue BCDOp = SDValue(CurDAG->getMachineNode(Opcode, dl, VTs, Ops), 0);
      SDValue CR6Reg = CurDAG->getRegister(PPC::CR6, MVT::i32);
      // On Power10, we can use SETBC[R]. On prior architectures, we have to use
      // MFOCRF and shift/negate the value.
      if (Subtarget->isISA3_1()) {
        SDValue SubRegIdx = CurDAG->getTargetConstant(SubReg, dl, MVT::i32);
        SDValue CRBit = SDValue(
            CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
                                   CR6Reg, SubRegIdx, BCDOp.getValue(1)),
            0);
        CurDAG->SelectNodeTo(N, Reverse ? PPC::SETBCR : PPC::SETBC, MVT::i32,
                             CRBit);
      } else {
        SDValue Move =
            SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR6Reg,
                                           BCDOp.getValue(1)),
                    0);
        SDValue Ops[] = {Move, getI32Imm((32 - (4 + ShiftVal)) & 31, dl),
                         getI32Imm(31, dl), getI32Imm(31, dl)};
        if (!Reverse)
          CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
        else {
          SDValue Shift = SDValue(
              CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
          CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Shift, getI32Imm(1, dl));
        }
      }
      return;
    }

    if (!Subtarget->isISA3_1())
      break;
    unsigned Opcode = 0;
    switch (IntID) {
    default:
      break;
    case Intrinsic::ppc_altivec_vstribr_p:
      Opcode = PPC::VSTRIBR_rec;
      break;
    case Intrinsic::ppc_altivec_vstribl_p:
      Opcode = PPC::VSTRIBL_rec;
      break;
    case Intrinsic::ppc_altivec_vstrihr_p:
      Opcode = PPC::VSTRIHR_rec;
      break;
    case Intrinsic::ppc_altivec_vstrihl_p:
      Opcode = PPC::VSTRIHL_rec;
      break;
    }
    if (!Opcode)
      break;

    // Generate the appropriate vector string isolate intrinsic to match.
    EVT VTs[] = {MVT::v16i8, MVT::Glue};
    SDValue VecStrOp =
        SDValue(CurDAG->getMachineNode(Opcode, dl, VTs, N->getOperand(2)), 0);
    // Vector string isolate instructions update the EQ bit of CR6.
    // Generate a SETBC instruction to extract the bit and place it in a GPR.
    SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_eq, dl, MVT::i32);
    SDValue CR6Reg = CurDAG->getRegister(PPC::CR6, MVT::i32);
    SDValue CRBit = SDValue(
        CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
                               CR6Reg, SubRegIdx, VecStrOp.getValue(1)),
        0);
    CurDAG->SelectNodeTo(N, PPC::SETBC, MVT::i32, CRBit);
    return;
  }

  case ISD::SETCC:
  case ISD::STRICT_FSETCC:
  case ISD::STRICT_FSETCCS:
    if (trySETCC(N))
      return;
    break;
  // These nodes will be transformed into GETtlsADDR32 node, which
  // later becomes BL_TLS __tls_get_addr(sym at tlsgd)@PLT
  case PPCISD::ADDI_TLSLD_L_ADDR:
  case PPCISD::ADDI_TLSGD_L_ADDR: {
    const Module *Mod = MF->getFunction().getParent();
    if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 ||
        !Subtarget->isSecurePlt() || !Subtarget->isTargetELF() ||
        Mod->getPICLevel() == PICLevel::SmallPIC)
      break;
    // Attach global base pointer on GETtlsADDR32 node in order to
    // generate secure plt code for TLS symbols.
    getGlobalBaseReg();
  } break;
  case PPCISD::CALL: {
    if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 ||
        !TM.isPositionIndependent() || !Subtarget->isSecurePlt() ||
        !Subtarget->isTargetELF())
      break;

    SDValue Op = N->getOperand(1);

    if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
      if (GA->getTargetFlags() == PPCII::MO_PLT)
        getGlobalBaseReg();
    }
    else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) {
      if (ES->getTargetFlags() == PPCII::MO_PLT)
        getGlobalBaseReg();
    }
  }
    break;

  case PPCISD::GlobalBaseReg:
    ReplaceNode(N, getGlobalBaseReg());
    return;

  case ISD::FrameIndex:
    selectFrameIndex(N, N);
    return;

  case PPCISD::MFOCRF: {
    SDValue InFlag = N->getOperand(1);
    ReplaceNode(N, CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32,
                                          N->getOperand(0), InFlag));
    return;
  }

  case PPCISD::READ_TIME_BASE:
    ReplaceNode(N, CurDAG->getMachineNode(PPC::ReadTB, dl, MVT::i32, MVT::i32,
                                          MVT::Other, N->getOperand(0)));
    return;

  case PPCISD::SRA_ADDZE: {
    SDValue N0 = N->getOperand(0);
    SDValue ShiftAmt =
      CurDAG->getTargetConstant(*cast<ConstantSDNode>(N->getOperand(1))->
                                  getConstantIntValue(), dl,
                                  N->getValueType(0));
    if (N->getValueType(0) == MVT::i64) {
      SDNode *Op =
        CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, MVT::Glue,
                               N0, ShiftAmt);
      CurDAG->SelectNodeTo(N, PPC::ADDZE8, MVT::i64, SDValue(Op, 0),
                           SDValue(Op, 1));
      return;
    } else {
      assert(N->getValueType(0) == MVT::i32 &&
             "Expecting i64 or i32 in PPCISD::SRA_ADDZE");
      SDNode *Op =
        CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,
                               N0, ShiftAmt);
      CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, SDValue(Op, 0),
                           SDValue(Op, 1));
      return;
    }
  }

  case ISD::STORE: {
    // Change TLS initial-exec D-form stores to X-form stores.
    StoreSDNode *ST = cast<StoreSDNode>(N);
    if (EnableTLSOpt && Subtarget->isELFv2ABI() &&
        ST->getAddressingMode() != ISD::PRE_INC)
      if (tryTLSXFormStore(ST))
        return;
    break;
  }
  case ISD::LOAD: {
    // Handle preincrement loads.
    LoadSDNode *LD = cast<LoadSDNode>(N);
    EVT LoadedVT = LD->getMemoryVT();

    // Normal loads are handled by code generated from the .td file.
    if (LD->getAddressingMode() != ISD::PRE_INC) {
      // Change TLS initial-exec D-form loads to X-form loads.
      if (EnableTLSOpt && Subtarget->isELFv2ABI())
        if (tryTLSXFormLoad(LD))
          return;
      break;
    }

    SDValue Offset = LD->getOffset();
    if (Offset.getOpcode() == ISD::TargetConstant ||
        Offset.getOpcode() == ISD::TargetGlobalAddress) {

      unsigned Opcode;
      bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
      if (LD->getValueType(0) != MVT::i64) {
        // Handle PPC32 integer and normal FP loads.
        assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
        switch (LoadedVT.getSimpleVT().SimpleTy) {
          default: llvm_unreachable("Invalid PPC load type!");
          case MVT::f64: Opcode = PPC::LFDU; break;
          case MVT::f32: Opcode = PPC::LFSU; break;
          case MVT::i32: Opcode = PPC::LWZU; break;
          case MVT::i16: Opcode = isSExt ? PPC::LHAU : PPC::LHZU; break;
          case MVT::i1:
          case MVT::i8:  Opcode = PPC::LBZU; break;
        }
      } else {
        assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
        assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
        switch (LoadedVT.getSimpleVT().SimpleTy) {
          default: llvm_unreachable("Invalid PPC load type!");
          case MVT::i64: Opcode = PPC::LDU; break;
          case MVT::i32: Opcode = PPC::LWZU8; break;
          case MVT::i16: Opcode = isSExt ? PPC::LHAU8 : PPC::LHZU8; break;
          case MVT::i1:
          case MVT::i8:  Opcode = PPC::LBZU8; break;
        }
      }

      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[] = { Offset, Base, Chain };
      SDNode *MN = CurDAG->getMachineNode(
          Opcode, dl, LD->getValueType(0),
          PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops);
      transferMemOperands(N, MN);
      ReplaceNode(N, MN);
      return;
    } else {
      unsigned Opcode;
      bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
      if (LD->getValueType(0) != MVT::i64) {
        // Handle PPC32 integer and normal FP loads.
        assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
        switch (LoadedVT.getSimpleVT().SimpleTy) {
          default: llvm_unreachable("Invalid PPC load type!");
          case MVT::f64: Opcode = PPC::LFDUX; break;
          case MVT::f32: Opcode = PPC::LFSUX; break;
          case MVT::i32: Opcode = PPC::LWZUX; break;
          case MVT::i16: Opcode = isSExt ? PPC::LHAUX : PPC::LHZUX; break;
          case MVT::i1:
          case MVT::i8:  Opcode = PPC::LBZUX; break;
        }
      } else {
        assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
        assert((!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) &&
               "Invalid sext update load");
        switch (LoadedVT.getSimpleVT().SimpleTy) {
          default: llvm_unreachable("Invalid PPC load type!");
          case MVT::i64: Opcode = PPC::LDUX; break;
          case MVT::i32: Opcode = isSExt ? PPC::LWAUX  : PPC::LWZUX8; break;
          case MVT::i16: Opcode = isSExt ? PPC::LHAUX8 : PPC::LHZUX8; break;
          case MVT::i1:
          case MVT::i8:  Opcode = PPC::LBZUX8; break;
        }
      }

      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[] = { Base, Offset, Chain };
      SDNode *MN = CurDAG->getMachineNode(
          Opcode, dl, LD->getValueType(0),
          PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops);
      transferMemOperands(N, MN);
      ReplaceNode(N, MN);
      return;
    }
  }

  case ISD::AND:
    // If this is an 'and' with a mask, try to emit rlwinm/rldicl/rldicr
    if (tryAsSingleRLWINM(N) || tryAsSingleRLWIMI(N) || tryAsSingleRLDICL(N) ||
        tryAsSingleRLDICR(N) || tryAsSingleRLWINM8(N) || tryAsPairOfRLDICL(N))
      return;

    // Other cases are autogenerated.
    break;
  case ISD::OR: {
    if (N->getValueType(0) == MVT::i32)
      if (tryBitfieldInsert(N))
        return;

    int16_t Imm;
    if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
        isIntS16Immediate(N->getOperand(1), Imm)) {
      KnownBits LHSKnown = CurDAG->computeKnownBits(N->getOperand(0));

      // If this is equivalent to an add, then we can fold it with the
      // FrameIndex calculation.
      if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)Imm) == ~0ULL) {
        selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm);
        return;
      }
    }

    // If this is 'or' against an imm with consecutive ones and both sides zero,
    // try to emit rldimi
    if (tryAsSingleRLDIMI(N))
      return;

    // OR with a 32-bit immediate can be handled by ori + oris
    // without creating an immediate in a GPR.
    uint64_t Imm64 = 0;
    bool IsPPC64 = Subtarget->isPPC64();
    if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) &&
        (Imm64 & ~0xFFFFFFFFuLL) == 0) {
      // If ImmHi (ImmLo) is zero, only one ori (oris) is generated later.
      uint64_t ImmHi = Imm64 >> 16;
      uint64_t ImmLo = Imm64 & 0xFFFF;
      if (ImmHi != 0 && ImmLo != 0) {
        SDNode *Lo = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
                                            N->getOperand(0),
                                            getI16Imm(ImmLo, dl));
        SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)};
        CurDAG->SelectNodeTo(N, PPC::ORIS8, MVT::i64, Ops1);
        return;
      }
    }

    // Other cases are autogenerated.
    break;
  }
  case ISD::XOR: {
    // XOR with a 32-bit immediate can be handled by xori + xoris
    // without creating an immediate in a GPR.
    uint64_t Imm64 = 0;
    bool IsPPC64 = Subtarget->isPPC64();
    if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) &&
        (Imm64 & ~0xFFFFFFFFuLL) == 0) {
      // If ImmHi (ImmLo) is zero, only one xori (xoris) is generated later.
      uint64_t ImmHi = Imm64 >> 16;
      uint64_t ImmLo = Imm64 & 0xFFFF;
      if (ImmHi != 0 && ImmLo != 0) {
        SDNode *Lo = CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
                                            N->getOperand(0),
                                            getI16Imm(ImmLo, dl));
        SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)};
        CurDAG->SelectNodeTo(N, PPC::XORIS8, MVT::i64, Ops1);
        return;
      }
    }

    break;
  }
  case ISD::ADD: {
    int16_t Imm;
    if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
        isIntS16Immediate(N->getOperand(1), Imm)) {
      selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm);
      return;
    }

    break;
  }
  case ISD::SHL: {
    unsigned Imm, SH, MB, ME;
    if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
        isRotateAndMask(N, Imm, true, SH, MB, ME)) {
      SDValue Ops[] = { N->getOperand(0).getOperand(0),
                          getI32Imm(SH, dl), getI32Imm(MB, dl),
                          getI32Imm(ME, dl) };
      CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
      return;
    }

    // Other cases are autogenerated.
    break;
  }
  case ISD::SRL: {
    unsigned Imm, SH, MB, ME;
    if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
        isRotateAndMask(N, Imm, true, SH, MB, ME)) {
      SDValue Ops[] = { N->getOperand(0).getOperand(0),
                          getI32Imm(SH, dl), getI32Imm(MB, dl),
                          getI32Imm(ME, dl) };
      CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
      return;
    }

    // Other cases are autogenerated.
    break;
  }
  case ISD::MUL: {
    SDValue Op1 = N->getOperand(1);
    if (Op1.getOpcode() != ISD::Constant || Op1.getValueType() != MVT::i64)
      break;

    // If the multiplier fits int16, we can handle it with mulli.
    int64_t Imm = cast<ConstantSDNode>(Op1)->getZExtValue();
    unsigned Shift = countTrailingZeros<uint64_t>(Imm);
    if (isInt<16>(Imm) || !Shift)
      break;

    // If the shifted value fits int16, we can do this transformation:
    // (mul X, c1 << c2) -> (rldicr (mulli X, c1) c2). We do this in ISEL
    // because DAGCombiner prefers (shl (mul X, c1), c2) -> (mul X, c1 << c2).
    uint64_t ImmSh = Imm >> Shift;
    if (isInt<16>(ImmSh)) {
      uint64_t SextImm = SignExtend64(ImmSh & 0xFFFF, 16);
      SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64);
      SDNode *MulNode = CurDAG->getMachineNode(PPC::MULLI8, dl, MVT::i64,
                                               N->getOperand(0), SDImm);
      CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, SDValue(MulNode, 0),
                           getI32Imm(Shift, dl), getI32Imm(63 - Shift, dl));
      return;
    }
    break;
  }
  // FIXME: Remove this once the ANDI glue bug is fixed:
  case PPCISD::ANDI_rec_1_EQ_BIT:
  case PPCISD::ANDI_rec_1_GT_BIT: {
    if (!ANDIGlueBug)
      break;

    EVT InVT = N->getOperand(0).getValueType();
    assert((InVT == MVT::i64 || InVT == MVT::i32) &&
           "Invalid input type for ANDI_rec_1_EQ_BIT");

    unsigned Opcode = (InVT == MVT::i64) ? PPC::ANDI8_rec : PPC::ANDI_rec;
    SDValue AndI(CurDAG->getMachineNode(Opcode, dl, InVT, MVT::Glue,
                                        N->getOperand(0),
                                        CurDAG->getTargetConstant(1, dl, InVT)),
                 0);
    SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
    SDValue SRIdxVal = CurDAG->getTargetConstant(
        N->getOpcode() == PPCISD::ANDI_rec_1_EQ_BIT ? PPC::sub_eq : PPC::sub_gt,
        dl, MVT::i32);

    CurDAG->SelectNodeTo(N, TargetOpcode::EXTRACT_SUBREG, MVT::i1, CR0Reg,
                         SRIdxVal, SDValue(AndI.getNode(), 1) /* glue */);
    return;
  }
  case ISD::SELECT_CC: {
    ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
    EVT PtrVT =
        CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout());
    bool isPPC64 = (PtrVT == MVT::i64);

    // If this is a select of i1 operands, we'll pattern match it.
    if (Subtarget->useCRBits() && N->getOperand(0).getValueType() == MVT::i1)
      break;

    if (Subtarget->isISA3_0() && Subtarget->isPPC64()) {
      bool NeedSwapOps = false;
      bool IsUnCmp = false;
      if (mayUseP9Setb(N, CC, CurDAG, NeedSwapOps, IsUnCmp)) {
        SDValue LHS = N->getOperand(0);
        SDValue RHS = N->getOperand(1);
        if (NeedSwapOps)
          std::swap(LHS, RHS);

        // Use SelectCC to generate the comparison that sets the CR bits. For
        // equality comparisons with one literal operand, SelectCC may avoid
        // materializing the whole literal and instead check it with xoris
        // first, in which case the comparison result cannot exactly represent
        // the GT/LT relationship. To avoid this, we specify SETGT/SETUGT here
        // instead of SETEQ.
        SDValue GenCC =
            SelectCC(LHS, RHS, IsUnCmp ? ISD::SETUGT : ISD::SETGT, dl);
        CurDAG->SelectNodeTo(
            N, N->getSimpleValueType(0) == MVT::i64 ? PPC::SETB8 : PPC::SETB,
            N->getValueType(0), GenCC);
        NumP9Setb++;
        return;
      }
    }

    // Handle the setcc cases here.  select_cc lhs, 0, 1, 0, cc
    if (!isPPC64)
      if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
        if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
          if (ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
            if (N1C->isZero() && N3C->isZero() && N2C->getZExtValue() == 1ULL &&
                CC == ISD::SETNE &&
                // FIXME: Implement this optzn for PPC64.
                N->getValueType(0) == MVT::i32) {
              SDNode *Tmp =
                CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
                                       N->getOperand(0), getI32Imm(~0U, dl));
              CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(Tmp, 0),
                                   N->getOperand(0), SDValue(Tmp, 1));
              return;
            }

    SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl);

    if (N->getValueType(0) == MVT::i1) {
      // An i1 select is: (c & t) | (!c & f).
      bool Inv;
      unsigned Idx = getCRIdxForSetCC(CC, Inv);

      unsigned SRI;
      switch (Idx) {
      default: llvm_unreachable("Invalid CC index");
      case 0: SRI = PPC::sub_lt; break;
      case 1: SRI = PPC::sub_gt; break;
      case 2: SRI = PPC::sub_eq; break;
      case 3: SRI = PPC::sub_un; break;
      }

      SDValue CCBit = CurDAG->getTargetExtractSubreg(SRI, dl, MVT::i1, CCReg);

      SDValue NotCCBit(CurDAG->getMachineNode(PPC::CRNOR, dl, MVT::i1,
                                              CCBit, CCBit), 0);
      SDValue C =    Inv ? NotCCBit : CCBit,
              NotC = Inv ? CCBit    : NotCCBit;

      SDValue CAndT(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
                                           C, N->getOperand(2)), 0);
      SDValue NotCAndF(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
                                              NotC, N->getOperand(3)), 0);

      CurDAG->SelectNodeTo(N, PPC::CROR, MVT::i1, CAndT, NotCAndF);
      return;
    }

    unsigned BROpc =
        getPredicateForSetCC(CC, N->getOperand(0).getValueType(), Subtarget);

    unsigned SelectCCOp;
    if (N->getValueType(0) == MVT::i32)
      SelectCCOp = PPC::SELECT_CC_I4;
    else if (N->getValueType(0) == MVT::i64)
      SelectCCOp = PPC::SELECT_CC_I8;
    else if (N->getValueType(0) == MVT::f32) {
      if (Subtarget->hasP8Vector())
        SelectCCOp = PPC::SELECT_CC_VSSRC;
      else if (Subtarget->hasSPE())
        SelectCCOp = PPC::SELECT_CC_SPE4;
      else
        SelectCCOp = PPC::SELECT_CC_F4;
    } else if (N->getValueType(0) == MVT::f64) {
      if (Subtarget->hasVSX())
        SelectCCOp = PPC::SELECT_CC_VSFRC;
      else if (Subtarget->hasSPE())
        SelectCCOp = PPC::SELECT_CC_SPE;
      else
        SelectCCOp = PPC::SELECT_CC_F8;
    } else if (N->getValueType(0) == MVT::f128)
      SelectCCOp = PPC::SELECT_CC_F16;
    else if (Subtarget->hasSPE())
      SelectCCOp = PPC::SELECT_CC_SPE;
    else if (N->getValueType(0) == MVT::v2f64 ||
             N->getValueType(0) == MVT::v2i64)
      SelectCCOp = PPC::SELECT_CC_VSRC;
    else
      SelectCCOp = PPC::SELECT_CC_VRRC;

    SDValue Ops[] = { CCReg, N->getOperand(2), N->getOperand(3),
                        getI32Imm(BROpc, dl) };
    CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops);
    return;
  }
  case ISD::VECTOR_SHUFFLE:
    if (Subtarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 ||
                                N->getValueType(0) == MVT::v2i64)) {
      ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);

      SDValue Op1 = N->getOperand(SVN->getMaskElt(0) < 2 ? 0 : 1),
              Op2 = N->getOperand(SVN->getMaskElt(1) < 2 ? 0 : 1);
      unsigned DM[2];

      for (int i = 0; i < 2; ++i)
        if (SVN->getMaskElt(i) <= 0 || SVN->getMaskElt(i) == 2)
          DM[i] = 0;
        else
          DM[i] = 1;

      if (Op1 == Op2 && DM[0] == 0 && DM[1] == 0 &&
          Op1.getOpcode() == ISD::SCALAR_TO_VECTOR &&
          isa<LoadSDNode>(Op1.getOperand(0))) {
        LoadSDNode *LD = cast<LoadSDNode>(Op1.getOperand(0));
        SDValue Base, Offset;

        if (LD->isUnindexed() && LD->hasOneUse() && Op1.hasOneUse() &&
            (LD->getMemoryVT() == MVT::f64 ||
             LD->getMemoryVT() == MVT::i64) &&
            SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) {
          SDValue Chain = LD->getChain();
          SDValue Ops[] = { Base, Offset, Chain };
          MachineMemOperand *MemOp = LD->getMemOperand();
          SDNode *NewN = CurDAG->SelectNodeTo(N, PPC::LXVDSX,
                                              N->getValueType(0), Ops);
          CurDAG->setNodeMemRefs(cast<MachineSDNode>(NewN), {MemOp});
          return;
        }
      }

      // For little endian, we must swap the input operands and adjust
      // the mask elements (reverse and invert them).
      if (Subtarget->isLittleEndian()) {
        std::swap(Op1, Op2);
        unsigned tmp = DM[0];
        DM[0] = 1 - DM[1];
        DM[1] = 1 - tmp;
      }

      SDValue DMV = CurDAG->getTargetConstant(DM[1] | (DM[0] << 1), dl,
                                              MVT::i32);
      SDValue Ops[] = { Op1, Op2, DMV };
      CurDAG->SelectNodeTo(N, PPC::XXPERMDI, N->getValueType(0), Ops);
      return;
    }

    break;
  case PPCISD::BDNZ:
  case PPCISD::BDZ: {
    bool IsPPC64 = Subtarget->isPPC64();
    SDValue Ops[] = { N->getOperand(1), N->getOperand(0) };
    CurDAG->SelectNodeTo(N, N->getOpcode() == PPCISD::BDNZ
                                ? (IsPPC64 ? PPC::BDNZ8 : PPC::BDNZ)
                                : (IsPPC64 ? PPC::BDZ8 : PPC::BDZ),
                         MVT::Other, Ops);
    return;
  }
  case PPCISD::COND_BRANCH: {
    // Op #0 is the Chain.
    // Op #1 is the PPC::PRED_* number.
    // Op #2 is the CR#
    // Op #3 is the Dest MBB
    // Op #4 is the Flag.
    // Prevent PPC::PRED_* from being selected into LI.
    unsigned PCC = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
    if (EnableBranchHint)
      PCC |= getBranchHint(PCC, *FuncInfo, N->getOperand(3));

    SDValue Pred = getI32Imm(PCC, dl);
    SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3),
      N->getOperand(0), N->getOperand(4) };
    CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
    return;
  }
  case ISD::BR_CC: {
    if (tryFoldSWTestBRCC(N))
      return;
    ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
    unsigned PCC =
        getPredicateForSetCC(CC, N->getOperand(2).getValueType(), Subtarget);

    if (N->getOperand(2).getValueType() == MVT::i1) {
      unsigned Opc;
      bool Swap;
      switch (PCC) {
      default: llvm_unreachable("Unexpected Boolean-operand predicate");
      case PPC::PRED_LT: Opc = PPC::CRANDC; Swap = true;  break;
      case PPC::PRED_LE: Opc = PPC::CRORC;  Swap = true;  break;
      case PPC::PRED_EQ: Opc = PPC::CREQV;  Swap = false; break;
      case PPC::PRED_GE: Opc = PPC::CRORC;  Swap = false; break;
      case PPC::PRED_GT: Opc = PPC::CRANDC; Swap = false; break;
      case PPC::PRED_NE: Opc = PPC::CRXOR;  Swap = false; break;
      }

      // A signed comparison of i1 values produces the opposite result to an
      // unsigned one if the condition code includes less-than or greater-than.
      // This is because 1 is the most negative signed i1 number and the most
      // positive unsigned i1 number. The CR-logical operations used for such
      // comparisons are non-commutative so for signed comparisons vs. unsigned
      // ones, the input operands just need to be swapped.
      if (ISD::isSignedIntSetCC(CC))
        Swap = !Swap;

      SDValue BitComp(CurDAG->getMachineNode(Opc, dl, MVT::i1,
                                             N->getOperand(Swap ? 3 : 2),
                                             N->getOperand(Swap ? 2 : 3)), 0);
      CurDAG->SelectNodeTo(N, PPC::BC, MVT::Other, BitComp, N->getOperand(4),
                           N->getOperand(0));
      return;
    }

    if (EnableBranchHint)
      PCC |= getBranchHint(PCC, *FuncInfo, N->getOperand(4));

    SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl);
    SDValue Ops[] = { getI32Imm(PCC, dl), CondCode,
                        N->getOperand(4), N->getOperand(0) };
    CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
    return;
  }
  case ISD::BRIND: {
    // FIXME: Should custom lower this.
    SDValue Chain = N->getOperand(0);
    SDValue Target = N->getOperand(1);
    unsigned Opc = Target.getValueType() == MVT::i32 ? PPC::MTCTR : PPC::MTCTR8;
    unsigned Reg = Target.getValueType() == MVT::i32 ? PPC::BCTR : PPC::BCTR8;
    Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Target,
                                           Chain), 0);
    CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain);
    return;
  }
  case PPCISD::TOC_ENTRY: {
    const bool isPPC64 = Subtarget->isPPC64();
    const bool isELFABI = Subtarget->isSVR4ABI();
    const bool isAIXABI = Subtarget->isAIXABI();

    // PowerPC only supports the small, medium and large code models.
    const CodeModel::Model CModel = TM.getCodeModel();
    assert(!(CModel == CodeModel::Tiny || CModel == CodeModel::Kernel) &&
           "PowerPC doesn't support tiny or kernel code models.");

    if (isAIXABI && CModel == CodeModel::Medium)
      report_fatal_error("Medium code model is not supported on AIX.");

    // For 64-bit ELF small code model, we allow SelectCodeCommon to handle
    // this, selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA. For AIX
    // small code model, we need to check for a toc-data attribute.
    if (isPPC64 && !isAIXABI && CModel == CodeModel::Small)
      break;

    auto replaceWith = [this, &dl](unsigned OpCode, SDNode *TocEntry,
                                   EVT OperandTy) {
      SDValue GA = TocEntry->getOperand(0);
      SDValue TocBase = TocEntry->getOperand(1);
      SDNode *MN = CurDAG->getMachineNode(OpCode, dl, OperandTy, GA, TocBase);
      transferMemOperands(TocEntry, MN);
      ReplaceNode(TocEntry, MN);
    };

    // Handle 32-bit small code model.
    if (!isPPC64 && CModel == CodeModel::Small) {
      // Transform the PPCISD::TOC_ENTRY node into the passed-in opcode, either
      // PPC::ADDItoc or PPC::LWZtoc.
      if (isELFABI) {
        assert(TM.isPositionIndependent() &&
               "32-bit ELF can only have TOC entries in position independent"
               " code.");
        // 32-bit ELF always uses a small code model toc access.
        replaceWith(PPC::LWZtoc, N, MVT::i32);
        return;
      }

      assert(isAIXABI && "ELF ABI already handled");

      if (hasTocDataAttr(N->getOperand(0),
                         CurDAG->getDataLayout().getPointerSize())) {
        replaceWith(PPC::ADDItoc, N, MVT::i32);
        return;
      }

      replaceWith(PPC::LWZtoc, N, MVT::i32);
      return;
    }

    if (isPPC64 && CModel == CodeModel::Small) {
      assert(isAIXABI && "ELF ABI handled in common SelectCode");

      if (hasTocDataAttr(N->getOperand(0),
                         CurDAG->getDataLayout().getPointerSize())) {
        replaceWith(PPC::ADDItoc8, N, MVT::i64);
        return;
      }
      // Break if it doesn't have toc data attribute. Proceed with common
      // SelectCode.
      break;
    }

    assert(CModel != CodeModel::Small && "All small code models handled.");

    assert((isPPC64 || (isAIXABI && !isPPC64)) && "We are dealing with 64-bit"
           " ELF/AIX or 32-bit AIX in the following.");

    // Transforms the ISD::TOC_ENTRY node for 32-bit AIX large code model mode
    // or 64-bit medium (ELF-only) or large (ELF and AIX) code model code. We
    // generate two instructions as described below. The first source operand
    // is a symbol reference. If it must be toc-referenced according to
    // Subtarget, we generate:
    // [32-bit AIX]
    //   LWZtocL(@sym, ADDIStocHA(%r2, @sym))
    // [64-bit ELF/AIX]
    //   LDtocL(@sym, ADDIStocHA8(%x2, @sym))
    // Otherwise we generate:
    //   ADDItocL(ADDIStocHA8(%x2, @sym), @sym)
    SDValue GA = N->getOperand(0);
    SDValue TOCbase = N->getOperand(1);

    EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
    SDNode *Tmp = CurDAG->getMachineNode(
        isPPC64 ? PPC::ADDIStocHA8 : PPC::ADDIStocHA, dl, VT, TOCbase, GA);

    if (PPCLowering->isAccessedAsGotIndirect(GA)) {
      // If it is accessed as got-indirect, we need an extra LWZ/LD to load
      // the address.
      SDNode *MN = CurDAG->getMachineNode(
          isPPC64 ? PPC::LDtocL : PPC::LWZtocL, dl, VT, GA, SDValue(Tmp, 0));

      transferMemOperands(N, MN);
      ReplaceNode(N, MN);
      return;
    }

    // Build the address relative to the TOC-pointer.
    ReplaceNode(N, CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64,
                                          SDValue(Tmp, 0), GA));
    return;
  }
  case PPCISD::PPC32_PICGOT:
    // Generate a PIC-safe GOT reference.
    assert(Subtarget->is32BitELFABI() &&
           "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4");
    CurDAG->SelectNodeTo(N, PPC::PPC32PICGOT,
                         PPCLowering->getPointerTy(CurDAG->getDataLayout()),
                         MVT::i32);
    return;

  case PPCISD::VADD_SPLAT: {
    // This expands into one of three sequences, depending on whether
    // the first operand is odd or even, positive or negative.
    assert(isa<ConstantSDNode>(N->getOperand(0)) &&
           isa<ConstantSDNode>(N->getOperand(1)) &&
           "Invalid operand on VADD_SPLAT!");

    int Elt     = N->getConstantOperandVal(0);
    int EltSize = N->getConstantOperandVal(1);
    unsigned Opc1, Opc2, Opc3;
    EVT VT;

    if (EltSize == 1) {
      Opc1 = PPC::VSPLTISB;
      Opc2 = PPC::VADDUBM;
      Opc3 = PPC::VSUBUBM;
      VT = MVT::v16i8;
    } else if (EltSize == 2) {
      Opc1 = PPC::VSPLTISH;
      Opc2 = PPC::VADDUHM;
      Opc3 = PPC::VSUBUHM;
      VT = MVT::v8i16;
    } else {
      assert(EltSize == 4 && "Invalid element size on VADD_SPLAT!");
      Opc1 = PPC::VSPLTISW;
      Opc2 = PPC::VADDUWM;
      Opc3 = PPC::VSUBUWM;
      VT = MVT::v4i32;
    }

    if ((Elt & 1) == 0) {
      // Elt is even, in the range [-32,-18] + [16,30].
      //
      // Convert: VADD_SPLAT elt, size
      // Into:    tmp = VSPLTIS[BHW] elt
      //          VADDU[BHW]M tmp, tmp
      // Where:   [BHW] = B for size = 1, H for size = 2, W for size = 4
      SDValue EltVal = getI32Imm(Elt >> 1, dl);
      SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
      SDValue TmpVal = SDValue(Tmp, 0);
      ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal));
      return;
    } else if (Elt > 0) {
      // Elt is odd and positive, in the range [17,31].
      //
      // Convert: VADD_SPLAT elt, size
      // Into:    tmp1 = VSPLTIS[BHW] elt-16
      //          tmp2 = VSPLTIS[BHW] -16
      //          VSUBU[BHW]M tmp1, tmp2
      SDValue EltVal = getI32Imm(Elt - 16, dl);
      SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
      EltVal = getI32Imm(-16, dl);
      SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
      ReplaceNode(N, CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0),
                                            SDValue(Tmp2, 0)));
      return;
    } else {
      // Elt is odd and negative, in the range [-31,-17].
      //
      // Convert: VADD_SPLAT elt, size
      // Into:    tmp1 = VSPLTIS[BHW] elt+16
      //          tmp2 = VSPLTIS[BHW] -16
      //          VADDU[BHW]M tmp1, tmp2
      SDValue EltVal = getI32Imm(Elt + 16, dl);
      SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
      EltVal = getI32Imm(-16, dl);
      SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
      ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, SDValue(Tmp1, 0),
                                            SDValue(Tmp2, 0)));
      return;
    }
  }
  case PPCISD::LD_SPLAT: {
    // Here we want to handle splat loads for types v16i8 and v8i16 when there
    // is no direct move; we don't need to use the stack for this case. If the
    // target has direct move, we should be able to get the best selection in
    // the .td file.
    if (!Subtarget->hasAltivec() || Subtarget->hasDirectMove())
      break;

    EVT Type = N->getValueType(0);
    if (Type != MVT::v16i8 && Type != MVT::v8i16)
      break;

    // If the alignment for the load is 16 or bigger, we don't need the
    // permuted mask to get the required value. The value must be element 0
    // on a big-endian target or element 7/15 on a little-endian target in the
    // result vsx register of the lvx instruction.
    // Select the instruction in the .td file.
    if (cast<MemIntrinsicSDNode>(N)->getAlign() >= Align(16) &&
        isOffsetMultipleOf(N, 16))
      break;

    SDValue ZeroReg =
        CurDAG->getRegister(Subtarget->isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                            Subtarget->isPPC64() ? MVT::i64 : MVT::i32);
    unsigned LIOpcode = Subtarget->isPPC64() ? PPC::LI8 : PPC::LI;
    // v16i8 LD_SPLAT addr
    // ======>
    // Mask = LVSR/LVSL 0, addr
    // LoadLow = LVX 0, addr
    // Perm = VPERM LoadLow, LoadLow, Mask
    // Splat = VSPLTB 15/0, Perm
    //
    // v8i16 LD_SPLAT addr
    // ======>
    // Mask = LVSR/LVSL 0, addr
    // LoadLow = LVX 0, addr
    // LoadHigh = LVX (LI, 1), addr
    // Perm = VPERM LoadLow, LoadHigh, Mask
    // Splat = VSPLTH 7/0, Perm
    unsigned SplatOp = (Type == MVT::v16i8) ? PPC::VSPLTB : PPC::VSPLTH;
    unsigned SplatElemIndex =
        Subtarget->isLittleEndian() ? ((Type == MVT::v16i8) ? 15 : 7) : 0;

    SDNode *Mask = CurDAG->getMachineNode(
        Subtarget->isLittleEndian() ? PPC::LVSR : PPC::LVSL, dl, Type, ZeroReg,
        N->getOperand(1));

    SDNode *LoadLow =
        CurDAG->getMachineNode(PPC::LVX, dl, MVT::v16i8, MVT::Other,
                               {ZeroReg, N->getOperand(1), N->getOperand(0)});

    SDNode *LoadHigh = LoadLow;
    if (Type == MVT::v8i16) {
      LoadHigh = CurDAG->getMachineNode(
          PPC::LVX, dl, MVT::v16i8, MVT::Other,
          {SDValue(CurDAG->getMachineNode(
                       LIOpcode, dl, MVT::i32,
                       CurDAG->getTargetConstant(1, dl, MVT::i8)),
                   0),
           N->getOperand(1), SDValue(LoadLow, 1)});
    }

    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(LoadHigh, 1));
    transferMemOperands(N, LoadHigh);

    SDNode *Perm =
        CurDAG->getMachineNode(PPC::VPERM, dl, Type, SDValue(LoadLow, 0),
                               SDValue(LoadHigh, 0), SDValue(Mask, 0));
    CurDAG->SelectNodeTo(N, SplatOp, Type,
                         CurDAG->getTargetConstant(SplatElemIndex, dl, MVT::i8),
                         SDValue(Perm, 0));
    return;
  }
  }

  SelectCode(N);
}