void ARMDAGToDAGISel::Select()

in llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp [3615:5382]


void ARMDAGToDAGISel::Select(SDNode *N) {
  SDLoc dl(N);

  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return;   // Already selected.
  }

  switch (N->getOpcode()) {
  default: break;
  case ISD::STORE: {
    // For Thumb1, match an sp-relative store in C++. This is a little
    // unfortunate, but I don't think I can make the chain check work
    // otherwise.  (The chain of the store has to be the same as the chain
    // of the CopyFromReg, or else we can't replace the CopyFromReg with
    // a direct reference to "SP".)
    //
    // This is only necessary on Thumb1 because Thumb1 sp-relative stores use
    // a different addressing mode from other four-byte stores.
    //
    // This pattern usually comes up with call arguments.
    StoreSDNode *ST = cast<StoreSDNode>(N);
    SDValue Ptr = ST->getBasePtr();
    if (Subtarget->isThumb1Only() && ST->isUnindexed()) {
      int RHSC = 0;
      if (Ptr.getOpcode() == ISD::ADD &&
          isScaledConstantInRange(Ptr.getOperand(1), /*Scale=*/4, 0, 256, RHSC))
        Ptr = Ptr.getOperand(0);

      if (Ptr.getOpcode() == ISD::CopyFromReg &&
          cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP &&
          Ptr.getOperand(0) == ST->getChain()) {
        SDValue Ops[] = {ST->getValue(),
                         CurDAG->getRegister(ARM::SP, MVT::i32),
                         CurDAG->getTargetConstant(RHSC, dl, MVT::i32),
                         getAL(CurDAG, dl),
                         CurDAG->getRegister(0, MVT::i32),
                         ST->getChain()};
        MachineSDNode *ResNode =
            CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops);
        MachineMemOperand *MemOp = ST->getMemOperand();
        CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
        ReplaceNode(N, ResNode);
        return;
      }
    }
    break;
  }
  case ISD::WRITE_REGISTER:
    if (tryWriteRegister(N))
      return;
    break;
  case ISD::READ_REGISTER:
    if (tryReadRegister(N))
      return;
    break;
  case ISD::INLINEASM:
  case ISD::INLINEASM_BR:
    if (tryInlineAsm(N))
      return;
    break;
  case ISD::XOR:
    // Select special operations if XOR node forms integer ABS pattern
    if (tryABSOp(N))
      return;
    // Other cases are autogenerated.
    break;
  case ISD::Constant: {
    unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
    // If we can't materialize the constant we need to use a literal pool
    if (ConstantMaterializationCost(Val, Subtarget) > 2) {
      SDValue CPIdx = CurDAG->getTargetConstantPool(
          ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
          TLI->getPointerTy(CurDAG->getDataLayout()));

      SDNode *ResNode;
      if (Subtarget->isThumb()) {
        SDValue Ops[] = {
          CPIdx,
          getAL(CurDAG, dl),
          CurDAG->getRegister(0, MVT::i32),
          CurDAG->getEntryNode()
        };
        ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
                                         Ops);
      } else {
        SDValue Ops[] = {
          CPIdx,
          CurDAG->getTargetConstant(0, dl, MVT::i32),
          getAL(CurDAG, dl),
          CurDAG->getRegister(0, MVT::i32),
          CurDAG->getEntryNode()
        };
        ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
                                         Ops);
      }
      // Annotate the Node with memory operand information so that MachineInstr
      // queries work properly. This e.g. gives the register allocation the
      // required information for rematerialization.
      MachineFunction& MF = CurDAG->getMachineFunction();
      MachineMemOperand *MemOp =
          MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
                                  MachineMemOperand::MOLoad, 4, Align(4));

      CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});

      ReplaceNode(N, ResNode);
      return;
    }

    // Other cases are autogenerated.
    break;
  }
  case ISD::FrameIndex: {
    // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    SDValue TFI = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    if (Subtarget->isThumb1Only()) {
      // Set the alignment of the frame object to 4, to avoid having to generate
      // more than one ADD
      MachineFrameInfo &MFI = MF->getFrameInfo();
      if (MFI.getObjectAlign(FI) < Align(4))
        MFI.setObjectAlignment(FI, Align(4));
      CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
                           CurDAG->getTargetConstant(0, dl, MVT::i32));
      return;
    } else {
      unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
                      ARM::t2ADDri : ARM::ADDri);
      SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
                        getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
                        CurDAG->getRegister(0, MVT::i32) };
      CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
      return;
    }
  }
  case ISD::INSERT_VECTOR_ELT: {
    if (tryInsertVectorElt(N))
      return;
    break;
  }
  case ISD::SRL:
    if (tryV6T2BitfieldExtractOp(N, false))
      return;
    break;
  case ISD::SIGN_EXTEND_INREG:
  case ISD::SRA:
    if (tryV6T2BitfieldExtractOp(N, true))
      return;
    break;
  case ISD::FP_TO_UINT:
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT_SAT:
  case ISD::FP_TO_SINT_SAT:
    if (tryFP_TO_INT(N, dl))
      return;
    break;
  case ISD::FMUL:
    if (tryFMULFixed(N, dl))
      return;
    break;
  case ISD::MUL:
    if (Subtarget->isThumb1Only())
      break;
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
      unsigned RHSV = C->getZExtValue();
      if (!RHSV) break;
      if (isPowerOf2_32(RHSV-1)) {  // 2^n+1?
        unsigned ShImm = Log2_32(RHSV-1);
        if (ShImm >= 32)
          break;
        SDValue V = N->getOperand(0);
        ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
        SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
        SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
        if (Subtarget->isThumb()) {
          SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
          CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
          return;
        } else {
          SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
                            Reg0 };
          CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
          return;
        }
      }
      if (isPowerOf2_32(RHSV+1)) {  // 2^n-1?
        unsigned ShImm = Log2_32(RHSV+1);
        if (ShImm >= 32)
          break;
        SDValue V = N->getOperand(0);
        ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
        SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
        SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
        if (Subtarget->isThumb()) {
          SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
          CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
          return;
        } else {
          SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
                            Reg0 };
          CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
          return;
        }
      }
    }
    break;
  case ISD::AND: {
    // Check for unsigned bitfield extract
    if (tryV6T2BitfieldExtractOp(N, false))
      return;

    // If an immediate is used in an AND node, it is possible that the immediate
    // can be more optimally materialized when negated. If this is the case we
    // can negate the immediate and use a BIC instead.
    auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
      uint32_t Imm = (uint32_t) N1C->getZExtValue();

      // In Thumb2 mode, an AND can take a 12-bit immediate. If this
      // immediate can be negated and fit in the immediate operand of
      // a t2BIC, don't do any manual transform here as this can be
      // handled by the generic ISel machinery.
      bool PreferImmediateEncoding =
        Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
      if (!PreferImmediateEncoding &&
          ConstantMaterializationCost(Imm, Subtarget) >
              ConstantMaterializationCost(~Imm, Subtarget)) {
        // The current immediate costs more to materialize than a negated
        // immediate, so negate the immediate and use a BIC.
        SDValue NewImm =
          CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32);
        // If the new constant didn't exist before, reposition it in the topological
        // ordering so it is just before N. Otherwise, don't touch its location.
        if (NewImm->getNodeId() == -1)
          CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());

        if (!Subtarget->hasThumb2()) {
          SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
                           N->getOperand(0), NewImm, getAL(CurDAG, dl),
                           CurDAG->getRegister(0, MVT::i32)};
          ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
          return;
        } else {
          SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
                           CurDAG->getRegister(0, MVT::i32),
                           CurDAG->getRegister(0, MVT::i32)};
          ReplaceNode(N,
                      CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));
          return;
        }
      }
    }

    // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
    // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
    // are entirely contributed by c2 and lower 16-bits are entirely contributed
    // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
    // Select it to: "movt x, ((c1 & 0xffff0000) >> 16)"
    EVT VT = N->getValueType(0);
    if (VT != MVT::i32)
      break;
    unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
      ? ARM::t2MOVTi16
      : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
    if (!Opc)
      break;
    SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
    N1C = dyn_cast<ConstantSDNode>(N1);
    if (!N1C)
      break;
    if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
      SDValue N2 = N0.getOperand(1);
      ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
      if (!N2C)
        break;
      unsigned N1CVal = N1C->getZExtValue();
      unsigned N2CVal = N2C->getZExtValue();
      if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
          (N1CVal & 0xffffU) == 0xffffU &&
          (N2CVal & 0xffffU) == 0x0U) {
        SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
                                                  dl, MVT::i32);
        SDValue Ops[] = { N0.getOperand(0), Imm16,
                          getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
        ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
        return;
      }
    }

    break;
  }
  case ARMISD::UMAAL: {
    unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
    SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
                      N->getOperand(2), N->getOperand(3),
                      getAL(CurDAG, dl),
                      CurDAG->getRegister(0, MVT::i32) };
    ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
    return;
  }
  case ARMISD::UMLAL:{
    if (Subtarget->isThumb()) {
      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                        N->getOperand(3), getAL(CurDAG, dl),
                        CurDAG->getRegister(0, MVT::i32)};
      ReplaceNode(
          N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops));
      return;
    }else{
      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                        N->getOperand(3), getAL(CurDAG, dl),
                        CurDAG->getRegister(0, MVT::i32),
                        CurDAG->getRegister(0, MVT::i32) };
      ReplaceNode(N, CurDAG->getMachineNode(
                         Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
                         MVT::i32, MVT::i32, Ops));
      return;
    }
  }
  case ARMISD::SMLAL:{
    if (Subtarget->isThumb()) {
      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                        N->getOperand(3), getAL(CurDAG, dl),
                        CurDAG->getRegister(0, MVT::i32)};
      ReplaceNode(
          N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));
      return;
    }else{
      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                        N->getOperand(3), getAL(CurDAG, dl),
                        CurDAG->getRegister(0, MVT::i32),
                        CurDAG->getRegister(0, MVT::i32) };
      ReplaceNode(N, CurDAG->getMachineNode(
                         Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
                         MVT::i32, MVT::i32, Ops));
      return;
    }
  }
  case ARMISD::SUBE: {
    if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
      break;
    // Look for a pattern to match SMMLS
    // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b))))
    if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI ||
        N->getOperand(2).getOpcode() != ARMISD::SUBC ||
        !SDValue(N, 1).use_empty())
      break;

    if (Subtarget->isThumb())
      assert(Subtarget->hasThumb2() &&
             "This pattern should not be generated for Thumb");

    SDValue SmulLoHi = N->getOperand(1);
    SDValue Subc = N->getOperand(2);
    auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0));

    if (!Zero || Zero->getZExtValue() != 0 ||
        Subc.getOperand(1) != SmulLoHi.getValue(0) ||
        N->getOperand(1) != SmulLoHi.getValue(1) ||
        N->getOperand(2) != Subc.getValue(1))
      break;

    unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
    SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1),
                      N->getOperand(0), getAL(CurDAG, dl),
                      CurDAG->getRegister(0, MVT::i32) };
    ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops));
    return;
  }
  case ISD::LOAD: {
    if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
      return;
    if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
      if (tryT2IndexedLoad(N))
        return;
    } else if (Subtarget->isThumb()) {
      if (tryT1IndexedLoad(N))
        return;
    } else if (tryARMIndexedLoad(N))
      return;
    // Other cases are autogenerated.
    break;
  }
  case ISD::MLOAD:
    if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
      return;
    // Other cases are autogenerated.
    break;
  case ARMISD::WLSSETUP: {
    SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopSetup, dl, MVT::i32,
                                         N->getOperand(0));
    ReplaceUses(N, New);
    CurDAG->RemoveDeadNode(N);
    return;
  }
  case ARMISD::WLS: {
    SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopStart, dl, MVT::Other,
                                         N->getOperand(1), N->getOperand(2),
                                         N->getOperand(0));
    ReplaceUses(N, New);
    CurDAG->RemoveDeadNode(N);
    return;
  }
  case ARMISD::LE: {
    SDValue Ops[] = { N->getOperand(1),
                      N->getOperand(2),
                      N->getOperand(0) };
    unsigned Opc = ARM::t2LoopEnd;
    SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
    ReplaceUses(N, New);
    CurDAG->RemoveDeadNode(N);
    return;
  }
  case ARMISD::LDRD: {
    if (Subtarget->isThumb2())
      break; // TableGen handles isel in this case.
    SDValue Base, RegOffset, ImmOffset;
    const SDValue &Chain = N->getOperand(0);
    const SDValue &Addr = N->getOperand(1);
    SelectAddrMode3(Addr, Base, RegOffset, ImmOffset);
    if (RegOffset != CurDAG->getRegister(0, MVT::i32)) {
      // The register-offset variant of LDRD mandates that the register
      // allocated to RegOffset is not reused in any of the remaining operands.
      // This restriction is currently not enforced. Therefore emitting this
      // variant is explicitly avoided.
      Base = Addr;
      RegOffset = CurDAG->getRegister(0, MVT::i32);
    }
    SDValue Ops[] = {Base, RegOffset, ImmOffset, Chain};
    SDNode *New = CurDAG->getMachineNode(ARM::LOADDUAL, dl,
                                         {MVT::Untyped, MVT::Other}, Ops);
    SDValue Lo = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
                                                SDValue(New, 0));
    SDValue Hi = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
                                                SDValue(New, 0));
    transferMemOperands(N, New);
    ReplaceUses(SDValue(N, 0), Lo);
    ReplaceUses(SDValue(N, 1), Hi);
    ReplaceUses(SDValue(N, 2), SDValue(New, 1));
    CurDAG->RemoveDeadNode(N);
    return;
  }
  case ARMISD::STRD: {
    if (Subtarget->isThumb2())
      break; // TableGen handles isel in this case.
    SDValue Base, RegOffset, ImmOffset;
    const SDValue &Chain = N->getOperand(0);
    const SDValue &Addr = N->getOperand(3);
    SelectAddrMode3(Addr, Base, RegOffset, ImmOffset);
    if (RegOffset != CurDAG->getRegister(0, MVT::i32)) {
      // The register-offset variant of STRD mandates that the register
      // allocated to RegOffset is not reused in any of the remaining operands.
      // This restriction is currently not enforced. Therefore emitting this
      // variant is explicitly avoided.
      Base = Addr;
      RegOffset = CurDAG->getRegister(0, MVT::i32);
    }
    SDNode *RegPair =
        createGPRPairNode(MVT::Untyped, N->getOperand(1), N->getOperand(2));
    SDValue Ops[] = {SDValue(RegPair, 0), Base, RegOffset, ImmOffset, Chain};
    SDNode *New = CurDAG->getMachineNode(ARM::STOREDUAL, dl, MVT::Other, Ops);
    transferMemOperands(N, New);
    ReplaceUses(SDValue(N, 0), SDValue(New, 0));
    CurDAG->RemoveDeadNode(N);
    return;
  }
  case ARMISD::LOOP_DEC: {
    SDValue Ops[] = { N->getOperand(1),
                      N->getOperand(2),
                      N->getOperand(0) };
    SDNode *Dec =
      CurDAG->getMachineNode(ARM::t2LoopDec, dl,
                             CurDAG->getVTList(MVT::i32, MVT::Other), Ops);
    ReplaceUses(N, Dec);
    CurDAG->RemoveDeadNode(N);
    return;
  }
  case ARMISD::BRCOND: {
    // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
    // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
    // Pattern complexity = 6  cost = 1  size = 0

    // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
    // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
    // Pattern complexity = 6  cost = 1  size = 0

    // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
    // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
    // Pattern complexity = 6  cost = 1  size = 0

    unsigned Opc = Subtarget->isThumb() ?
      ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
    SDValue Chain = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
    SDValue N2 = N->getOperand(2);
    SDValue N3 = N->getOperand(3);
    SDValue InFlag = N->getOperand(4);
    assert(N1.getOpcode() == ISD::BasicBlock);
    assert(N2.getOpcode() == ISD::Constant);
    assert(N3.getOpcode() == ISD::Register);

    unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue();

    if (InFlag.getOpcode() == ARMISD::CMPZ) {
      if (InFlag.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) {
        SDValue Int = InFlag.getOperand(0);
        uint64_t ID = cast<ConstantSDNode>(Int->getOperand(1))->getZExtValue();

        // Handle low-overhead loops.
        if (ID == Intrinsic::loop_decrement_reg) {
          SDValue Elements = Int.getOperand(2);
          SDValue Size = CurDAG->getTargetConstant(
            cast<ConstantSDNode>(Int.getOperand(3))->getZExtValue(), dl,
                                 MVT::i32);

          SDValue Args[] = { Elements, Size, Int.getOperand(0) };
          SDNode *LoopDec =
            CurDAG->getMachineNode(ARM::t2LoopDec, dl,
                                   CurDAG->getVTList(MVT::i32, MVT::Other),
                                   Args);
          ReplaceUses(Int.getNode(), LoopDec);

          SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain };
          SDNode *LoopEnd =
            CurDAG->getMachineNode(ARM::t2LoopEnd, dl, MVT::Other, EndArgs);

          ReplaceUses(N, LoopEnd);
          CurDAG->RemoveDeadNode(N);
          CurDAG->RemoveDeadNode(InFlag.getNode());
          CurDAG->RemoveDeadNode(Int.getNode());
          return;
        }
      }

      bool SwitchEQNEToPLMI;
      SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
      InFlag = N->getOperand(4);

      if (SwitchEQNEToPLMI) {
        switch ((ARMCC::CondCodes)CC) {
        default: llvm_unreachable("CMPZ must be either NE or EQ!");
        case ARMCC::NE:
          CC = (unsigned)ARMCC::MI;
          break;
        case ARMCC::EQ:
          CC = (unsigned)ARMCC::PL;
          break;
        }
      }
    }

    SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32);
    SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
    SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
                                             MVT::Glue, Ops);
    Chain = SDValue(ResNode, 0);
    if (N->getNumValues() == 2) {
      InFlag = SDValue(ResNode, 1);
      ReplaceUses(SDValue(N, 1), InFlag);
    }
    ReplaceUses(SDValue(N, 0),
                SDValue(Chain.getNode(), Chain.getResNo()));
    CurDAG->RemoveDeadNode(N);
    return;
  }

  case ARMISD::CMPZ: {
    // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
    //   This allows us to avoid materializing the expensive negative constant.
    //   The CMPZ #0 is useless and will be peepholed away but we need to keep it
    //   for its glue output.
    SDValue X = N->getOperand(0);
    auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
    if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
      int64_t Addend = -C->getSExtValue();

      SDNode *Add = nullptr;
      // ADDS can be better than CMN if the immediate fits in a
      // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
      // Outside that range we can just use a CMN which is 32-bit but has a
      // 12-bit immediate range.
      if (Addend < 1<<8) {
        if (Subtarget->isThumb2()) {
          SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
                            getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
                            CurDAG->getRegister(0, MVT::i32) };
          Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
        } else {
          unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8;
          SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
                           CurDAG->getTargetConstant(Addend, dl, MVT::i32),
                           getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
          Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
        }
      }
      if (Add) {
        SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)};
        CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2);
      }
    }
    // Other cases are autogenerated.
    break;
  }

  case ARMISD::CMOV: {
    SDValue InFlag = N->getOperand(4);

    if (InFlag.getOpcode() == ARMISD::CMPZ) {
      bool SwitchEQNEToPLMI;
      SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);

      if (SwitchEQNEToPLMI) {
        SDValue ARMcc = N->getOperand(2);
        ARMCC::CondCodes CC =
          (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();

        switch (CC) {
        default: llvm_unreachable("CMPZ must be either NE or EQ!");
        case ARMCC::NE:
          CC = ARMCC::MI;
          break;
        case ARMCC::EQ:
          CC = ARMCC::PL;
          break;
        }
        SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32);
        SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc,
                         N->getOperand(3), N->getOperand(4)};
        CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops);
      }

    }
    // Other cases are autogenerated.
    break;
  }

  case ARMISD::VZIP: {
    unsigned Opc = 0;
    EVT VT = N->getValueType(0);
    switch (VT.getSimpleVT().SimpleTy) {
    default: return;
    case MVT::v8i8:  Opc = ARM::VZIPd8; break;
    case MVT::v4f16:
    case MVT::v4i16: Opc = ARM::VZIPd16; break;
    case MVT::v2f32:
    // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
    case MVT::v2i32: Opc = ARM::VTRNd32; break;
    case MVT::v16i8: Opc = ARM::VZIPq8; break;
    case MVT::v8f16:
    case MVT::v8i16: Opc = ARM::VZIPq16; break;
    case MVT::v4f32:
    case MVT::v4i32: Opc = ARM::VZIPq32; break;
    }
    SDValue Pred = getAL(CurDAG, dl);
    SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
    SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
    ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
    return;
  }
  case ARMISD::VUZP: {
    unsigned Opc = 0;
    EVT VT = N->getValueType(0);
    switch (VT.getSimpleVT().SimpleTy) {
    default: return;
    case MVT::v8i8:  Opc = ARM::VUZPd8; break;
    case MVT::v4f16:
    case MVT::v4i16: Opc = ARM::VUZPd16; break;
    case MVT::v2f32:
    // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
    case MVT::v2i32: Opc = ARM::VTRNd32; break;
    case MVT::v16i8: Opc = ARM::VUZPq8; break;
    case MVT::v8f16:
    case MVT::v8i16: Opc = ARM::VUZPq16; break;
    case MVT::v4f32:
    case MVT::v4i32: Opc = ARM::VUZPq32; break;
    }
    SDValue Pred = getAL(CurDAG, dl);
    SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
    SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
    ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
    return;
  }
  case ARMISD::VTRN: {
    unsigned Opc = 0;
    EVT VT = N->getValueType(0);
    switch (VT.getSimpleVT().SimpleTy) {
    default: return;
    case MVT::v8i8:  Opc = ARM::VTRNd8; break;
    case MVT::v4f16:
    case MVT::v4i16: Opc = ARM::VTRNd16; break;
    case MVT::v2f32:
    case MVT::v2i32: Opc = ARM::VTRNd32; break;
    case MVT::v16i8: Opc = ARM::VTRNq8; break;
    case MVT::v8f16:
    case MVT::v8i16: Opc = ARM::VTRNq16; break;
    case MVT::v4f32:
    case MVT::v4i32: Opc = ARM::VTRNq32; break;
    }
    SDValue Pred = getAL(CurDAG, dl);
    SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
    SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
    ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
    return;
  }
  case ARMISD::BUILD_VECTOR: {
    EVT VecVT = N->getValueType(0);
    EVT EltVT = VecVT.getVectorElementType();
    unsigned NumElts = VecVT.getVectorNumElements();
    if (EltVT == MVT::f64) {
      assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
      ReplaceNode(
          N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
      return;
    }
    assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
    if (NumElts == 2) {
      ReplaceNode(
          N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
      return;
    }
    assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
    ReplaceNode(N,
                createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
                                    N->getOperand(2), N->getOperand(3)));
    return;
  }

  case ARMISD::VLD1DUP: {
    static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,
                                         ARM::VLD1DUPd32 };
    static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
                                         ARM::VLD1DUPq32 };
    SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VLD2DUP: {
    static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
                                        ARM::VLD2DUPd32 };
    SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes);
    return;
  }

  case ARMISD::VLD3DUP: {
    static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
                                        ARM::VLD3DUPd16Pseudo,
                                        ARM::VLD3DUPd32Pseudo };
    SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes);
    return;
  }

  case ARMISD::VLD4DUP: {
    static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
                                        ARM::VLD4DUPd16Pseudo,
                                        ARM::VLD4DUPd32Pseudo };
    SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes);
    return;
  }

  case ARMISD::VLD1DUP_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,
                                         ARM::VLD1DUPd16wb_fixed,
                                         ARM::VLD1DUPd32wb_fixed };
    static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
                                         ARM::VLD1DUPq16wb_fixed,
                                         ARM::VLD1DUPq32wb_fixed };
    SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VLD2DUP_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8wb_fixed,
                                         ARM::VLD2DUPd16wb_fixed,
                                         ARM::VLD2DUPd32wb_fixed,
                                         ARM::VLD1q64wb_fixed };
    static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
                                          ARM::VLD2DUPq16EvenPseudo,
                                          ARM::VLD2DUPq32EvenPseudo };
    static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudoWB_fixed,
                                          ARM::VLD2DUPq16OddPseudoWB_fixed,
                                          ARM::VLD2DUPq32OddPseudoWB_fixed };
    SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, DOpcodes, QOpcodes0, QOpcodes1);
    return;
  }

  case ARMISD::VLD3DUP_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
                                         ARM::VLD3DUPd16Pseudo_UPD,
                                         ARM::VLD3DUPd32Pseudo_UPD,
                                         ARM::VLD1d64TPseudoWB_fixed };
    static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
                                          ARM::VLD3DUPq16EvenPseudo,
                                          ARM::VLD3DUPq32EvenPseudo };
    static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo_UPD,
                                          ARM::VLD3DUPq16OddPseudo_UPD,
                                          ARM::VLD3DUPq32OddPseudo_UPD };
    SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
    return;
  }

  // ARMISD::VLD4DUP_UPD: select through SelectVLDDup using the VLD4DUP opcode
  // tables below. Entries are ordered 8/16/32-bit; DOpcodes has a fourth slot
  // holding a VLD1 d64 pseudo, while the Q tables stop at 32-bit. QOpcodes0
  // lists the "Even" pseudos and QOpcodes1 the "Odd" _UPD pseudos.
  // NOTE(review): the bare `true` / `4` are presumably the is-updating flag
  // and the vector count -- confirm against SelectVLDDup's declaration.
  case ARMISD::VLD4DUP_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
                                         ARM::VLD4DUPd16Pseudo_UPD,
                                         ARM::VLD4DUPd32Pseudo_UPD,
                                         ARM::VLD1d64QPseudoWB_fixed };
    static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
                                          ARM::VLD4DUPq16EvenPseudo,
                                          ARM::VLD4DUPq32EvenPseudo };
    static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo_UPD,
                                          ARM::VLD4DUPq16OddPseudo_UPD,
                                          ARM::VLD4DUPq32OddPseudo_UPD };
    SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
    return;
  }

  // ARMISD::VLD1_UPD: select through SelectVLD using the VLD1 "wb_fixed"
  // opcodes. Both tables list the 8/16/32/64-bit variants in that order; no
  // second Q table is supplied (nullptr).
  // NOTE(review): `true` / `1` are presumably the is-updating flag and the
  // vector count -- confirm against SelectVLD's declaration.
  case ARMISD::VLD1_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
                                         ARM::VLD1d16wb_fixed,
                                         ARM::VLD1d32wb_fixed,
                                         ARM::VLD1d64wb_fixed };
    static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
                                         ARM::VLD1q16wb_fixed,
                                         ARM::VLD1q32wb_fixed,
                                         ARM::VLD1q64wb_fixed };
    SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
    return;
  }

  // ARMISD::VLD2_UPD: two selection paths, both of which return.
  //  * NEON available: SelectVLD with the VLD2 writeback tables. The D table's
  //    64-bit slot is VLD1q64wb_fixed; the Q table has no 64-bit entry.
  //  * Otherwise: SelectMVE_VLD with one two-entry table (VLD20, VLD21 "_wb")
  //    per element width, gathered into `Opcodes` in 8/16/32-bit order.
  // NOTE(review): the meaning of the bare `true` / `2` arguments is assumed
  // (updating form, vector count) -- confirm against the helpers.
  case ARMISD::VLD2_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VLD2d8wb_fixed, ARM::VLD2d16wb_fixed, ARM::VLD2d32wb_fixed,
          ARM::VLD1q64wb_fixed};
      static const uint16_t QOpcodes[] = {ARM::VLD2q8PseudoWB_fixed,
                                          ARM::VLD2q16PseudoWB_fixed,
                                          ARM::VLD2q32PseudoWB_fixed};
      SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
    } else {
      static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8,
                                          ARM::MVE_VLD21_8_wb};
      static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,
                                           ARM::MVE_VLD21_16_wb};
      static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
                                           ARM::MVE_VLD21_32_wb};
      static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
      SelectMVE_VLD(N, 2, Opcodes, true);
    }
    return;
  }

  // ARMISD::VLD3_UPD: select through SelectVLD using the VLD3 "_UPD" pseudos.
  // Entries are ordered 8/16/32-bit; DOpcodes has a fourth slot holding a
  // VLD1 d64 pseudo, while the two Q tables (plain and "odd" pseudos) stop at
  // 32-bit.
  // NOTE(review): `true` / `3` are presumably the is-updating flag and the
  // vector count -- confirm against SelectVLD's declaration.
  case ARMISD::VLD3_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
                                         ARM::VLD3d16Pseudo_UPD,
                                         ARM::VLD3d32Pseudo_UPD,
                                         ARM::VLD1d64TPseudoWB_fixed};
    static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
                                          ARM::VLD3q16Pseudo_UPD,
                                          ARM::VLD3q32Pseudo_UPD };
    static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
                                          ARM::VLD3q16oddPseudo_UPD,
                                          ARM::VLD3q32oddPseudo_UPD };
    SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
    return;
  }

  // ARMISD::VLD4_UPD: two selection paths, both of which return.
  //  * NEON available: SelectVLD with the VLD4 "_UPD" pseudos. The D table's
  //    64-bit slot is a VLD1 d64 pseudo; the two Q tables (plain and "odd")
  //    have no 64-bit entry.
  //  * Otherwise: SelectMVE_VLD with one four-entry table (VLD40..VLD43, the
  //    last one "_wb") per element width, gathered in 8/16/32-bit order.
  // NOTE(review): the meaning of the bare `true` / `4` arguments is assumed
  // (updating form, vector count) -- confirm against the helpers.
  case ARMISD::VLD4_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD, ARM::VLD4d32Pseudo_UPD,
          ARM::VLD1d64QPseudoWB_fixed};
      static const uint16_t QOpcodes0[] = {ARM::VLD4q8Pseudo_UPD,
                                           ARM::VLD4q16Pseudo_UPD,
                                           ARM::VLD4q32Pseudo_UPD};
      static const uint16_t QOpcodes1[] = {ARM::VLD4q8oddPseudo_UPD,
                                           ARM::VLD4q16oddPseudo_UPD,
                                           ARM::VLD4q32oddPseudo_UPD};
      SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
    } else {
      static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,
                                          ARM::MVE_VLD42_8,
                                          ARM::MVE_VLD43_8_wb};
      static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16,
                                           ARM::MVE_VLD42_16,
                                           ARM::MVE_VLD43_16_wb};
      static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32,
                                           ARM::MVE_VLD42_32,
                                           ARM::MVE_VLD43_32_wb};
      static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
      SelectMVE_VLD(N, 4, Opcodes, true);
    }
    return;
  }

  // ARMISD::VLD1x2_UPD: handled here only when the subtarget has NEON;
  // otherwise `break` leaves the switch without selecting anything. DOpcodes
  // holds the VLD1q*wb_fixed opcodes and QOpcodes the VLD1d*QPseudoWB_fixed
  // pseudos, each in 8/16/32/64-bit order.
  // NOTE(review): `true` / `2` are presumably the is-updating flag and the
  // vector count -- confirm against SelectVLD's declaration.
  case ARMISD::VLD1x2_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VLD1q8wb_fixed, ARM::VLD1q16wb_fixed, ARM::VLD1q32wb_fixed,
          ARM::VLD1q64wb_fixed};
      static const uint16_t QOpcodes[] = {
          ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed,
          ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed};
      SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }
    break;
  }

  // ARMISD::VLD1x3_UPD: handled here only when the subtarget has NEON;
  // otherwise `break` leaves the switch without selecting anything. DOpcodes
  // holds the VLD1d*TPseudoWB_fixed pseudos; QOpcodes0/QOpcodes1 hold the
  // "LowT" and "HighT" _UPD pseudos. All tables are in 8/16/32/64-bit order.
  // NOTE(review): `true` / `3` are presumably the is-updating flag and the
  // vector count -- confirm against SelectVLD's declaration.
  case ARMISD::VLD1x3_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VLD1d8TPseudoWB_fixed, ARM::VLD1d16TPseudoWB_fixed,
          ARM::VLD1d32TPseudoWB_fixed, ARM::VLD1d64TPseudoWB_fixed};
      static const uint16_t QOpcodes0[] = {
          ARM::VLD1q8LowTPseudo_UPD, ARM::VLD1q16LowTPseudo_UPD,
          ARM::VLD1q32LowTPseudo_UPD, ARM::VLD1q64LowTPseudo_UPD};
      static const uint16_t QOpcodes1[] = {
          ARM::VLD1q8HighTPseudo_UPD, ARM::VLD1q16HighTPseudo_UPD,
          ARM::VLD1q32HighTPseudo_UPD, ARM::VLD1q64HighTPseudo_UPD};
      SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    break;
  }

  // ARMISD::VLD1x4_UPD: handled here only when the subtarget has NEON;
  // otherwise `break` leaves the switch without selecting anything. DOpcodes
  // holds the VLD1d*QPseudoWB_fixed pseudos; QOpcodes0/QOpcodes1 hold the
  // "LowQ" and "HighQ" _UPD pseudos. All tables are in 8/16/32/64-bit order.
  // NOTE(review): `true` / `4` are presumably the is-updating flag and the
  // vector count -- confirm against SelectVLD's declaration.
  case ARMISD::VLD1x4_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed,
          ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed};
      static const uint16_t QOpcodes0[] = {
          ARM::VLD1q8LowQPseudo_UPD, ARM::VLD1q16LowQPseudo_UPD,
          ARM::VLD1q32LowQPseudo_UPD, ARM::VLD1q64LowQPseudo_UPD};
      static const uint16_t QOpcodes1[] = {
          ARM::VLD1q8HighQPseudo_UPD, ARM::VLD1q16HighQPseudo_UPD,
          ARM::VLD1q32HighQPseudo_UPD, ARM::VLD1q64HighQPseudo_UPD};
      SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    break;
  }

  // ARMISD::VLD2LN_UPD: select through SelectVLDSTLane using the VLD2LN "_UPD"
  // pseudos. DOpcodes covers 8/16/32-bit; QOpcodes covers only 16/32-bit
  // (there is no q8 entry).
  // NOTE(review): the two `true` flags are presumably "is load" and "is
  // updating", and `2` the vector count -- confirm against SelectVLDSTLane.
  case ARMISD::VLD2LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
                                         ARM::VLD2LNd16Pseudo_UPD,
                                         ARM::VLD2LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
                                         ARM::VLD2LNq32Pseudo_UPD };
    SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
    return;
  }

  // ARMISD::VLD3LN_UPD: select through SelectVLDSTLane using the VLD3LN "_UPD"
  // pseudos. DOpcodes covers 8/16/32-bit; QOpcodes covers only 16/32-bit
  // (there is no q8 entry).
  // NOTE(review): the two `true` flags are presumably "is load" and "is
  // updating", and `3` the vector count -- confirm against SelectVLDSTLane.
  case ARMISD::VLD3LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
                                         ARM::VLD3LNd16Pseudo_UPD,
                                         ARM::VLD3LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
                                         ARM::VLD3LNq32Pseudo_UPD };
    SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
    return;
  }

  // ARMISD::VLD4LN_UPD: select through SelectVLDSTLane using the VLD4LN "_UPD"
  // pseudos. DOpcodes covers 8/16/32-bit; QOpcodes covers only 16/32-bit
  // (there is no q8 entry).
  // NOTE(review): the two `true` flags are presumably "is load" and "is
  // updating", and `4` the vector count -- confirm against SelectVLDSTLane.
  case ARMISD::VLD4LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
                                         ARM::VLD4LNd16Pseudo_UPD,
                                         ARM::VLD4LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
                                         ARM::VLD4LNq32Pseudo_UPD };
    SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
    return;
  }

  // ARMISD::VST1_UPD: select through SelectVST using the VST1 "wb_fixed"
  // opcodes. Both tables list the 8/16/32/64-bit variants in that order; no
  // second Q table is supplied (nullptr).
  // NOTE(review): `true` / `1` are presumably the is-updating flag and the
  // vector count -- confirm against SelectVST's declaration.
  case ARMISD::VST1_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
                                         ARM::VST1d16wb_fixed,
                                         ARM::VST1d32wb_fixed,
                                         ARM::VST1d64wb_fixed };
    static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
                                         ARM::VST1q16wb_fixed,
                                         ARM::VST1q32wb_fixed,
                                         ARM::VST1q64wb_fixed };
    SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
    return;
  }

  // ARMISD::VST2_UPD: handled here only when the subtarget has NEON;
  // otherwise `break` leaves the switch without selecting anything (unlike
  // the VLD2_UPD case, no MVE tables are provided here). The D table's 64-bit
  // slot is VST1q64wb_fixed; the Q table has no 64-bit entry.
  // NOTE(review): `true` / `2` are presumably the is-updating flag and the
  // vector count -- confirm against SelectVST's declaration.
  case ARMISD::VST2_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VST2d8wb_fixed, ARM::VST2d16wb_fixed, ARM::VST2d32wb_fixed,
          ARM::VST1q64wb_fixed};
      static const uint16_t QOpcodes[] = {ARM::VST2q8PseudoWB_fixed,
                                          ARM::VST2q16PseudoWB_fixed,
                                          ARM::VST2q32PseudoWB_fixed};
      SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }
    break;
  }

  // ARMISD::VST3_UPD: select through SelectVST using the VST3 "_UPD" pseudos.
  // Entries are ordered 8/16/32-bit; DOpcodes has a fourth slot holding a
  // VST1 d64 pseudo, while the two Q tables (plain and "odd" pseudos) stop at
  // 32-bit.
  // NOTE(review): `true` / `3` are presumably the is-updating flag and the
  // vector count -- confirm against SelectVST's declaration.
  case ARMISD::VST3_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
                                         ARM::VST3d16Pseudo_UPD,
                                         ARM::VST3d32Pseudo_UPD,
                                         ARM::VST1d64TPseudoWB_fixed};
    static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
                                          ARM::VST3q16Pseudo_UPD,
                                          ARM::VST3q32Pseudo_UPD };
    static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
                                          ARM::VST3q16oddPseudo_UPD,
                                          ARM::VST3q32oddPseudo_UPD };
    SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
    return;
  }

  // ARMISD::VST4_UPD: handled here only when the subtarget has NEON;
  // otherwise `break` leaves the switch without selecting anything (unlike
  // the VLD4_UPD case, no MVE tables are provided here). The D table's 64-bit
  // slot is a VST1 d64 pseudo; the two Q tables (plain and "odd") have no
  // 64-bit entry.
  // NOTE(review): `true` / `4` are presumably the is-updating flag and the
  // vector count -- confirm against SelectVST's declaration.
  case ARMISD::VST4_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD, ARM::VST4d32Pseudo_UPD,
          ARM::VST1d64QPseudoWB_fixed};
      static const uint16_t QOpcodes0[] = {ARM::VST4q8Pseudo_UPD,
                                           ARM::VST4q16Pseudo_UPD,
                                           ARM::VST4q32Pseudo_UPD};
      static const uint16_t QOpcodes1[] = {ARM::VST4q8oddPseudo_UPD,
                                           ARM::VST4q16oddPseudo_UPD,
                                           ARM::VST4q32oddPseudo_UPD};
      SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    break;
  }

  // ARMISD::VST1x2_UPD: handled here only when the subtarget has NEON;
  // otherwise `break` leaves the switch without selecting anything. DOpcodes
  // holds the VST1q*wb_fixed opcodes and QOpcodes the VST1d*QPseudoWB_fixed
  // pseudos, each in 8/16/32/64-bit order.
  // NOTE(review): `true` / `2` are presumably the is-updating flag and the
  // vector count -- confirm against SelectVST's declaration.
  case ARMISD::VST1x2_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = { ARM::VST1q8wb_fixed,
                                           ARM::VST1q16wb_fixed,
                                           ARM::VST1q32wb_fixed,
                                           ARM::VST1q64wb_fixed};
      static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudoWB_fixed,
                                           ARM::VST1d16QPseudoWB_fixed,
                                           ARM::VST1d32QPseudoWB_fixed,
                                           ARM::VST1d64QPseudoWB_fixed };
      SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }
    break;
  }

  // ARMISD::VST1x3_UPD: handled here only when the subtarget has NEON;
  // otherwise `break` leaves the switch without selecting anything. DOpcodes
  // holds the VST1d*TPseudoWB_fixed pseudos; QOpcodes0/QOpcodes1 hold the
  // "LowT" and "HighT" _UPD pseudos. All tables are in 8/16/32/64-bit order.
  // NOTE(review): `true` / `3` are presumably the is-updating flag and the
  // vector count -- confirm against SelectVST's declaration.
  case ARMISD::VST1x3_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudoWB_fixed,
                                           ARM::VST1d16TPseudoWB_fixed,
                                           ARM::VST1d32TPseudoWB_fixed,
                                           ARM::VST1d64TPseudoWB_fixed };
      static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
                                            ARM::VST1q16LowTPseudo_UPD,
                                            ARM::VST1q32LowTPseudo_UPD,
                                            ARM::VST1q64LowTPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo_UPD,
                                            ARM::VST1q16HighTPseudo_UPD,
                                            ARM::VST1q32HighTPseudo_UPD,
                                            ARM::VST1q64HighTPseudo_UPD };
      SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    break;
  }

  // ARMISD::VST1x4_UPD: handled here only when the subtarget has NEON;
  // otherwise `break` leaves the switch without selecting anything. DOpcodes
  // holds the VST1d*QPseudoWB_fixed pseudos; QOpcodes0/QOpcodes1 hold the
  // "LowQ" and "HighQ" _UPD pseudos. All tables are in 8/16/32/64-bit order.
  // NOTE(review): `true` / `4` are presumably the is-updating flag and the
  // vector count -- confirm against SelectVST's declaration.
  case ARMISD::VST1x4_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudoWB_fixed,
                                           ARM::VST1d16QPseudoWB_fixed,
                                           ARM::VST1d32QPseudoWB_fixed,
                                           ARM::VST1d64QPseudoWB_fixed };
      static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
                                            ARM::VST1q16LowQPseudo_UPD,
                                            ARM::VST1q32LowQPseudo_UPD,
                                            ARM::VST1q64LowQPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo_UPD,
                                            ARM::VST1q16HighQPseudo_UPD,
                                            ARM::VST1q32HighQPseudo_UPD,
                                            ARM::VST1q64HighQPseudo_UPD };
      SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    break;
  }
  // ARMISD::VST2LN_UPD: select through SelectVLDSTLane using the VST2LN "_UPD"
  // pseudos. DOpcodes covers 8/16/32-bit; QOpcodes covers only 16/32-bit
  // (there is no q8 entry).
  // NOTE(review): `false, true` are presumably "is load" = no and "is
  // updating" = yes, and `2` the vector count -- confirm against
  // SelectVLDSTLane.
  case ARMISD::VST2LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
                                         ARM::VST2LNd16Pseudo_UPD,
                                         ARM::VST2LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
                                         ARM::VST2LNq32Pseudo_UPD };
    SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
    return;
  }

  // ARMISD::VST3LN_UPD: select through SelectVLDSTLane using the VST3LN "_UPD"
  // pseudos. DOpcodes covers 8/16/32-bit; QOpcodes covers only 16/32-bit
  // (there is no q8 entry).
  // NOTE(review): `false, true` are presumably "is load" = no and "is
  // updating" = yes, and `3` the vector count -- confirm against
  // SelectVLDSTLane.
  case ARMISD::VST3LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
                                         ARM::VST3LNd16Pseudo_UPD,
                                         ARM::VST3LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
                                         ARM::VST3LNq32Pseudo_UPD };
    SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
    return;
  }

  // ARMISD::VST4LN_UPD: select through SelectVLDSTLane using the VST4LN "_UPD"
  // pseudos. DOpcodes covers 8/16/32-bit; QOpcodes covers only 16/32-bit
  // (there is no q8 entry).
  // NOTE(review): `false, true` are presumably "is load" = no and "is
  // updating" = yes, and `4` the vector count -- confirm against
  // SelectVLDSTLane.
  case ARMISD::VST4LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
                                         ARM::VST4LNd16Pseudo_UPD,
                                         ARM::VST4LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
                                         ARM::VST4LNq32Pseudo_UPD };
    SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
    return;
  }

  case ISD::INTRINSIC_VOID:
  case ISD::INTRINSIC_W_CHAIN: {
    unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
    switch (IntNo) {
    default:
      break;

    case Intrinsic::arm_mrrc:
    case Intrinsic::arm_mrrc2: {
      // Select the arm.mrrc / arm.mrrc2 intrinsics directly to an MRRC-family
      // machine node. Intrinsic operands: 0 = chain, 1 = intrinsic ID,
      // 2..4 = the constant coproc / opc / CRm fields.
      SDLoc dl(N);
      SDValue Chain = N->getOperand(0);
      unsigned Opc;

      // Thumb uses the t2 encodings; the "2" variants follow the intrinsic.
      if (Subtarget->isThumb())
        Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
      else
        Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);

      // At most six operands are pushed (three immediates, the predicate pair
      // and the chain), so give the vector six inline slots to keep the
      // predicated forms off the heap.
      SmallVector<SDValue, 6> Ops;
      auto PushImmOperand = [&](unsigned OpNo) {
        Ops.push_back(getI32Imm(
            cast<ConstantSDNode>(N->getOperand(OpNo))->getZExtValue(), dl));
      };
      PushImmOperand(2); // coproc
      PushImmOperand(3); // opc
      PushImmOperand(4); // CRm

      // The ARM-mode MRRC2 encoding is unconditional: the top four bits of the
      // instruction are always '1111', so it takes no predicate operands.
      // Assembly may still spell an AL condition on mrrc2, but that makes no
      // difference to the encoded instruction.
      if (Opc != ARM::MRRC2) {
        Ops.push_back(getAL(CurDAG, dl));
        Ops.push_back(CurDAG->getRegister(0, MVT::i32));
      }

      Ops.push_back(Chain);

      // The instruction writes two registers; the third result is the chain.
      const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};

      ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
      return;
    }
    case Intrinsic::arm_ldaexd:
    case Intrinsic::arm_ldrexd: {
      // Select the 64-bit load-exclusive intrinsics (arm.ldrexd, and its
      // acquire form arm.ldaexd) to the matching (t2)LDREXD / (t2)LDAEXD node.
      // Intrinsic operands: 0 = chain, 1 = intrinsic ID, 2 = address.
      // Intrinsic results:  0 and 1 = the two i32 halves, 2 = chain.
      SDLoc dl(N);
      SDValue Chain = N->getOperand(0);
      SDValue MemAddr = N->getOperand(2);
      // Despite the name, this is "Thumb with v8-M baseline ops": it picks the
      // t2 opcodes, which return the two halves as separate i32 results.
      bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();

      bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
      unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
                                : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);

      // Result types: {i32, i32, chain} for the t2 forms, {Untyped, chain} for
      // the ARM forms. At most three entries, so keep them in inline storage.
      SmallVector<EVT, 3> ResTys;
      if (isThumb) {
        ResTys.push_back(MVT::i32);
        ResTys.push_back(MVT::i32);
      } else {
        ResTys.push_back(MVT::Untyped);
      }
      ResTys.push_back(MVT::Other);

      // Place arguments in the right order: address, predicate pair, chain.
      SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
                       CurDAG->getRegister(0, MVT::i32), Chain};
      MachineSDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
      // Transfer memoperands.
      MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
      CurDAG->setNodeMemRefs(Ld, {MemOp});

      // Remap uses. The chain is the last result of the new node, so its index
      // depends on how many value results precede it.
      SDValue OutChain = SDValue(Ld, isThumb ? 2 : 1);

      // Redirect users of intrinsic result ResNo. The t2 forms expose that
      // half directly as result ResNo; the ARM forms need an EXTRACT_SUBREG of
      // the single Untyped result. Unused halves create no extra nodes.
      auto RemapHalf = [&](unsigned ResNo, unsigned SubReg) {
        if (SDValue(N, ResNo).use_empty())
          return;
        SDValue Result;
        if (isThumb) {
          Result = SDValue(Ld, ResNo);
        } else {
          SDValue SubRegIdx = CurDAG->getTargetConstant(SubReg, dl, MVT::i32);
          SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
              dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
          Result = SDValue(ResNode, 0);
        }
        ReplaceUses(SDValue(N, ResNo), Result);
      };
      RemapHalf(0, ARM::gsub_0);
      RemapHalf(1, ARM::gsub_1);

      ReplaceUses(SDValue(N, 2), OutChain);
      CurDAG->RemoveDeadNode(N);
      return;
    }
    case Intrinsic::arm_stlexd:
    case Intrinsic::arm_strexd: {
      // Select the 64-bit store-exclusive intrinsics (arm.strexd, and its
      // release form arm.stlexd) to the matching (t2)STREXD / (t2)STLEXD node.
      // Intrinsic operands: 0 = chain, 1 = intrinsic ID, 2 and 3 = the two
      // values to store, 4 = address.
      SDLoc dl(N);
      SDValue InChain = N->getOperand(0);
      SDValue FirstVal = N->getOperand(2);
      SDValue SecondVal = N->getOperand(3);
      SDValue Addr = N->getOperand(4);

      // The node yields the i32 status of the store plus the chain.
      const EVT ResultVTs[] = {MVT::i32, MVT::Other};

      // The t2 opcodes are used only on Thumb subtargets that have Thumb2.
      const bool UseT2 = Subtarget->isThumb() && Subtarget->hasThumb2();
      const bool IsRelease = IntNo == Intrinsic::arm_stlexd;

      unsigned NewOpc;
      if (UseT2)
        NewOpc = IsRelease ? ARM::t2STLEXD : ARM::t2STREXD;
      else
        NewOpc = IsRelease ? ARM::STLEXD : ARM::STREXD;

      // Operand order: value(s), address, predicate pair, chain.
      SmallVector<SDValue, 7> Ops;
      if (UseT2) {
        // The t2 forms take the two values as separate operands.
        Ops.push_back(FirstVal);
        Ops.push_back(SecondVal);
      } else {
        // The ARM forms take both values as a single GPRPair operand.
        SDNode *Pair = createGPRPairNode(MVT::Untyped, FirstVal, SecondVal);
        Ops.push_back(SDValue(Pair, 0));
      }
      Ops.push_back(Addr);
      Ops.push_back(getAL(CurDAG, dl));
      Ops.push_back(CurDAG->getRegister(0, MVT::i32));
      Ops.push_back(InChain);

      SDNode *StoreNode = CurDAG->getMachineNode(NewOpc, dl, ResultVTs, Ops);
      // Carry the intrinsic's memory operand over to the machine node.
      MachineMemOperand *MMO = cast<MemIntrinsicSDNode>(N)->getMemOperand();
      CurDAG->setNodeMemRefs(cast<MachineSDNode>(StoreNode), {MMO});

      ReplaceNode(N, StoreNode);
      return;
    }

    // arm.neon.vld1: select through SelectVLD using the plain VLD1 opcodes.
    // Both tables list the 8/16/32/64-bit variants in that order; no second Q
    // table is supplied (nullptr).
    // NOTE(review): `false` / `1` are presumably the is-updating flag and the
    // vector count -- confirm against SelectVLD's declaration.
    case Intrinsic::arm_neon_vld1: {
      static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
                                           ARM::VLD1d32, ARM::VLD1d64 };
      static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
                                           ARM::VLD1q32, ARM::VLD1q64};
      SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
      return;
    }

    // arm.neon.vld1x2: select through SelectVLD. DOpcodes holds the VLD1q*
    // opcodes and QOpcodes the VLD1d*QPseudo pseudos, each in 8/16/32/64-bit
    // order; no second Q table is supplied (nullptr).
    // NOTE(review): `false` / `2` are presumably the is-updating flag and the
    // vector count -- confirm against SelectVLD's declaration.
    case Intrinsic::arm_neon_vld1x2: {
      static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
                                           ARM::VLD1q32, ARM::VLD1q64 };
      static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo,
                                           ARM::VLD1d16QPseudo,
                                           ARM::VLD1d32QPseudo,
                                           ARM::VLD1d64QPseudo };
      SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }

    // arm.neon.vld1x3: select through SelectVLD. All tables are in
    // 8/16/32/64-bit order. Note that QOpcodes0 uses the "_UPD" LowT pseudos
    // even for this non-updating intrinsic, while QOpcodes1 uses the plain
    // HighT pseudos.
    // NOTE(review): presumably the first Q-register load must advance the
    // address for the second one; `false` / `3` are presumably the is-updating
    // flag and the vector count -- confirm against SelectVLD.
    case Intrinsic::arm_neon_vld1x3: {
      static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo,
                                           ARM::VLD1d16TPseudo,
                                           ARM::VLD1d32TPseudo,
                                           ARM::VLD1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD,
                                            ARM::VLD1q16LowTPseudo_UPD,
                                            ARM::VLD1q32LowTPseudo_UPD,
                                            ARM::VLD1q64LowTPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo,
                                            ARM::VLD1q16HighTPseudo,
                                            ARM::VLD1q32HighTPseudo,
                                            ARM::VLD1q64HighTPseudo };
      SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    // arm.neon.vld1x4: select through SelectVLD. All tables are in
    // 8/16/32/64-bit order. Note that QOpcodes0 uses the "_UPD" LowQ pseudos
    // even for this non-updating intrinsic, while QOpcodes1 uses the plain
    // HighQ pseudos.
    // NOTE(review): presumably the first Q-register load must advance the
    // address for the second one; `false` / `4` are presumably the is-updating
    // flag and the vector count -- confirm against SelectVLD.
    case Intrinsic::arm_neon_vld1x4: {
      static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo,
                                           ARM::VLD1d16QPseudo,
                                           ARM::VLD1d32QPseudo,
                                           ARM::VLD1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD,
                                            ARM::VLD1q16LowQPseudo_UPD,
                                            ARM::VLD1q32LowQPseudo_UPD,
                                            ARM::VLD1q64LowQPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo,
                                            ARM::VLD1q16HighQPseudo,
                                            ARM::VLD1q32HighQPseudo,
                                            ARM::VLD1q64HighQPseudo };
      SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    // arm.neon.vld2: select through SelectVLD using the VLD2 opcodes. The D
    // table's 64-bit slot is VLD1q64; the Q table covers only 8/16/32-bit and
    // no second Q table is supplied (nullptr).
    // NOTE(review): `false` / `2` are presumably the is-updating flag and the
    // vector count -- confirm against SelectVLD's declaration.
    case Intrinsic::arm_neon_vld2: {
      static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
                                           ARM::VLD2d32, ARM::VLD1q64 };
      static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
                                           ARM::VLD2q32Pseudo };
      SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }

    // arm.neon.vld3: select through SelectVLD using the VLD3 pseudos. The D
    // table's 64-bit slot is a VLD1 d64 pseudo; the Q tables cover only
    // 8/16/32-bit. Note that QOpcodes0 uses "_UPD" pseudos even for this
    // non-updating intrinsic, while QOpcodes1 uses the plain "odd" pseudos.
    // NOTE(review): presumably the first Q-register load must advance the
    // address for the second one; `false` / `3` are presumably the is-updating
    // flag and the vector count -- confirm against SelectVLD.
    case Intrinsic::arm_neon_vld3: {
      static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
                                           ARM::VLD3d16Pseudo,
                                           ARM::VLD3d32Pseudo,
                                           ARM::VLD1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
                                            ARM::VLD3q16Pseudo_UPD,
                                            ARM::VLD3q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
                                            ARM::VLD3q16oddPseudo,
                                            ARM::VLD3q32oddPseudo };
      SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    // arm.neon.vld4: select through SelectVLD using the VLD4 pseudos. The D
    // table's 64-bit slot is a VLD1 d64 pseudo; the Q tables cover only
    // 8/16/32-bit. Note that QOpcodes0 uses "_UPD" pseudos even for this
    // non-updating intrinsic, while QOpcodes1 uses the plain "odd" pseudos.
    // NOTE(review): presumably the first Q-register load must advance the
    // address for the second one; `false` / `4` are presumably the is-updating
    // flag and the vector count -- confirm against SelectVLD.
    case Intrinsic::arm_neon_vld4: {
      static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
                                           ARM::VLD4d16Pseudo,
                                           ARM::VLD4d32Pseudo,
                                           ARM::VLD1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
                                            ARM::VLD4q16Pseudo_UPD,
                                            ARM::VLD4q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
                                            ARM::VLD4q16oddPseudo,
                                            ARM::VLD4q32oddPseudo };
      SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    // arm.neon.vld2dup: select through SelectVLDDup (IsIntrinsic = true) using
    // the VLD2DUP opcodes. The D table's 64-bit slot is VLD1q64; the Q tables
    // ("Even" then "Odd" pseudos) cover only 8/16/32-bit.
    // NOTE(review): the bare `false` / `2` are presumably the is-updating flag
    // and the vector count -- confirm against SelectVLDDup's declaration.
    case Intrinsic::arm_neon_vld2dup: {
      static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
                                           ARM::VLD2DUPd32, ARM::VLD1q64 };
      static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
                                            ARM::VLD2DUPq16EvenPseudo,
                                            ARM::VLD2DUPq32EvenPseudo };
      static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo,
                                            ARM::VLD2DUPq16OddPseudo,
                                            ARM::VLD2DUPq32OddPseudo };
      SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2,
                   DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    // arm.neon.vld3dup: select through SelectVLDDup (IsIntrinsic = true) using
    // the VLD3DUP pseudos. The D table's 64-bit slot is a VLD1 d64 pseudo; the
    // Q tables ("Even" then "Odd" pseudos) cover only 8/16/32-bit.
    // NOTE(review): the bare `false` / `3` are presumably the is-updating flag
    // and the vector count -- confirm against SelectVLDDup's declaration.
    case Intrinsic::arm_neon_vld3dup: {
      static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo,
                                           ARM::VLD3DUPd16Pseudo,
                                           ARM::VLD3DUPd32Pseudo,
                                           ARM::VLD1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
                                            ARM::VLD3DUPq16EvenPseudo,
                                            ARM::VLD3DUPq32EvenPseudo };
      static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo,
                                            ARM::VLD3DUPq16OddPseudo,
                                            ARM::VLD3DUPq32OddPseudo };
      SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3,
                   DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    // arm.neon.vld4dup: select through SelectVLDDup (IsIntrinsic = true) using
    // the VLD4DUP pseudos. The D table's 64-bit slot is a VLD1 d64 pseudo; the
    // Q tables ("Even" then "Odd" pseudos) cover only 8/16/32-bit.
    // NOTE(review): the bare `false` / `4` are presumably the is-updating flag
    // and the vector count -- confirm against SelectVLDDup's declaration.
    case Intrinsic::arm_neon_vld4dup: {
      static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo,
                                           ARM::VLD4DUPd16Pseudo,
                                           ARM::VLD4DUPd32Pseudo,
                                           ARM::VLD1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
                                            ARM::VLD4DUPq16EvenPseudo,
                                            ARM::VLD4DUPq32EvenPseudo };
      static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo,
                                            ARM::VLD4DUPq16OddPseudo,
                                            ARM::VLD4DUPq32OddPseudo };
      SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4,
                   DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    // arm.neon.vld2lane: select through SelectVLDSTLane using the VLD2LN
    // pseudos. DOpcodes covers 8/16/32-bit; QOpcodes covers only 16/32-bit
    // (there is no q8 entry).
    // NOTE(review): `true, false` are presumably "is load" = yes and "is
    // updating" = no, and `2` the vector count -- confirm against
    // SelectVLDSTLane.
    case Intrinsic::arm_neon_vld2lane: {
      static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
                                           ARM::VLD2LNd16Pseudo,
                                           ARM::VLD2LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
                                           ARM::VLD2LNq32Pseudo };
      SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
      return;
    }

    // arm.neon.vld3lane: select through SelectVLDSTLane using the VLD3LN
    // pseudos. DOpcodes covers 8/16/32-bit; QOpcodes covers only 16/32-bit
    // (there is no q8 entry).
    // NOTE(review): `true, false` are presumably "is load" = yes and "is
    // updating" = no, and `3` the vector count -- confirm against
    // SelectVLDSTLane.
    case Intrinsic::arm_neon_vld3lane: {
      static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
                                           ARM::VLD3LNd16Pseudo,
                                           ARM::VLD3LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
                                           ARM::VLD3LNq32Pseudo };
      SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
      return;
    }

    // arm.neon.vld4lane: select through SelectVLDSTLane using the VLD4LN
    // pseudos. DOpcodes covers 8/16/32-bit; QOpcodes covers only 16/32-bit
    // (there is no q8 entry).
    // NOTE(review): `true, false` are presumably "is load" = yes and "is
    // updating" = no, and `4` the vector count -- confirm against
    // SelectVLDSTLane.
    case Intrinsic::arm_neon_vld4lane: {
      static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
                                           ARM::VLD4LNd16Pseudo,
                                           ARM::VLD4LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
                                           ARM::VLD4LNq32Pseudo };
      SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
      return;
    }

    // arm.neon.vst1: select through SelectVST using the plain VST1 opcodes.
    // Both tables list the 8/16/32/64-bit variants in that order; no second Q
    // table is supplied (nullptr).
    // NOTE(review): `false` / `1` are presumably the is-updating flag and the
    // vector count -- confirm against SelectVST's declaration.
    case Intrinsic::arm_neon_vst1: {
      static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
                                           ARM::VST1d32, ARM::VST1d64 };
      static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
                                           ARM::VST1q32, ARM::VST1q64 };
      SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
      return;
    }

    // arm.neon.vst1x2: select through SelectVST. DOpcodes holds the VST1q*
    // opcodes and QOpcodes the VST1d*QPseudo pseudos, each in 8/16/32/64-bit
    // order; no second Q table is supplied (nullptr).
    // NOTE(review): `false` / `2` are presumably the is-updating flag and the
    // vector count -- confirm against SelectVST's declaration.
    case Intrinsic::arm_neon_vst1x2: {
      static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
                                           ARM::VST1q32, ARM::VST1q64 };
      static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo,
                                           ARM::VST1d16QPseudo,
                                           ARM::VST1d32QPseudo,
                                           ARM::VST1d64QPseudo };
      SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }

    // arm.neon.vst1x3: select through SelectVST. All tables are in
    // 8/16/32/64-bit order. Note that QOpcodes0 uses the "_UPD" LowT pseudos
    // even for this non-updating intrinsic, while QOpcodes1 uses the plain
    // HighT pseudos.
    // NOTE(review): presumably the first Q-register store must advance the
    // address for the second one; `false` / `3` are presumably the is-updating
    // flag and the vector count -- confirm against SelectVST.
    case Intrinsic::arm_neon_vst1x3: {
      static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo,
                                           ARM::VST1d16TPseudo,
                                           ARM::VST1d32TPseudo,
                                           ARM::VST1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
                                            ARM::VST1q16LowTPseudo_UPD,
                                            ARM::VST1q32LowTPseudo_UPD,
                                            ARM::VST1q64LowTPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo,
                                            ARM::VST1q16HighTPseudo,
                                            ARM::VST1q32HighTPseudo,
                                            ARM::VST1q64HighTPseudo };
      SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    // arm.neon.vst1x4: select through SelectVST. All tables are in
    // 8/16/32/64-bit order. Note that QOpcodes0 uses the "_UPD" LowQ pseudos
    // even for this non-updating intrinsic, while QOpcodes1 uses the plain
    // HighQ pseudos.
    // NOTE(review): presumably the first Q-register store must advance the
    // address for the second one; `false` / `4` are presumably the is-updating
    // flag and the vector count -- confirm against SelectVST.
    case Intrinsic::arm_neon_vst1x4: {
      static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo,
                                           ARM::VST1d16QPseudo,
                                           ARM::VST1d32QPseudo,
                                           ARM::VST1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
                                            ARM::VST1q16LowQPseudo_UPD,
                                            ARM::VST1q32LowQPseudo_UPD,
                                            ARM::VST1q64LowQPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo,
                                            ARM::VST1q16HighQPseudo,
                                            ARM::VST1q32HighQPseudo,
                                            ARM::VST1q64HighQPseudo };
      SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    // arm.neon.vst2: select through SelectVST using the VST2 opcodes. The D
    // table's 64-bit slot is VST1q64; the Q table covers only 8/16/32-bit and
    // no second Q table is supplied (nullptr).
    // NOTE(review): `false` / `2` are presumably the is-updating flag and the
    // vector count -- confirm against SelectVST's declaration.
    case Intrinsic::arm_neon_vst2: {
      static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
                                           ARM::VST2d32, ARM::VST1q64 };
      static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
                                           ARM::VST2q32Pseudo };
      SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }

    // arm.neon.vst3: select through SelectVST using the VST3 pseudos. The D
    // table's 64-bit slot is a VST1 d64 pseudo; the Q tables cover only
    // 8/16/32-bit. Note that QOpcodes0 uses "_UPD" pseudos even for this
    // non-updating intrinsic, while QOpcodes1 uses the plain "odd" pseudos.
    // NOTE(review): presumably the first Q-register store must advance the
    // address for the second one; `false` / `3` are presumably the is-updating
    // flag and the vector count -- confirm against SelectVST.
    case Intrinsic::arm_neon_vst3: {
      static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
                                           ARM::VST3d16Pseudo,
                                           ARM::VST3d32Pseudo,
                                           ARM::VST1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
                                            ARM::VST3q16Pseudo_UPD,
                                            ARM::VST3q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
                                            ARM::VST3q16oddPseudo,
                                            ARM::VST3q32oddPseudo };
      SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    // arm.neon.vst4: select through SelectVST using the VST4 pseudos. The D
    // table's 64-bit slot is a VST1 d64 pseudo; the Q tables cover only
    // 8/16/32-bit. Note that QOpcodes0 uses "_UPD" pseudos even for this
    // non-updating intrinsic, while QOpcodes1 uses the plain "odd" pseudos.
    // NOTE(review): presumably the first Q-register store must advance the
    // address for the second one; `false` / `4` are presumably the is-updating
    // flag and the vector count -- confirm against SelectVST.
    case Intrinsic::arm_neon_vst4: {
      static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
                                           ARM::VST4d16Pseudo,
                                           ARM::VST4d32Pseudo,
                                           ARM::VST1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
                                            ARM::VST4q16Pseudo_UPD,
                                            ARM::VST4q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
                                            ARM::VST4q16oddPseudo,
                                            ARM::VST4q32oddPseudo };
      SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vst2lane: {
      static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
                                           ARM::VST2LNd16Pseudo,
                                           ARM::VST2LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
                                           ARM::VST2LNq32Pseudo };
      SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vst3lane: {
      static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
                                           ARM::VST3LNd16Pseudo,
                                           ARM::VST3LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
                                           ARM::VST3LNq32Pseudo };
      SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vst4lane: {
      static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
                                           ARM::VST4LNd16Pseudo,
                                           ARM::VST4LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
                                           ARM::VST4LNq32Pseudo };
      SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_mve_vldr_gather_base_wb:
    case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: {
      static const uint16_t Opcodes[] = {ARM::MVE_VLDRWU32_qi_pre,
                                         ARM::MVE_VLDRDU64_qi_pre};
      SelectMVE_WB(N, Opcodes,
                   IntNo == Intrinsic::arm_mve_vldr_gather_base_wb_predicated);
      return;
    }

    case Intrinsic::arm_mve_vld2q: {
      static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8, ARM::MVE_VLD21_8};
      static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,
                                           ARM::MVE_VLD21_16};
      static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
                                           ARM::MVE_VLD21_32};
      static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
      SelectMVE_VLD(N, 2, Opcodes, false);
      return;
    }

    case Intrinsic::arm_mve_vld4q: {
      static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,
                                          ARM::MVE_VLD42_8, ARM::MVE_VLD43_8};
      static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16,
                                           ARM::MVE_VLD42_16,
                                           ARM::MVE_VLD43_16};
      static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32,
                                           ARM::MVE_VLD42_32,
                                           ARM::MVE_VLD43_32};
      static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
      SelectMVE_VLD(N, 4, Opcodes, false);
      return;
    }
    }
    break;
  }

  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    switch (IntNo) {
    default:
      break;

    // Scalar f32 -> bf16
    case Intrinsic::arm_neon_vcvtbfp2bf: {
      SDLoc dl(N);
      const SDValue &Src = N->getOperand(1);
      llvm::EVT DestTy = N->getValueType(0);
      SDValue Pred = getAL(CurDAG, dl);
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      SDValue Ops[] = { Src, Src, Pred, Reg0 };
      CurDAG->SelectNodeTo(N, ARM::BF16_VCVTB, DestTy, Ops);
      return;
    }

    // Vector v4f32 -> v4bf16
    case Intrinsic::arm_neon_vcvtfp2bf: {
      SDLoc dl(N);
      const SDValue &Src = N->getOperand(1);
      SDValue Pred = getAL(CurDAG, dl);
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      SDValue Ops[] = { Src, Pred, Reg0 };
      CurDAG->SelectNodeTo(N, ARM::BF16_VCVT, MVT::v4bf16, Ops);
      return;
    }

    case Intrinsic::arm_mve_urshrl:
      SelectMVE_LongShift(N, ARM::MVE_URSHRL, true, false);
      return;
    case Intrinsic::arm_mve_uqshll:
      SelectMVE_LongShift(N, ARM::MVE_UQSHLL, true, false);
      return;
    case Intrinsic::arm_mve_srshrl:
      SelectMVE_LongShift(N, ARM::MVE_SRSHRL, true, false);
      return;
    case Intrinsic::arm_mve_sqshll:
      SelectMVE_LongShift(N, ARM::MVE_SQSHLL, true, false);
      return;
    case Intrinsic::arm_mve_uqrshll:
      SelectMVE_LongShift(N, ARM::MVE_UQRSHLL, false, true);
      return;
    case Intrinsic::arm_mve_sqrshrl:
      SelectMVE_LongShift(N, ARM::MVE_SQRSHRL, false, true);
      return;

    case Intrinsic::arm_mve_vadc:
    case Intrinsic::arm_mve_vadc_predicated:
      SelectMVE_VADCSBC(N, ARM::MVE_VADC, ARM::MVE_VADCI, true,
                        IntNo == Intrinsic::arm_mve_vadc_predicated);
      return;
    case Intrinsic::arm_mve_vsbc:
    case Intrinsic::arm_mve_vsbc_predicated:
      SelectMVE_VADCSBC(N, ARM::MVE_VSBC, ARM::MVE_VSBCI, true,
                        IntNo == Intrinsic::arm_mve_vsbc_predicated);
      return;
    case Intrinsic::arm_mve_vshlc:
    case Intrinsic::arm_mve_vshlc_predicated:
      SelectMVE_VSHLC(N, IntNo == Intrinsic::arm_mve_vshlc_predicated);
      return;

    case Intrinsic::arm_mve_vmlldava:
    case Intrinsic::arm_mve_vmlldava_predicated: {
      static const uint16_t OpcodesU[] = {
          ARM::MVE_VMLALDAVu16,   ARM::MVE_VMLALDAVu32,
          ARM::MVE_VMLALDAVau16,  ARM::MVE_VMLALDAVau32,
      };
      static const uint16_t OpcodesS[] = {
          ARM::MVE_VMLALDAVs16,   ARM::MVE_VMLALDAVs32,
          ARM::MVE_VMLALDAVas16,  ARM::MVE_VMLALDAVas32,
          ARM::MVE_VMLALDAVxs16,  ARM::MVE_VMLALDAVxs32,
          ARM::MVE_VMLALDAVaxs16, ARM::MVE_VMLALDAVaxs32,
          ARM::MVE_VMLSLDAVs16,   ARM::MVE_VMLSLDAVs32,
          ARM::MVE_VMLSLDAVas16,  ARM::MVE_VMLSLDAVas32,
          ARM::MVE_VMLSLDAVxs16,  ARM::MVE_VMLSLDAVxs32,
          ARM::MVE_VMLSLDAVaxs16, ARM::MVE_VMLSLDAVaxs32,
      };
      SelectMVE_VMLLDAV(N, IntNo == Intrinsic::arm_mve_vmlldava_predicated,
                        OpcodesS, OpcodesU);
      return;
    }

    case Intrinsic::arm_mve_vrmlldavha:
    case Intrinsic::arm_mve_vrmlldavha_predicated: {
      static const uint16_t OpcodesU[] = {
          ARM::MVE_VRMLALDAVHu32,  ARM::MVE_VRMLALDAVHau32,
      };
      static const uint16_t OpcodesS[] = {
          ARM::MVE_VRMLALDAVHs32,  ARM::MVE_VRMLALDAVHas32,
          ARM::MVE_VRMLALDAVHxs32, ARM::MVE_VRMLALDAVHaxs32,
          ARM::MVE_VRMLSLDAVHs32,  ARM::MVE_VRMLSLDAVHas32,
          ARM::MVE_VRMLSLDAVHxs32, ARM::MVE_VRMLSLDAVHaxs32,
      };
      SelectMVE_VRMLLDAVH(N, IntNo == Intrinsic::arm_mve_vrmlldavha_predicated,
                          OpcodesS, OpcodesU);
      return;
    }

    case Intrinsic::arm_mve_vidup:
    case Intrinsic::arm_mve_vidup_predicated: {
      static const uint16_t Opcodes[] = {
          ARM::MVE_VIDUPu8, ARM::MVE_VIDUPu16, ARM::MVE_VIDUPu32,
      };
      SelectMVE_VxDUP(N, Opcodes, false,
                      IntNo == Intrinsic::arm_mve_vidup_predicated);
      return;
    }

    case Intrinsic::arm_mve_vddup:
    case Intrinsic::arm_mve_vddup_predicated: {
      static const uint16_t Opcodes[] = {
          ARM::MVE_VDDUPu8, ARM::MVE_VDDUPu16, ARM::MVE_VDDUPu32,
      };
      SelectMVE_VxDUP(N, Opcodes, false,
                      IntNo == Intrinsic::arm_mve_vddup_predicated);
      return;
    }

    case Intrinsic::arm_mve_viwdup:
    case Intrinsic::arm_mve_viwdup_predicated: {
      static const uint16_t Opcodes[] = {
          ARM::MVE_VIWDUPu8, ARM::MVE_VIWDUPu16, ARM::MVE_VIWDUPu32,
      };
      SelectMVE_VxDUP(N, Opcodes, true,
                      IntNo == Intrinsic::arm_mve_viwdup_predicated);
      return;
    }

    case Intrinsic::arm_mve_vdwdup:
    case Intrinsic::arm_mve_vdwdup_predicated: {
      static const uint16_t Opcodes[] = {
          ARM::MVE_VDWDUPu8, ARM::MVE_VDWDUPu16, ARM::MVE_VDWDUPu32,
      };
      SelectMVE_VxDUP(N, Opcodes, true,
                      IntNo == Intrinsic::arm_mve_vdwdup_predicated);
      return;
    }

    case Intrinsic::arm_cde_cx1d:
    case Intrinsic::arm_cde_cx1da:
    case Intrinsic::arm_cde_cx2d:
    case Intrinsic::arm_cde_cx2da:
    case Intrinsic::arm_cde_cx3d:
    case Intrinsic::arm_cde_cx3da: {
      bool HasAccum = IntNo == Intrinsic::arm_cde_cx1da ||
                      IntNo == Intrinsic::arm_cde_cx2da ||
                      IntNo == Intrinsic::arm_cde_cx3da;
      size_t NumExtraOps;
      uint16_t Opcode;
      switch (IntNo) {
      case Intrinsic::arm_cde_cx1d:
      case Intrinsic::arm_cde_cx1da:
        NumExtraOps = 0;
        Opcode = HasAccum ? ARM::CDE_CX1DA : ARM::CDE_CX1D;
        break;
      case Intrinsic::arm_cde_cx2d:
      case Intrinsic::arm_cde_cx2da:
        NumExtraOps = 1;
        Opcode = HasAccum ? ARM::CDE_CX2DA : ARM::CDE_CX2D;
        break;
      case Intrinsic::arm_cde_cx3d:
      case Intrinsic::arm_cde_cx3da:
        NumExtraOps = 2;
        Opcode = HasAccum ? ARM::CDE_CX3DA : ARM::CDE_CX3D;
        break;
      default:
        llvm_unreachable("Unexpected opcode");
      }
      SelectCDE_CXxD(N, Opcode, NumExtraOps, HasAccum);
      return;
    }
    }
    break;
  }

  case ISD::ATOMIC_CMP_SWAP:
    SelectCMP_SWAP(N);
    return;
  }

  SelectCode(N);
}