bool ARMExpandPseudo::ExpandMI()

in llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp [2038:3119]


bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MBBI,
                               MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  unsigned Opcode = MI.getOpcode();
  switch (Opcode) {
    default:
      return false;

    case ARM::VBSPd:
    case ARM::VBSPq: {
      Register DstReg = MI.getOperand(0).getReg();
      if (DstReg == MI.getOperand(3).getReg()) {
        // Expand to VBIT
        unsigned NewOpc = Opcode == ARM::VBSPd ? ARM::VBITd : ARM::VBITq;
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc))
            .add(MI.getOperand(0))
            .add(MI.getOperand(3))
            .add(MI.getOperand(2))
            .add(MI.getOperand(1))
            .addImm(MI.getOperand(4).getImm())
            .add(MI.getOperand(5));
      } else if (DstReg == MI.getOperand(2).getReg()) {
        // Expand to VBIF
        unsigned NewOpc = Opcode == ARM::VBSPd ? ARM::VBIFd : ARM::VBIFq;
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc))
            .add(MI.getOperand(0))
            .add(MI.getOperand(2))
            .add(MI.getOperand(3))
            .add(MI.getOperand(1))
            .addImm(MI.getOperand(4).getImm())
            .add(MI.getOperand(5));
      } else {
        // Expand to VBSL
        unsigned NewOpc = Opcode == ARM::VBSPd ? ARM::VBSLd : ARM::VBSLq;
        if (DstReg == MI.getOperand(1).getReg()) {
          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc))
              .add(MI.getOperand(0))
              .add(MI.getOperand(1))
              .add(MI.getOperand(2))
              .add(MI.getOperand(3))
              .addImm(MI.getOperand(4).getImm())
              .add(MI.getOperand(5));
        } else {
          // Use move to satisfy constraints
          unsigned MoveOpc = Opcode == ARM::VBSPd ? ARM::VORRd : ARM::VORRq;
          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MoveOpc))
              .addReg(DstReg,
                      RegState::Define |
                          getRenamableRegState(MI.getOperand(0).isRenamable()))
              .add(MI.getOperand(1))
              .add(MI.getOperand(1))
              .addImm(MI.getOperand(4).getImm())
              .add(MI.getOperand(5));
          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc))
              .add(MI.getOperand(0))
              .addReg(DstReg,
                      RegState::Kill |
                          getRenamableRegState(MI.getOperand(0).isRenamable()))
              .add(MI.getOperand(2))
              .add(MI.getOperand(3))
              .addImm(MI.getOperand(4).getImm())
              .add(MI.getOperand(5));
        }
      }
      MI.eraseFromParent();
      return true;
    }

    case ARM::TCRETURNdi:
    case ARM::TCRETURNri: {
      MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
      assert(MBBI->isReturn() &&
             "Can only insert epilog into returning blocks");
      unsigned RetOpcode = MBBI->getOpcode();
      DebugLoc dl = MBBI->getDebugLoc();
      const ARMBaseInstrInfo &TII = *static_cast<const ARMBaseInstrInfo *>(
          MBB.getParent()->getSubtarget().getInstrInfo());

      // Tail call return: adjust the stack pointer and jump to callee.
      MBBI = MBB.getLastNonDebugInstr();
      MachineOperand &JumpTarget = MBBI->getOperand(0);

      // Jump to label or value in register.
      if (RetOpcode == ARM::TCRETURNdi) {
        unsigned TCOpcode =
            STI->isThumb()
                ? (STI->isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND)
                : ARM::TAILJMPd;
        MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode));
        if (JumpTarget.isGlobal())
          MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
                               JumpTarget.getTargetFlags());
        else {
          assert(JumpTarget.isSymbol());
          MIB.addExternalSymbol(JumpTarget.getSymbolName(),
                                JumpTarget.getTargetFlags());
        }

        // Add the default predicate in Thumb mode.
        if (STI->isThumb())
          MIB.add(predOps(ARMCC::AL));
      } else if (RetOpcode == ARM::TCRETURNri) {
        unsigned Opcode =
          STI->isThumb() ? ARM::tTAILJMPr
                         : (STI->hasV4TOps() ? ARM::TAILJMPr : ARM::TAILJMPr4);
        BuildMI(MBB, MBBI, dl,
                TII.get(Opcode))
            .addReg(JumpTarget.getReg(), RegState::Kill);
      }

      auto NewMI = std::prev(MBBI);
      for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i)
        NewMI->addOperand(MBBI->getOperand(i));


      // Update call site info and delete the pseudo instruction TCRETURN.
      if (MI.isCandidateForCallSiteEntry())
        MI.getMF()->moveCallSiteInfo(&MI, &*NewMI);
      MBB.erase(MBBI);

      MBBI = NewMI;
      return true;
    }
    case ARM::tBXNS_RET: {
      // For v8.0-M.Main we need to authenticate LR before clearing FPRs, which
      // uses R12 as a scratch register.
      if (!STI->hasV8_1MMainlineOps() && AFI->shouldSignReturnAddress())
        BuildMI(MBB, MBBI, DebugLoc(), TII->get(ARM::t2AUT));

      MachineBasicBlock &AfterBB = CMSEClearFPRegs(MBB, MBBI);

      if (STI->hasV8_1MMainlineOps()) {
        // Restore the non-secure floating point context.
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                TII->get(ARM::VLDR_FPCXTNS_post), ARM::SP)
            .addReg(ARM::SP)
            .addImm(4)
            .add(predOps(ARMCC::AL));

        if (AFI->shouldSignReturnAddress())
          BuildMI(AfterBB, AfterBB.end(), DebugLoc(), TII->get(ARM::t2AUT));
      }

      // Clear all GPR that are not a use of the return instruction.
      assert(llvm::all_of(MBBI->operands(), [](const MachineOperand &Op) {
        return !Op.isReg() || Op.getReg() != ARM::R12;
      }));
      SmallVector<unsigned, 5> ClearRegs;
      determineGPRegsToClear(
          *MBBI, {ARM::R0, ARM::R1, ARM::R2, ARM::R3, ARM::R12}, ClearRegs);
      CMSEClearGPRegs(AfterBB, AfterBB.end(), MBBI->getDebugLoc(), ClearRegs,
                      ARM::LR);

      MachineInstrBuilder NewMI =
          BuildMI(AfterBB, AfterBB.end(), MBBI->getDebugLoc(),
                  TII->get(ARM::tBXNS))
              .addReg(ARM::LR)
              .add(predOps(ARMCC::AL));
      for (const MachineOperand &Op : MI.operands())
        NewMI->addOperand(Op);
      MI.eraseFromParent();
      return true;
    }
    case ARM::tBLXNS_CALL: {
      DebugLoc DL = MBBI->getDebugLoc();
      unsigned JumpReg = MBBI->getOperand(0).getReg();

      // Figure out which registers are live at the point immediately before the
      // call. When we indiscriminately push a set of registers, the live
      // registers are added as ordinary use operands, whereas dead registers
      // are "undef".
      LivePhysRegs LiveRegs(*TRI);
      LiveRegs.addLiveOuts(MBB);
      for (const MachineInstr &MI : make_range(MBB.rbegin(), MBBI.getReverse()))
        LiveRegs.stepBackward(MI);
      LiveRegs.stepBackward(*MBBI);

      CMSEPushCalleeSaves(*TII, MBB, MBBI, JumpReg, LiveRegs,
                          AFI->isThumb1OnlyFunction());

      SmallVector<unsigned, 16> ClearRegs;
      determineGPRegsToClear(*MBBI,
                             {ARM::R0, ARM::R1, ARM::R2, ARM::R3, ARM::R4,
                              ARM::R5, ARM::R6, ARM::R7, ARM::R8, ARM::R9,
                              ARM::R10, ARM::R11, ARM::R12},
                             ClearRegs);
      auto OriginalClearRegs = ClearRegs;

      // Get the first cleared register as a scratch (to use later with tBIC).
      // We need to use the first so we can ensure it is a low register.
      unsigned ScratchReg = ClearRegs.front();

      // Clear LSB of JumpReg
      if (AFI->isThumb2Function()) {
        BuildMI(MBB, MBBI, DL, TII->get(ARM::t2BICri), JumpReg)
            .addReg(JumpReg)
            .addImm(1)
            .add(predOps(ARMCC::AL))
            .add(condCodeOp());
      } else {
        // We need to use an extra register to cope with 8M Baseline,
        // since we have saved all of the registers we are ok to trash a non
        // argument register here.
        BuildMI(MBB, MBBI, DL, TII->get(ARM::tMOVi8), ScratchReg)
            .add(condCodeOp())
            .addImm(1)
            .add(predOps(ARMCC::AL));
        BuildMI(MBB, MBBI, DL, TII->get(ARM::tBIC), JumpReg)
            .addReg(ARM::CPSR, RegState::Define)
            .addReg(JumpReg)
            .addReg(ScratchReg)
            .add(predOps(ARMCC::AL));
      }

      CMSESaveClearFPRegs(MBB, MBBI, DL, LiveRegs,
                          ClearRegs); // save+clear FP regs with ClearRegs
      CMSEClearGPRegs(MBB, MBBI, DL, ClearRegs, JumpReg);

      const MachineInstrBuilder NewCall =
          BuildMI(MBB, MBBI, DL, TII->get(ARM::tBLXNSr))
              .add(predOps(ARMCC::AL))
              .addReg(JumpReg, RegState::Kill);

      for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
        NewCall->addOperand(MO);
      if (MI.isCandidateForCallSiteEntry())
        MI.getMF()->moveCallSiteInfo(&MI, NewCall.getInstr());

      CMSERestoreFPRegs(MBB, MBBI, DL, OriginalClearRegs); // restore FP registers

      CMSEPopCalleeSaves(*TII, MBB, MBBI, JumpReg, AFI->isThumb1OnlyFunction());

      MI.eraseFromParent();
      return true;
    }
    case ARM::VMOVHcc:
    case ARM::VMOVScc:
    case ARM::VMOVDcc: {
      unsigned newOpc = Opcode != ARM::VMOVDcc ? ARM::VMOVS : ARM::VMOVD;
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(newOpc),
              MI.getOperand(1).getReg())
          .add(MI.getOperand(2))
          .addImm(MI.getOperand(3).getImm()) // 'pred'
          .add(MI.getOperand(4))
          .add(makeImplicit(MI.getOperand(1)));

      MI.eraseFromParent();
      return true;
    }
    case ARM::t2MOVCCr:
    case ARM::MOVCCr: {
      unsigned Opc = AFI->isThumbFunction() ? ARM::t2MOVr : ARM::MOVr;
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc),
              MI.getOperand(1).getReg())
          .add(MI.getOperand(2))
          .addImm(MI.getOperand(3).getImm()) // 'pred'
          .add(MI.getOperand(4))
          .add(condCodeOp()) // 's' bit
          .add(makeImplicit(MI.getOperand(1)));

      MI.eraseFromParent();
      return true;
    }
    case ARM::MOVCCsi: {
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi),
              (MI.getOperand(1).getReg()))
          .add(MI.getOperand(2))
          .addImm(MI.getOperand(3).getImm())
          .addImm(MI.getOperand(4).getImm()) // 'pred'
          .add(MI.getOperand(5))
          .add(condCodeOp()) // 's' bit
          .add(makeImplicit(MI.getOperand(1)));

      MI.eraseFromParent();
      return true;
    }
    case ARM::MOVCCsr: {
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsr),
              (MI.getOperand(1).getReg()))
          .add(MI.getOperand(2))
          .add(MI.getOperand(3))
          .addImm(MI.getOperand(4).getImm())
          .addImm(MI.getOperand(5).getImm()) // 'pred'
          .add(MI.getOperand(6))
          .add(condCodeOp()) // 's' bit
          .add(makeImplicit(MI.getOperand(1)));

      MI.eraseFromParent();
      return true;
    }
    case ARM::t2MOVCCi16:
    case ARM::MOVCCi16: {
      unsigned NewOpc = AFI->isThumbFunction() ? ARM::t2MOVi16 : ARM::MOVi16;
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc),
              MI.getOperand(1).getReg())
          .addImm(MI.getOperand(2).getImm())
          .addImm(MI.getOperand(3).getImm()) // 'pred'
          .add(MI.getOperand(4))
          .add(makeImplicit(MI.getOperand(1)));
      MI.eraseFromParent();
      return true;
    }
    case ARM::t2MOVCCi:
    case ARM::MOVCCi: {
      unsigned Opc = AFI->isThumbFunction() ? ARM::t2MOVi : ARM::MOVi;
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc),
              MI.getOperand(1).getReg())
          .addImm(MI.getOperand(2).getImm())
          .addImm(MI.getOperand(3).getImm()) // 'pred'
          .add(MI.getOperand(4))
          .add(condCodeOp()) // 's' bit
          .add(makeImplicit(MI.getOperand(1)));

      MI.eraseFromParent();
      return true;
    }
    case ARM::t2MVNCCi:
    case ARM::MVNCCi: {
      unsigned Opc = AFI->isThumbFunction() ? ARM::t2MVNi : ARM::MVNi;
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc),
              MI.getOperand(1).getReg())
          .addImm(MI.getOperand(2).getImm())
          .addImm(MI.getOperand(3).getImm()) // 'pred'
          .add(MI.getOperand(4))
          .add(condCodeOp()) // 's' bit
          .add(makeImplicit(MI.getOperand(1)));

      MI.eraseFromParent();
      return true;
    }
    case ARM::t2MOVCClsl:
    case ARM::t2MOVCClsr:
    case ARM::t2MOVCCasr:
    case ARM::t2MOVCCror: {
      unsigned NewOpc;
      switch (Opcode) {
      case ARM::t2MOVCClsl: NewOpc = ARM::t2LSLri; break;
      case ARM::t2MOVCClsr: NewOpc = ARM::t2LSRri; break;
      case ARM::t2MOVCCasr: NewOpc = ARM::t2ASRri; break;
      case ARM::t2MOVCCror: NewOpc = ARM::t2RORri; break;
      default: llvm_unreachable("unexpeced conditional move");
      }
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc),
              MI.getOperand(1).getReg())
          .add(MI.getOperand(2))
          .addImm(MI.getOperand(3).getImm())
          .addImm(MI.getOperand(4).getImm()) // 'pred'
          .add(MI.getOperand(5))
          .add(condCodeOp()) // 's' bit
          .add(makeImplicit(MI.getOperand(1)));
      MI.eraseFromParent();
      return true;
    }
    case ARM::Int_eh_sjlj_dispatchsetup: {
      MachineFunction &MF = *MI.getParent()->getParent();
      const ARMBaseInstrInfo *AII =
        static_cast<const ARMBaseInstrInfo*>(TII);
      const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
      // For functions using a base pointer, we rematerialize it (via the frame
      // pointer) here since eh.sjlj.setjmp and eh.sjlj.longjmp don't do it
      // for us. Otherwise, expand to nothing.
      if (RI.hasBasePointer(MF)) {
        int32_t NumBytes = AFI->getFramePtrSpillOffset();
        Register FramePtr = RI.getFrameRegister(MF);
        assert(MF.getSubtarget().getFrameLowering()->hasFP(MF) &&
               "base pointer without frame pointer?");

        if (AFI->isThumb2Function()) {
          emitT2RegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
                                 FramePtr, -NumBytes, ARMCC::AL, 0, *TII);
        } else if (AFI->isThumbFunction()) {
          emitThumbRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
                                    FramePtr, -NumBytes, *TII, RI);
        } else {
          emitARMRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
                                  FramePtr, -NumBytes, ARMCC::AL, 0,
                                  *TII);
        }
        // If there's dynamic realignment, adjust for it.
        if (RI.hasStackRealignment(MF)) {
          MachineFrameInfo &MFI = MF.getFrameInfo();
          Align MaxAlign = MFI.getMaxAlign();
          assert (!AFI->isThumb1OnlyFunction());
          // Emit bic r6, r6, MaxAlign
          assert(MaxAlign <= Align(256) &&
                 "The BIC instruction cannot encode "
                 "immediates larger than 256 with all lower "
                 "bits set.");
          unsigned bicOpc = AFI->isThumbFunction() ?
            ARM::t2BICri : ARM::BICri;
          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(bicOpc), ARM::R6)
              .addReg(ARM::R6, RegState::Kill)
              .addImm(MaxAlign.value() - 1)
              .add(predOps(ARMCC::AL))
              .add(condCodeOp());
        }
      }
      MI.eraseFromParent();
      return true;
    }

    case ARM::MOVsrl_flag:
    case ARM::MOVsra_flag: {
      // These are just fancy MOVs instructions.
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi),
              MI.getOperand(0).getReg())
          .add(MI.getOperand(1))
          .addImm(ARM_AM::getSORegOpc(
              (Opcode == ARM::MOVsrl_flag ? ARM_AM::lsr : ARM_AM::asr), 1))
          .add(predOps(ARMCC::AL))
          .addReg(ARM::CPSR, RegState::Define);
      MI.eraseFromParent();
      return true;
    }
    case ARM::RRX: {
      // This encodes as "MOVs Rd, Rm, rrx".
      MachineInstrBuilder MIB =
          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi),
                  MI.getOperand(0).getReg())
              .add(MI.getOperand(1))
              .addImm(ARM_AM::getSORegOpc(ARM_AM::rrx, 0))
              .add(predOps(ARMCC::AL))
              .add(condCodeOp());
      TransferImpOps(MI, MIB, MIB);
      MI.eraseFromParent();
      return true;
    }
    case ARM::tTPsoft:
    case ARM::TPsoft: {
      const bool Thumb = Opcode == ARM::tTPsoft;

      MachineInstrBuilder MIB;
      MachineFunction *MF = MBB.getParent();
      if (STI->genLongCalls()) {
        MachineConstantPool *MCP = MF->getConstantPool();
        unsigned PCLabelID = AFI->createPICLabelUId();
        MachineConstantPoolValue *CPV =
            ARMConstantPoolSymbol::Create(MF->getFunction().getContext(),
                                          "__aeabi_read_tp", PCLabelID, 0);
        Register Reg = MI.getOperand(0).getReg();
        MIB =
            BuildMI(MBB, MBBI, MI.getDebugLoc(),
                    TII->get(Thumb ? ARM::tLDRpci : ARM::LDRi12), Reg)
                .addConstantPoolIndex(MCP->getConstantPoolIndex(CPV, Align(4)));
        if (!Thumb)
          MIB.addImm(0);
        MIB.add(predOps(ARMCC::AL));

        MIB =
            BuildMI(MBB, MBBI, MI.getDebugLoc(),
                    TII->get(Thumb ? gettBLXrOpcode(*MF) : getBLXOpcode(*MF)));
        if (Thumb)
          MIB.add(predOps(ARMCC::AL));
        MIB.addReg(Reg, RegState::Kill);
      } else {
        MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
                      TII->get(Thumb ? ARM::tBL : ARM::BL));
        if (Thumb)
          MIB.add(predOps(ARMCC::AL));
        MIB.addExternalSymbol("__aeabi_read_tp", 0);
      }

      MIB.cloneMemRefs(MI);
      TransferImpOps(MI, MIB, MIB);
      // Update the call site info.
      if (MI.isCandidateForCallSiteEntry())
        MF->moveCallSiteInfo(&MI, &*MIB);
      MI.eraseFromParent();
      return true;
    }
    case ARM::tLDRpci_pic:
    case ARM::t2LDRpci_pic: {
      unsigned NewLdOpc = (Opcode == ARM::tLDRpci_pic)
        ? ARM::tLDRpci : ARM::t2LDRpci;
      Register DstReg = MI.getOperand(0).getReg();
      bool DstIsDead = MI.getOperand(0).isDead();
      MachineInstrBuilder MIB1 =
          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewLdOpc), DstReg)
              .add(MI.getOperand(1))
              .add(predOps(ARMCC::AL));
      MIB1.cloneMemRefs(MI);
      MachineInstrBuilder MIB2 =
          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tPICADD))
              .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
              .addReg(DstReg)
              .add(MI.getOperand(2));
      TransferImpOps(MI, MIB1, MIB2);
      MI.eraseFromParent();
      return true;
    }

    case ARM::LDRLIT_ga_abs:
    case ARM::LDRLIT_ga_pcrel:
    case ARM::LDRLIT_ga_pcrel_ldr:
    case ARM::tLDRLIT_ga_abs:
    case ARM::t2LDRLIT_ga_pcrel:
    case ARM::tLDRLIT_ga_pcrel: {
      Register DstReg = MI.getOperand(0).getReg();
      bool DstIsDead = MI.getOperand(0).isDead();
      const MachineOperand &MO1 = MI.getOperand(1);
      auto Flags = MO1.getTargetFlags();
      const GlobalValue *GV = MO1.getGlobal();
      bool IsARM = Opcode != ARM::tLDRLIT_ga_pcrel &&
                   Opcode != ARM::tLDRLIT_ga_abs &&
                   Opcode != ARM::t2LDRLIT_ga_pcrel;
      bool IsPIC =
          Opcode != ARM::LDRLIT_ga_abs && Opcode != ARM::tLDRLIT_ga_abs;
      unsigned LDRLITOpc = IsARM ? ARM::LDRi12 : ARM::tLDRpci;
      if (Opcode == ARM::t2LDRLIT_ga_pcrel)
        LDRLITOpc = ARM::t2LDRpci;
      unsigned PICAddOpc =
          IsARM
              ? (Opcode == ARM::LDRLIT_ga_pcrel_ldr ? ARM::PICLDR : ARM::PICADD)
              : ARM::tPICADD;

      // We need a new const-pool entry to load from.
      MachineConstantPool *MCP = MBB.getParent()->getConstantPool();
      unsigned ARMPCLabelIndex = 0;
      MachineConstantPoolValue *CPV;

      if (IsPIC) {
        unsigned PCAdj = IsARM ? 8 : 4;
        auto Modifier = (Flags & ARMII::MO_GOT)
                            ? ARMCP::GOT_PREL
                            : ARMCP::no_modifier;
        ARMPCLabelIndex = AFI->createPICLabelUId();
        CPV = ARMConstantPoolConstant::Create(
            GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj, Modifier,
            /*AddCurrentAddr*/ Modifier == ARMCP::GOT_PREL);
      } else
        CPV = ARMConstantPoolConstant::Create(GV, ARMCP::no_modifier);

      MachineInstrBuilder MIB =
          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LDRLITOpc), DstReg)
              .addConstantPoolIndex(MCP->getConstantPoolIndex(CPV, Align(4)));
      if (IsARM)
        MIB.addImm(0);
      MIB.add(predOps(ARMCC::AL));

      if (IsPIC) {
        MachineInstrBuilder MIB =
          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(PICAddOpc))
            .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
            .addReg(DstReg)
            .addImm(ARMPCLabelIndex);

        if (IsARM)
          MIB.add(predOps(ARMCC::AL));
      }

      MI.eraseFromParent();
      return true;
    }
    case ARM::MOV_ga_pcrel:
    case ARM::MOV_ga_pcrel_ldr:
    case ARM::t2MOV_ga_pcrel: {
      // Expand into movw + movt. Also "add pc" / ldr [pc] in PIC mode.
      unsigned LabelId = AFI->createPICLabelUId();
      Register DstReg = MI.getOperand(0).getReg();
      bool DstIsDead = MI.getOperand(0).isDead();
      const MachineOperand &MO1 = MI.getOperand(1);
      const GlobalValue *GV = MO1.getGlobal();
      unsigned TF = MO1.getTargetFlags();
      bool isARM = Opcode != ARM::t2MOV_ga_pcrel;
      unsigned LO16Opc = isARM ? ARM::MOVi16_ga_pcrel : ARM::t2MOVi16_ga_pcrel;
      unsigned HI16Opc = isARM ? ARM::MOVTi16_ga_pcrel :ARM::t2MOVTi16_ga_pcrel;
      unsigned LO16TF = TF | ARMII::MO_LO16;
      unsigned HI16TF = TF | ARMII::MO_HI16;
      unsigned PICAddOpc = isARM
        ? (Opcode == ARM::MOV_ga_pcrel_ldr ? ARM::PICLDR : ARM::PICADD)
        : ARM::tPICADD;
      MachineInstrBuilder MIB1 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
                                         TII->get(LO16Opc), DstReg)
        .addGlobalAddress(GV, MO1.getOffset(), TF | LO16TF)
        .addImm(LabelId);

      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(HI16Opc), DstReg)
        .addReg(DstReg)
        .addGlobalAddress(GV, MO1.getOffset(), TF | HI16TF)
        .addImm(LabelId);

      MachineInstrBuilder MIB3 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
                                         TII->get(PICAddOpc))
        .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
        .addReg(DstReg).addImm(LabelId);
      if (isARM) {
        MIB3.add(predOps(ARMCC::AL));
        if (Opcode == ARM::MOV_ga_pcrel_ldr)
          MIB3.cloneMemRefs(MI);
      }
      TransferImpOps(MI, MIB1, MIB3);
      MI.eraseFromParent();
      return true;
    }

    case ARM::MOVi32imm:
    case ARM::MOVCCi32imm:
    case ARM::t2MOVi32imm:
    case ARM::t2MOVCCi32imm:
      ExpandMOV32BitImm(MBB, MBBI);
      return true;

    case ARM::SUBS_PC_LR: {
      MachineInstrBuilder MIB =
          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::SUBri), ARM::PC)
              .addReg(ARM::LR)
              .add(MI.getOperand(0))
              .add(MI.getOperand(1))
              .add(MI.getOperand(2))
              .addReg(ARM::CPSR, RegState::Undef);
      TransferImpOps(MI, MIB, MIB);
      MI.eraseFromParent();
      return true;
    }
    case ARM::VLDMQIA: {
      unsigned NewOpc = ARM::VLDMDIA;
      MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc));
      unsigned OpIdx = 0;

      // Grab the Q register destination.
      bool DstIsDead = MI.getOperand(OpIdx).isDead();
      Register DstReg = MI.getOperand(OpIdx++).getReg();

      // Copy the source register.
      MIB.add(MI.getOperand(OpIdx++));

      // Copy the predicate operands.
      MIB.add(MI.getOperand(OpIdx++));
      MIB.add(MI.getOperand(OpIdx++));

      // Add the destination operands (D subregs).
      Register D0 = TRI->getSubReg(DstReg, ARM::dsub_0);
      Register D1 = TRI->getSubReg(DstReg, ARM::dsub_1);
      MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead))
        .addReg(D1, RegState::Define | getDeadRegState(DstIsDead));

      // Add an implicit def for the super-register.
      MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
      TransferImpOps(MI, MIB, MIB);
      MIB.cloneMemRefs(MI);
      MI.eraseFromParent();
      return true;
    }

    case ARM::VSTMQIA: {
      unsigned NewOpc = ARM::VSTMDIA;
      MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc));
      unsigned OpIdx = 0;

      // Grab the Q register source.
      bool SrcIsKill = MI.getOperand(OpIdx).isKill();
      Register SrcReg = MI.getOperand(OpIdx++).getReg();

      // Copy the destination register.
      MachineOperand Dst(MI.getOperand(OpIdx++));
      MIB.add(Dst);

      // Copy the predicate operands.
      MIB.add(MI.getOperand(OpIdx++));
      MIB.add(MI.getOperand(OpIdx++));

      // Add the source operands (D subregs).
      Register D0 = TRI->getSubReg(SrcReg, ARM::dsub_0);
      Register D1 = TRI->getSubReg(SrcReg, ARM::dsub_1);
      MIB.addReg(D0, SrcIsKill ? RegState::Kill : 0)
         .addReg(D1, SrcIsKill ? RegState::Kill : 0);

      if (SrcIsKill)      // Add an implicit kill for the Q register.
        MIB->addRegisterKilled(SrcReg, TRI, true);

      TransferImpOps(MI, MIB, MIB);
      MIB.cloneMemRefs(MI);
      MI.eraseFromParent();
      return true;
    }

    case ARM::VLD2q8Pseudo:
    case ARM::VLD2q16Pseudo:
    case ARM::VLD2q32Pseudo:
    case ARM::VLD2q8PseudoWB_fixed:
    case ARM::VLD2q16PseudoWB_fixed:
    case ARM::VLD2q32PseudoWB_fixed:
    case ARM::VLD2q8PseudoWB_register:
    case ARM::VLD2q16PseudoWB_register:
    case ARM::VLD2q32PseudoWB_register:
    case ARM::VLD3d8Pseudo:
    case ARM::VLD3d16Pseudo:
    case ARM::VLD3d32Pseudo:
    case ARM::VLD1d8TPseudo:
    case ARM::VLD1d8TPseudoWB_fixed:
    case ARM::VLD1d8TPseudoWB_register:
    case ARM::VLD1d16TPseudo:
    case ARM::VLD1d16TPseudoWB_fixed:
    case ARM::VLD1d16TPseudoWB_register:
    case ARM::VLD1d32TPseudo:
    case ARM::VLD1d32TPseudoWB_fixed:
    case ARM::VLD1d32TPseudoWB_register:
    case ARM::VLD1d64TPseudo:
    case ARM::VLD1d64TPseudoWB_fixed:
    case ARM::VLD1d64TPseudoWB_register:
    case ARM::VLD3d8Pseudo_UPD:
    case ARM::VLD3d16Pseudo_UPD:
    case ARM::VLD3d32Pseudo_UPD:
    case ARM::VLD3q8Pseudo_UPD:
    case ARM::VLD3q16Pseudo_UPD:
    case ARM::VLD3q32Pseudo_UPD:
    case ARM::VLD3q8oddPseudo:
    case ARM::VLD3q16oddPseudo:
    case ARM::VLD3q32oddPseudo:
    case ARM::VLD3q8oddPseudo_UPD:
    case ARM::VLD3q16oddPseudo_UPD:
    case ARM::VLD3q32oddPseudo_UPD:
    case ARM::VLD4d8Pseudo:
    case ARM::VLD4d16Pseudo:
    case ARM::VLD4d32Pseudo:
    case ARM::VLD1d8QPseudo:
    case ARM::VLD1d8QPseudoWB_fixed:
    case ARM::VLD1d8QPseudoWB_register:
    case ARM::VLD1d16QPseudo:
    case ARM::VLD1d16QPseudoWB_fixed:
    case ARM::VLD1d16QPseudoWB_register:
    case ARM::VLD1d32QPseudo:
    case ARM::VLD1d32QPseudoWB_fixed:
    case ARM::VLD1d32QPseudoWB_register:
    case ARM::VLD1d64QPseudo:
    case ARM::VLD1d64QPseudoWB_fixed:
    case ARM::VLD1d64QPseudoWB_register:
    case ARM::VLD1q8HighQPseudo:
    case ARM::VLD1q8HighQPseudo_UPD:
    case ARM::VLD1q8LowQPseudo_UPD:
    case ARM::VLD1q8HighTPseudo:
    case ARM::VLD1q8HighTPseudo_UPD:
    case ARM::VLD1q8LowTPseudo_UPD:
    case ARM::VLD1q16HighQPseudo:
    case ARM::VLD1q16HighQPseudo_UPD:
    case ARM::VLD1q16LowQPseudo_UPD:
    case ARM::VLD1q16HighTPseudo:
    case ARM::VLD1q16HighTPseudo_UPD:
    case ARM::VLD1q16LowTPseudo_UPD:
    case ARM::VLD1q32HighQPseudo:
    case ARM::VLD1q32HighQPseudo_UPD:
    case ARM::VLD1q32LowQPseudo_UPD:
    case ARM::VLD1q32HighTPseudo:
    case ARM::VLD1q32HighTPseudo_UPD:
    case ARM::VLD1q32LowTPseudo_UPD:
    case ARM::VLD1q64HighQPseudo:
    case ARM::VLD1q64HighQPseudo_UPD:
    case ARM::VLD1q64LowQPseudo_UPD:
    case ARM::VLD1q64HighTPseudo:
    case ARM::VLD1q64HighTPseudo_UPD:
    case ARM::VLD1q64LowTPseudo_UPD:
    case ARM::VLD4d8Pseudo_UPD:
    case ARM::VLD4d16Pseudo_UPD:
    case ARM::VLD4d32Pseudo_UPD:
    case ARM::VLD4q8Pseudo_UPD:
    case ARM::VLD4q16Pseudo_UPD:
    case ARM::VLD4q32Pseudo_UPD:
    case ARM::VLD4q8oddPseudo:
    case ARM::VLD4q16oddPseudo:
    case ARM::VLD4q32oddPseudo:
    case ARM::VLD4q8oddPseudo_UPD:
    case ARM::VLD4q16oddPseudo_UPD:
    case ARM::VLD4q32oddPseudo_UPD:
    case ARM::VLD3DUPd8Pseudo:
    case ARM::VLD3DUPd16Pseudo:
    case ARM::VLD3DUPd32Pseudo:
    case ARM::VLD3DUPd8Pseudo_UPD:
    case ARM::VLD3DUPd16Pseudo_UPD:
    case ARM::VLD3DUPd32Pseudo_UPD:
    case ARM::VLD4DUPd8Pseudo:
    case ARM::VLD4DUPd16Pseudo:
    case ARM::VLD4DUPd32Pseudo:
    case ARM::VLD4DUPd8Pseudo_UPD:
    case ARM::VLD4DUPd16Pseudo_UPD:
    case ARM::VLD4DUPd32Pseudo_UPD:
    case ARM::VLD2DUPq8EvenPseudo:
    case ARM::VLD2DUPq8OddPseudo:
    case ARM::VLD2DUPq16EvenPseudo:
    case ARM::VLD2DUPq16OddPseudo:
    case ARM::VLD2DUPq32EvenPseudo:
    case ARM::VLD2DUPq32OddPseudo:
    case ARM::VLD2DUPq8OddPseudoWB_fixed:
    case ARM::VLD2DUPq8OddPseudoWB_register:
    case ARM::VLD2DUPq16OddPseudoWB_fixed:
    case ARM::VLD2DUPq16OddPseudoWB_register:
    case ARM::VLD2DUPq32OddPseudoWB_fixed:
    case ARM::VLD2DUPq32OddPseudoWB_register:
    case ARM::VLD3DUPq8EvenPseudo:
    case ARM::VLD3DUPq8OddPseudo:
    case ARM::VLD3DUPq16EvenPseudo:
    case ARM::VLD3DUPq16OddPseudo:
    case ARM::VLD3DUPq32EvenPseudo:
    case ARM::VLD3DUPq32OddPseudo:
    case ARM::VLD3DUPq8OddPseudo_UPD:
    case ARM::VLD3DUPq16OddPseudo_UPD:
    case ARM::VLD3DUPq32OddPseudo_UPD:
    case ARM::VLD4DUPq8EvenPseudo:
    case ARM::VLD4DUPq8OddPseudo:
    case ARM::VLD4DUPq16EvenPseudo:
    case ARM::VLD4DUPq16OddPseudo:
    case ARM::VLD4DUPq32EvenPseudo:
    case ARM::VLD4DUPq32OddPseudo:
    case ARM::VLD4DUPq8OddPseudo_UPD:
    case ARM::VLD4DUPq16OddPseudo_UPD:
    case ARM::VLD4DUPq32OddPseudo_UPD:
      ExpandVLD(MBBI);
      return true;

    // VST1/VST2/VST3/VST4 pseudo opcodes, including their _UPD, WB_fixed and
    // WB_register variants. Every opcode listed here is dispatched to
    // ExpandVST, and the case returns true (the switch's default case returns
    // false).
    case ARM::VST2q8Pseudo:
    case ARM::VST2q16Pseudo:
    case ARM::VST2q32Pseudo:
    case ARM::VST2q8PseudoWB_fixed:
    case ARM::VST2q16PseudoWB_fixed:
    case ARM::VST2q32PseudoWB_fixed:
    case ARM::VST2q8PseudoWB_register:
    case ARM::VST2q16PseudoWB_register:
    case ARM::VST2q32PseudoWB_register:
    case ARM::VST3d8Pseudo:
    case ARM::VST3d16Pseudo:
    case ARM::VST3d32Pseudo:
    case ARM::VST1d8TPseudo:
    case ARM::VST1d8TPseudoWB_fixed:
    case ARM::VST1d8TPseudoWB_register:
    case ARM::VST1d16TPseudo:
    case ARM::VST1d16TPseudoWB_fixed:
    case ARM::VST1d16TPseudoWB_register:
    case ARM::VST1d32TPseudo:
    case ARM::VST1d32TPseudoWB_fixed:
    case ARM::VST1d32TPseudoWB_register:
    case ARM::VST1d64TPseudo:
    case ARM::VST1d64TPseudoWB_fixed:
    case ARM::VST1d64TPseudoWB_register:
    case ARM::VST3d8Pseudo_UPD:
    case ARM::VST3d16Pseudo_UPD:
    case ARM::VST3d32Pseudo_UPD:
    case ARM::VST3q8Pseudo_UPD:
    case ARM::VST3q16Pseudo_UPD:
    case ARM::VST3q32Pseudo_UPD:
    case ARM::VST3q8oddPseudo:
    case ARM::VST3q16oddPseudo:
    case ARM::VST3q32oddPseudo:
    case ARM::VST3q8oddPseudo_UPD:
    case ARM::VST3q16oddPseudo_UPD:
    case ARM::VST3q32oddPseudo_UPD:
    case ARM::VST4d8Pseudo:
    case ARM::VST4d16Pseudo:
    case ARM::VST4d32Pseudo:
    case ARM::VST1d8QPseudo:
    case ARM::VST1d8QPseudoWB_fixed:
    case ARM::VST1d8QPseudoWB_register:
    case ARM::VST1d16QPseudo:
    case ARM::VST1d16QPseudoWB_fixed:
    case ARM::VST1d16QPseudoWB_register:
    case ARM::VST1d32QPseudo:
    case ARM::VST1d32QPseudoWB_fixed:
    case ARM::VST1d32QPseudoWB_register:
    case ARM::VST1d64QPseudo:
    case ARM::VST1d64QPseudoWB_fixed:
    case ARM::VST1d64QPseudoWB_register:
    case ARM::VST4d8Pseudo_UPD:
    case ARM::VST4d16Pseudo_UPD:
    case ARM::VST4d32Pseudo_UPD:
    case ARM::VST1q8HighQPseudo:
    case ARM::VST1q8LowQPseudo_UPD:
    case ARM::VST1q8HighTPseudo:
    case ARM::VST1q8LowTPseudo_UPD:
    case ARM::VST1q16HighQPseudo:
    case ARM::VST1q16LowQPseudo_UPD:
    case ARM::VST1q16HighTPseudo:
    case ARM::VST1q16LowTPseudo_UPD:
    case ARM::VST1q32HighQPseudo:
    case ARM::VST1q32LowQPseudo_UPD:
    case ARM::VST1q32HighTPseudo:
    case ARM::VST1q32LowTPseudo_UPD:
    case ARM::VST1q64HighQPseudo:
    case ARM::VST1q64LowQPseudo_UPD:
    case ARM::VST1q64HighTPseudo:
    case ARM::VST1q64LowTPseudo_UPD:
    case ARM::VST1q8HighTPseudo_UPD:
    case ARM::VST1q16HighTPseudo_UPD:
    case ARM::VST1q32HighTPseudo_UPD:
    case ARM::VST1q64HighTPseudo_UPD:
    case ARM::VST1q8HighQPseudo_UPD:
    case ARM::VST1q16HighQPseudo_UPD:
    case ARM::VST1q32HighQPseudo_UPD:
    case ARM::VST1q64HighQPseudo_UPD:
    case ARM::VST4q8Pseudo_UPD:
    case ARM::VST4q16Pseudo_UPD:
    case ARM::VST4q32Pseudo_UPD:
    case ARM::VST4q8oddPseudo:
    case ARM::VST4q16oddPseudo:
    case ARM::VST4q32oddPseudo:
    case ARM::VST4q8oddPseudo_UPD:
    case ARM::VST4q16oddPseudo_UPD:
    case ARM::VST4q32oddPseudo_UPD:
      ExpandVST(MBBI);
      return true;

    // VLDnLN and VSTnLN pseudo opcodes (n = 1..4) together with their _UPD
    // variants. Loads and stores share a single handler: every opcode listed
    // here is dispatched to ExpandLaneOp, and the case returns true.
    case ARM::VLD1LNq8Pseudo:
    case ARM::VLD1LNq16Pseudo:
    case ARM::VLD1LNq32Pseudo:
    case ARM::VLD1LNq8Pseudo_UPD:
    case ARM::VLD1LNq16Pseudo_UPD:
    case ARM::VLD1LNq32Pseudo_UPD:
    case ARM::VLD2LNd8Pseudo:
    case ARM::VLD2LNd16Pseudo:
    case ARM::VLD2LNd32Pseudo:
    case ARM::VLD2LNq16Pseudo:
    case ARM::VLD2LNq32Pseudo:
    case ARM::VLD2LNd8Pseudo_UPD:
    case ARM::VLD2LNd16Pseudo_UPD:
    case ARM::VLD2LNd32Pseudo_UPD:
    case ARM::VLD2LNq16Pseudo_UPD:
    case ARM::VLD2LNq32Pseudo_UPD:
    case ARM::VLD3LNd8Pseudo:
    case ARM::VLD3LNd16Pseudo:
    case ARM::VLD3LNd32Pseudo:
    case ARM::VLD3LNq16Pseudo:
    case ARM::VLD3LNq32Pseudo:
    case ARM::VLD3LNd8Pseudo_UPD:
    case ARM::VLD3LNd16Pseudo_UPD:
    case ARM::VLD3LNd32Pseudo_UPD:
    case ARM::VLD3LNq16Pseudo_UPD:
    case ARM::VLD3LNq32Pseudo_UPD:
    case ARM::VLD4LNd8Pseudo:
    case ARM::VLD4LNd16Pseudo:
    case ARM::VLD4LNd32Pseudo:
    case ARM::VLD4LNq16Pseudo:
    case ARM::VLD4LNq32Pseudo:
    case ARM::VLD4LNd8Pseudo_UPD:
    case ARM::VLD4LNd16Pseudo_UPD:
    case ARM::VLD4LNd32Pseudo_UPD:
    case ARM::VLD4LNq16Pseudo_UPD:
    case ARM::VLD4LNq32Pseudo_UPD:
    case ARM::VST1LNq8Pseudo:
    case ARM::VST1LNq16Pseudo:
    case ARM::VST1LNq32Pseudo:
    case ARM::VST1LNq8Pseudo_UPD:
    case ARM::VST1LNq16Pseudo_UPD:
    case ARM::VST1LNq32Pseudo_UPD:
    case ARM::VST2LNd8Pseudo:
    case ARM::VST2LNd16Pseudo:
    case ARM::VST2LNd32Pseudo:
    case ARM::VST2LNq16Pseudo:
    case ARM::VST2LNq32Pseudo:
    case ARM::VST2LNd8Pseudo_UPD:
    case ARM::VST2LNd16Pseudo_UPD:
    case ARM::VST2LNd32Pseudo_UPD:
    case ARM::VST2LNq16Pseudo_UPD:
    case ARM::VST2LNq32Pseudo_UPD:
    case ARM::VST3LNd8Pseudo:
    case ARM::VST3LNd16Pseudo:
    case ARM::VST3LNd32Pseudo:
    case ARM::VST3LNq16Pseudo:
    case ARM::VST3LNq32Pseudo:
    case ARM::VST3LNd8Pseudo_UPD:
    case ARM::VST3LNd16Pseudo_UPD:
    case ARM::VST3LNd32Pseudo_UPD:
    case ARM::VST3LNq16Pseudo_UPD:
    case ARM::VST3LNq32Pseudo_UPD:
    case ARM::VST4LNd8Pseudo:
    case ARM::VST4LNd16Pseudo:
    case ARM::VST4LNd32Pseudo:
    case ARM::VST4LNq16Pseudo:
    case ARM::VST4LNq32Pseudo:
    case ARM::VST4LNd8Pseudo_UPD:
    case ARM::VST4LNd16Pseudo_UPD:
    case ARM::VST4LNd32Pseudo_UPD:
    case ARM::VST4LNq16Pseudo_UPD:
    case ARM::VST4LNq32Pseudo_UPD:
      ExpandLaneOp(MBBI);
      return true;

    // Table-lookup pseudos: each one is forwarded to ExpandVTBL along with the
    // matching real opcode. The trailing flag is false for the VTBL forms and
    // true for the VTBX forms.
    case ARM::VTBL3Pseudo:
      ExpandVTBL(MBBI, ARM::VTBL3, false);
      return true;
    case ARM::VTBL4Pseudo:
      ExpandVTBL(MBBI, ARM::VTBL4, false);
      return true;
    case ARM::VTBX3Pseudo:
      ExpandVTBL(MBBI, ARM::VTBX3, true);
      return true;
    case ARM::VTBX4Pseudo:
      ExpandVTBL(MBBI, ARM::VTBX4, true);
      return true;

    // The MQQPR and MQQQQPR load/store pseudos all share one handler,
    // ExpandMQQPRLoadStore; the case then returns true.
    case ARM::MQQPRLoad:
    case ARM::MQQPRStore:
    case ARM::MQQQQPRLoad:
    case ARM::MQQQQPRStore:
      ExpandMQQPRLoadStore(MBBI);
      return true;

    // Compare-and-swap pseudos. The 8/16/32-bit forms delegate to
    // ExpandCMP_SWAP, passing the LDREX*/STREX* opcodes to use plus a UXT*
    // opcode (0 for the 32-bit form); the result of the helper is returned
    // directly. NextMBBI is passed through to the helper.

    // Thumb-only sub-word forms.
    case ARM::tCMP_SWAP_8:
      assert(STI->isThumb());
      return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREXB, ARM::t2STREXB, ARM::tUXTB,
                            NextMBBI);
    case ARM::tCMP_SWAP_16:
      assert(STI->isThumb());
      return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREXH, ARM::t2STREXH, ARM::tUXTH,
                            NextMBBI);

    // Non-Thumb sub-word forms.
    case ARM::CMP_SWAP_8:
      assert(!STI->isThumb());
      return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREXB, ARM::STREXB, ARM::UXTB,
                            NextMBBI);
    case ARM::CMP_SWAP_16:
      assert(!STI->isThumb());
      return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREXH, ARM::STREXH, ARM::UXTH,
                            NextMBBI);

    // The 32-bit pseudo is used for both instruction sets, so the opcode pair
    // is chosen from the subtarget at expansion time.
    case ARM::CMP_SWAP_32: {
      const bool IsThumb = STI->isThumb();
      return ExpandCMP_SWAP(MBB, MBBI, IsThumb ? ARM::t2LDREX : ARM::LDREX,
                            IsThumb ? ARM::t2STREX : ARM::STREX, 0, NextMBBI);
    }

    // The 64-bit form has its own dedicated expansion routine.
    case ARM::CMP_SWAP_64:
      return ExpandCMP_SWAP_64(MBB, MBBI, NextMBBI);

    case ARM::tBL_PUSHLR:
    case ARM::BL_PUSHLR: {
      // Replace the pseudo with two instructions inserted in front of it: a
      // store of LR to the stack, then a branch-and-link that inherits the
      // pseudo's memory references and every operand after the first one.
      Register LinkReg = MI.getOperand(0).getReg();
      assert(LinkReg == ARM::LR && "expect LR register!");
      const bool IsThumb = (Opcode == ARM::tBL_PUSHLR);

      if (IsThumb) {
        // push {lr}
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tPUSH))
            .add(predOps(ARMCC::AL))
            .addReg(LinkReg);
      } else {
        // stmdb   sp!, {lr}
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::STMDB_UPD))
            .addReg(ARM::SP, RegState::Define)
            .addReg(ARM::SP)
            .add(predOps(ARMCC::AL))
            .addReg(LinkReg);
      }

      // bl __gnu_mcount_nc
      MachineInstrBuilder Call =
          BuildMI(MBB, MBBI, MI.getDebugLoc(),
                  TII->get(IsThumb ? ARM::tBL : ARM::BL));
      Call.cloneMemRefs(MI);
      // Operand 0 is the LR register consumed above; forward only the rest.
      for (const MachineOperand &Op : llvm::drop_begin(MI.operands()))
        Call.add(Op);

      MI.eraseFromParent();
      return true;
    }
    case ARM::t2CALL_BTI: {
      // Create a tBL that is not yet inserted into any block and give it the
      // pseudo's memory references and all of its operands (including
      // operand 0).
      MachineFunction &Fn = *MI.getMF();
      MachineInstrBuilder Call =
          BuildMI(Fn, MI.getDebugLoc(), TII->get(ARM::tBL));
      Call.cloneMemRefs(MI);
      for (const MachineOperand &Op : MI.operands())
        Call.add(Op);

      // Keep any call-site info attached to the pseudo associated with the
      // new call instruction.
      if (MI.isCandidateForCallSiteEntry())
        Fn.moveCallSiteInfo(&MI, Call.getInstr());

      // Append the call and a t2BTI to a bundle built at MI's position,
      // finalize the bundle, and then drop the pseudo.
      MIBundleBuilder Bundle(MBB, MI);
      Bundle.append(Call);
      Bundle.append(BuildMI(Fn, MI.getDebugLoc(), TII->get(ARM::t2BTI)));
      finalizeBundle(MBB, Bundle.begin(), Bundle.end());

      MI.eraseFromParent();
      return true;
    }
    case ARM::LOADDUAL:
    case ARM::STOREDUAL: {
      // Rewrite the pseudo as LDRD (for LOADDUAL) or STRD (for STOREDUAL).
      // Operand 0 is a register whose gsub_0 and gsub_1 sub-registers become
      // the first two operands of the real instruction; for a load they are
      // marked as definitions.
      const bool IsLoad = (Opcode == ARM::LOADDUAL);
      Register PairReg = MI.getOperand(0).getReg();

      MachineInstrBuilder MIB =
          BuildMI(MBB, MBBI, MI.getDebugLoc(),
                  TII->get(IsLoad ? ARM::LDRD : ARM::STRD));
      MIB.addReg(TRI->getSubReg(PairReg, ARM::gsub_0),
                 IsLoad ? RegState::Define : 0);
      MIB.addReg(TRI->getSubReg(PairReg, ARM::gsub_1),
                 IsLoad ? RegState::Define : 0);

      // Forward the remaining operands unchanged, then append an always-true
      // predicate and copy over the memory references.
      for (const MachineOperand &Op : llvm::drop_begin(MI.operands()))
        MIB.add(Op);
      MIB.add(predOps(ARMCC::AL));
      MIB.cloneMemRefs(MI);

      MI.eraseFromParent();
      return true;
    }
  }
}