static void DisassembleMachO()

in MachODump.cpp [3590:4122]


/// Disassemble the (DisSegName,DisSectName) section of \p MachOOF and print
/// the result to outs().
///
/// The matching section is disassembled symbol by symbol.  If no symbol of
/// the section could be used, the whole section is disassembled linearly
/// instead.  Addresses found in the data-in-code table are dumped as data
/// rather than decoded.  When GetTarget() also hands back a Thumb target, a
/// second disassembler and printer are created and used for the symbols that
/// carry the SF_Thumb flag.
///
/// \param Filename    Passed to unwrapOrError() when reading section contents.
/// \param MachOOF     The Mach-O object file to disassemble.
/// \param DisSegName  Segment name the section has to belong to.
/// \param DisSectName Name of the section to disassemble.
///
/// Side effects: may assign the global MCPU, and clears the globals
/// TripleName and ThumbTripleName once a matching section was processed.
static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
                             StringRef DisSegName, StringRef DisSectName) {
  const char *McpuDefault = nullptr;
  const Target *ThumbTarget = nullptr;
  const Target *TheTarget = GetTarget(MachOOF, &McpuDefault, &ThumbTarget);
  if (!TheTarget) {
    // GetTarget prints out stuff.
    return;
  }
  if (MCPU.empty() && McpuDefault)
    MCPU = McpuDefault;

  // All failures to create an MC component are reported with this message.
  auto ReportInitFailure = [](const std::string &ForTriple) {
    errs() << "error: couldn't initialize disassembler for target "
           << ForTriple << '\n';
  };

  std::unique_ptr<const MCInstrInfo> InstrInfo(TheTarget->createMCInstrInfo());
  std::unique_ptr<const MCInstrInfo> ThumbInstrInfo;
  if (ThumbTarget)
    ThumbInstrInfo.reset(ThumbTarget->createMCInstrInfo());

  // Package up features to be passed to target/subtarget
  std::string FeaturesStr;
  if (MAttrs.size()) {
    SubtargetFeatures Features;
    for (unsigned i = 0; i != MAttrs.size(); ++i)
      Features.AddFeature(MAttrs[i]);
    FeaturesStr = Features.getString();
  }

  MCTargetOptions MCOptions;
  // Set up disassembler.  Each component is checked before the first place
  // that dereferences it, so a target lacking one of them produces the error
  // message instead of a null dereference.
  std::unique_ptr<const MCRegisterInfo> MRI(
      TheTarget->createMCRegInfo(TripleName));
  std::unique_ptr<const MCAsmInfo> AsmInfo;
  if (MRI)
    AsmInfo.reset(TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions));
  std::unique_ptr<const MCSubtargetInfo> STI(
      TheTarget->createMCSubtargetInfo(TripleName, MCPU, FeaturesStr));
  if (!InstrInfo || !MRI || !AsmInfo || !STI) {
    ReportInitFailure(TripleName);
    return;
  }
  MCContext Ctx(Triple(TripleName), AsmInfo.get(), MRI.get(), STI.get());
  std::unique_ptr<MCDisassembler> DisAsm(
      TheTarget->createMCDisassembler(*STI, Ctx));
  if (!DisAsm) {
    ReportInitFailure(TripleName);
    return;
  }
  std::unique_ptr<MCSymbolizer> Symbolizer;
  struct DisassembleInfo SymbolizerInfo;
  std::unique_ptr<MCRelocationInfo> RelInfo(
      TheTarget->createMCRelocationInfo(TripleName, Ctx));
  if (RelInfo) {
    // The symbolizer calls back into SymbolizerGetOpInfo and
    // SymbolizerSymbolLookUp with &SymbolizerInfo, which is filled in per
    // section further down.
    Symbolizer.reset(TheTarget->createMCSymbolizer(
        TripleName, SymbolizerGetOpInfo, SymbolizerSymbolLookUp,
        &SymbolizerInfo, &Ctx, std::move(RelInfo)));
    DisAsm->setSymbolizer(std::move(Symbolizer));
  }
  int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
  std::unique_ptr<MCInstPrinter> IP(TheTarget->createMCInstPrinter(
      Triple(TripleName), AsmPrinterVariant, *AsmInfo, *InstrInfo, *MRI));
  if (!IP) {
    ReportInitFailure(TripleName);
    return;
  }
  // Set the display preference for hex vs. decimal immediates.
  IP->setPrintImmHex(PrintImmHex);
  // Comment stream and backing vector.
  SmallString<128> CommentsToEmit;
  raw_svector_ostream CommentStream(CommentsToEmit);
  // FIXME: Setting the CommentStream in the InstPrinter is problematic in that
  // if it is done then arm64 comments for string literals don't get printed
  // and some constant get printed instead and not setting it causes intel
  // (32-bit and 64-bit) comments printed with different spacing before the
  // comment causing different diffs with the 'C' disassembler library API.
  // IP->setCommentStream(CommentStream);

  // Set up separate thumb disassembler if needed.
  std::unique_ptr<const MCRegisterInfo> ThumbMRI;
  std::unique_ptr<const MCAsmInfo> ThumbAsmInfo;
  std::unique_ptr<const MCSubtargetInfo> ThumbSTI;
  std::unique_ptr<MCDisassembler> ThumbDisAsm;
  std::unique_ptr<MCInstPrinter> ThumbIP;
  std::unique_ptr<MCContext> ThumbCtx;
  std::unique_ptr<MCSymbolizer> ThumbSymbolizer;
  struct DisassembleInfo ThumbSymbolizerInfo;
  std::unique_ptr<MCRelocationInfo> ThumbRelInfo;
  if (ThumbTarget) {
    ThumbMRI.reset(ThumbTarget->createMCRegInfo(ThumbTripleName));
    if (ThumbMRI)
      ThumbAsmInfo.reset(
          ThumbTarget->createMCAsmInfo(*ThumbMRI, ThumbTripleName, MCOptions));
    ThumbSTI.reset(
        ThumbTarget->createMCSubtargetInfo(ThumbTripleName, MCPU, FeaturesStr));
    if (!ThumbInstrInfo || !ThumbMRI || !ThumbAsmInfo || !ThumbSTI) {
      ReportInitFailure(ThumbTripleName);
      return;
    }
    ThumbCtx.reset(new MCContext(Triple(ThumbTripleName), ThumbAsmInfo.get(),
                                 ThumbMRI.get(), ThumbSTI.get()));
    ThumbDisAsm.reset(ThumbTarget->createMCDisassembler(*ThumbSTI, *ThumbCtx));
    if (!ThumbDisAsm) {
      ReportInitFailure(ThumbTripleName);
      return;
    }
    MCContext *PtrThumbCtx = ThumbCtx.get();
    ThumbRelInfo.reset(
        ThumbTarget->createMCRelocationInfo(ThumbTripleName, *PtrThumbCtx));
    if (ThumbRelInfo) {
      ThumbSymbolizer.reset(ThumbTarget->createMCSymbolizer(
          ThumbTripleName, SymbolizerGetOpInfo, SymbolizerSymbolLookUp,
          &ThumbSymbolizerInfo, PtrThumbCtx, std::move(ThumbRelInfo)));
      ThumbDisAsm->setSymbolizer(std::move(ThumbSymbolizer));
    }
    int ThumbAsmPrinterVariant = ThumbAsmInfo->getAssemblerDialect();
    ThumbIP.reset(ThumbTarget->createMCInstPrinter(
        Triple(ThumbTripleName), ThumbAsmPrinterVariant, *ThumbAsmInfo,
        *ThumbInstrInfo, *ThumbMRI));
    if (!ThumbIP) {
      ReportInitFailure(ThumbTripleName);
      return;
    }
    // Set the display preference for hex vs. decimal immediates.
    ThumbIP->setPrintImmHex(PrintImmHex);
  }

  MachO::mach_header Header = MachOOF->getHeader();

  // FIXME: Using the -cfg command line option, this code used to be able to
  // annotate relocations with the referenced symbol's name, and if this was
  // inside a __[cf]string section, the data it points to. This is now replaced
  // by the upcoming MCSymbolizer, which needs the appropriate setup done above.
  std::vector<SectionRef> Sections;
  std::vector<SymbolRef> Symbols;
  SmallVector<uint64_t, 8> FoundFns;
  // Zero-initialized so that a read below is well defined even if
  // getSectionsAndSymbols() does not assign it.
  uint64_t BaseSegmentAddress = 0;

  getSectionsAndSymbols(MachOOF, Sections, Symbols, FoundFns,
                        BaseSegmentAddress);

  // Sort the symbols by address, just in case they didn't come in that way.
  std::sort(Symbols.begin(), Symbols.end(), SymbolSorter());

  // Build a data in code table that is sorted on by the address of each entry.
  // For MH_OBJECT files the offsets are taken relative to the first section,
  // otherwise relative to the base segment address.  An object without any
  // section keeps a base of zero rather than indexing an empty vector.
  uint64_t BaseAddress = 0;
  if (Header.filetype != MachO::MH_OBJECT)
    BaseAddress = BaseSegmentAddress;
  else if (!Sections.empty())
    BaseAddress = Sections[0].getAddress();
  DiceTable Dices;
  for (dice_iterator DI = MachOOF->begin_dices(), DE = MachOOF->end_dices();
       DI != DE; ++DI) {
    uint32_t Offset;
    DI->getOffset(Offset);
    Dices.push_back(std::make_pair(BaseAddress + Offset, *DI));
  }
  array_pod_sort(Dices.begin(), Dices.end());

  // The dSYM buffer and object are declared ahead of diContext so that they
  // are destroyed after it: the object file refers to the buffer's memory and
  // DbgObj points at the object for as long as debug info is queried.
  std::unique_ptr<MemoryBuffer> DSYMBuf;
  std::unique_ptr<ObjectFile> DSYMObj;
  std::unique_ptr<DIContext> diContext;
  ObjectFile *DbgObj = MachOOF;
  // Try to find debug info and set up the DIContext for it.
  if (UseDbg) {
    // A separate DSym file path was specified, parse it as a macho file,
    // get the sections and supply it to the section name parsing machinery.
    if (!DSYMFile.empty()) {
      ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
          MemoryBuffer::getFileOrSTDIN(DSYMFile);
      if (std::error_code EC = BufOrErr.getError()) {
        // Name the dSYM file: that is the file that could not be read.
        errs() << "llvm-mctoll: " << DSYMFile << ": " << EC.message() << '\n';
        return;
      }
      DSYMBuf = std::move(BufOrErr.get());
      auto DSYMObjOrErr =
          ObjectFile::createMachOObjectFile(DSYMBuf->getMemBufferRef());
      if (!DSYMObjOrErr)
        report_error(DSYMObjOrErr.takeError(), DSYMFile);
      DSYMObj = std::move(*DSYMObjOrErr);
      DbgObj = DSYMObj.get();
    }

    // Setup the DIContext
    diContext = DWARFContext::create(*DbgObj);
  }

  if (FilterSections.size() == 0)
    outs() << "(" << DisSegName << "," << DisSectName << ") section\n";

  for (unsigned SectIdx = 0; SectIdx != Sections.size(); SectIdx++) {
    // Only the section whose section and segment names both match is handled;
    // sections whose name cannot be read are skipped.
    Expected<StringRef> SecNameOrErr = Sections[SectIdx].getName();
    if (!SecNameOrErr) {
      consumeError(SecNameOrErr.takeError());
      continue;
    }
    if (*SecNameOrErr != DisSectName)
      continue;

    DataRefImpl DR = Sections[SectIdx].getRawDataRefImpl();

    StringRef SegmentName = MachOOF->getSectionFinalSegmentName(DR);
    if (SegmentName != DisSegName)
      continue;

    StringRef BytesStr =
        unwrapOrError(Sections[SectIdx].getContents(), Filename);
    ArrayRef<uint8_t> Bytes(reinterpret_cast<const uint8_t *>(BytesStr.data()),
                            BytesStr.size());
    uint64_t SectAddress = Sections[SectIdx].getAddress();

    // Assembles up to Count bytes starting at Offset into a little-endian
    // value.  Bytes past the end of the section contents read as zero so a
    // truncated tail never reads out of bounds.
    auto ReadLittleEndian = [&Bytes](uint64_t Offset, unsigned Count) {
      uint32_t Value = 0;
      for (unsigned I = 0; I != Count && Offset + I < Bytes.size(); ++I)
        Value |= static_cast<uint32_t>(Bytes[Offset + I]) << (8 * I);
      return Value;
    };

    bool symbolTableWorked = false;

    // Create a map of symbol addresses to symbol names for use by
    // the SymbolizerSymbolLookUp() routine.
    SymbolAddressMap AddrMap;
    bool DisSymNameFound = false;
    for (const SymbolRef &Symbol : MachOOF->symbols()) {
      Expected<SymbolRef::Type> STOrErr = Symbol.getType();
      if (!STOrErr)
        report_error(STOrErr.takeError(), MachOOF->getFileName());
      SymbolRef::Type ST = *STOrErr;
      if (ST == SymbolRef::ST_Function || ST == SymbolRef::ST_Data ||
          ST == SymbolRef::ST_Other) {
        auto SymOrErr = Symbol.getValue();
        if (!SymOrErr)
          report_error(SymOrErr.takeError(), Symbol.getObject()->getFileName());

        uint64_t Address = *SymOrErr;
        Expected<StringRef> SymNameOrErr = Symbol.getName();
        if (!SymNameOrErr)
          report_error(SymNameOrErr.takeError(), MachOOF->getFileName());
        StringRef SymName = *SymNameOrErr;
        AddrMap[Address] = SymName;
        if (!DisSymName.empty() && DisSymName == SymName)
          DisSymNameFound = true;
      }
    }
    if (!DisSymName.empty() && !DisSymNameFound) {
      outs() << "Can't find -dis-symname: " << DisSymName << "\n";
      return;
    }
    // Set up the block of info used by the Symbolizer call backs.
    SymbolizerInfo.verbose = !NoSymbolicOperands;
    SymbolizerInfo.O = MachOOF;
    SymbolizerInfo.S = Sections[SectIdx];
    SymbolizerInfo.AddrMap = &AddrMap;
    SymbolizerInfo.Sections = &Sections;
    SymbolizerInfo.class_name = nullptr;
    SymbolizerInfo.selector_name = nullptr;
    SymbolizerInfo.method = nullptr;
    SymbolizerInfo.demangled_name = nullptr;
    SymbolizerInfo.bindtable = nullptr;
    SymbolizerInfo.adrp_addr = 0;
    SymbolizerInfo.adrp_inst = 0;
    // Same for the ThumbSymbolizer
    ThumbSymbolizerInfo.verbose = !NoSymbolicOperands;
    ThumbSymbolizerInfo.O = MachOOF;
    ThumbSymbolizerInfo.S = Sections[SectIdx];
    ThumbSymbolizerInfo.AddrMap = &AddrMap;
    ThumbSymbolizerInfo.Sections = &Sections;
    ThumbSymbolizerInfo.class_name = nullptr;
    ThumbSymbolizerInfo.selector_name = nullptr;
    ThumbSymbolizerInfo.method = nullptr;
    ThumbSymbolizerInfo.demangled_name = nullptr;
    ThumbSymbolizerInfo.bindtable = nullptr;
    ThumbSymbolizerInfo.adrp_addr = 0;
    ThumbSymbolizerInfo.adrp_inst = 0;

    unsigned int Arch = MachOOF->getArch();

    // Skip all symbols if this is a stubs file.
    if (Bytes.size() == 0)
      return;

    // NOTE(review): the early returns inside the symbol loop below skip the
    // cleanup at the end of this section iteration (triple name reset and
    // free() of the symbolizer strings) -- confirm whether that is intended.

    // If the section has symbols but no symbol at the start of the section
    // these are used to make sure the bytes before the first symbol are
    // disassembled.
    bool FirstSymbol = true;
    bool FirstSymbolAtSectionStart = true;

    // Disassemble symbol by symbol.
    for (unsigned SymIdx = 0; SymIdx != Symbols.size(); SymIdx++) {
      Expected<StringRef> SymNameOrErr = Symbols[SymIdx].getName();
      if (!SymNameOrErr)
        report_error(SymNameOrErr.takeError(), MachOOF->getFileName());
      StringRef SymName = *SymNameOrErr;

      Expected<SymbolRef::Type> STOrErr = Symbols[SymIdx].getType();
      if (!STOrErr)
        report_error(STOrErr.takeError(), MachOOF->getFileName());
      SymbolRef::Type ST = *STOrErr;
      if (ST != SymbolRef::ST_Function && ST != SymbolRef::ST_Data)
        continue;

      // Make sure the symbol is defined in this section.
      bool containsSym = Sections[SectIdx].containsSymbol(Symbols[SymIdx]);
      if (!containsSym) {
        if (!DisSymName.empty() && DisSymName == SymName) {
          outs() << "-dis-symname: " << DisSymName << " not in the section\n";
          return;
        }
        continue;
      }
      // The __mh_execute_header is special and we need to deal with that fact
      // this symbol is before the start of the (__TEXT,__text) section and at
      // the address of the start of the __TEXT segment.  This is because this
      // symbol is an N_SECT symbol in the (__TEXT,__text) but its address is
      // before the start of the section in a standard MH_EXECUTE filetype.
      if (!DisSymName.empty() && DisSymName == "__mh_execute_header") {
        outs() << "-dis-symname: __mh_execute_header not in any section\n";
        return;
      }
      // When this code is trying to disassemble a symbol at a time and in the
      // case there is only the __mh_execute_header symbol left as in a stripped
      // executable, we need to deal with this by ignoring this symbol so the
      // whole section is disassembled and this symbol is then not displayed.
      if (SymName == "__mh_execute_header" || SymName == "__mh_dylib_header" ||
          SymName == "__mh_bundle_header" || SymName == "__mh_object_header" ||
          SymName == "__mh_preload_header" || SymName == "__mh_dylinker_header")
        continue;

      // If we are only disassembling one symbol see if this is that symbol.
      if (!DisSymName.empty() && DisSymName != SymName)
        continue;

      // Start at the address of the symbol relative to the section's address.
      uint64_t SectSize = Sections[SectIdx].getSize();
      auto SymOrErr = Symbols[SymIdx].getValue();
      if (!SymOrErr)
        report_error(SymOrErr.takeError(),
                     Symbols[SymIdx].getObject()->getFileName());
      uint64_t Start = *SymOrErr;
      uint64_t SectionAddress = Sections[SectIdx].getAddress();
      Start -= SectionAddress;

      if (Start > SectSize) {
        outs() << "section data ends, " << SymName
               << " lies outside valid range\n";
        return;
      }

      // Stop disassembling either at the beginning of the next symbol or at
      // the end of the section.  Only the next ST_Function symbol is
      // considered as a boundary.
      bool containsNextSym = false;
      uint64_t NextSym = 0;
      uint64_t NextSymIdx = SymIdx + 1;
      while (Symbols.size() > NextSymIdx) {
        Expected<SymbolRef::Type> STOrErr = Symbols[NextSymIdx].getType();
        if (!STOrErr)
          report_error(STOrErr.takeError(), MachOOF->getFileName());
        SymbolRef::Type NextSymType = *STOrErr;
        if (NextSymType == SymbolRef::ST_Function) {
          containsNextSym =
              Sections[SectIdx].containsSymbol(Symbols[NextSymIdx]);
          auto SymOrErr = Symbols[NextSymIdx].getValue();
          if (!SymOrErr)
            report_error(SymOrErr.takeError(),
                         Symbols[NextSymIdx].getObject()->getFileName());

          NextSym = *SymOrErr;
          NextSym -= SectionAddress;
          break;
        }
        ++NextSymIdx;
      }

      uint64_t End = containsNextSym ? std::min(NextSym, SectSize) : SectSize;
      uint64_t Size;

      symbolTableWorked = true;

      DataRefImpl Symb = Symbols[SymIdx].getRawDataRefImpl();
      uint32_t SymbolFlags = cantFail(MachOOF->getSymbolFlags(Symb));
      bool IsThumb = SymbolFlags & SymbolRef::SF_Thumb;

      // We only need the dedicated Thumb target if there's a real choice
      // (i.e. we're not targeting M-class) and the function is Thumb.
      bool UseThumbTarget = IsThumb && ThumbTarget;

      // If we are not specifying a symbol to start disassembly with and this
      // is the first symbol in the section but not at the start of the section
      // then move the disassembly index to the start of the section and
      // don't print the symbol name just yet.  This is so the bytes before the
      // first symbol are disassembled.
      uint64_t SymbolStart = Start;
      if (DisSymName.empty() && FirstSymbol && Start != 0) {
        FirstSymbolAtSectionStart = false;
        Start = 0;
      } else
        outs() << SymName << ":\n";

      DILineInfo lastLine;
      for (uint64_t Index = Start; Index < End; Index += Size) {
        MCInst Inst;

        // If this is the first symbol in the section and it was not at the
        // start of the section, see if we are at its Index now and if so print
        // the symbol name.
        if (FirstSymbol && !FirstSymbolAtSectionStart && Index == SymbolStart)
          outs() << SymName << ":\n";

        uint64_t PC = SectAddress + Index;
        if (FullLeadingAddr) {
          if (MachOOF->is64Bit())
            outs() << format("%016" PRIx64, PC);
          else
            outs() << format("%08" PRIx64, PC);
        } else {
          outs() << format("%8" PRIx64 ":", PC);
        }
        if (Arch == Triple::arm)
          outs() << "\t";

        // Check the data in code table here to see if this is data not an
        // instruction to be disassembled.
        DiceTable Dice;
        Dice.push_back(std::make_pair(PC, DiceRef()));
        dice_table_iterator DTI =
            std::search(Dices.begin(), Dices.end(), Dice.begin(), Dice.end(),
                        compareDiceTableEntries);
        if (DTI != Dices.end()) {
          uint16_t Length;
          DTI->second.getLength(Length);
          uint16_t Kind;
          DTI->second.getKind(Kind);
          Size = DumpDataInCode(Bytes.data() + Index, Length, Kind);
          // An odd-length 8-bit jump table ending at PC advances by one extra
          // byte.
          if ((Kind == MachO::DICE_KIND_JUMP_TABLE8) &&
              (PC == (DTI->first + Length - 1)) && (Length & 1))
            Size++;
          continue;
        }

        SmallVector<char, 64> AnnotationsBytes;
        raw_svector_ostream Annotations(AnnotationsBytes);

        bool gotInst;
        if (UseThumbTarget)
          gotInst = ThumbDisAsm->getInstruction(Inst, Size, Bytes.slice(Index),
                                                PC, Annotations);
        else
          gotInst = DisAsm->getInstruction(Inst, Size, Bytes.slice(Index), PC,
                                           Annotations);
        if (gotInst) {
          if (Arch == Triple::arm) {
            dumpBytes(makeArrayRef(Bytes.data() + Index, Size), outs());
          }
          formatted_raw_ostream FormattedOS(outs());
          StringRef AnnotationsStr = Annotations.str();
          if (UseThumbTarget)
            ThumbIP->printInst(&Inst, PC, AnnotationsStr, *ThumbSTI,
                               FormattedOS);
          else
            IP->printInst(&Inst, PC, AnnotationsStr, *STI, FormattedOS);

          emitComments(CommentStream, CommentsToEmit, FormattedOS, *AsmInfo);

          // Print debug info.
          if (diContext) {
            DILineInfo dli = diContext->getLineInfoForAddress({PC, SectIdx});
            // Print valid line info if it changed.
            if (dli != lastLine && dli.Line != 0)
              outs() << "\t## " << dli.FileName << ':' << dli.Line << ':'
                     << dli.Column;
            lastLine = dli;
          }
          outs() << "\n";
        } else {
          // Decoding failed: emit the raw bytes in a width that matches the
          // architecture's instruction unit and step over them.
          if (Arch == Triple::x86_64 || Arch == Triple::x86) {
            outs() << format("\t.byte 0x%02x #bad opcode\n",
                             *(Bytes.data() + Index) & 0xff);
            Size = 1; // skip exactly one illegible byte and move on.
          } else if (Arch == Triple::aarch64 ||
                     (Arch == Triple::arm && !IsThumb)) {
            uint32_t opcode = ReadLittleEndian(Index, 4);
            outs() << format("\t.long\t0x%08x\n", opcode);
            Size = 4;
          } else if (Arch == Triple::arm) {
            assert(IsThumb && "ARM mode should have been dealt with above");
            uint32_t opcode = ReadLittleEndian(Index, 2);
            outs() << format("\t.short\t0x%04x\n", opcode);
            Size = 2;
          } else {
            errs() << "llvm-mctoll: warning: invalid instruction encoding\n";
            if (Size == 0)
              Size = 1; // skip illegible bytes
          }
        }
      }
      // Now that we are done disassembled the first symbol set the bool that
      // were doing this to false.
      FirstSymbol = false;
    }
    if (!symbolTableWorked) {
      // Reading the symbol table didn't work, disassemble the whole section.
      // Only the primary (non-Thumb) disassembler is used on this path.
      uint64_t SectSize = Sections[SectIdx].getSize();
      uint64_t InstSize;
      for (uint64_t Index = 0; Index < SectSize; Index += InstSize) {
        MCInst Inst;

        uint64_t PC = SectAddress + Index;
        SmallVector<char, 64> AnnotationsBytes;
        raw_svector_ostream Annotations(AnnotationsBytes);
        if (DisAsm->getInstruction(Inst, InstSize, Bytes.slice(Index), PC,
                                   Annotations)) {
          if (FullLeadingAddr) {
            if (MachOOF->is64Bit())
              outs() << format("%016" PRIx64, PC);
            else
              outs() << format("%08" PRIx64, PC);
          } else {
            outs() << format("%8" PRIx64 ":", PC);
          }
          if (Arch == Triple::arm) {
            outs() << "\t";
            dumpBytes(makeArrayRef(Bytes.data() + Index, InstSize), outs());
          }
          StringRef AnnotationsStr = Annotations.str();
          IP->printInst(&Inst, PC, AnnotationsStr, *STI, outs());
          outs() << "\n";
        } else {
          if (Arch == Triple::x86_64 || Arch == Triple::x86) {
            outs() << format("\t.byte 0x%02x #bad opcode\n",
                             *(Bytes.data() + Index) & 0xff);
            InstSize = 1; // skip exactly one illegible byte and move on.
          } else {
            errs() << "llvm-mctoll: warning: invalid instruction encoding\n";
            if (InstSize == 0)
              InstSize = 1; // skip illegible bytes
          }
        }
      }
    }
    // The TripleName's need to be reset if we are called again for a different
    // architecture.
    TripleName = "";
    ThumbTripleName = "";

    // These strings are released with free() here; they are reset to nullptr
    // at the top of each matching section, so this must happen per section.
    if (SymbolizerInfo.method != nullptr)
      free(SymbolizerInfo.method);
    if (SymbolizerInfo.demangled_name != nullptr)
      free(SymbolizerInfo.demangled_name);
    if (ThumbSymbolizerInfo.method != nullptr)
      free(ThumbSymbolizerInfo.method);
    if (ThumbSymbolizerInfo.demangled_name != nullptr)
      free(ThumbSymbolizerInfo.demangled_name);
  }
}