in MachODump.cpp [3590:4122]
/// Disassemble the (DisSegName,DisSectName) section of a Mach-O object and
/// print the listing to outs().
///
/// The function
///   1. builds the MC layer objects (register/asm/subtarget info, context,
///      disassembler, optional symbolizer, instruction printer) for the
///      target returned by GetTarget(), and a second set for the Thumb target
///      when GetTarget() reports one;
///   2. collects sections, symbols and the data-in-code table of the object;
///   3. optionally sets up a DWARF context (from the object itself, or from
///      the file named by DSYMFile) so source locations can be appended;
///   4. for every section whose name and segment match, disassembles symbol by
///      symbol, and falls back to a linear sweep of the whole section when no
///      symbol could be used.
///
/// \param Filename    Name of the input file; passed to unwrapOrError() when
///                    the section contents cannot be read.
/// \param MachOOF     The Mach-O object to disassemble.
/// \param DisSegName  Segment name the section must belong to.
/// \param DisSectName Section name to disassemble.
///
/// Side effects visible in this function: may assign the global MCPU from the
/// target default, and resets the globals TripleName and ThumbTripleName after
/// each matching section has been processed. Errors are reported on errs() (or
/// through report_error()) and make the function return early.
static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
                             StringRef DisSegName, StringRef DisSectName) {
  const char *McpuDefault = nullptr;
  const Target *ThumbTarget = nullptr;
  const Target *TheTarget = GetTarget(MachOOF, &McpuDefault, &ThumbTarget);
  if (!TheTarget) {
    // GetTarget prints out stuff.
    return;
  }
  if (MCPU.empty() && McpuDefault)
    MCPU = McpuDefault;

  // Single place for the "could not create an MC component" diagnostic. Each
  // component is checked right after it is created, because the next factory
  // call dereferences it.
  auto ReportInitFailure = [](const auto &ForTriple) {
    errs() << "error: couldn't initialize disassembler for target "
           << ForTriple << '\n';
  };

  std::unique_ptr<const MCInstrInfo> InstrInfo(TheTarget->createMCInstrInfo());
  std::unique_ptr<const MCInstrInfo> ThumbInstrInfo;
  if (ThumbTarget)
    ThumbInstrInfo.reset(ThumbTarget->createMCInstrInfo());

  // Package up features to be passed to target/subtarget
  std::string FeaturesStr;
  if (MAttrs.size()) {
    SubtargetFeatures Features;
    for (unsigned i = 0; i != MAttrs.size(); ++i)
      Features.AddFeature(MAttrs[i]);
    FeaturesStr = Features.getString();
  }

  MCTargetOptions MCOptions;
  // Set up disassembler.
  std::unique_ptr<const MCRegisterInfo> MRI(
      TheTarget->createMCRegInfo(TripleName));
  if (!InstrInfo || !MRI) {
    ReportInitFailure(TripleName);
    return;
  }
  std::unique_ptr<const MCAsmInfo> AsmInfo(
      TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions));
  std::unique_ptr<const MCSubtargetInfo> STI(
      TheTarget->createMCSubtargetInfo(TripleName, MCPU, FeaturesStr));
  if (!AsmInfo || !STI) {
    ReportInitFailure(TripleName);
    return;
  }
  MCContext Ctx(Triple(TripleName), AsmInfo.get(), MRI.get(), STI.get());
  std::unique_ptr<MCDisassembler> DisAsm(
      TheTarget->createMCDisassembler(*STI, Ctx));
  if (!DisAsm) {
    ReportInitFailure(TripleName);
    return;
  }
  std::unique_ptr<MCSymbolizer> Symbolizer;
  struct DisassembleInfo SymbolizerInfo;
  std::unique_ptr<MCRelocationInfo> RelInfo(
      TheTarget->createMCRelocationInfo(TripleName, Ctx));
  if (RelInfo) {
    Symbolizer.reset(TheTarget->createMCSymbolizer(
        TripleName, SymbolizerGetOpInfo, SymbolizerSymbolLookUp,
        &SymbolizerInfo, &Ctx, std::move(RelInfo)));
    DisAsm->setSymbolizer(std::move(Symbolizer));
  }
  int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
  std::unique_ptr<MCInstPrinter> IP(TheTarget->createMCInstPrinter(
      Triple(TripleName), AsmPrinterVariant, *AsmInfo, *InstrInfo, *MRI));
  if (!IP) {
    ReportInitFailure(TripleName);
    return;
  }
  // Set the display preference for hex vs. decimal immediates.
  IP->setPrintImmHex(PrintImmHex);
  // Comment stream and backing vector.
  SmallString<128> CommentsToEmit;
  raw_svector_ostream CommentStream(CommentsToEmit);
  // FIXME: Setting the CommentStream in the InstPrinter is problematic in that
  // if it is done then arm64 comments for string literals don't get printed
  // and some constant get printed instead and not setting it causes intel
  // (32-bit and 64-bit) comments printed with different spacing before the
  // comment causing different diffs with the 'C' disassembler library API.
  // IP->setCommentStream(CommentStream);

  // Set up separate thumb disassembler if needed.
  std::unique_ptr<const MCRegisterInfo> ThumbMRI;
  std::unique_ptr<const MCAsmInfo> ThumbAsmInfo;
  std::unique_ptr<const MCSubtargetInfo> ThumbSTI;
  std::unique_ptr<MCDisassembler> ThumbDisAsm;
  std::unique_ptr<MCInstPrinter> ThumbIP;
  std::unique_ptr<MCContext> ThumbCtx;
  std::unique_ptr<MCSymbolizer> ThumbSymbolizer;
  struct DisassembleInfo ThumbSymbolizerInfo;
  std::unique_ptr<MCRelocationInfo> ThumbRelInfo;
  if (ThumbTarget) {
    ThumbMRI.reset(ThumbTarget->createMCRegInfo(ThumbTripleName));
    if (!ThumbInstrInfo || !ThumbMRI) {
      ReportInitFailure(ThumbTripleName);
      return;
    }
    ThumbAsmInfo.reset(
        ThumbTarget->createMCAsmInfo(*ThumbMRI, ThumbTripleName, MCOptions));
    ThumbSTI.reset(
        ThumbTarget->createMCSubtargetInfo(ThumbTripleName, MCPU, FeaturesStr));
    if (!ThumbAsmInfo || !ThumbSTI) {
      ReportInitFailure(ThumbTripleName);
      return;
    }
    ThumbCtx.reset(new MCContext(Triple(ThumbTripleName), ThumbAsmInfo.get(),
                                 ThumbMRI.get(), ThumbSTI.get()));
    ThumbDisAsm.reset(ThumbTarget->createMCDisassembler(*ThumbSTI, *ThumbCtx));
    if (!ThumbDisAsm) {
      ReportInitFailure(ThumbTripleName);
      return;
    }
    MCContext *PtrThumbCtx = ThumbCtx.get();
    ThumbRelInfo.reset(
        ThumbTarget->createMCRelocationInfo(ThumbTripleName, *PtrThumbCtx));
    if (ThumbRelInfo) {
      ThumbSymbolizer.reset(ThumbTarget->createMCSymbolizer(
          ThumbTripleName, SymbolizerGetOpInfo, SymbolizerSymbolLookUp,
          &ThumbSymbolizerInfo, PtrThumbCtx, std::move(ThumbRelInfo)));
      ThumbDisAsm->setSymbolizer(std::move(ThumbSymbolizer));
    }
    int ThumbAsmPrinterVariant = ThumbAsmInfo->getAssemblerDialect();
    ThumbIP.reset(ThumbTarget->createMCInstPrinter(
        Triple(ThumbTripleName), ThumbAsmPrinterVariant, *ThumbAsmInfo,
        *ThumbInstrInfo, *ThumbMRI));
    if (!ThumbIP) {
      ReportInitFailure(ThumbTripleName);
      return;
    }
    // Set the display preference for hex vs. decimal immediates.
    ThumbIP->setPrintImmHex(PrintImmHex);
  }

  MachO::mach_header Header = MachOOF->getHeader();

  // FIXME: Using the -cfg command line option, this code used to be able to
  // annotate relocations with the referenced symbol's name, and if this was
  // inside a __[cf]string section, the data it points to. This is now replaced
  // by the upcoming MCSymbolizer, which needs the appropriate setup done above.

  std::vector<SectionRef> Sections;
  std::vector<SymbolRef> Symbols;
  SmallVector<uint64_t, 8> FoundFns;
  uint64_t BaseSegmentAddress;

  getSectionsAndSymbols(MachOOF, Sections, Symbols, FoundFns,
                        BaseSegmentAddress);

  // Sort the symbols by address, just in case they didn't come in that way.
  std::sort(Symbols.begin(), Symbols.end(), SymbolSorter());

  // Build a data in code table that is sorted on by the address of each entry.
  uint64_t BaseAddress = 0;
  if (Header.filetype == MachO::MH_OBJECT) {
    // Guard against an object with no sections; indexing an empty vector is
    // undefined behaviour.
    if (!Sections.empty())
      BaseAddress = Sections[0].getAddress();
  } else {
    BaseAddress = BaseSegmentAddress;
  }

  DiceTable Dices;
  for (dice_iterator DI = MachOOF->begin_dices(), DE = MachOOF->end_dices();
       DI != DE; ++DI) {
    uint32_t Offset;
    DI->getOffset(Offset);
    Dices.push_back(std::make_pair(BaseAddress + Offset, *DI));
  }
  array_pod_sort(Dices.begin(), Dices.end());

  // Owners of the optional dSYM input. They are declared before diContext so
  // that they are destroyed after it: the DWARF context is created from the
  // object, and the object is parsed from the buffer.
  std::unique_ptr<MemoryBuffer> DSYMBuf;
  std::unique_ptr<MachOObjectFile> DSYMObj;
  std::unique_ptr<DIContext> diContext;
  ObjectFile *DbgObj = MachOOF;
  // Try to find debug info and set up the DIContext for it.
  if (UseDbg) {
    // A separate DSym file path was specified, parse it as a macho file,
    // get the sections and supply it to the section name parsing machinery.
    if (!DSYMFile.empty()) {
      ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
          MemoryBuffer::getFileOrSTDIN(DSYMFile);
      if (std::error_code EC = BufOrErr.getError()) {
        // Name the file that actually failed to open.
        errs() << "llvm-mctoll: " << DSYMFile << ": " << EC.message() << '\n';
        return;
      }
      DSYMBuf = std::move(BufOrErr.get());
      Expected<std::unique_ptr<MachOObjectFile>> DSYMObjOrErr =
          ObjectFile::createMachOObjectFile(DSYMBuf->getMemBufferRef());
      if (!DSYMObjOrErr) {
        report_error(DSYMObjOrErr.takeError(), DSYMFile);
        return; // Only reached if report_error() returns.
      }
      DSYMObj = std::move(*DSYMObjOrErr);
      DbgObj = DSYMObj.get();
    }

    // Setup the DIContext
    diContext = DWARFContext::create(*DbgObj);
  }

  if (FilterSections.size() == 0)
    outs() << "(" << DisSegName << "," << DisSectName << ") section\n";

  for (unsigned SectIdx = 0; SectIdx != Sections.size(); SectIdx++) {
    Expected<StringRef> SecNameOrErr = Sections[SectIdx].getName();
    if (!SecNameOrErr) {
      consumeError(SecNameOrErr.takeError());
      continue;
    }
    if (*SecNameOrErr != DisSectName)
      continue;

    DataRefImpl DR = Sections[SectIdx].getRawDataRefImpl();

    StringRef SegmentName = MachOOF->getSectionFinalSegmentName(DR);
    if (SegmentName != DisSegName)
      continue;

    StringRef BytesStr =
        unwrapOrError(Sections[SectIdx].getContents(), Filename);
    ArrayRef<uint8_t> Bytes(reinterpret_cast<const uint8_t *>(BytesStr.data()),
                            BytesStr.size());
    uint64_t SectAddress = Sections[SectIdx].getAddress();

    // Fetch one byte of the section as an unsigned value; offsets past the end
    // of the section data read as zero so that the raw-opcode fallback below
    // never reads outside the buffer.
    auto ByteAt = [&Bytes](uint64_t Offset) -> uint32_t {
      return Offset < Bytes.size() ? Bytes[Offset] : 0u;
    };

    bool symbolTableWorked = false;

    // Create a map of symbol addresses to symbol names for use by
    // the SymbolizerSymbolLookUp() routine.
    SymbolAddressMap AddrMap;
    bool DisSymNameFound = false;
    for (const SymbolRef &Symbol : MachOOF->symbols()) {
      Expected<SymbolRef::Type> STOrErr = Symbol.getType();
      if (!STOrErr)
        report_error(STOrErr.takeError(), MachOOF->getFileName());
      SymbolRef::Type ST = *STOrErr;
      if (ST == SymbolRef::ST_Function || ST == SymbolRef::ST_Data ||
          ST == SymbolRef::ST_Other) {
        auto SymOrErr = Symbol.getValue();
        if (!SymOrErr)
          report_error(SymOrErr.takeError(), Symbol.getObject()->getFileName());
        uint64_t Address = *SymOrErr;
        Expected<StringRef> SymNameOrErr = Symbol.getName();
        if (!SymNameOrErr)
          report_error(SymNameOrErr.takeError(), MachOOF->getFileName());
        StringRef SymName = *SymNameOrErr;
        AddrMap[Address] = SymName;
        if (!DisSymName.empty() && DisSymName == SymName)
          DisSymNameFound = true;
      }
    }
    if (!DisSymName.empty() && !DisSymNameFound) {
      outs() << "Can't find -dis-symname: " << DisSymName << "\n";
      return;
    }

    // Set up the block of info used by the Symbolizer call backs.
    SymbolizerInfo.verbose = !NoSymbolicOperands;
    SymbolizerInfo.O = MachOOF;
    SymbolizerInfo.S = Sections[SectIdx];
    SymbolizerInfo.AddrMap = &AddrMap;
    SymbolizerInfo.Sections = &Sections;
    SymbolizerInfo.class_name = nullptr;
    SymbolizerInfo.selector_name = nullptr;
    SymbolizerInfo.method = nullptr;
    SymbolizerInfo.demangled_name = nullptr;
    SymbolizerInfo.bindtable = nullptr;
    SymbolizerInfo.adrp_addr = 0;
    SymbolizerInfo.adrp_inst = 0;
    // Same for the ThumbSymbolizer
    ThumbSymbolizerInfo.verbose = !NoSymbolicOperands;
    ThumbSymbolizerInfo.O = MachOOF;
    ThumbSymbolizerInfo.S = Sections[SectIdx];
    ThumbSymbolizerInfo.AddrMap = &AddrMap;
    ThumbSymbolizerInfo.Sections = &Sections;
    ThumbSymbolizerInfo.class_name = nullptr;
    ThumbSymbolizerInfo.selector_name = nullptr;
    ThumbSymbolizerInfo.method = nullptr;
    ThumbSymbolizerInfo.demangled_name = nullptr;
    ThumbSymbolizerInfo.bindtable = nullptr;
    ThumbSymbolizerInfo.adrp_addr = 0;
    ThumbSymbolizerInfo.adrp_inst = 0;

    unsigned int Arch = MachOOF->getArch();

    // Skip all symbols if this is a stubs file.
    if (Bytes.size() == 0)
      return;

    // If the section has symbols but no symbol at the start of the section
    // these are used to make sure the bytes before the first symbol are
    // disassembled.
    bool FirstSymbol = true;
    bool FirstSymbolAtSectionStart = true;

    // Disassemble symbol by symbol.
    for (unsigned SymIdx = 0; SymIdx != Symbols.size(); SymIdx++) {
      Expected<StringRef> SymNameOrErr = Symbols[SymIdx].getName();
      if (!SymNameOrErr)
        report_error(SymNameOrErr.takeError(), MachOOF->getFileName());
      StringRef SymName = *SymNameOrErr;

      Expected<SymbolRef::Type> STOrErr = Symbols[SymIdx].getType();
      if (!STOrErr)
        report_error(STOrErr.takeError(), MachOOF->getFileName());
      SymbolRef::Type ST = *STOrErr;
      if (ST != SymbolRef::ST_Function && ST != SymbolRef::ST_Data)
        continue;

      // Make sure the symbol is defined in this section.
      bool containsSym = Sections[SectIdx].containsSymbol(Symbols[SymIdx]);
      if (!containsSym) {
        if (!DisSymName.empty() && DisSymName == SymName) {
          outs() << "-dis-symname: " << DisSymName << " not in the section\n";
          return;
        }
        continue;
      }
      // The __mh_execute_header is special and we need to deal with that fact
      // this symbol is before the start of the (__TEXT,__text) section and at
      // the address of the start of the __TEXT segment. This is because this
      // symbol is an N_SECT symbol in the (__TEXT,__text) but its address is
      // before the start of the section in a standard MH_EXECUTE filetype.
      if (!DisSymName.empty() && DisSymName == "__mh_execute_header") {
        outs() << "-dis-symname: __mh_execute_header not in any section\n";
        return;
      }
      // When this code is trying to disassemble a symbol at a time and in the
      // case there is only the __mh_execute_header symbol left as in a stripped
      // executable, we need to deal with this by ignoring this symbol so the
      // whole section is disassembled and this symbol is then not displayed.
      if (SymName == "__mh_execute_header" || SymName == "__mh_dylib_header" ||
          SymName == "__mh_bundle_header" || SymName == "__mh_object_header" ||
          SymName == "__mh_preload_header" || SymName == "__mh_dylinker_header")
        continue;

      // If we are only disassembling one symbol see if this is that symbol.
      if (!DisSymName.empty() && DisSymName != SymName)
        continue;

      // Start at the address of the symbol relative to the section's address.
      uint64_t SectSize = Sections[SectIdx].getSize();
      auto SymOrErr = Symbols[SymIdx].getValue();
      if (!SymOrErr)
        report_error(SymOrErr.takeError(),
                     Symbols[SymIdx].getObject()->getFileName());
      uint64_t Start = *SymOrErr;
      uint64_t SectionAddress = Sections[SectIdx].getAddress();
      Start -= SectionAddress;

      if (Start > SectSize) {
        outs() << "section data ends, " << SymName
               << " lies outside valid range\n";
        return;
      }

      // Stop disassembling either at the beginning of the next symbol or at
      // the end of the section.
      bool containsNextSym = false;
      uint64_t NextSym = 0;
      uint64_t NextSymIdx = SymIdx + 1;
      while (Symbols.size() > NextSymIdx) {
        Expected<SymbolRef::Type> NextSTOrErr = Symbols[NextSymIdx].getType();
        if (!NextSTOrErr)
          report_error(NextSTOrErr.takeError(), MachOOF->getFileName());
        SymbolRef::Type NextSymType = *NextSTOrErr;
        if (NextSymType == SymbolRef::ST_Function) {
          containsNextSym =
              Sections[SectIdx].containsSymbol(Symbols[NextSymIdx]);
          auto NextSymOrErr = Symbols[NextSymIdx].getValue();
          if (!NextSymOrErr)
            report_error(NextSymOrErr.takeError(),
                         Symbols[NextSymIdx].getObject()->getFileName());
          NextSym = *NextSymOrErr;
          NextSym -= SectionAddress;
          break;
        }
        ++NextSymIdx;
      }

      uint64_t End = containsNextSym ? std::min(NextSym, SectSize) : SectSize;
      uint64_t Size;

      symbolTableWorked = true;

      DataRefImpl Symb = Symbols[SymIdx].getRawDataRefImpl();
      uint32_t SymbolFlags = cantFail(MachOOF->getSymbolFlags(Symb));
      bool IsThumb = SymbolFlags & SymbolRef::SF_Thumb;

      // We only need the dedicated Thumb target if there's a real choice
      // (i.e. we're not targeting M-class) and the function is Thumb.
      bool UseThumbTarget = IsThumb && ThumbTarget;

      // If we are not specifying a symbol to start disassembly with and this
      // is the first symbol in the section but not at the start of the section
      // then move the disassembly index to the start of the section and
      // don't print the symbol name just yet. This is so the bytes before the
      // first symbol are disassembled.
      uint64_t SymbolStart = Start;
      if (DisSymName.empty() && FirstSymbol && Start != 0) {
        FirstSymbolAtSectionStart = false;
        Start = 0;
      } else
        outs() << SymName << ":\n";

      DILineInfo lastLine;
      for (uint64_t Index = Start; Index < End; Index += Size) {
        MCInst Inst;

        // If this is the first symbol in the section and it was not at the
        // start of the section, see if we are at its Index now and if so print
        // the symbol name.
        if (FirstSymbol && !FirstSymbolAtSectionStart && Index == SymbolStart)
          outs() << SymName << ":\n";

        uint64_t PC = SectAddress + Index;
        if (FullLeadingAddr) {
          if (MachOOF->is64Bit())
            outs() << format("%016" PRIx64, PC);
          else
            outs() << format("%08" PRIx64, PC);
        } else {
          outs() << format("%8" PRIx64 ":", PC);
        }
        if (Arch == Triple::arm)
          outs() << "\t";

        // Check the data in code table here to see if this is data not an
        // instruction to be disassembled.
        DiceTable Dice;
        Dice.push_back(std::make_pair(PC, DiceRef()));
        dice_table_iterator DTI =
            std::search(Dices.begin(), Dices.end(), Dice.begin(), Dice.end(),
                        compareDiceTableEntries);
        if (DTI != Dices.end()) {
          uint16_t Length;
          DTI->second.getLength(Length);
          uint16_t Kind;
          DTI->second.getKind(Kind);
          Size = DumpDataInCode(Bytes.data() + Index, Length, Kind);
          if ((Kind == MachO::DICE_KIND_JUMP_TABLE8) &&
              (PC == (DTI->first + Length - 1)) && (Length & 1))
            Size++;
          continue;
        }

        SmallVector<char, 64> AnnotationsBytes;
        raw_svector_ostream Annotations(AnnotationsBytes);

        bool gotInst;
        if (UseThumbTarget)
          gotInst = ThumbDisAsm->getInstruction(Inst, Size, Bytes.slice(Index),
                                                PC, Annotations);
        else
          gotInst = DisAsm->getInstruction(Inst, Size, Bytes.slice(Index), PC,
                                           Annotations);
        if (gotInst) {
          if (Arch == Triple::arm) {
            dumpBytes(makeArrayRef(Bytes.data() + Index, Size), outs());
          }
          formatted_raw_ostream FormattedOS(outs());
          StringRef AnnotationsStr = Annotations.str();
          if (UseThumbTarget)
            ThumbIP->printInst(&Inst, PC, AnnotationsStr, *ThumbSTI,
                               FormattedOS);
          else
            IP->printInst(&Inst, PC, AnnotationsStr, *STI, FormattedOS);
          emitComments(CommentStream, CommentsToEmit, FormattedOS, *AsmInfo);

          // Print debug info.
          if (diContext) {
            DILineInfo dli = diContext->getLineInfoForAddress({PC, SectIdx});
            // Print valid line info if it changed.
            if (dli != lastLine && dli.Line != 0)
              outs() << "\t## " << dli.FileName << ':' << dli.Line << ':'
                     << dli.Column;
            lastLine = dli;
          }
          outs() << "\n";
        } else {
          // The bytes could not be decoded: print them as raw data in the
          // natural unit of the architecture and step over them.
          if (Arch == Triple::x86_64 || Arch == Triple::x86) {
            outs() << format("\t.byte 0x%02x #bad opcode\n",
                             *(Bytes.data() + Index) & 0xff);
            Size = 1; // skip exactly one illegible byte and move on.
          } else if (Arch == Triple::aarch64 ||
                     (Arch == Triple::arm && !IsThumb)) {
            // Little-endian 32-bit word; bytes beyond the section read as 0.
            uint32_t opcode = ByteAt(Index) | ByteAt(Index + 1) << 8 |
                              ByteAt(Index + 2) << 16 |
                              ByteAt(Index + 3) << 24;
            outs() << format("\t.long\t0x%08x\n", opcode);
            Size = 4;
          } else if (Arch == Triple::arm) {
            assert(IsThumb && "ARM mode should have been dealt with above");
            // Little-endian 16-bit halfword; bytes beyond the section read
            // as 0.
            uint32_t opcode = ByteAt(Index) | ByteAt(Index + 1) << 8;
            outs() << format("\t.short\t0x%04x\n", opcode);
            Size = 2;
          } else {
            errs() << "llvm-mctoll: warning: invalid instruction encoding\n";
            if (Size == 0)
              Size = 1; // skip illegible bytes
          }
        }
      }
      // Now that we are done disassembling the first symbol, clear the flag
      // that marked it as such.
      FirstSymbol = false;
    }
    if (!symbolTableWorked) {
      // Reading the symbol table didn't work, disassemble the whole section.
      uint64_t SectAddress = Sections[SectIdx].getAddress();
      uint64_t SectSize = Sections[SectIdx].getSize();
      uint64_t InstSize;
      for (uint64_t Index = 0; Index < SectSize; Index += InstSize) {
        MCInst Inst;

        uint64_t PC = SectAddress + Index;
        SmallVector<char, 64> AnnotationsBytes;
        raw_svector_ostream Annotations(AnnotationsBytes);
        if (DisAsm->getInstruction(Inst, InstSize, Bytes.slice(Index), PC,
                                   Annotations)) {
          if (FullLeadingAddr) {
            if (MachOOF->is64Bit())
              outs() << format("%016" PRIx64, PC);
            else
              outs() << format("%08" PRIx64, PC);
          } else {
            outs() << format("%8" PRIx64 ":", PC);
          }
          if (Arch == Triple::arm) {
            outs() << "\t";
            dumpBytes(makeArrayRef(Bytes.data() + Index, InstSize), outs());
          }
          StringRef AnnotationsStr = Annotations.str();
          IP->printInst(&Inst, PC, AnnotationsStr, *STI, outs());
          outs() << "\n";
        } else {
          if (Arch == Triple::x86_64 || Arch == Triple::x86) {
            outs() << format("\t.byte 0x%02x #bad opcode\n",
                             *(Bytes.data() + Index) & 0xff);
            InstSize = 1; // skip exactly one illegible byte and move on.
          } else {
            errs() << "llvm-mctoll: warning: invalid instruction encoding\n";
            if (InstSize == 0)
              InstSize = 1; // skip illegible bytes
          }
        }
      }
    }
    // The TripleName's need to be reset if we are called again for a different
    // architecture.
    TripleName = "";
    ThumbTripleName = "";

    // Release the strings stored in the symbolizer info blocks while this
    // section was disassembled; the pointers are reset to nullptr at the top
    // of the next matching section.
    if (SymbolizerInfo.method != nullptr)
      free(SymbolizerInfo.method);
    if (SymbolizerInfo.demangled_name != nullptr)
      free(SymbolizerInfo.demangled_name);
    if (ThumbSymbolizerInfo.method != nullptr)
      free(ThumbSymbolizerInfo.method);
    if (ThumbSymbolizerInfo.demangled_name != nullptr)
      free(ThumbSymbolizerInfo.demangled_name);
  }
}