in llvm-mctoll.cpp [829:1486]
static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
if (StartAddress > StopAddress)
error("Start address should be less than stop address");
const Target *TheTarget = getTarget(Obj);
// Package up features to be passed to target/subtarget
SubtargetFeatures Features = Obj->getFeatures();
if (MAttrs.size()) {
for (unsigned i = 0; i != MAttrs.size(); ++i)
Features.AddFeature(MAttrs[i]);
}
std::unique_ptr<const MCRegisterInfo> MRI(
TheTarget->createMCRegInfo(TripleName));
if (!MRI)
report_error(Obj->getFileName(),
"no register info for target " + TripleName);
MCTargetOptions MCOptions;
// Set up disassembler.
std::unique_ptr<const MCAsmInfo> AsmInfo(
TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions));
if (!AsmInfo)
report_error(Obj->getFileName(),
"no assembly info for target " + TripleName);
std::unique_ptr<const MCSubtargetInfo> STI(
TheTarget->createMCSubtargetInfo(TripleName, MCPU, Features.getString()));
if (!STI)
report_error(Obj->getFileName(),
"no subtarget info for target " + TripleName);
std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo());
if (!MII)
report_error(Obj->getFileName(),
"no instruction info for target " + TripleName);
MCObjectFileInfo MOFI;
MCContext Ctx(Triple(TripleName), AsmInfo.get(), MRI.get(), STI.get());
// FIXME: for now initialize MCObjectFileInfo with default values
MOFI.initMCObjectFileInfo(Ctx, /*PIC=*/false);
std::unique_ptr<MCDisassembler> DisAsm(
TheTarget->createMCDisassembler(*STI, Ctx));
if (!DisAsm)
report_error(Obj->getFileName(),
"no disassembler for target " + TripleName);
std::unique_ptr<const MCInstrAnalysis> MIA(
TheTarget->createMCInstrAnalysis(MII.get()));
int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
std::unique_ptr<MCInstPrinter> IP(TheTarget->createMCInstPrinter(
Triple(TripleName), AsmPrinterVariant, *AsmInfo, *MII, *MRI));
if (!IP)
report_error(Obj->getFileName(),
"no instruction printer for target " + TripleName);
IP->setPrintImmHex(PrintImmHex);
PrettyPrinter &PIP = selectPrettyPrinter(Triple(TripleName));
LLVMContext llvmCtx;
std::unique_ptr<TargetMachine> Target(
TheTarget->createTargetMachine(TripleName, MCPU, Features.getString(),
TargetOptions(), /* RelocModel */ None));
assert(Target && "Could not allocate target machine!");
LLVMTargetMachine &llvmTgtMach = static_cast<LLVMTargetMachine &>(*Target);
MachineModuleInfoWrapperPass *machineModuleInfo =
new MachineModuleInfoWrapperPass(&llvmTgtMach);
/* New Module instance with file name */
Module module(Obj->getFileName(), llvmCtx);
/* Set datalayout of the module to be the same as LLVMTargetMachine */
module.setDataLayout(Target->createDataLayout());
machineModuleInfo->doInitialization(module);
// Initialize all module raisers that are supported and are part of current
// LLVM build.
ModuleRaiser::InitializeAllModuleRaisers();
// Get the module raiser for Target of the binary being raised
ModuleRaiser *moduleRaiser = RaiserContext::getModuleRaiser(Target.get());
assert((moduleRaiser != nullptr) && "Failed to build module raiser");
// Set data of module raiser
moduleRaiser->setModuleRaiserInfo(&module, Target.get(),
&machineModuleInfo->getMMI(), MIA.get(),
MII.get(), Obj, DisAsm.get());
// Collect dynamic relocations.
moduleRaiser->collectDynamicRelocations();
// Create a mapping, RelocSecs = SectionRelocMap[S], where sections
// in RelocSecs contain the relocations for section S.
std::error_code EC;
std::map<SectionRef, SmallVector<SectionRef, 1>> SectionRelocMap;
for (const SectionRef &Section : ToolSectionFilter(*Obj)) {
Expected<section_iterator> SecOrErr = Section.getRelocatedSection();
if (!SecOrErr) {
break;
}
section_iterator Sec2 = *SecOrErr;
if (Sec2 != Obj->section_end())
SectionRelocMap[*Sec2].push_back(Section);
}
// Create a mapping from virtual address to symbol name. This is used to
// pretty print the symbols while disassembling.
std::map<SectionRef, SectionSymbolsTy> AllSymbols;
for (const SymbolRef &Symbol : Obj->symbols()) {
Expected<uint64_t> AddressOrErr = Symbol.getAddress();
if (!AddressOrErr)
report_error(AddressOrErr.takeError(), Obj->getFileName());
uint64_t Address = *AddressOrErr;
Expected<StringRef> Name = Symbol.getName();
if (!Name)
report_error(Name.takeError(), Obj->getFileName());
if (Name->empty())
continue;
Expected<section_iterator> SectionOrErr = Symbol.getSection();
if (!SectionOrErr)
report_error(SectionOrErr.takeError(), Obj->getFileName());
section_iterator SecI = *SectionOrErr;
if (SecI == Obj->section_end())
continue;
uint8_t SymbolType = ELF::STT_NOTYPE;
if (Obj->isELF())
SymbolType = getElfSymbolType(Obj, Symbol);
AllSymbols[*SecI].emplace_back(Address, *Name, SymbolType);
}
if (AllSymbols.empty() && Obj->isELF())
addDynamicElfSymbols(Obj, AllSymbols);
// Create a mapping from virtual address to section.
std::vector<std::pair<uint64_t, SectionRef>> SectionAddresses;
for (SectionRef Sec : Obj->sections())
SectionAddresses.emplace_back(Sec.getAddress(), Sec);
array_pod_sort(SectionAddresses.begin(), SectionAddresses.end());
// Linked executables (.exe and .dll files) typically don't include a real
// symbol table but they might contain an export table.
if (const auto *COFFObj = dyn_cast<COFFObjectFile>(Obj)) {
for (const auto &ExportEntry : COFFObj->export_directories()) {
StringRef Name;
error(ExportEntry.getSymbolName(Name));
if (Name.empty())
continue;
uint32_t RVA;
error(ExportEntry.getExportRVA(RVA));
uint64_t VA = COFFObj->getImageBase() + RVA;
auto Sec = std::upper_bound(
SectionAddresses.begin(), SectionAddresses.end(), VA,
[](uint64_t LHS, const std::pair<uint64_t, SectionRef> &RHS) {
return LHS < RHS.first;
});
if (Sec != SectionAddresses.begin())
--Sec;
else
Sec = SectionAddresses.end();
if (Sec != SectionAddresses.end())
AllSymbols[Sec->second].emplace_back(VA, Name, ELF::STT_NOTYPE);
}
}
// Sort all the symbols, this allows us to use a simple binary search to find
// a symbol near an address.
for (std::pair<const SectionRef, SectionSymbolsTy> &SecSyms : AllSymbols)
array_pod_sort(SecSyms.second.begin(), SecSyms.second.end());
for (const SectionRef &Section : ToolSectionFilter(*Obj)) {
if ((!Section.isText() || Section.isVirtual()))
continue;
StringRef SectionName;
if (auto NameOrErr = Section.getName())
SectionName = *NameOrErr;
else
consumeError(NameOrErr.takeError());
uint64_t SectionAddr = Section.getAddress();
uint64_t SectSize = Section.getSize();
if (!SectSize)
continue;
// Get the list of all the symbols in this section.
SectionSymbolsTy &Symbols = AllSymbols[Section];
std::vector<uint64_t> DataMappingSymsAddr;
std::vector<uint64_t> TextMappingSymsAddr;
if (isArmElf(Obj)) {
for (const auto &Symb : Symbols) {
uint64_t Address = Symb.Addr;
StringRef Name = Symb.Name;
if (Name.startswith("$d"))
DataMappingSymsAddr.push_back(Address - SectionAddr);
if (Name.startswith("$x"))
TextMappingSymsAddr.push_back(Address - SectionAddr);
if (Name.startswith("$a"))
TextMappingSymsAddr.push_back(Address - SectionAddr);
if (Name.startswith("$t"))
TextMappingSymsAddr.push_back(Address - SectionAddr);
}
}
std::sort(DataMappingSymsAddr.begin(), DataMappingSymsAddr.end());
std::sort(TextMappingSymsAddr.begin(), TextMappingSymsAddr.end());
if (Obj->isELF() && Obj->getArch() == Triple::amdgcn) {
// AMDGPU disassembler uses symbolizer for printing labels
std::unique_ptr<MCRelocationInfo> RelInfo(
TheTarget->createMCRelocationInfo(TripleName, Ctx));
if (RelInfo) {
std::unique_ptr<MCSymbolizer> Symbolizer(TheTarget->createMCSymbolizer(
TripleName, nullptr, nullptr, &Symbols, &Ctx, std::move(RelInfo)));
DisAsm->setSymbolizer(std::move(Symbolizer));
}
}
// Make a list of all the relocations for this section.
std::vector<RelocationRef> Rels;
if (InlineRelocs) {
for (const SectionRef &RelocSec : SectionRelocMap[Section]) {
for (const RelocationRef &Reloc : RelocSec.relocations()) {
Rels.push_back(Reloc);
}
}
}
// Sort relocations by address.
std::sort(Rels.begin(), Rels.end(), RelocAddressLess);
// If the section has no symbol at the start, just insert a dummy one.
StringRef name;
if (Symbols.empty() || Symbols[0].Addr != 0) {
Symbols.insert(
Symbols.begin(),
SymbolInfoTy(SectionAddr, name,
Section.isText() ? ELF::STT_FUNC : ELF::STT_OBJECT));
}
SmallString<40> Comments;
raw_svector_ostream CommentStream(Comments);
StringRef BytesStr =
unwrapOrError(Section.getContents(), Obj->getFileName());
ArrayRef<uint8_t> Bytes(reinterpret_cast<const uint8_t *>(BytesStr.data()),
BytesStr.size());
uint64_t Size;
uint64_t Index;
FunctionFilter *FuncFilter = moduleRaiser->getFunctionFilter();
auto FilterConfigFileName = FilterFunctionSet.getValue();
if (!FilterConfigFileName.empty()) {
if (!FuncFilter->readFilterFunctionConfigFile(FilterConfigFileName)) {
dbgs() << "Unable to read function filter configuration file "
<< FilterConfigFileName << ". Ignoring\n";
}
}
// Build a map of relocations (if they exist in the binary) of text
// section whose instructions are being raised.
moduleRaiser->collectTextSectionRelocs(Section);
// Set used to record all branch targets of a function.
std::set<uint64_t> branchTargetSet;
MachineFunctionRaiser *curMFRaiser = nullptr;
// Disassemble symbol by symbol.
LLVM_DEBUG(dbgs() << "BEGIN Disassembly of Functions in Section : "
<< SectionName.data() << "\n");
for (unsigned si = 0, se = Symbols.size(); si != se; ++si) {
uint64_t Start = Symbols[si].Addr - SectionAddr;
// The end is either the section end or the beginning of the next
// symbol.
uint64_t End =
(si == se - 1) ? SectSize : Symbols[si + 1].Addr - SectionAddr;
// Don't try to disassemble beyond the end of section contents.
if (End > SectSize)
End = SectSize;
// If this symbol has the same address as the next symbol, then skip it.
if (Start >= End)
continue;
// Check if we need to skip symbol
// Skip if the symbol's data is not between StartAddress and StopAddress
if (End + SectionAddr < StartAddress ||
Start + SectionAddr > StopAddress) {
continue;
}
// Stop disassembly at the stop address specified
if (End + SectionAddr > StopAddress)
End = StopAddress - SectionAddr;
if (Obj->isELF() && Obj->getArch() == Triple::amdgcn) {
// make size 4 bytes folded
End = Start + ((End - Start) & ~0x3ull);
if (Symbols[si].Type == ELF::STT_AMDGPU_HSA_KERNEL) {
// skip amd_kernel_code_t at the begining of kernel symbol (256 bytes)
Start += 256;
}
if (si == se - 1 ||
Symbols[si + 1].Type == ELF::STT_AMDGPU_HSA_KERNEL) {
// cut trailing zeroes at the end of kernel
// cut up to 256 bytes
const uint64_t EndAlign = 256;
const auto Limit = End - (std::min)(EndAlign, End - Start);
while (End > Limit && *reinterpret_cast<const support::ulittle32_t *>(
&Bytes[End - 4]) == 0)
End -= 4;
}
}
if (isAFunctionSymbol(Obj, Symbols[si])) {
auto &SymStr = Symbols[si].Name;
bool raiseFuncSymbol = true;
if ((!FilterFunctionSet.getValue().empty())) {
// Check the symbol name whether it should be excluded or not.
// Check in a non-empty exclude list
if (!FuncFilter->isFilterSetEmpty(FunctionFilter::FILTER_EXCLUDE)) {
FunctionFilter::FuncInfo *FI = FuncFilter->findFuncInfoBySymbol(
SymStr, FunctionFilter::FILTER_EXCLUDE);
if (FI != nullptr) {
// Record the function start index.
FI->StartIdx = Start;
// Skip raising this function symbol
raiseFuncSymbol = false;
}
}
if (!FuncFilter->isFilterSetEmpty(FunctionFilter::FILTER_INCLUDE)) {
// Include list specified. Unless the current function symbol is
// specified in the include list, skip raising it.
raiseFuncSymbol = false;
// Check the symbol name whether it should be included or not.
if (FuncFilter->findFuncInfoBySymbol(
SymStr, FunctionFilter::FILTER_INCLUDE) != nullptr)
raiseFuncSymbol = true;
}
}
// If Symbol is in the ELFCRTSymbol list return this is a symbol of a
// function we are not interested in disassembling and raising.
if (ELFCRTSymbols.find(SymStr) != ELFCRTSymbols.end())
raiseFuncSymbol = false;
// Check if raising function symbol should be skipped
if (!raiseFuncSymbol)
continue;
// Note that since LLVM infrastructure was built to be used to build a
// conventional compiler pipeline, MachineFunction is built well after
// Function object was created and populated fully. Hence, creation of
// a Function object is necessary to build MachineFunction.
// However, in a raiser, we are conceptually walking the traditional
// compiler pipeline backwards. So we build MachineFunction from
// the binary before building Function object. Given the dependency,
// build a place holder Function object to allow for building the
// MachineFunction object.
// This Function object is NOT populated when raising MachineFunction
// abstraction of the binary function. Instead, a new Function is
// created using the LLVMContext and name of this Function object.
FunctionType *FTy = FunctionType::get(Type::getVoidTy(llvmCtx), false);
StringRef FunctionName(Symbols[si].Name);
// Strip leading underscore if the binary is MachO
if (Obj->isMachO()) {
FunctionName.consume_front("_");
}
Function *Func = Function::Create(FTy, GlobalValue::ExternalLinkage,
FunctionName, &module);
// New function symbol encountered. Record all targets collected to
// current MachineFunctionRaiser before we start parsing the new
// function bytes.
curMFRaiser = moduleRaiser->getCurrentMachineFunctionRaiser();
for (auto target : branchTargetSet) {
assert(curMFRaiser != nullptr &&
"Encountered unintialized MachineFunction raiser object");
curMFRaiser->getMCInstRaiser()->addTarget(target);
}
// Clear the set used to record all branch targets of this function.
branchTargetSet.clear();
// Create a new MachineFunction raiser
curMFRaiser = moduleRaiser->CreateAndAddMachineFunctionRaiser(
Func, moduleRaiser, Start, End);
LLVM_DEBUG(dbgs() << "\nFunction " << Symbols[si].Name << ":\n");
} else {
// Continue using to the most recent MachineFunctionRaiser
// Get current MachineFunctionRaiser
curMFRaiser = moduleRaiser->getCurrentMachineFunctionRaiser();
// assert(curMFRaiser != nullptr && "Current Machine Function Raiser not
// initialized");
if (curMFRaiser == nullptr) {
// At this point in the instruction stream, we do not have a function
// symbol to which the bytes being parsed can be made part of. So skip
// parsing the bytes of this symbol.
continue;
}
// Adjust function end to represent the addition of the content of the
// current symbol. This represents a situation where we have discovered
// bytes (most likely data bytes) that belong to the most recent
// function being parsed.
MCInstRaiser *mcInstRaiser = curMFRaiser->getMCInstRaiser();
if (mcInstRaiser->getFuncEnd() < End) {
assert(mcInstRaiser->adjustFuncEnd(End) &&
"Unable to adjust function end value");
}
}
// Get the associated MCInstRaiser
MCInstRaiser *mcInstRaiser = curMFRaiser->getMCInstRaiser();
// Start new basic block at the symbol.
branchTargetSet.insert(Start);
for (Index = Start; Index < End; Index += Size) {
MCInst Inst;
if (Index + SectionAddr < StartAddress ||
Index + SectionAddr > StopAddress) {
// skip byte by byte till StartAddress is reached
Size = 1;
continue;
}
// AArch64 ELF binaries can interleave data and text in the
// same section. We rely on the markers introduced to
// understand what we need to dump. If the data marker is within a
// function, it is denoted as a word/short etc
if (isArmElf(Obj) && Symbols[si].Type != ELF::STT_OBJECT) {
uint64_t Stride = 0;
auto DAI = std::lower_bound(DataMappingSymsAddr.begin(),
DataMappingSymsAddr.end(), Index);
if (DAI != DataMappingSymsAddr.end() && *DAI == Index) {
// Switch to data.
while (Index < End) {
if (Index + 4 <= End) {
Stride = 4;
uint32_t Data = 0;
if (Obj->isLittleEndian()) {
const auto Word =
reinterpret_cast<const support::ulittle32_t *>(
Bytes.data() + Index);
Data = *Word;
} else {
const auto Word = reinterpret_cast<const support::ubig32_t *>(
Bytes.data() + Index);
Data = *Word;
}
mcInstRaiser->addMCInstOrData(Index, Data);
} else if (Index + 2 <= End) {
Stride = 2;
uint16_t Data = 0;
if (Obj->isLittleEndian()) {
const auto Short =
reinterpret_cast<const support::ulittle16_t *>(
Bytes.data() + Index);
Data = *Short;
} else {
const auto Short =
reinterpret_cast<const support::ubig16_t *>(Bytes.data() +
Index);
Data = *Short;
}
mcInstRaiser->addMCInstOrData(Index, Data);
} else {
Stride = 1;
mcInstRaiser->addMCInstOrData(Index, Bytes.slice(Index, 1)[0]);
}
Index += Stride;
auto TAI = std::lower_bound(TextMappingSymsAddr.begin(),
TextMappingSymsAddr.end(), Index);
if (TAI != TextMappingSymsAddr.end() && *TAI == Index)
break;
}
}
}
// If there is a data symbol inside an ELF text section and we are
// only disassembling text, we are in a situation where we must print
// the data and not disassemble it.
// TODO : Get rid of the following code in the if-block.
if (Obj->isELF() && Symbols[si].Type == ELF::STT_OBJECT &&
Section.isText()) {
// parse data up to 8 bytes at a time
uint8_t AsciiData[9] = {'\0'};
uint8_t Byte;
int NumBytes = 0;
for (Index = Start; Index < End; Index += 1) {
if (((SectionAddr + Index) < StartAddress) ||
((SectionAddr + Index) > StopAddress))
continue;
if (NumBytes == 0) {
outs() << format("%8" PRIx64 ":", SectionAddr + Index);
outs() << "\t";
}
Byte = Bytes.slice(Index)[0];
outs() << format(" %02x", Byte);
AsciiData[NumBytes] = isprint(Byte) ? Byte : '.';
uint8_t IndentOffset = 0;
NumBytes++;
if (Index == End - 1 || NumBytes > 8) {
// Indent the space for less than 8 bytes data.
// 2 spaces for byte and one for space between bytes
IndentOffset = 3 * (8 - NumBytes);
for (int Excess = 8 - NumBytes; Excess < 8; Excess++)
AsciiData[Excess] = '\0';
NumBytes = 8;
}
if (NumBytes == 8) {
AsciiData[8] = '\0';
outs() << std::string(IndentOffset, ' ') << " ";
outs() << reinterpret_cast<char *>(AsciiData);
outs() << '\n';
NumBytes = 0;
}
}
}
if (Index >= End)
break;
// Disassemble a real instruction or a data
bool Disassembled = DisAsm->getInstruction(
Inst, Size, Bytes.slice(Index), SectionAddr + Index, CommentStream);
if (Size == 0)
Size = 1;
if (!Disassembled) {
errs() << "**** Warning: Failed to decode instruction\n";
PIP.printInst(*IP, Disassembled ? &Inst : nullptr,
Bytes.slice(Index, Size), SectionAddr + Index, outs(),
"", *STI);
outs() << CommentStream.str();
Comments.clear();
errs() << "\n";
}
// Add MCInst to the list if all instructions were decoded
// successfully till now. Else, do not bother adding since no attempt
// will be made to raise this function.
if (Disassembled) {
mcInstRaiser->addMCInstOrData(Index, Inst);
// Find branch target and record it. Call targets are not
// recorded as they are not needed to build per-function CFG.
if (MIA && MIA->isBranch(Inst)) {
uint64_t Target;
if (MIA->evaluateBranch(Inst, Index, Size, Target)) {
// In a relocatable object, the target's section must reside in
// the same section as the call instruction or it is accessed
// through a relocation.
//
// In a non-relocatable object, the target may be in any
// section.
//
// N.B. We don't walk the relocations in the relocatable case
// yet.
if (!Obj->isRelocatableObject()) {
auto SectionAddress = std::upper_bound(
SectionAddresses.begin(), SectionAddresses.end(), Target,
[](uint64_t LHS,
const std::pair<uint64_t, SectionRef> &RHS) {
return LHS < RHS.first;
});
if (SectionAddress != SectionAddresses.begin()) {
--SectionAddress;
}
}
// Add the index Target to target indices set.
branchTargetSet.insert(Target);
}
// Mark the next instruction as a target.
uint64_t fallThruIndex = Index + Size;
branchTargetSet.insert(fallThruIndex);
}
}
}
FuncFilter->eraseFunctionBySymbol(Symbols[si].Name,
FunctionFilter::FILTER_INCLUDE);
}
LLVM_DEBUG(dbgs() << "END Disassembly of Functions in Section : "
<< SectionName.data() << "\n");
// Record all targets of the last function parsed
curMFRaiser = moduleRaiser->getCurrentMachineFunctionRaiser();
for (auto target : branchTargetSet)
curMFRaiser->getMCInstRaiser()->addTarget(target);
moduleRaiser->runMachineFunctionPasses();
if (!FuncFilter->isFilterSetEmpty(FunctionFilter::FILTER_INCLUDE)) {
errs() << "***** WARNING: The following include filter symbol(s) are not "
"found :\n";
FuncFilter->dump(FunctionFilter::FILTER_INCLUDE);
}
}
// Add the pass manager
Triple TheTriple = Triple(TripleName);
// Decide where to send the output.
std::unique_ptr<ToolOutputFile> Out = GetOutputStream(Obj->getFileName());
if (!Out)
return;
// Keep the file created.
Out->keep();
raw_pwrite_stream *OS = &Out->os();
legacy::PassManager PM;
LLVMTargetMachine &LLVMTM = static_cast<LLVMTargetMachine &>(*Target);
if (RunPassNames->empty()) {
TargetPassConfig &TPC = *LLVMTM.createPassConfig(PM);
if (TPC.hasLimitedCodeGenPipeline()) {
errs() << ToolName << ": run-pass cannot be used with "
<< TPC.getLimitedCodeGenPipelineReason(" and ") << ".\n";
return;
}
TPC.setDisableVerify(NoVerify);
PM.add(&TPC);
PM.add(machineModuleInfo);
// Add optimizations prior to emitting the output file.
PM.add(new PeepholeOptimizationPass());
// Add print pass to emit ouptut file.
PM.add(new EmitRaisedOutputPass(*OS, OutputFormat));
TPC.printAndVerify("");
for (const std::string &RunPassName : *RunPassNames) {
if (addPass(PM, ToolName, RunPassName, TPC))
return;
}
TPC.setInitialized();
} else if (Target->addPassesToEmitFile(
PM, *OS, nullptr, /* no dwarf output file stream*/
OutputFormat, NoVerify, machineModuleInfo)) {
outs() << ToolName << "run system pass!\n";
}
cl::PrintOptionValues();
PM.run(module);
}