tools/hbcdump/hbcdump.cpp (479 lines of code) (raw):
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include "HBCParser.h"
#include "ProfileAnalyzer.h"
#include "hermes/BCGen/HBC/BytecodeDisassembler.h"
#include "hermes/Public/Buffer.h"
#include "hermes/SourceMap/SourceMapParser.h"
#include "hermes/Support/MemoryBuffer.h"
#include "llvh/ADT/SmallVector.h"
#include "llvh/Support/CommandLine.h"
#include "llvh/Support/FileSystem.h"
#include "llvh/Support/InitLLVM.h"
#include "llvh/Support/MemoryBuffer.h"
#include "llvh/Support/PrettyStackTrace.h"
#include "llvh/Support/Signals.h"
#include "llvh/Support/raw_ostream.h"
#include <iostream>
#include <map>
#include <sstream>
#include <string>
using namespace hermes;
using namespace hermes::hbc;
using llvh::raw_fd_ostream;
/// Required positional argument: path of the input file. main() loads it with
/// llvh::MemoryBuffer::getFile and passes the buffer to the bytecode provider.
static llvh::cl::opt<std::string> InputFilename(
llvh::cl::Positional,
llvh::cl::desc("input file"),
llvh::cl::Required);
/// -out: when non-empty, main() opens this path and uses it as the output
/// stream instead of llvh::outs().
static llvh::cl::opt<std::string> DumpOutputFilename(
"out",
llvh::cl::desc("Output file name"));
/// -source-map: when non-empty, main() parses this file with SourceMapParser
/// and hands the resulting SourceMap to enterCommandLoop().
static llvh::cl::opt<std::string> SourceMapFilename(
"source-map",
llvh::cl::desc("Optional source-map file name, used by function-info"));
/// -c: semicolon-separated commands. main() splits the value on ';' and the
/// pieces are executed by enterCommandLoop() before it prompts for input.
static llvh::cl::opt<std::string> StartupCommands(
"c",
llvh::cl::desc(
"A list of commands to execute before entering "
"interactive mode separated by semicolon. "
"You can use this option to execute a bunch of commands "
"without entering interactive mode, like -c \"cmd1;cmd2;quit\""));
/// Disassembly output styles selectable on the command line.
/// enterCommandLoop() adds DisassemblyOptions::Pretty for Pretty and
/// DisassemblyOptions::Objdump for Objdump; Raw adds no extra option.
enum class DisassemblyFormat {
Raw,
Pretty,
Objdump,
};
/// Selected disassembly style; defaults to Pretty. The option itself is
/// declared without a name, and each enumerator is registered under the name
/// given in cl::values ("raw-disassemble", "pretty-disassemble",
/// "objdump-disassemble").
static llvh::cl::opt<DisassemblyFormat> DisassemblyOutputFormat(
llvh::cl::desc("Disassembly formatting:"),
llvh::cl::init(DisassemblyFormat::Pretty),
llvh::cl::values(
clEnumValN(DisassemblyFormat::Raw, "raw-disassemble", "Legacy format"),
clEnumValN(
DisassemblyFormat::Pretty,
"pretty-disassemble",
"Pretty print"),
clEnumValN(
DisassemblyFormat::Objdump,
"objdump-disassemble",
"Like objdump")));
/// -list-opcodes: when set, enterCommandLoop() adds
/// DisassemblyOptions::IncludeOpCodeList; it asserts that the Objdump format
/// is the one selected.
static llvh::cl::opt<bool> ListOpCodes(
"list-opcodes",
llvh::cl::init(false),
llvh::cl::desc("For objdump format, also include a list of all opcodes"));
/// -mode: analysis mode string.
/// NOTE(review): no read of this option is visible in this file - confirm
/// whether it is still needed.
static llvh::cl::opt<std::string> AnalyzeMode(
"mode",
llvh::cl::desc(
"The analysis mode you want to use(either instruction or function)"));
/// -profile-file: when non-empty, main() loads this file and passes its
/// buffer to enterCommandLoop() as the profile buffer.
static llvh::cl::opt<std::string> ProfileFile(
"profile-file",
llvh::cl::desc(
"Log file in json format generated by basic block profiler"));
/// -show-section-ranges: when set (and no profile file was given), main()
/// prints section ranges through BytecodeSectionWalker instead of entering
/// the command loop.
static llvh::cl::opt<bool> ShowSectionRanges(
"show-section-ranges",
llvh::cl::init(false),
llvh::cl::desc("Show the byte range of each section in bytecode"));
/// -human: forwarded by main() as the argument of
/// BytecodeSectionWalker::printSectionRanges().
static llvh::cl::opt<bool> HumanizeSectionRanges(
"human",
llvh::cl::init(false),
llvh::cl::desc("Print bytecode section ranges in hex format"));
/// Forward declaration so that enterCommandLoop() can call it; the definition
/// appears later in this file.
/// Runs the single command contained in \p commandWithOptions, writing to
/// \p os and using \p analyzer and \p disassembler to produce the output.
/// \return true when the caller should leave the interactive command loop.
static bool executeCommand(
llvh::raw_ostream &os,
ProfileAnalyzer &analyzer,
BytecodeDisassembler &disassembler,
const std::string &commandWithOptions);
/// Read the next line of standard input into \p line.
///
/// A read that fails while errno is EINTR is retried rather than reported as
/// end of input.
/// \return true when a line was read, false once input is exhausted.
static bool getline(std::string &line) {
  while (true) {
    // libc++'s getline() appears to set cin's EOF bit when the underlying
    // read is interrupted, which would make every successive call report EOF.
    // Zero errno first so that a failure can be attributed to EINTR reliably.
    errno = 0;
    const bool readFailed = std::getline(std::cin, line).fail();
    if (!readFailed) {
      return true;
    }
    if (errno != EINTR) {
      // A genuine end of input (or stream error): stop retrying.
      return false;
    }
    // Interrupted read: drop the bogus stream flags and try again.
    std::cin.clear();
  }
}
/// Print interactive help to llvh::outs().
///
/// With no \p command (or an empty one) an introduction is printed, followed
/// by the name of every documented command, one per line in alphabetical
/// order. Otherwise the help text of \p command is printed. The short
/// spellings accepted by executeCommand() (for example "dis" or "fun") are
/// resolved to the full command name first, so a usage error on an alias
/// shows the usage text rather than "Invalid command".
/// \param command name or alias of the command to describe, if any.
static void printHelp(llvh::Optional<llvh::StringRef> command = llvh::None) {
  // Help text keyed by the full command name. An ordered map keeps the
  // top-level listing stable and alphabetical.
  static const std::map<std::string, std::string> commandToHelpText = {
      {"function",
       "'function': Compute the runtime instruction frequency "
       "for each function and display in descending order. "
       "Each function name is displayed together with its source code line number.\n\n"
       "'function <FUNC_ID>': Dump basic block stats for function with id <FUNC_ID>.\n\n"
       "'function -used': List all invoked function IDs, one per line.\n\n"
       "USAGE: function [<FUNC_ID> | -used]\n"
       " fun [<FUNC_ID> | -used]\n"},
      {"instruction",
       "Computes the runtime instruction frequency for each instruction "
       "and displays it in descending order.\n\n"
       "USAGE: instruction\n"
       " inst\n"},
      {"disassemble",
       "'disassemble': Display bytecode disassembled output of whole binary.\n"
       "'disassemble <FUNC_ID>': Display bytecode disassembled output of function with id <FUNC_ID>.\n"
       "Add the '-offsets' flag to show virtual offsets for all instructions.\n\n"
       "USAGE: disassemble [<FUNC_ID>] [-offsets]\n"
       " dis [<FUNC_ID>] [-offsets]\n"},
      {"summary",
       "Display overall summary information.\n\n"
       "USAGE: summary\n"},
      {"io",
       "Visualize function page I/O access working set "
       "in basic block profile trace.\n\n"
       "USAGE: io\n"},
      {"block",
       "Display top hot basic blocks in sorted order.\n\n"
       "USAGE: block\n"},
      {"at-virtual",
       "Display information about the function at a given virtual offset.\n\n"
       "USAGE: at-virtual <OFFSET>\n"},
      {"at-offset",
       "Display information about the function at a given file offset.\n\n"
       "USAGE: at-offset <OFFSET>\n"},
      {"help",
       "Help instructions for hbcdump tool commands.\n\n"
       "USAGE: help [<COMMAND>]\n"
       " h [<COMMAND>]\n"},
      {"function-info",
       "Display info about a specific function, or all functions\n\n"
       "USAGE: function-info [<FUNC_ID>]\n"
       "NOTE: Virtual offset is the offset from the beginning of the segment\n"},
      {"string",
       "Display string for ID\n\n"
       "USAGE: string <STRING_ID>\n"},
      {"filename",
       "Display file name for ID\n\n"
       "USAGE: filename <FILENAME_ID>\n"},
      {"epilogue",
       "Dump the epilogue.\n\n"
       "USAGE: epilogue\n"},
      {"quit",
       "Quit the interactive command loop.\n\n"
       "USAGE: quit\n"},
  };
  // Alternative spellings accepted by executeCommand(), mapped to the key
  // under which their help text is stored above.
  static const std::map<std::string, std::string> aliasToCommand = {
      {"fun", "function"},
      {"inst", "instruction"},
      {"dis", "disassemble"},
      {"str", "string"},
      {"sum", "summary"},
      {"epi", "epilogue"},
      {"at_virtual", "at-virtual"},
      {"at_offset", "at-offset"},
      {"h", "help"},
  };

  if (!command.hasValue() || command->empty()) {
    static const std::string topLevelHelpText =
        "These commands are defined internally. Type `help' to see this list.\n"
        "Type `help name' to find out more about the function `name'.\n\n";
    llvh::outs() << topLevelHelpText;
    for (const auto &entry : commandToHelpText) {
      llvh::outs() << entry.first << '\n';
    }
    return;
  }

  // Resolve an alias to its full command name before the lookup.
  std::string commandName = command->str();
  const auto aliasIt = aliasToCommand.find(commandName);
  if (aliasIt != aliasToCommand.end()) {
    commandName = aliasIt->second;
  }

  const auto helpIt = commandToHelpText.find(commandName);
  if (helpIt == commandToHelpText.end()) {
    // Report the name exactly as the user typed it.
    llvh::outs() << "Invalid command: " << *command << '\n';
    return;
  }
  llvh::outs() << helpIt->second;
}
/// Run the hbcdump command interpreter.
///
/// Creates a BytecodeDisassembler configured from the disassembly-format
/// command-line flags and a ProfileAnalyzer, executes every entry of
/// \p startupCommands, and then reads further commands from stdin until a
/// command requests termination or input is exhausted.
/// \param os stream receiving the prompt and all command output.
/// \param bcProvider bytecode provider given to both the disassembler and the
///   analyzer.
/// \param profileBufferOpt profile buffer, if any; its contents are moved
///   into the analyzer.
/// \param sourceMap source map (may be null); moved into the analyzer.
/// \param startupCommands commands executed, in order, before prompting.
static void enterCommandLoop(
    llvh::raw_ostream &os,
    std::shared_ptr<hbc::BCProvider> bcProvider,
    llvh::Optional<std::unique_ptr<llvh::MemoryBuffer>> profileBufferOpt,
    std::unique_ptr<SourceMap> &&sourceMap,
    const std::vector<std::string> &startupCommands) {
  BytecodeDisassembler disassembler(bcProvider);

  // Source information and function IDs are always part of the output; the
  // selected format contributes at most one more option (Raw adds none).
  DisassemblyOptions options = DisassemblyOptions::IncludeSource |
      DisassemblyOptions::IncludeFunctionIds;
  if (DisassemblyOutputFormat == DisassemblyFormat::Pretty) {
    options = options | DisassemblyOptions::Pretty;
  } else if (DisassemblyOutputFormat == DisassemblyFormat::Objdump) {
    options = options | DisassemblyOptions::Objdump;
  }
  if (ListOpCodes) {
    assert(
        DisassemblyOutputFormat == DisassemblyFormat::Objdump &&
        "only supported for objdump format");
    options = options | DisassemblyOptions::IncludeOpCodeList;
  }
  disassembler.setOptions(options);

  ProfileAnalyzer analyzer(
      os,
      bcProvider,
      profileBufferOpt.hasValue()
          ? llvh::Optional<std::unique_ptr<llvh::MemoryBuffer>>(
                std::move(profileBufferOpt.getValue()))
          : llvh::None,
      std::move(sourceMap));

  // Every startup command is executed; a termination request from any of them
  // is remembered and only suppresses the interactive phase below.
  bool quitRequested = false;
  for (const std::string &startupCommand : startupCommands) {
    const bool wantsQuit =
        executeCommand(os, analyzer, disassembler, startupCommand);
    quitRequested = quitRequested || wantsQuit;
  }

  // Interactive phase: prompt, read, execute.
  while (!quitRequested) {
    os << "hbcdump> ";
    std::string inputLine;
    if (!getline(inputLine)) {
      // No more input available.
      return;
    }
    quitRequested = executeCommand(os, analyzer, disassembler, inputLine);
  }
}
/// Erase the first element of \p haystack that compares equal to \p needle.
/// Later equal elements, if any, are left in place.
/// \return true when an element was erased, false when nothing matched.
template <typename Container, typename Value>
static bool findAndRemoveOne(Container &haystack, const Value &needle) {
  const auto last = haystack.end();
  for (auto cur = haystack.begin(); cur != last; ++cur) {
    if (*cur == needle) {
      // Returning right after erase() means the invalidated iterators are
      // never touched again.
      haystack.erase(cur);
      return true;
    }
  }
  return false;
}
/// Scoped guard that installs new options on a BytecodeDisassembler and
/// restores the previous ones on destruction.
///
/// The constructor records the disassembler's current options and then
/// applies \p newOptions; the destructor re-applies the recorded options.
/// The guard holds a reference to the disassembler, so it must not outlive
/// it. Copying is disabled: a copy would restore the saved options a second
/// time, possibly after another guard had already changed them.
class DisassemblerOptionsHolder {
 public:
  /// \param disassembler the disassembler whose options are overridden.
  /// \param newOptions options in effect for the lifetime of the guard.
  DisassemblerOptionsHolder(
      BytecodeDisassembler &disassembler,
      DisassemblyOptions newOptions)
      : disassembler_(disassembler), savedOptions_(disassembler.getOptions()) {
    disassembler_.setOptions(newOptions);
  }

  /// Put back the options that were active when the guard was created.
  ~DisassemblerOptionsHolder() {
    disassembler_.setOptions(savedOptions_);
  }

  DisassemblerOptionsHolder(const DisassemblerOptionsHolder &) = delete;
  DisassemblerOptionsHolder &operator=(const DisassemblerOptionsHolder &) =
      delete;

 private:
  /// Disassembler being guarded.
  BytecodeDisassembler &disassembler_;
  /// Options to restore in the destructor.
  DisassemblyOptions savedOptions_;
};
/// Execute a single command from \p commandTokens.
/// \return true telling caller to terminate the interactive command loop.
static bool executeCommand(
llvh::raw_ostream &os,
ProfileAnalyzer &analyzer,
BytecodeDisassembler &disassembler,
const std::string &commandWithOptions) {
// Parse command tokens.
llvh::SmallVector<llvh::StringRef, 8> commandTokens;
llvh::StringRef(commandWithOptions).split(commandTokens, ' ');
if (commandTokens.empty()) {
// Ignore empty input.
return false;
}
const llvh::StringRef command = commandTokens[0];
if (command == "function" || command == "fun") {
if (findAndRemoveOne(commandTokens, "-used")) {
analyzer.dumpUsedFunctionIDs();
} else if (commandTokens.size() == 1) {
analyzer.dumpFunctionStats();
} else if (commandTokens.size() == 2) {
uint32_t funcId;
if (commandTokens[1].getAsInteger(0, funcId)) {
os << "Error: cannot parse func_id as integer.\n";
return false;
}
analyzer.dumpFunctionBasicBlockStat(funcId);
} else {
printHelp(command);
return false;
}
} else if (command == "instruction" || command == "inst") {
if (commandTokens.size() == 1) {
analyzer.dumpInstructionStats();
} else {
printHelp(command);
return false;
}
} else if (command == "disassemble" || command == "dis") {
auto localOptions = findAndRemoveOne(commandTokens, "-offsets")
? DisassemblyOptions::IncludeVirtualOffsets
: DisassemblyOptions::None;
DisassemblerOptionsHolder optionsHolder(
disassembler, disassembler.getOptions() | localOptions);
if (commandTokens.size() == 1) {
disassembler.disassemble(os);
} else if (commandTokens.size() == 2) {
uint32_t funcId;
if (commandTokens[1].getAsInteger(0, funcId)) {
os << "Error: cannot parse func_id as integer.\n";
return false;
}
if (funcId >= disassembler.getFunctionCount()) {
os << "Error: no function with id: " << funcId << " exists.\n";
return false;
}
disassembler.disassembleFunction(funcId, os);
} else {
printHelp(command);
return false;
}
} else if (command == "string" || command == "str") {
if (commandTokens.size() != 2) {
printHelp(command);
return false;
}
uint32_t stringId;
if (commandTokens[1].getAsInteger(0, stringId)) {
os << "Error: cannot parse string_id as integer.\n";
return false;
}
analyzer.dumpString(stringId);
} else if (command == "filename") {
if (commandTokens.size() != 2) {
printHelp(command);
return false;
}
uint32_t filenameId;
if (commandTokens[1].getAsInteger(0, filenameId)) {
os << "Error: cannot parse filename_id as integer.\n";
return false;
}
analyzer.dumpFileName(filenameId);
} else if (command == "function-info") {
JSONEmitter json(os, /* pretty */ true);
if (commandTokens.size() == 1) {
analyzer.dumpAllFunctionInfo(json);
} else if (commandTokens.size() == 2) {
uint32_t funcId;
if (commandTokens[1].getAsInteger(0, funcId)) {
os << "Error: cannot parse func_id as integer.\n";
return false;
}
analyzer.dumpFunctionInfo(funcId, json);
} else {
printHelp(command);
return false;
}
} else if (command == "io") {
analyzer.dumpIO();
} else if (command == "summary" || command == "sum") {
analyzer.dumpSummary();
} else if (command == "block") {
analyzer.dumpBasicBlockStats();
} else if (command == "at_virtual" || command == "at-virtual") {
JSONEmitter json(os, /* pretty */ true);
if (commandTokens.size() == 2) {
uint32_t virtualOffset;
if (commandTokens[1].getAsInteger(0, virtualOffset)) {
os << "Error: cannot parse virtualOffset as integer.\n";
return false;
}
auto funcId = analyzer.getFunctionFromVirtualOffset(virtualOffset);
if (funcId.hasValue()) {
analyzer.dumpFunctionInfo(*funcId, json);
} else {
os << "Virtual offset " << virtualOffset << " is invalid.\n";
}
} else {
printHelp(command);
return false;
}
} else if (command == "at_offset" || command == "at-offset") {
JSONEmitter json(os, /* pretty */ true);
if (commandTokens.size() == 2) {
uint32_t offset;
if (commandTokens[1].getAsInteger(0, offset)) {
os << "Error: cannot parse offset as integer.\n";
return false;
}
auto funcId = analyzer.getFunctionFromOffset(offset);
if (funcId.hasValue()) {
analyzer.dumpFunctionInfo(*funcId, json);
} else {
os << "Offset " << offset << " is invalid.\n";
}
} else {
printHelp(command);
return false;
}
} else if (command == "epilogue" || command == "epi") {
analyzer.dumpEpilogue();
} else if (command == "help" || command == "h") {
// Interactive help command.
if (commandTokens.size() == 2) {
printHelp(commandTokens[1]);
} else {
printHelp();
}
return false;
} else if (command == "quit") {
// Quit command loop.
return true;
} else {
printHelp(command);
return false;
}
os << "\n";
return false;
}
int main(int argc, char **argv) {
#ifndef HERMES_FBCODE_BUILD
// Normalize the arg vector.
llvh::InitLLVM initLLVM(argc, argv);
#else
// When both HERMES_FBCODE_BUILD and sanitizers are enabled, InitLLVM may have
// been already created and destroyed before main() is invoked. This presents
// a problem because InitLLVM can't be instantiated more than once in the same
// process. The most important functionality InitLLVM provides is shutting
// down LLVM in its destructor. We can use "llvm_shutdown_obj" to do the same.
llvh::llvm_shutdown_obj Y;
#endif
llvh::cl::ParseCommandLineOptions(argc, argv, "Hermes bytecode dump tool\n");
llvh::ErrorOr<std::unique_ptr<llvh::MemoryBuffer>> fileBufOrErr =
llvh::MemoryBuffer::getFile(InputFilename);
if (!fileBufOrErr) {
llvh::errs() << "Error: fail to open file: " << InputFilename << ": "
<< fileBufOrErr.getError().message() << "\n";
return -1;
}
auto buffer =
std::make_unique<hermes::MemoryBuffer>(fileBufOrErr.get().get());
const uint8_t *bytecodeStart = buffer->data();
auto ret =
hbc::BCProviderFromBuffer::createBCProviderFromBuffer(std::move(buffer));
if (!ret.first) {
llvh::errs() << "Error: fail to deserializing bytecode: " << ret.second;
return 1;
}
// Parse startup commands list(separated by semicolon).
std::vector<std::string> startupCommands;
if (!StartupCommands.empty()) {
std::istringstream iss(StartupCommands.data());
std::string command;
while (getline(iss, command, ';')) {
startupCommands.emplace_back(command);
}
}
llvh::Optional<raw_fd_ostream> fileOS;
if (!DumpOutputFilename.empty()) {
std::error_code EC;
fileOS.emplace(DumpOutputFilename.data(), EC, llvh::sys::fs::F_Text);
if (EC) {
llvh::errs() << "Error: fail to open file " << DumpOutputFilename << ": "
<< EC.message() << '\n';
return -1;
}
}
auto &output = fileOS ? *fileOS : llvh::outs();
std::unique_ptr<SourceMap> sourceMap;
if (!SourceMapFilename.empty()) {
llvh::ErrorOr<std::unique_ptr<llvh::MemoryBuffer>> sourceMapBufOrErr =
llvh::MemoryBuffer::getFile(SourceMapFilename);
if (!sourceMapBufOrErr) {
llvh::errs() << "Error: fail to open file: " << SourceMapFilename << ": "
<< sourceMapBufOrErr.getError().message() << "\n";
return -1;
}
SourceErrorManager sm;
sourceMap = SourceMapParser::parse(*sourceMapBufOrErr.get().get(), sm);
if (!sourceMap) {
llvh::errs() << "Error loading source map: " << SourceMapFilename << "\n";
return -1;
}
}
if (ProfileFile.empty()) {
if (ShowSectionRanges) {
BytecodeSectionWalker walker(bytecodeStart, std::move(ret.first), output);
walker.printSectionRanges(HumanizeSectionRanges);
} else {
enterCommandLoop(
output,
std::move(ret.first),
llvh::None,
std::move(sourceMap),
startupCommands);
}
} else {
llvh::ErrorOr<std::unique_ptr<llvh::MemoryBuffer>> profileBuffer =
llvh::MemoryBuffer::getFile(ProfileFile);
if (!profileBuffer) {
llvh::errs() << "Error: fail to open file: " << ProfileFile
<< profileBuffer.getError().message() << "\n";
return -1;
}
enterCommandLoop(
output,
std::move(ret.first),
std::move(profileBuffer.get()),
std::move(sourceMap),
startupCommands);
}
return 0;
}