lib/BCGen/HBC/BytecodeDisassembler.cpp (993 lines of code) (raw):
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include "hermes/BCGen/HBC/BytecodeDisassembler.h"
#include "hermes/BCGen/HBC/Bytecode.h"
#include "hermes/BCGen/HBC/SerializedLiteralGenerator.h"
#include "hermes/FrontEndDefs/Builtins.h"
#include "hermes/Support/JenkinsHash.h"
#include "hermes/Support/RegExpSerialization.h"
#include "hermes/Support/SHA1.h"
#include <cstdint>
#include <iomanip>
#include <locale>
#include <string>
#include "llvh/Support/Endian.h"
#include "llvh/Support/ErrorHandling.h"
using namespace hermes::inst;
using SLG = hermes::hbc::SerializedLiteralGenerator;
namespace hermes {
namespace hbc {
using param_t = int64_t;
/// Check if the zero based \p operandIndex in instruction \p opCode is a
/// string table ID.
///
/// The body is generated: OPERAND_STRING_ID is defined here and then
/// BytecodeList.def is included, so each OPERAND_STRING_ID(name, operandNumber)
/// use in that file becomes one `if` below. The macro compares against
/// `operandNumber - 1`, which maps the number given in the .def entry onto the
/// zero based index this function takes.
/// \return true when one of the generated checks matches, false otherwise.
static bool isOperandStringID(OpCode opCode, unsigned operandIndex) {
#define OPERAND_STRING_ID(name, operandNumber) \
if (opCode == OpCode::name && operandIndex == operandNumber - 1) \
return true;
#include "hermes/BCGen/HBC/BytecodeList.def"
// No generated check matched.
return false;
}
/// Decode the tag found at \p buff.
/// \return a pair of (number stored in the tag, tag type). The tag type is the
/// first byte masked with SLG::TagMask. If the top bit (0x80) of the first
/// byte is set, the number is 12 bits wide: the low nibble of the first byte
/// forms the high bits and the next byte the low 8 bits. Otherwise the number
/// is just the low nibble of the first byte.
std::pair<int, SLG::TagType> checkBufferTag(const unsigned char *buff) {
  const auto firstByte = buff[0];
  const bool hasSecondByte = (firstByte & 0x80) != 0;
  if (!hasSecondByte) {
    return std::pair<int, SLG::TagType>{
        firstByte & 0x0f, firstByte & SLG::TagMask};
  }
  return std::pair<int, SLG::TagType>{
      ((firstByte & 0x0f) << 8) | (buff[1]), firstByte & SLG::TagMask};
}
namespace {
/// Render the literal of type \p tag located at \p buff + *ind as text and
/// advance \p *ind past the bytes that were read.
/// String tags print the string ID read from the buffer (1, 2 or 4 bytes),
/// numbers print as "[double ...]", integers as "[int ...]". Null/true/false
/// consume no bytes. Any other tag yields "empty".
std::string SLPToString(SLG::TagType tag, const unsigned char *buff, int *ind) {
  namespace endian = llvh::support::endian;
  const unsigned char *const cursor = buff + *ind;
  const std::string closing{"]"};

  switch (tag) {
    case SLG::ByteStringTag: {
      const uint8_t id =
          endian::read<uint8_t, 1>(cursor, llvh::support::endianness::little);
      *ind += 1;
      return std::string("[String ") + std::to_string(id) + closing;
    }
    case SLG::ShortStringTag: {
      const uint16_t id =
          endian::read<uint16_t, 1>(cursor, llvh::support::endianness::little);
      *ind += 2;
      return std::string("[String ") + std::to_string(id) + closing;
    }
    case SLG::LongStringTag: {
      const uint32_t id =
          endian::read<uint32_t, 1>(cursor, llvh::support::endianness::little);
      *ind += 4;
      return std::string("[String ") + std::to_string(id) + closing;
    }
    case SLG::NumberTag: {
      const double number =
          endian::read<double, 1>(cursor, llvh::support::endianness::little);
      *ind += 8;
      return std::string("[double ") + std::to_string(number) + closing;
    }
    case SLG::IntegerTag: {
      const uint32_t integer =
          endian::read<uint32_t, 1>(cursor, llvh::support::endianness::little);
      *ind += 4;
      return std::string("[int ") + std::to_string(integer) + closing;
    }
    case SLG::NullTag:
      return "null";
    case SLG::TrueTag:
      return "true";
    case SLG::FalseTag:
      return "false";
  }
  // Tag not handled above.
  return "empty";
}
/// \return the one-letter prefix used when printing a string table entry of
/// the given \p kind: "s" for plain strings, "i" for identifiers.
const char *stringKindTag(StringKind::Kind kind) {
  if (kind == StringKind::String) {
    return "s";
  }
  if (kind == StringKind::Identifier) {
    return "i";
  }
  llvm_unreachable("Unrecognised String Kind.");
}
} // namespace
/// Print a summary of the bytecode file to \p OS: version, source hash, the
/// sizes of the main tables and the bytecode options.
void BytecodeDisassembler::disassembleBytecodeFileHeader(raw_ostream &OS) {
  const auto options = bcProvider_->getBytecodeOptions();

  OS << "Bytecode File Information:\n";
  // If the version number in the bytecode file differs from this, the bytecode
  // provider would have exited with an error message.
  OS << " Bytecode version number: " << hbc::BYTECODE_VERSION << "\n"
     << " Source hash: " << hashAsString(bcProvider_->getSourceHash()) << "\n"
     << " Function count: " << bcProvider_->getFunctionCount() << "\n"
     << " String count: " << bcProvider_->getStringCount() << "\n"
     << " String Kind Entry count: " << bcProvider_->getStringKinds().size()
     << "\n"
     << " RegExp count: " << bcProvider_->getRegExpTable().size() << "\n"
     << " Segment ID: " << bcProvider_->getSegmentID() << "\n"
     << " CommonJS module count: " << bcProvider_->getCJSModuleTable().size()
     << "\n"
     << " CommonJS module count (static): "
     << bcProvider_->getCJSModuleTableStatic().size() << "\n"
     << " Function source count: "
     << bcProvider_->getFunctionSourceTable().size() << "\n";

  OS << " Bytecode options:\n"
     << " staticBuiltins: " << options.staticBuiltins << "\n"
     << " cjsModulesStaticallyResolved: "
     << options.cjsModulesStaticallyResolved << "\n";
  OS << "\n";
}
/// Print the global string table to \p OS, one line per string.
/// Each line shows the kind tag and string ID, the encoding, the byte range in
/// string storage, the identifier hash (identifiers only) and the contents.
/// Printable bytes of ASCII strings are emitted verbatim; every other byte is
/// emitted as a \xNN escape. Nothing is printed when there are no strings.
void BytecodeDisassembler::disassembleStringStorage(raw_ostream &OS) {
  auto storage = bcProvider_->getStringStorage();
  auto identifierHashes = bcProvider_->getIdentifierHashes();
  const auto expectedStrings = bcProvider_->getStringCount();
  const auto expectedHashes = identifierHashes.size();
  if (expectedStrings == 0)
    return;

  auto kindEntries = bcProvider_->getStringKinds();
  uint32_t nextString = 0;
  uint32_t nextHash = 0;
  OS << "Global String Table:\n";
  const std::locale cLocale("C");

  for (auto kindEntry : kindEntries) {
    for (uint32_t n = 0; n < kindEntry.count(); ++n, ++nextString) {
      auto tableEntry = bcProvider_->getStringTableEntry(nextString);
      OS << stringKindTag(kindEntry.kind()) << nextString << "[";

      const uint32_t offset = tableEntry.getOffset();
      uint32_t byteLength = tableEntry.getLength();
      const bool utf16 = tableEntry.isUTF16();
      if (utf16) {
        OS << "UTF-16";
        // Two bytes of storage per character.
        byteLength *= 2;
      } else {
        OS << "ASCII";
      }

      // Computed in 64 bits so an empty string at offset 0 prints -1 rather
      // than wrapping around.
      const int64_t lastByte = static_cast<int64_t>(offset) + byteLength - 1;
      OS << ", " << offset << ".." << lastByte << "]";

      if (kindEntry.kind() == StringKind::Identifier) {
        OS << " #"
           << llvh::format_hex_no_prefix(
                  identifierHashes[nextHash++], 8, /* Upper */ true);
      }

      OS << ": ";
      for (unsigned pos = 0; pos < byteLength; ++pos) {
        unsigned char c = storage[offset + pos];
        if (utf16 || !isprint((char)c, cLocale)) {
          OS << "\\x" << llvh::format_hex_no_prefix(c, 2, true);
        } else {
          OS << c;
        }
      }
      OS << "\n";
    }
  }
  OS << "\n";

  assert(nextString == expectedStrings && "Visited all strings.");
  (void)expectedStrings;
  assert(nextHash == expectedHashes && "Visited all hashes.");
  (void)expectedHashes;
}
/// Print every literal stored in the array buffer to \p OS, one per line.
/// Nothing is printed when the buffer is empty.
/// NOTE: The output might not show the value of every literal used
/// by NewArrayWithBuffer (explained in serializeBuffer's header).
void BytecodeDisassembler::disassembleArrayBuffer(raw_ostream &OS) {
  auto literals = bcProvider_->getArrayBuffer();
  if (literals.size() == 0)
    return;

  OS << "Array Buffer:\n";
  for (int cursor = 0; (size_t)cursor < literals.size();) {
    // Each run starts with a tag giving the element type and element count.
    const std::pair<int, SLG::TagType> run =
        checkBufferTag(literals.data() + cursor);
    // Counts above 0x0f are stored in a two-byte tag.
    cursor += (run.first > 0x0f ? 2 : 1);
    for (int n = 0; n < run.first; n++) {
      OS << SLPToString(run.second, literals.data(), &cursor) << "\n";
    }
  }
}
/// Print the object key buffer followed by the object value buffer to \p OS,
/// one literal per line. Nothing is printed when the key buffer is empty.
/// NOTE: The output might not show the value of every literal used
/// by NewObjectWithBuffer (explained in serializeBuffer's header).
void BytecodeDisassembler::disassembleObjectBuffer(raw_ostream &OS) {
  auto keys = bcProvider_->getObjectKeyBuffer();
  auto values = bcProvider_->getObjectValueBuffer();
  if (keys.size() == 0)
    return;

  // Walks one serialized literal buffer: a tag (element type + count) followed
  // by that many elements, repeated until the buffer is exhausted.
  const auto dumpLiterals = [&OS](const auto &buffer) {
    int cursor = 0;
    while ((size_t)cursor < buffer.size()) {
      const std::pair<int, SLG::TagType> run =
          checkBufferTag(buffer.data() + cursor);
      cursor += (run.first > 0x0f ? 2 : 1);
      for (int n = 0; n < run.first; n++) {
        OS << SLPToString(run.second, buffer.data(), &cursor) << "\n";
      }
    }
  };

  OS << "Object Key Buffer:\n";
  dumpLiterals(keys);
  OS << "Object Value Buffer:\n";
  dumpLiterals(values);
}
/// Print the CommonJS module table and the statically resolved CommonJS module
/// table to \p OS. A table that is empty produces no output at all.
void BytecodeDisassembler::disassembleCJSModuleTable(raw_ostream &OS) {
  auto dynamicTable = bcProvider_->getCJSModuleTable();
  if (!dynamicTable.empty()) {
    OS << "CommonJS Modules:\n";
    for (const auto &mapping : dynamicTable) {
      OS << " File ID " << mapping.first << " -> function ID "
         << mapping.second << '\n';
    }
    OS << '\n';
  }

  auto staticTable = bcProvider_->getCJSModuleTableStatic();
  if (!staticTable.empty()) {
    OS << "CommonJS Modules (Static):\n";
    for (uint32_t idx = 0; idx < staticTable.size(); ++idx) {
      const uint32_t moduleID = staticTable[idx].first;
      const uint32_t functionID = staticTable[idx].second;
      OS << "Module ID " << moduleID << " -> function ID " << functionID
         << '\n';
    }
    OS << '\n';
  }
}
/// Print the function source table to \p OS as "Function ID N -> sM" lines.
/// Produces no output when the table is empty.
void BytecodeDisassembler::disassembleFunctionSourceTable(raw_ostream &OS) {
  auto sourceTable = bcProvider_->getFunctionSourceTable();
  if (sourceTable.empty()) {
    return;
  }
  OS << "Function Source Table:\n";
  for (const auto &mapping : sourceTable) {
    OS << " Function ID " << mapping.first << " -> s" << mapping.second
       << '\n';
  }
  OS << '\n';
}
/// Print the exception table of function \p funcId to \p OS using the raw
/// start/end/target values stored in each entry. Produces no output when the
/// function has no exception handlers.
void BytecodeDisassembler::disassembleExceptionHandlers(
    unsigned funcId,
    raw_ostream &OS) {
  auto handlers = bcProvider_->getExceptionTable(funcId);
  const unsigned handlerCount = handlers.size();
  if (handlerCount == 0)
    return;

  OS << "Exception Handlers:\n";
  for (unsigned idx = 0; idx < handlerCount; ++idx) {
    const auto &handler = handlers[idx];
    OS << idx << ": start = " << handler.start << ", end = " << handler.end
       << ", target = " << handler.target << "\n";
  }
  OS << "\n";
}
/// Print the exception table of function \p funcId to \p OS, showing the
/// start/end/target of each entry as the label number that \p jumpTargets
/// holds for the corresponding bytecode address. Produces no output when the
/// function has no exception handlers.
void BytecodeDisassembler::disassembleExceptionHandlersPretty(
    unsigned funcId,
    const JumpTargetsTy &jumpTargets,
    raw_ostream &OS) {
  auto handlers = bcProvider_->getExceptionTable(funcId);
  const unsigned handlerCount = handlers.size();
  if (handlerCount == 0)
    return;

  // Entries hold offsets relative to the start of the function's bytecode.
  const uint8_t *base = bcProvider_->getBytecode(funcId);
  OS << "Exception Handlers:\n";
  for (unsigned idx = 0; idx < handlerCount; ++idx) {
    const auto &handler = handlers[idx];
    OS << idx << ": start = L" << jumpTargets.at(base + handler.start)
       << ", end = L" << jumpTargets.at(base + handler.end)
       << ", target = L" << jumpTargets.at(base + handler.target) << "\n";
  }
  OS << "\n";
}
namespace {
/// Invoke \p f once for every jump table entry that belongs to the SwitchImm
/// instruction \p inst.
/// F: (current index into primary jump table, jump target offset, destination
/// instruction) -> void.
template <typename F>
void switchJumpTableForEach(const inst::Inst *inst, F f) {
  assert(inst->opCode == inst::OpCode::SwitchImm && "expected SwitchImm");

  const unsigned rangeStart = inst->iSwitchImm.op4;
  const unsigned rangeEnd = inst->iSwitchImm.op5;
  assert(rangeStart < rangeEnd);
  const unsigned lastEntry = rangeEnd - rangeStart;

  /// Locate the start of this instruction's view [rangeStart, rangeEnd] of the
  /// jump table: op2 bytes past the instruction, rounded up to uint32_t
  /// alignment. This is the same computation done by the interpreter.
  const auto *instBytes = (const uint8_t *)inst;
  const auto *tableView = reinterpret_cast<const uint32_t *>(
      llvh::alignAddr(instBytes + inst->iSwitchImm.op2, sizeof(uint32_t)));

  // The range is inclusive, hence <=.
  for (unsigned entry = 0; entry <= lastEntry; entry++) {
    auto jumpTargetOffset = tableView[entry];
    f(entry + rangeStart, jumpTargetOffset, instBytes + jumpTargetOffset);
  }
}
} // namespace
void BytecodeVisitor::visitInstructionsInFunction(unsigned funcId) {
funcId_ = funcId;
RuntimeFunctionHeader functionHeader = bcProvider_->getFunctionHeader(funcId);
const uint8_t *bytecodeStart = bcProvider_->getBytecode(funcId);
const uint8_t *bytecodeEnd =
bytecodeStart + functionHeader.bytecodeSizeInBytes();
beforeStart(funcId, bytecodeStart);
visitInstructionsInBody(
bytecodeStart, bytecodeEnd, /* visitSwitchImmTargets = */ true);
afterStart();
} // namespace hbc
void BytecodeVisitor::visitInstructionsInBody(
const uint8_t *bytecodeStart,
const uint8_t *bytecodeEnd,
bool visitSwitchImmTargets) {
auto ip = bytecodeStart;
while (ip < bytecodeEnd) {
const auto md = inst::getInstMetaData(
(reinterpret_cast<const inst::Inst *>(ip))->opCode);
OpCode op = md.opCode;
auto instLength = md.size;
preVisitInstruction(md.opCode, ip, instLength);
// Visit branch targets of the SwitchImm instruction.
if (op == OpCode::SwitchImm && visitSwitchImmTargets) {
switchJumpTableForEach(
(inst::Inst const *)ip,
[this](uint32_t jmpIdx, int32_t offset, const uint8_t *dest) {
this->visitSwitchImmTargets(jmpIdx, offset, dest);
});
}
const uint8_t *operandBuf = ip + sizeof(op);
int operandCount = md.numOperands;
for (int operandIndex = 0; operandIndex < operandCount; operandIndex++) {
auto operandType = md.operandType[operandIndex];
visitOperand(ip, operandType, operandBuf, operandIndex);
operandBuf += getOperandSize(operandType);
}
postVisitInstruction(op, ip, instLength);
ip += instLength;
}
}
/// BytecodeVisitor that folds the instructions of a function into a 32-bit
/// hash using updateJenkinsHash(). Every opcode is hashed; string operands and
/// integer immediates are hashed only when the matching constructor flag is
/// set. Used by BytecodeDisassembler::fuzzyHashBytecode().
class BytecodeHasher : public BytecodeVisitor {
protected:
/// Running hash value, updated while instructions are visited.
uint32_t hash_{0};
/// When true, operands that are string table IDs contribute the contents of
/// the string they refer to.
bool useStrings_;
/// When true, Imm32 operands and the operands of LoadConstUInt8 contribute
/// their value.
bool useIntConstants_;
/// Opcode of the instruction being visited (0xff before the first one);
/// visitOperand() uses it to classify operands.
uint8_t opcode_{0xff};
/// Hash the opcode and remember it for the operand callbacks that follow.
void preVisitInstruction(OpCode opcode, const uint8_t *ip, int length)
override {
hash_ = updateJenkinsHash(hash_, opcode);
opcode_ = static_cast<uint8_t>(opcode);
}
/// Decode one operand and, depending on the flags, hash either the string it
/// refers to or its immediate value. Operands matching neither condition do
/// not affect the hash.
/// The switch cases are generated from BytecodeList.def, one per
/// DEFINE_OPERAND_TYPE(name, ctype) entry, so the operand is decoded with the
/// C type that belongs to its operand type.
void visitOperand(
const uint8_t *ip,
OperandType operandType,
const uint8_t *operandBuf,
int operandIndex) override {
const bool isStringID =
isOperandStringID(static_cast<OpCode>(opcode_), operandIndex);
switch (operandType) {
#define DEFINE_OPERAND_TYPE(name, ctype) \
case OperandType::name: { \
ctype operandVal; \
decodeOperand(operandBuf, &operandVal); \
if (useStrings_ && isStringID) { \
hashOperandString(operandVal); \
} else if ( \
useIntConstants_ && \
(operandType == OperandType::Imm32 || \
opcode_ == (uint8_t)OpCode::LoadConstUInt8)) { \
hashImmediate(operandVal); \
} \
break; \
}
#include "hermes/BCGen/HBC/BytecodeList.def"
}
}
/// Hash the contents of the string table entry \p stringID: two bytes at a
/// time (read as char16_t) for UTF-16 entries, one byte at a time otherwise.
/// NOTE(review): the end pointer is getOffset() + getLength() for both
/// encodings, whereas disassembleStringStorage() doubles the length of UTF-16
/// entries to obtain their size in bytes. If getLength() counts characters,
/// only the first half of a UTF-16 string is hashed here -- confirm before
/// changing, since a fix would alter existing hash values.
void hashOperandString(StringID stringID) {
auto strStorage = bcProvider_->getStringStorage();
auto entry = bcProvider_->getStringTableEntry(stringID);
auto stringBegin = strStorage.begin() + entry.getOffset();
auto stringEnd = stringBegin + entry.getLength();
if (entry.isUTF16()) {
for (auto from = stringBegin; from < stringEnd; from += 2) {
hash_ =
updateJenkinsHash(hash_, *reinterpret_cast<const char16_t *>(from));
}
} else {
for (auto from = stringBegin; from < stringEnd; ++from) {
hash_ = updateJenkinsHash(hash_, *from);
}
}
}
/// Hash \p imm one byte at a time, lowest byte first, stopping as soon as the
/// remaining value is zero. A zero immediate therefore adds nothing to the
/// hash, and high-order zero bytes are skipped.
void hashImmediate(uint32_t imm) {
while (imm) {
hash_ = updateJenkinsHash(hash_, static_cast<uint8_t>(imm));
imm >>= 8;
}
}
public:
/// \param bcProvider provider of the bytecode to hash.
/// \param useStrings whether string operands contribute to the hash.
/// \param useIntConstants whether integer immediates contribute to the hash.
BytecodeHasher(
std::shared_ptr<hbc::BCProvider> bcProvider,
bool useStrings,
bool useIntConstants)
: BytecodeVisitor(bcProvider),
useStrings_(useStrings),
useIntConstants_(useIntConstants) {}
/// \return the hash accumulated over everything visited so far.
uint32_t getHash() const {
return hash_;
}
};
uint32_t BytecodeDisassembler::fuzzyHashBytecode(
unsigned funcId,
bool useStrings,
bool useIntConstants) {
BytecodeHasher hasher(bcProvider_, useStrings, useIntConstants);
hasher.visitInstructionsInFunction(funcId);
return hasher.getHash();
}
/// Once the function body has been visited, make sure the start, end and
/// target of every exception table entry of the function have a label.
void JumpTargetsVisitor::afterStart() {
  for (const auto &handler : bcProvider_->getExceptionTable(funcId_)) {
    // Entries hold offsets relative to the start of the function's bytecode.
    const uint8_t *const rangeBegin = bytecodeStart_ + handler.start;
    const uint8_t *const rangeEnd = bytecodeStart_ + handler.end;
    const uint8_t *const catchTarget = bytecodeStart_ + handler.target;
    createOrSetLabel(rangeBegin);
    createOrSetLabel(rangeEnd);
    createOrSetLabel(catchTarget);
  }
}
/// Record SwitchImm instructions for later jump table decoding, and create a
/// label at the instruction that follows a Ret, Throw, Jmp or JmpLong.
void JumpTargetsVisitor::preVisitInstruction(
    OpCode opcode,
    const uint8_t *ip,
    int length) {
  if (opcode == OpCode::SwitchImm) {
    // Decode jump table of SwitchImm instruction.
    switchInsts_.push_back((inst::Inst const *)ip);
    return;
  }

  const bool labelNextInstruction = opcode == OpCode::Ret ||
      opcode == OpCode::Throw || opcode == OpCode::Jmp ||
      opcode == OpCode::JmpLong;
  if (labelNextInstruction) {
    createOrSetLabel(ip + length);
  }
}
/// Create a label for the destination of every address operand.
/// Only Addr8 and Addr32 operands are decoded; all other operand types are
/// ignored. The switch cases are generated from BytecodeList.def, one per
/// DEFINE_OPERAND_TYPE(name, ctype) entry, so the operand is decoded with the
/// C type that belongs to its operand type.
/// \param ip start of the instruction that owns the operand; address
///   operands are relative to it.
/// \param operandType type of the operand being visited.
/// \param operandBuf location of the encoded operand.
/// \param operandIndex zero based operand position (not used here).
void JumpTargetsVisitor::visitOperand(
const uint8_t *ip,
OperandType operandType,
const uint8_t *operandBuf,
int operandIndex) {
switch (operandType) {
#define DEFINE_OPERAND_TYPE(name, ctype) \
case OperandType::name: { \
if (operandType == OperandType::Addr8 || \
operandType == OperandType::Addr32) { \
ctype operandVal; \
decodeOperand(operandBuf, &operandVal); \
/* operandVal is relative to current ip.*/ \
createOrSetLabel(ip + (int32_t)operandVal); \
} \
break; \
}
#include "hermes/BCGen/HBC/BytecodeList.def"
}
}
/// Dump a string table entry referenced by an opcode operand to \p OS,
/// enclosed in double quotes. The output is truncated to about 16 characters
/// (by appending "..." after the closing quote).
/// For UTF-16 entries, double quotes are backslash-escaped, code units below
/// 32 are printed as \x escapes and code units above 127 as \u escapes; each
/// escape counts with its printed width towards the limit. Bytes of non-UTF-16
/// entries are written unchanged.
/// \param stringID ID of the string table entry to print.
/// \param OS stream that receives all of the output.
void PrettyDisassembleVisitor::dumpOperandString(
    StringID stringID,
    raw_ostream &OS) {
  // After this limit we truncate the string.
  static constexpr unsigned LEN_LIMIT = 16;
  unsigned len = 0;

  // Write the opening quote to the same stream as everything else, so the
  // quoting stays balanced even if the caller passes a stream other than os_.
  OS << '"';

  auto strStorage = bcProvider_->getStringStorage();
  auto entry = bcProvider_->getStringTableEntry(stringID);
  auto stringBegin = strStorage.begin() + entry.getOffset();

  if (entry.isUTF16()) {
    // A UTF-16 entry occupies two bytes of storage per unit of getLength()
    // (disassembleStringStorage() computes its byte range the same way), so
    // the end of the string is getLength() * 2 bytes past its start.
    auto stringEnd = stringBegin + entry.getLength() * 2;
    for (auto from = stringBegin; from < stringEnd; from += 2) {
      if (len > LEN_LIMIT) {
        OS << "\"...";
        return;
      }
      uint16_t cp = *(const uint16_t *)from;
      if (cp == '"') {
        OS << "\\\"";
        len += 2;
        continue;
      }
      if (cp < 32) {
        OS << "\\x" << llvh::format_hex_no_prefix(cp, 2);
        len += 4;
        continue;
      }
      if (cp > 127) {
        OS << "\\u" << llvh::format_hex_no_prefix(cp, 4);
        len += 6;
        continue;
      }
      OS << (char)cp;
      ++len;
    }
  } else {
    auto stringEnd = stringBegin + entry.getLength();
    for (auto from = stringBegin; from < stringEnd; ++from) {
      if (len > LEN_LIMIT) {
        OS << "\"...";
        return;
      }
      OS << *from;
      ++len;
    }
  }
  OS << '"';
}
/// \return the width passed to left_justify for the padding printed after a
/// label line; this implementation adds none.
unsigned PrettyDisassembleVisitor::getIndentation() {
  constexpr unsigned kNoIndentation = 0;
  return kNoIndentation;
}
/// Prepare for disassembling function \p funcId: remember where its bytecode
/// starts and its virtual offset, then print the source location of offset 0.
void PrettyDisassembleVisitor::beforeStart(
    unsigned funcId,
    const uint8_t *bytecodeStart) {
  funcVirtualOffset_ = bcProvider_->getVirtualOffsetForFunction(funcId);
  bytecodeStart_ = bytecodeStart;

  // Print source line for the function.
  constexpr uint32_t kFunctionEntryOffset = 0;
  printSourceLineForOffset(kFunctionEntryOffset);
}
/// Print everything that precedes the operands of an instruction: its label
/// and source location (when \p ip is a jump target), its virtual offset (when
/// IncludeVirtualOffsets is set) and its opcode name. Also remembers
/// \p opcode for the operand callbacks that follow.
void PrettyDisassembleVisitor::preVisitInstruction(
    OpCode opcode,
    const uint8_t *ip,
    int length) {
  opcode_ = opcode;

  const auto labelIt = jumpTargets_.find(ip);
  assert(ip >= bytecodeStart_ && "Why is ip less than bytecodeStart_?");
  const uint32_t offsetInFunction = ip - bytecodeStart_;

  const bool isJumpTarget = labelIt != jumpTargets_.end();
  if (isJumpTarget) {
    os_ << "L" << labelIt->second << ":\n";
    printSourceLineForOffset(offsetInFunction);
    // Use the overrided indention for next line's output.
    os_ << llvh::left_justify("", getIndentation());
  }

  const uint32_t virtualOffset = funcVirtualOffset_ + offsetInFunction;
  const bool showVirtualOffset =
      (options_ & DisassemblyOptions::IncludeVirtualOffsets) ==
      DisassemblyOptions::IncludeVirtualOffsets;
  if (showVirtualOffset) {
    os_ << " " << llvh::right_justify(formatString("%d", virtualOffset), 10);
  }

  os_ << " " << llvh::left_justify(getOpCodeString(opcode), 17);
}
/// Print one operand of the current instruction in human readable form.
/// Operands are separated by "," and preceded by a space. How the value is
/// shown depends on the operand:
///  - operand 1 of CallBuiltin, CallBuiltinLong and GetBuiltinClosure is
///    printed as the quoted builtin method name;
///  - Reg8/Reg32 operands get an "r" prefix;
///  - Addr8/Addr32 operands are printed as the label of their destination;
///  - string table IDs are printed via dumpOperandString();
///  - Double operands are formatted with numberToString();
///  - everything else is printed as a number.
/// The switch cases are generated from BytecodeList.def, one per
/// DEFINE_OPERAND_TYPE(name, ctype) entry, so the operand is decoded with the
/// C type that belongs to its operand type.
/// \param ip start of the instruction that owns the operand; address
///   operands are relative to it.
/// \param operandType type of the operand being visited.
/// \param operandBuf location of the encoded operand.
/// \param operandIndex zero based operand position.
void PrettyDisassembleVisitor::visitOperand(
const uint8_t *ip,
OperandType operandType,
const uint8_t *operandBuf,
int operandIndex) {
// Every operand but the first is preceded by a comma.
if (operandIndex) {
os_ << ",";
}
os_ << " ";
// Special handling for CallBuiltin and GetBuiltinClosure.
if (operandIndex == 1 &&
(opcode_ == inst::OpCode::CallBuiltin ||
opcode_ == inst::OpCode::CallBuiltinLong ||
opcode_ == inst::OpCode::GetBuiltinClosure)) {
// The builtin index is decoded as a single byte and shown by name.
uint8_t builtinIndex;
decodeOperand(operandBuf, &builtinIndex);
os_ << '"' << getBuiltinMethodName(builtinIndex) << '"';
return;
}
// Register operands are shown as rN.
if (operandType == OperandType::Reg8 || operandType == OperandType::Reg32) {
os_ << "r";
}
const bool isStringID = isOperandStringID(opcode_, operandIndex);
switch (operandType) {
#define DEFINE_OPERAND_TYPE(name, ctype) \
case OperandType::name: { \
ctype operandVal; \
decodeOperand(operandBuf, &operandVal); \
if (operandType == OperandType::Addr8 || \
operandType == OperandType::Addr32) { \
/* operandVal is relative to current ip.*/ \
os_ << "L" << jumpTargets_[ip + (int32_t)operandVal]; \
} else if (isStringID) { \
dumpOperandString(operandVal, os_); \
} else if (operandType == OperandType::Double) { \
char buf[hermes::NUMBER_TO_STRING_BUF_SIZE]; \
(void)hermes::numberToString(operandVal, buf, sizeof(buf)); \
os_ << buf; \
} else { \
/* Trick to print out 1-byte value as int instead of char. */ \
os_ << +operandVal; \
} \
break; \
}
#include "hermes/BCGen/HBC/BytecodeList.def"
}
}
void PrettyDisassembleVisitor::printSourceLineForOffset(uint32_t opcodeOffset) {
if ((options_ & DisassemblyOptions::IncludeSource) ==
DisassemblyOptions::IncludeSource) {
llvh::Optional<SourceMapTextLocation> sourceLocOpt =
bcProvider_->getLocationForAddress(funcId_, opcodeOffset);
if (sourceLocOpt.hasValue()) {
const std::string &fileNameStr = sourceLocOpt.getValue().fileName;
os_ << formatString(
"%s[%d:%d]",
fileNameStr.c_str(),
sourceLocOpt.getValue().line,
sourceLocOpt.getValue().column)
<< "\n";
}
}
}
/// Visitor to disassemble a function in non-pretty mode, which neither
/// displays jump labels nor decodes string operands.
/// Each instruction is printed as "[@ offset] OpName" followed by its operands
/// in the form " value<OperandType>", separated by commas. SwitchImm
/// instructions are collected so the caller can print their jump tables
/// afterwards.
class DisassembleVisitor : public BytecodeVisitor {
 private:
  /// Stream that receives the disassembly.
  raw_ostream &os_;
  /// SwitchImm instructions seen so far, in visiting order.
  std::vector<inst::Inst const *> switchInsts_{};

 protected:
  /// Print the offset of the instruction within its function and the opcode
  /// name, and remember the instruction if it is a SwitchImm.
  void preVisitInstruction(OpCode opcode, const uint8_t *ip, int length)
      override {
    int offset = ip - bcProvider_->getBytecode(funcId_);
    assert(offset >= 0);
    os_ << "[@ " << offset << "] " << getOpCodeString(opcode);
    if (opcode == OpCode::SwitchImm) {
      const inst::Inst *inst = (inst::Inst const *)ip;
      switchInsts_.push_back(inst);
    }
  }

  /// Terminate the line of the instruction.
  void postVisitInstruction(OpCode opcode, const uint8_t *ip, int length)
      override {
    os_ << "\n";
  }

  /// Print one operand as " value<OperandType>", preceded by a comma for every
  /// operand but the first.
  void visitOperand(
      const uint8_t *ip,
      OperandType operandType,
      const uint8_t *operandBuf,
      int operandIndex) override {
    if (operandIndex > 0) {
      os_ << ",";
    }

    switch (operandType) {
      // For each operand, we load it based on the size of
      // this operand's type. The + sign before the value is a
      // trick to print out 1-byte value as int instead of char.
#define DEFINE_OPERAND_TYPE(name, ctype)              \
  case OperandType::name: {                           \
    ctype operandVal;                                 \
    decodeOperand(operandBuf, &operandVal);           \
    os_ << " " << +operandVal << "<" << #name << ">"; \
    break;                                            \
  }
#include "hermes/BCGen/HBC/BytecodeList.def"
    }
  }

 public:
  /// \param bcProvider provider of the bytecode to disassemble.
  /// \param os stream that receives the disassembly.
  DisassembleVisitor(
      std::shared_ptr<hbc::BCProvider> bcProvider,
      raw_ostream &os)
      : BytecodeVisitor(bcProvider), os_(os) {}

  /// \return the SwitchImm instructions encountered while visiting.
  std::vector<inst::Inst const *> &getSwitchIntructions() {
    return switchInsts_;
  }
};
/// Record the byte range of every section of a bytecode file so that
/// printSectionRanges() can report them.
/// \param bytecodeStart start of the bytecode file; it is read as a
///   BytecodeFileHeader and used as the base for header-relative offsets.
/// \param bcProvider provider whose tables supply the begin/end of most
///   sections.
/// \param os stream used for later output.
/// NOTE(review): function 0 is queried unconditionally, so this presumably
/// requires a file with at least one function -- confirm.
BytecodeSectionWalker::BytecodeSectionWalker(
const uint8_t *bytecodeStart,
std::shared_ptr<hbc::BCProviderFromBuffer> bcProvider,
llvh::raw_ostream &os)
: bytecodeStart_(bytecodeStart), bcProvider_(bcProvider), os_(os) {
const auto *fileHeader =
reinterpret_cast<const hbc::BytecodeFileHeader *>(bytecodeStart_);
// The function table spans functionCount small function headers.
auto functionHeadersStart = bcProvider->getSmallFunctionHeaders().begin();
addSection(
"Function table",
functionHeadersStart,
functionHeadersStart + fileHeader->functionCount);
// The sections below are delimited by the begin/end of the provider's view
// of the corresponding table.
addSection(
"String Kinds",
bcProvider->getStringKinds().begin(),
bcProvider->getStringKinds().end());
addSection(
"Identifier hashes",
bcProvider->getIdentifierHashes().begin(),
bcProvider->getIdentifierHashes().end());
addSection(
"String table",
bcProvider->getSmallStringTableEntries().begin(),
bcProvider->getSmallStringTableEntries().end());
addSection(
"Overflow String table",
bcProvider->getOverflowStringTableEntries().begin(),
bcProvider->getOverflowStringTableEntries().end());
addSection(
"String storage",
bcProvider->getStringStorage().begin(),
bcProvider->getStringStorage().end());
addSection(
"Array buffer",
bcProvider->getArrayBuffer().begin(),
bcProvider->getArrayBuffer().end());
addSection(
"Object key buffer",
bcProvider->getObjectKeyBuffer().begin(),
bcProvider->getObjectKeyBuffer().end());
addSection(
"Object value buffer",
bcProvider->getObjectValueBuffer().begin(),
bcProvider->getObjectValueBuffer().end());
addSection(
"Regular expression table",
bcProvider->getRegExpTable().begin(),
bcProvider->getRegExpTable().end());
addSection(
"Regular expression storage",
bcProvider->getRegExpStorage().begin(),
bcProvider->getRegExpStorage().end());
addSection(
"CommonJS module table",
bcProvider->getCJSModuleTable().begin(),
bcProvider->getCJSModuleTable().end());
// The remaining sections are delimited by function 0 and by offsets stored
// in the file header: function bodies run from the bytecode of function 0 to
// its info offset, function info from there to the debug info offset, and
// debug info from there to the end of the file.
auto firstFuncStart = bcProvider->getBytecode(0);
auto firstFuncHeader = bcProvider->getFunctionHeader(0);
auto firstFuncInfoStart = bytecodeStart + firstFuncHeader.infoOffset();
auto debugInfoStart = bytecodeStart + fileHeader->debugInfoOffset;
addSection("Function body", firstFuncStart, firstFuncInfoStart);
addSection("Function info", firstFuncInfoStart, debugInfoStart);
addSection(
"Debug info", debugInfoStart, bytecodeStart + fileHeader->fileLength);
// The three parallel vectors must have one element per section.
assert(
sectionNames_.size() == sectionStarts_.size() &&
sectionStarts_.size() == sectionEnds_.size());
}
/// Print "name: [start, end)" for every recorded section, with start and end
/// given as offsets from the beginning of the bytecode file.
/// \param human print the offsets in hexadecimal with a "0x" prefix instead
///   of decimal.
void BytecodeSectionWalker::printSectionRanges(bool human) {
  os_ << "Byte range of each section in bytecode:\n";
  for (unsigned idx = 0; idx < sectionNames_.size(); ++idx) {
    const auto startOffset = sectionStarts_[idx] - bytecodeStart_;
    const auto endOffset = sectionEnds_[idx] - bytecodeStart_;

    // Format the range on the side, then append it to the section name.
    std::stringstream range;
    if (human) {
      // std::hex and std::setfill persist on the stream, so setting them once
      // covers both numbers.
      range << "0x" << std::hex << std::setfill('0') << startOffset;
      range << ", "
            << "0x" << endOffset << ")\n";
    } else {
      range << startOffset;
      range << ", " << endOffset << ")\n";
    }
    os_ << sectionNames_[idx] << ": [" << range.str();
  }
}
/// Visitor to build regex pattern/flag string table.
class RegexStringTableVisitor : public BytecodeVisitor {
private:
/// RegexIndex => <RegexPattern_StringID, RegexFlag_StringID> map.
std::vector<std::pair<uint32_t, uint32_t>> regexStringIDMap_;
protected:
void preVisitInstruction(OpCode opcode, const uint8_t *ip, int length) {
if (opcode == OpCode::CreateRegExp) {
assert(
getInstMetaData(OpCode::CreateRegExp).numOperands == 4 &&
"CreateRegExp should have 4 operands.");
uint32_t patternStringId, flagStringId, regexId;
auto patternOperandBuffer = ip + sizeof(opcode) + sizeof(uint8_t);
decodeOperand(patternOperandBuffer, &patternStringId);
decodeOperand(
patternOperandBuffer + sizeof(patternStringId), &flagStringId);
decodeOperand(
patternOperandBuffer + sizeof(patternStringId) + sizeof(flagStringId),
®exId);
assert(regexId < regexStringIDMap_.size() && "Invalid regex id");
regexStringIDMap_[regexId].first = patternStringId;
regexStringIDMap_[regexId].second = flagStringId;
}
}
public:
RegexStringTableVisitor(std::shared_ptr<hbc::BCProvider> bcProvider)
: BytecodeVisitor(bcProvider) {
regexStringIDMap_.resize(bcProvider_->getRegExpTable().size());
}
std::vector<std::pair<uint32_t, uint32_t>> &getRegexStringIDMap() {
return regexStringIDMap_;
}
};
void BytecodeDisassembler::disassembleFunctionPretty(
unsigned funcId,
raw_ostream &OS) {
// Build jump targets table so that we can map from label address to label
// number.
JumpTargetsVisitor jumpVisitor(bcProvider_);
jumpVisitor.visitInstructionsInFunction(funcId);
auto &jumpTargets = jumpVisitor.getJumpTargets();
PrettyDisassembleVisitor disassembleVisitor(
bcProvider_, jumpTargets, OS, options_);
disassembleVisitor.visitInstructionsInFunction(funcId);
// Print out switch jump tables, if any.
auto &switchInsts = jumpVisitor.getSwitchIntructions();
if (!switchInsts.empty()) {
OS << "\n "
<< "Jump Tables: \n";
for (auto *inst : switchInsts) {
OS << " "
<< "offset " << inst->iSwitchImm.op2 << "\n";
switchJumpTableForEach(
inst, [&](uint32_t jmpIdx, int32_t offset, const uint8_t *dest) {
OS << " " << jmpIdx << " : "
<< "L" << jumpTargets[dest] << "\n";
});
}
}
OS << "\n";
disassembleExceptionHandlersPretty(funcId, jumpTargets, OS);
}
void BytecodeDisassembler::disassembleFunctionRaw(
unsigned funcId,
raw_ostream &OS) {
DisassembleVisitor disassembleVisitor(bcProvider_, OS);
disassembleVisitor.visitInstructionsInFunction(funcId);
// Print out switch jump tables, if any.
auto &switchInsts = disassembleVisitor.getSwitchIntructions();
if (!switchInsts.empty()) {
OS << "\n "
<< "Jump Tables: \n";
for (auto *inst : switchInsts) {
OS << " "
<< "offset " << inst->iSwitchImm.op2 << "\n";
switchJumpTableForEach(
inst, [&](uint32_t jmpIdx, int32_t offset, const uint8_t *dest) {
OS << " " << jmpIdx << " : " << offset << "\n";
});
}
}
OS << "\n";
disassembleExceptionHandlers(funcId, OS);
}
std::vector<std::pair<uint32_t, uint32_t>>
BytecodeDisassembler::generateRegexStringIDMap() {
RegexStringTableVisitor regexVisitor(bcProvider_);
for (unsigned funcId = 0; funcId < bcProvider_->getFunctionCount();
++funcId) {
regexVisitor.visitInstructionsInFunction(funcId);
}
return regexVisitor.getRegexStringIDMap();
}
/// Print every entry of the regexp table to \p OS: a "index: /pattern/flags"
/// line followed by a dump of the regex bytecode. Produces no output when the
/// regexp storage is empty.
void BytecodeDisassembler::disassembleRegexs(raw_ostream &OS) {
  auto storage = bcProvider_->getRegExpStorage();
  if (storage.empty()) {
    return;
  }

  // Pattern and flag strings are recovered from the CreateRegExp instructions.
  std::vector<std::pair<uint32_t, uint32_t>> stringIDs =
      generateRegexStringIDMap();

  OS << "RegExp Bytecodes:\n";
  uint32_t regexIndex = 0;
  for (auto &tableEntry : bcProvider_->getRegExpTable()) {
    const auto &ids = stringIDs[regexIndex];
    OS << regexIndex << ": /" << bcProvider_->getStringRefFromID(ids.first)
       << "/" << bcProvider_->getStringRefFromID(ids.second) << "\n";
    dumpRegexBytecode(storage.slice(tableEntry.offset, tableEntry.length), OS);
    ++regexIndex;
  }
  OS << '\n';
}
/// Outputs disassembly in a format resembling that of the "objdump" tool.
/// This is meant to be consumed by tools that expect such a format, not
/// for humans to read.
/// Each function starts with a "<offset> <_funcId>:" header; each instruction
/// is printed as its address, its raw bytes in hex, its opcode name and its
/// operands.
class ObjdumpDisassembleVisitor : public BytecodeVisitor {
private:
/// ID of the function being disassembled.
/// NOTE(review): other visitors in this file use a funcId_ that is not
/// declared in their own class, so this member appears to shadow one
/// inherited from BytecodeVisitor -- confirm that this is intended.
unsigned funcId_ = 0;
/// Offset of the current function as reported by its function header; added
/// to instruction-relative offsets to form the printed addresses.
unsigned funcOffset_ = 0;
/// Start of the bytecode of the current function.
const uint8_t *bytecodeStart_ = nullptr;
/// Stream that receives the disassembly.
raw_ostream &os_;
/// Remember the function being visited and print its header line.
void beforeStart(unsigned funcId, const uint8_t *bytecodeStart) override {
funcId_ = funcId;
funcOffset_ = bcProvider_->getFunctionHeader(funcId).offset();
bytecodeStart_ = bytecodeStart;
os_ << "\n"
<< llvh::format_hex_no_prefix(funcOffset_, 16) << " <_" << funcId
<< ">:\n";
}
/// Print the address of the instruction, its raw bytes and its opcode name.
void preVisitInstruction(inst::OpCode opcode, const uint8_t *ip, int length)
override {
os_ << llvh::format_hex_no_prefix(ip - bytecodeStart_ + funcOffset_, 8)
<< ":\t";
// Raw encoding of the instruction, one hex byte at a time.
for (int i = 0; i < length; ++i)
os_ << llvh::format_hex_no_prefix(ip[i], 2) << " ";
// Align/justify to help any humans debugging the output.
for (int i = length; i < 20; ++i)
os_ << " ";
os_ << llvh::left_justify(getOpCodeString(opcode), 32);
}
/// Terminate the line of the instruction.
void postVisitInstruction(inst::OpCode opcode, const uint8_t *ip, int length)
override {
os_ << "\n";
}
/// Print one operand, preceded by a comma for every operand but the first:
///  - Addr8/Addr32: the absolute address of the destination, in hex;
///  - Double: "$" followed by the raw 64-bit pattern in hex;
///  - Reg8/Reg32: "%r" followed by the register number;
///  - anything else: "$" followed by the value in hex.
/// The switch cases are generated from BytecodeList.def, one per
/// DEFINE_OPERAND_TYPE(name, ctype) entry, so the operand is decoded with the
/// C type that belongs to its operand type.
void visitOperand(
const uint8_t *ip,
inst::OperandType operandType,
const uint8_t *operandBuf,
int operandIndex) override {
if (operandIndex) {
os_ << ",";
}
os_ << " ";
switch (operandType) {
#define DEFINE_OPERAND_TYPE(name, ctype) \
case OperandType::name: { \
ctype operandVal; \
decodeOperand(operandBuf, &operandVal); \
if (operandType == OperandType::Addr8 || \
operandType == OperandType::Addr32) { \
/* operandVal is relative to current ip.*/ \
os_ << llvh::format_hex_no_prefix( \
ip + (int32_t)operandVal - bytecodeStart_ + funcOffset_, 8); \
} else if (operandType == OperandType::Double) { \
uint64_t raw; \
memcpy(&raw, operandBuf, sizeof(raw)); \
os_ << "$" << llvh::format_hex(raw, sizeof(raw)); \
} else if ( \
operandType == OperandType::Reg8 || \
operandType == OperandType::Reg32) { \
/* "+" is a trick to print out 1-byte value as int instead of char. */ \
os_ << "%r" << +operandVal; \
} else { \
os_ << "$" \
<< llvh::format_hex(operandVal, getOperandSize(operandType) * 2); \
} \
break; \
}
#include "hermes/BCGen/HBC/BytecodeList.def"
}
}
public:
/// \param bcProvider provider of the bytecode to disassemble.
/// \param os stream that receives the disassembly.
ObjdumpDisassembleVisitor(
std::shared_ptr<hbc::BCProvider> bcProvider,
raw_ostream &os)
: BytecodeVisitor(bcProvider), os_(os) {}
/// Disassemble a synthetic function with all opcodes with all zero operands.
/// The header line uses (unsigned)-1 as both the offset and the function ID.
/// Jump tables of SwitchImm are not visited, since the synthetic body has
/// none.
void listOpCodes() {
os_ << "\n"
<< llvh::format_hex_no_prefix((unsigned)-1, 16) << " <_" << (unsigned)-1
<< ">:\n";
// Synthesize the function body.
std::vector<uint8_t> bytecode;
for (uint8_t op = 0; op < (uint8_t)inst::OpCode::_last; ++op) {
bytecode.push_back(op);
auto instLength =
inst::getInstMetaData(static_cast<inst::OpCode>(op)).size;
// Pad with zero bytes up to the full length of the instruction.
bytecode.resize(bytecode.size() + instLength - 1);
}
// beforeStart() is not called on this path, so set the base address here.
bytecodeStart_ = bytecode.data();
visitInstructionsInBody(
bytecode.data(),
bytecode.data() + bytecode.size(),
/* visitSwitchImmTargets = */ false);
}
};
/// Disassemble the whole bytecode file to \p OS.
/// With the Objdump option set, only an objdump-style listing of every
/// function is produced (plus the opcode list when IncludeOpCodeList is set).
/// Otherwise the output consists of the file header, the string table, the
/// literal buffers, the CommonJS and function source tables, every function
/// (header line, debug table offsets, body), the regexps and the debug info.
void BytecodeDisassembler::disassemble(raw_ostream &OS) {
  const auto isSet = [this](DisassemblyOptions flag) {
    return (options_ & flag) == flag;
  };

  if (isSet(DisassemblyOptions::Objdump)) {
    OS << "\n" << hashAsString(bcProvider_->getSourceHash()) << ": ";
    OS << "file format HBC-" << hbc::BYTECODE_VERSION << "\n\n\n";
    OS << "Disassembly of section .text:\n";
    for (unsigned funcId = 0; funcId < bcProvider_->getFunctionCount();
         ++funcId) {
      ObjdumpDisassembleVisitor functionPrinter(bcProvider_, OS);
      functionPrinter.visitInstructionsInFunction(funcId);
    }
    if (isSet(DisassemblyOptions::IncludeOpCodeList)) {
      ObjdumpDisassembleVisitor opcodePrinter(bcProvider_, OS);
      opcodePrinter.listOpCodes();
    }
    return;
  }

  disassembleBytecodeFileHeader(OS);
  disassembleStringStorage(OS);
  disassembleArrayBuffer(OS);
  disassembleObjectBuffer(OS);
  disassembleCJSModuleTable(OS);
  disassembleFunctionSourceTable(OS);

  // Prints an offset into the debug table, or "none" if there is no offset.
  const auto printDebugOffset = [&OS](uint32_t debugOffset) {
    if (debugOffset == DebugOffsets::NO_OFFSET) {
      OS << "none";
    } else {
      OS << llvh::format_hex(debugOffset, 6);
    }
  };

  for (unsigned funcId = 0; funcId < bcProvider_->getFunctionCount();
       ++funcId) {
    RuntimeFunctionHeader header = bcProvider_->getFunctionHeader(funcId);
    auto name = bcProvider_->getStringRefFromID(header.functionName());

    // The kind shown in the function's header line depends on which way of
    // invoking it is prohibited.
    StringRef kindLabel{};
    switch (header.flags().prohibitInvoke) {
      case FunctionHeaderFlag::ProhibitCall:
        kindLabel = "Constructor";
        break;
      case FunctionHeaderFlag::ProhibitConstruct:
        kindLabel = "NCFunction";
        break;
      default:
        kindLabel = "Function";
        break;
    }

    OS << kindLabel << "<" << name << ">";
    if (isSet(DisassemblyOptions::IncludeFunctionIds)) {
      OS << funcId;
    }
    OS << "(" << header.paramCount() << " params, " << header.frameSize()
       << " registers, "
       << static_cast<unsigned int>(header.environmentSize()) << " symbols)";
    OS << ":\n";

    auto *debugOffsets = bcProvider_->getDebugOffsets(funcId);
    if (header.flags().hasDebugInfo && debugOffsets != nullptr) {
      OS << "Offset in debug table: source ";
      printDebugOffset(debugOffsets->sourceLocations);
      OS << ", lexical ";
      printDebugOffset(debugOffsets->lexicalData);
      OS << '\n';
    }

    disassembleFunction(funcId, OS);
  }

  disassembleRegexs(OS);
  bcProvider_->getDebugInfo()->disassemble(OS);
}
} // namespace hbc
} // namespace hermes