lib/IR/IR.cpp (601 lines of code) (raw):
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include "llvh/ADT/SetVector.h"
#include "llvh/ADT/SmallString.h"
#include "llvh/Support/Casting.h"
#include "llvh/Support/ErrorHandling.h"
#include "llvh/Support/raw_ostream.h"
#include "hermes/AST/Context.h"
#include "hermes/IR/IR.h"
#include "hermes/IR/Instrs.h"
#include "hermes/Utils/Dumper.h"
#include <set>
#include <type_traits>
#define INCLUDE_ALL_INSTRS
using namespace hermes;
using llvh::cast;
using llvh::dyn_cast;
// Make sure the ValueKinds.def tree is consistent with the class hierarchy.
#define QUOTE(X) #X
#define DEF_VALUE(CLASS, PARENT) \
static_assert( \
std::is_base_of<PARENT, CLASS>::value, \
QUOTE(CLASS) " should directly inherit from " QUOTE(PARENT)); \
static_assert( \
std::is_convertible<CLASS *, PARENT *>::value, \
QUOTE(CLASS) " should publicly inherit from " QUOTE(PARENT)); \
static_assert( \
ValueKind::CLASS##Kind > ValueKind::First_##PARENT##Kind, \
QUOTE(CLASS) "Kind should be after First_" QUOTE(PARENT) "Kind"); \
static_assert( \
ValueKind::CLASS##Kind < ValueKind::Last_##PARENT##Kind, \
QUOTE(CLASS) "Kind should be before Last_" QUOTE(PARENT) "Kind"); \
static_assert( \
ValueKind::PARENT##Kind == \
static_cast<ValueKind>( \
static_cast<unsigned>(ValueKind::First_##PARENT##Kind) + 1), \
QUOTE(PARENT) "Kind should be right after First_" QUOTE(PARENT) "Kind");
#include "hermes/IR/ValueKinds.def"
#undef QUOTE
void Value::destroy(Value *V) {
if (!V)
return;
switch (V->Kind) {
default:
llvm_unreachable("Invalid kind");
#define DEF_VALUE(XX, PARENT) \
case ValueKind::XX##Kind: \
delete cast<XX>(V); \
break;
#include "hermes/IR/ValueKinds.def"
}
}
StringRef Value::getKindStr() const {
switch (Kind) {
default:
llvm_unreachable("Invalid kind");
#define DEF_VALUE(XX, PARENT) \
case ValueKind::XX##Kind: \
return StringRef(#XX);
#include "hermes/IR/ValueKinds.def"
}
}
const Value::UseListTy &Value::getUsers() const {
return Users;
}
unsigned Value::getNumUsers() const {
return Users.size();
}
bool Value::hasUsers() const {
return Users.size();
}
bool Value::hasOneUser() const {
return 1 == Users.size();
}
void Value::removeUse(Use U) {
assert(Users.size() && "Removing a user from an empty list");
assert(U.first == this && "Invalid user");
// We don't care about the order of the operands in the use vector. One cheap
// way to delete an element is to pop the last element and save it on top of
// the element that we want to remove. This is faster than moving the whole
// array.
Users[U.second] = Users.back();
Users.pop_back();
// If we've changed the location of a use in the use list then we need to
// update the operand in the user.
if (U.second != Users.size()) {
Use oldUse = {this, Users.size()};
auto &operands = Users[U.second]->Operands;
for (int i = 0, e = operands.size(); i < e; i++) {
if (operands[i] == oldUse) {
operands[i] = {this, U.second};
return;
}
}
llvm_unreachable("Can't find user in operand list");
}
}
Value::Use Value::addUser(Instruction *Inst) {
Users.push_back(Inst);
return {this, Users.size() - 1};
}
void Value::replaceAllUsesWith(Value *Other) {
if (this == Other) {
return;
}
// Ask the users of this value to unregister themselves. Notice that the users
// modify and invalidate the iterators of Users.
while (Users.size()) {
Users[Users.size() - 1]->replaceFirstOperandWith(this, Other);
}
}
void Value::removeAllUses() {
// Ask the users of this value to delete operands of this value. Notice that
// the users modify and invalidate the iterators of Users.
while (Users.size()) {
Users[Users.size() - 1]->eraseOperand(this);
}
}
bool Value::hasUser(Value *other) {
return std::find(Users.begin(), Users.end(), other) != Users.end();
}
bool VariableScope::isGlobalScope() const {
return function_->isGlobalScope() && function_->getFunctionScope() == this;
}
ExternalScope::ExternalScope(Function *function, int32_t depth)
: VariableScope(ValueKind::ExternalScopeKind, function), depth_(depth) {
function->addExternalScope(this);
}
Function::Function(
ValueKind kind,
Module *parent,
Identifier originalName,
DefinitionKind definitionKind,
bool strictMode,
SourceVisibility sourceVisibility,
bool isGlobal,
SMRange sourceRange,
Function *insertBefore)
: Value(kind),
parent_(parent),
isGlobal_(isGlobal),
externalScopes_(),
functionScope_(this),
originalOrInferredName_(originalName),
definitionKind_(definitionKind),
strictMode_(strictMode),
SourceRange(sourceRange),
sourceVisibility_(sourceVisibility),
internalName_(parent->deriveUniqueInternalName(originalName)) {
if (insertBefore) {
assert(insertBefore != this && "Cannot insert a function before itself!");
assert(
insertBefore->getParent() == parent &&
"Function to insert before is from a different module!");
parent->insert(insertBefore->getIterator(), this);
} else {
parent->push_back(this);
}
// Derive internalName from originalName.
assert(originalName.isValid() && "Function originalName must be valid");
}
Function::~Function() {
// Free all parameters.
for (auto *p : Parameters) {
Value::destroy(p);
}
Value::destroy(thisParameter);
// Free all external scopes.
for (auto *ES : externalScopes_)
Value::destroy(ES);
}
std::string Function::getDefinitionKindStr(bool isDescriptive) const {
switch (definitionKind_) {
case Function::DefinitionKind::ES5Function:
return "function";
case Function::DefinitionKind::ES6Constructor:
return "constructor";
case Function::DefinitionKind::ES6Arrow:
return isDescriptive ? "arrow function" : "arrow";
case Function::DefinitionKind::ES6Method:
return "method";
}
assert(false && "Invalid DefinitionKind");
return "function";
}
std::string Function::getDescriptiveDefinitionKindStr() const {
return (isAnonymous() ? "anonymous " : "") + getDefinitionKindStr(true);
}
llvh::Optional<llvh::StringRef> Function::getSourceRepresentationStr() const {
switch (getSourceVisibility()) {
case SourceVisibility::ShowSource: {
// Return the actual source code string.
auto sourceRange = getSourceRange();
assert(
sourceRange.isValid() && "Function does not have source available");
const auto sourceStart = sourceRange.Start.getPointer();
llvh::StringRef strBuf{
sourceStart, (size_t)(sourceRange.End.getPointer() - sourceStart)};
return strBuf;
}
case SourceVisibility::HideSource:
case SourceVisibility::Sensitive: {
// For implementation-hiding functions, the spec requires the source code
// representation of them "must have the syntax of a NativeFunction".
//
// Naively adding 'function <name>() { [ native code ] }' to string table
// for each function would be a huge waste of space in the generated hbc.
// Instead, we associate all such functions with an empty string to
// effectively "mark" them. (the assumption is that an real function
// source can't be empty). This reduced the constant factor of the space
// cost to only 8 bytes (one FunctionSourceTable entry) per function.
return llvh::StringRef("");
}
case SourceVisibility::Default:
default: {
// No need of special source representation for these cases, their
// 'toString' will return `{ [ bytecode ] }` as the default.
return llvh::None;
}
}
}
BasicBlock::BasicBlock(Function *parent)
: Value(ValueKind::BasicBlockKind), Parent(parent) {
assert(Parent && "Invalid parent function");
Parent->addBlock(this);
}
void BasicBlock::dump() {
IRPrinter D(getParent()->getContext(), llvh::outs());
D.visit(*this);
}
void BasicBlock::printAsOperand(llvh::raw_ostream &OS, bool) const {
// Use the address of the basic block when LLVM prints the CFG.
size_t Num = (size_t)this;
OS << "BB#" << std::to_string(Num);
}
void Instruction::dump(llvh::raw_ostream &os) {
IRPrinter D(getParent()->getContext(), os);
D.visit(*this);
}
Instruction::Instruction(
const Instruction *src,
llvh::ArrayRef<Value *> operands)
: Instruction(src->getKind()) {
assert(
src->getNumOperands() == operands.size() && "invalid number of operands");
setType(src->getType());
location_ = src->location_;
statementIndex_ = src->statementIndex_;
for (auto val : operands)
pushOperand(val);
}
void Instruction::pushOperand(Value *Val) {
Operands.push_back({nullptr, 0});
setOperand(Val, getNumOperands() - 1);
}
void Instruction::setOperand(Value *Val, unsigned Index) {
assert(Index < Operands.size() && "Not all operands have been pushed!");
Value *CurrentValue = Operands[Index].first;
// If the operand is already set then there is nothing to do. The instruction
// is already registered in the use-list of the value.
if (CurrentValue == Val) {
return;
}
// Remove the current instruction from the old value that we are removing.
if (CurrentValue) {
CurrentValue->removeUse(Operands[Index]);
}
// Register this instruction as a user of the new value and set the operand.
if (Val) {
Operands[Index] = Val->addUser(this);
} else {
Operands[Index] = {nullptr, 0};
}
}
Value *Instruction::getOperand(unsigned Index) const {
return Operands[Index].first;
}
unsigned Instruction::getNumOperands() const {
return Operands.size();
}
void Instruction::removeOperand(unsigned index) {
// We call to setOperand before deleting the operand because setOperand
// un-registers the user from the user list.
setOperand(nullptr, index);
Operands.erase(Operands.begin() + index);
}
void Instruction::replaceFirstOperandWith(Value *OldValue, Value *NewValue) {
for (int i = 0, e = getNumOperands(); i < e; i++) {
if (OldValue == getOperand(i)) {
setOperand(NewValue, i);
return;
}
}
llvm_unreachable("Can't find operand. Invalid use-def chain.");
}
void Instruction::eraseOperand(Value *Value) {
// Overwrite all of the operands that we are removing with null. This will
// unregister them from the use list.
for (int i = 0, e = getNumOperands(); i < e; ++i) {
if (getOperand(i) == Value)
setOperand(nullptr, i);
}
// Now remove all null operands from the list.
auto new_end = std::remove_if(
Operands.begin(), Operands.end(), [](Use U) { return !U.first; });
Operands.erase(new_end, Operands.end());
assert(!Value->hasUser(this) && "corrupt uselist");
}
void Instruction::insertBefore(Instruction *InsertPos) {
InsertPos->getParent()->getInstList().insert(InsertPos->getIterator(), this);
}
void Instruction::insertAfter(Instruction *InsertPos) {
InsertPos->getParent()->getInstList().insertAfter(
InsertPos->getIterator(), this);
}
void Instruction::moveBefore(Instruction *Later) {
if (this == Later)
return;
getParent()->getInstList().remove(this);
Later->getParent()->getInstList().insert(Later->getIterator(), this);
setParent(Later->getParent());
}
void BasicBlock::remove(Instruction *I) {
InstList.remove(I);
}
void BasicBlock::erase(Instruction *I) {
InstList.erase(I);
}
void Instruction::removeFromParent() {
getParent()->remove(this);
}
void Instruction::eraseFromParent() {
// Release this instruction from the use-list of other instructions.
for (unsigned i = 0; i < getNumOperands(); i++)
setOperand(nullptr, i);
getParent()->erase(this);
}
void Function::eraseFromParentNoDestroy() {
// Erase all of the basic blocks before deleting the function.
while (begin() != end()) {
begin()->replaceAllUsesWith(nullptr);
begin()->eraseFromParent();
}
assert(!hasUsers() && "Use list is not empty");
getParent()->getFunctionList().remove(getIterator());
}
StringRef Instruction::getName() {
switch (getKind()) {
default:
llvm_unreachable("Invalid kind");
#define DEF_VALUE(XX, PARENT) \
case ValueKind::XX##Kind: \
return #XX;
#include "hermes/IR/Instrs.def"
}
}
SideEffectKind Instruction::getDerivedSideEffect() {
switch (getKind()) {
default:
llvm_unreachable("Invalid kind");
#define DEF_VALUE(XX, PARENT) \
case ValueKind::XX##Kind: \
return cast<XX>(this)->getSideEffect();
#include "hermes/IR/Instrs.def"
}
}
WordBitSet<> Instruction::getChangedOperands() {
switch (getKind()) {
default:
llvm_unreachable("Invalid kind");
#define DEF_VALUE(XX, PARENT) \
case ValueKind::XX##Kind: \
return cast<XX>(this)->getChangedOperandsImpl(); \
break;
#include "hermes/IR/Instrs.def"
}
}
Parameter::Parameter(Function *parent, Identifier name)
: Value(ValueKind::ParameterKind), Parent(parent), Name(name) {
assert(Parent && "Invalid parent");
if (name.str() == "this") {
Parent->setThisParameter(this);
} else {
Parent->addParameter(this);
}
}
Variable::Variable(
ValueKind k,
VariableScope *scope,
DeclKind declKind,
Identifier txt)
: Value(k), declKind(declKind), text(txt), parent(scope) {
scope->addVariable(this);
}
Variable::~Variable() {}
int Variable::getIndexInVariableList() const {
int index = 0;
for (auto V : parent->getVariables()) {
if (V == this)
return index;
index++;
}
llvm_unreachable("Cannot find variable in the variable list");
}
Identifier Parameter::getName() const {
return Name;
}
void BasicBlock::push_back(Instruction *I) {
InstList.push_back(I);
}
TerminatorInst *BasicBlock::getTerminator() {
if (InstList.empty())
return nullptr;
return llvh::dyn_cast<TerminatorInst>(&InstList.back());
}
const TerminatorInst *BasicBlock::getTerminator() const {
if (InstList.empty())
return nullptr;
return llvh::dyn_cast<TerminatorInst>(&InstList.back());
}
void BasicBlock::removeFromParent() {
getParent()->getBasicBlockList().remove(getIterator());
}
void BasicBlock::eraseFromParent() {
// Erase all of the instructions in the block before deleting the block.
// We are starting to delete from the start of the block. Naturally we will
// have forward dependencies between instructions. To allow safe deletion
// we replace all uses with the invalid null value. setOperand knows how
// to deal with null values.
while (begin() != end()) {
begin()->replaceAllUsesWith(nullptr);
begin()->eraseFromParent();
}
assert(!hasUsers() && "Use list is not empty");
// Erase the block itself:
getParent()->getBasicBlockList().erase(getIterator());
}
Context &Function::getContext() const {
return parent_->getContext();
}
void Function::addBlock(BasicBlock *BB) {
BasicBlockList.push_back(BB);
}
void Function::addParameter(Parameter *A) {
Parameters.push_back(A);
}
Module::~Module() {
FunctionList.clear();
// Free global properties.
globalPropertyMap_.clear();
for (auto *prop : globalPropertyList_) {
Value::destroy(prop);
}
llvh::SmallVector<Literal *, 32> toDelete;
// Collect all literals.
// Note that we cannot delete while iterating due to the implementation
// of FoldingSet.
for (auto &L : literalNumbers) {
toDelete.push_back(&L);
}
for (auto &L : literalStrings) {
toDelete.push_back(&L);
}
// Free the literals.
for (auto *L : toDelete) {
Value::destroy(L);
}
}
void Module::push_back(Function *F) {
FunctionList.push_back(F);
}
void Module::insert(iterator position, Function *F) {
FunctionList.insert(position, F);
}
GlobalObjectProperty *Module::findGlobalProperty(Identifier name) {
auto it = globalPropertyMap_.find(name);
return it != globalPropertyMap_.end() ? it->second : nullptr;
}
GlobalObjectProperty *Module::addGlobalProperty(
Identifier name,
bool declared) {
auto &ref = globalPropertyMap_[name];
if (!ref) {
ref = new GlobalObjectProperty(this, getLiteralString(name), declared);
globalPropertyList_.push_back(ref);
} else {
ref->orDeclared(declared);
}
return ref;
}
void Module::eraseGlobalProperty(GlobalObjectProperty *prop) {
globalPropertyMap_.erase(prop->getName()->getValue());
auto it =
std::find(globalPropertyList_.begin(), globalPropertyList_.end(), prop);
if (it != globalPropertyList_.end()) {
Value::destroy(*it);
globalPropertyList_.erase(it);
}
}
void Module::populateCJSModuleUseGraph() {
if (!cjsModuleUseGraph_.empty()) {
return;
}
for (Function &f : *this) {
for (Instruction *user : f.getUsers()) {
// Add an edge to f, from the function which uses f.
cjsModuleUseGraph_[user->getParent()->getParent()].insert(&f);
}
}
}
llvh::DenseSet<Function *> Module::getFunctionsInSegment(uint32_t segment) {
populateCJSModuleUseGraph();
// Final set of functions which must be output when generating this segment.
llvh::DenseSet<Function *> result{};
// Current set of functions which we haven't inspected (the frontier).
// Use this to perform graph search and find all used functions.
llvh::SetVector<Function *> worklist{};
// Populate the worklist initially with the wrapper functions for each module
// in the given segment.
for (Function *fn : cjsModuleSegmentMap_[segment]) {
worklist.insert(fn);
}
while (!worklist.empty()) {
Function *cur = worklist.back();
worklist.pop_back();
if (result.count(cur)) {
// We've already visited this function and added its children, so don't do
// it again.
continue;
}
result.insert(cur);
// The functions that are used by the function cur.
const auto targets = cjsModuleUseGraph_[cur];
worklist.insert(targets.begin(), targets.end());
}
return result;
}
uint32_t Module::getTemplateObjectID(RawStringList &&rawStrings) {
// Try inserting into the map with a dummy value.
auto res = templateObjectIDMap_.emplace(std::move(rawStrings), 0);
if (res.second) {
// Insertion succeeded. Overwrite the value with the correct id.
res.first->second = templateObjectIDMap_.size() - 1;
}
return res.first->second;
}
Context &Instruction::getContext() const {
return Parent->getContext();
}
Context &BasicBlock::getContext() const {
return Parent->getContext();
}
Context &Parameter::getContext() const {
return Parent->getContext();
}
/// \returns true if this parameter is a 'this' parameter.
bool Parameter::isThisParameter() const {
return Parent->getThisParameter() == this;
}
int Parameter::getIndexInParamList() const {
int index = 0;
for (auto P : Parent->getParameters()) {
if (P == this)
return index;
index++;
}
llvm_unreachable("Cannot find parameter in the function");
}
void Function::dump() {
IRPrinter D(getParent()->getContext(), llvh::outs());
D.visit(*this);
}
void Function::viewGraph() {
::viewGraph(this);
}
/// Strip the " #number" suffice of a generated internal name, or a name that
/// just happens to be in that format.
static inline Identifier stripInternalNameSuffix(
Context &context,
Identifier originalName) {
auto originalStr = originalName.str();
auto e = originalStr.end();
if (!(e - originalStr.begin() >= 3 && e[-1] == '#' && e[-2] >= '0' &&
e[-2] <= '9')) {
return originalName;
}
e -= 2;
while (e != originalStr.begin() && e[-1] >= '0' && e[-1] <= '9') {
--e;
}
if (!(e != originalStr.begin() && e[-1] == ' '))
return originalName;
--e;
return context.getIdentifier(originalStr.slice(0, e - originalStr.begin()));
}
Identifier Module::deriveUniqueInternalName(Identifier originalName) {
assert(originalName.isValid() && "originalName must be valid");
// Check whether the original name already is in the form "... number#" and
// if so, strip the suffix.
originalName = stripInternalNameSuffix(getContext(), originalName);
auto insertResult = internalNamesMap_.try_emplace(originalName, 0);
// If inserted for the first time, there is no need for a suffix.
if (insertResult.second)
return originalName;
// Construct a suffix using the number of internal names derived from this
// identifier.
++insertResult.first->second;
char itoaBuf[16];
snprintf(itoaBuf, sizeof(itoaBuf), "%u", insertResult.first->second);
llvh::SmallString<32> buf;
buf.append(originalName.str());
buf.append(" ");
buf.append(itoaBuf);
buf.append("#");
return getContext().getIdentifier(buf);
}
void Module::viewGraph() {
for (auto &F : *this) {
::viewGraph(&F);
}
}
void Module::dump() {
for (auto &F : *this) {
F.dump();
}
}
LiteralNumber *Module::getLiteralNumber(double value) {
// Check to see if we've already seen this tuple before.
llvh::FoldingSetNodeID ID;
LiteralNumber::Profile(ID, value);
// If this is not the first time we see this tuple then return the old copy.
void *InsertPos = nullptr;
if (LiteralNumber *LN = literalNumbers.FindNodeOrInsertPos(ID, InsertPos))
return LN;
auto New = new LiteralNumber(value);
literalNumbers.InsertNode(New, InsertPos);
return New;
}
LiteralString *Module::getLiteralString(Identifier value) {
// Check to see if we've already seen this tuple before.
llvh::FoldingSetNodeID ID;
LiteralString::Profile(ID, value);
// If this is not the first time we see this tuple then return the old copy.
void *InsertPos = nullptr;
if (LiteralString *LS = literalStrings.FindNodeOrInsertPos(ID, InsertPos))
return LS;
auto New = new LiteralString(value);
literalStrings.InsertNode(New, InsertPos);
return New;
}
LiteralBool *Module::getLiteralBool(bool value) {
if (value)
return &literalTrue;
return &literalFalse;
}
void Type::print(llvh::raw_ostream &OS) const {
bool first = true;
for (unsigned i = 0; i < (unsigned)Type::TypeKind::LAST_TYPE; i++) {
// Don't print the object type annotations if the type is closure or regex.
if (i == (unsigned)Type::TypeKind::Object &&
(isClosureType() || isRegExpType())) {
continue;
}
if (bitmask_ & (1 << i)) {
if (!first) {
OS << "|";
}
OS << getKindStr((Type::TypeKind)i);
first = false;
}
}
}
raw_ostream &llvh::operator<<(raw_ostream &OS, const hermes::Type &T) {
T.print(OS);
return OS;
}