hphp/hhbbc/emit.cpp (1,037 lines of code) (raw):
/*
+----------------------------------------------------------------------+
| HipHop for PHP |
+----------------------------------------------------------------------+
| Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
+----------------------------------------------------------------------+
| This source file is subject to version 3.01 of the PHP license, |
| that is bundled with this package in the file LICENSE, and is |
| available through the world-wide-web at the following url: |
| http://www.php.net/license/3_01.txt |
| If you did not receive a copy of the PHP license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@php.net so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
*/
#include "hphp/hhbbc/emit.h"
#include <vector>
#include <algorithm>
#include <iterator>
#include <map>
#include <memory>
#include <type_traits>
#include <folly/gen/Base.h>
#include <folly/Conv.h>
#include <folly/Memory.h>
#include "hphp/hhbbc/cfg.h"
#include "hphp/hhbbc/class-util.h"
#include "hphp/hhbbc/context.h"
#include "hphp/hhbbc/func-util.h"
#include "hphp/hhbbc/index.h"
#include "hphp/hhbbc/options.h"
#include "hphp/hhbbc/representation.h"
#include "hphp/hhbbc/type-structure.h"
#include "hphp/hhbbc/unit-util.h"
#include "hphp/runtime/base/repo-auth-type-array.h"
#include "hphp/runtime/base/repo-auth-type-codec.h"
#include "hphp/runtime/base/repo-auth-type.h"
#include "hphp/runtime/base/tv-comparisons.h"
#include "hphp/runtime/vm/bytecode.h"
#include "hphp/runtime/vm/func-emitter.h"
#include "hphp/runtime/vm/native.h"
#include "hphp/runtime/vm/preclass-emitter.h"
#include "hphp/runtime/vm/type-alias-emitter.h"
#include "hphp/runtime/vm/unit-emitter.h"
namespace HPHP::HHBBC {
TRACE_SET_MOD(hhbbc_emit);
namespace {
//////////////////////////////////////////////////////////////////////
const StaticString s_invoke("__invoke");
//////////////////////////////////////////////////////////////////////
struct PceInfo {
PreClassEmitter* pce;
Id origId;
};
struct EmitUnitState {
explicit EmitUnitState(const Index& index, const php::Unit* unit) :
index(index), unit(unit) {}
/*
* Access to the Index for this program.
*/
const Index& index;
/*
* Access to the unit we're emitting
*/
const php::Unit* unit;
/*
* While emitting bytecode, we keep track of the classes and funcs
* we emit.
*/
std::vector<Offset> classOffsets;
std::vector<PceInfo> pceInfo;
};
/*
* Some bytecodes need to be mutated before being emitted. Pass those
* bytecodes by value to their respective emit_op functions.
*/
template<typename T>
struct OpInfoHelper {
static constexpr bool by_value = T::op == Op::CreateCl;
using type = typename std::conditional<by_value, T, const T&>::type;
};
template<typename T>
using OpInfo = typename OpInfoHelper<T>::type;
/*
* Helper to conditionally call fun on data provided data is of the
* given type.
*/
template<Op op, typename F, typename Bc>
std::enable_if_t<std::remove_reference_t<Bc>::op == op>
caller(F&& fun, Bc&& data) { fun(std::forward<Bc>(data)); }
template<Op op, typename F, typename Bc>
std::enable_if_t<std::remove_reference_t<Bc>::op != op>
caller(F&&, Bc&&) {}
Id recordClass(EmitUnitState& euState, UnitEmitter& ue, Id id) {
auto cls = euState.unit->classes[id].get();
euState.pceInfo.push_back(
{ ue.newPreClassEmitter(cls->name->toCppString()), id }
);
return euState.pceInfo.back().pce->id();
}
//////////////////////////////////////////////////////////////////////
php::SrcLoc srcLoc(const php::Func& func, int32_t ix) {
if (ix < 0) return php::SrcLoc{};
auto const unit = func.originalUnit ? func.originalUnit : func.unit;
return unit->srcLocs[ix];
}
/*
* Order the blocks for bytecode emission.
*
* Rules about block order:
*
* - Each DV funclet must have all of its blocks contiguous, with the
* entry block first.
*
* - Main entry point must be the first block.
*
* It is not a requirement, but we attempt to locate all the DV entry
* points after the rest of the primary function body. The normal
* case for DV initializers is that each one falls through to the
* next, with the block jumping back to the main entry point.
*/
std::vector<BlockId> order_blocks(const php::WideFunc& f) {
auto sorted = rpoSortFromMain(f);
// Get the DV blocks, without the rest of the primary function body,
// and then add them to the end of sorted.
auto const dvBlocks = [&] {
auto withDVs = rpoSortAddDVs(f);
withDVs.erase(
std::find(begin(withDVs), end(withDVs), sorted.front()),
end(withDVs)
);
return withDVs;
}();
sorted.insert(end(sorted), begin(dvBlocks), end(dvBlocks));
FTRACE(2, " block order:{}\n",
[&] {
std::string ret;
for (auto const bid : sorted) {
ret += " ";
ret += folly::to<std::string>(bid);
}
return ret;
}()
);
return sorted;
}
// While emitting bytecode, we learn about some metadata that will
// need to be registered in the FuncEmitter.
struct EmitBcInfo {
struct JmpFixup { Offset instrOff; Offset jmpImmedOff; };
struct BlockInfo {
BlockInfo()
: offset(kInvalidOffset)
, past(kInvalidOffset)
, regionsToPop(0)
{}
// The offset of the block, if we've already emitted it.
// Otherwise kInvalidOffset.
Offset offset;
// The offset past the end of this block.
Offset past;
// How many catch regions the jump at the end of this block is leaving.
// 0 if there is no jump or if the jump is to the same catch region or a
// child
int regionsToPop;
// When we emit a forward jump to a block we haven't seen yet, we
// write down where the jump was so we can fix it up when we get
// to that block.
std::vector<JmpFixup> forwardJumps;
// When we see a forward jump to a block, we record the stack
// depth at the jump site here. This is needed to track
// currentStackDepth correctly (and we also assert all the jumps
// have the same depth).
Optional<uint32_t> expectedStackDepth;
};
std::vector<BlockId> blockOrder;
uint32_t maxStackDepth;
bool containsCalls;
std::vector<BlockInfo> blockInfo;
};
using ExnNodePtr = php::ExnNode*;
bool handleEquivalent(const php::Func& func, ExnNodeId eh1, ExnNodeId eh2) {
auto entry = [&] (ExnNodeId eid) {
return func.exnNodes[eid].region.catchEntry;
};
while (eh1 != eh2) {
assertx(eh1 != NoExnNodeId &&
eh2 != NoExnNodeId &&
func.exnNodes[eh1].depth == func.exnNodes[eh2].depth);
if (entry(eh1) != entry(eh2)) return false;
eh1 = func.exnNodes[eh1].parent;
eh2 = func.exnNodes[eh2].parent;
}
return true;
};
// The common parent P of eh1 and eh2 is the deepest region such that
// eh1 and eh2 are both handle-equivalent to P or a child of P
ExnNodeId commonParent(const php::Func& func, ExnNodeId eh1, ExnNodeId eh2) {
if (eh1 == NoExnNodeId || eh2 == NoExnNodeId) return NoExnNodeId;
while (func.exnNodes[eh1].depth > func.exnNodes[eh2].depth) {
eh1 = func.exnNodes[eh1].parent;
}
while (func.exnNodes[eh2].depth > func.exnNodes[eh1].depth) {
eh2 = func.exnNodes[eh2].parent;
}
while (!handleEquivalent(func, eh1, eh2)) {
eh1 = func.exnNodes[eh1].parent;
eh2 = func.exnNodes[eh2].parent;
}
return eh1;
};
const StaticString
s_hhbbc_fail_verification("__hhvm_intrinsics\\hhbbc_fail_verification");
EmitBcInfo emit_bytecode(EmitUnitState& euState, UnitEmitter& ue, FuncEmitter& fe,
const php::WideFunc& func) {
EmitBcInfo ret = {};
auto& blockInfo = ret.blockInfo;
blockInfo.resize(func.blocks().size());
// Track the stack depth while emitting to determine maxStackDepth.
int32_t currentStackDepth { 0 };
// Temporary buffer for vector immediates. (Hoisted so it's not
// allocated in the loop.)
std::vector<uint8_t> immVec;
// Offset of the last emitted bytecode.
Offset lastOff { 0 };
bool traceBc = false;
SCOPE_ASSERT_DETAIL("emit") {
std::string ret;
for (auto bid : func.blockRange()) {
auto const block = show(*func, *func.blocks()[bid]);
folly::format(&ret, "block #{}\n{}", bid, block);
}
return ret;
};
auto const map_local = [&] (LocalId id) {
if (id >= func->locals.size()) return id;
auto const loc = func->locals[id];
assertx(!loc.killed);
assertx(loc.id <= id);
id = loc.id;
return id;
};
auto const map_local_name = [&] (NamedLocal nl) {
nl.id = map_local(nl.id);
if (nl.name == kInvalidLocalName) return nl;
if (nl.name >= func->locals.size()) return nl;
auto const loc = func->locals[nl.name];
if (!loc.name) {
nl.name = kInvalidLocalName;
return nl;
}
assertx(!loc.unusedName);
nl.name = loc.nameId;
return nl;
};
auto const set_expected_depth = [&] (BlockId block) {
auto& info = blockInfo[block];
if (info.expectedStackDepth) {
assertx(*info.expectedStackDepth == currentStackDepth);
} else {
info.expectedStackDepth = currentStackDepth;
}
};
auto const make_member_key = [&] (MKey mkey) {
switch (mkey.mcode) {
case MEC: case MPC:
return MemberKey{mkey.mcode, static_cast<int32_t>(mkey.idx), mkey.rop};
case MEL: case MPL:
return MemberKey{mkey.mcode, map_local_name(mkey.local), mkey.rop};
case MET: case MPT: case MQT:
return MemberKey{mkey.mcode, mkey.litstr, mkey.rop};
case MEI:
return MemberKey{mkey.mcode, mkey.int64, mkey.rop};
case MW:
return MemberKey{};
}
not_reached();
};
auto const emit_inst = [&] (const Bytecode& inst) {
auto const startOffset = fe.bcPos();
lastOff = startOffset;
FTRACE(4, " emit: {} -- {} @ {}\n", currentStackDepth, show(func, inst),
show(srcLoc(*func, inst.srcLoc)));
if (options.TraceBytecodes.count(inst.op)) traceBc = true;
auto const emit_vsa = [&] (const CompactVector<LSString>& keys) {
auto n = keys.size();
fe.emitIVA(n);
for (size_t i = 0; i < n; ++i) {
fe.emitInt32(ue.mergeLitstr(keys[i]));
}
};
auto const emit_branch = [&] (BlockId id) {
auto& info = blockInfo[id];
if (info.offset != kInvalidOffset) {
fe.emitInt32(info.offset - startOffset);
} else {
info.forwardJumps.push_back({ startOffset, fe.bcPos() });
fe.emitInt32(0);
}
};
auto const emit_switch = [&] (const SwitchTab& targets) {
fe.emitIVA(targets.size());
for (auto t : targets) {
set_expected_depth(t);
emit_branch(t);
}
};
auto const emit_sswitch = [&] (const SSwitchTab& targets) {
fe.emitIVA(targets.size());
for (size_t i = 0; i < targets.size() - 1; ++i) {
set_expected_depth(targets[i].second);
fe.emitInt32(ue.mergeLitstr(targets[i].first));
emit_branch(targets[i].second);
}
fe.emitInt32(-1);
set_expected_depth(targets[targets.size() - 1].second);
emit_branch(targets[targets.size() - 1].second);
};
auto const emit_srcloc = [&] {
auto const sl = srcLoc(*func, inst.srcLoc);
auto const loc = sl.isValid() ?
Location::Range(sl.start.line, sl.start.col, sl.past.line, sl.past.col)
: Location::Range(-1,-1,-1,-1);
fe.recordSourceLocation(loc, startOffset);
};
auto const pop = [&] (int32_t n) {
currentStackDepth -= n;
assertx(currentStackDepth >= 0);
};
auto const push = [&] (int32_t n) {
currentStackDepth += n;
ret.maxStackDepth =
std::max<uint32_t>(ret.maxStackDepth, currentStackDepth);
};
auto const ret_assert = [&] { assertx(currentStackDepth == inst.numPop()); };
auto const createcl = [&] (auto& data) {
auto& id = data.arg2;
if (euState.classOffsets[id] != kInvalidOffset) {
for (auto const& elm : euState.pceInfo) {
if (elm.origId == id) {
id = elm.pce->id();
return;
}
}
always_assert(false);
}
euState.classOffsets[id] = startOffset;
id = recordClass(euState, ue, id);
};
auto const emit_lar = [&](const LocalRange& range) {
encodeLocalRange(fe, HPHP::LocalRange{
map_local(range.first), range.count
});
};
auto const emit_ita = [&](IterArgs ita) {
if (ita.hasKey()) ita.keyId = map_local(ita.keyId);
ita.valId = map_local(ita.valId);
encodeIterArgs(fe, ita);
};
#define IMM_BLA(n) emit_switch(data.targets);
#define IMM_SLA(n) emit_sswitch(data.targets);
#define IMM_IVA(n) fe.emitIVA(data.arg##n);
#define IMM_I64A(n) fe.emitInt64(data.arg##n);
#define IMM_LA(n) fe.emitIVA(map_local(data.loc##n));
#define IMM_NLA(n) fe.emitNamedLocal(map_local_name(data.nloc##n));
#define IMM_ILA(n) fe.emitIVA(map_local(data.loc##n));
#define IMM_IA(n) fe.emitIVA(data.iter##n);
#define IMM_DA(n) fe.emitDouble(data.dbl##n);
#define IMM_SA(n) fe.emitInt32(ue.mergeLitstr(data.str##n));
#define IMM_RATA(n) encodeRAT(fe, data.rat);
#define IMM_AA(n) fe.emitInt32(ue.mergeArray(data.arr##n));
#define IMM_OA_IMPL(n) fe.emitByte(static_cast<uint8_t>(data.subop##n));
#define IMM_OA(type) IMM_OA_IMPL
#define IMM_BA(n) targets[numTargets++] = data.target##n; \
emit_branch(data.target##n);
#define IMM_VSA(n) emit_vsa(data.keys);
#define IMM_KA(n) encode_member_key(make_member_key(data.mkey), fe);
#define IMM_LAR(n) emit_lar(data.locrange);
#define IMM_ITA(n) emit_ita(data.ita);
#define IMM_FCA(n) encodeFCallArgs( \
fe, data.fca.base(), \
data.fca.enforceInOut(), \
[&] { \
data.fca.applyIO( \
[&] (int numBytes, const uint8_t* inOut) { \
encodeFCallArgsBoolVec(fe, numBytes, inOut); \
} \
); \
}, \
data.fca.enforceReadonly(), \
[&] { \
data.fca.applyReadonly( \
[&] (int numBytes, const uint8_t* readonly) { \
encodeFCallArgsBoolVec(fe, numBytes, readonly); \
} \
); \
}, \
data.fca.asyncEagerTarget() != NoBlockId, \
[&] { \
set_expected_depth(data.fca.asyncEagerTarget()); \
emit_branch(data.fca.asyncEagerTarget()); \
}, \
data.fca.context() != nullptr, \
[&] { \
fe.emitInt32(ue.mergeLitstr(data.fca.context()));\
}); \
if (!data.fca.hasUnpack()) ret.containsCalls = true;
#define IMM_NA
#define IMM_ONE(x) IMM_##x(1)
#define IMM_TWO(x, y) IMM_##x(1); IMM_##y(2);
#define IMM_THREE(x, y, z) IMM_TWO(x, y); IMM_##z(3);
#define IMM_FOUR(x, y, z, n) IMM_THREE(x, y, z); IMM_##n(4);
#define IMM_FIVE(x, y, z, n, m) IMM_FOUR(x, y, z, n); IMM_##m(5);
#define IMM_SIX(x, y, z, n, m, o) IMM_FIVE(x, y, z, n, m); IMM_##o(6);
#define POP_NOV
#define POP_ONE(x) pop(1);
#define POP_TWO(x, y) pop(2);
#define POP_THREE(x, y, z) pop(3);
#define POP_MFINAL pop(data.arg1);
#define POP_C_MFINAL(n) pop(n); pop(data.arg1);
#define POP_CMANY pop(data.arg##1);
#define POP_SMANY pop(data.keys.size());
#define POP_CUMANY pop(data.arg##1);
#define POP_FCALL(nin, nobj) \
pop(nin + data.fca.numInputs() + 1 + data.fca.numRets());
#define PUSH_NOV
#define PUSH_ONE(x) push(1);
#define PUSH_TWO(x, y) push(2);
#define PUSH_THREE(x, y, z) push(3);
#define PUSH_CMANY push(data.arg1);
#define PUSH_FCALL push(data.fca.numRets());
#define O(opcode, imms, inputs, outputs, flags) \
case Op::opcode: { \
if (Op::opcode == Op::Nop) break; \
OpInfo<bc::opcode> data{inst.opcode}; \
if (RuntimeOption::EnableIntrinsicsExtension) { \
if (Op::opcode == Op::FCallFuncD && \
inst.FCallFuncD.str2->isame( \
s_hhbbc_fail_verification.get())) { \
fe.emitOp(Op::CheckProp); \
fe.emitInt32( \
ue.mergeLitstr(inst.FCallFuncD.str2)); \
fe.emitOp(Op::PopC); \
ret.maxStackDepth++; \
} \
} \
caller<Op::CreateCl>(createcl, data); \
\
if (isRet(Op::opcode)) ret_assert(); \
fe.emitOp(Op::opcode); \
POP_##inputs \
\
size_t numTargets = 0; \
std::array<BlockId, kMaxHhbcImms> targets; \
\
if (Op::opcode == Op::MemoGet) { \
IMM_##imms \
assertx(numTargets == 1); \
set_expected_depth(targets[0]); \
PUSH_##outputs \
} else if (Op::opcode == Op::MemoGetEager) { \
IMM_##imms \
assertx(numTargets == 2); \
set_expected_depth(targets[0]); \
PUSH_##outputs \
set_expected_depth(targets[1]); \
} else { \
PUSH_##outputs \
IMM_##imms \
for (size_t i = 0; i < numTargets; ++i) { \
set_expected_depth(targets[i]); \
} \
} \
\
if (flags & TF) currentStackDepth = 0; \
emit_srcloc(); \
break; \
}
switch (inst.op) { OPCODES }
#undef O
#undef IMM_MA
#undef IMM_BLA
#undef IMM_SLA
#undef IMM_IVA
#undef IMM_I64A
#undef IMM_LA
#undef IMM_NLA
#undef IMM_ILA
#undef IMM_IA
#undef IMM_DA
#undef IMM_SA
#undef IMM_RATA
#undef IMM_AA
#undef IMM_BA
#undef IMM_OA_IMPL
#undef IMM_OA
#undef IMM_VSA
#undef IMM_KA
#undef IMM_LAR
#undef IMM_FCA
#undef IMM_NA
#undef IMM_ONE
#undef IMM_TWO
#undef IMM_THREE
#undef IMM_FOUR
#undef IMM_FIVE
#undef IMM_SIX
#undef POP_NOV
#undef POP_ONE
#undef POP_TWO
#undef POP_THREE
#undef POP_CMANY
#undef POP_SMANY
#undef POP_CUMANY
#undef POP_FCALL
#undef POP_MFINAL
#undef POP_C_MFINAL
#undef PUSH_NOV
#undef PUSH_ONE
#undef PUSH_TWO
#undef PUSH_THREE
#undef PUSH_CMANY
#undef PUSH_FCALL
};
ret.blockOrder = order_blocks(func);
auto blockIt = begin(ret.blockOrder);
auto const endBlockIt = end(ret.blockOrder);
for (; blockIt != endBlockIt; ++blockIt) {
auto bid = *blockIt;
auto& info = blockInfo[bid];
auto const b = func.blocks()[bid].get();
info.offset = fe.bcPos();
FTRACE(2, " block {}: {}\n", bid, info.offset);
for (auto& fixup : info.forwardJumps) {
fe.emitInt32(info.offset - fixup.instrOff, fixup.jmpImmedOff);
}
if (!info.expectedStackDepth) {
// unreachable, or entry block
info.expectedStackDepth = b->catchEntry ? 1 : 0;
}
currentStackDepth = *info.expectedStackDepth;
auto fallthrough = b->fallthrough;
auto end = b->hhbcs.end();
auto flip = false;
if (is_single_nop(*b)) {
if (blockIt == begin(ret.blockOrder)) {
// If the first block is just a Nop, this means that there is
// a jump to the second block from somewhere in the
// function. We don't want this, so we change this nop to an
// EntryNop so it doesn't get optimized away
emit_inst(bc_with_loc(b->hhbcs.front().srcLoc, bc::EntryNop {}));
}
} else {
// If the block ends with JmpZ or JmpNZ to the next block, flip
// the condition to make the fallthrough the next block
if (b->hhbcs.back().op == Op::JmpZ ||
b->hhbcs.back().op == Op::JmpNZ) {
auto const& bc = b->hhbcs.back();
auto const target =
bc.op == Op::JmpNZ ? bc.JmpNZ.target1 : bc.JmpZ.target1;
if (std::next(blockIt) != endBlockIt && blockIt[1] == target) {
fallthrough = target;
--end;
flip = true;
}
}
for (auto iit = b->hhbcs.begin(); iit != end; ++iit) emit_inst(*iit);
if (flip) {
if (end->op == Op::JmpNZ) {
emit_inst(bc_with_loc(end->srcLoc, bc::JmpZ { b->fallthrough }));
} else {
emit_inst(bc_with_loc(end->srcLoc, bc::JmpNZ { b->fallthrough }));
}
}
}
info.past = fe.bcPos();
if (fallthrough != NoBlockId) {
set_expected_depth(fallthrough);
if (std::next(blockIt) == endBlockIt ||
blockIt[1] != fallthrough) {
if (b->fallthroughNS) {
emit_inst(bc::JmpNS { fallthrough });
} else {
emit_inst(bc::Jmp { fallthrough });
}
auto const nextExnId = func.blocks()[fallthrough]->exnNodeId;
auto const parent = commonParent(*func, nextExnId, b->exnNodeId);
auto depth = [&] (ExnNodeId eid) {
return eid == NoExnNodeId ? 0 : func->exnNodes[eid].depth;
};
// If we are in an exn region we pop from the current region to the
// common parent. If the common parent is null, we pop all regions
info.regionsToPop = depth(b->exnNodeId) - depth(parent);
assertx(info.regionsToPop >= 0);
FTRACE(4, " popped catch regions: {}\n", info.regionsToPop);
}
}
if (b->throwExit != NoBlockId) {
FTRACE(4, " throw: {}\n", b->throwExit);
}
if (fallthrough != NoBlockId) {
FTRACE(4, " fallthrough: {}\n", fallthrough);
}
FTRACE(2, " block {} end: {}\n", bid, info.past);
}
if (traceBc) {
FTRACE(0, "TraceBytecode (emit): {}::{} in {}\n",
func->cls ? func->cls->name->data() : "",
func->name, func->unit->filename);
}
return ret;
}
void emit_locals_and_params(FuncEmitter& fe, const php::Func& func,
const EmitBcInfo& info) {
Id id = 0;
for (auto const& loc : func.locals) {
if (loc.id < func.params.size()) {
assertx(loc.name);
assertx(!loc.killed);
assertx(!loc.unusedName);
auto& param = func.params[id];
FuncEmitter::ParamInfo pinfo;
pinfo.defaultValue = param.defaultValue;
pinfo.typeConstraint = param.typeConstraint;
pinfo.userType = param.userTypeConstraint;
pinfo.upperBounds = param.upperBounds;
pinfo.phpCode = param.phpCode;
pinfo.userAttributes = param.userAttributes;
pinfo.builtinType = param.builtinType;
if (param.inout) pinfo.setFlag(Func::ParamInfo::Flags::InOut);
if (param.readonly) pinfo.setFlag(Func::ParamInfo::Flags::Readonly);
if (param.isVariadic) pinfo.setFlag(Func::ParamInfo::Flags::Variadic);
fe.appendParam(func.locals[id].name, pinfo);
auto const dv = param.dvEntryPoint;
if (dv != NoBlockId) {
fe.params[id].funcletOff = info.blockInfo[dv].offset;
}
++id;
} else {
if (loc.killed) continue;
if (loc.name && !loc.unusedName && loc.name) {
fe.allocVarId(loc.name);
assertx(fe.lookupVarId(loc.name) == id);
assertx(loc.id == id);
++id;
} else {
fe.allocUnnamedLocal();
++id;
}
}
}
for (auto const& loc : func.locals) {
if (loc.killed && !loc.unusedName && loc.name) {
fe.allocVarId(loc.name, true);
}
}
if (debug) {
for (auto const& loc : func.locals) {
if (!loc.killed) {
assertx(loc.id < fe.numLocals());
}
if (!loc.unusedName && loc.name) {
assertx(loc.nameId < fe.numNamedLocals());
}
}
}
assertx(fe.numLocals() == id);
fe.setNumIterators(func.numIters);
}
struct EHRegion {
const php::ExnNode* node;
EHRegion* parent;
Offset start;
Offset past;
};
template<class BlockInfo, class ParentIndexMap>
void emit_eh_region(FuncEmitter& fe,
const EHRegion* region,
const BlockInfo& blockInfo,
ParentIndexMap& parentIndexMap) {
FTRACE(2, " func {}: ExnNode {}\n", fe.name, region->node->idx);
auto const unreachable = [&] (const php::ExnNode& node) {
return blockInfo[node.region.catchEntry].offset == kInvalidOffset;
};
// A region on a single empty block.
if (region->start == region->past) {
FTRACE(2, " Skipping\n");
return;
} else if (unreachable(*region->node)) {
FTRACE(2, " Unreachable\n");
return;
}
FTRACE(2, " Process @ {}-{}\n", region->start, region->past);
auto& eh = fe.addEHEnt();
eh.m_base = region->start;
eh.m_past = region->past;
assertx(eh.m_past >= eh.m_base);
assertx(eh.m_base != kInvalidOffset && eh.m_past != kInvalidOffset);
// An unreachable parent won't be emitted (and thus its offset won't be set),
// so find the closest reachable one.
auto parent = region->parent;
while (parent && unreachable(*parent->node)) parent = parent->parent;
if (parent) {
auto parentIt = parentIndexMap.find(parent);
assertx(parentIt != end(parentIndexMap));
eh.m_parentIndex = parentIt->second;
} else {
eh.m_parentIndex = -1;
}
parentIndexMap[region] = fe.ehtab.size() - 1;
auto const& cr = region->node->region;
eh.m_handler = blockInfo[cr.catchEntry].offset;
eh.m_end = kInvalidOffset;
eh.m_iterId = cr.iterId;
assertx(eh.m_handler != kInvalidOffset);
}
void exn_path(const php::Func& func,
std::vector<const php::ExnNode*>& ret,
ExnNodeId id) {
if (id == NoExnNodeId) return;
auto const& n = func.exnNodes[id];
exn_path(func, ret, n.parent);
ret.push_back(&n);
}
// Return the count of shared elements in the front of two forward
// ranges.
template<class ForwardRange1, class ForwardRange2>
size_t shared_prefix(ForwardRange1& r1, ForwardRange2& r2) {
auto r1it = begin(r1);
auto r2it = begin(r2);
auto const r1end = end(r1);
auto const r2end = end(r2);
auto ret = size_t{0};
while (r1it != r1end && r2it != r2end && *r1it == *r2it) {
++ret; ++r1it; ++r2it;
}
return ret;
}
/*
* Traverse the actual block layout, and find out the intervals for
* each exception region in the tree.
*
* The basic idea here is that we haven't constrained block layout
* based on the exception tree, but adjacent blocks are still
* reasonably likely to have the same ExnNode. Try to coalesce the EH
* regions we create for in those cases.
*/
void emit_ehent_tree(FuncEmitter& fe, const php::WideFunc& func,
const EmitBcInfo& info) {
hphp_fast_map<
const php::ExnNode*,
std::vector<std::unique_ptr<EHRegion>>
> exnMap;
/*
* While walking over the blocks in layout order, we track the set
* of "active" exnNodes. This are a list of exnNodes that inherit
* from each other. When a new active node is pushed, begin an
* EHEnt, and when it's popped, it's done.
*/
std::vector<const php::ExnNode*> activeList;
auto pop_active = [&] (Offset past) {
auto p = activeList.back();
activeList.pop_back();
exnMap[p].back()->past = past;
};
auto push_active = [&] (const php::ExnNode* p, Offset start) {
auto const parent = activeList.empty()
? nullptr
: exnMap[activeList.back()].back().get();
exnMap[p].push_back(
std::make_unique<EHRegion>(
EHRegion { p, parent, start, kInvalidOffset }
)
);
activeList.push_back(p);
};
/*
* Walk over the blocks, and compare the new block's exnNode path to
* the active one. Find the least common ancestor of the two paths,
* then modify the active list by popping and then pushing nodes to
* set it to the new block's path.
*/
for (auto const bid : info.blockOrder) {
auto const b = func.blocks()[bid].get();
auto const offset = info.blockInfo[bid].offset;
if (b->exnNodeId == NoExnNodeId) {
while (!activeList.empty()) pop_active(offset);
continue;
}
std::vector<const php::ExnNode*> current;
exn_path(*func, current, b->exnNodeId);
auto const prefix = shared_prefix(current, activeList);
for (size_t i = prefix, sz = activeList.size(); i < sz; ++i) {
pop_active(offset);
}
for (size_t i = prefix, sz = current.size(); i < sz; ++i) {
push_active(current[i], offset);
}
for (int i = 0; i < info.blockInfo[bid].regionsToPop; i++) {
// If the block ended in a jump out of the catch region, this effectively
// ends all catch regions deeper than the one we are jumping to
pop_active(info.blockInfo[bid].past);
}
if (debug && !activeList.empty()) {
current.clear();
exn_path(*func, current, activeList.back()->idx);
assertx(current == activeList);
}
}
while (!activeList.empty()) {
pop_active(info.blockInfo[info.blockOrder.back()].past);
}
/*
* We've created all our regions, but we need to sort them instead
* of trying to get the UnitEmitter to do it.
*
* The UnitEmitter expects EH regions that look a certain way
* (basically the way emitter.cpp likes them). There are some rules
* about the order it needs to have at runtime, which we set up
* here.
*
* Essentially, an entry a is less than an entry b iff:
*
* - a starts before b
* - a starts at the same place, but encloses b entirely
* - a has the same extents as b, but is a parent of b
*/
std::vector<EHRegion*> regions;
for (auto& mapEnt : exnMap) {
for (auto& region : mapEnt.second) {
regions.push_back(region.get());
}
}
std::sort(
begin(regions), end(regions),
[&] (const EHRegion* a, const EHRegion* b) {
if (a == b) return false;
if (a->start == b->start) {
if (a->past == b->past) {
// When regions exactly overlap, the parent is less than the
// child.
for (auto p = b->parent; p != nullptr; p = p->parent) {
if (p == a) return true;
}
// If a is not a parent of b, and they have the same region;
// then b better be a parent of a.
if (debug) {
auto p = a->parent;
for (; p != b && p != nullptr; p = p->parent) continue;
assertx(p == b);
}
return false;
}
return a->past > b->past;
}
return a->start < b->start;
}
);
hphp_fast_map<const EHRegion*,uint32_t> parentIndexMap;
for (auto& r : regions) {
emit_eh_region(fe, r, info.blockInfo, parentIndexMap);
}
fe.setEHTabIsSorted();
}
void emit_finish_func(EmitUnitState& state, FuncEmitter& fe,
php::WideFunc& wf, const EmitBcInfo& info) {
auto const& func = *wf;
if (info.containsCalls) fe.containsCalls = true;;
emit_locals_and_params(fe, func, info);
emit_ehent_tree(fe, wf, info);
wf.blocks().clear();
fe.userAttributes = func.userAttributes;
fe.retUserType = func.returnUserType;
fe.retUpperBounds = func.returnUBs;
fe.originalFilename =
func.originalFilename ? func.originalFilename :
func.originalUnit ? func.originalUnit->filename : nullptr;
fe.isClosureBody = func.isClosureBody;
fe.isAsync = func.isAsync;
fe.isGenerator = func.isGenerator;
fe.isPairGenerator = func.isPairGenerator;
fe.isNative = func.nativeInfo != nullptr;
fe.isMemoizeWrapper = func.isMemoizeWrapper;
fe.isMemoizeWrapperLSB = func.isMemoizeWrapperLSB;
fe.hasParamsWithMultiUBs = func.hasParamsWithMultiUBs;
fe.hasReturnWithMultiUBs = func.hasReturnWithMultiUBs;
for (auto& name : func.staticCoeffects) fe.staticCoeffects.push_back(name);
for (auto& rule : func.coeffectRules) fe.coeffectRules.push_back(rule);
auto const retTy = state.index.lookup_return_type_raw(&func).first;
if (!retTy.subtypeOf(BBottom)) {
auto const rat = make_repo_type(*state.index.array_table_builder(), retTy);
fe.repoReturnType = rat;
}
if (is_specialized_wait_handle(retTy)) {
auto const awaitedTy = wait_handle_inner(retTy);
if (!awaitedTy.subtypeOf(BBottom)) {
auto const rat = make_repo_type(
*state.index.array_table_builder(),
awaitedTy
);
fe.repoAwaitedReturnType = rat;
}
}
if (func.nativeInfo) {
fe.hniReturnType = func.nativeInfo->returnType;
}
fe.retTypeConstraint = func.retTypeConstraint;
fe.maxStackCells = info.maxStackDepth +
fe.numLocals() +
fe.numIterators() * kNumIterCells;
fe.finish();
}
void renumber_locals(php::Func& func) {
Id id = 0;
Id nameId = 0;
// We initialise local name ids in two passes, since locals that have not
// been remapped may require their name be at the same offset as the local.
// That's true for params, volatile locals, or locals in funcs with VarEnvs.
//
// In the first pass, we assume that all local names are used. Only in the
// second pass do we apply the fact that some local names are never used.
for (auto& loc : func.locals) {
if (loc.killed) {
// Make sure it's out of range, in case someone tries to read it.
loc.id = INT_MAX;
} else {
loc.nameId = nameId++;
loc.id = id++;
}
}
for (auto& loc : func.locals) {
if (loc.unusedName || !loc.name) {
// Make sure it's out of range, in case someone tries to read it.
loc.nameId = INT_MAX;
} else if (loc.killed) {
// The local uses a shared local slot, but its name is still used.
loc.nameId = nameId++;
}
}
}
void emit_init_func(FuncEmitter& fe, const php::Func& func) {
fe.init(
std::get<0>(func.srcInfo.loc),
std::get<1>(func.srcInfo.loc),
func.attrs | (func.sampleDynamicCalls ? AttrDynamicallyCallable : AttrNone),
func.srcInfo.docComment
);
}
void emit_func(EmitUnitState& state, UnitEmitter& ue,
FuncEmitter& fe, php::Func& f) {
FTRACE(2, " func {}\n", f.name->data());
assertx(f.attrs & AttrUnique);
assertx(f.attrs & AttrPersistent);
renumber_locals(f);
emit_init_func(fe, f);
auto func = php::WideFunc::mut(&f);
auto const info = emit_bytecode(state, ue, fe, func);
emit_finish_func(state, fe, func, info);
}
void emit_class(EmitUnitState& state, UnitEmitter& ue, PreClassEmitter* pce,
Offset offset, php::Class& cls) {
FTRACE(2, " class: {}\n", cls.name->data());
assertx(cls.attrs & AttrUnique);
assertx(cls.attrs & AttrPersistent);
pce->init(
std::get<0>(cls.srcInfo.loc),
std::get<1>(cls.srcInfo.loc),
cls.attrs |
(cls.sampleDynamicConstruct ? AttrDynamicallyConstructible : AttrNone),
cls.parentName ? cls.parentName : staticEmptyString(),
cls.srcInfo.docComment
);
pce->setUserAttributes(cls.userAttributes);
for (auto& x : cls.interfaceNames) pce->addInterface(x);
for (auto& x : cls.includedEnumNames) pce->addEnumInclude(x);
for (auto& x : cls.usedTraitNames) pce->addUsedTrait(x);
for (auto& x : cls.requirements) pce->addClassRequirement(x);
for (auto& x : cls.traitPrecRules) pce->addTraitPrecRule(x);
for (auto& x : cls.traitAliasRules) pce->addTraitAliasRule(x);
pce->setIfaceVtableSlot(state.index.lookup_iface_vtable_slot(&cls));
bool needs86cinit = false;
auto const nativeConsts = cls.attrs & AttrBuiltin ?
Native::getClassConstants(cls.name) : nullptr;
for (auto& cconst : cls.constants) {
if (nativeConsts && nativeConsts->count(cconst.name)) {
continue;
}
if (cconst.kind == ConstModifiers::Kind::Context) {
assertx(cconst.cls == &cls);
assertx(!cconst.resolvedTypeStructure);
assertx(cconst.invariance == php::Const::Invariance::None);
pce->addContextConstant(
cconst.name,
std::vector<LowStringPtr>(cconst.coeffects),
cconst.isAbstract,
cconst.isFromTrait
);
} else if (!cconst.val.has_value()) {
assertx(cconst.cls == &cls);
assertx(!cconst.resolvedTypeStructure);
assertx(cconst.invariance == php::Const::Invariance::None);
pce->addAbstractConstant(
cconst.name,
cconst.kind,
cconst.isFromTrait
);
} else {
needs86cinit |= cconst.val->m_type == KindOfUninit;
pce->addConstant(
cconst.name,
(cconst.cls == &cls) ? nullptr : cconst.cls->name,
&cconst.val.value(),
ArrNR{cconst.resolvedTypeStructure},
cconst.kind,
cconst.invariance,
cconst.isFromTrait,
cconst.isAbstract
);
}
}
for (auto& m : cls.methods) {
if (!needs86cinit && m->name == s_86cinit.get()) continue;
FTRACE(2, " method: {}\n", m->name->data());
auto const fe = ue.newMethodEmitter(m->name, pce);
emit_func(state, ue, *fe, *m);
pce->addMethod(fe);
}
CompactVector<Type> useVars;
if (is_closure(cls)) {
auto f = find_method(&cls, s_invoke.get());
useVars = state.index.lookup_closure_use_vars(f, true);
}
auto uvIt = useVars.begin();
auto const privateProps = state.index.lookup_private_props(&cls, true);
auto const privateStatics = state.index.lookup_private_statics(&cls, true);
auto const publicStatics = state.index.lookup_public_statics(&cls);
for (auto const& prop : cls.properties) {
auto const makeRat = [&] (const Type& ty) -> RepoAuthType {
if (!ty.subtypeOf(BCell)) return RepoAuthType{};
if (ty.subtypeOf(BBottom)) {
// A property can be TBottom if no sets (nor its initial value) is
// compatible with its type-constraint, or if its LateInit and there's
// no sets to it. The repo auth type here doesn't particularly matter,
// since such a prop will be inaccessible.
return RepoAuthType{};
}
return make_repo_type(*state.index.array_table_builder(), ty);
};
auto const privPropTy = [&] (const PropState& ps) -> std::pair<Type, bool> {
if (is_closure(cls)) {
// For closures use variables will be the first properties of the
// closure object, in declaration order
if (uvIt != useVars.end()) return std::make_pair(*uvIt++, true);
return std::make_pair(TCell, true);
}
auto it = ps.find(prop.name);
if (it == end(ps)) return std::make_pair(TCell, true);
return std::make_pair(it->second.ty, it->second.everModified);
};
auto propTy = TCell;
auto everModified = true;
auto attrs = prop.attrs;
if (attrs & AttrPrivate) {
std::tie(propTy, everModified) =
privPropTy((attrs & AttrStatic) ? privateStatics : privateProps);
} else if ((attrs & (AttrPublic|AttrProtected)) && (attrs & AttrStatic)) {
std::tie(propTy, everModified) = [&] {
auto const it = publicStatics.find(prop.name);
if (it == end(publicStatics)) return std::make_pair(TCell, true);
return std::make_pair(it->second.ty, it->second.everModified);
}();
}
if (!everModified && (attrs & AttrStatic)) {
attrs |= AttrPersistent;
}
pce->addProperty(
prop.name,
attrs,
prop.userType,
prop.typeConstraint,
prop.ubs,
prop.docComment,
&prop.val,
makeRat(propTy),
prop.userAttributes
);
}
assertx(uvIt == useVars.end());
pce->setEnumBaseTy(cls.enumBaseTy);
}
void emit_typealias(UnitEmitter& ue, const php::TypeAlias& alias) {
assertx(alias.attrs & AttrUnique);
assertx(alias.attrs & AttrPersistent);
auto const te = ue.newTypeAliasEmitter(alias.name->toCppString());
te->init(
std::get<0>(alias.srcInfo.loc),
std::get<1>(alias.srcInfo.loc),
alias.attrs,
alias.value,
alias.type,
alias.nullable,
alias.typeStructure,
alias.resolvedTypeStructure
);
te->setUserAttributes(alias.userAttrs);
}
void emit_constant(UnitEmitter& ue, const php::Constant& constant) {
assertx(constant.attrs & AttrUnique);
assertx(constant.attrs & AttrPersistent);
Constant c {
constant.name,
constant.val,
constant.attrs,
};
ue.addConstant(c);
}
//////////////////////////////////////////////////////////////////////
}
std::unique_ptr<UnitEmitter> emit_unit(const Index& index, php::Unit& unit) {
Trace::Bump bumper{
Trace::hhbbc_emit, kSystemLibBump, is_systemlib_part(unit)
};
assertx(check(unit));
auto ue = std::make_unique<UnitEmitter>(unit.sha1,
SHA1{},
Native::s_noNativeFuncs,
true);
FTRACE(1, " unit {}\n", unit.filename->data());
ue->m_sn = unit.sn;
ue->m_filepath = unit.filename;
ue->m_metaData = unit.metaData;
ue->m_fileAttributes = unit.fileAttributes;
ue->m_moduleName = unit.moduleName;
if (unit.fatalInfo) {
ue->m_fatalUnit = true;
ue->m_fatalOp = unit.fatalInfo->fatalOp;
ue->m_fatalLoc = unit.fatalInfo->fatalLoc;
ue->m_fatalMsg = unit.fatalInfo->fatalMsg;
}
EmitUnitState state { index, &unit };
state.classOffsets.resize(unit.classes.size(), kInvalidOffset);
// Go thought all constant and see if they still need their matching 86cinit
// func. In repo mode we are able to optimize away most of them away. And if
// the const don't need them anymore we should not emit them.
std::unordered_set<
const StringData*,
string_data_hash,
string_data_same
> const_86cinit_funcs;
for (size_t id = 0; id < unit.constants.size(); ++id) {
auto& c = unit.constants[id];
if (type(c->val) != KindOfUninit) {
const_86cinit_funcs.insert(Constant::funcNameFromName(c->name));
}
}
/*
* Top level funcs are always defined when the unit is loaded, and
* don't have a DefFunc bytecode. Process them up front.
*/
for (size_t id = 0; id < unit.funcs.size(); ++id) {
auto const f = unit.funcs[id].get();
if (const_86cinit_funcs.find(f->name) != const_86cinit_funcs.end()) {
continue;
}
auto fe = ue->newFuncEmitter(f->name);
emit_func(state, *ue, *fe, *f);
}
/*
* Find any top-level classes that need to be included due to
* hoistability.
*/
for (size_t id = 0; id < unit.classes.size(); ++id) {
if (state.classOffsets[id] != kInvalidOffset) continue;
auto const c = unit.classes[id].get();
// Closures are AlwaysHoistable; but there's no need to include
// them unless there's a reachable CreateCl.
if (is_closure(*c)) continue;
recordClass(state, *ue, id);
}
size_t pceId = 0;
// Note that state.pceInfo can grow inside the loop
while (pceId < state.pceInfo.size()) {
auto const& pceInfo = state.pceInfo[pceId++];
auto const id = pceInfo.origId;
emit_class(state, *ue, pceInfo.pce,
state.classOffsets[id], *unit.classes[id]);
}
for (size_t id = 0; id < unit.typeAliases.size(); ++id) {
emit_typealias(*ue, *unit.typeAliases[id]);
}
for (size_t id = 0; id < unit.constants.size(); ++id) {
emit_constant(*ue, *unit.constants[id]);
}
ue->finish();
return ue;
}
//////////////////////////////////////////////////////////////////////
}