// hphp/runtime/vm/hhbc.h
/*
+----------------------------------------------------------------------+
| HipHop for PHP |
+----------------------------------------------------------------------+
| Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
+----------------------------------------------------------------------+
| This source file is subject to version 3.01 of the PHP license, |
| that is bundled with this package in the file LICENSE, and is |
| available through the world-wide-web at the following url: |
| http://www.php.net/license/3_01.txt |
| If you did not receive a copy of the PHP license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@php.net so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
*/
#pragma once
#include <type_traits>
#include "hphp/runtime/base/repo-auth-type.h"
#include "hphp/runtime/base/typed-value.h"
#include "hphp/runtime/base/types.h"
#include "hphp/runtime/base/header-kind.h"
#include "hphp/runtime/vm/fcall-args-flags.h"
#include "hphp/runtime/vm/hhbc-shared.h"
#include "hphp/runtime/vm/member-key.h"
#include "hphp/runtime/vm/opcodes.h"
#include "hphp/util/compact-vector.h"
#include "hphp/util/either.h"
#include "hphp/util/functional.h"
#include "hphp/util/hash-set.h"
namespace HPHP {
//////////////////////////////////////////////////////////////////////
struct Unit;
struct UnitEmitter;
struct Func;
struct FuncEmitter;
constexpr size_t kMaxHhbcImms = 6;
// A contiguous range of locals. The count is the number of locals
// including the first. If the range is empty, count will be zero and
// first's value is arbitrary.
struct LocalRange {
uint32_t first;
uint32_t count;
};
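/*
* Illustrative sketch (not part of this header): LocalRange{4, 3} covers
* locals 4, 5, and 6, and LocalRange{0, 0} is an empty range. Walking a
* range looks like:
*
*   for (uint32_t i = 0; i < range.count; ++i) {
*     auto const localId = range.first + i;
*     // ... use localId ...
*   }
*/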
/*
* Arguments to IterInit / IterNext opcodes.
* hhas format: <iterId> K:<keyId> V:<valId> (for key-value iters)
*              <iterId> NK V:<valId>        (for value-only iters)
* hhbc format: <uint8:flags> <iva:iterId> <iva:(keyId + 1)> <iva:valId>
*
* For value-only iters, keyId will be -1 (an invalid local ID); to take
* advantage of the one-byte encoding for IVA arguments, we add 1 to the key
* when encoding these args in the hhbc format.
*
* We don't accept flags from hhas because our flags require analyses that we
* currently only do in HHBBC.
*/
struct IterArgs {
enum Flags : uint8_t {
None = 0,
// The base is stored in a local, and that local is unmodified in the loop.
BaseConst = (1 << 0),
};
static constexpr int32_t kNoKey = -1;
explicit IterArgs(Flags flags, int32_t iterId, int32_t keyId, int32_t valId)
: iterId(iterId), keyId(keyId), valId(valId), flags(flags) {}
bool hasKey() const {
assertx(keyId == kNoKey || keyId >= 0);
return keyId != kNoKey;
}
bool operator==(const IterArgs& other) const {
return iterId == other.iterId && keyId == other.keyId &&
valId == other.valId && flags == other.flags;
}
int32_t iterId;
int32_t keyId;
int32_t valId;
Flags flags;
};
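/*
* Example (a sketch with hypothetical local IDs): for a value-only loop
* `foreach ($xs as $v)` with $v in local 2, the emitter would build
* IterArgs(Flags::None, 0, IterArgs::kNoKey, 2); hasKey() is false, and the
* hhbc encoding stores keyId + 1 == 0, so the key still fits the one-byte
* IVA form. A key-value loop with the key in local 1 passes keyId = 1
* instead, encoded as 2.
*/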
// Arguments to FCall opcodes.
// hhas format: <flags> <numArgs> <numRets> <inoutArgs> <readonlyArgs>
// <asyncEagerOffset>
// hhbc format: <uint8:flags> ?<iva:numArgs> ?<iva:numRets>
// ?<boolvec:inoutArgs> ?<boolvec:readonlyArgs>
// ?<ba:asyncEagerOffset>
// flags = flags (hhas doesn't have HHBC-only flags)
// numArgs = (flags >> kFirstNumArgsBit) != 0
// ? (flags >> kFirstNumArgsBit) - 1 : decode_iva()
// numRets = flags & HasInOut ? decode_iva() : 1
// inoutArgs = flags & EnforceInOut ? decode bool vec : nullptr
// readonlyArgs = flags & EnforceReadonly ? decode bool vec : nullptr
// asyncEagerOffset = flags & HasAEO ? decode_ba() : kInvalidOffset
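// Worked example (illustrative, assuming the encoding above): a call with
// numArgs = 3 and no other flags can store the count inline as
// flags |= (numArgs + 1) << kFirstNumArgsBit; // high bits hold 4
// and the decoder recovers it as (flags >> kFirstNumArgsBit) - 1 == 3,
// without reading a separate IVA immediate.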
struct FCallArgsBase {
using Flags = FCallArgsFlags;
// The first (lowest) bit of numArgs.
static constexpr uint16_t kFirstNumArgsBit = 12;
// Flags that are valid on the FCallArgsBase::flags field (i.e. not HHBC-only).
static constexpr Flags kInternalFlags =
Flags::HasUnpack |
Flags::HasGenerics |
Flags::LockWhileUnwinding |
Flags::SkipRepack |
Flags::SkipCoeffectsCheck |
Flags::EnforceMutableReturn |
Flags::EnforceReadonlyThis;
explicit FCallArgsBase(Flags flags, uint32_t numArgs, uint32_t numRets)
: numArgs(numArgs)
, numRets(numRets)
, flags(flags)
{
assertx(!(flags & ~kInternalFlags));
}
bool hasUnpack() const { return flags & Flags::HasUnpack; }
bool hasGenerics() const { return flags & Flags::HasGenerics; }
bool lockWhileUnwinding() const { return flags & Flags::LockWhileUnwinding; }
bool skipRepack() const { return flags & Flags::SkipRepack; }
bool skipCoeffectsCheck() const { return flags & Flags::SkipCoeffectsCheck; }
bool enforceMutableReturn() const {
return flags & Flags::EnforceMutableReturn;
}
bool enforceReadonlyThis() const {
return flags & Flags::EnforceReadonlyThis;
}
uint32_t numInputs() const {
return numArgs + (hasUnpack() ? 1 : 0) + (hasGenerics() ? 1 : 0);
}
uint32_t numArgs;
uint32_t numRets;
Flags flags;
};
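/*
* Sketch: FCallArgsBase(Flags::HasUnpack, 2, 1) describes a call like
* f($a, $b, ...$rest) with one return value; numInputs() is 3 because the
* unpack array is an extra stack input on top of the two positional args
* (generics, when present, add one more).
*/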
struct FCallArgs : FCallArgsBase {
explicit FCallArgs(Flags flags, uint32_t numArgs, uint32_t numRets,
const uint8_t* inoutArgs, const uint8_t* readonlyArgs,
Offset asyncEagerOffset, const StringData* context)
: FCallArgsBase(flags, numArgs, numRets)
, asyncEagerOffset(asyncEagerOffset)
, inoutArgs(inoutArgs)
, readonlyArgs(readonlyArgs)
, context(context) {
assertx(IMPLIES(inoutArgs != nullptr || readonlyArgs != nullptr,
numArgs != 0));
if (readonlyArgs && !anyReadonly(readonlyArgs, numArgs)) {
readonlyArgs = nullptr;
}
}
static bool isReadonlyArg(const uint8_t* readonlyArgs, uint32_t i) {
assertx(readonlyArgs != nullptr);
return readonlyArgs[i / 8] & (1 << (i % 8));
}
static bool anyReadonly(const uint8_t* readonlyArgs, uint32_t numArgs) {
assertx(readonlyArgs != nullptr);
for (size_t i = 0; i < numArgs; ++i) {
if (isReadonlyArg(readonlyArgs, i)) return true;
}
return false;
}
bool enforceInOut() const { return inoutArgs != nullptr; }
bool isInOut(uint32_t i) const {
assertx(enforceInOut());
return inoutArgs[i / 8] & (1 << (i % 8));
}
bool enforceReadonly() const {
assertx(IMPLIES(readonlyArgs != nullptr,
anyReadonly(readonlyArgs, numArgs)));
return readonlyArgs != nullptr;
}
bool isReadonly(uint32_t i) const {
assertx(enforceReadonly());
return isReadonlyArg(readonlyArgs, i);
}
FCallArgs withGenerics() const {
assertx(!hasGenerics());
return FCallArgs(
static_cast<Flags>(flags | Flags::HasGenerics),
numArgs, numRets, inoutArgs, readonlyArgs, asyncEagerOffset, context);
}
Offset asyncEagerOffset;
const uint8_t* inoutArgs;
const uint8_t* readonlyArgs;
const StringData* context;
};
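/*
* Example (illustrative): inoutArgs and readonlyArgs are bit vectors with
* one bit per argument, bit i % 8 of byte i / 8. For a 10-argument call
* where only args 0 and 9 are readonly, the vector is the two bytes
* {0x01, 0x02}, and isReadonlyArg(vec, 9) tests vec[1] & (1 << 1).
*/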
static_assert(1 << FCallArgs::kFirstNumArgsBit == FCallArgsFlags::NumArgsStart, "");
using PrintLocal = std::function<std::string(int32_t local)>;
std::string show(const IterArgs&, PrintLocal);
std::string show(const LocalRange&);
std::string show(uint32_t numArgs, const uint8_t* boolVecArgs);
std::string show(const FCallArgsBase&, const uint8_t* inoutArgs,
const uint8_t* readonlyArgs,
std::string asyncEagerLabel, const StringData* ctx);
/*
* Variable-size immediates are implemented as follows: To determine which size
* the immediate is, examine the first byte where the immediate is expected,
* and examine its high-order bit. If it is zero, it's a 1-byte immediate
* and the byte is the value. Otherwise, it's 4 bytes, and bits 8..31 must be
* logical-shifted to the right by one to get rid of the flag bit.
*
* The types in this macro for BLA, SLA, and VSA are meaningless since they
* are never read out of ArgUnion (they use ImmVector).
*
* There are several different local immediate types:
* - LA immediates are for bytecodes that only require the TypedValue* to
* perform their operation.
* - ILA immediates are used by bytecodes that need both the TypedValue* and
* the slot index to implement their operation. This could be used by
* opcodes that print an error message including this slot info.
* - NLA immediates are used by bytecodes that need both the TypedValue* and
* the name of the local to be implemented. This is commonly used for
* ops that raise warnings for undefined local uses.
*
* ArgTypes and their various decoding helpers should be kept in sync with the
* `hhx' bytecode inspection GDB command.
*/
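/*
* A minimal decoding sketch for the scheme above (illustrative only; it is
* not the runtime's actual decoder, and it assumes a little-endian host and
* <cstring> for std::memcpy):
*
*   uint32_t decodeIvaSketch(const uint8_t*& pc) {
*     uint8_t b = *pc;
*     if (!(b & 0x80)) { ++pc; return b; }   // high bit clear: 1-byte form
*     uint32_t word;
*     std::memcpy(&word, pc, sizeof(word));  // 4-byte form
*     pc += 4;
*     // Keep bits 0..6 of the first byte, then shift bits 8..31 right by
*     // one to squeeze out the flag bit.
*     return (word & 0x7f) | ((word >> 1) & 0xffffff80);
*   }
*/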
#define ARGTYPES \
ARGTYPE(NA, void*) /* unused */ \
ARGTYPEVEC(BLA, Offset) /* Bytecode offset vector immediate */ \
ARGTYPEVEC(SLA, Id) /* String id/offset pair vector */ \
ARGTYPE(IVA, uint32_t) /* Variable size: 8 or 32-bit uint */ \
ARGTYPE(I64A, int64_t) /* 64-bit Integer */ \
ARGTYPE(LA, int32_t) /* Local: 8 or 32-bit int */ \
ARGTYPE(NLA, NamedLocal) /* Local w/ name: 2x 8 or 32-bit int */ \
ARGTYPE(ILA, int32_t) /* Local w/ ID: 8 or 32-bit int */ \
ARGTYPE(IA, int32_t) /* Iterator ID: 8 or 32-bit int */ \
ARGTYPE(DA, double) /* Double */ \
ARGTYPE(SA, Id) /* Static string ID */ \
ARGTYPE(AA, Id) /* Static array ID */ \
ARGTYPE(RATA, RepoAuthType) /* Statically inferred RepoAuthType */ \
ARGTYPE(BA, Offset) /* Bytecode offset */ \
ARGTYPE(OA, unsigned char) /* Sub-opcode, untyped */ \
ARGTYPE(KA, MemberKey) /* Member key: local, stack, int, str */ \
ARGTYPE(LAR, LocalRange) /* Contiguous range of locals */ \
ARGTYPE(ITA, IterArgs) /* Iterator arguments */ \
ARGTYPE(FCA, FCallArgs) /* FCall arguments */ \
ARGTYPEVEC(VSA, Id) /* Vector of static string IDs */
enum ArgType {
#define ARGTYPE(name, type) name,
#define ARGTYPEVEC(name, type) name,
ARGTYPES
#undef ARGTYPE
#undef ARGTYPEVEC
};
union ArgUnion {
ArgUnion() : u_LA{0} {}
uint8_t bytes[0];
#define ARGTYPE(name, type) type u_##name;
#define ARGTYPEVEC(name, type) type u_##name;
ARGTYPES
#undef ARGTYPE
#undef ARGTYPEVEC
};
enum FlavorDesc {
NOV, // None
CV, // TypedValue
UV, // Uninit
CUV, // TypedValue, or Uninit argument
};
enum InstrFlags {
/* No flags. */
NF = 0x0,
/* Terminal: next instruction is not reachable via fall through or the callee
* returning control. This includes instructions like Throw that always throw
* exceptions. */
TF = 0x1,
/* Control flow: If this instruction finishes executing (doesn't throw an
* exception), vmpc() is not guaranteed to point to the next instruction in
* the bytecode stream. This does not take VM reentry into account, as that
* operation is part of the instruction that performed the reentry, and does
* not affect what vmpc() is set to after the instruction completes. */
CF = 0x2,
/* Shorthand for common combinations. */
CF_TF = (CF | TF),
};
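/*
* Reading the flags (illustrative): a conditional jump such as JmpZ is CF,
* since it may redirect vmpc() but can also fall through, while a return
* such as RetC is CF_TF because control never falls through to the next
* instruction. instrFlags() below reads the authoritative per-opcode table.
*/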
inline bool isPre(IncDecOp op) {
return
op == IncDecOp::PreInc || op == IncDecOp::PreIncO ||
op == IncDecOp::PreDec || op == IncDecOp::PreDecO;
}
inline bool isInc(IncDecOp op) {
return
op == IncDecOp::PreInc || op == IncDecOp::PreIncO ||
op == IncDecOp::PostInc || op == IncDecOp::PostIncO;
}
inline bool isIncDecO(IncDecOp op) {
return
op == IncDecOp::PreIncO || op == IncDecOp::PreDecO ||
op == IncDecOp::PostIncO || op == IncDecOp::PostDecO;
}
constexpr uint32_t kMaxConcatN = 4;
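// OPCODES expands to one O(...) invocation per opcode, so with O defined as
// "+ 1" the whole expansion reads "+ 1 + 1 ..." and evaluates to the number
// of opcodes.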
#define O(...) + 1
constexpr size_t Op_count = OPCODES;
#undef O
enum class Op : std::conditional<Op_count <= 256, uint8_t, uint16_t>::type {
#define O(name, ...) name,
OPCODES
#undef O
};
/*
* Also put Op* in the enclosing namespace, to avoid having to change every
* existing usage site of the enum values.
*/
#define O(name, ...) UNUSED auto constexpr Op##name = Op::name;
OPCODES
#undef O
// These are comparable by default under MSVC.
#ifndef _MSC_VER
inline constexpr bool operator<(Op a, Op b) { return size_t(a) < size_t(b); }
inline constexpr bool operator>(Op a, Op b) { return size_t(a) > size_t(b); }
inline constexpr bool operator<=(Op a, Op b) {
return size_t(a) <= size_t(b);
}
inline constexpr bool operator>=(Op a, Op b) {
return size_t(a) >= size_t(b);
}
#endif
constexpr bool isValidOpcode(Op op) {
return size_t(op) < Op_count;
}
inline MOpMode getQueryMOpMode(QueryMOp op) {
switch (op) {
case QueryMOp::CGet: return MOpMode::Warn;
case QueryMOp::CGetQuiet:
case QueryMOp::Isset: return MOpMode::None;
case QueryMOp::InOut: return MOpMode::InOut;
}
always_assert(false);
}
#define HIGH_OPCODES \
O(FuncPrologue) \
O(TraceletGuard)
enum HighOp {
OpHighStart = Op_count-1,
#define O(name) Op##name,
HIGH_OPCODES
#undef O
};
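// Since OpHighStart is Op_count - 1, the high opcodes continue numbering
// where the primary Op enum leaves off: OpFuncPrologue == Op_count,
// OpTraceletGuard == Op_count + 1.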
struct StrVecItem {
Id str;
Offset dest;
};
struct ImmVector {
explicit ImmVector() : m_start(0) {}
explicit ImmVector(const uint8_t* start,
int32_t length,
int32_t numStack)
: m_length(length)
, m_numStack(numStack)
, m_start(start)
{}
bool isValid() const { return m_start != 0; }
const int32_t* vec32() const {
return reinterpret_cast<const int32_t*>(m_start);
}
folly::Range<const int32_t*> range32() const {
auto base = vec32();
return {base, base + size()};
}
const StrVecItem* strvec() const {
return reinterpret_cast<const StrVecItem*>(m_start);
}
/*
* Returns the length of the immediate vector in bytes (for M
* vectors) or elements (for switch vectors)
*/
int32_t size() const { return m_length; }
/*
* Returns the number of elements on the execution stack that this vector
* will need to access.
*/
int numStackValues() const { return m_numStack; }
private:
int32_t m_length;
int32_t m_numStack;
const uint8_t* m_start;
};
// Must be an opcode that actually has an ImmVector.
ImmVector getImmVector(PC opcode);
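/*
* Usage sketch: for a Switch instruction, getImmVector(pc) yields the jump
* table, whose targets can be walked as
*
*   auto const iv = getImmVector(pc);
*   for (auto const off : iv.range32()) {
*     // off is the bytecode offset of one switch arm
*   }
*
* while an SSwitch's string/offset pairs come back via strvec().
*/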
// Some decoding helper functions.
int numImmediates(Op opcode);
ArgType immType(Op opcode, int idx);
bool hasImmVector(Op opcode);
int instrLen(PC opcode);
int numSuccs(PC opcode);
PC skipCall(PC pc);
/*
* The returned struct has normalized variable-sized immediates. u must be
* provided unless you know that the immediate is not of type KA.
*
* Don't use with RATA immediates.
*/
ArgUnion getImm(PC opcode, int idx, const Unit* u = nullptr);
// Don't use this with variable-sized immediates!
ArgUnion* getImmPtr(PC opcode, int idx);
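/*
* Sketch: reading an instruction's first immediate when immediate 0 is an
* IVA:
*
*   auto const n = getImm(pc, 0).u_IVA;
*
* Per the notes above, pass the Unit when the immediate may be a KA, and
* avoid getImmPtr for variable-sized immediates.
*/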
void staticStreamer(const TypedValue* tv, std::string& out);
std::string instrToString(PC it, Either<const Func*, const FuncEmitter*> f);
void staticArrayStreamer(const ArrayData*, std::string&);
/*
* Convert subopcodes or opcodes into strings.
*/
const char* opcodeToName(Op op);
const char* subopToName(InitPropOp);
const char* subopToName(IsTypeOp);
const char* subopToName(FatalOp);
const char* subopToName(CollectionType);
const char* subopToName(SetOpOp);
const char* subopToName(IncDecOp);
const char* subopToName(BareThisOp);
const char* subopToName(SilenceOp);
const char* subopToName(OODeclExistsOp);
const char* subopToName(ObjMethodOp);
const char* subopToName(SwitchKind);
const char* subopToName(MOpMode);
const char* subopToName(QueryMOp);
const char* subopToName(SetRangeOp);
const char* subopToName(TypeStructResolveOp);
const char* subopToName(ContCheckOp);
const char* subopToName(SpecialClsRef);
const char* subopToName(IsLogAsDynamicCallOp);
const char* subopToName(ReadonlyOp);
/*
* Returns true iff the given SubOp is in the valid range for its type.
*/
template<class Subop>
bool subopValid(Subop);
/*
* Try to parse a string into a subop name of a given type.
*
* Returns std::nullopt if the string is not recognized as that type of
* subop.
*/
template<class SubOpType>
Optional<SubOpType> nameToSubop(const char*);
using OffsetList = std::vector<Offset>;
// Returns the jump offsets relative to the instruction, or an empty list if
// the instruction cannot jump.
OffsetList instrJumpOffsets(PC instr);
// Returns the absolute offsets of the jump targets, or an empty list if the
// instruction cannot jump.
OffsetList instrJumpTargets(PC instrs, Offset pos);
/*
* Returns the set of bytecode offsets for the instructions that may
* be executed immediately after opc.
*/
using OffsetSet = hphp_hash_set<Offset>;
OffsetSet instrSuccOffsets(PC opc, const Func* func);
/*
* Some CF instructions can be treated as non-CF instructions for most analysis
* purposes, such as bytecode verification and HHBBC. These instructions change
* vmpc() to point somewhere in a different function, but the runtime
* guarantees that if execution ever returns to the original frame, it will be
* at the location immediately following the instruction in question. This
* creates the illusion that the instruction fell through normally to the
* instruction after it, within the context of its execution frame.
*
* The canonical examples of this behavior are the FCall* instructions, so we
* use "non-call control flow" to describe the set of CF instructions that do
* not exhibit this behavior. This function returns true if `opcode' is a
* non-call control flow instruction.
*/
bool instrIsNonCallControlFlow(Op opcode);
bool instrAllowsFallThru(Op opcode);
constexpr InstrFlags instrFlagsData[] = {
#define O(unusedName, unusedImm, unusedPop, unusedPush, flags) flags,
OPCODES
#undef O
};
constexpr InstrFlags instrFlags(Op opcode) {
return instrFlagsData[size_t(opcode)];
}
constexpr bool instrIsControlFlow(Op opcode) {
return (instrFlags(opcode) & CF) != 0;
}
constexpr bool isUnconditionalJmp(Op opcode) {
return opcode == Op::Jmp || opcode == Op::JmpNS;
}
constexpr bool isConditionalJmp(Op opcode) {
return opcode == Op::JmpZ || opcode == Op::JmpNZ;
}
constexpr bool isJmp(Op opcode) {
return
opcode == Op::Jmp ||
opcode == Op::JmpNS ||
opcode == Op::JmpZ ||
opcode == Op::JmpNZ;
}
constexpr bool isObjectConstructorOp(Op opcode) {
return
opcode == Op::NewObj ||
opcode == Op::NewObjD ||
opcode == Op::NewObjR ||
opcode == Op::NewObjRD ||
opcode == Op::NewObjS;
}
constexpr bool isArrLikeConstructorOp(Op opcode) {
return
opcode == Op::Dict ||
opcode == Op::Keyset ||
opcode == Op::Vec ||
opcode == Op::NewDictArray ||
opcode == Op::NewStructDict ||
opcode == Op::NewVec ||
opcode == Op::NewKeysetArray;
}
constexpr bool isArrLikeCastOp(Op opcode) {
return
opcode == Op::CastVec ||
opcode == Op::CastDict ||
opcode == Op::CastKeyset;
}
constexpr bool isComparisonOp(Op opcode) {
return
opcode == Op::Cmp ||
opcode == Op::Eq ||
opcode == Op::Neq ||
opcode == Op::Gt ||
opcode == Op::Gte ||
opcode == Op::Lt ||
opcode == Op::Lte ||
opcode == Op::Same ||
opcode == Op::NSame;
}
constexpr bool isFCallClsMethod(Op opcode) {
return
opcode == OpFCallClsMethod ||
opcode == OpFCallClsMethodD ||
opcode == OpFCallClsMethodS ||
opcode == OpFCallClsMethodSD;
}
constexpr bool isFCallFunc(Op opcode) {
return
opcode == OpFCallFunc ||
opcode == OpFCallFuncD;
}
constexpr bool isFCallObjMethod(Op opcode) {
return
opcode == OpFCallObjMethod ||
opcode == OpFCallObjMethodD;
}
constexpr bool isFCall(Op opcode) {
return
opcode == OpFCallCtor ||
isFCallClsMethod(opcode) ||
isFCallFunc(opcode) ||
isFCallObjMethod(opcode);
}
constexpr bool isRet(Op op) {
return op == OpRetC || op == OpRetCSuspended || op == OpRetM;
}
constexpr bool isReturnish(Op op) {
return isRet(op) || op == Op::NativeImpl;
}
constexpr bool isSwitch(Op op) {
return op == OpSwitch || op == OpSSwitch;
}
constexpr bool isTypeAssert(Op op) {
return op == OpAssertRATL || op == OpAssertRATStk;
}
constexpr bool isIteratorOp(Op op) {
return op == OpIterInit || op == Op::LIterInit ||
op == OpIterNext || op == Op::LIterNext;
}
inline bool isMemberBaseOp(Op op) {
switch (op) {
case Op::BaseGC:
case Op::BaseGL:
case Op::BaseSC:
case Op::BaseL:
case Op::BaseC:
case Op::BaseH:
return true;
default:
return false;
}
}
inline bool isMemberDimOp(Op op) {
return op == Op::Dim;
}
inline bool isMemberFinalOp(Op op) {
switch (op) {
case Op::QueryM:
case Op::SetM:
case Op::SetRangeM:
case Op::IncDecM:
case Op::SetOpM:
case Op::UnsetM:
return true;
default:
return false;
}
}
inline bool isMemberOp(Op op) {
return isMemberBaseOp(op) || isMemberDimOp(op) || isMemberFinalOp(op);
}
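/*
* Illustrative sequence (hypothetical locals): evaluating $a[0] emits a
* BaseL on $a to start the sequence, zero or more Dim instructions to walk
* intermediate dimensions, and a final QueryM to perform the read;
* finalMemberOpMode (below) reports the mode implied by that final op.
*/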
inline MOpMode finalMemberOpMode(Op op) {
switch (op) {
case Op::SetM:
case Op::SetRangeM:
case Op::IncDecM:
case Op::SetOpM:
return MOpMode::Define;
case Op::UnsetM:
return MOpMode::Unset;
case Op::QueryM:
return MOpMode::None;
default:
always_assert_flog(
false, "Unknown final member op {}", opcodeToName(op)
);
}
}
// True if the opcode body can set pc = 0 to halt the interpreter.
constexpr bool instrCanHalt(Op op) {
return op == OpRetC || op == OpNativeImpl ||
op == OpAwait || op == OpAwaitAll || op == OpCreateCont ||
op == OpYield || op == OpYieldK || op == OpRetM ||
op == OpRetCSuspended;
}
int instrNumPops(PC opcode);
int instrNumPushes(PC opcode);
FlavorDesc instrInputFlavor(PC op, uint32_t idx);
}
//////////////////////////////////////////////////////////////////////
namespace std {
template<>
struct hash<HPHP::Op> {
size_t operator()(HPHP::Op op) const {
return HPHP::hash_int64(size_t(op));
}
};
}
//////////////////////////////////////////////////////////////////////