Jit/hir/builder.cpp
// Copyright (c) Facebook, Inc. and its affiliates. (http://www.facebook.com)
#include "Jit/hir/builder.h"
#include "Python.h"
#include "ceval.h"
#include "opcode.h"
#include "structmember.h"
#include "Jit/bitvector.h"
#include "Jit/bytecode.h"
#include "Jit/hir/hir.h"
#include "Jit/hir/optimization.h"
#include "Jit/hir/preload.h"
#include "Jit/hir/ssa.h"
#include "Jit/hir/type.h"
#include "Jit/pyjit.h"
#include "Jit/ref.h"
#include "Jit/threaded_compile.h"
#include <algorithm>
#include <deque>
#include <memory>
#include <set>
#include <unordered_set>
#include <utility>
#include <vector>
namespace jit {
namespace hir {
using jit::BytecodeInstruction;
// Allocate a temp register that may be used for the stack. It should not be a
// register that will be treated specially in the FrameState (e.g. tracked as
// containing a local or cell).
Register* TempAllocator::AllocateStack() {
Register* reg = env_->AllocateRegister();
cache_.emplace_back(reg);
return reg;
}
// Get the i-th stack temporary or allocate one.
Register* TempAllocator::GetOrAllocateStack(std::size_t idx) {
if (idx < cache_.size()) {
Register* reg = cache_[idx];
return reg;
}
return AllocateStack();
}
// Allocate a temp register that will not be used for a stack value.
Register* TempAllocator::AllocateNonStack() {
return env_->AllocateRegister();
}
// Opcodes that we know how to translate into HIR
const std::unordered_set<int> kSupportedOpcodes = {
BEFORE_ASYNC_WITH,
BEGIN_FINALLY,
BINARY_ADD,
BINARY_AND,
BINARY_FLOOR_DIVIDE,
BINARY_LSHIFT,
BINARY_MATRIX_MULTIPLY,
BINARY_MODULO,
BINARY_MULTIPLY,
BINARY_OR,
BINARY_POWER,
BINARY_RSHIFT,
BINARY_SUBSCR,
BINARY_SUBTRACT,
BINARY_TRUE_DIVIDE,
BINARY_XOR,
BUILD_CHECKED_LIST,
BUILD_CHECKED_MAP,
BUILD_CONST_KEY_MAP,
BUILD_LIST,
BUILD_LIST_UNPACK,
BUILD_MAP,
BUILD_MAP_UNPACK,
BUILD_MAP_UNPACK_WITH_CALL,
BUILD_SET,
BUILD_SET_UNPACK,
BUILD_SLICE,
BUILD_STRING,
BUILD_TUPLE,
BUILD_TUPLE_UNPACK,
BUILD_TUPLE_UNPACK_WITH_CALL,
CALL_FINALLY,
CALL_FUNCTION,
CALL_FUNCTION_EX,
CALL_FUNCTION_KW,
CALL_METHOD,
CAST,
CHECK_ARGS,
COMPARE_OP,
CONVERT_PRIMITIVE,
DELETE_ATTR,
DELETE_FAST,
DELETE_SUBSCR,
DUP_TOP,
DUP_TOP_TWO,
END_ASYNC_FOR,
END_FINALLY,
EXTENDED_ARG,
FAST_LEN,
FORMAT_VALUE,
FOR_ITER,
FUNC_CREDENTIAL,
GET_AITER,
GET_ANEXT,
GET_AWAITABLE,
GET_ITER,
GET_YIELD_FROM_ITER,
IMPORT_FROM,
IMPORT_NAME,
INPLACE_ADD,
INPLACE_AND,
INPLACE_FLOOR_DIVIDE,
INPLACE_LSHIFT,
INPLACE_MATRIX_MULTIPLY,
INPLACE_MODULO,
INPLACE_MULTIPLY,
INPLACE_OR,
INPLACE_POWER,
INPLACE_RSHIFT,
INPLACE_SUBTRACT,
INPLACE_TRUE_DIVIDE,
INPLACE_XOR,
INT_LOAD_CONST_OLD,
INVOKE_FUNCTION,
INVOKE_METHOD,
JUMP_ABSOLUTE,
JUMP_FORWARD,
JUMP_IF_FALSE_OR_POP,
JUMP_IF_NONZERO_OR_POP,
JUMP_IF_TRUE_OR_POP,
JUMP_IF_ZERO_OR_POP,
LIST_APPEND,
LOAD_ATTR,
LOAD_ATTR_SUPER,
LOAD_CLOSURE,
LOAD_CONST,
LOAD_DEREF,
LOAD_FAST,
LOAD_FIELD,
LOAD_GLOBAL,
LOAD_ITERABLE_ARG,
LOAD_LOCAL,
LOAD_METHOD,
LOAD_METHOD_SUPER,
LOAD_TYPE,
MAKE_FUNCTION,
MAP_ADD,
NOP,
POP_BLOCK,
POP_EXCEPT,
POP_FINALLY,
POP_JUMP_IF_FALSE,
POP_JUMP_IF_NONZERO,
POP_JUMP_IF_TRUE,
POP_JUMP_IF_ZERO,
POP_TOP,
PRIMITIVE_BINARY_OP,
PRIMITIVE_BOX,
PRIMITIVE_COMPARE_OP,
PRIMITIVE_LOAD_CONST,
PRIMITIVE_UNARY_OP,
PRIMITIVE_UNBOX,
RAISE_VARARGS,
REFINE_TYPE,
RETURN_PRIMITIVE,
RETURN_VALUE,
ROT_FOUR,
ROT_THREE,
ROT_TWO,
SEQUENCE_GET,
SEQUENCE_REPEAT,
SEQUENCE_SET,
SETUP_ASYNC_WITH,
SETUP_FINALLY,
SETUP_WITH,
SET_ADD,
STORE_ATTR,
STORE_DEREF,
STORE_FAST,
STORE_FIELD,
STORE_LOCAL,
STORE_SUBSCR,
TP_ALLOC,
UNARY_INVERT,
UNARY_NEGATIVE,
UNARY_NOT,
UNARY_POSITIVE,
UNPACK_EX,
UNPACK_SEQUENCE,
WITH_CLEANUP_FINISH,
WITH_CLEANUP_START,
YIELD_FROM,
YIELD_VALUE,
};
static bool can_translate(PyCodeObject* code) {
static const std::unordered_set<std::string> kBannedNames{
"eval", "exec", "locals"};
PyObject* names = code->co_names;
std::unordered_set<Py_ssize_t> banned_name_ids;
auto name_at = [&](Py_ssize_t i) {
return PyUnicode_AsUTF8(PyTuple_GET_ITEM(names, i));
};
for (Py_ssize_t i = 0; i < PyTuple_GET_SIZE(names); i++) {
if (kBannedNames.count(name_at(i))) {
banned_name_ids.insert(i);
}
}
for (auto& bci : BytecodeInstructionBlock{code}) {
auto opcode = bci.opcode();
int oparg = bci.oparg();
if (!kSupportedOpcodes.count(opcode)) {
JIT_DLOG("Unsupported opcode: %d", opcode);
return false;
} else if (opcode == LOAD_GLOBAL && banned_name_ids.count(oparg)) {
JIT_DLOG("'%s' unsupported", name_at(oparg));
return false;
}
}
return true;
}
void HIRBuilder::AllocateRegistersForLocals(
Environment* env,
FrameState& state) {
auto nlocals = code_->co_nlocals;
state.locals.clear();
state.locals.reserve(nlocals);
for (int i = 0; i < nlocals; i++) {
state.locals.emplace_back(env->AllocateRegister());
}
}
void HIRBuilder::AllocateRegistersForCells(
Environment* env,
FrameState& state) {
Py_ssize_t ncells = PyTuple_GET_SIZE(code_->co_cellvars) +
PyTuple_GET_SIZE(code_->co_freevars);
state.cells.clear();
state.cells.reserve(ncells);
for (int i = 0; i < ncells; i++) {
state.cells.emplace_back(env->AllocateRegister());
}
}
// Holds the current state of translation for a given basic block
struct HIRBuilder::TranslationContext {
TranslationContext(BasicBlock* b, const FrameState& fs)
: block(b), frame(fs) {}
template <typename T, typename... Args>
T* emit(Args&&... args) {
auto instr = block->appendWithOff<T>(
frame.instr_offset(), std::forward<Args>(args)...);
return instr;
}
template <typename T, typename... Args>
T* emitChecked(Args&&... args) {
auto instr = emit<T>(std::forward<Args>(args)...);
auto out = instr->GetOutput();
emit<CheckExc>(out, out, frame);
return instr;
}
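// Emit an instruction whose operands are the top `num_operands` stack values
// (deepest value in operand slot 0), attach the current FrameState, and push
// its output onto the stack.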
template <typename T, typename... Args>
T* emitVariadic(
TempAllocator& temps,
std::size_t num_operands,
Args&&... args) {
Register* out = temps.AllocateStack();
auto call = emit<T>(num_operands, out, std::forward<Args>(args)...);
for (auto i = num_operands; i > 0; i--) {
Register* operand = frame.stack.pop();
call->SetOperand(i - 1, operand);
}
call->setFrameState(frame);
frame.stack.push(out);
return call;
}
void setCurrentInstr(const jit::BytecodeInstruction& cur_bci) {
frame.next_instr_offset = cur_bci.NextInstrOffset();
}
void snapshot() {
auto terminator = block->GetTerminator();
if ((terminator != nullptr) && terminator->IsSnapshot()) {
auto snapshot = static_cast<Snapshot*>(terminator);
snapshot->setFrameState(frame);
} else {
emit<Snapshot>(frame);
}
}
BasicBlock* block{nullptr};
FrameState frame;
};
void HIRBuilder::addInitialYield(TranslationContext& tc) {
auto out = temps_.AllocateNonStack();
tc.emitChecked<InitialYield>(out, tc.frame);
}
// Add LoadArg instructions for each function argument. This ensures that the
// corresponding variables are always assigned and allows for a uniform
// treatment of registers that correspond to arguments (vs locals) during
// definite assignment analysis.
void HIRBuilder::addLoadArgs(TranslationContext& tc, int num_args) {
for (int i = 0; i < num_args; i++) {
// Arguments in CPython are the first N locals
Register* dst = tc.frame.locals[i];
JIT_CHECK(dst != nullptr, "No register for argument %d", i);
Type type = preloader_.checkArgType(i);
tc.emit<LoadArg>(dst, i, type);
}
}
// Add a MakeCell for each cellvar and load each freevar from closure.
void HIRBuilder::addInitializeCells(
TranslationContext& tc,
Register* cur_func) {
Py_ssize_t ncellvars = PyTuple_GET_SIZE(code_->co_cellvars);
Py_ssize_t nfreevars = PyTuple_GET_SIZE(code_->co_freevars);
Register* null_reg = ncellvars > 0 ? temps_.AllocateNonStack() : nullptr;
for (int i = 0; i < ncellvars; i++) {
int arg = CO_CELL_NOT_AN_ARG;
auto dst = tc.frame.cells[i];
JIT_CHECK(dst != nullptr, "No register for cell %d", i);
Register* cell_contents = null_reg;
if (code_->co_cell2arg != nullptr &&
(arg = code_->co_cell2arg[i]) != CO_CELL_NOT_AN_ARG) {
// cell is for argument local number `arg`
JIT_CHECK(
static_cast<unsigned>(arg) < tc.frame.locals.size(),
"co_cell2arg says cell %d is local %d but locals size is %ld",
i,
arg,
tc.frame.locals.size());
cell_contents = tc.frame.locals[arg];
}
tc.emit<MakeCell>(dst, cell_contents, tc.frame);
if (arg != CO_CELL_NOT_AN_ARG) {
// Clear the local once we have it in a cell.
tc.frame.locals[arg] = null_reg;
}
}
if (nfreevars == 0) {
return;
}
JIT_CHECK(cur_func != nullptr, "No cur_func in function with freevars");
Register* func_closure = temps_.AllocateNonStack();
tc.emit<LoadField>(
func_closure,
cur_func,
"func_closure",
offsetof(PyFunctionObject, func_closure),
TTuple);
for (int i = 0; i < nfreevars; i++) {
auto cell_idx = i + ncellvars;
Register* dst = tc.frame.cells[cell_idx];
JIT_CHECK(dst != nullptr, "No register for cell %ld", cell_idx);
tc.emit<LoadTupleItem>(dst, func_closure, i);
}
}
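// Decide whether to take a Snapshot after translating bc_instr. Snapshots
// record the frame state used for deoptimization metadata, so we skip them
// after control instructions and after instructions that are always safe to
// replay.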
static bool should_snapshot(
const BytecodeInstruction& bci,
bool is_in_async_for_header_block) {
switch (bci.opcode()) {
// These instructions conditionally alter the operand stack based on which
// branch is taken, thus we cannot safely take a snapshot in the same basic
// block. They're also control instructions, so snapshotting in the same
// basic block doesn't make sense anyway.
case FOR_ITER:
case JUMP_IF_FALSE_OR_POP:
case JUMP_IF_NONZERO_OR_POP:
case JUMP_IF_TRUE_OR_POP:
case JUMP_IF_ZERO_OR_POP:
// These are all control instructions. Taking a snapshot after them in the
// same basic block doesn't make sense, as control immediately transfers
// to another basic block.
case BEGIN_FINALLY:
case CALL_FINALLY:
case END_FINALLY:
case JUMP_ABSOLUTE:
case JUMP_FORWARD:
case POP_JUMP_IF_FALSE:
case POP_JUMP_IF_TRUE:
case POP_JUMP_IF_ZERO:
case POP_JUMP_IF_NONZERO:
case RETURN_PRIMITIVE:
case RETURN_VALUE:
case RAISE_VARARGS:
// These instructions only modify frame state and are always safe to
// replay. We don't snapshot these in order to limit the amount of
// unnecessary metadata in the lowered IR.
case CHECK_ARGS:
case CONVERT_PRIMITIVE:
case DUP_TOP:
case DUP_TOP_TWO:
case EXTENDED_ARG:
case INT_LOAD_CONST_OLD:
case LOAD_CLOSURE:
case LOAD_CONST:
case LOAD_FAST:
case LOAD_LOCAL:
case NOP:
case POP_FINALLY:
case POP_TOP:
case PRIMITIVE_BOX:
case PRIMITIVE_LOAD_CONST:
case PRIMITIVE_UNARY_OP:
case PRIMITIVE_UNBOX:
case REFINE_TYPE:
case ROT_FOUR:
case ROT_THREE:
case ROT_TWO:
case STORE_FAST:
case STORE_LOCAL: {
return false;
}
// The `is` and `is not` comparison operators are implemented using pointer
// equality. They are always safe to replay.
case COMPARE_OP: {
auto op = static_cast<CompareOp>(bci.oparg());
return (op != CompareOp::kIs) && (op != CompareOp::kIsNot);
}
// In an async-for header block YIELD_FROM controls whether we end the loop
case YIELD_FROM: {
return !is_in_async_for_header_block;
}
// Take a snapshot after translating all other bytecode instructions. This
// may generate unnecessary deoptimization metadata but will always be
// correct.
default: {
return true;
}
}
}
// Compute basic block boundaries and allocate corresponding HIR blocks
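// A block starts at bytecode offset 0, at every jump target, and immediately
// after any branch or other block-ending opcode (the finally-handling
// opcodes, RAISE_VARARGS, RETURN_VALUE, and YIELD_FROM).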
HIRBuilder::BlockMap HIRBuilder::createBlocks(
Function& irfunc,
const BytecodeInstructionBlock& bc_block) {
BlockMap block_map;
// Mark the beginning of each basic block in the bytecode
std::set<Py_ssize_t> block_starts = {0};
auto maybe_add_next_instr = [&](const BytecodeInstruction& bc_instr) {
Py_ssize_t next_instr_idx = bc_instr.NextInstrIndex();
if (next_instr_idx < bc_block.size()) {
block_starts.insert(next_instr_idx);
}
};
for (auto bc_instr : bc_block) {
auto opcode = bc_instr.opcode();
if (bc_instr.IsBranch()) {
maybe_add_next_instr(bc_instr);
auto target = bc_instr.GetJumpTargetAsIndex();
block_starts.insert(target);
} else if (
// We always split after YIELD_FROM to handle the case where it's the
// top of an async-for loop and so generate a HIR conditional jump.
(opcode == BEGIN_FINALLY) || (opcode == END_FINALLY) ||
(opcode == POP_FINALLY) || (opcode == RAISE_VARARGS) ||
(opcode == RETURN_VALUE) || (opcode == YIELD_FROM)) {
maybe_add_next_instr(bc_instr);
} else {
JIT_CHECK(!bc_instr.IsTerminator(), "Terminator should split block");
}
}
// Allocate blocks
auto it = block_starts.begin();
while (it != block_starts.end()) {
Py_ssize_t start_idx = *it;
++it;
Py_ssize_t end_idx;
if (it != block_starts.end()) {
end_idx = *it;
} else {
end_idx = bc_block.size();
}
auto block = irfunc.cfg.AllocateBlock();
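// The block map is keyed by bytecode offset in bytes, not by instruction
// index.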
block_map.blocks[start_idx * sizeof(_Py_CODEUNIT)] = block;
block_map.bc_blocks.emplace(
std::piecewise_construct,
std::forward_as_tuple(block),
std::forward_as_tuple(bc_block.bytecode(), start_idx, end_idx));
}
return block_map;
}
BasicBlock* HIRBuilder::getBlockAtOff(Py_ssize_t off) {
auto it = block_map_.blocks.find(off);
JIT_DCHECK(it != block_map_.blocks.end(), "No block for offset %ld", off);
return it->second;
}
// Convenience wrapper, used only in tests
std::unique_ptr<Function> buildHIR(BorrowedRef<PyFunctionObject> func) {
JIT_CHECK(
!g_threaded_compile_context.compileRunning(),
"multi-thread compile must preload first");
return buildHIR(Preloader(func));
}
std::unique_ptr<Function> buildHIR(const Preloader& preloader) {
return HIRBuilder{preloader}.buildHIR();
}
// This performs an abstract interpretation over the bytecode for func in order
// to translate it from a stack to register machine. The translation proceeds
// in two passes over the bytecode. First, basic block boundaries are
// enumerated and a mapping from block start offset to basic block is
// created. Next, basic blocks are filled in by simulating the effect that each
// instruction has on the stack.
//
// The correctness of the translation depends on the invariant that the depth
// of the operand stack is constant at each program point. All of the CPython
// bytecodes that we currently support maintain this invariant. However, there
// are a few bytecodes that do not (e.g. SETUP_FINALLY). We will need to deal
// with that if we ever want to support compiling them.
std::unique_ptr<Function> HIRBuilder::buildHIR() {
if (!can_translate(code_)) {
JIT_DLOG("Can't translate all opcodes in %s", preloader_.fullname());
return nullptr;
}
std::unique_ptr<Function> irfunc = preloader_.makeFunction();
buildHIRImpl(irfunc.get(), /*frame_state=*/nullptr);
// Use RemoveTrampolineBlocks and RemoveUnreachableBlocks directly instead of
// Run because the rest of CleanCFG requires SSA.
CleanCFG::RemoveTrampolineBlocks(&irfunc->cfg);
CleanCFG::RemoveUnreachableBlocks(&irfunc->cfg);
return irfunc;
}
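// Shared between top-level compilation and inlining: frame_state is the
// caller's FrameState when this code object is being inlined into another
// function, or nullptr for a top-level build.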
BasicBlock* HIRBuilder::buildHIRImpl(
Function* irfunc,
FrameState* frame_state) {
temps_ = TempAllocator(&irfunc->env);
BytecodeInstructionBlock bc_instrs{code_};
block_map_ = createBlocks(*irfunc, bc_instrs);
// Ensure that the entry block isn't a loop header
BasicBlock* entry_block = getBlockAtOff(0);
for (const auto& bci : bc_instrs) {
if (bci.IsBranch() && bci.GetJumpTarget() == 0) {
entry_block = irfunc->cfg.AllocateBlock();
break;
}
}
if (frame_state == nullptr) {
// Function is not being inlined (irfunc matches code) so set the whole
// CFG's entry block.
irfunc->cfg.entry_block = entry_block;
}
// Insert LoadArg, LoadClosureCell, and MakeCell/MakeNullCell instructions
// for the entry block
TranslationContext entry_tc{
entry_block,
FrameState{
code_,
preloader_.globals(),
preloader_.builtins(),
/*parent=*/frame_state}};
AllocateRegistersForLocals(&irfunc->env, entry_tc.frame);
AllocateRegistersForCells(&irfunc->env, entry_tc.frame);
addLoadArgs(entry_tc, preloader_.numArgs());
Register* cur_func = nullptr;
// TODO(emacs): Check if the code object or preloader uses runtime func and
// drop the frame_state == nullptr check. Inlined functions should load a
// const instead of using LoadCurrentFunc.
if (frame_state == nullptr && irfunc->uses_runtime_func) {
cur_func = temps_.AllocateNonStack();
entry_tc.emit<LoadCurrentFunc>(cur_func);
}
addInitializeCells(entry_tc, cur_func);
if (code_->co_flags & kCoFlagsAnyGenerator) {
// InitialYield must be after args are loaded so they can be spilled to
// the suspendable state. It must also come before anything which can
// deopt as generator deopt assumes we're running from state stored
// in a generator object.
addInitialYield(entry_tc);
}
BasicBlock* first_block = getBlockAtOff(0);
if (entry_block != first_block) {
entry_block->appendWithOff<Branch>(0, first_block);
}
entry_tc.block = first_block;
translate(*irfunc, bc_instrs, entry_tc);
return entry_block;
}
void HIRBuilder::emitProfiledTypes(
TranslationContext& tc,
const CodeProfileData& profile_data,
const BytecodeInstruction& bc_instr) {
if (bc_instr.opcode() == CALL_METHOD) {
// TODO(T107300350): Ignore profiling data for CALL_METHOD because we lie
// about its stack inputs.
return;
}
const PolymorphicTypes types =
getProfiledTypes(profile_data, bc_instr.offset());
if (types.empty() || types[0].size() > tc.frame.stack.size()) {
// The types are either absent or invalid (e.g., from a different version
// of the code than what we're running now).
return;
}
const std::vector<BorrowedRef<PyTypeObject>> first_profile = types[0];
if (bc_instr.opcode() == WITH_CLEANUP_START ||
bc_instr.opcode() == END_FINALLY) {
// TOS for WITH_CLEANUP_START can be nullptr or a type, and TOS for
// END_FINALLY can be nullptr, a type, or an int. In both cases, a type TOS
// signals that an exception has been raised and a nullptr TOS indicates a
// normal exit from the context manager or finally block. Since we deopt
// when an exception is raised, the JIT statically knows that TOS for
// WITH_CLEANUP_START is TNullptr, and that value flows to TOS for
// END_FINALLY.
//
// TODO(T110447724): If the profiled type for either opcode's TOS is a
// type, that means that during profiling, we always left this block by
// raising an exception. This implies that the code we're compiling is
// probably unreachable, and we may want to consider leaving it out of the
// HIR to save space (replacing it with a Deopt).
//
// More importantly, if we emit a GuardType<TypeExact> here, the TNullptr
// TOS value will conflict with GuardType's input type of TObject. This is
// currently the only situation where we try to give a possibly-null value
// to GuardType; if we run into more we may want to consider making
// GuardType null-aware.
return;
}
// TODO(T115140951): Add a more robust method of determining what type
// information differs between interpreter runs and static JITted bytecode
if (bc_instr.opcode() == STORE_FIELD) {
auto& [offset, type, name] = preloader_.fieldInfo(constArg(bc_instr));
if (type <= TPrimitive) {
return;
}
}
// Except for function calls, all instructions profile all of their inputs,
// with deeper stack elements first.
ssize_t stack_idx = first_profile.size() - 1;
if (bc_instr.opcode() == CALL_FUNCTION) {
stack_idx = bc_instr.oparg();
}
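// A monomorphic profile lets us guard each profiled input to its exact type;
// a polymorphic profile becomes a single HintType that carries every observed
// combination of input types.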
if (types.size() == 1) {
for (auto type : first_profile) {
if (type != nullptr) {
Register* value = tc.frame.stack.top(stack_idx);
tc.emit<GuardType>(value, Type::fromTypeExact(type), value);
}
stack_idx--;
}
} else {
ProfiledTypes all_types;
for (auto type_vec : types) {
std::vector<Type> types;
for (auto type : type_vec) {
if (type != nullptr) {
types.emplace_back(Type::fromTypeExact(type));
}
}
all_types.emplace_back(types);
}
std::vector<Register*> args;
while (stack_idx >= 0) {
args.emplace_back(tc.frame.stack.top(stack_idx--));
}
tc.emit<HintType>(args.size(), all_types, args);
}
}
InlineResult HIRBuilder::inlineHIR(
Function* caller,
FrameState* caller_frame_state) {
if (!can_translate(code_)) {
JIT_DLOG("Can't translate all opcodes in %s", preloader_.fullname());
return {nullptr, nullptr};
}
BasicBlock* entry_block = buildHIRImpl(caller, caller_frame_state);
// Make one block with a Return that merges the return branches from the
// callee. After SSA, it will turn into a massive Phi. The caller can find
// the Return and use it as the output of the call instruction.
Register* return_val = caller->env.AllocateRegister();
BasicBlock* exit_block = caller->cfg.AllocateBlock();
// Enum types are always unboxed in the JIT despite the preloader's type
// descr being an enum.
// TODO(emacs): Find a better place for this branch, since we do it in two
// places in the builder.
if (preloader_.returnType() <= TCEnum) {
exit_block->append<Return>(return_val, TCInt64);
} else {
exit_block->append<Return>(return_val, preloader_.returnType());
}
for (auto block : caller->cfg.GetRPOTraversal(entry_block)) {
auto instr = block->GetTerminator();
if (instr->IsReturn()) {
auto assign = Assign::create(return_val, instr->GetOperand(0));
auto branch = Branch::create(exit_block);
instr->ExpandInto({assign, branch});
delete instr;
}
}
// Map of FrameState to parent pointers. We must completely disconnect the
// inlined function's CFG from its caller for SSAify to run properly;
// otherwise it will find uses (in FrameState) before defs and insert
// LoadConst<Nullptr>.
std::unordered_map<FrameState*, FrameState*> framestate_parent;
for (BasicBlock* block : caller->cfg.GetRPOTraversal(entry_block)) {
for (Instr& instr : *block) {
JIT_CHECK(
!instr.IsBeginInlinedFunction(),
"there should be no BeginInlinedFunction in inlined functions");
JIT_CHECK(
!instr.IsEndInlinedFunction(),
"there should be no EndInlinedFunction in inlined functions");
FrameState* fs = nullptr;
if (auto db = dynamic_cast<DeoptBase*>(&instr)) {
fs = db->frameState();
} else if (auto snap = dynamic_cast<Snapshot*>(&instr)) {
fs = snap->frameState();
}
if (fs == nullptr || fs->parent == nullptr) {
continue;
}
bool inserted = framestate_parent.emplace(fs, fs->parent).second;
JIT_CHECK(inserted, "there should not be duplicate FrameState pointers");
fs->parent = nullptr;
}
}
// The caller function has already been converted to SSA form and all HIR
// passes require input to be in SSA form. SSAify the inlined function.
SSAify{}.Run(entry_block, &caller->env);
// Re-link the CFG.
for (auto& [fs, parent] : framestate_parent) {
fs->parent = parent;
}
return {entry_block, exit_block};
}
void HIRBuilder::translate(
Function& irfunc,
const jit::BytecodeInstructionBlock& bc_instrs,
const TranslationContext& tc,
FinallyCompleter complete_finally) {
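// Worklist of basic blocks to translate, each paired with a copy of the
// frame state (operand stack, block stack, locals) at block entry.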
std::deque<TranslationContext> queue = {tc};
std::unordered_set<BasicBlock*> processed;
std::unordered_set<BasicBlock*> loop_headers;
const CodeProfileData* profile_data = getProfileData(tc.frame.code);
while (!queue.empty()) {
auto tc = std::move(queue.front());
queue.pop_front();
if (processed.count(tc.block)) {
continue;
}
processed.emplace(tc.block);
// Translate remaining instructions into HIR
auto& bc_block = map_get(block_map_.bc_blocks, tc.block);
tc.frame.next_instr_offset = bc_block.startOffset();
tc.snapshot();
auto is_in_async_for_header_block = [&tc, &bc_instrs]() {
if (tc.frame.block_stack.isEmpty()) {
return false;
}
const ExecutionBlock& block_top = tc.frame.block_stack.top();
return block_top.isAsyncForHeaderBlock(bc_instrs);
};
for (auto bc_it = bc_block.begin(); bc_it != bc_block.end(); ++bc_it) {
BytecodeInstruction bc_instr = *bc_it;
tc.setCurrentInstr(bc_instr);
if (profile_data != nullptr) {
emitProfiledTypes(tc, *profile_data, bc_instr);
}
// Translate instruction
switch (bc_instr.opcode()) {
case NOP: {
break;
}
case BINARY_ADD:
case BINARY_AND:
case BINARY_FLOOR_DIVIDE:
case BINARY_LSHIFT:
case BINARY_MATRIX_MULTIPLY:
case BINARY_MODULO:
case BINARY_MULTIPLY:
case BINARY_OR:
case BINARY_POWER:
case BINARY_RSHIFT:
case BINARY_SUBSCR:
case BINARY_SUBTRACT:
case BINARY_TRUE_DIVIDE:
case BINARY_XOR: {
emitBinaryOp(tc, bc_instr);
break;
}
case INPLACE_ADD:
case INPLACE_AND:
case INPLACE_FLOOR_DIVIDE:
case INPLACE_LSHIFT:
case INPLACE_MATRIX_MULTIPLY:
case INPLACE_MODULO:
case INPLACE_MULTIPLY:
case INPLACE_OR:
case INPLACE_POWER:
case INPLACE_RSHIFT:
case INPLACE_SUBTRACT:
case INPLACE_TRUE_DIVIDE:
case INPLACE_XOR: {
emitInPlaceOp(tc, bc_instr);
break;
}
case UNARY_NOT:
case UNARY_NEGATIVE:
case UNARY_POSITIVE:
case UNARY_INVERT: {
emitUnaryOp(tc, bc_instr);
break;
}
case BUILD_LIST:
case BUILD_TUPLE:
emitMakeListTuple(tc, bc_instr);
break;
case BUILD_LIST_UNPACK:
case BUILD_TUPLE_UNPACK:
case BUILD_TUPLE_UNPACK_WITH_CALL:
emitMakeListTupleUnpack(tc, bc_instr);
break;
case BUILD_CHECKED_LIST: {
emitBuildCheckedList(tc, bc_instr);
break;
}
case BUILD_CHECKED_MAP: {
emitBuildCheckedMap(tc, bc_instr);
break;
}
case BUILD_MAP: {
emitBuildMap(tc, bc_instr);
break;
}
case BUILD_MAP_UNPACK:
emitBuildMapUnpack(tc, bc_instr, false);
break;
case BUILD_MAP_UNPACK_WITH_CALL:
emitBuildMapUnpack(tc, bc_instr, true);
break;
case BUILD_SET: {
emitBuildSet(tc, bc_instr);
break;
}
case BUILD_SET_UNPACK: {
emitBuildSetUnpack(tc, bc_instr);
break;
}
case BUILD_CONST_KEY_MAP: {
emitBuildConstKeyMap(tc, bc_instr);
break;
}
case CALL_FUNCTION:
case CALL_FUNCTION_EX:
case CALL_FUNCTION_KW:
case CALL_METHOD:
case INVOKE_FUNCTION:
case INVOKE_METHOD: {
emitAnyCall(irfunc.cfg, tc, bc_it, bc_instrs);
break;
}
case FUNC_CREDENTIAL:
emitFunctionCredential(tc, bc_instr);
break;
case COMPARE_OP: {
emitCompareOp(tc, bc_instr);
break;
}
case DELETE_ATTR: {
emitDeleteAttr(tc, bc_instr);
break;
}
case LOAD_ATTR: {
emitLoadAttr(tc, bc_instr);
break;
}
case LOAD_METHOD: {
emitLoadMethod(tc, bc_instr);
break;
}
case LOAD_METHOD_SUPER: {
emitLoadMethodOrAttrSuper(tc, bc_instr, true);
break;
}
case LOAD_ATTR_SUPER: {
emitLoadMethodOrAttrSuper(tc, bc_instr, false);
break;
}
case LOAD_CLOSURE: {
tc.frame.stack.push(tc.frame.cells[bc_instr.oparg()]);
break;
}
case LOAD_DEREF: {
emitLoadDeref(tc, bc_instr);
break;
}
case STORE_DEREF: {
emitStoreDeref(tc, bc_instr);
break;
}
case LOAD_CONST: {
emitLoadConst(tc, bc_instr);
break;
}
case LOAD_FAST: {
emitLoadFast(tc, bc_instr);
break;
}
case LOAD_LOCAL: {
emitLoadLocal(tc, bc_instr);
break;
}
case LOAD_TYPE: {
emitLoadType(tc, bc_instr);
break;
}
case CONVERT_PRIMITIVE: {
emitConvertPrimitive(tc, bc_instr);
break;
}
case PRIMITIVE_LOAD_CONST: {
emitPrimitiveLoadConst(tc, bc_instr);
break;
}
case INT_LOAD_CONST_OLD: {
emitIntLoadConstOld(tc, bc_instr);
break;
}
case PRIMITIVE_BOX: {
emitPrimitiveBox(tc, bc_instr);
break;
}
case PRIMITIVE_UNBOX: {
emitPrimitiveUnbox(tc, bc_instr);
break;
}
case PRIMITIVE_BINARY_OP: {
emitPrimitiveBinaryOp(tc, bc_instr);
break;
}
case PRIMITIVE_COMPARE_OP: {
emitPrimitiveCompare(tc, bc_instr);
break;
}
case PRIMITIVE_UNARY_OP: {
emitPrimitiveUnaryOp(tc, bc_instr);
break;
}
case FAST_LEN: {
emitFastLen(irfunc.cfg, tc, bc_instr);
break;
}
case REFINE_TYPE: {
emitRefineType(tc, bc_instr);
break;
}
case SEQUENCE_GET: {
emitSequenceGet(tc, bc_instr);
break;
}
case SEQUENCE_SET: {
emitSequenceSet(tc, bc_instr);
break;
}
case SEQUENCE_REPEAT: {
emitSequenceRepeat(irfunc.cfg, tc, bc_instr);
break;
}
case LOAD_GLOBAL: {
emitLoadGlobal(tc, bc_instr);
break;
}
case JUMP_ABSOLUTE:
case JUMP_FORWARD: {
auto target_off = bc_instr.GetJumpTarget();
auto target = getBlockAtOff(target_off);
if ((bc_instr.opcode() == JUMP_ABSOLUTE) &&
(target_off <= bc_instr.offset())) {
loop_headers.emplace(target);
}
tc.emit<Branch>(target);
break;
}
case JUMP_IF_FALSE_OR_POP:
case JUMP_IF_NONZERO_OR_POP:
case JUMP_IF_TRUE_OR_POP:
case JUMP_IF_ZERO_OR_POP: {
emitJumpIf(tc, bc_instr);
break;
}
case POP_BLOCK: {
popBlock(irfunc.cfg, tc);
break;
}
case POP_JUMP_IF_FALSE:
case POP_JUMP_IF_TRUE: {
auto target_off = bc_instr.GetJumpTarget();
auto target = getBlockAtOff(target_off);
if (target_off <= bc_instr.offset()) {
loop_headers.emplace(target);
}
emitPopJumpIf(tc, bc_instr);
break;
}
case POP_TOP: {
tc.frame.stack.pop();
break;
}
case RETURN_PRIMITIVE: {
Type type = prim_type_to_type(bc_instr.oparg());
if (preloader_.returnType() <= TCEnum) {
JIT_CHECK(
type <= TCInt64,
"bad return type %s for enum, expected CInt64",
type);
} else {
JIT_CHECK(
type <= preloader_.returnType(),
"bad return type %s, expected %s",
type,
preloader_.returnType());
}
Register* reg = tc.frame.stack.pop();
tc.emit<Return>(reg, type);
break;
}
case RETURN_VALUE: {
Register* reg = tc.frame.stack.pop();
// TODO add preloader_.returnType() to Return instr here to validate
// that all values flowing to return are of correct type; will
// require consistency of static compiler and JIT types, see
// T86480663
JIT_CHECK(
tc.frame.block_stack.isEmpty(),
"Returning with non-empty block stack");
tc.emit<Return>(reg);
break;
}
case BEGIN_FINALLY: {
emitBeginFinally(irfunc, tc, bc_instrs, bc_instr, queue);
break;
}
case CALL_FINALLY: {
emitCallFinally(irfunc, tc, bc_instrs, bc_instr, queue);
break;
}
case END_ASYNC_FOR: {
emitEndAsyncFor(tc, bc_instr);
break;
}
case END_FINALLY: {
emitEndFinally(tc, bc_instr, complete_finally);
break;
}
case POP_FINALLY: {
emitPopFinally(tc, bc_instr, complete_finally);
break;
}
case SETUP_FINALLY: {
emitSetupFinally(tc, bc_instr);
break;
}
case STORE_ATTR: {
emitStoreAttr(tc, bc_instr);
break;
}
case STORE_FAST: {
emitStoreFast(tc, bc_instr);
break;
}
case STORE_LOCAL: {
emitStoreLocal(tc, bc_instr);
break;
}
case STORE_SUBSCR: {
emitStoreSubscr(tc);
break;
}
case BUILD_SLICE: {
emitBuildSlice(tc, bc_instr);
break;
}
case GET_AITER: {
emitGetAIter(tc);
break;
}
case GET_ANEXT: {
emitGetANext(tc);
break;
}
case GET_ITER: {
emitGetIter(tc);
break;
}
case GET_YIELD_FROM_ITER: {
emitGetYieldFromIter(irfunc.cfg, tc);
break;
}
case MAKE_FUNCTION: {
emitMakeFunction(tc, bc_instr);
break;
}
case LIST_APPEND: {
emitListAppend(tc, bc_instr);
break;
}
case LOAD_ITERABLE_ARG: {
emitLoadIterableArg(irfunc.cfg, tc, bc_instr);
break;
}
case DUP_TOP: {
auto& stack = tc.frame.stack;
stack.push(stack.top());
break;
}
case DUP_TOP_TWO: {
auto& stack = tc.frame.stack;
Register* top = stack.top();
Register* snd = stack.top(1);
stack.push(snd);
stack.push(top);
break;
}
case ROT_TWO: {
auto& stack = tc.frame.stack;
Register* top = stack.pop();
Register* snd = stack.pop();
stack.push(top);
stack.push(snd);
break;
}
case ROT_THREE: {
auto& stack = tc.frame.stack;
Register* top = stack.pop();
Register* snd = stack.pop();
Register* thd = stack.pop();
stack.push(top);
stack.push(thd);
stack.push(snd);
break;
}
case ROT_FOUR: {
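// Rotate the top four stack values: with the top listed first,
// [r1, r2, r3, r4] becomes [r2, r3, r4, r1].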
auto& stack = tc.frame.stack;
Register* r1 = stack.pop();
Register* r2 = stack.pop();
Register* r3 = stack.pop();
Register* r4 = stack.pop();
stack.push(r1);
stack.push(r4);
stack.push(r3);
stack.push(r2);
break;
}
case FOR_ITER: {
emitForIter(tc, bc_instr);
break;
}
case LOAD_FIELD: {
emitLoadField(tc, bc_instr);
break;
}
case CAST: {
emitCast(tc, bc_instr);
break;
}
case TP_ALLOC: {
emitTpAlloc(tc, bc_instr);
break;
}
case CHECK_ARGS: {
// CHECK_ARGS is handled in the prologue
break;
}
case STORE_FIELD: {
emitStoreField(tc, bc_instr);
break;
}
case POP_JUMP_IF_ZERO:
case POP_JUMP_IF_NONZERO: {
emitPopJumpIf(tc, bc_instr);
break;
}
case IMPORT_FROM: {
emitImportFrom(tc, bc_instr);
break;
}
case IMPORT_NAME: {
emitImportName(tc, bc_instr);
break;
}
case RAISE_VARARGS: {
emitRaiseVarargs(tc, bc_instr);
break;
}
case YIELD_VALUE: {
emitYieldValue(tc);
break;
}
case YIELD_FROM: {
if (is_in_async_for_header_block()) {
emitAsyncForHeaderYieldFrom(tc, bc_instr);
} else {
emitYieldFrom(tc, temps_.AllocateStack());
}
break;
}
case GET_AWAITABLE: {
Py_ssize_t idx = bc_instr.index();
int prev_op = idx ? bc_instrs.at(idx - 1).opcode() : 0;
emitGetAwaitable(irfunc.cfg, tc, prev_op);
break;
}
case BUILD_STRING: {
emitBuildString(tc, bc_instr);
break;
}
case FORMAT_VALUE: {
emitFormatValue(tc, bc_instr);
break;
}
case MAP_ADD: {
emitMapAdd(tc, bc_instr);
break;
}
case SET_ADD: {
emitSetAdd(tc, bc_instr);
break;
}
case UNPACK_EX: {
emitUnpackEx(tc, bc_instr);
break;
}
case UNPACK_SEQUENCE: {
emitUnpackSequence(irfunc.cfg, tc, bc_instr);
break;
}
case DELETE_SUBSCR: {
Register* sub = tc.frame.stack.pop();
Register* container = tc.frame.stack.pop();
tc.emit<DeleteSubscr>(container, sub, tc.frame);
break;
}
case DELETE_FAST: {
int var_idx = bc_instr.oparg();
Register* var = tc.frame.locals[var_idx];
tc.emit<LoadConst>(var, TNullptr);
break;
}
case BEFORE_ASYNC_WITH: {
emitBeforeAsyncWith(tc);
break;
}
case SETUP_ASYNC_WITH: {
emitSetupAsyncWith(tc, bc_instr);
break;
}
case SETUP_WITH: {
emitSetupWith(tc, bc_instr);
break;
}
case WITH_CLEANUP_START: {
emitWithCleanupStart(tc);
break;
}
case WITH_CLEANUP_FINISH: {
emitWithCleanupFinish(tc);
break;
}
default: {
// NOTREACHED
JIT_CHECK(false, "unhandled opcode: %d", bc_instr.opcode());
break;
}
}
if (should_snapshot(bc_instr, is_in_async_for_header_block())) {
tc.snapshot();
}
}
// Insert jumps for blocks that fall through.
auto last_instr = tc.block->GetTerminator();
if ((last_instr == nullptr) || !last_instr->IsTerminator()) {
auto off = bc_block.endOffset();
last_instr = tc.emit<Branch>(getBlockAtOff(off));
}
// Make sure any values left on the stack are in the registers that we
// expect
BlockCanonicalizer bc;
bc.Run(tc.block, temps_, tc.frame.stack);
// Add successors to be processed
//
// These bytecodes alter the operand stack along one branch and leave it
// untouched along the other. Thus, they must be special cased.
BytecodeInstruction last_bc_instr = bc_block.lastInstr();
switch (last_bc_instr.opcode()) {
case BEGIN_FINALLY:
case CALL_FINALLY:
case END_FINALLY:
case POP_FINALLY: {
// Opcodes for handling finally blocks are handled specially because
// CPython does not guarantee a constant stack depth when entering a
// finally block. We work around the issue by "tail duplicating" the
// finally block at each "call site" (BEGIN_FINALLY or CALL_FINALLY) by
// recursing into the compiler with a fresh set of basic blocks. The
// callee then links the finally block back to us and queues the
// appropriate block for processing. See the various `emit` functions
// for these opcodes for the implementation.
break;
}
case FOR_ITER: {
auto condbr = static_cast<CondBranchIterNotDone*>(last_instr);
auto new_frame = tc.frame;
// Discard the sentinel value signaling that iteration is complete and the
// exhausted iterator itself.
new_frame.stack.discard(2);
queue.emplace_back(condbr->true_bb(), tc.frame);
queue.emplace_back(condbr->false_bb(), new_frame);
break;
}
case JUMP_IF_FALSE_OR_POP:
case JUMP_IF_ZERO_OR_POP: {
auto condbr = static_cast<CondBranch*>(last_instr);
auto new_frame = tc.frame;
new_frame.stack.pop();
queue.emplace_back(condbr->true_bb(), new_frame);
queue.emplace_back(condbr->false_bb(), tc.frame);
break;
}
case JUMP_IF_NONZERO_OR_POP:
case JUMP_IF_TRUE_OR_POP: {
auto condbr = static_cast<CondBranch*>(last_instr);
auto new_frame = tc.frame;
new_frame.stack.pop();
queue.emplace_back(condbr->true_bb(), tc.frame);
queue.emplace_back(condbr->false_bb(), new_frame);
break;
}
default: {
if (last_bc_instr.opcode() == YIELD_FROM &&
is_in_async_for_header_block()) {
JIT_CHECK(
last_instr->IsCondBranch(),
"Async-for header should end with CondBranch");
auto condbr = static_cast<CondBranch*>(last_instr);
FrameState new_frame = tc.frame;
new_frame.stack.pop();
queue.emplace_back(condbr->true_bb(), tc.frame);
queue.emplace_back(condbr->false_bb(), std::move(new_frame));
break;
}
for (std::size_t i = 0; i < last_instr->numEdges(); i++) {
auto succ = last_instr->successor(i);
queue.emplace_back(succ, tc.frame);
}
break;
}
}
}
for (auto block : loop_headers) {
insertEvalBreakerCheckForLoop(irfunc.cfg, block);
}
}
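// Emit the Assign instructions that move `reg` into each destination recorded
// in copies_, recursing so that a destination's current value is relocated
// before it is overwritten. A cycle (e.g. two stack slots whose values simply
// need to swap registers) is broken by parking one value in a fresh temporary.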
void BlockCanonicalizer::InsertCopies(
Register* reg,
TempAllocator& temps,
Instr& terminator,
std::vector<Register*>& alloced) {
if (done_.count(reg)) {
return;
} else if (processing_.count(reg)) {
// We've detected a cycle. Move the register to a new home
// in order to break the cycle.
auto tmp = temps.AllocateStack();
auto mov = Assign::create(tmp, reg);
mov->copyBytecodeOffset(terminator);
mov->InsertBefore(terminator);
moved_[reg] = tmp;
alloced.emplace_back(tmp);
return;
}
auto orig_reg = reg;
for (auto dst : copies_[reg]) {
auto it = copies_.find(dst);
if (it != copies_.end()) {
// The destination also needs to be moved. So deal with it first.
processing_.insert(reg);
InsertCopies(dst, temps, terminator, alloced);
processing_.erase(reg);
// It's possible that the register we were processing was moved
// because it participated in a cycle
auto it2 = moved_.find(reg);
if (it2 != moved_.end()) {
reg = it2->second;
}
}
auto mov = Assign::create(dst, reg);
mov->copyBytecodeOffset(terminator);
mov->InsertBefore(terminator);
}
done_.insert(orig_reg);
}
void BlockCanonicalizer::Run(
BasicBlock* block,
TempAllocator& temps,
OperandStack& stack) {
if (stack.isEmpty()) {
return;
}
processing_.clear();
copies_.clear();
moved_.clear();
// Compute the desired stack layout
std::vector<Register*> dsts;
dsts.reserve(stack.size());
for (std::size_t i = 0; i < stack.size(); i++) {
auto reg = temps.GetOrAllocateStack(i);
dsts.emplace_back(reg);
}
// Compute the minimum number of copies that need to happen
std::vector<Register*> need_copy;
auto term = block->GetTerminator();
std::vector<Register*> alloced;
for (std::size_t i = 0; i < stack.size(); i++) {
auto src = stack.at(i);
auto dst = dsts[i];
if (src != dst) {
need_copy.emplace_back(src);
copies_[src].emplace_back(dst);
if (term->Uses(src)) {
term->ReplaceUsesOf(src, dst);
} else if (term->Uses(dst)) {
auto tmp = temps.AllocateStack();
alloced.emplace_back(tmp);
auto mov = Assign::create(tmp, dst);
mov->InsertBefore(*term);
term->ReplaceUsesOf(dst, tmp);
}
}
}
if (need_copy.empty()) {
return;
}
for (auto reg : need_copy) {
InsertCopies(reg, temps, *term, alloced);
}
// Put the stack in canonical form
for (std::size_t i = 0; i < stack.size(); i++) {
stack.atPut(i, dsts[i]);
}
}
static inline BinaryOpKind get_bin_op_kind(
const jit::BytecodeInstruction& bc_instr) {
switch (bc_instr.opcode()) {
case BINARY_ADD: {
return BinaryOpKind::kAdd;
}
case BINARY_AND: {
return BinaryOpKind::kAnd;
}
case BINARY_FLOOR_DIVIDE: {
return BinaryOpKind::kFloorDivide;
}
case BINARY_LSHIFT: {
return BinaryOpKind::kLShift;
}
case BINARY_MATRIX_MULTIPLY: {
return BinaryOpKind::kMatrixMultiply;
}
case BINARY_MODULO: {
return BinaryOpKind::kModulo;
}
case BINARY_MULTIPLY: {
return BinaryOpKind::kMultiply;
}
case BINARY_OR: {
return BinaryOpKind::kOr;
}
case BINARY_POWER: {
return BinaryOpKind::kPower;
}
case BINARY_RSHIFT: {
return BinaryOpKind::kRShift;
}
case BINARY_SUBSCR: {
return BinaryOpKind::kSubscript;
}
case BINARY_SUBTRACT: {
return BinaryOpKind::kSubtract;
}
case BINARY_TRUE_DIVIDE: {
return BinaryOpKind::kTrueDivide;
}
case BINARY_XOR: {
return BinaryOpKind::kXor;
}
default: {
JIT_CHECK(false, "unhandled binary op %d", bc_instr.opcode());
// NOTREACHED
break;
}
}
}
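// Translate any of the call opcodes. A call in a coroutine that is
// immediately awaited appears as the bytecode sequence
//
//   CALL_FUNCTION <nargs>   (or CALL_METHOD, CALL_FUNCTION_KW, ...)
//   GET_AWAITABLE
//   LOAD_CONST
//   YIELD_FROM
//
// and in that case the whole sequence is translated here so the call's result
// can either take the await path or branch straight to the post-await block
// (see emitDispatchEagerCoroResult).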
void HIRBuilder::emitAnyCall(
CFG& cfg,
TranslationContext& tc,
jit::BytecodeInstructionBlock::Iterator& bc_it,
const jit::BytecodeInstructionBlock& bc_instrs) {
BytecodeInstruction bc_instr = *bc_it;
int idx = bc_instr.index();
bool is_awaited = code_->co_flags & CO_COROUTINE &&
// We only need to be followed by GET_AWAITABLE to know we are awaited,
// but we also need to ensure the following LOAD_CONST and YIELD_FROM are
// inside this BytecodeInstructionBlock. This may not be the case if the
// 'await' is shared as in 'await (x if y else z)'.
bc_it.remainingInstrs() >= 3 &&
bc_instrs.at(idx + 1).opcode() == GET_AWAITABLE;
JIT_CHECK(
!is_awaited ||
(bc_instrs.at(idx + 2).opcode() == LOAD_CONST &&
bc_instrs.at(idx + 3).opcode() == YIELD_FROM),
"GET_AWAITABLE should always be followed by LOAD_CONST and "
"YIELD_FROM");
bool call_used_is_awaited = true;
switch (bc_instr.opcode()) {
case CALL_FUNCTION: {
emitCallFunction(tc, bc_instr, is_awaited);
break;
}
case CALL_FUNCTION_EX: {
emitCallEx(tc, bc_instr, is_awaited);
break;
}
case CALL_FUNCTION_KW: {
emitCallKWArgs(tc, bc_instr, is_awaited);
break;
}
case CALL_METHOD: {
emitCallMethod(tc, bc_instr, is_awaited);
break;
}
case INVOKE_FUNCTION: {
call_used_is_awaited = emitInvokeFunction(tc, bc_instr, is_awaited);
break;
}
case INVOKE_METHOD: {
call_used_is_awaited = emitInvokeMethod(tc, bc_instr, is_awaited);
break;
}
default: {
JIT_CHECK(false, "Unhandled call opcode");
}
}
if (is_awaited && call_used_is_awaited) {
Register* out = temps_.AllocateStack();
TranslationContext await_block{cfg.AllocateBlock(), tc.frame};
TranslationContext post_await_block{cfg.AllocateBlock(), tc.frame};
emitDispatchEagerCoroResult(
cfg, tc, out, await_block.block, post_await_block.block);
tc.block = await_block.block;
++bc_it;
emitGetAwaitable(cfg, tc, bc_instr.opcode());
++bc_it;
emitLoadConst(tc, *bc_it);
++bc_it;
emitYieldFrom(tc, out);
tc.emit<Branch>(post_await_block.block);
tc.block = post_await_block.block;
}
}
void HIRBuilder::emitBinaryOp(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
auto& stack = tc.frame.stack;
Register* right = stack.pop();
Register* left = stack.pop();
Register* result = temps_.AllocateStack();
BinaryOpKind op_kind = get_bin_op_kind(bc_instr);
tc.emit<BinaryOp>(result, op_kind, left, right, tc.frame);
stack.push(result);
}
static inline InPlaceOpKind get_inplace_op_kind(
const jit::BytecodeInstruction& bc_instr) {
switch (bc_instr.opcode()) {
case INPLACE_ADD: {
return InPlaceOpKind::kAdd;
}
case INPLACE_AND: {
return InPlaceOpKind::kAnd;
}
case INPLACE_FLOOR_DIVIDE: {
return InPlaceOpKind::kFloorDivide;
}
case INPLACE_LSHIFT: {
return InPlaceOpKind::kLShift;
}
case INPLACE_MATRIX_MULTIPLY: {
return InPlaceOpKind::kMatrixMultiply;
}
case INPLACE_MODULO: {
return InPlaceOpKind::kModulo;
}
case INPLACE_MULTIPLY: {
return InPlaceOpKind::kMultiply;
}
case INPLACE_OR: {
return InPlaceOpKind::kOr;
}
case INPLACE_POWER: {
return InPlaceOpKind::kPower;
}
case INPLACE_RSHIFT: {
return InPlaceOpKind::kRShift;
}
case INPLACE_SUBTRACT: {
return InPlaceOpKind::kSubtract;
}
case INPLACE_TRUE_DIVIDE: {
return InPlaceOpKind::kTrueDivide;
}
case INPLACE_XOR: {
return InPlaceOpKind::kXor;
}
default: {
JIT_CHECK(false, "unhandled inplace op %d", bc_instr.opcode());
// NOTREACHED
break;
}
}
}
void HIRBuilder::emitInPlaceOp(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
auto& stack = tc.frame.stack;
Register* right = stack.pop();
Register* left = stack.pop();
Register* result = temps_.AllocateStack();
InPlaceOpKind op_kind = get_inplace_op_kind(bc_instr);
tc.emit<InPlaceOp>(result, op_kind, left, right, tc.frame);
stack.push(result);
}
static inline UnaryOpKind get_unary_op_kind(
const jit::BytecodeInstruction& bc_instr) {
switch (bc_instr.opcode()) {
case UNARY_NOT:
return UnaryOpKind::kNot;
case UNARY_NEGATIVE:
return UnaryOpKind::kNegate;
case UNARY_POSITIVE:
return UnaryOpKind::kPositive;
case UNARY_INVERT:
return UnaryOpKind::kInvert;
default:
JIT_CHECK(false, "unhandled unary op %d", bc_instr.opcode());
// NOTREACHED
break;
}
}
void HIRBuilder::emitUnaryOp(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
Register* operand = tc.frame.stack.pop();
Register* result = temps_.AllocateStack();
UnaryOpKind op_kind = get_unary_op_kind(bc_instr);
tc.emit<UnaryOp>(result, op_kind, operand, tc.frame);
tc.frame.stack.push(result);
}
void HIRBuilder::emitCallFunction(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr,
bool is_awaited) {
std::size_t num_operands = static_cast<std::size_t>(bc_instr.oparg()) + 1;
tc.emitVariadic<VectorCall>(temps_, num_operands, is_awaited);
}
void HIRBuilder::emitCallEx(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr,
bool is_awaited) {
Register* dst = temps_.AllocateStack();
OperandStack& stack = tc.frame.stack;
if (bc_instr.oparg() & 0x1) {
Register* kwargs = stack.pop();
Register* pargs = stack.pop();
Register* func = stack.pop();
CallExKw* call = tc.emit<CallExKw>(dst, func, pargs, kwargs, is_awaited);
call->setFrameState(tc.frame);
} else {
Register* pargs = stack.pop();
Register* func = stack.pop();
CallEx* call = tc.emit<CallEx>(dst, func, pargs, is_awaited);
call->setFrameState(tc.frame);
}
stack.push(dst);
}
void HIRBuilder::emitCallKWArgs(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr,
bool is_awaited) {
std::size_t num_operands = static_cast<std::size_t>(bc_instr.oparg()) + 2;
tc.emitVariadic<VectorCallKW>(temps_, num_operands, is_awaited);
}
void HIRBuilder::emitCallMethod(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr,
bool is_awaited) {
std::size_t num_operands = static_cast<std::size_t>(bc_instr.oparg()) + 2;
tc.emitVariadic<CallMethod>(temps_, num_operands, is_awaited, tc.frame);
}
void HIRBuilder::emitBuildSlice(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
std::size_t num_operands = static_cast<std::size_t>(bc_instr.oparg());
tc.emitVariadic<BuildSlice>(temps_, num_operands);
}
void HIRBuilder::emitListAppend(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
auto item = tc.frame.stack.pop();
auto list = tc.frame.stack.peek(bc_instr.oparg());
auto dst = temps_.AllocateStack();
tc.emit<ListAppend>(dst, list, item, tc.frame);
}
void HIRBuilder::emitLoadIterableArg(
CFG& cfg,
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
auto iterable = tc.frame.stack.pop();
Register* tuple;
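// If the iterable isn't statically known to be an exact tuple, branch at
// runtime: reuse it directly when it is a tuple, otherwise coerce it with
// GetTuple, then merge both paths before indexing into it.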
if (iterable->type() != TTupleExact) {
TranslationContext tuple_path{cfg.AllocateBlock(), tc.frame};
tuple_path.snapshot();
TranslationContext non_tuple_path{cfg.AllocateBlock(), tc.frame};
non_tuple_path.snapshot();
tc.emit<CondBranchCheckType>(
iterable, TTuple, tuple_path.block, non_tuple_path.block);
tc.block = cfg.AllocateBlock();
tc.snapshot();
tuple = temps_.AllocateStack();
tuple_path.emit<Assign>(tuple, iterable);
tuple_path.emit<Branch>(tc.block);
non_tuple_path.emit<GetTuple>(tuple, iterable, tc.frame);
non_tuple_path.emit<Branch>(tc.block);
} else {
tuple = iterable;
}
auto tmp = temps_.AllocateStack();
auto tup_idx = temps_.AllocateStack();
auto element = temps_.AllocateStack();
tc.emit<LoadConst>(tmp, Type::fromCInt(bc_instr.oparg(), TCInt64));
tc.emitChecked<PrimitiveBox>(tup_idx, tmp, TCInt64);
tc.emit<BinaryOp>(
element, BinaryOpKind::kSubscript, tuple, tup_idx, tc.frame);
tc.frame.stack.push(element);
tc.frame.stack.push(tuple);
}
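// Try to call a builtin or statically-typed target directly through its C
// entry point (CallStatic/CallStaticRetVoid) instead of going through
// vectorcall. Returns true if the direct call was emitted; callers fall back
// to a normal call sequence otherwise.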
bool HIRBuilder::tryEmitDirectMethodCall(
const InvokeTarget& target,
TranslationContext& tc,
long nargs) {
if (target.is_statically_typed || nargs == target.builtin_expected_nargs) {
Instr* staticCall;
Register* out = nullptr;
if (target.builtin_returns_void) {
staticCall = tc.emit<CallStaticRetVoid>(nargs, target.builtin_c_func);
} else {
out = temps_.AllocateStack();
Type ret_type =
target.builtin_returns_error_code ? TCInt32 : target.return_type;
staticCall =
tc.emit<CallStatic>(nargs, out, target.builtin_c_func, ret_type);
}
auto& stack = tc.frame.stack;
for (auto i = nargs - 1; i >= 0; i--) {
Register* operand = stack.pop();
staticCall->SetOperand(i, operand);
}
if (target.builtin_returns_error_code) {
tc.emit<CheckNeg>(out, out, tc.frame);
} else if (out != nullptr && !(target.return_type.couldBe(TPrimitive))) {
tc.emit<CheckExc>(out, out, tc.frame);
}
if (target.builtin_returns_void || target.builtin_returns_error_code) {
// We could update the compiler so that void returning functions either
// are only used in void contexts, or explicitly emit a LOAD_CONST None
// when not used in a void context. For now we just produce None here (and
// in _PyClassLoader_ConvertRet).
Register* tmp = temps_.AllocateStack();
tc.emit<LoadConst>(tmp, TNoneType);
stack.push(tmp);
} else {
stack.push(out);
}
return true;
}
return false;
}
std::vector<Register*> HIRBuilder::setupStaticArgs(
TranslationContext& tc,
const InvokeTarget& target,
long nargs) {
auto arg_regs = std::vector<Register*>(nargs, nullptr);
for (auto i = nargs - 1; i >= 0; i--) {
arg_regs[i] = tc.frame.stack.pop();
}
// If we have a static func but we couldn't emit a direct x64 call, we
// have to box any primitive args
if (target.is_statically_typed) {
for (auto [argnum, type] : target.primitive_arg_types) {
Register* reg = arg_regs.at(argnum);
auto boxed_primitive_tmp = temps_.AllocateStack();
tc.emitChecked<PrimitiveBox>(boxed_primitive_tmp, reg, type);
arg_regs[argnum] = boxed_primitive_tmp;
}
}
return arg_regs;
}
void HIRBuilder::fixStaticReturn(
TranslationContext& tc,
Register* ret_val,
Type ret_type) {
Type boxed_ret = ret_type;
if (boxed_ret <= TPrimitive) {
boxed_ret = boxed_ret.asBoxed();
}
if (boxed_ret < TObject) {
// TODO(T108048062): This should be a type check rather than a RefineType.
tc.emit<RefineType>(ret_val, boxed_ret, ret_val);
}
// Since we are not doing an x64 call, we will get a boxed value; if the
// function is supposed to return a primitive, we need to unbox it because
// later code in the function will expect the primitive.
if (ret_type <= TPrimitive) {
unboxPrimitive(tc, ret_val, ret_val, ret_type);
}
}
bool HIRBuilder::emitInvokeFunction(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr,
bool is_awaited) {
BorrowedRef<> arg = constArg(bc_instr);
BorrowedRef<> descr = PyTuple_GET_ITEM(arg.get(), 0);
long nargs = PyLong_AsLong(PyTuple_GET_ITEM(arg.get(), 1));
const InvokeTarget& target = preloader_.invokeFunctionTarget(descr);
Register* funcreg = temps_.AllocateStack();
if (target.container_is_immutable) {
// try to emit a direct x64 call (InvokeStaticFunction/CallStatic) if we can
if (!target.uses_runtime_func) {
if (target.is_function && target.is_statically_typed) {
if (_PyJIT_CompileFunction(target.func()) == PYJIT_RESULT_RETRY) {
JIT_DLOG(
"Warning: recursive compile of '%s' failed as it is already "
"being compiled",
funcFullname(target.func()));
}
// Direct invoke is safe whether we succeeded in JIT-compiling or not,
// it'll just have an extra indirection if not JIT compiled.
Register* out = temps_.AllocateStack();
Type typ = target.return_type <= TCEnum ? TCInt64 : target.return_type;
auto call =
tc.emit<InvokeStaticFunction>(nargs, out, target.func(), typ);
for (auto i = nargs - 1; i >= 0; i--) {
Register* operand = tc.frame.stack.pop();
call->SetOperand(i, operand);
}
call->setFrameState(tc.frame);
tc.frame.stack.push(out);
return false;
} else if (
target.is_builtin && tryEmitDirectMethodCall(target, tc, nargs)) {
return false;
}
}
// we couldn't emit an x64 call, but we know what object we'll vectorcall,
// so load it directly
tc.emit<LoadConst>(funcreg, Type::fromObject(target.callable));
} else {
// The target is patchable so we have to load it indirectly
tc.emit<LoadFunctionIndirect>(
target.indirect_ptr, descr, funcreg, tc.frame);
}
std::vector<Register*> arg_regs = setupStaticArgs(tc, target, nargs);
Register* out = temps_.AllocateStack();
VectorCallBase* call;
if (target.container_is_immutable) {
call = tc.emit<VectorCallStatic>(nargs + 1, out, is_awaited);
} else {
call = tc.emit<VectorCall>(nargs + 1, out, is_awaited);
}
for (auto i = 0; i < nargs; i++) {
call->SetOperand(i + 1, arg_regs.at(i));
}
call->SetOperand(0, funcreg);
call->setFrameState(tc.frame);
fixStaticReturn(tc, out, target.return_type);
tc.frame.stack.push(out);
return true;
}
bool HIRBuilder::emitInvokeMethod(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr,
bool is_awaited) {
BorrowedRef<> arg = constArg(bc_instr);
BorrowedRef<> descr = PyTuple_GET_ITEM(arg.get(), 0);
long nargs = PyLong_AsLong(PyTuple_GET_ITEM(arg.get(), 1)) + 1;
bool is_classmethod = PyTuple_GET_SIZE(arg.get()) == 3 &&
(PyTuple_GET_ITEM(arg.get(), 2) == Py_True);
const InvokeTarget& target = preloader_.invokeMethodTarget(descr);
if (target.is_builtin && tryEmitDirectMethodCall(target, tc, nargs)) {
return false;
}
std::vector<Register*> arg_regs = setupStaticArgs(tc, target, nargs);
Register* out = temps_.AllocateStack();
auto call = tc.emit<InvokeMethod>(
nargs, out, target.slot, is_awaited, is_classmethod);
for (auto i = 0; i < nargs; i++) {
call->SetOperand(i, arg_regs.at(i));
}
call->setFrameState(tc.frame);
fixStaticReturn(tc, out, target.return_type);
tc.frame.stack.push(out);
return true;
}
void HIRBuilder::emitCompareOp(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
auto& stack = tc.frame.stack;
Register* right = stack.pop();
Register* left = stack.pop();
Register* result = temps_.AllocateStack();
CompareOp op = static_cast<CompareOp>(bc_instr.oparg());
tc.emit<Compare>(result, op, left, right, tc.frame);
stack.push(result);
}
void HIRBuilder::emitJumpIf(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
Register* var = tc.frame.stack.top();
Py_ssize_t true_offset, false_offset;
bool check_truthy = true;
switch (bc_instr.opcode()) {
case JUMP_IF_NONZERO_OR_POP:
check_truthy = false;
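// Fall through: same jump offsets as JUMP_IF_TRUE_OR_POP, but branch on
// the raw value instead of calling IsTruthy.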
case JUMP_IF_TRUE_OR_POP: {
true_offset = bc_instr.oparg();
false_offset = bc_instr.NextInstrOffset();
break;
}
case JUMP_IF_ZERO_OR_POP:
check_truthy = false;
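// Fall through: same jump offsets as JUMP_IF_FALSE_OR_POP, but branch on
// the raw value instead of calling IsTruthy.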
case JUMP_IF_FALSE_OR_POP: {
false_offset = bc_instr.oparg();
true_offset = bc_instr.NextInstrOffset();
break;
}
default: {
// NOTREACHED
JIT_CHECK(
false,
"trying to translate non-jump-if bytecode: %d",
bc_instr.opcode());
break;
}
}
BasicBlock* true_block = getBlockAtOff(true_offset);
BasicBlock* false_block = getBlockAtOff(false_offset);
if (check_truthy) {
Register* tval = temps_.AllocateNonStack();
// Registers that hold the result of `IsTruthy` are guaranteed to never be
// the home of a value left on the stack at the end of a basic block, so we
// don't need to worry about potentially storing a PyObject in them.
tc.emit<IsTruthy>(tval, var, tc.frame);
tc.emit<CondBranch>(tval, true_block, false_block);
} else {
tc.emit<CondBranch>(var, true_block, false_block);
}
}
void HIRBuilder::emitDeleteAttr(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
Register* receiver = tc.frame.stack.pop();
tc.emit<DeleteAttr>(receiver, bc_instr.oparg(), tc.frame);
}
void HIRBuilder::emitLoadAttr(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
Register* receiver = tc.frame.stack.pop();
Register* result = temps_.AllocateStack();
tc.emit<LoadAttr>(result, receiver, bc_instr.oparg(), tc.frame);
tc.frame.stack.push(result);
}
void HIRBuilder::emitLoadMethod(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
Register* receiver = tc.frame.stack.top();
Register* result = temps_.AllocateStack();
tc.emit<LoadMethod>(result, receiver, bc_instr.oparg(), tc.frame);
tc.frame.stack.push(result);
}
void HIRBuilder::emitLoadMethodOrAttrSuper(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr,
bool load_method) {
Register* receiver = tc.frame.stack.pop();
Register* type = tc.frame.stack.pop();
Register* global_super = tc.frame.stack.pop();
Register* result = temps_.AllocateStack();
PyObject* oparg = PyTuple_GET_ITEM(code_->co_consts, bc_instr.oparg());
int name_idx = PyLong_AsLong(PyTuple_GET_ITEM(oparg, 0));
bool no_args_in_super_call = PyTuple_GET_ITEM(oparg, 1) == Py_True;
if (load_method) {
tc.frame.stack.push(receiver);
tc.emit<LoadMethodSuper>(
result,
global_super,
type,
receiver,
name_idx,
no_args_in_super_call,
tc.frame);
} else {
tc.emit<LoadAttrSuper>(
result,
global_super,
type,
receiver,
name_idx,
no_args_in_super_call,
tc.frame);
}
tc.frame.stack.push(result);
}
void HIRBuilder::emitLoadDeref(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
int idx = bc_instr.oparg();
Register* src = tc.frame.cells[idx];
Register* dst = temps_.AllocateStack();
int frame_idx = tc.frame.locals.size() + idx;
tc.emit<LoadCellItem>(dst, src);
tc.emit<CheckVar>(dst, dst, getVarname(code_, frame_idx), tc.frame);
tc.frame.stack.push(dst);
}
void HIRBuilder::emitStoreDeref(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
Register* old = temps_.AllocateStack();
Register* dst = tc.frame.cells[bc_instr.oparg()];
Register* src = tc.frame.stack.pop();
tc.emit<StealCellItem>(old, dst);
tc.emit<SetCellItem>(dst, src, old);
}
void HIRBuilder::emitLoadConst(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
Register* tmp = temps_.AllocateStack();
JIT_CHECK(
bc_instr.oparg() < PyTuple_Size(code_->co_consts),
"LOAD_CONST index out of bounds");
tc.emit<LoadConst>(
tmp,
Type::fromObject(PyTuple_GET_ITEM(code_->co_consts, bc_instr.oparg())));
tc.frame.stack.push(tmp);
}
void HIRBuilder::emitLoadFast(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
int var_idx = bc_instr.oparg();
Register* var = tc.frame.locals[var_idx];
tc.emit<CheckVar>(var, var, getVarname(code_, var_idx), tc.frame);
tc.frame.stack.push(var);
}
void HIRBuilder::emitLoadLocal(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
PyObject* index_and_descr =
PyTuple_GET_ITEM(code_->co_consts, bc_instr.oparg());
int index = PyLong_AsLong(PyTuple_GET_ITEM(index_and_descr, 0));
auto var = tc.frame.locals[index];
tc.frame.stack.push(var);
}
void HIRBuilder::emitStoreLocal(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
Register* src = tc.frame.stack.pop();
PyObject* index_and_descr =
PyTuple_GET_ITEM(code_->co_consts, bc_instr.oparg());
int index = PyLong_AsLong(PyTuple_GET_ITEM(index_and_descr, 0));
auto dst = tc.frame.locals[index];
moveOverwrittenStackRegisters(tc, dst);
tc.emit<Assign>(dst, src);
}
void HIRBuilder::emitLoadType(
TranslationContext& tc,
const jit::BytecodeInstruction&) {
Register* instance = tc.frame.stack.pop();
auto type = temps_.AllocateStack();
tc.emit<LoadField>(
type, instance, "ob_type", offsetof(PyObject, ob_type), TType);
tc.frame.stack.push(type);
}
void HIRBuilder::emitConvertPrimitive(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
Register* val = tc.frame.stack.pop();
Register* out = temps_.AllocateStack();
Type to_type = prim_type_to_type(bc_instr.oparg() >> 4);
tc.emit<IntConvert>(out, val, to_type);
tc.frame.stack.push(out);
}
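// Translate PRIMITIVE_LOAD_CONST: the constant is a (value, primitive type
// code) pair that is materialized as a typed LoadConst.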
void HIRBuilder::emitPrimitiveLoadConst(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
Register* tmp = temps_.AllocateStack();
int index = bc_instr.oparg();
JIT_CHECK(
index < PyTuple_Size(code_->co_consts),
"PRIMITIVE_LOAD_CONST index out of bounds");
PyObject* num_and_type = PyTuple_GET_ITEM(code_->co_consts, index);
JIT_CHECK(
PyTuple_Size(num_and_type) == 2,
"wrong size for PRIMITIVE_LOAD_CONST arg tuple")
PyObject* num = PyTuple_GET_ITEM(num_and_type, 0);
Type size =
prim_type_to_type(PyLong_AsSsize_t(PyTuple_GET_ITEM(num_and_type, 1)));
Type type = TBottom;
if (size == TCDouble) {
type = Type::fromCDouble(PyFloat_AsDouble(num));
} else if (size <= TCBool) {
type = Type::fromCBool(num == Py_True);
} else {
type = (size <= TCUnsigned)
? Type::fromCUInt(PyLong_AsUnsignedLong(num), size)
: Type::fromCInt(PyLong_AsLong(num), size);
}
tc.emit<LoadConst>(tmp, type);
tc.frame.stack.push(tmp);
}
void HIRBuilder::emitIntLoadConstOld(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
Register* tmp = temps_.AllocateStack();
tc.emit<LoadConst>(tmp, Type::fromCInt(bc_instr.oparg(), TCInt64));
tc.frame.stack.push(tmp);
}
void HIRBuilder::emitPrimitiveBox(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
Register* tmp = temps_.AllocateStack();
Register* src = tc.frame.stack.pop();
Type typ = preloader_.type(constArg(bc_instr));
tc.emitChecked<PrimitiveBox>(tmp, src, typ);
tc.frame.stack.push(tmp);
}
void HIRBuilder::emitPrimitiveUnbox(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
Register* tmp = temps_.AllocateStack();
Register* src = tc.frame.stack.pop();
Type typ = preloader_.type(constArg(bc_instr));
unboxPrimitive(tc, tmp, src, typ);
tc.frame.stack.push(tmp);
}
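// Unbox a Python object into a primitive value. Except for CBool and CDouble,
// the unbox may fail, so check whether an error occurred afterwards.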
void HIRBuilder::unboxPrimitive(
TranslationContext& tc,
Register* dst,
Register* src,
Type type) {
tc.emit<PrimitiveUnbox>(dst, src, type);
if (!(type <= (TCBool | TCDouble))) {
Register* did_unbox_work = temps_.AllocateStack();
tc.emit<IsNegativeAndErrOccurred>(did_unbox_work, dst, tc.frame);
}
}
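// Map a PRIMITIVE_BINARY_OP oparg to the corresponding BinaryOpKind.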
static inline BinaryOpKind get_primitive_bin_op_kind(
const jit::BytecodeInstruction& bc_instr) {
switch (bc_instr.oparg()) {
case PRIM_OP_ADD_DBL:
case PRIM_OP_ADD_INT: {
return BinaryOpKind::kAdd;
}
case PRIM_OP_AND_INT: {
return BinaryOpKind::kAnd;
}
case PRIM_OP_DIV_INT: {
return BinaryOpKind::kFloorDivide;
}
case PRIM_OP_DIV_UN_INT: {
return BinaryOpKind::kFloorDivideUnsigned;
}
case PRIM_OP_LSHIFT_INT: {
return BinaryOpKind::kLShift;
}
case PRIM_OP_MOD_INT: {
return BinaryOpKind::kModulo;
}
case PRIM_OP_MOD_UN_INT: {
return BinaryOpKind::kModuloUnsigned;
}
case PRIM_OP_MUL_DBL:
case PRIM_OP_MUL_INT: {
return BinaryOpKind::kMultiply;
}
case PRIM_OP_OR_INT: {
return BinaryOpKind::kOr;
}
case PRIM_OP_RSHIFT_INT: {
return BinaryOpKind::kRShift;
}
case PRIM_OP_RSHIFT_UN_INT: {
return BinaryOpKind::kRShiftUnsigned;
}
case PRIM_OP_SUB_DBL:
case PRIM_OP_SUB_INT: {
return BinaryOpKind::kSubtract;
}
case PRIM_OP_XOR_INT: {
return BinaryOpKind::kXor;
}
case PRIM_OP_DIV_DBL: {
return BinaryOpKind::kTrueDivide;
}
case PRIM_OP_POW_UN_INT: {
return BinaryOpKind::kPowerUnsigned;
}
case PRIM_OP_POW_INT:
case PRIM_OP_POW_DBL: {
return BinaryOpKind::kPower;
}
default: {
JIT_CHECK(false, "unhandled binary op %d", bc_instr.oparg());
// NOTREACHED
break;
}
}
}
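// Return true if the PRIMITIVE_BINARY_OP oparg operates on doubles rather than
// integers.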
static inline bool is_double_binop(int oparg) {
switch (oparg) {
case PRIM_OP_ADD_INT:
case PRIM_OP_AND_INT:
case PRIM_OP_DIV_INT:
case PRIM_OP_DIV_UN_INT:
case PRIM_OP_LSHIFT_INT:
case PRIM_OP_MOD_INT:
case PRIM_OP_MOD_UN_INT:
case PRIM_OP_POW_INT:
case PRIM_OP_POW_UN_INT:
case PRIM_OP_MUL_INT:
case PRIM_OP_OR_INT:
case PRIM_OP_RSHIFT_INT:
case PRIM_OP_RSHIFT_UN_INT:
case PRIM_OP_SUB_INT:
case PRIM_OP_XOR_INT: {
return false;
}
case PRIM_OP_ADD_DBL:
case PRIM_OP_SUB_DBL:
case PRIM_OP_DIV_DBL:
case PRIM_OP_MUL_DBL:
case PRIM_OP_POW_DBL: {
return true;
}
default: {
JIT_CHECK(false, "Invalid binary op %d", oparg);
// NOTREACHED
break;
}
}
}
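// Map a SEQUENCE_GET/SEQUENCE_SET sequence type to its element type.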
static inline Type element_type_from_seq_type(int seq_type) {
switch (seq_type) {
case SEQ_ARRAY_INT8:
return TCInt8;
case SEQ_ARRAY_INT16:
return TCInt16;
case SEQ_ARRAY_INT32:
return TCInt32;
case SEQ_ARRAY_INT64:
return TCInt64;
case SEQ_ARRAY_UINT8:
return TCUInt8;
case SEQ_ARRAY_UINT16:
return TCUInt16;
case SEQ_ARRAY_UINT32:
return TCUInt32;
case SEQ_ARRAY_UINT64:
return TCUInt64;
case SEQ_LIST:
case SEQ_LIST_INEXACT:
case SEQ_CHECKED_LIST:
case SEQ_TUPLE:
return TObject;
default:
JIT_CHECK(false, "invalid sequence type: (%d)", seq_type);
// NOTREACHED
break;
}
}
void HIRBuilder::emitPrimitiveBinaryOp(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
auto& stack = tc.frame.stack;
Register* right = stack.pop();
Register* left = stack.pop();
Register* result = temps_.AllocateStack();
BinaryOpKind op_kind = get_primitive_bin_op_kind(bc_instr);
if (is_double_binop(bc_instr.oparg())) {
tc.emit<DoubleBinaryOp>(result, op_kind, left, right);
} else {
tc.emit<IntBinaryOp>(result, op_kind, left, right);
}
stack.push(result);
}
void HIRBuilder::emitPrimitiveCompare(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
auto& stack = tc.frame.stack;
Register* right = stack.pop();
Register* left = stack.pop();
Register* result = temps_.AllocateStack();
PrimitiveCompareOp op;
switch (bc_instr.oparg()) {
case PRIM_OP_EQ_INT:
case PRIM_OP_EQ_DBL:
op = PrimitiveCompareOp::kEqual;
break;
case PRIM_OP_NE_INT:
case PRIM_OP_NE_DBL:
op = PrimitiveCompareOp::kNotEqual;
break;
case PRIM_OP_LT_INT:
op = PrimitiveCompareOp::kLessThan;
break;
case PRIM_OP_LE_INT:
op = PrimitiveCompareOp::kLessThanEqual;
break;
case PRIM_OP_GT_INT:
op = PrimitiveCompareOp::kGreaterThan;
break;
case PRIM_OP_GE_INT:
op = PrimitiveCompareOp::kGreaterThanEqual;
break;
case PRIM_OP_LT_UN_INT:
case PRIM_OP_LT_DBL:
op = PrimitiveCompareOp::kLessThanUnsigned;
break;
case PRIM_OP_LE_UN_INT:
case PRIM_OP_LE_DBL:
op = PrimitiveCompareOp::kLessThanEqualUnsigned;
break;
case PRIM_OP_GT_UN_INT:
case PRIM_OP_GT_DBL:
op = PrimitiveCompareOp::kGreaterThanUnsigned;
break;
case PRIM_OP_GE_UN_INT:
case PRIM_OP_GE_DBL:
op = PrimitiveCompareOp::kGreaterThanEqualUnsigned;
break;
default:
JIT_CHECK(false, "unsupported comparison");
break;
}
tc.emit<PrimitiveCompare>(result, op, left, right);
stack.push(result);
}
void HIRBuilder::emitPrimitiveUnaryOp(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
Register* value = tc.frame.stack.pop();
Register* result = temps_.AllocateStack();
PrimitiveUnaryOpKind op;
switch (bc_instr.oparg()) {
case PRIM_OP_NEG_INT: {
op = PrimitiveUnaryOpKind::kNegateInt;
tc.emit<PrimitiveUnaryOp>(result, op, value);
break;
}
case PRIM_OP_INV_INT: {
op = PrimitiveUnaryOpKind::kInvertInt;
tc.emit<PrimitiveUnaryOp>(result, op, value);
break;
}
case PRIM_OP_NOT_INT: {
op = PrimitiveUnaryOpKind::kNotInt;
tc.emit<PrimitiveUnaryOp>(result, op, value);
break;
}
case PRIM_OP_NEG_DBL: {
// There is no primitive unary negate for doubles, so multiply by -1.0
// instead.
auto tmp = temps_.AllocateStack();
tc.emit<LoadConst>(tmp, Type::fromCDouble(-1.0));
tc.emit<DoubleBinaryOp>(result, BinaryOpKind::kMultiply, tmp, value);
break;
}
default: {
JIT_CHECK(false, "unsupported unary op");
break;
}
}
tc.frame.stack.push(result);
}
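// Translate FAST_LEN: load the length field directly from the collection's
// object header. Inexact operands are type-checked first and deopt on
// mismatch.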
void HIRBuilder::emitFastLen(
CFG& cfg,
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
auto result = temps_.AllocateStack();
Register* collection;
auto oparg = bc_instr.oparg();
int inexact = oparg & FAST_LEN_INEXACT;
std::size_t offset = 0;
auto type = TBottom;
oparg &= ~FAST_LEN_INEXACT;
const char* name = "";
if (oparg == FAST_LEN_LIST) {
type = TListExact;
offset = offsetof(PyVarObject, ob_size);
name = "ob_size";
} else if (oparg == FAST_LEN_TUPLE) {
type = TTupleExact;
offset = offsetof(PyVarObject, ob_size);
name = "ob_size";
} else if (oparg == FAST_LEN_ARRAY) {
type = TArrayExact;
offset = offsetof(PyVarObject, ob_size);
name = "ob_size";
} else if (oparg == FAST_LEN_DICT) {
type = TDictExact;
offset = offsetof(PyDictObject, ma_used);
name = "ma_used";
} else if (oparg == FAST_LEN_SET) {
type = TSetExact;
offset = offsetof(PySetObject, used);
name = "used";
} else if (oparg == FAST_LEN_STR) {
type = TUnicodeExact;
// Note: in debug mode, the interpreter has an assert that ensures the
// string is "ready"; see PyUnicode_GET_LENGTH.
offset = offsetof(PyASCIIObject, length);
name = "length";
}
JIT_CHECK(offset > 0, "Bad oparg for FAST_LEN");
if (inexact) {
TranslationContext deopt_path{cfg.AllocateBlock(), tc.frame};
deopt_path.frame.next_instr_offset = bc_instr.offset();
deopt_path.snapshot();
deopt_path.emit<Deopt>();
collection = tc.frame.stack.pop();
BasicBlock* fast_path = cfg.AllocateBlock();
tc.emit<CondBranchCheckType>(collection, type, fast_path, deopt_path.block);
tc.block = fast_path;
// TODO(T105038867): Remove once we have RefineTypeInsertion
tc.emit<RefineType>(collection, type, collection);
} else {
collection = tc.frame.stack.pop();
}
tc.emit<LoadField>(result, collection, name, offset, TCInt64);
tc.frame.stack.push(result);
}
void HIRBuilder::emitRefineType(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
Type type = preloader_.type(constArg(bc_instr));
Register* dst = tc.frame.stack.top();
tc.emit<RefineType>(dst, type, dst);
}
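// Translate SEQUENCE_GET: load an element straight out of the item buffer of a
// list or typed array. Inexact lists are guarded to PyList_Type, and the index
// is bounds-checked unless SEQ_SUBSCR_UNCHECKED is set.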
void HIRBuilder::emitSequenceGet(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
auto& stack = tc.frame.stack;
auto idx = stack.pop();
auto sequence = stack.pop();
auto oparg = bc_instr.oparg();
if (oparg == SEQ_LIST_INEXACT) {
auto type = temps_.AllocateStack();
tc.emit<LoadField>(
type, sequence, "ob_type", offsetof(PyObject, ob_type), TType);
tc.emit<GuardIs>(type, (PyObject*)&PyList_Type, type);
tc.emit<RefineType>(sequence, TListExact, sequence);
}
Register* adjusted_idx;
int unchecked = oparg & SEQ_SUBSCR_UNCHECKED;
if (!unchecked) {
adjusted_idx = temps_.AllocateStack();
tc.emit<CheckSequenceBounds>(adjusted_idx, sequence, idx, tc.frame);
} else {
adjusted_idx = idx;
oparg &= ~SEQ_SUBSCR_UNCHECKED;
}
auto ob_item = temps_.AllocateStack();
auto result = temps_.AllocateStack();
int offset;
if (_Py_IS_TYPED_ARRAY(oparg)) {
offset = offsetof(PyStaticArrayObject, ob_item);
} else if (
oparg == SEQ_LIST || oparg == SEQ_LIST_INEXACT ||
oparg == SEQ_CHECKED_LIST) {
offset = offsetof(PyListObject, ob_item);
} else {
JIT_CHECK(false, "Unsupported oparg for SEQUENCE_GET: %d", oparg);
}
tc.emit<LoadField>(ob_item, sequence, "ob_item", offset, TCPtr);
auto type = element_type_from_seq_type(oparg);
tc.emit<LoadArrayItem>(
result, ob_item, adjusted_idx, sequence, /*offset=*/0, type);
stack.push(result);
}
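// Translate SEQUENCE_REPEAT: repeat a list or tuple a given number of times.
// Inexact sequence or number operands are type-checked and deopt on mismatch;
// a boxed count is unboxed to a CInt64 first.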
void HIRBuilder::emitSequenceRepeat(
CFG& cfg,
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
auto& stack = tc.frame.stack;
Register* num;
Register* seq;
auto result = temps_.AllocateStack();
int oparg = bc_instr.oparg();
int seq_inexact = oparg & SEQ_REPEAT_INEXACT_SEQ;
int num_inexact = oparg & SEQ_REPEAT_INEXACT_NUM;
int primitive_num = oparg & SEQ_REPEAT_PRIMITIVE_NUM;
oparg &= ~SEQ_REPEAT_FLAGS;
JIT_DCHECK(
oparg == SEQ_LIST || oparg == SEQ_TUPLE,
"Bad oparg for SEQUENCE_REPEAT: %d",
oparg);
if (seq_inexact || num_inexact) {
TranslationContext deopt_path{cfg.AllocateBlock(), tc.frame};
deopt_path.frame.next_instr_offset = bc_instr.offset();
deopt_path.snapshot();
deopt_path.emit<Deopt>();
// Stack pops must wait until after we snapshot so that, if we deopt, the
// values are still on the stack.
num = stack.pop();
if (num_inexact) {
BasicBlock* fast_path = cfg.AllocateBlock();
tc.emit<CondBranchCheckType>(
num, TLongExact, fast_path, deopt_path.block);
tc.block = fast_path;
// TODO(T105038867): Remove once we have RefineTypeInsertion
tc.emit<RefineType>(num, TLongExact, num);
}
seq = stack.pop();
if (seq_inexact) {
BasicBlock* fast_path = cfg.AllocateBlock();
tc.emit<CondBranchCheckType>(
seq,
(oparg == SEQ_LIST) ? TListExact : TTupleExact,
fast_path,
deopt_path.block);
tc.block = fast_path;
// TODO(T105038867): Remove once we have RefineTypeInsertion
tc.emit<RefineType>(
seq, (oparg == SEQ_LIST) ? TListExact : TTupleExact, seq);
}
} else {
num = stack.pop();
seq = stack.pop();
}
if (!primitive_num) {
auto unboxed_num = temps_.AllocateStack();
tc.emit<PrimitiveUnbox>(unboxed_num, num, TCInt64);
num = unboxed_num;
}
if (oparg == SEQ_LIST) {
tc.emit<RepeatList>(result, seq, num);
} else {
tc.emit<RepeatTuple>(result, seq, num);
}
stack.push(result);
}
void HIRBuilder::emitSequenceSet(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
auto& stack = tc.frame.stack;
auto idx = stack.pop();
auto sequence = stack.pop();
auto value = stack.pop();
auto adjusted_idx = temps_.AllocateStack();
auto oparg = bc_instr.oparg();
if (oparg == SEQ_LIST_INEXACT) {
auto type = temps_.AllocateStack();
tc.emit<LoadField>(
type, sequence, "ob_type", offsetof(PyObject, ob_type), TType);
tc.emit<GuardIs>(type, (PyObject*)&PyList_Type, type);
tc.emit<RefineType>(sequence, TListExact, sequence);
}
tc.emit<CheckSequenceBounds>(adjusted_idx, sequence, idx, tc.frame);
auto ob_item = temps_.AllocateStack();
int offset;
if (_Py_IS_TYPED_ARRAY(oparg)) {
offset = offsetof(PyStaticArrayObject, ob_item);
} else if (oparg == SEQ_LIST || oparg == SEQ_LIST_INEXACT) {
offset = offsetof(PyListObject, ob_item);
} else {
JIT_CHECK(false, "Unsupported oparg for SEQUENCE_SET: %d", oparg);
}
tc.emit<LoadField>(ob_item, sequence, "ob_item", offset, TCPtr);
tc.emit<StoreArrayItem>(
ob_item,
adjusted_idx,
value,
sequence,
element_type_from_seq_type(oparg));
}
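// Translate LOAD_GLOBAL: if the preloader resolved the global, load the cached
// value and guard that it is still the same object; otherwise fall back to a
// generic LoadGlobal.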
void HIRBuilder::emitLoadGlobal(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
auto name_idx = bc_instr.oparg();
Register* result = temps_.AllocateStack();
auto try_fast_path = [&] {
BorrowedRef<> value = preloader_.global(name_idx);
if (value == nullptr) {
return false;
}
tc.emit<LoadGlobalCached>(result, code_, preloader_.globals(), name_idx);
auto guard_is = tc.emit<GuardIs>(result, value, result);
BorrowedRef<> name = PyTuple_GET_ITEM(code_->co_names, name_idx);
guard_is->setDescr(fmt::format("LOAD_GLOBAL: {}", PyUnicode_AsUTF8(name)));
return true;
};
if (!try_fast_path()) {
tc.emit<LoadGlobal>(result, name_idx, tc.frame);
}
tc.frame.stack.push(result);
}
void HIRBuilder::emitMakeFunction(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
int oparg = bc_instr.oparg();
Register* func = temps_.AllocateStack();
Register* qualname = tc.frame.stack.pop();
Register* codeobj = tc.frame.stack.pop();
// Create the function object from the code object and qualified name.
tc.emit<MakeFunction>(func, qualname, codeobj, tc.frame);
if (oparg & 0x08) {
Register* closure = tc.frame.stack.pop();
tc.emit<SetFunctionAttr>(closure, func, FunctionAttr::kClosure);
}
if (oparg & 0x04) {
Register* annotations = tc.frame.stack.pop();
tc.emit<SetFunctionAttr>(annotations, func, FunctionAttr::kAnnotations);
}
if (oparg & 0x02) {
Register* kwdefaults = tc.frame.stack.pop();
tc.emit<SetFunctionAttr>(kwdefaults, func, FunctionAttr::kKwDefaults);
}
if (oparg & 0x01) {
Register* defaults = tc.frame.stack.pop();
tc.emit<SetFunctionAttr>(defaults, func, FunctionAttr::kDefaults);
}
tc.emit<InitFunction>(func);
tc.frame.stack.push(func);
}
void HIRBuilder::emitFunctionCredential(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
int oparg = bc_instr.oparg();
JIT_CHECK(
oparg < PyTuple_Size(code_->co_consts),
"FUNC_CREDENTIAL index out of bounds");
Register* fc_tuple = temps_.AllocateStack();
tc.emit<LoadConst>(
fc_tuple, Type::fromObject(PyTuple_GET_ITEM(code_->co_consts, oparg)));
Register* fc = temps_.AllocateStack();
tc.emitChecked<CallCFunc>(
1, fc, CallCFunc::Func::kfunc_cred_new, std::vector<Register*>{fc_tuple});
tc.frame.stack.push(fc);
}
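// Translate BUILD_LIST / BUILD_TUPLE: allocate the container, then initialize
// it with the operands popped from the stack.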
void HIRBuilder::emitMakeListTuple(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
bool is_tuple = (bc_instr.opcode() == BUILD_TUPLE);
auto num_elems = static_cast<size_t>(bc_instr.oparg());
auto dst = temps_.AllocateStack();
tc.emit<MakeListTuple>(is_tuple, dst, num_elems, tc.frame);
auto init_lt = tc.emit<InitListTuple>(num_elems + 1, is_tuple);
init_lt->SetOperand(0, dst);
for (size_t i = num_elems; i > 0; i--) {
auto opnd = tc.frame.stack.pop();
init_lt->SetOperand(i, opnd);
}
auto new_dst = temps_.AllocateStack();
tc.emit<Assign>(new_dst, dst);
tc.frame.stack.push(new_dst);
}
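// Translate BUILD_{LIST,TUPLE}_UNPACK[_WITH_CALL]: extend a fresh list with
// each iterable on the stack, then convert it to a tuple when building a
// tuple.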
void HIRBuilder::emitMakeListTupleUnpack(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
Register* list = temps_.AllocateStack();
tc.emit<MakeListTuple>(false, list, 0, tc.frame);
bool with_call = bc_instr.opcode() == BUILD_TUPLE_UNPACK_WITH_CALL;
int oparg = bc_instr.oparg();
Register* func =
with_call ? tc.frame.stack.peek(oparg + 1) : temps_.AllocateStack();
for (int i = oparg; i > 0; i--) {
Register* iterable = tc.frame.stack.peek(i);
Register* none = temps_.AllocateStack();
tc.emit<ListExtend>(none, list, iterable, func, tc.frame);
}
Register* retval = list;
bool is_tuple = bc_instr.opcode() != BUILD_LIST_UNPACK;
if (is_tuple) {
Register* tuple = temps_.AllocateStack();
tc.emit<MakeTupleFromList>(tuple, list, tc.frame);
retval = tuple;
}
tc.frame.stack.discard(oparg);
tc.frame.stack.push(retval);
}
void HIRBuilder::emitBuildCheckedList(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
BorrowedRef<> arg = constArg(bc_instr);
BorrowedRef<> descr = PyTuple_GET_ITEM(arg.get(), 0);
Py_ssize_t list_size = PyLong_AsLong(PyTuple_GET_ITEM(arg.get(), 1));
Type type = preloader_.type(descr);
JIT_CHECK(
_PyCheckedList_TypeCheck(type.uniquePyType()),
"expected CheckedList type");
Register* list = temps_.AllocateStack();
tc.emit<MakeCheckedList>(list, list_size, type, tc.frame);
// Fill list
auto init_checked_list = tc.emit<InitListTuple>(list_size + 1, false);
init_checked_list->SetOperand(0, list);
for (size_t i = list_size; i > 0; i--) {
auto operand = tc.frame.stack.pop();
init_checked_list->SetOperand(i, operand);
}
tc.frame.stack.push(list);
}
void HIRBuilder::emitBuildCheckedMap(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
BorrowedRef<> arg = constArg(bc_instr);
BorrowedRef<> descr = PyTuple_GET_ITEM(arg.get(), 0);
Py_ssize_t dict_size = PyLong_AsLong(PyTuple_GET_ITEM(arg.get(), 1));
Type type = preloader_.type(descr);
JIT_CHECK(
_PyCheckedDict_TypeCheck(type.uniquePyType()),
"expected CheckedDict type");
Register* dict = temps_.AllocateStack();
tc.emit<MakeCheckedDict>(dict, dict_size, type, tc.frame);
// Fill dict
auto& stack = tc.frame.stack;
for (auto i = stack.size() - dict_size * 2, end = stack.size(); i < end;
i += 2) {
auto key = stack.at(i);
auto value = stack.at(i + 1);
auto result = temps_.AllocateStack();
tc.emit<SetDictItem>(result, dict, key, value, tc.frame);
}
stack.discard(dict_size * 2);
stack.push(dict);
}
void HIRBuilder::emitBuildMap(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
auto dict_size = bc_instr.oparg();
Register* dict = temps_.AllocateStack();
tc.emit<MakeDict>(dict, dict_size, tc.frame);
// Fill dict
auto& stack = tc.frame.stack;
for (auto i = stack.size() - dict_size * 2, end = stack.size(); i < end;
i += 2) {
auto key = stack.at(i);
auto value = stack.at(i + 1);
auto result = temps_.AllocateStack();
tc.emit<SetDictItem>(result, dict, key, value, tc.frame);
}
stack.discard(dict_size * 2);
stack.push(dict);
}
void HIRBuilder::emitBuildMapUnpack(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr,
bool with_call) {
Register* sum = temps_.AllocateStack();
tc.emit<MakeDict>(sum, 0, tc.frame);
int oparg = bc_instr.oparg();
auto& stack = tc.frame.stack;
Register* func = with_call ? stack.peek(oparg + 2) : temps_.AllocateStack();
for (int i = oparg; i > 0; i--) {
auto arg = stack.peek(i);
auto result = temps_.AllocateStack();
tc.emit<MergeDictUnpack>(result, sum, arg, func, tc.frame);
}
stack.discard(oparg);
stack.push(sum);
}
void HIRBuilder::emitBuildSet(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
Register* set = temps_.AllocateStack();
tc.emit<MakeSet>(set, tc.frame);
int oparg = bc_instr.oparg();
for (int i = oparg; i > 0; i--) {
auto item = tc.frame.stack.peek(i);
auto result = temps_.AllocateStack();
tc.emit<SetSetItem>(result, set, item, tc.frame);
}
tc.frame.stack.discard(oparg);
tc.frame.stack.push(set);
}
void HIRBuilder::emitBuildSetUnpack(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
Register* set = temps_.AllocateStack();
tc.emit<MakeSet>(set, tc.frame);
int oparg = bc_instr.oparg();
for (int i = oparg; i > 0; i--) {
auto iterable = tc.frame.stack.peek(i);
auto result = temps_.AllocateStack();
tc.emit<MergeSetUnpack>(result, set, iterable, tc.frame);
}
tc.frame.stack.discard(oparg);
tc.frame.stack.push(set);
}
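// Translate BUILD_CONST_KEY_MAP: the keys come from a constant tuple on top of
// the stack, with the corresponding values below it.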
void HIRBuilder::emitBuildConstKeyMap(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
auto dict_size = bc_instr.oparg();
Register* dict = temps_.AllocateStack();
tc.emit<MakeDict>(dict, dict_size, tc.frame);
// Fill dict
auto& stack = tc.frame.stack;
Register* keys = stack.pop();
// ceval.c checks the type and size of the keys tuple before proceeding; we
// intentionally skip that here.
for (auto i = 0; i < dict_size; ++i) {
Register* key = temps_.AllocateStack();
tc.emit<LoadTupleItem>(key, keys, i);
Register* value = stack.at(stack.size() - dict_size + i);
Register* result = temps_.AllocateStack();
tc.emit<SetDictItem>(result, dict, key, value, tc.frame);
}
stack.discard(dict_size);
stack.push(dict);
}
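// Translate the POP_JUMP_IF_* family: the TRUE/FALSE variants branch on the
// result of IsTruthy, while the ZERO/NONZERO variants branch on the primitive
// value directly.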
void HIRBuilder::emitPopJumpIf(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
Register* var = tc.frame.stack.pop();
Py_ssize_t true_offset, false_offset;
switch (bc_instr.opcode()) {
case POP_JUMP_IF_ZERO:
case POP_JUMP_IF_FALSE: {
true_offset = bc_instr.NextInstrOffset();
false_offset = bc_instr.oparg();
break;
}
case POP_JUMP_IF_NONZERO:
case POP_JUMP_IF_TRUE: {
true_offset = bc_instr.oparg();
false_offset = bc_instr.NextInstrOffset();
break;
}
default: {
// NOTREACHED
JIT_CHECK(
false,
"trying to translate non pop-jump bytecode: %d",
bc_instr.opcode());
break;
}
}
BasicBlock* true_block = getBlockAtOff(true_offset);
BasicBlock* false_block = getBlockAtOff(false_offset);
if (bc_instr.opcode() == POP_JUMP_IF_FALSE ||
bc_instr.opcode() == POP_JUMP_IF_TRUE) {
Register* tval = temps_.AllocateNonStack();
tc.emit<IsTruthy>(tval, var, tc.frame);
tc.emit<CondBranch>(tval, true_block, false_block);
} else {
tc.emit<CondBranch>(var, true_block, false_block);
}
}
void HIRBuilder::emitStoreAttr(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
Register* receiver = tc.frame.stack.pop();
Register* value = tc.frame.stack.pop();
Register* result = temps_.AllocateStack();
tc.emit<StoreAttr>(result, receiver, value, bc_instr.oparg(), tc.frame);
}
void HIRBuilder::moveOverwrittenStackRegisters(
TranslationContext& tc,
Register* dst) {
// If we're about to overwrite a register that is on the stack, move it to a
// new register.
Register* tmp = nullptr;
auto& stack = tc.frame.stack;
for (std::size_t i = 0, stack_size = stack.size(); i < stack_size; i++) {
if (stack.at(i) == dst) {
if (tmp == nullptr) {
tmp = temps_.AllocateStack();
tc.emit<Assign>(tmp, dst);
}
stack.atPut(i, tmp);
}
}
}
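// Translate STORE_FAST: move any stack references to the old value out of the
// way, then overwrite the local.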
void HIRBuilder::emitStoreFast(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
Register* src = tc.frame.stack.pop();
Register* dst = tc.frame.locals[bc_instr.oparg()];
JIT_DCHECK(dst != nullptr, "no register");
moveOverwrittenStackRegisters(tc, dst);
tc.emit<Assign>(dst, src);
}
void HIRBuilder::emitStoreSubscr(TranslationContext& tc) {
auto& stack = tc.frame.stack;
Register* sub = stack.pop();
Register* container = stack.pop();
Register* value = stack.pop();
Register* result = temps_.AllocateStack();
tc.emit<StoreSubscr>(result, container, sub, value, tc.frame);
}
void HIRBuilder::emitGetIter(TranslationContext& tc) {
Register* iterable = tc.frame.stack.pop();
Register* result = temps_.AllocateStack();
tc.emit<GetIter>(result, iterable, tc.frame);
tc.frame.stack.push(result);
}
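// Translate FOR_ITER: fetch the next value from the iterator and branch to the
// loop body, or to the jump target when the iterator is exhausted.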
void HIRBuilder::emitForIter(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
Register* iterator = tc.frame.stack.top();
Register* next_val = temps_.AllocateStack();
tc.emit<InvokeIterNext>(next_val, iterator, tc.frame);
tc.frame.stack.push(next_val);
BasicBlock* footer = getBlockAtOff(bc_instr.GetJumpTarget());
BasicBlock* body = getBlockAtOff(bc_instr.NextInstrOffset());
tc.emit<CondBranchIterNotDone>(next_val, body, footer);
}
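// Translate GET_YIELD_FROM_ITER: generators (and coroutines, when allowed)
// pass through unchanged; other objects go through GetIter. Yielding from a
// coroutine in a non-coroutine generator raises a TypeError.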
void HIRBuilder::emitGetYieldFromIter(CFG& cfg, TranslationContext& tc) {
Register* iter_in = tc.frame.stack.pop();
bool in_coro = code_->co_flags & (CO_COROUTINE | CO_ITERABLE_COROUTINE);
BasicBlock* done_block = cfg.AllocateBlock();
BasicBlock* next_block = cfg.AllocateBlock();
BasicBlock* nop_block = cfg.AllocateBlock();
BasicBlock* is_coro_block = in_coro ? nop_block : cfg.AllocateBlock();
tc.emit<CondBranchCheckType>(
iter_in, Type::fromTypeExact(&PyCoro_Type), is_coro_block, next_block);
if (!in_coro) {
tc.block = is_coro_block;
tc.emit<RaiseStatic>(
0,
PyExc_TypeError,
"cannot 'yield from' a coroutine object in a non-coroutine generator",
tc.frame);
}
tc.block = next_block;
BasicBlock* slow_path = cfg.AllocateBlock();
Register* iter_out = temps_.AllocateStack();
tc.emit<CondBranchCheckType>(iter_in, TGen, nop_block, slow_path);
tc.block = slow_path;
tc.emit<GetIter>(iter_out, iter_in, tc.frame);
tc.emit<Branch>(done_block);
tc.block = nop_block;
tc.emit<Assign>(iter_out, iter_in);
tc.emit<Branch>(done_block);
tc.block = done_block;
tc.frame.stack.push(iter_out);
}
void HIRBuilder::emitUnpackEx(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
int oparg = bc_instr.oparg();
int arg_before = oparg & 0xff;
int arg_after = oparg >> 8;
auto& stack = tc.frame.stack;
Register* seq = stack.pop();
Register* tuple = temps_.AllocateStack();
tc.emit<UnpackExToTuple>(tuple, seq, arg_before, arg_after, tc.frame);
int total_args = arg_before + arg_after + 1;
for (int i = total_args - 1; i >= 0; i--) {
Register* item = temps_.AllocateStack();
tc.emit<LoadTupleItem>(item, tuple, i);
stack.push(item);
}
}
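// Translate UNPACK_SEQUENCE: fast paths for exact tuples and lists whose size
// matches the oparg; anything else deopts.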
void HIRBuilder::emitUnpackSequence(
CFG& cfg,
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
auto& stack = tc.frame.stack;
Register* seq = stack.top();
TranslationContext deopt_path{cfg.AllocateBlock(), tc.frame};
deopt_path.frame.next_instr_offset = bc_instr.offset();
deopt_path.snapshot();
Deopt* deopt = deopt_path.emit<Deopt>();
deopt->setGuiltyReg(seq);
deopt->setDescr("UNPACK_SEQUENCE");
BasicBlock* fast_path = cfg.AllocateBlock();
BasicBlock* list_check_path = cfg.AllocateBlock();
BasicBlock* list_fast_path = cfg.AllocateBlock();
BasicBlock* tuple_fast_path = cfg.AllocateBlock();
Register* list_mem = temps_.AllocateStack();
stack.pop();
// TODO: The manual type checks and branches should go away once we have
// PGO support and can specialize on known types.
tc.emit<CondBranchCheckType>(
seq, TTupleExact, tuple_fast_path, list_check_path);
tc.block = list_check_path;
tc.emit<CondBranchCheckType>(
seq, TListExact, list_fast_path, deopt_path.block);
tc.block = tuple_fast_path;
Register* offset_reg = temps_.AllocateStack();
tc.emit<LoadConst>(
offset_reg, Type::fromCInt(offsetof(PyTupleObject, ob_item), TCInt64));
tc.emit<LoadFieldAddress>(list_mem, seq, offset_reg);
tc.emit<Branch>(fast_path);
tc.block = list_fast_path;
tc.emit<LoadField>(
list_mem, seq, "ob_item", offsetof(PyListObject, ob_item), TCPtr);
tc.emit<Branch>(fast_path);
tc.block = fast_path;
Register* seq_size = temps_.AllocateStack();
Register* target_size = temps_.AllocateStack();
Register* is_equal = temps_.AllocateStack();
tc.emit<LoadVarObjectSize>(seq_size, seq);
tc.emit<LoadConst>(target_size, Type::fromCInt(bc_instr.oparg(), TCInt64));
tc.emit<PrimitiveCompare>(
is_equal, PrimitiveCompareOp::kEqual, seq_size, target_size);
fast_path = cfg.AllocateBlock();
tc.emit<CondBranch>(is_equal, fast_path, deopt_path.block);
tc.block = fast_path;
Register* idx_reg = temps_.AllocateStack();
for (int idx = bc_instr.oparg() - 1; idx >= 0; --idx) {
Register* item = temps_.AllocateStack();
tc.emit<LoadConst>(idx_reg, Type::fromCInt(idx, TCInt64));
tc.emit<LoadArrayItem>(item, list_mem, idx_reg, seq, 0, TObject);
stack.push(item);
}
}
void HIRBuilder::emitFinallyBlock(
Function& irfunc,
TranslationContext& tc,
const BytecodeInstructionBlock& bc_instrs,
std::deque<TranslationContext>& queue,
Py_ssize_t finally_off,
BasicBlock* ret_block) {
// Create a new set of basic blocks to house the finally block and jump there
BlockMap new_block_map = createBlocks(irfunc, bc_instrs);
BasicBlock* finally_block = map_get(new_block_map.blocks, finally_off);
tc.emit<Branch>(finally_block);
BlockCanonicalizer().Run(tc.block, temps_, tc.frame.stack);
// Recurse into translate() to duplicate the finally block. `comp` will be
// invoked in the callee to link the finally block back to us.
std::swap(new_block_map, block_map_);
auto comp = [&](TranslationContext& ftc,
const jit::BytecodeInstruction& bci) {
BasicBlock* succ = ret_block;
if (succ == nullptr || bci.opcode() == POP_FINALLY) {
// Resume execution at the next instruction after the finally block
succ = map_get(new_block_map.blocks, bci.NextInstrOffset());
}
ftc.emit<Branch>(succ);
BlockCanonicalizer().Run(ftc.block, temps_, ftc.frame.stack);
queue.emplace_back(succ, ftc.frame);
};
TranslationContext new_tc{finally_block, tc.frame};
translate(irfunc, bc_instrs, new_tc, comp);
std::swap(new_block_map, block_map_);
}
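// Translate BEGIN_FINALLY: push nullptr to mark that the finally block was
// entered via fallthrough, then translate a copy of the finally body.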
void HIRBuilder::emitBeginFinally(
Function& irfunc,
TranslationContext& tc,
const BytecodeInstructionBlock& bc_instrs,
const jit::BytecodeInstruction& bc_instr,
std::deque<TranslationContext>& queue) {
Register* null = temps_.AllocateStack();
tc.emit<LoadConst>(null, TNullptr);
tc.frame.stack.push(null);
Py_ssize_t finally_off = bc_instr.NextInstrOffset();
emitFinallyBlock(irfunc, tc, bc_instrs, queue, finally_off, nullptr);
}
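// Translate CALL_FINALLY: push the return offset, translate a copy of the
// finally body, and resume at the instruction after the CALL_FINALLY.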
void HIRBuilder::emitCallFinally(
Function& irfunc,
TranslationContext& tc,
const BytecodeInstructionBlock& bc_instrs,
const jit::BytecodeInstruction& bc_instr,
std::deque<TranslationContext>& queue) {
Register* ret_off = temps_.AllocateStack();
tc.emit<LoadConst>(
ret_off, Type::fromCInt(bc_instr.NextInstrOffset(), TCInt64));
tc.frame.stack.push(ret_off);
BasicBlock* succ = getBlockAtOff(bc_instr.NextInstrOffset());
Py_ssize_t finally_off = bc_instr.GetJumpTarget();
emitFinallyBlock(irfunc, tc, bc_instrs, queue, finally_off, succ);
}
void HIRBuilder::emitEndFinally(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr,
FinallyCompleter complete_finally) {
// Normally the interpreter will find either 1 value (when no
// exception is active) or 6 values (when an exception is active) at
// the top of the stack. We are guaranteed to only ever encounter 1
// value at the top of the stack, as we deoptimize when an exception
// is active.
//
// In the interpreter case, the single value is either `nullptr` (if
// the finally block was entered via fallthrough) or an integer (if
// the finally block was entered via `CALL_FINALLY`).
tc.frame.stack.pop();
complete_finally(tc, bc_instr);
}
void HIRBuilder::emitPopFinally(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr,
FinallyCompleter complete_finally) {
if (bc_instr.oparg() == 0) {
// If oparg is 0, TOS is `nullptr` (if the finally block was entered via
// `BEGIN_FINALLY`) or an integer (if the finally block was entered via
// `CALL_FINALLY`). Both can be discarded, since execution always continues
// at the next instruction.
tc.frame.stack.pop();
} else {
// If oparg is 1, the return value is additionally pushed on the stack
Register* res = tc.frame.stack.pop();
tc.frame.stack.pop();
tc.frame.stack.push(res);
}
complete_finally(tc, bc_instr);
}
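// Translate SETUP_FINALLY: record the handler offset and current stack depth
// on the block stack.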
void HIRBuilder::emitSetupFinally(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
int handler_off = bc_instr.NextInstrOffset() + bc_instr.oparg();
int stack_level = tc.frame.stack.size();
tc.frame.block_stack.push(
ExecutionBlock{SETUP_FINALLY, handler_off, stack_level});
}
void HIRBuilder::emitAsyncForHeaderYieldFrom(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
Register* send_value = tc.frame.stack.pop();
Register* awaitable = tc.frame.stack.pop();
Register* out = temps_.AllocateStack();
if (code_->co_flags & CO_COROUTINE) {
tc.emit<SetCurrentAwaiter>(awaitable);
}
// Unlike emitYieldFrom() we do not use tc.emitChecked() here.
tc.emit<YieldFrom>(out, send_value, awaitable, tc.frame);
tc.frame.stack.push(out);
// If an exception was raised then exit the loop
BasicBlock* yf_cont_block = getBlockAtOff(bc_instr.NextInstrOffset());
int handler_off = tc.frame.block_stack.top().handler_off;
int handler_idx = handler_off / sizeof(_Py_CODEUNIT);
BasicBlock* yf_exc_block = getBlockAtOff(handler_off);
end_async_for_frame_state_.emplace(handler_idx, tc.frame);
tc.emit<CondBranch>(out, yf_cont_block, yf_exc_block);
}
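// Translate END_ASYNC_FOR: verify (via CheckExc) that the active exception is
// StopAsyncIteration, clear it, then pop the finally block and the exhausted
// async iterator.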
void HIRBuilder::emitEndAsyncFor(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
Register* is_stop = temps_.AllocateStack();
tc.emit<IsErrStopAsyncIteration>(is_stop);
FrameState& yield_from_frame =
end_async_for_frame_state_.at(bc_instr.index());
tc.emit<CheckExc>(is_stop, is_stop, yield_from_frame);
tc.emit<ClearError>();
// Pop finally block and discard exhausted async iterator.
const ExecutionBlock& b = tc.frame.block_stack.top();
JIT_CHECK(
static_cast<int>(tc.frame.stack.size()) == b.stack_level,
"Bad stack depth in END_ASYNC_FOR: block stack expects %d, stack is %d",
b.stack_level,
tc.frame.stack.size());
tc.frame.block_stack.pop();
tc.frame.stack.pop();
}
void HIRBuilder::emitGetAIter(TranslationContext& tc) {
Register* obj = tc.frame.stack.pop();
Register* out = temps_.AllocateStack();
tc.emitChecked<CallCFunc>(
1, out, CallCFunc::Func::k_PyEval_GetAIter, std::vector<Register*>{obj});
tc.frame.stack.push(out);
}
void HIRBuilder::emitGetANext(TranslationContext& tc) {
Register* obj = tc.frame.stack.top();
Register* out = temps_.AllocateStack();
tc.emitChecked<CallCFunc>(
1, out, CallCFunc::Func::k_PyEval_GetANext, std::vector<Register*>{obj});
tc.frame.stack.push(out);
}
Register* HIRBuilder::emitSetupWithCommon(
TranslationContext& tc,
_Py_Identifier* enter_id,
_Py_Identifier* exit_id,
bool swap_lookup) {
// Load the enter and exit attributes from the manager, push exit, and return
// the result of calling enter().
auto& stack = tc.frame.stack;
Register* manager = stack.pop();
Register* enter = temps_.AllocateStack();
Register* exit = temps_.AllocateStack();
if (swap_lookup) {
tc.emit<LoadAttrSpecial>(exit, manager, exit_id, tc.frame);
tc.emit<LoadAttrSpecial>(enter, manager, enter_id, tc.frame);
} else {
tc.emit<LoadAttrSpecial>(enter, manager, enter_id, tc.frame);
tc.emit<LoadAttrSpecial>(exit, manager, exit_id, tc.frame);
}
stack.push(exit);
Register* enter_result = temps_.AllocateStack();
VectorCall* call =
tc.emit<VectorCall>(1, enter_result, false /* is_awaited */);
call->setFrameState(tc.frame);
call->SetOperand(0, enter);
return enter_result;
}
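// Translate BEFORE_ASYNC_WITH: look up __aexit__ and __aenter__ on the async
// context manager, push __aexit__, and push the result of calling __aenter__.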
void HIRBuilder::emitBeforeAsyncWith(TranslationContext& tc) {
_Py_IDENTIFIER(__aenter__);
_Py_IDENTIFIER(__aexit__);
tc.frame.stack.push(
emitSetupWithCommon(tc, &PyId___aenter__, &PyId___aexit__, true));
}
void HIRBuilder::emitSetupAsyncWith(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
// The finally block should be above the result of __aenter__.
Register* top = tc.frame.stack.pop();
emitSetupFinally(tc, bc_instr);
tc.frame.stack.push(top);
}
void HIRBuilder::emitSetupWith(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
_Py_IDENTIFIER(__enter__);
_Py_IDENTIFIER(__exit__);
Register* enter_result =
emitSetupWithCommon(tc, &PyId___enter__, &PyId___exit__, false);
emitSetupFinally(tc, bc_instr);
tc.frame.stack.push(enter_result);
}
void HIRBuilder::emitWithCleanupStart(TranslationContext& tc) {
// We currently deopt when an exception is raised, so we don't have to
// worry about the exception case. TOS should always be NULL.
auto& stack = tc.frame.stack;
Register* null = stack.pop();
Register* exit = stack.pop();
stack.push(null);
Register* none = temps_.AllocateStack();
tc.emit<LoadConst>(none, TNoneType);
Register* exit_result = temps_.AllocateStack();
VectorCall* call =
tc.emit<VectorCall>(4, exit_result, false /* is_awaited */);
call->setFrameState(tc.frame);
call->SetOperand(0, exit);
call->SetOperand(1, none);
call->SetOperand(2, none);
call->SetOperand(3, none);
stack.push(none);
stack.push(exit_result);
}
void HIRBuilder::emitWithCleanupFinish(TranslationContext& tc) {
auto& stack = tc.frame.stack;
stack.pop(); // unused result of __exit__
stack.pop(); // None
}
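// Translate LOAD_FIELD (Static Python): load a field at a known offset from
// the receiver; possibly-null fields get a CheckField that attributes a
// failure to the receiver.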
void HIRBuilder::emitLoadField(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
auto& [offset, type, name] = preloader_.fieldInfo(constArg(bc_instr));
Register* receiver = tc.frame.stack.pop();
Register* result = temps_.AllocateStack();
const char* field_name = PyUnicode_AsUTF8(name);
if (field_name == nullptr) {
PyErr_Clear();
field_name = "";
}
tc.emit<LoadField>(result, receiver, field_name, offset, type);
if (type.couldBe(TNullptr)) {
CheckField* cf = tc.emit<CheckField>(result, result, name, tc.frame);
cf->setGuiltyReg(receiver);
}
tc.frame.stack.push(result);
}
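// Translate STORE_FIELD (Static Python): primitive values are converted to the
// field's type; for object fields, the previous value is loaded and passed to
// StoreField.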
void HIRBuilder::emitStoreField(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
auto& [offset, type, name] = preloader_.fieldInfo(constArg(bc_instr));
const char* field_name = PyUnicode_AsUTF8(name);
if (field_name == nullptr) {
PyErr_Clear();
field_name = "";
}
Register* receiver = tc.frame.stack.pop();
Register* value = tc.frame.stack.pop();
Register* previous = temps_.AllocateStack();
if (type <= TPrimitive) {
Register* converted = temps_.AllocateStack();
tc.emit<LoadConst>(previous, TNullptr);
tc.emit<IntConvert>(converted, value, type);
value = converted;
} else {
tc.emit<LoadField>(previous, receiver, field_name, offset, type, false);
}
tc.emit<StoreField>(receiver, field_name, offset, value, type, previous);
}
void HIRBuilder::emitCast(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
auto& [pytype, opt, exact] =
preloader_.pyTypeOpt(PyTuple_GetItem(constArg(bc_instr), 0));
Register* value = tc.frame.stack.pop();
Register* result = temps_.AllocateStack();
tc.emit<Cast>(
result,
value,
pytype,
opt,
exact,
PyTuple_GetItem(constArg(bc_instr), 1) == Py_True,
tc.frame);
tc.frame.stack.push(result);
}
void HIRBuilder::emitTpAlloc(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
auto pytype = preloader_.pyType(constArg(bc_instr));
Register* result = temps_.AllocateStack();
tc.emit<TpAlloc>(result, pytype, tc.frame);
tc.frame.stack.push(result);
}
void HIRBuilder::emitImportFrom(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
auto& stack = tc.frame.stack;
Register* name = stack.top();
Register* res = temps_.AllocateStack();
tc.emit<ImportFrom>(res, name, bc_instr.oparg(), tc.frame);
stack.push(res);
}
void HIRBuilder::emitImportName(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
auto& stack = tc.frame.stack;
Register* fromlist = stack.pop();
Register* level = stack.pop();
Register* res = temps_.AllocateStack();
tc.emit<ImportName>(res, bc_instr.oparg(), fromlist, level, tc.frame);
stack.push(res);
}
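// Translate RAISE_VARARGS with zero, one, or two (exception, cause) operands
// popped from the stack.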
void HIRBuilder::emitRaiseVarargs(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
auto& stack = tc.frame.stack;
switch (bc_instr.oparg()) {
case 2: {
auto cause = stack.pop();
auto exc = stack.pop();
tc.emit<Raise>(2, tc.frame, exc, cause);
break;
}
case 1:
tc.emit<Raise>(1, tc.frame, stack.pop());
break;
case 0:
tc.emit<Raise>(0, tc.frame);
break;
default:
JIT_CHECK(false, "unsupported RAISE_VARARGS op: %d", bc_instr.oparg());
break;
}
}
void HIRBuilder::emitYieldFrom(TranslationContext& tc, Register* out) {
auto& stack = tc.frame.stack;
auto send_value = stack.pop();
auto iter = stack.pop();
if (code_->co_flags & CO_COROUTINE) {
tc.emit<SetCurrentAwaiter>(iter);
}
tc.emitChecked<YieldFrom>(out, send_value, iter, tc.frame);
stack.push(out);
}
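// Translate YIELD_VALUE: in async generators the value is first wrapped via
// _PyAsyncGenValueWrapperNew.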
void HIRBuilder::emitYieldValue(TranslationContext& tc) {
auto& stack = tc.frame.stack;
auto in = stack.pop();
auto out = temps_.AllocateStack();
if (code_->co_flags & CO_ASYNC_GENERATOR) {
tc.emitChecked<CallCFunc>(
1,
out,
CallCFunc::Func::k_PyAsyncGenValueWrapperNew,
std::vector<Register*>{in});
in = out;
out = temps_.AllocateStack();
}
tc.emitChecked<YieldValue>(out, in, tc.frame);
stack.push(out);
}
void HIRBuilder::emitGetAwaitable(
CFG& cfg,
TranslationContext& tc,
int prev_op) {
OperandStack& stack = tc.frame.stack;
Register* iterable = stack.pop();
Register* iter = temps_.AllocateStack();
// Most of the work is done by the existing _PyCoro_GetAwaitableIter() utility.
tc.emit<CallCFunc>(
1,
iter,
CallCFunc::Func::k_PyCoro_GetAwaitableIter,
std::vector<Register*>{iterable});
if (prev_op == BEFORE_ASYNC_WITH || prev_op == WITH_CLEANUP_START) {
BasicBlock* error_block = cfg.AllocateBlock();
BasicBlock* ok_block = cfg.AllocateBlock();
tc.emit<CondBranch>(iter, ok_block, error_block);
tc.block = error_block;
Register* type = temps_.AllocateStack();
tc.emit<LoadField>(
type, iterable, "ob_type", offsetof(PyObject, ob_type), TType);
tc.emit<RaiseAwaitableError>(type, prev_op, tc.frame);
tc.block = ok_block;
} else {
tc.emit<CheckExc>(iter, iter, tc.frame);
}
// For coroutines only, assert at runtime that the coroutine is not already
// being awaited by checking for a sub-iterator with _PyGen_yf().
TranslationContext block_assert_not_awaited_coro{
cfg.AllocateBlock(), tc.frame};
TranslationContext block_done{cfg.AllocateBlock(), tc.frame};
tc.emit<CondBranchCheckType>(
iter,
Type::fromTypeExact(&PyCoro_Type),
block_assert_not_awaited_coro.block,
block_done.block);
Register* yf = temps_.AllocateStack();
block_assert_not_awaited_coro.emit<CallCFunc>(
1, yf, CallCFunc::Func::k_PyGen_yf, std::vector<Register*>{iter});
TranslationContext block_coro_already_awaited{cfg.AllocateBlock(), tc.frame};
block_assert_not_awaited_coro.emit<CondBranch>(
yf, block_coro_already_awaited.block, block_done.block);
block_coro_already_awaited.emit<RaiseStatic>(
0, PyExc_RuntimeError, "coroutine is being awaited already", tc.frame);
stack.push(iter);
tc.block = block_done.block;
}
void HIRBuilder::emitBuildString(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
auto num_operands = bc_instr.oparg();
tc.emitVariadic<BuildString>(temps_, num_operands);
}
void HIRBuilder::emitFormatValue(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
auto oparg = bc_instr.oparg();
int have_fmt_spec = (oparg & FVS_MASK) == FVS_HAVE_SPEC;
Register* fmt_spec;
if (have_fmt_spec) {
fmt_spec = tc.frame.stack.pop();
} else {
fmt_spec = temps_.AllocateStack();
tc.emit<LoadConst>(fmt_spec, TNullptr);
}
Register* value = tc.frame.stack.pop();
Register* dst = temps_.AllocateStack();
int which_conversion = oparg & FVC_MASK;
tc.emit<FormatValue>(dst, fmt_spec, value, which_conversion, tc.frame);
tc.frame.stack.push(dst);
}
void HIRBuilder::emitMapAdd(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
auto oparg = bc_instr.oparg();
auto& stack = tc.frame.stack;
auto value = stack.pop();
auto key = stack.pop();
auto map = stack.peek(oparg);
auto result = temps_.AllocateStack();
tc.emit<SetDictItem>(result, map, key, value, tc.frame);
}
void HIRBuilder::emitSetAdd(
TranslationContext& tc,
const jit::BytecodeInstruction& bc_instr) {
auto oparg = bc_instr.oparg();
auto& stack = tc.frame.stack;
auto* v = stack.pop();
auto* set = stack.peek(oparg);
auto result = temps_.AllocateStack();
tc.emit<SetSetItem>(result, set, v, tc.frame);
}
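// Dispatch on the result of an eagerly evaluated coroutine call: if TOS is a
// WaitHandle, either yield to its waiter or use the stored result directly;
// otherwise branch to the normal await path.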
void HIRBuilder::emitDispatchEagerCoroResult(
CFG& cfg,
TranslationContext& tc,
Register* out,
BasicBlock* await_block,
BasicBlock* post_await_block) {
Register* stack_top = tc.frame.stack.top();
TranslationContext has_wh_block{cfg.AllocateBlock(), tc.frame};
tc.emit<CondBranchCheckType>(
stack_top, TWaitHandle, has_wh_block.block, await_block);
Register* wait_handle = stack_top;
Register* wh_coro_or_result = temps_.AllocateStack();
Register* wh_waiter = temps_.AllocateStack();
has_wh_block.emit<WaitHandleLoadCoroOrResult>(wh_coro_or_result, wait_handle);
has_wh_block.emit<WaitHandleLoadWaiter>(wh_waiter, wait_handle);
has_wh_block.emit<WaitHandleRelease>(wait_handle);
TranslationContext coro_block{cfg.AllocateBlock(), tc.frame};
TranslationContext res_block{cfg.AllocateBlock(), tc.frame};
has_wh_block.emit<CondBranch>(wh_waiter, coro_block.block, res_block.block);
if (code_->co_flags & CO_COROUTINE) {
coro_block.emit<SetCurrentAwaiter>(wh_coro_or_result);
}
coro_block.emitChecked<YieldAndYieldFrom>(
out, wh_waiter, wh_coro_or_result, tc.frame);
coro_block.emit<Branch>(post_await_block);
res_block.emit<Assign>(out, wh_coro_or_result);
res_block.emit<Branch>(post_await_block);
}
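// Emit a check of the eval breaker into check_block; if it is set, run
// periodic tasks before continuing to succ.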
void HIRBuilder::insertEvalBreakerCheck(
CFG& cfg,
BasicBlock* check_block,
BasicBlock* succ,
const FrameState& frame) {
TranslationContext check(check_block, frame);
TranslationContext body(cfg.AllocateBlock(), frame);
// Check if the eval breaker has been set
Register* eval_breaker = temps_.AllocateStack();
check.emit<LoadEvalBreaker>(eval_breaker);
check.emit<CondBranch>(eval_breaker, body.block, succ);
// If set, run periodic tasks
body.snapshot();
body.emit<RunPeriodicTasks>(temps_.AllocateStack(), body.frame);
body.emit<Branch>(succ);
}
void HIRBuilder::insertEvalBreakerCheckForLoop(
CFG& cfg,
BasicBlock* loop_header) {
auto snap = loop_header->entrySnapshot();
JIT_CHECK(snap != nullptr, "block %d has no entry snapshot", loop_header->id);
auto fs = snap->frameState();
JIT_CHECK(
fs != nullptr,
"entry snapshot for block %d has no FrameState",
loop_header->id);
auto check_block = cfg.AllocateBlock();
loop_header->retargetPreds(check_block);
insertEvalBreakerCheck(cfg, check_block, loop_header, *fs);
}
void HIRBuilder::insertEvalBreakerCheckForExcept(
CFG& cfg,
TranslationContext& tc) {
TranslationContext succ(cfg.AllocateBlock(), tc.frame);
succ.snapshot();
insertEvalBreakerCheck(cfg, tc.block, succ.block, tc.frame);
tc.block = succ.block;
}
ExecutionBlock HIRBuilder::popBlock(CFG& cfg, TranslationContext& tc) {
if (tc.frame.block_stack.top().opcode == SETUP_FINALLY) {
insertEvalBreakerCheckForExcept(cfg, tc);
}
return tc.frame.block_stack.pop();
}
BorrowedRef<> HIRBuilder::constArg(const BytecodeInstruction& bc_instr) {
return PyTuple_GET_ITEM(code_->co_consts, bc_instr.oparg());
}
} // namespace hir
} // namespace jit