in Jit/hir/builder.cpp [771:1407]
// Translate the bytecode in `bc_instrs` into HIR, appending new instructions
// to basic blocks of `irfunc`.
//
// Translation is a worklist walk over basic blocks: each TranslationContext
// pairs an HIR BasicBlock with the simulated FrameState (operand stack, block
// stack, locals/cells) on entry to that block. Blocks are popped from `queue`,
// translated instruction-by-instruction, and their successors are enqueued
// with the appropriate copy of the frame state.
//
// `complete_finally` is threaded through to the END_FINALLY/POP_FINALLY
// emitters, which use it to resume translation after an inlined `finally`
// body (see the comment on the finally opcodes in the successor switch below).
void HIRBuilder::translate(
    Function& irfunc,
    const jit::BytecodeInstructionBlock& bc_instrs,
    const TranslationContext& tc,
    FinallyCompleter complete_finally) {
  std::deque<TranslationContext> queue = {tc};
  // Guards against translating the same HIR block twice when it is reachable
  // along multiple paths.
  std::unordered_set<BasicBlock*> processed;
  // Targets of backward jumps. After translation finishes, each of these
  // loop-header blocks gets an eval-breaker check inserted (see the loop at
  // the bottom of this function).
  std::unordered_set<BasicBlock*> loop_headers;
  // Optional type-profile data for this code object; when present, profiled
  // types are emitted ahead of each bytecode instruction.
  const CodeProfileData* profile_data = getProfileData(tc.frame.code);
  while (!queue.empty()) {
    // NOTE: intentionally shadows the `tc` parameter; from here on we operate
    // on the context popped off the worklist.
    auto tc = std::move(queue.front());
    queue.pop_front();
    if (processed.count(tc.block)) {
      continue;
    }
    processed.emplace(tc.block);

    // Translate remaining instructions into HIR
    auto& bc_block = map_get(block_map_.bc_blocks, tc.block);
    tc.frame.next_instr_offset = bc_block.startOffset();
    tc.snapshot();

    // True when the innermost entry on the simulated block stack is the
    // header block of an async-for loop. This changes how YIELD_FROM is
    // lowered and whether a snapshot is taken after it.
    auto is_in_async_for_header_block = [&tc, &bc_instrs]() {
      if (tc.frame.block_stack.isEmpty()) {
        return false;
      }
      const ExecutionBlock& block_top = tc.frame.block_stack.top();
      return block_top.isAsyncForHeaderBlock(bc_instrs);
    };

    for (auto bc_it = bc_block.begin(); bc_it != bc_block.end(); ++bc_it) {
      BytecodeInstruction bc_instr = *bc_it;
      tc.setCurrentInstr(bc_instr);
      if (profile_data != nullptr) {
        emitProfiledTypes(tc, *profile_data, bc_instr);
      }

      // Translate instruction. Most cases delegate to a dedicated emit*
      // helper; the simple stack-manipulation opcodes (POP_TOP, DUP_TOP,
      // ROT_*, etc.) are modeled directly on the simulated operand stack.
      switch (bc_instr.opcode()) {
        case NOP: {
          break;
        }
        case BINARY_ADD:
        case BINARY_AND:
        case BINARY_FLOOR_DIVIDE:
        case BINARY_LSHIFT:
        case BINARY_MATRIX_MULTIPLY:
        case BINARY_MODULO:
        case BINARY_MULTIPLY:
        case BINARY_OR:
        case BINARY_POWER:
        case BINARY_RSHIFT:
        case BINARY_SUBSCR:
        case BINARY_SUBTRACT:
        case BINARY_TRUE_DIVIDE:
        case BINARY_XOR: {
          emitBinaryOp(tc, bc_instr);
          break;
        }
        case INPLACE_ADD:
        case INPLACE_AND:
        case INPLACE_FLOOR_DIVIDE:
        case INPLACE_LSHIFT:
        case INPLACE_MATRIX_MULTIPLY:
        case INPLACE_MODULO:
        case INPLACE_MULTIPLY:
        case INPLACE_OR:
        case INPLACE_POWER:
        case INPLACE_RSHIFT:
        case INPLACE_SUBTRACT:
        case INPLACE_TRUE_DIVIDE:
        case INPLACE_XOR: {
          emitInPlaceOp(tc, bc_instr);
          break;
        }
        case UNARY_NOT:
        case UNARY_NEGATIVE:
        case UNARY_POSITIVE:
        case UNARY_INVERT: {
          emitUnaryOp(tc, bc_instr);
          break;
        }
        case BUILD_LIST:
        case BUILD_TUPLE:
          emitMakeListTuple(tc, bc_instr);
          break;
        case BUILD_LIST_UNPACK:
        case BUILD_TUPLE_UNPACK:
        case BUILD_TUPLE_UNPACK_WITH_CALL:
          emitMakeListTupleUnpack(tc, bc_instr);
          break;
        case BUILD_CHECKED_LIST: {
          emitBuildCheckedList(tc, bc_instr);
          break;
        }
        case BUILD_CHECKED_MAP: {
          emitBuildCheckedMap(tc, bc_instr);
          break;
        }
        case BUILD_MAP: {
          emitBuildMap(tc, bc_instr);
          break;
        }
        case BUILD_MAP_UNPACK:
          // `false`/`true` distinguishes plain unpacking from the
          // *_WITH_CALL variant.
          emitBuildMapUnpack(tc, bc_instr, false);
          break;
        case BUILD_MAP_UNPACK_WITH_CALL:
          emitBuildMapUnpack(tc, bc_instr, true);
          break;
        case BUILD_SET: {
          emitBuildSet(tc, bc_instr);
          break;
        }
        case BUILD_SET_UNPACK: {
          emitBuildSetUnpack(tc, bc_instr);
          break;
        }
        case BUILD_CONST_KEY_MAP: {
          emitBuildConstKeyMap(tc, bc_instr);
          break;
        }
        case CALL_FUNCTION:
        case CALL_FUNCTION_EX:
        case CALL_FUNCTION_KW:
        case CALL_METHOD:
        case INVOKE_FUNCTION:
        case INVOKE_METHOD: {
          // Calls get the iterator and the whole instruction block, since
          // call lowering may need to examine surrounding instructions.
          emitAnyCall(irfunc.cfg, tc, bc_it, bc_instrs);
          break;
        }
        case FUNC_CREDENTIAL:
          emitFunctionCredential(tc, bc_instr);
          break;
        case COMPARE_OP: {
          emitCompareOp(tc, bc_instr);
          break;
        }
        case DELETE_ATTR: {
          emitDeleteAttr(tc, bc_instr);
          break;
        }
        case LOAD_ATTR: {
          emitLoadAttr(tc, bc_instr);
          break;
        }
        case LOAD_METHOD: {
          emitLoadMethod(tc, bc_instr);
          break;
        }
        case LOAD_METHOD_SUPER: {
          // `true`/`false` selects method vs. attribute lookup on super().
          emitLoadMethodOrAttrSuper(tc, bc_instr, true);
          break;
        }
        case LOAD_ATTR_SUPER: {
          emitLoadMethodOrAttrSuper(tc, bc_instr, false);
          break;
        }
        case LOAD_CLOSURE: {
          // Push the cell register for this closure variable.
          tc.frame.stack.push(tc.frame.cells[bc_instr.oparg()]);
          break;
        }
        case LOAD_DEREF: {
          emitLoadDeref(tc, bc_instr);
          break;
        }
        case STORE_DEREF: {
          emitStoreDeref(tc, bc_instr);
          break;
        }
        case LOAD_CONST: {
          emitLoadConst(tc, bc_instr);
          break;
        }
        case LOAD_FAST: {
          emitLoadFast(tc, bc_instr);
          break;
        }
        case LOAD_LOCAL: {
          emitLoadLocal(tc, bc_instr);
          break;
        }
        case LOAD_TYPE: {
          emitLoadType(tc, bc_instr);
          break;
        }
        case CONVERT_PRIMITIVE: {
          emitConvertPrimitive(tc, bc_instr);
          break;
        }
        case PRIMITIVE_LOAD_CONST: {
          emitPrimitiveLoadConst(tc, bc_instr);
          break;
        }
        case INT_LOAD_CONST_OLD: {
          emitIntLoadConstOld(tc, bc_instr);
          break;
        }
        case PRIMITIVE_BOX: {
          emitPrimitiveBox(tc, bc_instr);
          break;
        }
        case PRIMITIVE_UNBOX: {
          emitPrimitiveUnbox(tc, bc_instr);
          break;
        }
        case PRIMITIVE_BINARY_OP: {
          emitPrimitiveBinaryOp(tc, bc_instr);
          break;
        }
        case PRIMITIVE_COMPARE_OP: {
          emitPrimitiveCompare(tc, bc_instr);
          break;
        }
        case PRIMITIVE_UNARY_OP: {
          emitPrimitiveUnaryOp(tc, bc_instr);
          break;
        }
        case FAST_LEN: {
          emitFastLen(irfunc.cfg, tc, bc_instr);
          break;
        }
        case REFINE_TYPE: {
          emitRefineType(tc, bc_instr);
          break;
        }
        case SEQUENCE_GET: {
          emitSequenceGet(tc, bc_instr);
          break;
        }
        case SEQUENCE_SET: {
          emitSequenceSet(tc, bc_instr);
          break;
        }
        case SEQUENCE_REPEAT: {
          emitSequenceRepeat(irfunc.cfg, tc, bc_instr);
          break;
        }
        case LOAD_GLOBAL: {
          emitLoadGlobal(tc, bc_instr);
          break;
        }
        case JUMP_ABSOLUTE:
        case JUMP_FORWARD: {
          auto target_off = bc_instr.GetJumpTarget();
          auto target = getBlockAtOff(target_off);
          // A JUMP_ABSOLUTE to an earlier (or the same) offset closes a
          // loop; record the target as a loop header.
          if ((bc_instr.opcode() == JUMP_ABSOLUTE) &&
              (target_off <= bc_instr.offset())) {
            loop_headers.emplace(target);
          }
          tc.emit<Branch>(target);
          break;
        }
        case JUMP_IF_FALSE_OR_POP:
        case JUMP_IF_NONZERO_OR_POP:
        case JUMP_IF_TRUE_OR_POP:
        case JUMP_IF_ZERO_OR_POP: {
          emitJumpIf(tc, bc_instr);
          break;
        }
        case POP_BLOCK: {
          popBlock(irfunc.cfg, tc);
          break;
        }
        case POP_JUMP_IF_FALSE:
        case POP_JUMP_IF_TRUE: {
          auto target_off = bc_instr.GetJumpTarget();
          auto target = getBlockAtOff(target_off);
          // Backward conditional jumps also mark loop headers.
          if (target_off <= bc_instr.offset()) {
            loop_headers.emplace(target);
          }
          emitPopJumpIf(tc, bc_instr);
          break;
        }
        case POP_TOP: {
          tc.frame.stack.pop();
          break;
        }
        case RETURN_PRIMITIVE: {
          Type type = prim_type_to_type(bc_instr.oparg());
          // Enums are returned as CInt64; everything else must match the
          // preloaded return type.
          if (preloader_.returnType() <= TCEnum) {
            JIT_CHECK(
                type <= TCInt64,
                "bad return type %s for enum, expected CInt64",
                type);
          } else {
            JIT_CHECK(
                type <= preloader_.returnType(),
                "bad return type %s, expected %s",
                type,
                preloader_.returnType());
          }
          Register* reg = tc.frame.stack.pop();
          tc.emit<Return>(reg, type);
          break;
        }
        case RETURN_VALUE: {
          Register* reg = tc.frame.stack.pop();
          // TODO add preloader_.returnType() to Return instr here to validate
          // that all values flowing to return are of correct type; will
          // require consistency of static compiler and JIT types, see
          // T86480663
          JIT_CHECK(
              tc.frame.block_stack.isEmpty(),
              "Returning with non-empty block stack");
          tc.emit<Return>(reg);
          break;
        }
        case BEGIN_FINALLY: {
          // Finally handling may recursively translate the finally body;
          // the emitters receive `queue` so they can enqueue blocks.
          emitBeginFinally(irfunc, tc, bc_instrs, bc_instr, queue);
          break;
        }
        case CALL_FINALLY: {
          emitCallFinally(irfunc, tc, bc_instrs, bc_instr, queue);
          break;
        }
        case END_ASYNC_FOR: {
          emitEndAsyncFor(tc, bc_instr);
          break;
        }
        case END_FINALLY: {
          emitEndFinally(tc, bc_instr, complete_finally);
          break;
        }
        case POP_FINALLY: {
          emitPopFinally(tc, bc_instr, complete_finally);
          break;
        }
        case SETUP_FINALLY: {
          emitSetupFinally(tc, bc_instr);
          break;
        }
        case STORE_ATTR: {
          emitStoreAttr(tc, bc_instr);
          break;
        }
        case STORE_FAST: {
          emitStoreFast(tc, bc_instr);
          break;
        }
        case STORE_LOCAL: {
          emitStoreLocal(tc, bc_instr);
          break;
        }
        case STORE_SUBSCR: {
          emitStoreSubscr(tc);
          break;
        }
        case BUILD_SLICE: {
          emitBuildSlice(tc, bc_instr);
          break;
        }
        case GET_AITER: {
          emitGetAIter(tc);
          break;
        }
        case GET_ANEXT: {
          emitGetANext(tc);
          break;
        }
        case GET_ITER: {
          emitGetIter(tc);
          break;
        }
        case GET_YIELD_FROM_ITER: {
          emitGetYieldFromIter(irfunc.cfg, tc);
          break;
        }
        case MAKE_FUNCTION: {
          emitMakeFunction(tc, bc_instr);
          break;
        }
        case LIST_APPEND: {
          emitListAppend(tc, bc_instr);
          break;
        }
        case LOAD_ITERABLE_ARG: {
          emitLoadIterableArg(irfunc.cfg, tc, bc_instr);
          break;
        }
        case DUP_TOP: {
          // Pure stack shuffles are modeled directly on the simulated stack;
          // no HIR instructions are emitted.
          auto& stack = tc.frame.stack;
          stack.push(stack.top());
          break;
        }
        case DUP_TOP_TWO: {
          auto& stack = tc.frame.stack;
          Register* top = stack.top();
          Register* snd = stack.top(1);
          stack.push(snd);
          stack.push(top);
          break;
        }
        case ROT_TWO: {
          auto& stack = tc.frame.stack;
          Register* top = stack.pop();
          Register* snd = stack.pop();
          stack.push(top);
          stack.push(snd);
          break;
        }
        case ROT_THREE: {
          auto& stack = tc.frame.stack;
          Register* top = stack.pop();
          Register* snd = stack.pop();
          Register* thd = stack.pop();
          stack.push(top);
          stack.push(thd);
          stack.push(snd);
          break;
        }
        case ROT_FOUR: {
          auto& stack = tc.frame.stack;
          Register* r1 = stack.pop();
          Register* r2 = stack.pop();
          Register* r3 = stack.pop();
          Register* r4 = stack.pop();
          stack.push(r1);
          stack.push(r4);
          stack.push(r3);
          stack.push(r2);
          break;
        }
        case FOR_ITER: {
          emitForIter(tc, bc_instr);
          break;
        }
        case LOAD_FIELD: {
          emitLoadField(tc, bc_instr);
          break;
        }
        case CAST: {
          emitCast(tc, bc_instr);
          break;
        }
        case TP_ALLOC: {
          emitTpAlloc(tc, bc_instr);
          break;
        }
        case CHECK_ARGS: {
          // check args is handled in the prologue
          break;
        }
        case STORE_FIELD: {
          emitStoreField(tc, bc_instr);
          break;
        }
        case POP_JUMP_IF_ZERO:
        case POP_JUMP_IF_NONZERO: {
          emitPopJumpIf(tc, bc_instr);
          break;
        }
        case IMPORT_FROM: {
          emitImportFrom(tc, bc_instr);
          break;
        }
        case IMPORT_NAME: {
          emitImportName(tc, bc_instr);
          break;
        }
        case RAISE_VARARGS: {
          emitRaiseVarargs(tc, bc_instr);
          break;
        }
        case YIELD_VALUE: {
          emitYieldValue(tc);
          break;
        }
        case YIELD_FROM: {
          // YIELD_FROM in an async-for header is lowered specially; its
          // successor handling also differs (see the default case of the
          // successor switch below).
          if (is_in_async_for_header_block()) {
            emitAsyncForHeaderYieldFrom(tc, bc_instr);
          } else {
            emitYieldFrom(tc, temps_.AllocateStack());
          }
          break;
        }
        case GET_AWAITABLE: {
          // The lowering of GET_AWAITABLE depends on the opcode that
          // produced the value being awaited (0 when this is the first
          // instruction); see emitGetAwaitable.
          Py_ssize_t idx = bc_instr.index();
          int prev_op = idx ? bc_instrs.at(idx - 1).opcode() : 0;
          emitGetAwaitable(irfunc.cfg, tc, prev_op);
          break;
        }
        case BUILD_STRING: {
          emitBuildString(tc, bc_instr);
          break;
        }
        case FORMAT_VALUE: {
          emitFormatValue(tc, bc_instr);
          break;
        }
        case MAP_ADD: {
          emitMapAdd(tc, bc_instr);
          break;
        }
        case SET_ADD: {
          emitSetAdd(tc, bc_instr);
          break;
        }
        case UNPACK_EX: {
          emitUnpackEx(tc, bc_instr);
          break;
        }
        case UNPACK_SEQUENCE: {
          emitUnpackSequence(irfunc.cfg, tc, bc_instr);
          break;
        }
        case DELETE_SUBSCR: {
          Register* sub = tc.frame.stack.pop();
          Register* container = tc.frame.stack.pop();
          tc.emit<DeleteSubscr>(container, sub, tc.frame);
          break;
        }
        case DELETE_FAST: {
          // Deleting a local is modeled by storing nullptr into its
          // register.
          int var_idx = bc_instr.oparg();
          Register* var = tc.frame.locals[var_idx];
          tc.emit<LoadConst>(var, TNullptr);
          break;
        }
        case BEFORE_ASYNC_WITH: {
          emitBeforeAsyncWith(tc);
          break;
        }
        case SETUP_ASYNC_WITH: {
          emitSetupAsyncWith(tc, bc_instr);
          break;
        }
        case SETUP_WITH: {
          emitSetupWith(tc, bc_instr);
          break;
        }
        case WITH_CLEANUP_START: {
          emitWithCleanupStart(tc);
          break;
        }
        case WITH_CLEANUP_FINISH: {
          emitWithCleanupFinish(tc);
          break;
        }
        default: {
          // NOTREACHED
          JIT_CHECK(false, "unhandled opcode: %d", bc_instr.opcode());
          break;
        }
      }

      // Record a frame-state snapshot after instructions that need one.
      if (should_snapshot(bc_instr, is_in_async_for_header_block())) {
        tc.snapshot();
      }
    }

    // Insert jumps for blocks that fall through.
    auto last_instr = tc.block->GetTerminator();
    if ((last_instr == nullptr) || !last_instr->IsTerminator()) {
      auto off = bc_block.endOffset();
      last_instr = tc.emit<Branch>(getBlockAtOff(off));
    }

    // Make sure any values left on the stack are in the registers that we
    // expect
    BlockCanonicalizer bc;
    bc.Run(tc.block, temps_, tc.frame.stack);

    // Add successors to be processed
    //
    // These bytecodes alter the operand stack along one branch and leave it
    // untouched along the other. Thus, they must be special cased.
    BytecodeInstruction last_bc_instr = bc_block.lastInstr();
    switch (last_bc_instr.opcode()) {
      case BEGIN_FINALLY:
      case CALL_FINALLY:
      case END_FINALLY:
      case POP_FINALLY: {
        // Opcodes for handling finally blocks are handled specially because
        // CPython does not guarantee a constant stack depth when entering a
        // finally block. We work around the issue by "tail duplicating" the
        // finally block at each "call site" (BEGIN_FINALLY or CALL_FINALLY) by
        // recursing into the compiler with a fresh set of basic blocks. The
        // callee then links the finally block back to us and queues the
        // appropriate block for processing. See the various `emit` functions
        // for these opcodes for the implementation.
        break;
      }
      case FOR_ITER: {
        auto condbr = static_cast<CondBranchIterNotDone*>(last_instr);
        auto new_frame = tc.frame;
        // Sentinel value signaling iteration is complete and the iterator
        // itself
        new_frame.stack.discard(2);
        // true: loop body (stack unchanged); false: loop exit (two values
        // discarded).
        queue.emplace_back(condbr->true_bb(), tc.frame);
        queue.emplace_back(condbr->false_bb(), new_frame);
        break;
      }
      case JUMP_IF_FALSE_OR_POP:
      case JUMP_IF_ZERO_OR_POP: {
        auto condbr = static_cast<CondBranch*>(last_instr);
        auto new_frame = tc.frame;
        new_frame.stack.pop();
        // Fall-through (true) branch pops the tested value; the jump
        // (false) branch keeps it on the stack.
        queue.emplace_back(condbr->true_bb(), new_frame);
        queue.emplace_back(condbr->false_bb(), tc.frame);
        break;
      }
      case JUMP_IF_NONZERO_OR_POP:
      case JUMP_IF_TRUE_OR_POP: {
        auto condbr = static_cast<CondBranch*>(last_instr);
        auto new_frame = tc.frame;
        new_frame.stack.pop();
        // Mirror image of the case above: the jump (true) branch keeps the
        // value, the fall-through (false) branch pops it.
        queue.emplace_back(condbr->true_bb(), tc.frame);
        queue.emplace_back(condbr->false_bb(), new_frame);
        break;
      }
      default: {
        // A YIELD_FROM that ends an async-for header block is lowered to a
        // CondBranch; the false branch observes one fewer stack value.
        if (last_bc_instr.opcode() == YIELD_FROM &&
            is_in_async_for_header_block()) {
          JIT_CHECK(
              last_instr->IsCondBranch(),
              "Async-for header should end with CondBranch");
          auto condbr = static_cast<CondBranch*>(last_instr);
          FrameState new_frame = tc.frame;
          new_frame.stack.pop();
          queue.emplace_back(condbr->true_bb(), tc.frame);
          queue.emplace_back(condbr->false_bb(), std::move(new_frame));
          break;
        }
        // All other terminators leave the stack identical along every edge;
        // enqueue each successor with a copy of the current frame.
        for (std::size_t i = 0; i < last_instr->numEdges(); i++) {
          auto succ = last_instr->successor(i);
          queue.emplace_back(succ, tc.frame);
        }
        break;
      }
    }
  }

  // Loop headers collected from backward jumps get an eval-breaker check so
  // long-running loops remain interruptible.
  for (auto block : loop_headers) {
    insertEvalBreakerCheckForLoop(irfunc.cfg, block);
  }
}