// Copyright (c) Facebook, Inc. and its affiliates. (http://www.facebook.com)
#include "Jit/hir/optimization.h"
#include "Python.h"
#include "code.h"
#include "pycore_pystate.h"
#include "Jit/compiler.h"
#include "Jit/hir/analysis.h"
#include "Jit/hir/builder.h"
#include "Jit/hir/hir.h"
#include "Jit/hir/memory_effects.h"
#include "Jit/hir/printer.h"
#include "Jit/hir/ssa.h"
#include "Jit/jit_rt.h"
#include "Jit/pyjit.h"
#include "Jit/util.h"
#include <fmt/format.h>
#include <list>
#include <memory>
#include <unordered_map>
#include <unordered_set>
#include <vector>
namespace jit {
namespace hir {
PassRegistry::PassRegistry() {
auto addPass = [&](const PassFactory& factory) {
factories_.emplace(factory()->name(), factory);
};
addPass(RefcountInsertion::Factory);
addPass(CopyPropagation::Factory);
addPass(CleanCFG::Factory);
addPass(DynamicComparisonElimination::Factory);
addPass(PhiElimination::Factory);
addPass(InlineFunctionCalls::Factory);
addPass(Simplify::Factory);
addPass(DeadCodeElimination::Factory);
addPass(GuardTypeRemoval::Factory);
}
std::unique_ptr<Pass> PassRegistry::MakePass(const std::string& name) {
auto it = factories_.find(name);
if (it != factories_.end()) {
return it->second();
} else {
return nullptr;
}
}
Instr* DynamicComparisonElimination::ReplaceCompare(
Compare* compare,
IsTruthy* truthy) {
// For is/is not we can use PrimitiveCompare:
// $truthy = PrimitiveCompare<Eq> $x $y
// CondBranch<x, y> $truthy
// For other comparisons we can use CompareBool.
if (compare->op() == CompareOp::kIs || compare->op() == CompareOp::kIsNot) {
return PrimitiveCompare::create(
truthy->GetOutput(),
(compare->op() == CompareOp::kIs) ? PrimitiveCompareOp::kEqual
: PrimitiveCompareOp::kNotEqual,
compare->GetOperand(0),
compare->GetOperand(1));
}
return CompareBool::create(
truthy->GetOutput(),
compare->op(),
compare->GetOperand(0),
compare->GetOperand(1),
*get_frame_state(*truthy));
}
void DynamicComparisonElimination::InitBuiltins() {
if (inited_builtins_) {
return;
}
inited_builtins_ = true;
// We want to check the exact function address rather than relying on
// modules, which can be mutated. First find builtins, which we have to
// search for because PyEval_GetBuiltins() returns the module dict.
PyObject* mods = _PyThreadState_GET()->interp->modules_by_index;
PyModuleDef* builtins = nullptr;
for (Py_ssize_t i = 0; i < PyList_GET_SIZE(mods); i++) {
PyObject* cur = PyList_GET_ITEM(mods, i);
if (cur == Py_None) {
continue;
}
PyModuleDef* def = PyModule_GetDef(cur);
if (def != nullptr && strcmp(def->m_name, "builtins") == 0) {
builtins = def;
break;
}
}
if (builtins == nullptr) {
return;
}
for (PyMethodDef* fdef = builtins->m_methods; fdef->ml_name != NULL; fdef++) {
if (strcmp(fdef->ml_name, "isinstance") == 0) {
isinstance_func_ = fdef->ml_meth;
break;
}
}
}
Instr* DynamicComparisonElimination::ReplaceTypeCheck(
Function& irfunc,
CondBranch& cond_branch,
BasicBlock& block,
Register* obj_op,
Register* type_op,
Instr* isinstance,
IsTruthy* truthy,
bool is_subtype,
bool is_optional) {
int bc_off = cond_branch.bytecodeOffset();
// We want to replace:
// if isinstance(x, some_type):
// with:
// if x.__class__ == some_type or PyObject_IsInstance(x, some_type):
// This inlines the common type check case, and eliminates
// the truthy case.
// We do this by updating the existing branch to be
// based off the fast path, and if that fails, then
// we insert a new basic block which handles the slow path
// and branches to the success or failure cases.
auto obj_type = irfunc.env.AllocateRegister();
auto fast_eq = irfunc.env.AllocateRegister();
auto load_type = LoadField::create(
obj_type, obj_op, "ob_type", offsetof(PyObject, ob_type), TType);
auto compare_type = PrimitiveCompare::create(
fast_eq, PrimitiveCompareOp::kEqual, obj_type, type_op);
load_type->copyBytecodeOffset(*isinstance);
load_type->InsertBefore(*truthy);
compare_type->copyBytecodeOffset(*isinstance);
// Slow path: check for None if optional, then perform the isinstance or
// subtype check.
auto slow_path = block.cfg->AllocateBlock();
auto prev_false_bb = cond_branch.false_bb();
cond_branch.set_false_bb(slow_path);
cond_branch.SetOperand(0, fast_eq);
if (is_optional) {
auto not_none_slow_path = block.cfg->AllocateBlock();
auto none_reg = irfunc.env.AllocateRegister();
slow_path->appendWithOff<LoadConst>(
bc_off, none_reg, Type::fromObject(Py_None));
auto none_eq = irfunc.env.AllocateRegister();
slow_path->appendWithOff<PrimitiveCompare>(
bc_off, none_eq, PrimitiveCompareOp::kEqual, obj_op, none_reg);
slow_path->appendWithOff<CondBranch>(
bc_off, none_eq, cond_branch.true_bb(), not_none_slow_path);
slow_path = not_none_slow_path;
}
if (is_subtype) {
slow_path->appendWithOff<IsSubtype>(
bc_off, truthy->GetOutput(), obj_type, type_op);
} else {
slow_path->appendWithOff<IsInstance>(
bc_off,
truthy->GetOutput(),
obj_op,
type_op,
*get_frame_state(*truthy));
}
slow_path->appendWithOff<CondBranch>(
bc_off, truthy->GetOutput(), cond_branch.true_bb(), prev_false_bb);
// We need to update the Phis from the previous false case so that they now
// come from the slow path block.
prev_false_bb->fixupPhis(&block, slow_path);
// The Phis coming in on the success case now have an extra predecessor:
// the slow path block.
cond_branch.true_bb()->addPhiPredecessor(&block, slow_path);
return compare_type;
}
Instr* DynamicComparisonElimination::ReplaceVectorCall(
Function& irfunc,
CondBranch& cond_branch,
BasicBlock& block,
VectorCall* vectorcall,
IsTruthy* truthy) {
auto func = vectorcall->func();
if (!func->type().hasValueSpec(TObject)) {
return nullptr;
}
InitBuiltins();
auto funcobj = func->type().objectSpec();
if (Py_TYPE(funcobj) == &PyCFunction_Type &&
PyCFunction_GET_FUNCTION(funcobj) == isinstance_func_ &&
vectorcall->numArgs() == 2 &&
vectorcall->GetOperand(2)->type() <= TType) {
return ReplaceTypeCheck(
irfunc,
cond_branch,
block,
vectorcall->GetOperand(1),
vectorcall->GetOperand(2),
vectorcall,
truthy,
false,
false);
}
return nullptr;
}
void DynamicComparisonElimination::Run(Function& irfunc) {
LivenessAnalysis liveness{irfunc};
liveness.Run();
auto last_uses = liveness.GetLastUses();
// Optimize "if x is y" case
for (auto& block : irfunc.cfg.blocks) {
auto& instr = block.back();
// Looking for:
// $some_conditional = ...
// $truthy = IsTruthy $compare
// CondBranch<x, y> $truthy
// Which we then re-write to a form which doesn't use IsTruthy anymore.
if (!instr.IsCondBranch()) {
continue;
}
Instr* truthy = instr.GetOperand(0)->instr();
if (!truthy->IsIsTruthy() || truthy->block() != &block) {
continue;
}
Instr* truthy_target = truthy->GetOperand(0)->instr();
if (truthy_target->block() != &block ||
(!truthy_target->IsCompare() && !truthy_target->IsVectorCall() &&
!truthy_target->IsCast())) {
continue;
}
auto& dying_regs = map_get(last_uses, truthy, kEmptyRegSet);
if (dying_regs.count(truthy->GetOperand(0)) == 0) {
// The compare's output lives on, so we can't rewrite...
continue;
}
// Make sure the output of the compare isn't used between the compare and
// the branch by anything other than the IsTruthy instruction.
std::vector<Instr*> snapshots;
bool can_optimize = true;
for (auto it = std::next(block.rbegin()); it != block.rend(); ++it) {
if (&*it == truthy_target) {
break;
} else if (&*it != truthy) {
if (it->IsSnapshot()) {
if (it->Uses(truthy_target->GetOutput())) {
snapshots.push_back(&*it);
}
continue;
} else if (!it->isReplayable()) {
can_optimize = false;
break;
}
if (it->Uses(truthy->GetOperand(0))) {
can_optimize = false;
break;
}
}
}
if (!can_optimize) {
continue;
}
Instr* replacement = nullptr;
if (truthy_target->IsCompare()) {
auto compare = static_cast<Compare*>(truthy_target);
replacement = ReplaceCompare(compare, static_cast<IsTruthy*>(truthy));
} else if (truthy_target->IsVectorCall()) {
auto vectorcall = static_cast<VectorCall*>(truthy_target);
replacement = ReplaceVectorCall(
irfunc,
static_cast<CondBranch&>(instr),
block,
vectorcall,
static_cast<IsTruthy*>(truthy));
} else if (truthy_target->IsCast()) {
auto cast = static_cast<Cast*>(truthy_target);
if (!cast->iserror()) {
auto cast_type = irfunc.env.AllocateRegister();
auto init_type = LoadConst::create(
cast_type, Type::fromObject((PyObject*)cast->pytype()));
init_type->copyBytecodeOffset(*cast);
init_type->InsertBefore(*truthy);
replacement = ReplaceTypeCheck(
irfunc,
static_cast<CondBranch&>(instr),
block,
cast->GetOperand(0),
cast_type,
cast,
static_cast<IsTruthy*>(truthy),
true,
cast->optional());
}
}
if (replacement != nullptr) {
replacement->copyBytecodeOffset(instr);
truthy->ReplaceWith(*replacement);
truthy_target->unlink();
delete truthy_target;
delete truthy;
// There may be zero or more Snapshots between the Compare and the IsTruthy
// that use the output of the Compare (which we want to delete). Since we're
// fusing the two operations together, those Snapshots and their uses of the
// dead intermediate value should be deleted.
for (auto snapshot : snapshots) {
snapshot->unlink();
delete snapshot;
}
}
}
reflowTypes(irfunc);
}
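// Copy propagation rewrites every use of an Assign's output to refer directly
// to the Assign's source (following chains of Assigns), then deletes the
// now-dead Assigns. An illustrative sketch with made-up register names, not
// taken from a real trace:
//
//   Before:                 After:
//     $b = Assign $a          (Assign deleted)
//     $c = SomeOp $b          $c = SomeOp $a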
void CopyPropagation::Run(Function& irfunc) {
std::vector<Instr*> assigns;
for (auto block : irfunc.cfg.GetRPOTraversal()) {
for (auto& instr : *block) {
instr.visitUses([](Register*& reg) {
while (reg->instr()->IsAssign()) {
reg = reg->instr()->GetOperand(0);
}
return true;
});
if (instr.IsAssign()) {
assigns.emplace_back(&instr);
}
}
}
for (auto instr : assigns) {
instr->unlink();
delete instr;
}
}
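// Phi elimination removes trivial Phis: a Phi whose incoming values all
// reduce to a single register can be replaced by an Assign from that
// register. An illustrative sketch with made-up names:
//
//   Before:                        After:
//     $x = Phi<bb1, bb2> $a $a       $x = Assign $a
//
// The inserted Assigns are folded away by the CopyPropagation run below, and
// any trampoline blocks this exposes are removed afterwards.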
void PhiElimination::Run(Function& func) {
for (bool changed = true; changed;) {
changed = false;
for (auto& block : func.cfg.blocks) {
std::vector<Instr*> assigns;
for (auto it = block.begin(); it != block.end();) {
auto& instr = *it;
++it;
if (!instr.IsPhi()) {
for (auto assign : assigns) {
assign->InsertBefore(instr);
}
break;
}
if (auto value = static_cast<Phi&>(instr).isTrivial()) {
auto assign = Assign::create(instr.GetOutput(), value);
assign->copyBytecodeOffset(instr);
assigns.emplace_back(assign);
instr.unlink();
delete &instr;
changed = true;
}
}
}
CopyPropagation{}.Run(func);
}
// TODO(emacs): Investigate running the whole CleanCFG pass here or between
// every pass.
CleanCFG::RemoveTrampolineBlocks(&func.cfg);
}
static bool isUseful(Instr& instr) {
return instr.IsTerminator() || instr.IsSnapshot() ||
dynamic_cast<const DeoptBase*>(&instr) != nullptr ||
(!instr.IsPhi() && memoryEffects(instr).may_store != AEmpty);
}
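// Dead code elimination is a mark-and-sweep over the whole function: seed a
// worklist with the "useful" instructions above (terminators, Snapshots,
// anything that may deopt or store to memory), transitively mark everything
// they use as live, then unlink and delete every instruction that was never
// marked. For example (illustrative only), a LoadConst whose output is never
// consumed by a live instruction is deleted.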
void DeadCodeElimination::Run(Function& func) {
Worklist<Instr*> worklist;
for (auto& block : func.cfg.blocks) {
for (Instr& instr : block) {
if (isUseful(instr)) {
worklist.push(&instr);
}
}
}
std::unordered_set<Instr*> live_set;
while (!worklist.empty()) {
auto live_op = worklist.front();
worklist.pop();
if (live_set.insert(live_op).second) {
live_op->visitUses([&](Register*& reg) {
if (live_set.count(reg->instr()) == 0) {
worklist.push(reg->instr());
}
return true;
});
}
}
for (auto& block : func.cfg.blocks) {
for (auto it = block.begin(); it != block.end();) {
auto& instr = *it;
++it;
if (live_set.count(&instr) == 0) {
instr.unlink();
delete &instr;
}
}
}
}
using RegUses = std::unordered_map<Register*, std::unordered_set<Instr*>>;
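// Decide whether a GuardType is still needed if its output were given the
// relaxed (unguarded) type of its input. We flow the relaxed type forward
// through Phis and passthrough instructions and report that the guard is
// needed as soon as some use demands a more precise type than the relaxed
// type provides. An illustrative sketch with made-up registers and ops:
//
//   $g = GuardType<SomePreciseType> $v
//   $r = SomeOp $g   <- if SomeOp requires SomePreciseType and $v's type is
//                       wider, the guard must stay
//
// If every transitive use is satisfied by $v's unguarded type, the guard can
// be dropped.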
static bool
guardNeeded(const RegUses& uses, Register* new_reg, Type relaxed_type) {
auto it = uses.find(new_reg);
if (it == uses.end()) {
// No uses; the guard is dead.
return false;
}
// Stores all Register->Type pairs to consider as the algorithm examines
// whether a guard is needed across passthrough + Phi instructions
std::queue<std::pair<Register*, Type>> worklist;
std::unordered_map<Register*, std::unordered_set<Type>> seen_state;
worklist.emplace(new_reg, relaxed_type);
seen_state[new_reg].insert(relaxed_type);
while (!worklist.empty()) {
std::pair<Register*, Type> args = worklist.front();
worklist.pop();
new_reg = args.first;
relaxed_type = args.second;
auto new_reg_uses = uses.find(new_reg);
if (new_reg_uses == uses.end()) {
continue;
}
for (const Instr* instr : new_reg_uses->second) {
for (std::size_t i = 0; i < instr->NumOperands(); i++) {
if (instr->GetOperand(i) == new_reg) {
if ((instr->GetOutput() != nullptr) &&
(instr->IsPhi() || isPassthrough(*instr))) {
Register* passthrough_output = instr->GetOutput();
Type passthrough_type = outputType(*instr, [&](std::size_t ind) {
if (ind == i) {
return relaxed_type;
}
return instr->GetOperand(ind)->type();
});
if (seen_state[passthrough_output]
.insert(passthrough_type)
.second) {
worklist.emplace(passthrough_output, passthrough_type);
}
}
OperandType expected_type = instr->GetOperandType(i);
// TODO(T106726658): We should be able to remove GuardTypes if we ever
// add a matching constraint for non-Primitive types, and our
// GuardType adds an unnecessary refinement. Since we cannot guard on
// primitive types yet, this should never happen
if (operandsMustMatch(expected_type)) {
JIT_DLOG(
"'%s' kept alive by primitive '%s'", *new_reg->instr(), *instr);
return true;
}
if (!registerTypeMatches(relaxed_type, expected_type)) {
JIT_DLOG("'%s' kept alive by '%s'", *new_reg->instr(), *instr);
return true;
}
}
}
}
}
return false;
}
// Collect direct operand uses of all Registers in the given func, excluding
// uses in FrameState or other metadata.
static RegUses collectDirectRegUses(Function& func) {
RegUses uses;
for (auto& block : func.cfg.blocks) {
for (Instr& instr : block) {
for (size_t i = 0; i < instr.NumOperands(); ++i) {
uses[instr.GetOperand(i)].insert(&instr);
}
}
}
return uses;
}
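// For each GuardType whose guarded precision is not actually needed by any
// of its transitive uses (per guardNeeded above), replace the guard with a
// plain Assign from its input. An illustrative sketch with made-up registers:
//
//   Before:                            After:
//     $g = GuardType<SomeType> $v        $g = Assign $v
//     Return $g                          Return $g
//
// The Assigns are then folded by CopyPropagation and types are reflowed.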
void GuardTypeRemoval::Run(Function& func) {
RegUses reg_uses = collectDirectRegUses(func);
std::vector<std::unique_ptr<Instr>> removed_guards;
for (auto& block : func.cfg.blocks) {
for (auto it = block.begin(); it != block.end();) {
auto& instr = *it;
++it;
if (!instr.IsGuardType()) {
continue;
}
Register* guard_out = instr.GetOutput();
Register* guard_in = instr.GetOperand(0);
if (!guardNeeded(reg_uses, guard_out, guard_in->type())) {
auto assign = Assign::create(guard_out, guard_in);
assign->copyBytecodeOffset(instr);
instr.ReplaceWith(*assign);
removed_guards.emplace_back(&instr);
}
}
}
CopyPropagation{}.Run(func);
reflowTypes(func);
}
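// If a block ends in an unconditional Branch to a block with exactly one
// predecessor, splice the target's instructions into the branching block and
// delete the Branch. An illustrative sketch with made-up block numbers:
//
//   Before:                     After:
//     bb0:                        bb0:
//       ...                         ...
//       Branch<bb1>                 <bb1's instructions>
//     bb1:
//       ...
//
// The emptied target block becomes unreachable and is cleaned up later by
// RemoveUnreachableBlocks.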
static bool absorbDstBlock(BasicBlock* block) {
auto branch = dynamic_cast<Branch*>(block->GetTerminator());
if (!branch) {
return false;
}
BasicBlock* target = branch->target();
if (target == block) {
return false;
}
if (target->in_edges().size() != 1) {
return false;
}
branch->unlink();
while (!target->empty()) {
Instr* instr = target->pop_front();
JIT_CHECK(!instr->IsPhi(), "Expected no Phi but found %s", *instr);
block->Append(instr);
}
// The successors to target might have Phis that still refer to target.
// Retarget them to refer to block.
Instr* old_term = block->GetTerminator();
JIT_CHECK(old_term != nullptr, "block must have a terminator");
for (std::size_t i = 0, n = old_term->numEdges(); i < n; ++i) {
old_term->successor(i)->fixupPhis(
/*old_pred=*/target, /*new_pred=*/block);
}
// Target block becomes unreachable and gets picked up by
// RemoveUnreachableBlocks.
delete branch;
return true;
}
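// Remove every block that cannot be reached from the entry block. We do a
// depth-first search over successor edges, then delete anything unvisited,
// first dropping it from the Phi predecessor lists of any surviving
// successors.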
bool CleanCFG::RemoveUnreachableBlocks(CFG* cfg) {
std::unordered_set<BasicBlock*> visited;
std::vector<BasicBlock*> stack;
stack.emplace_back(cfg->entry_block);
while (!stack.empty()) {
BasicBlock* block = stack.back();
stack.pop_back();
if (visited.count(block)) {
continue;
}
visited.insert(block);
auto term = block->GetTerminator();
for (std::size_t i = 0, n = term->numEdges(); i < n; ++i) {
BasicBlock* succ = term->successor(i);
// This check isn't necessary for correctness but avoids unnecessary
// pushes to the stack.
if (!visited.count(succ)) {
stack.emplace_back(succ);
}
}
}
std::vector<BasicBlock*> unreachable;
for (auto it = cfg->blocks.begin(); it != cfg->blocks.end();) {
BasicBlock* block = &*it;
++it;
if (!visited.count(block)) {
if (Instr* old_term = block->GetTerminator()) {
for (std::size_t i = 0, n = old_term->numEdges(); i < n; ++i) {
old_term->successor(i)->removePhiPredecessor(block);
}
}
cfg->RemoveBlock(block);
block->clear();
unreachable.emplace_back(block);
}
}
for (BasicBlock* block : unreachable) {
delete block;
}
return unreachable.size() > 0;
}
// Replace cond branches where both sides of the branch go to the same block
// with a direct branch.
// TODO(emacs): Move to Simplify
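// An illustrative sketch with a made-up block number:
//
//   Before: CondBranch<bb1, bb1> $cond      After: Branch<bb1>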
static void simplifyRedundantCondBranches(CFG* cfg) {
std::vector<BasicBlock*> to_simplify;
for (auto& block : cfg->blocks) {
if (block.empty()) {
continue;
}
auto term = block.GetTerminator();
std::size_t num_edges = term->numEdges();
if (num_edges < 2) {
continue;
}
JIT_CHECK(num_edges == 2, "only two edges are supported");
if (term->successor(0) != term->successor(1)) {
continue;
}
switch (term->opcode()) {
case Opcode::kCondBranch:
case Opcode::kCondBranchIterNotDone:
case Opcode::kCondBranchCheckType:
break;
default:
// Can't be sure that it's safe to replace the instruction with a branch
JIT_CHECK(
false, "unknown side effects of %s instruction", term->opname());
break;
}
to_simplify.emplace_back(&block);
}
for (auto& block : to_simplify) {
auto term = block->GetTerminator();
term->unlink();
auto branch = block->appendWithOff<Branch>(
term->bytecodeOffset(), term->successor(0));
branch->copyBytecodeOffset(*term);
delete term;
}
}
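// A trampoline is a block that does nothing but jump somewhere else (a lone
// Branch). Splicing one out retargets all of its predecessors directly at
// its successor. An illustrative sketch with made-up block numbers:
//
//   Before:                        After:
//     bb0: CondBranch<bb1, bb2>      bb0: CondBranch<bb3, bb2>
//     bb1: Branch<bb3>               (bb1 deleted)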
bool CleanCFG::RemoveTrampolineBlocks(CFG* cfg) {
std::vector<BasicBlock*> trampolines;
for (auto& block : cfg->blocks) {
if (!block.IsTrampoline()) {
continue;
}
BasicBlock* succ = block.successor(0);
// If this is the entry block and its successor has multiple
// predecessors, don't remove it; it's necessary to maintain isolated
// entries.
if (&block == cfg->entry_block) {
if (succ->in_edges().size() > 1) {
continue;
} else {
cfg->entry_block = succ;
}
}
// Update all predecessors to jump directly to our successor
block.retargetPreds(succ);
// Finish splicing the trampoline out of the cfg
block.set_successor(0, nullptr);
trampolines.emplace_back(&block);
}
for (auto& block : trampolines) {
cfg->RemoveBlock(block);
delete block;
}
simplifyRedundantCondBranches(cfg);
return trampolines.size() > 0;
}
void CleanCFG::Run(Function& irfunc) {
bool changed = false;
do {
// Remove any trivial Phis; absorbDstBlock cannot handle them.
PhiElimination{}.Run(irfunc);
std::vector<BasicBlock*> blocks = irfunc.cfg.GetRPOTraversal();
for (auto block : blocks) {
// Ignore transient empty blocks.
if (block->empty()) {
continue;
}
// Keep working on the current block until no further changes are made.
for (;; changed = true) {
if (absorbDstBlock(block)) {
continue;
}
break;
}
}
} while (RemoveUnreachableBlocks(&irfunc.cfg));
if (changed) {
reflowTypes(irfunc);
}
}
struct AbstractCall {
AbstractCall(PyFunctionObject* func, size_t nargs, DeoptBase* instr)
: func(func), nargs(nargs), instr(instr) {}
AbstractCall(Register* target, size_t nargs, DeoptBase* instr)
: target(target),
func(reinterpret_cast<PyFunctionObject*>(target->type().objectSpec())),
nargs(nargs),
instr(instr) {}
Register* arg(std::size_t i) const {
if (auto f = dynamic_cast<InvokeStaticFunction*>(instr)) {
return f->arg(i);
}
if (auto f = dynamic_cast<VectorCallBase*>(instr)) {
return f->arg(i);
}
JIT_CHECK(false, "unsupported call type %s", instr->opname());
}
Register* target{nullptr};
BorrowedRef<PyFunctionObject> func{nullptr};
size_t nargs{0};
DeoptBase* instr{nullptr};
};
// Most of these checks are only temporary and do not permanently prohibit
// inlining. They are here to simplify bringup of the inliner and can be
// treated as TODOs.
static bool canInline(
AbstractCall* call_instr,
PyFunctionObject* func,
const std::string& fullname) {
if (func->func_defaults != nullptr) {
JIT_DLOG("Can't inline %s because it has defaults", fullname);
return false;
}
if (func->func_kwdefaults != nullptr) {
JIT_DLOG("Can't inline %s because it has kwdefaults", fullname);
return false;
}
PyCodeObject* code = reinterpret_cast<PyCodeObject*>(func->func_code);
if (code->co_kwonlyargcount > 0) {
JIT_DLOG("Can't inline %s because it has keyword-only args", fullname);
return false;
}
if (code->co_flags & CO_VARARGS) {
JIT_DLOG("Can't inline %s because it has varargs", fullname);
return false;
}
if (code->co_flags & CO_VARKEYWORDS) {
JIT_DLOG("Can't inline %s because it has varkwargs", fullname);
return false;
}
JIT_DCHECK(code->co_argcount >= 0, "argcount must be non-negative");
if (call_instr->nargs != static_cast<size_t>(code->co_argcount)) {
JIT_DLOG(
"Can't inline %s because it is called with mismatched arguments",
fullname);
return false;
}
if (code->co_flags & kCoFlagsAnyGenerator) {
JIT_DLOG("Can't inline %s because it is a generator", fullname);
return false;
}
Py_ssize_t ncellvars = PyTuple_GET_SIZE(code->co_cellvars);
if (ncellvars > 0) {
JIT_DLOG("Can't inline %s because it is has cellvars", fullname);
return false;
}
Py_ssize_t nfreevars = PyTuple_GET_SIZE(code->co_freevars);
if (nfreevars > 0) {
JIT_DLOG("Can't inline %s because it is has freevars", fullname);
return false;
}
if (usesRuntimeFunc(code)) {
JIT_DLOG(
"Can't inline %s because it needs runtime access to its "
"PyFunctionObject",
fullname);
return false;
}
if (g_threaded_compile_context.compileRunning() && !isPreloaded(func)) {
JIT_DLOG(
"Can't inline %s because multithreaded compile is enabled and the "
"function is not preloaded",
fullname);
return false;
}
return true;
}
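// Inline a single call site into the caller. Roughly (an illustrative sketch,
// not an exact HIR dump), a call like
//
//   $r = VectorCall<1> $callee $arg
//
// is expanded into
//
//   $code = LoadField<func_code> $callee     (non-static calls only)
//   $guarded = GuardIs<code_obj> $code       (non-static calls only)
//   BeginInlinedFunction
//   Branch<callee entry>
//     ... callee body, with LoadArg rewritten to Assign from $arg and
//         Return rewritten to an Assign into $r plus a Branch back ...
//   EndInlinedFunction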
void inlineFunctionCall(Function& caller, AbstractCall* call_instr) {
PyFunctionObject* func = call_instr->func;
PyCodeObject* code = reinterpret_cast<PyCodeObject*>(func->func_code);
JIT_CHECK(PyCode_Check(code), "Expected PyCodeObject");
PyObject* globals = func->func_globals;
std::string fullname = funcFullname(func);
if (!PyDict_Check(globals)) {
JIT_DLOG(
"Refusing to inline %s: globals is a %.200s, not a dict",
fullname,
Py_TYPE(globals)->tp_name);
return;
}
PyObject* builtins = PyEval_GetBuiltins();
if (!PyDict_CheckExact(builtins)) {
JIT_DLOG(
"Refusing to inline %s: builtins is a %.200s, not a dict",
fullname,
Py_TYPE(builtins)->tp_name);
return;
}
if (!canInline(call_instr, func, fullname)) {
JIT_DLOG("Cannot inline %s into %s", fullname, caller.fullname);
return;
}
auto caller_frame_state =
std::make_unique<FrameState>(*call_instr->instr->frameState());
// Multi-threaded compilation must use an existing Preloader, whereas
// single-threaded compilation can make Preloaders on the fly.
InlineResult result;
if (g_threaded_compile_context.compileRunning()) {
HIRBuilder hir_builder(getPreloader(func));
result = hir_builder.inlineHIR(&caller, caller_frame_state.get());
} else {
// This explicit temporary is necessary because HIRBuilder takes a const
// reference and stores it, and we need to make sure the target doesn't go
// away.
Preloader preloader(func);
HIRBuilder hir_builder(preloader);
result = hir_builder.inlineHIR(&caller, caller_frame_state.get());
}
if (result.entry == nullptr) {
JIT_DLOG("Cannot inline %s into %s", fullname, caller.fullname);
return;
}
BasicBlock* head = call_instr->instr->block();
BasicBlock* tail = head->splitAfter(*call_instr->instr);
auto begin_inlined_function = BeginInlinedFunction::create(
code, globals, std::move(caller_frame_state), fullname);
auto callee_branch = Branch::create(result.entry);
if (call_instr->target != nullptr) {
// Not a static call. Check that __code__ has not been swapped out since
// the function was inlined.
// VectorCall -> {LoadField, GuardIs, BeginInlinedFunction, Branch to
// callee CFG}
// TODO(emacs): Emit a DeoptPatchpoint here to catch the case where someone
// swaps out function.__code__.
Register* code_obj = caller.env.AllocateRegister();
auto load_code = LoadField::create(
code_obj,
call_instr->target,
"func_code",
offsetof(PyFunctionObject, func_code),
TObject);
Register* guarded_code = caller.env.AllocateRegister();
auto guard_code = GuardIs::create(
guarded_code, reinterpret_cast<PyObject*>(code), code_obj);
call_instr->instr->ExpandInto(
{load_code, guard_code, begin_inlined_function, callee_branch});
} else {
call_instr->instr->ExpandInto({begin_inlined_function, callee_branch});
}
tail->push_front(
EndInlinedFunction::create(begin_inlined_function->inlineDepth()));
// Transform LoadArg into Assign
for (auto it = result.entry->begin(); it != result.entry->end();) {
auto& instr = *it;
++it;
if (instr.IsLoadArg()) {
auto load_arg = static_cast<LoadArg*>(&instr);
auto assign = Assign::create(
instr.GetOutput(), call_instr->arg(load_arg->arg_idx()));
instr.ReplaceWith(*assign);
delete &instr;
}
}
// Transform Return into Assign+Branch
auto return_instr = result.exit->GetTerminator();
JIT_CHECK(
return_instr->IsReturn(),
"terminator from inlined function should be Return");
auto assign = Assign::create(
call_instr->instr->GetOutput(), return_instr->GetOperand(0));
auto return_branch = Branch::create(tail);
return_instr->ExpandInto({assign, return_branch});
delete return_instr;
delete call_instr->instr;
}
void InlineFunctionCalls::Run(Function& irfunc) {
if (irfunc.code == nullptr) {
// In tests, irfunc may not have bytecode.
return;
}
if (irfunc.code->co_flags & kCoFlagsAnyGenerator) {
// TODO(T109706798): Support inlining into generators
JIT_DLOG(
"Refusing to inline functions into %s: function is a generator",
irfunc.fullname);
return;
}
std::vector<AbstractCall> to_inline;
for (auto& block : irfunc.cfg.blocks) {
for (auto& instr : block) {
// TODO(emacs): Support InvokeMethod
if (instr.IsVectorCall() || instr.IsVectorCallStatic()) {
auto call = static_cast<VectorCallBase*>(&instr);
Register* target = call->func();
if (!target->type().hasValueSpec(TFunc)) {
JIT_DLOG(
"Cannot inline non-function type %s (%s) into %s",
target->type(),
*target,
irfunc.fullname);
continue;
}
to_inline.emplace_back(AbstractCall(target, call->numArgs(), call));
} else if (instr.IsInvokeStaticFunction()) {
auto call = static_cast<InvokeStaticFunction*>(&instr);
to_inline.emplace_back(
AbstractCall(call->func(), call->NumArgs(), call));
}
}
}
if (to_inline.empty()) {
return;
}
for (auto& instr : to_inline) {
inlineFunctionCall(irfunc, &instr);
// We need to reflow types after every inline to propagate new type
// information from the callee.
reflowTypes(irfunc);
}
// The inliner will make some blocks unreachable, and we need to remove them
// to make the CFG valid again. Although removing unreachable blocks inside
// the loop above might reduce the work remaining (less to inline), we cannot
// do it there: it might delete instructions pointed to by `to_inline`.
CopyPropagation{}.Run(irfunc);
CleanCFG{}.Run(irfunc);
}
} // namespace hir
} // namespace jit