Jit/hir/simplify.cpp

// Copyright (c) Facebook, Inc. and its affiliates. (http://www.facebook.com)
#include "Jit/hir/optimization.h"
#include "Jit/hir/printer.h"
#include "Jit/hir/ssa.h"

namespace jit {
namespace hir {

// This file contains the Simplify pass, which is a collection of
// strength-reduction optimizations. An optimization should be added as a case
// in Simplify rather than a standalone pass if and only if it meets these
// criteria:
// - It operates on one instruction at a time, with no global analysis or
//   state.
// - Optimizable instructions are replaced with 0 or more new instructions that
//   define an equivalent value while doing less work.
//
// To add support for a new instruction Foo, add a function simplifyFoo(Env&
// env, const Foo* instr) (env can be left out if you don't need it) containing
// the optimization and call it from a new case in
// simplifyInstr(). simplifyFoo() should analyze the given instruction, then do
// one of the following:
// - If the instruction is not optimizable, return nullptr and do not call any
//   functions on env.
// - If the instruction is redundant and can be elided, return the existing
//   value that should replace its output (this is often one of the
//   instruction's inputs).
// - If the instruction can be replaced with a cheaper sequence of
//   instructions, emit those instructions using env.emit<T>(...). For
//   instructions that define an output, emit<T> will allocate and return an
//   appropriately-typed Register* for you, to ease chaining multiple
//   instructions. As with the previous case, return the Register* that should
//   replace the current output of the instruction.
// - If the instruction can be elided but does not produce an output, set
//   env.optimized = true and return nullptr.
//
// Do not modify, unlink, or delete the existing instruction; all of those
// details are handled by existing code outside of the individual optimization
// functions.
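//
// As a sketch of the shape this takes (Foo and CheaperInstr are hypothetical,
// not real opcodes), a simplify function following the rules above typically
// looks like:
//
//   Register* simplifyFoo(Env& env, const Foo* instr) {
//     Register* input = instr->GetOperand(0);
//     if (/* instr is redundant */) {
//       return input;
//     }
//     if (/* instr can be strength-reduced */) {
//       return env.emit<CheaperInstr>(input);
//     }
//     return nullptr;
//   }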

namespace {

struct Env {
  Env(Function& f)
      : func{f},
        type_object(
            Type::fromObject(reinterpret_cast<PyObject*>(&PyType_Type))) {}

  // The current function.
  Function& func;

  // The current block being emitted into. Might not be the block originally
  // containing the instruction being optimized, if more blocks have been
  // inserted by the simplify function.
  BasicBlock* block{nullptr};

  // Insertion cursor for new instructions. Must belong to block's Instr::List,
  // and except for brief critical sections during emit functions on Env,
  // should always point to the original, unoptimized instruction.
  Instr::List::iterator cursor;

  // Bytecode offset of the instruction being optimized, automatically set
  // on all replacement instructions.
  int bc_off{-1};

  // Set to true by emit<T>() to indicate that the original instruction should
  // be removed.
  bool optimized{false};

  // The object that corresponds to "type".
  Type type_object{TTop};

  // Create and insert the specified instruction. If the instruction has an
  // output, a new Register* will be created and returned.
  template <typename T, typename... Args>
  Register* emit(Args&&... args) {
    if constexpr (T::has_output) {
      return emitRaw<T>(
          func.env.AllocateRegister(), std::forward<Args>(args)...);
    } else {
      return emitRaw<T>(std::forward<Args>(args)...);
    }
  }

  // Similar to emit<T>(), but does not automatically create an output
  // register.
  template <typename T, typename... Args>
  Register* emitRaw(Args&&... args) {
    optimized = true;
    T* instr = T::create(std::forward<Args>(args)...);
    instr->setBytecodeOffset(bc_off);
    block->insert(instr, cursor);
    if constexpr (T::has_output) {
      Register* output = instr->GetOutput();
      output->set_type(outputType(*instr));
      return output;
    }
    return nullptr;
  }

  // Create and return a conditional value. Expects three callables:
  // - do_branch is given two BasicBlock* and should emit a conditional branch
  //   instruction using them.
  // - do_bb1 should emit code for the first successor, returning the computed
  //   value.
  // - do_bb2 should do the same for the second successor.
  template <typename BranchFn, typename Bb1Fn, typename Bb2Fn>
  Register* emitCond(BranchFn do_branch, Bb1Fn do_bb1, Bb2Fn do_bb2) {
    BasicBlock* bb1 = func.cfg.AllocateBlock();
    BasicBlock* bb2 = func.cfg.AllocateBlock();
    do_branch(bb1, bb2);
    JIT_CHECK(
        cursor != block->begin(),
        "block should not be empty after calling do_branch()");
    BasicBlock* tail = block->splitAfter(*std::prev(cursor));

    block = bb1;
    cursor = bb1->end();
    Register* bb1_reg = do_bb1();
    emit<Branch>(tail);

    block = bb2;
    cursor = bb2->end();
    Register* bb2_reg = do_bb2();
    emit<Branch>(tail);

    block = tail;
    cursor = tail->begin();
    std::unordered_map<BasicBlock*, Register*> phi_srcs{
        {bb1, bb1_reg},
        {bb2, bb2_reg},
    };
    return emit<Phi>(phi_srcs);
  }
};
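
// A typical emitCond() call site looks roughly like the sketch below (the
// value_if_* names are placeholders; see simplifyLoadAttr() further down for
// a real use):
//
//   Register* result = env.emitCond(
//       [&](BasicBlock* t, BasicBlock* f) {
//         env.emit<CondBranch>(cond, t, f);
//       },
//       [&] { return value_if_true; },
//       [&] { return value_if_false; });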

Register* simplifyCheck(const CheckBase* instr) {
  // These all check their input for null.
  if (instr->GetOperand(0)->isA(TObject)) {
    // No UseType is necessary because we never guard potentially-null values.
    return instr->GetOperand(0);
  }
  return nullptr;
}

Register* simplifyGuardType(Env& env, const GuardType* instr) {
  Register* input = instr->GetOperand(0);
  Type type = instr->target();
  if (input->isA(type)) {
    // We don't need a UseType: If an instruction cares about the type of this
    // GuardType's output, it will express that through its operand type
    // constraints. Once this GuardType is removed, those constraints will
    // apply to input's instruction rather than this GuardType, and any
    // downstream instructions will still be satisfied.
    return input;
  }
  if (type == TNoneType) {
    return env.emit<GuardIs>(Py_None, input);
  }
  return nullptr;
}

Register* simplifyRefineType(const RefineType* instr) {
  Register* input = instr->GetOperand(0);
  if (input->isA(instr->type())) {
    // No UseType for the same reason as GuardType above: RefineType itself
    // doesn't care about the input's type, only users of its output do, and
    // they're unchanged.
    return input;
  }
  return nullptr;
}

Register* simplifyIntConvert(Env& env, const IntConvert* instr) {
  Register* src = instr->GetOperand(0);
  if (src->isA(instr->type())) {
    env.emit<UseType>(src, instr->type());
    return instr->GetOperand(0);
  }
  return nullptr;
}

Register* simplifyCompare(Env& env, const Compare* instr) {
  Register* left = instr->GetOperand(0);
  Register* right = instr->GetOperand(1);
  CompareOp op = instr->op();
  if (op == CompareOp::kIs || op == CompareOp::kIsNot) {
    Type left_t = left->type();
    Type right_t = right->type();
    if (!left_t.couldBe(right_t)) {
      env.emit<UseType>(left, left_t);
      env.emit<UseType>(right, right_t);
      return env.emit<LoadConst>(
          Type::fromObject(op == CompareOp::kIs ? Py_False : Py_True));
    }
    PyObject* left_t_obj = left_t.asObject();
    PyObject* right_t_obj = right_t.asObject();
    if (left_t_obj != nullptr && right_t_obj != nullptr) {
      env.emit<UseType>(left, left_t);
      env.emit<UseType>(right, right_t);
      bool same_obj = left_t_obj == right_t_obj;
      bool truthy = (op == CompareOp::kIs) == same_obj;
      return env.emit<LoadConst>(
          Type::fromObject(truthy ? Py_True : Py_False));
    }
    auto cbool = env.emit<PrimitiveCompare>(
        instr->op() == CompareOp::kIs ? PrimitiveCompareOp::kEqual
                                      : PrimitiveCompareOp::kNotEqual,
        instr->left(),
        instr->right());
    return env.emit<PrimitiveBox>(cbool, TCBool);
  }
  if (left->isA(TNoneType) && right->isA(TNoneType)) {
    if (op == CompareOp::kEqual || op == CompareOp::kNotEqual) {
      env.emit<UseType>(left, TNoneType);
      env.emit<UseType>(right, TNoneType);
      return env.emit<LoadConst>(
          Type::fromObject(op == CompareOp::kEqual ? Py_True : Py_False));
    }
  }
  // Emit LongCompare if both args are LongExact and the op is supported
  // between two longs.
  if (left->isA(TLongExact) && right->isA(TLongExact) &&
      !(op == CompareOp::kIn || op == CompareOp::kNotIn ||
        op == CompareOp::kExcMatch)) {
    return env.emit<LongCompare>(instr->op(), left, right);
  }
  return nullptr;
}

Register* simplifyCondBranch(Env& env, const CondBranch* instr) {
  Type op_type = instr->GetOperand(0)->type();
  if (op_type.hasIntSpec()) {
    if (op_type.intSpec() == 0) {
      return env.emit<Branch>(instr->false_bb());
    }
    return env.emit<Branch>(instr->true_bb());
  }
  return nullptr;
}

Register* simplifyCondBranchCheckType(
    Env& env,
    const CondBranchCheckType* instr) {
  Register* value = instr->GetOperand(0);
  Type actual_type = value->type();
  Type expected_type = instr->type();
  if (actual_type <= expected_type) {
    env.emit<UseType>(value, actual_type);
    return env.emit<Branch>(instr->true_bb());
  }
  if (!actual_type.couldBe(expected_type)) {
    env.emit<UseType>(value, actual_type);
    return env.emit<Branch>(instr->false_bb());
  }
  return nullptr;
}
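
// IsTruthy on a constant immutable object folds to a constant, and for
// several exact builtin types it lowers to a direct field load or primitive
// compare: bool compares against Py_True, list/tuple/array load ob_size,
// dict/set/str load their respective length fields, and int compares against
// zero.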
Register* simplifyIsTruthy(Env& env, const IsTruthy* instr) {
  Type ty = instr->GetOperand(0)->type();
  PyObject* obj = ty.asObject();
  if (obj != nullptr) {
    // Should only consider immutable objects.
    static const std::unordered_set<PyTypeObject*> kTrustedTypes{
        &PyBool_Type,
        &PyFloat_Type,
        &PyLong_Type,
        &PyFrozenSet_Type,
        &PySlice_Type,
        &PyTuple_Type,
        &PyUnicode_Type,
        &_PyNone_Type,
    };
    if (kTrustedTypes.count(Py_TYPE(obj))) {
      int res = PyObject_IsTrue(obj);
      JIT_CHECK(res >= 0, "PyObject_IsTrue failed on trusted type");
      // Since we no longer use instr->GetOperand(0), we need to make sure that
      // we don't lose any associated type checks.
      env.emit<UseType>(instr->GetOperand(0), ty);
      Type output_type = instr->GetOutput()->type();
      return env.emit<LoadConst>(Type::fromCInt(res, output_type));
    }
  }
  if (ty <= TBool) {
    Register* left = instr->GetOperand(0);
    env.emit<UseType>(left, TBool);
    Register* right = env.emit<LoadConst>(Type::fromObject(Py_True));
    Register* result =
        env.emit<PrimitiveCompare>(PrimitiveCompareOp::kEqual, left, right);
    return env.emit<IntConvert>(result, TCInt32);
  }
  if (ty <= TListExact || ty <= TTupleExact || ty <= TArray) {
    Register* obj = instr->GetOperand(0);
    env.emit<UseType>(obj, ty);
    Register* size = env.emit<LoadField>(
        obj, "ob_size", offsetof(PyVarObject, ob_size), TCInt64);
    return env.emit<IntConvert>(size, TCInt32);
  }
  if (ty <= TDictExact || ty <= TSetExact || ty <= TUnicodeExact) {
    Register* obj = instr->GetOperand(0);
    env.emit<UseType>(obj, ty.unspecialized());
    std::size_t offset = 0;
    const char* name = nullptr;
    if (ty <= TDictExact) {
      offset = offsetof(PyDictObject, ma_used);
      name = "ma_used";
    } else if (ty <= TSetExact) {
      offset = offsetof(PySetObject, used);
      name = "used";
    } else if (ty <= TUnicodeExact) {
      // Note: In debug mode, the interpreter has an assert that ensures the
      // string is "ready"; check PyUnicode_GET_LENGTH for strings.
      offset = offsetof(PyASCIIObject, length);
      name = "length";
    } else {
      JIT_CHECK(false, "unexpected type");
    }
    Register* size = env.emit<LoadField>(obj, name, offset, TCInt64);
    return env.emit<IntConvert>(size, TCInt32);
  }
  if (ty <= TLongExact) {
    Register* left = instr->GetOperand(0);
    env.emit<UseType>(left, ty);
    // Zero is canonical as a "small int" in CPython.
    ThreadedCompileSerialize guard;
    auto zero = Ref<>::steal(PyLong_FromLong(0));
    Register* right = env.emit<LoadConst>(
        Type::fromObject(env.func.env.addReference(std::move(zero))));
    Register* result =
        env.emit<PrimitiveCompare>(PrimitiveCompareOp::kNotEqual, left, right);
    return env.emit<IntConvert>(result, TCInt32);
  }
  return nullptr;
}

Register* simplifyLoadTupleItem(Env& env, const LoadTupleItem* instr) {
  Register* src = instr->GetOperand(0);
  Type src_ty = src->type();
  if (!src_ty.hasValueSpec(TTuple)) {
    return nullptr;
  }
  env.emit<UseType>(src, src_ty);
  return env.emit<LoadConst>(
      Type::fromObject(PyTuple_GET_ITEM(src_ty.objectSpec(), instr->idx())));
}

Register* simplifyBinaryOp(Env& env, const BinaryOp* instr) {
  Register* lhs = instr->left();
  Register* rhs = instr->right();
  if (instr->op() == BinaryOpKind::kSubscript) {
    if (!rhs->isA(TLongExact)) {
      return nullptr;
    }
    Type lhs_type = lhs->type();
    Type rhs_type = rhs->type();
    if (lhs_type <= TTupleExact && lhs_type.hasObjectSpec() &&
        rhs_type.hasObjectSpec()) {
      int overflow;
      Py_ssize_t index =
          PyLong_AsLongAndOverflow(rhs_type.objectSpec(), &overflow);
      if (!overflow) {
        PyObject* lhs_obj = lhs_type.objectSpec();
        if (index >= 0 && index < PyTuple_GET_SIZE(lhs_obj)) {
          ThreadedCompileSerialize guard;
          PyObject* item = PyTuple_GET_ITEM(lhs_obj, index);
          env.emit<UseType>(lhs, lhs_type);
          env.emit<UseType>(rhs, rhs_type);
          return env.emit<LoadConst>(
              Type::fromObject(env.func.env.addReference(Ref(item))));
        }
        // Fallthrough
      }
      // Fallthrough
    }
    if (lhs->isA(TListExact) || lhs->isA(TTupleExact)) {
      // TODO(T93509109): Replace TCInt64 with a less platform-specific
      // representation of the type, which should be analogous to Py_ssize_t.
      env.emit<UseType>(lhs, lhs->isA(TListExact) ? TListExact : TTupleExact);
      env.emit<UseType>(rhs, TLongExact);
      Register* right_index = env.emit<PrimitiveUnbox>(rhs, TCInt64);
      Register* adjusted_idx =
          env.emit<CheckSequenceBounds>(lhs, right_index, *instr->frameState());
      ssize_t offset = offsetof(PyTupleObject, ob_item);
      Register* array = lhs;
      // Lists carry a nested array of ob_item whereas tuples are
      // variable-sized structs.
      if (lhs->isA(TListExact)) {
        array = env.emit<LoadField>(
            lhs, "ob_item", offsetof(PyListObject, ob_item), TCPtr);
        offset = 0;
      }
      return env.emit<LoadArrayItem>(array, adjusted_idx, lhs, offset, TObject);
    }
  }
  if (lhs->isA(TLongExact) && rhs->isA(TLongExact)) {
    if (instr->op() == BinaryOpKind::kMatrixMultiply ||
        instr->op() == BinaryOpKind::kSubscript) {
      // These will generate an error at runtime.
      return nullptr;
    }
    env.emit<UseType>(lhs, TLongExact);
    env.emit<UseType>(rhs, TLongExact);
    return env.emit<LongBinaryOp>(instr->op(), lhs, rhs, *instr->frameState());
  }
  // Unsupported case.
  return nullptr;
}
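
// Unboxing a constant int or float object can be folded into a LoadConst of
// the primitive value, but only when the value fits in the target primitive
// type; otherwise the PrimitiveUnbox is left in place so the overflow is
// handled at runtime.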
Register* simplifyPrimitiveUnbox(Env& env, const PrimitiveUnbox* instr) {
  Register* unboxed_value = instr->GetOperand(0);
  Type unbox_output_type = instr->GetOutput()->type();
  // Ensure that we are dealing with either an integer or a double.
  Type unboxed_value_type = unboxed_value->type();
  if (!(unboxed_value_type.hasObjectSpec())) {
    return nullptr;
  }
  PyObject* value = unboxed_value_type.objectSpec();
  if (unbox_output_type <= (TCSigned | TCUnsigned)) {
    if (!PyLong_Check(value)) {
      return nullptr;
    }
    int overflow = 0;
    long number =
        PyLong_AsLongAndOverflow(unboxed_value_type.objectSpec(), &overflow);
    if (overflow != 0) {
      return nullptr;
    }
    if (unbox_output_type <= TCSigned) {
      if (!Type::CIntFitsType(number, unbox_output_type)) {
        return nullptr;
      }
      return env.emit<LoadConst>(Type::fromCInt(number, unbox_output_type));
    } else {
      if (!Type::CUIntFitsType(number, unbox_output_type)) {
        return nullptr;
      }
      return env.emit<LoadConst>(Type::fromCUInt(number, unbox_output_type));
    }
  } else if (unbox_output_type <= TCDouble) {
    if (!PyFloat_Check(value)) {
      return nullptr;
    }
    double number = PyFloat_AS_DOUBLE(unboxed_value_type.objectSpec());
    return env.emit<LoadConst>(Type::fromCDouble(number));
  }
  return nullptr;
}
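
// LoadAttr on a type object is lowered to a per-site cache lookup: a fast
// path that reuses the cached value when the cached type matches the
// receiver, and a slow path that fills the cache. This is the only simplify
// function in this file that introduces new control flow, via Env::emitCond().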
Register* simplifyLoadAttr(Env& env, const LoadAttr* load_attr) {
  Register* receiver = load_attr->GetOperand(0);
  if (!receiver->isA(TType)) {
    return nullptr;
  }
  const int cache_id = env.func.env.allocateLoadAttrCache();
  env.emit<UseType>(receiver, TType);
  Register* guard = env.emit<LoadTypeAttrCacheItem>(cache_id, 0);
  Register* type_matches =
      env.emit<PrimitiveCompare>(PrimitiveCompareOp::kEqual, guard, receiver);
  return env.emitCond(
      [&](BasicBlock* fast_path, BasicBlock* slow_path) {
        env.emit<CondBranch>(type_matches, fast_path, slow_path);
      },
      [&] { // Fast path
        return env.emit<LoadTypeAttrCacheItem>(cache_id, 1);
      },
      [&] { // Slow path
        int name_idx = load_attr->name_idx();
        return env.emit<FillTypeAttrCache>(
            receiver, name_idx, cache_id, *load_attr->frameState());
      });
}

// If we're loading ob_fval from a known float into a double, this can be
// simplified into a LoadConst.
Register* simplifyLoadField(Env& env, const LoadField* instr) {
  Register* loadee = instr->GetOperand(0);
  Type load_output_type = instr->GetOutput()->type();
  // Ensure that we are dealing with a known object.
  Type loadee_type = loadee->type();
  if (!loadee_type.hasObjectSpec()) {
    return nullptr;
  }
  PyObject* value = loadee_type.objectSpec();
  if (PyFloat_Check(value) && load_output_type <= TCDouble &&
      instr->offset() == offsetof(PyFloatObject, ob_fval)) {
    double number = PyFloat_AS_DOUBLE(loadee_type.objectSpec());
    env.emit<UseType>(loadee, loadee_type);
    return env.emit<LoadConst>(Type::fromCDouble(number));
  }
  return nullptr;
}

Register* simplifyIsNegativeAndErrOccurred(
    Env& env,
    const IsNegativeAndErrOccurred* instr) {
  if (!instr->GetOperand(0)->instr()->IsLoadConst()) {
    return nullptr;
  }
  // Other optimizations might reduce the strength of global loads, etc. to
  // load consts. If this is the case, we know that there can't be an active
  // exception. In this case, the IsNegativeAndErrOccurred instruction has a
  // known result. Instead of deleting it, we replace it with a load of false;
  // the idea is that if there are other downstream consumers of it, they will
  // still have access to the result. Otherwise, DCE will take care of this.
  Type output_type = instr->GetOutput()->type();
  return env.emit<LoadConst>(Type::fromCInt(0, output_type));
}

Register* simplifyVectorCall(Env& env, const VectorCall* instr) {
  Register* target = instr->GetOperand(0);
  Type target_type = target->type();
  if (target_type == env.type_object && instr->NumOperands() == 2) {
    env.emit<UseType>(target, env.type_object);
    return env.emit<LoadField>(
        instr->GetOperand(1), "ob_type", offsetof(PyObject, ob_type), TType);
  }
  return nullptr;
}

Register* simplifyInstr(Env& env, const Instr* instr) {
  switch (instr->opcode()) {
    case Opcode::kCheckVar:
    case Opcode::kCheckExc:
    case Opcode::kCheckField:
      return simplifyCheck(static_cast<const CheckBase*>(instr));
    case Opcode::kGuardType:
      return simplifyGuardType(env, static_cast<const GuardType*>(instr));
    case Opcode::kRefineType:
      return simplifyRefineType(static_cast<const RefineType*>(instr));
    case Opcode::kCompare:
      return simplifyCompare(env, static_cast<const Compare*>(instr));
    case Opcode::kCondBranch:
      return simplifyCondBranch(env, static_cast<const CondBranch*>(instr));
    case Opcode::kCondBranchCheckType:
      return simplifyCondBranchCheckType(
          env, static_cast<const CondBranchCheckType*>(instr));
    case Opcode::kIntConvert:
      return simplifyIntConvert(env, static_cast<const IntConvert*>(instr));
    case Opcode::kIsTruthy:
      return simplifyIsTruthy(env, static_cast<const IsTruthy*>(instr));
    case Opcode::kLoadAttr:
      return simplifyLoadAttr(env, static_cast<const LoadAttr*>(instr));
    case Opcode::kLoadField:
      return simplifyLoadField(env, static_cast<const LoadField*>(instr));
    case Opcode::kLoadTupleItem:
      return simplifyLoadTupleItem(
          env, static_cast<const LoadTupleItem*>(instr));
    case Opcode::kBinaryOp:
      return simplifyBinaryOp(env, static_cast<const BinaryOp*>(instr));
    case Opcode::kPrimitiveUnbox:
      return simplifyPrimitiveUnbox(
          env, static_cast<const PrimitiveUnbox*>(instr));
    case Opcode::kIsNegativeAndErrOccurred:
      return simplifyIsNegativeAndErrOccurred(
          env, static_cast<const IsNegativeAndErrOccurred*>(instr));
    case Opcode::kVectorCall:
      return simplifyVectorCall(env, static_cast<const VectorCall*>(instr));
    default:
      return nullptr;
  }
}

} // namespace
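
// Drives the simplify functions to a fixed point: each iteration rewrites
// instructions in place as described at the top of this file, then runs a few
// cheap cleanup passes, and the loop repeats until nothing changes.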
void Simplify::Run(Function& irfunc) {
  Env env{irfunc};
  bool changed;
  do {
    changed = false;
    for (auto cfg_it = irfunc.cfg.blocks.begin();
         cfg_it != irfunc.cfg.blocks.end();) {
      BasicBlock& block = *cfg_it;
      ++cfg_it;
      env.block = &block;

      for (auto blk_it = block.begin(); blk_it != block.end();) {
        Instr& instr = *blk_it;
        ++blk_it;
        env.optimized = false;
        env.cursor = block.iterator_to(instr);
        env.bc_off = instr.bytecodeOffset();
        Register* new_output = simplifyInstr(env, &instr);
        JIT_CHECK(
            env.cursor == env.block->iterator_to(instr),
            "Simplify functions are expected to leave env.cursor pointing to "
            "the original instruction, with new instructions inserted before "
            "it.");
        if (new_output == nullptr && !env.optimized) {
          continue;
        }
        changed = true;
        JIT_CHECK(
            (new_output == nullptr) == (instr.GetOutput() == nullptr),
            "Simplify function should return a new output if and only if the "
            "existing instruction has an output");
        if (new_output != nullptr) {
          JIT_CHECK(
              new_output->type() <= instr.GetOutput()->type(),
              "New output type %s isn't compatible with old output type %s",
              new_output->type(),
              instr.GetOutput()->type());
          env.emitRaw<Assign>(instr.GetOutput(), new_output);
        }
        if (instr.IsCondBranch() || instr.IsCondBranchIterNotDone() ||
            instr.IsCondBranchCheckType()) {
          JIT_CHECK(env.cursor != env.block->begin(), "Unexpected empty block");
          Instr& prev_instr = *std::prev(env.cursor);
          JIT_CHECK(
              prev_instr.IsBranch(),
              "The only supported simplification for CondBranch* is to a "
              "Branch, got unexpected '%s'",
              prev_instr);

          // If we've optimized a CondBranchBase into a Branch, we also need to
          // remove any Phi references to the current block from the block that
          // we no longer visit.
          auto cond = static_cast<CondBranchBase*>(&instr);
          BasicBlock* new_dst = prev_instr.successor(0);
          BasicBlock* old_branch_block =
              cond->false_bb() == new_dst ? cond->true_bb() : cond->false_bb();
          old_branch_block->removePhiPredecessor(cond->block());
        }
        instr.unlink();
        delete &instr;

        if (env.block != &block) {
          // If we're now in a different block, `block' should only contain the
          // newly-emitted instructions, with no more old instructions to
          // process. Continue to the next block in the list; any newly-created
          // blocks were added to the end of the list and will be processed
          // later.
          break;
        }
      }
    }

    if (changed) {
      // Perform some simple cleanup between each pass.
      CopyPropagation{}.Run(irfunc);
      reflowTypes(irfunc);
      CleanCFG{}.Run(irfunc);
    }
  } while (changed);
}

} // namespace hir
} // namespace jit