hphp/runtime/vm/jit/gvn.cpp (642 lines of code) (raw):

/*
   +----------------------------------------------------------------------+
   | HipHop for PHP                                                       |
   +----------------------------------------------------------------------+
   | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com)  |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,      |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.php.net/license/3_01.txt                                  |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
*/

#include "hphp/runtime/vm/jit/opt.h"

#include "hphp/runtime/base/perf-warning.h"

#include "hphp/runtime/vm/jit/analysis.h"
#include "hphp/runtime/vm/jit/block.h"
#include "hphp/runtime/vm/jit/cfg.h"
#include "hphp/runtime/vm/jit/containers.h"
#include "hphp/runtime/vm/jit/dce.h"
#include "hphp/runtime/vm/jit/ir-instruction.h"
#include "hphp/runtime/vm/jit/mutation.h"
#include "hphp/runtime/vm/jit/ssa-tmp.h"
#include "hphp/runtime/vm/jit/state-vector.h"
#include "hphp/runtime/vm/jit/pass-tracer.h"
#include "hphp/runtime/vm/jit/timer.h"

#include "hphp/util/dataflow-worklist.h"

#include <sstream>

#include <folly/small_vector.h>

namespace HPHP::jit {

TRACE_SET_MOD(hhir_gvn);

//////////////////////////////////////////////////////////////////////

namespace {

// Per-SSATmp entry in the value-number table: `key` is the tmp itself and
// `value` is the canonical SSATmp chosen as its value number.  Initially
// every tmp is its own value number (see initWithInstruction).
struct ValueNumberMetadata {
  SSATmp* key;
  SSATmp* value;
};

using ValueNumberTable = StateVector<SSATmp, ValueNumberMetadata>;
using DstIndex = int32_t;

// Hash functor for the congruence table.  Two (instruction, dst-index) keys
// that are congruent must hash equally, so the hash is built from the
// opcode, the extra-data, the type parameter, and the *value numbers* of
// the sources (looked up through the global table, after canonicalizing
// each source past passthrough instructions).
struct CongruenceHasher {
  using KeyType = std::pair<IRInstruction*, DstIndex>;

  explicit CongruenceHasher(const ValueNumberTable& globalTable)
    : m_globalTable(&globalTable)
  {
  }

  // Mix the instruction's extra-data and type parameter (when present)
  // into `result`.
  size_t hashSharedImpl(IRInstruction* inst, size_t result) const {
    if (inst->hasExtra()) {
      result = folly::hash::hash_128_to_64(
        result,
        hashExtra(inst->op(), inst->rawExtra())
      );
    }
    if (inst->hasTypeParam()) {
      result = folly::hash::hash_128_to_64(result, inst->typeParam().hash());
    }
    return result;
  }

  // Mix the current value number of each source into `result`.  Sources
  // are canonicalized first, and every canonical source must already have
  // an entry in the global table.
  size_t hashSrcs(KeyType key, size_t result) const {
    auto& table = *m_globalTable;
    auto inst = key.first;
    for (uint32_t i = 0; i < inst->numSrcs(); ++i) {
      auto src = canonical(inst->src(i));
      assertx(table[src].value);
      result = folly::hash::hash_128_to_64(
        result,
        reinterpret_cast<size_t>(table[src].value)
      );
    }
    return result;
  }

  size_t operator()(KeyType key) const {
    auto inst = key.first;
    // Note: this doesn't take commutativity or associativity into account, but
    // it might be nice to do so for the opcodes where it makes sense.
    size_t result = static_cast<size_t>(inst->op());
    result = hashSrcs(key, result);
    return hashSharedImpl(inst, result);
  }

private:
  const ValueNumberTable* m_globalTable;
};

// Equality functor for the congruence table: two instructions are congruent
// when they have the same opcode, the same type parameter, equal extra-data,
// and sources with pairwise-equal value numbers.
struct CongruenceComparator {
  using KeyType = std::pair<IRInstruction*, DstIndex>;

  explicit CongruenceComparator(const ValueNumberTable& globalTable)
    : m_globalTable(&globalTable)
  {
  }

  // Compare the value numbers of corresponding (canonicalized) sources.
  bool compareSrcs(KeyType keyA, KeyType keyB) const {
    auto& table = *m_globalTable;
    auto instA = keyA.first;
    auto instB = keyB.first;
    for (uint32_t i = 0; i < instA->numSrcs(); ++i) {
      auto srcA = canonical(instA->src(i));
      auto srcB = canonical(instB->src(i));
      assertx(table[srcA].value);
      assertx(table[srcB].value);
      if (table[srcA].value != table[srcB].value) return false;
    }
    return true;
  }

  bool operator()(KeyType keyA, KeyType keyB) const {
    auto instA = keyA.first;
    auto instB = keyB.first;
    // Note: this doesn't take commutativity or associativity into account, but
    // it might be nice to do so for the opcodes where it makes sense.
    if (instA->op() != instB->op()) return false;
    if (instA->numSrcs() != instB->numSrcs()) return false;
    if (instA->hasTypeParam() != instB->hasTypeParam()) return false;
    if (instA->hasTypeParam() && instA->typeParam() != instB->typeParam()) {
      return false;
    }
    if (instA->hasExtra()) {
      assertx(instB->hasExtra());
      if (!equalsExtra(instA->op(), instA->rawExtra(), instB->rawExtra())) {
        return false;
      }
    }
    if (!compareSrcs(keyA, keyB)) {
      return false;
    }
    return true;
  }

private:
  const ValueNumberTable* m_globalTable;
};

// Maps each (instruction, dst-index) congruence class to the canonical
// SSATmp (value number) chosen for that class.
using NameTable = jit::hash_map<
  std::pair<IRInstruction*, DstIndex>, SSATmp*,
  CongruenceHasher,
  CongruenceComparator
>;

// Shared state threaded through the analysis.  localTable and nameTable are
// only non-null while an iteration of the fixed point is in progress (see
// runAnalysis).
struct GVNState {
  ValueNumberTable* localTable{nullptr};
  ValueNumberTable* globalTable{nullptr};
  NameTable* nameTable{nullptr};
};

// Whitelist of opcodes that are safe to value-number: pure computations
// whose results are fully determined by opcode, sources, type parameter
// and extra-data.  A few opcodes are conditionally safe and are handled
// below the main switch.
bool supportsGVN(const IRInstruction* inst) {
  switch (inst->op()) {
  case AssertType:
  case AbsDbl:
  case AddInt:
  case SubInt:
  case MulInt:
  case AndInt:
  case AddDbl:
  case SubDbl:
  case MulDbl:
  case DivDbl:
  case DivInt:
  case Mod:
  case Sqrt:
  case OrInt:
  case XorInt:
  case Shl:
  case Shr:
  case Floor:
  case Ceil:
  case AddIntO:
  case SubIntO:
  case MulIntO:
  case XorBool:
  case ConvDblToBool:
  case ConvIntToBool:
  case ConvBoolToDbl:
  case ConvIntToDbl:
  case ConvBoolToInt:
  case ConvDblToInt:
  case ConvFuncPrologueFlagsToARFlags:
  case DblAsBits:
  case GtInt:
  case GteInt:
  case LtInt:
  case LteInt:
  case EqInt:
  case NeqInt:
  case CmpInt:
  case GtDbl:
  case GteDbl:
  case LtDbl:
  case LteDbl:
  case EqDbl:
  case NeqDbl:
  case CmpDbl:
  case GtStr:
  case GteStr:
  case LtStr:
  case LteStr:
  case EqStr:
  case NeqStr:
  case SameStr:
  case NSameStr:
  case CmpStr:
  case GtBool:
  case GteBool:
  case LtBool:
  case LteBool:
  case EqBool:
  case NeqBool:
  case CmpBool:
  case SameObj:
  case NSameObj:
  case SameArrLike:
  case NSameArrLike:
  case GtRes:
  case GteRes:
  case LtRes:
  case LteRes:
  case EqRes:
  case NeqRes:
  case CmpRes:
  case EqCls:
  case EqLazyCls:
  case EqFunc:
  case EqStrPtr:
  case InstanceOf:
  case InstanceOfIface:
  case InstanceOfIfaceVtable:
  case ExtendsClass:
  case InstanceOfBitmask:
  case NInstanceOfBitmask:
  case InterfaceSupportsArrLike:
  case InterfaceSupportsStr:
  case InterfaceSupportsInt:
  case InterfaceSupportsDbl:
  case HasToString:
  case IsType:
  case IsNType:
  case IsLegacyArrLike:
  case IsWaitHandle:
  case IsCol:
  case LdRDSAddr:
  case LdFrameThis:
  case LdFrameCls:
  case LdClsCtor:
  case DefConst:
  case LdCls:
  case LdClsCached:
  case LdClsInitData:
  case LdClsFromClsMeth:
  case LdSmashableFunc:
  case LdFuncFromClsMeth:
  case LdFuncInOutBits:
  case LdFuncVecLen:
  case LdClsMethod:
  case LdIfaceMethod:
  case LdPropAddr:
  case LdClsPropAddrOrNull:
  case LdClsPropAddrOrRaise:
  case LdObjClass:
  case LdClsName:
  case LdLazyCls:
  case LdLazyClsName:
  case Mov:
  case LdContActRec:
  case LdAFWHActRec:
  case LdVecElemAddr:
  case OrdStr:
  case ChrInt:
  case CheckRange:
  case CountVec:
  case CountDict:
  case CountKeyset:
  case BespokeGet:
  case BespokeGetThrow:
  case BespokeIterEnd:
  case LdMonotypeDictTombstones:
  case LdMonotypeDictKey:
  case LdMonotypeDictVal:
  case LdMonotypeVecElem:
  case Select:
  case StrictlyIntegerConv:
  case LookupSPropSlot:
  case ConvPtrToLval:
  case RaiseErrorOnInvalidIsAsExpressionType:
  case LdUnitPerRequestFilepath:
  case DirFromFilepath:
  case AKExistsDict:
  case AKExistsKeyset:
  case DictGet:
  case DictGetK:
  case DictGetQuiet:
  case DictIdx:
  case DictIsset:
  case KeysetGet:
  case KeysetGetK:
  case KeysetGetQuiet:
  case KeysetIdx:
  case KeysetIsset:
  case CheckDictOffset:
  case CheckKeysetOffset:
  case CheckDictKeys:
  case CheckArrayCOW:
  case CheckMissingKeyInArrLike:
  case LdFuncRequiredCoeffects:
  case FuncHasAttr:
  case ClassHasAttr:
  case LdTVFromRDS:
  case StructDictSlot:
  case StructDictElemAddr:
    return true;

  case EqArrLike:
  case NeqArrLike:
    // Keyset equality comparisons never re-enter or throw
    return
      inst->src(0)->type() <= TKeyset &&
      inst->src(1)->type() <= TKeyset;

  case IsTypeStruct:
    // Resources can change type without generating a new SSATmp,
    // so its not safe to GVN
    return !opcodeMayRaise(IsTypeStruct) && !inst->src(1)->type().maybe(TRes);

  default:
    return false;
  }
}

// Seed the table: every dst and src of `inst` starts out as its own
// canonical name.
void initWithInstruction(IRInstruction* inst, ValueNumberTable& table) {
  // Each SSATmp starts out as the canonical name for itself.
  for (auto dst : inst->dsts()) {
    table[dst] = ValueNumberMetadata { dst, dst };
  }
  for (auto src : inst->srcs()) {
    table[src] = ValueNumberMetadata { src, src };
  }
}

// Visit one instruction during a fixed-point iteration.  For each dst,
// look up (or insert) its congruence class in the name table; if the class
// already has a representative different from the dst's current value
// number, record the new value number in the local table.  Returns true if
// anything changed.
bool visitInstruction(
  GVNState& env,
  IRInstruction* inst
) {
  auto& globalTable = *env.globalTable;
  auto& localTable = *env.localTable;
  auto& nameTable = *env.nameTable;

  if (!supportsGVN(inst)) return false;

  bool changed = false;
  for (auto dstIdx = 0; dstIdx < inst->numDsts(); ++dstIdx) {
    auto dst = inst->dst(dstIdx);
    assertx(dst);

    auto result = nameTable.emplace(std::make_pair(inst, dstIdx), dst);
    SSATmp* temp = result.second ? dst : result.first->second;
    assertx(temp);

    assertx(globalTable[dst].value);
    if (temp != globalTable[dst].value) {
      localTable[dst] = ValueNumberMetadata { dst, temp };
      FTRACE(1,
        "instruction {}\n"
        "updated value number for dst to dst of {}\n",
        *inst,
        *temp->inst()
      );
      changed = true;
    }
  }
  return changed;
}

// Visit every instruction in `block`; returns true if any value number
// changed.
bool visitBlock(
  GVNState& env,
  Block* block
) {
  bool changed = false;
  for (auto& inst : *block) {
    changed = visitInstruction(env, &inst) || changed;
  }
  return changed;
}

// Fold the updates recorded in the local table back into the global table
// after an iteration completes.
void applyLocalUpdates(ValueNumberTable& local, ValueNumberTable& global) {
  for (auto metadata : local) {
    if (!metadata.key) continue;
    global[metadata.key] = metadata;
  }
}

// Run the RPO value-numbering analysis to a fixed point, filling in
// env.globalTable with the final value number of every SSATmp.
void runAnalysis(
  GVNState& env,
  const IRUnit& unit,
  const BlockList& blocks
) {
  for (auto block : blocks) {
    for (auto& inst : *block) {
      initWithInstruction(&inst, *env.globalTable);
    }
  }

  bool changed = true;
  while (changed) {
    // We need a temporary table of updates which we apply after running this
    // iteration of the fixed point. If we change the global ValueNumberTable
    // during the pass, the hash values of the SSATmps will change which is
    // apparently a no-no for unordered_map.
    ValueNumberTable localTable(unit, ValueNumberMetadata{});
    env.localTable = &localTable;
    SCOPE_EXIT { env.localTable = nullptr; };
    {
      CongruenceHasher hash(*env.globalTable);
      CongruenceComparator pred(*env.globalTable);
      NameTable nameTable(0, hash, pred);
      env.nameTable = &nameTable;
      SCOPE_EXIT { env.nameTable = nullptr; };

      changed = false;
      for (auto block : blocks) {
        changed = visitBlock(env, block) || changed;
      }
    }
    applyLocalUpdates(localTable, *env.globalTable);
  }
}

/*
 * When gvn replaces an instruction that produces a refcount, we need
 * to IncRef the replacement value. Its not enough to IncRef it where
 * the replacement occurs (where a refcount would originally have been
 * produced), because the replacement value might not survive that
 * long. For example:
 *
 *    (1) t2:Str = CastIntToStr t1:Int
 *        ....
 *    (2) DecRef t2:Str
 *        ....
 *    (3) t3:Str = CastIntToStr t1:Int
 *
 * The DecRef in (2) might release t2. So, inserting an IncRef after
 * (3) isn't correct, as t2 will have already been released. Instead,
 * we need to IncRef it right after the canonical instruction
 * (1). However, in general there will be paths from the canonical
 * instruction that don't reach the replaced instruction, so we'll
 * need to insert DecRefs on those paths.
 *
 * As an example, given:
 *
 *        +------+
 *        | Orig |
 *        +------+
 *          |\
 *          | \
 *          |  \   +-----+
 *          |   -->|exit1|
 *          |      +-----+
 *          v
 *        +------+
 *        | Rep1 |
 *        +------+
 *          |\
 *          | \
 *          |  \   +-----+
 *          |   -->|exit2|
 *          |      +-----+
 *          v
 *        +------+
 *        | Rep2 |
 *        +------+
 *
 * Where Orig is a PRc instruction, and Rep1 and Rep2 are identical
 * instructions that are going to be replaced, we need to insert an
 * IncRef after Orig (to ensure its dst lives until Rep1), a DecRef at
 * exit1 (to keep the RefCounts balanced), an IncRef after Rep1 (to
 * ensure Orig's dst lives until Rep2), and a DecRef in exit2 (to keep
 * the RefCounts balanced).
 *
 * We do this by computing the Partial-Anticipability of the candidate
 * instructions (Orig, Rep1 and Rep2 in the example).
 * After each
 * candidate instruction we insert an IncRef if a candidate is
 * Partially-Anticipated-out. If a candidate is
 * Partially-Anticipated-out in a predecessor, but not
 * Partially-Anticipated-in in the successor, then a DecRef is
 * inserted in the successor. Since critical edges are split, this
 * covers all of the cases.
 */
// Upper bound on the number of PRc candidate groups tracked by the bitset
// dataflow below; groups beyond this are handled conservatively (see
// replaceRedundantComputations).
constexpr uint32_t kMaxTrackedPrcs = 64;

// Per-block dataflow state for the partial-anticipability computation.
struct PrcState {
  using Bits = std::bitset<kMaxTrackedPrcs>;

  uint32_t rpoId;

  /* original definition of candidates (kills pant) */
  Bits orig;
  /* local is the set of candidates in this block */
  Bits local;
  /* pantIn = (local | pantOut) - orig */
  Bits pantIn;
  /* pantOut = Union<succs>(pantIn) */
  Bits pantOut;
};

// Render a bitset compactly for tracing: "0" when empty, "-1" when full,
// otherwise the raw bit pattern.
std::string show(const PrcState::Bits& bits) {
  std::ostringstream out;
  if (bits.none()) {
    return "0";
  }
  if (bits.all()) {
    return "-1";
  }
  out << bits;
  return out.str();
}

// Render the full per-block dataflow state for tracing.
std::string show(const PrcState& state) {
  return folly::sformat(
    " pantIn : {}\n"
    " orig : {}\n"
    " local : {}\n"
    " pantOut : {}\n",
    show(state.pantIn),
    show(state.orig),
    show(state.local),
    show(state.pantOut)
  );
}

// Environment for the IncRef/DecRef insertion phase.
struct PrcEnv {
  PrcEnv(IRUnit& unit, const BlockList& rpoBlocks)
    : unit{unit}
    , rpoBlocks{rpoBlocks}
  {}

  IRUnit& unit;
  const BlockList& rpoBlocks;
  // The first element of each inner vector is the canonical SSATmp
  // and the remainder are the SSATmps that will be replaced.
  std::vector<std::vector<SSATmp*>> insertMap;
  std::vector<PrcState> states;
};

// Run the backward partial-anticipability dataflow over env.insertMap and
// insert the IncRefs/DecRefs required to keep refcounts balanced, as
// described in the block comment above.
void insertIncRefs(PrcEnv& env) {
  // Backward analysis: process blocks in reverse RPO via a worklist keyed
  // by RPO id.
  auto antQ =
    dataflow_worklist<uint32_t, std::less<uint32_t>>(env.rpoBlocks.size());

  env.states.resize(env.unit.numBlocks());
  for (uint32_t i = 0; i < env.rpoBlocks.size(); i++) {
    auto blk = env.rpoBlocks[i];
    auto& state = env.states[blk->id()];
    state.rpoId = i;
    antQ.push(i);
  }

  // Compute the `local` set per block, and mark the canonical definition's
  // block (v[0]) in `orig`.
  auto id = 0;
  for (auto const& v : env.insertMap) {
    assertx(!v.empty());
    for (auto const tmp : v) {
      auto const blk = tmp->inst()->block();
      auto& state = env.states[blk->id()];
      if (!state.local.test(id)) {
        state.local.set(id);
        continue;
      }
    }
    env.states[v[0]->inst()->block()->id()].orig.set(id);
    id++;
  }

  // compute anticipated
  do {
    auto const blk = env.rpoBlocks[antQ.pop()];
    auto& state = env.states[blk->id()];
    state.pantIn = (state.pantOut | state.local) & ~state.orig;
    blk->forEachPred(
      [&] (Block* b) {
        auto& s = env.states[b->id()];
        auto const pantOut = s.pantOut | state.pantIn;
        if (pantOut != s.pantOut) {
          s.pantOut = pantOut;
          antQ.push(s.rpoId);
        }
      }
    );
  } while (!antQ.empty());

  for (auto blk : env.rpoBlocks) {
    auto& state = env.states[blk->id()];
    FTRACE(4,
           "InsertIncDecs: Blk(B{}) <- {}\n"
           "{}"
           " ->{}\n",
           blk->id(),
           [&] {
             std::string ret;
             blk->forEachPred([&] (Block* pred) {
               folly::format(&ret, " B{}", pred->id());
             });
             return ret;
           }(),
           show(state),
           [&] {
             std::string ret;
             blk->forEachSucc([&] (Block* succ) {
               folly::format(&ret, " B{}", succ->id());
             });
             return ret;
           }());

    // Walk the candidate groups present in this block (shifting `inc`
    // right so the current group's bit is always bit 0).
    auto inc = state.local;
    for (auto inc_id = 0; inc.any(); inc >>= 1, inc_id++) {
      if (inc.test(0)) {
        auto const& tmps = env.insertMap[inc_id];
        auto insert = [&] (IRInstruction* inst) {
          FTRACE(3, "Inserting IncRef into B{} for t{}\n",
                 blk->id(), tmps[0]->id());
          auto const iter = std::next(blk->iteratorTo(inst));
          blk->insert(iter, env.unit.gen(IncRef, inst->bcctx(), tmps[0]));
        };
        SSATmp* last = nullptr;
        // Insert an IncRef after every candidate in this block except
        // the last one (since we know for all but the last that its
        // successor is anticipated). Note that entries in tmps from
        // the same block are guaranteed to be in program order.
        for (auto const tmp : tmps) {
          if (tmp->inst()->block() != blk) continue;
          if (last) insert(last->inst());
          last = tmp;
        }
        // If it's partially anticipated out, insert an inc after the
        // last one too.
        always_assert(last);
        if (state.pantOut.test(inc_id)) insert(last->inst());
      }
    }
    // Balance the refcounts: a candidate that is anticipated out of this
    // block but not into a successor needs a DecRef at the top of that
    // successor.
    if (state.pantOut.any()) {
      blk->forEachSucc(
        [&] (Block* succ) {
          auto dec = state.pantOut & ~env.states[succ->id()].pantIn;
          if (!dec.any()) return;
          for (auto dec_id = 0; dec.any(); dec >>= 1, dec_id++) {
            if (!dec.test(0)) continue;
            auto const tmp = env.insertMap[dec_id][0];
            FTRACE(3, "Inserting DecRef into B{} for t{}\n",
                   blk->id(), tmp->id());
            succ->prepend(
              env.unit.gen(DecRef, tmp->inst()->bcctx(), DecRefData{}, tmp));
          }
        }
      );
    }
  }
}

// Maps a canonical (PRc-producing) SSATmp to the list of tmps that will be
// replaced by it; used to drive insertIncRefs.
using ActionMap = jit::fast_map<SSATmp*, std::vector<SSATmp*>>;

// Try to rewrite `inst` using the computed value numbers: replace its dst
// (via Mov/AssertType) when an equivalent, dominating tmp exists, and
// substitute value numbers into its sources where the replacement type is
// at least as refined.  PRc replacements are recorded in `actionMap` for
// later refcount fixup.  Returns true if anything changed.
bool tryReplaceInstruction(
  IRUnit& unit,
  const IdomVector& idoms,
  IRInstruction* inst,
  ValueNumberTable& table,
  ActionMap& actionMap
) {
  auto changed = false;
  if (inst->hasDst()) {
    auto const dst = inst->dst();
    auto const valueNumber = table[dst].value;
    if (valueNumber &&
        valueNumber != dst &&
        is_tmp_usable(idoms, valueNumber, inst->block())) {
      changed = true;
      if (inst->producesReference()) {
        // First entry of the action list is the canonical tmp itself.
        auto& v = actionMap[valueNumber];
        if (!v.size()) v.push_back(valueNumber);
        v.push_back(dst);
      }
      if (inst->isBlockEnd()) {
        // Because the output of an equivalent instruction is available, we
        // know that this instruction must take its next branch.
        FTRACE(1,
               "eliminating throwing instruction {}\n",
               inst->toString()
        );
        assertx(inst->next());
        inst->block()->push_back(unit.gen(Jmp, inst->bcctx(), inst->next()));
      }
      if (!(valueNumber->type() <= dst->type())) {
        // The replacement's type is wider than the dst's: keep the dst's
        // type via AssertType instead of a plain Mov.
        FTRACE(1,
               "replacing {} with AssertType({})\n",
               inst->toString(),
               dst->type().toString()
        );
        unit.replace(inst, AssertType, dst->type(), valueNumber);
      } else {
        FTRACE(1,
               "replacing {} with Mov({})\n",
               inst->toString(),
               *valueNumber
        );
        unit.replace(inst, Mov, valueNumber);
      }
    }
  }

  for (uint32_t i = 0; i < inst->numSrcs(); ++i) {
    auto s = inst->src(i);
    auto valueNumber = table[s].value;
    if (valueNumber == s) continue;
    if (!valueNumber) continue;
    if (!is_tmp_usable(idoms, valueNumber, inst->block())) continue;

    // If the replacement is at least as refined as the source type,
    // just substitute it directly
    if (valueNumber->type() <= s->type()) {
      FTRACE(1,
        "instruction {}\n"
        "replacing src {} with dst of {}\n",
        *inst,
        i,
        *valueNumber->inst()
      );
      inst->setSrc(i, valueNumber);
      changed = true;
    }
  }

  return changed;
}

// Replace every redundant computation found by the analysis, then fix up
// refcounts for the PRc replacements via insertIncRefs.  Returns true if
// the unit was modified.
bool replaceRedundantComputations(
  IRUnit& unit,
  const IdomVector& idoms,
  const BlockList& blocks,
  ValueNumberTable& table
) {
  ActionMap actionMap;
  auto changed = false;
  for (auto block : blocks) {
    for (auto& inst : *block) {
      changed |= tryReplaceInstruction(unit, idoms, &inst, table, actionMap);
    }
  }
  if (actionMap.empty()) return changed;
  PrcEnv env(unit, blocks);
  for (auto& elm : actionMap) {
    if (env.insertMap.size() == kMaxTrackedPrcs) {
      // This pretty much doesn't happen; when it does, we might be
      // over-increffing here - but thats not a big deal.
      auto const newTmp = elm.second[0];
      auto const block = newTmp->inst()->block();
      auto const iter = std::next(block->iteratorTo(newTmp->inst()));
      for (auto i = 1; i < elm.second.size(); i++) {
        block->insert(iter,
                      unit.gen(IncRef, newTmp->inst()->bcctx(), newTmp));
      }
      continue;
    }
    env.insertMap.push_back(std::move(elm.second));
    if (env.insertMap.size() >= kMaxTrackedPrcs) {
      // Log once when we hit the tracking limit so we can find out if this
      // ever matters in practice.
      logPerfWarning(
        "gvnMaxPrcs",
        [&](StructuredLogEntry& cols) {
          auto const func = unit.context().initSrcKey.func();
          cols.setStr("func", func->fullName()->slice());
          cols.setStr("filename", func->unit()->origFilepath()->slice());
          cols.setStr("hhir_unit", show(unit));
        }
      );
    }
  }
  insertIncRefs(env);
  return changed;
}

} // namespace

/////////////////////////////////////////////////////////////////////////

// Entry point: global value numbering over the whole unit, followed by
// replacement of redundant computations and refcount fixup.
void gvn(IRUnit& unit) {
  PassTracer tracer{&unit, Trace::hhir_gvn, "gvn"};
  Timer t(Timer::optimize_gvn, unit.logEntry().get_pointer());
  splitCriticalEdges(unit);

  GVNState state;
  auto const rpoBlocks = rpoSortCfg(unit);
  auto const idoms = findDominators(
    unit,
    rpoBlocks,
    numberBlocks(unit, rpoBlocks)
  );

  ValueNumberTable globalTable(unit, ValueNumberMetadata{});
  state.globalTable = &globalTable;

  // This is an implementation of the RPO version of the global value numbering
  // algorithm presented in the 1996 paper "SCC-based Value Numbering" by
  // Cooper and Simpson.
  runAnalysis(state, unit, rpoBlocks);
  auto const changed =
    replaceRedundantComputations(unit, idoms, rpoBlocks, globalTable);
  state.globalTable = nullptr;

  // We might have added a new use of a SSATmp past a CheckType or
  // AssertType on it, so refine if necessary.
  if (changed) {
    // Restore basic invariants before refining.
    mandatoryDCE(unit);
    refineTmps(unit);
  }
}

} // namespace HPHP::jit