hphp/runtime/vm/jit/inlining-decider.cpp (757 lines of code) (raw):

/* +----------------------------------------------------------------------+ | HipHop for PHP | +----------------------------------------------------------------------+ | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) | +----------------------------------------------------------------------+ | This source file is subject to version 3.01 of the PHP license, | | that is bundled with this package in the file LICENSE, and is | | available through the world-wide-web at the following url: | | http://www.php.net/license/3_01.txt | | If you did not receive a copy of the PHP license and are unable to | | obtain it through the world-wide-web, please send a note to | | license@php.net so we can mail you a copy immediately. | +----------------------------------------------------------------------+ */ #include "hphp/runtime/vm/jit/inlining-decider.h" #include "hphp/runtime/base/program-functions.h" #include "hphp/runtime/base/runtime-option.h" #include "hphp/runtime/ext/asio/ext_async-generator.h" #include "hphp/runtime/ext/generator/ext_generator.h" #include "hphp/runtime/vm/bytecode.h" #include "hphp/runtime/vm/func.h" #include "hphp/runtime/vm/hhbc.h" #include "hphp/runtime/vm/jit/irgen.h" #include "hphp/runtime/vm/jit/location.h" #include "hphp/runtime/vm/jit/irlower.h" #include "hphp/runtime/vm/jit/mcgen.h" #include "hphp/runtime/vm/jit/mcgen-translate.h" #include "hphp/runtime/vm/jit/normalized-instruction.h" #include "hphp/runtime/vm/jit/prof-data.h" #include "hphp/runtime/vm/jit/prof-data-serialize.h" #include "hphp/runtime/vm/jit/region-selection.h" #include "hphp/runtime/vm/jit/tc.h" #include "hphp/runtime/vm/jit/trans-cfg.h" #include "hphp/runtime/vm/jit/translate-region.h" #include "hphp/runtime/vm/resumable.h" #include "hphp/runtime/vm/srckey.h" #include "hphp/util/arch.h" #include "hphp/util/struct-log.h" #include "hphp/util/trace.h" #include "hphp/zend/zend-strtod.h" #include <folly/Synchronized.h> #include <cmath> #include <vector> #include <sstream> namespace HPHP::jit { /////////////////////////////////////////////////////////////////////////////// TRACE_SET_MOD(inlining); namespace { /////////////////////////////////////////////////////////////////////////////// std::string nameAndReason(int bcOff, std::string caller, std::string callee, std::string why) { return folly::sformat("BC {}: {} -> {}: {}\n", bcOff, caller, callee, why); } bool traceRefusal(SrcKey callerSk, const Func* callee, std::string why, AnnotationData* annotations) { // This is not under Trace::enabled so that we can collect the data in prod. const Func* caller = callerSk.func(); int bcOff = callerSk.offset(); auto calleeName = callee ? callee->fullName()->data() : "(unknown)"; if (annotations && RuntimeOption::EvalDumpInlDecision > 0) { annotations->inliningDecisions.emplace_back(false, bcOff, caller, callee, why); } if (Trace::enabled) { assertx(caller); FTRACE(2, "Inlining decider: refusing {}() <- {}{}\t<reason: {}>\n", caller->fullName()->data(), calleeName, callee ? "()" : "", why); } if (caller->shouldSampleJit() || (callee && callee->shouldSampleJit())) { StructuredLogEntry inlLog; auto bcStr = [&] { std::ostringstream bcStrn; bcStrn << bcOff; return bcStrn.str(); } (); inlLog.setStr("bc_off", bcStr); inlLog.setStr("caller", caller->fullName()->data()); inlLog.setStr("callee", calleeName); inlLog.setStr("reason", why); StructuredLog::log("hhvm_inline_refuse", inlLog); } return false; } std::atomic<uint64_t> s_baseProfCount{0}; /////////////////////////////////////////////////////////////////////////////// // canInlineAt() helpers. const StaticString s_AlwaysInline("__ALWAYS_INLINE"), s_NeverInline("__NEVER_INLINE"), s_HH_Coeffects_Backdoor("HH\\Coeffects\\backdoor"), s_HH_Coeffects_Backdoor_Async("HH\\Coeffects\\backdoor_async"), s_HH_Coeffects_FB_Backdoor_to_globals_leak_safe__DO_NOT_USE( "HH\\Coeffects\\fb\\backdoor_to_globals_leak_safe__DO_NOT_USE" ); #define COEFFECTS_BACKDOOR_WRAPPERS \ X(pure) \ X(write_props) \ X(read_globals) \ X(zoned) \ X(leak_safe) #define X(x) \ const StaticString \ s_HH_Coeffects_FB_Backdoor_from_##x \ ("HH\\Coeffects\\fb\\backdoor_from_"#x"__DO_NOT_USE"); COEFFECTS_BACKDOOR_WRAPPERS #undef X /* * Check if the `callee' has any characteristics which prevent inlining, * without peeking into its bytecode or regions. */ bool isCalleeInlinable(SrcKey callSK, const Func* callee, AnnotationData* annotations) { assertx(isFCall(callSK.op())); auto refuse = [&] (const char* why) { return traceRefusal(callSK, callee, why, annotations); }; if (!callee) { return refuse("callee not known"); } if (callee == callSK.func()) { return refuse("call is recursive"); } if (callee->isGenerator()) { return refuse("callee is generator"); } if (callee->maxStackCells() >= kStackCheckLeafPadding) { return refuse("function stack depth too deep"); } if (callee->userAttributes().count(s_NeverInline.get())) { return refuse("callee marked __NEVER_INLINE"); } return true; } /* * Check that we don't have any missing or extra arguments. */ bool checkNumArgs(SrcKey callSK, const Func* callee, const FCallArgs& fca, AnnotationData* annotations) { assertx(callee); auto refuse = [&] (const char* why) { return traceRefusal(callSK, callee, why, annotations); }; assertx(fca.numArgs <= callee->numNonVariadicParams()); assertx(!fca.hasUnpack() || fca.numArgs == callee->numNonVariadicParams()); if (fca.hasUnpack() && !callee->hasVariadicCaptureParam()) { return refuse("callee called with too many arguments"); } if (fca.numArgs < callee->numRequiredParams()) { return refuse("callee called with too few arguments"); } if (fca.enforceInOut()) { for (auto i = 0; i < fca.numArgs; ++i) { if (callee->isInOut(i) != fca.isInOut(i)) { return refuse("callee called with arguments with mismatched inout"); } } } if (fca.enforceReadonly()) { for (auto i = 0; i < fca.numArgs; ++i) { if (fca.isReadonly(i) && !callee->isReadonly(i)) { return refuse("callee called with arguments with mismatched readonly"); } } } if (fca.enforceMutableReturn() && (callee->attrs() & AttrReadonlyReturn)) { return refuse("caller requries mutable return but callee is readonly return"); } if (fca.enforceReadonlyThis() && !(callee->attrs() & AttrReadonlyThis)) { return refuse("caller expects no modifications to the instance but callee does modify"); } return true; } /////////////////////////////////////////////////////////////////////////////// } bool canInlineAt(SrcKey callSK, const Func* callee, const FCallArgs& fca, AnnotationData* annotations) { assertx(isFCall(callSK.op())); if (!callee) { return traceRefusal(callSK, callee, "unknown callee", annotations); } if (!RuntimeOption::EvalHHIREnableGenTimeInlining) { return traceRefusal(callSK, callee, "disabled via runtime option", annotations); } if (RuntimeOption::EvalJitEnableRenameFunction) { return traceRefusal(callSK, callee, "rename function is enabled", annotations); } if (callee->attrs() & AttrInterceptable) { return traceRefusal(callSK, callee, "callee is interceptable", annotations); } if (!isCalleeInlinable(callSK, callee, annotations) || !checkNumArgs(callSK, callee, fca, annotations)) { return false; } return true; } namespace { /////////////////////////////////////////////////////////////////////////////// // shouldInline() helpers. /* * Check if a builtin is inlinable. */ bool isInlinableCPPBuiltin(const Func* f) { assertx(f->isCPPBuiltin()); // The callee needs to be callable with FCallBuiltin, because NativeImpl // requires a frame. if (!RuntimeOption::EvalEnableCallBuiltin || (f->attrs() & AttrNoFCallBuiltin) || (f->numParams() > Native::maxFCallBuiltinArgs()) || !f->nativeFuncPtr()) { return false; } return true; } struct InlineRegionKey { InlineRegionKey(SrcKey entryKey, Type ctxType, TinyVector<Type, 4> argTypes) : entryKey{std::move(entryKey)} , ctxType{std::move(ctxType)} , argTypes(std::move(argTypes)) {} explicit InlineRegionKey(const RegionDesc& region) : entryKey(region.entry()->start()) , ctxType(region.inlineCtxType()) { for (auto const ty : region.inlineInputTypes()) { argTypes.push_back(ty); } } InlineRegionKey(const InlineRegionKey& irk) : entryKey(irk.entryKey) , ctxType(irk.ctxType) { for (auto ty : irk.argTypes) argTypes.push_back(ty); } InlineRegionKey(InlineRegionKey&& irk) noexcept : entryKey(std::move(irk.entryKey)) , ctxType(std::move(irk.ctxType)) { for (auto ty : irk.argTypes) argTypes.push_back(ty); irk.argTypes.clear(); } InlineRegionKey& operator=(const InlineRegionKey& irk) { entryKey = irk.entryKey; ctxType = irk.ctxType; argTypes.clear(); for (auto ty : irk.argTypes) argTypes.push_back(ty); return *this; } InlineRegionKey& operator=(InlineRegionKey&& irk) noexcept { entryKey = irk.entryKey; ctxType = irk.ctxType; argTypes.clear(); for (auto ty : irk.argTypes) argTypes.push_back(ty); irk.argTypes.clear(); return *this; } struct Eq { size_t operator()(const InlineRegionKey& k1, const InlineRegionKey& k2) const { return k1.entryKey == k2.entryKey && k1.ctxType == k2.ctxType && k1.argTypes == k2.argTypes; } }; struct Hash { size_t operator()(const InlineRegionKey& key) const { size_t h = 0; h = hash_combine(h, key.entryKey.toAtomicInt()); h = hash_combine(h, key.ctxType.hash()); for (auto const ty : key.argTypes) { h = hash_combine(h, ty.hash()); } return h; } private: template<class T> static size_t hash_combine(size_t base, T other) { return folly::hash::hash_128_to_64( base, folly::hash::hash_combine(other)); } }; SrcKey entryKey; Type ctxType; TinyVector<Type, 4> argTypes; }; using InlineCostCache = jit::fast_map< InlineRegionKey, unsigned, InlineRegionKey::Hash, InlineRegionKey::Eq >; Vcost computeTranslationCostSlow(SrcKey at, const RegionDesc& region, AnnotationData* annotationData) { TransContext ctx { TransIDSet{}, 0, // optIndex TransKind::Optimize, at, &region, PrologueID(), }; tracing::Block _{"compute-inline-cost", [&] { return traceProps(ctx); }}; rqtrace::DisableTracing notrace; auto const unbumper = mcgen::unbumpFunctions(); auto const unit = irGenInlineRegion(ctx, region); if (!unit) return {0, true}; // TODO(T52856776) - annotations should be copied from unit into outer unit // via annotationData SCOPE_ASSERT_DETAIL("Inline-IRUnit") { return show(*unit); }; return irlower::computeIRUnitCost(*unit); } folly::Synchronized<InlineCostCache> s_inlCostCache; int computeTranslationCost(SrcKey at, const RegionDesc& region, AnnotationData* annotationData) { InlineRegionKey irk{region}; SYNCHRONIZED_CONST(s_inlCostCache) { auto f = s_inlCostCache.find(irk); if (f != s_inlCostCache.end()) return f->second; } auto const info = computeTranslationCostSlow(at, region, annotationData); auto cost = info.cost; // We normally store the computed cost into the cache. However, if the region // is incomplete, and it's cost is still within the maximum allowed cost, and // we're still profiling that function, then we don't want to cache that // result yet. The reason for this exception is that we may still gather // additional profiling information that will allow us to create a complete // region with acceptable cost. bool cacheResult = true; if (info.incomplete) { if (info.cost <= RuntimeOption::EvalHHIRInliningMaxVasmCostLimit) { auto const fid = region.entry()->func()->getFuncId(); auto const profData = jit::profData(); auto const profiling = profData && profData->profiling(fid); if (profiling) cacheResult = false; } // Set cost very high to prevent inlining of incomplete regions. cost = std::numeric_limits<int>::max(); } if (cacheResult && !as_const(s_inlCostCache)->count(irk)) { s_inlCostCache->emplace(irk, cost); } FTRACE(3, "computeTranslationCost(at {}) = {}\n", showShort(at), cost); return cost; } uint64_t adjustedMaxVasmCost(const irgen::IRGS& env, const RegionDesc& calleeRegion, uint32_t depth) { auto const maxDepth = RuntimeOption::EvalHHIRInliningMaxDepth; if (depth >= maxDepth) return 0; const auto baseVasmCost = RuntimeOption::EvalHHIRInliningVasmCostLimit; const auto baseProfCount = s_baseProfCount.load(); if (baseProfCount == 0) return baseVasmCost; auto const callerProfCount = irgen::curProfCount(env); auto adjustedCost = baseVasmCost * std::pow((double)callerProfCount / baseProfCount, RuntimeOption::EvalHHIRInliningVasmCallerExp); auto const calleeProfCount = irgen::calleeProfCount(env, calleeRegion); if (calleeProfCount) { adjustedCost *= std::pow((double)callerProfCount / calleeProfCount, RuntimeOption::EvalHHIRInliningVasmCalleeExp); } adjustedCost *= std::pow(1 - (double)depth / maxDepth, RuntimeOption::EvalHHIRInliningDepthExp); if (adjustedCost < RuntimeOption::EvalHHIRInliningMinVasmCostLimit) { adjustedCost = RuntimeOption::EvalHHIRInliningMinVasmCostLimit; } if (adjustedCost > RuntimeOption::EvalHHIRInliningMaxVasmCostLimit) { adjustedCost = RuntimeOption::EvalHHIRInliningMaxVasmCostLimit; } if (calleeProfCount) { FTRACE(3, "adjustedMaxVasmCost: adjustedCost ({}) = baseVasmCost ({}) * " "(callerProfCount ({}) / baseProfCount ({})) ^ {} * " "(callerProfCount ({}) / calleeProfCount ({})) ^ {} * " "(1 - depth ({}) / maxDepth ({})) ^ {}\n", adjustedCost, baseVasmCost, callerProfCount, baseProfCount, RuntimeOption::EvalHHIRInliningVasmCallerExp, callerProfCount, calleeProfCount, RuntimeOption::EvalHHIRInliningVasmCalleeExp, depth, maxDepth, RuntimeOption::EvalHHIRInliningDepthExp); } else { FTRACE(3, "adjustedMaxVasmCost: adjustedCost ({}) = baseVasmCost ({}) * " "(callerProfCount ({}) / baseProfCount ({})) ^ {} * " "(1 - depth ({}) / maxDepth ({})) ^ {}\n", adjustedCost, baseVasmCost, callerProfCount, baseProfCount, RuntimeOption::EvalHHIRInliningVasmCallerExp, depth, maxDepth, RuntimeOption::EvalHHIRInliningDepthExp); } return adjustedCost; } /////////////////////////////////////////////////////////////////////////////// } /* * Return the cost of inlining the given callee. */ int costOfInlining(SrcKey callerSk, const Func* callee, const RegionDesc& region, AnnotationData* annotationData) { auto const alwaysInl = (!RuntimeOption::EvalHHIRInliningIgnoreHints && callee->userAttributes().count(s_AlwaysInline.get())) || (callee->isMemoizeWrapper() && callee->numParams() == 0); // Functions marked as always inline don't contribute to overall cost return alwaysInl ? 0 : computeTranslationCost(callerSk, region, annotationData); } bool isCoeffectsBackdoor(SrcKey callerSk, const Func* callee) { auto const callee_name = callee->fullName(); #define X(x) \ if (callee_name->isame(s_HH_Coeffects_FB_Backdoor_from_##x.get())) { \ return true; \ } COEFFECTS_BACKDOOR_WRAPPERS #undef X if (callee_name->isame( s_HH_Coeffects_FB_Backdoor_to_globals_leak_safe__DO_NOT_USE.get())) { return true; } if (callee_name->isame(s_HH_Coeffects_Backdoor.get()) || callee_name->isame(s_HH_Coeffects_Backdoor_Async.get()) || (callee->isClosureBody() && (callerSk.func()->fullName()->isame(s_HH_Coeffects_Backdoor.get()) || callerSk.func()->fullName()->isame(s_HH_Coeffects_Backdoor_Async.get())))) { return true; } return false; } bool shouldInline(const irgen::IRGS& irgs, SrcKey callerSk, const Func* callee, const RegionDesc& region, uint32_t maxTotalCost) { auto sk = region.empty() ? SrcKey() : region.start(); assertx(callee); assertx(sk.func() == callee); auto annotationsPtr = mcgen::dumpTCAnnotation(irgs.context.kind) ? irgs.unit.annotationData.get() : nullptr; // Tracing return lambdas. auto refuse = [&] (const std::string& why) { FTRACE(2, "shouldInline: rejecting callee region: {}", show(region)); return traceRefusal(callerSk, callee, why, annotationsPtr); }; auto accept = [&] (std::string why) { auto static inlineAccepts = ServiceData::createTimeSeries( "jit.inline.accepts", {ServiceData::StatsType::COUNT}); inlineAccepts->addValue(1); if (annotationsPtr && RuntimeOption::EvalDumpInlDecision >= 2) { auto const decision = AnnotationData::InliningDecision{ true, callerSk.offset(), callerSk.func(), callee, why }; annotationsPtr->inliningDecisions.push_back(decision); } UNUSED auto const topFunc = [&] { return irgs.inlineState.bcStateStack.empty() ? irgs.bcState.func() : irgs.inlineState.bcStateStack[0].func(); }; FTRACE(2, "Inlining decider: inlining {}() <- {}()\t<reason: {}>\n", topFunc()->fullName()->data(), callee->fullName()->data(), why); return true; }; auto const stackDepth = irgs.inlineState.stackDepth; if (stackDepth + callee->maxStackCells() >= kStackCheckLeafPadding) { return refuse("inlining stack depth limit exceeded"); } auto isAwaitish = [&] (Op opcode) { return opcode == OpAwait || opcode == OpAwaitAll; }; // Try to inline CPP builtin functions. Inline regions for these functions // must end with a unique NativeImpl, which may not be true: // - If we only include the initial Asserts in the region, we may have zero // - If the NativeImpl guards its inputs, we may have multiple if (callee->isCPPBuiltin()) { if (!isInlinableCPPBuiltin(callee)) { return refuse("non-inlinable CPP builtin"); } auto const count = std::count_if( std::begin(region.blocks()), std::end(region.blocks()), [](auto const b) { return !b->empty() && !b->last().funcEntry() && b->last().op() == OpNativeImpl; } ); switch (count) { case 0: return refuse("inlinable CPP builtin without a NativeImpl"); case 1: return accept("inlinable CPP builtin with a unique NativeImpl"); default: return refuse("inlinable CPP builtin with multiple NativeImpls"); } } bool hasRet = false; // Iterate through the region, checking its suitability for inlining. for (auto const& block : region.blocks()) { sk = block->start(); for (auto i = 0, n = block->length(); i < n; ++i, sk.advance()) { if (sk.funcEntry()) continue; // We don't allow inlined functions in the region. The client is // expected to disable inlining for the region it gives us to peek. if (sk.func() != callee) { return refuse("got region with inlined calls"); } // Detect that the region contains a return. if (isReturnish(sk.op())) { hasRet = true; } // In optimized regions consider an await to be a returnish instruction, // if no returns appeared in the region then we likely suspend on all // calls to the callee. if (block->profTransID() != kInvalidTransID) { if (region.isExit(block->id()) && i + 1 == n && isAwaitish(sk.op())) { hasRet = true; } } } } if (!hasRet) { return refuse( folly::sformat("region has no returns: callee BC instrs = {} : {}", region.instrSize(), show(region))); } if (isCoeffectsBackdoor(callerSk, callee)) { return accept("coeffect backdoor is always inlined"); } // Ignore cost computation for functions marked __ALWAYS_INLINE if (!RuntimeOption::EvalHHIRInliningIgnoreHints && callee->userAttributes().count(s_AlwaysInline.get())) { // In debug builds compute the cost anyway to catch bugs in the inlining // machinery. Many inlining tests utilize the __ALWAYS_INLINE attribute. if (debug) { computeTranslationCost(callerSk, region, annotationsPtr); } return accept("callee marked as __ALWAYS_INLINE"); } // Refuse if the cost exceeds our thresholds. // We measure the cost of inlining each callstack and stop when it exceeds a // certain threshold. (Note that we do not measure the total cost of all the // inlined calls for a given caller---just the cost of each nested stack.) const int cost = costOfInlining(callerSk, callee, region, annotationsPtr); if (cost <= RuntimeOption::EvalHHIRAlwaysInlineVasmCostLimit) { return accept(folly::sformat("cost={} within always-inline limit", cost)); } if (region.instrSize() > irgs.budgetBCInstrs) { return refuse(folly::sformat("exhausted budgetBCInstrs={}, regionSize={}", irgs.budgetBCInstrs, region.instrSize())); } int maxCost = maxTotalCost; if (RuntimeOption::EvalHHIRInliningUseStackedCost) { maxCost -= irgs.inlineState.cost; } const auto baseProfCount = s_baseProfCount.load(); const auto callerProfCount = irgen::curProfCount(irgs); const auto calleeProfCount = irgen::calleeProfCount(irgs, region); if (cost > maxCost) { auto const depth = inlineDepth(irgs); return refuse(folly::sformat( "too expensive: cost={} : maxCost={} : " "baseProfCount={} : callerProfCount={} : calleeProfCount={} : depth={}", cost, maxCost, baseProfCount, callerProfCount, calleeProfCount, depth)); } return accept(folly::sformat("small region with return: cost={} : " "maxTotalCost={} : maxCost={} : baseProfCount={}" " : callerProfCount={} : calleeProfCount={}", cost, maxTotalCost, maxCost, baseProfCount, callerProfCount, calleeProfCount)); } /////////////////////////////////////////////////////////////////////////////// namespace { RegionDescPtr selectCalleeTracelet(const Func* callee, Type ctxType, std::vector<Type>& argTypes, int32_t maxBCInstrs) { // Set up the RegionContext for the tracelet selector. auto const entryOff = callee->getEntryForNumArgs(argTypes.size()); RegionContext ctx{ SrcKey { callee, entryOff, SrcKey::FuncEntryTag {} }, SBInvOffset{0}, }; for (uint32_t i = 0; i < argTypes.size(); ++i) { auto type = argTypes[i]; assertx(type <= TCell); ctx.liveTypes.push_back({Location::Local{i}, type}); } auto const numParams = callee->numNonVariadicParams(); for (uint32_t i = argTypes.size(); i < numParams; ++i) { // These params are populated by DV init funclets, so set them to Uninit. ctx.liveTypes.push_back({Location::Local{i}, TUninit}); } if (argTypes.size() <= numParams && callee->hasVariadicCaptureParam()) { // There's no DV init funclet for the case where all non-variadic params // have already been passed, so the caller must handle it instead. auto const vargs = Type::cns(ArrayData::CreateVec()); ctx.liveTypes.push_back({Location::Local{numParams}, vargs}); } // Produce a tracelet for the callee. auto r = selectTracelet( ctx, TransKind::Live, maxBCInstrs, true /* inlining */ ); if (r) { r->setInlineContext(ctxType, argTypes); } return r; } TransIDSet findTransIDsForCallee(const ProfData* profData, const Func* callee, Type ctxType, std::vector<Type>& argTypes) { auto const idvec = profData->funcProfTransIDs(callee->getFuncId()); auto const offset = callee->getEntryForNumArgs(argTypes.size()); auto const sk = SrcKey { callee, offset, SrcKey::FuncEntryTag {} }; TransIDSet ret; FTRACE(2, "findTransIDForCallee: offset={}\n", offset); for (auto const id : idvec) { auto const rec = profData->transRec(id); if (rec->srcKey() != sk) continue; auto const region = rec->region(); auto const isvalid = [&] () { if (rec->srcKey().hasThis() != ctxType.maybe(TObj)) { return false; } for (auto const& typeloc : region->entry()->typePreConditions()) { if (typeloc.location.tag() != LTag::Local) continue; auto const locId = typeloc.location.localId(); if (locId < argTypes.size() && !(argTypes[locId].maybe(typeloc.type))) { return false; } } return true; }(); if (isvalid) ret.insert(id); } return ret; } RegionDescPtr selectCalleeCFG(SrcKey callerSk, const Func* callee, Type ctxType, std::vector<Type>& argTypes, int32_t maxBCInstrs, AnnotationData* annotations) { auto const profData = jit::profData(); if (!profData) { traceRefusal(callerSk, callee, "no profData", annotations); return nullptr; } if (!profData->profiling(callee->getFuncId())) { traceRefusal(callerSk, callee, folly::sformat("no profiling data for callee FuncId: {}", callee->getFuncId()), annotations); return nullptr; } auto const dvIDs = findTransIDsForCallee(profData, callee, ctxType, argTypes); if (dvIDs.empty()) { traceRefusal(callerSk, callee, "didn't find entry TransID for callee", annotations); return nullptr; } TransCFG cfg(callee->getFuncId(), profData, true /* inlining */); HotTransContext ctx; ctx.entries = dvIDs; ctx.cfg = &cfg; ctx.profData = profData; ctx.maxBCInstrs = maxBCInstrs; ctx.inlining = true; ctx.inputTypes = &argTypes; bool truncated = false; auto r = selectHotCFG(ctx, &truncated); if (truncated) { traceRefusal(callerSk, callee, "callee region truncated due to BC size", annotations); return nullptr; } if (r) { r->setInlineContext(ctxType, argTypes); } else { traceRefusal(callerSk, callee, "failed selectHotCFG for callee", annotations); } return r; } } RegionDescPtr selectCalleeRegion(const irgen::IRGS& irgs, const Func* callee, const FCallArgs& fca, Type ctxType, const SrcKey& sk) { assertx(isFCall(sk.op())); auto static inlineAttempts = ServiceData::createTimeSeries( "jit.inline.attempts", {ServiceData::StatsType::COUNT}); inlineAttempts->addValue(1); auto kind = irgs.context.kind; auto annotationsPtr = mcgen::dumpTCAnnotation(kind) ? irgs.unit.annotationData.get() : nullptr; if (ctxType == TBottom) { traceRefusal(sk, callee, "ctx is TBottom", annotationsPtr); return nullptr; } if (callee->isClosureBody()) { if (!callee->cls()) { ctxType = TNullptr; } else if (callee->hasThisInBody()) { ctxType = TObj; } else { ctxType = TCls; } } else { // Bail out if calling a static methods with an object ctx. if (ctxType.maybe(TObj) && (callee->isStaticInPrologue() || (!sk.hasThis() && isFCallClsMethod(sk.op())))) { traceRefusal(sk, callee, "calling static method with an object", annotationsPtr); return nullptr; } } if (callee->cls()) { if (callee->isStatic() && !ctxType.maybe(TCls)) { traceRefusal(sk, callee, "calling a static method with an instance", annotationsPtr); return nullptr; } if (!callee->isStatic() && !ctxType.maybe(TObj)) { traceRefusal(sk, callee, "calling an instance method without an instance", annotationsPtr); return nullptr; } } FTRACE(2, "selectCalleeRegion: callee = {}\n", callee->fullName()->data()); auto const firstArgPos = static_cast<int32_t>(fca.numInputs()) - 1; std::vector<Type> argTypes; auto const numArgsInclUnpack = fca.numArgs + (fca.hasUnpack() ? 1 : 0); for (int32_t i = 0; i < numArgsInclUnpack; ++i) { // DataTypeGeneric is used because we're just passing the locals into the // callee. It's up to the callee to constrain further if needed. auto type = irgen::publicTopType(irgs, BCSPRelOffset{firstArgPos - i}); assertx(type <= TCell); // If we don't have sufficient type information to inline the region return // early if (type == TBottom) return nullptr; FTRACE(2, "arg {}: {}\n", i + 1, type); argTypes.push_back(type); } if (fca.hasUnpack()) { const int32_t ix = fca.numArgs; auto const ty = irgen::publicTopType(irgs, BCSPRelOffset{firstArgPos - ix}); if (!(ty <= TVec)) { traceRefusal( sk, callee, folly::sformat("unpacked argument has a wrong type ({})", ty.toString()), annotationsPtr ); return nullptr; } } const auto depth = inlineDepth(irgs); if (profData()) { auto region = selectCalleeCFG(sk, callee, ctxType, argTypes, RO::EvalJitMaxRegionInstrs, annotationsPtr); if (region) { if (shouldInline(irgs, sk, callee, *region, adjustedMaxVasmCost(irgs, *region, depth))) { return region; } return nullptr; } // Special case: even if we don't have prof data for this func, if // it takes no arguments and returns a constant, it might be a // trivial function (IE, "return 123;"). Attempt to inline it // anyways using the tracelet selector. if (numArgsInclUnpack > 0) return nullptr; auto const retType = typeFromRAT(callee->repoReturnType(), sk.func()->cls()); // Deliberately using hasConstVal, not admitsSingleVal, since we // don't want TInitNull, etc. if (!retType.hasConstVal()) return nullptr; } auto region = selectCalleeTracelet(callee, ctxType, argTypes, RO::EvalJitMaxRegionInstrs); if (region && shouldInline(irgs, sk, callee, *region, adjustedMaxVasmCost(irgs, *region, depth))) { return region; } return nullptr; } void setBaseInliningProfCount(uint64_t value) { s_baseProfCount.store(value); FTRACE(1, "setBaseInliningProfCount: {}\n", value); } /////////////////////////////////////////////////////////////////////////////// void clearCachedInliningCost() { s_inlCostCache->clear(); } void serializeCachedInliningCost(ProfDataSerializer& ser) { tl_heap.getCheck()->init(); zend_get_bigint_data(); SYNCHRONIZED_CONST(s_inlCostCache) { write_raw(ser, safe_cast<uint32_t>(s_inlCostCache.size())); for (auto const& p : s_inlCostCache) { write_srckey(ser, p.first.entryKey); p.first.ctxType.serialize(ser); write_raw(ser, safe_cast<uint32_t>(p.first.argTypes.size())); for (auto const& arg : p.first.argTypes) arg.serialize(ser); write_raw(ser, safe_cast<uint32_t>(p.second)); } } } void deserializeCachedInliningCost(ProfDataDeserializer& ser) { SYNCHRONIZED(s_inlCostCache) { auto const numEntries = read_raw<uint32_t>(ser); for (uint32_t i = 0; i < numEntries; ++i) { auto srcKey = read_srckey(ser); auto ctxType = Type::deserialize(ser); auto const numArgs = read_raw<uint32_t>(ser); TinyVector<Type, 4> args; for (int64_t j = 0; j < numArgs; j++) { args.emplace_back(Type::deserialize(ser)); } auto const cost = read_raw<uint32_t>(ser); s_inlCostCache.emplace( InlineRegionKey{std::move(srcKey), std::move(ctxType), std::move(args)}, cost ); } } } /////////////////////////////////////////////////////////////////////////////// }