opt/methodinline/PerfMethodInlinePass.cpp (1,004 lines of code) (raw):

/* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #include "PerfMethodInlinePass.h" #include <boost/algorithm/string.hpp> #include <boost/optional.hpp> #include <boost/range/adaptor/filtered.hpp> #include <boost/range/algorithm/transform.hpp> #include <fstream> #include <iostream> #include <limits> #include <queue> #include "ConfigFiles.h" #include "ControlFlow.h" #include "CppUtil.h" #include "DexClass.h" #include "DexUtil.h" #include "IRCode.h" #include "InlineForSpeed.h" #include "Macros.h" #include "MethodInliner.h" #include "MethodProfiles.h" #include "PGIForest.h" #include "RedexContext.h" #include "Resolver.h" #include "Show.h" #include "SourceBlocks.h" #include "Trace.h" namespace { using namespace method_profiles; class InlineForSpeedBase : public InlineForSpeed { public: bool should_inline_generic(const DexMethod* caller_method, const DexMethod* callee_method) final { bool accept = should_inline_impl(caller_method, callee_method); ++m_num_choices; if (accept) { ++m_num_accepted; } return accept; } size_t get_num_choices() const { return m_num_choices; } size_t get_num_accepted() const { return m_num_accepted; } bool should_inline_callsite(const DexMethod* caller_method, const DexMethod* callee_method, const cfg::Block* caller_block) final { bool accept = should_inline_callsite_impl(caller_method, callee_method, caller_block); ++m_num_callsite_choices; if (accept) { ++m_num_callsite_accepted; } return accept; } size_t get_num_callsite_choices() const { return m_num_callsite_choices; } size_t get_num_callsite_accepted() const { return m_num_callsite_accepted; } protected: virtual bool should_inline_impl(const DexMethod* caller_method, const DexMethod* callee_method) = 0; virtual bool should_inline_callsite_impl(const DexMethod* caller_method, const DexMethod* callee_method, const cfg::Block* caller_block) = 0; size_t m_num_choices{0}; size_t m_num_accepted{0}; size_t m_num_callsite_choices{0}; size_t m_num_callsite_accepted{0}; }; class InlineForSpeedMethodProfiles final : public InlineForSpeedBase { public: explicit InlineForSpeedMethodProfiles(const MethodProfiles* method_profiles) : m_method_profiles(method_profiles) { compute_hot_methods(); } protected: bool should_inline_impl(const DexMethod* caller_method, const DexMethod* callee_method) override; bool should_inline_callsite_impl( const DexMethod* caller_method ATTRIBUTE_UNUSED, const DexMethod* callee_method ATTRIBUTE_UNUSED, const cfg::Block* caller_block ATTRIBUTE_UNUSED) final { return true; } private: void compute_hot_methods(); bool should_inline_per_interaction(const DexMethod* caller_method, const DexMethod* callee_method, uint32_t caller_insns, uint32_t callee_insns, const std::string& interaction_id, const StatsMap& method_stats) const; const MethodProfiles* m_method_profiles; std::map<std::string, std::pair<double, double>> m_min_scores; }; constexpr double MIN_APPEAR_PERCENT = 80.0; void InlineForSpeedMethodProfiles::compute_hot_methods() { if (m_method_profiles == nullptr || !m_method_profiles->has_stats()) { return; } for (const auto& pair : m_method_profiles->all_interactions()) { const std::string& interaction_id = pair.first; const auto& method_stats = pair.second; size_t popular_set_size = 0; for (const auto& entry : method_stats) { if (entry.second.appear_percent >= MIN_APPEAR_PERCENT) { ++popular_set_size; } } // Methods in the top PERCENTILE of call counts will be considered warm/hot. constexpr double WARM_PERCENTILE = 0.25; constexpr double HOT_PERCENTILE = 0.1; // Find the lowest score that is within the given percentile constexpr size_t MIN_SIZE = 1; size_t warm_size = std::max( MIN_SIZE, static_cast<size_t>(popular_set_size * WARM_PERCENTILE)); size_t hot_size = std::max( MIN_SIZE, static_cast<size_t>(popular_set_size * HOT_PERCENTILE)); // the "top" of the queue is actually the minimum warm/hot score using pq = std::priority_queue<double, std::vector<double>, std::greater<double>>; pq warm_scores; pq hot_scores; auto maybe_push = [](pq& q, size_t size, double value) { if (q.size() < size) { q.push(value); } else if (value > q.top()) { q.push(value); q.pop(); } }; for (const auto& entry : method_stats) { const auto& stat = entry.second; if (stat.appear_percent >= MIN_APPEAR_PERCENT) { auto score = stat.call_count; maybe_push(warm_scores, warm_size, score); maybe_push(hot_scores, hot_size, score); } } double min_warm_score = std::max(50.0, warm_scores.top()); double min_hot_score = std::max(100.0, hot_scores.top()); TRACE(METH_PROF, 2, "%s min scores = %f, %f", interaction_id.c_str(), min_warm_score, min_hot_score); std::pair<double, double> p(min_warm_score, min_hot_score); m_min_scores.emplace(interaction_id, std::move(p)); } } bool InlineForSpeedMethodProfiles::should_inline_impl( const DexMethod* caller_method, const DexMethod* callee_method) { auto caller_insns = caller_method->get_code()->cfg().num_opcodes(); // The cost of inlining large methods usually outweighs the benefits constexpr uint32_t MAX_NUM_INSNS = 240; if (caller_insns > MAX_NUM_INSNS) { return false; } auto callee_insns = callee_method->get_code()->cfg().num_opcodes(); if (callee_insns > MAX_NUM_INSNS) { return false; } // If the pair is hot under any interaction, inline it. for (const auto& pair : m_method_profiles->all_interactions()) { bool should = should_inline_per_interaction(caller_method, callee_method, caller_insns, callee_insns, pair.first, pair.second); if (should) { return true; } } return false; } bool InlineForSpeedMethodProfiles::should_inline_per_interaction( const DexMethod* caller_method, const DexMethod* callee_method, uint32_t caller_insns, uint32_t callee_insns, const std::string& interaction_id, const StatsMap& method_stats) const { const auto& caller_search = method_stats.find(caller_method); if (caller_search == method_stats.end()) { return false; } const auto& scores = m_min_scores.at(interaction_id); double warm_score = scores.first; double hot_score = scores.second; const auto& caller_stats = caller_search->second; auto caller_hits = caller_stats.call_count; auto caller_appears = caller_stats.appear_percent; if (caller_hits < warm_score || caller_appears < MIN_APPEAR_PERCENT) { return false; } const auto& callee_search = method_stats.find(callee_method); if (callee_search == method_stats.end()) { return false; } const auto& callee_stats = callee_search->second; auto callee_hits = callee_stats.call_count; auto callee_appears = callee_stats.appear_percent; if (callee_hits < warm_score || callee_appears < MIN_APPEAR_PERCENT) { return false; } // Smaller methods tend to benefit more from inlining. Allow warm + small // methods, or hot + medium size methods. constexpr uint32_t SMALL_ENOUGH = 20; bool either_small = caller_insns < SMALL_ENOUGH || callee_insns < SMALL_ENOUGH; bool either_hot = caller_hits >= hot_score || callee_hits >= hot_score; bool result = either_small || either_hot; if (result) { TRACE(METH_PROF, 5, "%s, %s, %s, %u, %u, %f, %f", SHOW(caller_method), SHOW(callee_method), interaction_id.c_str(), caller_insns, callee_insns, caller_hits, callee_hits); } return result; } using namespace random_forest; class InlineForSpeedDecisionTrees final : public InlineForSpeedBase { public: struct DecisionTreesConfig { boost::optional<float> min_method_hits = boost::none; boost::optional<float> min_method_appear = boost::none; boost::optional<float> min_block_hits = boost::none; boost::optional<float> min_block_appear = boost::none; boost::optional<std::vector<size_t>> interaction_indices = boost::none; boost::optional<size_t> exp_force_top_x_entries = boost::none; boost::optional<size_t> exp_force_top_x_entries_min_callee_size = boost::none; boost::optional<float> exp_force_top_x_entries_min_appear100 = boost::none; float accept_threshold{0}; bool accept_over{true}; bool break_chains{false}; }; InlineForSpeedDecisionTrees(const MethodProfiles* method_profiles, PGIForest&& forest, const DecisionTreesConfig& config) : m_method_context_context(method_profiles), m_forest(std::move(forest)), m_config(config) { if (m_config.exp_force_top_x_entries) { fetch_top_entries(method_profiles); } } protected: bool should_inline_impl(const DexMethod* caller_method, const DexMethod* callee_method) override { auto& caller_context = get_or_create(caller_method); auto& callee_context = get_or_create(callee_method); float accepted{0}; auto print_stats = [&](const char* suffix) { for (size_t i = 0; i != m_method_context_context.m_interaction_list.size(); ++i) { auto get_val = [](const auto& vals, size_t i) { if (!vals) { return -1.0f; } if (!vals->hits[i]) { return -1.0f; } return *vals->hits[i]; }; auto caller_val = get_val(caller_context.m_vals, i); auto callee_val = get_val(callee_context.m_vals, i); auto get_appear_val = [](const auto& vals, size_t i) { if (!vals) { return -1.0f; } if (!vals->appear100[i]) { return -1.0f; } return *vals->appear100[i]; }; auto caller_appear_val = get_appear_val(caller_context.m_vals, i); auto callee_appear_val = get_appear_val(callee_context.m_vals, i); TRACE(METH_PROF, 5, "[InlineForSpeedDecisionTrees%s] %.3f: " "%s!%u!%u!%u!%1.5f!%1.5f!%u!%u!%u!%u!%u!%s!%u!%u!%u!%1.5f!%1.5f!%" "u!%u!%u!%u!%u!%s", suffix, accepted, // Caller SHOW(caller_method), caller_context.m_params, caller_context.m_blocks, caller_context.m_edges, caller_val, caller_appear_val, caller_context.m_insns, caller_context.m_opcodes, caller_context.m_regs, caller_context.m_num_loops, caller_context.m_deepest_loop, // Callee SHOW(callee_method), callee_context.m_params, callee_context.m_blocks, callee_context.m_edges, callee_val, callee_appear_val, callee_context.m_insns, callee_context.m_opcodes, callee_context.m_regs, callee_context.m_num_loops, callee_context.m_deepest_loop, m_method_context_context.m_interaction_list[i].c_str()); } }; // While "normal" is more expensive, do it first anyways to fill `accepted`. if (!should_inline_normal(caller_method, callee_method, caller_context, callee_context, accepted) && !should_inline_exp(caller_method, callee_method, caller_context, callee_context)) { if (traceEnabled(METH_PROF, 5)) { print_stats("-not"); } return false; } if (traceEnabled(METH_PROF, 5)) { print_stats(""); } if (m_config.break_chains) { m_inline_calls[caller_method].insert(callee_method); } return true; } private: template <typename Fn> bool test_any_interaction(const Fn& fn) const { if (!m_config.interaction_indices) { for (size_t i = 0; i != m_method_context_context.m_interaction_list.size(); ++i) { if (fn(i)) { return true; } } return false; } for (size_t idx : *m_config.interaction_indices) { if (fn(idx)) { return true; } } return false; } bool should_inline_exp(const DexMethod* caller_method, const DexMethod* callee_method, const MethodContext& caller_context ATTRIBUTE_UNUSED, const MethodContext& callee_context) { if (!m_config.exp_force_top_x_entries) { return false; } if (!test_any_interaction([&](size_t i) { return top_n_entries[i].count(caller_method) != 0 && top_n_entries[i].count(callee_method) != 0; })) { return false; } if (!m_config.exp_force_top_x_entries_min_callee_size) { return true; } if (*m_config.exp_force_top_x_entries_min_callee_size <= callee_context.m_insns) { return true; } return false; } bool should_inline_normal(const DexMethod* caller_method, const DexMethod* callee_method, const MethodContext& caller_context, const MethodContext& callee_context, float& accepted) { auto has_matching = [&](const auto& selector_fn, auto min_hits) { if (!caller_context.m_vals || !callee_context.m_vals) { return false; } return test_any_interaction([&](size_t idx) { const auto& caller_val = selector_fn(*caller_context.m_vals, idx); const auto& callee_val = selector_fn(*callee_context.m_vals, idx); return caller_val && *caller_val >= min_hits && callee_val && *callee_val >= min_hits; }); }; // Explicitly check that the callee seems to ever be called with the caller. if (m_config.min_method_hits) { auto has_matching_hits = has_matching([](const auto& vals, size_t i) { return vals.hits[i]; }, *m_config.min_method_hits); if (!has_matching_hits) { TRACE(METH_PROF, 5, "%s calling %s: no samples together", SHOW(caller_method), SHOW(callee_method)); return false; } } if (m_config.min_method_appear) { auto has_matching_appear = has_matching( [](const auto& vals, size_t i) { return vals.appear100[i]; }, *m_config.min_method_appear); if (!has_matching_appear) { TRACE(METH_PROF, 5, "%s calling %s: no appear together", SHOW(caller_method), SHOW(callee_method)); return false; } } bool default_ret = m_forest.accept(caller_context, callee_context, &accepted); if (m_config.accept_threshold == 0) { return default_ret; } return m_config.accept_over ? accepted >= m_config.accept_threshold : accepted <= m_config.accept_threshold; } protected: bool should_inline_callsite_impl(const DexMethod* caller_method, const DexMethod* callee_method ATTRIBUTE_UNUSED, const cfg::Block* caller_block) final { // This is not really great, but it would mean recomputing the method-level // choice to understand. if (m_config.exp_force_top_x_entries) { return true; } auto compute_res = [&](const auto& threshold, const auto& feature_fn) -> boost::optional<bool> { if (!threshold) { return boost::none; } auto min_hits = *threshold; auto sb_vec = source_blocks::gather_source_blocks(caller_block); if (sb_vec.empty()) { return false; } // Check all interactions. const auto* sb = sb_vec[0]; return test_any_interaction([&](size_t i) { auto val = feature_fn(sb, i); return val && val >= min_hits; }); }; auto inline_hits = compute_res(m_config.min_block_hits, [](const auto* sb, size_t i) { return sb->get_val(i); }); if (inline_hits && !*inline_hits) { return false; } auto inline_appear = compute_res(m_config.min_block_appear, [](const auto* sb, size_t i) { return sb->get_appear100(i); }); if (inline_appear && !*inline_appear) { return false; } if (m_config.break_chains) { { std::unique_lock<std::mutex> lock{m_inline_calls_mutex}; if (!m_inline_calls_culled) { cull_inline_calls_now(); } } if (m_inline_calls.count(caller_method) == 0) { return false; } } return true; } private: const MethodContext& get_or_create(const DexMethod* m) { auto it = m_cache.find(m); if (it != m_cache.end()) { return it->second; } auto insert_it = m_cache.emplace(m, m_method_context_context.create(m)); return insert_it.first->second; } void fetch_top_entries(const MethodProfiles* method_profiles) { const auto& interactions = g_redex->get_sb_interaction_indices(); top_n_entries.resize(interactions.size()); const auto& all = method_profiles->all_interactions(); for (const auto& p : interactions) { auto it = all.find(p.first); redex_assert(it != all.end()); const auto& stats_map = it->second; std::vector<std::pair<const DexMethodRef*, double>> tmp_vec; tmp_vec.reserve(stats_map.size()); boost::transform( stats_map | boost::adaptors::filtered([&](const auto& p) { return !m_config.exp_force_top_x_entries_min_appear100 || p.second.appear_percent >= *m_config.exp_force_top_x_entries_min_appear100; }), std::back_inserter(tmp_vec), [](const auto& p) { return std::make_pair(p.first, p.second.call_count); }); std::sort(tmp_vec.begin(), tmp_vec.end(), [](const auto& lhs, const auto& rhs) { return lhs.second > rhs.second; }); if (tmp_vec.size() > *m_config.exp_force_top_x_entries) { tmp_vec.resize(*m_config.exp_force_top_x_entries); } std::transform(tmp_vec.begin(), tmp_vec.end(), std::inserter(top_n_entries.at(p.second), top_n_entries.at(p.second).begin()), [](const auto& p) { return p.first; }); } } void cull_inline_calls_now() { // This is a simplistic greedy algorithm. We take the highest edge, as given // by the very simple heuristic of callee call-count (then name). We could // probably scale by caller call-count, or appear. auto edge_heuristic = [&](const auto* /*caller*/, const auto* callee) -> float { const auto& vals = m_cache.at(callee).m_vals; if (!vals) { return -1; } const auto& val = vals->hits.at(0); if (!val) { return 0; } return *val; }; auto order = [&](const auto& lhs, const auto& rhs) { auto lhs_h = edge_heuristic(lhs.first, lhs.second); auto rhs_h = edge_heuristic(rhs.first, rhs.second); if (lhs_h != rhs_h) { return lhs_h > rhs_h; } if (lhs.first != rhs.first) { return compare_dexmethods(lhs.first, rhs.first); } return compare_dexmethods(lhs.second, rhs.second); }; using ElemT = std::pair<const DexMethod*, const DexMethod*>; std::priority_queue<ElemT, std::vector<ElemT>, decltype(order)> queue( order); auto filtered_map = m_inline_calls; // Fill the queue with all our edges. for (const auto& p : m_inline_calls) { for (const auto* callee : p.second) { if (p.first != callee) { // No cycles. queue.emplace(p.first, callee); filtered_map[callee]; } else { filtered_map.at(p.first).erase(callee); } } } while (!queue.empty()) { auto top = queue.top(); queue.pop(); auto* caller = top.first; auto* callee = top.second; if (filtered_map.at(caller).count(callee) == 0) { continue; } // Remove all out edges from the callee. filtered_map.at(callee).clear(); } { for (auto it = filtered_map.begin(); it != filtered_map.end();) { if (it->second.empty()) { it = filtered_map.erase(it); } else { ++it; } } } m_inline_calls = std::move(filtered_map); m_inline_calls_culled = true; } MethodContextContext m_method_context_context; std::unordered_map<const DexMethod*, MethodContext> m_cache; PGIForest m_forest; DecisionTreesConfig m_config; std::vector<std::unordered_set<const DexMethodRef*>> top_n_entries; // Collect "yes" decisions based on methods, possibly to break chains later. std::mutex m_inline_calls_mutex; std::unordered_map<const DexMethod*, std::unordered_set<const DexMethod*>> m_inline_calls; bool m_inline_calls_culled{false}; }; class InlineForSpeedCallerList final : public InlineForSpeedBase { public: explicit InlineForSpeedCallerList(const std::vector<std::string>& caller_list, bool by_prefix, const MethodProfiles* method_profiles, float callee_min_hits, float callee_min_appear) : m_caller_methods(gather_methods(caller_list, by_prefix)), m_method_profiles(method_profiles), m_callee_min_hits(callee_min_hits), m_callee_min_appear(callee_min_appear), m_method_context_context(method_profiles) {} protected: bool should_inline_impl(const DexMethod* caller_method, const DexMethod* callee_method) final { if (m_caller_methods.count(caller_method) == 0) { return false; } // If the pair is hot under any interaction, inline it. auto do_inline = [&]() { for (const auto& pair : m_method_profiles->all_interactions()) { auto it = pair.second.find(callee_method); if (it == pair.second.end()) { continue; } if (it->second.call_count >= m_callee_min_hits && it->second.appear_percent >= m_callee_min_appear) { return true; } } return false; }(); if (!do_inline) { return false; } if (traceEnabled(METH_PROF, 5)) { const auto& caller_context = get_or_create(caller_method); const auto& callee_context = get_or_create(callee_method); for (size_t i = 0; i != m_method_context_context.m_interaction_list.size(); ++i) { auto get_val = [](const auto& vals, size_t i) { if (!vals) { return -1.0f; } if (!vals->hits[i]) { return -1.0f; } return *vals->hits[i]; }; auto caller_val = get_val(caller_context.m_vals, i); auto callee_val = get_val(callee_context.m_vals, i); auto get_appear_val = [](const auto& vals, size_t i) { if (!vals) { return -1.0f; } if (!vals->appear100[i]) { return -1.0f; } return *vals->appear100[i]; }; auto caller_appear_val = get_appear_val(caller_context.m_vals, i); auto callee_appear_val = get_appear_val(callee_context.m_vals, i); TRACE(METH_PROF, 5, "[InlineForSpeedDecisionTrees] %zu: " "%s!%u!%u!%u!%1.5f!%1.5f!%u!%u!%u!%u!%u!%s!%u!%u!%u!%1.5f!%1.5f!%" "u!%u!%u!%u!%u!%s", (size_t)0, // Caller SHOW(caller_method), caller_context.m_params, caller_context.m_blocks, caller_context.m_edges, caller_val, caller_appear_val, caller_context.m_insns, caller_context.m_opcodes, caller_context.m_regs, caller_context.m_num_loops, caller_context.m_deepest_loop, // Callee SHOW(callee_method), callee_context.m_params, callee_context.m_blocks, callee_context.m_edges, callee_val, callee_appear_val, callee_context.m_insns, callee_context.m_opcodes, callee_context.m_regs, callee_context.m_num_loops, callee_context.m_deepest_loop, m_method_context_context.m_interaction_list[i].c_str()); } } return true; } bool should_inline_callsite_impl( const DexMethod* caller_method ATTRIBUTE_UNUSED, const DexMethod* callee_method ATTRIBUTE_UNUSED, const cfg::Block* caller_block ATTRIBUTE_UNUSED) final { // TODO: Maybe do this? return true; } private: // Binding late to support methods synthesized before PGI time. static std::unordered_set<const DexMethodRef*> gather_methods( const std::vector<std::string>& caller_list, bool by_prefix) { std::unordered_set<const DexMethodRef*> ret; auto collect = [&](auto fn) { for (const auto& str_mref : caller_list) { auto* mref = fn(str_mref); if (mref != nullptr) { ret.insert(mref); } else { std::cerr << "Warning: Could not find " << str_mref << std::endl; } } }; if (by_prefix) { auto prefix_fn = [](auto& str) -> const DexMethodRef* { // Array class? { auto bracket_pos = str.find('['); if (bracket_pos != std::string::npos) { return nullptr; } } auto pos = str.rfind('.'); if (pos == std::string::npos || pos == 0 || pos == str.size() - 1) { return nullptr; } DexClass* cls; { auto external_class_name = str.substr(0, pos); auto internal_class_name = java_names::external_to_internal(external_class_name); auto type = DexType::get_type(internal_class_name); if (type == nullptr) { return nullptr; } cls = type_class(type); if (cls == nullptr) { return nullptr; } if (cls->is_external()) { return nullptr; } } // OK, seem to have a good class, now look for the method. auto method_name = str.substr(pos + 1); boost::optional<const DexMethod*> found{boost::none}; for (auto* m : cls->get_all_methods()) { if (m->get_name()->str() == method_name) { if (found) { std::cerr << "Ambiguous method " << method_name << std::endl; found = boost::none; break; } found = m; } } if (found) { return *found; } return nullptr; }; collect(prefix_fn); } else { collect([](auto& str) { return DexMethod::get_method(str); }); } return ret; } const MethodContext& get_or_create(const DexMethod* m) { auto it = m_cache.find(m); if (it != m_cache.end()) { return it->second; } auto insert_it = m_cache.emplace(m, m_method_context_context.create(m)); return insert_it.first->second; } const std::unordered_set<const DexMethodRef*> m_caller_methods; const MethodProfiles* m_method_profiles; const float m_callee_min_hits; const float m_callee_min_appear; // For TRACE. MethodContextContext m_method_context_context; std::unordered_map<const DexMethod*, MethodContext> m_cache; }; enum class IFSMode { kMethodProfiles, kForest, kCallerList, }; } // namespace PerfMethodInlinePass::~PerfMethodInlinePass() {} struct PerfMethodInlinePass::Config { boost::optional<random_forest::PGIForest> forest = boost::none; InlineForSpeedDecisionTrees::DecisionTreesConfig dec_trees_config; std::string interactions_str; boost::optional<std::vector<std::string>> caller_list = boost::none; bool caller_list_prefix{false}; float caller_list_callee_min_hits{0}; float caller_list_callee_min_appear{0}; IFSMode ifs{IFSMode::kMethodProfiles}; boost::optional<std::vector<size_t>> get_interactions( const RedexContext& ctx) { if (interactions_str.empty()) { return boost::none; } const auto& map = ctx.get_sb_interaction_indices(); std::vector<std::string> split; boost::split(split, interactions_str, boost::is_any_of(",")); std::vector<size_t> indices; indices.reserve(split.size()); for (const auto& str : split) { auto it = map.find(str); always_assert_log(it != map.end(), "%s not found!", str.c_str()); indices.push_back(it->second); } return indices; } }; void PerfMethodInlinePass::bind_config() { std::string random_forest_file; bind("random_forest_file", "", random_forest_file); float accept_threshold; bind("accept_threshold", 0, accept_threshold, "Threshold of trees to accept an inlining decision. 0 uses default " "(half)."); bool accept_over; bind("accept_over", true, accept_over, "Comparison is accept-value >= threshold when true and accept-value <= " "threshold otherwise"); float min_hits; bind("min_hits", std::numeric_limits<float>::min(), min_hits, "Threshold for caller and callee method call-count to consider " "inlining. A negative value elides the check."); float min_appear; bind("min_appear", 1.0f, min_appear, "Threshold for caller and callee method appear100 to consider " "inlining. A negative value elides the check."); float min_block_hits; bind("min_block_hits", -1.0f, min_block_hits, "Threshold for caller source-block value to consider inlining. A " "negative value elides the check."); float min_block_appear; bind("min_block_appear", -1.0f, min_block_appear, "Threshold for caller source-block appear100 to consider inlining. A " "negative value elides the check."); bool break_chains; bind("break_chains", true, break_chains); std::string interactions_str; bind("interactions", "", interactions_str, "Comma-separated list of interactions to use. An empty value uses all " "interactions."); size_t exp_force_top_x_entries; bind("exp_force_top_x_entries", 0, exp_force_top_x_entries, "For experiments: If greater than zero, always accept caller/callee " "pairs that are in the top N of the profile."); size_t exp_force_top_x_entries_min_callee_size; bind("exp_force_top_x_entries_min_callee_size", 0, exp_force_top_x_entries_min_callee_size, "For experiments: If greater than zero, restrict always-accept " "caller/callee pairs from exp_force_top_x_entries to callees of at " "least the given size (in instructions)"); float exp_force_top_x_entries_min_appear100; bind("exp_force_top_x_entries_min_appear100", -1.0f, exp_force_top_x_entries_min_appear100, "For experiments: If non-negative, restrict always-accept caller/callee " "pairs from exp_force_top_x_entries to callers and callees that appear " "at least this amount."); std::string caller_list_file; bind("caller_list_file", "", caller_list_file); bool caller_list_prefix; bind("caller_list_prefix", false, caller_list_prefix); float caller_list_callee_min_hits; bind("caller_list_callee_min_hits", 1.0f, caller_list_callee_min_hits); float caller_list_callee_min_appear; bind("caller_list_callee_min_appear", 1.0f, caller_list_callee_min_appear); std::string which_ifs; bind("decision_mode", "", which_ifs); after_configuration([this, random_forest_file, accept_threshold, accept_over, min_hits, min_appear, min_block_hits, min_block_appear, interactions_str, exp_force_top_x_entries, exp_force_top_x_entries_min_callee_size, exp_force_top_x_entries_min_appear100, caller_list_file, caller_list_prefix, caller_list_callee_min_hits, caller_list_callee_min_appear, which_ifs, break_chains]() { this->m_config = std::make_unique<PerfMethodInlinePass::Config>(); if (!random_forest_file.empty()) { std::stringstream buffer; { std::ifstream ifs(random_forest_file); always_assert(ifs); buffer << ifs.rdbuf(); always_assert(!ifs.fail()); } // For simplicity, accept an empty file. if (!buffer.str().empty()) { this->m_config->forest = random_forest::PGIForest::deserialize( buffer.str(), random_forest::get_default_feature_function_map()); TRACE(METH_PROF, 1, "Loaded a forest with %zu decision trees.", this->m_config->forest->size()); } } auto assign_opt = [](float v) -> boost::optional<float> { return v < 0 ? boost::none : boost::optional<float>(v); }; auto& dec_trees_config = this->m_config->dec_trees_config; dec_trees_config.accept_threshold = accept_threshold; dec_trees_config.accept_over = accept_over; dec_trees_config.min_method_hits = assign_opt(min_hits); dec_trees_config.min_method_appear = assign_opt(min_appear); dec_trees_config.min_block_hits = assign_opt(min_block_hits); dec_trees_config.min_block_appear = assign_opt(min_block_appear); dec_trees_config.break_chains = break_chains; this->m_config->interactions_str = interactions_str; auto assign_opt_size_t = [](size_t v) -> boost::optional<size_t> { return v == 0 ? boost::none : boost::optional<size_t>(v); }; dec_trees_config.exp_force_top_x_entries = assign_opt_size_t(exp_force_top_x_entries); dec_trees_config.exp_force_top_x_entries_min_callee_size = assign_opt_size_t(exp_force_top_x_entries_min_callee_size); dec_trees_config.exp_force_top_x_entries_min_appear100 = assign_opt(exp_force_top_x_entries_min_appear100); if (!caller_list_file.empty()) { std::ifstream ifs(caller_list_file); always_assert(ifs); std::string tmp; std::vector<std::string> str_vec; while (std::getline(ifs, tmp)) { str_vec.emplace_back(std::move(tmp)); } always_assert(!ifs.bad()); this->m_config->caller_list = std::move(str_vec); } this->m_config->caller_list_callee_min_hits = caller_list_callee_min_hits; this->m_config->caller_list_callee_min_appear = caller_list_callee_min_appear; this->m_config->caller_list_prefix = caller_list_prefix; if (!which_ifs.empty()) { if (which_ifs == "caller-list") { redex_assert(this->m_config->caller_list); this->m_config->ifs = IFSMode::kCallerList; } else if (which_ifs == "forest") { redex_assert(this->m_config->forest); this->m_config->ifs = IFSMode::kForest; } else { redex_assert(which_ifs == "method-profiles"); this->m_config->ifs = IFSMode::kMethodProfiles; } } else { // Prefer forest. if (this->m_config->forest) { this->m_config->ifs = IFSMode::kForest; } else if (this->m_config->caller_list) { this->m_config->ifs = IFSMode::kCallerList; } else { this->m_config->ifs = IFSMode::kMethodProfiles; } } }); } void PerfMethodInlinePass::run_pass(DexStoresVector& stores, ConfigFiles& conf, PassManager& mgr) { if (mgr.get_redex_options().instrument_pass_enabled) { TRACE(METH_PROF, 1, "Skipping PerfMethodInlinePass because Instrumentation is enabled"); return; } redex_assert(m_config); const auto& method_profiles = conf.get_method_profiles(); if (!method_profiles.has_stats()) { // PerfMethodInline is enabled, but there are no profiles available. Bail, // don't run a regular inline pass. TRACE(METH_PROF, 1, "No profiling data available"); return; } // Unique pointer for indirection and single path. std::unique_ptr<InlineForSpeedBase> ifs{[&]() -> InlineForSpeedBase* { switch (m_config->ifs) { case IFSMode::kForest: { redex_assert(m_config->forest); if (m_config->dec_trees_config.exp_force_top_x_entries) { mgr.set_metric("exp_force_top_x_entries", *m_config->dec_trees_config.exp_force_top_x_entries); if (m_config->dec_trees_config .exp_force_top_x_entries_min_callee_size) { mgr.set_metric("exp_force_top_x_entries_min_callee_size", *m_config->dec_trees_config .exp_force_top_x_entries_min_callee_size); } } m_config->dec_trees_config.interaction_indices = m_config->get_interactions(*g_redex); return new InlineForSpeedDecisionTrees(&method_profiles, m_config->forest->clone(), m_config->dec_trees_config); } case IFSMode::kCallerList: { redex_assert(m_config->caller_list); mgr.set_metric("caller_list_size", m_config->caller_list->size()); mgr.set_metric("caller_list_callee_min_hits_100", m_config->caller_list_callee_min_hits * 100); mgr.set_metric("caller_list_callee_min_appear_100", m_config->caller_list_callee_min_appear * 100); return new InlineForSpeedCallerList( *m_config->caller_list, m_config->caller_list_prefix, &method_profiles, m_config->caller_list_callee_min_hits, m_config->caller_list_callee_min_appear); } case IFSMode::kMethodProfiles: return new InlineForSpeedMethodProfiles(&method_profiles); } not_reached(); }()}; inliner::run_inliner(stores, mgr, conf, /* intra_dex */ true, /* inline_for_speed= */ ifs.get()); TRACE(METH_PROF, 1, "Accepted %zu out of %zu choices.", ifs->get_num_accepted(), ifs->get_num_choices()); mgr.set_metric("pgi_inline_choices", ifs->get_num_choices()); mgr.set_metric("pgi_inline_choices_accepted", ifs->get_num_accepted()); mgr.set_metric("pgi_inline_callsite_choices", ifs->get_num_callsite_choices()); mgr.set_metric("pgi_inline_callsite_choices_accepted", ifs->get_num_callsite_accepted()); mgr.set_metric("pgi_use_random_forest", m_config->forest ? 1 : 0); mgr.set_metric("pgi_use_caller_list", m_config->forest ? 0 : m_config->caller_list ? 1 : 0); if (m_config->forest) { auto opt = m_config->get_interactions(*g_redex); mgr.set_metric("pgi_interactions", opt ? opt->size() : g_redex->get_sb_interaction_indices().size()); } } static PerfMethodInlinePass s_pass;