service/method-inliner/Inliner.h

/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */

#pragma once

#include <functional>
#include <vector>

#include "ABExperimentContext.h"
#include "CallSiteSummaries.h"
#include "PriorityThreadPoolDAGScheduler.h"
#include "RefChecker.h"
#include "Resolver.h"
#include "Shrinker.h"

class InlineForSpeed;

namespace inliner {

struct InlinerConfig;

/*
 * Use the editable CFG instead of IRCode to do the inlining. Return true on
 * success. Registers starting with next_caller_reg must be available.
 */
bool inline_with_cfg(
    DexMethod* caller_method,
    DexMethod* callee_method,
    IRInstruction* callsite,
    DexType* needs_receiver_cast,
    DexType* needs_init_class,
    size_t next_caller_reg,
    const std::shared_ptr<cfg::ControlFlowGraph>& reduced_cfg = nullptr);

} // namespace inliner
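// Example use of inline_with_cfg, as a sketch (illustrative only; `caller`,
// `callee`, and `invoke_insn` are assumed to be resolved by the calling code,
// and the caller is assumed to already have an editable CFG):
//
//   size_t next_reg = caller->get_code()->cfg().get_registers_size();
//   bool ok = inliner::inline_with_cfg(caller, callee, invoke_insn,
//                                      nullptr, // needs_receiver_cast
//                                      nullptr, // needs_init_class
//                                      next_reg);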
/**
 * What kind of caller-callee relationships the inliner should consider.
 */
enum MultiMethodInlinerMode {
  None,
  InterDex,
  IntraDex,
};

// All call-sites of a callee.
struct CallerInsns {
  // Invoke instructions per caller
  std::unordered_map<const DexMethod*, std::unordered_set<IRInstruction*>>
      caller_insns;
  // Invoke instructions that need a cast
  std::unordered_map<IRInstruction*, DexType*> inlined_invokes_need_cast;
  // Whether there may be any other unknown call-sites.
  bool other_call_sites{false};
  bool empty() const { return caller_insns.empty() && !other_call_sites; }
};

using CalleeCallerInsns = std::unordered_map<DexMethod*, CallerInsns>;

struct Inlinable {
  DexMethod* callee;
  // Only used when not using cfg; iterator to invoke instruction to callee
  IRList::iterator iterator;
  // Invoke instruction to callee
  IRInstruction* insn;
  // Whether the invocation at a particular call-site is guaranteed to not
  // return normally, and instead of inlining, a throw statement should be
  // inserted afterwards.
  bool no_return{false};
  // For a specific call-site, reduced cfg template after applying call-site
  // summary
  std::shared_ptr<cfg::ControlFlowGraph> reduced_cfg;
  // Estimated size of callee, possibly reduced by call-site specific knowledge
  size_t insn_size;
};

struct CalleeCallerRefs {
  bool same_class;
  size_t classes;
};

// The average or call-site specific inlined costs, depending on how it is
// retrieved
struct InlinedCost {
  // Full code costs of the original callee
  size_t full_code;
  // Average or call-site specific code costs of the callee after pruning
  float code;
  // Average or call-site specific method-refs count of the callee after
  // pruning
  float method_refs;
  // Average or call-site specific others-refs count of the callee after
  // pruning
  float other_refs;
  // Whether all or a specific call-site is guaranteed to not return normally
  bool no_return;
  // Average or call-site specific value indicating whether result is used
  float result_used;
  // For a specific call-site, reduced cfg template after applying call-site
  // summary
  std::shared_ptr<cfg::ControlFlowGraph> reduced_cfg;
  // Maximum or call-site specific estimated callee size after pruning
  size_t insn_size;

  bool operator==(const InlinedCost& other) {
    // TODO: Also check that reduced_cfg's are equivalent
    return full_code == other.full_code && code == other.code &&
           method_refs == other.method_refs &&
           other_refs == other.other_refs && no_return == other.no_return &&
           result_used == other.result_used && insn_size == other.insn_size;
  }
};

/**
 * Helper class to inline a set of candidates.
 * Take a set of candidates and a scope and walk all instructions in scope
 * to find and inline all calls to a candidate.
 * A resolver is used to map a method reference to a method definition, and
 * must be thread-safe. Not all methods may be inlined, due to restrictions
 * on either the caller or the callee. Inlining is performed bottom-up.
 */
class MultiMethodInliner {
 public:
  /**
   * We can do global inlining before InterDexPass, but after InterDexPass, we
   * can only inline methods within each dex. Set mode to IntraDex if
   * inlining is needed after InterDex.
   */
  MultiMethodInliner(
      const std::vector<DexClass*>& scope,
      const init_classes::InitClassesWithSideEffects&
          init_classes_with_side_effects,
      DexStoresVector& stores,
      const std::unordered_set<DexMethod*>& candidates,
      std::function<DexMethod*(DexMethodRef*, MethodSearch)>
          concurrent_resolve_fn,
      const inliner::InlinerConfig& config,
      int min_sdk,
      MultiMethodInlinerMode mode = InterDex,
      const CalleeCallerInsns& true_virtual_callers = {},
      InlineForSpeed* inline_for_speed = nullptr,
      bool analyze_and_prune_inits = false,
      const std::unordered_set<DexMethodRef*>& configured_pure_methods = {},
      const api::AndroidSDK* min_sdk_api = nullptr,
      bool cross_dex_penalty = false,
      const std::unordered_set<const DexString*>&
          configured_finalish_field_names = {});

  ~MultiMethodInliner() { delayed_invoke_direct_to_static(); }

  /**
   * Attempt inlining for all candidates.
   */
  void inline_methods(bool methods_need_deconstruct = true);

  /**
   * Return the set of unique inlined methods.
   */
  std::unordered_set<DexMethod*> get_inlined() const {
    std::unordered_set<DexMethod*> res(m_inlined.begin(), m_inlined.end());
    return res;
  }

  bool for_speed() const { return m_inline_for_speed != nullptr; }

  /**
   * Inline callees in the caller if is_inlinable below returns true.
   */
  void inline_callees(DexMethod* caller,
                      const std::unordered_set<DexMethod*>& callees,
                      bool filter_via_should_inline = false);

  /**
   * Inline callees in the given instructions in the caller, if is_inlinable
   * below returns true.
   */
  size_t inline_callees(DexMethod* caller,
                        const std::unordered_set<IRInstruction*>& insns,
                        std::vector<IRInstruction*>* deleted_insns = nullptr);

  /**
   * Return true if the callee is inlinable into the caller.
   * The predicates below define the constraints for inlining.
   * Providing an instruction is optional, and only used for logging.
   */
  bool is_inlinable(const DexMethod* caller,
                    const DexMethod* callee,
                    const IRInstruction* insn,
                    uint64_t estimated_caller_size,
                    uint64_t estimated_callee_size,
                    bool* caller_too_large_ = nullptr);

  void visibility_changes_apply_and_record_make_static(
      const VisibilityChanges& visibility_changes);

  shrinker::Shrinker& get_shrinker() { return m_shrinker; }
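  // Typical usage, as a sketch (illustrative; `scope`, `init_classes`,
  // `stores`, `candidates`, `resolve_fn`, `config`, and `min_sdk` are assumed
  // to be provided by the calling pass and are not defined in this header):
  //
  //   MultiMethodInliner inliner(scope, init_classes, stores, candidates,
  //                              resolve_fn, config, min_sdk, IntraDex);
  //   inliner.inline_methods();
  //   std::unordered_set<DexMethod*> inlined = inliner.get_inlined();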
 private:
  DexType* get_needs_init_class(DexMethod* callee) const;

  DexMethod* get_callee(DexMethod* caller, IRInstruction* insn);

  size_t inline_inlinables(
      DexMethod* caller,
      const std::vector<Inlinable>& inlinables,
      std::vector<IRInstruction*>* deleted_insns = nullptr);

  /**
   * Return true if the method is related to enum (java.lang.Enum and
   * derived). Cannot inline enum methods because they can be called by code
   * we do not own.
   */
  bool is_blocklisted(const DexMethod* callee);

  bool caller_is_blocklisted(const DexMethod* caller);

  /**
   * Return true if the callee contains external catch exception types
   * which are not public.
   */
  bool has_external_catch(const DexMethod* callee);

  /**
   * Return true if the callee contains certain opcodes that are difficult
   * or impossible to inline.
   * Some of the opcodes are defined by the methods below.
   */
  bool cannot_inline_opcodes(const DexMethod* caller,
                             const DexMethod* callee,
                             const IRInstruction* invk_insn);

  /**
   * Return true if inlining would require a method called from the callee
   * (candidate) to turn into a virtual method (e.g. private to public).
   */
  bool create_vmethod(IRInstruction* insn,
                      const DexMethod* callee,
                      const DexMethod* caller);

  /**
   * Return true if we would create an invocation within an outlined method to
   * another outlined method.
   */
  bool outlined_invoke_outlined(IRInstruction* insn, const DexMethod* caller);

  /**
   * Return true if a callee contains an invoke-super to a different method
   * in the hierarchy.
   * invoke-super can only exist within the class the call lives in.
   */
  bool nonrelocatable_invoke_super(IRInstruction* insn);

  /**
   * Return true if the callee contains a call to an unknown virtual method.
   * We cannot determine the visibility of the method invoked and thus
   * we cannot inline as we could cause a verification error if the method
   * was package/protected and we move the call out of context.
   */
  bool unknown_virtual(IRInstruction* insn);

  /**
   * Return true if the callee contains an access to an unknown field.
   * We cannot determine the visibility of the field accessed and thus
   * we cannot inline as we could cause a verification error if the field
   * was package/protected and we move the access out of context.
   */
  bool unknown_field(IRInstruction* insn);

  /**
   * Return true if `insn` is
   *   sget android.os.Build.VERSION.SDK_INT
   */
  bool check_android_os_version(IRInstruction* insn);

  /**
   * Return true if a caller is in a DEX in a store and any opcode in callee
   * refers to a DexMember in a different store.
   */
  bool cross_store_reference(const DexMethod* caller, const DexMethod* callee);

  /**
   * Return true if a caller is in a DEX in a store and any opcode in callee
   * refers to a problematic ref, i.e. one that directly or indirectly refers
   * to another store, or a non-min-sdk API.
   */
  bool problematic_refs(const DexMethod* caller, const DexMethod* callee);

  bool is_estimate_over_max(uint64_t estimated_caller_size,
                            uint64_t estimated_callee_size,
                            uint64_t max);

  /**
   * Some versions of ART (5.0.0 - 5.0.2) will fail to verify a method if it
   * is too large. See
   * https://code.google.com/p/android/issues/detail?id=66655.
   *
   * Right now we only check for the number of instructions, but there are
   * other constraints that might be worth looking into, e.g. the number of
   * registers.
   */
  bool caller_too_large(DexType* caller_type,
                        uint64_t estimated_caller_size,
                        uint64_t estimated_callee_size);

  /**
   * Return whether the callee should be inlined into the caller. This differs
   * from is_inlinable in that the former is concerned with whether inlining
   * is possible to do correctly at all, whereas this is concerned with
   * whether the inlining is beneficial for size / performance.
   *
   * This method does *not* need to return a subset of is_inlinable. We will
   * only inline callsites that pass both should_inline and is_inlinable.
   *
   * Note that this filter will only be applied when inlining is initiated via
   * a call to `inline_methods()`, but not if `inline_callees()` is invoked
   * directly.
   */
  bool should_inline_always(const DexMethod* callee);
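  // How these predicates compose, as a sketch (illustrative only; the actual
  // control flow lives in the corresponding .cpp file and is more involved):
  //
  //   inline_methods()
  //     -> walks caller/callee pairs bottom-up, and for each candidate:
  //          if (is_inlinable(caller, callee, insn, caller_size, callee_size)
  //              && should_inline_always(callee))
  //            ... collect an Inlinable and hand it to inline_inlinables()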
  /**
   * Whether it's beneficial to inline the callee at a particular call-site.
   * no_return may be set to true when the return value is false.
   * reduced_cfg and insn_size are set when the return value is true.
   */
  bool should_inline_at_call_site(
      DexMethod* caller,
      const IRInstruction* invoke_insn,
      DexMethod* callee,
      bool* no_return = nullptr,
      std::shared_ptr<cfg::ControlFlowGraph>* reduced_cfg = nullptr,
      size_t* insn_size = nullptr);

  /**
   * should_inline_fast will return true for a subset of methods compared to
   * should_inline. should_inline_fast can be evaluated much more quickly, as
   * it doesn't need to peek into the callee code.
   */
  bool should_inline_fast(const DexMethod* callee);

  /**
   * Gets the number of instructions in a callee.
   */
  size_t get_callee_insn_size(const DexMethod* callee);

  /**
   * Gets the set of referenced types in a callee.
   */
  std::shared_ptr<std::vector<DexType*>> get_callee_type_refs(
      const DexMethod* callee);

  /**
   * Gets the set of references in a callee's code.
   */
  std::shared_ptr<CodeRefs> get_callee_code_refs(const DexMethod* callee);

  /**
   * Computes information about callers of a method.
   */
  CalleeCallerRefs get_callee_caller_refs(const DexMethod* callee);

  /**
   * We want to avoid inlining a large method with many callers as that would
   * bloat the bytecode.
   */
  bool too_many_callers(const DexMethod* callee);

  // Reduce a cfg with a call-site summary, if given.
  std::shared_ptr<cfg::ControlFlowGraph> apply_call_site_summary(
      bool is_static,
      DexType* declaring_type,
      DexProto* proto,
      const cfg::ControlFlowGraph& original_cfg,
      const CallSiteSummary* call_site_summary);

  /*
   * Try to estimate the number of code units (2 bytes each) of code. Also
   * take into account costs arising from control-flow overhead and constant
   * arguments, if any.
   */
  InlinedCost get_inlined_cost(
      bool is_static,
      DexType* declaring_type,
      DexProto* proto,
      const IRCode* code,
      const CallSiteSummary* call_site_summary = nullptr);

  /**
   * Estimate inlined cost for fully inlining a callee without using any
   * summaries for pruning.
   */
  const InlinedCost* get_fully_inlined_cost(const DexMethod* callee);

  /**
   * Estimate average inlined cost when inlining a callee, considering all
   * call-site summaries for pruning.
   */
  const InlinedCost* get_average_inlined_cost(const DexMethod* callee);

  /**
   * Estimate inlined cost for a particular call-site, if available.
   */
  const InlinedCost* get_call_site_inlined_cost(
      const IRInstruction* invoke_insn, const DexMethod* callee);

  /**
   * Estimate inlined cost for a particular call-site summary, if available.
   */
  const InlinedCost* get_call_site_inlined_cost(
      const CallSiteSummary* call_site_summary, const DexMethod* callee);

  /**
   * Change visibilities of methods, assuming that
   * `m_delayed_visibility_changes` is non-null.
   */
  void delayed_visibility_changes_apply();

  /**
   * Staticize required methods (stored in `m_delayed_make_static`) and update
   * opcodes accordingly.
   *
   * NOTE: It only needs to be called once after inlining. Since it is called
   * from the destructor, there is no need to manually call it.
   */
  void delayed_invoke_direct_to_static();

  /**
   * Initiate computation of various callee costs asynchronously.
   */
  void compute_callee_costs(DexMethod* method);

  /**
   * Post-process a method synchronously.
   */
  void postprocess_method(DexMethod* method);

  /**
   * Shrink a method (run constant-prop, cse, copy-prop, local-dce,
   * dedup-blocks) synchronously.
   */
  void shrink_method(DexMethod* method);

  /**
   * Whether inline_inlinables needs to deconstruct the caller's and callees'
   * code.
   */
  bool inline_inlinables_need_deconstruct(DexMethod* method);

  // Checks that...
  // - there are no assignments to (non-inherited) instance fields before
  //   a constructor call, and
  // - the constructor refers to a method of the same class, and
  // - there are no assignments to any final fields.
  // Under these conditions, a constructor is universally inlinable.
  bool can_inline_init(const DexMethod* init_method);
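  // Illustrative example of the conditions above (hypothetical Java source,
  // not from this codebase; the sketch focuses only on the field-assignment
  // rules):
  //
  //   class A { int x;       A(int v) { x = v; } } // x assigned only after
  //                                                // the implicit ctor call
  //   class B { final int y; B(int v) { y = v; } } // assigns a final field,
  //                                                // so not universally
  //                                                // inlinable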
 private:
  std::unique_ptr<std::vector<std::unique_ptr<RefChecker>>> m_ref_checkers;

  /**
   * Resolver function to map a method reference to a method definition. Must
   * be thread-safe.
   */
  std::function<DexMethod*(DexMethodRef*, MethodSearch)> m_concurrent_resolver;

  /**
   * Inlined methods.
   */
  ConcurrentSet<DexMethod*> m_inlined;

  //
  // Maps from callee to callers and reverse map from caller to callees.
  // These are used to perform bottom-up inlining.
  //
  MethodToMethodOccurrences callee_caller;
  MethodToMethodOccurrences caller_callee;
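  // Bottom-up processing, sketched (illustrative pseudo-code, not the actual
  // scheduler logic; topological_order is a hypothetical helper): a callee is
  // fully processed before any caller that invokes it, so callee sizes and
  // costs already reflect inlining and shrinking performed inside the callee.
  //
  //   for (DexMethod* m : topological_order(callee_caller)) { // leaves first
  //     inline_callees(m, ...);
  //     shrink_method(m);
  //   }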
  std::unordered_map<const DexMethod*,
                     std::unordered_map<IRInstruction*, DexMethod*>>
      caller_virtual_callee;

  std::unordered_map<IRInstruction*, DexType*> m_inlined_invokes_need_cast;

  std::unordered_set<const DexMethod*>
      m_true_virtual_callees_with_other_call_sites;

  std::unordered_set<const DexMethod*> m_recursive_callees;
  std::unordered_set<const DexMethod*> m_speed_excluded_callees;

  // If mode == IntraDex, then this is the set of callees that is reachable
  // via an (otherwise ignored) invocation from a caller in a different dex.
  // If mode != IntraDex, then the set is empty.
  std::unordered_set<const DexMethod*> m_x_dex_callees;

  // Cache of the inlined costs of fully inlining a callee without using any
  // summaries for pruning.
  mutable ConcurrentMap<const DexMethod*, std::shared_ptr<InlinedCost>>
      m_fully_inlined_costs;

  // Cache of the average inlined costs of each method.
  mutable ConcurrentMap<const DexMethod*, std::shared_ptr<InlinedCost>>
      m_average_inlined_costs;

  // Cache of the inlined costs of each call-site summary after pruning.
  mutable ConcurrentMap<CalleeCallSiteSummary,
                        std::shared_ptr<InlinedCost>,
                        boost::hash<CalleeCallSiteSummary>>
      m_call_site_inlined_costs;

  // Cache of the inlined costs of each call-site after pruning.
  mutable ConcurrentMap<const IRInstruction*,
                        boost::optional<const InlinedCost*>>
      m_invoke_call_site_inlined_costs;

  // Priority thread pool to handle parallel processing of methods, either
  // shrinking initially / after inlining into them, or even to inline in
  // parallel. By default, parallelism is disabled (num_threads = 0).
  PriorityThreadPoolDAGScheduler<DexMethod*> m_scheduler;

  // Set of methods that need to be made static eventually. The destructor
  // of this class will do the necessary delayed work.
  ConcurrentSet<DexMethod*> m_delayed_make_static;

  // Accumulated visibility changes that must be applied eventually.
  // This happens locally within inline_methods.
  std::unique_ptr<VisibilityChanges> m_delayed_visibility_changes;

  // Guards mutating m_delayed_visibility_changes or applying visibility
  // changes eagerly.
  std::mutex m_visibility_changes_mutex;

  // Cache for should_inline function
  ConcurrentMap<const DexMethod*, boost::optional<bool>> m_should_inline;

  // Optional cache for get_callee_insn_size function
  std::unique_ptr<ConcurrentMap<const DexMethod*, size_t>> m_callee_insn_sizes;

  // Optional cache for get_callee_type_refs function
  std::unique_ptr<
      ConcurrentMap<const DexMethod*, std::shared_ptr<std::vector<DexType*>>>>
      m_callee_type_refs;

  // Optional cache for get_callee_code_refs function
  std::unique_ptr<ConcurrentMap<const DexMethod*, std::shared_ptr<CodeRefs>>>
      m_callee_code_refs;

  // Optional cache for get_callee_caller_refs function
  std::unique_ptr<ConcurrentMap<const DexMethod*, CalleeCallerRefs>>
      m_callee_caller_refs;

  // Cache of whether a constructor can be unconditionally inlined.
  mutable ConcurrentMap<const DexMethod*, boost::optional<bool>>
      m_can_inline_init;

 private:
  std::unique_ptr<inliner::CallSiteSummarizer> m_call_site_summarizer;

  /**
   * Info about inlining.
   */
  struct InliningInfo {
    // statistics that must be incremented sequentially
    size_t recursive{0};
    size_t max_call_stack_depth{0};
    size_t waited_seconds{0};
    int critical_path_length{0};

    // statistics that may be incremented concurrently
    std::atomic<size_t> kotlin_lambda_inlined{0};
    std::atomic<size_t> calls_inlined{0};
    std::atomic<size_t> init_classes{0};
    std::atomic<size_t> calls_not_inlinable{0};
    std::atomic<size_t> calls_not_inlined{0};
    std::atomic<size_t> no_returns{0};
    std::atomic<size_t> unreachable_insns{0};
    std::atomic<size_t> intermediate_shrinkings{0};
    std::atomic<size_t> intermediate_remove_unreachable_blocks{0};
    std::atomic<size_t> not_found{0};
    std::atomic<size_t> blocklisted{0};
    std::atomic<size_t> throws{0};
    std::atomic<size_t> multi_ret{0};
    std::atomic<size_t> need_vmethod{0};
    std::atomic<size_t> invoke_super{0};
    std::atomic<size_t> escaped_virtual{0};
    std::atomic<size_t> known_public_methods{0};
    std::atomic<size_t> unresolved_methods{0};
    std::atomic<size_t> non_pub_virtual{0};
    std::atomic<size_t> escaped_field{0};
    std::atomic<size_t> non_pub_field{0};
    std::atomic<size_t> non_pub_ctor{0};
    std::atomic<size_t> cross_store{0};
    std::atomic<size_t> api_level_mismatch{0};
    std::atomic<size_t> problematic_refs{0};
    std::atomic<size_t> caller_too_large{0};
    std::atomic<size_t> constant_invoke_callees_analyzed{0};
    std::atomic<size_t> constant_invoke_callees_unused_results{0};
    std::atomic<size_t> constant_invoke_callees_no_return{0};
    inliner::CallSiteSummaryStats call_site_summary_stats;
  };
  InliningInfo info;

  const std::vector<DexClass*>& m_scope;

  const inliner::InlinerConfig& m_config;

  const MultiMethodInlinerMode m_mode;

  // Non-const to allow for caching behavior.
  InlineForSpeed* m_inline_for_speed;
  // Whether to do some deep analysis to determine if constructor candidates
  // can be safely inlined, and don't inline them otherwise.
  bool m_analyze_and_prune_inits;

  bool m_cross_dex_penalty;

  shrinker::Shrinker m_shrinker;

  AccumulatingTimer m_inline_callees_timer;
  AccumulatingTimer m_inline_callees_should_inline_timer;
  AccumulatingTimer m_inline_callees_init_timer;
  AccumulatingTimer m_inline_inlinables_timer;
  AccumulatingTimer m_inline_with_cfg_timer;
  AccumulatingTimer m_call_site_inlined_cost_timer;
  AccumulatingTimer m_cannot_inline_sketchy_code_timer;

  std::unique_ptr<ab_test::ABExperimentContext> m_ab_experiment_context{
      nullptr};
  std::mutex ab_exp_mutex;

  const DexFieldRef* m_sdk_int_field =
      DexField::get_field("Landroid/os/Build$VERSION;.SDK_INT:I");

 public:
  const InliningInfo& get_info() { return info; }

  size_t get_callers() { return caller_callee.size(); }

  size_t get_x_dex_callees() { return m_x_dex_callees.size(); }

  double get_call_site_inlined_cost_seconds() const {
    return m_call_site_inlined_cost_timer.get_seconds();
  }
  double get_inline_callees_seconds() const {
    return m_inline_callees_timer.get_seconds() -
           m_inline_callees_should_inline_timer.get_seconds() -
           m_inline_callees_init_timer.get_seconds();
  }
  double get_inline_callees_should_inline_seconds() const {
    return m_inline_callees_should_inline_timer.get_seconds();
  }
  double get_inline_callees_init_seconds() const {
    return m_inline_callees_init_timer.get_seconds();
  }
  double get_inline_inlinables_seconds() const {
    return m_inline_inlinables_timer.get_seconds() -
           m_inline_with_cfg_timer.get_seconds();
  }
  double get_inline_with_cfg_seconds() const {
    return m_inline_with_cfg_timer.get_seconds();
  }
  double get_cannot_inline_sketchy_code_timer_seconds() const {
    return m_cannot_inline_sketchy_code_timer.get_seconds();
  }
};
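
// Example of reading accumulated statistics and timings after a run (a
// sketch; `inliner` is assumed to be a MultiMethodInliner on which
// inline_methods() has already been called):
//
//   const auto& stats = inliner.get_info();
//   size_t calls_inlined = stats.calls_inlined.load();
//   double cfg_seconds = inliner.get_inline_with_cfg_seconds();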