service/method-inliner/Inliner.cpp:
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include "Inliner.h"
#include <cstdint>
#include <utility>
#include "ApiLevelChecker.h"
#include "CFGInliner.h"
#include "ConstantPropagationAnalysis.h"
#include "ConstantPropagationWholeProgramState.h"
#include "ConstructorAnalysis.h"
#include "DexInstruction.h"
#include "EditableCfgAdapter.h"
#include "GraphUtil.h"
#include "InlineForSpeed.h"
#include "InlinerConfig.h"
#include "LocalDce.h"
#include "LoopInfo.h"
#include "Macros.h"
#include "MethodProfiles.h"
#include "MonitorCount.h"
#include "Mutators.h"
#include "OptData.h"
#include "OutlinedMethods.h"
#include "RecursionPruner.h"
#include "StlUtil.h"
#include "Timer.h"
#include "UnknownVirtuals.h"
#include "Walkers.h"
#include "WorkQueue.h"
using namespace opt_metadata;
using namespace outliner;
namespace {
// The following costs are in terms of code-units (2 bytes).
// Typical overhead of calling a method, without move-result overhead.
const float COST_INVOKE = 3.7f;
// Typical overhead of having move-result instruction.
const float COST_MOVE_RESULT = 3.0f;
// Overhead of having a method and its metadata.
const size_t COST_METHOD = 16;
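// Worked example of how these cost constants combine (rough sketch): keeping
// a call site whose result is used costs about COST_INVOKE + COST_MOVE_RESULT
// = 3.7 + 3.0 = 6.7 code units (see get_invoke_cost), while deleting a callee
// entirely saves an additional COST_METHOD = 16 code units of metadata on top
// of its body size (see too_many_callers).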
// When to consider running constant-propagation to better estimate inlined
// cost. It just takes too much time to run the analysis for large methods.
const size_t MAX_COST_FOR_CONSTANT_PROPAGATION = 1000;
/*
 * This is the maximum size of a method that Dex bytecode can encode.
 * The table of instructions is indexed by a 32-bit unsigned integer.
*/
constexpr uint64_t HARD_MAX_INSTRUCTION_SIZE = UINT64_C(1) << 32;
} // namespace
MultiMethodInliner::MultiMethodInliner(
const std::vector<DexClass*>& scope,
const init_classes::InitClassesWithSideEffects&
init_classes_with_side_effects,
DexStoresVector& stores,
const std::unordered_set<DexMethod*>& candidates,
std::function<DexMethod*(DexMethodRef*, MethodSearch)>
concurrent_resolve_fn,
const inliner::InlinerConfig& config,
int min_sdk,
MultiMethodInlinerMode mode /* default is InterDex */,
const CalleeCallerInsns& true_virtual_callers,
InlineForSpeed* inline_for_speed,
bool analyze_and_prune_inits,
const std::unordered_set<DexMethodRef*>& configured_pure_methods,
const api::AndroidSDK* min_sdk_api,
bool cross_dex_penalty,
const std::unordered_set<const DexString*>& configured_finalish_field_names)
: m_concurrent_resolver(std::move(concurrent_resolve_fn)),
m_scheduler(
[this](DexMethod* method) {
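// Work item run for each scheduled method: if the method is a caller with
// known callees, inline those first (filtered via should_inline), and then
// schedule post-processing (shrinking and callee-cost caching) as a
// lower-priority continuation; otherwise, post-process right away.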
auto it = caller_callee.find(method);
if (it != caller_callee.end()) {
always_assert(!inline_inlinables_need_deconstruct(method));
std::unordered_set<DexMethod*> callees;
for (auto& p : it->second) {
callees.insert(p.first);
}
inline_callees(method, callees,
/* filter_via_should_inline */ true);
// We schedule the post-processing to allow other unlocked
// higher-priority tasks to take precedence
m_scheduler.augment(
method, [this, method] { postprocess_method(method); });
} else {
postprocess_method(method);
}
},
0),
m_scope(scope),
m_config(config),
m_mode(mode),
m_inline_for_speed(inline_for_speed),
m_analyze_and_prune_inits(analyze_and_prune_inits),
m_cross_dex_penalty(cross_dex_penalty),
m_shrinker(stores,
scope,
init_classes_with_side_effects,
config.shrinker,
min_sdk,
configured_pure_methods,
configured_finalish_field_names) {
Timer t("MultiMethodInliner construction");
for (const auto& callee_callers : true_virtual_callers) {
auto callee = callee_callers.first;
if (callee_callers.second.other_call_sites) {
m_true_virtual_callees_with_other_call_sites.insert(callee);
}
for (const auto& caller_insns : callee_callers.second.caller_insns) {
for (auto insn : caller_insns.second) {
caller_virtual_callee[caller_insns.first][insn] = callee;
}
auto& iinc = callee_callers.second.inlined_invokes_need_cast;
m_inlined_invokes_need_cast.insert(iinc.begin(), iinc.end());
}
}
// Walk every opcode in scope looking for calls to inlinable candidates and
// build a map of callers to callees and the reverse map of callees to
// callers. If mode != IntraDex, we build the map for all the candidates. If
// mode == IntraDex, we exclude invocations where the caller is located in a
// different dex than the callee, and remember all such cross-dex callees.
ConcurrentSet<DexMethod*> concurrent_x_dex_callees;
ConcurrentMap<const DexMethod*, std::unordered_map<DexMethod*, size_t>>
concurrent_callee_caller;
ConcurrentMap<DexMethod*, std::unordered_map<DexMethod*, size_t>>
concurrent_caller_callee;
std::unique_ptr<XDexRefs> x_dex;
if (mode == IntraDex) {
x_dex = std::make_unique<XDexRefs>(stores);
}
if (min_sdk_api) {
const auto& xstores = m_shrinker.get_xstores();
m_ref_checkers =
std::make_unique<std::vector<std::unique_ptr<RefChecker>>>();
for (size_t store_idx = 0; store_idx < xstores.size(); store_idx++) {
m_ref_checkers->emplace_back(
std::make_unique<RefChecker>(&xstores, store_idx, min_sdk_api));
}
}
walk::parallel::opcodes(
scope, [](DexMethod*) { return true; },
[&](DexMethod* caller, IRInstruction* insn) {
if (!opcode::is_an_invoke(insn->opcode())) {
return;
}
auto callee =
m_concurrent_resolver(insn->get_method(), opcode_to_search(insn));
if (callee == nullptr || !callee->is_concrete() ||
!candidates.count(callee) || true_virtual_callers.count(callee)) {
return;
}
if (x_dex && x_dex->cross_dex_ref(caller, callee)) {
concurrent_x_dex_callees.insert(callee);
return;
}
concurrent_callee_caller.update(
callee,
[caller](const DexMethod*,
std::unordered_map<DexMethod*, size_t>& v,
bool) { ++v[caller]; });
concurrent_caller_callee.update(
caller,
[callee](const DexMethod*,
std::unordered_map<DexMethod*, size_t>& v,
bool) { ++v[callee]; });
});
m_x_dex_callees.insert(concurrent_x_dex_callees.begin(),
concurrent_x_dex_callees.end());
for (auto& p : concurrent_callee_caller) {
callee_caller.insert(std::move(p));
}
for (auto& p : concurrent_caller_callee) {
caller_callee.insert(std::move(p));
}
for (const auto& callee_callers : true_virtual_callers) {
auto callee = callee_callers.first;
for (const auto& caller_insns : callee_callers.second.caller_insns) {
auto caller = const_cast<DexMethod*>(caller_insns.first);
if (x_dex && x_dex->cross_dex_ref(caller, callee)) {
m_x_dex_callees.insert(callee);
continue;
}
++callee_caller[callee][caller];
++caller_callee[caller][callee];
}
}
}
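/*
 * Top-level driver. Builds editable CFGs where still needed, prunes recursive
 * call chains, optionally summarizes call sites, and then schedules all
 * selected methods so that callees are processed before their callers
 * (bottom-up). Inlining and shrinking run in parallel; visibility changes and
 * invoke-direct-to-static rewrites are applied once at the end.
 */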
void MultiMethodInliner::inline_methods(bool methods_need_deconstruct) {
std::unordered_set<IRCode*> need_deconstruct;
if (methods_need_deconstruct) {
for (auto& p : caller_callee) {
need_deconstruct.insert(const_cast<IRCode*>(p.first->get_code()));
}
for (auto& p : callee_caller) {
need_deconstruct.insert(const_cast<IRCode*>(p.first->get_code()));
}
for (auto it = need_deconstruct.begin(); it != need_deconstruct.end();) {
if ((*it)->editable_cfg_built()) {
it = need_deconstruct.erase(it);
} else {
it++;
}
}
if (!need_deconstruct.empty()) {
workqueue_run<IRCode*>(
[](IRCode* code) { code->build_cfg(/* editable */ true); },
need_deconstruct);
}
}
m_ab_experiment_context = ab_test::ABExperimentContext::create("pgi_v1");
// Inlining and shrinking initiated from within this method will be done
// in parallel.
m_scheduler.get_thread_pool().set_num_threads(
m_config.debug ? 1 : redex_parallel::default_num_threads());
// The order in which we inline is such that once a callee is considered to
// be inlined, its code will no longer change. So we can cache...
// - its size
// - its set of type refs
// - its set of method refs
// - whether all callers are in the same class, and are called from how many
// classes
m_callee_insn_sizes =
std::make_unique<ConcurrentMap<const DexMethod*, size_t>>();
m_callee_type_refs =
std::make_unique<ConcurrentMap<const DexMethod*,
std::shared_ptr<std::vector<DexType*>>>>();
if (m_ref_checkers) {
m_callee_code_refs = std::make_unique<
ConcurrentMap<const DexMethod*, std::shared_ptr<CodeRefs>>>();
}
m_callee_caller_refs =
std::make_unique<ConcurrentMap<const DexMethod*, CalleeCallerRefs>>();
// Instead of changing visibility as we inline, blocking other work on the
// critical path, we do it all in parallel at the end.
m_delayed_visibility_changes = std::make_unique<VisibilityChanges>();
// we want to inline bottom up, so as a first step, for all callers, we
// recurse into all inlinable callees until we hit a leaf and we start
// inlining from there. First, we just gather data on
// caller/non-recursive-callees pairs for each stack depth.
{
auto exclude_fn = [this](DexMethod* caller, DexMethod* callee) {
return for_speed() &&
!m_inline_for_speed->should_inline_generic(caller, callee);
};
inliner::RecursionPruner recursion_pruner(callee_caller, caller_callee,
std::move(exclude_fn));
recursion_pruner.run();
info.recursive = recursion_pruner.recursive_call_sites();
info.max_call_stack_depth = recursion_pruner.max_call_stack_depth();
m_recursive_callees = std::move(recursion_pruner.recursive_callees());
m_speed_excluded_callees = std::move(recursion_pruner.excluded_callees());
}
if (m_config.use_call_site_summaries) {
m_call_site_summarizer = std::make_unique<inliner::CallSiteSummarizer>(
m_shrinker, callee_caller, caller_callee,
[this](DexMethod* caller, IRInstruction* insn) -> DexMethod* {
return this->get_callee(caller, insn);
},
[this](DexMethod* callee) -> bool {
return m_recursive_callees.count(callee) || root(callee) ||
m_x_dex_callees.count(callee) || !can_rename(callee) ||
m_true_virtual_callees_with_other_call_sites.count(callee) ||
m_speed_excluded_callees.count(callee);
},
/* filter_fn */ nullptr, &info.call_site_summary_stats);
m_call_site_summarizer->summarize();
}
// Second, compute caller priorities --- the callers get a priority assigned
// that reflects how many other callers will be waiting for them.
std::unordered_set<DexMethod*> methods_to_schedule;
for (auto& p : caller_callee) {
auto caller = p.first;
for (auto& q : p.second) {
auto callee = q.first;
m_scheduler.add_dependency(const_cast<DexMethod*>(caller), callee);
}
}
// Third, schedule and run tasks for all selected methods.
if (m_shrinker.enabled() && m_config.shrink_other_methods) {
walk::code(m_scope, [&](DexMethod* method, IRCode& code) {
methods_to_schedule.insert(method);
});
} else {
for (auto& p : caller_callee) {
methods_to_schedule.insert(const_cast<DexMethod*>(p.first));
}
for (auto& p : callee_caller) {
methods_to_schedule.insert(const_cast<DexMethod*>(p.first));
}
}
info.critical_path_length =
m_scheduler.run(methods_to_schedule.begin(), methods_to_schedule.end());
m_ab_experiment_context->flush();
m_ab_experiment_context = nullptr;
delayed_visibility_changes_apply();
delayed_invoke_direct_to_static();
info.waited_seconds = m_scheduler.get_thread_pool().get_waited_seconds();
if (!need_deconstruct.empty()) {
workqueue_run<IRCode*>([](IRCode* code) { code->clear_cfg(); },
need_deconstruct);
}
}
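/*
 * Resolve the target of an invoke instruction. For true-virtual call sites
 * recorded in caller_virtual_callee during construction, the pre-computed
 * callee takes precedence over the concurrent resolver's result.
 */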
DexMethod* MultiMethodInliner::get_callee(DexMethod* caller,
IRInstruction* insn) {
if (!opcode::is_an_invoke(insn->opcode())) {
return nullptr;
}
auto callee =
m_concurrent_resolver(insn->get_method(), opcode_to_search(insn));
auto it = caller_virtual_callee.find(caller);
if (it == caller_virtual_callee.end()) {
return callee;
}
auto it2 = it->second.find(insn);
if (it2 == it->second.end()) {
return callee;
}
return it2->second;
}
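/*
 * Inline the given callees into the caller at every matching call site.
 * With filter_via_should_inline, each call site is first vetted by the cost
 * model (call-site-specific where possible, otherwise the overall
 * should_inline_always estimate); constructors additionally go through
 * constructor analysis to make sure inlining them is safe.
 */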
void MultiMethodInliner::inline_callees(
DexMethod* caller,
const std::unordered_set<DexMethod*>& callees,
bool filter_via_should_inline) {
TraceContext context{caller};
std::vector<Inlinable> inlinables;
{
auto timer = m_inline_callees_timer.scope();
// walk the caller opcodes and collect all call sites of candidate callees
// to inline
editable_cfg_adapter::iterate_with_iterator(
caller->get_code(), [&](const IRList::iterator& it) {
auto insn = it->insn;
auto callee = get_callee(caller, insn);
if (!callee || !callees.count(callee)) {
return editable_cfg_adapter::LOOP_CONTINUE;
}
std::shared_ptr<cfg::ControlFlowGraph> reduced_cfg;
bool no_return{false};
size_t insn_size{0};
if (filter_via_should_inline) {
auto timer2 = m_inline_callees_should_inline_timer.scope();
// Cost model is based on fully inlining callee everywhere; let's
// see if we can get more detailed call-site specific information
if (should_inline_at_call_site(caller, insn, callee, &no_return,
&reduced_cfg, &insn_size)) {
always_assert(!no_return);
// Yes, we now might have a reduced_cfg and a refined insn_size
} else if (should_inline_always(callee)) {
// We'll fully inline the callee without any adjustments
no_return = false;
insn_size = get_callee_insn_size(callee);
} else if (no_return) {
always_assert(insn_size == 0);
always_assert(!reduced_cfg);
} else {
return editable_cfg_adapter::LOOP_CONTINUE;
}
} else {
insn_size = get_callee_insn_size(callee);
}
always_assert(callee->is_concrete());
if (m_analyze_and_prune_inits && method::is_init(callee) &&
!no_return) {
auto timer2 = m_inline_callees_init_timer.scope();
if (!callee->get_code()->editable_cfg_built()) {
return editable_cfg_adapter::LOOP_CONTINUE;
}
if (!can_inline_init(callee)) {
if (!method::is_init(caller) ||
caller->get_class() != callee->get_class() ||
!caller->get_code()->editable_cfg_built() ||
!constructor_analysis::can_inline_inits_in_same_class(
caller, callee, insn)) {
return editable_cfg_adapter::LOOP_CONTINUE;
}
}
}
inlinables.push_back((Inlinable){callee, it, insn, no_return,
std::move(reduced_cfg), insn_size});
return editable_cfg_adapter::LOOP_CONTINUE;
});
}
if (!inlinables.empty()) {
inline_inlinables(caller, inlinables);
}
}
size_t MultiMethodInliner::inline_callees(
DexMethod* caller,
const std::unordered_set<IRInstruction*>& insns,
std::vector<IRInstruction*>* deleted_insns) {
TraceContext context{caller};
std::vector<Inlinable> inlinables;
editable_cfg_adapter::iterate_with_iterator(
caller->get_code(), [&](const IRList::iterator& it) {
auto insn = it->insn;
if (insns.count(insn)) {
auto callee = get_callee(caller, insn);
if (callee == nullptr) {
return editable_cfg_adapter::LOOP_CONTINUE;
}
always_assert(callee->is_concrete());
inlinables.push_back((Inlinable){callee, it, insn, false, nullptr,
get_callee_insn_size(callee)});
}
return editable_cfg_adapter::LOOP_CONTINUE;
});
return inline_inlinables(caller, inlinables, deleted_insns);
}
bool MultiMethodInliner::inline_inlinables_need_deconstruct(DexMethod* method) {
// The mixed case, where the caller is an IRCode and the callee is a CFG, is
// used by Switch Inline (only?).
return !method->get_code()->editable_cfg_built();
}
namespace {
// Helper function; computing the inlining trace message can be expensive.
std::string create_inlining_trace_msg(const DexMethod* caller,
const DexMethod* callee,
IRInstruction* invoke_insn) {
std::ostringstream oss;
oss << "inline " << show(callee) << " into " << show(caller) << " ";
auto features = [&oss](const DexMethod* m, IRInstruction* insn) {
auto code = m->get_code();
auto regs = code->cfg_built() ? code->cfg().get_registers_size()
: code->get_registers_size();
auto opcodes = code->count_opcodes();
auto blocks = code->cfg_built() ? code->cfg().num_blocks() : (size_t)0;
auto edges = code->cfg_built() ? code->cfg().num_edges() : (size_t)0;
oss << regs << "!" << opcodes << "!" << blocks << "!" << edges;
// Expensive...
if (code->cfg_built()) {
loop_impl::LoopInfo info(code->cfg());
oss << "!" << info.num_loops();
size_t max_depth{0};
for (auto* loop : info) {
max_depth = std::max(max_depth, (size_t)loop->get_loop_depth());
}
oss << "!" << max_depth;
if (insn != nullptr) {
auto it = code->cfg().find_insn(insn);
loop_impl::Loop* loop{nullptr};
if (!it.is_end()) {
loop = info.get_loop_for(it.block());
}
if (loop != nullptr) {
oss << "!" << loop->get_loop_depth();
} else {
oss << "!" << 0;
}
} else {
oss << "!" << 0;
}
} else {
oss << "!0!0!0";
}
};
features(caller, invoke_insn);
oss << "!";
features(callee, nullptr);
return oss.str();
}
} // namespace
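/*
 * When inlining a static callee, the implicit class initialization triggered
 * by invoke-static may need to be preserved. Returns the type for which an
 * init-class instruction appears to be needed at the call site, or nullptr if
 * the callee is not static, is assumenosideeffects, or its class
 * initialization has no side effects.
 */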
DexType* MultiMethodInliner::get_needs_init_class(DexMethod* callee) const {
if (!is_static(callee) || assumenosideeffects(callee)) {
return nullptr;
}
auto insn =
m_shrinker.get_init_classes_with_side_effects().create_init_class_insn(
callee->get_class());
if (insn == nullptr) {
return nullptr;
}
auto type = const_cast<DexType*>(insn->get_type());
delete insn;
return type;
}
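/*
 * Perform the actual inlining of a batch of inlinables into one caller.
 * No-return call sites are handled first (replaced by an unreachable
 * throw-null sequence when configured); the remaining candidates are inlined
 * smallest-first. When the caller grows too large, unreachable-block removal
 * and, optionally, intermediate shrinking are attempted before giving up on a
 * call site. Returns the number of callees actually inlined.
 */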
size_t MultiMethodInliner::inline_inlinables(
DexMethod* caller_method,
const std::vector<Inlinable>& inlinables,
std::vector<IRInstruction*>* deleted_insns) {
auto timer = m_inline_inlinables_timer.scope();
if (for_speed() && m_ab_experiment_context->use_control()) {
return 0;
}
auto caller = caller_method->get_code();
std::unordered_set<IRCode*> need_deconstruct;
if (inline_inlinables_need_deconstruct(caller_method)) {
need_deconstruct.reserve(1 + inlinables.size());
need_deconstruct.insert(caller);
for (const auto& inlinable : inlinables) {
need_deconstruct.insert(inlinable.callee->get_code());
}
for (auto code : need_deconstruct) {
always_assert(!code->editable_cfg_built());
code->build_cfg(/* editable */ true);
// if (deleted_insns != nullptr) {
//   code->cfg().set_removed_insn_ownership(false);
// }
}
}
// attempt to inline all inlinable candidates
size_t estimated_caller_size = caller->editable_cfg_built()
? caller->cfg().sum_opcode_sizes()
: caller->sum_opcode_sizes();
// Prefer inlining smaller methods first, so that we are less likely to hit
// the overall size limit.
std::vector<Inlinable> ordered_inlinables(inlinables.begin(),
inlinables.end());
std::stable_sort(ordered_inlinables.begin(),
ordered_inlinables.end(),
[&](const Inlinable& a, const Inlinable& b) {
// First, prefer no-return inlinable, as they cut off
// control-flow and thus other inlinables.
if (a.no_return != b.no_return) {
return a.no_return > b.no_return;
}
// Second, prefer smaller methods, to avoid hitting size
// limits too soon
return a.insn_size < b.insn_size;
});
std::vector<DexMethod*> inlined_callees;
boost::optional<reg_t> cfg_next_caller_reg;
if (!m_config.unique_inlined_registers) {
cfg_next_caller_reg = caller->cfg().get_registers_size();
}
size_t calls_not_inlinable{0}, calls_not_inlined{0}, no_returns{0},
unreachable_insns{0}, caller_too_large{0};
size_t intermediate_shrinkings{0};
size_t intermediate_remove_unreachable_blocks{0};
// We only try intermediate remove-unreachable-blocks or shrinking when using
// the cfg-inliner, as it will invalidate irlist iterators, which are used
// with the legacy non-cfg-inliner.
size_t last_intermediate_inlined_callees{0};
// Once blocks may have been freed -- which can happen via
// remove_unreachable_blocks and shrinking -- callsite pointers are no
// longer valid.
std::unique_ptr<std::unordered_set<const IRInstruction*>> remaining_callsites;
auto recompute_remaining_callsites = [caller, &remaining_callsites,
&ordered_inlinables]() {
if (!remaining_callsites) {
remaining_callsites =
std::make_unique<std::unordered_set<const IRInstruction*>>();
for (const auto& inlinable : ordered_inlinables) {
remaining_callsites->insert(inlinable.insn);
}
}
std::unordered_set<const IRInstruction*> new_remaining_callsites;
for (auto& mie : InstructionIterable(caller->cfg())) {
if (mie.insn->has_method() && remaining_callsites->count(mie.insn)) {
new_remaining_callsites.insert(mie.insn);
}
}
always_assert(new_remaining_callsites.size() <=
remaining_callsites->size());
*remaining_callsites = std::move(new_remaining_callsites);
};
VisibilityChanges visibility_changes;
std::unordered_set<DexMethod*> visibility_changes_for;
size_t init_classes = 0;
for (const auto& inlinable : ordered_inlinables) {
auto callee_method = inlinable.callee;
auto callee = callee_method->get_code();
auto callsite_insn = inlinable.insn;
if (remaining_callsites && !remaining_callsites->count(callsite_insn)) {
if (!inlinable.no_return) {
calls_not_inlined++;
}
continue;
}
if (inlinable.no_return) {
if (!m_config.throw_after_no_return) {
continue;
}
// we are not actually inlining, but just cutting off control-flow
// afterwards, inserting an (unreachable) "throw null" instruction
// sequence.
auto& caller_cfg = caller->cfg();
auto callsite_it = caller_cfg.find_insn(callsite_insn);
if (!callsite_it.is_end()) {
if (m_config.unique_inlined_registers) {
cfg_next_caller_reg = caller_cfg.get_registers_size();
}
auto temp_reg = *cfg_next_caller_reg;
if (temp_reg >= caller_cfg.get_registers_size()) {
caller_cfg.set_registers_size(temp_reg + 1);
}
// Copying to avoid cfg limitation
auto* callsite_copy = new IRInstruction(*callsite_it->insn);
auto* const_insn = (new IRInstruction(OPCODE_CONST))
->set_dest(temp_reg)
->set_literal(0);
auto* throw_insn =
(new IRInstruction(OPCODE_THROW))->set_src(0, temp_reg);
caller_cfg.replace_insns(callsite_it,
{callsite_copy, const_insn, throw_insn});
auto p = caller_cfg.remove_unreachable_blocks();
auto unreachable_insn_count = p.first;
auto registers_size_possibly_reduced = p.second;
if (registers_size_possibly_reduced &&
m_config.unique_inlined_registers) {
caller_cfg.recompute_registers_size();
cfg_next_caller_reg = caller_cfg.get_registers_size();
}
if (unreachable_insn_count) {
unreachable_insns += unreachable_insn_count;
recompute_remaining_callsites();
}
estimated_caller_size = caller_cfg.sum_opcode_sizes();
no_returns++;
}
continue;
}
if (for_speed()) {
// This is expensive, but with shrinking/non-cfg inlining prep there's no
// better way. Needs an explicit check to see whether the instruction has
// already been shrunk away.
auto callsite_it = caller->cfg().find_insn(callsite_insn);
if (!callsite_it.is_end()) {
auto* block = callsite_it.block();
if (!m_inline_for_speed->should_inline_callsite(caller_method,
callee_method, block)) {
calls_not_inlinable++;
continue;
}
}
}
bool caller_too_large_;
auto not_inlinable = !is_inlinable(caller_method, callee_method,
callsite_insn, estimated_caller_size,
inlinable.insn_size, &caller_too_large_);
if (not_inlinable && caller_too_large_ &&
inlined_callees.size() > last_intermediate_inlined_callees) {
intermediate_remove_unreachable_blocks++;
last_intermediate_inlined_callees = inlined_callees.size();
auto p = caller->cfg().remove_unreachable_blocks();
unreachable_insns += p.first;
auto registers_size_possibly_reduced = p.second;
if (registers_size_possibly_reduced &&
m_config.unique_inlined_registers) {
caller->cfg().recompute_registers_size();
cfg_next_caller_reg = caller->cfg().get_registers_size();
}
estimated_caller_size = caller->cfg().sum_opcode_sizes();
recompute_remaining_callsites();
if (!remaining_callsites->count(callsite_insn)) {
calls_not_inlined++;
continue;
}
not_inlinable = !is_inlinable(caller_method, callee_method, callsite_insn,
estimated_caller_size, inlinable.insn_size,
&caller_too_large_);
if (!not_inlinable && m_config.intermediate_shrinking &&
m_shrinker.enabled()) {
intermediate_shrinkings++;
m_shrinker.shrink_method(caller_method);
cfg_next_caller_reg = caller->cfg().get_registers_size();
estimated_caller_size = caller->cfg().sum_opcode_sizes();
recompute_remaining_callsites();
if (!remaining_callsites->count(callsite_insn)) {
calls_not_inlined++;
continue;
}
not_inlinable = !is_inlinable(caller_method, callee_method,
callsite_insn, estimated_caller_size,
inlinable.insn_size, &caller_too_large_);
}
}
if (not_inlinable) {
if (caller_too_large_) {
caller_too_large++;
} else {
calls_not_inlinable++;
}
continue;
}
TRACE(MMINL, 4, "%s",
create_inlining_trace_msg(caller_method, callee_method, callsite_insn)
.c_str());
if (for_speed()) {
std::lock_guard<std::mutex> lock(ab_exp_mutex);
m_ab_experiment_context->try_register_method(caller_method);
}
if (m_config.unique_inlined_registers) {
cfg_next_caller_reg = caller->cfg().get_registers_size();
}
auto timer2 = m_inline_with_cfg_timer.scope();
auto it = m_inlined_invokes_need_cast.find(callsite_insn);
auto needs_receiver_cast =
it == m_inlined_invokes_need_cast.end() ? nullptr : it->second;
auto needs_init_class = get_needs_init_class(callee_method);
bool success = inliner::inline_with_cfg(
caller_method, callee_method, callsite_insn, needs_receiver_cast,
needs_init_class, *cfg_next_caller_reg, inlinable.reduced_cfg);
if (!success) {
calls_not_inlined++;
continue;
}
TRACE(INL, 2, "caller: %s\tcallee: %s",
caller->cfg_built() ? SHOW(caller->cfg()) : SHOW(caller),
SHOW(callee));
estimated_caller_size += inlinable.insn_size;
if (inlinable.reduced_cfg) {
visibility_changes.insert(get_visibility_changes(
*inlinable.reduced_cfg, caller_method->get_class(), callee_method));
} else {
visibility_changes_for.insert(callee_method);
}
if (is_static(callee_method)) {
init_classes++;
}
inlined_callees.push_back(callee_method);
if (type::is_kotlin_lambda(type_class(callee_method->get_class()))) {
info.kotlin_lambda_inlined++;
}
}
if (!inlined_callees.empty()) {
for (auto callee_method : visibility_changes_for) {
visibility_changes.insert(
get_visibility_changes(callee_method, caller_method->get_class()));
}
if (!visibility_changes.empty()) {
std::lock_guard<std::mutex> guard(m_visibility_changes_mutex);
if (m_delayed_visibility_changes) {
m_delayed_visibility_changes->insert(visibility_changes);
} else {
visibility_changes_apply_and_record_make_static(visibility_changes);
}
}
m_inlined.insert(inlined_callees.begin(), inlined_callees.end());
}
for (IRCode* code : need_deconstruct) {
code->clear_cfg(nullptr, deleted_insns);
}
info.calls_inlined += inlined_callees.size();
if (calls_not_inlinable) {
info.calls_not_inlinable += calls_not_inlinable;
}
if (calls_not_inlined) {
info.calls_not_inlined += calls_not_inlined;
}
if (no_returns) {
info.no_returns += no_returns;
}
if (unreachable_insns) {
info.unreachable_insns += unreachable_insns;
}
if (intermediate_shrinkings) {
info.intermediate_shrinkings += intermediate_shrinkings;
}
if (intermediate_remove_unreachable_blocks) {
info.intermediate_remove_unreachable_blocks +=
intermediate_remove_unreachable_blocks;
}
if (caller_too_large) {
info.caller_too_large += caller_too_large;
}
if (init_classes) {
info.init_classes += init_classes;
}
return inlined_callees.size();
}
void MultiMethodInliner::postprocess_method(DexMethod* method) {
TraceContext context(method);
if (m_shrinker.enabled() && !method->rstate.no_optimizations()) {
m_shrinker.shrink_method(method);
}
bool is_callee = !!callee_caller.count(method);
if (!is_callee) {
// This method isn't the callee of another caller, so we can stop here.
return;
}
compute_callee_costs(method);
}
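/*
 * Pre-populate the various cost caches for a method that acts as a callee,
 * now that its code has reached its final form. The work is attached to the
 * scheduler as augmentations so it can interleave with other tasks; the
 * should_inline_always cache is computed last, as it depends on the others.
 */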
void MultiMethodInliner::compute_callee_costs(DexMethod* method) {
auto fully_inlined_cost = get_fully_inlined_cost(method);
always_assert(fully_inlined_cost);
auto callee_call_site_invokes =
m_call_site_summarizer
? m_call_site_summarizer->get_callee_call_site_invokes(method)
: nullptr;
if (callee_call_site_invokes != nullptr) {
std::unordered_map<const CallSiteSummary*,
std::vector<const IRInstruction*>>
invokes;
for (auto invoke_insn : *callee_call_site_invokes) {
const auto* call_site_summary =
m_call_site_summarizer->get_instruction_call_site_summary(
invoke_insn);
always_assert(call_site_summary != nullptr);
invokes[call_site_summary].push_back(invoke_insn);
}
for (auto& p : invokes) {
m_scheduler.augment(method, [this, call_site_summary = p.first,
insns = p.second, method]() {
TraceContext context(method);
// Populate caches
auto timer = m_call_site_inlined_cost_timer.scope();
bool keep_reduced_cfg = false;
for (auto insn : insns) {
if (should_inline_at_call_site(nullptr, insn, method)) {
keep_reduced_cfg = true;
}
}
if (!keep_reduced_cfg) {
CalleeCallSiteSummary key{method, call_site_summary};
auto inlined_cost = m_call_site_inlined_costs.get(key, nullptr);
if (inlined_cost) {
inlined_cost->reduced_cfg.reset();
}
}
});
}
}
m_scheduler.augment(method, [this, method]() {
// Populate caches
get_callee_insn_size(method);
get_callee_type_refs(method);
if (m_ref_checkers) {
get_callee_code_refs(method);
}
});
// The should_inline_always caching depends on all other caches, so we augment
// it as a "continuation".
m_scheduler.augment(
method,
[this, method] {
TraceContext context(method);
// Populate cache
should_inline_always(method);
},
/* continuation */ true);
}
/**
* Defines the set of rules that determine whether a function is inlinable.
*/
bool MultiMethodInliner::is_inlinable(const DexMethod* caller,
const DexMethod* callee,
const IRInstruction* insn,
uint64_t estimated_caller_size,
uint64_t estimated_callee_size,
bool* caller_too_large_) {
TraceContext context{caller};
if (caller_too_large_) {
*caller_too_large_ = false;
}
// don't inline cross store references
if (cross_store_reference(caller, callee)) {
if (insn) {
log_nopt(INL_CROSS_STORE_REFS, caller, insn);
}
return false;
}
if (is_blocklisted(callee)) {
if (insn) {
log_nopt(INL_BLOCK_LISTED_CALLEE, callee);
}
return false;
}
if (caller_is_blocklisted(caller)) {
if (insn) {
log_nopt(INL_BLOCK_LISTED_CALLER, caller);
}
return false;
}
if (has_external_catch(callee)) {
if (insn) {
log_nopt(INL_EXTERN_CATCH, callee);
}
return false;
}
if (cannot_inline_opcodes(caller, callee, insn)) {
return false;
}
if (!callee->rstate.force_inline()) {
// Don't inline code into a method that doesn't have the same (or higher)
// required API. We don't want to bring API specific code into a class
// where it's not supported.
int32_t callee_api = api::LevelChecker::get_method_level(callee);
if (callee_api != api::LevelChecker::get_min_level() &&
callee_api > api::LevelChecker::get_method_level(caller)) {
// check callee_api against the minimum and short-circuit because most
// methods don't have a required api and we want that to be fast.
if (insn) {
log_nopt(INL_REQUIRES_API, caller, insn);
}
TRACE(MMINL, 4,
"Refusing to inline %s"
" into %s\n because of API boundaries.",
show_deobfuscated(callee).c_str(),
show_deobfuscated(caller).c_str());
info.api_level_mismatch++;
return false;
}
if (callee->rstate.dont_inline()) {
if (insn) {
log_nopt(INL_DO_NOT_INLINE, caller, insn);
}
return false;
}
if (caller_too_large(caller->get_class(), estimated_caller_size,
estimated_callee_size)) {
if (insn) {
log_nopt(INL_TOO_BIG, caller, insn);
}
if (caller_too_large_) {
*caller_too_large_ = true;
}
return false;
}
if (caller->get_class() != callee->get_class() && m_ref_checkers &&
problematic_refs(caller, callee)) {
return false;
}
}
return true;
}
/**
* Return whether the method or any of its ancestors are in the blocklist.
* Typically used to prevent inlining / deletion of methods that are called
* via reflection.
*/
bool MultiMethodInliner::is_blocklisted(const DexMethod* callee) {
auto cls = type_class(callee->get_class());
// Enums' kept methods are all excluded.
if (is_enum(cls) && root(callee)) {
return true;
}
while (cls != nullptr) {
if (m_config.get_blocklist().count(cls->get_type())) {
info.blocklisted++;
return true;
}
cls = type_class(cls->get_super_class());
}
return false;
}
bool MultiMethodInliner::is_estimate_over_max(uint64_t estimated_caller_size,
uint64_t estimated_callee_size,
uint64_t max) {
// The configured instruction-size buffer is added because the final method
// size is often larger than our estimate -- during the sync phase, we may
// have to pick larger branch opcodes to encode large jumps.
if (estimated_caller_size + estimated_callee_size >
max - std::min(m_config.instruction_size_buffer, max)) {
return true;
}
return false;
}
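/*
 * Two limits apply: the hard 2^32 code-unit limit that dex can encode, and a
 * configurable soft per-method limit, which may be disabled globally or
 * bypassed for allowlisted classes.
 */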
bool MultiMethodInliner::caller_too_large(DexType* caller_type,
uint64_t estimated_caller_size,
uint64_t estimated_callee_size) {
if (is_estimate_over_max(estimated_caller_size, estimated_callee_size,
HARD_MAX_INSTRUCTION_SIZE)) {
return true;
}
if (!m_config.enforce_method_size_limit) {
return false;
}
if (m_config.allowlist_no_method_limit.count(caller_type)) {
return false;
}
if (is_estimate_over_max(estimated_caller_size, estimated_callee_size,
m_config.soft_max_instruction_size)) {
return true;
}
return false;
}
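/*
 * Cheap checks that avoid the full cost model: when inlining for speed, the
 * call graph was already pruned; force-inline methods are always inlined; and
 * a non-root method with a single call site can be inlined and then deleted.
 */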
bool MultiMethodInliner::should_inline_fast(const DexMethod* callee) {
if (for_speed()) {
// inline_for_speed::should_inline was used earlier to prune the static
// call-graph
return true;
}
if (callee->rstate.force_inline()) {
return true;
}
// non-root methods that are only ever called once should always be inlined,
// as the method can be removed afterwards
const auto& callers = callee_caller.at(callee);
if (callers.size() == 1 && callers.begin()->second == 1 && !root(callee) &&
!m_recursive_callees.count(callee) && !m_x_dex_callees.count(callee) &&
!m_true_virtual_callees_with_other_call_sites.count(callee)) {
return true;
}
return false;
}
bool MultiMethodInliner::should_inline_always(const DexMethod* callee) {
if (should_inline_fast(callee)) {
return true;
}
auto res = m_should_inline.get(callee, boost::none);
if (res) {
return *res;
}
always_assert(!for_speed());
always_assert(!callee->rstate.force_inline());
if (too_many_callers(callee)) {
log_nopt(INL_TOO_MANY_CALLERS, callee);
res = false;
} else {
res = true;
}
m_should_inline.emplace(callee, res);
return *res;
}
size_t MultiMethodInliner::get_callee_insn_size(const DexMethod* callee) {
if (m_callee_insn_sizes) {
const auto absent = std::numeric_limits<size_t>::max();
auto size = m_callee_insn_sizes->get(callee, absent);
if (size != absent) {
return size;
}
}
const IRCode* code = callee->get_code();
auto size = code->editable_cfg_built() ? code->cfg().sum_opcode_sizes()
: code->sum_opcode_sizes();
if (m_callee_insn_sizes) {
m_callee_insn_sizes->emplace(callee, size);
}
return size;
}
/*
* Estimate additional costs if an instruction takes many source registers.
*/
static size_t get_inlined_regs_cost(size_t regs) {
size_t cost{0};
if (regs > 3) {
if (regs > 5) {
// invoke with many args will likely need extra moves
cost += regs;
} else {
cost += regs / 2;
}
}
return cost;
}
static float get_invoke_cost(const DexMethod* callee, float result_used) {
float invoke_cost = COST_INVOKE + result_used * COST_MOVE_RESULT;
invoke_cost += get_inlined_regs_cost(callee->get_proto()->get_args()->size());
return invoke_cost;
}
/*
* Try to estimate number of code units (2 bytes each) of an instruction.
* - Ignore internal opcodes because they do not take up any space in the
* final dex file.
* - Ignore move opcodes with the hope that RegAlloc will eliminate most of
* them.
* - Remove return opcodes, as they will disappear when gluing things
* together.
*/
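// For example, under this estimate an invoke-virtual with two source
// registers costs 1 (base) + 1 (method reference) = 2 code units, while a
// const-wide whose literal does not fit into 32 bits costs 1 + 4 = 5.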
static size_t get_inlined_cost(IRInstruction* insn) {
auto op = insn->opcode();
size_t cost{0};
if (opcode::is_an_internal(op) || opcode::is_a_move(op) ||
opcode::is_a_return(op)) {
if (op == IOPCODE_INIT_CLASS) {
cost += 2;
}
} else {
cost++;
auto regs = insn->srcs_size() +
((insn->has_dest() || insn->has_move_result_pseudo()) ? 1 : 0);
cost += get_inlined_regs_cost(regs);
if (op == OPCODE_MOVE_EXCEPTION) {
cost += 8; // accounting for book-keeping overhead of throw-blocks
} else if (insn->has_method() || insn->has_field() || insn->has_type() ||
insn->has_string()) {
cost++;
} else if (insn->has_data()) {
cost += 4 + insn->get_data()->size();
} else if (insn->has_literal()) {
auto lit = insn->get_literal();
if (lit < -2147483648 || lit > 2147483647) {
cost += 4;
} else if (lit < -32768 || lit > 32767) {
cost += 2;
} else if (opcode::is_a_const(op) && (lit < -8 || lit > 7)) {
cost++;
} else if (!opcode::is_a_const(op) && (lit < -128 || lit > 127)) {
cost++;
}
}
}
TRACE(INLINE, 5, " %zu: %s", cost, SHOW(insn));
return cost;
}
/*
* Try to estimate number of code units (2 bytes each) overhead (instructions,
* metadata) that exists for this block; this doesn't include the cost of
* the instructions in the block, which are accounted for elsewhere.
*/
static size_t get_inlined_cost(const std::vector<cfg::Block*>& blocks,
size_t index,
const std::vector<cfg::Edge*>& succs) {
auto block = blocks.at(index);
switch (block->branchingness()) {
case opcode::Branchingness::BRANCH_GOTO:
case opcode::Branchingness::BRANCH_IF:
case opcode::Branchingness::BRANCH_SWITCH: {
if (succs.empty()) {
return 0;
}
if (succs.size() > 2) {
// a switch
return 4 + 3 * succs.size();
}
// a (possibly conditional) branch; each feasible non-fallthrough edge has a
// cost
size_t cost{0};
auto next_block =
index == blocks.size() - 1 ? nullptr : blocks.at(index + 1);
for (auto succ : succs) {
always_assert(succ->target() != nullptr);
if (next_block != succ->target()) {
// we have a non-fallthrough edge
cost++;
}
}
return cost;
}
default:
return 0;
}
}
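/*
 * Given a call-site summary, produce a reduced copy of the callee's CFG:
 * returns are rewritten to return-void if the result is unused, and
 * constant-propagation (seeded with the call site's argument values) plus
 * local-dce prune code that cannot execute for this particular call site.
 * Returns nullptr if the summary cannot lead to any simplification.
 */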
std::shared_ptr<cfg::ControlFlowGraph>
MultiMethodInliner::apply_call_site_summary(
bool is_static,
DexType* declaring_type,
DexProto* proto,
const cfg::ControlFlowGraph& original_cfg,
const CallSiteSummary* call_site_summary) {
if (!call_site_summary) {
return nullptr;
}
if (call_site_summary->arguments.is_top()) {
if (proto->is_void() || call_site_summary->result_used) {
return nullptr;
}
}
// Clone original cfg
IRCode cloned_code(std::make_unique<cfg::ControlFlowGraph>());
original_cfg.deep_copy(&cloned_code.cfg());
// If result is not used, change all return-* instructions to return-void (and
// let local-dce remove the code that leads to it).
if (!proto->is_void() && !call_site_summary->result_used) {
proto = DexProto::make_proto(type::_void(), proto->get_args());
for (auto& mie : InstructionIterable(cloned_code.cfg())) {
if (opcode::is_a_return(mie.insn->opcode())) {
mie.insn->set_opcode(OPCODE_RETURN_VOID);
mie.insn->set_srcs_size(0);
}
}
}
// Run constant-propagation with call-site specific arguments, and then run
// local-dce
ConstantEnvironment initial_env =
constant_propagation::interprocedural::env_with_params(
is_static, &cloned_code, call_site_summary->arguments);
constant_propagation::Transform::Config config;
// No need to add extra instructions to load constant params; we'll pass those
// in anyway.
config.add_param_const = false;
m_shrinker.constant_propagation(is_static, declaring_type, proto,
&cloned_code, initial_env, config);
m_shrinker.local_dce(&cloned_code, /* normalize_new_instances */ false,
declaring_type);
// Re-build cfg once more to get linearized representation, good for
// predicting fallthrough branches
cloned_code.build_cfg(/* editable */ true);
// And a final clone to move the cfg into a long-lived shared-ptr.
auto res = std::make_shared<cfg::ControlFlowGraph>();
cloned_code.cfg().deep_copy(res.get());
res->set_insn_ownership(true);
cloned_code.cfg().set_insn_ownership(true);
// Note that we are not clearing the cloned_code.cfg(). It will get destroyed
// now, and delete all instruction copies that it now owns.
return res;
}
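/*
 * Estimate the cost of inlining this code, optionally specialized for a
 * call-site summary: per-instruction costs plus per-block branching overhead
 * are summed over the (possibly reduced) CFG, and non-external method, field
 * and type references are collected to estimate the cross-dex impact.
 */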
InlinedCost MultiMethodInliner::get_inlined_cost(
bool is_static,
DexType* declaring_type,
DexProto* proto,
const IRCode* code,
const CallSiteSummary* call_site_summary) {
size_t cost{0};
std::shared_ptr<cfg::ControlFlowGraph> reduced_cfg;
size_t returns{0};
std::unordered_set<DexMethodRef*> method_refs_set;
std::unordered_set<const void*> other_refs_set;
auto analyze_refs = [&](IRInstruction* insn) {
if (insn->has_method()) {
auto cls = type_class(insn->get_method()->get_class());
if (cls && !cls->is_external()) {
method_refs_set.insert(insn->get_method());
}
}
if (insn->has_field()) {
auto cls = type_class(insn->get_field()->get_class());
if (cls && !cls->is_external()) {
other_refs_set.insert(insn->get_field());
}
}
if (insn->has_type()) {
auto type = type::get_element_type_if_array(insn->get_type());
auto cls = type_class(type);
if (cls && !cls->is_external()) {
other_refs_set.insert(type);
}
}
};
size_t insn_size;
if (code->editable_cfg_built()) {
reduced_cfg = apply_call_site_summary(is_static, declaring_type, proto,
code->cfg(), call_site_summary);
auto cfg = reduced_cfg ? reduced_cfg.get() : &code->cfg();
auto blocks = cfg->blocks();
for (size_t i = 0; i < blocks.size(); ++i) {
auto block = blocks.at(i);
for (auto& mie : InstructionIterable(block)) {
auto insn = mie.insn;
cost += ::get_inlined_cost(insn);
analyze_refs(insn);
}
cost += ::get_inlined_cost(blocks, i, block->succs());
if (block->branchingness() == opcode::Branchingness::BRANCH_RETURN) {
returns++;
}
}
insn_size = cfg->sum_opcode_sizes();
} else {
editable_cfg_adapter::iterate(code, [&](const MethodItemEntry& mie) {
auto insn = mie.insn;
cost += ::get_inlined_cost(insn);
if (opcode::is_a_return(insn->opcode())) {
returns++;
}
analyze_refs(insn);
return editable_cfg_adapter::LOOP_CONTINUE;
});
insn_size = code->sum_opcode_sizes();
}
if (returns > 1) {
// if there's more than one return, gotos will get introduced to merge
// control flow
cost += returns - 1;
}
auto result_used =
call_site_summary ? call_site_summary->result_used : !proto->is_void();
return (InlinedCost){cost,
(float)cost,
(float)method_refs_set.size(),
(float)other_refs_set.size(),
!returns,
(float)result_used,
std::move(reduced_cfg),
insn_size};
}
const InlinedCost* MultiMethodInliner::get_fully_inlined_cost(
const DexMethod* callee) {
auto inlined_cost = m_fully_inlined_costs.get(callee, nullptr);
if (inlined_cost) {
return inlined_cost.get();
}
inlined_cost = std::make_shared<InlinedCost>(
get_inlined_cost(is_static(callee), callee->get_class(),
callee->get_proto(), callee->get_code()));
TRACE(INLINE, 4, "get_fully_inlined_cost(%s) = {%zu,%f,%f,%f,%s,%f,%d,%zu}",
SHOW(callee), inlined_cost->full_code, inlined_cost->code,
inlined_cost->method_refs, inlined_cost->other_refs,
inlined_cost->no_return ? "no_return" : "return",
inlined_cost->result_used, !!inlined_cost->reduced_cfg,
inlined_cost->insn_size);
m_fully_inlined_costs.update(
callee,
[&](const DexMethod*, std::shared_ptr<InlinedCost>& value, bool exists) {
if (exists) {
// We wasted some work, and some other thread beat us. Oh well...
always_assert(*value == *inlined_cost);
inlined_cost = value;
return;
}
value = inlined_cost;
});
return inlined_cost.get();
}
const InlinedCost* MultiMethodInliner::get_call_site_inlined_cost(
const IRInstruction* invoke_insn, const DexMethod* callee) {
auto res = m_invoke_call_site_inlined_costs.get(invoke_insn, boost::none);
if (res) {
return *res;
}
auto call_site_summary =
m_call_site_summarizer
? m_call_site_summarizer->get_instruction_call_site_summary(
invoke_insn)
: nullptr;
if (call_site_summary == nullptr) {
res = nullptr;
} else {
res = get_call_site_inlined_cost(call_site_summary, callee);
}
always_assert(res);
m_invoke_call_site_inlined_costs.emplace(invoke_insn, res);
return *res;
}
const InlinedCost* MultiMethodInliner::get_call_site_inlined_cost(
const CallSiteSummary* call_site_summary, const DexMethod* callee) {
auto fully_inlined_cost = get_fully_inlined_cost(callee);
always_assert(fully_inlined_cost);
if (fully_inlined_cost->full_code > MAX_COST_FOR_CONSTANT_PROPAGATION) {
return nullptr;
}
CalleeCallSiteSummary key{callee, call_site_summary};
auto inlined_cost = m_call_site_inlined_costs.get(key, nullptr);
if (inlined_cost) {
return inlined_cost.get();
}
inlined_cost = std::make_shared<InlinedCost>(get_inlined_cost(
is_static(callee), callee->get_class(), callee->get_proto(),
callee->get_code(), call_site_summary));
TRACE(INLINE, 4,
"get_call_site_inlined_cost(%s) = {%zu,%f,%f,%f,%s,%f,%d,%zu}",
call_site_summary->get_key().c_str(), inlined_cost->full_code,
inlined_cost->code, inlined_cost->method_refs, inlined_cost->other_refs,
inlined_cost->no_return ? "no_return" : "return",
inlined_cost->result_used, !!inlined_cost->reduced_cfg,
inlined_cost->insn_size);
if (inlined_cost->insn_size >= fully_inlined_cost->insn_size) {
inlined_cost->reduced_cfg.reset();
}
m_call_site_inlined_costs.update(key,
[&](const CalleeCallSiteSummary&,
std::shared_ptr<InlinedCost>& value,
bool exists) {
if (exists) {
// We wasted some work, and some other
// thread beat us. Oh well...
always_assert(*value == *inlined_cost);
inlined_cost = value;
return;
}
value = inlined_cost;
});
return inlined_cost.get();
}
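/*
 * Combine the per-call-site inlined costs into a single estimate for the
 * callee, weighted by how often each call-site summary occurs. Falls back to
 * the fully-inlined cost when the callee is too large for constant
 * propagation or no call-site summaries are available.
 */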
const InlinedCost* MultiMethodInliner::get_average_inlined_cost(
const DexMethod* callee) {
auto inlined_cost = m_average_inlined_costs.get(callee, nullptr);
if (inlined_cost) {
return inlined_cost.get();
}
auto fully_inlined_cost = get_fully_inlined_cost(callee);
always_assert(fully_inlined_cost);
size_t callees_analyzed{0};
size_t callees_unused_results{0};
size_t callees_no_return{0};
const std::vector<CallSiteSummaryOccurrences>*
callee_call_site_summary_occurrences;
if (fully_inlined_cost->full_code > MAX_COST_FOR_CONSTANT_PROPAGATION ||
!(callee_call_site_summary_occurrences =
m_call_site_summarizer
? m_call_site_summarizer
->get_callee_call_site_summary_occurrences(callee)
: nullptr)) {
inlined_cost = std::make_shared<InlinedCost>(*fully_inlined_cost);
} else {
inlined_cost = std::make_shared<InlinedCost>((InlinedCost){
fully_inlined_cost->full_code, 0.0f, 0.0f, 0.0f, true, 0.0f, {}, 0});
bool callee_has_result = !callee->get_proto()->is_void();
for (auto& p : *callee_call_site_summary_occurrences) {
const auto call_site_summary = p.first;
const auto count = p.second;
auto call_site_inlined_cost =
get_call_site_inlined_cost(call_site_summary, callee);
always_assert(call_site_inlined_cost);
if (callee_has_result && !call_site_summary->result_used) {
callees_unused_results += count;
}
inlined_cost->code += call_site_inlined_cost->code * count;
inlined_cost->method_refs += call_site_inlined_cost->method_refs * count;
inlined_cost->other_refs += call_site_inlined_cost->other_refs * count;
inlined_cost->result_used += call_site_inlined_cost->result_used * count;
if (call_site_inlined_cost->no_return) {
callees_no_return++;
} else {
inlined_cost->no_return = false;
}
if (call_site_inlined_cost->insn_size > inlined_cost->insn_size) {
inlined_cost->insn_size = call_site_inlined_cost->insn_size;
}
callees_analyzed += count;
};
always_assert(callees_analyzed > 0);
// compute average costs
inlined_cost->code /= callees_analyzed;
inlined_cost->method_refs /= callees_analyzed;
inlined_cost->other_refs /= callees_analyzed;
inlined_cost->result_used /= callees_analyzed;
}
TRACE(INLINE, 4, "get_average_inlined_cost(%s) = {%zu,%f,%f,%f,%s,%f,%zu}",
SHOW(callee), inlined_cost->full_code, inlined_cost->code,
inlined_cost->method_refs, inlined_cost->other_refs,
inlined_cost->no_return ? "no_return" : "return",
inlined_cost->result_used, inlined_cost->insn_size);
m_average_inlined_costs.update(
callee,
[&](const DexMethod*, std::shared_ptr<InlinedCost>& value, bool exists) {
if (exists) {
// We wasted some work, and some other thread beat us. Oh well...
always_assert(*value == *inlined_cost);
inlined_cost = value;
return;
}
value = inlined_cost;
if (callees_analyzed == 0) {
return;
}
info.constant_invoke_callees_analyzed += callees_analyzed;
info.constant_invoke_callees_unused_results += callees_unused_results;
info.constant_invoke_callees_no_return += callees_no_return;
});
return inlined_cost.get();
}
bool MultiMethodInliner::can_inline_init(const DexMethod* init_method) {
auto opt_can_inline_init = m_can_inline_init.get(init_method, boost::none);
if (opt_can_inline_init) {
return *opt_can_inline_init;
}
const auto* finalizable_fields = m_shrinker.get_finalizable_fields();
bool res =
constructor_analysis::can_inline_init(init_method, finalizable_fields);
m_can_inline_init.update(
init_method,
[&](const DexMethod*, boost::optional<bool>& value, bool exists) {
if (exists) {
// We wasted some work, and some other thread beat us. Oh well...
always_assert(*value == res);
return;
}
value = res;
});
return res;
}
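/*
 * Decide whether inlining this callee at all call sites would be a net size
 * win. Roughly, inlining everywhere pays off when
 *   code * callers + classes * cross_dex_penalty
 *     <= invoke_cost * callers + method_cost
 * where method_cost is COST_METHOD + full_code if the callee could be deleted
 * afterwards, and 0 otherwise; per-caller is_inlinable checks can still veto.
 */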
bool MultiMethodInliner::too_many_callers(const DexMethod* callee) {
bool can_delete_callee = true;
if (root(callee) || m_recursive_callees.count(callee) ||
m_x_dex_callees.count(callee) ||
m_true_virtual_callees_with_other_call_sites.count(callee) ||
!m_config.multiple_callers) {
if (m_config.use_call_site_summaries) {
return true;
}
can_delete_callee = false;
}
const auto& callers = callee_caller.at(callee);
// Can we inline the init-callee into all callers?
// If not, then we can give up, as there's no point in making the case that
// we can eliminate the callee method based on pervasive inlining.
if (m_analyze_and_prune_inits && method::is_init(callee)) {
if (!callee->get_code()->editable_cfg_built()) {
return true;
}
if (!can_inline_init(callee)) {
for (auto& p : callers) {
auto caller = p.first;
if (!method::is_init(caller) ||
caller->get_class() != callee->get_class() ||
!caller->get_code()->editable_cfg_built() ||
!constructor_analysis::can_inline_inits_in_same_class(
caller, callee,
/* callsite_insn */ nullptr)) {
return true;
}
}
}
}
// 1. Determine costs of inlining
auto inlined_cost = get_average_inlined_cost(callee);
always_assert(inlined_cost);
boost::optional<CalleeCallerRefs> callee_caller_refs;
float cross_dex_penalty{0};
if (m_cross_dex_penalty && !is_private(callee)) {
callee_caller_refs = get_callee_caller_refs(callee);
if (callee_caller_refs->same_class) {
callee_caller_refs = boost::none;
} else {
// Inlining methods into different classes might lead to worse
// cross-dex-ref minimization results.
cross_dex_penalty = inlined_cost->method_refs;
if (callee_caller_refs->classes > 1 &&
(inlined_cost->method_refs + inlined_cost->other_refs) > 0) {
cross_dex_penalty++;
}
}
}
// 2. Determine costs of keeping the invoke instruction
size_t caller_count{0};
for (auto& p : callers) {
caller_count += p.second;
}
float invoke_cost = get_invoke_cost(callee, inlined_cost->result_used);
TRACE(INLINE, 3,
"[too_many_callers] %zu calls to %s; cost: inlined %f + %f, invoke %f",
caller_count, SHOW(callee), inlined_cost->code, cross_dex_penalty,
invoke_cost);
size_t classes = callee_caller_refs ? callee_caller_refs->classes : 0;
size_t method_cost = 0;
if (can_delete_callee) {
// The cost of keeping a method amounts to a somewhat fixed metadata overhead,
// plus the method body, which we approximate with the inlined cost.
method_cost = COST_METHOD + inlined_cost->full_code;
}
// If we inline invocations to this method everywhere, we could delete the
// method. Is this worth it, given the number of callsites and costs
// involved?
if (inlined_cost->code * caller_count + classes * cross_dex_penalty >
invoke_cost * caller_count + method_cost) {
return true;
}
if (can_delete_callee) {
// We are going to call is_inlinable for all callers, and we are going to
// short circuit as those calls are potentially expensive. However,
// is_inlinable is recording some metrics around how often certain things
// occur, so we are creating an ordered list of callers here to make sure we
// always call is_inlinable in the same way.
std::vector<DexMethod*> ordered_callers;
ordered_callers.reserve(callers.size());
for (auto& p : callers) {
ordered_callers.push_back(p.first);
}
std::sort(ordered_callers.begin(), ordered_callers.end(),
compare_dexmethods);
// We can't eliminate the method entirely if it's not inlinable
for (auto caller : ordered_callers) {
// We can't account here in detail for the caller and callee size. We hope
// for the best, and assume that the caller is empty, and we'll use the
// (maximum) insn_size for all inlined-costs. We'll check later again at
// each individual call-site whether the size limits hold.
if (!is_inlinable(caller, callee, /* insn */ nullptr,
/* estimated_caller_size */ 0,
/* estimated_callee_size */ inlined_cost->insn_size)) {
return true;
}
}
}
return false;
}
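/*
 * Per-call-site decision: inline when the call-site-specific inlined cost
 * plus any cross-dex penalty does not exceed the cost of keeping the invoke.
 * On a negative decision, also reports whether the callee is known not to
 * return for this call site, so the caller can cut off control flow instead.
 */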
bool MultiMethodInliner::should_inline_at_call_site(
DexMethod* caller,
const IRInstruction* invoke_insn,
DexMethod* callee,
bool* no_return,
std::shared_ptr<cfg::ControlFlowGraph>* reduced_cfg,
size_t* insn_size) {
auto inlined_cost = get_call_site_inlined_cost(invoke_insn, callee);
if (!inlined_cost) {
return false;
}
float cross_dex_penalty{0};
if (m_cross_dex_penalty && !is_private(callee) &&
(caller == nullptr || caller->get_class() != callee->get_class())) {
// Inlining methods into different classes might lead to worse
// cross-dex-ref minimization results.
cross_dex_penalty = inlined_cost->method_refs;
if (inlined_cost->method_refs + inlined_cost->other_refs > 0) {
cross_dex_penalty++;
}
}
float invoke_cost = get_invoke_cost(callee, inlined_cost->result_used);
if (inlined_cost->code + cross_dex_penalty > invoke_cost) {
if (no_return) {
*no_return = inlined_cost->no_return;
}
return false;
}
if (reduced_cfg) {
*reduced_cfg = inlined_cost->reduced_cfg;
}
if (insn_size) {
*insn_size = inlined_cost->insn_size;
}
return true;
}
bool MultiMethodInliner::caller_is_blocklisted(const DexMethod* caller) {
auto cls = caller->get_class();
if (m_config.get_caller_blocklist().count(cls)) {
info.blocklisted++;
return true;
}
return false;
}
/**
* Returns true if the callee has catch type which is external and not public,
* in which case we cannot inline.
*/
bool MultiMethodInliner::has_external_catch(const DexMethod* callee) {
std::vector<DexType*> types;
callee->get_code()->gather_catch_types(types);
for (auto type : types) {
auto cls = type_class(type);
if (cls != nullptr && cls->is_external() && !is_public(cls)) {
return true;
}
}
return false;
}
/**
* Analyze opcodes in the callee to see if they are problematic for inlining.
*/
bool MultiMethodInliner::cannot_inline_opcodes(const DexMethod* caller,
const DexMethod* callee,
const IRInstruction* invk_insn) {
bool can_inline = true;
editable_cfg_adapter::iterate(
callee->get_code(), [&](const MethodItemEntry& mie) {
auto insn = mie.insn;
if (create_vmethod(insn, callee, caller)) {
if (invk_insn) {
log_nopt(INL_CREATE_VMETH, caller, invk_insn);
}
can_inline = false;
return editable_cfg_adapter::LOOP_BREAK;
}
if (outlined_invoke_outlined(insn, caller)) {
can_inline = false;
return editable_cfg_adapter::LOOP_BREAK;
}
// if the caller and callee are in the same class, we don't have to
// worry about invoke supers, or unknown virtuals -- private /
// protected methods will remain accessible
if (caller->get_class() != callee->get_class()) {
if (nonrelocatable_invoke_super(insn)) {
if (invk_insn) {
log_nopt(INL_HAS_INVOKE_SUPER, caller, invk_insn);
}
can_inline = false;
return editable_cfg_adapter::LOOP_BREAK;
}
if (unknown_virtual(insn)) {
if (invk_insn) {
log_nopt(INL_UNKNOWN_VIRTUAL, caller, invk_insn);
}
can_inline = false;
return editable_cfg_adapter::LOOP_BREAK;
}
if (unknown_field(insn)) {
if (invk_insn) {
log_nopt(INL_UNKNOWN_FIELD, caller, invk_insn);
}
can_inline = false;
return editable_cfg_adapter::LOOP_BREAK;
}
if (check_android_os_version(insn)) {
can_inline = false;
return editable_cfg_adapter::LOOP_BREAK;
}
}
if (!m_config.throws_inline && insn->opcode() == OPCODE_THROW) {
info.throws++;
can_inline = false;
return editable_cfg_adapter::LOOP_BREAK;
}
return editable_cfg_adapter::LOOP_CONTINUE;
});
if (!can_inline) {
return true;
}
if (m_config.respect_sketchy_methods) {
auto timer = m_cannot_inline_sketchy_code_timer.scope();
if (monitor_count::cannot_inline_sketchy_code(
*caller->get_code(), *callee->get_code(), invk_insn)) {
return true;
}
}
return false;
}
/**
 * Check if a visibility/accessibility change would be needed to turn a
 * direct method referenced in the callee into a virtual method once the
 * callee is inlined into the caller.
 * That is, once a callee is inlined we need to ensure that everything that
 * was referenced by the callee is visible and accessible in the caller's
 * context.
 * This step would not be needed if we changed all private instance methods
 * to static.
*/
bool MultiMethodInliner::create_vmethod(IRInstruction* insn,
const DexMethod* callee,
const DexMethod* caller) {
auto opcode = insn->opcode();
if (opcode == OPCODE_INVOKE_DIRECT) {
auto method =
m_concurrent_resolver(insn->get_method(), MethodSearch::Direct);
if (method == nullptr) {
info.need_vmethod++;
return true;
}
always_assert(method->is_def());
if (caller->get_class() == callee->get_class()) {
// No need to give up here, or make it static. Visibility is just fine.
return false;
}
if (method::is_init(method)) {
if (!method->is_concrete() && !is_public(method)) {
info.non_pub_ctor++;
return true;
}
// concrete ctors we can handle because they stay invoke_direct
return false;
}
if (!can_rename(method)) {
info.need_vmethod++;
return true;
}
}
return false;
}
bool MultiMethodInliner::outlined_invoke_outlined(IRInstruction* insn,
const DexMethod* caller) {
if (!PositionPatternSwitchManager::
CAN_OUTLINED_METHOD_INVOKE_OUTLINED_METHOD &&
insn->opcode() == OPCODE_INVOKE_STATIC && is_outlined_method(caller) &&
is_outlined_method(insn->get_method())) {
// TODO: Remove this limitation imposed by symbolication infrastructure.
return true;
}
return false;
}
/**
* Return true if a callee contains an invoke super to a different method
* in the hierarchy.
* Inlining an invoke_super off its class hierarchy would break the verifier.
*/
bool MultiMethodInliner::nonrelocatable_invoke_super(IRInstruction* insn) {
if (insn->opcode() == OPCODE_INVOKE_SUPER) {
info.invoke_super++;
return true;
}
return false;
}
/**
 * The callee contains an invoke to a virtual method that we either do not
 * know or that is not public. Given that the caller may not be in the same
 * hierarchy/package, we cannot inline it unless we make the method public.
 * But we would need to make all methods across the hierarchy public, and for
 * methods we don't know, we have no idea whether they were public anyway.
*/
bool MultiMethodInliner::unknown_virtual(IRInstruction* insn) {
if (insn->opcode() == OPCODE_INVOKE_VIRTUAL) {
auto method = insn->get_method();
auto res_method = m_concurrent_resolver(method, MethodSearch::Virtual);
if (res_method == nullptr) {
info.unresolved_methods++;
if (unknown_virtuals::is_method_known_to_be_public(method)) {
info.known_public_methods++;
return false;
}
info.escaped_virtual++;
return true;
}
if (res_method->is_external() && !is_public(res_method)) {
info.non_pub_virtual++;
return true;
}
}
return false;
}
/**
 * The callee contains a *get/put instruction on a field we cannot resolve.
 * Since the caller may not be in the same hierarchy/package, we cannot inline
 * the callee unless we make the field public. But that would require making
 * the field public across the whole hierarchy, and for fields we cannot
 * resolve we don't even know whether they were public to begin with.
 */
bool MultiMethodInliner::unknown_field(IRInstruction* insn) {
if (opcode::is_an_ifield_op(insn->opcode()) ||
opcode::is_an_sfield_op(insn->opcode())) {
auto ref = insn->get_field();
DexField* field = resolve_field(ref, opcode::is_an_sfield_op(insn->opcode())
? FieldSearch::Static
: FieldSearch::Instance);
if (field == nullptr) {
info.escaped_field++;
return true;
}
if (!field->is_concrete() && !is_public(field)) {
info.non_pub_field++;
return true;
}
}
return false;
}
/**
* return true if `insn` is
* sget android.os.Build.VERSION.SDK_INT
*/
bool MultiMethodInliner::check_android_os_version(IRInstruction* insn) {
// Referencing a method or field that doesn't exist on the OS version of the
// current device causes a "soft error" for the entire class that the
// reference resides in. Soft errors aren't worrisome from a correctness
// perspective (though they may cause the class to run slower on some
// devices) but there's a bug in Android 5 that triggers an erroneous "hard
// error" after a "soft error".
//
// The exact conditions that trigger the Android 5 bug aren't currently
// known. As a quick fix, we're refusing to inline methods that check the
// OS's version. This generally works because the reference to the
// non-existent field/method is usually guarded by checking that
  // `android.os.Build.VERSION.SDK_INT` is larger than the required API level.
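  //
  // A typical guard (illustrative example, not taken from this codebase)
  // looks like:
  //
  //   if (android.os.Build.VERSION.SDK_INT >= 23) { callNewerApi(); }
  //
  // which compiles down to an sget on SDK_INT followed by a comparison; that
  // sget is what this function detects.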
auto op = insn->opcode();
if (opcode::is_an_sget(op)) {
auto ref = insn->get_field();
DexField* field = resolve_field(ref, FieldSearch::Static);
if (field != nullptr && field == m_sdk_int_field) {
return true;
}
}
return false;
}
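/**
 * Return the code references of the callee, using the cache in
 * m_callee_code_refs when it is enabled.
 */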
std::shared_ptr<CodeRefs> MultiMethodInliner::get_callee_code_refs(
const DexMethod* callee) {
if (m_callee_code_refs) {
auto cached = m_callee_code_refs->get(callee, nullptr);
if (cached) {
return cached;
}
}
auto code_refs = std::make_shared<CodeRefs>(callee);
if (m_callee_code_refs) {
m_callee_code_refs->emplace(callee, code_refs);
}
return code_refs;
}
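/**
 * Collect all types referenced by the callee's code: types of type-carrying
 * instructions, the class, return type and argument types of referenced
 * methods, and the holder class and type of referenced fields. Types without
 * an internal class definition are filtered out, and the result is cached in
 * m_callee_type_refs when that cache is enabled.
 */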
std::shared_ptr<std::vector<DexType*>> MultiMethodInliner::get_callee_type_refs(
const DexMethod* callee) {
if (m_callee_type_refs) {
auto cached = m_callee_type_refs->get(callee, nullptr);
if (cached) {
return cached;
}
}
std::unordered_set<DexType*> type_refs_set;
editable_cfg_adapter::iterate(callee->get_code(),
[&](const MethodItemEntry& mie) {
auto insn = mie.insn;
if (insn->has_type()) {
type_refs_set.insert(insn->get_type());
} else if (insn->has_method()) {
auto meth = insn->get_method();
type_refs_set.insert(meth->get_class());
auto proto = meth->get_proto();
type_refs_set.insert(proto->get_rtype());
auto args = proto->get_args();
if (args != nullptr) {
for (const auto& arg : *args) {
type_refs_set.insert(arg);
}
}
} else if (insn->has_field()) {
auto field = insn->get_field();
type_refs_set.insert(field->get_class());
type_refs_set.insert(field->get_type());
}
return editable_cfg_adapter::LOOP_CONTINUE;
});
auto type_refs = std::make_shared<std::vector<DexType*>>();
for (auto type : type_refs_set) {
// filter out what xstores.illegal_ref(...) doesn't care about
if (type_class_internal(type) == nullptr) {
continue;
}
type_refs->push_back(type);
}
if (m_callee_type_refs) {
m_callee_type_refs->emplace(callee, type_refs);
}
return type_refs;
}
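/**
 * Compute how many distinct classes contain callers of the callee, and
 * whether the callee is only called from within its own class. The result is
 * cached in m_callee_caller_refs when that cache is enabled.
 */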
CalleeCallerRefs MultiMethodInliner::get_callee_caller_refs(
const DexMethod* callee) {
if (m_callee_caller_refs) {
CalleeCallerRefs absent = {false, std::numeric_limits<size_t>::max()};
auto cached = m_callee_caller_refs->get(callee, absent);
if (cached.classes != absent.classes) {
return cached;
}
}
const auto& callers = callee_caller.at(callee);
std::unordered_set<DexType*> caller_classes;
for (auto& p : callers) {
auto caller = p.first;
caller_classes.insert(caller->get_class());
}
auto callee_class = callee->get_class();
CalleeCallerRefs callee_caller_refs{
/* same_class */ caller_classes.size() == 1 &&
*caller_classes.begin() == callee_class,
caller_classes.size()};
if (m_callee_caller_refs) {
m_callee_caller_refs->emplace(callee, callee_caller_refs);
}
return callee_caller_refs;
}
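/**
 * Return true if the callee's code contains references that the ref checker
 * for the caller's store rejects; such a callee cannot be inlined into the
 * caller.
 */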
bool MultiMethodInliner::problematic_refs(const DexMethod* caller,
const DexMethod* callee) {
always_assert(m_ref_checkers);
auto callee_code_refs = get_callee_code_refs(callee);
always_assert(callee_code_refs);
const auto& xstores = m_shrinker.get_xstores();
size_t store_idx = xstores.get_store_idx(caller->get_class());
auto& ref_checker = m_ref_checkers->at(store_idx);
if (!ref_checker->check_code_refs(*callee_code_refs)) {
info.problematic_refs++;
return true;
}
return false;
}
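/**
 * Return true if the callee references a type that would be an illegal
 * cross-store reference from the caller's store.
 */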
bool MultiMethodInliner::cross_store_reference(const DexMethod* caller,
const DexMethod* callee) {
auto callee_type_refs = get_callee_type_refs(callee);
always_assert(callee_type_refs);
const auto& xstores = m_shrinker.get_xstores();
size_t store_idx = xstores.get_store_idx(caller->get_class());
for (auto type : *callee_type_refs) {
if (xstores.illegal_ref(store_idx, type)) {
info.cross_store++;
return true;
}
}
return false;
}
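/**
 * Apply the visibility changes that were accumulated while inlining, and
 * record which of the affected methods must additionally be made static.
 */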
void MultiMethodInliner::delayed_visibility_changes_apply() {
visibility_changes_apply_and_record_make_static(
*m_delayed_visibility_changes);
}
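/**
 * Apply the given visibility changes; any non-virtual, non-constructor
 * instance method that was made public in the process is queued up in
 * m_delayed_make_static so it can later be turned into a static method.
 */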
void MultiMethodInliner::visibility_changes_apply_and_record_make_static(
const VisibilityChanges& visibility_changes) {
visibility_changes.apply();
  // Any method that was just made public and is neither virtual, a
  // constructor, nor already static must be made static.
for (auto method : visibility_changes.methods) {
always_assert(is_public(method));
if (!method->is_virtual() && !method::is_init(method) &&
!is_static(method)) {
always_assert(can_rename(method));
always_assert(method->is_concrete());
m_delayed_make_static.insert(method);
}
}
}
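/**
 * Turn the methods collected in m_delayed_make_static into static methods and
 * rewrite all invoke-direct call sites that target them into invoke-static.
 */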
void MultiMethodInliner::delayed_invoke_direct_to_static() {
if (m_delayed_make_static.empty()) {
return;
}
// We sort the methods here because make_static renames methods on
// collision, and which collisions occur is order-dependent. E.g. if we have
// the following methods in m_delayed_make_static:
//
// Foo Foo::bar()
// Foo Foo::bar(Foo f)
//
// making Foo::bar() static first would make it collide with Foo::bar(Foo
// f), causing it to get renamed to bar$redex0(). But if Foo::bar(Foo f)
// gets static-ified first, it becomes Foo::bar(Foo f, Foo f), so when bar()
// gets made static later there is no collision. So in the interest of
// having reproducible binaries, we sort the methods first.
//
// Also, we didn't use an std::set keyed by method signature here because
// make_static is mutating the signatures. The tree that implements the set
// would have to be rebalanced after the mutations.
std::vector<DexMethod*> methods(m_delayed_make_static.begin(),
m_delayed_make_static.end());
std::sort(methods.begin(), methods.end(), compare_dexmethods);
for (auto method : methods) {
TRACE(MMINL, 6, "making %s static", method->get_name()->c_str());
mutators::make_static(method);
}
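  // Rewrite every invoke-direct in the scope that targets one of the newly
  // staticized methods into an invoke-static.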
walk::parallel::opcodes(
m_scope, [](DexMethod* meth) { return true; },
[&](DexMethod*, IRInstruction* insn) {
auto op = insn->opcode();
if (op == OPCODE_INVOKE_DIRECT) {
auto m = insn->get_method()->as_def();
if (m && m_delayed_make_static.count_unsafe(m)) {
insn->set_opcode(OPCODE_INVOKE_STATIC);
}
}
});
m_delayed_make_static.clear();
}
namespace inliner {
// return true on successful inlining, false otherwise
bool inline_with_cfg(
DexMethod* caller_method,
DexMethod* callee_method,
IRInstruction* callsite,
DexType* needs_receiver_cast,
DexType* needs_init_class,
size_t next_caller_reg,
const std::shared_ptr<cfg::ControlFlowGraph>& reduced_cfg) {
auto caller_code = caller_method->get_code();
always_assert(caller_code->editable_cfg_built());
auto& caller_cfg = caller_code->cfg();
const cfg::InstructionIterator& callsite_it = caller_cfg.find_insn(callsite);
if (callsite_it.is_end()) {
// The callsite is not in the caller cfg. This is probably because the
// callsite pointer is stale. Maybe the callsite's block was deleted since
// the time the callsite was found.
//
// This could have happened if a previous inlining caused a block to be
// unreachable, and that block was deleted when the CFG was simplified.
return false;
}
auto callee_code = callee_method->get_code();
always_assert(callee_code->editable_cfg_built());
auto& callee_cfg = reduced_cfg ? *reduced_cfg : callee_code->cfg();
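  // A callee whose body consists only of load-param and return-void
  // instructions has no effect; detect that case so the invoke can simply be
  // removed below instead of going through the full CFG inliner.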
bool is_trivial = true;
for (auto& mie : InstructionIterable(callee_cfg)) {
if (mie.insn->opcode() != OPCODE_RETURN_VOID &&
!opcode::is_load_param(mie.insn->opcode())) {
is_trivial = false;
break;
}
}
if (is_trivial) {
// no need to go through expensive general inlining, which would also add
// unnecessary or even dubious positions
caller_cfg.remove_insn(callsite_it);
return true;
}
if (caller_code->get_debug_item() == nullptr) {
// Create an empty item so that debug info of inlinee does not get lost.
caller_code->set_debug_item(std::make_unique<DexDebugItem>());
// Create a fake position.
caller_cfg.insert_before(
caller_cfg.entry_block(),
caller_cfg.entry_block()->get_first_non_param_loading_insn(),
DexPosition::make_synthetic_entry_position(caller_method));
}
  // Log before the call to inline_cfg so we capture the most relevant line
  // number near the callsite before the callsite gets replaced. This is safe
  // because inline_cfg does not fail to inline.
log_opt(INLINED, caller_method, callsite);
cfg::CFGInliner::inline_cfg(&caller_cfg, callsite_it, needs_receiver_cast,
needs_init_class, callee_cfg, next_caller_reg);
return true;
}
} // namespace inliner