opt/builder_pattern/BuilderAnalysis.cpp (437 lines of code) (raw):
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include "BuilderAnalysis.h"

#include <memory>

#include "BaseIRAnalyzer.h"
#include "ConstantAbstractDomain.h"
#include "ControlFlow.h"
#include "Liveness.h"
#include "PatriciaTreeMapAbstractEnvironment.h"
#include "Resolver.h"
#include "Show.h"
#include "Trace.h"
using namespace sparta;
namespace builder_pattern {
namespace impl {
using namespace ir_analyzer;
using NullableConstantValue =
acd_impl::ConstantAbstractValue<const IRInstruction*>;
/**
 * Constant domain over the instruction that produced the value currently held
 * in a register: either a builder-producing instruction (new-instance /
 * invoke) or a `const 0` (NULL) instruction.
 *
 * The custom join collapses {NULL const, builder-producing insn} to the
 * builder-producing insn, so a conditionally created builder
 * (e.g. `Builder b = cond ? new Builder() : null;`) keeps a single tracked
 * initialization point instead of joining to Top.
 */
class NullableConstantDomain final
    : public AbstractDomainScaffolding<NullableConstantValue,
                                       NullableConstantDomain> {
 public:
  using ConstantType = const IRInstruction*;
  using BaseClass =
      AbstractDomainScaffolding<NullableConstantValue, NullableConstantDomain>;

  // Default-constructed domain is Top (nothing known).
  NullableConstantDomain() { set_to_top(); }

  explicit NullableConstantDomain(const ConstantType& cst) {
    set_to_value(NullableConstantValue(cst));
  }

  explicit NullableConstantDomain(AbstractValueKind kind) : BaseClass(kind) {}

  // Returns the tracked instruction, or boost::none when Top/Bottom.
  boost::optional<ConstantType> get_constant() const {
    return is_value()
               ? boost::optional<ConstantType>(get_value()->get_constant())
               : boost::none;
  }

  void join_with(const NullableConstantDomain& other) override {
    if (is_value() && other.is_value()) {
      // NOTE: a tracked OPCODE_CONST is always a `const 0` (the analyzer only
      // records zero literals), i.e. NULL. NULL joined with a builder keeps
      // the builder rather than going to Top.
      if (get_value()->get_constant()->opcode() == OPCODE_CONST &&
          other.get_value()->get_constant()->opcode() != OPCODE_CONST) {
        TRACE(BLD_PATTERN, 5, "Join NULL const with builder %s:%s",
              SHOW(get_value()->get_constant()),
              SHOW(other.get_value()->get_constant()));
        // This side is NULL; adopt the builder from `other`.
        set_to_value(*other.get_value());
        return;
      }
      if (get_value()->get_constant()->opcode() != OPCODE_CONST &&
          other.get_value()->get_constant()->opcode() == OPCODE_CONST) {
        TRACE(BLD_PATTERN, 5, "Join NULL const with builder %s:%s",
              SHOW(get_value()->get_constant()),
              SHOW(other.get_value()->get_constant()));
        // Do nothing, the value is already builder.
        return;
      }
    }
    // Default join: equal values stay; differing values go to Top.
    BaseClass::join_with(other);
  }
};
using IRInstructionConstantDomain = NullableConstantDomain;

/**
 * For each register that holds an instance of a builder keeps track
 * of the instruction that initialized it.
 **/
using IRInstructionConstantEnvironment =
    PatriciaTreeMapAbstractEnvironment<reg_t, IRInstructionConstantDomain>;

// Maps each instruction to the abstract environment observed *before* it
// executes (populated in BuilderAnalysis::populate_usage). Wrapped in a named
// class — presumably so it can be forward-declared in the header; confirm.
class InstructionToEnvMap final
    : public std::unordered_map<const IRInstruction*,
                                impl::IRInstructionConstantEnvironment> {};
/**
 * Intraprocedural forward dataflow analysis tracking, for every register and
 * the pseudo RESULT_REGISTER, which instruction produced the builder instance
 * (or NULL constant) it currently holds.
 */
class Analyzer final : public BaseIRAnalyzer<IRInstructionConstantEnvironment> {
 public:
  Analyzer(const cfg::ControlFlowGraph& cfg,
           const ConstTypeHashSet& builder_types,
           const ConstTypeHashSet& excluded_builder_types,
           bool accept_excluded)
      : BaseIRAnalyzer<IRInstructionConstantEnvironment>(cfg),
        m_builder_types(builder_types),
        m_excluded_builder_types(excluded_builder_types),
        m_accept_excluded(accept_excluded) {
    // Run the fixpoint eagerly so callers can query entry/exit states
    // immediately after construction.
    MonotonicFixpointIterator::run(IRInstructionConstantEnvironment::top());
  }

  void analyze_instruction(
      const IRInstruction* insn,
      IRInstructionConstantEnvironment* current_state) const override {
    auto default_case = [&]() {
      // If we get here, reset destination.
      if (insn->has_dest()) {
        current_state->set(insn->dest(), IRInstructionConstantDomain::top());
        if (insn->dest_is_wide()) {
          current_state->set(insn->dest() + 1,
                             IRInstructionConstantDomain::top());
        }
      } else if (insn->has_move_result_any()) {
        // Here we don't need to update RESULT_REGISTER + 1 for wide cases,
        // since we only care about keeping track of the builders
        // (which are not wide). We only use this result for object move result
        // opcodes.
        current_state->set(RESULT_REGISTER, IRInstructionConstantDomain::top());
      }
    };

    switch (insn->opcode()) {
    case OPCODE_MOVE_OBJECT: {
      // Moves propagate the tracked initializer to the destination register.
      current_state->set(insn->dest(), current_state->get(insn->src(0)));
      break;
    }
    case IOPCODE_MOVE_RESULT_PSEUDO_OBJECT:
    case OPCODE_MOVE_RESULT_OBJECT: {
      // Transfer the pending result (set by the preceding instruction) into
      // the destination register.
      current_state->set(insn->dest(), current_state->get(RESULT_REGISTER));
      break;
    }
    case OPCODE_CONST:
      if (insn->get_literal() == 0) {
        // NULL const is a special case required for conditional creation.
        current_state->set(insn->dest(), IRInstructionConstantDomain(insn));
      } else {
        default_case();
      }
      break;
    case OPCODE_NEW_INSTANCE: {
      if (is_builder(insn->get_type())) {
        // Keep track of the instantiation.
        current_state->set(RESULT_REGISTER, IRInstructionConstantDomain(insn));
      } else {
        default_case();
      }
      break;
    }
    case OPCODE_CHECK_CAST: {
      // A cast keeps the same underlying object; propagate the tracked value.
      current_state->set(RESULT_REGISTER, current_state->get(insn->src(0)));
      break;
    }
    case OPCODE_INVOKE_DIRECT:
    case OPCODE_INVOKE_VIRTUAL:
    case OPCODE_INVOKE_STATIC: {
      auto method = resolve_method(insn->get_method(), opcode_to_search(insn));
      if (!method) {
        // Unresolvable callee: lose any tracked information.
        default_case();
        break;
      }
      auto rtype = method->get_proto()->get_rtype();
      if (insn->opcode() != OPCODE_INVOKE_STATIC &&
          method->get_class() == rtype) {
        // NOTE: We expect that the method actually operates on the same
        // instance and returns it. We are going to verify that later.
        current_state->set(RESULT_REGISTER, current_state->get(insn->src(0)));
      } else if (is_builder(rtype)) {
        // Keep track of the callsite that created / got the instance.
        current_state->set(RESULT_REGISTER, IRInstructionConstantDomain(insn));
      } else {
        default_case();
      }
      break;
    }
    default: {
      default_case();
      break;
    }
    }
  }

 private:
  const ConstTypeHashSet& m_builder_types;
  const ConstTypeHashSet& m_excluded_builder_types;
  // When true, types in m_excluded_builder_types are still treated as
  // builders (used for the first, stats-gathering run).
  const bool m_accept_excluded;

  // A type is a tracked builder if it is a candidate and either exclusions
  // are being accepted or it is not excluded.
  bool is_builder(const DexType* type) const {
    const bool is_not_excluded =
        m_accept_excluded || m_excluded_builder_types.count(type) == 0;
    return m_builder_types.count(type) && is_not_excluded;
  }
};
} // namespace impl
BuilderAnalysis::~BuilderAnalysis() {}
/**
 * Sets up per-method analysis state.
 *
 * @param builder_types candidate builder classes to track.
 * @param excluded_builder_types builder classes blocked from optimization.
 * @param method the method under analysis.
 *
 * The analysis itself runs lazily in run_analysis(). m_accept_excluded starts
 * true so the first run counts usages of excluded types too (see
 * update_stats(), which then flips it to false).
 */
BuilderAnalysis::BuilderAnalysis(const ConstTypeHashSet& builder_types,
                                 const ConstTypeHashSet& excluded_builder_types,
                                 DexMethod* method)
    : m_builder_types(builder_types),
      m_excluded_builder_types(excluded_builder_types),
      // make_unique instead of raw `new` (no naked new / exception safety).
      m_insn_to_env(std::make_unique<impl::InstructionToEnvMap>()),
      m_method(method),
      m_accept_excluded(true) {}
void BuilderAnalysis::run_analysis() {
auto* code = m_method->get_code();
if (!code) {
return;
}
code->build_cfg(/* editable */ false);
cfg::ControlFlowGraph& cfg = code->cfg();
cfg.calculate_exit_block();
m_analyzer.reset(new impl::Analyzer(
cfg, m_builder_types, m_excluded_builder_types, m_accept_excluded));
populate_usage();
update_stats();
}
/**
 * Dumps (at TRACE level 4) every tracked builder instantiation together with
 * the instructions that use it. No-op when the method has no code or nothing
 * was tracked.
 */
void BuilderAnalysis::print_usage() {
  if (!m_method || !m_method->get_code()) {
    return;
  }
  always_assert(m_analyzer);
  if (m_usage.empty()) {
    return;
  }

  TRACE(BLD_PATTERN, 4, "\nMethod %s", SHOW(m_method));
  for (const auto& [instantiation, uses] : m_usage) {
    TRACE(BLD_PATTERN, 4, "\nInitialization in %s", SHOW(instantiation));
    for (const auto& use_it : uses) {
      TRACE(BLD_PATTERN, 4, "\t Usage: %s", SHOW(use_it->insn));
    }
  }
}
/**
 * Records the total number of builder instantiations (tracked + excluded)
 * exactly once per method; subsequent runs skip the computation. The first
 * call also flips m_accept_excluded to false so later analysis runs ignore
 * excluded builder types.
 *
 * TODO(emmasevastian): maybe move this to the caller instead?
 */
void BuilderAnalysis::update_stats() {
  if (!m_accept_excluded) {
    // Already counted on the first (excluded-accepting) run.
    return;
  }
  m_total_usages = m_usage.size() + m_excluded_instantiation.size();
  m_accept_excluded = false;
}
namespace {
/**
 * Maps an instantiation-tracking instruction to the builder type it produces:
 *   const 0 (NULL)                   -> nullptr
 *   new-instance                     -> the instantiated type
 *   invoke-{static,virtual,direct}   -> the callee's return type
 * Any other opcode is a logic error (the analysis never tracks them).
 */
DexType* get_instantiated_type(const IRInstruction* insn) {
  DexType* instantiated = nullptr;
  const auto op = insn->opcode();
  if (op == OPCODE_NEW_INSTANCE) {
    instantiated = insn->get_type();
  } else if (op == OPCODE_INVOKE_STATIC || op == OPCODE_INVOKE_VIRTUAL ||
             op == OPCODE_INVOKE_DIRECT) {
    // NOTE(review): assumes the callee resolves; a failed resolution would
    // dereference null here. The analyzer only tracks resolvable call sites,
    // but confirm all callers uphold that.
    auto callee = resolve_method(insn->get_method(), opcode_to_search(insn));
    instantiated = callee->get_proto()->get_rtype();
  } else if (op != OPCODE_CONST) {
    not_reached_log("Different instantion opcode %s", SHOW(insn));
  }
  return instantiated;
}
} // namespace
/**
 * Walks every instruction of the method, recording for each one the abstract
 * environment right before it executes (m_insn_to_env), and grouping all
 * instructions that touch a tracked builder value into m_usage, keyed by the
 * instantiation instruction. Instantiations of excluded builder types go into
 * m_excluded_instantiation instead (used only for stats).
 */
void BuilderAnalysis::populate_usage() {
  m_usage.clear();
  m_insn_to_env->clear();

  auto* code = m_method->get_code();
  auto& cfg = code->cfg();

  // If the instantiated type is not excluded, updates the usages map.
  // Otherwise, update the excluded instantiation list.
  auto update_usages = [&](const IRInstruction* val,
                           const IRList::iterator& use) {
    if (auto referenced_type = get_instantiated_type(val)) {
      if (m_excluded_builder_types.count(referenced_type) == 0) {
        m_usage[val].push_back(use);
      } else {
        // Fix: only *excluded* instantiations belong in this set. Previously
        // every instantiation was added unconditionally, which double-counted
        // non-excluded builders in m_total_usages (see update_stats()).
        m_excluded_instantiation.emplace(val);
      }
    }
  };

  for (cfg::Block* block : cfg.blocks()) {
    auto env = m_analyzer->get_entry_state_at(block);
    for (auto& mie : InstructionIterable(block)) {
      auto it = code->iterator_to(mie);
      IRInstruction* insn = mie.insn;
      // Snapshot the environment *before* this instruction executes, then
      // advance it.
      m_insn_to_env->emplace(insn, env);
      m_analyzer->analyze_instruction(insn, &env);

      // Record a usage if the destination now holds a tracked value...
      if (insn->has_dest()) {
        auto dest = insn->dest();
        auto val_dest = env.get(dest).get_constant();
        if (val_dest) {
          update_usages(*val_dest, it);
        }
      }
      // ...and for every source register holding a tracked value.
      for (size_t index = 0; index < insn->srcs_size(); index++) {
        auto src = insn->src(index);
        auto val_src = env.get(src).get_constant();
        if (val_src) {
          update_usages(*val_src, it);
        }
      }
    }
  }
}
/**
 * For every invoke-virtual that is either a tracked builder instantiation or
 * a usage of one, infers the concrete type of its `this` argument from the
 * tracked initializer instruction. Asserts that the inference is consistent
 * when the same invoke is reached via multiple instantiations.
 */
std::unordered_map<IRInstruction*, DexType*>
BuilderAnalysis::get_vinvokes_to_this_infered_type() {
  std::unordered_map<IRInstruction*, DexType*> result;
  for (const auto& pair : m_usage) {
    // Instantiating invoke-virtuals: the produced type is the inferred type.
    if (opcode::is_invoke_virtual(pair.first->opcode())) {
      always_assert(!result.count(const_cast<IRInstruction*>(pair.first)));
      auto current_instance = get_instantiated_type(pair.first);
      result[const_cast<IRInstruction*>(pair.first)] = current_instance;
    }
    for (auto& it : pair.second) {
      auto* insn = it->insn;
      if (opcode::is_invoke_virtual(insn->opcode())) {
        auto this_reg = insn->src(0);
        // The environment *before* the invoke tells us what `this` holds.
        auto val = m_insn_to_env->at(insn).get(this_reg).get_constant();
        if (val) {
          auto infered_type = get_instantiated_type(*val);
          // The same invoke must never infer two different receiver types.
          always_assert(!result.count(const_cast<IRInstruction*>(insn)) ||
                        result[const_cast<IRInstruction*>(insn)] ==
                            infered_type);
          result[const_cast<IRInstruction*>(insn)] = infered_type;
        };
      }
    }
  }
  return result;
}
/**
 * Collects every invoke instruction related to a tracked builder (both the
 * instantiating call sites and the usages), then filters out the ones that
 * cannot be inlined: unresolvable callees, callees without code, and <init>
 * calls whose receiver's inferred type doesn't match the constructor's class.
 */
std::unordered_set<IRInstruction*> BuilderAnalysis::get_all_inlinable_insns() {
  std::unordered_set<IRInstruction*> result;
  for (const auto& pair : m_usage) {
    if (opcode::is_an_invoke(pair.first->opcode())) {
      result.emplace(const_cast<IRInstruction*>(pair.first));
    }
    for (const auto& it : pair.second) {
      auto* insn = it->insn;
      if (opcode::is_an_invoke(insn->opcode())) {
        result.emplace(const_cast<IRInstruction*>(insn));
      }
    }
  }

  // Filter out non-inlinable ones.
  for (auto it = result.begin(); it != result.end();) {
    auto insn = *it;
    always_assert(insn->has_method());
    auto method = resolve_method(insn->get_method(), opcode_to_search(insn));
    if (!method || !method->get_code()) {
      // Cannot inline without a resolvable callee that has code.
      it = result.erase(it);
      continue;
    }
    if (method::is_init(method)) {
      // Only keep constructor calls whose receiver is a builder of exactly
      // the class the constructor belongs to.
      auto this_reg = insn->src(0);
      auto val = m_insn_to_env->at(insn).get(this_reg).get_constant();
      if (!val || get_instantiated_type(*val) != method->get_class()) {
        it = result.erase(it);
        continue;
      }
    }
    it++;
  }
  return result;
}
/**
 * Returns the set of builder types instantiated in this method. When `insns`
 * is non-null, a type only contributes if its instantiation instruction, or
 * one of its usage instructions, is contained in `insns`.
 */
ConstTypeHashSet BuilderAnalysis::get_instantiated_types(
    std::unordered_set<const IRInstruction*>* insns) {
  ConstTypeHashSet result;
  for (const auto& [instantiation, uses] : m_usage) {
    auto type = get_instantiated_type(instantiation);
    // Unfiltered query, or the instantiation itself is in the filter set.
    if (insns == nullptr ||
        insns->count(const_cast<IRInstruction*>(instantiation))) {
      result.emplace(type);
      continue;
    }
    // Otherwise, include the type if any usage is in the filter set.
    for (const auto& use_it : uses) {
      if (insns->count(const_cast<IRInstruction*>(use_it->insn))) {
        result.emplace(type);
        break;
      }
    }
  }
  return result;
}
namespace {
// Looks up java.lang.Object.<init>()V; may return null if that reference was
// never created in this Dex context.
DexMethodRef* get_obj_default_ctor() {
  return DexMethod::get_method(
      type::java_lang_Object(),
      DexString::get_string("<init>"),
      DexProto::get_proto(type::_void(), DexTypeList::make_type_list({})));
}
} // namespace
/**
 * Builder types that cannot be removed: those that escape the method (see
 * escape_types()) plus those used for synchronization (monitor-enter/exit).
 */
ConstTypeHashSet BuilderAnalysis::non_removable_types() {
  auto result = escape_types();
  // Consider other non-removable usages (for example synchronization usage).
  for (const auto& [instantiation, uses] : m_usage) {
    auto* instance_type = get_instantiated_type(instantiation);
    if (result.count(instance_type)) {
      // Already decided it isn't removable.
      continue;
    }
    for (const auto& use_it : uses) {
      if (opcode::is_a_monitor(use_it->insn->opcode())) {
        result.emplace(instance_type);
        break;
      }
    }
  }
  return result;
}
/**
 * Computes the set of builder types that "escape" this method and therefore
 * cannot be removed. A builder escapes when it is:
 *  - passed to an invoke we could not inline (Object.<init> excepted),
 *  - tested with instance-of,
 *  - stored to a field/array or returned,
 *  - or live across a CFG edge where tracking is lost (the abstract value at
 *    a successor's entry differs from the predecessor's exit).
 */
ConstTypeHashSet BuilderAnalysis::escape_types() {
  auto* code = m_method->get_code();
  auto& cfg = code->cfg();

  // Don't treat as escaping a builder passed to Object.<init>().
  auto acceptable_method = get_obj_default_ctor();

  // Phase 1: per-usage escape checks.
  ConstTypeHashSet escape_types;
  for (const auto& pair : m_usage) {
    auto instantiation_insn = pair.first;
    auto current_instance = get_instantiated_type(instantiation_insn);
    for (const auto& it : pair.second) {
      auto* insn = it->insn;
      // If there is any invoke here, it is because we couldn't inline it.
      if (opcode::is_an_invoke(insn->opcode())) {
        // We accept Object.<init> calls.
        if (insn->get_method() == acceptable_method) {
          continue;
        }
        auto method = resolve_method(insn->get_method(), MethodSearch::Any);
        TRACE(BLD_PATTERN, 2, "Excluding type %s since we couldn't inline %s",
              SHOW(current_instance), SHOW(method));
        escape_types.emplace(current_instance);
      } else if (insn->opcode() == OPCODE_INSTANCE_OF) {
        TRACE(BLD_PATTERN, 2, "Excluding type %s since instanceof used",
              SHOW(current_instance));
        escape_types.emplace(current_instance);
      } else if (opcode::is_an_iput(insn->opcode()) ||
                 opcode::is_an_sput(insn->opcode()) ||
                 insn->opcode() == OPCODE_APUT_OBJECT ||
                 opcode::is_a_return(insn->opcode())) {
        // src(0) is the value being stored/returned for all these opcodes;
        // escape only if that value is this instantiation's builder.
        auto src = insn->src(0);
        auto escaped = m_insn_to_env->at(insn).get(src).get_constant();
        if (escaped && escaped.get() == instantiation_insn) {
          TRACE(BLD_PATTERN, 2,
                "Excluding type %s since it is stored or returned in %s",
                SHOW(current_instance), SHOW(insn));
          escape_types.emplace(current_instance);
        }
      }
    }
  }

  // Phase 2: a builder also escapes if, along some CFG edge, a live register
  // stops holding the tracked value (e.g. the join degraded it to Top).
  LivenessFixpointIterator liveness_iter(cfg);
  liveness_iter.run(LivenessDomain());
  for (cfg::Block* block : cfg.blocks()) {
    auto current_env = m_analyzer->get_exit_state_at(block);
    for (auto& succ : block->succs()) {
      cfg::Block* block_succ = succ->target();
      auto entry_env_at_succ = m_analyzer->get_entry_state_at(block_succ);
      auto live_in_vars_at_succ =
          liveness_iter.get_live_in_vars_at(succ->target());

      // Check that live registers, if they hold a builder at the end of block
      // B, then they hold the same one at the entry of any successor block.
      for (reg_t live_reg : live_in_vars_at_succ.elements()) {
        if (!entry_env_at_succ.get(live_reg).equals(
                current_env.get(live_reg))) {
          // It escapes here, since we couldn't follow it anymore.
          TRACE(BLD_PATTERN, 5,
                "Liveness missmatch for register v%d\nPRED:\n%sSUCC:\n%s",
                live_reg, SHOW(block), SHOW(block_succ));
          // NOTE(review): assumes the predecessor's exit value is a concrete
          // tracked instruction; if it were Top/Bottom, dereferencing the
          // boost::optional here (and below) would be undefined behavior —
          // confirm mismatches always originate on the successor side.
          TRACE(BLD_PATTERN, 5, "Register value in PRED: %s",
                SHOW(*current_env.get(live_reg).get_constant()));
          if (auto succ_val = entry_env_at_succ.get(live_reg).get_constant()) {
            TRACE(BLD_PATTERN, 5, "Register value in SUCC: %s",
                  SHOW(*succ_val));
          } else {
            TRACE(BLD_PATTERN, 5, "Register value in SUCC: NONE");
          }
          auto init_insn = *current_env.get(live_reg).get_constant();
          if (init_insn->opcode() != OPCODE_CONST) {
            // NULL const cannot escape only builder can escape.
            auto current_instance = get_instantiated_type(init_insn);
            TRACE(BLD_PATTERN, 2,
                  "Excluding type %s since it escapes method %s",
                  SHOW(current_instance), SHOW(m_method));
            escape_types.emplace(current_instance);
          }
        }
      }
    }
  }
  return escape_types;
}
} // namespace builder_pattern