opt/object-sensitive-dce/SideEffectSummary.cpp (299 lines of code) (raw):
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include "SideEffectSummary.h"
#include "CallGraph.h"
#include "ConcurrentContainers.h"
#include "Show.h"
#include "Trace.h"
#include "Walkers.h"
using namespace side_effects;
using namespace sparta;
namespace ptrs = local_pointers;
namespace side_effects {
using PointersFixpointIteratorMap =
ConcurrentMap<const DexMethodRef*, ptrs::FixpointIterator*>;
using SummaryConcurrentMap = ConcurrentMap<const DexMethodRef*, Summary>;
SummaryBuilder::SummaryBuilder(
const init_classes::InitClassesWithSideEffects&
init_classes_with_side_effects,
const InvokeToSummaryMap& invoke_to_summary_cmap,
const ptrs::FixpointIterator& ptrs_fp_iter,
const IRCode* code,
reaching_defs::MoveAwareFixpointIterator* reaching_defs_fixpoint_iter,
bool analyze_external_reads)
: m_init_classes_with_side_effects(init_classes_with_side_effects),
m_invoke_to_summary_cmap(invoke_to_summary_cmap),
m_ptrs_fp_iter(ptrs_fp_iter),
m_code(code),
m_analyze_external_reads(analyze_external_reads),
m_reaching_defs_fixpoint_iter(reaching_defs_fixpoint_iter) {
auto idx = 0;
auto params = code->editable_cfg_built()
? code->cfg().get_param_instructions()
: code->get_param_instructions();
for (auto& mie : InstructionIterable(params)) {
auto insn = mie.insn;
m_param_insn_map.emplace(insn, idx++);
}
}
Summary SummaryBuilder::build() {
Summary summary;
// Aggregate the effects of each individual instruction in the code object.
auto& cfg = m_code->cfg();
for (auto* block : cfg.blocks()) {
auto env = m_ptrs_fp_iter.get_entry_state_at(block);
reaching_defs::Environment reaching_def_env;
if (env.is_bottom()) {
continue;
}
if (m_analyze_external_reads) {
reaching_def_env =
m_reaching_defs_fixpoint_iter->get_entry_state_at(block);
}
for (auto& mie : InstructionIterable(block)) {
auto* insn = mie.insn;
analyze_instruction_effects(env, reaching_def_env, insn, &summary);
m_ptrs_fp_iter.analyze_instruction(insn, &env);
if (m_analyze_external_reads) {
m_reaching_defs_fixpoint_iter->analyze_instruction(insn,
&reaching_def_env);
}
}
}
return summary;
}
void SummaryBuilder::analyze_instruction_effects(
const ptrs::Environment& env,
const reaching_defs::Environment& reaching_def_env,
const IRInstruction* insn,
Summary* summary) {
auto init_class = !!m_init_classes_with_side_effects.refine(
get_init_class_type_demand(insn));
if (init_class) {
summary->effects |= EFF_INIT_CLASS;
}
auto op = insn->opcode();
switch (op) {
case OPCODE_THROW: {
summary->effects |= EFF_THROWS;
break;
}
case OPCODE_MONITOR_ENTER:
case OPCODE_MONITOR_EXIT: {
summary->effects |= EFF_LOCKS;
break;
}
case OPCODE_SGET:
case OPCODE_SGET_WIDE:
case OPCODE_SGET_BOOLEAN:
case OPCODE_SGET_BYTE:
case OPCODE_SGET_CHAR:
case OPCODE_SGET_SHORT:
case OPCODE_SGET_OBJECT: {
summary->may_read_external = true;
break;
}
case OPCODE_IGET:
case OPCODE_IGET_WIDE:
case OPCODE_IGET_BOOLEAN:
case OPCODE_IGET_BYTE:
case OPCODE_IGET_CHAR:
case OPCODE_IGET_SHORT:
case OPCODE_IGET_OBJECT:
case OPCODE_AGET:
case OPCODE_AGET_WIDE:
case OPCODE_AGET_BOOLEAN:
case OPCODE_AGET_BYTE:
case OPCODE_AGET_CHAR:
case OPCODE_AGET_SHORT:
case OPCODE_AGET_OBJECT: {
if (m_analyze_external_reads) {
auto def = reaching_def_env.get(insn->src(0));
if (def.is_top() ||
std::any_of(def.elements().begin(), def.elements().end(),
[&](auto insn) {
return !opcode::is_a_load_param(insn->opcode());
})) {
summary->may_read_external = true;
}
} else {
summary->may_read_external = true;
}
break;
}
case OPCODE_SPUT:
case OPCODE_SPUT_WIDE:
case OPCODE_SPUT_BOOLEAN:
case OPCODE_SPUT_BYTE:
case OPCODE_SPUT_CHAR:
case OPCODE_SPUT_SHORT:
case OPCODE_SPUT_OBJECT: {
summary->effects |= EFF_WRITE_MAY_ESCAPE;
break;
}
case OPCODE_IPUT:
case OPCODE_IPUT_WIDE:
case OPCODE_IPUT_BOOLEAN:
case OPCODE_IPUT_BYTE:
case OPCODE_IPUT_CHAR:
case OPCODE_IPUT_SHORT:
case OPCODE_IPUT_OBJECT:
case OPCODE_APUT:
case OPCODE_APUT_WIDE:
case OPCODE_APUT_BOOLEAN:
case OPCODE_APUT_BYTE:
case OPCODE_APUT_CHAR:
case OPCODE_APUT_SHORT:
case OPCODE_APUT_OBJECT: {
classify_heap_write(env, insn->src(1), summary);
break;
}
case OPCODE_FILL_ARRAY_DATA: {
classify_heap_write(env, insn->src(0), summary);
break;
}
case OPCODE_INVOKE_SUPER:
case OPCODE_INVOKE_INTERFACE: {
TRACE(OSDCE, 3, "Unknown invoke: %s", SHOW(insn));
summary->effects |= EFF_UNKNOWN_INVOKE;
break;
}
case OPCODE_INVOKE_STATIC:
case OPCODE_INVOKE_DIRECT:
case OPCODE_INVOKE_VIRTUAL: {
if (m_invoke_to_summary_cmap.count(insn)) {
const auto& callee_summary = m_invoke_to_summary_cmap.at(insn);
summary->effects |= callee_summary.effects;
summary->may_read_external |= callee_summary.may_read_external;
for (auto idx : callee_summary.modified_params) {
classify_heap_write(env, insn->src(idx), summary);
}
} else {
TRACE(OSDCE, 3, "Unknown invoke: %s", SHOW(insn));
summary->effects |= EFF_UNKNOWN_INVOKE;
}
break;
}
default: {
break;
}
}
}
/*
* Given a write to the heap, classify it as one of the following:
* - Write to a locally-allocated non-escaping object
* - Write to an object passed in as a parameter
* - Write to an escaping and/or unknown object
*/
void SummaryBuilder::classify_heap_write(const ptrs::Environment& env,
reg_t modified_ptr_reg,
Summary* summary) {
auto pointers = env.get_pointers(modified_ptr_reg);
if (!pointers.is_value()) {
summary->effects |= EFF_WRITE_MAY_ESCAPE;
return;
}
for (auto insn : pointers.elements()) {
if (!env.may_have_escaped(insn)) {
if (insn->opcode() == IOPCODE_LOAD_PARAM_OBJECT) {
summary->modified_params.emplace(m_param_insn_map.at(insn));
}
} else {
TRACE(OSDCE, 3, "Escaping write to value allocated by %s", SHOW(insn));
summary->effects |= EFF_WRITE_MAY_ESCAPE;
}
}
}
/*
* Analyze :method and insert its summary into :summary_cmap. Recursively
* analyze the callees if necessary. This method is thread-safe.
*/
void analyze_method_recursive(const init_classes::InitClassesWithSideEffects&
init_classes_with_side_effects,
const DexMethod* method,
const call_graph::Graph& call_graph,
const ptrs::FixpointIteratorMap& ptrs_fp_iter_map,
PatriciaTreeSet<const DexMethodRef*> visiting,
SummaryConcurrentMap* summary_cmap) {
if (!method || summary_cmap->count(method) != 0 ||
visiting.contains(method) || method->get_code() == nullptr) {
return;
}
visiting.insert(method);
InvokeToSummaryMap invoke_to_summary_cmap;
if (call_graph.has_node(method)) {
const auto& callee_edges = call_graph.node(method)->callees();
for (const auto& edge : callee_edges) {
auto* callee = edge->callee()->method();
analyze_method_recursive(init_classes_with_side_effects, callee,
call_graph, ptrs_fp_iter_map, visiting,
summary_cmap);
if (summary_cmap->count(callee) != 0) {
invoke_to_summary_cmap.emplace(edge->invoke_insn(),
summary_cmap->at(callee));
}
}
}
const auto* ptrs_fp_iter = ptrs_fp_iter_map.find(method)->second;
auto summary =
SummaryBuilder(init_classes_with_side_effects, invoke_to_summary_cmap,
*ptrs_fp_iter, method->get_code())
.build();
if (method->rstate.no_optimizations()) {
summary.effects |= EFF_NO_OPTIMIZE;
}
summary_cmap->emplace(method, summary);
if (traceEnabled(OSDCE, 3)) {
TRACE(OSDCE, 3, "%s %s unknown side effects (%zu)", SHOW(method),
summary.effects != EFF_NONE ? "has" : "does not have",
summary.effects);
if (!summary.modified_params.empty()) {
TRACE_NO_LINE(OSDCE, 3, "Modified params: ");
for (auto idx : summary.modified_params) {
TRACE_NO_LINE(OSDCE, 3, "%u ", idx);
}
TRACE(OSDCE, 3, "");
}
}
}
Summary analyze_code(const init_classes::InitClassesWithSideEffects&
init_classes_with_side_effects,
const InvokeToSummaryMap& invoke_to_summary_cmap,
const ptrs::FixpointIterator& ptrs_fp_iter,
const IRCode* code) {
return SummaryBuilder(init_classes_with_side_effects, invoke_to_summary_cmap,
ptrs_fp_iter, code)
.build();
}
void analyze_scope(
const init_classes::InitClassesWithSideEffects&
init_classes_with_side_effects,
const Scope& scope,
const call_graph::Graph& call_graph,
const ConcurrentMap<const DexMethodRef*, ptrs::FixpointIterator*>&
ptrs_fp_iter_map,
SummaryMap* effect_summaries) {
// This method is special: the bytecode verifier requires that this method
// be called before a newly-allocated object gets used in any way. We can
// model this by treating the method as modifying its `this` parameter --
// changing it from uninitialized to initialized.
(*effect_summaries)[DexMethod::get_method("Ljava/lang/Object;.<init>:()V")] =
Summary({0});
SummaryConcurrentMap summary_cmap;
for (auto& pair : *effect_summaries) {
summary_cmap.insert(pair);
}
walk::parallel::code(scope, [&](const DexMethod* method, IRCode& code) {
PatriciaTreeSet<const DexMethodRef*> visiting;
analyze_method_recursive(init_classes_with_side_effects, method, call_graph,
ptrs_fp_iter_map, visiting, &summary_cmap);
});
for (auto& pair : summary_cmap) {
effect_summaries->insert(pair);
}
}
s_expr to_s_expr(const Summary& summary) {
std::vector<s_expr> s_exprs;
s_exprs.emplace_back(std::to_string(summary.effects));
std::vector<s_expr> mod_param_s_exprs;
for (auto idx : summary.modified_params) {
mod_param_s_exprs.emplace_back(idx);
}
s_exprs.emplace_back(mod_param_s_exprs);
return s_expr(s_exprs);
}
Summary Summary::from_s_expr(const s_expr& expr) {
Summary summary;
always_assert(expr.size() == 2);
always_assert(expr[0].is_string());
summary.effects = std::stoi(expr[0].str());
always_assert(expr[1].is_list());
for (size_t i = 0; i < expr[1].size(); ++i) {
summary.modified_params.emplace(expr[1][i].get_int32());
}
return summary;
}
} // namespace side_effects