service/escape-analysis/LocalPointersAnalysis.cpp (538 lines of code) (raw):
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include "LocalPointersAnalysis.h"
#include <ostream>
#include "DexUtil.h"
#include "PatriciaTreeSet.h"
#include "Resolver.h"
#include "Walkers.h"
#include "WorkQueue.h"
using namespace local_pointers;
namespace {
using namespace ir_analyzer;
/*
* Whether the dest of an instruction may be a pointer value. The only time
* there is an uncertainty as to whether the dest is a pointer or not is when
* we have a `const 0` instruction, since that may be either a null pointer or
* a zero integer.
*/
bool dest_may_be_pointer(const IRInstruction* insn) {
auto op = insn->opcode();
switch (op) {
case OPCODE_NOP:
not_reached_log("No dest");
case OPCODE_MOVE:
case OPCODE_MOVE_WIDE:
return false;
case OPCODE_MOVE_OBJECT:
return true;
case OPCODE_MOVE_RESULT:
case OPCODE_MOVE_RESULT_WIDE:
return false;
case OPCODE_MOVE_RESULT_OBJECT:
case OPCODE_MOVE_EXCEPTION:
return true;
case OPCODE_RETURN_VOID:
case OPCODE_RETURN:
case OPCODE_RETURN_WIDE:
case OPCODE_RETURN_OBJECT:
not_reached_log("No dest");
case OPCODE_MONITOR_ENTER:
case OPCODE_MONITOR_EXIT:
case OPCODE_THROW:
case OPCODE_GOTO:
not_reached_log("No dest");
case OPCODE_NEG_INT:
case OPCODE_NOT_INT:
case OPCODE_NEG_LONG:
case OPCODE_NOT_LONG:
case OPCODE_NEG_FLOAT:
case OPCODE_NEG_DOUBLE:
case OPCODE_INT_TO_LONG:
case OPCODE_INT_TO_FLOAT:
case OPCODE_INT_TO_DOUBLE:
case OPCODE_LONG_TO_INT:
case OPCODE_LONG_TO_FLOAT:
case OPCODE_LONG_TO_DOUBLE:
case OPCODE_FLOAT_TO_INT:
case OPCODE_FLOAT_TO_LONG:
case OPCODE_FLOAT_TO_DOUBLE:
case OPCODE_DOUBLE_TO_INT:
case OPCODE_DOUBLE_TO_LONG:
case OPCODE_DOUBLE_TO_FLOAT:
case OPCODE_INT_TO_BYTE:
case OPCODE_INT_TO_CHAR:
case OPCODE_INT_TO_SHORT:
case OPCODE_ARRAY_LENGTH:
case OPCODE_CMPL_FLOAT:
case OPCODE_CMPG_FLOAT:
case OPCODE_CMPL_DOUBLE:
case OPCODE_CMPG_DOUBLE:
case OPCODE_CMP_LONG:
return false;
case OPCODE_IF_EQ:
case OPCODE_IF_NE:
case OPCODE_IF_LT:
case OPCODE_IF_GE:
case OPCODE_IF_GT:
case OPCODE_IF_LE:
case OPCODE_IF_EQZ:
case OPCODE_IF_NEZ:
case OPCODE_IF_LTZ:
case OPCODE_IF_GEZ:
case OPCODE_IF_GTZ:
case OPCODE_IF_LEZ:
not_reached_log("No dest");
case OPCODE_AGET:
case OPCODE_AGET_WIDE:
return false;
case OPCODE_AGET_OBJECT:
return true;
case OPCODE_AGET_BOOLEAN:
case OPCODE_AGET_BYTE:
case OPCODE_AGET_CHAR:
case OPCODE_AGET_SHORT:
return false;
case OPCODE_APUT:
case OPCODE_APUT_WIDE:
case OPCODE_APUT_OBJECT:
case OPCODE_APUT_BOOLEAN:
case OPCODE_APUT_BYTE:
case OPCODE_APUT_CHAR:
case OPCODE_APUT_SHORT:
not_reached_log("No dest");
case OPCODE_ADD_INT:
case OPCODE_SUB_INT:
case OPCODE_MUL_INT:
case OPCODE_DIV_INT:
case OPCODE_REM_INT:
case OPCODE_AND_INT:
case OPCODE_OR_INT:
case OPCODE_XOR_INT:
case OPCODE_SHL_INT:
case OPCODE_SHR_INT:
case OPCODE_USHR_INT:
case OPCODE_ADD_LONG:
case OPCODE_SUB_LONG:
case OPCODE_MUL_LONG:
case OPCODE_DIV_LONG:
case OPCODE_REM_LONG:
case OPCODE_AND_LONG:
case OPCODE_OR_LONG:
case OPCODE_XOR_LONG:
case OPCODE_SHL_LONG:
case OPCODE_SHR_LONG:
case OPCODE_USHR_LONG:
case OPCODE_ADD_FLOAT:
case OPCODE_SUB_FLOAT:
case OPCODE_MUL_FLOAT:
case OPCODE_DIV_FLOAT:
case OPCODE_REM_FLOAT:
case OPCODE_ADD_DOUBLE:
case OPCODE_SUB_DOUBLE:
case OPCODE_MUL_DOUBLE:
case OPCODE_DIV_DOUBLE:
case OPCODE_REM_DOUBLE:
case OPCODE_ADD_INT_LIT16:
case OPCODE_RSUB_INT:
case OPCODE_MUL_INT_LIT16:
case OPCODE_DIV_INT_LIT16:
case OPCODE_REM_INT_LIT16:
case OPCODE_AND_INT_LIT16:
case OPCODE_OR_INT_LIT16:
case OPCODE_XOR_INT_LIT16:
case OPCODE_ADD_INT_LIT8:
case OPCODE_RSUB_INT_LIT8:
case OPCODE_MUL_INT_LIT8:
case OPCODE_DIV_INT_LIT8:
case OPCODE_REM_INT_LIT8:
case OPCODE_AND_INT_LIT8:
case OPCODE_OR_INT_LIT8:
case OPCODE_XOR_INT_LIT8:
case OPCODE_SHL_INT_LIT8:
case OPCODE_SHR_INT_LIT8:
case OPCODE_USHR_INT_LIT8:
return false;
case OPCODE_CONST:
return insn->get_literal() == 0;
case OPCODE_FILL_ARRAY_DATA:
case OPCODE_SWITCH:
not_reached_log("No dest");
case OPCODE_CONST_WIDE:
case OPCODE_IGET:
case OPCODE_IGET_WIDE:
return false;
case OPCODE_IGET_OBJECT:
return true;
case OPCODE_IGET_BOOLEAN:
case OPCODE_IGET_BYTE:
case OPCODE_IGET_CHAR:
case OPCODE_IGET_SHORT:
return false;
case OPCODE_IPUT:
case OPCODE_IPUT_WIDE:
case OPCODE_IPUT_OBJECT:
case OPCODE_IPUT_BOOLEAN:
case OPCODE_IPUT_BYTE:
case OPCODE_IPUT_CHAR:
case OPCODE_IPUT_SHORT:
not_reached_log("No dest");
case OPCODE_SGET:
case OPCODE_SGET_WIDE:
return false;
case OPCODE_SGET_OBJECT:
return true;
case OPCODE_SGET_BOOLEAN:
case OPCODE_SGET_BYTE:
case OPCODE_SGET_CHAR:
case OPCODE_SGET_SHORT:
return false;
case OPCODE_SPUT:
case OPCODE_SPUT_WIDE:
case OPCODE_SPUT_OBJECT:
case OPCODE_SPUT_BOOLEAN:
case OPCODE_SPUT_BYTE:
case OPCODE_SPUT_CHAR:
case OPCODE_SPUT_SHORT:
not_reached_log("No dest");
case OPCODE_INVOKE_VIRTUAL:
case OPCODE_INVOKE_SUPER:
case OPCODE_INVOKE_DIRECT:
case OPCODE_INVOKE_STATIC:
case OPCODE_INVOKE_INTERFACE:
return !type::is_primitive(insn->get_method()->get_proto()->get_rtype());
case OPCODE_CONST_STRING:
case OPCODE_CONST_CLASS:
case OPCODE_CHECK_CAST:
return true;
case OPCODE_INSTANCE_OF:
return false;
case OPCODE_NEW_INSTANCE:
case OPCODE_NEW_ARRAY:
case OPCODE_FILLED_NEW_ARRAY:
return true;
case IOPCODE_LOAD_PARAM:
return false;
case IOPCODE_LOAD_PARAM_OBJECT:
return true;
case IOPCODE_LOAD_PARAM_WIDE:
return false;
case IOPCODE_MOVE_RESULT_PSEUDO:
return false;
case IOPCODE_MOVE_RESULT_PSEUDO_OBJECT:
return true;
case IOPCODE_MOVE_RESULT_PSEUDO_WIDE:
return false;
default:
not_reached_log("Unknown opcode %02x\n", op);
}
}
void analyze_invoke_with_summary(const EscapeSummary& summary,
const IRInstruction* insn,
Environment* env) {
for (auto src_idx : summary.escaping_parameters) {
env->set_may_escape(insn->src(src_idx), insn);
}
switch (summary.returned_parameters.kind()) {
case sparta::AbstractValueKind::Value: {
PointerSet returned_ptrs;
for (auto src_idx : summary.returned_parameters.elements()) {
if (src_idx == FRESH_RETURN) {
returned_ptrs.add(insn);
} else {
returned_ptrs.join_with(env->get_pointers(insn->src(src_idx)));
}
}
env->set_pointers(RESULT_REGISTER, returned_ptrs);
break;
}
case sparta::AbstractValueKind::Top:
case sparta::AbstractValueKind::Bottom: {
// We are intentionally handling Bottom by setting the result register to
// Top. This is a loss of precision but it makes it easier to implement
// dead code elimination. See UsedVarsTest_noReturn for details.
escape_dest(insn, RESULT_REGISTER, env);
break;
}
}
}
/*
* Analyze an invoke instruction in the absence of an available summary.
*/
void analyze_generic_invoke(const IRInstruction* insn,
EnvironmentWithStore* env) {
escape_invoke_params(insn, env);
escape_dest(insn, RESULT_REGISTER, env);
}
} // namespace
namespace local_pointers {
void escape_heap_referenced_objects(const IRInstruction* insn,
EnvironmentWithStore* env) {
auto op = insn->opcode();
// Since we don't model instance fields / array elements, any pointers
// written to them must be treated as escaping.
if (op == OPCODE_APUT_OBJECT || op == OPCODE_SPUT_OBJECT ||
op == OPCODE_IPUT_OBJECT) {
env->set_may_escape(insn->src(0), insn);
} else if (op == OPCODE_FILLED_NEW_ARRAY &&
!type::is_primitive(
type::get_array_component_type(insn->get_type()))) {
for (size_t i = 0; i < insn->srcs_size(); ++i) {
env->set_may_escape(insn->src(i), insn);
}
}
}
void escape_dest(const IRInstruction* insn,
reg_t dest,
EnvironmentWithStore* env) {
// While the analysis would still work if we treated all non-pointer-values
// as escaping pointers, it would bloat the size of our abstract domain and
// incur a runtime performance tax.
if (dest_may_be_pointer(insn)) {
env->set_may_escape_pointer(dest, insn, insn);
} else {
env->set_pointers(dest, PointerSet::top());
}
}
void escape_invoke_params(const IRInstruction* insn,
EnvironmentWithStore* env) {
size_t idx{0};
if (insn->opcode() != OPCODE_INVOKE_STATIC) {
env->set_may_escape(insn->src(0), insn);
++idx;
}
const auto* arg_types = insn->get_method()->get_proto()->get_args();
for (const auto* arg : *arg_types) {
if (!type::is_primitive(arg)) {
env->set_may_escape(insn->src(idx), insn);
}
++idx;
}
}
void default_instruction_handler(const IRInstruction* insn,
EnvironmentWithStore* env) {
auto op = insn->opcode();
if (opcode::is_an_invoke(op)) {
analyze_generic_invoke(insn, env);
} else if (opcode::is_a_move(op)) {
env->set_pointers(insn->dest(), env->get_pointers(insn->src(0)));
} else if (op == OPCODE_CHECK_CAST) {
env->set_pointers(RESULT_REGISTER, env->get_pointers(insn->src(0)));
} else if (opcode::is_move_result_any(op)) {
env->set_pointers(insn->dest(), env->get_pointers(RESULT_REGISTER));
} else if (insn->has_dest()) {
escape_dest(insn, insn->dest(), env);
} else if (insn->has_move_result_any()) {
escape_dest(insn, RESULT_REGISTER, env);
}
}
void FixpointIterator::analyze_instruction(const IRInstruction* insn,
Environment* env) const {
escape_heap_referenced_objects(insn, env);
auto op = insn->opcode();
if (opcode::is_an_invoke(op)) {
if (m_invoke_to_summary_map.count(insn)) {
const auto& summary = m_invoke_to_summary_map.at(insn);
analyze_invoke_with_summary(summary, insn, env);
} else {
default_instruction_handler(insn, env);
}
} else if (may_alloc(op)) {
env->set_fresh_pointer(RESULT_REGISTER, insn);
} else if (op == IOPCODE_LOAD_PARAM_OBJECT) {
env->set_fresh_pointer(insn->dest(), insn);
} else {
default_instruction_handler(insn, env);
if (m_escape_check_cast && op == OPCODE_CHECK_CAST) {
env->set_may_escape(insn->src(0), insn);
}
}
}
void FixpointIteratorMapDeleter::operator()(FixpointIteratorMap* map) {
// Deletion is actually really expensive due to the reference counts of the
// shared_ptrs in the Patricia trees, so we do it in parallel.
auto wq = workqueue_foreach<FixpointIterator*>(
[](FixpointIterator* fp_iter) { delete fp_iter; });
for (auto& pair : *map) {
wq.add_item(pair.second);
}
wq.run_all();
}
static void analyze_method_recursive(
const DexMethod* method,
const call_graph::Graph& call_graph,
sparta::PatriciaTreeSet<const DexMethodRef*> visiting,
FixpointIteratorMap* fp_iter_map,
SummaryCMap* summary_map) {
if (!method || summary_map->count(method) != 0 || visiting.contains(method) ||
method->get_code() == nullptr) {
return;
}
visiting.insert(method);
std::unordered_map<const IRInstruction*, EscapeSummary> invoke_to_summary_map;
if (call_graph.has_node(method)) {
const auto& callee_edges = call_graph.node(method)->callees();
for (const auto& edge : callee_edges) {
auto* callee = edge->callee()->method();
analyze_method_recursive(callee, call_graph, visiting, fp_iter_map,
summary_map);
if (summary_map->count(callee) != 0) {
invoke_to_summary_map.emplace(edge->invoke_insn(),
summary_map->at(callee));
}
}
}
auto* code = method->get_code();
auto& cfg = code->cfg();
auto fp_iter = new FixpointIterator(cfg, std::move(invoke_to_summary_map));
fp_iter->run(Environment());
// The following updates form a critical section.
{
boost::lock_guard<boost::mutex> lock(fp_iter_map->get_lock(method));
fp_iter_map->update_unsafe(method,
[&](auto, FixpointIterator*& v, bool exists) {
redex_assert(!(exists ^ (v != nullptr)));
delete v;
v = fp_iter;
});
summary_map->update(method, [&](auto, EscapeSummary& v, bool) {
v = get_escape_summary(*fp_iter, *code);
});
}
}
FixpointIteratorMapPtr analyze_scope(const Scope& scope,
const call_graph::Graph& call_graph,
SummaryCMap* summary_map_ptr) {
FixpointIteratorMapPtr fp_iter_map(new FixpointIteratorMap());
SummaryCMap summary_map;
if (summary_map_ptr == nullptr) {
summary_map_ptr = &summary_map;
}
summary_map_ptr->emplace(
DexMethod::get_method("Ljava/lang/Object;.<init>:()V"), EscapeSummary{});
walk::parallel::code(scope, [&](const DexMethod* method, IRCode& code) {
sparta::PatriciaTreeSet<const DexMethodRef*> visiting;
analyze_method_recursive(method, call_graph, visiting, fp_iter_map.get(),
summary_map_ptr);
});
return fp_iter_map;
}
void collect_exiting_pointers(const FixpointIterator& fp_iter,
const IRCode& code,
PointerSet* returned_ptrs,
PointerSet* thrown_ptrs) {
auto& cfg = code.cfg();
PointerSet rv = PointerSet::bottom();
returned_ptrs->set_to_bottom();
thrown_ptrs->set_to_bottom();
for (auto* block : cfg.blocks()) {
auto last_insn_it = block->get_last_insn();
if (last_insn_it == block->end()) {
continue;
}
auto insn = last_insn_it->insn;
const auto& state =
fp_iter.get_exit_state_at(const_cast<cfg::Block*>(block));
if (opcode::is_a_return_value(insn->opcode())) {
returned_ptrs->join_with(state.get_pointers(insn->src(0)));
} else if (insn->opcode() == OPCODE_THROW) {
thrown_ptrs->join_with(state.get_pointers(insn->src(0)));
}
}
}
EscapeSummary get_escape_summary(const FixpointIterator& fp_iter,
const IRCode& code) {
EscapeSummary summary;
PointerSet returned_ptrs;
PointerSet thrown_ptrs;
collect_exiting_pointers(fp_iter, code, &returned_ptrs, &thrown_ptrs);
auto& cfg = code.cfg();
// FIXME: fix cfg's GraphInterface so this const_cast isn't necessary
const auto& exit_state =
fp_iter.get_exit_state_at(const_cast<cfg::Block*>(cfg.exit_block()));
uint16_t idx{0};
std::unordered_map<const IRInstruction*, uint16_t> param_indexes;
for (auto& mie : InstructionIterable(code.get_param_instructions())) {
auto* insn = mie.insn;
if (insn->opcode() == IOPCODE_LOAD_PARAM_OBJECT) {
param_indexes.emplace(insn, idx);
// Unlike returned pointers, we don't model thrown pointers specially in
// our EscapeSummary; they are treated as escaping pointers.
if (exit_state.may_have_escaped(insn) || thrown_ptrs.contains(insn)) {
summary.escaping_parameters.emplace(idx);
}
}
++idx;
}
switch (returned_ptrs.kind()) {
case sparta::AbstractValueKind::Value: {
for (auto insn : returned_ptrs.elements()) {
if (insn->opcode() == IOPCODE_LOAD_PARAM_OBJECT) {
summary.returned_parameters.add(param_indexes.at(insn));
} else if (!exit_state.may_have_escaped(insn)) {
summary.returned_parameters.add(FRESH_RETURN);
} else {
// We are returning a pointer that did not originate from an input
// parameter. We have no way of representing these values in our
// summary, hence we set the return value to Top.
summary.returned_parameters.set_to_top();
break;
}
}
break;
}
case sparta::AbstractValueKind::Top: {
summary.returned_parameters.set_to_top();
break;
}
case sparta::AbstractValueKind::Bottom: {
summary.returned_parameters.set_to_bottom();
break;
}
}
return summary;
}
std::ostream& operator<<(std::ostream& o, const EscapeSummary& summary) {
o << "Escaping parameters: ";
bool first{true};
for (auto p_idx : summary.escaping_parameters) {
if (!first) {
o << ", ";
}
o << p_idx;
first = false;
}
o << " Returned parameters: " << summary.returned_parameters;
return o;
}
sparta::s_expr to_s_expr(const EscapeSummary& summary) {
std::vector<sparta::s_expr> escaping_params_s_exprs;
std::vector<uint16_t> escaping_parameters(summary.escaping_parameters.begin(),
summary.escaping_parameters.end());
// Sort in order that the output is deterministic.
std::sort(escaping_parameters.begin(), escaping_parameters.end());
escaping_params_s_exprs.reserve(escaping_parameters.size());
for (auto idx : escaping_parameters) {
escaping_params_s_exprs.emplace_back(idx);
}
sparta::s_expr returned_params_s_expr;
switch (summary.returned_parameters.kind()) {
case sparta::AbstractValueKind::Top:
returned_params_s_expr = sparta::s_expr("Top");
break;
case sparta::AbstractValueKind::Bottom:
returned_params_s_expr = sparta::s_expr("Bottom");
break;
case sparta::AbstractValueKind::Value: {
std::vector<sparta::s_expr> idx_s_exprs;
const auto& elems = summary.returned_parameters.elements();
std::vector<uint16_t> returned_parameters(elems.begin(), elems.end());
std::sort(returned_parameters.begin(), returned_parameters.end());
idx_s_exprs.reserve(returned_parameters.size());
for (auto idx : returned_parameters) {
idx_s_exprs.emplace_back(idx);
}
returned_params_s_expr = sparta::s_expr(idx_s_exprs);
break;
}
}
return sparta::s_expr{sparta::s_expr(escaping_params_s_exprs),
returned_params_s_expr};
}
EscapeSummary EscapeSummary::from_s_expr(const sparta::s_expr& expr) {
EscapeSummary summary;
always_assert(expr.is_list() && expr.size() == 2);
auto escaping_params_s_expr = expr[0];
always_assert(escaping_params_s_expr.is_list());
for (size_t i = 0; i < escaping_params_s_expr.size(); ++i) {
summary.escaping_parameters.emplace(escaping_params_s_expr[i].get_int32());
}
auto returned_params_s_expr = expr[1];
if (returned_params_s_expr.is_string()) {
const auto& s = returned_params_s_expr.get_string();
if (s == "Top") {
summary.returned_parameters.set_to_top();
} else {
redex_assert(s == "Bottom");
summary.returned_parameters.set_to_bottom();
}
} else {
always_assert(returned_params_s_expr.is_list());
for (size_t i = 0; i < returned_params_s_expr.size(); ++i) {
summary.returned_parameters.add(returned_params_s_expr[i].get_int32());
}
}
return summary;
}
} // namespace local_pointers