opt/throw-propagation/ThrowPropagationPass.cpp (276 lines of code) (raw):

/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */

/*
 * This optimizer pass removes dead code by inserting throw instructions as
 * follows:
 *
 * When a method invocation is known to have no normal return behavior (because
 * all possibly invoked methods are known and have no normal return path, as
 * they either throw an exception or do not terminate, but never return), then
 * all instructions following such an invocation are dead.
 *
 * In such cases, we insert
 *   new-instance v0, Ljava/lang/RuntimeException;
 *   const-string v1, "Redex: Unreachable code after no-return invoke"
 *   invoke-direct v0, v1, Ljava/lang/RuntimeException;.<init>:(Ljava/lang/String;)V
 *   throw v0
 * after such invocations. The control-flow graph will then remove all no
 * longer reachable instructions and blocks. We run this to a fixed point.
 *
 * TODO: Run constant-propagation in caller, and then do callsite-specific
 * constant-propagation in callee (similar to what the inliner does); some
 * return instructions might turn out to be unreachable for particular
 * callsites, and thus invocations might more often be determined to not
 * return. (This could in many cases detect precondition violations, as
 * precondition-check methods typically conditionally throw/return, and then we
 * could effectively remove the entire method body. Cool optimization, but I
 * don't know how often it applies in practice...)
 * Then again, in another generalization, all this could one day be part of the
 * interprocedural constant-propagation.
*/

#include "ThrowPropagationPass.h"

#include "ControlFlow.h"
#include "DexUtil.h"
#include "IRCode.h"
#include "PassManager.h"
#include "Purity.h"
#include "Resolver.h"
#include "ScopedCFG.h"
#include "Show.h"
#include "Trace.h"
#include "Walkers.h"

namespace {

// Names of the metrics this pass reports to the PassManager at the end of
// run_pass.
constexpr const char* METRIC_THROWS_INSERTED = "num_throws_inserted";
constexpr const char* METRIC_UNREACHABLE_INSTRUCTIONS =
    "num_unreachable_instructions";
constexpr const char* METRIC_NO_RETURN_METHODS = "num_no_return_methods";
constexpr const char* METRIC_ITERATIONS = "num_iterations";

} // namespace

// Binds this pass's configuration options onto m_config.
void ThrowPropagationPass::bind_config() {
  bind("debug", false, m_config.debug);
  bind("blocklist",
       {},
       m_config.blocklist,
       "List of classes that will not be analyzed to determine which methods "
       "have no return.");
}

// Scans all methods of the scope in parallel and returns the set of methods
// that contain no return instruction at all, i.e. every execution path either
// throws or does not terminate. Abstract, external, native, no-optimization,
// and blocklisted methods are excluded from consideration.
std::unordered_set<DexMethod*> ThrowPropagationPass::get_no_return_methods(
    const Config& config, const Scope& scope) {
  ConcurrentSet<DexMethod*> concurrent_no_return_methods;
  walk::parallel::methods(scope, [&](DexMethod* method) {
    if (is_abstract(method) || method->is_external() || is_native(method) ||
        method->rstate.no_optimizations()) {
      return;
    }
    if (config.blocklist.count(method->get_class())) {
      TRACE(TP, 4, "black-listed method: %s", SHOW(method));
      return;
    }
    bool can_return{false};
    // Stop scanning this method as soon as any return instruction is seen.
    editable_cfg_adapter::iterate_with_iterator(
        method->get_code(), [&can_return](const IRList::iterator& it) {
          if (opcode::is_a_return(it->insn->opcode())) {
            can_return = true;
            return editable_cfg_adapter::LOOP_BREAK;
          } else {
            return editable_cfg_adapter::LOOP_CONTINUE;
          }
        });
    if (!can_return) {
      concurrent_no_return_methods.insert(method);
    }
  });
  std::unordered_set<DexMethod*> no_return_methods(
      concurrent_no_return_methods.begin(),
      concurrent_no_return_methods.end());
  return no_return_methods;
}

// Rewrites a single method's code: after every invocation that is known to
// never return normally (per no_return_methods, consulting the override graph
// for all possible dispatch targets), insert code that throws a
// RuntimeException; the CFG then removes the now-unreachable blocks.
// Returns per-method statistics (throws inserted, instructions removed).
ThrowPropagationPass::Stats ThrowPropagationPass::run(
    const Config& config,
    const std::unordered_set<DexMethod*>& no_return_methods,
    const method_override_graph::Graph& graph,
    IRCode* code) {
  ThrowPropagationPass::Stats stats;
  cfg::ScopedCFG cfg(code);
  // Returns true if this invoke instruction can only dispatch to methods
  // that are all in the no-return set.
  auto is_no_return_invoke = [&](IRInstruction* insn) {
    if (!opcode::is_an_invoke(insn->opcode())) {
      return false;
    }
    if (insn->opcode() == OPCODE_INVOKE_SUPER) {
      // TODO — invoke-super is not analyzed yet; treat it conservatively as
      // possibly returning.
      return false;
    }
    auto method_ref = insn->get_method();
    DexMethod* method = resolve_method(method_ref, opcode_to_search(insn));
    if (method == nullptr) {
      return false;
    }
    // NOTE(review): interface methods of annotation types are skipped here —
    // presumably because their invocations have implicit runtime behavior not
    // captured by analyzing declared implementations; confirm.
    if (insn->opcode() == OPCODE_INVOKE_INTERFACE &&
        is_annotation(type_class(method->get_class()))) {
      TRACE(TP, 4, "annotation interface method: %s", SHOW(method));
      return false;
    }
    bool invoke_can_return{false};
    // Visits the resolved method and every overriding method; if any one of
    // them is not known to be no-return, the invocation may return normally.
    auto check_for_no_return = [&](DexMethod* other_method) {
      if (!no_return_methods.count(other_method)) {
        invoke_can_return = true;
      }
      return true;
    };
    if (!process_base_and_overriding_methods(
            &graph, method, /* methods_to_ignore */ nullptr,
            /* ignore_methods_with_assumenosideeffects */ false,
            check_for_no_return)) {
      return false;
    }
    return !invoke_can_return;
  };
  // Lazily allocated pair of temp registers (exception object, message
  // string), shared by all throw sequences inserted into this method.
  boost::optional<std::pair<reg_t, reg_t>> regs;
  // Starting right after the given instruction and following gotos, returns
  // true if execution is guaranteed to reach a throw or to loop forever,
  // executing only trivial instructions (or the construction of a
  // RuntimeException) along the way.
  auto will_throw_or_not_terminate = [&cfg](cfg::InstructionIterator it) {
    std::unordered_set<IRInstruction*> visited{it->insn};
    while (true) {
      it = cfg->next_following_gotos(it);
      if (!visited.insert(it->insn).second) {
        // We found a loop
        return true;
      }
      switch (it->insn->opcode()) {
      case OPCODE_CONST:
      case OPCODE_CONST_STRING:
      case OPCODE_MOVE:
      case OPCODE_NOP:
      case OPCODE_NEW_INSTANCE:
      case IOPCODE_LOAD_PARAM:
      case IOPCODE_LOAD_PARAM_OBJECT:
      case IOPCODE_MOVE_RESULT_PSEUDO:
        // Trivial instructions: keep scanning forward.
        break;
      case OPCODE_INVOKE_DIRECT: {
        // Only the RuntimeException constructor is tolerated on the way to a
        // throw; any other invocation might not lead to an immediate throw.
        auto method = it->insn->get_method();
        if (!method::is_init(method) ||
            method->get_class() !=
                DexType::make_type("Ljava/lang/RuntimeException;")) {
          return false;
        }
        break;
      }
      case OPCODE_THROW:
        return true;
      default:
        return false;
      }
    }
    not_reached();
  };
  // Helper function that checks if there's any point in doing a transformation
  // (not needed if we are already going to throw or not terminate anyway),
  // and it performs block splitting if needed (see comment inline for details).
  auto check_if_dead_code_present_and_prepare_block =
      [&](cfg::Block* block, const ir_list::InstructionIterator& it) -> bool {
    auto insn = it->insn;
    TRACE(TP, 4, "no return: %s", SHOW(insn));
    auto cfg_it = block->to_cfg_instruction_iterator(it);
    if (insn == block->get_last_insn()->insn) {
      if (will_throw_or_not_terminate(cfg_it)) {
        // There's already code in place that will immediately and
        // unconditionally throw an exception, and thus we don't need to
        // bother rewriting the code into a throw. The main reason we are
        // doing this is to not inflate our throws_inserted statistics.
        return false;
      }
    } else {
      // When the invoke instruction isn't the last in the block, then we'll
      // need to some extra work. (Ideally, we could have just inserted our
      // throwing instructions in the middle of the existing block, but that
      // causes complications due to the possibly following and then dangling
      // move-result instruction, so we'll explicitly split the block here
      // in order to keep all invariant happy.)
      if (will_throw_or_not_terminate(cfg_it)) {
        // As above, nothing to do, since an exception will be thrown anyway.
        return false;
      }
      always_assert(cfg->get_succ_edge_of_type(block, cfg::EDGE_THROW) ==
                    nullptr);
      cfg->split_block(cfg_it);
      // After splitting, the no-return invoke is the last instruction of
      // `block`, as the rest of the logic requires.
      always_assert(insn == block->get_last_insn()->insn);
    }
    return true;
  };
  // Creates a new block that constructs and throws a RuntimeException, and
  // redirects `block`'s goto edge to it (`insn` is the no-return invoke, used
  // only for the debug message).
  auto insert_throw = [&](cfg::Block* block, IRInstruction* insn) {
    std::string message{"Redex: Unreachable code after no-return invoke"};
    if (config.debug) {
      message += ":";
      message += SHOW(insn);
    }
    if (!regs) {
      regs = std::make_pair(cfg->allocate_temp(), cfg->allocate_temp());
    }
    auto exception_reg = regs->first;
    auto string_reg = regs->second;
    cfg::Block* new_block = cfg->create_block();
    std::vector<IRInstruction*> insns;
    // new-instance <exception_reg>, Ljava/lang/RuntimeException;
    auto new_instance_insn = new IRInstruction(OPCODE_NEW_INSTANCE);
    auto exception_type = DexType::get_type("Ljava/lang/RuntimeException;");
    always_assert(exception_type != nullptr);
    new_instance_insn->set_type(exception_type);
    insns.push_back(new_instance_insn);
    auto move_result_pseudo_exception_insn =
        new IRInstruction(IOPCODE_MOVE_RESULT_PSEUDO_OBJECT);
    move_result_pseudo_exception_insn->set_dest(exception_reg);
    insns.push_back(move_result_pseudo_exception_insn);
    // const-string <string_reg>, <message>
    auto const_string_insn = new IRInstruction(OPCODE_CONST_STRING);
    const_string_insn->set_string(DexString::make_string(message));
    insns.push_back(const_string_insn);
    auto move_result_pseudo_string_insn =
        new IRInstruction(IOPCODE_MOVE_RESULT_PSEUDO_OBJECT);
    move_result_pseudo_string_insn->set_dest(string_reg);
    insns.push_back(move_result_pseudo_string_insn);
    // invoke-direct {exception_reg, string_reg},
    //   Ljava/lang/RuntimeException;.<init>:(Ljava/lang/String;)V
    auto invoke_direct_insn = new IRInstruction(OPCODE_INVOKE_DIRECT);
    auto init_method = DexMethod::get_method(
        "Ljava/lang/RuntimeException;.<init>:(Ljava/lang/String;)V");
    always_assert(init_method != nullptr);
    invoke_direct_insn->set_method(init_method)
        ->set_srcs_size(2)
        ->set_src(0, exception_reg)
        ->set_src(1, string_reg);
    insns.push_back(invoke_direct_insn);
    // throw <exception_reg>
    auto throw_insn = new IRInstruction(OPCODE_THROW);
    throw_insn->set_src(0, exception_reg);
    insns.push_back(throw_insn);
    new_block->push_back(insns);
    // Copy the throw edges so the new throwing code is routed to the same
    // catch handlers (if any) as the original block.
    cfg->copy_succ_edges_of_type(block, new_block, cfg::EDGE_THROW);
    auto existing_goto_edge = cfg->get_succ_edge_of_type(block, cfg::EDGE_GOTO);
    always_assert(existing_goto_edge != nullptr);
    cfg->set_edge_target(existing_goto_edge, new_block);
    stats.throws_inserted++;
  };
  for (auto block : cfg->blocks()) {
    auto ii = InstructionIterable(block);
    for (auto it = ii.begin(); it != ii.end(); it++) {
      auto insn = it->insn;
      if (!is_no_return_invoke(insn)) {
        continue;
      }
      if (!check_if_dead_code_present_and_prepare_block(block, it)) {
        continue;
      }
      insert_throw(block, insn);
      // Stop processing more instructions in this block
      break;
    }
  }
  if (stats.throws_inserted > 0) {
    // Anything after the inserted throws is now unreachable; let the CFG
    // delete it and shrink the register frame accordingly.
    stats.unreachable_instruction_count +=
        cfg->remove_unreachable_blocks().first;
    cfg->recompute_registers_size();
  }
  return stats;
}

// Pass entry point: builds editable CFGs, then iterates to a fixed point —
// each round recomputes the set of no-return methods and rewrites all
// callers, since removing dead code can turn more methods into no-return
// methods. Finally clears CFGs and reports metrics.
void ThrowPropagationPass::run_pass(DexStoresVector& stores,
                                    ConfigFiles&,
                                    PassManager& mgr) {
  Scope scope = build_class_scope(stores);
  walk::parallel::code(scope, [&](const DexMethod* method, IRCode& code) {
    if (!method->rstate.no_optimizations()) {
      code.build_cfg(/* editable */ true);
    }
  });
  auto override_graph = method_override_graph::build_graph(scope);
  size_t last_no_return_methods{0};
  int iterations = 0;
  Stats stats;
  while (true) {
    iterations++;
    std::unordered_set<DexMethod*> no_return_methods =
        get_no_return_methods(m_config, scope);
    TRACE(TP, 2, "iteration %d, no_return_methods: %zu", iterations,
          no_return_methods.size());
    // Fixed point reached: the set of no-return methods stopped changing in
    // size.
    if (no_return_methods.size() == last_no_return_methods) {
      break;
    }
    last_no_return_methods = no_return_methods.size();
    auto last_stats =
        walk::parallel::methods<Stats>(scope, [&](DexMethod* method) -> Stats {
          auto code = method->get_code();
          if (method->rstate.no_optimizations() || code == nullptr) {
            return {};
          }
          return run(m_config, no_return_methods, *override_graph, code);
        });
    // No throws inserted this round means another round cannot make progress
    // either.
    if (last_stats.throws_inserted == 0) {
      break;
    }
    stats += last_stats;
  }
  walk::parallel::code(scope, [&](const DexMethod* method, IRCode& code) {
    if (!method->rstate.no_optimizations()) {
      code.clear_cfg();
    }
  });
  mgr.incr_metric(METRIC_THROWS_INSERTED, stats.throws_inserted);
  mgr.incr_metric(METRIC_UNREACHABLE_INSTRUCTIONS,
                  stats.unreachable_instruction_count);
  mgr.incr_metric(METRIC_NO_RETURN_METHODS, last_no_return_methods);
  mgr.incr_metric(METRIC_ITERATIONS, iterations);
}

// Accumulates per-method statistics into an aggregate.
ThrowPropagationPass::Stats& ThrowPropagationPass::Stats::operator+=(
    const Stats& that) {
  throws_inserted += that.throws_inserted;
  unreachable_instruction_count += that.unreachable_instruction_count;
  return *this;
}

static ThrowPropagationPass s_pass;