service/cse/CommonSubexpressionElimination.cpp

/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */

/*
 * This optimizer pass eliminates common subexpressions.
 *
 * It's implemented via a global-value-numbering scheme.
 * While doing abstract interpretation on a method's code, we evolve...
 * 1) a mapping of registers to "values"
 * 2) a mapping of "values" to first-defining instructions
 *
 * A "value" is similar to an instruction, in that it has an IROpcode,
 * a list of src dependencies, and type/field/string/... payload data as
 * necessary; however, it's different in that it doesn't have an identity, and
 * its src dependencies are expressed in terms of other values, not registers.
 *
 * If the same value has multiple (equivalent) defining instructions after the
 * analysis reaches its fixed point, then the optimization...
 * - inserts a move of the result to a temporary register after the first
 *   defining instruction, and
 * - inserts another move from the temporary register to the result register
 *   of each later (equivalent) defining instruction, after that defining
 *   instruction
 *
 * The moves are usually eliminated by copy-propagation, and the now redundant
 * later defining instructions are removed by local DCE --- both of which run
 * on a method's code immediately if CSE did a mutation.
 *
 * Notes:
 * - Memory read instructions are captured as well, and, in effect, may be
 *   reordered --- basically, later redundant reads may be replaced by the
 *   results of earlier reads.
 *   Of course, true memory barriers are modeled (method invocations, volatile
 *   field accesses, monitor instructions), and to be conservative, all other
 *   writes to the heap (fields, array elements) are also treated as memory
 *   barriers. This ensures that thread-local behavior is unaffected.
 * - There is no proper notion of phi-nodes at this time. Instead, conflicting
 *   information in the register-to-values and values'-first-definitions envs
 *   simply merges to top. Similarly, (memory) barriers are realized by setting
 *   all barrier-sensitive (heap-dependent) mapping entries to top. When an
 *   instruction is later interpreted that depends on a source register whose
 *   register-to-value binding is top, then a special value is created for that
 *   register (a "pre-state-source" value that refers to the value of the
 *   source register as it was *before* the instruction). This recovers the
 *   tracking of merged or havoced registers, in a way that's similar to
 *   phi-nodes, but lazy.
 */
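// Illustrative example of the transformation (hypothetical IR; the register
// numbers, field reference, and exact placement of the inserted moves are
// made up for the sake of the sketch):
//
//   (sget-object "LFoo;.bar:LBar;")
//   (move-result-pseudo-object v0)
//   ... no barrier that clobbers LFoo;.bar ...
//   (sget-object "LFoo;.bar:LBar;")
//   (move-result-pseudo-object v1)
//
// Both sget-object instructions produce the same "value", so the pass captures
// the first result in a fresh temp and forwards it to the later result:
//
//   (sget-object "LFoo;.bar:LBar;")
//   (move-result-pseudo-object v0)
//   (move-object v2 v0)
//   ...
//   (sget-object "LFoo;.bar:LBar;")
//   (move-result-pseudo-object v1)
//   (move-object v1 v2)
//
// Copy-propagation then collapses the moves, and local DCE removes the now
// redundant second sget-object.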
#include "CommonSubexpressionElimination.h"

#include <utility>

#include "BaseIRAnalyzer.h"
#include "CFGMutation.h"
#include "ConstantAbstractDomain.h"
#include "ControlFlow.h"
#include "FieldOpTracker.h"
#include "HashedSetAbstractDomain.h"
#include "IRCode.h"
#include "IRInstruction.h"
#include "PatriciaTreeMapAbstractEnvironment.h"
#include "PatriciaTreeSetAbstractDomain.h"
#include "ReducedProductAbstractDomain.h"
#include "Resolver.h"
#include "Show.h"
#include "StlUtil.h"
#include "Trace.h"
#include "TypeInference.h"
#include "Walkers.h"

using namespace sparta;
using namespace cse_impl;

namespace {

using value_id_t = uint64_t;

constexpr size_t TRACKED_LOCATION_BITS = 42; // leaves 20 bits for running index

enum ValueIdFlags : value_id_t {
  // lower bits for tracked locations
  UNTRACKED = 0,
  IS_FIRST_TRACKED_LOCATION = ((value_id_t)1),
  IS_OTHER_TRACKED_LOCATION = ((value_id_t)1) << TRACKED_LOCATION_BITS,
  IS_ONLY_READ_NOT_WRITTEN_LOCATION = ((value_id_t)1)
                                      << (TRACKED_LOCATION_BITS + 1),
  IS_TRACKED_LOCATION_MASK = IS_ONLY_READ_NOT_WRITTEN_LOCATION * 2 - 1,
  // upper bits for unique running index
  BASE = ((value_id_t)1) << (TRACKED_LOCATION_BITS + 2),
};

using namespace ir_analyzer;

// Marker opcode for values representing a source of an instruction; this is
// used to recover from merged / havoced values.
const IROpcode IOPCODE_PRE_STATE_SRC = IROpcode(0xFFFF);

// Marker opcode for positional values that must not be moved.
const IROpcode IOPCODE_POSITIONAL = IROpcode(0xFFFE);

// This is only used for an INVOKE-VIRTUAL insn. Marker opcode for a potential
// unboxing value.
const IROpcode IOPCODE_POSITIONAL_UNBOXING = IROpcode(0xFFFD);

struct IRValue {
  IROpcode opcode;
  std::vector<value_id_t> srcs;
  union {
    // Zero-initialize this union with the uint64_t member instead of a
    // pointer-type member so that it works properly even on 32-bit machines
    uint64_t literal{0};
    const DexString* string;
    const DexType* type;
    const DexFieldRef* field;
    const DexMethodRef* method;
    const DexOpcodeData* data;
    // By setting positional_insn to the pointer of an instruction, it
    // effectively makes the "value" unique (as unique as the instruction),
    // avoiding identifying otherwise structurally equivalent operations, e.g.
    // two move-exception instructions that really must remain at their
    // existing position, and cannot be replaced.
    const IRInstruction* positional_insn;
  };
};

struct IRValueHasher {
  size_t operator()(const IRValue& tv) const {
    size_t hash = tv.opcode;
    for (auto src : tv.srcs) {
      hash = hash * 27 + src;
    }
    hash = hash * 27 + (size_t)tv.literal;
    return hash;
  }
};

bool operator==(const IRValue& a, const IRValue& b) {
  return a.opcode == b.opcode && a.srcs == b.srcs && a.literal == b.literal;
}

using IRInstructionsDomain =
    sparta::PatriciaTreeSetAbstractDomain<const IRInstruction*>;
using ValueIdDomain = sparta::ConstantAbstractDomain<value_id_t>;

using DefEnvironment =
    sparta::PatriciaTreeMapAbstractEnvironment<value_id_t,
                                               IRInstructionsDomain>;

using RefEnvironment =
    sparta::PatriciaTreeMapAbstractEnvironment<reg_t, ValueIdDomain>;

class CseEnvironment final
    : public sparta::ReducedProductAbstractDomain<CseEnvironment,
                                                  DefEnvironment,
                                                  RefEnvironment> {
 public:
  using ReducedProductAbstractDomain::ReducedProductAbstractDomain;
  CseEnvironment()
      : ReducedProductAbstractDomain(
            std::make_tuple(DefEnvironment(), RefEnvironment())) {}

  static void reduce_product(std::tuple<DefEnvironment, RefEnvironment>&) {}

  const DefEnvironment& get_def_env() const {
    return ReducedProductAbstractDomain::get<0>();
  }

  const RefEnvironment& get_ref_env() const {
    return ReducedProductAbstractDomain::get<1>();
  }

  CseEnvironment& mutate_def_env(std::function<void(DefEnvironment*)> f) {
    apply<0>(std::move(f));
    return *this;
  }

  CseEnvironment& mutate_ref_env(std::function<void(RefEnvironment*)> f) {
    apply<1>(std::move(f));
    return *this;
  }
};

static Barrier make_barrier(const IRInstruction* insn) {
  Barrier b;
  b.opcode = insn->opcode();
  if (insn->has_field()) {
    b.field = resolve_field(insn->get_field(),
                            opcode::is_an_sfield_op(insn->opcode())
                                ? FieldSearch::Static
                                : FieldSearch::Instance);
  } else if (insn->has_method()) {
    b.method = resolve_method(insn->get_method(), opcode_to_search(insn));
  }
  return b;
}

CseLocation get_written_array_location(IROpcode opcode) {
  switch (opcode) {
  case OPCODE_APUT:
    return CseLocation(CseSpecialLocations::ARRAY_COMPONENT_TYPE_INT);
  case OPCODE_APUT_BYTE:
    return CseLocation(CseSpecialLocations::ARRAY_COMPONENT_TYPE_BYTE);
  case OPCODE_APUT_CHAR:
    return CseLocation(CseSpecialLocations::ARRAY_COMPONENT_TYPE_CHAR);
  case OPCODE_APUT_WIDE:
    return CseLocation(CseSpecialLocations::ARRAY_COMPONENT_TYPE_WIDE);
  case OPCODE_APUT_SHORT:
    return CseLocation(CseSpecialLocations::ARRAY_COMPONENT_TYPE_SHORT);
  case OPCODE_APUT_OBJECT:
    return CseLocation(CseSpecialLocations::ARRAY_COMPONENT_TYPE_OBJECT);
  case OPCODE_APUT_BOOLEAN:
    return CseLocation(CseSpecialLocations::ARRAY_COMPONENT_TYPE_BOOLEAN);
  default:
    always_assert(false);
  }
}

CseLocation get_written_location(const Barrier& barrier) {
  if (opcode::is_an_aput(barrier.opcode)) {
    return get_written_array_location(barrier.opcode);
  } else if (opcode::is_an_iput(barrier.opcode) ||
             opcode::is_an_sput(barrier.opcode)) {
    return get_field_location(barrier.opcode, barrier.field);
  } else {
    return CseLocation(CseSpecialLocations::GENERAL_MEMORY_BARRIER);
  }
}

bool is_barrier_relevant(const Barrier& barrier,
                         const CseUnorderedLocationSet& read_locations) {
  auto location = get_written_location(barrier);
  return location ==
             CseLocation(CseSpecialLocations::GENERAL_MEMORY_BARRIER) ||
         read_locations.count(location);
}

const CseUnorderedLocationSet no_locations = CseUnorderedLocationSet();

const CseUnorderedLocationSet general_memory_barrier_locations =
    CseUnorderedLocationSet{
        CseLocation(CseSpecialLocations::GENERAL_MEMORY_BARRIER)};

class Analyzer final : public BaseIRAnalyzer<CseEnvironment>
{ public: Analyzer(SharedState* shared_state, cfg::ControlFlowGraph& cfg, bool is_method_static, bool is_method_init_or_clinit, DexType* declaring_type) : BaseIRAnalyzer(cfg), m_shared_state(shared_state) { // Collect all read locations std::unordered_map<CseLocation, size_t, CseLocationHasher> read_location_counts; for (const auto& mie : cfg::InstructionIterable(cfg)) { auto insn = mie.insn; auto location = get_read_location(insn); if (location != CseLocation(CseSpecialLocations::GENERAL_MEMORY_BARRIER)) { read_location_counts[location]++; } else if (opcode::is_an_invoke(insn->opcode()) && shared_state->has_pure_method(insn)) { for (auto l : shared_state->get_read_locations_of_conditionally_pure_method( insn->get_method(), insn->opcode())) { read_location_counts[l]++; } } } // Prune those which are final fields that cannot get mutated in our context for (auto it = read_location_counts.begin(); it != read_location_counts.end();) { auto location = it->first; // If we are reading a final field... if (location.has_field() && shared_state->is_finalish(location.get_field()) && !root(location.get_field()) && can_rename(location.get_field()) && can_delete(location.get_field()) && !location.get_field()->is_external()) { // ... and we are not analyzing a method that is a corresponding // constructor or static initializer of the declaring type of the // field ... if (!is_method_init_or_clinit || location.get_field()->get_class() != declaring_type || is_static(location.get_field()) != is_method_static) { // ... then we don't need track the field as a memory location // (that in turn might get invalidated on general memory barriers). m_tracked_locations.emplace(location, ValueIdFlags::UNTRACKED); it = read_location_counts.erase(it); continue; } } m_read_locations.insert(location); it++; } // Collect all relevant written locations std::unordered_map<CseLocation, size_t, CseLocationHasher> written_location_counts; for (const auto& mie : cfg::InstructionIterable(cfg)) { auto locations = shared_state->get_relevant_written_locations( mie.insn, nullptr /* exact_virtual_scope */, m_read_locations); if (!locations.count( CseLocation(CseSpecialLocations::GENERAL_MEMORY_BARRIER))) { for (const auto& location : locations) { written_location_counts[location]++; } } } // Check which locations get written and read (vs. just written) std::vector<CseLocation> read_and_written_locations; for (const auto& p : written_location_counts) { always_assert(p.first != CseLocation(CseSpecialLocations::GENERAL_MEMORY_BARRIER)); if (read_location_counts.count(p.first)) { read_and_written_locations.push_back(p.first); } else { always_assert(!m_tracked_locations.count(p.first)); m_tracked_locations.emplace(p.first, ValueIdFlags::UNTRACKED); } } // Also keep track of locations that get read but not written for (const auto& p : read_location_counts) { if (!written_location_counts.count(p.first)) { always_assert(!m_tracked_locations.count(p.first)); m_tracked_locations.emplace( p.first, ValueIdFlags::IS_ONLY_READ_NOT_WRITTEN_LOCATION); } } // We'll use roughly half of the bits in a value_id to encode what kind of // heap locations were involved in producing the value, so that we can // later quickly identify which values need to be invalidated when // encountering a write to a specific location. However, we only have a // limited number of bits available, and potentially many more relevant // locations. 
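    // Concretely (following the ValueIdFlags definitions above): bits 0..41 of
    // a value_id can each represent one individually tracked location, bits 42
    // and 43 are the catch-all and read-only-location bits described below,
    // and the upper ~20 bits carry the unique running index as a multiple of
    // BASE. Invalidating a written location then amounts to erasing every
    // value_id whose low bits intersect the written locations' mask (see
    // get_location_value_id_mask and the erase_all_matching call in
    // analyze_instruction).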
// // We'll identify the long tail of locations that are read and written via a // separate bit (IS_OTHER_TRACKED_LOCATION), and we'll also reserve one bit // for locations that are read but not written // (IS_ONLY_READ_NOT_WRITTEN_LOCATION), so that we can identify these // heap-dependent locations when we need to validate all heap-dependent // locations in case of a general memory barrier. // // We use a heuristic to decide which locations get their own bit, vs // the long-tail treatment. // // The currently implemented heuristic prefers locations that are often // read and rarely written. // // TODO: Explore other (variations of this) heuristics. std::sort(read_and_written_locations.begin(), read_and_written_locations.end(), [&](CseLocation a, CseLocation b) { auto get_weight = [&](CseLocation l) { auto reads = read_location_counts.at(l); auto writes = written_location_counts.at(l); return (reads << 16) / writes; }; auto weight_a = get_weight(a); auto weight_b = get_weight(b); if (weight_a != weight_b) { // higher weight takes precedence return weight_a > weight_b; } // in case of a tie, still ensure a deterministic total ordering return a < b; }); TRACE(CSE, 4, "[CSE] relevant locations: %zu %s", read_and_written_locations.size(), read_and_written_locations.size() > 13 ? "(HUGE!)" : ""); value_id_t next_bit = ValueIdFlags::IS_FIRST_TRACKED_LOCATION; for (auto l : read_and_written_locations) { TRACE(CSE, 4, "[CSE] %s: %zu reads, %zu writes", l.special_location < CseSpecialLocations::END ? "array element" : SHOW(l.field), read_location_counts.at(l), written_location_counts.at(l)); m_tracked_locations.emplace(l, next_bit); if (next_bit == ValueIdFlags::IS_OTHER_TRACKED_LOCATION) { m_using_other_tracked_location_bit = true; } else { // we've already reached the last catch-all tracked read/write location next_bit <<= 1; } } MonotonicFixpointIterator::run(CseEnvironment::top()); } void analyze_instruction(const IRInstruction* insn, CseEnvironment* current_state) const override { const auto set_current_state_at = [&](reg_t reg, bool wide, ValueIdDomain value) { current_state->mutate_ref_env([&](RefEnvironment* env) { env->set(reg, value); if (wide) { env->set(reg + 1, ValueIdDomain::top()); } }); }; init_pre_state(insn, current_state); auto clobbered_locations = get_clobbered_locations(insn, current_state); auto opcode = insn->opcode(); switch (opcode) { case OPCODE_MOVE: case OPCODE_MOVE_OBJECT: case OPCODE_MOVE_WIDE: { auto domain = current_state->get_ref_env().get(insn->src(0)); set_current_state_at(insn->dest(), insn->dest_is_wide(), domain); break; } default: { // If we get here, reset destination. 
if (insn->has_dest()) { ValueIdDomain domain; if (opcode::is_move_result_any(opcode)) { domain = current_state->get_ref_env().get(RESULT_REGISTER); } else { domain = get_value_id_domain(insn, current_state, clobbered_locations); } auto c = domain.get_constant(); if (c) { auto value_id = *c; if (current_state->get_def_env().get(value_id).is_top()) { current_state->mutate_def_env([&](DefEnvironment* env) { env->set(value_id, IRInstructionsDomain(insn)); }); } } set_current_state_at(insn->dest(), insn->dest_is_wide(), domain); } else if (insn->has_move_result_any()) { ValueIdDomain domain = get_value_id_domain(insn, current_state, clobbered_locations); current_state->mutate_ref_env( [&](RefEnvironment* env) { env->set(RESULT_REGISTER, domain); }); if (opcode == OPCODE_NEW_ARRAY && domain.get_constant()) { auto value = get_array_length_value(*domain.get_constant()); TRACE(CSE, 4, "[CSE] installing array-length forwarding for %s", SHOW(insn)); install_forwarding(insn, value, current_state); } } break; } } if (!clobbered_locations.empty()) { value_id_t mask = (value_id_t)0; for (const auto& l : clobbered_locations) { mask |= get_location_value_id_mask(l); } // TODO: The following loops are probably the most expensive thing in this // algorithm; is there a better way of doing this? (Then again, overall, // the time this algorithm takes seems reasonable.) bool any_changes = false; current_state->mutate_def_env([mask, &any_changes](DefEnvironment* env) { if (env->erase_all_matching(mask)) { any_changes = true; } }); current_state->mutate_ref_env([mask, &any_changes](RefEnvironment* env) { bool any_map_changes = env->map([mask](ValueIdDomain domain) { auto c = domain.get_constant(); always_assert(c); auto value_id = *c; if (value_id & mask) { return ValueIdDomain::top(); } return domain; }); if (any_map_changes) { any_changes = true; } }); if (any_changes) { m_shared_state->log_barrier(make_barrier(insn)); } if (!clobbered_locations.count( CseLocation(CseSpecialLocations::GENERAL_MEMORY_BARRIER))) { auto value = get_equivalent_put_value(insn, current_state); if (value) { TRACE(CSE, 4, "[CSE] installing store-to-load forwarding for %s", SHOW(insn)); install_forwarding(insn, *value, current_state); } } } } void install_forwarding(const IRInstruction* insn, const IRValue& value, CseEnvironment* current_state) const { auto value_id = *get_value_id(value); current_state->mutate_def_env([value_id, insn](DefEnvironment* env) { env->set(value_id, IRInstructionsDomain(insn)); }); } bool is_pre_state_src(value_id_t value_id) const { return !!m_pre_state_value_ids.count(value_id); } size_t get_value_ids_size() { return m_value_ids.size(); } bool using_other_tracked_location_bit() { return m_using_other_tracked_location_bit; } const std::vector<IRInstruction*>& get_unboxing_insns() { return m_unboxing_insns; } private: // After analysis, the insns in this list should be refined to call its // unboxing implementor. 
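  // For illustration: if v1 holds the result of
  // Ljava/lang/Integer;.valueOf:(I)Ljava/lang/Integer; and a later
  // invoke-virtual on v1 resolves to the abstract
  // Ljava/lang/Number;.intValue:()I, that unboxing invocation ends up in this
  // list; patch() then inserts a check-cast of the receiver to
  // Ljava/lang/Integer; and retargets the call to
  // Ljava/lang/Integer;.intValue:()I, which (per the comment in patch()) can
  // be treated as pure and optimized away later.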
mutable std::vector<IRInstruction*> m_unboxing_insns; mutable std::unordered_set<IRInstruction*> m_unboxing_insns_set; CseUnorderedLocationSet get_clobbered_locations( const IRInstruction* insn, CseEnvironment* current_state) const { DexType* exact_virtual_scope = nullptr; if (insn->opcode() == OPCODE_INVOKE_VIRTUAL) { auto src0 = current_state->get_ref_env().get(insn->src(0)).get_constant(); if (src0) { exact_virtual_scope = get_exact_type(*src0); } } return m_shared_state->get_relevant_written_locations( insn, exact_virtual_scope, m_read_locations); } ValueIdDomain get_value_id_domain( const IRInstruction* insn, CseEnvironment* current_state, const CseUnorderedLocationSet& clobbered_locations) const { auto value = get_value(insn, current_state, clobbered_locations); auto value_id = get_value_id(value); return value_id ? ValueIdDomain(*value_id) : ValueIdDomain::top(); } value_id_t get_pre_state_src_value_id(reg_t reg, const IRInstruction* insn) const { auto value = get_pre_state_src_value(reg, insn); auto value_id = get_value_id(value); always_assert(value_id); return *value_id; } boost::optional<value_id_t> unwrap_value(const IRValue& value, const DexMethodRef* unwrap_method, const DexMethodRef* wrap_method, const DexMethodRef* abs_method, bool is_unboxed) const { if (is_unboxed) { // for unboxing in boxing-unboxing pattern, the value could be an invoke // or IOPCODE_POSITIONAL_UNBOXING. if (!opcode::is_an_invoke(value.opcode) && value.opcode != IOPCODE_POSITIONAL_UNBOXING) { return boost::none; } } else { // for boxing, we only consider invoke value. if (!opcode::is_an_invoke(value.opcode)) { return boost::none; } } auto value_method = opcode::is_an_invoke(value.opcode) ? value.method : value.positional_insn->get_method(); bool has_unwrap_method = is_unboxed ? (value_method == unwrap_method || value_method == abs_method) : value_method == unwrap_method; if (!has_unwrap_method) { return boost::none; } auto it = m_proper_id_values.find(value.srcs.at(0)); if (it == m_proper_id_values.end()) { return boost::none; } const auto& inner_value = it->second; // For unboxing-boxing pattern, we don't consider abs-unboxing (i.e. // Ljava/lang/Number;.*value()*) at this time. 
if (opcode::is_an_invoke(inner_value.opcode) && inner_value.method == wrap_method) { auto unwrapped_value = inner_value.srcs.at(0); if (!is_pre_state_src(unwrapped_value)) { // TODO: Support capturing pre-state values return boost::optional<value_id_t>(unwrapped_value); } } return boost::none; } boost::optional<value_id_t> get_value_id(const IRValue& value) const { auto it = m_value_ids.find(value); if (it != m_value_ids.end()) { return boost::optional<value_id_t>(it->second); } value_id_t id = m_value_ids.size() * ValueIdFlags::BASE; always_assert(id / ValueIdFlags::BASE == m_value_ids.size()); if (opcode::is_an_aget(value.opcode)) { id |= get_location_value_id_mask(get_read_array_location(value.opcode)); } else if (opcode::is_an_iget(value.opcode) || opcode::is_an_sget(value.opcode)) { auto location = get_field_location(value.opcode, value.field); if (location == CseLocation(CseSpecialLocations::GENERAL_MEMORY_BARRIER)) { return boost::none; } id |= get_location_value_id_mask(location); } else if (opcode::is_an_invoke(value.opcode)) { id |= get_invoke_value_id_mask(value); } if (value.opcode != IOPCODE_PRE_STATE_SRC) { for (auto src : value.srcs) { id |= (src & ValueIdFlags::IS_TRACKED_LOCATION_MASK); } } if (value.opcode == IOPCODE_POSITIONAL) { m_positional_insns.emplace(id, value.positional_insn); } else if (value.opcode == IOPCODE_PRE_STATE_SRC) { m_pre_state_value_ids.insert(id); } else { const auto& abs_map = m_shared_state->get_abstract_map(); for (const auto [box_method, unbox_method] : m_shared_state->get_boxing_map()) { const DexMethodRef* abs_method = nullptr; auto abs_it = abs_map.find(unbox_method); if (abs_it != abs_map.end()) { abs_method = abs_it->second; } auto optional_unboxed_value_id = unwrap_value( value, unbox_method, box_method, abs_method, /* unboxed */ true); if (optional_unboxed_value_id) { // boxing-unboxing if (value.opcode == IOPCODE_POSITIONAL_UNBOXING) { // Since value is in boxing-unboxing pattern, we record it in // m_unboxing_insns. auto insn = const_cast<IRInstruction*>(value.positional_insn); if (m_unboxing_insns_set.insert(insn).second) { m_unboxing_insns.emplace_back(insn); } } return optional_unboxed_value_id; } auto optional_boxed_value_id = unwrap_value( value, box_method, unbox_method, abs_method, /* boxed */ false); if (optional_boxed_value_id) { // unboxing-boxing return optional_boxed_value_id; } } if (value.opcode == IOPCODE_POSITIONAL_UNBOXING) { // This means this value is not in a boxing-unboxing pattern. Therefore, // we put it in m_positional_insns list. 
m_positional_insns.emplace(id, value.positional_insn); } else { m_proper_id_values.emplace(id, value); } } m_value_ids.emplace(value, id); return boost::optional<value_id_t>(id); } IRValue get_array_length_value(value_id_t array_value_id) const { IRValue value; value.opcode = OPCODE_ARRAY_LENGTH; value.srcs.push_back(array_value_id); return value; } boost::optional<IRValue> get_equivalent_put_value( const IRInstruction* insn, CseEnvironment* current_state) const { const auto& ref_env = current_state->get_ref_env(); if (opcode::is_an_sput(insn->opcode())) { always_assert(insn->srcs_size() == 1); IRValue value; value.opcode = (IROpcode)(insn->opcode() - OPCODE_SPUT + OPCODE_SGET); value.field = insn->get_field(); return value; } else if (opcode::is_an_iput(insn->opcode())) { always_assert(insn->srcs_size() == 2); auto src1 = ref_env.get(insn->src(1)).get_constant(); if (src1) { IRValue value; value.opcode = (IROpcode)(insn->opcode() - OPCODE_IPUT + OPCODE_IGET); value.srcs.push_back(*src1); value.field = insn->get_field(); return value; } } else if (insn->opcode() == OPCODE_APUT_OBJECT) { // Skip this case. Statically, the incoming value can be of any object // type, as runtime validation ensures type correctness. Thus, we cannot // propagate an aput-object to an aget-object with a simple move-object. // TODO: Allow this here, but also insert a check-cast instead of a simple // move-object in this case, using type inference on the array argument of // the aget-object instruction. } else if (opcode::is_an_aput(insn->opcode())) { always_assert(insn->srcs_size() == 3); auto src1 = ref_env.get(insn->src(1)).get_constant(); auto src2 = ref_env.get(insn->src(2)).get_constant(); if (src1 && src2) { IRValue value; value.opcode = (IROpcode)(insn->opcode() - OPCODE_APUT + OPCODE_AGET); value.srcs.push_back(*src1); value.srcs.push_back(*src2); return value; } } return boost::none; } IRValue get_pre_state_src_value(reg_t reg, const IRInstruction* insn) const { IRValue value; value.opcode = IOPCODE_PRE_STATE_SRC; value.srcs.push_back(reg); value.positional_insn = insn; return value; } void init_pre_state(const IRInstruction* insn, CseEnvironment* current_state) const { auto ref_env = current_state->get_ref_env(); std::unordered_map<reg_t, value_id_t> new_pre_state_src_values; for (auto reg : insn->srcs()) { auto c = ref_env.get(reg).get_constant(); if (!c) { auto it = new_pre_state_src_values.find(reg); if (it == new_pre_state_src_values.end()) { auto value_id = get_pre_state_src_value_id(reg, insn); new_pre_state_src_values.emplace(reg, value_id); } } } if (!new_pre_state_src_values.empty()) { current_state->mutate_ref_env([&](RefEnvironment* env) { for (const auto& p : new_pre_state_src_values) { env->set(p.first, ValueIdDomain(p.second)); } }); } } IRValue get_value(const IRInstruction* insn, CseEnvironment* current_state, const CseUnorderedLocationSet& clobbered_locations) const { IRValue value; auto opcode = insn->opcode(); always_assert(opcode != IOPCODE_PRE_STATE_SRC); value.opcode = opcode; const auto& ref_env = current_state->get_ref_env(); for (auto reg : insn->srcs()) { auto c = ref_env.get(reg).get_constant(); always_assert(c); value_id_t value_id = *c; value.srcs.push_back(value_id); } if (opcode::is_commutative(opcode)) { std::sort(value.srcs.begin(), value.srcs.end()); } bool is_positional; bool is_potential_abs_unboxing = false; switch (insn->opcode()) { case IOPCODE_LOAD_PARAM: case IOPCODE_LOAD_PARAM_OBJECT: case IOPCODE_LOAD_PARAM_WIDE: case OPCODE_MOVE_EXCEPTION: case 
OPCODE_NEW_ARRAY: case OPCODE_NEW_INSTANCE: case OPCODE_FILLED_NEW_ARRAY: is_positional = true; break; case OPCODE_INVOKE_VIRTUAL: { is_potential_abs_unboxing = m_shared_state->has_potential_unboxing_method(insn); is_positional = !m_shared_state->has_pure_method(insn); break; } case OPCODE_INVOKE_SUPER: case OPCODE_INVOKE_DIRECT: case OPCODE_INVOKE_STATIC: case OPCODE_INVOKE_INTERFACE: { // TODO: Is this really safe for all virtual/interface invokes? This // mimics the way assumenosideeffects is used in LocalDCE. is_positional = !m_shared_state->has_pure_method(insn); break; } default: // there might be an impacted field, array element, general memory barrier always_assert(clobbered_locations.size() <= 1); is_positional = !clobbered_locations.empty(); break; } if (is_positional) { if (is_potential_abs_unboxing) { value.opcode = IOPCODE_POSITIONAL_UNBOXING; } else { value.opcode = IOPCODE_POSITIONAL; } value.positional_insn = insn; } else if (insn->has_literal()) { value.literal = insn->get_literal(); } else if (insn->has_type()) { value.type = insn->get_type(); } else if (insn->has_field()) { value.field = insn->get_field(); } else if (insn->has_method()) { value.method = insn->get_method(); } else if (insn->has_string()) { value.string = insn->get_string(); } else if (insn->has_data()) { value.data = insn->get_data(); } return value; } value_id_t get_location_value_id_mask(CseLocation l) const { if (l == CseLocation(CseSpecialLocations::GENERAL_MEMORY_BARRIER)) { return ValueIdFlags::IS_TRACKED_LOCATION_MASK; } else { return m_tracked_locations.at(l); } } value_id_t get_invoke_value_id_mask(const IRValue& value) const { always_assert(opcode::is_an_invoke(value.opcode)); value_id_t mask = 0; for (auto l : m_shared_state->get_read_locations_of_conditionally_pure_method( value.method, value.opcode)) { mask |= get_location_value_id_mask(l); } return mask; } DexType* get_exact_type(value_id_t value_id) const { auto it = m_positional_insns.find(value_id); if (it == m_positional_insns.end()) { return nullptr; } auto insn = it->second; switch (insn->opcode()) { case OPCODE_NEW_ARRAY: case OPCODE_NEW_INSTANCE: case OPCODE_FILLED_NEW_ARRAY: return insn->get_type(); default: return nullptr; } } bool m_using_other_tracked_location_bit{false}; CseUnorderedLocationSet m_read_locations; std::unordered_map<CseLocation, value_id_t, CseLocationHasher> m_tracked_locations; SharedState* m_shared_state; mutable std::unordered_map<IRValue, value_id_t, IRValueHasher> m_value_ids; mutable std::unordered_set<value_id_t> m_pre_state_value_ids; mutable std::unordered_map<value_id_t, const IRInstruction*> m_positional_insns; mutable std::unordered_map<value_id_t, IRValue> m_proper_id_values; }; } // namespace namespace cse_impl { //////////////////////////////////////////////////////////////////////////////// SharedState::SharedState( const std::unordered_set<DexMethodRef*>& pure_methods, const std::unordered_set<const DexString*>& finalish_field_names) : m_pure_methods(pure_methods), m_safe_methods(pure_methods), m_finalish_field_names(finalish_field_names) { // The following methods are... // - static, or // - direct (constructors), or // - virtual methods defined in final classes // that do not mutate any fields or array elements that could be directly // accessed (read or written) by user code, and they will not invoke user // code. 
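  // For example, appending to a StringBuilder (see
  // Ljava/lang/StringBuilder;.append:(Ljava/lang/String;)Ljava/lang/StringBuilder;
  // below) only mutates the builder's own internal buffer, which user code
  // cannot access directly, so treating the call as a non-barrier does not
  // hide any heap update that a tracked location could observe.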
// // The list of methods is not exhaustive; it was derived by observing the most // common barriers encountered in real-life code, and then studying their spec // to check whether they are "safe" in the context of CSE barriers. static const char* safe_method_names[] = { "Landroid/os/SystemClock;.elapsedRealtime:()J", "Landroid/os/SystemClock;.uptimeMillis:()J", "Landroid/util/SparseArray;.append:(ILjava/lang/Object;)V", "Landroid/util/SparseArray;.get:(I)Ljava/lang/Object;", "Landroid/util/SparseArray;.put:(ILjava/lang/Object;)V", "Landroid/util/SparseArray;.size:()I", "Landroid/util/SparseArray;.valueAt:(I)Ljava/lang/Object;", "Landroid/util/SparseIntArray;.put:(II)V", "Ljava/lang/Boolean;.parseBoolean:(Ljava/lang/String;)Z", "Ljava/lang/Byte;.parseByte:(Ljava/lang/String;)B", "Ljava/lang/Class;.forName:(Ljava/lang/String;)Ljava/lang/Class;", "Ljava/lang/Double;.parseDouble:(Ljava/lang/String;)D", "Ljava/lang/Enum;.valueOf:(Ljava/lang/Class;Ljava/lang/String;)Ljava/" "lang/Enum;", "Ljava/lang/Float;.parseFloat:(Ljava/lang/String;)F", "Ljava/lang/Integer;.parseInt:(Ljava/lang/String;)I", "Ljava/lang/Integer;.parseInt:(Ljava/lang/String;I)I", "Ljava/lang/Integer;.valueOf:(Ljava/lang/String;)Ljava/lang/Integer;", "Ljava/lang/Long;.parseLong:(Ljava/lang/String;)J", "Ljava/lang/Math;.addExact:(II)I", "Ljava/lang/Math;.addExact:(JJ)J", "Ljava/lang/Math;.decrementExact:(J)J", "Ljava/lang/Math;.decrementExact:(I)I", "Ljava/lang/Math;.incrementExact:(I)I", "Ljava/lang/Math;.incrementExact:(J)J", "Ljava/lang/Math;.multiplyExact:(II)I", "Ljava/lang/Math;.multiplyExact:(JJ)J", "Ljava/lang/Math;.negateExact:(I)I", "Ljava/lang/Math;.negateExact:(J)J", "Ljava/lang/Math;.subtractExact:(JJ)J", "Ljava/lang/Math;.subtractExact:(II)I", "Ljava/lang/Math;.toIntExact:(J)I", "Ljava/lang/ref/Reference;.get:()Ljava/lang/Object;", "Ljava/lang/String;.getBytes:()[B", "Ljava/lang/String;.split:(Ljava/lang/String;)[Ljava/lang/String;", "Ljava/lang/StringBuilder;.append:(C)Ljava/lang/StringBuilder;", "Ljava/lang/StringBuilder;.append:(I)Ljava/lang/StringBuilder;", "Ljava/lang/StringBuilder;.append:(J)Ljava/lang/StringBuilder;", "Ljava/lang/StringBuilder;.append:(Ljava/lang/String;)Ljava/lang/" "StringBuilder;", "Ljava/lang/StringBuilder;.append:(Z)Ljava/lang/StringBuilder;", "Ljava/lang/StringBuilder;.length:()I", "Ljava/lang/StringBuilder;.toString:()Ljava/lang/String;", "Ljava/lang/System;.currentTimeMillis:()J", "Ljava/lang/System;.nanoTime:()J", "Ljava/util/ArrayList;.add:(Ljava/lang/Object;)Z", "Ljava/util/ArrayList;.add:(ILjava/lang/Object;)V", "Ljava/util/ArrayList;.clear:()V", "Ljava/util/ArrayList;.get:(I)Ljava/lang/Object;", "Ljava/util/ArrayList;.isEmpty:()Z", "Ljava/util/ArrayList;.remove:(I)Ljava/lang/Object;", "Ljava/util/ArrayList;.size:()I", "Ljava/util/BitSet;.clear:()V", "Ljava/util/BitSet;.get:(I)Z", "Ljava/util/BitSet;.set:(I)V", "Ljava/util/HashMap;.isEmpty:()Z", "Ljava/util/HashMap;.size:()I", "Ljava/util/HashSet;.clear:()V", "Ljava/util/LinkedList;.add:(Ljava/lang/Object;)Z", "Ljava/util/LinkedList;.addLast:(Ljava/lang/Object;)V", "Ljava/util/LinkedList;.clear:()V", "Ljava/util/LinkedList;.get:(I)Ljava/lang/Object;", "Ljava/util/LinkedList;.getFirst:()Ljava/lang/Object;", "Ljava/util/LinkedList;.removeFirst:()Ljava/lang/Object;", "Ljava/util/LinkedList;.size:()I", "Ljava/util/Random;.nextInt:(I)I", "Landroid/util/Pair;.<init>:(Ljava/lang/Object;Ljava/lang/Object;)V", "Landroid/util/SparseArray;.<init>:()V", "Ljava/io/IOException;.<init>:(Ljava/lang/String;)V", 
"Ljava/lang/Enum;.<init>:(Ljava/lang/String;I)V", "Ljava/lang/Exception;.<init>:()V", "Ljava/lang/IllegalArgumentException;.<init>:(Ljava/lang/String;)V", "Ljava/lang/IllegalStateException;.<init>:()V", "Ljava/lang/IllegalStateException;.<init>:(Ljava/lang/String;)V", "Ljava/lang/Integer;.<init>:(I)V", "Ljava/lang/Long;.<init>:(J)V", "Ljava/lang/NullPointerException;.<init>:(Ljava/lang/String;)V", "Ljava/lang/Object;.<init>:()V", "Ljava/lang/RuntimeException;.<init>:(Ljava/lang/String;)V", "Ljava/lang/Short;.<init>:(S)V", "Ljava/lang/String;.<init>:(Ljava/lang/String;)V", "Ljava/lang/StringBuilder;.<init>:()V", "Ljava/lang/StringBuilder;.<init>:(I)V", "Ljava/lang/StringBuilder;.<init>:(Ljava/lang/String;)V", "Ljava/lang/UnsupportedOperationException;.<init>:(Ljava/lang/" "String;)V", "Ljava/util/ArrayList;.<init>:()V", "Ljava/util/ArrayList;.<init>:(I)V", "Ljava/util/BitSet;.<init>:(I)V", "Ljava/util/HashMap;.<init>:()V", "Ljava/util/HashMap;.<init>:(I)V", "Ljava/util/HashSet;.<init>:()V", "Ljava/util/LinkedHashMap;.<init>:()V", "Ljava/util/LinkedList;.<init>:()V", "Ljava/util/Random;.<init>:()V", }; for (auto const safe_method_name : safe_method_names) { const std::string& s(safe_method_name); auto method_ref = DexMethod::get_method(s); if (method_ref == nullptr) { TRACE(CSE, 1, "[CSE]: Could not find safe method %s", s.c_str()); continue; } m_safe_methods.insert(method_ref); } if (traceEnabled(CSE, 2)) { m_barriers.reset(new ConcurrentMap<Barrier, size_t, BarrierHasher>()); } const std::vector<DexType*> boxing_types = { type::java_lang_Boolean(), type::java_lang_Byte(), type::java_lang_Short(), type::java_lang_Character(), type::java_lang_Integer(), type::java_lang_Long(), type::java_lang_Float(), type::java_lang_Double()}; for (const auto* type : boxing_types) { const auto* box_method = type::get_value_of_method_for_type(type); const auto* unbox_method = type::get_unboxing_method_for_type(type); const auto* abs_method = type::get_Number_unboxing_method_for_type(type); m_boxing_map.insert({box_method, unbox_method}); m_abstract_map.insert({unbox_method, abs_method}); } } const method_override_graph::Graph* SharedState::get_method_override_graph() const { return m_method_override_graph.get(); } void SharedState::init_method_barriers(const Scope& scope) { Timer t("init_method_barriers"); auto iterations = compute_locations_closure( scope, m_method_override_graph.get(), [&](DexMethod* method) -> boost::optional<LocationsAndDependencies> { auto action = get_base_or_overriding_method_action( method, &m_safe_method_defs, /* ignore_methods_with_assumenosideeffects */ true); if (action == MethodOverrideAction::UNKNOWN) { return boost::none; } LocationsAndDependencies lads; if (action == MethodOverrideAction::EXCLUDE) { return lads; } auto code = method->get_code(); for (const auto& mie : cfg::InstructionIterable(code->cfg())) { auto* insn = mie.insn; if (may_be_barrier(insn, nullptr /* exact_virtual_scope */)) { auto barrier = make_barrier(insn); if (!opcode::is_an_invoke(barrier.opcode)) { auto location = get_written_location(barrier); if (location == CseLocation(CseSpecialLocations::GENERAL_MEMORY_BARRIER)) { return boost::none; } lads.locations.insert(location); continue; } if (barrier.opcode == OPCODE_INVOKE_SUPER) { // TODO: Implement return boost::none; } if (!process_base_and_overriding_methods( m_method_override_graph.get(), barrier.method, &m_safe_method_defs, /* ignore_methods_with_assumenosideeffects */ true, [&](DexMethod* other_method) { if (other_method != method) { 
lads.dependencies.insert(other_method); } return true; })) { return boost::none; } } } return lads; }, &m_method_written_locations); m_stats.method_barriers_iterations = iterations; m_stats.method_barriers = m_method_written_locations.size(); for (const auto& p : m_method_written_locations) { auto method = p.first; auto& written_locations = p.second; TRACE(CSE, 4, "[CSE] inferred barrier for %s: %s", SHOW(method), SHOW(&written_locations)); } } void SharedState::init_finalizable_fields(const Scope& scope) { Timer t("init_finalizable_fields"); field_op_tracker::FieldStatsMap field_stats = field_op_tracker::analyze(scope); for (auto& pair : field_stats) { auto* field = pair.first; auto& stats = pair.second; // We are checking a subset of what the AccessMarking pass is checking for // finalization, as that's what CSE really cares about. if (stats.init_writes == stats.writes && can_rename(field) && !is_final(field) && !is_volatile(field) && !field->is_external()) { m_finalizable_fields.insert(field); } } m_stats.finalizable_fields = m_finalizable_fields.size(); } void SharedState::init_scope( const Scope& scope, const method::ClInitHasNoSideEffectsPredicate& clinit_has_no_side_effects) { always_assert(!m_method_override_graph); m_method_override_graph = method_override_graph::build_graph(scope); auto iterations = compute_conditionally_pure_methods( scope, m_method_override_graph.get(), clinit_has_no_side_effects, m_pure_methods, &m_conditionally_pure_methods); m_stats.conditionally_pure_methods = m_conditionally_pure_methods.size(); m_stats.conditionally_pure_methods_iterations = iterations; for (const auto& p : m_conditionally_pure_methods) { m_pure_methods.insert(const_cast<DexMethod*>(p.first)); } for (auto method_ref : m_safe_methods) { auto method = method_ref->as_def(); if (method) { m_safe_method_defs.insert(method); } } init_method_barriers(scope); init_finalizable_fields(scope); } CseUnorderedLocationSet SharedState::get_relevant_written_locations( const IRInstruction* insn, DexType* exact_virtual_scope, const CseUnorderedLocationSet& read_locations) { if (may_be_barrier(insn, exact_virtual_scope)) { if (opcode::is_an_invoke(insn->opcode())) { return get_relevant_written_locations(insn, read_locations); } else { auto barrier = make_barrier(insn); if (is_barrier_relevant(barrier, read_locations)) { return CseUnorderedLocationSet{get_written_location(barrier)}; } } } return no_locations; } bool SharedState::may_be_barrier(const IRInstruction* insn, DexType* exact_virtual_scope) { auto opcode = insn->opcode(); switch (opcode) { case OPCODE_MONITOR_ENTER: case OPCODE_MONITOR_EXIT: case OPCODE_FILL_ARRAY_DATA: return true; default: if (opcode::is_an_aput(opcode) || opcode::is_an_iput(opcode) || opcode::is_an_sput(opcode)) { return true; } else if (opcode::is_an_invoke(opcode)) { return !is_invoke_safe(insn, exact_virtual_scope); } if (insn->has_field()) { always_assert(opcode::is_an_iget(opcode) || opcode::is_an_sget(opcode)); if (get_field_location(opcode, insn->get_field()) == CseLocation(CseSpecialLocations::GENERAL_MEMORY_BARRIER)) { return true; } } return false; } } bool SharedState::is_invoke_safe(const IRInstruction* insn, DexType* exact_virtual_scope) { always_assert(opcode::is_an_invoke(insn->opcode())); auto method_ref = insn->get_method(); auto opcode = insn->opcode(); if ((opcode == OPCODE_INVOKE_STATIC || opcode == OPCODE_INVOKE_DIRECT) && m_safe_methods.count(method_ref)) { return true; } auto method = resolve_method(method_ref, opcode_to_search(insn)); if (!method) { return 
false; } if ((opcode == OPCODE_INVOKE_STATIC || opcode == OPCODE_INVOKE_DIRECT) && m_safe_methods.count(method)) { return true; } if (opcode == OPCODE_INVOKE_VIRTUAL && m_safe_methods.count(method)) { auto type = method->get_class(); auto cls = type_class(type); always_assert(cls); if (is_final(cls) || is_final(method)) { return true; } if (type == exact_virtual_scope) { return true; } } if (opcode == OPCODE_INVOKE_INTERFACE && m_safe_methods.count(method)) { return true; } return false; } CseUnorderedLocationSet SharedState::get_relevant_written_locations( const IRInstruction* insn, const CseUnorderedLocationSet& read_locations) { always_assert(opcode::is_an_invoke(insn->opcode())); auto opcode = insn->opcode(); if (opcode == OPCODE_INVOKE_SUPER) { // TODO return general_memory_barrier_locations; } auto method_ref = insn->get_method(); DexMethod* method = resolve_method(method_ref, opcode_to_search(insn)); CseUnorderedLocationSet written_locations; if (!process_base_and_overriding_methods( m_method_override_graph.get(), method, &m_safe_method_defs, /* ignore_methods_with_assumenosideeffects */ true, [&](DexMethod* other_method) { auto it = m_method_written_locations.find(other_method); if (it == m_method_written_locations.end()) { return false; } written_locations.insert(it->second.begin(), it->second.end()); return true; })) { return general_memory_barrier_locations; } // Remove written locations that are not read std20::erase_if(written_locations, [&read_locations](auto& l) { return !read_locations.count(l); }); return written_locations; } void SharedState::log_barrier(const Barrier& barrier) { if (m_barriers) { m_barriers->update( barrier, [](const Barrier, size_t& v, bool /* exists */) { v++; }); } } const CseUnorderedLocationSet& SharedState::get_read_locations_of_conditionally_pure_method( const DexMethodRef* method_ref, IROpcode opcode) const { auto method = resolve_method(const_cast<DexMethodRef*>(method_ref), opcode_to_search(opcode)); if (method == nullptr) { return no_locations; } auto it = m_conditionally_pure_methods.find(method); if (it == m_conditionally_pure_methods.end()) { return no_locations; } else { return it->second; } } bool SharedState::has_potential_unboxing_method( const IRInstruction* insn) const { auto method_ref = insn->get_method(); for (const auto& abs_pair : get_abstract_map()) { const auto* abs_method = abs_pair.second; if (method_ref == abs_method) { return true; } } return false; } bool SharedState::has_pure_method(const IRInstruction* insn) const { auto method_ref = insn->get_method(); if (m_pure_methods.find(method_ref) != m_pure_methods.end()) { TRACE(CSE, 4, "[CSE] unresolved %spure for %s", (method_ref->is_def() && m_conditionally_pure_methods.count(method_ref->as_def())) ? "conditionally " : "", SHOW(method_ref)); return true; } auto method = resolve_method(insn->get_method(), opcode_to_search(insn)); if (method != nullptr && m_pure_methods.find(method) != m_pure_methods.end()) { TRACE(CSE, 4, "[CSE] resolved %spure for %s", m_conditionally_pure_methods.count(method) ? 
"conditionally " : "", SHOW(method)); return true; } return false; } bool SharedState::is_finalish(const DexField* field) const { return is_final(field) || !!m_finalizable_fields.count(field) || !!m_finalish_field_names.count(field->get_name()); } void SharedState::cleanup() { if (!m_barriers) { return; } std::vector<std::pair<Barrier, size_t>> ordered_barriers(m_barriers->begin(), m_barriers->end()); std::sort( ordered_barriers.begin(), ordered_barriers.end(), [](const std::pair<Barrier, size_t>& a, const std::pair<Barrier, size_t>& b) { return b.second < a.second; }); TRACE(CSE, 2, "most common barriers:"); for (const auto& p : ordered_barriers) { auto& b = p.first; auto c = p.second; if (opcode::is_an_invoke(b.opcode)) { TRACE(CSE, 2, "%s %s x %zu", SHOW(b.opcode), SHOW(b.method), c); } else if (opcode::is_an_ifield_op(b.opcode) || opcode::is_an_sfield_op(b.opcode)) { TRACE(CSE, 2, "%s %s x %zu", SHOW(b.opcode), SHOW(b.field), c); } else { TRACE(CSE, 2, "%s x %zu", SHOW(b.opcode), c); } } } CommonSubexpressionElimination::CommonSubexpressionElimination( SharedState* shared_state, cfg::ControlFlowGraph& cfg, bool is_static, bool is_init_or_clinit, DexType* declaring_type, DexTypeList* args) : m_cfg(cfg), m_is_static(is_static), m_declaring_type(declaring_type), m_args(args), m_abs_map(shared_state->get_abstract_map()) { Analyzer analyzer(shared_state, cfg, is_static, is_init_or_clinit, declaring_type); m_unboxing = analyzer.get_unboxing_insns(); m_stats.max_value_ids = analyzer.get_value_ids_size(); if (analyzer.using_other_tracked_location_bit()) { m_stats.methods_using_other_tracked_location_bit = 1; } std::unordered_map<std::vector<size_t>, size_t, boost::hash<std::vector<size_t>>> insns_ids; auto get_earlier_insns_index = [&](const PatriciaTreeSet<const IRInstruction*>& insns) { std::vector<size_t> ordered_ids; for (auto insn : insns) { ordered_ids.push_back(get_earlier_insn_id(insn)); } std::sort(ordered_ids.begin(), ordered_ids.end()); auto it = insns_ids.find(ordered_ids); if (it != insns_ids.end()) { return it->second; } auto index = insns_ids.size(); always_assert(m_earlier_insns.size() == index); insns_ids.emplace(ordered_ids, index); m_earlier_insns.push_back(insns); return index; }; // identify all instruction pairs where the result of the first instruction // can be forwarded to the second for (cfg::Block* block : cfg.blocks()) { auto env = analyzer.get_entry_state_at(block); if (env.is_bottom()) { continue; } for (const auto& mie : InstructionIterable(block)) { IRInstruction* insn = mie.insn; analyzer.analyze_instruction(insn, &env); auto opcode = insn->opcode(); if (!insn->has_dest() || opcode::is_a_move(opcode) || opcode::is_a_const(opcode)) { continue; } auto ref_c = env.get_ref_env().get(insn->dest()).get_constant(); if (!ref_c) { continue; } auto value_id = *ref_c; always_assert(!analyzer.is_pre_state_src(value_id)); auto defs = env.get_def_env().get(value_id); always_assert(!defs.is_top() && !defs.is_bottom()); auto earlier_insns = defs.elements(); if (earlier_insns.contains(insn)) { continue; } bool skip{false}; for (auto earlier_insn : earlier_insns) { auto earlier_opcode = earlier_insn->opcode(); if (opcode::is_a_load_param(earlier_opcode)) { skip = true; break; } if (opcode::is_a_cmp(opcode) || opcode::is_a_cmp(earlier_opcode)) { // See T46241704. We never de-duplicate cmp instructions due to an // apparent bug in various Dalvik (and ART?) versions. 
Also see this // documentation in the r8 source code: // https://r8.googlesource.com/r8/+/2638db4d3465d785a6a740cf09969cab96099cff/src/main/java/com/android/tools/r8/utils/InternalOptions.java#604 skip = true; break; } } if (skip) { continue; } auto earlier_insns_index = get_earlier_insns_index(earlier_insns); m_forward.push_back({earlier_insns_index, insn}); } } } size_t CommonSubexpressionElimination::get_earlier_insn_id( const IRInstruction* insn) { // We need some helper state/functions to build the list m_earlier_insns // of unique earlier-instruction sets. To make that deterministic, we use // instruction ids that represent the position of an instruction in the cfg. if (m_earlier_insn_ids.empty()) { for (const auto& mie : InstructionIterable(m_cfg)) { m_earlier_insn_ids.emplace(mie.insn, m_earlier_insn_ids.size()); } } return m_earlier_insn_ids.at(insn); } static IROpcode get_move_opcode(const IRInstruction* earlier_insn) { always_assert(!opcode::is_a_literal_const(earlier_insn->opcode())); if (earlier_insn->has_dest()) { return earlier_insn->dest_is_wide() ? OPCODE_MOVE_WIDE : earlier_insn->dest_is_object() ? OPCODE_MOVE_OBJECT : OPCODE_MOVE; } else if (earlier_insn->opcode() == OPCODE_NEW_ARRAY) { return OPCODE_MOVE; } else { always_assert(opcode::is_an_aput(earlier_insn->opcode()) || opcode::is_an_iput(earlier_insn->opcode()) || opcode::is_an_sput(earlier_insn->opcode())); return earlier_insn->src_is_wide(0) ? OPCODE_MOVE_WIDE : (earlier_insn->opcode() == OPCODE_APUT_OBJECT || earlier_insn->opcode() == OPCODE_IPUT_OBJECT || earlier_insn->opcode() == OPCODE_SPUT_OBJECT) ? OPCODE_MOVE_OBJECT : OPCODE_MOVE; } } static std::pair<IROpcode, std::optional<int64_t>> get_move_or_const_literal( const sparta::PatriciaTreeSet<const IRInstruction*>& insns) { std::optional<IROpcode> opcode; std::optional<int64_t> literal; for (auto insn : insns) { if (opcode::is_a_literal_const(insn->opcode())) { if (literal) { always_assert(*literal == insn->get_literal()); always_assert(*opcode == insn->opcode()); } else { literal = insn->get_literal(); opcode = insn->opcode(); } continue; } if (opcode) { always_assert(*opcode == get_move_opcode(insn)); } else { opcode = get_move_opcode(insn); } } always_assert(opcode); always_assert(opcode::is_a_move(*opcode) || opcode::is_a_literal_const(*opcode)); always_assert(!opcode::is_a_literal_const(*opcode) || literal); return std::make_pair(*opcode, literal); } bool CommonSubexpressionElimination::patch(bool runtime_assertions) { if (m_forward.empty()) { return false; } TRACE(CSE, 5, "[CSE] before:\n%s", SHOW(m_cfg)); // gather relevant instructions, and allocate temp registers // We'll allocate one temp per "earlier_insns_index". // TODO: Do better, use less. A subset and its superset can share a temp. std::unordered_map<size_t, std::pair<IROpcode, reg_t>> temps; // We also remember for which instructions we'll need an iterator, as we'll // want to insert something after them. std::unordered_set<const IRInstruction*> iterator_insns; std::unordered_set<const IRInstruction*> combined_earlier_insns; for (const auto& f : m_forward) { iterator_insns.insert(f.insn); if (temps.count(f.earlier_insns_index)) { continue; } auto& earlier_insns = m_earlier_insns.at(f.earlier_insns_index); auto move_or_const_literal = get_move_or_const_literal(earlier_insns); auto opcode = move_or_const_literal.first; if (!opcode::is_a_move(opcode)) { continue; } combined_earlier_insns.insert(earlier_insns.begin(), earlier_insns.end()); reg_t temp_reg = opcode == OPCODE_MOVE_WIDE ? 
m_cfg.allocate_wide_temp() : m_cfg.allocate_temp(); temps.emplace(f.earlier_insns_index, std::make_pair(opcode, temp_reg)); } for (auto earlier_insn : combined_earlier_insns) { iterator_insns.insert(earlier_insn); if (earlier_insn->has_dest()) { m_stats.results_captured++; } else if (earlier_insn->opcode() == OPCODE_NEW_ARRAY) { m_stats.array_lengths_captured++; } else { always_assert(opcode::is_an_aput(earlier_insn->opcode()) || opcode::is_an_iput(earlier_insn->opcode()) || opcode::is_an_sput(earlier_insn->opcode())); m_stats.stores_captured++; } } for (auto unboxing_insn : m_unboxing) { iterator_insns.insert(unboxing_insn); } // find all iterators in one sweep std::unordered_map<IRInstruction*, cfg::InstructionIterator> iterators; auto iterable = cfg::InstructionIterable(m_cfg); for (auto it = iterable.begin(); it != iterable.end(); ++it) { auto* insn = it->insn; if (iterator_insns.count(insn)) { iterators.emplace(insn, it); } } // insert moves to use the forwarded value std::vector<std::pair<Forward, IRInstruction*>> to_check; for (const auto& f : m_forward) { auto& earlier_insns = m_earlier_insns.at(f.earlier_insns_index); IRInstruction* insn = f.insn; auto& it = iterators.at(insn); auto temp_it = temps.find(f.earlier_insns_index); if (temp_it == temps.end()) { auto move_or_const_literal = get_move_or_const_literal(earlier_insns); auto const_opcode = move_or_const_literal.first; always_assert(opcode::is_a_literal_const(const_opcode)); auto literal = *(move_or_const_literal.second); // Consider this case: // 1. (const v0 0) // 2. (iput-object v0 v3 "LClass1;.field:Ljava/lang/Object;") // 3. (const v0 0) // 4. boxing-unboxing v0 // 5. (move-result v0) // 6. (iput-boolean v0 v4 "LClass2;.field:Z") // We need to make sure that after opt-out "boxing-unboxing v0", v0 used // in insn 6. should not be the one used in insn 2, since in line 2, v0 is // used as null-0 and in line 4, v0 should be int. Therfore, instead of // insert a move, if there is any literal_const load in earlier_insns, we // just clone that const insn and override the dest to current insn's // dest. No need to add this new insn for runtime-assertion since it is // just a clone. IRInstruction* const_insn = new IRInstruction(const_opcode); const_insn->set_literal(literal)->set_dest(insn->dest()); auto iterators_invalidated = m_cfg.insert_after(it, const_insn); always_assert(!iterators_invalidated); for (auto earlier_insn : earlier_insns) { TRACE(CSE, 4, "[CSE] forwarding %s to %s as const %ld", SHOW(earlier_insn), SHOW(insn), literal); } } else { auto [move_opcode, temp_reg] = temp_it->second; always_assert(opcode::is_a_move(move_opcode)); IRInstruction* move_insn = new IRInstruction(move_opcode); move_insn->set_src(0, temp_reg)->set_dest(insn->dest()); auto iterators_invalidated = m_cfg.insert_after(it, move_insn); always_assert(!iterators_invalidated); if (runtime_assertions) { to_check.emplace_back(f, move_insn); } for (auto earlier_insn : earlier_insns) { TRACE(CSE, 4, "[CSE] forwarding %s to %s via v%u", SHOW(earlier_insn), SHOW(insn), temp_reg); } } if (opcode::is_move_result_any(insn->opcode())) { insn = m_cfg.primary_instruction_of_move_result(it)->insn; if (opcode::is_an_invoke(insn->opcode())) { TRACE(CSE, 3, "[CSE] eliminating invocation of %s", SHOW(insn->get_method())); } } m_stats.eliminated_opcodes[insn->opcode()]++; } // Insert moves to define the forwarded value. 
// We are going to call ControlFlow::insert_after, which may introduce new // blocks when inserting after the last instruction of a block with // throws-edges. To make that deterministic, we need to make sure that we // insert in a deterministic order. To this end, we follow the deterministic // m_forward order, and we order earlier instructions by their ids. for (const auto& f : m_forward) { auto temp_it = temps.find(f.earlier_insns_index); if (temp_it == temps.end()) { continue; } auto [move_opcode, temp_reg] = temp_it->second; always_assert(opcode::is_a_move(move_opcode)); temps.erase(temp_it); auto& earlier_insns_unordered = m_earlier_insns.at(f.earlier_insns_index); std::vector<const IRInstruction*> earlier_insns_ordered( earlier_insns_unordered.begin(), earlier_insns_unordered.end()); always_assert(!m_earlier_insn_ids.empty()); std::sort(earlier_insns_ordered.begin(), earlier_insns_ordered.end(), [&](const auto* a, const auto* b) { return get_earlier_insn_id(a) < get_earlier_insn_id(b); }); for (auto earlier_insn : earlier_insns_ordered) { auto& it = iterators.at(const_cast<IRInstruction*>(earlier_insn)); IRInstruction* move_insn = new IRInstruction(move_opcode); auto src_reg = earlier_insn->has_dest() ? earlier_insn->dest() : earlier_insn->src(0); move_insn->set_src(0, src_reg)->set_dest(temp_reg); if (earlier_insn->opcode() == OPCODE_NEW_ARRAY) { // we need to capture the array-length register of a new-array // instruction *before* the instruction, as the dest of the instruction // may overwrite the incoming array length value auto iterators_invalidated = m_cfg.insert_before(it, move_insn); always_assert(!iterators_invalidated); } else { auto iterators_invalidated = m_cfg.insert_after(it, move_insn); always_assert(!iterators_invalidated); } } } always_assert(temps.empty()); if (runtime_assertions) { insert_runtime_assertions(to_check); } TRACE(CSE, 5, "[CSE] after:\n%s", SHOW(m_cfg)); m_stats.instructions_eliminated += m_forward.size(); if (!m_unboxing.empty()) { // Inserting check-casts might split blocks and invalidate iterators, thus // we go through the CFGMutation helper class that properly deals with this // complication. cfg::CFGMutation mutation(m_cfg); // For the abs methods which are part of boxing-unboxing pattern, we refine // it to its impl, which will be viewed as "pure" method and can be optmized // out later. for (auto unboxing_insn : m_unboxing) { auto& it = iterators.at(unboxing_insn); auto method_ref = unboxing_insn->get_method(); auto abs_it = std::find_if( m_abs_map.begin(), m_abs_map.end(), [method_ref](auto& p) { return p.second == method_ref; }); always_assert(abs_it != m_abs_map.end()); auto impl = abs_it->first; auto src_reg = unboxing_insn->src(0); auto check_cast_insn = (new IRInstruction(OPCODE_CHECK_CAST)) ->set_src(0, src_reg) ->set_type(impl->get_class()); auto pseudo_move_result_insn = (new IRInstruction(IOPCODE_MOVE_RESULT_PSEUDO_OBJECT)) ->set_dest(src_reg); // Add instructions mutation.insert_before(it, {check_cast_insn, pseudo_move_result_insn}); // Replace the call method from abs_unboxing method to its impl. 
unboxing_insn->set_method(const_cast<DexMethodRef*>(impl)); } mutation.flush(); } return true; } void CommonSubexpressionElimination::insert_runtime_assertions( const std::vector<std::pair<Forward, IRInstruction*>>& to_check) { // For every instruction that CSE will effectively eliminate, we insert // code like the following: // // OLD_CODE: // first-instruction r0 // redundant-instruction r1 // NEW_ASSERTION_CODE: // if-ne r0, r1, THROW // CSE_CODE: // move r1, r0 // <= to realize CSE; without the NEW_ASSERTION_CODE, // // the redundant-instruction would be eliminated by DCE. // ... // THROW: // const r2, 0 // throw r2 // // The new throw instruction would throw a NullPointerException when the // redundant instruction didn't actually produce the same result as the // first instruction. // // TODO: Consider throwing a custom exception, possibly created by code // sitting behind an auxiliary method call to keep the code size distortion // small which may influence other optimizations. See // ConstantPropagationAssertHandler for inspiration. // // Note: Somehow inserting assertions seems to trip up the OptimizeEnumsPass // (it fails while running Redex). // TODO: Investigate why. Until then, disable that pass to test CSE. // If the original block had a throw-edge, then the new block that throws an // exception needs to have a corresponding throw-edge. As we split blocks to // insert conditional branches, and splitting blocks removes throw-edges from // the original block, we need to make sure that we track what throw-edges as\ // needed. (All this is to appease the Android verifier in the presence of // monitor instructions.) std::unordered_map<cfg::Block*, std::vector<cfg::Edge*>> outgoing_throws; for (auto b : m_cfg.blocks()) { outgoing_throws.emplace(b, b->get_outgoing_throws_in_order()); } // We need type inference information to generate the right kinds of // conditional branches. type_inference::TypeInference type_inference(m_cfg); type_inference.run(m_is_static, m_declaring_type, m_args); auto& type_environments = type_inference.get_type_environments(); for (const auto& p : to_check) { auto& f = p.first; auto& earlier_insns = m_earlier_insns.at(f.earlier_insns_index); for (auto earlier_insn : earlier_insns) { IRInstruction* insn = f.insn; auto move_insn = p.second; auto& type_environment = type_environments.at(insn); auto temp = move_insn->src(0); auto type = type_environment.get_type(temp); always_assert(!type.is_top()); always_assert(!type.is_bottom()); TRACE(CSE, 6, "[CSE] to check: %s => %s - r%u: %s", SHOW(earlier_insn), SHOW(insn), temp, SHOW(type.element())); always_assert(type.element() != CONST2); always_assert(type.element() != LONG2); always_assert(type.element() != DOUBLE2); always_assert(type.element() != SCALAR2); if (type.element() != ZERO && type.element() != CONST && type.element() != INT && type.element() != REFERENCE && type.element() != LONG1) { // TODO: Handle floats and doubles via Float.floatToIntBits and // Double.doubleToLongBits to deal with NaN. // TODO: Improve TypeInference so that we never have to deal with // SCALAR* values where we don't know if it's a int/float or // long/double. 
continue; } auto it = m_cfg.find_insn(insn); auto old_block = it.block(); auto new_block = m_cfg.split_block(it); outgoing_throws.emplace(new_block, outgoing_throws.at(old_block)); auto throw_block = m_cfg.create_block(); auto null_reg = m_cfg.allocate_temp(); IRInstruction* const_insn = new IRInstruction(OPCODE_CONST); const_insn->set_literal(0); const_insn->set_dest(null_reg); throw_block->push_back(const_insn); IRInstruction* throw_insn = new IRInstruction(OPCODE_THROW); throw_insn->set_src(0, null_reg); throw_block->push_back(throw_insn); for (auto e : outgoing_throws.at(old_block)) { auto throw_info = e->throw_info(); m_cfg.add_edge(throw_block, e->target(), throw_info->catch_type, throw_info->index); } if (type.element() == LONG1) { auto cmp_reg = m_cfg.allocate_temp(); IRInstruction* cmp_insn = new IRInstruction(OPCODE_CMP_LONG); cmp_insn->set_dest(cmp_reg); cmp_insn->set_src(0, move_insn->dest()); cmp_insn->set_src(1, move_insn->src(0)); old_block->push_back(cmp_insn); IRInstruction* if_insn = new IRInstruction(OPCODE_IF_NEZ); if_insn->set_src(0, cmp_reg); m_cfg.create_branch(old_block, if_insn, new_block, throw_block); } else { IRInstruction* if_insn = new IRInstruction(OPCODE_IF_NE); if_insn->set_src(0, move_insn->dest()); if_insn->set_src(1, move_insn->src(0)); m_cfg.create_branch(old_block, if_insn, new_block, throw_block); } } } } Stats& Stats::operator+=(const Stats& that) { results_captured += that.results_captured; stores_captured += that.stores_captured; array_lengths_captured += that.array_lengths_captured; instructions_eliminated += that.instructions_eliminated; max_value_ids = std::max(max_value_ids, that.max_value_ids); methods_using_other_tracked_location_bit += that.methods_using_other_tracked_location_bit; for (const auto& p : that.eliminated_opcodes) { eliminated_opcodes[p.first] += p.second; } max_iterations = std::max(max_iterations, that.max_iterations); return *this; } } // namespace cse_impl
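
// Usage sketch (illustrative only, not part of this translation unit): the
// real driver lives in the corresponding Redex pass. The walker and method
// predicates used below (walk::parallel::code, method::is_init,
// method::is_clinit) are assumptions about the surrounding codebase, and the
// sketch assumes each method's editable ControlFlowGraph has been built.
//
//   SharedState shared_state(pure_methods, finalish_field_names);
//   shared_state.init_scope(scope, clinit_has_no_side_effects);
//   walk::parallel::code(scope, [&](DexMethod* method, IRCode& code) {
//     CommonSubexpressionElimination cse(
//         &shared_state, code.cfg(), is_static(method),
//         method::is_init(method) || method::is_clinit(method),
//         method->get_class(), method->get_proto()->get_args());
//     if (cse.patch(/* runtime_assertions */ false)) {
//       // follow up with copy-propagation and local DCE, as described in the
//       // header comment, to clean up the inserted moves and the now
//       // redundant defining instructions
//     }
//   });
//   shared_state.cleanup();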