opt/remove-builders/RemoveBuildersHelper.cpp (914 lines of code) (raw):

/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include "RemoveBuildersHelper.h"

#include <boost/dynamic_bitset.hpp>
#include <boost/regex.hpp>

#include "ControlFlow.h"
#include "Dataflow.h"
#include "DexUtil.h"
#include "IRCode.h"
#include "IRInstruction.h"

namespace {

// Placeholder stored in a field's setter set; FieldsRegs::meet adds it when
// the field is UNDEFINED on one of the merged paths.
const IRInstruction* NULL_INSN = nullptr;

/**
 * Transfer function of the field-tracking dataflow: applies the effect of the
 * instruction at `it` to `fregs`.
 *
 * - The result of a `new-instance` of the builder type resets every tracked
 *   field to UNDEFINED and forgets its setters.
 * - An instruction writing a register that currently backs a field marks that
 *   field as OVERWRITTEN.
 * - An `iput` on one of the builder's fields records the source register and
 *   makes that instruction the field's only setter.
 */
void fields_mapping(const IRList::iterator& it,
                    FieldsRegs* fregs,
                    DexClass* builder) {
  always_assert(fregs != nullptr);
  always_assert(builder != nullptr);
  always_assert(it->type == MFLOW_OPCODE);

  const IRInstruction* insn = it->insn;
  auto* builder_type = builder->get_type();

  // The previous entry is only inspected once we know this is a pseudo move.
  if (insn->opcode() == IOPCODE_MOVE_RESULT_PSEUDO_OBJECT) {
    auto* primary = std::prev(it)->insn;
    if (primary->opcode() == OPCODE_NEW_INSTANCE &&
        primary->get_type() == builder_type) {
      // Fresh builder instance: nothing has been set yet.
      for (auto& entry : fregs->field_to_reg) {
        entry.second = FieldOrRegStatus::UNDEFINED;
        fregs->field_to_iput_insns[entry.first].clear();
      }
    }
  }

  // A write to the register backing a field invalidates that mapping.
  if (insn->has_dest()) {
    const int current_dest = insn->dest();
    const bool wide_dest = insn->dest_is_wide();

    for (auto& entry : fregs->field_to_reg) {
      const bool clobbered =
          entry.second == current_dest ||
          (wide_dest && entry.second == current_dest + 1);
      if (clobbered) {
        entry.second = FieldOrRegStatus::OVERWRITTEN;
      }
    }
  }

  if (!opcode::is_an_iput(insn->opcode())) {
    return;
  }

  auto field = resolve_field(insn->get_field(), FieldSearch::Instance);
  if (field == nullptr || field->get_class() != builder_type) {
    return;
  }

  // The field now lives in the iput's source register; this is its sole setter.
  const reg_t source_reg = insn->src(0);
  fregs->field_to_reg[field] = source_reg;
  auto& setters = fregs->field_to_iput_insns[field];
  setters.clear();
  setters.emplace(insn);
}

/**
 * Returns for every instruction, field value:
 * - a register: representing the register that stores the field's value
 * - UNDEFINED: not defined yet.
 * - DIFFERENT: no unique register.
* - OVERWRITTEN: register no longer holds the value.
 */
std::unique_ptr<std::unordered_map<IRInstruction*, FieldsRegs>> fields_setters(
    const std::vector<cfg::Block*>& blocks, DexClass* builder) {
  // The dataflow runs synchronously, so capturing the pointer by value is
  // enough.
  std::function<void(IRList::iterator, FieldsRegs*)> transfer =
      [builder](const IRList::iterator& position, FieldsRegs* state) {
        fields_mapping(position, state, builder);
      };

  return forwards_dataflow(blocks, FieldsRegs(builder), transfer);
}

/**
 * Picks the move opcode matching the width/kind of the given `iget-*`:
 * wide -> MOVE_WIDE, object -> MOVE_OBJECT, anything else -> MOVE.
 */
IROpcode get_move_opcode(const IRInstruction* insn) {
  always_assert(insn != nullptr);
  always_assert(opcode::is_an_iget(insn->opcode()));

  switch (insn->opcode()) {
  case OPCODE_IGET_WIDE:
    return OPCODE_MOVE_WIDE;
  case OPCODE_IGET_OBJECT:
    return OPCODE_MOVE_OBJECT;
  default:
    return OPCODE_MOVE;
  }
}

/**
 * Allocates a `move_opcode dest_reg, src_reg` instruction. The caller owns
 * the returned pointer.
 */
IRInstruction* construct_move_instr(reg_t dest_reg,
                                    reg_t src_reg,
                                    IROpcode move_opcode) {
  auto* move = new IRInstruction(move_opcode);
  move->set_dest(dest_reg);
  move->set_src(0, src_reg);
  return move;
}

/**
 * Allocates a zero constant load into `reg`: CONST_WIDE when `move_opcode` is
 * MOVE_WIDE, CONST otherwise. The caller owns the returned pointer.
 */
IRInstruction* construct_null_instr(reg_t reg, IROpcode move_opcode) {
  const IROpcode const_opcode =
      move_opcode == OPCODE_MOVE_WIDE ? OPCODE_CONST_WIDE : OPCODE_CONST;

  auto* zero = new IRInstruction(const_opcode);
  zero->set_dest(reg);
  zero->set_literal(0);
  return zero;
}

/**
 * Adds instructions that initialize registers with null.
*/ void null_initializations( IRCode* code, const std::vector<std::pair<reg_t, IROpcode>>& null_regs) { always_assert(code != nullptr); auto params = code->get_param_instructions(); for (auto& null_reg_info : null_regs) { reg_t null_reg = null_reg_info.first; IROpcode move_opcode = null_reg_info.second; code->insert_before(params.end(), construct_null_instr(null_reg, move_opcode)); } } void add_instr(IRCode* code, const IRInstruction* position, IRInstruction* new_insn) { always_assert(code != nullptr); always_assert(position != nullptr); always_assert(new_insn != nullptr); for (auto it = code->begin(); it != code->end(); ++it) { if (it->type != MFLOW_OPCODE) { continue; } auto insn = it->insn; if (insn == position) { code->insert_before(it, new_insn); return; } } not_reached_log("insert position not found!"); } using MoveList = std::unordered_map<const IRInstruction*, IRInstruction*>; void method_updates(DexMethod* method, const std::vector<IRInstruction*>& deletes, const MoveList& move_list) { always_assert(method != nullptr); auto code = method->get_code(); // This will basically replace an iput / iget instruction // with a move (giving the instruction will be removed later). // // Example: // iput v0, object // field -> move new_reg, v0 // iget v0, object // field -> move v0, new_reg for (const auto& move_elem : move_list) { const IRInstruction* position = move_elem.first; IRInstruction* insn = move_elem.second; add_instr(code, position, insn); } for (const auto& insn : deletes) { code->remove_opcode(insn); } } /** * Giving a list of setters and a map with instruction replacements, * will return an already allocated new register, in case one of the * setters already has a replacement defined. Otherwise, it returns * UNDEFINED. 
*/ int64_t get_new_reg_if_already_allocated( const std::unordered_set<const IRInstruction*>& iput_insns, MoveList& move_replacements) { int64_t new_reg = FieldOrRegStatus::UNDEFINED; for (const auto& iput_insn : iput_insns) { if (iput_insn != NULL_INSN) { if (move_replacements.find(iput_insn) != move_replacements.end()) { if (new_reg == FieldOrRegStatus::UNDEFINED) { new_reg = move_replacements[iput_insn]->dest(); } else { always_assert(new_reg == move_replacements[iput_insn]->dest()); } } } } return new_reg; } /** * Check builder's constructor does a small amount of work * - instantiates the parent class (Object) * - returns */ bool is_trivial_builder_constructor(DexMethod* method) { always_assert(method != nullptr); auto code = method->get_code(); if (!code) { return false; } if (!method::is_constructor(method)) { return false; } auto ii = InstructionIterable(code); auto it = ii.begin(); if (it->insn->opcode() != IOPCODE_LOAD_PARAM_OBJECT) { return false; } ++it; if (it->insn->opcode() != OPCODE_INVOKE_DIRECT) { return false; } else { auto invoked = resolve_method(it->insn->get_method(), MethodSearch::Direct); if (invoked == nullptr || !method::is_constructor(invoked)) { return false; } } ++it; if (it->insn->opcode() != OPCODE_RETURN_VOID) { return false; } ++it; return it == ii.end(); } std::unordered_set<DexMethod*> get_non_trivial_init_methods(IRCode* code, DexType* type) { always_assert(code != nullptr); always_assert(type != nullptr); std::unordered_set<DexMethod*> methods; for (auto const& mie : InstructionIterable(code)) { auto insn = mie.insn; if (opcode::is_an_invoke(insn->opcode())) { auto invoked = resolve_method(insn->get_method(), opcode_to_search(insn)); if (invoked != nullptr && invoked->get_class() == type) { if (method::is_constructor(invoked) && !is_trivial_builder_constructor(invoked)) { methods.insert(invoked); } } } } return methods; } std::unordered_set<IRInstruction*> get_super_class_initializations( DexMethod* method, DexType* parent_type) 
{ always_assert(method != nullptr); always_assert(parent_type != nullptr); std::unordered_set<IRInstruction*> insns; auto code = method->get_code(); if (!code) { return insns; } for (auto& mie : InstructionIterable(code)) { auto insn = mie.insn; if (opcode::is_an_invoke(insn->opcode())) { auto invoked = resolve_method(insn->get_method(), opcode_to_search(insn)); if (invoked != nullptr && invoked->get_class() == parent_type && method::is_init(invoked)) { insns.emplace(insn); } } } return insns; } bool has_super_class_initializations(DexMethod* method, DexType* parent_type) { return !get_super_class_initializations(method, parent_type).empty(); } void remove_super_class_calls(DexMethod* method, DexType* parent_type) { std::unordered_set<IRInstruction*> to_delete = get_super_class_initializations(method, parent_type); auto code = method->get_code(); if (!code) { return; } for (const auto& insn : to_delete) { code->remove_opcode(insn); } } /** * Gathers all `MOVE` instructions that operate on a builder. */ std::vector<IRInstruction*> gather_move_builders_insn( IRCode* code, const std::vector<cfg::Block*>& blocks, DexType* builder) { std::vector<IRInstruction*> insns; reg_t regs_size = code->get_registers_size(); auto tainted_map = get_tainted_regs(regs_size, blocks, builder); for (auto it : *tainted_map) { auto insn = it.first; auto tainted = it.second.bits(); if (opcode::is_a_move(insn->opcode())) { if (tainted[insn->src(0)]) { insns.push_back(insn); } } } return insns; } /** * Keeps tracks of registers that are going to be used for undefined fields * depending on the type of field: wide, primitive etc. 
*/ class ZeroRegs { public: bool has(DexType* type) { return get(type) != FieldOrRegStatus::UNDEFINED; } reg_t get(DexType* type, reg_t default_value) { if (!has(type)) { set(type, default_value); } return get(type); } private: int m_zero_reg_object{FieldOrRegStatus::UNDEFINED}; int m_zero_reg_int{FieldOrRegStatus::UNDEFINED}; int m_zero_reg_float{FieldOrRegStatus::UNDEFINED}; int m_zero_reg_long{FieldOrRegStatus::UNDEFINED}; int m_zero_reg_double{FieldOrRegStatus::UNDEFINED}; int get(DexType* type) { const auto* name = type->get_name()->c_str(); switch (name[0]) { case 'Z': case 'B': case 'S': case 'C': case 'I': return m_zero_reg_int; case 'J': return m_zero_reg_long; case 'F': return m_zero_reg_float; case 'D': return m_zero_reg_double; case 'L': case '[': return m_zero_reg_object; default: not_reached(); } } void set(DexType* type, reg_t value) { const auto* name = type->get_name()->c_str(); switch (name[0]) { case 'Z': case 'B': case 'S': case 'C': case 'I': m_zero_reg_int = value; return; case 'J': m_zero_reg_long = value; return; case 'F': m_zero_reg_float = value; return; case 'D': m_zero_reg_double = value; return; case 'L': case '[': m_zero_reg_object = value; return; default: not_reached(); } } }; bool remove_builder(DexMethod* method, DexClass* builder) { always_assert(method != nullptr); always_assert(builder != nullptr); auto code = method->get_code(); if (!code) { return false; } code->build_cfg(/* editable */ false); const auto& blocks = code->cfg().blocks_reverse_post_deprecated(); auto fields_in = fields_setters(blocks, builder); static auto init = DexString::make_string("<init>"); reg_t regs_size = code->get_registers_size(); reg_t next_available_reg = regs_size; reg_t extra_regs = 0; size_t num_builders = 0; std::vector<std::pair<reg_t, IROpcode>> extra_null_regs; ZeroRegs undef_fields_regs; // Instructions where the builder gets moved to a different // register need to be also removed (at the end). 
std::vector<IRInstruction*> deletes = gather_move_builders_insn(code, blocks, builder->get_type()); MoveList move_replacements; std::unordered_set<IRInstruction*> update_list; for (auto& block : blocks) { auto ii = InstructionIterable(block); for (auto it = ii.begin(); it != ii.end(); ++it) { auto insn = it->insn; IROpcode opcode = insn->opcode(); auto& fields_in_insn = fields_in->at(it->insn); if (opcode::is_an_iput(opcode)) { auto field = resolve_field(insn->get_field(), FieldSearch::Instance); if (field != nullptr && field->get_class() == builder->get_type()) { deletes.push_back(insn); continue; } } else if (opcode::is_an_iget(opcode)) { auto field = resolve_field(insn->get_field(), FieldSearch::Instance); if (field == nullptr) continue; if (field->get_class() == builder->get_type()) { IROpcode move_opcode = get_move_opcode(insn); bool is_wide = move_opcode == OPCODE_MOVE_WIDE; if (fields_in_insn.field_to_reg[field] == FieldOrRegStatus::DIFFERENT || fields_in_insn.field_to_reg[field] == FieldOrRegStatus::OVERWRITTEN) { const auto& iput_insns = fields_in_insn.field_to_iput_insns[field]; always_assert(!iput_insns.empty()); int64_t new_reg = get_new_reg_if_already_allocated(iput_insns, move_replacements); if (new_reg == FieldOrRegStatus::UNDEFINED) { // Allocating a new register since one was not allocated. new_reg = next_available_reg + extra_regs; extra_regs += is_wide ? 2 : 1; } for (const auto& iput_insn : iput_insns) { if (iput_insn != NULL_INSN) { if (move_replacements.find(iput_insn) != move_replacements.end()) { always_assert(new_reg == move_replacements[iput_insn]->dest()); } else { // Adding a move for each of the setters: // iput v1, object // field -> move new_reg, v1 move_replacements[iput_insn] = construct_move_instr( new_reg, iput_insn->src(0), move_opcode); } } else { // Initializes the register since the field might be // uninitialized. 
extra_null_regs.push_back(std::make_pair(new_reg, move_opcode)); } } // Adding a move for the getter: // iget v2, object // field -> move v2, new_reg move_replacements[insn] = construct_move_instr( std::next(it)->insn->dest(), new_reg, move_opcode); } else if (fields_in_insn.field_to_reg[field] == FieldOrRegStatus::UNDEFINED) { // Initializing the field with null. bool has_null_reg = undef_fields_regs.has(field->get_type()); reg_t new_null_reg = undef_fields_regs.get( field->get_type(), next_available_reg + extra_regs); move_replacements[insn] = construct_move_instr( std::next(it)->insn->dest(), new_null_reg, move_opcode); if (!has_null_reg) { extra_null_regs.emplace_back(new_null_reg, move_opcode); extra_regs += is_wide ? 2 : 1; } } else { // If we got here, the field is held in a register. // Get instruction that sets the field. const auto& iput_insns = fields_in_insn.field_to_iput_insns[field]; if (iput_insns.empty()) { return false; } always_assert(iput_insns.size() == 1); const IRInstruction* iput_insn = *iput_insns.begin(); // Check if we already have a value for it. if (move_replacements.find(iput_insn) != move_replacements.end()) { // Get the actual value. IRInstruction* new_insn = move_replacements[iput_insn]; reg_t new_reg = new_insn->dest(); move_replacements[insn] = construct_move_instr( std::next(it)->insn->dest(), new_reg, move_opcode); } else { // We can reuse the existing reg, so will have only 1 move. // // In case this is a parameter reg, it needs to be updated. if (iput_insn->src(0) >= next_available_reg) { update_list.emplace(insn); } move_replacements[insn] = construct_move_instr( std::next(it)->insn->dest(), iput_insn->src(0), move_opcode); } } deletes.push_back(insn); continue; } } else if (opcode == OPCODE_NEW_INSTANCE || opcode == OPCODE_CHECK_CAST) { DexType* cls = insn->get_type(); if (type_class(cls) == builder) { if (opcode == OPCODE_NEW_INSTANCE) num_builders++; // Safely avoiding the case where multiple builders are initialized. 
if (num_builders > 1) return false; deletes.push_back(insn); continue; } } else if (opcode::is_an_invoke(opcode)) { auto invoked = insn->get_method(); if (invoked->get_class() == builder->get_type() && invoked->get_name() == init) { deletes.push_back(insn); continue; } } } } code->set_registers_size(next_available_reg + extra_regs); null_initializations(code, extra_null_regs); method_updates(method, deletes, move_replacements); return true; } bool has_only_argument(DexMethod* method, DexType* type) { DexProto* proto = method->get_proto(); const auto& args = *proto->get_args(); if (args.size() != 1 || args.at(0) != type) { return false; } return true; } /** * Checks if the registers which hold the arguments for the given method * are used as source for any operation except `iget-*` */ bool params_change_regs(DexMethod* method) { DexProto* proto = method->get_proto(); auto* args = proto->get_args(); auto code = method->get_code(); code->build_cfg(/* editable */ false); const auto& blocks = code->cfg().blocks_reverse_post_deprecated(); reg_t regs_size = code->get_registers_size(); const auto& param_insns = InstructionIterable(code->get_param_instructions()); always_assert(!is_static(method)); // Skip the `this` param auto param_it = std::next(param_insns.begin()); for (DexType* arg : *args) { std::function<void(IRList::iterator, TaintedRegs*)> trans = [&](const IRList::iterator& it, TaintedRegs* tregs) { if (!opcode::is_a_load_param(it->insn->opcode())) { transfer_object_reach(arg, regs_size, it->insn, tregs->m_reg_set); } }; auto tainted = TaintedRegs(regs_size + 1); always_assert(param_it != param_insns.end()); auto arg_reg = (param_it++)->insn->dest(); tainted.m_reg_set[arg_reg] = 1; auto taint_map = forwards_dataflow(blocks, tainted, trans); for (auto it : *taint_map) { auto insn = it.first; auto insn_tainted = it.second.bits(); auto op = insn->opcode(); if (opcode::is_a_load_param(op)) { continue; } if (opcode::is_an_iget(op)) { DexField* field = 
resolve_field(insn->get_field(), FieldSearch::Instance); if (field->get_class() == arg) { continue; } } if (insn->has_dest() && insn_tainted[insn->dest()]) { return true; } for (size_t index = 0; index < insn->srcs_size(); ++index) { if (insn_tainted[insn->src(index)]) { return true; } } } } return false; } /** * Creates a DexProto starting from the ifields of the class. * Example: (field1_type, field2_type ...)V; */ DexProto* make_proto_for(DexClass* cls) { const auto& fields = cls->get_ifields(); DexTypeList::ContainerType dfields; for (const DexField* field : fields) { dfields.push_back(field->get_type()); } auto fields_list = DexTypeList::make_type_list(std::move(dfields)); return DexProto::make_proto(type::_void(), fields_list); } /** * Generate load params instructions for a non-static method with * the `field` arguments. * * At the same time, update field to register mapping. */ std::vector<IRInstruction*> generate_load_params( const std::vector<DexField*>& fields, uint32_t& params_reg_start, std::unordered_map<DexField*, uint32_t>& field_to_reg) { std::vector<IRInstruction*> load_params; // Load current instance. IRInstruction* insn = new IRInstruction(IOPCODE_LOAD_PARAM_OBJECT); insn->set_dest(params_reg_start++); load_params.push_back(insn); for (DexField* field : fields) { IROpcode op; if (type::is_wide_type(field->get_type())) { op = IOPCODE_LOAD_PARAM_WIDE; } else { op = type::is_primitive(field->get_type()) ? IOPCODE_LOAD_PARAM : IOPCODE_LOAD_PARAM_OBJECT; } insn = new IRInstruction(op); insn->set_dest(params_reg_start); field_to_reg[field] = params_reg_start; params_reg_start += type::is_wide_type(field->get_type()) ? 2 : 1; load_params.push_back(insn); } return load_params; } /** * Given a method that takes cls as an argument, creates a new method * that takes cls's fields as arguments. 
*/ DexMethod* create_fields_constr(DexMethod* method, DexClass* cls) { auto init = DexString::get_string("<init>"); auto void_fields = make_proto_for(cls); DexMethod* fields_constr = DexMethod::make_method(method->get_class(), init, void_fields) ->make_concrete(ACC_PUBLIC | ACC_CONSTRUCTOR, false); auto code = method->get_code(); uint32_t regs_size = code->get_registers_size(); const auto& fields = cls->get_ifields(); std::unordered_map<DexField*, uint32_t> field_to_reg; std::unique_ptr<IRCode> new_code = std::make_unique<IRCode>(*code); // Non-input registers for the method are all registers except the // 'this' register and the arguments (which in this case is just 1) uint32_t new_regs_size = regs_size - 2; std::vector<IRInstruction*> load_params = generate_load_params(fields, new_regs_size, field_to_reg); new_code->set_registers_size(new_regs_size); std::vector<IRList::iterator> to_delete; std::unordered_map<IRInstruction*, IRInstruction*> to_replace; auto ii = InstructionIterable(*new_code); for (auto it = ii.begin(); it != ii.end(); ++it) { IRInstruction* insn = it->insn; // Delete old parameter loads. 
if (opcode::is_a_load_param(insn->opcode())) { to_delete.emplace_back(it.unwrap()); continue; } if (opcode::is_an_iget(insn->opcode())) { DexField* field = resolve_field(insn->get_field(), FieldSearch::Instance); if (field->get_class() == cls->get_type()) { // Replace `iget <v_dest>, <v_builder>` with `move <v_dest>, <v_field>` uint32_t current_reg = std::next(it)->insn->dest(); IROpcode move_opcode = get_move_opcode(insn); auto* move = new IRInstruction(move_opcode); move->set_src(0, field_to_reg[field]); move->set_dest(current_reg); to_replace.emplace(insn, move); } } } new_code->insert_after(nullptr, load_params); for (const auto& it : to_replace) { new_code->replace_opcode(it.first, it.second); } for (const auto& it : to_delete) { new_code->erase(it); } fields_constr->set_code(std::move(new_code)); type_class(method->get_class())->add_method(fields_constr); return fields_constr; } DexMethodRef* get_fields_constr_if_exists(DexMethod* method, DexClass* cls) { DexType* type = method->get_class(); auto void_fields = make_proto_for(cls); auto init = DexString::get_string("<init>"); return DexMethod::get_method(type, init, void_fields); } DexMethod* get_fields_constr(DexMethod* method, DexClass* cls) { DexMethodRef* fields_constr = get_fields_constr_if_exists(method, cls); if (!fields_constr || !fields_constr->is_def()) { return create_fields_constr(method, cls); } return static_cast<DexMethod*>(fields_constr); } std::vector<IRList::iterator> get_invokes_for_method(IRCode* code, DexMethod* method) { std::vector<IRList::iterator> fms; auto ii = InstructionIterable(code); for (auto it = ii.begin(); it != ii.end(); ++it) { auto insn = it->insn; if (opcode::is_an_invoke(insn->opcode())) { auto invoked = insn->get_method(); auto def = resolve_method(invoked, MethodSearch::Any); if (def) { invoked = def; } if (invoked == method) { fms.emplace_back(it.unwrap()); } } } return fms; } /** * For the cases where the buildee accepts the builder as the only argument, we * create a 
new constructor, that will take all the builder's fields as * arguments. */ bool update_buildee_constructor(DexMethod* method, DexClass* builder) { DexType* buildee = get_buildee(builder->get_type()); DexMethodRef* buildee_constr_ref = DexMethod::get_method( buildee, DexString::make_string("<init>"), DexProto::make_proto(type::_void(), DexTypeList::make_type_list({builder->get_type()}))); if (!buildee_constr_ref) { // Nothing to search for. return true; } DexMethod* buildee_constr = buildee_constr_ref->as_def(); if (!buildee_constr) { return true; } // Extra conservative: We expect the constructor to do minimum work. if (params_change_regs(buildee_constr)) { return false; } auto code = method->get_code(); std::vector<IRList::iterator> buildee_constr_calls = get_invokes_for_method(code, buildee_constr); if (!buildee_constr_calls.empty()) { DexMethod* fields_constr = get_fields_constr(buildee_constr, builder); if (!fields_constr) { return false; } for (const auto& it : buildee_constr_calls) { IRInstruction* insn = it->insn; uint32_t builder_reg = insn->src(1); uint32_t regs_size = code->get_registers_size(); uint32_t new_regs_size = regs_size; auto fields = builder->get_ifields(); insn->set_method(fields_constr); // 'Make room' for the reg arguments. insn->set_srcs_size(fields.size() + 1); // Loading each of the fields before passing them to the method. // `invoke-direct {v_class, v_builder}` -> // `iget v_field_1, v_builder // iget v_field_2, v_builder // .... 
// invoke_direct {v_class, v_field_1, v_field_2, ...}` uint32_t index = 1; for (DexField* field : fields) { auto* new_insn = new IRInstruction(opcode::iget_opcode_for_field(field)); new_insn->set_src(0, builder_reg); new_insn->set_field(field); code->insert_before(it, new_insn); auto* move_result_pseudo = new IRInstruction(IOPCODE_MOVE_RESULT_PSEUDO_OBJECT); move_result_pseudo->set_dest(new_regs_size); code->insert_before(it, move_result_pseudo); insn->set_src(index++, new_regs_size); new_regs_size += type::is_wide_type(field->get_type()) ? 2 : 1; } code->set_registers_size(new_regs_size); } } return true; } } // namespace /////////////////////////////////////////////// void TaintedRegs::meet(const TaintedRegs& that) { m_reg_set |= that.m_reg_set; } bool TaintedRegs::operator==(const TaintedRegs& that) const { return m_reg_set == that.m_reg_set; } bool TaintedRegs::operator!=(const TaintedRegs& that) const { return !(*this == that); } void FieldsRegs::meet(const FieldsRegs& that) { for (const auto& pair : field_to_reg) { if (pair.second == FieldOrRegStatus::DEFAULT) { field_to_reg[pair.first] = that.field_to_reg.at(pair.first); field_to_iput_insns[pair.first] = that.field_to_iput_insns.at(pair.first); } else if (that.field_to_reg.at(pair.first) == FieldOrRegStatus::DEFAULT) { continue; } else { if (pair.second == FieldOrRegStatus::UNDEFINED || that.field_to_reg.at(pair.first) == FieldOrRegStatus::UNDEFINED) { field_to_iput_insns[pair.first].insert(NULL_INSN); } field_to_reg[pair.first] = FieldOrRegStatus::DIFFERENT; field_to_iput_insns[pair.first].insert( that.field_to_iput_insns.at(pair.first).begin(), that.field_to_iput_insns.at(pair.first).end()); } } } bool FieldsRegs::operator==(const FieldsRegs& that) const { return field_to_reg == that.field_to_reg && field_to_iput_insns == that.field_to_iput_insns; } bool FieldsRegs::operator!=(const FieldsRegs& that) const { return !(*this == that); } void transfer_object_reach(DexType* obj, uint32_t regs_size, const 
IRInstruction* insn, RegSet& regs) { always_assert(obj != nullptr); always_assert(insn != nullptr); auto op = insn->opcode(); if (opcode::is_a_move(op)) { regs[insn->dest()] = regs[insn->src(0)]; } else if (opcode::is_a_move_result(op)) { regs[insn->dest()] = regs[regs_size]; } else if (opcode::writes_result_register(op)) { if (opcode::is_an_invoke(op)) { auto invoked = insn->get_method(); auto def = resolve_method(invoked, MethodSearch::Any); if (def) { invoked = def; } if (invoked->get_proto()->get_rtype() == obj) { regs[regs_size] = 1; return; } } regs[regs_size] = 0; } else if (insn->has_dest() != 0) { regs[insn->dest()] = 0; } } bool tainted_reg_escapes( DexType* ty, DexMethod* method, const std::unordered_map<IRInstruction*, TaintedRegs>& taint_map, bool enable_buildee_constr_change) { always_assert(ty != nullptr); for (auto it : taint_map) { auto insn = it.first; auto tainted = it.second.bits(); auto op = insn->opcode(); if (opcode::is_an_invoke(op)) { auto invoked = resolve_method(insn->get_method(), opcode_to_search(insn)); size_t args_reg_start{0}; if (invoked == nullptr) { TRACE(BUILDERS, 5, "Unable to resolve %s", SHOW(insn)); continue; } if (method::is_init(invoked) || (invoked->get_class() == ty && !opcode::is_invoke_static(op))) { // if a builder is passed as the first arg to a virtual function or a // ctor, we can treat it as non-escaping, since we also check that // those methods don't allow the builder to escape. // // TODO: we should be able to relax the check above to be simply // `!is_static(invoked)`. We don't even need to check that the type // matches -- if the builder is being passed as the first arg reg // to a non-static function, it must be the `this` arg. And if the // non-static function is part of a different class hierarchy, the // builder cannot possibly be passed as the `this` arg. 
args_reg_start = 1; } for (size_t i = args_reg_start; i < insn->srcs_size(); ++i) { if (tainted[insn->src(i)]) { if (enable_buildee_constr_change) { // Don't consider builders that get passed to the buildee's // constructor. `update_buildee_constructor` will sort this // out later. if (method::is_init(invoked) && invoked->get_class() == get_buildee(ty) && has_only_argument(invoked, ty)) { // If the 'fields constructor' already exist, don't continue. if (get_fields_constr_if_exists(invoked, type_class(ty)) == nullptr) { continue; } } } TRACE(BUILDERS, 5, "Escaping instruction: %s", SHOW(insn)); return true; } } } else if (op == OPCODE_SPUT_OBJECT || op == OPCODE_IPUT_OBJECT || op == OPCODE_APUT_OBJECT || op == OPCODE_RETURN_OBJECT) { if (tainted[insn->src(0)]) { if (op == OPCODE_RETURN_OBJECT && method->get_class() == ty) { continue; } TRACE(BUILDERS, 5, "Escaping instruction: %s", SHOW(insn)); return true; } } else if (opcode::is_a_conditional_branch(op) || opcode::is_a_monitor(op)) { if (tainted[insn->src(0)]) { // TODO(emmasevastian): Treat this case separate. return true; } } else if (opcode::is_check_cast(op)) { if (tainted[insn->src(0)]) { TRACE(BUILDERS, 5, "Not supported: %s", SHOW(insn)); return true; } } } return false; } /** * Keep track, per instruction, what register(s) holds * an instance of the `type`. 
*/ std::unique_ptr<std::unordered_map<IRInstruction*, TaintedRegs>> get_tainted_regs(uint32_t regs_size, const std::vector<cfg::Block*>& blocks, DexType* type) { std::function<void(IRList::iterator, TaintedRegs*)> trans = [&](const IRList::iterator& it, TaintedRegs* tregs) { auto insn = it->insn; auto& regs = tregs->m_reg_set; auto op = insn->opcode(); if (opcode::is_a_move_result_pseudo(op) && std::prev(it)->insn->opcode() == OPCODE_NEW_INSTANCE) { DexType* cls = std::prev(it)->insn->get_type(); auto dest = it->insn->dest(); if (cls == type) { regs[dest] = 1; } else { regs[dest] = 0; } } else { transfer_object_reach(type, regs_size, insn, tregs->m_reg_set); } }; // The extra register is used to keep track of the return values. return forwards_dataflow(blocks, TaintedRegs(regs_size + 1), trans); } ////////////////////////////////////////////// bool has_builder_name(DexType* type) { always_assert(type != nullptr); static boost::regex re{"\\$Builder;$"}; const auto& deobfuscated_name = type_class(type)->get_deobfuscated_name_or_empty(); if (!deobfuscated_name.empty()) { return boost::regex_search(deobfuscated_name.c_str(), re); } return boost::regex_search(type->c_str(), re); } DexType* get_buildee(DexType* builder) { always_assert(builder != nullptr); const auto& deobfuscated_name = type_class(builder)->get_deobfuscated_name_or_empty(); const auto& builder_name = !deobfuscated_name.empty() ? 
deobfuscated_name : builder->str(); auto buildee_name = builder_name.substr(0, builder_name.size() - 9) + ";"; return DexType::get_type(buildee_name.c_str()); } std::unordered_set<DexMethod*> get_all_methods(IRCode* code, DexType* type) { always_assert(code != nullptr); always_assert(type != nullptr); std::unordered_set<DexMethod*> methods; for (auto const& mie : InstructionIterable(code)) { auto insn = mie.insn; if (opcode::is_an_invoke(insn->opcode())) { auto invoked = resolve_method(insn->get_method(), opcode_to_search(insn)); if (invoked != nullptr && invoked->get_class() == type) { methods.insert(invoked); } } } return methods; } std::unordered_set<DexMethod*> get_non_init_methods(IRCode* code, DexType* type) { std::unordered_set<DexMethod*> methods = get_all_methods(code, type); for (auto it = methods.begin(); it != methods.end();) { if (method::is_init(*it)) { it = methods.erase(it); } else { it++; } } return methods; } bool BuilderTransform::inline_methods( DexMethod* method, DexType* type, const std::function<std::unordered_set<DexMethod*>(IRCode*, DexType*)>& get_methods_to_inline) { always_assert(method != nullptr); always_assert(type != nullptr); auto code = method->get_code(); if (!code) { return false; } std::unordered_set<DexMethod*> previous_to_inline; std::unordered_set<DexMethod*> to_inline = get_methods_to_inline(code, type); while (!to_inline.empty()) { for (const auto& inlinable : to_inline) { if (!inlinable->get_code()) { TRACE(BUILDERS, 2, "Trying to inline abstract / native etc method: %s in %s", SHOW(inlinable), SHOW(method)); return false; } } m_inliner->inline_callees(method, to_inline); // Check all possible methods were inlined. previous_to_inline = to_inline; to_inline = get_methods_to_inline(code, type); // Return false if nothing changed / nothing got inlined though // there were methods to inline. 
if (previous_to_inline == to_inline) { return false; } } return true; } bool remove_builder_from(DexMethod* method, DexClass* builder, BuilderTransform& b_transform, DexType* super_class_holder) { DexType* buildee = get_buildee(builder->get_type()); always_assert(buildee != nullptr); DexType* super_class = super_class_holder != nullptr ? super_class_holder : builder->get_super_class(); // TODO(emmasevastian): extend it. static DexType* object_type = type::java_lang_Object(); if (super_class != object_type) { return false; } bool tried_constructor_inlining = false; while (!get_non_trivial_init_methods(method->get_code(), builder->get_type()) .empty()) { tried_constructor_inlining = true; // Filter out builders for which the method contains super class invokes. if (has_super_class_initializations(method, super_class)) { return false; } if (!b_transform.inline_methods( method, builder->get_type(), &get_non_trivial_init_methods) || !b_transform.inline_methods( method, builder->get_type(), &get_non_init_methods)) { return false; } } if (!update_buildee_constructor(method, builder)) { return false; } if (!remove_builder(method, builder)) { return false; } // Cleanup after constructor inlining. if (tried_constructor_inlining) { remove_super_class_calls(method, super_class); } return true; }