// libredex/ControlFlow.cpp (2,325 lines of code) (raw):

/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include "ControlFlow.h"

#include <boost/dynamic_bitset.hpp>
#include <boost/numeric/conversion/cast.hpp>
#include <iterator>
#include <stack>
#include <utility>

#include "CppUtil.h"
#include "DexInstruction.h"
#include "DexPosition.h"
#include "DexUtil.h"
#include "GraphUtil.h"
#include "IRList.h"
#include "RedexContext.h"
#include "Show.h"
#include "SourceBlocks.h"
#include "Trace.h"
#include "Transform.h"

namespace {

// return true if `it` should be the last instruction of this block
bool end_of_block(const IRList* ir, const IRList::iterator& it, bool in_try) {
  auto next = std::next(it);
  if (next == ir->end()) {
    return true;
  }
  // End the block before the first target in a contiguous sequence of targets.
  if (next->type == MFLOW_TARGET && it->type != MFLOW_TARGET) {
    return true;
  }
  // End the block before the first catch marker in a contiguous sequence of
  // catch markers.
  if (next->type == MFLOW_CATCH && it->type != MFLOW_CATCH) {
    return true;
  }
  // End the block before a TRY_START
  // and after a TRY_END
  if ((next->type == MFLOW_TRY && next->tentry->type == TRY_START) ||
      (it->type == MFLOW_TRY && it->tentry->type == TRY_END)) {
    return true;
  }
  // Inside a try region, each potentially-throwing instruction ends its block
  // so that throw edges can later be attached per-instruction.
  if (in_try && it->type == MFLOW_OPCODE &&
      opcode::may_throw(it->insn->opcode())) {
    return true;
  }
  if (it->type != MFLOW_OPCODE) {
    return false;
  }
  // Branches, returns and throws always terminate a basic block.
  if (opcode::is_branch(it->insn->opcode()) ||
      opcode::is_a_return(it->insn->opcode()) ||
      it->insn->opcode() == OPCODE_THROW) {
    return true;
  }
  return false;
}

// Returns true iff the last *instruction* (ignoring non-opcode entries) of
// block `p` can throw; false for a block with no instructions.
bool ends_with_may_throw(cfg::Block* p) {
  for (auto last = p->rbegin(); last != p->rend(); ++last) {
    if (last->type != MFLOW_OPCODE) {
      continue;
    }
    return opcode::can_throw(last->insn->opcode());
  }
  return false;
}

/*
 * Given an method-item-entry ordering, delete positions that are...
 * - duplicates with the previous position, even across block boundaries
 *   (they will get reconstituted when the cfg is rebuilt)
 * - adjacent to an immediately following position, as the last position wins.
 * Parent positions are kept as needed.
 */
void remove_redundant_positions(IRList* ir) {
  // We build a set of duplicate positions.
  std::unordered_set<DexPosition*> duplicate_positions;
  std::unordered_map<DexPosition*, IRList::iterator> positions_to_remove;
  DexPosition* prev = nullptr;
  for (auto it = ir->begin(); it != ir->end(); it++) {
    if (it->type == MFLOW_POSITION) {
      DexPosition* curr = it->pos.get();
      positions_to_remove.emplace(curr, it);
      if (prev != nullptr && *curr == *prev) {
        duplicate_positions.insert(curr);
      }
      prev = curr;
    }
  }
  // Backward pass to find positions that are not adjacent to an immediately
  // following position and must be kept (including their parents).
  bool keep_prev = false;
  for (auto it = ir->rbegin(); it != ir->rend(); it++) {
    switch (it->type) {
    case MFLOW_OPCODE:
    case MFLOW_DEX_OPCODE:
    case MFLOW_TARGET:
    case MFLOW_TRY:
    case MFLOW_CATCH:
      keep_prev = true;
      break;
    case MFLOW_POSITION: {
      DexPosition* curr = it->pos.get();
      if (keep_prev && !duplicate_positions.count(curr)) {
        // Keep this position and its whole parent chain; erase() returning 0
        // stops the walk early once a parent was already marked as kept.
        for (auto pos = curr; pos && positions_to_remove.erase(pos);
             pos = pos->parent) {
        }
        keep_prev = false;
      }
      break;
    }
    case MFLOW_SOURCE_BLOCK:
    case MFLOW_DEBUG:
    case MFLOW_FALLTHROUGH:
      // ignore
      break;
    }
  }
  // Final pass to do the actual deletion.
  for (auto& p : positions_to_remove) {
    ir->erase_and_dispose(p.second);
  }
}

// Follow the catch entry linked list starting at `first_mie` and check if the
// throw edges (pointed to by `it`) are equivalent to the linked list. The throw
// edges should be sorted by their indices.
//
// This function is useful in avoiding generating multiple identical catch
// entries.
//
// Used while turning back into a linear representation.
bool catch_entries_equivalent_to_throw_edges(
    cfg::ControlFlowGraph* cfg,
    MethodItemEntry* first_mie,
    std::vector<cfg::Edge*>::iterator it,
    std::vector<cfg::Edge*>::iterator end,
    const std::unordered_map<MethodItemEntry*, cfg::Block*>&
        catch_to_containing_block) {
  for (auto mie = first_mie; mie != nullptr; mie = mie->centry->next) {
    always_assert(mie->type == MFLOW_CATCH);
    if (it == end) {
      return false;
    }
    auto edge = *it;

    if (mie->centry->catch_type != edge->throw_info()->catch_type) {
      return false;
    }

    const auto& search = catch_to_containing_block.find(mie);
    always_assert_log(search != catch_to_containing_block.end(),
                      "%s not found in %s", SHOW(*mie), SHOW(*cfg));
    if (search->second != edge->target()) {
      return false;
    }

    ++it;
  }
  // Equivalent only if the edge list was exhausted at the same time.
  return it == end;
}

} // namespace

namespace cfg {

namespace details {

std::string show_cfg(const ControlFlowGraph& cfg) { return show(cfg); }
std::string show_insn(const IRInstruction* insn) { return show(insn); }

} // namespace details

// Deletes the instructions owned by this block's entries. MethodItemEntries
// do not delete their IRInstructions, so this must be called before deleting
// a block to avoid leaks.
void Block::free() {
  for (auto& mie : *this) {
    switch (mie.type) {
    case MFLOW_OPCODE:
      delete mie.insn;
      mie.insn = nullptr;
      break;
    case MFLOW_DEX_OPCODE:
      delete mie.dex_insn;
      mie.dex_insn = nullptr;
      break;
    default:
      break;
    }
  }
}

void Block::cleanup_debug(std::unordered_set<reg_t>& valid_regs) {
  this->m_entries.cleanup_debug(valid_regs);
}

// In editable mode a block owns its entries (m_entries); in non-editable mode
// it is only a view [m_begin, m_end) into the original IRList.
IRList::iterator Block::begin() {
  if (m_parent->editable()) {
    return m_entries.begin();
  } else {
    return m_begin;
  }
}

IRList::iterator Block::end() {
  if (m_parent->editable()) {
    return m_entries.end();
  } else {
    return m_end;
  }
}

IRList::const_iterator Block::begin() const {
  if (m_parent->editable()) {
    return m_entries.begin();
  } else {
    return m_begin;
  }
}

IRList::const_iterator Block::end() const {
  if (m_parent->editable()) {
    return m_entries.end();
  } else {
    return m_end;
  }
}

// A block is a catch handler iff it has an incoming throw edge.
bool Block::is_catch() const {
  return m_parent->get_pred_edge_of_type(this, EDGE_THROW) != nullptr;
}

bool Block::same_try(const Block* other) const {
  always_assert(other->m_parent == this->m_parent);
  return m_parent->blocks_are_in_same_try(this, other);
}

void Block::remove_insn(const InstructionIterator& it) {
  always_assert(m_parent->editable());
  m_parent->remove_insn(it);
}

void Block::remove_insn(const ir_list::InstructionIterator& it) {
  always_assert(m_parent->editable());
  remove_insn(to_cfg_instruction_iterator(it));
}

void Block::remove_insn(const IRList::iterator& it) {
  always_assert(m_parent->editable());
  remove_insn(to_cfg_instruction_iterator(it));
}

// Erases one entry. A removed instruction is parked in the parent CFG's
// m_removed_insns rather than deleted immediately.
void Block::remove_mie(const IRList::iterator& it) {
  if (it->type == MFLOW_OPCODE) {
    m_parent->m_removed_insns.push_back(it->insn);
  }
  m_entries.erase_and_dispose(it);
}

// Classifies how control leaves this block (none/goto/if/switch/return/throw),
// derived from the outgoing edges and the last instruction.
opcode::Branchingness Block::branchingness() {
  // TODO (cnli): put back 'always_assert(m_parent->editable());'
  // once ModelMethodMerger::sink_common_ctor_to_return_block update
  // to editable CFG.
  const auto& last = get_last_insn();

  // No successors (or only a ghost edge to the exit block): the block either
  // returns, throws, or simply ends the method.
  if (succs().empty() ||
      (succs().size() == 1 &&
       m_parent->get_succ_edge_of_type(this, EDGE_GHOST) != nullptr)) {
    if (last != end()) {
      auto op = last->insn->opcode();
      if (opcode::is_a_return(op)) {
        return opcode::BRANCH_RETURN;
      } else if (op == OPCODE_THROW) {
        return opcode::BRANCH_THROW;
      }
    }
    return opcode::BRANCH_NONE;
  }

  if (m_parent->get_succ_edge_of_type(this, EDGE_THROW) != nullptr) {
    return opcode::BRANCH_THROW;
  }

  if (m_parent->get_succ_edge_of_type(this, EDGE_BRANCH) != nullptr) {
    always_assert(last != end());
    auto br = opcode::branchingness(last->insn->opcode());
    always_assert(br == opcode::BRANCH_IF || br == opcode::BRANCH_SWITCH);
    return br;
  }

  if (m_parent->get_succ_edge_of_type(this, EDGE_GOTO) != nullptr) {
    return opcode::BRANCH_GOTO;
  }
  return opcode::BRANCH_NONE;
}

uint32_t Block::num_opcodes() const {
  always_assert(m_parent->editable());
  return m_entries.count_opcodes();
}

uint32_t Block::sum_opcode_sizes() const {
  always_assert(m_parent->editable());
  return m_entries.sum_opcode_sizes();
}

// shallowly copy pointers (edges and parent cfg)
// but deeply copy MethodItemEntries
Block::Block(const Block& b, MethodItemEntryCloner* cloner)
    : m_id(b.m_id),
      m_preds(b.m_preds),
      m_succs(b.m_succs),
      m_parent(b.m_parent) {

  // only for editable, don't worry about m_begin and m_end
  always_assert(m_parent->editable());

  for (const auto& mie : b.m_entries) {
    m_entries.push_back(*cloner->clone(&mie));
  }
}

// True iff there is a pred edge from `b` (of type `t`, or any type when
// t == EDGE_TYPE_SIZE, which acts as a wildcard).
bool Block::has_pred(Block* b, EdgeType t) const {
  const auto& edges = preds();
  return std::find_if(edges.begin(), edges.end(), [b, t](const Edge* edge) {
           return edge->src() == b &&
                  (t == EDGE_TYPE_SIZE || edge->type() == t);
         }) != edges.end();
}

// True iff there is a succ edge to `b` (of type `t`, or any type when
// t == EDGE_TYPE_SIZE, which acts as a wildcard).
bool Block::has_succ(Block* b, EdgeType t) const {
  const auto& edges = succs();
  return std::find_if(edges.begin(), edges.end(), [b, t](const Edge* edge) {
           return edge->target() == b &&
                  (t == EDGE_TYPE_SIZE || edge->type() == t);
         }) != edges.end();
}

IRList::iterator Block::get_conditional_branch() {
  for (auto it = rbegin(); it != rend(); ++it) {
    if (it->type == MFLOW_OPCODE) {
      auto op = it->insn->opcode();
      if (opcode::is_a_conditional_branch(op) || opcode::is_switch(op)) {
        return std::prev(it.base());
      }
    }
  }
  return end();
}

IRList::iterator Block::get_last_insn() {
  for (auto it = rbegin(); it != rend(); ++it) {
    if (it->type == MFLOW_OPCODE) {
      // Reverse iterators have a member base() which returns a corresponding
      // forward iterator. Beware that this isn't an iterator that refers to the
      // same object - it actually refers to the next object in the sequence.
      // This is so that rbegin() corresponds with end() and rend() corresponds
      // with begin(). Copied from https://stackoverflow.com/a/2037917
      return std::prev(it.base());
    }
  }
  return end();
}

IRList::iterator Block::get_first_insn() {
  for (auto it = begin(); it != end(); ++it) {
    if (it->type == MFLOW_OPCODE) {
      return it;
    }
  }
  return end();
}

IRList::iterator Block::get_first_non_param_loading_insn() {
  for (auto it = begin(); it != end(); ++it) {
    if (it->type != MFLOW_OPCODE) {
      continue;
    }
    if (!opcode::is_a_load_param(it->insn->opcode())) {
      return it;
    }
  }
  return end();
}

IRList::iterator Block::get_last_param_loading_insn() {
  IRList::iterator res = end();
  for (auto it = begin(); it != end(); ++it) {
    if (it->type != MFLOW_OPCODE) {
      continue;
    }
    if (opcode::is_a_load_param(it->insn->opcode())) {
      res = it;
    } else {
      // There won't be another one.
      break;
    }
  }
  return res;
}

// First instruction a DexPosition could be placed before: skips move-result
// and goto; returns end() if a position is encountered first.
IRList::iterator Block::get_first_insn_before_position() {
  for (auto it = begin(); it != end(); ++it) {
    if (it->type == MFLOW_OPCODE) {
      auto op = it->insn->opcode();
      if (!opcode::is_move_result_any(op) && !opcode::is_goto(op)) {
        return it;
      }
    } else if (it->type == MFLOW_POSITION) {
      return end();
    }
  }
  return end();
}

bool Block::starts_with_move_result() {
  auto first_it = get_first_insn();
  if (first_it != end()) {
    auto first_op = first_it->insn->opcode();
    if (opcode::is_move_result_any(first_op)) {
      return true;
    }
  }
  return false;
}

bool Block::starts_with_move_exception() {
  auto first_it = get_first_insn();
  if (first_it != end()) {
    auto first_op = first_it->insn->opcode();
    if (opcode::is_move_exception(first_op)) {
      return true;
    }
  }
  return false;
}

bool Block::contains_opcode(IROpcode opcode) {
  for (auto it = begin(); it != end(); ++it) {
    if (it->type != MFLOW_OPCODE) {
      continue;
    }
    if (it->insn->opcode() == opcode) {
      return true;
    }
  }
  return false;
}

// True iff `other`'s entry sequence is a prefix of this block's entries.
bool Block::begins_with(Block* other) {
  IRList::iterator self_it = this->begin();
  IRList::iterator other_it = other->begin();

  while (self_it != this->end() && other_it != other->end()) {
    if (*self_it != *other_it) {
      return false;
    }

    self_it++;
    other_it++;
  }

  return other_it == other->end();
}

Block* Block::goes_to() const {
  const Edge* e = m_parent->get_succ_edge_of_type(this, EDGE_GOTO);
  if (e != nullptr) {
    return e->target();
  }
  return nullptr;
}

// Like goes_to(), but only when the goto edge is the *sole* successor.
Block* Block::goes_to_only_edge() const {
  const auto& s = succs();
  if (s.size() == 1) {
    const auto& e = s[0];
    if (e->type() == EDGE_GOTO) {
      return e->target();
    }
  }
  return nullptr;
}

bool Block::cannot_throw() const {
  for (auto& mie : ir_list::ConstInstructionIterable(this)) {
    if (opcode::can_throw(mie.insn->opcode())) {
      return false;
    }
  }
  return true;
}

// Outgoing throw edges sorted by handler index, i.e. the order in which the
// runtime tries the catch handlers.
std::vector<Edge*> Block::get_outgoing_throws_in_order() const {
  std::vector<Edge*> result =
      m_parent->get_succ_edges_of_type(this, EDGE_THROW);
  std::sort(result.begin(), result.end(), [](const Edge* e1, const Edge* e2) {
    return e1->throw_info()->index < e2->throw_info()->index;
  });
  return result;
}

// These assume that the iterator is inside this block
cfg::InstructionIterator Block::to_cfg_instruction_iterator(
    const ir_list::InstructionIterator& list_it, bool next_on_end) {
  if (ControlFlowGraph::DEBUG && list_it.unwrap() != end()) {
    // Expensive membership check, only in DEBUG mode.
    bool inside = false;
    auto needle = list_it.unwrap();
    for (auto it = begin(); it != end(); ++it) {
      if (it == needle) {
        inside = true;
      }
    }
    always_assert(inside);
  }
  auto it = cfg::InstructionIterator(*m_parent, this, list_it);
  if (next_on_end && list_it.unwrap() == end()) {
    ++it;
  }
  return it;
}

cfg::InstructionIterator Block::to_cfg_instruction_iterator(
    const IRList::iterator& list_it, bool next_on_end) {
  always_assert(list_it == this->end() || list_it->type == MFLOW_OPCODE);
  return to_cfg_instruction_iterator(
      ir_list::InstructionIterator(list_it, this->end()), next_on_end);
}

cfg::InstructionIterator Block::to_cfg_instruction_iterator(
    MethodItemEntry& mie) {
  always_assert(m_parent->editable());
  return to_cfg_instruction_iterator(m_entries.iterator_to(mie));
}

// Forward the insertion methods to the parent CFG.
bool Block::insert_before(const InstructionIterator& position,
                          const std::vector<IRInstruction*>& insns) {
  always_assert(position.block() == this);
  return m_parent->insert_before(position, insns);
}
bool Block::insert_before(const InstructionIterator& position,
                          IRInstruction* insn) {
  always_assert(position.block() == this);
  return m_parent->insert_before(position, insn);
}
bool Block::insert_after(const InstructionIterator& position,
                         const std::vector<IRInstruction*>& insns) {
  always_assert(position.block() == this);
  return m_parent->insert_after(position, insns);
}
bool Block::insert_after(const InstructionIterator& position,
                         IRInstruction* insn) {
  always_assert(position.block() == this);
  return m_parent->insert_after(position, insn);
}
bool Block::push_front(const std::vector<IRInstruction*>& insns) {
  return m_parent->push_front(this, insns);
}
bool Block::push_front(IRInstruction* insn) {
  return m_parent->push_front(this, insn);
}
bool Block::push_back(const std::vector<IRInstruction*>& insns) {
  return m_parent->push_back(this, insns);
}
bool Block::push_back(IRInstruction* insn) {
  return m_parent->push_back(this, insn);
}

// Instruction-wise equality: both blocks contain the same instruction
// sequence. Non-opcode entries and edges are ignored.
bool Block::structural_equals(const Block* other) const {
  auto iterable1 = ir_list::ConstInstructionIterable(this);
  auto iterable2 = ir_list::ConstInstructionIterable(other);
  auto it1 = iterable1.begin();
  auto it2 = iterable2.begin();

  for (; it1 != iterable1.end() && it2 != iterable2.end(); ++it1, ++it2) {
    auto& mie1 = *it1;
    auto& mie2 = *it2;

    if (*mie1.insn != *mie2.insn) {
      return false;
    }
  }

  return it1 == iterable1.end() && it2 == iterable2.end();
}

// Human-readable edge type (branch edges also print their case key, if any).
std::ostream& operator<<(std::ostream& os, const Edge& e) {
  switch (e.type()) {
  case EDGE_GOTO:
    return os << "goto";
  case EDGE_BRANCH: {
    os << "branch";
    const auto& key = e.case_key();
    if (key) {
      os << " " << *key;
    }
    return os;
  }
  case EDGE_THROW:
    return os << "throw";
  case EDGE_GHOST:
    return os << "ghost";
  case EDGE_TYPE_SIZE:
    break;
  }
  not_reached();
}

bool ControlFlowGraph::DEBUG = false;
// Builds the CFG from a linear IRList. In editable mode the entries are moved
// out of `ir` into per-block lists and the graph is simplified; otherwise
// blocks remain views into `ir`.
ControlFlowGraph::ControlFlowGraph(IRList* ir,
                                   reg_t registers_size,
                                   bool editable)
    : m_orig_list(editable ? nullptr : ir),
      m_registers_size(registers_size),
      m_editable(editable) {
  always_assert_log(!ir->empty(), "IRList contains no instructions");

  BranchToTargets branch_to_targets;
  TryEnds try_ends;
  TryCatches try_catches;

  find_block_boundaries(ir, branch_to_targets, try_ends, try_catches);

  connect_blocks(branch_to_targets);
  add_catch_edges(try_ends, try_catches);

  if (m_editable) {
    remove_try_catch_markers();

    // Often, the `registers_size` parameter passed into this constructor is
    // incorrect. We recompute here to safeguard against this.
    // TODO: fix the optimizations that don't track registers size correctly.
    recompute_registers_size();

    TRACE_NO_LINE(CFG, 5, "before simplify:\n%s", SHOW(*this));
    simplify();
    TRACE_NO_LINE(CFG, 5, "after simplify:\n%s", SHOW(*this));
  } else {
    remove_unreachable_succ_edges();
  }

  TRACE_NO_LINE(CFG, 5, "editable %d, %s", m_editable, SHOW(*this));
}

// First build phase: walk the IRList once and cut it into basic blocks,
// recording branch targets, try-end markers and catch markers for the later
// edge-building phases.
void ControlFlowGraph::find_block_boundaries(IRList* ir,
                                             BranchToTargets& branch_to_targets,
                                             TryEnds& try_ends,
                                             TryCatches& try_catches) {
  // create the entry block
  auto* block = create_block();
  IRList::iterator block_begin;
  if (m_editable) {
    block_begin = ir->begin();
  } else {
    block->m_begin = ir->begin();
  }
  set_entry_block(block);

  bool in_try = false;
  IRList::iterator next;
  DexPosition* current_position = nullptr;
  DexPosition* last_pos_before_this_block = nullptr;
  for (auto it = ir->begin(); it != ir->end(); it = next) {
    next = std::next(it);
    if (it->type == MFLOW_TRY) {
      if (it->tentry->type == TRY_START) {
        // Assumption: TRY_STARTs are only at the beginning of blocks
        always_assert(!m_editable || it == block_begin);
        always_assert(m_editable || it == block->m_begin);
        in_try = true;
      } else if (it->tentry->type == TRY_END) {
        try_ends.emplace_back(it->tentry, block);
        in_try = false;
      }
    } else if (it->type == MFLOW_CATCH) {
      try_catches[it->centry] = block;
    } else if (it->type == MFLOW_TARGET) {
      branch_to_targets[it->target->src].emplace_back(block, &*it);
    } else if (it->type == MFLOW_POSITION) {
      current_position = it->pos.get();
    }

    if (!end_of_block(ir, it, in_try)) {
      continue;
    }

    // End the current block.
    if (m_editable) {
      // Steal the code from the ir and put it into the block.
      // This is safe to do while iterating in ir because iterators in ir now
      // point to elements of block->m_entries (and we already computed next).
      block->m_entries.splice_selection(block->m_entries.end(), *ir,
                                        block_begin, next);
      if (last_pos_before_this_block != nullptr) {
        auto first_insn = block->get_first_insn_before_position();
        if (first_insn != block->end()) {
          // DexPositions apply to every instruction in the linear stream until
          // the next DexPosition. Because we're breaking up the linear stream
          // into many small blocks, we need to make sure that instructions stay
          // associated with the same DexPosition as they were in the input
          // IRList.
          //
          // This creates duplicate positions, but we will remove any extras at
          // linearize time.
          block->m_entries.insert_before(
              first_insn,
              std::make_unique<DexPosition>(*last_pos_before_this_block));
        }
      }
    } else {
      block->m_end = next;
    }

    if (next == ir->end()) {
      break;
    }

    // Start a new block at the next MethodItem.
    block = create_block();
    if (m_editable) {
      last_pos_before_this_block = current_position;
      block_begin = next;
    } else {
      block->m_begin = next;
    }
  }
  TRACE(CFG, 5, " build: boundaries found");
}

// Link the blocks together with edges. If the CFG is editable, also insert
// fallthrough goto instructions and delete MFLOW_TARGETs.
// Second build phase: add goto/branch edges between the blocks found by
// find_block_boundaries, using the recorded branch->target mapping.
void ControlFlowGraph::connect_blocks(BranchToTargets& branch_to_targets) {
  for (auto it = m_blocks.begin(); it != m_blocks.end(); ++it) {
    // Set outgoing edge if last MIE falls through
    Block* b = it->second;
    auto& last_mie = *b->rbegin();
    bool fallthrough = true;
    if (last_mie.type == MFLOW_OPCODE) {
      auto last_op = last_mie.insn->opcode();
      if (opcode::is_branch(last_op)) {
        fallthrough = !opcode::is_goto(last_op);
        auto const& target_blocks = branch_to_targets[&last_mie];

        for (auto& p : target_blocks) {
          auto target_block = p.first;
          auto& target_mie = *p.second;
          always_assert(target_mie.type == MFLOW_TARGET);
          always_assert(target_mie.target->src == &last_mie);
          Edge::MaybeCaseKey case_key;
          if (target_mie.target->type == BRANCH_MULTI) {
            always_assert_log(opcode::is_switch(last_mie.insn->opcode()),
                              "block %zu in %s\n", target_block->id(),
                              SHOW(*this));
            case_key = target_mie.target->case_key;
          } else {
            always_assert(target_mie.target->type == BRANCH_SIMPLE);
          }
          if (m_editable) {
            // The branch information is stored in the edges, we don't need
            // the targets inside the blocks anymore
            target_block->m_entries.erase_and_dispose(
                target_block->m_entries.iterator_to(target_mie));
          }

          if (case_key) {
            add_edge(b, target_block, *case_key);
            continue;
          }
          auto edge_type = opcode::is_goto(last_op) ? EDGE_GOTO : EDGE_BRANCH;
          add_edge(b, target_block, edge_type);
        }

        if (m_editable && opcode::is_goto(last_op)) {
          // We don't need the gotos in editable mode because the edges
          // fully encode that information
          delete last_mie.insn;
          b->m_entries.erase_and_dispose(b->m_entries.iterator_to(last_mie));
        }
      } else if (opcode::is_a_return(last_op) || last_op == OPCODE_THROW) {
        fallthrough = false;
      }
    }

    auto next = std::next(it);
    if (fallthrough && next != m_blocks.end()) {
      Block* next_b = next->second;
      TRACE(CFG, 6, "adding fallthrough goto %zu -> %zu", b->id(),
            next_b->id());
      add_edge(b, next_b, EDGE_GOTO);
    }
  }
  TRACE(CFG, 5, " build: edges added");
}

// Third build phase: attach throw edges from every may-throw block inside a
// try region to each of the region's catch handler blocks.
void ControlFlowGraph::add_catch_edges(TryEnds& try_ends,
                                       TryCatches& try_catches) {
  /*
   * Every block inside a try-start/try-end region
   * gets an edge to every catch block. This simplifies dataflow analysis
   * since you can always get the exception state by looking at successors,
   * without any additional analysis.
   *
   * NB: This algorithm assumes that a try-start/try-end region will consist of
   * sequentially-numbered blocks, which is guaranteed because catch regions
   * are contiguous in the bytecode, and we generate blocks in bytecode order.
   */
  for (auto tep : try_ends) {
    auto try_end = tep.first;
    auto tryendblock = tep.second;
    size_t bid = tryendblock->id();
    // Walk backwards from the try-end block until we hit the matching
    // TRY_START marker.
    while (true) {
      Block* block = m_blocks.at(bid);
      if (ends_with_may_throw(block)) {
        uint32_t i = 0;
        for (auto mie = try_end->catch_start; mie != nullptr;
             mie = mie->centry->next) {
          auto catchblock = try_catches.at(mie->centry);
          // Create a throw edge with the information from this catch entry
          add_edge(block, catchblock, mie->centry->catch_type, i);
          ++i;
        }
      }
      auto block_begin = block->begin();
      if (block_begin != block->end() && block_begin->type == MFLOW_TRY) {
        auto tentry = block_begin->tentry;
        if (tentry->type == TRY_START) {
          always_assert_log(tentry->catch_start == try_end->catch_start, "%s",
                            SHOW(*this));
          break;
        }
      }
      always_assert_log(bid > 0, "No beginning of try region found");
      --bid;
    }
  }
  TRACE(CFG, 5, " build: catch edges added");
}

BlockId ControlFlowGraph::next_block_id() const {
  // Choose the next largest id. Note that we can't use m_block.size() because
  // we may have deleted some blocks from the cfg.
  const auto& rbegin = m_blocks.rbegin();
  return (rbegin == m_blocks.rend()) ? 0 : (rbegin->first + 1);
}

void ControlFlowGraph::remove_unreachable_succ_edges() {
  // Remove edges between unreachable blocks and their succ blocks.
  if (m_blocks.empty()) {
    return;
  }

  const auto& visited = visit();
  if (visited.all()) {
    // All blocks are visited. No blocks need to have their succ edges removed.
    return;
  }

  for (auto it = m_blocks.begin(); it != m_blocks.end(); ++it) {
    Block* b = it->second;
    if (visited.test(b->id())) {
      continue;
    }

    TRACE(CFG, 5, " build: removing succ edges from block %zu", b->id());
    delete_succ_edges(b);
  }
  TRACE(CFG, 5, " build: unreachables removed");
}

/*
 * Traverse the graph, starting from the entry node. Return a bitset with IDs of
 * reachable blocks having 1 and IDs of unreachable blocks (or unused IDs)
 * having 0.
 */
boost::dynamic_bitset<> ControlFlowGraph::visit() const {
  // Iterative DFS over successor edges.
  std::stack<const cfg::Block*> to_visit;
  boost::dynamic_bitset<> visited{next_block_id()};
  to_visit.push(entry_block());
  while (!to_visit.empty()) {
    const cfg::Block* b = to_visit.top();
    to_visit.pop();

    // test_set returns the previous value; skip already-visited blocks.
    if (visited.test_set(b->id())) {
      continue;
    }

    for (Edge* e : b->succs()) {
      to_visit.push(e->target());
    }
  }
  return visited;
}

namespace {

// Merges runs of consecutive MFLOW_SOURCE_BLOCK entries in `list` into a
// single chained entry. Position/debug entries in between are skipped over;
// any other entry type breaks the chain.
void chain_consecutive_source_blocks(IRList& list) {
  boost::optional<IRList::iterator> last_it = boost::none;
  for (auto it = list.begin(); it != list.end(); ++it) {
    if (it->type == MFLOW_POSITION || it->type == MFLOW_DEBUG) {
      // We can move over debug info. Otherwise, reset.
      continue;
    }
    if (it->type != MFLOW_SOURCE_BLOCK) {
      last_it = boost::none;
      continue;
    }

    if (last_it) {
      // Append this source block to the previous one and erase it, stepping
      // the iterator back so the loop's ++it lands on the next entry.
      auto prev = std::prev(it);
      (*last_it)->src_block->append(std::move(it->src_block));
      list.erase_and_dispose(it);
      it = prev;
    } else {
      last_it = it;
    }
  }
}

} // namespace

// Removes unreachable and empty blocks; returns the number of instructions
// removed.
uint32_t ControlFlowGraph::simplify() {
  auto [num_insns_removed, registers_size_possibly_reduced] =
      remove_unreachable_blocks();
  if (registers_size_possibly_reduced) {
    recompute_registers_size();
  }
  // FIXME: "Empty" blocks with only `DexPosition`s should be merged
  //        into their successors for consistency. Otherwise
  //        `remove_empty_blocks` will remove them, which it will not
  //        if they are at the head of a non-empty block.
  remove_empty_blocks();

  for (auto& p : m_blocks) {
    chain_consecutive_source_blocks(p.second->m_entries);
  }

  return num_insns_removed;
}

// remove blocks with no predecessors
// Returns {number of instructions removed, whether the registers size might
// have shrunk as a result}.
std::pair<uint32_t, bool> ControlFlowGraph::remove_unreachable_blocks() {
  uint32_t num_insns_removed = 0;
  remove_unreachable_succ_edges();
  std::vector<std::unique_ptr<DexPosition>> dangling;
  bool registers_size_possibly_reduced = false;
  for (auto it = m_blocks.begin(); it != m_blocks.end();) {
    Block* b = it->second;
    const auto& preds = b->preds();
    if (preds.empty() && b != entry_block()) {
      if (b == exit_block()) {
        set_exit_block(nullptr);
      }
      for (auto& mie : *b) {
        if (mie.type == MFLOW_POSITION) {
          // Keep positions alive until parent chains are fixed up below.
          dangling.push_back(std::move(mie.pos));
        } else if (mie.type == MFLOW_OPCODE) {
          auto insn = mie.insn;
          if (insn->has_dest()) {
            // +1 because registers start at zero
            auto size_required = insn->dest() + insn->dest_is_wide() + 1;
            if (size_required >= m_registers_size) {
              // We're deleting an instruction that may have been the max
              // register of the entire function.
              registers_size_possibly_reduced = true;
            }
          }
        }
      }
      num_insns_removed += b->num_opcodes();
      always_assert(b->succs().empty());
      always_assert(b->preds().empty());
      // Deletion of a block deletes MIEs, but MIEs do not delete instructions.
      // Gotta do this manually for now.
      b->free();
      delete b;
      it = m_blocks.erase(it);
    } else {
      ++it;
    }
  }

  fix_dangling_parents(std::move(dangling));

  return std::make_pair(num_insns_removed, registers_size_possibly_reduced);
}

// Re-inserts (materializes) any of the given orphaned positions that are still
// referenced as a parent by a position remaining in the CFG, so that no
// DexPosition has a dangling parent pointer.
void ControlFlowGraph::fix_dangling_parents(
    std::vector<std::unique_ptr<DexPosition>> dangling) {
  if (dangling.empty()) {
    return;
  }

  // We move all dangling positions into a map that allows us to quickly find a
  // position by its pointer value while maintaining ownership of the position
  // in the associated unique_ptr. We'll use this map later to try to find
  // parent positions.
  std::unordered_map<DexPosition*, std::unique_ptr<DexPosition>> map;
  for (auto& pos : dangling) {
    auto pos_ptr = pos.get();
    map.emplace(pos_ptr, std::move(pos));
  }

  // Helper function to insert parent positions, as needed.
  // (Recursive: a parent's own parent is materialized first.)
  std::function<void(cfg::Block*, const IRList::iterator&, DexPosition*)>
      materialize;
  materialize = [&](cfg::Block* block, const IRList::iterator& it,
                    DexPosition* pos) {
    if (!pos) {
      return;
    }
    auto it2 = map.find(pos);
    if (it2 == map.end()) {
      return;
    }
    materialize(block, it, pos->parent);
    insert_before(block, it, std::move(it2->second));
    map.erase(it2);
  };

  // Search for dangling parent pointers and fix them
  for (cfg::Block* block : blocks()) {
    for (auto it = block->begin(); it != block->end(); it++) {
      if (it->type != MFLOW_POSITION) {
        continue;
      }
      materialize(block, it, it->pos->parent);
    }
  }

  // Note that the memory of those mapped position that didn't get used up will
  // get released at this point
}

// Removes blocks that contain no instructions, splicing their predecessors
// directly to their single successor and preserving positions/source blocks
// where needed.
void ControlFlowGraph::remove_empty_blocks() {
  always_assert(editable());
  std::vector<std::unique_ptr<DexPosition>> dangling;
  for (auto it = m_blocks.begin(); it != m_blocks.end();) {
    Block* b = it->second;
    if (b->get_first_insn() != b->end() || b == exit_block()) {
      ++it;
      continue;
    }

    const auto& succs = get_succ_edges_if(
        b, [](const Edge* e) { return e->type() != EDGE_GHOST; });
    if (!succs.empty()) {
      always_assert_log(succs.size() == 1,
                        "too many successors for empty block %zu:\n%s",
                        it->first, SHOW(*this));
      const auto& succ_edge = succs[0];
      Block* succ = succ_edge->target();

      if (b == succ) { // `b` follows itself: an infinite loop
        ++it;
        continue;
      }
      // Does it have source blocks, and the successor does have multiple
      // predecessors?
      bool move_source_blocks{false};
      if (source_blocks::has_source_blocks(b)) {
        // The entry block has a virtual in-edge, don't merge on a single
        // back-edge.
        if (succ->preds().size() == 1 && succ != m_entry_block) {
          // Good case: just move the source blocks forward.
          move_source_blocks = true;
        } else if (g_redex->instrument_mode) {
          // If we are instrumenting, it is necessary to keep the block for its
          // source-blocks.
          ++it;
          continue;
        }
      }

      // b is empty and removable. Reorganize the edges so we can remove it

      // Remove the one goto edge from b to succ
      delete_edges_between(b, succ);

      // If b was a predecessor of the exit block (for example, part of an
      // infinite loop) we need to transfer that info to `succ` because `b` will
      // be made unreachable and deleted by simplify
      auto ghost = get_succ_edge_of_type(b, EDGE_GHOST);
      if (ghost != nullptr) {
        set_edge_source(ghost, succ);
      }

      // Redirect from b's predecessors to b's successor (skipping b). We
      // can't move edges around while we iterate through the edge list
      // though.
      std::vector<Edge*> need_redirect(b->m_preds.begin(), b->m_preds.end());
      for (Edge* pred_edge : need_redirect) {
        set_edge_target(pred_edge, succ);
      }

      if (b == entry_block()) {
        m_entry_block = succ;
      }

      // Move positions if succ doesn't have any
      auto first_it = succ->get_first_insn_before_position();
      if (first_it != succ->end()) {
        always_assert(
            !opcode::is_a_move_result_pseudo(first_it->insn->opcode()));
        for (auto& mie : *b) {
          if (mie.type == MFLOW_POSITION) {
            succ->m_entries.insert_before(first_it, std::move(mie.pos));
          }
        }
      }

      // Move all source blocks.
      // Note: the order of source blocks does not really matter.
      if (move_source_blocks) {
        bool first = true;
        for (auto& mie : *b) {
          if (mie.type == MFLOW_SOURCE_BLOCK) {
            if (first) {
              succ->m_entries.insert_before(succ->begin(),
                                            std::move(mie.src_block));
            } else {
              succ->m_entries.insert_after(succ->begin(),
                                           std::move(mie.src_block));
            }
            first = false;
          }
        }
      }
    }

    if (b == m_entry_block) {
      // Don't delete the entry block. If it was empty and had a successor,
      // we'd have replaced it just above.
      ++it;
      continue;
    }

    for (auto& mie : *b) {
      if (mie.type == MFLOW_POSITION) {
        dangling.push_back(std::move(mie.pos));
      }
    }

    b->free();
    delete b;
    it = m_blocks.erase(it);
  }
  fix_dangling_parents(std::move(dangling));
}

// Debug check: every edge owned by the CFG must be referenced by some block's
// pred or succ list.
void ControlFlowGraph::no_unreferenced_edges() const {
  EdgeSet referenced;
  for (const auto& entry : m_blocks) {
    Block* b = entry.second;
    for (Edge* e : b->preds()) {
      referenced.insert(e);
    }
    for (Edge* e : b->succs()) {
      referenced.insert(e);
    }
  }
  always_assert(referenced == m_edges);
}

// Verify that
// * MFLOW_TARGETs are gone
// * OPCODE_GOTOs are gone
// * Correct number of outgoing edges
void ControlFlowGraph::sanity_check() const {
  if (m_editable) {
    for (const auto& entry : m_blocks) {
      Block* b = entry.second;
      if (DEBUG) {
        // No targets or gotos
        for (const auto& mie : *b) {
          always_assert_log(mie.type != MFLOW_TARGET,
                            "failed to remove all targets. block %zu in\n%s",
                            b->id(), SHOW(*this));
          if (mie.type == MFLOW_OPCODE) {
            always_assert_log(!opcode::is_goto(mie.insn->opcode()),
                              "failed to remove all gotos. block %zu in\n%s",
                              b->id(), SHOW(*this));
          }
        }
      }

      // Last instruction matches outgoing edges
      uint32_t num_goto_succs = 0;
      uint32_t num_succs = 0;
      for (const Edge* e : b->succs()) {
        if (e->type() == EDGE_GOTO) {
          ++num_goto_succs;
        }
        if (e->type() != EDGE_GHOST) {
          ++num_succs;
        }
      }
      auto last_it = b->get_last_insn();
      auto num_preds = b->preds().size();
      if (last_it != b->end()) {
        auto op = last_it->insn->opcode();

        if (opcode::is_a_conditional_branch(op)) {
          always_assert_log(num_succs == 2, "block %zu, %s", b->id(),
                            SHOW(*this));
        } else if (opcode::is_switch(op)) {
          always_assert_log(num_succs > 1, "block %zu, %s", b->id(),
                            SHOW(*this));
        } else if (opcode::is_a_return(op)) {
          // Make sure we don't have any outgoing edges (except EDGE_GHOST)
          always_assert_log(num_succs == 0, "block %zu, %s", b->id(),
                            SHOW(*this));
        } else if (opcode::is_throw(op)) {
          // A throw could end the method or go to a catch handler.
          // Make sure this block has no outgoing non-throwing edges
          auto non_throw_edge = get_succ_edge_if(b, [](const Edge* e) {
            return e->type() != EDGE_THROW && e->type() != EDGE_GHOST;
          });
          always_assert_log(non_throw_edge == nullptr, "block %zu, %s", b->id(),
                            SHOW(*this));
        }

        if (num_preds > 0 &&
            !(opcode::is_a_return(op) || opcode::is_throw(op))) {
          // Control Flow shouldn't just fall off the end of a block, unless
          // it's an orphan block that's unreachable anyway
          always_assert_log(num_succs > 0, "block %zu, %s", b->id(),
                            SHOW(*this));
          always_assert_log(num_goto_succs == 1, "block %zu, %s", b->id(),
                            SHOW(*this));
        }
      } else if (num_preds > 0 && b != exit_block()) {
        // no instructions in this block. Control Flow shouldn't just fall off
        // the end
        always_assert_log(num_succs > 0, "block %zu, %s", b->id(),
                          SHOW(*this));
        always_assert_log(num_goto_succs == 1, "block %zu, %s", b->id(),
                          SHOW(*this));
      }
      always_assert_log(num_goto_succs < 2, "block %zu, %s", b->id(),
                        SHOW(*this));
    }

    // IRInstruction pointers must be unique.
    std::unordered_set<IRInstruction*> pointer_check;
    for (const auto& mie : ConstInstructionIterable(*this)) {
      auto insn = mie.insn;
      always_assert_log(
          pointer_check.count(insn) == 0,
          // NOTE(review): "unqiue" below is a typo in the assert message
          // string; left untouched here to preserve behavior byte-for-byte.
          "IRInstruction pointers must be unqiue. You have inserted the "
          "following IRInstruction* multiple times:\n >> %s",
          SHOW(*insn));
      pointer_check.insert(insn);
    }
  }

  for (const auto& entry : m_blocks) {
    Block* b = entry.second;
    // make sure the edge list in both blocks agree
    for (const auto e : b->succs()) {
      const auto& reverse_edges = e->target()->preds();
      always_assert_log(std::find(reverse_edges.begin(), reverse_edges.end(),
                                  e) != reverse_edges.end(),
                        "block %zu -> %zu, %s", b->id(), e->target()->id(),
                        SHOW(*this));
    }
    for (const auto e : b->preds()) {
      const auto& forward_edges = e->src()->succs();
      always_assert_log(std::find(forward_edges.begin(), forward_edges.end(),
                                  e) != forward_edges.end(),
                        "block %zu -> %zu, %s", e->src()->id(), b->id(),
                        SHOW(*this));
    }

    const auto& throws = b->get_outgoing_throws_in_order();
    bool last = true;
    // Only the last throw edge can have a null catch type.
    for (auto it = throws.rbegin(); it != throws.rend(); ++it) {
      Edge* e = *it;
      if (!last) {
        always_assert_log(
            e->throw_info()->catch_type != nullptr,
            "Can't have a catchall (%zu -> %zu) that isn't last. %s",
            e->src()->id(), e->target()->id(), SHOW(*this));
      }
      last = false;
    }
  }

  if (m_editable) {
    auto used_regs = compute_registers_size();
    always_assert_log(used_regs <= m_registers_size,
                      "used regs %d > registers size %d. %s", used_regs,
                      m_registers_size, SHOW(*this));
  }
  no_dangling_dex_positions();
  if (DEBUG) {
    no_unreferenced_edges();
  }
}

// Computes the minimal registers size from the max dest register actually
// written. (NOTE: this definition continues beyond the visible chunk.)
reg_t ControlFlowGraph::compute_registers_size() const {
  reg_t num_regs = 0;
  for (const auto& mie : cfg::ConstInstructionIterable(*this)) {
    auto insn = mie.insn;
    if (insn->has_dest()) {
      // +1 because registers start at v0
      reg_t size_required = insn->dest() + insn->dest_is_wide() + 1;
      num_regs = std::max(size_required, num_regs);
    }
  }
  // We don't check the source registers because we shouldn't ever be using an
  // undefined register. If the input code is well-formed, there shouldn't be a
  // source register without an equivalent dest register.
This is true for our // IR because of the load-param opcodes. return num_regs; } void ControlFlowGraph::recompute_registers_size() { m_registers_size = compute_registers_size(); } void ControlFlowGraph::no_dangling_dex_positions() const { std::unordered_map<DexPosition*, bool> parents; for (const auto& entry : m_blocks) { Block* b = entry.second; for (const auto& mie : *b) { if (mie.type == MFLOW_POSITION && mie.pos->parent != nullptr) { parents.emplace(mie.pos->parent, false); } } } for (const auto& entry : m_blocks) { Block* b = entry.second; for (const auto& mie : *b) { if (mie.type == MFLOW_POSITION) { auto search = parents.find(mie.pos.get()); if (search != parents.end()) { search->second = true; } } } } for (const auto& entry : parents) { always_assert_log(entry.second, "%p is a dangling parent pointer in %s", entry.first, SHOW(*this)); } } uint32_t ControlFlowGraph::num_opcodes() const { uint32_t result = 0; for (const auto& entry : m_blocks) { result += entry.second->num_opcodes(); } return result; } uint32_t ControlFlowGraph::sum_opcode_sizes() const { uint32_t result = 0; for (const auto& entry : m_blocks) { result += entry.second->sum_opcode_sizes(); } return result; } Block* ControlFlowGraph::get_first_block_with_insns() const { always_assert(editable()); Block* block = entry_block(); std::unordered_set<Block*> visited{block}; while (block != nullptr && (block->empty() || block->get_first_insn() == block->end())) { block = block->goes_to(); if (!visited.insert(block).second) { // We found a loop, and no param instructions. 
block = nullptr; break; } } return block; } boost::sub_range<IRList> ControlFlowGraph::get_param_instructions() const { if (!m_editable) { return m_orig_list->get_param_instructions(); } Block* block = get_first_block_with_insns(); if (block == nullptr) { // Return an empty sub_range return boost::sub_range<IRList>(); } return block->m_entries.get_param_instructions(); } void ControlFlowGraph::gather_catch_types(std::vector<DexType*>& types) const { always_assert(editable()); std::unordered_set<DexType*> seen; // get the catch types of all the incoming edges to all the catch blocks for (const auto& entry : m_blocks) { const Block* b = entry.second; if (b->is_catch()) { for (const cfg::Edge* e : b->preds()) { if (e->type() == cfg::EDGE_THROW) { DexType* t = e->throw_info()->catch_type; const auto pair = seen.insert(t); bool insertion_occured = pair.second; if (insertion_occured) { types.push_back(t); } } } } } } void ControlFlowGraph::gather_strings( std::vector<const DexString*>& strings) const { always_assert(editable()); for (const auto& entry : m_blocks) { entry.second->m_entries.gather_strings(strings); } } void ControlFlowGraph::gather_types(std::vector<DexType*>& types) const { always_assert(editable()); gather_catch_types(types); for (const auto& entry : m_blocks) { entry.second->m_entries.gather_types(types); } } void ControlFlowGraph::gather_init_classes(std::vector<DexType*>& types) const { always_assert(editable()); for (const auto& entry : m_blocks) { entry.second->m_entries.gather_init_classes(types); } } void ControlFlowGraph::gather_fields(std::vector<DexFieldRef*>& fields) const { always_assert(editable()); for (const auto& entry : m_blocks) { entry.second->m_entries.gather_fields(fields); } } void ControlFlowGraph::gather_methods( std::vector<DexMethodRef*>& methods) const { always_assert(editable()); for (const auto& entry : m_blocks) { entry.second->m_entries.gather_methods(methods); } } void ControlFlowGraph::gather_callsites( 
    std::vector<DexCallSite*>& callsites) const {
  always_assert(editable());
  for (const auto& entry : m_blocks) {
    entry.second->m_entries.gather_callsites(callsites);
  }
}

void ControlFlowGraph::gather_methodhandles(
    std::vector<DexMethodHandle*>& methodhandles) const {
  always_assert(editable());
  for (const auto& entry : m_blocks) {
    entry.second->m_entries.gather_methodhandles(methodhandles);
  }
}

// Given an iterator at a move-result(-pseudo), return an iterator at the
// instruction that produced the value: either the previous instruction in
// the same block, or the last instruction of the single predecessor block.
cfg::InstructionIterator ControlFlowGraph::primary_instruction_of_move_result(
    const cfg::InstructionIterator& it) {
  auto move_result_insn = it->insn;
  always_assert(opcode::is_move_result_any(move_result_insn->opcode()));
  auto block = const_cast<Block*>(it.block());
  if (block->get_first_insn()->insn == move_result_insn) {
    // The move-result leads its block; the primary instruction is the last
    // instruction of the unique predecessor.
    auto& preds = block->preds();
    always_assert(preds.size() == 1);
    auto previous_block = preds.front()->src();
    auto res = previous_block->to_cfg_instruction_iterator(
        previous_block->get_last_insn());
    auto insn = res->insn;
    always_assert(insn->has_move_result_any());
    return res;
  } else {
    auto res = std::prev(it);
    always_assert(res.block() == it.block());
    auto insn = res->insn;
    always_assert(insn->has_move_result_any());
    return res;
  }
}

// Return the next instruction in execution order, following goto edges
// across (possibly empty) blocks. Returns end() if there is none.
cfg::InstructionIterator ControlFlowGraph::next_following_gotos(
    const cfg::InstructionIterator& it) {
  auto next_it = std::next(it);
  if (!next_it.is_end() && next_it.block() == it.block()) {
    return next_it;
  }
  // We reached the end of the current block; let's look at the immediate
  // goto-target.
  auto block = it.block()->goes_to();
  if (!block) {
    return InstructionIterable(*this).end();
  }
  auto first_insn_it = block->get_first_insn();
  if (first_insn_it != block->end()) {
    return block->to_cfg_instruction_iterator(first_insn_it);
  }
  // The immediate goto-target block was empty, so we have to continue our
  // chase. We have to check for non-terminating self-loops while doing that.
  std::unordered_set<cfg::Block*> visited{block};
  while (true) {
    block = block->goes_to();
    if (!block || !visited.insert(block).second) {
      // non-terminating empty self-loop
      return InstructionIterable(*this).end();
    }
    first_insn_it = block->get_first_insn();
    if (first_insn_it != block->end()) {
      return block->to_cfg_instruction_iterator(first_insn_it);
    }
  }
}

// Return the move-result(-pseudo) paired with `it`, or end() if the next
// instruction (following gotos) is not a move-result.
cfg::InstructionIterator ControlFlowGraph::move_result_of(
    const cfg::InstructionIterator& it) {
  auto next_it = next_following_gotos(it);
  if (next_it.is_end()) {
    return next_it;
  }
  if (opcode::is_move_result_any(next_it->insn->opcode())) {
    always_assert(primary_instruction_of_move_result(next_it) == it);
    return next_it;
  }
  return cfg::InstructionIterable(*this).end();
}

/*
 * fill `new_cfg` with a copy of `this`
 */
void ControlFlowGraph::deep_copy(ControlFlowGraph* new_cfg) const {
  always_assert(editable());
  new_cfg->clear();
  new_cfg->m_editable = true;
  new_cfg->set_registers_size(this->get_registers_size());

  std::unordered_map<const Edge*, Edge*> old_edge_to_new;
  size_t num_edges = this->m_edges.size();
  new_cfg->m_edges.reserve(num_edges);
  old_edge_to_new.reserve(num_edges);
  for (const Edge* old_edge : this->m_edges) {
    // this shallowly copies block pointers inside, then we patch them later
    Edge* new_edge = new Edge(*old_edge);
    new_cfg->m_edges.insert(new_edge);
    old_edge_to_new.emplace(old_edge, new_edge);
  }

  // copy the code itself
  MethodItemEntryCloner cloner;
  for (const auto& entry : this->m_blocks) {
    const Block* block = entry.second;
    // this shallowly copies edge pointers inside, then we patch them later
    Block* new_block = new Block(*block, &cloner);
    new_block->m_parent = new_cfg;
    new_cfg->m_blocks.emplace(new_block->id(), new_block);
  }
  // We need a second pass because parent position pointers may refer to
  // positions in a block that would be processed later.
  cloner.fix_parent_positions();

  // patch the edge pointers in the blocks to their new cfg counterparts
  for (auto& entry : new_cfg->m_blocks) {
    Block* b = entry.second;
    for (Edge*& e : b->m_preds) {
      e = old_edge_to_new.at(e);
    }
    for (Edge*& e : b->m_succs) {
      e = old_edge_to_new.at(e);
    }
  }

  // patch the block pointers in the edges to their new cfg counterparts
  for (Edge* e : new_cfg->m_edges) {
    e->set_src(new_cfg->m_blocks.at(e->src()->id()));
    e->set_target(new_cfg->m_blocks.at(e->target()->id()));
  }

  // update the entry and exit block pointers to their new cfg counterparts
  new_cfg->m_entry_block = new_cfg->m_blocks.at(this->m_entry_block->id());
  if (this->m_exit_block != nullptr) {
    new_cfg->m_exit_block = new_cfg->m_blocks.at(this->m_exit_block->id());
  }
}

// Linear search for `insn`; `hint` (if given) is searched first.
InstructionIterator ControlFlowGraph::find_insn(IRInstruction* insn,
                                                Block* hint) {
  if (hint != nullptr) {
    auto ii = ir_list::InstructionIterable(hint);
    for (auto it = ii.begin(); it != ii.end(); ++it) {
      if (it->insn == insn) {
        return hint->to_cfg_instruction_iterator(it);
      }
    }
  }

  auto iterable = InstructionIterable(*this);
  for (auto it = iterable.begin(); it != iterable.end(); ++it) {
    if (it->insn == insn) {
      return it;
    }
  }
  return iterable.end();
}

// Const overload of the above.
ConstInstructionIterator ControlFlowGraph::find_insn(IRInstruction* insn,
                                                     Block* hint) const {
  if (hint != nullptr) {
    auto ii = ir_list::InstructionIterable(hint);
    for (auto it = ii.begin(); it != ii.end(); ++it) {
      if (it->insn == insn) {
        return hint->to_cfg_instruction_iterator(it);
      }
    }
  }

  auto iterable = ConstInstructionIterable(*this);
  for (auto it = iterable.begin(); it != iterable.end(); ++it) {
    if (it->insn == insn) {
      return it;
    }
  }
  return iterable.end();
}

std::vector<Block*> ControlFlowGraph::order(
    const std::unique_ptr<LinearizationStrategy>& custom_strategy) {
  // We must simplify first to remove any unreachable blocks
  simplify();

  // This is a modified Weak Topological Ordering (WTO). We create "chains" of
  // blocks that will be kept together, then feed these chains to WTO for it to
  // choose the ordering of the chains. Then, we deconstruct the chains to get
  // an ordering of the blocks.

  // hold the chains of blocks here, though they mostly will be accessed via the
  // map
  std::vector<std::unique_ptr<BlockChain>> chains;
  // keep track of which blocks are in each chain, for quick lookup.
  std::unordered_map<Block*, BlockChain*> block_to_chain;
  block_to_chain.reserve(m_blocks.size());

  build_chains(&chains, &block_to_chain);
  auto wto = build_wto(block_to_chain);
  auto result = custom_strategy ? custom_strategy->order(*this, std::move(wto))
                                : wto_chains(std::move(wto));

  always_assert_log(result.size() == m_blocks.size(),
                    "result has %lu blocks, m_blocks has %lu", result.size(),
                    m_blocks.size());
  // The entry block must always be first.
  redex_assert(m_entry_block == result.at(0));

  return result;
}

// Group blocks into chains that must (or should) be emitted contiguously:
// move-result blocks must follow their primary instruction's block, and
// goto-targets within the same try region are appended to shrink output size.
void ControlFlowGraph::build_chains(
    std::vector<std::unique_ptr<BlockChain>>* chains,
    std::unordered_map<Block*, BlockChain*>* block_to_chain) {
  auto handle_block = [&](Block* b) {
    if (block_to_chain->count(b) != 0) {
      return;
    }
    always_assert_log(!DEBUG || !b->starts_with_move_result(),
                      "%zu is wrong %s", b->id(), SHOW(*this));
    auto unique = std::make_unique<BlockChain>();
    BlockChain* chain = unique.get();
    chains->push_back(std::move(unique));

    chain->push_back(b);
    block_to_chain->emplace(b, chain);

    auto goto_edge = get_succ_edge_of_type(b, EDGE_GOTO);
    while (goto_edge != nullptr) {
      // make sure we handle a chain of blocks that all start with move-results
      auto goto_block = goto_edge->target();
      always_assert_log(!DEBUG || m_blocks.count(goto_block->id()) > 0,
                        "bogus block reference %zu -> %zu in %s",
                        goto_edge->src()->id(), goto_block->id(), SHOW(*this));
      if (goto_block->starts_with_move_result() || goto_block->same_try(b)) {
        // If the goto edge leads to a block with a move-result(-pseudo), then
        // that block must be placed immediately after this one because we
        // can't insert anything between an instruction and its
        // move-result(-pseudo).
        //
        // We also add gotos that are in the same try because we can minimize
        // instructions (by using fallthroughs) without adding another try
        // region. This is not required, but empirical evidence shows that it
        // generates smaller dex files.
        const auto& pair = block_to_chain->emplace(goto_block, chain);
        bool was_already_there = !pair.second;
        if (was_already_there) {
          if (goto_block->starts_with_move_result() &&
              chain != block_to_chain->at(goto_block)) {
            // We cannot allow this to be in a separate chain. The WTO (and its
            // walk) cannot enforce the correct ordering, e.g., it might put a
            // throw block in the middle.
            TRACE(CFG, 5, "Need to collapse goto chain with move result!");
            auto* goto_chain = block_to_chain->at(goto_block);
            redex_assert(goto_chain->at(0) == goto_block);
            for (auto* gcb : *goto_chain) {
              chain->push_back(gcb);
              (*block_to_chain)[gcb] = chain;
            }
            auto it = std::find_if(
                chains->begin(), chains->end(),
                [&](const auto& uptr) { return uptr.get() == goto_chain; });
            redex_assert(it != chains->end());
            chains->erase(it);
          }
          break;
        }
        chain->push_back(goto_block);
        goto_edge = get_succ_edge_of_type(goto_block, EDGE_GOTO);
      } else {
        break;
      }
    }
  };

  // It is important to always start with the entry block. Otherwise it may
  // be incorrectly merged into a chain.
  redex_assert(m_entry_block != nullptr);
  if (DEBUG) {
    auto it = m_blocks.find(m_entry_block->id());
    redex_assert(it != m_blocks.end());
    redex_assert(it->second == m_entry_block);
  }
  handle_block(m_entry_block);

  std::vector<Block*> move_result_blocks_out_of_order;
  for (const auto& entry : m_blocks) {
    // Must not handle blocks that start with a move-result. These need to go
    // into the same chain as the owner.
    if (entry.second->starts_with_move_result()) {
      if (DEBUG) {
        move_result_blocks_out_of_order.push_back(entry.second);
      }
      continue;
    }
    handle_block(entry.second);
  }

  // All postponed move-result blocks should be in a chain now, or they were
  // dangling and should have been removed.
  if (DEBUG) {
    for (auto* b : move_result_blocks_out_of_order) {
      always_assert_log(block_to_chain->count(b) > 0,
                        "Did not find B%zu in chains of\n%s", b->id(),
                        SHOW(*this));
    }
  }
}

sparta::WeakTopologicalOrdering<BlockChain*> ControlFlowGraph::build_wto(
    const std::unordered_map<Block*, BlockChain*>& block_to_chain) {
  return sparta::WeakTopologicalOrdering<BlockChain*>(
      block_to_chain.at(entry_block()),
      [&block_to_chain](BlockChain* const& chain) {
        // The chain successor function returns all the outgoing edges' target
        // chains. Where outgoing means that the edge does not go to this chain.
        //
        // FIXME: this algorithm ignores real infinite loops in the block graph
        std::vector<BlockChain*> result;
        result.reserve(chain->size());

        // TODO: Sort the outputs by edge type, case key, and throw index
        //  * We may be able to use fewer debug positions if we emit case blocks
        //    in the original order.
        //  * Right now, it seems the switches are being output in reverse
        //    order, which is annoying for writing tests.
        const auto& end = chain->end();
        for (auto it = chain->begin(); it != end;) {
          Block* b = *it;
          const auto& next_it = std::next(it);
          Block* next = (next_it == end) ? nullptr : *next_it;

          for (Edge* e : b->succs()) {
            if (e->target() == next) {
              // The most common intra-chain edge is a GOTO to the very next
              // block. Let's cheaply detect this case and filter it early,
              // before we have to do an expensive map lookup.
              continue;
            }
            const auto& succ_chain = block_to_chain.at(e->target());
            // Filter out any edges within this chain. We don't want to
            // erroneously create infinite loops in the chain graph that don't
            // exist in the block graph.
            if (succ_chain != chain) {
              result.push_back(succ_chain);
            }
          }
          it = next_it;
        }
        return result;
      });
}

// Flatten the WTO of chains into a single block ordering via a depth-first
// visit.
std::vector<Block*> ControlFlowGraph::wto_chains(
    sparta::WeakTopologicalOrdering<BlockChain*> wto) {
  std::vector<Block*> main_order;
  main_order.reserve(this->blocks().size());
  auto chain_order = [&main_order](BlockChain* c) {
    for (Block* b : *c) {
      main_order.push_back(b);
    }
  };
  wto.visit_depth_first(chain_order);
  return main_order;
}

// Add an MFLOW_TARGET at the end of each edge.
// Insert GOTOs where necessary.
void ControlFlowGraph::insert_branches_and_targets(
    const std::vector<Block*>& ordering) {
  for (auto it = ordering.begin(); it != ordering.end(); ++it) {
    Block* b = *it;
    for (const Edge* edge : b->succs()) {
      if (edge->type() == EDGE_BRANCH) {
        auto branch_it = b->get_conditional_branch();
        always_assert_log(branch_it != b->end(), "block %zu %s", b->id(),
                          SHOW(*this));
        auto& branch_mie = *branch_it;

        // Switch edges carry a case key; plain branches don't.
        BranchTarget* bt =
            edge->case_key() != boost::none
                ? new BranchTarget(&branch_mie, *edge->case_key())
                : new BranchTarget(&branch_mie);
        auto target_mie = new MethodItemEntry(bt);
        edge->target()->m_entries.push_front(*target_mie);

      } else if (edge->type() == EDGE_GOTO) {
        auto next_it = std::next(it);
        if (next_it != ordering.end()) {
          Block* next = *next_it;
          if (edge->target() == next) {
            // Don't need a goto because this will fall through to `next`
            continue;
          }
        }
        auto branch_mie = new MethodItemEntry(new IRInstruction(OPCODE_GOTO));
        auto target_mie = new MethodItemEntry(new BranchTarget(branch_mie));
        edge->src()->m_entries.push_back(*branch_mie);
        edge->target()->m_entries.push_front(*target_mie);
      }
    }
  }
}

// remove all try and catch markers because we may reorder the blocks
void ControlFlowGraph::remove_try_catch_markers() {
  always_assert(m_editable);
  for (const auto& entry : m_blocks) {
    Block* b = entry.second;
    b->m_entries.remove_and_dispose_if([](const MethodItemEntry& mie) {
      return mie.type == MFLOW_TRY || mie.type == MFLOW_CATCH;
    });
  }
}

// Convert the editable CFG back into a linear IRList: order the blocks,
// materialize branches/targets and try/catch markers, then splice the
// block contents together. Caller takes ownership of the returned list.
IRList* ControlFlowGraph::linearize(
    const std::unique_ptr<LinearizationStrategy>& custom_strategy) {
  always_assert(m_editable);
  sanity_check();
  IRList* result = new IRList;

  TRACE_NO_LINE(CFG, 5, "before linearize:\n%s", SHOW(*this));

  auto ordering = this->order(custom_strategy);

  insert_branches_and_targets(ordering);
  insert_try_catch_markers(ordering);

  for (Block* b : ordering) {
    result->splice(result->end(), b->m_entries);
  }
  remove_redundant_positions(result);

  return result;
}

void ControlFlowGraph::insert_try_catch_markers(
    const std::vector<Block*>& ordering) {
  // add back the TRY START, TRY_ENDS, and, MFLOW_CATCHes

  const auto& insert_try_marker_between =
      [this](Block* prev, MethodItemEntry* new_try_marker, Block* b) {
        auto first_it = b->get_first_insn();
        if (first_it != b->end() &&
            opcode::is_a_move_result_pseudo(first_it->insn->opcode())) {
          // Make sure we don't split up a move-result-pseudo and its primary
          // instruction by placing the marker after the move-result-pseudo
          //
          // TODO: relax the constraint that move-result-pseudo must be
          // immediately after its partner, allowing non-opcode
          // MethodItemEntries between
          b->m_entries.insert_after(first_it, *new_try_marker);
        } else if (new_try_marker->tentry->type == TRY_START) {
          if (prev == nullptr && b == entry_block()) {
            // Parameter loading instructions come before a TRY_START
            auto params = b->m_entries.get_param_instructions();
            b->m_entries.insert_before(params.end(), *new_try_marker);
          } else {
            // TRY_START belongs at the front of a block
            b->m_entries.push_front(*new_try_marker);
          }
        } else {
          // TRY_END belongs at the end of a block
          prev->m_entries.push_back(*new_try_marker);
        }
      };

  std::unordered_map<MethodItemEntry*, Block*> catch_to_containing_block;
  Block* prev = nullptr;
  MethodItemEntry* active_catch = nullptr;
  for (auto it = ordering.begin(); it != ordering.end(); prev = *(it++)) {
    Block* b = *it;
    MethodItemEntry* new_catch = create_catch(b, &catch_to_containing_block);

    if (new_catch == nullptr && b->cannot_throw() && !b->is_catch()) {
      // Generate fewer try regions by merging blocks that cannot throw into the
      // previous try region.
      //
      // But, we have to be careful to not include the catch block of this try
      // region, which would create invalid Dex Try entries. For any given try
      // region, none of its catches may be inside that region.
      continue;
    }

    if (active_catch != new_catch) {
      // If we're switching try regions between these blocks, the TRY_END must
      // come first then the TRY_START. We insert the TRY_START earlier because
      // we're using `insert_after` which inserts things in reverse order
      if (new_catch != nullptr) {
        // Start a new try region before b
        auto new_start = new MethodItemEntry(TRY_START, new_catch);
        insert_try_marker_between(prev, new_start, b);
      }
      if (active_catch != nullptr) {
        // End the current try region before b
        auto new_end = new MethodItemEntry(TRY_END, active_catch);
        insert_try_marker_between(prev, new_end, b);
      }
      active_catch = new_catch;
    }
  }
  if (active_catch != nullptr) {
    always_assert_log(active_catch->centry->next != active_catch,
                      "Invalid cycle: %s", SHOW(active_catch));
    ordering.back()->m_entries.push_back(
        *new MethodItemEntry(TRY_END, active_catch));
  }
}

// Build (or reuse) the linked list of MFLOW_CATCH entries describing the
// throw edges out of `block`. Returns nullptr if the block has no throws.
MethodItemEntry* ControlFlowGraph::create_catch(
    Block* block,
    std::unordered_map<MethodItemEntry*, Block*>* catch_to_containing_block) {
  always_assert(m_editable);

  using EdgeVector = std::vector<Edge*>;
  EdgeVector throws = get_succ_edges_of_type(block, EDGE_THROW);
  if (throws.empty()) {
    // No need to create a catch if there are no throws
    return nullptr;
  }

  std::sort(throws.begin(), throws.end(), [](const Edge* e1, const Edge* e2) {
    return e1->throw_info()->index < e2->throw_info()->index;
  });
  const auto& throws_end = throws.end();

  // recurse through `throws` adding catch entries to blocks at the ends of
  // throw edges and connecting the catch entry `next` pointers according to the
  // throw edge indices.
// // We stop early if we find find an equivalent linked list of catch entries return self_recursive_fn( [this, &throws_end, catch_to_containing_block]( auto self, const EdgeVector::iterator& it) -> MethodItemEntry* { if (it == throws_end) { return nullptr; } auto edge = *it; auto catch_block = edge->target(); for (auto& mie : *catch_block) { // Is there already a catch here that's equivalent to the catch we // would create? if (mie.type == MFLOW_CATCH && catch_entries_equivalent_to_throw_edges( this, &mie, it, throws_end, *catch_to_containing_block)) { // The linked list of catch entries starting at `mie` is equivalent // to the rest of `throws` from `it` to `end`. So we don't need to // create another one, use the existing list. return &mie; } } // We recurse and find the next catch before creating this catch because // otherwise, we could create a cycle of the catch entries. MethodItemEntry* next = self(self, std::next(it)); // create a new catch entry and insert it into the bytecode auto new_catch = new MethodItemEntry(edge->throw_info()->catch_type); new_catch->centry->next = next; catch_block->m_entries.push_front(*new_catch); catch_to_containing_block->emplace(new_catch, catch_block); return new_catch; }, throws.begin()); } std::vector<Block*> ControlFlowGraph::blocks() const { std::vector<Block*> result; result.reserve(m_blocks.size()); for (const auto& entry : m_blocks) { Block* b = entry.second; result.emplace_back(b); } return result; } // Uses a standard depth-first search ith a side table of already-visited nodes. 
std::vector<Block*> ControlFlowGraph::blocks_reverse_post_deprecated() const {
  std::stack<Block*> stack;
  for (const auto& entry : m_blocks) {
    // include unreachable blocks too
    Block* b = entry.second;
    if (b != entry_block() && b->preds().empty()) {
      stack.push(b);
    }
  }
  stack.push(entry_block());

  std::vector<Block*> postorder;
  postorder.reserve(m_blocks.size());
  std::unordered_set<Block*> visited;
  visited.reserve(m_blocks.size());
  while (!stack.empty()) {
    const auto& curr = stack.top();
    visited.insert(curr);
    // A node is emitted (postorder) only once all successors were visited;
    // otherwise the unvisited successor is pushed and processed first.
    bool all_succs_visited = [&] {
      for (auto const& s : curr->succs()) {
        if (!visited.count(s->target())) {
          stack.push(s->target());
          return false;
        }
      }
      return true;
    }();
    if (all_succs_visited) {
      redex_assert(curr == stack.top());
      postorder.push_back(curr);
      stack.pop();
    }
  }
  // Reversing the postorder yields a reverse-postorder traversal.
  std::reverse(postorder.begin(), postorder.end());
  return postorder;
}

ControlFlowGraph::~ControlFlowGraph() {
  free_all_blocks_and_edges_and_removed_insns();
}

// Allocate a fresh, empty block with the next available id and register it.
Block* ControlFlowGraph::create_block() {
  size_t id = next_block_id();
  Block* b = new Block(this, id);
  m_blocks.emplace(id, b);
  return b;
}

// Create a new block containing a deep clone of `original`'s entries.
// Edges are not copied.
Block* ControlFlowGraph::duplicate_block(Block* original) {
  Block* copy = create_block();
  MethodItemEntryCloner cloner;
  for (const auto& mie : *original) {
    copy->m_entries.push_back(*cloner.clone(&mie));
  }
  return copy;
}

// We create a small class here (instead of a recursive lambda) so we can
// label visit with NO_SANITIZE_ADDRESS
class ExitBlocks {
 private:
  uint32_t next_dfn{0};
  std::stack<const Block*> stack;
  // Depth-first number. Special values:
  //   0 - unvisited
  //   UINT32_MAX - visited and determined to be in a separate SCC
  std::unordered_map<const Block*, uint32_t> dfns;
  static constexpr uint32_t VISITED = std::numeric_limits<uint32_t>::max();
  // This is basically Tarjan's algorithm for finding SCCs. I pass around an
  // extra has_exit value to determine if a given SCC has any successors.
  using t = std::pair<uint32_t, bool>;

 public:
  std::vector<Block*> exit_blocks;
  NO_SANITIZE_ADDRESS // because of deep recursion. ASAN uses too much memory.
  t visit(const Block* b) {
    stack.push(b);
    uint32_t head = dfns[b] = ++next_dfn;
    // whether any vertex in the current SCC has a successor edge that points
    // outside itself
    bool has_exit{false};
    for (auto& succ : b->succs()) {
      uint32_t succ_dfn = dfns[succ->target()];
      uint32_t min;
      if (succ_dfn == 0) {
        // Unvisited successor: recurse.
        bool succ_has_exit;
        std::tie(min, succ_has_exit) = visit(succ->target());
        has_exit |= succ_has_exit;
      } else {
        // A successor in a completed SCC means this SCC can exit.
        has_exit |= succ_dfn == VISITED;
        min = succ_dfn;
      }
      head = std::min(min, head);
    }
    if (head == dfns[b]) {
      // b is the head of its SCC; pop the whole component off the stack.
      const Block* top{nullptr};
      if (!has_exit) {
        exit_blocks.push_back(const_cast<Block*>(b));
        has_exit = true;
      }
      do {
        top = stack.top();
        stack.pop();
        dfns[top] = VISITED;
      } while (top != b);
    }
    return t(head, has_exit);
  }
};

std::vector<Block*> ControlFlowGraph::real_exit_blocks(
    bool include_infinite_loops) {
  std::vector<Block*> result;
  if (m_exit_block != nullptr && include_infinite_loops) {
    auto ghosts = get_pred_edges_of_type(m_exit_block, EDGE_GHOST);
    if (!ghosts.empty()) {
      // The exit block is a ghost block, ignore it and get the real exit
      // points.
      for (auto e : ghosts) {
        result.push_back(e->src());
      }
    } else {
      // Empty ghosts means the method has a single exit point and
      // calculate_exit_block didn't add a ghost block.
result.push_back(m_exit_block); } } else { always_assert_log(!include_infinite_loops, "call calculate_exit_block first"); for (const auto& entry : m_blocks) { Block* block = entry.second; const auto& b = block->branchingness(); if (b == opcode::BRANCH_RETURN || b == opcode::BRANCH_THROW) { result.push_back(block); } } } return result; } std::vector<Block*> ControlFlowGraph::return_blocks() const { std::vector<Block*> result; for (const auto& entry : m_blocks) { Block* block = entry.second; const auto& b = block->branchingness(); if (b == opcode::BRANCH_RETURN) { result.push_back(block); } } return result; } /* * Find all exit blocks. Note that it's not as simple as looking for Blocks with * return or throw opcodes; infinite loops are a valid way of terminating dex * bytecode too. As such, we need to find all strongly connected components * (SCCs) and vertices that lack successors. For SCCs that lack successors, any * one of its vertices can be treated as an exit block; this implementation * picks the head of the SCC. */ void ControlFlowGraph::calculate_exit_block() { if (m_editable) { reset_exit_block(); } else if (m_exit_block != nullptr) { // nothing to do, as nothing can ever change in non-editable cfg return; } always_assert(m_exit_block == nullptr); ExitBlocks eb; eb.visit(entry_block()); if (eb.exit_blocks.size() == 1) { m_exit_block = eb.exit_blocks[0]; } else { m_exit_block = create_block(); for (Block* b : eb.exit_blocks) { add_edge(b, m_exit_block, EDGE_GHOST); } } } void ControlFlowGraph::reset_exit_block() { if (m_exit_block == nullptr) { return; } if (get_pred_edge_of_type(m_exit_block, EDGE_GHOST) == nullptr) { m_exit_block = nullptr; return; } // If we get here, we have a "ghost" exit block, that was created to represent // multiple exist blocks. We need to remove that "ghost" exit block before // recomputing the exit of a CFG with multiple exit points. 
remove_block(m_exit_block); always_assert(m_exit_block == nullptr); } // public API edge removal functions void ControlFlowGraph::delete_edge(Edge* edge) { remove_edge(edge); free_edge(edge); } void ControlFlowGraph::delete_succ_edges(Block* b) { free_edges(remove_succ_edges(b)); } void ControlFlowGraph::delete_pred_edges(Block* b) { free_edges(remove_pred_edges(b)); } // private edge removal functions // These are raw removal, they don't free the edge. ControlFlowGraph::EdgeSet ControlFlowGraph::remove_edges_between(Block* p, Block* s, bool cleanup) { return remove_edge_if( p, s, [](const Edge*) { return true; }, cleanup); } void ControlFlowGraph::delete_edges_between(Block* p, Block* s) { free_edges(remove_edges_between(p, s)); } void ControlFlowGraph::remove_edge(Edge* edge, bool cleanup) { remove_edge_if( edge->src(), edge->target(), [edge](const Edge* e) { return edge == e; }, cleanup); } void ControlFlowGraph::free_all_blocks_and_edges_and_removed_insns() { if (m_owns_insns) { for (const auto& entry : m_blocks) { Block* b = entry.second; b->free(); delete b; } } else { for (const auto& entry : m_blocks) { Block* b = entry.second; delete b; } } for (Edge* e : m_edges) { delete e; } if (m_owns_removed_insns) { for (auto* insn : m_removed_insns) { delete insn; } m_removed_insns.clear(); } } void ControlFlowGraph::clear() { free_all_blocks_and_edges_and_removed_insns(); m_blocks.clear(); m_edges.clear(); m_registers_size = 0; m_entry_block = nullptr; m_exit_block = nullptr; m_editable = true; } namespace { Edge* get_singleton_normal_forward_edge(Block* block) { Edge* singleton = nullptr; for (auto succ : block->succs()) { if (succ->type() == EDGE_GOTO || succ->type() == EDGE_BRANCH) { if (singleton) { return nullptr; } singleton = succ; } } return singleton; } } // namespace // After `edges` have been removed from the graph, // * Turn BRANCHes/SWITCHes with one outgoing edge into GOTOs void ControlFlowGraph::cleanup_deleted_edges(const EdgeSet& edges) { for 
(Edge* e : edges) { auto pred_block = e->src(); auto last_it = pred_block->get_last_insn(); if (last_it != pred_block->end()) { auto last_insn = last_it->insn; auto op = last_insn->opcode(); Edge* fwd_edge; if ((opcode::is_a_conditional_branch(op) || opcode::is_switch(op)) && (fwd_edge = get_singleton_normal_forward_edge(pred_block))) { m_removed_insns.push_back(last_insn); pred_block->m_entries.erase_and_dispose(last_it); fwd_edge->set_type(EDGE_GOTO); fwd_edge->set_case_key(boost::none); } } } } void ControlFlowGraph::free_edge(Edge* edge) { m_edges.erase(edge); delete edge; } void ControlFlowGraph::free_edges(const EdgeSet& edges) { for (Edge* e : edges) { free_edge(e); } } Edge* ControlFlowGraph::get_pred_edge_of_type(const Block* block, EdgeType type) const { return get_pred_edge_if(block, [type](const Edge* e) { return e->type() == type; }); } Edge* ControlFlowGraph::get_succ_edge_of_type(const Block* block, EdgeType type) const { return get_succ_edge_if(block, [type](const Edge* e) { return e->type() == type; }); } std::vector<Edge*> ControlFlowGraph::get_pred_edges_of_type( const Block* block, EdgeType type) const { return get_pred_edges_if(block, [type](const Edge* e) { return e->type() == type; }); } std::vector<Edge*> ControlFlowGraph::get_succ_edges_of_type( const Block* block, EdgeType type) const { return get_succ_edges_if(block, [type](const Edge* e) { return e->type() == type; }); } Block* ControlFlowGraph::split_block(Block* old_block, const IRList::iterator& raw_it) { always_assert(raw_it != old_block->end()); always_assert(editable()); // new_block will be the successor Block* new_block = create_block(); // move the rest of the instructions after the split point into the new block new_block->m_entries.splice_selection(new_block->begin(), old_block->m_entries, std::next(raw_it), old_block->end()); // make the outgoing edges come from the new block... 
std::vector<Edge*> to_move(old_block->succs().begin(), old_block->succs().end()); for (auto e : to_move) { // ... except if we didn't move the branching/throwing instruction; in that // case, just rewire the goto, as we are going to create a new one if (new_block->empty() && e->type() != EDGE_GOTO) { continue; } set_edge_source(e, new_block); } // connect the halves of the block we just split up add_edge(old_block, new_block, EDGE_GOTO); return new_block; } Block* ControlFlowGraph::split_block(const cfg::InstructionIterator& it) { always_assert(!it.is_end()); return split_block(it.block(), it.unwrap()); } Block* ControlFlowGraph::split_block_before(Block* old_block, const IRList::iterator& raw_it) { always_assert(editable()); // Do not split in front of special move instructions. This would likely end // up being illegal. always_assert(!opcode::is_a_move_result(raw_it->insn->opcode()) && !opcode::is_a_move_result_pseudo(raw_it->insn->opcode())); // new_block will be the predecessor. Block* new_block = create_block(); // move the rest of the instructions after the split point into the new block new_block->m_entries.splice_selection( new_block->begin(), old_block->m_entries, old_block->begin(), raw_it); // make the incoming edges go to the new block... std::vector<Edge*> to_move(old_block->preds().begin(), old_block->preds().end()); for (auto e : to_move) { set_edge_target(e, new_block); } // Copy outgoing throw edges. 
for (auto e : old_block->succs()) { if (e->type() != EDGE_THROW) { continue; } auto new_edge = new Edge(*e); new_edge->set_src(new_block); add_edge(new_edge); } // connect the halves of the block we just split up add_edge(new_block, old_block, EDGE_GOTO); return new_block; } Block* ControlFlowGraph::split_block_before( const cfg::InstructionIterator& it) { always_assert(!it.is_end()); return split_block_before(it.block(), it.unwrap()); } void ControlFlowGraph::merge_blocks(Block* pred, Block* succ) { const auto& not_throws = [](const Edge* e) { return e->type() != EDGE_THROW; }; { const auto& forwards = get_succ_edges_if(pred, not_throws); always_assert(forwards.size() == 1); auto forward_edge = forwards[0]; always_assert(forward_edge->target() == succ); always_assert(forward_edge->type() == EDGE_GOTO); const auto& reverses = succ->preds(); always_assert(reverses.size() == 1); auto reverse_edge = reverses[0]; always_assert(forward_edge == reverse_edge); } delete_edges_between(pred, succ); // move succ's code into pred pred->m_entries.splice(pred->m_entries.end(), succ->m_entries); // move succ's outgoing edges to pred. 
// Intentionally copy the vector of edges because set_edge_source edits the // edge vectors auto succs = get_succ_edges_if(succ, not_throws); for (auto succ_edge : succs) { set_edge_source(succ_edge, pred); } // remove the succ block delete_pred_edges(succ); delete_succ_edges(succ); m_blocks.erase(succ->id()); delete succ; } void ControlFlowGraph::set_edge_target(Edge* edge, Block* new_target) { move_edge(edge, nullptr, new_target); } void ControlFlowGraph::set_edge_source(Edge* edge, Block* new_source) { move_edge(edge, new_source, nullptr); } // Move this edge out of the vectors between its old blocks // and into the vectors between the new blocks void ControlFlowGraph::move_edge(Edge* edge, Block* new_source, Block* new_target) { // remove this edge from the graph temporarily but do not delete it because // we're going to move it elsewhere remove_edge(edge, /* cleanup */ false); if (new_source != nullptr) { edge->set_src(new_source); } if (new_target != nullptr) { edge->set_target(new_target); } edge->src()->m_succs.push_back(edge); edge->target()->m_preds.push_back(edge); } bool ControlFlowGraph::blocks_are_in_same_try(const Block* b1, const Block* b2) const { const auto& throws1 = b1->get_outgoing_throws_in_order(); const auto& throws2 = b2->get_outgoing_throws_in_order(); if (throws1.size() != throws2.size()) { return false; } auto it1 = throws1.begin(); auto it2 = throws2.begin(); for (; it1 != throws1.end(); ++it1, ++it2) { auto e1 = *it1; auto e2 = *it2; if (e1->target() != e2->target() || e1->throw_info()->catch_type != e2->throw_info()->catch_type) { return false; } } return true; } bool ControlFlowGraph::replace_insns(const InstructionIterator& it, const std::vector<IRInstruction*>& insns) { return replace_insns(it, insns.begin(), insns.end()); } bool ControlFlowGraph::replace_insn(const InstructionIterator& it, IRInstruction* insn) { return replace_insns(it, {insn}); } void ControlFlowGraph::remove_insn(const InstructionIterator& it) { 
always_assert(m_editable); MethodItemEntry& mie = *it; auto insn = mie.insn; auto op = insn->opcode(); always_assert_log(op != OPCODE_GOTO, "There are no GOTO instructions in the CFG"); Block* block = it.block(); auto last_it = block->get_last_insn(); always_assert_log(last_it != block->end(), "cannot remove from empty block"); if (insn == last_it->insn && (opcode::may_throw(op) || op == OPCODE_THROW)) { // We're deleting the last instruction that may throw, this block no longer // throws. We should remove the throw edges delete_succ_edge_if(block, [](const Edge* e) { return e->type() == EDGE_THROW; }); } if (opcode::is_a_conditional_branch(op) || opcode::is_switch(op)) { // Remove all outgoing EDGE_BRANCHes // leaving behind only an EDGE_GOTO (and maybe an EDGE_THROW?) // // Don't cleanup because we're deleting the instruction at the end of this // function singleton_iterable<Block*> iterable(block); free_edges(remove_succ_edge_if( iterable.begin(), iterable.end(), [](const Edge* e) { return e->type() == EDGE_BRANCH; }, /* cleanup */ false)); } else if (insn->has_move_result_any()) { // delete the move-result(-pseudo) too if (insn == last_it->insn) { // The move-result(-pseudo) is in the next (runtime) block, if any. // We follow the goto edge to the block that should have the // move-result(-pseudo). // // We can't use std::next because that goes to the next block in ID order, // which may not be the next runtime block. auto move_result_block = block->goes_to(); if (move_result_block != nullptr) { auto first_it = move_result_block->get_first_insn(); if (first_it != move_result_block->end() && opcode::is_move_result_any(first_it->insn->opcode())) { // We can safely delete this move-result(-pseudo) because it cannot be // the move-result(-pseudo) of more than one primary instruction. A // CFG with multiple edges to a block beginning with a // move-result(-pseudo) is a malformed CFG. 
always_assert_log(move_result_block->preds().size() == 1, "Multiple edges to a move-result-pseudo in %zu. %s", move_result_block->id(), SHOW(*this)); m_removed_insns.push_back(first_it->insn); move_result_block->m_entries.erase_and_dispose(first_it); } } } else { // The move-result(-pseudo) is in the same block as this one. // This occurs when we're not in a try region. auto mrp_it = std::next(it); always_assert(mrp_it.block() == block); if (opcode::is_move_result_any(mrp_it->insn->opcode())) { m_removed_insns.push_back(mrp_it->insn); block->m_entries.erase_and_dispose(mrp_it.unwrap()); } } } // delete the requested instruction m_removed_insns.push_back(it->insn); block->m_entries.erase_and_dispose(it.unwrap()); } void ControlFlowGraph::insert_before(const InstructionIterator& it, std::unique_ptr<DexPosition> pos) { always_assert(m_editable); Block* block = it.block(); block->m_entries.insert_before(it.unwrap(), std::move(pos)); } void ControlFlowGraph::insert_after(const InstructionIterator& it, std::unique_ptr<DexPosition> pos) { always_assert(m_editable); Block* block = it.block(); block->m_entries.insert_after(it.unwrap(), std::move(pos)); } void ControlFlowGraph::insert_before(Block* block, const IRList::iterator& it, std::unique_ptr<DexPosition> pos) { always_assert(m_editable); block->m_entries.insert_before(it, std::move(pos)); } void ControlFlowGraph::insert_after(Block* block, const IRList::iterator& it, std::unique_ptr<DexPosition> pos) { always_assert(m_editable); block->m_entries.insert_after(it, std::move(pos)); } void Block::insert_before(const IRList::iterator& it, std::unique_ptr<SourceBlock> sb) { m_entries.insert_before(it, std::move(sb)); } void Block::insert_after(const IRList::iterator& it, std::unique_ptr<SourceBlock> sb) { m_entries.insert_after(it, std::move(sb)); } void ControlFlowGraph::insert_before(const InstructionIterator& it, std::unique_ptr<SourceBlock> sb) { always_assert(m_editable); Block* block = it.block(); 
block->m_entries.insert_before(it.unwrap(), std::move(sb)); } void ControlFlowGraph::insert_after(const InstructionIterator& it, std::unique_ptr<SourceBlock> sb) { always_assert(m_editable); Block* block = it.block(); block->m_entries.insert_after(it.unwrap(), std::move(sb)); } void ControlFlowGraph::create_branch(Block* b, IRInstruction* insn, Block* fls, Block* tru) { create_branch(b, insn, fls, {{1, tru}}); } void ControlFlowGraph::create_branch( Block* b, IRInstruction* insn, Block* goto_block, const std::vector<std::pair<int32_t, Block*>>& case_to_block) { auto op = insn->opcode(); always_assert(m_editable); always_assert_log(opcode::is_branch(op), "%s is not a branch instruction", SHOW(op)); always_assert_log(!opcode::is_goto(op), "There are no gotos in the editable CFG. Use add_edge()"); auto existing_last = b->get_last_insn(); if (existing_last != b->end()) { auto last_op = existing_last->insn->opcode(); always_assert_log(!(opcode::is_branch(last_op) || opcode::is_throw(last_op) || opcode::is_a_return(last_op)), "Can't add branch after %s in Block %zu in %s", SHOW(existing_last->insn), b->id(), SHOW(*this)); } auto existing_goto_edge = get_succ_edge_of_type(b, EDGE_GOTO); if (goto_block != nullptr) { if (existing_goto_edge != nullptr) { // redirect it set_edge_target(existing_goto_edge, goto_block); } else { add_edge(b, goto_block, EDGE_GOTO); } } else { always_assert_log(existing_goto_edge != nullptr, "%s must have a false case", SHOW(insn)); } b->m_entries.push_back(*new MethodItemEntry(insn)); if (opcode::is_switch(op)) { for (const auto& entry : case_to_block) { add_edge(b, entry.second, entry.first); } } else { always_assert(opcode::is_a_conditional_branch(op)); always_assert_log(case_to_block.size() == 1, "Wrong number of non-goto cases (%zu) for %s", case_to_block.size(), SHOW(op)); const auto& entry = case_to_block[0]; always_assert_log(entry.first == 1, "%s only has boolean case key values", SHOW(op)); add_edge(b, entry.second, EDGE_BRANCH); } } 
void ControlFlowGraph::copy_succ_edges(Block* from, Block* to) { copy_succ_edges_if(from, to, [](const Edge*) { return true; }); } void ControlFlowGraph::copy_succ_edges_of_type(Block* from, Block* to, EdgeType type) { copy_succ_edges_if(from, to, [type](const Edge* edge) { return edge->type() == type; }); } template <typename EdgePredicate> void ControlFlowGraph::copy_succ_edges_if(Block* from, Block* to, EdgePredicate edge_predicate) { const auto& edges = get_succ_edges_if(from, std::move(edge_predicate)); for (auto e : edges) { Edge* copy = new Edge(*e); copy->set_src(to); add_edge(copy); } } bool ControlFlowGraph::insert_before(const InstructionIterator& position, const std::vector<IRInstruction*>& insns) { return insert_before(position, insns.begin(), insns.end()); } bool ControlFlowGraph::insert_after(const InstructionIterator& position, const std::vector<IRInstruction*>& insns) { return insert_after(position, insns.begin(), insns.end()); } bool ControlFlowGraph::push_front(Block* b, const std::vector<IRInstruction*>& insns) { return push_front(b, insns.begin(), insns.end()); } bool ControlFlowGraph::push_back(Block* b, const std::vector<IRInstruction*>& insns) { return push_back(b, insns.begin(), insns.end()); } bool ControlFlowGraph::insert_before(const InstructionIterator& position, IRInstruction* insn) { return insert_before(position, std::vector<IRInstruction*>{insn}); } bool ControlFlowGraph::insert_after(const InstructionIterator& position, IRInstruction* insn) { return insert_after(position, std::vector<IRInstruction*>{insn}); } bool ControlFlowGraph::push_front(Block* b, IRInstruction* insn) { return push_front(b, std::vector<IRInstruction*>{insn}); } bool ControlFlowGraph::push_back(Block* b, IRInstruction* insn) { return push_back(b, std::vector<IRInstruction*>{insn}); } uint32_t ControlFlowGraph::remove_blocks(const std::vector<Block*>& blocks) { std::vector<std::unique_ptr<DexPosition>> dangling; uint32_t insns_removed = 0; for (auto block : 
blocks) { if (block == entry_block()) { always_assert(block->succs().size() == 1); set_entry_block(block->succs()[0]->target()); } if (block == exit_block()) { set_exit_block(nullptr); } delete_pred_edges(block); delete_succ_edges(block); for (auto& mie : *block) { if (mie.type == MFLOW_OPCODE) { m_removed_insns.push_back(mie.insn); insns_removed++; } else if (mie.type == MFLOW_POSITION) { dangling.push_back(std::move(mie.pos)); } } auto id = block->id(); auto num_removed = m_blocks.erase(id); always_assert_log(num_removed == 1, "Block %zu wasn't in CFG. Attempted double delete?", id); block->m_entries.clear_and_dispose(); delete block; } fix_dangling_parents(std::move(dangling)); return insns_removed; } // delete old_block and reroute its predecessors to new_block uint32_t ControlFlowGraph::replace_blocks( const std::vector<std::pair<Block*, Block*>>& old_new_blocks) { std::vector<Block*> blocks_to_remove; for (auto& p : old_new_blocks) { auto old_block = p.first; auto new_block = p.second; std::vector<Edge*> to_redirect = old_block->preds(); for (auto e : to_redirect) { set_edge_target(e, new_block); } blocks_to_remove.push_back(old_block); } return remove_blocks(blocks_to_remove); } std::ostream& ControlFlowGraph::write_dot_format(std::ostream& o) const { o << "digraph {\n"; for (auto* block : blocks()) { for (auto& succ : block->succs()) { o << block->id() << " -> " << succ->target()->id() << "\n"; } } o << "}\n"; return o; } ControlFlowGraph::EdgeSet ControlFlowGraph::remove_succ_edges(Block* b, bool cleanup) { singleton_iterable<Block*> iterable(b); return remove_succ_edge_if( iterable.begin(), iterable.end(), [](const Edge*) { return true; }, cleanup); } ControlFlowGraph::EdgeSet ControlFlowGraph::remove_pred_edges(Block* b, bool cleanup) { singleton_iterable<Block*> iterable(b); return remove_pred_edge_if( iterable.begin(), iterable.end(), [](const Edge*) { return true; }, cleanup); } DexPosition* ControlFlowGraph::get_dbg_pos(const 
cfg::InstructionIterator& it) { always_assert(&it.cfg() == this); auto search_block = [](Block* b, IRList::iterator in_block_it) -> DexPosition* { // Search for an MFLOW_POSITION preceding this instruction within the // same block while (in_block_it->type != MFLOW_POSITION && in_block_it != b->begin()) { --in_block_it; } return in_block_it->type == MFLOW_POSITION ? in_block_it->pos.get() : nullptr; }; auto result = search_block(it.block(), it.unwrap()); if (result != nullptr) { return result; } // TODO: Positions should be connected to instructions rather than preceding // them in the flow of instructions. Having the positions depend on the order // of instructions is a very linear way to encode the information which isn't // very amenable to the editable CFG. // while there's a single predecessor, follow that edge std::unordered_set<Block*> visited; std::function<DexPosition*(Block*)> check_prev_block; check_prev_block = [this, &visited, &check_prev_block, &search_block](Block* b) -> DexPosition* { // Check for an infinite loop const auto& pair = visited.insert(b); bool already_there = !pair.second; if (already_there) { return nullptr; } const auto& reverse_gotos = this->get_pred_edges_of_type(b, EDGE_GOTO); if (b->preds().size() == 1 && !reverse_gotos.empty()) { Block* prev_block = reverse_gotos[0]->src(); if (!prev_block->empty()) { auto result = search_block(prev_block, std::prev(prev_block->end())); if (result != nullptr) { return result; } } // Didn't find any MFLOW_POSITIONs in `prev_block`, keep going. return check_prev_block(prev_block); } // This block has no solo predecessors anymore. Nowhere left to search. 
return nullptr; }; return check_prev_block(it.block()); } } // namespace cfg namespace { template <typename InternalIterator> class IteratorMapper { public: using difference_type = IROpcode; using value_type = IROpcode; using pointer = IROpcode*; using reference = IROpcode&; using iterator_category = std::random_access_iterator_tag; explicit IteratorMapper(const InternalIterator& it) : m_internal_it(it) {} IteratorMapper(const IteratorMapper<InternalIterator>& other) : m_internal_it(other.m_internal_it) {} ~IteratorMapper() {} IteratorMapper& operator=(const IteratorMapper<InternalIterator>& other) { m_internal_it = other.m_internal_it; return *this; } IteratorMapper& operator++() { m_internal_it++; return *this; } bool operator!=(const IteratorMapper<InternalIterator>& other) { return m_internal_it != other.m_internal_it; } IROpcode& operator*() { m_opcode = m_internal_it->insn->opcode(); return m_opcode; } private: InternalIterator m_internal_it; IROpcode m_opcode; }; } // namespace namespace cfg { std::size_t ControlFlowGraph::opcode_hash() const { auto ii = cfg::ConstInstructionIterable(*this); return boost::hash_range(IteratorMapper(ii.begin()), IteratorMapper(ii.end())); } } // namespace cfg