libredex/ControlFlow.h

/* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #pragma once #include <boost/dynamic_bitset.hpp> #include <boost/optional/optional.hpp> #include <boost/range/sub_range.hpp> #include <type_traits> #include <unordered_set> #include <utility> #include <vector> #include "DexPosition.h" #include "IRCode.h" #include "SingletonIterable.h" #include "WeakTopologicalOrdering.h" /** * A Control Flow Graph is a directed graph of Basic Blocks. * * Each `Block` has some number of successors and predecessors. `Block`s are * connected to their predecessors and successors by `Edge`s that specify * the type of connection. * * EDITABLE MODE: * Right now there are two types of CFGs. Editable and non-editable: * A non editable CFG's blocks have begin and end pointers into the big linear * IRList inside IRCode. * An editable CFG's blocks each own a small IRList (with MethodItemEntries * taken from IRCode) * * Editable mode is the new version of the CFG. In the future, it will replace * IRCode entirely as the primary code representation. To build an editable CFG, * call * * `code->build_cfg(true)` * * The editable CFG takes the MethodItemEntries from the IRCode object and moves * them into the blocks. The editable CFG steals the code out of the IRCode * object. After you've built the CFG in editable mode, you should use the CFG, * not the IRCode. The IRCode is empty while the editable CFG exists. * * You can make all sorts of changes to the CFG and when * you're done, move it all back into an IRCode object with * * `code->clear_cfg()` * * It is easier to maintain the data structure when there is no unnecessary * information duplication. Therefore, MFLOW_TARGETs, OPCODE_GOTOs, MFLOW_TRYs, * and MFLOW_CATCHes are deleted and their information is moved to the edges of * the CFG. * * TODO: Add useful CFG editing methods * TODO: phase out edits to the IRCode and move them all to the editable CFG * TODO: remove non-editable CFG option * * TODO?: make MethodItemEntry's fields private? */ namespace source_blocks { namespace impl { struct BlockAccessor; } // namespace impl } // namespace source_blocks namespace cfg { enum EdgeType : uint8_t { // The false branch of an if statement, default of a switch, or unconditional // goto EDGE_GOTO, // The true branch of an if statement or non-default case of a switch EDGE_BRANCH, // The edges to a catch block EDGE_THROW, // A "fake" edge so that we can have a single exit block EDGE_GHOST, EDGE_TYPE_SIZE }; class Block; class ControlFlowGraph; class CFGInliner; namespace details { // To avoid "Show.h" in the header. std::string show_cfg(const ControlFlowGraph& cfg); std::string show_insn(const IRInstruction* insn); } // namespace details struct ThrowInfo { // nullptr means catch all DexType* catch_type; // Index from the linked list of `CatchEntry`s in the IRCode. An index of 0 // denotes the "primary" catch. At runtime, the primary catch is the first // block to be checked for equality with the runtime exception type, then onto // index 1, etc. uint32_t index; ThrowInfo(DexType* catch_type, uint32_t index) : catch_type(catch_type), index(index) {} bool operator==(const ThrowInfo& other) { return catch_type == other.catch_type && index == other.index; } }; class Edge final { public: using CaseKey = int32_t; using MaybeCaseKey = boost::optional<CaseKey>; private: Block* m_src; Block* m_target; union { // If `m_type` is EDGE_THROW then this union points to a ThrowInfo // Edge owns this ThrowInfo and is responsible for deleting it. ThrowInfo* m_throw_info; // If `m_type` is not EDGE_THROW then this union is an optional case key. // If this edge is a non-default outgoing edge of a OPCODE_SWITCH, then // this is not `boost::none`. MaybeCaseKey m_case_key; }; EdgeType m_type; public: Edge(Block* src, Block* target, EdgeType type) : m_src(src), m_target(target), m_case_key(boost::none), m_type(type) { always_assert_log(m_type != EDGE_THROW, "Need a catch type and index to create a THROW edge"); } Edge(Block* src, Block* target, CaseKey case_key) : m_src(src), m_target(target), m_case_key(case_key), m_type(EDGE_BRANCH) {} Edge(Block* src, Block* target, DexType* catch_type, uint32_t index) : m_src(src), m_target(target), m_throw_info(new ThrowInfo(catch_type, index)), m_type(EDGE_THROW) {} /* * Copy constructor. * Notice that this shallowly copies the block pointers! */ Edge(const Edge& e) : m_src(e.m_src), m_target(e.m_target), m_type(e.m_type) { if (m_type == EDGE_THROW) { m_throw_info = new ThrowInfo(*e.throw_info()); } else { m_case_key = e.case_key(); } } ~Edge() { if (m_type == EDGE_THROW) { delete m_throw_info; } } bool operator==(const Edge& that) const { return m_src == that.m_src && m_target == that.m_target && equals_ignore_source_and_target(that); } bool operator!=(const Edge& that) const { return !(*this == that); } bool equals_ignore_source(const Edge& that) const { return m_target == that.m_target && equals_ignore_source_and_target(that); } bool equals_ignore_target(const Edge& that) const { return m_src == that.m_src && equals_ignore_source_and_target(that); } bool equals_ignore_source_and_target(const Edge& that) const { if (m_type != that.m_type) { return false; } else if (m_type == EDGE_THROW) { return (throw_info() == nullptr && that.throw_info() == nullptr) || *throw_info() == *that.throw_info(); } else { return case_key() == that.case_key(); } } // getters Block* src() const { return m_src; } Block* target() const { return m_target; } EdgeType type() const { return m_type; } ThrowInfo* throw_info() const { always_assert(m_type == EDGE_THROW); return m_throw_info; } const MaybeCaseKey& case_key() const { always_assert(m_type != EDGE_THROW); return m_case_key; } // setters void set_case_key(const MaybeCaseKey& k) { always_assert(m_type != EDGE_THROW); m_case_key = k; } void set_src(Block* b) { m_src = b; } void set_target(Block* b) { m_target = b; } void set_type(EdgeType new_type) { always_assert_log(!((m_type == EDGE_THROW) ^ (new_type == EDGE_THROW)), "Can't convert to or from throw edge"); m_type = new_type; } }; std::ostream& operator<<(std::ostream& os, const Edge& e); using BlockId = size_t; template <bool is_const> class InstructionIteratorImpl; using InstructionIterator = InstructionIteratorImpl</* is_const */ false>; using ConstInstructionIterator = InstructionIteratorImpl</* is_const */ true>; template <bool is_const> class InstructionIterableImpl; using InstructionIterable = InstructionIterableImpl</* is_const */ false>; using ConstInstructionIterable = InstructionIterableImpl</* is_const */ true>; // A piece of "straight-line" code. Targets are only at the beginning of a block // and branches (throws, gotos, switches, etc) are only at the end of a block. class Block final { public: explicit Block(ControlFlowGraph* parent, BlockId id) : m_id(id), m_parent(parent) {} ~Block() { m_entries.clear_and_dispose(); } // This is different from the destructor. It also frees MethodItemEntry // payload that is not deleted on MIE deletion. void free(); // copy constructor Block(const Block& b, MethodItemEntryCloner* cloner); BlockId id() const { return m_id; } ControlFlowGraph& cfg() const { always_assert(m_parent != nullptr); return *m_parent; } const std::vector<Edge*>& preds() const { return m_preds; } const std::vector<Edge*>& succs() const { return m_succs; } bool operator<(const Block& other) const { return this->id() < other.id(); } // return true if `b` is a predecessor of this. // optionally supply a specific type of predecessor. The default, // EDGE_TYPE_SIZE, means any type bool has_pred(Block* b, EdgeType t = EDGE_TYPE_SIZE) const; // return true if `b` is a successor of this. // optionally supply a specific type of successor. The default, // EDGE_TYPE_SIZE, means any type bool has_succ(Block* b, EdgeType t = EDGE_TYPE_SIZE) const; IRList::iterator begin(); IRList::iterator end(); IRList::const_iterator begin() const; IRList::const_iterator end() const; IRList::reverse_iterator rbegin() { return IRList::reverse_iterator(end()); } IRList::reverse_iterator rend() { return IRList::reverse_iterator(begin()); } bool is_catch() const; bool same_try(const Block* other) const; // Any removed instruction will be freed when the cfg is destroyed. void remove_insn(const InstructionIterator& it); // Any removed instruction will be freed when the cfg is destroyed. void remove_insn(const ir_list::InstructionIterator& it); // Will remove the first entry after it containing an MFLOW_OPCODE, // leaving the intervening instructions unmodified. If a non-MFLOW_OPCODE // instruction is to be removed, remove_mie should be used instead. // Any removed instruction will be freed when the cfg is destroyed. void remove_insn(const IRList::iterator& it); // Any removed instruction will be freed when the cfg is destroyed. void remove_mie(const IRList::iterator& it); // Removes a subset of MFLOW_DEBUG instructions from the block. valid_regs // is an accumulator set of registers used by either DBG_START_LOCAL // or DBG_START_LOCAL_EXTENDED. The DBG_END_LOCAL and DBG_RESTART_LOCAL // instructions are erased, unless valid_regs contains the registers they use. // Note: When iterating (WTO order) over the blocks of a CFG, if this method // is to be applied for each block, then the valid_regs accumulator should be // sequentially passed to each of the blocks to incorporate the "global" // information of the CFG. That is, if a block is an ancestor of another, // then the valid registers that the ancestor block defines should be // acknowledged by the descendant block. void cleanup_debug(std::unordered_set<reg_t>& valid_regs); opcode::Branchingness branchingness(); // returns true if there are no MethodItemEntries (not IRInstructions) bool empty() const { return m_entries.empty(); } uint32_t num_opcodes() const; uint32_t sum_opcode_sizes() const; // return an iterator to the last MFLOW_OPCODE, or end() if there are none IRList::iterator get_last_insn(); // return an iterator to the first MFLOW_OPCODE, or end() if there are none IRList::iterator get_first_insn(); // return an iterator to the first non-param-loading MFLOW_OPCODE, or end() if // there are none. IRList::iterator get_first_non_param_loading_insn(); // return an iterator to the last param-loading MFLOW_OPCODE, or end() if // there are none. IRList::iterator get_last_param_loading_insn(); // return an iterator to the first instruction (except move-result* and goto) // if it occurs before the first position, or end() if there are none. IRList::iterator get_first_insn_before_position(); // including move-result-pseudo bool starts_with_move_result(); bool starts_with_move_exception(); bool contains_opcode(IROpcode opcode); // returns true iff the block starts with the same MethodItemEntries as the // other block. bool begins_with(Block* other); // If this block has a single outgoing edge and it is a goto, return its // target. Otherwise, return nullptr Block* goes_to_only_edge() const; // If this block has an outgoing goto edge, return its target. // Otherwise, return nullptr Block* goes_to() const; // If this block contains no instructions that can throw. bool cannot_throw() const; // TODO?: Should we just always store the throws in index order? std::vector<Edge*> get_outgoing_throws_in_order() const; // These assume that the iterator is inside this block InstructionIterator to_cfg_instruction_iterator( const ir_list::InstructionIterator& list_it, bool next_on_end = false); InstructionIterator to_cfg_instruction_iterator( const IRList::iterator& list_it, bool next_on_end = false); InstructionIterator to_cfg_instruction_iterator(MethodItemEntry& mie); // These forward to implementations in ControlFlowGraph, See comment there template <class ForwardIt> bool insert_before(const InstructionIterator& position, const ForwardIt& begin, const ForwardIt& end); bool insert_before(const InstructionIterator& position, const std::vector<IRInstruction*>& insns); bool insert_before(const InstructionIterator& position, IRInstruction* insn); template <class ForwardIt> bool insert_after(const InstructionIterator& position, const ForwardIt& begin, const ForwardIt& end); bool insert_after(const InstructionIterator& position, const std::vector<IRInstruction*>& insns); bool insert_after(const InstructionIterator& position, IRInstruction* insn); template <class ForwardIt> bool push_front(const ForwardIt& begin, const ForwardIt& end); bool push_front(const std::vector<IRInstruction*>& insns); bool push_front(IRInstruction* insn); template <class ForwardIt> bool push_back(const ForwardIt& begin, const ForwardIt& end); bool push_back(const std::vector<IRInstruction*>& insns); bool push_back(IRInstruction* insn); void insert_before(const IRList::iterator& it, std::unique_ptr<SourceBlock> sb); void insert_after(const IRList::iterator& it, std::unique_ptr<SourceBlock> sb); bool structural_equals(const Block* other) const; private: friend class ControlFlowGraph; friend class CFGInliner; friend class InstructionIteratorImpl<false>; friend class InstructionIteratorImpl<true>; friend struct ::source_blocks::impl::BlockAccessor; // return an iterator to the conditional branch (including switch) in this // block. If there is no such instruction, return end() IRList::iterator get_conditional_branch(); BlockId m_id; // MethodItemEntries get moved from IRCode into here (if m_editable) // otherwise, this is empty. IRList m_entries; // TODO delete these // These refer into the IRCode IRList // These are only used in non-editable mode. IRList::iterator m_begin; IRList::iterator m_end; std::vector<Edge*> m_preds; std::vector<Edge*> m_succs; // the graph that this block belongs to ControlFlowGraph* m_parent = nullptr; }; struct DominatorInfo { Block* dom; size_t postorder; }; using BlockChain = std::vector<Block*>; struct LinearizationStrategy { virtual ~LinearizationStrategy() {} virtual std::vector<Block*> order( cfg::ControlFlowGraph& cfg, sparta::WeakTopologicalOrdering<BlockChain*> wto) = 0; }; class ControlFlowGraph { public: static bool DEBUG; ControlFlowGraph() = default; ControlFlowGraph(const ControlFlowGraph&) = delete; /* * if editable is false, changes to the CFG aren't reflected in the output dex * instructions. */ ControlFlowGraph(IRList* ir, reg_t registers_size, bool editable = true); ~ControlFlowGraph(); /* * convert from the graph representation to a list of MethodItemEntries * Using custom_strategy allows custom order linearization of the CFG. */ IRList* linearize( const std::unique_ptr<LinearizationStrategy>& custom_strategy = nullptr); // Return the blocks of this CFG in an arbitrary order. // // NOTE: this function copies pointers to blocks from m_blocks. // If a block is created or destroyed while we're iterating on a copy, the // copy is now stale. That stale copy may have a pointer to a deleted block or // it may be incomplete (not iterating over the newly creating block). // // TODO: We should probably have an API to offer iterators into the blocks map // instead for reads or some mutations since insertion and erasure of elements // stored in std::map will not invalidate the iterators referencing other // elements. std::vector<Block*> blocks() const; // Return vector of blocks in reverse post order (RPO). If there is a path // from Block A to Block B, then A appears before B in this vector. // // // DEPRECATED: Use graph::postorder_sort instead, which is faster. The only // functional difference is that the new version doesn't include unreachable // blocks in the sorted output. std::vector<Block*> blocks_reverse_post_deprecated() const; Block* create_block(); // Create a new block (with a unique ID) that has a copy of the code inside // `original` The edges are not copied. The new block has no incoming or // outgoing edges Block* duplicate_block(Block* original); Block* entry_block() const { return m_entry_block; } Block* exit_block() const { return m_exit_block; } void set_entry_block(Block* b) { m_entry_block = b; } void set_exit_block(Block* b) { m_exit_block = b; } void reset_exit_block(); /* * If there is a single method exit point, this returns a vector holding the * exit block. If there are multiple method exit points, this returns a vector * of them, ignoring the "ghost" exit block introduced by * `calculate_exit_block()`. */ std::vector<Block*> real_exit_blocks(bool include_infinite_loops = false); std::vector<Block*> return_blocks() const; /* * Determine where the exit block is. If there is more than one, create a * "ghost" block that is the successor to all of them. * * The exit blocks are not computed upon creation. It is left up to the user * to call this method if they plan to use the exit block. If you make * significant changes to this CFG in editable mode that effect the exit * points of the method, you need to call this method again. * TODO: detect changes and recompute when necessary. */ void calculate_exit_block(); // args are arguments to an Edge constructor template <class... Args> void add_edge(Args&&... args) { add_edge(new Edge(std::forward<Args>(args)...)); } void add_edge(Edge* e) { m_edges.insert(e); e->src()->m_succs.emplace_back(e); e->target()->m_preds.emplace_back(e); } // copies all edges from one block to another void copy_succ_edges(Block* from, Block* to); // copies all edges of a certain type from one block to another void copy_succ_edges_of_type(Block* from, Block* to, EdgeType type); // copes all edges that match the predicate from one block to another template <typename EdgePredicate> void copy_succ_edges_if(Block* from, Block* to, EdgePredicate edge_predicate); using EdgeSet = std::unordered_set<Edge*>; // Make `e` point to a new target block. // The source block is unchanged. void set_edge_target(Edge* e, Block* new_target); // Make `e` come from a new source block // The target block is unchanged. void set_edge_source(Edge* e, Block* new_source); // return the first edge for which predicate returns true // or nullptr if no such edge exists template <typename EdgePredicate> Edge* get_pred_edge_if(const Block* block, EdgePredicate predicate) const { for (Edge* e : block->preds()) { if (predicate(e)) { return e; } } return nullptr; } template <typename EdgePredicate> Edge* get_succ_edge_if(const Block* block, EdgePredicate predicate) const { for (Edge* e : block->succs()) { if (predicate(e)) { return e; } } return nullptr; } // return all edges for which predicate returns true template <typename EdgePredicate> std::vector<Edge*> get_pred_edges_if(const Block* block, EdgePredicate predicate) const { const auto& preds = block->preds(); std::vector<Edge*> result; for (Edge* e : preds) { if (predicate(e)) { result.push_back(e); } } return result; } template <typename EdgePredicate> std::vector<Edge*> get_succ_edges_if(const Block* block, EdgePredicate predicate) const { const auto& succs = block->succs(); std::vector<Edge*> result; for (Edge* e : succs) { if (predicate(e)) { result.push_back(e); } } return result; } // return the first edge of the given type // or nullptr if no such edge exists Edge* get_pred_edge_of_type(const Block* block, EdgeType type) const; Edge* get_succ_edge_of_type(const Block* block, EdgeType type) const; // return all edges of the given type std::vector<Edge*> get_pred_edges_of_type(const Block* block, EdgeType type) const; std::vector<Edge*> get_succ_edges_of_type(const Block* block, EdgeType type) const; // delete_..._edge: // * These functions remove edges from the graph and free the memory // * the `_if` functions take a predicate to decide which edges to delete void delete_edge(Edge* edge); void delete_succ_edges(Block* b); void delete_pred_edges(Block* b); void delete_edges_between(Block* p, Block* s); template <class ForwardIt> void delete_edges(const ForwardIt& begin, const ForwardIt& end) { std::unordered_set<cfg::Edge*> edges; std::unordered_set<cfg::Block*> srcs; for (auto it = begin; it != end; it++) { auto e = *it; edges.insert(e); srcs.insert(e->src()); } delete_succ_edge_if(srcs.begin(), srcs.end(), [&](Edge* e) { return edges.count(e); }); } template <typename EdgePredicate> void delete_edge_if(Block* source, Block* target, EdgePredicate predicate) { free_edges(remove_edge_if(source, target, std::move(predicate))); } template <typename EdgePredicate> void delete_succ_edge_if(cfg::Block* b, EdgePredicate predicate) { singleton_iterable<Block*> iterable(b); delete_succ_edge_if(iterable.begin(), iterable.end(), std::move(predicate)); } template <class ForwardIt, typename EdgePredicate> void delete_succ_edge_if(const ForwardIt& begin, const ForwardIt& end, EdgePredicate predicate) { free_edges(remove_succ_edge_if(begin, end, std::move(predicate))); } template <typename EdgePredicate> void delete_pred_edge_if(cfg::Block* b, EdgePredicate predicate) { singleton_iterable<Block*> iterable(b); delete_pred_edge_if(iterable.begin(), iterable.end(), std::move(predicate)); } template <class ForwardIt, typename EdgePredicate> void delete_pred_edge_if(const ForwardIt& begin, const ForwardIt& end, EdgePredicate predicate) { free_edges(remove_pred_edge_if(begin, end, std::move(predicate))); } bool blocks_are_in_same_try(const Block* b1, const Block* b2) const; /* * Split this block into two blocks. After this call, `it` will be the last * instruction in the predecessor block. * * The existing block will become the predecessor. All code after `it` will be * moved into the new block (the successor). Return the (new) successor. */ Block* split_block(const cfg::InstructionIterator& it); Block* split_block(Block* block, const IRList::iterator& it); // Same as above, splits so that `it` will be the first instruction in the // successor block. The new block is the predecessor and will be returned. // // Note: Incoming edges are changed. Outgoing edges of type EDGE_THROW are // duplicated. Block* split_block_before(const cfg::InstructionIterator& it); Block* split_block_before(Block* block, const IRList::iterator& it); // Merge `succ` into `pred` and delete `succ` // // `pred` must be the only predecessor of `succ` // `succ` must be the only successor of `pred` // `pred` and `succ` must be in the same try region void merge_blocks(Block* pred, Block* succ); // remove the IRInstruction that `it` points to. // // If `it` points to a branch instruction, remove the corresponding outgoing // edges. // // Any removed instruction will be freed when the cfg is destroyed. void remove_insn(const InstructionIterator& it); void insert_before(const InstructionIterator& it, std::unique_ptr<DexPosition> pos); void insert_after(const InstructionIterator& it, std::unique_ptr<DexPosition> pos); void insert_before(Block* block, const IRList::iterator& it, std::unique_ptr<DexPosition> pos); void insert_after(Block* block, const IRList::iterator& it, std::unique_ptr<DexPosition> pos); void insert_before(const InstructionIterator& it, std::unique_ptr<SourceBlock> sb); void insert_after(const InstructionIterator& it, std::unique_ptr<SourceBlock> sb); // Insertion Methods (insert_before/after and push_front/back): // * These methods add instructions to the CFG // * They do not add branch (if-*, switch-*) instructions to the cfg (use // create_branch for that) // * If the inserted instruction requires a block boundary after it, the // block will be split, instructions will be moved to the next // (non-exceptional) block and the next insertion from `insns` will also // occur in the next block. This invalidates iterators into the CFG. // * If the inserted instruction could end the method (return-* or throw), // then instructions after the insertion point will be removed and // successor edges will be removed from the block. When inserting a return // or throw, it must be the last in `insns`. This invalidates // iterators into the CFG. // // insert insns before position // return a boolean: // true means that iterators into the CFG are now invalid // false means that iterators are still valid bool insert_before(const InstructionIterator& position, const std::vector<IRInstruction*>& insns); // The iterator variants support iterators of the following types: // * IRInstruction* // * std::unique_ptr<SourceBlock> // * std::unique_ptr<DexPosition> // * InsertVariant, std::variant of the previous types using InsertVariant = std::variant<IRInstruction*, std::unique_ptr<SourceBlock>, std::unique_ptr<DexPosition>>; template <class ForwardIt> bool insert_before(const InstructionIterator& position, const ForwardIt& begin, const ForwardIt& end) { return insert(position, begin, end, /* before */ true); } // insert insns after position bool insert_after(const InstructionIterator& position, const std::vector<IRInstruction*>& insns); template <class ForwardIt> bool insert_after(const InstructionIterator& position, const ForwardIt& begin, const ForwardIt& end) { return insert(position, begin, end, /* before */ false); } // insert insns at the beginning of block b bool push_front(Block* b, const std::vector<IRInstruction*>& insns); template <class ForwardIt> bool push_front(Block* b, const ForwardIt& begin, const ForwardIt& end); // insert insns at the end of block b bool push_back(Block* b, const std::vector<IRInstruction*>& insns); template <class ForwardIt> bool push_back(Block* b, const ForwardIt& begin, const ForwardIt& end); // Convenience functions that add just one instruction. bool insert_before(const InstructionIterator& position, IRInstruction* insn); bool insert_after(const InstructionIterator& position, IRInstruction* insn); bool push_front(Block* b, IRInstruction* insn); bool push_back(Block* b, IRInstruction* insn); // Replace one IRInstruction with some number of new instructions // * None of the new instructions can be an if or goto // * Returns a boolean indicating whether or not InstructionIterators were // invalidated (see insertion methods for more details) // * The throw edges of the block that contains `it` are copied and used as // the throw edges of any new `may_throw` instructions // * If the old instruction has a move-result(-pseudo) it will also be // removed. When adding instructions that may-throw, you should include // move-result(-pseudo)s for them. // Any removed instruction will be freed when the cfg is destroyed. bool replace_insn(const InstructionIterator& it, IRInstruction* insn); template <class ForwardIt> bool replace_insns(const InstructionIterator& it, const ForwardIt& begin, const ForwardIt& end); bool replace_insns(const InstructionIterator& it, const std::vector<IRInstruction*>& insns); // Create a conditional branch, which consists of: // * inserting an `if` instruction at the end of b // * Possibly add an EDGE_GOTO to the false block (`fls` may be null if b // already has a goto leaving it) // * add an EDGE_BRANCH to the true block void create_branch(Block* b, IRInstruction* insn, Block* fls, Block* tru); // Create an `if` or `switch`, which consists of: // * insert an `if` or `switch` instruction at the end of b // * Possibly add an EDGE_GOTO to the default block (`goto_block` may be null // if b already has a goto leaving it) // * add EDGE_BRANCHes to the other blocks based on the `case_to_block` // vector. void create_branch( Block* b, IRInstruction* insn, Block* goto_block, const std::vector<std::pair<int32_t, Block*>>& case_to_block); // delete old blocks and reroute its predecessors to new blocks // Returns number of removed instructions. // May reset exit_block if it is replaced. uint32_t replace_blocks( const std::vector<std::pair<Block*, Block*>>& old_new_blocks); // delete old_block and reroute its predecessors to new_block // Note that replacing blocks is relatively expensive as it scans and fixes up // dangling parent positions in all other blocks; consider calling // remove_blocks to remove multiple blocks at once. // May reset exit_block if it is replaced. // Returns number of removed instructions. uint32_t replace_block(Block* old_block, Block* new_block) { return replace_blocks({{old_block, new_block}}); } // Remove blocks from the graph and release associated memory. // Remove all incoming and outgoing edges. // May reset exit_block if it is removed. // Returns number of removed instructions. uint32_t remove_blocks(const std::vector<Block*>& blocks); // Remove this block from the graph and release associated memory. // Remove all incoming and outgoing edges. // Note that removing blocks is relatively expensive as it scans and fixes up // dangling parent positions in all other blocks; consider calling // remove_blocks to remove multiple blocks at once. // May reset exit_block if it is removed. // Returns number of removed instructions. uint32_t remove_block(Block* block) { return remove_blocks({block}); } /* * Print the graph in the DOT graph description language. */ std::ostream& write_dot_format(std::ostream&) const; // Do writes to this CFG propagate back to IR and Dex code? bool editable() const { return m_editable; } size_t num_blocks() const { return m_blocks.size(); } size_t num_edges() const { return m_edges.size(); } /* * Traverse the graph, starting from the entry node. Return a bitset with IDs * of reachable blocks having 1 and IDs of unreachable blocks (or unused IDs) * having 0. */ boost::dynamic_bitset<> visit() const; cfg::Block* get_block(BlockId id) const { return m_blocks.at(id); } // Returns the block with the highest block id. cfg::Block* get_last_block() const { const auto& rbegin = m_blocks.rbegin(); return rbegin == m_blocks.rend() ? nullptr : rbegin->second; } // remove blocks with no predecessors // returns pair of 1) the number of instructions removed, and 2) whether an // instruction with the destination of the last register was removed, and thus // a call to recompute_registers_size might be beneficial. std::pair<uint32_t, bool> remove_unreachable_blocks(); // transform the CFG to an equivalent but more canonical state // Assumes m_editable is true // returns the number of instructions removed uint32_t simplify(); // SIGABORT if the internal state of the CFG is invalid void sanity_check() const; // SIGABORT if there are dangling parent pointers to deleted DexPositions void no_dangling_dex_positions() const; uint32_t num_opcodes() const; uint32_t sum_opcode_sizes() const; reg_t allocate_temp() { return m_registers_size++; } reg_t allocate_wide_temp() { reg_t new_reg = m_registers_size; m_registers_size += 2; return new_reg; } reg_t get_registers_size() const { return m_registers_size; } void set_registers_size(reg_t sz) { m_registers_size = sz; } // Find the highest register in use and set m_registers_size // // Call this function after removing instructions that may have been the only // use of the highest numbered register, or any other significant changes to // the instructions. void recompute_registers_size(); // Only used in editable cfg. \returns the first block that has instructions // if there is any. Otherwise, \returns null. Block* get_first_block_with_insns() const; // by default, start at the entry block boost::sub_range<IRList> get_param_instructions() const; void gather_catch_types(std::vector<DexType*>& types) const; void gather_strings(std::vector<const DexString*>& strings) const; void gather_types(std::vector<DexType*>& types) const; void gather_init_classes(std::vector<DexType*>& types) const; void gather_fields(std::vector<DexFieldRef*>& fields) const; void gather_methods(std::vector<DexMethodRef*>& methods) const; void gather_callsites(std::vector<DexCallSite*>& callsites) const; void gather_methodhandles(std::vector<DexMethodHandle*>& methodhandles) const; cfg::InstructionIterator primary_instruction_of_move_result( const cfg::InstructionIterator& it); cfg::InstructionIterator move_result_of(const cfg::InstructionIterator& it); /* * Gets the next instruction, following gotos if the end of blocks are * reached. In case of an infinite loop, `InstructionIterable(*this).end()` is * returned. */ cfg::InstructionIterator next_following_gotos( const cfg::InstructionIterator& it); /* * clear and fill `new_cfg` with a copy of `this`. Copies of all instructions * will be made, and are owned by the caller. Consider calling * set_insn_ownership on the new cfg to have it own the instructions. */ void deep_copy(ControlFlowGraph* new_cfg) const; /* * Set whether this cfg holds the memory ownership of the contained * instructions, deleting them when the cfg is destroyed. (The default is * false.) */ void set_insn_ownership(bool owns_insns) { m_owns_insns = owns_insns; } /* * Set whether this cfg holds the memory ownership of instructions that are * removed. (The default is true.) */ void set_removed_insn_ownerhsip(bool owns_removed_insns) { m_owns_removed_insns = owns_removed_insns; } // Search all the instructions in this CFG for the given one. Return an // iterator to it, or end, if it isn't in the graph. InstructionIterator find_insn(IRInstruction* insn, Block* hint = nullptr); ConstInstructionIterator find_insn(IRInstruction* insn, Block* hint = nullptr) const; // choose an order of blocks for output std::vector<Block*> order( const std::unique_ptr<LinearizationStrategy>& custom_strategy = nullptr); /* * Find the first debug position preceding an instruction */ DexPosition* get_dbg_pos(const cfg::InstructionIterator& it); std::size_t opcode_hash() const; std::vector<IRInstruction*> release_removed_instructions() { return std::move(m_removed_insns); } private: friend class Block; using BranchToTargets = std::unordered_map<MethodItemEntry*, std::vector<std::pair<Block*, MethodItemEntry*>>>; using TryEnds = std::vector<std::pair<TryEntry*, Block*>>; using TryCatches = std::unordered_map<CatchEntry*, Block*>; using Blocks = std::map<BlockId, Block*>; friend class InstructionIteratorImpl<false>; friend class InstructionIteratorImpl<true>; friend class CFGInliner; // Find block boundaries in IRCode and create the blocks // For use by the constructor. You probably don't want to call this from // elsewhere void find_block_boundaries(IRList* ir, BranchToTargets& branch_to_targets, TryEnds& try_ends, TryCatches& try_catches); // Add edges between blocks created by `find_block_boundaries` // For use by the constructor. You probably don't want to call this from // elsewhere void connect_blocks(BranchToTargets& branch_to_targets); // Add edges from try blocks to their catch handlers. // For use by the constructor. You probably don't want to call this from // elsewhere void add_catch_edges(TryEnds& try_ends, TryCatches& try_catches); // For use by the constructor. You probably don't want to call this from // elsewhere void remove_unreachable_succ_edges(); // remove all MFLOW_TRY and MFLOW_CATCH markers because that information is // moved into the edges. // Assumes m_editable is true void remove_try_catch_markers(); // helper functions void build_chains(std::vector<std::unique_ptr<BlockChain>>* chains, std::unordered_map<Block*, BlockChain*>* block_to_chain); sparta::WeakTopologicalOrdering<BlockChain*> build_wto( const std::unordered_map<Block*, BlockChain*>& block_to_chain); std::vector<Block*> wto_chains( sparta::WeakTopologicalOrdering<BlockChain*> wto); // Materialize target instructions and gotos corresponding to control-flow // edges. Used while turning back into a linear representation. void insert_branches_and_targets(const std::vector<Block*>& ordering); // Create MFLOW_CATCH entries for each EDGE_THROW. returns the primary // MFLOW_CATCH (first element in linked list of CatchEntries) on a block that // can throw. returns nullptr on a block without outgoing throw edges. This // function is used while turning back into a linear representation. // // Example: For a block with outgoing edges // // Edge(block, catch_block_1, FooException, 1) // Edge(block, catch_block_2, BarException, 2) // Edge(block, catch_block_3, BazException, 3) // // This generates CatchEntries (linked list) // // CatchEntry(FooException) -> // CatchEntry(BarException) -> // CatchEntry(BazException) MethodItemEntry* create_catch( Block* block, std::unordered_map<MethodItemEntry*, Block*>* catch_to_containing_block); // Materialize TRY_STARTs, TRY_ENDs, and MFLOW_CATCHes // Used while turning back into a linear representation. void insert_try_catch_markers(const std::vector<Block*>& ordering); // remove blocks with no entries void remove_empty_blocks(); // Re-insert any parent pointer that got deleted. This is a useful // method to invoke just after removing positions to avoid leaving // behind dangling parents. void fix_dangling_parents(std::vector<std::unique_ptr<DexPosition>>); // Assert if there are edges that are never a predecessor or successor of a // block void no_unreferenced_edges() const; // Remove all edges and blocks of the CFG, free the memory and // set all fields to their defaults. // NOTE: this will result in an empty CFG, same as if the default // constructor has been called. void clear(); template <class ForwardIt> bool insert(const InstructionIterator& position, const ForwardIt& begin_index, const ForwardIt& end_index, bool before); // remove_..._edge: // * These functions remove edges from the graph. // * They do not free the memory of the edge. `free_edge` does that. // * The cleanup flag controls whether or not `cleanup_deleted_edges` is // called. See that function for more documentation. // * the `_if` functions take a predicate to decide which edges to delete // * They return which edges were removed (with the exception of // `remove_edge`) void remove_edge(Edge* edge, bool cleanup = true); EdgeSet remove_succ_edges(Block* b, bool cleanup = true); EdgeSet remove_pred_edges(Block* b, bool cleanup = true); EdgeSet remove_edges_between(Block* p, Block* s, bool cleanup = true); template <typename EdgePredicate> EdgeSet remove_edge_if(Block* source, Block* target, EdgePredicate predicate, bool cleanup = true) { auto& forward_edges = source->m_succs; EdgeSet to_remove; forward_edges.erase( std::remove_if(forward_edges.begin(), forward_edges.end(), [&target, &predicate, &to_remove](Edge* e) { if (e->target() == target && predicate(e)) { to_remove.insert(e); return true; } return false; }), forward_edges.end()); auto& reverse_edges = target->m_preds; reverse_edges.erase(std::remove_if(reverse_edges.begin(), reverse_edges.end(), [&to_remove](Edge* e) { return to_remove.count(e) > 0; }), reverse_edges.end()); if (cleanup) { cleanup_deleted_edges(to_remove); } return to_remove; } template <class ForwardIt, typename EdgePredicate> EdgeSet remove_pred_edge_if(const ForwardIt& begin, const ForwardIt& end, EdgePredicate predicate, bool cleanup = true) { std::unordered_set<Block*> source_blocks; EdgeSet to_remove; for (auto it = begin; it != end; it++) { auto& reverse_edges = (*it)->m_preds; reverse_edges.erase( std::remove_if(reverse_edges.begin(), reverse_edges.end(), [&source_blocks, &to_remove, &predicate](Edge* e) { if (predicate(e)) { source_blocks.insert(e->src()); to_remove.insert(e); return true; } return false; }), reverse_edges.end()); } for (Block* source_block : source_blocks) { auto& forward_edges = source_block->m_succs; forward_edges.erase( std::remove_if( forward_edges.begin(), forward_edges.end(), [&to_remove](Edge* e) { return to_remove.count(e) > 0; }), forward_edges.end()); } if (cleanup) { cleanup_deleted_edges(to_remove); } return to_remove; } template <class ForwardIt, typename EdgePredicate> EdgeSet remove_succ_edge_if(const ForwardIt& begin, const ForwardIt& end, EdgePredicate predicate, bool cleanup = true) { std::unordered_set<Block*> target_blocks; std::unordered_set<Edge*> to_remove; for (auto it = begin; it != end; it++) { auto& forward_edges = (*it)->m_succs; forward_edges.erase( std::remove_if(forward_edges.begin(), forward_edges.end(), [&target_blocks, &to_remove, &predicate](Edge* e) { if (predicate(e)) { target_blocks.insert(e->target()); to_remove.insert(e); return true; } return false; }), forward_edges.end()); } for (Block* target_block : target_blocks) { auto& reverse_edges = target_block->m_preds; reverse_edges.erase( std::remove_if( reverse_edges.begin(), reverse_edges.end(), [&to_remove](Edge* e) { return to_remove.count(e) > 0; }), reverse_edges.end()); } if (cleanup) { cleanup_deleted_edges(to_remove); } return to_remove; } // Assumes the edge is already removed. void free_edge(Edge* edge); // Assumes the edge is already removed. void free_edges(const EdgeSet& edges); // The `cleanup` boolean flag on the edge removal functions controls whether // or not to call this function afterwards. // * `cleanup` false means only remove the edges // * `cleanup` true means remove the edges and edit the instructions to // match the edge state. For example, delete branch/switch with only one // outgoing edge instructions. void cleanup_deleted_edges(const EdgeSet& edges); // free all allocated and owned memory of the CFG void free_all_blocks_and_edges_and_removed_insns(); // Move edge between new_source and new_target. // If either new_source or new_target is null, don't change that field of the // edge void move_edge(Edge* edge, Block* new_source, Block* new_target); reg_t compute_registers_size() const; // Return the next unused block identifier BlockId next_block_id() const; std::vector<Block*> blocks_post_helper(bool reverse) const; // The memory of all blocks and edges in this graph are owned here Blocks m_blocks; EdgeSet m_edges; IRList* m_orig_list{nullptr}; // Only set when !m_editable. Block* m_entry_block{nullptr}; Block* m_exit_block{nullptr}; reg_t m_registers_size{0}; bool m_editable{true}; bool m_owns_insns{false}; bool m_owns_removed_insns{true}; std::vector<IRInstruction*> m_removed_insns; }; // A static-method-only API for use with the monotonic fixpoint iterator. class GraphInterface { public: using Graph = ControlFlowGraph; using NodeId = Block*; using EdgeId = Edge*; static NodeId entry(const Graph& graph) { return const_cast<NodeId>(graph.entry_block()); } static NodeId exit(const Graph& graph) { return const_cast<NodeId>(graph.exit_block()); } static std::vector<EdgeId> predecessors(const Graph&, const NodeId& b) { return b->preds(); } static std::vector<EdgeId> successors(const Graph&, const NodeId& b) { return b->succs(); } static NodeId source(const Graph&, const EdgeId& e) { return e->src(); } static NodeId target(const Graph&, const EdgeId& e) { return e->target(); } }; template <bool is_const> class InstructionIteratorImpl { using Cfg = typename std:: conditional<is_const, const ControlFlowGraph, ControlFlowGraph>::type; using Mie = typename std:: conditional<is_const, const MethodItemEntry, MethodItemEntry>::type; using Iterator = typename std:: conditional<is_const, IRList::const_iterator, IRList::iterator>::type; using IRListInstructionIterable = ir_list::InstructionIterableImpl<is_const>; // Use a pointer so that we can be copy constructible Cfg* m_cfg; ControlFlowGraph::Blocks::const_iterator m_block; // Depends on C++14 Null Forward Iterators // Assuming the default constructed InstructionIterator compares equal // to other default constructed InstructionIterator // // boost.org/doc/libs/1_58_0/doc/html/container/Cpp11_conformance.html ir_list::InstructionIteratorImpl<is_const> m_it; // go to beginning of next block, skipping empty blocks void to_next_block() { while (m_block != m_cfg->m_blocks.end() && m_it.unwrap() == m_block->second->m_entries.end()) { ++m_block; if (m_block != m_cfg->m_blocks.end()) { Block* b = m_block->second; m_it = ir_list::InstructionIteratorImpl<is_const>(b->m_entries.begin(), b->m_entries.end()); } else { m_it = ir_list::InstructionIteratorImpl<is_const>(); } } } friend class ControlFlowGraph; friend class Block; InstructionIteratorImpl(Cfg& cfg, Block* b, const ir_list::InstructionIterator& it) : m_cfg(&cfg), m_block(m_cfg->m_blocks.find(b->id())), m_it(it) {} public: using reference = Mie&; using difference_type = long; using value_type = Mie&; using pointer = Mie*; using iterator_category = std::bidirectional_iterator_tag; // TODO: Is it possible to recover a valid state of iterators into the CFG // after an insertion operation? // // The goal is to maintain a valid state within the CFG at all times. If the // user wants to insert an instruction that ends the block (return, can_throw, // if, switch, etc.), the block needs to split. When you split a block into // two parts, you're moving code from one block to another. When code is moved // `InstructionIterator`s may be left in an invalid state because their `m_it` // is pointing into a different block than `m_block`. InstructionIteratorImpl() = delete; explicit InstructionIteratorImpl(Cfg& cfg, bool is_begin) : m_cfg(&cfg) { always_assert(m_cfg->editable()); if (is_begin) { m_block = m_cfg->m_blocks.begin(); if (m_block != m_cfg->m_blocks.end()) { auto iterable = IRListInstructionIterable(m_block->second); m_it = iterable.begin(); if (m_it == iterable.end()) { to_next_block(); } } } else { m_block = m_cfg->m_blocks.end(); } } InstructionIteratorImpl(const InstructionIteratorImpl<false>& rhs) : m_cfg(rhs.m_cfg), m_block(rhs.m_block), m_it(rhs.m_it) {} InstructionIteratorImpl& operator=(const InstructionIteratorImpl& other) = default; InstructionIteratorImpl<is_const>& operator++() { assert_not_end(); ++m_it; to_next_block(); return *this; } InstructionIteratorImpl<is_const> operator++(int) { auto result = *this; ++(*this); return result; } InstructionIteratorImpl<is_const>& operator--() { assert_not_begin(); // as long as we are at the beginning of the current block, keep going back // to the end of the previous block while (m_block == m_cfg->m_blocks.end() || m_it == IRListInstructionIterable(m_block->second).begin()) { m_it = IRListInstructionIterable((--m_block)->second).end(); } --m_it; return *this; } InstructionIteratorImpl<is_const> operator--(int) { auto result = *this; --(*this); return result; } reference operator*() const { assert_not_end(); return *m_it; } pointer operator->() const { return &(this->operator*()); } bool operator==(const InstructionIteratorImpl& other) const { return this->m_block == other.m_block && this->m_it == other.m_it; } bool operator!=(const InstructionIteratorImpl& other) const { return !(*this == other); } void assert_not_begin() const { if (!ControlFlowGraph::DEBUG) { return; } auto begin = InstructionIteratorImpl<is_const>(*m_cfg, true); always_assert_log(*this != begin, "%s", details::show_cfg(*m_cfg).c_str()); } void assert_not_end() const { if (!ControlFlowGraph::DEBUG) { return; } always_assert_log(m_block != m_cfg->m_blocks.end(), "%s", details::show_cfg(*m_cfg).c_str()); always_assert_log(m_it != ir_list::InstructionIteratorImpl<is_const>(), "%s", details::show_cfg(*m_cfg).c_str()); } bool is_end() const { return m_block == m_cfg->m_blocks.end() && m_it == ir_list::InstructionIteratorImpl<is_const>(); } Iterator unwrap() const { return m_it.unwrap(); } Block* block() const { assert_not_end(); return m_block->second; } Cfg& cfg() const { return *m_cfg; } template <bool kConst> friend class InstructionIteratorImpl; }; // Iterate through all IRInstructions in the CFG. // Instructions in the same block are processed in order. // Blocks are iterated in an undefined order template <bool is_const> class InstructionIterableImpl { using Cfg = typename std:: conditional<is_const, const ControlFlowGraph, ControlFlowGraph>::type; using Iterator = typename std:: conditional<is_const, IRList::const_iterator, IRList::iterator>::type; Cfg& m_cfg; public: InstructionIterableImpl() = delete; explicit InstructionIterableImpl(Cfg& cfg) : m_cfg(cfg) {} InstructionIteratorImpl<is_const> begin() { return InstructionIteratorImpl<is_const>(m_cfg, true); } InstructionIteratorImpl<true> begin() const { return InstructionIteratorImpl<true>(m_cfg, true); } InstructionIteratorImpl<is_const> end() { return InstructionIteratorImpl<is_const>(m_cfg, false); } InstructionIteratorImpl<true> end() const { return InstructionIteratorImpl<true>(m_cfg, false); } bool empty() const { return begin() == end(); } }; template <class ForwardIt> bool ControlFlowGraph::replace_insns(const InstructionIterator& it, const ForwardIt& begin, const ForwardIt& end) { always_assert(m_editable); // Save these values before we insert in case the insertion causes iterator // invalidation. auto insn_to_del = it->insn; bool invalidated = insert(it, begin, end, true /* before */); if (invalidated) { const auto& found = find_insn(insn_to_del); if (!found.is_end()) { remove_insn(found); } // If find_insn can't find `insn_to_del` it was most likely removed by // `insert`. This happens when a throw or return is added to the block // because the remaining code in the block is removed. } else { remove_insn(it); } return invalidated; } template <class ForwardIt> bool ControlFlowGraph::insert(const InstructionIterator& position, const ForwardIt& begin_index, const ForwardIt& end_index, bool before) { // Convert to the before case by moving the position forward one. Block* b = position.block(); if (position.unwrap() == b->end()) { always_assert_log(before, "can't insert after the end"); } IRList::iterator pos = before ? position.unwrap() : std::next(position.unwrap()); bool invalidated_its = false; for (auto insns_it = begin_index; insns_it != end_index; insns_it++) { // Coercing everything to a variant allows us to handle the complicated // case easily. The compiler should be able to optimize this back for // a simple type. auto v = InsertVariant(std::move(*insns_it)); if (std::holds_alternative<IRInstruction*>(v)) { IRInstruction* insn = std::get<IRInstruction*>(v); bool throws = get_succ_edge_of_type(b, EDGE_THROW) != nullptr; auto op = insn->opcode(); // Certain types of blocks cannot have instructions added to the end. // Disallow that case here. if (pos == b->end()) { auto existing_last = b->get_last_insn(); if (existing_last != b->end()) { // This will abort if someone tries to insert after a returning or // throwing instruction. auto existing_last_op = existing_last->insn->opcode(); always_assert_log( !opcode::is_branch(existing_last_op) && !opcode::is_throw(existing_last_op) && !opcode::is_a_return(existing_last_op), "Can't add instructions after %s in Block %zu in %s", details::show_insn(existing_last->insn).c_str(), b->id(), details::show_cfg(*this).c_str()); // When inserting after an instruction that may throw, we need to // start a new block. We also copy over all throw-edges. See FIXME // below for a discussion about try-regions in general. if (throws) { always_assert_log( !existing_last->insn->has_move_result_any(), "Can't add instructions after throwing instruction " "%s with move-result in Block %zu in %s", details::show_insn(existing_last->insn).c_str(), b->id(), details::show_cfg(*this).c_str()); Block* new_block = create_block(); if (opcode::may_throw(op)) { copy_succ_edges_of_type(b, new_block, EDGE_THROW); } const auto& existing_goto_edge = get_succ_edge_of_type(b, EDGE_GOTO); set_edge_source(existing_goto_edge, new_block); add_edge(b, new_block, EDGE_GOTO); // Continue inserting in the new block. b = new_block; pos = new_block->begin(); } } } always_assert_log(!opcode::is_branch(op), "insert() does not support branch opcodes. Use " "create_branch() instead"); IRList::iterator new_inserted_it = b->m_entries.insert_before(pos, insn); if (opcode::is_throw(op) || opcode::is_a_return(op)) { // Stop adding instructions when we understand that op // is the end of the block. insns_it = std::prev(end_index); std::vector<std::unique_ptr<DexPosition>> dangling; for (auto it = pos; it != b->m_entries.end();) { switch (it->type) { case MFLOW_POSITION: dangling.push_back(std::move(it->pos)); break; case MFLOW_OPCODE: m_removed_insns.push_back(it->insn); break; default: break; } it = b->m_entries.erase_and_dispose(it); invalidated_its = true; } fix_dangling_parents(std::move(dangling)); if (opcode::is_a_return(op)) { // This block now ends in a return, it must have no successors. delete_succ_edge_if( b, [](const Edge* e) { return e->type() != EDGE_GHOST; }); } else { always_assert(opcode::is_throw(op)); // The only valid way to leave this block is via a throw edge. delete_succ_edge_if(b, [](const Edge* e) { return !(e->type() == EDGE_THROW || e->type() == EDGE_GHOST); }); } // If this created unreachable blocks, they will be removed by simplify. } else if (opcode::may_throw(op) && throws) { invalidated_its = true; // FIXME: Copying the outgoing throw edges isn't enough. // When the editable CFG is constructed, we transform the try regions // into throw edges. We only add these edges to blocks that may throw, // thus losing the knowledge of which blocks were originally inside a // try region. If we add a new throwing instruction here. It may be // added to a block that was originally inside a try region, but we lost // that information already. // // Possible Solutions: // * Rework throw representation to regions instead of duplicated edges? // * User gives a block that we want to copy the throw edges from? // * User specifies which throw edges they want and to which blocks? // Split the block after the new instruction. // b has become the predecessor of the new split pair Block* succ = split_block(b->to_cfg_instruction_iterator(new_inserted_it)); if (!succ->empty()) { // Copy the outgoing throw edges of the new block back into the // original block copy_succ_edges_of_type(succ, b, EDGE_THROW); } // Continue inserting in the successor block. b = succ; pos = succ->begin(); } } else if (std::holds_alternative<std::unique_ptr<SourceBlock>>(v)) { b->m_entries.insert_before( pos, std::get<std::unique_ptr<SourceBlock>>(std::move(v))); } else if (std::holds_alternative<std::unique_ptr<DexPosition>>(v)) { b->m_entries.insert_before( pos, std::get<std::unique_ptr<DexPosition>>(std::move(v))); } else { not_reached(); } } return invalidated_its; } template <class ForwardIt> bool ControlFlowGraph::push_front(Block* b, const ForwardIt& begin, const ForwardIt& end) { const auto& block_begin = ir_list::InstructionIterable(b).begin(); return insert(b->to_cfg_instruction_iterator(block_begin), begin, end, /* before */ true); } template <class ForwardIt> bool ControlFlowGraph::push_back(Block* b, const ForwardIt& begin, const ForwardIt& end) { const auto& block_end = ir_list::InstructionIterable(b).end(); return insert(b->to_cfg_instruction_iterator(block_end), begin, end, /* before */ true); } template <class ForwardIt> bool Block::insert_before(const InstructionIterator& position, const ForwardIt& begin, const ForwardIt& end) { return m_parent->insert_before(position, begin, end); } template <class ForwardIt> bool Block::insert_after(const InstructionIterator& position, const ForwardIt& begin, const ForwardIt& end) { return m_parent->insert_after(position, begin, end); } template <class ForwardIt> bool Block::push_front(const ForwardIt& begin, const ForwardIt& end) { return m_parent->push_front(this, begin, end); } template <class ForwardIt> bool Block::push_back(const ForwardIt& begin, const ForwardIt& end) { return m_parent->push_back(this, begin, end); } } // namespace cfg inline cfg::InstructionIterable InstructionIterable( cfg::ControlFlowGraph& cfg) { return cfg::InstructionIterable(cfg); } inline cfg::ConstInstructionIterable InstructionIterable( const cfg::ControlFlowGraph& cfg) { return cfg::ConstInstructionIterable(cfg); }

libredex/ControlFlow.h (984 lines of code) (raw):