libredex/SourceBlockConsistencyCheck.h (191 lines of code) (raw):

/* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #pragma once #include <cstdint> #include <limits> #include <map> #include <set> #include <unordered_map> #include <vector> #include "ControlFlow.h" #include "Dominators.h" #include "MonotonicFixpointIterator.h" #include "Show.h" #include "SourceBlocks.h" #include "Trace.h" #pragma clang optimize off class DexMethod; class DexMethodRef; class DexString; // The SourceBlockConsistencyCheck class implements a simple consistency check // which can be run after each phase to ensure that no phase removes source // blocks in a way which is inconsistent with the source blocks' dominator // tree. // // It is off by default. It runs as part of the assessor, and may be emabled // by adding '"run_sb_consistency": true' under "assessor" properties in the // config JSON. // // At the end of InsertSourceBlocks, a SourceBlockDomInfo is created for the // current method. This consists of a dominator tree for the original source // blocks of the method (SourceBlockDomTree), along with APIs for querying // which source blocks are legally removeable from the current method, and to // actually remove the representation of a source block from the // SourceBlockDomTree. // // After each pass, source blocks which are present in each method's IR are // compared with the original set. Any which are missing, and which are not // legally removeable according to the SourceBlockDomTree, will be reported as // "missing". The set of removed source blocks is stored so that missing source // blocks are only reported just after the pass they were removed in. // // Only leaves in the source blocks' dominator tree are legally removeable // (after which, the source block can be removed from the dominator tree // itself, potentially creating new leaves). In practice, since this check // runs after an entire pass, it validates that the set of removed source // blocks could have been removed legally, but doesn't strictly validate that // they were removed in the correct order. // // Recalculation of the source blocks' dominator tree is currently never done. // This could lead to false negatives, for example: // // CFG: Dom Tree: // A A // / \ /|\ // C B B C D // \ / // D // // It's not legal to remove B and C without removing D. Recalc'ing after // removing B or C would make C or B D's immediate dominator, but without that, // removing B and C is reported as legal. However, this should only over-report // leaves, and thus shouldn't cause false positives to be reported. // // Inlining also isn't accounted for as a result. In future, the dom tree // should be recalculated at strategic points. // // Note: Under the hood, SourceBlockDomTree is really represented by a "flipped" // dominator tree, i.e. a DAG where edges symbolize "is dominated by" // relations. namespace source_blocks { struct SourceBlockInfo { const DexString* original_dex_method; uint32_t id; }; constexpr SourceBlockInfo kInvalidSBI = SourceBlockInfo{nullptr, std::numeric_limits<uint32_t>::max()}; bool operator<(const SourceBlockInfo& l, const SourceBlockInfo& r); bool operator==(const SourceBlockInfo& l, const SourceBlockInfo& r); enum class SourceBlockDomTreeKind { Dom, PostDom }; struct DomTreeNode { SourceBlockInfo imm_dom = kInvalidSBI; uint32_t in_degree = 0; }; template <SourceBlockDomTreeKind Kind> class SourceBlockDomTree { using GraphInterfaceT = std::conditional_t< Kind == SourceBlockDomTreeKind::Dom, cfg::GraphInterface, sparta::BackwardsFixpointIterationAdaptor<cfg::GraphInterface>>; public: SourceBlockDomTree() = default; SourceBlockDomTree(const cfg::ControlFlowGraph& cfg, uint32_t num_src_blks); const std::set<SourceBlockInfo>& leaves(); void remove_src_blk(const SourceBlockInfo& sb_info); private: std::map<SourceBlockInfo, DomTreeNode> m_dom_tree_nodes; std::set<SourceBlockInfo> m_leaves; }; class SourceBlockDomInfo { public: SourceBlockDomInfo() = default; SourceBlockDomInfo(const cfg::ControlFlowGraph& cfg, uint32_t num_src_blks); std::vector<SourceBlockInfo> get_removeable_src_blks(); void remove_src_blk(const SourceBlockInfo& sb_info); private: SourceBlockDomTree<SourceBlockDomTreeKind::Dom> m_dom_tree; }; struct SBConsistencyContext { std::set<SourceBlockInfo> m_source_blocks; std::set<SourceBlockInfo> m_known_missing_source_blocks; source_blocks::SourceBlockDomInfo m_sbdi; }; class SourceBlockConsistencyCheck { public: SourceBlockConsistencyCheck() = default; void initialize(const Scope& scope); bool is_initialized() const; size_t run(const Scope& scope); private: void rebuild_sbdi(DexMethod* dex_method, const cfg::ControlFlowGraph& cfg); std::unordered_map<DexMethod*, SBConsistencyContext> m_context_map; bool m_is_initialized = false; }; // inline functions inline bool SourceBlockConsistencyCheck::is_initialized() const { return m_is_initialized; } // Template implementations: template <SourceBlockDomTreeKind Kind> SourceBlockDomTree<Kind>::SourceBlockDomTree(const cfg::ControlFlowGraph& cfg, uint32_t num_src_blks) { if (cfg.blocks().empty()) { return; } auto first_block = *cfg.blocks().begin(); always_assert(first_block); auto first_sb = source_blocks::get_first_source_block(first_block); always_assert(first_sb); auto* dex_method = first_sb->src; for (uint32_t i = 0; i < num_src_blks; i++) { m_leaves.insert({dex_method, i}); } auto doms = dominators::SimpleFastDominators<GraphInterfaceT>(cfg); for (cfg::Block* block : cfg.blocks()) { if (block == cfg.exit_block() && cfg.get_pred_edge_of_type(block, EDGE_GHOST)) { continue; } source_blocks::foreach_source_block( block, [num_src_blks, dex_method, this](auto& sb) { always_assert(sb); always_assert(sb->id < num_src_blks); always_assert(sb->src == dex_method); auto sb_info = SourceBlockInfo{sb->src, sb->id}; m_dom_tree_nodes[sb_info] = {}; }); } for (cfg::Block* block : cfg.blocks()) { if (block == cfg.exit_block() && cfg.get_pred_edge_of_type(block, EDGE_GHOST)) { continue; } SourceBlock* prev = nullptr; auto first_sb_in_block = source_blocks::get_first_source_block(block); if (!first_sb_in_block) { continue; } source_blocks::foreach_source_block( block, [this, first_sb_in_block, &prev](const auto& sb) { if (sb != first_sb_in_block) { always_assert(prev); auto prev_sb_info = SourceBlockInfo{prev->src, prev->id}; auto curr_sb_info = SourceBlockInfo{sb->src, sb->id}; if constexpr (Kind == SourceBlockDomTreeKind::Dom) { m_dom_tree_nodes.at(curr_sb_info).imm_dom = prev_sb_info; m_dom_tree_nodes.at(prev_sb_info).in_degree++; m_leaves.erase(prev_sb_info); } else { m_dom_tree_nodes.at(prev_sb_info).imm_dom = curr_sb_info; m_dom_tree_nodes.at(curr_sb_info).in_degree++; m_leaves.erase(curr_sb_info); } } prev = sb; }); auto curr_idom = doms.get_idom(block); if (!curr_idom || (curr_idom == cfg.exit_block() && cfg.get_pred_edge_of_type(curr_idom, EDGE_GHOST))) { continue; } // In the idom implementation in Dominators.h, the entry block's idom is // set to itself, which is not correct according to the definition of an // idom (requires strict dominance - should be null for the entry block), // but let's not risk breaking anything else by fixing that. if (curr_idom == block) { continue; } SourceBlock* sb_in_idom = nullptr; if constexpr (Kind == SourceBlockDomTreeKind::Dom) { sb_in_idom = source_blocks::get_last_source_block(curr_idom); } else { sb_in_idom = source_blocks::get_first_source_block(curr_idom); } always_assert(sb_in_idom); always_assert(sb_in_idom->id < num_src_blks); always_assert(sb_in_idom->src == dex_method); auto sb_in_idom_info = SourceBlockInfo{sb_in_idom->src, sb_in_idom->id}; m_leaves.erase(sb_in_idom_info); m_dom_tree_nodes.at(sb_in_idom_info).in_degree++; if constexpr (Kind == SourceBlockDomTreeKind::Dom) { auto first_sb_in_block_info = SourceBlockInfo{first_sb_in_block->src, first_sb_in_block->id}; m_dom_tree_nodes.at(first_sb_in_block_info).imm_dom = sb_in_idom_info; } else { auto last_sb_in_block = source_blocks::get_last_source_block(curr_idom); auto last_sb_in_block_info = SourceBlockInfo{last_sb_in_block->src, last_sb_in_block->id}; m_dom_tree_nodes.at(last_sb_in_block_info).imm_dom = sb_in_idom_info; } } } template <SourceBlockDomTreeKind Kind> void SourceBlockDomTree<Kind>::remove_src_blk(const SourceBlockInfo& sb_info) { { auto it = m_leaves.find(sb_info); always_assert(it != m_leaves.end()); m_leaves.erase(it); } auto it1 = m_dom_tree_nodes.find(sb_info); always_assert(it1 != m_dom_tree_nodes.end()); auto& node = it1->second; always_assert(node.in_degree == 0); if (!(node.imm_dom == kInvalidSBI)) { auto it2 = m_dom_tree_nodes.find(node.imm_dom); always_assert(it2 != m_dom_tree_nodes.end()); auto& imm_dom_node = it2->second; always_assert(imm_dom_node.in_degree > 0); imm_dom_node.in_degree--; if (imm_dom_node.in_degree == 0) { m_leaves.insert(node.imm_dom); } } node = DomTreeNode{kInvalidSBI, std::numeric_limits<uint32_t>::max()}; } template <SourceBlockDomTreeKind Kind> const std::set<SourceBlockInfo>& SourceBlockDomTree<Kind>::leaves() { return m_leaves; } } // namespace source_blocks