include/WeakTopologicalOrdering.h (240 lines of code) (raw):

/* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #pragma once #include <cstddef> #include <functional> #include <future> #include <iterator> #include <limits> #include <ostream> #include <stack> #include <unordered_map> #include <vector> #include "Exceptions.h" namespace sparta { // Forward declarations template <typename NodeId> class WtoComponent; template <typename NodeId, typename NodeHash> class WeakTopologicalOrdering; namespace wto_impl { // Forward declaration template <typename NodeId, typename NodeHash, typename SuccFn> class WtoBuilder; /* * Iterator over the subcomponents of a strongly connected component (head * node excluded). This is a regular C++ iterator meant for traversing a * strongly connected component. It's not a fixpoint iterator. */ template <typename NodeId> class WtoComponentIterator final : public std::iterator<std::forward_iterator_tag, WtoComponent<NodeId>> { public: WtoComponentIterator& operator++() { RUNTIME_CHECK(m_component != m_end, undefined_operation()); // All components of a WTO are stored linearly inside a vector in reverse // order. The subcomponents of an SCC are stored between the head node and // the next component in the WTO. m_component -= m_component->m_next_component_offset; return *this; } WtoComponentIterator operator++(int) { WtoComponentIterator retval = *this; ++(*this); return retval; } bool operator==(const WtoComponentIterator& other) const { return m_component == other.m_component; } bool operator!=(const WtoComponentIterator& other) const { return !(*this == other); } const WtoComponent<NodeId>& operator*() { RUNTIME_CHECK(m_component != m_end, undefined_operation()); return *m_component; } const WtoComponent<NodeId>* operator->() { RUNTIME_CHECK(m_component != m_end, undefined_operation()); return m_component; } private: WtoComponentIterator(const WtoComponent<NodeId>* component, const WtoComponent<NodeId>* end) : m_component(component), m_end(end) {} const WtoComponent<NodeId>* m_component; const WtoComponent<NodeId>* m_end; template <typename T> friend class sparta::WtoComponent; template <typename T1, typename T2> friend class sparta::WeakTopologicalOrdering; }; } // namespace wto_impl /* * A component of a weak topological ordering is either a vertex or a strongly * connected set of nodes with a distinguished node (the head). */ template <typename NodeId> class WtoComponent final { public: using iterator = wto_impl::WtoComponentIterator<NodeId>; enum class Kind { Vertex, Scc }; WtoComponent(const NodeId& node, Kind kind, int32_t position, int32_t next_component_position) : m_node(node), m_kind(kind) { RUNTIME_CHECK(position > next_component_position, internal_error()); // When a component is constructed, its position inside the vector is // specified by its absolute index. Since we want to navigate the WTO by // recursively exploring SCCs, it's more efficient to maintain relative // offsets between adjacent components. An absolute position of -1 means // the end of an SCC or the end of the WTO. m_next_component_offset = position - next_component_position; } // Correct iteration depends on these objects being in one contiguous piece of // memory. Make sure users don't accidentally copy these objects WtoComponent(WtoComponent&& that) = default; WtoComponent(const WtoComponent& that) = delete; /* * If the component is not strongly connected, this method returns the single * node contained inside a Vertex component. */ const NodeId& head_node() const { return m_node; } bool is_vertex() const { return m_kind == Kind::Vertex; } bool is_scc() const { return m_kind == Kind::Scc; } iterator begin() const { RUNTIME_CHECK(is_scc(), undefined_operation()); // All the components of a WTO are stored linearly inside a vector. A vector // guarantees that all its elements are stored adjacently in a contiguous // block of memory, which allows us to safely perform pointer arithmetic // based on the 'this' pointer (the vector is never resized after the WTO // has been constructed). return iterator(this - 1, this - m_next_component_offset); } iterator end() const { RUNTIME_CHECK(is_scc(), undefined_operation()); auto end_ptr = this - m_next_component_offset; return iterator(end_ptr, end_ptr); } private: NodeId m_node; Kind m_kind; // The offset to the next component (NOT subcomponent) in the m_wto_space // vector. If we are at the end of the WTO, this will point to one element // before the start of the vector. If we are a subcomponent at the end of // the parent component, this points to one element past the end of the // parent component. size_t m_next_component_offset; template <typename T> friend class wto_impl::WtoComponentIterator; }; /* * Implementation of the decomposition of a rooted directed graph into a weak * topological ordering (WTO), as described in Bourdoncle's original paper: * * F. Bourdoncle. Efficient chaotic iteration strategies with widenings. * In Formal Methods in Programming and Their Applications, pp 128-141. * * State-of-the-art fixpoint iteration algorithms use weak topological orderings * as the underlying structure for high performance. Although WTOs are primarily * used with control-flow graphs of functions or methods, they can come handy * when manipulating structures like call graphs or dependency graphs. * * - NodeId is the identifier of a node in the graph. Nodes should be comparable * using `operator==()`. * - NodeHash is a functional structure providing a hash function on nodes. * * Please note that node identifiers are copied around at various steps of the * algorithm, in particular wherever the `m_successors` function is invoked. For * performance reasons, it's a good idea to keep the structure of `NodeId` as * simple as possible, such as a pointer or a structure of primitive types. */ template <typename NodeId, typename NodeHash = std::hash<NodeId>> class WeakTopologicalOrdering final { public: using iterator = wto_impl::WtoComponentIterator<NodeId>; /* * In order to construct a WTO, we just need to specify the root of the graph * and the successor function. */ template <typename SuccFn> WeakTopologicalOrdering(const NodeId& root, SuccFn successors) { if (successors(root).empty()) { // If the CFG consists of a single node with no control-flow edges, we // don't need to run the general algorithm. This avoids building all the // auxiliary data structures required by Bourdoncle's algorithm. // This optimization benefits the simple parallel fixpoint iterator, which // computes a WTO for each toplevel component of the CFG, most of them // single nodes in practice. m_components.emplace_back(root, WtoComponent<NodeId>::Kind::Vertex, /* position */ 0, /* next_component_position */ -1); return; } wto_impl::WtoBuilder<NodeId, NodeHash, SuccFn> builder(successors, &m_components); builder.build(root); } iterator begin() const { return iterator(&m_components.back(), &m_components.front() - 1); } iterator end() const { auto end_ptr = &m_components.front() - 1; return iterator(end_ptr, end_ptr); } // Recursively iterate through the wto order and invoke a callback for each // node. void visit_depth_first(std::function<void(const NodeId&)> f) { std::function<void(const WtoComponent<NodeId>&)> visit_component; visit_component = [&visit_component, &f](const WtoComponent<NodeId>& v) { f(v.head_node()); if (v.is_scc()) { for (const auto& inner : v) { visit_component(inner); } } }; for (const auto& v : *this) { visit_component(v); } } private: // We store all the components of a WTO inside a vector. This is more // efficient than allocating each component individually on the heap. // It's also more cache-friendly when repeatedly traversing the WTO during // a fixpoint iteration. std::vector<WtoComponent<NodeId>> m_components; }; namespace wto_impl { template <typename NodeId, typename NodeHash, typename SuccFn> class WtoBuilder final { public: WtoBuilder(SuccFn successors, std::vector<WtoComponent<NodeId>>* wto_space) : m_successors(successors), m_wto_space(wto_space), m_free_position(0), m_num(0) {} void build(const NodeId& root) { int32_t partition = -1; visit(root, &partition); } // We keep the notations used by Bourdoncle in the paper to describe the // algorithm. uint32_t visit(const NodeId& vertex, int32_t* partition) { m_stack.push(vertex); uint32_t head = set_dfn(vertex, ++m_num); bool loop = false; for (const NodeId& succ : m_successors(vertex)) { uint32_t succ_dfn = get_dfn(succ); uint32_t min; if (succ_dfn == 0) { min = visit(succ, partition); } else { min = succ_dfn; }; if (min <= head) { head = min; loop = true; } } if (head == get_dfn(vertex)) { // We encode the special value +oo used in the paper with UINT32_MAX. set_dfn(vertex, std::numeric_limits<uint32_t>::max()); NodeId element = m_stack.top(); m_stack.pop(); if (loop) { // Nodes are required to be comparable using `operator==()`. We don't // assume `operator!=()` to be defined on nodes. while (!(element == vertex)) { set_dfn(element, 0); element = m_stack.top(); m_stack.pop(); } push_component(vertex, *partition); } auto kind = loop ? WtoComponent<NodeId>::Kind::Scc : WtoComponent<NodeId>::Kind::Vertex; m_wto_space->emplace_back(vertex, kind, m_free_position, *partition); *partition = m_free_position++; } return head; } void push_component(const NodeId& vertex, int32_t partition) { for (const NodeId& succ : m_successors(vertex)) { if (get_dfn(succ) == 0) { visit(succ, &partition); } } } uint32_t get_dfn(const NodeId& node) { auto it = m_dfn.find(node); if (it != m_dfn.end()) { return it->second; } return 0; } uint32_t set_dfn(const NodeId& node, uint32_t number) { if (number == 0) { m_dfn.erase(node); } else { m_dfn[node] = number; } return number; } SuccFn m_successors; std::vector<WtoComponent<NodeId>>* m_wto_space; // The next available position at the end of the vector of components. int32_t m_free_position; // These are auxiliary data structures used by Bourdoncle's algorithm. std::unordered_map<NodeId, uint32_t, NodeHash> m_dfn; std::stack<NodeId> m_stack; uint32_t m_num; }; } // namespace wto_impl } // namespace sparta template <typename NodeId> inline std::ostream& operator<<( std::ostream& o, const typename sparta::WtoComponent<NodeId>& c) { using namespace sparta; if (c.is_scc()) { o << "(" << c.head_node(); for (const WtoComponent<NodeId>& sub : c) { o << " " << sub; } o << ")"; } else { o << c.head_node(); } return o; } template <typename NodeId> inline std::ostream& operator<<( std::ostream& o, const typename sparta::WeakTopologicalOrdering<NodeId>& wto) { for (auto it = wto.begin(); it != wto.end();) { o << *it++; if (it != wto.end()) { o << " "; } } return o; }