include/AbstractDomain.h

/* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #pragma once #include <functional> #include <memory> #include <ostream> #include <type_traits> #include <utility> #include "Exceptions.h" namespace sparta { /* * This is an API for abstract domains, which are the fundamental structures in * Abstract Interpretation, as described in the seminal paper: * * Patrick Cousot & Radhia Cousot. Abstract interpretation: a unified lattice * model for static analysis of programs by construction or approximation of * fixpoints. In Conference Record of the Fourth Annual ACM SIGPLAN-SIGACT * Symposium on Principles of Programming Languages, pages 238—252, 1977. * * Abstract domains were originally defined as lattices, but this is not a * hard requirement. As long as the join and meet operation are sound * approximations of the corresponding union and intersection operations on the * concrete domain, one can perform computations in a sound manner. Please see * the following paper for a description of more general Abstract Interpretation * frameworks: * * Patrick Cousot & Radhia Cousot. Abstract interpretation frameworks. Journal * of Logic and Computation, 2(4):511—547, 1992. * * This API has been designed with performance in mind. This is why an element * of an abstract domain is mutable and all basic operations have side effects. * A functional interface is provided for convenience as a layer on top of these * operations. An abstract domain is thread safe, as all side-effecting * operations are guaranteed to be only invoked on thread-local objects. It is * the responsibility of the fixpoint operators to ensure that this assumption * is always verified. * * In order to avoid the use of type casts, the interface for abstract domains * is defined using the curiously recurring template pattern (CRTP). The final * class derived from an abstract domain is passed as a parameter to the base * domain. This guarantees that all operations carry the type of the derived * class. The standard usage is: * * class MyDomain final : public AbstractDomain<MyDomain> { ... }; * * All generic abstract domain combinators should follow the CRTP. * * Sample usage: * * class MyDomain final : public AbstractDomain<MyDomain> { * public: * bool is_bottom() { ... } * ... * }; * */ template <typename Derived> class AbstractDomain { public: virtual ~AbstractDomain() { // The destructor is the only method that is guaranteed to be created when a // class template is instantiated. This is a good place to perform all the // sanity checks on the template parameters. static_assert(std::is_base_of<AbstractDomain<Derived>, Derived>::value, "Derived doesn't inherit from AbstractDomain"); static_assert(std::is_default_constructible<Derived>::value, "Derived is not default constructible"); static_assert(std::is_copy_constructible<Derived>::value, "Derived is not copy constructible"); static_assert(std::is_copy_assignable<Derived>::value, "Derived is not copy assignable"); // top() and bottom() are factory methods that respectively produce the top // and bottom values. We define default implementations for them in terms // of set_to_{top, bottom}, but implementors may wish to override those // implementations with more efficient versions. Here we check that any // such overrides bear the correct method signature. static_assert(std::is_same<decltype(Derived::bottom()), Derived>::value, "Derived::bottom() does not exist"); static_assert(std::is_same<decltype(Derived::top()), Derived>::value, "Derived::top() does not exist"); } virtual bool is_bottom() const = 0; virtual bool is_top() const = 0; /* * The partial order relation. */ virtual bool leq(const Derived& other) const = 0; /* * a.equals(b) is semantically equivalent to a.leq(b) && b.leq(a). */ virtual bool equals(const Derived& other) const = 0; /* * Many C++ libraries default to using operator== to check for equality, * so we define it here as an alias of equals(). */ friend bool operator==(const Derived& self, const Derived& other) { return self.equals(other); } friend bool operator!=(const Derived& self, const Derived& other) { return !self.equals(other); } /* * Elements of an abstract domain are mutable and the basic operations have * side effects. */ virtual void set_to_bottom() = 0; virtual void set_to_top() = 0; /* * If the abstract domain is a lattice, this is the least upper bound * operation. */ virtual void join_with(const Derived& other) = 0; /* * If the abstract domain has finite ascending chains, one doesn't need to * define a widening operator and can simply use the join instead. */ virtual void widen_with(const Derived& other) = 0; /* * If the abstract domain is a lattice, this is the greatest lower bound * operation. */ virtual void meet_with(const Derived& other) = 0; /* * If the abstract domain has finite descending chains, one doesn't need to * define a narrowing operator and can simply use the meet instead. */ virtual void narrow_with(const Derived& other) = 0; /* * This is a functional interface on top of the side-effecting API provided * for convenience. */ Derived join(const Derived& other) const { // Here and below: the static_cast is required in order to instruct // the compiler to use the copy constructor of the derived class. Derived tmp(static_cast<const Derived&>(*this)); tmp.join_with(other); return tmp; } Derived widening(const Derived& other) const { Derived tmp(static_cast<const Derived&>(*this)); tmp.widen_with(other); return tmp; } Derived meet(const Derived& other) const { Derived tmp(static_cast<const Derived&>(*this)); tmp.meet_with(other); return tmp; } Derived narrowing(const Derived& other) const { Derived tmp(static_cast<const Derived&>(*this)); tmp.narrow_with(other); return tmp; } static Derived top() { Derived tmp; tmp.set_to_top(); return tmp; } static Derived bottom() { Derived tmp; tmp.set_to_bottom(); return tmp; } }; /* * When implementing an abstract domain, one often has to encode the Top and * Bottom values in a special way. This leads to a nontrivial case analysis when * writing the domain operations. The purpose of AbstractDomainScaffolding is to * implement this boilerplate logic provided a representation of regular * elements defined by the AbstractValue interface. */ enum class AbstractValueKind { Bottom, Value, Top }; } // namespace sparta inline std::ostream& operator<<(std::ostream& o, const sparta::AbstractValueKind& kind) { using namespace sparta; switch (kind) { case AbstractValueKind::Bottom: { o << "_|_"; break; } case AbstractValueKind::Value: { o << "V"; break; } case AbstractValueKind::Top: { o << "T"; break; } } return o; } namespace sparta { /* * This interface represents the structure of the regular elements of an * abstract domain (like a constant, an interval, a points-to set, etc.). * Performing operations on those regular values may yield Top or Bottom, which * is why we define an AbstractValueKind type to identify such situations. * * Sample usage: * * class MyAbstractValue final : public AbstractValue<MyAbstractValue> { * public: * void clear() { m_table.clear(); } * ... * private: * std::unordered_map<...> m_table; * }; * */ template <typename Derived> class AbstractValue { public: virtual ~AbstractValue() { static_assert(std::is_base_of<AbstractValue<Derived>, Derived>::value, "Derived doesn't inherit from AbstractValue"); static_assert(std::is_default_constructible<Derived>::value, "Derived is not default constructible"); static_assert(std::is_copy_constructible<Derived>::value, "Derived is not copy constructible"); static_assert(std::is_copy_assignable<Derived>::value, "Derived is not copy assignable"); } /* * When the result of an operation is Top or Bottom, we no longer need an * explicit representation for the abstract value. This method frees the * memory used to represent an abstract value (hash tables, vectors, etc.). */ virtual void clear() = 0; /* * Even though we factor out the logic for Top and Bottom, these elements may * still be represented by regular abstract values (for example [0, -1] and * [-oo, +oo] in the domain of intervals), hence the need for such a method. */ virtual AbstractValueKind kind() const = 0; virtual bool leq(const Derived& other) const = 0; virtual bool equals(const Derived& other) const = 0; /* * These are the regular abstract domain operations that perform side effects. * They return a AbstractValueKind value to identify situations where the * result of the operation is either Top or Bottom. */ virtual AbstractValueKind join_with(const Derived& other) = 0; virtual AbstractValueKind widen_with(const Derived& other) = 0; virtual AbstractValueKind meet_with(const Derived& other) = 0; virtual AbstractValueKind narrow_with(const Derived& other) = 0; }; /* * This exception flags the use of an abstract value with invalid kind in the * given context. */ class invalid_abstract_value : public virtual abstract_interpretation_exception {}; using expected_kind = boost::error_info<struct tag_expected_kind, AbstractValueKind>; using actual_kind = boost::error_info<struct tag_actual_kind, AbstractValueKind>; /* * This abstract domain combinator takes an abstract value specification and * constructs a full-fledged abstract domain, handling all the logic for Top and * Bottom. It takes a poset and adds the two extremal elements Top and Bottom. * If the poset contains a Top and/or Bottom element, then those should be * coalesced with the extremal elements added by AbstractDomainScaffolding. It * also explains why the lattice operations return an AbstractValueKind: the * scaffolding must be able to identify when the result of one of these * operations is an extremal element that must be coalesced. While the helper * method `normalize()` can be used to automatically coalesce the Top elements, * Bottom should be handled separately in each operation of the abstract domain. * * Sample usage: * * class MyAbstractValue final : public AbstractValue<MyAbstractValue> { * ... * }; * * class MyAbstractDomain final * : public AbstractDomainScaffolding<MyAbstractValue, MyAbstractDomain> { * public: * MyAbstractDomain(...) { ... } * * // All basic operations defined in AbstractDomain are provided by * // AbstractDomainScaffolding. We only need to define the operations * // that are specific to MyAbstractDomain. * * void my_operation(...) { ... } * * void my_other_operation(...) { ... } * }; * */ template <typename Value, typename Derived> class AbstractDomainScaffolding : public AbstractDomain<Derived> { public: virtual ~AbstractDomainScaffolding() { static_assert(std::is_base_of<AbstractValue<Value>, Value>::value, "Value doesn't inherit from AbstractValue"); static_assert(std::is_base_of<AbstractDomainScaffolding<Value, Derived>, Derived>::value, "Derived doesn't inherit from AbstractDomainScaffolding"); } /* * The choice of lattice element returned by the default constructor is * completely arbitrary. In pratice though, the abstract value used to * initiate a fixpoint iteration is most often constructed in this way. */ AbstractDomainScaffolding() { m_kind = m_value.kind(); } /* * A convenience constructor for creating Bottom and Top. */ explicit AbstractDomainScaffolding(AbstractValueKind kind) : m_kind(kind) { RUNTIME_CHECK(kind != AbstractValueKind::Value, invalid_abstract_value() << actual_kind(kind)); } AbstractValueKind kind() const { return m_kind; } bool is_bottom() const override { return m_kind == AbstractValueKind::Bottom; } bool is_top() const override { return m_kind == AbstractValueKind::Top; } bool is_value() const { return m_kind == AbstractValueKind::Value; } void set_to_bottom() override { m_kind = AbstractValueKind::Bottom; m_value.clear(); } void set_to_top() override { m_kind = AbstractValueKind::Top; m_value.clear(); } bool leq(const Derived& other) const override { if (is_bottom()) { return true; } if (other.is_bottom()) { return false; } if (other.is_top()) { return true; } if (is_top()) { return false; } RUNTIME_CHECK(m_kind == AbstractValueKind::Value, invalid_abstract_value() << expected_kind(AbstractValueKind::Value) << actual_kind(m_kind)); RUNTIME_CHECK(other.m_kind == AbstractValueKind::Value, invalid_abstract_value() << expected_kind(AbstractValueKind::Value) << actual_kind(other.m_kind)); return m_value.leq(other.m_value); } bool equals(const Derived& other) const override { if (is_bottom()) { return other.is_bottom(); } if (is_top()) { return other.is_top(); } RUNTIME_CHECK(m_kind == AbstractValueKind::Value, invalid_abstract_value() << expected_kind(AbstractValueKind::Value) << actual_kind(m_kind)); if (other.m_kind != AbstractValueKind::Value) { return false; } return m_value.equals(other.m_value); } void join_with(const Derived& other) override { auto value_join = [this, &other]() { m_kind = m_value.join_with(other.m_value); }; join_like_operation_with(other, value_join); } void widen_with(const Derived& other) override { auto value_widening = [this, &other]() { m_kind = m_value.widen_with(other.m_value); }; join_like_operation_with(other, value_widening); } void meet_with(const Derived& other) override { auto value_meet = [this, &other]() { m_kind = m_value.meet_with(other.m_value); }; meet_like_operation_with(other, value_meet); } void narrow_with(const Derived& other) override { auto value_narrowing = [this, &other]() { m_kind = m_value.narrow_with(other.m_value); }; meet_like_operation_with(other, value_narrowing); } protected: // These methods allow the derived class to manipulate the abstract value. Value* get_value() { return &m_value; } const Value* get_value() const { return &m_value; } void set_to_value(const Value& value) { m_kind = value.kind(); m_value = value; } // In some implementations, the data structure chosen to represent an abstract // value can also denote Top (e.g., when using a map to represent an abstract // environment, the empty map commonly denotes Top). This method is used to // keep the internal representation and the m_kind flag in sync after // performing an operation. It should never be used to infer Bottom from the // internal representation of an abstract value. Bottom should always be // treated as a special case in every operation of the abstract domain. void normalize() { if (m_kind == AbstractValueKind::Bottom) { return; } // Consider an abstract environment represented as a map. After removing // some bindings from the environment, the internal map might become empty // and the m_kind flag must be set to AbstractValueKind::Top. Conversely, // after adding a binding to the abstract environment Top, the internal map // is no longer empty and m_kind must be set to AbstractValueKind::Value. // This step synchronizes both representations. m_kind = m_value.kind(); if (m_kind == AbstractValueKind::Top) { // We can discard any leftover data from the internal representation. m_value.clear(); } } private: void join_like_operation_with(const Derived& other, std::function<void()> operation) { if (is_top() || other.is_bottom()) { return; } if (other.is_top()) { set_to_top(); return; } if (is_bottom()) { m_kind = other.m_kind; m_value = other.m_value; return; } operation(); } void meet_like_operation_with(const Derived& other, std::function<void()> operation) { if (is_bottom() || other.is_top()) { return; } if (other.is_bottom()) { set_to_bottom(); return; } if (is_top()) { m_kind = other.m_kind; m_value = other.m_value; return; } operation(); } AbstractValueKind m_kind; Value m_value; }; // Implement copy on write for Value. The FixpointIterator makes lots of copies // of Domain objects. This class delays copying Value until we actually need to // (before a write). m_value is read-only until the first write operation. // // This AbstractValue is recommended whenever copying the underlying abstract // value incurs a significant cost template <typename Value> class CopyOnWriteAbstractValue : AbstractValue<CopyOnWriteAbstractValue<Value>> { using This = CopyOnWriteAbstractValue<Value>; public: void clear() override { get().clear(); } AbstractValueKind kind() const override { return get().kind(); } bool leq(const This& other) const override { return get().leq(other.get()); } bool equals(const This& other) const override { return get().equals(other.get()); } AbstractValueKind join_with(const This& other) override { return get().join_with(other.get()); } AbstractValueKind widen_with(const This& other) override { return get().widen_with(other.get()); } AbstractValueKind meet_with(const This& other) override { return get().meet_with(other.get()); } AbstractValueKind narrow_with(const This& other) override { return get().narrow_with(other.get()); } // m_value should _only_ be accessed via `get()` Value& get() { upgrade_to_writer(); return *m_value; } const Value& get() const { return *m_value; } private: // WARNING: This Copy on write implementation is likely to fail with multiple // concurrent accesses void upgrade_to_writer() { if (m_value.use_count() > 1) { // need to make a copy m_value = std::make_shared<Value>(*m_value); } } // m_value should only be accessed via `get()` std::shared_ptr<Value> m_value{std::make_shared<Value>()}; }; /* * This reverses the top / bottom elements and meet / join operations of the * domain. It also switches widen / narrow, which is *only* valid for finite * abstract domains, where the widening / narrowing are identical to the join / * meet operations. */ template <typename Domain, typename Derived> class AbstractDomainReverseAdaptor : public AbstractDomain<Derived> { public: using BaseDomain = Domain; AbstractDomainReverseAdaptor() = default; template <typename... Args> explicit AbstractDomainReverseAdaptor(Args&&... args) : m_domain(std::forward<Args>(args)...) {} bool is_bottom() const override { return m_domain.is_top(); } bool is_top() const override { return m_domain.is_bottom(); } bool leq(const Derived& other) const override { return m_domain.equals(other.m_domain) || !m_domain.leq(other.m_domain); } bool equals(const Derived& other) const override { return m_domain.equals(other.m_domain); } void set_to_bottom() override { m_domain.set_to_top(); } void set_to_top() override { m_domain.set_to_bottom(); } void join_with(const Derived& other) override { m_domain.meet_with(other.m_domain); } void widen_with(const Derived& other) override { m_domain.narrow_with(other.m_domain); } void meet_with(const Derived& other) override { m_domain.join_with(other.m_domain); } void narrow_with(const Derived& other) override { m_domain.widen_with(other.m_domain); } static Derived bottom() { return Derived(Domain::top()); } static Derived top() { return Derived(Domain::bottom()); } Domain& unwrap() { return m_domain; } const Domain& unwrap() const { return m_domain; } private: Domain m_domain; }; } // namespace sparta template <typename Domain, typename Derived> inline std::ostream& operator<<( std::ostream& o, const typename sparta::AbstractDomainReverseAdaptor<Domain, Derived>& d) { o << d.unwrap(); return o; }

include/AbstractDomain.h (347 lines of code) (raw):