sparta/include/PatriciaTreeSet.h (801 lines of code) (raw):
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#pragma once
#include <algorithm>
#include <atomic>
#include <cstdint>
#include <functional>
#include <initializer_list>
#include <iterator>
#include <limits>
#include <ostream>
#include <stack>
#include <type_traits>
#include <utility>
#include <boost/functional/hash.hpp>
#include <boost/intrusive_ptr.hpp>
#include "Exceptions.h"
#include "PatriciaTreeUtil.h"
namespace sparta {
// Forward declarations.
namespace pt_impl {
template <typename IntegerType>
class PatriciaTree;
template <typename IntegerType>
class PatriciaTreeLeaf;
template <typename IntegerType>
class PatriciaTreeIterator;
template <typename IntegerType>
inline bool contains(
IntegerType key,
const boost::intrusive_ptr<PatriciaTree<IntegerType>>& tree);
template <typename IntegerType>
inline bool is_subset_of(
const boost::intrusive_ptr<PatriciaTree<IntegerType>>& tree1,
const boost::intrusive_ptr<PatriciaTree<IntegerType>>& tree2);
template <typename IntegerType>
inline bool equals(
const boost::intrusive_ptr<PatriciaTree<IntegerType>>& tree1,
const boost::intrusive_ptr<PatriciaTree<IntegerType>>& tree2);
template <typename IntegerType>
inline boost::intrusive_ptr<PatriciaTree<IntegerType>> insert(
IntegerType key,
const boost::intrusive_ptr<PatriciaTree<IntegerType>>& tree);
template <typename IntegerType>
inline boost::intrusive_ptr<PatriciaTree<IntegerType>> insert_leaf(
const boost::intrusive_ptr<PatriciaTreeLeaf<IntegerType>>& leaf,
const boost::intrusive_ptr<PatriciaTree<IntegerType>>& tree);
template <typename IntegerType>
inline boost::intrusive_ptr<PatriciaTree<IntegerType>> remove(
IntegerType key,
const boost::intrusive_ptr<PatriciaTree<IntegerType>>& tree);
template <typename IntegerType>
inline boost::intrusive_ptr<PatriciaTree<IntegerType>> filter(
const std::function<bool(IntegerType)>& predicate,
const boost::intrusive_ptr<PatriciaTree<IntegerType>>& tree);
template <typename IntegerType>
inline boost::intrusive_ptr<PatriciaTree<IntegerType>> merge(
const boost::intrusive_ptr<PatriciaTree<IntegerType>>& s,
const boost::intrusive_ptr<PatriciaTree<IntegerType>>& t);
template <typename IntegerType>
inline boost::intrusive_ptr<PatriciaTree<IntegerType>> intersect(
const boost::intrusive_ptr<PatriciaTree<IntegerType>>& s,
const boost::intrusive_ptr<PatriciaTree<IntegerType>>& t);
template <typename IntegerType>
inline boost::intrusive_ptr<PatriciaTree<IntegerType>> diff(
const boost::intrusive_ptr<PatriciaTree<IntegerType>>& s,
const boost::intrusive_ptr<PatriciaTree<IntegerType>>& t);
} // namespace pt_impl
/*
* This implementation of sets of integers using Patricia trees is based on the
* following paper:
*
* C. Okasaki, A. Gill. Fast Mergeable Integer Maps. In Workshop on ML (1998).
*
* Patricia trees are a highly efficient representation of compressed binary
* tries. They are well suited for the situation where one has to manipulate
* many large sets that are identical or nearly identical. In the paper,
* Patricia trees are entirely reconstructed for each operation. We have
* modified the original algorithms, so that subtrees that are not affected by
* an operation remain unchanged. Since this is a functional data structure,
* identical subtrees are therefore shared among all Patricia tries manipulated
* by the program. This effectively achieves a form of incremental hash-consing.
* Note that it's not perfect, since identical trees that are independently
* constructed are not equated, but it's a lot more efficient than regular
* hash-consing. This data structure doesn't just reduce the memory footprint of
* sets, it also significantly speeds up certain operations. Whenever two sets
* represented as Patricia trees share some structure, their union and
* intersection can often be computed in sublinear time.
*
* Patricia trees can only handle unsigned integers. Arbitrary objects can be
* accommodated as long as they are represented as pointers. Our implementation
* of Patricia-tree sets can transparently operate on either unsigned integers
* or pointers to objects.
*/
template <typename Element>
class PatriciaTreeSet final {
public:
// C++ container concept member types
using iterator = pt_impl::PatriciaTreeIterator<Element>;
using const_iterator = iterator;
using value_type = Element;
using difference_type = std::ptrdiff_t;
using size_type = size_t;
using const_reference = const Element&;
using const_pointer = const Element*;
using IntegerType = typename std::
conditional_t<std::is_pointer<Element>::value, uintptr_t, Element>;
PatriciaTreeSet() = default;
explicit PatriciaTreeSet(std::initializer_list<Element> l) {
for (Element x : l) {
insert(x);
}
}
template <typename InputIterator>
PatriciaTreeSet(InputIterator first, InputIterator last) {
for (auto it = first; it != last; ++it) {
insert(*it);
}
}
bool empty() const { return m_tree == nullptr; }
size_t size() const {
size_t s = 0;
std::for_each(begin(), end(), [&s](const auto&) { ++s; });
return s;
}
size_t max_size() const { return std::numeric_limits<IntegerType>::max(); }
iterator begin() const { return iterator(m_tree); }
iterator end() const { return iterator(); }
bool contains(Element key) const {
if (m_tree == nullptr) {
return false;
}
return pt_impl::contains<IntegerType>(encode(key), m_tree);
}
bool is_subset_of(const PatriciaTreeSet& other) const {
return pt_impl::is_subset_of<IntegerType>(m_tree, other.m_tree);
}
bool equals(const PatriciaTreeSet& other) const {
return pt_impl::equals<IntegerType>(m_tree, other.m_tree);
}
friend bool operator==(const PatriciaTreeSet& s1, const PatriciaTreeSet& s2) {
return s1.equals(s2);
}
friend bool operator!=(const PatriciaTreeSet& s1, const PatriciaTreeSet& s2) {
return !s1.equals(s2);
}
/*
* This faster equality predicate can be used to check whether a sequence of
* in-place modifications leaves a Patricia-tree set unchanged. For comparing
* two arbitrary Patricia-tree sets, one needs to use the `equals()`
* predicate.
*
* Example:
*
* PatriciaTreeSet<...> s, t;
* ...
* t = s;
* t.union_with(...);
* t.remove(...);
* t.intersection_with(...);
* if (s.reference_equals(t)) { // This test is equivalent to s.equals(t)
* ...
* }
*/
bool reference_equals(const PatriciaTreeSet& other) const {
return m_tree == other.m_tree;
}
PatriciaTreeSet& insert(Element key) {
m_tree = pt_impl::insert<IntegerType>(encode(key), m_tree);
return *this;
}
PatriciaTreeSet& remove(Element key) {
m_tree = pt_impl::remove<IntegerType>(encode(key), m_tree);
return *this;
}
PatriciaTreeSet& filter(
const std::function<bool(const Element&)>& predicate) {
auto encoded_predicate = [&predicate](IntegerType key) {
return predicate(decode(key));
};
m_tree = pt_impl::filter<IntegerType>(encoded_predicate, m_tree);
return *this;
}
PatriciaTreeSet& union_with(const PatriciaTreeSet& other) {
m_tree = pt_impl::merge<IntegerType>(m_tree, other.m_tree);
return *this;
}
PatriciaTreeSet& intersection_with(const PatriciaTreeSet& other) {
m_tree = pt_impl::intersect<IntegerType>(m_tree, other.m_tree);
return *this;
}
PatriciaTreeSet& difference_with(const PatriciaTreeSet& other) {
m_tree = pt_impl::diff<IntegerType>(m_tree, other.m_tree);
return *this;
}
PatriciaTreeSet get_union_with(const PatriciaTreeSet& other) const {
auto result = *this;
result.union_with(other);
return result;
}
PatriciaTreeSet get_intersection_with(const PatriciaTreeSet& other) const {
auto result = *this;
result.intersection_with(other);
return result;
}
PatriciaTreeSet get_difference_with(const PatriciaTreeSet& other) const {
auto result = *this;
result.difference_with(other);
return result;
}
/*
* The hash codes are computed incrementally when the Patricia trees are
* constructed. Hence, this method has complexity O(1).
*/
size_t hash() const { return m_tree == nullptr ? 0 : m_tree->hash(); }
void clear() { m_tree.reset(); }
friend std::ostream& operator<<(std::ostream& o,
const PatriciaTreeSet<Element>& s) {
o << "{";
for (auto it = s.begin(); it != s.end(); ++it) {
o << pt_util::Dereference<Element>()(*it);
if (std::next(it) != s.end()) {
o << ", ";
}
}
o << "}";
return o;
}
private:
// These functions are used to handle the type conversions required when
// manipulating sets of pointers. The first parameter is necessary to make
// template deduction work.
template <typename T = Element,
typename std::enable_if_t<std::is_pointer<T>::value, int> = 0>
static uintptr_t encode(Element x) {
return reinterpret_cast<uintptr_t>(x);
}
template <typename T = Element,
typename std::enable_if_t<!std::is_pointer<T>::value, int> = 0>
static Element encode(Element x) {
return x;
}
template <typename T = Element,
typename std::enable_if_t<std::is_pointer<T>::value, int> = 0>
static Element decode(uintptr_t x) {
return reinterpret_cast<Element>(x);
}
template <typename T = Element,
typename std::enable_if_t<!std::is_pointer<T>::value, int> = 0>
static Element decode(Element x) {
return x;
}
boost::intrusive_ptr<pt_impl::PatriciaTree<IntegerType>> m_tree;
template <typename T>
friend class pt_impl::PatriciaTreeIterator;
};
namespace pt_impl {
using namespace pt_util;
template <typename IntegerType>
class PatriciaTree {
public:
// A Patricia tree is an immutable structure.
PatriciaTree& operator=(const PatriciaTree& other) = delete;
virtual ~PatriciaTree() {
// The destructor is the only method that is guaranteed to be created when
// a class template is instantiated. This is a good place to perform all
// the sanity checks on the template parameters.
static_assert(std::is_unsigned<IntegerType>::value,
"IntegerType is not an unsigned arihmetic type");
}
virtual bool is_leaf() const = 0;
bool is_branch() const { return !is_leaf(); }
size_t hash() const { return m_hash; }
void set_hash(size_t h) { m_hash = h; }
friend void intrusive_ptr_add_ref(const PatriciaTree<IntegerType>* p) {
p->m_reference_count.fetch_add(1, std::memory_order_relaxed);
}
friend void intrusive_ptr_release(const PatriciaTree<IntegerType>* p) {
if (p->m_reference_count.fetch_sub(1, std::memory_order_release) == 1) {
std::atomic_thread_fence(std::memory_order_acquire);
delete p;
}
}
private:
mutable std::atomic<size_t> m_reference_count{0};
size_t m_hash;
};
// This defines an internal node of a Patricia tree. Patricia trees are
// compressed binary tries, where a path in the tree represents a sequence of
// branchings based on the value of some bits at certain positions in the binary
// decomposition of the key. The position of the bit in the key which determines
// the branching at a given node is represented in m_branching_bit as a bit mask
// (i.e., all bits are 0 except for the branching bit). All keys in the subtree
// originating from a given node share the same bit prefix (in the little endian
// ordering), which is stored in m_prefix.
template <typename IntegerType>
class PatriciaTreeBranch final : public PatriciaTree<IntegerType> {
public:
PatriciaTreeBranch(IntegerType prefix,
IntegerType branching_bit,
boost::intrusive_ptr<PatriciaTree<IntegerType>> left_tree,
boost::intrusive_ptr<PatriciaTree<IntegerType>> right_tree)
: m_prefix(prefix),
m_branching_bit(branching_bit),
m_left_tree(std::move(left_tree)),
m_right_tree(std::move(right_tree)) {
size_t seed = 0;
boost::hash_combine(seed, m_prefix);
boost::hash_combine(seed, m_branching_bit);
boost::hash_combine(seed, m_left_tree->hash());
boost::hash_combine(seed, m_right_tree->hash());
this->set_hash(seed);
}
bool is_leaf() const override { return false; }
IntegerType prefix() const { return m_prefix; }
IntegerType branching_bit() const { return m_branching_bit; }
const boost::intrusive_ptr<PatriciaTree<IntegerType>>& left_tree() const {
return m_left_tree;
}
const boost::intrusive_ptr<PatriciaTree<IntegerType>>& right_tree() const {
return m_right_tree;
}
static boost::intrusive_ptr<PatriciaTreeBranch<IntegerType>> make(
IntegerType prefix,
IntegerType branching_bit,
boost::intrusive_ptr<PatriciaTree<IntegerType>> left_tree,
boost::intrusive_ptr<PatriciaTree<IntegerType>> right_tree) {
return new PatriciaTreeBranch<IntegerType>(
prefix, branching_bit, std::move(left_tree), std::move(right_tree));
}
private:
IntegerType m_prefix;
IntegerType m_branching_bit;
boost::intrusive_ptr<PatriciaTree<IntegerType>> m_left_tree;
boost::intrusive_ptr<PatriciaTree<IntegerType>> m_right_tree;
};
template <typename IntegerType>
class PatriciaTreeLeaf final : public PatriciaTree<IntegerType> {
public:
explicit PatriciaTreeLeaf(IntegerType key) : m_key(key) {
boost::hash<IntegerType> hasher;
this->set_hash(hasher(key));
}
bool is_leaf() const override { return true; }
const IntegerType& key() const { return m_key; }
static boost::intrusive_ptr<PatriciaTreeLeaf<IntegerType>> make(
IntegerType key) {
return new PatriciaTreeLeaf<IntegerType>(key);
}
private:
IntegerType m_key;
};
template <typename IntegerType>
boost::intrusive_ptr<PatriciaTreeBranch<IntegerType>> join(
IntegerType prefix0,
const boost::intrusive_ptr<PatriciaTree<IntegerType>>& tree0,
IntegerType prefix1,
const boost::intrusive_ptr<PatriciaTree<IntegerType>>& tree1) {
IntegerType m = get_branching_bit(prefix0, prefix1);
if (is_zero_bit(prefix0, m)) {
return PatriciaTreeBranch<IntegerType>::make(
mask(prefix0, m), m, tree0, tree1);
} else {
return PatriciaTreeBranch<IntegerType>::make(
mask(prefix0, m), m, tree1, tree0);
}
}
// This function is used by remove() to prevent the creation of branch nodes
// with only one child.
template <typename IntegerType>
boost::intrusive_ptr<PatriciaTree<IntegerType>> make_branch(
IntegerType prefix,
IntegerType branching_bit,
const boost::intrusive_ptr<PatriciaTree<IntegerType>>& left_tree,
const boost::intrusive_ptr<PatriciaTree<IntegerType>>& right_tree) {
if (left_tree == nullptr) {
return right_tree;
}
if (right_tree == nullptr) {
return left_tree;
}
return PatriciaTreeBranch<IntegerType>::make(
prefix, branching_bit, left_tree, right_tree);
}
template <typename IntegerType>
inline bool contains(
IntegerType key,
const boost::intrusive_ptr<PatriciaTree<IntegerType>>& tree) {
if (tree == nullptr) {
return false;
}
if (tree->is_leaf()) {
const auto& leaf =
boost::static_pointer_cast<PatriciaTreeLeaf<IntegerType>>(tree);
return key == leaf->key();
}
const auto& branch =
boost::static_pointer_cast<PatriciaTreeBranch<IntegerType>>(tree);
if (is_zero_bit(key, branch->branching_bit())) {
return contains(key, branch->left_tree());
} else {
return contains(key, branch->right_tree());
}
}
template <typename IntegerType>
inline bool is_subset_of(
const boost::intrusive_ptr<PatriciaTree<IntegerType>>& tree1,
const boost::intrusive_ptr<PatriciaTree<IntegerType>>& tree2) {
if (tree1 == tree2) {
// This conditions allows the inclusion test to run in sublinear time
// when comparing Patricia trees that share some structure.
return true;
}
if (tree1 == nullptr) {
return true;
}
if (tree2 == nullptr) {
return false;
}
if (tree1->is_leaf()) {
const auto& leaf =
boost::static_pointer_cast<PatriciaTreeLeaf<IntegerType>>(tree1);
return contains(leaf->key(), tree2);
}
if (tree2->is_leaf()) {
return false;
}
const auto& branch1 =
boost::static_pointer_cast<PatriciaTreeBranch<IntegerType>>(tree1);
const auto& branch2 =
boost::static_pointer_cast<PatriciaTreeBranch<IntegerType>>(tree2);
if (branch1->prefix() == branch2->prefix() &&
branch1->branching_bit() == branch2->branching_bit()) {
return is_subset_of(branch1->left_tree(), branch2->left_tree()) &&
is_subset_of(branch1->right_tree(), branch2->right_tree());
}
if (branch1->branching_bit() > branch2->branching_bit() &&
match_prefix(
branch1->prefix(), branch2->prefix(), branch2->branching_bit())) {
if (is_zero_bit(branch1->prefix(), branch2->branching_bit())) {
return is_subset_of(branch1->left_tree(), branch2->left_tree()) &&
is_subset_of(branch1->right_tree(), branch2->left_tree());
} else {
return is_subset_of(branch1->left_tree(), branch2->right_tree()) &&
is_subset_of(branch1->right_tree(), branch2->right_tree());
}
}
return false;
}
// A Patricia tree is a canonical representation of the set of keys it contains.
// Hence, set equality is equivalent to structural equality of Patricia trees.
template <typename IntegerType>
inline bool equals(
const boost::intrusive_ptr<PatriciaTree<IntegerType>>& tree1,
const boost::intrusive_ptr<PatriciaTree<IntegerType>>& tree2) {
if (tree1 == tree2) {
// This conditions allows the equality test to run in sublinear time
// when comparing Patricia trees that share some structure.
return true;
}
if (tree1 == nullptr) {
return tree2 == nullptr;
}
if (tree2 == nullptr) {
return false;
}
// Since the hash codes are readily available (they're computed when the trees
// are constructed), we can use them to cut short the equality test.
if (tree1->hash() != tree2->hash()) {
return false;
}
if (tree1->is_leaf()) {
if (tree2->is_branch()) {
return false;
}
const auto& leaf1 =
boost::static_pointer_cast<PatriciaTreeLeaf<IntegerType>>(tree1);
const auto& leaf2 =
boost::static_pointer_cast<PatriciaTreeLeaf<IntegerType>>(tree2);
return leaf1->key() == leaf2->key();
}
if (tree2->is_leaf()) {
return false;
}
const auto& branch1 =
boost::static_pointer_cast<PatriciaTreeBranch<IntegerType>>(tree1);
const auto& branch2 =
boost::static_pointer_cast<PatriciaTreeBranch<IntegerType>>(tree2);
return branch1->prefix() == branch2->prefix() &&
branch1->branching_bit() == branch2->branching_bit() &&
equals(branch1->left_tree(), branch2->left_tree()) &&
equals(branch1->right_tree(), branch2->right_tree());
}
template <typename IntegerType>
inline boost::intrusive_ptr<PatriciaTree<IntegerType>> insert(
IntegerType key,
const boost::intrusive_ptr<PatriciaTree<IntegerType>>& tree) {
if (tree == nullptr) {
return PatriciaTreeLeaf<IntegerType>::make(key);
}
if (tree->is_leaf()) {
const auto& leaf =
boost::static_pointer_cast<PatriciaTreeLeaf<IntegerType>>(tree);
if (key == leaf->key()) {
return leaf;
}
return pt_impl::join<IntegerType>(
key, PatriciaTreeLeaf<IntegerType>::make(key), leaf->key(), leaf);
}
const auto& branch =
boost::static_pointer_cast<PatriciaTreeBranch<IntegerType>>(tree);
if (match_prefix(key, branch->prefix(), branch->branching_bit())) {
if (is_zero_bit(key, branch->branching_bit())) {
auto new_left_tree = insert(key, branch->left_tree());
if (new_left_tree == branch->left_tree()) {
return branch;
}
return PatriciaTreeBranch<IntegerType>::make(branch->prefix(),
branch->branching_bit(),
new_left_tree,
branch->right_tree());
} else {
auto new_right_tree = insert(key, branch->right_tree());
if (new_right_tree == branch->right_tree()) {
return branch;
}
return PatriciaTreeBranch<IntegerType>::make(branch->prefix(),
branch->branching_bit(),
branch->left_tree(),
new_right_tree);
}
}
return pt_impl::join<IntegerType>(
key, PatriciaTreeLeaf<IntegerType>::make(key), branch->prefix(), branch);
}
template <typename IntegerType>
inline boost::intrusive_ptr<PatriciaTree<IntegerType>> insert_leaf(
const boost::intrusive_ptr<PatriciaTreeLeaf<IntegerType>>& leaf,
const boost::intrusive_ptr<PatriciaTree<IntegerType>>& tree) {
if (tree == nullptr) {
return leaf;
}
if (tree->is_leaf()) {
const auto& tree_leaf =
boost::static_pointer_cast<PatriciaTreeLeaf<IntegerType>>(tree);
if (leaf->key() == tree_leaf->key()) {
return tree_leaf;
}
return pt_impl::join<IntegerType>(
leaf->key(), leaf, tree_leaf->key(), tree_leaf);
}
const auto& branch =
boost::static_pointer_cast<PatriciaTreeBranch<IntegerType>>(tree);
if (match_prefix(leaf->key(), branch->prefix(), branch->branching_bit())) {
if (is_zero_bit(leaf->key(), branch->branching_bit())) {
auto new_left_tree = insert_leaf(leaf, branch->left_tree());
if (new_left_tree == branch->left_tree()) {
return branch;
}
return PatriciaTreeBranch<IntegerType>::make(branch->prefix(),
branch->branching_bit(),
new_left_tree,
branch->right_tree());
} else {
auto new_right_tree = insert_leaf(leaf, branch->right_tree());
if (new_right_tree == branch->right_tree()) {
return branch;
}
return PatriciaTreeBranch<IntegerType>::make(branch->prefix(),
branch->branching_bit(),
branch->left_tree(),
new_right_tree);
}
}
return pt_impl::join<IntegerType>(
leaf->key(), leaf, branch->prefix(), branch);
}
template <typename IntegerType>
inline boost::intrusive_ptr<PatriciaTree<IntegerType>> remove(
IntegerType key,
const boost::intrusive_ptr<PatriciaTree<IntegerType>>& tree) {
if (tree == nullptr) {
return nullptr;
}
if (tree->is_leaf()) {
const auto& leaf =
boost::static_pointer_cast<PatriciaTreeLeaf<IntegerType>>(tree);
if (key == leaf->key()) {
return nullptr;
}
return leaf;
}
const auto& branch =
boost::static_pointer_cast<PatriciaTreeBranch<IntegerType>>(tree);
if (match_prefix(key, branch->prefix(), branch->branching_bit())) {
if (is_zero_bit(key, branch->branching_bit())) {
auto new_left_tree = remove(key, branch->left_tree());
if (new_left_tree == branch->left_tree()) {
return branch;
}
return make_branch<IntegerType>(branch->prefix(),
branch->branching_bit(),
new_left_tree,
branch->right_tree());
} else {
auto new_right_tree = remove(key, branch->right_tree());
if (new_right_tree == branch->right_tree()) {
return branch;
}
return make_branch<IntegerType>(branch->prefix(),
branch->branching_bit(),
branch->left_tree(),
new_right_tree);
}
}
return branch;
}
template <typename IntegerType>
inline boost::intrusive_ptr<PatriciaTree<IntegerType>> filter(
const std::function<bool(IntegerType key)>& predicate,
const boost::intrusive_ptr<PatriciaTree<IntegerType>>& tree) {
if (tree == nullptr) {
return nullptr;
}
if (tree->is_leaf()) {
const auto& leaf =
boost::static_pointer_cast<PatriciaTreeLeaf<IntegerType>>(tree);
return predicate(leaf->key()) ? leaf : nullptr;
}
const auto& branch =
boost::static_pointer_cast<PatriciaTreeBranch<IntegerType>>(tree);
auto new_left_tree = filter(predicate, branch->left_tree());
auto new_right_tree = filter(predicate, branch->right_tree());
if (new_left_tree == branch->left_tree() &&
new_right_tree == branch->right_tree()) {
return branch;
} else {
return make_branch<IntegerType>(branch->prefix(),
branch->branching_bit(),
new_left_tree,
new_right_tree);
}
}
// We keep the notations of the paper so as to make the implementation easier
// to follow.
template <typename IntegerType>
inline boost::intrusive_ptr<PatriciaTree<IntegerType>> merge(
const boost::intrusive_ptr<PatriciaTree<IntegerType>>& s,
const boost::intrusive_ptr<PatriciaTree<IntegerType>>& t) {
if (s == t) {
// This conditional is what allows the union operation to complete in
// sublinear time when the operands share some structure.
return s;
}
if (s == nullptr) {
return t;
}
if (t == nullptr) {
return s;
}
// We need to check whether t is a leaf before we do the same for s.
// Otherwise, if s and t are both leaves, we would end up inserting s into t.
// This would violate the assumptions required by `reference_equals()`.
if (t->is_leaf()) {
const auto& leaf =
boost::static_pointer_cast<PatriciaTreeLeaf<IntegerType>>(t);
return insert_leaf(leaf, s);
}
if (s->is_leaf()) {
const auto& leaf =
boost::static_pointer_cast<PatriciaTreeLeaf<IntegerType>>(s);
return insert_leaf(leaf, t);
}
const auto& s_branch =
boost::static_pointer_cast<PatriciaTreeBranch<IntegerType>>(s);
const auto& t_branch =
boost::static_pointer_cast<PatriciaTreeBranch<IntegerType>>(t);
IntegerType m = s_branch->branching_bit();
IntegerType n = t_branch->branching_bit();
IntegerType p = s_branch->prefix();
IntegerType q = t_branch->prefix();
const auto& s0 = s_branch->left_tree();
const auto& s1 = s_branch->right_tree();
const auto& t0 = t_branch->left_tree();
const auto& t1 = t_branch->right_tree();
if (m == n && p == q) {
// The two trees have the same prefix. We just merge the subtrees.
auto new_left = merge(s0, t0);
auto new_right = merge(s1, t1);
if (new_left == s0 && new_right == s1) {
return s;
}
if (new_left == t0 && new_right == t1) {
return t;
}
return PatriciaTreeBranch<IntegerType>::make(p, m, new_left, new_right);
}
if (m < n && match_prefix(q, p, m)) {
// q contains p. Merge t with a subtree of s.
if (is_zero_bit(q, m)) {
auto new_left = merge(s0, t);
if (s0 == new_left) {
return s;
}
return PatriciaTreeBranch<IntegerType>::make(p, m, new_left, s1);
} else {
auto new_right = merge(s1, t);
if (s1 == new_right) {
return s;
}
return PatriciaTreeBranch<IntegerType>::make(p, m, s0, new_right);
}
}
if (m > n && match_prefix(p, q, n)) {
// p contains q. Merge s with a subtree of t.
if (is_zero_bit(p, n)) {
auto new_left = merge(s, t0);
if (t0 == new_left) {
return t;
}
return PatriciaTreeBranch<IntegerType>::make(q, n, new_left, t1);
} else {
auto new_right = merge(s, t1);
if (t1 == new_right) {
return t;
}
return PatriciaTreeBranch<IntegerType>::make(q, n, t0, new_right);
}
}
// The prefixes disagree.
return pt_impl::join(p, s, q, t);
}
template <typename IntegerType>
inline boost::intrusive_ptr<PatriciaTree<IntegerType>> intersect(
const boost::intrusive_ptr<PatriciaTree<IntegerType>>& s,
const boost::intrusive_ptr<PatriciaTree<IntegerType>>& t) {
if (s == t) {
// This conditional is what allows the intersection operation to complete in
// sublinear time when the operands share some structure.
return s;
}
if (s == nullptr || t == nullptr) {
return nullptr;
}
if (s->is_leaf()) {
const auto& leaf =
boost::static_pointer_cast<PatriciaTreeLeaf<IntegerType>>(s);
return contains(leaf->key(), t) ? leaf : nullptr;
}
if (t->is_leaf()) {
const auto& leaf =
boost::static_pointer_cast<PatriciaTreeLeaf<IntegerType>>(t);
return contains(leaf->key(), s) ? leaf : nullptr;
}
const auto& s_branch =
boost::static_pointer_cast<PatriciaTreeBranch<IntegerType>>(s);
const auto& t_branch =
boost::static_pointer_cast<PatriciaTreeBranch<IntegerType>>(t);
IntegerType m = s_branch->branching_bit();
IntegerType n = t_branch->branching_bit();
IntegerType p = s_branch->prefix();
IntegerType q = t_branch->prefix();
const auto& s0 = s_branch->left_tree();
const auto& s1 = s_branch->right_tree();
const auto& t0 = t_branch->left_tree();
const auto& t1 = t_branch->right_tree();
if (m == n && p == q) {
// The two trees have the same prefix. We merge the intersection of the
// corresponding subtrees.
return merge(intersect(s0, t0), intersect(s1, t1));
}
if (m < n && match_prefix(q, p, m)) {
// q contains p. Intersect t with a subtree of s.
return intersect(is_zero_bit(q, m) ? s0 : s1, t);
}
if (m > n && match_prefix(p, q, n)) {
// p contains q. Intersect s with a subtree of t.
return intersect(s, is_zero_bit(p, n) ? t0 : t1);
}
// The prefixes disagree.
return nullptr;
}
template <typename IntegerType>
inline boost::intrusive_ptr<PatriciaTree<IntegerType>> diff(
const boost::intrusive_ptr<PatriciaTree<IntegerType>>& s,
const boost::intrusive_ptr<PatriciaTree<IntegerType>>& t) {
if (s == t) {
// This conditional is what allows the intersection operation to complete in
// sublinear time when the operands share some structure.
return nullptr;
}
if (s == nullptr) {
return nullptr;
}
if (t == nullptr) {
return s;
}
if (s->is_leaf()) {
const auto& leaf =
boost::static_pointer_cast<PatriciaTreeLeaf<IntegerType>>(s);
return contains(leaf->key(), t) ? nullptr : leaf;
}
if (t->is_leaf()) {
const auto& leaf =
boost::static_pointer_cast<PatriciaTreeLeaf<IntegerType>>(t);
return remove(leaf->key(), s);
}
const auto& s_branch =
boost::static_pointer_cast<PatriciaTreeBranch<IntegerType>>(s);
const auto& t_branch =
boost::static_pointer_cast<PatriciaTreeBranch<IntegerType>>(t);
IntegerType m = s_branch->branching_bit();
IntegerType n = t_branch->branching_bit();
IntegerType p = s_branch->prefix();
IntegerType q = t_branch->prefix();
const auto& s0 = s_branch->left_tree();
const auto& s1 = s_branch->right_tree();
const auto& t0 = t_branch->left_tree();
const auto& t1 = t_branch->right_tree();
if (m == n && p == q) {
// The two trees have the same prefix. We merge the difference of the
// corresponding subtrees.
return merge(diff(s0, t0), diff(s1, t1));
}
if (m < n && match_prefix(q, p, m)) {
// q contains p. Diff t with a subtree of s.
if (is_zero_bit(q, m)) {
return merge(diff(s0, t), s1);
} else {
return merge(s0, diff(s1, t));
}
}
if (m > n && match_prefix(p, q, n)) {
// p contains q. Diff s with a subtree of t.
if (is_zero_bit(p, n)) {
return diff(s, t0);
} else {
return diff(s, t1);
}
}
// The prefixes disagree.
return s;
}
// The iterator basically performs a post-order traversal of the tree, pausing
// at each leaf.
template <typename Element>
class PatriciaTreeIterator final {
public:
// C++ iterator concept member types
using iterator_category = std::forward_iterator_tag;
using value_type = Element;
using difference_type = std::ptrdiff_t;
using pointer = Element*;
using reference = const Element&;
using IntegerType = typename PatriciaTreeSet<Element>::IntegerType;
PatriciaTreeIterator() {}
explicit PatriciaTreeIterator(
const boost::intrusive_ptr<PatriciaTree<IntegerType>>& tree) {
if (tree == nullptr) {
return;
}
go_to_next_leaf(tree);
}
PatriciaTreeIterator& operator++() {
// We disallow incrementing the end iterator.
RUNTIME_CHECK(m_leaf != nullptr, undefined_operation());
if (m_stack.empty()) {
// This means that we were on the rightmost leaf. We've reached the end of
// the iteration.
m_leaf = nullptr;
return *this;
}
// Otherwise, we pop out a branch from the stack and move to the leftmost
// leaf in its right-hand subtree.
auto branch = m_stack.top();
m_stack.pop();
go_to_next_leaf(branch->right_tree());
return *this;
}
PatriciaTreeIterator operator++(int) {
PatriciaTreeIterator retval = *this;
++(*this);
return retval;
}
bool operator==(const PatriciaTreeIterator& other) const {
// Note that there's no need to check the stack (it's just used to traverse
// the tree).
return m_leaf == other.m_leaf;
}
bool operator!=(const PatriciaTreeIterator& other) const {
return !(*this == other);
}
Element operator*() {
return PatriciaTreeSet<Element>::decode(m_leaf->key());
}
private:
// The argument is never null.
void go_to_next_leaf(
const boost::intrusive_ptr<PatriciaTree<IntegerType>>& tree) {
auto t = tree;
// We go to the leftmost leaf, storing the branches that we're traversing
// on the stack. By definition of a Patricia tree, a branch node always
// has two children, hence the leftmost leaf always exists.
while (t->is_branch()) {
auto branch =
boost::static_pointer_cast<PatriciaTreeBranch<IntegerType>>(t);
m_stack.push(branch);
t = branch->left_tree();
// A branch node always has two children.
RUNTIME_CHECK(t != nullptr, internal_error());
}
m_leaf = boost::static_pointer_cast<PatriciaTreeLeaf<IntegerType>>(t);
}
std::stack<boost::intrusive_ptr<PatriciaTreeBranch<IntegerType>>> m_stack;
boost::intrusive_ptr<PatriciaTreeLeaf<IntegerType>> m_leaf;
};
} // namespace pt_impl
} // namespace sparta