common/unordered_inline_set.h (674 lines of code) (raw):
#pragma once
#include <assert.h>
#include <inttypes.h>
#include <initializer_list>
#include <memory>
#include <type_traits>
#include <limits>
#include <cmath>
#include <string.h>
// an array with possible missing values
template<typename T>
class SparseArray {
protected:
// T[i] are elements; and ((bit*)T)[-i]
// are bools for existance of T[i]
T* _ptr = nullptr;
size_t _capacity = 0, _size = 0;
bool get_bit(size_t i) const {
auto c = (char*)_ptr - i / 8 - 1;
return *c & (1 << (i % 8));
}
void set_bit(size_t i) {
auto c = (char*)_ptr - i / 8 - 1;
*c |= (1 << (i % 8));
}
void erase_bit(size_t i) {
auto c = (char*)_ptr - i / 8 - 1;
*c &= ~(1 << (i % 8));
}
size_t get_s(size_t capacity) const {
auto u = sizeof(T) * 8;
return (capacity + u - 1) / u;
}
void* buf() {
return _ptr - get_s(_capacity);
}
const T* _get(size_t i) const {
assert(i < _capacity);
return get_bit(i) ? (_ptr + i) : nullptr;
}
public:
SparseArray(size_t capacity) { reset(capacity); }
void* reset(size_t new_capacity) {
if (_size > 0)
for (auto x: *this) ((T&)x).~T();
if (new_capacity == _capacity)
return buf();
auto s = get_s(new_capacity) * sizeof(T);
if (new_capacity > 0) {
auto ptr = realloc(buf(), s + new_capacity * sizeof(T));
memset(ptr, 0, s);
_ptr = (T*)((char*)ptr + s);
_capacity = new_capacity;
_size = 0;
return ptr;
} else {
free(buf());
_capacity = _size = 0;
return _ptr = nullptr;
}
}
SparseArray(SparseArray&& rhs) { *this = std::move(rhs); }
void operator=(SparseArray&& rhs) {
reset(0);
_ptr = rhs._ptr;
_capacity = rhs._capacity;
_size = rhs._size;
rhs._ptr = nullptr;
rhs._capacity = 0;
rhs._size = 0;
}
template<typename X>
void set(size_t i, X&& x) {
assert(i < _capacity);
if (get_bit(i)) {
_ptr[i] = std::forward<X>(x);
} else {
new (_ptr + i) T(std::forward<X>(x));
set_bit(i);
_size++;
}
}
template<typename...Xs>
void emplace(size_t i, Xs&&...xs) {
assert(i < _capacity);
if (!get_bit(i)) {
// _ptr[i].T(std::forward<Xs>(xs)...); // these 2 forms should be almost equal, except
new (_ptr + i) T(std::forward<Xs>(xs)...); // this is compatible with basic types, like int, etc.
set_bit(i);
_size++;
} else {
_ptr[i].~T();
new (_ptr + i) T(std::forward<Xs>(xs)...);
}
}
const T* get(size_t i) const noexcept { return _get(i); }
T* get(size_t i) noexcept { return (T*)_get(i); }
void erase(size_t i) {
assert(i < _capacity);
if (get_bit(i)) {
erase_bit(i);
_ptr[i].~T();
}
}
bool empty() const noexcept { return size() == 0; }
size_t size() const noexcept { return _size; }
size_t capacity() const noexcept { return _capacity; }
~SparseArray() { reset(0); }
class iterator {
protected:
SparseArray* _a = nullptr;
size_t _i = 0;
void seek(size_t d) {
_i = _a->seek_existance(_i + d, d);
}
public:
iterator() = default;
iterator(SparseArray* a, size_t i = 0) : _a(a), _i(i) { }
iterator& operator++() { seek(1); return *this; }
iterator& operator--() { seek(-1); return *this; }
iterator operator++(int) { auto r = *this; seek(1); return r; }
iterator operator--(int) { auto r = *this; seek(-1); return r; }
void erase() { _a->erase(_i); }
const T& operator*() const { return *_a->get(_i); }
const T* operator->() const { return _a->get(_i); }
template<typename P>
void set(P&& x) { _a->set(_i, std::forward<P>(x)); }
bool operator==(const iterator& rhs) const {
return _a == rhs._a && _i == rhs._i;
}
bool operator!=(const iterator& rhs) const {
return !(*this == rhs);
}
T extract() {
assert(_a && _i < _a->size());
assert(_a->get_bit(_i));
auto v = _a->get(_i);
if (!v) return T();
auto r = std::move(*v);
_a->erase_bit(_i);
return r;
}
iterator get_next() const {
return ++iterator(*this);
}
using difference_type = std::ptrdiff_t;
using value_type = T;
using pointer = T*;
using reference = T&;
using iterator_category = std::bidirectional_iterator_tag;
};
iterator begin() noexcept { return _begin(); }
iterator end() noexcept { return {this, _capacity}; }
iterator begin() const noexcept { return ((SparseArray*)this)->_begin(); }
iterator end() const noexcept { return {(SparseArray*)this, _capacity}; }
bool iterator_valid(iterator it) const noexcept {
return it._a == this && it._i <= capacity();
}
const T& operator[](size_t i) const {
return *get(i);
}
const T& operator[](iterator it) const {
assert(iterator_valid(it));
return *get(it._i);
}
template<class P = T>
typename std::enable_if<std::is_trivially_copyable<
std::remove_reference_t<P>>::value>::type
operator = (const SparseArray& rhs) {
auto b = reset(rhs._capacity);
auto src = (char*)rhs.buf();
memcpy(b, src, (char*)(rhs._ptr + rhs._capacity) - src);
_size = rhs._size;
}
template<class P = T>
typename std::enable_if<!std::is_trivially_copyable<
std::remove_reference_t<P>>::value>::type
operator = (const SparseArray& rhs) {
auto b = reset(rhs._capacity);
for (auto it = rhs.begin(); it != rhs.end(); ++it)
(*this)[it] = *it;
}
protected:
size_t seek_existance(size_t i, size_t delta) {
for (; i < _capacity; i += delta)
if (get_bit(i))
return i;
return _capacity;
}
iterator _begin() noexcept {
return {this, seek_existance(0, 1)};
}
};
// A hash table with open addressing, i.e. collision
// is resolved by probing (searching) through
// alternative locations in the array. This design
// avoids allocation and deallocation during insertion
// and deletion, unless it needs to rehash.
template<class Key,
class Hash = std::hash<Key>,
class KeyEqual = std::equal_to<Key>,
class Allocator = std::allocator<Key>>
class unordered_inline_set {
// it also features the ability to extract (move out) a
// stored element itself, without involving a "handle".
protected:
constexpr static size_t MIN_CAPACITY = 16;
SparseArray<Key> _data { MIN_CAPACITY };
constexpr static float DEFAULT_LOAD_FACTOR_MIN = 0.1;
constexpr static float DEFAULT_LOAD_FACTOR_MAX = 0.5;
float _load_factor_max = DEFAULT_LOAD_FACTOR_MAX;
float _load_factor_min = DEFAULT_LOAD_FACTOR_MIN;
size_t _load_max = MIN_CAPACITY * DEFAULT_LOAD_FACTOR_MAX;
size_t _load_min = MIN_CAPACITY * DEFAULT_LOAD_FACTOR_MIN;
uint8_t _max_collision = 8;
Hash _hasher;
KeyEqual _key_equal;
Allocator _alloc;
void _set_load_min_max() {
_load_max = _data.capacity() * _load_factor_max;
_load_min = _data.capacity() * _load_factor_min;
}
public:
using key_type = Key;
using value_type = Key;
using size_type = std::size_t;
using difference_type = std::ptrdiff_t;
using hasher = Hash;
using key_equal = KeyEqual;
using allocator_type = Allocator;
using reference = value_type&;
using const_reference = const value_type&;
using pointer = typename std::allocator_traits<Allocator>::pointer;
using const_pointer = typename std::allocator_traits<Allocator>::const_pointer;
unordered_inline_set() = default;
explicit unordered_inline_set( size_type bucket_count,
const Hash& hash = Hash(),
const key_equal& equal = key_equal(),
const Allocator& alloc = Allocator() )
: _data(bucket_count), _hasher(hash), _key_equal(equal), _alloc(alloc) {}
explicit unordered_inline_set( size_type bucket_count,
const Allocator& alloc )
: unordered_inline_set(bucket_count, Hash(), key_equal(), alloc) {}
explicit unordered_inline_set( size_type bucket_count,
const Hash& hash,
const Allocator& alloc )
: unordered_inline_set(bucket_count, hash, key_equal(), alloc) {}
explicit unordered_inline_set( const Allocator& alloc )
: _alloc(alloc) { }
template< class InputIt >
explicit unordered_inline_set( InputIt first, InputIt last,
size_type bucket_count = MIN_CAPACITY,
const Hash& hash = Hash(),
const key_equal& equal = key_equal(),
const Allocator& alloc = Allocator() )
: unordered_inline_set(bucket_count, hash, equal, alloc) {
_set_load_min_max();
for (auto it = first; it != last; ++it)
insert(*it);
}
template< class InputIt >
unordered_inline_set( InputIt first, InputIt last,
size_type bucket_count,
const Allocator& alloc )
: unordered_inline_set(first, last,
bucket_count, Hash(), key_equal(), alloc) {}
template< class InputIt >
unordered_inline_set( InputIt first, InputIt last,
size_type bucket_count,
const Hash& hash,
const Allocator& alloc )
: unordered_inline_set(first, last,
bucket_count, hash, key_equal(), alloc) {}
unordered_inline_set( const unordered_inline_set& other )
: unordered_inline_set(other.begin(), other.end()) {}
unordered_inline_set( const unordered_inline_set& other, const Allocator& alloc )
: unordered_inline_set(other.begin(), other.end(), MIN_CAPACITY, alloc) {}
unordered_inline_set( unordered_inline_set&& other )
: unordered_inline_set( std::move(other), std::move(other._alloc) ) {}
unordered_inline_set( unordered_inline_set&& other, const Allocator& alloc )
: _data(std::move(other._data)), _hasher(std::move(_hasher)),
_key_equal(std::move(other._key_equal)), _alloc(alloc) { }
unordered_inline_set( std::initializer_list<value_type> init,
size_type bucket_count = MIN_CAPACITY,
const Hash& hash = Hash(),
const key_equal& equal = key_equal(),
const Allocator& alloc = Allocator() )
: unordered_inline_set(init.begin(), init.end(),
bucket_count, hash, equal, alloc) {}
unordered_inline_set( std::initializer_list<value_type> init,
size_type bucket_count,
const Allocator& alloc )
: unordered_inline_set(init, bucket_count,
Hash(), key_equal(), alloc) {}
unordered_inline_set( std::initializer_list<value_type> init,
size_type bucket_count,
const Hash& hash,
const Allocator& alloc )
: unordered_inline_set(init, bucket_count,
hash, key_equal(), alloc) {}
#if __cplusplus >= 202300L
template< compatible_range_type R >
unordered_inline_set( std::from_range_t, R&& rg,
size_type bucket_count = MIN_CAPACITY,
const Hash& hash = Hash(),
const key_equal& equal = key_equal(),
const Allocator& alloc = Allocator() );
template< compatible_range_type R >
unordered_inline_set( std::from_range_t, R&& rg,
size_type bucket_count,
const Allocator& alloc )
: unordered_inline_set(std::from_range, std::forward<R>(rg),
bucket_count, Hash(), key_equal(), alloc) {}
template< compatible_range_type R >
unordered_inline_set( std::from_range_t, R&& rg,
size_type bucket_count,
const Hash& hash,
const Alloc& alloc )
: unordered_inline_set(std::from_range, std::forward<R>(rg),
bucket_count, hash, key_equal(), alloc) {}
#endif
template<typename Iterator>
void assign(Iterator begin, Iterator end, size_t capacity = 0) {
_data.reset(capacity);
_load_max = capacity * _load_factor_max;
_load_min = capacity * _load_factor_min;
_set_load_min_max();
for (auto it = begin; it != end; ++it)
insert(*it);
}
unordered_inline_set& operator=( const unordered_inline_set& other ) {
assign(other.begin(), other.end(), other.size());
}
unordered_inline_set& operator=( unordered_inline_set&& other ) noexcept {
_data.reset(0);
_data = std::move(other._data);
_load_min = other._load_min;
_load_max = other._load_max;
_load_factor_min = other._load_factor_min;
_load_factor_max = other._load_factor_max;
_hasher = std::move(other._hasher);
_key_equal = std::move(other._key_equal);
_alloc = std::move(other._alloc);
}
unordered_inline_set& operator=( std::initializer_list<value_type> ilist ) {
assign(ilist.begin(), ilist.end(), ilist.size());
}
allocator_type get_allocator() const noexcept {
return _alloc;
}
using const_iterator = const typename SparseArray<Key>::iterator;
using iterator = const_iterator;
// iterator begin() noexcept { return _data.begin(); }
const_iterator begin() const noexcept { return _data.begin(); }
const_iterator cbegin() const noexcept { return _data.begin(); }
// iterator end() noexcept { return _data.end(); }
const_iterator end() const noexcept { return _data.end(); }
const_iterator cend() const noexcept { return _data.end(); }
bool empty() const noexcept {
return size() == 0;
}
size_type size() const noexcept {
return _data.size();
}
size_type max_size() const noexcept {
return std::numeric_limits<size_type>::max();
}
uint8_t max_collision() const noexcept {
return _max_collision;
}
void max_collision(uint8_t n) {
_max_collision = n;
}
void clear() noexcept {
assign(nullptr, nullptr, 0);
}
protected:
using SAKI = typename SparseArray<Key>::iterator;
template<typename K>
auto _make_room( const K& value ) -> std::pair<SAKI, bool> {
assert(_data.size() < _load_max );
auto capacity = _data.capacity();
assert(capacity >= _data.size());
assert((capacity & (capacity - 1)) == 0); // capacity is 2^n
if (_data.size() + 1 >= _load_max) {
do_rehash:
rehash(capacity *= 2);
}
auto r = _find(value);
if (r.first < capacity) // found available slot (true) or collision (false)
return {{&_data, r.first}, r.second == capacity};
goto do_rehash;
}
template<typename K>
auto _find(const K& value) const -> std::pair<size_t, size_t> {
auto capacity = _data.capacity();
assert(capacity >= _data.size());
assert((capacity & (capacity - 1)) == 0); // capacity is 2^n
size_t mask = capacity - 1;
auto begin = _hasher(value) % capacity;
for (size_t i = begin; i < begin + _max_collision; ++i) {
auto j = i & mask;
auto item = _data.get(j);
if (!item) return {j, capacity};
if (_key_equal(*item, value)) return {j, j};
}
return {capacity, capacity};
}
public:
template< class InputIt >
void insert( InputIt first, InputIt last ) {
for (auto it = first; it != last; ++it)
insert(*it);
}
void insert( std::initializer_list<value_type> ilist ) {
insert(ilist.begin(), ilist.end());
}
std::pair<iterator,bool> insert( const value_type& value ) {
auto r = _make_room(value);
if (r.second) *r.first = value;
return r;
}
std::pair<iterator,bool> insert( value_type&& value ) {
auto r = _make_room(value);
if (r.second) r.first.set(std::move(value));
return r;
}
template< class K >
std::pair<iterator, bool> insert( K&& value ) {
auto r = _make_room(value);
if (r.second) r.first.set(std::forward<K>(value));
return r;
}
iterator insert( const_iterator hint, const value_type& value ) {
return insert(value).first;
}
iterator insert( const_iterator hint, value_type&& value ) {
return insert(std::move(value)).first;
}
template< class K >
iterator insert( const_iterator hint, K&& value ) {
return insert<K>(std::forward<K>(value)).first;
}
#if __cplusplus >= 202300L
template< compatible_range_type R >
void insert_range( R&& rg );
#endif
template< class... Args >
std::pair<iterator, bool> emplace( Args&&... args ) {
// we need to construct the obj of T before hashing it
return insert( T(std::forward<Args>(args)...) );
}
template< class... Args >
iterator emplace_hint( const_iterator hint, Args&&... args ) {
return emplace( std::forward<Args>(args)... ).first;
}
size_type count( const Key& key ) const {
return find(key) != end();
}
template< class K >
size_type count( const K& x ) const {
return find<K>(x) != end();
}
bool contains( const Key& key ) const {
return find(key) != end();
}
template< class K >
bool contains( const K& x ) const {
return find(x) != end();
}
std::pair<iterator, iterator> equal_range( const Key& key ) {
auto it = find(key);
if (it == end()) return {it, it};
return {it, it.get_next()};
}
std::pair<const_iterator, const_iterator> equal_range( const Key& key ) const {
auto it = find(key);
if (it == end()) return {it, it};
return {it, it.get_next()};
}
template< class K >
std::pair<iterator, iterator> equal_range( const K& x ) {
auto it = find(x);
if (it == end()) return {it, it};
return {it, it.get_next()};
}
template< class K >
std::pair<const_iterator, const_iterator> equal_range( const K& x ) const {
auto it = find(x);
if (it == end()) return {it, it};
return {it, it.get_next()};
}
public:
iterator find( const Key& key ) {
return {&_data, _find(key).second};
}
const_iterator find( const Key& key ) const {
return {(SparseArray<Key>*)&_data, _find(key).second};
}
template< class K >
iterator find( const K& x ) {
return {&_data, _find(x).second};
}
template< class K >
const_iterator find( const K& x ) const {
return {&_data, _find(x).second};
}
iterator erase( iterator pos ) {
if (pos._a != &_data ||
pos._i >= _data.capacity())
return pos;
(pos++).erase();
_check_down_rehash(pos);
return pos;
}
// iterator erase( const_iterator pos );
iterator erase( const_iterator first, const_iterator last ) {
assert(first._a == _data);
assert( last._a == _data);
while (first != last)
(first++).erase();
return first;
}
protected:
void _rehash( size_type count, iterator& pos ) {
assert(pos._a == &_data && pos._i <= _data.capacity());
auto at_least = std::ceil(_data.size() / max_load_factor());
if (count < at_least) count = at_least;
if (size() == count) return;
auto data = std::move(_data);
_data.reset(count);
_set_load_min_max();
for (auto it = data.begin(); it != data.end(); ++it)
insert(it.extract());
}
void _check_down_rehash(iterator pos) {
if (_data.size() >= _load_min ||
_data.capacity() <= MIN_CAPACITY) return;
rehash(_data.capacity() / 2);
_load_min /= 2;
_load_max /= 2;
}
void _check_down_rehash() {
if (_data.size() <= _load_max ) return;
rehash(_data.capacity() * 2);
_load_min *= 2;
_load_max *= 2;
}
size_type _erase_it(iterator it) {
if (it != end()) {
erase(it);
return 1;
} else {
return 0;
}
}
public:
size_type erase( const Key& key ) {
return _erase_it(find(key));
}
template< class K >
size_type erase( K&& k ) {
return _erase_it(find(std::forward<K>(k)));
}
void swap( unordered_inline_set& other ) noexcept;
template< class K >
value_type&& extract( K&& x ) {
return extract(find(std::forward<K>(x)));
}
value_type&& extract( iterator position ) {
assert(_data.iterator_valid(position));
return position.extract();
}
value_type&& extract( const Key& key ) {
return extract(find(key));
}
// bucket interface
using local_iterator = const_iterator;
using const_local_iterator = const_iterator;
protected:
local_iterator _bucket_it(size_type i) {
if (!_data.get(i)) i = _data.capacity();
return {_data, i};
}
public:
local_iterator begin( size_type n ) {
return _bucket_it(n);
}
const_local_iterator begin( size_type n ) const {
return _bucket_it(n);
}
const_local_iterator cbegin( size_type n ) const {
return _bucket_it(n);
}
local_iterator end( size_type n ) {
return _bucket_it(n);
}
const_local_iterator end( size_type n ) const {
return _bucket_it(n);
}
const_local_iterator cend( size_type n ) const {
return _bucket_it(n);
}
size_type bucket_count() const {
return _data.capacity();
}
size_type max_bucket_count() const {
return max_size();
}
size_type bucket_size( size_type n ) const {
return 1;
}
size_type bucket( const Key& key ) const {
return _hasher(key) % bucket_count();
}
template< typename K >
size_type bucket( const K& k ) const {
return _hasher(k) % bucket_count();
}
float load_factor() const {
return float(_data.size()) / _data.capacity();
}
void load_factor(float min, float max) {
if (0 < min && min < max && max < 1) {
_load_factor_min = min;
_load_factor_max = max;
_set_load_min_max();
}
}
float max_load_factor() const {
return _load_factor_max;
}
void max_load_factor( float mlf ) {
assert(mlf < 1);
if (_load_factor_min < mlf && mlf < 1) {
_load_factor_max = mlf;
_load_max = _data.capacity() * mlf;
}
}
void rehash( size_type count ) {
auto at_least = std::ceil(_data.size() / max_load_factor());
if (count < at_least) count = at_least;
if (size() == count) return;
auto data = std::move(_data);
_load_max = count * _load_factor_max;
_load_min = count * _load_factor_min;
_data.reset(count);
for (auto it = data.begin(); it != data.end(); ++it)
insert(it.extract());
}
void reserve( size_type count ) {
rehash(std::ceil(count / max_load_factor()));
}
hasher hash_function() const {
return _hasher;
}
key_equal key_eq() const {
return _key_equal;
}
bool operator == (const unordered_inline_set& rhs) const {
return _data.size() == rhs._data.size() && alleq(rhs);
}
bool operator != (const unordered_inline_set& rhs) const {
return !(*this == rhs);
}
template<typename Pred>
size_type erase_if(const Pred& pred) {
size_type s0 = size();
for (auto it = begin(); it != end(); )
it = pred(*it) ? erase(it) : ++it;
return size() - s0;
}
protected:
bool alleq(const unordered_inline_set& rhs) {
for (auto& x: _data)
if (rhs.count(x) == 0)
return false;
return true;
}
};
template< class Key, class Hash, class KeyEqual, class Alloc >
void swap( unordered_inline_set<Key, Hash, KeyEqual, Alloc>& lhs,
unordered_inline_set<Key, Hash, KeyEqual, Alloc>& rhs ) noexcept {
lhs.swap(rhs);
}
template< class Key, class Hash, class KeyEqual, class Alloc, class Pred >
typename unordered_inline_set<Key, Hash, KeyEqual, Alloc>::size_type
erase_if(unordered_inline_set<Key, Hash, KeyEqual, Alloc>& c, const Pred& pred) {
return c.erase_if(pred);
}