glean/rts/ownership/setu32.cpp (415 lines of code) (raw):
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
#include "glean/rts/ownership/setu32.h"
#include <xxhash.h>
using namespace folly::compression;
namespace facebook {
namespace glean {
namespace rts {
namespace {
template<typename T>
bool vec_eq(const std::vector<T>& x, const std::vector<T>& y) {
const auto n = x.size();
return n == y.size()
&& (n == 0 || std::memcmp(x.data(), y.data(), n * sizeof(x[0])) == 0);
}
template<typename T>
static uint64_t vec_hash(const std::vector<T>& x, uint64_t seed = 0) {
return XXH64(x.data(), x.size() * sizeof(x[0]), seed);
}
template<typename T>
static size_t vec_bytes(const std::vector<T>& x) {
return x.size() * sizeof(x[0]);
}
}
bool SetU32::Block::operator==(const SetU32::Block& other) const {
if (hdr != other.hdr) {
return false;
}
switch (hdr.type()) {
case SetU32::Hdr::Sparse:
return std::memcmp(&*sparse, &*other.sparse, hdr.sparseLen()) == 0;
case SetU32::Hdr::Dense:
return *dense == *other.dense;
case SetU32::Hdr::Full:
return true;
}
}
bool SetU32::Block::includes(const SetU32::Block& other) const {
if (hdr.id() != other.hdr.id()) {
return false;
}
switch (hdr.type()) {
case SetU32::Hdr::Sparse:
return other.hdr.type() == SetU32::Hdr::Sparse
&& std::includes(
sparse,
sparse + hdr.sparseLen(),
other.sparse,
other.sparse + other.hdr.sparseLen());
case SetU32::Hdr::Dense:
switch (other.hdr.type()) {
case SetU32::Hdr::Sparse: {
const auto v = *dense;
for (auto i = 0; i < other.hdr.sparseLen(); ++i) {
if (!v.contains(other.sparse[i])) {
return false;
}
}
return true;
}
case SetU32::Hdr::Dense: {
return dense->includes(*other.dense);
}
case SetU32::Hdr::Full:
return false;
}
break;
case SetU32::Hdr::Full:
return true;
}
}
SetU32::SetU32(const SetU32& other, SetU32::copy_capacity_tag) {
reserve(other.capacities());
hdrs.insert(hdrs.begin(), other.hdrs.begin(), other.hdrs.end());
dense.insert(dense.begin(), other.dense.begin(), other.dense.end());
sparse.insert(sparse.begin(), other.sparse.begin(), other.sparse.end());
}
bool SetU32::operator==(const SetU32& other) const {
return vec_eq(hdrs, other.hdrs)
&& vec_eq(dense, other.dense)
&& vec_eq(sparse, other.sparse);
}
uint64_t SetU32::hash(uint64_t seed) const {
return vec_hash(sparse, vec_hash(dense, vec_hash(hdrs, seed)));
}
SetU32::Sizes SetU32::sizes() const {
return {hdrs.size(), dense.size(), sparse.size()};
}
SetU32::Sizes SetU32::capacities() const {
return {hdrs.capacity(), dense.capacity(), sparse.capacity()};
}
size_t SetU32::bytes() const {
return vec_bytes(hdrs) + vec_bytes(dense) + vec_bytes(sparse);
}
SetU32::const_iterator SetU32::lower_bound(
SetU32::const_iterator start,
SetU32::const_iterator finish,
uint32_t id) {
struct Local {
static std::pair<uint32_t, uint32_t> lengths(
std::vector<Hdr>::const_iterator first,
std::vector<Hdr>::const_iterator last) {
uint32_t dense = 0;
uint32_t sparse = 0;
while (first != last) {
dense += first->denseLen();
sparse += first->sparseLen();
++first;
}
return {dense, sparse};
}
};
auto pos = std::lower_bound(
start.hdrs,
finish.hdrs,
id,
[](auto x, auto id) { return x.before(id); });
const auto l = pos - start.hdrs;
const auto r = finish.hdrs - pos;
if (l <= r) {
const auto [d,s] = Local::lengths(start.hdrs, pos);
return {pos, start.block.dense + d, start.block.sparse + s};
} else {
const auto [d,s] = Local::lengths(pos, finish.hdrs);
return {pos, finish.block.dense - d, finish.block.sparse - s};
}
}
void SetU32::reserve(Sizes sizes) {
hdrs.reserve(sizes.hdrs);
dense.reserve(sizes.dense);
sparse.reserve(sizes.sparse);
}
void SetU32::shrink_to_fit() {
hdrs.shrink_to_fit();
dense.shrink_to_fit();
sparse.shrink_to_fit();
}
size_t SetU32::size() const {
size_t s = 0;
for (auto &block : *this) {
switch (block.hdr.type()) {
case Hdr::Sparse: {
s += block.hdr.sparseLen();
break;
}
case Hdr::Dense: {
s += block.dense->count();
break;
}
case Hdr::Full: {
s += 256;
break;
}
}
}
return s;
}
uint32_t SetU32::upper() const {
auto &hdr = this->hdrs.back();
auto id = hdr.id() << 8;
switch (hdr.type()) {
case Hdr::Sparse: {
return id | this->sparse.back();
}
case Hdr::Dense: {
return id | this->dense.back().upper();
}
case Hdr::Full: {
return id | 255;
}
}
}
void SetU32::append(uint32_t value) {
const auto block = value / 256;
const auto bit = value % 256;
if (!hdrs.empty() && hdrs.back().id() == block) {
auto& hdr = hdrs.back();
switch (hdr.type()) {
case Hdr::Sparse:
assert(bit >= sparse.back());
if (bit != sparse.back()) {
const auto len = hdr.sparseLen();
if (fitsSparse(len, 1)) {
hdr.addSparseLen(1);
sparse.push_back(bit);
} else {
dense.push_back(Bits256(&*(sparse.end() - len), len) | Bits256::single(bit));
sparse.resize(sparse.size() - len);
hdr = Hdr::dense(block);
}
}
break;
case Hdr::Dense:
dense.back() |= Bits256::single(bit);
if (dense.back() == Bits256::all()) {
dense.pop_back();
hdr = Hdr::full(block);
}
break;
case Hdr::Full:
assert(bit == 255);
}
} else {
hdrs.push_back(Hdr::sparse(block, 1));
sparse.push_back(bit);
}
}
void SetU32::append(const_iterator start, const_iterator finish) {
hdrs.insert(hdrs.end(), start.hdrs, finish.hdrs);
dense.insert(dense.end(), start.block.dense, finish.block.dense);
sparse.insert(sparse.end(), start.block.sparse, finish.block.sparse);
}
namespace {
enum Which { Left, Right };
std::tuple<const SetU32*, const SetU32*, SetU32::const_iterator, SetU32::const_iterator>
skipSubset(const SetU32* l, const SetU32* r) {
auto left = l->begin();
auto left_end = l->end();
auto right = r->begin();
auto right_end = r->end();
while (left != left_end && right != right_end && *left == *right) {
++left;
++right;
}
bool swapit = false;
if (right != right_end) {
if (left == left_end || right.hdrs->id() < left.hdrs->id()) {
swapit = true;
} else if(right->includes(*left)) {
++left;
++right;
swapit = true;
}
}
if (swapit) {
std::swap(l,r);
std::swap(left,right);
std::swap(left_end,right_end);
}
while (left != left_end && right != right_end) {
if (left.hdrs->id() < right.hdrs->id()) {
left = SetU32::lower_bound(left, left_end, right.hdrs->id());
} else if (left->includes(*right)) {
++left;
++right;
} else {
break;
}
}
return {l, r, left, right};
}
}
void SetU32::append(uint32_t id, Bits256 w) {
if (w == Bits256::all()) {
hdrs.push_back(Hdr::full(id));
} else {
hdrs.push_back(Hdr::dense(id));
dense.push_back(w);
}
}
void SetU32::appendMerge(SetU32::Block left, SetU32::Block right) {
struct Local {
static void mergeSparse(
SetU32& set,
uint32_t id,
std::vector<uint8_t>::const_iterator ls,
uint8_t ln,
std::vector<uint8_t>::const_iterator rs,
uint8_t rn) {
uint8_t n = 0;
while (ln != 0 && rn != 0 && fitsSparse(n,1)) {
const auto l = *ls;
const auto r = *rs;
set.sparse.push_back(l <= r ? l : r);
++n;
if (l <= r) {
++ls;
--ln;
}
if (r <= l) {
++rs;
--rn;
}
}
if (fitsSparse(n, ln+rn)) {
set.hdrs.push_back(Hdr::sparse(id, n+ln+rn));
set.sparse.insert(set.sparse.end(), ls, ls+ln);
set.sparse.insert(set.sparse.end(), rs, rs+rn);
} else {
const auto dense = Bits256(&*(set.sparse.end()-n),n).with(&*ls,ln).with(&*rs,rn);
set.hdrs.push_back(Hdr::dense(id));
set.dense.push_back(dense);
set.sparse.resize(set.sparse.size()-n);
}
}
};
switch(left.hdr.type()) {
case SetU32::Hdr::Sparse:
switch(right.hdr.type()) {
case SetU32::Hdr::Sparse:
Local::mergeSparse(*this, left.hdr.id(), left.sparse, left.hdr.sparseLen(), right.sparse, right.hdr.sparseLen());
break;
case SetU32::Hdr::Dense:
append(left.hdr.id(), right.dense->with(&*left.sparse, left.hdr.sparseLen()));
break;
case SetU32::Hdr::Full:
hdrs.push_back(right.hdr);
break;
}
break;
case SetU32::Hdr::Dense:
switch (right.hdr.type()) {
case SetU32::Hdr::Sparse:
append(left.hdr.id(), left.dense->with(&*right.sparse, right.hdr.sparseLen()));
break;
case SetU32::Hdr::Dense:
append(left.hdr.id(), *left.dense | *right.dense);
break;
case SetU32::Hdr::Full:
hdrs.push_back(right.hdr);
break;
}
break;
case SetU32::Hdr::Full:
hdrs.push_back(left.hdr);
break;
}
}
void SetU32::appendMerge(
SetU32::const_iterator left,
SetU32::const_iterator left_end,
SetU32::const_iterator right,
SetU32::const_iterator right_end) {
while (left != left_end && right != right_end) {
if (left.hdrs->id() < right.hdrs->id()) {
const auto prev = left;
left = SetU32::lower_bound(left, left_end, right.hdrs->id());
append(prev,left);
} else if (right.hdrs->id() < left.hdrs->id()) {
const auto prev = right;
right = SetU32::lower_bound(right, right_end, left.hdrs->id());
append(prev,right);
} else {
appendMerge(*left, *right);
++left;
++right;
}
}
append(left, left_end);
append(right, right_end);
}
const SetU32 *SetU32::merge(SetU32& result, const SetU32& left, const SetU32& right) {
if (&left == &right) {
return &left;
} else {
auto [super, sub, super_s, sub_s] = skipSubset(&left, &right);
if (sub_s == sub->end()) {
return super;
} else {
result.reserve(left.sizes() + right.sizes());
result.append(super->begin(), super_s);
result.appendMerge(super_s, super->end(), sub_s, sub->end());
return &result;
}
}
}
SetU32::MutableEliasFanoList SetU32::toEliasFano() {
auto upperBound = this->upper();
size_t size = this->size();
folly::compression::EliasFanoEncoder<uint32_t, uint32_t> encoder(
size, upperBound);
VLOG(5) << "upper=" << upperBound << ", size=" << size;
foreach([&](uint32_t elt) {
encoder.add(elt);
});
return encoder.finish();
}
SetU32 SetU32::fromEliasFano(const EliasFanoList& list) {
SetU32 set;
auto reader =
EliasFanoReader<folly::compression::EliasFanoEncoder<uint32_t, uint32_t>>(
list);
while (reader.next()) {
set.append(reader.value());
}
return set;
}
void SetU32::dump(SetU32 &set) {
for (auto &block : set) {
auto id = block.hdr.id() << 8;
switch (block.hdr.type()) {
case SetU32::Hdr::Sparse: {
for (uint32_t i = 0; i < block.hdr.sparseLen(); i++) {
LOG(INFO) << "sparse: " << (id | block.sparse[i]);
}
break;
}
case SetU32::Hdr::Dense: {
for (uint32_t i = 0; i < 256; i++) {
if (block.dense->contains(i)) {
LOG(INFO) << "dense: " << (id | i);
}
}
break;
}
case SetU32::Hdr::Full: {
for (uint32_t i = 0; i < 256; i++) {
LOG(INFO) << "full: " << (id | i);
}
break;
}
}
}
}
}
}
}