glean/rts/ownership/uset.h (174 lines of code) (raw):
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
#pragma once
#include "glean/rts/ownership/setu32.h"
#include <folly/container/F14Set.h>
#include <folly/Hash.h>
#include <vector>
namespace facebook {
namespace glean {
namespace rts {
using UsetId = uint32_t;
constexpr UsetId INVALID_USET = 0xffffffff;
enum SetOp { Or, And };
/**
* A set expression
*
* Elements of the set are
* 0 ... MAX_UNIT_ID : units
* MAX_UNIT_ID+1 ... : sets
*
* facts with this owner set are visible if and only if:
* Or => any of the members of the set are visible
* And => all the members of the set are visible
*/
template<typename Set>
struct SetExpr {
SetOp op;
Set set;
bool operator==(const SetExpr<Set>& other) const& {
return op == other.op && set == other.set;
}
};
/**
* A "unique" set stored by `Usets` below. This is a `SetU32` with a memoized
* hash, a ref count and some administrative data used by the ownership
* algorithms. It should probably be given a more sane interface.
*/
struct Uset {
explicit Uset(SetU32 s, uint32_t r = 0) :
exp({ Or, std::move(s) }), refs(r) {}
explicit Uset(SetU32 s, SetOp op = Or, uint32_t r = 0) :
exp({ op, std::move(s) }), refs(r) {}
void rehash() {
hash = exp.set.hash(exp.op); // don't forget to include op in the hash
}
void use(int32_t n) {
refs += n;
}
void drop() {
if (--refs == 0) {
delete this;
}
}
void *link() const {
return ptr;
}
void link(void *p) {
ptr = p;
}
/** The set */
SetExpr<SetU32> exp;
/** The set's hash which must be memoized explicitly via `rehash`. */
size_t hash;
/** Reference count */
uint32_t refs;
/**
* Once a set is promoted to the DB (i.e. it is the ownership set of
* at least one fact), we assign it a 32-bit ID. Before it is promoted,
* id is INVALID_USET.
*/
UsetId id = INVALID_USET;
bool promoted() const { return id != INVALID_USET; };
/**
* Generic pointer used temporarily for a variety of things - it is much
* faster than a F14FastMap<Uset *, T>.
*/
void *ptr = nullptr;
struct Eq {
bool operator()(const Uset *x, const Uset *y) const {
return x == y || x->exp == y->exp;
}
};
struct Hash {
using folly_is_avalanching = std::true_type;
size_t operator()(const Uset *x) const {
return x->hash;
}
};
using MutableEliasFanoList = SetU32::MutableEliasFanoList;
SetExpr<MutableEliasFanoList> toEliasFano();
};
/**
* Container for `Usets` which guarantees to store each set exactly once.
*/
struct Usets {
explicit Usets(uint32_t firstId)
: firstId(firstId), nextId(firstId) {}
Usets(Usets&& other) noexcept
: firstId(other.firstId), nextId(other.nextId) {
std::swap(usets, other.usets);
stats = other.stats;
}
~Usets() {
// We can't delete the Usets before destroying the `usets` map because
// the latter might access hashes in its destructor.
std::vector<Uset *> refs;
refs.reserve(usets.size());
refs.insert(refs.end(), usets.begin(), usets.end());
usets.clear();
for (auto ref : refs) {
delete ref;
}
}
template<typename F>
void foreach(F&& f) {
for (auto entry : usets) {
f(entry);
}
}
Uset *add(Uset *entry) {
entry->rehash();
const auto [p, added] = usets.insert(entry);
if (added) {
entry->exp.set.shrink_to_fit();
++stats.adds;
stats.bytes += entry->exp.set.bytes();
return entry;
} else {
++stats.dups;
use(*p, entry->refs);
return *p;
}
}
Uset *add(std::unique_ptr<Uset> entry) {
auto p = add(entry.get());
if (p == entry.get()) {
entry.release();
}
return p;
}
Uset *add(SetU32 set, uint32_t refs = 0) {
return add(std::unique_ptr<Uset>(new Uset(std::move(set), refs)));
}
Uset *lookup(Uset *entry) const {
entry->rehash();
auto it = usets.find(entry);
if (it != usets.end()) {
return *it;
} else {
return nullptr;
}
}
// only when both sets have the same op
Uset *merge(Uset *left, Uset *right) {
SetU32 set;
assert(left->exp.op == right->exp.op);
auto res = SetU32::merge(set, left->exp.set, right->exp.set);
if (res == &set) {
return add(std::move(set), 1);
} else {
auto& r = res == &left->exp.set ? left : right;
use(r, 1);
return r;
}
}
void use(Uset *set, uint32_t refs = 1) {
set->refs += refs;
}
void drop(Uset *uset) {
assert(uset->refs != 0);
--uset->refs;
if (uset->refs == 0) {
assert(!uset->promoted());
usets.erase(uset);
stats.bytes -= uset->exp.set.bytes();
delete uset;
}
}
void promote(Uset * uset) {
if (!uset->promoted()) {
uset->id = nextId++;
++uset->refs;
++stats.promoted;
}
}
size_t size() const {
return usets.size();
}
struct Stats {
size_t bytes = 0;
size_t promoted = 0;
size_t adds = 0;
size_t dups = 0;
};
const Stats& statistics() const {
return stats;
}
UsetId getFirstId() const { return firstId; }
UsetId getNextId() const { return nextId; }
using MutableEliasFanoList = SetU32::MutableEliasFanoList;
std::vector<SetExpr<MutableEliasFanoList>> toEliasFano();
private:
folly::F14FastSet<Uset *, Uset::Hash, Uset::Eq> usets;
Stats stats;
const UsetId firstId;
UsetId nextId;
};
}
}
}