glean/cpp/glean.h (357 lines of code) (raw):
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
/*
* A very simple types C++ interface to Glean batches.
*
*/
#pragma once
#include <tuple>
#include <boost/operators.hpp>
#include <boost/variant.hpp>
#include <folly/Format.h>
#include "glean/rts/binary.h"
#include "glean/rts/cache.h"
#include "glean/rts/inventory.h"
#include "glean/rts/id.h"
#include "glean/rts/stacked.h"
namespace facebook {
namespace glean {
namespace cpp {
using rts::Id;
using rts::Pid;
// There are two kinds of types:
// * Representation types
// * Value types
//
// Representation Types
// --------------------
//
// These aren't real values, they're just tags which can be filled
// by different types. For instance, a Tuple<Nat,Nat> might be filled by
// std::tuple<uint64_t, size_t> and an Array<Byte> might be filled by
// std::string, fbstring etc.
//
// The following types are supported:
//
// Byte bytes
//
// Nat packed natural numbers (up to 64 bits)
//
// Array<T> arrays
//
// Tuple<...> tuples
// Unit = Tuple<>
//
// Sum<...> sum types
// Maybe<T> = Sum<Unit,T>
//
// Enum<N> enums with range N
// (map to sums underneath)
// Bool = Enum<2>
//
// Predicates, e.g.:
// struct Foo : Predicate<KeyType, ValueType> {
// static const char *name() { return "Foo"; }
// };
// NOTE: KeyType and ValueType are *value* types. This is to
// increase type safety when constructing facts: in fact<P>(x),
// x must have the right type, not just the right shape.
//
//
// Value Types
// -----------
//
// Value types are inhabited instances of representation types.
// A value type has a single representation type, however there
// are multiple possible value types for a given representation.
// e.g. for the representation Array<Byte>, value types might be
// std::string or std::vector<uint8_t>.
//
// std::vector<T>
// value type corresponding to Array<Repr<T>>
//
// std::tuple<Xs...>
// value type corresponding to Tuple<Repr<Xs>...>
//
// facts
// Fact<P> is a reference to a fact of predicate P
//
// enumerated types
// enum class Foo { thing1, thing2 };
// template<> struct Repr_<Foo> {
// using Type = Enum<2>;
// }
//
// user-defined arbitrary types which map to any representation type
// struct Foo {
// uint64_t x,y;
// void outputRepr(Output<Repr<Foo>> output) const {
// outputValue(output, std::make_pair(x,y));
// }
// };
// template<> struct Repr_<Foo> {
// using Type = Tuple<Nat,Nat>;
// }
//
//
// The Repr<T> template
// --------------------
//
// Repr<V> is the representation type corresponding to the
// value type V.
//
// Repr<V> is defined for:
// * user-defined structs
// * enumerated types
template<typename T> struct Repr_;
// Convenience:
template<typename T> using Repr = typename Repr_<T>::Type;
struct Byte;
template<> struct Repr_<unsigned char> { using Type = Byte; };
struct Nat;
template<> struct Repr_<unsigned long> { using Type = Nat; };
struct String;
template<> struct Repr_<std::string> { using Type = String; };
template<typename T> struct Array;
template<typename T>
struct Repr_<std::vector<T>> {
using Type = Array<Repr<T>>;
};
template<typename... Ts> struct Tuple;
using Unit = Tuple<>;
template<typename... Ts>
struct Repr_<std::tuple<Ts...>> {
using Type = Tuple<Repr<Ts>...>;
};
template<typename... Ts> struct Sum;
template<typename T>
using Maybe = Sum<Unit,T>;
template<size_t i, typename T>
struct Alt : private boost::operators<Alt<i,T>> {
T value;
explicit Alt(const T& x) : value(x) {}
explicit Alt(T&& x) : value(std::forward<T>(x)) {}
bool operator==(const Alt& other) const {
return value == other.value;
}
bool operator<(const Alt& other) const {
return value < other.value;
}
};
template<size_t... is, typename... Ts>
struct Repr_<boost::variant<Alt<is,Ts>...>> {
using Type = Sum<Repr<Ts>...>;
};
template<size_t i, typename T>
Alt<i,std::decay_t<T>> alt(T&& x) {
return Alt<i,std::decay_t<T>>(std::forward<T>(x));
}
inline Alt<0,std::tuple<>> nothing() {
return alt<0>(std::make_tuple());
}
template<typename T>
Alt<1,std::decay_t<T>> just(T&& x) {
return alt<1>(std::forward<T>(x));
}
template<typename T> using maybe_type =
boost::variant<Alt<0, std::tuple<>>, Alt<1,T>>;
template<typename T>
maybe_type<T> maybe(const folly::Optional<T>& x) {
if (x) {
return alt<1>(x.value());
} else {
return alt<0>(std::make_tuple());
}
}
// Enums - a special case of sums
//
// T is the actual value type, N is the range of the enum
template<size_t N> struct Enum {};
using Bool = Enum<2>;
template<> struct Repr_<bool> { using Type = Bool; };
// Predicates
//
// Predicates are type tags. Fact<P> (see below) is the corresponding value.
//
// A predicate is reflected into C++ as follows:
//
// struct Foo : Predicate<Tuple<Nat,Nat>> {
// static const char *name() { return "foo"; }
// };
//
// struct Bar : Predicate<Foo,Array<Byte>> {
// static const char *name() { return "bar"; }
// };
//
template<typename Key, typename Value = std::tuple<>>
struct Predicate {
using KeyType = Key;
using ValueType = Value;
};
// Facts
template<typename P> struct Fact : private boost::operators<Fact<P>> {
Id getId() const { return id; }
bool operator==(const Fact& other) const {
return id == other.id;
}
bool operator<(const Fact& other) const {
return id < other.id;
}
private:
template<typename Schema>
friend class Batch;
explicit Fact(Id i) : id(i) {}
Id id;
};
// The representation type of a Fact is its predicate:
template<typename P> struct Repr_<Fact<P>> { using Type = P; };
// Typed value output buffers
template<typename T>
struct Output {
binary::Output& output;
explicit Output(binary::Output& o) : output(o) {}
};
template<typename T, typename U>
Output<T> unsafeAs(Output<U> o) {
return Output<T>(o.output);
}
inline void outputValue(Output<Byte> o, uint8_t x) {
o.output.fixed(x);
}
inline void outputValue(Output<Nat> o, uint64_t x) {
o.output.packed(x);
}
inline void outputValue(Output<String> o, const std::string& s) {
// TODO: Validate UTF-8?
o.output.mangleString(binary::byteRange(s));
}
template<typename T, typename U>
void outputValue(Output<Array<T>> o, std::initializer_list<U> xs) {
o.output.packed(xs.size());
for (const auto& x : xs) {
outputValue(unsafeAs<T>(o), x);
}
}
template<typename T, typename U>
void outputValue(Output<Array<T>> o, const std::vector<U>& xs) {
o.output.packed(xs.size());
for (const auto& x : xs) {
outputValue(unsafeAs<T>(o), x);
}
}
template<typename T, typename Iter>
void outputValue(Output<Array<T>> o, const folly::Range<Iter>& range) {
o.output.packed(range.size());
for (const auto& x : range) {
outputValue(unsafeAs<T>(o), x);
}
}
inline void outputValue(Output<Array<Byte>> o, folly::ByteRange r) {
o.output.packed(r.size());
o.output.put(r);
}
inline void outputValue(Output<Array<Byte>> o, const std::string& s) {
outputValue(o, binary::byteRange(s));
}
inline void outputValue(Output<Array<Byte>> o, const folly::fbstring& s) {
outputValue(o, binary::byteRange(s));
}
namespace detail {
template<size_t i, typename U>
void outputValues(binary::Output&, const U&) {}
template<size_t i, typename U, typename T, typename... Ts>
void outputValues(binary::Output& output, const U& u) {
outputValue(Output<T>(output), std::get<i>(u));
outputValues<i+1, U, Ts...>(output,u);
}
}
template<
typename... Ts,
typename... Us,
typename = std::enable_if_t<sizeof...(Ts) == sizeof...(Us)>>
void outputValue(Output<Tuple<Ts...>> o, const std::tuple<Us...>& xs) {
detail::outputValues<0, std::tuple<Us...>, Ts...>(o.output, xs);
}
template<
size_t i,
typename U,
typename... Ts,
typename = std::enable_if_t<i < sizeof...(Ts)>>
void outputValue(Output<Sum<Ts...>> o, const Alt<i,U>& alt) {
o.output.packed(i);
outputValue(
unsafeAs<typename std::tuple_element<i, std::tuple<Ts...>>::type>(o),
alt.value);
}
namespace {
template<typename... Ts>
struct OutputAlt : public boost::static_visitor<void> {
explicit OutputAlt(Output<Sum<Ts...>> o) : out(o) {}
template<size_t i, typename U>
void operator()(const Alt<i,U>& alt) const {
outputValue(out,alt);
}
Output<Sum<Ts...>> out;
};
}
template<
typename... Ts,
typename... Us,
typename = std::enable_if_t<sizeof...(Ts) == sizeof...(Us)>>
void outputValue(Output<Sum<Ts...>> o, const boost::variant<Us...>& v) {
boost::apply_visitor(OutputAlt<Ts...>(o), v);
}
template<typename T, typename U>
void outputValue(Output<Sum<Unit,T>> o, const folly::Optional<U>& x) {
if (x) {
o.output.packed(1);
outputValue(unsafeAs<T>(o), x.value());
} else {
o.output.packed(0);
}
}
template<typename T>
void outputValue(Output<Sum<Unit,T>> o, folly::None) {
o.output.packed(0);
}
template<typename T, size_t N>
void outputValue(Output<Enum<N>> o, T x) {
uint64_t n = static_cast<uint64_t>(x);
assert (n < N);
outputValue(unsafeAs<Nat>(o), n);
}
template<typename P>
void outputValue(Output<P> o, Fact<P> fact) {
o.output.packed(fact.getId());
}
template<typename T>
void outputValue(Output<Repr<T>> o, const T& x) {
x.outputRepr(o);
}
struct FactStats {
size_t memory;
size_t count;
};
struct SchemaInventory {
rts::Inventory inventory;
/// Mapping from the autogenerated Schema::index<P>::value to the Id of P
/// in the inventory (if it exists there).
std::vector<const rts::Predicate * FOLLY_NULLABLE> predicates;
};
class BatchBase {
public:
explicit BatchBase(
const SchemaInventory *inventory,
size_t cache_capacity);
BatchBase(BatchBase&&) = default;
BatchBase& operator=(BatchBase&&) = default;
BatchBase(const BatchBase&) = delete;
BatchBase& operator=(const BatchBase&) = delete;
const rts::Predicate * FOLLY_NULLABLE predicate(size_t i) const {
return inventory->predicates[i];
}
Id define(Pid ty, rts::Fact::Clause clause) {
return facts.define(ty, clause);
}
thrift::Batch serialize() const;
void rebase(const thrift::Subst&);
FactStats bufferStats() const {
return FactStats{buffer.factMemory(), buffer.size()};
}
// TODO: This is a temporary hack for backwards compatibility, add proper
// stats reporting
struct CacheStats {
FactStats facts;
size_t hits = 0;
size_t misses = 0;
};
CacheStats cacheStats();
private:
const SchemaInventory *inventory;
std::shared_ptr<rts::LookupCache::Stats> stats;
rts::LookupCache cache;
rts::LookupCache::Anchor anchor;
rts::FactSet buffer;
rts::Stacked<rts::Define> facts;
};
/// A typed instantiation of an Inventory for a particular Schema.
template<typename Schema>
struct DbSchema {
SchemaInventory inventory;
explicit DbSchema(rts::Inventory inv)
: inventory{std::move(inv), {}}
{
inventory.predicates = getPredicates(inventory.inventory);
}
private:
using ref_t = std::pair<std::string, size_t>;
template<size_t i> using pred_t =
typename Schema::template predicate<i>::type;
template<size_t... I> static std::vector<ref_t> getRefs(
std::index_sequence<I...>) {
return std::vector<ref_t>{
ref_t{pred_t<I>::GLEAN_name(),pred_t<I>::GLEAN_version()}...};
}
using pred_map_t =
std::unordered_map<std::pair<std::string, int32_t>, size_t>;
static std::vector<const rts::Predicate * FOLLY_NULLABLE>
getPredicates(const rts::Inventory& inventory) {
const auto seq = std::make_index_sequence<Schema::count>();
auto refs = getRefs(seq);
std::unordered_map<ref_t, size_t> indices;
for (size_t i = 0; i < refs.size(); ++i) {
indices.insert({refs[i], i});
}
const auto inventory_preds = inventory.predicates();
std::vector<const rts::Predicate * FOLLY_NULLABLE>
preds(refs.size(), nullptr);
for (auto p : inventory_preds) {
auto i = indices.find({p->name, p->version});
if (i != indices.end()) {
preds[i->second] = p;
}
}
// TODO: verify that predicates have the expected types
return preds;
}
};
// A Batch encapsulates a set of cached facts which is initially empty and a
// set of facts local to the Batch. Local facts can reference cached ones but
// not vice versa. New local facts can be added to the Batch.
//
// A Batch can be serialized (typically for sending to the server) and rebased
// based on a Substitution (typically received from the server). Local facts
// that are in range of the substitution are moved to the cache; the remaining
// local facts are assigned new Ids which don't clash which cached ones.
//
template<typename Schema>
class Batch : private BatchBase {
public:
Batch(const DbSchema<Schema>* schema, size_t cache_capacity)
: BatchBase(&(schema->inventory), cache_capacity)
{}
template<typename P>
const rts::Predicate *predicate() const {
if (auto p = base().predicate(Schema::template index<P>::value)) {
return p;
} else {
throw std::runtime_error(
std::string("unknown predicate ") + P::GLEAN_name()
+ "[" + folly::to<std::string>(P::GLEAN_version()) + "]");
}
}
template<
typename P,
typename =
std::enable_if_t<
std::is_same<typename P::ValueType, std::tuple<>>::value>>
Fact<P> fact(const typename P::KeyType& x) {
binary::Output output;
outputValue(Output<Repr<typename P::KeyType>>(output), x);
auto fact = base().define(
predicate<P>()->id, rts::Fact::Clause::fromKey(output.bytes()));
assert(fact);
return Fact<P>(fact);
}
template<
typename P,
typename... Ts,
typename = std::enable_if_t<1 < sizeof...(Ts)>>
Fact<P> fact(Ts&&... xs) {
return fact<P>(std::make_tuple(std::forward<Ts>(xs)...));
}
template<typename P>
Fact<P> factV(const typename P::KeyType& k, const typename P::ValueType& v) {
binary::Output clause;
outputValue(Output<Repr<typename P::KeyType>>(clause), k);
const auto key_size = clause.size();
outputValue(Output<Repr<typename P::ValueType>>(clause), v);
auto fact = base().define(
predicate<P>()->id, rts::Fact::Clause::from(clause.bytes(), key_size));
assert(fact);
return Fact<P>(fact);
}
BatchBase& base() { return *this; }
const BatchBase& base() const { return *this; }
using BatchBase::serialize;
using BatchBase::rebase;
using BatchBase::bufferStats;
using BatchBase::CacheStats;
using BatchBase::cacheStats;
};
}
}
}