glean/rts/cache.h
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
#pragma once
#include <vector>
#include <folly/SharedMutex.h>
#include <folly/Synchronized.h>
#include <folly/ThreadCachedInt.h>
#include "glean/rts/factset.h"
#include "glean/rts/lookup.h"
namespace facebook {
namespace glean {
namespace rts {
/// An LRU fact cache for speeding up point lookups (and only those) during
/// writes. It only loads as much data as has been requested by the client,
/// which means that for any given fact it can store just its type (via
/// typeById), its type and key (via idByKey), or everything (via factById).
///
/// The cache itself doesn't implement Lookup. To actually look things up, it
/// must be paired with a base Lookup via 'anchor'. This allows the cache to
/// not be tied to the lifetime of one specific Lookup instance. It is the
/// user's responsibility to ensure that the Lookups it is paired with are
/// morally the same throughout its lifetime (such as the same database opened
/// multiple times).
///
/// The cache can be used concurrently by multiple threads and is supposed to
/// scale at least a little bit for hits. The hash maps are guarded by
/// a read-write, write-priority lock and the LRU list by a mutex. Crucially,
/// we don't update the LRU list on every access. Rather, we record all
/// accesses in append-only, lossy buffers sharded by threads (cf. the Touched
/// structure below). These get drained (in a not-really-LRU order) when we
/// need to evict or when they become full. This (hopefully) minimises
/// contention for read-only accesses. This is quite similar to what, e.g.,
/// Java's Caffeine library does.
///
/// TODO: Switch to a variant of the Clock algorithm or something similar which
/// lets us avoid a doubly linked list (which costs 2 pointers per fact).
/// TODO: We probably want to remove facts which haven't been used for some time
/// even if the cache isn't full.
/// TODO: Say we cache the result of idByKey but then only ever do typeById
/// afterwards. This will keep the cache entry alive and we will never
/// get rid of the key even though it's not needed. We'll see how much
/// of a problem this is.
/// TODO: We want to share one capacity between all db caches.
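///
/// A minimal usage sketch (assuming 'cache' is an already constructed
/// LookupCache, 'db' is a Lookup* for the underlying database, and 'pid' and
/// 'key' come from elsewhere):
///
///   auto anchor = cache.anchor(db);    // pair the cache with a base Lookup
///   Id id = anchor.idByKey(pid, key);  // point lookup, served from the cache
///                                      // on a hit, from 'db' otherwise
///   Pid ty = anchor.typeById(id);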
class LookupCache {
public:
// A Stats object can be shared between different caches which will accumulate
// their statistics into it.
//
// xxx_hits = cache hits by lookup function xxx
// xxx_misses = true misses (i.e., fact isn't cached but exists in the Lookup)
// xxx_failures = fact doesn't exist in the Lookup
// xxx_deletes = fact existed in the cache but had too little information
// (e.g., only the type but xxx also needed the key)
//
// NOTE: This must be kept in sync with the Counter type in
// Glean.RTS.Foreign.LookupCache
struct Stats {
enum Stat : size_t {
FIRST_COUNTER,
idByKey_hits = FIRST_COUNTER,
idByKey_misses,
idByKey_failures,
idByKey_deletes,
typeById_hits,
typeById_misses,
typeById_failures,
factById_hits,
factById_misses,
factById_failures,
factById_deletes,
FIRST_NON_COUNTER,
factBytes = FIRST_NON_COUNTER,
factCount,
COUNT
};
folly::ThreadCachedInt<uint64_t> values[COUNT];
/// Obtain an approximate snapshot of the values in 'Stats'.
std::vector<uint64_t> read();
/// Obtain an approximate snapshot of the values in 'Stats' and reset all
/// counters to 0 (but not sums like total cache size).
std::vector<uint64_t> readAndResetCounters();
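    //
    // A sketch of inspecting a couple of counters (this assumes the snapshot
    // vector is indexed by the Stat enum above):
    //
    //   auto snapshot = stats->read();
    //   const uint64_t hits = snapshot[Stats::idByKey_hits];
    //   const uint64_t misses = snapshot[Stats::idByKey_misses];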
};
struct Options {
/// Max capacity
size_t capacity;
    /// Number of shards for the Touched buffers - should typically be the
    /// number of threads.
size_t shards = std::thread::hardware_concurrency();
    /// How many hits to record locally before draining them to the global
    /// buffer.
size_t touched_buffer_size = 64 * 1024;
};
LookupCache(const Options& opts, std::shared_ptr<Stats> s);
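  //
  // A construction sketch (the capacity value is an arbitrary example; a
  // Stats object may be shared with other caches):
  //
  //   auto stats = std::make_shared<LookupCache::Stats>();
  //   LookupCache::Options opts;
  //   opts.capacity = 200 * 1024 * 1024;
  //   LookupCache cache(opts, stats);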
LookupCache(const LookupCache&) = delete;
LookupCache(LookupCache&&) = delete;
LookupCache& operator=(const LookupCache&) = delete;
LookupCache& operator=(LookupCache&&) = delete;
// Clear statistics
void clear();
// LookupCache paired with a base Lookup for looking up facts
struct Anchor : Lookup {
Anchor(Lookup *b, LookupCache *c)
: base(b), cache(c)
{}
Id idByKey(Pid type, folly::ByteRange key) override;
Pid typeById(Id id) override;
bool factById(Id id, std::function<void(Pid, Fact::Clause)> f) override;
    Id startingId() const override { return base->startingId(); }
    Id firstFreeId() const override { return base->firstFreeId(); }
std::unique_ptr<FactIterator> enumerate(Id from, Id upto) override;
std::unique_ptr<FactIterator> enumerateBack(Id from, Id downto) override;
Interval count(Pid pid) const override { return base->count(pid); }
std::unique_ptr<FactIterator>
seek(Pid type, folly::ByteRange start, size_t prefix_size) override;
Lookup *base;
LookupCache *cache;
};
friend struct Anchor;
Anchor anchor(Lookup *base) {
return Anchor(base, this);
}
  // Execute a bulk loading function which takes a 'Store&' argument. The cache
  // is locked during the execution, so any lookups through it will deadlock.
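  //
  // A sketch of a bulk load (where 'refs' is a hypothetical container of
  // Fact::Ref values to be cached):
  //
  //   cache.withBulkStore([&](Store& store) {
  //     for (const Fact::Ref& ref : refs) {
  //       store.insert(ref);
  //     }
  //   });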
template<typename F> inline void withBulkStore(F&& f) {
performUpdate([&](auto& windex, auto& wstorage) {
Inserter inserter(*this, windex, wstorage);
f(static_cast<Store&>(inserter));
});
}
private:
Options options;
struct Index {
FastSetBy<const Fact *, FactById> ids; // id -> fact
FastSetBy<const Fact *, FactByKey> keys; // (type,key) -> fact
// Only delete (as in free) facts while holding this lock exclusively. By
// construction, it can only be acquired when holding a lock for the Index.
folly::SharedMutex delete_lock;
};
using SyncIndex = folly::Synchronized<Index, folly::SharedMutex>;
SyncIndex index; // index guarded by r/w lock
class Storage {
public:
Storage() {}
Storage(Storage&&) = default;
Storage(const Storage&) = delete;
Storage& operator=(Storage&&) = default;
Storage& operator=(const Storage&) = delete;
~Storage();
using iterator = Fact::intrusive_list::iterator;
iterator begin() { return facts.begin(); }
iterator end() { return facts.end(); }
size_t factBytes() const { return bytes; }
size_t factCount() const { return count; }
const Fact *push_back(Fact::unique_ptr fact);
Fact::unique_ptr release(const Fact *fact);
void touch(const Fact *fact);
class Range {
public:
const Fact * FOLLY_NULLABLE next() {
if (pos != finish) {
const auto fact = &*pos;
bytes += fact->size();
++count;
++pos;
return fact;
} else {
return nullptr;
}
}
bool empty() const {
return pos == finish;
}
size_t factBytes() const { return bytes; }
size_t factCount() const { return count; }
private:
Range(iterator s, iterator e)
: start(s)
, pos(s)
, finish(e)
, bytes(0)
, count(0)
{}
friend class Storage;
iterator start;
iterator pos;
iterator finish;
size_t bytes;
size_t count;
};
Range start() {
return Range(begin(), end());
}
void splice_to(
Fact::intrusive_list& list,
Fact::intrusive_list::iterator pos,
Range range);
private:
Fact::intrusive_list facts;
size_t bytes = 0;
size_t count = 0;
};
using SyncStorage = folly::Synchronized<Storage, std::mutex>;
SyncStorage storage; // fact storage guarded by mutex
// NOTE: locking order is always Index then Storage, never the other way
// round.
// Container for recording cache hits. This is intentionally very lossy as it
// can be written to by multiple threads without synchronisation.
struct alignas(folly::hardware_destructive_interference_size) Touched {
// Make things atomic for the unlikely case we're ever going to be running
// on something other than x86_64.
    // Recorded hits; written lossily by multiple threads.
    std::unique_ptr<std::atomic<const Fact *>[]> facts;
    // Next slot to write in 'facts'.
    std::atomic<size_t> next { 0 };
};
std::vector<Touched> touched; // cache hits sharded by thread
std::shared_ptr<Stats> stats; // statistics
  // Execute a function which updates the cache and update the fact count and
  // size statistics accordingly.
template<typename F> inline void performUpdate(F&& f) {
// We actually rely on wrap-around for underflow for these two. Initialise
// to make Infer happy.
uint64_t bytes_diff = 0;
uint64_t count_diff = 0;
index.withWLock([&](auto& windex) {
storage.withLock([&](auto& wstorage) {
const auto initial_bytes = wstorage.factBytes();
const auto initial_count = wstorage.factCount();
f(windex, wstorage);
bytes_diff = wstorage.factBytes() - initial_bytes;
count_diff = wstorage.factCount() - initial_count;
});
});
stats->values[Stats::factBytes] += bytes_diff;
stats->values[Stats::factCount] += count_diff;
}
// Insert a new fact into the cache.
void insert(Fact::unique_ptr);
// Insert a new fact into the locked cache and move all evicted facts into
// 'dead'.
void insertOne(
Index& index,
Storage& storage,
Fact::unique_ptr,
Fact::intrusive_list& dead);
// Delete a fact from the locked index but not from the storage
static void deleteFromIndex(Index& index, const Fact *fact);
// Record a hit on a particular fact. Note that the ownership of the read
// lock is passed to touch which will release it.
void touch(SyncIndex::RLockedPtr, const Fact *);
// Evict facts from the cache until we've freed up at least target bytes and
// move evicted facts into 'dead'.
static void evict(
Index& index,
Storage& storage,
size_t target,
Fact::intrusive_list& dead);
// Drain hits recorded in 'touched', updating the storage as necessary.
static void drain(Storage& storage, Touched& touched);
struct Inserter : Store {
LookupCache& cache;
Index& index;
Storage& storage;
Inserter(LookupCache& c, Index& i, Storage& s)
: cache(c), index(i), storage(s) {}
void insert(Fact::Ref fact) override;
};
};
} // namespace rts
} // namespace glean
} // namespace facebook