// glean/rts/cache.cpp
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
#include "glean/rts/cache.h"
#include <folly/concurrency/CacheLocality.h>
namespace facebook {
namespace glean {
namespace rts {
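/// Snapshot all stat values. readFull sums each counter's per-thread
/// buckets, so the result is exact.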
std::vector<uint64_t> LookupCache::Stats::read() {
std::vector<uint64_t> buffer(COUNT);
for (size_t i = 0; i < COUNT; ++i) {
buffer[i] = values[i].readFull();
}
return buffer;
}
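/// Snapshot all stat values, resetting the monotonic counters (indices
/// below FIRST_NON_COUNTER) to zero. The remaining entries, such as
/// factBytes and factCount, track current state and are read unchanged.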
std::vector<uint64_t> LookupCache::Stats::readAndResetCounters() {
std::vector<uint64_t> buffer(COUNT);
for (size_t i = 0; i < FIRST_NON_COUNTER; ++i) {
buffer[i] = values[i].readFullAndReset();
}
for (size_t i = FIRST_NON_COUNTER; i < COUNT; ++i) {
buffer[i] = values[i].readFull();
}
return buffer;
}
LookupCache::Storage::~Storage() {
facts.erase_and_dispose(facts.begin(), facts.end(), Fact::destroy);
}
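/// Take ownership of a fact and append it at the most-recently-used end of
/// the list.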
const Fact *LookupCache::Storage::push_back(Fact::unique_ptr fact) {
facts.push_back(*fact);
bytes += fact->size();
++count;
return fact.release();
}
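/// Unlink a fact from the list, adjusting the byte and fact counts, and
/// hand ownership back to the caller.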
Fact::unique_ptr LookupCache::Storage::release(const Fact *fact) {
assert(bytes >= fact->size());
assert(count > 0);
bytes -= fact->size();
--count;
facts.erase(facts.iterator_to(*fact));
return Fact::unique_ptr(const_cast<Fact *>(fact));
}
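/// Move the facts covered by `range` from this storage onto `list` at
/// `pos`, adjusting the byte and fact counts accordingly.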
void LookupCache::Storage::splice_to(
Fact::intrusive_list& list,
Fact::intrusive_list::iterator pos,
LookupCache::Storage::Range range) {
assert(bytes >= range.bytes);
assert(count >= range.count);
list.splice(pos, facts, range.start, range.pos);
bytes -= range.bytes;
count -= range.count;
}
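/// Move a fact to the most-recently-used end of the list.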
void LookupCache::Storage::touch(const Fact *fact) {
facts.erase(facts.iterator_to(*fact));
facts.push_back(const_cast<Fact&>(*fact));
}
namespace {
/// We use the tag field of Fact to indicate how much data the fact has.
enum Tag : unsigned int {
TYPE = 0, // type only (not key or value)
KEY = 1, // type and key only (not value)
FULL = 2 // everything
};
}
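/// Each shard gets a fixed-size buffer of recently hit facts which is
/// drained into the LRU list lazily - see touch and drain below.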
LookupCache::LookupCache(
const Options& opts,
std::shared_ptr<LookupCache::Stats> s)
: options(opts)
, touched(opts.shards)
, stats(std::move(s)) {
for (auto& t : touched) {
t.facts = std::make_unique<std::atomic<const Fact *>[]>(
options.touched_buffer_size);
}
}
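// A rough usage sketch (illustrative only; how an Anchor is obtained is
// declared in cache.h, and the anchor() helper below is hypothetical):
//
//   auto stats = std::make_shared<LookupCache::Stats>();
//   LookupCache cache(LookupCache::Options{}, stats);
//   auto anchor = cache.anchor(&baseLookup); // hypothetical construction
//   const Id id = anchor.idByKey(type, key); // cache-backed lookup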
void LookupCache::clear() {
storage.withLock([&](const auto& rstorage) {
    // NOTE: we don't seem to be able to do this in the destructor because
    // something in ThreadCachedInt triggers ASAN
stats->values[Stats::factBytes] -= rstorage.factBytes();
stats->values[Stats::factCount] -= rstorage.factCount();
// TODO
// *this = LookupCache(base, capacity, stats);
});
}
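/// Map (type, key) to a fact id. We try the cache under a read lock first;
/// on a miss we consult the base lookup and cache a KEY-tagged fact for
/// next time.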
Id LookupCache::Anchor::idByKey(Pid type, folly::ByteRange key) {
const auto cached = cache->index.withRLockPtr([&](auto rindex) {
const auto i = rindex->keys.find(FactByKey::value_type{type, key});
if (i != rindex->keys.end()) {
const auto fact = *i;
const auto id = fact->id();
cache->touch(std::move(rindex), fact);
return id;
} else {
return Id::invalid();
}
});
if (cached) {
++cache->stats->values[Stats::idByKey_hits];
return cached;
}
if (const auto id = base->idByKey(type, key)) {
++cache->stats->values[Stats::idByKey_misses];
cache->insert(Fact::create({id, type, Fact::Clause::fromKey(key)}, KEY));
return id;
} else {
++cache->stats->values[Stats::idByKey_failures];
return Id::invalid();
}
}
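/// Map a fact id to its type, with the same cache-then-base strategy as
/// idByKey; misses are cached as TYPE-only facts.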
Pid LookupCache::Anchor::typeById(Id id) {
const auto cached = cache->index.withRLockPtr([&](auto rindex) {
const auto i = rindex->ids.find(id);
if (i != rindex->ids.end()) {
const auto fact = *i;
const auto ty = fact->type();
cache->touch(std::move(rindex), fact);
return ty;
} else {
return Pid::invalid();
}
});
if (cached) {
++cache->stats->values[Stats::typeById_hits];
return cached;
  } else if (auto type = base->typeById(id)) {
++cache->stats->values[Stats::typeById_misses];
cache->insert(Fact::create({id, type, {}}, TYPE));
return type;
} else {
++cache->stats->values[Stats::typeById_failures];
return Pid::invalid();
}
}
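/// Fetch a complete fact by id. Only FULL entries count as cache hits here;
/// TYPE- and KEY-tagged entries don't carry the full clause and fall
/// through to the base lookup.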
bool LookupCache::Anchor::factById(
Id id,
std::function<void(Pid, Fact::Clause)> f) {
const auto cached = cache->index.withRLockPtr([&](auto rindex) {
const auto i = rindex->ids.find(id);
if (i != rindex->ids.end() && (*i)->tag() == FULL) {
const auto fact = *i;
      // It is important to call f only after touch has released the read
      // lock (since f might use the cache). However, the fact needs to stay
      // alive until after we've called f. Grabbing a read lock on
      // delete_lock makes sure no facts will be deleted until we're done.
//
// We might consider finer-grained locking if this becomes an issue.
folly::SharedMutex::ReadHolder dont_delete(rindex->delete_lock);
cache->touch(std::move(rindex), fact);
f(fact->type(), fact->clause());
return true;
} else {
return false;
}
});
if (cached) {
++cache->stats->values[Stats::factById_hits];
return true;
}
Fact::unique_ptr fact;
base->factById(id, [&](auto type, auto clause) {
fact = Fact::create({id, type, clause}, FULL);
});
if (fact) {
++cache->stats->values[Stats::factById_misses];
f(fact->type(), fact->clause());
// FIXME: probably do this before f
cache->insert(std::move(fact));
return true;
} else {
++cache->stats->values[Stats::factById_failures];
return false;
}
}
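// Enumeration and seeks are not cached - they go straight to the base
// lookup.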
std::unique_ptr<FactIterator> LookupCache::Anchor::enumerate(Id from, Id upto) {
return base->enumerate(from, upto);
}
std::unique_ptr<FactIterator> LookupCache::Anchor::enumerateBack(
Id from, Id downto) {
return base->enumerateBack(from, downto);
}
std::unique_ptr<FactIterator> LookupCache::Anchor::seek(
Pid type,
folly::ByteRange start,
size_t prefix_size) {
return base->seek(type, start, prefix_size);
}
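/// Insert a fact, evicting older facts if necessary. Evicted facts are
/// destroyed only after the index and storage locks have been released,
/// under a write hold on delete_lock, so that concurrent factById readers
/// can finish with them first.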
void LookupCache::insert(Fact::unique_ptr owned) {
folly::SharedMutex::WriteHolder delete_write(nullptr);
Fact::intrusive_list dead;
performUpdate([&](Index& index, Storage& storage) {
insertOne(index, storage, std::move(owned), dead);
if (!dead.empty()) {
delete_write = folly::SharedMutex::WriteHolder(index.delete_lock);
}
});
// Perform the actual deletions after we've released all locks on the index
// and storage.
if (!dead.empty()) {
dead.erase_and_dispose(dead.begin(), dead.end(), Fact::destroy);
}
}
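/// The insertion logic proper, running with index and storage locked.
/// Facts displaced by the insert are collected on `dead`; the caller is
/// responsible for destroying them once it is safe to do so.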
void LookupCache::insertOne(
Index& index,
Storage& storage,
Fact::unique_ptr owned,
Fact::intrusive_list& dead) {
const auto size = owned->size();
if (size > options.capacity) {
return;
}
// check if we already have a fact with this id in the cache
const Fact *existing = nullptr;
{
auto o = index.ids.find(owned->id());
if (o != index.ids.end()) {
existing = *o;
}
}
if (existing && existing->tag() >= owned->tag()) {
// the cached fact has same or more data than we are trying to insert
return;
}
if (existing || storage.factBytes() + size > options.capacity) {
// Do deferred LRU operations if we're going to evict or replace. We need
// to do this even when replacing because the hit buffers might reference
// the fact we're going to delete.
//
    // NOTE: drain is "lossy" but in this case, we're running under a
    // write lock for the index so there will be no concurrent drainers,
    // and we'll have had a memory barrier before - so drain isn't
    // actually lossy here.
    //
    // This doesn't preserve LRU order as we process shards one by one,
    // i.e., hits within one shard are processed in chronological order
    // but we lose ordering across shards. It is possible to fix this at
    // the cost of some performance, but it's probably not worth doing.
for (auto& t : touched) {
drain(storage, t);
}
if (existing) {
if (owned->tag() == KEY) {
++stats->values[Stats::idByKey_deletes];
} else {
++stats->values[Stats::factById_deletes];
}
deleteFromIndex(index, existing);
// TODO: defer this, see comments in evict
auto fact = storage.release(existing);
dead.push_back(*fact);
// dead assumed ownership of the fact
(void)fact.release();
}
if (storage.factBytes() + size > options.capacity) {
// shrink cache to ~90% of capacity
const auto wanted =
options.capacity * 0.9 > size ? options.capacity * 0.9 - size : 0;
evict(index, storage, wanted, dead);
}
}
const auto fact = storage.push_back(std::move(owned));
  // For 'insert' (but not 'BulkStorage') we could unlock the storage here. It
  // doesn't matter, though, since nothing will actually use it without taking
  // the index lock first.
index.ids.insert(fact);
if (fact->tag() != TYPE) {
index.keys.insert(fact);
}
}
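/// Remove a fact from the id index, and from the key index if it has a key
/// (i.e., its tag is above TYPE).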
void LookupCache::deleteFromIndex(Index& index, const Fact *fact) {
index.ids.erase(fact);
if (fact->tag() > TYPE) {
index.keys.erase(fact);
}
}
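/// Evict facts from the least-recently-used end until at most `target`
/// bytes remain, unlinking them from the index and moving them onto `dead`.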
void LookupCache::evict(
LookupCache::Index& index,
LookupCache::Storage& storage,
size_t target,
Fact::intrusive_list& dead) {
auto range = storage.start();
while (storage.factBytes() - range.factBytes() > target && !range.empty()) {
deleteFromIndex(index, range.next());
}
storage.splice_to(dead, dead.end(), range);
}
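/// Record a cache hit. With sharding enabled, hits are buffered (lossily)
/// per shard and only folded into the LRU list when a buffer fills up;
/// without shards, the fact is moved to the most-recently-used position
/// immediately under the storage lock.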
void LookupCache::touch(
LookupCache::SyncIndex::RLockedPtr rindex,
const Fact *fact) {
if (options.shards > 0) {
auto& t = touched[folly::AccessSpreader<>::cachedCurrent(options.shards)];
// This is intentionally very lossy
auto k = t.next.load(std::memory_order_acquire);
if (k < options.touched_buffer_size) {
// yes, we might overwrite a previous store
t.facts[k].store(fact, std::memory_order_release);
// yes, there might have been other stores here in the meantime
t.next.store(k+1, std::memory_order_release);
} else {
rindex.unlock();
if (auto wstorage = storage.tryLock()) {
// Only drain our shard - as in 'insert', this loses LRU ordering across
// shards.
drain(*wstorage, t);
}
}
} else {
storage.withLock([&](auto& wstorage) {
wstorage.touch(fact);
});
}
}
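/// Replay a shard's buffered hits into the LRU list. The caller must hold
/// the storage lock; concurrent calls to touch are tolerated but concurrent
/// drains are not.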
void LookupCache::drain(Storage& storage, Touched& t) {
  // There might be calls to touch (but not to drain!) running concurrently
  // with this. It's ok to lose those. The only invariant we rely on is that
  // the Touched buffer has been filled up to n.
const auto n = t.next.load(std::memory_order_acquire);
for (size_t i = 0; i < n; ++i) {
storage.touch(t.facts[i].load(std::memory_order_acquire));
}
// yes, we might lose recorded hits here
t.next.store(0, std::memory_order_release);
}
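/// Insert a single fact through the Inserter's index and storage references
/// (presumably locked for the Inserter's lifetime by its creator). Any
/// displaced facts are destroyed immediately under the delete lock.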
void LookupCache::Inserter::insert(Fact::Ref fact) {
Fact::intrusive_list dead;
cache.insertOne(
index,
storage,
Fact::create(fact, FULL),
dead);
if (!dead.empty()) {
folly::SharedMutex::WriteHolder delete_write(index.delete_lock);
dead.erase_and_dispose(dead.begin(), dead.end(), Fact::destroy);
}
}
}
}
}