// cachelib/compact_cache/CCache.h
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* The compact cache is a highly memory-efficient way of mapping a key to a
* value. It is implemented as an N-way associative hash table. This means that
* each bucket in the hash table has room for at most N items. If the bucket
* fills up, the least recently accessed item inside that bucket will be
 * evicted. The compact cache interface functions are all thread-safe (i.e.,
 * they do their own locking).
*
* The compact cache is a class template with a template parameter called a
* CompactCacheDescriptor which defines the type of the keys and values and what
* the compact cache can do with them. The CompactCacheDescriptor is a wrapper
* around a Key and a ValueDescriptor. The Key defines the type of a key. The
 * ValueDescriptor defines the type of the value (if any) and how values can be
 * added together (needed for the compact cache to provide the add operation).
 * See ccache_descriptor.h for more information on value descriptors.
*
 * The LRU mechanism within a bucket is achieved by sliding the entries down
 * each time an entry is added on top of the bucket or each time an entry is
 * promoted because it was read. This makes the compact cache very efficient
 * for small values, but it can perform poorly with large values because each
 * read may move up to N large values.
*/
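/**
 * Usage sketch (illustrative only): the concrete compact cache type is
 * normally obtained via CCacheCreator<...>::type; "MyCCache", "allocator",
 * "key" and "value" below are hypothetical names, and only the set / get /
 * del interface declared in this file is assumed.
 *
 *   MyCCache cache(allocator);       // allocator must outlive the cache
 *   MyCCache::Key key = ...;
 *   MyCCache::Value value = ...;
 *   cache.set(key, &value);          // insert or update
 *   MyCCache::Value out;
 *   if (cache.get(key, &out) == CCacheReturn::FOUND) {
 *     // hit: "out" now holds a copy of the cached value
 *   }
 *   cache.del(key);                  // remove the entry, ignore the old value
 */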
#pragma once
#include <type_traits>
#include "cachelib/allocator/Cache.h"
#include "cachelib/allocator/CacheStats.h"
#include "cachelib/allocator/ICompactCache.h"
#include "cachelib/common/Cohort.h"
#include "cachelib/common/FastStats.h"
#include "cachelib/compact_cache/CCacheBucketLock.h"
#include "cachelib/compact_cache/CCacheFixedLruBucket.h"
#include "cachelib/compact_cache/CCacheVariableLruBucket.h"
/****************************************************************************/
/* CONFIGURATION */
/**
 * Default number of entries per bucket.
 * Only applicable when creating a compact cache for fixed-size values.
*/
#define NB_ENTRIES_PER_BUCKET 8
/****************************************************************************/
/* CompactCache definition */
namespace facebook {
namespace cachelib {
/**
* Trait class used to define the default bucket descriptor to be used for a
* given compact cache descriptor.
* Use either FixedLruBucket or VariableLruBucket depending on whether or not
* the values are of a variable size.
*
* @param C compact cache descriptor.
*/
template <typename C>
struct DefaultBucketDescriptor {
using type =
typename std::conditional<C::kValuesFixedSize,
FixedLruBucket<C, NB_ENTRIES_PER_BUCKET>,
VariableLruBucket<C>>::type;
};
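/**
 * Illustration (a sketch with a hypothetical descriptor "MyDescriptor"):
 *
 *   // If MyDescriptor::kValuesFixedSize is true, this resolves to
 *   //   FixedLruBucket<MyDescriptor, NB_ENTRIES_PER_BUCKET>
 *   // otherwise it resolves to
 *   //   VariableLruBucket<MyDescriptor>
 *   using MyBucket = typename DefaultBucketDescriptor<MyDescriptor>::type;
 */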
enum class CCacheReturn : int {
TIMEOUT = -2,
ERROR = -1,
NOTFOUND = 0,
FOUND = 1
};
enum class RehashOperation { COPY, DELETE };
/**
 * Interface for a compact cache. Users should not need to reference this type
 * directly. Instead, use CCacheCreator<...>::type as the compact cache type.
*
* @param C Class that provides information about the data (keys and values)
* processed by the compact cache. See ccache_descriptor.h for more
* information.
* @param A Class that provides interface to allocate memory. It needs to
* implement the CCacheAllocatorBase interface.
 * @param B Class that handles the management of the data in the buckets. This
 * defaults to FixedLruBucket if the values are of a fixed size and to
 * VariableLruBucket if they are of a variable size. You can supply a
 * different bucket descriptor as long as it is compatible with the values
 * stored in this compact cache.
*/
template <typename C,
typename A,
typename B = typename DefaultBucketDescriptor<C>::type>
class CompactCache : public ICompactCache {
public:
using SelfType = CompactCache<C, A, B>;
using BucketDescriptor = B;
using ValueDescriptor = typename C::ValueDescriptor;
using Value = typename ValueDescriptor::Value;
using Key = typename C::Key;
using EntryHandle = typename BucketDescriptor::EntryHandle;
using Bucket = typename BucketDescriptor::Bucket;
using Allocator = A;
constexpr static bool kHasValues = C::kHasValues;
constexpr static bool kValuesFixedSize = C::kValuesFixedSize;
enum Operation { READ, WRITE };
/** Type of the callbacks called when an entry is removed.
* The type of the callback depends on the type of the values in this
* compact cache. By using std::conditional, we are able to provide a
 * different type depending on the use case, i.e., the callback does not take
 * a value if the compact cache does not store values. The context indicates
 * whether the removal was an eviction, a deletion, or garbage collection.
 * Note: if the values stored in this compact cache are of a variable size,
 * it is assumed that the user will be able to compute the size if needed.
 * For example, the user has already stored a size field as part of the value,
* or can tell how big a value should be given the key.
*/
using RemoveCb = typename std::conditional<
kHasValues,
/* For a compact cache with values */
std::function<void(const Key&, const Value*, const RemoveContext)>,
/* For a compact cache with no values */
std::function<void(const Key&, const RemoveContext)>>::type;
using ReplaceCb = typename std::conditional<
kHasValues,
/* For a compact cache with values */
std::function<void(const Key&, const Value*, const Value*)>,
/* For a compact cache with no values */
std::function<void(const Key&)>>::type;
/** Callback for validating an entry.
 * An entry may be invalid because of something that happened outside of the
 * compact cache. If an entry is invalid, we can safely remove it without
 * paying the cost of a potential cache miss. */
using ValidCb = std::function<bool(const Key&)>;
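/** Callback shapes (a sketch for a compact cache that stores values; the
 * lambda bodies are hypothetical, only the aliases above are assumed):
 *
 *   RemoveCb removeCb = [](const Key& key, const Value* val,
 *                          const RemoveContext ctx) {
 *     // e.g. distinguish evictions from explicit deletions via ctx
 *   };
 *   ReplaceCb replaceCb = [](const Key& key, const Value* oldVal,
 *                            const Value* newVal) {
 *     // e.g. record that the value for key changed
 *   };
 *   ValidCb validCb = [](const Key& key) {
 *     return true;  // treat every entry as still valid
 *   };
 */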
/**
* Construct a new compact cache instance.
* Use this constructor only if you do not need to track when elements are
* removed or replaced (this is used by some tests for example).
*
* @param allocator Compact cache allocator to be used by this instance.
* The caller must guarantee the validity of
* the allocator for the lifetime of the compact cache.
* @param allowPromotions Whether we should allow promotions on read
* operations. True by default
*/
explicit CompactCache(Allocator& allocator, bool allowPromotions = true);
/**
* Construct a new compact cache instance with callbacks to track when
* items are removed / replaced.
*
* @param allocator Compact cache allocator to be used by this instance.
* The caller must guarantee the validity of the
* allocator for the lifetime of the compact cache.
* @param removeCb callback to be called when an item is removed.
* @param replaceCb callback to be called when an item is replaced.
* @param validCb callback to be called to check whether an entry is
* valid
* @param allowPromotions Whether we should allow promotions on read
* operations. True by default
*/
CompactCache(Allocator& allocator,
RemoveCb removeCb,
ReplaceCb replaceCb,
ValidCb validCb,
bool allowPromotions = true);
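/** Construction sketch (hedged): "MyCCache" and "allocator" are hypothetical,
 * and the callbacks are assumed to be defined as in the sketch above.
 *
 *   MyCCache withCallbacks(allocator, removeCb, replaceCb, validCb);
 *   MyCCache noPromotions(allocator, false);  // promotions disabled on reads
 */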
/**
 * The destructor detaches the allocator. Only after a CompactCache instance
 * is destroyed can another instance be attached to the same allocator.
*/
~CompactCache();
/**
* Resize the arena of this compact cache.
* Shrinking the cache:
* (1) Reduce num_chunks, so new requests only hash to the reduced size
 * (2) If currently taking requests, wait for all requests that picked up
 * the old num_chunks value to drain out. During this time some requests
 * will use the old and others will use the new num_chunks value, which
 * is ok because we're using a consistent hash.
* (3) Walk the table and free the entries in the chunks we're not using.
* (4) Free no longer used chunks.
* Growing the cache:
* (1) Allocate new chunks
* (2) Update num_chunks, wait for refcount on old value to hit 0
* (3) Walk all the entries in the old table size and see which ones would
* now hash to new chunks and move them.
*/
void resize() override;
/**
 * return whether the cache is enabled.
 * This is non-virtual for better performance since it is hotly accessed.
*/
bool enabled() const {
return numChunks_ > 0 && allocator_.getChunkSize() > 0;
}
/**
* return the total size of currently used chunks (virtual function). This
* could be different from the configured size since the resizing happens
* offline.
*/
size_t getSize() const override {
return numChunks_ * allocator_.getChunkSize();
}
/**
 * return the configured size of this compact cache
*/
size_t getConfiguredSize() const override {
return allocator_.getConfiguredSize();
}
std::string getName() const override { return allocator_.getName(); }
PoolId getPoolId() const { return allocator_.getPoolId(); }
/**
* return the total number of entries
*/
size_t getNumEntries() const {
return numChunks_ * bucketsPerChunk_ * BucketDescriptor::kEntriesPerBucket;
}
/**
* Set or update a value in the compact cache.
*
* @param key Key for which to set / update the value.
* @param timeout if greater than 0, take a timed lock
 * @param val Value to set / update for the key. Must not be nullptr unless
 * the value type is NoValue, which is a special indicator for
 * a compact cache with no values.
* @param size Size of the value. 0 means the value is fixed size.
* @return CCacheReturn with appropriate result type:
* FOUND (on hit - the value was updated), NOTFOUND (on miss -
* the value was set), TIMEOUT, ERROR (other error)
*/
CCacheReturn set(const Key& key,
const std::chrono::microseconds& timeout,
const Value* val = nullptr,
size_t size = 0);
CCacheReturn set(const Key& key,
const Value* val = nullptr,
size_t size = 0) {
return set(key, std::chrono::microseconds::zero(), val, size);
}
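/** Example (a sketch; "cache", "key" and "val" are hypothetical, and the
 * value type is assumed to be of a fixed size so no explicit size is passed):
 *
 *   auto res = cache.set(key, &val);
 *   // or, with a 500-microsecond lock timeout:
 *   res = cache.set(key, std::chrono::microseconds(500), &val);
 *   // res == CCacheReturn::FOUND    -> an existing entry was updated
 *   // res == CCacheReturn::NOTFOUND -> a new entry was inserted
 *   // res == CCacheReturn::TIMEOUT  -> the bucket lock was not acquired
 */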
/**
* Delete the entry mapped by a key and retrieve the old value.
*
* @param key Key of the entry to be deleted.
* @param timeout if greater than 0, take a timed lock
* @param val Pointer to the memory location where the old value will be
* written to. Left untouched on a miss. Nullptr means we don't
* have a value, or we don't need to obtain the old value.
 * @param size Pointer to the memory location where the deleted value's size
* will be written to if the value is variable. Fixed values
* will NOT have their sizes written into this field. Nullptr
* means caller does not need the size of the deleted value.
* @return CCacheReturn with appropriate result type:
* FOUND (on hit - the value was deleted), NOTFOUND (on miss),
* TIMEOUT, ERROR (other error)
*/
CCacheReturn del(const Key& key,
const std::chrono::microseconds& timeout,
Value* val = nullptr,
size_t* size = nullptr);
CCacheReturn del(const Key& key,
Value* val = nullptr,
size_t* size = nullptr) {
return del(key, std::chrono::microseconds::zero(), val, size);
}
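/** Example (a sketch; "cache" and "key" are hypothetical):
 *
 *   Value oldVal;
 *   auto res = cache.del(key, &oldVal);  // capture the deleted value, if any
 *   if (res == CCacheReturn::NOTFOUND) {
 *     // nothing was deleted; oldVal was left untouched
 *   }
 *   cache.del(key);                      // old value not needed
 */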
/**
* Retrieve the value mapped by a key.
*
* @param key Key of the entry to be read.
* @param timeout if greater than 0, take a timed lock
* @param val Pointer to the memory location where the value will
* be written to. Left untouched on a miss. Nullptr
* means caller does not need the value.
 * @param size Pointer to the memory location where the value's
* size will be written to if the value is variable.
* Fixed values will NOT have their sizes written into
* this field. Nullptr means size info is not needed.
 * @param shouldPromote Whether the key should be promoted. Note that if
 * promotion is disabled for this CCache (set at
 * construction), this parameter is ignored.
*
* @return CCacheReturn with appropriate result type:
* FOUND (on hit - the value was read), NOTFOUND (on
* miss), TIMEOUT or ERROR (other error)
*/
CCacheReturn get(const Key& key,
const std::chrono::microseconds& timeout,
Value* val = nullptr,
size_t* size = nullptr,
bool shouldPromote = true);
CCacheReturn get(const Key& key,
Value* val = nullptr,
size_t* size = nullptr,
bool shouldPromote = true) {
return get(
key, std::chrono::microseconds::zero(), val, size, shouldPromote);
}
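/** Example (a sketch; "cache" and "key" are hypothetical):
 *
 *   Value out;
 *   auto res = cache.get(key, &out);
 *   if (res == CCacheReturn::FOUND) {
 *     // hit: "out" holds the value; the entry may have been promoted
 *   }
 *   // Read without promoting the entry (shouldPromote = false):
 *   res = cache.get(key, &out, nullptr, false);
 */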
/**
* Check if the key exists in the compact cache. Useful if the caller wants
* to check for only existence and not copy the value out.
*
* @param key key of the entry
* @param timeout if greater than 0, take a timed lock
*
* @return CCacheReturn with appropriate result type:
* FOUND (on hit - the value was found), NOTFOUND (on
* miss), TIMEOUT or ERROR (other error)
*/
CCacheReturn exists(const Key& key, const std::chrono::microseconds& timeout);
CCacheReturn exists(const Key& key) {
return exists(key, std::chrono::microseconds::zero());
}
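/** Example (a sketch; "cache" and "key" are hypothetical):
 *
 *   if (cache.exists(key) == CCacheReturn::FOUND) {
 *     // the key is present; no value was copied out
 *   }
 */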
/**
 * Accepts a key and value, returning whether or not to purge the entry.
 * @param key key of the entry
 * @param value value of the entry. Nullptr if the compact cache has no values
*/
enum class PurgeFilterResult { SKIP, PURGE, ABORT };
using PurgeFilter =
std::function<PurgeFilterResult(const Key&, const Value* const)>;
/**
 * Go through all entries and remove the ones for which shouldPurge returns
 * PURGE.
*
* @param shouldPurge Function that returns whether or not to purge the
* current value. In the case of a cache without values,
* nullptr will be passed. This function is executed
* under the bucket lock.
* @return true on success, false on failure
*/
bool purge(const PurgeFilter& shouldPurge);
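/** Example (a sketch; "cache" is hypothetical and "isStale" stands in for any
 * caller-defined predicate):
 *
 *   bool ok = cache.purge([](const Key& key, const Value* val) {
 *     if (val == nullptr) {
 *       return PurgeFilterResult::SKIP;  // this cache stores no values
 *     }
 *     return isStale(key, *val) ? PurgeFilterResult::PURGE
 *                               : PurgeFilterResult::SKIP;
 *   });
 */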
/**
 * Iterate over all the buckets in the ccache and call the passed-in callback
 * to collect stats, purge entries, etc.
* @return true on success, false on failure
*/
using BucketCallBack = std::function<bool(Bucket*)>;
bool forEachBucket(const BucketCallBack& cb);
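/** Example (a sketch; "cache" is hypothetical, and returning true from the
 * callback is assumed here to mean "keep iterating"):
 *
 *   size_t nBuckets = 0;
 *   cache.forEachBucket([&nBuckets](Bucket* bucket) {
 *     ++nBuckets;
 *     return true;
 *   });
 */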
/** Move or purge entries that would change which chunk they hash to
* based on the specified old and new numbers of chunks. */
void tableRehash(size_t oldNumChunks,
size_t newNumChunks,
RehashOperation op);
/** return the current snapshot of all stats */
CCacheStats getStats() const override { return stats_.getSnapshot(); }
private:
/**
* Execute a request handler f on a given key.
* The following operations are performed:
 * 1) Check if the cache should miss due to an ongoing purge;
* 2) Increase the refcount on the current cohort;
* 3) Find the hash table bucket for the key;
* 4) Lock the bucket;
* 5) Call the request handler;
* 6) Unlock the bucket;
* 7) Decrease the refcount on the cohort.
*
* @param key Key on which to perform the request.
* @param timeout if greater than 0, take a timed lock
* @param f Handler to execute. The handler must have the following
* signature:
* int (*f)(Bucket*);
* The handler must return 0 on a miss, 1 on a hit, -1 in case of
* an error.
 * @param Args Any extra params needed for Fn
*
* @return 0 on a miss, 1 on a hit, -2 on timeout, -1 on other error
*/
template <typename Fn, typename... Args>
int callBucketFn(const Key& key,
Operation op,
const std::chrono::microseconds& timeout,
Fn f,
Args... args);
/** Free chunks whose index is between chunkIndexLow, inclusive, and
 * chunkIndexHigh, exclusive. Used when shrinking the cache. */
int tableChunksFree(size_t chunkIndexLow, size_t chunkIndexHigh);
/**
* Find the chunk on which the specified key would be located given the
 * specified number of chunks. Used by tableFindBucket for looking up
* buckets in the table (in which case numChunks = arena_->num_chunks) and
* by tableRehashLarger (in which case numChunks is the new number of
* chunks).
*
* @param numChunks Total number of chunks.
 * @param key Key for which to compute the corresponding chunk.
*
* @return pointer to the first bucket of the desired chunk.
*/
Bucket* tableFindChunk(size_t numChunks, const Key& key);
/**
 * Find out which bucket an entry might be in. Do this in a two-step process:
 * 1. Use consistent hashing (furc_hash) to determine the table chunk.
 * 2. Treat each chunk as a small, single-chunk compact cache: hash the key
 * again and use that to find the right bucket.
 *
 * This means if the chunk size remains fixed (e.g., 1MB) while the number of
 * chunks possibly varies (shrinking or growing the cache), a given key
 * will always map to the same bucket offset within the chunk no matter
 * which chunk it's in. So, e.g., if a key maps to the 15th bucket of chunk 3,
* and we grow the total number of chunks from 9 to 10, the key will map
* either still to the 15th bucket of chunk 3, or to the 15th bucket of
* chunk 10.
*
 * @param key Key for which to determine the corresponding bucket.
*
* @return bucket that maps to the key.
*/
Bucket* tableFindBucket(const Key& key);
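/** Conceptual sketch of the two-step lookup described above (illustrative
 * pseudocode only; "consistentHash", "bucketHash" and "firstBucketOfChunk"
 * are hypothetical names, not the actual helpers used):
 *
 *   size_t chunkIdx = consistentHash(key, numChunks);        // step 1
 *   size_t bucketIdx = bucketHash(key) % bucketsPerChunk_;   // step 2
 *   Bucket* bucket = firstBucketOfChunk(chunkIdx) + bucketIdx;
 */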
Bucket* tableFindDblWriteBucket(const Key& key);
/**
* Callback called by the bucket descriptor when an entry is evicted.
*
* @param handle Handle to the evicted entry.
*/
void onEntryEvicted(const EntryHandle& handle);
enum class BucketReturn { ERROR = -1, NOTFOUND = 0, FOUND = 1, PROMOTE = 2 };
int toInt(const BucketReturn br) const {
return static_cast<typename std::underlying_type<BucketReturn>::type>(br);
}
/**
* Set a new entry in a bucket. If there already is an entry for the
* specified key, update the entry's value (if any).
*
* @param bucket Bucket from which to look for an entry.
* @param key Key to search for in the bucket.
* @param val Value to insert into the bucket. Nullptr means no value.
* @param size Size of the value. Unused if this compact cache stores
* values of a fixed size.
*
* @return FOUND if entry is found (an existing entry was updated),
 * or NOTFOUND if not found (a new entry was inserted).
*/
BucketReturn bucketSet(Bucket* bucket,
const Key& key,
const Value* val = nullptr,
size_t size = 0);
/**
* Delete an entry from a bucket.
*
* @param bucket Bucket from which to delete an entry.
* @param key Key of the entry to be deleted.
* @param val The value of the entry that gets removed is written at the
* location pointed to by this pointer.
 * Left untouched if no entry is deleted.
* @param size Pointer to the memory location where to write the size of
* the deleted value. Ignored if this compact cache stores
* values of a fixed size.
*
* @return FOUND if entry is found (an entry was deleted),
* or NOTFOUND if not found (no entry was deleted).
*/
BucketReturn bucketDel(Bucket* bucket,
const Key& key,
Value* val,
size_t* size);
/**
* Check if entry is present. Doesn't promote.
*
* @param bucket Bucket from which to look for an entry.
* @param key Key to search for in the bucket.
* @param val The value of the found entry is written at the
* location pointed to by this pointer. This is left
* untouched if the entry is not present.
* @param size Pointer to the memory location where to write the
* size of the retrieved value. Ignored if this compact
* cache stores values of a fixed size.
* @param shouldPromote Whether key should be promoted.
*
* @return FOUND if found, NOTFOUND if not found, or PROMOTE
* if found and needs promotion
*/
BucketReturn bucketGet(Bucket* bucket,
const Key& key,
Value* val,
size_t* size,
bool shouldPromote);
/**
* Promotes an entry if necessary. Must be called under write lock.
*
 * @return FOUND if found (promoted if necessary),
 * NOTFOUND if not found
*/
BucketReturn bucketPromote(Bucket* bucket, const Key& key);
/**
* Find the position of an entry in a bucket.
*
* @param bucket Bucket from which to look for an entry.
* @param key Key to search for in the bucket.
*
* @return Handle to the found entry or invalid handle if not found.
*/
EntryHandle bucketFind(Bucket* bucket, const Key& key);
/**
* Data for a compact cache instance.
 * The arena must remain alive (i.e., not freed) during the lifetime of the
* compact cache.
*/
Allocator& allocator_;
CCRWBucketLocks locks_;
RemoveCb removeCb_;
ReplaceCb replaceCb_;
ValidCb validCb_;
facebook::cachelib::Cohort cohort_; /**< resize cohort synchronization */
folly::SharedMutex resizeLock_; /**< Lock to prevent resize conflicts. */
const size_t bucketsPerChunk_;
util::FastStats<CCacheStats> stats_;
const bool allowPromotions_; /**< Whether promotions are allowed on read
operations */
protected:
// expose these two fields for test hack
std::atomic<size_t> numChunks_;
std::atomic<size_t> pendingNumChunks_;
};
} // namespace cachelib
} // namespace facebook
/* This file implements the ccache methods. */
#include "cachelib/compact_cache/CCache-inl.h"