/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <folly/container/F14Map.h>
#include <folly/container/F14Set.h>
#include <folly/dynamic.h>
#include <folly/hash/Hash.h>
#include <folly/json.h>
#include <folly/synchronization/Baton.h>
#include <array>
#include <mutex>
#include <stdexcept>
#include <vector>
#include "cachelib/allocator/nvmcache/CacheApiWrapper.h"
#include "cachelib/allocator/nvmcache/InFlightPuts.h"
#include "cachelib/allocator/nvmcache/NavyConfig.h"
#include "cachelib/allocator/nvmcache/NavySetup.h"
#include "cachelib/allocator/nvmcache/NvmItem.h"
#include "cachelib/allocator/nvmcache/ReqContexts.h"
#include "cachelib/allocator/nvmcache/TombStones.h"
#include "cachelib/allocator/nvmcache/WaitContext.h"
#include "cachelib/common/AtomicCounter.h"
#include "cachelib/common/EventInterface.h"
#include "cachelib/common/Exceptions.h"
#include "cachelib/common/Utils.h"
#include "cachelib/navy/common/Device.h"
#include "folly/Range.h"
namespace facebook {
namespace cachelib {
namespace tests {
class NvmCacheTest;
}
// NvmCache is a key-value cache on flash. It is intended to be used
// along with a CacheAllocator to provide a uniform API to the application
// by abstracting away the details of ram and flash management.
template <typename C>
class NvmCache {
public:
using Item = typename C::Item;
using ChainedItem = typename Item::ChainedItem;
using ChainedItemIter = typename C::ChainedItemIter;
using ItemDestructor = typename C::ItemDestructor;
using DestructorData = typename C::DestructorData;
// Context passed in encodeCb or decodeCb. If the item has children,
// they are passed in the form of a folly::Range.
// Chained items must be iterated through @chainedItemRange.
// Other APIs used to access chained items are not compatible and should not
// be used.
struct EncodeDecodeContext {
Item& item;
folly::Range<ChainedItemIter> chainedItemRange;
};
// callback to encode an item to prepare it for nvmcache if needed
// @param ctx EncodeDecodeContext holding the item we are writing to
//            nvmcache and a range over its chained allocs, if any
// @return true if the item can be written. false if we need to fail
using EncodeCB = std::function<bool(EncodeDecodeContext)>;
// same as the above, but restores the item to its expected state if
// needed.
using DecodeCB = std::function<void(EncodeDecodeContext)>;
// encrypt everything written to the device and decrypt on every read
using DeviceEncryptor = navy::DeviceEncryptor;
using ItemHandle = typename C::ItemHandle;
using ReadHandle = typename C::ReadHandle;
using DeleteTombStoneGuard = typename TombStones::Guard;
using PutToken = typename InFlightPuts::PutToken;
struct Config {
navy::NavyConfig navyConfig{};
// (Optional) allows the user to change some bits to prepare the item for
// nvmcache serialization, like fixing up pointers etc. Encode and decode
// must either both be specified or both be left unset. If enabled, encode
// happens before encryption and decode happens after decryption.
EncodeCB encodeCb{};
DecodeCB decodeCb{};
// (Optional) This enables encryption at the device level. Everything we
// write into the nvm device will be encrypted.
std::shared_ptr<navy::DeviceEncryptor> deviceEncryptor{};
// Whether or not to truncate items to their original size when storing them
// in the NVM device. If true, only the original size the user requested is
// stored, rather than the full alloc-class size.
bool truncateItemToOriginalAllocSizeInNvm{false};
// when enabled, nvmcache will attempt to resolve misses without incurring
// thread hops by using synchronous methods.
bool enableFastNegativeLookups{false};
// serialize the config for debugging purposes
std::map<std::string, std::string> serialize() const;
// validate the config, set any defaults that were not set and return a
// copy.
//
// @throw std::invalid_argument if encoding/decoding/encryption is
// incompatible.
Config validateAndSetDefaults();
};
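//
// A minimal sketch of populating a Config (illustrative only; the file path,
// size and the navyConfig setter used here are assumptions, not a
// recommended setup):
//
//   NvmCache<C>::Config config;
//   config.navyConfig.setSimpleFile("/path/to/navy_file",
//                                   1024ULL * 1024 * 1024 /* 1GB */);
//   config.encodeCb = [](EncodeDecodeContext ctx) {
//     // fix up any pointers inside ctx.item before it is serialized
//     return true;
//   };
//   config.decodeCb = [](EncodeDecodeContext ctx) {
//     // undo the encode step after the item is read back
//   };
//   config = config.validateAndSetDefaults();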
// @param c              the cache instance using nvmcache
// @param config         the config for nvmcache
// @param truncate       if we should truncate the nvmcache store
// @param itemDestructor the destructor callback to invoke for items that are
//                       evicted or removed from nvm and are no longer in ram
NvmCache(C& c,
Config config,
bool truncate,
const ItemDestructor& itemDestructor);
// Look up item by key
// @param key key to lookup
// @return handle to the item. The lookup is asynchronous, so the handle
//         becomes ready once the nvm fetch completes; on a miss it resolves
//         to an empty handle.
ItemHandle find(folly::StringPiece key);
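//
// Example (illustrative sketch; `nvm` is a constructed NvmCache instance and
// the exact readiness semantics are those of the cache's ItemHandle):
//
//   auto hdl = nvm.find("some-key");
//   // the lookup is asynchronous; once the handle is ready, an empty
//   // handle indicates a miss
//   if (hdl) {
//     // use the item that was brought back from flash
//   }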
// Try to mark the key as in process of being evicted from RAM to NVM.
// This is used to maintain the consistency between the RAM cache and
// NvmCache when an item is transitioning from RAM to NvmCache in the
// presence of concurrent gets and deletes from other threads.
//
// @param key the key which is being evicted
// @return true if successfully marked and the eviction can proceed to
//         queue a put in the future. false otherwise
PutToken createPutToken(folly::StringPiece key);
// store the given item in navy
// @param hdl handle to cache item. should not be null
// @param token the put token for the item. this must have been
// obtained before enqueueing the put to maintain
// consistency
void put(ItemHandle& hdl, PutToken token);
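//
// A sketch of the eviction path (illustrative; the surrounding RAM eviction
// logic in CacheAllocator is elided, and the isValid() check is how a lost
// race with a concurrent get/delete is assumed to surface):
//
//   auto token = nvm.createPutToken(hdl->getKey());
//   if (token.isValid()) {
//     nvm.put(hdl, std::move(token)); // enqueue the write to navy
//   }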
// returns the current state of whether nvmcache is enabled or not. nvmcache
// can be disabled if the backend implementation ends up in a corrupt state
bool isEnabled() const noexcept { return navyEnabled_; }
// creates a delete tombstone for the key. This will ensure that all
// concurrent gets and puts to nvmcache can synchronize with an upcoming
// delete to make the cache consistent.
DeleteTombStoneGuard createDeleteTombStone(folly::StringPiece key);
// remove an item by key
// @param key key to remove
// @param tombstone the tombstone guard associated with this key. See
//        CacheAllocator::remove on how tombstones maintain consistency in
//        the presence of concurrent gets/puts to the same key in nvmcache.
//        The tombstone is kept alive until the remove operation has
//        finished processing asynchronously, and is destroyed afterwards.
//        The tombstone should be created by 'createDeleteTombStone'. It
//        can be created right before the remove call if the caller is only
//        removing the item from nvm; if the caller is also removing the
//        item from ram, the tombstone should be created before removing
//        the item from ram.
void remove(folly::StringPiece key, DeleteTombStoneGuard tombstone);
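//
// Example (illustrative sketch; if the item is also being removed from ram,
// the tombstone must be created before the ram removal, as described above):
//
//   auto guard = nvm.createDeleteTombStone(key);
//   nvm.remove(key, std::move(guard));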
// peek the nvmcache without bringing the item into the cache. creates a
// temporary item handle with the content of the nvmcache. this is intended
// for debugging purposes
//
// @param key the key for the cache item
// @return handle to the item in nvmcache if present. if not, nullptr is
// returned. if a handle is returned, it is not inserted into
// cache and is temporary.
ItemHandle peek(folly::StringPiece key);
// safely shut down the cache. must be called after stopping all concurrent
// access to the cache. using nvmcache after this will result in a no-op.
// Returns true if shutdown was performed properly, false otherwise.
bool shutDown();
// blocks until all in-flight ops are flushed to the device. To be used when
// there are no more operations being enqueued.
void flushPendingOps();
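//
// One possible teardown sequence (illustrative sketch; assumes all request
// threads have already stopped issuing operations):
//
//   nvm.flushPendingOps(); // drain queued puts/deletes
//   if (!nvm.shutDown()) {
//     // shutdown could not be completed cleanly
//   }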
// Obtain stats in a <string -> double> representation.
std::unordered_map<std::string, double> getStatsMap() const;
size_t getSize() const noexcept { return navyCache_->getSize(); }
bool updateMaxRateForDynamicRandomAP(uint64_t maxRate) {
return navyCache_->updateMaxRateForDynamicRandomAP(maxRate);
}
// This lock is to protect concurrent NvmCache evictCB and CacheAllocator
// remove/insertOrReplace/invalidateNvm.
// Within the above functions, the scope of this lock covers:
// 1. checking DRAM visibility in NvmCache evictCB
// 2. modifying DRAM visibility in CacheAllocator remove/insertOrReplace
// 3. checking/modifying the NvmClean, NvmEvicted flags
// 4. checking/modifying the itemRemoved_ set
// The lock ensures that items in itemRemoved_ must exist in nvm, and that
// nvm eviction erases the item from itemRemoved_, so there is no memory leak
// and no impact on a future item with the same key.
std::unique_lock<std::mutex> getItemDestructorLock(
folly::StringPiece key) const {
using LockType = std::unique_lock<std::mutex>;
return itemDestructor_ ? LockType{itemDestructorMutex_[getShardForKey(key)]}
: LockType{};
}
// For items with this key that are present in NVM, mark the DRAM copy as the
// authoritative one for destructor events. This is usually done when items
// are in-place mutated/removed/replaced and the nvm copy is being
// invalidated by a subsequent call to NvmCache::remove.
//
// caller must make sure itemDestructorLock is locked,
// and the item is present in NVM (NvmClean set and NvmEvicted flag unset).
void markNvmItemRemovedLocked(folly::StringPiece key);
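//
// Typical usage (illustrative sketch; the checks that the item is actually
// present in NVM, and the subsequent NvmCache::remove with its tombstone
// ordering, are elided):
//
//   {
//     auto lock = nvm.getItemDestructorLock(key);
//     nvm.markNvmItemRemovedLocked(key); // DRAM copy now owns the destructor
//   }
//   // ... then invalidate the stale NVM copy via NvmCache::remove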
private:
// returns the size of the itemRemoved_ set, i.e. the number of items that
// were present in both dram and nvm and have been removed from dram but not
// yet from nvm
uint64_t getNvmItemRemovedSize() const;
bool checkAndUnmarkItemRemovedLocked(folly::StringPiece key);
detail::Stats& stats() { return CacheAPIWrapperForNvm<C>::getStats(cache_); }
// creates the RAM item from NvmItem.
//
// @param key key for the nvm item
// @param nvmItem contents for the key
//
// @return an item handle allocated and initialized to the right state
//         based on the NvmItem
ItemHandle createItem(folly::StringPiece key, const NvmItem& nvmItem);
// creates the item as an IOBuf from the NvmItem. If the item has chained
// items, chained IOBufs will be created as well.
// @param key key for the nvm item
// @param dItem contents for the key
//
// @return an IOBuf allocated for the item, with its memory initialized to an
//         Item based on the NvmItem
std::unique_ptr<folly::IOBuf> createItemAsIOBuf(folly::StringPiece key,
const NvmItem& dItem);
// Returns an iterator to the item's chained IOBufs. The order of
// iteration on the item will be LIFO of the addChainedItem calls.
// This is only used when we have to create cache items on heap (IOBuf) for
// the purpose of ItemDestructor.
folly::Range<ChainedItemIter> viewAsChainedAllocsRange(folly::IOBuf*) const;
// returns true if there is a tombstone entry for the key.
bool hasTombStone(folly::StringPiece key);
std::unique_ptr<NvmItem> makeNvmItem(const ItemHandle& handle);
// wrap an item into a blob for writing into navy.
Blob makeBlob(const Item& it);
uint32_t getStorageSizeInNvm(const Item& it);
// Holds all the necessary data to do an async navy get.
// None of the supported operations are thread safe. The caller
// needs to ensure thread safety
struct GetCtx {
NvmCache& cache; //< the NvmCache instance
const std::string key; //< key being fetched
std::vector<std::shared_ptr<WaitContext<ReadHandle>>> waiters; // list of
// waiters
ItemHandle it; // will be set when Context is being filled
util::LatencyTracker tracker_;
bool valid_;
GetCtx(NvmCache& c,
folly::StringPiece k,
std::shared_ptr<WaitContext<ReadHandle>> ctx,
util::LatencyTracker tracker)
: cache(c),
key(k.toString()),
tracker_(std::move(tracker)),
valid_(true) {
it.markWentToNvm();
addWaiter(std::move(ctx));
}
~GetCtx() {
// prevent any further enqueue to waiters
// Note: we don't need to hold locks since no one can enqueue
// after this point.
wakeUpWaiters();
}
// @return key as StringPiece
folly::StringPiece getKey() const { return {key.data(), key.length()}; }
// record the item handle. Upon destruction we will wake up the waiters
// and pass a clone of the handle to each of them. By default we pass
// a null handle
void setItemHandle(ItemHandle _it) { it = std::move(_it); }
// enqueue a waiter into the waiter list
// @param waiter WaitContext
void addWaiter(std::shared_ptr<WaitContext<ReadHandle>> waiter) {
XDCHECK(waiter);
waiters.push_back(std::move(waiter));
}
// notify all pending waiters that are waiting for the fetch.
void wakeUpWaiters() {
bool refcountOverflowed = false;
for (auto& w : waiters) {
// If refcount overflowed earlier, then we will return a miss to
// all subsequent waiters.
if (refcountOverflowed) {
w->set(ItemHandle{});
continue;
}
try {
w->set(it.clone());
} catch (const exception::RefcountOverflow&) {
// We'll return a miss to the user's pending read,
// so we should enqueue a delete via NvmCache.
cache.cache_.remove(it);
refcountOverflowed = true;
}
}
}
void invalidate() { valid_ = false; }
bool isValid() const { return valid_; }
};
// Erase entry for the ctx from the fill map
// @param ctx ctx to erase
void removeFromFillMap(const GetCtx& ctx) {
auto key = ctx.getKey();
auto lock = getFillLock(key);
getFillMap(key).erase(key);
}
// Invalidate the in-flight fill for this key, if one exists
// @param key item key
void invalidateFill(folly::StringPiece key) {
auto shard = getShardForKey(key);
auto lock = getFillLockForShard(shard);
auto& map = getFillMapForShard(shard);
auto it = map.find(key);
if (it != map.end() && it->second) {
it->second->invalidate();
}
}
// Logs and disables navy usage
void disableNavy(const std::string& msg);
// map of concurrent fills by key. The key is a string piece wrapper around
// GetCtx's std::string. This makes the lookups possible without
// constructing a string key.
using FillMap =
folly::F14ValueMap<folly::StringPiece, std::unique_ptr<GetCtx>>;
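// The fill flow, roughly (a sketch inferred from the helpers below; the
// exact sequencing lives in NvmCache-inl.h):
//
//   auto lock = getFillLock(key);
//   auto& fillMap = getFillMap(key);
//   // if a GetCtx already exists for the key, add ourselves as a waiter;
//   // otherwise create a GetCtx and issue the async navy lookup. When the
//   // lookup completes, onGetComplete() fills the ctx, wakeUpWaiters()
//   // notifies everyone and removeFromFillMap() erases the entry.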
static size_t getShardForKey(folly::StringPiece key) {
return folly::Hash()(key) % kShards;
}
FillMap& getFillMapForShard(size_t shard) { return fills_[shard].fills_; }
FillMap& getFillMap(folly::StringPiece key) {
return getFillMapForShard(getShardForKey(key));
}
std::unique_lock<std::mutex> getFillLockForShard(size_t shard) {
return std::unique_lock<std::mutex>(fillLock_[shard].fillLock_);
}
std::unique_lock<std::mutex> getFillLock(folly::StringPiece key) {
return getFillLockForShard(getShardForKey(key));
}
void onGetComplete(GetCtx& ctx,
navy::Status s,
navy::BufferView key,
navy::BufferView value);
void evictCB(navy::BufferView key,
navy::BufferView val,
navy::DestructorEvent e);
static navy::BufferView makeBufferView(folly::ByteRange b) {
return navy::BufferView{b.size(), b.data()};
}
const Config config_;
C& cache_; //< cache allocator
std::atomic<bool> navyEnabled_{true}; //< switch to turn off/on navy
static constexpr size_t kShards = 8192;
// a map of all pending fills to prevent thundering herds
struct {
alignas(folly::hardware_destructive_interference_size) FillMap fills_;
} fills_[kShards];
// a map of fill locks for each shard
struct {
alignas(folly::hardware_destructive_interference_size) std::mutex fillLock_;
} fillLock_[kShards];
// currently queued put operations to navy.
std::array<PutContexts, kShards> putContexts_;
// currently queued delete operations to navy.
std::array<DelContexts, kShards> delContexts_;
// coordination between in-flight evictions from the cache that are not yet
// queued to navy and in-flight gets into nvmcache that are not yet queued.
std::array<InFlightPuts, kShards> inflightPuts_;
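// delete tombstones, sharded by key. Used to synchronize concurrent gets and
// puts with pending deletes (see createDeleteTombStone above).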
std::array<TombStones, kShards> tombstones_;
const ItemDestructor itemDestructor_;
mutable std::array<std::mutex, kShards> itemDestructorMutex_;
// Used to track the keys of items present in NVM that should be excluded
// from executing the Destructor upon eviction from NVM, if the item is not
// present in DRAM. The ownership of the item destructor is already managed
// elsewhere for these keys. This data structure is updated prior to issuing
// NvmCache::remove, to handle any racy eviction from NVM before the
// NvmCache::remove has finished.
std::array<folly::F14FastSet<std::string>, kShards> itemRemoved_;
std::unique_ptr<cachelib::navy::AbstractCache> navyCache_;
friend class tests::NvmCacheTest;
};
} // namespace cachelib
} // namespace facebook
#include "cachelib/allocator/nvmcache/NvmCache-inl.h"