cachelib/allocator/MM2Q.h (287 lines of code) (raw):

/* * Copyright (c) Facebook, Inc. and its affiliates. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #pragma once #include <atomic> #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wconversion" #include <folly/Format.h> #pragma GCC diagnostic pop #include <folly/lang/Align.h> #include <folly/synchronization/DistributedMutex.h> #include "cachelib/allocator/Cache.h" #include "cachelib/allocator/CacheStats.h" #include "cachelib/allocator/Util.h" #include "cachelib/allocator/datastruct/MultiDList.h" #include "cachelib/allocator/memory/serialize/gen-cpp2/objects_types.h" #include "cachelib/common/CompilerUtils.h" #include "cachelib/common/Mutex.h" namespace facebook { namespace cachelib { template <typename MMType> class MMTypeTest; // The basic version of this cache has 3 queues (Hot, Warm, and Cold). When an // item is added, it goes to Hot queue; we have percentage sizes for Hot and // Warm, and when they are too large, in rebalance() items can be moved to // Cold. Promotion moves items to the head of its queue except for Cold it // moves to the head of Warm queue. The eviction order is Cold, Hot, and Warm. // // When config.tailSize > 0, we enable two extra queues (ColdTail, WarmTail) of // memory size 1 slab to provide access stats for the tail lru slab. In this // case, in rebalance() we move items in WarmTail to Cold before moving from // Warm to Cold if the total size of Warm and WarmTail is larger than expected. // Also, tail queues are adjusted in rebalance() so that their sizes are // config.tailSize. The eviction order is ColdTail, Cold, Hot, WarmTail, and // Warm. class MM2Q { public: // unique identifier per MMType static const int kId; // forward declaration; template <typename T> using Hook = DListHook<T>; using SerializationType = serialization::MM2QObject; using SerializationConfigType = serialization::MM2QConfig; using SerializationTypeContainer = serialization::MM2QCollection; enum LruType { Warm, WarmTail, Hot, Cold, ColdTail, NumTypes }; // Config class for MM2Q struct Config { // Create from serialized config explicit Config(SerializationConfigType configState) : Config(*configState.lruRefreshTime(), *configState.lruRefreshRatio(), *configState.updateOnWrite(), *configState.updateOnRead(), *configState.tryLockUpdate(), *configState.rebalanceOnRecordAccess(), *configState.hotSizePercent(), *configState.coldSizePercent()) {} // @param time the refresh time in seconds to trigger an update in // position upon access. An item will be promoted only once in each lru // refresh time depite the number of accesses it gets. // @param udpateOnW whether to promote the item on write // @param updateOnR whether to promote the item on read Config(uint32_t time, bool updateOnW, bool updateOnR) : Config(time, updateOnW, updateOnR, /* try lock update */ false, /* rebalanceOnRecordAccess */ false, 30, 30) {} // @param time the LRU refresh time. // An item will be promoted only once in each lru refresh time depite the // number of accesses it gets. // @param udpateOnW whether to promote the item on write // @param updateOnR whether to promote the item on read // @param hotPercent percentage number for the size of the hot queue in the // overall size. // @param coldPercent percentage number for the size of the cold queue in // the overall size. Config(uint32_t time, bool updateOnW, bool updateOnR, size_t hotPercent, size_t coldPercent) : Config(time, updateOnW, updateOnR, /* try lock update */ false, /* rebalanceOnRecordAccess */ false, hotPercent, coldPercent) {} // @param time the LRU refresh time. // An item will be promoted only once in each // lru refresh time depite the // number of accesses it gets. // @param udpateOnW whether to promote the item on write // @param updateOnR whether to promote the item on read // @param tryLockU whether to use a try lock when doing // update. // @param rebalanceOnRecordAccs whether to do rebalance on access. If set // to false, rebalance only happens when // items are added or removed to the queue. // @param hotPercent percentage number for the size of the hot // queue in the overall size. // @param coldPercent percentage number for the size of the cold // queue in the overall size. Config(uint32_t time, bool updateOnW, bool updateOnR, bool tryLockU, bool rebalanceOnRecordAccs, size_t hotPercent, size_t coldPercent) : Config(time, 0., updateOnW, updateOnR, tryLockU, rebalanceOnRecordAccs, hotPercent, coldPercent) {} // @param time the LRU refresh time. // An item will be promoted only once in each // lru refresh time depite the // number of accesses it gets. // @param ratio the LRU refresh ratio. The ratio times the // oldest element's lifetime in warm queue // would be the minimum value of LRU refresh // time. // @param udpateOnW whether to promote the item on write // @param updateOnR whether to promote the item on read // @param tryLockU whether to use a try lock when doing // update. // @param rebalanceOnRecordAccs whether to do rebalance on access. If set // to false, rebalance only happens when // items are added or removed to the queue. // @param hotPercent percentage number for the size of the hot // queue in the overall size. // @param coldPercent percentage number for the size of the cold // queue in the overall size. Config(uint32_t time, double ratio, bool updateOnW, bool updateOnR, bool tryLockU, bool rebalanceOnRecordAccs, size_t hotPercent, size_t coldPercent) : Config(time, ratio, updateOnW, updateOnR, tryLockU, rebalanceOnRecordAccs, hotPercent, coldPercent, 0) {} // @param time the LRU refresh time. // An item will be promoted only once in each // lru refresh time depite the // number of accesses it gets. // @param ratio the LRU refresh ratio. The ratio times the // oldest element's lifetime in warm queue // would be the minimum value of LRU refresh // time. // @param udpateOnW whether to promote the item on write // @param updateOnR whether to promote the item on read // @param tryLockU whether to use a try lock when doing // update. // @param rebalanceOnRecordAccs whether to do rebalance on access. If set // to false, rebalance only happens when // items are added or removed to the queue. // @param hotPercent percentage number for the size of the hot // queue in the overall size. // @param coldPercent percentage number for the size of the cold // queue in the overall size. // @param mmReconfigureInterval Time interval for recalculating lru // refresh time according to the ratio. Config(uint32_t time, double ratio, bool updateOnW, bool updateOnR, bool tryLockU, bool rebalanceOnRecordAccs, size_t hotPercent, size_t coldPercent, uint32_t mmReconfigureInterval) : defaultLruRefreshTime(time), lruRefreshRatio(ratio), updateOnWrite(updateOnW), updateOnRead(updateOnR), tryLockUpdate(tryLockU), rebalanceOnRecordAccess(rebalanceOnRecordAccs), hotSizePercent(hotPercent), coldSizePercent(coldPercent), mmReconfigureIntervalSecs( std::chrono::seconds(mmReconfigureInterval)) { checkLruSizes(); } Config() = default; Config(const Config& rhs) = default; Config(Config&& rhs) = default; Config& operator=(const Config& rhs) = default; Config& operator=(Config&& rhs) = default; size_t getWarmSizePercent() const noexcept { return 100 - hotSizePercent - coldSizePercent; } // Make sure that the size percent numbers are sane. void checkLruSizes() { auto warmSizePercent = getWarmSizePercent(); // 100% hot is allowed as a drop-in replacement for LRU if (hotSizePercent == 100) { if (coldSizePercent != 0 && warmSizePercent != 0) { throw std::invalid_argument(folly::sformat( "Invalid hot/cold/warm lru size {}/{}/{}. When Hot is 100%," " Warm and Cold must be 0.", hotSizePercent, coldSizePercent, warmSizePercent)); } } else if (hotSizePercent <= 0 || hotSizePercent > 100 || coldSizePercent <= 0 || coldSizePercent >= 100 || warmSizePercent <= 0 || warmSizePercent >= 100) { throw std::invalid_argument( folly::sformat("Invalid hot/cold/warm lru size {}/{}/{}. Hot, " "Warm and Cold lru's " "must all have non-zero sizes.", hotSizePercent, coldSizePercent, warmSizePercent)); } } // adding extra config after generating the config: tailSize void addExtraConfig(size_t tSize) { tailSize = tSize; } // threshold value in seconds to compare with a node's update time to // determine if we need to update the position of the node in the linked // list. By default this is 60s to reduce the contention on the lru lock. uint32_t defaultLruRefreshTime{60}; uint32_t lruRefreshTime{defaultLruRefreshTime}; // ratio of LRU refresh time to the tail age. If a refresh time computed // according to this ratio is larger than lruRefreshtime, we will adopt // this one instead of the lruRefreshTime set. double lruRefreshRatio{0.}; // whether the lru needs to be updated on writes for recordAccess. If // false, accessing the cache for writes does not promote the cached item // to the head of the lru. bool updateOnWrite{false}; // whether the lru needs to be updated on reads for recordAccess. If // false, accessing the cache for reads does not promote the cached item // to the head of the lru. bool updateOnRead{true}; // whether to tryLock or lock the lru lock when attempting promotion on // access. If set, and tryLock fails, access will not result in promotion. bool tryLockUpdate{false}; // if set to true, we attempt to rebalance the hot cold and warm sizes on // every record access. If you are sensitive to access latencies to your // cache on reads, disable this. bool rebalanceOnRecordAccess{true}; // Size of hot and cold lru sizes. size_t hotSizePercent{30}; size_t coldSizePercent{30}; // number of items in the tail slab (set to 0 to disable) // This should be set by cache allocator through addExtraConfig(); user // should not set this manually. size_t tailSize{0}; // Minimum interval between reconfigurations. If 0, reconfigure is never // called. std::chrono::seconds mmReconfigureIntervalSecs{}; }; // The container object which can be used to keep track of objects of type // T. T must have a public member of type Hook. This object is wrapper // around DList, is thread safe and can be accessed from multiple threads. // The current implementation models an LRU using the above DList // implementation. template <typename T, Hook<T> T::*HookPtr> struct Container { private: using LruList = MultiDList<T, HookPtr>; using Mutex = folly::DistributedMutex; using LockHolder = std::unique_lock<Mutex>; using PtrCompressor = typename T::PtrCompressor; using Time = typename Hook<T>::Time; using CompressedPtr = typename T::CompressedPtr; using RefFlags = typename T::Flags; public: Container() = default; Container(Config c, PtrCompressor compressor) : lru_(LruType::NumTypes, std::move(compressor)), tailTrackingEnabled_(c.tailSize > 0), config_(std::move(c)) { lruRefreshTime_ = config_.lruRefreshTime; nextReconfigureTime_ = config_.mmReconfigureIntervalSecs.count() == 0 ? std::numeric_limits<Time>::max() : static_cast<Time>(util::getCurrentTimeSec()) + config_.mmReconfigureIntervalSecs.count(); } // If the serialized data has 3 lists and we want to expand to 5 lists // without enabling tail hits tracking, we need to adjust list positions // so that a cold roll can be avoided. Container(const serialization::MM2QObject& object, PtrCompressor compressor); Container(const Container&) = delete; Container& operator=(const Container&) = delete; // context for iterating the MM container. At any given point of time, // there can be only one iterator active since we need to lock the LRU for // iteration. we can support multiple iterators at same time, by using a // shared ptr in the context for the lock holder in the future. class Iterator : public LruList::Iterator { public: // noncopyable but movable. Iterator(const Iterator&) = delete; Iterator& operator=(const Iterator&) = delete; Iterator(Iterator&&) noexcept = default; // 1. Invalidate this iterator // 2. Unlock void destroy() { LruList::Iterator::reset(); if (l_.owns_lock()) { l_.unlock(); } } // Reset this iterator to the beginning void resetToBegin() { if (!l_.owns_lock()) { l_.lock(); } LruList::Iterator::resetToBegin(); } private: // private because it's easy to misuse and cause deadlock for MM2Q Iterator& operator=(Iterator&&) noexcept = default; // create an lru iterator with the lock being held. Iterator(LockHolder l, const typename LruList::Iterator& iter) noexcept; // only the container can create iterators friend Container<T, HookPtr>; // lock protecting the validity of the iterator LockHolder l_; }; // records the information that the node was accessed. This could bump up // the node to the head of the lru depending on the time when the node was // last updated in lru and the kLruRefreshTime. If the node was moved to // the head in the lru, the node's updateTime will be updated // accordingly. // // @param node node that we want to mark as relevant/accessed // @param mode the mode for the access operation. // // @return True if the information is recorded and bumped the node // to the head of the lru, returns false otherwise bool recordAccess(T& node, AccessMode mode) noexcept; // adds the given node into the container and marks it as being present in // the container. The node is added to the head of the lru. // // @param node The node to be added to the container. // @return True if the node was successfully added to the container. False // if the node was already in the contianer. On error state of node // is unchanged. bool add(T& node) noexcept; // removes the node from the lru and sets it previous and next to nullptr. // // @param node The node to be removed from the container. // @return True if the node was successfully removed from the container. // False if the node was not part of the container. On error, the // state of node is unchanged. bool remove(T& node) noexcept; // same as the above but uses an iterator context. The iterator is updated // on removal of the corresponding node to point to the next node. The // iterator context holds the lock on the lru. // // iterator will be advanced to the next node after removing the node // // We do not do rebalance in this remove because changing the queues while // having the iterator can cause probelms. // // @param it Iterator that will be removed void remove(Iterator& it) noexcept; // replaces one node with another, at the same position // // @param oldNode node being replaced // @param newNode node to replace oldNode with // // @return true If the replace was successful. Returns false if the // destination node did not exist in the container, or if the // source node already existed. bool replace(T& oldNode, T& newNode) noexcept; // Obtain an iterator that start from the tail and can be used // to search for evictions. This iterator holds a lock to this // container and only one such iterator can exist at a time Iterator getEvictionIterator() const noexcept; // get the current config as a copy Config getConfig() const; // override the current config. void setConfig(const Config& newConfig); bool isEmpty() const noexcept { return size() == 0; } size_t size() const noexcept { return lruMutex_->lock_combine([this]() { return lru_.size(); }); } // Returns the eviction age stats. See CacheStats.h for details EvictionAgeStat getEvictionAgeStat(uint64_t projectedLength) const noexcept; // for saving the state of the lru // // precondition: serialization must happen without any reader or writer // present. Any modification of this object afterwards will result in an // invalid, inconsistent state for the serialized data. // serialization::MM2QObject saveState() const noexcept; // return the stats for this container. MMContainerStat getStats() const noexcept; // TODO: make this static and *Hot, *Cold, *Tail methods static // This can only be done after T33401054 is finished. // Since we need to always turn on the Tail feature. LruType getLruType(const T& node) const noexcept; private: // reconfigure the MMContainer: update LRU refresh time according to current // tail age void reconfigureLocked(const Time& currTime); EvictionAgeStat getEvictionAgeStatLocked( uint64_t projectedLength) const noexcept; uint32_t getOldestAgeLocked(LruType lruType, Time currentTime) const noexcept; static Time getUpdateTime(const T& node) noexcept { return (node.*HookPtr).getUpdateTime(); } static void setUpdateTime(T& node, Time time) noexcept { (node.*HookPtr).setUpdateTime(time); } // remove node from lru and adjust insertion points // // @param node node to remove // @param doRebalance whether to do rebalance in this remove void removeLocked(T& node, bool doRebalance = true) noexcept; // Bit MM_BIT_0 is used to record if the item is hot. void markHot(T& node) noexcept { node.template setFlag<RefFlags::kMMFlag0>(); } void unmarkHot(T& node) noexcept { node.template unSetFlag<RefFlags::kMMFlag0>(); } bool isHot(const T& node) const noexcept { return node.template isFlagSet<RefFlags::kMMFlag0>(); } // Bit MM_BIT_2 is used to record if the item is cold. void markCold(T& node) noexcept { node.template setFlag<RefFlags::kMMFlag2>(); } void unmarkCold(T& node) noexcept { node.template unSetFlag<RefFlags::kMMFlag2>(); } bool isCold(const T& node) const noexcept { return node.template isFlagSet<RefFlags::kMMFlag2>(); } bool isWarm(const T& node) const noexcept { return !isHot(node) && !isCold(node); } // Bit MM_Bit_1 is used to record whether the item is in tail // this flag should be set only if the 1-slab tail is enabled, i.e. // config_.tailSize > 0, and a node is in Tail only if the tail is enabled. void markTail(T& node) noexcept { XDCHECK(config_.tailSize > 0); node.template setFlag<RefFlags::kMMFlag1>(); } void unmarkTail(T& node) noexcept { node.template unSetFlag<RefFlags::kMMFlag1>(); } bool inTail(const T& node) const noexcept { return config_.tailSize && node.template isFlagSet<RefFlags::kMMFlag1>(); } // get the tail lru for current lru, i.e. WarmTail for Warm, and ColdTail // for Cold // // @param list the Lru of which we want to know the tail (should be either // Warm or Cold) LruType getTailLru(LruType list) const; // adjust the size of tail list given a regular list. The tail size is // adjusted to reflect config.tailSize by moving nodes from the regular // list to the list's corresponding tail. // // @param list the Lru of which the tail we want to adjust (should be // either Warm or Cold) void adjustTail(LruType list); // Rebalance hot/cold/warm lrus to hot/cold size percent. // // - First, move items from WarmTail (or Warm, if WarmTail is empty) to Cold // until Warm part is within expected size. // - Then move items from Hot to Cold until Hot is within expected size. // - Lastly, adjust Warm and Cold so that their tails are at expected sizes. void rebalance() noexcept; // protects all operations on the lru. We never really just read the state // of the LRU. Hence we dont really require a RW mutex at this point of // time. mutable folly::cacheline_aligned<Mutex> lruMutex_; // the lru LruList lru_{LruType::NumTypes, PtrCompressor{}}; // size of tail after insertion point size_t tailSize_{0}; // number of hits in each lru. uint64_t numHotAccesses_{0}; uint64_t numColdAccesses_{0}; uint64_t numWarmAccesses_{0}; // number of hits in tail parts uint64_t numWarmTailAccesses_{0}; uint64_t numColdTailAccesses_{0}; // This boolean is to track whether tail tracking is enabled. When we are // turning on or off this feature by setting a new tailSize, cold roll is // required, which is indicated by throwing exception after checking this // flag. It is initialized to true when creating new MMContainers with // configs that specifies a non-zero tailSize. const bool tailTrackingEnabled_{false}; // The next time to reconfigure the container. std::atomic<Time> nextReconfigureTime_{}; // How often to promote an item in the eviction queue. std::atomic<uint32_t> lruRefreshTime_{}; // Config for this lru. // Write access to the MM2Q Config is serialized. // Reads may be racy. Config config_{}; // Max lruFreshTime. static constexpr uint32_t kLruRefreshTimeCap{900}; FRIEND_TEST(MM2QTest, DetailedTest); FRIEND_TEST(MM2QTest, DeserializeToMoreLists); FRIEND_TEST(MM2QTest, TailHits); FRIEND_TEST(MM2QTest, TailTrackingEnabledCheck); // still useful for helper functions friend class MMTypeTest<MM2Q>; }; }; } // namespace cachelib } // namespace facebook #include "cachelib/allocator/MM2Q-inl.h"