cachelib/allocator/MemoryMonitor.h

/*
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#pragma once

#include <atomic>
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <list>
#include <memory>

#include "cachelib/allocator/Cache.h"
#include "cachelib/allocator/RebalanceStrategy.h"
#include "cachelib/allocator/SlabReleaseStats.h"
#include "cachelib/common/PeriodicWorker.h"

namespace facebook {
namespace cachelib {
namespace tests {
template <typename AllocatorT>
class AllocatorResizeTest;
}

// A utility to track rate of increase/decrease of values over a window of time
// and use it to throttle specified value at the same rate. This means if the
// rate of change in values over the window exceeds the specified value, the
// rate limiter throttles it down to 0, otherwise the value is reduced
// proportionally and not throttled at all when rate of change drops to 0.
class RateLimiter { public: // @param detectIncrease Detect rate of increase if true, decrease otherwise explicit RateLimiter(bool detectIncrease); // Update window size // @param windowSize The window size in terms of the number of values to // track for rate limiting void setWindowSize(size_t windowSize) { windowSize_ = windowSize + 1; } // specify a new sample value // @param value Sample value void addValue(int64_t value); // throttle down the proposed change in value based on current rate of change // @param value Value to throttle // @return throttled value size_t throttle(int64_t delta); private: // List of values over the window for rate of change calculation std::list<int64_t> values_; // Whether to detect an increase or decrease bool detectIncrease_{true}; // Number of values to track for the window size_t windowSize_{0}; // Current rate of increase/decrease. int64_t rateOfChange_{0}; }; // The goal of memory monitoring is to avoid an out-of-memory (OOM) situation, // either by ensuring there's enough free memory available on the system or // that the caching process does not exceed a given memory usage limit. // Note: For processes running inside cgroups with memory limits, the free // memory monitoring does not work. Instead use the resident memory monitoring // to keep the process memory usage below the cgroup memory limit. class MemoryMonitor : public PeriodicWorker { public: enum Mode { FreeMemory, ResidentMemory, TestMode, Disabled }; struct Config { // Memory monitoring mode. Enable memory monitoring by setting this to // MemoryMonitor::ResidentMemory or MemoryMonitor::FreeMemory mode. Mode mode{Mode::Disabled}; // percentage of memUpperLimit - memLowerLimit to be advised away // in an iteration. size_t maxAdvisePercentPerIter{5}; // percentage of memUpperLimit - memLowerLimit to be reclaimed for // cache in an iteration. size_t maxReclaimPercentPerIter{5}; // lower limit for free/resident memory in GBs. 
// Note: the lower/upper limit is used in exactly opposite ways for the // FreeMemory versus ResidentMemory mode. // 1. In the ResidentMemory mode, when the resident memory usage drops // below this limit, advised away slabs are reclaimed in proportion to // the size of pools, to increase cache size and raise resident memory // above this limit. // 2. In the FreeMemory mode, when the system free memory drops below this // limit, slabs are advised away from pools in proportion to their size to // raise system free memory above this limit. size_t lowerLimitGB{10}; // upper limit for free/resident memory in GBs. // Note: the lower/upper limit is used in exactly opposite ways for the // FreeMemory versus ResidentMemory mode. // 1. In the ResidentMemory mode, when the resident memory usage exceeds // this limit, slabs are advised away from pools in proportion to their // size to reduce resident memory usage below this limit. // 2. In the FreeMemory mode, when the system free memory exceeds // this limit and if there are slabs that were advised away earlier, // they're reclaimed by pools in proportion to their sizes to reduce the // system free memory below this limit. size_t upperLimitGB{15}; // maximum percentage of item cache that can be advised away size_t maxAdvisePercent{20}; // On restart, the heap usage for applications grows slowly to steady state // over time. Memory monitoring may reclaim advised memory leaving // application vulnerable to OOMs due rapid growth in heap usage. // Cachelib supports rate limiting reclaiming of advised memory to avoid OOM // This is enabled by setting tracking window size to a non-zero value. // Setting this config to a value > 0 enables rate limiting reclaiming of // advised memory by the amount by which free/resident memory is // decreasing/increasing std::chrono::seconds reclaimRateLimitWindowSecs{0}; }; // Memory monitoring can be setup to run in one of the two following modes: // // 1. 
Free Memory Monitoring (Not supported for processes in cgroups) // // Setup a free memory monitor that periodically checks system free memory. // If it dips below lowerLimitGB bytes, it advises percentAdvisePerIteration // percent of (upperLimitGB - lowerLimitGB) at a time (in every iteration), // until system free memory is above the lowerLimitGB bytes. If the system // free memory exceeds upperLimitGB bytes, it reclaims // percentReclaimPerIteration percent of (upperLimitGB - lowerLimitGB) // at a time until system free memory drops below upperLimitGB. A maximum of // maxLimitPercent of total cache size (excluding compact cache) can be // advised away, after which advising stops to avoid cache from becoming // too small. // Given N bytes of memory on a host, typically N-M bytes are used by cache // leaving M bytes for the heap usage by cache process, kernel and other // processes running on the box. When non-cache memory usage exceeds M bytes // the host goes Out-Of-Memory (OOM) and may fail or kill the cache process. // The free memory monitor ensures that there's at least lowerLimitGB amount // of memory free by giving up to maxLimitPercent of the cache (excluding // compact cache), there by avoiding OOM condition. // // @param mode FreeMemory // @param cache Cache // @param percentAdvisePerIteration // Percentage of upperLimitGB-lowerLimitGB to be // advised every poll period. This // governs the rate of advise // @param percentReclaimPerIteration // Percentage of upperLimitGB-lowerLimitGB to be // reclaimed every poll period. This // governs the rate of reclaim // @param lowerLimitGB The lower limit of free memory in GBytes that // triggers advising away of memory from cache // @param upperLimitGB The upper limit of free memory in GBytes that // triggers reclaiming of advised away memory // @param maxLimitPercent Maximum percentage of item cache limit that can // be advised away before advising is disabled // leading to a probable OOM. 
// @param strategy Strategy to use to determine the allocation // class in pool to steal slabs from, for advising // @param reclaimRateLimitWindowSecs // Specifies window in seconds over which // free memory values are tracked to detect // decreasing free memory values. Setting this to // non-zero value enables rate limiting reclaim. // // 2. Resident Memory Monitoring // // Setup a resident memory monitor to advise away memory to avoid OOM, by // by limiting process's total resident memory usage. The resident memory // usage of the process can be split into two parts, the cache and everything // else. When the resident memory usage exceeds the upperLimitGB, the monitor // gives away memory from cache, percentAdvisePerIteration of // (upperLimitGB - lowerLimitGB) every poll period, until the memory usage // drops below upperLimitGB. When the resident memory usage dips below // lowerLimitGB, the monitor reclaims memory for cache (if previously given // away), until the resident memory usage is above the lowerLimitGB, // percentReclaimPerIteration of (upperLimitGB - lowerLimitGB) at a time. // // @param cache Cachelib instance // @param config Memory monitor config // @param strategy Strategy to use to determine the allocation // class in pool to steal slabs from, for advising MemoryMonitor(CacheBase& cache, Config config, std::shared_ptr<RebalanceStrategy> strategy); ~MemoryMonitor() override; // number of slabs that have been advised away unsigned int getNumSlabsAdvisedAway() const noexcept { return slabsAdvised_; } // number of slabs that have been reclaimed unsigned int getNumSlabsReclaimed() const noexcept { return slabsReclaimed_; } // maximum percentage of regular cache memory that can be advised away. 
size_t getMaxAdvisePct() const noexcept { return maxLimitPercent_; } // amount of memory available on the host size_t getMemAvailableSize() const noexcept { return memAvailableSize_; } // rss size of the process size_t getMemRssSize() const noexcept { return memRssSize_; } SlabReleaseEvents getSlabReleaseEvents(PoolId pid) const { return stats_.getSlabReleaseEvents(pid); } private: // check free memory and advise/reclaim if necessary void checkFreeMemory(); // check resident memory and advise/reclaim if necessary void checkResidentMemory(); // check pools for memory to be advised or reclaimed and execute // Checks the target number of slabs to be advised and compares with // the currently advised away slabs. Slabs are advised away or reclaimed // to make these two numbers equal void checkPoolsAndAdviseReclaim(); // @param poolId the pool id // @return number of slabs in use by pool size_t getPoolUsedSlabs(PoolId poolId) const noexcept; // @param poolId the pool id // @return number of slabs in based on pool size size_t getPoolSlabs(PoolId poolId) const noexcept; // @return number of slabs based on the size of all pools added together size_t getTotalSlabs() const noexcept; // @return number of slabs in use by all pools together size_t getSlabsInUse() const noexcept; // advise away slabs to increase free memory or reduce RSS void adviseAwaySlabs(); // reclaim slabs to increase cache size and reduce free memory/increase RSS void reclaimSlabs(); // cache's interface for rebalancing CacheBase& cache_; // Memory monitoring mode Mode mode_; // user-defined rebalance strategy that would be used to pick a victim. If // this does not work out, we pick the first allocation class that has // non-zero slabs. std::shared_ptr<RebalanceStrategy> strategy_; // slab release stats for memory monitor. ReleaseStats stats_{}; // number of slabs released as a part of resizing pools. 
std::atomic<unsigned int> slabsReleased_{0}; // Specifies the percentage of the advising bounds (upperlimit - lowerlimit) // that is advised away per iteration of memory monitor. size_t percentAdvisePerIteration_{0}; // Specifies the percentage of the advising bounds (upperlimit - lowerlimit) // that is reclaimed per iteration of memory monitor. size_t percentReclaimPerIteration_{0}; // lower limit for free/resident memory in GBs. // Note: the lower/upper limit is used in exactly opposite ways for the // FreeMemory versus ResidentMemory mode. // 1. In the ResidentMemory mode, when the resident memory usage drops // below this limit, advised away slabs are reclaimed in proportion to // the size of pools, to increase cache size and raise resident memory // above this limit. // 2. In the FreeMemory mode, when the system free memory drops below this // limit, slabs are advised away from pools in proportion to their size to // raise system free memory above this limit. size_t lowerLimit_{0}; // upper limit for free/resident memory in GBs. // Note: the lower/upper limit is used in exactly opposite ways for the // FreeMemory versus ResidentMemory mode. // 1. In the ResidentMemory mode, when the resident memory usage exceeds // this limit, slabs are advised away from pools in proportion to their // size to reduce resident memory usage below this limit. // 2. In the FreeMemory mode, when the system free memory exceeds // this limit and if there are slabs that were advised away earlier, // they're reclaimed by pools in proportion to their sizes to reduce the // system free memory below this limit. size_t upperLimit_{0}; // the maximum percentage of total memory that can be advised away size_t maxLimitPercent_{0}; // Specifies window in seconds over which resident memory values are tracked // to detect increasing resident memory values. 
Setting this to non-zero // value enables rate limiting reclaim std::chrono::seconds reclaimRateLimitWindowSecs_; // On restart, the heap usage for applications grows slowly to steady state // over time. Memory monitoring may reclaim advised memory leaving application // vulnerable to OOMs due rapid growth in heap usage. // Cachelib supports rate limiting reclaiming of advised memory to avoid OOM // where the rate of reclaim is reduced by rate of free/resident memory // decrease/increase. RateLimiter rateLimiter_; // a count of total number of slabs advised away std::atomic<unsigned int> slabsAdvised_{0}; template <typename AllocatorT> friend class facebook::cachelib::tests::AllocatorResizeTest; // a count of total number of slabs reclaimed std::atomic<unsigned int> slabsReclaimed_{0}; // amount of memory available on the host std::atomic<size_t> memAvailableSize_{0}; // rss size of the process std::atomic<size_t> memRssSize_{0}; // implements the actual logic of running tryRebalancing and // updating the stats void work() final; }; } // namespace cachelib } // namespace facebook

cachelib/allocator/MemoryMonitor.h (80 lines of code) (raw):