cachelib/allocator/RebalanceStrategy.cpp (218 lines of code) (raw):

/* * Copyright (c) Facebook, Inc. and its affiliates. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include "cachelib/allocator/RebalanceStrategy.h" #include <folly/logging/xlog.h> namespace facebook { namespace cachelib { using detail::Info; const RebalanceContext RebalanceStrategy::kNoOpContext = { Slab::kInvalidClassId, Slab::kInvalidClassId}; void RebalanceStrategy::recordCurrentState(PoolId pid, const PoolStats& stats) { auto& currRecord = poolState_[pid]; for (const auto i : stats.getClassIds()) { currRecord[i].updateRecord(stats); } } ClassId RebalanceStrategy::pickAnyClassIdForResizing(const CacheBase& cache, PoolId pid) { const auto stats = cache.getPoolStats(pid); const auto& candidates = stats.mpStats.classIds; // pick victim by maximum number of slabs. const auto ret = *std::max_element( candidates.begin(), candidates.end(), [&](ClassId a, ClassId b) { return stats.mpStats.acStats.at(a).totalSlabs() < stats.mpStats.acStats.at(b).totalSlabs(); }); // if we dont find any, it is likely that the pool has its slabs in its // private free list. if (stats.mpStats.acStats.at(ret).totalSlabs() == 0) { return Slab::kInvalidClassId; } return ret; } void RebalanceStrategy::initPoolState(PoolId pid, const PoolStats& stats) { // default initialize auto& curr = poolState_[pid]; for (auto id : stats.getClassIds()) { curr[id] = Info{id, stats.mpStats.acStats.at(id).totalSlabs(), stats.cacheStats.at(id).numEvictions(), stats.numHitsForClass(id), stats.cacheStats.at(id).containerStat.numTailAccesses}; } } ClassId RebalanceStrategy::pickVictimByFreeMem(const std::set<ClassId>& victims, const PoolStats& stats, size_t threshold, const PoolState& prevState) { // filter out evicting alloc classes. Since these are evicting, the free // allocs is probably needed here. const auto nonEvicting = filter( victims, [&](ClassId id) { return prevState.at(id).getDeltaEvictions(stats) > 0; }, "filtering evicting classes for free-mem"); if (victims.empty()) { return Slab::kInvalidClassId; } const auto& mpStats = stats.mpStats; const auto it = std::max_element(nonEvicting.cbegin(), nonEvicting.cend(), [&mpStats](const ClassId& a, const ClassId& b) { const auto& acStats = mpStats.acStats; return acStats.at(a).getTotalFreeMemory() < acStats.at(b).getTotalFreeMemory(); }); if (mpStats.acStats.at(*it).getTotalFreeMemory() <= threshold) { return Slab::kInvalidClassId; } return *it; } // filter the candidates by amount of evictable slabs std::set<ClassId> RebalanceStrategy::filterByNumEvictableSlabs( const PoolStats& poolStats, std::set<ClassId> candidates, unsigned int minSlabs) { auto condition = [&](const ClassId& id) { const auto& acStat = poolStats.mpStats.acStats.at(id); return acStat.totalSlabs() <= minSlabs; }; return filter( std::move(candidates), condition, "remove candidates by numSlabs"); } std::set<ClassId> RebalanceStrategy::filterByAllocFailure( const PoolStats& stats, std::set<ClassId> candidates, const PoolState& prevState) { return filter( std::move(candidates), [&](ClassId cid) { return prevState.at(cid).deltaAllocFailures(stats) == 0; }, "candidates with allocation failures"); } std::set<ClassId> RebalanceStrategy::filterByNoEvictions( const PoolStats& stats, std::set<ClassId> candidates, const PoolState& prevState) { return filter( std::move(candidates), [&](ClassId cid) { return stats.mpStats.acStats.at(cid).freeSlabs > 0 || prevState.at(cid).getDeltaEvictions(stats) == 0; }, " candidates with free memory or no evictions"); } std::set<ClassId> RebalanceStrategy::filterByMinTailAge( const PoolEvictionAgeStats& poolEvictionAgeStats, std::set<ClassId> candidates, unsigned int minTailAge) { return filter( std::move(candidates), [&](ClassId cid) { return poolEvictionAgeStats.getOldestElementAge(cid) < minTailAge; }, folly::sformat(" candidates with less than {} seconds for tail age", minTailAge)); } std::set<ClassId> RebalanceStrategy::filter( std::set<ClassId> input, std::function<bool(const ClassId& id)> pred, const std::string& purpose) { if (input.empty()) { return input; } const auto before = input.size(); auto it = input.begin(); while (it != input.end()) { if (pred(*it)) { it = input.erase(it); } else { ++it; } } const auto after = input.size(); XLOGF(DBG, "Rebalancing: filtered out {} classes for {}. Remaining: {}", before - after, purpose, after); return input; } std::set<ClassId> RebalanceStrategy::filterVictimsByHoldOff( PoolId pid, const PoolStats& stats, std::set<ClassId> victims) { auto condition = [this, &stats, pid](const ClassId& id) { auto& currRecord = poolState_.at(pid).at(id); if (!currRecord.isOnHoldOff()) { return false; } if (currRecord.getDeltaEvictions(stats) == 0) { currRecord.reduceHoldOff(); // filter return true; } else { currRecord.resetHoldOff(); return false; } }; return filter(std::move(victims), condition, "remove recent receivers"); } RebalanceContext RebalanceStrategy::pickVictimAndReceiver( const CacheBase& cache, PoolId pid) { return executeAndRecordCurrentState<RebalanceContext>( cache, pid, [&]() { // Pick receiver based on allocation failures. If nothing found, // fall back to strategy specific Impl RebalanceContext ctx; ctx.receiverClassId = pickReceiverWithAllocFailures(cache, pid); if (ctx.receiverClassId != Slab::kInvalidClassId) { ctx.victimClassId = pickVictimImpl(cache, pid); if (ctx.victimClassId == cachelib::Slab::kInvalidClassId) { ctx.victimClassId = pickAnyClassIdForResizing(cache, pid); } if (ctx.victimClassId != Slab::kInvalidClassId && ctx.victimClassId != ctx.receiverClassId && getPoolState(pid).at(ctx.victimClassId).nSlabs > 1) { // start a hold off so that the receiver does not become a victim // soon enough. getPoolState(pid).at(ctx.receiverClassId).startHoldOff(); return ctx; } } return pickVictimAndReceiverImpl(cache, pid); }, kNoOpContext); } ClassId RebalanceStrategy::pickVictimForResizing(const CacheBase& cache, PoolId pid) { // Pick only the victim irrespective of who is receiving the slab. This is // used mostly for pool resizing. auto victimClassId = executeAndRecordCurrentState<ClassId>( cache, pid, [&]() { return pickVictimImpl(cache, pid); }, Slab::kInvalidClassId); if (victimClassId == cachelib::Slab::kInvalidClassId) { victimClassId = pickAnyClassIdForResizing(cache, pid); } return victimClassId; } ClassId RebalanceStrategy::pickReceiverWithAllocFailures(const CacheBase& cache, PoolId pid) { const auto stats = cache.getPoolStats(pid); auto receivers = stats.getClassIds(); const auto receiverWithAllocFailures = filterByAllocFailure(stats, std::move(receivers), getPoolState(pid)); if (receiverWithAllocFailures.empty()) { return Slab::kInvalidClassId; } const auto& poolState = getPoolState(pid); // pick the receiver with the most allocation failures return *std::max_element(receiverWithAllocFailures.begin(), receiverWithAllocFailures.end(), [&](ClassId a, ClassId b) { return poolState.at(a).deltaAllocFailures(stats) < poolState.at(b).deltaAllocFailures(stats); }); } template <typename T> T RebalanceStrategy::executeAndRecordCurrentState( const CacheBase& cache, PoolId pid, const std::function<T()>& impl, T noOp) { const auto poolStats = cache.getPoolStats(pid); // if this is the first time we are encountering this pool, initialize // the state and not do anything at the moment. if (!poolStatePresent(pid)) { initPoolState(pid, poolStats); return noOp; } auto rv = impl(); recordCurrentState(pid, poolStats); return rv; } } // namespace cachelib } // namespace facebook