cachelib/common/hothash/HotHashDetector.cpp (114 lines of code) (raw):
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "cachelib/common/hothash/HotHashDetector.h"
#include <folly/Likely.h>
#include <glog/logging.h>
#include <algorithm>
#include <deque>
namespace facebook {
namespace cachelib {
uint8_t HotHashDetector::bumpHash(uint64_t hash) {
if (UNLIKELY(++bumpsSinceMaintenance_ >= maintenanceInterval_)) {
doMaintenance();
};
auto idx = l1HashFunction(hash);
auto l1count = ++l1Vector_[idx];
// Checking against threshold/2, hot index should pass after one decay
if (l1count < (l1Threshold_ / 2)) {
return 0;
}
uint8_t result = 0;
// different hash for L2:
auto idx2 = l2HashFunction(hash);
// If L2 has a count, then there is a chance that the given hash is hot,
// scan a few elements for this hash.
uint32_t l2count = l2Vector_[idx2].count;
if (l2count > 0) {
for (unsigned i = 0; i < kScanLen; ++i) {
auto& cell = l2Vector_[(idx2 + i) & bucketsMask_];
if (cell.hash == 0) {
break;
}
if (cell.hash == hash) {
// Get a number between 1 and 255 for how much hot the L2 entry is.
result = static_cast<uint8_t>(std::min<uint32_t>(
255, std::max<uint32_t>(1, l2count / hotnessMultiplier_)));
++cell.hashHits;
break;
}
}
}
// Proceed to bump L2 if the l1count is divisible by current L1 threshold.
if (l1count % l1Threshold_ != 0) {
return result;
}
if (++l2Vector_[idx2].count < hotnessMultiplier_) {
return result;
}
for (unsigned i = 0; i < kScanLen; ++i) {
auto& cell = l2Vector_[(idx2 + i) & bucketsMask_];
if (cell.hash == 0) {
cell.hash = hash;
break;
}
if (cell.hash == hash) {
break;
}
// NOTE: in the unlikely case that we scanned kScanLen entries, we will drop
// the hash without inserting it.
}
return result;
}
void HotHashDetector::doMaintenance() {
bumpsSinceMaintenance_ = 0;
// Decay L1
for (uint32_t& val : l1Vector_) {
val /= 2;
}
// Decay L2
for (auto& cell : l2Vector_) {
cell.count = std::min<uint32_t>(hotnessMultiplier_ - 1, cell.count / 2);
cell.hashHits = cell.hashHits / 2;
};
// Hashes in L2 may need to move if their preceding cell has decayed. We
// have to run at least size() elements, but also continue running in case
// the last kScanLen elements had at least one move. We keep a running sum
// of the number of moves in the last kScanLen iterations.
size_t runningSum = 0;
std::deque<size_t> lastMoves(kScanLen, 0);
for (unsigned i = 0; i < l2Vector_.size() || runningSum > 0; ++i) {
size_t moved = fixL2Holes(i & bucketsMask_) ? 1 : 0;
runningSum += moved - lastMoves.front();
lastMoves.push_back(moved);
lastMoves.pop_front();
}
// Adjust L1 threshold so the number of L2 non-empty slots will be in some
// range.
auto nonZeroCount = numBuckets_ - std::count_if(l2Vector_.cbegin(),
l2Vector_.cend(),
[](L2Record const& cell) {
return cell.count == 0;
});
if (nonZeroCount == 0) {
l1Threshold_ = std::max<size_t>(2U, l1Threshold_ / 2);
} else if (nonZeroCount > numWarmItems_) {
l1Threshold_ = std::min<size_t>(1U << 20, l1Threshold_ * 2);
}
calcMaintenanceInterval();
}
bool HotHashDetector::fixL2Holes(unsigned idx) {
assert(idx < l2Vector_.size());
auto hash = l2Vector_[idx].hash;
if (hash == 0) {
return false;
}
auto correctIdx = l2HashFunction(hash);
if (l2Vector_[correctIdx].count == 0 ||
l2Vector_[idx].hashHits < (l1Threshold_ / 2)) {
l2Vector_[idx].hash = 0;
l2Vector_[idx].hashHits = 0;
return true;
}
if (idx == correctIdx) {
return false;
}
for (unsigned j = 1; j < kScanLen; ++j) {
auto candidateIdx = (correctIdx + j) & bucketsMask_;
if (candidateIdx == idx) {
return false;
}
if (l2Vector_[candidateIdx].hash == 0) {
l2Vector_[candidateIdx].hash = hash;
l2Vector_[candidateIdx].hashHits = l2Vector_[idx].hashHits;
l2Vector_[idx].hash = 0;
l2Vector_[idx].hashHits = 0;
return true;
}
}
// We always expect the hash to reside at most kScanLen-1 cells away from
// its correct index, we shouldn't get here.
assert(false);
return false;
}
} // namespace cachelib
} // namespace facebook