cachelib/common/ApproxSplitSet.h (181 lines of code) (raw):
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <glog/logging.h>

#include <algorithm>
#include <atomic>
#include <cstdint>
#include <cstring>
#include <ctime>
#include <deque>
#include <memory>
#include <mutex>
#include <numeric>
#include <stdexcept>
#include <utility>

#include "cachelib/common/Time.h"
namespace facebook {
namespace cachelib {
// Set that can drop entries. It does that to maintain internal hash table's
// load factor and max collision chain length under control.
//
// Implementation uses Robin Hood hashing with limit on probes. Because of this
// we also allocate "probe limit" extra slots to avoid dealing with wrapping
// probing around.
template <typename UIntT>
class DropSet {
 public:
  using UInt = UIntT;

  // @param probeLimit  max distance a key may land from its ideal bucket;
  //                    keys that cannot be placed within it are dropped.
  explicit DropSet(uint32_t probeLimit)
      : DropSet{probeLimit, kInitialCapacity} {}

  DropSet(const DropSet&) = delete;
  DropSet& operator=(const DropSet&) = delete;

  DropSet(DropSet&& other) noexcept
      : probeLimit_{other.probeLimit_},
        capacity_{other.capacity_},
        size_{other.size_},
        creationTimeSecs_{other.creationTimeSecs_},
        table_{std::move(other.table_)} {}

  // probeLimit_ and capacity_ are const, so member-wise move assignment is
  // impossible; instead destroy the table and re-construct in place.
  DropSet& operator=(DropSet&& other) noexcept {
    table_.reset();
    new (this) DropSet{std::move(other)};
    return *this;
  }

  // Inserts @key, growing the table first when it is ~90% full. The key may
  // still be silently dropped if placing it would exceed the probe limit.
  void insert(UInt key) {
    // Rebuild at 90% full with allocation of +50% of space means minimum
    // load factor is 60% and average is 75%.
    // Although this computation is on the hot path, compiler elides division
    // and benchmark shows that there is no difference performance penalty
    // compared to storing this number precomputed.
    if (size_ >= rehashSize()) {
      DropSet dst{probeLimit_, rehashCapacity()};
      // Scan the entire backing array, including the probeLimit_ overflow
      // slots past capacity_ where displaced keys may live (see
      // insertNoResize). Scanning only [0, capacity_) would silently lose
      // every key stored in those overflow slots on each rehash.
      for (uint32_t i = 0; i < capacity_ + probeLimit_; i++) {
        if (table_[i] != kEmptyKey) {
          dst.insertNoResize(table_[i]);
        }
      }
      *this = std::move(dst);
    }
    insertNoResize(key);
  }

  // Returns true if @key is present. Note: a key equal to kEmptyKey (0) is
  // indistinguishable from an empty slot, so lookup(0) can report false
  // positives -- acceptable for an approximate set.
  bool lookup(UInt key) const {
    uint32_t lookupProbeDistance{0};
    uint32_t i = bucketOf(key);
    // If probe length is really small, we may try avoiding the DIB condition
    // all together and just scan up to probe limit. This optimization may be
    // very workload dependent though.
    while (lookupProbeDistance < probeLimit_) {
      DCHECK_LT(i, capacity_ + probeLimit_);
      if (table_[i] == key) {
        return true;
      }
      // our lookup key is not in its ideal position. we relocate a key only if
      // its probe distance is smaller than probe distance of the key being
      // inserted. compare if the existing key has a higher probe distance. If
      // the key at current position has a lower probe distance from its
      // original bucket position, there is no chance of the key we are looking
      // for being relocated beyond this position
      if (table_[i] == kEmptyKey || lookupProbeDistance > probeDistance(i)) {
        break;
      }
      lookupProbeDistance++;
      i++;
    }
    return false;
  }

  // Clears all entries (capacity is retained) and restarts the creation
  // clock used by getCreationTimeSecs().
  void reset() {
    size_ = 0;
    std::memset(table_.get(), 0, tableByteSize());
    creationTimeSecs_ = util::getCurrentTimeSec();
  }

  // Number of keys currently stored (dropped keys are not counted).
  uint32_t size() const { return size_; }

  // Seconds-since-epoch timestamp of construction or last reset().
  uint32_t getCreationTimeSecs() const { return creationTimeSecs_; }

 private:
  static constexpr uint32_t kInitialCapacity{4};
  // Sentinel marking an empty slot; a real key of 0 aliases with it.
  static constexpr UInt kEmptyKey{0};

  explicit DropSet(uint32_t probeLimit, uint32_t capacity)
      : probeLimit_{probeLimit},
        capacity_{capacity},
        creationTimeSecs_{util::getCurrentTimeSec()},
        // +probeLimit extra slots so probing never has to wrap around.
        table_{std::make_unique<UInt[]>(capacity + probeLimit)} {}

  size_t tableByteSize() const {
    return sizeof(UInt) * (capacity_ + probeLimit_);
  }

  // Rehash threshold: 90% of capacity.
  uint32_t rehashSize() const { return capacity_ * 9 / 10; }

  // Grow by 50%, but never below 16 slots.
  uint32_t rehashCapacity() const { return std::max(capacity_ * 3 / 2, 16u); }

  uint32_t bucketOf(UInt key) const {
    // See "A fast alternative to the modulo reduction" by D. Lemire for proof.
    // Method works for well distributes keys (i.e., good hash function).
    return (uint64_t{key} * capacity_) >> 32;
  }

  // Distance of the key stored at @pos from its ideal bucket (aka DIB).
  uint32_t probeDistance(uint32_t pos) const {
    DCHECK_GE(pos, bucketOf(table_[pos]));
    return pos - bucketOf(table_[pos]);
  }

  // Robin Hood insert without growth. If the displacement chain exceeds
  // probeLimit_, whichever key is "in hand" at that point is dropped (size_
  // is not incremented for it) -- this is the "drop" in DropSet.
  void insertNoResize(UInt key) {
    uint32_t insertProbeDistance{0};
    uint32_t i = bucketOf(key);
    while (insertProbeDistance < probeLimit_) {
      DCHECK_LT(i, capacity_ + probeLimit_);
      if (table_[i] == kEmptyKey) {
        table_[i] = key;
        size_++;
        return;
      }
      auto currProbeDistance = probeDistance(i);
      // "Steal from the rich": if the resident key sits at least as close to
      // its ideal bucket as we do to ours, swap and carry it forward.
      if (currProbeDistance <= insertProbeDistance) {
        std::swap(key, table_[i]);
        insertProbeDistance = currProbeDistance;
      }
      insertProbeDistance++;
      i++;
    }
  }

  const uint32_t probeLimit_;
  const uint32_t capacity_;
  uint32_t size_{};
  time_t creationTimeSecs_; // creation time in seconds since epoch
  std::unique_ptr<UInt[]> table_;
};
// Approximate set made of several splits which are populated in FIFO order
class ApproxSplitSet {
 public:
  // May want to use 64 bit if too many collisions. Good for use cases we have.
  using Split = DropSet<uint32_t>;

  // @param numEntries  approximate total number of keys tracked, spread
  //                    evenly across the splits
  // @param numSplits   number of FIFO splits; the oldest split is recycled
  //                    once all splits have filled up
  // @throws std::invalid_argument if numEntries or numSplits is 0
  ApproxSplitSet(uint64_t numEntries, uint32_t numSplits)
      : numSplits_{numSplits},
        // Guard the division here: numSplits == 0 is rejected in the body,
        // but member initializers run first and must not divide by zero.
        maxSplitSize_{numSplits == 0
                          ? 0u
                          : static_cast<uint32_t>(numEntries / numSplits)} {
    if (numEntries == 0) {
      throw std::invalid_argument("NumEntries for ApproxDropSplit is 0");
    }
    if (numSplits == 0) {
      throw std::invalid_argument("NumSplits for ApproxDropSplit is 0");
    }
    addNewSplit();
  }

  // Returns true if @key already existed in the set before insert
  bool insert(uint64_t key) {
    std::lock_guard<std::mutex> l(mutex_);
    DCHECK(!splits_.empty());
    // Important to lookup first, because entry may be in another split, not the
    // one we're currently inserting into.
    auto exists = lookupLocked(key);
    if (!exists) {
      if (splits_.back().size() >= maxSplitSize_) {
        addNewSplit();
      }
      splits_.back().insert(key);
    }
    return exists;
  }

  // Forgets all tracked keys and starts over with a single fresh split.
  void reset() {
    std::lock_guard<std::mutex> lock{mutex_};
    splits_.clear();
    addNewSplit();
  }

  uint32_t numSplits() const { return numSplits_; }
  uint64_t maxSplitSize() const { return maxSplitSize_; }

  // Total number of keys currently tracked across all live splits.
  uint64_t numKeysTracked() const {
    std::lock_guard<std::mutex> lock{mutex_};
    return std::accumulate(
        splits_.begin(), splits_.end(), 0ULL,
        [](uint64_t a, const Split& s) { return a += s.size(); });
  }

  // Seconds since the oldest live split was created, i.e. roughly how far
  // back in time this set can still remember insertions.
  uint32_t trackingWindowDurationSecs() const {
    std::lock_guard<std::mutex> lock{mutex_};
    if (splits_.empty()) {
      return 0;
    }
    return util::getCurrentTimeSec() - splits_.front().getCreationTimeSecs();
  }

 private:
  static constexpr uint32_t kProbeLimit{10};

  // Linear scan over every split; caller must hold mutex_.
  bool lookupLocked(uint64_t key) const {
    for (const auto& s : splits_) {
      if (s.lookup(key)) {
        return true;
      }
    }
    return false;
  }

  // Appends an empty split. Once numSplits_ splits exist, the oldest split
  // is emptied and recycled (FIFO) instead of allocating a new one.
  void addNewSplit() {
    if (splits_.size() < numSplits_) {
      splits_.push_back(Split{kProbeLimit});
    } else {
      DCHECK(!splits_.empty());
      auto s = std::move(splits_.front());
      s.reset();
      splits_.pop_front();
      splits_.push_back(std::move(s));
    }
  }

  mutable std::mutex mutex_;
  const uint32_t numSplits_{};
  const uint32_t maxSplitSize_{};
  std::deque<Split> splits_;
};
} // namespace cachelib
} // namespace facebook