aios/storage/indexlib/index/common/hash_table/CuckooHashTable.h (1,097 lines of code) (raw):

/* * Copyright 2014-present Alibaba Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #pragma once #include <algorithm> #include "autil/MurmurHash.h" #include "indexlib/index/common/hash_table/BucketCompressor.h" #include "indexlib/index/common/hash_table/ClosedHashTableIterator.h" #include "indexlib/index/common/hash_table/ClosedHashTableTraits.h" #include "indexlib/index/common/hash_table/HashTableBase.h" #include "indexlib/index/common/hash_table/HashTableOptions.h" #include "indexlib/index/common/hash_table/SpecialKeyBucket.h" #include "indexlib/util/Status.h" namespace indexlibv2::index { #ifndef CACHE_LINE_SIZE #define CACHE_LINE_SIZE 64U #endif template <typename _KT, typename _VT, bool HasSpecialKey = ClosedHashTableTraits<_KT, _VT, false>::HasSpecialKey, bool useCompactBucket = false> class CuckooHashTableBase : public HashTableBase { public: CuckooHashTableBase() : _bucket(NULL) , _bucketCount(0) , _blockCount(0) , _nu_hashFunc(2) , _occupancyPct(0) , _bFSDepth(100) , _curCallId(0) , _deleteCount(0) { } ~CuckooHashTableBase() {} public: typedef _KT KeyType; typedef _VT ValueType; struct HashTableHeader { uint8_t version = 0; uint8_t nu_hashFunc = 0; uint8_t maxNu_hashFunc = 8; uint64_t bucketCount = 0; uint64_t keyCount = 0; uint64_t stretchSize = 0; uint64_t padding[0] __attribute__((aligned(CACHE_LINE_SIZE))); }; static const int32_t OCCUPANCY_PCT = 80; static const uint32_t BLOCK_SIZE = 4; static const uint64_t MAX_NUM_BFS_TREE_NODE = 1048576UL; // 1 << 20 static constexpr double STRETCH_MEM_RATIO = 0.01; public: typedef typename ClosedHashTableTraits<_KT, _VT, useCompactBucket>::Bucket Bucket; private: struct TreeNode { static const uint64_t BFS_ROOT_IDX = std::numeric_limits<uint64_t>::max() >> 8; uint64_t _bucketId; // first bucket id in block uint64_t _parent : 56; // parent's idx in bfsTree uint64_t _inBlockId : 8; // parent's inBlockId in its block TreeNode(uint64_t bucketId, uint64_t parent, uint8_t inBlockId) : _bucketId(bucketId) , _parent(parent) , _inBlockId(inBlockId) { } }; typedef std::vector<TreeNode> TreeNodeVec; public: virtual indexlib::util::Status Find(uint64_t key, autil::StringView& value) const override = 0; indexlib::util::Status FindForReadWrite(uint64_t key, autil::StringView& value, autil::mem_pool::Pool* pool) const override final { assert(false); // cuckoo hash table do not support mem table return indexlib::util::Status::NOT_FOUND; } public: int32_t GetRecommendedOccupancy(int32_t occupancy) const override final { return DoGetRecommendedOccupancy(occupancy); } uint64_t CapacityToTableMemory(uint64_t maxKeyCount, const HashTableOptions& options) const override; uint64_t CapacityToBuildMemory(uint64_t maxKeyCount, const HashTableOptions& options) const override; size_t TableMemroyToCapacity(size_t tableMemory, int32_t occupancyPct) const override; size_t BuildMemoryToCapacity(size_t buildMemory, int32_t occupancyPct) const override; size_t BuildMemoryToTableMemory(size_t buildMemory, int32_t occupancyPct) const override; uint64_t Capacity() const override { return BucketCountToCapacity(_bucketCount, _occupancyPct); } public: static int32_t DoGetRecommendedOccupancy(int32_t occupancy) { return (occupancy > 97) ? 97 : occupancy; } static uint64_t DoCapacityToTableMemory(uint64_t maxKeyCount, const HashTableOptions& options); static uint64_t DoCapacityToBuildMemory(uint64_t maxKeyCount, const HashTableOptions& options); static size_t DoTableMemroyToCapacity(size_t tableMemory, int32_t occupancyPct); static size_t DoBuildMemoryToCapacity(size_t buildMemory, int32_t occupancyPct); static size_t DoBuildMemoryToTableMemory(size_t buildMemory, int32_t occupancyPct); public: bool Insert(uint64_t key, const autil::StringView& value) override; bool Delete(uint64_t key, const autil::StringView& value = autil::StringView()) override; public: bool MountForWrite(void* data, size_t size, const HashTableOptions& options = OCCUPANCY_PCT) override; bool MountForRead(const void* data, size_t size) override; uint64_t Size() const override { return Header()->keyCount; } bool IsFull() const override { return Size() >= Capacity(); } uint64_t MemoryUse() const override; uint64_t BuildAssistantMemoryUse() const override; bool Shrink(int32_t occupancyPct = 0) override; int32_t GetOccupancyPct() const override { return _occupancyPct; } uint64_t GetDeleteCount() const override { return _deleteCount; } public: bool Insert(const _KT& key, const _VT& value); indexlib::util::Status Find(const _KT& key, const _VT*& value) const; indexlib::util::Status FindForReadWrite(const _KT& key, _VT& value) const; uint64_t BucketCount() const { return _bucketCount; } bool Stretch() override; void* Address() const override { return Header(); } // ATTENTION: ClosedHashTableTraits<_KT, _VT>::HasSpecialKey may not equal HasSpecialKey typedef ClosedHashTableIterator<_KT, _VT, Bucket, ClosedHashTableTraits<_KT, _VT, useCompactBucket>::HasSpecialKey> Iterator; Iterator* CreateIterator() { return new Iterator(_bucket, _bucketCount, Size()); } void PrintHashTable() const; size_t Compress(BucketCompressor* bucketCompressor) override; public: // public for CuckooHashTableFileReader static _KT CuckooHash(const _KT& key, uint32_t hashFuncId); static uint64_t GetFirstBucketIdInBlock(const _KT& hash, uint64_t blockCount) { return (blockCount > std::numeric_limits<uint32_t>::max()) ? ((hash % blockCount) * BLOCK_SIZE) : ((uint64_t)((uint32_t)hash % (uint32_t)blockCount) * BLOCK_SIZE); } // { return (hash % blockCount) * BLOCK_SIZE; } // { return ((uint32_t)hash % (uint32_t) blockCount) * BLOCK_SIZE; } // { return (hash % (uint32_t) blockCount) * BLOCK_SIZE; } protected: HashTableHeader* Header() const { return (HashTableHeader*)((uint8_t*)_bucket - sizeof(HashTableHeader)); } Bucket* InternalFindBucket(const _KT& key); Bucket* DoInternalFindBucket(const _KT& key); private: static uint64_t CapacityToBucketCount(uint64_t maxKeyCount, int32_t occupancyPct); static uint64_t BucketCountToCapacity(uint64_t bucketCount, int32_t occupancyPct); bool ReHash(uint64_t newBucketCount); const Bucket* FindBucketForRead(const _KT& key, uint8_t nu_hashFunc) const; template <typename functor> bool InternalInsert(const _KT key, const _VT& value); Bucket* BFSFindBucket(TreeNodeVec& bfsTree); Bucket* CuckooKick(TreeNodeVec& bfsTree, uint32_t inEmptyBlockId); bool EvictionInsert(Bucket seekingBucket, std::vector<bool>& bitmap); Bucket* FindBucketForEviction(const _KT& key, std::vector<bool>& bitmap); Bucket* BFSFindBucketForEviction(TreeNodeVec& bfsTree, std::vector<bool>& bitmap); bool CheckBucket(const _KT& key, uint64_t targetBucketId) const; protected: Bucket* _bucket; uint64_t _bucketCount; uint64_t _blockCount; uint8_t _nu_hashFunc; int32_t _occupancyPct; uint32_t _bFSDepth; uint32_t _curCallId; std::vector<uint32_t> _callIdVec; uint64_t _deleteCount; protected: AUTIL_LOG_DECLARE(); friend class CuckooHashTableTest; }; /////////////////////////////////////////// template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> alog::Logger* CuckooHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::_logger = alog::Logger::getLogger("indexlib.index.CuckooHashTableBase"); template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> bool CuckooHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::Insert(const _KT& key, const _VT& value) { struct InsertFunctor { void operator()(Bucket& bucket, const _KT& key, const _VT& value, uint64_t& deleteCount) { if (bucket.IsDeleted()) { deleteCount--; } bucket.Set(key, value); } }; return InternalInsert<InsertFunctor>(key, value); } template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> inline bool CuckooHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::Insert(uint64_t key, const autil::StringView& value) { const _VT& v = *reinterpret_cast<const _VT*>(value.data()); assert(sizeof(_VT) == value.size()); return Insert(key, v); } template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> inline bool CuckooHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::Delete(uint64_t key, const autil::StringView& value) { struct DeleteFunctor { void operator()(Bucket& bucket, const _KT& key, const _VT& value, uint64_t& deleteCount) { if (!bucket.IsDeleted()) { deleteCount++; } bucket.SetDelete(key, value); } }; if (value.empty()) { return InternalInsert<DeleteFunctor>(key, _VT()); } const _VT& v = *reinterpret_cast<const _VT*>(value.data()); assert(sizeof(_VT) == value.size()); return InternalInsert<DeleteFunctor>(key, v); } template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> inline size_t CuckooHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::Compress(BucketCompressor* bucketCompressor) { char* begin = static_cast<char*>((void*)_bucket); char* outBuffer = begin; for (uint64_t i = 0u; i < _bucketCount; ++i) { size_t compressedBucketSize = bucketCompressor->Compress(static_cast<char*>((void*)(_bucket + i)), sizeof(Bucket), outBuffer); outBuffer += compressedBucketSize; } return sizeof(HashTableHeader) + (outBuffer - begin); } template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> inline uint64_t CuckooHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::CapacityToTableMemory( uint64_t maxKeyCount, const HashTableOptions& options) const { return DoCapacityToTableMemory(maxKeyCount, options); } template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> inline uint64_t CuckooHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::DoCapacityToTableMemory(uint64_t maxKeyCount, const HashTableOptions& options) { int32_t occupancyPct = options.occupancyPct; size_t bucketSize = sizeof(Bucket) * CapacityToBucketCount(maxKeyCount, occupancyPct); if (options.mayStretch) { bucketSize = bucketSize * (1 + STRETCH_MEM_RATIO) + 1; // +1 for decimal bucketSize += sizeof(Bucket) * BLOCK_SIZE; // at least one block } return bucketSize + sizeof(HashTableHeader); } template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> inline uint64_t CuckooHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::CapacityToBuildMemory( uint64_t maxKeyCount, const HashTableOptions& options) const { return DoCapacityToBuildMemory(maxKeyCount, options); } template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> inline uint64_t CuckooHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::DoCapacityToBuildMemory(uint64_t maxKeyCount, const HashTableOptions& options) { size_t hashTableSize = DoCapacityToTableMemory(maxKeyCount, options); size_t bucketCount = CapacityToBucketCount(maxKeyCount, options.occupancyPct); size_t blockCount = bucketCount / BLOCK_SIZE; size_t callIdSize = blockCount * sizeof(uint32_t); size_t bfsTreeSize = std::min(blockCount, (size_t)MAX_NUM_BFS_TREE_NODE) * sizeof(TreeNode); return hashTableSize + callIdSize + bfsTreeSize; } template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> inline size_t CuckooHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::DoTableMemroyToCapacity(size_t tableMemory, int32_t occupancyPct) { if (tableMemory < sizeof(HashTableHeader)) { return 0; } uint64_t bucketCount = (tableMemory - sizeof(HashTableHeader)) / sizeof(Bucket); return bucketCount * occupancyPct / 100; } template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> inline size_t CuckooHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::TableMemroyToCapacity(size_t tableMemory, int32_t occupancyPct) const { return DoTableMemroyToCapacity(tableMemory, occupancyPct); } template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> inline size_t CuckooHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::DoBuildMemoryToCapacity(size_t buildMemory, int32_t occupancyPct) { // Header, Bukcet, Callid, BFSTree if (buildMemory < sizeof(HashTableHeader)) { return 0; } size_t leftMemory = buildMemory - sizeof(HashTableHeader); size_t maxKeyCountThreshhold = MAX_NUM_BFS_TREE_NODE * BLOCK_SIZE * occupancyPct / 100; size_t threshhold = DoCapacityToBuildMemory(maxKeyCountThreshhold, occupancyPct); size_t bucketCount = 0; if (buildMemory >= threshhold) { leftMemory -= (size_t)MAX_NUM_BFS_TREE_NODE * sizeof(TreeNode); size_t blockCount = leftMemory / (sizeof(uint32_t) + BLOCK_SIZE * sizeof(Bucket)); bucketCount = blockCount * BLOCK_SIZE; } else { size_t blockCount = leftMemory / (sizeof(uint32_t) + sizeof(TreeNode) + BLOCK_SIZE * sizeof(Bucket)); bucketCount = blockCount * BLOCK_SIZE; } return bucketCount * occupancyPct / 100; } template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> inline size_t CuckooHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::BuildMemoryToCapacity(size_t buildMemory, int32_t occupancyPct) const { return DoBuildMemoryToCapacity(buildMemory, occupancyPct); } template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> inline size_t CuckooHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::DoBuildMemoryToTableMemory(size_t buildMemory, int32_t occupancyPct) { size_t maxKeyCount = DoBuildMemoryToCapacity(buildMemory, occupancyPct); return DoCapacityToTableMemory(maxKeyCount, occupancyPct); } template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> inline size_t CuckooHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::BuildMemoryToTableMemory(size_t buildMemory, int32_t occupancyPct) const { return DoBuildMemoryToTableMemory(buildMemory, occupancyPct); } template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> inline uint64_t CuckooHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::CapacityToBucketCount(uint64_t maxKeyCount, int32_t occupancyPct) { assert(occupancyPct > 0 && occupancyPct <= 100); uint64_t bucketCount = ((maxKeyCount > 0 ? maxKeyCount : 1) * 100.0 + occupancyPct - 1) / occupancyPct; return (bucketCount + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE; } template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> inline uint64_t CuckooHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::BucketCountToCapacity(uint64_t bucketCount, int32_t occupancyPct) { assert(occupancyPct > 0 && occupancyPct <= 100); uint64_t maxKeyCount = bucketCount * occupancyPct / 100.0; return maxKeyCount; } template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> inline bool CuckooHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::MountForWrite(void* data, size_t size, const HashTableOptions& inputOptions) { HashTableOptions options(OCCUPANCY_PCT); if (inputOptions.Valid()) { options = inputOptions; } if (size < sizeof(HashTableHeader)) { AUTIL_LOG(ERROR, "not enough space, header size[%lu], give[%lu]", sizeof(HashTableHeader), size); return false; } size_t bucketSize = size - sizeof(HashTableHeader); size_t stretchSize = 0; if (options.mayStretch) { stretchSize = sizeof(Bucket) * BLOCK_SIZE; stretchSize += (bucketSize - bucketSize / (1 + STRETCH_MEM_RATIO)); bucketSize -= stretchSize; } _occupancyPct = options.occupancyPct; _blockCount = bucketSize / sizeof(Bucket) / BLOCK_SIZE; if (_blockCount < 1) { AUTIL_LOG(ERROR, "size[%lu] too small", size); return false; } _bucketCount = _blockCount * BLOCK_SIZE; _callIdVec.resize(_blockCount, 0); _curCallId = 0; if (!data) { AUTIL_LOG(ERROR, "data is null"); return false; } HashTableHeader* header = (HashTableHeader*)data; header->nu_hashFunc = _nu_hashFunc; header->bucketCount = _bucketCount; header->keyCount = 0; header->maxNu_hashFunc = options.maxNu_hashFunc; header->stretchSize = stretchSize; _bucket = (Bucket*)(header + 1); new (_bucket) Bucket[_bucketCount]; AUTIL_LOG(INFO, "mount for write, size[%lu], occupancyPct[%d], bucketCount[%lu], " "_bucket[%lu], headerSize[%lu]", size, _occupancyPct, _bucketCount, (uint64_t)_bucket, sizeof(HashTableHeader)); return true; } template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> inline bool CuckooHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::MountForRead(const void* data, size_t size) { assert(data); if (size < sizeof(HashTableHeader)) { AUTIL_LOG(ERROR, "too small size[%lu], header size[%lu]", size, sizeof(HashTableHeader)); return false; } HashTableHeader* header = (HashTableHeader*)data; _nu_hashFunc = header->nu_hashFunc; _bucketCount = header->bucketCount; _blockCount = _bucketCount / BLOCK_SIZE; _occupancyPct = header->keyCount * 100 / _bucketCount; if (size < sizeof(HashTableHeader) + _bucketCount * sizeof(Bucket)) { AUTIL_LOG(ERROR, "too small size[%lu], header size[%lu], bucket size[%lu]", size, sizeof(HashTableHeader), _bucketCount * sizeof(Bucket)); return false; } _bucket = (Bucket*)(header + 1); if ((uint64_t)_bucket % CACHE_LINE_SIZE != 0) { AUTIL_LOG(INFO, "mount for read, headerSize[%lu], _bucket[%lu]", sizeof(HashTableHeader), (uint64_t)_bucket); } return true; } template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> inline indexlib::util::Status CuckooHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::Find(const _KT& key, const _VT*& value) const { const Bucket* bucket = FindBucketForRead(key, _nu_hashFunc); if (bucket == NULL) { return indexlib::util::NOT_FOUND; } value = &(bucket->Value()); return bucket->IsDeleted() ? indexlib::util::DELETED : indexlib::util::OK; } template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> inline indexlib::util::Status CuckooHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::FindForReadWrite(const _KT& key, _VT& value) const { while (true) { const Bucket* bucket = FindBucketForRead(key, Header()->nu_hashFunc); indexlib::util::Status status = indexlib::util::NOT_FOUND; if (bucket == NULL) { // maybe: during a kick process, (false NOT_FOUND) could happen return status; } value = bucket->Value(); status = (bucket->IsDeleted()) ? indexlib::util::DELETED : indexlib::util::OK; // IsEmpty(): old k,v kicked out // Key Not Equal: new k,v already replaced old k,v // Value not Equal: very rare situation (k,v kicked out and then kicked back with k,v') if (likely(!bucket->IsEmpty() && bucket->IsEqual(key) && bucket->Value() == value)) { return status; } } } template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> inline void CuckooHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::PrintHashTable() const { std::stringstream ss; for (uint32_t i = 0; i < _blockCount; ++i) { ss << "[" << i << "] "; for (uint32_t inBlockId = 0; inBlockId < BLOCK_SIZE; ++inBlockId) { const Bucket& bucket = _bucket[i * BLOCK_SIZE + inBlockId]; const auto& key = bucket.Key(); if (bucket.IsEmpty() || bucket.IsDeleted()) { ss << (bucket.IsEmpty() ? "E[] " : "D[] "); } else { ss << key << "["; for (uint8_t hashFuncId = 0; hashFuncId < _nu_hashFunc - 1; ++hashFuncId) { ss << (uint32_t)CuckooHash(key, hashFuncId) % (uint32_t)_blockCount << ","; } ss << (uint32_t)CuckooHash(key, _nu_hashFunc - 1) % (uint32_t)_blockCount << "] "; } } ss << std::endl; } std::cerr << ss.str() << std::endl; } template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> inline bool CuckooHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::CheckBucket(const _KT& key, uint64_t targetBucketId) const { uint64_t targetBlockId = targetBucketId / BLOCK_SIZE; for (uint32_t hashFuncId = 0; hashFuncId < _nu_hashFunc; ++hashFuncId) { const _KT& hash = CuckooHash(key, hashFuncId); uint64_t blockId = GetFirstBucketIdInBlock(hash, _blockCount) / BLOCK_SIZE; if (blockId == targetBlockId) { return true; } } std::stringstream ss; ss << "MV " << key << "["; for (uint8_t hashFuncId = 0; hashFuncId < _nu_hashFunc - 1; ++hashFuncId) { const _KT& hash = CuckooHash(key, hashFuncId); ss << GetFirstBucketIdInBlock(hash, _blockCount) / BLOCK_SIZE << ","; } const _KT& hash = CuckooHash(key, _nu_hashFunc); ss << GetFirstBucketIdInBlock(hash, _blockCount) / BLOCK_SIZE << "] to wrong block [" << targetBlockId << "]"; std::cerr << ss.str() << std::endl; return false; } template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> inline bool CuckooHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::Shrink(int32_t occupancyPct) { assert(_occupancyPct > 0 && _occupancyPct <= 100); assert(occupancyPct >= 0 && occupancyPct <= 100); uint64_t oldBucketCount = _bucketCount; uint64_t newBucketCount = CapacityToBucketCount(Size(), (occupancyPct > 0 ? occupancyPct : _occupancyPct)); assert(newBucketCount > 0); if (newBucketCount >= oldBucketCount) { return true; } if (ReHash(newBucketCount)) { _occupancyPct = occupancyPct > 0 ? occupancyPct : _occupancyPct; return true; } AUTIL_LOG(WARN, "Shrink fail, recover it now"); return ReHash(oldBucketCount); } template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> inline bool CuckooHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::Stretch() { uint64_t newBlockCount = Header()->stretchSize / sizeof(Bucket) / BLOCK_SIZE + _blockCount; if (newBlockCount <= _blockCount) { return true; } Header()->stretchSize = 0; // only support stretch once now uint64_t newBucketCount = newBlockCount * BLOCK_SIZE; new (&_bucket[_bucketCount]) Bucket[newBucketCount - _bucketCount]; return ReHash(newBucketCount); } template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> inline bool CuckooHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::ReHash(uint64_t newBucketCount) { uint64_t oldBucketCount = _bucketCount; _bucketCount = newBucketCount; assert(_bucketCount % BLOCK_SIZE == 0); _blockCount = _bucketCount / BLOCK_SIZE; _nu_hashFunc = 2; Header()->nu_hashFunc = 2; Header()->bucketCount = newBucketCount; _callIdVec.assign(_blockCount, 0); _curCallId = 0; std::vector<bool> bitmap(newBucketCount, false); // first part, these buckets in new and also in old region for (uint64_t i = 0; i < newBucketCount; ++i) { if (bitmap[i]) // processed bucket { continue; } if (_bucket[i].IsEmpty()) // still empty { bitmap[i] = true; continue; } Bucket evictedBucket = _bucket[i]; new (&_bucket[i]) Bucket; bitmap[i] = true; if (unlikely(!EvictionInsert(evictedBucket, bitmap))) { Header()->bucketCount = oldBucketCount; _bucketCount = oldBucketCount; _blockCount = _bucketCount / BLOCK_SIZE; AUTIL_LOG(ERROR, "ReHash failed, newBucketCount[%lu], oldBucketCount[%lu]", newBucketCount, oldBucketCount); return false; } } // second part, these buckets in old region only for (size_t i = newBucketCount; i < oldBucketCount; ++i) { Bucket& bucket = _bucket[i]; if (!bucket.IsEmpty()) { Bucket* newBucket = InternalFindBucket(bucket.Key()); if (unlikely(!newBucket)) { Header()->bucketCount = oldBucketCount; _bucketCount = oldBucketCount; _blockCount = _bucketCount / BLOCK_SIZE; AUTIL_LOG(ERROR, "ReHash failed, newBucketCount[%lu], oldBucketCount[%lu]", newBucketCount, oldBucketCount); return false; } *newBucket = bucket; } } return true; } template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> inline bool CuckooHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::EvictionInsert(Bucket seekingBucket, std::vector<bool>& bitmap) { while (true) { Bucket* bucket = FindBucketForEviction(seekingBucket.Key(), bitmap); if (!bucket) // can not find a bucket, rehash { if (_nu_hashFunc >= Header()->maxNu_hashFunc) { AUTIL_LOG(ERROR, "can not find bucket to store key[%lu] nu_hashFunc[%u]", (uint64_t)seekingBucket.Key(), _nu_hashFunc); return false; } ++_nu_hashFunc; ++Header()->nu_hashFunc; AUTIL_LOG(WARN, "Increase nu_hashFunc to [%u], size[%lu], bucketCount[%lu]", _nu_hashFunc, Size(), _bucketCount); continue; } assert(CheckBucket(seekingBucket.Key(), bucket - _bucket)); if (bucket->IsEmpty()) // got an empty bucket, store and return { *bucket = seekingBucket; return true; } else // got a non-empty & unreached bucket, swap and continue { Bucket evictedBucket = *bucket; *bucket = seekingBucket; seekingBucket = evictedBucket; } } return true; } template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> inline typename CuckooHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::Bucket* CuckooHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::FindBucketForEviction(const _KT& key, std::vector<bool>& bitmap) { TreeNodeVec bfsTree; for (uint32_t hashFuncId = 0; hashFuncId < _nu_hashFunc; ++hashFuncId) { const _KT& hash = CuckooHash(key, hashFuncId); uint64_t firstBucketId = GetFirstBucketIdInBlock(hash, _blockCount); for (uint32_t curBucketId = firstBucketId; curBucketId < firstBucketId + BLOCK_SIZE; ++curBucketId) { Bucket* curBucket = &(_bucket[curBucketId]); if (curBucket->IsEmpty() || !bitmap[curBucketId]) { bitmap[curBucketId] = true; return curBucket; } } bfsTree.push_back(TreeNode(firstBucketId, TreeNode::BFS_ROOT_IDX, 0)); } // find a bucket through cuckoo kick // 4 kick 3 kick 2 kick 1, return bucket 4 with 1's old content // if an empty bucket was kicked out, we have a new empty bucket // if an un-reached bucket was kicked out, store it in the new empty bucket return BFSFindBucketForEviction(bfsTree, bitmap); } template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> inline typename CuckooHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::Bucket* CuckooHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::BFSFindBucketForEviction(TreeNodeVec& bfsTree, std::vector<bool>& bitmap) { ++_curCallId; for (uint32_t i = 0; i < bfsTree.size(); ++i) { _callIdVec[bfsTree[i]._bucketId / BLOCK_SIZE] = _curCallId; } uint64_t idx = 0; uint64_t nextDepthBeginIdx = 0; uint32_t nextDepth = 0; while (idx < bfsTree.size()) { if (idx == nextDepthBeginIdx) { if (++nextDepth > _bFSDepth) // got all nodes with Depth = 0, 1, 2, ..., _dfsDepth { break; } nextDepthBeginIdx = bfsTree.size(); } uint64_t bucketId = bfsTree[idx]._bucketId; for (uint32_t inBlockId = 0; inBlockId < BLOCK_SIZE; ++inBlockId) { const _KT& key = _bucket[bucketId + inBlockId].Key(); for (uint32_t hashFuncId = 0; hashFuncId < _nu_hashFunc; ++hashFuncId) { const _KT& hash = CuckooHash(key, hashFuncId); uint64_t firstBucketId = GetFirstBucketIdInBlock(hash, _blockCount); if (_callIdVec[firstBucketId / BLOCK_SIZE] == _curCallId) { continue; } _callIdVec[firstBucketId / BLOCK_SIZE] = _curCallId; for (uint32_t curBucketId = firstBucketId; curBucketId < firstBucketId + BLOCK_SIZE; ++curBucketId) { Bucket& curBucket = _bucket[curBucketId]; if (curBucket.IsEmpty()) // got an empty bucket { bitmap[curBucketId] = true; bfsTree.push_back(TreeNode(firstBucketId, idx, inBlockId)); return CuckooKick(bfsTree, curBucketId - firstBucketId); } if (!bitmap[curBucketId]) // got a non-empty & unreached bucket { Bucket evictedBucket = curBucket; bitmap[curBucketId] = true; bfsTree.push_back(TreeNode(firstBucketId, idx, inBlockId)); Bucket* retBucket = CuckooKick(bfsTree, curBucketId - firstBucketId); *retBucket = evictedBucket; return retBucket; } } if (bfsTree.size() < MAX_NUM_BFS_TREE_NODE) { bfsTree.push_back(TreeNode(firstBucketId, idx, inBlockId)); } } } ++idx; } AUTIL_LOG(ERROR, "can not find a bucket"); return NULL; } template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> inline uint64_t CuckooHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::MemoryUse() const { return _bucketCount * sizeof(Bucket) + sizeof(HashTableHeader); } template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> inline uint64_t CuckooHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::BuildAssistantMemoryUse() const { size_t callIdSize = _callIdVec.size() * sizeof(uint32_t); size_t bfsTreeSize = std::min(_blockCount, (size_t)MAX_NUM_BFS_TREE_NODE) * sizeof(TreeNode); return callIdSize + bfsTreeSize; } template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> inline typename CuckooHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::Bucket* CuckooHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::InternalFindBucket(const _KT& key) { while (true) { Bucket* retBucket = DoInternalFindBucket(key); if (retBucket) { return retBucket; } if (_nu_hashFunc >= Header()->maxNu_hashFunc) { break; } ++_nu_hashFunc; ++(Header()->nu_hashFunc); AUTIL_LOG(WARN, "Increase nu_hashFunc to [%u], size[%lu], bucketCount[%lu]", _nu_hashFunc, Size(), _bucketCount); } AUTIL_LOG(ERROR, "can not find bucket to store key[%lu] nu_hashFunc[%u]", (uint64_t)key, _nu_hashFunc); return NULL; } template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> inline typename CuckooHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::Bucket* CuckooHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::DoInternalFindBucket(const _KT& key) { Bucket* retBucket = NULL; TreeNodeVec bfsTree; for (uint32_t hashFuncId = 0; hashFuncId < _nu_hashFunc; ++hashFuncId) { const _KT& hash = CuckooHash(key, hashFuncId); uint64_t firstBucketId = GetFirstBucketIdInBlock(hash, _blockCount); Bucket* curBucket = &(_bucket[firstBucketId]); for (uint32_t inBlockId = 0; inBlockId < BLOCK_SIZE; ++inBlockId) { if (curBucket->IsEqual(key)) // same key existed { return curBucket; } if (curBucket->IsEmpty() && !retBucket) // got an empty bucket { retBucket = curBucket; } ++curBucket; } bfsTree.push_back(TreeNode(firstBucketId, TreeNode::BFS_ROOT_IDX, 0)); } if (retBucket) { return retBucket; } return BFSFindBucket(bfsTree); } template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> inline const typename CuckooHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::Bucket* CuckooHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::FindBucketForRead(const _KT& key, uint8_t nu_hashFunc) const { uint64_t bucketId = GetFirstBucketIdInBlock(CuckooHash(key, 0), _blockCount); __builtin_prefetch(&(_bucket[bucketId]), 0, 1); const _KT& hash = CuckooHash(key, 1); uint64_t nextId = GetFirstBucketIdInBlock(hash, _blockCount); __builtin_prefetch(&(_bucket[nextId]), 0, 1); for (uint32_t inBlockId = 0; inBlockId < BLOCK_SIZE; ++inBlockId) { // line 1, hash = key const Bucket& curBucket = _bucket[bucketId + inBlockId]; if (curBucket.IsEmpty()) { return NULL; // or continue; for read-write } else if (curBucket.IsEqual(key)) { return &curBucket; } } for (uint32_t inBlockId = 0; inBlockId < BLOCK_SIZE; ++inBlockId) { // line 2, hash = func1 const Bucket& curBucket = _bucket[nextId + inBlockId]; if (curBucket.IsEmpty()) { return NULL; // or continue; for read-write } else if (curBucket.IsEqual(key)) { return &curBucket; } } for (uint32_t hashFuncId = 2; hashFuncId < nu_hashFunc; ++hashFuncId) { // line 1,2 put out of this loop because of performance const _KT& hash = CuckooHash(key, hashFuncId); uint64_t bucketId = GetFirstBucketIdInBlock(hash, _blockCount); for (uint32_t inBlockId = 0; inBlockId < BLOCK_SIZE; ++inBlockId) { const Bucket& curBucket = _bucket[bucketId + inBlockId]; if (curBucket.IsEmpty()) { return NULL; // or continue; for read-write } else if (curBucket.IsEqual(key)) { return &curBucket; } } } return NULL; } template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> template <typename functor> inline bool CuckooHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::InternalInsert(const _KT key, const _VT& value) { Bucket* bucket = InternalFindBucket(key); if (unlikely(!bucket)) { return false; } bool isNewKey = bucket->IsEmpty(); functor()(*bucket, key, value, _deleteCount); if (isNewKey) { ++(Header()->keyCount); } return true; } template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> inline typename CuckooHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::Bucket* CuckooHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::BFSFindBucket(TreeNodeVec& bfsTree) { ++_curCallId; for (uint32_t i = 0; i < bfsTree.size(); ++i) { _callIdVec[bfsTree[i]._bucketId / BLOCK_SIZE] = _curCallId; } uint64_t idx = 0; uint64_t nextDepthBeginIdx = 0; uint32_t nextDepth = 0; while (idx < bfsTree.size()) { if (idx == nextDepthBeginIdx) { ++nextDepth; if (nextDepth > _bFSDepth) // got all nodes with Depth = 0, 1, 2, ..., _dfsDepth { break; } nextDepthBeginIdx = bfsTree.size(); } uint64_t bucketId = bfsTree[idx]._bucketId; for (uint32_t inBlockId = 0; inBlockId < BLOCK_SIZE; ++inBlockId) { const _KT& key = _bucket[bucketId + inBlockId].Key(); for (uint32_t hashFuncId = 0; hashFuncId < _nu_hashFunc; ++hashFuncId) { const _KT& hash = CuckooHash(key, hashFuncId); uint64_t firstBucketId = GetFirstBucketIdInBlock(hash, _blockCount); if (_callIdVec[firstBucketId / BLOCK_SIZE] == _curCallId) { continue; } if (bfsTree.size() >= MAX_NUM_BFS_TREE_NODE) { AUTIL_LOG(WARN, "too many[%lu] node in bfs tree more than [%lu], skip it", bfsTree.size(), MAX_NUM_BFS_TREE_NODE); continue; } _callIdVec[firstBucketId / BLOCK_SIZE] = _curCallId; bfsTree.push_back(TreeNode(firstBucketId, idx, inBlockId)); Bucket* curBucket = &(_bucket[firstBucketId]); for (uint32_t inCurBlockId = 0; inCurBlockId < BLOCK_SIZE; ++inCurBlockId, ++curBucket) { if (curBucket->IsEmpty()) { return CuckooKick(bfsTree, inCurBlockId); } } } } ++idx; } AUTIL_LOG(ERROR, "can not find a bucket"); return NULL; } template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> inline typename CuckooHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::Bucket* CuckooHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::CuckooKick(TreeNodeVec& bfsTree, uint32_t emptyInBlockId) { uint64_t emptyIdx = bfsTree.size() - 1; uint64_t prevIdx = bfsTree[emptyIdx]._parent; while (prevIdx != TreeNode::BFS_ROOT_IDX) { uint8_t prevInBlockId = bfsTree[emptyIdx]._inBlockId; Bucket& emptyBucket = _bucket[bfsTree[emptyIdx]._bucketId + emptyInBlockId]; Bucket& prevBucket = _bucket[bfsTree[prevIdx]._bucketId + prevInBlockId]; assert(CheckBucket(prevBucket.Key(), bfsTree[emptyIdx]._bucketId + emptyInBlockId)); emptyBucket.Set(prevBucket); prevBucket.SetEmpty(); emptyInBlockId = prevInBlockId; emptyIdx = prevIdx; prevIdx = bfsTree[emptyIdx]._parent; } return &(_bucket[bfsTree[emptyIdx]._bucketId + emptyInBlockId]); } template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> inline _KT CuckooHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::CuckooHash(const _KT& key, uint32_t hashFuncId) { // too slow when use sharding // if (hashFuncId == 0) { // return key; // } static const uint64_t CUCKOO_MURMUR_SEED_MULTIPLIER = 816922183UL; return autil::MurmurHash::MurmurHash64A(&key, sizeof(key), CUCKOO_MURMUR_SEED_MULTIPLIER * hashFuncId); } /////////////////////////////////////////////////////////////////////////////// template <typename _KT, typename _VT, bool HasSpecialKey = ClosedHashTableTraits<_KT, _VT, false>::HasSpecialKey, bool useCompactBucket = false> class CuckooHashTable final : public CuckooHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket> { public: typedef CuckooHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket> Base; using Base::Find; using Base::OCCUPANCY_PCT; public: indexlib::util::Status Find(uint64_t key, autil::StringView& value) const override final { const _VT* typedValuePtr = NULL; auto status = Find((_KT)key, typedValuePtr); value = {(char*)typedValuePtr, sizeof(_VT)}; return status; } }; /////////////////////////////////////////////////////////////////////////// // hide some methods template <typename _KT, typename _VT, bool useCompactBucket> class CuckooHashTable<_KT, _VT, true, useCompactBucket> final : public CuckooHashTableBase<_KT, _VT, false, useCompactBucket> { public: typedef typename ClosedHashTableTraits<_KT, _VT, useCompactBucket>::Bucket Bucket; typedef typename ClosedHashTableTraits<_KT, _VT, useCompactBucket>::SpecialBucket SpecialBucket; private: typedef CuckooHashTableBase<_KT, _VT, false, useCompactBucket> Base; using Base::_bucket; using Base::_bucketCount; using Base::_logger; using Base::FindForReadWrite; using Base::OCCUPANCY_PCT; public: // public for CuckooHashTableFileIterator & CuckooHashTableFileReader typedef typename Base::HashTableHeader HashTableHeader; public: uint64_t CapacityToTableMemory(uint64_t maxKeyCount, const HashTableOptions& options) const override { return DoCapacityToTableMemory(maxKeyCount, options); } uint64_t CapacityToBuildMemory(uint64_t maxKeyCount, const HashTableOptions& options) const override { return DoCapacityToBuildMemory(maxKeyCount, options); } size_t TableMemroyToCapacity(size_t tableMemory, int32_t occupancyPct) const override { return DoTableMemroyToCapacity(tableMemory, occupancyPct); } size_t BuildMemoryToCapacity(size_t buildMemory, int32_t occupancyPct) const override { return DoBuildMemoryToCapacity(buildMemory, occupancyPct); } size_t BuildMemoryToTableMemory(size_t buildMemory, int32_t occupancyPct) const override { return DoBuildMemoryToTableMemory(buildMemory, occupancyPct); } public: static uint64_t DoCapacityToTableMemory(uint64_t maxKeyCount, const HashTableOptions& options) { return Base::DoCapacityToTableMemory(maxKeyCount, options) + sizeof(SpecialBucket) * 2; } static uint64_t DoCapacityToBuildMemory(uint64_t maxKeyCount, const HashTableOptions& options) { return Base::DoCapacityToBuildMemory(maxKeyCount, options) + sizeof(SpecialBucket) * 2; } static size_t DoTableMemroyToCapacity(size_t tableMemory, int32_t occupancyPct) { return Base::DoTableMemroyToCapacity(tableMemory - sizeof(SpecialBucket) * 2, occupancyPct); } static size_t DoBuildMemoryToCapacity(size_t buildMemory, int32_t occupancyPct) { return Base::DoBuildMemoryToCapacity(buildMemory - sizeof(SpecialBucket) * 2, occupancyPct); } static size_t DoBuildMemoryToTableMemory(size_t buildMemory, int32_t occupancyPct) { size_t maxKeyCount = DoBuildMemoryToCapacity(buildMemory, occupancyPct); return DoCapacityToTableMemory(maxKeyCount, occupancyPct); } public: indexlib::util::Status Find(uint64_t key, autil::StringView& value) const override final { const _VT* typedValuePtr = NULL; auto status = Find((_KT)key, typedValuePtr); value = {(char*)typedValuePtr, sizeof(_VT)}; return status; } bool Insert(const _KT& key, const _VT& value) { struct InsertFunctor { void operator()(Bucket& bucket, const _KT& key, const _VT& value, uint64_t& deleteCount) { if (bucket.IsDeleted()) { deleteCount--; } bucket.Set(key, value); } void operator()(SpecialBucket& bucket, const _KT& key, const _VT& value, uint64_t& deleteCount) { if (bucket.IsDeleted()) { deleteCount--; } bucket.Set(key, value); } }; return InternalInsert<InsertFunctor>(key, value); } bool Insert(uint64_t key, const autil::StringView& value) override { const _VT& v = *reinterpret_cast<const _VT*>(value.data()); assert(sizeof(_VT) == value.size()); return Insert(key, v); } bool Delete(uint64_t key, const autil::StringView& value = autil::StringView()) override { struct DeleteFunctor { void operator()(Bucket& bucket, const _KT& key, const _VT& value, uint64_t& deleteCount) { if (!bucket.IsDeleted()) { deleteCount++; } bucket.SetDelete(key, value); } void operator()(SpecialBucket& bucket, const _KT& key, const _VT& value, uint64_t& deleteCount) { if (!bucket.IsDeleted()) { deleteCount++; } bucket.SetDelete(key, value); } }; if (value.empty()) { return InternalInsert<DeleteFunctor>(key, _VT()); } const _VT& v = *reinterpret_cast<const _VT*>(value.data()); assert(sizeof(_VT) == value.size()); return InternalInsert<DeleteFunctor>(key, v); } bool MountForWrite(void* data, size_t size, const HashTableOptions& inputOptions = OCCUPANCY_PCT) override { HashTableOptions options(OCCUPANCY_PCT); if (inputOptions.Valid()) { options = inputOptions; } size_t minSize = sizeof(HashTableHeader) + sizeof(SpecialBucket) * 2; if (size < minSize) { AUTIL_LOG(ERROR, "not enough space, min size[%lu], give[%lu]", minSize, size); return false; } size_t leftSize = size - sizeof(SpecialBucket) * 2; if (!Base::MountForWrite(data, leftSize, options)) { return false; } new (EmptyBucket()) SpecialBucket(); new (DeleteBucket()) SpecialBucket(); return true; } bool MountForRead(const void* data, size_t size) override { if (size < sizeof(SpecialBucket) * 2) { AUTIL_LOG(ERROR, "too small size[%lu], BucketSize[%lu]", size, sizeof(SpecialBucket)); return false; } return Base::MountForRead(data, size - sizeof(SpecialBucket) * 2); } indexlib::util::Status Find(const _KT& key, const _VT*& value) const { if (likely(!Bucket::IsEmptyKey(key) && !Bucket::IsDeleteKey(key))) { return Base::Find(key, value); } SpecialBucket* bucket = Bucket::IsEmptyKey(key) ? EmptyBucket() : DeleteBucket(); if (bucket->IsEmpty()) { return indexlib::util::NOT_FOUND; } value = &(bucket->Value()); return bucket->IsDeleted() ? indexlib::util::DELETED : indexlib::util::OK; } indexlib::util::Status FindForReadWrite(const _KT& key, _VT& value) const { if (likely(!Bucket::IsEmptyKey(key) && !Bucket::IsDeleteKey(key))) { return Base::FindForReadWrite(key, value); } SpecialBucket* bucket = Bucket::IsEmptyKey(key) ? EmptyBucket() : DeleteBucket(); if (bucket->IsEmpty()) { return indexlib::util::NOT_FOUND; } value = bucket->Value(); return bucket->IsDeleted() ? indexlib::util::DELETED : indexlib::util::OK; } uint64_t MemoryUse() const override { return Base::MemoryUse() + sizeof(SpecialBucket) * 2; } bool Shrink(int32_t occupancyPct = 0) override { SpecialBucket emptyBucket = *EmptyBucket(); SpecialBucket deleteBucket = *DeleteBucket(); if (!Base::Shrink(occupancyPct)) { return false; } *EmptyBucket() = emptyBucket; *DeleteBucket() = deleteBucket; return true; } bool Stretch() override { SpecialBucket emptyBucket = *EmptyBucket(); SpecialBucket deleteBucket = *DeleteBucket(); bool ret = Base::Stretch(); *EmptyBucket() = emptyBucket; *DeleteBucket() = deleteBucket; return ret; } protected: SpecialBucket* EmptyBucket() const { return reinterpret_cast<SpecialBucket*>(&_bucket[_bucketCount]); } SpecialBucket* DeleteBucket() const { return reinterpret_cast<SpecialBucket*>(&_bucket[_bucketCount]) + 1; } template <typename functor> bool InternalInsert(const _KT& key, const _VT& value) { bool isNewKey = false; if (unlikely(Bucket::IsEmptyKey(key))) { SpecialBucket* bucket = EmptyBucket(); isNewKey = bucket->IsEmpty(); functor()(*bucket, key, value, Base::_deleteCount); } else if (unlikely(Bucket::IsDeleteKey(key))) { SpecialBucket* bucket = DeleteBucket(); isNewKey = bucket->IsEmpty(); functor()(*bucket, key, value, Base::_deleteCount); } else { Bucket* bucket = Base::InternalFindBucket(key); if (unlikely(!bucket)) { return false; } isNewKey = bucket->IsEmpty(); functor()(*bucket, key, value, Base::_deleteCount); } if (isNewKey) { ++(Base::Header()->keyCount); } return true; } private: friend class CuckooHashTableTest; }; } // namespace indexlibv2::index