aios/storage/indexlib/index/common/hash_table/DenseHashTable.h (727 lines of code) (raw):

/* * Copyright 2014-present Alibaba Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #pragma once #include <vector> // for shrink, use as bitmap // #include <immintrin.h> #include "indexlib/index/common/hash_table/BucketCompressor.h" #include "indexlib/index/common/hash_table/ClosedHashTableIterator.h" #include "indexlib/index/common/hash_table/ClosedHashTableTraits.h" #include "indexlib/index/common/hash_table/HashTableBase.h" #include "indexlib/index/common/hash_table/HashTableOptions.h" #include "indexlib/util/PoolUtil.h" #include "indexlib/util/Status.h" #include "indexlib/util/counter/AccumulativeCounter.h" namespace indexlibv2::index { // #define DENSE_HASH_TABLE_JUMP(num_probes) (num_probes) // Quadratic probing[(n^2+n)/2] #define DENSE_HASH_TABLE_JUMP(num_probes) (1) // Linear probing template <typename _KT, typename _VT, bool HasSpecialKey = ClosedHashTableTraits<_KT, _VT, false>::HasSpecialKey, bool useCompactBucket = false> class DenseHashTableBase : public HashTableBase { public: DenseHashTableBase() : _bucket(NULL) , _bucketCount(0) , _occupancyPct(0) , _deleteCount(0) , _totalProbeCount(std::make_shared<indexlib::util::AccumulativeCounter>()) , _totalCollisionTimes(std::make_shared<indexlib::util::AccumulativeCounter>()) { } ~DenseHashTableBase() {} public: typedef _KT KeyType; typedef _VT ValueType; public: // higher OCCUPANCY_PCT causes to probe too much, though it saves MemoryUse static const int32_t OCCUPANCY_PCT = 50; // public for DenseHashTableFileIterator & DenseHashTableFileReader struct HashTableHeader { uint64_t bucketCount; uint64_t keyCount; }; public: typedef ClosedHashTableTraits<_KT, _VT, useCompactBucket> Traits; typedef typename Traits::Bucket Bucket; public: virtual indexlib::util::Status Find(uint64_t key, autil::StringView& value) const override = 0; virtual indexlib::util::Status FindForReadWrite(uint64_t key, autil::StringView& value, autil::mem_pool::Pool* pool) const override = 0; public: int32_t GetRecommendedOccupancy(int32_t occupancy) const override final { return DoGetRecommendedOccupancy(occupancy); } size_t BuildMemoryToTableMemory(size_t buildMemory, int32_t occupancyPct) const override final { return DoBuildMemoryToTableMemory(buildMemory, occupancyPct); } public: uint64_t CapacityToTableMemory(uint64_t maxKeyCount, const HashTableOptions& options) const override; uint64_t CapacityToBuildMemory(uint64_t maxKeyCount, const HashTableOptions& options) const override; size_t TableMemroyToCapacity(size_t tableMemory, int32_t occupancyPct) const override; size_t BuildMemoryToCapacity(size_t buildMemory, int32_t occupancyPct) const override; uint64_t Capacity() const override; public: static int32_t DoGetRecommendedOccupancy(int32_t occupancy) { return (occupancy > 80) ? 80 : occupancy; } static size_t DoBuildMemoryToTableMemory(size_t buildMemory, int32_t occupancyPct) { return buildMemory; } static uint64_t DoCapacityToTableMemory(uint64_t maxKeyCount, const HashTableOptions& options); static uint64_t DoCapacityToBuildMemory(uint64_t maxKeyCount, const HashTableOptions& options); static size_t DoTableMemroyToCapacity(size_t tableMemory, int32_t occupancyPct); static size_t DoBuildMemoryToCapacity(size_t buildMemory, int32_t occupancyPct); public: bool MountForWrite(void* data, size_t size, const HashTableOptions& options = OCCUPANCY_PCT) override; bool MountForRead(const void* data, size_t size) override; uint64_t Size() const override { return Header()->keyCount; } bool IsFull() const override { return Size() >= Capacity(); } uint64_t MemoryUse() const override; uint64_t BuildAssistantMemoryUse() const override { return 0; } bool Shrink(int32_t occupancyPct = 0) override; int32_t GetOccupancyPct() const override { return _occupancyPct; } uint64_t GetDeleteCount() const override { return _deleteCount; } bool Insert(uint64_t key, const autil::StringView& value) override; bool Delete(uint64_t key, const autil::StringView& value = autil::StringView()) override; public: static bool Probe(const _KT& key, uint64_t& probeCount, uint64_t& bucketId, uint64_t bucketCount); static uint64_t BucketCountToCapacity(uint64_t bucketCount, int32_t occupancyPct); public: indexlib::util::Status Find(const _KT& key, const _VT*& value) const; indexlib::util::Status FindForReadWrite(const _KT& key, _VT& value) const; bool Insert(const _KT& key, const _VT& value); void* Address() const override { return Header(); } uint64_t BucketCount() const { return _bucketCount; } bool Stretch() override { assert(false); return true; } // ATTENTION: ClosedHashTableTraits<_KT, _VT>::HasSpecialKey may not equal HasSpecialKey typedef ClosedHashTableIterator<_KT, _VT, Bucket, ClosedHashTableTraits<_KT, _VT, useCompactBucket>::HasSpecialKey> Iterator; Iterator* CreateIterator() { return new Iterator(_bucket, _bucketCount, Size()); } size_t Compress(BucketCompressor* bucketCompressor) override; protected: HashTableHeader* Header() const { return (HashTableHeader*)((uint8_t*)_bucket - sizeof(HashTableHeader)); } template <typename functor> bool InternalInsert(const _KT& key, const _VT& value); Bucket* InternalFindBucket(const _KT& key) const; private: static uint64_t CapacityToBucketCount(uint64_t maxKeyCount, int32_t occupancyPct = OCCUPANCY_PCT); bool EvictionInsert(const Bucket& firstSeekingBucket, std::vector<bool>& bitmap); public: std::string name; protected: Bucket* _bucket; uint64_t _bucketCount; int32_t _occupancyPct; uint64_t _deleteCount; std::shared_ptr<indexlib::util::AccumulativeCounter> _totalProbeCount; std::shared_ptr<indexlib::util::AccumulativeCounter> _totalCollisionTimes; protected: // friend for Probe using in DenseHashTableFileReader template <typename, typename, bool, bool> friend class DenseHashTableFileReaderBase; friend class DenseHashTableTest; AUTIL_LOG_DECLARE(); }; /////////////////////////////////////////////////////////////////////////////// template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> alog::Logger* DenseHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::_logger = alog::Logger::getLogger("indexlib.index.DenseHashTableBase"); template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> bool DenseHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::Insert(const _KT& key, const _VT& value) { struct InsertFunctor { void operator()(Bucket& bucket, const _KT& key, const _VT& value, uint64_t& deleteCount) { if (bucket.IsDeleted()) { deleteCount--; } bucket.Set(key, value); } }; return InternalInsert<InsertFunctor>(key, value); } template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> inline bool DenseHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::Insert(uint64_t key, const autil::StringView& value) { const _VT& v = *reinterpret_cast<const _VT*>(value.data()); assert(sizeof(_VT) == value.size()); return Insert(key, v); } template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> inline bool DenseHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::Delete(uint64_t key, const autil::StringView& value) { struct DeleteFunctor { void operator()(Bucket& bucket, const _KT& key, const _VT& value, uint64_t& deleteCount) { if (!bucket.IsDeleted()) { deleteCount++; } bucket.SetDelete(key, value); } }; if (value.empty()) { return InternalInsert<DeleteFunctor>(key, _VT()); } const _VT& v = *reinterpret_cast<const _VT*>(value.data()); assert(sizeof(_VT) == value.size()); // NOTE: we always *insert* a delete due to KV & KKV requrires. return InternalInsert<DeleteFunctor>(key, v); } template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> inline size_t DenseHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::Compress(BucketCompressor* bucketCompressor) { char* begin = static_cast<char*>((void*)_bucket); char* outBuffer = begin; for (uint64_t i = 0u; i < _bucketCount; ++i) { size_t compressedBucketSize = bucketCompressor->Compress(static_cast<char*>((void*)(_bucket + i)), sizeof(Bucket), outBuffer); outBuffer += compressedBucketSize; } return sizeof(HashTableHeader) + (outBuffer - begin); } template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> inline uint64_t DenseHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::CapacityToTableMemory( uint64_t maxKeyCount, const HashTableOptions& options) const { return DoCapacityToTableMemory(maxKeyCount, options); } template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> inline uint64_t DenseHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::DoCapacityToTableMemory(uint64_t maxKeyCount, const HashTableOptions& options) { return sizeof(HashTableHeader) + sizeof(Bucket) * CapacityToBucketCount(maxKeyCount, options.occupancyPct); } template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> inline uint64_t DenseHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::CapacityToBuildMemory( uint64_t maxKeyCount, const HashTableOptions& options) const { return DoCapacityToBuildMemory(maxKeyCount, options); } template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> inline uint64_t DenseHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::DoCapacityToBuildMemory(uint64_t maxKeyCount, const HashTableOptions& options) { return DoCapacityToTableMemory(maxKeyCount, options); } template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> inline size_t DenseHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::TableMemroyToCapacity(size_t tableMemory, int32_t occupancyPct) const { return DoTableMemroyToCapacity(tableMemory, occupancyPct); } template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> inline size_t DenseHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::DoTableMemroyToCapacity(size_t tableMemory, int32_t occupancyPct) { if (tableMemory < sizeof(HashTableHeader)) { return 0; } uint64_t bucketCount = (tableMemory - sizeof(HashTableHeader)) / sizeof(Bucket); return bucketCount * occupancyPct / 100; } template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> inline size_t DenseHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::BuildMemoryToCapacity(size_t buildMemory, int32_t occupancyPct) const { return DoBuildMemoryToCapacity(buildMemory, occupancyPct); } template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> inline size_t DenseHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::DoBuildMemoryToCapacity(size_t buildMemory, int32_t occupancyPct) { return DoTableMemroyToCapacity(buildMemory, occupancyPct); } template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> inline bool DenseHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::MountForWrite(void* data, size_t size, const HashTableOptions& inputOptions) { AUTIL_LOG(INFO, "begin mount for write, size[%lu]", size); HashTableOptions options(OCCUPANCY_PCT); if (inputOptions.Valid()) { options = inputOptions; } if (size < sizeof(HashTableHeader)) { AUTIL_LOG(ERROR, "not enough space, header size[%lu], give[%lu]", sizeof(HashTableHeader), size); return false; } _occupancyPct = options.occupancyPct; _bucketCount = (size - sizeof(HashTableHeader)) / sizeof(Bucket); HashTableHeader* header = (HashTableHeader*)data; header->bucketCount = _bucketCount; header->keyCount = 0; _bucket = (Bucket*)(header + 1); new (_bucket) Bucket[_bucketCount]; AUTIL_LOG(INFO, "mount for write, size[%lu], occupancyPct[%d], bucketCount[%lu]", size, _occupancyPct, _bucketCount); return true; } template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> inline bool DenseHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::MountForRead(const void* data, size_t size) { assert(data); if (size < sizeof(HashTableHeader)) { AUTIL_LOG(ERROR, "too small size[%lu], header size[%lu]", size, sizeof(HashTableHeader)); return false; } HashTableHeader* header = (HashTableHeader*)data; _bucketCount = header->bucketCount; _occupancyPct = header->keyCount * 100 / _bucketCount; if (size < sizeof(HashTableHeader) + _bucketCount * sizeof(Bucket)) { AUTIL_LOG(ERROR, "too small size[%lu], header size[%lu], bucket size[%lu]", size, sizeof(HashTableHeader), _bucketCount * sizeof(Bucket)); return false; } _bucket = (Bucket*)(header + 1); return true; } template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> inline indexlib::util::Status DenseHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::Find(const _KT& key, const _VT*& value) const { auto bucket = InternalFindBucket(key); if (!bucket || bucket->IsEmpty()) { return indexlib::util::NOT_FOUND; } value = &bucket->Value(); return bucket->IsDeleted() ? indexlib::util::DELETED : indexlib::util::OK; } template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> inline indexlib::util::Status DenseHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::FindForReadWrite(const _KT& key, _VT& value) const { auto bucket = InternalFindBucket(key); if (!bucket || bucket->IsEmpty()) { return indexlib::util::NOT_FOUND; } auto [isDeleted, tmpValue] = bucket->DeletedOrValue(); value = tmpValue; if (isDeleted) { return indexlib::util::DELETED; } return indexlib::util::OK; } template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> inline uint64_t DenseHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::Capacity() const { return BucketCountToCapacity(_bucketCount, _occupancyPct); } template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> inline uint64_t DenseHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::MemoryUse() const { return _bucketCount * sizeof(Bucket) + sizeof(HashTableHeader); } template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> inline bool DenseHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::Shrink(int32_t occupancyPct) { assert(_occupancyPct > 0 && _occupancyPct < 100); assert(occupancyPct >= 0 && occupancyPct < 100); uint64_t oldBucketCount = _bucketCount; uint64_t newBucketCount = CapacityToBucketCount(Size(), (occupancyPct > 0 ? occupancyPct : _occupancyPct)); if (newBucketCount >= oldBucketCount) { return true; } Header()->bucketCount = newBucketCount; _bucketCount = newBucketCount; std::vector<bool> bitmap(newBucketCount, false); // first part, these buckets in new and also in old region for (uint64_t i = 0; i < newBucketCount; ++i) { if (bitmap[i]) // processed bucket { continue; } if (_bucket[i].IsEmpty()) // still empty { bitmap[i] = true; continue; } Bucket evictedBucket = _bucket[i]; new (&_bucket[i]) Bucket; bitmap[i] = true; if (unlikely(!EvictionInsert(evictedBucket, bitmap))) { Header()->bucketCount = oldBucketCount; _bucketCount = oldBucketCount; return false; } } // second part, these buckets in old region only for (size_t i = newBucketCount; i < oldBucketCount; ++i) { Bucket& bucket = _bucket[i]; if (!bucket.IsEmpty()) { Bucket* newBucket = InternalFindBucket(bucket.Key()); if (unlikely(!newBucket)) { Header()->bucketCount = oldBucketCount; _bucketCount = oldBucketCount; return false; } *newBucket = bucket; } } _occupancyPct = occupancyPct > 0 ? occupancyPct : _occupancyPct; return true; } template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> inline uint64_t DenseHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::BucketCountToCapacity(uint64_t bucketCount, int32_t occupancyPct) { assert(occupancyPct > 0 && occupancyPct <= 100); uint64_t maxKeyCount = bucketCount * occupancyPct / 100.0; return maxKeyCount; } template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> inline bool DenseHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::Probe(const _KT& key, uint64_t& probeCount, uint64_t& bucketId, uint64_t bucketCount) { ++probeCount; bucketId += DENSE_HASH_TABLE_JUMP(probeCount); if (unlikely(bucketId >= bucketCount)) { bucketId %= bucketCount; } if (unlikely(probeCount >= bucketCount)) { AUTIL_LOG(ERROR, "too many probings for key[%lu], probeCount[%lu], " "bucketCount[%lu]", (uint64_t)key, probeCount, bucketCount); return false; } return true; } template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> template <typename functor> inline bool DenseHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::InternalInsert(const _KT& key, const _VT& value) { Bucket* bucket = InternalFindBucket(key); if (unlikely(!bucket)) { return false; } bool isNewKey = bucket->IsEmpty(); functor()(*bucket, key, value, _deleteCount); // insert or delete if (isNewKey) { ++(Header()->keyCount); } return true; } template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> inline typename DenseHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::Bucket* DenseHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::InternalFindBucket(const _KT& key) const { uint64_t bucketCount = _bucketCount; uint64_t bucketId = key % bucketCount; uint64_t probeCount = 0; while (true) { Bucket& bucket = _bucket[bucketId]; if (bucket.IsEmpty() // not found || bucket.IsEqual(key)) // found it or deleted { return &bucket; } if (unlikely(!Probe(key, probeCount, bucketId, bucketCount))) { return NULL; } } if (probeCount > 0) { if (probeCount > 100) { AUTIL_INTERVAL_LOG2(5, INFO, "too many probings for key [%lu], probeCount[%lu], bucketCount[%lu]", (uint64_t)key, probeCount, bucketCount); } _totalCollisionTimes->Increase(1); _totalProbeCount->Increase(probeCount); if (_totalCollisionTimes->GetLocal() == 10000) { int64_t totalCount = _totalProbeCount->GetLocal(); _totalProbeCount->Reset(); _totalCollisionTimes->Reset(); AUTIL_LOG(INFO, "average probings count [%ld] in latest 10000 times bucket collision, bucketCount[%lu].", (uint64_t)totalCount / 10000, bucketCount); } } return NULL; } template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> inline uint64_t DenseHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::CapacityToBucketCount(uint64_t maxKeyCount, int32_t occupancyPct) { assert(occupancyPct > 0 && occupancyPct <= 100); return ((maxKeyCount > 0 ? maxKeyCount : 1) * 100.0 + occupancyPct - 1) / occupancyPct; } template <typename _KT, typename _VT, bool HasSpecialKey, bool useCompactBucket> inline bool DenseHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket>::EvictionInsert(const Bucket& firstSeekingBucket, std::vector<bool>& bitmap) { uint64_t bucketCount = _bucketCount; Bucket seekingBucket = firstSeekingBucket; uint64_t bucketId = seekingBucket.Key() % bucketCount; uint64_t probeCount = 0; uint64_t evictionCount = 0; while (true) { Bucket& bucket = _bucket[bucketId]; if (bucket.IsEmpty()) // found { bitmap[bucketId] = true; bucket = seekingBucket; return true; } else if (bitmap[bucketId]) // need probe { if (unlikely(!Probe(seekingBucket.Key(), probeCount, bucketId, bucketCount))) { assert(false); AUTIL_LOG(ERROR, "too many probes [%lu], guradCount[%lu]", probeCount, evictionCount); return false; } continue; } else // eviction insert { bitmap[bucketId] = true; Bucket evictedBucket = bucket; bucket = seekingBucket; seekingBucket = evictedBucket; bucketId = seekingBucket.Key() % bucketCount; probeCount = 0; if (unlikely(++evictionCount > bucketCount)) { assert(false); AUTIL_LOG(ERROR, "too many eviction insert[%lu], lost key[%lu]", evictionCount, (uint64_t)seekingBucket.Key()); return false; } } } assert(false); // never got hear return false; } /////////////////////////////////////////////////////////////////////////////// template <typename _KT, typename _VT, bool HasSpecialKey = ClosedHashTableTraits<_KT, _VT, false>::HasSpecialKey, bool useCompactBucket = false> class DenseHashTable final : public DenseHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket> { public: typedef DenseHashTableBase<_KT, _VT, HasSpecialKey, useCompactBucket> Base; using Base::Find; using Base::FindForReadWrite; using Base::Insert; using Base::OCCUPANCY_PCT; public: indexlib::util::Status Find(uint64_t key, autil::StringView& value) const override final { const _VT* typedValuePtr = NULL; auto status = Base::Find((_KT)key, typedValuePtr); value = {(char*)typedValuePtr, sizeof(_VT)}; return status; } indexlib::util::Status FindForReadWrite(uint64_t key, autil::StringView& value, autil::mem_pool::Pool* pool) const override final { assert(pool); _VT* valueBuffer = (_VT*)IE_POOL_NEW_VECTOR(pool, char, sizeof(_VT)); auto status = Base::FindForReadWrite((_KT)key, *valueBuffer); value = {(char*)valueBuffer, sizeof(_VT)}; return status; } }; ///////////////////////////////////////////////////////////////////////////// // hide some methods template <typename _KT, typename _VT, bool useCompactBucket> class DenseHashTable<_KT, _VT, true, useCompactBucket> final : public DenseHashTableBase<_KT, _VT, false, useCompactBucket> { public: typedef ClosedHashTableTraits<_KT, _VT, useCompactBucket> Traits; typedef typename Traits::Bucket Bucket; typedef typename Traits::SpecialBucket SpecialBucket; private: typedef DenseHashTableBase<_KT, _VT, false, useCompactBucket> Base; using Base::_bucket; using Base::_bucketCount; using Base::_logger; using Base::OCCUPANCY_PCT; public: // public for DenseHashTableFileIterator & DenseHashTableFileReader typedef typename Base::HashTableHeader HashTableHeader; public: indexlib::util::Status Find(uint64_t key, autil::StringView& value) const override final { const _VT* typedValuePtr = NULL; auto status = Find((_KT)key, typedValuePtr); value = {(char*)typedValuePtr, sizeof(_VT)}; return status; } indexlib::util::Status FindForReadWrite(uint64_t key, autil::StringView& value, autil::mem_pool::Pool* pool) const override final { assert(pool); _VT* valueBuffer = (_VT*)IE_POOL_NEW_VECTOR(pool, char, sizeof(_VT)); auto status = Base::FindForReadWrite((_KT)key, *valueBuffer); value = {(char*)valueBuffer, sizeof(_VT)}; return status; } bool Insert(uint64_t key, const autil::StringView& value) override final { const _VT& v = *reinterpret_cast<const _VT*>(value.data()); assert(sizeof(_VT) == value.size()); return Insert(key, v); } bool Insert(const _KT& key, const _VT& value) { struct InsertFunctor { void operator()(Bucket& bucket, const _KT& key, const _VT& value, uint64_t& deleteCount) { if (bucket.IsDeleted()) { deleteCount--; } bucket.Set(key, value); } void operator()(SpecialBucket& bucket, const _KT& key, const _VT& value, uint64_t& deleteCount) { if (bucket.IsDeleted()) { deleteCount--; } bucket.Set(key, value); } }; return InternalInsert<InsertFunctor>(key, value); } bool Delete(uint64_t key, const autil::StringView& value = autil::StringView()) override final { struct DeleteFunctor { void operator()(Bucket& bucket, const _KT& key, const _VT& value, uint64_t& deleteCount) { if (!bucket.IsDeleted()) { deleteCount++; } bucket.SetDelete(key, value); } void operator()(SpecialBucket& bucket, const _KT& key, const _VT& value, uint64_t& deleteCount) { if (!bucket.IsDeleted()) { deleteCount++; } bucket.SetDelete(key, value); } }; if (value.empty()) { return InternalInsert<DeleteFunctor>(key, _VT()); } const _VT& v = *reinterpret_cast<const _VT*>(value.data()); assert(sizeof(_VT) == value.size()); return InternalInsert<DeleteFunctor>(key, v); } uint64_t MemoryUse() const override final { return Base::MemoryUse() + sizeof(SpecialBucket) * 2; } bool Shrink(int32_t occupancyPct = 0) override final { SpecialBucket emptyBucket = *EmptyBucket(); SpecialBucket deleteBucket = *DeleteBucket(); if (!Base::Shrink(occupancyPct)) { return false; } *EmptyBucket() = emptyBucket; *DeleteBucket() = deleteBucket; return true; } bool MountForWrite(void* data, size_t size, const HashTableOptions& inputOptions = OCCUPANCY_PCT) override final { HashTableOptions options(OCCUPANCY_PCT); if (inputOptions.Valid()) { options = inputOptions; } size_t minSize = sizeof(HashTableHeader) + sizeof(SpecialBucket) * 2; if (size < minSize) { AUTIL_LOG(ERROR, "not enough space, min size[%lu], give[%lu]", minSize, size); return false; } size_t leftSize = size - sizeof(SpecialBucket) * 2; if (!Base::MountForWrite(data, leftSize, options)) { return false; } new (EmptyBucket()) SpecialBucket(); new (DeleteBucket()) SpecialBucket(); return true; } bool MountForRead(const void* data, size_t size) override { if (size < sizeof(SpecialBucket) * 2) { AUTIL_LOG(ERROR, "too small size[%lu], BucketSize[%lu]", size, sizeof(SpecialBucket)); return false; } return Base::MountForRead(data, size - sizeof(SpecialBucket) * 2); } public: uint64_t CapacityToTableMemory(uint64_t maxKeyCount, const HashTableOptions& options) const override { return DoCapacityToTableMemory(maxKeyCount, options); } uint64_t CapacityToBuildMemory(uint64_t maxKeyCount, const HashTableOptions& options) const override { return DoCapacityToBuildMemory(maxKeyCount, options); } size_t TableMemroyToCapacity(size_t tableMemory, int32_t occupancyPct) const override { return DoTableMemroyToCapacity(tableMemory, occupancyPct); } size_t BuildMemoryToCapacity(size_t buildMemory, int32_t occupancyPct) const override { return DoBuildMemoryToCapacity(buildMemory, occupancyPct); } public: static uint64_t DoCapacityToTableMemory(uint64_t maxKeyCount, const HashTableOptions& options) { return Base::DoCapacityToTableMemory(maxKeyCount, options) + sizeof(SpecialBucket) * 2; } static uint64_t DoCapacityToBuildMemory(uint64_t maxKeyCount, const HashTableOptions& options) { return DoCapacityToTableMemory(maxKeyCount, options); } static size_t DoBuildMemoryToCapacity(size_t buildMemory, int32_t occupancyPct) { return DoTableMemroyToCapacity(buildMemory, occupancyPct); } static size_t DoTableMemroyToCapacity(size_t tableMemory, int32_t occupancyPct) { return Base::DoTableMemroyToCapacity(tableMemory - sizeof(SpecialBucket) * 2, occupancyPct); } public: indexlib::util::Status Find(const _KT& key, const _VT*& value) const { if (likely(!Bucket::IsEmptyKey(key) && !Bucket::IsDeleteKey(key))) { auto bucket = Base::InternalFindBucket(key); if (!bucket || bucket->IsEmpty()) { return indexlib::util::NOT_FOUND; } value = &bucket->Value(); return bucket->IsDeleted() ? indexlib::util::DELETED : indexlib::util::OK; } SpecialBucket* bucket = Bucket::IsEmptyKey(key) ? EmptyBucket() : DeleteBucket(); if (bucket->IsEmpty()) { return indexlib::util::NOT_FOUND; } value = &(bucket->Value()); return bucket->IsDeleted() ? indexlib::util::DELETED : indexlib::util::OK; } private: SpecialBucket* EmptyBucket() const { return reinterpret_cast<SpecialBucket*>(&_bucket[_bucketCount]); } SpecialBucket* DeleteBucket() const { return reinterpret_cast<SpecialBucket*>(&_bucket[_bucketCount]) + 1; } template <typename functor> bool InternalInsert(const _KT& key, const _VT& value) { bool isNewKey = false; if (unlikely(Bucket::IsEmptyKey(key))) { SpecialBucket* bucket = EmptyBucket(); isNewKey = bucket->IsEmpty(); functor()(*bucket, key, value, Base::_deleteCount); } else if (unlikely(Bucket::IsDeleteKey(key))) { SpecialBucket* bucket = DeleteBucket(); isNewKey = bucket->IsEmpty(); functor()(*bucket, key, value, Base::_deleteCount); } else { Bucket* bucket = Base::InternalFindBucket(key); if (unlikely(!bucket)) { return false; } isNewKey = bucket->IsEmpty(); functor()(*bucket, key, value, Base::_deleteCount); } if (isNewKey) { ++(Base::Header()->keyCount); } return true; } private: friend class DenseHashTableTest; }; } // namespace indexlibv2::index