aios/storage/indexlib/util/HashMap.h (335 lines of code) (raw):

/* * Copyright 2014-present Alibaba Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #pragma once #include <assert.h> #include <stdio.h> #include "autil/Lock.h" #include "autil/mem_pool/Pool.h" #include "indexlib/base/Constant.h" #include "indexlib/util/HashBucket.h" #include "indexlib/util/HashUtil.h" namespace indexlib { namespace util { template <typename Key, typename Value, typename Pool = autil::mem_pool::Pool, typename HashFunc = KeyHash<Key>, typename Comp = KeyEqual<Key>, int Sparsity = 2> class HashMap { using Typed = HashMap<Key, Value, Pool, HashFunc, Comp>; HashMap(const Typed&) = delete; Typed& operator=(const Typed&) = delete; public: typedef Key KeyType; typedef Value ValueType; typedef std::pair<KeyType, ValueType> KeyValuePair; typedef Pool PoolType; typedef HashBucket<KeyValuePair, PoolType> BucketType; typedef typename BucketType::Block BlockType; typedef HashMap<Key, Value, Pool, HashFunc, Comp> HashMapType; static const size_t DEFAULT_HASHMAP_POOL_SIZE = indexlibv2::DEFAULT_CHUNK_SIZE * 1024 * 1024; // 10M public: class Iterator { public: Iterator() : _mapSelf(), _buckets(NULL), _currentBlock(NULL), _blockItemPos(0) {} Iterator(const HashMap* m) : _mapSelf(m), _buckets(_mapSelf->_bucket), _currentBlock(NULL), _blockItemPos(0) { Reset(); } void Reset() { _currentBlock = _buckets->GetFirstBlock(); while (_currentBlock) { Bitmap& bitmap = _currentBlock->bitmap; _blockItemPos = 0; while (true) { if (_blockItemPos >= _currentBlock->size) { break; } if (bitmap.Test(_blockItemPos)) { return; } ++_blockItemPos; } _currentBlock = _currentBlock->next; } } inline bool HasNext() const { return _currentBlock && _currentBlock->bitmap.Test(_blockItemPos); } inline KeyValuePair& Next() { KeyValuePair& kv = _currentBlock->begin[_blockItemPos++]; while (_currentBlock) { Bitmap& bitmap = _currentBlock->bitmap; while (true) { if (_blockItemPos >= _currentBlock->size) { break; } if (bitmap.Test(_blockItemPos)) { return kv; } ++_blockItemPos; } _currentBlock = _currentBlock->next; _blockItemPos = 0; } return kv; } private: const HashMap* _mapSelf; BucketType* _buckets; BlockType* _currentBlock; size_t _blockItemPos; }; public: HashMap(PoolType* pPool, size_t initSize); HashMap(size_t initSize); HashMap(size_t initSize, size_t poolSize); ~HashMap(); public: ValueType* Find(const KeyType& x) const; const ValueType& Find(const KeyType& x, const ValueType& _none) const; ValueType* FindWithLock(const KeyType& x) const; void Replace(const KeyType& x, const ValueType& value); // the same key may have duplicated entries void Insert(const KeyType& key, const ValueType& value); void Insert(const KeyValuePair& x); void FindAndInsert(const KeyType& key, const ValueType& value); bool operator==(const HashMapType& otherMap) const; Iterator CreateIterator() const { return Iterator(this); } static size_t EstimateFirstBucketSize(size_t initSize); //@return : in Byte static int64_t EstimateNeededMemory(int64_t keyCount); public: size_t Size() const { return _elemCount; } void Clear(); private: size_t Threshold(); KeyValuePair* Lookup(const KeyType& x) const; void FindAndInsertInLastBucket(const KeyValuePair& x); bool IsValid() const; private: HashFunc _hasher; Comp _comparator; size_t _elemCount; BucketType* _bucket; PoolType* _memPool; bool _ownPool; mutable autil::ReadWriteLock _lock; private: friend class HashMapTest; AUTIL_LOG_DECLARE(); }; ///////////////////////////////////////////////////////// // Implementation template <typename Key, typename Value, typename Pool, typename HashFunc, typename Comp, int Sparsity> HashMap<Key, Value, Pool, HashFunc, Comp, Sparsity>::HashMap(PoolType* pPool, size_t initSize) : _elemCount(0) , _bucket(new BucketType(pPool, initSize * Sparsity)) , _memPool(pPool) , _ownPool(false) { assert(_memPool != NULL); } template <typename Key, typename Value, typename Pool, typename HashFunc, typename Comp, int Sparsity> HashMap<Key, Value, Pool, HashFunc, Comp, Sparsity>::HashMap(size_t initSize) : _elemCount(0) , _memPool(new PoolType(DEFAULT_HASHMAP_POOL_SIZE)) , _ownPool(true) { _bucket = new BucketType(_memPool, initSize * Sparsity); } template <typename Key, typename Value, typename Pool, typename HashFunc, typename Comp, int Sparsity> HashMap<Key, Value, Pool, HashFunc, Comp, Sparsity>::HashMap(size_t initSize, size_t poolSize) : _elemCount(0) , _memPool(new PoolType(poolSize)) , _ownPool(true) { _bucket = new BucketType(_memPool, initSize * Sparsity); } template <typename Key, typename Value, typename Pool, typename HashFunc, typename Comp, int Sparsity> HashMap<Key, Value, Pool, HashFunc, Comp, Sparsity>::~HashMap() { Clear(); delete _bucket; _bucket = NULL; if (_ownPool && _memPool) { _memPool->release(); delete _memPool; _memPool = NULL; } } template <typename Key, typename Value, typename Pool, typename HashFunc, typename Comp, int Sparsity> inline Value* HashMap<Key, Value, Pool, HashFunc, Comp, Sparsity>::Find(const KeyType& x) const { assert(IsValid()); KeyValuePair* p = Lookup(x); if (p != NULL) { return &p->second; } return NULL; } template <typename Key, typename Value, typename Pool, typename HashFunc, typename Comp, int Sparsity> inline const Value& HashMap<Key, Value, Pool, HashFunc, Comp, Sparsity>::Find(const KeyType& x, const ValueType& _none) const { assert(IsValid()); KeyValuePair* p = Lookup(x); if (p != NULL) { return p->second; } return _none; } template <typename Key, typename Value, typename Pool, typename HashFunc, typename Comp, int Sparsity> inline Value* HashMap<Key, Value, Pool, HashFunc, Comp, Sparsity>::FindWithLock(const KeyType& x) const { autil::ScopedReadWriteLock lock(_lock, 'r'); return Find(x); } template <typename Key, typename Value, typename Pool, typename HashFunc, typename Comp, int Sparsity> inline void HashMap<Key, Value, Pool, HashFunc, Comp, Sparsity>::Replace(const KeyType& x, const ValueType& value) { assert(IsValid()); KeyValuePair* p = Lookup(x); if (p != NULL) { p->second = value; } } template <typename Key, typename Value, typename Pool, typename HashFunc, typename Comp, int Sparsity> inline void HashMap<Key, Value, Pool, HashFunc, Comp, Sparsity>::Insert(const KeyType& key, const ValueType& value) { Insert(std::make_pair(key, value)); } template <typename Key, typename Value, typename Pool, typename HashFunc, typename Comp, int Sparsity> inline void HashMap<Key, Value, Pool, HashFunc, Comp, Sparsity>::FindAndInsert(const KeyType& key, const ValueType& value) { ValueType* retValue = Find(key); if (retValue != NULL) { *retValue = value; } else { Insert(key, value); } } template <typename Key, typename Value, typename Pool, typename HashFunc, typename Comp, int Sparsity> inline void HashMap<Key, Value, Pool, HashFunc, Comp, Sparsity>::Insert(const KeyValuePair& x) { assert(IsValid()); if (_elemCount >= Threshold()) { _bucket->AddBlock(); } FindAndInsertInLastBucket(x); } template <typename Key, typename Value, typename Pool, typename HashFunc, typename Comp, int Sparsity> void HashMap<Key, Value, Pool, HashFunc, Comp, Sparsity>::Clear() { _bucket->Clear(); _elemCount = 0; if (_ownPool && _memPool) { _memPool->reset(); } } template <typename Key, typename Value, typename Pool, typename HashFunc, typename Comp, int Sparsity> inline size_t HashMap<Key, Value, Pool, HashFunc, Comp, Sparsity>::Threshold() { return _bucket->Size() / Sparsity; } template <typename Key, typename Value, typename Pool, typename HashFunc, typename Comp, int Sparsity> inline typename HashMap<Key, Value, Pool, HashFunc, Comp, Sparsity>::KeyValuePair* HashMap<Key, Value, Pool, HashFunc, Comp, Sparsity>::Lookup(const KeyType& x) const { BlockType* buff = _bucket->GetLastBlock(); while (buff) { Bitmap& bitmap = buff->bitmap; size_t pos = _hasher(x) % buff->size; while (bitmap.Test(pos)) { KeyValuePair* p = buff->begin + pos; if (_comparator(p->first, x)) { return p; } if (++pos >= buff->size) { pos = 0; } } buff = buff->prev; } return NULL; } template <typename Key, typename Value, typename Pool, typename HashFunc, typename Comp, int Sparsity> inline void HashMap<Key, Value, Pool, HashFunc, Comp, Sparsity>::FindAndInsertInLastBucket(const KeyValuePair& x) { BlockType* buff = _bucket->GetLastBlock(); Bitmap& bitmap = buff->bitmap; size_t pos = _hasher(x.first) % buff->size; KeyValuePair* p = buff->begin + pos; bool isFound = false; while (bitmap.Test(pos)) { if (_comparator(p->first, x.first)) { isFound = true; break; } if (++pos >= buff->size) { pos = 0; } p = buff->begin + pos; } if (!isFound) { _elemCount++; _bucket->GetLastBlock()->capacity++; } p->second = x.second; p->first = x.first; MEMORY_BARRIER(); bitmap.Set(pos); } template <typename Key, typename Value, typename Pool, typename HashFunc, typename Comp, int Sparsity> size_t HashMap<Key, Value, Pool, HashFunc, Comp, Sparsity>::EstimateFirstBucketSize(size_t initSize) { size_t allocElemSize = BucketType::GetNextBlockSize(initSize * Sparsity); int32_t slotCount = Bitmap::GetSlotCount(allocElemSize); return allocElemSize * sizeof(KeyValuePair) + sizeof(BlockType) + slotCount * sizeof(uint32_t); } template <typename Key, typename Value, typename Pool, typename HashFunc, typename Comp, int Sparsity> int64_t HashMap<Key, Value, Pool, HashFunc, Comp, Sparsity>::EstimateNeededMemory(int64_t keyCount) { int64_t memNeed = 0; int64_t allocElemSize = 0; int64_t capacity = 0; while (keyCount > (capacity / Sparsity)) { allocElemSize = BucketType::GetNextBlockSize(allocElemSize); capacity += allocElemSize; int32_t slotCount = Bitmap::GetSlotCount(allocElemSize); memNeed += allocElemSize * sizeof(KeyValuePair) + sizeof(BlockType) + slotCount * sizeof(uint32_t); } return memNeed; } template <typename Key, typename Value, typename Pool, typename HashFunc, typename Comp, int Sparsity> inline bool HashMap<Key, Value, Pool, HashFunc, Comp, Sparsity>::IsValid() const { return (_elemCount >= 0) && (_elemCount <= (size_t)_bucket->Size() / Sparsity); } template <typename Key, typename Value, typename Pool, typename HashFunc, typename Comp, int Sparsity> inline bool HashMap<Key, Value, Pool, HashFunc, Comp, Sparsity>::operator==(const HashMapType& otherMap) const { if (_elemCount != otherMap._elemCount) { return false; } BlockType* buff = _bucket->GetLastBlock(); BlockType* otherBuff = otherMap._bucket->GetLastBlock(); while (buff != NULL && otherBuff != NULL) { // compare current block if (buff->size != otherBuff->size) { return false; } Bitmap& bitmap = buff->bitmap; Bitmap& otherBitmap = otherBuff->bitmap; if (bitmap != otherBitmap) { return false; } for (size_t i = 0; i < buff->size; ++i) { if (!bitmap.Test(i)) { continue; } if (buff->begin[i] != otherBuff->begin[i]) { return false; } } buff = buff->prev; otherBuff = otherBuff->prev; } if ((buff == NULL && otherBuff != NULL) || (buff != NULL && otherBuff == NULL)) { return false; } return true; } }} // namespace indexlib::util