aios/storage/indexlib/index/common/block_array/BlockArrayReader.h (363 lines of code) (raw):
/*
* Copyright 2014-present Alibaba Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <memory>
#include "autil/EnvUtil.h"
#include "indexlib/file_system/file/FileReader.h"
#include "indexlib/file_system/file/FileWriter.h"
#include "indexlib/index/common/block_array/BlockArrayCacheDataAccessor.h"
#include "indexlib/index/common/block_array/BlockArrayCompressDataAccessor.h"
#include "indexlib/index/common/block_array/BlockArrayDataAccessor.h"
#include "indexlib/index/common/block_array/BlockArrayIterator.h"
#include "indexlib/index/common/block_array/BlockArrayMemoryDataAccessor.h"
#include "indexlib/util/PathUtil.h"
namespace indexlibv2 { namespace index {
// Reader for a block array file.
// Judging from how InitMeta() walks the file backwards, the file consists of the data blocks, followed by
// @_itemCount, @_dataBlockSize, the meta keys of all levels, @_metaKeyCountPerUnit and @_bottomMetaKeyCount.
// Lookups first binary-search the meta keys to pick a block (GetBlockId) and then delegate the in-block search
// to a data accessor chosen by the type of the underlying file reader.
// The class is neither copyable nor movable.
template <typename Key, typename Value>
class BlockArrayReader
{
public:
using Iterator = BlockArrayIterator<Key, Value>;
using IteratorPtr = std::shared_ptr<Iterator>;
using KVItem = indexlib::index::KeyValueItem<Key, Value>;
using BlockArrayDataAccessorPtr = std::shared_ptr<BlockArrayDataAccessor<Key, Value>>;
using BlockArrayMemoryDataAccessorPtr = std::shared_ptr<BlockArrayMemoryDataAccessor<Key, Value>>;
using BlockArrayCacheDataAccessorPtr = std::shared_ptr<BlockArrayCacheDataAccessor<Key, Value>>;
// KVItem must be packed: the writer writes Key and Value separately, while the reader reads whole KVItems
static_assert(sizeof(KVItem) == sizeof(Key) + sizeof(Value), "KVItem should be packed");
// Creates an empty reader; all counters are zero and no meta index is attached until Init() is called
BlockArrayReader()
: _dataBlockSize(0)
, _itemCount(0)
, _bottomMetaKeyCount(0)
, _metaKeyCountPerUnit(0)
, _blockCount(0)
, _itemCountPerBlock(0)
, _metaKeyBaseAddress(nullptr)
, _metaKeyCount(0)
{
}
virtual ~BlockArrayReader() {}
BlockArrayReader(const BlockArrayReader&) = delete;
BlockArrayReader& operator=(const BlockArrayReader&) = delete;
BlockArrayReader(BlockArrayReader&&) = delete;
BlockArrayReader& operator=(BlockArrayReader&&) = delete;
public:
// Init by file reader. If you need to call @Find, the parameter @loadMetaIndex must be "true".
// If @loadMetaIndex is "false", @directory is unused and any file reader type is supported.
// If @loadMetaIndex is "true", the file reader is used to access data; memory access, compressed files and
// block cache are supported (see CreateBlockArrayDataAccessor), and @directory must not be null.
// @readLength is the offset just past the block array trailer inside @fileReader.
// Returns {status, ok}: a non-OK status reports an I/O failure, {OK, false} reports invalid/unsupported input.
virtual std::pair<Status, bool> Init(const indexlib::file_system::FileReaderPtr& fileReader,
const indexlib::file_system::DirectoryPtr& directory, uint64_t readLength,
bool loadMetaIndex);
// Find key: if it exists, fill @value and return true, otherwise return false without changing @value
indexlib::index::Result<bool> Find(const Key& key, indexlib::file_system::ReadOption option,
Value* value) noexcept __ALWAYS_INLINE;
// Coroutine variant of @Find; the non-memory accessors are awaited asynchronously
inline future_lite::coro::Lazy<indexlib::index::Result<bool>>
FindAsync(const Key& key, indexlib::file_system::ReadOption option, Value* value) noexcept;
// Number of key/value items as read from the file trailer
inline uint64_t GetItemCount() const;
// Create an iterator; @Init must have been called before.
// @loadMetaIndex in @Init can be "false" if @Find is not used.
// The returned raw pointer is allocated with new; the header does not say who frees it.
std::pair<Status, Iterator*> CreateIterator() const;
// Size in bytes of the meta keys of all levels (@_metaKeyCount * sizeof(Key))
uint64_t EstimateMetaSize() const;
protected:
// Reads the trailer fields (and optionally the meta index) from @_fileReader and validates them
virtual std::pair<Status, bool> InitMeta(bool loadMetaIndex, uint64_t readLength);
// Makes @_metaKeyBaseAddress point at the meta keys that start at @offset in the file
std::pair<Status, bool> LoadMetaIndex(uint64_t offset);
// Expected byte length of the data block section derived from the meta fields
uint64_t CaculateDataLength() const;
// Cross-checks the data section length and block count against the meta fields
bool CheckIntegrity(uint64_t actualDataLength) const;
// Sanity check of @_metaKeyCountPerUnit and of the space available for the meta keys
bool CheckMeta(const uint64_t& offset) const;
// Derives the per-level key counts and bounds from @_bottomMetaKeyCount and @_metaKeyCountPerUnit
void FillMetaHelpInfo();
// Locates the block that may contain @key; returns false when no block can contain it
bool GetBlockId(const Key& key, uint64_t* blockId) const noexcept __ALWAYS_INLINE;
// Picks the accessor implementation matching the file reader type; returns nullptr if unsupported
BlockArrayDataAccessor<Key, Value>* CreateBlockArrayDataAccessor(indexlib::file_system::FileReaderPtr fileReader);
protected:
indexlib::file_system::FileReaderPtr _fileReader;
indexlib::file_system::DirectoryPtr _directory;
BlockArrayDataAccessorPtr _accessor;
// non-null only when @_accessor is a memory accessor; used as the fast path in Find/FindAsync
BlockArrayMemoryDataAccessorPtr _memoryAccessor;
// loaded from file
uint64_t _dataBlockSize;
uint64_t _itemCount;
uint64_t _bottomMetaKeyCount;
uint64_t _metaKeyCountPerUnit;
// calculated from meta
uint64_t _blockCount;
uint64_t _itemCountPerBlock;
// keeps the slice file alive while @_metaKeyBaseAddress points into it
indexlib::file_system::FileReaderPtr _sliceFileReader;
Key* _metaKeyBaseAddress;
uint64_t _metaKeyCount; // the meta key count (sum over all levels)
// per-level ranges are half-open: [left, right)
std::vector<uint64_t> _leftBound; // level -> position of this level's first key in @_metaKeyBaseAddress
std::vector<uint64_t> _rightBound; // level -> position one past this level's last key in @_metaKeyBaseAddress
std::vector<uint64_t> _levelToMetaKeyCount; // level -> the meta key count in this level
indexlib::file_system::BlockFileNodePtr _blockFileNode; // used for reading block on disk
private:
AUTIL_LOG_DECLARE();
};
AUTIL_LOG_SETUP_TEMPLATE_2(indexlib.index, BlockArrayReader, Key, Value);
// Binds the reader to @fileReader / @directory, loads the trailer meta and, when @loadMetaIndex is set,
// creates and initializes the data accessor needed by Find/FindAsync.
// Returns {status, ok}: a non-OK status is an I/O style failure, {OK, false} means the input was rejected.
template <typename Key, typename Value>
std::pair<Status, bool> BlockArrayReader<Key, Value>::Init(const indexlib::file_system::FileReaderPtr& fileReader,
                                                           const indexlib::file_system::DirectoryPtr& directory,
                                                           uint64_t readLength, bool loadMetaIndex)
{
    // the meta index may have to be materialized as a slice file inside @directory
    if (loadMetaIndex && !directory) {
        AUTIL_LOG(ERROR, "Unsupport loadMetaIndex with null directory");
        return std::make_pair(Status::OK(), false);
    }

    _fileReader = fileReader;
    _directory = directory;

    // phase 1: pull block data info and meta index info (plus the meta index itself when requested)
    // into memory; integrity is verified along the way
    auto [metaStatus, metaOk] = InitMeta(loadMetaIndex, readLength);
    RETURN2_IF_STATUS_ERROR(metaStatus, false, "init meta fail");

    // either the meta was rejected, or the caller only needs iteration and we are done
    if (!metaOk || !loadMetaIndex) {
        return std::make_pair(Status::OK(), metaOk);
    }

    // phase 2: set up the accessor that serves point lookups
    _accessor.reset(CreateBlockArrayDataAccessor(_fileReader));
    if (!_accessor) {
        return std::make_pair(Status::OK(), false);
    }
    Status accessorStatus = _accessor->Init(_fileReader, _dataBlockSize);
    RETURN2_IF_STATUS_ERROR(accessorStatus, false, "accessor init fail");

    // remember the concrete memory accessor (if any) so lookups can take the direct path
    _memoryAccessor = std::dynamic_pointer_cast<BlockArrayMemoryDataAccessor<Key, Value>>(_accessor);
    return std::make_pair(Status::OK(), true);
}
// Point lookup: picks the candidate block through the meta index, then searches inside that block.
// Returns false (leaving @value untouched at this level) when no block can contain @key.
template <typename Key, typename Value>
inline indexlib::index::Result<bool>
BlockArrayReader<Key, Value>::Find(const Key& key, indexlib::file_system::ReadOption option, Value* value) noexcept
{
    // locate the block that may hold the key
    uint64_t targetBlock = 0;
    if (!GetBlockId(key, &targetBlock)) {
        return false;
    }

    // every block is full except possibly the last one
    uint64_t itemsInBlock = _itemCountPerBlock;
    if (targetBlock + 1 == _blockCount) {
        itemsInBlock = _itemCount - _itemCountPerBlock * targetBlock;
    }

    // search the key/value item inside the block, preferring the memory accessor when present
    if (_memoryAccessor) {
        return _memoryAccessor->GetValueInBlock(key, targetBlock, itemsInBlock, option, value);
    }
    return _accessor->GetValueInBlock(key, targetBlock, itemsInBlock, option, value);
}
// Coroutine flavour of Find: same block selection, but a non-memory accessor is awaited asynchronously.
template <typename Key, typename Value>
inline future_lite::coro::Lazy<indexlib::index::Result<bool>>
BlockArrayReader<Key, Value>::FindAsync(const Key& key, indexlib::file_system::ReadOption option, Value* value) noexcept
{
    // locate the block that may hold the key
    uint64_t targetBlock = 0;
    if (!GetBlockId(key, &targetBlock)) {
        co_return false;
    }

    // every block is full except possibly the last one
    uint64_t itemsInBlock = _itemCountPerBlock;
    if (targetBlock + 1 == _blockCount) {
        itemsInBlock = _itemCount - _itemCountPerBlock * targetBlock;
    }

    if (!_memoryAccessor) {
        co_return co_await _accessor->GetValueInBlockAsync(key, targetBlock, itemsInBlock, option, value);
    }
    // data is directly addressable, answer synchronously
    co_return _memoryAccessor->GetValueInBlock(key, targetBlock, itemsInBlock, option, value);
}
// Returns @_itemCount, i.e. the item count read from the file trailer by InitMeta (0 before Init)
template <typename Key, typename Value>
inline uint64_t BlockArrayReader<Key, Value>::GetItemCount() const
{
return _itemCount;
}
// Finds the id of the block that may contain @key by binary-searching the meta keys.
// With a single level the whole key array is searched at once; with several levels the search descends
// level by level, each time restricted to the unit selected by the level before.
// Returns false when the search runs off the end of the searched range, true with @blockId set otherwise.
template <typename Key, typename Value>
inline bool BlockArrayReader<Key, Value>::GetBlockId(const Key& key, uint64_t* blockId) const noexcept
{
    const uint64_t levelCount = _leftBound.size();

    if (levelCount == 1) {
        // flat index: one lower_bound over all meta keys
        Key* const first = _metaKeyBaseAddress;
        Key* const last = first + _metaKeyCount;
        Key* const hit = std::lower_bound(first, last, key);
        if (hit == last) {
            return false;
        }
        *blockId = hit - first;
        return true;
    }

    assert(levelCount > 0);
    // index (relative to the level start) of the entry chosen in the previous level
    uint64_t slot = 0;
    for (uint64_t level = 0; level < levelCount; ++level) {
        const uint64_t levelStart = _leftBound[level];
        // the chosen entry of the previous level owns one unit of keys in this level
        const uint64_t unitBegin = levelStart + slot * _metaKeyCountPerUnit;
        const uint64_t unitEnd = std::min(unitBegin + _metaKeyCountPerUnit, _rightBound[level]);
        Key* const hit = std::lower_bound(_metaKeyBaseAddress + unitBegin, _metaKeyBaseAddress + unitEnd, key);
        const uint64_t hitIndex = hit - _metaKeyBaseAddress;
        if (hitIndex == unitEnd) {
            return false;
        }
        slot = hitIndex - levelStart;
    }
    // the slot within the bottom level is the block id
    *blockId = slot;
    return true;
}
// Creates an iterator over the items of the block array; Init() must have been called before.
// Returns {OK, iterator} on success; the iterator is heap-allocated and handed to the caller as a raw
// pointer. On failure returns {error status, nullptr}.
template <typename Key, typename Value>
std::pair<Status, BlockArrayIterator<Key, Value>*> BlockArrayReader<Key, Value>::CreateIterator() const
{
    // hold the iterator in a unique_ptr so it is destroyed when Init fails and the macro returns early
    // (a raw `new` here would leak on that path)
    auto iter = std::make_unique<Iterator>();
    auto status = iter->Init(_fileReader, _dataBlockSize, _itemCount);
    RETURN2_IF_STATUS_ERROR(status, nullptr, "block array iterator init fail");
    // success: transfer ownership to the caller
    return std::make_pair(Status::OK(), iter.release());
}
// Reads the block array trailer from @_fileReader, walking backwards from @readLength:
//   _bottomMetaKeyCount, _metaKeyCountPerUnit, the meta keys of all levels, _dataBlockSize, _itemCount.
// What remains in front of these fields is the data block section, whose length is verified by
// CheckIntegrity. When @loadMetaIndex is true the meta keys are additionally made addressable through
// LoadMetaIndex.
// Returns {status, ok}: a non-OK status reports a read failure, {OK, false} reports a malformed file.
template <typename Key, typename Value>
std::pair<Status, bool> BlockArrayReader<Key, Value>::InitMeta(bool loadMetaIndex, uint64_t readLength)
{
    assert(readLength <= _fileReader->GetLogicLength());

    // refuse inputs too short to hold the two trailing counters, otherwise @offset would wrap around
    const uint64_t tailFieldsSize = sizeof(_bottomMetaKeyCount) + sizeof(_metaKeyCountPerUnit);
    if (readLength < tailFieldsSize) {
        AUTIL_LOG(ERROR, "Read meta error, readLength[%zu] is smaller than meta tail size[%zu]", readLength,
                  tailFieldsSize);
        return std::make_pair(Status::OK(), false);
    }

    uint64_t offset = readLength;
    uint64_t readSize = 0;

    offset -= sizeof(_bottomMetaKeyCount);
    auto pairRet = _fileReader->Read(&_bottomMetaKeyCount, sizeof(_bottomMetaKeyCount), offset).StatusWith();
    RETURN2_IF_STATUS_ERROR(pairRet.first, false, "file[%s] read fail", _fileReader->GetLogicalPath().c_str());
    readSize = pairRet.second;
    if (readSize != sizeof(_bottomMetaKeyCount)) {
        AUTIL_LOG(ERROR, "Read _bottomMetaKeyCount error, expected size[%zu], read size[%zu]",
                  sizeof(_bottomMetaKeyCount), readSize);
        return std::make_pair(Status::OK(), false);
    }

    offset -= sizeof(_metaKeyCountPerUnit);
    pairRet = _fileReader->Read(&_metaKeyCountPerUnit, sizeof(_metaKeyCountPerUnit), offset).StatusWith();
    RETURN2_IF_STATUS_ERROR(pairRet.first, false, "file[%s] read fail", _fileReader->GetLogicalPath().c_str());
    readSize = pairRet.second;
    if (readSize != sizeof(_metaKeyCountPerUnit)) {
        AUTIL_LOG(ERROR, "Read _metaKeyCountPerUnit error, expected size[%zu], read size[%zu]",
                  sizeof(_metaKeyCountPerUnit), readSize);
        return std::make_pair(Status::OK(), false);
    }

    if (!CheckMeta(offset)) {
        return std::make_pair(Status::OK(), false);
    }
    FillMetaHelpInfo();

    // the meta keys and the two block-info fields must fit in front of @offset; check before subtracting
    // so a truncated or corrupt file cannot make @offset wrap around
    const uint64_t metaKeyBytes = _metaKeyCount * sizeof(Key);
    const uint64_t blockInfoSize = sizeof(_dataBlockSize) + sizeof(_itemCount);
    if (offset < metaKeyBytes || offset - metaKeyBytes < blockInfoSize) {
        AUTIL_LOG(ERROR, "Read meta error, offset[%zu] can not hold meta keys size[%zu] and block info size[%zu]",
                  offset, metaKeyBytes, blockInfoSize);
        return std::make_pair(Status::OK(), false);
    }

    offset -= metaKeyBytes;
    if (loadMetaIndex) {
        auto [status, ret] = LoadMetaIndex(offset);
        RETURN2_IF_STATUS_ERROR(status, false, "load meta index fail");
        if (!ret) {
            return std::make_pair(Status::OK(), false);
        }
    }

    offset -= sizeof(_dataBlockSize);
    pairRet = _fileReader->Read(&_dataBlockSize, sizeof(_dataBlockSize), offset).StatusWith();
    RETURN2_IF_STATUS_ERROR(pairRet.first, false, "file[%s] read fail", _fileReader->GetLogicalPath().c_str());
    readSize = pairRet.second;
    if (readSize != sizeof(_dataBlockSize)) {
        AUTIL_LOG(ERROR, "Read _dataBlockSize error, expected size[%zu], read size[%zu]", sizeof(_dataBlockSize),
                  readSize);
        return std::make_pair(Status::OK(), false);
    }

    offset -= sizeof(_itemCount);
    pairRet = _fileReader->Read(&_itemCount, sizeof(_itemCount), offset).StatusWith();
    RETURN2_IF_STATUS_ERROR(pairRet.first, false, "file[%s] read fail", _fileReader->GetLogicalPath().c_str());
    readSize = pairRet.second;
    if (readSize != sizeof(_itemCount)) {
        AUTIL_LOG(ERROR, "Read _itemCount error, expected size[%zu], read size[%zu]", sizeof(_itemCount), readSize);
        return std::make_pair(Status::OK(), false);
    }

    _itemCountPerBlock = _dataBlockSize / sizeof(KVItem);
    // a block that cannot hold even one item would make the block count computation divide by zero
    if (_itemCountPerBlock == 0) {
        AUTIL_LOG(ERROR, "Check meta error, _dataBlockSize[%zu] is smaller than item size[%zu]", _dataBlockSize,
                  sizeof(KVItem));
        return std::make_pair(Status::OK(), false);
    }
    // ceil(_itemCount / _itemCountPerBlock), written so that a huge @_itemCount cannot overflow
    _blockCount = _itemCount / _itemCountPerBlock + (_itemCount % _itemCountPerBlock != 0 ? 1 : 0);

    // @offset is now the length of the data block section
    return std::make_pair(Status::OK(), CheckIntegrity(offset));
}
// Derives the meta index shape from @_bottomMetaKeyCount and @_metaKeyCountPerUnit:
// fills @_levelToMetaKeyCount (top level first), @_leftBound / @_rightBound (half-open key ranges of every
// level inside the flat meta key array) and @_metaKeyCount (total keys over all levels).
// Precondition: @_metaKeyCountPerUnit > 1 (InitMeta guarantees it by calling CheckMeta first); smaller
// values would make the loop below divide by zero or never terminate.
template <typename Key, typename Value>
void BlockArrayReader<Key, Value>::FillMetaHelpInfo()
{
    assert(_metaKeyCountPerUnit > 1);

    // compute the key count of every level, starting from the bottom level
    uint64_t currentMetaKeyCount = _bottomMetaKeyCount;
    _levelToMetaKeyCount.clear();
    _levelToMetaKeyCount.push_back(currentMetaKeyCount);
    while (currentMetaKeyCount > _metaKeyCountPerUnit) {
        // equivalent to currentMetaKeyCount = ceil(currentMetaKeyCount / _metaKeyCountPerUnit)
        currentMetaKeyCount = (currentMetaKeyCount + _metaKeyCountPerUnit - 1) / _metaKeyCountPerUnit;
        _levelToMetaKeyCount.push_back(currentMetaKeyCount);
    }
    // levels were collected bottom-up; store them top-down
    std::reverse(_levelToMetaKeyCount.begin(), _levelToMetaKeyCount.end());

    // reset both bound tables: leaving @_rightBound uncleared would keep stale entries from a previous
    // initialization at the indices GetBlockId reads
    _leftBound.clear();
    _rightBound.clear();
    _leftBound.reserve(_levelToMetaKeyCount.size());
    _rightBound.reserve(_levelToMetaKeyCount.size());
    _metaKeyCount = 0;
    for (auto metaKeyCountInLevel : _levelToMetaKeyCount) {
        _leftBound.push_back(_metaKeyCount);
        _metaKeyCount += metaKeyCountInLevel;
        _rightBound.push_back(_metaKeyCount);
    }
}
// Computes the expected byte length of the data block section from @_blockCount, @_itemCount,
// @_itemCountPerBlock and @_dataBlockSize. All blocks but the last occupy @_dataBlockSize bytes; the last
// one occupies @_dataBlockSize when full and only its items' bytes otherwise.
template <typename Key, typename Value>
uint64_t BlockArrayReader<Key, Value>::CaculateDataLength() const
{
    if (_blockCount == 0) {
        return 0;
    }
    assert(_blockCount > 0);

    const uint64_t leadingBlocks = _blockCount - 1;
    const uint64_t tailItems = _itemCount - leadingBlocks * _itemCountPerBlock;

    uint64_t tailBytes = tailItems * sizeof(KVItem);
    if (tailItems == _itemCountPerBlock) {
        // a full last block takes the whole block size, like any other block
        tailBytes = _dataBlockSize;
    }
    return leadingBlocks * _dataBlockSize + tailBytes;
}
// Verifies that the meta fields agree with each other and with the file:
//  - @actualDataLength must equal the data length derived from the meta fields;
//  - the computed @_blockCount must equal @_bottomMetaKeyCount.
// Logs an error and returns false on the first mismatch.
template <typename Key, typename Value>
bool BlockArrayReader<Key, Value>::CheckIntegrity(uint64_t actualDataLength) const
{
    const uint64_t wantedLength = CaculateDataLength();
    const bool lengthMatches = (wantedLength == actualDataLength);
    if (!lengthMatches) {
        AUTIL_LOG(ERROR, "Check integrity failed, expected block data length[%zu], actual block data length[%zu]",
                  wantedLength, actualDataLength);
        return false;
    }

    const bool blockCountMatches = (_blockCount == _bottomMetaKeyCount);
    if (!blockCountMatches) {
        AUTIL_LOG(ERROR,
                  "Check integrity failed, calculated _blockCount [%zu] not equal with _bottomMetaKeyCount[%zu]",
                  _blockCount, _bottomMetaKeyCount);
        return false;
    }
    return true;
}
// Sanity-checks the two trailing counters read from the file.
// @offset is the file position where @_metaKeyCountPerUnit starts, i.e. the number of bytes in front of it.
// Fails when @_metaKeyCountPerUnit is <= 1 (the level computation needs a fan-out of at least 2), or when
// @offset is smaller than twice the byte size of all meta keys.
// NOTE(review): the factor 2 is kept from the original condition; its rationale is not visible in this file.
template <typename Key, typename Value>
bool BlockArrayReader<Key, Value>::CheckMeta(const uint64_t& offset) const
{
    if (_metaKeyCountPerUnit <= 1) {
        AUTIL_LOG(ERROR, "Check meta error, _metaKeyCountPerUnit[%zu] is illegal.", _metaKeyCountPerUnit);
        return false;
    }

    // total number of meta keys over all levels
    uint64_t metaKeyCount = _bottomMetaKeyCount;
    uint64_t currentCount = _bottomMetaKeyCount;
    while (currentCount > _metaKeyCountPerUnit) {
        currentCount = (currentCount + _metaKeyCountPerUnit - 1) / _metaKeyCountPerUnit;
        metaKeyCount += currentCount;
    }

    // same condition as `offset < metaKeyCount * sizeof(Key) * 2`, expressed as a division so that a
    // corrupt, huge key count cannot overflow the product and slip through the check
    if (metaKeyCount > offset / (sizeof(Key) * 2)) {
        // report the value that was actually compared
        AUTIL_LOG(ERROR, "Check meta error, offset[%zu], metaKeyCount[%zu].", offset, metaKeyCount);
        return false;
    }
    return true;
}
// Makes the meta keys, stored at @offset in @_fileReader, addressable through @_metaKeyBaseAddress.
// Either points directly into the file reader's memory, or copies the keys into a slice file named
// "<relative file name>.block_index" inside @_directory and points into that.
// Returns {status, ok}; nothing is done (and {OK, true} is returned) when there are no meta keys.
template <typename Key, typename Value>
std::pair<Status, bool> BlockArrayReader<Key, Value>::LoadMetaIndex(uint64_t offset)
{
if (_bottomMetaKeyCount == 0) {
return std::make_pair(Status::OK(), true);
}
// Use the file's own memory when it has a base address and either slice mem-lock is disabled through
// the INDEXLIB_DISABLE_SLICE_MEM_LOCK environment variable or the file reader reports IsMemLock();
// otherwise fall back to a slice file.
bool disableSliceMemLock = autil::EnvUtil::getEnv<bool>("INDEXLIB_DISABLE_SLICE_MEM_LOCK", false);
if (_fileReader->GetBaseAddress() && (disableSliceMemLock || _fileReader->IsMemLock())) {
_metaKeyBaseAddress = (Key*)((char*)_fileReader->GetBaseAddress() + offset);
} else {
// Load the meta index into a slice file if needed and initialize @_metaKeyBaseAddress from it.
// First check whether the meta index has already been written into the slice file;
// if not, copy it from the reader (at @offset) into a newly created slice file.
std::string fileName;
indexlib::util::PathUtil::GetRelativePath(_directory->GetLogicalPath(), _fileReader->GetLogicalPath(),
fileName);
std::string sliceFileName = fileName + ".block_index";
auto fileReader = _directory->CreateFileReader(sliceFileName, indexlib::file_system::FSOT_SLICE);
if (!fileReader) {
// no slice file yet: create one sized for all meta keys, then fill it from the source file
uint64_t sliceLen = sizeof(Key) * _metaKeyCount;
auto fileWriter =
_directory->CreateFileWriter(sliceFileName, indexlib::file_system::WriterOption::Slice(sliceLen, 1));
RETURN2_IF_STATUS_ERROR(fileWriter->Truncate(sliceLen).Status(), false, "file[%s] truncate fail",
fileWriter->GetLogicalPath().c_str());
// the reader is opened before the writer is closed; the keys are then read straight into its memory
fileReader = _directory->CreateFileReader(sliceFileName, indexlib::file_system::FSOT_SLICE);
RETURN2_IF_STATUS_ERROR(fileWriter->Close().Status(), false, "file[%s] close fail",
fileWriter->GetLogicalPath().c_str());
auto [status, readLen] = _fileReader->Read(fileReader->GetBaseAddress(), sliceLen, offset).StatusWith();
RETURN2_IF_STATUS_ERROR(status, false, "file[%s] read fail", _fileReader->GetLogicalPath().c_str());
if (sliceLen != readLen) {
// a short read is reported as corruption; presumably the macro already returns here, the
// explicit return below is a fallback -- TODO confirm against the macro definition
RETURN2_IF_STATUS_ERROR(Status::Corruption(), false, "read block index data fail in file [%s]",
_fileReader->DebugString().c_str());
return std::make_pair(Status::OK(), false);
}
}
// The slice file memory is used as the key array, so the whole meta index is in memory.
// @_sliceFileReader is kept so that @_metaKeyBaseAddress stays valid for the reader's lifetime.
_sliceFileReader = fileReader;
_metaKeyBaseAddress = (Key*)_sliceFileReader->GetBaseAddress();
}
return std::make_pair(Status::OK(), true);
}
// Returns the byte size of the meta keys of all levels: @_metaKeyCount * sizeof(Key).
// @_metaKeyCount is filled by FillMetaHelpInfo, so the result is 0 before the meta has been initialized.
template <typename Key, typename Value>
uint64_t BlockArrayReader<Key, Value>::EstimateMetaSize() const
{
return _metaKeyCount * sizeof(Key);
}
// Chooses the data accessor implementation that matches @fileReader, probing in this order:
//  1. the reader exposes a base address            -> memory accessor
//  2. the reader is a CompressFileReader           -> compress accessor
//  3. the reader's file node is a BlockFileNode    -> block cache accessor
// Returns a heap-allocated accessor, or nullptr (after logging) when none of the above applies.
template <typename Key, typename Value>
BlockArrayDataAccessor<Key, Value>*
BlockArrayReader<Key, Value>::CreateBlockArrayDataAccessor(indexlib::file_system::FileReaderPtr fileReader)
{
    BlockArrayDataAccessor<Key, Value>* created = nullptr;
    if (fileReader->GetBaseAddress()) {
        created = new BlockArrayMemoryDataAccessor<Key, Value>();
    } else if (std::dynamic_pointer_cast<indexlib::file_system::CompressFileReader>(fileReader)) {
        created = new BlockArrayCompressDataAccessor<Key, Value>();
    } else if (std::dynamic_pointer_cast<indexlib::file_system::BlockFileNode>(fileReader->GetFileNode())) {
        created = new BlockArrayCacheDataAccessor<Key, Value>();
    } else {
        AUTIL_LOG(ERROR, "fileReader type error, block_array only support memory or blockCache fileReader.");
    }
    return created;
}
}} // namespace indexlibv2::index