aios/storage/indexlib/index/common/numeric_compress/EquivalentCompressDefine.h (350 lines of code) (raw):
/*
* Copyright 2014-present Alibaba Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "indexlib/base/Status.h"
#include "indexlib/base/Types.h"
namespace indexlib::index {
enum SlotItemType {
SIT_EQUAL = 1,
SIT_DELTA_UINT8 = 0,
SIT_DELTA_UINT16 = 2,
SIT_DELTA_UINT32 = 4,
SIT_DELTA_BIT1 = 5,
SIT_DELTA_BIT2 = 6,
SIT_DELTA_BIT4 = 7,
SIT_DELTA_UINT64 = 3,
};
struct EquivalentCompressFileContent {
uint32_t itemCount = 0;
uint32_t slotBitNum = 0;
uint32_t slotMask = 0;
uint32_t slotNum = 0;
uint64_t slotBaseOffset = 0;
uint64_t valueBaseOffset = 0;
uint64_t fileLength = 0;
EquivalentCompressFileContent(uint32_t _itemCount, uint32_t _slotBitNum, uint32_t _slotMask, uint32_t _slotNum,
uint64_t _slotBaseOffset, uint64_t _valueBaseOffset, uint64_t _fileLength)
: itemCount(_itemCount)
, slotBitNum(_slotBitNum)
, slotMask(_slotMask)
, slotNum(_slotNum)
, slotBaseOffset(_slotBaseOffset)
, valueBaseOffset(_valueBaseOffset)
, fileLength(_fileLength)
{
}
EquivalentCompressFileContent() {}
};
class EquivalentCompressFileFormat
{
public:
inline static uint8_t DecodeBitValue(uint32_t itemCountMask, uint32_t itemCountPowerNum, uint32_t bitCountPowerNum,
uint32_t bitMask, size_t valueIdx, uint8_t* deltaArray)
{
uint32_t alignSize = 8;
uint32_t byteIndex = valueIdx >> itemCountPowerNum;
uint32_t posInByte = valueIdx & itemCountMask;
uint8_t byteValue = deltaArray[byteIndex];
uint32_t bitMoveCount = alignSize - ((posInByte + 1) << bitCountPowerNum);
byteValue = byteValue >> bitMoveCount;
byteValue &= bitMask;
return byteValue;
}
inline static size_t GetDeltaArraySizeBySlotItemType(SlotItemType slotType, size_t count)
{
switch (slotType) {
case SIT_DELTA_UINT8: // uint8_t
{
return sizeof(uint8_t) * count;
}
case SIT_DELTA_UINT16: // uint16_t
{
return sizeof(uint16_t) * count;
}
case SIT_DELTA_UINT32: // uint32_t
{
return sizeof(uint32_t) * count;
}
case SIT_DELTA_UINT64: // uint64_t
{
return sizeof(uint64_t) * count;
}
case SIT_DELTA_BIT1: {
return (count + 7) >> 3;
}
case SIT_DELTA_BIT2: {
return ((count << 1) + 7) >> 3;
}
case SIT_DELTA_BIT4: {
return ((count << 2) + 7) >> 3;
}
default:
assert(false);
}
return 0;
}
inline static size_t GetDeltaArraySizeByDeltaType(uint64_t deltaType, size_t count)
{
switch (deltaType) {
case 0: // uint8_t
{
return sizeof(uint8_t) * count;
}
case 1: // uint16_t
{
return sizeof(uint16_t) * count;
}
case 2: // uint32_t
{
return sizeof(uint32_t) * count;
}
case 3: // uint64_t
{
return sizeof(uint64_t) * count;
}
case 4: // 1 bit
{
return (count + 7) >> 3;
}
case 5: // 2 bits
{
return ((count << 1) + 7) >> 3;
}
case 6: // 4 bits
{
return ((count << 2) + 7) >> 3;
}
default:
assert(false);
}
return 0;
}
template <typename ReturnType>
inline static ReturnType GetDeltaValueBySlotItemType(SlotItemType slotType, uint8_t* deltaBuffer, size_t valueIdx)
{
switch (slotType) {
case SIT_DELTA_UINT8: // uint8_t
{
return deltaBuffer[valueIdx];
}
case SIT_DELTA_UINT16: // uint16_t
{
return ((uint16_t*)deltaBuffer)[valueIdx];
}
case SIT_DELTA_UINT32: // uint32_t
{
return ((uint32_t*)deltaBuffer)[valueIdx];
}
case SIT_DELTA_UINT64: // uint64_t
{
return ((uint64_t*)deltaBuffer)[valueIdx];
}
case SIT_DELTA_BIT1: {
return DecodeBitValue(7, 3, 0, 1, valueIdx, deltaBuffer);
}
case SIT_DELTA_BIT2: {
return DecodeBitValue(3, 2, 1, 3, valueIdx, deltaBuffer);
}
case SIT_DELTA_BIT4: {
return DecodeBitValue(1, 1, 2, 15, valueIdx, deltaBuffer);
}
default:
assert(false);
}
return 0;
}
template <typename ReturnType>
inline static ReturnType GetDeltaValueByDeltaType(uint64_t deltaType, uint8_t* deltaArray, size_t valueIdx)
{
switch (deltaType) {
case 0: // uint8_t
{
return ((uint8_t*)deltaArray)[valueIdx];
}
case 1: // uint16_t
{
return ((uint16_t*)deltaArray)[valueIdx];
}
case 2: // uint32_t
{
return ((uint32_t*)deltaArray)[valueIdx];
}
case 3: // uint64_t
{
return ((uint64_t*)deltaArray)[valueIdx];
}
case 4: // 1 bit
{
return DecodeBitValue(7, 3, 0, 1, valueIdx, deltaArray);
}
case 5: // 2 bits
{
return DecodeBitValue(3, 2, 1, 3, valueIdx, deltaArray);
}
case 6: // 4 bits
{
return DecodeBitValue(1, 1, 2, 15, valueIdx, deltaArray);
}
default:
assert(false);
}
return 0;
}
};
struct EquivalentCompressSessionOption {
size_t slotBufferSize = 8 * 1024;
size_t valueBufferSize = 32 * 1024;
size_t ioGapSize = 4 * 1024;
size_t maxRecursionDepth = 100;
};
template <typename UT>
SlotItemType GetSlotItemType(UT maxDelta)
{
if (maxDelta <= (UT)1) {
return SIT_DELTA_BIT1;
}
if (maxDelta <= (UT)3) {
return SIT_DELTA_BIT2;
}
if (maxDelta <= (UT)15) {
return SIT_DELTA_BIT4;
}
if (maxDelta <= (UT)std::numeric_limits<uint8_t>::max()) {
return SIT_DELTA_UINT8;
}
if (maxDelta <= (UT)std::numeric_limits<uint16_t>::max()) {
return SIT_DELTA_UINT16;
}
if (maxDelta <= (UT)std::numeric_limits<uint32_t>::max()) {
return SIT_DELTA_UINT32;
}
return SIT_DELTA_UINT64;
}
inline SlotItemType DeltaFlagToSlotItemType(uint64_t flag)
{
switch (flag) {
case 0:
return SIT_DELTA_UINT8;
case 1:
return SIT_DELTA_UINT16;
case 2:
return SIT_DELTA_UINT32;
case 3:
return SIT_DELTA_UINT64;
case 4:
return SIT_DELTA_BIT1;
case 5:
return SIT_DELTA_BIT2;
case 6:
return SIT_DELTA_BIT4;
default:
assert(false);
}
return SIT_EQUAL;
}
inline uint64_t SlotItemTypeToDeltaFlag(SlotItemType slotType)
{
switch (slotType) {
case SIT_DELTA_UINT8: {
return 0;
}
case SIT_DELTA_UINT16: {
return 1;
}
case SIT_DELTA_UINT32: {
return 2;
}
case SIT_DELTA_UINT64: {
return 3;
}
case SIT_DELTA_BIT1: {
return 4;
}
case SIT_DELTA_BIT2: {
return 5;
}
case SIT_DELTA_BIT4: {
return 6;
}
default:
assert(false);
}
return 7;
}
inline size_t GetCompressDeltaBufferSize(SlotItemType slotType, uint32_t bufferItemCount)
{
uint32_t alignSize = 8; // 8 bit
switch (slotType) {
case SIT_DELTA_BIT1: {
return (bufferItemCount + alignSize - 1) / alignSize;
}
case SIT_DELTA_BIT2: {
return (bufferItemCount * 2 + alignSize - 1) / alignSize;
}
case SIT_DELTA_BIT4: {
return (bufferItemCount * 4 + alignSize - 1) / alignSize;
}
case SIT_DELTA_UINT8: {
return bufferItemCount * sizeof(uint8_t);
}
case SIT_DELTA_UINT16: {
return bufferItemCount * sizeof(uint16_t);
}
case SIT_DELTA_UINT32: {
return bufferItemCount * sizeof(uint32_t);
}
case SIT_DELTA_UINT64: {
return bufferItemCount * sizeof(uint64_t);
}
default:
assert(false);
}
return 0;
}
struct SlotItem {
SlotItemType slotType : 3;
uint64_t value : 61;
bool IsEqual() const { return slotType == SIT_EQUAL; }
};
template <typename UT>
struct DeltaValueArrayTyped {
UT baseValue;
uint8_t delta[0];
};
/* uint64_t/int64_t type only*/
struct LongSlotItem {
uint64_t isValue : 1;
uint64_t value : 63;
bool IsEqual() const { return isValue == 1; }
};
/* uint64_t/int64_t type only*/
struct LongValueArrayHeader {
uint64_t baseValue;
uint64_t deltaType; // delta type
};
template <typename T>
class ItemIteratorTyped
{
public:
ItemIteratorTyped(const T* data, uint32_t count) : _data(data), _count(count), _cursor(0) {}
bool HasNext() const { return _cursor < _count; }
std::pair<Status, T> Next()
{
assert(_data);
assert(_cursor < _count);
return std::make_pair(Status::OK(), _data[_cursor++]);
}
private:
const T* _data;
uint32_t _count;
uint32_t _cursor;
};
template <typename T>
struct SlotItemTraits {
typedef SlotItem SlotArrayValueType;
};
#define DECLARE_LONG_SLOT_ITEM_TRAITS(TYPE) \
template <> \
struct SlotItemTraits<TYPE> { \
typedef LongSlotItem SlotArrayValueType; \
}
DECLARE_LONG_SLOT_ITEM_TRAITS(double);
DECLARE_LONG_SLOT_ITEM_TRAITS(int64_t);
DECLARE_LONG_SLOT_ITEM_TRAITS(uint64_t);
} // namespace indexlib::index