cpp-ch/local-engine/IO/SplittableBzip2ReadBuffer.cpp (880 lines of code) (raw):
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "SplittableBzip2ReadBuffer.h"
#if USE_BZIP2
#include <IO/SeekableReadBuffer.h>
#include <IO/VarInt.h>
#include <base/find_symbols.h>
#include <Common/logger_useful.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int POSITION_OUT_OF_BOUND;
}
/// Makes sure the `tt` vector holds at least `length` entries and returns a reference to it.
/// The vector is only ever grown here; a vector that is already large enough is returned as is.
std::vector<Int32> & SplittableBzip2ReadBuffer::Data::initTT(Int32 length)
{
    const auto required = static_cast<size_t>(length);
    if (required > tt.size())
        tt.resize(required);
    return tt;
}
/// Renders a vector as "[a, b, c]". Each element is cast to Int32 before being printed.
template <typename T>
std::string SplittableBzip2ReadBuffer::Data::arrayToString(const std::vector<T> & arr)
{
    std::string text = "[";
    bool need_separator = false;
    for (const auto & item : arr)
    {
        if (need_separator)
            text += ", ";
        text += std::to_string(static_cast<Int32>(item));
        need_separator = true;
    }
    text += "]";
    return text;
}
/// Renders an N_GROUPS x MAX_ALPHA_SIZE table as "[[...], [...], ...]",
/// formatting every row with the pointer/size overload of arrayToString.
template <typename T>
std::string SplittableBzip2ReadBuffer::Data::array2DToString(T arr[BZip2Constants::N_GROUPS][BZip2Constants::MAX_ALPHA_SIZE])
{
    std::string text = "[";
    for (int row = 0; row < BZip2Constants::N_GROUPS; ++row)
    {
        if (row != 0)
            text.append(", ");
        text.append(arrayToString(arr[row], BZip2Constants::MAX_ALPHA_SIZE));
    }
    text.append("]");
    return text;
}
/// Renders the first `size` elements of a raw array as "[a, b, c]".
/// bool elements are printed as "true"/"false"; every other type is cast to Int32 first.
template <typename T>
std::string SplittableBzip2ReadBuffer::Data::arrayToString(const T * arr, size_t size)
{
    std::string text = "[";
    const T * const stop = arr + size;
    for (const T * it = arr; it != stop; ++it)
    {
        if (it != arr)
            text += ", ";
        if constexpr (std::is_same_v<T, bool>)
            text += *it ? "true" : "false";
        else
            text += std::to_string(static_cast<Int32>(*it));
    }
    text += "]";
    return text;
}
/// Dumps every table held by Data as a multi-line "Data{...}" string, one "name=[...]" entry per line.
std::string SplittableBzip2ReadBuffer::Data::toString()
{
    std::string text = "Data{";

    /// Appends one "<label><rendered array>" entry to the dump.
    const auto append_field = [&text](const char * label, const std::string & rendered)
    {
        text += label;
        text += rendered;
    };

    append_field("\ninUse=", arrayToString(inUse, 256));
    append_field("\nseqToUnseq=", arrayToString(seqToUnseq, 256));
    append_field("\nselector=", arrayToString(selector, BZip2Constants::MAX_SELECTORS));
    append_field("\nselectorMtf=", arrayToString(selectorMtf, BZip2Constants::MAX_SELECTORS));
    append_field("\nunzftab=", arrayToString(unzftab, 256));
    append_field("\nlimit=", array2DToString(limit));
    append_field("\nbase=", array2DToString(base));
    append_field("\nperm=", array2DToString(perm));
    append_field("\nminLens=", arrayToString(minLens, BZip2Constants::N_GROUPS));
    append_field("\ncftab=", arrayToString(cftab, 257));
    append_field("\ngetAndMoveToFrontDecode_yy=", arrayToString(getAndMoveToFrontDecode_yy, 256));
    append_field("\ntemp_charArray2d=", array2DToString(temp_charArray2d));
    append_field("\nrecvDecodingTables_pos=", arrayToString(recvDecodingTables_pos, BZip2Constants::N_GROUPS));
    append_field("\ntt=", arrayToString(tt));
    append_field("\nll8=", arrayToString(ll8));

    text += "}";
    return text;
}
/// Builds the limit/base/perm decode tables for one coding group from its per-symbol code lengths.
///
/// @param limit     output; limit[len] is written for len in [minLen, maxLen], other slots >= 1 are zeroed
/// @param base      output; slots >= 1 are rewritten, slot 0 is read but never written here
/// @param perm      output; symbols listed by ascending code length (ties keep symbol order)
/// @param length    code length of each of the `alphaSize` symbols
/// @param minLen    smallest code length present in `length`
/// @param maxLen    largest code length present in `length`
/// @param alphaSize number of symbols described by `length`
void SplittableBzip2ReadBuffer::hbCreateDecodeTables(
    int * __restrict limit,
    int * __restrict base,
    int * __restrict perm,
    const UInt16 * __restrict length,
    int minLen,
    int maxLen,
    int alphaSize)
{
    /// Fill perm: for every code length in order, append the symbols that use it.
    int perm_size = 0;
    for (int code_len = minLen; code_len <= maxLen; ++code_len)
    {
        for (int symbol = 0; symbol < alphaSize; ++symbol)
        {
            if (length[symbol] == code_len)
                perm[perm_size++] = symbol;
        }
    }

    /// Clear slots [1, MAX_CODE_LEN) of both tables.
    for (int slot = 1; slot < BZip2Constants::MAX_CODE_LEN; ++slot)
    {
        base[slot] = 0;
        limit[slot] = 0;
    }

    /// Count how many symbols use each length, stored one slot to the right.
    for (int symbol = 0; symbol < alphaSize; ++symbol)
        ++base[length[symbol] + 1];

    /// Turn the counts into running totals.
    int running = base[0];
    for (int slot = 1; slot < BZip2Constants::MAX_CODE_LEN; ++slot)
    {
        running += base[slot];
        base[slot] = running;
    }

    /// limit[len] is the largest code value of that length.
    int code = 0;
    int prev_total = base[minLen];
    for (int code_len = minLen; code_len <= maxLen; ++code_len)
    {
        const int next_total = base[code_len + 1];
        code += next_total - prev_total;
        prev_total = next_total;
        limit[code_len] = code - 1;
        code <<= 1;
    }

    /// Rebase so that (code - base[len]) indexes directly into perm.
    for (int code_len = minLen + 1; code_len <= maxLen; ++code_len)
        base[code_len] = ((limit[code_len - 1] + 1) << 1) - base[code_len];
}
/// Wraps `in_` and positions the decoder on the first block delimiter found in it.
///
/// @param in_                                underlying compressed input
/// @param first_block_need_special_process_  enables the first-block row trimming done in nextImpl()
/// @param last_block_need_special_process_   enables the trailing-row trimming done in nextImpl()
/// @param buf_size, existing_memory, alignment  forwarded to CompressedReadBufferWrapper
///
/// If a delimiter is found and the input is seekable, adjusted_start is set to the input position
/// right after the delimiter. If no delimiter is found the buffer starts in END_OF_FILE state.
SplittableBzip2ReadBuffer::SplittableBzip2ReadBuffer(
    std::unique_ptr<ReadBuffer> in_,
    bool first_block_need_special_process_,
    bool last_block_need_special_process_,
    size_t buf_size,
    char * existing_memory,
    size_t alignment)
    : CompressedReadBufferWrapper(std::move(in_), buf_size, existing_memory, alignment)
    , first_block_need_special_process(first_block_need_special_process_)
    , last_block_need_special_process(last_block_need_special_process_)
    , is_first_block(true)
    , blockSize100k(9)
    , currentState(STATE::NO_PROCESS_STATE)
    , skipResult(false)
    , currentChar(0)
    , storedBlockCRC(0)
    , blockRandomised(false)
    , data(nullptr)
    , computedBlockCRC(0)
    , storedCombinedCRC(0)
    , computedCombinedCRC(0)
    , origPtr(0)
    , nInUse(0)
    , bsBuff(0)
    , bsLive(0)
    , last(0)
{
    auto * seekable = dynamic_cast<SeekableReadBuffer*>(in.get());
    skipResult = skipToNextMarker(BLOCK_DELIMITER, DELIMITER_BIT_LENGTH);
    if (seekable && skipResult)
    {
        /// Update adjusted_start
        adjusted_start = seekable->getPosition();
    }
    changeStateToProcessABlock();

    /// adjusted_start is only assigned above when the input is seekable and a marker was found,
    /// so it must not be dereferenced unconditionally when logging.
    LOG_DEBUG(
        getLogger("SplittableBzip2ReadBuffer"),
        "adjusted_start:{} first_block_need_special_process:{} last_block_need_special_process:{} buf_size:{}",
        adjusted_start ? std::to_string(*adjusted_start) : std::string("unknown"),
        first_block_need_special_process,
        last_block_need_special_process,
        buf_size);
}
/// Decodes up to `len` bytes into dest[offs, offs + len).
///
/// Returns the number of bytes written when at least one byte was produced. Otherwise it returns the
/// last value obtained from read0() (0 when `len` is 0), and before returning it searches for the next
/// block delimiter and switches to processing that block (or to END_OF_FILE if none is found).
///
/// Throws POSITION_OUT_OF_BOUND when offs + len exceeds dest_size.
Int32 SplittableBzip2ReadBuffer::read(char * dest, size_t dest_size, size_t offs, size_t len)
{
    if (offs + len > dest_size)
        throw Exception(ErrorCodes::POSITION_OUT_OF_BOUND, "offs({}) + len({}) > dest_size({}).", offs, len, dest_size);

    const size_t stop = offs + len;
    size_t cursor = offs;
    Int32 symbol = 0;
    while (cursor < stop)
    {
        symbol = read0();
        if (symbol < 0)
            break;
        dest[cursor] = static_cast<char>(symbol);
        ++cursor;
    }

    const Int32 produced = static_cast<Int32>(cursor - offs);
    if (produced != 0)
        return produced;

    /// Nothing was produced: move on to the next block before reporting the status code.
    skipResult = skipToNextMarker(SplittableBzip2ReadBuffer::BLOCK_DELIMITER, DELIMITER_BIT_LENGTH);
    changeStateToProcessABlock();
    return symbol;
}
/// Fills the working buffer with the next chunk of decompressed bytes.
///
/// Steps, as implemented below:
///  1. If last_block_need_special_process is set and the previous call saved a partial row,
///     that row is copied to the front of the internal buffer.
///  2. read() is called until the buffer is full or it reports END_OF_STREAM. When
///     first_block_need_special_process is set, the first END_OF_BLOCK triggers trimming that
///     keeps only the bytes after the last row delimiter decoded so far.
///  3. If last_block_need_special_process is set, the output is cut right after the last row
///     delimiter; the cut-off tail is saved in last_incomplete_line unless the stream has ended.
///
/// Returns true when at least one byte is made available, false otherwise.
/// Throws LOGICAL_ERROR when step 3 finds no '\n' in the buffer and the stream has not ended.
bool SplittableBzip2ReadBuffer::nextImpl()
{
    const Position dest = internal_buffer.begin();
    const size_t dest_size = internal_buffer.size();
    size_t offset = 0;
    if (last_block_need_special_process && !last_incomplete_line.empty())
    {
        /// If we have last incomplete line, append it to the beginning of internal buffer
        memcpy(dest, last_incomplete_line.data(), last_incomplete_line.size());
        offset += last_incomplete_line.size();
        last_incomplete_line.clear();
    }

    Int32 result;
    do
    {
        result = read(dest, dest_size, offset, dest_size - offset);
        if (result > 0)
            offset += result;
        else if (first_block_need_special_process && result == BZip2Constants::END_OF_BLOCK && is_first_block)
        {
            /// Special processing for the first block
            /// Notice that row delim could be \n (Unix) or \r\n (DOS/Windows) or \n\r (Mac OS Classic)
            is_first_block = false;
            Position end = dest + offset;
            auto * pos = find_last_symbols_or_null<'\n'>(dest, end);
            if (pos)
            {
                if (pos == end - 1 || (pos == end - 2 && *(pos + 1) == '\r'))
                {
                    /// The last row ends with \n or \r\n or \n\r, discard all lines in internal buffer
                    offset = 0;
                }
                else
                {
                    /// The last row does not end with \n or \r\n or \n\r, rewrite the last row to internal buffer
                    Position last_line = pos + 1;
                    size_t last_line_size = end - pos - 1;
                    if (*(pos + 1) == '\r')
                    {
                        /// The delimiter is "\n\r": skip the '\r' as well. Both the start pointer and the
                        /// size have to move, otherwise the '\r' is kept and the row loses its last byte.
                        ++last_line;
                        --last_line_size;
                    }
                    memmove(dest, last_line, last_line_size);
                    offset = last_line_size;
                }
            }
            LOG_DEBUG(
                getLogger("SplittableBzip2ReadBuffer"),
                "Header of first block after special processed:{}",
                std::string(dest, std::min(offset, 100UL)));
        }
    } while (result != BZip2Constants::END_OF_STREAM && offset < dest_size);

    if (last_block_need_special_process && offset)
    {
        bool reach_eof = (result == BZip2Constants::END_OF_STREAM);
        if (reach_eof)
        {
            LOG_DEBUG(
                getLogger("SplittableBzip2ReadBuffer"),
                "Header of last block before special processed:{}",
                std::string(dest, std::min(offset, 100UL)));
        }

        /// Trim the last incomplete line from [dest, dest+offset), and record it in last_incomplete_line
        Position end = dest + offset;
        auto * pos = find_last_symbols_or_null<'\n'>(dest, end);
        if (!pos)
        {
            /// No delimiter at all: at end of stream the leftover bytes are dropped,
            /// otherwise a single row does not fit into the buffer.
            if (reach_eof)
                offset = 0;
            else
                throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't find row delimiter in working buffer with size:{}", offset);
        }
        else
        {
            /// Discard the last incomplete row(if has), and record it in last_incomplete_line
            size_t old_offset = offset;
            offset = pos - dest + 1;
            if (pos + 1 < end && *(pos + 1) == '\r')
                offset++;
            if (!reach_eof)
            {
                /// Only record last incomplete line when eof not reached
                last_incomplete_line.assign(&dest[offset], old_offset - offset);
            }
        }
    }

    if (offset)
    {
        working_buffer.resize(offset);
        return true;
    }
    else
        return false;
}
/// Returns the byte currently stored in currentChar and advances the decoding state machine by one step.
///
/// Returns END_OF_STREAM in END_OF_FILE state and END_OF_BLOCK in NO_PROCESS_STATE without advancing.
/// Throws LOGICAL_ERROR when called in START_BLOCK_STATE, RAND_PART_A_STATE or NO_RAND_PART_A_STATE.
Int32 SplittableBzip2ReadBuffer::read0()
{
    /// Capture the value before the setup* call below overwrites currentChar.
    const Int32 pending = currentChar;
    switch (currentState)
    {
        case STATE::END_OF_FILE:
            return BZip2Constants::END_OF_STREAM;
        case STATE::NO_PROCESS_STATE:
            return BZip2Constants::END_OF_BLOCK;
        case STATE::START_BLOCK_STATE:
        case STATE::RAND_PART_A_STATE:
        case STATE::NO_RAND_PART_A_STATE:
            throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong state {}", magic_enum::enum_name(currentState));
        case STATE::RAND_PART_B_STATE:
            setupRandPartB();
            break;
        case STATE::RAND_PART_C_STATE:
            setupRandPartC();
            break;
        case STATE::NO_RAND_PART_B_STATE:
            setupNoRandPartB();
            break;
        case STATE::NO_RAND_PART_C_STATE:
            setupNoRandPartC();
            break;
    }
    return pending;
}
/// Reads one byte from `in_` and returns it as a value in [0, 255], or -1 if no byte could be read.
Int32 SplittableBzip2ReadBuffer::readAByte(ReadBuffer & in_)
{
    char byte;
    if (!in_.read(byte))
        return -1;
    return static_cast<Int32>(byte) & 0xff;
}
/// Scans `in_` bit by bit until the last `markerBitLength` bits read equal `marker`.
///
/// @param marker           bit pattern to look for
/// @param markerBitLength  width of the pattern in bits; must not exceed 63
/// @param in_              input to read from
/// @param bsBuff_, bsLive_ bit-buffer state shared with bsR(); updated as bits are consumed
/// @return true when the marker was found (the input is then positioned right after it),
///         false when reading failed before a match.
/// @throws LOGICAL_ERROR when markerBitLength is greater than 63.
bool SplittableBzip2ReadBuffer::skipToNextMarker(Int64 marker, Int32 markerBitLength, ReadBuffer & in_, Int64 & bsBuff_, Int64 & bsLive_)
{
    /// Validate the argument outside the try block: the catch below is meant to turn read failures
    /// into "marker not found" and must not hide a caller bug.
    if (markerBitLength > 63)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "skipToNextMarker can not find patterns greater than 63 bits");

    try
    {
        Int64 bytes = bsR(markerBitLength, in_, bsBuff_, bsLive_);
        if (bytes == -1)
            return false;

        while (true)
        {
            if (bytes == marker)
                return true;

            /// Slide the window by one bit: drop the oldest bit, append the next one from the input.
            bytes = bytes << 1;
            bytes = bytes & ((1L << markerBitLength) - 1);
            Int32 oneBit = static_cast<Int32>(bsR(1, in_, bsBuff_, bsLive_));
            if (oneBit == -1)
                return false;
            bytes = bytes | oneBit;
        }
    }
    catch (const Exception &)
    {
        /// NOTE(review): every Exception raised while reading is reported as "not found", not only
        /// end-of-input — confirm that other read failures are not supposed to propagate.
        return false;
    }
}
/// Convenience overload: searches for `marker` in this buffer's own input using its own bit-buffer state.
bool SplittableBzip2ReadBuffer::skipToNextMarker(Int64 marker, Int32 markerBitLength)
{
    ReadBuffer & source = *in;
    return skipToNextMarker(marker, markerBitLength, source, bsBuff, bsLive);
}
void SplittableBzip2ReadBuffer::reportCRCError()
{
auto * seekable = dynamic_cast<SeekableReadBuffer*>(in.get());
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"CRC error in position:{} computedBlockCRC:{} storedBlockCRC:{}",
seekable->getPosition(),
computedBlockCRC,
storedBlockCRC);
}
void SplittableBzip2ReadBuffer::makeMaps()
{
Int32 nInUseShadow = 0;
for (Int32 i = 0; i < 256; i++)
if (data->inUse[i])
data->seqToUnseq[nInUseShadow++] = i;
nInUse = nInUseShadow;
}
/// Starts decoding a new block if the last marker search succeeded, otherwise switches to END_OF_FILE.
void SplittableBzip2ReadBuffer::changeStateToProcessABlock()
{
    if (!skipResult)
    {
        currentState = STATE::END_OF_FILE;
        return;
    }
    initBlock();
    setupBlock();
}
/// Reads the block header (stored CRC, then the one-bit randomised flag), decodes the block into the
/// Data tables via getAndMoveToFrontDecode() and resets the running CRC.
/// Data is allocated on first use. Leaves the state machine in START_BLOCK_STATE.
void SplittableBzip2ReadBuffer::initBlock()
{
    storedBlockCRC = bsGetInt();
    const auto randomised_flag = bsR(1);
    blockRandomised = (randomised_flag == 1);
    if (data == nullptr)
        data = std::make_unique<Data>(blockSize100k);
    getAndMoveToFrontDecode();
    crc.initialiseCRC();
    currentState = STATE::START_BLOCK_STATE;
}
/// Finalises the CRC of the block just decoded, compares it with the stored block CRC and folds it
/// into the combined CRC (rotate left by one bit, then XOR). A mismatch ends in reportCRCError().
void SplittableBzip2ReadBuffer::endBlock()
{
    computedBlockCRC = crc.getFinalCRC();

    const bool block_crc_ok = (storedBlockCRC == computedBlockCRC);
    if (!block_crc_ok)
    {
        /// Update the combined CRC from the stored values before reporting the error.
        computedCombinedCRC = (storedCombinedCRC << 1) | (static_cast<UInt32>(storedCombinedCRC) >> 31);
        computedCombinedCRC ^= storedBlockCRC;
        reportCRCError();
    }

    computedCombinedCRC = (computedCombinedCRC << 1) | (static_cast<UInt32>(computedCombinedCRC) >> 31);
    computedCombinedCRC ^= computedBlockCRC;
}
/// Reads the stored combined CRC, switches to END_OF_FILE, releases Data and
/// reports a CRC error if the stored and computed combined CRCs differ.
void SplittableBzip2ReadBuffer::complete()
{
    storedCombinedCRC = bsGetInt();
    currentState = STATE::END_OF_FILE;
    data.reset();

    const bool combined_crc_ok = (storedCombinedCRC == computedCombinedCRC);
    if (!combined_crc_ok)
        reportCRCError();
}
/// Extracts the next `n` bits from the bit buffer, pulling whole bytes from `in_` while fewer than
/// `n` bits are buffered.
///
/// @param n        number of bits to read
/// @param in_      input providing additional bytes
/// @param bsBuff_  bit buffer; written back only when bytes had to be pulled
/// @param bsLive_  number of unread bits in the buffer; always updated
/// @return the bits as the low `n` bits of the result
/// Calls throwReadAfterEOF() when the input runs out of bytes.
Int64 SplittableBzip2ReadBuffer::bsR(Int64 n, ReadBuffer & in_, Int64 & bsBuff_, Int64 & bsLive_)
{
    Int64 live_bits = bsLive_;
    Int64 bit_buffer = bsBuff_;

    const bool needs_refill = live_bits < n;
    while (live_bits < n)
    {
        const Int32 next_byte = readAByte(in_);
        if (next_byte < 0)
            DB::throwReadAfterEOF();
        bit_buffer = (bit_buffer << 8) | next_byte;
        live_bits += 8;
    }
    if (needs_refill)
        bsBuff_ = bit_buffer;

    const Int64 remaining = live_bits - n;
    bsLive_ = remaining;
    return (bit_buffer >> remaining) & ((1L << n) - 1);
}
/// Convenience overload: reads `n` bits from this buffer's own input and bit-buffer state.
Int64 SplittableBzip2ReadBuffer::bsR(Int64 n)
{
    ReadBuffer & source = *in;
    return bsR(n, source, bsBuff, bsLive);
}
bool SplittableBzip2ReadBuffer::bsGetBit()
{
Int64 bsLiveShadow = bsLive;
Int64 bsBuffShadow = bsBuff;
if (bsLiveShadow < 1)
{
Int32 thech = readAByte(*in);
if (thech < 0)
DB::throwReadAfterEOF();
bsBuffShadow = (bsBuffShadow << 8) | thech;
bsLiveShadow += 8;
bsBuff = bsBuffShadow;
}
bsLive = bsLiveShadow - 1;
return ((bsBuffShadow >> (bsLiveShadow - 1)) & 1) != 0;
}
void SplittableBzip2ReadBuffer::recvDecodingTables()
{
Data * dataShadow = data.get();
bool * inUse = dataShadow->inUse;
char * pos = dataShadow->recvDecodingTables_pos;
char * selector = dataShadow->selector;
char * selectorMtf = dataShadow->selectorMtf;
Int32 inUse16 = 0;
for (Int32 i = 0; i < 16; ++i)
if (bsGetBit())
inUse16 |= 1 << i;
for (Int32 i = 255; i >= 0; --i)
inUse[i] = false;
for (Int32 i = 0; i < 16; ++i)
{
if ((inUse16 & (1 << i)) != 0)
{
Int32 i16 = i << 4;
for (Int32 j = 0; j < 16; j++)
if (bsGetBit())
inUse[i16 + j] = true;
}
}
makeMaps();
Int32 alphaSize = nInUse + 2;
Int32 nGroups = static_cast<Int32>(bsR(3));
Int32 nSelectors = static_cast<Int32>(bsR(15));
for (Int32 i = 0; i < nSelectors; ++i)
{
Int32 j = 0;
while (bsGetBit())
j++;
selectorMtf[i] = j;
}
for (Int32 v = nGroups - 1; v >= 0; --v)
pos[v] = v;
for (Int32 i = 0; i < nSelectors; ++i)
{
Int32 v = selectorMtf[i] & 0xff;
char tmp = pos[v];
while (v > 0)
{
pos[v] = pos[v - 1];
v--;
}
pos[0] = tmp;
selector[i] = tmp;
}
auto * len = dataShadow->temp_charArray2d;
for (Int32 t = 0; t < nGroups; t++)
{
Int32 curr = static_cast<Int32>(bsR(5));
auto * len_t = len[t];
for (Int32 i = 0; i < alphaSize; i++)
{
while (bsGetBit())
curr += bsGetBit() ? -1 : 1;
len_t[i] = curr;
}
}
createHuffmanDecodingTables(alphaSize, nGroups);
}
/// For each of the `nGroups` groups, finds the shortest and longest code length among its
/// `alphaSize` symbols, builds that group's limit/base/perm tables and records the shortest length.
void SplittableBzip2ReadBuffer::createHuffmanDecodingTables(Int32 alphaSize, Int32 nGroups)
{
    Data * tables = data.get();
    auto * code_lengths = tables->temp_charArray2d;
    auto * min_lengths = tables->minLens;
    auto * limits = tables->limit;
    auto * bases = tables->base;
    auto * perms = tables->perm;

    for (Int32 group = 0; group < nGroups; ++group)
    {
        auto * group_lengths = code_lengths[group];
        Int32 shortest = 32;
        Int32 longest = 0;
        for (Int32 symbol = 0; symbol < alphaSize; ++symbol)
        {
            const Int32 current = group_lengths[symbol];
            if (current > longest)
                longest = current;
            if (current < shortest)
                shortest = current;
        }
        hbCreateDecodeTables(limits[group], bases[group], perms[group], group_lengths, shortest, longest, alphaSize);
        min_lengths[group] = shortest;
    }
}
void SplittableBzip2ReadBuffer::getAndMoveToFrontDecode()
{
origPtr = static_cast<Int32>(bsR(24));
recvDecodingTables();
ReadBuffer * inShadow = in.get();
Data * dataShadow = data.get();
auto & ll8 = dataShadow->ll8;
Int32 * unzftab = dataShadow->unzftab;
char * selector = dataShadow->selector;
auto * seqToUnseq = dataShadow->seqToUnseq;
auto * yy = dataShadow->getAndMoveToFrontDecode_yy;
Int32 * minLens = dataShadow->minLens;
auto * limit = dataShadow->limit;
auto * base = dataShadow->base;
auto * perm = dataShadow->perm;
Int32 limitLast = blockSize100k * 100000;
for (Int32 i = 256; --i >= 0;)
{
yy[i] = i;
unzftab[i] = 0;
}
Int32 groupNo = 0;
Int32 groupPos = BZip2Constants::G_SIZE - 1;
Int32 eob = nInUse + 1;
Int32 nextSym = getAndMoveToFrontDecode0(0);
Int32 bsBuffShadow = static_cast<Int32>(bsBuff);
Int32 bsLiveShadow = static_cast<Int32>(bsLive);
Int32 lastShadow = -1;
Int32 zt = selector[groupNo] & 0xff;
Int32 * base_zt = base[zt];
Int32 * limit_zt = limit[zt];
Int32 * perm_zt = perm[zt];
Int32 minLens_zt = minLens[zt];
while (nextSym != eob)
{
if ((nextSym == BZip2Constants::RUNA) || (nextSym == BZip2Constants::RUNB))
{
Int32 s = -1;
for (Int32 n = 1; true; n <<= 1)
{
if (nextSym == BZip2Constants::RUNA)
s += n;
else if (nextSym == BZip2Constants::RUNB)
s += n << 1;
else
break;
if (groupPos == 0)
{
groupPos = BZip2Constants::G_SIZE - 1;
zt = selector[++groupNo] & 0xff;
base_zt = base[zt];
limit_zt = limit[zt];
perm_zt = perm[zt];
minLens_zt = minLens[zt];
}
else
{
groupPos--;
}
Int32 zn = minLens_zt;
while (bsLiveShadow < zn)
{
Int32 thech = readAByte(*inShadow);
if (thech < 0)
DB::throwReadAfterEOF();
bsBuffShadow = (bsBuffShadow << 8) | thech;
bsLiveShadow += 8;
}
Int64 zvec = (bsBuffShadow >> (bsLiveShadow - zn)) & ((1 << zn) - 1);
bsLiveShadow -= zn;
while (zvec > limit_zt[zn])
{
zn++;
while (bsLiveShadow < 1)
{
Int32 thech = readAByte(*inShadow);
if (thech < 0)
DB::throwReadAfterEOF();
bsBuffShadow = (bsBuffShadow << 8) | thech;
bsLiveShadow += 8;
}
bsLiveShadow--;
zvec = (zvec << 1) | ((bsBuffShadow >> bsLiveShadow) & 1);
}
nextSym = perm_zt[static_cast<Int32>(zvec - base_zt[zn])];
}
char ch = seqToUnseq[yy[0]];
unzftab[ch & 0xff] += s + 1;
while (s-- >= 0)
ll8[++lastShadow] = ch;
if (lastShadow >= limitLast)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Block overrun");
}
else
{
if (++lastShadow >= limitLast)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Block overrun");
auto tmp = yy[nextSym - 1];
unzftab[seqToUnseq[tmp] & 0xff]++;
ll8[lastShadow] = seqToUnseq[tmp];
if (nextSym <= 16)
for (Int32 j = nextSym - 1; j > 0; --j)
yy[j] = yy[j - 1];
else
memmove(&yy[1], &yy[0], (nextSym - 1) * sizeof(yy[0]));
yy[0] = tmp;
if (groupPos == 0)
{
groupPos = BZip2Constants::G_SIZE - 1;
zt = selector[++groupNo] & 0xff;
base_zt = base[zt];
limit_zt = limit[zt];
perm_zt = perm[zt];
minLens_zt = minLens[zt];
}
else
{
groupPos--;
}
Int32 zn = minLens_zt;
while (bsLiveShadow < zn)
{
Int32 thech = readAByte(*inShadow);
if (thech < 0)
DB::throwReadAfterEOF();
bsBuffShadow = (bsBuffShadow << 8) | thech;
bsLiveShadow += 8;
}
Int32 zvec = (bsBuffShadow >> (bsLiveShadow - zn)) & ((1 << zn) - 1);
bsLiveShadow -= zn;
while (zvec > limit_zt[zn])
{
zn++;
while (bsLiveShadow < 1)
{
Int32 thech = readAByte(*inShadow);
if (thech < 0)
DB::throwReadAfterEOF();
bsBuffShadow = (bsBuffShadow << 8) | thech;
bsLiveShadow += 8;
}
bsLiveShadow--;
zvec = ((zvec << 1) | ((bsBuffShadow >> bsLiveShadow) & 1));
}
nextSym = perm_zt[zvec - base_zt[zn]];
}
}
last = lastShadow;
bsLive = bsLiveShadow;
bsBuff = bsBuffShadow;
}
/// Decodes one symbol using the decoding table selected by selector[groupNo].
/// Starts with the table's shortest code length and appends one bit at a time until the code
/// no longer exceeds the limit for its length. The bit-buffer state is written back on success.
/// Calls throwReadAfterEOF() when the input ends prematurely.
Int32 SplittableBzip2ReadBuffer::getAndMoveToFrontDecode0(Int32 groupNo)
{
    ReadBuffer * source = in.get();
    Data * tables = data.get();
    const Int32 table_idx = tables->selector[groupNo] & 0xff;
    Int32 * table_limit = tables->limit[table_idx];

    Int32 code_len = tables->minLens[table_idx];
    Int32 code = static_cast<Int32>(bsR(code_len));

    /// bsR() updated the members above; continue on local copies from here.
    Int32 live_bits = static_cast<Int32>(bsLive);
    Int32 bit_buffer = static_cast<Int32>(bsBuff);
    while (code > table_limit[code_len])
    {
        ++code_len;
        while (live_bits < 1)
        {
            const Int32 next_byte = readAByte(*source);
            if (next_byte < 0)
                DB::throwReadAfterEOF();
            bit_buffer = (bit_buffer << 8) | next_byte;
            live_bits += 8;
        }
        --live_bits;
        code = (code << 1) | ((bit_buffer >> live_bits) & 1);
    }

    bsLive = live_bits;
    bsBuff = bit_buffer;
    return tables->perm[table_idx][code - tables->base[table_idx][code_len]];
}
void SplittableBzip2ReadBuffer::setupBlock()
{
if (!data)
return;
Int32 * cftab = data->cftab;
std::vector<Int32> & tt = data->initTT(last + 1);
auto & ll8 = data->ll8;
cftab[0] = 0;
memcpy(&cftab[1], &data->unzftab[0], 256 * sizeof(cftab[0]));
for (Int32 i = 1, c = cftab[0]; i <= 256; i++)
{
c += cftab[i];
cftab[i] = c;
}
for (Int32 i = 0, lastShadow = last; i <= lastShadow; i++)
tt[cftab[ll8[i] & 0xff]++] = i;
if (origPtr < 0 || static_cast<size_t>(origPtr) >= tt.size())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Stream corrupted");
su_tPos = tt[origPtr];
su_count = 0;
su_i2 = 0;
su_ch2 = 256;
if (blockRandomised)
{
su_rNToGo = 0;
su_rTPos = 0;
setupRandPartA();
}
else
{
setupNoRandPartA();
}
}
/// Produces the next output byte of a randomised block.
///
/// While bytes remain (su_i2 <= last): fetches the next byte through tt/ll8, advances the
/// randomisation counter (reloaded from rNums when it reaches 0), flips the lowest bit of the byte
/// when the counter equals 1, stores the byte in su_ch2/currentChar, updates the CRC and moves to
/// RAND_PART_B_STATE. Otherwise finishes the block and switches to NO_PROCESS_STATE.
void SplittableBzip2ReadBuffer::setupRandPartA()
{
    if (su_i2 <= last)
    {
        su_chPrev = su_ch2;
        Int32 su_ch2Shadow = data->ll8[su_tPos] & 0xff;
        su_tPos = data->tt[su_tPos];
        if (su_rNToGo == 0)
        {
            su_rNToGo = BZip2Constants::rNums[su_rTPos] - 1;
            if (++su_rTPos == 512)
                su_rTPos = 0;
        }
        else
        {
            su_rNToGo--;
        }
        /// The conditional selects the XOR mask (1 or 0); su_ch2 must receive the resulting byte,
        /// not a 0/1 truth value of it.
        su_ch2Shadow ^= (su_rNToGo == 1) ? 1 : 0;
        su_ch2 = su_ch2Shadow;
        su_i2++;
        currentChar = su_ch2Shadow;
        currentState = STATE::RAND_PART_B_STATE;
        crc.updateCRC(su_ch2Shadow);
    }
    else
    {
        endBlock();
        currentState = STATE::NO_PROCESS_STATE;
    }
}
void SplittableBzip2ReadBuffer::setupNoRandPartA()
{
if (su_i2 <= last)
{
su_chPrev = su_ch2;
Int32 su_ch2Shadow = data->ll8[su_tPos] & 0xff;
su_ch2 = su_ch2Shadow;
su_tPos = data->tt[su_tPos];
su_i2++;
currentChar = su_ch2Shadow;
currentState = STATE::NO_RAND_PART_B_STATE;
crc.updateCRC(su_ch2Shadow);
}
else
{
currentState = STATE::NO_RAND_PART_A_STATE;
endBlock();
currentState = STATE::NO_PROCESS_STATE;
}
}
/// Run handling for randomised blocks. If the byte differs from the previous one, the repeat count
/// restarts at 1. When the same byte has been seen four times in a row, the next byte of the block
/// is read as a repeat count (su_z, with its lowest bit flipped when the randomisation counter
/// equals 1) and part C emits the repeats. Otherwise decoding simply continues with part A.
void SplittableBzip2ReadBuffer::setupRandPartB()
{
    if (su_ch2 != su_chPrev)
    {
        currentState = STATE::RAND_PART_A_STATE;
        su_count = 1;
        setupRandPartA();
        return;
    }

    ++su_count;
    if (su_count < 4)
    {
        currentState = STATE::RAND_PART_A_STATE;
        setupRandPartA();
        return;
    }

    su_z = data->ll8[su_tPos] & 0xff;
    su_tPos = data->tt[su_tPos];
    if (su_rNToGo != 0)
    {
        su_rNToGo--;
    }
    else
    {
        su_rNToGo = BZip2Constants::rNums[su_rTPos] - 1;
        if (++su_rTPos == 512)
            su_rTPos = 0;
    }
    su_j2 = 0;
    currentState = STATE::RAND_PART_C_STATE;
    if (su_rNToGo == 1)
        su_z ^= 1;
    setupRandPartC();
}
/// Emits one repeat of su_ch2 while su_j2 < su_z; once all repeats are out,
/// resets the repeat count and goes back to part A.
void SplittableBzip2ReadBuffer::setupRandPartC()
{
    if (su_j2 >= su_z)
    {
        currentState = STATE::RAND_PART_A_STATE;
        ++su_i2;
        su_count = 0;
        setupRandPartA();
        return;
    }

    currentChar = su_ch2;
    crc.updateCRC(su_ch2);
    ++su_j2;
}
/// Run handling for non-randomised blocks. If the byte differs from the previous one, the repeat
/// count restarts at 1. When the same byte has been seen four times in a row, the next byte of the
/// block is read as a repeat count (su_z) and part C emits the repeats. Otherwise decoding
/// continues with part A.
void SplittableBzip2ReadBuffer::setupNoRandPartB()
{
    if (su_ch2 != su_chPrev)
    {
        su_count = 1;
        setupNoRandPartA();
        return;
    }

    ++su_count;
    if (su_count < 4)
    {
        setupNoRandPartA();
        return;
    }

    su_z = data->ll8[su_tPos] & 0xff;
    su_tPos = data->tt[su_tPos];
    su_j2 = 0;
    setupNoRandPartC();
}
void SplittableBzip2ReadBuffer::setupNoRandPartC()
{
if (su_j2 < su_z)
{
Int32 su_ch2Shadow = su_ch2;
currentChar = su_ch2Shadow;
crc.updateCRC(su_ch2Shadow);
su_j2++;
currentState = STATE::NO_RAND_PART_C_STATE;
}
else
{
su_i2++;
su_count = 0;
setupNoRandPartA();
}
}
}
#endif