cpp-ch/local-engine/IO/SplittableBzip2ReadBuffer.cpp (880 lines of code) (raw):

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "SplittableBzip2ReadBuffer.h"

#if USE_BZIP2
#include <IO/SeekableReadBuffer.h>
#include <IO/VarInt.h>
#include <base/find_symbols.h>
#include <Common/logger_useful.h>

namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int POSITION_OUT_OF_BOUND;
}

/// Grows `tt` so that it holds at least `length` entries (it is never shrunk) and returns it.
std::vector<Int32> & SplittableBzip2ReadBuffer::Data::initTT(Int32 length)
{
    if (tt.size() < static_cast<size_t>(length))
        tt.resize(length);
    return tt;
}

/// Debug helper: renders a vector as "[a, b, ...]", every element converted to Int32.
template <typename T>
std::string SplittableBzip2ReadBuffer::Data::arrayToString(const std::vector<T> & arr)
{
    std::string result = "[";
    for (size_t i = 0; i < arr.size(); i++)
    {
        if (i)
            result += ", ";
        result += std::to_string(static_cast<Int32>(arr[i]));
    }
    result += "]";
    return result;
}

/// Debug helper: renders an N_GROUPS x MAX_ALPHA_SIZE table as a list of row lists.
template <typename T>
std::string SplittableBzip2ReadBuffer::Data::array2DToString(T arr[BZip2Constants::N_GROUPS][BZip2Constants::MAX_ALPHA_SIZE])
{
    std::string result = "[";
    for (int i = 0; i < BZip2Constants::N_GROUPS; i++)
    {
        if (i)
            result += ", ";
        result += arrayToString(arr[i], BZip2Constants::MAX_ALPHA_SIZE);
    }
    result += "]";
    return result;
}

/// Debug helper: renders `size` elements of a raw array as "[a, b, ...]".
/// bool elements are printed as true/false, everything else is converted to Int32.
template <typename T>
std::string SplittableBzip2ReadBuffer::Data::arrayToString(const T * arr, size_t size)
{
    std::string result = "[";
    for (size_t i = 0; i < size; i++)
    {
        if (i)
            result += ", ";
        if constexpr (std::is_same_v<T, bool>)
            result += arr[i] ? "true" : "false";
        else
            result += std::to_string(static_cast<Int32>(arr[i]));
    }
    result += "]";
    return result;
}

/// Dumps every decoding table of this Data object into one multi-line string (debugging aid).
std::string SplittableBzip2ReadBuffer::Data::toString()
{
    std::string result = "Data{";
    result += "\ninUse=" + arrayToString(inUse, 256);
    result += "\nseqToUnseq=" + arrayToString(seqToUnseq, 256);
    result += "\nselector=" + arrayToString(selector, BZip2Constants::MAX_SELECTORS);
    result += "\nselectorMtf=" + arrayToString(selectorMtf, BZip2Constants::MAX_SELECTORS);
    result += "\nunzftab=" + arrayToString(unzftab, 256);
    result += "\nlimit=" + array2DToString(limit);
    result += "\nbase=" + array2DToString(base);
    result += "\nperm=" + array2DToString(perm);
    result += "\nminLens=" + arrayToString(minLens, BZip2Constants::N_GROUPS);
    result += "\ncftab=" + arrayToString(cftab, 257);
    result += "\ngetAndMoveToFrontDecode_yy=" + arrayToString(getAndMoveToFrontDecode_yy, 256);
    result += "\ntemp_charArray2d=" + array2DToString(temp_charArray2d);
    result += "\nrecvDecodingTables_pos=" + arrayToString(recvDecodingTables_pos, BZip2Constants::N_GROUPS);
    result += "\ntt=" + arrayToString(tt);
    result += "\nll8=" + arrayToString(ll8);
    result += "}";
    return result;
}

/// Builds the limit/base/perm tables used for Huffman decoding of one coding group.
/// @param limit, base, perm  output tables, indexed by code length (limit/base) or by code rank (perm)
/// @param length             code length of every symbol, `alphaSize` entries
/// @param minLen, maxLen     smallest / largest value found in `length`
/// @param alphaSize          number of symbols in the alphabet
void SplittableBzip2ReadBuffer::hbCreateDecodeTables(
    int * __restrict limit,
    int * __restrict base,
    int * __restrict perm,
    const UInt16 * __restrict length,
    int minLen,
    int maxLen,
    int alphaSize)
{
    /// perm: symbols ordered by increasing code length.
    for (int i = minLen, pp = 0; i <= maxLen; i++)
    {
        for (int j = 0; j < alphaSize; j++)
            if (length[j] == i)
                perm[pp++] = j;
    }

    for (int i = BZip2Constants::MAX_CODE_LEN - 1; i > 0; --i)
    {
        base[i] = 0;
        limit[i] = 0;
    }

    /// Histogram of code lengths, shifted by one ...
    for (int i = 0; i < alphaSize; i++)
        base[length[i] + 1]++;

    /// ... turned into a running sum.
    for (int i = 1, b = base[0]; i < BZip2Constants::MAX_CODE_LEN; i++)
    {
        b += base[i];
        base[i] = b;
    }

    /// limit[i]: largest code value of length i.
    for (int i = minLen, vec = 0, b = base[i]; i <= maxLen; i++)
    {
        int nb = base[i + 1];
        vec += nb - b;
        b = nb;
        limit[i] = vec - 1;
        vec <<= 1;
    }

    for (int i = minLen + 1; i <= maxLen; i++)
        base[i] = ((limit[i - 1] + 1) << 1) - base[i];
}

/// Wraps `in_`, skips forward to the first block delimiter and prepares the first block for reading.
/// If the underlying buffer is seekable and a delimiter was found, `adjusted_start` is set to the
/// position of the underlying buffer right after that delimiter.
SplittableBzip2ReadBuffer::SplittableBzip2ReadBuffer(
    std::unique_ptr<ReadBuffer> in_,
    bool first_block_need_special_process_,
    bool last_block_need_special_process_,
    size_t buf_size,
    char * existing_memory,
    size_t alignment)
    : CompressedReadBufferWrapper(std::move(in_), buf_size, existing_memory, alignment)
    , first_block_need_special_process(first_block_need_special_process_)
    , last_block_need_special_process(last_block_need_special_process_)
    , is_first_block(true)
    , blockSize100k(9)
    , currentState(STATE::NO_PROCESS_STATE)
    , skipResult(false)
    , currentChar(0)
    , storedBlockCRC(0)
    , blockRandomised(false)
    , data(nullptr)
    , computedBlockCRC(0)
    , storedCombinedCRC(0)
    , computedCombinedCRC(0)
    , origPtr(0)
    , nInUse(0)
    , bsBuff(0)
    , bsLive(0)
    , last(0)
{
    auto * seekable = dynamic_cast<SeekableReadBuffer *>(in.get());
    skipResult = skipToNextMarker(BLOCK_DELIMITER, DELIMITER_BIT_LENGTH);
    if (seekable && skipResult)
    {
        /// Update adjusted_start
        adjusted_start = seekable->getPosition();
    }
    changeStateToProcessABlock();

    /// adjusted_start stays unset for a non-seekable input or when no delimiter was found,
    /// so it must not be dereferenced unconditionally.
    LOG_DEBUG(
        getLogger("SplittableBzip2ReadBuffer"),
        "adjusted_start:{} first_block_need_special_process:{} last_block_need_special_process:{} buf_size:{}",
        adjusted_start ? std::to_string(*adjusted_start) : "null",
        first_block_need_special_process,
        last_block_need_special_process,
        buf_size);
}

/// Decompresses up to `len` bytes into dest[offs, offs + len).
/// @return the number of bytes written if at least one byte was produced; otherwise the negative
///         status returned by read0() (END_OF_BLOCK / END_OF_STREAM), in which case the reader has
///         already been advanced to the next block delimiter. Returns 0 when `len` is 0.
/// @throws Exception(POSITION_OUT_OF_BOUND) if the requested range does not fit into `dest`.
Int32 SplittableBzip2ReadBuffer::read(char * dest, size_t dest_size, size_t offs, size_t len)
{
    if (offs + len > dest_size)
        throw Exception(ErrorCodes::POSITION_OUT_OF_BOUND, "offs({}) + len({}) > dest_size({}).", offs, len, dest_size);

    /// Nothing requested: must not be mistaken for "end of block", which would skip the rest of the current block.
    if (len == 0)
        return 0;

    const size_t hi = offs + len;
    size_t destOffs = offs;
    Int32 b = 0;
    for (; (destOffs < hi && (b = read0()) >= 0); ++destOffs)
    {
        dest[destOffs] = static_cast<char>(b);
    }

    Int32 result = static_cast<Int32>(destOffs - offs);
    if (result == 0)
    {
        /// Report 'end of block' or 'end of stream' and move on to the next block.
        result = b;
        skipResult = skipToNextMarker(SplittableBzip2ReadBuffer::BLOCK_DELIMITER, DELIMITER_BIT_LENGTH);
        changeStateToProcessABlock();
    }
    return result;
}

/// Fills the internal buffer with decompressed data.
/// With first_block_need_special_process, everything in the first block up to and including its last
/// row delimiter is dropped. With last_block_need_special_process, the trailing incomplete row is cut
/// off and carried over to the next call (or dropped when the end of the stream was reached).
/// @return true if the working buffer contains data, false otherwise.
bool SplittableBzip2ReadBuffer::nextImpl()
{
    const Position dest = internal_buffer.begin();
    const size_t dest_size = internal_buffer.size();
    size_t offset = 0;

    if (last_block_need_special_process && !last_incomplete_line.empty())
    {
        /// If we have last incomplete line, append it to the beginning of internal buffer
        memcpy(dest, last_incomplete_line.data(), last_incomplete_line.size());
        offset += last_incomplete_line.size();
        last_incomplete_line.clear();
    }

    Int32 result;
    do
    {
        result = read(dest, dest_size, offset, dest_size - offset);
        if (result > 0)
            offset += result;
        else if (first_block_need_special_process && result == BZip2Constants::END_OF_BLOCK && is_first_block)
        {
            /// Special processing for the first block
            /// Notice that row delim could be \n (Unix) or \r\n (DOS/Windows) or \n\r (Mac OS Classic)
            is_first_block = false;
            Position end = dest + offset;
            auto * pos = find_last_symbols_or_null<'\n'>(dest, end);
            if (pos)
            {
                if (pos == end - 1 || (pos == end - 2 && *(pos + 1) == '\r'))
                {
                    /// The last row ends with \n or \r\n or \n\r, discard all lines in internal buffer
                    offset = 0;
                }
                else
                {
                    /// The last row does not end with \n or \r\n or \n\r, rewrite the last row to internal buffer
                    Position last_line = pos + 1;
                    size_t last_line_size = end - last_line;
                    if (*last_line == '\r')
                    {
                        /// The delimiter is \n\r: skip the \r itself instead of chopping the last byte of the row.
                        ++last_line;
                        --last_line_size;
                    }

                    memmove(dest, last_line, last_line_size);
                    offset = last_line_size;
                }
            }

            LOG_DEBUG(
                getLogger("SplittableBzip2ReadBuffer"),
                "Header of first block after special processed:{}",
                std::string(dest, std::min(offset, 100UL)));
        }
    } while (result != BZip2Constants::END_OF_STREAM && offset < dest_size);

    if (last_block_need_special_process && offset)
    {
        bool reach_eof = (result == BZip2Constants::END_OF_STREAM);
        if (reach_eof)
        {
            LOG_DEBUG(
                getLogger("SplittableBzip2ReadBuffer"),
                "Header of last block before special processed:{}",
                std::string(dest, std::min(offset, 100UL)));
        }

        /// Trim the last incomplete line from [dest, dest+offset), and record it in last_incomplete_line
        Position end = dest + offset;
        auto * pos = find_last_symbols_or_null<'\n'>(dest, end);
        if (!pos)
        {
            if (reach_eof)
                offset = 0;
            else
                throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't find row delimiter in working buffer with size:{}", offset);
        }
        else
        {
            /// Discard the last incomplete row(if has), and record it in last_incomplete_line
            size_t old_offset = offset;
            offset = pos - dest + 1;
            if (pos + 1 < end && *(pos + 1) == '\r')
                offset++;

            if (!reach_eof)
            {
                /// Only record last incomplete line when eof not reached
                last_incomplete_line.assign(&dest[offset], old_offset - offset);
            }
        }
    }

    if (offset)
    {
        working_buffer.resize(offset);
        return true;
    }
    else
        return false;
}

/// Returns the byte prepared by the previous state transition and advances the state machine by one
/// step, or returns END_OF_STREAM / END_OF_BLOCK when there is nothing to read in the current state.
/// @throws Exception(LOGICAL_ERROR) if called in a state that must never be observed here.
Int32 SplittableBzip2ReadBuffer::read0()
{
    Int32 retChar = currentChar;

    switch (currentState)
    {
        case STATE::END_OF_FILE:
            return BZip2Constants::END_OF_STREAM;
        case STATE::NO_PROCESS_STATE:
            return BZip2Constants::END_OF_BLOCK;
        case STATE::START_BLOCK_STATE:
        case STATE::RAND_PART_A_STATE:
        case STATE::NO_RAND_PART_A_STATE:
            throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong state {}", magic_enum::enum_name(currentState));
        case STATE::RAND_PART_B_STATE:
            setupRandPartB();
            break;
        case STATE::RAND_PART_C_STATE:
            setupRandPartC();
            break;
        case STATE::NO_RAND_PART_B_STATE:
            setupNoRandPartB();
            break;
        case STATE::NO_RAND_PART_C_STATE:
            setupNoRandPartC();
            break;
    }
    return retChar;
}

/// Reads one byte from `in_`. Returns it as a value in [0, 255], or -1 if no byte could be read.
Int32 SplittableBzip2ReadBuffer::readAByte(ReadBuffer & in_)
{
    char c;
    if (in_.read(c))
        return static_cast<Int32>(c) & 0xff;
    else
        return -1;
}

/// Advances the bit stream bit by bit until the last `markerBitLength` bits read equal `marker`.
/// @return true if the marker was found; false if reading failed first (bsR() reports the end of
///         input by throwing, which is translated into `false` here).
/// @throws Exception(LOGICAL_ERROR) if markerBitLength > 63.
bool SplittableBzip2ReadBuffer::skipToNextMarker(Int64 marker, Int32 markerBitLength, ReadBuffer & in_, Int64 & bsBuff_, Int64 & bsLive_)
{
    /// Argument validation stays outside of the try block so that it is not swallowed by the catch below.
    if (markerBitLength > 63)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "skipToNextMarker can not find patterns greater than 63 bits");

    try
    {
        const Int64 mask = (static_cast<Int64>(1) << markerBitLength) - 1;
        Int64 bytes = bsR(markerBitLength, in_, bsBuff_, bsLive_);
        while (bytes != marker)
        {
            /// Slide the window by one bit.
            bytes = (bytes << 1) & mask;
            bytes |= bsR(1, in_, bsBuff_, bsLive_);
        }
        return true;
    }
    catch (const Exception &)
    {
        return false;
    }
}

/// Same as above, operating on this buffer's own input and bit-stream state.
bool SplittableBzip2ReadBuffer::skipToNextMarker(Int64 marker, Int32 markerBitLength)
{
    return skipToNextMarker(marker, markerBitLength, *in, bsBuff, bsLive);
}

/// Throws a LOGICAL_ERROR describing a CRC mismatch.
/// The reported position is -1 when the underlying buffer is not seekable.
void SplittableBzip2ReadBuffer::reportCRCError()
{
    auto * seekable = dynamic_cast<SeekableReadBuffer *>(in.get());
    const auto position = seekable ? seekable->getPosition() : -1;
    throw Exception(
        ErrorCodes::LOGICAL_ERROR,
        "CRC error in position:{} computedBlockCRC:{} storedBlockCRC:{}",
        position,
        computedBlockCRC,
        storedBlockCRC);
}

/// Rebuilds seqToUnseq (dense index -> byte value) from inUse and updates nInUse.
void SplittableBzip2ReadBuffer::makeMaps()
{
    Int32 nInUseShadow = 0;
    for (Int32 i = 0; i < 256; i++)
        if (data->inUse[i])
            data->seqToUnseq[nInUseShadow++] = i;
    nInUse = nInUseShadow;
}

/// Starts decoding a new block if the last marker search succeeded, otherwise switches to END_OF_FILE.
void SplittableBzip2ReadBuffer::changeStateToProcessABlock()
{
    if (skipResult == true)
    {
        initBlock();
        setupBlock();
    }
    else
    {
        currentState = STATE::END_OF_FILE;
    }
}

/// Reads the block header (stored CRC, randomisation bit), decodes the block body into ll8
/// and resets the CRC accumulator.
void SplittableBzip2ReadBuffer::initBlock()
{
    storedBlockCRC = bsGetInt();
    blockRandomised = (bsR(1) == 1);

    if (!data)
        data = std::make_unique<Data>(blockSize100k);

    getAndMoveToFrontDecode();

    crc.initialiseCRC();
    currentState = STATE::START_BLOCK_STATE;
}

/// Verifies the CRC of the block that has just been fully read (throws via reportCRCError() on
/// mismatch) and folds it into the combined CRC.
void SplittableBzip2ReadBuffer::endBlock()
{
    computedBlockCRC = crc.getFinalCRC();
    if (storedBlockCRC != computedBlockCRC)
    {
        computedCombinedCRC = (storedCombinedCRC << 1) | (static_cast<UInt32>(storedCombinedCRC) >> 31);
        computedCombinedCRC ^= storedBlockCRC;
        reportCRCError();
    }

    computedCombinedCRC = (computedCombinedCRC << 1) | (static_cast<UInt32>(computedCombinedCRC) >> 31);
    computedCombinedCRC ^= computedBlockCRC;
}

/// Reads the stored combined CRC, releases the decoding tables, switches to END_OF_FILE
/// and throws via reportCRCError() if the combined CRC does not match.
void SplittableBzip2ReadBuffer::complete()
{
    storedCombinedCRC = bsGetInt();
    currentState = STATE::END_OF_FILE;
    data = nullptr;

    if (storedCombinedCRC != computedCombinedCRC)
        reportCRCError();
}

/// Reads `n` bits (most significant first) from the bit stream backed by `in_`.
/// `bsBuff_` / `bsLive_` hold the buffered bits and the number of not yet consumed bits.
/// Throws (throwReadAfterEOF) if the input ends before `n` bits are available.
Int64 SplittableBzip2ReadBuffer::bsR(Int64 n, ReadBuffer & in_, Int64 & bsBuff_, Int64 & bsLive_)
{
    Int64 bsLiveShadow = bsLive_;
    Int64 bsBuffShadow = bsBuff_;
    if (bsLiveShadow < n)
    {
        do
        {
            Int32 thech = readAByte(in_);
            if (thech < 0)
                DB::throwReadAfterEOF();

            bsBuffShadow = (bsBuffShadow << 8) | thech;
            bsLiveShadow += 8;
        } while (bsLiveShadow < n);

        bsBuff_ = bsBuffShadow;
    }

    bsLive_ = bsLiveShadow - n;
    return (bsBuffShadow >> (bsLiveShadow - n)) & ((static_cast<Int64>(1) << n) - 1);
}

/// Reads `n` bits from this buffer's own bit stream.
Int64 SplittableBzip2ReadBuffer::bsR(Int64 n)
{
    return bsR(n, *in, bsBuff, bsLive);
}

/// Reads a single bit from this buffer's own bit stream.
bool SplittableBzip2ReadBuffer::bsGetBit()
{
    Int64 bsLiveShadow = bsLive;
    Int64 bsBuffShadow = bsBuff;

    if (bsLiveShadow < 1)
    {
        Int32 thech = readAByte(*in);
        if (thech < 0)
            DB::throwReadAfterEOF();

        bsBuffShadow = (bsBuffShadow << 8) | thech;
        bsLiveShadow += 8;
        bsBuff = bsBuffShadow;
    }

    bsLive = bsLiveShadow - 1;
    return ((bsBuffShadow >> (bsLiveShadow - 1)) & 1) != 0;
}

/// Reads the per-block tables: the set of used byte values, the selector list and the Huffman code
/// lengths of every coding group, then builds the decoding tables.
/// All counts and indices read from the stream are validated before they are used to index the
/// fixed-size tables; a violation throws Exception(LOGICAL_ERROR, "Stream corrupted").
void SplittableBzip2ReadBuffer::recvDecodingTables()
{
    /// Upper bound for a Huffman code length accepted by the reference bzip2 decoder.
    constexpr Int32 max_accepted_code_len = 20;

    Data * dataShadow = data.get();
    bool * inUse = dataShadow->inUse;
    char * pos = dataShadow->recvDecodingTables_pos;
    char * selector = dataShadow->selector;
    char * selectorMtf = dataShadow->selectorMtf;

    /// Two-level bitmap of used byte values: 16 group bits, then 16 bits for every used group.
    Int32 inUse16 = 0;
    for (Int32 i = 0; i < 16; ++i)
        if (bsGetBit())
            inUse16 |= 1 << i;

    for (Int32 i = 255; i >= 0; --i)
        inUse[i] = false;

    for (Int32 i = 0; i < 16; ++i)
    {
        if ((inUse16 & (1 << i)) != 0)
        {
            Int32 i16 = i << 4;
            for (Int32 j = 0; j < 16; j++)
                if (bsGetBit())
                    inUse[i16 + j] = true;
        }
    }

    makeMaps();
    Int32 alphaSize = nInUse + 2;

    Int32 nGroups = static_cast<Int32>(bsR(3));
    Int32 nSelectors = static_cast<Int32>(bsR(15));
    /// nGroups indexes tables with N_GROUPS rows; a 3-bit field can encode up to 7.
    if (nGroups < 2 || nGroups > BZip2Constants::N_GROUPS || nSelectors < 1)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Stream corrupted");

    for (Int32 i = 0; i < nSelectors; ++i)
    {
        Int32 j = 0;
        while (bsGetBit())
        {
            /// j is later used as an index into pos[0, nGroups).
            if (++j >= nGroups)
                throw Exception(ErrorCodes::LOGICAL_ERROR, "Stream corrupted");
        }

        /// A 15-bit count can exceed MAX_SELECTORS. Surplus selectors are read but not stored,
        /// like the reference decoder does since bzip2 1.0.8.
        if (i < BZip2Constants::MAX_SELECTORS)
            selectorMtf[i] = static_cast<char>(j);
    }
    if (nSelectors > BZip2Constants::MAX_SELECTORS)
        nSelectors = BZip2Constants::MAX_SELECTORS;

    /// Undo the move-to-front coding of the selectors.
    for (Int32 v = nGroups - 1; v >= 0; --v)
        pos[v] = v;

    for (Int32 i = 0; i < nSelectors; ++i)
    {
        Int32 v = selectorMtf[i] & 0xff;
        char tmp = pos[v];
        while (v > 0)
        {
            pos[v] = pos[v - 1];
            v--;
        }
        pos[0] = tmp;
        selector[i] = tmp;
    }

    /// Delta-coded code lengths of every group.
    auto * len = dataShadow->temp_charArray2d;
    for (Int32 t = 0; t < nGroups; t++)
    {
        Int32 curr = static_cast<Int32>(bsR(5));
        auto * len_t = len[t];
        for (Int32 i = 0; i < alphaSize; i++)
        {
            while (bsGetBit())
                curr += bsGetBit() ? -1 : 1;

            /// The length is used as an index into base[]/limit[] by hbCreateDecodeTables().
            if (curr < 1 || curr > max_accepted_code_len)
                throw Exception(ErrorCodes::LOGICAL_ERROR, "Stream corrupted");
            len_t[i] = curr;
        }
    }

    createHuffmanDecodingTables(alphaSize, nGroups);
}

/// Computes min/max code length of every coding group and builds its limit/base/perm tables.
void SplittableBzip2ReadBuffer::createHuffmanDecodingTables(Int32 alphaSize, Int32 nGroups)
{
    Data * dataShadow = data.get();
    auto * len = dataShadow->temp_charArray2d;
    auto * minLens = dataShadow->minLens;
    auto * limit = dataShadow->limit;
    auto * base = dataShadow->base;
    auto * perm = dataShadow->perm;

    for (Int32 t = 0; t < nGroups; t++)
    {
        Int32 minLen = 32;
        Int32 maxLen = 0;
        auto * len_t = len[t];
        for (Int32 i = alphaSize - 1; i >= 0; --i)
        {
            Int32 lent = len_t[i];
            if (lent > maxLen)
                maxLen = lent;
            if (lent < minLen)
                minLen = lent;
        }

        hbCreateDecodeTables(limit[t], base[t], perm[t], len[t], minLen, maxLen, alphaSize);
        minLens[t] = minLen;
    }
}

/// Decodes the body of one block: Huffman-decodes the symbol stream, expands RUNA/RUNB runs,
/// undoes the move-to-front transform and stores the resulting bytes in ll8[0..last].
/// unzftab receives the number of occurrences of every byte value.
/// @throws Exception(LOGICAL_ERROR, "Block overrun") if the block would exceed blockSize100k * 100000 bytes.
void SplittableBzip2ReadBuffer::getAndMoveToFrontDecode()
{
    origPtr = static_cast<Int32>(bsR(24));
    recvDecodingTables();

    ReadBuffer * inShadow = in.get();
    Data * dataShadow = data.get();
    auto & ll8 = dataShadow->ll8;
    Int32 * unzftab = dataShadow->unzftab;
    char * selector = dataShadow->selector;
    auto * seqToUnseq = dataShadow->seqToUnseq;
    auto * yy = dataShadow->getAndMoveToFrontDecode_yy;
    Int32 * minLens = dataShadow->minLens;
    auto * limit = dataShadow->limit;
    auto * base = dataShadow->base;
    auto * perm = dataShadow->perm;
    Int32 limitLast = blockSize100k * 100000;

    for (Int32 i = 256; --i >= 0;)
    {
        yy[i] = i;
        unzftab[i] = 0;
    }

    Int32 groupNo = 0;
    Int32 groupPos = BZip2Constants::G_SIZE - 1;
    Int32 eob = nInUse + 1;
    Int32 nextSym = getAndMoveToFrontDecode0(0);
    Int32 bsBuffShadow = static_cast<Int32>(bsBuff);
    Int32 bsLiveShadow = static_cast<Int32>(bsLive);
    Int32 lastShadow = -1;
    Int32 zt = selector[groupNo] & 0xff;
    Int32 * base_zt = base[zt];
    Int32 * limit_zt = limit[zt];
    Int32 * perm_zt = perm[zt];
    Int32 minLens_zt = minLens[zt];

    while (nextSym != eob)
    {
        if ((nextSym == BZip2Constants::RUNA) || (nextSym == BZip2Constants::RUNB))
        {
            /// Run of the byte currently at the front of the MTF list; run length is s + 1.
            Int32 s = -1;
            for (Int32 n = 1; true; n <<= 1)
            {
                if (nextSym == BZip2Constants::RUNA)
                    s += n;
                else if (nextSym == BZip2Constants::RUNB)
                    s += n << 1;
                else
                    break;

                /// A run longer than a whole block is invalid; stopping here also keeps s and n from overflowing.
                if (s >= limitLast)
                    throw Exception(ErrorCodes::LOGICAL_ERROR, "Block overrun");

                if (groupPos == 0)
                {
                    groupPos = BZip2Constants::G_SIZE - 1;
                    zt = selector[++groupNo] & 0xff;
                    base_zt = base[zt];
                    limit_zt = limit[zt];
                    perm_zt = perm[zt];
                    minLens_zt = minLens[zt];
                }
                else
                {
                    groupPos--;
                }

                Int32 zn = minLens_zt;
                while (bsLiveShadow < zn)
                {
                    Int32 thech = readAByte(*inShadow);
                    if (thech < 0)
                        DB::throwReadAfterEOF();

                    bsBuffShadow = (bsBuffShadow << 8) | thech;
                    bsLiveShadow += 8;
                }

                Int64 zvec = (bsBuffShadow >> (bsLiveShadow - zn)) & ((1 << zn) - 1);
                bsLiveShadow -= zn;
                while (zvec > limit_zt[zn])
                {
                    zn++;
                    while (bsLiveShadow < 1)
                    {
                        Int32 thech = readAByte(*inShadow);
                        if (thech < 0)
                            DB::throwReadAfterEOF();

                        bsBuffShadow = (bsBuffShadow << 8) | thech;
                        bsLiveShadow += 8;
                    }
                    bsLiveShadow--;
                    zvec = (zvec << 1) | ((bsBuffShadow >> bsLiveShadow) & 1);
                }
                nextSym = perm_zt[static_cast<Int32>(zvec - base_zt[zn])];
            }

            char ch = seqToUnseq[yy[0]];

            /// Check before writing: the run must end before index limitLast, otherwise ll8 would be overrun.
            if (lastShadow + s + 1 >= limitLast)
                throw Exception(ErrorCodes::LOGICAL_ERROR, "Block overrun");

            unzftab[ch & 0xff] += s + 1;
            while (s-- >= 0)
                ll8[++lastShadow] = ch;
        }
        else
        {
            if (++lastShadow >= limitLast)
                throw Exception(ErrorCodes::LOGICAL_ERROR, "Block overrun");

            auto tmp = yy[nextSym - 1];
            unzftab[seqToUnseq[tmp] & 0xff]++;
            ll8[lastShadow] = seqToUnseq[tmp];

            /// Move the decoded entry to the front of the MTF list.
            if (nextSym <= 16)
                for (Int32 j = nextSym - 1; j > 0; --j)
                    yy[j] = yy[j - 1];
            else
                memmove(&yy[1], &yy[0], (nextSym - 1) * sizeof(yy[0]));
            yy[0] = tmp;

            if (groupPos == 0)
            {
                groupPos = BZip2Constants::G_SIZE - 1;
                zt = selector[++groupNo] & 0xff;
                base_zt = base[zt];
                limit_zt = limit[zt];
                perm_zt = perm[zt];
                minLens_zt = minLens[zt];
            }
            else
            {
                groupPos--;
            }

            Int32 zn = minLens_zt;
            while (bsLiveShadow < zn)
            {
                Int32 thech = readAByte(*inShadow);
                if (thech < 0)
                    DB::throwReadAfterEOF();

                bsBuffShadow = (bsBuffShadow << 8) | thech;
                bsLiveShadow += 8;
            }

            Int32 zvec = (bsBuffShadow >> (bsLiveShadow - zn)) & ((1 << zn) - 1);
            bsLiveShadow -= zn;
            while (zvec > limit_zt[zn])
            {
                zn++;
                while (bsLiveShadow < 1)
                {
                    Int32 thech = readAByte(*inShadow);
                    if (thech < 0)
                        DB::throwReadAfterEOF();

                    bsBuffShadow = (bsBuffShadow << 8) | thech;
                    bsLiveShadow += 8;
                }
                bsLiveShadow--;
                zvec = ((zvec << 1) | ((bsBuffShadow >> bsLiveShadow) & 1));
            }
            nextSym = perm_zt[zvec - base_zt[zn]];
        }
    }

    last = lastShadow;
    bsLive = bsLiveShadow;
    bsBuff = bsBuffShadow;
}

/// Huffman-decodes one symbol using the coding tables selected by selector[groupNo].
Int32 SplittableBzip2ReadBuffer::getAndMoveToFrontDecode0(Int32 groupNo)
{
    ReadBuffer * inShadow = in.get();
    Data * dataShadow = data.get();
    Int32 zt = dataShadow->selector[groupNo] & 0xff;
    Int32 * limit_zt = dataShadow->limit[zt];
    Int32 zn = dataShadow->minLens[zt];
    Int32 zvec = static_cast<Int32>(bsR(zn));
    Int32 bsLiveShadow = static_cast<Int32>(bsLive);
    Int32 bsBuffShadow = static_cast<Int32>(bsBuff);

    while (zvec > limit_zt[zn])
    {
        zn++;
        while (bsLiveShadow < 1)
        {
            Int32 thech = readAByte(*inShadow);
            if (thech < 0)
                DB::throwReadAfterEOF();

            bsBuffShadow = (bsBuffShadow << 8) | thech;
            bsLiveShadow += 8;
        }
        bsLiveShadow--;
        zvec = (zvec << 1) | ((bsBuffShadow >> bsLiveShadow) & 1);
    }

    bsLive = bsLiveShadow;
    bsBuff = bsBuffShadow;
    return dataShadow->perm[zt][zvec - dataShadow->base[zt][zn]];
}

/// Builds the `tt` table from ll8 and the byte counts, positions the read cursor at origPtr and
/// prepares the first output byte of the block. Does nothing if no Data object exists.
/// @throws Exception(LOGICAL_ERROR, "Stream corrupted") if origPtr is outside of tt.
void SplittableBzip2ReadBuffer::setupBlock()
{
    if (!data)
        return;

    Int32 * cftab = data->cftab;
    std::vector<Int32> & tt = data->initTT(last + 1);
    auto & ll8 = data->ll8;

    /// cftab: cumulative byte counts.
    cftab[0] = 0;
    memcpy(&cftab[1], &data->unzftab[0], 256 * sizeof(cftab[0]));
    for (Int32 i = 1, c = cftab[0]; i <= 256; i++)
    {
        c += cftab[i];
        cftab[i] = c;
    }

    for (Int32 i = 0, lastShadow = last; i <= lastShadow; i++)
        tt[cftab[ll8[i] & 0xff]++] = i;

    if (origPtr < 0 || static_cast<size_t>(origPtr) >= tt.size())
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Stream corrupted");

    su_tPos = tt[origPtr];
    su_count = 0;
    su_i2 = 0;
    su_ch2 = 256; /// not a char and not EOF

    if (blockRandomised)
    {
        su_rNToGo = 0;
        su_rTPos = 0;
        setupRandPartA();
    }
    else
    {
        setupNoRandPartA();
    }
}

/// Randomised block, part A: fetches the next byte of the block, applies the de-randomisation XOR
/// and makes it the current output byte; finishes the block when all bytes have been consumed.
void SplittableBzip2ReadBuffer::setupRandPartA()
{
    if (su_i2 <= last)
    {
        su_chPrev = su_ch2;
        Int32 su_ch2Shadow = data->ll8[su_tPos] & 0xff;
        su_tPos = data->tt[su_tPos];
        if (su_rNToGo == 0)
        {
            su_rNToGo = BZip2Constants::rNums[su_rTPos] - 1;
            if (++su_rTPos == 512)
                su_rTPos = 0;
        }
        else
        {
            su_rNToGo--;
        }

        /// su_ch2 must hold the de-randomised byte itself (not a 0/1 flag): it is compared with
        /// su_chPrev for run detection in part B and emitted as the repeated byte in part C.
        su_ch2Shadow ^= (su_rNToGo == 1) ? 1 : 0;
        su_ch2 = su_ch2Shadow;
        su_i2++;
        currentChar = su_ch2Shadow;
        currentState = STATE::RAND_PART_B_STATE;
        crc.updateCRC(su_ch2Shadow);
    }
    else
    {
        endBlock();
        currentState = STATE::NO_PROCESS_STATE;
    }
}

/// Non-randomised block, part A: fetches the next byte of the block and makes it the current
/// output byte; finishes the block when all bytes have been consumed.
void SplittableBzip2ReadBuffer::setupNoRandPartA()
{
    if (su_i2 <= last)
    {
        su_chPrev = su_ch2;
        Int32 su_ch2Shadow = data->ll8[su_tPos] & 0xff;
        su_ch2 = su_ch2Shadow;
        su_tPos = data->tt[su_tPos];
        su_i2++;
        currentChar = su_ch2Shadow;
        currentState = STATE::NO_RAND_PART_B_STATE;
        crc.updateCRC(su_ch2Shadow);
    }
    else
    {
        currentState = STATE::NO_RAND_PART_A_STATE;
        endBlock();
        currentState = STATE::NO_PROCESS_STATE;
    }
}

/// Randomised block, part B: tracks runs of equal bytes. After four equal bytes the next byte of the
/// block is a repeat count (su_z) which is then expanded by part C.
void SplittableBzip2ReadBuffer::setupRandPartB()
{
    if (su_ch2 != su_chPrev)
    {
        currentState = STATE::RAND_PART_A_STATE;
        su_count = 1;
        setupRandPartA();
    }
    else if (++su_count >= 4)
    {
        su_z = data->ll8[su_tPos] & 0xff;
        su_tPos = data->tt[su_tPos];
        if (su_rNToGo == 0)
        {
            su_rNToGo = BZip2Constants::rNums[su_rTPos] - 1;
            if (++su_rTPos == 512)
                su_rTPos = 0;
        }
        else
        {
            su_rNToGo--;
        }
        su_j2 = 0;
        currentState = STATE::RAND_PART_C_STATE;
        if (su_rNToGo == 1)
            su_z ^= 1;
        setupRandPartC();
    }
    else
    {
        currentState = STATE::RAND_PART_A_STATE;
        setupRandPartA();
    }
}

/// Randomised block, part C: emits su_ch2 su_z times, then continues with part A.
void SplittableBzip2ReadBuffer::setupRandPartC()
{
    if (su_j2 < su_z)
    {
        currentChar = su_ch2;
        crc.updateCRC(su_ch2);
        su_j2++;
    }
    else
    {
        currentState = STATE::RAND_PART_A_STATE;
        su_i2++;
        su_count = 0;
        setupRandPartA();
    }
}

/// Non-randomised block, part B: tracks runs of equal bytes. After four equal bytes the next byte of
/// the block is a repeat count (su_z) which is then expanded by part C.
void SplittableBzip2ReadBuffer::setupNoRandPartB()
{
    if (su_ch2 != su_chPrev)
    {
        su_count = 1;
        setupNoRandPartA();
    }
    else if (++su_count >= 4)
    {
        su_z = data->ll8[su_tPos] & 0xff;
        su_tPos = data->tt[su_tPos];
        su_j2 = 0;
        setupNoRandPartC();
    }
    else
    {
        setupNoRandPartA();
    }
}

/// Non-randomised block, part C: emits su_ch2 su_z times, then continues with part A.
void SplittableBzip2ReadBuffer::setupNoRandPartC()
{
    if (su_j2 < su_z)
    {
        Int32 su_ch2Shadow = su_ch2;
        currentChar = su_ch2Shadow;
        crc.updateCRC(su_ch2Shadow);
        su_j2++;
        currentState = STATE::NO_RAND_PART_C_STATE;
    }
    else
    {
        su_i2++;
        su_count = 0;
        setupNoRandPartA();
    }
}

}
#endif