in common/src/main/java/org/apache/comet/parquet/ColumnPageReader.java [110:223]
public DataPage readPage() {
  // Takes the next queued page and hands it back decrypted (when a block decryptor is
  // configured) and decompressed. Returns null once the queue has no more pages.
  // Decrypt/decompress I/O failures surface as ParquetDecodingException.
  final DataPage compressedPage = compressedPages.poll();
  if (compressedPage == null) {
    return null;
  }

  // Advanced exactly once per polled page; the captured value keys both the AAD update
  // below and the offset-index lookups performed inside the visitor.
  final int currentPageIndex = pageIndex++;

  if (null != blockDecryptor) {
    // Refresh the shared AAD buffer for this page before any decrypt call reads it.
    AesCipher.quickUpdatePageAAD(dataPageAAD, getPageOrdinal(currentPageIndex));
  }

  return compressedPage.accept(
      new DataPage.Visitor<DataPage>() {
        @Override
        public DataPage visit(DataPageV1 dataPageV1) {
          try {
            BytesInput bytes = dataPageV1.getBytes();
            if (null != blockDecryptor) {
              bytes = BytesInput.from(blockDecryptor.decrypt(bytes.toByteArray(), dataPageAAD));
            }
            BytesInput decompressed =
                decompressor.decompress(bytes, dataPageV1.getUncompressedSize());

            final DataPageV1 decompressedPage;
            if (offsetIndex == null) {
              decompressedPage =
                  new DataPageV1(
                      decompressed,
                      dataPageV1.getValueCount(),
                      dataPageV1.getUncompressedSize(),
                      dataPageV1.getStatistics(),
                      dataPageV1.getRlEncoding(),
                      dataPageV1.getDlEncoding(),
                      dataPageV1.getValueEncoding());
            } else {
              // With an offset index the page also carries its first row index and its
              // row count; the count is derived from the inclusive [first, last] range.
              long firstRowIndex = offsetIndex.getFirstRowIndex(currentPageIndex);
              decompressedPage =
                  new DataPageV1(
                      decompressed,
                      dataPageV1.getValueCount(),
                      dataPageV1.getUncompressedSize(),
                      firstRowIndex,
                      Math.toIntExact(
                          offsetIndex.getLastRowIndex(currentPageIndex, rowCount)
                              - firstRowIndex
                              + 1),
                      dataPageV1.getStatistics(),
                      dataPageV1.getRlEncoding(),
                      dataPageV1.getDlEncoding(),
                      dataPageV1.getValueEncoding());
            }
            // Carry the source page's CRC over to the rebuilt page.
            if (dataPageV1.getCrc().isPresent()) {
              decompressedPage.setCrc(dataPageV1.getCrc().getAsInt());
            }
            return decompressedPage;
          } catch (IOException e) {
            throw new ParquetDecodingException("could not decompress page", e);
          }
        }

        @Override
        public DataPage visit(DataPageV2 dataPageV2) {
          // Fast path: nothing to decompress, decrypt, or annotate with a row index, so
          // the incoming page object is returned untouched.
          if (!dataPageV2.isCompressed() && offsetIndex == null && null == blockDecryptor) {
            return dataPageV2;
          }

          BytesInput pageBytes = dataPageV2.getData();
          if (null != blockDecryptor) {
            try {
              pageBytes =
                  BytesInput.from(blockDecryptor.decrypt(pageBytes.toByteArray(), dataPageAAD));
            } catch (IOException e) {
              throw new ParquetDecodingException(
                  "could not convert page ByteInput to byte array", e);
            }
          }

          if (dataPageV2.isCompressed()) {
            // Only the data section is passed to the decompressor, so the sizes of the
            // definition and repetition levels are subtracted from the page's total size.
            int uncompressedSize =
                Math.toIntExact(
                    dataPageV2.getUncompressedSize()
                        - dataPageV2.getDefinitionLevels().size()
                        - dataPageV2.getRepetitionLevels().size());
            try {
              pageBytes = decompressor.decompress(pageBytes, uncompressedSize);
            } catch (IOException e) {
              throw new ParquetDecodingException("could not decompress page", e);
            }
          }

          final DataPageV2 decompressedPage;
          if (offsetIndex == null) {
            decompressedPage =
                DataPageV2.uncompressed(
                    dataPageV2.getRowCount(),
                    dataPageV2.getNullCount(),
                    dataPageV2.getValueCount(),
                    dataPageV2.getRepetitionLevels(),
                    dataPageV2.getDefinitionLevels(),
                    dataPageV2.getDataEncoding(),
                    pageBytes,
                    dataPageV2.getStatistics());
          } else {
            decompressedPage =
                DataPageV2.uncompressed(
                    dataPageV2.getRowCount(),
                    dataPageV2.getNullCount(),
                    dataPageV2.getValueCount(),
                    offsetIndex.getFirstRowIndex(currentPageIndex),
                    dataPageV2.getRepetitionLevels(),
                    dataPageV2.getDefinitionLevels(),
                    dataPageV2.getDataEncoding(),
                    pageBytes,
                    dataPageV2.getStatistics());
          }
          // Keep the CRC on the rebuilt page, matching the V1 branch and the fast path
          // above (which returns the original page, CRC included).
          if (dataPageV2.getCrc().isPresent()) {
            decompressedPage.setCrc(dataPageV2.getCrc().getAsInt());
          }
          return decompressedPage;
        }
      });
}