in parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileWriter.java [1281:1366]
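/**
 * Writes a single v2 data page: the page header, followed by the repetition levels,
 * the definition levels and the (possibly compressed) values.
 */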
public void writeDataPageV2(
    int rowCount,
    int nullCount,
    int valueCount,
    BytesInput repetitionLevels,
    BytesInput definitionLevels,
    Encoding dataEncoding,
    BytesInput bytes,
    boolean compressed,
    int uncompressedDataSize,
    Statistics<?> statistics,
    BlockCipher.Encryptor metadataBlockEncryptor,
    byte[] pageHeaderAAD,
    SizeStatistics sizeStatistics)
    throws IOException {
  state = state.write();
  int rlByteLength = toIntWithCheck(repetitionLevels.size(), "page repetition levels");
  int dlByteLength = toIntWithCheck(definitionLevels.size(), "page definition levels");

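  // Both page sizes include the levels, which are written outside the compressed section in v2 data pages.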
  int compressedSize = toIntWithCheck(bytes.size() + repetitionLevels.size() + definitionLevels.size(), "page");
  int uncompressedSize =
      toIntWithCheck(uncompressedDataSize + repetitionLevels.size() + definitionLevels.size(), "page");

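  // Remember where this page starts; the offset of the chunk's first data page is kept for the column chunk metadata.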
  long beforeHeader = out.getPos();
  if (currentChunkFirstDataPage < 0) {
    currentChunkFirstDataPage = beforeHeader;
  }

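  // With page checksums enabled, compute the CRC over the levels and the data and write it in the page header.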
  if (pageWriteChecksumEnabled) {
    crc.reset();
    if (repetitionLevels.size() > 0) {
      crcUpdate(repetitionLevels);
    }
    if (definitionLevels.size() > 0) {
      crcUpdate(definitionLevels);
    }
    if (bytes.size() > 0) {
      crcUpdate(bytes);
    }
    metadataConverter.writeDataPageV2Header(
        uncompressedSize,
        compressedSize,
        valueCount,
        nullCount,
        rowCount,
        dataEncoding,
        rlByteLength,
        dlByteLength,
        compressed,
        (int) crc.getValue(),
        out,
        metadataBlockEncryptor,
        pageHeaderAAD);
  } else {
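    // Without checksums, write the same header minus the CRC field.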
    metadataConverter.writeDataPageV2Header(
        uncompressedSize,
        compressedSize,
        valueCount,
        nullCount,
        rowCount,
        dataEncoding,
        rlByteLength,
        dlByteLength,
        compressed,
        out,
        metadataBlockEncryptor,
        pageHeaderAAD);
  }

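  // Account for the header bytes in the chunk's compressed and uncompressed totals.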
  long headersSize = out.getPos() - beforeHeader;
  this.uncompressedLength += uncompressedSize + headersSize;
  this.compressedLength += compressedSize + headersSize;

  mergeColumnStatistics(statistics, sizeStatistics);
  currentEncodings.add(dataEncoding);
  encodingStatsBuilder.addDataEncoding(dataEncoding);

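  // Levels are written uncompressed, followed by the (possibly compressed) page data.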
  BytesInput.concat(repetitionLevels, definitionLevels, bytes).writeAllTo(out);

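  // Record the page size, row count and unencoded byte-array size (when available) in the offset index.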
  offsetIndexBuilder.add(
      toIntWithCheck(out.getPos() - beforeHeader, "page"),
      rowCount,
      sizeStatistics != null ? sizeStatistics.getUnencodedByteArrayDataBytes() : Optional.empty());
}