in parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java [1553:1640]
public ParquetMetadata readParquetMetadata(
    final InputStream fromInputStream,
    MetadataFilter filter,
    final InternalFileDecryptor fileDecryptor,
    final boolean encryptedFooter,
    final int combinedFooterLength)
    throws IOException {
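  // With an encrypted footer, prepare the decryptor and the AAD (additional authenticated
  // data) for the AES footer cipher up front; for plaintext footers both stay null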
  final BlockCipher.Decryptor footerDecryptor = (encryptedFooter ? fileDecryptor.fetchFooterDecryptor() : null);
  final byte[] encryptedFooterAAD =
      (encryptedFooter ? AesCipher.createFooterAAD(fileDecryptor.getFileAAD()) : null);

  // Mark the beginning of the footer for verifyFooterIntegrity
  final InputStream from;
  if (fileDecryptor != null && fileDecryptor.checkFooterIntegrity()) {
    // fromInputStream should already support marking, but let's be on the safe side
    if (!fromInputStream.markSupported()) {
      from = new BufferedInputStream(fromInputStream, combinedFooterLength);
    } else {
      from = fromInputStream;
    }
    from.mark(combinedFooterLength);
  } else {
    from = fromInputStream;
  }
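
  // Dispatch on the concrete filter type. All four cases read the footer with the same
  // decryptor/AAD; the offset and range filters additionally prune the row-group list.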
  FileMetaDataAndRowGroupOffsetInfo fileMetaDataAndRowGroupInfo =
      filter.accept(new MetadataFilterVisitor<FileMetaDataAndRowGroupOffsetInfo, IOException>() {
        @Override
        public FileMetaDataAndRowGroupOffsetInfo visit(NoFilter filter) throws IOException {
          FileMetaData fileMetadata = readFileMetaData(from, footerDecryptor, encryptedFooterAAD);
          return new FileMetaDataAndRowGroupOffsetInfo(
              fileMetadata, generateRowGroupOffsets(fileMetadata));
        }
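
        // The `true` argument asks readFileMetaData to skip deserializing the
        // row-group list, so only file-level metadata is materialized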
        @Override
        public FileMetaDataAndRowGroupOffsetInfo visit(SkipMetadataFilter filter) throws IOException {
          FileMetaData fileMetadata = readFileMetaData(from, true, footerDecryptor, encryptedFooterAAD);
          return new FileMetaDataAndRowGroupOffsetInfo(
              fileMetadata, generateRowGroupOffsets(fileMetadata));
        }

        @Override
        public FileMetaDataAndRowGroupOffsetInfo visit(OffsetMetadataFilter filter) throws IOException {
          FileMetaData fileMetadata = readFileMetaData(from, footerDecryptor, encryptedFooterAAD);
          // The offsets map must be generated *before* filtering, because filtering
          // modifies `fileMetadata` in place.
          Map<RowGroup, Long> rowGroupToRowIndexOffsetMap = generateRowGroupOffsets(fileMetadata);
          FileMetaData filteredFileMetadata = filterFileMetaDataByStart(fileMetadata, filter);
          return new FileMetaDataAndRowGroupOffsetInfo(filteredFileMetadata, rowGroupToRowIndexOffsetMap);
        }

        @Override
        public FileMetaDataAndRowGroupOffsetInfo visit(RangeMetadataFilter filter) throws IOException {
          FileMetaData fileMetadata = readFileMetaData(from, footerDecryptor, encryptedFooterAAD);
          // As above, the offsets map must be generated *before* filtering, because
          // filtering modifies `fileMetadata` in place.
          Map<RowGroup, Long> rowGroupToRowIndexOffsetMap = generateRowGroupOffsets(fileMetadata);
          FileMetaData filteredFileMetadata = filterFileMetaDataByMidpoint(fileMetadata, filter);
          return new FileMetaDataAndRowGroupOffsetInfo(filteredFileMetadata, rowGroupToRowIndexOffsetMap);
        }
      });
  FileMetaData fileMetaData = fileMetaDataAndRowGroupInfo.fileMetadata;
  Map<RowGroup, Long> rowGroupToRowIndexOffsetMap = fileMetaDataAndRowGroupInfo.rowGroupToRowIndexOffsetMap;
  LOG.debug("{}", fileMetaData);
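
  // Encrypted-footer files were already decrypted above. This block handles the case
  // where a decryptor was supplied but the footer itself is plaintext: either the whole
  // file is plaintext, or it is an encrypted file written in plaintext-footer mode.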
  if (!encryptedFooter && null != fileDecryptor) {
    if (!fileMetaData.isSetEncryption_algorithm()) { // Plaintext file
      fileDecryptor.setPlaintextFile();
      // Detect files that were supposed to be encrypted but were written as plaintext by mistake
      if (!fileDecryptor.plaintextFilesAllowed()) {
        throw new ParquetCryptoRuntimeException("Applying decryptor on plaintext file");
      }
    } else { // Encrypted file with plaintext footer
      // Even without a fileDecryptor the plaintext columns would be readable; here a
      // decryptor is present, so configure it from the file's crypto metadata
      fileDecryptor.setFileCryptoMetaData(
          fileMetaData.getEncryption_algorithm(), false, fileMetaData.getFooter_signing_key_metadata());
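      // Signed plaintext footer: rewind to the mark set above and verify the
      // footer signature over the serialized footer bytes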
      if (fileDecryptor.checkFooterIntegrity()) {
        verifyFooterIntegrity(from, fileDecryptor, combinedFooterLength);
      }
    }
  }
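
  // Convert the Thrift-level FileMetaData into parquet-hadoop's ParquetMetadata model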
  ParquetMetadata parquetMetadata =
      fromParquetMetadata(fileMetaData, fileDecryptor, encryptedFooter, rowGroupToRowIndexOffsetMap);
  if (LOG.isDebugEnabled()) {
    LOG.debug(ParquetMetadata.toPrettyJSON(parquetMetadata));
  }
  return parquetMetadata;
}