public ParquetMetadata readParquetMetadata()

in parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java [1553:1640]


  /**
   * Reads the Thrift file metadata from the given stream, applies the supplied metadata filter and
   * converts the result into a {@link ParquetMetadata}.
   *
   * <p>When a file decryptor is supplied and the footer is not encrypted, the decryptor is either
   * told that the file is plaintext or is given the encryption algorithm and footer signing key
   * metadata found in the footer; in the latter case the footer integrity is verified if the
   * decryptor requests it.
   *
   * @param fromInputStream stream to read the footer from; its current position is marked as the
   *     beginning of the footer when footer integrity checking is enabled
   * @param filter selects how row groups are handled: read as-is ({@code NoFilter}), read through
   *     the {@code readFileMetaData} overload taking an extra {@code true} flag
   *     ({@code SkipMetadataFilter}), or filtered after reading ({@code OffsetMetadataFilter},
   *     {@code RangeMetadataFilter})
   * @param fileDecryptor decryptor for the file; may be {@code null} only when
   *     {@code encryptedFooter} is {@code false}
   * @param encryptedFooter whether the footer itself is encrypted; when {@code true} the footer
   *     decryptor and footer AAD are obtained from {@code fileDecryptor}
   * @param combinedFooterLength used as the mark read limit (and buffer size, if the stream has to
   *     be wrapped) and passed on to the footer integrity verification; presumably the length of
   *     the footer plus its signature - confirm against the caller
   * @return the converted metadata
   * @throws IOException if reading the metadata from the stream fails
   * @throws ParquetCryptoRuntimeException if {@code encryptedFooter} is {@code true} but no
   *     decryptor was supplied, or if a decryptor is applied to a plaintext file and the decryptor
   *     does not allow plaintext files
   */
  public ParquetMetadata readParquetMetadata(
      final InputStream fromInputStream,
      MetadataFilter filter,
      final InternalFileDecryptor fileDecryptor,
      final boolean encryptedFooter,
      final int combinedFooterLength)
      throws IOException {

    // An encrypted footer cannot be read without a decryptor; fail with a descriptive error
    // instead of a NullPointerException on the dereferences below.
    if (encryptedFooter && null == fileDecryptor) {
      throw new ParquetCryptoRuntimeException("Trying to read file with encrypted footer. No keys available");
    }

    final BlockCipher.Decryptor footerDecryptor = (encryptedFooter ? fileDecryptor.fetchFooterDecryptor() : null);
    final byte[] encryptedFooterAAD =
        (encryptedFooter ? AesCipher.createFooterAAD(fileDecryptor.getFileAAD()) : null);

    // Mark the beginning of the footer for verifyFooterIntegrity
    final InputStream from;
    if (fileDecryptor != null && fileDecryptor.checkFooterIntegrity()) {
      // fromInputStream should already support marking but let's be on the safe side
      if (!fromInputStream.markSupported()) {
        from = new BufferedInputStream(fromInputStream, combinedFooterLength);
      } else {
        from = fromInputStream;
      }
      from.mark(combinedFooterLength);
    } else {
      from = fromInputStream;
    }

    // Each visitor reads the file metadata and pairs it with the row-group offset map.
    FileMetaDataAndRowGroupOffsetInfo fileMetaDataAndRowGroupInfo =
        filter.accept(new MetadataFilterVisitor<FileMetaDataAndRowGroupOffsetInfo, IOException>() {
          @Override
          public FileMetaDataAndRowGroupOffsetInfo visit(NoFilter filter) throws IOException {
            FileMetaData fileMetadata = readFileMetaData(from, footerDecryptor, encryptedFooterAAD);
            return new FileMetaDataAndRowGroupOffsetInfo(
                fileMetadata, generateRowGroupOffsets(fileMetadata));
          }

          @Override
          public FileMetaDataAndRowGroupOffsetInfo visit(SkipMetadataFilter filter) throws IOException {
            FileMetaData fileMetadata = readFileMetaData(from, true, footerDecryptor, encryptedFooterAAD);
            return new FileMetaDataAndRowGroupOffsetInfo(
                fileMetadata, generateRowGroupOffsets(fileMetadata));
          }

          @Override
          public FileMetaDataAndRowGroupOffsetInfo visit(OffsetMetadataFilter filter) throws IOException {
            FileMetaData fileMetadata = readFileMetaData(from, footerDecryptor, encryptedFooterAAD);
            // We must generate the map *before* filtering because it modifies `fileMetadata`.
            Map<RowGroup, Long> rowGroupToRowIndexOffsetMap = generateRowGroupOffsets(fileMetadata);
            FileMetaData filteredFileMetadata = filterFileMetaDataByStart(fileMetadata, filter);
            return new FileMetaDataAndRowGroupOffsetInfo(filteredFileMetadata, rowGroupToRowIndexOffsetMap);
          }

          @Override
          public FileMetaDataAndRowGroupOffsetInfo visit(RangeMetadataFilter filter) throws IOException {
            FileMetaData fileMetadata = readFileMetaData(from, footerDecryptor, encryptedFooterAAD);
            // We must generate the map *before* filtering because it modifies `fileMetadata`.
            Map<RowGroup, Long> rowGroupToRowIndexOffsetMap = generateRowGroupOffsets(fileMetadata);
            FileMetaData filteredFileMetadata = filterFileMetaDataByMidpoint(fileMetadata, filter);
            return new FileMetaDataAndRowGroupOffsetInfo(filteredFileMetadata, rowGroupToRowIndexOffsetMap);
          }
        });
    FileMetaData fileMetaData = fileMetaDataAndRowGroupInfo.fileMetadata;
    Map<RowGroup, Long> rowGroupToRowIndexOffsetMap = fileMetaDataAndRowGroupInfo.rowGroupToRowIndexOffsetMap;
    LOG.debug("{}", fileMetaData);

    // Plaintext footer with a decryptor supplied. Without a decryptor this step is skipped
    // entirely and plaintext columns can still be read.
    if (!encryptedFooter && null != fileDecryptor) {
      if (!fileMetaData.isSetEncryption_algorithm()) { // Plaintext file
        fileDecryptor.setPlaintextFile();
        // Done to detect files that were not encrypted by mistake
        if (!fileDecryptor.plaintextFilesAllowed()) {
          throw new ParquetCryptoRuntimeException("Applying decryptor on plaintext file");
        }
      } else { // Encrypted file with plaintext footer
        // Hand the algorithm and footer signing key metadata from the footer to the decryptor.
        fileDecryptor.setFileCryptoMetaData(
            fileMetaData.getEncryption_algorithm(), false, fileMetaData.getFooter_signing_key_metadata());
        if (fileDecryptor.checkFooterIntegrity()) {
          // Uses the stream marked above at the beginning of the footer.
          verifyFooterIntegrity(from, fileDecryptor, combinedFooterLength);
        }
      }
    }

    ParquetMetadata parquetMetadata =
        fromParquetMetadata(fileMetaData, fileDecryptor, encryptedFooter, rowGroupToRowIndexOffsetMap);
    // Guarded because building the pretty JSON is only worth doing when debug logging is on.
    if (LOG.isDebugEnabled()) {
      LOG.debug(ParquetMetadata.toPrettyJSON(parquetMetadata));
    }
    return parquetMetadata;
  }