public ParquetMetadata fromParquetMetadata(FileMetaData, InternalFileDecryptor, boolean, Map<RowGroup, Long>)

in parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java [1669:1816]


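  /**
   * Converts the thrift-level {@link FileMetaData} footer into the parquet-hadoop
   * {@link ParquetMetadata} model: the schema is rebuilt, each row group becomes a
   * {@link BlockMetaData}, and per-column crypto metadata is resolved as plaintext,
   * footer-key encrypted, or column-key encrypted (the last is decrypted lazily on projection).
   */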
  public ParquetMetadata fromParquetMetadata(
      FileMetaData parquetMetadata,
      InternalFileDecryptor fileDecryptor,
      boolean encryptedFooter,
      Map<RowGroup, Long> rowGroupToRowIndexOffsetMap)
      throws IOException {
    MessageType messageType = fromParquetSchema(parquetMetadata.getSchema(), parquetMetadata.getColumn_orders());
    List<BlockMetaData> blocks = new ArrayList<BlockMetaData>();
    List<RowGroup> row_groups = parquetMetadata.getRow_groups();

    if (row_groups != null) {
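      // Each thrift RowGroup becomes one BlockMetaData; each ColumnChunk becomes one ColumnChunkMetaData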
      for (RowGroup rowGroup : row_groups) {
        BlockMetaData blockMetaData = new BlockMetaData();
        blockMetaData.setRowCount(rowGroup.getNum_rows());
        blockMetaData.setTotalByteSize(rowGroup.getTotal_byte_size());
        if (rowGroupToRowIndexOffsetMap.containsKey(rowGroup)) {
          blockMetaData.setRowIndexOffset(rowGroupToRowIndexOffsetMap.get(rowGroup));
        }
        // not set in legacy files
        if (rowGroup.isSetOrdinal()) {
          blockMetaData.setOrdinal(rowGroup.getOrdinal());
        }
        List<ColumnChunk> columns = rowGroup.getColumns();
        String filePath = columns.get(0).getFile_path();
        int columnOrdinal = -1;
        for (ColumnChunk columnChunk : columns) {
          columnOrdinal++;
          if ((filePath == null && columnChunk.getFile_path() != null)
              || (filePath != null && !filePath.equals(columnChunk.getFile_path()))) {
            throw new ParquetDecodingException(
                "all column chunks of the same row group must be in the same file for now");
          }
          ColumnMetaData metaData = columnChunk.meta_data;
          ColumnCryptoMetaData cryptoMetaData = columnChunk.getCrypto_metadata();
          ColumnChunkMetaData column = null;
          ColumnPath columnPath = null;
          boolean lazyMetadataDecryption = false;

          if (null == cryptoMetaData) { // Plaintext column
            columnPath = getPath(metaData);
            if (null != fileDecryptor && !fileDecryptor.plaintextFile()) {
              // mark this column as plaintext in encrypted file decryptor
              fileDecryptor.setColumnCryptoMetadata(
                  columnPath, false, false, (byte[]) null, columnOrdinal);
            }
          } else { // Encrypted column
            boolean encryptedWithFooterKey = cryptoMetaData.isSetENCRYPTION_WITH_FOOTER_KEY();
            if (encryptedWithFooterKey) { // Column encrypted with footer key
              if (null == fileDecryptor) {
                throw new ParquetCryptoRuntimeException(
                    "Column encrypted with footer key: No keys available");
              }
              if (null == metaData) {
                throw new ParquetCryptoRuntimeException(
                    "ColumnMetaData not set in Encryption with Footer key");
              }
              columnPath = getPath(metaData);
              if (!encryptedFooter) {
                // Unencrypted footer: decrypt the full column metadata using the footer key
                ByteArrayInputStream tempInputStream =
                    new ByteArrayInputStream(columnChunk.getEncrypted_column_metadata());
                byte[] columnMetaDataAAD = AesCipher.createModuleAAD(
                    fileDecryptor.getFileAAD(),
                    ModuleType.ColumnMetaData,
                    rowGroup.getOrdinal(),
                    columnOrdinal,
                    -1);
                try {
                  metaData = readColumnMetaData(
                      tempInputStream, fileDecryptor.fetchFooterDecryptor(), columnMetaDataAAD);
                } catch (IOException e) {
                  throw new ParquetCryptoRuntimeException(
                      columnPath + ". Failed to decrypt column metadata", e);
                }
              }
              fileDecryptor.setColumnCryptoMetadata(columnPath, true, true, (byte[]) null, columnOrdinal);
            } else { // Column encrypted with column key
              // setColumnCryptoMetadata triggers KMS interaction, so it is delayed until this column is projected
              lazyMetadataDecryption = true;
            }
          }

          String createdBy = parquetMetadata.getCreated_by();
          if (!lazyMetadataDecryption) { // full column metadata (with stats) is available
            column = buildColumnChunkMetaData(
                metaData,
                columnPath,
                messageType.getType(columnPath.toArray()).asPrimitiveType(),
                createdBy);
            column.setRowGroupOrdinal(rowGroup.getOrdinal());
            if (metaData.isSetBloom_filter_offset()) {
              column.setBloomFilterOffset(metaData.getBloom_filter_offset());
            }
            if (metaData.isSetBloom_filter_length()) {
              column.setBloomFilterLength(metaData.getBloom_filter_length());
            }
          } else { // column encrypted with column key
            // Metadata will be decrypted later, if this column is accessed
            EncryptionWithColumnKey columnKeyStruct = cryptoMetaData.getENCRYPTION_WITH_COLUMN_KEY();
            List<String> pathList = columnKeyStruct.getPath_in_schema();
            byte[] columnKeyMetadata = columnKeyStruct.getKey_metadata();
            columnPath = ColumnPath.get(pathList.toArray(new String[pathList.size()]));
            byte[] encryptedMetadataBuffer = columnChunk.getEncrypted_column_metadata();
            column = ColumnChunkMetaData.getWithEncryptedMetadata(
                this,
                columnPath,
                messageType.getType(columnPath.toArray()).asPrimitiveType(),
                encryptedMetadataBuffer,
                columnKeyMetadata,
                fileDecryptor,
                rowGroup.getOrdinal(),
                columnOrdinal,
                createdBy);
          }

          column.setColumnIndexReference(toColumnIndexReference(columnChunk));
          column.setOffsetIndexReference(toOffsetIndexReference(columnChunk));

          // TODO
          // index_page_offset
          // key_value_metadata
          blockMetaData.addColumn(column);
        }
        blockMetaData.setPath(filePath);
        blocks.add(blockMetaData);
      }
    }
    Map<String, String> keyValueMetaData = new HashMap<String, String>();
    List<KeyValue> key_value_metadata = parquetMetadata.getKey_value_metadata();
    if (key_value_metadata != null) {
      for (KeyValue keyValue : key_value_metadata) {
        keyValueMetaData.put(keyValue.key, keyValue.value);
      }
    }
    EncryptionType encryptionType;
    if (encryptedFooter) {
      encryptionType = EncryptionType.ENCRYPTED_FOOTER;
    } else if (parquetMetadata.isSetEncryption_algorithm()) {
      encryptionType = EncryptionType.PLAINTEXT_FOOTER;
    } else {
      encryptionType = EncryptionType.UNENCRYPTED;
    }
    return new ParquetMetadata(
        new org.apache.parquet.hadoop.metadata.FileMetaData(
            messageType, keyValueMetaData, parquetMetadata.getCreated_by(), encryptionType, fileDecryptor),
        blocks);
  }
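
Usage sketch (not part of the source file): a minimal illustration, assuming a plaintext (unencrypted) file, of how the converted metadata might be produced and inspected. The footerStream parameter, the FooterConversionSketch class name, the no-arg ParquetMetadataConverter constructor, and the empty row-index-offset map are assumptions for illustration; inside the library this conversion is driven by the footer-reading path with the real decryptor and offsets.

import java.io.IOException;
import java.io.InputStream;
import java.util.Collections;

import org.apache.parquet.format.FileMetaData;
import org.apache.parquet.format.Util;
import org.apache.parquet.format.converter.ParquetMetadataConverter;
import org.apache.parquet.hadoop.metadata.BlockMetaData;
import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;
import org.apache.parquet.hadoop.metadata.ParquetMetadata;

public class FooterConversionSketch {

  // footerStream is a hypothetical input, assumed to be positioned at the start of the
  // thrift-serialized footer of a plaintext (unencrypted) Parquet file.
  static ParquetMetadata convertPlaintextFooter(InputStream footerStream) throws IOException {
    FileMetaData thriftFooter = Util.readFileMetaData(footerStream);
    ParquetMetadataConverter converter = new ParquetMetadataConverter();
    // null decryptor, encryptedFooter=false, and no precomputed row-index offsets:
    // only the plaintext-column branch of fromParquetMetadata is exercised.
    return converter.fromParquetMetadata(thriftFooter, null, false, Collections.emptyMap());
  }

  // Walk the converted model: one BlockMetaData per row group,
  // one ColumnChunkMetaData per column chunk.
  static void printSummary(ParquetMetadata metadata) {
    for (BlockMetaData block : metadata.getBlocks()) {
      System.out.println("row group: " + block.getRowCount() + " rows");
      for (ColumnChunkMetaData column : block.getColumns()) {
        System.out.println("  " + column.getPath() + " codec=" + column.getCodec());
      }
    }
  }
}

Passing null for the decryptor and false for encryptedFooter only covers plaintext columns; files with encrypted columns need an InternalFileDecryptor configured with the appropriate keys before conversion.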