public ColumnChunkPageReader readAllPages()

in parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java [1550:1680]


    public ColumnChunkPageReader readAllPages(BlockCipher.Decryptor headerBlockDecryptor, BlockCipher.Decryptor pageBlockDecryptor,
        byte[] aadPrefix, int rowGroupOrdinal, int columnOrdinal) throws IOException {
      List<DataPage> pagesInChunk = new ArrayList<>();
      DictionaryPage dictionaryPage = null;
      PrimitiveType type = getFileMetaData().getSchema()
          .getType(descriptor.col.getPath()).asPrimitiveType();
      long valuesCountReadSoFar = 0L;
      int dataPageCountReadSoFar = 0;
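      // For encrypted columns, prepare the AAD (additional authenticated data) used when decrypting data page headers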
      byte[] dataPageHeaderAAD = null;
      if (null != headerBlockDecryptor) {
        dataPageHeaderAAD = AesCipher.createModuleAAD(aadPrefix, ModuleType.DataPageHeader, rowGroupOrdinal,
            columnOrdinal, getPageOrdinal(dataPageCountReadSoFar));
      }
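      // Keep reading pages until the expected value count (or, when an offset index is present, the expected page count) has been reached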
      while (hasMorePages(valuesCountReadSoFar, dataPageCountReadSoFar)) {
        byte[] pageHeaderAAD = dataPageHeaderAAD;
        if (null != headerBlockDecryptor) {
          // Important: this verifies file integrity (makes sure the dictionary page has not been removed)
          if (null == dictionaryPage && descriptor.metadata.hasDictionaryPage()) {
            pageHeaderAAD = AesCipher.createModuleAAD(aadPrefix, ModuleType.DictionaryPageHeader, rowGroupOrdinal, columnOrdinal, -1);
          } else {
            int pageOrdinal = getPageOrdinal(dataPageCountReadSoFar);
            AesCipher.quickUpdatePageAAD(dataPageHeaderAAD, pageOrdinal);
          }
        }
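        // Read the next page header, decrypting it first if the column chunk is encrypted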
        PageHeader pageHeader = readPageHeader(headerBlockDecryptor, pageHeaderAAD);
        int uncompressedPageSize = pageHeader.getUncompressed_page_size();
        int compressedPageSize = pageHeader.getCompressed_page_size();
        final BytesInput pageBytes;
        switch (pageHeader.type) {
          case DICTIONARY_PAGE:
            // there is only one dictionary page per column chunk
            if (dictionaryPage != null) {
              throw new ParquetDecodingException("more than one dictionary page in column " + descriptor.col);
            }
            pageBytes = this.readAsBytesInput(compressedPageSize);
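            // Optionally verify the CRC stored in the page header against the raw (compressed) page bytes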
            if (options.usePageChecksumVerification() && pageHeader.isSetCrc()) {
              verifyCrc(pageHeader.getCrc(), pageBytes.toByteArray(),
                "could not verify dictionary page integrity, CRC checksum verification failed");
            }
            DictionaryPageHeader dicHeader = pageHeader.getDictionary_page_header();
            dictionaryPage =
                new DictionaryPage(
                    pageBytes,
                    uncompressedPageSize,
                    dicHeader.getNum_values(),
                    converter.getEncoding(dicHeader.getEncoding())
                    );
            // Copy the CRC to the new page; it is checked by tests
            if (pageHeader.isSetCrc()) {
              dictionaryPage.setCrc(pageHeader.getCrc());
            }
            break;
          case DATA_PAGE:
            DataPageHeader dataHeaderV1 = pageHeader.getData_page_header();
            pageBytes = this.readAsBytesInput(compressedPageSize);
            if (options.usePageChecksumVerification() && pageHeader.isSetCrc()) {
              verifyCrc(pageHeader.getCrc(), pageBytes.toByteArray(),
                "could not verify page integrity, CRC checksum verification failed");
            }
            DataPageV1 dataPageV1 = new DataPageV1(
              pageBytes,
              dataHeaderV1.getNum_values(),
              uncompressedPageSize,
              converter.fromParquetStatistics(
                getFileMetaData().getCreatedBy(),
                dataHeaderV1.getStatistics(),
                type),
              converter.getEncoding(dataHeaderV1.getRepetition_level_encoding()),
              converter.getEncoding(dataHeaderV1.getDefinition_level_encoding()),
              converter.getEncoding(dataHeaderV1.getEncoding()));
            // Copy the CRC to the new page; it is checked by tests
            if (pageHeader.isSetCrc()) {
              dataPageV1.setCrc(pageHeader.getCrc());
            }
            pagesInChunk.add(dataPageV1);
            valuesCountReadSoFar += dataHeaderV1.getNum_values();
            ++dataPageCountReadSoFar;
            break;
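          // V2 data pages store repetition levels, definition levels and values as separate sections; the level sections are never compressed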
          case DATA_PAGE_V2:
            DataPageHeaderV2 dataHeaderV2 = pageHeader.getData_page_header_v2();
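            // The values section is whatever remains of the page after the two level sections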
            int dataSize = compressedPageSize - dataHeaderV2.getRepetition_levels_byte_length() -
              dataHeaderV2.getDefinition_levels_byte_length();
            final BytesInput repetitionLevels = this.readAsBytesInput(dataHeaderV2.getRepetition_levels_byte_length());
            final BytesInput definitionLevels = this.readAsBytesInput(dataHeaderV2.getDefinition_levels_byte_length());
            final BytesInput values = this.readAsBytesInput(dataSize);
            if (options.usePageChecksumVerification() && pageHeader.isSetCrc()) {
              pageBytes = BytesInput.concat(repetitionLevels, definitionLevels, values);
              verifyCrc(pageHeader.getCrc(), pageBytes.toByteArray(),
                "could not verify page integrity, CRC checksum verification failed");
            }
            DataPageV2 dataPageV2 = new DataPageV2(
              dataHeaderV2.getNum_rows(),
              dataHeaderV2.getNum_nulls(),
              dataHeaderV2.getNum_values(),
              repetitionLevels,
              definitionLevels,
              converter.getEncoding(dataHeaderV2.getEncoding()),
              values,
              uncompressedPageSize,
              converter.fromParquetStatistics(
                getFileMetaData().getCreatedBy(),
                dataHeaderV2.getStatistics(),
                type),
              dataHeaderV2.isIs_compressed()
            );
            // Copy the CRC to the new page; it is checked by tests
            if (pageHeader.isSetCrc()) {
              dataPageV2.setCrc(pageHeader.getCrc());
            }
            pagesInChunk.add(dataPageV2);
            valuesCountReadSoFar += dataHeaderV2.getNum_values();
            ++dataPageCountReadSoFar;
            break;
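          // Unknown page types are skipped for forward compatibility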
          default:
            LOG.debug("skipping page of type {} of size {}", pageHeader.getType(), compressedPageSize);
            stream.skipFully(compressedPageSize);
            break;
        }
      }
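      // Without an offset index the whole chunk must have been consumed; verify the value count against the footer metadata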
      if (offsetIndex == null && valuesCountReadSoFar != descriptor.metadata.getValueCount()) {
        // Would be nice to have a CorruptParquetFileException or something as a subclass?
        throw new IOException(
            "Expected " + descriptor.metadata.getValueCount() + " values in column chunk at " +
            getPath() + " offset " + descriptor.metadata.getFirstDataPageOffset() +
            " but got " + valuesCountReadSoFar + " values instead over " + pagesInChunk.size()
            + " pages ending at file offset " + (descriptor.fileOffset + stream.position()));
      }
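      // Look up a decompressor for the chunk's codec; pages are handed to the reader still compressed and are decompressed when read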
      BytesInputDecompressor decompressor = options.getCodecFactory().getDecompressor(descriptor.metadata.getCodec());
      return new ColumnChunkPageReader(decompressor, pagesInChunk, dictionaryPage, offsetIndex,
        rowCount, pageBlockDecryptor, aadPrefix, rowGroupOrdinal, columnOrdinal, options);
    }
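
For context, here is a minimal sketch of how these pages are typically consumed through the public API. It assumes a readable Parquet file path is passed as the first program argument; readNextRowGroup() internally builds one ColumnChunkPageReader per column chunk via readAllPages() and exposes it through PageReadStore.getPageReader().

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.column.ColumnDescriptor;
import org.apache.parquet.column.page.DataPage;
import org.apache.parquet.column.page.PageReadStore;
import org.apache.parquet.column.page.PageReader;
import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.parquet.hadoop.util.HadoopInputFile;
import org.apache.parquet.schema.MessageType;

public class ReadAllPagesExample {
  public static void main(String[] args) throws Exception {
    Path path = new Path(args[0]); // path to an existing Parquet file (assumption for this sketch)
    Configuration conf = new Configuration();
    try (ParquetFileReader reader =
        ParquetFileReader.open(HadoopInputFile.fromPath(path, conf))) {
      MessageType schema = reader.getFileMetaData().getSchema();
      PageReadStore rowGroup;
      while ((rowGroup = reader.readNextRowGroup()) != null) {
        for (ColumnDescriptor column : schema.getColumns()) {
          // The PageReader returned here is the ColumnChunkPageReader assembled by readAllPages()
          PageReader pages = rowGroup.getPageReader(column);
          DataPage page;
          while ((page = pages.readPage()) != null) {
            System.out.println(column.toString() + ": page with " + page.getValueCount() + " values");
          }
        }
      }
    }
  }
}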