public static List checkContractViolations()

in parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnIndexValidator.java [554:625]


  /**
   * Checks the column indexes stored in {@code file} against the actual page contents.
   *
   * <p>Row groups are read one after another. Within each row group, every column chunk for which
   * {@code readColumnIndex} returns a non-null index is walked page by page (as listed in its offset
   * index) and row by row through a {@link PageValidator}, which is handed the shared violation list.
   * Column chunks without a column index are skipped.
   *
   * @param file the file to open and validate
   * @return the list that was passed to every page validator; empty if nothing was reported
   * @throws IOException if opening the file or reading a row group or an index fails
   */
  public static List<ContractViolation> checkContractViolations(InputFile file) throws IOException {
    List<ContractViolation> violations = new ArrayList<>();
    try (ParquetFileReader reader = ParquetFileReader.open(file)) {
      MessageType schema = reader.getFooter().getFileMetaData().getSchema();
      List<ColumnDescriptor> columns = schema.getColumns();
      List<BlockMetaData> blocks = reader.getFooter().getBlocks();

      int rowGroupNumber = 0;
      PageReadStore rowGroup = reader.readNextRowGroup();
      while (rowGroup != null) {
        try {
          checkRowGroupContract(
              reader, schema, columns, blocks.get(rowGroupNumber), rowGroup, rowGroupNumber, violations);
        } finally {
          // Close the row group even when validation throws, so it is not leaked on the error path.
          rowGroup.close();
        }
        rowGroup = reader.readNextRowGroup();
        rowGroupNumber++;
      }
    }
    return violations;
  }

  /**
   * Validates every column chunk of a single row group that has a column index.
   *
   * @param reader the open reader used to fetch the column and offset indexes
   * @param schema the file schema, used to build the column read store
   * @param columns the column descriptors of {@code schema}, in the same order as the block's chunks
   * @param block the footer metadata of the row group being validated
   * @param rowGroup the page data of the row group being validated
   * @param rowGroupNumber the zero-based position of the row group, passed on to the page validators
   * @param violations the list handed to each page validator
   * @throws IOException if reading an index fails
   */
  private static void checkRowGroupContract(
      ParquetFileReader reader,
      MessageType schema,
      List<ColumnDescriptor> columns,
      BlockMetaData block,
      PageReadStore rowGroup,
      int rowGroupNumber,
      List<ContractViolation> violations)
      throws IOException {
    ColumnReadStore columnReadStore = new ColumnReadStoreImpl(
        rowGroup, new DummyRecordConverter(schema).getRootConverter(), schema, null);
    List<ColumnChunkMetaData> columnChunks = block.getColumns();
    assert (columnChunks.size() == columns.size());
    for (int columnNumber = 0; columnNumber < columns.size(); ++columnNumber) {
      ColumnDescriptor column = columns.get(columnNumber);
      ColumnChunkMetaData columnChunk = columnChunks.get(columnNumber);
      ColumnIndex columnIndex = reader.readColumnIndex(columnChunk);
      if (columnIndex == null) {
        // Nothing to validate for a chunk without a column index.
        continue;
      }
      ColumnPath columnPath = columnChunk.getPath();
      // NOTE(review): offsetIndex is dereferenced below without a null check; confirm that
      // readOffsetIndex cannot return null for a chunk that has a column index.
      OffsetIndex offsetIndex = reader.readOffsetIndex(columnChunk);
      List<ByteBuffer> minValues = columnIndex.getMinValues();
      List<ByteBuffer> maxValues = columnIndex.getMaxValues();
      BoundaryOrder boundaryOrder = columnIndex.getBoundaryOrder();
      List<Long> nullCounts = columnIndex.getNullCounts();
      List<Boolean> nullPages = columnIndex.getNullPages();
      ColumnReader columnReader = columnReadStore.getColumnReader(column);

      // rowNumber keeps counting across pages: each page consumes the rows up to and including
      // the last row index reported for it by the offset index.
      long rowNumber = 0;
      ByteBuffer prevMinValue = null;
      ByteBuffer prevMaxValue = null;
      for (int pageNumber = 0; pageNumber < offsetIndex.getPageCount(); ++pageNumber) {
        boolean isNullPage = nullPages.get(pageNumber);
        ByteBuffer minValue = minValues.get(pageNumber);
        ByteBuffer maxValue = maxValues.get(pageNumber);
        PageValidator pageValidator = new PageValidator(
            column.getPrimitiveType(),
            rowGroupNumber,
            columnNumber,
            columnPath,
            pageNumber,
            violations,
            columnReader,
            minValue,
            maxValue,
            prevMinValue,
            prevMaxValue,
            boundaryOrder,
            nullCounts.get(pageNumber),
            isNullPage);
        if (!isNullPage) {
          // Only non-null pages update the "previous" bounds handed to the next page's validator.
          prevMinValue = minValue;
          prevMaxValue = maxValue;
        }
        long lastRowNumberInPage = offsetIndex.getLastRowIndex(pageNumber, rowGroup.getRowCount());
        while (rowNumber <= lastRowNumberInPage) {
          pageValidator.validateValuesBelongingToRow();
          ++rowNumber;
        }
        pageValidator.finishPage();
      }
    }
  }