public static void main()

in parquet-hadoop/src/main/java/org/apache/parquet/hadoop/PrintFooter.java [58:152]


  /**
   * Command-line entry point: reads Parquet footers under a path and prints size statistics.
   *
   * <p>If the path is a directory that contains a {@code PARQUET_METADATA_FILE} summary, the
   * footers are taken from that summary file. Otherwise the footer of every file (the path itself,
   * or the directory listing filtered by {@code HiddenFileFilter.INSTANCE}) is read on a pool of 5
   * threads while a text progress bar is drawn on standard output. Each footer is handed to
   * {@code add(...)}; the per-column entries of {@code stats} and the {@code blockCount} /
   * {@code recordCount} counters are then printed.
   *
   * @param args exactly one element: the URI of a file or directory to inspect; with any other
   *     argument count a usage line is printed to standard error and the method returns
   * @throws Exception if the URI is malformed, the file system cannot be accessed, or reading any
   *     footer fails
   */
  public static void main(String[] args) throws Exception {
    if (args.length != 1) {
      System.err.println("usage PrintFooter <path>");
      return;
    }
    Path path = new Path(new URI(args[0]));
    final Configuration configuration = new Configuration();

    final FileSystem fs = path.getFileSystem(configuration);
    FileStatus fileStatus = fs.getFileStatus(path);
    Path summary = new Path(fileStatus.getPath(), PARQUET_METADATA_FILE);
    if (fileStatus.isDir() && fs.exists(summary)) {
      System.out.println("reading summary file");
      FileStatus summaryStatus = fs.getFileStatus(summary);
      List<Footer> readSummaryFile = ParquetFileReader.readSummaryFile(configuration, summaryStatus);
      for (Footer footer : readSummaryFile) {
        add(footer.getParquetMetadata());
      }
    } else {
      List<FileStatus> statuses;
      if (fileStatus.isDir()) {
        System.out.println("listing files in " + fileStatus.getPath());
        statuses = Arrays.asList(fs.listStatus(fileStatus.getPath(), HiddenFileFilter.INSTANCE));
      } else {
        statuses = new ArrayList<FileStatus>();
        statuses.add(fileStatus);
      }
      System.out.println("opening " + statuses.size() + " files");
      int i = 0;
      ExecutorService threadPool = Executors.newFixedThreadPool(5);
      try {
        long t0 = System.currentTimeMillis();
        // Submit every footer read up front so the pool works in parallel while we wait below.
        Deque<Future<ParquetMetadata>> footers = new LinkedBlockingDeque<Future<ParquetMetadata>>();
        for (final FileStatus currentFile : statuses) {
          footers.add(threadPool.submit(() -> {
            try {
              return ParquetFileReader.readFooter(configuration, currentFile, NO_FILTER);
            } catch (Exception e) {
              throw new ParquetDecodingException("could not read footer", e);
            }
          }));
        }
        int previousPercent = 0;
        int n = 60;
        // Draw an empty bar "0% [<n spaces>] 100%" ...
        System.out.print("0% [");
        for (int j = 0; j < n; j++) {
          System.out.print(" ");
        }
        System.out.print("] 100%");
        // ... then back up over "] 100%" (6 chars) and the n spaces so stars fill the bar in place.
        for (int j = 0; j < n + 6; j++) {
          System.out.print('\b');
        }
        while (!footers.isEmpty()) {
          // Block on the oldest pending read instead of polling isDone() in a tight loop, which
          // would keep this thread spinning at full CPU until every footer had been read.
          ParquetMetadata footer = footers.removeFirst().get();
          int currentPercent = (++i * n / statuses.size());
          while (currentPercent > previousPercent) {
            System.out.print("*");
            previousPercent++;
          }
          add(footer);
        }
        System.out.println("");
        long t1 = System.currentTimeMillis();
        System.out.println("read all footers in " + (t1 - t0) + " ms");
      } finally {
        // Also reached when a footer read fails: stop the workers so the JVM can exit.
        threadPool.shutdownNow();
      }
    }
    Set<Entry<ColumnDescriptor, ColStats>> entries = stats.entrySet();
    long total = 0;
    long totalUnc = 0;
    // First pass: grand totals, needed to express each column as a share of all space.
    for (Entry<ColumnDescriptor, ColStats> entry : entries) {
      ColStats colStats = entry.getValue();
      total += colStats.allStats.total;
      totalUnc += colStats.uncStats.total;
    }

    for (Entry<ColumnDescriptor, ColStats> entry : entries) {
      ColStats colStats = entry.getValue();
      System.out.println(
          entry.getKey() + " " + percent(colStats.allStats.total, total) + "% of all space " + colStats);
    }

    System.out.println("number of blocks: " + blockCount);
    System.out.println("total data size: " + humanReadable(total) + " (raw " + humanReadable(totalUnc) + ")");
    System.out.println("total record: " + humanReadable(recordCount));
    // Averages are undefined without blocks (e.g. an empty directory); skip them rather than
    // fail with a division by zero.
    if (blockCount > 0) {
      System.out.println("average block size: " + humanReadable(total / blockCount) + " (raw "
          + humanReadable(totalUnc / blockCount) + ")");
      System.out.println("average record count: " + humanReadable(recordCount / blockCount));
    }
  }