in parquet-hadoop/src/main/java/org/apache/parquet/hadoop/PrintFooter.java [58:152]
public static void main(String[] args) throws Exception {
  if (args.length != 1) {
    System.err.println("usage PrintFooter <path>");
    return;
  }
  Path path = new Path(new URI(args[0]));
  final Configuration configuration = new Configuration();
  final FileSystem fs = path.getFileSystem(configuration);
  FileStatus fileStatus = fs.getFileStatus(path);
  Path summary = new Path(fileStatus.getPath(), PARQUET_METADATA_FILE);
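  // A directory that carries a summary file (PARQUET_METADATA_FILE) already holds every footer in one place.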
  if (fileStatus.isDir() && fs.exists(summary)) {
    System.out.println("reading summary file");
    FileStatus summaryStatus = fs.getFileStatus(summary);
    List<Footer> readSummaryFile = ParquetFileReader.readSummaryFile(configuration, summaryStatus);
    for (Footer footer : readSummaryFile) {
      add(footer.getParquetMetadata());
    }
  } else {
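    // Otherwise collect the data files to scan: every non-hidden file in the directory, or just the single file given.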
    List<FileStatus> statuses;
    if (fileStatus.isDir()) {
      System.out.println("listing files in " + fileStatus.getPath());
      statuses = Arrays.asList(fs.listStatus(fileStatus.getPath(), HiddenFileFilter.INSTANCE));
    } else {
      statuses = new ArrayList<FileStatus>();
      statuses.add(fileStatus);
    }
    System.out.println("opening " + statuses.size() + " files");
    int i = 0;
    ExecutorService threadPool = Executors.newFixedThreadPool(5);
    try {
      long t0 = System.currentTimeMillis();
      Deque<Future<ParquetMetadata>> footers = new LinkedBlockingDeque<Future<ParquetMetadata>>();
      for (final FileStatus currentFile : statuses) {
        footers.add(threadPool.submit(() -> {
          try {
            return ParquetFileReader.readFooter(configuration, currentFile, NO_FILTER);
          } catch (Exception e) {
            throw new ParquetDecodingException("could not read footer", e);
          }
        }));
      }
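      // Draw an empty 60-character progress bar, then backspace to just after the '[' so it can be filled in below.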
      int previousPercent = 0;
      int n = 60;
      System.out.print("0% [");
      for (int j = 0; j < n; j++) {
        System.out.print(" ");
      }
      System.out.print("] 100%");
      for (int j = 0; j < n + 6; j++) {
        System.out.print('\b');
      }
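      // Poll the futures: push pending ones back to the end of the deque, fold each completed footer into the
      // running stats via add(), and advance the progress bar as the completed count grows.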
      while (!footers.isEmpty()) {
        Future<ParquetMetadata> futureFooter = footers.removeFirst();
        if (!futureFooter.isDone()) {
          footers.addLast(futureFooter);
          continue;
        }
        ParquetMetadata footer = futureFooter.get();
        int currentPercent = (++i * n / statuses.size());
        while (currentPercent > previousPercent) {
          System.out.print("*");
          previousPercent++;
        }
        add(footer);
      }
      System.out.println("");
      long t1 = System.currentTimeMillis();
      System.out.println("read all footers in " + (t1 - t0) + " ms");
    } finally {
      threadPool.shutdownNow();
    }
  }
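  // Report on the statistics accumulated by add(): grand totals across all columns first.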
  Set<Entry<ColumnDescriptor, ColStats>> entries = stats.entrySet();
  long total = 0;
  long totalUnc = 0;
  for (Entry<ColumnDescriptor, ColStats> entry : entries) {
    ColStats colStats = entry.getValue();
    total += colStats.allStats.total;
    totalUnc += colStats.uncStats.total;
  }
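  // Per-column report: each column's share of the total on-disk size plus its own stats.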
  for (Entry<ColumnDescriptor, ColStats> entry : entries) {
    ColStats colStats = entry.getValue();
    System.out.println(
        entry.getKey() + " " + percent(colStats.allStats.total, total) + "% of all space " + colStats);
  }
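  // Overall summary: block count, on-disk vs. raw (uncompressed) size, record count, and per-block averages.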
System.out.println("number of blocks: " + blockCount);
System.out.println("total data size: " + humanReadable(total) + " (raw " + humanReadable(totalUnc) + ")");
System.out.println("total record: " + humanReadable(recordCount));
System.out.println("average block size: " + humanReadable(total / blockCount) + " (raw "
+ humanReadable(totalUnc / blockCount) + ")");
System.out.println("average record count: " + humanReadable(recordCount / blockCount));
}