in java/tools/src/java/org/apache/orc/tools/FileDump.java [340:474]
/**
 * Prints a human-readable metadata report for a single file to standard output.
 *
 * <p>The report is emitted in this order: file version and writer information, row count,
 * compression, calendar, schema (optionally followed by the per-column type listing and type
 * annotations), per-stripe column statistics, file-level column statistics, one section per
 * stripe (streams, column encodings and, for the selected columns, row indexes and bloom
 * filters), file length and padding figures, and finally any user metadata entries.
 *
 * <p>If {@code getReader} returns {@code null} nothing is printed and the method returns
 * immediately.
 *
 * @param filename path of the file to describe
 * @param conf configuration used to open the file and to resolve its file system
 * @param rowIndexCols ids of the columns whose row indexes and bloom filters are printed;
 *     {@code null} selects every column, an empty list prints none. Ids are used directly as
 *     array indexes, so an id outside {@code [0, column count)} raises
 *     {@link ArrayIndexOutOfBoundsException}.
 * @param printTimeZone whether each stripe line also shows the writer timezone
 * @param corruptFiles passed through to {@code getReader}; presumably collects the names of
 *     unreadable files (its handling is not visible in this method)
 * @param printColumnType whether to print the "Columns type:" listing
 * @throws IOException if reading the file, its stripes, or its file status fails
 */
private static void printMetaDataImpl(final String filename,
    final Configuration conf, List<Integer> rowIndexCols, final boolean printTimeZone,
    final List<String> corruptFiles, final boolean printColumnType) throws IOException {
  Path file = new Path(filename);
  Reader reader = getReader(file, conf, corruptFiles);
  // If a reader could be created, the footer is not corrupt and the file is readable.
  if (reader == null) {
    return;
  }
  TypeDescription schema = reader.getSchema();
  System.out.println("Structure for " + filename);
  System.out.println("File Version: " + reader.getFileVersion().getName() +
      " with " + reader.getWriterVersion() + " by " +
      reader.getSoftwareVersion());
  // try-with-resources guarantees the record reader is released even when one of the
  // stripe/index reads below throws; previously it was only closed on the success path.
  try (RecordReaderImpl rows = (RecordReaderImpl) reader.rows()) {
    System.out.println("Rows: " + reader.getNumberOfRows());
    System.out.println("Compression: " + reader.getCompressionKind());
    if (reader.getCompressionKind() != CompressionKind.NONE) {
      System.out.println("Compression size: " + reader.getCompressionSize());
    }
    System.out.println("Calendar: " + (reader.writerUsedProlepticGregorian()
        ? "Proleptic Gregorian"
        : "Julian/Gregorian"));
    System.out.println("Type: " + schema.toString());
    if (printColumnType) {
      System.out.println("Columns type:");
      printColumnsType(schema);
    }
    printTypeAnnotations(schema, "root");

    // Per-stripe column statistics; stripes are numbered from 1 in the output.
    System.out.println("\nStripe Statistics:");
    List<StripeStatistics> stripeStats = reader.getStripeStatistics();
    for (int n = 0; n < stripeStats.size(); n++) {
      System.out.println(" Stripe " + (n + 1) + ":");
      ColumnStatistics[] columnStatistics = stripeStats.get(n).getColumnStatistics();
      for (int i = 0; i < columnStatistics.length; ++i) {
        System.out.println(" Column " + i + ": " +
            columnStatistics[i].toString());
      }
    }

    ColumnStatistics[] stats = reader.getStatistics();
    int colCount = stats.length;

    // A null selection means "all columns"; use a local so the parameter is not reassigned.
    List<Integer> indexCols = rowIndexCols;
    if (indexCols == null) {
      indexCols = new ArrayList<>(colCount);
      for (int i = 0; i < colCount; ++i) {
        indexCols.add(i);
      }
    }

    System.out.println("\nFile Statistics:");
    for (int i = 0; i < stats.length; ++i) {
      System.out.println(" Column " + i + ": " + stats[i].toString());
    }

    System.out.println("\nStripes:");
    int stripeIx = -1;
    for (StripeInformation stripe : reader.getStripes()) {
      ++stripeIx;
      long stripeStart = stripe.getOffset();
      OrcProto.StripeFooter footer = rows.readStripeFooter(stripe);
      if (printTimeZone) {
        String tz = footer.getWriterTimezone();
        if (tz == null || tz.isEmpty()) {
          tz = UNKNOWN;
        }
        System.out.println(" Stripe: " + stripe + " timezone: " + tz);
      } else {
        System.out.println(" Stripe: " + stripe);
      }

      // Streams are reported back to back: each start offset is the stripe offset plus the
      // lengths of all streams listed before it.
      long sectionStart = stripeStart;
      for (OrcProto.Stream section : footer.getStreamsList()) {
        String kind = section.hasKind() ? section.getKind().name() : UNKNOWN;
        System.out.println(" Stream: column " + section.getColumn() +
            " section " + kind + " start: " + sectionStart +
            " length " + section.getLength());
        sectionStart += section.getLength();
      }

      for (int i = 0; i < footer.getColumnsCount(); ++i) {
        OrcProto.ColumnEncoding encoding = footer.getColumns(i);
        StringBuilder buf = new StringBuilder();
        buf.append(" Encoding column ");
        buf.append(i);
        buf.append(": ");
        buf.append(encoding.getKind());
        // Only dictionary encodings have a dictionary size worth reporting.
        if (encoding.getKind() == OrcProto.ColumnEncoding.Kind.DICTIONARY ||
            encoding.getKind() == OrcProto.ColumnEncoding.Kind.DICTIONARY_V2) {
          buf.append("[");
          buf.append(encoding.getDictionarySize());
          buf.append("]");
        }
        System.out.println(buf);
      }

      if (!indexCols.isEmpty()) {
        // include the columns that are specified, only if the columns are included, bloom
        // filter will be read
        boolean[] sargColumns = new boolean[colCount];
        for (int colIdx : indexCols) {
          sargColumns[colIdx] = true;
        }
        OrcIndex indices = rows.readRowIndex(stripeIx, null, sargColumns);
        for (int col : indexCols) {
          StringBuilder buf = new StringBuilder();
          String rowIdxString = getFormattedRowIndices(col,
              indices.getRowGroupIndex(), schema, (ReaderImpl) reader);
          buf.append(rowIdxString);
          String bloomFilString = getFormattedBloomFilters(col, indices,
              reader.getWriterVersion(),
              schema.findSubtype(col).getCategory(),
              footer.getColumns(col));
          buf.append(bloomFilString);
          System.out.println(buf);
        }
      }
    }

    FileSystem fs = file.getFileSystem(conf);
    long fileLen = fs.getFileStatus(file).getLen();
    long rawDataSize = reader.getRawDataSize();
    long paddedBytes = getTotalPaddingSize(reader);
    // Guard against division by zero for an empty file.
    double percentPadding = (fileLen == 0) ? 0.0d : 100.0d * paddedBytes / fileLen;
    DecimalFormat format = new DecimalFormat("##.##");
    System.out.println("\nFile length: " + fileLen + " bytes");
    System.out.println("File raw data size: " + rawDataSize + " bytes");
    System.out.println("Padding length: " + paddedBytes + " bytes");
    System.out.println("Padding ratio: " + format.format(percentPadding) + "%");

    // Print any user metadata properties; the header appears only when there is at least one.
    List<String> keys = reader.getMetadataKeys();
    for (int i = 0; i < keys.size(); i++) {
      if (i == 0) {
        System.out.println("\nUser Metadata:");
      }
      String key = keys.get(i);
      ByteBuffer byteBuffer = reader.getMetadataValue(key);
      System.out.println(" " + key + "="
          + StandardCharsets.UTF_8.decode(byteBuffer));
    }
  }
}