in java/tools/src/java/org/apache/orc/tools/JsonFileDump.java [62:252]
/**
 * Dumps the metadata of the given ORC files as JSON on standard output.
 *
 * <p>With a single file the output is one JSON object; with several files it is
 * a JSON array holding one object per file. Each object carries the file name,
 * versions, schema, stripe/file statistics, stripe layout (streams, encodings
 * and, optionally, indexes), size information, ACID statistics and user
 * metadata, followed by a {@code status} field.
 *
 * <p>A file for which {@code FileDump.getReader} returns {@code null} is
 * reported with {@code "status": "FAILED"} and the dump continues with the next
 * file. Any exception raised while dumping a file is rethrown; in that case
 * nothing is printed because the output is only emitted after all files were
 * processed.
 *
 * @param files names of the files to dump; nothing is printed when empty
 * @param conf configuration used to open the files and resolve their file system
 * @param rowIndexCols ids of the columns whose row group and bloom filter
 *        indexes are dumped; {@code null} selects every column of each file,
 *        an empty list omits the {@code indexes} section. The list is not modified.
 * @param prettyPrint whether the JSON output is indented
 * @param printTimeZone whether the writer time zone of each stripe is included
 * @throws IOException if reading a file or producing the JSON fails
 */
public static void printJsonMetaData(List<String> files,
    Configuration conf,
    List<Integer> rowIndexCols, boolean prettyPrint, boolean printTimeZone)
    throws IOException {
  if (files.isEmpty()) {
    return;
  }
  StringWriter stringWriter = new StringWriter();
  JsonWriter writer = new JsonWriter(stringWriter);
  // Lenient mode lets non-finite doubles (e.g. a NaN padding ratio) be written.
  writer.setLenient(true);
  if (prettyPrint) {
    writer.setIndent(" ");
  }
  boolean multiFile = files.size() > 1;
  if (multiFile) {
    writer.beginArray();
  } else {
    // Single file: this object is the per-file object closed inside the loop.
    writer.beginObject();
  }
  for (String filename : files) {
    try {
      if (multiFile) {
        writer.beginObject();
      }
      writer.name("fileName").value(filename);
      Path path = new Path(filename);
      Reader reader = FileDump.getReader(path, conf, null);
      if (reader == null) {
        writer.name("status").value("FAILED");
        // Close the per-file object so the document stays well formed and the
        // next file can open its own object.
        writer.endObject();
        continue;
      }
      writer.name("fileVersion").value(reader.getFileVersion().getName());
      writer.name("writerVersion").value(reader.getWriterVersion().toString());
      writer.name("softwareVersion").value(reader.getSoftwareVersion());
      // try-with-resources guarantees the record reader is released even when
      // dumping this file fails half way through.
      try (RecordReaderImpl rows = (RecordReaderImpl) reader.rows()) {
        writer.name("numberOfRows").value(reader.getNumberOfRows());
        writer.name("compression").value(reader.getCompressionKind().toString());
        if (reader.getCompressionKind() != CompressionKind.NONE) {
          writer.name("compressionBufferSize").value(reader.getCompressionSize());
        }
        writer.name("schemaString").value(reader.getSchema().toString());
        writer.name("schema");
        writeSchema(writer, reader.getSchema());
        writer.name("calendar").value(reader.writerUsedProlepticGregorian()
            ? "proleptic Gregorian"
            : "Julian/Gregorian");

        // Per-stripe column statistics; stripe numbers are reported 1-based.
        writer.name("stripeStatistics").beginArray();
        List<StripeStatistics> stripeStatistics = reader.getStripeStatistics();
        for (int n = 0; n < stripeStatistics.size(); n++) {
          writer.beginObject();
          writer.name("stripeNumber").value(n + 1);
          StripeStatistics ss = stripeStatistics.get(n);
          writer.name("columnStatistics").beginArray();
          ColumnStatistics[] columnStatistics = ss.getColumnStatistics();
          for (int i = 0; i < columnStatistics.length; i++) {
            writer.beginObject();
            writer.name("columnId").value(i);
            writeColumnStatistics(writer, columnStatistics[i]);
            writer.endObject();
          }
          writer.endArray();
          writer.endObject();
        }
        writer.endArray();

        ColumnStatistics[] stats = reader.getStatistics();
        int colCount = stats.length;
        // Resolve the index columns per file: the "all columns" default depends
        // on this file's column count and must not leak into the next file.
        List<Integer> indexCols = rowIndexCols;
        if (indexCols == null) {
          indexCols = new ArrayList<>(colCount);
          for (int i = 0; i < colCount; ++i) {
            indexCols.add(i);
          }
        }

        // File-level column statistics.
        writer.name("fileStatistics").beginArray();
        for (int i = 0; i < stats.length; ++i) {
          writer.beginObject();
          writer.name("columnId").value(i);
          writeColumnStatistics(writer, stats[i]);
          writer.endObject();
        }
        writer.endArray();

        writer.name("stripes").beginArray();
        int stripeIx = -1;
        for (StripeInformation stripe : reader.getStripes()) {
          ++stripeIx;
          long stripeStart = stripe.getOffset();
          OrcProto.StripeFooter footer = rows.readStripeFooter(stripe);
          writer.beginObject(); // start of stripe information
          writer.name("stripeNumber").value(stripeIx + 1);
          writer.name("stripeInformation");
          writeStripeInformation(writer, stripe);
          if (printTimeZone) {
            writer.name("writerTimezone").value(
                footer.hasWriterTimezone() ? footer.getWriterTimezone() : FileDump.UNKNOWN);
          }

          // Stream offsets are accumulated from the stripe offset using the
          // stream lengths, in the order the footer lists the streams.
          long sectionStart = stripeStart;
          writer.name("streams").beginArray();
          for (OrcProto.Stream section : footer.getStreamsList()) {
            writer.beginObject();
            String kind = section.hasKind() ? section.getKind().name() : FileDump.UNKNOWN;
            writer.name("columnId").value(section.getColumn());
            writer.name("section").value(kind);
            writer.name("startOffset").value(sectionStart);
            writer.name("length").value(section.getLength());
            sectionStart += section.getLength();
            writer.endObject();
          }
          writer.endArray();

          writer.name("encodings").beginArray();
          for (int i = 0; i < footer.getColumnsCount(); ++i) {
            writer.beginObject();
            OrcProto.ColumnEncoding encoding = footer.getColumns(i);
            writer.name("columnId").value(i);
            writer.name("kind").value(encoding.getKind().toString());
            if (encoding.getKind() == OrcProto.ColumnEncoding.Kind.DICTIONARY ||
                encoding.getKind() == OrcProto.ColumnEncoding.Kind.DICTIONARY_V2) {
              writer.name("dictionarySize").value(encoding.getDictionarySize());
            }
            writer.endObject();
          }
          writer.endArray();

          if (!indexCols.isEmpty()) {
            // Flag only the requested columns so that just their indexes
            // (including bloom filters) are read.
            boolean[] sargColumns = new boolean[colCount];
            for (int colIdx : indexCols) {
              sargColumns[colIdx] = true;
            }
            OrcIndex indices = rows.readRowIndex(stripeIx, null, sargColumns);
            writer.name("indexes").beginArray();
            for (int col : indexCols) {
              writer.beginObject();
              writer.name("columnId").value(col);
              writeRowGroupIndexes(writer, col, indices.getRowGroupIndex(),
                  reader.getSchema(), (ReaderImpl) reader);
              writeBloomFilterIndexes(writer, col, indices,
                  reader.getWriterVersion(),
                  reader.getSchema().findSubtype(col).getCategory(),
                  footer.getColumns(col));
              writer.endObject();
            }
            writer.endArray();
          }
          writer.endObject(); // end of stripe information
        }
        writer.endArray();

        FileSystem fs = path.getFileSystem(conf);
        long fileLen = fs.getContentSummary(path).getLength();
        long rawDataSize = reader.getRawDataSize();
        long paddedBytes = FileDump.getTotalPaddingSize(reader);
        // empty ORC file is ~45 bytes. Assumption here is file length always >0
        double percentPadding = ((double) paddedBytes / (double) fileLen) * 100;
        writer.name("fileLength").value(fileLen);
        writer.name("rawDataSize").value(rawDataSize);
        writer.name("paddingLength").value(paddedBytes);
        writer.name("paddingRatio").value(percentPadding);

        AcidStats acidStats = OrcAcidUtils.parseAcidStats(reader);
        if (acidStats != null) {
          writer.name("numInserts").value(acidStats.inserts);
          writer.name("numDeletes").value(acidStats.deletes);
          writer.name("numUpdates").value(acidStats.updates);
        }

        // The ACID statistics entry was reported above, so it is left out of
        // the user metadata section.
        List<String> keys = reader.getMetadataKeys();
        keys.remove(OrcAcidUtils.ACID_STATS);
        if (!keys.isEmpty()) {
          writer.name("userMetadata").beginObject();
          for (String key : keys) {
            writer.name(key);
            ByteBuffer byteBuffer = reader.getMetadataValue(key);
            writer.value(String.valueOf(StandardCharsets.UTF_8.decode(byteBuffer)));
          }
          writer.endObject();
        }
        writer.name("status").value("OK");
      }
      writer.endObject();
    } catch (Throwable e) {
      // Best effort only: depending on where the failure happened (for example
      // while an array is open or a name is pending) the writer rejects a new
      // name. That secondary failure must not hide the real cause.
      try {
        writer.name("status").value("FAILED");
      } catch (RuntimeException | IOException statusFailure) {
        e.addSuppressed(statusFailure);
      }
      throw e;
    }
  }
  if (multiFile) {
    writer.endArray();
  }
  System.out.println(stringWriter);
}