public static void printJsonMetaData()

in java/tools/src/java/org/apache/orc/tools/JsonFileDump.java [62:252]

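A minimal sketch of invoking this method directly from Java (the file path and option values are illustrative; assumes the orc-tools and Hadoop client jars are on the classpath):

import org.apache.hadoop.conf.Configuration;
import org.apache.orc.tools.JsonFileDump;

import java.util.Collections;
import java.util.List;

public class JsonDumpExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Hypothetical input file; any readable ORC path works.
    List<String> files = Collections.singletonList("/tmp/example.orc");
    // rowIndexCols == null => row-group indexes are dumped for every column;
    // prettyPrint=true indents the JSON, printTimeZone=true adds writerTimezone per stripe.
    JsonFileDump.printJsonMetaData(files, conf, null, true, true);
  }
}

This is the same method the FileDump command-line entry point delegates to when JSON output is requested.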

  public static void printJsonMetaData(List<String> files,
      Configuration conf,
      List<Integer> rowIndexCols, boolean prettyPrint, boolean printTimeZone)
      throws IOException {
    if (files.isEmpty()) {
      return;
    }
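    // Accumulate the JSON in memory; it is printed to stdout once at the end.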
    StringWriter stringWriter = new StringWriter();
    JsonWriter writer = new JsonWriter(stringWriter);
    writer.setLenient(true);
    if (prettyPrint) {
      writer.setIndent("  ");
    }
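    // Multiple files are wrapped in a top-level JSON array; a single file is emitted as a bare object.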
    boolean multiFile = files.size() > 1;
    if (multiFile) {
      writer.beginArray();
    } else {
      writer.beginObject();
    }
    for (String filename : files) {
      try {
        if (multiFile) {
          writer.beginObject();
        }
        writer.name("fileName").value(filename);
        Path path = new Path(filename);
        Reader reader = FileDump.getReader(path, conf, null);
        if (reader == null) {
          writer.name("status").value("FAILED");
          continue;
        }
        writer.name("fileVersion").value(reader.getFileVersion().getName());
        writer.name("writerVersion").value(reader.getWriterVersion().toString());
        writer.name("softwareVersion").value(reader.getSoftwareVersion());
        RecordReaderImpl rows = (RecordReaderImpl) reader.rows();
        writer.name("numberOfRows").value(reader.getNumberOfRows());
        writer.name("compression").value(reader.getCompressionKind().toString());
        if (reader.getCompressionKind() != CompressionKind.NONE) {
          writer.name("compressionBufferSize").value(reader.getCompressionSize());
        }
        writer.name("schemaString").value(reader.getSchema().toString());
        writer.name("schema");
        writeSchema(writer, reader.getSchema());
        writer.name("calendar").value(reader.writerUsedProlepticGregorian()
            ? "proleptic Gregorian"
            : "Julian/Gregorian");
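        // Column statistics aggregated per stripe; stripe numbers are 1-based.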
        writer.name("stripeStatistics").beginArray();
        List<StripeStatistics> stripeStatistics = reader.getStripeStatistics();
        for (int n = 0; n < stripeStatistics.size(); n++) {
          writer.beginObject();
          writer.name("stripeNumber").value(n + 1);
          StripeStatistics ss = stripeStatistics.get(n);
          writer.name("columnStatistics").beginArray();
          ColumnStatistics[] columnStatistics = ss.getColumnStatistics();
          for (int i = 0; i < columnStatistics.length; i++) {
            writer.beginObject();
            writer.name("columnId").value(i);
            writeColumnStatistics(writer, columnStatistics[i]);
            writer.endObject();
          }
          writer.endArray();
          writer.endObject();
        }
        writer.endArray();

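        // File-level column statistics; when no row-index columns were requested, default to all columns.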
        ColumnStatistics[] stats = reader.getStatistics();
        int colCount = stats.length;
        if (rowIndexCols == null) {
          rowIndexCols = new ArrayList<>(colCount);
          for (int i = 0; i < colCount; ++i) {
            rowIndexCols.add(i);
          }
        }
        writer.name("fileStatistics").beginArray();
        for (int i = 0; i < stats.length; ++i) {
          writer.beginObject();
          writer.name("columnId").value(i);
          writeColumnStatistics(writer, stats[i]);
          writer.endObject();
        }
        writer.endArray();

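        // Per-stripe details: stripe metadata, stream layout, column encodings, and optional row-group indexes.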
        writer.name("stripes").beginArray();
        int stripeIx = -1;
        for (StripeInformation stripe : reader.getStripes()) {
          ++stripeIx;
          long stripeStart = stripe.getOffset();
          OrcProto.StripeFooter footer = rows.readStripeFooter(stripe);
          writer.beginObject(); // start of stripe information
          writer.name("stripeNumber").value(stripeIx + 1);
          writer.name("stripeInformation");
          writeStripeInformation(writer, stripe);
          if (printTimeZone) {
            writer.name("writerTimezone").value(
                footer.hasWriterTimezone() ? footer.getWriterTimezone() : FileDump.UNKNOWN);
          }
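          // Stream offsets are not stored in the stripe footer; accumulate them from the stripe start.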
          long sectionStart = stripeStart;

          writer.name("streams").beginArray();
          for (OrcProto.Stream section : footer.getStreamsList()) {
            writer.beginObject();
            String kind = section.hasKind() ? section.getKind().name() : FileDump.UNKNOWN;
            writer.name("columnId").value(section.getColumn());
            writer.name("section").value(kind);
            writer.name("startOffset").value(sectionStart);
            writer.name("length").value(section.getLength());
            sectionStart += section.getLength();
            writer.endObject();
          }
          writer.endArray();

          writer.name("encodings").beginArray();
          for (int i = 0; i < footer.getColumnsCount(); ++i) {
            writer.beginObject();
            OrcProto.ColumnEncoding encoding = footer.getColumns(i);
            writer.name("columnId").value(i);
            writer.name("kind").value(encoding.getKind().toString());
            if (encoding.getKind() == OrcProto.ColumnEncoding.Kind.DICTIONARY ||
                encoding.getKind() == OrcProto.ColumnEncoding.Kind.DICTIONARY_V2) {
              writer.name("dictionarySize").value(encoding.getDictionarySize());
            }
            writer.endObject();
          }
          writer.endArray();
          if (!rowIndexCols.isEmpty()) {
            // Read indexes only for the requested columns; bloom filters are read only for the
            // columns flagged in sargColumns.
            boolean[] sargColumns = new boolean[colCount];
            for (int colIdx : rowIndexCols) {
              sargColumns[colIdx] = true;
            }
            OrcIndex indices = rows.readRowIndex(stripeIx, null, sargColumns);
            writer.name("indexes").beginArray();
            for (int col : rowIndexCols) {
              writer.beginObject();
              writer.name("columnId").value(col);
              writeRowGroupIndexes(writer, col, indices.getRowGroupIndex(),
                  reader.getSchema(), (ReaderImpl) reader);
              writeBloomFilterIndexes(writer, col, indices,
                  reader.getWriterVersion(),
                  reader.getSchema().findSubtype(col).getCategory(),
                  footer.getColumns(col));
              writer.endObject();
            }
            writer.endArray();
          }
          writer.endObject(); // end of stripe information
        }
        writer.endArray();

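        // File-level size summary, including padding added for stripe alignment.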
        FileSystem fs = path.getFileSystem(conf);
        long fileLen = fs.getContentSummary(path).getLength();
        long rawDataSize = reader.getRawDataSize();
        long paddedBytes = FileDump.getTotalPaddingSize(reader);
        // An empty ORC file is ~45 bytes, so fileLen is assumed to be > 0 and the division below is safe.
        double percentPadding = ((double) paddedBytes / (double) fileLen) * 100;
        writer.name("fileLength").value(fileLen);
        writer.name("rawDataSize").value(rawDataSize);
        writer.name("paddingLength").value(paddedBytes);
        writer.name("paddingRatio").value(percentPadding);
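        // ACID insert/update/delete counts, parsed from the file metadata when present.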
        AcidStats acidStats = OrcAcidUtils.parseAcidStats(reader);
        if (acidStats != null) {
          writer.name("numInserts").value(acidStats.inserts);
          writer.name("numDeletes").value(acidStats.deletes);
          writer.name("numUpdates").value(acidStats.updates);
        }
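        // Remaining user metadata entries, decoded as UTF-8 strings (the ACID stats key is excluded
        // because it is reported separately above).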
        List<String> keys = reader.getMetadataKeys();
        keys.remove(OrcAcidUtils.ACID_STATS);
        if (!keys.isEmpty()) {
          writer.name("userMetadata").beginObject();
          for (String key : keys) {
            writer.name(key);
            ByteBuffer byteBuffer = reader.getMetadataValue(key);
            writer.value(String.valueOf(StandardCharsets.UTF_8.decode(byteBuffer)));
          }
          writer.endObject();
        }
        writer.name("status").value("OK");
        rows.close();

        writer.endObject();
      } catch (Throwable e) {
        writer.name("status").value("FAILED");
        throw e;
      }
    }
    if (multiFile) {
      writer.endArray();
    }
    System.out.println(stringWriter);
  }
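
For a single input file, the printed JSON has roughly the shape below (a sketch assembled from the writer calls above; values are illustrative, nested objects and arrays are elided, and the optional ACID and userMetadata fields are omitted). For multiple files, one such object per file is wrapped in a top-level array.

{
  "fileName": "/tmp/example.orc",
  "fileVersion": "...",
  "writerVersion": "...",
  "softwareVersion": "...",
  "numberOfRows": 1000,
  "compression": "ZLIB",
  "compressionBufferSize": 262144,
  "schemaString": "struct<...>",
  "schema": { ... },
  "calendar": "Julian/Gregorian",
  "stripeStatistics": [ ... ],
  "fileStatistics": [ ... ],
  "stripes": [ ... ],
  "fileLength": 12345,
  "rawDataSize": 10000,
  "paddingLength": 0,
  "paddingRatio": 0.0,
  "status": "OK"
}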