public int run()

in parquet-cli/src/main/java/org/apache/parquet/cli/commands/ShowPagesCommand.java [78:150]


  /**
   * Lists the pages of a single Parquet file, grouped by column.
   *
   * <p>Exactly one target is accepted. When {@code raw} is set the listing is delegated to
   * {@code RawPagesReader}; otherwise every row group is read through {@code ParquetFileReader}
   * and each dictionary/data page of the selected columns is formatted with {@code PageFormatter}.
   * Lines are accumulated per column first and only printed once all row groups have been read, so
   * the output is grouped by column rather than by row group.
   *
   * <p>If {@code this.columns} is null or empty, all columns of the file schema are listed.
   *
   * @return 0 once the listing has been written to the console
   * @throws IOException if the file cannot be opened or read
   */
  public int run() throws IOException {
    Preconditions.checkArgument(targets != null && targets.size() >= 1, "A Parquet file is required.");
    Preconditions.checkArgument(targets.size() == 1, "Cannot process multiple Parquet files.");

    String source = targets.get(0);

    // Even though the implementation is separated the functionality is logically related so placed in the same
    // command.
    if (raw) {
      try (RawPagesReader reader =
          new RawPagesReader(HadoopInputFile.fromPath(qualifiedPath(source), getConf()), columns)) {
        reader.listPages(console);
      }
      return 0;
    }

    try (ParquetFileReader reader = ParquetFileReader.open(getConf(), qualifiedPath(source))) {
      MessageType schema = reader.getFileMetaData().getSchema();

      // Columns to list, in schema order (no selection) or in the order the user requested them.
      Map<ColumnDescriptor, PrimitiveType> selected = Maps.newLinkedHashMap();
      if (this.columns == null || this.columns.isEmpty()) {
        for (ColumnDescriptor descriptor : schema.getColumns()) {
          selected.put(descriptor, primitive(schema, descriptor.getPath()));
        }
      } else {
        for (String column : this.columns) {
          selected.put(descriptor(column, schema), primitive(column, schema));
        }
      }

      // Resolved lazily, the first time a page is about to be formatted. Looking it up eagerly
      // would throw IndexOutOfBoundsException for a file that contains no row groups.
      // NOTE(review): the codec of the first column chunk of the first row group is applied to
      // every column; files that mix codecs per column will be labelled with that one codec.
      CompressionCodecName codec = null;

      // accumulate formatted lines to print by column
      Map<String, List<String>> formatted = Maps.newLinkedHashMap();
      PageFormatter formatter = new PageFormatter();
      PageReadStore pageStore;
      int rowGroupNum = 0;
      while ((pageStore = reader.readNextRowGroup()) != null) {
        try {
          for (Map.Entry<ColumnDescriptor, PrimitiveType> entry : selected.entrySet()) {
            ColumnDescriptor descriptor = entry.getKey();
            String name = columnName(descriptor);
            List<String> lines = formatted.get(name);
            if (lines == null) {
              lines = Lists.newArrayList();
              formatted.put(name, lines);
            }

            if (codec == null) {
              codec = reader.getRowGroups().get(0).getColumns().get(0).getCodec();
            }
            formatter.setContext(rowGroupNum, entry.getValue(), codec);
            PageReader pages = pageStore.getPageReader(descriptor);

            // The dictionary page, when present, is listed ahead of the column's data pages.
            DictionaryPage dict = pages.readDictionaryPage();
            if (dict != null) {
              lines.add(formatter.format(dict));
            }
            DataPage page;
            while ((page = pages.readPage()) != null) {
              lines.add(formatter.format(page));
            }
          }
        } finally {
          // Close the row group's page store even when reading or formatting a page fails.
          pageStore.close();
        }
        rowGroupNum += 1;
      }

      // TODO: Show total column size and overall size per value in the column summary line
      for (Map.Entry<String, List<String>> column : formatted.entrySet()) {
        console.info(String.format(
            "\nColumn: %s\n%s", column.getKey(), new TextStringBuilder(80).appendPadding(80, '-')));
        console.info(formatter.getHeader());
        for (String line : column.getValue()) {
          console.info(line);
        }
        console.info("");
      }
    }

    return 0;
  }