void ParquetFilePrinter::DebugPrint()

in cpp/src/parquet/printer.cc [86:242]


void ParquetFilePrinter::DebugPrint(std::ostream& stream, std::list<int> selected_columns,
                                    bool print_values, bool format_dump,
                                    bool print_key_value_metadata, const char* filename) {
  const FileMetaData* file_metadata = fileReader->metadata().get();

  stream << "File Name: " << filename << "\n";
  stream << "Version: " << ParquetVersionToString(file_metadata->version()) << "\n";
  stream << "Created By: " << file_metadata->created_by() << "\n";
  stream << "Total rows: " << file_metadata->num_rows() << "\n";

  if (print_key_value_metadata && file_metadata->key_value_metadata()) {
    auto key_value_metadata = file_metadata->key_value_metadata();
    PrintKeyValueMetadata(stream, *key_value_metadata);
  }

  stream << "Number of RowGroups: " << file_metadata->num_row_groups() << "\n";
  stream << "Number of Real Columns: "
         << file_metadata->schema()->group_node()->field_count() << "\n";

  if (selected_columns.size() == 0) {
    for (int i = 0; i < file_metadata->num_columns(); i++) {
      selected_columns.push_back(i);
    }
  } else {
    for (auto i : selected_columns) {
      if (i < 0 || i >= file_metadata->num_columns()) {
        throw ParquetException("Selected column is out of range");
      }
    }
  }

  stream << "Number of Columns: " << file_metadata->num_columns() << "\n";
  stream << "Number of Selected Columns: " << selected_columns.size() << "\n";
  for (auto i : selected_columns) {
    const ColumnDescriptor* descr = file_metadata->schema()->Column(i);
    stream << "Column " << i << ": " << descr->path()->ToDotString() << " ("
           << TypeToString(descr->physical_type(), descr->type_length());
    const auto& logical_type = descr->logical_type();
    if (!logical_type->is_none()) {
      stream << " / " << logical_type->ToString();
    }
    if (descr->converted_type() != ConvertedType::NONE) {
      stream << " / " << ConvertedTypeToString(descr->converted_type());
      if (descr->converted_type() == ConvertedType::DECIMAL) {
        stream << "(" << descr->type_precision() << "," << descr->type_scale() << ")";
      }
    }
    stream << ")" << std::endl;
  }

  for (int r = 0; r < file_metadata->num_row_groups(); ++r) {
    stream << "--- Row Group: " << r << " ---\n";

    auto group_reader = fileReader->RowGroup(r);
    std::unique_ptr<RowGroupMetaData> group_metadata = file_metadata->RowGroup(r);

    stream << "--- Total Bytes: " << group_metadata->total_byte_size() << " ---\n";
    stream << "--- Total Compressed Bytes: " << group_metadata->total_compressed_size()
           << " ---\n";
    auto sorting_columns = group_metadata->sorting_columns();
    if (!sorting_columns.empty()) {
      stream << "--- Sort Columns:\n";
      for (auto column : sorting_columns) {
        stream << "column_idx: " << column.column_idx
               << ", descending: " << column.descending
               << ", nulls_first: " << column.nulls_first << "\n";
      }
    }
    stream << "--- Rows: " << group_metadata->num_rows() << " ---\n";

    // Print column metadata
    for (auto i : selected_columns) {
      auto column_chunk = group_metadata->ColumnChunk(i);
      std::shared_ptr<Statistics> stats = column_chunk->statistics();

      const ColumnDescriptor* descr = file_metadata->schema()->Column(i);
      stream << "Column " << i << std::endl;
      if (print_key_value_metadata && column_chunk->key_value_metadata()) {
        PrintKeyValueMetadata(stream, *column_chunk->key_value_metadata(), 1, 2);
      }
      stream << "  Values: " << column_chunk->num_values();
      if (column_chunk->is_stats_set()) {
        std::string min = stats->EncodeMin(), max = stats->EncodeMax();
        stream << ", Null Values: " << stats->null_count()
               << ", Distinct Values: " << stats->distinct_count() << std::endl
               << "  Max: " << FormatStatValue(descr->physical_type(), max)
               << ", Min: " << FormatStatValue(descr->physical_type(), min);
      } else {
        stream << "  Statistics Not Set";
      }
      stream << std::endl
             << "  Compression: "
             << ::arrow::internal::AsciiToUpper(
                    Codec::GetCodecAsString(column_chunk->compression()))
             << ", Encodings: ";
      if (column_chunk->encoding_stats().empty()) {
        for (auto encoding : column_chunk->encodings()) {
          stream << EncodingToString(encoding) << " ";
        }
      } else {
        PrintPageEncodingStats(stream, column_chunk->encoding_stats());
      }
      stream << std::endl
             << "  Uncompressed Size: " << column_chunk->total_uncompressed_size()
             << ", Compressed Size: " << column_chunk->total_compressed_size()
             << std::endl;
    }

    if (!print_values) {
      continue;
    }
    stream << "--- Values ---\n";

    static constexpr int bufsize = COL_WIDTH + 1;
    char buffer[bufsize];

    // Create readers for selected columns and print contents
    std::vector<std::shared_ptr<Scanner>> scanners(selected_columns.size(), nullptr);
    int j = 0;
    for (auto i : selected_columns) {
      std::shared_ptr<ColumnReader> col_reader = group_reader->Column(i);
      // This is OK in this method as long as the RowGroupReader does not get
      // deleted
      auto& scanner = scanners[j++] = Scanner::Make(col_reader);

      if (format_dump) {
        stream << "Column " << i << std::endl;
        while (scanner->HasNext()) {
          scanner->PrintNext(stream, 0, true);
          stream << "\n";
        }
        continue;
      }

      snprintf(buffer, bufsize, "%-*s", COL_WIDTH,
               file_metadata->schema()->Column(i)->name().c_str());
      stream << buffer << '|';
    }
    if (format_dump) {
      continue;
    }
    stream << "\n";

    bool hasRow;
    do {
      hasRow = false;
      for (const auto& scanner : scanners) {
        if (scanner->HasNext()) {
          hasRow = true;
          scanner->PrintNext(stream, COL_WIDTH);
          stream << '|';
        }
      }
      stream << "\n";
    } while (hasRow);
  }
}