uint64_t TestBinaryPackedEncoding()

in benchmarks/decode_benchmark.cc [214:277]


// Encodes `values` with DeltaBitPackEncoder, prints the raw and encoded sizes,
// and then either verifies the round trip or benchmarks decoding.
//
// name:                 label printed in front of the statistics.
// values:               the int64 values to encode and decode again.
// benchmark_iters:      -1 (the default) selects verification mode: every value
//                       is decoded one at a time and compared with the input.
//                       Any other value is the number of timed decode passes.
// benchmark_batch_size: number of values requested per Decode() call while
//                       benchmarking.
//
// Returns 0 in verification mode (even when a mismatch was reported on stderr),
// otherwise the wrapping sum of all decoded values, which keeps the decode work
// observable to the caller.
uint64_t TestBinaryPackedEncoding(const char* name, const std::vector<int64_t>& values,
                                  int benchmark_iters = -1,
                                  int benchmark_batch_size = 1) {
  // Choose a mini block size from the number of input values.
  int mini_block_size;
  if (values.size() < 8) {
    mini_block_size = 8;
  } else if (values.size() < 16) {
    mini_block_size = 16;
  } else {
    mini_block_size = 32;
  }
  parquet::DeltaBitPackDecoder<parquet::Int64Type> decoder(nullptr);
  DeltaBitPackEncoder encoder(mini_block_size);
  for (const int64_t value : values) {
    encoder.Add(value);
  }

  // The inputs are 64-bit integers, so the unencoded size must be computed with
  // sizeof(int64_t); sizeof(int) under-reports it and inflates the ratio below.
  const int raw_len = static_cast<int>(encoder.num_values() * sizeof(int64_t));
  int len;
  uint8_t* buffer = encoder.Encode(&len);

  // Both modes print the same size summary before touching the decoder.
  printf("%s\n", name);
  printf("  Raw len: %d\n", raw_len);
  printf("  Encoded len: %d (%0.2f%%)\n", len,
         static_cast<float>(len) * 100.0f / static_cast<float>(raw_len));

  if (benchmark_iters == -1) {
    // Verification mode: decode value by value and stop at the first mismatch.
    decoder.SetData(encoder.num_values(), buffer, len);
    for (int i = 0; i < encoder.num_values(); ++i) {
      int64_t x = 0;
      decoder.Decode(&x, 1);
      if (values[i] != x) {
        std::cerr << "Bad: " << i << std::endl;
        std::cerr << "  " << x << " != " << values[i] << std::endl;
        break;
      }
    }
    return 0;
  }

  // Benchmark mode: decode the whole buffer `benchmark_iters` times in batches.
  uint64_t result = 0;
  std::vector<int64_t> buf(benchmark_batch_size);
  bool stalled = false;
  parquet::StopWatch sw;
  sw.Start();
  for (int k = 0; k < benchmark_iters && !stalled; ++k) {
    decoder.SetData(encoder.num_values(), buffer, len);
    for (size_t i = 0; i < values.size();) {
      const int n = decoder.Decode(buf.data(), benchmark_batch_size);
      if (n <= 0) {
        // Progress depends entirely on the count Decode() reports. A zero or
        // negative count (e.g. a batch size of 0) would otherwise spin forever
        // or wrap the index, so report it and abandon the benchmark.
        std::cerr << "Decode made no progress at value " << i << std::endl;
        stalled = true;
        break;
      }
      for (int j = 0; j < n; ++j) {
        result += buf[j];
      }
      i += static_cast<size_t>(n);
    }
  }
  uint64_t elapsed = sw.Stop();
  // NOTE(review): the 1000x factor together with the "M per second" label
  // implies Stop() returns nanoseconds -- confirm against parquet::StopWatch.
  double num_ints = static_cast<double>(values.size() * benchmark_iters) * 1000.;
  printf("%s rate (batch size = %2d): %0.3fM per second.\n", name, benchmark_batch_size,
         num_ints / static_cast<double>(elapsed));
  return result;
}