in benchmarks/decode_benchmark.cc [214:277]
// Encodes `values` with DeltaBitPackEncoder, prints the raw and encoded sizes,
// and then either verifies or benchmarks decoding of the encoded buffer.
//
//   name                  Label printed in front of the statistics.
//   values                The int64 values to encode and decode.
//   benchmark_iters       -1 selects verification mode: every value is decoded
//                         one at a time and compared with the input, and the
//                         first mismatch is reported on stderr. Any other value
//                         selects benchmark mode and is used as the number of
//                         timed decode passes over the whole buffer.
//   benchmark_batch_size  Number of values requested per Decode() call in
//                         benchmark mode.
//
// Returns 0 in verification mode. In benchmark mode returns the sum of all
// decoded values (accumulated as uint64_t), which keeps the decode loop
// observable to the optimizer.
uint64_t TestBinaryPackedEncoding(const char* name, const std::vector<int64_t>& values,
                                  int benchmark_iters = -1,
                                  int benchmark_batch_size = 1) {
  // Pick a mini block size from the input size; small inputs get small blocks.
  int mini_block_size;
  if (values.size() < 8) {
    mini_block_size = 8;
  } else if (values.size() < 16) {
    mini_block_size = 16;
  } else {
    mini_block_size = 32;
  }

  parquet::DeltaBitPackDecoder<parquet::Int64Type> decoder(nullptr);
  DeltaBitPackEncoder encoder(mini_block_size);
  for (size_t i = 0; i < values.size(); ++i) {
    encoder.Add(values[i]);
  }

  // The inputs are 64-bit values, so the unencoded size must be measured in
  // units of int64_t (not int), otherwise the compression ratio is overstated.
  int raw_len = static_cast<int>(encoder.num_values() * sizeof(int64_t));
  int len;
  uint8_t* buffer = encoder.Encode(&len);

  // Size statistics are reported identically in both modes.
  printf("%s\n", name);
  printf(" Raw len: %d\n", raw_len);
  printf(" Encoded len: %d (%0.2f%%)\n", len,
         static_cast<float>(len) * 100.0f / static_cast<float>(raw_len));

  if (benchmark_iters == -1) {
    // Verification mode: round-trip each value and stop at the first mismatch.
    decoder.SetData(encoder.num_values(), buffer, len);
    for (int i = 0; i < encoder.num_values(); ++i) {
      int64_t x = 0;
      decoder.Decode(&x, 1);
      const int64_t expected = values[static_cast<size_t>(i)];
      if (expected != x) {
        std::cerr << "Bad: " << i << std::endl;
        std::cerr << " " << x << " != " << expected << std::endl;
        break;
      }
    }
    return 0;
  }

  // Benchmark mode: repeatedly decode the whole buffer in batches.
  uint64_t result = 0;
  std::vector<int64_t> buf(benchmark_batch_size);
  parquet::StopWatch sw;
  sw.Start();
  for (int k = 0; k < benchmark_iters; ++k) {
    decoder.SetData(encoder.num_values(), buffer, len);
    for (size_t i = 0; i < values.size();) {
      int n = decoder.Decode(buf.data(), benchmark_batch_size);
      if (n <= 0) {
        // No progress (e.g. a zero batch size or an exhausted decoder);
        // bail out of this pass instead of spinning forever.
        break;
      }
      for (int j = 0; j < n; ++j) {
        result += buf[j];
      }
      i += n;
    }
  }
  uint64_t elapsed = sw.Stop();

  // NOTE(review): the * 1000 scaling together with the "M per second" label
  // presumes StopWatch reports nanoseconds -- confirm against StopWatch.
  double num_ints = static_cast<double>(values.size() * benchmark_iters) * 1000.;
  printf("%s rate (batch size = %2d): %0.3fM per second.\n", name, benchmark_batch_size,
         num_ints / static_cast<double>(elapsed));
  return result;
}