cpp/src/parquet/encoding_benchmark.cc (1,241 lines of code) (raw):
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "benchmark/benchmark.h"
#include <array>
#include <cmath>
#include <limits>
#include <memory>
#include <random>
#include "arrow/array.h"
#include "arrow/array/builder_binary.h"
#include "arrow/array/builder_dict.h"
#include "arrow/testing/gtest_util.h"
#include "arrow/testing/random.h"
#include "arrow/testing/util.h"
#include "arrow/type.h"
#include "arrow/util/byte_stream_split_internal.h"
#include "arrow/visit_data_inline.h"
#include "parquet/encoding.h"
#include "parquet/platform.h"
#include "parquet/schema.h"
using arrow::default_memory_pool;
using arrow::MemoryPool;
namespace {

// Smallest and largest value counts used when registering the benchmark
// families in this file (passed to Range()/Arg()).
constexpr int MIN_RANGE = 1 << 10;  // 1024
constexpr int MAX_RANGE = 1 << 16;  // 65536

}  // namespace
namespace parquet {
using schema::PrimitiveNode;
// Builds a ColumnDescriptor for a primitive INT64 node named "int64" with the
// given repetition. The two trailing constructor arguments are derived from
// the repetition: the first is set when the column is not REQUIRED, the second
// when it is REPEATED.
std::shared_ptr<ColumnDescriptor> Int64Schema(Repetition::type repetition) {
  const bool not_required = repetition != Repetition::REQUIRED;
  const bool is_repeated = repetition == Repetition::REPEATED;
  auto int64_node = PrimitiveNode::Make("int64", repetition, Type::INT64);
  return std::make_shared<ColumnDescriptor>(int64_node, not_required, is_repeated);
}
// Benchmarks PLAIN encoding of state.range(0) booleans, all `true`. Each
// iteration puts the whole vector into the encoder and flushes it.
static void BM_PlainEncodingBoolean(benchmark::State& state) {
  const int64_t num_values = state.range(0);
  std::vector<bool> input(num_values, true);

  auto encoder = MakeEncoder(Type::BOOLEAN, Encoding::PLAIN);
  auto* boolean_encoder = dynamic_cast<BooleanEncoder*>(encoder.get());
  const int input_count = static_cast<int>(input.size());

  for (auto _ : state) {
    boolean_encoder->Put(input, input_count);
    boolean_encoder->FlushValues();
  }

  state.SetBytesProcessed(state.iterations() * num_values * sizeof(bool));
  state.SetItemsProcessed(state.iterations() * num_values);
}
BENCHMARK(BM_PlainEncodingBoolean)->Range(MIN_RANGE, MAX_RANGE);
// Benchmarks PLAIN decoding of booleans into a `bool` array. state.range(0)
// `true` values are encoded once up front; every iteration creates a fresh
// decoder, points it at the encoded buffer and decodes all values.
static void BM_PlainDecodingBoolean(benchmark::State& state) {
  std::vector<bool> values(state.range(0), true);
  // The destination buffer is owned by a smart pointer so that it is released
  // even if encoding or decoding throws.
  auto output = std::make_unique<bool[]>(static_cast<size_t>(state.range(0)));
  auto encoder = MakeEncoder(Type::BOOLEAN, Encoding::PLAIN);
  auto typed_encoder = dynamic_cast<BooleanEncoder*>(encoder.get());
  typed_encoder->Put(values, static_cast<int>(values.size()));
  std::shared_ptr<Buffer> buf = encoder->FlushValues();

  for (auto _ : state) {
    auto decoder = MakeTypedDecoder<BooleanType>(Encoding::PLAIN);
    decoder->SetData(static_cast<int>(values.size()), buf->data(),
                     static_cast<int>(buf->size()));
    decoder->Decode(output.get(), static_cast<int>(values.size()));
  }

  state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(bool));
  state.SetItemsProcessed(state.iterations() * state.range(0));
}
BENCHMARK(BM_PlainDecodingBoolean)->Range(MIN_RANGE, MAX_RANGE);
// Benchmarks PLAIN decoding of booleans into a byte buffer sized with
// BytesForBits(state.range(0)). The input (all `true`) is encoded once; each
// iteration builds a new decoder and decodes the full buffer.
static void BM_PlainDecodingBooleanToBitmap(benchmark::State& state) {
  const int64_t num_values = state.range(0);
  const int64_t bitmap_bytes = ::arrow::bit_util::BytesForBits(num_values);

  std::vector<bool> values(num_values, true);
  std::vector<uint8_t> bitmap(bitmap_bytes, 0);

  auto encoder = MakeEncoder(Type::BOOLEAN, Encoding::PLAIN);
  auto* boolean_encoder = dynamic_cast<BooleanEncoder*>(encoder.get());
  boolean_encoder->Put(values, static_cast<int>(values.size()));
  std::shared_ptr<Buffer> encoded = encoder->FlushValues();

  for (auto _ : state) {
    auto decoder = MakeTypedDecoder<BooleanType>(Encoding::PLAIN);
    decoder->SetData(static_cast<int>(values.size()), encoded->data(),
                     static_cast<int>(encoded->size()));
    decoder->Decode(bitmap.data(), static_cast<int>(values.size()));
  }

  // Bytes are reported as the size of the output bitmap, items as the number
  // of decoded values.
  state.SetBytesProcessed(state.iterations() * bitmap_bytes);
  state.SetItemsProcessed(state.iterations() * num_values);
}
BENCHMARK(BM_PlainDecodingBooleanToBitmap)->Range(MIN_RANGE, MAX_RANGE);
// Benchmarks PLAIN encoding of state.range(0) int64 values (all equal to 64).
// Each iteration puts the whole vector into the encoder and flushes it.
static void BM_PlainEncodingInt64(benchmark::State& state) {
  std::vector<int64_t> values(state.range(0), 64);
  auto encoder = MakeTypedEncoder<Int64Type>(Encoding::PLAIN);
  for (auto _ : state) {
    encoder->Put(values.data(), static_cast<int>(values.size()));
    encoder->FlushValues();
  }
  state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(int64_t));
  // Report items as well, like the other benchmarks in this file.
  state.SetItemsProcessed(state.iterations() * state.range(0));
}
BENCHMARK(BM_PlainEncodingInt64)->Range(MIN_RANGE, MAX_RANGE);
// Benchmarks PLAIN decoding of state.range(0) int64 values. The values are
// encoded once; each iteration creates a decoder and decodes the buffer back
// into the same vector.
static void BM_PlainDecodingInt64(benchmark::State& state) {
  std::vector<int64_t> values(state.range(0), 64);
  auto encoder = MakeTypedEncoder<Int64Type>(Encoding::PLAIN);
  encoder->Put(values.data(), static_cast<int>(values.size()));
  std::shared_ptr<Buffer> buf = encoder->FlushValues();

  for (auto _ : state) {
    auto decoder = MakeTypedDecoder<Int64Type>(Encoding::PLAIN);
    decoder->SetData(static_cast<int>(values.size()), buf->data(),
                     static_cast<int>(buf->size()));
    decoder->Decode(values.data(), static_cast<int>(values.size()));
  }
  state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(int64_t));
  // Report items as well, like the other benchmarks in this file.
  state.SetItemsProcessed(state.iterations() * state.range(0));
}
BENCHMARK(BM_PlainDecodingInt64)->Range(MIN_RANGE, MAX_RANGE);
// Benchmarks PLAIN encoding of state.range(0) doubles (all equal to 64.0).
// Each iteration puts the whole vector into the encoder and flushes it.
static void BM_PlainEncodingDouble(benchmark::State& state) {
  std::vector<double> values(state.range(0), 64.0);
  auto encoder = MakeTypedEncoder<DoubleType>(Encoding::PLAIN);
  for (auto _ : state) {
    encoder->Put(values.data(), static_cast<int>(values.size()));
    encoder->FlushValues();
  }
  state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(double));
  // Report items as well, like the other benchmarks in this file.
  state.SetItemsProcessed(state.iterations() * state.range(0));
}
BENCHMARK(BM_PlainEncodingDouble)->Range(MIN_RANGE, MAX_RANGE);
// Benchmarks PLAIN encoding of state.range(0) doubles that are all NaN.
// Each iteration puts the whole vector into the encoder and flushes it.
static void BM_PlainEncodingDoubleNaN(benchmark::State& state) {
  // std::nan is the <cmath> spelling; the unqualified C name is not guaranteed
  // to be visible through <cmath>.
  std::vector<double> values(state.range(0), std::nan(""));
  auto encoder = MakeTypedEncoder<DoubleType>(Encoding::PLAIN);
  for (auto _ : state) {
    encoder->Put(values.data(), static_cast<int>(values.size()));
    encoder->FlushValues();
  }
  state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(double));
  // Report items as well, like the other benchmarks in this file.
  state.SetItemsProcessed(state.iterations() * state.range(0));
}
BENCHMARK(BM_PlainEncodingDoubleNaN)->Range(MIN_RANGE, MAX_RANGE);
// Benchmarks PLAIN decoding of state.range(0) doubles. The values are encoded
// once; each iteration creates a decoder and decodes the buffer back into the
// same vector.
static void BM_PlainDecodingDouble(benchmark::State& state) {
  std::vector<double> values(state.range(0), 64.0);
  auto encoder = MakeTypedEncoder<DoubleType>(Encoding::PLAIN);
  encoder->Put(values.data(), static_cast<int>(values.size()));
  std::shared_ptr<Buffer> buf = encoder->FlushValues();

  for (auto _ : state) {
    auto decoder = MakeTypedDecoder<DoubleType>(Encoding::PLAIN);
    decoder->SetData(static_cast<int>(values.size()), buf->data(),
                     static_cast<int>(buf->size()));
    decoder->Decode(values.data(), static_cast<int>(values.size()));
  }
  state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(double));
  // Report items as well, like the other benchmarks in this file.
  state.SetItemsProcessed(state.iterations() * state.range(0));
}
BENCHMARK(BM_PlainDecodingDouble)->Range(MIN_RANGE, MAX_RANGE);
// Benchmarks PLAIN encoding of state.range(0) floats (all equal to 64.0).
// Each iteration puts the whole vector into the encoder and flushes it.
static void BM_PlainEncodingFloat(benchmark::State& state) {
  std::vector<float> values(state.range(0), 64.0);
  auto encoder = MakeTypedEncoder<FloatType>(Encoding::PLAIN);
  for (auto _ : state) {
    encoder->Put(values.data(), static_cast<int>(values.size()));
    encoder->FlushValues();
  }
  state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(float));
  // Report items as well, like the other benchmarks in this file.
  state.SetItemsProcessed(state.iterations() * state.range(0));
}
BENCHMARK(BM_PlainEncodingFloat)->Range(MIN_RANGE, MAX_RANGE);
// Benchmarks PLAIN encoding of state.range(0) floats that are all NaN.
// Each iteration puts the whole vector into the encoder and flushes it.
static void BM_PlainEncodingFloatNaN(benchmark::State& state) {
  // std::nanf is the <cmath> spelling; the unqualified C name is not
  // guaranteed to be visible through <cmath>.
  std::vector<float> values(state.range(0), std::nanf(""));
  auto encoder = MakeTypedEncoder<FloatType>(Encoding::PLAIN);
  for (auto _ : state) {
    encoder->Put(values.data(), static_cast<int>(values.size()));
    encoder->FlushValues();
  }
  state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(float));
  // Report items as well, like the other benchmarks in this file.
  state.SetItemsProcessed(state.iterations() * state.range(0));
}
BENCHMARK(BM_PlainEncodingFloatNaN)->Range(MIN_RANGE, MAX_RANGE);
// Benchmarks PLAIN decoding of state.range(0) floats. The values are encoded
// once; each iteration creates a decoder and decodes the buffer back into the
// same vector.
static void BM_PlainDecodingFloat(benchmark::State& state) {
  std::vector<float> values(state.range(0), 64.0);
  auto encoder = MakeTypedEncoder<FloatType>(Encoding::PLAIN);
  encoder->Put(values.data(), static_cast<int>(values.size()));
  std::shared_ptr<Buffer> buf = encoder->FlushValues();

  for (auto _ : state) {
    auto decoder = MakeTypedDecoder<FloatType>(Encoding::PLAIN);
    decoder->SetData(static_cast<int>(values.size()), buf->data(),
                     static_cast<int>(buf->size()));
    decoder->Decode(values.data(), static_cast<int>(values.size()));
  }
  state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(float));
  // Report items as well, like the other benchmarks in this file.
  state.SetItemsProcessed(state.iterations() * state.range(0));
}
BENCHMARK(BM_PlainDecodingFloat)->Range(MIN_RANGE, MAX_RANGE);
// Type mapping used by the spaced benchmarks: for a Parquet physical type it
// names the C value type handed to the encoder/decoder and the Arrow
// type/array class used to generate the random input.
template <typename ParquetType>
struct BM_SpacedEncodingTraits {
  using CType = typename ParquetType::c_type;
  using ArrowType = typename EncodingTraits<ParquetType>::ArrowType;
  using ArrayType = typename ::arrow::TypeTraits<ArrowType>::ArrayType;
};

// Boolean is special-cased: the input of PutSpaced is a bool*, so UInt8 array
// data is used to back it.
template <>
struct BM_SpacedEncodingTraits<BooleanType> {
  using CType = bool;
  using ArrowType = ::arrow::UInt8Type;
  using ArrayType = ::arrow::UInt8Array;
};
// Registers the argument pairs for the spaced benchmarks: a fixed size of 32k
// values combined with several null ratios expressed in ten-thousandths.
static void BM_SpacedArgs(benchmark::internal::Benchmark* bench) {
  constexpr int64_t kPlainSpacedSize = 32 * 1024;  // 32k
  constexpr std::array<int64_t, 5> kNullsInTenThousand = {1, 100, 1000, 5000, 10000};
  for (const int64_t null_in_ten_thousand : kNullsInTenThousand) {
    bench->Args({/*size*/ kPlainSpacedSize,
                 /*null_in_ten_thousand*/ null_in_ten_thousand});
  }
}
// Benchmarks PutSpaced for a numeric Parquet type with the given encoding.
// state.range(0) is the number of values and state.range(1) the null ratio in
// ten-thousandths. The input is a random Arrow numeric array (seed 1923,
// values in [-100, 100]) whose raw values and validity bitmap are passed
// straight to the encoder.
template <typename ParquetType>
static void BM_EncodingSpaced(benchmark::State& state, Encoding::type encoding) {
  using Traits = BM_SpacedEncodingTraits<ParquetType>;
  using ArrowType = typename Traits::ArrowType;
  using ArrayType = typename Traits::ArrayType;
  using CType = typename Traits::CType;

  const int num_values = static_cast<int>(state.range(0));
  const double null_percent = static_cast<double>(state.range(1)) / 10000.0;

  ::arrow::random::RandomArrayGenerator generator(1923);
  const auto array = generator.Numeric<ArrowType>(num_values, -100, 100, null_percent);
  const auto typed_array = ::arrow::internal::checked_pointer_cast<ArrayType>(array);
  const auto raw_values = typed_array->raw_values();
  const auto valid_bits = array->null_bitmap_data();

  // The reinterpret_cast below is only sound when both element types have the
  // same size.
  static_assert(sizeof(CType) == sizeof(*raw_values), "Type mismatch");
  const auto src = reinterpret_cast<const CType*>(raw_values);

  auto encoder = MakeTypedEncoder<ParquetType>(encoding);
  for (auto _ : state) {
    encoder->PutSpaced(src, num_values, valid_bits, 0);
    encoder->FlushValues();
  }

  state.SetBytesProcessed(state.iterations() * num_values * sizeof(CType));
  state.counters["null_percent"] = null_percent * 100;
}
// Boolean specialization of BM_EncodingSpaced. Arrow stores booleans as a
// bitmap, so the random array is first expanded into a bool buffer with one
// slot per array element before being passed to PutSpaced together with the
// validity bitmap.
//
// state.range(0) is the number of values and state.range(1) the null ratio in
// ten-thousandths.
template <>
void BM_EncodingSpaced<BooleanType>(benchmark::State& state, Encoding::type encoding) {
  using CType = bool;
  const int num_values = static_cast<int>(state.range(0));
  const double null_percent = static_cast<double>(state.range(1)) / 10000.0;
  auto rand = ::arrow::random::RandomArrayGenerator(1923);
  const auto array = rand.Boolean(num_values, 0.5, null_percent);
  const auto valid_bits = array->null_bitmap_data();

  // make_unique value-initializes the buffer (all false) and releases it even
  // if the visitor below throws.
  auto spaced_values = std::make_unique<bool[]>(static_cast<size_t>(state.range(0)));
  int64_t position = 0;
  PARQUET_THROW_NOT_OK(::arrow::VisitArraySpanInline<::arrow::BooleanType>(
      *array->data(),
      [&](bool value) {
        spaced_values[position] = value;
        ++position;
        return ::arrow::Status::OK();
      },
      [&]() {
        // Null slot: keep the default value but still advance, so that each
        // valid value stays at the index its validity bit refers to.
        ++position;
        return ::arrow::Status::OK();
      }));

  auto encoder = MakeTypedEncoder<BooleanType>(encoding);
  for (auto _ : state) {
    encoder->PutSpaced(spaced_values.get(), num_values, valid_bits, 0);
    encoder->FlushValues();
  }
  state.counters["null_percent"] = null_percent * 100;
  state.SetBytesProcessed(state.iterations() * num_values * sizeof(CType));
}
// Runs the spaced-encoding benchmark for ParquetType with PLAIN encoding.
template <typename ParquetType>
static void BM_PlainEncodingSpaced(benchmark::State& state) {
  constexpr Encoding::type kEncoding = Encoding::PLAIN;
  BM_EncodingSpaced<ParquetType>(state, kEncoding);
}

// PLAIN spaced encoding, boolean values.
static void BM_PlainEncodingSpacedBoolean(benchmark::State& state) {
  using ParquetType = BooleanType;
  BM_PlainEncodingSpaced<ParquetType>(state);
}
BENCHMARK(BM_PlainEncodingSpacedBoolean)->Apply(BM_SpacedArgs);

// PLAIN spaced encoding, float values.
static void BM_PlainEncodingSpacedFloat(benchmark::State& state) {
  using ParquetType = FloatType;
  BM_PlainEncodingSpaced<ParquetType>(state);
}
BENCHMARK(BM_PlainEncodingSpacedFloat)->Apply(BM_SpacedArgs);

// PLAIN spaced encoding, double values.
static void BM_PlainEncodingSpacedDouble(benchmark::State& state) {
  using ParquetType = DoubleType;
  BM_PlainEncodingSpaced<ParquetType>(state);
}
BENCHMARK(BM_PlainEncodingSpacedDouble)->Apply(BM_SpacedArgs);
// Benchmarks DecodeSpaced for ParquetType with the given encoding.
// state.range(0) is the number of values and state.range(1) the null ratio in
// ten-thousandths. A random Arrow array (seed 1923) is encoded once; each
// iteration resets the decoder onto the encoded buffer and decodes it into a
// spaced output buffer using the array's validity bitmap.
template <typename ParquetType>
static void BM_DecodingSpaced(benchmark::State& state, Encoding::type encoding) {
  using ArrowType = typename BM_SpacedEncodingTraits<ParquetType>::ArrowType;
  using CType = typename BM_SpacedEncodingTraits<ParquetType>::CType;
  const int num_values = static_cast<int>(state.range(0));
  const auto null_percent = static_cast<double>(state.range(1)) / 10000.0;
  auto rand = ::arrow::random::RandomArrayGenerator(1923);
  std::shared_ptr<::arrow::Array> array;
  if constexpr (std::is_same_v<ParquetType, BooleanType>) {
    array = rand.Boolean(num_values, /*true_probability*/ 0.5, null_percent);
  } else {
    array = rand.Numeric<ArrowType>(num_values, -100, 100, null_percent);
  }
  const auto valid_bits = array->null_bitmap_data();
  const int null_count = static_cast<int>(array->null_count());
  // No downcast to the traits' ArrayType here: the result was never used, and
  // for BooleanType it would name UInt8Array although the generated array is
  // a Boolean array.
  auto encoder = MakeTypedEncoder<ParquetType>(encoding);
  encoder->Put(*array);
  std::shared_ptr<Buffer> buf = encoder->FlushValues();
  auto decoder = MakeTypedDecoder<ParquetType>(encoding);
  // Raw byte storage viewed as CType; a std::vector<CType> is avoided because
  // CType may be bool.
  std::vector<uint8_t> decode_values(num_values * sizeof(CType));
  auto decode_buf = reinterpret_cast<CType*>(decode_values.data());
  for (auto _ : state) {
    // Only the non-null values are present in the encoded buffer.
    decoder->SetData(num_values - null_count, buf->data(), static_cast<int>(buf->size()));
    decoder->DecodeSpaced(decode_buf, num_values, null_count, valid_bits, 0);
  }
  state.counters["null_percent"] = null_percent * 100;
  state.SetBytesProcessed(state.iterations() * num_values * sizeof(CType));
}
// Runs the spaced-decoding benchmark for ParquetType with PLAIN encoding.
template <typename ParquetType>
static void BM_PlainDecodingSpaced(benchmark::State& state) {
  constexpr Encoding::type kEncoding = Encoding::PLAIN;
  BM_DecodingSpaced<ParquetType>(state, kEncoding);
}

// PLAIN spaced decoding, boolean values.
static void BM_PlainDecodingSpacedBoolean(benchmark::State& state) {
  using ParquetType = BooleanType;
  BM_PlainDecodingSpaced<ParquetType>(state);
}
BENCHMARK(BM_PlainDecodingSpacedBoolean)->Apply(BM_SpacedArgs);

// PLAIN spaced decoding, float values.
static void BM_PlainDecodingSpacedFloat(benchmark::State& state) {
  using ParquetType = FloatType;
  BM_PlainDecodingSpaced<ParquetType>(state);
}
BENCHMARK(BM_PlainDecodingSpacedFloat)->Apply(BM_SpacedArgs);

// PLAIN spaced decoding, double values.
static void BM_PlainDecodingSpacedDouble(benchmark::State& state) {
  using ParquetType = DoubleType;
  BM_PlainDecodingSpaced<ParquetType>(state);
}
BENCHMARK(BM_PlainDecodingSpacedDouble)->Apply(BM_SpacedArgs);
// Supplies a fixed placeholder value (42 converted to T) used to fill the
// byte-stream-split benchmark inputs.
template <typename T>
struct ByteStreamSplitDummyValue {
  static constexpr T value() {
    constexpr int kDummy = 42;
    return static_cast<T>(kDummy);
  }
};

// For std::array<T, N>, every element is set to the placeholder value of T.
template <typename T, size_t N>
struct ByteStreamSplitDummyValue<std::array<T, N>> {
  using Array = std::array<T, N>;

  static constexpr Array value() {
    Array filled{};
    for (size_t i = 0; i < N; ++i) {
      filled[i] = ByteStreamSplitDummyValue<T>::value();
    }
    return filled;
  }
};
// Times `decode_func` over state.range(0) values of type T. The input is a
// vector filled with the dummy value of T, reinterpreted as raw bytes; the
// stride passed to the decoder equals the number of values.
template <typename T, typename DecodeFunc>
static void BM_ByteStreamSplitDecode(benchmark::State& state, DecodeFunc&& decode_func) {
  const std::vector<T> input(state.range(0), ByteStreamSplitDummyValue<T>::value());
  std::vector<T> decoded(state.range(0));

  const auto* input_bytes = reinterpret_cast<const uint8_t*>(input.data());
  auto* decoded_bytes = reinterpret_cast<uint8_t*>(decoded.data());
  const auto num_values = static_cast<int64_t>(input.size());

  for (auto _ : state) {
    decode_func(input_bytes, /*width=*/static_cast<int>(sizeof(T)), num_values,
                /*stride=*/num_values, decoded_bytes);
    // Keep the compiler from discarding the writes to the output buffer.
    benchmark::ClobberMemory();
  }

  state.SetBytesProcessed(state.iterations() * input.size() * sizeof(T));
  state.SetItemsProcessed(state.iterations() * input.size());
}
// Times `encode_func` over state.range(0) values of type T. The input is a
// vector filled with the dummy value of T, reinterpreted as raw bytes; the
// output is a byte buffer of the same total size.
template <typename T, typename EncodeFunc>
static void BM_ByteStreamSplitEncode(benchmark::State& state, EncodeFunc&& encode_func) {
  const std::vector<T> input(state.range(0), ByteStreamSplitDummyValue<T>::value());
  std::vector<uint8_t> encoded(state.range(0) * sizeof(T));

  const auto* input_bytes = reinterpret_cast<const uint8_t*>(input.data());

  for (auto _ : state) {
    encode_func(input_bytes, /*width=*/static_cast<int>(sizeof(T)), input.size(),
                encoded.data());
    // Keep the compiler from discarding the writes to the output buffer.
    benchmark::ClobberMemory();
  }

  state.SetBytesProcessed(state.iterations() * input.size() * sizeof(T));
  state.SetItemsProcessed(state.iterations() * input.size());
}
// Benchmarks that go through the generic ByteStreamSplitDecode/Encode entry
// points for float, double and N-byte fixed-size values.

static void BM_ByteStreamSplitDecode_Float_Generic(benchmark::State& state) {
  namespace bss = ::arrow::util::internal;
  BM_ByteStreamSplitDecode<float>(state, bss::ByteStreamSplitDecode);
}

static void BM_ByteStreamSplitDecode_Double_Generic(benchmark::State& state) {
  namespace bss = ::arrow::util::internal;
  BM_ByteStreamSplitDecode<double>(state, bss::ByteStreamSplitDecode);
}

// N is the byte width of the fixed-size value, modelled as std::array<int8_t, N>.
template <int N>
static void BM_ByteStreamSplitDecode_FLBA_Generic(benchmark::State& state) {
  namespace bss = ::arrow::util::internal;
  using Value = std::array<int8_t, N>;
  BM_ByteStreamSplitDecode<Value>(state, bss::ByteStreamSplitDecode);
}

static void BM_ByteStreamSplitEncode_Float_Generic(benchmark::State& state) {
  namespace bss = ::arrow::util::internal;
  BM_ByteStreamSplitEncode<float>(state, bss::ByteStreamSplitEncode);
}

static void BM_ByteStreamSplitEncode_Double_Generic(benchmark::State& state) {
  namespace bss = ::arrow::util::internal;
  BM_ByteStreamSplitEncode<double>(state, bss::ByteStreamSplitEncode);
}

// N is the byte width of the fixed-size value, modelled as std::array<int8_t, N>.
template <int N>
static void BM_ByteStreamSplitEncode_FLBA_Generic(benchmark::State& state) {
  namespace bss = ::arrow::util::internal;
  using Value = std::array<int8_t, N>;
  BM_ByteStreamSplitEncode<Value>(state, bss::ByteStreamSplitEncode);
}
// Benchmarks that call the scalar ByteStreamSplit implementations directly,
// instantiated on the byte width of float and double.

static void BM_ByteStreamSplitDecode_Float_Scalar(benchmark::State& state) {
  namespace bss = ::arrow::util::internal;
  BM_ByteStreamSplitDecode<float>(state, bss::ByteStreamSplitDecodeScalar<sizeof(float)>);
}

static void BM_ByteStreamSplitDecode_Double_Scalar(benchmark::State& state) {
  namespace bss = ::arrow::util::internal;
  BM_ByteStreamSplitDecode<double>(state,
                                   bss::ByteStreamSplitDecodeScalar<sizeof(double)>);
}

static void BM_ByteStreamSplitEncode_Float_Scalar(benchmark::State& state) {
  namespace bss = ::arrow::util::internal;
  BM_ByteStreamSplitEncode<float>(state, bss::ByteStreamSplitEncodeScalar<sizeof(float)>);
}

static void BM_ByteStreamSplitEncode_Double_Scalar(benchmark::State& state) {
  namespace bss = ::arrow::util::internal;
  BM_ByteStreamSplitEncode<double>(state,
                                   bss::ByteStreamSplitEncodeScalar<sizeof(double)>);
}
// Argument generator for the byte-stream-split benchmarks: to keep the number
// of variations small, only the smallest and largest sizes are registered.
static void ByteStreamSplitApply(::benchmark::internal::Benchmark* bench) {
  bench->Arg(MIN_RANGE);
  bench->Arg(MAX_RANGE);
}
// Registration of the generic and scalar byte-stream-split benchmarks. All of
// them take their arguments from ByteStreamSplitApply. The FLBA variants are
// instantiated for value widths of 2, 7 and 16 bytes.
BENCHMARK(BM_ByteStreamSplitDecode_Float_Generic)->Apply(ByteStreamSplitApply);
BENCHMARK(BM_ByteStreamSplitDecode_Double_Generic)->Apply(ByteStreamSplitApply);
BENCHMARK_TEMPLATE(BM_ByteStreamSplitDecode_FLBA_Generic, 2)->Apply(ByteStreamSplitApply);
BENCHMARK_TEMPLATE(BM_ByteStreamSplitDecode_FLBA_Generic, 7)->Apply(ByteStreamSplitApply);
BENCHMARK_TEMPLATE(BM_ByteStreamSplitDecode_FLBA_Generic, 16)
->Apply(ByteStreamSplitApply);
BENCHMARK(BM_ByteStreamSplitEncode_Float_Generic)->Apply(ByteStreamSplitApply);
BENCHMARK(BM_ByteStreamSplitEncode_Double_Generic)->Apply(ByteStreamSplitApply);
BENCHMARK_TEMPLATE(BM_ByteStreamSplitEncode_FLBA_Generic, 2)->Apply(ByteStreamSplitApply);
BENCHMARK_TEMPLATE(BM_ByteStreamSplitEncode_FLBA_Generic, 7)->Apply(ByteStreamSplitApply);
BENCHMARK_TEMPLATE(BM_ByteStreamSplitEncode_FLBA_Generic, 16)
->Apply(ByteStreamSplitApply);
// Scalar implementations.
BENCHMARK(BM_ByteStreamSplitDecode_Float_Scalar)->Apply(ByteStreamSplitApply);
BENCHMARK(BM_ByteStreamSplitDecode_Double_Scalar)->Apply(ByteStreamSplitApply);
BENCHMARK(BM_ByteStreamSplitEncode_Float_Scalar)->Apply(ByteStreamSplitApply);
BENCHMARK(BM_ByteStreamSplitEncode_Double_Scalar)->Apply(ByteStreamSplitApply);
#if defined(ARROW_HAVE_SSE4_2)
// Benchmarks of the 128-bit SIMD ByteStreamSplit implementations; only built
// when ARROW_HAVE_SSE4_2 is defined.

static void BM_ByteStreamSplitDecode_Float_Sse2(benchmark::State& state) {
  namespace bss = ::arrow::util::internal;
  BM_ByteStreamSplitDecode<float>(state,
                                  bss::ByteStreamSplitDecodeSimd128<sizeof(float)>);
}

static void BM_ByteStreamSplitDecode_Double_Sse2(benchmark::State& state) {
  namespace bss = ::arrow::util::internal;
  BM_ByteStreamSplitDecode<double>(state,
                                   bss::ByteStreamSplitDecodeSimd128<sizeof(double)>);
}

static void BM_ByteStreamSplitEncode_Float_Sse2(benchmark::State& state) {
  namespace bss = ::arrow::util::internal;
  BM_ByteStreamSplitEncode<float>(state,
                                  bss::ByteStreamSplitEncodeSimd128<sizeof(float)>);
}

static void BM_ByteStreamSplitEncode_Double_Sse2(benchmark::State& state) {
  namespace bss = ::arrow::util::internal;
  BM_ByteStreamSplitEncode<double>(state,
                                   bss::ByteStreamSplitEncodeSimd128<sizeof(double)>);
}

BENCHMARK(BM_ByteStreamSplitDecode_Float_Sse2)->Apply(ByteStreamSplitApply);
BENCHMARK(BM_ByteStreamSplitDecode_Double_Sse2)->Apply(ByteStreamSplitApply);
BENCHMARK(BM_ByteStreamSplitEncode_Float_Sse2)->Apply(ByteStreamSplitApply);
BENCHMARK(BM_ByteStreamSplitEncode_Double_Sse2)->Apply(ByteStreamSplitApply);
#endif
#if defined(ARROW_HAVE_AVX2)
// Benchmarks of the AVX2 ByteStreamSplit implementations; only built when
// ARROW_HAVE_AVX2 is defined.

static void BM_ByteStreamSplitDecode_Float_Avx2(benchmark::State& state) {
  namespace bss = ::arrow::util::internal;
  BM_ByteStreamSplitDecode<float>(state, bss::ByteStreamSplitDecodeAvx2<sizeof(float)>);
}

static void BM_ByteStreamSplitDecode_Double_Avx2(benchmark::State& state) {
  namespace bss = ::arrow::util::internal;
  BM_ByteStreamSplitDecode<double>(state, bss::ByteStreamSplitDecodeAvx2<sizeof(double)>);
}

static void BM_ByteStreamSplitEncode_Float_Avx2(benchmark::State& state) {
  namespace bss = ::arrow::util::internal;
  BM_ByteStreamSplitEncode<float>(state, bss::ByteStreamSplitEncodeAvx2<sizeof(float)>);
}

static void BM_ByteStreamSplitEncode_Double_Avx2(benchmark::State& state) {
  namespace bss = ::arrow::util::internal;
  BM_ByteStreamSplitEncode<double>(state, bss::ByteStreamSplitEncodeAvx2<sizeof(double)>);
}

BENCHMARK(BM_ByteStreamSplitDecode_Float_Avx2)->Apply(ByteStreamSplitApply);
BENCHMARK(BM_ByteStreamSplitDecode_Double_Avx2)->Apply(ByteStreamSplitApply);
BENCHMARK(BM_ByteStreamSplitEncode_Float_Avx2)->Apply(ByteStreamSplitApply);
BENCHMARK(BM_ByteStreamSplitEncode_Double_Avx2)->Apply(ByteStreamSplitApply);
#endif
#if defined(ARROW_HAVE_NEON)
// Benchmarks of the 128-bit SIMD ByteStreamSplit implementations; only built
// when ARROW_HAVE_NEON is defined.
static void BM_ByteStreamSplitDecode_Float_Neon(benchmark::State& state) {
  BM_ByteStreamSplitDecode<float>(
      state, ::arrow::util::internal::ByteStreamSplitDecodeSimd128<sizeof(float)>);
}
static void BM_ByteStreamSplitDecode_Double_Neon(benchmark::State& state) {
  BM_ByteStreamSplitDecode<double>(
      state, ::arrow::util::internal::ByteStreamSplitDecodeSimd128<sizeof(double)>);
}
static void BM_ByteStreamSplitEncode_Float_Neon(benchmark::State& state) {
  BM_ByteStreamSplitEncode<float>(
      state, ::arrow::util::internal::ByteStreamSplitEncodeSimd128<sizeof(float)>);
}
static void BM_ByteStreamSplitEncode_Double_Neon(benchmark::State& state) {
  BM_ByteStreamSplitEncode<double>(
      state, ::arrow::util::internal::ByteStreamSplitEncodeSimd128<sizeof(double)>);
}
// Registered with the same argument set as the Generic/Scalar/SSE/AVX2
// variants so that the same sizes are measured for every implementation.
BENCHMARK(BM_ByteStreamSplitDecode_Float_Neon)->Apply(ByteStreamSplitApply);
BENCHMARK(BM_ByteStreamSplitDecode_Double_Neon)->Apply(ByteStreamSplitApply);
BENCHMARK(BM_ByteStreamSplitEncode_Float_Neon)->Apply(ByteStreamSplitApply);
BENCHMARK(BM_ByteStreamSplitEncode_Double_Neon)->Apply(ByteStreamSplitApply);
#endif
// Returns `length` values of DType::c_type, all equal to 42.
template <typename DType>
static auto MakeDeltaBitPackingInputFixed(size_t length) {
  using T = typename DType::c_type;
  std::vector<T> numbers(length, 42);
  return numbers;
}
// Returns `length` values of DType::c_type produced by ::arrow::randint with
// bounds 0 and 1000.
template <typename DType>
static auto MakeDeltaBitPackingInputNarrow(size_t length) {
  using T = typename DType::c_type;
  std::vector<T> generated(length);
  ::arrow::randint<T, T>(length, 0, 1000, &generated);
  return generated;
}
// Returns `length` values of DType::c_type produced by ::arrow::randint with
// bounds set to the type's minimum and maximum, each shifted right by two
// bits.
template <typename DType>
static auto MakeDeltaBitPackingInputWide(size_t length) {
  using T = typename DType::c_type;
  using Limits = std::numeric_limits<T>;
  std::vector<T> generated(length);
  ::arrow::randint<T, T>(length, Limits::min() >> 2, Limits::max() >> 2, &generated);
  return generated;
}
// Benchmarks DELTA_BINARY_PACKED encoding of state.range(0) values produced by
// `gen`. Each iteration puts all values into the encoder and flushes it.
template <typename DType, typename NumberGenerator>
static void BM_DeltaBitPackingEncode(benchmark::State& state, NumberGenerator gen) {
  using T = typename DType::c_type;

  std::vector<T> input = gen(state.range(0));
  const int input_count = static_cast<int>(input.size());
  auto encoder = MakeTypedEncoder<DType>(Encoding::DELTA_BINARY_PACKED);

  for (auto _ : state) {
    encoder->Put(input.data(), input_count);
    encoder->FlushValues();
  }

  state.SetBytesProcessed(state.iterations() * input.size() * sizeof(T));
  state.SetItemsProcessed(state.iterations() * input.size());
}
// DELTA_BINARY_PACKED encoding benchmarks for Int32/Int64 with the three
// input generators (Fixed, Narrow, Wide).

static void BM_DeltaBitPackingEncode_Int32_Fixed(benchmark::State& state) {
  using DType = Int32Type;
  BM_DeltaBitPackingEncode<DType>(state, MakeDeltaBitPackingInputFixed<DType>);
}

static void BM_DeltaBitPackingEncode_Int64_Fixed(benchmark::State& state) {
  using DType = Int64Type;
  BM_DeltaBitPackingEncode<DType>(state, MakeDeltaBitPackingInputFixed<DType>);
}

static void BM_DeltaBitPackingEncode_Int32_Narrow(benchmark::State& state) {
  using DType = Int32Type;
  BM_DeltaBitPackingEncode<DType>(state, MakeDeltaBitPackingInputNarrow<DType>);
}

static void BM_DeltaBitPackingEncode_Int64_Narrow(benchmark::State& state) {
  using DType = Int64Type;
  BM_DeltaBitPackingEncode<DType>(state, MakeDeltaBitPackingInputNarrow<DType>);
}

static void BM_DeltaBitPackingEncode_Int32_Wide(benchmark::State& state) {
  using DType = Int32Type;
  BM_DeltaBitPackingEncode<DType>(state, MakeDeltaBitPackingInputWide<DType>);
}

static void BM_DeltaBitPackingEncode_Int64_Wide(benchmark::State& state) {
  using DType = Int64Type;
  BM_DeltaBitPackingEncode<DType>(state, MakeDeltaBitPackingInputWide<DType>);
}

BENCHMARK(BM_DeltaBitPackingEncode_Int32_Fixed)->Range(MIN_RANGE, MAX_RANGE);
BENCHMARK(BM_DeltaBitPackingEncode_Int64_Fixed)->Range(MIN_RANGE, MAX_RANGE);
BENCHMARK(BM_DeltaBitPackingEncode_Int32_Narrow)->Range(MIN_RANGE, MAX_RANGE);
BENCHMARK(BM_DeltaBitPackingEncode_Int64_Narrow)->Range(MIN_RANGE, MAX_RANGE);
BENCHMARK(BM_DeltaBitPackingEncode_Int32_Wide)->Range(MIN_RANGE, MAX_RANGE);
BENCHMARK(BM_DeltaBitPackingEncode_Int64_Wide)->Range(MIN_RANGE, MAX_RANGE);
// Benchmarks DELTA_BINARY_PACKED decoding of state.range(0) values produced by
// `gen`. The values are encoded once; each iteration resets a single decoder
// onto the encoded buffer and decodes back into the input vector.
template <typename DType, typename NumberGenerator>
static void BM_DeltaBitPackingDecode(benchmark::State& state, NumberGenerator gen) {
  using T = typename DType::c_type;

  std::vector<T> numbers = gen(state.range(0));
  const int count = static_cast<int>(numbers.size());

  auto encoder = MakeTypedEncoder<DType>(Encoding::DELTA_BINARY_PACKED);
  encoder->Put(numbers.data(), count);
  std::shared_ptr<Buffer> encoded = encoder->FlushValues();

  auto decoder = MakeTypedDecoder<DType>(Encoding::DELTA_BINARY_PACKED);
  for (auto _ : state) {
    decoder->SetData(count, encoded->data(), static_cast<int>(encoded->size()));
    decoder->Decode(numbers.data(), count);
  }

  state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(T));
  state.SetItemsProcessed(state.iterations() * state.range(0));
}
// DELTA_BINARY_PACKED decoding benchmarks for Int32/Int64 with the three
// input generators (Fixed, Narrow, Wide).

static void BM_DeltaBitPackingDecode_Int32_Fixed(benchmark::State& state) {
  using DType = Int32Type;
  BM_DeltaBitPackingDecode<DType>(state, MakeDeltaBitPackingInputFixed<DType>);
}

static void BM_DeltaBitPackingDecode_Int64_Fixed(benchmark::State& state) {
  using DType = Int64Type;
  BM_DeltaBitPackingDecode<DType>(state, MakeDeltaBitPackingInputFixed<DType>);
}

static void BM_DeltaBitPackingDecode_Int32_Narrow(benchmark::State& state) {
  using DType = Int32Type;
  BM_DeltaBitPackingDecode<DType>(state, MakeDeltaBitPackingInputNarrow<DType>);
}

static void BM_DeltaBitPackingDecode_Int64_Narrow(benchmark::State& state) {
  using DType = Int64Type;
  BM_DeltaBitPackingDecode<DType>(state, MakeDeltaBitPackingInputNarrow<DType>);
}

static void BM_DeltaBitPackingDecode_Int32_Wide(benchmark::State& state) {
  using DType = Int32Type;
  BM_DeltaBitPackingDecode<DType>(state, MakeDeltaBitPackingInputWide<DType>);
}

static void BM_DeltaBitPackingDecode_Int64_Wide(benchmark::State& state) {
  using DType = Int64Type;
  BM_DeltaBitPackingDecode<DType>(state, MakeDeltaBitPackingInputWide<DType>);
}

BENCHMARK(BM_DeltaBitPackingDecode_Int32_Fixed)->Range(MIN_RANGE, MAX_RANGE);
BENCHMARK(BM_DeltaBitPackingDecode_Int64_Fixed)->Range(MIN_RANGE, MAX_RANGE);
BENCHMARK(BM_DeltaBitPackingDecode_Int32_Narrow)->Range(MIN_RANGE, MAX_RANGE);
BENCHMARK(BM_DeltaBitPackingDecode_Int64_Narrow)->Range(MIN_RANGE, MAX_RANGE);
BENCHMARK(BM_DeltaBitPackingDecode_Int32_Wide)->Range(MIN_RANGE, MAX_RANGE);
BENCHMARK(BM_DeltaBitPackingDecode_Int64_Wide)->Range(MIN_RANGE, MAX_RANGE);
// Argument generator for the byte-array benchmarks: the cartesian product of
// maximum string lengths {8, 64, 1024} and batch sizes {512, 2048}.
static void ByteArrayCustomArguments(benchmark::internal::Benchmark* b) {
  const std::vector<int64_t> max_string_lengths = {8, 64, 1024};
  const std::vector<int64_t> batch_sizes = {512, 2048};
  b->ArgsProduct({max_string_lengths, batch_sizes});
  b->ArgNames({"max-string-length", "batch-size"});
}
// Benchmarks byte-array encoding with the given encoding. state.range(0) is
// the maximum string length and state.range(1) the number of strings. The
// input is a random Arrow string array (seed 0, no nulls) converted to a
// vector of ByteArray views; each iteration puts all of them into the encoder
// and flushes it.
void EncodingByteArrayBenchmark(benchmark::State& state, Encoding::type encoding) {
  ::arrow::random::RandomArrayGenerator rag(0);
  // Using arrow generator to generate random data.
  int32_t max_length = static_cast<int32_t>(state.range(0));
  int32_t array_size = static_cast<int32_t>(state.range(1));
  auto array =
      rag.String(/* size */ array_size, /* min_length */ 0, /* max_length */ max_length,
                 /* null_probability */ 0);
  const auto array_actual =
      ::arrow::internal::checked_pointer_cast<::arrow::StringArray>(array);
  auto encoder = MakeTypedEncoder<ByteArrayType>(encoding);
  std::vector<ByteArray> values;
  // The final size is known up front, so allocate once.
  values.reserve(static_cast<size_t>(array_actual->length()));
  for (int64_t i = 0; i < array_actual->length(); ++i) {
    values.emplace_back(array_actual->GetView(i));
  }

  for (auto _ : state) {
    encoder->Put(values.data(), static_cast<int>(values.size()));
    encoder->FlushValues();
  }
  state.SetItemsProcessed(state.iterations() * array_actual->length());
  // Bytes are counted as the size of the string data plus the offsets buffer.
  state.SetBytesProcessed(state.iterations() * (array_actual->value_data()->size() +
                                                array_actual->value_offsets()->size()));
}
// Byte-array encoding benchmark with DELTA_LENGTH_BYTE_ARRAY.
static void BM_DeltaLengthEncodingByteArray(benchmark::State& state) {
  constexpr Encoding::type kEncoding = Encoding::DELTA_LENGTH_BYTE_ARRAY;
  EncodingByteArrayBenchmark(state, kEncoding);
}

// Byte-array encoding benchmark with PLAIN.
static void BM_PlainEncodingByteArray(benchmark::State& state) {
  constexpr Encoding::type kEncoding = Encoding::PLAIN;
  EncodingByteArrayBenchmark(state, kEncoding);
}
// Benchmarks byte-array decoding with the given encoding. state.range(0) is
// the maximum string length and state.range(1) the number of strings. A random
// Arrow string array (seed 0, no nulls) is encoded once; each iteration builds
// a fresh decoder and decodes all values.
void DecodingByteArrayBenchmark(benchmark::State& state, Encoding::type encoding) {
  const int32_t max_length = static_cast<int32_t>(state.range(0));
  const int32_t array_size = static_cast<int32_t>(state.range(1));

  // The input is produced and encoded through Arrow; only decoding is timed.
  ::arrow::random::RandomArrayGenerator generator(0);
  auto array = generator.String(/* size */ array_size, /* min_length */ 0,
                                /* max_length */ max_length,
                                /* null_probability */ 0);
  const auto string_array =
      ::arrow::internal::checked_pointer_cast<::arrow::StringArray>(array);

  auto encoder = MakeTypedEncoder<ByteArrayType>(encoding);
  encoder->Put(*array);
  std::shared_ptr<Buffer> encoded = encoder->FlushValues();

  std::vector<ByteArray> decoded(array->length());

  for (auto _ : state) {
    auto decoder = MakeTypedDecoder<ByteArrayType>(encoding);
    decoder->SetData(static_cast<int>(array->length()), encoded->data(),
                     static_cast<int>(encoded->size()));
    decoder->Decode(decoded.data(), static_cast<int>(decoded.size()));
    ::benchmark::DoNotOptimize(decoded);
  }

  state.SetItemsProcessed(state.iterations() * array->length());
  // Bytes are counted as the size of the string data plus the offsets buffer.
  state.SetBytesProcessed(state.iterations() * (string_array->value_data()->size() +
                                                string_array->value_offsets()->size()));
}
// Byte-array decoding benchmark with PLAIN.
static void BM_PlainDecodingByteArray(benchmark::State& state) {
  constexpr Encoding::type kEncoding = Encoding::PLAIN;
  DecodingByteArrayBenchmark(state, kEncoding);
}

// Byte-array decoding benchmark with DELTA_LENGTH_BYTE_ARRAY.
static void BM_DeltaLengthDecodingByteArray(benchmark::State& state) {
  constexpr Encoding::type kEncoding = Encoding::DELTA_LENGTH_BYTE_ARRAY;
  DecodingByteArrayBenchmark(state, kEncoding);
}
// Registration of the non-spaced byte-array benchmarks; all use the
// (max-string-length, batch-size) argument product from
// ByteArrayCustomArguments.
BENCHMARK(BM_PlainEncodingByteArray)->Apply(ByteArrayCustomArguments);
BENCHMARK(BM_DeltaLengthEncodingByteArray)->Apply(ByteArrayCustomArguments);
BENCHMARK(BM_PlainDecodingByteArray)->Apply(ByteArrayCustomArguments);
BENCHMARK(BM_DeltaLengthDecodingByteArray)->Apply(ByteArrayCustomArguments);
// Benchmarks DecodeSpaced for byte arrays with the given encoding.
// state.range(0) is the maximum string length and state.range(1) the number of
// values. A random Arrow string array (seed 0, null probability 0.02) is
// encoded once with PutSpaced; each iteration resets the decoder onto the
// encoded buffer and decodes into a spaced output buffer.
static void BM_DecodingByteArraySpaced(benchmark::State& state, Encoding::type encoding) {
  const double null_percent = 0.02;
  const int32_t max_length = static_cast<int32_t>(state.range(0));
  const int32_t num_values = static_cast<int32_t>(state.range(1));

  ::arrow::random::RandomArrayGenerator generator(0);
  const auto array = generator.String(num_values, /* min_length */ 0,
                                      /* max_length */ max_length, null_percent);
  const auto string_array =
      ::arrow::internal::checked_pointer_cast<::arrow::StringArray>(array);
  const auto valid_bits = array->null_bitmap_data();
  const int null_count = static_cast<int>(array->null_count());

  // One ByteArray view per array slot.
  std::vector<ByteArray> input_views;
  input_views.reserve(string_array->length());
  for (int idx = 0; idx < string_array->length(); ++idx) {
    input_views.emplace_back(string_array->GetView(idx));
  }

  auto encoder = MakeTypedEncoder<ByteArrayType>(encoding);
  encoder->PutSpaced(input_views.data(), num_values, valid_bits, 0);
  std::shared_ptr<Buffer> encoded = encoder->FlushValues();

  auto decoder = MakeTypedDecoder<ByteArrayType>(encoding);
  // Raw byte storage viewed as an array of ByteArray.
  std::vector<uint8_t> decode_storage(num_values * sizeof(ByteArray));
  auto decode_buf = reinterpret_cast<ByteArray*>(decode_storage.data());

  for (auto _ : state) {
    // Only the non-null values are present in the encoded buffer.
    decoder->SetData(num_values - null_count, encoded->data(),
                     static_cast<int>(encoded->size()));
    decoder->DecodeSpaced(decode_buf, num_values, null_count, valid_bits, 0);
    ::benchmark::DoNotOptimize(decode_buf);
  }

  state.counters["null_percent"] = null_percent * 100;
  state.SetItemsProcessed(state.iterations() * string_array->length());
  // Bytes are counted as the size of the string data plus the offsets buffer.
  state.SetBytesProcessed(state.iterations() * (string_array->value_data()->size() +
                                                string_array->value_offsets()->size()));
}
// Benchmark entry point: spaced BYTE_ARRAY decoding with PLAIN encoding.
static void BM_PlainDecodingSpacedByteArray(benchmark::State& state) {
  constexpr Encoding::type kEncoding = Encoding::PLAIN;
  BM_DecodingByteArraySpaced(state, kEncoding);
}
// Benchmark entry point: spaced BYTE_ARRAY decoding with DELTA_LENGTH_BYTE_ARRAY
// encoding.
static void BM_DeltaLengthDecodingSpacedByteArray(benchmark::State& state) {
  constexpr Encoding::type kEncoding = Encoding::DELTA_LENGTH_BYTE_ARRAY;
  BM_DecodingByteArraySpaced(state, kEncoding);
}
// Register the spaced byte-array decoding benchmarks; ByteArrayCustomArguments
// supplies their argument combinations.
BENCHMARK(BM_PlainDecodingSpacedByteArray)->Apply(ByteArrayCustomArguments);
BENCHMARK(BM_DeltaLengthDecodingSpacedByteArray)->Apply(ByteArrayCustomArguments);
struct DeltaByteArrayState {
int32_t min_size = 0;
int32_t max_size;
int32_t array_length;
int32_t total_data_size = 0;
double prefixed_probability;
std::vector<uint8_t> buf;
explicit DeltaByteArrayState(const benchmark::State& state)
: max_size(static_cast<int32_t>(state.range(0))),
array_length(static_cast<int32_t>(state.range(1))),
prefixed_probability(state.range(2) / 100.0) {}
std::vector<ByteArray> MakeRandomByteArray(uint32_t seed) {
std::default_random_engine gen(seed);
std::uniform_int_distribution<int> dist_size(min_size, max_size);
std::uniform_int_distribution<int> dist_byte(0, 255);
std::bernoulli_distribution dist_has_prefix(prefixed_probability);
std::uniform_real_distribution<double> dist_prefix_length(0, 1);
std::vector<ByteArray> out(array_length);
buf.resize(max_size * array_length);
auto buf_ptr = buf.data();
total_data_size = 0;
for (int32_t i = 0; i < array_length; ++i) {
int len = dist_size(gen);
out[i].len = len;
out[i].ptr = buf_ptr;
bool do_prefix = i > 0 && dist_has_prefix(gen);
int prefix_len = 0;
if (do_prefix) {
int max_prefix_len = std::min(len, static_cast<int>(out[i - 1].len));
prefix_len =
static_cast<int>(std::ceil(max_prefix_len * dist_prefix_length(gen)));
}
for (int j = 0; j < prefix_len; ++j) {
buf_ptr[j] = out[i - 1].ptr[j];
}
for (int j = prefix_len; j < len; ++j) {
buf_ptr[j] = static_cast<uint8_t>(dist_byte(gen));
}
buf_ptr += len;
total_data_size += len;
}
return out;
}
};
// Measures DELTA_BYTE_ARRAY encoding of the values generated by
// DeltaByteArrayState and reports a "compression_ratio" counter.
static void BM_DeltaEncodingByteArray(benchmark::State& state) {
  DeltaByteArrayState generator(state);
  std::vector<ByteArray> input = generator.MakeRandomByteArray(/*seed=*/42);
  auto encoder = MakeTypedEncoder<ByteArrayType>(Encoding::DELTA_BYTE_ARRAY);

  // Reference size used for the ratio: the payload plus 4 bytes per value.
  const int64_t plain_bytes = generator.total_data_size + 4 * generator.array_length;

  // Size of the most recently flushed buffer.
  int64_t delta_bytes = 0;
  for (auto _ : state) {
    encoder->Put(input.data(), static_cast<int>(input.size()));
    delta_bytes = encoder->FlushValues()->size();
  }

  const auto iterations = state.iterations();
  state.SetItemsProcessed(iterations * generator.array_length);
  state.SetBytesProcessed(iterations * generator.total_data_size);
  state.counters["compression_ratio"] = static_cast<double>(plain_bytes) / delta_bytes;
}
// Measures DELTA_BYTE_ARRAY decoding. The input is generated and encoded once
// up front; each iteration re-attaches the decoder to the encoded buffer and
// decodes every value.
static void BM_DeltaDecodingByteArray(benchmark::State& state) {
  DeltaByteArrayState generator(state);
  std::vector<ByteArray> values = generator.MakeRandomByteArray(/*seed=*/42);

  auto encoder = MakeTypedEncoder<ByteArrayType>(Encoding::DELTA_BYTE_ARRAY);
  encoder->Put(values.data(), static_cast<int>(values.size()));
  std::shared_ptr<Buffer> encoded = encoder->FlushValues();

  // Reference size used for the ratio: the payload plus 4 bytes per value.
  const int64_t plain_bytes = generator.total_data_size + 4 * generator.array_length;
  const int64_t delta_bytes = encoded->size();

  auto decoder = MakeTypedDecoder<ByteArrayType>(Encoding::DELTA_BYTE_ARRAY);
  for (auto _ : state) {
    decoder->SetData(generator.array_length, encoded->data(),
                     static_cast<int>(encoded->size()));
    // The decoded values overwrite the original input vector.
    decoder->Decode(values.data(), static_cast<int>(values.size()));
    ::benchmark::DoNotOptimize(values);
  }

  const auto iterations = state.iterations();
  state.SetItemsProcessed(iterations * generator.array_length);
  state.SetBytesProcessed(iterations * generator.total_data_size);
  state.counters["compression_ratio"] = static_cast<double>(plain_bytes) / delta_bytes;
}
// Registers every combination of maximum string length, batch size and prefix
// percentage as a benchmark argument triple, then names the three arguments.
static void ByteArrayDeltaCustomArguments(benchmark::internal::Benchmark* b) {
  static constexpr int kMaxStringLengths[] = {8, 64, 1024};
  static constexpr int kBatchSizes[] = {512, 2048};
  static constexpr int kPrefixedPercents[] = {10, 90, 99};

  for (const int max_string_length : kMaxStringLengths) {
    for (const int batch_size : kBatchSizes) {
      for (const int prefixed_percent : kPrefixedPercents) {
        b->Args({max_string_length, batch_size, prefixed_percent});
      }
    }
  }
  b->ArgNames({"max-string-length", "batch-size", "prefixed-percent"});
}
// Register the DELTA_BYTE_ARRAY benchmarks with the argument triples produced by
// ByteArrayDeltaCustomArguments.
BENCHMARK(BM_DeltaEncodingByteArray)->Apply(ByteArrayDeltaCustomArguments);
BENCHMARK(BM_DeltaDecodingByteArray)->Apply(ByteArrayDeltaCustomArguments);
// Measures RLE encoding of state.range(0) boolean values that are all `true`.
static void BM_RleEncodingBoolean(benchmark::State& state) {
  const auto num_values = state.range(0);
  std::vector<bool> all_true(num_values, true);

  auto encoder = MakeEncoder(Type::BOOLEAN, Encoding::RLE);
  auto boolean_encoder = dynamic_cast<BooleanEncoder*>(encoder.get());

  for (auto _ : state) {
    boolean_encoder->Put(all_true, static_cast<int>(all_true.size()));
    boolean_encoder->FlushValues();
  }
  state.SetBytesProcessed(state.iterations() * num_values * sizeof(bool));
}

BENCHMARK(BM_RleEncodingBoolean)->Range(MIN_RANGE, MAX_RANGE);
// Measures RLE decoding of state.range(0) boolean values that are all `true`.
// The values are encoded once; each iteration re-attaches the decoder to the
// encoded buffer and decodes everything into `output`.
static void BM_RleDecodingBoolean(benchmark::State& state) {
  const auto num_values = static_cast<size_t>(state.range(0));
  std::vector<bool> values(num_values, true);
  // Owned through unique_ptr so the buffer is released even if encoding or
  // decoding throws (the previous raw new[]/delete[] pair leaked in that case).
  auto output = std::make_unique<bool[]>(num_values);

  auto encoder = MakeEncoder(Type::BOOLEAN, Encoding::RLE);
  auto typed_encoder = dynamic_cast<BooleanEncoder*>(encoder.get());
  typed_encoder->Put(values, static_cast<int>(values.size()));
  std::shared_ptr<Buffer> buf = encoder->FlushValues();

  auto decoder = MakeTypedDecoder<BooleanType>(Encoding::RLE);
  for (auto _ : state) {
    decoder->SetData(static_cast<int>(values.size()), buf->data(),
                     static_cast<int>(buf->size()));
    decoder->Decode(output.get(), static_cast<int>(values.size()));
  }
  state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(bool));
}

BENCHMARK(BM_RleDecodingBoolean)->Range(MIN_RANGE, MAX_RANGE);
// Benchmark entry point: runs the generic BM_EncodingSpaced driver for booleans
// with RLE encoding.
static void BM_RleEncodingSpacedBoolean(benchmark::State& state) {
  constexpr Encoding::type kEncoding = Encoding::RLE;
  BM_EncodingSpaced<BooleanType>(state, kEncoding);
}

BENCHMARK(BM_RleEncodingSpacedBoolean)->Apply(BM_SpacedArgs);
// Benchmark entry point: runs the generic BM_DecodingSpaced driver for booleans
// with RLE encoding.
static void BM_RleDecodingSpacedBoolean(benchmark::State& state) {
  constexpr Encoding::type kEncoding = Encoding::RLE;
  BM_DecodingSpaced<BooleanType>(state, kEncoding);
}

BENCHMARK(BM_RleDecodingSpacedBoolean)->Apply(BM_SpacedArgs);
template <typename Type>
static void EncodeDict(const std::vector<typename Type::c_type>& values,
benchmark::State& state) {
using T = typename Type::c_type;
int num_values = static_cast<int>(values.size());
MemoryPool* allocator = default_memory_pool();
std::shared_ptr<ColumnDescriptor> descr = Int64Schema(Repetition::REQUIRED);
auto base_encoder = MakeEncoder(Type::type_num, Encoding::RLE_DICTIONARY,
/*use_dictionary=*/true, descr.get(), allocator);
auto encoder =
dynamic_cast<typename EncodingTraits<Type>::Encoder*>(base_encoder.get());
for (auto _ : state) {
encoder->Put(values.data(), num_values);
encoder->FlushValues();
}
state.SetBytesProcessed(state.iterations() * num_values * sizeof(T));
state.SetItemsProcessed(state.iterations() * num_values);
}
template <typename Type>
static void DecodeDict(const std::vector<typename Type::c_type>& values,
benchmark::State& state) {
typedef typename Type::c_type T;
int num_values = static_cast<int>(values.size());
MemoryPool* allocator = default_memory_pool();
std::shared_ptr<ColumnDescriptor> descr = Int64Schema(Repetition::REQUIRED);
auto base_encoder =
MakeEncoder(Type::type_num, Encoding::PLAIN, true, descr.get(), allocator);
auto encoder =
dynamic_cast<typename EncodingTraits<Type>::Encoder*>(base_encoder.get());
auto dict_traits = dynamic_cast<DictEncoder<Type>*>(base_encoder.get());
encoder->Put(values.data(), num_values);
std::shared_ptr<ResizableBuffer> dict_buffer =
AllocateBuffer(allocator, dict_traits->dict_encoded_size());
std::shared_ptr<ResizableBuffer> indices =
AllocateBuffer(allocator, encoder->EstimatedDataEncodedSize());
dict_traits->WriteDict(dict_buffer->mutable_data());
int actual_bytes = dict_traits->WriteIndices(indices->mutable_data(),
static_cast<int>(indices->size()));
PARQUET_THROW_NOT_OK(indices->Resize(actual_bytes));
std::vector<T> decoded_values(num_values);
for (auto _ : state) {
auto dict_decoder = MakeTypedDecoder<Type>(Encoding::PLAIN, descr.get());
dict_decoder->SetData(dict_traits->num_entries(), dict_buffer->data(),
static_cast<int>(dict_buffer->size()));
auto decoder = MakeDictDecoder<Type>(descr.get());
decoder->SetDict(dict_decoder.get());
decoder->SetData(num_values, indices->data(), static_cast<int>(indices->size()));
decoder->Decode(decoded_values.data(), num_values);
}
state.SetBytesProcessed(state.iterations() * num_values * sizeof(T));
state.SetItemsProcessed(state.iterations() * num_values);
}
// Dictionary decoding of state.range(0) INT64 values that are all equal to 64.
static void BM_DictDecodingInt64_repeats(benchmark::State& state) {
  using CType = Int64Type::c_type;
  const std::vector<CType> repeated(state.range(0), 64);
  DecodeDict<Int64Type>(repeated, state);
}

BENCHMARK(BM_DictDecodingInt64_repeats)->Range(MIN_RANGE, MAX_RANGE);
// Dictionary encoding of state.range(0) INT64 values that are all equal to 64.
static void BM_DictEncodingInt64_repeats(benchmark::State& state) {
  using CType = Int64Type::c_type;
  const std::vector<CType> repeated(state.range(0), 64);
  EncodeDict<Int64Type>(repeated, state);
}

BENCHMARK(BM_DictEncodingInt64_repeats)->Range(MIN_RANGE, MAX_RANGE);
// Dictionary decoding of the distinct INT64 values 0, 1, ..., state.range(0) - 1.
static void BM_DictDecodingInt64_literals(benchmark::State& state) {
  using CType = Int64Type::c_type;
  std::vector<CType> distinct(state.range(0));
  CType next = 0;
  for (CType& slot : distinct) {
    slot = next++;
  }
  DecodeDict<Int64Type>(distinct, state);
}

BENCHMARK(BM_DictDecodingInt64_literals)->Range(MIN_RANGE, MAX_RANGE);
// Dictionary encoding of the distinct INT64 values 0, 1, ..., state.range(0) - 1.
static void BM_DictEncodingInt64_literals(benchmark::State& state) {
  using CType = Int64Type::c_type;
  std::vector<CType> distinct(state.range(0));
  CType next = 0;
  for (CType& slot : distinct) {
    slot = next++;
  }
  EncodeDict<Int64Type>(distinct, state);
}

BENCHMARK(BM_DictEncodingInt64_literals)->Range(MIN_RANGE, MAX_RANGE);
// Dictionary decoding of random strings. state.range(0) is the maximum string
// length and state.range(1) the number of strings; the data contains no nulls.
static void BM_DictDecodingByteArray(benchmark::State& state) {
  ::arrow::random::RandomArrayGenerator rag(0);
  // Using arrow generator to generate random data.
  const int32_t max_length = static_cast<int32_t>(state.range(0));
  const int32_t array_size = static_cast<int32_t>(state.range(1));
  auto array =
      rag.String(/* size */ array_size, /* min_length */ 0, /* max_length */ max_length,
                 /* null_probability */ 0);
  const auto array_actual =
      ::arrow::internal::checked_pointer_cast<::arrow::StringArray>(array);

  // The ByteArray values are views into `array`, which stays alive for the whole
  // function.
  std::vector<ByteArray> values;
  values.reserve(array_actual->length());
  for (int64_t i = 0; i < array_actual->length(); ++i) {
    values.emplace_back(array_actual->GetView(i));
  }

  DecodeDict<ByteArrayType>(values, state);

  // DecodeDict() reports throughput in units of sizeof(ByteArray); replace it
  // with the size of the underlying string data and offsets.
  state.SetItemsProcessed(state.iterations() * array_actual->length());
  state.SetBytesProcessed(state.iterations() * (array_actual->value_data()->size() +
                                                array_actual->value_offsets()->size()));
}

BENCHMARK(BM_DictDecodingByteArray)->Apply(ByteArrayCustomArguments);
// ----------------------------------------------------------------------
// Shared benchmarks for decoding using arrow builders
using ::arrow::BinaryBuilder;
using ::arrow::BinaryDictionary32Builder;
// Fixture shared by the Arrow-facing encode/decode benchmarks.
//
// SetUp() reads the value count from the first benchmark argument, asks the
// subclass to build its inputs and encodes them once. The *Benchmark() methods
// are the timed bodies that the BENCHMARK_DEFINE_F definitions forward to.
template <typename ParquetType>
class BenchmarkDecodeArrowBase : public ::benchmark::Fixture {
 public:
  virtual ~BenchmarkDecodeArrowBase() = default;

  void SetUp(const ::benchmark::State& state) override {
    num_values_ = static_cast<int>(state.range());
    InitDataInputs();
    DoEncodeArrow();
  }

  void TearDown(const ::benchmark::State& state) override {
    buffer_.reset();
    input_array_.reset();
    values_.clear();
  }

  // Hooks implemented by the concrete fixtures.
  virtual void InitDataInputs() = 0;
  virtual void DoEncodeArrow() = 0;
  virtual void DoEncodeLowLevel() = 0;
  virtual std::unique_ptr<TypedDecoder<ParquetType>> InitializeDecoder() = 0;
  virtual typename EncodingTraits<ParquetType>::Accumulator CreateAccumulator() = 0;

  // Times DoEncodeArrow().
  void EncodeArrowBenchmark(benchmark::State& state) {
    for (auto _ : state) {
      DoEncodeArrow();
    }
    ReportThroughput(state);
  }

  // Times DoEncodeLowLevel().
  void EncodeLowLevelBenchmark(benchmark::State& state) {
    for (auto _ : state) {
      DoEncodeLowLevel();
    }
    ReportThroughput(state);
  }

  // Times DecodeArrow() into a fresh accumulator, passing a null count of 0.
  void DecodeArrowDenseBenchmark(benchmark::State& state) {
    for (auto _ : state) {
      auto decoder = InitializeDecoder();
      auto accumulator = CreateAccumulator();
      decoder->DecodeArrow(num_values_, 0, valid_bits_, 0, &accumulator);
    }
    ReportThroughput(state);
  }

  // Times DecodeArrowNonNull() into a fresh accumulator.
  void DecodeArrowNonNullDenseBenchmark(benchmark::State& state) {
    for (auto _ : state) {
      auto decoder = InitializeDecoder();
      auto accumulator = CreateAccumulator();
      decoder->DecodeArrowNonNull(num_values_, &accumulator);
    }
    ReportThroughput(state);
  }

  // Times DecodeArrow() into a fresh dictionary builder, passing a null count of 0.
  void DecodeArrowDictBenchmark(benchmark::State& state) {
    for (auto _ : state) {
      auto decoder = InitializeDecoder();
      BinaryDictionary32Builder dict_builder(default_memory_pool());
      decoder->DecodeArrow(num_values_, 0, valid_bits_, 0, &dict_builder);
    }
    ReportThroughput(state);
  }

  // Times DecodeArrowNonNull() into a fresh dictionary builder.
  void DecodeArrowNonNullDictBenchmark(benchmark::State& state) {
    for (auto _ : state) {
      auto decoder = InitializeDecoder();
      BinaryDictionary32Builder dict_builder(default_memory_pool());
      decoder->DecodeArrowNonNull(num_values_, &dict_builder);
    }
    ReportThroughput(state);
  }

 protected:
  int num_values_{0};
  std::shared_ptr<::arrow::Array> input_array_;
  uint64_t total_size_{0};
  const uint8_t* valid_bits_{nullptr};
  std::shared_ptr<Buffer> buffer_;
  std::vector<typename ParquetType::c_type> values_;

 private:
  // Publishes bytes/items processed from total_size_ and num_values_.
  void ReportThroughput(benchmark::State& state) {
    state.SetBytesProcessed(state.iterations() * total_size_);
    state.SetItemsProcessed(state.iterations() * num_values_);
  }
};
// Fixture base for the BYTE_ARRAY benchmarks: provides a BinaryBuilder-backed
// accumulator and a null-free random string input.
//
// The low-level ByteArray views are stored in the `values_` member inherited
// from BenchmarkDecodeArrowBase<ByteArrayType>. This class used to redeclare a
// `values_` of its own, which hid the inherited member, so the base TearDown()
// cleared a vector that was never filled.
class BenchmarkDecodeArrowByteArray : public BenchmarkDecodeArrowBase<ByteArrayType> {
 public:
  using ByteArrayAccumulator = typename EncodingTraits<ByteArrayType>::Accumulator;

  ByteArrayAccumulator CreateAccumulator() final {
    ByteArrayAccumulator acc;
    acc.builder = std::make_unique<BinaryBuilder>(default_memory_pool());
    return acc;
  }

  void InitDataInputs() final {
    // Generate a random string dictionary without any nulls so that this dataset can
    // be used for benchmarking the DecodeArrowNonNull API
    constexpr int repeat_factor = 8;
    constexpr int64_t min_length = 2;
    constexpr int64_t max_length = 10;
    ::arrow::random::RandomArrayGenerator rag(0);
    input_array_ = rag.StringWithRepeats(num_values_, num_values_ / repeat_factor,
                                         min_length, max_length, /*null_probability=*/0);
    valid_bits_ = input_array_->null_bitmap_data();
    // Reported throughput is based on the size of the array's buffer at index 2.
    total_size_ = input_array_->data()->buffers[2]->size();

    // Views into input_array_ for the low-level (ByteArray-based) encode path.
    values_.resize(num_values_);
    const auto& binary_array = static_cast<const ::arrow::BinaryArray&>(*input_array_);
    for (int64_t i = 0; i < binary_array.length(); i++) {
      values_[i] = binary_array.GetView(i);
    }
  }
};
// ----------------------------------------------------------------------
// Benchmark Decoding from Plain Encoding
class BM_ArrowBinaryPlain : public BenchmarkDecodeArrowByteArray {
public:
void DoEncodeArrow() override {
auto encoder = MakeTypedEncoder<ByteArrayType>(Encoding::PLAIN);
encoder->Put(*input_array_);
buffer_ = encoder->FlushValues();
}
void DoEncodeLowLevel() override {
auto encoder = MakeTypedEncoder<ByteArrayType>(Encoding::PLAIN);
encoder->Put(values_.data(), num_values_);
buffer_ = encoder->FlushValues();
}
std::unique_ptr<ByteArrayDecoder> InitializeDecoder() override {
auto decoder = MakeTypedDecoder<ByteArrayType>(Encoding::PLAIN);
decoder->SetData(num_values_, buffer_->data(), static_cast<int>(buffer_->size()));
return decoder;
}
};
// Benchmarks on the BM_ArrowBinaryPlain fixture. Each definition forwards to the
// matching *Benchmark() method of the fixture base class.

// Encoding benchmarks run with 2^18 .. 2^20 values.
BENCHMARK_DEFINE_F(BM_ArrowBinaryPlain, EncodeArrow)
(benchmark::State& state) { EncodeArrowBenchmark(state); }
BENCHMARK_REGISTER_F(BM_ArrowBinaryPlain, EncodeArrow)->Range(1 << 18, 1 << 20);
BENCHMARK_DEFINE_F(BM_ArrowBinaryPlain, EncodeLowLevel)
(benchmark::State& state) { EncodeLowLevelBenchmark(state); }
BENCHMARK_REGISTER_F(BM_ArrowBinaryPlain, EncodeLowLevel)->Range(1 << 18, 1 << 20);
// Decoding benchmarks run with MIN_RANGE .. MAX_RANGE values.
BENCHMARK_DEFINE_F(BM_ArrowBinaryPlain, DecodeArrow_Dense)
(benchmark::State& state) { DecodeArrowDenseBenchmark(state); }
BENCHMARK_REGISTER_F(BM_ArrowBinaryPlain, DecodeArrow_Dense)->Range(MIN_RANGE, MAX_RANGE);
BENCHMARK_DEFINE_F(BM_ArrowBinaryPlain, DecodeArrowNonNull_Dense)
(benchmark::State& state) { DecodeArrowNonNullDenseBenchmark(state); }
BENCHMARK_REGISTER_F(BM_ArrowBinaryPlain, DecodeArrowNonNull_Dense)
->Range(MIN_RANGE, MAX_RANGE);
BENCHMARK_DEFINE_F(BM_ArrowBinaryPlain, DecodeArrow_Dict)
(benchmark::State& state) { DecodeArrowDictBenchmark(state); }
BENCHMARK_REGISTER_F(BM_ArrowBinaryPlain, DecodeArrow_Dict)->Range(MIN_RANGE, MAX_RANGE);
BENCHMARK_DEFINE_F(BM_ArrowBinaryPlain, DecodeArrowNonNull_Dict)
(benchmark::State& state) { DecodeArrowNonNullDictBenchmark(state); }
BENCHMARK_REGISTER_F(BM_ArrowBinaryPlain, DecodeArrowNonNull_Dict)
->Range(MIN_RANGE, MAX_RANGE);
// ----------------------------------------------------------------------
// Benchmark Decoding from Dictionary Encoding
class BM_ArrowBinaryDict : public BenchmarkDecodeArrowByteArray {
public:
template <typename PutValuesFunc>
void DoEncode(PutValuesFunc&& put_values) {
auto node = schema::ByteArray("name");
descr_ = std::make_unique<ColumnDescriptor>(node, 0, 0);
auto encoder = MakeTypedEncoder<ByteArrayType>(Encoding::PLAIN,
/*use_dictionary=*/true, descr_.get());
put_values(encoder.get());
buffer_ = encoder->FlushValues();
auto dict_encoder = dynamic_cast<DictEncoder<ByteArrayType>*>(encoder.get());
ASSERT_NE(dict_encoder, nullptr);
dict_buffer_ =
AllocateBuffer(default_memory_pool(), dict_encoder->dict_encoded_size());
dict_encoder->WriteDict(dict_buffer_->mutable_data());
num_dict_entries_ = dict_encoder->num_entries();
}
template <typename IndexType>
void EncodeDictBenchmark(benchmark::State& state) {
constexpr int64_t nunique = 100;
constexpr int64_t min_length = 32;
constexpr int64_t max_length = 32;
::arrow::random::RandomArrayGenerator rag(0);
auto dict = rag.String(nunique, min_length, max_length,
/*null_probability=*/0);
auto indices = rag.Numeric<IndexType, int32_t>(num_values_, 0, nunique - 1);
auto PutValues = [&](ByteArrayEncoder* encoder) {
auto dict_encoder = dynamic_cast<DictEncoder<ByteArrayType>*>(encoder);
dict_encoder->PutDictionary(*dict);
dict_encoder->PutIndices(*indices);
};
for (auto _ : state) {
DoEncode(std::move(PutValues));
}
state.SetItemsProcessed(state.iterations() * num_values_);
}
void DoEncodeArrow() override {
auto PutValues = [&](ByteArrayEncoder* encoder) {
ASSERT_NO_THROW(encoder->Put(*input_array_));
};
DoEncode(std::move(PutValues));
}
void DoEncodeLowLevel() override {
auto PutValues = [&](ByteArrayEncoder* encoder) {
encoder->Put(values_.data(), num_values_);
};
DoEncode(std::move(PutValues));
}
std::unique_ptr<ByteArrayDecoder> InitializeDecoder() override {
auto decoder = MakeTypedDecoder<ByteArrayType>(Encoding::PLAIN, descr_.get());
decoder->SetData(num_dict_entries_, dict_buffer_->data(),
static_cast<int>(dict_buffer_->size()));
auto dict_decoder = MakeDictDecoder<ByteArrayType>(descr_.get());
dict_decoder->SetDict(decoder.get());
dict_decoder->SetData(num_values_, buffer_->data(),
static_cast<int>(buffer_->size()));
return std::unique_ptr<ByteArrayDecoder>(
dynamic_cast<ByteArrayDecoder*>(dict_decoder.release()));
}
void TearDown(const ::benchmark::State& state) override {
BenchmarkDecodeArrowByteArray::TearDown(state);
dict_buffer_.reset();
descr_.reset();
}
protected:
std::unique_ptr<ColumnDescriptor> descr_;
std::shared_ptr<Buffer> dict_buffer_;
int num_dict_entries_{0};
};
// Benchmarks on the BM_ArrowBinaryDict fixture.

// Encoding from the Arrow array: 2^18 .. 2^20 values.
BENCHMARK_DEFINE_F(BM_ArrowBinaryDict, EncodeArrow)
(benchmark::State& state) { EncodeArrowBenchmark(state); }
BENCHMARK_REGISTER_F(BM_ArrowBinaryDict, EncodeArrow)->Range(1 << 18, 1 << 20);
// Encoding from a dictionary plus indices, one benchmark per index width; each
// runs at the single size 2^20.
BENCHMARK_DEFINE_F(BM_ArrowBinaryDict, EncodeDictDirectInt8)
(benchmark::State& state) { EncodeDictBenchmark<::arrow::Int8Type>(state); }
BENCHMARK_REGISTER_F(BM_ArrowBinaryDict, EncodeDictDirectInt8)->Range(1 << 20, 1 << 20);
BENCHMARK_DEFINE_F(BM_ArrowBinaryDict, EncodeDictDirectInt16)
(benchmark::State& state) { EncodeDictBenchmark<::arrow::Int16Type>(state); }
BENCHMARK_REGISTER_F(BM_ArrowBinaryDict, EncodeDictDirectInt16)->Range(1 << 20, 1 << 20);
BENCHMARK_DEFINE_F(BM_ArrowBinaryDict, EncodeDictDirectInt32)
(benchmark::State& state) { EncodeDictBenchmark<::arrow::Int32Type>(state); }
BENCHMARK_REGISTER_F(BM_ArrowBinaryDict, EncodeDictDirectInt32)->Range(1 << 20, 1 << 20);
BENCHMARK_DEFINE_F(BM_ArrowBinaryDict, EncodeDictDirectInt64)
(benchmark::State& state) { EncodeDictBenchmark<::arrow::Int64Type>(state); }
BENCHMARK_REGISTER_F(BM_ArrowBinaryDict, EncodeDictDirectInt64)->Range(1 << 20, 1 << 20);
// Encoding from the low-level ByteArray values: 2^18 .. 2^20 values.
BENCHMARK_DEFINE_F(BM_ArrowBinaryDict, EncodeLowLevel)
(benchmark::State& state) { EncodeLowLevelBenchmark(state); }
BENCHMARK_REGISTER_F(BM_ArrowBinaryDict, EncodeLowLevel)->Range(1 << 18, 1 << 20);
// Decoding benchmarks: MIN_RANGE .. MAX_RANGE values.
BENCHMARK_DEFINE_F(BM_ArrowBinaryDict, DecodeArrow_Dense)(benchmark::State& state) {
DecodeArrowDenseBenchmark(state);
}
BENCHMARK_REGISTER_F(BM_ArrowBinaryDict, DecodeArrow_Dense)->Range(MIN_RANGE, MAX_RANGE);
BENCHMARK_DEFINE_F(BM_ArrowBinaryDict, DecodeArrowNonNull_Dense)
(benchmark::State& state) { DecodeArrowNonNullDenseBenchmark(state); }
BENCHMARK_REGISTER_F(BM_ArrowBinaryDict, DecodeArrowNonNull_Dense)
->Range(MIN_RANGE, MAX_RANGE);
BENCHMARK_DEFINE_F(BM_ArrowBinaryDict, DecodeArrow_Dict)
(benchmark::State& state) { DecodeArrowDictBenchmark(state); }
BENCHMARK_REGISTER_F(BM_ArrowBinaryDict, DecodeArrow_Dict)->Range(MIN_RANGE, MAX_RANGE);
BENCHMARK_DEFINE_F(BM_ArrowBinaryDict, DecodeArrowNonNull_Dict)
(benchmark::State& state) { DecodeArrowNonNullDictBenchmark(state); }
BENCHMARK_REGISTER_F(BM_ArrowBinaryDict, DecodeArrowNonNull_Dict)
->Range(MIN_RANGE, MAX_RANGE);
// Fixture base for the BOOLEAN Arrow decoding benchmarks. Concrete fixtures pick
// the encoding by forwarding to DoEncodeArrowImpl() / InitializeDecoderImpl().
class BenchmarkDecodeArrowBoolean : public BenchmarkDecodeArrowBase<BooleanType> {
 public:
  using BooleanAccumulator = typename EncodingTraits<BooleanType>::Accumulator;

  void InitDataInputs() final {
    // Random booleans (half of them true) with nulls drawn at `null_probability_`.
    ::arrow::random::RandomArrayGenerator rag(0);
    input_array_ = rag.Boolean(num_values_, /*true_probability=*/0.5, null_probability_);
    valid_bits_ = input_array_->null_bitmap_data();

    // Arrow stores boolean arrays as bitmaps, so the bitmap size is what gets
    // reported as "total_size" for the benchmark.
    total_size_ = ::arrow::bit_util::BytesForBits(num_values_);

    // Mirror the array contents into values_.
    values_.resize(num_values_);
    const auto& booleans = static_cast<const ::arrow::BooleanArray&>(*input_array_);
    const int64_t length = booleans.length();
    for (int64_t pos = 0; pos < length; ++pos) {
      values_[pos] = booleans.Value(pos);
    }
  }

  BooleanAccumulator CreateAccumulator() final { return BooleanAccumulator(); }

  // There is no low-level encode path for booleans in this benchmark.
  void DoEncodeLowLevel() final { ParquetException::NYI(); }

  // Timed body for decoding with nulls; defined out of line.
  void DecodeArrowWithNullDenseBenchmark(benchmark::State& state);

 protected:
  // Encodes input_array_ with `encoding` and stores the result in buffer_.
  void DoEncodeArrowImpl(Encoding::type encoding) {
    auto boolean_encoder = MakeTypedEncoder<BooleanType>(encoding);
    boolean_encoder->Put(*input_array_);
    buffer_ = boolean_encoder->FlushValues();
  }

  // Returns a new `encoding` decoder positioned at the start of buffer_.
  std::unique_ptr<TypedDecoder<BooleanType>> InitializeDecoderImpl(
      Encoding::type encoding) const {
    auto boolean_decoder = MakeTypedDecoder<BooleanType>(encoding);
    boolean_decoder->SetData(num_values_, buffer_->data(),
                             static_cast<int>(buffer_->size()));
    return boolean_decoder;
  }

  // Null probability used by InitDataInputs().
  double null_probability_ = 0.0;
};
// Timed body for decoding boolean data that contains nulls. The second benchmark
// argument is the null rate per ten thousand values; the inputs and the encoded
// buffer are rebuilt with that rate before timing starts.
void BenchmarkDecodeArrowBoolean::DecodeArrowWithNullDenseBenchmark(
    benchmark::State& state) {
  null_probability_ = static_cast<double>(state.range(1)) / 10000;
  InitDataInputs();
  DoEncodeArrow();

  const int total_values = num_values_;
  for (auto _ : state) {
    auto decoder = InitializeDecoder();
    auto accumulator = CreateAccumulator();
    const int null_count = static_cast<int>(input_array_->null_count());
    decoder->DecodeArrow(total_values, null_count, valid_bits_, 0, &accumulator);
  }

  const auto iterations = state.iterations();
  state.SetBytesProcessed(iterations * static_cast<int64_t>(total_size_));
  state.SetItemsProcessed(iterations * state.range(0));
}
class BM_DecodeArrowBooleanPlain : public BenchmarkDecodeArrowBoolean {
public:
void DoEncodeArrow() final { DoEncodeArrowImpl(Encoding::PLAIN); }
std::unique_ptr<TypedDecoder<BooleanType>> InitializeDecoder() override {
return InitializeDecoderImpl(Encoding::PLAIN);
}
};
class BM_DecodeArrowBooleanRle : public BenchmarkDecodeArrowBoolean {
public:
void DoEncodeArrow() final { DoEncodeArrowImpl(Encoding::RLE); }
std::unique_ptr<TypedDecoder<BooleanType>> InitializeDecoder() override {
return InitializeDecoderImpl(Encoding::RLE);
}
};
// Registers the cross product of value counts (MIN_RANGE..MAX_RANGE, multiplier
// 4) and null rates (per ten thousand values), then names both arguments.
static void BooleanWithNullCustomArguments(benchmark::internal::Benchmark* b) {
  const std::vector<int64_t> value_counts =
      benchmark::CreateRange(MIN_RANGE, MAX_RANGE, /*multi=*/4);
  const std::vector<int64_t> nulls_in_ten_thousand = {1, 100, 1000, 5000, 10000};

  b->ArgsProduct({value_counts, nulls_in_ten_thousand});
  b->ArgNames({"num_values", "null_in_ten_thousand"});
}
// Benchmarks on the boolean fixtures. DecodeArrow and DecodeArrowNonNull run
// with MIN_RANGE .. MAX_RANGE values; DecodeArrowWithNull takes its
// (num_values, null rate) pairs from BooleanWithNullCustomArguments.

// RLE-encoded booleans.
BENCHMARK_DEFINE_F(BM_DecodeArrowBooleanRle, DecodeArrow)(benchmark::State& state) {
DecodeArrowDenseBenchmark(state);
}
BENCHMARK_REGISTER_F(BM_DecodeArrowBooleanRle, DecodeArrow)->Range(MIN_RANGE, MAX_RANGE);
BENCHMARK_DEFINE_F(BM_DecodeArrowBooleanRle, DecodeArrowNonNull)
(benchmark::State& state) { DecodeArrowNonNullDenseBenchmark(state); }
BENCHMARK_REGISTER_F(BM_DecodeArrowBooleanRle, DecodeArrowNonNull)
->Range(MIN_RANGE, MAX_RANGE);
BENCHMARK_DEFINE_F(BM_DecodeArrowBooleanRle, DecodeArrowWithNull)
(benchmark::State& state) { DecodeArrowWithNullDenseBenchmark(state); }
BENCHMARK_REGISTER_F(BM_DecodeArrowBooleanRle, DecodeArrowWithNull)
->Apply(BooleanWithNullCustomArguments);
// PLAIN-encoded booleans.
BENCHMARK_DEFINE_F(BM_DecodeArrowBooleanPlain, DecodeArrow)
(benchmark::State& state) { DecodeArrowDenseBenchmark(state); }
BENCHMARK_REGISTER_F(BM_DecodeArrowBooleanPlain, DecodeArrow)
->Range(MIN_RANGE, MAX_RANGE);
BENCHMARK_DEFINE_F(BM_DecodeArrowBooleanPlain, DecodeArrowNonNull)
(benchmark::State& state) { DecodeArrowNonNullDenseBenchmark(state); }
BENCHMARK_REGISTER_F(BM_DecodeArrowBooleanPlain, DecodeArrowNonNull)
->Range(MIN_RANGE, MAX_RANGE);
BENCHMARK_DEFINE_F(BM_DecodeArrowBooleanPlain, DecodeArrowWithNull)
(benchmark::State& state) { DecodeArrowWithNullDenseBenchmark(state); }
BENCHMARK_REGISTER_F(BM_DecodeArrowBooleanPlain, DecodeArrowWithNull)
->Apply(BooleanWithNullCustomArguments);
} // namespace parquet