in cpp/src/arrow/compute/kernels/aggregate_basic.cc [1023:1191]
// Registers the basic scalar aggregate functions with `registry`:
// count_all, count, count_distinct, sum, mean, first_last, first, last,
// min_max, min, max, product, any, all and index.
//
// Each function object is created, populated with its kernels and then moved
// into the registry; the status of every AddFunction() call goes through
// DCHECK_OK.
void RegisterScalarAggregateBasic(FunctionRegistry* registry) {
  // Static storage duration: the addresses of these defaults are passed to the
  // function constructors below, so they remain valid after this call returns.
  static auto default_scalar_aggregate_options = ScalarAggregateOptions::Defaults();
  static auto default_count_options = CountOptions::Defaults();

  // ---- count_all: takes no input (counts all rows), outputs int64 scalar ----
  std::shared_ptr<ScalarAggregateFunction> func =
      std::make_shared<ScalarAggregateFunction>("count_all", Arity::Nullary(),
                                                count_all_doc, NULLPTR);
  AddAggKernel(KernelSignature::Make({}, int64()), CountAllInit, func.get());
  DCHECK_OK(registry->AddFunction(std::move(func)));

  // ---- count: takes any input, outputs int64 scalar ----
  func = std::make_shared<ScalarAggregateFunction>("count", Arity::Unary(), count_doc,
                                                   &default_count_options);
  // A default-constructed InputType is used as the single argument type.
  AddAggKernel(KernelSignature::Make({InputType()}, int64()), CountInit, func.get());
  DCHECK_OK(registry->AddFunction(std::move(func)));

  // ---- count_distinct: takes any input, outputs int64 scalar ----
  func = std::make_shared<ScalarAggregateFunction>(
      "count_distinct", Arity::Unary(), count_distinct_doc, &default_count_options);
  AddCountDistinctKernels(func.get());
  DCHECK_OK(registry->AddFunction(std::move(func)));

  // ---- sum ----
  func = std::make_shared<ScalarAggregateFunction>("sum", Arity::Unary(), sum_doc,
                                                   &default_scalar_aggregate_options);
  auto* sum_fn = func.get();
  AddArrayScalarAggKernels(SumInit, {boolean()}, uint64(), sum_fn);
  // Decimal kernels: output type is FirstType, registered at SimdLevel::NONE.
  for (const Type::type decimal_id : {Type::DECIMAL128, Type::DECIMAL256}) {
    AddAggKernel(KernelSignature::Make({decimal_id}, FirstType), SumInit, sum_fn,
                 SimdLevel::NONE);
  }
  AddArrayScalarAggKernels(SumInit, SignedIntTypes(), int64(), sum_fn);
  AddArrayScalarAggKernels(SumInit, UnsignedIntTypes(), uint64(), sum_fn);
  AddArrayScalarAggKernels(SumInit, FloatingPointTypes(), float64(), sum_fn);
  AddArrayScalarAggKernels(SumInit, {null()}, int64(), sum_fn);
  // SIMD variants for sum. `cpu_info` is declared here, at function scope, and
  // reused by the mean and min_max sections further down.
#if defined(ARROW_HAVE_RUNTIME_AVX2) || defined(ARROW_HAVE_RUNTIME_AVX512)
  using arrow::internal::CpuInfo;
  auto cpu_info = CpuInfo::GetInstance();
#endif
#if defined(ARROW_HAVE_RUNTIME_AVX2)
  if (cpu_info->IsSupported(CpuInfo::AVX2)) {
    AddSumAvx2AggKernels(sum_fn);
  }
#endif
#if defined(ARROW_HAVE_RUNTIME_AVX512)
  if (cpu_info->IsSupported(CpuInfo::AVX512)) {
    AddSumAvx512AggKernels(sum_fn);
  }
#endif
  DCHECK_OK(registry->AddFunction(std::move(func)));

  // ---- mean ----
  func = std::make_shared<ScalarAggregateFunction>("mean", Arity::Unary(), mean_doc,
                                                   &default_scalar_aggregate_options);
  auto* mean_fn = func.get();
  AddArrayScalarAggKernels(MeanInit, {boolean()}, float64(), mean_fn);
  AddArrayScalarAggKernels(MeanInit, NumericTypes(), float64(), mean_fn);
  for (const Type::type decimal_id : {Type::DECIMAL128, Type::DECIMAL256}) {
    AddAggKernel(KernelSignature::Make({decimal_id}, FirstType), MeanInit, mean_fn,
                 SimdLevel::NONE);
  }
  AddArrayScalarAggKernels(MeanInit, {null()}, float64(), mean_fn);
  // SIMD variants for mean
#if defined(ARROW_HAVE_RUNTIME_AVX2)
  if (cpu_info->IsSupported(CpuInfo::AVX2)) {
    AddMeanAvx2AggKernels(mean_fn);
  }
#endif
#if defined(ARROW_HAVE_RUNTIME_AVX512)
  if (cpu_info->IsSupported(CpuInfo::AVX512)) {
    AddMeanAvx512AggKernels(mean_fn);
  }
#endif
  DCHECK_OK(registry->AddFunction(std::move(func)));

  // ---- first_last ----
  func = std::make_shared<ScalarAggregateFunction>(
      "first_last", Arity::Unary(), first_last_doc, &default_scalar_aggregate_options);
  // Raw pointer kept past the std::move below; it is handed to the
  // "first" and "last" kernel helpers.
  auto* first_last_fn = func.get();
  AddFirstLastKernels(FirstLastInit, {boolean(), fixed_size_binary(1)}, first_last_fn);
  AddFirstLastKernels(FirstLastInit, NumericTypes(), first_last_fn);
  AddFirstLastKernels(FirstLastInit, BaseBinaryTypes(), first_last_fn);
  AddFirstLastKernels(FirstLastInit, TemporalTypes(), first_last_fn);
  DCHECK_OK(registry->AddFunction(std::move(func)));

  // ---- first / last: convenience functions given the first_last function ----
  func = std::make_shared<ScalarAggregateFunction>("first", Arity::Unary(), first_doc,
                                                   &default_scalar_aggregate_options);
  AddFirstOrLastAggKernel<FirstOrLast::First>(func.get(), first_last_fn);
  DCHECK_OK(registry->AddFunction(std::move(func)));

  func = std::make_shared<ScalarAggregateFunction>("last", Arity::Unary(), last_doc,
                                                   &default_scalar_aggregate_options);
  AddFirstOrLastAggKernel<FirstOrLast::Last>(func.get(), first_last_fn);
  DCHECK_OK(registry->AddFunction(std::move(func)));

  // ---- min_max ----
  func = std::make_shared<ScalarAggregateFunction>("min_max", Arity::Unary(), min_max_doc,
                                                   &default_scalar_aggregate_options);
  // Raw pointer kept past the std::move below; it is handed to the
  // "min" and "max" kernel helpers.
  auto* min_max_fn = func.get();
  AddMinMaxKernels(MinMaxInitDefault, {null(), boolean()}, min_max_fn);
  AddMinMaxKernels(MinMaxInitDefault, NumericTypes(), min_max_fn);
  AddMinMaxKernels(MinMaxInitDefault, TemporalTypes(), min_max_fn);
  AddMinMaxKernels(MinMaxInitDefault, BaseBinaryTypes(), min_max_fn);
  // Kernels registered by type id rather than by concrete DataType.
  for (const Type::type type_id : {Type::FIXED_SIZE_BINARY, Type::INTERVAL_MONTHS,
                                   Type::DECIMAL128, Type::DECIMAL256}) {
    AddMinMaxKernel(MinMaxInitDefault, type_id, min_max_fn);
  }
  // SIMD variants for min_max
#if defined(ARROW_HAVE_RUNTIME_AVX2)
  if (cpu_info->IsSupported(CpuInfo::AVX2)) {
    AddMinMaxAvx2AggKernels(min_max_fn);
  }
#endif
#if defined(ARROW_HAVE_RUNTIME_AVX512)
  if (cpu_info->IsSupported(CpuInfo::AVX512)) {
    AddMinMaxAvx512AggKernels(min_max_fn);
  }
#endif
  DCHECK_OK(registry->AddFunction(std::move(func)));

  // ---- min / max: convenience functions given the min_max function ----
  // Both share the same documentation object (min_or_max_doc).
  func = std::make_shared<ScalarAggregateFunction>("min", Arity::Unary(), min_or_max_doc,
                                                   &default_scalar_aggregate_options);
  AddMinOrMaxAggKernel<MinOrMax::Min>(func.get(), min_max_fn);
  DCHECK_OK(registry->AddFunction(std::move(func)));

  func = std::make_shared<ScalarAggregateFunction>("max", Arity::Unary(), min_or_max_doc,
                                                   &default_scalar_aggregate_options);
  AddMinOrMaxAggKernel<MinOrMax::Max>(func.get(), min_max_fn);
  DCHECK_OK(registry->AddFunction(std::move(func)));

  // ---- product ----
  func = std::make_shared<ScalarAggregateFunction>("product", Arity::Unary(), product_doc,
                                                   &default_scalar_aggregate_options);
  auto* product_fn = func.get();
  AddArrayScalarAggKernels(ProductInit::Init, {boolean()}, uint64(), product_fn);
  AddArrayScalarAggKernels(ProductInit::Init, SignedIntTypes(), int64(), product_fn);
  AddArrayScalarAggKernels(ProductInit::Init, UnsignedIntTypes(), uint64(), product_fn);
  AddArrayScalarAggKernels(ProductInit::Init, FloatingPointTypes(), float64(),
                           product_fn);
  for (const Type::type decimal_id : {Type::DECIMAL128, Type::DECIMAL256}) {
    AddAggKernel(KernelSignature::Make({decimal_id}, FirstType), ProductInit::Init,
                 product_fn, SimdLevel::NONE);
  }
  AddArrayScalarAggKernels(ProductInit::Init, {null()}, int64(), product_fn);
  DCHECK_OK(registry->AddFunction(std::move(func)));

  // ---- any: boolean input, boolean output ----
  func = std::make_shared<ScalarAggregateFunction>("any", Arity::Unary(), any_doc,
                                                   &default_scalar_aggregate_options);
  AddArrayScalarAggKernels(AnyInit, {boolean()}, boolean(), func.get());
  DCHECK_OK(registry->AddFunction(std::move(func)));

  // ---- all: boolean input, boolean output ----
  func = std::make_shared<ScalarAggregateFunction>("all", Arity::Unary(), all_doc,
                                                   &default_scalar_aggregate_options);
  AddArrayScalarAggKernels(AllInit, {boolean()}, boolean(), func.get());
  DCHECK_OK(registry->AddFunction(std::move(func)));

  // ---- index: int64 output; constructed without a default-options argument ----
  func = std::make_shared<ScalarAggregateFunction>("index", Arity::Unary(), index_doc);
  auto* index_fn = func.get();
  AddBasicAggKernels(IndexInit::Init, BaseBinaryTypes(), int64(), index_fn);
  AddBasicAggKernels(IndexInit::Init, PrimitiveTypes(), int64(), index_fn);
  AddBasicAggKernels(IndexInit::Init, TemporalTypes(), int64(), index_fn);
  AddBasicAggKernels(IndexInit::Init,
                     {fixed_size_binary(1), decimal128(1, 0), decimal256(1, 0), null()},
                     int64(), index_fn);
  DCHECK_OK(registry->AddFunction(std::move(func)));
}