arrow/benches/cast_kernels.rs (270 lines of code) (raw):

// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. #[macro_use] extern crate criterion; use criterion::Criterion; use rand::distr::{Distribution, StandardUniform, Uniform}; use rand::Rng; use chrono::DateTime; use std::sync::Arc; extern crate arrow; use arrow::array::*; use arrow::compute::cast; use arrow::datatypes::*; use arrow::util::bench_util::*; use arrow::util::test_util::seedable_rng; fn build_array<T: ArrowPrimitiveType>(size: usize) -> ArrayRef where StandardUniform: Distribution<T::Native>, { let array = create_primitive_array::<T>(size, 0.1); Arc::new(array) } fn build_utf8_date_array(size: usize, with_nulls: bool) -> ArrayRef { use chrono::NaiveDate; // use random numbers to avoid spurious compiler optimizations wrt to branching let mut rng = seedable_rng(); let mut builder = StringBuilder::new(); let range = Uniform::new(0, 737776).unwrap(); for _ in 0..size { if with_nulls && rng.random::<f32>() > 0.8 { builder.append_null(); } else { let string = NaiveDate::from_num_days_from_ce_opt(rng.sample(range)) .unwrap() .format("%Y-%m-%d") .to_string(); builder.append_value(&string); } } Arc::new(builder.finish()) } fn build_utf8_date_time_array(size: usize, with_nulls: bool) -> ArrayRef { // use random numbers to avoid spurious compiler optimizations wrt to branching let mut rng = seedable_rng(); let mut builder = StringBuilder::new(); let range = Uniform::new(0, 1608071414123).unwrap(); for _ in 0..size { if with_nulls && rng.random::<f32>() > 0.8 { builder.append_null(); } else { let string = DateTime::from_timestamp(rng.sample(range), 0) .unwrap() .format("%Y-%m-%dT%H:%M:%S") .to_string(); builder.append_value(&string); } } Arc::new(builder.finish()) } fn build_decimal128_array(size: usize, precision: u8, scale: i8) -> ArrayRef { let mut rng = seedable_rng(); let mut builder = Decimal128Builder::with_capacity(size); for _ in 0..size { builder.append_value(rng.random_range::<i128, _>(0..1000000000)); } Arc::new( builder .finish() .with_precision_and_scale(precision, scale) .unwrap(), ) } fn build_decimal256_array(size: usize, precision: u8, scale: i8) -> ArrayRef { let mut rng = seedable_rng(); let mut builder = Decimal256Builder::with_capacity(size); let mut bytes = [0; 32]; for _ in 0..size { let num = rng.random_range::<i128, _>(0..1000000000); bytes[0..16].clone_from_slice(&num.to_le_bytes()); builder.append_value(i256::from_le_bytes(bytes)); } Arc::new( builder .finish() .with_precision_and_scale(precision, scale) .unwrap(), ) } fn build_string_array(size: usize) -> ArrayRef { let mut builder = StringBuilder::new(); for v in 0..size { match v % 3 { 0 => builder.append_value("small"), 1 => builder.append_value("larger string more than 12 bytes"), _ => builder.append_null(), } } Arc::new(builder.finish()) } fn build_dict_array(size: usize) -> ArrayRef { let values = StringArray::from_iter([ Some("small"), Some("larger string more than 12 bytes"), None, ]); let keys = UInt64Array::from_iter((0..size as u64).map(|v| v % 3)); Arc::new(DictionaryArray::new(keys, Arc::new(values))) } // cast array from specified primitive array type to desired data type fn cast_array(array: &ArrayRef, to_type: DataType) { criterion::black_box(cast(array, &to_type).unwrap()); } fn add_benchmark(c: &mut Criterion) { let i32_array = build_array::<Int32Type>(512); let i64_array = build_array::<Int64Type>(512); let f32_array = build_array::<Float32Type>(512); let f32_utf8_array = cast(&build_array::<Float32Type>(512), &DataType::Utf8).unwrap(); let f64_array = build_array::<Float64Type>(512); let date64_array = build_array::<Date64Type>(512); let date32_array = build_array::<Date32Type>(512); let time32s_array = build_array::<Time32SecondType>(512); let time64ns_array = build_array::<Time64NanosecondType>(512); let time_ns_array = build_array::<TimestampNanosecondType>(512); let time_ms_array = build_array::<TimestampMillisecondType>(512); let utf8_date_array = build_utf8_date_array(512, true); let utf8_date_time_array = build_utf8_date_time_array(512, true); let decimal128_array = build_decimal128_array(512, 10, 3); let decimal256_array = build_decimal256_array(512, 50, 3); let string_array = build_string_array(512); let wide_string_array = cast(&string_array, &DataType::LargeUtf8).unwrap(); let dict_array = build_dict_array(10_000); let string_view_array = cast(&dict_array, &DataType::Utf8View).unwrap(); let binary_view_array = cast(&string_view_array, &DataType::BinaryView).unwrap(); c.bench_function("cast int32 to int32 512", |b| { b.iter(|| cast_array(&i32_array, DataType::Int32)) }); c.bench_function("cast int32 to uint32 512", |b| { b.iter(|| cast_array(&i32_array, DataType::UInt32)) }); c.bench_function("cast int32 to float32 512", |b| { b.iter(|| cast_array(&i32_array, DataType::Float32)) }); c.bench_function("cast int32 to float64 512", |b| { b.iter(|| cast_array(&i32_array, DataType::Float64)) }); c.bench_function("cast int32 to int64 512", |b| { b.iter(|| cast_array(&i32_array, DataType::Int64)) }); c.bench_function("cast float32 to int32 512", |b| { b.iter(|| cast_array(&f32_array, DataType::Int32)) }); c.bench_function("cast float64 to float32 512", |b| { b.iter(|| cast_array(&f64_array, DataType::Float32)) }); c.bench_function("cast float64 to uint64 512", |b| { b.iter(|| cast_array(&f64_array, DataType::UInt64)) }); c.bench_function("cast int64 to int32 512", |b| { b.iter(|| cast_array(&i64_array, DataType::Int32)) }); c.bench_function("cast date64 to date32 512", |b| { b.iter(|| cast_array(&date64_array, DataType::Date32)) }); c.bench_function("cast date32 to date64 512", |b| { b.iter(|| cast_array(&date32_array, DataType::Date64)) }); c.bench_function("cast time32s to time32ms 512", |b| { b.iter(|| cast_array(&time32s_array, DataType::Time32(TimeUnit::Millisecond))) }); c.bench_function("cast time32s to time64us 512", |b| { b.iter(|| cast_array(&time32s_array, DataType::Time64(TimeUnit::Microsecond))) }); c.bench_function("cast time64ns to time32s 512", |b| { b.iter(|| cast_array(&time64ns_array, DataType::Time32(TimeUnit::Second))) }); c.bench_function("cast timestamp_ns to timestamp_s 512", |b| { b.iter(|| { cast_array( &time_ns_array, DataType::Timestamp(TimeUnit::Nanosecond, None), ) }) }); c.bench_function("cast timestamp_ms to timestamp_ns 512", |b| { b.iter(|| { cast_array( &time_ms_array, DataType::Timestamp(TimeUnit::Nanosecond, None), ) }) }); c.bench_function("cast utf8 to f32", |b| { b.iter(|| cast_array(&f32_utf8_array, DataType::Float32)) }); c.bench_function("cast i64 to string 512", |b| { b.iter(|| cast_array(&i64_array, DataType::Utf8)) }); c.bench_function("cast f32 to string 512", |b| { b.iter(|| cast_array(&f32_array, DataType::Utf8)) }); c.bench_function("cast f64 to string 512", |b| { b.iter(|| cast_array(&f64_array, DataType::Utf8)) }); c.bench_function("cast timestamp_ms to i64 512", |b| { b.iter(|| cast_array(&time_ms_array, DataType::Int64)) }); c.bench_function("cast utf8 to date32 512", |b| { b.iter(|| cast_array(&utf8_date_array, DataType::Date32)) }); c.bench_function("cast utf8 to date64 512", |b| { b.iter(|| cast_array(&utf8_date_time_array, DataType::Date64)) }); c.bench_function("cast decimal128 to decimal128 512", |b| { b.iter(|| cast_array(&decimal128_array, DataType::Decimal128(30, 5))) }); c.bench_function("cast decimal128 to decimal128 512 lower precision", |b| { b.iter(|| cast_array(&decimal128_array, DataType::Decimal128(6, 5))) }); c.bench_function("cast decimal128 to decimal256 512", |b| { b.iter(|| cast_array(&decimal128_array, DataType::Decimal256(50, 5))) }); c.bench_function("cast decimal256 to decimal128 512", |b| { b.iter(|| cast_array(&decimal256_array, DataType::Decimal128(38, 2))) }); c.bench_function("cast decimal256 to decimal256 512", |b| { b.iter(|| cast_array(&decimal256_array, DataType::Decimal256(50, 5))) }); c.bench_function("cast decimal128 to decimal128 512 with same scale", |b| { b.iter(|| cast_array(&decimal128_array, DataType::Decimal128(30, 3))) }); c.bench_function( "cast decimal128 to decimal128 512 with lower scale (infallible)", |b| b.iter(|| cast_array(&decimal128_array, DataType::Decimal128(7, -1))), ); c.bench_function("cast decimal256 to decimal256 512 with same scale", |b| { b.iter(|| cast_array(&decimal256_array, DataType::Decimal256(60, 3))) }); c.bench_function("cast dict to string view", |b| { b.iter(|| cast_array(&dict_array, DataType::Utf8View)) }); c.bench_function("cast string view to dict", |b| { b.iter(|| { cast_array( &string_view_array, DataType::Dictionary(Box::new(DataType::UInt64), Box::new(DataType::Utf8)), ) }) }); c.bench_function("cast string view to string", |b| { b.iter(|| cast_array(&string_view_array, DataType::Utf8)) }); c.bench_function("cast string view to wide string", |b| { b.iter(|| cast_array(&string_view_array, DataType::LargeUtf8)) }); c.bench_function("cast binary view to string", |b| { b.iter(|| cast_array(&binary_view_array, DataType::Utf8)) }); c.bench_function("cast binary view to wide string", |b| { b.iter(|| cast_array(&binary_view_array, DataType::LargeUtf8)) }); c.bench_function("cast string to binary view 512", |b| { b.iter(|| cast_array(&string_array, DataType::BinaryView)) }); c.bench_function("cast wide string to binary view 512", |b| { b.iter(|| cast_array(&wide_string_array, DataType::BinaryView)) }); c.bench_function("cast string view to binary view", |b| { b.iter(|| cast_array(&string_view_array, DataType::BinaryView)) }); c.bench_function("cast binary view to string view", |b| { b.iter(|| cast_array(&binary_view_array, DataType::Utf8View)) }); } criterion_group!(benches, add_benchmark); criterion_main!(benches);