in native/spark-expr/src/conversion_funcs/cast.rs [866:988]
fn cast_array(
array: ArrayRef,
to_type: &DataType,
cast_options: &SparkCastOptions,
) -> DataFusionResult<ArrayRef> {
use DataType::*;
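    // Pre-process the input with the session timezone (relevant for timestamp arrays) before casting.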
let array = array_with_timezone(array, cast_options.timezone.clone(), Some(to_type))?;
let from_type = array.data_type().clone();
let native_cast_options: CastOptions = CastOptions {
        safe: !matches!(cast_options.eval_mode, EvalMode::Ansi), // non-ANSI modes use Arrow's safe cast (nulls instead of errors)
format_options: FormatOptions::new()
.with_timestamp_tz_format(TIMESTAMP_FORMAT)
.with_timestamp_format(TIMESTAMP_FORMAT),
};
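    // Unwrap string/binary dictionary arrays: cast the dictionary values once, then either keep the
    // dictionary encoding or materialize the result via `take`, depending on the target type.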
let array = match &from_type {
Dictionary(key_type, value_type)
if key_type.as_ref() == &Int32
&& (value_type.as_ref() == &Utf8
|| value_type.as_ref() == &LargeUtf8
|| value_type.as_ref() == &Binary
|| value_type.as_ref() == &LargeBinary) =>
{
let dict_array = array
.as_any()
.downcast_ref::<DictionaryArray<Int32Type>>()
.expect("Expected a dictionary array");
let casted_dictionary = DictionaryArray::<Int32Type>::new(
dict_array.keys().clone(),
cast_array(Arc::clone(dict_array.values()), to_type, cast_options)?,
);
let casted_result = match to_type {
Dictionary(_, _) => Arc::new(casted_dictionary.clone()),
_ => take(casted_dictionary.values().as_ref(), dict_array.keys(), None)?,
};
return Ok(spark_cast_postprocess(casted_result, &from_type, to_type));
}
_ => array,
};
let from_type = array.data_type();
let eval_mode = cast_options.eval_mode;
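    // Dispatch on (from_type, to_type), using Spark-specific kernels where Arrow's cast semantics
    // differ from Spark's.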
let cast_result = match (from_type, to_type) {
(Utf8, Boolean) => spark_cast_utf8_to_boolean::<i32>(&array, eval_mode),
(LargeUtf8, Boolean) => spark_cast_utf8_to_boolean::<i64>(&array, eval_mode),
(Utf8, Timestamp(_, _)) => {
cast_string_to_timestamp(&array, to_type, eval_mode, &cast_options.timezone)
}
(Utf8, Date32) => cast_string_to_date(&array, to_type, eval_mode),
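        // Narrowing integer casts need Spark-specific overflow behavior (error in Ansi mode);
        // Try mode is excluded from this arm.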
(Int64, Int32)
| (Int64, Int16)
| (Int64, Int8)
| (Int32, Int16)
| (Int32, Int8)
| (Int16, Int8)
if eval_mode != EvalMode::Try =>
{
spark_cast_int_to_int(&array, eval_mode, from_type, to_type)
}
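        // Spark-compatible string-to-integer parsing; invalid input errors in Ansi mode and
        // becomes null otherwise.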
(Utf8, Int8 | Int16 | Int32 | Int64) => {
cast_string_to_int::<i32>(to_type, &array, eval_mode)
}
(LargeUtf8, Int8 | Int16 | Int32 | Int64) => {
cast_string_to_int::<i64>(to_type, &array, eval_mode)
}
(Float64, Utf8) => spark_cast_float64_to_utf8::<i32>(&array, eval_mode),
(Float64, LargeUtf8) => spark_cast_float64_to_utf8::<i64>(&array, eval_mode),
(Float32, Utf8) => spark_cast_float32_to_utf8::<i32>(&array, eval_mode),
(Float32, LargeUtf8) => spark_cast_float32_to_utf8::<i64>(&array, eval_mode),
(Float32, Decimal128(precision, scale)) => {
cast_float32_to_decimal128(&array, *precision, *scale, eval_mode)
}
(Float64, Decimal128(precision, scale)) => {
cast_float64_to_decimal128(&array, *precision, *scale, eval_mode)
}
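        // Floating-point and decimal to integer casts follow Spark's truncation and overflow
        // rules; Try mode is excluded from this arm.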
(Float32, Int8)
| (Float32, Int16)
| (Float32, Int32)
| (Float32, Int64)
| (Float64, Int8)
| (Float64, Int16)
| (Float64, Int32)
| (Float64, Int64)
| (Decimal128(_, _), Int8)
| (Decimal128(_, _), Int16)
| (Decimal128(_, _), Int32)
| (Decimal128(_, _), Int64)
if eval_mode != EvalMode::Try =>
{
spark_cast_nonintegral_numeric_to_integral(&array, eval_mode, from_type, to_type)
}
(Struct(_), Utf8) => Ok(casts_struct_to_string(array.as_struct(), cast_options)?),
(Struct(_), Struct(_)) => Ok(cast_struct_to_struct(
array.as_struct(),
from_type,
to_type,
cast_options,
)?),
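        // Unsigned-to-signed integer casts are only performed when explicitly enabled in the
        // cast options.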
(UInt8 | UInt16 | UInt32 | UInt64, Int8 | Int16 | Int32 | Int64)
if cast_options.allow_cast_unsigned_ints =>
{
Ok(cast_with_options(&array, to_type, &CAST_OPTIONS)?)
}
_ if cast_options.is_adapting_schema
|| is_datafusion_spark_compatible(from_type, to_type, cast_options.allow_incompat) =>
{
// use DataFusion cast only when we know that it is compatible with Spark
Ok(cast_with_options(&array, to_type, &native_cast_options)?)
}
_ => {
// we should never reach this code because the Scala code should be checking
// for supported cast operations and falling back to Spark for anything that
// is not yet supported
Err(SparkError::Internal(format!(
"Native cast invoked for unsupported cast from {from_type:?} to {to_type:?}"
)))
}
};
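    // Apply any Spark-specific post-processing for this from/to type combination before returning.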
Ok(spark_cast_postprocess(cast_result?, from_type, to_type))
}