in cpp-ch/local-engine/Functions/SparkFunctionHashingExtended.h [90:175]
/// Hashes one DB::Field whose declared type is `type`, chaining from `seed`.
///
/// - A null field, or a field whose (non-nullable) type is Nothing, returns `seed` as is.
/// - Integer, floating point, date/time, decimal and string values are forwarded to
///   applyNumber / applyDecimal / applyUnsafeBytes together with `seed`.
/// - Tuples and arrays are walked element by element, in order; the result for each
///   element is passed as the seed for the next one, and the last result is returned.
/// - Every other type throws DB::Exception with DB::ErrorCodes::NOT_IMPLEMENTED.
static ToType applyGeneric(const DB::Field & field, UInt64 seed, const DB::DataTypePtr & type)
{
    /// A null value does not contribute to the hash.
    if (field.isNull())
        return seed;

    /// Dispatch is done on the type with any Nullable wrapper stripped.
    const DB::DataTypePtr value_type = removeNullable(type);
    DB::WhichDataType kind(value_type);

    if (kind.isNothing())
        return seed;

    /// Unsigned integers.
    if (kind.isUInt8())
        return applyNumber<UInt8>(field.safeGet<UInt8>(), seed);
    if (kind.isUInt16())
        return applyNumber<UInt16>(field.safeGet<UInt16>(), seed);
    if (kind.isUInt32())
        return applyNumber<UInt32>(field.safeGet<UInt32>(), seed);
    if (kind.isUInt64())
        return applyNumber<UInt64>(field.safeGet<UInt64>(), seed);

    /// Signed integers.
    if (kind.isInt8())
        return applyNumber<Int8>(field.safeGet<Int8>(), seed);
    if (kind.isInt16())
        return applyNumber<Int16>(field.safeGet<Int16>(), seed);
    if (kind.isInt32())
        return applyNumber<Int32>(field.safeGet<Int32>(), seed);
    if (kind.isInt64())
        return applyNumber<Int64>(field.safeGet<Int64>(), seed);

    /// Floating point.
    if (kind.isFloat32())
        return applyNumber<Float32>(field.safeGet<Float32>(), seed);
    if (kind.isFloat64())
        return applyNumber<Float64>(field.safeGet<Float64>(), seed);

    /// Date and time types: Date/Date32/DateTime are read as plain integers,
    /// DateTime64 goes through the decimal path.
    if (kind.isDate())
        return applyNumber<UInt16>(field.safeGet<UInt16>(), seed);
    if (kind.isDate32())
        return applyNumber<Int32>(field.safeGet<Int32>(), seed);
    if (kind.isDateTime())
        return applyNumber<UInt32>(field.safeGet<UInt32>(), seed);
    if (kind.isDateTime64())
        return applyDecimal<DB::DateTime64>(field.safeGet<DB::DateTime64>(), seed);

    /// Decimals.
    if (kind.isDecimal32())
        return applyDecimal<DB::Decimal32>(field.safeGet<DB::Decimal32>(), seed);
    if (kind.isDecimal64())
        return applyDecimal<DB::Decimal64>(field.safeGet<DB::Decimal64>(), seed);
    if (kind.isDecimal128())
        return applyDecimal<DB::Decimal128>(field.safeGet<DB::Decimal128>(), seed);

    /// String and FixedString are both hashed over the bytes of the stored String.
    if (kind.isStringOrFixedString())
    {
        const String & bytes = field.safeGet<String>();
        return applyUnsafeBytes(bytes.data(), bytes.size(), seed);
    }

    /// Tuple: element i is hashed with the i-th element type, chaining the seed.
    if (kind.isTuple())
    {
        const auto * tuple_type = checkAndGetDataType<DB::DataTypeTuple>(value_type.get());
        assert(tuple_type);
        const auto & element_types = tuple_type->getElements();
        const DB::Tuple & values = field.safeGet<DB::Tuple>();
        assert(values.size() == element_types.size());

        UInt64 running = seed;
        const size_t element_count = element_types.size();
        for (size_t pos = 0; pos != element_count; ++pos)
            running = applyGeneric(values[pos], running, element_types[pos]);
        return running;
    }

    /// Array: every item is hashed with the nested type, chaining the seed.
    if (kind.isArray())
    {
        const auto * array_type = checkAndGetDataType<DB::DataTypeArray>(value_type.get());
        assert(array_type);
        const auto & item_type = array_type->getNestedType();
        const DB::Array & items = field.safeGet<DB::Array>();

        UInt64 running = seed;
        for (const auto & item : items)
            running = applyGeneric(item, running, item_type);
        return running;
    }

    /// Note: No need to implement for big int type in gluten
    /// Note: No need to implement for uuid/ipv4/ipv6/enum* type in gluten
    /// Note: No need to implement for decimal256 type in gluten
    /// Note: No need to implement for map type as long as spark.sql.legacy.allowHashOnMapType is false(default)
    throw DB::Exception(DB::ErrorCodes::NOT_IMPLEMENTED, "Unsupported type {}", type->getName());
}