in cpp-ch/local-engine/Parser/TypeParser.cpp [87:242]
/// Converts a Substrait type into the matching ClickHouse data type.
///
/// @param substrait_type  Substrait type message to convert.
/// @param field_names     Optional list of names laid out in DFS order (see NamedStruct in type.proto).
///                        When given, one name is consumed from the front for this type, and the struct
///                        branch forwards the list to every member so that each member consumes its own
///                        name. When null, struct member names are read from the struct message itself.
/// @return The ClickHouse type after it has been passed through tryWrapNullable together with the
///         nullability recorded on the Substrait type.
/// @throws DB::Exception (UNKNOWN_TYPE) when the type is not supported, when a decimal precision exceeds
///         the Decimal128 maximum, or when `field_names` runs out before every visited type got a name.
DB::DataTypePtr TypeParser::parseType(const substrait::Type & substrait_type, std::list<String> * field_names)
{
    /// Reading front() of an empty list is undefined behaviour, so fail loudly instead.
    /// Must only be called when `field_names` is not null.
    const auto require_pending_name = [&]()
    {
        if (field_names->empty())
            throw DB::Exception(
                DB::ErrorCodes::UNKNOWN_TYPE, "Not enough field names to parse type {}", substrait_type.DebugString());
    };

    if (field_names)
    {
        /// The name belonging to this type is not needed here (a struct parent has already recorded it),
        /// it only has to be removed so that the next type sees its own name at the front.
        require_pending_name();
        field_names->pop_front();
    }

    DB::DataTypePtr ch_type = nullptr;
    if (substrait_type.has_bool_())
    {
        ch_type = DB::DataTypeFactory::instance().get("Bool");
        ch_type = tryWrapNullable(substrait_type.bool_().nullability(), ch_type);
    }
    else if (substrait_type.has_i8())
    {
        ch_type = std::make_shared<DB::DataTypeInt8>();
        ch_type = tryWrapNullable(substrait_type.i8().nullability(), ch_type);
    }
    else if (substrait_type.has_i16())
    {
        ch_type = std::make_shared<DB::DataTypeInt16>();
        ch_type = tryWrapNullable(substrait_type.i16().nullability(), ch_type);
    }
    else if (substrait_type.has_i32())
    {
        ch_type = std::make_shared<DB::DataTypeInt32>();
        ch_type = tryWrapNullable(substrait_type.i32().nullability(), ch_type);
    }
    else if (substrait_type.has_i64())
    {
        ch_type = std::make_shared<DB::DataTypeInt64>();
        ch_type = tryWrapNullable(substrait_type.i64().nullability(), ch_type);
    }
    else if (substrait_type.has_string())
    {
        ch_type = std::make_shared<DB::DataTypeString>();
        ch_type = tryWrapNullable(substrait_type.string().nullability(), ch_type);
    }
    else if (substrait_type.has_binary())
    {
        /// Binary is represented with the same ClickHouse type as string.
        ch_type = std::make_shared<DB::DataTypeString>();
        ch_type = tryWrapNullable(substrait_type.binary().nullability(), ch_type);
    }
    else if (substrait_type.has_fixed_char())
    {
        const auto & fixed_char = substrait_type.fixed_char();
        ch_type = std::make_shared<DB::DataTypeFixedString>(fixed_char.length());
        ch_type = tryWrapNullable(fixed_char.nullability(), ch_type);
    }
    else if (substrait_type.has_fixed_binary())
    {
        const auto & fixed_binary = substrait_type.fixed_binary();
        ch_type = std::make_shared<DB::DataTypeFixedString>(fixed_binary.length());
        ch_type = tryWrapNullable(fixed_binary.nullability(), ch_type);
    }
    else if (substrait_type.has_fp32())
    {
        ch_type = std::make_shared<DB::DataTypeFloat32>();
        ch_type = tryWrapNullable(substrait_type.fp32().nullability(), ch_type);
    }
    else if (substrait_type.has_fp64())
    {
        ch_type = std::make_shared<DB::DataTypeFloat64>();
        ch_type = tryWrapNullable(substrait_type.fp64().nullability(), ch_type);
    }
    else if (substrait_type.has_timestamp())
    {
        /// Timestamps are mapped to DateTime64 with scale 6.
        ch_type = std::make_shared<DB::DataTypeDateTime64>(6);
        ch_type = tryWrapNullable(substrait_type.timestamp().nullability(), ch_type);
    }
    else if (substrait_type.has_date())
    {
        ch_type = std::make_shared<DB::DataTypeDate32>();
        ch_type = tryWrapNullable(substrait_type.date().nullability(), ch_type);
    }
    else if (substrait_type.has_decimal())
    {
        const auto & decimal = substrait_type.decimal();
        UInt32 precision = decimal.precision();
        UInt32 scale = decimal.scale();
        if (precision > DB::DataTypeDecimal128::maxPrecision())
            throw DB::Exception(DB::ErrorCodes::UNKNOWN_TYPE, "Spark doesn't support decimal type with precision {}", precision);
        ch_type = DB::createDecimal<DB::DataTypeDecimal>(precision, scale);
        ch_type = tryWrapNullable(decimal.nullability(), ch_type);
    }
    else if (substrait_type.has_struct_())
    {
        const auto & struct_type = substrait_type.struct_();
        const auto & types = struct_type.types();
        DB::DataTypes struct_field_types(types.size());
        DB::Strings struct_field_names;
        if (field_names)
        {
            /// Construct CH tuple type following the DFS rule.
            /// Refer to NamedStruct in https://github.com/oap-project/gluten/blob/main/cpp-ch/local-engine/proto/substrait/type.proto
            for (int i = 0; i < types.size(); ++i)
            {
                /// Record the member name before recursing: the recursive call pops it from the list.
                require_pending_name();
                struct_field_names.push_back(field_names->front());
                struct_field_types[i] = parseType(types[i], field_names);
            }
        }
        else
        {
            /// Construct CH tuple type without DFS rule.
            for (int i = 0; i < types.size(); ++i)
                struct_field_types[i] = parseType(types[i]);

            /// Empty names are skipped; if none remain an unnamed tuple is built below.
            for (const auto & name : struct_type.names())
                if (!name.empty())
                    struct_field_names.push_back(name);
        }

        if (!struct_field_names.empty())
            ch_type = std::make_shared<DB::DataTypeTuple>(struct_field_types, struct_field_names);
        else
            ch_type = std::make_shared<DB::DataTypeTuple>(struct_field_types);
        ch_type = tryWrapNullable(struct_type.nullability(), ch_type);
    }
    else if (substrait_type.has_list())
    {
        /// NOTE(review): the element type is parsed without `field_names`, so names of structs nested
        /// inside a list are not consumed from the DFS list - confirm this matches the plan producer.
        auto ch_nested_type = parseType(substrait_type.list().type());
        ch_type = std::make_shared<DB::DataTypeArray>(ch_nested_type);
        ch_type = tryWrapNullable(substrait_type.list().nullability(), ch_type);
    }
    else if (substrait_type.has_map())
    {
        const auto & map_type = substrait_type.map();
        if (map_type.key().has_nothing())
        {
            /// Special case: a `nothing` key yields Map(Nothing, Nothing); the value type is not inspected.
            ch_type = std::make_shared<DB::DataTypeMap>(std::make_shared<DB::DataTypeNothing>(), std::make_shared<DB::DataTypeNothing>());
        }
        else
        {
            /// NOTE(review): key and value are parsed without `field_names`, same caveat as for lists.
            auto ch_key_type = parseType(map_type.key());
            auto ch_val_type = parseType(map_type.value());
            ch_type = std::make_shared<DB::DataTypeMap>(ch_key_type, ch_val_type);
        }
        ch_type = tryWrapNullable(map_type.nullability(), ch_type);
    }
    else if (substrait_type.has_nothing())
    {
        /// `nothing` is always handed to tryWrapNullable as nullable, regardless of the message content.
        ch_type = std::make_shared<DB::DataTypeNothing>();
        ch_type = tryWrapNullable(substrait::Type_Nullability::Type_Nullability_NULLABILITY_NULLABLE, ch_type);
    }
    else
    {
        /// TODO(taiyang-li): consider Time/IntervalYear/IntervalDay/TimestampTZ/UUID/VarChar/UserDefined
        throw DB::Exception(DB::ErrorCodes::UNKNOWN_TYPE, "Spark doesn't support type {}", substrait_type.DebugString());
    }
    return ch_type;
}