in cpp-ch/local-engine/Parser/ExpressionParser.cpp [58:259]
/// Converts a substrait literal into a (ClickHouse data type, field value) pair.
///
/// Scalar literals map onto the matching DB data type. Lists, maps and structs are
/// parsed recursively: list elements and map values are unified through
/// getLeastSupertype, while all keys of a map must have exactly the same type.
/// A typed null yields the type reported by TypeParser::parseType and a default
/// constructed (null) field.
///
/// Throws DB::Exception with
///  - UNKNOWN_TYPE  for an unsupported literal kind or a decimal precision larger
///                  than DataTypeDecimal128::maxPrecision();
///  - LOGICAL_ERROR for map keys of differing types or a decimal payload that is
///                  shorter than the integer width selected by its precision.
std::pair<DB::DataTypePtr, DB::Field> LiteralParser::parse(const substrait::Expression_Literal & literal)
{
    DB::DataTypePtr type;
    DB::Field field;
    switch (literal.literal_type_case())
    {
        case substrait::Expression_Literal::kFp64: {
            type = std::make_shared<DB::DataTypeFloat64>();
            field = literal.fp64();
            break;
        }
        case substrait::Expression_Literal::kFp32: {
            type = std::make_shared<DB::DataTypeFloat32>();
            field = literal.fp32();
            break;
        }
        case substrait::Expression_Literal::kString: {
            type = std::make_shared<DB::DataTypeString>();
            field = literal.string();
            break;
        }
        case substrait::Expression_Literal::kBinary: {
            /// Binary payloads are carried as plain strings.
            type = std::make_shared<DB::DataTypeString>();
            field = literal.binary();
            break;
        }
        case substrait::Expression_Literal::kI64: {
            type = std::make_shared<DB::DataTypeInt64>();
            field = literal.i64();
            break;
        }
        case substrait::Expression_Literal::kI32: {
            type = std::make_shared<DB::DataTypeInt32>();
            field = literal.i32();
            break;
        }
        case substrait::Expression_Literal::kBoolean: {
            type = DB::DataTypeFactory::instance().get("Bool");
            field = literal.boolean() ? UInt8(1) : UInt8(0);
            break;
        }
        case substrait::Expression_Literal::kI16: {
            type = std::make_shared<DB::DataTypeInt16>();
            field = literal.i16();
            break;
        }
        case substrait::Expression_Literal::kI8: {
            type = std::make_shared<DB::DataTypeInt8>();
            field = literal.i8();
            break;
        }
        case substrait::Expression_Literal::kDate: {
            type = std::make_shared<DB::DataTypeDate32>();
            field = literal.date();
            break;
        }
        case substrait::Expression_Literal::kTimestamp: {
            /// Timestamps are stored as DateTime64 with scale 6.
            type = std::make_shared<DB::DataTypeDateTime64>(6);
            field = DB::DecimalField<DB::DateTime64>(literal.timestamp(), 6);
            break;
        }
        case substrait::Expression_Literal::kDecimal: {
            const UInt32 precision = literal.decimal().precision();
            const UInt32 scale = literal.decimal().scale();
            const auto & bytes = literal.decimal().value();

            /// Copies the leading sizeof(dst) bytes of the payload into dst, in host byte order.
            /// A byte-wise copy is used instead of dereferencing a reinterpret_cast'ed pointer
            /// because the string buffer carries no alignment guarantee for the target type,
            /// and the length check prevents reading past the end of a short payload.
            const auto load_unscaled = [&bytes, precision](auto & dst)
            {
                if (bytes.size() < sizeof(dst))
                    throw DB::Exception(
                        DB::ErrorCodes::LOGICAL_ERROR,
                        "Decimal literal with precision {} needs at least {} bytes, but got {}",
                        precision,
                        sizeof(dst),
                        bytes.size());
                bytes.copy(reinterpret_cast<char *>(&dst), sizeof(dst));
            };

            /// The narrowest decimal type able to hold the precision is selected.
            if (precision <= DB::DataTypeDecimal32::maxPrecision())
            {
                type = std::make_shared<DB::DataTypeDecimal32>(precision, scale);
                Int32 value = 0;
                load_unscaled(value);
                field = DB::DecimalField<DB::Decimal32>(value, scale);
            }
            else if (precision <= DB::DataTypeDecimal64::maxPrecision())
            {
                type = std::make_shared<DB::DataTypeDecimal64>(precision, scale);
                Int64 value = 0;
                load_unscaled(value);
                field = DB::DecimalField<DB::Decimal64>(value, scale);
            }
            else if (precision <= DB::DataTypeDecimal128::maxPrecision())
            {
                type = std::make_shared<DB::DataTypeDecimal128>(precision, scale);
                /// NOTE(review): relies on DB::Decimal128 being trivially copyable with the raw
                /// 128-bit value as its object representation, as the previous pointer cast did.
                DB::Decimal128 value{};
                load_unscaled(value);
                field = DB::DecimalField<DB::Decimal128>(value, scale);
            }
            else
                throw DB::Exception(DB::ErrorCodes::UNKNOWN_TYPE, "Spark doesn't support decimal type with precision {}", precision);
            break;
        }
        case substrait::Expression_Literal::kList: {
            const auto & values = literal.list().values();
            if (values.empty())
            {
                type = std::make_shared<DB::DataTypeArray>(std::make_shared<DB::DataTypeNothing>());
                field = DB::Array();
                break;
            }

            /// Every element is parsed exactly once; the first one seeds the common type and
            /// each following one widens it to the least supertype.
            DB::DataTypePtr common_type;
            DB::Array array;
            array.reserve(values.size());
            for (const auto & value : values)
            {
                auto type_and_field = parse(value);
                if (common_type)
                    common_type = DB::getLeastSupertype(DB::DataTypes{common_type, type_and_field.first});
                else
                    common_type = std::move(type_and_field.first);
                array.emplace_back(std::move(type_and_field.second));
            }
            type = std::make_shared<DB::DataTypeArray>(common_type);
            field = std::move(array);
            break;
        }
        case substrait::Expression_Literal::kEmptyList: {
            type = std::make_shared<DB::DataTypeArray>(std::make_shared<DB::DataTypeNothing>());
            field = DB::Array();
            break;
        }
        case substrait::Expression_Literal::kMap: {
            const auto & key_values = literal.map().key_values();
            if (key_values.empty())
            {
                type = std::make_shared<DB::DataTypeMap>(std::make_shared<DB::DataTypeNothing>(), std::make_shared<DB::DataTypeNothing>());
                field = DB::Map();
                break;
            }

            /// Both common types are seeded by the first entry inside the loop, so no entry
            /// has to be parsed twice.
            DB::DataTypePtr common_key_type;
            DB::DataTypePtr common_value_type;
            DB::Map map;
            map.reserve(key_values.size());
            for (const auto & key_value : key_values)
            {
                DB::Tuple tuple(2);

                DB::DataTypePtr key_type;
                std::tie(key_type, tuple[0]) = parse(key_value.key());
                /// Each key must have the same type.
                if (!common_key_type)
                    common_key_type = key_type;
                else if (!common_key_type->equals(*key_type))
                    throw DB::Exception(
                        DB::ErrorCodes::LOGICAL_ERROR,
                        "Literal map key type mismatch:{} and {}",
                        common_key_type->getName(),
                        key_type->getName());

                DB::DataTypePtr value_type;
                std::tie(value_type, tuple[1]) = parse(key_value.value());
                /// The value type is the least supertype of all values.
                if (common_value_type)
                    common_value_type = DB::getLeastSupertype(DB::DataTypes{common_value_type, value_type});
                else
                    common_value_type = std::move(value_type);

                map.emplace_back(std::move(tuple));
            }
            type = std::make_shared<DB::DataTypeMap>(common_key_type, common_value_type);
            field = std::move(map);
            break;
        }
        case substrait::Expression_Literal::kEmptyMap: {
            type = std::make_shared<DB::DataTypeMap>(std::make_shared<DB::DataTypeNothing>(), std::make_shared<DB::DataTypeNothing>());
            field = DB::Map();
            break;
        }
        case substrait::Expression_Literal::kStruct: {
            /// A struct becomes an unnamed tuple; each member keeps its own type.
            const auto & fields = literal.struct_().fields();
            DB::DataTypes types;
            types.reserve(fields.size());
            DB::Tuple tuple;
            tuple.reserve(fields.size());
            for (const auto & f : fields)
            {
                auto type_and_field = parse(f);
                types.emplace_back(std::move(type_and_field.first));
                tuple.emplace_back(std::move(type_and_field.second));
            }
            type = std::make_shared<DB::DataTypeTuple>(types);
            field = std::move(tuple);
            break;
        }
        case substrait::Expression_Literal::kNull: {
            /// A typed null: the type comes from the literal, the field stays null.
            type = TypeParser::parseType(literal.null());
            field = DB::Field{};
            break;
        }
        default: {
            throw DB::Exception(
                DB::ErrorCodes::UNKNOWN_TYPE, "Unsupported spark literal type {}", magic_enum::enum_name(literal.literal_type_case()));
        }
    }
    return std::make_pair(std::move(type), std::move(field));
}