DB::DataTypePtr TypeParser::parseType()

in cpp-ch/local-engine/Parser/TypeParser.cpp [87:242]


/// Converts a Substrait type into the corresponding ClickHouse data type.
///
/// Mapping implemented below:
///   bool -> "Bool", i8/i16/i32/i64 -> Int8/Int16/Int32/Int64, string and binary -> String,
///   fixed_char and fixed_binary -> FixedString(length), fp32/fp64 -> Float32/Float64,
///   timestamp -> DateTime64 with scale 6, date -> Date32, decimal -> Decimal(precision, scale),
///   struct -> Tuple, list -> Array, map -> Map, nothing -> Nothing.
/// Every result is passed through tryWrapNullable together with the nullability of the Substrait type
/// (for `nothing` the nullability is always NULLABILITY_NULLABLE).
///
/// @param substrait_type  The Substrait type to convert.
/// @param field_names     Optional list of field names in DFS order (see NamedStruct in substrait/type.proto).
///                        When non-null, this call removes the front element (the name that belongs to this
///                        type node) and struct members consume the following names recursively, so the list
///                        is modified. When null, struct member names are read from struct_().names().
/// @return The ClickHouse data type built for `substrait_type`.
/// @throws DB::Exception with code UNKNOWN_TYPE when the type is not supported, when a decimal precision
///         exceeds DB::DataTypeDecimal128::maxPrecision(), or when `field_names` is given but runs out of names.
DB::DataTypePtr TypeParser::parseType(const substrait::Type & substrait_type, std::list<String> * field_names)
{
    DB::DataTypePtr ch_type = nullptr;

    if (field_names)
    {
        /// Each type node owns exactly one entry of the DFS name list; callers read it via front() before calling
        /// us, here it is only consumed. pop_front() on an empty list is undefined behaviour, and an assert would
        /// vanish in release builds, so fail explicitly (reusing the error code this function already throws).
        if (field_names->empty())
            throw DB::Exception(DB::ErrorCodes::UNKNOWN_TYPE, "No field name left for type {}", substrait_type.DebugString());
        field_names->pop_front();
    }

    if (substrait_type.has_bool_())
    {
        ch_type = DB::DataTypeFactory::instance().get("Bool");
        ch_type = tryWrapNullable(substrait_type.bool_().nullability(), ch_type);
    }
    else if (substrait_type.has_i8())
    {
        ch_type = std::make_shared<DB::DataTypeInt8>();
        ch_type = tryWrapNullable(substrait_type.i8().nullability(), ch_type);
    }
    else if (substrait_type.has_i16())
    {
        ch_type = std::make_shared<DB::DataTypeInt16>();
        ch_type = tryWrapNullable(substrait_type.i16().nullability(), ch_type);
    }
    else if (substrait_type.has_i32())
    {
        ch_type = std::make_shared<DB::DataTypeInt32>();
        ch_type = tryWrapNullable(substrait_type.i32().nullability(), ch_type);
    }
    else if (substrait_type.has_i64())
    {
        ch_type = std::make_shared<DB::DataTypeInt64>();
        ch_type = tryWrapNullable(substrait_type.i64().nullability(), ch_type);
    }
    else if (substrait_type.has_string())
    {
        ch_type = std::make_shared<DB::DataTypeString>();
        ch_type = tryWrapNullable(substrait_type.string().nullability(), ch_type);
    }
    else if (substrait_type.has_binary())
    {
        /// Binary is represented with the same String type as textual strings.
        ch_type = std::make_shared<DB::DataTypeString>();
        ch_type = tryWrapNullable(substrait_type.binary().nullability(), ch_type);
    }
    else if (substrait_type.has_fixed_char())
    {
        const auto & fixed_char = substrait_type.fixed_char();
        ch_type = std::make_shared<DB::DataTypeFixedString>(fixed_char.length());
        ch_type = tryWrapNullable(fixed_char.nullability(), ch_type);
    }
    else if (substrait_type.has_fixed_binary())
    {
        const auto & fixed_binary = substrait_type.fixed_binary();
        ch_type = std::make_shared<DB::DataTypeFixedString>(fixed_binary.length());
        ch_type = tryWrapNullable(fixed_binary.nullability(), ch_type);
    }
    else if (substrait_type.has_fp32())
    {
        ch_type = std::make_shared<DB::DataTypeFloat32>();
        ch_type = tryWrapNullable(substrait_type.fp32().nullability(), ch_type);
    }
    else if (substrait_type.has_fp64())
    {
        ch_type = std::make_shared<DB::DataTypeFloat64>();
        ch_type = tryWrapNullable(substrait_type.fp64().nullability(), ch_type);
    }
    else if (substrait_type.has_timestamp())
    {
        /// Timestamps are always built as DateTime64 with scale 6.
        ch_type = std::make_shared<DB::DataTypeDateTime64>(6);
        ch_type = tryWrapNullable(substrait_type.timestamp().nullability(), ch_type);
    }
    else if (substrait_type.has_date())
    {
        ch_type = std::make_shared<DB::DataTypeDate32>();
        ch_type = tryWrapNullable(substrait_type.date().nullability(), ch_type);
    }
    else if (substrait_type.has_decimal())
    {
        UInt32 precision = substrait_type.decimal().precision();
        UInt32 scale = substrait_type.decimal().scale();
        /// Precisions beyond what Decimal128 can hold are rejected.
        if (precision > DB::DataTypeDecimal128::maxPrecision())
            throw DB::Exception(DB::ErrorCodes::UNKNOWN_TYPE, "Spark doesn't support decimal type with precision {}", precision);
        ch_type = DB::createDecimal<DB::DataTypeDecimal>(precision, scale);
        ch_type = tryWrapNullable(substrait_type.decimal().nullability(), ch_type);
    }
    else if (substrait_type.has_struct_())
    {
        const auto & types = substrait_type.struct_().types();
        DB::DataTypes struct_field_types(types.size());
        DB::Strings struct_field_names;

        if (field_names)
        {
            /// Construct CH tuple type following the DFS rule.
            /// Refer to NamedStruct in https://github.com/oap-project/gluten/blob/main/cpp-ch/local-engine/proto/substrait/type.proto
            for (int i = 0; i < types.size(); ++i)
            {
                /// The member's name is peeked here and consumed by the recursive call below.
                /// front() on an empty list is undefined behaviour, so verify before peeking.
                if (field_names->empty())
                    throw DB::Exception(
                        DB::ErrorCodes::UNKNOWN_TYPE, "No field name left for struct member of type {}", types[i].DebugString());
                struct_field_names.push_back(field_names->front());
                struct_field_types[i] = parseType(types[i], field_names);
            }
        }
        else
        {
            /// Construct CH tuple type without DFS rule.
            for (int i = 0; i < types.size(); ++i)
                struct_field_types[i] = parseType(types[i]);

            /// Only non-empty names are collected.
            /// NOTE(review): if just some names are empty, the number of names differs from the number of
            /// types passed to DataTypeTuple below - confirm producers never emit such a mix.
            const auto & names = substrait_type.struct_().names();
            for (const auto & name : names)
                if (!name.empty())
                    struct_field_names.push_back(name);
        }

        /// Build a named tuple when names are available, otherwise an unnamed one.
        if (!struct_field_names.empty())
            ch_type = std::make_shared<DB::DataTypeTuple>(struct_field_types, struct_field_names);
        else
            ch_type = std::make_shared<DB::DataTypeTuple>(struct_field_types);

        ch_type = tryWrapNullable(substrait_type.struct_().nullability(), ch_type);
    }
    else if (substrait_type.has_list())
    {
        /// NOTE(review): the element type is parsed without field_names, so a struct nested in a list
        /// takes its member names from its own struct_().names() - confirm this matches the plan producer.
        auto ch_nested_type = parseType(substrait_type.list().type());
        ch_type = std::make_shared<DB::DataTypeArray>(ch_nested_type);
        ch_type = tryWrapNullable(substrait_type.list().nullability(), ch_type);
    }
    else if (substrait_type.has_map())
    {
        if (substrait_type.map().key().has_nothing())
        {
            /// Special case: when the key type is `nothing`, both key and value become non-wrapped Nothing
            /// and the value type of the Substrait map is not inspected.
            ch_type = std::make_shared<DB::DataTypeMap>(std::make_shared<DB::DataTypeNothing>(), std::make_shared<DB::DataTypeNothing>());
            ch_type = tryWrapNullable(substrait_type.map().nullability(), ch_type);
        }
        else
        {
            /// As for lists, key and value types are parsed without field_names.
            auto ch_key_type = parseType(substrait_type.map().key());
            auto ch_val_type = parseType(substrait_type.map().value());
            ch_type = std::make_shared<DB::DataTypeMap>(ch_key_type, ch_val_type);
            ch_type = tryWrapNullable(substrait_type.map().nullability(), ch_type);
        }
    }
    else if (substrait_type.has_nothing())
    {
        /// `nothing` is always passed to tryWrapNullable as nullable.
        ch_type = std::make_shared<DB::DataTypeNothing>();
        ch_type = tryWrapNullable(substrait::Type_Nullability::Type_Nullability_NULLABILITY_NULLABLE, ch_type);
    }
    else
        throw DB::Exception(DB::ErrorCodes::UNKNOWN_TYPE, "Spark doesn't support type {}", substrait_type.DebugString());

    /// TODO(taiyang-li): consider Time/IntervalYear/IntervalDay/TimestampTZ/UUID/VarChar/UserDefined
    return ch_type;
}