in spark-load/spark-load-dpp/src/main/java/org/apache/doris/load/loadv2/dpp/DppUtils.java [116:172]
/**
 * Translates the type name of an ETL column into the matching Spark SQL {@link DataType}.
 *
 * <p>The lookup is driven by {@code column.columnType}; for the decimal family,
 * {@code column.precision} and {@code column.scale} parameterize the resulting type.
 *
 * @param column the ETL column whose {@code columnType} is to be mapped
 * @param regardDistinctColumnAsBinary if {@code true}, HLL and BITMAP columns are mapped to
 *                                     {@code BinaryType}; otherwise they are mapped to {@code StringType}
 * @return the Spark SQL data type selected for the column
 * @throws RuntimeException if {@code column.columnType} is not one of the names handled below
 */
public static DataType getDataTypeFromColumn(EtlJobConfig.EtlColumn column, boolean regardDistinctColumnAsBinary) {
    switch (column.columnType) {
        // Every type in this group is represented as a Spark string.
        // NOTE(review): BOOLEAN and LARGEINT are deliberately listed here by the original
        // mapping; presumably they are converted from text later in the pipeline - confirm.
        case "BOOLEAN":
        case "LARGEINT":
        case "CHAR":
        case "VARCHAR":
        case "STRING":
        case "TEXT":
        case "OBJECT":
            return DataTypes.StringType;

        // Fixed-width integers.
        case "TINYINT":
            return DataTypes.ByteType;
        case "SMALLINT":
            return DataTypes.ShortType;
        case "INT":
            return DataTypes.IntegerType;
        case "BIGINT":
            return DataTypes.LongType;

        // Floating point.
        case "FLOAT":
            return DataTypes.FloatType;
        case "DOUBLE":
            return DataTypes.DoubleType;

        // Date and time.
        case "DATE":
        case "DATEV2":
            return DataTypes.DateType;
        case "DATETIME":
        case "DATETIMEV2":
            return DataTypes.TimestampType;

        // Distinct-count columns: the caller chooses between binary and string.
        case "HLL":
        case "BITMAP":
            if (regardDistinctColumnAsBinary) {
                return DataTypes.BinaryType;
            } else {
                return DataTypes.StringType;
            }

        // All decimal variants share one Spark decimal type built from the column's precision and scale.
        case "DECIMALV2":
        case "DECIMAL32":
        case "DECIMAL64":
        case "DECIMAL128":
            return DecimalType.apply(column.precision, column.scale);

        default:
            throw new RuntimeException("Reason: invalid column type:" + column);
    }
}