in spark-load/spark-load-dpp/src/main/java/org/apache/doris/load/loadv2/dpp/SparkDpp.java [383:429]
/**
 * Checks a single source value against the target column's type constraints.
 *
 * <p>Only decimal range and string byte-length are validated here; every other
 * column type (and any {@code null} value) is accepted as-is.
 *
 * @param srcValue     the parsed source value (may be {@code null}; {@code null} always passes)
 * @param etlColumn    the destination column definition (type, name, length)
 * @param columnParser parser for the column; expected to be a {@link DecimalParser} for decimal types
 * @param row          the full source row, used only for diagnostic logging
 * @return {@code true} if the value satisfies the column constraints, {@code false} otherwise
 */
public boolean validateData(Object srcValue, EtlJobConfig.EtlColumn etlColumn, ColumnParser columnParser, Row row) {
    switch (etlColumn.columnType.toUpperCase()) {
        case "DECIMALV2":
        case "DECIMAL32":
        case "DECIMAL64":
        case "DECIMAL128": {
            // TODO(wb): support decimal round; see be DecimalV2Value::round
            DecimalParser parser = (DecimalParser) columnParser;
            if (srcValue != null) {
                BigDecimal decimalValue = (BigDecimal) srcValue;
                // Reject values outside the [min, max] range derived from precision/scale.
                boolean aboveMax = parser.getMaxValue().compareTo(decimalValue) < 0;
                boolean belowMin = parser.getMinValue().compareTo(decimalValue) > 0;
                if (aboveMax || belowMin) {
                    LOG.warn(String.format("decimal value is not valid for defination, column=%s,"
                                    + " value=%s,precision=%s,scale=%s",
                            etlColumn.columnName, srcValue, decimalValue.precision(), decimalValue.scale()));
                    return false;
                }
            }
            break;
        }
        case "CHAR":
        case "VARCHAR": {
            // TODO(wb) padding char type
            if (srcValue != null) {
                // Length is measured in UTF-8 bytes, matching the backend's storage limit.
                int utf8Length = srcValue.toString().getBytes(StandardCharsets.UTF_8).length;
                if (utf8Length > etlColumn.stringLength) {
                    LOG.warn(String.format("the length of input is too long than schema."
                                    + " column_name:%s,input_str[%s],schema length:%s,actual length:%s",
                            etlColumn.columnName, row.toString(), etlColumn.stringLength, utf8Length));
                    return false;
                }
            }
            break;
        }
        case "STRING":
        case "TEXT": {
            // TODO(zjf) padding string type
            if (srcValue != null) {
                // STRING/TEXT share a global byte-length cap rather than a per-column one.
                int utf8Length = srcValue.toString().getBytes(StandardCharsets.UTF_8).length;
                if (utf8Length > DppUtils.STRING_LENGTH_LIMIT) {
                    LOG.warn(String.format("The string type is limited to a maximum of %s bytes."
                                    + " column_name:%s,input_str[%s],actual length:%s",
                            DppUtils.STRING_LENGTH_LIMIT, etlColumn.columnName, row.toString(), utf8Length));
                    return false;
                }
            }
            break;
        }
        default:
            // All other column types have no validation rule here.
            break;
    }
    return true;
}