in spark-load/spark-load-dpp/src/main/java/org/apache/doris/load/loadv2/etl/SparkEtlJob.java [126:174]
private void checkConfig() throws Exception {
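    // Check each table in the job config: detect hive sources and collect bitmap-related columns.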
    for (Map.Entry<Long, EtlTable> entry : etlJobConfig.tables.entrySet()) {
        boolean isHiveSource = false;
        Set<String> bitmapDictColumns = Sets.newHashSet();
        Set<String> binaryBitmapColumns = Sets.newHashSet();
        for (EtlFileGroup fileGroup : entry.getValue().fileGroups) {
            if (fileGroup.sourceType == EtlJobConfig.SourceType.HIVE) {
                isHiveSource = true;
            }
            Map<String, EtlColumnMapping> newColumnMappings = Maps.newHashMap();
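            // Parse each mapping expression with Spark SQL to get the name of the function it applies.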
            for (Map.Entry<String, EtlColumnMapping> mappingEntry : fileGroup.columnMappings.entrySet()) {
                String columnName = mappingEntry.getKey();
                String exprStr = mappingEntry.getValue().toDescription();
                String funcName = functions.expr(exprStr).expr().prettyName();
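                // bitmap_hash is not supported by spark load; fail fast.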
                if (funcName.equalsIgnoreCase(BITMAP_HASH)) {
                    throw new SparkDppException("spark load does not support bitmap_hash now");
                }
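                // Columns built via binary_bitmap, bitmap_dict or to_bitmap are handled specially,
                // so they are dropped from the ordinary column mappings.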
                if (funcName.equalsIgnoreCase(BINARY_BITMAP)) {
                    binaryBitmapColumns.add(columnName.toLowerCase());
                } else if (funcName.equalsIgnoreCase(BITMAP_DICT_FUNC)) {
                    bitmapDictColumns.add(columnName.toLowerCase());
                } else if (!funcName.equalsIgnoreCase(TO_BITMAP_FUNC)) {
                    newColumnMappings.put(mappingEntry.getKey(), mappingEntry.getValue());
                }
            }
            // keep only the mappings not consumed by the bitmap handling above
            fileGroup.columnMappings = newColumnMappings;
        }
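        // Record the per-table results for later stages of the ETL job.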
        if (isHiveSource) {
            hiveSourceTables.add(entry.getKey());
        }
        if (!bitmapDictColumns.isEmpty()) {
            tableToBitmapDictColumns.put(entry.getKey(), bitmapDictColumns);
        }
        if (!binaryBitmapColumns.isEmpty()) {
            tableToBinaryBitmapColumns.put(entry.getKey(), binaryBitmapColumns);
        }
    }
    LOG.info("init hiveSourceTables: " + hiveSourceTables
            + ", tableToBitmapDictColumns: " + tableToBitmapDictColumns);
    // spark etl can process at most one hive source table and at most one table with bitmap type columns.
    if (hiveSourceTables.size() > 1
            || tableToBitmapDictColumns.size() > 1
            || tableToBinaryBitmapColumns.size() > 1) {
        throw new Exception("spark etl job must have only one hive table with bitmap type column to process");
    }
}