in processing/src/main/java/org/apache/carbondata/processing/loading/model/CarbonLoadModelBuilder.java [147:310]
/**
 * Populates the given {@link CarbonLoadModel} from the raw and resolved load options.
 *
 * @param options       raw user-supplied load options (used for options that have no
 *                      resolved default, e.g. {@code line_separator}, {@code binary_decoder})
 * @param optionsFinal  load options merged with table/system defaults
 * @param carbonLoadModel the model to fill in (mutated in place)
 * @param hadoopConf    Hadoop configuration used when deriving CSV header columns
 * @param partitions    partition column -&gt; value map; columns with a non-null static
 *                      value are excluded from the expected CSV header (non-dataframe loads)
 * @param isDataFrame   true when the input is a dataframe rather than CSV files
 * @throws InvalidLoadOptionException if any option value is invalid or inconsistent
 * @throws IOException if reading load metadata or the CSV header fails
 */
public void build(
    Map<String, String> options,
    Map<String, String> optionsFinal,
    CarbonLoadModel carbonLoadModel,
    Configuration hadoopConf,
    Map<String, String> partitions,
    boolean isDataFrame) throws InvalidLoadOptionException, IOException {
  carbonLoadModel.setTableName(table.getTableName());
  carbonLoadModel.setDatabaseName(table.getDatabaseName());
  carbonLoadModel.setTablePath(table.getTablePath());
  carbonLoadModel.setCarbonTransactionalTable(table.isTransactionalTable());
  CarbonDataLoadSchema dataLoadSchema = new CarbonDataLoadSchema(table);
  // Need to fill dimension relation
  carbonLoadModel.setCarbonDataLoadSchema(dataLoadSchema);
  String sortScope = optionsFinal.get("sort_scope");
  String badRecordsLoggerEnable = optionsFinal.get("bad_records_logger_enable");
  String badRecordsAction = optionsFinal.get("bad_records_action");
  String badRecordPath = optionsFinal.get("bad_record_path");
  String globalSortPartitions = optionsFinal.get("global_sort_partitions");
  String timestampformat = optionsFinal.get("timestampformat");
  String dateFormat = optionsFinal.get("dateformat");
  String delimiter = optionsFinal.get("delimiter");
  String complexDelimiterLevel1 = optionsFinal.get("complex_delimiter_level_1");
  String complexDelimiterLevel2 = optionsFinal.get("complex_delimiter_level_2");
  String complexDelimiterLevel3 = optionsFinal.get("complex_delimiter_level_3");
  String complexDelimiterLevel4 = optionsFinal.get("complex_delimiter_level_4");
  validateDateTimeFormat(timestampformat, "TimestampFormat");
  validateDateTimeFormat(dateFormat, "DateFormat");
  // A bad-record location is mandatory whenever bad records are logged or redirected.
  if (Boolean.parseBoolean(badRecordsLoggerEnable) ||
      LoggerAction.REDIRECT.name().equalsIgnoreCase(badRecordsAction)) {
    if (!StringUtils.isEmpty(badRecordPath)) {
      badRecordPath = CarbonUtil.checkAndAppendHDFSUrl(badRecordPath);
    } else {
      throw new InvalidLoadOptionException(
          "Cannot redirect bad records as bad record location is not provided.");
    }
  }
  carbonLoadModel.setBadRecordsLocation(badRecordPath);
  validateGlobalSortPartitions(globalSortPartitions);
  carbonLoadModel.setEscapeChar(checkDefaultValue(optionsFinal.get("escapechar"), "\\"));
  carbonLoadModel.setQuoteChar(
      CarbonUtil.unescapeChar(checkDefaultValue(optionsFinal.get("quotechar"), "\"")));
  carbonLoadModel.setCommentChar(checkDefaultValue(optionsFinal.get("commentchar"), "#"));
  // line_separator is optional; only override the model default when the user supplied one.
  String lineSeparator = CarbonUtil.unescapeChar(options.get("line_separator"));
  if (lineSeparator != null) {
    carbonLoadModel.setLineSeparator(lineSeparator);
  }
  // if there isn't file header in csv file and load sql doesn't provide FILEHEADER option,
  // we should use table schema to generate file header.
  String fileHeader = optionsFinal.get("fileheader");
  String headerOption = optionsFinal.get("header");
  if (StringUtils.isNotEmpty(headerOption)) {
    if (!headerOption.equalsIgnoreCase("true") &&
        !headerOption.equalsIgnoreCase("false")) {
      throw new InvalidLoadOptionException(
          "'header' option should be either 'true' or 'false'.");
    }
    // whether the csv file has file header, the default value is true
    if (Boolean.parseBoolean(headerOption)) {
      // header=true means the CSV carries its own header line; a separate
      // 'fileheader' option would be redundant/conflicting.
      if (!StringUtils.isEmpty(fileHeader)) {
        throw new InvalidLoadOptionException(
            "When 'header' option is true, 'fileheader' option is not required.");
      }
    } else {
      if (StringUtils.isEmpty(fileHeader)) {
        // header=false and no explicit fileheader: derive it from the table's
        // create-order columns.
        List<CarbonColumn> columns = table.getCreateOrderColumn();
        List<String> columnNames = new ArrayList<>(columns.size());
        for (CarbonColumn column : columns) {
          columnNames.add(column.getColName());
        }
        fileHeader = Strings.mkString(columnNames.toArray(new String[0]), ",");
      }
    }
  }
  String binaryDecoder = options.get("binary_decoder");
  carbonLoadModel.setBinaryDecoder(binaryDecoder);
  carbonLoadModel.setTimestampFormat(timestampformat);
  carbonLoadModel.setDateFormat(dateFormat);
  carbonLoadModel.setDefaultTimestampFormat(
      CarbonProperties.getInstance().getProperty(
          CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT,
          CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT));
  carbonLoadModel.setDefaultDateFormat(
      CarbonProperties.getInstance().getProperty(
          CarbonCommonConstants.CARBON_DATE_FORMAT,
          CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT));
  // These model fields are stored as "<optionName>,<value>" strings.
  carbonLoadModel.setSerializationNullFormat(
      TableOptionConstant.SERIALIZATION_NULL_FORMAT.getName() + "," +
          optionsFinal.get("serialization_null_format"));
  carbonLoadModel.setBadRecordsLoggerEnable(
      TableOptionConstant.BAD_RECORDS_LOGGER_ENABLE.getName() + "," + badRecordsLoggerEnable);
  carbonLoadModel.setBadRecordsAction(
      TableOptionConstant.BAD_RECORDS_ACTION.getName() + "," + badRecordsAction.toUpperCase());
  carbonLoadModel.setIsEmptyDataBadRecord(
      DataLoadProcessorConstants.IS_EMPTY_DATA_BAD_RECORD + "," +
          optionsFinal.get("is_empty_data_bad_record"));
  carbonLoadModel.setSkipEmptyLine(optionsFinal.get("skip_empty_line"));
  carbonLoadModel.setSortScope(sortScope);
  // Fall back to the table-level setting when no load-level value was given.
  if (globalSortPartitions == null) {
    globalSortPartitions = table.getGlobalSortPartitions();
  }
  carbonLoadModel.setGlobalSortPartitions(globalSortPartitions);
  // NOTE(review): this collision check is asymmetric — level 4 is never compared
  // against anything, and level2-vs-level3 is not checked. Tightening it would
  // reject loads that are accepted today, so behavior is preserved; confirm the
  // intended coverage with the option-validation spec before extending.
  if (delimiter.equalsIgnoreCase(complexDelimiterLevel1) ||
      complexDelimiterLevel1.equalsIgnoreCase(complexDelimiterLevel2) ||
      delimiter.equalsIgnoreCase(complexDelimiterLevel2) ||
      delimiter.equalsIgnoreCase(complexDelimiterLevel3)) {
    throw new InvalidLoadOptionException("Field Delimiter and Complex types delimiter are same");
  } else {
    // setComplexDelimiter appends; call order fixes the nesting level.
    carbonLoadModel.setComplexDelimiter(complexDelimiterLevel1);
    carbonLoadModel.setComplexDelimiter(complexDelimiterLevel2);
    carbonLoadModel.setComplexDelimiter(complexDelimiterLevel3);
    carbonLoadModel.setComplexDelimiter(complexDelimiterLevel4);
  }
  carbonLoadModel.setCsvDelimiter(CarbonUtil.unescapeChar(delimiter));
  carbonLoadModel.setCsvHeader(fileHeader);
  // For file loads, partition columns bound to a static value are not expected
  // in the CSV header, so exclude them when resolving header columns.
  List<String> ignoreColumns = new ArrayList<>();
  if (!isDataFrame) {
    for (Map.Entry<String, String> partition : partitions.entrySet()) {
      if (partition.getValue() != null) {
        ignoreColumns.add(partition.getKey());
      }
    }
  }
  carbonLoadModel.setCsvHeaderColumns(
      LoadOption.getCsvHeaderColumns(carbonLoadModel, hadoopConf, ignoreColumns));
  int validatedMaxColumns = validateMaxColumns(
      carbonLoadModel.getCsvHeaderColumns(),
      optionsFinal.get("maxcolumns"));
  carbonLoadModel.setMaxColumns(String.valueOf(validatedMaxColumns));
  // Load metadata (segment details) only exists for transactional tables.
  if (carbonLoadModel.isCarbonTransactionalTable()) {
    carbonLoadModel.readAndSetLoadMetadataDetails();
  }
  carbonLoadModel.setSortColumnsBoundsStr(optionsFinal.get("sort_column_bounds"));
  carbonLoadModel.setLoadMinSize(
      optionsFinal.get(CarbonCommonConstants.CARBON_LOAD_MIN_SIZE_INMB));
  validateAndSetLoadMinSize(carbonLoadModel);
  validateAndSetColumnCompressor(carbonLoadModel);
  validateAndSetBinaryDecoder(carbonLoadModel);
  validateRangeColumn(optionsFinal, carbonLoadModel);
  carbonLoadModel.setMetrics(new DataLoadMetrics());
}