in cpp-ch/local-engine/Storages/SubstraitSource/ExcelTextFormatFile.cpp [224:310]
/// Reads one CSV field from `buf` into `column`.
///
/// Returns true when the serialization appended a parsed value to the column.
/// Returns false when a default value was inserted instead, which happens when:
///   - the reader is already at end of line and `csv.empty_as_default` is set;
///   - the field is empty (next char is the delimiter, or line end / EOF for the last
///     column) and either `csv.empty_as_default` is set or the type is not a string;
///   - deserialization raised a parse error, or finished without adding a row to the
///     nested (non-nullable) column. In both cases the rest of the broken field is
///     skipped via skipErrorChars().
///
/// Exceptions that are not parse errors (per isParseError()) are rethrown unchanged.
/// The trailing column-name parameter is unused.
bool ExcelTextFormatReader::readField(
    DB::IColumn & column,
    const DB::DataTypePtr & type,
    const DB::SerializationPtr & serialization,
    bool is_last_file_column,
    const String &)
{
    if (isEndOfLine() && format_settings.csv.empty_as_default)
    {
        column.insertDefault();
        return false;
    }

    /// Member hook defined elsewhere in this class; it runs before the field is inspected.
    preSkipNullValue();

    /// Row count before this field is read; used afterwards to detect whether a value was really appended.
    const size_t column_size = column.size();

    /// Leading whitespace is skipped only for numeric types.
    if (format_settings.csv.trim_whitespaces && isNumber(removeNullable(type)))
        skipWhitespacesAndTabs(*buf, format_settings.csv.allow_whitespace_or_tab_as_delimiter);

    const bool at_delimiter = !buf->eof() && *buf->position() == format_settings.csv.delimiter;
    const bool at_last_column_line_end = is_last_file_column && (buf->eof() || *buf->position() == '\n' || *buf->position() == '\r');

    /// Note: Tuples are serialized in CSV as separate columns, but with empty_as_default or null_as_default
    /// only one empty or NULL column will be expected
    if ((at_delimiter || at_last_column_line_end) && (format_settings.csv.empty_as_default || !isStringOrFixedString(removeNullable(type))))
    {
        /// Treat empty unquoted column value as default value, if
        /// specified in the settings. Tuple columns might seem
        /// problematic, because they are never quoted but still contain
        /// commas, which might be also used as delimiters. However,
        /// they do not contain empty unquoted fields, so this check
        /// works for tuples as well.
        column.insertDefault();
        return false;
    }

    /// Remember the first character of the field so that error recovery knows whether the
    /// field was quoted. The buffer may be exhausted here (e.g. EOF on a non-last column),
    /// so guard the read exactly like the checks above do instead of dereferencing past the data.
    const char maybe_quote = buf->eof() ? '\0' : *buf->position();
    const bool has_quote = (format_settings.csv.allow_single_quotes && maybe_quote == '\'')
        || (format_settings.csv.allow_double_quotes && maybe_quote == '\"');

    /// Undo a partially inserted row of a Nullable column: drop the extra null-map entry
    /// and/or the extra nested value if either grew by one relative to `column_size`.
    auto rollback_partial_insert = [column_size](DB::IColumn & column_back) -> void
    {
        if (!column_back.isNullable())
            return;

        ColumnNullable & nullable = assert_cast<ColumnNullable &>(column_back);
        if (nullable.getNullMapData().size() == column_size + 1)
            nullable.getNullMapData().pop_back();
        if (nullable.getNestedColumn().size() == column_size + 1)
            nullable.getNestedColumn().popBack(1);
    };

    /// Shared recovery path: skip the rest of the broken field, roll the column back
    /// and store a default value in place of the unparsable one.
    auto insert_default_after_error = [&]() -> bool
    {
        skipErrorChars(*buf, has_quote, maybe_quote, escape, format_settings);
        rollback_partial_insert(column);
        column.insertDefault();
        return false;
    };

    try
    {
        /// Read the column normally.
        serialization->deserializeTextCSV(column, *buf, format_settings);
    }
    catch (Exception & e)
    {
        /// Logic for possible skipping of errors.
        if (!isParseError(e.code()))
            throw;
        return insert_default_after_error();
    }

    // See https://github.com/ClickHouse/ClickHouse/pull/60556
    // In case of failing to parse, we will always push element into nullmap.
    // so, we need using nestedColumn to check if error occurs.
    /// FIXME: move it to ExcelSerialization ???
    const auto nested_column = DB::removeNullable(column.getPtr());
    if (column_size == nested_column->size())
        return insert_default_after_error();

    return true;
}