bool ExcelTextFormatReader::readField()

in cpp-ch/local-engine/Storages/SubstraitSource/ExcelTextFormatFile.cpp [224:310]


/// Reads one CSV field from `buf` and appends exactly one value to `column`.
///
/// @param column               Destination column; one row is appended on every path that returns.
/// @param type                 Declared type of the column (nullability is stripped before type checks).
/// @param serialization        Serialization used to parse the field text.
/// @param is_last_file_column  True when this field is the last one of the row in the file.
/// @return true if a value was parsed from the input; false if a default value was inserted
///         instead (empty field, or a parse error that was skipped).
/// @throws Rethrows any exception from deserialization whose code is not a parse error.
bool ExcelTextFormatReader::readField(
    DB::IColumn & column,
    const DB::DataTypePtr & type,
    const DB::SerializationPtr & serialization,
    bool is_last_file_column,
    const String &)
{
    if (isEndOfLine() && format_settings.csv.empty_as_default)
    {
        column.insertDefault();
        return false;
    }

    preSkipNullValue();

    /// Remember the size before parsing so a partially inserted row can be detected and rolled back.
    size_t column_size = column.size();

    if (format_settings.csv.trim_whitespaces && isNumber(removeNullable(type)))
        skipWhitespacesAndTabs(*buf, format_settings.csv.allow_whitespace_or_tab_as_delimiter);

    /// Query EOF once: `position()` must not be dereferenced when no data is left.
    const bool at_eof = buf->eof();
    const bool at_delimiter = !at_eof && *buf->position() == format_settings.csv.delimiter;
    const bool at_last_column_line_end = is_last_file_column && (at_eof || *buf->position() == '\n' || *buf->position() == '\r');

    /// Note: Tuples are serialized in CSV as separate columns, but with empty_as_default or null_as_default
    /// only one empty or NULL column will be expected
    if ((at_delimiter || at_last_column_line_end) && (format_settings.csv.empty_as_default || !isStringOrFixedString(removeNullable(type))))
    {
        /// Treat empty unquoted column value as default value, if
        /// specified in the settings. Tuple columns might seem
        /// problematic, because they are never quoted but still contain
        /// commas, which might be also used as delimiters. However,
        /// they do not contain empty unquoted fields, so this check
        /// works for tuples as well.
        column.insertDefault();
        return false;
    }

    /// The field may still be reached at EOF (e.g. an empty trailing string field when
    /// empty_as_default is off). There is no character to inspect in that case, so use
    /// '\0', which never matches a quote character, instead of reading past the data.
    const char maybe_quote = at_eof ? '\0' : *buf->position();
    const bool has_quote = (format_settings.csv.allow_single_quotes && maybe_quote == '\'')
        || (format_settings.csv.allow_double_quotes && maybe_quote == '\"');

    /// Drops the row that a failed parse may have left behind in a Nullable column,
    /// handling the null map and the nested column independently.
    auto column_back_func = [&column_size](DB::IColumn & column_back) -> void
    {
        if (column_back.isNullable())
        {
            ColumnNullable & col = assert_cast<ColumnNullable &>(column_back);
            if (col.getNullMapData().size() == column_size + 1)
                col.getNullMapData().pop_back();
            if (col.getNestedColumn().size() == column_size + 1)
                col.getNestedColumn().popBack(1);
        }
    };

    /// Shared recovery path: skip the broken field text, undo any partial insert and
    /// store a default value instead. Always yields false ("no value was parsed").
    auto recover_with_default = [&]() -> bool
    {
        skipErrorChars(*buf, has_quote, maybe_quote, escape, format_settings);
        column_back_func(column);
        column.insertDefault();
        return false;
    };

    try
    {
        /// Read the column normally.
        serialization->deserializeTextCSV(column, *buf, format_settings);
    }
    catch (const Exception & e)
    {
        /// Only parse errors are recoverable; anything else propagates to the caller.
        if (!isParseError(e.code()))
            throw;

        return recover_with_default();
    }

    // See https://github.com/ClickHouse/ClickHouse/pull/60556
    // In case of failing to parse, we will always push element into nullmap.
    // so, we need using nestedColumn to check if error occurs.
    /// FIXME:  move it to ExcelSerialization ???
    const auto nestedColumn = DB::removeNullable(column.getPtr());
    if (column_size == nestedColumn->size())
        return recover_with_default();

    return true;
}