std::optional<Record> Csv_record_reader::read_line()

in src/mlio/record_readers/csv_record_reader.cc [75:238]


/// Extracts the next CSV line from the front of @p chunk.
///
/// The characters of @p chunk are scanned with a small state machine that
/// tracks whether the cursor is at the start of a field, inside an unquoted
/// field, inside a quoted field, right after a quote seen within a quoted
/// field, or right after a carriage return. A line terminator ('\n', "\r\n",
/// or a lone '\r') only ends the line while the scanner is outside a quoted
/// field.
///
/// @param chunk
///     The data to scan. When a terminated line is found, @p chunk is
///     advanced past the terminator. When the remaining data is returned as
///     the final line, @p chunk is reset to an empty slice.
/// @param ignore_leftover
///     If true, data that is not followed by a line terminator is not
///     returned; an empty optional is returned instead and @p chunk is left
///     untouched.
///
/// @return
///     A record holding the line without its terminator, or an empty optional
///     if no record was produced and @p ignore_leftover is true.
///
/// @throws Corrupt_record_error
///     If @p chunk yields no characters, or if the data ends inside a quoted
///     field, and @p ignore_leftover is false.
///
/// @note When `params_->max_line_length` is set, check_line_length() is
///     invoked with the current scan position before anything is returned.
std::optional<Record> Csv_record_reader::read_line(Memory_slice &chunk, bool ignore_leftover)
{
    auto chars = as_span<const char>(chunk);
    if (chars.empty()) {
        if (!ignore_leftover) {
            throw Corrupt_record_error{"The text line ends with a corrupt character."};
        }

        return {};
    }

    auto pos = chars.begin();

    char chr{};

    Parser_state state = Parser_state::new_field;

    // Set once a line terminator has been consumed; distinguishes "found a
    // complete line" from "ran out of characters".
    bool line_complete = false;

    while (!line_complete && try_get_next_char(chars, pos, chr)) {
        switch (state) {
        case Parser_state::in_quoted_field:
            // Everything except a quote character is part of the field,
            // including delimiters and line terminators.
            if (chr == params_->quote_char) {
                state = Parser_state::quote_in_quoted_field;
            }
            break;

        case Parser_state::has_carriage:
            // A carriage return that is not followed by a new-line character
            // still terminates the line; step back so that the character we
            // just read is not lost.
            if (chr != '\n') {
                state = Parser_state::new_field;

                --pos;
            }
            line_complete = true;
            break;

        case Parser_state::new_field:
        case Parser_state::in_field:
        case Parser_state::quote_in_quoted_field:
            if (chr == params_->delimiter) {
                state = Parser_state::new_field;
            }
            // A quote character is only special at the start of a field or
            // directly after a quote inside a quoted field; in the middle of
            // an unquoted field it is treated like any other character.
            else if (state != Parser_state::in_field && chr == params_->quote_char) {
                state = Parser_state::in_quoted_field;
            }
            else if (chr == '\n') {
                line_complete = true;
            }
            else if (chr == '\r') {
                state = Parser_state::has_carriage;
            }
            else {
                state = Parser_state::in_field;
            }
            break;
        }
    }

    if (params_->max_line_length) {
        check_line_length(chars, pos, *params_->max_line_length);
    }

    if (line_complete) {
        auto consumed = sizeof(char) * as_size(pos - chars.begin());

        // The state is still has_carriage only if the terminator was "\r\n";
        // in every other case a single terminator character was consumed.
        auto terminator_chars = (state == Parser_state::has_carriage) ? 2U : 1U;

        Memory_slice line = chunk.first(consumed - sizeof(char) * terminator_chars);

        chunk = chunk.subslice(consumed);

        return Record{std::move(line)};
    }

    // We ran out of characters before seeing a line terminator.
    if (ignore_leftover) {
        return {};
    }

    if (state == Parser_state::in_quoted_field) {
        throw Corrupt_record_error{"EOF reached inside a quoted field."};
    }

    Memory_slice line;
    if (state == Parser_state::has_carriage) {
        // Drop the trailing carriage return.
        line = chunk.first(chunk.end() - sizeof(char));
    }
    else {
        line = std::move(chunk);
    }

    chunk = {};

    return Record{std::move(line)};
}