in src/mlio/record_readers/csv_record_reader.cc [75:238]
// Extracts the next CSV line from the front of `chunk`.
//
// The characters of `chunk` are pushed one at a time through a small
// state machine that tracks whether the cursor is at the start of a
// field, inside an unquoted field, inside a quoted field, right after a
// quote character seen within a quoted field, or right after a carriage
// return. Line terminators encountered inside a quoted field do not end
// the line.
//
// When a line terminator is found ('\n', "\r\n", or a '\r' followed by
// any other character) the part of `chunk` that precedes the terminator
// is returned as a Record and `chunk` is advanced past the terminator.
//
// When the characters run out before a terminator is found:
//   - if `ignore_leftover` is true, an empty optional is returned and
//     `chunk` is not modified by this function;
//   - otherwise the remainder of `chunk` (without a trailing '\r', if
//     that was the last character read) is returned as the final record
//     and `chunk` is reset to an empty slice.
//
// If `params_->max_line_length` is set, check_line_length() is invoked
// with the final cursor position before anything is returned
// (NOTE(review): presumably it throws when the limit is exceeded --
// confirm against its definition).
//
// Throws Corrupt_record_error if `chunk` is empty and `ignore_leftover`
// is false, or if the input ends inside a quoted field and
// `ignore_leftover` is false.
std::optional<Record> Csv_record_reader::read_line(Memory_slice &chunk, bool ignore_leftover)
{
    auto chars = as_span<const char>(chunk);
    if (chars.empty()) {
        if (!ignore_leftover) {
            throw Corrupt_record_error{"The text line ends with a corrupt character."};
        }
        return std::nullopt;
    }

    auto cursor = chars.begin();
    char current{};
    Parser_state state = Parser_state::new_field;
    bool line_terminated = false;

    // Each iteration consumes one character and performs exactly one
    // state transition. The loop stops either because a terminator was
    // recognized or because no further character could be read; in the
    // latter case `state` still holds the state we were in at that time.
    while (!line_terminated && try_get_next_char(chars, cursor, current)) {
        switch (state) {
        case Parser_state::new_field:
            if (current == params_->delimiter) {
                state = Parser_state::new_field;
            }
            else if (current == params_->quote_char) {
                state = Parser_state::in_quoted_field;
            }
            else if (current == '\n') {
                line_terminated = true;
            }
            else if (current == '\r') {
                state = Parser_state::has_carriage;
            }
            else {
                state = Parser_state::in_field;
            }
            break;

        case Parser_state::in_field:
            // A quote character has no special meaning once we are
            // inside an unquoted field.
            if (current == params_->delimiter) {
                state = Parser_state::new_field;
            }
            else if (current == '\n') {
                line_terminated = true;
            }
            else if (current == '\r') {
                state = Parser_state::has_carriage;
            }
            else {
                state = Parser_state::in_field;
            }
            break;

        case Parser_state::in_quoted_field:
            // Only a quote character can get us out of a quoted field;
            // delimiters and line breaks are part of the field.
            if (current == params_->quote_char) {
                state = Parser_state::quote_in_quoted_field;
            }
            break;

        case Parser_state::quote_in_quoted_field:
            if (current == params_->delimiter) {
                state = Parser_state::new_field;
            }
            else if (current == params_->quote_char) {
                // Two consecutive quotes: remain in the quoted field.
                state = Parser_state::in_quoted_field;
            }
            else if (current == '\n') {
                line_terminated = true;
            }
            else if (current == '\r') {
                state = Parser_state::has_carriage;
            }
            else {
                state = Parser_state::in_field;
            }
            break;

        case Parser_state::has_carriage:
            // A carriage return that is not followed by a new-line still
            // terminates the line. Step back so that the character just
            // read stays in `chunk`, and leave the has_carriage state so
            // that only a single terminator character is stripped below.
            if (current != '\n') {
                state = Parser_state::new_field;
                --cursor;
            }
            line_terminated = true;
            break;
        }
    }

    if (params_->max_line_length) {
        check_line_length(chars, cursor, *params_->max_line_length);
    }

    if (line_terminated) {
        auto consumed = sizeof(char) * as_size(cursor - chars.begin());

        // "\r\n" occupies two characters; every other terminator one.
        auto terminator_size =
            (state == Parser_state::has_carriage) ? sizeof(char) * 2 : sizeof(char);

        Memory_slice payload = chunk.first(consumed - terminator_size);
        chunk = chunk.subslice(consumed);
        return Record{std::move(payload)};
    }

    // No terminator was found before the characters ran out.
    if (ignore_leftover) {
        return std::nullopt;
    }

    if (state == Parser_state::in_quoted_field) {
        throw Corrupt_record_error{"EOF reached inside a quoted field."};
    }

    Memory_slice payload;
    if (state == Parser_state::has_carriage) {
        // Drop the trailing carriage return from the final record.
        payload = chunk.first(chunk.end() - sizeof(char));
    }
    else {
        payload = std::move(chunk);
    }

    chunk = {};
    return Record{std::move(payload)};
}