in src/common/utils/RapidCsv.h [1294:1378]
// Parses CSV text from pStream and appends the resulting rows to mData.
//
// At most p_FileLength bytes are consumed, read in chunks of up to 64 KiB.
// Parsing stops early if the stream delivers no more data. After parsing,
// mSeparatorParams.mHasCR is updated from the observed line endings and the
// column/row label lookups are rebuilt via UpdateColumnNames() and
// UpdateRowNames().
//
// pStream       input stream to read from.
// p_FileLength  maximum number of bytes to read from pStream.
void ParseCsv(std::istream &pStream, std::streamsize p_FileLength) {
  const std::streamsize bufLength = 64 * 1024;
  std::vector<char> buffer(bufLength);
  std::vector<std::string> row;
  std::string cell;
  bool quoted = false;
  int cr = 0;
  int lf = 0;

  // Terminates the row in progress: the pending cell is trimmed, unquoted and
  // appended, then the row is stored in mData unless comment skipping is
  // enabled and its first cell starts with the comment prefix. Used both for
  // newline-terminated rows and for a final row without a trailing newline,
  // so that the comment filter is applied consistently to both.
  const auto finishRow = [&]() {
    row.push_back(Unquote(Trim(cell)));
    const bool isCommentLine = mLineReaderParams.mSkipCommentLines && !row.at(0).empty() &&
                               (row.at(0)[0] == mLineReaderParams.mCommentPrefix);
    if (!isCommentLine) {
      mData.push_back(row);
    }
    cell.clear();
    row.clear();
    quoted = false;
  };

  while (p_FileLength > 0) {
    const std::streamsize toReadLength = std::min<std::streamsize>(p_FileLength, bufLength);
    pStream.read(buffer.data(), toReadLength);

    // With user-specified istream opened in non-binary mode on windows, we may have a
    // data length mismatch, so ensure we don't parse outside actual data length read.
    const std::streamsize readLength = pStream.gcount();
    if (readLength <= 0) {
      break;
    }

    for (size_t i = 0; i < static_cast<size_t>(readLength); ++i) {
      const char ch = buffer[i];

      if (ch == mSeparatorParams.mQuoteChar) {
        // A quote character only toggles the quoted state when it opens the
        // cell or when the cell itself started with a quote character; it is
        // always kept in the cell text so Unquote() can process it later.
        if (cell.empty() || (cell[0] == mSeparatorParams.mQuoteChar)) {
          quoted = !quoted;
        }
        cell += ch;
      } else if (ch == mSeparatorParams.mSeparator) {
        if (!quoted) {
          row.push_back(Unquote(Trim(cell)));
          cell.clear();
        } else {
          // Separator inside a quoted cell is literal cell content.
          cell += ch;
        }
      } else if (ch == '\r') {
        if (mSeparatorParams.mQuotedLinebreaks && quoted) {
          cell += ch;
        } else {
          // Not stored; only counted for the CR/LF detection below.
          ++cr;
        }
      } else if (ch == '\n') {
        if (mSeparatorParams.mQuotedLinebreaks && quoted) {
          cell += ch;
        } else {
          ++lf;
          if (mLineReaderParams.mSkipEmptyLines && row.empty() && cell.empty()) {
            // skip empty line
          } else {
            finishRow();
          }
        }
      } else {
        cell += ch;
      }
    }

    p_FileLength -= readLength;
  }

  // Handle last line without linebreak. It goes through the same comment
  // filter as every other row, so a trailing comment line that lacks a final
  // newline is not stored as data.
  if (!cell.empty() || !row.empty()) {
    finishRow();
  }

  // Assume CR/LF if at least half the linebreaks have CR
  mSeparatorParams.mHasCR = (cr > (lf / 2));

  // Set up column labels
  UpdateColumnNames();

  // Set up row labels
  UpdateRowNames();
}