in gobblin-core/src/main/java/org/apache/gobblin/source/extractor/utils/InputStreamCSVReader.java [139:229]
/**
 * Pulls tokens from {@code this.parser} until one complete record has been collected.
 *
 * <p>Outside of quotes, a separator token closes the current field, while an end-of-line or
 * end-of-stream token closes both the current field and the record. A field that starts with a
 * double quote is read by a nested loop in which separators, line breaks and words are
 * accumulated into the field value via {@code appendFieldValue} instead of terminating it; a
 * doubled quote is accumulated as well, and a single quote ends the quoted section.
 *
 * <p>Side effects: sets {@code this.atEOF} once the end of the stream is reached and raises
 * {@code this.maxFieldCount} when this record has more fields than any seen so far.
 *
 * @return the fields of the next record, or {@code null} if an earlier call already reached the
 *         end of the stream
 * @throws IOException if reading the next token from the parser fails
 * @throws CSVParseException if a quote appears after unquoted text in the same field, if the
 *         stream ends inside an open quote, or if a word directly follows a closing quote
 */
private ArrayList<String> getNextRecordFromStream() throws IOException {
  // An earlier call already consumed the end of the stream; nothing is left to read.
  if (this.atEOF) {
    return null;
  }

  // Pre-size using the widest record observed so far.
  ArrayList<String> fields = new ArrayList<>(this.maxFieldCount);
  StringBuilder current = null;
  boolean recordDone = false;

  while (!recordDone) {
    int tok = this.parser.nextToken();

    if (tok == StreamTokenizer.TT_EOF) {
      // Flush the pending field and remember that the stream is exhausted.
      addField(fields, current);
      this.atEOF = true;
      recordDone = true;
    } else if (tok == StreamTokenizer.TT_EOL) {
      addField(fields, current);
      recordDone = true;
    } else if (tok == this.separator) {
      addField(fields, current);
      current = null;
    } else if (tok == StreamTokenizer.TT_WORD) {
      if (current != null) {
        throw new CSVParseException("Unknown error", this.parser.lineno());
      }
      current = new StringBuilder(this.parser.sval);
    } else if (tok == '"') {
      // A quote is only legal as the very first thing in a field.
      if (current != null) {
        throw new CSVParseException("Found unescaped quote. A value with quote should be within a quote",
            this.parser.lineno());
      }

      boolean quoteOpen = true;
      while (quoteOpen) {
        tok = this.parser.nextToken();

        if (tok == StreamTokenizer.TT_EOF) {
          this.atEOF = true;
          throw new CSVParseException("EOF reached before closing an opened quote", this.parser.lineno());
        } else if (tok == this.separator) {
          // Inside quotes the separator is ordinary field content.
          current = appendFieldValue(current, tok);
        } else if (tok == StreamTokenizer.TT_EOL) {
          current = appendFieldValue(current, "\n");
        } else if (tok == StreamTokenizer.TT_WORD) {
          current = appendFieldValue(current, this.parser.sval);
        } else if (tok == '"') {
          // Look one token ahead to tell a doubled quote from a closing quote.
          int lookahead = this.parser.nextToken();
          if (lookahead == '"') {
            current = appendFieldValue(current, lookahead);
          } else if (lookahead == StreamTokenizer.TT_WORD) {
            throw new CSVParseException("Not expecting more text after end quote", this.parser.lineno());
          } else {
            // Closing quote: hand the look-ahead token back for the outer loop to process.
            this.parser.pushBack();
            quoteOpen = false;
          }
        }
        // Any other token inside quotes is skipped and the loop reads the next one.
      }
    }
    // Any other token outside quotes is skipped and the loop reads the next one.
  }

  int fieldCount = fields.size();
  if (this.maxFieldCount < fieldCount) {
    this.maxFieldCount = fieldCount;
  }
  return fields;
}