private ArrayList<String> getNextRecordFromStream()

in gobblin-core/src/main/java/org/apache/gobblin/source/extractor/utils/InputStreamCSVReader.java [139:229]


  /**
   * Pulls tokens from {@code this.parser} until the fields of one record have been collected.
   *
   * <p>Outside of quotes, a separator token hands the current field to {@code addField} and starts a
   * new one, while an end-of-line token or end of stream hands over the last field and finishes the
   * record. A double-quote token opens a quoted section: inside it, separator tokens, end-of-line
   * tokens (as {@code "\n"}) and word tokens are passed to {@code appendFieldValue}, and two
   * consecutive quotes are passed on as a single quote token. The quoted section ends at a quote
   * that is not followed by another quote. Token kinds not mentioned here are skipped.
   *
   * <p>Side effects: {@code atEOF} is set when the tokenizer reports end of stream, and
   * {@code maxFieldCount} is raised if the finished record's size exceeds it.
   *
   * @return the collected record, or {@code null} if {@code atEOF} was already set on entry
   * @throws IOException declared by the method; {@code parser.nextToken()} may raise it
   * @throws CSVParseException if a word or quote token arrives while a field value is already
   *     pending, if the stream ends inside a quoted section, or if a word token directly follows a
   *     closing quote
   */
  private ArrayList<String> getNextRecordFromStream() throws IOException {
    if (this.atEOF) {
      return null;
    }

    ArrayList<String> fields = new ArrayList<>(this.maxFieldCount);
    StringBuilder current = null;
    boolean recordDone = false;

    while (!recordDone) {
      int ttype = this.parser.nextToken();

      if (ttype == StreamTokenizer.TT_EOF) {
        // End of stream: flush whatever field is pending and remember that nothing is left.
        addField(fields, current);
        this.atEOF = true;
        recordDone = true;
      } else if (ttype == StreamTokenizer.TT_EOL) {
        // Unquoted line break terminates the record.
        addField(fields, current);
        recordDone = true;
      } else if (ttype == this.separator) {
        // Field boundary: flush the pending field and start over.
        addField(fields, current);
        current = null;
      } else if (ttype == StreamTokenizer.TT_WORD) {
        if (current != null) {
          throw new CSVParseException("Unknown error", this.parser.lineno());
        }
        current = new StringBuilder(this.parser.sval);
      } else if (ttype == '"') {
        if (current != null) {
          throw new CSVParseException("Found unescaped quote. A value with quote should be within a quote",
              this.parser.lineno());
        }

        // Consume the quoted section up to (and including) its closing quote.
        boolean quoteOpen = true;
        while (quoteOpen) {
          ttype = this.parser.nextToken();

          if (ttype == StreamTokenizer.TT_EOF) {
            this.atEOF = true;
            throw new CSVParseException("EOF reached before closing an opened quote", this.parser.lineno());
          } else if (ttype == this.separator) {
            // Inside quotes the separator is ordinary content.
            current = appendFieldValue(current, ttype);
          } else if (ttype == StreamTokenizer.TT_EOL) {
            current = appendFieldValue(current, "\n");
          } else if (ttype == StreamTokenizer.TT_WORD) {
            current = appendFieldValue(current, this.parser.sval);
          } else if (ttype == '"') {
            // Peek one token ahead to tell a doubled (escaped) quote from the closing quote.
            int lookahead = this.parser.nextToken();

            if (lookahead == '"') {
              current = appendFieldValue(current, lookahead);
            } else if (lookahead == StreamTokenizer.TT_WORD) {
              throw new CSVParseException("Not expecting more text after end quote", this.parser.lineno());
            } else {
              // Closing quote: give the peeked token back so the outer loop handles it.
              this.parser.pushBack();
              quoteOpen = false;
            }
          }
          // Any other token kind inside quotes is skipped.
        }
      }
      // Any other token kind outside quotes is skipped.
    }

    int fieldCount = fields.size();
    if (fieldCount > this.maxFieldCount) {
      this.maxFieldCount = fieldCount;
    }

    return fields;
  }