public boolean nextRecord()

in hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/FieldCursorForDelimitedDataParser.java [93:187]


    /**
     * Advances the cursor to the next record of the buffered input.
     *
     * <p>Every call increments {@code recordCount} and resets {@code fieldCount} to 0
     * (also on calls that end up returning {@code false}), then runs a state machine
     * keyed on {@code state}:
     * <ul>
     *   <li>{@code INIT} - performs the first {@code readMore()}; moves to
     *       {@code IN_RECORD} and returns {@code true} if it succeeded, otherwise moves
     *       to {@code EOF} and returns {@code false}.</li>
     *   <li>{@code IN_RECORD} - scans forward from {@code start} for a {@code '\n'} or
     *       {@code '\r'} seen while {@code startedQuote} is false, sets {@code start} to
     *       the position just past it and continues in {@code EOR} or {@code CR}.</li>
     *   <li>{@code CR} - a {@code '\r'} was just consumed; a directly following
     *       {@code '\n'} is consumed as well and control falls through to {@code EOR},
     *       otherwise the next record starts here and {@code true} is returned.</li>
     *   <li>{@code EOR} - refills the buffer if it is exhausted, moves to
     *       {@code IN_RECORD} and returns whether unread characters remain.</li>
     *   <li>{@code EOF} - returns {@code false}.</li>
     * </ul>
     *
     * @return {@code true} if the cursor is positioned on a record, {@code false} once
     *         the input is exhausted
     * @throws IOException declared by this method; {@code readMore()} is the only call
     *         made here, so it is presumably the source (confirm in readMore)
     */
    public boolean nextRecord() throws IOException {
        recordCount++;
        fieldCount = 0;
        // Loop so that a state transition (e.g. IN_RECORD -> EOR) is handled in the same call.
        while (true) {
            switch (state) {
                case INIT:
                    // readMore() is treated as returning false when no more input is available.
                    boolean eof = !readMore();
                    if (eof) {
                        state = State.EOF;
                        return false;
                    } else {
                        state = State.IN_RECORD;
                        return true;
                    }

                case IN_RECORD:
                    // Skip the rest of the current record: look for its terminating line break.
                    int p = start;
                    while (true) {
                        if (p >= end) {
                            // Buffer exhausted. Remember start so that any change readMore()
                            // makes to it can be applied to the other saved positions too.
                            int s = start;
                            eof = !readMore();
                            if (eof) {
                                state = State.EOF;
                                // true only if unterminated data is still left between start and end
                                return start < end;
                            }
                            // Rebase the scan position and the remembered positions by the
                            // amount start moved (presumably buffer compaction - confirm in readMore).
                            p -= (s - start);
                            lastQuotePosition -= (s - start);
                            lastDoubleQuotePosition -= (s - start);
                            lastDelimiterPosition -= (s - start);
                        }
                        char ch = buffer[p];
                        // We perform rough format correctness (delimiter, quote) check here
                        // to set the starting position of a record.
                        // In the field level, more checking will be conducted.
                        if (ch == quote) {
                            // Every quote character sets the flag, whether it opens or closes a field.
                            // NOTE(review): within this method only the delimiter branch below clears
                            // startedQuote, so a quote directly followed by a line break leaves it set
                            // and that break is not taken as a record end by this scan - confirm
                            // whether this is intended or handled elsewhere.
                            startedQuote = true;
                            // check two quotes in a row - "". This is an escaped quote
                            // (the previous character was a quote, it was not at position start,
                            // and it was not itself the second quote of an escaped pair)
                            if (lastQuotePosition == p - 1 && start != p - 1 && lastDoubleQuotePosition != p - 1) {
                                lastDoubleQuotePosition = p;
                            }
                            lastQuotePosition = p;
                        } else if (ch == fieldDelimiter) {
                            // A delimiter right after a quote that is not the second half of an
                            // escaped pair ends the quoted section. Any other delimiter is ignored here.
                            if (startedQuote && lastQuotePosition == p - 1 && lastDoubleQuotePosition != p - 1) {
                                startedQuote = false;
                                lastDelimiterPosition = p;
                            }
                        } else if (ch == '\n' && !startedQuote) {
                            // Line feed outside quotes: the record ends here, the next one starts after it.
                            start = p + 1;
                            state = State.EOR;
                            lastDelimiterPosition = p;
                            break;
                        } else if (ch == '\r' && !startedQuote) {
                            // Carriage return outside quotes: the CR state checks for a following '\n'.
                            start = p + 1;
                            state = State.CR;
                            lastDelimiterPosition = p;
                            break;
                        }
                        ++p;
                    }
                    // Leaves the switch; the outer loop re-dispatches on the new state (EOR or CR).
                    break;

                case CR:
                    if (start >= end) {
                        // NOTE(review): unlike IN_RECORD, the saved last*Position values are not
                        // rebased after this readMore() - confirm that this is safe.
                        eof = !readMore();
                        if (eof) {
                            state = State.EOF;
                            return false;
                        }
                    }
                    char ch = buffer[start];
                    if (ch == '\n' && !startedQuote) {
                        // "\r\n": consume the '\n' as part of the same line break.
                        ++start;
                        state = State.EOR;
                    } else {
                        // Lone '\r' ended the record; the next record begins at start.
                        state = State.IN_RECORD;
                        return true;
                    }
                    // No break or return on the "\r\n" path: control falls through into EOR.

                case EOR:
                    if (start >= end) {
                        eof = !readMore();
                        if (eof) {
                            state = State.EOF;
                            return false;
                        }
                    }
                    state = State.IN_RECORD;
                    lastDelimiterPosition = start;
                    // false if no unread characters remain even though readMore() did not report EOF
                    return start < end;

                case EOF:
                    return false;
            }
        }
    }