protected final void parse_to_next_value()

in src/com/amazon/ion/impl/IonReaderTextRawX.java [775:1066]


    /**
     * Drives the text parser's state machine forward until it reaches the
     * start of the next value, the end of the current container, or the
     * end of the input.
     * <p>
     * Each pass through the loop looks up an action in
     * {@code TransitionActions2} using the current state and the current
     * token, then executes it. Actions that only consume leading syntax
     * (a field name, an annotation, a comma) fetch another token and go
     * around the loop again. Every other action sets the follow-on state
     * and returns:
     * <ul>
     *   <li>container starts set {@code _value_type} to STRUCT, LIST or
     *       SEXP;</li>
     *   <li>lob starts set {@code _value_type} to CLOB or BLOB and record
     *       the opening punctuation in {@code _lob_token};</li>
     *   <li>scalars set {@code _value_type} (or, for nulls, record
     *       {@code _null_type}) and for some keywords also load
     *       {@code _v};</li>
     *   <li>container end, datagram end, EOF and undefined transitions
     *       set {@code _eof} to true.</li>
     * </ul>
     * The scanner position at entry is recorded in
     * {@code _value_start_offset}, {@code _value_start_line} and
     * {@code _value_start_column} so that a span over the value can be
     * built later.
     *
     * @throws IOException propagated from the scanner and helper calls
     *         made while parsing (presumably raised while reading the
     *         underlying input; confirm against the scanner)
     * @throws IllegalStateException if a field-name action is reached
     *         while {@code is_in_struct_internal()} is false
     */
    protected final void parse_to_next_value() throws IOException
    {
        int t;
        int action, temp_state;
        // Only assigned in ACTION_LOAD_ANNOTATION (from skip_whitespace())
        // and only read when parsing a null keyword in ACTION_LOAD_SCALAR.
        boolean trailing_whitespace = false;  // TODO: there's a better way to do this
        StringBuilder sb;

        // FIXME: check depth and type before doing anything further
        //        if we're on a collection and at the correct depth
        //        we need to skip over the contents of the collection
        //        before doing any more parsing

        // we'll need a token to get started here
        // we'll also remember where we were when we started if the
        // user later wants to get a span over this value.  In the
        // case where we are just before a comma, after the comma we'll
        // reset this offset since for the span the comma isn't part
        // of the span when it's hoisted
        _value_start_offset = _scanner.getStartingOffset();
        _value_start_line   = _scanner.getLineNumber();
        _value_start_column = _scanner.getLineOffset();

        t = _scanner.nextToken();

        for (;;) {
            // TransitionActions2 is a flattened table: one row per state,
            // TOKEN_count columns per row, so the index is
            // state * TOKEN_count + token.
            int idx = get_state_int() * IonTokenConstsX.TOKEN_count + t;
            action = TransitionActions2[idx];
            // this used to be (but the 2d array is 9072ms vs 8786ms
            // timing, 3% of total file parse time!):
            // action = TransitionActions[get_state_int()][t];
            switch (action) {
            case ACTION_NOT_DEFINED:
                {
                    // TODO why would we get here?
                    // There is no transition for this (state, token) pair.
                    // If the token is the closing delimiter that matches
                    // _nesting_parent, treat it as a quiet end of input;
                    // anything else is reported as a syntax error.
                    boolean span_eof = false;

                    if (_nesting_parent != null) {
                        switch (_nesting_parent) {
                            case LIST:
                                if (t == IonTokenConstsX.TOKEN_CLOSE_SQUARE) {
                                    span_eof = true;
                                }
                                break;
                            case SEXP:
                                if (t == IonTokenConstsX.TOKEN_CLOSE_PAREN){
                                    span_eof = true;
                                }
                                break;
                            case STRUCT:
                                if (t == IonTokenConstsX.TOKEN_CLOSE_BRACE) {
                                    span_eof = true;
                                }
                                break;
                            default:
                                break;
                        }
                    }
                    if (span_eof != true) {
                        String message = "invalid syntax [state:"
                                       + get_state_name()
                                       + " on token:"
                                       +IonTokenConstsX.getTokenName(t)
                                       +"]";
                        // NOTE(review): presumably parse_error() always
                        // throws; if it ever returned, the code below
                        // would silently report EOF - confirm.
                        parse_error(message);
                    }
                    set_state(STATE_EOF);
                    _eof = true;
                    return;
                }
            case ACTION_EOF:
                set_state(STATE_EOF);
                _eof = true;
                return;
            case ACTION_LOAD_FIELD_NAME:
            {
                // Consume "<field name> :" and then continue the loop with
                // the token that follows the colon.
                if (!is_in_struct_internal()) {
                    throw new IllegalStateException("field names have to be in structs");
                }
                //finish_value(_current_value_save_point);
                finish_and_save_value();

                sb = token_contents_load(t);

                SymbolToken sym = parseSymbolToken("a field name", sb, t);
                set_fieldname(sym);
                clear_current_value_buffer();

                t = _scanner.nextToken();
                if (t != IonTokenConstsX.TOKEN_COLON) {
                    String message = "field name must be followed by a colon, not a "
                                   + IonTokenConstsX.getTokenName(t);
                    parse_error(message);
                }
                _scanner.tokenIsFinished();
                set_state(STATE_BEFORE_ANNOTATION_CONTAINED);
                t = _scanner.nextToken();
                break;
            }
            case ACTION_LOAD_ANNOTATION:
            {
                // The token is a symbol that may or may not be followed by
                // "::". Load its text first, then look for the "::".
                sb = token_contents_load(t);

                trailing_whitespace = _scanner.skip_whitespace();
                if (!_scanner.skipDoubleColon()) {
                    // unnecessary: set_current_value(sp);
                    // this will "loop around" to ACTION_LOAD_SCALAR
                    // since this is necessarily a symbol of one
                    // sort or another
                    // (t and sb are left untouched; only the state changes)
                    temp_state = get_state_after_annotation();
                    set_state(temp_state);
                    break;
                }

                // We have an annotation!
                SymbolToken sym = parseSymbolToken("an annotation", sb, t);
                append_annotation(sym);
                clear_current_value_buffer();

                // Consumed the annotation, move on.
                // note: skipDoubleColon() above consumed the two colons
                // so nextToken won't see them
                t = _scanner.nextToken();
                switch(t) {
                case IonTokenConstsX.TOKEN_SYMBOL_IDENTIFIER:
                case IonTokenConstsX.TOKEN_SYMBOL_QUOTED:
                    // This may be another annotation, so stay in this state
                    // and come around the horn again to check it out.
                    break;
                default:
                    // we leave the error handling to the transition
                    temp_state = get_state_after_annotation();
                    set_state(temp_state);
                    break;
                }
                break;
            }
            // Container starts: record the type, set the state used for
            // the container's first child, and return to the caller.
            case ACTION_START_STRUCT:
                _value_type = IonType.STRUCT;
                temp_state = STATE_BEFORE_FIELD_NAME;
                set_state(temp_state);
                return;
            case ACTION_START_LIST:
                _value_type = IonType.LIST;
                temp_state = STATE_BEFORE_ANNOTATION_CONTAINED;
                set_state(temp_state);
                return;
            case ACTION_START_SEXP:
                _value_type = IonType.SEXP;
                temp_state = STATE_BEFORE_ANNOTATION_SEXP;
                set_state(temp_state);
                return;
            case ACTION_START_LOB:
                // The punctuation reported by peekLobStartPunctuation()
                // selects between a double-quoted clob, a triple-quoted
                // clob and (for anything else) a blob.
                switch (_scanner.peekLobStartPunctuation()) {
                case IonTokenConstsX.TOKEN_STRING_DOUBLE_QUOTE:
                    set_state(STATE_IN_CLOB_DOUBLE_QUOTED_CONTENT);
                    _lob_token = IonTokenConstsX.TOKEN_STRING_DOUBLE_QUOTE;
                    _value_type = IonType.CLOB;
                    break;
                case IonTokenConstsX.TOKEN_STRING_TRIPLE_QUOTE:
                    set_state(STATE_IN_CLOB_TRIPLE_QUOTED_CONTENT);
                    _lob_token = IonTokenConstsX.TOKEN_STRING_TRIPLE_QUOTE;
                    _value_type = IonType.CLOB;
                    break;
                default:
                    set_state(STATE_IN_BLOB_CONTENT);
                    _lob_token = IonTokenConstsX.TOKEN_OPEN_DOUBLE_BRACE;
                    _value_type = IonType.BLOB;
                    break;
                }
                return;
            case ACTION_LOAD_SCALAR:
                if (t == IonTokenConstsX.TOKEN_SYMBOL_IDENTIFIER) {
                    // An unquoted identifier may be a keyword (null, true,
                    // false, nan, a symbol-id form); classify it from its text.
                    sb = token_contents_load(t);
                    _value_keyword = IonTokenConstsX.keyword(sb, 0, sb.length());
                    switch (_value_keyword) {
                    case IonTokenConstsX.KEYWORD_NULL:
                    {
                        // If whitespace followed the symbol (as recorded in
                        // ACTION_LOAD_ANNOTATION) no type suffix is looked
                        // for and the value is a plain null.
                        int kwt = trailing_whitespace ? IonTokenConstsX.KEYWORD_none : _scanner.peekNullTypeSymbol();
                        switch (kwt) {
                        case IonTokenConstsX.KEYWORD_NULL:      _null_type = IonType.NULL;       break;
                        case IonTokenConstsX.KEYWORD_BOOL:      _null_type = IonType.BOOL;       break;
                        case IonTokenConstsX.KEYWORD_INT:       _null_type = IonType.INT;        break;
                        case IonTokenConstsX.KEYWORD_FLOAT:     _null_type = IonType.FLOAT;      break;
                        case IonTokenConstsX.KEYWORD_DECIMAL:   _null_type = IonType.DECIMAL;    break;
                        case IonTokenConstsX.KEYWORD_TIMESTAMP: _null_type = IonType.TIMESTAMP;  break;
                        case IonTokenConstsX.KEYWORD_SYMBOL:    _null_type = IonType.SYMBOL;     break;
                        case IonTokenConstsX.KEYWORD_STRING:    _null_type = IonType.STRING;     break;
                        case IonTokenConstsX.KEYWORD_BLOB:      _null_type = IonType.BLOB;       break;
                        case IonTokenConstsX.KEYWORD_CLOB:      _null_type = IonType.CLOB;       break;
                        case IonTokenConstsX.KEYWORD_LIST:      _null_type = IonType.LIST;       break;
                        case IonTokenConstsX.KEYWORD_SEXP:      _null_type = IonType.SEXP;       break;
                        case IonTokenConstsX.KEYWORD_STRUCT:    _null_type = IonType.STRUCT;     break;
                        case IonTokenConstsX.KEYWORD_none:      _null_type = IonType.NULL;       break; // this happens when there isn't a '.' otherwise peek throws the error or returns none
                        // last label of this switch, so the missing break
                        // cannot fall into another case
                        default: parse_error("invalid keyword id ("+kwt+") encountered while parsing a null");
                        }
                        // at this point we've consumed a dot '.' and its preceding whitespace
                        // clear_value();
                        current_value_is_null(_null_type);
                        // set to null_type in above call: _value_type = IonType.NULL;
                        break;
                    }
                    case IonTokenConstsX.KEYWORD_TRUE:
                        _value_type = IonType.BOOL;
                        current_value_is_bool(true);
                        break;
                    case IonTokenConstsX.KEYWORD_FALSE:
                        _value_type = IonType.BOOL;
                        current_value_is_bool(false);
                        break;
                    case IonTokenConstsX.KEYWORD_NAN:
                        _value_type = IonType.FLOAT;
                        clear_current_value_buffer();
                        _v.setValue(Double.NaN);
                        _v.setAuthoritativeType(AS_TYPE.double_value);
                        break;
                    case IonTokenConstsX.KEYWORD_sid:
                    {
                        // Store the decoded symbol id as the int value.
                        int sid = IonTokenConstsX.decodeSid(sb);
                        _v.setValue(sid);
                        _v.setAuthoritativeType(AS_TYPE.int_value);
                    }
                    // NOTE(review): there is no break above, so control
                    // falls through and _value_type becomes SYMBOL. This
                    // looks intentional (a symbol-id form is still a
                    // symbol) - confirm.
                    default:
                        // We don't care about any other 'keywords'
                        _value_type = IonType.SYMBOL;
                        break;
                    }
                }
                else if (t == IonTokenConstsX.TOKEN_DOT) {
                    // A bare dot token is returned as the symbol ".".
                    _value_type = IonType.SYMBOL;
                    clear_current_value_buffer();
                    _v.setValue(".");
                    _v.setAuthoritativeType(AS_TYPE.string_value);
                }
                else {
                    // if it's not a symbol we just look at the token type
                    _value_type = IonTokenConstsX.ion_type_of_scalar(t);
                }
                // state_after_scalar is declared in this case but, being in
                // the scope of the whole switch block, is reused by the
                // cases below.
                int state_after_scalar = get_state_after_value();
                set_state(state_after_scalar);
                return;
            case ACTION_PLUS_INF:
                _value_type = IonType.FLOAT;
                clear_current_value_buffer();
                _v.setValue(Double.POSITIVE_INFINITY);
                _v.setAuthoritativeType(AS_TYPE.double_value);
                state_after_scalar = get_state_after_value();
                set_state(state_after_scalar);
                return;
            case ACTION_MINUS_INF:
                _value_type = IonType.FLOAT;
                clear_current_value_buffer();
                _v.setValue(Double.NEGATIVE_INFINITY);
                _v.setAuthoritativeType(AS_TYPE.double_value);
                state_after_scalar = get_state_after_value();
                set_state(state_after_scalar);
                return;
            case ACTION_EAT_COMMA:
                if (_container_prohibits_commas) {
                    parse_error("commas aren't used to separate values in "+getContainerType().toString());
                }
                // After a comma a struct expects a field name; any other
                // container expects an (optionally annotated) value.
                // new_state is also reused by ACTION_FINISH_CONTAINER.
                int new_state = STATE_BEFORE_ANNOTATION_CONTAINED;
                if (_container_is_struct) {
                    new_state = STATE_BEFORE_FIELD_NAME;
                }
                set_state(new_state);
                _scanner.tokenIsFinished();
                // when we eat a comma we need to reset the current
                // value start used to define a span, since the comma
                // isn't part of the span when it's hoisted
                // NOTE(review): only the offset is reset here;
                // _value_start_line and _value_start_column keep the
                // position captured before the comma - confirm that
                // this is intended.
                _value_start_offset = _scanner.getStartingOffset();
                t = _scanner.nextToken();
                break;
            case ACTION_FINISH_CONTAINER:
                // The closing token t selects the follow-on state; _eof is
                // raised to tell the caller this container has no more
                // values.
                new_state = get_state_after_container(t);
                set_state(new_state);
                _eof = true;
                return;
            case ACTION_FINISH_LOB:
                state_after_scalar = get_state_after_value();
                set_state(state_after_scalar);
                return;
            case ACTION_FINISH_DATAGRAM:
                if (getDepth() != 0) {
                    parse_error("state failure end of datagram encounterd with a non-container stack");
                }
                set_state(STATE_EOF);
                _eof = true;
                return;
            // NOTE(review): presumably parse_error() always throws; if it
            // returned, the loop would repeat with the same state and
            // token - confirm.
            default: parse_error("unexpected token encountered: "+IonTokenConstsX.getTokenName(t));
            }
        }
    }