in src/com/amazon/ion/impl/IonReaderTextRawX.java [775:1066]
/**
 * Runs the token-driven state machine until the parser is positioned on
 * the next value, or on the end of the current container or input.
 * <p>
 * On entry the scanner's starting offset, line number and line offset are
 * recorded in {@code _value_start_offset}, {@code _value_start_line} and
 * {@code _value_start_column}, and the first token is read. The loop then
 * looks up an action in the flattened table {@code TransitionActions2},
 * indexed by {@code get_state_int() * IonTokenConstsX.TOKEN_count + token},
 * and executes it.
 * <p>
 * Three actions keep the loop running: {@code ACTION_LOAD_FIELD_NAME},
 * {@code ACTION_LOAD_ANNOTATION} and {@code ACTION_EAT_COMMA}. Every other
 * action updates the parser state and returns:
 * <ul>
 *   <li>container start, lob start and scalar actions set
 *       {@code _value_type};</li>
 *   <li>EOF, end-of-container and end-of-datagram actions set
 *       {@code _eof} to {@code true};</li>
 *   <li>{@code ACTION_FINISH_LOB} only moves to the state returned by
 *       {@code get_state_after_value()}.</li>
 * </ul>
 * NOTE(review): several branches call {@code parse_error(...)} and then
 * continue as if it never returns; this presumably means it always throws,
 * confirm against its definition.
 *
 * @throws IOException declared by this method; presumably raised by the
 *         underlying scanner calls (not visible here)
 * @throws IllegalStateException if a field-name action is selected while
 *         {@code is_in_struct_internal()} is false
 */
protected final void parse_to_next_value() throws IOException
{
// the current token, re-read by the actions that keep looping
int t;
int action, temp_state;
// set by the annotation action from _scanner.skip_whitespace() and
// consulted by the scalar action when deciding whether to look for a
// typed-null suffix
boolean trailing_whitespace = false; // TODO: there's a better way to do this
StringBuilder sb;
// FIXME: check depth and type before doing anything further
// if we're on a collection and at the correct depth
// we need to skip over the contents of the collection
// before doing any more parsing
// we'll need a token to get started here
// we'll also remember where we were when we started if the
// user later wants to get a span over this value. In the
// case where we are just before a comma, after the comma we'll
// reset this offset, since the comma isn't part of the span
// when it's hoisted
_value_start_offset = _scanner.getStartingOffset();
_value_start_line = _scanner.getLineNumber();
_value_start_column = _scanner.getLineOffset();
t = _scanner.nextToken();
for (;;) {
// flattened 2d lookup: row = current state, column = token
int idx = get_state_int() * IonTokenConstsX.TOKEN_count + t;
action = TransitionActions2[idx];
// this used to be (but the 2d array is 9072ms vs 8786ms
// timing, 3% of total file parse time!):
// action = TransitionActions[get_state_int()][t];
switch (action) {
case ACTION_NOT_DEFINED:
{
// TODO why would we get here?
// No transition is defined for this state/token pair. It is
// tolerated (treated as end of values) only when the token is
// the closing delimiter matching the _nesting_parent type;
// anything else is reported through parse_error.
boolean span_eof = false;
if (_nesting_parent != null) {
switch (_nesting_parent) {
case LIST:
if (t == IonTokenConstsX.TOKEN_CLOSE_SQUARE) {
span_eof = true;
}
break;
case SEXP:
if (t == IonTokenConstsX.TOKEN_CLOSE_PAREN){
span_eof = true;
}
break;
case STRUCT:
if (t == IonTokenConstsX.TOKEN_CLOSE_BRACE) {
span_eof = true;
}
break;
default:
break;
}
}
if (span_eof != true) {
String message = "invalid syntax [state:"
+ get_state_name()
+ " on token:"
+IonTokenConstsX.getTokenName(t)
+"]";
parse_error(message);
}
set_state(STATE_EOF);
_eof = true;
return;
}
case ACTION_EOF:
set_state(STATE_EOF);
_eof = true;
return;
case ACTION_LOAD_FIELD_NAME:
{
// Reads "<name> :" and then continues the loop with the token
// that follows the colon.
if (!is_in_struct_internal()) {
throw new IllegalStateException("field names have to be in structs");
}
//finish_value(_current_value_save_point);
finish_and_save_value();
sb = token_contents_load(t);
SymbolToken sym = parseSymbolToken("a field name", sb, t);
set_fieldname(sym);
clear_current_value_buffer();
t = _scanner.nextToken();
if (t != IonTokenConstsX.TOKEN_COLON) {
String message = "field name must be followed by a colon, not a "
+ IonTokenConstsX.getTokenName(t);
parse_error(message);
}
_scanner.tokenIsFinished();
set_state(STATE_BEFORE_ANNOTATION_CONTAINED);
t = _scanner.nextToken();
break;
}
case ACTION_LOAD_ANNOTATION:
{
// A symbol token was seen; it is an annotation only if a
// double colon follows it.
sb = token_contents_load(t);
trailing_whitespace = _scanner.skip_whitespace();
if (!_scanner.skipDoubleColon()) {
// Not an annotation: keep the same token, switch to the
// post-annotation state and go around the loop again.
// unnecessary: set_current_value(sp);
// this will "loop around" to ACTION_LOAD_SCALAR
// since this is necessarily a symbol of one
// sort or another
temp_state = get_state_after_annotation();
set_state(temp_state);
break;
}
// We have an annotation!
SymbolToken sym = parseSymbolToken("an annotation", sb, t);
append_annotation(sym);
clear_current_value_buffer();
// Consumed the annotation, move on.
// note: the skipDoubleColon() call above consumed the two
// colons so nextToken won't see them
t = _scanner.nextToken();
switch(t) {
case IonTokenConstsX.TOKEN_SYMBOL_IDENTIFIER:
case IonTokenConstsX.TOKEN_SYMBOL_QUOTED:
// This may be another annotation, so stay in this state
// and come around the horn again to check it out.
break;
default:
// we leave the error handling to the transition
temp_state = get_state_after_annotation();
set_state(temp_state);
break;
}
break;
}
// The three container-start actions record the container type, move
// to the state used for that container's first child, and return.
case ACTION_START_STRUCT:
_value_type = IonType.STRUCT;
temp_state = STATE_BEFORE_FIELD_NAME;
set_state(temp_state);
return;
case ACTION_START_LIST:
_value_type = IonType.LIST;
temp_state = STATE_BEFORE_ANNOTATION_CONTAINED;
set_state(temp_state);
return;
case ACTION_START_SEXP:
_value_type = IonType.SEXP;
temp_state = STATE_BEFORE_ANNOTATION_SEXP;
set_state(temp_state);
return;
case ACTION_START_LOB:
// Peek at what follows the lob opener: a double-quoted or
// triple-quoted string start means a clob, anything else is
// treated as blob content.
switch (_scanner.peekLobStartPunctuation()) {
case IonTokenConstsX.TOKEN_STRING_DOUBLE_QUOTE:
set_state(STATE_IN_CLOB_DOUBLE_QUOTED_CONTENT);
_lob_token = IonTokenConstsX.TOKEN_STRING_DOUBLE_QUOTE;
_value_type = IonType.CLOB;
break;
case IonTokenConstsX.TOKEN_STRING_TRIPLE_QUOTE:
set_state(STATE_IN_CLOB_TRIPLE_QUOTED_CONTENT);
_lob_token = IonTokenConstsX.TOKEN_STRING_TRIPLE_QUOTE;
_value_type = IonType.CLOB;
break;
default:
set_state(STATE_IN_BLOB_CONTENT);
_lob_token = IonTokenConstsX.TOKEN_OPEN_DOUBLE_BRACE;
_value_type = IonType.BLOB;
break;
}
return;
case ACTION_LOAD_SCALAR:
// Identifier symbols are checked against the keyword table
// (null, true, false, nan, sid form); a lone dot becomes the
// symbol "."; every other token maps straight to a scalar type.
if (t == IonTokenConstsX.TOKEN_SYMBOL_IDENTIFIER) {
sb = token_contents_load(t);
_value_keyword = IonTokenConstsX.keyword(sb, 0, sb.length());
switch (_value_keyword) {
case IonTokenConstsX.KEYWORD_NULL:
{
// When trailing_whitespace is set the scanner is not asked
// for a type suffix and the value is a plain null.
// NOTE(review): trailing_whitespace is only assigned by the
// annotation action; presumably this scalar is always
// reached via that action for identifiers - confirm.
int kwt = trailing_whitespace ? IonTokenConstsX.KEYWORD_none : _scanner.peekNullTypeSymbol();
switch (kwt) {
case IonTokenConstsX.KEYWORD_NULL: _null_type = IonType.NULL; break;
case IonTokenConstsX.KEYWORD_BOOL: _null_type = IonType.BOOL; break;
case IonTokenConstsX.KEYWORD_INT: _null_type = IonType.INT; break;
case IonTokenConstsX.KEYWORD_FLOAT: _null_type = IonType.FLOAT; break;
case IonTokenConstsX.KEYWORD_DECIMAL: _null_type = IonType.DECIMAL; break;
case IonTokenConstsX.KEYWORD_TIMESTAMP: _null_type = IonType.TIMESTAMP; break;
case IonTokenConstsX.KEYWORD_SYMBOL: _null_type = IonType.SYMBOL; break;
case IonTokenConstsX.KEYWORD_STRING: _null_type = IonType.STRING; break;
case IonTokenConstsX.KEYWORD_BLOB: _null_type = IonType.BLOB; break;
case IonTokenConstsX.KEYWORD_CLOB: _null_type = IonType.CLOB; break;
case IonTokenConstsX.KEYWORD_LIST: _null_type = IonType.LIST; break;
case IonTokenConstsX.KEYWORD_SEXP: _null_type = IonType.SEXP; break;
case IonTokenConstsX.KEYWORD_STRUCT: _null_type = IonType.STRUCT; break;
case IonTokenConstsX.KEYWORD_none: _null_type = IonType.NULL; break; // this happens when there isn't a '.' otherwise peek throws the error or returns none
default: parse_error("invalid keyword id ("+kwt+") encountered while parsing a null");
}
// at this point we've consumed a dot '.' and its preceding whitespace
// clear_value();
current_value_is_null(_null_type);
// set to null_type in above call: _value_type = IonType.NULL;
break;
}
case IonTokenConstsX.KEYWORD_TRUE:
_value_type = IonType.BOOL;
current_value_is_bool(true);
break;
case IonTokenConstsX.KEYWORD_FALSE:
_value_type = IonType.BOOL;
current_value_is_bool(false);
break;
case IonTokenConstsX.KEYWORD_NAN:
_value_type = IonType.FLOAT;
clear_current_value_buffer();
_v.setValue(Double.NaN);
_v.setAuthoritativeType(AS_TYPE.double_value);
break;
case IonTokenConstsX.KEYWORD_sid:
{
int sid = IonTokenConstsX.decodeSid(sb);
_v.setValue(sid);
_v.setAuthoritativeType(AS_TYPE.int_value);
}
// No break above: control falls through into default, so a sid
// keyword also gets _value_type = SYMBOL.
// NOTE(review): presumably intentional - confirm.
default:
// We don't care about any other 'keywords'
_value_type = IonType.SYMBOL;
break;
}
}
else if (t == IonTokenConstsX.TOKEN_DOT) {
_value_type = IonType.SYMBOL;
clear_current_value_buffer();
_v.setValue(".");
_v.setAuthoritativeType(AS_TYPE.string_value);
}
else {
// if it's not a symbol we just look at the token type
_value_type = IonTokenConstsX.ion_type_of_scalar(t);
}
int state_after_scalar = get_state_after_value();
set_state(state_after_scalar);
return;
case ACTION_PLUS_INF:
_value_type = IonType.FLOAT;
clear_current_value_buffer();
_v.setValue(Double.POSITIVE_INFINITY);
_v.setAuthoritativeType(AS_TYPE.double_value);
state_after_scalar = get_state_after_value();
set_state(state_after_scalar);
return;
case ACTION_MINUS_INF:
_value_type = IonType.FLOAT;
clear_current_value_buffer();
_v.setValue(Double.NEGATIVE_INFINITY);
_v.setAuthoritativeType(AS_TYPE.double_value);
state_after_scalar = get_state_after_value();
set_state(state_after_scalar);
return;
case ACTION_EAT_COMMA:
// Consumes a separator and keeps looping with the token after
// it. Structs expect a field name next; other containers expect
// an (optionally annotated) value.
if (_container_prohibits_commas) {
parse_error("commas aren't used to separate values in "+getContainerType().toString());
}
int new_state = STATE_BEFORE_ANNOTATION_CONTAINED;
if (_container_is_struct) {
new_state = STATE_BEFORE_FIELD_NAME;
}
set_state(new_state);
_scanner.tokenIsFinished();
// when we eat a comma we need to reset the current
// value start used to define a span, since the comma
// isn't part of the span when it's hoisted
// NOTE(review): only the offset is refreshed here;
// _value_start_line and _value_start_column keep the values
// captured at method entry - confirm that is intended.
_value_start_offset = _scanner.getStartingOffset();
t = _scanner.nextToken();
break;
case ACTION_FINISH_CONTAINER:
// The closing token decides the follow-on state; _eof is set
// on this path just as on the EOF and end-of-datagram paths.
new_state = get_state_after_container(t);
set_state(new_state);
_eof = true;
return;
case ACTION_FINISH_LOB:
state_after_scalar = get_state_after_value();
set_state(state_after_scalar);
return;
case ACTION_FINISH_DATAGRAM:
// The datagram may only end at depth 0.
if (getDepth() != 0) {
parse_error("state failure end of datagram encounterd with a non-container stack");
}
set_state(STATE_EOF);
_eof = true;
return;
// NOTE(review): if parse_error ever returned normally, this branch
// would re-enter the loop with the same state and token - confirm
// that parse_error always throws.
default: parse_error("unexpected token encountered: "+IonTokenConstsX.getTokenName(t));
}
}
}