in hphp/runtime/ext/json/JSON_parser.cpp [1236:1724]
bool JSON_parser(Variant &z, const char *p, int length, bool const assoc,
int depth, int64_t options) {
// No GC safepoints during JSON parsing, please. Code is not re-entrant.
NoHandleSurpriseScope no_surprise(SafepointFlags);
json_parser *json = s_json_parser.get(); /* the parser state */
// Clear and reuse the thread-local string buffers. They are only freed if
// they exceed kMaxPersistentStringBufferCapacity at exit or if the thread
// is explicitly flushed (e.g., due to being idle).
json->initSb(length);
if (depth <= 0) {
json->error_code = json_error_codes::JSON_ERROR_DEPTH;
return false;
}
SCOPE_EXIT {
constexpr int kMaxPersistentStringBufferCapacity = 256 * 1024;
if (json->sb_cap > kMaxPersistentStringBufferCapacity) json->flushSb();
};
// SimpleParser only handles the most common set of options. Also, only use it
// if its array nesting depth check is *more* restrictive than what the user
// asks for, to ensure that the precise semantics of the general case is
// applied for all nesting overflows.
if (assoc &&
options == (options & (k_JSON_FB_LOOSE |
k_JSON_FB_DARRAYS |
k_JSON_FB_DARRAYS_AND_VARRAYS |
k_JSON_FB_HACK_ARRAYS |
k_JSON_FB_THRIFT_SIMPLE_JSON)) &&
depth >= SimpleParser::kMaxArrayDepth &&
length <= RuntimeOption::EvalSimpleJsonMaxLength &&
SimpleParser::TryParse(p, length, json->tl_buffer.tv, z,
get_container_type_from_options(options),
options & k_JSON_FB_THRIFT_SIMPLE_JSON)) {
return true;
}
int b; /* the next character */
int c; /* the next character class */
int s; /* the next state */
int state = 0;
/*<fb>*/
bool const loose = options & k_JSON_FB_LOOSE;
JSONContainerType const container_type =
get_container_type_from_options(options);
int qchr = 0;
int8_t const *byte_class;
int8_t const (*next_state_table)[32];
if (loose) {
byte_class = loose_ascii_class;
next_state_table = loose_state_transition_table;
} else {
byte_class = ascii_class;
next_state_table = state_transition_table;
}
/*</fb>*/
UncheckedBuffer *buf = &json->sb_buf;
UncheckedBuffer *key = &json->sb_key;
DataType type = kInvalidDataType;
unsigned short escaped_bytes = 0;
auto reset_type = [&] { type = kInvalidDataType; };
json->depth = depth;
// Since the stack is maintainined on a per request basis, for performance
// reasons, it only makes sense to expand if necessary and cycles are wasted
// contracting. Calls with a depth other than default should be rare.
if (depth > json->stack.size()) {
auto const newSize = sizeof(json_parser::json_state) * depth;
if (newSize > kMaxSmallSize && tl_heap->preAllocOOM(newSize)) {
check_non_safepoint_surprise();
}
json->stack.resize(depth);
}
SCOPE_EXIT {
if (json->stack.empty()) return;
for (int i = 0; i <= json->mark; i++) {
json->stack[i].key.reset();
json->stack[i].val.unset();
}
json->mark = -1;
};
json->mark = json->top = -1;
push(json, Mode::DONE);
UTF8To16Decoder decoder(p, length, loose);
for (;;) {
b = decoder.decode();
// Fast-case most common transition: append a simple string character.
if (state == 3 && type == KindOfString) {
while (b != '\"' && b != '\\' && b != '\'' && b <= 127 && b >= ' ') {
buf->append((char)b);
b = decoder.decode();
}
}
if (b == UTF8_END) break; // UTF-8 decoding finishes successfully.
if (b == UTF8_ERROR) {
s_json_parser->error_code = JSON_ERROR_UTF8;
return false;
}
assertx(b >= 0);
if ((b & 127) == b) {
/*<fb>*/
c = byte_class[b];
/*</fb>*/
if (c <= S_ERR) {
s_json_parser->error_code = JSON_ERROR_CTRL_CHAR;
return false;
}
} else {
c = S_ETC;
}
/*
Get the next state from the transition table.
*/
/*<fb>*/
s = next_state_table[state][c];
if (s == -4) {
if (b != qchr) {
s = 3;
} else {
qchr = 0;
}
}
/*</fb>*/
if (s < 0) {
/*
Perform one of the predefined actions.
*/
switch (s) {
/*
empty }
*/
case -9:
/*<fb>*/
if (json->top == 1) z = json->stack[json->top].val;
else {
/*</fb>*/
attach_zval(json, json->stack[json->top].key, assoc, container_type);
/*<fb>*/
}
/*</fb>*/
if (!pop(json, Mode::KEY)) {
return false;
}
state = 9;
break;
/*
{
*/
case -8:
if (!push(json, Mode::KEY)) {
s_json_parser->error_code = JSON_ERROR_DEPTH;
return false;
}
state = 1;
if (json->top > 0) {
Variant &top = json->stack[json->top].val;
/*<fb>*/
if (container_type == JSONContainerType::COLLECTIONS) {
// stable_maps is meaningless
top = req::make<c_Map>();
} else {
/*</fb>*/
if (!assoc) {
top = SystemLib::AllocStdClassObject();
/* <fb> */
} else if (container_type == JSONContainerType::HACK_ARRAYS) {
top = Array::CreateDict();
} else if (container_type == JSONContainerType::DARRAYS ||
container_type == JSONContainerType::DARRAYS_AND_VARRAYS)
{
top = Array::CreateDict();
/* </fb> */
} else {
top = Array::CreateDict();
}
/*<fb>*/
}
/*</fb>*/
json->stack[json->top].key = copy_and_clear(*key);
reset_type();
}
break;
/*
}
*/
case -7:
/*** BEGIN Facebook: json_utf8_loose ***/
/*
If this is a trailing comma in an object definition,
we're in Mode::KEY. In that case, throw that off the
stack and restore Mode::OBJECT so that we pretend the
trailing comma just didn't happen.
*/
if (loose) {
if (pop(json, Mode::KEY)) {
push(json, Mode::OBJECT);
}
}
/*** END Facebook: json_utf8_loose ***/
if (type != kInvalidDataType &&
json->stack[json->top].mode == Mode::OBJECT) {
Variant mval;
json_create_zval(mval, *buf, type, options);
Variant &top = json->stack[json->top].val;
object_set(json, top, copy_and_clear(*key),
mval, assoc, container_type);
buf->clear();
reset_type();
}
/*<fb>*/
if (json->top == 1) z = json->stack[json->top].val;
else {
/*</fb>*/
attach_zval(json, json->stack[json->top].key,
assoc, container_type);
/*<fb>*/
}
/*</fb>*/
if (!pop(json, Mode::OBJECT)) {
s_json_parser->error_code = JSON_ERROR_STATE_MISMATCH;
return false;
}
state = 9;
break;
/*
[
*/
case -6:
if (!push(json, Mode::ARRAY)) {
s_json_parser->error_code = JSON_ERROR_DEPTH;
return false;
}
state = 2;
if (json->top > 0) {
Variant &top = json->stack[json->top].val;
/*<fb>*/
if (container_type == JSONContainerType::COLLECTIONS) {
top = req::make<c_Vector>();
} else if (container_type == JSONContainerType::HACK_ARRAYS) {
top = Array::CreateVec();
} else if (container_type == JSONContainerType::DARRAYS_AND_VARRAYS) {
top = Array::CreateVec();
} else if (container_type == JSONContainerType::DARRAYS) {
top = Array::CreateDict();
} else {
top = Array::CreateDict();
}
/*</fb>*/
json->stack[json->top].key = copy_and_clear(*key);
reset_type();
}
break;
/*
]
*/
case -5:
{
if (type != kInvalidDataType &&
json->stack[json->top].mode == Mode::ARRAY) {
Variant mval;
json_create_zval(mval, *buf, type, options);
auto& top = json->stack[json->top].val;
appendToContainer(container_type, top, mval);
buf->clear();
reset_type();
}
/*<fb>*/
if (json->top == 1) z = json->stack[json->top].val;
else {
/*</fb>*/
attach_zval(json, json->stack[json->top].key, assoc,
container_type);
/*<fb>*/
}
/*</fb>*/
if (!pop(json, Mode::ARRAY)) {
s_json_parser->error_code = JSON_ERROR_STATE_MISMATCH;
return false;
}
state = 9;
}
break;
/*
"
*/
case -4:
switch (json->stack[json->top].mode) {
case Mode::KEY:
state = 27;
std::swap(buf, key);
reset_type();
break;
case Mode::ARRAY:
case Mode::OBJECT:
state = 9;
break;
case Mode::DONE:
if (type == KindOfString) {
z = copy_and_clear(*buf);
state = 9;
break;
}
/* fall through if not KindOfString */
default:
s_json_parser->error_code = JSON_ERROR_SYNTAX;
return false;
}
break;
/*
,
*/
case -3:
{
Variant mval;
if (type != kInvalidDataType &&
(json->stack[json->top].mode == Mode::OBJECT ||
json->stack[json->top].mode == Mode::ARRAY)) {
json_create_zval(mval, *buf, type, options);
}
switch (json->stack[json->top].mode) {
case Mode::OBJECT:
if (pop(json, Mode::OBJECT) &&
push(json, Mode::KEY)) {
if (type != kInvalidDataType) {
Variant &top = json->stack[json->top].val;
object_set(
json,
top,
copy_and_clear(*key),
mval,
assoc,
container_type
);
}
state = 29;
}
break;
case Mode::ARRAY:
if (type != kInvalidDataType) {
auto& top = json->stack[json->top].val;
appendToContainer(container_type, top, mval);
}
state = 28;
break;
default:
s_json_parser->error_code = JSON_ERROR_SYNTAX;
return false;
}
buf->clear();
reset_type();
check_non_safepoint_surprise();
}
break;
/*<fb>*/
/*
: (after unquoted string)
*/
case -10:
if (json->stack[json->top].mode == Mode::KEY) {
state = 27;
std::swap(buf, key);
reset_type();
s = -2;
} else {
s = 3;
break;
}
/*</fb>*/
/*
:
*/
case -2:
if (pop(json, Mode::KEY) && push(json, Mode::OBJECT)) {
state = 28;
break;
}
/*
syntax error
*/
case -1:
s_json_parser->error_code = JSON_ERROR_SYNTAX;
return false;
}
} else {
/*
Change the state and iterate.
*/
bool is_tsimplejson = options & k_JSON_FB_THRIFT_SIMPLE_JSON;
if (type == KindOfString) {
if (/*<fb>*/(/*</fb>*/s == 3/*<fb>*/ || s == 30)/*</fb>*/ &&
state != 8) {
if (state != 4) {
utf16_to_utf8(*buf, b);
} else {
switch (b) {
case 'b': buf->append('\b'); break;
case 't': buf->append('\t'); break;
case 'n': buf->append('\n'); break;
case 'f': buf->append('\f'); break;
case 'r': buf->append('\r'); break;
default:
utf16_to_utf8(*buf, b);
break;
}
}
} else if (s == 6) {
if (UNLIKELY(is_tsimplejson)) {
if (UNLIKELY(b != '0')) {
s_json_parser->error_code = JSON_ERROR_SYNTAX;
return false;
}
escaped_bytes = 0;
} else {
escaped_bytes = dehexchar(b) << 12;
}
} else if (s == 7) {
if (UNLIKELY(is_tsimplejson)) {
if (UNLIKELY(b != '0')) {
s_json_parser->error_code = JSON_ERROR_SYNTAX;
return false;
}
} else {
escaped_bytes += dehexchar(b) << 8;
}
} else if (s == 8) {
escaped_bytes += dehexchar(b) << 4;
} else if (s == 3 && state == 8) {
escaped_bytes += dehexchar(b);
if (UNLIKELY(is_tsimplejson)) {
buf->append((char)escaped_bytes);
} else {
utf16_to_utf8(*buf, escaped_bytes);
}
}
} else if ((type == kInvalidDataType || type == KindOfNull) &&
(c == S_DIG || c == S_ZER)) {
type = KindOfInt64;
buf->append((char)b);
} else if (type == KindOfInt64 && s == 24) {
type = KindOfDouble;
buf->append((char)b);
} else if ((type == kInvalidDataType || type == KindOfNull ||
type == KindOfInt64) &&
c == S_DOT) {
type = KindOfDouble;
buf->append((char)b);
} else if (type != KindOfString && c == S_QUO) {
type = KindOfString;
/*<fb>*/qchr = b;/*</fb>*/
} else if ((type == kInvalidDataType || type == KindOfNull ||
type == KindOfInt64 || type == KindOfDouble) &&
((state == 12 && s == 9) ||
(state == 16 && s == 9))) {
type = KindOfBoolean;
} else if (type == kInvalidDataType && state == 19 && s == 9) {
type = KindOfNull;
} else if (type != KindOfString && c > S_WSP) {
utf16_to_utf8(*buf, b);
}
state = s;
}
}
if (state == 9 && pop(json, Mode::DONE)) {
s_json_parser->error_code = JSON_ERROR_NONE;
return true;
}
s_json_parser->error_code = JSON_ERROR_SYNTAX;
return false;
}