in amplify/backend/function/iamxawswrangler/lib/python/pandas/_libs/src/ujson/lib/ultrajsondec.c [680:958]
/*
 * Decode a JSON string literal into the decoder's wchar_t scratch buffer and
 * hand the decoded range to ds->dec->newString().
 *
 * On entry ds->start is advanced by one (presumably past the opening quote;
 * the caller is not visible here -- TODO confirm). Input is then consumed up
 * to and including the closing quote, and ds->start is left just past it.
 *
 * The scratch buffer [ds->escStart, ds->escEnd) is grown when it holds fewer
 * wchar_t units than there are remaining input bytes. Every construct handled
 * below emits at most one wchar_t per input byte consumed, so a buffer of
 * that size cannot be overrun by the write cursor.
 *
 * Returns the value produced by ds->dec->newString() on success (with
 * ds->lastType set to JT_UTF8), or the value produced by SetError() on
 * malformed input or allocation failure (ds->lastType stays JT_INVALID).
 *
 * NOTE(review): error message texts are reproduced verbatim, including the
 * duplicated "when" and the "encoding" wording of the 3 byte overlong
 * message, because callers may match on them.
 */
JSOBJ FASTCALL_MSVC decode_string(struct DecoderState *ds) {
    JSUTF16 sur[2] = {0};
    int iSur = 0;
    int index;
    wchar_t *escOffset;
    size_t escLen = (ds->escEnd - ds->escStart);
    JSUINT8 *inputOffset;
    JSUINT8 oct;
    JSUTF32 ucs;

    ds->lastType = JT_INVALID;
    ds->start++;

    if ((size_t)(ds->end - ds->start) > escLen) {
        size_t newSize = (ds->end - ds->start);
        wchar_t *newStart;

        // Guard the byte-size multiplication below against overflow.
        if (newSize > (SIZE_MAX / sizeof(wchar_t))) {
            return SetError(ds, -1, "Could not reserve memory block");
        }

        if (ds->escHeap) {
            newStart = (wchar_t *)ds->dec->realloc(ds->escStart,
                                                   newSize * sizeof(wchar_t));
            if (!newStart) {
                // A failed realloc leaves the old block allocated. Keep it
                // in ds (escStart/escEnd/escHeap stay consistent) instead of
                // freeing it here, which would leave a dangling pointer in
                // ds->escStart for whoever releases the heap buffer later.
                return SetError(ds, -1, "Could not reserve memory block");
            }
        } else {
            newStart = (wchar_t *)ds->dec->malloc(newSize * sizeof(wchar_t));
            if (!newStart) {
                // ds still refers to the original (non-heap) buffer.
                return SetError(ds, -1, "Could not reserve memory block");
            }
            memcpy(newStart, ds->escStart, escLen * sizeof(wchar_t));
            ds->escHeap = 1;
        }

        // Commit the new buffer only once the allocation has succeeded.
        ds->escStart = newStart;
        ds->escEnd = ds->escStart + newSize;
    }

    escOffset = ds->escStart;
    inputOffset = (JSUINT8 *)ds->start;

    for (;;) {
        // g_decoderLookup classifies the lead byte: special markers
        // (DS_ISNULL, DS_ISQUOTE, DS_ISESCAPE, DS_UTFLENERROR) or the byte
        // length (1..4) of the UTF-8 sequence it starts.
        switch (g_decoderLookup[(JSUINT8)(*inputOffset)]) {
            case DS_ISNULL: {
                return SetError(ds, -1,
                                "Unmatched ''\"' when when decoding 'string'");
            }
            case DS_ISQUOTE: {
                ds->lastType = JT_UTF8;
                inputOffset++;
                ds->start += ((char *)inputOffset - (ds->start));
                return ds->dec->newString(ds->prv, ds->escStart, escOffset);
            }
            case DS_UTFLENERROR: {
                return SetError(
                    ds, -1,
                    "Invalid UTF-8 sequence length when decoding 'string'");
            }
            case DS_ISESCAPE:
                inputOffset++;
                switch (*inputOffset) {
                    case '\\':
                        *(escOffset++) = L'\\';
                        inputOffset++;
                        continue;
                    case '\"':
                        *(escOffset++) = L'\"';
                        inputOffset++;
                        continue;
                    case '/':
                        *(escOffset++) = L'/';
                        inputOffset++;
                        continue;
                    case 'b':
                        *(escOffset++) = L'\b';
                        inputOffset++;
                        continue;
                    case 'f':
                        *(escOffset++) = L'\f';
                        inputOffset++;
                        continue;
                    case 'n':
                        *(escOffset++) = L'\n';
                        inputOffset++;
                        continue;
                    case 'r':
                        *(escOffset++) = L'\r';
                        inputOffset++;
                        continue;
                    case 't':
                        *(escOffset++) = L'\t';
                        inputOffset++;
                        continue;
                    case 'u': {
                        inputOffset++;
                        // Accumulate exactly four hex digits into sur[iSur].
                        // sur[] is 16 bits wide, so four 4-bit shifts push
                        // out whatever value the slot held before.
                        for (index = 0; index < 4; index++) {
                            switch (*inputOffset) {
                                case '0':
                                case '1':
                                case '2':
                                case '3':
                                case '4':
                                case '5':
                                case '6':
                                case '7':
                                case '8':
                                case '9':
                                    sur[iSur] = (sur[iSur] << 4) +
                                                (JSUTF16)(*inputOffset - '0');
                                    break;
                                case 'a':
                                case 'b':
                                case 'c':
                                case 'd':
                                case 'e':
                                case 'f':
                                    sur[iSur] = (sur[iSur] << 4) + 10 +
                                                (JSUTF16)(*inputOffset - 'a');
                                    break;
                                case 'A':
                                case 'B':
                                case 'C':
                                case 'D':
                                case 'E':
                                case 'F':
                                    sur[iSur] = (sur[iSur] << 4) + 10 +
                                                (JSUTF16)(*inputOffset - 'A');
                                    break;
                                case '\0':
                                    return SetError(ds, -1,
                                                    "Unterminated unicode "
                                                    "escape sequence when "
                                                    "decoding 'string'");
                                default:
                                    return SetError(ds, -1,
                                                    "Unexpected character in "
                                                    "unicode escape sequence "
                                                    "when decoding 'string'");
                            }
                            inputOffset++;
                        }

                        if (iSur == 0) {
                            if ((sur[iSur] & 0xfc00) == 0xd800) {
                                // First of a surrogate pair, continue parsing.
                                // NOTE(review): the pairing check only runs
                                // when another \u escape is decoded; a high
                                // surrogate followed by anything else is
                                // never written to the output.
                                iSur++;
                                break;
                            }
                            (*escOffset++) = (wchar_t)sur[iSur];
                            iSur = 0;
                        } else {
                            // Decode pair
                            if ((sur[1] & 0xfc00) != 0xdc00) {
                                return SetError(ds, -1,
                                                "Unpaired high surrogate when "
                                                "decoding 'string'");
                            }
#if WCHAR_MAX == 0xffff
                            // 16-bit wchar_t: store the pair as-is.
                            (*escOffset++) = (wchar_t)sur[0];
                            (*escOffset++) = (wchar_t)sur[1];
#else
                            // Wider wchar_t: combine into one code point.
                            (*escOffset++) =
                                (wchar_t)0x10000 +
                                (((sur[0] - 0xd800) << 10) | (sur[1] - 0xdc00));
#endif
                            iSur = 0;
                        }
                        break;
                    }
                    case '\0':
                        return SetError(ds, -1,
                                        "Unterminated escape sequence when "
                                        "decoding 'string'");
                    default:
                        return SetError(ds, -1,
                                        "Unrecognized escape sequence when "
                                        "decoding 'string'");
                }
                break;
            case 1: {
                // Single-byte sequence: copy through unchanged.
                *(escOffset++) = (wchar_t)(*inputOffset++);
                break;
            }
            case 2: {
                ucs = (*inputOffset++) & 0x1f;
                ucs <<= 6;
                // The trailing byte is tested before it is consumed, so a
                // terminating NUL is reported rather than skipped.
                if (((*inputOffset) & 0x80) != 0x80) {
                    return SetError(ds, -1,
                                    "Invalid octet in UTF-8 sequence when "
                                    "decoding 'string'");
                }
                ucs |= (*inputOffset++) & 0x3f;
                if (ucs < 0x80) {
                    return SetError(ds, -1,
                                    "Overlong 2 byte UTF-8 sequence detected "
                                    "when decoding 'string'");
                }
                *(escOffset++) = (wchar_t)ucs;
                break;
            }
            case 3: {
                ucs = (*inputOffset++) & 0x0f;
                for (index = 0; index < 2; index++) {
                    ucs <<= 6;
                    oct = (*inputOffset++);
                    // Returns at the first byte without the high bit (which
                    // includes NUL), so no further byte is read after it.
                    if ((oct & 0x80) != 0x80) {
                        return SetError(ds, -1,
                                        "Invalid octet in UTF-8 sequence when "
                                        "decoding 'string'");
                    }
                    ucs |= oct & 0x3f;
                }
                if (ucs < 0x800) {
                    return SetError(ds, -1,
                                    "Overlong 3 byte UTF-8 sequence detected "
                                    "when encoding string");
                }
                *(escOffset++) = (wchar_t)ucs;
                break;
            }
            case 4: {
                ucs = (*inputOffset++) & 0x07;
                for (index = 0; index < 3; index++) {
                    ucs <<= 6;
                    oct = (*inputOffset++);
                    if ((oct & 0x80) != 0x80) {
                        return SetError(ds, -1,
                                        "Invalid octet in UTF-8 sequence when "
                                        "decoding 'string'");
                    }
                    ucs |= oct & 0x3f;
                }
                if (ucs < 0x10000) {
                    return SetError(ds, -1,
                                    "Overlong 4 byte UTF-8 sequence detected "
                                    "when decoding 'string'");
                }
#if WCHAR_MAX == 0xffff
                // ucs >= 0x10000 is guaranteed by the overlong check above,
                // so with a 16-bit wchar_t it is always split into a
                // surrogate pair.
                ucs -= 0x10000;
                *(escOffset++) = (wchar_t)(ucs >> 10) + 0xd800;
                *(escOffset++) = (wchar_t)(ucs & 0x3ff) + 0xdc00;
#else
                *(escOffset++) = (wchar_t)ucs;
#endif
                break;
            }
        }
    }
}