JSOBJ FASTCALL_MSVC decode_string()

in amplify/backend/function/iamxawswrangler/lib/python/pandas/_libs/src/ujson/lib/ultrajsondec.c [680:958]


/*
 * Decodes one JSON string literal into the decoder's wide-character scratch
 * buffer and returns the object built by ds->dec->newString().
 *
 * On entry ds->start is advanced by one character (presumably the opening
 * quote -- confirm against the caller). The input is scanned byte by byte
 * until an unescaped closing quote is found; the scan relies on a NUL byte
 * terminating the input, which is reported as an unmatched quote.
 *
 * Handled input:
 *   - simple escapes: \\ \" \/ \b \f \n \r \t
 *   - \uXXXX escapes, including UTF-16 surrogate pairs; a high surrogate
 *     must be followed immediately by a \uXXXX low surrogate
 *   - raw UTF-8 sequences of 1 to 4 bytes, with overlong forms rejected
 *
 * The scratch buffer [ds->escStart, ds->escEnd) is grown to one wchar_t per
 * remaining input byte when it is too small. The first growth switches from
 * the initial buffer to a heap block and sets ds->escHeap.
 *
 * Returns the value of ds->dec->newString() on success (ds->lastType is set
 * to JT_UTF8 and ds->start is moved past the closing quote), or the value of
 * SetError() on malformed input or allocation failure (ds->lastType stays
 * JT_INVALID).
 */
JSOBJ FASTCALL_MSVC decode_string(struct DecoderState *ds) {
    JSUTF16 sur[2] = {0};
    int iSur = 0;
    int index;
    wchar_t *escOffset;
    size_t escLen = (ds->escEnd - ds->escStart);
    JSUINT8 *inputOffset;
    JSUINT8 oct;
    JSUTF32 ucs;
    ds->lastType = JT_INVALID;
    ds->start++;

    if ((size_t)(ds->end - ds->start) > escLen) {
        size_t newSize = (ds->end - ds->start);
        wchar_t *newBuf;

        // Guard the byte-size multiplication below against overflow.
        if (newSize > (SIZE_MAX / sizeof(wchar_t))) {
            return SetError(ds, -1, "Could not reserve memory block");
        }

        if (ds->escHeap) {
            newBuf = (wchar_t *)ds->dec->realloc(ds->escStart,
                                                 newSize * sizeof(wchar_t));
            if (!newBuf) {
                // A failed realloc leaves the old block allocated. Release
                // it and drop every reference to it so that no later
                // cleanup keyed on escHeap/escStart can free it again.
                ds->dec->free(ds->escStart);
                ds->escStart = NULL;
                ds->escEnd = NULL;
                ds->escHeap = 0;
                return SetError(ds, -1, "Could not reserve memory block");
            }
        } else {
            // Allocate into a temporary so a failure leaves ds untouched.
            newBuf = (wchar_t *)ds->dec->malloc(newSize * sizeof(wchar_t));
            if (!newBuf) {
                return SetError(ds, -1, "Could not reserve memory block");
            }
            ds->escHeap = 1;
            memcpy(newBuf, ds->escStart, escLen * sizeof(wchar_t));
        }

        ds->escStart = newBuf;
        ds->escEnd = newBuf + newSize;
    }

    escOffset = ds->escStart;
    inputOffset = (JSUINT8 *)ds->start;

    for (;;) {
        switch (g_decoderLookup[(JSUINT8)(*inputOffset)]) {
            case DS_ISNULL: {
                return SetError(ds, -1,
                                "Unmatched ''\"' when when decoding 'string'");
            }
            case DS_ISQUOTE: {
                ds->lastType = JT_UTF8;
                inputOffset++;
                ds->start += ((char *)inputOffset - (ds->start));
                return ds->dec->newString(ds->prv, ds->escStart, escOffset);
            }
            case DS_UTFLENERROR: {
                return SetError(
                    ds, -1,
                    "Invalid UTF-8 sequence length when decoding 'string'");
            }
            case DS_ISESCAPE:
                inputOffset++;
                switch (*inputOffset) {
                    case '\\':
                        *(escOffset++) = L'\\';
                        inputOffset++;
                        continue;
                    case '\"':
                        *(escOffset++) = L'\"';
                        inputOffset++;
                        continue;
                    case '/':
                        *(escOffset++) = L'/';
                        inputOffset++;
                        continue;
                    case 'b':
                        *(escOffset++) = L'\b';
                        inputOffset++;
                        continue;
                    case 'f':
                        *(escOffset++) = L'\f';
                        inputOffset++;
                        continue;
                    case 'n':
                        *(escOffset++) = L'\n';
                        inputOffset++;
                        continue;
                    case 'r':
                        *(escOffset++) = L'\r';
                        inputOffset++;
                        continue;
                    case 't':
                        *(escOffset++) = L'\t';
                        inputOffset++;
                        continue;

                    case 'u': {
                        inputOffset++;

                        // Accumulate exactly four hex digits into sur[iSur].
                        // The 16-bit element shifts out any previous value.
                        for (index = 0; index < 4; index++) {
                            JSUINT8 hex = *inputOffset;
                            JSUTF16 nibble;

                            if (hex >= '0' && hex <= '9') {
                                nibble = (JSUTF16)(hex - '0');
                            } else if (hex >= 'a' && hex <= 'f') {
                                nibble = (JSUTF16)(10 + (hex - 'a'));
                            } else if (hex >= 'A' && hex <= 'F') {
                                nibble = (JSUTF16)(10 + (hex - 'A'));
                            } else if (hex == '\0') {
                                return SetError(ds, -1,
                                                "Unterminated unicode "
                                                "escape sequence when "
                                                "decoding 'string'");
                            } else {
                                return SetError(ds, -1,
                                                "Unexpected character in "
                                                "unicode escape sequence "
                                                "when decoding 'string'");
                            }

                            sur[iSur] = (JSUTF16)((sur[iSur] << 4) + nibble);
                            inputOffset++;
                        }

                        if (iSur == 0) {
                            if ((sur[0] & 0xfc00) == 0xd800) {
                                // High surrogate: the low half must follow
                                // immediately as another \uXXXX escape.
                                // Otherwise it would be dropped silently or
                                // paired with an unrelated later escape.
                                // The second read is safe: it only happens
                                // when the first byte is '\\', not NUL.
                                if (inputOffset[0] != '\\' ||
                                    inputOffset[1] != 'u') {
                                    return SetError(
                                        ds, -1,
                                        "Unpaired high surrogate when "
                                        "decoding 'string'");
                                }
                                iSur++;
                                break;
                            }
                            (*escOffset++) = (wchar_t)sur[0];
                        } else {
                            // Second escape of a pair: must be a low
                            // surrogate.
                            if ((sur[1] & 0xfc00) != 0xdc00) {
                                return SetError(ds, -1,
                                                "Unpaired high surrogate when "
                                                "decoding 'string'");
                            }
#if WCHAR_MAX == 0xffff
                            // 16-bit wchar_t: keep the UTF-16 pair as is.
                            (*escOffset++) = (wchar_t)sur[0];
                            (*escOffset++) = (wchar_t)sur[1];
#else
                            // Wider wchar_t: combine into one code point.
                            (*escOffset++) =
                                (wchar_t)0x10000 +
                                (((sur[0] - 0xd800) << 10) | (sur[1] - 0xdc00));
#endif
                            iSur = 0;
                        }
                        break;
                    }

                    case '\0':
                        return SetError(ds, -1,
                                        "Unterminated escape sequence when "
                                        "decoding 'string'");
                    default:
                        return SetError(ds, -1,
                                        "Unrecognized escape sequence when "
                                        "decoding 'string'");
                }
                break;

            case 1: {
                // Single-byte (ASCII) character.
                *(escOffset++) = (wchar_t)(*inputOffset++);
                break;
            }

            case 2: {
                // Two-byte UTF-8 sequence: 110xxxxx 10xxxxxx.
                ucs = (*inputOffset++) & 0x1f;
                ucs <<= 6;
                // A continuation byte has the form 10xxxxxx, so both top
                // bits must be tested. This also stops at the NUL byte.
                if (((*inputOffset) & 0xc0) != 0x80) {
                    return SetError(ds, -1,
                                    "Invalid octet in UTF-8 sequence when "
                                    "decoding 'string'");
                }
                ucs |= (*inputOffset++) & 0x3f;
                if (ucs < 0x80) {
                    return SetError(ds, -1,
                                    "Overlong 2 byte UTF-8 sequence detected "
                                    "when decoding 'string'");
                }
                *(escOffset++) = (wchar_t)ucs;
                break;
            }

            case 3: {
                // Three-byte UTF-8 sequence: 1110xxxx 10xxxxxx 10xxxxxx.
                ucs = (*inputOffset++) & 0x0f;

                for (index = 0; index < 2; index++) {
                    ucs <<= 6;
                    oct = (*inputOffset++);

                    if ((oct & 0xc0) != 0x80) {
                        return SetError(ds, -1,
                                        "Invalid octet in UTF-8 sequence when "
                                        "decoding 'string'");
                    }

                    ucs |= oct & 0x3f;
                }

                if (ucs < 0x800) {
                    return SetError(ds, -1,
                                    "Overlong 3 byte UTF-8 sequence detected "
                                    "when encoding string");
                }
                *(escOffset++) = (wchar_t)ucs;
                break;
            }

            case 4: {
                // Four-byte UTF-8 sequence: 11110xxx followed by three
                // continuation bytes.
                ucs = (*inputOffset++) & 0x07;

                for (index = 0; index < 3; index++) {
                    ucs <<= 6;
                    oct = (*inputOffset++);

                    if ((oct & 0xc0) != 0x80) {
                        return SetError(ds, -1,
                                        "Invalid octet in UTF-8 sequence when "
                                        "decoding 'string'");
                    }

                    ucs |= oct & 0x3f;
                }

                if (ucs < 0x10000) {
                    return SetError(ds, -1,
                                    "Overlong 4 byte UTF-8 sequence detected "
                                    "when decoding 'string'");
                }

#if WCHAR_MAX == 0xffff
                // 16-bit wchar_t: ucs is >= 0x10000 here, so it is always
                // written as a UTF-16 surrogate pair.
                ucs -= 0x10000;
                *(escOffset++) = (wchar_t)(ucs >> 10) + 0xd800;
                *(escOffset++) = (wchar_t)(ucs & 0x3ff) + 0xdc00;
#else
                *(escOffset++) = (wchar_t)ucs;
#endif
                break;
            }
        }
    }
}