static int get_class_atom()

in src/couch_quickjs/quickjs/libregexp.c [634:746]


static int get_class_atom(REParseState *s, CharRange *cr,
                          const uint8_t **pp, BOOL inclass)
{
    const uint8_t *p;
    uint32_t c;
    int ret;

    p = *pp;

    c = *p;
    switch(c) {
    case '\\':
        p++;
        if (p >= s->buf_end)
            goto unexpected_end;
        c = *p++;
        switch(c) {
        case 'd':
            c = CHAR_RANGE_d;
            goto class_range;
        case 'D':
            c = CHAR_RANGE_D;
            goto class_range;
        case 's':
            c = CHAR_RANGE_s;
            goto class_range;
        case 'S':
            c = CHAR_RANGE_S;
            goto class_range;
        case 'w':
            c = CHAR_RANGE_w;
            goto class_range;
        case 'W':
            c = CHAR_RANGE_W;
        class_range:
            if (cr_init_char_range(s, cr, c))
                return -1;
            c = CLASS_RANGE_BASE;
            break;
        case 'c':
            c = *p;
            if ((c >= 'a' && c <= 'z') ||
                (c >= 'A' && c <= 'Z') ||
                (((c >= '0' && c <= '9') || c == '_') &&
                 inclass && !s->is_unicode)) {   /* Annex B.1.4 */
                c &= 0x1f;
                p++;
            } else if (s->is_unicode) {
                goto invalid_escape;
            } else {
                /* otherwise return '\' and 'c' */
                p--;
                c = '\\';
            }
            break;
        case '-':
            if (!inclass && s->is_unicode)
                goto invalid_escape;
            break;
#ifdef CONFIG_ALL_UNICODE
        case 'p':
        case 'P':
            if (s->is_unicode) {
                if (parse_unicode_property(s, cr, &p, (c == 'P')))
                    return -1;
                c = CLASS_RANGE_BASE;
                break;
            }
            /* fall thru */
#endif
        default:
            p--;
            ret = lre_parse_escape(&p, s->is_unicode * 2);
            if (ret >= 0) {
                c = ret;
            } else {
                if (ret == -2 && *p != '\0' && strchr("^$\\.*+?()[]{}|/", *p)) {
                    /* always valid to escape these characters */
                    goto normal_char;
                } else if (s->is_unicode) {
                invalid_escape:
                    return re_parse_error(s, "invalid escape sequence in regular expression");
                } else {
                    /* just ignore the '\' */
                    goto normal_char;
                }
            }
            break;
        }
        break;
    case '\0':
        if (p >= s->buf_end) {
        unexpected_end:
            return re_parse_error(s, "unexpected end");
        }
        /* fall thru */
    default:
    normal_char:
        /* normal char */
        if (c >= 128) {
            c = unicode_from_utf8(p, UTF8_CHAR_LEN_MAX, &p);
            if ((unsigned)c > 0xffff && !s->is_unicode) {
                /* XXX: should handle non BMP-1 code points */
                return re_parse_error(s, "malformed unicode char");
            }
        } else {
            p++;
        }
        break;
    }
    *pp = p;
    return c;
}