private static boolean parseTerm()

in rhino/src/main/java/org/mozilla/javascript/regexp/NativeRegExp.java [1370:1677]


    /**
     * Parses one term of a regular expression starting at {@code state.cp}: either an assertion
     * ({@code ^}, {@code $}, {@code \b}, {@code \B}) or an atom optionally followed by a
     * quantifier ({@code *}, {@code +}, {@code ?}, {@code {min,max}}, each optionally made
     * non-greedy by a trailing {@code ?}).
     *
     * <p>On success the parsed node is left in {@code state.result}, {@code state.cp} points past
     * the consumed characters and {@code state.progLength} has been increased by the amount the
     * matching branch adds for that node.
     *
     * <p>The first character is read without a bounds check, so the caller must only invoke this
     * method while {@code state.cp < state.cpend}.
     *
     * <p>NOTE(review): several branches call {@code reportError} without returning afterwards,
     * which presumably relies on it throwing — confirm against its definition.
     *
     * @param state compiler state holding the pattern characters ({@code cpbegin}), the current
     *     and end positions ({@code cp}, {@code cpend}), paren bookkeeping and the parse result
     * @param params parser options; {@code namedCaptureGroups} is consulted to decide whether
     *     {@code \k<name>} is treated as a named back reference
     * @return {@code true} if a term was parsed, {@code false} if parsing failed
     */
    private static boolean parseTerm(CompilerState state, ParserParameters params) {
        char[] src = state.cpbegin;
        char c = src[state.cp++];
        // Remember how many groups existed before this term so that a quantifier can later
        // record which capture groups belong to the quantified atom.
        int parenBaseCount = state.parenCount;
        int num;
        RENode term;
        int termStart;

        switch (c) {
            /* assertions and atoms */
            case '^':
                state.result = new RENode(REOP_BOL);
                state.progLength++;
                return true;
            case '$':
                state.result = new RENode(REOP_EOL);
                state.progLength++;
                return true;
            case '\\':
                // atom escape; B.1.2 of the ECMAScript specification
                if (state.cp < state.cpend) {
                    c = src[state.cp++];
                    switch (c) {
                        /* assertion escapes */
                        case 'b':
                            state.result = new RENode(REOP_WBDRY);
                            state.progLength++;
                            return true;
                        case 'B':
                            state.result = new RENode(REOP_WNONBDRY);
                            state.progLength++;
                            return true;
                        case '1':
                        case '2':
                        case '3':
                        case '4':
                        case '5':
                        case '6':
                        case '7':
                        case '8':
                        case '9':
                            // decimal escape
                            termStart = state.cp - 1;
                            num = getDecimalValue(c, state, "msg.overlarge.backref");
                            if (num > state.backReferenceLimit) {
                                // Not a usable back reference: warn, rewind to the first digit
                                // and re-parse the escape as a character escape instead.
                                reportWarning(state.cx, "msg.bad.backref", "");
                                state.cp = termStart;
                                if (!parseCharacterAndCharacterClassEscape(state, params))
                                    return false;
                            } else {
                                state.result = new RENode(REOP_BACKREF);
                                state.result.parenIndex = num - 1;
                                state.progLength += 3;
                                if (state.maxBackReference < num) {
                                    state.maxBackReference = num;
                                }
                            }
                            break;
                        case '0':
                            if (state.cp < state.cpend && src[state.cp] == '0') {
                                /*
                                 * We're deliberately violating the ECMA 5.1 specification and allow octal
                                 * escapes to follow spidermonkey and general 'web reality':
                                 * http://wiki.ecmascript.org/doku.php?id=harmony:regexp_match_web_reality
                                 * http://wiki.ecmascript.org/doku.php?id=strawman:match_web_reality_spec
                                 */

                                // follow spidermonkey and allow multiple leading zeros,
                                // e.g. let /\0000/ match the string "\0"
                                parseMultipleLeadingZerosAsOctalEscape(state);
                                break;
                            }
                        /* fall through */
                        default:
                            // Step back onto the escaped character and let the generic escape
                            // parser handle it.
                            state.cp--;
                            if (!parseCharacterAndCharacterClassEscape(state, params)) {
                                if (c == 'k' && params.namedCaptureGroups) {
                                    state.cp++;
                                    String groupName =
                                            extractCaptureGroupName(src, state.cp, state.cpend);
                                    if (groupName != null) {
                                        state.result = new RENode(REOP_NAMED_BACKREF);
                                        state.result.captureGroupNameIndex =
                                                state.cp + 1; // skip '<'
                                        state.result.captureGroupNameLength = groupName.length();
                                        state.cp += groupName.length() + 2; // include '<' and '>'
                                        // REOP_NAMED_BACKREF GROUPNAMEINDEX GROUPNAMELENGTH
                                        state.progLength += 5;
                                    } else {
                                        reportError("msg.invalid.named.backref", "");
                                    }
                                } else if (c == 'c') {
                                    // when lookahead=c, parse the \\ as a literal
                                    doFlat(state, '\\');
                                } else {
                                    return false;
                                }
                            }
                    }
                    break;
                }
                /* a trailing '\' is an error */
                reportError("msg.trail.backslash", "");
                break;
            case '(':
                {
                    // 'result' stays null for a non-capturing group "(?:...)": in that case
                    // the inner disjunction is used directly as the term.
                    RENode result = null;
                    if (state.cp + 1 < state.cpend
                            && src[state.cp] == '?'
                            && ((c = src[state.cp + 1]) == '=' || c == '!' || c == ':')) {
                        state.cp += 2;
                        if (c == '=') {
                            result = new RENode(REOP_ASSERT);
                            /* ASSERT, <next>, ... ASSERTTEST */
                            state.progLength += 4;
                        } else if (c == '!') {
                            result = new RENode(REOP_ASSERT_NOT);
                            /* ASSERTNOT, <next>, ... ASSERTNOTTEST */
                            state.progLength += 4;
                        }
                    } else if (state.cp + 2 < state.cpend
                            && src[state.cp] == '?'
                            && src[state.cp + 1] == '<'
                            && ((c = src[state.cp + 2]) == '=' || c == '!')) {
                        state.cp += 3;
                        if (c == '=') {
                            result = new RENode(REOP_ASSERTBACK);
                            /* ASSERT, <next>, ... ASSERTBACKTEST */
                            state.progLength += 4;
                        } else { // c == '!'
                            result = new RENode(REOP_ASSERTBACK_NOT);
                            /* ASSERTNOT, <next>, ... ASSERTBACKNOTTEST */
                            state.progLength += 4;
                        }
                    } else {
                        result = new RENode(REOP_LPAREN);
                        if (state.cp + 2 < state.cpend
                                && src[state.cp] == '?'
                                && src[state.cp + 1] == '<') {
                            // Named capture group "(?<name>...)": step over '?' so that
                            // state.cp sits on '<' for the name extraction.
                            state.cp += 1;
                            String name = extractCaptureGroupName(src, state.cp, state.cpend);
                            if (name == null) {
                                reportError("msg.invalid.group.name", "");
                                return false;
                            }

                            result.captureGroupNameIndex = state.cp + 1; // skip '<'
                            // length of the name only, without '<' and '>'
                            result.captureGroupNameLength = name.length();
                            state.namedCaptureGroupsFound = true;
                            state.cp += name.length() + 2; // include '<' and '>'
                        }
                        /* LPAREN, <index>, ... RPAREN, <index> */
                        state.progLength += 6;
                        result.parenIndex = state.parenCount++;
                    }
                    ++state.parenNesting;
                    if (!parseDisjunction(state, params)) return false;
                    if (state.cp == state.cpend || src[state.cp] != ')') {
                        reportError("msg.unterm.paren", "");
                        return false;
                    }
                    ++state.cp;
                    --state.parenNesting;
                    if (result != null) {
                        /* if we have a lookbehind then we reverse state.result linked list */
                        if (result.op == REOP_ASSERTBACK || result.op == REOP_ASSERTBACK_NOT) {
                            state.result = reverseNodeList(state.result);
                        }
                        result.kid = state.result;
                        state.result = result;
                    }
                    break;
                }
            case ')':
                reportError("msg.re.unmatched.right.paren", "");
                return false;
            case '[':
                ClassContents classContents = parseClassContents(state, params);
                if (classContents == null) {
                    reportError("msg.unterm.class", "");
                    return false;
                }
                state.result = new RENode(REOP_CLASS);
                state.result.classContents = classContents;
                state.result.index = state.classCount++;
                /*
                 * Call calculateBitmapSize now as we want any errors it finds
                 * to be reported during the parse phase, not at execution.
                 */
                if (!calculateBitmapSize(state.flags, classContents, state.result)) return false;
                state.progLength += 3; /* CLASS, <index> */
                break;

            case '.':
                state.result = new RENode(REOP_DOT);
                state.progLength++;
                break;
            case '*':
            case '+':
            case '?':
                // A quantifier with nothing to quantify.
                reportError("msg.bad.quant", String.valueOf(src[state.cp - 1]));
                return false;
            default:
                // Any other character is a single-character literal.
                state.result = new RENode(REOP_FLAT);
                state.result.chr = c;
                state.result.length = 1;
                state.result.flatIndex = state.cp - 1;
                state.progLength += 3;
                break;
        }

        // An atom has been parsed; look for an optional quantifier following it.
        term = state.result;
        if (state.cp == state.cpend) {
            return true;
        }
        boolean hasQ = false;
        switch (src[state.cp]) {
            case '+':
                state.result = new RENode(REOP_QUANT);
                state.result.min = 1;
                state.result.max = -1;
                /* <PLUS>, <parencount>, <parenindex>, <next> ... <ENDCHILD> */
                state.progLength += 8;
                hasQ = true;
                break;
            case '*':
                state.result = new RENode(REOP_QUANT);
                state.result.min = 0;
                state.result.max = -1;
                /* <STAR>, <parencount>, <parenindex>, <next> ... <ENDCHILD> */
                state.progLength += 8;
                hasQ = true;
                break;
            case '?':
                state.result = new RENode(REOP_QUANT);
                state.result.min = 0;
                state.result.max = 1;
                /* <OPT>, <parencount>, <parenindex>, <next> ... <ENDCHILD> */
                state.progLength += 8;
                hasQ = true;
                break;
            case '{': /* balance '}' */
                {
                    int min = 0;
                    int max = -1;
                    int leftCurl = state.cp;

                    /* For Perl etc. compatibility, if quantifier does not match
                     * \{\d+(,\d*)?\} exactly back off from it
                     * being a quantifier, and chew it up as a literal
                     * atom next time instead.
                     */

                    if (++state.cp < src.length && isDigit(c = src[state.cp])) {
                        ++state.cp;
                        min = getDecimalValue(c, state, "msg.overlarge.min");
                        if (state.cp < src.length) {
                            c = src[state.cp];
                            if (c == ',' && ++state.cp < src.length) {
                                c = src[state.cp];
                                if (isDigit(c) && ++state.cp < src.length) {
                                    max = getDecimalValue(c, state, "msg.overlarge.max");
                                    /*
                                     * The digits of <max> may run up to the end of the
                                     * pattern (e.g. "a{1,23"), so check the bound before
                                     * reading the next character, just as is done after
                                     * parsing <min>. If nothing follows, c remains a digit,
                                     * no '}' is seen and we back off to a literal '{'.
                                     */
                                    if (state.cp < src.length) {
                                        c = src[state.cp];
                                        if (min > max) {
                                            String msg =
                                                    ScriptRuntime.getMessageById(
                                                            "msg.max.lt.min",
                                                            Integer.valueOf(max),
                                                            Integer.valueOf(min));
                                            throw ScriptRuntime.constructError("SyntaxError", msg);
                                        }
                                    }
                                }
                            } else {
                                max = min;
                            }
                            /* balance '{' */
                            if (c == '}') {
                                state.result = new RENode(REOP_QUANT);
                                state.result.min = min;
                                state.result.max = max;
                                // QUANT, <min>, <max>, <parencount>,
                                // <parenindex>, <next> ... <ENDCHILD>
                                state.progLength += 12;
                                hasQ = true;
                            }
                        }
                    }
                    if (!hasQ) {
                        // Not a well-formed quantifier: rewind so that '{' is parsed as a
                        // literal by the next call.
                        state.cp = leftCurl;
                    }
                    break;
                }
        }
        if (!hasQ) return true;

        // Lookbehind assertions may not be quantified.
        if (term.op == REOP_ASSERTBACK || term.op == REOP_ASSERTBACK_NOT) {
            reportError("msg.bad.quant", "");
            return false;
        }

        // Step over the quantifier character (or the closing '}').
        ++state.cp;
        state.result.kid = term;
        state.result.parenIndex = parenBaseCount;
        state.result.parenCount = state.parenCount - parenBaseCount;
        // A '?' directly after the quantifier makes it non-greedy.
        if ((state.cp < state.cpend) && (src[state.cp] == '?')) {
            ++state.cp;
            state.result.greedy = false;
        } else {
            state.result.greedy = true;
        }
        return true;
    }