in rhino/src/main/java/org/mozilla/javascript/regexp/NativeRegExp.java [1370:1677]
/**
 * Parses one term of a regular expression starting at {@code state.cp}: an assertion or an
 * atom, optionally followed by a quantifier ({@code *}, {@code +}, {@code ?} or {@code
 * {min,max}}) and an optional non-greedy {@code ?} suffix.
 *
 * <p>On success the parsed node is left in {@code state.result}, {@code state.cp} has been
 * advanced past the consumed characters and {@code state.progLength} has been increased for
 * every node created. When a quantifier is present the atom becomes the {@code kid} of a new
 * {@code REOP_QUANT} node, which also records the range of capture groups opened inside the
 * atom ({@code parenIndex} / {@code parenCount}).
 *
 * <p>A {@code '{'} that does not form a complete {@code {digits}}, {@code {digits,}} or {@code
 * {digits,digits}} quantifier is not consumed here: {@code state.cp} is reset to the brace so
 * it is parsed as a literal atom by the next call.
 *
 * @param state compiler state holding the pattern ({@code cpbegin}), the cursor ({@code cp}),
 *     the end offset ({@code cpend}) and the counters updated while parsing
 * @param params parser parameters; read here for {@code namedCaptureGroups} and passed on to
 *     the nested parse helpers
 * @return {@code true} if a term was parsed, {@code false} if a nested parse step failed or
 *     the term is malformed (in the latter case {@code reportError} is invoked first)
 */
private static boolean parseTerm(CompilerState state, ParserParameters params) {
    char[] src = state.cpbegin;
    char c = src[state.cp++];
    // Number of capture groups opened before this term; used below to tell the quantifier
    // which groups belong to the quantified atom.
    int parenBaseCount = state.parenCount;
    int num;
    RENode term;
    int termStart;
    switch (c) {
        /* assertions and atoms */
        case '^':
            state.result = new RENode(REOP_BOL);
            state.progLength++;
            return true;
        case '$':
            state.result = new RENode(REOP_EOL);
            state.progLength++;
            return true;
        case '\\':
            // atom escape; B.1.2 of the ECMAScript specification
            if (state.cp < state.cpend) {
                c = src[state.cp++];
                switch (c) {
                    /* assertion escapes */
                    case 'b':
                        state.result = new RENode(REOP_WBDRY);
                        state.progLength++;
                        return true;
                    case 'B':
                        state.result = new RENode(REOP_WNONBDRY);
                        state.progLength++;
                        return true;
                    case '1':
                    case '2':
                    case '3':
                    case '4':
                    case '5':
                    case '6':
                    case '7':
                    case '8':
                    case '9':
                        // decimal escape
                        termStart = state.cp - 1;
                        num = getDecimalValue(c, state, "msg.overlarge.backref");
                        if (num > state.backReferenceLimit) {
                            // Not a usable back reference: warn, rewind to the first digit
                            // and re-parse the escape as a character escape instead.
                            reportWarning(state.cx, "msg.bad.backref", "");
                            state.cp = termStart;
                            if (!parseCharacterAndCharacterClassEscape(state, params)) {
                                return false;
                            }
                        } else {
                            state.result = new RENode(REOP_BACKREF);
                            state.result.parenIndex = num - 1;
                            state.progLength += 3;
                            if (state.maxBackReference < num) {
                                state.maxBackReference = num;
                            }
                        }
                        break;
                    case '0':
                        if (state.cp < state.cpend && src[state.cp] == '0') {
                            /*
                             * We're deliberately violating the ECMA 5.1 specification and allow octal
                             * escapes to follow spidermonkey and general 'web reality':
                             * http://wiki.ecmascript.org/doku.php?id=harmony:regexp_match_web_reality
                             * http://wiki.ecmascript.org/doku.php?id=strawman:match_web_reality_spec
                             */
                            // follow spidermonkey and allow multiple leading zeros,
                            // e.g. let /\0000/ match the string "\0"
                            parseMultipleLeadingZerosAsOctalEscape(state);
                            break;
                        }
                    /* fall through */
                    default:
                        // Step back onto the escaped character so the helper sees it.
                        state.cp--;
                        if (!parseCharacterAndCharacterClassEscape(state, params)) {
                            if (c == 'k' && params.namedCaptureGroups) {
                                // \k<name> named back reference
                                state.cp++;
                                String groupName =
                                        extractCaptureGroupName(src, state.cp, state.cpend);
                                if (groupName != null) {
                                    state.result = new RENode(REOP_NAMED_BACKREF);
                                    state.result.captureGroupNameIndex =
                                            state.cp + 1; // skip '<'
                                    state.result.captureGroupNameLength = groupName.length();
                                    state.cp += groupName.length() + 2; // include '<' and '>'
                                    // REOP_NAMED_BACKREF GROUPNAMEINDEX GROUPNAMELENGTH
                                    state.progLength += 5;
                                } else {
                                    reportError("msg.invalid.named.backref", "");
                                }
                            } else if ('c' == c) {
                                // when lookahead=c, parse the \\ as a literal
                                doFlat(state, '\\');
                            } else {
                                return false;
                            }
                        }
                }
                break;
            }
            /* a trailing '\' is an error */
            // NOTE(review): control continues below only if reportError returns normally;
            // presumably it throws - confirm against its definition.
            reportError("msg.trail.backslash", "");
            break;
        case '(':
            {
                // Wrapper node for the group; stays null for a non-capturing (?:...) group,
                // in which case the inner disjunction is used directly.
                RENode result = null;
                if (state.cp + 1 < state.cpend
                        && src[state.cp] == '?'
                        && ((c = src[state.cp + 1]) == '=' || c == '!' || c == ':')) {
                    state.cp += 2;
                    if (c == '=') {
                        result = new RENode(REOP_ASSERT);
                        /* ASSERT, <next>, ... ASSERTTEST */
                        state.progLength += 4;
                    } else if (c == '!') {
                        result = new RENode(REOP_ASSERT_NOT);
                        /* ASSERTNOT, <next>, ... ASSERTNOTTEST */
                        state.progLength += 4;
                    }
                } else if (state.cp + 2 < state.cpend
                        && src[state.cp] == '?'
                        && src[state.cp + 1] == '<'
                        && ((c = src[state.cp + 2]) == '=' || c == '!')) {
                    state.cp += 3;
                    if (c == '=') {
                        result = new RENode(REOP_ASSERTBACK);
                        /* ASSERT, <next>, ... ASSERTBACKTEST */
                        state.progLength += 4;
                    } else { // c == '!'
                        result = new RENode(REOP_ASSERTBACK_NOT);
                        /* ASSERTNOT, <next>, ... ASSERTBACKNOTTEST */
                        state.progLength += 4;
                    }
                } else {
                    result = new RENode(REOP_LPAREN);
                    if (state.cp + 2 < state.cpend
                            && src[state.cp] == '?'
                            && src[state.cp + 1] == '<') {
                        // (?<name>...) named capture group
                        state.cp += 1;
                        String name = extractCaptureGroupName(src, state.cp, state.cpend);
                        if (name == null) {
                            reportError("msg.invalid.group.name", "");
                            return false;
                        }
                        result.captureGroupNameIndex = state.cp + 1; // skip '<'
                        // length of the bare name, without '<' and '>'
                        result.captureGroupNameLength = name.length();
                        state.namedCaptureGroupsFound = true;
                        state.cp += name.length() + 2; // include '<' and '>'
                    }
                    /* LPAREN, <index>, ... RPAREN, <index> */
                    state.progLength += 6;
                    result.parenIndex = state.parenCount++;
                }
                ++state.parenNesting;
                if (!parseDisjunction(state, params)) {
                    return false;
                }
                if (state.cp == state.cpend || src[state.cp] != ')') {
                    reportError("msg.unterm.paren", "");
                    return false;
                }
                ++state.cp;
                --state.parenNesting;
                if (result != null) {
                    /* if we have a lookbehind then we reverse state.result linked list */
                    if (result.op == REOP_ASSERTBACK || result.op == REOP_ASSERTBACK_NOT) {
                        state.result = reverseNodeList(state.result);
                    }
                    result.kid = state.result;
                    state.result = result;
                }
                break;
            }
        case ')':
            reportError("msg.re.unmatched.right.paren", "");
            return false;
        case '[':
            ClassContents classContents = parseClassContents(state, params);
            if (classContents == null) {
                reportError("msg.unterm.class", "");
                return false;
            }
            state.result = new RENode(REOP_CLASS);
            state.result.classContents = classContents;
            state.result.index = state.classCount++;
            /*
             * Call calculateBitmapSize now as we want any errors it finds
             * to be reported during the parse phase, not at execution.
             */
            if (!calculateBitmapSize(state.flags, classContents, state.result)) {
                return false;
            }
            state.progLength += 3; /* CLASS, <index> */
            break;
        case '.':
            state.result = new RENode(REOP_DOT);
            state.progLength++;
            break;
        case '*':
        case '+':
        case '?':
            // A quantifier with nothing in front of it to quantify.
            reportError("msg.bad.quant", String.valueOf(src[state.cp - 1]));
            return false;
        default:
            // Any other character is a single literal.
            state.result = new RENode(REOP_FLAT);
            state.result.chr = c;
            state.result.length = 1;
            state.result.flatIndex = state.cp - 1;
            state.progLength += 3;
            break;
    }

    // The atom just parsed; it becomes the kid of a quantifier node if one follows.
    term = state.result;
    if (state.cp == state.cpend) {
        return true;
    }
    boolean hasQ = false;
    switch (src[state.cp]) {
        case '+':
            state.result = new RENode(REOP_QUANT);
            state.result.min = 1;
            state.result.max = -1;
            /* <PLUS>, <parencount>, <parenindex>, <next> ... <ENDCHILD> */
            state.progLength += 8;
            hasQ = true;
            break;
        case '*':
            state.result = new RENode(REOP_QUANT);
            state.result.min = 0;
            state.result.max = -1;
            /* <STAR>, <parencount>, <parenindex>, <next> ... <ENDCHILD> */
            state.progLength += 8;
            hasQ = true;
            break;
        case '?':
            state.result = new RENode(REOP_QUANT);
            state.result.min = 0;
            state.result.max = 1;
            /* <OPT>, <parencount>, <parenindex>, <next> ... <ENDCHILD> */
            state.progLength += 8;
            hasQ = true;
            break;
        case '{': /* balance '}' */
            {
                int min = 0;
                int max = -1; // -1 means "no upper bound", as for '*' and '+' above
                int leftCurl = state.cp;
                /* For Perl etc. compatibility, if quantifier does not match
                 * \{\d+(,\d*)?\} exactly back off from it
                 * being a quantifier, and chew it up as a literal
                 * atom next time instead.
                 */
                if (++state.cp < src.length && isDigit(c = src[state.cp])) {
                    ++state.cp;
                    min = getDecimalValue(c, state, "msg.overlarge.min");
                    if (state.cp < src.length) {
                        c = src[state.cp];
                        if (c == ',' && ++state.cp < src.length) {
                            c = src[state.cp];
                            if (isDigit(c) && ++state.cp < src.length) {
                                max = getDecimalValue(c, state, "msg.overlarge.max");
                                // getDecimalValue can leave state.cp at the end of the
                                // pattern (e.g. /a{1,23/), which is why the min branch
                                // above re-checks the bound as well. Guard the read so an
                                // unterminated quantifier backs off to a literal instead
                                // of throwing ArrayIndexOutOfBoundsException; c then still
                                // holds a digit, so the '}' test below fails.
                                if (state.cp < src.length) {
                                    c = src[state.cp];
                                    if (min > max) {
                                        String msg =
                                                ScriptRuntime.getMessageById(
                                                        "msg.max.lt.min",
                                                        Integer.valueOf(max),
                                                        Integer.valueOf(min));
                                        throw ScriptRuntime.constructError("SyntaxError", msg);
                                    }
                                }
                            }
                        } else {
                            // {n} form: exactly n repetitions
                            max = min;
                        }
                        /* balance '{' */
                        if (c == '}') {
                            state.result = new RENode(REOP_QUANT);
                            state.result.min = min;
                            state.result.max = max;
                            // QUANT, <min>, <max>, <parencount>,
                            // <parenindex>, <next> ... <ENDCHILD>
                            state.progLength += 12;
                            hasQ = true;
                        }
                    }
                }
                if (!hasQ) {
                    // Not a well-formed quantifier: rewind to the '{' so it is parsed as
                    // a literal by the next call.
                    state.cp = leftCurl;
                }
                break;
            }
    }
    if (!hasQ) {
        return true;
    }
    // Lookbehind assertions may not be quantified.
    if (term.op == REOP_ASSERTBACK || term.op == REOP_ASSERTBACK_NOT) {
        reportError("msg.bad.quant", "");
        return false;
    }
    // Step over the quantifier character (or the closing '}').
    ++state.cp;
    state.result.kid = term;
    state.result.parenIndex = parenBaseCount;
    state.result.parenCount = state.parenCount - parenBaseCount;
    // A trailing '?' makes the quantifier non-greedy.
    if ((state.cp < state.cpend) && (src[state.cp] == '?')) {
        ++state.cp;
        state.result.greedy = false;
    } else {
        state.result.greedy = true;
    }
    return true;
}