in src/org/apache/xerces/impl/xpath/regex/RegexParser.java [749:837]
/**
 * Parses a single atom of the regular expression.
 *
 * <p>Dispatches on the token kind reported by {@code read()}. Grouping,
 * conditional, modifier, independent-group, character-class and
 * set-operation tokens are handed to their dedicated {@code process*} /
 * {@code parse*} methods. A dot, a backslash escape and a plain character
 * are handled here (the latter two through private helpers).
 *
 * @return the token built for the atom
 * @throws ParseException created via {@code ex("parser.atom.4", offset - 1)}
 *         when the token kind is not one handled here or when the plain
 *         character is {@code ']'}, <code>'{'</code> or <code>'}'</code>;
 *         created via {@code ex("parser.atom.5", ...)} when
 *         {@code processBacksolidus_pP} returns {@code null}
 */
Token parseAtom() throws ParseException {
    switch (this.read()) {
    case T_LPAREN:
        return this.processParen();
    case T_LPAREN2:                     // '(?:'
        return this.processParen2();
    case T_CONDITION:                   // '(?('
        return this.processCondition();
    case T_MODIFIERS:                   // (?modifiers ... )
        return this.processModifiers();
    case T_INDEPENDENT:
        return this.processIndependent();
    case T_DOT:
        this.next();                    // Skips '.'
        return Token.token_dot;
    /*
     * char-class ::= '[' ( '^'? range ','?)+ ']'
     * range      ::= '\d' | '\w' | '\s' | category-block | range-char
     *                | range-char '-' range-char
     * range-char ::= '\[' | '\]' | '\\' | '\' [,-efnrtv] | bmp-code | character-2
     * bmp-code   ::= '\' 'u' [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]
     */
    case T_LBRACKET:
        return this.parseCharacterClass(true);
    case T_SET_OPERATIONS:
        return this.parseSetOperations();
    case T_BACKSOLIDUS:
        return this.parseEscapedAtom();
    case T_CHAR:
        return this.parseLiteralAtom();
    default:
        throw this.ex("parser.atom.4", this.offset - 1);
    }
}

/**
 * Handles the atom that follows a backslash; the escaped character is
 * taken from {@code this.chardata}.
 */
private Token parseEscapedAtom() throws ParseException {
    Token atom;
    switch (this.chardata) {
    case 'd': case 'D':
    case 'w': case 'W':
    case 's': case 'S':
        // Shorthand class: build the token first, then advance.
        atom = this.getTokenForShorthand(this.chardata);
        this.next();
        return atom;

    case 'e': case 'f': case 'n': case 'r':
    case 't': case 'u': case 'v': case 'x': {
        final int decoded = this.decodeEscaped();
        if (decoded >= 0x10000) {
            // Does not fit in one char: emit the surrogate sequence as a string.
            atom = Token.createString(REUtil.decomposeToSurrogates(decoded));
        } else {
            atom = Token.createChar(decoded);
        }
        break;
    }

    case 'c': return this.processBacksolidus_c();
    case 'C': return this.processBacksolidus_C();
    case 'i': return this.processBacksolidus_i();
    case 'I': return this.processBacksolidus_I();
    case 'g': return this.processBacksolidus_g();
    case 'X': return this.processBacksolidus_X();

    case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
        return this.processBackreference();

    case 'P':
    case 'p': {
        // Remember the offset before delegating so the error points at it.
        final int propertyStart = this.offset;
        atom = processBacksolidus_pP(this.chardata);
        if (atom == null) {
            throw this.ex("parser.atom.5", propertyStart);
        }
        break;
    }

    default:
        // Any other escaped character becomes a plain character token.
        atom = Token.createChar(this.chardata);
    }
    // Every branch that reaches this point still has to advance once.
    this.next();
    return atom;
}

/**
 * Handles a plain character atom, joining a high surrogate with an
 * immediately following low surrogate into one string token.
 */
private Token parseLiteralAtom() throws ParseException {
    final int lead = this.chardata;
    if (lead == ']' || lead == '{' || lead == '}') {
        throw this.ex("parser.atom.4", this.offset - 1);
    }
    final Token single = Token.createChar(lead);
    this.next();

    // Guard clauses keep the original short-circuit order:
    // read() is consulted only when the first character is a high surrogate.
    if (!REUtil.isHighSurrogate(lead)) {
        return single;
    }
    if (this.read() != T_CHAR) {
        return single;
    }
    if (!REUtil.isLowSurrogate(this.chardata)) {
        return single;
    }

    final String pair = new String(new char[] { (char) lead, (char) this.chardata });
    final Token joined = Token.createParen(Token.createString(pair), 0);
    this.next();
    return joined;
}