in src/main/java/org/apache/xmlbeans/impl/regex/RegexParser.java [788:902]
/**
 * Parses a bracketed character class. On entry the current token is the opening
 * '['; on normal return the closing ']' has been consumed and the parser context
 * has been switched back to {@code S_NORMAL}.
 *
 * <p>Members handled inside the brackets: plain characters, {@code a-b} ranges,
 * the shorthands {@code \d \D \w \W \s \S}, {@code \i \I \c \C}, {@code \p}/{@code \P}
 * properties, other backslash escapes (via {@code decodeEscaped()}), and POSIX-style
 * {@code [:name:]} / {@code [:^name:]} classes.
 *
 * @param useNrange when the class starts with '^': if {@code true} the members are
 *                  collected into a token made by {@code Token.createNRange()};
 *                  if {@code false} the members are subtracted from the full range
 *                  {@code 0..Token.UTF16_MAX} and that remainder is returned
 * @return the sorted and compacted range token describing the class
 * @throws ParseException declared for malformed input; errors are raised through
 *                        {@code this.ex(...)} with the keys {@code parser.atom.5},
 *                        {@code parser.cc.1}, {@code parser.cc.2} and {@code parser.cc.3}
 */
protected RangeToken parseCharacterClass(boolean useNrange) throws ParseException {
    this.setContext(S_INBRACKETS);
    this.next();                                    // consume '['

    // A '^' directly after '[' negates the class.
    final boolean negated = this.read() == T_CHAR && this.chardata == '^';
    RangeToken universe = null;                     // only used for negation-by-subtraction
    if (negated) {
        this.next();                                // consume '^'
        if (!useNrange) {
            universe = Token.createRange();
            universe.addRange(0, Token.UTF16_MAX);
        }
    }
    RangeToken result = (negated && useNrange) ? Token.createNRange() : Token.createRange();

    boolean first = true;
    for (;;) {
        int kind = this.read();
        if (kind == T_EOF) {
            break;
        }
        // ']' terminates the class, except when it is the very first member.
        if (!first && kind == T_CHAR && this.chardata == ']') {
            break;
        }
        first = false;

        int ch = this.chardata;
        // Set when the member was merged as a whole set and therefore
        // cannot act as the lower bound of an 'a-b' range.
        boolean merged = false;

        if (kind == T_BACKSOLIDUS) {
            if (ch == 'd' || ch == 'D' || ch == 'w' || ch == 'W' || ch == 's' || ch == 'S') {
                result.mergeRanges(this.getTokenForShorthand(ch));
                merged = true;
            } else if (ch == 'i' || ch == 'I' || ch == 'c' || ch == 'C') {
                // A negative return value means nothing is left to add as a single character.
                ch = this.processCIinCharacterClass(result, ch);
                merged = ch < 0;
            } else if (ch == 'p' || ch == 'P') {
                final int propertyStart = this.offset;
                final RangeToken propertyRange = this.processBacksolidus_pP(ch);
                if (propertyRange == null) {
                    throw this.ex("parser.atom.5", propertyStart);
                }
                result.mergeRanges(propertyRange);
                merged = true;
            } else {
                ch = this.decodeEscaped();
            }
        } else if (kind == T_POSIX_CHARCLASS_START) {
            // POSIX character class such as [:alnum:] or [:^alnum:]
            final int colon = this.regex.indexOf(':', this.offset);
            if (colon < 0) {
                throw this.ex("parser.cc.1", this.offset);
            }
            final boolean positive = this.regex.charAt(this.offset) != '^';
            if (!positive) {
                this.offset++;                      // step over '^'
            }
            final String className = this.regex.substring(this.offset, colon);
            final boolean schemaMode = this.isSet(RegularExpression.XMLSCHEMA_MODE);
            final RangeToken posixRange = Token.getRange(className, positive, schemaMode);
            if (posixRange == null) {
                throw this.ex("parser.cc.3", this.offset);
            }
            result.mergeRanges(posixRange);
            merged = true;
            // The name must be closed by ":]".
            final boolean closed = colon + 1 < this.regexlen
                    && this.regex.charAt(colon + 1) == ']';
            if (!closed) {
                throw this.ex("parser.cc.1", colon);
            }
            this.offset = colon + 2;
        }

        this.next();

        if (!merged) {
            final boolean dashFollows = this.read() == T_CHAR && this.chardata == '-';
            if (dashFollows) {
                this.next();                        // consume '-'
                kind = this.read();
                if (kind == T_EOF) {
                    throw this.ex("parser.cc.2", this.offset);
                }
                if (kind == T_CHAR && this.chardata == ']') {
                    // "x-]" : the '-' is a literal member; ']' is left for the loop to see.
                    result.addRange(ch, ch);
                    result.addRange('-', '-');
                } else {
                    final int upper = (kind == T_BACKSOLIDUS) ? this.decodeEscaped() : this.chardata;
                    this.next();
                    result.addRange(ch, upper);
                }
            } else {
                // Single character, no range.
                result.addRange(ch, ch);
            }
        }

        // With SPECIAL_COMMA set, a ',' after a member is skipped.
        if (this.isSet(RegularExpression.SPECIAL_COMMA)
                && this.read() == T_CHAR && this.chardata == ',') {
            this.next();
        }
    }

    // The loop ended on end-of-input rather than on ']'.
    if (this.read() == T_EOF) {
        throw this.ex("parser.cc.2", this.offset);
    }

    // Negation without an NRange token: everything minus the collected members.
    if (universe != null) {
        universe.subtractRanges(result);
        result = universe;
    }
    result.sortRanges();
    result.compactRanges();
    // Left disabled, as in the original code:
    // if (this.isSet(RegularExpression.IGNORE_CASE))
    //     result = RangeToken.createCaseInsensitiveToken(result);

    this.setContext(S_NORMAL);
    this.next();                                    // consume ']'
    return result;
}