in src/prettify/parser/CombinePrefixPattern.java [195:296]
/**
 * Rewrites the source of a single regular expression so that it can be
 * joined with other expressions into one larger pattern.
 *
 * <p>The rewrite performs these steps, in order:
 * <ol>
 *   <li>numbered escapes that refer to a capturing group opened earlier in
 *       the pattern are treated as back-references; any other non-zero
 *       numbered escape is replaced by the result of {@code encodeEscape};</li>
 *   <li>each back-referenced group is assigned the next value of the
 *       {@code capturedGroupIndex} field (which is incremented), and every
 *       capturing group that is never back-referenced is turned into a
 *       non-capturing {@code (?:} group;</li>
 *   <li>a {@code ^} that is not immediately followed by another {@code ^}
 *       is removed so that the result can match anywhere;</li>
 *   <li>if the expression has {@link Pattern#CASE_INSENSITIVE} set and the
 *       {@code needToFoldCase} field is true, ASCII letters are expanded to
 *       two-letter character sets and character sets are passed through
 *       {@code caseFoldCharset}.</li>
 * </ol>
 *
 * @param regex the expression to rewrite; only its pattern text and flags
 *     are read
 * @return the rewritten pattern source
 */
protected String allowAnywhereFoldCaseAndRenumberGroups(Pattern regex) {
    // Split into character sets, escape sequences, punctuation strings
    // like ('(', '(?:', ')', '^'), and runs of characters that do not
    // include any of the above.
    // NOTE(review): presumably Util.match returns every match of the
    // tokenizer in order -- confirm against Util.
    String[] parts = Util.match(Pattern.compile("(?:"
            + "\\[(?:[^\\x5C\\x5D]|\\\\[\\s\\S])*\\]" // a character set
            + "|\\\\u[A-Fa-f0-9]{4}" // a unicode escape
            + "|\\\\x[A-Fa-f0-9]{2}" // a hex escape
            + "|\\\\[0-9]+" // a back-reference or octal escape
            + "|\\\\[^ux0-9]" // other escape sequence
            + "|\\(\\?[:!=]" // start of a non-capturing group
            + "|[\\(\\)\\^]" // start/end of a group, or line start
            + "|[^\\x5B\\x5C\\(\\)\\^]+" // run of other characters
            + ")"), regex.pattern(), true);
    int n = parts.length;

    // Maps captured group numbers to the number they will occupy in
    // the output, or to -1 if that has not been determined yet. Groups
    // that need not be capturing in the output have no entry at all.
    Map<Integer, Integer> capturedGroups = new HashMap<Integer, Integer>();

    // Walk over and identify back references to build the capturedGroups
    // mapping.
    for (int i = 0, groupIndex = 0; i < n; ++i) {
        String p = parts[i];
        if (p.equals("(")) {
            // groups are 1-indexed, so max group index is count of '('
            ++groupIndex;
        } else if ('\\' == p.charAt(0)) {
            try {
                int decimalValue = Math.abs(Integer.parseInt(p.substring(1)));
                // Group numbers start at 1, so "\0" is never a
                // back-reference and is left exactly as written.
                if (decimalValue > 0) {
                    if (decimalValue <= groupIndex) {
                        capturedGroups.put(decimalValue, -1);
                    } else {
                        // Replace with an unambiguous escape sequence so that
                        // an octal escape sequence does not turn into a
                        // backreference to a capturing group from an earlier
                        // regex.
                        parts[i] = encodeEscape(decimalValue);
                    }
                }
            } catch (NumberFormatException ignored) {
                // Not a numeric escape (for example "\w" or "\."); it is
                // deliberately left untouched.
            }
        }
    }

    // Renumber groups and reduce capturing groups to non-capturing groups
    // where possible.
    for (Map.Entry<Integer, Integer> entry : capturedGroups.entrySet()) {
        if (-1 == entry.getValue()) {
            entry.setValue(++capturedGroupIndex);
        }
    }
    for (int i = 0, groupIndex = 0; i < n; ++i) {
        String p = parts[i];
        if (p.equals("(")) {
            ++groupIndex;
            if (capturedGroups.get(groupIndex) == null) {
                // Never back-referenced, so it does not need to capture.
                parts[i] = "(?:";
            }
        } else if ('\\' == p.charAt(0)) {
            try {
                int decimalValue = Math.abs(Integer.parseInt(p.substring(1)));
                // Same test as in the first pass, so every value reaching
                // this point has an entry in capturedGroups.
                if (decimalValue > 0 && decimalValue <= groupIndex) {
                    parts[i] = "\\" + capturedGroups.get(decimalValue);
                }
            } catch (NumberFormatException ignored) {
                // Not a numeric escape; nothing to renumber.
            }
        }
    }

    // Remove any prefix anchors so that the output will match anywhere.
    // ^^ really does mean an anchored match though.
    for (int i = 0; i < n; ++i) {
        // A "^" in the last position has no successor; treat it like any
        // other lone anchor instead of reading past the end of the array.
        boolean followedByCaret = i + 1 < n && "^".equals(parts[i + 1]);
        if ("^".equals(parts[i]) && !followedByCaret) {
            parts[i] = "";
        }
    }

    // Expand letters to groups to handle mixing of case-sensitive and
    // case-insensitive patterns if necessary.
    if ((regex.flags() & Pattern.CASE_INSENSITIVE) != 0 && needToFoldCase) {
        // Compiled once here rather than once per part.
        Pattern asciiLetter = Pattern.compile("[a-zA-Z]");
        for (int i = 0; i < n; ++i) {
            String p = parts[i];
            // Parts blanked by the anchor removal above are empty strings.
            char ch0 = p.length() > 0 ? p.charAt(0) : 0;
            if (p.length() >= 2 && ch0 == '[') {
                parts[i] = caseFoldCharset(p);
            } else if (ch0 != '\\') {
                // TODO: handle letters in numeric escapes.
                StringBuffer sb = new StringBuffer();
                Matcher letterMatcher = asciiLetter.matcher(p);
                while (letterMatcher.find()) {
                    int cc = letterMatcher.group(0).codePointAt(0);
                    // Copy the text before the letter, dropping the letter.
                    letterMatcher.appendReplacement(sb, "");
                    // Bit 0x20 separates upper and lower case ASCII letters:
                    // clearing it gives upper case, setting it lower case.
                    sb.append('[')
                            .append((char) (cc & ~32))
                            .append((char) (cc | 32))
                            .append(']');
                }
                letterMatcher.appendTail(sb);
                parts[i] = sb.toString();
            }
        }
    }

    return Util.join(parts);
}