protected String allowAnywhereFoldCaseAndRenumberGroups()

in src/prettify/parser/CombinePrefixPattern.java [195:296]


  /**
   * Rewrites the source of {@code regex} so that it can be concatenated with
   * other patterns into one combined expression.
   *
   * <p>The pattern text is tokenized and then processed in four steps:
   * <ol>
   *   <li>numeric escapes that refer to an already opened capturing group are
   *       recorded as back-references; other numeric escapes are replaced by
   *       the result of {@code encodeEscape};</li>
   *   <li>referenced groups receive new numbers continuing from the
   *       {@code capturedGroupIndex} counter (which is advanced), and every
   *       unreferenced {@code (} becomes {@code (?:};</li>
   *   <li>{@code ^} tokens that are not immediately followed by another
   *       {@code ^} token are removed;</li>
   *   <li>when {@code regex} has {@link Pattern#CASE_INSENSITIVE} set and
   *       {@code needToFoldCase} is true, ASCII letters outside escapes are
   *       expanded to two-letter character sets and character sets are passed
   *       through {@code caseFoldCharset}.</li>
   * </ol>
   *
   * @param regex the pattern whose source text is rewritten; its flags are
   *     only consulted for {@link Pattern#CASE_INSENSITIVE}
   * @return the rewritten pattern source, produced by {@code Util.join}
   */
  protected String allowAnywhereFoldCaseAndRenumberGroups(Pattern regex) {
    // Split into character sets, escape sequences, punctuation strings
    // like ('(', '(?:', ')', '^'), and runs of characters that do not
    // include any of the above.
    String[] parts = Util.match(Pattern.compile("(?:"
            + "\\[(?:[^\\x5C\\x5D]|\\\\[\\s\\S])*\\]" // a character set
            + "|\\\\u[A-Fa-f0-9]{4}" // a unicode escape
            + "|\\\\x[A-Fa-f0-9]{2}" // a hex escape
            + "|\\\\[0-9]+" // a back-reference or octal escape
            + "|\\\\[^ux0-9]" // other escape sequence
            + "|\\(\\?[:!=]" // start of a non-capturing group
            + "|[\\(\\)\\^]" // start/end of a group, or line start
            + "|[^\\x5B\\x5C\\(\\)\\^]+" // run of other characters
            + ")"), regex.pattern(), true);
    int n = parts.length;

    // Maps an original group number to the number it will occupy in the
    // output, or to -1 while that number has not been assigned yet. Groups
    // without an entry are never referenced and need not capture.
    Map<Integer, Integer> capturedGroups = new HashMap<Integer, Integer>();

    // Pass 1: find back-references. A numeric escape only counts as a
    // back-reference when the group it names has already been opened.
    int groupCount = 0;
    for (int i = 0; i < n; ++i) {
      String p = parts[i];
      if (p.equals("(")) {
        // groups are 1-indexed, so max group index is count of '('
        ++groupCount;
        continue;
      }
      int decimalValue = parseNumericEscape(p);
      if (decimalValue < 0) {
        continue;
      }
      if (decimalValue <= groupCount) {
        capturedGroups.put(decimalValue, -1);
      } else {
        // Replace with an unambiguous escape sequence so that
        // an octal escape sequence does not turn into a backreference
        // to a capturing group from an earlier regex.
        parts[i] = encodeEscape(decimalValue);
      }
    }

    // Assign output numbers in ascending group order. The surviving '('
    // tokens appear in the output in exactly this order, so the numbering
    // must not depend on HashMap iteration order.
    for (int group = 1; group <= groupCount; ++group) {
      Integer assigned = capturedGroups.get(group);
      if (assigned != null && assigned == -1) {
        capturedGroups.put(group, ++capturedGroupIndex);
      }
    }

    // Pass 2: demote unreferenced groups to non-capturing groups and point
    // every back-reference at the renumbered group.
    for (int i = 0, groupIndex = 0; i < n; ++i) {
      String p = parts[i];
      if (p.equals("(")) {
        ++groupIndex;
        if (!capturedGroups.containsKey(groupIndex)) {
          parts[i] = "(?:";
        }
        continue;
      }
      int decimalValue = parseNumericEscape(p);
      if (decimalValue > 0 && decimalValue <= groupIndex) {
        parts[i] = "\\" + capturedGroups.get(decimalValue);
      }
    }

    // Remove any prefix anchors so that the output will match anywhere.
    // ^^ really does mean an anchored match though.
    for (int i = 0; i < n; ++i) {
      if (!"^".equals(parts[i])) {
        continue;
      }
      // A '^' that is the very last token has no successor; treat it like
      // any other lone anchor instead of reading past the end of the array.
      boolean followedByAnchor = i + 1 < n && "^".equals(parts[i + 1]);
      if (!followedByAnchor) {
        parts[i] = "";
      }
    }

    // Expand letters to groups to handle mixing of case-sensitive and
    // case-insensitive patterns if necessary.
    if ((regex.flags() & Pattern.CASE_INSENSITIVE) != 0 && needToFoldCase) {
      Pattern asciiLetter = Pattern.compile("[a-zA-Z]");
      for (int i = 0; i < n; ++i) {
        String p = parts[i];
        char ch0 = p.length() > 0 ? p.charAt(0) : 0;
        if (p.length() >= 2 && ch0 == '[') {
          parts[i] = caseFoldCharset(p);
        } else if (ch0 != '\\') {
          // TODO: handle letters in numeric escapes.
          // StringBuffer (not StringBuilder) because it is what
          // Matcher.appendReplacement accepts on every Java version.
          StringBuffer sb = new StringBuffer();
          Matcher letters = asciiLetter.matcher(p);
          while (letters.find()) {
            int cc = letters.group(0).codePointAt(0);
            letters.appendReplacement(sb, "");
            // Bit 0x20 is the only difference between an ASCII letter's
            // upper-case and lower-case forms.
            sb.append('[')
                .append((char) (cc & ~32))
                .append((char) (cc | 32))
                .append(']');
          }
          letters.appendTail(sb);
          parts[i] = sb.toString();
        }
      }
    }

    return Util.join(parts);
  }

  /**
   * Reads the decimal number of a {@code \NNN} token.
   *
   * @param part one token of the split pattern
   * @return the positive decimal value following the backslash, or -1 when
   *     {@code part} is not a backslash followed by a positive decimal
   *     number (this includes {@code \0}, which can never name a group)
   */
  private static int parseNumericEscape(String part) {
    if (part.length() < 2 || part.charAt(0) != '\\') {
      return -1;
    }
    try {
      int value = Integer.parseInt(part.substring(1));
      return value > 0 ? value : -1;
    } catch (NumberFormatException ignored) {
      // Not a numeric escape (e.g. \d, \x41, \u0041) or too large for an int.
      return -1;
    }
  }