private String unescapePath()

in core/src/main/java/org/apache/stormcrawler/filtering/basic/BasicURLNormalizer.java [329:386]


    /**
     * Normalizes the escape sequences of a URL path in two passes.
     *
     * <ol>
     *   <li>Every match of {@code illegalEscapePattern} is replaced by the single {@code char}
     *       whose code is the hexadecimal value of the pattern's first capture group.
     *   <li>Every match of {@code unescapeRulePattern} in the result of pass 1 is either decoded
     *       (when its hexadecimal value is below 128 and flagged in {@code
     *       unescapedCharacters}) or kept as-is but converted to upper case.
     * </ol>
     *
     * <p>NOTE(review): both patterns and the {@code unescapedCharacters} table are declared
     * elsewhere in this class; this method assumes capture group 1 of each pattern contains
     * only hexadecimal digits and that the table has at least 128 entries - confirm against
     * their definitions.
     *
     * @param path the path to normalize; must not be {@code null}
     * @return the normalized path, or {@code path} itself when neither pattern matches
     * @throws NumberFormatException if capture group 1 of a match is not a valid hexadecimal
     *     integer
     */
    private String unescapePath(String path) {
        // Pass 1: decode the sequences matched by illegalEscapePattern.
        Matcher matcher = illegalEscapePattern.matcher(path);
        if (matcher.find()) {
            StringBuilder decoded = new StringBuilder(path.length());
            int copiedUpTo = 0;
            do {
                // Copy the text between the previous match and this one
                decoded.append(path, copiedUpTo, matcher.start());
                // parseInt avoids the boxing round-trip of Integer.valueOf
                decoded.append((char) Integer.parseInt(matcher.group(1), 16));
                copiedUpTo = matcher.end();
            } while (matcher.find());
            // Copy whatever follows the last match
            decoded.append(path, copiedUpTo, path.length());
            // Pass 2 must operate on the output of pass 1
            path = decoded.toString();
        }

        // Pass 2: decode or upper-case the sequences matched by unescapeRulePattern.
        matcher = unescapeRulePattern.matcher(path);
        if (!matcher.find()) {
            // Nothing to rewrite: hand back the (possibly pass-1 decoded) path
            return path;
        }

        StringBuilder sb = new StringBuilder(path.length());
        int end = 0;

        // Traverse over all encoded groups
        do {
            // Append everything up to this group
            sb.append(path, end, matcher.start());

            // Get the integer representation of this hexadecimal encoded
            // character
            int letter = Integer.parseInt(matcher.group(1), 16);
            if (letter < 128 && unescapedCharacters[letter]) {
                // character should be unescaped in URLs
                sb.append((char) letter);
            } else {
                // Append the whole sequence as uppercase; Locale.ROOT keeps the
                // result independent of the JVM's default locale
                sb.append(matcher.group().toUpperCase(Locale.ROOT));
            }

            end = matcher.end();
        } while (matcher.find());

        // Append the rest if there's anything left
        sb.append(path, end, path.length());

        return sb.toString();
    }