in core/src/main/java/org/apache/stormcrawler/filtering/basic/BasicURLNormalizer.java [64:83]
static {
for (int c = 0; c < 128; c++) {
/*
* https://tools.ietf.org/html/rfc3986#section-2.2 For consistency,
* percent-encoded octets in the ranges of ALPHA (%41-%5A and
* %61-%7A), DIGIT (%30-%39), hyphen (%2D), period (%2E), underscore
* (%5F), or tilde (%7E) should not be created by URI producers and,
* when found in a URI, should be decoded to their corresponding
* unreserved characters by URI normalizers.
*/
unescapedCharacters[c] =
(0x41 <= c && c <= 0x5A)
|| (0x61 <= c && c <= 0x7A)
|| (0x30 <= c && c <= 0x39)
|| c == 0x2D
|| c == 0x2E
|| c == 0x5F
|| c == 0x7E;
}
}