in src/main/java/org/apache/maven/xinclude/stax/NCName.java [111:175]
/**
 * Checks whether {@code c} is allowed as a non-initial character of a name
 * according to the XML 1.1 {@code NameChar} production.
 *
 * <p>The argument may be a single UTF-16 code unit or a full Unicode code point.
 * Surrogate code units (0xD800 - 0xDFFF) are accepted leniently, without verifying
 * that they form a well-ordered pair, so that names containing supplementary
 * characters pass when they are checked one {@code char} at a time. Supplementary
 * code points 0x10000 - 0xEFFFF are accepted directly.
 *
 * @param c the character (UTF-16 code unit or code point) to test
 * @param ncname if {@code true}, the colon is rejected (namespace-aware "NCName"
 *        rules); if {@code false}, the colon is accepted as a name character
 * @return {@code true} if {@code c} may appear in a name after its first character
 */
public static boolean is11NameChar(int c, boolean ncname) {
    // Fast path for the common ASCII cases.
    if (c <= 0x7A) { // 'z' or earlier
        if (c >= 0x61) { // 'a' - 'z'
            return true;
        }
        if (c <= 0x5A) {
            if (c >= 0x41) { // 'A' - 'Z'
                return true;
            }
            // '0' - '9', '.' and '-'
            if ((c >= 0x30 && c <= 0x39) || c == '.' || c == '-') {
                return true;
            }
            // The colon is only a name character when not in namespace-aware mode.
            if (c == ':' && !ncname) {
                return true;
            }
        } else if (c == 0x5F) { // '_'
            return true;
        }
        // Any other value in this range falls through and is rejected below.
    }

    // Everything else is checked block by block.
    if (c <= 0x2FEF) {
        if (c < 0x2000) {
            if (c < 0x00C0) {
                // Below 0xC0 only the middle dot remains (ASCII was handled above).
                return c == 0xB7;
            }
            // 0xC0 - 0x1FFF is valid except for the multiplication sign (0xD7),
            // the division sign (0xF7) and the Greek question mark (0x37E).
            return c != 0xD7 && c != 0xF7 && c != 0x37E;
        }
        if (c >= 0x2C00) {
            // 0x2C00 - 0x2FEF are valid
            return true;
        }
        if (c < 0x200C || c > 0x218F) {
            // 0x2000 - 0x200B and 0x2190 - 0x2BFF are invalid
            return false;
        }
        if (c >= 0x2070) {
            // 0x2070 - 0x218F are valid
            return true;
        }
        // Within 0x200C - 0x206F only these four are valid.
        return c == 0x200C || c == 0x200D || c == 0x203F || c == 0x2040;
    }

    // 0x2FF0 - 0x3000 are invalid; 0x3001 and above:
    if (c >= 0x3001) {
        if (c <= 0xDFFF) {
            // 0x3001 - 0xD7FF are valid characters; 0xD800 - 0xDFFF are surrogates,
            // accepted without checking their ordering (crude, but it never rejects
            // a valid combination).
            return true;
        }
        if (c >= 0xF900 && c <= 0xFFFD) {
            // Excludes the 0xFDD0 - 0xFDEF non-characters; the enclosing range check
            // already dropped 0xE000 - 0xF8FF as well as 0xFFFE and 0xFFFF.
            return c <= 0xFDCF || c >= 0xFDF0;
        }
        // Supplementary code points, for callers that pass full code points.
        return c >= 0x10000 && c <= 0xEFFFF;
    }
    return false;
}