in sources/java-incremental-compilation/jvm-inc-builder/src/com/intellij/tools/build/bazel/org/jdom/Verifier.java [431:498]
public static String checkCharacterData(final String text) {
if (text == null) {
return "A null is not a legal XML value";
}
final int len = text.length();
for (int i = 0; i < len; i++) {
// we are expecting a normal char, but may be a surrogate.
// the isXMLCharacter method takes an int argument, but we have a char.
// we save a lot of time by doing the test directly here without
// doing the unnecessary cast-to-int and double-checking ranges
// for the char.
// Also, note that we only need to check for non-zero flags, instead
// of checking for an actual bit, because all the other
// character roles are a pure subset of CharacterData. Put another way,
// any character with any bit set, will always also have the
// CharacterData bit set.
while (CHARFLAGS[text.charAt(i)] != (byte)0) {
// fast-loop through the chars until we find something that's not.
//noinspection AssignmentToForLoopParameter
if (++i == len) {
// we passed all the characters...
return null;
}
}
// the character is not a normal character.
// we need to sort out what it is. Neither high nor low
// surrogate pairs are valid characters, so they will get here.
if (isHighSurrogate(text.charAt(i))) {
// we have the valid high char of a pair.
// we will expect the low char on the next index,
//noinspection AssignmentToForLoopParameter
i++;
if (i >= len) {
return String.format("Truncated Surrogate Pair 0x%04x????",
(int)text.charAt(i - 1));
}
if (isLowSurrogate(text.charAt(i))) {
// we now have the low char of a pair, decode and validate
if (!isXMLCharacter(decodeSurrogatePair(
text.charAt(i - 1), text.charAt(i)))) {
// Likely this character can't be easily displayed
// because it's a control, so we use it'd hexadecimal
// representation in the reason.
return String.format("0x%06x is not a legal XML character",
decodeSurrogatePair(
text.charAt(i - 1), text.charAt(i)));
}
}
else {
// we got a normal character, but we wanted a low surrogate
return String.format("Illegal Surrogate Pair 0x%04x%04x",
(int)text.charAt(i - 1), (int)text.charAt(i));
}
}
else {
// Likely this character can't be easily displayed
// because it's a control, so we use its hexadecimal
// representation in the reason.
return String.format("0x%04x is not a legal XML character",
(int)text.charAt(i));
}
}
// If we got here, everything is OK
return null;
}