in src/main/java/org/apache/xml/security/c14n/implementations/UtfHelpper.java [117:182]
/**
 * Writes {@code str} to {@code out} encoded as UTF-8, one code point at a time.
 *
 * <p>An unpaired surrogate in {@code str} is written as a single {@code '?'} (0x3F).
 * When {@code OLD_UTF8} is set, every supplementary code point (U+10000 and above)
 * is written as two {@code '?'} bytes instead of its four-byte encoding.
 *
 * @param str the string to encode; must not be {@code null}
 * @param out the stream receiving the encoded bytes; it is neither flushed nor closed here
 * @throws IOException if writing to {@code out} fails
 */
public static void writeStringToUtf8(
    final String str, final OutputStream out
) throws IOException {
    final int length = str.length();
    int i = 0;
    while (i < length) {
        final int c = str.codePointAt(i);
        i += Character.charCount(c);

        // codePointAt hands back a lone surrogate unchanged; it has no UTF-8 form,
        // so it is replaced by '?'. The validity check is kept as a defensive guard.
        if (!Character.isValidCodePoint(c)
            || (c >= Character.MIN_SURROGATE && c <= Character.MAX_SURROGATE)) {
            out.write(0x3f);
            continue;
        }
        if (OLD_UTF8 && c >= Character.MIN_SUPPLEMENTARY_CODE_POINT) {
            // version 2 or before output 2 question mark characters for 32 bit chars
            out.write(0x3f);
            out.write(0x3f);
            continue;
        }
        if (c < 0x80) {
            // 0x0000 - 0x007F: 0xxxxxxx
            out.write(c);
            continue;
        }

        // From here on c is a valid, non-surrogate code point in 0x80..0x10FFFF,
        // so it needs a lead byte plus one to three continuation bytes.
        final int extraBytes;
        if (c < 0x800) {
            // 0x0080 - 0x07FF: 110xxxxx 10xxxxxx
            extraBytes = 1;
        } else if (c < 0x10000) {
            // 0x0800 - 0xFFFF: 1110xxxx 10xxxxxx 10xxxxxx
            extraBytes = 2;
        } else {
            // 0x10000 - 0x10FFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            extraBytes = 3;
        }

        // Lead byte: the shifted 0xFE, truncated to a byte, supplies the run of
        // (extraBytes + 1) one-bits followed by a zero; the remaining low bits
        // carry the most significant payload bits of c.
        int shift = 6 * extraBytes;
        out.write((byte) ((0xFE << (6 - extraBytes)) | (c >>> shift)));

        // Continuation bytes: 10xxxxxx, six payload bits each, most significant first.
        while (shift > 0) {
            shift -= 6;
            out.write((byte) (0x80 | ((c >>> shift) & 0x3F)));
        }
    }
}