in sdk/serialization/azure-xml/src/main/java/com/azure/xml/implementation/aalto/in/ReaderScanner.java [1676:1859]
/**
 * Copies the rest of the current character-data run from {@code _inputBuffer}
 * into {@code _textBuilder}.
 * <p>
 * The first character (or expanded entity) of the run is taken from
 * {@code mTmpChar}: a negative value is treated as the negated code point of an
 * already-expanded entity, while a CR/LF is handed to
 * {@code checkInTreeIndentation}, which may complete the whole segment on its
 * own (signalled by a negative return value).
 * <p>
 * Copying then continues until either a {@code '<'} is seen (the input pointer
 * is backed up so the character is left unread) or {@code handleEntityInText}
 * returns 0, in which case {@code _entityPending} is set. Along the way CR and
 * CR+LF are replaced by a single {@code '\n'}, expanded entities above 16 bits
 * are written as surrogate pairs, surrogates found in the input are passed
 * through {@code checkSurrogate}, and a run of two or more {@code ']'} followed
 * by {@code '>'} is reported through {@code reportIllegalCDataEnd}.
 * <p>
 * When the loop ends the text builder's current length is set, and if
 * coalescing is enabled and no entity is pending, {@code finishCoalescedText}
 * is invoked.
 *
 * @throws XMLStreamException propagated from the input-loading and
 *     error-reporting helpers this method calls
 */
protected void finishCharacters() throws XMLStreamException {
    int outPtr;
    char[] outputBuffer;
    // Ok, so what was the first char / entity?
    {
        int c = mTmpChar;
        if (c < 0) { // from entity; can just copy as is
            c = -c;
            outputBuffer = _textBuilder.resetWithEmpty();
            outPtr = 0;
            if ((c >> 16) != 0) { // surrogate pair?
                c -= 0x10000;
                /* Note: after resetting the buffer, it's known to have
                 * space for more than 2 chars we need to add
                 */
                outputBuffer[outPtr++] = (char) (0xD800 | (c >> 10));
                c = 0xDC00 | (c & 0x3FF);
            }
            outputBuffer[outPtr++] = (char) c;
        } else { // white space that we are interested in?
            if (c == INT_CR || c == INT_LF) {
                ++_inputPtr; // wasn't advanced yet, in this case
                outPtr = checkInTreeIndentation((char) c);
                if (outPtr < 0) {
                    // Negative return: nothing more for this method to do
                    return;
                }
                // Above call also initializes the text builder appropriately
                outputBuffer = _textBuilder.getBufferWithoutReset();
            } else {
                outputBuffer = _textBuilder.resetWithEmpty();
                outPtr = 0;
            }
        }
    }
    final int[] TYPES = sCharTypes.TEXT_CHARS;
    final char[] inputBuffer = _inputBuffer;
    main_loop: while (true) {
        char c;
        // Then the tight ascii non-funny-char loop:
        ascii_loop: while (true) {
            int ptr = _inputPtr;
            if (ptr >= _inputEnd) {
                loadMoreGuaranteed();
                ptr = _inputPtr;
            }
            if (outPtr >= outputBuffer.length) {
                outputBuffer = _textBuilder.finishCurrentSegment();
                outPtr = 0;
            }
            // Bound the inner loop by whichever runs out first: available
            // input or free space in the current output segment. Because the
            // character that breaks the loop is read but not stored, at least
            // one free output slot always remains after a break.
            int max = _inputEnd;
            {
                int max2 = ptr + (outputBuffer.length - outPtr);
                if (max2 < max) {
                    max = max2;
                }
            }
            while (ptr < max) {
                c = inputBuffer[ptr++];
                if (c <= 0xFF) {
                    if (TYPES[c] != 0) {
                        _inputPtr = ptr;
                        break ascii_loop;
                    }
                } else if (c >= 0xD800) { // surrogates and 0xFFFE/0xFFFF
                    _inputPtr = ptr;
                    break ascii_loop;
                }
                outputBuffer[outPtr++] = c;
            }
            _inputPtr = ptr;
        }
        // And then exceptions:
        if (c <= 0xFF) {
            switch (TYPES[c]) {
                case XmlCharTypes.CT_INVALID:
                    // Use the handler's result and stop here, the same way the
                    // high-range branch below does. Without the break, a handler
                    // that returns instead of throwing would fall into the CR
                    // case and turn the invalid char into a line feed.
                    c = handleInvalidXmlChar(c);
                    break;

                case XmlCharTypes.CT_WS_CR: {
                    int ptr = _inputPtr;
                    if (ptr >= _inputEnd) {
                        loadMoreGuaranteed();
                        ptr = _inputPtr;
                    }
                    // CR+LF counts as a single line break: swallow the LF
                    if (inputBuffer[ptr] == '\n') {
                        ++_inputPtr;
                    }
                    markLF();
                    c = '\n';
                    break;
                }

                case XmlCharTypes.CT_WS_LF:
                    markLF();
                    break;

                case XmlCharTypes.CT_LT:
                    // Leave the '<' unread and end the text segment
                    --_inputPtr;
                    break main_loop;

                case XmlCharTypes.CT_AMP: {
                    int d = handleEntityInText();
                    if (d == 0) { // unexpandable general parsed entity
                        // _inputPtr set by entity expansion method
                        _entityPending = true;
                        break main_loop;
                    }
                    // Ok; does it need a surrogate though? (over 16 bits)
                    if ((d >> 16) != 0) {
                        d -= 0x10000;
                        outputBuffer[outPtr++] = (char) (0xD800 | (d >> 10));
                        // Need to ensure room for one more char
                        if (outPtr >= outputBuffer.length) {
                            outputBuffer = _textBuilder.finishCurrentSegment();
                            outPtr = 0;
                        }
                        d = (0xDC00 | (d & 0x3FF));
                    }
                    c = (char) d;
                    break;
                }

                case XmlCharTypes.CT_RBRACKET: { // ']]>'?
                    // Let's then just count number of brackets --
                    // in case they are not followed by '>'
                    int count = 1;
                    while (true) {
                        if (_inputPtr >= _inputEnd) {
                            loadMoreGuaranteed();
                        }
                        c = inputBuffer[_inputPtr];
                        if (c != ']') {
                            break;
                        }
                        ++_inputPtr; // to skip past bracket
                        ++count;
                    }
                    if (c == '>' && count > 1) {
                        reportIllegalCDataEnd();
                    }
                    // Nope. Need to output all brackets, then; except
                    // for one that can be left for normal output
                    while (count > 1) {
                        outputBuffer[outPtr++] = ']';
                        // Need to ensure room for one more char
                        if (outPtr >= outputBuffer.length) {
                            outputBuffer = _textBuilder.finishCurrentSegment();
                            outPtr = 0;
                        }
                        --count;
                    }
                    // Can just output the first ']' along normal output
                    c = ']';
                    break;
                }

                // Other types are not important here...
            }
        } else { // high-range, surrogates etc
            if (c < 0xE000) {
                // if ok, returns second surrogate; otherwise exception
                char d = checkSurrogate(c);
                outputBuffer[outPtr++] = c;
                // Need to ensure room for the second surrogate
                if (outPtr >= outputBuffer.length) {
                    outputBuffer = _textBuilder.finishCurrentSegment();
                    outPtr = 0;
                }
                c = d;
            } else if (c >= 0xFFFE) {
                c = handleInvalidXmlChar(c);
            }
        }
        outputBuffer[outPtr++] = c;
    }
    _textBuilder.setCurrentLength(outPtr);
    // 03-Feb-2009, tatu: Need to support coalescing mode too:
    if (_cfgCoalescing && !_entityPending) {
        finishCoalescedText();
    }
}