in src/com/amazon/ion/impl/IonBinary.java [2104:2247]
/**
 * Reads the body of a quoted text value from {@code r} and appends it to
 * this output, stopping when the closing terminator is reached.
 * <p>
 * Characters are emitted one of two ways:
 * <ul>
 *   <li>when {@code onlyByteSizedCharacters} is true, each character must
 *       fit in 0..255 and is written as a single byte via {@code write};</li>
 *   <li>otherwise UTF-16 surrogate halves are paired up and the resulting
 *       scalar is written via {@code writeUnicodeScalarAsUTF8}.</li>
 * </ul>
 * The terminator that ends the value is read from {@code r} but is not
 * written to the output.
 * <p>
 * For long strings a single terminator character is ordinary content; the
 * segment only ends when {@code isLongTerminator} returns true (presumably
 * it looks ahead for the rest of the triple-quote sequence - confirm against
 * {@code isLongTerminator}). If a segment ends directly after a high
 * surrogate, that surrogate is stored in {@code _pending_high_surrogate}
 * and is expected to be completed by the first character of the next call.
 * This method never reports a surrogate that is still pending when it
 * returns; NOTE(review): presumably the caller checks for that after the
 * last segment - confirm.
 *
 * @param terminator the character that ends the value, or -1 if the value
 *        runs to the end of input; must not be a backslash (asserted), and
 *        must not be -1 when {@code longstring} is true (only checked when
 *        {@code debugValidation} is on)
 * @param longstring true if a segment of a long (multi-line) string is
 *        being read; this changes terminator detection and permits raw
 *        line terminators in the content
 * @param onlyByteSizedCharacters true to require every character to be in
 *        the range 0..255 and to write each one as a single byte
 * @param decodeEscapeSequences true to decode backslash escape sequences
 *        via {@code IonTokenReader.readEscapedCharacter}; false to treat a
 *        backslash as an ordinary character
 * @param r the reader positioned just after the opening quote; lookahead
 *        is delegated to {@code isLongTerminator}
 * @throws IOException if reading from {@code r} fails
 * @throws UnexpectedEofException if end of input is read before the
 *         terminator is found
 * @throws IonException if a raw line terminator appears in a terminated
 *         non-long string, if a character exceeds 255 in byte-sized mode,
 *         or if an unpaired high or low surrogate is encountered
 */
public void appendToLongValue(int terminator
,boolean longstring
,boolean onlyByteSizedCharacters
,boolean decodeEscapeSequences
,PushbackReader r
)
throws IOException, UnexpectedEofException
{
int c;
if (debugValidation) {
// sanity checks that only run when debug validation is switched on
if (terminator == -1 && longstring) {
throw new IllegalStateException("longstrings have to have a terminator, no eof termination");
}
_validate();
}
// a backslash terminator would be indistinguishable from the start of an escape
assert(terminator != '\\');
for (;;) {
c = r.read(); // we put off the surrogate logic as long as possible (so not here)
// note: this test comes before the eof test, so when terminator is -1
// reaching end of input ends the value normally instead of throwing
if (c == terminator) {
if (!longstring || isLongTerminator(terminator, r)) {
// if it's not a long string one quote is enough otherwise look ahead
break;
}
// a lone terminator character inside a long string is just content,
// so fall through and write it like any other character
}
else if (c == -1) {
throw new UnexpectedEofException();
}
else if (c == '\n' || c == '\r') {
// here we'll handle embedded new line detection and escaped characters
// raw line terminators are only legal in long strings and in values
// that run to end of input (terminator == -1); otherwise reject them
if ((terminator != -1) && !longstring) {
throw new IonException("unexpected line terminator encountered in quoted string");
}
}
else if (decodeEscapeSequences && c == '\\') {
// if this is an escape sequence we need to process it now
// since we allow a surrogate to be encoded using \ u (or \ U)
// encoding
c = IonTokenReader.readEscapedCharacter(r, onlyByteSizedCharacters);
if (c == IonTokenReader.EMPTY_ESCAPE_SEQUENCE) {
// the escape produced no character (presumably an escaped line
// break - confirm in IonTokenReader), so there is nothing to write
continue;
}
}
if (onlyByteSizedCharacters) {
assert(_pending_high_surrogate == 0); // if it's byte sized only, then we shouldn't have a dangling surrogate
// any bit set above the low 8 means the value does not fit in one byte
if ((c & (~0xff)) != 0) {
throw new IonException("escaped character value too large in clob (0 to 255 only)");
}
write((byte)(0xff & c));
}
else {
// for larger characters we have to glue together surrogates, regardless
// of how they were encoded. If we have a high surrogate and go to peek
// for the low surrogate and hit the end of a segment of a long string
// (triple quoted multi-line string) we leave a dangling high surrogate
// that will get picked up on the next call into this routine when the
// next segment of the long string is processed
if (_pending_high_surrogate != 0) {
// a high surrogate is waiting (left over from an earlier call);
// the current character must be its low half
if ((c & _Private_IonConstants.surrogate_mask) != _Private_IonConstants.low_surrogate_value) {
String message =
"Text contains unmatched UTF-16 high surrogate " +
IonTextUtils.printCodePointAsString(_pending_high_surrogate);
throw new IonException(message);
}
c = _Private_IonConstants.makeUnicodeScalar(_pending_high_surrogate, c);
_pending_high_surrogate = 0;
}
else if ((c & _Private_IonConstants.surrogate_mask) == _Private_IonConstants.high_surrogate_value) {
// c is a high surrogate: read ahead for the low surrogate that completes it
int c2 = r.read();
if (c2 == terminator) {
if (longstring && isLongTerminator(terminator, r)) {
// if it's a long string termination we'll hang onto the current c as the pending surrogate
_pending_high_surrogate = c;
// set c so the post-loop "c != terminator" check passes
c = terminator;
break;
}
// otherwise this is an error
String message =
"Text contains unmatched UTF-16 high surrogate " +
IonTextUtils.printCodePointAsString(c);
throw new IonException(message);
}
else if (c2 == -1) {
// eof is also an error - really two errors
throw new UnexpectedEofException();
}
//here we convert escape sequences into characters and continue until
//we encounter a non-newline escape (typically immediately)
while (decodeEscapeSequences && c2 == '\\') {
c2 = IonTokenReader.readEscapedCharacter(r, onlyByteSizedCharacters);
// a real character came out of the escape: stop and pair it below
if (c2 != IonTokenReader.EMPTY_ESCAPE_SEQUENCE) break;
// the escape was empty, so read the next raw character and apply
// the same terminator / eof checks as for the first lookahead
c2 = r.read();
if (c2 == terminator) {
if (longstring && isLongTerminator(terminator, r)) {
// if it's a long string termination we'll hang onto the current c as the pending surrogate
_pending_high_surrogate = c;
c = c2; // we'll be checking this below
// this only leaves the inner while loop; the check on
// _pending_high_surrogate just below leaves the outer loop
break;
}
// otherwise this is an error
String message =
"Text contains unmatched UTF-16 high surrogate " +
IonTextUtils.printCodePointAsString(c);
throw new IonException(message);
}
else if (c2 == -1) {
// eof is also an error - really two errors
throw new UnexpectedEofException();
}
}
// check to see how we broke out of the while loop above, we may be "done"
if (_pending_high_surrogate != 0) {
break;
}
// whatever followed the high surrogate has to be a low surrogate
if ((c2 & _Private_IonConstants.surrogate_mask) != _Private_IonConstants.low_surrogate_value) {
String message =
"Text contains unmatched UTF-16 high surrogate " +
IonTextUtils.printCodePointAsString(c);
throw new IonException(message);
}
c = _Private_IonConstants.makeUnicodeScalar(c, c2);
}
else if ((c & _Private_IonConstants.surrogate_mask) == _Private_IonConstants.low_surrogate_value) {
// a low surrogate with no preceding high surrogate is never valid
String message =
"Text contains unmatched UTF-16 low surrogate " +
IonTextUtils.printCodePointAsString(c);
throw new IonException(message);
}
writeUnicodeScalarAsUTF8(c);
}
}
// every break above leaves c equal to the terminator, so this is a
// defensive check rather than an expected path
if (c != terminator) {
// TODO determine if this can really happen.
throw new UnexpectedEofException();
}
if (debugValidation) _validate();
return;
}