public void appendToLongValue()

in src/com/amazon/ion/impl/IonBinary.java [2104:2247]


        /**
         * Reads characters from {@code r} up to the closing {@code terminator}
         * and appends them to this writer's output, either as single bytes or
         * as combined Unicode scalars handed to
         * {@code writeUnicodeScalarAsUTF8}.
         * <p>
         * The terminator itself is consumed but not written. For long strings
         * this method may be called once per segment: a high surrogate whose
         * low half has not been seen when a segment ends is remembered in
         * {@code _pending_high_surrogate} and completed by the first character
         * of the next call.
         *
         * @param terminator the character value that ends the value. Because
         *        the terminator test is made before the end-of-input test,
         *        passing {@code -1} makes end of input the terminator. Must
         *        not be a backslash (checked by an {@code assert}).
         * @param longstring when {@code true}, a terminator character only
         *        ends the value if {@code isLongTerminator} also returns
         *        {@code true}; otherwise it is treated as ordinary content.
         *        Raw line terminators are accepted in this mode.
         * @param onlyByteSizedCharacters when {@code true}, every character
         *        must be in the range 0 to 255 and is written as one byte;
         *        when {@code false}, UTF-16 surrogates are paired up and the
         *        resulting scalar is passed to
         *        {@code writeUnicodeScalarAsUTF8}.
         * @param decodeEscapeSequences when {@code true}, a backslash is
         *        handed to {@code IonTokenReader.readEscapedCharacter} and the
         *        returned value is used in its place; when {@code false}, a
         *        backslash is treated like any other character.
         * @param r the reader the characters are taken from
         *
         * @throws IOException declared for the reads performed on {@code r}
         * @throws UnexpectedEofException if {@code r} returns {@code -1}
         *         before the terminator is found (and {@code -1} is not itself
         *         the terminator)
         * @throws IonException if a raw line terminator occurs in a
         *         terminated, non-long value; if a character is outside 0..255
         *         while {@code onlyByteSizedCharacters} is set; or if a UTF-16
         *         surrogate is not properly paired
         * @throws IllegalStateException if {@code debugValidation} is on and
         *         a long string is requested with {@code terminator == -1}
         */
        public void appendToLongValue(int terminator
                                     ,boolean longstring
                                     ,boolean onlyByteSizedCharacters
                                     ,boolean decodeEscapeSequences
                                     ,PushbackReader r
                                     )
           throws IOException, UnexpectedEofException
        {
            int c;

            // Argument and state sanity checks, only performed when the
            // debugValidation flag is on.
            if (debugValidation) {
                if (terminator == -1 && longstring) {
                    throw new IllegalStateException("longstrings have to have a terminator, no eof termination");
                }
                _validate();
            }

            // A backslash terminator would be ambiguous with the escape
            // handling below.
            assert(terminator != '\\');
            for (;;) {
                c = r.read();  // we put off the surrogate logic as long as possible (so not here)

                // NOTE: the terminator test must stay ahead of the -1 test so
                // that terminator == -1 means "end of input ends the value".
                if (c == terminator) {
                    if (!longstring || isLongTerminator(terminator, r)) {
                        // if it's not a long string one quote is enough otherwise look ahead
                        // NOTE(review): isLongTerminator presumably checks for (and
                        // consumes) the rest of the closing quote sequence - confirm.
                        break;
                    }
                    // a lone terminator character inside a long string falls
                    // through and is written as ordinary content
                }
                else if (c == -1) {
                    throw new UnexpectedEofException();
                }
                else if (c == '\n' || c == '\r') {
                    // here we'll handle embedded new line detection and escaped characters
                    // A raw line terminator is only rejected for a terminated,
                    // non-long value; otherwise it is written like any other
                    // character.
                    if ((terminator != -1) && !longstring) {
                        throw new IonException("unexpected line terminator encountered in quoted string");
                    }
                        }
                else if (decodeEscapeSequences && c == '\\') {
                    // if this is an escape sequence we need to process it now
                    // since we allow a surrogate to be encoded using \ u (or \ U)
                    // encoding
                    c = IonTokenReader.readEscapedCharacter(r, onlyByteSizedCharacters);
                    if (c == IonTokenReader.EMPTY_ESCAPE_SEQUENCE) {
                        // the escape produced no character, so there is
                        // nothing to write for it
                        // NOTE(review): presumably an escaped newline - confirm
                        // against IonTokenReader.
                        continue;
                    }
                }

                if (onlyByteSizedCharacters) {
                    assert(_pending_high_surrogate == 0); // if it's byte sized only, then we shouldn't have a dangling surrogate
                    // reject anything with bits set above the low 8
                    if ((c & (~0xff)) != 0) {
                        throw new IonException("escaped character value too large in clob (0 to 255 only)");
                    }
                    write((byte)(0xff & c));
                }
                else {
                    // for larger characters we have to glue together surrogates, regardless
                    // of how they were encoded.  If we have a high surrogate and go to peek
                    // for the low surrogate and hit the end of a segment of a long string
                    // (triple quoted multi-line string) we leave a dangling high surrogate
                    // that will get picked up on the next call into this routine when the
                    // next segment of the long string is processed
                    if (_pending_high_surrogate != 0) {
                        // a previous segment ended on a high surrogate: the
                        // current character has to be its low half
                        if ((c & _Private_IonConstants.surrogate_mask) != _Private_IonConstants.low_surrogate_value) {
                            String message =
                                "Text contains unmatched UTF-16 high surrogate " +
                                IonTextUtils.printCodePointAsString(_pending_high_surrogate);
                            throw new IonException(message);
                        }
                        c = _Private_IonConstants.makeUnicodeScalar(_pending_high_surrogate, c);
                        _pending_high_surrogate = 0;
                    }
                    else if ((c & _Private_IonConstants.surrogate_mask) == _Private_IonConstants.high_surrogate_value) {
                        // c is a high surrogate: read ahead for its low half
                        int c2 = r.read();
                        if (c2 == terminator) {
                            if (longstring && isLongTerminator(terminator, r)) {
                                // if it's a long string termination we'll hang onto the current c as the pending surrogate
                                _pending_high_surrogate = c;
                                // make c look like a normal termination for
                                // the check after the loop
                                c = terminator;
                                break;
                            }
                            // otherwise this is an error
                            String message =
                                "Text contains unmatched UTF-16 high surrogate " +
                                IonTextUtils.printCodePointAsString(c);
                            throw new IonException(message);
                        }
                        else if (c2 == -1) {
                            // eof is also an error - really two errors
                            throw new UnexpectedEofException();
                        }
                        //here we convert escape sequences into characters and continue until
                        //we encounter a non-newline escape (typically immediately)
                        while (decodeEscapeSequences && c2 == '\\') {
                            c2 = IonTokenReader.readEscapedCharacter(r, onlyByteSizedCharacters);
                            // a real character came out of the escape: go
                            // validate it as the low surrogate
                            if (c2 != IonTokenReader.EMPTY_ESCAPE_SEQUENCE) break;
                            // the escape was empty: take the next raw character
                            // and repeat the terminator / eof checks for it
                            c2 = r.read();
                            if (c2 == terminator) {
                                if (longstring && isLongTerminator(terminator, r)) {
                                    // if it's a long string termination we'll hang onto the current c as the pending surrogate
                                    _pending_high_surrogate = c;
                                    c = c2; // we'll be checking this below
                                    break;
                                }
                                // otherwise this is an error
                                String message =
                                    "Text contains unmatched UTF-16 high surrogate " +
                                    IonTextUtils.printCodePointAsString(c);
                                throw new IonException(message);
                            }
                            else if (c2 == -1) {
                                // eof is also an error - really two errors
                                throw new UnexpectedEofException();
                            }
                        }
                        // check to see how we broke out of the while loop above, we may be "done"
                        // (_pending_high_surrogate was 0 on entry to this branch, so a
                        // non-zero value here means the segment ended inside the loop)
                        if (_pending_high_surrogate != 0) {
                            break;
                        }

                        if ((c2 & _Private_IonConstants.surrogate_mask) != _Private_IonConstants.low_surrogate_value) {
                            String message =
                                "Text contains unmatched UTF-16 high surrogate " +
                                IonTextUtils.printCodePointAsString(c);
                            throw new IonException(message);
                        }
                        c = _Private_IonConstants.makeUnicodeScalar(c, c2);
                    }
                    else if ((c & _Private_IonConstants.surrogate_mask) == _Private_IonConstants.low_surrogate_value) {
                        // a low surrogate with no high surrogate in front of it
                        String message =
                            "Text contains unmatched UTF-16 low surrogate " +
                            IonTextUtils.printCodePointAsString(c);
                        throw new IonException(message);
                    }
                    writeUnicodeScalarAsUTF8(c);
                }
            }

            // Defensive check: as far as the code in this method shows, every
            // break above leaves c equal to terminator.
            if (c != terminator) {
                // TODO determine if this can really happen.
                throw new UnexpectedEofException();
            }
            if (debugValidation) _validate();

            return;
        }