private void writeIonStringContent()

in src/com/amazon/ion/impl/lite/ReverseBinaryEncoder.java [953:1059]


    /**
     * Encodes {@code str} as UTF-8 into {@code myBuffer}, walking the string
     * from its last UTF-16 code unit to its first and placing bytes at
     * descending buffer positions that start just below {@code myOffset}.
     * Afterwards {@code myOffset} is moved to the first encoded byte and the
     * encoded byte count is handed to {@code writePrefix} together with
     * {@code TYPE_STRING}.
     *
     * @param str the text to encode; it is dereferenced without a null check
     *
     * @throws IonException if {@code str} contains a high surrogate that is
     *         not followed by a low surrogate, or a low surrogate that is not
     *         preceded by a high surrogate
     */
    private void writeIonStringContent(String str)
    {
        final int unitCount = str.length();
        byte[] buf = myBuffer;
        int pos = myOffset;

        // Each UTF-16 code unit produces at least one UTF-8 byte, so make sure
        // there is room for one byte per unit before starting. This keeps the
        // number of later buffer growths down.
        // NOTE(review): growBuffer is assumed to return the given (negative)
        // offset translated into the enlarged myBuffer -- confirm against its
        // definition.
        if (pos - unitCount < 0)
        {
            pos = growBuffer(pos - unitCount) + unitCount;
            buf = myBuffer;
        }

        // Fast path: copy the trailing run of ASCII characters. The space
        // reserved above covers one byte per code unit, so no capacity check
        // is needed here.
        int idx = unitCount - 1;
        while (idx >= 0)
        {
            final char ch = str.charAt(idx);
            if (ch > 0x7f)
            {
                break;
            }
            buf[--pos] = (byte) ch;
            idx--;
        }

        // General path: everything from the first non-ASCII unit (seen from
        // the end) down to the start of the string. Multi-byte sequences may
        // exceed the reserved space, so every write checks capacity first.
        while (idx >= 0)
        {
            final char unit = str.charAt(idx);

            if (unit < 0x80)
            {
                // U+0000 to U+007F: one byte.
                pos -= 1;
                if (pos < 0)
                {
                    pos = growBuffer(pos);
                    buf = myBuffer;
                }
                buf[pos] = (byte) unit;
            }
            else if (unit < 0x800)
            {
                // U+0080 to U+07FF: two bytes.
                pos -= 2;
                if (pos < 0)
                {
                    pos = growBuffer(pos);
                    buf = myBuffer;
                }
                buf[pos]     = (byte) (0xc0 | ((unit >> 6) & 0x1f));
                buf[pos + 1] = (byte) (0x80 | (unit & 0x3f));
            }
            else if (unit < 0xd800 || unit > 0xdfff)
            {
                // U+0800 to U+D7FF and U+E000 to U+FFFF: three bytes.
                pos -= 3;
                if (pos < 0)
                {
                    pos = growBuffer(pos);
                    buf = myBuffer;
                }
                buf[pos]     = (byte) (0xE0 | ((unit >> 12) & 0x0F));
                buf[pos + 1] = (byte) (0x80 | ((unit >> 6) & 0x3F));
                buf[pos + 2] = (byte) (0x80 | (unit & 0x3F));
            }
            else
            {
                // Surrogate code unit. Because the string is read backwards,
                // a well-formed pair shows its low surrogate first; reaching
                // a high surrogate here means nothing valid followed it.
                if (Character.isHighSurrogate(unit))
                {
                    throw new IonException("invalid string, unpaired high surrogate character");
                }

                // A low surrogate at index 0 has nothing in front of it.
                if (idx == 0)
                {
                    throw new IonException("invalid string, unpaired low surrogate character");
                }

                // idx > 0 is guaranteed by the check above, so stepping back
                // one unit stays in bounds. This also consumes the lead unit.
                final char lead = str.charAt(--idx);
                if (!Character.isHighSurrogate(lead))
                {
                    throw new IonException("invalid string, unpaired low surrogate character");
                }

                // Valid pair (lead, unit): combine into the supplementary
                // code point and emit it as four bytes.
                final int scalar = Character.toCodePoint(lead, unit);

                pos -= 4;
                if (pos < 0)
                {
                    pos = growBuffer(pos);
                    buf = myBuffer;
                }
                buf[pos]     = (byte) (0xF0 | ((scalar >> 18) & 0x07));
                buf[pos + 1] = (byte) (0x80 | ((scalar >> 12) & 0x3F));
                buf[pos + 2] = (byte) (0x80 | ((scalar >> 6)  & 0x3F));
                buf[pos + 3] = (byte) (0x80 | (scalar & 0x3F));
            }

            idx--;
        }

        // NOTE(review): this difference is only the encoded size if growBuffer
        // keeps myOffset in step with the relocated data -- confirm.
        final int byteCount = myOffset - pos;
        myOffset = pos;

        writePrefix(TYPE_STRING, byteCount);
    }