in src/com/amazon/ion/impl/lite/ReverseBinaryEncoder.java [953:1059]
/**
 * Encodes {@code str} as UTF-8 immediately in front of the current write
 * position ({@code myOffset}) of {@code myBuffer}, then emits the string
 * type prefix for the encoded bytes via
 * {@code writePrefix(TYPE_STRING, length)}.
 * <p>
 * The input is walked from its last UTF-16 code unit to its first, and the
 * bytes of each encoded code point are stored at decreasing buffer
 * positions, so the finished byte sequence reads in normal (forward) order.
 * <p>
 * NOTE(review): this method relies on {@code growBuffer(int)} returning the
 * adjusted (non-negative) position and keeping {@code myOffset} in step with
 * the relocated content, because the encoded length is computed as
 * {@code myOffset - position} after any growth. Confirm against
 * {@code growBuffer}.
 *
 * @param str the text to encode
 *
 * @throws IonException if {@code str} contains a high surrogate that is not
 *                      followed by a low surrogate, or a low surrogate that
 *                      is not preceded by a high surrogate
 */
private void writeIonStringContent(String str)
{
    final int charCount = str.length();
    byte[] out = myBuffer;
    int pos = myOffset;

    // Each UTF-16 code unit yields at least one UTF-8 byte, so make sure
    // there is room for charCount bytes up front. This keeps the number of
    // buffer growths low and lets the ASCII loop below run without any
    // capacity checks.
    final int reserved = pos - charCount;
    if (reserved < 0)
    {
        pos = growBuffer(reserved) + charCount;
        out = myBuffer;
    }

    // Fast path: copy the trailing run of ASCII characters one byte each.
    // Stops (without consuming) at the first non-ASCII code unit.
    int idx = charCount - 1;
    while (idx >= 0)
    {
        final char ch = str.charAt(idx);
        if (ch > 0x7f)
        {
            break;
        }
        out[--pos] = (byte) ch;
        idx--;
    }

    // General path: whatever remains may need more bytes than were
    // reserved, so capacity is checked before every write.
    while (idx >= 0)
    {
        final char ch = str.charAt(idx);

        if (ch < 0x80)
        {
            // U+0000..U+007F: one byte
            pos -= 1;
            if (pos < 0)
            {
                pos = growBuffer(pos);
                out = myBuffer;
            }
            out[pos] = (byte) ch;
        }
        else if (ch < 0x800)
        {
            // U+0080..U+07FF: two bytes
            pos -= 2;
            if (pos < 0)
            {
                pos = growBuffer(pos);
                out = myBuffer;
            }
            out[pos]     = (byte) (0xc0 | (ch >>> 6));
            out[pos + 1] = (byte) (0x80 | (ch & 0x3f));
        }
        else if (ch < 0xd800 || ch > 0xdfff)
        {
            // U+0800..U+D7FF and U+E000..U+FFFF: three bytes
            pos -= 3;
            if (pos < 0)
            {
                pos = growBuffer(pos);
                out = myBuffer;
            }
            out[pos]     = (byte) (0xe0 | (ch >>> 12));
            out[pos + 1] = (byte) (0x80 | ((ch >>> 6) & 0x3f));
            out[pos + 2] = (byte) (0x80 | (ch & 0x3f));
        }
        else
        {
            // Surrogate code unit. Because we scan backwards, a valid pair
            // is met low surrogate first; a high surrogate seen here was
            // therefore not followed by a low surrogate.
            if (Character.isHighSurrogate(ch))
            {
                throw new IonException("invalid string, unpaired high surrogate character");
            }
            // A low surrogate at the very start has nothing before it.
            if (idx == 0)
            {
                throw new IonException("invalid string, unpaired low surrogate character");
            }
            // Safe to step back: idx > 0 was established just above.
            final char lead = str.charAt(--idx);
            if (!Character.isHighSurrogate(lead))
            {
                throw new IonException("invalid string, unpaired low surrogate character");
            }

            // Valid pair (lead, ch): supplementary code point, four bytes.
            final int codepoint = Character.toCodePoint(lead, ch);
            pos -= 4;
            if (pos < 0)
            {
                pos = growBuffer(pos);
                out = myBuffer;
            }
            out[pos]     = (byte) (0xf0 | (codepoint >>> 18));
            out[pos + 1] = (byte) (0x80 | ((codepoint >>> 12) & 0x3f));
            out[pos + 2] = (byte) (0x80 | ((codepoint >>> 6) & 0x3f));
            out[pos + 3] = (byte) (0x80 | (codepoint & 0x3f));
        }

        idx--;
    }

    // Publish the new write position and prepend the type descriptor for
    // the bytes just written.
    final int byteCount = myOffset - pos;
    myOffset = pos;
    writePrefix(TYPE_STRING, byteCount);
}