in src/LttngHelpers.c [181:245]
// Forward declaration of Utf16ToUtf8 (defined immediately below), tagged with
// the lttng_ust_notrace marker.
//
// pch16/cch16: UTF-16 input buffer and its length in 16-bit code units.
// pch8/cch8:   UTF-8 output buffer and its capacity in bytes.
// Returns the number of bytes stored in pch8.
//
// NOTE(review): presumably the marker is placed on a separate declaration
// because it expands to a function attribute that may not follow the
// declarator of a function definition - confirm against the LTTng-UST headers.
static unsigned Utf16ToUtf8(
char16_t const *pch16,
unsigned cch16,
unsigned char *pch8,
unsigned cch8)
lttng_ust_notrace;
/*
 * Transcodes the cch16 UTF-16 code units starting at pch16 into UTF-8,
 * storing at most cch8 bytes at pch8.
 *
 * A high surrogate immediately followed by a low surrogate is combined into
 * one 4-byte sequence. Any other surrogate (unpaired) is deliberately passed
 * through as a 3-byte sequence rather than rejected or replaced, so that bad
 * input remains visible in the logged output.
 *
 * Conversion stops as soon as the next encoded character would not fit in the
 * remaining output space; a character is either written completely or not at
 * all. No terminating NUL is appended.
 *
 * Returns the number of bytes stored in pch8.
 */
static unsigned Utf16ToUtf8(
    char16_t const *pch16,
    unsigned cch16,
    unsigned char *pch8,
    unsigned cch8)
{
    unsigned written = 0;  // Bytes stored in pch8 so far.
    unsigned consumed = 0; // Code units read from pch16 so far.

    // Callers never pass cch16 > 65535, so the output index cannot overflow
    // and the capacity checks below do not need to guard against wrap-around.
    assert(cch16 <= (0xFFFFFFFF / 3));

    while (consumed != cch16)
    {
        unsigned codePoint = pch16[consumed];
        consumed += 1;

        if (caa_likely(codePoint < 0x80))
        {
            // U+0000..U+007F: single byte.
            if (caa_unlikely(written == cch8))
            {
                break;
            }

            pch8[written] = (unsigned char)codePoint;
            written += 1;
            continue;
        }

        if (caa_likely(codePoint < 0x800))
        {
            // U+0080..U+07FF: two bytes.
            if (caa_unlikely(written + 1 >= cch8))
            {
                break;
            }

            pch8[written] = (unsigned char)((codePoint >> 6) | 0xc0);
            pch8[written + 1] = (unsigned char)((codePoint & 0x3f) | 0x80);
            written += 2;
            continue;
        }

        // Only peek at the following unit when there is one and the current
        // unit is a high surrogate.
        if (codePoint >= 0xd800 && codePoint < 0xdc00 && consumed != cch16)
        {
            unsigned const low = pch16[consumed];
            if (low >= 0xdc00 && low < 0xe000)
            {
                // Well-formed surrogate pair: four bytes.
                if (caa_unlikely(written + 3 >= cch8))
                {
                    break;
                }

                codePoint = 0x010000u + (((codePoint - 0xd800u) << 10) | (low - 0xdc00u));
                consumed += 1; // The low surrogate has been used up as well.

                pch8[written] = (unsigned char)((codePoint >> 18) | 0xf0);
                pch8[written + 1] = (unsigned char)(((codePoint >> 12) & 0x3f) | 0x80);
                pch8[written + 2] = (unsigned char)(((codePoint >> 6) & 0x3f) | 0x80);
                pch8[written + 3] = (unsigned char)((codePoint & 0x3f) | 0x80);
                written += 4;
                continue;
            }
        }

        // U+0800..U+FFFF, including unpaired surrogates: three bytes.
        if (caa_unlikely(written + 2 >= cch8))
        {
            break;
        }

        pch8[written] = (unsigned char)((codePoint >> 12) | 0xe0);
        pch8[written + 1] = (unsigned char)(((codePoint >> 6) & 0x3f) | 0x80);
        pch8[written + 2] = (unsigned char)((codePoint & 0x3f) | 0x80);
        written += 3;
    }

    return written;
}