static unsigned Utf16ToUtf8()

in src/LttngHelpers.c [181:245]


// Separate declaration carrying the lttng_ust_notrace annotation.
static unsigned Utf16ToUtf8(
    char16_t const *pch16,
    unsigned cch16,
    unsigned char *pch8,
    unsigned cch8)
    lttng_ust_notrace;

// Transcodes the cch16 UTF-16 code units at pch16 into UTF-8 bytes at pch8.
//
//   pch16 - input UTF-16 code units (read-only).
//   cch16 - number of code units available at pch16.
//   pch8  - output buffer that receives the UTF-8 bytes.
//   cch8  - capacity of pch8, in bytes.
//
// Returns the number of bytes stored in pch8. No terminator is appended.
// If the complete encoding of the next character does not fit in the space
// that remains, conversion stops there, so the output never ends with a
// partially-written sequence.
//
// Unpaired surrogates are not rejected: they are emitted using the ordinary
// 3-byte form so that they survive into the log where they can be noticed
// and fixed.
static unsigned Utf16ToUtf8(
    char16_t const *pch16,
    unsigned cch16,
    unsigned char *pch8,
    unsigned cch8)
{
    unsigned cbOut = 0; // Bytes written to pch8 so far.
    unsigned iIn = 0;   // Index of the next unread code unit in pch16.

    // Each code unit produces at most 3 bytes, so with cch16 bounded like this
    // the output index arithmetic below cannot wrap. (Callers are expected to
    // stay at or below 65535 code units, well inside this limit.)
    assert(cch16 <= (0xFFFFFFFF / 3));

    while (iIn != cch16)
    {
        unsigned ch = pch16[iIn];
        iIn += 1;

        if (caa_likely(ch < 0x80))
        {
            // ASCII: one byte.
            if (caa_unlikely(cbOut == cch8))
            {
                break;
            }
            pch8[cbOut] = (unsigned char)ch;
            cbOut += 1;
            continue;
        }

        if (caa_likely(ch < 0x800))
        {
            // Two-byte sequence.
            if (caa_unlikely(cbOut + 1 >= cch8))
            {
                break;
            }
            pch8[cbOut + 0] = (unsigned char)((ch >> 6) | 0xc0);
            pch8[cbOut + 1] = (unsigned char)((ch & 0x3f) | 0x80);
            cbOut += 2;
            continue;
        }

        // A high surrogate is only combined when another code unit exists and
        // that unit is a low surrogate; the lookahead is not read otherwise.
        if (0xd800 <= ch && ch < 0xdc00 && iIn != cch16)
        {
            unsigned const low = pch16[iIn];
            if (0xdc00 <= low && low < 0xe000)
            {
                // Valid surrogate pair: four-byte sequence.
                if (caa_unlikely(cbOut + 3 >= cch8))
                {
                    break;
                }
                ch = 0x010000u + (((ch - 0xd800u) << 10) | (low - 0xdc00u));
                iIn += 1; // Consume the low surrogate as well.
                pch8[cbOut + 0] = (unsigned char)((ch >> 18) | 0xf0);
                pch8[cbOut + 1] = (unsigned char)(((ch >> 12) & 0x3f) | 0x80);
                pch8[cbOut + 2] = (unsigned char)(((ch >> 6) & 0x3f) | 0x80);
                pch8[cbOut + 3] = (unsigned char)((ch & 0x3f) | 0x80);
                cbOut += 4;
                continue;
            }
        }

        // Everything else in the BMP, including unmatched surrogates:
        // three-byte sequence.
        if (caa_unlikely(cbOut + 2 >= cch8))
        {
            break;
        }
        pch8[cbOut + 0] = (unsigned char)((ch >> 12) | 0xe0);
        pch8[cbOut + 1] = (unsigned char)(((ch >> 6) & 0x3f) | 0x80);
        pch8[cbOut + 2] = (unsigned char)((ch & 0x3f) | 0x80);
        cbOut += 3;
    }

    return cbOut;
}