static unsigned Utf16ToUtf8Size()

in src/LttngHelpers.c [135:179]


// Returns the number of UTF-8 bytes needed to encode the cch16 UTF-16 code
// units starting at pch16. Nothing is written and no terminator is counted.
//
// Per-unit cost:
//   U+0000..U+007F            -> 1 byte
//   U+0080..U+07FF            -> 2 bytes
//   high + low surrogate pair -> 4 bytes (consumes two UTF-16 units)
//   anything else             -> 3 bytes (including lone/unmatched surrogates)
//
// The separate declaration below exists to attach lttng_ust_notrace to the
// function (NOTE(review): presumably because the attribute macro cannot be
// placed on the definition itself - confirm against the LTTng-UST headers).
static unsigned Utf16ToUtf8Size(
    char16_t const *pch16,
    unsigned cch16)
    lttng_ust_notrace;
static unsigned Utf16ToUtf8Size(
    char16_t const *pch16,
    unsigned cch16)
{
    unsigned cb8 = 0;

    // Worst case is 3 output bytes per input unit, so this bound guarantees
    // that cb8 cannot wrap. Callers are expected to stay far below it
    // (cch16 <= 65535), which is why no per-iteration overflow check is made.
    assert(cch16 <= (0xFFFFFFFF / 3));

    for (unsigned i = 0; i != cch16; i += 1)
    {
        unsigned const unit = pch16[i];

        if (caa_likely(unit < 0x80))
        {
            cb8 += 1;
            continue;
        }

        if (caa_likely(unit < 0x800))
        {
            cb8 += 2;
            continue;
        }

        // Unmatched surrogates are intentionally accepted and sized as
        // ordinary 3-byte sequences: for logging it is more useful to keep
        // them visible so they can be noticed and fixed at the source.
        unsigned const next = i + 1;
        if (unit >= 0xd800 && unit <= 0xdbff &&
            next != cch16 &&
            pch16[next] >= 0xdc00 && pch16[next] <= 0xdfff)
        {
            // Well-formed pair: skip the low surrogate as well.
            i = next;
            cb8 += 4;
        }
        else
        {
            cb8 += 3;
        }
    }

    return cb8;
}