size_t SystemLocale::Utf8To16()

in source/shared/localizationimpl.cpp [417:540]


// File-local helper for SystemLocale::Utf8To16.
//
// Reads up to `count` UTF-8 continuation bytes (10xxxxxx, i.e. 0x80..0xBF)
// starting at `cursor`, shifting the 6 payload bits of each one into `bits`.
//
// Returns true when all `count` bytes were present and valid. On failure the
// continuation bytes accepted so far stay consumed and `cursor` is left on the
// first byte that is missing or is not a continuation byte, so the caller can
// re-examine that byte as the start of a new sequence. `bits` is then only
// partially updated and must not be used.
static bool Utf8To16ReadContinuation( const unsigned char *&cursor, const unsigned char *end,
                                      int count, unsigned long &bits )
{
    for (; count > 0; --count)
    {
        if (cursor >= end || *cursor < 0x80 || *cursor > 0xBF)
        {
            return false;
        }
        bits = (bits << 6) | (*cursor++ & 0x3Fu);
    }
    return true;
}

// Converts a UTF-8 byte sequence to UTF-16 code units, substituting U+FFFD for
// anything that is not well-formed UTF-8.
//
// Parameters:
//   src        - UTF-8 input bytes.
//   cchSrc     - number of input bytes to convert. Conversion does not stop at
//                a NUL byte; a NUL inside the range is copied like any other
//                ASCII byte.
//                NOTE(review): a negative value is given no special meaning
//                here (it is simply added to src) - presumably callers resolve
//                "null-terminated" lengths before calling; confirm.
//   dest       - output buffer for UTF-16 code units.
//   cchDest    - capacity of dest, in WCHARs.
//   pErrorCode - optional; may be null. Receives ERROR_SUCCESS, or
//                ERROR_INSUFFICIENT_BUFFER when dest filled up before all of
//                src was consumed (including the case where a surrogate pair
//                needs two WCHARs and only one is left).
//
// Returns the number of WCHARs written to dest. No terminator is appended.
//
// Malformed input is not reported through pErrorCode. Each of the following
// produces a single U+FFFD in the output:
//   - a continuation byte (0x80..0xBF) where a lead byte is expected;
//   - a lead byte 0xF8 or above;
//   - a lead byte whose continuation bytes are missing or invalid (the valid
//     continuation bytes seen so far are consumed, the offending byte is not);
//   - an overlong encoding, an encoded surrogate (U+D800..U+DFFF), or a value
//     above U+10FFFF (the whole sequence is consumed).
size_t SystemLocale::Utf8To16( const char *src, SSIZE_T cchSrc, WCHAR *dest, size_t cchDest, DWORD *pErrorCode )
{
    const unsigned char *usrc = reinterpret_cast<const unsigned char*>(src);
    const unsigned char *srcEnd = usrc + cchSrc;
    const WCHAR *destEnd = dest + cchDest;

    // Let the rest of the function write the error code unconditionally.
    DWORD dummyError;
    if (!pErrorCode)
    {
        pErrorCode = &dummyError;
    }
    *pErrorCode = 0;

    while (usrc < srcEnd && dest < destEnd)
    {
        const unsigned int lead = *usrc++;

        // Fast path: ASCII maps 1:1.
        if (lead <= 127)
        {
            *dest++ = static_cast<WCHAR>(lead);
            continue;
        }

        // Decode the scalar value according to the lead byte. `wellFormed`
        // stays false for stray continuation bytes (< 0xC0) and for lead
        // bytes >= 0xF8, which match none of the branches below.
        unsigned long scalar = 0;
        bool wellFormed = false;

        if (lead < 0xC0)
        {
            // Unexpected continuation byte 10xxxxxx.
        }
        else if (lead < 0xE0) // 110abcde 10fghijk
        {
            scalar = lead & 0x1F;
            wellFormed = Utf8To16ReadContinuation(usrc, srcEnd, 1, scalar)
                      && scalar >= 0x80;                          // reject overlong
        }
        else if (lead < 0xF0) // 1110abcd 10efghij 10klmnop
        {
            scalar = lead & 0x0F;
            wellFormed = Utf8To16ReadContinuation(usrc, srcEnd, 2, scalar)
                      && scalar >= 0x800                          // reject overlong
                      && (scalar < 0xD800 || scalar > 0xDFFF);    // reject encoded surrogates
        }
        else if (lead < 0xF8) // 11110abc 10defghi 10jklmno 10pqrstu
        {
            scalar = lead & 0x07;
            // The surrogate range lies below 0x10000, so the overlong check
            // already excludes it.
            wellFormed = Utf8To16ReadContinuation(usrc, srcEnd, 3, scalar)
                      && scalar >= 0x10000                        // reject overlong
                      && scalar <= 0x10FFFF;                      // reject beyond Unicode
        }

        if (!wellFormed)
        {
            *dest++ = 0xFFFD;
        }
        else if (scalar < 0x10000)
        {
            // Basic Multilingual Plane: one code unit.
            *dest++ = static_cast<WCHAR>(scalar);
        }
        else
        {
            // Supplementary plane: needs a surrogate pair, i.e. two free slots.
            // The loop condition only guarantees one.
            if (destEnd - dest < 2)
            {
                *pErrorCode = ERROR_INSUFFICIENT_BUFFER;
                return cchDest - (destEnd - dest);
            }
            scalar -= 0x10000;
            *dest++ = static_cast<WCHAR>(0xD800 + (scalar >> 10));    // lead surrogate
            *dest++ = static_cast<WCHAR>(0xDC00 + (scalar & 0x3FF));  // trail surrogate
        }
    }

    if (!*pErrorCode)
    {
        // Leaving the loop with input still unread means dest ran out of room.
        *pErrorCode = (dest == destEnd && usrc != srcEnd) ? ERROR_INSUFFICIENT_BUFFER : ERROR_SUCCESS;
    }
    return cchDest - (destEnd - dest);
}