size_t SystemLocale::Utf8From16()

in source/shared/localizationimpl.cpp [731:868]


size_t SystemLocale::Utf8From16( const WCHAR *src, SSIZE_T cchSrc, char *dest, size_t cchDest, DWORD *pErrorCode )
{
    const WCHAR *srcEnd = src + cchSrc;
    char *destEnd = dest + cchDest;
    DWORD dummyError;
    if (!pErrorCode)
    {
        pErrorCode = &dummyError;
    }
    *pErrorCode = 0;

    // null dest is a special mode to calculate the output size required.
    if (!dest)
    {
        size_t cbOut = 0;
        while (src < srcEnd)
        {
            DWORD wch = *src++;
            if (wch < 128) // most common case.
            {
                cbOut++;
            }
            else if (wch < 0x800) // 127 to 2047: 2 bytes
            {
                cbOut += 2;
            }
            else if (wch < 0xD800 || wch > 0xDFFF) // 2048 to 55295 and 57344 to 65535: 3 bytes
            {
                cbOut += 3;
            }
            else if (wch < 0xDC00) // 65536 to end of Unicode: 4 bytes
            {
                if (src >= srcEnd)
                {
                    cbOut += 3; // lone surrogate at end
                }
                else if (*src < 0xDC00 || *src > 0xDFFF)
                {
                    cbOut += 3; // low surrogate not followed by high
                }
                else
                {
                    cbOut += 4;
                }
            }
            else // unexpected trail surrogate
            {
                cbOut += 3;
            }
        }
        return cbOut;
    }
    while ( src < srcEnd && dest < destEnd )
    {
        DWORD wch = *src++;
        if (wch < 128) // most common case.
        {
            *dest++ = wch;
        }
        else if (wch < 0x800) // 127 to 2047: 2 bytes
        {
            if (destEnd - dest < 2)
            {
                *pErrorCode = ERROR_INSUFFICIENT_BUFFER;
                return 0;
            }
            *dest++ = 0xC0 | (wch >> 6);
            *dest++ = 0x80 | (wch & 0x3F);
        }
        else if (wch < 0xD800 || wch > 0xDFFF) // 2048 to 55295 and 57344 to 65535: 3 bytes
        {
            if (destEnd - dest < 3)
            {
                *pErrorCode = ERROR_INSUFFICIENT_BUFFER;
                return 0;
            }
            *dest++ = 0xE0 | (wch >> 12);
            *dest++ = 0x80 | ((wch >> 6)&0x3F);
            *dest++ = 0x80 | (wch &0x3F);
        }
        else if (wch < 0xDC00) // 65536 to end of Unicode: 4 bytes
        {
            if (src >= srcEnd)
            {
                *pErrorCode = ERROR_NO_UNICODE_TRANSLATION; // lone surrogate at end
                if (destEnd - dest < 3)
                {
                    *pErrorCode = ERROR_INSUFFICIENT_BUFFER;
                    return 0;
                }
                *dest++ = 0xEF;
                *dest++ = 0xBF;
                *dest++ = 0xBD;
                continue;
            }
            if (*src < 0xDC00 || *src > 0xDFFF)
            {
                // low surrogate not followed by high
                if (destEnd - dest < 3)
                {
                    *pErrorCode = ERROR_INSUFFICIENT_BUFFER;
                    return 0;
                }
                *dest++ = 0xEF;
                *dest++ = 0xBF;
                *dest++ = 0xBD;
                continue;
            }
            wch = 0x10000 + ((wch - 0xD800)<<10) + *src++ - 0xDC00;
            if (destEnd - dest < 4)
            {
                *pErrorCode = ERROR_INSUFFICIENT_BUFFER;
                return 0;
            }
            *dest++ = 0xF0 | (wch >> 18);
            *dest++ = 0x80 | ((wch >>12)&0x3F);
            *dest++ = 0x80 | ((wch >> 6)&0x3F);
            *dest++ = 0x80 | (wch&0x3F);
        }
        else // unexpected trail surrogate
        {
            *pErrorCode = ERROR_NO_UNICODE_TRANSLATION; // lone surrogate at end
            if (destEnd - dest < 3)
            {
                *pErrorCode = ERROR_INSUFFICIENT_BUFFER;
                return 0;
            }
            *dest++ = 0xEF;
            *dest++ = 0xBF;
            *dest++ = 0xBD;
        }
    }
    if (!*pErrorCode)
    {
        *pErrorCode = (dest == destEnd && src != srcEnd) ? ERROR_INSUFFICIENT_BUFFER : ERROR_SUCCESS;
    }
    return *pErrorCode == ERROR_INSUFFICIENT_BUFFER ? 0 : cchDest - (destEnd - dest);
}