in source/shared/localizationimpl.cpp [542:666]
size_t SystemLocale::Utf8To16Strict( const char *src, SSIZE_T cchSrc, WCHAR *dest, size_t cchDest, DWORD *pErrorCode )
{
const unsigned char *usrc = reinterpret_cast<const unsigned char*>(src);
const unsigned char *srcEnd = usrc + cchSrc;
const WCHAR *destEnd = dest + cchDest;
DWORD dummyError;
if (!pErrorCode)
{
pErrorCode = &dummyError;
}
*pErrorCode = 0;
while(usrc < srcEnd && dest < destEnd)
{
DWORD ucode = *usrc++;
if(ucode <= 127) // Most common case for ASCII
{
*dest++ = ucode;
}
else if(ucode < 0xC0) // unexpected trailing byte 10xxxxxx
{
goto Invalid;
}
else if(ucode < 0xE0) // 110abcde 10fghijk
{
if (usrc >= srcEnd || *usrc < 0x80 || *usrc > 0xBF ||
(*dest = (ucode & 0x1F)<<6 | (*usrc++ & 0x3F)) < 0x80)
{
goto Invalid;
}
dest++;
}
else if(ucode < 0xF0) // 1110abcd 10efghij 10klmnop
{
if (usrc >= srcEnd)
{
goto Invalid;
}
DWORD c1 = *usrc;
if (c1 < 0x80 || c1 > 0xBF)
{
goto Invalid;
}
usrc++;
if (usrc >= srcEnd)
{
goto Invalid;
}
DWORD c2 = *usrc;
if (c2 < 0x80 || c2 > 0xBF)
{
goto Invalid;
}
usrc++;
ucode = (ucode&15)<<12 | (c1&0x3F)<<6 | (c2&0x3F);
if (ucode < 0x800 || (ucode >= 0xD800 && ucode <= 0xDFFF))
{
goto Invalid;
}
*dest++ = ucode;
}
else if(ucode < 0xF8) // 11110abc 10defghi 10jklmno 10pqrstu
{
if (usrc >= srcEnd)
{
goto Invalid;
}
DWORD c1 = *usrc;
if (c1 < 0x80 || c1 > 0xBF)
{
goto Invalid;
}
usrc++;
if (usrc >= srcEnd)
{
goto Invalid;
}
DWORD c2 = *usrc;
if (c2 < 0x80 || c2 > 0xBF)
{
goto Invalid;
}
usrc++;
if (usrc >= srcEnd)
{
goto Invalid;
}
DWORD c3 = *usrc;
if (c3 < 0x80 || c3 > 0xBF)
{
goto Invalid;
}
usrc++;
ucode = (ucode&7)<<18 | (c1&0x3F)<<12 | (c2&0x3F)<<6 | (c3&0x3F);
if (ucode < 0x10000 // overlong encoding
|| ucode > 0x10FFFF // exceeds Unicode range
|| (ucode >= 0xD800 && ucode <= 0xDFFF)) // surrogate pairs
{
goto Invalid;
}
if (dest >= destEnd - 1)
{
*pErrorCode = ERROR_INSUFFICIENT_BUFFER;
return cchDest - (destEnd - dest);
}
ucode -= 0x10000;
// Lead surrogate
*dest++ = 0xD800 + (ucode >> 10);
// Trail surrogate
*dest++ = 0xDC00 + (ucode & 0x3FF);
}
else // invalid
{
Invalid:
*pErrorCode = ERROR_NO_UNICODE_TRANSLATION;
return 0 ;
}
}
if (!*pErrorCode)
{
*pErrorCode = (dest == destEnd && usrc != srcEnd) ? ERROR_INSUFFICIENT_BUFFER : ERROR_SUCCESS;
}
return cchDest - (destEnd - dest);
}