in Frameworks/CoreFoundation/String.subproj/CFStringEncodings.c [117:478]
/*
 * Decodes `len` bytes starting at `bytes`, interpreted according to `encoding`,
 * into `buffer` as either 8-bit ASCII characters or 16-bit UniChars.
 *
 * Parameters:
 *   bytes               Input byte stream.
 *   len                 Number of bytes in `bytes`. A length of 0 succeeds immediately.
 *   encoding            Selects the decode path: UTF-16 (any flavor), UTF-32 (any flavor),
 *                       UTF-8, non-lossy ASCII, or whatever converter
 *                       CFStringEncodingGetConverter() returns for the encoding.
 *   alwaysUnicode       When true, the result is never narrowed to 8-bit ASCII.
 *   buffer              In/out. On entry `allocator` may be NULL (the default allocator is
 *                       then used) and `chars` may already point at caller-supplied storage,
 *                       which is used as-is and never reallocated. On success `isASCII`,
 *                       `numChars`, `chars` and `shouldFreeChars` describe the result.
 *   useClientsMemoryPtr Optional. Set to true only when native-order, BOM-less UTF-16 input
 *                       can be used in place; `buffer->chars.unicode` then points into `bytes`.
 *   converterFlags      Passed through to the UTF-8 converter only.
 *
 * Returns true on success. On failure returns false; failures that reach the common
 * error exit free any storage allocated here and reset `buffer`. The two early
 * `return false` statements (unknown converter, strict UTF-32 range violation) do not
 * go through that reset.
 */
Boolean __CFStringDecodeByteStream3(const uint8_t *bytes, CFIndex len, CFStringEncoding encoding, Boolean alwaysUnicode, CFVarWidthCharBuffer *buffer, Boolean *useClientsMemoryPtr, UInt32 converterFlags) {
    CFIndex idx;
    const uint8_t *chars = (const uint8_t *)bytes;
    const uint8_t *end = chars + len;
    Boolean result = TRUE;

    if (useClientsMemoryPtr) *useClientsMemoryPtr = false;

    // Start optimistic: assume ASCII unless the caller insists on Unicode.
    buffer->isASCII = !alwaysUnicode;
    buffer->shouldFreeChars = false;
    buffer->numChars = 0;

    if (0 == len) return true;

    buffer->allocator = (buffer->allocator ? buffer->allocator : __CFGetDefaultAllocator());

    if ((encoding == kCFStringEncodingUTF16) || (encoding == kCFStringEncodingUTF16BE) || (encoding == kCFStringEncodingUTF16LE)) { // UTF-16
        const UTF16Char *src = (const UTF16Char *)bytes;
        const UTF16Char *limit = src + (len / sizeof(UTF16Char)); // <rdar://problem/7854378> avoiding odd len issue
        bool swap = false;

        if (kCFStringEncodingUTF16 == encoding) {
            // Only probe for a BOM when at least one whole code unit is present; with len == 1
            // `limit == src` and reading *src would run past the end of the caller's bytes
            // (and a spurious BOM match would push src beyond limit, making numChars negative).
            UTF16Char bom = (((src < limit) && ((*src == 0xFFFE) || (*src == 0xFEFF))) ? *(src++) : 0);

            // Without a BOM the data is treated as big-endian.
#if __CF_BIG_ENDIAN__
            if (bom == 0xFFFE) swap = true;
#else
            if (bom != 0xFEFF) swap = true;
#endif
            // A BOM was consumed, so the caller's memory cannot be handed back unchanged.
            if (bom) useClientsMemoryPtr = NULL;
        } else {
#if __CF_BIG_ENDIAN__
            if (kCFStringEncodingUTF16LE == encoding) swap = true;
#else
            if (kCFStringEncodingUTF16BE == encoding) swap = true;
#endif
        }

        buffer->numChars = limit - src;

        if (useClientsMemoryPtr && !swap) { // If the caller is ready to deal with no-copy situation, and the situation is possible, indicate it...
            *useClientsMemoryPtr = true;
            buffer->chars.unicode = (UniChar *)src;
            buffer->isASCII = false;
        } else {
            if (buffer->isASCII) { // Let's see if we can reduce the Unicode down to ASCII...
                const UTF16Char *characters = src;
                // Any bit outside the low 7 bits of the (possibly byte-swapped) unit means non-ASCII.
                UTF16Char mask = (swap ? 0x80FF : 0xFF80);

                while (characters < limit) {
                    if (*(characters++) & mask) {
                        buffer->isASCII = false;
                        break;
                    }
                }
            }

            if (buffer->isASCII) {
                uint8_t *dst;
                if (NULL == buffer->chars.ascii) { // we never reallocate when buffer is supplied
                    if (buffer->numChars > MAX_LOCAL_CHARS) {
                        buffer->chars.ascii = (UInt8 *)CFAllocatorAllocate(buffer->allocator, (buffer->numChars * sizeof(uint8_t)), 0);
                        if (!buffer->chars.ascii) goto memoryErrorExit;
                        buffer->shouldFreeChars = true;
                    } else {
                        buffer->chars.ascii = (uint8_t *)buffer->localBuffer;
                    }
                }
                dst = buffer->chars.ascii;

                if (swap) {
                    // In swapped data the ASCII value sits in the high byte of the raw unit.
                    while (src < limit) *(dst++) = (*(src++) >> 8);
                } else {
                    while (src < limit) *(dst++) = (uint8_t)*(src++);
                }
            } else {
                UTF16Char *dst;

                if (NULL == buffer->chars.unicode) { // we never reallocate when buffer is supplied
                    if (buffer->numChars > MAX_LOCAL_UNICHARS) {
                        buffer->chars.unicode = (UniChar *)CFAllocatorAllocate(buffer->allocator, (buffer->numChars * sizeof(UTF16Char)), 0);
                        if (!buffer->chars.unicode) goto memoryErrorExit;
                        buffer->shouldFreeChars = true;
                    } else {
                        buffer->chars.unicode = (UTF16Char *)buffer->localBuffer;
                    }
                }
                dst = buffer->chars.unicode;

                if (swap) {
                    while (src < limit) *(dst++) = CFSwapInt16(*(src++));
                } else {
                    memmove(dst, src, buffer->numChars * sizeof(UTF16Char));
                }
            }
        }
    } else if ((encoding == kCFStringEncodingUTF32) || (encoding == kCFStringEncodingUTF32BE) || (encoding == kCFStringEncodingUTF32LE)) {
        const UTF32Char *src = (const UTF32Char *)bytes;
        const UTF32Char *limit = src + (len / sizeof(UTF32Char)); // <rdar://problem/7854378> avoiding odd len issue
        bool swap = false;
        // As written, (bool)-1 is `true`, so this always ends up true (strict checking on).
        static bool strictUTF32 = (bool)-1;

        if ((bool)-1 == strictUTF32) strictUTF32 = (1 != 0);

        if (kCFStringEncodingUTF32 == encoding) {
            // Same guard as the UTF-16 path: with fewer than 4 bytes `limit == src`, and
            // reading *src would overrun the input.
            UTF32Char bom = (((src < limit) && ((*src == 0xFFFE0000) || (*src == 0x0000FEFF))) ? *(src++) : 0);

            // Without a BOM the data is treated as big-endian.
#if __CF_BIG_ENDIAN__
            if (bom == 0xFFFE0000) swap = true;
#else
            if (bom != 0x0000FEFF) swap = true;
#endif
        } else {
#if __CF_BIG_ENDIAN__
            if (kCFStringEncodingUTF32LE == encoding) swap = true;
#else
            if (kCFStringEncodingUTF32BE == encoding) swap = true;
#endif
        }

        buffer->numChars = limit - src;

        {
            // Let's see if we have non-ASCII or non-BMP
            const UTF32Char *characters = src;
            UTF32Char asciiMask = (swap ? 0x80FFFFFF : 0xFFFFFF80);
            UTF32Char bmpMask = (swap ? 0x0000FFFF : 0xFFFF0000);

            while (characters < limit) {
                if (*characters & asciiMask) {
                    buffer->isASCII = false;
                    if (*characters & bmpMask) {
                        // NOTE: this early return leaves buffer->isASCII / numChars as modified above.
                        if (strictUTF32 && ((swap ? (UTF32Char)CFSwapInt32(*characters) : *characters) > 0x10FFFF)) return false; // outside of Unicode Scaler Value. Haven't allocated buffer, yet.
                        // One extra output slot for each value outside the BMP.
                        ++(buffer->numChars);
                    }
                }
                ++characters;
            }
        }

        if (buffer->isASCII) {
            uint8_t *dst;
            if (NULL == buffer->chars.ascii) { // we never reallocate when buffer is supplied
                if (buffer->numChars > MAX_LOCAL_CHARS) {
                    buffer->chars.ascii = (UInt8 *)CFAllocatorAllocate(buffer->allocator, (buffer->numChars * sizeof(uint8_t)), 0);
                    if (!buffer->chars.ascii) goto memoryErrorExit;
                    buffer->shouldFreeChars = true;
                } else {
                    buffer->chars.ascii = (uint8_t *)buffer->localBuffer;
                }
            }
            dst = buffer->chars.ascii;

            if (swap) {
                // In swapped data the ASCII value sits in the top byte of the raw unit.
                while (src < limit) *(dst++) = (*(src++) >> 24);
            } else {
                while (src < limit) *(dst++) = *(src++);
            }
        } else {
            if (NULL == buffer->chars.unicode) { // we never reallocate when buffer is supplied
                if (buffer->numChars > MAX_LOCAL_UNICHARS) {
                    buffer->chars.unicode = (UniChar *)CFAllocatorAllocate(buffer->allocator, (buffer->numChars * sizeof(UTF16Char)), 0);
                    if (!buffer->chars.unicode) goto memoryErrorExit;
                    buffer->shouldFreeChars = true;
                } else {
                    buffer->chars.unicode = (UTF16Char *)buffer->localBuffer;
                }
            }
            // NOTE(review): the meaning of the last two arguments is defined by CFUniCharFromUTF32,
            // which is not visible here; they are passed exactly as before.
            result = (CFUniCharFromUTF32(src, limit - src, buffer->chars.unicode, (strictUTF32 ? false : true), __CF_BIG_ENDIAN__ ? !swap : swap) ? TRUE : FALSE);
        }
    } else if (kCFStringEncodingUTF8 == encoding) {
        if ((len >= 3) && (chars[0] == 0xef) && (chars[1] == 0xbb) && (chars[2] == 0xbf)) { // If UTF8 BOM, skip
            chars += 3;
            len -= 3;
            if (0 == len) return true;
        }
        if (buffer->isASCII) {
            for (idx = 0; idx < len; idx++) {
                if (128 <= chars[idx]) {
                    buffer->isASCII = false;
                    break;
                }
            }
        }
        if (buffer->isASCII) {
            // Pure ASCII: the bytes are copied through unchanged.
            buffer->numChars = len;
            // shouldFreeChars stays false only when the local buffer is used (no caller storage and it fits).
            buffer->shouldFreeChars = !buffer->chars.ascii && (len <= MAX_LOCAL_CHARS) ? false : true;
            buffer->chars.ascii = (buffer->chars.ascii ? buffer->chars.ascii : (len <= MAX_LOCAL_CHARS) ? (uint8_t *)buffer->localBuffer : (UInt8 *)CFAllocatorAllocate(buffer->allocator, len * sizeof(uint8_t), 0));
            if (!buffer->chars.ascii) goto memoryErrorExit;
            memmove(buffer->chars.ascii, chars, len * sizeof(uint8_t));
        } else {
            CFIndex numDone;
            // The UTF-8 to-Unicode converter proc is looked up once and cached.
            static CFStringEncodingToUnicodeProc __CFFromUTF8 = NULL;

            if (!__CFFromUTF8) {
                const CFStringEncodingConverter *converter = CFStringEncodingGetConverter(kCFStringEncodingUTF8);
                __CFFromUTF8 = (CFStringEncodingToUnicodeProc)converter->toUnicode;
            }

            buffer->shouldFreeChars = !buffer->chars.unicode && (len <= MAX_LOCAL_UNICHARS) ? false : true;
            buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (len <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : (UniChar *)CFAllocatorAllocate(buffer->allocator, len * sizeof(UniChar), 0));
            if (!buffer->chars.unicode) goto memoryErrorExit;
            buffer->numChars = 0;
            while (chars < end) {
                numDone = 0;
                // The converter's return value advances the input; numDone counts UniChars written.
                chars += __CFFromUTF8(converterFlags, chars, end - chars, &(buffer->chars.unicode[buffer->numChars]), len - buffer->numChars, &numDone);
                if (0 == numDone) {
                    // No output produced: treat as a decode failure rather than looping.
                    result = FALSE;
                    break;
                }
                buffer->numChars += numDone;
            }
        }
    } else if (kCFStringEncodingNonLossyASCII == encoding) {
        // Small state machine over plain ASCII, "\\" (literal backslash), "\u"/"\U" hex
        // escapes and backslash-digit octal escapes. The escape lengths are determined by
        // the __NSNonLossy*Mode constants defined elsewhere.
        UTF16Char currentValue = 0;
        uint8_t character;
        int8_t mode = __NSNonLossyASCIIMode;

        buffer->isASCII = false;
        buffer->shouldFreeChars = !buffer->chars.unicode && (len <= MAX_LOCAL_UNICHARS) ? false : true;
        buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (len <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : (UniChar *)CFAllocatorAllocate(buffer->allocator, len * sizeof(UniChar), 0));
        if (!buffer->chars.unicode) goto memoryErrorExit;
        buffer->numChars = 0;

        while (chars < end) {
            character = (*chars++);

            switch (mode) {
                case __NSNonLossyASCIIMode:
                    if (character == '\\') {
                        mode = __NSNonLossyBackslashMode;
                    } else if (character < 0x80) {
                        currentValue = character;
                    } else {
                        mode = __NSNonLossyErrorMode;
                    }
                    break;

                case __NSNonLossyBackslashMode:
                    if ((character == 'U') || (character == 'u')) {
                        mode = __NSNonLossyHexInitialMode;
                        currentValue = 0;
                    } else if ((character >= '0') && (character <= '9')) {
                        // Note: '8' and '9' are accepted here as written, not just '0'..'7'.
                        mode = __NSNonLossyOctalInitialMode;
                        currentValue = character - '0';
                    } else if (character == '\\') {
                        mode = __NSNonLossyASCIIMode;
                        currentValue = character;
                    } else {
                        mode = __NSNonLossyErrorMode;
                    }
                    break;

                default:
                    if (mode < __NSNonLossyHexFinalMode) {
                        // Accumulating hex digits.
                        if ((character >= '0') && (character <= '9')) {
                            currentValue = (currentValue << 4) | (character - '0');
                            if (++mode == __NSNonLossyHexFinalMode) mode = __NSNonLossyASCIIMode;
                        } else {
                            // Fold lowercase onto uppercase before range-checking A..F.
                            if (character >= 'a') character -= ('a' - 'A');
                            if ((character >= 'A') && (character <= 'F')) {
                                currentValue = (currentValue << 4) | ((character - 'A') + 10);
                                if (++mode == __NSNonLossyHexFinalMode) mode = __NSNonLossyASCIIMode;
                            } else {
                                mode = __NSNonLossyErrorMode;
                            }
                        }
                    } else {
                        // Accumulating octal digits (again '0'..'9' are accepted as written).
                        if ((character >= '0') && (character <= '9')) {
                            currentValue = (currentValue << 3) | (character - '0');
                            if (++mode == __NSNonLossyOctalFinalMode) mode = __NSNonLossyASCIIMode;
                        } else {
                            mode = __NSNonLossyErrorMode;
                        }
                    }
                    break;
            }

            // Back in ASCII mode means a complete character (plain or escaped) is ready.
            if (mode == __NSNonLossyASCIIMode) {
                buffer->chars.unicode[buffer->numChars++] = currentValue;
            } else if (mode == __NSNonLossyErrorMode) {
                break;
            }
        }
        // Ending anywhere but ASCII mode means an error or a truncated escape.
        result = ((mode == __NSNonLossyASCIIMode) ? YES : NO);
    } else {
        const CFStringEncodingConverter *converter = CFStringEncodingGetConverter(encoding);

        if (!converter) return false;

        Boolean isASCIISuperset = __CFStringEncodingIsSupersetOfASCII(encoding);

        if (!isASCIISuperset) buffer->isASCII = false;

        if (buffer->isASCII) {
            for (idx = 0; idx < len; idx++) {
                if (128 <= chars[idx]) {
                    buffer->isASCII = false;
                    break;
                }
            }
        }

        if (converter->encodingClass == kCFStringEncodingConverterCheapEightBit) {
            // One input byte maps to exactly one output character.
            if (buffer->isASCII) {
                buffer->numChars = len;
                buffer->shouldFreeChars = !buffer->chars.ascii && (len <= MAX_LOCAL_CHARS) ? false : true;
                buffer->chars.ascii = (buffer->chars.ascii ? buffer->chars.ascii : (len <= MAX_LOCAL_CHARS) ? (uint8_t *)buffer->localBuffer : (UInt8 *)CFAllocatorAllocate(buffer->allocator, len * sizeof(uint8_t), 0));
                if (!buffer->chars.ascii) goto memoryErrorExit;
                memmove(buffer->chars.ascii, chars, len * sizeof(uint8_t));
            } else {
                buffer->shouldFreeChars = !buffer->chars.unicode && (len <= MAX_LOCAL_UNICHARS) ? false : true;
                buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (len <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : (UniChar *)CFAllocatorAllocate(buffer->allocator, len * sizeof(UniChar), 0));
                if (!buffer->chars.unicode) goto memoryErrorExit;
                buffer->numChars = len;
                if (kCFStringEncodingASCII == encoding || kCFStringEncodingISOLatin1 == encoding) {
                    // These two are widened byte-for-byte without calling the converter.
                    for (idx = 0; idx < len; idx++) buffer->chars.unicode[idx] = (UniChar)chars[idx];
                } else {
                    for (idx = 0; idx < len; idx++) {
                        if (chars[idx] < 0x80 && isASCIISuperset) {
                            buffer->chars.unicode[idx] = (UniChar)chars[idx];
                        } else if (!((CFStringEncodingCheapEightBitToUnicodeProc)converter->toUnicode)(0, chars[idx], buffer->chars.unicode + idx)) {
                            result = FALSE;
                            break;
                        }
                    }
                }
            }
        } else {
            if (buffer->isASCII) {
                buffer->numChars = len;
                buffer->shouldFreeChars = !buffer->chars.ascii && (len <= MAX_LOCAL_CHARS) ? false : true;
                buffer->chars.ascii = (buffer->chars.ascii ? buffer->chars.ascii : (len <= MAX_LOCAL_CHARS) ? (uint8_t *)buffer->localBuffer : (UInt8 *)CFAllocatorAllocate(buffer->allocator, len * sizeof(uint8_t), 0));
                if (!buffer->chars.ascii) goto memoryErrorExit;
                memmove(buffer->chars.ascii, chars, len * sizeof(uint8_t));
            } else {
                // The output size is estimated up front by the encoding machinery.
                CFIndex guessedLength = CFStringEncodingCharLengthForBytes(encoding, 0, bytes, len);
                static UInt32 lossyFlag = (UInt32)-1;

                buffer->shouldFreeChars = !buffer->chars.unicode && (guessedLength <= MAX_LOCAL_UNICHARS) ? false : true;
                buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (guessedLength <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : (UniChar *)CFAllocatorAllocate(buffer->allocator, guessedLength * sizeof(UniChar), 0));
                if (!buffer->chars.unicode) goto memoryErrorExit;

                if (lossyFlag == (UInt32)-1) lossyFlag = 0;

                // A nonzero return from the converter is treated as failure.
                if (CFStringEncodingBytesToUnicode(encoding, lossyFlag|__CFGetASCIICompatibleFlag(), bytes, len, NULL, buffer->chars.unicode, (guessedLength > MAX_LOCAL_UNICHARS ? guessedLength : MAX_LOCAL_UNICHARS), &(buffer->numChars))) result = FALSE;
            }
        }
    }

    if (FALSE == result) {
memoryErrorExit:	// Added for <rdar://problem/6581621>, but it's not clear whether an exception would be a better option
        result = FALSE;	// In case we come here from a goto
        // Release storage this call flagged as freeable, then return the buffer to its initial state.
        if (buffer->shouldFreeChars && buffer->chars.unicode) CFAllocatorDeallocate(buffer->allocator, buffer->chars.unicode);
        buffer->isASCII = !alwaysUnicode;
        buffer->shouldFreeChars = false;
        buffer->chars.ascii = NULL;
        buffer->numChars = 0;
    }
    return result;
}