in CoreFoundation/StringEncodings.subproj/CFStringEncodingConverter.c [651:836]
/*
 * Converts `numChars` UTF-16 code units from `characters` into `encoding`,
 * storing the result in `bytes`.
 *
 * Parameters:
 *   encoding     - target encoding; kCFStringEncodingUTF8 takes a dedicated
 *                  path, everything else goes through a looked-up converter.
 *   flags        - kCFStringEncoding* option bits (canonical decomposition,
 *                  lossy conversion, combining-character handling).
 *   characters   - input code units.
 *   numChars     - number of code units in `characters`.
 *   usedCharLen  - optional out: number of input code units consumed.
 *   bytes        - output buffer.
 *   maxByteLen   - capacity of `bytes`. In the generic path a value of 0
 *                  removes the output limit and suppresses the direct byte
 *                  stores made by this function; presumably this is a
 *                  "measure only" mode -- confirm against the converters.
 *   usedByteLen  - optional out: number of output bytes produced.
 *
 * Returns one of kCFStringEncodingConversionSuccess,
 * kCFStringEncodingInvalidInputStream,
 * kCFStringEncodingInsufficientOutputBufferLength or
 * kCFStringEncodingConverterUnavailable. The ICU and platform-specific
 * converter classes return whatever their own routine returns.
 */
uint32_t CFStringEncodingUnicodeToBytes(uint32_t encoding, uint32_t flags, const UniChar *characters, CFIndex numChars, CFIndex *usedCharLen, uint8_t *bytes, CFIndex maxByteLen, CFIndex *usedByteLen) {
if (encoding == kCFStringEncodingUTF8) {
// Cached UTF-8 encoder proc, filled in on first use below.
// NOTE(review): no synchronization is visible around this lazy init -- confirm it is benign.
static CFStringEncodingToBytesProc __CFToUTF8 = NULL;
CFIndex convertedCharLen;
CFIndex usedLen;
if ((flags & kCFStringEncodingUseCanonical) || (flags & kCFStringEncodingUseHFSPlusCanonical)) {
// Canonical output requested: go through CFUniCharDecompose, asking for
// kCFUniCharUTF8Format output. The final argument is true only for the
// HFS+ variant. The return value is discarded; progress is read back from
// convertedCharLen / usedLen.
(void)CFUniCharDecompose(characters, numChars, &convertedCharLen, (void *)bytes, maxByteLen, &usedLen, true, kCFUniCharUTF8Format, (flags & kCFStringEncodingUseHFSPlusCanonical ? true : false));
} else {
if (!__CFToUTF8) {
const CFStringEncodingConverter *utf8Converter = CFStringEncodingGetConverter(kCFStringEncodingUTF8);
__CFToUTF8 = utf8Converter->toBytes.standard;
}
convertedCharLen = __CFToUTF8(0, characters, numChars, bytes, maxByteLen, &usedLen);
}
// Both out-parameters are optional.
if (usedCharLen) *usedCharLen = convertedCharLen;
if (usedByteLen) *usedByteLen = usedLen;
if (convertedCharLen == numChars) {
// Every input code unit was consumed.
return kCFStringEncodingConversionSuccess;
// Stopped early with a real output buffer that has fewer than 10 bytes left:
// decide whether the cause was bad input or lack of space.
// NOTE(review): the rationale for the threshold of 10 is not shown here.
} else if ((maxByteLen > 0) && ((maxByteLen - usedLen) < 10)) { // could be filled outbuf
UTF16Char character = characters[convertedCharLen];
// The first unconverted unit is malformed if it lies in the low-surrogate
// range on its own, or lies in the high-surrogate range and is either the
// last unit or not followed by a unit in the low-surrogate range.
if (((character >= kSurrogateLowStart) && (character <= kSurrogateLowEnd)) || ((character >= kSurrogateHighStart) && (character <= kSurrogateHighEnd) && ((1 == (numChars - convertedCharLen)) || (characters[convertedCharLen + 1] < kSurrogateLowStart) || (characters[convertedCharLen + 1] > kSurrogateLowEnd)))) return kCFStringEncodingInvalidInputStream;
return kCFStringEncodingInsufficientOutputBufferLength;
} else {
// Stopped early with maxByteLen == 0 or with at least 10 bytes to spare:
// reported as invalid input.
return kCFStringEncodingInvalidInputStream;
}
} else {
const _CFEncodingConverter *converter = __CFGetConverter(encoding);
// usedLen:          input code units consumed so far.
// localUsedByteLen: bytes reported by the most recent conversion step.
// theUsedByteLen:   total output bytes accounted for so far.
CFIndex usedLen = 0;
CFIndex localUsedByteLen;
CFIndex theUsedByteLen = 0;
uint32_t theResult = kCFStringEncodingConversionSuccess;
CFStringEncodingToBytesPrecomposeProc toBytesPrecompose = NULL;
CFStringEncodingIsValidCombiningCharacterProc isValidCombiningChar = NULL;
if (!converter) return kCFStringEncodingConverterUnavailable;
// Select the combining-character callbacks from the flags:
//  - SubstituteCombinings: only the "is combining" predicate, and only when
//    lossy conversion is NOT allowed.
//  - otherwise: the predicate always; additionally, unless IgnoreCombinings
//    is set, the precompose proc is used and ComposeCombinings is forced on.
if (flags & kCFStringEncodingSubstituteCombinings) {
if (!(flags & kCFStringEncodingAllowLossyConversion)) isValidCombiningChar = converter->definition->isValidCombiningChar;
} else {
isValidCombiningChar = converter->definition->isValidCombiningChar;
if (!(flags & kCFStringEncodingIgnoreCombinings)) {
toBytesPrecompose = converter->definition->toBytesPrecompose;
flags |= kCFStringEncodingComposeCombinings;
}
}
#if TARGET_OS_MAC || TARGET_OS_WIN32 || TARGET_OS_LINUX
// ICU-class converters are handled entirely by the ICU routine.
if (kCFStringEncodingConverterICU == converter->definition->encodingClass) return __CFStringEncodingICUToBytes((const char *)converter->toBytes, flags, characters, numChars, usedCharLen, bytes, maxByteLen, usedByteLen);
#endif
/* Platform converter */
if (kCFStringEncodingConverterPlatformSpecific == converter->definition->encodingClass) return __CFStringEncodingPlatformUnicodeToBytes(encoding, flags, characters, numChars, usedCharLen, bytes, maxByteLen, usedByteLen);
// Main loop: continue while input remains and either there is no output
// limit (maxByteLen == 0) or the output buffer still has room.
while ((usedLen < numChars) && (!maxByteLen || (theUsedByteLen < maxByteLen))) {
// Convert as much as possible in one call; the return value is added to
// usedLen. If input is still left, the converter stalled at characters[usedLen].
if ((usedLen += TO_BYTE(converter, flags, characters + usedLen, numChars - usedLen, bytes + theUsedByteLen, (maxByteLen ? maxByteLen - theUsedByteLen : 0), &localUsedByteLen)) < numChars) {
CFIndex dummy;
// Case A: stalled on a combining character that has at least one character before it.
if (isValidCombiningChar && (usedLen > 0) && isValidCombiningChar(characters[usedLen])) {
if (toBytesPrecompose) {
// Remember where the stall happened, then walk usedLen backwards over
// combining characters until a non-combining character (or index 0) is reached.
CFIndex localUsedLen = usedLen;
while (usedLen > 0) {
usedLen -= 1;
// NOTE(review): usedLen was > 0 before the decrement, so this test cannot be
// true; the matching theResult check after the loop is therefore never taken
// as a consequence of this loop.
if (usedLen < 0) {
theResult = kCFStringEncodingInvalidInputStream;
break;
}
if (!isValidCombiningChar(characters[usedLen])) {
break;
}
}
if (theResult == kCFStringEncodingInvalidInputStream) {
break;
}
// Rewind the byte count to where the walked-back characters began: first
// add the bytes from the stalled call, then subtract the bytes belonging to
// characters[usedLen .. localUsedLen).
theUsedByteLen += localUsedByteLen;
if (converter->definition->maxBytesPerChar > 1) {
// Multi-byte encoding: re-run the converter with no output buffer to learn
// how many bytes that span occupied.
TO_BYTE(converter, flags, characters + usedLen, localUsedLen - usedLen, NULL, 0, &localUsedByteLen);
theUsedByteLen -= localUsedByteLen;
} else {
// NOTE(review): subtracts exactly one byte even if the walk-back covered more
// than one character -- confirm that cannot occur for single-byte converters.
theUsedByteLen--;
}
// Try to encode the sequence starting at the walked-back position via the
// precompose proc; a positive return is the number of characters it consumed.
if ((localUsedLen = toBytesPrecompose(flags, characters + usedLen, numChars - usedLen, bytes + theUsedByteLen, (maxByteLen ? maxByteLen - theUsedByteLen : 0), &localUsedByteLen)) > 0) {
usedLen += localUsedLen;
if ((usedLen < numChars) && isValidCombiningChar(characters[usedLen])) { // There is a non-base char not combined remaining
theUsedByteLen += localUsedByteLen;
theResult = kCFStringEncodingInvalidInputStream;
break;
}
} else if (flags & kCFStringEncodingAllowLossyConversion) {
// Precomposition failed but lossy output is allowed.
uint8_t lossyByte = CFStringEncodingMaskToLossyByte(flags);
if (lossyByte) {
// Skip the character at usedLen plus every combining character after it and
// stand in a single lossy byte for the whole run (stored only when a buffer
// limit was given; counted via localUsedByteLen at the bottom of the loop).
while (++usedLen < numChars) {
if (!isValidCombiningChar(characters[usedLen])) {
break;
}
}
localUsedByteLen = 1;
if (maxByteLen) *(bytes + theUsedByteLen) = lossyByte;
} else {
// No lossy byte configured: step past the character at usedLen and hand the
// following input to the converter's fallback routine.
++usedLen;
usedLen += TO_BYTE_FALLBACK(converter, characters + usedLen, numChars - usedLen, bytes + theUsedByteLen, (maxByteLen ? maxByteLen - theUsedByteLen : 0), &localUsedByteLen);
}
} else {
// Cannot precompose and lossy conversion is not allowed.
theResult = kCFStringEncodingInvalidInputStream;
break;
}
// No precompose proc. With a bounded buffer, treat the stall as "out of space"
// if the buffer is now exactly full, or if the converter reports progress on
// the same input when called without an output buffer.
} else if (maxByteLen && ((maxByteLen == theUsedByteLen + localUsedByteLen) || TO_BYTE(converter, flags, characters + usedLen, numChars - usedLen, NULL, 0, &dummy))) { // buffer was filled up
theUsedByteLen += localUsedByteLen;
theResult = kCFStringEncodingInsufficientOutputBufferLength;
break;
} else if (flags & kCFStringEncodingIgnoreCombinings) {
// Drop the stalled combining character and any combining characters that follow it.
while ((++usedLen < numChars) && isValidCombiningChar(characters[usedLen]));
} else {
// Substitute: commit the bytes from the stalled call, then emit either the
// lossy byte for this one character or whatever the fallback routine produces.
uint8_t lossyByte = CFStringEncodingMaskToLossyByte(flags);
theUsedByteLen += localUsedByteLen;
if (lossyByte) {
++usedLen;
localUsedByteLen = 1;
if (maxByteLen) *(bytes + theUsedByteLen) = lossyByte;
} else {
usedLen += TO_BYTE_FALLBACK(converter, characters + usedLen, numChars - usedLen, bytes + theUsedByteLen, (maxByteLen ? maxByteLen - theUsedByteLen : 0), &localUsedByteLen);
}
}
// Case B: stalled on something other than a handled combining character.
// Same "buffer full" detection as above.
} else if (maxByteLen && ((maxByteLen == theUsedByteLen + localUsedByteLen) || TO_BYTE(converter, flags, characters + usedLen, numChars - usedLen, NULL, 0, &dummy))) { // buffer was filled up
theUsedByteLen += localUsedByteLen;
if (flags & kCFStringEncodingAllowLossyConversion && !CFStringEncodingMaskToLossyByte(flags)) {
CFIndex localUsedLen;
localUsedByteLen = 0;
// after the buffer is full, we still try out all the rest of the characters
// if all characters cannot be converted, we mark the result as insufficient output buffer
// The probe uses the fallback routine with no output buffer and advances
// usedLen only over characters for which it reports zero output bytes.
while ((usedLen < numChars) && !localUsedByteLen && (localUsedLen = TO_BYTE_FALLBACK(converter, characters + usedLen, numChars - usedLen, NULL, 0, &localUsedByteLen))) {
if (localUsedByteLen == 0) {
usedLen += localUsedLen;
}
}
}
// If the probe consumed everything that was left, theResult stays at success.
if (usedLen < numChars) theResult = kCFStringEncodingInsufficientOutputBufferLength;
break;
} else if (flags & kCFStringEncodingAllowLossyConversion) {
// Unconvertible character with lossy conversion allowed: commit the bytes so
// far, then substitute the lossy byte or defer to the fallback routine.
uint8_t lossyByte = CFStringEncodingMaskToLossyByte(flags);
theUsedByteLen += localUsedByteLen;
if (lossyByte) {
++usedLen;
localUsedByteLen = 1;
if (maxByteLen) *(bytes + theUsedByteLen) = lossyByte;
} else {
usedLen += TO_BYTE_FALLBACK(converter, characters + usedLen, numChars - usedLen, bytes + theUsedByteLen, (maxByteLen ? maxByteLen - theUsedByteLen : 0), &localUsedByteLen);
}
} else {
// Unconvertible character and no lossy handling requested.
theUsedByteLen += localUsedByteLen;
theResult = kCFStringEncodingInvalidInputStream;
break;
}
}
// Commit the bytes reported by the last conversion step of this iteration
// (every path that `break`s out above has already done its own accounting).
theUsedByteLen += localUsedByteLen;
}
// The loop ended without an error but input remains and a buffer limit was
// given, i.e. the output filled up. Run the same zero-output fallback probe
// as inside the loop before reporting an insufficient buffer.
if (usedLen < numChars && maxByteLen && theResult == kCFStringEncodingConversionSuccess) {
if (flags & kCFStringEncodingAllowLossyConversion && !CFStringEncodingMaskToLossyByte(flags)) {
CFIndex localUsedLen;
localUsedByteLen = 0;
while ((usedLen < numChars) && !localUsedByteLen && (localUsedLen = TO_BYTE_FALLBACK(converter, characters + usedLen, numChars - usedLen, NULL, 0, &localUsedByteLen))) {
if (!localUsedByteLen) {
usedLen += localUsedLen;
}
}
}
if (usedLen < numChars) theResult = kCFStringEncodingInsufficientOutputBufferLength;
}
// Report totals through the optional out-parameters.
if (usedByteLen) *usedByteLen = theUsedByteLen;
if (usedCharLen) *usedCharLen = usedLen;
return theResult;
}
}