uint32_t CFStringEncodingUnicodeToBytes()

in CoreFoundation/StringEncodings.subproj/CFStringEncodingConverter.c [651:836]


/*
 * Converts a run of UTF-16 code units into bytes of the requested encoding.
 *
 * Parameters:
 *   encoding    - target encoding. kCFStringEncodingUTF8 is handled by a dedicated path;
 *                 every other value goes through the converter returned by __CFGetConverter().
 *   flags       - kCFStringEncoding* option bits. The ones consulted here are
 *                 UseCanonical / UseHFSPlusCanonical (UTF-8 path), and SubstituteCombinings,
 *                 IgnoreCombinings, AllowLossyConversion (generic path). ComposeCombinings may be
 *                 OR-ed into the local copy of flags before it is handed to the converter.
 *   characters  - input code units.
 *   numChars    - number of code units in characters.
 *   usedCharLen - optional out-parameter (may be NULL): number of input units consumed.
 *   bytes       - output buffer.
 *   maxByteLen  - capacity of bytes. In the generic path a value of 0 disables the capacity check
 *                 and suppresses the direct lossy-byte stores (looks like a "measure only" mode;
 *                 confirm against callers). In the UTF-8 path an incomplete conversion with
 *                 maxByteLen == 0 is reported as kCFStringEncodingInvalidInputStream.
 *   usedByteLen - optional out-parameter (may be NULL): number of bytes produced.
 *
 * Returns kCFStringEncodingConversionSuccess, kCFStringEncodingInsufficientOutputBufferLength,
 * kCFStringEncodingInvalidInputStream or kCFStringEncodingConverterUnavailable. For ICU and
 * platform-specific converters the delegate's return value is passed through unchanged.
 *
 * Note: TO_BYTE and TO_BYTE_FALLBACK are defined outside this block. Judging from how their
 * results are used below, they yield the number of input units consumed and store the number of
 * bytes produced through their last argument (TODO confirm against the macro definitions).
 */
uint32_t CFStringEncodingUnicodeToBytes(uint32_t encoding, uint32_t flags, const UniChar *characters, CFIndex numChars, CFIndex *usedCharLen, uint8_t *bytes, CFIndex maxByteLen, CFIndex *usedByteLen) {
    if (encoding == kCFStringEncodingUTF8) {
        // Cached toBytes entry point of the UTF-8 converter; looked up on first use below.
        // NOTE(review): the lazy initialization uses a plain static with no locking visible here.
        static CFStringEncodingToBytesProc __CFToUTF8 = NULL;
        CFIndex convertedCharLen;
        CFIndex usedLen;


        if ((flags & kCFStringEncodingUseCanonical) || (flags & kCFStringEncodingUseHFSPlusCanonical)) {
            // Canonical output requested: decompose while emitting UTF-8 directly into bytes.
            // The boolean result is discarded; only the two length out-parameters are used.
            (void)CFUniCharDecompose(characters, numChars, &convertedCharLen, (void *)bytes, maxByteLen, &usedLen, true, kCFUniCharUTF8Format, (flags & kCFStringEncodingUseHFSPlusCanonical ? true : false));
        } else {
            if (!__CFToUTF8) {
                const CFStringEncodingConverter *utf8Converter = CFStringEncodingGetConverter(kCFStringEncodingUTF8);
                __CFToUTF8 = utf8Converter->toBytes.standard;
            }
            // The caller's flags are not forwarded; the UTF-8 converter is always invoked with 0.
            convertedCharLen = __CFToUTF8(0, characters, numChars, bytes, maxByteLen, &usedLen);
        }
        if (usedCharLen) *usedCharLen = convertedCharLen;
        if (usedByteLen) *usedByteLen = usedLen;

        if (convertedCharLen == numChars) {
            return kCFStringEncodingConversionSuccess;
        // Fewer than 10 bytes of room left is taken as a sign that the stop may have been caused
        // by the output buffer (the threshold 10 is presumably a conservative per-character bound).
        } else if ((maxByteLen > 0) && ((maxByteLen - usedLen) < 10)) { // could be filled outbuf
            UTF16Char character = characters[convertedCharLen];
            
            // The unit at the stop position decides the verdict: a unit in the
            // kSurrogateLow range, or a unit in the kSurrogateHigh range that is either the last
            // input unit or not followed by a kSurrogateLow unit, means the input itself is bad.
            if (((character >= kSurrogateLowStart) && (character <= kSurrogateLowEnd)) || ((character >= kSurrogateHighStart) && (character <= kSurrogateHighEnd) && ((1 == (numChars - convertedCharLen)) || (characters[convertedCharLen + 1] < kSurrogateLowStart) || (characters[convertedCharLen + 1] > kSurrogateLowEnd)))) return kCFStringEncodingInvalidInputStream;
            
            return kCFStringEncodingInsufficientOutputBufferLength;
        } else {
            return kCFStringEncodingInvalidInputStream;
        }
    } else {
        const _CFEncodingConverter *converter = __CFGetConverter(encoding);
        CFIndex usedLen = 0;            // input units consumed so far
        CFIndex localUsedByteLen;       // bytes reported by the most recent conversion step
        CFIndex theUsedByteLen = 0;     // bytes committed to the output so far
        uint32_t theResult = kCFStringEncodingConversionSuccess;
        CFStringEncodingToBytesPrecomposeProc toBytesPrecompose = NULL;
        CFStringEncodingIsValidCombiningCharacterProc isValidCombiningChar = NULL;

        if (!converter) return kCFStringEncodingConverterUnavailable;

        // Decide how combining characters are treated:
        //  - SubstituteCombinings: never precompose; combining-character detection is only
        //    enabled when lossy conversion is NOT allowed.
        //  - otherwise: always detect combining characters and, unless IgnoreCombinings is set,
        //    enable the converter's precompose routine and add ComposeCombinings to flags.
        if (flags & kCFStringEncodingSubstituteCombinings) {
            if (!(flags & kCFStringEncodingAllowLossyConversion)) isValidCombiningChar = converter->definition->isValidCombiningChar;
       } else {
            isValidCombiningChar = converter->definition->isValidCombiningChar;
            if (!(flags & kCFStringEncodingIgnoreCombinings)) {
                toBytesPrecompose = converter->definition->toBytesPrecompose;
                flags |= kCFStringEncodingComposeCombinings;
            }
        }

        // ICU-backed converters are delegated wholesale (with the possibly adjusted flags).
#if TARGET_OS_MAC || TARGET_OS_WIN32 || TARGET_OS_LINUX
        if (kCFStringEncodingConverterICU == converter->definition->encodingClass) return __CFStringEncodingICUToBytes((const char *)converter->toBytes, flags, characters, numChars, usedCharLen, bytes, maxByteLen, usedByteLen);
#endif

        /* Platform converter */
        if (kCFStringEncodingConverterPlatformSpecific == converter->definition->encodingClass) return __CFStringEncodingPlatformUnicodeToBytes(encoding, flags, characters, numChars, usedCharLen, bytes, maxByteLen, usedByteLen);

        // Main loop: keep converting while input remains and (when a capacity was given) the
        // output buffer still has room.
        while ((usedLen < numChars) && (!maxByteLen || (theUsedByteLen < maxByteLen))) {
            // Convert as much as possible in one step. The body of this if only runs when the
            // step stopped before the end of the input.
            if ((usedLen += TO_BYTE(converter, flags, characters + usedLen, numChars - usedLen, bytes + theUsedByteLen, (maxByteLen ? maxByteLen - theUsedByteLen : 0), &localUsedByteLen)) < numChars) {
                CFIndex dummy;

                // Case 1: the conversion stopped on a combining character that is not the very
                // first input unit.
                if (isValidCombiningChar && (usedLen > 0) && isValidCombiningChar(characters[usedLen])) {
                    if (toBytesPrecompose) {
                        CFIndex localUsedLen = usedLen; // remember where the conversion stopped

                        // Walk backwards to the nearest preceding unit that is not a combining
                        // character (or to index 0 if none is found).
                        // NOTE(review): because the loop guard requires usedLen > 0 before the
                        // decrement, the `usedLen < 0` test below cannot be true, so this error
                        // path and the matching break after the loop appear to be unreachable.
                        while (usedLen > 0) {
                            usedLen -= 1;
                            if (usedLen < 0) {
                                theResult = kCFStringEncodingInvalidInputStream;
                                break;
                            }
                            if (!isValidCombiningChar(characters[usedLen])) {
                                break;
                            }
                        }
                        if (theResult == kCFStringEncodingInvalidInputStream) {
                            break;
                        }
                        // Count the bytes of the last step, then take back the bytes belonging to
                        // the units that were just rolled back: measured with a NULL output
                        // buffer for multi-byte encodings, exactly one byte otherwise.
                        theUsedByteLen += localUsedByteLen;
                        if (converter->definition->maxBytesPerChar > 1) {
                            TO_BYTE(converter, flags, characters + usedLen, localUsedLen - usedLen, NULL, 0, &localUsedByteLen);
                            theUsedByteLen -= localUsedByteLen;
                        } else {
                            theUsedByteLen--;
                        }
                        // Re-encode starting at the roll-back position via the precompose routine.
                        if ((localUsedLen = toBytesPrecompose(flags, characters + usedLen, numChars - usedLen, bytes + theUsedByteLen, (maxByteLen ? maxByteLen - theUsedByteLen : 0), &localUsedByteLen)) > 0) {
                            usedLen += localUsedLen;
                            if ((usedLen < numChars) && isValidCombiningChar(characters[usedLen])) { // There is a non-base char not combined remaining
                                // The precomposed bytes are still counted before bailing out,
                                // because the break skips the accumulation at the loop bottom.
                                theUsedByteLen += localUsedByteLen;
                                theResult = kCFStringEncodingInvalidInputStream;
                                break;
                            }
                        } else if (flags & kCFStringEncodingAllowLossyConversion) {
                            uint8_t lossyByte = CFStringEncodingMaskToLossyByte(flags);

                            if (lossyByte) {
                                // Skip the unit at the roll-back position plus every combining
                                // character that follows it, and stand in a single lossy byte.
                                while (++usedLen < numChars) {
                                    if (!isValidCombiningChar(characters[usedLen])) {
                                        break;
                                    }
                                }
                                localUsedByteLen = 1;
                                if (maxByteLen) *(bytes + theUsedByteLen) = lossyByte;
                            } else {
                                // No lossy byte in flags: step past the unit at the roll-back
                                // position and hand the remainder to the fallback converter.
                                ++usedLen;
                                usedLen += TO_BYTE_FALLBACK(converter, characters + usedLen, numChars - usedLen, bytes + theUsedByteLen, (maxByteLen ? maxByteLen - theUsedByteLen : 0), &localUsedByteLen);
                            }
                        } else {
                            theResult = kCFStringEncodingInvalidInputStream;
                            break;
                        }
                    // No precompose routine. First check whether the stop was really caused by a
                    // full buffer: either the buffer is exactly full, or the pending input
                    // converts fine once the capacity limit is removed.
                    } else if (maxByteLen && ((maxByteLen == theUsedByteLen + localUsedByteLen) || TO_BYTE(converter, flags, characters + usedLen, numChars - usedLen, NULL, 0, &dummy))) { // buffer was filled up
                                    theUsedByteLen += localUsedByteLen;
                                    theResult = kCFStringEncodingInsufficientOutputBufferLength;
                                    break;
                    } else if (flags & kCFStringEncodingIgnoreCombinings) {
                        // Drop the whole run of consecutive combining characters.
                        while ((++usedLen < numChars) && isValidCombiningChar(characters[usedLen]));
                    } else {
                        // Substitute the combining character: one lossy byte if flags carry one,
                        // otherwise whatever the fallback converter produces.
                        uint8_t lossyByte = CFStringEncodingMaskToLossyByte(flags);

                        theUsedByteLen += localUsedByteLen;
                        if (lossyByte) {
                            ++usedLen;
                            localUsedByteLen = 1;
                            if (maxByteLen) *(bytes + theUsedByteLen) = lossyByte;
                        } else {
                            usedLen += TO_BYTE_FALLBACK(converter, characters + usedLen, numChars - usedLen, bytes + theUsedByteLen, (maxByteLen ? maxByteLen - theUsedByteLen : 0), &localUsedByteLen);
                        }
                    }
                // Case 2: stopped on a non-combining unit and the cause is a full output buffer
                // (same test as above).
                } else if (maxByteLen && ((maxByteLen == theUsedByteLen + localUsedByteLen) || TO_BYTE(converter, flags, characters + usedLen, numChars - usedLen, NULL, 0, &dummy))) { // buffer was filled up
                    theUsedByteLen += localUsedByteLen;

                    // Lossy mode without an explicit lossy byte: units whose fallback produces
                    // zero bytes need no buffer space, so they are consumed before deciding
                    // whether the buffer was really too small.
                    if (flags & kCFStringEncodingAllowLossyConversion && !CFStringEncodingMaskToLossyByte(flags)) {
                        CFIndex localUsedLen;

                        localUsedByteLen = 0;
                        // after the buffer is full, we still try out all the rest of the characters
                        // if all characters cannot be converted, we mark the result as insufficient output buffer
                        while ((usedLen < numChars) && !localUsedByteLen && (localUsedLen = TO_BYTE_FALLBACK(converter, characters + usedLen, numChars - usedLen, NULL, 0, &localUsedByteLen))) {
                            if (localUsedByteLen == 0) {
                                usedLen += localUsedLen;
                            }
                        }
                    }
                    if (usedLen < numChars) theResult = kCFStringEncodingInsufficientOutputBufferLength;
                    break;
                // Case 3: the unit cannot be converted, but lossy conversion is allowed.
                } else if (flags & kCFStringEncodingAllowLossyConversion) {
                    uint8_t lossyByte = CFStringEncodingMaskToLossyByte(flags);

                    theUsedByteLen += localUsedByteLen;
                    if (lossyByte) {
                        ++usedLen;
                        localUsedByteLen = 1;
                        if (maxByteLen) *(bytes + theUsedByteLen) = lossyByte;
                    } else {
                        usedLen += TO_BYTE_FALLBACK(converter, characters + usedLen, numChars - usedLen, bytes + theUsedByteLen, (maxByteLen ? maxByteLen - theUsedByteLen : 0), &localUsedByteLen);
                    }
                // Case 4: the unit cannot be converted and lossy conversion is not allowed.
                } else {
                    theUsedByteLen += localUsedByteLen;
                    theResult = kCFStringEncodingInvalidInputStream;
                    break;
                }
            }
            // Commit the bytes reported by the last conversion step of this iteration.
            theUsedByteLen += localUsedByteLen;
        }

        // The loop can also end because the buffer filled up while input remains. Apply the same
        // "consume zero-byte fallback units" rule as above before reporting a short buffer.
        if (usedLen < numChars && maxByteLen && theResult == kCFStringEncodingConversionSuccess) {
            if (flags & kCFStringEncodingAllowLossyConversion && !CFStringEncodingMaskToLossyByte(flags)) {
                CFIndex localUsedLen;

                localUsedByteLen = 0;
                while ((usedLen < numChars) && !localUsedByteLen && (localUsedLen = TO_BYTE_FALLBACK(converter, characters + usedLen, numChars - usedLen, NULL, 0, &localUsedByteLen))) {
                    if (!localUsedByteLen) {
                        usedLen += localUsedLen;
                    }
                }
            }
            if (usedLen < numChars) theResult = kCFStringEncodingInsufficientOutputBufferLength;
        }
        if (usedByteLen) *usedByteLen = theUsedByteLen;
        if (usedCharLen) *usedCharLen = usedLen;

        return theResult;
    }
}