CFIndex __CFStringEncodeByteStream()

in Frameworks/CoreFoundation/String.subproj/CFStringEncodings.c [493:800]


/*
 * Converts the UniChars of `string` in [rangeLoc, rangeLoc + rangeLen) into
 * bytes of `encoding`.
 *
 *   string                 - source string.
 *   rangeLoc, rangeLen     - range to convert, counted in UniChars.
 *   generatingExternalFile - when true, a BOM is emitted for
 *                            kCFStringEncodingUTF16 / kCFStringEncodingUTF32, and
 *                            kCFStringEncodingPrependBOM is passed to the
 *                            converter on the UTF-8 fast path and generic path.
 *   encoding               - target encoding.
 *   lossByte               - UTF-32 branch: if non-zero, substituted for an
 *                            unpaired surrogate; if zero, conversion stops there.
 *                            Generic branch: turned into converter flags (0xFF
 *                            together with kCFStringEncodingASCII selects
 *                            kCFStringEncodingAllowLossyConversion). The UTF-8
 *                            fast path, NonLossyASCII and UTF-16 branches do not
 *                            consult it.
 *   buffer                 - destination, or NULL to only count the bytes that
 *                            would be produced (`max` is then not used as a limit).
 *   max                    - capacity of `buffer` in bytes.
 *   usedBufLen             - optional out-parameter; receives the number of bytes
 *                            produced on every return path.
 *
 * Returns the number of UniChars of the range that were processed.
 */
CFIndex __CFStringEncodeByteStream(CFStringRef string, CFIndex rangeLoc, CFIndex rangeLen, Boolean generatingExternalFile, CFStringEncoding encoding, uint8_t lossByte, uint8_t *buffer, CFIndex max, CFIndex *usedBufLen) {
    CFIndex totalBytesWritten = 0;  /* Number of written bytes */
    CFIndex numCharsProcessed = 0;  /* Number of processed chars */
    const UniChar *unichars;

    if (encoding == kCFStringEncodingUTF8 && (unichars = CFStringGetCharactersPtr(string))) {
        /* Fast path: the string exposes its UniChar storage, so hand it straight to the UTF-8 converter. */
        static CFStringEncodingToBytesProc __CFToUTF8 = NULL;

        if (!__CFToUTF8) {
            /* Look the converter up once and cache its toBytes entry point. */
            const CFStringEncodingConverter *utf8Converter = CFStringEncodingGetConverter(kCFStringEncodingUTF8);
            __CFToUTF8 = (CFStringEncodingToBytesProc)utf8Converter->toBytes;
        }
        numCharsProcessed = __CFToUTF8((generatingExternalFile ? kCFStringEncodingPrependBOM : 0), unichars + rangeLoc, rangeLen, buffer, (buffer ? max : 0), &totalBytesWritten);

    } else if (encoding == kCFStringEncodingNonLossyASCII) {
        const char *hex = "0123456789abcdef";
        UniChar ch;
        CFStringInlineBuffer buf;
        CFStringInitInlineBuffer(string, &buf, CFRangeMake(rangeLoc, rangeLen));
        while (numCharsProcessed < rangeLen) {
            CFIndex reqLength; /* Required number of chars to encode this UniChar */
            CFIndex cnt;
            char tmp[6];
            ch = CFStringGetCharacterFromInlineBuffer(&buf, numCharsProcessed);
            if ((ch >= ' ' && ch <= '~' && ch != '\\') || (ch == '\n' || ch == '\r' || ch == '\t')) {
                /* Printable ASCII (other than backslash) and \n, \r, \t pass through unchanged. */
                reqLength = 1;
                tmp[0] = (char)ch;
            } else {
                if (ch == '\\') {
                    /* Backslash is doubled. */
                    tmp[1] = '\\';
                    reqLength = 2;
                } else if (ch < 256) {  /* \nnn; note that this is not NEXTSTEP encoding but a (small) UniChar */
                    tmp[1] = '0' + (ch >> 6);
                    tmp[2] = '0' + ((ch >> 3) & 7);
                    tmp[3] = '0' + (ch & 7);
                    reqLength = 4;
                } else {    /* \Unnnn */
                    tmp[1] = 'u'; // Changed to small+u in order to be aligned with Java
                    tmp[2] = hex[(ch >> 12) & 0x0f];
                    tmp[3] = hex[(ch >> 8) & 0x0f];
                    tmp[4] = hex[(ch >> 4) & 0x0f];
                    tmp[5] = hex[ch & 0x0f];
                    reqLength = 6;
                }
                tmp[0] = '\\';
            }
            if (buffer) {
                if (totalBytesWritten + reqLength > max) break; /* Doesn't fit... */
                for (cnt = 0; cnt < reqLength; cnt++) {
                    buffer[totalBytesWritten + cnt] = tmp[cnt];
                }
            }
            totalBytesWritten += reqLength;
            numCharsProcessed++;
        }
    } else if ((encoding == kCFStringEncodingUTF16) || (encoding == kCFStringEncodingUTF16BE) || (encoding == kCFStringEncodingUTF16LE)) {
        /* Only the endian-neutral UTF-16 encoding gets a BOM, and only for external files. */
        CFIndex extraForBOM = (generatingExternalFile && (encoding == kCFStringEncodingUTF16) ? sizeof(UniChar) : 0);

        /* If the caller's buffer cannot even hold the BOM, emit nothing rather than
           writing the two BOM bytes past the end of the buffer. No UniChar fits in
           that case either, so the result is 0 chars / 0 bytes. */
        if (buffer && (extraForBOM > max)) extraForBOM = 0;

        numCharsProcessed = rangeLen;
        if (buffer && (numCharsProcessed * (CFIndex)sizeof(UniChar) + extraForBOM > max)) {
            /* Truncate to the number of whole UniChars that fit after the BOM. */
            numCharsProcessed = (max > extraForBOM) ? ((max - extraForBOM) / sizeof(UniChar)) : 0;
        }
        totalBytesWritten = (numCharsProcessed * sizeof(UniChar)) + extraForBOM;
        if (buffer) {
            if (extraForBOM) {  /* Generate BOM */
#if __CF_BIG_ENDIAN__
                *buffer++ = 0xfe; *buffer++ = 0xff;
#else
                *buffer++ = 0xff; *buffer++ = 0xfe;
#endif
            }
            CFStringGetCharacters(string, CFRangeMake(rangeLoc, numCharsProcessed), (UniChar *)buffer);
            if ((__CF_BIG_ENDIAN__ ?  kCFStringEncodingUTF16LE : kCFStringEncodingUTF16BE) == encoding) { // Need to swap
                UTF16Char *characters = (UTF16Char *)buffer;
                const UTF16Char *limit = characters + numCharsProcessed;

                while (characters < limit) {
                    *characters = CFSwapInt16(*characters);
                    ++characters;
                }
            }
        }
    } else if ((encoding == kCFStringEncodingUTF32) || (encoding == kCFStringEncodingUTF32BE) || (encoding == kCFStringEncodingUTF32LE)) {
        UTF32Char character;
        CFStringInlineBuffer buf;
        UTF32Char *characters = (UTF32Char *)buffer;

        /* Swap only when the explicitly requested byte order is the opposite of the host's. */
        bool swap = (encoding == (__CF_BIG_ENDIAN__ ? kCFStringEncodingUTF32LE : kCFStringEncodingUTF32BE) ? true : false);
        if (generatingExternalFile && (encoding == kCFStringEncodingUTF32)) {
            totalBytesWritten += sizeof(UTF32Char);
            if (characters) {
                if (totalBytesWritten > max) { // insufficient buffer
                    totalBytesWritten = 0;
                } else {
                    *(characters++) = 0x0000FEFF;
                }
            }
        }

        CFStringInitInlineBuffer(string, &buf, CFRangeMake(rangeLoc, rangeLen));
        while (numCharsProcessed < rangeLen) {
            character = CFStringGetCharacterFromInlineBuffer(&buf, numCharsProcessed);

            if (CFUniCharIsSurrogateHighCharacter(character)) {
                UTF16Char otherCharacter;

                if (((numCharsProcessed + 1) < rangeLen) && CFUniCharIsSurrogateLowCharacter((otherCharacter = CFStringGetCharacterFromInlineBuffer(&buf, numCharsProcessed + 1)))) {
                    /* Well-formed pair: combine into one UTF-32 value. */
                    character = CFUniCharGetLongCharacterForSurrogatePair(character, otherCharacter);
                } else if (lossByte) {
                    character = lossByte;
                } else {
                    break; /* Unpaired high surrogate and no substitute allowed. */
                }
            } else if (CFUniCharIsSurrogateLowCharacter(character)) {
                if (lossByte) {
                    character = lossByte;
                } else {
                    break; /* Stray low surrogate and no substitute allowed. */
                }
            }

            totalBytesWritten += sizeof(UTF32Char);

            if (characters) {
                if (totalBytesWritten > max) {
                    /* Doesn't fit: undo the speculative count and stop. */
                    totalBytesWritten -= sizeof(UTF32Char);
                    break;
                }
                *(characters++) = (swap ? CFSwapInt32(character) : character);
            }

            /* A value above the BMP consumed two UniChars of input. */
            numCharsProcessed += (character > 0xFFFF ? 2 : 1);
        }
    } else {
        CFIndex numChars;
        UInt32 flags;
        const unsigned char *cString = NULL;
        Boolean isASCIISuperset = __CFStringEncodingIsSupersetOfASCII(encoding);

        if (!CFStringEncodingIsValidEncoding(encoding)) {
            /* Keep the out-parameter defined on this path too, like every other exit. */
            if (usedBufLen) *usedBufLen = 0;
            return 0;
        }

        // WINOBJC: Don't check for OBJC. The check isn't needed because the bridging strategy for WinObjC ensures eventually
        // the real CF object is used, ending any recursion.
        // if (!CF_IS_OBJC(CFStringGetTypeID(), string) && isASCIISuperset) { // Checking for NSString to avoid infinite recursion
        if (isASCIISuperset) {
            /* When the string exposes 8-bit storage, either copy it verbatim (target is
               that same 8-bit encoding) or copy its leading run of bytes < 0x80 directly
               and leave only the remainder for the converter. */
            const unsigned char *ptr;
            if ((cString = (const unsigned char *)CFStringGetCStringPtr(string, __CFStringGetEightBitStringEncoding()))) {
                ptr = (cString += rangeLoc);
                if (__CFStringGetEightBitStringEncoding() == encoding) {
                    numCharsProcessed = (rangeLen < max || buffer == NULL ? rangeLen : max);
                    if (buffer) memmove(buffer, cString, numCharsProcessed);
                    if (usedBufLen) *usedBufLen = numCharsProcessed;
                    return numCharsProcessed;
                }

                /* With a buffer there is no point scanning more than `max` bytes. */
                CFIndex uninterestingTailLen = buffer ? (rangeLen - MIN(max, rangeLen)) : 0;
                /* Check the remaining length before dereferencing so the scan never reads past the range. */
                while (rangeLen > uninterestingTailLen && *ptr < 0x80) {
                    ++ptr;
                    --rangeLen;
                }
                numCharsProcessed = ptr - cString;
                if (buffer) {
                    numCharsProcessed = (numCharsProcessed < max ? numCharsProcessed : max);
                    memmove(buffer, cString, numCharsProcessed);
                    buffer += numCharsProcessed;
                    max -= numCharsProcessed;
                }
                if (!rangeLen || (buffer && (max == 0))) {
                    /* Whole range consumed, or buffer full: done. */
                    if (usedBufLen) *usedBufLen = numCharsProcessed;
                    return numCharsProcessed;
                }
                rangeLoc += numCharsProcessed;
                totalBytesWritten += numCharsProcessed;
            }
            if (!cString && (cString = CFStringGetPascalStringPtr(string, __CFStringGetEightBitStringEncoding()))) {
                /* Same as above for Pascal-string storage; the extra +1 skips the first byte (presumably the length byte). */
                ptr = (cString += (rangeLoc + 1));
                if (__CFStringGetEightBitStringEncoding() == encoding) {
                    numCharsProcessed = (rangeLen < max || buffer == NULL ? rangeLen : max);
                    if (buffer) memmove(buffer, cString, numCharsProcessed);
                    if (usedBufLen) *usedBufLen = numCharsProcessed;
                    return numCharsProcessed;
                }
                /* Check the remaining length before dereferencing so the scan never reads past the range. */
                while (rangeLen > 0 && *ptr < 0x80) {
                    ++ptr;
                    --rangeLen;
                }
                numCharsProcessed = ptr - cString;
                if (buffer) {
                    numCharsProcessed = (numCharsProcessed < max ? numCharsProcessed : max);
                    memmove(buffer, cString, numCharsProcessed);
                    buffer += numCharsProcessed;
                    max -= numCharsProcessed;
                }
                if (!rangeLen || (buffer && (max == 0))) {
                    if (usedBufLen) *usedBufLen = numCharsProcessed;
                    return numCharsProcessed;
                }
                rangeLoc += numCharsProcessed;
                totalBytesWritten += numCharsProcessed;
            }
        }

        /* From here on a zero `max` means "count only". */
        if (!buffer) max = 0;

        // Special case for Foundation. When lossByte == 0xFF && encoding kCFStringEncodingASCII, we do the default ASCII fallback conversion
        // Aki 11/24/04 __CFGetASCIICompatibleFlag() is called only for non-ASCII superset encodings. Otherwise, it could lead to a deadlock (see 3890536).
        flags = (lossByte ? ((unsigned char)lossByte == 0xFF && encoding == kCFStringEncodingASCII ? kCFStringEncodingAllowLossyConversion : CFStringEncodingLossyByteToMask(lossByte)) : 0) | (generatingExternalFile ? kCFStringEncodingPrependBOM : 0) | (isASCIISuperset ? 0 : __CFGetASCIICompatibleFlag());

        if (!cString && (cString = (const unsigned char *)CFStringGetCharactersPtr(string))) { // Must be Unicode string
            /* Contiguous UniChar storage: convert the whole range in a single call. */
            CFStringEncodingUnicodeToBytes(encoding, flags, (const UniChar *)cString + rangeLoc, rangeLen, &numCharsProcessed, buffer, max, &totalBytesWritten);
        } else {
            /* No direct UniChar access (or an 8-bit prefix was already copied above):
               pull the characters out in fixed-size chunks and convert each chunk. */
            UniChar charBuf[kCFCharConversionBufferLength];
            CFIndex currentLength;
            CFIndex usedLen;
            CFIndex lastUsedLen = 0, lastNumChars = 0; /* Accounting of the previous chunk, needed to redo it. */
            uint32_t result;
            uint32_t streamingMask;
            uint32_t streamID = 0;
#define MAX_DECOMP_LEN (6)

            while (rangeLen > 0) {
                currentLength = (rangeLen > kCFCharConversionBufferLength ? kCFCharConversionBufferLength : rangeLen);
                CFStringGetCharacters(string, CFRangeMake(rangeLoc, currentLength), charBuf);

                // could be in the middle of surrogate pair; back up.
                if ((rangeLen > kCFCharConversionBufferLength) && CFUniCharIsSurrogateHighCharacter(charBuf[kCFCharConversionBufferLength - 1])) --currentLength;

                /* Tell the converter whether more input follows, and carry the stream ID between chunks. */
                streamingMask = ((rangeLen > currentLength) ? kCFStringEncodingPartialInput : 0)|CFStringEncodingStreamIDToMask(streamID);

                result = CFStringEncodingUnicodeToBytes(encoding, flags|streamingMask, charBuf, currentLength, &numChars, buffer, max, &usedLen);
                streamID = CFStringEncodingStreamIDFromMask(result);
                result &= ~CFStringEncodingStreamIDMask;

                if (result != kCFStringEncodingConversionSuccess) {
                    if (kCFStringEncodingInvalidInputStream == result) {
                        CFRange composedRange;
                        // Check the tail
                        /* The failure is near the end of a non-final chunk: if the composed
                           sequence at the chunk boundary starts inside what was converted,
                           reconvert the chunk cut at the start of that sequence. */
                        if ((rangeLen > kCFCharConversionBufferLength) && ((currentLength - numChars) < MAX_DECOMP_LEN)) {
                            composedRange = CFStringGetRangeOfComposedCharactersAtIndex(string, rangeLoc + currentLength);

                            if ((composedRange.length <= MAX_DECOMP_LEN) && (composedRange.location < (rangeLoc + numChars))) {
                                result = CFStringEncodingUnicodeToBytes(encoding, flags|streamingMask, charBuf, composedRange.location - rangeLoc, &numChars, buffer, max, &usedLen);
                                streamID = CFStringEncodingStreamIDFromMask(result);
                                result &= ~CFStringEncodingStreamIDMask;
                            }
                        }

                        // Check the head
                        /* The failure is near the start of this chunk and a previous chunk
                           exists: if the composed sequence at rangeLoc begins in the previous
                           chunk, redo the previous chunk so that it ends where that sequence starts. */
                        if ((kCFStringEncodingConversionSuccess != result) && (lastNumChars > 0) && (numChars < MAX_DECOMP_LEN)) {
                            composedRange = CFStringGetRangeOfComposedCharactersAtIndex(string, rangeLoc);

                            if ((composedRange.length <= MAX_DECOMP_LEN) && (composedRange.location < rangeLoc)) {
                                // Try if the composed range can be converted
                                CFStringGetCharacters(string, composedRange, charBuf);

                                if (CFStringEncodingUnicodeToBytes(encoding, flags, charBuf, composedRange.length, &numChars, NULL, 0, &usedLen) == kCFStringEncodingConversionSuccess) { // OK let's try the last run
                                    CFIndex lastRangeLoc = rangeLoc - lastNumChars;

                                    currentLength = composedRange.location - lastRangeLoc;
                                    CFStringGetCharacters(string, CFRangeMake(lastRangeLoc, currentLength), charBuf);

                                    /* Reconvert into the space the previous chunk's output occupied. */
                                    result = CFStringEncodingUnicodeToBytes(encoding, flags|streamingMask, charBuf, currentLength, &numChars, (max ? buffer - lastUsedLen : NULL), (max ? max + lastUsedLen : 0), &usedLen);
                                    streamID = CFStringEncodingStreamIDFromMask(result);
                                    result &= ~CFStringEncodingStreamIDMask;

                                    if (result == kCFStringEncodingConversionSuccess) { // OK let's try the last run
                                        // Looks good. back up
                                        /* Remove the previous chunk from the totals; the common
                                           bookkeeping below adds the shortened redo back in. */
                                        totalBytesWritten -= lastUsedLen;
                                        numCharsProcessed -= lastNumChars;

                                        rangeLoc = lastRangeLoc;
                                        rangeLen += lastNumChars;

                                        if (max) {
                                            buffer -= lastUsedLen;
                                            max += lastUsedLen;
                                        }
                                    }
                                }
                            }
                        }
                    }

                    if (kCFStringEncodingConversionSuccess != result) { // really failed
                        /* Account for whatever the converter reported before stopping. */
                        totalBytesWritten += usedLen;
                        numCharsProcessed += numChars;
                        break;
                    }
                }

                totalBytesWritten += usedLen;
                numCharsProcessed += numChars;

                rangeLoc += numChars;
                rangeLen -= numChars;
                if (max) {
                    buffer += usedLen;
                    max -= usedLen;
                    if (max <= 0) break; /* Buffer exhausted. */
                }
                lastUsedLen = usedLen; lastNumChars = numChars;
                /* A BOM is only ever requested for the first chunk. */
                flags &= ~kCFStringEncodingPrependBOM;
            }
        }
    }
    if (usedBufLen) *usedBufLen = totalBytesWritten;
    return numCharsProcessed;
}