in Frameworks/CoreFoundation/String.subproj/CFStringEncodings.c [493:800]
/*
 * Converts the UniChars of `string` in [rangeLoc, rangeLoc + rangeLen) to bytes in `encoding`.
 *
 *  string                  Source string.
 *  rangeLoc, rangeLen      Range of UniChars to convert.
 *  generatingExternalFile  When true a byte order mark is requested: it is passed to the converter as
 *                          kCFStringEncodingPrependBOM on the UTF-8 and generic paths, and written directly
 *                          for kCFStringEncodingUTF16 / kCFStringEncodingUTF32 (not for the BE/LE variants).
 *  encoding                Target encoding.
 *  lossByte                Non-zero enables lossy conversion. The UTF-32 path substitutes it for unpaired
 *                          surrogates; the generic path turns it into converter flags. The UTF-8 fast path,
 *                          NonLossyASCII and UTF-16 paths do not consult it.
 *  buffer                  Destination, or NULL to only measure the required length.
 *  max                     Capacity of `buffer` in bytes; not used as a limit when `buffer` is NULL.
 *  usedBufLen              Optional; receives the number of bytes written (or required when `buffer` is NULL).
 *
 * Returns the number of UniChars consumed. For an invalid encoding on the generic path, returns 0 and
 * leaves *usedBufLen untouched.
 */
CFIndex __CFStringEncodeByteStream(CFStringRef string, CFIndex rangeLoc, CFIndex rangeLen, Boolean generatingExternalFile, CFStringEncoding encoding, uint8_t lossByte, uint8_t *buffer, CFIndex max, CFIndex *usedBufLen) {
    CFIndex totalBytesWritten = 0; /* Number of written bytes */
    CFIndex numCharsProcessed = 0; /* Number of processed chars */
    const UniChar *unichars;

    if (encoding == kCFStringEncodingUTF8 && (unichars = CFStringGetCharactersPtr(string))) {
        /* Fast path: the UniChar storage is directly accessible, so call the UTF-8 converter's toBytes
           proc on it. The proc is looked up once and cached in a function-local static. A UTF-8 request
           on a string without a characters pointer falls through to the generic branch at the bottom. */
        static CFStringEncodingToBytesProc __CFToUTF8 = NULL;
        if (!__CFToUTF8) {
            const CFStringEncodingConverter *utf8Converter = CFStringEncodingGetConverter(kCFStringEncodingUTF8);
            __CFToUTF8 = (CFStringEncodingToBytesProc)utf8Converter->toBytes;
        }
        numCharsProcessed = __CFToUTF8((generatingExternalFile ? kCFStringEncodingPrependBOM : 0), unichars + rangeLoc, rangeLen, buffer, (buffer ? max : 0), &totalBytesWritten);
    } else if (encoding == kCFStringEncodingNonLossyASCII) {
        /* Printable ASCII (except backslash) plus \n, \r, \t are copied through; everything else is
           written as an escape: "\\" for backslash, "\nnn" (octal) below 256, "\uXXXX" (hex) otherwise. */
        const char *hex = "0123456789abcdef";
        UniChar ch;
        CFStringInlineBuffer buf;
        CFStringInitInlineBuffer(string, &buf, CFRangeMake(rangeLoc, rangeLen));
        while (numCharsProcessed < rangeLen) {
            CFIndex reqLength; /* Required number of chars to encode this UniChar */
            CFIndex cnt;
            char tmp[6];
            ch = CFStringGetCharacterFromInlineBuffer(&buf, numCharsProcessed);
            if ((ch >= ' ' && ch <= '~' && ch != '\\') || (ch == '\n' || ch == '\r' || ch == '\t')) {
                reqLength = 1;
                tmp[0] = (char)ch;
            } else {
                if (ch == '\\') {
                    tmp[1] = '\\';
                    reqLength = 2;
                } else if (ch < 256) { /* \nnn; note that this is not NEXTSTEP encoding but a (small) UniChar */
                    tmp[1] = '0' + (ch >> 6);
                    tmp[2] = '0' + ((ch >> 3) & 7);
                    tmp[3] = '0' + (ch & 7);
                    reqLength = 4;
                } else { /* \Unnnn */
                    tmp[1] = 'u'; // Changed to small+u in order to be aligned with Java
                    tmp[2] = hex[(ch >> 12) & 0x0f];
                    tmp[3] = hex[(ch >> 8) & 0x0f];
                    tmp[4] = hex[(ch >> 4) & 0x0f];
                    tmp[5] = hex[ch & 0x0f];
                    reqLength = 6;
                }
                tmp[0] = '\\'; /* Every escape form starts with a backslash */
            }
            if (buffer) {
                if (totalBytesWritten + reqLength > max) break; /* Doesn't fit... */
                for (cnt = 0; cnt < reqLength; cnt++) {
                    buffer[totalBytesWritten + cnt] = tmp[cnt];
                }
            }
            totalBytesWritten += reqLength;
            numCharsProcessed++;
        }
    } else if ((encoding == kCFStringEncodingUTF16) || (encoding == kCFStringEncodingUTF16BE) || (encoding == kCFStringEncodingUTF16LE)) {
        CFIndex extraForBOM = (generatingExternalFile && (encoding == kCFStringEncodingUTF16) ? sizeof(UniChar) : 0);
        /* A real buffer too small to hold even the BOM gets nothing written and reports 0 bytes, the
           same way the UTF-32 branch below treats this case. Without this the BOM store would run past
           the end of the buffer. */
        if (buffer && (extraForBOM > max)) extraForBOM = 0;
        numCharsProcessed = rangeLen;
        if (buffer && (numCharsProcessed * (CFIndex)sizeof(UniChar) + extraForBOM > max)) {
            /* Clamp to the number of whole UniChars that fit after the BOM */
            numCharsProcessed = (max > extraForBOM) ? ((max - extraForBOM) / sizeof(UniChar)) : 0;
        }
        totalBytesWritten = (numCharsProcessed * sizeof(UniChar)) + extraForBOM;
        if (buffer) {
            if (extraForBOM) { /* Generate BOM */
#if __CF_BIG_ENDIAN__
                *buffer++ = 0xfe; *buffer++ = 0xff;
#else
                *buffer++ = 0xff; *buffer++ = 0xfe;
#endif
            }
            CFStringGetCharacters(string, CFRangeMake(rangeLoc, numCharsProcessed), (UniChar *)buffer);
            if ((__CF_BIG_ENDIAN__ ? kCFStringEncodingUTF16LE : kCFStringEncodingUTF16BE) == encoding) { // Need to swap
                /* Requested byte order is the opposite of the host's: swap each unit in place */
                UTF16Char *characters = (UTF16Char *)buffer;
                const UTF16Char *limit = characters + numCharsProcessed;
                while (characters < limit) {
                    *characters = CFSwapInt16(*characters);
                    ++characters;
                }
            }
        }
    } else if ((encoding == kCFStringEncodingUTF32) || (encoding == kCFStringEncodingUTF32BE) || (encoding == kCFStringEncodingUTF32LE)) {
        UTF32Char character;
        CFStringInlineBuffer buf;
        UTF32Char *characters = (UTF32Char *)buffer;
        bool swap = (encoding == (__CF_BIG_ENDIAN__ ? kCFStringEncodingUTF32LE : kCFStringEncodingUTF32BE) ? true : false);
        if (generatingExternalFile && (encoding == kCFStringEncodingUTF32)) {
            totalBytesWritten += sizeof(UTF32Char);
            if (characters) {
                if (totalBytesWritten > max) { // insufficient buffer
                    totalBytesWritten = 0;
                } else {
                    *(characters++) = 0x0000FEFF;
                }
            }
        }
        CFStringInitInlineBuffer(string, &buf, CFRangeMake(rangeLoc, rangeLen));
        while (numCharsProcessed < rangeLen) {
            character = CFStringGetCharacterFromInlineBuffer(&buf, numCharsProcessed);
            if (CFUniCharIsSurrogateHighCharacter(character)) {
                UTF16Char otherCharacter;
                if (((numCharsProcessed + 1) < rangeLen) && CFUniCharIsSurrogateLowCharacter((otherCharacter = CFStringGetCharacterFromInlineBuffer(&buf, numCharsProcessed + 1)))) {
                    /* Well-formed pair: combine into one code point */
                    character = CFUniCharGetLongCharacterForSurrogatePair(character, otherCharacter);
                } else if (lossByte) {
                    character = lossByte;
                } else {
                    break; /* Unpaired high surrogate and no loss byte: stop here */
                }
            } else if (CFUniCharIsSurrogateLowCharacter(character)) {
                if (lossByte) {
                    character = lossByte;
                } else {
                    break; /* Stray low surrogate and no loss byte: stop here */
                }
            }
            totalBytesWritten += sizeof(UTF32Char);
            if (characters) {
                if (totalBytesWritten > max) {
                    totalBytesWritten -= sizeof(UTF32Char); /* Doesn't fit; undo the tentative count */
                    break;
                }
                *(characters++) = (swap ? CFSwapInt32(character) : character);
            }
            /* A code point above the BMP came from a surrogate pair and consumed two UniChars */
            numCharsProcessed += (character > 0xFFFF ? 2 : 1);
        }
    } else {
        /* Generic path: everything else goes through CFStringEncodingUnicodeToBytes */
        CFIndex numChars;
        UInt32 flags;
        const unsigned char *cString = NULL;
        Boolean isASCIISuperset = __CFStringEncodingIsSupersetOfASCII(encoding);
        if (!CFStringEncodingIsValidEncoding(encoding)) return 0;
        // WINOBJC: Don't check for OBJC. The check isn't needed because the bridging strategy for WinObjC ensures eventually
        // the real CF object is used, ending any recursion.
        // if (!CF_IS_OBJC(CFStringGetTypeID(), string) && isASCIISuperset) { // Checking for NSString to avoid infinite recursion
        if (isASCIISuperset) {
            /* For 8-bit backed strings, bytes below 0x80 can be copied verbatim into any ASCII-superset
               encoding. If the storage encoding equals the target, the whole range is a plain copy. */
            const unsigned char *ptr;
            if ((cString = (const unsigned char *)CFStringGetCStringPtr(string, __CFStringGetEightBitStringEncoding()))) {
                ptr = (cString += rangeLoc);
                if (__CFStringGetEightBitStringEncoding() == encoding) {
                    numCharsProcessed = (rangeLen < max || buffer == NULL ? rangeLen : max);
                    if (buffer) memmove(buffer, cString, numCharsProcessed);
                    if (usedBufLen) *usedBufLen = numCharsProcessed;
                    return numCharsProcessed;
                }
                /* With a real buffer there is no point scanning more characters than it can hold */
                CFIndex uninterestingTailLen = buffer ? (rangeLen - MIN(max, rangeLen)) : 0;
                /* Test the bound first so *ptr is never read beyond the requested range */
                while (rangeLen > uninterestingTailLen && *ptr < 0x80) {
                    ++ptr;
                    --rangeLen;
                }
                numCharsProcessed = ptr - cString;
                if (buffer) {
                    numCharsProcessed = (numCharsProcessed < max ? numCharsProcessed : max);
                    memmove(buffer, cString, numCharsProcessed);
                    buffer += numCharsProcessed;
                    max -= numCharsProcessed;
                }
                if (!rangeLen || (buffer && (max == 0))) {
                    /* Either the whole range was ASCII or the buffer is full: done */
                    if (usedBufLen) *usedBufLen = numCharsProcessed;
                    return numCharsProcessed;
                }
                rangeLoc += numCharsProcessed;
                totalBytesWritten += numCharsProcessed;
            }
            if (!cString && (cString = CFStringGetPascalStringPtr(string, __CFStringGetEightBitStringEncoding()))) {
                ptr = (cString += (rangeLoc + 1)); /* +1 skips the Pascal length byte */
                if (__CFStringGetEightBitStringEncoding() == encoding) {
                    numCharsProcessed = (rangeLen < max || buffer == NULL ? rangeLen : max);
                    if (buffer) memmove(buffer, cString, numCharsProcessed);
                    if (usedBufLen) *usedBufLen = numCharsProcessed;
                    return numCharsProcessed;
                }
                /* Test the bound first so *ptr is never read beyond the requested range */
                while (rangeLen > 0 && *ptr < 0x80) {
                    ++ptr;
                    --rangeLen;
                }
                numCharsProcessed = ptr - cString;
                if (buffer) {
                    numCharsProcessed = (numCharsProcessed < max ? numCharsProcessed : max);
                    memmove(buffer, cString, numCharsProcessed);
                    buffer += numCharsProcessed;
                    max -= numCharsProcessed;
                }
                if (!rangeLen || (buffer && (max == 0))) {
                    if (usedBufLen) *usedBufLen = numCharsProcessed;
                    return numCharsProcessed;
                }
                rangeLoc += numCharsProcessed;
                totalBytesWritten += numCharsProcessed;
            }
        }
        if (!buffer) max = 0;
        // Special case for Foundation. When lossByte == 0xFF && encoding kCFStringEncodingASCII, we do the default ASCII fallback conversion
        // Aki 11/24/04 __CFGetASCIICompatibleFlag() is called only for non-ASCII superset encodings. Otherwise, it could lead to a deadlock (see 3890536).
        flags = (lossByte ? ((unsigned char)lossByte == 0xFF && encoding == kCFStringEncodingASCII ? kCFStringEncodingAllowLossyConversion : CFStringEncodingLossyByteToMask(lossByte)) : 0) | (generatingExternalFile ? kCFStringEncodingPrependBOM : 0) | (isASCIISuperset ? 0 : __CFGetASCIICompatibleFlag());
        if (!cString && (cString = (const unsigned char *)CFStringGetCharactersPtr(string))) { // Must be Unicode string
            CFStringEncodingUnicodeToBytes(encoding, flags, (const UniChar *)cString + rangeLoc, rangeLen, &numCharsProcessed, buffer, max, &totalBytesWritten);
        } else {
            /* No direct UniChar access: pull the characters out in chunks of at most
               kCFCharConversionBufferLength and convert each chunk as part of one stream. */
            UniChar charBuf[kCFCharConversionBufferLength];
            CFIndex currentLength;
            CFIndex usedLen;
            CFIndex lastUsedLen = 0, lastNumChars = 0; /* Results of the previous chunk, kept so it can be redone */
            uint32_t result;
            uint32_t streamingMask;
            uint32_t streamID = 0;
#define MAX_DECOMP_LEN (6)
            while (rangeLen > 0) {
                currentLength = (rangeLen > kCFCharConversionBufferLength ? kCFCharConversionBufferLength : rangeLen);
                CFStringGetCharacters(string, CFRangeMake(rangeLoc, currentLength), charBuf);
                // could be in the middle of surrogate pair; back up.
                if ((rangeLen > kCFCharConversionBufferLength) && CFUniCharIsSurrogateHighCharacter(charBuf[kCFCharConversionBufferLength - 1])) --currentLength;
                streamingMask = ((rangeLen > currentLength) ? kCFStringEncodingPartialInput : 0)|CFStringEncodingStreamIDToMask(streamID);
                result = CFStringEncodingUnicodeToBytes(encoding, flags|streamingMask, charBuf, currentLength, &numChars, buffer, max, &usedLen);
                streamID = CFStringEncodingStreamIDFromMask(result);
                result &= ~CFStringEncodingStreamIDMask;
                if (result != kCFStringEncodingConversionSuccess) {
                    if (kCFStringEncodingInvalidInputStream == result) {
                        CFRange composedRange;
                        // Check the tail
                        /* Failure close to the end of a full chunk: if the composed character sequence at
                           the chunk boundary starts inside this chunk, retry with only the characters
                           that precede it. */
                        if ((rangeLen > kCFCharConversionBufferLength) && ((currentLength - numChars) < MAX_DECOMP_LEN)) {
                            composedRange = CFStringGetRangeOfComposedCharactersAtIndex(string, rangeLoc + currentLength);
                            if ((composedRange.length <= MAX_DECOMP_LEN) && (composedRange.location < (rangeLoc + numChars))) {
                                result = CFStringEncodingUnicodeToBytes(encoding, flags|streamingMask, charBuf, composedRange.location - rangeLoc, &numChars, buffer, max, &usedLen);
                                streamID = CFStringEncodingStreamIDFromMask(result);
                                result &= ~CFStringEncodingStreamIDMask;
                            }
                        }
                        // Check the head
                        /* Failure close to the start of a chunk that follows another one: if the composed
                           sequence at rangeLoc begins in the previous chunk and converts on its own, redo
                           the previous chunk so that it ends where that sequence starts. */
                        if ((kCFStringEncodingConversionSuccess != result) && (lastNumChars > 0) && (numChars < MAX_DECOMP_LEN)) {
                            composedRange = CFStringGetRangeOfComposedCharactersAtIndex(string, rangeLoc);
                            if ((composedRange.length <= MAX_DECOMP_LEN) && (composedRange.location < rangeLoc)) {
                                // Try if the composed range can be converted
                                CFStringGetCharacters(string, composedRange, charBuf);
                                if (CFStringEncodingUnicodeToBytes(encoding, flags, charBuf, composedRange.length, &numChars, NULL, 0, &usedLen) == kCFStringEncodingConversionSuccess) { // OK let's try the last run
                                    CFIndex lastRangeLoc = rangeLoc - lastNumChars;
                                    currentLength = composedRange.location - lastRangeLoc;
                                    CFStringGetCharacters(string, CFRangeMake(lastRangeLoc, currentLength), charBuf);
                                    result = CFStringEncodingUnicodeToBytes(encoding, flags|streamingMask, charBuf, currentLength, &numChars, (max ? buffer - lastUsedLen : NULL), (max ? max + lastUsedLen : 0), &usedLen);
                                    streamID = CFStringEncodingStreamIDFromMask(result);
                                    result &= ~CFStringEncodingStreamIDMask;
                                    if (result == kCFStringEncodingConversionSuccess) { // OK let's try the last run
                                        // Looks good. back up
                                        /* Rewind all counters to the start of the previous chunk; the common
                                           bookkeeping below then re-applies the redone run's results. */
                                        totalBytesWritten -= lastUsedLen;
                                        numCharsProcessed -= lastNumChars;
                                        rangeLoc = lastRangeLoc;
                                        rangeLen += lastNumChars;
                                        if (max) {
                                            buffer -= lastUsedLen;
                                            max += lastUsedLen;
                                        }
                                    }
                                }
                            }
                        }
                    }
                    if (kCFStringEncodingConversionSuccess != result) { // really failed
                        /* Account for whatever was converted before the failure, then stop */
                        totalBytesWritten += usedLen;
                        numCharsProcessed += numChars;
                        break;
                    }
                }
                totalBytesWritten += usedLen;
                numCharsProcessed += numChars;
                rangeLoc += numChars;
                rangeLen -= numChars;
                if (max) {
                    buffer += usedLen;
                    max -= usedLen;
                    if (max <= 0) break;
                }
                lastUsedLen = usedLen; lastNumChars = numChars;
                flags &= ~kCFStringEncodingPrependBOM; /* Only the first chunk may carry the BOM */
            }
        }
    }
    if (usedBufLen) *usedBufLen = totalBytesWritten;
    return numCharsProcessed;
}