in CoreFoundation/String.subproj/CFString.c [2435:2654]
/*
 * Folds the character cluster that starts at `index` in `buffer` into
 * `outCharacters` (as UTF-32), applying the comparison options in `flags`.
 *
 * Parameters:
 *   character               - the character found at `index`. When it is 0 the
 *                             whole body is skipped and 0 is returned.
 *   buffer                  - inline buffer the following characters are read from.
 *   index                   - position of `character` inside `buffer`.
 *   flags                   - only kCFCompareCaseInsensitive, kCFCompareWidthInsensitive,
 *                             kCFCompareDiacriticInsensitive and kCFCompareNonliteral
 *                             are tested here.
 *   langCode                - optional language code. "tr" and "az" enable the
 *                             dotted-I special case; the value is also handed to
 *                             CFUniCharMapCaseTo. A non-NULL value keeps 'I' out of
 *                             the ASCII fast path.
 *   outCharacters           - destination for the folded cluster.
 *   maxBufferLength         - capacity of `outCharacters`. NOTE(review): only the
 *                             CFUniCharDecomposeCharacter calls and the trailing
 *                             collection loop honor it; the case-fold fill and the
 *                             single store of the first following mark write without
 *                             a bounds check - presumably callers always pass a
 *                             comfortably large buffer; confirm.
 *   consumedLength          - optional out parameter; receives currentIndex - index,
 *                             but only when the return value is greater than 0.
 *   insufficientBufferSpace - optional out parameter; set to true when a
 *                             decomposition reports length 0 or when the collection
 *                             loop stops before reaching a character that is not a
 *                             grapheme extender. It is never set to false here, so
 *                             the caller has to initialize it.
 *
 * Returns filledLength, the count of entries this function considers valid in
 * `outCharacters`. When 0 is returned `*consumedLength` is left untouched
 * (even though outCharacters[0] may have been written).
 */
static CFIndex __CFStringFoldCharacterClusterAtIndex(UTF32Char character, CFStringInlineBuffer *buffer, CFIndex index, CFOptionFlags flags, const uint8_t *langCode, UTF32Char *outCharacters, CFIndex maxBufferLength, CFIndex *consumedLength, bool *insufficientBufferSpace) {
CFIndex filledLength = 0, currentIndex = index;
if (0 != character) {
UTF16Char lowSurrogate;
CFIndex planeNo = (character >> 16);
// Set when the Turkic "I + U+0307" rule fired; suppresses the generic case fold below.
bool isTurkikCapitalI = false;
// Plane-0 bitmaps are fetched once and cached in function-level statics.
// NOTE(review): the lazy initialization is unsynchronized - presumably harmless
// because every caller would store the same pointers; confirm.
static const uint8_t *decompBMP = NULL;
static const uint8_t *graphemeBMP = NULL;
if (NULL == decompBMP) {
decompBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, 0);
graphemeBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, 0);
}
// Step past the starting character: two buffer positions if it is above U+FFFF, one otherwise.
currentIndex += ((character > 0xFFFF) ? 2 : 1);
// Fast path for ASCII. 'I' is excluded whenever a language code is supplied so
// that it reaches the language-sensitive handling in the else branch.
if ((character < 0x0080) && ((NULL == langCode) || (character != 'I'))) { // ASCII
if ((flags & kCFCompareCaseInsensitive) && (character >= 'A') && (character <= 'Z')) {
character += ('a' - 'A');
*outCharacters = character;
filledLength = 1;
}
} else {
// do width-insensitive mapping
// Only applied to the U+FF00..U+FFEF range; the decomposition rewrites `character` in place.
if ((flags & kCFCompareWidthInsensitive) && (character >= 0xFF00) && (character <= 0xFFEF)) {
(void)CFUniCharCompatibilityDecompose(&character, 1, 1);
*outCharacters = character;
filledLength = 1;
}
// map surrogates
// If the caller passed a lone high surrogate and the next buffer character is a
// low surrogate, join them and consume the extra position.
if ((0 == planeNo) && CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((lowSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex)))) {
character = CFUniCharGetLongCharacterForSurrogatePair(character, lowSurrogate);
++currentIndex;
planeNo = (character >> 16);
}
// decompose
if (flags & (kCFCompareDiacriticInsensitive|kCFCompareNonliteral)) {
if (CFUniCharIsMemberOfBitmap(character, ((0 == planeNo) ? decompBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, planeNo)))) {
UTF32Char original = character;
filledLength = CFUniCharDecomposeCharacter(character, outCharacters, maxBufferLength);
// From here on `character` is the first entry of the decomposition.
// NOTE(review): if the call above returned 0, *outCharacters was presumably
// not written by it, so this read depends on prior buffer contents - confirm.
character = *outCharacters;
if ((flags & kCFCompareDiacriticInsensitive) && (character < 0x0510)) {
filledLength = 1; // reset if Roman, Greek, Cyrillic
} else if (0 == (flags & kCFCompareNonliteral)) {
// Diacritic-insensitive only, but the base is outside the filtered range:
// discard the decomposition and keep the original character.
character = original;
filledLength = 0;
} else if (filledLength == 0 && NULL != insufficientBufferSpace) {
// A decomposition length of 0 is treated as "did not fit".
*insufficientBufferSpace = true;
}
}
}
// fold case
if (flags & kCFCompareCaseInsensitive) {
const uint8_t *nonBaseBitmap;
// When true, grapheme-extending characters produced by the case fold are dropped.
bool filterNonBase = (((flags & kCFCompareDiacriticInsensitive) && (character < 0x0510)) ? true : false);
// Same lazy, cached plane-0 lookup as for decompBMP/graphemeBMP above.
static const uint8_t *lowerBMP = NULL;
static const uint8_t *caseFoldBMP = NULL;
if (NULL == lowerBMP) {
lowerBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfLowercaseCharacterSet, 0);
caseFoldBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfCaseFoldingCharacterSet, 0);
}
if ((NULL != langCode) && ('I' == character) && ((0 == strcmp((const char *)langCode, "tr")) || (0 == strcmp((const char *)langCode, "az")))) { // do Turkic special-casing
if (filledLength > 1) {
// 'I' came out of a decomposition: if U+0307 (combining dot above) directly
// follows it in the output, remove that mark and fold the pair to 'i'.
if (0x0307 == outCharacters[1]) {
// Close the gap left by the removed mark when more entries follow it.
if (--filledLength > 1) memmove((outCharacters + 1), (outCharacters + 2), sizeof(UTF32Char) * (filledLength - 1));
character = *outCharacters = 'i';
isTurkikCapitalI = true;
}
} else if (0x0307 == CFStringGetCharacterFromInlineBuffer(buffer, currentIndex)) {
// Plain 'I' followed by U+0307 in the source: consume the mark and emit 'i'.
character = *outCharacters = 'i';
filledLength = 1;
++currentIndex;
isTurkikCapitalI = true;
}
}
// Generic case folding, skipped when the Turkic rule already produced the result.
if (!isTurkikCapitalI && (CFUniCharIsMemberOfBitmap(character, ((0 == planeNo) ? lowerBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfLowercaseCharacterSet, planeNo))) || CFUniCharIsMemberOfBitmap(character, ((0 == planeNo) ? caseFoldBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfCaseFoldingCharacterSet, planeNo))))) {
UTF16Char caseFoldBuffer[MAX_CASE_MAPPING_BUF];
const UTF16Char *bufferP = caseFoldBuffer, *bufferLimit;
UTF32Char *outCharactersP = outCharacters;
// The fold result is UTF-16; bufferLength is its length in UTF-16 units.
uint32_t bufferLength = CFUniCharMapCaseTo(character, caseFoldBuffer, MAX_CASE_MAPPING_BUF, kCFUniCharCaseFold, 0, langCode);
bufferLimit = bufferP + bufferLength;
if (filledLength > 0) --filledLength; // decrement filledLength (will add back later)
// make space for casefold characters
// Needed only when entries follow slot 0 and the fold yields more than one unit:
// count the scalars in the fold result and shift the existing tail behind them.
if ((filledLength > 0) && (bufferLength > 1)) {
CFIndex totalScalerLength = 0;
while (bufferP < bufferLimit) {
if (CFUniCharIsSurrogateHighCharacter(*(bufferP++)) && (bufferP < bufferLimit) && CFUniCharIsSurrogateLowCharacter(*bufferP)) ++bufferP;
++totalScalerLength;
}
memmove(outCharacters + totalScalerLength, outCharacters + 1, filledLength * sizeof(UTF32Char));
// Rewind so the fill loop below walks the fold result from its start.
bufferP = caseFoldBuffer;
}
// fill
// Convert the UTF-16 fold result to UTF-32 and store it from outCharacters[0] on.
// NOTE(review): these stores are not checked against maxBufferLength.
while (bufferP < bufferLimit) {
character = *(bufferP++);
if (CFUniCharIsSurrogateHighCharacter(character) && (bufferP < bufferLimit) && CFUniCharIsSurrogateLowCharacter(*bufferP)) {
character = CFUniCharGetLongCharacterForSurrogatePair(character, *(bufferP++));
nonBaseBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (character >> 16));
} else {
nonBaseBitmap = graphemeBMP;
}
if (!filterNonBase || !CFUniCharIsMemberOfBitmap(character, nonBaseBitmap)) {
*(outCharactersP++) = character;
++filledLength;
}
}
}
}
}
// collect following combining marks
if (flags & (kCFCompareDiacriticInsensitive|kCFCompareNonliteral)) {
const uint8_t *nonBaseBitmap;
const uint8_t *decompBitmap;
// doFill == false means following marks are consumed but not copied to the output
// (diacritic-insensitive with a base character below U+0510).
bool doFill = (((flags & kCFCompareDiacriticInsensitive) && (character < 0x0510)) ? false : true);
if (0 == filledLength) {
*outCharacters = character; // filledLength will be updated below on demand
if (doFill) { // check if really needs to fill
// Peek at the next source character; only a grapheme extender makes this
// cluster worth emitting.
UTF32Char nonBaseCharacter = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex);
if (CFUniCharIsSurrogateHighCharacter(nonBaseCharacter) && CFUniCharIsSurrogateLowCharacter((lowSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex + 1)))) {
nonBaseCharacter = CFUniCharGetLongCharacterForSurrogatePair(nonBaseCharacter, lowSurrogate);
nonBaseBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (nonBaseCharacter >> 16));
decompBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (nonBaseCharacter >> 16));
} else {
nonBaseBitmap = graphemeBMP;
decompBitmap = decompBMP;
}
if (CFUniCharIsMemberOfBitmap(nonBaseCharacter, nonBaseBitmap)) {
filledLength = 1; // For the base character
// Under diacritic-insensitive comparison a mark at or below U+050F is consumed
// without being copied.
if ((0 == (flags & kCFCompareDiacriticInsensitive)) || (nonBaseCharacter > 0x050F)) {
if (CFUniCharIsMemberOfBitmap(nonBaseCharacter, decompBitmap)) {
CFIndex decomposedLength = CFUniCharDecomposeCharacter(nonBaseCharacter, &(outCharacters[filledLength]), maxBufferLength - filledLength);
filledLength += decomposedLength;
if (decomposedLength == 0 && NULL != insufficientBufferSpace) {
*insufficientBufferSpace = true;
}
} else {
// NOTE(review): writes outCharacters[1] without comparing against maxBufferLength.
outCharacters[filledLength++] = nonBaseCharacter;
}
}
// The bitmap pointer doubles as a marker: the plane-0 bitmap means the mark
// occupied one buffer position, any other bitmap means a surrogate pair.
currentIndex += ((nonBaseBitmap == graphemeBMP) ? 1 : 2);
} else {
doFill = false;
}
}
}
// Becomes true only when the loop stops at a character that is not a grapheme extender.
bool endedCharacterCluster = false;
while (filledLength < maxBufferLength) { // do the rest
character = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex);
if (CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((lowSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex + 1)))) {
character = CFUniCharGetLongCharacterForSurrogatePair(character, lowSurrogate);
nonBaseBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (character >> 16));
decompBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (character >> 16));
} else {
nonBaseBitmap = graphemeBMP;
decompBitmap = decompBMP;
}
if (isTurkikCapitalI) {
// This pass only clears the flag; currentIndex is not advanced, so the same
// position is examined again on the next iteration.
isTurkikCapitalI = false;
} else if (CFUniCharIsMemberOfBitmap(character, nonBaseBitmap)) {
if (doFill) {
if (CFUniCharIsMemberOfBitmap(character, decompBitmap)) {
CFIndex currentLength = CFUniCharDecomposeCharacter(character, &(outCharacters[filledLength]), maxBufferLength - filledLength);
if (0 == currentLength) break; // didn't fit
filledLength += currentLength;
} else {
outCharacters[filledLength++] = character;
}
} else if (0 == filledLength) {
filledLength = 1; // For the base character
}
// Same one-or-two position advance keyed on the bitmap pointer as above.
currentIndex += ((nonBaseBitmap == graphemeBMP) ? 1 : 2);
} else {
endedCharacterCluster = true;
break;
}
}
// Leaving the loop any other way (buffer full, or a decomposition that did not
// fit) means the cluster may be truncated.
if (!endedCharacterCluster && NULL != insufficientBufferSpace) {
*insufficientBufferSpace = true;
}
if (filledLength > 1) {
// Walk back over the trailing run of grapheme extenders, then hand the range
// starting at the entry where the walk stopped (inclusive) to
// CFUniCharPrioritySort - presumably an ordering by combining priority; confirm.
UTF32Char *sortCharactersLimit = outCharacters + filledLength;
UTF32Char *sortCharacters = sortCharactersLimit - 1;
while ((outCharacters < sortCharacters) && CFUniCharIsMemberOfBitmap(*sortCharacters, ((*sortCharacters < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (*sortCharacters >> 16))))) --sortCharacters;
if ((sortCharactersLimit - sortCharacters) > 1) CFUniCharPrioritySort(sortCharacters, (sortCharactersLimit - sortCharacters)); // priority sort
}
}
}
// Report how far the source was consumed only when something was produced.
if ((filledLength > 0) && (NULL != consumedLength)) *consumedLength = (currentIndex - index);
return filledLength;
}