in Frameworks/CoreFoundation/String.subproj/CFString.c [2940:3346]
/*
 * Searches `string`, restricted to `rangeToSearch`, for an occurrence of `stringToFind`.
 *
 * Parameters:
 *   string          - the string that is searched.
 *   stringToFind    - the string to look for; an empty string never matches.
 *   rangeToSearch   - the range of `string` to examine; an empty range never matches.
 *   compareOptions  - CFStringCompareFlags bits. The bits read here are kCFCompareCaseInsensitive,
 *                     kCFCompareNonliteral, kCFCompareDiacriticInsensitive, kCFCompareWidthInsensitive,
 *                     kCFCompareBackwards, kCFCompareAnchored and kCFCompareLocalized; the full flag word
 *                     is also forwarded to the character-set and cluster-folding helpers.
 *   locale          - locale used to derive the language code passed to cluster folding. When NULL, the
 *                     current locale is used if kCFCompareLocalized is set; otherwise no language code.
 *   result          - optional out-parameter; on success receives the matched range in `string`.
 *                     Left untouched when nothing is found.
 *
 * Returns true when a match was found, false otherwise.
 *
 * Three scanning strategies are used:
 *   1. an 8-bit path when both strings expose a C-string pointer in the eight-bit encoding,
 *   2. a folding path when any equality option (case/diacritic/width/nonliteral/ignored chars) is active,
 *   3. a literal code-unit comparison otherwise.
 */
Boolean CFStringFindWithOptionsAndLocale(CFStringRef string, CFStringRef stringToFind, CFRange rangeToSearch, CFStringCompareFlags compareOptions, CFLocaleRef locale, CFRange *result) {
    /* No objc dispatch needed here since CFStringInlineBuffer works with both CFString and NSString */
    CFIndex findStrLen = CFStringGetLength(stringToFind);
    Boolean didFind = false;
    // True when a match may cover a different number of code units than stringToFind has.
    bool lengthVariants = ((compareOptions & (kCFCompareCaseInsensitive|kCFCompareNonliteral|kCFCompareDiacriticInsensitive)) ? true : false);
    CFCharacterSetInlineBuffer *ignoredChars = NULL;
    CFCharacterSetInlineBuffer csetBuffer;

    if (__CFStringFillCharacterSetInlineBuffer(&csetBuffer, compareOptions)) {
        ignoredChars = &csetBuffer;
        lengthVariants = true; // skipping ignored characters also changes the matched length
    }

    if ((findStrLen > 0) && (rangeToSearch.length > 0) && ((findStrLen <= rangeToSearch.length) || lengthVariants)) {
        UTF32Char strBuf1[kCFStringStackBufferLength];
        UTF32Char strBuf2[kCFStringStackBufferLength];
        CFStringInlineBuffer inlineBuf1, inlineBuf2;
        UTF32Char str1Char = 0, str2Char = 0;
        CFStringEncoding eightBitEncoding = __CFStringGetEightBitStringEncoding();
        const uint8_t *str1Bytes = (const uint8_t *)CFStringGetCStringPtr(string, eightBitEncoding);
        const uint8_t *str2Bytes = (const uint8_t *)CFStringGetCStringPtr(stringToFind, eightBitEncoding);
        const UTF32Char *characters, *charactersLimit;
        const uint8_t *langCode = NULL;
        CFIndex fromLoc, toLoc;
        CFIndex str1Index, str2Index;
        CFIndex strBuf1Len, strBuf2Len;
        CFIndex maxStr1Index = (rangeToSearch.location + rangeToSearch.length);
        bool equalityOptions = ((lengthVariants || (compareOptions & kCFCompareWidthInsensitive)) ? true : false);
        bool caseInsensitive = ((compareOptions & kCFCompareCaseInsensitive) ? true : false);
        // Anchored without Backwards / Anchored together with Backwards.
        bool forwardAnchor = ((kCFCompareAnchored == (compareOptions & (kCFCompareBackwards|kCFCompareAnchored))) ? true : false);
        bool backwardAnchor = (((kCFCompareBackwards|kCFCompareAnchored) == (compareOptions & (kCFCompareBackwards|kCFCompareAnchored))) ? true : false);
        int8_t delta; // scan direction: +1 or -1 per candidate start position

        if (NULL == locale) {
            if (compareOptions & kCFCompareLocalized) {
                CFLocaleRef currentLocale = CFLocaleCopyCurrent();
                langCode = (const uint8_t *)_CFStrGetLanguageIdentifierForLocale(currentLocale, true);
                CFRelease(currentLocale);
            }
        } else {
            langCode = (const uint8_t *)_CFStrGetLanguageIdentifierForLocale(locale, true);
        }

        CFStringInitInlineBuffer(string, &inlineBuf1, CFRangeMake(0, rangeToSearch.location + rangeToSearch.length));
        CFStringInitInlineBuffer(stringToFind, &inlineBuf2, CFRangeMake(0, findStrLen));

        // Compute the first and last candidate start positions. When the matched length may vary,
        // every position up to the last code unit of the range is a candidate.
        if (compareOptions & kCFCompareBackwards) {
            fromLoc = rangeToSearch.location + rangeToSearch.length - (lengthVariants ? 1 : findStrLen);
            toLoc = (((compareOptions & kCFCompareAnchored) && !lengthVariants) ? fromLoc : rangeToSearch.location);
        } else {
            fromLoc = rangeToSearch.location;
            toLoc = ((compareOptions & kCFCompareAnchored) ? fromLoc : rangeToSearch.location + rangeToSearch.length - (lengthVariants ? 1 : findStrLen));
        }

        delta = ((fromLoc <= toLoc) ? 1 : -1);

        if ((NULL != str1Bytes) && (NULL != str2Bytes)) {
            // ---- 8-bit path: compare raw bytes, folding only when a pair of bytes differs ----
            uint8_t str1Byte, str2Byte;

            while (1) {
                str1Index = fromLoc;
                str2Index = 0;

                while ((str1Index < maxStr1Index) && (str2Index < findStrLen)) {
                    str1Byte = str1Bytes[str1Index];
                    str2Byte = str2Bytes[str2Index];

                    if (str1Byte != str2Byte) {
                        if (equalityOptions) {
                            // ASCII bytes are folded inline; 'I' is routed through the cluster folder
                            // whenever a language code is present.
                            if ((str1Byte < 0x80) && ((NULL == langCode) || ('I' != str1Byte))) {
                                if (caseInsensitive && (str1Byte >= 'A') && (str1Byte <= 'Z')) str1Byte += ('a' - 'A');
                                *strBuf1 = str1Byte;
                                strBuf1Len = 1;
                            } else {
                                str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
                                strBuf1Len = __CFStringFoldCharacterClusterAtIndex(str1Char, &inlineBuf1, str1Index, compareOptions, langCode, strBuf1, kCFStringStackBufferLength, NULL);
                                if (1 > strBuf1Len) {
                                    *strBuf1 = str1Char;
                                    strBuf1Len = 1;
                                }
                            }

                            // Skip an ignored character in the searched string (never at the match start
                            // unless forward-anchored).
                            if ((NULL != ignoredChars) && (forwardAnchor || (str1Index != fromLoc)) && CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, ((str1Byte < 0x80) ? str1Byte : str1Char))) {
                                ++str1Index;
                                continue;
                            }

                            if ((str2Byte < 0x80) && ((NULL == langCode) || ('I' != str2Byte))) {
                                if (caseInsensitive && (str2Byte >= 'A') && (str2Byte <= 'Z')) str2Byte += ('a' - 'A');
                                *strBuf2 = str2Byte;
                                strBuf2Len = 1;
                            } else {
                                str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index);
                                strBuf2Len = __CFStringFoldCharacterClusterAtIndex(str2Char, &inlineBuf2, str2Index, compareOptions, langCode, strBuf2, kCFStringStackBufferLength, NULL);
                                if (1 > strBuf2Len) {
                                    *strBuf2 = str2Char;
                                    strBuf2Len = 1;
                                }
                            }

                            // Skip an ignored character in the string to find.
                            if ((NULL != ignoredChars) && CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, ((str2Byte < 0x80) ? str2Byte : str2Char))) {
                                ++str2Index;
                                continue;
                            }

                            if ((1 == strBuf1Len) && (1 == strBuf2Len)) { // normal case
                                if (*strBuf1 != *strBuf2) break;
                            } else {
                                // Named lenDelta so it does not shadow the outer scan-direction `delta`.
                                CFIndex lenDelta;

                                if (!caseInsensitive && (strBuf1Len != strBuf2Len)) break;
                                if (memcmp(strBuf1, strBuf2, sizeof(UTF32Char) * __CFMin(strBuf1Len, strBuf2Len))) break;

                                if (strBuf1Len < strBuf2Len) {
                                    // The find-string side folded to more characters; consume further
                                    // characters from the searched string to cover the remainder.
                                    lenDelta = strBuf2Len - strBuf1Len;

                                    if ((str1Index + strBuf1Len + lenDelta) > maxStr1Index) break;

                                    characters = &(strBuf2[strBuf1Len]);
                                    charactersLimit = characters + lenDelta;

                                    while (characters < charactersLimit) {
                                        strBuf1Len = __CFStringFoldCharacterClusterAtIndex(CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1), &inlineBuf1, str1Index + 1, compareOptions, langCode, strBuf1, kCFStringStackBufferLength, NULL);
                                        // NOTE(review): this stops as soon as the fold yields output (> 0) — confirm the intended condition.
                                        if ((strBuf1Len > 0) || (*characters != *strBuf1)) break;
                                        ++characters; ++str1Index;
                                    }
                                    if (characters < charactersLimit) break;
                                } else if (strBuf2Len < strBuf1Len) {
                                    // Mirror case: the searched side folded to more characters.
                                    lenDelta = strBuf1Len - strBuf2Len;

                                    if ((str2Index + strBuf2Len + lenDelta) > findStrLen) break;

                                    characters = &(strBuf1[strBuf2Len]);
                                    charactersLimit = characters + lenDelta;

                                    while (characters < charactersLimit) {
                                        // Fetch from inlineBuf2 at str2Index + 1, the same index handed to the fold call;
                                        // str1Index is an index into the searched string and does not belong here.
                                        strBuf2Len = __CFStringFoldCharacterClusterAtIndex(CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index + 1), &inlineBuf2, str2Index + 1, compareOptions, langCode, strBuf2, kCFStringStackBufferLength, NULL);
                                        if ((strBuf2Len > 0) || (*characters != *strBuf2)) break;
                                        ++characters; ++str2Index;
                                    }
                                    if (characters < charactersLimit) break;
                                }
                            }
                        } else {
                            break;
                        }
                    }
                    ++str1Index; ++str2Index;
                }

                if ((NULL != ignoredChars) && (str1Index == maxStr1Index) && (str2Index < findStrLen)) { // Process the stringToFind tail
                    while (str2Index < findStrLen) {
                        str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index);

                        if (!CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str2Char)) break;
                        ++str2Index;
                    }
                }

                if (str2Index == findStrLen) {
                    if ((NULL != ignoredChars) && backwardAnchor && (str1Index < maxStr1Index)) { // Process the anchor tail
                        while (str1Index < maxStr1Index) {
                            str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);

                            if (!CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str1Char)) break;
                            ++str1Index;
                        }
                    }

                    // A backward-anchored match must end exactly at the end of the search range.
                    if (!backwardAnchor || (str1Index == maxStr1Index)) {
                        didFind = true;
                        if (NULL != result) *result = CFRangeMake(fromLoc, str1Index - fromLoc);
                    }
                    break;
                }

                if (fromLoc == toLoc) break;
                fromLoc += delta;
            }
        } else if (equalityOptions) {
            // ---- Folding path: compare code points, expanding clusters into strBuf1/strBuf2 on demand ----
            UTF16Char otherChar;
            CFIndex str1UsedLen, str2UsedLen, strBuf1Index = 0, strBuf2Index = 0;
            bool diacriticsInsensitive = ((compareOptions & kCFCompareDiacriticInsensitive) ? true : false);
            const uint8_t *graphemeBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, 0);
            const uint8_t *combClassBMP = (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, 0);

            while (1) {
                str1Index = fromLoc;
                str2Index = 0;

                strBuf1Len = strBuf2Len = 0;

                while (str2Index < findStrLen) {
                    // Take the next character either from the string itself or from the pending
                    // folded buffer when one is still being consumed.
                    if (strBuf1Len == 0) {
                        str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
                        if (caseInsensitive && (str1Char >= 'A') && (str1Char <= 'Z') && ((NULL == langCode) || (str1Char != 'I'))) str1Char += ('a' - 'A');
                        str1UsedLen = 1;
                    } else {
                        str1Char = strBuf1[strBuf1Index++];
                    }
                    if (strBuf2Len == 0) {
                        str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index);
                        if (caseInsensitive && (str2Char >= 'A') && (str2Char <= 'Z') && ((NULL == langCode) || (str2Char != 'I'))) str2Char += ('a' - 'A');
                        str2UsedLen = 1;
                    } else {
                        str2Char = strBuf2[strBuf2Index++];
                    }

                    if (str1Char != str2Char) {
                        // Two differing ASCII characters cannot be reconciled unless ignored characters
                        // or language-specific case folding are in play.
                        if ((str1Char < 0x80) && (str2Char < 0x80) && (NULL == ignoredChars) && ((NULL == langCode) || !caseInsensitive)) break;

                        // Combine surrogate pairs into a single code point.
                        if (CFUniCharIsSurrogateHighCharacter(str1Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1)))) {
                            str1Char = CFUniCharGetLongCharacterForSurrogatePair(str1Char, otherChar);
                            str1UsedLen = 2;
                        }

                        if (CFUniCharIsSurrogateHighCharacter(str2Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index + 1)))) {
                            str2Char = CFUniCharGetLongCharacterForSurrogatePair(str2Char, otherChar);
                            str2UsedLen = 2;
                        }

                        if (NULL != ignoredChars) {
                            if ((forwardAnchor || (str1Index != fromLoc)) && (str1Index < maxStr1Index) && CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str1Char)) {
                                if ((strBuf1Len > 0) && (strBuf1Index == strBuf1Len)) strBuf1Len = 0;
                                if (strBuf1Len == 0) str1Index += str1UsedLen;
                                if (strBuf2Len > 0) --strBuf2Index; // re-read the same folded character next round
                                continue;
                            }
                            if (CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str2Char)) {
                                if ((strBuf2Len > 0) && (strBuf2Index == strBuf2Len)) strBuf2Len = 0;
                                if (strBuf2Len == 0) str2Index += str2UsedLen;
                                if (strBuf1Len > 0) --strBuf1Index; // re-read the same folded character next round
                                continue;
                            }
                        }

                        if (diacriticsInsensitive && (str1Index > fromLoc)) {
                            bool str1Skip = false;
                            bool str2Skip = false;

                            // A grapheme-extend character on either side is treated as equal to the other
                            // side; when only one side skipped, the other side's advance is undone below.
                            if ((0 == strBuf1Len) && CFUniCharIsMemberOfBitmap(str1Char, ((str1Char < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str1Char >> 16))))) {
                                str1Char = str2Char;
                                str1Skip = true;
                            }
                            if ((0 == strBuf2Len) && CFUniCharIsMemberOfBitmap(str2Char, ((str2Char < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str2Char >> 16))))) {
                                str2Char = str1Char;
                                str2Skip = true;
                            }

                            if (str1Skip != str2Skip) {
                                if (str1Skip) str2Index -= str2UsedLen;
                                if (str2Skip) str1Index -= str1UsedLen;
                            }
                        }

                        if (str1Char != str2Char) {
                            if (0 == strBuf1Len) {
                                strBuf1Len = __CFStringFoldCharacterClusterAtIndex(str1Char, &inlineBuf1, str1Index, compareOptions, langCode, strBuf1, kCFStringStackBufferLength, &str1UsedLen);
                                if (strBuf1Len > 0) {
                                    str1Char = *strBuf1;
                                    strBuf1Index = 1;
                                }
                            }

                            if ((0 == strBuf1Len) && (0 < strBuf2Len)) break;

                            if ((0 == strBuf2Len) && ((0 == strBuf1Len) || (str1Char != str2Char))) {
                                strBuf2Len = __CFStringFoldCharacterClusterAtIndex(str2Char, &inlineBuf2, str2Index, compareOptions, langCode, strBuf2, kCFStringStackBufferLength, &str2UsedLen);
                                if ((0 == strBuf2Len) || (str1Char != *strBuf2)) break;
                                strBuf2Index = 1;
                            }
                        }

                        // Both sides have folded output pending: compare as far as both buffers reach.
                        if ((strBuf1Len > 0) && (strBuf2Len > 0)) {
                            while ((strBuf1Index < strBuf1Len) && (strBuf2Index < strBuf2Len)) {
                                if (strBuf1[strBuf1Index] != strBuf2[strBuf2Index]) break;
                                ++strBuf1Index; ++strBuf2Index;
                            }
                            if ((strBuf1Index < strBuf1Len) && (strBuf2Index < strBuf2Len)) break;
                        }
                    }

                    // Retire fully consumed folded buffers and advance the source indexes.
                    if ((strBuf1Len > 0) && (strBuf1Index == strBuf1Len)) strBuf1Len = 0;
                    if ((strBuf2Len > 0) && (strBuf2Index == strBuf2Len)) strBuf2Len = 0;

                    if (strBuf1Len == 0) str1Index += str1UsedLen;
                    if (strBuf2Len == 0) str2Index += str2UsedLen;
                }

                if ((NULL != ignoredChars) && (str1Index == maxStr1Index) && (str2Index < findStrLen)) { // Process the stringToFind tail
                    while (str2Index < findStrLen) {
                        str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index);
                        if (CFUniCharIsSurrogateHighCharacter(str2Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index + 1)))) {
                            str2Char = CFUniCharGetLongCharacterForSurrogatePair(str2Char, otherChar);
                        }
                        if (!CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str2Char)) break;
                        str2Index += ((str2Char < 0x10000) ? 1 : 2);
                    }
                }

                if (str2Index == findStrLen) {
                    bool match = true;

                    if (strBuf1Len > 0) {
                        // stringToFind is exhausted but folded output of the searched string remains.
                        match = false;

                        if (diacriticsInsensitive && (strBuf1[0] < 0x0510)) {
                            while (strBuf1Index < strBuf1Len) {
                                // NOTE(review): the non-BMP lookup uses kCFUniCharCanonicalDecomposableCharacterSet while the
                                // BMP lookup uses the grapheme-extend bitmap; other sites use grapheme-extend for both — confirm.
                                if (!CFUniCharIsMemberOfBitmap(strBuf1[strBuf1Index], ((strBuf1[strBuf1Index] < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (strBuf1[strBuf1Index] >> 16))))) break;
                                ++strBuf1Index;
                            }

                            if (strBuf1Index == strBuf1Len) {
                                str1Index += str1UsedLen;
                                match = true;
                            }
                        }
                    }

                    // Inspect what follows the candidate match in the searched string.
                    if (match && (compareOptions & (kCFCompareDiacriticInsensitive|kCFCompareNonliteral)) && (str1Index < maxStr1Index)) {
                        const uint8_t *nonBaseBitmap;

                        str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);

                        if (CFUniCharIsSurrogateHighCharacter(str1Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1)))) {
                            str1Char = CFUniCharGetLongCharacterForSurrogatePair(str1Char, otherChar);
                            nonBaseBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str1Char >> 16));
                        } else {
                            nonBaseBitmap = graphemeBMP;
                        }

                        if (CFUniCharIsMemberOfBitmap(str1Char, nonBaseBitmap)) {
                            if (diacriticsInsensitive) {
                                if (str1Char < 0x10000) {
                                    CFIndex index = str1Index;

                                    // Walk back over grapheme-extend characters to the preceding non-extend
                                    // character, without leaving the search range. Both conditions must hold
                                    // to keep walking, hence && (a comma here would discard the membership test).
                                    do {
                                        str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, --index);
                                    } while (CFUniCharIsMemberOfBitmap(str1Char, graphemeBMP) && (rangeToSearch.location < index));

                                    // Only for characters below U+0510: absorb the trailing grapheme-extend
                                    // characters into the matched range.
                                    if (str1Char < 0x0510) {
                                        while (++str1Index < maxStr1Index) if (!CFUniCharIsMemberOfBitmap(CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index), graphemeBMP)) break;
                                    }
                                }
                            } else {
                                match = false;
                            }
                        } else if (!diacriticsInsensitive) {
                            otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index - 1);

                            // this is assuming viramas are only in BMP ???
                            if ((str1Char == COMBINING_GRAPHEME_JOINER) || (otherChar == COMBINING_GRAPHEME_JOINER) || (otherChar == ZERO_WIDTH_JOINER) || ((otherChar >= HANGUL_CHOSEONG_START) && (otherChar <= HANGUL_JONGSEONG_END)) || (CFUniCharGetCombiningPropertyForCharacter(otherChar, combClassBMP) == 9)) {
                                CFRange clusterRange = CFStringGetRangeOfCharacterClusterAtIndex(string, str1Index - 1, kCFStringGraphemeCluster);

                                // Reject a match that would end inside a grapheme cluster.
                                if (str1Index < (clusterRange.location + clusterRange.length)) match = false;
                            }
                        }
                    }

                    if (match) {
                        if ((NULL != ignoredChars) && backwardAnchor && (str1Index < maxStr1Index)) { // Process the anchor tail
                            while (str1Index < maxStr1Index) {
                                str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
                                if (CFUniCharIsSurrogateHighCharacter(str1Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1)))) {
                                    str1Char = CFUniCharGetLongCharacterForSurrogatePair(str1Char, otherChar);
                                }
                                if (!CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str1Char)) break;
                                str1Index += ((str1Char < 0x10000) ? 1 : 2);
                            }
                        }

                        // A backward-anchored match must end exactly at the end of the search range.
                        if (!backwardAnchor || (str1Index == maxStr1Index)) {
                            didFind = true;
                            if (NULL != result) *result = CFRangeMake(fromLoc, str1Index - fromLoc);
                        }
                        break;
                    }
                }

                if (fromLoc == toLoc) break;
                fromLoc += delta;
            }
        } else {
            // ---- Literal path: exact code-unit comparison, match length equals findStrLen ----
            while (1) {
                str1Index = fromLoc;
                str2Index = 0;

                while (str2Index < findStrLen) {
                    if (CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index) != CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index)) break;

                    ++str1Index; ++str2Index;
                }

                if (str2Index == findStrLen) {
                    didFind = true;
                    if (NULL != result) *result = CFRangeMake(fromLoc, findStrLen);
                    break;
                }

                if (fromLoc == toLoc) break;
                fromLoc += delta;
            }
        }
    }

    return didFind;
}