CFComparisonResult CFStringCompareWithOptionsAndLocale()

in Frameworks/CoreFoundation/String.subproj/CFString.c [2584:2931]


CFComparisonResult CFStringCompareWithOptionsAndLocale(CFStringRef string, CFStringRef string2, CFRange rangeToCompare, CFStringCompareFlags compareOptions, CFLocaleRef locale) {
    /* No objc dispatch needed here since CFStringInlineBuffer works with both CFString and NSString */
    UTF32Char strBuf1[kCFStringStackBufferLength];
    UTF32Char strBuf2[kCFStringStackBufferLength];
    CFStringInlineBuffer inlineBuf1, inlineBuf2;
    UTF32Char str1Char, str2Char;
    CFIndex str1UsedLen, str2UsedLen;
    CFIndex str1Index = 0, str2Index = 0, strBuf1Index = 0, strBuf2Index = 0, strBuf1Len = 0, strBuf2Len = 0;
    CFIndex str1LocalizedIndex = 0, str2LocalizedIndex = 0;
    CFIndex forcedIndex1 = 0, forcedIndex2 = 0;
    CFIndex str2Len = CFStringGetLength(string2);
    bool caseInsensitive = ((compareOptions & kCFCompareCaseInsensitive) ? true : false);
    bool diacriticsInsensitive = ((compareOptions & kCFCompareDiacriticInsensitive) ? true : false);
    bool equalityOptions = ((compareOptions & (kCFCompareCaseInsensitive|kCFCompareNonliteral|kCFCompareDiacriticInsensitive|kCFCompareWidthInsensitive)) ? true : false);
    bool numerically = ((compareOptions & kCFCompareNumerically) ? true : false);
    bool forceOrdering = ((compareOptions & kCFCompareForcedOrdering) ? true : false);
    const uint8_t *langCode;
    CFComparisonResult compareResult = kCFCompareEqualTo;
    UTF16Char otherChar;
    Boolean freeLocale = false;
    CFCharacterSetInlineBuffer *ignoredChars = NULL;
    CFCharacterSetInlineBuffer csetBuffer;
    bool numericEquivalence = false;

    if ((compareOptions & kCFCompareLocalized) && (NULL == locale)) {
        locale = CFLocaleCopyCurrent();
    freeLocale = true;
    }

    langCode = ((NULL == locale) ? NULL : (const uint8_t *)_CFStrGetLanguageIdentifierForLocale(locale, true));

    if (__CFStringFillCharacterSetInlineBuffer(&csetBuffer, compareOptions)) {
    ignoredChars = &csetBuffer;
    equalityOptions = true;
    }

    if ((NULL == locale) && (NULL == ignoredChars) && !numerically) { // could do binary comp (be careful when adding new flags)
        CFStringEncoding eightBitEncoding = __CFStringGetEightBitStringEncoding();
        const uint8_t *str1Bytes = (const uint8_t *)CFStringGetCStringPtr(string, eightBitEncoding);
        const uint8_t *str2Bytes = (const uint8_t *)CFStringGetCStringPtr(string2, eightBitEncoding);
        CFIndex factor = sizeof(uint8_t);

        if ((NULL != str1Bytes) && (NULL != str2Bytes)) {
            compareOptions &= ~kCFCompareNonliteral; // remove non-literal

            if ((kCFStringEncodingASCII == eightBitEncoding) && (false == forceOrdering)) {
                if (caseInsensitive) {
                    int cmpResult = strncasecmp_l((const char *)str1Bytes + rangeToCompare.location, (const char *)str2Bytes, __CFMin(rangeToCompare.length, str2Len), NULL);
                    
                    if (0 == cmpResult) cmpResult = rangeToCompare.length - str2Len;
                    
                    return ((0 == cmpResult) ? kCFCompareEqualTo : ((cmpResult < 0) ? kCFCompareLessThan : kCFCompareGreaterThan));
                }
            } else if (caseInsensitive || diacriticsInsensitive) {
                CFIndex limitLength = __CFMin(rangeToCompare.length, str2Len);

                str1Bytes += rangeToCompare.location;

                while (str1Index < limitLength) {
                    str1Char = str1Bytes[str1Index];
                    str2Char = str2Bytes[str1Index];

                    if (str1Char != str2Char) {
                        if ((str1Char < 0x80) && (str2Char < 0x80)) {
                if (forceOrdering && (kCFCompareEqualTo == compareResult) && (str1Char != str2Char)) compareResult = ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan);
                if (caseInsensitive) {
                if ((str1Char >= 'A') && (str1Char <= 'Z')) str1Char += ('a' - 'A');
                if ((str2Char >= 'A') && (str2Char <= 'Z')) str2Char += ('a' - 'A');
                }

                            if (str1Char != str2Char) return ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan);
                        } else {
                            str1Bytes = NULL;
                            break;
                        }
                    }
                    ++str1Index;
                }

                str2Index = str1Index;
                
                if (str1Index == limitLength) {
                    int cmpResult = rangeToCompare.length - str2Len;
                    
                    return ((0 == cmpResult) ? compareResult : ((cmpResult < 0) ? kCFCompareLessThan : kCFCompareGreaterThan));
                }
            }
        } else if (!equalityOptions && (NULL == str1Bytes) && (NULL == str2Bytes)) {
            str1Bytes = (const uint8_t *)CFStringGetCharactersPtr(string);
            str2Bytes = (const uint8_t *)CFStringGetCharactersPtr(string2);
            factor = sizeof(UTF16Char);
#if __LITTLE_ENDIAN__
            if ((NULL != str1Bytes) && (NULL != str2Bytes)) { // we cannot use memcmp
                const UTF16Char *str1 = ((const UTF16Char *)str1Bytes) + rangeToCompare.location;
                const UTF16Char *str1Limit = str1 + __CFMin(rangeToCompare.length, str2Len);
                const UTF16Char *str2 = (const UTF16Char *)str2Bytes;
                CFIndex cmpResult = 0;

                while ((0 == cmpResult) && (str1 < str1Limit)) cmpResult = (CFIndex)*(str1++) - (CFIndex)*(str2++);

                if (0 == cmpResult) cmpResult = rangeToCompare.length - str2Len;
                
                return ((0 == cmpResult) ? kCFCompareEqualTo : ((cmpResult < 0) ? kCFCompareLessThan : kCFCompareGreaterThan));
            }
#endif /* __LITTLE_ENDIAN__ */
        }
        if ((NULL != str1Bytes) && (NULL != str2Bytes)) {
            int cmpResult = memcmp(str1Bytes + (rangeToCompare.location * factor), str2Bytes, __CFMin(rangeToCompare.length, str2Len) * factor);
            
            if (0 == cmpResult) cmpResult = rangeToCompare.length - str2Len;
            
            return ((0 == cmpResult) ? kCFCompareEqualTo : ((cmpResult < 0) ? kCFCompareLessThan : kCFCompareGreaterThan));
        }
    }
    
    const uint8_t *graphemeBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, 0);
    
    CFStringInitInlineBuffer(string, &inlineBuf1, rangeToCompare);
    CFStringInitInlineBuffer(string2, &inlineBuf2, CFRangeMake(0, str2Len));

    if (NULL != locale) {
    str1LocalizedIndex = str1Index;
    str2LocalizedIndex = str2Index;

    // We temporarily disable kCFCompareDiacriticInsensitive for SL <rdar://problem/6767096>. Should be revisited in NMOS <rdar://problem/7003830>
    if (forceOrdering) {
        diacriticsInsensitive = false;
        compareOptions &= ~kCFCompareDiacriticInsensitive;
    }
    }
    while ((str1Index < rangeToCompare.length) && (str2Index < str2Len)) {
        if (strBuf1Len == 0) {
            str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
            if (caseInsensitive && (str1Char >= 'A') && (str1Char <= 'Z') && ((NULL == langCode) || (str1Char != 'I')) && ((false == forceOrdering) || (kCFCompareEqualTo != compareResult))) str1Char += ('a' - 'A');
            str1UsedLen = 1;
        } else {
            str1Char = strBuf1[strBuf1Index++];
        }
        if (strBuf2Len == 0) {
            str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index);
            if (caseInsensitive && (str2Char >= 'A') && (str2Char <= 'Z') && ((NULL == langCode) || (str2Char != 'I')) && ((false == forceOrdering) || (kCFCompareEqualTo != compareResult))) str2Char += ('a' - 'A');
            str2UsedLen = 1;
        } else {
            str2Char = strBuf2[strBuf2Index++];
        }

        if (numerically && ((0 == strBuf1Len) && (str1Char <= '9') && (str1Char >= '0')) && ((0 == strBuf2Len) && (str2Char <= '9') && (str2Char >= '0'))) { // If both are not ASCII digits, then don't do numerical comparison here
            uint64_t intValue1 = 0, intValue2 = 0;  // !!! Doesn't work if numbers are > max uint64_t
            CFIndex str1NumRangeIndex = str1Index;
            CFIndex str2NumRangeIndex = str2Index;

            do {
                intValue1 = (intValue1 * 10) + (str1Char - '0');
                str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, ++str1Index);
            } while ((str1Char <= '9') && (str1Char >= '0'));

            do {
                intValue2 = intValue2 * 10 + (str2Char - '0');
                str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, ++str2Index);
            } while ((str2Char <= '9') && (str2Char >= '0'));

            if (intValue1 == intValue2) {
                if (forceOrdering && (kCFCompareEqualTo == compareResult) && ((str1Index - str1NumRangeIndex) != (str2Index - str2NumRangeIndex))) {
                    compareResult = (((str1Index - str1NumRangeIndex) < (str2Index - str2NumRangeIndex)) ? kCFCompareLessThan : kCFCompareGreaterThan);
                    numericEquivalence = true;
                    forcedIndex1 = str1NumRangeIndex;
                    forcedIndex2 = str2NumRangeIndex;
                }

                continue;
            } else if (intValue1 < intValue2) {
        if (freeLocale && locale) {
            CFRelease(locale);
        }
                return kCFCompareLessThan;
            } else {
        if (freeLocale && locale) {
            CFRelease(locale);
        }
                return kCFCompareGreaterThan;
            }
        }

        if (str1Char != str2Char) {
            if (!equalityOptions) {
        compareResult = ((NULL == locale) ? ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan) : _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(str1Index, rangeToCompare.length - str1Index), &inlineBuf2, CFRangeMake(str2Index, str2Len - str2Index), compareOptions, locale));
                if (freeLocale && locale) {
                    CFRelease(locale);
                }
        return compareResult;
        }

            if (forceOrdering && (kCFCompareEqualTo == compareResult)) {
        compareResult = ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan);
        forcedIndex1 = str1LocalizedIndex;
        forcedIndex2 = str2LocalizedIndex;
        }

            if ((str1Char < 0x80) && (str2Char < 0x80) && (NULL == ignoredChars)) {
                if (NULL != locale) {
            compareResult = _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(str1Index, rangeToCompare.length - str1Index), &inlineBuf2, CFRangeMake(str2Index, str2Len - str2Index), compareOptions, locale);
            if (freeLocale && locale) {
            CFRelease(locale);
            }
            return compareResult;
                } else if (!caseInsensitive) {
            if (freeLocale && locale) {
            CFRelease(locale);
            }
                    return ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan);
                }
            }

            if (CFUniCharIsSurrogateHighCharacter(str1Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1)))) {
                str1Char = CFUniCharGetLongCharacterForSurrogatePair(str1Char, otherChar);
                str1UsedLen = 2;
            }
            
            if (CFUniCharIsSurrogateHighCharacter(str2Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index + 1)))) {
                str2Char = CFUniCharGetLongCharacterForSurrogatePair(str2Char, otherChar);
                str2UsedLen = 2;
            }
            
        if (NULL != ignoredChars) {
        if (CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str1Char)) {
            if ((strBuf1Len > 0) && (strBuf1Index == strBuf1Len)) strBuf1Len = 0;
            if (strBuf1Len == 0) str1Index += str1UsedLen;
            if (strBuf2Len > 0) --strBuf2Index;
            continue;
        }
        if (CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str2Char)) {
            if ((strBuf2Len > 0) && (strBuf2Index == strBuf2Len)) strBuf2Len = 0;
            if (strBuf2Len == 0) str2Index += str2UsedLen;
            if (strBuf1Len > 0) -- strBuf1Index;
            continue;
        }       
        }
        
            if (diacriticsInsensitive && (str1Index > 0)) {
                bool str1Skip = false;
                bool str2Skip = false;
                
                if ((0 == strBuf1Len) && CFUniCharIsMemberOfBitmap(str1Char, ((str1Char < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str1Char >> 16))))) {
                    str1Char = str2Char;
                    str1Skip = true;
                }
                if ((0 == strBuf2Len) && CFUniCharIsMemberOfBitmap(str2Char, ((str2Char < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str2Char >> 16))))) {
                    str2Char = str1Char;
                    str2Skip = true;
                }
                
                if (str1Skip != str2Skip) {
                    if (str1Skip) str2Index -= str2UsedLen;
                    if (str2Skip) str1Index -= str1UsedLen;
                }
            }

            if (str1Char != str2Char) {
                if (0 == strBuf1Len) {
                    strBuf1Len = __CFStringFoldCharacterClusterAtIndex(str1Char, &inlineBuf1, str1Index, compareOptions, langCode, strBuf1, kCFStringStackBufferLength, &str1UsedLen);
                    if (strBuf1Len > 0) {
                        str1Char = *strBuf1;
                        strBuf1Index = 1;
                    }
                }
                
                if ((0 == strBuf1Len) && (0 < strBuf2Len)) {
            compareResult =  ((NULL == locale) ? ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan) : _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(str1LocalizedIndex, rangeToCompare.length - str1LocalizedIndex), &inlineBuf2, CFRangeMake(str2LocalizedIndex, str2Len - str2LocalizedIndex), compareOptions, locale));
            if (freeLocale && locale) {
            CFRelease(locale);
            }
            return compareResult;
        }
                
                if ((0 == strBuf2Len) && ((0 == strBuf1Len) || (str1Char != str2Char))) {
                    strBuf2Len = __CFStringFoldCharacterClusterAtIndex(str2Char, &inlineBuf2, str2Index, compareOptions, langCode, strBuf2, kCFStringStackBufferLength, &str2UsedLen);
                    if (strBuf2Len > 0) {
                        str2Char = *strBuf2;
                        strBuf2Index = 1;
                    }
                    if ((0 == strBuf2Len) || (str1Char != str2Char)) {
            compareResult = ((NULL == locale) ? ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan) : _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(str1LocalizedIndex, rangeToCompare.length - str1LocalizedIndex), &inlineBuf2, CFRangeMake(str2LocalizedIndex, str2Len - str2LocalizedIndex), compareOptions, locale));
            if (freeLocale && locale) {
                CFRelease(locale);
            }
            return compareResult;
            }
                }
            }
            
            if ((strBuf1Len > 0) && (strBuf2Len > 0)) {
                while ((strBuf1Index < strBuf1Len) && (strBuf2Index < strBuf2Len)) {
                    if (strBuf1[strBuf1Index] != strBuf2[strBuf2Index]) break;
                    ++strBuf1Index; ++strBuf2Index;
                }
                if ((strBuf1Index < strBuf1Len) && (strBuf2Index < strBuf2Len)) {
            CFComparisonResult res = ((NULL == locale) ? ((strBuf1[strBuf1Index] < strBuf2[strBuf2Index]) ? kCFCompareLessThan : kCFCompareGreaterThan) : _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(str1LocalizedIndex, rangeToCompare.length - str1LocalizedIndex), &inlineBuf2, CFRangeMake(str2LocalizedIndex, str2Len - str2LocalizedIndex), compareOptions, locale));
            if (freeLocale && locale) {
            CFRelease(locale);
            }
            return res;
        }
            }
        }
        
        if ((strBuf1Len > 0) && (strBuf1Index == strBuf1Len)) strBuf1Len = 0;
        if ((strBuf2Len > 0) && (strBuf2Index == strBuf2Len)) strBuf2Len = 0;
        
        if (strBuf1Len == 0) str1Index += str1UsedLen;
        if (strBuf2Len == 0) str2Index += str2UsedLen;
    if ((strBuf1Len == 0) && (strBuf2Len == 0)) {
        str1LocalizedIndex = str1Index;
        str2LocalizedIndex = str2Index;
    }
    }

    if (diacriticsInsensitive || (NULL != ignoredChars)) {
        while (str1Index < rangeToCompare.length) {
            str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
            if ((str1Char < 0x80) && (NULL == ignoredChars)) break; // found ASCII

            if (CFUniCharIsSurrogateHighCharacter(str1Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1)))) str1Char = CFUniCharGetLongCharacterForSurrogatePair(str1Char, otherChar);

            if ((!diacriticsInsensitive || !CFUniCharIsMemberOfBitmap(str1Char, ((str1Char < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str1Char >> 16))))) && ((NULL == ignoredChars) || !CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str1Char))) break;

            str1Index += ((str1Char < 0x10000) ? 1 : 2);
        }

        while (str2Index < str2Len) {
            str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index);
            if ((str2Char < 0x80) && (NULL == ignoredChars)) break; // found ASCII
                
            if (CFUniCharIsSurrogateHighCharacter(str2Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index + 1)))) str2Char = CFUniCharGetLongCharacterForSurrogatePair(str2Char, otherChar);

            if ((!diacriticsInsensitive || !CFUniCharIsMemberOfBitmap(str2Char, ((str2Char < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str2Char >> 16))))) && ((NULL == ignoredChars) || !CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str2Char))) break;

            str2Index += ((str2Char < 0x10000) ? 1 : 2);
        }
    }
    // Need to recalc localized result here for forced ordering, ICU cannot do numericEquivalence
    if (!numericEquivalence && (NULL != locale) && (kCFCompareEqualTo != compareResult) && (str1Index == rangeToCompare.length) && (str2Index == str2Len)) compareResult = _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(forcedIndex1, rangeToCompare.length - forcedIndex1), &inlineBuf2, CFRangeMake(forcedIndex2, str2Len - forcedIndex2), compareOptions, locale);

    if (freeLocale && locale) {
    CFRelease(locale);
    }

    return ((str1Index < rangeToCompare.length) ? kCFCompareGreaterThan : ((str2Index < str2Len) ? kCFCompareLessThan : compareResult));
}