CFComparisonResult CFStringCompareWithOptionsAndLocale()

in CoreFoundation/String.subproj/CFString.c [2710:3075]


/*
 * Compares the characters of `string` inside `rangeToCompare` against the whole of `string2`.
 *
 * Parameters:
 *   string          - first string; only the characters in rangeToCompare take part.
 *   string2         - second string; always compared in full (0 .. CFStringGetLength(string2)).
 *   rangeToCompare  - sub-range of `string` to compare.
 *   compareOptions  - CFStringCompareFlags bits. The bits tested directly here are
 *                     kCFCompareCaseInsensitive, kCFCompareNonliteral, kCFCompareDiacriticInsensitive,
 *                     kCFCompareWidthInsensitive, kCFCompareNumerically, kCFCompareForcedOrdering and
 *                     kCFCompareLocalized; the flag word is also forwarded to the helpers called below.
 *   locale          - may be NULL. When NULL and kCFCompareLocalized is set, the current locale is
 *                     copied for the duration of the call and released on every exit path that can
 *                     be reached with a locale in hand.
 *
 * Returns kCFCompareLessThan, kCFCompareEqualTo or kCFCompareGreaterThan.
 *
 * Layout of the function:
 *   1. Fast paths (only without locale, ignored-character set and numeric comparison): direct
 *      comparison of the 8-bit or UTF-16 backing stores when both strings expose one.
 *   2. General loop: walks both strings through inline buffers, folding character clusters on
 *      demand into strBuf1/strBuf2, and delegating to _CFCompareStringsWithLocale when a locale
 *      is in effect and a difference is found.
 *   3. Tail: skips trailing characters that the options say to ignore, then decides the result
 *      from whatever is left over on either side.
 */
CFComparisonResult CFStringCompareWithOptionsAndLocale(CFStringRef string, CFStringRef string2, CFRange rangeToCompare, CFStringCompareFlags compareOptions, CFLocaleRef locale) {
    /* No objc dispatch needed here since CFStringInlineBuffer works with both CFString and NSString */
    UTF32Char strBuf1[kCFStringStackBufferLength];
    UTF32Char strBuf2[kCFStringStackBufferLength];
    CFStringInlineBuffer inlineBuf1, inlineBuf2;
    UTF32Char str1Char, str2Char;
    CFIndex str1UsedLen, str2UsedLen;
    CFIndex str1Index = 0, str2Index = 0, strBuf1Index = 0, strBuf2Index = 0, strBuf1Len = 0, strBuf2Len = 0;
    CFIndex str1LocalizedIndex = 0, str2LocalizedIndex = 0;
    CFIndex forcedIndex1 = 0, forcedIndex2 = 0;
    CFIndex str2Len = CFStringGetLength(string2);
    bool caseInsensitive = ((compareOptions & kCFCompareCaseInsensitive) ? true : false);
    bool diacriticsInsensitive = ((compareOptions & kCFCompareDiacriticInsensitive) ? true : false);
    bool equalityOptions = ((compareOptions & (kCFCompareCaseInsensitive|kCFCompareNonliteral|kCFCompareDiacriticInsensitive|kCFCompareWidthInsensitive)) ? true : false);
    bool numerically = ((compareOptions & kCFCompareNumerically) ? true : false);
    bool forceOrdering = ((compareOptions & kCFCompareForcedOrdering) ? true : false);
    const uint8_t *langCode;
    CFComparisonResult compareResult = kCFCompareEqualTo;
    UTF16Char otherChar;
    Boolean freeLocale = false;
    CFCharacterSetInlineBuffer *ignoredChars = NULL;
    CFCharacterSetInlineBuffer csetBuffer;
    bool numericEquivalence = false;

    // A localized compare without an explicit locale uses a private copy of the current one; freeLocale records that we own it
    if ((compareOptions & kCFCompareLocalized) && (NULL == locale)) {
        locale = CFLocaleCopyCurrent();
	freeLocale = true;
    }

    // langCode stays NULL without a locale; when non-NULL the loop below leaves 'I' out of the plain ASCII case fold
    langCode = ((NULL == locale) ? NULL : (const uint8_t *)_CFStrGetSpecialCaseHandlingLanguageIdentifierForLocale(locale, true));

    // When the helper reports a character set to ignore for these options, mismatches can no longer be decided by raw code point alone
    if (__CFStringFillCharacterSetInlineBuffer(&csetBuffer, compareOptions)) {
	ignoredChars = &csetBuffer;
	equalityOptions = true;
    }

    if ((NULL == locale) && (NULL == ignoredChars) && !numerically) { // could do binary comp (be careful when adding new flags)
        CFStringEncoding eightBitEncoding = __CFStringGetEightBitStringEncoding();
        const uint8_t *str1Bytes = (const uint8_t *)_CFStringGetCStringPtrInternal(string, eightBitEncoding, false, true);
        const uint8_t *str2Bytes = (const uint8_t *)_CFStringGetCStringPtrInternal(string2, eightBitEncoding, false, true);
        CFIndex factor = sizeof(uint8_t);

        if ((NULL != str1Bytes) && (NULL != str2Bytes)) {
            compareOptions &= ~kCFCompareNonliteral; // remove non-literal

            if ((kCFStringEncodingASCII == eightBitEncoding) && (false == forceOrdering)) {
                if (caseInsensitive) {
                    // Here we call our own __CFStringCompareASCIICaseInsensitive rather than strncasecmp_l to continue comparing after embedded null bytes
                    // The result is kept in a CFIndex: the length difference below does not fit an int when CFIndex is wider, and narrowing could flip or zero its sign
                    CFIndex cmpResult = __CFStringCompareASCIICaseInsensitive(str1Bytes + rangeToCompare.location, str2Bytes, __CFMin(rangeToCompare.length, str2Len));
                    
                    if (0 == cmpResult) cmpResult = rangeToCompare.length - str2Len;
                    
                    return ((0 == cmpResult) ? kCFCompareEqualTo : ((cmpResult < 0) ? kCFCompareLessThan : kCFCompareGreaterThan));
                }
                // Case-sensitive ASCII falls through to the memcmp at the end of the fast-path block
            } else if (caseInsensitive || diacriticsInsensitive) {
                CFIndex limitLength = __CFMin(rangeToCompare.length, str2Len);

                str1Bytes += rangeToCompare.location;

                // Byte-wise scan that only knows how to handle ASCII; the first differing pair with a non-ASCII byte hands over to the general loop
                while (str1Index < limitLength) {
                    str1Char = str1Bytes[str1Index];
                    str2Char = str2Bytes[str1Index];

                    if (str1Char != str2Char) {
                        if ((str1Char < 0x80) && (str2Char < 0x80)) {
			    // Forced ordering: remember the first raw difference in case the strings turn out equal after folding
			    if (forceOrdering && (kCFCompareEqualTo == compareResult) && (str1Char != str2Char)) compareResult = ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan);
			    if (caseInsensitive) {
				if ((str1Char >= 'A') && (str1Char <= 'Z')) str1Char += ('a' - 'A');
				if ((str2Char >= 'A') && (str2Char <= 'Z')) str2Char += ('a' - 'A');
			    }

                            if (str1Char != str2Char) return ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan);
                        } else {
                            // Clearing str1Bytes also disables the memcmp below
                            str1Bytes = NULL;
                            break;
                        }
                    }
                    ++str1Index;
                }

                // The general loop resumes from where this scan stopped
                str2Index = str1Index;
                
                if (str1Index == limitLength) {
                    // CFIndex rather than int so that a large length difference keeps its sign
                    CFIndex cmpResult = rangeToCompare.length - str2Len;
                    
                    return ((0 == cmpResult) ? compareResult : ((cmpResult < 0) ? kCFCompareLessThan : kCFCompareGreaterThan));
                }
            }
        } else if (!equalityOptions && (NULL == str1Bytes) && (NULL == str2Bytes)) {
            // Neither string exposes 8-bit storage; try the UTF-16 storage instead
            str1Bytes = (const uint8_t *)CFStringGetCharactersPtr(string);
            str2Bytes = (const uint8_t *)CFStringGetCharactersPtr(string2);
            factor = sizeof(UTF16Char);
#if __LITTLE_ENDIAN__
            if ((NULL != str1Bytes) && (NULL != str2Bytes)) { // we cannot use memcmp
                const UTF16Char *str1 = ((const UTF16Char *)str1Bytes) + rangeToCompare.location;
                const UTF16Char *str1Limit = str1 + __CFMin(rangeToCompare.length, str2Len);
                const UTF16Char *str2 = (const UTF16Char *)str2Bytes;
                CFIndex cmpResult = 0;

                while ((0 == cmpResult) && (str1 < str1Limit)) cmpResult = (CFIndex)*(str1++) - (CFIndex)*(str2++);

                if (0 == cmpResult) cmpResult = rangeToCompare.length - str2Len;
                
                return ((0 == cmpResult) ? kCFCompareEqualTo : ((cmpResult < 0) ? kCFCompareLessThan : kCFCompareGreaterThan));
            }
#endif /* __LITTLE_ENDIAN__ */
        }
        if ((NULL != str1Bytes) && (NULL != str2Bytes)) {
            // factor scales character counts to bytes (1 for 8-bit storage, sizeof(UTF16Char) for UTF-16 storage)
            // CFIndex rather than int so that the length difference assigned below keeps its sign
            CFIndex cmpResult = memcmp(str1Bytes + (rangeToCompare.location * factor), str2Bytes, __CFMin(rangeToCompare.length, str2Len) * factor);
            
            if (0 == cmpResult) cmpResult = rangeToCompare.length - str2Len;
            
            return ((0 == cmpResult) ? kCFCompareEqualTo : ((cmpResult < 0) ? kCFCompareLessThan : kCFCompareGreaterThan));
        }
    }
    
    const uint8_t *graphemeBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, 0);
    
    // inlineBuf1 is set up over rangeToCompare, so str1Index counts from the start of that range, not from the start of `string`
    _CFStringInitInlineBufferInternal(string, &inlineBuf1, rangeToCompare, true);
    _CFStringInitInlineBufferInternal(string2, &inlineBuf2, CFRangeMake(0, str2Len), true);

    if (NULL != locale) {
	str1LocalizedIndex = str1Index;
	str2LocalizedIndex = str2Index;

	// We temporarily disable kCFCompareDiacriticInsensitive for SL <rdar://problem/6767096>. Should be revisited in NMOS <rdar://problem/7003830>
	if (forceOrdering) {
	    diacriticsInsensitive = false;
	    compareOptions &= ~kCFCompareDiacriticInsensitive;
	}
    }
    
    // Non-zero while the remainder of an oversized cluster must not be folded (set when the fold helper reports insufficientBuffer)
    CFIndex preventStr1FoldingUntil = 0, preventStr2FoldingUntil = 0;
    
    while ((str1Index < rangeToCompare.length) && (str2Index < str2Len)) {
        // strBufNLen == 0 means no folded cluster is pending for that side: read the next raw character.
        // Otherwise take the next character out of the folded buffer.
        if (strBuf1Len == 0) {
            str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
            // Plain ASCII case fold; not applied to 'I' when langCode is set, and under forced ordering only once a tentative ordering has been recorded
            if (caseInsensitive && (str1Char >= 'A') && (str1Char <= 'Z') && ((NULL == langCode) || (str1Char != 'I')) && ((false == forceOrdering) || (kCFCompareEqualTo != compareResult))) str1Char += ('a' - 'A');
            str1UsedLen = 1;
        } else {
            str1Char = strBuf1[strBuf1Index++];
        }
        if (strBuf2Len == 0) {
            str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index);
            if (caseInsensitive && (str2Char >= 'A') && (str2Char <= 'Z') && ((NULL == langCode) || (str2Char != 'I')) && ((false == forceOrdering) || (kCFCompareEqualTo != compareResult))) str2Char += ('a' - 'A');
            str2UsedLen = 1;
        } else {
            str2Char = strBuf2[strBuf2Index++];
        }

        if (numerically && ((0 == strBuf1Len) && (str1Char <= '9') && (str1Char >= '0')) && ((0 == strBuf2Len) && (str2Char <= '9') && (str2Char >= '0'))) { // If both are not ASCII digits, then don't do numerical comparison here
            uint64_t intValue1 = 0, intValue2 = 0;	// !!! Doesn't work if numbers are > max uint64_t
            CFIndex str1NumRangeIndex = str1Index;
            CFIndex str2NumRangeIndex = str2Index;

            // Consume each run of ASCII digits and accumulate its value; both indexes end up just past their run
            do {
                intValue1 = (intValue1 * 10) + (str1Char - '0');
                str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, ++str1Index);
            } while ((str1Char <= '9') && (str1Char >= '0'));

            do {
                intValue2 = intValue2 * 10 + (str2Char - '0');
                str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, ++str2Index);
            } while ((str2Char <= '9') && (str2Char >= '0'));

            if (intValue1 == intValue2) {
                // Same value written with a different number of digits: under forced ordering the shorter run sorts first
                if (forceOrdering && (kCFCompareEqualTo == compareResult) && ((str1Index - str1NumRangeIndex) != (str2Index - str2NumRangeIndex))) {
                    compareResult = (((str1Index - str1NumRangeIndex) < (str2Index - str2NumRangeIndex)) ? kCFCompareLessThan : kCFCompareGreaterThan);
                    numericEquivalence = true;
                    forcedIndex1 = str1NumRangeIndex;
                    forcedIndex2 = str2NumRangeIndex;
                }

                continue;
            } else if (intValue1 < intValue2) {
		if (freeLocale && locale) {
		    CFRelease(locale);
		}
                return kCFCompareLessThan;
            } else {
		if (freeLocale && locale) {
		    CFRelease(locale);
		}
                return kCFCompareGreaterThan;
            }
        }

        if (str1Char != str2Char) {
            // With no equality-affecting option the first difference decides: by code point, or by the locale comparison of the remainders
            if (!equalityOptions) {
		compareResult = ((NULL == locale) ? ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan) : _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(str1Index, rangeToCompare.length - str1Index), &inlineBuf2, CFRangeMake(str2Index, str2Len - str2Index), compareOptions, locale));
                if (freeLocale && locale) {
                    CFRelease(locale);
                }
		return compareResult;
	    }

            // Forced ordering: record the first raw difference as a tentative result, together with the indexes used to recompute it with the locale at the end
            if (forceOrdering && (kCFCompareEqualTo == compareResult)) {
		compareResult = ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan);
		forcedIndex1 = str1LocalizedIndex;
		forcedIndex2 = str2LocalizedIndex;
	    }

            // Two differing ASCII characters with nothing to ignore: decided by the locale if there is one, otherwise by code point unless case-insensitive
            if ((str1Char < 0x80) && (str2Char < 0x80) && (NULL == ignoredChars)) {
                if (NULL != locale) {
		    compareResult = _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(str1Index, rangeToCompare.length - str1Index), &inlineBuf2, CFRangeMake(str2Index, str2Len - str2Index), compareOptions, locale);
		    if (freeLocale && locale) {
			CFRelease(locale);
		    }
		    return compareResult;
                } else if (!caseInsensitive) {
		    if (freeLocale && locale) {
			CFRelease(locale);
		    }
                    return ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan);
                }
            }

            // Combine a high/low surrogate pair into one long character; it then occupies two UTF-16 units
            if (CFUniCharIsSurrogateHighCharacter(str1Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1)))) {
                str1Char = CFUniCharGetLongCharacterForSurrogatePair(str1Char, otherChar);
                str1UsedLen = 2;
            }
            
            if (CFUniCharIsSurrogateHighCharacter(str2Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index + 1)))) {
                str2Char = CFUniCharGetLongCharacterForSurrogatePair(str2Char, otherChar);
                str2UsedLen = 2;
            }
            
	    // An ignored character is skipped on its own side only; if the other side is reading from its folded buffer, step that buffer back so the same character is compared again
	    if (NULL != ignoredChars) {
		if (CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str1Char)) {
		    if ((strBuf1Len > 0) && (strBuf1Index == strBuf1Len)) strBuf1Len = 0;
		    if (strBuf1Len == 0) str1Index += str1UsedLen;
		    if (strBuf2Len > 0) --strBuf2Index;
		    continue;
		}
		if (CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str2Char)) {
		    if ((strBuf2Len > 0) && (strBuf2Index == strBuf2Len)) strBuf2Len = 0;
		    if (strBuf2Len == 0) str2Index += str2UsedLen;
		    if (strBuf1Len > 0) -- strBuf1Index;
		    continue;
		}	    
	    }
	    
            // Diacritic-insensitive: a raw character in the grapheme-extend bitmap is made to match whatever the other side holds
            if (diacriticsInsensitive && (str1Index > 0)) {
                bool str1Skip = false;
                bool str2Skip = false;
                
                if ((0 == strBuf1Len) && CFUniCharIsMemberOfBitmap(str1Char, ((str1Char < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str1Char >> 16))))) {
                    str1Char = str2Char;
                    str1Skip = true;
                }
                if ((0 == strBuf2Len) && CFUniCharIsMemberOfBitmap(str2Char, ((str2Char < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str2Char >> 16))))) {
                    str2Char = str1Char;
                    str2Skip = true;
                }
                
                // Only one side skipped: pre-subtract the other side's length so the advance at the bottom of the loop leaves it in place
                if (str1Skip != str2Skip) {
                    if (str1Skip) str2Index -= str2UsedLen;
                    if (str2Skip) str1Index -= str1UsedLen;
                }
            }

            if (str1Char != str2Char) {
                // Still different: fold the cluster of string 1 into strBuf1 (unless a fold is already pending or folding is currently suppressed)
                if (0 == strBuf1Len && (preventStr1FoldingUntil == 0 || preventStr1FoldingUntil == str1Index)) {
                    preventStr1FoldingUntil = 0;
                    bool insufficientBuffer = false;
                    strBuf1Len = __CFStringFoldCharacterClusterAtIndex(str1Char, &inlineBuf1, str1Index, compareOptions, langCode, strBuf1, kCFStringStackBufferLength, &str1UsedLen, &insufficientBuffer);
                    if (strBuf1Len > 0) {
                        str1Char = *strBuf1;
                        strBuf1Index = 1;
                    }
                    if (insufficientBuffer) {
                        // We have a character cluster larger than our maximum folding size. This is likely a malformed string, so do not fold the remainder of this cluster
                        CFRange currentCluster = CFStringGetRangeOfCharacterClusterAtIndex(string, str1Index, kCFStringGraphemeCluster);
                        preventStr1FoldingUntil = currentCluster.location + currentCluster.length;
                    }
                }
                
                // String 1 has no folded form while string 2 is in the middle of one: treat as a definite difference
                if ((0 == strBuf1Len) && (0 < strBuf2Len)) {
		    compareResult =  ((NULL == locale) ? ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan) : _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(str1LocalizedIndex, rangeToCompare.length - str1LocalizedIndex), &inlineBuf2, CFRangeMake(str2LocalizedIndex, str2Len - str2LocalizedIndex), compareOptions, locale));
		    if (freeLocale && locale) {
			CFRelease(locale);
		    }
		    return compareResult;
		}
                
                // Fold string 2 as well when string 1 did not fold or its folded head still differs
                if ((0 == strBuf2Len) && ((0 == strBuf1Len) || (str1Char != str2Char)) && (preventStr2FoldingUntil == 0 || preventStr2FoldingUntil == str2Index)) {
                    preventStr2FoldingUntil = 0;
                    bool insufficientBuffer = false;
                    strBuf2Len = __CFStringFoldCharacterClusterAtIndex(str2Char, &inlineBuf2, str2Index, compareOptions, langCode, strBuf2, kCFStringStackBufferLength, &str2UsedLen, &insufficientBuffer);
                    if (strBuf2Len > 0) {
                        str2Char = *strBuf2;
                        strBuf2Index = 1;
                    }
                    if ((0 == strBuf2Len) || (str1Char != str2Char)) {
			compareResult = ((NULL == locale) ? ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan) : _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(str1LocalizedIndex, rangeToCompare.length - str1LocalizedIndex), &inlineBuf2, CFRangeMake(str2LocalizedIndex, str2Len - str2LocalizedIndex), compareOptions, locale));
			if (freeLocale && locale) {
			    CFRelease(locale);
			}
			return compareResult;
		    }
                    if (insufficientBuffer) {
                        // We have a character cluster larger than our maximum folding size. This is likely a malformed string, so do not fold the remainder of this cluster
                        CFRange currentCluster = CFStringGetRangeOfCharacterClusterAtIndex(string2, str2Index, kCFStringGraphemeCluster);
                        preventStr2FoldingUntil = currentCluster.location + currentCluster.length;
                    }
                }
            }
            
            // Both sides have folded output: compare the rest of the two buffers; a difference before either one runs out is final
            if ((strBuf1Len > 0) && (strBuf2Len > 0)) {
                while ((strBuf1Index < strBuf1Len) && (strBuf2Index < strBuf2Len)) {
                    if (strBuf1[strBuf1Index] != strBuf2[strBuf2Index]) break;
                    ++strBuf1Index; ++strBuf2Index;
                }
                if ((strBuf1Index < strBuf1Len) && (strBuf2Index < strBuf2Len)) {
		    CFComparisonResult res = ((NULL == locale) ? ((strBuf1[strBuf1Index] < strBuf2[strBuf2Index]) ? kCFCompareLessThan : kCFCompareGreaterThan) : _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(str1LocalizedIndex, rangeToCompare.length - str1LocalizedIndex), &inlineBuf2, CFRangeMake(str2LocalizedIndex, str2Len - str2LocalizedIndex), compareOptions, locale));
		    if (freeLocale && locale) {
			CFRelease(locale);
		    }
		    return res;
		}
            }
        }
        
        // A fully consumed folded buffer is retired; only then does that side's string index move past the source characters it covered
        if ((strBuf1Len > 0) && (strBuf1Index == strBuf1Len)) strBuf1Len = 0;
        if ((strBuf2Len > 0) && (strBuf2Index == strBuf2Len)) strBuf2Len = 0;
        
        if (strBuf1Len == 0) str1Index += str1UsedLen;
        if (strBuf2Len == 0) str2Index += str2UsedLen;
	// With no fold pending on either side, this is a point from which a locale comparison of the remainders can start
	if ((strBuf1Len == 0) && (strBuf2Len == 0)) {
	    str1LocalizedIndex = str1Index;
	    str2LocalizedIndex = str2Index;
	}
    }

    // One side ran out. Skip over trailing characters on either side that the options say to disregard (grapheme-extend characters and/or ignored-set members)
    if (diacriticsInsensitive || (NULL != ignoredChars)) {
        while (str1Index < rangeToCompare.length) {
            str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
            if ((str1Char < 0x80) && (NULL == ignoredChars)) break; // found ASCII

            if (CFUniCharIsSurrogateHighCharacter(str1Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1)))) str1Char = CFUniCharGetLongCharacterForSurrogatePair(str1Char, otherChar);

            if ((!diacriticsInsensitive || !CFUniCharIsMemberOfBitmap(str1Char, ((str1Char < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str1Char >> 16))))) && ((NULL == ignoredChars) || !CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str1Char))) break;

            str1Index += ((str1Char < 0x10000) ? 1 : 2);
        }

        while (str2Index < str2Len) {
            str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index);
            if ((str2Char < 0x80) && (NULL == ignoredChars)) break; // found ASCII
                
            if (CFUniCharIsSurrogateHighCharacter(str2Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index + 1)))) str2Char = CFUniCharGetLongCharacterForSurrogatePair(str2Char, otherChar);

            if ((!diacriticsInsensitive || !CFUniCharIsMemberOfBitmap(str2Char, ((str2Char < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str2Char >> 16))))) && ((NULL == ignoredChars) || !CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str2Char))) break;

            str2Index += ((str2Char < 0x10000) ? 1 : 2);
        }
    }
    // Need to recalc localized result here for forced ordering, ICU cannot do numericEquivalence
    if (!numericEquivalence && (NULL != locale) && (kCFCompareEqualTo != compareResult) && (str1Index == rangeToCompare.length) && (str2Index == str2Len)) compareResult = _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(forcedIndex1, rangeToCompare.length - forcedIndex1), &inlineBuf2, CFRangeMake(forcedIndex2, str2Len - forcedIndex2), compareOptions, locale);

    if (freeLocale && locale) {
	CFRelease(locale);
    }

    // Leftover characters on one side make that side the greater one; otherwise the (possibly forced) result stands
    return ((str1Index < rangeToCompare.length) ? kCFCompareGreaterThan : ((str2Index < str2Len) ? kCFCompareLessThan : compareResult));
}