in Frameworks/CoreFoundation/String.subproj/CFString.c [5073:5375]
void CFStringNormalize(CFMutableStringRef string, CFStringNormalizationForm theForm) {
CFIndex currentIndex = 0;
CFIndex length;
bool needToReorder = true;
CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)string, _cfNormalize:theForm);
__CFAssertIsStringAndMutable(string);
length = __CFStrLength(string);
if (__CFStrIsEightBit(string)) {
uint8_t *contents;
if (theForm == kCFStringNormalizationFormC) return; // 8bit form has no decomposition
contents = (uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
for (;currentIndex < length;currentIndex++) {
if (contents[currentIndex] > 127) {
__CFStringChangeSize(string, CFRangeMake(0, 0), 0, true); // need to do harm way
needToReorder = false;
break;
}
}
}
if (currentIndex < length) {
UTF16Char *limit = (UTF16Char *)__CFStrContents(string) + length;
UTF16Char *contents = (UTF16Char *)__CFStrContents(string) + currentIndex;
UTF32Char buffer[MAX_DECOMP_BUF];
UTF32Char *mappedCharacters = buffer;
CFIndex allocatedLength = MAX_DECOMP_BUF;
CFIndex mappedLength;
CFIndex currentLength;
UTF32Char currentChar;
const uint8_t *decompBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, 0);
const uint8_t *nonBaseBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, 0);
const uint8_t *combiningBMP = (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, 0);
while (contents < limit) {
if (CFUniCharIsSurrogateHighCharacter(*contents) && (contents + 1 < limit) && CFUniCharIsSurrogateLowCharacter(*(contents + 1))) {
currentChar = CFUniCharGetLongCharacterForSurrogatePair(*contents, *(contents + 1));
currentLength = 2;
contents += 2;
} else {
currentChar = *(contents++);
currentLength = 1;
}
mappedLength = 0;
if (CFUniCharIsMemberOfBitmap(currentChar, ((currentChar < 0x10000) ? decompBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (currentChar >> 16)))) && (0 == CFUniCharGetCombiningPropertyForCharacter(currentChar, ((currentChar < 0x10000) ? combiningBMP : (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (currentChar >> 16)))))) {
if ((theForm & kCFStringNormalizationFormC) == 0 || currentChar < HANGUL_SBASE || currentChar > (HANGUL_SBASE + HANGUL_SCOUNT)) { // We don't have to decompose Hangul Syllables if we're precomposing again
mappedLength = CFUniCharDecomposeCharacter(currentChar, mappedCharacters, MAX_DECOMP_BUF);
}
}
if ((needToReorder || (theForm & kCFStringNormalizationFormC)) && ((contents < limit) || (mappedLength == 0))) {
if (mappedLength > 0) {
if (CFUniCharIsSurrogateHighCharacter(*contents) && (contents + 1 < limit) && CFUniCharIsSurrogateLowCharacter(*(contents + 1))) {
currentChar = CFUniCharGetLongCharacterForSurrogatePair(*contents, *(contents + 1));
} else {
currentChar = *contents;
}
}
if (0 != CFUniCharGetCombiningPropertyForCharacter(currentChar, (const uint8_t *)((currentChar < 0x10000) ? combiningBMP : CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (currentChar >> 16))))) {
uint32_t decompLength;
if (mappedLength == 0) {
contents -= (currentChar & 0xFFFF0000 ? 2 : 1);
if (currentIndex > 0) {
if (CFUniCharIsSurrogateLowCharacter(*(contents - 1)) && (currentIndex > 1) && CFUniCharIsSurrogateHighCharacter(*(contents - 2))) {
*mappedCharacters = CFUniCharGetLongCharacterForSurrogatePair(*(contents - 2), *(contents - 1));
currentIndex -= 2;
currentLength += 2;
} else {
*mappedCharacters = *(contents - 1);
--currentIndex;
++currentLength;
}
mappedLength = 1;
}
} else {
currentLength += (currentChar & 0xFFFF0000 ? 2 : 1);
}
contents += (currentChar & 0xFFFF0000 ? 2 : 1);
if (CFUniCharIsMemberOfBitmap(currentChar, ((currentChar < 0x10000) ? decompBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (currentChar >> 16))))) { // Vietnamese accent, etc.
decompLength = CFUniCharDecomposeCharacter(currentChar, mappedCharacters + mappedLength, MAX_DECOMP_BUF - mappedLength);
mappedLength += decompLength;
} else {
mappedCharacters[mappedLength++] = currentChar;
}
while (contents < limit) {
if (CFUniCharIsSurrogateHighCharacter(*contents) && (contents + 1 < limit) && CFUniCharIsSurrogateLowCharacter(*(contents + 1))) {
currentChar = CFUniCharGetLongCharacterForSurrogatePair(*contents, *(contents + 1));
} else {
currentChar = *contents;
}
if (0 == CFUniCharGetCombiningPropertyForCharacter(currentChar, (const uint8_t *)((currentChar < 0x10000) ? combiningBMP : CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (currentChar >> 16))))) break;
if (currentChar & 0xFFFF0000) {
contents += 2;
currentLength += 2;
} else {
++contents;
++currentLength;
}
if (mappedLength == allocatedLength) {
allocatedLength += MAX_DECOMP_BUF;
if (mappedCharacters == buffer) {
mappedCharacters = (UTF32Char *)CFAllocatorAllocate(kCFAllocatorSystemDefault, allocatedLength * sizeof(UTF32Char), 0);
memmove(mappedCharacters, buffer, MAX_DECOMP_BUF * sizeof(UTF32Char));
} else {
mappedCharacters = (UTF32Char *)CFAllocatorReallocate(kCFAllocatorSystemDefault, mappedCharacters, allocatedLength * sizeof(UTF32Char), 0);
}
}
if (CFUniCharIsMemberOfBitmap(currentChar, ((currentChar < 0x10000) ? decompBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (currentChar >> 16))))) { // Vietnamese accent, etc.
decompLength = CFUniCharDecomposeCharacter(currentChar, mappedCharacters + mappedLength, MAX_DECOMP_BUF - mappedLength);
mappedLength += decompLength;
} else {
mappedCharacters[mappedLength++] = currentChar;
}
}
}
if (needToReorder && mappedLength > 1) CFUniCharPrioritySort(mappedCharacters, mappedLength);
}
if (theForm & kCFStringNormalizationFormKD) {
CFIndex newLength = 0;
if (mappedLength == 0 && CFUniCharIsMemberOf(currentChar, kCFUniCharCompatibilityDecomposableCharacterSet)) {
mappedCharacters[mappedLength++] = currentChar;
}
while (newLength < mappedLength) {
newLength = CFUniCharCompatibilityDecompose(mappedCharacters, mappedLength, allocatedLength);
if (newLength == 0) {
allocatedLength += MAX_DECOMP_BUF;
if (mappedCharacters == buffer) {
mappedCharacters = (UTF32Char *)CFAllocatorAllocate(kCFAllocatorSystemDefault, allocatedLength * sizeof(UTF32Char), 0);
memmove(mappedCharacters, buffer, MAX_DECOMP_BUF * sizeof(UTF32Char));
} else {
mappedCharacters = (UTF32Char *)CFAllocatorReallocate(kCFAllocatorSystemDefault, mappedCharacters, allocatedLength * sizeof(UTF32Char), 0);
}
}
}
mappedLength = newLength;
}
if (theForm & kCFStringNormalizationFormC) {
UTF32Char nextChar;
if (mappedLength > 1) {
CFIndex consumedLength = 1;
UTF32Char *currentBase = mappedCharacters;
uint8_t currentClass, lastClass = 0;
bool didCombine = false;
currentChar = *mappedCharacters;
while (consumedLength < mappedLength) {
nextChar = mappedCharacters[consumedLength];
currentClass = CFUniCharGetCombiningPropertyForCharacter(nextChar, (const uint8_t *)((nextChar < 0x10000) ? combiningBMP : CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (nextChar >> 16))));
if (theForm & kCFStringNormalizationFormKD) {
if ((currentChar >= HANGUL_LBASE) && (currentChar < (HANGUL_LBASE + 0xFF))) {
SInt8 lIndex = currentChar - HANGUL_LBASE;
if ((0 <= lIndex) && (lIndex <= HANGUL_LCOUNT)) {
SInt16 vIndex = nextChar - HANGUL_VBASE;
if ((vIndex >= 0) && (vIndex <= HANGUL_VCOUNT)) {
SInt16 tIndex = 0;
CFIndex usedLength = mappedLength;
mappedCharacters[consumedLength++] = 0xFFFD;
if (consumedLength < mappedLength) {
tIndex = mappedCharacters[consumedLength] - HANGUL_TBASE;
if ((tIndex < 0) || (tIndex > HANGUL_TCOUNT)) {
tIndex = 0;
} else {
mappedCharacters[consumedLength++] = 0xFFFD;
}
}
*currentBase = (lIndex * HANGUL_VCOUNT + vIndex) * HANGUL_TCOUNT + tIndex + HANGUL_SBASE;
while (--usedLength > 0) {
if (mappedCharacters[usedLength] == 0xFFFD) {
--mappedLength;
--consumedLength;
memmove(mappedCharacters + usedLength, mappedCharacters + usedLength + 1, (mappedLength - usedLength) * sizeof(UTF32Char));
}
}
currentBase = mappedCharacters + consumedLength;
currentChar = *currentBase;
++consumedLength;
continue;
}
}
}
if (!CFUniCharIsMemberOfBitmap(nextChar, ((nextChar < 0x10000) ? nonBaseBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, (nextChar >> 16))))) {
*currentBase = currentChar;
currentBase = mappedCharacters + consumedLength;
currentChar = nextChar;
++consumedLength;
continue;
}
}
if ((lastClass == 0) || (currentClass > lastClass)) {
nextChar = CFUniCharPrecomposeCharacter(currentChar, nextChar);
if (nextChar == 0xFFFD) {
lastClass = currentClass;
} else {
mappedCharacters[consumedLength] = 0xFFFD;
didCombine = true;
currentChar = nextChar;
}
}
++consumedLength;
}
*currentBase = currentChar;
if (didCombine) {
consumedLength = mappedLength;
while (--consumedLength > 0) {
if (mappedCharacters[consumedLength] == 0xFFFD) {
--mappedLength;
memmove(mappedCharacters + consumedLength, mappedCharacters + consumedLength + 1, (mappedLength - consumedLength) * sizeof(UTF32Char));
}
}
}
} else if ((currentChar >= HANGUL_LBASE) && (currentChar < (HANGUL_LBASE + 0xFF))) { // Hangul Jamo
SInt8 lIndex = currentChar - HANGUL_LBASE;
if ((contents < limit) && (0 <= lIndex) && (lIndex <= HANGUL_LCOUNT)) {
SInt16 vIndex = *contents - HANGUL_VBASE;
if ((vIndex >= 0) && (vIndex <= HANGUL_VCOUNT)) {
SInt16 tIndex = 0;
++contents; ++currentLength;
if (contents < limit) {
tIndex = *contents - HANGUL_TBASE;
if ((tIndex < 0) || (tIndex > HANGUL_TCOUNT)) {
tIndex = 0;
} else {
++contents; ++currentLength;
}
}
*mappedCharacters = (lIndex * HANGUL_VCOUNT + vIndex) * HANGUL_TCOUNT + tIndex + HANGUL_SBASE;
mappedLength = 1;
}
}
} else { // collect class 0 non-base characters
while (contents < limit) {
nextChar = *contents;
if (CFUniCharIsSurrogateHighCharacter(nextChar) && ((contents + 1) < limit) && CFUniCharIsSurrogateLowCharacter(*(contents + 1))) {
nextChar = CFUniCharGetLongCharacterForSurrogatePair(nextChar, *(contents + 1));
if (!CFUniCharIsMemberOfBitmap(nextChar, (const uint8_t *)CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, (nextChar >> 16))) || (0 != CFUniCharGetCombiningPropertyForCharacter(nextChar, (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (nextChar >> 16))))) break;
} else {
if (!CFUniCharIsMemberOfBitmap(nextChar, nonBaseBMP) || (0 != CFUniCharGetCombiningPropertyForCharacter(nextChar, combiningBMP))) break;
}
currentChar = CFUniCharPrecomposeCharacter(currentChar, nextChar);
if (0xFFFD == currentChar) break;
if (nextChar < 0x10000) {
++contents; ++currentLength;
} else {
contents += 2;
currentLength += 2;
}
*mappedCharacters = currentChar;
mappedLength = 1;
}
}
}
if (mappedLength > 0) {
CFIndex utf16Length = __CFGetUTF16Length(mappedCharacters, mappedLength);
if (utf16Length != currentLength) {
__CFStringChangeSize(string, CFRangeMake(currentIndex, currentLength), utf16Length, true);
currentLength = utf16Length;
}
contents = (UTF16Char *)__CFStrContents(string);
limit = contents + __CFStrLength(string);
contents += currentIndex;
__CFFillInUTF16(mappedCharacters, contents, mappedLength);
contents += utf16Length;
}
currentIndex += currentLength;
}
if (mappedCharacters != buffer) CFAllocatorDeallocate(kCFAllocatorSystemDefault, mappedCharacters);
}
}