in src/couch_quickjs/quickjs/libunicode.c [1168:1239]
/*
 * Normalize the code point array 'src' (src_len entries) according to
 * 'n_type' and store the resulting buffer in '*pdst'.
 *
 * pdst         receives the output buffer on success, NULL on failure.
 * src, src_len input code points.
 * n_type       normalization form. As used below, bit 0 set means "stop
 *              after decomposition" (NFD / NFKD) and bit 1 is the
 *              compatibility flag handed to to_nfd_rec().
 * opaque, realloc_func
 *              allocator forwarded to dbuf_init2(); the output buffer is
 *              allocated through it.
 *
 * Returns the number of code points written to '*pdst', or -1 on
 * allocation failure. On success the buffer is not released here; the
 * caller presumably frees it with the same allocator (confirm at the
 * call sites). Note that '*pdst' may be NULL with a return value of 0
 * when the input is empty and nothing had to be allocated.
 */
int unicode_normalize(uint32_t **pdst, const uint32_t *src, int src_len,
                      UnicodeNormalizationEnum n_type,
                      void *opaque, DynBufReallocFunc *realloc_func)
{
    int *buf, buf_len, i, p, starter_pos, cc, last_cc, out_len;
    int blocked;
    BOOL is_compat;
    DynBuf dbuf_s, *dbuf = &dbuf_s;

    is_compat = n_type >> 1;

    dbuf_init2(dbuf, opaque, realloc_func);
    if (dbuf_realloc(dbuf, sizeof(int) * src_len))
        goto fail;

    /* common case: latin1 is unaffected by NFC */
    if (n_type == UNICODE_NFC) {
        for(i = 0; i < src_len; i++) {
            if (src[i] >= 0x100)
                break;
        }
        if (i >= src_len) {
            /* every code point is below 0x100: copy the input verbatim */
            buf = (int *)dbuf->buf;
            /* memcpy() with a null pointer is undefined even for a zero
               length, and the buffer may not have been allocated for an
               empty request */
            if (src_len > 0)
                memcpy(buf, src, src_len * sizeof(int));
            *pdst = (uint32_t *)buf;
            return src_len;
        }
    }

    /* decompose into dbuf; allocation errors are latched in the DynBuf
       and checked once afterwards */
    to_nfd_rec(dbuf, (const int *)src, src_len, is_compat);
    if (dbuf_error(dbuf))
        goto fail;
    buf = (int *)dbuf->buf;
    buf_len = dbuf->size / sizeof(int);

    /* presumably reorders combining marks by combining class (canonical
       ordering); the composition loop below relies on that order */
    sort_cc(buf, buf_len);

    if (buf_len <= 1 || (n_type & 1) != 0) {
        /* NFD / NFKD */
        *pdst = (uint32_t *)buf;
        return buf_len;
    }

    /* In-place composition: 'i' reads the decomposed sequence and
       'out_len' is the length of the composed prefix already written. */
    i = 1;
    out_len = 1;
    while (i < buf_len) {
        /* find the starter character and test if it is blocked from
           the character at 'i' */
        last_cc = unicode_get_cc(buf[i]);
        starter_pos = out_len - 1;
        blocked = 0;
        while (starter_pos >= 0) {
            cc = unicode_get_cc(buf[starter_pos]);
            if (cc == 0)
                break; /* reached the starter */
            if (cc >= last_cc) {
                blocked = 1;
                break;
            }
            /* only the nearest preceding character needs the comparison:
               256 is presumably larger than any combining class, so the
               test above cannot trigger again for earlier characters */
            last_cc = 256;
            starter_pos--;
        }
        if (!blocked && starter_pos >= 0 &&
            (p = compose_pair(buf[starter_pos], buf[i])) != 0) {
            /* the pair composes: replace the starter and drop buf[i] */
            buf[starter_pos] = p;
            i++;
        } else {
            /* blocked, no starter, or no composition: keep buf[i] */
            buf[out_len++] = buf[i++];
        }
    }
    *pdst = (uint32_t *)buf;
    return out_len;

 fail:
    /* release whatever the DynBuf still holds; without this a failure
       inside to_nfd_rec() after a partial allocation leaked the buffer */
    dbuf_free(dbuf);
    *pdst = NULL;
    return -1;
}