int unicode_normalize()

in src/couch_quickjs/quickjs/libunicode.c [1168:1239]


int unicode_normalize(uint32_t **pdst, const uint32_t *src, int src_len,
                      UnicodeNormalizationEnum n_type,
                      void *opaque, DynBufReallocFunc *realloc_func)
{
    int *buf, buf_len, i, p, starter_pos, cc, last_cc, out_len;
    BOOL is_compat;
    DynBuf dbuf_s, *dbuf = &dbuf_s;

    is_compat = n_type >> 1;

    dbuf_init2(dbuf, opaque, realloc_func);
    if (dbuf_realloc(dbuf, sizeof(int) * src_len))
        goto fail;

    /* common case: latin1 is unaffected by NFC */
    if (n_type == UNICODE_NFC) {
        for(i = 0; i < src_len; i++) {
            if (src[i] >= 0x100)
                goto not_latin1;
        }
        buf = (int *)dbuf->buf;
        memcpy(buf, src, src_len * sizeof(int));
        *pdst = (uint32_t *)buf;
        return src_len;
    not_latin1: ;
    }

    to_nfd_rec(dbuf, (const int *)src, src_len, is_compat);
    if (dbuf_error(dbuf)) {
    fail:
        *pdst = NULL;
        return -1;
    }
    buf = (int *)dbuf->buf;
    buf_len = dbuf->size / sizeof(int);

    sort_cc(buf, buf_len);

    if (buf_len <= 1 || (n_type & 1) != 0) {
        /* NFD / NFKD */
        *pdst = (uint32_t *)buf;
        return buf_len;
    }

    i = 1;
    out_len = 1;
    while (i < buf_len) {
        /* find the starter character and test if it is blocked from
           the character at 'i' */
        last_cc = unicode_get_cc(buf[i]);
        starter_pos = out_len - 1;
        while (starter_pos >= 0) {
            cc = unicode_get_cc(buf[starter_pos]);
            if (cc == 0)
                break;
            if (cc >= last_cc)
                goto next;
            last_cc = 256;
            starter_pos--;
        }
        if (starter_pos >= 0 &&
            (p = compose_pair(buf[starter_pos], buf[i])) != 0) {
            buf[starter_pos] = p;
            i++;
        } else {
        next:
            buf[out_len++] = buf[i++];
        }
    }
    *pdst = (uint32_t *)buf;
    return out_len;
}