int cr_regexp_canonicalize()

in src/couch_quickjs/quickjs/libunicode.c [657:739]


int cr_regexp_canonicalize(CharRange *cr, BOOL is_unicode)
{
    CharRange cr_inter, cr_mask, cr_result, cr_sub;
    uint32_t v, code, len, i, idx, start, end, c, d_start, d_end, d;

    cr_init(&cr_mask, cr->mem_opaque, cr->realloc_func);
    cr_init(&cr_inter, cr->mem_opaque, cr->realloc_func);
    cr_init(&cr_result, cr->mem_opaque, cr->realloc_func);
    cr_init(&cr_sub, cr->mem_opaque, cr->realloc_func);

    if (unicode_case1(&cr_mask, is_unicode ? CASE_F : CASE_U))
        goto fail;
    if (cr_op(&cr_inter, cr_mask.points, cr_mask.len, cr->points, cr->len, CR_OP_INTER))
        goto fail;

    if (cr_invert(&cr_mask))
        goto fail;
    if (cr_op(&cr_sub, cr_mask.points, cr_mask.len, cr->points, cr->len, CR_OP_INTER))
        goto fail;

    /* cr_inter = cr & cr_mask */
    /* cr_sub = cr & ~cr_mask */

    /* use the case conversion table to compute the result */
    d_start = -1;
    d_end = -1;
    idx = 0;
    v = case_conv_table1[idx];
    code = v >> (32 - 17);
    len = (v >> (32 - 17 - 7)) & 0x7f;
    for(i = 0; i < cr_inter.len; i += 2) {
        start = cr_inter.points[i];
        end = cr_inter.points[i + 1];

        for(c = start; c < end; c++) {
            for(;;) {
                if (c >= code && c < code + len)
                    break;
                idx++;
                assert(idx < countof(case_conv_table1));
                v = case_conv_table1[idx];
                code = v >> (32 - 17);
                len = (v >> (32 - 17 - 7)) & 0x7f;
            }
            d = lre_case_folding_entry(c, idx, v, is_unicode);
            /* try to merge with the current interval */
            if (d_start == -1) {
                d_start = d;
                d_end = d + 1;
            } else if (d_end == d) {
                d_end++;
            } else {
                cr_add_interval(&cr_result, d_start, d_end);
                d_start = d;
                d_end = d + 1;
            }
        }
    }
    if (d_start != -1) {
        if (cr_add_interval(&cr_result, d_start, d_end))
            goto fail;
    }

    /* the resulting ranges are not necessarily sorted and may overlap */
    cr_sort_and_remove_overlap(&cr_result);

    /* or with the character not affected by the case folding */
    cr->len = 0;
    if (cr_op(cr, cr_result.points, cr_result.len, cr_sub.points, cr_sub.len, CR_OP_UNION))
        goto fail;

    cr_free(&cr_inter);
    cr_free(&cr_mask);
    cr_free(&cr_result);
    cr_free(&cr_sub);
    return 0;
 fail:
    cr_free(&cr_inter);
    cr_free(&cr_mask);
    cr_free(&cr_result);
    cr_free(&cr_sub);
    return -1;
}