static int unicode_general_category1()

in src/couch_quickjs/quickjs/libunicode.c [1392:1451]


static int unicode_general_category1(CharRange *cr, uint32_t gc_mask)
{
    const uint8_t *p, *p_end;
    uint32_t c, c0, b, n, v;

    p = unicode_gc_table;
    p_end = unicode_gc_table + countof(unicode_gc_table);
    c = 0;
    /* Compressed range encoding:
       initial byte:
       bits 0..4: category number (special case 31)
       bits 5..7: range length (add 1)
       special case bits 5..7 == 7: read an extra byte
       - 00..7F: range length (add 7 + 1)
       - 80..BF: 6-bits plus extra byte for range length (add 7 + 128)
       - C0..FF: 6-bits plus 2 extra bytes for range length (add 7 + 128 + 16384)
     */
    while (p < p_end) {
        b = *p++;
        n = b >> 5;
        v = b & 0x1f;
        if (n == 7) {
            n = *p++;
            if (n < 128) {
                n += 7;
            } else if (n < 128 + 64) {
                n = (n - 128) << 8;
                n |= *p++;
                n += 7 + 128;
            } else {
                n = (n - 128 - 64) << 16;
                n |= *p++ << 8;
                n |= *p++;
                n += 7 + 128 + (1 << 14);
            }
        }
        c0 = c;
        c += n + 1;
        if (v == 31) {
            /* run of Lu / Ll */
            b = gc_mask & (M(Lu) | M(Ll));
            if (b != 0) {
                if (b == (M(Lu) | M(Ll))) {
                    goto add_range;
                } else {
                    c0 += ((gc_mask & M(Ll)) != 0);
                    for(; c0 < c; c0 += 2) {
                        if (cr_add_interval(cr, c0, c0 + 1))
                            return -1;
                    }
                }
            }
        } else if ((gc_mask >> v) & 1) {
        add_range:
            if (cr_add_interval(cr, c0, c))
                return -1;
        }
    }
    return 0;
}