void parse_unicode_data()

in GaiaXAndroidQuickJS/quickjs/gxquickjs/unicode_gen.c [284:419]


void parse_unicode_data(const char *filename)
{
    FILE *f;
    char line[1024];
    char buf1[256];
    const char *p;
    int code, lc, uc, last_code;
    CCInfo *ci, *tab = unicode_db;
    
    f = fopen(filename, "rb");
    if (!f) {
        perror(filename);
        exit(1);
    }

    last_code = 0;
    for(;;) {
        if (!get_line(line, sizeof(line), f))
            break;
        p = line;
        while (isspace(*p))
            p++;
        if (*p == '#')
            continue;

        p = get_field(line, 0);
        if (!p)
            continue;
        code = strtoul(p, NULL, 16);
        lc = 0;
        uc = 0;
        
        p = get_field(line, 12);
        if (p && *p != ';') {
            uc = strtoul(p, NULL, 16);
        }

        p = get_field(line, 13);
        if (p && *p != ';') {
            lc = strtoul(p, NULL, 16);
        }
        ci = &tab[code];
        if (uc > 0 || lc > 0) {
            assert(code <= CHARCODE_MAX);
            if (uc > 0) {
                assert(ci->u_len == 0);
                ci->u_len = 1;
                ci->u_data[0] = uc;
            }
            if (lc > 0) {
                assert(ci->l_len == 0);
                ci->l_len = 1;
                ci->l_data[0] = lc;
            }
        }

        {
            int i;
            get_field_buf(buf1, sizeof(buf1), line, 2);
            i = find_name(unicode_gc_name, countof(unicode_gc_name), buf1);
            if (i < 0) {
                fprintf(stderr, "General category '%s' not found\n",
                        buf1);
                exit(1);
            }
            ci->general_category = i;
        }
        
        p = get_field(line, 3);
        if (p && *p != ';' && *p != '\0') {
            int cc;
            cc = strtoul(p, NULL, 0);
            if (cc != 0) {
                assert(code <= CHARCODE_MAX);
                ci->combining_class = cc;
                //                printf("%05x: %d\n", code, ci->combining_class);
            }
        }

        p = get_field(line, 5);
        if (p && *p != ';' && *p != '\0') {
            int size;
            assert(code <= CHARCODE_MAX);
            ci->is_compat = 0;
            if (*p == '<') {
                while (*p != '\0' && *p != '>')
                    p++;
                if (*p == '>')
                    p++;
                ci->is_compat = 1;
            }
            size = 0;
            for(;;) {
                while (isspace(*p))
                    p++;
                if (!isxdigit(*p))
                    break;
                add_char(&ci->decomp_data, &size, &ci->decomp_len, strtoul(p, (char **)&p, 16));
            }
#if 0
            {
                int i;
                static int count, d_count;

                printf("%05x: %c", code, ci->is_compat ? 'C': ' ');
                for(i = 0; i < ci->decomp_len; i++)
                    printf(" %05x", ci->decomp_data[i]);
                printf("\n");
                count++;
                d_count += ci->decomp_len;
                //                printf("%d %d\n", count, d_count);
            }
#endif
        }

        p = get_field(line, 9);
        if (p && *p == 'Y') {
            set_prop(code, PROP_Bidi_Mirrored, 1);
        }
        
        /* handle ranges */
        get_field_buf(buf1, sizeof(buf1), line, 1);
        if (strstr(buf1, " Last>")) {
            int i;
            //            printf("range: 0x%x-%0x\n", last_code, code);
            assert(ci->decomp_len == 0);
            assert(ci->script_ext_len == 0);
            for(i = last_code + 1; i < code; i++) {
                unicode_db[i] = *ci;
            }
        }
        last_code = code;
    }
        
    fclose(f);
}