static int load_uca_file()

in mysql_strings/uca-dump.cc [63:202]


/**
  Read a UCA weight table from stdin and store it into `uca`.

  Each data line is expected to look like

    CODE [CODE ...] ; [w.w.w.w][w.w.w.w]... # comment

  Lines starting with '#', '\r' or '\n' are skipped. A line starting with
  "@version " stores its second token in uca->version. Lines without a '#'
  or without a ';' are reported on stderr and skipped.

  A line with one code point fills uca->item[code]; a line with several
  code points is stored as a contraction in uca->contraction[].

  @param uca         Destination structure (items, contractions, version).
  @param maxchar     Lines whose first code point is greater than this value
                     are skipped and counted as out-of-range.
  @param pageloaded  Per-page counters; the entry for the page of the first
                     code point (code >> MY_UCA_PSHIFT) is incremented for
                     every line that was loaded.

  @return Always 0. Fatal conditions (too long contraction, too many
          contractions, too many weights) terminate the process with
          exit(1).
*/
static int load_uca_file(MY_UCA *uca, size_t maxchar, int *pageloaded) {
  char str[512];
  size_t lineno, out_of_range_chars = 0;
  char *weights[MY_UCA_MAXWEIGHT_TO_PARSE];

  for (lineno = 0; fgets(str, sizeof(str), stdin); lineno++) {
    char *comment;
    char *weight;
    char *s, *ch[MY_UCA_MAX_CONTRACTION];
    size_t codenum, i, code;
    MY_UCA_ITEM *item = NULL;

    /* Skip empty lines and comment lines */
    if (*str == '\r' || *str == '\n' || *str == '#') continue;

    /* Detect version */
    if (*str == '@' && !strncmp(str, "@version ", 9)) {
      const char *value;
      /*
        The value comes from the input file, so it must never be used as
        the format string itself.
      */
      if (strtok(str, " \r\n\t") && (value = strtok(NULL, " \r\n\t")))
        snprintf(uca->version, MY_UCA_VERSION_SIZE, "%s", value);
      continue;
    }

    /*
      Skip big characters. A negative strtol() result becomes a huge
      size_t value and is therefore skipped as well.
    */
    if ((code = strtol(str, NULL, 16)) > maxchar) {
      out_of_range_chars++;
      continue;
    }

    /* Cut off the trailing comment; a data line must have one. */
    if ((comment = strchr(str, '#'))) {
      *comment = '\0';
    } else {
      fprintf(stderr, "Warning: could not parse line #%zu:\n'%s'\n", lineno,
              str);
      continue;
    }

    /* Split "codes ; weights" at the semicolon. */
    if ((weight = strchr(str, ';'))) {
      *weight++ = '\0';
      for (; *weight == ' '; weight++)
        ;
    } else {
      fprintf(stderr, "Warning: could not parse line #%zu:\n%s\n", lineno,
              str);
      continue;
    }

    /*
      Collect the code point tokens. ch[] holds exactly
      MY_UCA_MAX_CONTRACTION entries and is only read up to codenum,
      so no terminating entry is stored.
    */
    for (codenum = 0, s = strtok(str, " \t"); s;
         codenum++, s = strtok(NULL, " \t")) {
      if (codenum == MY_UCA_MAX_CONTRACTION) {
        fprintf(stderr, "Contraction length is too long (%zu) line #%zu",
                codenum, lineno);
        exit(1);
      }
      ch[codenum] = s;
    }

    if (codenum > 1) {
      /* Multi-character sequence: store it as a contraction. */
      MY_UCA_CONTRACTION *c = &uca->contraction[uca->ncontractions++];

      if (uca->ncontractions >= MY_UCA_NCONTRACTIONS) {
        fprintf(stderr,
                "Too many contractions (%lu) at line #%zu\n"
                "Rebuild with a bigger MY_UCA_MAXCONTRACTIONS value\n",
                (unsigned long)uca->ncontractions, lineno);
        exit(1);
      }
      /* Copy codepoints of the contraction parts, zero-fill the rest */
      for (i = 0; i < MY_UCA_MAX_CONTRACTION; i++) {
        c->ch[i] = (i < codenum) ? (uint)strtol(ch[i], NULL, 16) : 0;
      }

      if (uca->debug)
        fprintf(stderr, "Contraction: %04X-%04X-%04X\n",
                (unsigned int)c->ch[0], (unsigned int)c->ch[1],
                (unsigned int)c->ch[2]);
      item = &c->item;
    } else {
      item = &uca->item[code];
    }

    /*
      Split weight string into separate weights

      "[p1.s1.t1.q1][p2.s2.t2.q2][p3.s3.t3.q3]" ->

      "p1.s1.t1.q1" "p2.s2.t2.q2" "p3.s3.t3.q3"
    */
    item->num = 0;
    s = strtok(weight, " []");
    while (s) {
      if (item->num >= MY_UCA_MAXWEIGHT_TO_PARSE) {
        fprintf(stderr, "Line #%zu has more than %d weights\n", lineno,
                (int)MY_UCA_MAXWEIGHT_TO_PARSE);
        fprintf(stderr, "Can't continue.\n");
        exit(1);
      }
      weights[item->num] = s;
      s = strtok(NULL, " []");
      item->num++;
    }

    for (i = 0; i < item->num; i++) {
      size_t level = 0;

      /* Only the first MY_UCA_MAXWEIGHT_TO_DUMP weights are kept. */
      if (i >= MY_UCA_MAXWEIGHT_TO_DUMP) {
        fprintf(stderr,
                "Warning: at line %zu: character %04zX has"
                " more than %d many weights (%d). "
                "Skipping the extra weights.\n",
                lineno, code, (int)MY_UCA_MAXWEIGHT_TO_DUMP, (int)item->num);
        item->num = MY_UCA_MAXWEIGHT_TO_DUMP;
        break;
      }

      /*
        Parse the hex fields of one weight, one level per field.
        Every field is preceded by a one-character prefix, which is
        skipped with s + 1; this also guarantees forward progress when
        strtol() converts nothing.

        NOTE(review): `level` is not checked against the first dimension
        of item->weight, which is declared outside this function; confirm
        that the input never has more fields per weight than that.
      */
      for (s = weights[i]; *s;) {
        char *endptr;
        size_t part = strtol(s + 1, &endptr, 16);
        item->weight[level][i] = part;
        s = endptr;
        level++;
      }
    }
    /* Mark that a character from this page was loaded */
    pageloaded[code >> MY_UCA_PSHIFT]++;
  }

  if (out_of_range_chars)
    fprintf(stderr, "%zu out-of-range characters skipped\n",
            out_of_range_chars);

  return 0;
}