in mysql_strings/uca-dump.cc [63:202]
/**
  Read UCA weight data from stdin and store it in @p uca.

  Lines are handled as follows:
    - lines starting with CR, LF or '#' are ignored;
    - a line starting with "@version " stores its second whitespace-separated
      token in uca->version;
    - any other line is expected to have the shape
        "<hex code> [<hex code> ...] ; [w.w.w.w][w.w.w.w]... # comment"
      Lines without a '#' or without a ';' are reported on stderr and skipped.

  A line with a single code point fills uca->item[code]; a line with several
  code points (a contraction) takes the next slot of uca->contraction[].

  @param uca         Destination structure (version, item[], contraction[],
                     ncontractions are written; debug is read).
  @param maxchar     Lines whose leading code point is greater than this
                     value are counted and skipped.
  @param pageloaded  Counters indexed by (code >> MY_UCA_PSHIFT); the entry
                     for the line's leading code point is incremented for
                     every line that is loaded.

  @return Always 0. Unrecoverable input problems (too long a contraction,
          contraction table full, too many weights on a line) print a
          message and terminate the process with exit(1).

  NOTE(review): uca->item[] is indexed with any code <= maxchar and
  pageloaded[] with (code >> MY_UCA_PSHIFT); both are presumably sized by
  the caller for maxchar - confirm.
*/
static int load_uca_file(MY_UCA *uca, size_t maxchar, int *pageloaded) {
  char str[512];
  size_t lineno, out_of_range_chars = 0;
  char *weights[MY_UCA_MAXWEIGHT_TO_PARSE];

  /* lineno is zero-based and counts every line read, including skipped ones */
  for (lineno = 0; fgets(str, sizeof(str), stdin); lineno++) {
    char *comment;
    char *weight;
    char *s, *ch[MY_UCA_MAX_CONTRACTION];
    size_t codenum, i, code;
    MY_UCA_ITEM *item = NULL;

    /* Skip empty lines and comment lines */
    if (*str == '\r' || *str == '\n' || *str == '#') continue;

    /* Detect version */
    if (*str == '@' && !strncmp(str, "@version ", 9)) {
      const char *value;
      /*
        The version token comes from the input, so it must be passed as
        data, never as the format string itself.
      */
      if (strtok(str, " \r\n\t") && (value = strtok(NULL, " \r\n\t")))
        snprintf(uca->version, MY_UCA_VERSION_SIZE, "%s", value);
      continue;
    }

    /*
      Skip big characters. A negative strtol() result converts to a huge
      size_t value and is therefore skipped as well.
    */
    code = (size_t)strtol(str, NULL, 16);
    if (code > maxchar) {
      out_of_range_chars++;
      continue;
    }

    /* Cut off the trailing "# ..." comment; a data line must have one */
    comment = strchr(str, '#');
    if (!comment) {
      fprintf(stderr, "Warning: could not parse line #%zu:\n'%s'\n", lineno,
              str);
      continue;
    }
    *comment = '\0';

    /* Split "<codes> ; <weights>" at the semicolon */
    weight = strchr(str, ';');
    if (!weight) {
      fprintf(stderr, "Warning: could not parse line #%zu:\n%s\n", lineno,
              str);
      continue;
    }
    *weight++ = '\0';
    while (*weight == ' ') weight++;

    /* Collect the code point tokens that precede the semicolon */
    for (codenum = 0, s = strtok(str, " \t"); s;
         codenum++, s = strtok(NULL, " \t")) {
      if (codenum == MY_UCA_MAX_CONTRACTION) {
        fprintf(stderr, "Contraction length is too long (%zu) line #%zu\n",
                codenum, lineno);
        exit(1);
      }
      /* Only ch[0 .. codenum-1] is ever read, so no terminator is stored */
      ch[codenum] = s;
    }

    if (codenum > 1) {
      /* Several code points on one line: store as a contraction */
      MY_UCA_CONTRACTION *c = &uca->contraction[uca->ncontractions++];

      if (uca->ncontractions >= MY_UCA_NCONTRACTIONS) {
        fprintf(stderr,
                "Too many contractions (%zu) at line #%zu\n"
                "Rebuild with a bigger MY_UCA_NCONTRACTIONS value\n",
                (size_t)uca->ncontractions, lineno);
        exit(1);
      }

      /* Copy codepoints of the contraction parts, zero-filling the rest */
      for (i = 0; i < MY_UCA_MAX_CONTRACTION; i++) {
        c->ch[i] = (i < codenum) ? (uint)strtol(ch[i], NULL, 16) : 0;
      }

      if (uca->debug)
        fprintf(stderr, "Contraction: %04X-%04X-%04X\n", (unsigned int)c->ch[0],
                (unsigned int)c->ch[1], (unsigned int)c->ch[2]);
      item = &c->item;
    } else {
      item = &uca->item[code];
    }

    /*
      Split weight string into separate weights
      "[p1.s1.t1.q1][p2.s2.t2.q2][p3.s3.t3.q3]" ->
      "p1.s1.t1.q1" "p2.s2.t2.q2" "p3.s3.t3.q3"
    */
    item->num = 0;
    for (s = strtok(weight, " []"); s; s = strtok(NULL, " []")) {
      if (item->num >= MY_UCA_MAXWEIGHT_TO_PARSE) {
        fprintf(stderr, "Line #%zu has more than %d weights\n", lineno,
                (int)MY_UCA_MAXWEIGHT_TO_PARSE);
        fprintf(stderr, "Can't continue.\n");
        exit(1);
      }
      weights[item->num] = s;
      item->num++;
    }

    /* Only the first MY_UCA_MAXWEIGHT_TO_DUMP weights are kept */
    if (item->num > MY_UCA_MAXWEIGHT_TO_DUMP) {
      fprintf(stderr,
              "Warning: at line %zu: character %04zX has"
              " more than %d many weights (%d). "
              "Skipping the extra weights.\n",
              lineno, code, (int)MY_UCA_MAXWEIGHT_TO_DUMP, (int)item->num);
      item->num = MY_UCA_MAXWEIGHT_TO_DUMP;
    }

    for (i = 0; i < (size_t)item->num; i++) {
      size_t level = 0;
      /*
        Each weight looks like ".p.s.t.q": skip the one-character separator,
        parse a hex number, and continue from where strtol() stopped.

        NOTE(review): 'level' is not bounded here; a weight with more parts
        than item->weight has levels would write past the array. The number
        of levels is not visible from this function - confirm and add a
        check.
      */
      for (s = weights[i]; *s;) {
        char *endptr;
        size_t part = strtol(s + 1, &endptr, 16);
        item->weight[level][i] = part;
        s = endptr;
        level++;
      }
    }

    /* Mark that a character from this page was loaded */
    pageloaded[code >> MY_UCA_PSHIFT]++;
  }

  if (out_of_range_chars)
    fprintf(stderr, "%zu out-of-range characters skipped\n",
            out_of_range_chars);
  return 0;
}