21 #include <my_global.h>
42 {
"Nl", _MY_NMR|_MY_U|_MY_L},
43 {
"No", _MY_NMR|_MY_PNT},
45 {
"Mn", _MY_L|_MY_PNT},
46 {
"Mc", _MY_L|_MY_PNT},
47 {
"Me", _MY_L|_MY_PNT},
76 ctypestr2num(
const char *tok)
79 for (p= my_ctype_name; p->name; p++)
81 if (!strncasecmp(p->name, tok, 2))
88 #define MAX_CHAR 0x10FFFF
89 #define MAX_DECOMPOSITION_LENGTH 2
96 char general_category[3];
98 int bidirectional_category;
99 uint decomposition_mapping[MAX_DECOMPOSITION_LENGTH];
100 uint decimal_digit_value;
104 char *unicode_1_0_name;
105 char *iso10646_comment_field;
106 uint uppercase_mapping;
107 uint lowercase_mapping;
108 uint titlecase_mapping;
130 p->maxchar= MAX_CHAR;
143 FILE *f= prm->fname ? fopen(prm->fname,
"r") : stdin;
146 fprintf(stderr,
"Can't open file %s\n", prm->fname);
150 while (fgets(str,
sizeof(str), f))
155 memset(&ch, 0,
sizeof(ch));
157 for(n= 0, s= str; s; n++)
159 char *end, tok[1024]=
"";
161 if((e= strchr(s,
';')))
163 strncpy(tok, s, (
unsigned int) (e - s));
171 end= tok + strlen(tok);
175 case 0: ch.code= strtol(tok, &end, 16);
break;
178 ch.general_category[0]= tok[0];
179 ch.general_category[1]= tok[1];
180 ch.general_category[2]=
'\0';
181 ch.mysql_ctype= ctypestr2num(tok);
185 ch.combining_class= atoi(tok);
197 for (dec= strtok_r(tok,
" \t", &endptr), i= 0;
199 dec= strtok_r(NULL,
" \t", &endptr), i++)
201 if (i >= MAX_DECOMPOSITION_LENGTH)
203 fprintf(stderr,
"Decomposition length is too long for character %04X\n", ch.code);
206 ch.decomposition_mapping[
i]= strtol(dec, NULL, 16);
212 ch.decimal_digit_value= atoi(tok);
216 ch.digit_value= atoi(tok);
231 if(ch.code <= prm->maxchar)
238 unidata_char_set_cjk(
MY_UNIDATA_CHAR *unidata,
int max_char,
int cur_char)
240 if (cur_char < max_char)
243 ch->mysql_ctype= _MY_L | _MY_U;
244 strcpy(ch->general_category,
"Lo");
254 for (i=
'0'; i <=
'9'; i++)
255 unidata[i].mysql_ctype= _MY_NMR;
257 for (i=
'a'; i <=
'z'; i++)
258 unidata[i].mysql_ctype|= _MY_X;
259 for (i=
'A'; i <=
'Z'; i++)
260 unidata[i].mysql_ctype|= _MY_X;
264 for(i= 0x3400; i <= 0x4DB5; i++)
265 unidata_char_set_cjk(unidata, prm->maxchar, i);
268 for(i= 0x4E00; i <= 0x9FA5; i++)
269 unidata_char_set_cjk(unidata, prm->maxchar, i);
272 for(i= 0xAC00; i <= 0xD7A3; i++)
273 unidata_char_set_cjk(unidata, prm->maxchar, i);
279 for (i= 0x20000; i <= 0x2A6D6; i++)
280 unidata_char_set_cjk(unidata, prm->maxchar, i);
286 for (i= 0x2A700; i <= 0x2B734; i++)
287 unidata_char_set_cjk(unidata, prm->maxchar, i);
318 for (i= 1; i < nchars; i++)
320 if (data[i].mysql_ctype != data->mysql_ctype)
323 return data->mysql_ctype;
330 int page, max_page= (prm->maxchar + 255) / 256;
333 printf(
" Unicode ctype data\n");
334 printf(
" Generated from %s\n", prm->fname ? prm->fname :
"stdin");
338 for(page= 0; page < max_page; page++)
340 if (page_ctype(unidata + page * 256, 256) < 0)
343 printf(
"static unsigned char uctype%s_page%02X[256]=\n{\n",
345 for(num= 0, charnum=0; charnum < 256; charnum++)
347 printf(
" %2d%s", unidata[page * 256 + charnum].mysql_ctype,
348 charnum < 255 ?
"," :
"");
360 printf(
"MY_UNI_CTYPE my_uni_ctype%s[%d]={\n", prm->varname, max_page);
361 for(page= 0; page < max_page; page++)
363 char page_name[128]=
"NULL";
365 if ((ctype= page_ctype(unidata + page * 256, 256)) < 0)
367 sprintf(page_name,
"uctype%s_page%02X", prm->varname, page);
370 printf(
"\t{%d,%s}%s\n", ctype, page_name, page < max_page - 1 ?
"," :
"");
390 uint pageno, uint nchars)
392 uint
i, ofs= pageno * 256;
393 printf(
"static MY_UNI_DECOMPOSITION decomp%s_p%02X[256]= {\n",
394 prm->varname, pageno);
395 for (i= 0; i < nchars; i++)
399 printf(
"/* %04X */ {0x%04X,0x%04X},",
400 ofs + i, ch->decomposition_mapping[0], ch->decomposition_mapping[1]);
402 if (ch->decomposition_mapping[0])
403 printf(
" %s/* [%s-%s][%d-%d] */",
404 ch->decomposition_mapping[0] < 0x10000 ?
" " :
"",
405 unidata[ch->decomposition_mapping[0]].general_category,
406 unidata[ch->decomposition_mapping[1]].general_category,
407 unidata[ch->decomposition_mapping[0]].combining_class,
408 unidata[ch->decomposition_mapping[1]].combining_class);
419 for (n= i= 0; i < nchars; i++)
421 if (unidata[i].decomposition_mapping[0])
431 int i, npages= (prm->maxchar + 255) / 256;
434 printf(
" Unicode canonical decomposition data\n");
435 printf(
" Generated from %s\n", prm->fname ? prm->fname :
"stdin");
439 for (i= 0; i < npages; i++)
442 if (calc_decompositions(page, 256))
443 dump_decomposition_page(prm, unidata, i, 256);
447 printf(
"static MY_UNI_DECOMPOSITION *my_uni_decomp%s[%d]=\n{\n",
448 prm->varname, npages);
449 for (i= 0; i < npages; i++)
452 if (calc_decompositions(page, 256))
453 printf(
"decom%s_p%02X,", prm->varname, i);
464 usage(FILE *f,
int rc)
471 get_int_option(
const char *str,
const char *
name,
int *num)
473 size_t namelen= strlen(name);
474 if (!strncmp(str, name, namelen))
476 const char *val= str + namelen;
477 if (val[0] ==
'0' && val[1] ==
'x')
479 *num= strtol(val, NULL, 16);
484 if (*num == 0 && *val !=
'0')
486 fprintf(stderr,
"\nBad numeric option value: %s\n\n", str);
497 get_const_str_option(
const char *str,
const char *name,
const char **val)
499 size_t namelen= strlen(name);
500 if (!strncmp(str, name, namelen))
513 unidata_param_init(prm);
514 for (i= 1; i < ac ; i++)
517 if (av[i][0] !=
'-' || av[i][1] !=
'-')
519 if (!get_const_str_option(av[i],
"--name=", &prm->varname) &&
520 !get_int_option(av[i],
"--maxchar=", &prm->maxchar) &&
521 !get_int_option(av[i],
"--ctype=", &prm->ctype) &&
522 !get_int_option(av[i],
"--decomp=", &prm->decomp) &&
523 !get_int_option(av[i],
"--debug=", &prm->debug))
525 fprintf(stderr,
"\nUnknown option: %s\n\n", av[i]);
533 int main(
int ac,
char ** av)
538 process_options(&prm, ac, av);
539 memset(unidata, 0,
sizeof(unidata));
540 fill_implicit_ctype(&prm, unidata);
541 load_unidata(&prm, unidata);
544 dump_ctype(&prm, unidata);
547 dump_decomposition(&prm, unidata);