16 #include "mysys_priv.h"
17 #include "mysys_err.h"
36 return ((cs1 == cs2) || !strcmp(cs1->csname,cs2->csname));
/*
  Look up a collation by name in all_charsets.

  Walks every slot of all_charsets, skips empty slots and entries without a
  name, and compares each collation name against the name argument with
  my_strcasecmp() under my_charset_latin1.

  NOTE(review): the statements executed on a match and after the loop are
  not visible in this excerpt; callers in this file treat a zero result as
  not found -- confirm.
*/
41 get_collation_number_internal(
const char *
name)
44 for (cs= all_charsets;
45 cs < all_charsets + array_elements(all_charsets);
48 if ( cs[0] && cs[0]->name &&
49 !my_strcasecmp(&my_charset_latin1, cs[0]->name, name))
/*
  Build the lexer classification tables of a charset.
  NOTE(review): the function header is not visible here; this appears to be
  the body of init_state_maps(), which other code in this file calls and
  tests for a non-zero result -- confirm.

  Steps visible in this fragment:
    - Allocate two 256-byte tables with my_once_alloc() and store them in
      cs->state_map and cs->ident_map.
    - For every byte value 0..255 pick a state: MY_LEX_IDENT for the first
      branch (its condition is elided), MY_LEX_NUMBER_IDENT for digits,
      MY_LEX_IDENT for multi-byte lead bytes (only when USE_MB and
      USE_MB_IDENT are defined), MY_LEX_SKIP for whitespace, and MY_LEX_CHAR
      as the last assignment.
    - Override the state of individual punctuation characters and of byte 0
      (MY_LEX_EOL).
    - Fill ident_map: an entry is 1 when the state is MY_LEX_IDENT or
      MY_LEX_NUMBER_IDENT, otherwise 0.
    - Only after ident_map is filled, give x/X, b/B and n/N their special
      IDENT_OR_HEX / IDENT_OR_BIN / IDENT_OR_NCHAR states, so those letters
      are still recorded as identifier characters in ident_map.
*/
62 if (!(cs->state_map= (uchar*) my_once_alloc(256, MYF(MY_WME))))
65 if (!(cs->ident_map= (uchar*) my_once_alloc(256, MYF(MY_WME))))
68 state_map= cs->state_map;
69 ident_map= cs->ident_map;
72 for (i=0; i < 256 ; i++)
75 state_map[
i]=(uchar) MY_LEX_IDENT;
76 else if (my_isdigit(cs,i))
77 state_map[i]=(uchar) MY_LEX_NUMBER_IDENT;
78 #if defined(USE_MB) && defined(USE_MB_IDENT)
79 else if (my_mbcharlen(cs, i)>1)
80 state_map[i]=(uchar) MY_LEX_IDENT;
82 else if (my_isspace(cs,i))
83 state_map[i]=(uchar) MY_LEX_SKIP;
85 state_map[
i]=(uchar) MY_LEX_CHAR;
87 state_map[(uchar)
'_']=state_map[(uchar)
'$']=(uchar) MY_LEX_IDENT;
88 state_map[(uchar)
'\'']=(uchar) MY_LEX_STRING;
89 state_map[(uchar)'.']=(uchar) MY_LEX_REAL_OR_POINT;
90 state_map[(uchar)'>']=state_map[(uchar)'=']=state_map[(uchar)'!']= (uchar) MY_LEX_CMP_OP;
91 state_map[(uchar)'<']= (uchar) MY_LEX_LONG_CMP_OP;
92 state_map[(uchar)'&']=state_map[(uchar)'|']=(uchar) MY_LEX_BOOL;
93 state_map[(uchar)'
#']=(uchar) MY_LEX_COMMENT;
94 state_map[(uchar)
';']=(uchar) MY_LEX_SEMICOLON;
95 state_map[(uchar)':']=(uchar) MY_LEX_SET_VAR;
96 state_map[0]=(uchar) MY_LEX_EOL;
97 state_map[(uchar)'\\']= (uchar) MY_LEX_ESCAPE;
98 state_map[(uchar)'/']= (uchar) MY_LEX_LONG_COMMENT;
99 state_map[(uchar)'*']= (uchar) MY_LEX_END_LONG_COMMENT;
100 state_map[(uchar)'@']= (uchar) MY_LEX_USER_END;
101 state_map[(uchar) '`']= (uchar) MY_LEX_USER_VARIABLE_DELIMITER;
102 state_map[(uchar)'"']= (uchar) MY_LEX_STRING_OR_DELIMITER;
107 for (i=0; i < 256 ; i++)
109 ident_map[
i]= (uchar) (state_map[i] == MY_LEX_IDENT ||
110 state_map[i] == MY_LEX_NUMBER_IDENT);
114 state_map[(uchar)
'x']= state_map[(uchar)
'X']= (uchar) MY_LEX_IDENT_OR_HEX;
115 state_map[(uchar)
'b']= state_map[(uchar)
'B']= (uchar) MY_LEX_IDENT_OR_BIN;
116 state_map[(uchar)
'n']= state_map[(uchar)
'N']= (uchar) MY_LEX_IDENT_OR_NCHAR;
/*
  Install the handler tables of a simple 8-bit charset.
  NOTE(review): header not visible; presumably simple_cs_init_functions(),
  which is called elsewhere in this file with a single charset argument.

  The binary collation handler is chosen when MY_CS_BINSORT is set in
  cs->state; the simple case-insensitive handler is the alternative (the
  else keyword itself is on an elided line -- confirm). The charset handler
  is always the 8-bit one.
*/
123 if (cs->state & MY_CS_BINSORT)
124 cs->coll= &my_collation_8bit_bin_handler;
126 cs->coll= &my_collation_8bit_simple_ci_handler;
128 cs->cset= &my_charset_8bit_handler;
/*
  Copy charset description data from one CHARSET_INFO (from) into
  another (to).
  NOTE(review): header not visible; presumably cs_copy_data(to, from), whose
  result is tested in an if by its caller in this file.

  Visible behaviour:
    - to->number is overwritten only when from->number is non-zero.
    - csname, name, comment and tailoring are duplicated with
      my_once_strdup(); the guards around these copies are on elided lines.
    - ctype, to_lower, to_upper, sort_order and tab_to_uni are duplicated
      with my_once_memdup() using the matching MY_CS_*_TABLE_SIZE constant;
      tab_to_uni is sized as MY_CS_TO_UNI_TABLE_SIZE elements of uint16.
    - init_state_maps(to) is called after ctype has been copied.
    - sort_order and tab_to_uni are copied only when present in from.
  Each allocation result is checked; the failure path is not visible here.
*/
135 to->number= from->number ? from->number : to->number;
138 if (!(to->csname= my_once_strdup(from->csname,MYF(MY_WME))))
142 if (!(to->name= my_once_strdup(from->name,MYF(MY_WME))))
146 if (!(to->comment= my_once_strdup(from->comment,MYF(MY_WME))))
151 if (!(to->ctype= (uchar*) my_once_memdup((
char*) from->ctype,
152 MY_CS_CTYPE_TABLE_SIZE,
155 if (init_state_maps(to))
159 if (!(to->to_lower= (uchar*) my_once_memdup((
char*) from->to_lower,
160 MY_CS_TO_LOWER_TABLE_SIZE,
165 if (!(to->to_upper= (uchar*) my_once_memdup((
char*) from->to_upper,
166 MY_CS_TO_UPPER_TABLE_SIZE,
169 if (from->sort_order)
171 if (!(to->sort_order= (uchar*) my_once_memdup((
char*) from->sort_order,
172 MY_CS_SORT_ORDER_TABLE_SIZE,
177 if (from->tab_to_uni)
179 uint sz= MY_CS_TO_UNI_TABLE_SIZE*
sizeof(uint16);
180 if (!(to->tab_to_uni= (uint16*) my_once_memdup((
char*)from->tab_to_uni,
185 if (!(to->tailoring= my_once_strdup(from->tailoring,MYF(MY_WME))))
/*
  Report whether a simple charset has all the data it needs.
  NOTE(review): header not visible; presumably simple_cs_is_full().

  The result is true only when csname, tab_to_uni, ctype and to_upper are
  set (one more line of this condition is elided), number and name are set,
  and the charset either has a sort_order table or carries MY_CS_BINSORT.
*/
198 return ((cs->csname && cs->tab_to_uni && cs->ctype && cs->to_upper &&
200 (cs->number && cs->name &&
201 (cs->sort_order || (cs->state & MY_CS_BINSORT) )));
/*
  Make one collation (to) behave like an existing UCA collation (from).
  NOTE(review): header not visible; presumably copy_uca_collation(to, from).

  Copies the charset and collation handler pointers, the strxfrm/case
  multipliers, the min/max sort characters and the min/max character
  lengths, then ORs MY_CS_AVAILABLE, MY_CS_LOADED, MY_CS_STRNXFRM and
  MY_CS_UNICODE into to->state. Existing state bits of to are kept.
*/
208 to->cset= from->cset;
209 to->coll= from->coll;
210 to->strxfrm_multiply= from->strxfrm_multiply;
211 to->min_sort_char= from->min_sort_char;
212 to->max_sort_char= from->max_sort_char;
213 to->mbminlen= from->mbminlen;
214 to->mbmaxlen= from->mbmaxlen;
215 to->caseup_multiply= from->caseup_multiply;
216 to->casedn_multiply= from->casedn_multiply;
217 to->state|= MY_CS_AVAILABLE | MY_CS_LOADED |
218 MY_CS_STRNXFRM | MY_CS_UNICODE;
/*
  Register a parsed collation description (cs) in all_charsets.
  NOTE(review): header not visible; presumably add_collation(), the function
  installed as the loader add_collation callback in this file.

  Visible flow:
    - Proceed only when cs has a name and a usable number: either cs->number
      is already set or it can be resolved from the name, and it is inside
      the bounds of all_charsets.
    - If the slot is empty, a new CHARSET_INFO is allocated (allocation call
      elided) and zero-filled.
    - MY_CS_PRIMARY / MY_CS_BINSORT are added to cs->state when the number
      equals primary_number / binary_number, and cs->state is ORed into the
      slot state.
    - For slots that are not MY_CS_COMPILED the data is copied with
      cs_copy_data(); case multipliers and level counts are set to 1.
    - ucs2, utf8/utf8mb3, utf8mb4, utf16 and utf32 are wired to the matching
      *_unicode_ci collation via copy_uca_collation(), each only when the
      corresponding HAVE_CHARSET_* and HAVE_UCA_COLLATIONS macros are
      defined. utf8 additionally rebuilds the state maps.
    - Otherwise (branch keyword elided) the charset is treated as a simple
      8-bit one: handlers are installed, MY_CS_LOADED is set when the data is
      complete, MY_CS_AVAILABLE is set, and MY_CS_CSSORT is set when the sort
      order places A before a before B.
    - MY_CS_PUREASCII / MY_CS_NONASCII are derived from the helper checks.
    - A later section (dst) copies only number, comment, csname and name;
      its guarding condition is not visible.
    - Finally primary_number, binary_number and sort_order of cs are reset.

  NOTE(review): cs->csname is passed to strcmp() but only cs->name is
  null-checked in the visible lines -- confirm csname is always set here.
*/
224 if (cs->name && (cs->number ||
225 (cs->number=get_collation_number_internal(cs->name))) &&
226 cs->number < array_elements(all_charsets))
228 if (!all_charsets[cs->number])
230 if (!(all_charsets[cs->number]=
233 memset(all_charsets[cs->number], 0,
sizeof(
CHARSET_INFO));
236 if (cs->primary_number == cs->number)
237 cs->state |= MY_CS_PRIMARY;
239 if (cs->binary_number == cs->number)
240 cs->state |= MY_CS_BINSORT;
242 all_charsets[cs->number]->state|= cs->state;
244 if (!(all_charsets[cs->number]->state & MY_CS_COMPILED))
247 if (cs_copy_data(all_charsets[cs->number],cs))
250 newcs->caseup_multiply= newcs->casedn_multiply= 1;
251 newcs->levels_for_compare= 1;
252 newcs->levels_for_order= 1;
254 if (!strcmp(cs->csname,
"ucs2") )
256 #if defined(HAVE_CHARSET_ucs2) && defined(HAVE_UCA_COLLATIONS)
257 copy_uca_collation(newcs, &my_charset_ucs2_unicode_ci);
258 newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII;
261 else if (!strcmp(cs->csname,
"utf8") || !strcmp(cs->csname,
"utf8mb3"))
263 #if defined (HAVE_CHARSET_utf8) && defined(HAVE_UCA_COLLATIONS)
264 copy_uca_collation(newcs, &my_charset_utf8_unicode_ci);
265 newcs->ctype= my_charset_utf8_unicode_ci.ctype;
266 if (init_state_maps(newcs))
270 else if (!strcmp(cs->csname,
"utf8mb4"))
272 #if defined (HAVE_CHARSET_utf8mb4) && defined(HAVE_UCA_COLLATIONS)
273 copy_uca_collation(newcs, &my_charset_utf8mb4_unicode_ci);
274 newcs->ctype= my_charset_utf8mb4_unicode_ci.ctype;
275 newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED;
278 else if (!strcmp(cs->csname,
"utf16"))
280 #if defined (HAVE_CHARSET_utf16) && defined(HAVE_UCA_COLLATIONS)
281 copy_uca_collation(newcs, &my_charset_utf16_unicode_ci);
282 newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII;
285 else if (!strcmp(cs->csname,
"utf32"))
287 #if defined (HAVE_CHARSET_utf32) && defined(HAVE_UCA_COLLATIONS)
288 copy_uca_collation(newcs, &my_charset_utf32_unicode_ci);
289 newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII;
294 uchar *sort_order= all_charsets[cs->number]->sort_order;
295 simple_cs_init_functions(all_charsets[cs->number]);
298 if (simple_cs_is_full(all_charsets[cs->number]))
300 all_charsets[cs->number]->state |= MY_CS_LOADED;
302 all_charsets[cs->number]->state|= MY_CS_AVAILABLE;
310 if (sort_order && sort_order[
'A'] < sort_order[
'a'] &&
311 sort_order[
'a'] < sort_order[
'B'])
312 all_charsets[cs->number]->state|= MY_CS_CSSORT;
314 if (my_charset_is_8bit_pure_ascii(all_charsets[cs->number]))
315 all_charsets[cs->number]->state|= MY_CS_PUREASCII;
316 if (!my_charset_is_ascii_compatible(cs))
317 all_charsets[cs->number]->state|= MY_CS_NONASCII;
332 dst->number= cs->number;
334 if (!(dst->comment= my_once_strdup(cs->comment,MYF(MY_WME))))
337 if (!(dst->csname= my_once_strdup(cs->csname,MYF(MY_WME))))
340 if (!(dst->name= my_once_strdup(cs->name,MYF(MY_WME))))
344 cs->primary_number= 0;
345 cs->binary_number= 0;
348 cs->sort_order= NULL;
/*
  Default charset error reporter. Both visible parameters (level, format)
  are marked unused; the remaining parameters and the body are not visible
  in this excerpt.
*/
360 default_reporter(
enum loglevel
level __attribute__ ((unused)),
361 const char *format __attribute__ ((unused)),
/*
  Reporter hook for charset errors; starts out as default_reporter. The
  loader initialisation in this file copies this pointer into
  loader->reporter.
*/
365 my_error_reporter my_charset_error_reporter= default_reporter;
/*
  Adapter around my_once_alloc() that takes only a size and always passes
  MYF(MY_WME) as the flags argument. It is installed as loader->once_alloc
  in this file.
*/
373 my_once_alloc_c(
size_t size)
374 {
return my_once_alloc(size, MYF(MY_WME)); }
/*
  Adapter around my_malloc() that takes only a size and always passes
  MYF(MY_WME) as the flags argument. It is installed as loader->malloc in
  this file.
*/
378 my_malloc_c(
size_t size)
379 {
return my_malloc(size, MYF(MY_WME)); }
/*
  Adapter around my_realloc() that takes the old pointer and the new size
  and always passes MYF(MY_WME) as the flags argument. It is installed as
  loader->realloc in this file.
*/
383 my_realloc_c(
void *old,
size_t size)
384 {
return my_realloc(old, size, MYF(MY_WME)); }
/*
  Initialise a charset loader with the mysys callbacks.
  NOTE(review): header not visible; presumably
  my_charset_loader_init_mysys(loader), which is called throughout this file.

  Clears the error buffer and installs the allocation wrappers defined in
  this file, my_free, the current my_charset_error_reporter and the
  add_collation callback.
*/
394 loader->error[0]=
'\0';
395 loader->once_alloc= my_once_alloc_c;
396 loader->malloc= my_malloc_c;
397 loader->realloc= my_realloc_c;
398 loader->free= my_free;
399 loader->reporter= my_charset_error_reporter;
400 loader->add_collation= add_collation;
/*
  Largest charset definition file, in bytes, that is accepted for loading:
  the file size is compared against this limit before a buffer of that size
  is allocated. The value is parenthesized so the macro expands as a single
  operand inside any surrounding expression.
*/
#define MY_MAX_ALLOWED_BUF (1024*1024)
/* File name of the charset index; appended to the charsets directory. */
405 #define MY_CHARSET_INDEX "Index.xml"
/*
  Optional override of the charsets directory. When it is non-NULL,
  get_charsets_dir() copies it instead of deriving a path.
*/
407 const char *charsets_dir= NULL;
/*
  Read a charset definition file and feed it to the XML parser.
  NOTE(review): the first lines of the header are not visible; presumably
  my_read_charset_file(loader, filename, myflags).

  Visible flow:
    - stat the file; give up when stat fails, when the size exceeds
      MY_MAX_ALLOWED_BUF, or when a buffer of that size cannot be allocated.
      The size is narrowed to uint before the comparison.
    - Open the file read-only through mysql_file_open().
    - (Read and close steps are on elided lines.)
    - Parse the buffer with my_parse_charset_xml(); on a non-zero result an
      EE_UNKNOWN_CHARSET message with the file name and loader->error is
      emitted.
*/
412 const char *filename,
420 if (!my_stat(filename, &stat_info, MYF(myflags)) ||
421 ((len= (uint)stat_info.st_size) > MY_MAX_ALLOWED_BUF) ||
422 !(buf= (uchar*) my_malloc(len,myflags)))
425 if ((fd=
mysql_file_open(key_file_charset, filename, O_RDONLY, myflags)) < 0)
432 if (my_parse_charset_xml(loader, (
char *) buf, len))
434 my_printf_error(EE_UNKNOWN_CHARSET,
"Error while parsing '%s': %s\n",
435 MYF(0), filename, loader->error);
/*
  Write the charsets directory path into buf.

  When charsets_dir is set it is copied (at most FN_REFLEN-1 characters).
  Otherwise the path is built from SHAREDIR and CHARSET_DIR: SHAREDIR is used
  directly when it is a hard path or is prefixed by DEFAULT_CHARSET_HOME, and
  is placed under DEFAULT_CHARSET_HOME in the remaining case (the else
  keywords are on elided lines). The result is then normalised in place with
  convert_dirname() and its return value kept in res.

  NOTE(review): the return statement is not visible. Callers in this file
  append a file name at the returned pointer, so it presumably points at the
  end of the directory string -- confirm. buf is assumed to hold at least
  FN_REFLEN bytes -- confirm against callers.
*/
448 char *get_charsets_dir(
char *buf)
450 const char *sharedir= SHAREDIR;
452 DBUG_ENTER(
"get_charsets_dir");
454 if (charsets_dir != NULL)
455 strmake(buf, charsets_dir, FN_REFLEN-1);
458 if (test_if_hard_path(sharedir) ||
459 is_prefix(sharedir, DEFAULT_CHARSET_HOME))
460 strxmov(buf, sharedir,
"/", CHARSET_DIR, NullS);
462 strxmov(buf, DEFAULT_CHARSET_HOME,
"/", sharedir,
"/", CHARSET_DIR,
465 res= convert_dirname(buf,buf,NullS);
466 DBUG_PRINT(
"info",(
"charsets dir: '%s'", buf));
/*
  Registry of known charsets/collations, indexed by collation number.
  Empty slots are NULL.
*/
470 CHARSET_INFO *all_charsets[MY_ALL_CHARSETS_SIZE]={NULL};
/* Charset used by default; initially latin1. */
471 CHARSET_INFO *default_charset_info = &my_charset_latin1;
475 DBUG_ASSERT(cs->number < array_elements(all_charsets));
476 all_charsets[cs->number]= cs;
477 cs->state|= MY_CS_AVAILABLE;
/*
  Once-control guarding init_available_charsets(). charsets_template keeps a
  pristine MY_PTHREAD_ONCE_INIT value that free_charsets() copies back into
  charsets_initialized.
*/
481 static my_pthread_once_t charsets_initialized= MY_PTHREAD_ONCE_INIT;
482 static my_pthread_once_t charsets_template= MY_PTHREAD_ONCE_INIT;
/*
  One-time initialisation of the charset registry; run through
  my_pthread_once() by the lookup functions in this file.

  Clears all_charsets, registers the compiled-in charsets, builds the lexer
  state maps for registered entries (the per-entry guard is on elided lines),
  then loads the charset index file from the charsets directory through a
  freshly initialised loader. The result of reading the index file is not
  checked in the visible lines.

  NOTE(review): the loop bound is array_elements(all_charsets)-1, so the last
  slot is not visited -- confirm this is intentional.
*/
484 static void init_available_charsets(
void)
486 char fname[FN_REFLEN +
sizeof(MY_CHARSET_INDEX)];
490 memset(&all_charsets, 0,
sizeof(all_charsets));
491 init_compiled_charsets(MYF(0));
494 for (cs=all_charsets;
495 cs < all_charsets+array_elements(all_charsets)-1 ;
501 if (init_state_maps(*cs))
506 my_charset_loader_init_mysys(&loader);
507 strmov(get_charsets_dir(fname), MY_CHARSET_INDEX);
508 my_read_charset_file(&loader, fname, MYF(0));
/*
  Reset the once-control to its pristine template value so that the next
  lookup runs init_available_charsets() again. No memory is released in the
  visible lines.
*/
512 void free_charsets(
void)
514 charsets_initialized= charsets_template;
/*
  Map an aliased collation name to its canonical spelling.

  When name starts with the utf8mb3_ prefix (case-insensitive, 8 characters),
  the rest of the name is re-prefixed with utf8_ and written into buf, limited
  to bufsize by my_snprintf().

  NOTE(review): the return statements are not visible; the caller in this
  file treats a NULL result as no alias available -- confirm.
*/
519 get_collation_name_alias(
const char *name,
char *buf,
size_t bufsize)
521 if (!strncasecmp(name,
"utf8mb3_", 8))
523 my_snprintf(buf, bufsize,
"utf8_%s", name + 8);
/*
  Return the number of the collation called name.

  Ensures the registry is initialised, then tries the name as given. When
  that lookup yields zero, the name is retried through its alias, if one
  exists. The fall-through result when neither matches is on an elided line.
*/
530 uint get_collation_number(
const char *name)
534 my_pthread_once(&charsets_initialized, init_available_charsets);
535 if ((
id= get_collation_number_internal(name)))
537 if ((name= get_collation_name_alias(name, alias,
sizeof(alias))))
538 return get_collation_number_internal(name);
/*
  Find a charset by character set name and state flags.

  Scans all_charsets and returns the number of the first entry that is
  non-empty, has a csname, has at least one of the cs_flags bits set in its
  state, and whose csname equals charset_name case-insensitively (latin1
  comparison). The result when nothing matches is on an elided line.
*/
544 get_charset_number_internal(
const char *charset_name, uint cs_flags)
548 for (cs= all_charsets;
549 cs < all_charsets + array_elements(all_charsets);
552 if ( cs[0] && cs[0]->csname && (cs[0]->state & cs_flags) &&
553 !my_strcasecmp(&my_charset_latin1, cs[0]->csname, charset_name))
554 return cs[0]->number;
/*
  Return the canonical name for an aliased character set name. Only utf8mb3
  is recognised (case-insensitive); the returned values are on elided lines.
  NOTE(review): the caller treats a NULL result as no alias -- confirm.
*/
561 get_charset_name_alias(
const char *name)
563 if (!my_strcasecmp(&my_charset_latin1, name,
"utf8mb3"))
/*
  Return the number of the charset called charset_name whose state matches
  cs_flags.

  Ensures the registry is initialised, tries the name as given, and when that
  yields zero retries with the alias of the name if one exists. The
  fall-through result is on an elided line.
*/
569 uint get_charset_number(
const char *charset_name, uint cs_flags)
572 my_pthread_once(&charsets_initialized, init_available_charsets);
573 if ((
id= get_charset_number_internal(charset_name, cs_flags)))
575 if ((charset_name= get_charset_name_alias(charset_name)))
576 return get_charset_number_internal(charset_name, cs_flags);
/*
  Return the collation name registered under charset_number.

  Ensures the registry is initialised and bounds-checks the number. The name
  is returned only when the slot is populated, its stored number equals the
  requested one, and a name is present. The fallback return value is on an
  elided line.
*/
581 const char *get_charset_name(uint charset_number)
583 my_pthread_once(&charsets_initialized, init_available_charsets);
585 if (charset_number < array_elements(all_charsets))
589 if (cs && (cs->number == charset_number) && cs->name)
590 return (
char*) cs->name;
/*
  Fetch a registered charset by number and make it ready for use.
  NOTE(review): header not visible; presumably
  get_internal_charset(loader, cs_number, flags), as called in this file.

  Visible flow:
    - cs_number must be inside all_charsets (debug assertion only).
    - A slot already marked MY_CS_READY takes a fast path (body elided).
    - When the charset is neither compiled-in nor loaded, its definition
      file, named csname with an .xml suffix in the charsets directory, is
      read.
    - When the charset is MY_CS_AVAILABLE and not yet ready, the optional
      init hooks of its charset and collation handlers are run; a non-zero
      result from either is treated as failure (handling elided), otherwise
      MY_CS_READY is set.
  NOTE(review): locking around these state changes, if any, is not visible
  in this excerpt.
*/
603 DBUG_ASSERT(cs_number < array_elements(all_charsets));
605 if ((cs= all_charsets[cs_number]))
607 if (cs->state & MY_CS_READY)
616 if (!(cs->state & (MY_CS_COMPILED|MY_CS_LOADED)))
619 strxmov(get_charsets_dir(buf), cs->csname,
".xml", NullS);
620 my_charset_loader_init_mysys(&loader);
621 my_read_charset_file(&loader, buf, flags);
624 if (cs->state & MY_CS_AVAILABLE)
626 if (!(cs->state & MY_CS_READY))
628 if ((cs->cset->init && cs->cset->init(cs, loader)) ||
629 (cs->coll->init && cs->coll->init(cs, loader)))
634 cs->state|= MY_CS_READY;
651 if (cs_number == default_charset_info->number)
652 return default_charset_info;
654 my_pthread_once(&charsets_initialized, init_available_charsets);
656 if (cs_number >= array_elements(all_charsets))
659 my_charset_loader_init_mysys(&loader);
660 cs= get_internal_charset(&loader, cs_number, flags);
662 if (!cs && (flags & MY_WME))
664 char index_file[FN_REFLEN +
sizeof(MY_CHARSET_INDEX)], cs_string[23];
665 strmov(get_charsets_dir(index_file),MY_CHARSET_INDEX);
667 int10_to_str(cs_number, cs_string+1, 10);
668 my_error(EE_UNKNOWN_CHARSET, MYF(ME_BELL), cs_string, index_file);
/*
  Resolve a collation by name using a caller-supplied loader.
  NOTE(review): the first line of the header is not visible; presumably
  my_collation_get_by_name(loader, name, flags), as called in this file.

  Ensures the registry is initialised, resolves the collation number,
  re-initialises the loader with the mysys callbacks, and fetches the
  charset; a zero number yields NULL. When nothing is found and MY_WME is set
  in flags, EE_UNKNOWN_COLLATION is reported together with the path of the
  charset index file.
*/
685 const char *name, myf flags)
689 my_pthread_once(&charsets_initialized, init_available_charsets);
691 cs_number= get_collation_number(name);
692 my_charset_loader_init_mysys(loader);
693 cs= cs_number ? get_internal_charset(loader, cs_number, flags) : NULL;
695 if (!cs && (flags & MY_WME))
697 char index_file[FN_REFLEN +
sizeof(MY_CHARSET_INDEX)];
698 strmov(get_charsets_dir(index_file),MY_CHARSET_INDEX);
699 my_error(EE_UNKNOWN_COLLATION, MYF(ME_BELL), name, index_file);
/*
  Convenience wrapper: look up a collation by name using a local loader
  initialised with the mysys callbacks. Despite the function name, the
  argument is resolved as a collation name by my_collation_get_by_name().
*/
705 CHARSET_INFO *get_charset_by_name(
const char *cs_name, myf flags)
708 my_charset_loader_init_mysys(&loader);
709 return my_collation_get_by_name(&loader, cs_name, flags);
/*
  Resolve a character set by name and state flags using a caller-supplied
  loader.
  NOTE(review): the first line of the header is not visible; presumably
  my_charset_get_by_name(loader, cs_name, cs_flags, flags), as called in this
  file. The DBUG_ENTER tag below still names get_charset_by_csname -- confirm
  whether the tag is stale.

  Ensures the registry is initialised, resolves the charset number for
  cs_name / cs_flags, and fetches the charset; a zero number yields NULL.
  When nothing is found and MY_WME is set in flags, EE_UNKNOWN_CHARSET is
  reported together with the path of the charset index file.
*/
724 const char *cs_name, uint cs_flags, myf flags)
728 DBUG_ENTER(
"get_charset_by_csname");
729 DBUG_PRINT(
"enter",(
"name: '%s'", cs_name));
731 my_pthread_once(&charsets_initialized, init_available_charsets);
733 cs_number= get_charset_number(cs_name, cs_flags);
734 cs= cs_number ? get_internal_charset(loader, cs_number, flags) : NULL;
736 if (!cs && (flags & MY_WME))
738 char index_file[FN_REFLEN +
sizeof(MY_CHARSET_INDEX)];
739 strmov(get_charsets_dir(index_file),MY_CHARSET_INDEX);
740 my_error(EE_UNKNOWN_CHARSET, MYF(ME_BELL), cs_name, index_file);
/*
  Convenience wrapper: look up a character set by name and state flags using
  a local loader initialised with the mysys callbacks.
*/
748 get_charset_by_csname(
const char *cs_name, uint cs_flags, myf flags)
751 my_charset_loader_init_mysys(&loader);
752 return my_charset_get_by_name(&loader, cs_name, cs_flags, flags);
/*
  Resolve a character set name to its primary collation and store the result
  through cs. The lookup uses MY_CS_PRIMARY and MYF(0), so no error is
  reported by the lookup itself. Remaining parameters, the fallback for a
  failed lookup and the return value are on elided lines.
*/
772 my_bool resolve_charset(
const char *cs_name,
776 *cs= get_charset_by_csname(cs_name, MY_CS_PRIMARY, MYF(0));
/*
  Resolve a collation name and store the result through cl. The lookup uses
  MYF(0), so no error is reported by the lookup itself. Remaining parameters,
  the fallback for a failed lookup and the return value are on elided lines.
*/
804 my_bool resolve_collation(
const char *cl_name,
808 *cl= get_charset_by_name(cl_name, MYF(0));
/*
  Escape length bytes of from into the buffer to.

  Output limit: when to_length is non-zero the writable end is
  to + to_length - 1; when it is zero the end is assumed to be
  to + 2*length, so the caller must supply a buffer of at least that size.

  Multi-byte awareness is enabled per charset via use_mb(). Complete
  multi-byte characters are detected with my_ismbchar() and bounds-checked
  against the output end before being handled; a lead byte of an incomplete
  sequence is detected with my_mbcharlen(). The per-character escaping rules
  are on elided lines.

  Returns the number of bytes written, or (size_t) -1 when the output would
  have overflowed.
*/
845 size_t escape_string_for_mysql(
const CHARSET_INFO *charset_info,
846 char *to,
size_t to_length,
847 const char *from,
size_t length)
849 const char *to_start=
to;
850 const char *end, *to_end=to_start + (to_length ? to_length-1 : 2*length);
851 my_bool overflow= FALSE;
853 my_bool use_mb_flag= use_mb(charset_info);
855 for (end= from + length; from < end; from++)
860 if (use_mb_flag && (tmp_length= my_ismbchar(charset_info, from, end)))
862 if (to + tmp_length > to_end)
883 if (use_mb_flag && (tmp_length= my_mbcharlen(charset_info, *from)) > 1)
931 return overflow ? (size_t) -1 : (
size_t) (to - to_start);
935 #ifdef BACKSLASH_MBTAIL
943 GetLocaleInfo(LOCALE_SYSTEM_DEFAULT, LOCALE_IDEFAULTANSICODEPAGE,
944 buf+2,
sizeof(buf)-3);
954 #ifdef HAVE_CHARSET_cp932
955 !strcmp(buf,
"cp932") ? &my_charset_cp932_japanese_ci :
959 return fs_cset_cache;
/*
  Escape quote characters in length bytes of from, writing into to.

  Output limit: when to_length is non-zero the writable end is
  to + to_length - 1; when it is zero the end is assumed to be
  to + 2*length, so the caller must supply a buffer of at least that size.

  Complete multi-byte characters (detected with my_ismbchar() when the
  charset is multi-byte) are bounds-checked and copied byte by byte. The
  remaining visible checks reserve 2 output bytes for one path and 1 byte for
  the other; the conditions selecting between them are on elided lines.

  Returns the number of bytes written, or an all-ones value on overflow.

  NOTE(review): the return expression casts to ulong although the function
  returns size_t, and the sibling escape_string_for_mysql() uses
  (size_t) -1. Where ulong is narrower than size_t the overflow sentinel
  would differ from (size_t) -1 -- confirm and align.
*/
988 size_t escape_quotes_for_mysql(
CHARSET_INFO *charset_info,
989 char *to,
size_t to_length,
990 const char *from,
size_t length)
992 const char *to_start=
to;
993 const char *end, *to_end=to_start + (to_length ? to_length-1 : 2*length);
994 my_bool overflow= FALSE;
996 my_bool use_mb_flag= use_mb(charset_info);
998 for (end= from + length; from < end; from++)
1002 if (use_mb_flag && (tmp_length= my_ismbchar(charset_info, from, end)))
1004 if (to + tmp_length > to_end)
1009 while (tmp_length--)
1022 if (to + 2 > to_end)
1032 if (to + 1 > to_end)
1041 return overflow ? (ulong)~0 : (ulong) (to - to_start);