MySQL 5.6.14 Source Code Document
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
charset.c
1 /* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
2 
3  This program is free software; you can redistribute it and/or modify
4  it under the terms of the GNU General Public License as published by
5  the Free Software Foundation; version 2 of the License.
6 
7  This program is distributed in the hope that it will be useful,
8  but WITHOUT ANY WARRANTY; without even the implied warranty of
9  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  GNU General Public License for more details.
11 
12  You should have received a copy of the GNU General Public License
13  along with this program; if not, write to the Free Software
14  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
15 
16 #include "mysys_priv.h"
17 #include "mysys_err.h"
18 #include <m_ctype.h>
19 #include <m_string.h>
20 #include <my_dir.h>
21 #include <my_xml.h>
22 
23 
24 /*
25  The code below implements this functionality:
26 
27  - Initializing charset related structures
28  - Loading dynamic charsets
29  - Searching for a proper CHARSET_INFO
30  using charset name, collation name or collation ID
31  - Setting server default character set
32 */
33 
34 my_bool my_charset_same(const CHARSET_INFO *cs1, const CHARSET_INFO *cs2)
35 {
36  return ((cs1 == cs2) || !strcmp(cs1->csname,cs2->csname));
37 }
38 
39 
40 static uint
41 get_collation_number_internal(const char *name)
42 {
43  CHARSET_INFO **cs;
44  for (cs= all_charsets;
45  cs < all_charsets + array_elements(all_charsets);
46  cs++)
47  {
48  if ( cs[0] && cs[0]->name &&
49  !my_strcasecmp(&my_charset_latin1, cs[0]->name, name))
50  return cs[0]->number;
51  }
52  return 0;
53 }
54 
55 
56 static my_bool init_state_maps(CHARSET_INFO *cs)
57 {
58  uint i;
59  uchar *state_map;
60  uchar *ident_map;
61 
62  if (!(cs->state_map= (uchar*) my_once_alloc(256, MYF(MY_WME))))
63  return 1;
64 
65  if (!(cs->ident_map= (uchar*) my_once_alloc(256, MYF(MY_WME))))
66  return 1;
67 
68  state_map= cs->state_map;
69  ident_map= cs->ident_map;
70 
71  /* Fill state_map with states to get a faster parser */
72  for (i=0; i < 256 ; i++)
73  {
74  if (my_isalpha(cs,i))
75  state_map[i]=(uchar) MY_LEX_IDENT;
76  else if (my_isdigit(cs,i))
77  state_map[i]=(uchar) MY_LEX_NUMBER_IDENT;
78 #if defined(USE_MB) && defined(USE_MB_IDENT)
79  else if (my_mbcharlen(cs, i)>1)
80  state_map[i]=(uchar) MY_LEX_IDENT;
81 #endif
82  else if (my_isspace(cs,i))
83  state_map[i]=(uchar) MY_LEX_SKIP;
84  else
85  state_map[i]=(uchar) MY_LEX_CHAR;
86  }
87  state_map[(uchar)'_']=state_map[(uchar)'$']=(uchar) MY_LEX_IDENT;
88  state_map[(uchar)'\'']=(uchar) MY_LEX_STRING;
89  state_map[(uchar)'.']=(uchar) MY_LEX_REAL_OR_POINT;
90  state_map[(uchar)'>']=state_map[(uchar)'=']=state_map[(uchar)'!']= (uchar) MY_LEX_CMP_OP;
91  state_map[(uchar)'<']= (uchar) MY_LEX_LONG_CMP_OP;
92  state_map[(uchar)'&']=state_map[(uchar)'|']=(uchar) MY_LEX_BOOL;
93  state_map[(uchar)'#']=(uchar) MY_LEX_COMMENT;
94  state_map[(uchar)';']=(uchar) MY_LEX_SEMICOLON;
95  state_map[(uchar)':']=(uchar) MY_LEX_SET_VAR;
96  state_map[0]=(uchar) MY_LEX_EOL;
97  state_map[(uchar)'\\']= (uchar) MY_LEX_ESCAPE;
98  state_map[(uchar)'/']= (uchar) MY_LEX_LONG_COMMENT;
99  state_map[(uchar)'*']= (uchar) MY_LEX_END_LONG_COMMENT;
100  state_map[(uchar)'@']= (uchar) MY_LEX_USER_END;
101  state_map[(uchar) '`']= (uchar) MY_LEX_USER_VARIABLE_DELIMITER;
102  state_map[(uchar)'"']= (uchar) MY_LEX_STRING_OR_DELIMITER;
103 
104  /*
105  Create a second map to make it faster to find identifiers
106  */
107  for (i=0; i < 256 ; i++)
108  {
109  ident_map[i]= (uchar) (state_map[i] == MY_LEX_IDENT ||
110  state_map[i] == MY_LEX_NUMBER_IDENT);
111  }
112 
113  /* Special handling of hex and binary strings */
114  state_map[(uchar)'x']= state_map[(uchar)'X']= (uchar) MY_LEX_IDENT_OR_HEX;
115  state_map[(uchar)'b']= state_map[(uchar)'B']= (uchar) MY_LEX_IDENT_OR_BIN;
116  state_map[(uchar)'n']= state_map[(uchar)'N']= (uchar) MY_LEX_IDENT_OR_NCHAR;
117  return 0;
118 }
119 
120 
121 static void simple_cs_init_functions(CHARSET_INFO *cs)
122 {
123  if (cs->state & MY_CS_BINSORT)
124  cs->coll= &my_collation_8bit_bin_handler;
125  else
126  cs->coll= &my_collation_8bit_simple_ci_handler;
127 
128  cs->cset= &my_charset_8bit_handler;
129 }
130 
131 
132 
133 static int cs_copy_data(CHARSET_INFO *to, CHARSET_INFO *from)
134 {
135  to->number= from->number ? from->number : to->number;
136 
137  if (from->csname)
138  if (!(to->csname= my_once_strdup(from->csname,MYF(MY_WME))))
139  goto err;
140 
141  if (from->name)
142  if (!(to->name= my_once_strdup(from->name,MYF(MY_WME))))
143  goto err;
144 
145  if (from->comment)
146  if (!(to->comment= my_once_strdup(from->comment,MYF(MY_WME))))
147  goto err;
148 
149  if (from->ctype)
150  {
151  if (!(to->ctype= (uchar*) my_once_memdup((char*) from->ctype,
152  MY_CS_CTYPE_TABLE_SIZE,
153  MYF(MY_WME))))
154  goto err;
155  if (init_state_maps(to))
156  goto err;
157  }
158  if (from->to_lower)
159  if (!(to->to_lower= (uchar*) my_once_memdup((char*) from->to_lower,
160  MY_CS_TO_LOWER_TABLE_SIZE,
161  MYF(MY_WME))))
162  goto err;
163 
164  if (from->to_upper)
165  if (!(to->to_upper= (uchar*) my_once_memdup((char*) from->to_upper,
166  MY_CS_TO_UPPER_TABLE_SIZE,
167  MYF(MY_WME))))
168  goto err;
169  if (from->sort_order)
170  {
171  if (!(to->sort_order= (uchar*) my_once_memdup((char*) from->sort_order,
172  MY_CS_SORT_ORDER_TABLE_SIZE,
173  MYF(MY_WME))))
174  goto err;
175 
176  }
177  if (from->tab_to_uni)
178  {
179  uint sz= MY_CS_TO_UNI_TABLE_SIZE*sizeof(uint16);
180  if (!(to->tab_to_uni= (uint16*) my_once_memdup((char*)from->tab_to_uni,
181  sz, MYF(MY_WME))))
182  goto err;
183  }
184  if (from->tailoring)
185  if (!(to->tailoring= my_once_strdup(from->tailoring,MYF(MY_WME))))
186  goto err;
187 
188  return 0;
189 
190 err:
191  return 1;
192 }
193 
194 
195 
196 static my_bool simple_cs_is_full(CHARSET_INFO *cs)
197 {
198  return ((cs->csname && cs->tab_to_uni && cs->ctype && cs->to_upper &&
199  cs->to_lower) &&
200  (cs->number && cs->name &&
201  (cs->sort_order || (cs->state & MY_CS_BINSORT) )));
202 }
203 
204 
205 static void
206 copy_uca_collation(CHARSET_INFO *to, CHARSET_INFO *from)
207 {
208  to->cset= from->cset;
209  to->coll= from->coll;
210  to->strxfrm_multiply= from->strxfrm_multiply;
211  to->min_sort_char= from->min_sort_char;
212  to->max_sort_char= from->max_sort_char;
213  to->mbminlen= from->mbminlen;
214  to->mbmaxlen= from->mbmaxlen;
215  to->caseup_multiply= from->caseup_multiply;
216  to->casedn_multiply= from->casedn_multiply;
217  to->state|= MY_CS_AVAILABLE | MY_CS_LOADED |
218  MY_CS_STRNXFRM | MY_CS_UNICODE;
219 }
220 
221 
222 static int add_collation(CHARSET_INFO *cs)
223 {
224  if (cs->name && (cs->number ||
225  (cs->number=get_collation_number_internal(cs->name))) &&
226  cs->number < array_elements(all_charsets))
227  {
228  if (!all_charsets[cs->number])
229  {
230  if (!(all_charsets[cs->number]=
231  (CHARSET_INFO*) my_once_alloc(sizeof(CHARSET_INFO),MYF(0))))
232  return MY_XML_ERROR;
233  memset(all_charsets[cs->number], 0, sizeof(CHARSET_INFO));
234  }
235 
236  if (cs->primary_number == cs->number)
237  cs->state |= MY_CS_PRIMARY;
238 
239  if (cs->binary_number == cs->number)
240  cs->state |= MY_CS_BINSORT;
241 
242  all_charsets[cs->number]->state|= cs->state;
243 
244  if (!(all_charsets[cs->number]->state & MY_CS_COMPILED))
245  {
246  CHARSET_INFO *newcs= all_charsets[cs->number];
247  if (cs_copy_data(all_charsets[cs->number],cs))
248  return MY_XML_ERROR;
249 
250  newcs->caseup_multiply= newcs->casedn_multiply= 1;
251  newcs->levels_for_compare= 1;
252  newcs->levels_for_order= 1;
253 
254  if (!strcmp(cs->csname,"ucs2") )
255  {
256 #if defined(HAVE_CHARSET_ucs2) && defined(HAVE_UCA_COLLATIONS)
257  copy_uca_collation(newcs, &my_charset_ucs2_unicode_ci);
258  newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII;
259 #endif
260  }
261  else if (!strcmp(cs->csname, "utf8") || !strcmp(cs->csname, "utf8mb3"))
262  {
263 #if defined (HAVE_CHARSET_utf8) && defined(HAVE_UCA_COLLATIONS)
264  copy_uca_collation(newcs, &my_charset_utf8_unicode_ci);
265  newcs->ctype= my_charset_utf8_unicode_ci.ctype;
266  if (init_state_maps(newcs))
267  return MY_XML_ERROR;
268 #endif
269  }
270  else if (!strcmp(cs->csname, "utf8mb4"))
271  {
272 #if defined (HAVE_CHARSET_utf8mb4) && defined(HAVE_UCA_COLLATIONS)
273  copy_uca_collation(newcs, &my_charset_utf8mb4_unicode_ci);
274  newcs->ctype= my_charset_utf8mb4_unicode_ci.ctype;
275  newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED;
276 #endif
277  }
278  else if (!strcmp(cs->csname, "utf16"))
279  {
280 #if defined (HAVE_CHARSET_utf16) && defined(HAVE_UCA_COLLATIONS)
281  copy_uca_collation(newcs, &my_charset_utf16_unicode_ci);
282  newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII;
283 #endif
284  }
285  else if (!strcmp(cs->csname, "utf32"))
286  {
287 #if defined (HAVE_CHARSET_utf32) && defined(HAVE_UCA_COLLATIONS)
288  copy_uca_collation(newcs, &my_charset_utf32_unicode_ci);
289  newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII;
290 #endif
291  }
292  else
293  {
294  uchar *sort_order= all_charsets[cs->number]->sort_order;
295  simple_cs_init_functions(all_charsets[cs->number]);
296  newcs->mbminlen= 1;
297  newcs->mbmaxlen= 1;
298  if (simple_cs_is_full(all_charsets[cs->number]))
299  {
300  all_charsets[cs->number]->state |= MY_CS_LOADED;
301  }
302  all_charsets[cs->number]->state|= MY_CS_AVAILABLE;
303 
304  /*
305  Check if case sensitive sort order: A < a < B.
306  We need MY_CS_FLAG for regex library, and for
307  case sensitivity flag for 5.0 client protocol,
308  to support isCaseSensitive() method in JDBC driver
309  */
310  if (sort_order && sort_order['A'] < sort_order['a'] &&
311  sort_order['a'] < sort_order['B'])
312  all_charsets[cs->number]->state|= MY_CS_CSSORT;
313 
314  if (my_charset_is_8bit_pure_ascii(all_charsets[cs->number]))
315  all_charsets[cs->number]->state|= MY_CS_PUREASCII;
316  if (!my_charset_is_ascii_compatible(cs))
317  all_charsets[cs->number]->state|= MY_CS_NONASCII;
318  }
319  }
320  else
321  {
322  /*
323  We need the below to make get_charset_name()
324  and get_charset_number() working even if a
325  character set has not been really incompiled.
326  The above functions are used for example
327  in error message compiler extra/comp_err.c.
328  If a character set was compiled, this information
329  will get lost and overwritten in add_compiled_collation().
330  */
331  CHARSET_INFO *dst= all_charsets[cs->number];
332  dst->number= cs->number;
333  if (cs->comment)
334  if (!(dst->comment= my_once_strdup(cs->comment,MYF(MY_WME))))
335  return MY_XML_ERROR;
336  if (cs->csname)
337  if (!(dst->csname= my_once_strdup(cs->csname,MYF(MY_WME))))
338  return MY_XML_ERROR;
339  if (cs->name)
340  if (!(dst->name= my_once_strdup(cs->name,MYF(MY_WME))))
341  return MY_XML_ERROR;
342  }
343  cs->number= 0;
344  cs->primary_number= 0;
345  cs->binary_number= 0;
346  cs->name= NULL;
347  cs->state= 0;
348  cs->sort_order= NULL;
349  cs->state= 0;
350  }
351  return MY_XML_OK;
352 }
353 
354 
359 static void
360 default_reporter(enum loglevel level __attribute__ ((unused)),
361  const char *format __attribute__ ((unused)),
362  ...)
363 {
364 }
365 my_error_reporter my_charset_error_reporter= default_reporter;
366 
367 
372 static void *
373 my_once_alloc_c(size_t size)
374 { return my_once_alloc(size, MYF(MY_WME)); }
375 
376 
377 static void *
378 my_malloc_c(size_t size)
379 { return my_malloc(size, MYF(MY_WME)); }
380 
381 
382 static void *
383 my_realloc_c(void *old, size_t size)
384 { return my_realloc(old, size, MYF(MY_WME)); }
385 
386 
391 void
392 my_charset_loader_init_mysys(MY_CHARSET_LOADER *loader)
393 {
394  loader->error[0]= '\0';
395  loader->once_alloc= my_once_alloc_c;
396  loader->malloc= my_malloc_c;
397  loader->realloc= my_realloc_c;
398  loader->free= my_free;
399  loader->reporter= my_charset_error_reporter;
400  loader->add_collation= add_collation;
401 }
402 
403 
/*
  Upper bound, in bytes, for a charset definition file.
  Parenthesized so that the macro expands as a single value in any
  expression (e.g. as the right operand of a division).
*/
#define MY_MAX_ALLOWED_BUF (1024*1024)

/* File name of the charset index, relative to the charsets directory */
#define MY_CHARSET_INDEX "Index.xml"

/*
  Directory holding the charset definition files. While it is NULL,
  get_charsets_dir() builds the directory name from the compiled-in paths.
*/
const char *charsets_dir= NULL;
408 
409 
410 static my_bool
411 my_read_charset_file(MY_CHARSET_LOADER *loader,
412  const char *filename,
413  myf myflags)
414 {
415  uchar *buf;
416  int fd;
417  size_t len, tmp_len;
418  MY_STAT stat_info;
419 
420  if (!my_stat(filename, &stat_info, MYF(myflags)) ||
421  ((len= (uint)stat_info.st_size) > MY_MAX_ALLOWED_BUF) ||
422  !(buf= (uchar*) my_malloc(len,myflags)))
423  return TRUE;
424 
425  if ((fd= mysql_file_open(key_file_charset, filename, O_RDONLY, myflags)) < 0)
426  goto error;
427  tmp_len= mysql_file_read(fd, buf, len, myflags);
428  mysql_file_close(fd, myflags);
429  if (tmp_len != len)
430  goto error;
431 
432  if (my_parse_charset_xml(loader, (char *) buf, len))
433  {
434  my_printf_error(EE_UNKNOWN_CHARSET, "Error while parsing '%s': %s\n",
435  MYF(0), filename, loader->error);
436  goto error;
437  }
438 
439  my_free(buf);
440  return FALSE;
441 
442 error:
443  my_free(buf);
444  return TRUE;
445 }
446 
447 
448 char *get_charsets_dir(char *buf)
449 {
450  const char *sharedir= SHAREDIR;
451  char *res;
452  DBUG_ENTER("get_charsets_dir");
453 
454  if (charsets_dir != NULL)
455  strmake(buf, charsets_dir, FN_REFLEN-1);
456  else
457  {
458  if (test_if_hard_path(sharedir) ||
459  is_prefix(sharedir, DEFAULT_CHARSET_HOME))
460  strxmov(buf, sharedir, "/", CHARSET_DIR, NullS);
461  else
462  strxmov(buf, DEFAULT_CHARSET_HOME, "/", sharedir, "/", CHARSET_DIR,
463  NullS);
464  }
465  res= convert_dirname(buf,buf,NullS);
466  DBUG_PRINT("info",("charsets dir: '%s'", buf));
467  DBUG_RETURN(res);
468 }
469 
470 CHARSET_INFO *all_charsets[MY_ALL_CHARSETS_SIZE]={NULL};
471 CHARSET_INFO *default_charset_info = &my_charset_latin1;
472 
473 void add_compiled_collation(CHARSET_INFO *cs)
474 {
475  DBUG_ASSERT(cs->number < array_elements(all_charsets));
476  all_charsets[cs->number]= cs;
477  cs->state|= MY_CS_AVAILABLE;
478 }
479 
480 
481 static my_pthread_once_t charsets_initialized= MY_PTHREAD_ONCE_INIT;
482 static my_pthread_once_t charsets_template= MY_PTHREAD_ONCE_INIT;
483 
484 static void init_available_charsets(void)
485 {
486  char fname[FN_REFLEN + sizeof(MY_CHARSET_INDEX)];
487  CHARSET_INFO **cs;
488  MY_CHARSET_LOADER loader;
489 
490  memset(&all_charsets, 0, sizeof(all_charsets));
491  init_compiled_charsets(MYF(0));
492 
493  /* Copy compiled charsets */
494  for (cs=all_charsets;
495  cs < all_charsets+array_elements(all_charsets)-1 ;
496  cs++)
497  {
498  if (*cs)
499  {
500  if (cs[0]->ctype)
501  if (init_state_maps(*cs))
502  *cs= NULL;
503  }
504  }
505 
506  my_charset_loader_init_mysys(&loader);
507  strmov(get_charsets_dir(fname), MY_CHARSET_INDEX);
508  my_read_charset_file(&loader, fname, MYF(0));
509 }
510 
511 
512 void free_charsets(void)
513 {
514  charsets_initialized= charsets_template;
515 }
516 
517 
/*
  Translate a "utf8mb3_xxx" collation name into its "utf8_xxx" alias.

  name     Collation name to translate
  buf      Buffer that receives the alias
  bufsize  Size of buf

  Returns buf when name starts (case-insensitively) with "utf8mb3_",
  NULL otherwise; buf is not touched in the latter case.
*/
static const char*
get_collation_name_alias(const char *name, char *buf, size_t bufsize)
{
  if (strncasecmp(name, "utf8mb3_", 8) != 0)
    return NULL;

  my_snprintf(buf, bufsize, "utf8_%s", name + 8);
  return buf;
}
528 
529 
530 uint get_collation_number(const char *name)
531 {
532  uint id;
533  char alias[64];
534  my_pthread_once(&charsets_initialized, init_available_charsets);
535  if ((id= get_collation_number_internal(name)))
536  return id;
537  if ((name= get_collation_name_alias(name, alias, sizeof(alias))))
538  return get_collation_number_internal(name);
539  return 0;
540 }
541 
542 
543 static uint
544 get_charset_number_internal(const char *charset_name, uint cs_flags)
545 {
546  CHARSET_INFO **cs;
547 
548  for (cs= all_charsets;
549  cs < all_charsets + array_elements(all_charsets);
550  cs++)
551  {
552  if ( cs[0] && cs[0]->csname && (cs[0]->state & cs_flags) &&
553  !my_strcasecmp(&my_charset_latin1, cs[0]->csname, charset_name))
554  return cs[0]->number;
555  }
556  return 0;
557 }
558 
559 
560 static const char*
561 get_charset_name_alias(const char *name)
562 {
563  if (!my_strcasecmp(&my_charset_latin1, name, "utf8mb3"))
564  return "utf8";
565  return NULL;
566 }
567 
568 
569 uint get_charset_number(const char *charset_name, uint cs_flags)
570 {
571  uint id;
572  my_pthread_once(&charsets_initialized, init_available_charsets);
573  if ((id= get_charset_number_internal(charset_name, cs_flags)))
574  return id;
575  if ((charset_name= get_charset_name_alias(charset_name)))
576  return get_charset_number_internal(charset_name, cs_flags);
577  return 0;
578 }
579 
580 
581 const char *get_charset_name(uint charset_number)
582 {
583  my_pthread_once(&charsets_initialized, init_available_charsets);
584 
585  if (charset_number < array_elements(all_charsets))
586  {
587  CHARSET_INFO *cs= all_charsets[charset_number];
588 
589  if (cs && (cs->number == charset_number) && cs->name)
590  return (char*) cs->name;
591  }
592 
593  return "?"; /* this mimics find_type() */
594 }
595 
596 
597 static CHARSET_INFO *
598 get_internal_charset(MY_CHARSET_LOADER *loader, uint cs_number, myf flags)
599 {
600  char buf[FN_REFLEN];
601  CHARSET_INFO *cs;
602 
603  DBUG_ASSERT(cs_number < array_elements(all_charsets));
604 
605  if ((cs= all_charsets[cs_number]))
606  {
607  if (cs->state & MY_CS_READY) /* if CS is already initialized */
608  return cs;
609 
610  /*
611  To make things thread safe we are not allowing other threads to interfere
612  while we may changing the cs_info_table
613  */
614  mysql_mutex_lock(&THR_LOCK_charset);
615 
616  if (!(cs->state & (MY_CS_COMPILED|MY_CS_LOADED))) /* if CS is not in memory */
617  {
618  MY_CHARSET_LOADER loader;
619  strxmov(get_charsets_dir(buf), cs->csname, ".xml", NullS);
620  my_charset_loader_init_mysys(&loader);
621  my_read_charset_file(&loader, buf, flags);
622  }
623 
624  if (cs->state & MY_CS_AVAILABLE)
625  {
626  if (!(cs->state & MY_CS_READY))
627  {
628  if ((cs->cset->init && cs->cset->init(cs, loader)) ||
629  (cs->coll->init && cs->coll->init(cs, loader)))
630  {
631  cs= NULL;
632  }
633  else
634  cs->state|= MY_CS_READY;
635  }
636  }
637  else
638  cs= NULL;
639 
640  mysql_mutex_unlock(&THR_LOCK_charset);
641  }
642  return cs;
643 }
644 
645 
646 CHARSET_INFO *get_charset(uint cs_number, myf flags)
647 {
648  CHARSET_INFO *cs;
649  MY_CHARSET_LOADER loader;
650 
651  if (cs_number == default_charset_info->number)
652  return default_charset_info;
653 
654  my_pthread_once(&charsets_initialized, init_available_charsets);
655 
656  if (cs_number >= array_elements(all_charsets))
657  return NULL;
658 
659  my_charset_loader_init_mysys(&loader);
660  cs= get_internal_charset(&loader, cs_number, flags);
661 
662  if (!cs && (flags & MY_WME))
663  {
664  char index_file[FN_REFLEN + sizeof(MY_CHARSET_INDEX)], cs_string[23];
665  strmov(get_charsets_dir(index_file),MY_CHARSET_INDEX);
666  cs_string[0]='#';
667  int10_to_str(cs_number, cs_string+1, 10);
668  my_error(EE_UNKNOWN_CHARSET, MYF(ME_BELL), cs_string, index_file);
669  }
670  return cs;
671 }
672 
673 
683 CHARSET_INFO *
684 my_collation_get_by_name(MY_CHARSET_LOADER *loader,
685  const char *name, myf flags)
686 {
687  uint cs_number;
688  CHARSET_INFO *cs;
689  my_pthread_once(&charsets_initialized, init_available_charsets);
690 
691  cs_number= get_collation_number(name);
692  my_charset_loader_init_mysys(loader);
693  cs= cs_number ? get_internal_charset(loader, cs_number, flags) : NULL;
694 
695  if (!cs && (flags & MY_WME))
696  {
697  char index_file[FN_REFLEN + sizeof(MY_CHARSET_INDEX)];
698  strmov(get_charsets_dir(index_file),MY_CHARSET_INDEX);
699  my_error(EE_UNKNOWN_COLLATION, MYF(ME_BELL), name, index_file);
700  }
701  return cs;
702 }
703 
704 
705 CHARSET_INFO *get_charset_by_name(const char *cs_name, myf flags)
706 {
707  MY_CHARSET_LOADER loader;
708  my_charset_loader_init_mysys(&loader);
709  return my_collation_get_by_name(&loader, cs_name, flags);
710 }
711 
712 
722 CHARSET_INFO *
723 my_charset_get_by_name(MY_CHARSET_LOADER *loader,
724  const char *cs_name, uint cs_flags, myf flags)
725 {
726  uint cs_number;
727  CHARSET_INFO *cs;
728  DBUG_ENTER("get_charset_by_csname");
729  DBUG_PRINT("enter",("name: '%s'", cs_name));
730 
731  my_pthread_once(&charsets_initialized, init_available_charsets);
732 
733  cs_number= get_charset_number(cs_name, cs_flags);
734  cs= cs_number ? get_internal_charset(loader, cs_number, flags) : NULL;
735 
736  if (!cs && (flags & MY_WME))
737  {
738  char index_file[FN_REFLEN + sizeof(MY_CHARSET_INDEX)];
739  strmov(get_charsets_dir(index_file),MY_CHARSET_INDEX);
740  my_error(EE_UNKNOWN_CHARSET, MYF(ME_BELL), cs_name, index_file);
741  }
742 
743  DBUG_RETURN(cs);
744 }
745 
746 
747 CHARSET_INFO *
748 get_charset_by_csname(const char *cs_name, uint cs_flags, myf flags)
749 {
750  MY_CHARSET_LOADER loader;
751  my_charset_loader_init_mysys(&loader);
752  return my_charset_get_by_name(&loader, cs_name, cs_flags, flags);
753 }
754 
755 
772 my_bool resolve_charset(const char *cs_name,
773  const CHARSET_INFO *default_cs,
774  const CHARSET_INFO **cs)
775 {
776  *cs= get_charset_by_csname(cs_name, MY_CS_PRIMARY, MYF(0));
777 
778  if (*cs == NULL)
779  {
780  *cs= default_cs;
781  return TRUE;
782  }
783 
784  return FALSE;
785 }
786 
787 
804 my_bool resolve_collation(const char *cl_name,
805  const CHARSET_INFO *default_cl,
806  const CHARSET_INFO **cl)
807 {
808  *cl= get_charset_by_name(cl_name, MYF(0));
809 
810  if (*cl == NULL)
811  {
812  *cl= default_cl;
813  return TRUE;
814  }
815 
816  return FALSE;
817 }
818 
819 
820 /*
821  Escape string with backslashes (\)
822 
823  SYNOPSIS
824  escape_string_for_mysql()
825  charset_info Charset of the strings
826  to Buffer for escaped string
827  to_length Length of destination buffer, or 0
828  from The string to escape
829  length The length of the string to escape
830 
831  DESCRIPTION
832  This escapes the contents of a string by adding backslashes before special
833  characters, and turning others into specific escape sequences, such as
834  turning newlines into \n and null bytes into \0.
835 
836  NOTE
837  To maintain compatibility with the old C API, to_length may be 0 to mean
838  "big enough"
839 
840  RETURN VALUES
841  (size_t) -1 The escaped string did not fit in the to buffer
842  # The length of the escaped string
843 */
844 
845 size_t escape_string_for_mysql(const CHARSET_INFO *charset_info,
846  char *to, size_t to_length,
847  const char *from, size_t length)
848 {
849  const char *to_start= to;
850  const char *end, *to_end=to_start + (to_length ? to_length-1 : 2*length);
851  my_bool overflow= FALSE;
852 #ifdef USE_MB
853  my_bool use_mb_flag= use_mb(charset_info);
854 #endif
855  for (end= from + length; from < end; from++)
856  {
857  char escape= 0;
858 #ifdef USE_MB
859  int tmp_length;
860  if (use_mb_flag && (tmp_length= my_ismbchar(charset_info, from, end)))
861  {
862  if (to + tmp_length > to_end)
863  {
864  overflow= TRUE;
865  break;
866  }
867  while (tmp_length--)
868  *to++= *from++;
869  from--;
870  continue;
871  }
872  /*
873  If the next character appears to begin a multi-byte character, we
874  escape that first byte of that apparent multi-byte character. (The
875  character just looks like a multi-byte character -- if it were actually
876  a multi-byte character, it would have been passed through in the test
877  above.)
878 
879  Without this check, we can create a problem by converting an invalid
880  multi-byte character into a valid one. For example, 0xbf27 is not
881  a valid GBK character, but 0xbf5c is. (0x27 = ', 0x5c = \)
882  */
883  if (use_mb_flag && (tmp_length= my_mbcharlen(charset_info, *from)) > 1)
884  escape= *from;
885  else
886 #endif
887  switch (*from) {
888  case 0: /* Must be escaped for 'mysql' */
889  escape= '0';
890  break;
891  case '\n': /* Must be escaped for logs */
892  escape= 'n';
893  break;
894  case '\r':
895  escape= 'r';
896  break;
897  case '\\':
898  escape= '\\';
899  break;
900  case '\'':
901  escape= '\'';
902  break;
903  case '"': /* Better safe than sorry */
904  escape= '"';
905  break;
906  case '\032': /* This gives problems on Win32 */
907  escape= 'Z';
908  break;
909  }
910  if (escape)
911  {
912  if (to + 2 > to_end)
913  {
914  overflow= TRUE;
915  break;
916  }
917  *to++= '\\';
918  *to++= escape;
919  }
920  else
921  {
922  if (to + 1 > to_end)
923  {
924  overflow= TRUE;
925  break;
926  }
927  *to++= *from;
928  }
929  }
930  *to= 0;
931  return overflow ? (size_t) -1 : (size_t) (to - to_start);
932 }
933 
934 
#ifdef BACKSLASH_MBTAIL
/* Result of the first fs_character_set() call */
static CHARSET_INFO *fs_cset_cache= NULL;

/*
  Return the character set to use for file system names.

  The first call asks GetLocaleInfo() for the default ANSI code page of
  the system and builds the name "cp<codepage>" from it. The result is
  my_charset_cp932_japanese_ci when that name is "cp932" and cp932 support
  is compiled in, my_charset_bin otherwise. The answer is cached.
*/
CHARSET_INFO *fs_character_set()
{
  char buf[10]= "cp";
  CHARSET_INFO *detected= &my_charset_bin;

  if (fs_cset_cache)
    return fs_cset_cache;

  GetLocaleInfo(LOCALE_SYSTEM_DEFAULT, LOCALE_IDEFAULTANSICODEPAGE,
                buf+2, sizeof(buf)-3);
  /*
    We cannot call get_charset_by_name here
    because fs_character_set() is executed before
    LOCK_THD_charset mutex initialization, which
    is used inside get_charset_by_name.
    As we're now interested in cp932 only,
    let's just detect it using strcmp().
  */
#ifdef HAVE_CHARSET_cp932
  if (!strcmp(buf, "cp932"))
    detected= &my_charset_cp932_japanese_ci;
#endif

  fs_cset_cache= detected;
  return fs_cset_cache;
}
#endif
962 
963 /*
964  Escape apostrophes by doubling them up
965 
966  SYNOPSIS
967  escape_quotes_for_mysql()
968  charset_info Charset of the strings
969  to Buffer for escaped string
970  to_length Length of destination buffer, or 0
971  from The string to escape
972  length The length of the string to escape
973 
974  DESCRIPTION
975  This escapes the contents of a string by doubling up any apostrophes that
976  it contains. This is used when the NO_BACKSLASH_ESCAPES SQL_MODE is in
977  effect on the server.
978 
979  NOTE
980  To be consistent with escape_string_for_mysql(), to_length may be 0 to
981  mean "big enough"
982 
983  RETURN VALUES
984  ~0 The escaped string did not fit in the to buffer
985  >=0 The length of the escaped string
986 */
987 
988 size_t escape_quotes_for_mysql(CHARSET_INFO *charset_info,
989  char *to, size_t to_length,
990  const char *from, size_t length)
991 {
992  const char *to_start= to;
993  const char *end, *to_end=to_start + (to_length ? to_length-1 : 2*length);
994  my_bool overflow= FALSE;
995 #ifdef USE_MB
996  my_bool use_mb_flag= use_mb(charset_info);
997 #endif
998  for (end= from + length; from < end; from++)
999  {
1000 #ifdef USE_MB
1001  int tmp_length;
1002  if (use_mb_flag && (tmp_length= my_ismbchar(charset_info, from, end)))
1003  {
1004  if (to + tmp_length > to_end)
1005  {
1006  overflow= TRUE;
1007  break;
1008  }
1009  while (tmp_length--)
1010  *to++= *from++;
1011  from--;
1012  continue;
1013  }
1014  /*
1015  We don't have the same issue here with a non-multi-byte character being
1016  turned into a multi-byte character by the addition of an escaping
1017  character, because we are only escaping the ' character with itself.
1018  */
1019 #endif
1020  if (*from == '\'')
1021  {
1022  if (to + 2 > to_end)
1023  {
1024  overflow= TRUE;
1025  break;
1026  }
1027  *to++= '\'';
1028  *to++= '\'';
1029  }
1030  else
1031  {
1032  if (to + 1 > to_end)
1033  {
1034  overflow= TRUE;
1035  break;
1036  }
1037  *to++= *from;
1038  }
1039  }
1040  *to= 0;
1041  return overflow ? (ulong)~0 : (ulong) (to - to_start);
1042 }