19 #include <my_global.h>
/* HAVE_CHARSET_mb2 is defined when UTF-16 or UCS-2 support is compiled in. */
27 #if defined(HAVE_CHARSET_utf16) || defined(HAVE_CHARSET_ucs2)
28 #define HAVE_CHARSET_mb2
/*
  HAVE_CHARSET_mb2_or_mb4 is defined when HAVE_CHARSET_mb2 or UTF-32 support
  is compiled in; it guards the helpers shared by all of these charsets.
*/
32 #if defined(HAVE_CHARSET_mb2) || defined(HAVE_CHARSET_utf32)
33 #define HAVE_CHARSET_mb2_or_mb4
/* All bits set in a ulonglong: the largest unsigned value of that type. */
42 #define ULONGLONG_MAX (~(ulonglong) 0)
/* 0x8000000000000000 as an unsigned value: the magnitude of the smallest
   64-bit signed number. */
43 #define MAX_NEGATIVE_NUMBER ((ulonglong) LL(0x8000000000000000))
/* Decimal scale factors: 10^9, 10^10 and 10^11. */
45 #define LFACTOR ULL(1000000000)
46 #define LFACTOR1 ULL(10000000000)
47 #define LFACTOR2 ULL(100000000000)
/* lfactor[n] holds 10^n for n = 0..8. */
49 static unsigned long lfactor[9]=
50 { 1L, 10L, 100L, 1000L, 10000L, 100000L, 1000000L, 10000000L, 100000000L };
54 #ifdef HAVE_CHARSET_mb2_or_mb4
/*
  Compare the byte ranges [s, se) and [t, te) as raw bytes.

  memcmp() is run over the shorter of the two lengths. If that common prefix
  is equal, the result is the length difference (slen - tlen), so the shorter
  range compares as smaller.
*/
56 my_bincmp(
const uchar *s,
const uchar *se,
57 const uchar *t,
const uchar *te)
/* Both lengths are narrowed to int before use. */
59 int slen= (int) (se - s), tlen= (int) (te - t);
60 int len= MY_MIN(slen, tlen);
61 int cmp= memcmp(s, t, len);
/* A non-zero memcmp() result decides; otherwise the lengths decide. */
62 return cmp ? cmp : slen - tlen;
/*
  Only the signature is visible here. Both parameters are marked unused.
  NOTE(review): presumably a stub for an operation that is not meaningful on
  these charsets - confirm against the body.
*/
67 my_caseup_str_mb2_or_mb4(
const CHARSET_INFO * cs __attribute__((unused)),
68 char * s __attribute__((unused)))
/*
  Only the signature is visible here. Both parameters are marked unused.
  NOTE(review): presumably a stub, like its caseup counterpart - confirm
  against the body.
*/
76 my_casedn_str_mb2_or_mb4(
const CHARSET_INFO *cs __attribute__((unused)),
77 char * s __attribute__((unused)))
/*
  Only the signature is visible here. All three parameters are marked unused.
  NOTE(review): presumably a stub - confirm against the body.
*/
85 my_strcasecmp_mb2_or_mb4(
const CHARSET_INFO *cs __attribute__((unused)),
86 const char *s __attribute__((unused)),
87 const char *t __attribute__((unused)))
/*
  Fragment of a string-to-signed-32-bit-integer conversion. The line carrying
  the function name is not visible.
  NOTE(review): presumably my_strntol_mb2_or_mb4 - confirm.

  Visible behaviour:
    - the input is the byte range [nptr, nptr + l);
    - characters are decoded with cs->cset->mb_wc();
    - a '-' toggles the sign flag;
    - digits are '0'..'9', 'A'..'Z' and 'a'..'z';
    - the stop position is stored through endptr;
    - err[0] receives EILSEQ for an illegal sequence and EDOM otherwise;
    - the result is clamped to INT_MIN32 / INT_MAX32.
*/
96 const char *nptr,
size_t l,
int base,
97 char **endptr,
int *err)
103 register unsigned int cutlim;
104 register uint32 cutoff;
106 register const uchar *s= (
const uchar*) nptr;
107 register const uchar *e= (
const uchar*) nptr+l;
/* Prefix scan: decode one character at a time. */
113 if ((cnv= cs->cset->mb_wc(cs, &wc, s, e))>0)
/* Each '-' flips the sign. */
119 case '-' : negative= !negative;
break;
128 err[0]= (cnv==MY_CS_ILSEQ) ? EILSEQ : EDOM;
/*
  Overflow guard: cutoff is the largest accumulator value that can still be
  multiplied by base, and cutlim is the largest digit allowed once the
  accumulator equals cutoff.
*/
139 cutoff= ((uint32)~0L) / (uint32) base;
140 cutlim= (uint) (((uint32)~0L) % (uint32) base);
/* Digit loop. */
143 if ((cnv= cs->cset->mb_wc(cs, &wc, s, e)) > 0)
146 if (wc >=
'0' && wc <=
'9')
148 else if (wc >=
'A' && wc <=
'Z')
150 else if (wc >=
'a' && wc <=
'z')
/* Appending this digit would exceed the 32-bit unsigned range. */
156 if (res > cutoff || (res == cutoff && wc > cutlim))
164 else if (cnv == MY_CS_ILSEQ)
179 *endptr = (
char *) s;
/*
  Signed range check on the accumulated magnitude.
  NOTE(review): the first test presumably applies to negative input and the
  second to non-negative input - the enclosing condition is not visible.
*/
189 if (res > (uint32) INT_MIN32)
192 else if (res > INT_MAX32)
198 return negative ? INT_MIN32 : INT_MAX32;
201 return (negative ? -((
long) res) : (
long) res);
/*
  Fragment of a string-to-unsigned-32-bit-integer conversion. The line
  carrying the function name is not visible.
  NOTE(review): presumably my_strntoul_mb2_or_mb4 - confirm.

  It has the same structure as the signed fragment that precedes it: decode
  with cs->cset->mb_wc(), toggle the sign on '-', accumulate digits with an
  overflow guard. The visible saturation value is ~(uint32) 0.
*/
207 const char *nptr,
size_t l,
int base,
208 char **endptr,
int *err)
214 register unsigned int cutlim;
215 register uint32 cutoff;
217 register const uchar *s= (
const uchar*) nptr;
218 register const uchar *e= (
const uchar*) nptr + l;
224 if ((cnv= cs->cset->mb_wc(cs, &wc, s, e)) > 0)
230 case '-' : negative= !negative;
break;
239 err[0]= (cnv == MY_CS_ILSEQ) ? EILSEQ : EDOM;
/* Largest accumulator value, and largest final digit, that avoid overflow. */
250 cutoff= ((uint32)~0L) / (uint32) base;
251 cutlim= (uint) (((uint32)~0L) % (uint32) base);
255 if ((cnv= cs->cset->mb_wc(cs, &wc, s, e)) > 0)
258 if (wc >=
'0' && wc <=
'9')
260 else if (wc >=
'A' && wc <=
'Z')
262 else if (wc >=
'a' && wc <=
'z')
/* The digit value is not valid in the requested base. */
266 if ((
int) wc >= base)
268 if (res > cutoff || (res == cutoff && wc > cutlim))
276 else if (cnv == MY_CS_ILSEQ)
/* All bits set. NOTE(review): presumably the overflow result - confirm. */
302 return (~(uint32) 0);
305 return (negative ? -((
long) res) : (
long) res);
/*
  Fragment of a string-to-signed-64-bit-integer conversion. The line carrying
  the function name is not visible.
  NOTE(review): presumably my_strntoll_mb2_or_mb4 - confirm.

  Characters from [nptr, nptr + l) are decoded with cs->cset->mb_wc(), a '-'
  toggles the sign, and digits are accumulated in a ulonglong. The result is
  clamped to LONGLONG_MIN / LONGLONG_MAX.
*/
311 const char *nptr,
size_t l,
int base,
312 char **endptr,
int *err)
318 register ulonglong cutoff;
319 register unsigned int cutlim;
320 register ulonglong res;
321 register const uchar *s= (
const uchar*) nptr;
322 register const uchar *e= (
const uchar*) nptr+l;
328 if ((cnv=cs->cset->mb_wc(cs,&wc,s,e))>0)
334 case '-' : negative= !negative;
break;
343 err[0] = (cnv==MY_CS_ILSEQ) ? EILSEQ : EDOM;
/* Overflow guard computed against the full unsigned 64-bit range. */
354 cutoff = (~(ulonglong) 0) / (
unsigned long int) base;
355 cutlim = (uint) ((~(ulonglong) 0) % (
unsigned long int) base);
358 if ((cnv=cs->cset->mb_wc(cs,&wc,s,e))>0)
361 if ( wc>=
'0' && wc<=
'9')
363 else if ( wc>=
'A' && wc<=
'Z')
365 else if ( wc>=
'a' && wc<=
'z')
/* Appending this digit would overflow the accumulator. */
371 if (res > cutoff || (res == cutoff && wc > cutlim))
375 res *= (ulonglong) base;
379 else if (cnv==MY_CS_ILSEQ)
394 *endptr = (
char *) s;
/*
  Signed range check on the magnitude.
  NOTE(review): presumably the first test is for negative input and the
  second for non-negative input - the enclosing condition is not visible.
*/
404 if (res > (ulonglong) LONGLONG_MIN)
407 else if (res > (ulonglong) LONGLONG_MAX)
413 return negative ? LONGLONG_MIN : LONGLONG_MAX;
416 return (negative ? -((longlong)res) : (longlong)res);
/*
  Fragment of a string-to-unsigned-64-bit-integer conversion. The line
  carrying the function name is not visible.
  NOTE(review): presumably my_strntoull_mb2_or_mb4 - confirm.

  It has the same structure as the signed 64-bit fragment that precedes it.
  The visible saturation value is ~(ulonglong) 0.
*/
422 const char *nptr,
size_t l,
int base,
423 char **endptr,
int *err)
429 register ulonglong cutoff;
430 register unsigned int cutlim;
431 register ulonglong res;
432 register const uchar *s= (
const uchar*) nptr;
433 register const uchar *e= (
const uchar*) nptr + l;
439 if ((cnv= cs->cset->mb_wc(cs,&wc,s,e)) > 0)
445 case '-' : negative= !negative;
break;
454 err[0]= (cnv==MY_CS_ILSEQ) ? EILSEQ : EDOM;
/* Largest accumulator value, and largest final digit, that avoid overflow. */
465 cutoff = (~(ulonglong) 0) / (
unsigned long int) base;
466 cutlim = (uint) ((~(ulonglong) 0) % (
unsigned long int) base);
470 if ((cnv=cs->cset->mb_wc(cs,&wc,s,e))>0)
473 if ( wc>=
'0' && wc<=
'9')
475 else if ( wc>=
'A' && wc<=
'Z')
477 else if ( wc>=
'a' && wc<=
'z')
483 if (res > cutoff || (res == cutoff && wc > cutlim))
487 res *= (ulonglong) base;
491 else if (cnv==MY_CS_ILSEQ)
506 *endptr = (
char *) s;
/* All bits set. NOTE(review): presumably the overflow result - confirm. */
517 return (~(ulonglong) 0);
520 return (negative ? -((longlong) res) : (longlong) res);
/*
  Fragment of a string-to-double conversion. The line carrying the function
  name is not visible.
  NOTE(review): presumably my_strntod_mb2_or_mb4 - confirm.

  The multi-byte input is decoded character by character into a local
  single-byte buffer (buf), which is then parsed by my_strtod(). The stop
  position reported by my_strtod() is translated back into the caller's
  buffer.
*/
526 char *nptr,
size_t length,
527 char **endptr,
int *err)
531 register char *b=
buf;
532 register const uchar *s= (
const uchar*) nptr;
/* Cap the input length to one less than the size of buf. */
539 if (length >=
sizeof(buf))
540 length=
sizeof(buf) - 1;
543 while ((cnv= cs->cset->mb_wc(cs,&wc,s,end)) > 0)
/* Test for code points above 'e' and for NUL.
   NOTE(review): presumably this ends the copy loop - confirm. */
546 if (wc > (
int) (uchar)
'e' || !wc)
552 res= my_strtod(buf, endptr, err);
/* Map the offset inside buf back to nptr, scaling by cs->mbminlen bytes per
   copied character. */
553 *endptr= nptr + cs->mbminlen * (size_t) (*endptr - buf);
/*
  Fragment of a decimal string-to-ulonglong conversion. The line carrying the
  function name is not visible.
  NOTE(review): presumably my_strntoull10rnd_mb2_or_mb4 - confirm.

  The input is decoded into a 256-byte single-byte buffer, converted with
  my_strntoull10rnd_8bit(), and the stop position is translated back into the
  caller's buffer.
*/
560 const char *nptr,
size_t length,
562 char **endptr,
int *err)
564 char buf[256], *b=
buf;
566 const uchar *end, *s= (
const uchar*) nptr;
/* Cap the input length to one less than the size of buf. */
571 if (length >=
sizeof(buf))
572 length=
sizeof(buf)-1;
575 while ((cnv= cs->cset->mb_wc(cs,&wc,s,end)) > 0)
/* Test for code points above 'e' and for NUL.
   NOTE(review): presumably this ends the copy loop - confirm. */
578 if (wc > (
int) (uchar)
'e' || !wc)
/* b - buf is the number of bytes collected in buf. */
583 res= my_strntoull10rnd_8bit(cs, buf, b - buf, unsign_fl, endptr, err);
/* Map the offset inside buf back to nptr, scaling by cs->mbminlen. */
584 *endptr= (
char*) nptr + cs->mbminlen * (
size_t) (*endptr -
buf);
/*
  Fragment of a long-to-string conversion that writes in the target charset.
  The line carrying the function name is not visible.
  NOTE(review): presumably my_l10tostr_mb2_or_mb4 - confirm.

  Decimal digits are produced right to left in a local buffer. Each one is
  then encoded into dst with cs->cset->wc_mb(). The return value is the
  number of bytes written to dst.
*/
595 char *dst,
size_t len,
int radix,
long int val)
598 register char *p, *db, *de;
601 unsigned long int uval = (
unsigned long int) val;
/* Start at the last byte of the scratch buffer and fill backwards. */
603 p= &buffer[
sizeof(buffer) - 1];
/* Negate in unsigned arithmetic. */
612 uval = (
unsigned long int)0 - uval;
/* The first division is done on the unsigned value. */
616 new_val = (long) (uval / 10);
617 *--p =
'0'+ (char) (uval - (
unsigned long) new_val * 10);
623 *--p=
'0' + (char) (val - new_val * 10);
/* Encode each ASCII digit in the target charset until dst is full or the
   terminating NUL of the scratch buffer is reached. */
632 for ( db= dst, de= dst + len ; (dst < de) && *p ; p++)
634 int cnvres= cs->cset->wc_mb(cs,(my_wc_t)p[0],(uchar*) dst, (uchar*) de);
640 return (
int) (dst - db);
/*
  Fragment of a longlong-to-string conversion that writes in the target
  charset. The line carrying the function name is not visible.
  NOTE(review): presumably my_ll10tostr_mb2_or_mb4 - confirm.

  Digits are generated right to left in a scratch buffer. 64-bit division is
  used while the value exceeds LONG_MAX, and long division afterwards. The
  digits are then encoded into dst with cs->cset->wc_mb().
*/
646 char *dst,
size_t len,
int radix, longlong val)
649 register char *p, *db, *de;
652 ulonglong uval= (ulonglong) val;
/* Negate in unsigned arithmetic. */
660 uval = (ulonglong)0 - uval;
664 p= &buffer[
sizeof(buffer)-1];
/* Phase 1: the value does not fit in a long, so divide as ulonglong. */
673 while (uval > (ulonglong) LONG_MAX)
675 ulonglong quo= uval/(uint) 10;
676 uint rem= (uint) (uval- quo* (uint) 10);
/* Phase 2: the remainder fits in a long. */
681 long_val= (long) uval;
682 while (long_val != 0)
684 long quo= long_val/10;
685 *--p= (char) (
'0' + (long_val - quo*10));
/* Encode each ASCII digit in the target charset. */
695 for ( db= dst, de= dst + len ; (dst < de) && *p ; p++)
697 int cnvres= cs->cset->wc_mb(cs, (my_wc_t) p[0], (uchar*) dst, (uchar*) de);
/* Number of bytes written to dst. */
703 return (
int) (dst -db);
709 #ifdef HAVE_CHARSET_mb2
/*
  Fragment of a base-10 string-to-longlong conversion for 2-byte charsets.
  The line carrying the function name is not visible.
  NOTE(review): presumably my_strtoll10_mb2 - confirm.

  On entry *endptr is read as the end of the input. The number is collected
  in up to three parts (i, j, k) that are combined with the LFACTOR
  constants.

  Visible error reporting:
    - MY_ERRNO_ERANGE with a saturated result on overflow;
    - MY_ERRNO_EDOM, with *endptr reset to nptr.
      NOTE(review): presumably the no-digits case - confirm.
*/
712 const char *nptr,
char **endptr,
int *error)
714 const char *s, *end, *start, *n_end, *true_end;
716 unsigned long i, j, k;
719 ulong cutoff, cutoff2, cutoff3;
/* The input length must be a whole number of 2-byte units; end is rounded
   down to a multiple of 2 bytes. */
732 DBUG_ASSERT((*endptr - s) % 2 == 0);
733 end= s + ((*endptr - s) / 2) * 2;
737 res= cs->cset->mb_wc(cs, &wc, (
const uchar *) s, (
const uchar *) end);
/* Test for a character that is neither a space nor a tab. */
741 if (wc !=
' ' && wc !=
'\t')
757 res= cs->cset->mb_wc(cs, &wc, (
const uchar *) s, (
const uchar *) end);
/* Overflow limits derived from MAX_NEGATIVE_NUMBER, split into the same
   three parts as the digits. */
761 cutoff= MAX_NEGATIVE_NUMBER / LFACTOR2;
762 cutoff2= (MAX_NEGATIVE_NUMBER % LFACTOR2) / 100;
763 cutoff3= MAX_NEGATIVE_NUMBER % 100;
770 res= cs->cset->mb_wc(cs, &wc, (
const uchar *) s, (
const uchar *) end);
/* Overflow limits derived from ULONGLONG_MAX. */
775 cutoff= ULONGLONG_MAX / LFACTOR2;
776 cutoff2= ULONGLONG_MAX % LFACTOR2 / 100;
777 cutoff3= ULONGLONG_MAX % 100;
789 res= cs->cset->mb_wc(cs, &wc, (
const uchar *) s, (
const uchar *) end);
/* Each digit occupies 2 bytes, so a part of INIT_CNT digits spans
   2 * INIT_CNT bytes. */
796 n_end= s + 2 * INIT_CNT;
/* c is the digit value; a value above 9 means wc is not a decimal digit. */
801 if ((c= (wc -
'0')) > 9)
804 n_end= s + 2 * (INIT_CNT-1);
812 res= cs->cset->mb_wc(cs, &wc, (
const uchar *) s, (
const uchar *) n_end);
816 if ((c= (wc -
'0')) > 9)
826 n_end= true_end= s + 2 * INIT_CNT;
831 res= cs->cset->mb_wc(cs, &wc, (
const uchar *) s, (
const uchar *) end);
835 if ((c= (wc -
'0')) > 9)
838 }
while (s != n_end);
845 res= cs->cset->mb_wc(cs, &wc, (
const uchar *) s, (
const uchar *) end);
849 if ((c= (wc -
'0')) > 9)
856 res= cs->cset->mb_wc(cs, &wc, (
const uchar *) s, (
const uchar *) end);
860 if ((c= (wc -
'0')) > 9)
/* Test for a further digit after the parts already read. */
866 if (s != end && (c= (wc -
'0')) <= 9)
/* Compare the parts against the limits, most significant part first. */
870 if (i > cutoff || (i == cutoff && ((j > cutoff2 || j == cutoff2) &&
873 li=i*LFACTOR2+ (ulonglong) j*100 + k;
874 return (longlong) li;
/* Overflow: report ERANGE and return a saturated value. */
877 *error= MY_ERRNO_ERANGE;
878 return negative ? LONGLONG_MIN : (longlong) ULONGLONG_MAX;
/* Only the first part (i) was read. */
882 return (negative ? ((longlong) -(long) i) : (longlong) i);
/* The second part is partially filled: scale i by 10 to the number of
   digits read since start ((s - start) / 2). */
885 li= (ulonglong) i * lfactor[(
size_t) (s-start) / 2] + j;
887 return (negative ? -((longlong) li) : (longlong) li);
890 li=(ulonglong) i*LFACTOR+ (ulonglong) j;
892 return (negative ? -((longlong) li) : (longlong) li);
895 li=(ulonglong) i*LFACTOR1+ (ulonglong) j * 10 + k;
899 if (li > MAX_NEGATIVE_NUMBER)
901 return -((longlong) li);
903 return (longlong) li;
907 *error= MY_ERRNO_EDOM;
908 *endptr= (
char *) nptr;
/*
  Fragment of a scanner for 2-byte charsets. The line carrying the function
  name is not visible.
  NOTE(review): presumably my_scan_mb2 - confirm.

  For the visible sequence type it decodes characters with cs->cset->mb_wc()
  for as long as they are spaces, and returns the number of bytes advanced
  from the start of the string.
*/
915 const char *str,
const char *end,
int sequence_type)
/* Remember the start so that the scanned length can be returned. */
917 const char *str0= str;
921 switch (sequence_type)
/* Continue while decoding succeeds and the character is a space. */
924 for (res= cs->cset->mb_wc(cs, &wc,
925 (
const uchar *) str, (
const uchar *) end);
926 res > 0 && wc ==
' ';
928 res= cs->cset->mb_wc(cs, &wc,
929 (
const uchar *) str, (
const uchar *) end))
932 return (
size_t) (str - str0);
/*
  Fill the buffer s of slen bytes with the character `fill`, encoded in the
  charset cs.

  The character is encoded once into a local buffer with cs->cset->wc_mb().
  That encoding is then copied repeatedly while at least buflen bytes remain.
  A trailing loop handles any bytes left over.
*/
940 my_fill_mb2(
const CHARSET_INFO *cs,
char *s,
size_t slen,
int fill)
/* The buffer length is expected to be a whole number of 2-byte units. */
945 DBUG_ASSERT((slen % 2) == 0);
947 buflen= cs->cset->wc_mb(cs, (my_wc_t) fill, (uchar*) buf,
948 (uchar*) buf +
sizeof(buf));
/* The fill character must be encodable in this charset. */
950 DBUG_ASSERT(buflen > 0);
952 while (slen >= (
size_t) buflen)
955 memcpy(s, buf, (
size_t) buflen);
/* Remaining bytes, fewer than one encoded character. */
964 for ( ; slen; slen--)
/*
  Minimal vsnprintf for 2-byte charsets: formats fmt with the arguments in ap
  into dst, which is n bytes long.

  The visible conversions are %s, %d and %u. Digits, '.' and '-' that follow
  a '%' are scanned over by a loop. The return value is the number of bytes
  written (dst - start).
*/
972 my_vsnprintf_mb2(
char *dst,
size_t n,
const char*
fmt, va_list ap)
/* end is the last byte of the buffer, so one byte is held back. */
974 char *start=dst, *end= dst + n - 1;
/* Loop over width / precision / flag characters. */
990 while ( (*fmt >=
'0' && *fmt <=
'9') || *fmt ==
'.' || *fmt ==
'-')
998 char *par= va_arg(ap,
char *);
1000 size_t left_len= (size_t)(end-dst);
/* Substitute text used for the string argument.
   NOTE(review): presumably applied when par is NULL - confirm. */
1002 par= (
char*)
"(null)";
/* Each source byte produces 2 output bytes; truncate when it would not fit.
   NOTE(review): if plen is unsigned and left_len < 2, the subtraction below
   would wrap - confirm the type of plen and the caller guarantees. */
1004 if (left_len <= plen * 2)
1005 plen = left_len / 2 - 1;
1007 for ( ; plen ; plen--, dst+=2, par++)
1014 else if (*fmt ==
'd' || *fmt ==
'u')
/* Test for fewer than 32 bytes of room before formatting a number. */
1020 if ((
size_t) (end - dst) < 32)
1022 iarg= va_arg(ap,
int);
/* Radix -10 formats the value as signed, radix 10 as unsigned. */
1024 int10_to_str((
long) iarg, nbuf, -10);
1026 int10_to_str((
long) (uint) iarg, nbuf,10);
/* Walk the ASCII digits in nbuf up to its terminating NUL. */
1028 for (; pbuf[0]; pbuf++)
1043 DBUG_ASSERT(dst <= end);
1045 return (
size_t) (dst - start);
/*
  Variadic front end for my_vsnprintf_mb2(): formats fmt into the n-byte
  buffer `to` and returns my_vsnprintf_mb2()'s result. The charset argument
  is unused.
*/
1050 my_snprintf_mb2(
const CHARSET_INFO *cs __attribute__((unused)),
1051 char*
to,
size_t n,
const char* fmt, ...)
1055 return my_vsnprintf_mb2(to, n, fmt, args);
/*
  Length of the string without its trailing spaces, for a 2-byte charset
  stored with the high byte first.

  A trailing space is recognised as the byte pair '\0', ' ' at the end of
  the range. The return value is the distance from ptr to the adjusted end.
*/
1060 my_lengthsp_mb2(
const CHARSET_INFO *cs __attribute__((unused)),
1061 const char *ptr,
size_t length)
1063 const char *end= ptr + length;
/* At least two bytes must remain, and they must be 0x00 0x20. */
1064 while (end > ptr + 1 && end[-1] ==
' ' && end[-2] ==
'\0')
1066 return (
size_t) (end - ptr);
1074 #ifdef HAVE_CHARSET_utf16
/* Bounds of the UTF-16 high (leading) and low (trailing) surrogate ranges. */
1081 #define MY_UTF16_SURROGATE_HIGH_FIRST 0xD800
1082 #define MY_UTF16_SURROGATE_HIGH_LAST 0xDBFF
1083 #define MY_UTF16_SURROGATE_LOW_FIRST 0xDC00
1084 #define MY_UTF16_SURROGATE_LOW_LAST 0xDFFF
/* Tests on the first byte of a code unit: the top six bits are 110110 for a
   high surrogate (0xD8..0xDB) and 110111 for a low surrogate (0xDC..0xDF). */
1086 #define MY_UTF16_HIGH_HEAD(x) ((((uchar) (x)) & 0xFC) == 0xD8)
1087 #define MY_UTF16_LOW_HEAD(x) ((((uchar) (x)) & 0xFC) == 0xDC)
/* True when the 16-bit value x lies anywhere in 0xD800..0xDFFF. */
1088 #define MY_UTF16_SURROGATE(x) (((x) & 0xF800) == 0xD800)
/*
  Build a 16-bit code unit from two bytes: a is the high byte and b is the
  low byte.

  Every argument is parenthesized so that an expression argument (for
  example `x | y`) is evaluated as a unit before it is shifted or added.
*/
#define MY_UTF16_WC2(a, b)       (((a) << 8) + (b))

/*
  Build a supplementary-plane code point from the four bytes of a surrogate
  pair. a and b are the bytes of the high surrogate, c and d the bytes of
  the low surrogate.

  The low two bits of a and all of b give the upper 10 payload bits; the low
  two bits of c and all of d give the lower 10 payload bits. 0x10000 is then
  added to reach the supplementary range.
*/
#define MY_UTF16_WC4(a, b, c, d) ((((a) & 3) << 18) + ((b) << 10) + \
                                  (((c) & 3) << 8) + (d) + 0x10000)
/*
  Decode one character from the big-endian UTF-16 byte range [s, e) into
  *pwc.

  Visible outcomes:
    - MY_CS_TOOSMALL2 / MY_CS_TOOSMALL4 are returned from the 2-byte and
      4-byte paths.
      NOTE(review): presumably when the range is too short - the guarding
      conditions are not visible.
    - When the first byte is a high-surrogate head, the third byte must be a
      low-surrogate head, and the four bytes are combined with MY_UTF16_WC4.
    - Otherwise the first two bytes are combined with MY_UTF16_WC2.
*/
1102 my_utf16_uni(
const CHARSET_INFO *cs __attribute__((unused)),
1103 my_wc_t *pwc,
const uchar *s,
const uchar *e)
1106 return MY_CS_TOOSMALL2;
/* A high-surrogate head starts a 4-byte sequence. */
1114 if (MY_UTF16_HIGH_HEAD(*s))
1117 return MY_CS_TOOSMALL4;
/* The second code unit must be a low surrogate. */
1119 if (!MY_UTF16_LOW_HEAD(s[2]))
1122 *pwc= MY_UTF16_WC4(s[0], s[1], s[2], s[3]);
/* A low-surrogate head with no preceding high surrogate. */
1126 if (MY_UTF16_LOW_HEAD(*s))
1129 *pwc= MY_UTF16_WC2(s[0], s[1]);
/*
  Encode the code point wc as big-endian UTF-16 into the byte range [s, e).

  The visible 2-byte path stores the high byte and then the low byte. The
  visible 4-byte path subtracts 0x10000 and stores a high surrogate followed
  by a low surrogate. MY_CS_TOOSMALL2 / MY_CS_TOOSMALL4 are returned from the
  respective paths.
  NOTE(review): presumably returned when the output range is too short - the
  guarding conditions are not visible.
*/
1135 my_uni_utf16(
const CHARSET_INFO *cs __attribute__((unused)),
1136 my_wc_t wc, uchar *s, uchar *e)
1141 return MY_CS_TOOSMALL2;
/* Code points in the surrogate range get separate handling. */
1142 if (MY_UTF16_SURROGATE(wc))
1144 *s++= (uchar) (wc >> 8);
1145 *s= (uchar) (wc & 0xFF);
1152 return MY_CS_TOOSMALL4;
/* wc is reduced by 0x10000 in place; the later lines use the reduced value. */
1153 *s++= (uchar) ((wc-= 0x10000) >> 18) | 0xD8;
1154 *s++= (uchar) (wc >> 10) & 0xFF;
1155 *s++= (uchar) ((wc >> 8) & 3) | 0xDC;
1156 *s= (uchar) wc & 0xFF;
/*
  Fragment (signature not visible). NOTE(review): presumably
  my_tolower_utf16 - confirm.
  If *wc is within uni_plane->maxchar and its 256-entry page (indexed by
  *wc >> 8) exists, replace *wc with that page's tolower mapping.
*/
1168 if ((*wc <= uni_plane->maxchar) && (page= uni_plane->page[*wc >> 8]))
1169 *wc= page[*wc & 0xFF].tolower;
/*
  Fragment (signature not visible). NOTE(review): presumably
  my_toupper_utf16 - confirm.
  If *wc is within uni_plane->maxchar and its 256-entry page (indexed by
  *wc >> 8) exists, replace *wc with that page's toupper mapping.
*/
1177 if ((*wc <= uni_plane->maxchar) && (page= uni_plane->page[*wc >> 8]))
1178 *wc= page[*wc & 0xFF].toupper;
/*
  Fragment (signature not visible). NOTE(review): presumably
  my_tosort_utf16 - confirm.
  A code point within uni_plane->maxchar is replaced by its page's sort
  weight when the page exists. MY_CS_REPLACEMENT_CHARACTER is assigned on
  another path.
  NOTE(review): presumably the path for code points above maxchar - confirm.
*/
1185 if (*wc <= uni_plane->maxchar)
1188 if ((page= uni_plane->page[*wc >> 8]))
1189 *wc= page[*wc & 0xFF].sort;
1193 *wc= MY_CS_REPLACEMENT_CHARACTER;
/*
  Convert the string at src (srclen bytes) to upper case in place.

  dst and dstlen are marked unused; the assertion requires that they equal
  src and srclen. Each character is decoded with cs->cset->mb_wc(), mapped
  with my_toupper_utf16() and written back at the same position with
  cs->cset->wc_mb().
*/
1200 my_caseup_utf16(
const CHARSET_INFO *cs,
char *src,
size_t srclen,
1201 char *dst __attribute__((unused)),
1202 size_t dstlen __attribute__((unused)))
1206 char *srcend= src + srclen;
1208 DBUG_ASSERT(src == dst && srclen == dstlen);
1210 while ((src < srcend) &&
1211 (res= cs->cset->mb_wc(cs, &wc, (uchar *) src, (uchar *) srcend)) > 0)
1213 my_toupper_utf16(uni_plane, &wc);
/* Test whether the re-encoded length differs from the decoded length. */
1214 if (res != cs->cset->wc_mb(cs, wc, (uchar *) src, (uchar *) srcend))
/*
  Mix the string s (slen bytes) into the running hash state n1 / n2, using
  sort weights.

  The end of the hashed range is s plus the value returned by
  cs->cset->lengthsp(). Each decoded character is mapped with
  my_tosort_utf16(). Its low byte, and then the value wc >> 8, are folded
  into n1[0].
*/
1223 my_hash_sort_utf16(
const CHARSET_INFO *cs,
const uchar *s,
size_t slen,
1224 ulong *n1, ulong *n2)
1228 const uchar *e= s + cs->cset->lengthsp(cs, (
const char *) s, slen);
1231 while ((s < e) && (res= cs->cset->mb_wc(cs, &wc,
1232 (uchar *) s, (uchar *) e)) > 0)
1234 my_tosort_utf16(uni_plane, &wc);
/* First the low byte of the weight ... */
1235 n1[0]^= (((n1[0] & 63) + n2[0]) * (wc & 0xFF)) + (n1[0] << 8);
/* ... then the remaining high part. */
1237 n1[0]^= (((n1[0] & 63) + n2[0]) * (wc >> 8)) + (n1[0] << 8);
/*
  Convert the string at src (srclen bytes) to lower case in place.

  dst and dstlen are marked unused; the assertion requires that they equal
  src and srclen. Each character is decoded with cs->cset->mb_wc(), mapped
  with my_tolower_utf16() and written back at the same position with
  cs->cset->wc_mb().
*/
1245 my_casedn_utf16(
const CHARSET_INFO *cs,
char *src,
size_t srclen,
1246 char *dst __attribute__((unused)),
1247 size_t dstlen __attribute__((unused)))
1251 char *srcend= src + srclen;
1253 DBUG_ASSERT(src == dst && srclen == dstlen);
1255 while ((src < srcend) &&
1256 (res= cs->cset->mb_wc(cs, &wc, (uchar *) src, (uchar *) srcend)) > 0)
1258 my_tolower_utf16(uni_plane, &wc);
/* Test whether the re-encoded length differs from the decoded length. */
1259 if (res != cs->cset->wc_mb(cs, wc, (uchar *) src, (uchar *) srcend))
/*
  Fragment of a collation comparison of s (slen bytes) against t (tlen
  bytes) using sort weights. The line carrying the function name is not
  visible.
  NOTE(review): presumably my_strnncoll_utf16 - confirm.

  Both strings are decoded in step. If either side fails to decode, the
  remaining bytes are compared with my_bincmp(). Otherwise both characters
  are mapped with my_tosort_utf16() and compared.
*/
1269 const uchar *s,
size_t slen,
1270 const uchar *t,
size_t tlen,
1271 my_bool t_is_prefix)
1274 my_wc_t UNINIT_VAR(s_wc), UNINIT_VAR(t_wc);
1275 const uchar *se= s + slen;
1276 const uchar *te= t + tlen;
1279 while (s < se && t < te)
1281 s_res= cs->cset->mb_wc(cs, &s_wc, s, se);
1282 t_res= cs->cset->mb_wc(cs, &t_wc, t, te);
/* Undecodable input on either side: fall back to a byte comparison. */
1284 if (s_res <= 0 || t_res <= 0)
1287 return my_bincmp(s, se, t, te);
1290 my_tosort_utf16(uni_plane, &s_wc);
1291 my_tosort_utf16(uni_plane, &t_wc);
1295 return s_wc > t_wc ? 1 : -1;
/* One side is exhausted. With t_is_prefix only the unread part of t counts;
   otherwise the difference of the unread lengths decides. */
1301 return (
int) (t_is_prefix ? (t - te) : ((se - s) - (te - t)));
/*
  Fragment of a collation comparison that looks at what follows the common
  part of the two strings by comparing it against the space character. The
  line carrying the function name is not visible.
  NOTE(review): presumably my_strnncollsp_utf16 - confirm.

  The main loop matches the plain comparison: decode both sides, fall back
  to my_bincmp() on a decode failure, otherwise compare sort weights. The
  remaining tail is then scanned and compared against ' '.
*/
1333 const uchar *s,
size_t slen,
1334 const uchar *t,
size_t tlen,
1335 my_bool diff_if_only_endspace_difference)
1338 my_wc_t UNINIT_VAR(s_wc), UNINIT_VAR(t_wc);
1339 const uchar *se= s + slen, *te= t + tlen;
/* Both lengths must be whole numbers of 2-byte units. */
1342 DBUG_ASSERT((slen % 2) == 0);
1343 DBUG_ASSERT((tlen % 2) == 0);
/* Unless the build defines the macro below, the flag is forced off. */
1345 #ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
1346 diff_if_only_endspace_difference= FALSE;
1349 while (s < se && t < te)
1351 int s_res= cs->cset->mb_wc(cs, &s_wc, s, se);
1352 int t_res= cs->cset->mb_wc(cs, &t_wc, t, te);
1354 if (s_res <= 0 || t_res <= 0)
1357 return my_bincmp(s, se, t, te);
1360 my_tosort_utf16(uni_plane, &s_wc);
1361 my_tosort_utf16(uni_plane, &t_wc);
1365 return s_wc > t_wc ? 1 : -1;
/* Lengths of the unread tails. */
1372 slen= (size_t) (se - s);
1373 tlen= (size_t) (te - t);
1379 if (diff_if_only_endspace_difference)
/* Scan the tail.
   NOTE(review): only a negative decode result is tested here, unlike the
   `<= 0` test in the main loop; a zero result would leave s unchanged by
   `s+= s_res` - confirm that mb_wc() cannot return 0 here. */
1390 for ( ; s < se; s+= s_res)
1392 if ((s_res= cs->cset->mb_wc(cs, &s_wc, s, se)) < 0)
/* The result depends on whether the tail character sorts below a space. */
1398 return (s_wc <
' ') ? -swap : swap;
/*
  Return the byte length of the character at the start of [b, e) as decoded
  by cs->cset->mb_wc(), or 0 when decoding does not return a positive
  length.
*/
1406 my_ismbchar_utf16(
const CHARSET_INFO *cs,
const char *b,
const char *e)
1409 int res= cs->cset->mb_wc(cs, &wc, (
const uchar *) b, (
const uchar *) e);
/* Non-positive results are reported as 0. */
1410 return (uint) (res > 0 ? res : 0);
/*
  Character length implied by the leading byte c: 4 when c is a
  high-surrogate head, otherwise 2.

  NOTE(review): c is annotated as unused in the signature although the
  visible return statement reads it.
*/
1415 my_mbcharlen_utf16(
const CHARSET_INFO *cs __attribute__((unused)),
1416 uint c __attribute__((unused)))
1419 return MY_UTF16_HIGH_HEAD(c) ? 4 : 2;
/*
  Fragment (function name not visible). NOTE(review): presumably
  my_numchars_utf16 - confirm.
  The visible line obtains the length of the character at b through
  my_ismbchar_utf16().
*/
1425 const char *b,
const char *e)
1430 size_t charlen= my_ismbchar_utf16(cs, b, e);
/*
  Fragment (function name not visible). NOTE(review): presumably
  my_charpos_utf16 - confirm.

  Steps over `pos` characters starting at b, and returns the byte offset
  reached. If a character cannot be read before pos reaches zero, the value
  returned is (e + 2 - b0), which is 2 more than e - b0.
*/
1441 const char *b,
const char *e,
size_t pos)
1446 for ( ; pos; b+= charlen, pos--)
/* A zero length means no valid character at b. */
1448 if (!(charlen= my_ismbchar(cs, b, e)))
1449 return (e + 2 - b0);
1451 return (
size_t) (pos ? (e + 2 - b0) : (b - b0));
/*
  Fragment (function name not visible). NOTE(review): presumably
  my_well_formed_len_utf16 - confirm.

  Steps over at most nchars characters starting at b, and returns the byte
  length covered (b - b0). When a character cannot be read, *error is set to
  1 if input remains (b < e) and to 0 otherwise.
*/
1457 const char *b,
const char *e,
1458 size_t nchars,
int *error)
1464 for ( ; nchars; b+= charlen, nchars--)
1466 if (!(charlen= my_ismbchar(cs, b, e)))
/* Failing before the end of the range is an error; failing at the end is
   not. */
1468 *error= b < e ? 1 : 0;
1472 return (
size_t) (b - b0);
/*
  Fragment (function name not visible). NOTE(review): presumably
  my_wildcmp_utf16_ci - confirm.
  Forwards all arguments to my_wildcmp_unicode(), passing uni_plane as the
  last argument.
*/
1478 const char *str,
const char *str_end,
1479 const char *wildstr,
const char *wildend,
1480 int escape,
int w_one,
int w_many)
1483 return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend,
1484 escape, w_one, w_many, uni_plane);
/*
  Fragment (function name not visible). NOTE(review): presumably
  my_wildcmp_utf16_bin - confirm.
  Forwards all arguments to my_wildcmp_unicode(), passing NULL as the last
  argument.
*/
1490 const char *str,
const char *str_end,
1491 const char *wildstr,
const char *wildend,
1492 int escape,
int w_one,
int w_many)
1494 return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend,
1495 escape, w_one, w_many, NULL);
/*
  Fragment of a comparison of s against t (function name not visible).
  NOTE(review): presumably my_strnncoll_utf16_bin - confirm.

  Both strings are decoded in step with cs->cset->mb_wc(), and the decoded
  values are compared directly. No sort-weight mapping is visible. On a
  decode failure the remaining bytes are compared with my_bincmp().
*/
1501 const uchar *s,
size_t slen,
1502 const uchar *t,
size_t tlen,
1503 my_bool t_is_prefix)
1506 my_wc_t UNINIT_VAR(s_wc), UNINIT_VAR(t_wc);
1507 const uchar *se=s+slen;
1508 const uchar *te=t+tlen;
1510 while ( s < se && t < te )
1512 s_res= cs->cset->mb_wc(cs, &s_wc, s, se);
1513 t_res= cs->cset->mb_wc(cs, &t_wc, t, te);
/* Undecodable input on either side: fall back to a byte comparison. */
1515 if (s_res <= 0 || t_res <= 0)
1518 return my_bincmp(s, se, t, te);
1522 return s_wc > t_wc ? 1 : -1;
/* With t_is_prefix only the unread part of t counts; otherwise the
   difference of the unread lengths decides. */
1528 return (
int) (t_is_prefix ? (t - te) : ((se - s) - (te - t)));
/*
  Fragment of a comparison that also looks at what follows the common part
  of the two strings, by comparing it against the space character (function
  name not visible).
  NOTE(review): presumably my_strnncollsp_utf16_bin - confirm.

  Decoded characters are compared directly. No sort-weight mapping is
  visible.
*/
1534 const uchar *s,
size_t slen,
1535 const uchar *t,
size_t tlen,
1536 my_bool diff_if_only_endspace_difference)
1539 my_wc_t UNINIT_VAR(s_wc), UNINIT_VAR(t_wc);
1540 const uchar *se= s + slen, *te= t + tlen;
/* Both lengths must be whole numbers of 2-byte units. */
1542 DBUG_ASSERT((slen % 2) == 0);
1543 DBUG_ASSERT((tlen % 2) == 0);
/* Unless the build defines the macro below, the flag is forced off. */
1545 #ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
1546 diff_if_only_endspace_difference= FALSE;
1549 while (s < se && t < te)
1551 int s_res= cs->cset->mb_wc(cs, &s_wc, s, se);
1552 int t_res= cs->cset->mb_wc(cs, &t_wc, t, te);
1554 if (s_res <= 0 || t_res <= 0)
1557 return my_bincmp(s, se, t, te);
1562 return s_wc > t_wc ? 1 : -1;
/* Lengths of the unread tails. */
1569 slen= (size_t) (se - s);
1570 tlen= (size_t) (te - t);
1576 if (diff_if_only_endspace_difference)
/* Scan the tail.
   NOTE(review): only a negative decode result is tested here, unlike the
   `<= 0` test in the main loop; a zero result would leave s unchanged by
   `s+= s_res` - confirm that mb_wc() cannot return 0 here. */
1587 for ( ; s < se; s+= s_res)
1589 if ((s_res= cs->cset->mb_wc(cs, &s_wc, s, se)) < 0)
1595 return (s_wc <
' ') ? -swap : swap;
/*
  Fragment (function name not visible). NOTE(review): presumably
  my_hash_sort_utf16_bin - confirm.

  Folds the raw bytes of [pos, end) into nr1[0], one byte at a time. end is
  pos plus the value returned by cs->cset->lengthsp().
*/
1604 const uchar *pos,
size_t len, ulong *nr1, ulong *nr2)
1606 const uchar *end= pos + cs->cset->lengthsp(cs, (
const char *) pos, len);
1607 for ( ; pos < end ; pos++)
1609 nr1[0]^= (ulong) ((((uint) nr1[0] & 63) + nr2[0]) *
1610 ((uint)*pos)) + (nr1[0] << 8);
1620 my_strnncollsp_utf16,
1621 my_strnxfrm_unicode,
1622 my_strnxfrmlen_simple,
1623 my_like_range_generic,
1624 my_wildcmp_utf16_ci,
1625 my_strcasecmp_mb2_or_mb4,
1635 my_strnncoll_utf16_bin,
1636 my_strnncollsp_utf16_bin,
1637 my_strnxfrm_unicode_full_bin,
1638 my_strnxfrmlen_unicode_full_bin,
1639 my_like_range_generic,
1640 my_wildcmp_utf16_bin,
1641 my_strcasecmp_mb2_or_mb4,
1643 my_hash_sort_utf16_bin,
1655 my_well_formed_len_utf16,
1661 my_caseup_str_mb2_or_mb4,
1662 my_casedn_str_mb2_or_mb4,
1666 my_l10tostr_mb2_or_mb4,
1667 my_ll10tostr_mb2_or_mb4,
1669 my_strntol_mb2_or_mb4,
1670 my_strntoul_mb2_or_mb4,
1671 my_strntoll_mb2_or_mb4,
1672 my_strntoull_mb2_or_mb4,
1673 my_strntod_mb2_or_mb4,
1675 my_strntoull10rnd_mb2_or_mb4,
1683 MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
1695 &my_unicase_default,
1709 &my_charset_utf16_handler,
1710 &my_collation_utf16_general_ci_handler
1717 MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
1729 &my_unicase_default,
1743 &my_charset_utf16_handler,
1744 &my_collation_utf16_bin_handler
/*
  Decode one character from the little-endian UTF-16 byte range [s, e) into
  *pwc.

  The first code unit is read with uint2korr(). A value outside
  0xD800..0xDFFF is tested for first. A value at or above 0xDC00 inside that
  range (a low surrogate in first position) is tested for next. Otherwise a
  second code unit is read, and it must lie in the low-surrogate range. The
  two 10-bit payloads are then combined and 0x10000 is added.
*/
1749 my_utf16le_uni(
const CHARSET_INFO *cs __attribute__((unused)),
1750 my_wc_t *pwc,
const uchar *s,
const uchar *e)
1755 return MY_CS_TOOSMALL2;
/* Not a surrogate: *pwc already holds the character. */
1757 if ((*pwc= uint2korr(s)) < MY_UTF16_SURROGATE_HIGH_FIRST ||
1758 (*pwc > MY_UTF16_SURROGATE_LOW_LAST))
/* A low surrogate with no preceding high surrogate. */
1761 if (*pwc >= MY_UTF16_SURROGATE_LOW_FIRST)
1765 return MY_CS_TOOSMALL4;
/* The second code unit must be a low surrogate. */
1769 if ((lo= uint2korr(s)) < MY_UTF16_SURROGATE_LOW_FIRST ||
1770 lo > MY_UTF16_SURROGATE_LOW_LAST)
/* High payload in bits 10..19, low payload in bits 0..9, plus 0x10000. */
1773 *pwc= 0x10000 + (((*pwc & 0x3FF) << 10) | (lo & 0x3FF));
/*
  Encode the code point wc as little-endian UTF-16 into the byte range
  [s, e).

  The first visible test selects code points below the surrogate range or
  above it (the upper bound of that test is not visible). The second test
  (wc < 0xFFFF or wc > 0x10FFFF) is applied before the 4-byte path, which
  stores a high and a low surrogate with int2store().
*/
1779 my_uni_utf16le(
const CHARSET_INFO *cs __attribute__((unused)),
1780 my_wc_t wc, uchar *s, uchar *e)
1782 if (wc < MY_UTF16_SURROGATE_HIGH_FIRST ||
1783 (wc > MY_UTF16_SURROGATE_LOW_LAST &&
1787 return MY_CS_TOOSMALL2;
/* NOTE(review): presumably rejects surrogate code points and values beyond
   U+10FFFF - the statement this test guards is not visible. */
1792 if (wc < 0xFFFF || wc > 0x10FFFF)
1796 return MY_CS_TOOSMALL4;
/* High surrogate from bits 10..19, low surrogate from bits 0..9. */
1799 int2store(s, (0xD800 | ((wc >> 10) & 0x3FF))); s+= 2;
1800 int2store(s, (0xDC00 | (wc & 0x3FF)));
/*
  Length of the string without its trailing spaces, for little-endian
  UTF-16.

  A trailing space is recognised when the last two bytes, read with
  uint2korr(), equal 0x20. The return value is the distance from ptr to the
  adjusted end.
*/
1806 my_lengthsp_utf16le(
const CHARSET_INFO *cs __attribute__((unused)),
1807 const char *ptr,
size_t length)
1809 const char *end= ptr + length;
/* At least two bytes must remain before the last code unit is examined. */
1810 while (end > ptr + 1 && uint2korr(end - 2) == 0x20)
1812 return (
size_t) (end - ptr);
1823 my_well_formed_len_utf16,
1824 my_lengthsp_utf16le,
1829 my_caseup_str_mb2_or_mb4,
1830 my_casedn_str_mb2_or_mb4,
1834 my_l10tostr_mb2_or_mb4,
1835 my_ll10tostr_mb2_or_mb4,
1837 my_strntol_mb2_or_mb4,
1838 my_strntoul_mb2_or_mb4,
1839 my_strntoll_mb2_or_mb4,
1840 my_strntoull_mb2_or_mb4,
1841 my_strntod_mb2_or_mb4,
1843 my_strntoull10rnd_mb2_or_mb4,
1851 MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
1853 "utf16le_general_ci",
1863 &my_unicase_default,
1877 &my_charset_utf16le_handler,
1878 &my_collation_utf16_general_ci_handler
1885 MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
1897 &my_unicase_default,
1911 &my_charset_utf16le_handler,
1912 &my_collation_utf16_bin_handler
1919 #ifdef HAVE_CHARSET_utf32
/*
  Decode one character from the big-endian UTF-32 byte range [s, e) into
  *pwc by combining four bytes, most significant first.
  MY_CS_TOOSMALL4 is returned on a path whose guard is not visible.
  NOTE(review): presumably when fewer than 4 bytes remain - confirm.
*/
1922 my_utf32_uni(
const CHARSET_INFO *cs __attribute__((unused)),
1923 my_wc_t *pwc,
const uchar *s,
const uchar *e)
1926 return MY_CS_TOOSMALL4;
1927 *pwc= (s[0] << 24) + (s[1] << 16) + (s[2] << 8) + (s[3]);
/*
  Encode the code point wc as four big-endian bytes at s.
  MY_CS_TOOSMALL4 is returned on a path whose guard is not visible.
  NOTE(review): presumably when fewer than 4 bytes are available - confirm.
*/
1933 my_uni_utf32(
const CHARSET_INFO *cs __attribute__((unused)),
1934 my_wc_t wc, uchar *s, uchar *e)
1937 return MY_CS_TOOSMALL4;
/* Most significant byte first. */
1939 s[0]= (uchar) (wc >> 24);
1940 s[1]= (uchar) (wc >> 16) & 0xFF;
1941 s[2]= (uchar) (wc >> 8) & 0xFF;
1942 s[3]= (uchar) wc & 0xFF;
/*
  Fragment (signature not visible). NOTE(review): presumably
  my_tolower_utf32 - confirm.
  If *wc is within uni_plane->maxchar and its page (indexed by *wc >> 8)
  exists, replace *wc with that page's tolower mapping.
*/
1951 if ((*wc <= uni_plane->maxchar) && (page= uni_plane->page[*wc >> 8]))
1952 *wc= page[*wc & 0xFF].tolower;
/*
  Fragment (signature not visible). NOTE(review): presumably
  my_toupper_utf32 - confirm.
  If *wc is within uni_plane->maxchar and its page (indexed by *wc >> 8)
  exists, replace *wc with that page's toupper mapping.
*/
1960 if ((*wc <= uni_plane->maxchar) && (page= uni_plane->page[*wc >> 8]))
1961 *wc= page[*wc & 0xFF].toupper;
/*
  Fragment (signature not visible). NOTE(review): presumably
  my_tosort_utf32 - confirm.
  A code point within uni_plane->maxchar is replaced by its page's sort
  weight when the page exists. MY_CS_REPLACEMENT_CHARACTER is assigned on
  another path.
  NOTE(review): presumably the path for code points above maxchar - confirm.
*/
1968 if (*wc <= uni_plane->maxchar)
1971 if ((page= uni_plane->page[*wc >> 8]))
1972 *wc= page[*wc & 0xFF].sort;
1976 *wc= MY_CS_REPLACEMENT_CHARACTER;
/*
  Convert the string at src (srclen bytes) to upper case in place.

  dst and dstlen are marked unused; the assertion requires that they equal
  src and srclen. Each character is decoded with my_utf32_uni(), mapped with
  my_toupper_utf32() and written back with my_uni_utf32().
*/
1982 my_caseup_utf32(
const CHARSET_INFO *cs,
char *src,
size_t srclen,
1983 char *dst __attribute__((unused)),
1984 size_t dstlen __attribute__((unused)))
1988 char *srcend= src + srclen;
1990 DBUG_ASSERT(src == dst && srclen == dstlen);
1992 while ((src < srcend) &&
1993 (res= my_utf32_uni(cs, &wc, (uchar *)src, (uchar*) srcend)) > 0)
1995 my_toupper_utf32(uni_plane, &wc);
/* Test whether the re-encoded length differs from the decoded length. */
1996 if (res != my_uni_utf32(cs, wc, (uchar*) src, (uchar*) srcend))
/*
  Fold one value ch into the running hash n1[0], using n2[0] as the second
  state word. The visible statement does not modify n2.
*/
2005 my_hash_add(ulong *n1, ulong *n2, uint ch)
2007 n1[0]^= (((n1[0] & 63) + n2[0]) * (ch)) + (n1[0] << 8);
/*
  Mix the string s (slen bytes) into the running hash state n1 / n2, using
  sort weights.

  A loop first examines trailing 4-byte spaces (bytes 00 00 00 20). Each
  decoded character is mapped with my_tosort_utf32(). Its four bytes are
  then passed to my_hash_add(), most significant first.
*/
2013 my_hash_sort_utf32(
const CHARSET_INFO *cs,
const uchar *s,
size_t slen,
2014 ulong *n1, ulong *n2)
2018 const uchar *e= s + slen;
/* Test for a trailing big-endian UTF-32 space.
   NOTE(review): presumably the loop body moves e back by 4 - not visible. */
2022 while (e > s + 3 && e[-1] ==
' ' && !e[-2] && !e[-3] && !e[-4])
2025 while ((res= my_utf32_uni(cs, &wc, (uchar*) s, (uchar*) e)) > 0)
2027 my_tosort_utf32(uni_plane, &wc);
2028 my_hash_add(n1, n2, (uint) (wc >> 24));
2029 my_hash_add(n1, n2, (uint) (wc >> 16) & 0xFF);
2030 my_hash_add(n1, n2, (uint) (wc >> 8) & 0xFF);
2031 my_hash_add(n1, n2, (uint) (wc & 0xFF));
/*
  Convert the string at src (srclen bytes) to lower case in place.

  dst and dstlen are marked unused; the assertion requires that they equal
  src and srclen. Each character is decoded with my_utf32_uni(), mapped with
  my_tolower_utf32() and written back with my_uni_utf32().
*/
2038 my_casedn_utf32(
const CHARSET_INFO *cs,
char *src,
size_t srclen,
2039 char *dst __attribute__((unused)),
2040 size_t dstlen __attribute__((unused)))
2044 char *srcend= src + srclen;
2046 DBUG_ASSERT(src == dst && srclen == dstlen);
/* Unlike the upper-case variant, this loop condition has no explicit
   `src < srcend` test; it relies on the decoder's result alone. */
2048 while ((res= my_utf32_uni(cs, &wc, (uchar*) src, (uchar*) srcend)) > 0)
2050 my_tolower_utf32(uni_plane,&wc);
2051 if (res != my_uni_utf32(cs, wc, (uchar*) src, (uchar*) srcend))
/*
  Fragment of a collation comparison of s against t using sort weights
  (function name not visible).
  NOTE(review): presumably my_strnncoll_utf32 - confirm.

  Both strings are decoded in step with my_utf32_uni(). On a decode failure
  the remaining bytes are compared with my_bincmp(). Otherwise both
  characters are mapped with my_tosort_utf32() and compared.
*/
2061 const uchar *s,
size_t slen,
2062 const uchar *t,
size_t tlen,
2063 my_bool t_is_prefix)
2065 my_wc_t UNINIT_VAR(s_wc),UNINIT_VAR(t_wc);
2066 const uchar *se= s + slen;
2067 const uchar *te= t + tlen;
2070 while (s < se && t < te)
2072 int s_res= my_utf32_uni(cs, &s_wc, s, se);
2073 int t_res= my_utf32_uni(cs, &t_wc, t, te);
/* Undecodable input on either side: fall back to a byte comparison. */
2075 if ( s_res <= 0 || t_res <= 0)
2078 return my_bincmp(s, se, t, te);
2081 my_tosort_utf32(uni_plane, &s_wc);
2082 my_tosort_utf32(uni_plane, &t_wc);
2086 return s_wc > t_wc ? 1 : -1;
/* With t_is_prefix only the unread part of t counts; otherwise the
   difference of the unread lengths decides. */
2092 return (
int) (t_is_prefix ? (t - te) : ((se - s) - (te - t)));
/*
  Fragment of a collation comparison that also looks at what follows the
  common part of the two strings, by comparing it against the space
  character (function name not visible).
  NOTE(review): presumably my_strnncollsp_utf32 - confirm.

  The main loop decodes both sides with my_utf32_uni(), falls back to
  my_bincmp() on a decode failure, and otherwise compares sort weights
  obtained from my_tosort_utf32().
*/
2125 const uchar *s,
size_t slen,
2126 const uchar *t,
size_t tlen,
2127 my_bool diff_if_only_endspace_difference)
2130 my_wc_t UNINIT_VAR(s_wc), UNINIT_VAR(t_wc);
2131 const uchar *se= s + slen, *te= t + tlen;
/* Both lengths must be whole numbers of 4-byte units. */
2134 DBUG_ASSERT((slen % 4) == 0);
2135 DBUG_ASSERT((tlen % 4) == 0);
/* Unless the build defines the macro below, the flag is forced off. */
2137 #ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
2138 diff_if_only_endspace_difference= FALSE;
2141 while ( s < se && t < te )
2143 int s_res= my_utf32_uni(cs, &s_wc, s, se);
2144 int t_res= my_utf32_uni(cs, &t_wc, t, te);
2146 if ( s_res <= 0 || t_res <= 0 )
2149 return my_bincmp(s, se, t, te);
2152 my_tosort_utf32(uni_plane, &s_wc);
2153 my_tosort_utf32(uni_plane, &t_wc);
2157 return s_wc > t_wc ? 1 : -1;
/* Lengths of the unread tails. */
2164 slen= (size_t) (se - s);
2165 tlen= (size_t) (te - t);
2171 if (diff_if_only_endspace_difference)
/* Scan the tail; only a negative decode result is tested here. */
2182 for ( ; s < se; s+= s_res)
2184 if ((s_res= my_utf32_uni(cs, &s_wc, s, se)) < 0)
2190 return (s_wc <
' ') ? -swap : swap;
/*
  Only the start of the signature is visible; the charset argument is marked
  unused. The remaining parameters and the body cannot be described from
  here.
*/
2198 my_strnxfrmlen_utf32(
const CHARSET_INFO *cs __attribute__((unused)),
/*
  Only the signature is visible. All three parameters are marked unused.
  NOTE(review): presumably returns a constant - confirm against the body.
*/
2206 my_ismbchar_utf32(
const CHARSET_INFO *cs __attribute__((unused)),
2207 const char *b __attribute__((unused)),
2208 const char *e __attribute__((unused)))
/*
  Only the signature is visible. Both parameters are marked unused.
  NOTE(review): presumably returns a constant - confirm against the body.
*/
2215 my_mbcharlen_utf32(
const CHARSET_INFO *cs __attribute__((unused)) ,
2216 uint c __attribute__((unused)))
/*
  Minimal vsnprintf for UTF-32: formats fmt with the arguments in ap into
  dst, which is n bytes long.

  The visible conversions are %s, %d and %u. Digits, '.' and '-' that follow
  a '%' are scanned over by a loop. The value returned is dst - start - 4.
*/
2223 my_vsnprintf_utf32(
char *dst,
size_t n,
const char* fmt, va_list ap)
2225 char *start= dst, *end= dst +
n;
/* The buffer size must be a whole number of 4-byte units. */
2226 DBUG_ASSERT((n % 4) == 0);
2227 for (; *
fmt ; fmt++)
/* Loop over width / precision / flag characters. */
2244 while ( (*fmt>=
'0' && *fmt<=
'9') || *fmt ==
'.' || *fmt ==
'-')
2252 reg2
char *par= va_arg(ap,
char *);
2254 size_t left_len= (size_t)(end - dst);
/* A NULL string argument is printed as "(null)". */
2255 if (!par) par= (
char*)
"(null)";
/* Each source byte produces 4 output bytes; truncate when it would not fit.
   NOTE(review): if plen is unsigned and left_len < 4, the subtraction below
   would wrap - confirm the type of plen and the caller guarantees. */
2257 if (left_len <= plen*4)
2258 plen= left_len / 4 - 1;
2260 for ( ; plen ; plen--, dst+= 4, par++)
2269 else if (*fmt ==
'd' || *fmt ==
'u')
/* Test for fewer than 64 bytes of room before formatting a number. */
2275 if ((
size_t) (end - dst) < 64)
2277 iarg= va_arg(ap,
int);
/* Radix -10 formats the value as signed, radix 10 as unsigned. */
2279 int10_to_str((
long) iarg, nbuf, -10);
2281 int10_to_str((
long) (uint) iarg,nbuf,10);
2283 for (; pbuf[0]; pbuf++)
2302 DBUG_ASSERT(dst < end);
/* NOTE(review): the 4 subtracted here presumably excludes a terminating
   4-byte unit written just before - the lines in between are not visible. */
2307 return (
size_t) (dst - start - 4);
/*
  Variadic front end for my_vsnprintf_utf32(): formats fmt into the n-byte
  buffer `to` and returns my_vsnprintf_utf32()'s result. The charset
  argument is unused.
*/
2312 my_snprintf_utf32(
const CHARSET_INFO *cs __attribute__((unused)),
2313 char*
to,
size_t n,
const char* fmt, ...)
2317 return my_vsnprintf_utf32(to, n, fmt, args);
/*
  Base-10 string-to-longlong conversion for big-endian UTF-32 input.

  On entry *endptr is read as the end of the input. Characters are examined
  directly as 4-byte units: the first three bytes must be zero and the
  fourth carries the ASCII value. The number is collected in up to three
  parts (i, j, k) that are combined with the LFACTOR constants.

  Visible error reporting:
    - MY_ERRNO_ERANGE with a saturated result on overflow;
    - MY_ERRNO_EDOM, with *endptr reset to nptr.
      NOTE(review): presumably the no-digits case - confirm.
*/
2322 my_strtoll10_utf32(
const CHARSET_INFO *cs __attribute__((unused)),
2323 const char *nptr,
char **endptr,
int *error)
2325 const char *s, *end, *start, *n_end, *true_end;
2327 unsigned long i, j, k;
2330 ulong cutoff, cutoff2, cutoff3;
/* Round the input end down to a multiple of 4 bytes. */
2337 end= s + ((*endptr - s) / 4) * 4;
/* Loop over leading spaces and tabs. */
2338 while (s < end && !s[0] && !s[1] && !s[2] &&
2339 (s[3] ==
' ' || s[3] ==
'\t'))
/* Minus sign. */
2352 if (!s[0] && !s[1] && !s[2] && s[3] ==
'-')
/* Overflow limits derived from MAX_NEGATIVE_NUMBER, split into the same
   three parts as the digits. */
2359 cutoff= MAX_NEGATIVE_NUMBER / LFACTOR2;
2360 cutoff2= (MAX_NEGATIVE_NUMBER % LFACTOR2) / 100;
2361 cutoff3= MAX_NEGATIVE_NUMBER % 100;
/* Plus sign. */
2366 if (!s[0] && !s[1] && !s[2] && s[3] ==
'+')
/* Overflow limits derived from ULONGLONG_MAX. */
2372 cutoff= ULONGLONG_MAX / LFACTOR2;
2373 cutoff2= ULONGLONG_MAX % LFACTOR2 / 100;
2374 cutoff3= ULONGLONG_MAX % 100;
/* Leading zeros. */
2378 if (!s[0] && !s[1] && !s[2] && s[3] ==
'0')
2387 while (!s[0] && !s[1] && !s[2] && s[3] ==
'0');
/* Each digit occupies 4 bytes, so a part of INIT_CNT digits spans
   4 * INIT_CNT bytes. */
2388 n_end= s + 4 * INIT_CNT;
/* c is the digit value; a value above 9, or any non-zero high byte, means
   the unit is not a decimal digit. */
2393 if (s[0] || s[1] || s[2] || (c= (s[3]-
'0')) > 9)
2397 n_end= s + 4 * (INIT_CNT-1);
2403 for (; s != n_end ; s+= 4)
2405 if (s[0] || s[1] || s[2] || (c= (s[3] -
'0')) > 9)
2415 n_end= true_end= s + 4 * INIT_CNT;
2420 if (s[0] || s[1] || s[2] || (c= (s[3] -
'0')) > 9)
2424 }
while (s != n_end);
2431 if (s[0] || s[1] || s[2] || (c= (s[3] -
'0')) > 9)
2437 if (s == end || s[0] || s[1] || s[2] || (c= (s[3]-
'0')) > 9)
/* Test for a further digit after the parts already read. */
2444 if (s != end && !s[0] && !s[1] && !s[2] && (c= (s[3] -
'0')) <= 9)
/* Compare the parts against the limits, most significant part first. */
2448 if (i > cutoff || (i == cutoff && ((j > cutoff2 || j == cutoff2) &&
2451 li= i * LFACTOR2+ (ulonglong) j * 100 + k;
2452 return (longlong) li;
/* Overflow: report ERANGE and return a saturated value. */
2455 *error= MY_ERRNO_ERANGE;
2456 return negative ? LONGLONG_MIN : (longlong) ULONGLONG_MAX;
/* Only the first part (i) was read. */
2460 return (negative ? ((longlong) -(long) i) : (longlong) i);
/* The second part is partially filled: scale i by 10 to the number of
   digits read since start ((s - start) / 4). */
2463 li= (ulonglong) i * lfactor[(
size_t) (s-start) / 4] + j;
2465 return (negative ? -((longlong) li) : (longlong) li);
2468 li= (ulonglong) i*LFACTOR+ (ulonglong) j;
2470 return (negative ? -((longlong) li) : (longlong) li);
2473 li= (ulonglong) i*LFACTOR1+ (ulonglong) j * 10 + k;
2477 if (li > MAX_NEGATIVE_NUMBER)
2479 return -((longlong) li);
2481 return (longlong) li;
2485 *error= MY_ERRNO_EDOM;
2486 *endptr= (
char *) nptr;
/*
  Number of characters in [b, e): the byte length divided by 4. Any
  remainder is discarded by the integer division.
*/
2492 my_numchars_utf32(
const CHARSET_INFO *cs __attribute__((unused)),
2493 const char *b,
const char *e)
2495 return (
size_t) (e - b) / 4;
/*
  Byte offset of character number pos in [b, e): pos * 4.
  When that offset lies beyond the string length, the value returned is the
  string length plus 4.
*/
2500 my_charpos_utf32(
const CHARSET_INFO *cs __attribute__((unused)),
2501 const char *b,
const char *e,
size_t pos)
2503 size_t string_length= (size_t) (e - b);
2504 return pos * 4 > string_length ? string_length + 4 : pos * 4;
/*
  Check the 4-byte units of [b, e) for well-formedness, limited by nchars.
  The way nchars limits the range and the value returned are not visible.

  The visible per-unit test looks for a non-zero first byte or a second byte
  above 0x10, which means the unit's value is above 0x10FFFF.
*/
2509 my_well_formed_len_utf32(
const CHARSET_INFO *cs __attribute__((unused)),
2510 const char *b,
const char *e,
2511 size_t nchars,
int *error)
2515 size_t length= e - b;
/* The byte length must be a whole number of 4-byte units. */
2516 DBUG_ASSERT((length % 4) == 0);
2519 if (length > nchars)
2524 for (; b < e; b+= 4)
/* Value beyond U+10FFFF. */
2527 if (b[0] || (uchar) b[1] > 0x10)
/*
  Fragment of a buffer-fill routine for UTF-32 (function name not visible).
  NOTE(review): presumably my_fill_utf32 - confirm.

  The fill character is encoded once with cs->cset->wc_mb() into a local
  buffer, and the encoding is asserted to be exactly 4 bytes long.
*/
2539 char *s,
size_t slen,
int fill)
/* The buffer length must be a whole number of 4-byte units. */
2545 DBUG_ASSERT((slen % 4) == 0);
2547 buflen= cs->cset->wc_mb(cs, (my_wc_t) fill, (uchar*) buf,
2548 (uchar*) buf +
sizeof(buf));
2549 DBUG_ASSERT(buflen == 4);
/*
  Length of the string without its trailing spaces, for big-endian UTF-32.

  A trailing space is recognised as the four bytes 00 00 00 20 at the end of
  the range. The return value is the distance from ptr to the adjusted end.
*/
2559 my_lengthsp_utf32(
const CHARSET_INFO *cs __attribute__((unused)),
2560 const char *ptr,
size_t length)
2562 const char *end= ptr + length;
2563 DBUG_ASSERT((length % 4) == 0);
/* At least four bytes must remain before the last unit is examined. */
2564 while (end > ptr + 3 && end[-1] ==
' ' && !end[-2] && !end[-3] && !end[-4])
2566 return (
size_t) (end - ptr);
/*
  Fragment (function name not visible). NOTE(review): presumably
  my_wildcmp_utf32_ci - confirm.
  Forwards all arguments to my_wildcmp_unicode(), passing uni_plane as the
  last argument.
*/
2572 const char *str,
const char *str_end,
2573 const char *wildstr,
const char *wildend,
2574 int escape,
int w_one,
int w_many)
2577 return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend,
2578 escape, w_one, w_many, uni_plane);
/*
  Fragment (function name not visible). NOTE(review): presumably
  my_wildcmp_utf32_bin - confirm.
  Forwards all arguments to my_wildcmp_unicode(), passing NULL as the last
  argument.
*/
2584 const char *str,
const char *str_end,
2585 const char *wildstr,
const char *wildend,
2586 int escape,
int w_one,
int w_many)
2588 return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend,
2589 escape, w_one, w_many, NULL);
/*
  Fragment of a comparison of s against t (function name not visible).
  NOTE(review): presumably my_strnncoll_utf32_bin - confirm.

  Both strings are decoded in step with my_utf32_uni(), and the decoded
  values are compared directly. No sort-weight mapping is visible. On a
  decode failure the remaining bytes are compared with my_bincmp().
*/
2595 const uchar *s,
size_t slen,
2596 const uchar *t,
size_t tlen,
2597 my_bool t_is_prefix)
2599 my_wc_t UNINIT_VAR(s_wc), UNINIT_VAR(t_wc);
2600 const uchar *se= s + slen;
2601 const uchar *te= t + tlen;
2603 while (s < se && t < te)
2605 int s_res= my_utf32_uni(cs, &s_wc, s, se);
2606 int t_res= my_utf32_uni(cs, &t_wc, t, te);
/* Undecodable input on either side: fall back to a byte comparison. */
2608 if (s_res <= 0 || t_res <= 0)
2611 return my_bincmp(s, se, t, te);
2615 return s_wc > t_wc ? 1 : -1;
/* With t_is_prefix only the unread part of t counts; otherwise the
   difference of the unread lengths decides. */
2621 return (
int) (t_is_prefix ? (t-te) : ((se - s) - (te - t)));
/*
  Read one big-endian 4-byte value starting at s, without bounds checking.
  The visible terms combine s[0], s[1] and s[2]; the final term is not
  visible.
  NOTE(review): presumably s[3] is added as the low byte - confirm.
*/
2625 static inline my_wc_t
2626 my_utf32_get(
const uchar *s)
2629 ((my_wc_t) s[0] << 24) +
2630 ((my_wc_t) s[1] << 16) +
2631 ((my_wc_t) s[2] << 8) +
/*
  Comparison of two UTF-32 strings that also looks at what follows their
  common part, by comparing it against the space character.

  The charset and the diff_if_only_endspace_difference flag are marked
  unused. Characters are read with my_utf32_get() in 4-byte steps over the
  shorter of the two lengths. The tail of the remaining string is then
  compared against ' '.
*/
2637 my_strnncollsp_utf32_bin(
const CHARSET_INFO *cs __attribute__((unused)),
2638 const uchar *s,
size_t slen,
2639 const uchar *t,
size_t tlen,
2640 my_bool diff_if_only_endspace_difference
2641 __attribute__((unused)))
2643 const uchar *se, *te;
/* Both lengths must be whole numbers of 4-byte units. */
2646 DBUG_ASSERT((slen % 4) == 0);
2647 DBUG_ASSERT((tlen % 4) == 0);
/* Compare the common part, one 4-byte character per iteration. */
2652 for (minlen= MY_MIN(slen, tlen); minlen; minlen-= 4)
2654 my_wc_t s_wc= my_utf32_get(s);
2655 my_wc_t t_wc= my_utf32_get(t);
2657 return s_wc > t_wc ? 1 : -1;
/* Scan the tail of the remaining string. */
2673 for ( ; s < se ; s+= 4)
2675 my_wc_t s_wc= my_utf32_get(s);
/* The result depends on whether the tail character sorts below a space. */
2677 return (s_wc <
' ') ? -swap : swap;
/*
  Fragment of a scanner for UTF-32 (function name not visible).
  NOTE(review): presumably my_scan_utf32 - confirm.

  For the visible sequence type it decodes characters with my_utf32_uni()
  and tests each one for a decode error or a non-space. It returns the
  number of bytes advanced from the start of the string.
*/
2686 const char *str,
const char *end,
int sequence_type)
/* Remember the start so that the scanned length can be returned. */
2688 const char *str0= str;
2690 switch (sequence_type)
2693 for ( ; str < end; )
2696 int res= my_utf32_uni(cs, &wc, (uchar*) str, (uchar*) end);
/* NOTE(review): presumably this ends the scan - the guarded statement is
   not visible. */
2697 if (res < 0 || wc !=
' ')
2701 return (
size_t) (str - str0);
/*
  Function entries of a collation handler initializer.
  NOTE(review): the struct header is not in the visible lines -- presumably
  my_collation_utf32_general_ci_handler, which is referenced further down;
  confirm.
*/
2712 my_strnncollsp_utf32,
2713 my_strnxfrm_unicode,
2714 my_strnxfrmlen_utf32,
2715 my_like_range_generic,
2716 my_wildcmp_utf32_ci,
2717 my_strcasecmp_mb2_or_mb4,
/*
  Function entries of a collation handler initializer built from the
  *_utf32_bin comparison routines.
  NOTE(review): the struct header is not in the visible lines -- presumably
  my_collation_utf32_bin_handler, which is referenced further down; confirm.
*/
2727 my_strnncoll_utf32_bin,
2728 my_strnncollsp_utf32_bin,
2729 my_strnxfrm_unicode_full_bin,
2730 my_strnxfrmlen_unicode_full_bin,
2731 my_like_range_generic,
2732 my_wildcmp_utf32_bin,
2733 my_strcasecmp_mb2_or_mb4,
/*
  Function entries of a charset handler initializer: well-formedness check,
  string case conversion, and the shared *_mb2_or_mb4 number/string
  conversion routines.
  NOTE(review): the struct header is not in the visible lines -- presumably
  my_charset_utf32_handler, which is referenced further down; confirm.
*/
2747 my_well_formed_len_utf32,
2753 my_caseup_str_mb2_or_mb4,
2754 my_casedn_str_mb2_or_mb4,
2758 my_l10tostr_mb2_or_mb4,
2759 my_ll10tostr_mb2_or_mb4,
2761 my_strntol_mb2_or_mb4,
2762 my_strntoul_mb2_or_mb4,
2763 my_strntoll_mb2_or_mb4,
2764 my_strntoull_mb2_or_mb4,
2765 my_strntod_mb2_or_mb4,
2767 my_strntoull10rnd_mb2_or_mb4,
/*
  Entries of a charset definition: the flag set (including MY_CS_PRIMARY),
  the default case-info table, and the utf32 charset and general_ci
  collation handlers.
  NOTE(review): the struct header is not in the visible lines; confirm which
  charset object this initializes.
*/
2775 MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
2787 &my_unicase_default,
2801 &my_charset_utf32_handler,
2802 &my_collation_utf32_general_ci_handler
/*
  Entries of a charset definition: the flag set (including MY_CS_BINSORT),
  the default case-info table, and the utf32 charset and binary collation
  handlers.
  NOTE(review): the struct header is not in the visible lines; confirm which
  charset object this initializes.
*/
2809 MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
2821 &my_unicase_default,
2835 &my_charset_utf32_handler,
2836 &my_collation_utf32_bin_handler
2843 #ifdef HAVE_CHARSET_ucs2
/*
  Byte table of per-character classification values. In the visible rows
  the first eight carry non-zero values and the last eight are all zero.
  NOTE(review): the values look like bit flags (e.g. 132 for the digit
  row, 129/130 for the first letters of each case) -- confirm against the
  ctype flag definitions.
*/
2845 static uchar ctype_ucs2[] = {
2847 32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32,
2848 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
2849 72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
2850 132,132,132,132,132,132,132,132,132,132, 16, 16, 16, 16, 16, 16,
2851 16,129,129,129,129,129,129, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2852 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, 16, 16, 16, 16,
2853 16,130,130,130,130,130,130, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2854 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 16, 16, 16, 16, 32,
2855 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2856 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2857 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2858 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2859 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2860 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2861 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2862 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
/*
  256-entry byte mapping to lower case: entries 65..90 ('A'..'Z') hold
  97..122 ('a'..'z'); every other entry holds its own index.
*/
2865 static uchar to_lower_ucs2[] = {
2866 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
2867 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
2868 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
2869 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
2870 64, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
2871 112,113,114,115,116,117,118,119,120,121,122, 91, 92, 93, 94, 95,
2872 96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
2873 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
2874 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
2875 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
2876 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
2877 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
2878 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
2879 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
2880 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
2881 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255
/*
  256-entry byte mapping to upper case: entries 97..122 ('a'..'z') hold
  65..90 ('A'..'Z'); every other entry holds its own index.
*/
2884 static uchar to_upper_ucs2[] = {
2885 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
2886 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
2887 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
2888 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
2889 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
2890 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
2891 96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
2892 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,123,124,125,126,127,
2893 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
2894 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
2895 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
2896 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
2897 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
2898 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
2899 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
2900 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255
/*
  Decode one two-byte character at s into *pwc.
  Returns MY_CS_TOOSMALL2 on the short-input path; the cs parameter is
  marked unused.
*/
2904 static int my_ucs2_uni(
const CHARSET_INFO *cs __attribute__((unused)),
2905 my_wc_t * pwc,
const uchar *s,
const uchar *e)
2908 return MY_CS_TOOSMALL2;
/* Big-endian: the first byte is the high-order byte. */
2910 *pwc= ((uchar)s[0]) * 256 + ((uchar)s[1]);
/*
  Encode wc as two bytes at r.
  Returns MY_CS_TOOSMALL2 on the short-output path; the cs parameter is
  marked unused.
*/
2914 static int my_uni_ucs2(
const CHARSET_INFO *cs __attribute__((unused)) ,
2915 my_wc_t wc, uchar *r, uchar *e)
2918 return MY_CS_TOOSMALL2;
/* Big-endian: high-order byte first, then the low-order byte. */
2923 r[0]= (uchar) (wc >> 8);
2924 r[1]= (uchar) (wc & 0xFF);
/*
  Replace *wc with its tolower mapping: the high byte of *wc selects a page
  in uni_plane and the low byte selects the entry. *wc is left untouched
  when the selected page pointer is null.
*/
2933 if ((page= uni_plane->page[(*wc >> 8) & 0xFF]))
2934 *wc= page[*wc & 0xFF].tolower;
/*
  Replace *wc with its toupper mapping: the high byte of *wc selects a page
  in uni_plane and the low byte selects the entry. *wc is left untouched
  when the selected page pointer is null.
*/
2942 if ((page= uni_plane->page[(*wc >> 8) & 0xFF]))
2943 *wc= page[*wc & 0xFF].toupper;
/*
  Replace *wc with its sort mapping: the high byte of *wc selects a page in
  uni_plane and the low byte selects the entry. *wc is left untouched when
  the selected page pointer is null.
*/
2951 if ((page= uni_plane->page[(*wc >> 8) & 0xFF]))
2952 *wc= page[*wc & 0xFF].sort;
/*
  Convert a UCS-2 buffer to upper case. dst and dstlen are marked unused;
  the assertion below requires them to equal src and srclen.
*/
2956 static size_t my_caseup_ucs2(
const CHARSET_INFO *cs,
char *src,
size_t srclen,
2957 char *dst __attribute__((unused)),
2958 size_t dstlen __attribute__((unused)))
2962 char *srcend= src + srclen;
2964 DBUG_ASSERT(src == dst && srclen == dstlen);
/* Decode until the input ends or the decoder returns a non-positive value. */
2966 while ((src < srcend) &&
2967 (res= my_ucs2_uni(cs, &wc, (uchar *)src, (uchar*) srcend)) > 0)
/* Map the decoded character through the upper-case table. */
2969 my_toupper_ucs2(uni_plane, &wc);
/* Re-encode at src; the test fires when the encoded and decoded lengths differ. */
2970 if (res != my_uni_ucs2(cs, wc, (uchar*) src, (uchar*) srcend))
/*
  Fold the sort weights of a UCS-2 string into the running hash in n1[0],
  using n2[0] as part of the mixing step.
*/
2978 static void my_hash_sort_ucs2(
const CHARSET_INFO *cs,
const uchar *s,
2979 size_t slen, ulong *n1, ulong *n2)
2983 const uchar *e=s+slen;
/* True while the string ends in the two-byte sequence 0x00 0x20 (a space). */
2986 while (e > s+1 && e[-1] ==
' ' && e[-2] ==
'\0')
/* Decode until the input ends or the decoder returns a non-positive value. */
2989 while ((s < e) && (res=my_ucs2_uni(cs,&wc, (uchar *)s, (uchar*)e)) >0)
/* Hash the sort weight rather than the raw character. */
2991 my_tosort_ucs2(uni_plane, &wc);
/* Mix in the low byte of the weight ... */
2992 n1[0]^= (((n1[0] & 63)+n2[0])*(wc & 0xFF))+ (n1[0] << 8);
/* ... and then the bits above the low byte. */
2994 n1[0]^= (((n1[0] & 63)+n2[0])*(wc >> 8))+ (n1[0] << 8);
/*
  Convert a UCS-2 buffer to lower case. dst and dstlen are marked unused;
  the assertion below requires them to equal src and srclen.
*/
3001 static size_t my_casedn_ucs2(
const CHARSET_INFO *cs,
char *src,
size_t srclen,
3002 char *dst __attribute__((unused)),
3003 size_t dstlen __attribute__((unused)))
3007 char *srcend= src + srclen;
3009 DBUG_ASSERT(src == dst && srclen == dstlen);
/* Decode until the input ends or the decoder returns a non-positive value. */
3011 while ((src < srcend) &&
3012 (res= my_ucs2_uni(cs, &wc, (uchar*) src, (uchar*) srcend)) > 0)
/* Map the decoded character through the lower-case table. */
3014 my_tolower_ucs2(uni_plane, &wc);
/* Re-encode at src; the test fires when the encoded and decoded lengths differ. */
3015 if (res != my_uni_ucs2(cs, wc, (uchar*) src, (uchar*) srcend))
/*
  Fill the buffer s of l bytes with the two-byte big-endian encoding of
  fill. The cs parameter is marked unused.
*/
3024 my_fill_ucs2(
const CHARSET_INFO *cs __attribute__((unused)),
3025 char *s,
size_t l,
int fill)
/* fill must fit in 16 bits. */
3027 DBUG_ASSERT(fill <= 0xFFFF);
/*
  All the work happens in the loop header: write the high byte, then the
  low byte, and advance by two. The loop stops once fewer than two bytes
  remain, so a trailing odd byte is not written.
*/
3028 for ( ; l >= 2; s[0]= (fill >> 8), s[1]= (fill & 0xFF), s+= 2, l-= 2);
/*
  Compare the byte strings s (slen bytes) and t (tlen bytes) by the sort
  weights of their decoded characters. t_is_prefix selects the formula used
  for the final result once one of the strings is exhausted.
*/
3033 const uchar *s,
size_t slen,
3034 const uchar *t,
size_t tlen,
3035 my_bool t_is_prefix)
3038 my_wc_t UNINIT_VAR(s_wc),UNINIT_VAR(t_wc);
3039 const uchar *se=s+slen;
3040 const uchar *te=t+tlen;
/* Continue while both strings still have bytes left. */
3043 while ( s < se && t < te )
/* Decode one character from each side. */
3045 s_res=my_ucs2_uni(cs,&s_wc, s, se);
3046 t_res=my_ucs2_uni(cs,&t_wc, t, te);
/* A non-positive result means the decoder did not produce a character. */
3048 if ( s_res <= 0 || t_res <= 0 )
/* Fall back to the difference of the current leading bytes. */
3051 return ((
int)s[0]-(
int)t[0]);
/* Replace both characters with their sort weights before comparing. */
3054 my_tosort_ucs2(uni_plane, &s_wc);
3055 my_tosort_ucs2(uni_plane, &t_wc);
/* 1 when s_wc is the larger weight, -1 otherwise. */
3059 return s_wc > t_wc ? 1 : -1;
/*
  With t_is_prefix set the result is (t - te); otherwise it is the
  difference between the remaining byte counts of s and t.
*/
3065 return (
int) (t_is_prefix ? t-te : ((se-s) - (te-t)));
/*
  Compare s and t two bytes at a time by sort weight, then inspect whatever
  remains relative to the space character.
  diff_if_only_endspace_difference is marked unused.
  NOTE(review): cs is also marked unused here, yet uni_plane is read below;
  its declaration is not in the visible lines -- confirm where it comes from.
*/
3095 static int my_strnncollsp_ucs2(
const CHARSET_INFO *cs __attribute__((unused)),
3096 const uchar *s,
size_t slen,
3097 const uchar *t,
size_t tlen,
3098 my_bool diff_if_only_endspace_difference
3099 __attribute__((unused)))
3101 const uchar *se, *te;
/* Step through the common-length part, one two-byte unit per pass. */
3112 for (minlen= MY_MIN(slen, tlen); minlen; minlen-= 2)
/*
  Weight of the current unit of s: the sort value from the page selected by
  the high byte when that page exists, otherwise the raw 16-bit value.
*/
3114 int s_wc = uni_plane->page[s[0]] ? (int) uni_plane->page[s[0]][s[1]].sort :
3115 (((
int) s[0]) << 8) + (
int) s[1];
/* Same computation for the current unit of t. */
3117 int t_wc = uni_plane->page[t[0]] ? (int) uni_plane->page[t[0]][t[1]].sort :
3118 (((
int) t[0]) << 8) + (int) t[1];
/* 1 when s_wc is the larger weight, -1 otherwise. */
3120 return s_wc > t_wc ? 1 : -1;
/* Walk the rest of s, one two-byte unit per pass. */
3136 for ( ; s < se ; s+= 2)
/* True for any unit other than 0x00 0x20 (a space). */
3138 if (s[0] || s[1] !=
' ')
/* A unit below space gives -swap; anything else gives swap. */
3139 return (s[0] == 0 && s[1] <
' ') ? -swap : swap;
/*
  Multi-byte character test for UCS-2. All three parameters are marked
  unused, so the result does not depend on the input; the returned value
  is not in the visible lines.
*/
3146 static uint my_ismbchar_ucs2(
const CHARSET_INFO *cs __attribute__((unused)),
3147 const char *b __attribute__((unused)),
3148 const char *e __attribute__((unused)))
/*
  Character length query for UCS-2. Both parameters are marked unused, so
  the result does not depend on the input; the returned value is not in
  the visible lines.
*/
3154 static uint my_mbcharlen_ucs2(
const CHARSET_INFO *cs __attribute__((unused)) ,
3155 uint c __attribute__((unused)))
/*
  Number of characters in the byte range [b, e): the byte count divided by
  two. The cs parameter is marked unused.
*/
3162 size_t my_numchars_ucs2(
const CHARSET_INFO *cs __attribute__((unused)),
3163 const char *b,
const char *e)
3165 return (
size_t) (e-b)/2;
/*
  Byte offset for position pos within the byte range [b, e).
  NOTE(review): b and e are marked unused but are read below, and the
  declaration of pos is not in the visible lines.
*/
3170 size_t my_charpos_ucs2(
const CHARSET_INFO *cs __attribute__((unused)),
3171 const char *b __attribute__((unused)),
3172 const char *e __attribute__((unused)),
/* Length of the range in bytes. */
3175 size_t string_length= (size_t) (e - b);
/*
  Returns string_length + 2 when pos exceeds string_length, else pos * 2.
  NOTE(review): pos is compared against a byte count but scaled by two in
  the result, so pos * 2 can itself exceed string_length -- confirm that
  callers only test for "greater than the length".
*/
3176 return pos > string_length ? string_length + 2 : pos * 2;
/*
  Length of the well-formed part of the byte range [b, e), limited by
  nchars. The cs parameter is marked unused.
  NOTE(review): how *error is set is not in the visible lines.
*/
3181 size_t my_well_formed_len_ucs2(
const CHARSET_INFO *cs __attribute__((unused)),
3182 const char *b,
const char *e,
3183 size_t nchars,
int *error)
/* Clear the lowest bit so the byte count is rounded down to an even number. */
3186 size_t nbytes= ((size_t) (e-b)) & ~(
size_t) 1;
/*
  NOTE(review): nchars is compared with a byte count here; any scaling of
  nchars to bytes is not in the visible lines -- confirm.
*/
3189 return MY_MIN(nbytes, nchars);
/*
  Wildcard comparison wrapper: every argument is forwarded unchanged to
  my_wildcmp_unicode(), with uni_plane supplied as the final argument.
  NOTE(review): uni_plane is declared outside the visible lines --
  presumably the charset's case information; confirm.
*/
3195 const char *str,
const char *str_end,
3196 const char *wildstr,
const char *wildend,
3197 int escape,
int w_one,
int w_many)
3200 return my_wildcmp_unicode(cs,str,str_end,wildstr,wildend,
3201 escape,w_one,w_many,uni_plane);
/*
  Wildcard comparison wrapper: every argument is forwarded unchanged to
  my_wildcmp_unicode(), with NULL passed as the final argument.
*/
3207 const char *str,
const char *str_end,
3208 const char *wildstr,
const char *wildend,
3209 int escape,
int w_one,
int w_many)
3211 return my_wildcmp_unicode(cs,str,str_end,wildstr,wildend,
3212 escape,w_one,w_many,NULL);
/*
  Compare the byte strings s (slen bytes) and t (tlen bytes) by the raw
  values of their decoded characters. t_is_prefix selects the formula used
  for the final result once one of the strings is exhausted.
*/
3218 const uchar *s,
size_t slen,
3219 const uchar *t,
size_t tlen,
3220 my_bool t_is_prefix)
3223 my_wc_t UNINIT_VAR(s_wc),UNINIT_VAR(t_wc);
3224 const uchar *se=s+slen;
3225 const uchar *te=t+tlen;
/* Continue while both strings still have bytes left. */
3227 while ( s < se && t < te )
/* Decode one character from each side. */
3229 s_res=my_ucs2_uni(cs,&s_wc, s, se);
3230 t_res=my_ucs2_uni(cs,&t_wc, t, te);
/* A non-positive result means the decoder did not produce a character. */
3232 if ( s_res <= 0 || t_res <= 0 )
/* Fall back to the difference of the current leading bytes. */
3235 return ((
int)s[0]-(
int)t[0]);
/* 1 when s_wc is the larger value, -1 otherwise. */
3239 return s_wc > t_wc ? 1 : -1;
/*
  With t_is_prefix set the result is (t - te); otherwise it is the
  difference between the remaining byte counts of s and t.
*/
3245 return (
int) (t_is_prefix ? t-te : ((se-s) - (te-t)));
/*
  Compare s and t two bytes at a time by raw 16-bit value, then inspect
  whatever remains relative to the space character. The cs and
  diff_if_only_endspace_difference parameters are marked unused.
*/
3248 static int my_strnncollsp_ucs2_bin(
const CHARSET_INFO *cs
3249 __attribute__((unused)),
3250 const uchar *s,
size_t slen,
3251 const uchar *t,
size_t tlen,
3252 my_bool diff_if_only_endspace_difference
3253 __attribute__((unused)))
3255 const uchar *se, *te;
/* Round both lengths down to an even number of bytes. */
3259 slen= (slen >> 1) << 1;
3260 tlen= (tlen >> 1) << 1;
/* Step through the common-length part, one two-byte unit per pass. */
3265 for (minlen= MY_MIN(slen, tlen); minlen; minlen-= 2)
/* Big-endian 16-bit value of the current unit on each side. */
3267 int s_wc= s[0] * 256 + s[1];
3268 int t_wc= t[0] * 256 + t[1];
/* 1 when s_wc is the larger value, -1 otherwise. */
3270 return s_wc > t_wc ? 1 : -1;
/* Walk the rest of s, one two-byte unit per pass. */
3286 for ( ; s < se ; s+= 2)
/* True for any unit other than 0x00 0x20 (a space). */
3288 if (s[0] || s[1] !=
' ')
/* A unit below space gives -swap; anything else gives swap. */
3289 return (s[0] == 0 && s[1] <
' ') ? -swap : swap;
/*
  Fold the raw bytes of key into the running hash in nr1[0], using nr2[0]
  as part of the mixing step. The cs parameter is marked unused.
*/
3297 void my_hash_sort_ucs2_bin(
const CHARSET_INFO *cs __attribute__((unused)),
3298 const uchar *key,
size_t len,ulong *nr1, ulong *nr2)
/* pos keeps the start of the data. */
3300 const uchar *pos = key;
/*
  True while the bytes just before key are 0x00 0x20 (a space).
  NOTE(review): this reads behind key, so key is presumably advanced to the
  end of the data in a line that is not visible -- confirm.
*/
3304 while (key > pos+1 && key[-1] ==
' ' && key[-2] ==
'\0')
/* Hash one byte per iteration from pos up to key. */
3307 for (; pos < (uchar*) key ; pos++)
3309 nr1[0]^=(ulong) ((((uint) nr1[0] & 63)+nr2[0]) *
3310 ((uint)*pos)) + (nr1[0] << 8);
/*
  Function entries of a collation handler initializer.
  NOTE(review): the struct header is not in the visible lines -- presumably
  my_collation_ucs2_general_ci_handler, which is referenced further down;
  confirm.
*/
3320 my_strnncollsp_ucs2,
3321 my_strnxfrm_unicode,
3322 my_strnxfrmlen_simple,
3323 my_like_range_generic,
3325 my_strcasecmp_mb2_or_mb4,
/*
  Function entries of a collation handler initializer built from the
  *_ucs2_bin comparison and hash routines.
  NOTE(review): the struct header is not in the visible lines -- presumably
  my_collation_ucs2_bin_handler, which is referenced further down; confirm.
*/
3335 my_strnncoll_ucs2_bin,
3336 my_strnncollsp_ucs2_bin,
3337 my_strnxfrm_unicode,
3338 my_strnxfrmlen_simple,
3339 my_like_range_generic,
3340 my_wildcmp_ucs2_bin,
3341 my_strcasecmp_mb2_or_mb4,
3343 my_hash_sort_ucs2_bin,
/*
  Function entries of a charset handler initializer: well-formedness check,
  string case conversion, and the shared *_mb2_or_mb4 number/string
  conversion routines.
  NOTE(review): the struct header is not in the visible lines -- presumably
  my_charset_ucs2_handler, which is referenced further down; confirm.
*/
3355 my_well_formed_len_ucs2,
3361 my_caseup_str_mb2_or_mb4,
3362 my_casedn_str_mb2_or_mb4,
3366 my_l10tostr_mb2_or_mb4,
3367 my_ll10tostr_mb2_or_mb4,
3369 my_strntol_mb2_or_mb4,
3370 my_strntoul_mb2_or_mb4,
3371 my_strntoll_mb2_or_mb4,
3372 my_strntoull_mb2_or_mb4,
3373 my_strntod_mb2_or_mb4,
3375 my_strntoull10rnd_mb2_or_mb4,
/*
  Entries of a charset definition: the flag set (including MY_CS_PRIMARY),
  the default case-info table, and the ucs2 charset and general_ci
  collation handlers.
  NOTE(review): the struct header is not in the visible lines; confirm which
  charset object this initializes.
*/
3383 MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
3395 &my_unicase_default,
3409 &my_charset_ucs2_handler,
3410 &my_collation_ucs2_general_ci_handler
/*
  Entries of the "ucs2_general_mysql500_ci" charset definition. It uses the
  same charset and collation handlers as the general_ci definition but the
  my_unicase_mysql500 case-info table, and its flags omit MY_CS_PRIMARY.
*/
3417 MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
3419 "ucs2_general_mysql500_ci",
3429 &my_unicase_mysql500,
3443 &my_charset_ucs2_handler,
3444 &my_collation_ucs2_general_ci_handler
/*
  Entries of a charset definition: the flag set (including MY_CS_BINSORT,
  without MY_CS_STRNXFRM), the default case-info table, and the ucs2
  charset and binary collation handlers.
  NOTE(review): the struct header is not in the visible lines; confirm which
  charset object this initializes.
*/
3451 MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_UNICODE|MY_CS_NONASCII,
3463 &my_unicase_default,
3477 &my_charset_ucs2_handler,
3478 &my_collation_ucs2_bin_handler