MySQL 5.6.14 Source Code Document
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
chartype.c
1 /* $NetBSD: chartype.c,v 1.10 2011/08/16 16:25:15 christos Exp $ */
2 
3 /*-
4  * Copyright (c) 2009 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  * notice, this list of conditions and the following disclaimer in the
14  * documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  * must display the following acknowledgement:
17  * This product includes software developed by the NetBSD
18  * Foundation, Inc. and its contributors.
19  * 4. Neither the name of The NetBSD Foundation nor the names of its
20  * contributors may be used to endorse or promote products derived
21  * from this software without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
24  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
25  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
26  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
27  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33  * POSSIBILITY OF SUCH DAMAGE.
34  */
35 
36 /*
37  * chartype.c: character classification and meta information
38  */
39 #include "config.h"
40 #if !defined(lint) && !defined(SCCSID)
41 #endif /* not lint && not SCCSID */
42 #include "el.h"
43 #include <stdlib.h>
44 
45 #define CT_BUFSIZ ((size_t)1024)
46 
47 #ifdef WIDECHAR
48 protected void
49 ct_conv_buff_resize(ct_buffer_t *conv, size_t mincsize, size_t minwsize)
50 {
51  void *p;
52  if (mincsize > conv->csize) {
53  conv->csize = mincsize;
54  p = el_realloc(conv->cbuff, conv->csize * sizeof(*conv->cbuff));
55  if (p == NULL) {
56  conv->csize = 0;
57  el_free(conv->cbuff);
58  conv->cbuff = NULL;
59  } else
60  conv->cbuff = p;
61  }
62 
63  if (minwsize > conv->wsize) {
64  conv->wsize = minwsize;
65  p = el_realloc(conv->wbuff, conv->wsize * sizeof(*conv->wbuff));
66  if (p == NULL) {
67  conv->wsize = 0;
68  el_free(conv->wbuff);
69  conv->wbuff = NULL;
70  } else
71  conv->wbuff = p;
72  }
73 }
74 
75 
76 public char *
77 ct_encode_string(const Char *s, ct_buffer_t *conv)
78 {
79  char *dst;
80  ssize_t used = 0;
81  mbstate_t state;
82 
83  memset(&state, 0, sizeof(mbstate_t));
84 
85  if (!s)
86  return NULL;
87  if (!conv->cbuff)
88  ct_conv_buff_resize(conv, CT_BUFSIZ, (size_t)0);
89  if (!conv->cbuff)
90  return NULL;
91 
92  dst = conv->cbuff;
93  while (*s) {
94  used = (ssize_t)(conv->csize - (size_t)(dst - conv->cbuff));
95  if (used < 5) {
96  used = dst - conv->cbuff;
97  ct_conv_buff_resize(conv, conv->csize + CT_BUFSIZ,
98  (size_t)0);
99  if (!conv->cbuff)
100  return NULL;
101  dst = conv->cbuff + used;
102  }
103  used = ct_encode_char(dst, (size_t)5, *s, &state);
104  if (used == -1) /* failed to encode, need more buffer space */
105  abort();
106  ++s;
107  dst += used;
108  }
109  *dst = '\0';
110  return conv->cbuff;
111 }
112 
113 public Char *
114 ct_decode_string(const char *s, ct_buffer_t *conv)
115 {
116  size_t len = 0;
117 
118  if (!s)
119  return NULL;
120  if (!conv->wbuff)
121  ct_conv_buff_resize(conv, (size_t)0, CT_BUFSIZ);
122  if (!conv->wbuff)
123  return NULL;
124 
125  len = ct_mbstowcs(NULL, s, (size_t)0);
126  if (len == (size_t)-1)
127  return NULL;
128  if (len > conv->wsize)
129  ct_conv_buff_resize(conv, (size_t)0, len + 1);
130  if (!conv->wbuff)
131  return NULL;
132  ct_mbstowcs(conv->wbuff, s, conv->wsize);
133  return conv->wbuff;
134 }
135 
136 
137 protected Char **
138 ct_decode_argv(int argc, const char *argv[], ct_buffer_t *conv)
139 {
140  size_t bufspace;
141  int i;
142  Char *p;
143  Char **wargv;
144  ssize_t bytes;
145  mbstate_t state;
146 
147  /* Make sure we have enough space in the conversion buffer to store all
148  * the argv strings. */
149  for (i = 0, bufspace = 0; i < argc; ++i)
150  bufspace += argv[i] ? strlen(argv[i]) + 1 : 0;
151  ct_conv_buff_resize(conv, (size_t)0, bufspace);
152  if (!conv->wsize)
153  return NULL;
154 
155  wargv = el_malloc((size_t)argc * sizeof(*wargv));
156 
157  for (i = 0, p = conv->wbuff; i < argc; ++i) {
158  if (!argv[i]) { /* don't pass null pointers to mbsrtowcs */
159  wargv[i] = NULL;
160  continue;
161  } else {
162  wargv[i] = p;
163  memset(&state, 0, sizeof(mbstate_t));
164  bytes = (ssize_t)mbsrtowcs(p, argv + i, bufspace, &state);
165  }
166  if (bytes == -1) {
167  el_free(wargv);
168  return NULL;
169  } else
170  bytes++; /* include '\0' in the count */
171  bufspace -= (size_t)bytes;
172  p += bytes;
173  }
174 
175  return wargv;
176 }
177 
178 
179 protected size_t
180 ct_enc_width(Char c)
181 {
182  /* UTF-8 encoding specific values */
183  if (c < 0x80)
184  return 1;
185  else if (c < 0x0800)
186  return 2;
187  else if (c < 0x10000)
188  return 3;
189  else if (c < 0x110000)
190  return 4;
191  else
192  return 0; /* not a valid codepoint */
193 }
194 
195 protected ssize_t
196 ct_encode_char(char *dst, size_t len, Char c, mbstate_t *state)
197 {
198  ssize_t l = 0;
199 
200  if (len < ct_enc_width(c))
201  return -1;
202 
203  l = wcrtomb(dst, c, state);
204 
205  if (l < 0) {
206  memset (state, 0, sizeof (mbstate_t));
207  l = 0;
208  }
209  return l;
210 }
211 #endif
212 
213 protected const Char *
214 ct_visual_string(const Char *s)
215 {
216  static Char *buff = NULL;
217  static size_t buffsize = 0;
218  void *p;
219  Char *dst;
220  ssize_t used = 0;
221 
222  if (!s)
223  return NULL;
224  if (!buff) {
225  buffsize = CT_BUFSIZ;
226  buff = el_malloc(buffsize * sizeof(*buff));
227  }
228  dst = buff;
229  while (*s) {
230  used = ct_visual_char(dst, buffsize - (size_t)(dst - buff), *s);
231  if (used == -1) { /* failed to encode, need more buffer space */
232  used = dst - buff;
233  buffsize += CT_BUFSIZ;
234  p = el_realloc(buff, buffsize * sizeof(*buff));
235  if (p == NULL)
236  goto out;
237  buff = p;
238  dst = buff + used;
239  /* don't increment s here - we want to retry it! */
240  }
241  else
242  ++s;
243  dst += used;
244  }
245  if (dst >= (buff + buffsize)) { /* sigh */
246  buffsize += 1;
247  p = el_realloc(buff, buffsize * sizeof(*buff));
248  if (p == NULL)
249  goto out;
250  buff = p;
251  dst = buff + buffsize - 1;
252  }
253  *dst = 0;
254  return buff;
255 out:
256  el_free(buff);
257  buffsize = 0;
258  return NULL;
259 }
260 
261 
262 protected int
263 ct_visual_width(Char c)
264 {
265  int t = ct_chr_class(c);
266  switch (t) {
267  case CHTYPE_ASCIICTL:
268  return 2; /* ^@ ^? etc. */
269  case CHTYPE_TAB:
270  return 1; /* Hmm, this really need to be handled outside! */
271  case CHTYPE_NL:
272  return 0; /* Should this be 1 instead? */
273 #ifdef WIDECHAR
274  case CHTYPE_PRINT:
275  return wcwidth(c);
276  case CHTYPE_NONPRINT:
277  if (c > 0xffff) /* prefer standard 4-byte display over 5-byte */
278  return 8; /* \U+12345 */
279  else
280  return 7; /* \U+1234 */
281 #else
282  case CHTYPE_PRINT:
283  return 1;
284  case CHTYPE_NONPRINT:
285  return 4; /* \123 */
286 #endif
287  default:
288  return 0; /* should not happen */
289  }
290 }
291 
292 
293 protected ssize_t
294 ct_visual_char(Char *dst, size_t len, Char c)
295 {
296  int t = ct_chr_class(c);
297  switch (t) {
298  case CHTYPE_TAB:
299  case CHTYPE_NL:
300  case CHTYPE_ASCIICTL:
301  if (len < 2)
302  return -1; /* insufficient space */
303  *dst++ = '^';
304  if (c == '\177')
305  *dst = '?'; /* DEL -> ^? */
306  else
307  *dst = c | 0100; /* uncontrolify it */
308  return 2;
309  case CHTYPE_PRINT:
310  if (len < 1)
311  return -1; /* insufficient space */
312  *dst = c;
313  return 1;
314  case CHTYPE_NONPRINT:
315  /* we only use single-width glyphs for display,
316  * so this is right */
317  if ((ssize_t)len < ct_visual_width(c))
318  return -1; /* insufficient space */
319 #ifdef WIDECHAR
320  *dst++ = '\\';
321  *dst++ = 'U';
322  *dst++ = '+';
323 #define tohexdigit(v) "0123456789ABCDEF"[v]
324  if (c > 0xffff) /* prefer standard 4-byte display over 5-byte */
325  *dst++ = tohexdigit(((unsigned int) c >> 16) & 0xf);
326  *dst++ = tohexdigit(((unsigned int) c >> 12) & 0xf);
327  *dst++ = tohexdigit(((unsigned int) c >> 8) & 0xf);
328  *dst++ = tohexdigit(((unsigned int) c >> 4) & 0xf);
329  *dst = tohexdigit(((unsigned int) c ) & 0xf);
330  return c > 0xffff ? 8 : 7;
331 #else
332  *dst++ = '\\';
333 #define tooctaldigit(v) ((v) + '0')
334  *dst++ = tooctaldigit(((unsigned int) c >> 6) & 0x7);
335  *dst++ = tooctaldigit(((unsigned int) c >> 3) & 0x7);
336  *dst++ = tooctaldigit(((unsigned int) c ) & 0x7);
337 #endif
338  /*FALLTHROUGH*/
339  /* these two should be handled outside this function */
340  default: /* we should never hit the default */
341  return 0;
342  }
343 }
344 
345 
346 
347 
348 protected int
349 ct_chr_class(Char c)
350 {
351  if (c == '\t')
352  return CHTYPE_TAB;
353  else if (c == '\n')
354  return CHTYPE_NL;
355  else if (IsASCII(c) && Iscntrl(c))
356  return CHTYPE_ASCIICTL;
357  else if (Isprint(c))
358  return CHTYPE_PRINT;
359  else
360  return CHTYPE_NONPRINT;
361 }