16 #include "my_global.h"
/*
  Token codes used by the scanner and the parser. Each code is a single
  character; the punctuation tokens use the punctuation character itself.
  NOTE(review): MY_XML_EQ, MY_XML_LT and MY_XML_GT are referenced further
  down but their definitions are not visible in this excerpt.
*/
21 #define MY_XML_UNKNOWN 'U'
22 #define MY_XML_EOF 'E'
23 #define MY_XML_STRING 'S'
24 #define MY_XML_IDENT 'I'
28 #define MY_XML_SLASH '/'
29 #define MY_XML_COMMENT 'C'
30 #define MY_XML_TEXT 'T'
31 #define MY_XML_QUESTION '?'
32 #define MY_XML_EXCLAM '!'
33 #define MY_XML_CDATA 'D'
/*
  Character class bits stored in my_xml_ctype[]:
    MY_XML_ID0 - tested for the first character of an identifier
    MY_XML_ID1 - tested for the following characters of an identifier
    MY_XML_SPC - spacing character
*/
45 #define MY_XML_ID0 0x01
46 #define MY_XML_ID1 0x02
47 #define MY_XML_SPC 0x08
/*
  Per-byte classification table, indexed by the unsigned character code.
  Every entry is a bit mask built from MY_XML_ID0 (0x01), MY_XML_ID1 (0x02)
  and MY_XML_SPC (0x08); the value 3 marks a byte that is accepted both as
  the first and as a following identifier character.
*/
57 static char my_xml_ctype[256]=
/* 0x00-0x0F: only TAB (0x09), LF (0x0A) and CR (0x0D) are spacing */
59 0,0,0,0,0,0,0,0,0,8,8,0,0,8,0,0,
/* 0x10-0x1F: no class */
60 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
/* 0x20-0x2F: SPACE is spacing; '-' (0x2D) and '.' (0x2E) are ID1 only */
61 8,0,0,0,0,0,0,0,0,0,0,0,0,2,2,0,
/* 0x30-0x3F: digits are ID1 only; ':' (0x3A) is ID0 and ID1 */
62 2,2,2,2,2,2,2,2,2,2,3,0,0,0,0,0,
/* 0x40-0x4F: '@' has no class; 'A'..'O' are ID0 and ID1 */
63 0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
/* 0x50-0x5F: 'P'..'Z' and '_' (0x5F) are ID0 and ID1; 0x5B-0x5E have no class */
64 3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,3,
/* 0x60-0x6F: 0x60 has no class; 'a'..'o' are ID0 and ID1 */
65 0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
/* 0x70-0x7F: 'p'..'z' are ID0 and ID1; 0x7B-0x7F have no class */
66 3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,0,
/* 0x80-0xFF: every high-bit byte is accepted as ID0 and ID1 */
67 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
68 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
69 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
70 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
71 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
72 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
73 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
74 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3
/*
  Class tests. The argument is cast to uchar before indexing so that a
  char with a negative value still selects an entry inside the table.
  Each macro yields a non-zero mask value (not necessarily 1) on a match.
*/
77 #define my_xml_is_space(c) (my_xml_ctype[(uchar) (c)] & MY_XML_SPC)
78 #define my_xml_is_id0(c) (my_xml_ctype[(uchar) (c)] & MY_XML_ID0)
79 #define my_xml_is_id1(c) (my_xml_ctype[(uchar) (c)] & MY_XML_ID1)
82 static const char *lex2str(
int lex)
86 case MY_XML_EOF:
return "END-OF-INPUT";
87 case MY_XML_STRING:
return "STRING";
88 case MY_XML_IDENT:
return "IDENT";
89 case MY_XML_CDATA:
return "CDATA";
90 case MY_XML_EQ:
return "'='";
91 case MY_XML_LT:
return "'<'";
92 case MY_XML_GT:
return "'>'";
93 case MY_XML_SLASH:
return "'/'";
94 case MY_XML_COMMENT:
return "COMMENT";
95 case MY_XML_TEXT:
return "TEXT";
96 case MY_XML_QUESTION:
return "'?'";
97 case MY_XML_EXCLAM:
return "'!'";
99 return "unknown token";
/*
  Trim the range [a->beg, a->end) in place: first advance a->beg past
  leading spacing characters, then pull a->end back over trailing ones.
  Both loops stop as soon as the range becomes empty.
  NOTE(review): the enclosing function header is not visible in this
  excerpt; presumably this is the body of my_xml_norm_text(), which the
  parser calls on text ranges -- confirm.
*/
104 for ( ; (a->beg < a->end) && my_xml_is_space(a->beg[0]) ; a->beg++ );
105 for ( ; (a->beg < a->end) && my_xml_is_space(a->end[-1]) ; a->end-- );
109 static inline my_bool
110 my_xml_parser_prefix_cmp(
MY_XML_PARSER *p,
const char *s,
size_t slen)
112 return (p->cur + slen > p->end) || memcmp(p->cur, s, slen);
/*
  NOTE(review): the function header and many statements are not visible in
  this excerpt; presumably this is the body of my_xml_scan(p, a), which the
  parser calls to fetch the next token -- confirm. The comments below
  describe only the visible conditions.
*/
/* Skip spacing characters in front of the token. */
120 for (; ( p->cur < p->end) && my_xml_is_space(p->cur[0]) ; p->cur++);
/* Nothing but spacing was left in the input. */
122 if (p->cur >= p->end)
/* Comment: starts with "<!--"; advance until "-->" is found or input ends. */
133 if (!my_xml_parser_prefix_cmp(p, C_STRING_WITH_LEN(
"<!--")))
135 for (; p->cur < p->end; p->cur++)
137 if (!my_xml_parser_prefix_cmp(p, C_STRING_WITH_LEN(
"-->")))
/*
  CDATA section: starts with "<![CDATA[". The loop stops two bytes before
  the end so that the three-byte look-ahead for "]]>" stays inside the input.
*/
146 else if (!my_xml_parser_prefix_cmp(p, C_STRING_WITH_LEN(
"<![CDATA[")))
149 for (; p->cur < p->end - 2 ; p->cur++)
151 if (p->cur[0] ==
']' && p->cur[1] ==
']' && p->cur[2] ==
'>')
/*
  Single-character punctuation token.
  NOTE(review): strchr() also matches the terminating zero of its first
  argument, so a zero byte in the input satisfies this condition as well.
*/
160 else if (strchr(
"?=/<>!",p->cur[0]))
/*
  Quoted string, opened by either quote character. The loop advances to the
  next byte equal to a->beg[0] (presumably the opening quote, set on a line
  not shown here) or to the end of input.
*/
166 else if ( (p->cur[0] ==
'"') || (p->cur[0] ==
'\'') )
173 for (; ( p->cur < p->end ) && (p->cur[0] != a->beg[0]); p->cur++)
/* Text normalization can be switched off through the parser flags. */
179 if (!(p->flags & MY_XML_FLAG_SKIP_TEXT_NORMALIZATION))
/* Identifier: one ID0 character followed by any number of ID1 characters. */
183 else if (my_xml_is_id0(p->cur[0]))
186 while (p->cur < p->end && my_xml_is_id1(p->cur[0]))
/*
  Debug trace of the token.
  NOTE(review): a->end-a->beg is a pointer difference (ptrdiff_t) but is
  printed with %d; the specifier and the argument type do not match.
*/
196 printf(
"LEX=%s[%d]\n",lex2str(lex),a->end-a->beg);
204 static int my_xml_value(
MY_XML_PARSER *st,
const char *str,
size_t len)
206 return (st->value) ? (st->value)(st,str,len) : MY_XML_OK;
/*
  Make sure the path buffer (st->attr) can take `len` more bytes after the
  current end of the path, growing it when the request does not fit.

  @param st   Parser owning the path buffer.
  @param len  Number of additional bytes required.

  @return In the growth path: MY_XML_OK when st->attr.buffer is non-NULL
          after the allocation, MY_XML_ERROR otherwise. The return for the
          "already fits" case is on a line not shown in this excerpt.

  NOTE(review): several lines of this function are missing from the
  excerpt (the embedded numbering has gaps), so the comments below cover
  the visible statements only.
*/
222 static int my_xml_attr_ensure_space(
MY_XML_PARSER *st,
size_t len)
/* Current length of the path; used to re-derive attr.end after a move. */
224 size_t ofs= st->attr.end - st->attr.start;
226 if (ofs + len > st->attr.buffer_size)
/*
  New capacity is twice the old one plus the request, saturating at
  SIZE_T_MAX when the doubling would overflow.
*/
228 st->attr.buffer_size= (SIZE_T_MAX - len) / 2 > st->attr.buffer_size ?
229 st->attr.buffer_size * 2 + len : SIZE_T_MAX;
/*
  No heap buffer yet: the path still lives in static_buffer, so allocate
  and copy the existing path including its terminating zero (ofs + 1).
  NOTE(review): in the visible lines memcpy() follows my_str_malloc()
  without a NULL check; a guard may sit on an omitted line -- confirm.
*/
231 if (!st->attr.buffer)
233 st->attr.buffer= (
char *) my_str_malloc(st->attr.buffer_size);
235 memcpy(st->attr.buffer, st->attr.static_buffer, ofs + 1 );
/*
  Heap buffer already exists: resize it.
  NOTE(review): the result is stored straight into st->attr.buffer, so if
  my_str_realloc() behaves like realloc() a failure loses the only pointer
  to the old block, and attr.start/attr.end below are derived from NULL.
*/
238 st->attr.buffer= (
char *) my_str_realloc(st->attr.buffer,
239 st->attr.buffer_size);
/* The block may have moved: rebase start and end onto the new buffer. */
240 st->attr.start= st->attr.buffer;
241 st->attr.end= st->attr.start + ofs;
243 return st->attr.buffer ? MY_XML_OK : MY_XML_ERROR;
/*
  Reset the accumulated path to empty by moving attr.end back to
  attr.start; the buffer itself is left untouched.
  NOTE(review): the function header is not visible in this excerpt;
  presumably this is the body of my_xml_attr_rewind(), which
  my_xml_parse() calls before parsing -- confirm.
*/
253 p->attr.end= p->attr.start;
/*
  Append a tag or attribute name to the current path and report it through
  the optional `enter` callback.

  @param st   Parser; the path is kept in st->attr.
  @param str  Name bytes (not required to be zero-terminated; `len` bytes
              are copied).
  @param len  Length of the name.

  @return MY_XML_OK when no `enter` callback is installed, otherwise the
          callback's result. The value returned when the buffer cannot be
          grown is on a line not shown in this excerpt.

  NOTE(review): the statements that advance st->attr.end are not visible
  here; the comments describe the visible statements only.
*/
257 static int my_xml_enter(
MY_XML_PARSER *st,
const char *str,
size_t len)
/* Reserve room for the name plus one separator character. */
259 if (my_xml_attr_ensure_space(st, len + 1 ))
/* A non-empty path gets a '/' separator in front of the new name. */
262 if (st->attr.end > st->attr.start)
264 st->attr.end[0]=
'/';
267 memcpy(st->attr.end, str, len);
/* Keep the path zero-terminated. */
269 st->attr.end[0]=
'\0';
/*
  With MY_XML_FLAG_RELATIVE_NAMES the callback receives just the name;
  otherwise it receives the whole path from attr.start to attr.end.
*/
270 if (st->flags & MY_XML_FLAG_RELATIVE_NAMES)
271 return st->enter ? st->enter(st, str, len) : MY_XML_OK;
274 st->enter(st, st->attr.start, st->attr.end - st->attr.start) : MY_XML_OK;
/*
  Helper used when building error messages: works with a destination `s`,
  a source `src` and two lengths.

  @param s    Destination buffer.
  @param src  Source bytes.
  @param l1   Upper bound; the visible callers pass sizeof(buffer)-1.
  @param l2   Length of the source; the visible callers pass the name length.

  NOTE(review): only the length clamp is visible in this excerpt;
  presumably min(l1, l2) bytes are then copied and terminated -- confirm.
*/
278 static void mstr(
char *s,
const char *src,
size_t l1,
size_t l2)
/* l1 becomes the smaller of the two lengths. */
280 l1 = l1<l2 ? l1 : l2;
/*
  Leave the innermost element of the current path and report it through
  the optional `leave_xml` callback.

  @param p     Parser; the path is kept in p->attr.
  @param str   Name found in the closing tag, or NULL when there is no name
               to compare (the parser passes NULL for self-closing and
               processing-instruction style endings).
  @param slen  Length of str.

  NOTE(review): declarations, the path truncation and the return statement
  are not visible in this excerpt; the comments describe the visible
  statements only.
*/
286 static int my_xml_leave(
MY_XML_PARSER *p,
const char *str,
size_t slen)
/* Walk back from the end of the path to the last '/' or to its start. */
295 for (e= p->attr.end; (e > p->attr.start) && (e[0] !=
'/') ; e--);
/* Length of the last path component, not counting the '/' if present. */
296 glen= (size_t) ((e[0] ==
'/') ? (p->attr.end - e - 1) : p->attr.end - e);
/*
  Mismatch check for an explicit closing name.
  NOTE(review): the visible condition compares only the lengths, not the
  bytes of the two names -- confirm whether that is intended.
*/
298 if (str && (slen != glen))
/* Bounded copies of both names for the error message. */
300 mstr(s,str,
sizeof(s)-1,slen);
303 mstr(g,e+1,
sizeof(g)-1,glen),
304 sprintf(p->errstr,
"'</%s>' unexpected ('</%s>' wanted)",s,g);
307 sprintf(p->errstr,
"'</%s>' unexpected (END-OF-INPUT wanted)", s);
/*
  With MY_XML_FLAG_RELATIVE_NAMES the callback receives the name as given;
  otherwise it receives the whole path. A missing callback counts as
  MY_XML_OK.
*/
311 if (p->flags & MY_XML_FLAG_RELATIVE_NAMES)
312 rc= p->leave_xml ? p->leave_xml(p, str, slen) : MY_XML_OK;
315 p->leave_xml(p, p->attr.start, p->attr.end - p->attr.start) :
/*
  Parse an XML document and drive the enter / value / leave callbacks.

  @param p    Parser state; input is consumed through p->cur up to p->end.
  @param str  Document text.
  @param len  Length of the document.

  On a syntax error a message is formatted into p->errstr.

  NOTE(review): many lines of this function are missing from the excerpt
  (declarations, braces, the statements that install str/len into the
  parser, gotos/returns). Presumably [str, str+len) becomes
  p->beg/p->cur/p->end and the function returns MY_XML_OK or MY_XML_ERROR
  -- confirm. The comments below describe the visible statements only.
*/
325 int my_xml_parse(
MY_XML_PARSER *p,
const char *str,
size_t len)
/* Start with an empty path. */
328 my_xml_attr_rewind(p);
334 while ( p->cur < p->end )
/* A '<' starts markup; anything else is text (handled further down). */
337 if (p->cur[0] ==
'<')
343 lex=my_xml_scan(p,&a);
345 if (MY_XML_COMMENT == lex)
/*
  CDATA content is passed to the value callback.
  NOTE(review): the result of my_xml_value() is ignored here, while it is
  checked for attribute values below.
*/
348 if (lex == MY_XML_CDATA)
352 my_xml_value(p, a.beg, (
size_t) (a.end-a.beg));
356 lex=my_xml_scan(p,&a);
/* Closing tag: '/' must be followed by an identifier. */
358 if (MY_XML_SLASH == lex)
360 if (MY_XML_IDENT != (lex=my_xml_scan(p,&a)))
362 sprintf(p->errstr,
"%s unexpected (ident wanted)",lex2str(lex));
365 if (MY_XML_OK != my_xml_leave(p,a.beg,(
size_t) (a.end-a.beg)))
367 lex=my_xml_scan(p,&a);
/* A '!' or '?' directly after '<' is consumed before the tag name. */
371 if (MY_XML_EXCLAM == lex)
373 lex=my_xml_scan(p,&a);
376 else if (MY_XML_QUESTION == lex)
378 lex=my_xml_scan(p,&a);
/* Opening tag name: enter it as a tag node. */
382 if (MY_XML_IDENT == lex)
384 p->current_node_type= MY_XML_NODE_TAG;
385 if (MY_XML_OK != my_xml_enter(p,a.beg,(
size_t) (a.end-a.beg)))
390 sprintf(p->errstr,
"%s unexpected (ident or '/' wanted)",
/*
  Attribute loop: runs while the next token is an identifier, or a string
  when `exclam` is set.
*/
395 while ((MY_XML_IDENT == (lex=my_xml_scan(p,&a))) ||
396 ((MY_XML_STRING == lex && exclam)))
/* name '=' value: the value may be an identifier or a quoted string. */
399 if (MY_XML_EQ == (lex=my_xml_scan(p,&b)))
401 lex=my_xml_scan(p,&b);
402 if ( (lex == MY_XML_IDENT) || (lex == MY_XML_STRING) )
/* Report the attribute as enter / value / leave; stop on the first failure. */
404 p->current_node_type= MY_XML_NODE_ATTR;
405 if ((MY_XML_OK != my_xml_enter(p,a.beg,(
size_t) (a.end-a.beg))) ||
406 (MY_XML_OK != my_xml_value(p,b.beg,(
size_t) (b.end-b.beg))) ||
407 (MY_XML_OK != my_xml_leave(p,a.beg,(
size_t) (a.end-a.beg))))
412 sprintf(p->errstr,
"%s unexpected (ident or string wanted)",
/* Attribute name without a value: reported as enter + leave only. */
417 else if (MY_XML_IDENT == lex)
419 p->current_node_type= MY_XML_NODE_ATTR;
420 if ((MY_XML_OK != my_xml_enter(p,a.beg,(
size_t) (a.end-a.beg))) ||
421 (MY_XML_OK != my_xml_leave(p,a.beg,(
size_t) (a.end-a.beg))))
424 else if ((MY_XML_STRING == lex) && exclam)
/* A '/' here leaves the current element without a closing name. */
437 if (lex == MY_XML_SLASH)
439 if (MY_XML_OK != my_xml_leave(p,NULL,0))
441 lex=my_xml_scan(p,&a);
/* This branch requires a '?' before the closing '>'. */
447 if (lex != MY_XML_QUESTION)
449 sprintf(p->errstr,
"%s unexpected ('?' wanted)",lex2str(lex));
452 if (MY_XML_OK != my_xml_leave(p,NULL,0))
454 lex=my_xml_scan(p,&a);
459 if (MY_XML_OK != my_xml_leave(p,NULL,0))
/* Every markup construct must end with '>'. */
463 if (lex != MY_XML_GT)
465 sprintf(p->errstr,
"%s unexpected ('>' wanted)",lex2str(lex));
/* Text: everything up to the next '<' or the end of input. */
472 for ( ; (p->cur < p->end) && (p->cur[0] !=
'<') ; p->cur++);
/* Surrounding spacing is trimmed unless normalization is switched off. */
475 if (!(p->flags & MY_XML_FLAG_SKIP_TEXT_NORMALIZATION))
476 my_xml_norm_text(&a);
/* NOTE(review): the result of my_xml_value() is ignored here as well. */
479 my_xml_value(p,a.beg,(
size_t) (a.end-a.beg));
/* A non-empty path after the loop means the input ended inside an element. */
484 if (p->attr.start[0])
486 sprintf(p->errstr,
"unexpected END-OF-INPUT");
/*
  NOTE(review): the function header is not visible in this excerpt;
  presumably this is the parser initialisation routine -- confirm.
  Clears the whole parser structure, then points the path at the embedded
  static buffer and records that buffer's size as the current capacity.
*/
495 memset(p, 0,
sizeof(p[0]));
499 p->attr.start= p->attr.end= p->attr.static_buffer;
500 p->attr.buffer_size=
sizeof(p->attr.static_buffer);
/*
  NOTE(review): the function header is not visible in this excerpt;
  presumably this is the parser clean-up routine -- confirm.
  Releases the heap path buffer and clears the pointer so that no dangling
  reference is left in the parser.
*/
508 my_str_free(p->attr.buffer);
509 p->attr.buffer= NULL;
/*
  Store the supplied user_data value in the parser.
  NOTE(review): the function header is not visible in this excerpt.
*/
539 p->user_data=user_data;
/*
  NOTE(review): the function header and the loop body are not visible in
  this excerpt. The visible statements scan the input from p->beg up to
  p->cur and return the distance from `beg` to p->cur; presumably the
  hidden loop body moves `beg` so the result is a position within the
  current line -- confirm.
*/
551 const char *beg=p->beg;
553 for ( s=p->beg ; s<p->cur; s++)
558 return (
size_t) (p->cur-beg);
565 for (s=p->beg ; s<p->cur; s++)