/*
 * Optional stack-availability hook; unset (NULL) by default.  The parser
 * tests the pointer for NULL before calling it with argument 0.
 * NOTE(review): a non-zero return presumably means "not enough stack left"
 * (the call site is followed by MY_REG_ESPACE) -- confirm against the
 * my_regex_stack_check_t typedef.
 */
35 my_regex_stack_check_t my_regex_enough_mem_in_stack= NULL;
/*
 * Table of the named bracket character classes ("[:alnum:]" and friends).
 * Each entry holds: the class name, two empty strings, and a mask of _MY_*
 * ctype bits.  The bracket-class parser reads these entries through the
 * fields name, chars, multis and mask, and walks the table until it meets
 * an entry whose name is NULL.
 * NOTE(review): the terminating entry and the closing brace of the
 * initializer are not visible in this extract.
 */
41 struct cclass cclasses[CCLASS_LAST+1]= {
42 {
"alnum",
"",
"", _MY_U | _MY_L | _MY_NMR},
43 {
"alpha",
"",
"", _MY_U | _MY_L },
44 {
"blank",
"",
"", _MY_B },
45 {
"cntrl",
"",
"", _MY_CTR },
46 {
"digit",
"",
"", _MY_NMR },
47 {
"graph",
"",
"", _MY_PNT | _MY_U | _MY_L | _MY_NMR},
48 {
"lower",
"",
"", _MY_L },
/* "print" is "graph" plus the blank bit. */
49 {
"print",
"",
"", _MY_PNT | _MY_U | _MY_L | _MY_NMR | _MY_B },
50 {
"punct",
"",
"", _MY_PNT },
51 {
"space",
"",
"", _MY_SPC },
52 {
"upper",
"",
"", _MY_U },
53 {
"xdigit",
"",
"", _MY_X },
/*
 * Parser convenience macros.  Every macro here implicitly uses a local
 * variable named `p` (the parse state): p->next / p->end delimit the
 * unread pattern text and p->slen is the number of strip entries emitted.
 */

/* Look at the next / next-but-one input character without consuming it.
 * Neither macro checks bounds; pair them with MORE() / MORE2(). */
61 #define PEEK() (*p->next)
62 #define PEEK2() (*(p->next+1))
/* True while at least one (MORE) or two (MORE2) characters remain. */
63 #define MORE() (p->next < p->end)
64 #define MORE2() (p->next+1 < p->end)
/* Bounds-checked tests for the next one / two characters. */
65 #define SEE(c) (MORE() && PEEK() == (c))
66 #define SEETWO(a, b) (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))
/* Consume the character(s) only if they match; yield 1 on match, else 0. */
67 #define EAT(c) ((SEE(c)) ? (NEXT(), 1) : 0)
68 #define EATTWO(a, b) ((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
/* Unconditional advances of the input pointer. */
69 #define NEXT() (p->next++)
70 #define NEXT2() (p->next += 2)
71 #define NEXTn(n) (p->next += (n))
/* Return the current character and advance past it (no bounds check). */
72 #define GETNEXT() (*p->next++)
/* Error reporting: SETERROR forwards to seterr(); REQUIRE only raises the
 * error when the condition `co` is false (short-circuit ||). */
73 #define SETERROR(e) seterr(p, (e))
74 #define REQUIRE(co, e) ((co) || SETERROR(e))
75 #define MUSTSEE(c, e) (REQUIRE(MORE() && PEEK() == (c), e))
/* Note: when input remains, MUSTEAT consumes a character via GETNEXT()
 * even if that character turns out not to be `c`. */
76 #define MUSTEAT(c, e) (REQUIRE(MORE() && GETNEXT() == (c), e))
77 #define MUSTNOTSEE(c, e) (REQUIRE(!MORE() || PEEK() != (c), e))
/* Strip manipulation: EMIT appends an operator, INSERT places one at `pos`
 * with operand HERE()-(pos)+1, AHEAD patches the operand at `pos` with the
 * distance to HERE(), ASTERN emits an operator whose operand is the
 * distance back to `pos`. */
78 #define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd))
79 #define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos)
80 #define AHEAD(pos) dofwd(p, pos, HERE()-(pos))
/* NOTE(review): `pos` is not parenthesized in ASTERN, unlike in INSERT and
 * AHEAD; callers must pass expressions that are safe after a binary minus. */
81 #define ASTERN(sop, pos) EMIT(sop, HERE()-pos)
/* Strip positions: next free slot, last emitted, and the one before it. */
82 #define HERE() (p->slen)
83 #define THERE() (p->slen - 1)
84 #define THERETHERE() (p->slen - 2)
/* Discard the last n emitted strip entries. */
85 #define DROP(n) (p->slen -= (n))
/*
 * my_regcomp - compile `pattern` into `preg` according to `cflags`, using
 * `charset` for character classification.
 *
 * Visible return codes: MY_REG_INVARG for contradictory flags or a bad end
 * pointer, MY_REG_ESPACE when an allocation fails.  On the visible success
 * path preg->re_nsub and preg->re_magic are filled in.
 * NOTE(review): only a sample of this function's lines is present here;
 * the comments below describe just what those lines show.
 */
106 my_regcomp(preg, pattern, cflags, charset)
114 register struct parse *p = &pa;
/* Two alternative definitions of GOODFLAGS; the preprocessor conditional
 * selecting between them is not visible.  The second strips MY_REG_DUMP. */
118 # define GOODFLAGS(f) (f)
120 # define GOODFLAGS(f) ((f)&~MY_REG_DUMP)
123 my_regex_init(charset, NULL);
124 preg->charset=charset;
125 cflags = GOODFLAGS(cflags);
/* MY_REG_EXTENDED and MY_REG_NOSPEC are mutually exclusive. */
126 if ((cflags&MY_REG_EXTENDED) && (cflags&MY_REG_NOSPEC))
127 return(MY_REG_INVARG);
/* With MY_REG_PEND the pattern ends at preg->re_endp (which must not lie
 * before `pattern`); the other visible path measures it with strlen(). */
129 if (cflags&MY_REG_PEND) {
130 if (preg->re_endp < pattern)
131 return(MY_REG_INVARG);
132 len = preg->re_endp - pattern;
134 len = strlen((
char *)pattern);
138 (NC-1)*
sizeof(cat_t));
140 return(MY_REG_ESPACE);
/* Initial strip capacity: one and a half times the pattern length, plus one.
 * NOTE(review): the value is computed in size_t, narrowed to long, and then
 * multiplied by sizeof(sop) for malloc; for extremely long patterns this
 * arithmetic could wrap or truncate -- confirm callers bound `len`. */
141 p->ssize = (long) (len/(
size_t)2*(size_t)3 + (
size_t)1);
142 p->strip = (sop *)malloc(p->ssize *
sizeof(sop));
144 if (p->strip == NULL) {
146 return(MY_REG_ESPACE);
/* The parser reads the pattern through the [p->next, p->end) window. */
151 p->next = (
char *)pattern;
152 p->end = p->next + len;
155 p->charset = preg->charset;
156 for (i = 0; i < NPAREN; i++) {
/* Offset the category table by -CHAR_MIN so it can be indexed directly
 * with (possibly negative) char values, then zero all NC entries. */
172 g->categories = &g->catspace[-(CHAR_MIN)];
173 (void) memset((
char *)g->catspace, 0, NC*
sizeof(cat_t));
/* Record the strip positions around the parse; the parser invoked in
 * between depends on the flags (call lines not visible). */
178 g->firststate = THERE();
179 if (cflags&MY_REG_EXTENDED)
181 else if (cflags&MY_REG_NOSPEC)
186 g->laststate = THERE();
192 g->nplus = pluscount(p, g);
/* Publish results to the caller's my_regex_t. */
194 preg->re_nsub = g->nsub;
196 preg->re_magic = MAGIC1;
/* Internal-consistency failure; the guarding condition is not visible. */
200 SETERROR(MY_REG_ASSERT);
/*
 * Fragment of an ERE-level parsing routine (the call from my_regcomp under
 * MY_REG_EXTENDED is not visible, nor is this function's header).
 * Visible behavior: parses a run of items up to '|' or the `stop`
 * character, checks stack space on each pass, rejects an empty branch, and
 * emits OOR1 / O_CH operators that point back to `prevback`.
 */
215 register struct
parse *p;
/* UNINIT_VAR: these are only assigned on later lines (not visible). */
219 register sopno UNINIT_VAR(prevback);
220 register sopno UNINIT_VAR(prevfwd);
222 register int first = 1;
/* Consume one branch: stop at '|', at `stop`, or at end of input. */
227 while (MORE() && (c = PEEK()) !=
'|' && c != stop)
/* Call the optional stack hook only when it has been installed. */
229 if (my_regex_enough_mem_in_stack &&
230 my_regex_enough_mem_in_stack(0))
232 SETERROR(MY_REG_ESPACE);
/* A branch that emitted nothing (HERE() still equals conc) is an error. */
237 if(REQUIRE(HERE() != conc, MY_REG_EMPTY)) {}
248 ASTERN(OOR1, prevback);
257 ASTERN(O_CH, prevback);
/* On exit either the input is exhausted or the stop character is next. */
260 assert(!MORE() || SEE(stop));
/*
 * Fragment of the routine that parses a single ERE item plus any
 * repetition operator following it (function header not visible).
 * The `case` labels selecting between the statements below are missing
 * from this extract, so comments describe individual statements only.
 */
269 register struct
parse *p;
275 register sopno subno;
/* Parenthesized group: more input must follow the opening parenthesis. */
284 if(REQUIRE(MORE(), MY_REG_EPAREN)) {}
286 subno = (sopno) p->g->nsub;
288 p->pbegin[subno] = HERE();
289 EMIT(OLPAREN, subno);
/* The group's end position is only recorded for numbers below NPAREN. */
292 if (subno < NPAREN) {
293 p->pend[subno] = HERE();
294 assert(p->pend[subno] != 0);
296 EMIT(ORPAREN, subno);
297 if(MUSTEAT(
')', MY_REG_EPAREN)) {}
/* Anchors: remember that the compiled program uses BOL / EOL. */
301 p->g->iflags |= USEBOL;
307 p->g->iflags |= USEEOL;
311 SETERROR(MY_REG_EMPTY);
316 SETERROR(MY_REG_BADRPT);
319 if (p->g->cflags&MY_REG_NEWLINE)
/* An escape must be followed by at least one more character. */
328 if(REQUIRE(MORE(), MY_REG_EESCAPE)) {}
/* A digit at this point is rejected as a misplaced repetition. */
333 if(REQUIRE(!MORE() || !my_isdigit(p->charset,PEEK()), MY_REG_BADRPT)) {}
/* Test whether `c` starts a repetition operator: '*', '+', '?', or '{'
 * immediately followed by a digit. */
344 if (!( c ==
'*' || c ==
'+' || c ==
'?' ||
345 (c ==
'{' && MORE2() &&
346 my_isdigit(p->charset,PEEK2())) ))
/* A repetition directly after '^' is not allowed. */
350 if(REQUIRE(!wascaret, MY_REG_BADRPT)) {}
/* Wrap the operand starting at `pos` in OQUEST_ ... O_QUEST. */
356 INSERT(OQUEST_, pos);
357 ASTERN(O_QUEST, pos);
370 ASTERN(O_CH, THERETHERE());
/* Bounded repetition: the upper bound must not be below the lower one;
 * RE_INFINITY stands for an open upper bound. */
375 if (my_isdigit(p->charset,PEEK())) {
377 if(REQUIRE(count <= count2, MY_REG_BADBR)) {}
379 count2 = RE_INFINITY;
382 repeat(p, pos, count, count2);
/* Error recovery: skip ahead to '}' (MY_REG_EBRACE if the input runs out)
 * and report a bad brace expression. */
384 while (MORE() && PEEK() !=
'}')
386 if(REQUIRE(MORE(), MY_REG_EBRACE)) {}
387 SETERROR(MY_REG_BADBR);
/* Same repetition-operator test, applied to what follows the item; the
 * statement between this test and the MY_REG_BADRPT below is not visible. */
395 if (!( c ==
'*' || c ==
'+' || c ==
'?' ||
396 (c ==
'{' && MORE2() &&
397 my_isdigit(p->charset,PEEK2())) ) )
399 SETERROR(MY_REG_BADRPT);
/*
 * Fragment of the literal-string parser (function header not visible):
 * requires a non-empty pattern and feeds characters to ordinary().
 * NOTE(review): the loop around the ordinary() call is not visible.
 */
408 register struct
parse *p;
410 if(REQUIRE(MORE(), MY_REG_EMPTY)) {}
412 ordinary(p, GETNEXT());
/*
 * Fragment of the basic-RE parser (function header not visible).
 * Visible behavior: parses simple REs with p_simp_re() until the two
 * characters end1,end2 are next or the input ends, tracks the BOL/EOL
 * usage flags, and rejects an RE that emitted nothing.
 */
429 register struct
parse *p;
/* Strip position at entry; used below to detect an empty RE. */
433 register sopno start = HERE();
434 register int first = 1;
435 register int wasdollar = 0;
439 p->g->iflags |= USEBOL;
442 while (MORE() && !SEETWO(end1, end2)) {
/* p_simp_re()'s return value is kept in `wasdollar`. */
443 wasdollar = p_simp_re(p, first);
449 p->g->iflags |= USEEOL;
453 if(REQUIRE(HERE() != start, MY_REG_EMPTY)) {}
/*
 * p_simp_re - parse one simple RE of a basic regular expression, including
 * a trailing '*' or \{m,n\} repetition.
 *
 * p            parse state
 * starordinary when true, a leading '*' is accepted (see the REQUIRE
 *              on starordinary below)
 *
 * NOTE(review): the switch labels and the return statement are not visible
 * in this extract; the caller stores the result in a variable named
 * `wasdollar`.
 */
461 p_simp_re(p, starordinary)
462 register struct
parse *p;
470 register sopno subno;
/* Escaped characters are tagged by OR-ing in a bit just above the range of
 * an unsigned char, so one switch can tell "\(" from "(". */
471 # define BACKSL (1<<CHAR_BIT)
478 if(REQUIRE(MORE(), MY_REG_EESCAPE)) {}
479 c = BACKSL | (
unsigned char)GETNEXT();
483 if (p->g->cflags&MY_REG_NEWLINE)
492 SETERROR(MY_REG_BADRPT);
/* Subexpression: bracket its contents with OLPAREN/ORPAREN and remember
 * where it begins and ends in the strip. */
496 subno = (sopno) p->g->nsub;
498 p->pbegin[subno] = HERE();
499 EMIT(OLPAREN, subno);
/* Only parse a body if the group is not immediately closed by "\)". */
501 if (MORE() && !SEETWO(
'\\',
')'))
503 if (subno < NPAREN) {
504 p->pend[subno] = HERE();
505 assert(p->pend[subno] != 0);
507 EMIT(ORPAREN, subno);
508 if(REQUIRE(EATTWO(
'\\',
')'), MY_REG_EPAREN)) {}
512 SETERROR(MY_REG_EPAREN);
/* Back-reference: strip the escape tag and convert the digit to a number. */
523 i = (c&~BACKSL) -
'0';
/* A non-zero pend[i] means group i has already been closed; its strip
 * contents (between the parentheses) are duplicated with dupl(). */
525 if (p->pend[i] != 0) {
526 assert((uint) i <= p->g->nsub);
528 assert(p->pbegin[i] != 0);
529 assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
530 assert(OP(p->strip[p->pend[i]]) == ORPAREN);
531 (void) dupl(p, p->pbegin[i]+1, p->pend[i]);
534 SETERROR(MY_REG_ESUBREG);
538 if(REQUIRE(starordinary, MY_REG_BADRPT)) {}
/* Anything else is emitted as a literal with the escape tag removed. */
541 ordinary(p, c &~ BACKSL);
549 INSERT(OQUEST_, pos);
550 ASTERN(O_QUEST, pos);
/* Bounded repetition "\{count[,count2]\}". */
551 }
else if (EATTWO(
'\\',
'{')) {
554 if (MORE() && my_isdigit(p->charset,PEEK())) {
556 if(REQUIRE(count <= count2, MY_REG_BADBR)) {}
558 count2 = RE_INFINITY;
561 repeat(p, pos, count, count2);
/* Missing "\}": skip to it (MY_REG_EBRACE if the input runs out) and
 * report a bad brace expression. */
562 if (!EATTWO(
'\\',
'}')) {
563 while (MORE() && !SEETWO(
'\\',
'}'))
565 if(REQUIRE(MORE(), MY_REG_EBRACE)) {}
566 SETERROR(MY_REG_BADBR);
/* An unescaped '$' (no BACKSL bit set) is treated specially; the body of
 * this branch is not visible. */
568 }
else if (c == (
unsigned char)
'$')
/*
 * Fragment of the repetition-count parser (function header not visible).
 * Accumulates decimal digits into `count`, stopping as soon as the value
 * exceeds DUPMAX, then requires at least one digit and a value no larger
 * than DUPMAX (MY_REG_BADBR otherwise).
 */
580 register struct
parse *p;
582 register int count = 0;
583 register int ndigits = 0;
/* The `count <= DUPMAX` guard ends the loop before the running value can
 * grow further. */
585 while (MORE() && my_isdigit(p->charset,PEEK()) && count <= DUPMAX) {
586 count = count*10 + (GETNEXT() -
'0');
590 if(REQUIRE(ndigits > 0 && count <= DUPMAX, MY_REG_BADBR)) {}
/*
 * Fragment of the bracket-expression parser (function header not visible).
 * Builds a character set with allocset(), handles the word-boundary
 * spellings "[:<:]]" and "[:>:]]", applies case folding under
 * MY_REG_ICASE, and finally emits either a single ordinary character or an
 * OANYOF operator referring to the frozen set.
 */
603 register struct
parse *p;
605 register cset *cs = allocset(p);
606 register int invert = 0;
/* Both checks first make sure six characters remain before comparing. */
609 if (p->next + 5 < p->end && strncmp(p->next,
"[:<:]]", 6) == 0) {
614 if (p->next + 5 < p->end && strncmp(p->next,
"[:>:]]", 6) == 0) {
/* Parse terms until the closing ']' or a trailing "-]". */
626 while (MORE() && PEEK() !=
']' && !SEETWO(
'-',
']'))
630 if(MUSTEAT(
']', MY_REG_EBRACK)) {}
/* Case-insensitive: look up the other-case form of every alphabetic
 * member (the statement that adds it is not visible). */
635 if (p->g->cflags&MY_REG_ICASE) {
639 for (i = p->g->csetsize - 1; i >= 0; i--)
640 if (CHIN(cs, i) && my_isalpha(p->charset,i)) {
641 ci = othercase(p->charset,i);
645 if (cs->multis != NULL)
651 for (i = p->g->csetsize - 1; i >= 0; i--)
656 if (p->g->cflags&MY_REG_NEWLINE)
658 if (cs->multis != NULL)
662 assert(cs->multis == NULL);
/* A one-member set is emitted as a plain character instead of a set. */
664 if (nch(p, cs) == 1) {
665 ordinary(p, firstch(p, cs));
668 EMIT(OANYOF, freezeset(p, cs));
/*
 * Fragment of the routine parsing one term inside a bracket expression
 * (function header not visible): a character class "[:name:]", an
 * equivalence class "[=x=]", a single symbol, or a range "a-b".
 */
677 register struct
parse *p;
/* NOTE(review): range endpoints are plain `char`, so the `start <= finish`
 * comparison below depends on whether char is signed on the platform. */
681 register char start, finish;
/* Classify the term by its first character ('\0' when input is exhausted)
 * and, where needed, by the character after it. */
685 switch ((MORE()) ? PEEK() :
'\0') {
687 c = (MORE2()) ? PEEK2() :
'\0';
690 SETERROR(MY_REG_ERANGE);
/* "[:class:]": must not be cut short, must not start with '-' or ']',
 * and must be closed by ":]". */
700 if(REQUIRE(MORE(), MY_REG_EBRACK)) {}
702 if(REQUIRE(c !=
'-' && c !=
']', MY_REG_ECTYPE)) {}
704 if(REQUIRE(MORE(), MY_REG_EBRACK)) {}
705 if(REQUIRE(EATTWO(
':',
']'), MY_REG_ECTYPE)) {}
/* "[=equiv=]": same checks, reported as MY_REG_ECOLLATE. */
709 if(REQUIRE(MORE(), MY_REG_EBRACK)) {}
711 if(REQUIRE(c !=
'-' && c !=
']', MY_REG_ECOLLATE)) {}
713 if(REQUIRE(MORE(), MY_REG_EBRACK)) {}
714 if(REQUIRE(EATTWO(
'=',
']'), MY_REG_ECOLLATE)) {}
/* Single symbol, or a range when followed by '-' and something other
 * than ']'. */
718 start = p_b_symbol(p);
719 if (SEE(
'-') && MORE2() && PEEK2() !=
']') {
725 finish = p_b_symbol(p);
729 if(REQUIRE(start <= finish, MY_REG_ERANGE)) {}
730 for (i = start; i <= finish; i++)
/*
 * Fragment of the "[:name:]" character-class handler (function header not
 * visible).  Reads an alphabetic class name from the input, looks it up in
 * cclasses[], and walks the characters belonging to that class.
 */
742 register struct
parse *p;
/* `sp` marks the start of the class name in the pattern. */
745 register char *sp = p->next;
746 register struct cclass *cp;
749 while (MORE() && my_isalpha(p->charset,PEEK()))
/* Exact match only: same leading `len` characters and the table name must
 * end right there. */
752 for (cp = cclasses; cp->name != NULL; cp++)
753 if (strncmp(cp->name, sp, len) == 0 && cp->name[len] ==
'\0')
/* Reaching the NULL-named terminator means the name is unknown. */
755 if (cp->name == NULL) {
757 SETERROR(MY_REG_ECTYPE);
/* Default build: test character codes 1..255 against the class mask via
 * the charset's ctype table, which is indexed with an offset of one. */
761 #ifndef USE_ORIG_REGEX_CODE
764 for (i=1 ; i<256 ; i++)
765 if (p->charset->ctype[i+1] & cp->mask)
/* Other visible path: walk the explicit member string and then the list of
 * NUL-separated multi-character members. */
770 register char *u = (
char*) cp->chars;
773 while ((c = *u++) !=
'\0')
776 for (u = (
char*) cp->multis; *u !=
'\0'; u += strlen(u) + 1)
/*
 * Fragment of the "[=x=]" equivalence-class handler (function header not
 * visible): resolves the element with p_b_coll_elem() using '=' as the
 * closing delimiter.
 */
791 register struct
parse *p;
796 c = p_b_coll_elem(p,
'=');
/*
 * Fragment of the routine that reads one symbol inside a bracket
 * expression (function header not visible).  Input must remain; a symbol
 * introduced by "[." is a collating element that must be closed by ".]".
 */
806 register struct
parse *p;
810 if(REQUIRE(MORE(), MY_REG_EBRACK)) {}
/* No "[." prefix: handled by a statement that is not visible here. */
811 if (!EATTWO(
'[',
'.'))
815 value = p_b_coll_elem(p,
'.');
816 if(REQUIRE(EATTWO(
'.',
']'), MY_REG_ECOLLATE)) {}
/*
 * p_b_coll_elem - parse a collating-element name inside a bracket
 * expression.
 *
 * p     parse state
 * endc  delimiter character; the name ends where `endc` followed by ']'
 *       is seen
 *
 * Visible errors: MY_REG_EBRACK and MY_REG_ECOLLATE.  The return statements
 * are not visible in this extract.
 */
825 p_b_coll_elem(p, endc)
826 register struct
parse *p;
/* `sp` marks the start of the element name in the pattern. */
829 register char *sp = p->next;
830 register struct cname *cp;
/* NOTE(review): __int64 is a compiler-specific type; the preprocessor
 * conditional that presumably guards this declaration is not visible. */
832 register __int64 len;
836 while (MORE() && !SEETWO(endc,
']'))
839 SETERROR(MY_REG_EBRACK);
/* Look the name up in cnames[]; only an exact-length match counts. */
843 for (cp = cnames; cp->name != NULL; cp++)
844 if (strncmp(cp->name, sp, len) == 0 && cp->name[len] ==
'\0')
848 SETERROR(MY_REG_ECOLLATE);
/*
 * othercase - return the opposite-case form of alphabetic character `ch`
 * in `charset`.
 *
 * Upper-case input is lowered and lower-case input is raised, but only
 * when the charset provides the corresponding to_lower / to_upper table.
 * NOTE(review): the fallback arms of both conditional expressions and the
 * final return are not visible in this extract.
 */
857 othercase(charset,ch)
872 assert(my_isalpha(charset,ch));
873 if (my_isupper(charset,ch))
875 return(charset->to_lower ? my_tolower(charset,ch) :
878 else if (my_islower(charset,ch))
880 return(charset->to_upper ? my_toupper(charset,ch) :
/*
 * Fragment of a helper used for case-insensitive literals (function header
 * not visible).  It saves the parser's input window, and asserts that the
 * character really has a distinct other-case form and that exactly two
 * characters of a temporary `bracket` buffer were consumed.
 * NOTE(review): the lines that redirect and restore p->next / p->end are
 * not visible.
 */
895 register struct
parse *p;
898 register char *oldnext = p->next;
899 register char *oldend = p->end;
902 assert(othercase(p->charset, ch) != ch);
909 assert(p->next == bracket+2);
/*
 * Fragment of the routine that emits one literal character (function
 * header not visible).  Under MY_REG_ICASE an alphabetic character with a
 * distinct other-case form takes a separate path (not visible); otherwise
 * an OCHAR operator is emitted and the character may receive a fresh
 * category number.
 */
920 register struct
parse *p;
923 register cat_t *cap = p->g->categories;
925 if ((p->g->cflags&MY_REG_ICASE) && my_isalpha(p->charset,ch) &&
926 othercase(p->charset,ch) != ch)
/* The operand is the character converted to unsigned char. */
929 EMIT(OCHAR, (
unsigned char)ch);
/* Assign the next unused category; the guarding condition is not visible. */
931 cap[ch] = p->g->ncategories++;
/*
 * Fragment of a helper that parses a temporary three-character `bracket`
 * buffer (function header not visible): it saves the parser's input window
 * and asserts that exactly three characters were consumed.
 * NOTE(review): the buffer contents and the restore of p->next / p->end
 * are not visible.
 */
943 register struct
parse *p;
945 register char *oldnext = p->next;
946 register char *oldend = p->end;
956 assert(p->next == bracket+3);
/*
 * repeat - rewrite the operand that begins at strip position `start` so
 * that it matches between `from` and `to` times (`to` may be RE_INFINITY).
 *
 * The function works by case analysis on the (from, to) pair and recurses
 * on duplicated copies of the operand for the general cases.
 * NOTE(review): the `case` labels are not visible in this extract, so the
 * mapping of each statement to a particular (from, to) shape is omitted.
 */
966 repeat(p, start, from,
to)
967 register struct
parse *p;
/* End of the operand at entry. */
972 register sopno finish = HERE();
/* REP packs two small codes into one switch value; MAP reduces a count to
 * itself (0 or 1), to INF for RE_INFINITY, or to N for anything else. */
975 # define REP(f, t) ((f)*8 + (t))
976 # define MAP(n) (((n) <= 1) ? (n) : ((n) == RE_INFINITY) ? INF : N)
984 switch (REP(MAP(from), MAP(to))) {
993 repeat(p, start+1, 1, to);
998 ASTERN(O_CH, THERETHERE());
1005 INSERT(OCH_, start);
1006 ASTERN(OOR1, start);
1010 ASTERN(O_CH, THERETHERE());
/* Duplicate the operand and recurse on the copy with one fewer repetition. */
1011 copy = dupl(p, start+1, finish+1);
1012 assert(copy == finish+4);
1013 repeat(p, copy, 1, to-1);
1016 INSERT(OPLUS_, start);
1017 ASTERN(O_PLUS, start);
1020 copy = dupl(p, start, finish);
1021 repeat(p, copy, from-1, to-1);
1024 copy = dupl(p, start, finish);
1025 repeat(p, copy, from-1, to);
/* Any other combination is reported as an internal error. */
1028 SETERROR(MY_REG_ASSERT);
/*
 * K&R-style parameter declaration of a function taking the parse state.
 * NOTE(review): the function header and body are not visible in this
 * extract; by position this is presumably the error-recording routine
 * that the SETERROR() macro calls -- confirm.
 */
1039 register struct
parse *p;
/*
 * Fragment of the character-set allocator (function header not visible;
 * the bracket parser calls allocset(p) and receives a cset pointer).
 * Takes the next set number, grows the shared `sets` and `setbits` arrays
 * in steps of CHAR_BIT sets when needed, and initializes the new set's
 * column pointer and bit mask.
 */
1055 register struct
parse *p;
/* The new set's index; ncsets is bumped immediately. */
1057 register int no = p->g->ncsets++;
1059 register size_t nbytes;
1061 register size_t css = (size_t)p->g->csetsize;
/* Out of preallocated sets: make room for CHAR_BIT more. */
1064 if (no >= p->ncsalloc) {
1065 p->ncsalloc += CHAR_BIT;
1067 assert(nc % CHAR_BIT == 0);
/* Each group of CHAR_BIT sets shares one block of css bytes. */
1068 nbytes = nc / CHAR_BIT * css;
1069 if (p->g->sets == NULL)
1070 p->g->sets = (
cset *)malloc(nc *
sizeof(
cset));
/* NOTE(review): the realloc() results below are stored straight back into
 * the pointers being reallocated; if realloc fails the original block
 * would no longer be reachable -- confirm how the error path frees it. */
1072 p->g->sets = (
cset *)realloc((
char *)p->g->sets,
1074 if (p->g->setbits == NULL)
1075 p->g->setbits = (uch *)malloc(nbytes);
1077 p->g->setbits = (uch *)realloc((
char *)p->g->setbits,
/* setbits may have moved, so re-derive every existing set's pointer. */
1080 for (i = 0; i < no; i++)
1081 p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT);
/* On success, clear the newly added final block of css bytes. */
1083 if (p->g->sets != NULL && p->g->setbits != NULL)
1084 (void) memset((
char *)p->g->setbits + (nbytes - css),
1088 SETERROR(MY_REG_ESPACE);
1093 assert(p->g->sets != NULL);
/* Set `no` lives in block no/CHAR_BIT and owns bit no%CHAR_BIT of each
 * byte in that block. */
1094 cs = &p->g->sets[no];
1095 cs->ptr = p->g->setbits + css*((no)/CHAR_BIT);
1096 cs->mask = 1 << ((no) % CHAR_BIT);
/*
 * Fragment of a routine that walks all `css` character codes of a set
 * (function header and loop body not visible).  `top` points one past the
 * last allocated set.
 * NOTE(review): by position this is presumably the set-release routine.
 */
1110 register struct
parse *p;
1114 register cset *top = &p->g->sets[p->g->ncsets];
1115 register size_t css = (size_t)p->g->csetsize;
1117 for (i = 0; i < css; i++)
/*
 * Fragment of the routine that finalizes a character set (function header
 * not visible; the bracket parser passes its result as the OANYOF
 * operand).  It searches the existing sets for another one with the same
 * hash and identical membership, and returns a set's index in
 * p->g->sets.
 * NOTE(review): what happens when a duplicate is found is not visible.
 */
1135 register struct
parse *p;
1138 register uch h = cs->hash;
1140 register cset *top = &p->g->sets[p->g->ncsets];
1142 register size_t css = (size_t)p->g->csetsize;
/* The hash is a cheap pre-filter; candidates are then compared member by
 * member (!! normalizes CHIN's result to 0/1). */
1145 for (cs2 = &p->g->sets[0]; cs2 < top; cs2++)
1146 if (cs2->hash == h && cs2 != cs) {
1148 for (i = 0; i < css; i++)
1149 if (!!CHIN(cs2, i) != !!CHIN(cs, i))
/* The result is the set's position within the sets array. */
1160 return((
int)(cs - p->g->sets));
/*
 * Fragment of a routine that scans all `css` character codes of a set
 * (function header and loop body not visible).
 * NOTE(review): by position this is presumably firstch(), which the
 * bracket parser uses to obtain the only member of a one-element set.
 */
1169 register struct
parse *p;
1173 register size_t css = (size_t)p->g->csetsize;
1175 for (i = 0; i < css; i++)
/*
 * Fragment of a routine that scans all `css` character codes of a set
 * (function header and loop body not visible).
 * NOTE(review): by position this is presumably nch(), whose result the
 * bracket parser compares with 1 to detect a single-member set.
 */
1188 register struct
parse *p;
1192 register size_t css = (size_t)p->g->csetsize;
1195 for (i = 0; i < css; i++)
/*
 * Fragment of a routine, compiled only with USE_ORIG_REGEX_CODE, that
 * appends the string `cp` to a set's `multis` buffer (function header not
 * visible).  The buffer grows by strlen(cp)+1 bytes on every call.
 */
1201 #ifdef USE_ORIG_REGEX_CODE
1209 register struct
parse *p;
/* Size of the buffer before this addition. */
1213 register size_t oldend = cs->smultis;
1215 cs->smultis += strlen(cp) + 1;
1216 if (cs->multis == NULL)
1217 cs->multis = malloc(cs->smultis);
/* NOTE(review): realloc()'s result overwrites cs->multis directly, so the
 * old buffer would be unreachable if the call fails -- confirm. */
1219 cs->multis = realloc(cs->multis, cs->smultis);
1220 if (cs->multis == NULL) {
1221 SETERROR(MY_REG_ESPACE);
/* The new string is written starting one byte before the old end, and the
 * last byte of the buffer is then set to NUL. */
1225 (void) strcpy(cs->multis + oldend - 1, cp);
1226 cs->multis[cs->smultis - 1] =
'\0';
/*
 * Fragment of a routine whose only visible statement asserts that the set
 * has no multi-character members; both parameters are marked unused
 * (function header not visible).
 * NOTE(review): by position this is presumably the "invert multis" stub.
 */
1239 register struct
parse *p __attribute__((unused));
1240 register
cset *cs __attribute__((unused));
1242 assert(cs->multis == NULL);
/*
 * Fragment of a second routine whose only visible statement asserts that
 * the set has no multi-character members; both parameters are marked
 * unused (function header not visible).
 * NOTE(review): by position this is presumably the "case-fold multis" stub.
 */
1254 register struct
parse *p __attribute__((unused));
1255 register
cset *cs __attribute__((unused));
1257 assert(cs->multis == NULL);
/*
 * Fragment of the routine that inspects character `c` across all sets
 * (function header not visible; categorize-style code calls
 * isinsets(g, c)).
 */
/* Number of csetsize-byte blocks in setbits: ncsets rounded up to a
 * multiple of CHAR_BIT, divided by CHAR_BIT. */
1271 register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
/* Index with the unsigned value so negative chars do not index backwards. */
1272 register unsigned uc = (
unsigned char)c;
/* Step through the blocks; the per-block test is not visible. */
1274 for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
/*
 * Fragment of the routine that compares the set membership of two
 * characters (function header not visible; it is called as
 * samesets(g, c, c2)).  The characters differ as soon as their bytes
 * differ in any block of setbits.
 */
1292 register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
/* Both characters are converted to unsigned before being used as indexes. */
1293 register unsigned uc1 = (
unsigned char)c1;
1294 register unsigned uc2 = (
unsigned char)c2;
1296 for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
1297 if (col[uc1] != col[uc2])
/*
 * Fragment of the routine that assigns category numbers to characters
 * (function header not visible).  Every char value that has no category
 * yet (0) but belongs to some set gets a fresh category; later characters
 * with exactly the same set membership are matched against it.
 * NOTE(review): the assignments into cats[] are not visible.
 */
1311 register cat_t *cats = g->categories;
/* `cats` is indexed with signed char values, CHAR_MIN..CHAR_MAX. */
1320 for (c = CHAR_MIN; c <= CHAR_MAX; c++)
1321 if (cats[c] == 0 && isinsets(g, c)) {
1322 cat = g->ncategories++;
1324 for (c2 = c+1; c2 <= CHAR_MAX; c2++)
1325 if (cats[c2] == 0 && samesets(g, c, c2))
/*
 * dupl - append a copy of strip entries [start, finish) to the end of the
 * strip.
 *
 * p       parse state
 * start   first strip position to copy
 * finish  one past the last strip position to copy (must be >= start)
 *
 * `ret` records the strip position where the copy begins.
 * NOTE(review): the update of p->slen and the return statement are not
 * visible in this extract.
 */
1335 dupl(p, start, finish)
1336 register struct
parse *p;
1340 register sopno
ret = HERE();
1341 register sopno len = finish - start;
1343 assert(finish >= start);
/* Request room for `len` more entries before copying. */
1346 enlarge(p, p->ssize + len);
1347 assert(p->ssize >= p->slen + len);
/* Source and destination are both inside p->strip; the destination starts
 * at the current end (p->slen). */
1348 (void) memcpy((
char *)(p->strip + p->slen),
1349 (
char *)(p->strip + start), (
size_t)len*
sizeof(sop));
/*
 * Fragment of the routine behind the EMIT() macro (function header not
 * visible): appends one operator/operand pair to the strip, growing the
 * strip to about one and a half times its size when it is full.
 */
1364 register struct
parse *p;
/* The operand must fit below the operator bits. */
1373 assert(opnd < 1<<OPSHIFT);
1376 if (p->slen >= p->ssize)
1377 enlarge(p, (p->ssize+1) / 2 * 3);
1378 assert(p->slen < p->ssize);
1381 p->strip[p->slen++] = SOP(op, opnd);
/*
 * doinsert - insert operator `op` with operand `opnd` at strip position
 * `pos`, shifting the entries from `pos` onwards up by one.
 *
 * Recorded subexpression boundaries at or after `pos` are examined so they
 * can follow the shift (the adjusting statements are not visible).
 * NOTE(review): the emit of the new entry and the final store into
 * p->strip[pos] are not visible in this extract.
 */
1389 doinsert(p, op, opnd, pos)
1390 register struct
parse *p;
/* Exactly one entry was appended since `sn` was taken. */
1405 assert(HERE() == sn+1);
/* Index 0 is skipped; group numbers start at 1. */
1410 for (i = 1; i < NPAREN; i++) {
1411 if (p->pbegin[i] >= pos) {
1414 if (p->pend[i] >= pos) {
/* Two alternative implementations of the overlapping upward move follow
 * (the selecting conditional is not visible): bmove_upp() is passed the
 * END of both regions, memmove() the start. */
1419 int length=(HERE()-pos-1)*
sizeof(sop);
1420 bmove_upp((uchar *) &p->strip[pos+1]+length,
1421 (uchar *) &p->strip[pos]+length,
1425 memmove((
char *)&p->strip[pos+1], (
char *)&p->strip[pos],
1426 (HERE()-pos-1)*
sizeof(sop));
/*
 * dofwd - fill in the operand of the strip entry at `pos`.
 *
 * p      parse state
 * pos    strip position of the entry to patch
 * value  operand to store; must be below 1<<OPSHIFT
 *
 * The entry's operator bits (OP) are kept and `value` is OR-ed in.
 */
1436 dofwd(p, pos, value)
1437 register struct
parse *p;
1445 assert(value < 1<<OPSHIFT);
1446 p->strip[pos] = OP(p->strip[pos]) | value;
/*
 * Fragment of the strip-growing routine (function header not visible; it
 * is called as enlarge(p, size)).  Nothing needs doing when the strip
 * already holds `size` entries; otherwise the strip is reallocated into a
 * temporary pointer and MY_REG_ESPACE is raised on failure.
 * NOTE(review): the NULL check and the assignments to p->strip / p->ssize
 * are not visible.
 */
1455 register struct
parse *p;
1456 register sopno
size;
1460 if (p->ssize >= size)
/* The result goes into `sp` first, so p->strip is not overwritten here. */
1463 sp = (sop *)realloc(p->strip, size*
sizeof(sop));
1465 SETERROR(MY_REG_ESPACE);
/*
 * Fragment of the routine that hands the finished strip to the compiled
 * program `g` (function header not visible).  The strip is shrunk to
 * exactly p->slen entries; if that reallocation fails an error is recorded
 * and the original, untrimmed strip is kept instead.
 */
1478 register struct
parse *p;
1481 g->nstates = p->slen;
1482 g->strip = (sop *)realloc((
char *)p->strip, p->slen *
sizeof(sop));
1483 if (g->strip == NULL) {
1484 SETERROR(MY_REG_ESPACE);
/* p->strip is still valid after a failed realloc, so fall back to it. */
1485 g->strip = p->strip;
/*
 * Fragment of the routine that fills in g->must / g->mlen (function header
 * not visible).  It scans the strip for runs, remembers a run longer than
 * the current g->mlen, then copies the operands of that run's OCHAR
 * entries into a freshly allocated buffer of g->mlen+1 bytes.
 * NOTE(review): the switch that classifies operators during the scan, and
 * the handling of allocation failure, are not visible.
 */
1505 sop *UNINIT_VAR(start);
1506 register sop *UNINIT_VAR(newstart);
1507 register sopno newlen;
/* The scan starts at the second strip entry. */
1518 scan = g->strip + 1;
1524 newstart = scan - 1;
1538 if (OP(s) != O_QUEST && OP(s) != O_CH &&
/* Loop until an O_QUEST or O_CH operator is reached. */
1543 }
while (OP(s) != O_QUEST && OP(s) != O_CH);
/* Keep only a run longer than the best one so far. */
1546 if (newlen > g->mlen) {
1553 }
while (OP(s) != OEND);
/* One extra byte beyond the g->mlen characters. */
1559 g->must = malloc((
size_t)g->mlen + 1);
1560 if (g->must == NULL) {
/* Copy g->mlen characters, skipping entries that are not OCHAR. */
1566 for (i = g->mlen; i > 0; i--) {
1567 while (OP(s = *scan++) != OCHAR)
1569 assert(cp < g->must + g->mlen);
1570 *cp++ = (char)OPND(s);
1572 assert(cp == g->must + g->mlen);
/*
 * Fragment of the routine whose result my_regcomp stores in g->nplus
 * (function header not visible).  It scans the strip from the second entry
 * to OEND while tracking a current nesting depth and the maximum seen.
 * NOTE(review): the statements that raise and lower `plusnest` are not
 * visible; the name suggests they follow the plus operators -- confirm.
 */
1587 register sopno plusnest = 0;
1588 register sopno maxnest = 0;
1593 scan = g->strip + 1;
1601 if (plusnest > maxnest)
1606 }
while (OP(s) != OEND);