/*
 * Name-mapping and tracing macros (excerpt: the embedded leading numbers
 * show that lines of the original file are missing between entries).
 */
/* Map the generic names matcher/dissect/backref onto s-prefixed names. */
9 #define matcher smatcher
12 #define dissect sdissect
13 #define backref sbackref
/*
 * Second mapping of the same three names, onto l-prefixed names.
 * Presumably selected by a preprocessor conditional that is not visible
 * in this excerpt - confirm.
 */
20 #define matcher lmatcher
23 #define dissect ldissect
24 #define backref lbackref
/* SP: forwards to print(), supplying the in-scope `m` and stdout. */
51 #define SP(t, s, c) print(m, t, s, c, stdout)
/* AT: forwards to at(), supplying the in-scope `m`. */
52 #define AT(t, p1, p2, s1, s2) at(m, t, p1, p2, s1, s2)
/*
 * NOTE: prints "=" followed by str and a newline when MY_REG_TRACE is set
 * in m->eflags.
 * NOTE(review): the expansion is a bare brace block, not a
 * do-while(0) wrapper, so "NOTE(x);" leaves an extra empty statement and
 * would not compose with an if/else that has no braces.
 */
53 #define NOTE(str) { if (m->eflags&MY_REG_TRACE) printf("=%s\n", (str)); }
/*
 * Empty definition of AT. Presumably the variant used when tracing is
 * compiled out; the guarding conditional is not visible here.
 */
56 #define AT(t, p1, p2, s1, s2)
/*
 * matcher - top-level match driver (excerpt; parameter declarations, several
 * branch bodies and the enclosing loop structure are not visible).
 *
 * Uses the compiled pattern `g`, the subject `str`, and the caller's
 * `pmatch` array of `nmatch` slots. The visible failure returns are
 * MY_REG_INVARG, MY_REG_NOMATCH and MY_REG_ESPACE.
 */
66 matcher(charset,g, str, nmatch, pmatch, eflags)
/* m refers to a local match record mv (its declaration is not visible). */
77 register struct match *m = &mv;
/* gf/gl bound the state range handed to fast/slow/dissect/backref below. */
79 register const sopno gf = g->firststate+1;
80 register const sopno gl = g->laststate;
/* MY_REG_NOSUB handling; the body of this test is not visible. */
85 if (g->cflags&MY_REG_NOSUB)
/* With MY_REG_STARTEND the subject range comes from pmatch[0] offsets. */
87 if (eflags&MY_REG_STARTEND) {
88 start = str + pmatch[0].rm_so;
89 stop = str + pmatch[0].rm_eo;
/* Presumably the else branch: the range ends at the string terminator. */
92 stop = start + strlen(start);
/* Argument rejection; the condition guarding this return is not visible. */
95 return(MY_REG_INVARG);
/*
 * Prefilter: when g->must is set, scan [start, stop) for an occurrence of
 * its g->mlen bytes. The first-byte and remaining-length tests run before
 * memcmp.
 */
98 if (g->must != NULL) {
99 for (dp = start; dp < stop; dp++)
100 if (*dp == g->must[0] && stop - dp >= g->mlen &&
101 memcmp(dp, g->must, (
size_t)g->mlen) == 0)
/* Presumably taken when the scan found nothing; condition not visible. */
104 return(MY_REG_NOMATCH);
/* First pass over the whole range with fast(). */
124 endp = fast(charset, m, start, stop, gf, gl);
/*
 * Release any scratch arrays and report no match. The test that selects
 * this path is not visible; presumably it is the fast() result.
 */
126 if (m->pmatch != NULL)
127 free((
char *)m->pmatch);
128 if (m->lastpos != NULL)
129 free((
char *)m->lastpos);
131 return(MY_REG_NOMATCH);
/* Case of no requested slots and no back-references; body not visible. */
133 if (nmatch == 0 && !g->backrefs)
137 assert(m->coldp != NULL);
139 NOTE(
"finding start");
/* Second pass with slow(), starting from m->coldp. */
140 endp = slow(charset, m, m->coldp, stop, gf, gl);
143 assert(m->coldp < m->endp);
/* Case of a single requested slot and no back-references; body not visible. */
146 if (nmatch == 1 && !g->backrefs)
/* The allocation of m->pmatch (between these two tests) is not visible. */
150 if (m->pmatch == NULL)
/* m->pmatch still NULL: free lastpos if present and report MY_REG_ESPACE. */
153 if (m->pmatch == NULL) {
154 if (m->lastpos != NULL)
155 free((
char *)m->lastpos);
157 return(MY_REG_ESPACE);
/* Mark subexpression slots 1..nsub as unset (-1). */
159 for (i = 1; i <= m->g->nsub; i++)
160 m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1;
/* Without back-references (and without MY_REG_BACKR) dissect() is used. */
161 if (!g->backrefs && !(m->eflags&MY_REG_BACKR)) {
163 dp = dissect(charset, m, m->coldp, endp, gf, gl);
/*
 * Allocate m->lastpos on first need when g->nplus > 0; the element-size
 * factor of the malloc expression is on a line that is not visible.
 */
165 if (g->nplus > 0 && m->lastpos == NULL)
166 m->lastpos = (
char **)malloc((g->nplus+1) *
/* Allocation failed: report MY_REG_ESPACE (cleanup lines not visible). */
168 if (g->nplus > 0 && m->lastpos == NULL) {
171 return(MY_REG_ESPACE);
173 NOTE(
"backref dissect");
/* Back-reference aware dissection, starting at level 0. */
174 dp = backref(charset, m, m->coldp, endp, gf, gl, (sopno)0);
181 assert(g->nplus == 0 || m->lastpos != NULL);
/* Body of this test is not visible. */
183 if (dp != NULL || endp <= m->coldp)
/* Retry slow() with the stop bound pulled in to endp-1. */
186 endp = slow(charset, m, m->coldp, endp-1, gf, gl);
/* The asserts expect every subexpression slot to still be unset here. */
191 for (i = 1; i <= m->g->nsub; i++) {
192 assert(m->pmatch[i].rm_so == -1);
193 assert(m->pmatch[i].rm_eo == -1);
196 NOTE(
"backoff dissect");
197 dp = backref(charset, m, m->coldp, endp, gf, gl, (sopno)0);
199 assert(dp == NULL || dp == endp);
/* Advance the starting point to one past m->coldp. */
205 start = m->coldp + 1;
206 assert(start <= stop);
/* Report the overall match as offsets relative to m->offp. */
211 pmatch[0].rm_so = m->coldp - m->offp;
212 pmatch[0].rm_eo = endp - m->offp;
215 assert(m->pmatch != NULL);
/*
 * Fill caller slots 1..nmatch-1: either copied from m->pmatch or set to
 * -1. The condition choosing between the two is not visible.
 */
216 for (i = 1; i < nmatch; i++)
218 pmatch[
i] = m->pmatch[
i];
220 pmatch[
i].rm_so = -1;
221 pmatch[
i].rm_eo = -1;
/* Release scratch arrays. */
225 if (m->pmatch != NULL)
226 free((
char *)m->pmatch);
227 if (m->lastpos != NULL)
228 free((
char *)m->lastpos);
/*
 * dissect - walk the strip range [startst, stopst) over the text
 * [start, stop), recursing into sub-ranges and recording subexpression
 * offsets in m->pmatch (excerpt; the case labels and most declarations are
 * not visible, so the operator each fragment belongs to is not stated).
 */
239 dissect(charset, m, start, stop, startst, stopst)
241 register struct
match *m;
258 register char *oldssp;
260 AT(
"diss", start, stop, startst, stopst);
/* Each iteration handles the strip span [ss, es); es is computed below. */
262 for (ss = startst; ss < stopst; ss = es) {
/* Find es by skipping forward over operand distances. */
265 switch (OP(m->g->strip[es])) {
268 es += OPND(m->g->strip[es]);
/* Keep skipping by operand until an O_CH operator is reached. */
271 while (OP(m->g->strip[es]) != O_CH)
272 es += OPND(m->g->strip[es]);
/* Dispatch on the operator at ss. */
278 switch (OP(m->g->strip[ss])) {
/*
 * Recurring pattern: slow() on [ss, es) up to stp gives `rest`, then
 * slow() on the remaining strip from `rest` gives `tail`. The loop that
 * adjusts stp and the test on `tail` are not visible.
 */
303 rest = slow(charset, m, sp, stp, ss, es);
304 assert(rest != NULL);
306 tail = slow(charset, m, rest, stop, es, stopst);
/* Recurse into the sub-range only if slow() matches it on [sp, rest). */
316 if (slow(charset, m, sp, rest, ssub, esub) != NULL)
317 sp = dissect(charset, m, sp, rest, ssub, esub);
325 rest = slow(charset, m, sp, stp, ss, es);
326 assert(rest != NULL);
328 tail = slow(charset, m, rest, stop, es, stopst);
/* Probe the sub-range from ssp; a NULL or zero-length result is special-cased. */
340 sep = slow(charset, m, ssp, rest, ssub, esub);
341 if (sep == NULL || sep == ssp)
/*
 * The slow() call below sits inside assert(), so it is evaluated only
 * when assertions are enabled.
 */
352 assert(slow(charset, m, ssp, sep, ssub, esub) == rest);
353 sp = dissect(charset, m, ssp, sep, ssub, esub);
361 rest = slow(charset, m, sp, stp, ss, es);
362 assert(rest != NULL);
364 tail = slow(charset, m, rest, stop, es, stopst);
/* esub starts one before the target of the operand at ss; expected OOR1. */
372 esub = ss + OPND(m->g->strip[ss]) - 1;
373 assert(OP(m->g->strip[esub]) == OOR1);
/* Test whether the sub-range [ssub, esub) consumes exactly [sp, rest). */
375 if (slow(charset, m, sp, rest, ssub, esub) == rest)
378 assert(OP(m->g->strip[esub]) == OOR1);
380 assert(OP(m->g->strip[esub]) == OOR2);
/* Advance esub by its operand; the result is OOR2 or, per the assert, O_CH. */
382 esub += OPND(m->g->strip[esub]);
383 if (OP(m->g->strip[esub]) == OOR2)
386 assert(OP(m->g->strip[esub]) == O_CH);
388 sp = dissect(charset, m, sp, rest, ssub, esub);
/* Record the start offset of subexpression i, relative to m->offp. */
400 i = OPND(m->g->strip[ss]);
401 assert(0 < i && i <= m->g->nsub);
402 m->pmatch[
i].rm_so = sp - m->offp;
/* Record the end offset of subexpression i, relative to m->offp. */
405 i = OPND(m->g->strip[ss]);
406 assert(0 < i && i <= m->g->nsub);
407 m->pmatch[
i].rm_eo = sp - m->offp;
/*
 * backref - recursive matcher over the strip range [startst, stopst) and
 * the text [start, stop); `lev` indexes m->lastpos (excerpt; the case
 * labels, the failure returns and most declarations are not visible).
 */
425 backref(charset,m, start, stop, startst, stopst, lev)
427 register struct
match *m;
444 register my_regoff_t offsave;
447 AT(
"back", start, stop, startst, stopst);
/* First phase: step through operators until `hard` is set or stopst is hit. */
452 for (ss = startst; !hard && ss < stopst; ss++)
453 switch (OP(s = m->g->strip[ss])) {
/* Literal test: consumes one character (post-increment) and compares it. */
455 if (sp == stop || *sp++ != (
char)OPND(s))
/* Set test: consumes one character and checks it against set OPND(s). */
464 cs = &m->g->sets[OPND(s)];
465 if (sp == stop || !CHIN(cs, *sp++))
/*
 * Beginning-of-line test: at m->beginp without MY_REG_NOTBOL, or after a
 * newline when MY_REG_NEWLINE is set.
 */
469 if ( (sp == m->beginp && !(m->eflags&MY_REG_NOTBOL)) ||
470 (sp < m->endp && *(sp-1) ==
'\n' &&
471 (m->g->cflags&MY_REG_NEWLINE)) )
/*
 * End-of-line test: at m->endp without MY_REG_NOTEOL, or before a newline
 * when MY_REG_NEWLINE is set.
 */
477 if ( (sp == m->endp && !(m->eflags&MY_REG_NOTEOL)) ||
478 (sp < m->endp && *sp ==
'\n' &&
479 (m->g->cflags&MY_REG_NEWLINE)) )
/*
 * Word-start test: a line-start style condition or a non-word previous
 * character (one line of the condition is not visible), and the current
 * character is a word character.
 */
485 if (( (sp == m->beginp && !(m->eflags&MY_REG_NOTBOL)) ||
486 (sp < m->endp && *(sp-1) ==
'\n' &&
487 (m->g->cflags&MY_REG_NEWLINE)) ||
489 !ISWORD(charset,*(sp-1))) ) &&
490 (sp < m->endp && ISWORD(charset,*sp)) )
/*
 * Word-end test: a line-end style condition or a non-word current
 * character, and the previous character is a word character.
 */
496 if (( (sp == m->endp && !(m->eflags&MY_REG_NOTEOL)) ||
497 (sp < m->endp && *sp ==
'\n' &&
498 (m->g->cflags&MY_REG_NEWLINE)) ||
499 (sp < m->endp && !ISWORD(charset,*sp)) ) &&
500 (sp > m->beginp && ISWORD(charset,*(sp-1))) )
/* Tail of a do-while that advances until an O_CH operator is reached. */
511 assert(OP(s) == OOR2);
513 }
while (OP(s = m->g->strip[ss]) != O_CH);
/* Second phase: handle the operator that stopped the first phase. */
528 AT(
"hard", sp, stop, ss, stopst);
/*
 * Back-reference to subexpression i: compare `len` bytes at sp with the
 * text previously recorded for i (m->offp + rm_so). The bodies of the
 * rm_eo == -1 and memcmp-mismatch tests are not visible, nor is any
 * bound check on sp + len other than the assert.
 */
533 assert(0 < i && i <= m->g->nsub);
534 if (m->pmatch[i].rm_eo == -1)
536 assert(m->pmatch[i].rm_so != -1);
537 len = m->pmatch[
i].rm_eo - m->pmatch[
i].rm_so;
538 assert((
size_t) (stop - m->beginp) >= len);
541 ssp = m->offp + m->pmatch[
i].rm_so;
542 if (memcmp(sp, ssp, len) != 0)
/* Skip forward to the SOP(O_BACK, i) operator, then continue past the text. */
544 while (m->g->strip[ss] != SOP(O_BACK, i))
546 return(backref(charset, m, sp+len, stop, ss+1, stopst, lev));
/* Try continuing at ss+1; the fallback skips ahead by OPND(s)+1. */
549 dp = backref(charset, m, sp, stop, ss+1, stopst, lev);
552 return(backref(charset, m, sp, stop, ss+OPND(s)+1, stopst, lev));
/* Record the current position at level lev+1 and recurse one level deeper. */
555 assert(m->lastpos != NULL);
556 assert(lev+1 <= m->g->nplus);
557 m->lastpos[lev+1] = sp;
558 return(backref(charset, m, sp, stop, ss+1, stopst, lev+1));
/* Position unchanged since it was recorded at this level: continue at lev-1. */
561 if (sp == m->lastpos[lev])
562 return(backref(charset, m, sp, stop, ss+1, stopst, lev-1));
/*
 * Otherwise record the position and jump back by OPND(s) for another
 * pass; the test on dp between the two calls is not visible.
 */
564 m->lastpos[lev] = sp;
565 dp = backref(charset, m, sp, stop, ss-OPND(s)+1, stopst, lev);
567 return(backref(charset, m, sp, stop, ss+1, stopst, lev-1));
/* Try sub-ranges in turn, moving esub along the OOR1/OOR2 chain to O_CH. */
573 esub = ss + OPND(s) - 1;
574 assert(OP(m->g->strip[esub]) == OOR1);
576 dp = backref(charset, m, sp, stop, ssub, esub, lev);
580 if (OP(m->g->strip[esub]) == O_CH)
583 assert(OP(m->g->strip[esub]) == OOR2);
585 esub += OPND(m->g->strip[esub]);
586 if (OP(m->g->strip[esub]) == OOR2)
589 assert(OP(m->g->strip[esub]) == O_CH);
/*
 * Set rm_so of subexpression i for the recursive call, keeping the old
 * value in offsave. It is restored afterwards, presumably only when the
 * recursion fails (the test is not visible).
 */
594 assert(0 < i && i <= m->g->nsub);
595 offsave = m->pmatch[
i].rm_so;
596 m->pmatch[
i].rm_so = sp - m->offp;
597 dp = backref(charset, m, sp, stop, ss+1, stopst, lev);
600 m->pmatch[
i].rm_so = offsave;
/* Same save / set / restore sequence for rm_eo of subexpression i. */
605 assert(0 < i && i <= m->g->nsub);
606 offsave = m->pmatch[
i].rm_eo;
607 m->pmatch[
i].rm_eo = sp - m->offp;
608 dp = backref(charset, m, sp, stop, ss+1, stopst, lev);
611 m->pmatch[
i].rm_eo = offsave;
/*
 * fast - scan the text from `start` towards `stop`, advancing a state set
 * with step() over the strip range [startst, stopst) (excerpt; the loop
 * header, several assignments and the return statements are not visible).
 */
631 fast(charset, m, start, stop, startst, stopst)
633 register struct
match *m;
/* Working state sets come from the match record. */
639 register states st = m->st;
640 register states fresh = m->fresh;
641 register states tmp = m->tmp;
642 register char *p = start;
/* c starts as the character before `start`, or OUT at the very beginning. */
643 register int c = (start == m->beginp) ? OUT : *(start-1);
647 register char *coldp;
/* Initial step with the NOTHING pseudo-character. */
651 st = step(m->g, startst, stopst, st, NOTHING, st);
/* Current character, or OUT at the end of the text. */
658 c = (p == m->endp) ? OUT : *p;
/* Line-start condition based on the previous character (lastc). */
665 if ( (lastc ==
'\n' && m->g->cflags&MY_REG_NEWLINE) ||
666 (lastc == OUT && !(m->eflags&MY_REG_NOTBOL)) ) {
/* Line-end condition based on the current character. */
670 if ( (c ==
'\n' && m->g->cflags&MY_REG_NEWLINE) ||
671 (c == OUT && !(m->eflags&MY_REG_NOTEOL)) ) {
/* Both conditions at the same position combine into BOLEOL. */
672 flagch = (flagch == BOL) ? BOLEOL : EOL;
/* Feed the line flag to the state machine (guard not visible). */
677 st = step(m->g, startst, stopst, st, flagch, st);
/* Word-start condition: boundary or non-word before, word character now. */
682 if ( (flagch == BOL || (lastc != OUT && !ISWORD(charset,lastc))) &&
683 (c != OUT && ISWORD(charset,c)) ) {
/* Word-end condition: word character before, boundary or non-word now. */
686 if ( (lastc != OUT && ISWORD(charset,lastc)) &&
687 (flagch == EOL || (c != OUT && !ISWORD(charset,c))) ) {
/* Feed a word-boundary flag to the state machine. */
690 if (flagch == BOW || flagch == EOW) {
691 st = step(m->g, startst, stopst, st, flagch, st);
/* Stop condition: stopst is in the set, or the text is exhausted. */
696 if (ISSET(st, stopst) || p == stop)
/* Consume character c: step from tmp into st. */
703 st = step(m->g, startst, stopst, tmp, c, st);
/* Sanity check: a further NOTHING step leaves st unchanged. */
705 assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
709 assert(coldp != NULL);
/* Result selection on whether stopst was reached; both outcomes not visible. */
711 if (ISSET(st, stopst))
/*
 * slow - scan the text from `start` towards `stop`, advancing a state set
 * with step() over the strip range [startst, stopst), with trace output via
 * AT/SP (excerpt; the loop header, several assignments and the return
 * statement are not visible).
 */
723 slow(charset, m, start, stop, startst, stopst)
725 register struct
match *m;
/* Working state sets come from the match record. */
731 register states st = m->st;
732 register states empty = m->empty;
733 register states tmp = m->tmp;
734 register char *p = start;
/* c starts as the character before `start`, or OUT at the very beginning. */
735 register int c = (start == m->beginp) ? OUT : *(start-1);
739 register char *matchp;
741 AT(
"slow", start, stop, startst, stopst);
744 SP(
"sstart", st, *p);
/* Initial step with the NOTHING pseudo-character. */
745 st = step(m->g, startst, stopst, st, NOTHING, st);
/* Current character, or OUT at the end of the text. */
750 c = (p == m->endp) ? OUT : *p;
/* Line-start condition based on the previous character (lastc). */
755 if ( (lastc ==
'\n' && m->g->cflags&MY_REG_NEWLINE) ||
756 (lastc == OUT && !(m->eflags&MY_REG_NOTBOL)) ) {
/* Line-end condition based on the current character. */
760 if ( (c ==
'\n' && m->g->cflags&MY_REG_NEWLINE) ||
761 (c == OUT && !(m->eflags&MY_REG_NOTEOL)) ) {
/* Both conditions at the same position combine into BOLEOL. */
762 flagch = (flagch == BOL) ? BOLEOL : EOL;
/* Feed the line flag to the state machine (guard not visible). */
767 st = step(m->g, startst, stopst, st, flagch, st);
768 SP(
"sboleol", st, c);
/* Word-start condition: boundary or non-word before, word character now. */
772 if ( (flagch == BOL || (lastc != OUT && !ISWORD(charset,lastc))) &&
773 (c != OUT && ISWORD(charset,c)) ) {
/* Word-end condition: word character before, boundary or non-word now. */
776 if ( (lastc != OUT && ISWORD(charset,lastc)) &&
777 (flagch == EOL || (c != OUT && !ISWORD(charset,c))) ) {
/* Feed a word-boundary flag to the state machine. */
780 if (flagch == BOW || flagch == EOW) {
781 st = step(m->g, startst, stopst, st, flagch, st);
782 SP(
"sboweow", st, c);
/* stopst is in the set at this position; the action taken is not visible. */
786 if (ISSET(st, stopst))
/* Stop condition: the set equals `empty`, or the text is exhausted. */
788 if (EQ(st, empty) || p == stop)
/* Consume character c: step from tmp into st. */
795 st = step(m->g, startst, stopst, tmp, c, st);
/* Sanity check: a further NOTHING step leaves st unchanged. */
797 assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
/*
 * step - walk the strip from `start` to `stop` and update the state set
 * `aft` for input `ch`, given the set `bef` (excerpt; the switch and its
 * case labels, the declarations and the return are not visible, so the
 * operator each fragment belongs to is not stated).
 */
820 step(g, start, stop, bef, ch, aft)
831 register onestate here;
/* `here` is kept in step with pc via INIT/INC as the strip is walked. */
835 for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
839 assert(pc == stop-1);
/* Literal comparison of ch with the operand of s. */
843 assert(!NONCHAR(ch) || ch != (
char)OPND(s));
844 if (ch == (
char)OPND(s))
/* Line-start flag test (BOLEOL counts as both). */
848 if (ch == BOL || ch == BOLEOL)
/* Line-end flag test (BOLEOL counts as both). */
852 if (ch == EOL || ch == BOLEOL)
/* Set membership test; pseudo-characters (NONCHAR) never match a set. */
868 cs = &g->sets[OPND(s)];
869 if (!NONCHAR(ch) && CHIN(cs, ch))
/*
 * Backward propagation by OPND(s); i remembers whether the target was
 * already set, so the test below fires only when BACK newly set it.
 */
881 i = ISSETBACK(aft, OPND(s));
882 BACK(aft, aft, OPND(s));
883 if (!i && ISSETBACK(aft, OPND(s))) {
/* Forward propagation by OPND(s). */
891 FWD(aft, aft, OPND(s));
902 assert(OP(g->strip[pc+OPND(s)]) == OOR2);
903 FWD(aft, aft, OPND(s));
/* When `here` is in aft, scan ahead (offset `look`) until O_CH. */
906 if (ISSTATEIN(aft, here)) {
908 OP(s = g->strip[pc+look]) != O_CH;
910 assert(OP(s) == OOR2);
/* Forward propagation only when the target operator is not O_CH. */
916 if (OP(g->strip[pc+OPND(s)]) != O_CH) {
917 assert(OP(g->strip[pc+OPND(s)]) == OOR2);
918 FWD(aft, aft, OPND(s));
/*
 * print - trace helper that writes `caption`, a rendering of `ch` and a
 * list of state numbers to stream `d` (excerpt; declarations, guards and
 * the trailing output are not visible).
 */
942 print(m, caption, st, ch, d)
949 register struct re_guts *g = m->g;
/* `first` selects the separator used before each printed state number. */
951 register int first = 1;
/* Tracing is conditional on MY_REG_TRACE; the body of the test is not visible. */
954 if (!(m->eflags&MY_REG_TRACE))
957 fprintf(d,
"%s", caption);
/* The guard on this output, if any, is not visible. */
959 fprintf(d,
" %s", printchar(ch,buf));
/*
 * Loop over all state numbers; the filter between the loop header and the
 * fprintf is not visible (presumably membership in st - confirm). A tab
 * precedes the number while `first` is nonzero, ", " otherwise.
 */
960 for (i = 0; i < g->nstates; i++)
962 fprintf(d,
"%s%d", (first) ?
"\t" :
", ", i);
/*
 * at - trace helper that prints `title`, the characters at `start` and
 * `stop`, and the state range startst-stopst to stdout (excerpt;
 * declarations and the body of the trace guard are not visible).
 */
976 at(m, title, start, stop, startst, stopst)
/* Tracing is conditional on MY_REG_TRACE; the body of the test is not visible. */
985 if (!(m->eflags&MY_REG_TRACE))
988 printf(
"%s %s-", title, printchar(*start,buf));
/* Dereferences `stop` itself, so `stop` must point at readable memory. */
989 printf(
"%s ", printchar(*stop,buf));
/*
 * NOTE(review): the format has two %ld conversions but three arguments
 * follow it; the trailing `buf` has no matching conversion and is unused.
 */
990 printf(
"%ld-%ld\n", (
long)startst, (
long)stopst,buf);
/*
 * Fragment whose function header is not visible; presumably the body of
 * printchar(), which the trace helpers call with a character and a buffer.
 * Formats ch into pbuf: as the character itself when it is printable or a
 * space, and as a backslash followed by octal digits in the second sprintf
 * (presumably the else branch; the line between them is not visible).
 * NOTE(review): isprint() needs a value representable as unsigned char or
 * EOF; the declared type of ch is not visible here - confirm it cannot be
 * negative.
 * NOTE(review): sprintf is unbounded and the size of pbuf is not visible.
 */
1011 if (isprint(ch) || ch ==
' ')
1012 sprintf(pbuf,
"%c", ch);
1014 sprintf(pbuf,
"\\%o", ch);