61 #include "my_global.h"
162 #define NEXT(p) (((*((p)+1)&0377)<<8) + (*((p)+2)&0377))
163 #define OPERAND(p) ((p) + 3)
176 #define error(X,Y) fprintf(stderr, X, Y)
178 #define regerror(X) error("Regexp: %s\n",X);
179 #define SPECIAL 0x100
180 #define LBRAC ('('|SPECIAL)
181 #define RBRAC (')'|SPECIAL)
182 #define ASTERIX ('*'|SPECIAL)
183 #define PLUS ('+'|SPECIAL)
184 #define OR_OP ('|'|SPECIAL)
185 #define DOLLAR ('$'|SPECIAL)
186 #define DOT ('.'|SPECIAL)
187 #define CARET ('^'|SPECIAL)
188 #define LSQBRAC ('['|SPECIAL)
189 #define RSQBRAC (']'|SPECIAL)
190 #define LSHBRAC ('<'|SPECIAL)
191 #define RSHBRAC ('>'|SPECIAL)
192 #define FAIL(m) { regerror(m); return(NULL); }
193 #define ISMULT(c) ((c) == ASTERIX || (c)==PLUS)
194 #define META "^$.[()|*+\\"
196 #define CHARBITS 0xff
197 #define UCHARAT(p) ((int)*(unsigned char *)(p))
199 #define UCHARAT(p) ((int)*(p)&CHARBITS)
201 #define ISWORDPART(c) ( isalnum(c) || (c) == '_' )
211 #define STRCHR(A,B) strchr(A,B)
217 static short *regparse;
219 static char regdummy;
220 static char *regcode;
227 #define STATIC static
230 STATIC
char *regbranch();
231 STATIC
char *regpiece();
232 STATIC
char *regatom();
233 STATIC
char *regnode();
234 STATIC
char *regnext();
236 STATIC
void reginsert();
237 STATIC
void regtail();
238 STATIC
void regoptail();
255 regexp *regcomp(exp,excompat)
261 register char *longest;
266 if (exp == (
char *)NULL)
267 FAIL(
"NULL argument");
269 exp2=(
short*)malloc( (strlen(exp)+1) * (
sizeof(
short[8])/
sizeof(
char[8])) );
270 for ( scan=exp,dest=exp2;( c= UCHARAT(scan++)); ) {
274 *dest++ = excompat ? c : c | SPECIAL;
284 *dest++ = c | SPECIAL;
287 switch ( c = *scan++ ) {
290 *dest++ = excompat ? c | SPECIAL : c;
294 *dest++ = c | SPECIAL;
298 FAIL(
"sorry, unimplemented operator");
299 case 'b': *dest++ =
'\b';
break;
300 case 't': *dest++ =
'\t';
break;
301 case 'r': *dest++ =
'\r';
break;
317 if (reg(0, &flags) == (
char *)NULL)
318 return ((regexp *)NULL);
321 if (regsize >= 32767L)
322 FAIL(
"regexp too big");
325 r = (regexp *) malloc(
sizeof(regexp) + (unsigned) regsize);
326 if (r == (regexp *) NULL)
327 FAIL(
"out of space");
328 memset(r, 0,
sizeof(regexp) + (
unsigned)regsize);
333 regcode = r->program;
335 if (reg(0, &flags) == NULL)
336 return ((regexp *) NULL);
343 scan = r->program + 1;
344 if (OP(regnext(scan)) == END) {
345 scan = OPERAND(scan);
348 if (OP(scan) == EXACTLY)
349 r->regstart = *OPERAND(scan);
350 else if (OP(scan) == BOL)
361 if (flags & SPSTART) {
364 for (; scan != NULL; scan = regnext(scan))
365 if (OP(scan) == EXACTLY &&
366 (int)strlen(OPERAND(scan)) >= len) {
367 longest = OPERAND(scan);
368 len = strlen(OPERAND(scan));
370 r->regmust = longest;
387 static char *reg(paren, flagp)
393 register char *ender;
394 register int parno=0;
401 if (regnpar >= NSUBEXP)
405 ret = regnode(OPEN + parno);
410 br = regbranch(&flags);
411 if (br == (
char *)NULL)
412 return ((
char *)NULL);
413 if (ret != (
char *)NULL)
417 if (!(flags & HASWIDTH))
419 *flagp |= flags & SPSTART;
420 while (*regparse == OR_OP) {
422 br = regbranch(&flags);
423 if (br == (
char *)NULL)
424 return ((
char *)NULL);
426 if (!(flags & HASWIDTH))
428 *flagp |= flags & SPSTART;
432 ender = regnode((paren) ? CLOSE + parno : END);
436 for (br = ret; br != (
char *)NULL; br = regnext(br))
437 regoptail(br, ender);
440 if (paren && *regparse++ != RBRAC) {
441 FAIL(
"unmatched ()");
442 }
else if (!paren && *regparse !=
'\0') {
443 if (*regparse == RBRAC) {
444 FAIL(
"unmatched ()");
457 static char *regbranch(flagp)
461 register char *chain;
462 register char *latest;
467 ret = regnode(BRANCH);
468 chain = (
char *)NULL;
469 while (*regparse !=
'\0' && *regparse != OR_OP && *regparse != RBRAC) {
470 latest = regpiece(&flags);
471 if (latest == (
char *)NULL)
472 return ((
char *)NULL);
473 *flagp |= flags & HASWIDTH;
474 if (chain == (
char *)NULL)
475 *flagp |= flags & SPSTART;
477 regtail(chain, latest);
480 if (chain == (
char *)NULL)
494 static char *regpiece(flagp)
502 ret = regatom(&flags);
503 if (ret == (
char *)NULL)
504 return ((
char *)NULL);
511 if (!(flags & HASWIDTH))
512 FAIL(
"* or + operand could be empty");
513 *flagp = (WORST | SPSTART);
519 reginsert(STAR, ret);
524 reginsert(BRANCH, ret);
525 regoptail(ret, regnode(BACK));
527 regtail(ret, regnode(BRANCH));
528 regtail(ret, regnode(NOTHING));
536 reginsert(BRANCH, tmp);
539 regtail(ret, regnode(BRANCH));
540 regtail(ret, regnode(NOTHING));
544 if (ISMULT(*regparse))
545 FAIL(
"nested * or +");
558 static char *regatom(flagp)
566 switch (*regparse++) {
575 *flagp |= HASWIDTH | SIMPLE;
578 ret = regnode(WORDSTART);
581 ret = regnode(WORDEND);
585 register int classend;
587 if (*regparse == CARET) {
588 ret = regnode(ANYBUT);
591 ret = regnode(ANYOF);
592 if (*regparse == RSQBRAC || *regparse ==
'-')
594 while (*regparse !=
'\0' && *regparse != RSQBRAC) {
595 if (*regparse ==
'-') {
597 if (*regparse == RSQBRAC || *regparse ==
'\0')
600 class = (CHARBITS & *(regparse - 2)) + 1;
601 classend = (CHARBITS & *(regparse));
602 if (
class > classend + 1)
603 FAIL(
"invalid [] range");
604 for (;
class <= classend;
class++)
612 if (*regparse != RSQBRAC)
613 FAIL(
"unmatched []");
615 *flagp |= HASWIDTH | SIMPLE;
619 ret = reg(1, &flags);
620 if (ret == (
char *)NULL)
621 return ((
char *)NULL);
622 *flagp |= flags & (HASWIDTH | SPSTART);
627 FAIL(
"internal urp");
630 FAIL(
"* follows nothing\n");
634 register short ender;
637 for (len=0; regparse[len] &&
638 !(regparse[len]&SPECIAL) && regparse[len] != RSQBRAC; len++) ;
641 FAIL(
"internal disaster");
643 ender = *(regparse + len);
644 if (len > 1 && ISMULT(ender))
649 ret = regnode(EXACTLY);
665 static char *regnode(op)
672 if (ret == ®dummy) {
691 if (regcode != ®dummy)
702 static void reginsert(op, opnd)
708 register char *place;
710 if (regcode == ®dummy) {
729 static void regtail(p, val)
743 temp = regnext(scan);
744 if (temp == (
char *)NULL)
749 if (OP(scan) == BACK)
753 *(scan + 1) = (offset >> 8) & 0377;
754 *(scan + 2) = offset & 0377;
760 static void regoptail(p, val)
765 if (p == (
char *)NULL || p == ®dummy || OP(p) != BRANCH)
767 regtail(OPERAND(p), val);
777 static char *reginput;
779 static char **regstartp;
780 static char **regendp;
786 STATIC
int regmatch();
787 STATIC
int regrepeat();
792 STATIC
char *regprop();
798 int regexec(prog,
string)
799 register regexp *prog;
800 register
char *
string;
805 if (prog == (regexp *)NULL ||
string == (
char *)NULL) {
806 regerror(
"NULL parameter");
810 if (UCHARAT(prog->program) != MAGIC) {
811 regerror(
"corrupted program");
815 if (prog->regmust != (
char *)NULL) {
817 while ((s = STRCHR(s, prog->regmust[0])) != (
char *)NULL) {
818 if (strncmp(s, prog->regmust, prog->regmlen) == 0)
822 if (s == (
char *)NULL)
830 return (regtry(prog,
string));
834 if (prog->regstart !=
'\0')
836 while ((s = STRCHR(s, prog->regstart)) != (
char *)NULL) {
846 }
while (*s++ !=
'\0');
856 static int regtry(regexp *prog,
char *
string)
863 regstartp = prog->startp;
864 regendp = prog->endp;
868 for (i = NSUBEXP; i > 0; i--) {
869 *sp++ = (
char *)NULL;
870 *ep++ = (
char *)NULL;
872 if (regmatch(prog->program + 1)) {
873 prog->startp[0] = string;
874 prog->endp[0] = reginput;
891 static int regmatch(
char *prog)
898 if (scan != (
char *)NULL && regnarrate)
899 fprintf(stderr,
"%s(\n", regprop(scan));
901 while (scan != (
char *)NULL) {
904 fprintf(stderr,
"%s...\n", regprop(scan));
910 if (reginput != regbol)
914 if (*reginput !=
'\0')
918 if (*reginput ==
'\0')
923 if (reginput == regbol)
925 if (*reginput ==
'\0' ||
926 ISWORDPART( *(reginput-1) ) || !ISWORDPART( *reginput ) )
930 if (*reginput ==
'\0')
932 if ( reginput == regbol ||
933 !ISWORDPART( *(reginput-1) ) || ISWORDPART( *reginput ) )
940 opnd = OPERAND(scan);
942 if (*opnd != *reginput)
945 if (len > 1 && strncmp(opnd, reginput, len) != 0)
951 if (*reginput ==
'\0' ||
952 STRCHR(OPERAND(scan), *reginput) == (
char *)NULL)
957 if (*reginput ==
'\0' ||
958 STRCHR(OPERAND(scan), *reginput) != (
char *)NULL)
978 no = OP(scan) - OPEN;
986 if (regstartp[no] == (
char *)NULL)
987 regstartp[no] = save;
1003 register char *save;
1005 no = OP(scan) - CLOSE;
1008 if (regmatch(nxt)) {
1013 if (regendp[no] == (
char *)NULL)
1021 register char *save;
1023 if (OP(nxt) != BRANCH)
1024 nxt = OPERAND(scan);
1028 if (regmatch(OPERAND(scan)))
1031 scan = regnext(scan);
1032 }
while (scan != (
char *)NULL && OP(scan) == BRANCH);
1039 register char nextch;
1041 register char *save;
1042 register int minimum;
1049 if (OP(nxt) == EXACTLY)
1050 nextch = *OPERAND(nxt);
1051 minimum = (OP(scan) == STAR) ? 0 : 1;
1053 no = regrepeat(OPERAND(scan));
1054 while (no >= minimum) {
1056 if (nextch ==
'\0' || *reginput == nextch)
1061 reginput = save + no;
1070 regerror(
"memory corruption");
1082 regerror(
"corrupted pointers");
1090 static int regrepeat(
char *p)
1092 register int count = 0;
1093 register char *scan;
1094 register char *opnd;
1100 count = strlen(scan);
1104 while (*opnd == *scan) {
1110 while (*scan !=
'\0' && STRCHR(opnd, *scan) != (
char *)NULL) {
1116 while (*scan !=
'\0' && STRCHR(opnd, *scan) == (
char *)NULL) {
1122 regerror(
"internal foulup");
1136 static char *regnext(
register char *p)
1141 return ((
char *)NULL);
1145 return ((
char *)NULL);
1148 return (p - offset);
1150 return (p + offset);
1155 STATIC
char *regprop();
1160 void regdump(regexp *r)
1163 register char op = EXACTLY;
1169 printf(
"%2ld%s", (
long)(s - r->program), regprop(s));
1171 if (nxt == (
char *)NULL)
1174 printf(
"(%ld)", (
long)( (s - r->program) + (nxt - s)));
1176 if (op == ANYOF || op == ANYBUT || op == EXACTLY) {
1178 while (*s !=
'\0') {
1188 if (r->regstart !=
'\0')
1189 printf(
"start `%c' ", r->regstart);
1191 printf(
"anchored ");
1192 if (r->regmust != (
char *)NULL)
1193 printf(
"must have \"%s\"", r->regmust);
1201 static char *regprop(
char *op)
1204 static char buf[50];
1248 sprintf(buf + strlen(buf),
"OPEN%d", OP(op) - OPEN);
1260 sprintf(buf + strlen(buf),
"CLOSE%d", OP(op) - CLOSE);
1267 regerror(
"corrupted opcode");
1271 if (p != (
char *)NULL)
1281 char *regsub(regexp *prog,
char *source,
char *dest,
int n)
1288 extern char *strncpy();
1290 if (prog == (regexp *)NULL ||
1291 source == (
char *)NULL || dest == (
char *)NULL) {
1292 regerror(
"NULL parm to regsub");
1295 if (UCHARAT(prog->program) != MAGIC) {
1296 regerror(
"damaged regexp fed to regsub");
1301 while ((c = *src++) !=
'\0') {
1304 else if (c ==
'\\' &&
'0' <= *src && *src <=
'9')
1310 if (c ==
'\\' && (*src ==
'\\' || *src ==
'&'))
1313 regerror(
"line too long");
1317 }
else if (prog->startp[no] != (
char *)NULL &&
1318 prog->endp[no] != (
char *)NULL) {
1319 len = prog->endp[no] - prog->startp[no];
1320 if ( (n-=len) < 0 ) {
1321 regerror(
"line too long");
1324 strncpy(dst, prog->startp[no], len);
1326 if (len != 0 && *(dst - 1) ==
'\0') {
1327 regerror(
"damaged match string");
1333 regerror(
"line too long");
1343 void regerror(
char *s)
1345 fprintf(stderr,
"regexp(3): %s", s);