61 #include "my_global.h" 
  162 #define NEXT(p) (((*((p)+1)&0377)<<8) + (*((p)+2)&0377)) 
  163 #define OPERAND(p)      ((p) + 3) 
  176 #define error(X,Y) fprintf(stderr, X, Y) 
  178 #define regerror(X) error("Regexp: %s\n",X); 
  179 #define SPECIAL 0x100 
  180 #define LBRAC   ('('|SPECIAL) 
  181 #define RBRAC   (')'|SPECIAL) 
  182 #define ASTERIX ('*'|SPECIAL) 
  183 #define PLUS    ('+'|SPECIAL) 
  184 #define OR_OP   ('|'|SPECIAL) 
  185 #define DOLLAR  ('$'|SPECIAL) 
  186 #define DOT     ('.'|SPECIAL) 
  187 #define CARET   ('^'|SPECIAL) 
  188 #define LSQBRAC ('['|SPECIAL) 
  189 #define RSQBRAC (']'|SPECIAL) 
  190 #define LSHBRAC ('<'|SPECIAL) 
  191 #define RSHBRAC ('>'|SPECIAL) 
  192 #define FAIL(m) { regerror(m); return(NULL); } 
  193 #define ISMULT(c)       ((c) == ASTERIX || (c)==PLUS) 
  194 #define META    "^$.[()|*+\\" 
  196 #define CHARBITS        0xff 
  197 #define UCHARAT(p)      ((int)*(unsigned char *)(p)) 
  199 #define UCHARAT(p)      ((int)*(p)&CHARBITS) 
  201 #define ISWORDPART(c) ( isalnum(c) || (c) == '_' ) 
  211 #define STRCHR(A,B)     strchr(A,B) 
  217 static short   *regparse;       
 
  219 static char     regdummy;
 
  220 static char    *regcode;        
 
  227 #define STATIC  static 
  230 STATIC 
char    *regbranch();
 
  231 STATIC 
char    *regpiece();
 
  232 STATIC 
char    *regatom();
 
  233 STATIC 
char    *regnode();
 
  234 STATIC 
char    *regnext();
 
  236 STATIC 
void     reginsert();
 
  237 STATIC 
void     regtail();
 
  238 STATIC 
void     regoptail();
 
  255 regexp *regcomp(exp,excompat)
 
  261     register char  *longest;
 
  266     if (exp == (
char *)NULL)
 
  267         FAIL(
"NULL argument");
 
  269     exp2=(
short*)malloc( (strlen(exp)+1) * (
sizeof(
short[8])/
sizeof(
char[8])) );
 
  270     for ( scan=exp,dest=exp2;( c= UCHARAT(scan++)); ) {
 
  274                 *dest++ = excompat ? c : c | SPECIAL;
 
  284                 *dest++ =  c | SPECIAL;
 
  287                 switch ( c = *scan++ ) {
 
  290                         *dest++ = excompat ? c | SPECIAL : c;
 
  294                         *dest++ = c | SPECIAL;
 
  298                         FAIL(
"sorry, unimplemented operator");
 
  299                     case 'b': *dest++ = 
'\b'; 
break;
 
  300                     case 't': *dest++ = 
'\t'; 
break;
 
  301                     case 'r': *dest++ = 
'\r'; 
break;
 
  317     if (reg(0, &flags) == (
char *)NULL)
 
  318         return ((regexp *)NULL);
 
  321     if (regsize >= 32767L)      
 
  322         FAIL(
"regexp too big");
 
  325     r = (regexp *) malloc(
sizeof(regexp) + (unsigned) regsize);
 
  326     if (r == (regexp *) NULL)
 
  327         FAIL(
"out of space");
 
  328     memset(r, 0, 
sizeof(regexp) + (
unsigned)regsize);
 
  333     regcode = r->program;
 
  335     if (reg(0, &flags) == NULL)
 
  336         return ((regexp *) NULL);
 
  343     scan = r->program + 1;      
 
  344     if (OP(regnext(scan)) == END) {     
 
  345         scan = OPERAND(scan);
 
  348         if (OP(scan) == EXACTLY)
 
  349             r->regstart = *OPERAND(scan);
 
  350         else if (OP(scan) == BOL)
 
  361         if (flags & SPSTART) {
 
  364             for (; scan != NULL; scan = regnext(scan))
 
  365                 if (OP(scan) == EXACTLY &&
 
  366                     (int)strlen(OPERAND(scan)) >= len) {
 
  367                     longest = OPERAND(scan);
 
  368                     len = strlen(OPERAND(scan));
 
  370             r->regmust = longest;
 
  387 static char *reg(paren, flagp)
 
  393     register char  *ender;
 
  394     register int    parno=0; 
 
  401         if (regnpar >= NSUBEXP)
 
  405         ret = regnode(OPEN + parno);
 
  410     br = regbranch(&flags);
 
  411     if (br == (
char *)NULL)
 
  412         return ((
char *)NULL);
 
  413     if (ret != (
char *)NULL)
 
  417     if (!(flags & HASWIDTH))
 
  419     *flagp |= flags & SPSTART;
 
  420     while (*regparse == OR_OP) {
 
  422         br = regbranch(&flags);
 
  423         if (br == (
char *)NULL)
 
  424             return ((
char *)NULL);
 
  426         if (!(flags & HASWIDTH))
 
  428         *flagp |= flags & SPSTART;
 
  432     ender = regnode((paren) ? CLOSE + parno : END);
 
  436     for (br = ret; br != (
char *)NULL; br = regnext(br))
 
  437         regoptail(br, ender);
 
  440     if (paren && *regparse++ != RBRAC) {
 
  441         FAIL(
"unmatched ()");
 
  442     } 
else if (!paren && *regparse != 
'\0') {
 
  443         if (*regparse == RBRAC) {
 
  444             FAIL(
"unmatched ()");
 
  457 static char  *regbranch(flagp)
 
  461     register char  *chain;
 
  462     register char  *latest;
 
  467     ret = regnode(BRANCH);
 
  468     chain = (
char *)NULL;
 
  469     while (*regparse != 
'\0' && *regparse != OR_OP && *regparse != RBRAC) {
 
  470         latest = regpiece(&flags);
 
  471         if (latest == (
char *)NULL)
 
  472             return ((
char *)NULL);
 
  473         *flagp |= flags & HASWIDTH;
 
  474         if (chain == (
char *)NULL)      
 
  475             *flagp |= flags & SPSTART;
 
  477             regtail(chain, latest);
 
  480     if (chain == (
char *)NULL)          
 
  494 static char *regpiece(flagp)
 
  502   ret = regatom(&flags);
 
  503   if (ret == (
char *)NULL)
 
  504     return ((
char *)NULL);
 
  511   if (!(flags & HASWIDTH))
 
  512     FAIL(
"* or + operand could be empty");
 
  513   *flagp = (WORST | SPSTART);
 
  519       reginsert(STAR, ret);
 
  524       reginsert(BRANCH, ret);   
 
  525       regoptail(ret, regnode(BACK)); 
 
  527       regtail(ret, regnode(BRANCH)); 
 
  528       regtail(ret, regnode(NOTHING)); 
 
  536     reginsert(BRANCH, tmp);
 
  539     regtail(ret, regnode(BRANCH));
 
  540     regtail(ret, regnode(NOTHING));
 
  544   if (ISMULT(*regparse))
 
  545     FAIL(
"nested * or +");
 
  558 static char *regatom(flagp)
 
  566     switch (*regparse++) {
 
  575         *flagp |= HASWIDTH | SIMPLE;
 
  578         ret = regnode(WORDSTART);
 
  581         ret = regnode(WORDEND);
 
  585             register int    classend;
 
  587             if (*regparse == CARET) {   
 
  588                 ret = regnode(ANYBUT);
 
  591                 ret = regnode(ANYOF);
 
  592             if (*regparse == RSQBRAC || *regparse == 
'-')
 
  594             while (*regparse != 
'\0' && *regparse != RSQBRAC) {
 
  595                 if (*regparse == 
'-') {
 
  597                     if (*regparse == RSQBRAC || *regparse == 
'\0')
 
  600                         class = (CHARBITS & *(regparse - 2)) + 1;
 
  601                         classend = (CHARBITS & *(regparse));
 
  602                         if (
class > classend + 1)
 
  603                             FAIL(
"invalid [] range");
 
  604                         for (; 
class <= classend; 
class++)
 
  612             if (*regparse != RSQBRAC)
 
  613                 FAIL(
"unmatched []");
 
  615             *flagp |= HASWIDTH | SIMPLE;
 
  619         ret = reg(1, &flags);
 
  620         if (ret == (
char *)NULL)
 
  621             return ((
char *)NULL);
 
  622         *flagp |= flags & (HASWIDTH | SPSTART);
 
  627         FAIL(
"internal urp");   
 
  630         FAIL(
"* follows nothing\n");
 
  634             register short  ender;
 
  637             for (len=0; regparse[len] &&
 
  638                 !(regparse[len]&SPECIAL) && regparse[len] != RSQBRAC; len++) ;
 
  641               FAIL(
"internal disaster");
 
  643             ender = *(regparse + len);
 
  644             if (len > 1 && ISMULT(ender))
 
  649             ret = regnode(EXACTLY);
 
  665 static char *regnode(op)
 
  672     if (ret == ®dummy) {
 
  691     if (regcode != ®dummy)
 
  702 static void reginsert(op, opnd)
 
  708     register char  *place;
 
  710     if (regcode == ®dummy) {
 
  729 static void regtail(p, val)
 
  743         temp = regnext(scan);
 
  744         if (temp == (
char *)NULL)
 
  749     if (OP(scan) == BACK)
 
  753     *(scan + 1) = (offset >> 8) & 0377;
 
  754     *(scan + 2) = offset & 0377;
 
  760 static void regoptail(p, val)
 
  765     if (p == (
char *)NULL || p == ®dummy || OP(p) != BRANCH)
 
  767     regtail(OPERAND(p), val);
 
  777 static char    *reginput;       
 
  779 static char   **regstartp;      
 
  780 static char   **regendp;        
 
  786 STATIC 
int      regmatch();
 
  787 STATIC 
int      regrepeat();
 
  792 STATIC 
char    *regprop();
 
  798 int regexec(prog, 
string)
 
  799 register regexp *prog;
 
  800 register 
char  *
string;
 
  805     if (prog == (regexp *)NULL || 
string == (
char *)NULL) {
 
  806         regerror(
"NULL parameter");
 
  810     if (UCHARAT(prog->program) != MAGIC) {
 
  811         regerror(
"corrupted program");
 
  815     if (prog->regmust != (
char *)NULL) {
 
  817         while ((s = STRCHR(s, prog->regmust[0])) != (
char *)NULL) {
 
  818             if (strncmp(s, prog->regmust, prog->regmlen) == 0)
 
  822         if (s == (
char *)NULL)          
 
  830         return (regtry(prog, 
string));
 
  834     if (prog->regstart != 
'\0')
 
  836         while ((s = STRCHR(s, prog->regstart)) != (
char *)NULL) {
 
  846         } 
while (*s++ != 
'\0');
 
  856 static int regtry(regexp *prog, 
char *
string)
 
  863     regstartp = prog->startp;
 
  864     regendp = prog->endp;
 
  868     for (i = NSUBEXP; i > 0; i--) {
 
  869         *sp++ = (
char *)NULL;
 
  870         *ep++ = (
char *)NULL;
 
  872     if (regmatch(prog->program + 1)) {
 
  873         prog->startp[0] = string;
 
  874         prog->endp[0] = reginput;
 
  891 static int regmatch(
char *prog)
 
  898     if (scan != (
char *)NULL && regnarrate)
 
  899         fprintf(stderr, 
"%s(\n", regprop(scan));
 
  901     while (scan != (
char *)NULL) {
 
  904             fprintf(stderr, 
"%s...\n", regprop(scan));
 
  910             if (reginput != regbol)
 
  914             if (*reginput != 
'\0')
 
  918             if (*reginput == 
'\0')
 
  923             if (reginput == regbol)
 
  925             if (*reginput == 
'\0' ||
 
  926                ISWORDPART( *(reginput-1) ) || !ISWORDPART( *reginput ) )
 
  930             if (*reginput == 
'\0')
 
  932             if ( reginput == regbol ||
 
  933                !ISWORDPART( *(reginput-1) ) || ISWORDPART( *reginput ) )
 
  940                 opnd = OPERAND(scan);
 
  942                 if (*opnd != *reginput)
 
  945                 if (len > 1 && strncmp(opnd, reginput, len) != 0)
 
  951             if (*reginput == 
'\0' ||
 
  952                  STRCHR(OPERAND(scan), *reginput) == (
char *)NULL)
 
  957             if (*reginput == 
'\0' ||
 
  958                  STRCHR(OPERAND(scan), *reginput) != (
char *)NULL)
 
  978                 no = OP(scan) - OPEN;
 
  986                     if (regstartp[no] == (
char *)NULL)
 
  987                         regstartp[no] = save;
 
 1003                 register char  *save;
 
 1005                 no = OP(scan) - CLOSE;
 
 1008                 if (regmatch(nxt)) {
 
 1013                     if (regendp[no] == (
char *)NULL)
 
 1021                 register char  *save;
 
 1023                 if (OP(nxt) != BRANCH)  
 
 1024                     nxt = OPERAND(scan);        
 
 1028                         if (regmatch(OPERAND(scan)))
 
 1031                         scan = regnext(scan);
 
 1032                     } 
while (scan != (
char *)NULL && OP(scan) == BRANCH);
 
 1039                 register char   nextch;
 
 1041                 register char  *save;
 
 1042                 register int    minimum;
 
 1049                 if (OP(nxt) == EXACTLY)
 
 1050                     nextch = *OPERAND(nxt);
 
 1051                 minimum = (OP(scan) == STAR) ? 0 : 1;
 
 1053                 no = regrepeat(OPERAND(scan));
 
 1054                 while (no >= minimum) {
 
 1056                     if (nextch == 
'\0' || *reginput == nextch)
 
 1061                     reginput = save + no;
 
 1070             regerror(
"memory corruption");
 
 1082     regerror(
"corrupted pointers");
 
 1090 static int regrepeat(
char *p)
 
 1092     register int    count = 0;
 
 1093     register char  *scan;
 
 1094     register char  *opnd;
 
 1100         count = strlen(scan);
 
 1104         while (*opnd == *scan) {
 
 1110         while (*scan != 
'\0' && STRCHR(opnd, *scan) != (
char *)NULL) {
 
 1116         while (*scan != 
'\0' && STRCHR(opnd, *scan) == (
char *)NULL) {
 
 1122         regerror(
"internal foulup");
 
 1136 static char *regnext(
register char *p)
 
 1141         return ((
char *)NULL);
 
 1145         return ((
char *)NULL);
 
 1148         return (p - offset);
 
 1150         return (p + offset);
 
 1155 STATIC 
char    *regprop();
 
 1160 void regdump(regexp *r)
 
 1163     register char   op = EXACTLY;       
 
 1169         printf(
"%2ld%s", (
long)(s - r->program), regprop(s));   
 
 1171         if (nxt == (
char *)NULL)        
 
 1174             printf(
"(%ld)", (
long)( (s - r->program) + (nxt - s)));
 
 1176         if (op == ANYOF || op == ANYBUT || op == EXACTLY) {
 
 1178             while (*s != 
'\0') {
 
 1188     if (r->regstart != 
'\0')
 
 1189         printf(
"start `%c' ", r->regstart);
 
 1191         printf(
"anchored ");
 
 1192     if (r->regmust != (
char *)NULL)
 
 1193         printf(
"must have \"%s\"", r->regmust);
 
 1201 static char *regprop(
char *op)
 
 1204     static char     buf[50];
 
 1248         sprintf(buf + strlen(buf), 
"OPEN%d", OP(op) - OPEN);
 
 1260         sprintf(buf + strlen(buf), 
"CLOSE%d", OP(op) - CLOSE);
 
 1267         regerror(
"corrupted opcode");
 
 1271     if (p != (
char *)NULL)
 
 1281 char *regsub(regexp *prog, 
char *source, 
char *dest, 
int n)
 
 1288     extern char    *strncpy();
 
 1290     if (prog == (regexp *)NULL ||
 
 1291         source == (
char *)NULL || dest == (
char *)NULL) {
 
 1292         regerror(
"NULL parm to regsub");
 
 1295     if (UCHARAT(prog->program) != MAGIC) {
 
 1296         regerror(
"damaged regexp fed to regsub");
 
 1301     while ((c = *src++) != 
'\0') {
 
 1304         else if (c == 
'\\' && 
'0' <= *src && *src <= 
'9')
 
 1310             if (c == 
'\\' && (*src == 
'\\' || *src == 
'&'))
 
 1313                 regerror(
"line too long");
 
 1317         } 
else if (prog->startp[no] != (
char *)NULL &&
 
 1318                    prog->endp[no] != (
char *)NULL) {
 
 1319             len = prog->endp[no] - prog->startp[no];
 
 1320             if ( (n-=len) < 0 ) {               
 
 1321                 regerror(
"line too long");
 
 1324             strncpy(dst, prog->startp[no], len);
 
 1326             if (len != 0 && *(dst - 1) == 
'\0') {       
 
 1327                 regerror(
"damaged match string");
 
 1333         regerror(
"line too long");
 
 1343 void regerror(
char *s)
 
 1345     fprintf(stderr, 
"regexp(3): %s", s);