45 #include <my_global.h>
58 uint array_allocs,max_count,length,max_length;
/* Pseudo-character codes placed just above the 0..255 byte range.  The
   visible code stores SPACE_CHAR, START_OF_LINE and END_OF_LINE in
   follow_ptr->chr, and LAST_CHAR_CODE is used as the size of the
   per-state next[] table and of used_chars[]. */
61 #define SPACE_CHAR 256
62 #define START_OF_LINE 257
63 #define END_OF_LINE 258
64 #define LAST_CHAR_CODE 259
/* Number of bits in one uint word of a bit set (assumes 8-bit bytes);
   used to split a bit number into word index and bit-within-word. */
79 #define WORD_BIT (8*sizeof(uint))
/* Forward declaration of the option parser.  It receives pointers to
   main's argc/argv because it advances them past the options it consumes. */
84 static int static_get_options(
int *argc,
char * * *argv);
85 static int get_replace_strings(
int *argc,
char * * *argv,
/* Forward declaration: runs the replacement from an input stream to an
   output stream (main calls it with stdin and stdout). */
90 static int convert_pipe(
REPLACE *,FILE *,FILE *);
/* Forward declaration: applies the replacement to the file whose name is
   given as the second argument. */
91 static int convert_file(
REPLACE *,
char *);
/* Forward declaration: builds the REPLACE state table from `count`
   from/to string pairs.  word_end_chars is a NUL-terminated list of the
   characters that count as word ends. */
92 static REPLACE *init_replace(
char * *from,
char * *
to,uint count,
93 char * word_end_chars);
94 static uint replace_strings(
REPLACE *rep,
char * *start,uint *max_length,
/* Forward declarations for the helpers that manage the shared I/O buffers.
   initialize_buffer() allocates `buffer` and `out_buff`; the bodies of
   reset_buffer() and free_buffer() are not visible in this excerpt. */
96 static int initialize_buffer(
void);
97 static void reset_buffer(
void);
98 static void free_buffer(
void);
/* Program-wide flags.  `silent` suppresses the per-file status messages,
   `verbose` makes my_end() report extra information, and `updated` gates
   whether convert_file() replaces the original with the temporary file. */
100 static int silent=0,verbose=0,updated=0;
/* Program entry point.  Visible steps: parse options, collect the
   from/to strings, build the replace table, allocate the I/O buffers and
   then convert either stdin->stdout or the files named on the command
   line.  NOTE(review): several lines of this function (declarations,
   error exits, the stdin-vs-files test) are missing from this excerpt. */
104 int main(
int argc,
char *argv[])
107 char word_end_chars[256],*pos;
112 if (static_get_options(&argc,&argv))
114 if (get_replace_strings(&argc,&argv,&from,&to))
/* Scan byte values 1..255 for latin1 whitespace; presumably each match is
   appended to word_end_chars through `pos` -- the loop body is not
   visible here. */
117 for (i=1,pos=word_end_chars ; i < 256 ; i++)
118 if (my_isspace(&my_charset_latin1,i))
/* Build the state table from the collected from/to string arrays. */
121 if (!(replace=init_replace((
char**) from.typelib.type_names,
122 (
char**) to.typelib.type_names,
123 (uint) from.typelib.count,word_end_chars)))
/* The string arrays are released right after the table has been built. */
125 free_pointer_array(&from);
126 free_pointer_array(&to);
127 if (initialize_buffer())
/* Filter mode: stdin to stdout. */
132 error=convert_pipe(replace,stdin,stdout);
/* File mode: convert the next file named in argv. */
137 error=convert_file(replace,*(argv++));
/* Request extra information from my_end() only in verbose mode. */
141 my_end(verbose ? MY_CHECK_ERROR | MY_GIVE_INFO : MY_CHECK_ERROR);
/* Parse the leading command-line options (old K&R-style definition).
   argc/argv are pointers to main's variables and are advanced as
   arguments are consumed.  NOTE(review): the option switch and the return
   statements are not visible in this excerpt. */
150 static int static_get_options(argc,argv)
152 register
char **argv[];
157 silent=verbose=help=0;
/* Consume arguments that start with a single '-'; an argument starting
   with "--" stops option parsing. */
159 while (--*argc > 0 && *(pos = *(++*argv)) ==
'-' && pos[1] !=
'-') {
/* Version banner. */
179 printf(
"%s Ver 1.4 for %s at %s\n",my_progname,SYSTEM_TYPE,
183 puts(
"This software comes with ABSOLUTELY NO WARRANTY. This is free software,\nand you are welcome to modify and redistribute it under the GPL license\n");
/* Help text.  NOTE(review): "starts locking at" below looks like a typo
   for "starts looking at"; it is a runtime string and is left unchanged
   here. */
184 puts(
"This program replaces strings in files or from stdin to stdout.\n"
185 "It accepts a list of from-string/to-string pairs and replaces\n"
186 "each occurrence of a from-string with the corresponding to-string.\n"
187 "The first occurrence of a found string is matched. If there is\n"
188 "more than one possibility for the string to replace, longer\n"
189 "matches are preferred before shorter matches.\n\n"
190 "A from-string can contain these special characters:\n"
191 " \\^ Match start of line.\n"
192 " \\$ Match end of line.\n"
193 " \\b Match space-character, start of line or end of line.\n"
194 " For a end \\b the next replace starts locking at the end\n"
195 " space-character. A \\b alone in a string matches only a\n"
196 " space-character.\n");
197 printf(
"Usage: %s [-?svIV] from to from to ... -- [files]\n", my_progname);
199 printf(
"Usage: %s [-?svIV] from to from to ... < fromfile > tofile\n", my_progname);
201 puts(
"Options: -? or -I \"Info\" -s \"silent\" -v \"verbose\"");
/* Unknown option letter: report it on stderr. */
204 fprintf(stderr,
"illegal option: -%c\n",*pos);
/* Reported when no from/to arguments are left after the options
   (the guarding condition is not visible here). */
212 my_message(0,
"No replace options given",MYF(ME_BELL));
/* Collect the from/to string pairs from the command line into from_array
   and to_array (old K&R-style definition).  NOTE(review): the lines that
   advance argc/argv and the return statements are not visible in this
   excerpt. */
219 static int get_replace_strings(argc,argv,from_array,to_array)
221 register
char **argv[];
/* Start with both arrays zeroed. */
226 memset(from_array, 0,
sizeof(from_array[0]));
227 memset(to_array, 0,
sizeof(to_array[0]));
/* Continue until the arguments run out or the current one is exactly
   "--" (pos[2] must be the terminating NUL for the loop to stop). */
228 while (*argc > 0 && (*(pos = *(*argv)) !=
'-' || pos[1] !=
'-' || pos[2]))
230 insert_pointer_name(from_array,pos);
/* A from-string must be followed by a to-string; running out of
   arguments or hitting "--" here is an error. */
233 if (!*argc || !strcmp(**argv,
"--"))
235 my_message(0,
"No to-string for last from-string",MYF(ME_BELL));
238 insert_pointer_name(to_array,**argv);
252 uint
i,length,old_count;
254 const char **new_array;
255 DBUG_ENTER(
"insert_pointer_name");
257 if (! pa->typelib.count)
259 if (!(pa->typelib.type_names=(
const char **)
260 my_malloc(((PC_MALLOC-MALLOC_OVERHEAD)/
261 (
sizeof(
char *)+
sizeof(*pa->flag))*
262 (
sizeof(
char *)+
sizeof(*pa->flag))),MYF(MY_WME))))
264 if (!(pa->str= (uchar*) my_malloc((uint) (PS_MALLOC-MALLOC_OVERHEAD),
267 my_free(pa->typelib.type_names);
270 pa->max_count=(PC_MALLOC-MALLOC_OVERHEAD)/(
sizeof(uchar*)+
272 pa->flag= (uint8*) (pa->typelib.type_names+pa->max_count);
274 pa->max_length=PS_MALLOC-MALLOC_OVERHEAD;
277 length=(uint) strlen(name)+1;
278 if (pa->length+length >= pa->max_length)
280 pa->max_length=(pa->length+length+MALLOC_OVERHEAD+PS_MALLOC-1)/PS_MALLOC;
281 pa->max_length=pa->max_length*PS_MALLOC-MALLOC_OVERHEAD;
282 if (!(new_pos= (uchar*) my_realloc((uchar*) pa->str,
283 (uint) pa->max_length,
286 if (new_pos != pa->str)
288 my_ptrdiff_t diff=PTR_BYTE_DIFF(new_pos,pa->str);
289 for (i=0 ; i < pa->typelib.count ; i++)
290 pa->typelib.type_names[i]= ADD_TO_PTR(pa->typelib.type_names[i],diff,
295 if (pa->typelib.count >= pa->max_count-1)
299 len=(PC_MALLOC*pa->array_allocs - MALLOC_OVERHEAD);
300 if (!(new_array=(
const char **) my_realloc((uchar*) pa->typelib.type_names,
302 (
sizeof(uchar*)+
sizeof(*pa->flag))*
303 (
sizeof(uchar*)+
sizeof(*pa->flag)),
306 pa->typelib.type_names=new_array;
307 old_count=pa->max_count;
308 pa->max_count=len/(
sizeof(uchar*) +
sizeof(*pa->flag));
309 pa->flag= (uint8*) (pa->typelib.type_names+pa->max_count);
310 memcpy((uchar*) pa->flag,(
char *) (pa->typelib.type_names+old_count),
311 old_count*
sizeof(*pa->flag));
313 pa->flag[pa->typelib.count]=0;
314 pa->typelib.type_names[pa->typelib.count++]= (
char*) (pa->str+pa->length);
315 pa->typelib.type_names[pa->typelib.count]= NullS;
316 (void) strmov((
char*) pa->str + pa->length,
name);
326 if (pa->typelib.count)
329 my_free(pa->typelib.type_names);
330 pa->typelib.type_names=0;
/* Number of REP_SET entries allocated per chunk: used for the initial
   allocation in init_sets() and as the growth step when the pool is
   extended. */
339 #define SET_MALLOC_HUNC 64
343 short next[LAST_CHAR_CODE];
/* Forward declarations of the helpers that manage the pool of state sets
   (REP_SETS) and the bit vector inside each set (REP_SET). */
371 static int init_sets(
REP_SETS *sets,uint states);
373 static void make_sets_invisible(
REP_SETS *sets);
374 static void free_last_set(
REP_SETS *sets);
375 static void free_sets(
REP_SETS *sets);
376 static void internal_set_bit(
REP_SET *
set, uint bit);
377 static void internal_clear_bit(
REP_SET *
set, uint bit);
381 static int get_next_bit(
REP_SET *
set,uint lastpos);
383 static short find_found(
FOUND_SET *found_set,uint table_offset,
/* Forward declarations of the helpers that inspect the escape sequences
   at the start and end of a from-string. */
385 static uint start_at_word(
char * pos);
386 static uint end_of_word(
char * pos);
387 static uint replace_len(
char * pos);
/* Number of entries in use in the found_set array; find_found() scans the
   indices below this value. */
389 static uint found_sets=0;
/* Build the state table used by replace_strings() from `count` from/to
   string pairs.

   from, to         parallel arrays of from-strings and to-strings
   count            number of pairs
   word_end_chars   NUL-terminated list of word-end characters

   The visible success path ends in DBUG_RETURN(replace).
   NOTE(review): this excerpt omits many lines of the function (several
   declarations, error exits, braces and else branches), so the comments
   below describe only what the visible statements show. */
394 static REPLACE *init_replace(
char * *from,
char * *to,uint count,
395 char * word_end_chars)
397 uint
i,j,states,set_nr,len,result_len,max_length,found_end,bits_set,bit_nr;
400 char used_chars[LAST_CHAR_CODE],is_word_end[256];
401 char * pos, *to_pos, **to_array;
403 REP_SET *
set,*start_states,*word_states,*new_set;
408 DBUG_ENTER(
"init_replace");
/* First pass: measure every from-string with replace_len() and add up
   the bytes needed for the to-strings (each plus its terminating NUL). */
411 for (i=result_len=max_length=0 , states=2 ; i < count ; i++)
413 len=replace_len(from[i]);
417 my_message(0,
"No to-string for last from-string",MYF(ME_BELL));
421 result_len+=(uint) strlen(to[i])+1;
422 if (len > max_length)
/* Lookup table: is_word_end[c] is 1 for every byte in word_end_chars. */
425 memset(is_word_end, 0,
sizeof(is_word_end));
426 for (i=0 ; word_end_chars[
i] ; i++)
427 is_word_end[(uchar) word_end_chars[
i]]=1;
/* Set up the pool of state sets, sized by the number of states. */
429 if (init_sets(&sets,states))
438 (void) make_new_set(&sets);
439 make_sets_invisible(&sets);
441 word_states=make_new_set(&sets);
442 start_states=make_new_set(&sets);
/* The follow table is allocated with states+2 entries. */
443 if (!(follow=(
FOLLOWS*) my_malloc((states+2)*
sizeof(
FOLLOWS),MYF(MY_WME))))
/* Second pass: for each from-string mark its starting state(s) and fill
   in one follow entry per character. */
451 for (i=0, states=1, follow_ptr=follow+1 ; i < count ; i++)
/* From-string starting with "\^". */
453 if (from[i][0] ==
'\\' && from[i][1] ==
'^')
455 internal_set_bit(start_states,states+1);
458 start_states->table_offset=
i;
459 start_states->found_offset=1;
/* From-string starting with "\$". */
462 else if (from[i][0] ==
'\\' && from[i][1] ==
'$')
464 internal_set_bit(start_states,states);
465 internal_set_bit(word_states,states);
466 if (!from[i][2] && start_states->table_offset == (uint) ~0)
468 start_states->table_offset=
i;
469 start_states->found_offset=0;
474 internal_set_bit(word_states,states);
/* A leading "\b" followed by more text starts one state further on. */
475 if (from[i][0] ==
'\\' && (from[i][1] ==
'b' && from[i][2]))
476 internal_set_bit(start_states,states+1);
478 internal_set_bit(start_states,states);
/* Walk the from-string; a backslash followed by another character is an
   escape that is translated to a pseudo-character or control character
   (the case labels selecting each assignment are not visible here). */
480 for (pos=from[i], len=0; *pos ; pos++)
482 if (*pos ==
'\\' && *(pos+1))
487 follow_ptr->chr = SPACE_CHAR;
490 follow_ptr->chr = START_OF_LINE;
493 follow_ptr->chr = END_OF_LINE;
496 follow_ptr->chr =
'\r';
499 follow_ptr->chr =
'\t';
502 follow_ptr->chr =
'\v';
505 follow_ptr->chr = (uchar) *pos;
510 follow_ptr->chr= (uchar) *pos;
511 follow_ptr->table_offset=
i;
512 follow_ptr->len= ++len;
516 follow_ptr->table_offset=
i;
519 states+=(uint) len+1;
/* Main construction loop: process every state set, including the ones
   added to sets while the loop is running. */
523 for (set_nr=0,pos=0 ; set_nr < sets.count ; set_nr++)
530 for (i= (uint) ~0; (i=get_next_bit(
set,i)) ;)
535 default_state= find_found(found_set,set->table_offset,
536 set->found_offset+1);
539 copy_bits(sets.set+used_sets,
set);
541 or_bits(sets.set+used_sets,sets.set);
/* Record which character codes can be consumed from this state set. */
544 memset(used_chars, 0,
sizeof(used_chars));
545 for (i= (uint) ~0; (i=get_next_bit(sets.set+used_sets,i)) ;)
547 used_chars[follow[
i].chr]=1;
548 if ((follow[i].chr == SPACE_CHAR && !follow[i+1].chr &&
549 follow[i].len > 1) || follow[
i].chr == END_OF_LINE)
/* SPACE_CHAR stands for every word-end character. */
554 if (used_chars[SPACE_CHAR])
555 for (pos= word_end_chars ; *pos ; pos++)
556 used_chars[(
int) (uchar) *pos] = 1;
/* Fill in the transition for each byte value. */
559 for (chr= 0 ; chr < 256 ; chr++)
/* Unused characters go to the default state; chr 0 gets -1. */
561 if (! used_chars[chr])
562 set->next[chr]= (short) (chr ? default_state : -1);
565 new_set=make_new_set(&sets);
567 new_set->table_offset=
set->table_offset;
568 new_set->found_len=
set->found_len;
569 new_set->found_offset=
set->found_offset+1;
572 for (i= (uint) ~0 ; (i=get_next_bit(sets.set+used_sets,i)) ; )
574 if (!follow[i].chr || follow[i].chr == chr ||
575 (follow[i].chr == SPACE_CHAR &&
577 (!chr && follow[i].len > 1 && ! follow[i+1].chr))) ||
578 (follow[i].chr == END_OF_LINE && ! chr))
580 if ((! chr || (follow[i].chr && !follow[i+1].chr)) &&
581 follow[i].len > found_end)
582 found_end=follow[
i].len;
583 if (chr && follow[i].chr)
584 internal_set_bit(new_set,i+1);
586 internal_set_bit(new_set,i);
591 new_set->found_len=0;
593 for (i= (uint) ~0; (i=get_next_bit(new_set,i)) ;)
595 if ((follow[i].chr == SPACE_CHAR ||
596 follow[i].chr == END_OF_LINE) && ! chr)
600 if (follow[bit_nr-1].len < found_end ||
601 (new_set->found_len &&
602 (chr == 0 || !follow[bit_nr].chr)))
603 internal_clear_bit(new_set,i);
606 if (chr == 0 || !follow[bit_nr].chr)
608 new_set->table_offset=follow[bit_nr].table_offset;
609 if (chr || (follow[i].chr == SPACE_CHAR ||
610 follow[i].chr == END_OF_LINE))
611 new_set->found_offset=found_end;
612 new_set->found_len=found_end;
619 set->next[chr] = find_found(found_set,
620 new_set->table_offset,
621 new_set->found_offset);
622 free_last_set(&sets);
625 set->next[chr] = find_set(&sets,new_set);
628 set->next[chr] = find_set(&sets,new_set);
637 sizeof(
char *)*count+result_len,
638 MYF(MY_WME | MY_ZEROFILL))))
/* The to-string pointer array is placed after the found_sets+1 rep_str
   entries, and the to-string bytes follow that pointer array. */
641 to_array=(
char **) (rep_str+found_sets+1);
642 to_pos=(
char *) (to_array+count);
643 for (i=0 ; i < count ; i++)
646 to_pos=strmov(to_pos,to[i])+1;
/* rep_str[0] has no replace string; rep_str[i] (i >= 1) describes
   found_set[i-1]. */
649 rep_str[0].replace_string=0;
650 for (i=1 ; i <= found_sets ; i++)
652 pos=from[found_set[i-1].table_offset];
/* found is 2 when the from-string is exactly "\^" (the 3-byte compare
   includes the terminating NUL), otherwise 1. */
653 rep_str[
i].found= (my_bool) (!memcmp(pos,
"\\^",3) ? 2 : 1);
654 rep_str[
i].replace_string=to_array[found_set[i-1].table_offset];
655 rep_str[
i].to_offset=found_set[i-1].found_offset-start_at_word(pos);
656 rep_str[
i].from_offset=found_set[i-1].found_offset-replace_len(pos)+
/* Turn the numeric transitions into pointers: a non-negative value is an
   index into replace[], a negative value n selects rep_str[-n-1]. */
659 for (i=0 ; i < sets.count ; i++)
661 for (j=0 ; j < 256 ; j++)
662 if (sets.set[i].next[j] >= 0)
663 replace[
i].next[j]=replace+sets.set[
i].next[j];
665 replace[
i].next[j]=(
REPLACE*) (rep_str+(-sets.set[i].next[j]-1));
671 DBUG_PRINT(
"exit",(
"Replace table has %d states",sets.count));
672 DBUG_RETURN(replace);
/* Initialise a REP_SETS pool: zero the structure, compute size_of_bits
   from the number of states, and allocate the first SET_MALLOC_HUNC sets
   together with their bit storage.  The failure handling and return
   value are not visible in this excerpt. */
676 static int init_sets(
REP_SETS *sets,uint states)
678 memset(sets, 0,
sizeof(*sets));
/* One unit per 8 states, rounded up. */
679 sets->size_of_bits=((states+7)/8);
680 if (!(sets->set_buffer=(
REP_SET*) my_malloc(
sizeof(
REP_SET)*SET_MALLOC_HUNC,
/* Bit storage: size_of_bits uints for each of the SET_MALLOC_HUNC sets. */
683 if (!(sets->bit_buffer=(uint*) my_malloc(
sizeof(uint)*sets->size_of_bits*
684 SET_MALLOC_HUNC,MYF(MY_WME))))
/* Hide the sets created so far: remember how many there are in
   `invisible` and move the `set` base pointer past them.  Any further
   statements of this function are not visible in this excerpt. */
694 static void make_sets_invisible(
REP_SETS *sets)
696 sets->invisible=sets->count;
697 sets->set+=sets->count;
703 uint
i,count,*bit_buffer;
708 set=sets->set+ sets->count++;
709 memset(set->bits, 0,
sizeof(uint)*sets->size_of_bits);
710 memset(&set->next[0], 0,
sizeof(set->next[0])*LAST_CHAR_CODE);
713 set->table_offset= (uint) ~0;
714 set->size_of_bits=sets->size_of_bits;
717 count=sets->count+sets->invisible+SET_MALLOC_HUNC;
718 if (!(
set=(
REP_SET*) my_realloc((uchar*) sets->set_buffer,
722 sets->set_buffer=
set;
723 sets->set=
set+sets->invisible;
724 if (!(bit_buffer=(uint*) my_realloc((uchar*) sets->bit_buffer,
725 (
sizeof(uint)*sets->size_of_bits)*count,
728 sets->bit_buffer=bit_buffer;
729 for (i=0 ; i < count ; i++)
731 sets->set_buffer[
i].bits=bit_buffer;
732 bit_buffer+=sets->size_of_bits;
734 sets->extra=SET_MALLOC_HUNC;
735 return make_new_set(sets);
738 static void free_last_set(
REP_SETS *sets)
/* Release the two allocations owned by a REP_SETS pool: the array of
   sets and the shared bit storage. */
745 static void free_sets(
REP_SETS *sets)
747 my_free(sets->set_buffer);
748 my_free(sets->bit_buffer);
/* Set bit number `bit` in the set's bit vector: bit / WORD_BIT selects
   the uint word, bit % WORD_BIT the position inside it.
   NOTE(review): the literal 1 is a signed int; if uint is 32 bits wide,
   a shift count of 31 overflows int -- confirm, and consider 1U. */
752 static void internal_set_bit(
REP_SET *
set, uint bit)
754 set->bits[bit / WORD_BIT] |= 1 << (bit % WORD_BIT);
/* Clear bit number `bit` in the set's bit vector (word and position are
   selected the same way as in internal_set_bit).
   NOTE(review): same signed `1 <<` shift concern as in internal_set_bit
   -- confirm. */
758 static void internal_clear_bit(
REP_SET *
set, uint bit)
760 set->bits[bit / WORD_BIT] &= ~ (1 << (bit % WORD_BIT));
768 for (i=0 ; i < to->size_of_bits ; i++)
769 to->bits[i]|=from->bits[i];
775 memcpy((uchar*) to->bits,(uchar*) from->bits,
776 (
size_t) (
sizeof(uint) * to->size_of_bits));
781 return memcmp(set1->bits, set2->bits,
782 sizeof(uint) * set1->size_of_bits);
/* Find the next set bit after position `lastpos`.  Callers start an
   iteration with lastpos == (uint) ~0, so that lastpos+1 wraps to 0, and
   stop when the result is 0.  The final bit-by-bit scan and the return
   statements are not visible in this excerpt. */
788 static int get_next_bit(
REP_SET *
set,uint lastpos)
790 uint pos,*start,*end,bits;
/* Word that contains bit lastpos+1, and one past the last word. */
792 start=
set->bits+ ((lastpos+1) / WORD_BIT);
793 end=
set->bits +
set->size_of_bits;
/* Mask away the bits at or below lastpos in the first word. */
794 bits=start[0] & ~((1 << ((lastpos+1) % WORD_BIT)) -1);
/* Advance to the next word until a set bit is found or the words run
   out. */
796 while (! bits && ++start < end)
/* Bit number of the first bit in the word where the scan stopped. */
800 pos=(uint) (start-set->bits)*WORD_BIT;
816 for (i=0 ; i < sets->count-1 ; i++)
818 if (!cmp_bits(sets->set+i,find))
/* Look up the (table_offset, found_offset) pair in found_set and return
   its index encoded as the negative value -index-2; init_replace later
   maps such a value n to rep_str[-n-1].  If the pair is not present it
   is stored at the first free index and that index is returned in the
   same encoding.  NOTE(review): the line that increments found_sets is
   presumably between the store and the return but is not visible here. */
836 static short find_found(
FOUND_SET *found_set,uint table_offset,
840 for (i=0 ; (uint) i < found_sets ; i++)
841 if (found_set[i].table_offset == table_offset &&
842 found_set[i].found_offset == found_offset)
843 return (
short) (-i-2);
/* Not found: record the new pair at index i. */
844 found_set[
i].table_offset=table_offset;
845 found_set[
i].found_offset=found_offset;
847 return (
short) (-i-2);
/* Return 1 when the from-string begins with "\b" followed by at least
   one more character, or begins with "\^"; otherwise return 0. */
852 static uint start_at_word(
char * pos)
854 return (((!memcmp(pos,
"\\b",2) && pos[2]) || !memcmp(pos,
"\\^",2)) ? 1 : 0);
/* Test how the from-string ends: the condition is true when a string
   longer than two characters ends in "\b", or a string of at least two
   characters ends in "\$".  The values selected by the trailing `?` are
   not visible in this excerpt. */
857 static uint end_of_word(
char * pos)
859 char * end=strend(pos);
860 return ((end > pos+2 && !memcmp(end-2,
"\\b",2)) ||
861 (end >= pos+2 && !memcmp(end-2,
"\\$",2))) ?
/* Compute a length for a from-string; init_replace stores the result in
   `len` for each from[i].  The only visible statement detects a backslash
   that is followed by another character; how such an escape is counted
   is not visible in this excerpt. */
866 static uint replace_len(
char * str)
871 if (str[0] ==
'\\' && str[1])
/* Run the state table over an input string and write the result into the
   growable buffer *start (capacity *max_length).  Returns the number of
   bytes produced.  NOTE(review): the last parameter, the copy loops and
   the buffer-full tests are not visible in this excerpt. */
882 static uint replace_strings(
REPLACE *rep,
char **start, uint *max_length,
887 char *
to, *end, *pos, *
new;
/* `end` is the last usable position in the output buffer. */
889 end=(to= *start) + *max_length-1;
/* Follow one transition per input byte until a state marked `found` is
   reached. */
893 while (!rep_pos->found)
895 rep_pos= rep_pos->next[(uchar) *from];
/* Output buffer grown: re-base `to` and `end` on the new allocation. */
899 if (!(
new=my_realloc(*start,*max_length,MYF(MY_WME))))
901 to=
new+(to - *start);
902 end=(*start=
new)+ *max_length-1;
907 return (uint) (to - *start)-1;
/* Back up over the output bytes that belong to the matched from-string,
   then copy the replacement in their place. */
909 to-=rep_str->to_offset;
910 for (pos=rep_str->replace_string; *pos ; pos++)
915 if (!(
new=my_realloc(*start,*max_length,MYF(MY_WME))))
917 to=
new+(to - *start);
918 end=(*start=
new)+ *max_length-1;
/* Step the input back by from_offset; stop when that lands on the
   terminating NUL, unless the match was of kind 2. */
922 if (!*(from-=rep_str->from_offset) && rep_pos->found != 2)
923 return (uint) (to - *start);
/* Shared I/O state.  bufread is the number of bytes requested per
   my_read() call, bufalloc the allocated size of the input buffer,
   out_buff/out_length the output buffer handed to replace_strings().
   my_eof is not used in the visible lines. */
930 static int bufread,my_eof;
931 static uint bufalloc;
932 static char *out_buff;
933 static uint out_length;
/* Allocate the input buffer and the output buffer.  The lines that set
   bufread and out_length, and the return statements, are not visible in
   this excerpt. */
935 static int initialize_buffer()
/* Input buffer capacity is one and a half times the read size. */
938 bufalloc = bufread + bufread / 2;
/* One extra byte beyond bufalloc is allocated. */
939 if (!(buffer = my_malloc(bufalloc+1,MYF(MY_WME))))
943 if (!(out_buff=my_malloc(out_length,MYF(MY_WME))))
948 static void reset_buffer()
953 static void free_buffer()
/* Refill the input buffer from descriptor fd while keeping the last n
   bytes of the previous contents (old K&R-style definition).
   NOTE(review): the parameter declarations, bufbytes bookkeeping and
   return statements are not visible in this excerpt. */
966 static int fill_buffer_retaining(fd,
n)
/* Grow the buffer while the room left after the n retained bytes is not
   larger than one read. */
973 if ((
int) bufalloc - n <= bufread)
975 while ((
int) bufalloc - n <= bufread)
/* NOTE(review): the realloc result is assigned straight to `buffer`; if
   the call can return NULL the old block would be lost -- confirm how
   failure is handled in the missing lines. */
980 buffer = my_realloc(buffer, bufalloc+1, MYF(MY_WME));
/* Move the last n bytes of the old contents to the front. */
986 bmove(buffer,buffer+bufbytes-n,(uint) n);
/* Read up to bufread more bytes at offset bufbytes; a negative result is
   an error. */
993 if ((i=(
int) my_read(fd, (uchar*) buffer + bufbytes,
994 (
size_t) bufread, MYF(MY_WME))) < 0)
/* End of input with data that does not end in a newline: add one. */
998 if (i == 0 && bufbytes > 0 && buffer[bufbytes - 1] !=
'\n')
1001 buffer[bufbytes] =
'\n';
/* Read stream `in` line by line, run replace_strings() on each line and
   write the result to `out` (old K&R-style definition).
   NOTE(review): parameter declarations, loop bodies and return
   statements are largely missing from this excerpt. */
1011 static int convert_pipe(rep,in,out)
1017 char save_char,*end_of_line,*start_of_line;
1018 DBUG_ENTER(
"convert_pipe");
/* Keep refilling the buffer, carrying over `retain` bytes of an
   unfinished line, for as long as the fill returns a positive value. */
1023 while ((error=fill_buffer_retaining(fileno(in),retain)) > 0)
1025 end_of_line=buffer ;
1029 start_of_line=end_of_line;
/* Scan forward to the next newline or NUL byte. */
1030 while (end_of_line[0] !=
'\n' && end_of_line[0])
/* Reached the end of the buffered data: the partial line is retained
   for the next fill. */
1032 if (end_of_line == buffer+bufbytes)
1034 retain= (int) (end_of_line - start_of_line);
/* Remember the line terminator, convert the line, then append the saved
   terminator to the output. */
1037 save_char=end_of_line[0];
1040 if ((length=replace_strings(rep,&out_buff,&out_length,start_of_line)) ==
1044 out_buff[length++]=save_char;
1045 if (my_fwrite(out, (uchar*) out_buff, length, MYF(MY_WME | MY_NABP)))
/* Convert one file: the converted text is written to a temporary file
   created in the same directory, which then replaces the original if
   anything was changed.  NOTE(review): several declarations, the #endif
   lines, else keywords and return statements are missing from this
   excerpt. */
1053 static int convert_file(
REPLACE *rep,
char * name)
1057 char dir_buff[FN_REFLEN], tempname[FN_REFLEN], *org_name =
name;
1058 #ifdef HAVE_READLINK
1059 char link_name[FN_REFLEN];
1062 size_t dir_buff_length;
1063 DBUG_ENTER(
"convert_file");
/* When symlinks are enabled and `name` is a link, operate on the link
   target instead. */
1066 #ifdef HAVE_READLINK
1067 org_name= (!my_disable_symlinks &&
1068 !my_readlink(link_name, name, MYF(0))) ? link_name : name;
1070 if (!(in= my_fopen(org_name,O_RDONLY,MYF(MY_WME))))
/* Create the temporary file (prefix "PR") in the directory of the
   original. */
1072 dirname_part(dir_buff, org_name, &dir_buff_length);
1073 if ((temp_file= create_temp_file(tempname, dir_buff,
"PR", O_WRONLY,
/* Temporary file could not be created: close the input again. */
1076 my_fclose(in,MYF(0));
1079 if (!(out= my_fdopen(temp_file, tempname, O_WRONLY, MYF(MY_WME))))
1081 my_fclose(in,MYF(0));
1085 error=convert_pipe(rep,in,out);
1086 my_fclose(in,MYF(0)); my_fclose(out,MYF(0));
/* Replace the original only when something changed and no error
   occurred; the my_delete() below is presumably the else branch that
   discards the temporary file. */
1088 if (updated && ! error)
1089 my_redel(org_name,tempname,MYF(MY_WME | MY_LINK_WARNING));
1091 my_delete(tempname,MYF(MY_WME));
/* Report the outcome unless running silently. */
1092 if (!silent && ! error)
1095 printf(
"%s converted\n",name);
1097 printf(
"%s left unchanged\n",name);