/*
  Tail of a word comparator (its header is not visible in this excerpt).
  Compares the (pos, len) byte ranges of w1 and w2 with ha_compare_text()
  under charset cs; both trailing arguments are passed as 0.
*/
34 return ha_compare_text(cs, (uchar*) w1->pos, w1->len,
35 (uchar*) w2->pos, w2->len, 0, 0);
/*
  Body fragment of the tree-walk action (header not visible here).
  Assigns the word its weight from LWS_IN_USE, adds that weight to the
  running docstat->sum, then copies the whole FT_WORD into the slot that
  docstat->list points at and advances docstat->list to the next slot
  (post-increment inside the memcpy destination argument).
*/
40 word->weight=LWS_IN_USE;
41 docstat->sum+=word->weight;
42 memcpy((docstat->list)++, word,
sizeof(
FT_WORD));
/*
  ft_linearize (excerpt; declarations and several statements are not
  visible). Walks the word tree and produces a flat FT_WORD list.
*/
52 DBUG_ENTER(
"ft_linearize");
/* Size expression uses element count + 1: one extra slot beyond the tree
   elements. NOTE(review): presumably the extra slot holds the terminating
   entry written below -- the allocation call itself is not visible. */
55 (1+wtree->elements_in_tree))))
58 docstat.uniq=wtree->elements_in_tree;
/* Visit every tree element in left_root_right order, letting
   walk_and_copy append each word to docstat.list. */
60 tree_walk(wtree,(tree_walk_action)&walk_and_copy,&docstat,left_root_right);
/* Mark the end of the list: the loops below stop at the first entry whose
   pos is NULL. */
66 docstat.list->pos=NULL;
/* First pass over the list: replace each weight with PRENORM_IN_USE. */
68 for (p=wlist;p->pos;p++)
70 p->weight=PRENORM_IN_USE;
/* Second pass over the list: divide each weight by NORM_IN_USE. */
73 for (p=wlist;p->pos;p++)
75 p->weight/=NORM_IN_USE;
/*
  ft_boolean_check_syntax_string (excerpt): validates a candidate boolean
  full-text syntax string. The return statements are not visible here, so
  the outcome of each test below is not shown.
*/
81 my_bool ft_boolean_check_syntax_string(
const uchar *str)
/* Length test: the string plus its terminator must occupy exactly
   sizeof(DEFAULT_FTB_SYNTAX) bytes. */
86 (strlen((
char*) str)+1 !=
sizeof(DEFAULT_FTB_SYNTAX)) ||
/* True only when neither of the first two characters is a space.
   NOTE(review): uses && rather than ||, so a space in either position
   makes this sub-condition false -- confirm this is intended. */
87 (str[0] !=
' ' && str[1] !=
' '))
/* Examine every position i below sizeof(DEFAULT_FTB_SYNTAX). */
89 for (i=0; i<
sizeof(DEFAULT_FTB_SYNTAX); i++)
/* Detect bytes above 127 and characters that are alphanumeric in
   default_charset_info. */
92 if ((
unsigned char)(str[
i]) > 127 || my_isalnum(default_charset_info, str[i]))
/* Detect a repeated character at positions i and j, except for the single
   pair i == 11, j == 10. NOTE(review): 11 and 10 are magic indices; their
   meaning is not established by the visible code. */
95 if (str[i] == str[j] && (i != 11 || j != 10))
/*
  ft_get_word (excerpt): boolean-mode tokenizer. Scans the bytes between
  doc and end, records operator state in param, and fills word with the
  next token. Local declarations, braces and return statements are not
  visible in this excerpt.
*/
109 uchar ft_get_word(
const CHARSET_INFO *cs, uchar **start, uchar *end,
/* Reset per-token state. yesno defaults to 1 when FTB_YES is configured as
   a space; otherwise it is 1 only while param->quot is non-null. */
117 param->yesno=(FTB_YES==
' ') ? 1 : (param->quot != 0);
118 param->weight_adjust= param->wasign= 0;
119 param->type= FT_TOKEN_EOF;
/* Skip/operator scan. Each step advances doc by mbl when positive, by -mbl
   when negative, and by one byte when mbl is zero. */
123 for (; doc < end; doc+= (mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1)))
/* The charset handler classifies the character at doc (ctype) and returns
   its length indicator in mbl. */
125 mbl= cs->cset->ctype(cs, &ctype, (uchar*)doc, (uchar*)end);
126 if (true_word_char(ctype, *doc))
/* A closing-quote character while param->quot is set. */
128 if (*doc == FTB_RQUOT && param->quot)
131 param->type= FT_TOKEN_RIGHT_PAREN;
/* Bracket or opening-quote characters. */
136 if (*doc == FTB_LBR || *doc == FTB_RBR || *doc == FTB_LQUOT)
/* An opening quote sets param->quot to a non-null marker value. */
140 if (*doc == FTB_LQUOT)
141 param->quot= (
char*) 1;
/* Only FTB_RBR yields a right-paren token here; FTB_LBR and FTB_LQUOT both
   yield a left-paren token. */
142 param->type= (*doc == FTB_RBR ? FT_TOKEN_RIGHT_PAREN : FT_TOKEN_LEFT_PAREN);
/* Operator characters are examined only when param->prev is a space. */
145 if (param->prev ==
' ')
/* Each recognised operator updates param and continues the scan:
   FTB_YES/FTB_EGAL/FTB_NO set yesno to +1/0/-1, FTB_INC/FTB_DEC adjust
   weight_adjust by +1/-1, and FTB_NEG toggles wasign. */
147 if (*doc == FTB_YES ) { param->yesno=+1;
continue; }
else
148 if (*doc == FTB_EGAL) { param->yesno= 0;
continue; }
else
149 if (*doc == FTB_NO ) { param->yesno=-1;
continue; }
else
150 if (*doc == FTB_INC ) { param->weight_adjust++;
continue; }
else
151 if (*doc == FTB_DEC ) { param->weight_adjust--;
continue; }
else
152 if (*doc == FTB_NEG ) { param->wasign= !param->wasign;
continue; }
/* Restore the same default operator state as at the top of the function. */
156 param->yesno=(FTB_YES==
' ') ? 1 : (param->quot != 0);
157 param->weight_adjust= param->wasign= 0;
/* Word scan: word->pos marks the start; length is incremented once per
   iteration while doc advances by the character length indicator. */
161 for (word->pos= doc; doc < end; length++,
162 doc+= (mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1)))
164 mbl= cs->cset->ctype(cs, &ctype, (uchar*)doc, (uchar*)end);
165 if (true_word_char(ctype, *doc))
/* Taken when the character is not a misc word character, or when mwc is
   already non-zero. NOTE(review): the branch bodies are not visible;
   presumably this ends the word. */
167 else if (!misc_word_char(*doc) || mwc)
/* Word length is the byte distance scanned, minus mwc. */
173 word->len= (uint)(doc-word->pos) - mwc;
/* trunc is set when the character right after the word is FTB_TRUNC. */
174 if ((param->trunc=(doc<end && *doc == FTB_TRUNC)))
/* Condition: (length reaches ft_min_word_len and the word is not a
   stopword, or the word carries a truncation operator) and length is below
   ft_max_word_len. */
177 if (((length >= ft_min_word_len && !is_stopword((
char*) word->pos,
179 || param->trunc) && length < ft_max_word_len)
182 param->type= FT_TOKEN_WORD;
/* NOTE(review): the condition guarding this assignment is not visible;
   presumably it is the rejected-word path. */
188 param->type= FT_TOKEN_STOPWORD;
/*
  ft_simple_get_word (excerpt): tokenizer without boolean operators. Finds
  the next word between *start and end and describes it in word. The loop
  exits, braces and return statements are not visible in this excerpt.
*/
202 uchar ft_simple_get_word(
const CHARSET_INFO *cs, uchar **start,
204 FT_WORD *word, my_bool skip_stopwords)
210 DBUG_ENTER(
"ft_simple_get_word");
/* Skip loop: the for header has no condition, so termination depends on
   statements that are not visible here. Each step advances doc by mbl when
   positive, by -mbl when negative, and by one byte when mbl is zero. */
214 for (;; doc+= (mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1)))
218 mbl= cs->cset->ctype(cs, &ctype, (uchar*)doc, (uchar*)end);
219 if (true_word_char(ctype, *doc))
/* Word scan: word->pos marks the start; length is incremented once per
   iteration while doc advances by the character length indicator. */
224 for (word->pos= doc; doc < end; length++,
225 doc+= (mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1)))
227 mbl= cs->cset->ctype(cs, &ctype, (uchar*)doc, (uchar*)end);
228 if (true_word_char(ctype, *doc))
/* Taken when the character is not a misc word character, or when mwc is
   already non-zero (branch body not visible). */
230 else if (!misc_word_char(*doc) || mwc)
/* Word length is the byte distance scanned, minus mwc. */
236 word->len= (uint)(doc-word->pos) - mwc;
/* The condition holds unconditionally when skip_stopwords is FALSE;
   otherwise it requires ft_min_word_len <= length < ft_max_word_len and
   that the word is not a stopword. */
238 if (skip_stopwords == FALSE ||
239 (length >= ft_min_word_len && length < ft_max_word_len &&
240 !is_stopword((
char*) word->pos, word->len)))
/*
  ft_parse_init (excerpt; header not visible): prepares the word tree.
*/
251 DBUG_ENTER(
"ft_parse_init");
/* Initialise the tree only when it is not initialised yet: elements are
   FT_WORD-sized, ordered by FT_WORD_cmp, with cs stored as the tree's
   custom argument. */
252 if (!is_tree_inited(wtree))
253 init_tree(wtree,0,0,
sizeof(
FT_WORD),(qsort_cmp2)&FT_WORD_cmp,0,NULL, cs);
/*
  ft_add_word (excerpt; the first line of the header and the local
  declarations are not visible): inserts one word into the parser's tree.
*/
259 char *word,
int word_len,
265 DBUG_ENTER(
"ft_add_word");
266 wtree= ft_param->wtree;
/* When the caller sets MYSQL_FTFLAGS_NEED_COPY, the word bytes are copied
   into memory taken from ft_param->mem_root. */
267 if (param->flags & MYSQL_FTFLAGS_NEED_COPY)
270 DBUG_ASSERT(wtree->with_delete == 0);
/* NOTE(review): ptr is handed to memcpy directly after alloc_root() with
   no check in between -- confirm whether alloc_root() can return NULL. */
271 ptr= (uchar *)alloc_root(ft_param->mem_root, word_len);
272 memcpy(ptr, word, word_len);
276 w.pos= (uchar*) word;
/* tree_insert() yielding a null/zero result is treated as a distinct case
   (its handling is not visible here). */
278 if (!tree_insert(wtree, &w, 0, wtree->custom_arg))
/*
  ft_parse_internal (excerpt; the first line of the header is not visible):
  splits a document buffer into words and forwards each one to the
  add-word callback.
*/
288 char *doc_arg,
int doc_len)
/* Work on the buffer as unsigned bytes; end points one past the last
   byte. */
290 uchar *doc= (uchar*) doc_arg;
291 uchar *end= doc + doc_len;
293 TREE *wtree= ft_param->wtree;
295 DBUG_ENTER(
"ft_parse_internal");
/* Extract words using the tree's custom argument as the charset, with
   skip_stopwords set to TRUE. */
297 while (ft_simple_get_word(wtree->custom_arg, &doc, end, &w, TRUE))
/* Pass each word to mysql_add_word with a final argument of 0; a non-zero
   result is tested here (its handling is not visible). */
298 if (param->mysql_add_word(param, (
char*) w.pos, w.len, 0))
/*
  ft_parse (excerpt; the rest of the parameter list and the local
  declarations are not visible): fills in the parser parameter block and
  runs the parser over one document.
*/
304 int ft_parse(
TREE *wtree, uchar *doc,
int doclen,
309 DBUG_ENTER(
"ft_parse");
/* Private state handed to the callbacks through mysql_ftparam. */
312 my_param.wtree= wtree;
313 my_param.mem_root= mem_root;
/* Install the built-in callbacks and describe the input: the charset comes
   from the tree's custom argument, and the mode is
   MYSQL_FTPARSER_SIMPLE_MODE. */
315 param->mysql_parse= ft_parse_internal;
316 param->mysql_add_word= ft_add_word;
317 param->mysql_ftparam= &my_param;
318 param->cs= wtree->custom_arg;
319 param->doc= (
char*) doc;
320 param->length= doclen;
321 param->mode= MYSQL_FTPARSER_SIMPLE_MODE;
/* The parser's own result is returned unchanged. */
322 DBUG_RETURN(parser->parse(param));
/* Number of parser parameter slots per full-text key: slot indices are
   computed as ftkey_nr * MAX_PARAM_NR + paramnr, with paramnr asserted to
   be below this value. */
326 #define MAX_PARAM_NR 2
/*
  Fragment of the parameter-array allocator (header not visible here).
  Work is done only when info->ftparser_param is still null.
*/
330 if (!info->ftparser_param)
/* Tail of the allocation call: the size involves info->s->ftkeys and the
   flags request MY_WME and MY_ZEROFILL. NOTE(review): the start of the
   call is not visible. */
344 info->s->ftkeys, MYF(MY_WME | MY_ZEROFILL));
/* Set up info->ft_memroot with FTPARSER_MEMROOT_ALLOC_SIZE as block
   size. */
345 init_alloc_root(&info->ft_memroot, FTPARSER_MEMROOT_ALLOC_SIZE, 0);
347 return info->ftparser_param;
/*
  Fragment of the per-key parser initializer (function name and first
  header line are not visible). Returns the address of the parameter slot
  selected by keynr and paramnr.
*/
352 uint keynr, uint paramnr)
/* Make sure the parameter array exists (failure handling not visible). */
357 if (!ftparser_alloc_param(info))
/* With no key, the default parser is used. */
360 if (keynr == NO_SUCH_KEY)
363 parser= &ft_default_parser;
/* Otherwise take the slot base index and the parser from the key
   definition. */
367 ftparser_nr= info->s->keyinfo[keynr].ftkey_nr;
368 parser= info->s->keyinfo[keynr].parser;
/* Each key owns MAX_PARAM_NR consecutive slots; paramnr selects one. */
370 DBUG_ASSERT(paramnr < MAX_PARAM_NR);
371 ftparser_nr= ftparser_nr*MAX_PARAM_NR + paramnr;
/* A null mysql_add_word marks a slot that has not been set up yet. */
372 if (! info->ftparser_param[ftparser_nr].mysql_add_word)
378 info->ftparser_param[ftparser_nr].mysql_add_word=
/* Run the parser's optional init hook; a non-zero result is tested here
   (its handling is not visible). */
381 if (parser->init && parser->init(&info->ftparser_param[ftparser_nr]))
384 return &info->ftparser_param[ftparser_nr];
/*
  ftparser_call_deinitializer (excerpt; braces and some declarations are
  not visible): releases parser memory and runs each parser's deinit hook
  for the slots that were set up.
*/
387 void ftparser_call_deinitializer(
MI_INFO *info)
389 uint
i, j, keys= info->s->state.header.keys;
/* The memory root is freed before the null check below, i.e. regardless
   of whether a parameter array exists. */
390 free_root(&info->ft_memroot, MYF(0));
391 if (! info->ftparser_param)
/* Visit every key and each of its MAX_PARAM_NR slots. */
393 for (i= 0; i < keys; i++)
396 for (j=0; j < MAX_PARAM_NR; j++)
399 &info->ftparser_param[keyinfo->ftkey_nr * MAX_PARAM_NR + j];
/* Only full-text keys whose slot has mysql_add_word set are handled. */
400 if (keyinfo->flag & HA_FULLTEXT && ftparser_param->mysql_add_word)
/* The deinit hook is optional. */
402 if (keyinfo->parser->deinit)
403 keyinfo->parser->deinit(ftparser_param);
/* Clear the marker field on the slot. */
404 ftparser_param->mysql_add_word= 0;