/*
  Weight lookup tables. Each backing array has 11 slots and the exported
  pointer is offset by 5, so the tables can be indexed with -5..5.
  The initializers are not visible in this excerpt.
*/
59 static double _wghts[11]=
72 static double *wghts=_wghts+5;
/* Alternative table, selected instead of wghts when info->wasign is set. */
74 static double _nwghts[11]=
87 static double *nwghts=_nwghts+5;
/*
  Bit flags kept in the flags member of query words and expressions.
  TRUNC is set for words parsed with info->trunc, YES for items parsed
  with info->yesno > 0. FTB_FLAG_TRUNC is also tested against
  ftb->with_scan. WONLY is only used as a local marker while climbing
  the expression tree (presumably "weight only" - confirm).
*/
89 #define FTB_FLAG_TRUNC 1
91 #define FTB_FLAG_YES 2
93 #define FTB_FLAG_WONLY 8
/*
  Three-way numeric comparison: -1 if a < b, 0 if equal, 1 otherwise.
  Note that the arguments may be evaluated more than once.
*/
95 #define CMP_NUM(a,b) (((a) < (b)) ? -1 : ((a) == (b)) ? 0 : 1)
/*
  Lifecycle of a boolean search. In the visible code the state is set to
  UNINITIALIZED on creation, INDEX_SEARCH when the index search starts
  and INDEX_DONE when it is exhausted. Where READY is assigned is not
  visible in this excerpt.
*/
146 enum { UNINITIALIZED, READY, INDEX_SEARCH, INDEX_DONE } state;
/*
  Fragment of a word comparator (presumably FTB_WORD_cmp, which is
  installed as the queue comparator; the signature is not visible).
  The first test special-cases a word whose docid[0] equals the value
  pointed to by the optional argument v.
*/
154 if (v && a->docid[0] == *v)
/* Order by docid[0], ascending. */
158 i=CMP_NUM(a->docid[0], b->docid[0]);
/*
  ndepth is compared with the operands swapped (b first), i.e. in
  descending order. Presumably only a tie-breaker; the guard is elided.
*/
160 i=CMP_NUM(b->ndepth,a->ndepth);
/*
  Fragment of a word comparator working on pointers to FTB_WORD pointers
  (presumably FTB_WORD_cmp_list, used to sort ftb->list; signature not
  visible). Word text is compared skipping the first byte of word and
  using len - 1 as the length, since byte 0 holds the length prefix.
*/
167 int i= ha_compare_text(cs, (uchar*) (*a)->word + 1, (*a)->len - 1,
168 (uchar*) (*b)->word + 1, (*b)->len - 1, 0, 0);
/* ndepth compared in ascending order (presumably only when texts are equal). */
170 i= CMP_NUM((*a)->ndepth, (*b)->ndepth);
/*
  Fragment of the parser callback that adds one query token to the
  boolean search structures (presumably ftb_query_add_word, which is
  installed as param->mysql_add_word when the query is parsed). The head
  of the signature, the local declarations and several body lines are
  not visible in this excerpt.
*/
185 char *
word,
int word_len,
/*
  Clamp the weight adjustment to -5..5 and look the weight up in
  nwghts or wghts depending on info->wasign.
*/
193 int r= info->weight_adjust;
194 float weight= (float)
195 (info->wasign ? nwghts : wghts)[(r>5)?5:((r<-5)?-5:r)];
197 switch (info->type) {
/*
  Word handling (the case label is not visible). The FTB_WORD is
  allocated from the search's mem_root; a truncated word reserves
  MI_MAX_KEY_BUFF, otherwise the size is derived from word_len.
*/
199 ftbw= (
FTB_WORD *)alloc_root(&ftb_param->ftb->mem_root,
201 (info->trunc ? MI_MAX_KEY_BUFF :
202 word_len * ftb_param->ftb->charset->mbmaxlen +
204 ftb_param->ftb->info->s->rec_reflength));
/* len counts the length byte stored in word[0] plus the text. */
205 ftbw->len= word_len + 1;
208 if (info->yesno > 0) ftbw->flags|= FTB_FLAG_YES;
209 if (info->yesno < 0) ftbw->flags|= FTB_FLAG_NO;
210 if (info->trunc) ftbw->flags|= FTB_FLAG_TRUNC;
211 ftbw->weight= weight;
212 ftbw->up= ftb_param->ftbe;
/* Neither docid slot points at a document yet. */
213 ftbw->docid[0]= ftbw->docid[1]= HA_OFFSET_ERROR;
/* A negated word (yesno < 0) gets one extra level of depth. */
214 ftbw->ndepth= (info->yesno < 0) + ftb_param->depth;
215 ftbw->key_root= HA_OFFSET_ERROR;
/* Store the word as a length byte followed by the text. */
216 memcpy(ftbw->word + 1, word, word_len);
217 ftbw->word[0]= word_len;
/* A required word raises the threshold of its parent expression. */
218 if (info->yesno > 0) ftbw->up->ythresh++;
/* Count the word so the queue can be sized after parsing. */
219 ftb_param->ftb->queue.max_elements++;
/* Link the word at the head of the list reachable through last_word and prev. */
220 ftbw->prev= ftb_param->ftb->last_word;
221 ftb_param->ftb->last_word= ftbw;
222 ftb_param->ftb->with_scan|= (info->trunc & FTB_FLAG_TRUNC);
/*
  Walk up from the current expression; the statement controlled by the
  if below is not visible. The expression the walk ends on is stored
  in max_docid_expr.
*/
223 for (tmp_expr= ftb_param->ftbe; tmp_expr->up; tmp_expr= tmp_expr->up)
224 if (! (tmp_expr->flags & FTB_FLAG_YES))
226 ftbw->max_docid_expr= tmp_expr;
/*
  Phrase bookkeeping: nothing to do unless up_quot is set. Otherwise
  the token is appended to the current expression's phrase list.
*/
228 case FT_TOKEN_STOPWORD:
229 if (! ftb_param->up_quot)
break;
230 phrase_word= (
FT_WORD *)alloc_root(&ftb_param->ftb->mem_root,
sizeof(
FT_WORD));
231 tmp_element= (
LIST *)alloc_root(&ftb_param->ftb->mem_root,
sizeof(
LIST));
232 phrase_word->pos= (uchar*) word;
233 phrase_word->len= word_len;
234 tmp_element->data= (
void *)phrase_word;
235 ftb_param->ftbe->phrase= list_add(ftb_param->ftbe->phrase, tmp_element);
/*
  Also add one list element with its own FT_WORD to the expression's
  document list, so that list grows in step with the phrase list.
*/
238 tmp_element= (
LIST *)alloc_root(&ftb_param->ftb->mem_root,
sizeof(
LIST));
239 tmp_element->data= alloc_root(&ftb_param->ftb->mem_root,
sizeof(
FT_WORD));
240 ftb_param->ftbe->document=
241 list_add(ftb_param->ftbe->document, tmp_element);
/*
  Opening a sub-expression (the allocation of ftbe is not visible):
  initialise it, chain it to the current expression and make it current.
*/
243 case FT_TOKEN_LEFT_PAREN:
246 if (info->yesno > 0) ftbe->flags|= FTB_FLAG_YES;
247 if (info->yesno < 0) ftbe->flags|= FTB_FLAG_NO;
248 ftbe->weight= weight;
249 ftbe->up= ftb_param->ftbe;
250 ftbe->max_docid= ftbe->ythresh= ftbe->yweaks= 0;
251 ftbe->docid[0]= ftbe->docid[1]= HA_OFFSET_ERROR;
/* A quoted sub-expression sets bit value 2 in with_scan. */
254 if (info->quot) ftb_param->ftb->with_scan|= 2;
255 if (info->yesno > 0) ftbe->up->ythresh++;
256 ftb_param->ftbe= ftbe;
258 ftb_param->up_quot= (uchar*) info->quot;
/*
  Closing a sub-expression. If a document list exists, find its last
  element and link it with the head in both directions, which makes
  the list circular.
*/
260 case FT_TOKEN_RIGHT_PAREN:
261 if (ftb_param->ftbe->document)
264 for (tmp_element= ftb_param->ftbe->document;
265 tmp_element->next; tmp_element= tmp_element->next) ;
266 tmp_element->next= ftb_param->ftbe->document;
267 ftb_param->ftbe->document->prev= tmp_element;
/* Return to the parent expression, if there is one. */
270 if (ftb_param->ftbe->up)
272 DBUG_ASSERT(ftb_param->depth);
273 ftb_param->ftbe= ftb_param->ftbe->up;
275 ftb_param->up_quot= 0;
/*
  Fragment of the query tokenizer callback (presumably
  ftb_parse_query_internal, which is installed as param->mysql_parse;
  the head of the signature is not visible). It splits the query of
  len bytes with ft_get_word and hands every token to
  param->mysql_add_word.
*/
287 char *
query,
int len)
/* start aliases the query pointer so the tokenizer can advance it. */
292 uchar **start= (uchar**) &query;
293 uchar *end= (uchar*) query + len;
298 while (ft_get_word(cs, start, end, &w, &info))
299 param->mysql_add_word(param, (
char*) w.pos, w.len, &info);
/*
  Parse a boolean query into the search structures of ftb.
  Returns the result of parser->parse(param) on the visible path. The
  actions taken when the state check or the parser initialisation fail
  are not visible in this excerpt, nor is the tail of the signature.
*/
304 static int _ftb_parse_query(
FTB *ftb, uchar *query, uint len,
309 DBUG_ENTER(
"_ftb_parse_query");
/* Parsing is only expected on a search that is still UNINITIALIZED. */
312 if (ftb->state != UNINITIALIZED)
314 if (! (param= ftparser_call_initializer(ftb->info, ftb->keynr, 0)))
/* Parsing starts at the root expression, outside any quoted phrase. */
319 ftb_param.ftbe= ftb->root;
320 ftb_param.up_quot= 0;
/* Install the callbacks and hand the query text to the parser. */
322 param->mysql_parse= ftb_parse_query_internal;
323 param->mysql_add_word= ftb_query_add_word;
324 param->mysql_ftparam= (
void *)&ftb_param;
325 param->cs= ftb->charset;
326 param->doc= (
char*) query;
329 param->mode= MYSQL_FTPARSER_FULL_BOOLEAN_INFO;
330 DBUG_RETURN(parser->parse(param));
/*
  Comparator used for the no_dupes tree: a and b point to my_off_t
  values that are compared numerically. Returns -1, 0 or 1.
  The first argument is unused.
*/
334 static int _ftb_no_dupes_cmp(
const void* not_used __attribute__((unused)),
335 const void *a,
const void *b)
337 return CMP_NUM((*((my_off_t*)a)), (*((my_off_t*)b)));
/*
  Advance ftbw to its next matching index entry.
  Only part of the body is visible in this excerpt: braces, several
  conditions and the return statements are elided, so the comments
  below describe the visible statements only.
*/
357 static int _ft2_search_no_lock(
FTB *ftb,
FTB_WORD *ftbw, my_bool init_search)
/*
  extra is subtracted from info->lastkey_length below to find where the
  word text of a key ends.
*/
363 uint UNINIT_VAR(off),
extra= HA_FT_WLEN + info->s->rec_reflength;
/*
  lastkey_buf is where the last key read is kept inside ftbw->word.
  For a truncated word it lies after the first ftbw->len bytes.
*/
364 uchar *lastkey_buf=ftbw->word+ftbw->off;
366 if (ftbw->flags & FTB_FLAG_TRUNC)
367 lastkey_buf+=ftbw->len;
/*
  Search from the root of the index identified by ftb->keynr
  (presumably the init_search case; the condition is not visible).
*/
371 ftbw->key_root=info->s->state.key_root[ftb->keynr];
372 ftbw->keyinfo=info->s->keyinfo+ftb->keynr;
374 r=_mi_search(info, ftbw->keyinfo, (uchar*) ftbw->word, ftbw->len,
375 SEARCH_FIND | SEARCH_BIGGER, ftbw->key_root);
/*
  Continuation search: compute the largest max_docid along the chain
  starting at max_docid_expr. If the word's current docid is smaller,
  a row pointer is written into the saved key before searching again
  (presumably to skip ahead - the stored value and the sflag change
  are not visible).
*/
379 uint sflag= SEARCH_BIGGER;
380 my_off_t max_docid=0;
383 for (tmp= ftbw->max_docid_expr; tmp; tmp= tmp->up)
384 set_if_bigger(max_docid, tmp->max_docid);
386 if (ftbw->docid[0] < max_docid)
389 _mi_dpointer(info, (uchar*) (lastkey_buf + HA_FT_WLEN +
390 (ftbw->off ? 0 : lastkey_buf[0] + 1)),
393 r=_mi_search(info, ftbw->keyinfo, (uchar*) lastkey_buf,
394 USE_WHOLE_KEY, sflag, ftbw->key_root);
/* Descending is only considered when ftbw->off is zero. */
397 can_go_down=(!ftbw->off && (init_search || (ftbw->flags & FTB_FLAG_TRUNC)));
/*
  Read the subkeys value stored after the word text of the found key,
  then test it together with the row position; the statement that the
  test controls is not visible. After that the next key is fetched.
*/
404 off=info->lastkey_length-extra;
405 subkeys=ft_sintXkorr(info->lastkey+off);
407 if (subkeys<0 || info->lastpos < info->state->data_file_length)
409 r= _mi_search_next(info, ftbw->keyinfo, info->lastkey,
410 info->lastkey_length,
411 SEARCH_BIGGER, ftbw->key_root);
/*
  Check that the found key still matches the word; the TRUNC flag is
  passed on to ha_compare_text for truncated words.
*/
414 if (!r && !ftbw->off)
416 r= ha_compare_text(ftb->charset,
418 info->lastkey_length-extra-1,
419 (uchar*) ftbw->word+1,
421 (my_bool) (ftbw->flags & FTB_FLAG_TRUNC),0);
/*
  No further match for this word (enclosing condition not visible):
  mark its docid as invalid. For a required word directly under the
  root expression the search state becomes INDEX_DONE.
*/
426 if (!ftbw->off || !(ftbw->flags & FTB_FLAG_TRUNC))
428 ftbw->docid[0]=HA_OFFSET_ERROR;
429 if ((ftbw->flags & FTB_FLAG_YES) && ftbw->up->up==0)
435 ftb->state=INDEX_DONE;
/*
  Store key_root in the saved key and in docid[0], switch back to the
  index of ftb->keynr and retry with init_search set to 0.
*/
452 _mi_dpointer(info, (uchar*) (lastkey_buf+HA_FT_WLEN), ftbw->key_root);
453 ftbw->docid[0]= ftbw->key_root;
454 ftbw->key_root=info->s->state.key_root[ftb->keynr];
455 ftbw->keyinfo=info->s->keyinfo+ftb->keynr;
457 return _ft2_search_no_lock(ftb, ftbw, 0);
/*
  Remember the key that was found. When the buffer is the word itself
  the stored length is updated as well.
*/
461 memcpy(lastkey_buf, info->lastkey, info->lastkey_length);
462 if (lastkey_buf == ftbw->word)
463 ftbw->len=info->lastkey_length-extra;
/*
  Switch to the tree rooted at info->lastpos, described by ft2_keyinfo,
  and position on its first key. That key is stored at offset off in
  the saved key.
*/
473 ftbw->key_root=info->lastpos;
474 ftbw->keyinfo=& info->s->ft2_keyinfo;
475 r=_mi_search_first(info, ftbw->keyinfo, ftbw->key_root);
477 memcpy(lastkey_buf+off, info->lastkey, info->lastkey_length);
/*
  Record the current document. For a required, non-truncated word the
  position is also published through max_docid_expr.
*/
479 ftbw->docid[0]=info->lastpos;
480 if (ftbw->flags & FTB_FLAG_YES && !(ftbw->flags & FTB_FLAG_TRUNC))
481 ftbw->max_docid_expr->max_docid= info->lastpos;
/*
  Wrapper around _ft2_search_no_lock. When share->concurrent_insert is
  set something is done before and after the call (presumably taking
  and releasing a lock; the statements are not visible).
*/
485 static int _ft2_search(
FTB *ftb,
FTB_WORD *ftbw, my_bool init_search)
489 if (share->concurrent_insert)
491 r= _ft2_search_no_lock(ftb, ftbw, init_search);
492 if (share->concurrent_insert)
/*
  Prepare ftb for reading matches from the index: positions every
  queued word on its first key and re-establishes the queue order.
  Large parts of the body are elided in this excerpt.
*/
497 static void _ftb_init_index_search(
FT_INFO *ftb)
/*
  Guard for searches that are not parsed yet or have no index; the
  controlled statement is not visible.
*/
502 if (ftb->state == UNINITIALIZED || ftb->keynr == NO_SUCH_KEY)
504 ftb->state=INDEX_SEARCH;
/* Visit the queue slots from the last one down to slot 1. */
506 for (i=ftb->queue.elements; i; i--)
508 ftbw=(
FTB_WORD *)(ftb->queue.root[i]);
510 if (ftbw->flags & FTB_FLAG_TRUNC)
/*
  Tail of a loop that climbs the parents of a truncated word and marks
  each parent with FTB_FLAG_TRUNC, stopping at one already marked.
*/
531 ftbe->up && !(ftbe->up->flags & FTB_FLAG_TRUNC);
532 ftbe->up->flags|= FTB_FLAG_TRUNC, ftbe=ftbe->up)
/*
  Condition on the expression being negated, or on the parent's
  ythresh - yweaks exceeding 0 or 1 depending on whether this
  expression is itself required. What it controls is partly elided;
  the word's docid is reset on one of the paths.
*/
534 if (ftbe->flags & FTB_FLAG_NO ||
535 ftbe->up->ythresh - ftbe->up->yweaks >
536 (uint)
test(ftbe->flags & FTB_FLAG_YES))
539 ftbw->docid[0]=HA_OFFSET_ERROR;
541 ftbe != top_ftbe && !(ftbe->flags & FTB_FLAG_NO);
/*
  The no_dupes tree stores my_off_t values. It is created on first use
  and reset at this point (the else keyword is presumably elided).
*/
551 if (!is_tree_inited(& ftb->no_dupes))
552 init_tree(& ftb->no_dupes,0,0,
sizeof(my_off_t),
553 _ftb_no_dupes_cmp,0,0,0);
555 reset_tree(& ftb->no_dupes);
/* Position the word on its first key (init_search is 1). */
559 if (_ft2_search(ftb, ftbw, 1))
/* Restore the queue order after the docids have changed. */
562 queue_fix(& ftb->queue);
/*
  Create a boolean full-text search object for the given query.
  keynr may be NO_SUCH_KEY, in which case ft_default_parser is used.
  The tail of the signature, the return statements and the error
  labels are not visible in this excerpt.
*/
566 FT_INFO * ft_init_boolean_search(
MI_INFO *info, uint keynr, uchar *query,
573 if (!(ftb=(
FTB *)my_malloc(
sizeof(
FTB), MYF(MY_WME))))
575 ftb->please= (
struct _ft_vft *) & _ft_vft_boolean;
576 ftb->state=UNINITIALIZED;
/* The charset in use must match the one of the index, when there is one. */
580 DBUG_ASSERT(keynr==NO_SUCH_KEY || cs == info->s->keyinfo[keynr].seg->charset);
582 ftb->lastpos=HA_OFFSET_ERROR;
/* Zero the tree so is_tree_inited() reports it as not initialised. */
583 memset(&ftb->no_dupes, 0,
sizeof(
TREE));
586 init_alloc_root(&ftb->mem_root, 1024, 1024);
/* Incremented once per word while the query is parsed. */
587 ftb->queue.max_elements= 0;
/* Initialise the root expression (its allocation is not visible). */
591 ftbe->flags=FTB_FLAG_YES;
594 ftbe->max_docid= ftbe->ythresh= ftbe->yweaks= 0;
595 ftbe->docid[0]=ftbe->docid[1]=HA_OFFSET_ERROR;
599 if (unlikely(_ftb_parse_query(ftb, query, query_len,
600 keynr == NO_SUCH_KEY ? &ft_default_parser :
601 info->s->keyinfo[keynr].parser)))
/*
  Allocate max_elements + 1 queue slots from mem_root and set up the
  queue with FTB_WORD_cmp as comparator.
*/
607 if (! (ftb->queue.root= (uchar **)alloc_root(&ftb->mem_root,
608 (ftb->queue.max_elements + 1) *
611 reinit_queue(&ftb->queue, ftb->queue.max_elements, 0, 0,
612 (int (*)(
void*, uchar*, uchar*))FTB_WORD_cmp, 0);
/* Insert every parsed word by walking the list from last_word through prev. */
613 for (ftbw= ftb->last_word; ftbw; ftbw= ftbw->prev)
614 queue_insert(&ftb->queue, (uchar *)ftbw);
/*
  Build ftb->list as a copy of the queue contents (queue.root + 1
  onwards) sorted with FTB_WORD_cmp_list.
  NOTE(review): no NULL check of this allocation is visible before the
  memcpy - confirm against the full source.
*/
615 ftb->list=(
FTB_WORD **)alloc_root(&ftb->mem_root,
616 sizeof(
FTB_WORD *)*ftb->queue.elements);
617 memcpy(ftb->list, ftb->queue.root+1,
sizeof(
FTB_WORD *)*ftb->queue.elements);
618 my_qsort2(ftb->list, ftb->queue.elements,
sizeof(
FTB_WORD *),
619 (qsort2_cmp)FTB_WORD_cmp_list, ftb->charset);
/* With fewer than two words the TRUNC bit of with_scan is cleared. */
620 if (ftb->queue.elements<2) ftb->with_scan &= ~FTB_FLAG_TRUNC;
/* Releases mem_root (presumably on the error path; the label is not visible). */
624 free_root(& ftb->mem_root, MYF(0));
/*
  Member of the phrase-check parameter block (the enclosing struct is
  not visible). It starts at 1 and is incremented per document word
  until it is no longer smaller than phrase_length.
*/
636 uint document_length;
/*
  Fragment of the per-word callback used while checking a phrase
  (presumably ftb_phrase_add_word, installed as param->mysql_add_word
  by _ftb_check_phrase). The head of the signature and several body
  lines are not visible in this excerpt.
*/
642 char *word,
int word_len,
647 LIST *phrase, *document;
/* Record the incoming word in w (where w comes from is not visible). */
648 w->pos= (uchar*)
word;
/* Step the document list one element backwards. */
650 phrase_param->document= phrase_param->document->prev;
/*
  While fewer document words than phrase words have been counted, only
  count (the early exit that presumably follows is not visible).
*/
651 if (phrase_param->phrase_length > phrase_param->document_length)
653 phrase_param->document_length++;
/*
  Compare the phrase words with the document words pairwise, starting
  at the element after the current document position.
*/
658 for (phrase= phrase_param->phrase, document= phrase_param->document->next;
659 phrase; phrase= phrase->next, document= document->next)
663 if (my_strnncoll(phrase_param->cs, (uchar*) phrase_word->pos,
665 (uchar*) document_word->pos, document_word->len))
/* Presumably reached only when no pair differed - confirm. */
668 phrase_param->match++;
/*
  Fragment of the document tokenizer used while checking a phrase
  (presumably ftb_check_phrase_internal, installed as
  param->mysql_parse; the head of the signature is not visible).
  Every word found in the document of len bytes is passed to
  param->mysql_add_word with a null boolean-info argument.
*/
674 char *document,
int len)
678 const uchar *docend= (uchar*) document + len;
679 while (ft_simple_get_word(phrase_param->cs, (uchar**) &document, docend,
682 param->mysql_add_word(param, (
char*) word.pos, word.len, 0);
/* Tested after each word; the controlled statement is not visible. */
683 if (phrase_param->match)
/*
  Check whether the phrase of expression ftbe occurs in a document.
  On the visible return path the result is 1 when a match was counted
  and 0 otherwise. The values returned when the parser cannot be
  initialised or when parsing fails are not visible in this excerpt.
*/
705 static int _ftb_check_phrase(
FTB *ftb,
const uchar *document, uint len,
710 DBUG_ENTER(
"_ftb_check_phrase");
713 if (! (param= ftparser_call_initializer(ftb->info, ftb->keynr, 1)))
/* Describe the phrase and the document list to the callbacks. */
716 ftb_param.phrase= ftbe->phrase;
717 ftb_param.document= ftbe->document;
718 ftb_param.cs= ftb->charset;
719 ftb_param.phrase_length= list_length(ftbe->phrase);
720 ftb_param.document_length= 1;
/* Install the phrase callbacks and parse the document. */
723 param->mysql_parse= ftb_check_phrase_internal;
724 param->mysql_add_word= ftb_phrase_add_word;
725 param->mysql_ftparam= (
void *)&ftb_param;
726 param->cs= ftb->charset;
727 param->doc= (
char *) document;
730 param->mode= MYSQL_FTPARSER_WITH_STOPWORDS;
731 if (unlikely(parser->parse(param)))
733 DBUG_RETURN(ftb_param.match ? 1 : 0);
/*
  Fragment of the routine that propagates a word match up the
  expression tree (presumably _ftb_climb_the_tree; the signature is not
  visible). mode is 0 when no segment iterator is supplied and 1
  otherwise, and selects which docid slot is used.
*/
741 float weight=ftbw->weight;
742 int yn_flag= ftbw->flags, ythresh,
mode=(ftsi_orig != 0);
743 my_off_t curdoc=ftbw->docid[
mode];
746 ftb->info->s->keyinfo[ftb->keynr].parser;
/* Visit every enclosing expression, innermost first. */
748 for (ftbe=ftbw->up; ftbe; ftbe=ftbe->up)
/* yweaks only lowers the threshold when mode is 0. */
750 ythresh = ftbe->ythresh - (mode ? 0 : ftbe->yweaks);
/* First hit for this document: reset the counters of the expression. */
751 if (ftbe->docid[mode] != curdoc)
754 ftbe->yesses=ftbe->nos=0;
755 ftbe->docid[
mode]=curdoc;
/*
  Required item: add its share of the weight. When the number of
  satisfied required items reaches the threshold, the accumulated
  weight scaled by the expression weight is carried upwards.
*/
759 if (yn_flag & FTB_FLAG_YES)
761 weight /= ftbe->ythresh;
762 ftbe->cur_weight += weight;
763 if ((
int) ++ftbe->yesses == ythresh)
766 weight=ftbe->cur_weight*ftbe->weight;
/*
  In mode 1 an expression with a phrase is verified against the text
  segments of the row until a match is found or the segments run out.
*/
767 if (mode && ftbe->phrase)
771 memcpy(&ftsi, ftsi_orig,
sizeof(ftsi));
772 while (_mi_ft_segiterator(&ftsi) && !found)
776 found= _ftb_check_phrase(ftb, ftsi.pos, ftsi.len, ftbe, parser);
/* A negative result is treated as an error; the handling is not visible. */
777 if (unlikely(found < 0))
/* Negated item: the handling is not visible in this excerpt. */
788 if (yn_flag & FTB_FLAG_NO)
/* Remaining case: the item only contributes weight. */
805 ftbe->cur_weight += weight;
806 if ((
int) ftbe->yesses < ythresh)
/*
  Unless already in weight-only mode, continue upwards with the flags
  of this expression if yesses equalled ythresh before the increment,
  otherwise with FTB_FLAG_WONLY.
*/
808 if (!(yn_flag & FTB_FLAG_WONLY))
809 yn_flag= ((int) ftbe->yesses++ == ythresh) ? ftbe->flags : FTB_FLAG_WONLY ;
810 weight*= ftbe->weight;
/*
  Fragment of the routine that fetches the next matching row through
  the index (the function header is not visible in this excerpt).
  The search must be in state INDEX_SEARCH or INDEX_DONE.
*/
824 if (ftb->state != INDEX_SEARCH && ftb->state != INDEX_DONE)
828 if ((
int) _mi_check_index(info, ftb->keynr) < 0)
830 if (_mi_readinfo(info, F_RDLCK, 1))
/* A query without words cannot match anything. */
834 if (!ftb->queue.elements)
835 return my_errno=HA_ERR_END_OF_FILE;
/* The queue comparator gets the address of curdoc as its first argument. */
838 ftb->queue.first_cmp_arg=(
void *)&curdoc;
/*
  Outer loop: take the docid of the word on top of the queue as the
  current document (the value it is compared with is not visible).
  Inner loop: process every word positioned on that document.
*/
840 while (ftb->state == INDEX_SEARCH &&
841 (curdoc=((
FTB_WORD *)queue_top(& ftb->queue))->docid[0]) !=
844 while (curdoc == (ftbw=(
FTB_WORD *)queue_top(& ftb->queue))->docid[0])
846 if (unlikely(_ftb_climb_the_tree(ftb, ftbw, 0)))
848 my_errno= HA_ERR_OUT_OF_MEM;
/* Move the word to its next key and restore the queue order. */
853 _ft2_search(ftb, ftbw, 0);
854 queue_replaced(& ftb->queue);
/*
  The document matches when the expression was reached for it, has a
  positive weight, enough required items and no negated item.
*/
858 if (ftbe->docid[0]==curdoc && ftbe->cur_weight>0 &&
859 ftbe->yesses>=(ftbe->ythresh-ftbe->yweaks) && !ftbe->nos)
/* A count above 1 means this document was already inserted before. */
862 if (is_tree_inited(&ftb->no_dupes) &&
863 tree_insert(&ftb->no_dupes, &curdoc, 0,
864 ftb->no_dupes.custom_arg)->count >1)
868 info->lastpos=curdoc;
870 info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
/* Read the row; a zero return value is treated as success. */
872 if (!(*info->read_record)(info,curdoc, (uchar*) record))
874 info->update|= HA_STATE_AKTIV;
/*
  When with_scan is set the row is checked again by scanning its
  contents; a relevance of 0 is tested for (the controlled statement
  is not visible).
*/
875 if (ftb->with_scan &&
876 ft_boolean_find_relevance(ftb,(uchar*) record,0)==0)
/* No more documents. */
884 ftb->state=INDEX_DONE;
885 my_errno=HA_ERR_END_OF_FILE;
/* Detach curdoc from the queue comparator again. */
887 ftb->queue.first_cmp_arg=(
void *)0;
/*
  Fragment of the per-word callback used while scanning a row
  (presumably ftb_find_relevance_add_word, installed as
  param->mysql_add_word by ft_boolean_find_relevance; the signature is
  not visible). The first loop is a bisection over queue.elements
  entries; how ftbw is picked from index c is not visible.
*/
911 for (a= 0, b= ftb->queue.elements, c= (a+b)/2; b-a>1; c= (a+b)/2)
914 if (ha_compare_text(ftb->charset, (uchar*)word, len,
915 (uchar*)ftbw->word+1, ftbw->len-1,
916 (my_bool) (ftbw->flags & FTB_FLAG_TRUNC), 0) < 0)
/*
  Second pass over the candidates: a non-zero comparison result means
  the document word does not match this query word.
*/
941 if (ha_compare_text(ftb->charset, (uchar*)word, len,
942 (uchar*)ftbw->word + 1,ftbw->len - 1,
943 (my_bool)(ftbw->flags & FTB_FLAG_TRUNC), 0))
945 if (ftb->with_scan & FTB_FLAG_TRUNC)
/*
  docid[1] already equal to the current row position means the word
  was seen in this row before. Otherwise record the row and propagate
  the match up the tree.
*/
950 if (ftbw->docid[1] == ftb->info->lastpos)
952 ftbw->docid[1]= ftb->info->lastpos;
953 if (unlikely(_ftb_climb_the_tree(ftb, ftbw, ftb_param->ftsi)))
/*
  Fragment of the document tokenizer used while scanning a row
  (presumably ftb_find_relevance_parse, installed as
  param->mysql_parse; the signature is not visible). Every word of the
  len bytes at doc is passed to param->mysql_add_word with a null
  boolean-info argument.
*/
965 uchar *end= (uchar*) doc + len;
967 while (ft_simple_get_word(ftb->charset, (uchar**) &doc, end, &w, TRUE))
968 param->mysql_add_word(param, (
char*) w.pos, w.len, 0);
/*
  Compute the relevance of a row by scanning its text.
  The row is identified by ftb->info->lastpos. On the visible return
  path the weight accumulated in ftbe is returned when the row
  satisfies the query. The values returned on the early exits and
  where ftbe is assigned are not visible in this excerpt.
*/
973 float ft_boolean_find_relevance(
FT_INFO *ftb, uchar *record, uint length)
977 my_off_t docid=ftb->info->lastpos;
982 ftb->info->s->keyinfo[ftb->keynr].parser;
/* Early exits: no current row, no query words, or no parser. */
984 if (docid == HA_OFFSET_ERROR)
986 if (!ftb->queue.elements)
988 if (! (param= ftparser_call_initializer(ftb->info, ftb->keynr, 0)))
/*
  Outside an index search, a row position that did not advance past
  the last one triggers clearing the docid[1] slot of every word and
  of all their enclosing expressions.
*/
991 if (ftb->state != INDEX_SEARCH && docid <= ftb->lastpos)
996 for (i=0; i < ftb->queue.elements; i++)
998 ftb->list[
i]->docid[1]=HA_OFFSET_ERROR;
999 for (x=ftb->list[i]->up; x; x=x->up)
1000 x->docid[1]=HA_OFFSET_ERROR;
/*
  Set up iteration over the text of the row. Without an index the
  record and length arguments describe the text directly. The else
  keyword between the two calls is presumably elided.
*/
1006 if (ftb->keynr==NO_SUCH_KEY)
1007 _mi_ft_segiterator_dummy_init(record, length, &ftsi);
1009 _mi_ft_segiterator_init(ftb->info, ftb->keynr, record, &ftsi);
/* Keep an untouched copy of the iterator for the callbacks. */
1010 memcpy(&ftsi2, &ftsi,
sizeof(ftsi));
1013 ftb_param.ftsi= &ftsi2;
1014 param->mysql_parse= ftb_find_relevance_parse;
1015 param->mysql_add_word= ftb_find_relevance_add_word;
1016 param->mysql_ftparam= (
void *)&ftb_param;
1018 param->cs= ftb->charset;
1019 param->mode= MYSQL_FTPARSER_SIMPLE_MODE;
/* Parse each text segment of the row in turn. */
1020 while (_mi_ft_segiterator(&ftsi))
1024 param->doc= (
char *)ftsi.pos;
1025 param->length= ftsi.len;
1026 if (unlikely(parser->parse(param)))
/*
  The row matches when the expression was reached for it, has a
  positive weight, all required items and no negated item.
*/
1030 if (ftbe->docid[1]==docid && ftbe->cur_weight>0 &&
1031 ftbe->yesses>=ftbe->ythresh && !ftbe->nos)
1033 return ftbe->cur_weight;
/*
  Release the resources of a boolean search: the no_dupes tree, if it
  was initialised, and everything allocated from mem_root. Whether the
  ftb object itself is freed is not visible in this excerpt.
*/
1042 void ft_boolean_close_search(
FT_INFO *ftb)
1044 if (is_tree_inited(& ftb->no_dupes))
1046 delete_tree(& ftb->no_dupes);
1048 free_root(& ftb->mem_root, MYF(0));
/* Return the weight currently accumulated in the root expression. */
1053 float ft_boolean_get_relevance(
FT_INFO *ftb)
1055 return ftb->root->cur_weight;
/* Restart the search by running the index search initialisation again. */
1059 void ft_boolean_reinit_search(
FT_INFO *ftb)
1061 _ftb_init_index_search(ftb);