36 #define ROW_MERGE_READ_GET_NEXT(N) \
38 b[N] = row_merge_read_rec( \
39 block[N], buf[N], b[N], index, \
40 fd[N], &foffs[N], &mrec[N], offsets[N]); \
41 if (UNIV_UNLIKELY(!b[N])) { \
70 ibool* opt_doc_id_size)
82 index->
table->
name,
"tmp_fts_idx", 0, DICT_FTS, 3);
84 new_index->
id = index->
id;
90 idx_field = dict_index_get_nth_field(index, 0);
94 field = dict_index_get_nth_field(new_index, 0);
101 if (strcmp(charset->name,
"latin1_swedish_ci") == 0) {
112 field = dict_index_get_nth_field(new_index, 1);
118 *opt_doc_id_size = FALSE;
126 *opt_doc_id_size = TRUE;
136 *opt_doc_id_size = TRUE;
140 if (*opt_doc_id_size) {
141 field->
col->
len =
sizeof(ib_uint32_t);
148 field->
col->
prtype = DATA_NOT_NULL | DATA_BINARY_TYPE;
153 field = dict_index_get_nth_field(new_index, 2);
178 ibool opt_doc_id_size,
207 mem_alloc(
sizeof *common_info));
215 common_info->
dup = dup;
230 for (i = 0; i < FTS_NUM_AUX_INDEX; i++) {
232 psort_info[j].merge_file[
i] =
236 if (!psort_info[j].merge_file[i]) {
250 psort_info[j].block_alloc[
i] =
254 psort_info[j].merge_block[
i] =
257 psort_info[j].block_alloc[i], 1024));
259 if (!psort_info[j].merge_block[i]) {
265 psort_info[j].child_status = 0;
266 psort_info[j].state = 0;
267 psort_info[j].psort_common = common_info;
273 mem_alloc(FTS_NUM_AUX_INDEX *
sizeof *merge_info));
275 for (j = 0; j < FTS_NUM_AUX_INDEX; j++) {
278 merge_info[j].state = 0;
279 merge_info[j].psort_common = common_info;
304 for (i = 0; i < FTS_NUM_AUX_INDEX; i++) {
305 if (psort_info[j].merge_file[i]) {
307 psort_info[j].merge_file[i]);
310 if (psort_info[j].block_alloc[i]) {
311 ut_free(psort_info[j].block_alloc[i]);
313 mem_free(psort_info[j].merge_file[i]);
319 ut_free(merge_info[0].psort_common->dup);
320 mem_free(merge_info[0].psort_common);
344 for (i = 0; i < FTS_NUM_AUX_INDEX; i++) {
357 row_merge_fts_doc_tokenize(
365 ibool opt_doc_id_size,
377 ibool buf_full = FALSE;
379 ulint data_size[FTS_NUM_AUX_INDEX];
380 ulint n_tuple[FTS_NUM_AUX_INDEX];
385 memset(n_tuple, 0, FTS_NUM_AUX_INDEX *
sizeof(ulint));
386 memset(data_size, 0, FTS_NUM_AUX_INDEX *
sizeof(ulint));
390 for (i = t_ctx->
processed_len; i < doc->text.f_len; i += inc) {
393 ib_uint32_t position;
417 t_str.
f_str = (byte*) &str_buf;
423 &parent, &t_str) == 0) {
462 ib_uint32_t doc_id_32_bit;
464 if (!opt_doc_id_size) {
468 field, &write_doc_id,
sizeof(write_doc_id));
471 (byte*) &doc_id_32_bit, (ib_uint32_t) doc_id);
474 field, &doc_id_32_bit,
sizeof(doc_id_32_bit));
481 field->
type.
prtype = DATA_NOT_NULL | DATA_BINARY_TYPE;
497 ut_ad(len ==
sizeof(ib_uint32_t));
511 if (buf->
total_size + data_size[idx] + cur_len
521 data_size[idx] += cur_len;
526 for (i = 0; i < FTS_NUM_AUX_INDEX; i++) {
535 merge_file[
i]->
n_rec += n_tuple[
i];
562 ibool processed = FALSE;
565 int tmpfd[FTS_NUM_AUX_INDEX];
566 ulint mycount[FTS_NUM_AUX_INDEX];
567 ib_uint64_t total_rec = 0;
568 ulint num_doc_processed = 0;
585 memset(&doc, 0,
sizeof(doc));
586 memset(&t_ctx, 0,
sizeof(t_ctx));
587 memset(mycount, 0, sizeof(mycount)); /* clear the whole ulint array; the old FTS_NUM_AUX_INDEX * sizeof(int) under-cleared it wherever ulint is wider than int */
592 idx_field = dict_index_get_nth_field(
596 word_dtype.
mtype = (strcmp(doc.
charset->name,
"latin1_swedish_ci") == 0)
597 ? DATA_VARCHAR : DATA_VARMYSQL;
605 prev_doc_item = doc_item;
614 last_doc_id = doc_item->
doc_id;
623 prev_doc_item = doc_item;
635 dfield = doc_item->
field;
636 data =
static_cast<byte*
>(dfield_get_data(dfield));
643 zip_size, data_len, blob_heap);
658 processed = row_merge_fts_doc_tokenize(
659 buf, doc_item->
doc_id, &doc,
674 fprintf(stderr,
"number of doc processed %d\n",
675 (
int) num_doc_processed);
676 #ifdef FTS_INTERNAL_DIAG_PRINT
677 for (i = 0; i < FTS_NUM_AUX_INDEX; i++) {
678 fprintf(stderr,
"ID %d, partition %d, word "
680 (
int) i, (
int) mycount[i]);
696 prev_doc_item = doc_item;
697 if (last_doc_id != doc_item->
doc_id) {
727 }
else if (retried > 10000) {
730 fprintf(stderr,
"InnoDB: FTS parallel sort processed "
731 "%lu records, the sort queue has "
732 "%lu records. But sort cannot get "
733 "the next records", num_doc_processed,
742 }
else if (prev_doc_item) {
751 prev_doc_item = doc_item;
753 if (last_doc_id != doc_item->
doc_id) {
768 for (i = 0; i < FTS_NUM_AUX_INDEX; i++) {
772 buf[i], merge_file[i], block[i]);
801 if (merge_file[i]->offset != 0) {
803 merge_file[i]->offset++,
806 UNIV_MEM_INVALID(block[i][0],
816 DEBUG_FTS_SORT_PRINT(
" InnoDB_FTS: start merge sort\n");
819 for (i = 0; i < FTS_NUM_AUX_INDEX; i++) {
821 if (!merge_file[i]->offset) {
832 merge_file[i], block[i], &tmpfd[i]);
833 total_rec += merge_file[
i]->
n_rec;
839 DEBUG_FTS_SORT_PRINT(
" InnoDB_FTS: complete merge sort\n");
854 OS_THREAD_DUMMY_RETURN;
872 (
void*) &psort_info[i], &thd_id);
906 OS_THREAD_DUMMY_RETURN;
921 for (i = 0; i < FTS_NUM_AUX_INDEX; i++) {
933 static __attribute__((nonnull))
935 row_merge_write_fts_word(
948 charset, word->text.f_str, word->text.f_len);
952 while (ib_vector_size(word->nodes) > 0) {
956 fts_node =
static_cast<fts_node_t*
>(ib_vector_pop(word->nodes));
959 trx, &ins_graph[selected], fts_table, &word->text,
962 if (error != DB_SUCCESS) {
963 fprintf(stderr,
"InnoDB: failed to write"
964 " word %s to FTS auxiliary index"
965 " table, error (%s) \n",
971 fts_node->
ilist = NULL;
1000 if (ib_vector_size(word->
nodes) > 0) {
1002 ib_vector_last(word->
nodes));
1005 if (fts_node == NULL
1009 ib_vector_push(word->
nodes, NULL));
1011 memset(fts_node, 0x0,
sizeof(*fts_node));
1016 if (fts_node && ib_vector_size(positions) > 0) {
1018 NULL, fts_node, *in_doc_id,
1022 row_merge_write_fts_word(ins_ctx->
trx,
1033 dfield = dtuple_get_nth_field(dtuple, 0);
1037 token_word.
f_str =
static_cast<byte*
>(dfield_get_data(dfield));
1046 &word->
text, &token_word) != 0) {
1051 if (ib_vector_size(positions) > 0) {
1053 NULL, fts_node, *in_doc_id, positions);
1057 row_merge_write_fts_word(ins_ctx->
trx, ins_ctx->
ins_graph,
1064 num_item = ib_vector_size(positions);
1067 for (i = 0; i < num_item; i++) {
1068 ib_vector_pop(positions);
1073 memset(fts_node, 0x0,
sizeof(*fts_node));
1077 dfield = dtuple_get_nth_field(dtuple, 1);
1081 static_cast<byte*>(dfield_get_data(dfield)));
1084 static_cast<byte*>(dfield_get_data(dfield)));
1088 dfield = dtuple_get_nth_field(dtuple, 2);
1095 if (!(*in_doc_id) || *in_doc_id == doc_id) {
1096 ib_vector_push(positions, &position);
1098 ulint num_pos = ib_vector_size(positions);
1101 *in_doc_id, positions);
1102 for (i = 0; i < num_pos; i++) {
1103 ib_vector_pop(positions);
1105 ib_vector_push(positions, &position);
1109 *in_doc_id = doc_id;
1117 row_fts_sel_tree_propagate(
1131 parent = (propogated - 1) / 2;
1134 child_left = sel_tree[parent * 2 + 1];
1135 child_right = sel_tree[parent * 2 + 2];
1137 if (child_left == -1 || mrec[child_left] == NULL) {
1138 if (child_right == -1
1139 || mrec[child_right] == NULL) {
1142 selected = child_right ;
1144 }
else if (child_right == -1
1145 || mrec[child_right] == NULL) {
1146 selected = child_left;
1148 offsets[child_left],
1149 offsets[child_right],
1151 selected = child_left;
1153 selected = child_right;
1156 sel_tree[parent] = selected;
1166 row_fts_sel_tree_update(
1177 for (i = 1; i <= height; i++) {
1178 propagated = row_fts_sel_tree_propagate(
1179 propagated, sel_tree, mrec, offsets, index);
1182 return(sel_tree[0]);
1189 row_fts_build_sel_tree_level(
1203 start = (1 <<
level) - 1;
1204 num_item = (1 <<
level);
1206 for (i = 0; i < num_item; i++) {
1207 child_left = sel_tree[(start +
i) * 2 + 1];
1208 child_right = sel_tree[(start +
i) * 2 + 2];
1210 if (child_left == -1) {
1211 if (child_right == -1) {
1212 sel_tree[start +
i] = -1;
1214 sel_tree[start +
i] = child_right;
1217 }
else if (child_right == -1) {
1218 sel_tree[start +
i] = child_left;
1223 if (!mrec[child_left]) {
1224 if (!mrec[child_right]) {
1225 sel_tree[start +
i] = -1;
1227 sel_tree[start +
i] = child_right;
1230 }
else if (!mrec[child_right]) {
1231 sel_tree[start +
i] = child_left;
1237 mrec[child_left], mrec[child_right],
1238 offsets[child_left], offsets[child_right],
1241 sel_tree[start +
i] = cmp < 0 ? child_left : child_right;
1251 row_fts_build_sel_tree(
1258 ulint treelevel = 1;
1273 start = (1 << treelevel) - 1;
1276 sel_tree[i + start] =
i;
1279 for (i = treelevel - 1; i >=0; i--) {
1280 row_fts_build_sel_tree_level(sel_tree, i, mrec, offsets, index);
1321 ulint count_diag = 0;
1331 ins_ctx.
trx->
op_info =
"inserting index entries";
1361 num = 1 + REC_OFFS_HEADER_SIZE
1364 heap, num *
sizeof *offsets[i]));
1365 offsets[
i][0] = num;
1372 buf[
i] =
static_cast<unsigned char (*)[16384]
>(
1374 count_diag += (int) psort_info[i].merge_file[
id]->n_rec;
1379 fprintf(stderr,
" InnoDB_FTS: to inserted %lu records\n",
1380 (ulong) count_diag);
1384 heap_alloc = ib_heap_allocator_create(heap);
1386 memset(&new_word, 0,
sizeof(new_word));
1389 positions = ib_vector_create(heap_alloc,
sizeof(ulint), 32);
1394 n_bytes =
sizeof(
que_t*) * (FTS_NUM_AUX_INDEX + 1);
1396 memset(ins_ctx.
ins_graph, 0x0, n_bytes);
1405 if (psort_info[i].merge_file[
id]->n_rec == 0) {
1407 mrec[
i] = b[
i] = NULL;
1412 if (psort_info[i].merge_file[
id]->offset > 0
1424 height = row_fts_build_sel_tree(sel_tree, (
const mrec_t **) mrec,
1427 start = (1 << height) - 1;
1436 if (fts_sort_pll_degree <= 2) {
1437 while (!mrec[min_rec]) {
1442 &ins_ctx, &new_word,
1443 positions, &last_doc_id,
1456 mrec[i], mrec[min_rec],
1457 offsets[i], offsets[min_rec],
1463 min_rec = sel_tree[0];
1465 if (min_rec == -1) {
1467 &ins_ctx, &new_word,
1468 positions, &last_doc_id,
1476 mrec[min_rec], index, offsets[min_rec], &n_ext,
1480 &ins_ctx, &new_word, positions,
1481 &last_doc_id, dtuple);
1486 if (fts_sort_pll_degree > 2) {
1487 if (!mrec[min_rec]) {
1488 sel_tree[start + min_rec] = -1;
1491 row_fts_sel_tree_update(sel_tree, start + min_rec,
1508 for (i = 0; i < FTS_NUM_AUX_INDEX; i++) {
1510 fts_que_graph_free(ins_ctx.
ins_graph[i]);
1520 fprintf(stderr,
" InnoDB_FTS: inserted %lu records\n",