/*******************************************************
Created 12/19/1997 Heikki Tuuri
*******************************************************/

#include "dict0dict.h"
#include "dict0boot.h"
#include "mach0data.h"
#include "lock0lock.h"
#include "eval0eval.h"
#include "pars0pars.h"
#include "row0mysql.h"
#include "read0read.h"

/* Maximum number of rows to prefetch; MySQL interface has another parameter */
#define SEL_MAX_N_PREFETCH 16

/* Number of rows fetched, after which to start prefetching; MySQL interface
has another parameter */
#define SEL_PREFETCH_LIMIT 1

/* When a select has accessed about this many pages, it returns control back
to que_run_threads: this is to allow canceling runaway queries */
#define SEL_COST_LIMIT 100

/* Flags for search shortcut */
#define SEL_FOUND 0
#define SEL_EXHAUSTED 1
#define SEL_RETRY 2
53
/************************************************************************
Returns TRUE if the user-defined column values in a secondary index record
are alphabetically the same as the corresponding columns in the clustered
index record.
NOTE: the comparison is NOT done as a binary comparison, but character
fields are compared with collation! */
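/* For example (illustrative only), under a case-insensitive collation such
as latin1_swedish_ci the secondary index may store 'abc' while the clustered
record stores 'ABC'; cmp_data_data() with the column's collation reports the
two values as equal, whereas a byte-wise comparison would not. */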
61
row_sel_sec_rec_is_for_clust_rec(
62
/*=============================*/
63
/* out: TRUE if the secondary
64
record is equal to the corresponding
65
fields in the clustered record,
66
when compared with collation */
67
rec_t* sec_rec, /* in: secondary index record */
68
dict_index_t* sec_index, /* in: secondary index */
69
rec_t* clust_rec, /* in: clustered index record */
70
dict_index_t* clust_index) /* in: clustered index */
78
mem_heap_t* heap = NULL;
79
ulint clust_offsets_[REC_OFFS_NORMAL_SIZE];
80
ulint sec_offsets_[REC_OFFS_SMALL_SIZE];
81
ulint* clust_offs = clust_offsets_;
82
ulint* sec_offs = sec_offsets_;
83
ibool is_equal = TRUE;
85
*clust_offsets_ = (sizeof clust_offsets_) / sizeof *clust_offsets_;
86
*sec_offsets_ = (sizeof sec_offsets_) / sizeof *sec_offsets_;
88
clust_offs = rec_get_offsets(clust_rec, clust_index, clust_offs,
89
ULINT_UNDEFINED, &heap);
90
sec_offs = rec_get_offsets(sec_rec, sec_index, sec_offs,
91
ULINT_UNDEFINED, &heap);
93
n = dict_index_get_n_ordering_defined_by_user(sec_index);
95
for (i = 0; i < n; i++) {
96
const dict_field_t* ifield;
97
const dict_col_t* col;
99
ifield = dict_index_get_nth_field(sec_index, i);
100
col = dict_field_get_col(ifield);
102
clust_field = rec_get_nth_field(
103
clust_rec, clust_offs,
104
dict_col_get_clust_pos(col, clust_index), &clust_len);
105
sec_field = rec_get_nth_field(sec_rec, sec_offs, i, &sec_len);
107
if (ifield->prefix_len > 0 && clust_len != UNIV_SQL_NULL) {
109
clust_len = dtype_get_at_most_n_mbchars(
110
col->prtype, col->mbminlen, col->mbmaxlen,
112
clust_len, (char*) clust_field);
115
if (0 != cmp_data_data(col->mtype, col->prtype,
116
clust_field, clust_len,
117
sec_field, sec_len)) {
124
if (UNIV_LIKELY_NULL(heap)) {
130
/*************************************************************************
131
Creates a select node struct. */
136
/* out, own: select node struct */
137
mem_heap_t* heap) /* in: memory heap where created */
141
node = mem_heap_alloc(heap, sizeof(sel_node_t));
142
node->common.type = QUE_NODE_SELECT;
143
node->state = SEL_NODE_OPEN;
145
node->select_will_do_update = FALSE;
146
node->latch_mode = BTR_SEARCH_LEAF;
153
/*************************************************************************
154
Frees the memory private to a select node when a query graph is freed,
155
does not free the heap where the node was originally created. */
158
sel_node_free_private(
159
/*==================*/
160
sel_node_t* node) /* in: select node struct */
165
if (node->plans != NULL) {
166
for (i = 0; i < node->n_tables; i++) {
167
plan = sel_node_get_nth_plan(node, i);
169
btr_pcur_close(&(plan->pcur));
170
btr_pcur_close(&(plan->clust_pcur));
172
if (plan->old_vers_heap) {
173
mem_heap_free(plan->old_vers_heap);
179
/*************************************************************************
180
Evaluates the values in a select list. If there are aggregate functions,
181
their argument value is added to the aggregate total. */
184
sel_eval_select_list(
185
/*=================*/
186
sel_node_t* node) /* in: select node */
190
exp = node->select_list;
195
exp = que_node_get_next(exp);
199
/*************************************************************************
200
Assigns the values in the select list to the possible into-variables in
201
SELECT ... INTO ... */
204
sel_assign_into_var_values(
205
/*=======================*/
206
sym_node_t* var, /* in: first variable in a list of variables */
207
sel_node_t* node) /* in: select node */
216
exp = node->select_list;
221
eval_node_copy_val(var->alias, exp);
223
exp = que_node_get_next(exp);
224
var = que_node_get_next(var);
228
/*************************************************************************
229
Resets the aggregate value totals in the select list of an aggregate type
233
sel_reset_aggregate_vals(
234
/*=====================*/
235
sel_node_t* node) /* in: select node */
237
func_node_t* func_node;
239
ut_ad(node->is_aggregate);
241
func_node = node->select_list;
244
eval_node_set_int_val(func_node, 0);
246
func_node = que_node_get_next(func_node);
249
node->aggregate_already_fetched = FALSE;
252
/*************************************************************************
253
Copies the input variable values when an explicit cursor is opened. */
256
row_sel_copy_input_variable_vals(
257
/*=============================*/
258
sel_node_t* node) /* in: select node */
262
var = UT_LIST_GET_FIRST(node->copy_variables);
265
eval_node_copy_val(var, var->alias);
267
var->indirection = NULL;
269
var = UT_LIST_GET_NEXT(col_var_list, var);
273
/*************************************************************************
274
Fetches the column values from a record. */
277
row_sel_fetch_columns(
278
/*==================*/
279
dict_index_t* index, /* in: record index */
280
rec_t* rec, /* in: record in a clustered or non-clustered
282
const ulint* offsets,/* in: rec_get_offsets(rec, index) */
283
sym_node_t* column) /* in: first column in a column list, or
292
ut_ad(rec_offs_validate(rec, index, offsets));
294
if (index->type & DICT_CLUSTERED) {
295
index_type = SYM_CLUST_FIELD_NO;
297
index_type = SYM_SEC_FIELD_NO;
301
mem_heap_t* heap = NULL;
304
field_no = column->field_nos[index_type];
306
if (field_no != ULINT_UNDEFINED) {
308
if (UNIV_UNLIKELY(rec_offs_nth_extern(offsets,
311
/* Copy an externally stored field to the
temporary heap */
314
heap = mem_heap_create(1);
316
data = btr_rec_copy_externally_stored_field(
317
rec, offsets, field_no, &len, heap);
319
ut_a(len != UNIV_SQL_NULL);
323
data = rec_get_nth_field(rec, offsets,
326
needs_copy = column->copy_val;
330
eval_node_copy_and_alloc_val(column, data,
333
val = que_node_get_val(column);
334
dfield_set_data(val, data, len);
337
if (UNIV_LIKELY_NULL(heap)) {
342
column = UT_LIST_GET_NEXT(col_var_list, column);
346
/*************************************************************************
347
Allocates a prefetch buffer for a column when prefetch is first time done. */
350
sel_col_prefetch_buf_alloc(
351
/*=======================*/
352
sym_node_t* column) /* in: symbol table node for a column */
357
ut_ad(que_node_get_type(column) == QUE_NODE_SYMBOL);
359
column->prefetch_buf = mem_alloc(SEL_MAX_N_PREFETCH
360
* sizeof(sel_buf_t));
361
for (i = 0; i < SEL_MAX_N_PREFETCH; i++) {
362
sel_buf = column->prefetch_buf + i;
364
sel_buf->data = NULL;
366
sel_buf->val_buf_size = 0;
370
/*************************************************************************
371
Frees a prefetch buffer for a column, including the dynamically allocated
372
memory for data stored there. */
375
sel_col_prefetch_buf_free(
376
/*======================*/
377
sel_buf_t* prefetch_buf) /* in, own: prefetch buffer */
382
for (i = 0; i < SEL_MAX_N_PREFETCH; i++) {
383
sel_buf = prefetch_buf + i;
385
if (sel_buf->val_buf_size > 0) {
387
mem_free(sel_buf->data);
392
/*************************************************************************
393
Pops the column values for a prefetched, cached row from the column prefetch
394
buffers and places them to the val fields in the column nodes. */
397
sel_pop_prefetched_row(
398
/*===================*/
399
plan_t* plan) /* in: plan node for a table */
408
ut_ad(plan->n_rows_prefetched > 0);
410
column = UT_LIST_GET_FIRST(plan->columns);
413
val = que_node_get_val(column);
415
if (!column->copy_val) {
416
/* We did not really push any value for the
column */
419
ut_ad(!column->prefetch_buf);
420
ut_ad(que_node_get_val_buf_size(column) == 0);
422
dfield_set_data(val, NULL, 0);
427
ut_ad(column->prefetch_buf);
429
sel_buf = column->prefetch_buf + plan->first_prefetched;
431
data = sel_buf->data;
433
val_buf_size = sel_buf->val_buf_size;
435
/* We must keep track of the allocated memory for
436
column values to be able to free it later: therefore
437
we swap the values for sel_buf and val */
439
sel_buf->data = dfield_get_data(val);
440
sel_buf->len = dfield_get_len(val);
441
sel_buf->val_buf_size = que_node_get_val_buf_size(column);
443
dfield_set_data(val, data, len);
444
que_node_set_val_buf_size(column, val_buf_size);
446
column = UT_LIST_GET_NEXT(col_var_list, column);
449
plan->n_rows_prefetched--;
451
plan->first_prefetched++;
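/* A minimal sketch of the prefetch buffer mechanics implemented by
sel_push_prefetched_row() and sel_pop_prefetched_row(): each column owns an
array of SEL_MAX_N_PREFETCH cached values; pushes fill slots 0, 1, 2, ...
and pops consume them from slot first_prefetched upwards:

	push:	pos = n_rows_prefetched++;	(only while first_prefetched == 0)
	pop:	pos = first_prefetched++;	n_rows_prefetched--;

so the buffer acts as a small FIFO queue that is fully drained before new
rows are pushed again. */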
454
/*************************************************************************
455
Pushes the column values for a prefetched, cached row to the column prefetch
456
buffers from the val fields in the column nodes. */
459
sel_push_prefetched_row(
460
/*====================*/
461
plan_t* plan) /* in: plan node for a table */
471
if (plan->n_rows_prefetched == 0) {
473
plan->first_prefetched = 0;
475
pos = plan->n_rows_prefetched;
477
/* We have the convention that pushing new rows starts only
478
after the prefetch stack has been emptied: */
480
ut_ad(plan->first_prefetched == 0);
483
plan->n_rows_prefetched++;
485
ut_ad(pos < SEL_MAX_N_PREFETCH);
487
column = UT_LIST_GET_FIRST(plan->columns);
490
if (!column->copy_val) {
491
/* There is no sense in pushing pointers to database
page fields when we do not keep a latch on the page! */
497
if (!column->prefetch_buf) {
498
/* Allocate a new prefetch buffer */
500
sel_col_prefetch_buf_alloc(column);
503
sel_buf = column->prefetch_buf + pos;
505
val = que_node_get_val(column);
507
data = dfield_get_data(val);
508
len = dfield_get_len(val);
509
val_buf_size = que_node_get_val_buf_size(column);
511
/* We must keep track of the allocated memory for
512
column values to be able to free it later: therefore
513
we swap the values for sel_buf and val */
515
dfield_set_data(val, sel_buf->data, sel_buf->len);
516
que_node_set_val_buf_size(column, sel_buf->val_buf_size);
518
sel_buf->data = data;
520
sel_buf->val_buf_size = val_buf_size;
522
column = UT_LIST_GET_NEXT(col_var_list, column);
526
/*************************************************************************
527
Builds a previous version of a clustered index record for a consistent read */
530
row_sel_build_prev_vers(
531
/*====================*/
532
/* out: DB_SUCCESS or error code */
533
read_view_t* read_view, /* in: read view */
534
dict_index_t* index, /* in: plan node for table */
535
rec_t* rec, /* in: record in a clustered index */
536
ulint** offsets, /* in/out: offsets returned by
537
rec_get_offsets(rec, plan->index) */
538
mem_heap_t** offset_heap, /* in/out: memory heap from which
539
the offsets are allocated */
540
mem_heap_t** old_vers_heap, /* out: old version heap to use */
541
rec_t** old_vers, /* out: old version, or NULL if the
542
record does not exist in the view:
543
i.e., it was freshly inserted
545
mtr_t* mtr) /* in: mtr */
549
if (*old_vers_heap) {
550
mem_heap_empty(*old_vers_heap);
552
*old_vers_heap = mem_heap_create(512);
555
err = row_vers_build_for_consistent_read(
556
rec, mtr, index, offsets, read_view, offset_heap,
557
*old_vers_heap, old_vers);
561
/*************************************************************************
562
Builds the last committed version of a clustered index record for a
563
semi-consistent read. */
566
row_sel_build_committed_vers_for_mysql(
567
/*===================================*/
568
/* out: DB_SUCCESS or error code */
569
dict_index_t* clust_index, /* in: clustered index */
570
row_prebuilt_t* prebuilt, /* in: prebuilt struct */
571
rec_t* rec, /* in: record in a clustered index */
572
ulint** offsets, /* in/out: offsets returned by
573
rec_get_offsets(rec, clust_index) */
574
mem_heap_t** offset_heap, /* in/out: memory heap from which
575
the offsets are allocated */
576
rec_t** old_vers, /* out: old version, or NULL if the
577
record does not exist in the view:
578
i.e., it was freshly inserted
580
mtr_t* mtr) /* in: mtr */
584
if (prebuilt->old_vers_heap) {
585
mem_heap_empty(prebuilt->old_vers_heap);
587
prebuilt->old_vers_heap = mem_heap_create(200);
590
err = row_vers_build_for_semi_consistent_read(
591
rec, mtr, clust_index, offsets, offset_heap,
592
prebuilt->old_vers_heap, old_vers);
596
/*************************************************************************
597
Tests the conditions which determine when the index segment we are searching
598
through has been exhausted. */
601
row_sel_test_end_conds(
602
/*===================*/
603
/* out: TRUE if row passed the tests */
604
plan_t* plan) /* in: plan for the table; the column values must
605
already have been retrieved and the right sides of
606
comparisons evaluated */
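/* A plausible example of the split between end_conds and other_conds,
assuming an index on (a, b): for WHERE a = 5 AND b < 7 AND c = 10, the
comparisons a = 5 and b < 7 refer to index columns and can end the scan,
so they belong to end_conds, while c = 10 must be checked separately in
other_conds (see row_sel_test_other_conds() below). */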
610
/* All conditions in end_conds are comparisons of a column to an
expression */
613
cond = UT_LIST_GET_FIRST(plan->end_conds);
616
/* Evaluate the left side of the comparison, i.e., get the
617
column value if there is an indirection */
619
eval_sym(cond->args);
621
/* Do the comparison */
623
if (!eval_cmp(cond)) {
628
cond = UT_LIST_GET_NEXT(cond_list, cond);
634
/*************************************************************************
635
Tests the other conditions. */
638
row_sel_test_other_conds(
639
/*=====================*/
640
/* out: TRUE if row passed the tests */
641
plan_t* plan) /* in: plan for the table; the column values must
642
already have been retrieved */
646
cond = UT_LIST_GET_FIRST(plan->other_conds);
651
if (!eval_node_get_ibool_val(cond)) {
656
cond = UT_LIST_GET_NEXT(cond_list, cond);
662
/*************************************************************************
663
Retrieves the clustered index record corresponding to a record in a
664
non-clustered index. Does the necessary locking. */
667
row_sel_get_clust_rec(
668
/*==================*/
669
/* out: DB_SUCCESS or error code */
670
sel_node_t* node, /* in: select_node */
671
plan_t* plan, /* in: plan node for table */
672
rec_t* rec, /* in: record in a non-clustered index */
673
que_thr_t* thr, /* in: query thread */
674
rec_t** out_rec,/* out: clustered record or an old version of
675
it, NULL if the old version did not exist
676
in the read view, i.e., it was a fresh
678
mtr_t* mtr) /* in: mtr used to get access to the
679
non-clustered record; the same mtr is used to
680
access the clustered index */
686
mem_heap_t* heap = NULL;
687
ulint offsets_[REC_OFFS_NORMAL_SIZE];
688
ulint* offsets = offsets_;
689
*offsets_ = (sizeof offsets_) / sizeof *offsets_;
693
offsets = rec_get_offsets(rec,
694
btr_pcur_get_btr_cur(&plan->pcur)->index,
695
offsets, ULINT_UNDEFINED, &heap);
697
row_build_row_ref_fast(plan->clust_ref, plan->clust_map, rec, offsets);
699
index = dict_table_get_first_index(plan->table);
701
btr_pcur_open_with_no_init(index, plan->clust_ref, PAGE_CUR_LE,
702
node->latch_mode, &(plan->clust_pcur),
705
clust_rec = btr_pcur_get_rec(&(plan->clust_pcur));
707
/* Note: only if the search ends up on a non-infimum record is the
708
low_match value the real match to the search tuple */
710
if (!page_rec_is_user_rec(clust_rec)
711
|| btr_pcur_get_low_match(&(plan->clust_pcur))
712
< dict_index_get_n_unique(index)) {
714
ut_a(rec_get_deleted_flag(rec,
715
dict_table_is_comp(plan->table)));
716
ut_a(node->read_view);
718
/* In a rare case it is possible that no clust rec is found
719
for a delete-marked secondary index record: if in row0umod.c
720
in row_undo_mod_remove_clust_low() we have already removed
721
the clust rec, while purge is still cleaning and removing
722
secondary index records associated with earlier versions of
723
the clustered index record. In that case we know that the
724
clustered index record did not exist in the read view of
the transaction. */
730
offsets = rec_get_offsets(clust_rec, index, offsets,
731
ULINT_UNDEFINED, &heap);
733
if (!node->read_view) {
734
/* Try to place a lock on the index record */
736
/* If innodb_locks_unsafe_for_binlog option is used
737
or this session is using READ COMMITTED isolation level
738
we lock only the record, i.e., next-key locking is
743
trx = thr_get_trx(thr);
745
if (srv_locks_unsafe_for_binlog
746
|| trx->isolation_level == TRX_ISO_READ_COMMITTED) {
747
lock_type = LOCK_REC_NOT_GAP;
749
lock_type = LOCK_ORDINARY;
752
err = lock_clust_rec_read_check_and_lock(
753
0, clust_rec, index, offsets,
754
node->row_lock_mode, lock_type, thr);
756
if (err != DB_SUCCESS) {
761
/* This is a non-locking consistent read: if necessary, fetch
762
a previous version of the record */
766
if (!lock_clust_rec_cons_read_sees(clust_rec, index, offsets,
769
err = row_sel_build_prev_vers(
770
node->read_view, index, clust_rec,
771
&offsets, &heap, &plan->old_vers_heap,
774
if (err != DB_SUCCESS) {
779
clust_rec = old_vers;
781
if (clust_rec == NULL) {
786
/* If we had to go to an earlier version of row or the
787
secondary index record is delete marked, then it may be that
788
the secondary index record corresponding to clust_rec
789
(or old_vers) is not rec; in that case we must ignore
790
such row because in our snapshot rec would not have existed.
791
Remember that from rec we cannot see directly which transaction
792
id corresponds to it: we have to go to the clustered index
793
record. A query where we want to fetch all rows where
794
the secondary index value is in some interval would return
795
a wrong result if we would not drop rows which we come to
796
visit through secondary index records that would not really
797
exist in our snapshot. */
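/* For example, suppose another transaction updated the indexed column
from 5 to 7 and committed after our read view was created: the secondary
index now contains a delete-marked entry for 5 and a new entry for 7.
When we reach the entry for 7, the old version built from the clustered
record still contains 5, so row_sel_sec_rec_is_for_clust_rec() reports a
mismatch and we skip the entry; the delete-marked entry for 5, in contrast,
does match the old version and is the one returned. */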
800
|| rec_get_deleted_flag(rec, dict_table_is_comp(
802
&& !row_sel_sec_rec_is_for_clust_rec(rec, plan->index,
808
/* Fetch the columns needed in test conditions */
810
row_sel_fetch_columns(index, clust_rec, offsets,
811
UT_LIST_GET_FIRST(plan->columns));
812
*out_rec = clust_rec;
816
if (UNIV_LIKELY_NULL(heap)) {
822
/*************************************************************************
823
Sets a lock on a record. */
828
/* out: DB_SUCCESS or error code */
829
rec_t* rec, /* in: record */
830
dict_index_t* index, /* in: index */
831
const ulint* offsets,/* in: rec_get_offsets(rec, index) */
832
ulint mode, /* in: lock mode */
833
ulint type, /* in: LOCK_ORDINARY, LOCK_GAP, or
835
que_thr_t* thr) /* in: query thread */
840
trx = thr_get_trx(thr);
842
if (UT_LIST_GET_LEN(trx->trx_locks) > 10000) {
843
if (buf_LRU_buf_pool_running_out()) {
845
return(DB_LOCK_TABLE_FULL);
849
if (index->type & DICT_CLUSTERED) {
850
err = lock_clust_rec_read_check_and_lock(
851
0, rec, index, offsets, mode, type, thr);
853
err = lock_sec_rec_read_check_and_lock(
854
0, rec, index, offsets, mode, type, thr);
860
/*************************************************************************
861
Opens a pcur to a table index. */
866
sel_node_t* node, /* in: select node */
867
plan_t* plan, /* in: table plan */
868
ibool search_latch_locked,
869
/* in: TRUE if the thread currently
870
has the search latch locked in
872
mtr_t* mtr) /* in: mtr */
878
ulint has_search_latch = 0; /* RW_S_LATCH or 0 */
881
if (search_latch_locked) {
882
has_search_latch = RW_S_LATCH;
887
/* Calculate the value of the search tuple: the exact match columns
get their expressions evaluated when we evaluate the right sides of
end_conds */
891
cond = UT_LIST_GET_FIRST(plan->end_conds);
894
eval_exp(que_node_get_next(cond->args));
896
cond = UT_LIST_GET_NEXT(cond_list, cond);
900
n_fields = dtuple_get_n_fields(plan->tuple);
902
if (plan->n_exact_match < n_fields) {
903
/* There is a non-exact match field which must be
904
evaluated separately */
906
eval_exp(plan->tuple_exps[n_fields - 1]);
909
for (i = 0; i < n_fields; i++) {
910
exp = plan->tuple_exps[i];
912
dfield_copy_data(dtuple_get_nth_field(plan->tuple, i),
913
que_node_get_val(exp));
916
/* Open pcur to the index */
918
btr_pcur_open_with_no_init(index, plan->tuple, plan->mode,
919
node->latch_mode, &(plan->pcur),
920
has_search_latch, mtr);
922
/* Open the cursor to the start or the end of the index
925
btr_pcur_open_at_index_side(plan->asc, index, node->latch_mode,
926
&(plan->pcur), FALSE, mtr);
929
ut_ad(plan->n_rows_prefetched == 0);
930
ut_ad(plan->n_rows_fetched == 0);
931
ut_ad(plan->cursor_at_end == FALSE);
933
plan->pcur_is_open = TRUE;
936
/*************************************************************************
937
Restores a stored pcur position to a table index. */
940
row_sel_restore_pcur_pos(
941
/*=====================*/
942
/* out: TRUE if the cursor should be moved to
943
the next record after we return from this
944
function (moved to the previous, in the case
945
of a descending cursor) without processing
946
again the current cursor record */
947
sel_node_t* node, /* in: select node */
948
plan_t* plan, /* in: table plan */
949
mtr_t* mtr) /* in: mtr */
951
ibool equal_position;
952
ulint relative_position;
954
ut_ad(!plan->cursor_at_end);
956
relative_position = btr_pcur_get_rel_pos(&(plan->pcur));
958
equal_position = btr_pcur_restore_position(node->latch_mode,
961
/* If the cursor is traveling upwards, and relative_position is
963
(1) BTR_PCUR_BEFORE: this is not allowed, as we did not have a lock
964
yet on the successor of the page infimum;
965
(2) BTR_PCUR_AFTER: btr_pcur_restore_position placed the cursor on the
966
first record GREATER than the predecessor of a page supremum; we have
967
not yet processed the cursor record: no need to move the cursor to the
969
(3) BTR_PCUR_ON: btr_pcur_restore_position placed the cursor on the
970
last record LESS or EQUAL to the old stored user record; (a) if
971
equal_position is FALSE, this means that the cursor is now on a record
972
less than the old user record, and we must move to the next record;
973
(b) if equal_position is TRUE, then if
974
plan->stored_cursor_rec_processed is TRUE, we must move to the next
975
record, else there is no need to move the cursor. */
978
if (relative_position == BTR_PCUR_ON) {
980
if (equal_position) {
982
return(plan->stored_cursor_rec_processed);
988
ut_ad(relative_position == BTR_PCUR_AFTER
989
|| relative_position == BTR_PCUR_AFTER_LAST_IN_TREE);
994
/* If the cursor is traveling downwards, and relative_position is
996
(1) BTR_PCUR_BEFORE: btr_pcur_restore_position placed the cursor on
997
the last record LESS than the successor of a page infimum; we have not
998
processed the cursor record: no need to move the cursor;
999
(2) BTR_PCUR_AFTER: btr_pcur_restore_position placed the cursor on the
1000
first record GREATER than the predecessor of a page supremum; we have
1001
processed the cursor record: we should move the cursor to the previous
1003
(3) BTR_PCUR_ON: btr_pcur_restore_position placed the cursor on the
1004
last record LESS or EQUAL to the old stored user record; (a) if
1005
equal_position is FALSE, this means that the cursor is now on a record
1006
less than the old user record, and we need not move to the previous
1007
record; (b) if equal_position is TRUE, then if
1008
plan->stored_cursor_rec_processed is TRUE, we must move to the previous
1009
record, else there is no need to move the cursor. */
1011
if (relative_position == BTR_PCUR_BEFORE
1012
|| relative_position == BTR_PCUR_BEFORE_FIRST_IN_TREE) {
1017
if (relative_position == BTR_PCUR_ON) {
1019
if (equal_position) {
1021
return(plan->stored_cursor_rec_processed);
1027
ut_ad(relative_position == BTR_PCUR_AFTER
1028
|| relative_position == BTR_PCUR_AFTER_LAST_IN_TREE);
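/* A compact summary of the two cases above; "move" means the caller must
advance the cursor (to the next record when ascending, to the previous
record when descending) before processing:

	relative_position	ascending	descending
	BTR_PCUR_BEFORE		(not allowed)	do not move
	BTR_PCUR_ON, !equal	move		do not move
	BTR_PCUR_ON,  equal	move iff plan->stored_cursor_rec_processed
	BTR_PCUR_AFTER		do not move	move
*/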
1033
/*************************************************************************
1034
Resets a plan cursor to a closed state. */
1039
plan_t* plan) /* in: plan */
1041
plan->pcur_is_open = FALSE;
1042
plan->cursor_at_end = FALSE;
1043
plan->n_rows_fetched = 0;
1044
plan->n_rows_prefetched = 0;
1047
/*************************************************************************
1048
Tries to do a shortcut to fetch a clustered index record with a unique key,
1049
using the hash index if possible (not always). */
1052
row_sel_try_search_shortcut(
1053
/*========================*/
1054
/* out: SEL_FOUND, SEL_EXHAUSTED, SEL_RETRY */
1055
sel_node_t* node, /* in: select node for a consistent read */
1056
plan_t* plan, /* in: plan for a unique search in clustered
1058
mtr_t* mtr) /* in: mtr */
1060
dict_index_t* index;
1062
mem_heap_t* heap = NULL;
1063
ulint offsets_[REC_OFFS_NORMAL_SIZE];
1064
ulint* offsets = offsets_;
1066
*offsets_ = (sizeof offsets_) / sizeof *offsets_;
1068
index = plan->index;
1070
ut_ad(node->read_view);
1071
ut_ad(plan->unique_search);
1072
ut_ad(!plan->must_get_clust);
1073
#ifdef UNIV_SYNC_DEBUG
1074
ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
1075
#endif /* UNIV_SYNC_DEBUG */
1077
row_sel_open_pcur(node, plan, TRUE, mtr);
1079
rec = btr_pcur_get_rec(&(plan->pcur));
1081
if (!page_rec_is_user_rec(rec)) {
1086
ut_ad(plan->mode == PAGE_CUR_GE);
1088
/* As the cursor is now placed on a user record after a search with
1089
the mode PAGE_CUR_GE, the up_match field in the cursor tells how many
1090
fields in the user record matched to the search tuple */
1092
if (btr_pcur_get_up_match(&(plan->pcur)) < plan->n_exact_match) {
1094
return(SEL_EXHAUSTED);
1097
/* This is a non-locking consistent read: if necessary, fetch
1098
a previous version of the record */
1100
offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
1102
if (index->type & DICT_CLUSTERED) {
1103
if (!lock_clust_rec_cons_read_sees(rec, index, offsets,
1108
} else if (!lock_sec_rec_cons_read_sees(rec, index, node->read_view)) {
1114
/* Test deleted flag. Fetch the columns needed in test conditions. */
1116
row_sel_fetch_columns(index, rec, offsets,
1117
UT_LIST_GET_FIRST(plan->columns));
1119
if (rec_get_deleted_flag(rec, dict_table_is_comp(plan->table))) {
1121
ret = SEL_EXHAUSTED;
1125
/* Test the rest of search conditions */
1127
if (!row_sel_test_other_conds(plan)) {
1129
ret = SEL_EXHAUSTED;
1133
ut_ad(plan->pcur.latch_mode == node->latch_mode);
1135
plan->n_rows_fetched++;
1138
if (UNIV_LIKELY_NULL(heap)) {
1139
mem_heap_free(heap);
1144
/*************************************************************************
1145
Performs a select step. */
1150
/* out: DB_SUCCESS or error code */
1151
sel_node_t* node, /* in: select node */
1152
que_thr_t* thr) /* in: query thread */
1154
dict_index_t* index;
1161
ibool search_latch_locked;
1162
ibool consistent_read;
1164
/* The following flag becomes TRUE when we are doing a
1165
consistent read from a non-clustered index and we must look
1166
at the clustered index to find out the previous delete mark
1167
state of the non-clustered record: */
1169
ibool cons_read_requires_clust_rec = FALSE;
1170
ulint cost_counter = 0;
1171
ibool cursor_just_opened;
1172
ibool must_go_to_next;
1173
ibool leaf_contains_updates = FALSE;
1174
/* TRUE if select_will_do_update is
1175
TRUE and the current clustered index
1176
leaf page has been updated during
1177
the current mtr: mtr must be committed
1178
at the same time as the leaf x-latch
1180
ibool mtr_has_extra_clust_latch = FALSE;
1181
/* TRUE if the search was made using
1182
a non-clustered index, and we had to
1183
access the clustered record: now &mtr
1184
contains a clustered index latch, and
1185
&mtr must be committed before we move
1186
to the next non-clustered record */
1189
mem_heap_t* heap = NULL;
1190
ulint offsets_[REC_OFFS_NORMAL_SIZE];
1191
ulint* offsets = offsets_;
1192
*offsets_ = (sizeof offsets_) / sizeof *offsets_;
1194
ut_ad(thr->run_node == node);
1196
search_latch_locked = FALSE;
1198
if (node->read_view) {
1199
/* In consistent reads, we try to make do with the hash index and
not to use the buffer page get. This is to reduce memory bus
load resulting from semaphore operations. The search latch
will be s-locked when we access an index with a unique search
condition, but not locked when we access an index with a
less selective search condition. */
1206
consistent_read = TRUE;
1208
consistent_read = FALSE;
1214
This is the outer major loop in calculating a join. We come here when
1215
node->fetch_table changes, and after adding a row to aggregate totals
1216
and, of course, when this function is called. */
1218
ut_ad(leaf_contains_updates == FALSE);
1219
ut_ad(mtr_has_extra_clust_latch == FALSE);
1221
plan = sel_node_get_nth_plan(node, node->fetch_table);
1222
index = plan->index;
1224
if (plan->n_rows_prefetched > 0) {
1225
sel_pop_prefetched_row(plan);
1227
goto next_table_no_mtr;
1230
if (plan->cursor_at_end) {
1231
/* The cursor has already reached the result set end: no more
1232
rows to process for this table cursor, as also the prefetch
stack was empty */
1235
ut_ad(plan->pcur_is_open);
1237
goto table_exhausted_no_mtr;
1240
/* Open a cursor to index, or restore an open cursor position */
1244
if (consistent_read && plan->unique_search && !plan->pcur_is_open
1245
&& !plan->must_get_clust
1246
&& !plan->table->big_rows) {
1247
if (!search_latch_locked) {
1248
rw_lock_s_lock(&btr_search_latch);
1250
search_latch_locked = TRUE;
1251
} else if (btr_search_latch.writer_is_wait_ex) {
1253
/* There is an x-latch request waiting: release the
1254
s-latch for a moment; as an s-latch here is often
1255
kept for some 10 searches before being released,
1256
a waiting x-latch request would block other threads
1257
from acquiring an s-latch for a long time, lowering
1258
performance significantly in multiprocessors. */
1260
rw_lock_s_unlock(&btr_search_latch);
1261
rw_lock_s_lock(&btr_search_latch);
1264
found_flag = row_sel_try_search_shortcut(node, plan, &mtr);
1266
if (found_flag == SEL_FOUND) {
1270
} else if (found_flag == SEL_EXHAUSTED) {
1272
goto table_exhausted;
1275
ut_ad(found_flag == SEL_RETRY);
1277
plan_reset_cursor(plan);
1283
if (search_latch_locked) {
1284
rw_lock_s_unlock(&btr_search_latch);
1286
search_latch_locked = FALSE;
1289
if (!plan->pcur_is_open) {
1290
/* Evaluate the expressions to build the search tuple and
1293
row_sel_open_pcur(node, plan, search_latch_locked, &mtr);
1295
cursor_just_opened = TRUE;
1297
/* A new search was made: increment the cost counter */
1300
/* Restore pcur position to the index */
1302
must_go_to_next = row_sel_restore_pcur_pos(node, plan, &mtr);
1304
cursor_just_opened = FALSE;
1306
if (must_go_to_next) {
1307
/* We have already processed the cursor record: move
to the next record */
1317
In this loop we use pcur and try to fetch a qualifying row, and
1318
also fill the prefetch buffer for this table if n_rows_fetched has
1319
exceeded a threshold. While we are inside this loop, the following
holds:
1321
(1) &mtr is started,
1322
(2) pcur is positioned and open.
1324
NOTE that if cursor_just_opened is TRUE here, it means that we came
1325
to this point right after row_sel_open_pcur. */
1327
ut_ad(mtr_has_extra_clust_latch == FALSE);
1329
rec = btr_pcur_get_rec(&(plan->pcur));
1331
/* PHASE 1: Set a lock if specified */
1333
if (!node->asc && cursor_just_opened
1334
&& !page_rec_is_supremum(rec)) {
1336
/* When we open a cursor for a descending search, we must set
1337
a next-key lock on the successor record: otherwise it would
1338
be possible to insert new records next to the cursor position,
1339
and it might be that these new records should appear in the
1340
search result set, resulting in the phantom problem. */
1342
if (!consistent_read) {
1344
/* If innodb_locks_unsafe_for_binlog option is used
1345
or this session is using READ COMMITTED isolation
1346
level, we lock only the record, i.e., next-key
1347
locking is not used. */
1349
rec_t* next_rec = page_rec_get_next(rec);
1353
trx = thr_get_trx(thr);
1355
offsets = rec_get_offsets(next_rec, index, offsets,
1356
ULINT_UNDEFINED, &heap);
1358
if (srv_locks_unsafe_for_binlog
1359
|| trx->isolation_level
1360
== TRX_ISO_READ_COMMITTED) {
1362
if (page_rec_is_supremum(next_rec)) {
1367
lock_type = LOCK_REC_NOT_GAP;
1369
lock_type = LOCK_ORDINARY;
1372
err = sel_set_rec_lock(next_rec, index, offsets,
1373
node->row_lock_mode,
1376
if (err != DB_SUCCESS) {
1377
/* Note that in this case we will store in pcur
the PREDECESSOR of the record whose lock we
are waiting for */
1381
goto lock_wait_or_error;
1387
if (page_rec_is_infimum(rec)) {
1389
/* The infimum record on a page cannot be in the result set,
1390
and neither can a record lock be placed on it: we skip such
1391
a record. We also increment the cost counter as we may have
1392
processed yet another page of index. */
1399
if (!consistent_read) {
1400
/* Try to place a lock on the index record */
1402
/* If innodb_locks_unsafe_for_binlog option is used
1403
or this session is using READ COMMITTED isolation level,
1404
we lock only the record, i.e., next-key locking is
1410
offsets = rec_get_offsets(rec, index, offsets,
1411
ULINT_UNDEFINED, &heap);
1413
trx = thr_get_trx(thr);
1415
if (srv_locks_unsafe_for_binlog
1416
|| trx->isolation_level == TRX_ISO_READ_COMMITTED) {
1418
if (page_rec_is_supremum(rec)) {
1423
lock_type = LOCK_REC_NOT_GAP;
1425
lock_type = LOCK_ORDINARY;
1428
err = sel_set_rec_lock(rec, index, offsets,
1429
node->row_lock_mode, lock_type, thr);
1431
if (err != DB_SUCCESS) {
1433
goto lock_wait_or_error;
1437
if (page_rec_is_supremum(rec)) {
1439
/* A page supremum record cannot be in the result set: skip
1440
it now when we have placed a possible lock on it */
1445
ut_ad(page_rec_is_user_rec(rec));
1447
if (cost_counter > SEL_COST_LIMIT) {
1449
/* Now that we have placed the necessary locks, we can stop
1450
for a while and store the cursor position; NOTE that if we
1451
would store the cursor position BEFORE placing a record lock,
1452
it might happen that the cursor would jump over some records
1453
that another transaction could meanwhile insert adjacent to
1454
the cursor: this would result in the phantom problem. */
1456
goto stop_for_a_while;
1459
/* PHASE 2: Check a mixed index mix id if needed */
1461
if (plan->unique_search && cursor_just_opened) {
1463
ut_ad(plan->mode == PAGE_CUR_GE);
1465
/* As the cursor is now placed on a user record after a search
1466
with the mode PAGE_CUR_GE, the up_match field in the cursor
1467
tells how many fields in the user record matched to the search
1470
if (btr_pcur_get_up_match(&(plan->pcur))
1471
< plan->n_exact_match) {
1472
goto table_exhausted;
1475
/* Ok, no need to test end_conds or mix id */
1479
/* We are ready to look at a possible new index entry in the result
1480
set: the cursor is now placed on a user record */
1482
/* PHASE 3: Get previous version in a consistent read */
1484
cons_read_requires_clust_rec = FALSE;
1485
offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
1487
if (consistent_read) {
1488
/* This is a non-locking consistent read: if necessary, fetch
1489
a previous version of the record */
1491
if (index->type & DICT_CLUSTERED) {
1493
if (!lock_clust_rec_cons_read_sees(rec, index, offsets,
1496
err = row_sel_build_prev_vers(
1497
node->read_view, index, rec,
1498
&offsets, &heap, &plan->old_vers_heap,
1501
if (err != DB_SUCCESS) {
1503
goto lock_wait_or_error;
1506
if (old_vers == NULL) {
1507
offsets = rec_get_offsets(
1508
rec, index, offsets,
1509
ULINT_UNDEFINED, &heap);
1510
row_sel_fetch_columns(
1511
index, rec, offsets,
1515
if (!row_sel_test_end_conds(plan)) {
1517
goto table_exhausted;
1525
} else if (!lock_sec_rec_cons_read_sees(rec, index,
1527
cons_read_requires_clust_rec = TRUE;
1531
/* PHASE 4: Test search end conditions and deleted flag */
1533
/* Fetch the columns needed in test conditions */
1535
row_sel_fetch_columns(index, rec, offsets,
1536
UT_LIST_GET_FIRST(plan->columns));
1538
/* Test the selection end conditions: these can only contain columns
1539
which already are found in the index, even though the index might be
1542
if (plan->unique_search && cursor_just_opened) {
1544
/* No test necessary: the test was already made above */
1546
} else if (!row_sel_test_end_conds(plan)) {
1548
goto table_exhausted;
1551
if (rec_get_deleted_flag(rec, dict_table_is_comp(plan->table))
1552
&& !cons_read_requires_clust_rec) {
1554
/* The record is delete marked: we can skip it if this is
1555
not a consistent read which might see an earlier version
1556
of a non-clustered index record */
1558
if (plan->unique_search) {
1560
goto table_exhausted;
1566
/* PHASE 5: Get the clustered index record, if needed and if we did
1567
not do the search using the clustered index */
1569
if (plan->must_get_clust || cons_read_requires_clust_rec) {
1571
/* It was a non-clustered index and we must fetch also the
1572
clustered index record */
1574
err = row_sel_get_clust_rec(node, plan, rec, thr, &clust_rec,
1576
mtr_has_extra_clust_latch = TRUE;
1578
if (err != DB_SUCCESS) {
1580
goto lock_wait_or_error;
1583
/* Retrieving the clustered record required a search:
1584
increment the cost counter */
1588
if (clust_rec == NULL) {
1589
/* The record did not exist in the read view */
1590
ut_ad(consistent_read);
1595
if (rec_get_deleted_flag(clust_rec,
1596
dict_table_is_comp(plan->table))) {
1598
/* The record is delete marked: we can skip it */
1603
if (node->can_get_updated) {
1605
btr_pcur_store_position(&(plan->clust_pcur), &mtr);
1609
/* PHASE 6: Test the rest of search conditions */
1611
if (!row_sel_test_other_conds(plan)) {
1613
if (plan->unique_search) {
1615
goto table_exhausted;
1621
/* PHASE 7: We found a new qualifying row for the current table; push
1622
the row if prefetch is on, or move to the next table in the join */
1624
plan->n_rows_fetched++;
1626
ut_ad(plan->pcur.latch_mode == node->latch_mode);
1628
if (node->select_will_do_update) {
1629
/* This is a searched update and we can do the update in-place,
saving CPU time */
1632
row_upd_in_place_in_select(node, thr, &mtr);
1634
leaf_contains_updates = TRUE;
1636
/* When the database is in the online backup mode, the number
1637
of log records for a single mtr should be small: increment the
1638
cost counter to ensure it */
1640
cost_counter += 1 + (SEL_COST_LIMIT / 8);
1642
if (plan->unique_search) {
1644
goto table_exhausted;
1650
if ((plan->n_rows_fetched <= SEL_PREFETCH_LIMIT)
1651
|| plan->unique_search || plan->no_prefetch
1652
|| plan->table->big_rows) {
1654
/* No prefetch in operation: go to the next table */
1659
sel_push_prefetched_row(plan);
1661
if (plan->n_rows_prefetched == SEL_MAX_N_PREFETCH) {
1663
/* The prefetch buffer is now full */
1665
sel_pop_prefetched_row(plan);
1671
ut_ad(!search_latch_locked);
1673
if (mtr_has_extra_clust_latch) {
1675
/* We must commit &mtr if we are moving to the next
1676
non-clustered index record, because we could break the
1677
latching order if we would access a different clustered
1678
index page right away without releasing the previous. */
1680
goto commit_mtr_for_a_while;
1683
if (leaf_contains_updates
1684
&& btr_pcur_is_after_last_on_page(&(plan->pcur), &mtr)) {
1686
/* We must commit &mtr if we are moving to a different page,
1687
because we have done updates to the x-latched leaf page, and
1688
the latch would be released in btr_pcur_move_to_next, without
1689
&mtr getting committed there */
1693
goto commit_mtr_for_a_while;
1697
moved = btr_pcur_move_to_next(&(plan->pcur), &mtr);
1699
moved = btr_pcur_move_to_prev(&(plan->pcur), &mtr);
1704
goto table_exhausted;
1707
cursor_just_opened = FALSE;
1709
/* END OF RECORD LOOP
1710
------------------ */
1714
/* We found a record which satisfies the conditions: we can move to
1715
the next table or return a row in the result set */
1717
ut_ad(btr_pcur_is_on_user_rec(&(plan->pcur), &mtr));
1719
if (plan->unique_search && !node->can_get_updated) {
1721
plan->cursor_at_end = TRUE;
1723
ut_ad(!search_latch_locked);
1725
plan->stored_cursor_rec_processed = TRUE;
1727
btr_pcur_store_position(&(plan->pcur), &mtr);
1732
leaf_contains_updates = FALSE;
1733
mtr_has_extra_clust_latch = FALSE;
1736
/* If we use 'goto' to this label, it means that the row was popped
1737
from the prefetched rows stack, and &mtr is already committed */
1739
if (node->fetch_table + 1 == node->n_tables) {
1741
sel_eval_select_list(node);
1743
if (node->is_aggregate) {
1748
sel_assign_into_var_values(node->into_list, node);
1750
thr->run_node = que_node_get_parent(node);
1752
if (search_latch_locked) {
1753
rw_lock_s_unlock(&btr_search_latch);
1760
node->fetch_table++;
1762
/* When we move to the next table, we first reset the plan cursor:
1763
we do not care about resetting it when we backtrack from a table */
1765
plan_reset_cursor(sel_node_get_nth_plan(node, node->fetch_table));
1770
/* The table cursor pcur reached the result set end: backtrack to the
1771
previous table in the join if we do not have cached prefetched rows */
1773
plan->cursor_at_end = TRUE;
1777
leaf_contains_updates = FALSE;
1778
mtr_has_extra_clust_latch = FALSE;
1780
if (plan->n_rows_prefetched > 0) {
1781
/* The table became exhausted during a prefetch */
1783
sel_pop_prefetched_row(plan);
1785
goto next_table_no_mtr;
1788
table_exhausted_no_mtr:
1789
if (node->fetch_table == 0) {
1792
if (node->is_aggregate && !node->aggregate_already_fetched) {
1794
node->aggregate_already_fetched = TRUE;
1796
sel_assign_into_var_values(node->into_list, node);
1798
thr->run_node = que_node_get_parent(node);
1800
if (search_latch_locked) {
1801
rw_lock_s_unlock(&btr_search_latch);
1807
node->state = SEL_NODE_NO_MORE_ROWS;
1809
thr->run_node = que_node_get_parent(node);
1811
if (search_latch_locked) {
1812
rw_lock_s_unlock(&btr_search_latch);
1818
node->fetch_table--;
1823
/* Return control for a while to que_run_threads, so that runaway
1824
queries can be canceled. NOTE that when we come here, we must, in a
1825
locking read, have placed the necessary (possibly waiting request)
1826
record lock on the cursor record or its successor: when we reposition
1827
the cursor, this record lock guarantees that nobody can meanwhile have
1828
inserted new records which should have appeared in the result set,
1829
which would result in the phantom problem. */
1831
ut_ad(!search_latch_locked);
1833
plan->stored_cursor_rec_processed = FALSE;
1834
btr_pcur_store_position(&(plan->pcur), &mtr);
1838
#ifdef UNIV_SYNC_DEBUG
1839
ut_ad(sync_thread_levels_empty_gen(TRUE));
1840
#endif /* UNIV_SYNC_DEBUG */
1844
commit_mtr_for_a_while:
1845
/* Stores the cursor position and commits &mtr; this is used if
1846
&mtr may contain latches which would break the latching order if
1847
&mtr would not be committed and the latches released. */
1849
plan->stored_cursor_rec_processed = TRUE;
1851
ut_ad(!search_latch_locked);
1852
btr_pcur_store_position(&(plan->pcur), &mtr);
1856
leaf_contains_updates = FALSE;
1857
mtr_has_extra_clust_latch = FALSE;
1859
#ifdef UNIV_SYNC_DEBUG
1860
ut_ad(sync_thread_levels_empty_gen(TRUE));
1861
#endif /* UNIV_SYNC_DEBUG */
1866
/* See the note at stop_for_a_while: the same holds for this case */
1868
ut_ad(!btr_pcur_is_before_first_on_page(&(plan->pcur), &mtr)
1870
ut_ad(!search_latch_locked);
1872
plan->stored_cursor_rec_processed = FALSE;
1873
btr_pcur_store_position(&(plan->pcur), &mtr);
1877
#ifdef UNIV_SYNC_DEBUG
1878
ut_ad(sync_thread_levels_empty_gen(TRUE));
1879
#endif /* UNIV_SYNC_DEBUG */
1882
if (UNIV_LIKELY_NULL(heap)) {
1883
mem_heap_free(heap);
1888
/**************************************************************************
1889
Performs a select step. This is a high-level function used in SQL execution
1895
/* out: query thread to run next or NULL */
1896
que_thr_t* thr) /* in: query thread */
1899
sym_node_t* table_node;
1905
node = thr->run_node;
1907
ut_ad(que_node_get_type(node) == QUE_NODE_SELECT);
1909
/* If this is a new execution of this node (or when execution
resumes after a wait for a table intention lock), set intention locks
on the tables, or assign a read view */
1913
if (node->into_list && (thr->prev_node == que_node_get_parent(node))) {
1915
node->state = SEL_NODE_OPEN;
1918
if (node->state == SEL_NODE_OPEN) {
1920
/* It may be that the current session has not yet started
1921
its transaction, or it has been committed: */
1923
trx_start_if_not_started(thr_get_trx(thr));
1925
plan_reset_cursor(sel_node_get_nth_plan(node, 0));
1927
if (node->consistent_read) {
1928
/* Assign a read view for the query */
1929
node->read_view = trx_assign_read_view(
1932
if (node->set_x_locks) {
1933
i_lock_mode = LOCK_IX;
1935
i_lock_mode = LOCK_IS;
1938
table_node = node->table_list;
1940
while (table_node) {
1941
err = lock_table(0, table_node->table,
1943
if (err != DB_SUCCESS) {
1944
thr_get_trx(thr)->error_state = err;
1949
table_node = que_node_get_next(table_node);
1953
/* If this is an explicit cursor, copy stored procedure
variable values, so that the values cannot change between
fetches (currently, we copy them also for non-explicit
cursors) */
1958
if (node->explicit_cursor
1959
&& UT_LIST_GET_FIRST(node->copy_variables)) {
1961
row_sel_copy_input_variable_vals(node);
1964
node->state = SEL_NODE_FETCH;
1965
node->fetch_table = 0;
1967
if (node->is_aggregate) {
1968
/* Reset the aggregate total values */
1969
sel_reset_aggregate_vals(node);
1973
err = row_sel(node, thr);
1975
/* NOTE! if queries are parallelized, the following assignment may
1976
have problems; the assignment should be made only if thr is the
1977
only top-level thr in the graph: */
1979
thr->graph->last_sel_node = node;
1981
if (err != DB_SUCCESS) {
1982
thr_get_trx(thr)->error_state = err;
1990
/**************************************************************************
1991
Performs a fetch for a cursor. */
1996
/* out: query thread to run next or NULL */
1997
que_thr_t* thr) /* in: query thread */
1999
sel_node_t* sel_node;
2004
node = thr->run_node;
2005
sel_node = node->cursor_def;
2007
ut_ad(que_node_get_type(node) == QUE_NODE_FETCH);
2009
if (thr->prev_node != que_node_get_parent(node)) {
2011
if (sel_node->state != SEL_NODE_NO_MORE_ROWS) {
2013
if (node->into_list) {
2014
sel_assign_into_var_values(node->into_list,
2017
void* ret = (*node->func->func)(
2018
sel_node, node->func->arg);
2022
= SEL_NODE_NO_MORE_ROWS;
2027
thr->run_node = que_node_get_parent(node);
2032
/* Make the fetch node the parent of the cursor definition for
2033
the time of the fetch, so that execution knows to return to this
2034
fetch node after a row has been selected or we know that there is
2037
sel_node->common.parent = node;
2039
if (sel_node->state == SEL_NODE_CLOSED) {
2041
"InnoDB: Error: fetch called on a closed cursor\n");
2043
thr_get_trx(thr)->error_state = DB_ERROR;
2048
thr->run_node = sel_node;
2053
/********************************************************************
2054
Sample callback function for fetch that prints each row.*/
2059
/* out: always returns non-NULL */
2060
void* row, /* in: sel_node_t* */
2061
void* user_arg) /* in: not used */
2063
sel_node_t* node = row;
2067
UT_NOT_USED(user_arg);
2069
fprintf(stderr, "row_fetch_print: row %p\n", row);
2071
exp = node->select_list;
2074
dfield_t* dfield = que_node_get_val(exp);
2075
dtype_t* type = dfield_get_type(dfield);
2077
fprintf(stderr, " column %lu:\n", (ulong)i);
2080
fprintf(stderr, "\n");
2082
if (dfield_get_len(dfield) != UNIV_SQL_NULL) {
2083
ut_print_buf(stderr, dfield_get_data(dfield),
2084
dfield_get_len(dfield));
2086
fprintf(stderr, " <NULL>;");
2089
fprintf(stderr, "\n");
2091
exp = que_node_get_next(exp);
2098
/********************************************************************
2099
Callback function for fetch that stores an unsigned 4 byte integer to the
location pointed. The column's type must be DATA_INT, DATA_UNSIGNED, length
= 4. */
2104
row_fetch_store_uint4(
2105
/*==================*/
2106
/* out: always returns NULL */
2107
void* row, /* in: sel_node_t* */
2108
void* user_arg) /* in: data pointer */
2110
sel_node_t* node = row;
2111
ib_uint32_t* val = user_arg;
2114
dfield_t* dfield = que_node_get_val(node->select_list);
2115
dtype_t* type = dfield_get_type(dfield);
2116
ulint len = dfield_get_len(dfield);
2118
ut_a(dtype_get_mtype(type) == DATA_INT);
2119
ut_a(dtype_get_prtype(type) & DATA_UNSIGNED);
2122
tmp = mach_read_from_4(dfield_get_data(dfield));
2123
*val = (ib_uint32_t) tmp;
2128
/***************************************************************
2129
Prints a row in a select result. */
2134
/* out: query thread to run next or NULL */
2135
que_thr_t* thr) /* in: query thread */
2137
row_printf_node_t* node;
2138
sel_node_t* sel_node;
2143
node = thr->run_node;
2145
sel_node = node->sel_node;
2147
ut_ad(que_node_get_type(node) == QUE_NODE_ROW_PRINTF);
2149
if (thr->prev_node == que_node_get_parent(node)) {
2151
/* Reset the cursor */
2152
sel_node->state = SEL_NODE_OPEN;
2154
/* Fetch next row to print */
2156
thr->run_node = sel_node;
2161
if (sel_node->state != SEL_NODE_FETCH) {
2163
ut_ad(sel_node->state == SEL_NODE_NO_MORE_ROWS);
2165
/* No more rows to print */
2167
thr->run_node = que_node_get_parent(node);
2172
arg = sel_node->select_list;
2175
dfield_print_also_hex(que_node_get_val(arg));
2177
fputs(" ::: ", stderr);
2179
arg = que_node_get_next(arg);
2184
/* Fetch next row to print */
2186
thr->run_node = sel_node;
2191
/********************************************************************
2192
Converts a key value stored in MySQL format to an Innobase dtuple. The last
field of the key value may be just a prefix of a fixed length field: hence
the parameter key_len. But currently we do not allow search keys where the
last field is only a prefix of the full key field length, and we print a
warning if such a key appears. A counterpart of this function is
ha_innobase::store_key_val_for_row() in ha_innodb.cc. */
2200
row_sel_convert_mysql_key_to_innobase(
2201
/*==================================*/
2202
dtuple_t* tuple, /* in: tuple where to build;
2203
NOTE: we assume that the type info
2204
in the tuple is already according
2206
byte* buf, /* in: buffer to use in field
2208
ulint buf_len, /* in: buffer length */
2209
dict_index_t* index, /* in: index of the key value */
2210
byte* key_ptr, /* in: MySQL key value */
2211
ulint key_len, /* in: MySQL key value length */
2212
trx_t* trx) /* in: transaction */
2214
byte* original_buf = buf;
2215
byte* original_key_ptr = key_ptr;
2216
dict_field_t* field;
2220
ulint data_field_len;
2226
/* For documentation of the key value storage format in MySQL, see
2227
ha_innobase::store_key_val_for_row() in ha_innodb.cc. */
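/* A simplified sketch of one key part in the MySQL key value buffer, as
handled by the loop below:

	[ 1 byte NULL flag, present only if the column is nullable;
	  nonzero means SQL NULL ]
	[ 2 byte little-endian length, present only for BLOB/TEXT prefixes
	  and true VARCHAR columns ]
	[ column data, padded to the fixed key length of the part ]

key_ptr advances over these parts until it reaches key_end. */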
2229
key_end = key_ptr + key_len;
2231
/* Permit us to access any field in the tuple (ULINT_MAX): */
2233
dtuple_set_n_fields(tuple, ULINT_MAX);
2235
dfield = dtuple_get_nth_field(tuple, 0);
2236
field = dict_index_get_nth_field(index, 0);
2238
if (dfield_get_type(dfield)->mtype == DATA_SYS) {
2239
/* A special case: we are looking for a position in the
2240
generated clustered index which InnoDB automatically added
2241
to a table with no primary key: the first and the only
2242
ordering column is ROW_ID which InnoDB stored to the key_ptr
2245
ut_a(key_len == DATA_ROW_ID_LEN);
2247
dfield_set_data(dfield, key_ptr, DATA_ROW_ID_LEN);
2249
dtuple_set_n_fields(tuple, 1);
2254
while (key_ptr < key_end) {
2256
ut_a(field->col->mtype == dfield_get_type(dfield)->mtype);
2261
if (!(dfield_get_type(dfield)->prtype & DATA_NOT_NULL)) {
2262
/* The first byte in the field tells if this is
2263
an SQL NULL value */
2267
if (*key_ptr != 0) {
2268
dfield_set_data(dfield, NULL, UNIV_SQL_NULL);
2274
type = dfield_get_type(dfield)->mtype;
2276
/* Calculate data length and data field total length */
2278
if (type == DATA_BLOB) {
2279
/* The key field is a column prefix of a BLOB or
TEXT column */
2282
ut_a(field->prefix_len > 0);
2284
/* MySQL stores the actual data length to the first 2
2285
bytes after the optional SQL NULL marker byte. The
2286
storage format is little-endian, that is, the most
2287
significant byte at a higher address. In UTF-8, MySQL
2288
seems to reserve field->prefix_len bytes for
2289
storing this field in the key value buffer, even
2290
though the actual value only takes data_len bytes
2293
data_len = key_ptr[data_offset]
2294
+ 256 * key_ptr[data_offset + 1];
2295
data_field_len = data_offset + 2 + field->prefix_len;
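/* For example, length bytes 0x05 0x01 at key_ptr[data_offset] give
data_len = 0x05 + 256 * 0x01 = 261. */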
2299
/* Now that we know the length, we store the column
2300
value like it would be a fixed char field */
2302
} else if (field->prefix_len > 0) {
2303
/* Looks like MySQL pads unused end bytes in the
2304
prefix with space. Therefore, also in UTF-8, it is ok
2305
to compare with a prefix containing full prefix_len
2306
bytes, and no need to take at most prefix_len / 3
2307
UTF-8 characters from the start.
2308
If the prefix is used as the upper end of a LIKE
2309
'abc%' query, then MySQL pads the end with chars
0xff. TODO: in that case does it do any harm to compare
with the full prefix_len bytes? How do characters
0xff in UTF-8 behave? */
2314
data_len = field->prefix_len;
2315
data_field_len = data_offset + data_len;
2317
data_len = dfield_get_type(dfield)->len;
2318
data_field_len = data_offset + data_len;
2321
if (dtype_get_mysql_type(dfield_get_type(dfield))
2322
== DATA_MYSQL_TRUE_VARCHAR
2323
&& dfield_get_type(dfield)->mtype != DATA_INT) {
2324
/* In a MySQL key value format, a true VARCHAR is
2325
always preceded by 2 bytes of a length field.
2326
dfield_get_type(dfield)->len returns the maximum
2327
'payload' len in bytes. That does not include the
2328
2 bytes that tell the actual data length.
2330
We added the check != DATA_INT to make sure we do
2331
not treat MySQL ENUM or SET as a true VARCHAR! */
2334
data_field_len += 2;
2337
/* Storing may use at most data_len bytes of buf */
2340
row_mysql_store_col_in_innobase_format(
2342
FALSE, /* MySQL key value format col */
2343
key_ptr + data_offset, data_len,
2344
dict_table_is_comp(index->table));
2348
key_ptr += data_field_len;
2350
if (key_ptr > key_end) {
2351
/* The last field in key was not a complete key field
2354
Print a warning about this! HA_READ_PREFIX_LAST does
2355
not currently work in InnoDB with partial-field key
2356
value prefixes. Since MySQL currently uses a padding
2357
trick to calculate LIKE 'abc%' type queries there
2358
should never be partial-field prefixes in searches. */
2360
ut_print_timestamp(stderr);
2362
fputs(" InnoDB: Warning: using a partial-field"
2363
" key prefix in search.\n"
2364
"InnoDB: ", stderr);
2365
dict_index_name_print(stderr, trx, index);
2366
fprintf(stderr, ". Last data field length %lu bytes,\n"
2367
"InnoDB: key ptr now exceeds"
2368
" key end by %lu bytes.\n"
2369
"InnoDB: Key value in the MySQL format:\n",
2370
(ulong) data_field_len,
2371
(ulong) (key_ptr - key_end));
2373
ut_print_buf(stderr, original_key_ptr, key_len);
2374
fprintf(stderr, "\n");
2377
dfield->len -= (ulint)(key_ptr - key_end);
2386
ut_a(buf <= original_buf + buf_len);
2388
/* We set the length of tuple to n_fields: we assume that the memory
2389
area allocated for it is big enough (usually bigger than n_fields). */
2391
dtuple_set_n_fields(tuple, n_fields);
2394
/******************************************************************
2395
Stores the row id to the prebuilt struct. */
2398
row_sel_store_row_id_to_prebuilt(
2399
/*=============================*/
2400
row_prebuilt_t* prebuilt, /* in: prebuilt */
2401
rec_t* index_rec, /* in: record */
2402
dict_index_t* index, /* in: index of the record */
2403
const ulint* offsets) /* in: rec_get_offsets
2404
(index_rec, index) */
2409
ut_ad(rec_offs_validate(index_rec, index, offsets));
2411
data = rec_get_nth_field(
2413
dict_index_get_sys_col_pos(index, DATA_ROW_ID), &len);
2415
if (len != DATA_ROW_ID_LEN) {
2417
"InnoDB: Error: Row id field is"
2418
" wrong length %lu in ", (ulong) len);
2419
dict_index_name_print(stderr, prebuilt->trx, index);
2420
fprintf(stderr, "\n"
2421
"InnoDB: Field number %lu, record:\n",
2422
(ulong) dict_index_get_sys_col_pos(index,
2424
rec_print_new(stderr, index_rec, offsets);
2429
ut_memcpy(prebuilt->row_id, data, len);
/******************************************************************
Stores a non-SQL-NULL field in the MySQL format. The counterpart of this
function is row_mysql_store_col_in_innobase_format() in row0mysql.c. */
static
void
row_sel_field_store_in_mysql_format(
/*================================*/
	byte*	dest,	/* in/out: buffer where to store; NOTE that BLOBs
			are not in themselves stored here: the caller must
			allocate and copy the BLOB into buffer before, and pass
			the pointer to the BLOB in 'data' */
	const mysql_row_templ_t* templ, /* in: MySQL column template.
			Its following fields are referenced:
			type, is_unsigned, mysql_col_len, mbminlen, mbmaxlen */
	byte*	data,	/* in: data to store */
	ulint	len)	/* in: length of the data */
{
	byte*	ptr;
	byte*	field_end;
	byte*	pad_ptr;

	ut_ad(len != UNIV_SQL_NULL);

	if (templ->type == DATA_INT) {
		/* Convert integer data from Innobase to a little-endian
		format, sign bit restored to normal */

		ptr = dest + len;

		for (;;) {
			ptr--;
			*ptr = *data;

			if (ptr == dest) {

				break;
			}

			data++;
		}

		if (!templ->is_unsigned) {
			dest[len - 1] = (byte) (dest[len - 1] ^ 128);
		}

		ut_ad(templ->mysql_col_len == len);
	} else if (templ->type == DATA_VARCHAR
		   || templ->type == DATA_VARMYSQL
		   || templ->type == DATA_BINARY) {

		field_end = dest + templ->mysql_col_len;

		if (templ->mysql_type == DATA_MYSQL_TRUE_VARCHAR) {
			/* This is a >= 5.0.3 type true VARCHAR. Store the
			length of the data to the first byte or the first
			two bytes of dest. */

			dest = row_mysql_store_true_var_len(
				dest, len, templ->mysql_length_bytes);
		}

		/* Copy the actual data */
		ut_memcpy(dest, data, len);

		/* Pad with trailing spaces. We pad with spaces also the
		unused end of a >= 5.0.3 true VARCHAR column, just in case
		MySQL expects its contents to be deterministic. */

		pad_ptr = dest + len;

		ut_ad(templ->mbminlen <= templ->mbmaxlen);

		/* We handle UCS2 charset strings differently. */
		if (templ->mbminlen == 2) {
			/* A space char is two bytes, 0x0020 in UCS2 */

			if (len & 1) {
				/* A 0x20 has been stripped from the column.
				Pad it back. */

				if (pad_ptr < field_end) {
					*pad_ptr = 0x20;
					pad_ptr++;
				}
			}

			/* Pad the rest of the string with 0x0020 */

			while (pad_ptr < field_end) {
				*pad_ptr = 0x00;
				pad_ptr++;
				*pad_ptr = 0x20;
				pad_ptr++;
			}
		} else {
			ut_ad(templ->mbminlen == 1);
			/* space=0x20 */

			memset(pad_ptr, 0x20, field_end - pad_ptr);
		}
	} else if (templ->type == DATA_BLOB) {
		/* Store a pointer to the BLOB buffer to dest: the BLOB was
		already copied to the buffer in row_sel_store_mysql_rec */

		row_mysql_store_blob_ref(dest, templ->mysql_col_len, data,
					 len);
	} else if (templ->type == DATA_MYSQL) {
		memcpy(dest, data, len);

		ut_ad(templ->mysql_col_len >= len);
		ut_ad(templ->mbmaxlen >= templ->mbminlen);

		ut_ad(templ->mbmaxlen > templ->mbminlen
		      || templ->mysql_col_len == len);
		/* The following assertion would fail for old tables
		containing UTF-8 ENUM columns due to Bug #9526. */
		ut_ad(!templ->mbmaxlen
		      || !(templ->mysql_col_len % templ->mbmaxlen));
		ut_ad(len * templ->mbmaxlen >= templ->mysql_col_len);

		if (templ->mbminlen != templ->mbmaxlen) {
			/* Pad with spaces. This undoes the stripping
			done in row0mysql.ic, function
			row_mysql_store_col_in_innobase_format(). */

			memset(dest + len, 0x20, templ->mysql_col_len - len);
		}
	} else {
		ut_ad(templ->type == DATA_CHAR
		      || templ->type == DATA_FIXBINARY
		      /*|| templ->type == DATA_SYS_CHILD
		      || templ->type == DATA_SYS*/
		      || templ->type == DATA_FLOAT
		      || templ->type == DATA_DOUBLE
		      || templ->type == DATA_DECIMAL);
		ut_ad(templ->mysql_col_len == len);

		memcpy(dest, data, len);
	}
}
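/* Worked example for the DATA_INT branch above, assuming a signed 4-byte
column storing -1: InnoDB keeps integers big-endian with the sign bit
inverted so that an unsigned byte comparison orders them correctly, i.e.
-1 is stored as 7F FF FF FF.  The byte-reversal loop writes FF FF FF 7F
into dest, and XORing dest[len - 1] with 128 restores the sign bit, giving
FF FF FF FF, which is -1 in the little-endian format MySQL expects. */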
2570
/******************************************************************
2571
Convert a row in the Innobase format to a row in the MySQL format.
2572
Note that the template in prebuilt may advise us to copy only a few
2573
columns to mysql_rec, other columns are left blank. All columns may not
2574
be needed in the query. */
2577
row_sel_store_mysql_rec(
2578
/*====================*/
2579
/* out: TRUE if success, FALSE if
2580
could not allocate memory for a BLOB
2581
(though we may also assert in that
2583
byte* mysql_rec, /* out: row in the MySQL format */
2584
row_prebuilt_t* prebuilt, /* in: prebuilt struct */
2585
rec_t* rec, /* in: Innobase record in the index
2586
which was described in prebuilt's
2588
const ulint* offsets, /* in: array returned by
2589
rec_get_offsets() */
2590
ulint start_field_no,
2593
mysql_row_templ_t* templ;
2594
mem_heap_t* extern_field_heap = NULL;
2600
ut_ad(prebuilt->mysql_template);
2601
ut_ad(rec_offs_validate(rec, NULL, offsets));
2603
if (UNIV_LIKELY_NULL(prebuilt->blob_heap)) {
2604
mem_heap_free(prebuilt->blob_heap);
2605
prebuilt->blob_heap = NULL;
2608
for (i = start_field_no; i < end_field_no /* prebuilt->n_template */ ; i++) {
2610
templ = prebuilt->mysql_template + i;
2612
if (UNIV_UNLIKELY(rec_offs_nth_extern(offsets,
2613
templ->rec_field_no))) {
2615
/* Copy an externally stored field to the temporary
2618
ut_a(!prebuilt->trx->has_search_latch);
2620
if (UNIV_UNLIKELY(templ->type == DATA_BLOB)) {
2621
if (prebuilt->blob_heap == NULL) {
2622
prebuilt->blob_heap = mem_heap_create(
2626
heap = prebuilt->blob_heap;
2629
= mem_heap_create(UNIV_PAGE_SIZE);
2631
heap = extern_field_heap;
2634
/* NOTE: if we are retrieving a big BLOB, we may
2635
already run out of memory in the next call, which
2638
data = btr_rec_copy_externally_stored_field(
2639
rec, offsets, templ->rec_field_no,
2642
ut_a(len != UNIV_SQL_NULL);
2644
/* Field is stored in the row. */
2646
data = rec_get_nth_field(rec, offsets,
2647
templ->rec_field_no, &len);
2649
if (UNIV_UNLIKELY(templ->type == DATA_BLOB)
2650
&& len != UNIV_SQL_NULL) {
2652
/* It is a BLOB field locally stored in the
2653
InnoDB record: we MUST copy its contents to
2654
prebuilt->blob_heap here because later code
2655
assumes all BLOB values have been copied to a
2658
if (prebuilt->blob_heap == NULL) {
2659
prebuilt->blob_heap = mem_heap_create(
2663
data = memcpy(mem_heap_alloc(
2664
prebuilt->blob_heap, len),
2669
if (len != UNIV_SQL_NULL) {
2670
row_sel_field_store_in_mysql_format(
2671
mysql_rec + templ->mysql_col_offset,
2675
if (extern_field_heap) {
2676
mem_heap_free(extern_field_heap);
2677
extern_field_heap = NULL;
2680
if (templ->mysql_null_bit_mask) {
2681
/* It is a nullable column with a non-NULL
2683
mysql_rec[templ->mysql_null_byte_offset]
2684
&= ~(byte) templ->mysql_null_bit_mask;
2687
/* MySQL seems to assume the field for an SQL NULL
2688
value is set to zero or space. Not taking this into
2689
account caused seg faults with NULL BLOB fields, and
2690
bug number 154 in the MySQL bug database: GROUP BY
2691
and DISTINCT could treat NULL values inequal. */
2694
mysql_rec[templ->mysql_null_byte_offset]
2695
|= (byte) templ->mysql_null_bit_mask;
2696
switch (templ->type) {
2700
if (templ->mysql_type
2701
== DATA_MYSQL_TRUE_VARCHAR) {
2702
/* This is a >= 5.0.3 type
2703
true VARCHAR. Zero the field. */
2709
case DATA_FIXBINARY:
2711
/* MySQL pads all string types (except
2712
BLOB, TEXT and true VARCHAR) with space. */
2713
if (UNIV_UNLIKELY(templ->mbminlen == 2)) {
2714
/* Treat UCS2 as a special case. */
2716
+ templ->mysql_col_offset;
2717
len = templ->mysql_col_len;
2718
/* There are two UCS2 bytes per char,
2719
so the length has to be even. */
2721
/* Pad with 0x0020. */
2736
ut_ad(!pad_char || templ->mbminlen == 1);
2737
memset(mysql_rec + templ->mysql_col_offset,
2738
pad_char, templ->mysql_col_len);
/*************************************************************************
Builds a previous version of a clustered index record for a consistent read */
static
ulint
row_sel_build_prev_vers_for_mysql(
/*==============================*/
					/* out: DB_SUCCESS or error code */
	read_view_t*	read_view,	/* in: read view */
	dict_index_t*	clust_index,	/* in: clustered index */
	row_prebuilt_t*	prebuilt,	/* in: prebuilt struct */
	rec_t*		rec,		/* in: record in a clustered index */
	ulint**		offsets,	/* in/out: offsets returned by
					rec_get_offsets(rec, clust_index) */
	mem_heap_t**	offset_heap,	/* in/out: memory heap from which
					the offsets are allocated */
	rec_t**		old_vers,	/* out: old version, or NULL if the
					record does not exist in the view:
					i.e., it was freshly inserted
					afterwards */
	mtr_t*		mtr)		/* in: mtr */
{
	ulint	err;

	if (prebuilt->old_vers_heap) {
		mem_heap_empty(prebuilt->old_vers_heap);
	} else {
		prebuilt->old_vers_heap = mem_heap_create(200);
	}

	err = row_vers_build_for_consistent_read(
		rec, mtr, clust_index, offsets, read_view, offset_heap,
		prebuilt->old_vers_heap, old_vers);

	return(err);
}
2780
/*************************************************************************
2781
Retrieves the clustered index record corresponding to a record in a
2782
non-clustered index. Does the necessary locking. Used in the MySQL
2786
row_sel_get_clust_rec_for_mysql(
2787
/*============================*/
2788
/* out: DB_SUCCESS or error code */
2789
row_prebuilt_t* prebuilt,/* in: prebuilt struct in the handle */
2790
dict_index_t* sec_index,/* in: secondary index where rec resides */
2791
rec_t* rec, /* in: record in a non-clustered index; if
2792
this is a locking read, then rec is not
2793
allowed to be delete-marked, and that would
2794
not make sense either */
2795
que_thr_t* thr, /* in: query thread */
2796
rec_t** out_rec,/* out: clustered record or an old version of
2797
it, NULL if the old version did not exist
2798
in the read view, i.e., it was a fresh
2800
ulint** offsets,/* out: offsets returned by
2801
rec_get_offsets(out_rec, clust_index) */
2802
mem_heap_t** offset_heap,/* in/out: memory heap from which
2803
the offsets are allocated */
2804
mtr_t* mtr) /* in: mtr used to get access to the
2805
non-clustered record; the same mtr is used to
2806
access the clustered index */
2808
dict_index_t* clust_index;
2815
trx = thr_get_trx(thr);
2817
row_build_row_ref_in_tuple(prebuilt->clust_ref, sec_index, rec, trx);
2819
clust_index = dict_table_get_first_index(sec_index->table);
2821
btr_pcur_open_with_no_init(clust_index, prebuilt->clust_ref,
2822
PAGE_CUR_LE, BTR_SEARCH_LEAF,
2823
prebuilt->clust_pcur, 0, mtr);
2825
clust_rec = btr_pcur_get_rec(prebuilt->clust_pcur);
2827
prebuilt->clust_pcur->trx_if_known = trx;
2829
/* Note: only if the search ends up on a non-infimum record is the
2830
low_match value the real match to the search tuple */
2832
if (!page_rec_is_user_rec(clust_rec)
2833
|| btr_pcur_get_low_match(prebuilt->clust_pcur)
2834
< dict_index_get_n_unique(clust_index)) {
2836
/* In a rare case it is possible that no clust rec is found
2837
for a delete-marked secondary index record: if in row0umod.c
2838
in row_undo_mod_remove_clust_low() we have already removed
2839
the clust rec, while purge is still cleaning and removing
2840
secondary index records associated with earlier versions of
2841
the clustered index record. In that case we know that the
2842
clustered index record did not exist in the read view of
2845
if (!rec_get_deleted_flag(rec,
2846
dict_table_is_comp(sec_index->table))
2847
|| prebuilt->select_lock_type != LOCK_NONE) {
2848
ut_print_timestamp(stderr);
2849
fputs(" InnoDB: error clustered record"
2850
" for sec rec not found\n"
2851
"InnoDB: ", stderr);
2852
dict_index_name_print(stderr, trx, sec_index);
2854
"InnoDB: sec index record ", stderr);
2855
rec_print(stderr, rec, sec_index);
2857
"InnoDB: clust index record ", stderr);
2858
rec_print(stderr, clust_rec, clust_index);
2860
trx_print(stderr, trx, 600);
2863
"InnoDB: Submit a detailed bug report"
2864
" to http://bugs.mysql.com\n", stderr);
2872
*offsets = rec_get_offsets(clust_rec, clust_index, *offsets,
2873
ULINT_UNDEFINED, offset_heap);
2875
if (prebuilt->select_lock_type != LOCK_NONE) {
2876
/* Try to place a lock on the index record; we are searching
2877
the clust rec with a unique condition, hence
2878
we set a LOCK_REC_NOT_GAP type lock */
2880
err = lock_clust_rec_read_check_and_lock(
2881
0, clust_rec, clust_index, *offsets,
2882
prebuilt->select_lock_type, LOCK_REC_NOT_GAP, thr);
2883
if (err != DB_SUCCESS) {
2888
/* This is a non-locking consistent read: if necessary, fetch
2889
a previous version of the record */
2893
/* If the isolation level allows reading of uncommitted data,
2894
then we never look for an earlier version */
2896
if (trx->isolation_level > TRX_ISO_READ_UNCOMMITTED
2897
&& !lock_clust_rec_cons_read_sees(
2898
clust_rec, clust_index, *offsets,
2901
/* The following call returns 'offsets' associated with
2903
err = row_sel_build_prev_vers_for_mysql(
2904
trx->read_view, clust_index, prebuilt,
2905
clust_rec, offsets, offset_heap, &old_vers,
2908
if (err != DB_SUCCESS) {
2913
clust_rec = old_vers;
2916
/* If we had to go to an earlier version of row or the
2917
secondary index record is delete marked, then it may be that
2918
the secondary index record corresponding to clust_rec
2919
(or old_vers) is not rec; in that case we must ignore
2920
such row because in our snapshot rec would not have existed.
2921
Remember that from rec we cannot see directly which transaction
2922
id corresponds to it: we have to go to the clustered index
2923
record. A query where we want to fetch all rows where
2924
the secondary index value is in some interval would return
2925
a wrong result if we would not drop rows which we come to
2926
visit through secondary index records that would not really
2927
exist in our snapshot. */
2929
if (clust_rec && (old_vers || rec_get_deleted_flag(
2933
&& !row_sel_sec_rec_is_for_clust_rec(
2934
rec, sec_index, clust_rec, clust_index)) {
2937
#ifdef UNIV_SEARCH_DEBUG
2938
ut_a(clust_rec == NULL
2939
|| row_sel_sec_rec_is_for_clust_rec(
2940
rec, sec_index, clust_rec, clust_index));
2946
*out_rec = clust_rec;
2948
if (prebuilt->select_lock_type == LOCK_X) {
2949
/* We may use the cursor in update: store its position */
2951
btr_pcur_store_position(prebuilt->clust_pcur, mtr);
/************************************************************************
Restores cursor position after it has been stored. We have to take into
account that the record cursor was positioned on may have been deleted.
Then we may have to move the cursor one step up or down. */
static
ibool
sel_restore_position_for_mysql(
/*===========================*/
					/* out: TRUE if we may need to
					process the record the cursor is
					now positioned on (i.e. we should
					not go to the next record yet) */
	ibool*		same_user_rec,	/* out: TRUE if we were able to restore
					the cursor on a user record with the
					same ordering prefix in the
					B-tree index */
	ulint		latch_mode,	/* in: latch mode wished in
					restoration */
	btr_pcur_t*	pcur,		/* in: cursor whose position
					has been stored */
	ibool		moves_up,	/* in: TRUE if the cursor moves up
					in the index */
	mtr_t*		mtr)		/* in: mtr; CAUTION: may commit
					mtr temporarily! */
{
	ibool	success;
	ulint	relative_position;

	relative_position = pcur->rel_pos;

	success = btr_pcur_restore_position(latch_mode, pcur, mtr);

	*same_user_rec = success;

	if (relative_position == BTR_PCUR_ON) {
		if (success) {
			return(FALSE);
		}

		if (moves_up) {
			btr_pcur_move_to_next(pcur, mtr);
		}

		return(TRUE);
	}

	if (relative_position == BTR_PCUR_AFTER
	    || relative_position == BTR_PCUR_AFTER_LAST_IN_TREE) {

		if (moves_up) {
			return(TRUE);
		}

		if (btr_pcur_is_on_user_rec(pcur, mtr)) {
			btr_pcur_move_to_prev(pcur, mtr);
		}

		return(TRUE);
	}

	ut_ad(relative_position == BTR_PCUR_BEFORE
	      || relative_position == BTR_PCUR_BEFORE_FIRST_IN_TREE);

	if (moves_up && btr_pcur_is_on_user_rec(pcur, mtr)) {
		btr_pcur_move_to_next(pcur, mtr);
	}

	return(TRUE);
}
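/* Summary of the cases above: pcur->rel_pos records where the stored
position was relative to the last user record.  Only when the position was
BTR_PCUR_ON and the very same record could be restored does the function
return FALSE, telling the caller that the current record has already been
processed; in every other case it returns TRUE and the caller decides, from
the scan direction, whether the record now under the cursor still needs to
be processed. */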
/************************************************************************
Pops a cached row for MySQL from the fetch cache. */
static
void
row_sel_pop_cached_row_for_mysql(
/*=============================*/
	byte*		buf,		/* in/out: buffer where to copy the
					row */
	row_prebuilt_t*	prebuilt)	/* in: prebuilt struct */
{
	ulint			i;
	mysql_row_templ_t*	templ;
	byte*			cached_rec;

	ut_ad(prebuilt->n_fetch_cached > 0);
	ut_ad(prebuilt->mysql_prefix_len <= prebuilt->mysql_row_len);

	if (UNIV_UNLIKELY(prebuilt->keep_other_fields_on_keyread)) {
		/* Copy cache record field by field, don't touch fields that
		are not covered by current key */
		cached_rec = prebuilt->fetch_cache[
			prebuilt->fetch_cache_first];

		for (i = 0; i < prebuilt->n_template; i++) {
			templ = prebuilt->mysql_template + i;
			ut_memcpy(buf + templ->mysql_col_offset,
				  cached_rec + templ->mysql_col_offset,
				  templ->mysql_col_len);
			/* Copy NULL bit of the current field from cached_rec
			to buf */
			if (templ->mysql_null_bit_mask) {
				buf[templ->mysql_null_byte_offset]
					^= (buf[templ->mysql_null_byte_offset]
					    ^ cached_rec[templ->mysql_null_byte_offset])
					& (byte)templ->mysql_null_bit_mask;
			}
		}
	} else {
		ut_memcpy(buf,
			  prebuilt->fetch_cache[prebuilt->fetch_cache_first],
			  prebuilt->mysql_prefix_len);
	}

	prebuilt->n_fetch_cached--;
	prebuilt->fetch_cache_first++;

	if (prebuilt->n_fetch_cached == 0) {
		prebuilt->fetch_cache_first = 0;
	}
}
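/* The NULL-bit transfer above uses the branchless merge idiom

	x ^= (x ^ y) & mask;

which copies exactly the bits selected by mask from y into x while leaving
the other NULL flags packed in the same byte untouched.  For example, with
x = 0x04, y = 0x02 and mask = 0x02: x ^ y = 0x06, masked to 0x02, and
x ^= 0x02 yields 0x06, i.e. the masked bit now carries y's value and the
remaining bits of x are unchanged. */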
/************************************************************************
Pushes a row for MySQL to the fetch cache. */
static
void
row_sel_push_cache_row_for_mysql(
/*=============================*/
	row_prebuilt_t*	prebuilt,	/* in: prebuilt struct */
	rec_t*		rec,		/* in: record to push */
	const ulint*	offsets,	/* in: rec_get_offsets() */
	ulint		start_field_no,	/* psergey: start from this field */
	byte*		remainder_buf)	/* if above !=0 -> where to take prev fields */
{
	byte*	buf;
	ulint	i;

	ut_ad(prebuilt->n_fetch_cached < MYSQL_FETCH_CACHE_SIZE);
	ut_ad(rec_offs_validate(rec, NULL, offsets));
	ut_a(!prebuilt->templ_contains_blob);

	if (prebuilt->fetch_cache[0] == NULL) {
		/* Allocate memory for the fetch cache */

		for (i = 0; i < MYSQL_FETCH_CACHE_SIZE; i++) {

			/* A user has reported memory corruption in these
			buffers in Linux. Put magic numbers there to help
			to track a possible bug. */

			buf = mem_alloc(prebuilt->mysql_row_len + 8);

			prebuilt->fetch_cache[i] = buf + 4;

			mach_write_to_4(buf, ROW_PREBUILT_FETCH_MAGIC_N);
			mach_write_to_4(buf + 4 + prebuilt->mysql_row_len,
					ROW_PREBUILT_FETCH_MAGIC_N);
		}
	}

	ut_ad(prebuilt->fetch_cache_first == 0);

	if (UNIV_UNLIKELY(!row_sel_store_mysql_rec(
				  prebuilt->fetch_cache[
					  prebuilt->n_fetch_cached],
				  prebuilt, rec, offsets, start_field_no,
				  prebuilt->n_template))) {
		ut_error;
	}

	if (start_field_no) {
		for (i = 0; i < start_field_no; i++) {
			register ulint		offs;
			mysql_row_templ_t*	templ;
			templ = prebuilt->mysql_template + i;

			if (templ->mysql_null_bit_mask) {
				offs = templ->mysql_null_byte_offset;
				*(prebuilt->fetch_cache[prebuilt->n_fetch_cached] + offs) ^=
					(*(remainder_buf + offs) & templ->mysql_null_bit_mask);
			}
			offs = templ->mysql_col_offset;
			memcpy(prebuilt->fetch_cache[prebuilt->n_fetch_cached] + offs,
			       remainder_buf + offs,
			       templ->mysql_col_len);
		}
	}

	prebuilt->n_fetch_cached++;
}
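/* Layout of one fetch cache slot as allocated above, with
N = prebuilt->mysql_row_len:

	buf[0..3]          ROW_PREBUILT_FETCH_MAGIC_N
	buf[4..3 + N]      the cached row in the MySQL format
	buf[4 + N..7 + N]  ROW_PREBUILT_FETCH_MAGIC_N

prebuilt->fetch_cache[i] points to buf + 4, so the row is bracketed by the
two magic words and an overrun in either direction can be detected when the
buffers are checked later. */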
/*************************************************************************
Tries to do a shortcut to fetch a clustered index record with a unique key,
using the hash index if possible (not always). We assume that the search
mode is PAGE_CUR_GE, it is a consistent read, there is a read view in trx,
btr search latch has been locked in S-mode. */
static
ulint
row_sel_try_search_shortcut_for_mysql(
/*==================================*/
				/* out: SEL_FOUND, SEL_EXHAUSTED, SEL_RETRY */
	rec_t**		out_rec,/* out: record if found */
	row_prebuilt_t*	prebuilt,/* in: prebuilt struct */
	ulint**		offsets,/* in/out: for rec_get_offsets(*out_rec) */
	mem_heap_t**	heap,	/* in/out: heap for rec_get_offsets() */
	mtr_t*		mtr)	/* in: started mtr */
{
	dict_index_t*	index = prebuilt->index;
	dtuple_t*	search_tuple = prebuilt->search_tuple;
	btr_pcur_t*	pcur = prebuilt->pcur;
	trx_t*		trx = prebuilt->trx;
	rec_t*		rec;

	ut_ad(index->type & DICT_CLUSTERED);
	ut_ad(!prebuilt->templ_contains_blob);

	btr_pcur_open_with_no_init(index, search_tuple, PAGE_CUR_GE,
				   BTR_SEARCH_LEAF, pcur,
#ifndef UNIV_SEARCH_DEBUG
				   RW_S_LATCH,
#else
				   0,
#endif
				   mtr);
	rec = btr_pcur_get_rec(pcur);

	if (!page_rec_is_user_rec(rec)) {

		return(SEL_RETRY);
	}

	/* As the cursor is now placed on a user record after a search with
	the mode PAGE_CUR_GE, the up_match field in the cursor tells how many
	fields in the user record matched to the search tuple */

	if (btr_pcur_get_up_match(pcur) < dtuple_get_n_fields(search_tuple)) {

		return(SEL_EXHAUSTED);
	}

	/* This is a non-locking consistent read: if necessary, fetch
	a previous version of the record */

	*offsets = rec_get_offsets(rec, index, *offsets,
				   ULINT_UNDEFINED, heap);

	if (!lock_clust_rec_cons_read_sees(rec, index,
					   *offsets, trx->read_view)) {

		return(SEL_RETRY);
	}

	if (rec_get_deleted_flag(rec, dict_table_is_comp(index->table))) {

		return(SEL_EXHAUSTED);
	}

	*out_rec = rec;

	return(SEL_FOUND);
}
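/* How the caller uses the result above: SEL_FOUND means *out_rec points to
a clustered index record that is visible in the read view and not
delete-marked, so it can be returned directly; SEL_EXHAUSTED means the
unique search has no matching row in this read view; SEL_RETRY means the
shortcut could not decide (the cursor did not land on a user record, or the
record version is not visible) and the normal search path must be taken. */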
3218
/************************************************************************
3219
Searches for rows in the database. This is used in the interface to
3220
MySQL. This function opens a cursor, and also implements fetch next
3221
and fetch prev. NOTE that if we do a search with a full key value
3222
from a unique index (ROW_SEL_EXACT), then we will not store the cursor
3223
position and fetch next or fetch prev must not be tried to the cursor! */
3226
row_search_for_mysql(
3227
/*=================*/
3229
DB_RECORD_NOT_FOUND,
3230
DB_END_OF_INDEX, DB_DEADLOCK,
3231
DB_LOCK_TABLE_FULL, DB_CORRUPTION,
3232
or DB_TOO_BIG_RECORD */
3233
byte* buf, /* in/out: buffer for the fetched
3234
row in the MySQL format */
3235
ulint mode, /* in: search mode PAGE_CUR_L, ... */
3236
row_prebuilt_t* prebuilt, /* in: prebuilt struct for the
3237
table handle; this contains the info
3238
of search_tuple, index; if search
3239
tuple contains 0 fields then we
3240
position the cursor at the start or
3241
the end of the index, depending on
3243
ulint match_mode, /* in: 0 or ROW_SEL_EXACT or
3244
ROW_SEL_EXACT_PREFIX */
3245
ulint direction) /* in: 0 or ROW_SEL_NEXT or
3246
ROW_SEL_PREV; NOTE: if this is != 0,
3247
then prebuilt must have a pcur
3248
with stored position! In opening of a
3249
cursor 'direction' should be 0. */
3251
dict_index_t* index = prebuilt->index;
3252
ibool comp = dict_table_is_comp(index->table);
3253
dtuple_t* search_tuple = prebuilt->search_tuple;
3254
btr_pcur_t* pcur = prebuilt->pcur;
3255
trx_t* trx = prebuilt->trx;
3256
dict_index_t* clust_index;
3261
ulint err = DB_SUCCESS;
3262
ibool unique_search = FALSE;
3263
ibool unique_search_from_clust_index = FALSE;
3264
ibool mtr_has_extra_clust_latch = FALSE;
3265
ibool moves_up = FALSE;
3266
ibool set_also_gap_locks = TRUE;
3267
/* if the query is a plain locking SELECT, and the isolation level
3268
is <= TRX_ISO_READ_COMMITTED, then this is set to FALSE */
3269
ibool did_semi_consistent_read = FALSE;
3270
/* if the returned record was locked and we did a semi-consistent
3271
read (fetch the newest committed version), then this is set to
3273
#ifdef UNIV_SEARCH_DEBUG
3275
#endif /* UNIV_SEARCH_DEBUG */
3277
ibool same_user_rec;
3279
mem_heap_t* heap = NULL;
3280
ulint offsets_[REC_OFFS_NORMAL_SIZE];
3281
ulint* offsets = offsets_;
3282
ibool some_fields_in_buffer;
3283
ibool get_clust_rec= 0;
3285
*offsets_ = (sizeof offsets_) / sizeof *offsets_;
3287
ut_ad(index && pcur && search_tuple);
3288
ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
3290
if (UNIV_UNLIKELY(prebuilt->table->ibd_file_missing)) {
3291
ut_print_timestamp(stderr);
3292
fprintf(stderr, " InnoDB: Error:\n"
3293
"InnoDB: MySQL is trying to use a table handle"
3294
" but the .ibd file for\n"
3295
"InnoDB: table %s does not exist.\n"
3296
"InnoDB: Have you deleted the .ibd file"
3297
" from the database directory under\n"
3298
"InnoDB: the MySQL datadir, or have you used"
3299
" DISCARD TABLESPACE?\n"
3300
"InnoDB: Look from\n"
3301
"InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
3302
"innodb-troubleshooting.html\n"
3303
"InnoDB: how you can resolve the problem.\n",
3304
prebuilt->table->name);
3309
if (UNIV_UNLIKELY(prebuilt->magic_n != ROW_PREBUILT_ALLOCATED)) {
3311
"InnoDB: Error: trying to free a corrupt\n"
3312
"InnoDB: table handle. Magic n %lu, table name ",
3313
(ulong) prebuilt->magic_n);
3314
ut_print_name(stderr, trx, TRUE, prebuilt->table->name);
3317
mem_analyze_corruption(prebuilt);
3323
/* August 19, 2005 by Heikki: temporarily disable this error
3324
print until the cursor lock count is done correctly.
3325
See bugs #12263 and #12456!*/
3327
if (trx->n_mysql_tables_in_use == 0
3328
&& UNIV_UNLIKELY(prebuilt->select_lock_type == LOCK_NONE)) {
3329
/* Note that if MySQL uses an InnoDB temp table that it
3330
created inside LOCK TABLES, then n_mysql_tables_in_use can
3331
be zero; in that case select_lock_type is set to LOCK_X in
3334
fputs("InnoDB: Error: MySQL is trying to perform a SELECT\n"
3335
"InnoDB: but it has not locked"
3336
" any tables in ::external_lock()!\n",
3338
trx_print(stderr, trx, 600);
3339
fputc('\n', stderr);
3344
fprintf(stderr, "Match mode %lu\n search tuple ",
3345
(ulong) match_mode);
3346
dtuple_print(search_tuple);
3347
fprintf(stderr, "N tables locked %lu\n",
3348
(ulong) trx->mysql_n_tables_locked);
3350
/*-------------------------------------------------------------*/
3351
/* PHASE 0: Release a possible s-latch we are holding on the
3352
adaptive hash index latch if there is someone waiting behind */
3354
if (UNIV_UNLIKELY(btr_search_latch.writer != RW_LOCK_NOT_LOCKED)
3355
&& trx->has_search_latch) {
3357
/* There is an x-latch request on the adaptive hash index:
3358
release the s-latch to reduce starvation and wait for
3359
BTR_SEA_TIMEOUT rounds before trying to keep it again over
3362
rw_lock_s_unlock(&btr_search_latch);
3363
trx->has_search_latch = FALSE;
3365
trx->search_latch_timeout = BTR_SEA_TIMEOUT;
3368
/* Reset the new record lock info if srv_locks_unsafe_for_binlog
3369
is set or session is using a READ COMMITED isolation level. Then
3370
we are able to remove the record locks set here on an individual
3373
if ((srv_locks_unsafe_for_binlog
3374
|| trx->isolation_level == TRX_ISO_READ_COMMITTED)
3375
&& prebuilt->select_lock_type != LOCK_NONE) {
3377
trx_reset_new_rec_lock_info(trx);
3380
/*-------------------------------------------------------------*/
3381
/* PHASE 1: Try to pop the row from the prefetch cache */
3383
if (UNIV_UNLIKELY(direction == 0)) {
3384
trx->op_info = "starting index read";
3386
prebuilt->n_rows_fetched = 0;
3387
prebuilt->n_fetch_cached = 0;
3388
prebuilt->fetch_cache_first = 0;
3390
if (prebuilt->sel_graph == NULL) {
3391
/* Build a dummy select query graph */
3392
row_prebuild_sel_graph(prebuilt);
3395
trx->op_info = "fetching rows";
3397
if (prebuilt->n_rows_fetched == 0) {
3398
prebuilt->fetch_direction = direction;
3401
if (UNIV_UNLIKELY(direction != prebuilt->fetch_direction)) {
3402
if (UNIV_UNLIKELY(prebuilt->n_fetch_cached > 0)) {
3404
/* TODO: scrollable cursor: restore cursor to
3405
the place of the latest returned row,
3406
or better: prevent caching for a scroll
3410
prebuilt->n_rows_fetched = 0;
3411
prebuilt->n_fetch_cached = 0;
3412
prebuilt->fetch_cache_first = 0;
3414
} else if (UNIV_LIKELY(prebuilt->n_fetch_cached > 0)) {
3415
row_sel_pop_cached_row_for_mysql(buf, prebuilt);
3417
prebuilt->n_rows_fetched++;
3424
if (prebuilt->fetch_cache_first > 0
3425
&& prebuilt->fetch_cache_first < MYSQL_FETCH_CACHE_SIZE) {
3427
/* The previous returned row was popped from the fetch
3428
cache, but the cache was not full at the time of the
3429
popping: no more rows can exist in the result set */
3431
err = DB_RECORD_NOT_FOUND;
3435
prebuilt->n_rows_fetched++;
3437
if (prebuilt->n_rows_fetched > 1000000000) {
3438
/* Prevent wrap-over */
3439
prebuilt->n_rows_fetched = 500000000;
3442
mode = pcur->search_mode;
3445
/* In a search where at most one record in the index may match, we
3446
can use a LOCK_REC_NOT_GAP type record lock when locking a
3447
non-delete-marked matching record.
3449
Note that in a unique secondary index there may be different
3450
delete-marked versions of a record where only the primary key
3451
values differ: thus in a secondary index we must use next-key
3452
locks when locking delete-marked records. */
3454
if (match_mode == ROW_SEL_EXACT
3455
&& index->type & DICT_UNIQUE
3456
&& dtuple_get_n_fields(search_tuple)
3457
== dict_index_get_n_unique(index)
3458
&& (index->type & DICT_CLUSTERED
3459
|| !dtuple_contains_null(search_tuple))) {
3461
/* Note above that a UNIQUE secondary index can contain many
3462
rows with the same key value if one of the columns is the SQL
3463
null. A clustered index under MySQL can never contain null
3464
columns because we demand that all the columns in primary key
3467
unique_search = TRUE;
3469
/* Even if the condition is unique, MySQL seems to try to
3470
retrieve also a second row if a primary key contains more than
3471
1 column. Return immediately if this is not a HANDLER
3474
if (UNIV_UNLIKELY(direction != 0
3475
&& !prebuilt->used_in_HANDLER)) {
3477
err = DB_RECORD_NOT_FOUND;
3484
/*-------------------------------------------------------------*/
3485
/* PHASE 2: Try fast adaptive hash index search if possible */
3487
/* Next test if this is the special case where we can use the fast
3488
adaptive hash index to try the search. Since we must release the
3489
search system latch when we retrieve an externally stored field, we
3490
cannot use the adaptive hash index in a search in the case the row
3491
may be long and there may be externally stored fields */
3493
if (UNIV_UNLIKELY(direction == 0)
3495
&& index->type & DICT_CLUSTERED
3496
&& !prebuilt->templ_contains_blob
3497
&& !prebuilt->used_in_HANDLER
3498
&& (prebuilt->mysql_row_len < UNIV_PAGE_SIZE / 8)) {
3502
unique_search_from_clust_index = TRUE;
3504
if (trx->mysql_n_tables_locked == 0
3505
&& prebuilt->select_lock_type == LOCK_NONE
3506
&& trx->isolation_level > TRX_ISO_READ_UNCOMMITTED
3507
&& trx->read_view) {
3509
/* This is a SELECT query done as a consistent read,
3510
and the read view has already been allocated:
3511
let us try a search shortcut through the hash
3513
NOTE that we must also test that
3514
mysql_n_tables_locked == 0, because this might
3515
also be INSERT INTO ... SELECT ... or
3516
CREATE TABLE ... SELECT ... . Our algorithm is
3517
NOT prepared to inserts interleaved with the SELECT,
3518
and if we try that, we can deadlock on the adaptive
3519
hash index semaphore! */
3521
#ifndef UNIV_SEARCH_DEBUG
3522
if (!trx->has_search_latch) {
3523
rw_lock_s_lock(&btr_search_latch);
3524
trx->has_search_latch = TRUE;
3527
switch (row_sel_try_search_shortcut_for_mysql(
3528
&rec, prebuilt, &offsets, &heap,
3531
#ifdef UNIV_SEARCH_DEBUG
3532
ut_a(0 == cmp_dtuple_rec(search_tuple,
3535
if (!row_sel_store_mysql_rec(buf, prebuilt,
3537
prebuilt->n_template)) {
3538
err = DB_TOO_BIG_RECORD;
3540
/* We let the main loop to do the
3542
goto shortcut_fails_too_big_rec;
3547
/* ut_print_name(stderr, index->name);
3548
fputs(" shortcut\n", stderr); */
3552
if (trx->search_latch_timeout > 0
3553
&& trx->has_search_latch) {
3555
trx->search_latch_timeout--;
3557
rw_lock_s_unlock(&btr_search_latch);
3558
trx->has_search_latch = FALSE;
3561
/* NOTE that we do NOT store the cursor
3569
/* ut_print_name(stderr, index->name);
3570
fputs(" record not found 2\n", stderr); */
3572
if (trx->search_latch_timeout > 0
3573
&& trx->has_search_latch) {
3575
trx->search_latch_timeout--;
3577
rw_lock_s_unlock(&btr_search_latch);
3578
trx->has_search_latch = FALSE;
3581
/* NOTE that we do NOT store the cursor
3584
err = DB_RECORD_NOT_FOUND;
3587
shortcut_fails_too_big_rec:
3593
/*-------------------------------------------------------------*/
3594
/* PHASE 3: Open or restore index cursor position */
3596
if (trx->has_search_latch) {
3597
rw_lock_s_unlock(&btr_search_latch);
3598
trx->has_search_latch = FALSE;
3601
trx_start_if_not_started(trx);
3603
if (trx->isolation_level <= TRX_ISO_READ_COMMITTED
3604
&& prebuilt->select_lock_type != LOCK_NONE
3605
&& trx->mysql_query_str != NULL
3606
&& *trx->mysql_query_str != NULL
3607
&& trx->mysql_thd != NULL) {
3609
/* Scan the MySQL query string; check if SELECT is the first
3612
if (dict_str_starts_with_keyword(
3613
trx->mysql_thd, *trx->mysql_query_str, "SELECT")) {
3614
/* It is a plain locking SELECT and the isolation
3615
level is low: do not lock gaps */
3617
set_also_gap_locks = FALSE;
3621
/* Note that if the search mode was GE or G, then the cursor
3622
naturally moves upward (in fetch next) in alphabetical order,
3623
otherwise downward */
3625
if (UNIV_UNLIKELY(direction == 0)) {
3626
if (mode == PAGE_CUR_GE || mode == PAGE_CUR_G) {
3629
} else if (direction == ROW_SEL_NEXT) {
3633
thr = que_fork_get_first_thr(prebuilt->sel_graph);
3635
que_thr_move_to_run_state_for_mysql(thr, trx);
3637
clust_index = dict_table_get_first_index(index->table);
3639
if (UNIV_LIKELY(direction != 0)) {
3640
ibool need_to_process = sel_restore_position_for_mysql(
3641
&same_user_rec, BTR_SEARCH_LEAF,
3642
pcur, moves_up, &mtr);
3644
if (UNIV_UNLIKELY(need_to_process)) {
3645
if (UNIV_UNLIKELY(prebuilt->row_read_type
3646
== ROW_READ_DID_SEMI_CONSISTENT)) {
3647
/* We did a semi-consistent read,
3648
but the record was removed in
3650
prebuilt->row_read_type
3651
= ROW_READ_TRY_SEMI_CONSISTENT;
3653
} else if (UNIV_LIKELY(prebuilt->row_read_type
3654
!= ROW_READ_DID_SEMI_CONSISTENT)) {
3656
/* The cursor was positioned on the record
3657
that we returned previously. If we need
3658
to repeat a semi-consistent read as a
3659
pessimistic locking read, the record
3660
cannot be skipped. */
3665
} else if (dtuple_get_n_fields(search_tuple) > 0) {
3667
btr_pcur_open_with_no_init(index, search_tuple, mode,
3671
pcur->trx_if_known = trx;
3673
rec = btr_pcur_get_rec(pcur);
3676
&& !page_rec_is_supremum(rec)
3677
&& set_also_gap_locks
3678
&& !(srv_locks_unsafe_for_binlog
3679
|| trx->isolation_level == TRX_ISO_READ_COMMITTED)
3680
&& prebuilt->select_lock_type != LOCK_NONE) {
3682
/* Try to place a gap lock on the next index record
3683
to prevent phantoms in ORDER BY ... DESC queries */
3685
offsets = rec_get_offsets(page_rec_get_next(rec),
3687
ULINT_UNDEFINED, &heap);
3688
err = sel_set_rec_lock(page_rec_get_next(rec),
3690
prebuilt->select_lock_type,
3693
if (err != DB_SUCCESS) {
3695
goto lock_wait_or_error;
3699
if (mode == PAGE_CUR_G) {
3700
btr_pcur_open_at_index_side(
3701
TRUE, index, BTR_SEARCH_LEAF, pcur, FALSE,
3703
} else if (mode == PAGE_CUR_L) {
3704
btr_pcur_open_at_index_side(
3705
FALSE, index, BTR_SEARCH_LEAF, pcur, FALSE,
3710
if (!prebuilt->sql_stat_start) {
3711
/* No need to set an intention lock or assign a read view */
3713
if (trx->read_view == NULL
3714
&& prebuilt->select_lock_type == LOCK_NONE) {
3716
fputs("InnoDB: Error: MySQL is trying to"
3717
" perform a consistent read\n"
3718
"InnoDB: but the read view is not assigned!\n",
3720
trx_print(stderr, trx, 600);
3721
fputc('\n', stderr);
3724
} else if (prebuilt->select_lock_type == LOCK_NONE) {
3725
/* This is a consistent read */
3726
/* Assign a read view for the query */
3728
trx_assign_read_view(trx);
3729
prebuilt->sql_stat_start = FALSE;
3732
if (prebuilt->select_lock_type == LOCK_S) {
3733
lock_mode = LOCK_IS;
3735
lock_mode = LOCK_IX;
3737
err = lock_table(0, index->table, lock_mode, thr);
3739
if (err != DB_SUCCESS) {
3741
goto lock_wait_or_error;
3743
prebuilt->sql_stat_start = FALSE;
3747
/*-------------------------------------------------------------*/
3748
/* PHASE 4: Look for matching records in a loop */
3750
rec = btr_pcur_get_rec(pcur);
3751
ut_ad(!!page_rec_is_comp(rec) == comp);
3752
#ifdef UNIV_SEARCH_DEBUG
3754
fputs("Using ", stderr);
3755
dict_index_name_print(stderr, index);
3756
fprintf(stderr, " cnt %lu ; Page no %lu\n", cnt,
3757
buf_frame_get_page_no(buf_frame_align(rec)));
3760
#endif /* UNIV_SEARCH_DEBUG */
3762
if (page_rec_is_infimum(rec)) {
3764
/* The infimum record on a page cannot be in the result set,
3765
and neither can a record lock be placed on it: we skip such
3771
if (page_rec_is_supremum(rec)) {
3773
if (set_also_gap_locks
3774
&& !(srv_locks_unsafe_for_binlog
3775
|| trx->isolation_level == TRX_ISO_READ_COMMITTED)
3776
&& prebuilt->select_lock_type != LOCK_NONE) {
3778
/* Try to place a lock on the index record */
3780
/* If innodb_locks_unsafe_for_binlog option is used
3781
or this session is using a READ COMMITTED isolation
3782
level we do not lock gaps. Supremum record is really
3783
a gap and therefore we do not set locks there. */
3785
offsets = rec_get_offsets(rec, index, offsets,
3786
ULINT_UNDEFINED, &heap);
3787
err = sel_set_rec_lock(rec, index, offsets,
3788
prebuilt->select_lock_type,
3789
LOCK_ORDINARY, thr);
3791
if (err != DB_SUCCESS) {
3793
goto lock_wait_or_error;
3796
/* A page supremum record cannot be in the result set: skip
3797
it now that we have placed a possible lock on it */
3802
/*-------------------------------------------------------------*/
3803
/* Do sanity checks in case our cursor has bumped into page
3807
next_offs = rec_get_next_offs(rec, TRUE);
3808
if (UNIV_UNLIKELY(next_offs < PAGE_NEW_SUPREMUM)) {
3813
next_offs = rec_get_next_offs(rec, FALSE);
3814
if (UNIV_UNLIKELY(next_offs < PAGE_OLD_SUPREMUM)) {
3820
if (UNIV_UNLIKELY(next_offs >= UNIV_PAGE_SIZE - PAGE_DIR)) {
3823
if (srv_force_recovery == 0 || moves_up == FALSE) {
3824
ut_print_timestamp(stderr);
3825
buf_page_print(buf_frame_align(rec));
3827
"\nInnoDB: rec address %p, first"
3828
" buffer frame %p\n"
3829
"InnoDB: buffer pool high end %p,"
3830
" buf block fix count %lu\n",
3831
(void*) rec, (void*) buf_pool->frame_zero,
3832
(void*) buf_pool->high_end,
3833
(ulong)buf_block_align(rec)->buf_fix_count);
3835
"InnoDB: Index corruption: rec offs %lu"
3836
" next offs %lu, page no %lu,\n"
3838
(ulong) page_offset(rec),
3840
(ulong) buf_frame_get_page_no(rec));
3841
dict_index_name_print(stderr, trx, index);
3842
fputs(". Run CHECK TABLE. You may need to\n"
3843
"InnoDB: restore from a backup, or"
3844
" dump + drop + reimport the table.\n",
3847
err = DB_CORRUPTION;
3849
goto lock_wait_or_error;
3851
/* The user may be dumping a corrupt table. Jump
3852
over the corruption to recover as much as possible. */
3855
"InnoDB: Index corruption: rec offs %lu"
3856
" next offs %lu, page no %lu,\n"
3858
(ulong) page_offset(rec),
3860
(ulong) buf_frame_get_page_no(rec));
3861
dict_index_name_print(stderr, trx, index);
3862
fputs(". We try to skip the rest of the page.\n",
3865
btr_pcur_move_to_last_on_page(pcur, &mtr);
3870
/*-------------------------------------------------------------*/
3872
/* Calculate the 'offsets' associated with 'rec' */
3874
offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
3876
if (UNIV_UNLIKELY(srv_force_recovery > 0)) {
3877
if (!rec_validate(rec, offsets)
3878
|| !btr_index_rec_validate(rec, index, FALSE)) {
3880
"InnoDB: Index corruption: rec offs %lu"
3881
" next offs %lu, page no %lu,\n"
3883
(ulong) page_offset(rec),
3885
(ulong) buf_frame_get_page_no(rec));
3886
dict_index_name_print(stderr, trx, index);
3887
fputs(". We try to skip the record.\n",
3894
/* Note that we cannot trust the up_match value in the cursor at this
3895
place because we can arrive here after moving the cursor! Thus
3896
we have to recompare rec and search_tuple to determine if they
3899
if (match_mode == ROW_SEL_EXACT) {
3900
/* Test if the index record matches completely to search_tuple
3901
in prebuilt: if not, then we return with DB_RECORD_NOT_FOUND */
3903
/* fputs("Comparing rec and search tuple\n", stderr); */
3905
if (0 != cmp_dtuple_rec(search_tuple, rec, offsets)) {
3907
if (set_also_gap_locks
3908
&& !(srv_locks_unsafe_for_binlog
3909
|| trx->isolation_level
3910
== TRX_ISO_READ_COMMITTED)
3911
&& prebuilt->select_lock_type != LOCK_NONE) {
3913
/* Try to place a gap lock on the index
3914
record only if innodb_locks_unsafe_for_binlog
3915
option is not set or this session is not
3916
using a READ COMMITTED isolation level. */
3918
err = sel_set_rec_lock(
3919
rec, index, offsets,
3920
prebuilt->select_lock_type, LOCK_GAP,
3923
if (err != DB_SUCCESS) {
3925
goto lock_wait_or_error;
3929
btr_pcur_store_position(pcur, &mtr);
3931
err = DB_RECORD_NOT_FOUND;
3932
/* ut_print_name(stderr, index->name);
3933
fputs(" record not found 3\n", stderr); */
3938
} else if (match_mode == ROW_SEL_EXACT_PREFIX) {
3940
if (!cmp_dtuple_is_prefix_of_rec(search_tuple, rec, offsets)) {
3942
if (set_also_gap_locks
3943
&& !(srv_locks_unsafe_for_binlog
3944
|| trx->isolation_level
3945
== TRX_ISO_READ_COMMITTED)
3946
&& prebuilt->select_lock_type != LOCK_NONE) {
3948
/* Try to place a gap lock on the index
3949
record only if innodb_locks_unsafe_for_binlog
3950
option is not set or this session is not
3951
using a READ COMMITTED isolation level. */
3953
err = sel_set_rec_lock(
3954
rec, index, offsets,
3955
prebuilt->select_lock_type, LOCK_GAP,
3958
if (err != DB_SUCCESS) {
3960
goto lock_wait_or_error;
3964
btr_pcur_store_position(pcur, &mtr);
3966
err = DB_RECORD_NOT_FOUND;
3967
/* ut_print_name(stderr, index->name);
3968
fputs(" record not found 4\n", stderr); */
3974
/* We are ready to look at a possible new index entry in the result
3975
set: the cursor is now placed on a user record */
3977
if (prebuilt->select_lock_type != LOCK_NONE) {
3978
/* Try to place a lock on the index record; note that delete
3979
marked records are a special case in a unique search. If there
3980
is a non-delete marked record, then it is enough to lock its
3981
existence with LOCK_REC_NOT_GAP. */
3983
/* If innodb_locks_unsafe_for_binlog option is used
3984
or this session is using a READ COMMITED isolation
3985
level we lock only the record, i.e., next-key locking is
3990
if (!set_also_gap_locks
3991
|| srv_locks_unsafe_for_binlog
3992
|| trx->isolation_level == TRX_ISO_READ_COMMITTED
3994
&& !UNIV_UNLIKELY(rec_get_deleted_flag(rec, comp)))) {
3998
lock_type = LOCK_ORDINARY;
4001
/* If we are doing a 'greater or equal than a primary key
4002
value' search from a clustered index, and we find a record
4003
that has that exact primary key value, then there is no need
4004
to lock the gap before the record, because no insert in the
4005
gap can be in our search range. That is, no phantom row can
4008
An example: if col1 is the primary key, the search is WHERE
4009
col1 >= 100, and we find a record where col1 = 100, then no
4010
need to lock the gap before that record. */
4012
if (index == clust_index
4013
&& mode == PAGE_CUR_GE
4015
&& dtuple_get_n_fields_cmp(search_tuple)
4016
== dict_index_get_n_unique(index)
4017
&& 0 == cmp_dtuple_rec(search_tuple, rec, offsets)) {
4019
lock_type = LOCK_REC_NOT_GAP;
4022
err = sel_set_rec_lock(rec, index, offsets,
4023
prebuilt->select_lock_type,
4031
if (UNIV_LIKELY(prebuilt->row_read_type
4032
!= ROW_READ_TRY_SEMI_CONSISTENT)
4033
|| index != clust_index) {
4035
goto lock_wait_or_error;
4038
/* The following call returns 'offsets'
4039
associated with 'old_vers' */
4040
err = row_sel_build_committed_vers_for_mysql(
4041
clust_index, prebuilt, rec,
4042
&offsets, &heap, &old_vers, &mtr);
4044
if (err != DB_SUCCESS) {
4046
goto lock_wait_or_error;
4049
mutex_enter(&kernel_mutex);
4050
if (trx->was_chosen_as_deadlock_victim) {
4051
mutex_exit(&kernel_mutex);
4054
goto lock_wait_or_error;
4056
if (UNIV_LIKELY(trx->wait_lock != NULL)) {
4057
lock_cancel_waiting_and_release(
4059
trx_reset_new_rec_lock_info(trx);
4061
mutex_exit(&kernel_mutex);
4063
/* The lock was granted while we were
4064
searching for the last committed version.
4065
Do a normal locking read. */
4067
offsets = rec_get_offsets(rec, index, offsets,
4073
mutex_exit(&kernel_mutex);
4075
if (old_vers == NULL) {
4076
/* The row was not yet committed */
4081
did_semi_consistent_read = TRUE;
4086
goto lock_wait_or_error;
4089
/* This is a non-locking consistent read: if necessary, fetch
4090
a previous version of the record */
4092
if (trx->isolation_level == TRX_ISO_READ_UNCOMMITTED) {
4094
/* Do nothing: we let a non-locking SELECT read the
4095
latest version of the record */
4097
} else if (index == clust_index) {
4099
/* Fetch a previous version of the row if the current
4100
one is not visible in the snapshot; if we have a very
4101
high force recovery level set, we try to avoid crashes
4102
by skipping this lookup */
4104
if (UNIV_LIKELY(srv_force_recovery < 5)
4105
&& !lock_clust_rec_cons_read_sees(
4106
rec, index, offsets, trx->read_view)) {
4109
/* The following call returns 'offsets'
4110
associated with 'old_vers' */
4111
err = row_sel_build_prev_vers_for_mysql(
4112
trx->read_view, clust_index,
4113
prebuilt, rec, &offsets, &heap,
4116
if (err != DB_SUCCESS) {
4118
goto lock_wait_or_error;
4121
if (old_vers == NULL) {
4122
/* The row did not exist yet in
4130
} else if (!lock_sec_rec_cons_read_sees(rec, index,
4132
/* We are looking into a non-clustered index,
4133
and to get the right version of the record we
4134
have to look also into the clustered index: this
4135
is necessary, because we can only get the undo
4136
information via the clustered index record. */
4138
ut_ad(index != clust_index);
4139
get_clust_rec= TRUE;
4140
goto idx_cond_check;
4144
/* NOTE that at this point rec can be an old version of a clustered
4145
index record built for a consistent read. We cannot assume after this
4146
point that rec is on a buffer pool page. Functions like
4147
page_rec_is_comp() cannot be used! */
4149
if (UNIV_UNLIKELY(rec_get_deleted_flag(rec, comp))) {
4151
/* The record is delete-marked: we can skip it */
4153
if ((srv_locks_unsafe_for_binlog
4154
|| trx->isolation_level == TRX_ISO_READ_COMMITTED)
4155
&& prebuilt->select_lock_type != LOCK_NONE
4156
&& !did_semi_consistent_read) {
4158
/* No need to keep a lock on a delete-marked record
4159
if we do not want to use next-key locking. */
4161
row_unlock_for_mysql(prebuilt, TRUE);
4164
/* This is an optimization to skip setting the next key lock
4165
on the record that follows this delete-marked record. This
4166
optimization works because of the unique search criteria
4167
which precludes the presence of a range lock between this
4168
delete marked record and the record following it.
4170
For now this is applicable only to clustered indexes while
4171
doing a unique search. There is scope for further optimization
4172
applicable to unique secondary indexes. Current behaviour is
4173
to widen the scope of a lock on an already delete marked record
4174
if the same record is deleted twice by the same transaction */
4175
if (index == clust_index && unique_search) {
4176
err = DB_RECORD_NOT_FOUND;
4186
if (prebuilt->idx_cond_func)
4189
ut_ad(prebuilt->template_type != ROW_MYSQL_DUMMY_TEMPLATE);
4190
offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
4191
row_sel_store_mysql_rec(buf, prebuilt, rec,
4192
offsets, 0, prebuilt->n_index_fields);
4193
res= prebuilt->idx_cond_func(prebuilt->idx_cond_func_arg);
4198
err = DB_RECORD_NOT_FOUND;
4199
goto idx_cond_failed;
4203
/* Get the clustered index record if needed, if we did not do the
4204
search using the clustered index. */
4205
if (get_clust_rec || (index != clust_index &&
4206
prebuilt->need_to_access_clustered)) {
4208
/* We use a 'goto' to the preceding label if a consistent
4209
read of a secondary index record requires us to look up old
4210
versions of the associated clustered index record. */
4212
ut_ad(rec_offs_validate(rec, index, offsets));
4214
/* It was a non-clustered index and we must fetch also the
4215
clustered index record */
4217
mtr_has_extra_clust_latch = TRUE;
4219
/* The following call returns 'offsets' associated with
4220
'clust_rec'. Note that 'clust_rec' can be an old version
4221
built for a consistent read. */
4223
err = row_sel_get_clust_rec_for_mysql(prebuilt, index, rec,
4225
&offsets, &heap, &mtr);
4226
if (err != DB_SUCCESS) {
4228
goto lock_wait_or_error;
4231
if (clust_rec == NULL) {
4232
/* The record did not exist in the read view */
4233
ut_ad(prebuilt->select_lock_type == LOCK_NONE);
4238
if (UNIV_UNLIKELY(rec_get_deleted_flag(clust_rec, comp))) {
4240
/* The record is delete marked: we can skip it */
4242
if ((srv_locks_unsafe_for_binlog
4243
|| trx->isolation_level == TRX_ISO_READ_COMMITTED)
4244
&& prebuilt->select_lock_type != LOCK_NONE) {
4246
/* No need to keep a lock on a delete-marked
4247
record if we do not want to use next-key
4250
row_unlock_for_mysql(prebuilt, TRUE);
4256
if (prebuilt->need_to_access_clustered) {
4258
result_rec = clust_rec;
4260
ut_ad(rec_offs_validate(result_rec, clust_index,
4263
/* We used 'offsets' for the clust rec, recalculate
4265
offsets = rec_get_offsets(rec, index, offsets,
4266
ULINT_UNDEFINED, &heap);
4273
/* We found a qualifying record 'result_rec'. At this point,
4274
'offsets' are associated with 'result_rec'. */
4276
ut_ad(rec_offs_validate(result_rec,
4277
result_rec != rec ? clust_index : index,
4280
if ((match_mode == ROW_SEL_EXACT
4281
|| prebuilt->n_rows_fetched >= MYSQL_FETCH_CACHE_THRESHOLD)
4282
&& prebuilt->select_lock_type == LOCK_NONE
4283
&& !prebuilt->templ_contains_blob
4284
&& !prebuilt->clust_index_was_generated
4285
&& !prebuilt->used_in_HANDLER
4286
&& prebuilt->template_type
4287
!= ROW_MYSQL_DUMMY_TEMPLATE) {
4289
/* Inside an update, for example, we do not cache rows,
4290
since we may use the cursor position to do the actual
4291
update, that is why we require ...lock_type == LOCK_NONE.
4292
Since we keep space in prebuilt only for the BLOBs of
4293
a single row, we cannot cache rows in the case there
4294
are BLOBs in the fields to be fetched. In HANDLER we do
4295
not cache rows because there the cursor is a scrollable
4297
some_fields_in_buffer= (index != clust_index &&
4298
prebuilt->idx_cond_func);
4300
row_sel_push_cache_row_for_mysql(prebuilt, result_rec,
4302
some_fields_in_buffer?
4303
prebuilt->n_index_fields: 0,
4305
if (prebuilt->n_fetch_cached == MYSQL_FETCH_CACHE_SIZE) {
4312
if (prebuilt->template_type == ROW_MYSQL_DUMMY_TEMPLATE) {
4313
memcpy(buf + 4, result_rec
4314
- rec_offs_extra_size(offsets),
4315
rec_offs_size(offsets));
4316
mach_write_to_4(buf,
4317
rec_offs_extra_size(offsets) + 4);
4319
if (!row_sel_store_mysql_rec(buf, prebuilt,
4320
result_rec, offsets,
4321
prebuilt->idx_cond_func?
4322
prebuilt->n_index_fields: 0,
4323
prebuilt->n_template)) {
4324
err = DB_TOO_BIG_RECORD;
4326
goto lock_wait_or_error;
4330
if (prebuilt->clust_index_was_generated) {
4331
if (result_rec != rec) {
4332
offsets = rec_get_offsets(
4333
rec, index, offsets, ULINT_UNDEFINED,
4336
row_sel_store_row_id_to_prebuilt(prebuilt, rec,
4341
/* From this point on, 'offsets' are invalid. */
4344
/* We have an optimization to save CPU time: if this is a consistent
4345
read on a unique condition on the clustered index, then we do not
4346
store the pcur position, because any fetch next or prev will anyway
4347
return 'end of file'. Exceptions are locking reads and the MySQL
4348
HANDLER command where the user can move the cursor with PREV or NEXT
4349
even after a unique search. */
4354
if (!unique_search_from_clust_index
4355
|| prebuilt->select_lock_type != LOCK_NONE
4356
|| prebuilt->used_in_HANDLER) {
4358
/* Inside an update always store the cursor position */
4360
btr_pcur_store_position(pcur, &mtr);
4366
/* Reset the old and new "did semi-consistent read" flags. */
4367
get_clust_rec= FALSE;
4368
if (UNIV_UNLIKELY(prebuilt->row_read_type
4369
== ROW_READ_DID_SEMI_CONSISTENT)) {
4370
prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT;
4372
did_semi_consistent_read = FALSE;
4374
if (UNIV_UNLIKELY(srv_locks_unsafe_for_binlog
4375
|| trx->isolation_level == TRX_ISO_READ_COMMITTED)
4376
&& prebuilt->select_lock_type != LOCK_NONE) {
4378
trx_reset_new_rec_lock_info(trx);
4381
/*-------------------------------------------------------------*/
4382
/* PHASE 5: Move the cursor to the next index record */
4384
if (UNIV_UNLIKELY(mtr_has_extra_clust_latch)) {
4385
/* We must commit mtr if we are moving to the next
4386
non-clustered index record, because we could break the
4387
latching order if we would access a different clustered
4388
index page right away without releasing the previous. */
4390
btr_pcur_store_position(pcur, &mtr);
4393
mtr_has_extra_clust_latch = FALSE;
4396
if (sel_restore_position_for_mysql(&same_user_rec,
4398
pcur, moves_up, &mtr)) {
4399
#ifdef UNIV_SEARCH_DEBUG
4401
#endif /* UNIV_SEARCH_DEBUG */
4408
if (UNIV_UNLIKELY(!btr_pcur_move_to_next(pcur, &mtr))) {
4410
btr_pcur_store_position(pcur, &mtr);
4412
if (match_mode != 0) {
4413
err = DB_RECORD_NOT_FOUND;
4415
err = DB_END_OF_INDEX;
4421
if (UNIV_UNLIKELY(!btr_pcur_move_to_prev(pcur, &mtr))) {
4426
#ifdef UNIV_SEARCH_DEBUG
4428
#endif /* UNIV_SEARCH_DEBUG */
4433
/* Reset the old and new "did semi-consistent read" flags. */
4434
if (UNIV_UNLIKELY(prebuilt->row_read_type
4435
== ROW_READ_DID_SEMI_CONSISTENT)) {
4436
prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT;
4438
did_semi_consistent_read = FALSE;
4440
/*-------------------------------------------------------------*/
4442
btr_pcur_store_position(pcur, &mtr);
4445
mtr_has_extra_clust_latch = FALSE;
4447
trx->error_state = err;
4449
/* The following is a patch for MySQL */
4451
que_thr_stop_for_mysql(thr);
4453
thr->lock_state = QUE_THR_LOCK_ROW;
4455
if (row_mysql_handle_errors(&err, trx, thr, NULL)) {
4456
/* It was a lock wait, and it ended */
4458
thr->lock_state = QUE_THR_LOCK_NOLOCK;
4461
sel_restore_position_for_mysql(&same_user_rec,
4462
BTR_SEARCH_LEAF, pcur,
4465
if ((srv_locks_unsafe_for_binlog
4466
|| trx->isolation_level == TRX_ISO_READ_COMMITTED)
4467
&& !same_user_rec) {
4469
/* Since we were not able to restore the cursor
4470
on the same user record, we cannot use
4471
row_unlock_for_mysql() to unlock any records, and
4472
we must thus reset the new rec lock info. Since
4473
in lock0lock.c we have blocked the inheriting of gap
4474
X-locks, we actually do not have any new record locks
4477
Note that if we were able to restore on the 'same'
4478
user record, it is still possible that we were actually
4479
waiting on a delete-marked record, and meanwhile
4480
it was removed by purge and inserted again by some
4481
other user. But that is no problem, because in
4482
rec_loop we will again try to set a lock, and
4483
new_rec_lock_info in trx will be right at the end. */
4485
trx_reset_new_rec_lock_info(trx);
4488
mode = pcur->search_mode;
4493
thr->lock_state = QUE_THR_LOCK_NOLOCK;
4495
#ifdef UNIV_SEARCH_DEBUG
4496
/* fputs("Using ", stderr);
4497
dict_index_name_print(stderr, index);
4498
fprintf(stderr, " cnt %lu ret value %lu err\n", cnt, err); */
4499
#endif /* UNIV_SEARCH_DEBUG */
4503
/*-------------------------------------------------------------*/
4504
que_thr_stop_for_mysql_no_error(thr, trx);
4508
if (prebuilt->n_fetch_cached > 0) {
4509
row_sel_pop_cached_row_for_mysql(buf, prebuilt);
4514
#ifdef UNIV_SEARCH_DEBUG
4515
/* fputs("Using ", stderr);
4516
dict_index_name_print(stderr, index);
4517
fprintf(stderr, " cnt %lu ret value %lu err\n", cnt, err); */
4518
#endif /* UNIV_SEARCH_DEBUG */
4519
if (err == DB_SUCCESS) {
4525
if (UNIV_LIKELY_NULL(heap)) {
4526
mem_heap_free(heap);
4529
/* Set or reset the "did semi-consistent read" flag on return.
4530
The flag did_semi_consistent_read is set if and only if
4531
the record being returned was fetched with a semi-consistent read. */
4532
ut_ad(prebuilt->row_read_type != ROW_READ_WITH_LOCKS
4533
|| !did_semi_consistent_read);
4535
if (UNIV_UNLIKELY(prebuilt->row_read_type != ROW_READ_WITH_LOCKS)) {
4536
if (UNIV_UNLIKELY(did_semi_consistent_read)) {
4537
prebuilt->row_read_type = ROW_READ_DID_SEMI_CONSISTENT;
4539
prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT;
/***********************************************************************
Checks if MySQL at the moment is allowed for this table to retrieve a
consistent read result, or store it to the query cache. */

ibool
row_search_check_if_query_cache_permitted(
/*======================================*/
					/* out: TRUE if storing or retrieving
					from the query cache is permitted */
	trx_t*		trx,		/* in: transaction object */
	const char*	norm_name)	/* in: concatenation of database name,
					'/' char, table name */
{
	dict_table_t*	table;
	ibool		ret	= FALSE;

	table = dict_table_get(norm_name, FALSE);

	if (table == NULL) {

		return(FALSE);
	}

	mutex_enter(&kernel_mutex);

	/* Start the transaction if it is not started yet */

	trx_start_if_not_started_low(trx);

	/* If there are locks on the table or some trx has invalidated the
	cache up to our trx id, then ret = FALSE.
	We do not check what type locks there are on the table, though only
	IX type locks actually would require ret = FALSE. */

	if (UT_LIST_GET_LEN(table->locks) == 0
	    && ut_dulint_cmp(trx->id,
			     table->query_cache_inv_trx_id) >= 0) {

		ret = TRUE;

		/* If the isolation level is high, assign a read view for the
		transaction if it does not yet have one */

		if (trx->isolation_level >= TRX_ISO_REPEATABLE_READ
		    && !trx->read_view) {

			trx->read_view = read_view_open_now(
				trx->id, trx->global_read_view_heap);
			trx->global_read_view = trx->read_view;
		}
	}

	mutex_exit(&kernel_mutex);

	return(ret);
}
/***********************************************************************
Read the AUTOINC column from the current row. If the value is less than
0 and the type is not unsigned then we reset the value to 0. */
static
ib_longlong
row_search_autoinc_read_column(
/*===========================*/
					/* out: value read from the column */
	dict_index_t*	index,		/* in: index to read from */
	const rec_t*	rec,		/* in: current rec */
	ulint		col_no,		/* in: column number */
	ibool		unsigned_type)	/* in: signed or unsigned flag */
{
	ulint		len;
	byte*		data;
	ib_longlong	value;
	mem_heap_t*	heap = NULL;
	/* Our requirement is that dest should be word aligned. */
	byte		dest[sizeof(value)];
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
	ulint*		offsets	= offsets_;

	*offsets_ = sizeof offsets_ / sizeof *offsets_;

	/* TODO: We have to cast away the const of rec for now. This needs
	to be fixed later.*/
	offsets = rec_get_offsets(
		(rec_t*) rec, index, offsets, ULINT_UNDEFINED, &heap);

	/* TODO: We have to cast away the const of rec for now. This needs
	to be fixed later.*/
	data = rec_get_nth_field((rec_t*)rec, offsets, col_no, &len);

	ut_a(len != UNIV_SQL_NULL);
	ut_a(len <= sizeof value);

	mach_read_int_type(dest, data, len, unsigned_type);

	/* The assumption here is that the AUTOINC value can't be negative
	and that dest is word aligned. */
	switch (len) {
	case 8:
		value = *(ib_longlong*) dest;
		break;

	case 4:
		value = *(ib_uint32_t*) dest;
		break;

	case 3:
		value = *(ib_uint32_t*) dest;
		value &= 0xFFFFFF;
		break;

	case 2:
		value = *(uint16 *) dest;
		break;

	case 1:
		value = *dest;
		break;

	default:
		ut_error;
	}

	if (UNIV_LIKELY_NULL(heap)) {
		mem_heap_free(heap);
	}

	if (!unsigned_type && value < 0) {
		value = 0;
	}

	return(value);
}
/***********************************************************************
Get the last row. */
static
const rec_t*
row_search_autoinc_get_rec(
/*=======================*/
				/* out: current rec or NULL */
	btr_pcur_t*	pcur,	/* in: the current cursor */
	mtr_t*		mtr)	/* in: mini transaction */
{
	do {
		const rec_t* rec = btr_pcur_get_rec(pcur);

		if (page_rec_is_user_rec(rec)) {
			return(rec);
		}
	} while (btr_pcur_move_to_prev(pcur, mtr));

	return(NULL);
}
/***********************************************************************
Read the max AUTOINC value from an index. */

ulint
row_search_max_autoinc(
/*===================*/
					/* out: DB_SUCCESS if all OK else
					error code, DB_RECORD_NOT_FOUND if
					column name can't be found in index */
	dict_index_t*	index,		/* in: index to search */
	const char*	col_name,	/* in: name of autoinc column */
	ib_longlong*	value)		/* out: AUTOINC value read */
{
	ulint		i;
	ulint		n_cols;
	dict_field_t*	dfield = NULL;
	ulint		error = DB_SUCCESS;

	n_cols = dict_index_get_n_ordering_defined_by_user(index);

	/* Search the index for the AUTOINC column name */
	for (i = 0; i < n_cols; ++i) {
		dfield = dict_index_get_nth_field(index, i);

		if (strcmp(col_name, dfield->name) == 0) {
			break;
		}
	}

	/* Must find the AUTOINC column name */
	if (i < n_cols && dfield) {
		mtr_t		mtr;
		btr_pcur_t	pcur;

		mtr_start(&mtr);

		/* Open at the high/right end (FALSE), and INIT
		cursor (TRUE) */
		btr_pcur_open_at_index_side(
			FALSE, index, BTR_SEARCH_LEAF, &pcur, TRUE, &mtr);

		if (page_get_n_recs(btr_pcur_get_page(&pcur)) > 0) {
			const rec_t*	rec;

			rec = row_search_autoinc_get_rec(&pcur, &mtr);

			if (rec != NULL) {
				ibool unsigned_type = (
					dfield->col->prtype & DATA_UNSIGNED);

				*value = row_search_autoinc_read_column(
					index, rec, i, unsigned_type);
			}
		}

		btr_pcur_close(&pcur);

		mtr_commit(&mtr);
	} else {
		error = DB_RECORD_NOT_FOUND;