/*****************************************************************************

Copyright (c) 1997, 2010, Innobase Oy. All Rights Reserved.
Copyright (c) 2008, Google Inc.

Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
briefly in the InnoDB documentation. The contributions by Google are
incorporated with their permission, and subject to the conditions contained in
the file COPYING.Google.

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
St, Fifth Floor, Boston, MA 02110-1301 USA

*****************************************************************************/

/***************************************************//**
Created 12/19/1997 Heikki Tuuri
*******************************************************/
39
#include "dict0dict.h"
40
#include "dict0boot.h"
46
#include "mach0data.h"
52
#include "lock0lock.h"
53
#include "eval0eval.h"
55
#include "pars0pars.h"
56
#include "row0mysql.h"
57
#include "read0read.h"
59
#include "ha_prototypes.h"
61
/* Maximum number of rows to prefetch; MySQL interface has another parameter */
62
#define SEL_MAX_N_PREFETCH 16
64
/* Number of rows fetched, after which to start prefetching; MySQL interface
65
has another parameter */
66
#define SEL_PREFETCH_LIMIT 1
68
/* When a select has accessed about this many pages, it returns control back
69
to que_run_threads: this is to allow canceling runaway queries */
71
#define SEL_COST_LIMIT 100
73
/* Flags for search shortcut */
75
#define SEL_EXHAUSTED 1
78
/********************************************************************//**
79
Returns TRUE if the user-defined column in a secondary index record
80
is alphabetically the same as the corresponding BLOB column in the clustered
82
NOTE: the comparison is NOT done as a binary comparison, but character
83
fields are compared with collation!
84
@return TRUE if the columns are equal */
87
row_sel_sec_rec_is_for_blob(
88
/*========================*/
89
ulint mtype, /*!< in: main type */
90
ulint prtype, /*!< in: precise type */
91
ulint mbminlen, /*!< in: minimum length of a
92
multi-byte character */
93
ulint mbmaxlen, /*!< in: maximum length of a
94
multi-byte character */
95
const byte* clust_field, /*!< in: the locally stored part of
96
the clustered index column, including
97
the BLOB pointer; the clustered
98
index record must be covered by
99
a lock or a page latch to protect it
100
against deletion (rollback or purge) */
101
ulint clust_len, /*!< in: length of clust_field */
102
const byte* sec_field, /*!< in: column in secondary index */
103
ulint sec_len, /*!< in: length of sec_field */
104
ulint zip_size) /*!< in: compressed page size, or 0 */
107
byte buf[DICT_MAX_INDEX_COL_LEN];
109
len = btr_copy_externally_stored_field_prefix(buf, sizeof buf,
111
clust_field, clust_len);
113
if (UNIV_UNLIKELY(len == 0)) {
114
/* The BLOB was being deleted as the server crashed.
115
There should not be any secondary index records
116
referring to this clustered index record, because
117
btr_free_externally_stored_field() is called after all
118
secondary index entries of the row have been purged. */
122
len = dtype_get_at_most_n_mbchars(prtype, mbminlen, mbmaxlen,
123
sec_len, len, (const char*) buf);
125
return(!cmp_data_data(mtype, prtype, buf, len, sec_field, sec_len));
128
/********************************************************************//**
129
Returns TRUE if the user-defined column values in a secondary index record
130
are alphabetically the same as the corresponding columns in the clustered
132
NOTE: the comparison is NOT done as a binary comparison, but character
133
fields are compared with collation!
134
@return TRUE if the secondary record is equal to the corresponding
135
fields in the clustered record, when compared with collation;
136
FALSE if not equal or if the clustered record has been marked for deletion */
139
row_sel_sec_rec_is_for_clust_rec(
140
/*=============================*/
141
const rec_t* sec_rec, /*!< in: secondary index record */
142
dict_index_t* sec_index, /*!< in: secondary index */
143
const rec_t* clust_rec, /*!< in: clustered index record;
144
must be protected by a lock or
145
a page latch against deletion
146
in rollback or purge */
147
dict_index_t* clust_index) /*!< in: clustered index */
149
const byte* sec_field;
151
const byte* clust_field;
154
mem_heap_t* heap = NULL;
155
ulint clust_offsets_[REC_OFFS_NORMAL_SIZE];
156
ulint sec_offsets_[REC_OFFS_SMALL_SIZE];
157
ulint* clust_offs = clust_offsets_;
158
ulint* sec_offs = sec_offsets_;
159
ibool is_equal = TRUE;
161
rec_offs_init(clust_offsets_);
162
rec_offs_init(sec_offsets_);
164
if (rec_get_deleted_flag(clust_rec,
165
dict_table_is_comp(clust_index->table))) {
167
/* The clustered index record is delete-marked;
168
it is not visible in the read view. Besides,
169
if there are any externally stored columns,
170
some of them may have already been purged. */
174
clust_offs = rec_get_offsets(clust_rec, clust_index, clust_offs,
175
ULINT_UNDEFINED, &heap);
176
sec_offs = rec_get_offsets(sec_rec, sec_index, sec_offs,
177
ULINT_UNDEFINED, &heap);
179
n = dict_index_get_n_ordering_defined_by_user(sec_index);
181
for (i = 0; i < n; i++) {
182
const dict_field_t* ifield;
183
const dict_col_t* col;
188
ifield = dict_index_get_nth_field(sec_index, i);
189
col = dict_field_get_col(ifield);
190
clust_pos = dict_col_get_clust_pos(col, clust_index);
192
clust_field = rec_get_nth_field(
193
clust_rec, clust_offs, clust_pos, &clust_len);
194
sec_field = rec_get_nth_field(sec_rec, sec_offs, i, &sec_len);
198
if (ifield->prefix_len > 0 && len != UNIV_SQL_NULL) {
200
if (rec_offs_nth_extern(clust_offs, clust_pos)) {
201
len -= BTR_EXTERN_FIELD_REF_SIZE;
204
len = dtype_get_at_most_n_mbchars(
205
col->prtype, col->mbminlen, col->mbmaxlen,
206
ifield->prefix_len, len, (char*) clust_field);
208
if (rec_offs_nth_extern(clust_offs, clust_pos)
210
if (!row_sel_sec_rec_is_for_blob(
211
col->mtype, col->prtype,
212
col->mbminlen, col->mbmaxlen,
213
clust_field, clust_len,
216
clust_index->table))) {
224
if (0 != cmp_data_data(col->mtype, col->prtype,
226
sec_field, sec_len)) {
234
if (UNIV_LIKELY_NULL(heap)) {
240
/*********************************************************************//**
241
Creates a select node struct.
242
@return own: select node struct */
247
mem_heap_t* heap) /*!< in: memory heap where created */
251
node = mem_heap_alloc(heap, sizeof(sel_node_t));
252
node->common.type = QUE_NODE_SELECT;
253
node->state = SEL_NODE_OPEN;
260
/*********************************************************************//**
261
Frees the memory private to a select node when a query graph is freed,
262
does not free the heap where the node was originally created. */
265
sel_node_free_private(
266
/*==================*/
267
sel_node_t* node) /*!< in: select node struct */
272
if (node->plans != NULL) {
273
for (i = 0; i < node->n_tables; i++) {
274
plan = sel_node_get_nth_plan(node, i);
276
btr_pcur_close(&(plan->pcur));
277
btr_pcur_close(&(plan->clust_pcur));
279
if (plan->old_vers_heap) {
280
mem_heap_free(plan->old_vers_heap);
286
/*********************************************************************//**
287
Evaluates the values in a select list. If there are aggregate functions,
288
their argument value is added to the aggregate total. */
291
sel_eval_select_list(
292
/*=================*/
293
sel_node_t* node) /*!< in: select node */
297
exp = node->select_list;
302
exp = que_node_get_next(exp);
306
/*********************************************************************//**
307
Assigns the values in the select list to the possible into-variables in
308
SELECT ... INTO ... */
311
sel_assign_into_var_values(
312
/*=======================*/
313
sym_node_t* var, /*!< in: first variable in a list of variables */
314
sel_node_t* node) /*!< in: select node */
323
exp = node->select_list;
328
eval_node_copy_val(var->alias, exp);
330
exp = que_node_get_next(exp);
331
var = que_node_get_next(var);
335
/*********************************************************************//**
336
Resets the aggregate value totals in the select list of an aggregate type
340
sel_reset_aggregate_vals(
341
/*=====================*/
342
sel_node_t* node) /*!< in: select node */
344
func_node_t* func_node;
346
ut_ad(node->is_aggregate);
348
func_node = node->select_list;
351
eval_node_set_int_val(func_node, 0);
353
func_node = que_node_get_next(func_node);
356
node->aggregate_already_fetched = FALSE;
359
/*********************************************************************//**
360
Copies the input variable values when an explicit cursor is opened. */
363
row_sel_copy_input_variable_vals(
364
/*=============================*/
365
sel_node_t* node) /*!< in: select node */
369
var = UT_LIST_GET_FIRST(node->copy_variables);
372
eval_node_copy_val(var, var->alias);
374
var->indirection = NULL;
376
var = UT_LIST_GET_NEXT(col_var_list, var);
380
/*********************************************************************//**
381
Fetches the column values from a record. */
384
row_sel_fetch_columns(
385
/*==================*/
386
dict_index_t* index, /*!< in: record index */
387
const rec_t* rec, /*!< in: record in a clustered or non-clustered
388
index; must be protected by a page latch */
389
const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
390
sym_node_t* column) /*!< in: first column in a column list, or
399
ut_ad(rec_offs_validate(rec, index, offsets));
401
if (dict_index_is_clust(index)) {
402
index_type = SYM_CLUST_FIELD_NO;
404
index_type = SYM_SEC_FIELD_NO;
408
mem_heap_t* heap = NULL;
411
field_no = column->field_nos[index_type];
413
if (field_no != ULINT_UNDEFINED) {
415
if (UNIV_UNLIKELY(rec_offs_nth_extern(offsets,
418
/* Copy an externally stored field to the
421
heap = mem_heap_create(1);
423
data = btr_rec_copy_externally_stored_field(
425
dict_table_zip_size(index->table),
426
field_no, &len, heap);
428
ut_a(len != UNIV_SQL_NULL);
432
data = rec_get_nth_field(rec, offsets,
435
needs_copy = column->copy_val;
439
eval_node_copy_and_alloc_val(column, data,
442
val = que_node_get_val(column);
443
dfield_set_data(val, data, len);
446
if (UNIV_LIKELY_NULL(heap)) {
451
column = UT_LIST_GET_NEXT(col_var_list, column);
455
/*********************************************************************//**
456
Allocates a prefetch buffer for a column when prefetch is first time done. */
459
sel_col_prefetch_buf_alloc(
460
/*=======================*/
461
sym_node_t* column) /*!< in: symbol table node for a column */
466
ut_ad(que_node_get_type(column) == QUE_NODE_SYMBOL);
468
column->prefetch_buf = mem_alloc(SEL_MAX_N_PREFETCH
469
* sizeof(sel_buf_t));
470
for (i = 0; i < SEL_MAX_N_PREFETCH; i++) {
471
sel_buf = column->prefetch_buf + i;
473
sel_buf->data = NULL;
475
sel_buf->val_buf_size = 0;
479
/*********************************************************************//**
480
Frees a prefetch buffer for a column, including the dynamically allocated
481
memory for data stored there. */
484
sel_col_prefetch_buf_free(
485
/*======================*/
486
sel_buf_t* prefetch_buf) /*!< in, own: prefetch buffer */
491
for (i = 0; i < SEL_MAX_N_PREFETCH; i++) {
492
sel_buf = prefetch_buf + i;
494
if (sel_buf->val_buf_size > 0) {
496
mem_free(sel_buf->data);
501
/*********************************************************************//**
502
Pops the column values for a prefetched, cached row from the column prefetch
503
buffers and places them to the val fields in the column nodes. */
506
sel_pop_prefetched_row(
507
/*===================*/
508
plan_t* plan) /*!< in: plan node for a table */
517
ut_ad(plan->n_rows_prefetched > 0);
519
column = UT_LIST_GET_FIRST(plan->columns);
522
val = que_node_get_val(column);
524
if (!column->copy_val) {
525
/* We did not really push any value for the
528
ut_ad(!column->prefetch_buf);
529
ut_ad(que_node_get_val_buf_size(column) == 0);
530
ut_d(dfield_set_null(val));
535
ut_ad(column->prefetch_buf);
536
ut_ad(!dfield_is_ext(val));
538
sel_buf = column->prefetch_buf + plan->first_prefetched;
540
data = sel_buf->data;
542
val_buf_size = sel_buf->val_buf_size;
544
/* We must keep track of the allocated memory for
545
column values to be able to free it later: therefore
546
we swap the values for sel_buf and val */
548
sel_buf->data = dfield_get_data(val);
549
sel_buf->len = dfield_get_len(val);
550
sel_buf->val_buf_size = que_node_get_val_buf_size(column);
552
dfield_set_data(val, data, len);
553
que_node_set_val_buf_size(column, val_buf_size);
555
column = UT_LIST_GET_NEXT(col_var_list, column);
558
plan->n_rows_prefetched--;
560
plan->first_prefetched++;
563
/*********************************************************************//**
564
Pushes the column values for a prefetched, cached row to the column prefetch
565
buffers from the val fields in the column nodes. */
568
sel_push_prefetched_row(
569
/*====================*/
570
plan_t* plan) /*!< in: plan node for a table */
580
if (plan->n_rows_prefetched == 0) {
582
plan->first_prefetched = 0;
584
pos = plan->n_rows_prefetched;
586
/* We have the convention that pushing new rows starts only
587
after the prefetch stack has been emptied: */
589
ut_ad(plan->first_prefetched == 0);
592
plan->n_rows_prefetched++;
594
ut_ad(pos < SEL_MAX_N_PREFETCH);
596
column = UT_LIST_GET_FIRST(plan->columns);
599
if (!column->copy_val) {
600
/* There is no sense to push pointers to database
601
page fields when we do not keep latch on the page! */
606
if (!column->prefetch_buf) {
607
/* Allocate a new prefetch buffer */
609
sel_col_prefetch_buf_alloc(column);
612
sel_buf = column->prefetch_buf + pos;
614
val = que_node_get_val(column);
616
data = dfield_get_data(val);
617
len = dfield_get_len(val);
618
val_buf_size = que_node_get_val_buf_size(column);
620
/* We must keep track of the allocated memory for
621
column values to be able to free it later: therefore
622
we swap the values for sel_buf and val */
624
dfield_set_data(val, sel_buf->data, sel_buf->len);
625
que_node_set_val_buf_size(column, sel_buf->val_buf_size);
627
sel_buf->data = data;
629
sel_buf->val_buf_size = val_buf_size;
631
column = UT_LIST_GET_NEXT(col_var_list, column);
635
/*********************************************************************//**
636
Builds a previous version of a clustered index record for a consistent read
637
@return DB_SUCCESS or error code */
640
row_sel_build_prev_vers(
641
/*====================*/
642
read_view_t* read_view, /*!< in: read view */
643
dict_index_t* index, /*!< in: plan node for table */
644
rec_t* rec, /*!< in: record in a clustered index */
645
ulint** offsets, /*!< in/out: offsets returned by
646
rec_get_offsets(rec, plan->index) */
647
mem_heap_t** offset_heap, /*!< in/out: memory heap from which
648
the offsets are allocated */
649
mem_heap_t** old_vers_heap, /*!< out: old version heap to use */
650
rec_t** old_vers, /*!< out: old version, or NULL if the
651
record does not exist in the view:
652
i.e., it was freshly inserted
654
mtr_t* mtr) /*!< in: mtr */
658
if (*old_vers_heap) {
659
mem_heap_empty(*old_vers_heap);
661
*old_vers_heap = mem_heap_create(512);
664
err = row_vers_build_for_consistent_read(
665
rec, mtr, index, offsets, read_view, offset_heap,
666
*old_vers_heap, old_vers);
670
/*********************************************************************//**
671
Builds the last committed version of a clustered index record for a
672
semi-consistent read.
673
@return DB_SUCCESS or error code */
676
row_sel_build_committed_vers_for_mysql(
677
/*===================================*/
678
dict_index_t* clust_index, /*!< in: clustered index */
679
row_prebuilt_t* prebuilt, /*!< in: prebuilt struct */
680
const rec_t* rec, /*!< in: record in a clustered index */
681
ulint** offsets, /*!< in/out: offsets returned by
682
rec_get_offsets(rec, clust_index) */
683
mem_heap_t** offset_heap, /*!< in/out: memory heap from which
684
the offsets are allocated */
685
const rec_t** old_vers, /*!< out: old version, or NULL if the
686
record does not exist in the view:
687
i.e., it was freshly inserted
689
mtr_t* mtr) /*!< in: mtr */
693
if (prebuilt->old_vers_heap) {
694
mem_heap_empty(prebuilt->old_vers_heap);
696
prebuilt->old_vers_heap = mem_heap_create(200);
699
err = row_vers_build_for_semi_consistent_read(
700
rec, mtr, clust_index, offsets, offset_heap,
701
prebuilt->old_vers_heap, old_vers);
705
/*********************************************************************//**
706
Tests the conditions which determine when the index segment we are searching
707
through has been exhausted.
708
@return TRUE if row passed the tests */
711
row_sel_test_end_conds(
712
/*===================*/
713
plan_t* plan) /*!< in: plan for the table; the column values must
714
already have been retrieved and the right sides of
715
comparisons evaluated */
719
/* All conditions in end_conds are comparisons of a column to an
722
cond = UT_LIST_GET_FIRST(plan->end_conds);
725
/* Evaluate the left side of the comparison, i.e., get the
726
column value if there is an indirection */
728
eval_sym(cond->args);
730
/* Do the comparison */
732
if (!eval_cmp(cond)) {
737
cond = UT_LIST_GET_NEXT(cond_list, cond);
743
/*********************************************************************//**
744
Tests the other conditions.
745
@return TRUE if row passed the tests */
748
row_sel_test_other_conds(
749
/*=====================*/
750
plan_t* plan) /*!< in: plan for the table; the column values must
751
already have been retrieved */
755
cond = UT_LIST_GET_FIRST(plan->other_conds);
760
if (!eval_node_get_ibool_val(cond)) {
765
cond = UT_LIST_GET_NEXT(cond_list, cond);
771
/*********************************************************************//**
772
Retrieves the clustered index record corresponding to a record in a
773
non-clustered index. Does the necessary locking.
774
@return DB_SUCCESS or error code */
777
row_sel_get_clust_rec(
778
/*==================*/
779
sel_node_t* node, /*!< in: select_node */
780
plan_t* plan, /*!< in: plan node for table */
781
rec_t* rec, /*!< in: record in a non-clustered index */
782
que_thr_t* thr, /*!< in: query thread */
783
rec_t** out_rec,/*!< out: clustered record or an old version of
784
it, NULL if the old version did not exist
785
in the read view, i.e., it was a fresh
787
mtr_t* mtr) /*!< in: mtr used to get access to the
788
non-clustered record; the same mtr is used to
789
access the clustered index */
795
mem_heap_t* heap = NULL;
796
ulint offsets_[REC_OFFS_NORMAL_SIZE];
797
ulint* offsets = offsets_;
798
rec_offs_init(offsets_);
802
offsets = rec_get_offsets(rec,
803
btr_pcur_get_btr_cur(&plan->pcur)->index,
804
offsets, ULINT_UNDEFINED, &heap);
806
row_build_row_ref_fast(plan->clust_ref, plan->clust_map, rec, offsets);
808
index = dict_table_get_first_index(plan->table);
810
btr_pcur_open_with_no_init(index, plan->clust_ref, PAGE_CUR_LE,
811
BTR_SEARCH_LEAF, &plan->clust_pcur,
814
clust_rec = btr_pcur_get_rec(&(plan->clust_pcur));
816
/* Note: only if the search ends up on a non-infimum record is the
817
low_match value the real match to the search tuple */
819
if (!page_rec_is_user_rec(clust_rec)
820
|| btr_pcur_get_low_match(&(plan->clust_pcur))
821
< dict_index_get_n_unique(index)) {
823
ut_a(rec_get_deleted_flag(rec,
824
dict_table_is_comp(plan->table)));
825
ut_a(node->read_view);
827
/* In a rare case it is possible that no clust rec is found
828
for a delete-marked secondary index record: if in row0umod.c
829
in row_undo_mod_remove_clust_low() we have already removed
830
the clust rec, while purge is still cleaning and removing
831
secondary index records associated with earlier versions of
832
the clustered index record. In that case we know that the
833
clustered index record did not exist in the read view of
839
offsets = rec_get_offsets(clust_rec, index, offsets,
840
ULINT_UNDEFINED, &heap);
842
if (!node->read_view) {
843
/* Try to place a lock on the index record */
845
/* If innodb_locks_unsafe_for_binlog option is used
846
or this session is using READ COMMITTED isolation level
847
we lock only the record, i.e., next-key locking is
852
trx = thr_get_trx(thr);
854
if (srv_locks_unsafe_for_binlog
855
|| trx->isolation_level <= TRX_ISO_READ_COMMITTED) {
856
lock_type = LOCK_REC_NOT_GAP;
858
lock_type = LOCK_ORDINARY;
861
err = lock_clust_rec_read_check_and_lock(
862
0, btr_pcur_get_block(&plan->clust_pcur),
863
clust_rec, index, offsets,
864
node->row_lock_mode, lock_type, thr);
866
if (err != DB_SUCCESS) {
871
/* This is a non-locking consistent read: if necessary, fetch
872
a previous version of the record */
876
if (!lock_clust_rec_cons_read_sees(clust_rec, index, offsets,
879
err = row_sel_build_prev_vers(
880
node->read_view, index, clust_rec,
881
&offsets, &heap, &plan->old_vers_heap,
884
if (err != DB_SUCCESS) {
889
clust_rec = old_vers;
891
if (clust_rec == NULL) {
896
/* If we had to go to an earlier version of row or the
897
secondary index record is delete marked, then it may be that
898
the secondary index record corresponding to clust_rec
899
(or old_vers) is not rec; in that case we must ignore
900
such row because in our snapshot rec would not have existed.
901
Remember that from rec we cannot see directly which transaction
902
id corresponds to it: we have to go to the clustered index
903
record. A query where we want to fetch all rows where
904
the secondary index value is in some interval would return
905
a wrong result if we would not drop rows which we come to
906
visit through secondary index records that would not really
907
exist in our snapshot. */
910
|| rec_get_deleted_flag(rec, dict_table_is_comp(
912
&& !row_sel_sec_rec_is_for_clust_rec(rec, plan->index,
918
/* Fetch the columns needed in test conditions. The clustered
919
index record is protected by a page latch that was acquired
920
when plan->clust_pcur was positioned. The latch will not be
921
released until mtr_commit(mtr). */
923
row_sel_fetch_columns(index, clust_rec, offsets,
924
UT_LIST_GET_FIRST(plan->columns));
925
*out_rec = clust_rec;
929
if (UNIV_LIKELY_NULL(heap)) {
935
/*********************************************************************//**
936
Sets a lock on a record.
937
@return DB_SUCCESS or error code */
942
const buf_block_t* block, /*!< in: buffer block of rec */
943
const rec_t* rec, /*!< in: record */
944
dict_index_t* index, /*!< in: index */
945
const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
946
ulint mode, /*!< in: lock mode */
947
ulint type, /*!< in: LOCK_ORDINARY, LOCK_GAP, or
949
que_thr_t* thr) /*!< in: query thread */
954
trx = thr_get_trx(thr);
956
if (UT_LIST_GET_LEN(trx->trx_locks) > 10000) {
957
if (buf_LRU_buf_pool_running_out()) {
959
return(DB_LOCK_TABLE_FULL);
963
if (dict_index_is_clust(index)) {
964
err = lock_clust_rec_read_check_and_lock(
965
0, block, rec, index, offsets, mode, type, thr);
967
err = lock_sec_rec_read_check_and_lock(
968
0, block, rec, index, offsets, mode, type, thr);
974
/*********************************************************************//**
975
Opens a pcur to a table index. */
980
plan_t* plan, /*!< in: table plan */
981
ibool search_latch_locked,
982
/*!< in: TRUE if the thread currently
983
has the search latch locked in
985
mtr_t* mtr) /*!< in: mtr */
991
ulint has_search_latch = 0; /* RW_S_LATCH or 0 */
994
if (search_latch_locked) {
995
has_search_latch = RW_S_LATCH;
1000
/* Calculate the value of the search tuple: the exact match columns
1001
get their expressions evaluated when we evaluate the right sides of
1004
cond = UT_LIST_GET_FIRST(plan->end_conds);
1007
eval_exp(que_node_get_next(cond->args));
1009
cond = UT_LIST_GET_NEXT(cond_list, cond);
1013
n_fields = dtuple_get_n_fields(plan->tuple);
1015
if (plan->n_exact_match < n_fields) {
1016
/* There is a non-exact match field which must be
1017
evaluated separately */
1019
eval_exp(plan->tuple_exps[n_fields - 1]);
1022
for (i = 0; i < n_fields; i++) {
1023
exp = plan->tuple_exps[i];
1025
dfield_copy_data(dtuple_get_nth_field(plan->tuple, i),
1026
que_node_get_val(exp));
1029
/* Open pcur to the index */
1031
btr_pcur_open_with_no_init(index, plan->tuple, plan->mode,
1032
BTR_SEARCH_LEAF, &plan->pcur,
1033
has_search_latch, mtr);
1035
/* Open the cursor to the start or the end of the index
1038
btr_pcur_open_at_index_side(plan->asc, index, BTR_SEARCH_LEAF,
1039
&(plan->pcur), FALSE, mtr);
1042
ut_ad(plan->n_rows_prefetched == 0);
1043
ut_ad(plan->n_rows_fetched == 0);
1044
ut_ad(plan->cursor_at_end == FALSE);
1046
plan->pcur_is_open = TRUE;
1049
/*********************************************************************//**
1050
Restores a stored pcur position to a table index.
1051
@return TRUE if the cursor should be moved to the next record after we
1052
return from this function (moved to the previous, in the case of a
1053
descending cursor) without processing again the current cursor
1057
row_sel_restore_pcur_pos(
1058
/*=====================*/
1059
plan_t* plan, /*!< in: table plan */
1060
mtr_t* mtr) /*!< in: mtr */
1062
ibool equal_position;
1063
ulint relative_position;
1065
ut_ad(!plan->cursor_at_end);
1067
relative_position = btr_pcur_get_rel_pos(&(plan->pcur));
1069
equal_position = btr_pcur_restore_position(BTR_SEARCH_LEAF,
1070
&(plan->pcur), mtr);
1072
/* If the cursor is traveling upwards, and relative_position is
1074
(1) BTR_PCUR_BEFORE: this is not allowed, as we did not have a lock
1075
yet on the successor of the page infimum;
1076
(2) BTR_PCUR_AFTER: btr_pcur_restore_position placed the cursor on the
1077
first record GREATER than the predecessor of a page supremum; we have
1078
not yet processed the cursor record: no need to move the cursor to the
1080
(3) BTR_PCUR_ON: btr_pcur_restore_position placed the cursor on the
1081
last record LESS or EQUAL to the old stored user record; (a) if
1082
equal_position is FALSE, this means that the cursor is now on a record
1083
less than the old user record, and we must move to the next record;
1084
(b) if equal_position is TRUE, then if
1085
plan->stored_cursor_rec_processed is TRUE, we must move to the next
1086
record, else there is no need to move the cursor. */
1089
if (relative_position == BTR_PCUR_ON) {
1091
if (equal_position) {
1093
return(plan->stored_cursor_rec_processed);
1099
ut_ad(relative_position == BTR_PCUR_AFTER
1100
|| relative_position == BTR_PCUR_AFTER_LAST_IN_TREE);
1105
/* If the cursor is traveling downwards, and relative_position is
1107
(1) BTR_PCUR_BEFORE: btr_pcur_restore_position placed the cursor on
1108
the last record LESS than the successor of a page infimum; we have not
1109
processed the cursor record: no need to move the cursor;
1110
(2) BTR_PCUR_AFTER: btr_pcur_restore_position placed the cursor on the
1111
first record GREATER than the predecessor of a page supremum; we have
1112
processed the cursor record: we should move the cursor to the previous
1114
(3) BTR_PCUR_ON: btr_pcur_restore_position placed the cursor on the
1115
last record LESS or EQUAL to the old stored user record; (a) if
1116
equal_position is FALSE, this means that the cursor is now on a record
1117
less than the old user record, and we need not move to the previous
1118
record; (b) if equal_position is TRUE, then if
1119
plan->stored_cursor_rec_processed is TRUE, we must move to the previous
1120
record, else there is no need to move the cursor. */
1122
if (relative_position == BTR_PCUR_BEFORE
1123
|| relative_position == BTR_PCUR_BEFORE_FIRST_IN_TREE) {
1128
if (relative_position == BTR_PCUR_ON) {
1130
if (equal_position) {
1132
return(plan->stored_cursor_rec_processed);
1138
ut_ad(relative_position == BTR_PCUR_AFTER
1139
|| relative_position == BTR_PCUR_AFTER_LAST_IN_TREE);
1144
/*********************************************************************//**
1145
Resets a plan cursor to a closed state. */
1150
plan_t* plan) /*!< in: plan */
1152
plan->pcur_is_open = FALSE;
1153
plan->cursor_at_end = FALSE;
1154
plan->n_rows_fetched = 0;
1155
plan->n_rows_prefetched = 0;
1158
/*********************************************************************//**
1159
Tries to do a shortcut to fetch a clustered index record with a unique key,
1160
using the hash index if possible (not always).
1161
@return SEL_FOUND, SEL_EXHAUSTED, SEL_RETRY */
1164
row_sel_try_search_shortcut(
1165
/*========================*/
1166
sel_node_t* node, /*!< in: select node for a consistent read */
1167
plan_t* plan, /*!< in: plan for a unique search in clustered
1169
mtr_t* mtr) /*!< in: mtr */
1171
dict_index_t* index;
1173
mem_heap_t* heap = NULL;
1174
ulint offsets_[REC_OFFS_NORMAL_SIZE];
1175
ulint* offsets = offsets_;
1177
rec_offs_init(offsets_);
1179
index = plan->index;
1181
ut_ad(node->read_view);
1182
ut_ad(plan->unique_search);
1183
ut_ad(!plan->must_get_clust);
1184
#ifdef UNIV_SYNC_DEBUG
1185
ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
1186
#endif /* UNIV_SYNC_DEBUG */
1188
row_sel_open_pcur(plan, TRUE, mtr);
1190
rec = btr_pcur_get_rec(&(plan->pcur));
1192
if (!page_rec_is_user_rec(rec)) {
1197
ut_ad(plan->mode == PAGE_CUR_GE);
1199
/* As the cursor is now placed on a user record after a search with
1200
the mode PAGE_CUR_GE, the up_match field in the cursor tells how many
1201
fields in the user record matched to the search tuple */
1203
if (btr_pcur_get_up_match(&(plan->pcur)) < plan->n_exact_match) {
1205
return(SEL_EXHAUSTED);
1208
/* This is a non-locking consistent read: if necessary, fetch
1209
a previous version of the record */
1211
offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
1213
if (dict_index_is_clust(index)) {
1214
if (!lock_clust_rec_cons_read_sees(rec, index, offsets,
1219
} else if (!lock_sec_rec_cons_read_sees(rec, node->read_view)) {
1225
/* Test the deleted flag. */
1227
if (rec_get_deleted_flag(rec, dict_table_is_comp(plan->table))) {
1229
ret = SEL_EXHAUSTED;
1233
/* Fetch the columns needed in test conditions. The index
1234
record is protected by a page latch that was acquired when
1235
plan->pcur was positioned. The latch will not be released
1236
until mtr_commit(mtr). */
1238
row_sel_fetch_columns(index, rec, offsets,
1239
UT_LIST_GET_FIRST(plan->columns));
1241
/* Test the rest of search conditions */
1243
if (!row_sel_test_other_conds(plan)) {
1245
ret = SEL_EXHAUSTED;
1249
ut_ad(plan->pcur.latch_mode == BTR_SEARCH_LEAF);
1251
plan->n_rows_fetched++;
1254
if (UNIV_LIKELY_NULL(heap)) {
1255
mem_heap_free(heap);
1260
/*********************************************************************//**
1261
Performs a select step.
1262
@return DB_SUCCESS or error code */
1267
sel_node_t* node, /*!< in: select node */
1268
que_thr_t* thr) /*!< in: query thread */
1270
dict_index_t* index;
1277
ibool search_latch_locked;
1278
ibool consistent_read;
1280
/* The following flag becomes TRUE when we are doing a
1281
consistent read from a non-clustered index and we must look
1282
at the clustered index to find out the previous delete mark
1283
state of the non-clustered record: */
1285
ibool cons_read_requires_clust_rec = FALSE;
1286
ulint cost_counter = 0;
1287
ibool cursor_just_opened;
1288
ibool must_go_to_next;
1289
ibool mtr_has_extra_clust_latch = FALSE;
1290
/* TRUE if the search was made using
1291
a non-clustered index, and we had to
1292
access the clustered record: now &mtr
1293
contains a clustered index latch, and
1294
&mtr must be committed before we move
1295
to the next non-clustered record */
1298
mem_heap_t* heap = NULL;
1299
ulint offsets_[REC_OFFS_NORMAL_SIZE];
1300
ulint* offsets = offsets_;
1301
rec_offs_init(offsets_);
1303
ut_ad(thr->run_node == node);
1305
search_latch_locked = FALSE;
1307
if (node->read_view) {
1308
/* In consistent reads, we try to do with the hash index and
1309
not to use the buffer page get. This is to reduce memory bus
1310
load resulting from semaphore operations. The search latch
1311
will be s-locked when we access an index with a unique search
1312
condition, but not locked when we access an index with a
1313
less selective search condition. */
1315
consistent_read = TRUE;
1317
consistent_read = FALSE;
1323
This is the outer major loop in calculating a join. We come here when
1324
node->fetch_table changes, and after adding a row to aggregate totals
1325
and, of course, when this function is called. */
1327
ut_ad(mtr_has_extra_clust_latch == FALSE);
1329
plan = sel_node_get_nth_plan(node, node->fetch_table);
1330
index = plan->index;
1332
if (plan->n_rows_prefetched > 0) {
1333
sel_pop_prefetched_row(plan);
1335
goto next_table_no_mtr;
1338
if (plan->cursor_at_end) {
1339
/* The cursor has already reached the result set end: no more
1340
rows to process for this table cursor, as also the prefetch
1343
ut_ad(plan->pcur_is_open);
1345
goto table_exhausted_no_mtr;
1348
/* Open a cursor to index, or restore an open cursor position */
1352
if (consistent_read && plan->unique_search && !plan->pcur_is_open
1353
&& !plan->must_get_clust
1354
&& !plan->table->big_rows) {
1355
if (!search_latch_locked) {
1356
rw_lock_s_lock(&btr_search_latch);
1358
search_latch_locked = TRUE;
1359
} else if (rw_lock_get_writer(&btr_search_latch) == RW_LOCK_WAIT_EX) {
1361
/* There is an x-latch request waiting: release the
1362
s-latch for a moment; as an s-latch here is often
1363
kept for some 10 searches before being released,
1364
a waiting x-latch request would block other threads
1365
from acquiring an s-latch for a long time, lowering
1366
performance significantly in multiprocessors. */
1368
rw_lock_s_unlock(&btr_search_latch);
1369
rw_lock_s_lock(&btr_search_latch);
1372
found_flag = row_sel_try_search_shortcut(node, plan, &mtr);
1374
if (found_flag == SEL_FOUND) {
1378
} else if (found_flag == SEL_EXHAUSTED) {
1380
goto table_exhausted;
1383
ut_ad(found_flag == SEL_RETRY);
1385
plan_reset_cursor(plan);
1391
if (search_latch_locked) {
1392
rw_lock_s_unlock(&btr_search_latch);
1394
search_latch_locked = FALSE;
1397
if (!plan->pcur_is_open) {
1398
/* Evaluate the expressions to build the search tuple and
1401
row_sel_open_pcur(plan, search_latch_locked, &mtr);
1403
cursor_just_opened = TRUE;
1405
/* A new search was made: increment the cost counter */
1408
/* Restore pcur position to the index */
1410
must_go_to_next = row_sel_restore_pcur_pos(plan, &mtr);
1412
cursor_just_opened = FALSE;
1414
if (must_go_to_next) {
1415
/* We have already processed the cursor record: move
1425
In this loop we use pcur and try to fetch a qualifying row, and
1426
also fill the prefetch buffer for this table if n_rows_fetched has
1427
exceeded a threshold. While we are inside this loop, the following
1429
(1) &mtr is started,
1430
(2) pcur is positioned and open.
1432
NOTE that if cursor_just_opened is TRUE here, it means that we came
1433
to this point right after row_sel_open_pcur. */
1435
ut_ad(mtr_has_extra_clust_latch == FALSE);
1437
rec = btr_pcur_get_rec(&(plan->pcur));
1439
/* PHASE 1: Set a lock if specified */
1441
if (!node->asc && cursor_just_opened
1442
&& !page_rec_is_supremum(rec)) {
1444
/* When we open a cursor for a descending search, we must set
1445
a next-key lock on the successor record: otherwise it would
1446
be possible to insert new records next to the cursor position,
1447
and it might be that these new records should appear in the
1448
search result set, resulting in the phantom problem. */
1450
if (!consistent_read) {
1452
/* If innodb_locks_unsafe_for_binlog option is used
1453
or this session is using READ COMMITTED isolation
1454
level, we lock only the record, i.e., next-key
1455
locking is not used. */
1457
rec_t* next_rec = page_rec_get_next(rec);
1461
trx = thr_get_trx(thr);
1463
offsets = rec_get_offsets(next_rec, index, offsets,
1464
ULINT_UNDEFINED, &heap);
1466
if (srv_locks_unsafe_for_binlog
1467
|| trx->isolation_level
1468
<= TRX_ISO_READ_COMMITTED) {
1470
if (page_rec_is_supremum(next_rec)) {
1475
lock_type = LOCK_REC_NOT_GAP;
1477
lock_type = LOCK_ORDINARY;
1480
err = sel_set_rec_lock(btr_pcur_get_block(&plan->pcur),
1481
next_rec, index, offsets,
1482
node->row_lock_mode,
1485
if (err != DB_SUCCESS) {
1486
/* Note that in this case we will store in pcur
1487
the PREDECESSOR of the record we are waiting
1490
goto lock_wait_or_error;
1496
if (page_rec_is_infimum(rec)) {
1498
/* The infimum record on a page cannot be in the result set,
1499
and neither can a record lock be placed on it: we skip such
1500
a record. We also increment the cost counter as we may have
1501
processed yet another page of index. */
1508
if (!consistent_read) {
1509
/* Try to place a lock on the index record */
1511
/* If innodb_locks_unsafe_for_binlog option is used
1512
or this session is using READ COMMITTED isolation level,
1513
we lock only the record, i.e., next-key locking is
1519
offsets = rec_get_offsets(rec, index, offsets,
1520
ULINT_UNDEFINED, &heap);
1522
trx = thr_get_trx(thr);
1524
if (srv_locks_unsafe_for_binlog
1525
|| trx->isolation_level <= TRX_ISO_READ_COMMITTED) {
1527
if (page_rec_is_supremum(rec)) {
1532
lock_type = LOCK_REC_NOT_GAP;
1534
lock_type = LOCK_ORDINARY;
1537
err = sel_set_rec_lock(btr_pcur_get_block(&plan->pcur),
1538
rec, index, offsets,
1539
node->row_lock_mode, lock_type, thr);
1541
if (err != DB_SUCCESS) {
1543
goto lock_wait_or_error;
1547
if (page_rec_is_supremum(rec)) {
1549
/* A page supremum record cannot be in the result set: skip
1550
it now when we have placed a possible lock on it */
1555
ut_ad(page_rec_is_user_rec(rec));
1557
if (cost_counter > SEL_COST_LIMIT) {
1559
/* Now that we have placed the necessary locks, we can stop
1560
for a while and store the cursor position; NOTE that if we
1561
would store the cursor position BEFORE placing a record lock,
1562
it might happen that the cursor would jump over some records
1563
that another transaction could meanwhile insert adjacent to
1564
the cursor: this would result in the phantom problem. */
1566
goto stop_for_a_while;
1569
/* PHASE 2: Check a mixed index mix id if needed */
1571
if (plan->unique_search && cursor_just_opened) {
1573
ut_ad(plan->mode == PAGE_CUR_GE);
1575
/* As the cursor is now placed on a user record after a search
1576
with the mode PAGE_CUR_GE, the up_match field in the cursor
1577
tells how many fields in the user record matched to the search
1580
if (btr_pcur_get_up_match(&(plan->pcur))
1581
< plan->n_exact_match) {
1582
goto table_exhausted;
1585
/* Ok, no need to test end_conds or mix id */
1589
/* We are ready to look at a possible new index entry in the result
1590
set: the cursor is now placed on a user record */
1592
/* PHASE 3: Get previous version in a consistent read */
1594
cons_read_requires_clust_rec = FALSE;
1595
offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
1597
if (consistent_read) {
1598
/* This is a non-locking consistent read: if necessary, fetch
1599
a previous version of the record */
1601
if (dict_index_is_clust(index)) {
1603
if (!lock_clust_rec_cons_read_sees(rec, index, offsets,
1606
err = row_sel_build_prev_vers(
1607
node->read_view, index, rec,
1608
&offsets, &heap, &plan->old_vers_heap,
1611
if (err != DB_SUCCESS) {
1613
goto lock_wait_or_error;
1616
if (old_vers == NULL) {
1617
offsets = rec_get_offsets(
1618
rec, index, offsets,
1619
ULINT_UNDEFINED, &heap);
1621
/* Fetch the columns needed in
1622
test conditions. The clustered
1623
index record is protected by a
1624
page latch that was acquired
1625
by row_sel_open_pcur() or
1626
row_sel_restore_pcur_pos().
1627
The latch will not be released
1628
until mtr_commit(mtr). */
1630
row_sel_fetch_columns(
1631
index, rec, offsets,
1635
if (!row_sel_test_end_conds(plan)) {
1637
goto table_exhausted;
1645
} else if (!lock_sec_rec_cons_read_sees(rec,
1647
cons_read_requires_clust_rec = TRUE;
1651
/* PHASE 4: Test search end conditions and deleted flag */
1653
/* Fetch the columns needed in test conditions. The record is
1654
protected by a page latch that was acquired by
1655
row_sel_open_pcur() or row_sel_restore_pcur_pos(). The latch
1656
will not be released until mtr_commit(mtr). */
1658
row_sel_fetch_columns(index, rec, offsets,
1659
UT_LIST_GET_FIRST(plan->columns));
1661
/* Test the selection end conditions: these can only contain columns
1662
which already are found in the index, even though the index might be
1665
if (plan->unique_search && cursor_just_opened) {
1667
/* No test necessary: the test was already made above */
1669
} else if (!row_sel_test_end_conds(plan)) {
1671
goto table_exhausted;
1674
if (rec_get_deleted_flag(rec, dict_table_is_comp(plan->table))
1675
&& !cons_read_requires_clust_rec) {
1677
/* The record is delete marked: we can skip it if this is
1678
not a consistent read which might see an earlier version
1679
of a non-clustered index record */
1681
if (plan->unique_search) {
1683
goto table_exhausted;
1689
/* PHASE 5: Get the clustered index record, if needed and if we did
1690
not do the search using the clustered index */
1692
if (plan->must_get_clust || cons_read_requires_clust_rec) {
1694
/* It was a non-clustered index and we must fetch also the
1695
clustered index record */
1697
err = row_sel_get_clust_rec(node, plan, rec, thr, &clust_rec,
1699
mtr_has_extra_clust_latch = TRUE;
1701
if (err != DB_SUCCESS) {
1703
goto lock_wait_or_error;
1706
/* Retrieving the clustered record required a search:
1707
increment the cost counter */
1711
if (clust_rec == NULL) {
1712
/* The record did not exist in the read view */
1713
ut_ad(consistent_read);
1718
if (rec_get_deleted_flag(clust_rec,
1719
dict_table_is_comp(plan->table))) {
1721
/* The record is delete marked: we can skip it */
1726
if (node->can_get_updated) {
1728
btr_pcur_store_position(&(plan->clust_pcur), &mtr);
1732
/* PHASE 6: Test the rest of search conditions */
1734
if (!row_sel_test_other_conds(plan)) {
1736
if (plan->unique_search) {
1738
goto table_exhausted;
1744
/* PHASE 7: We found a new qualifying row for the current table; push
1745
the row if prefetch is on, or move to the next table in the join */
1747
plan->n_rows_fetched++;
1749
ut_ad(plan->pcur.latch_mode == BTR_SEARCH_LEAF);
1751
if ((plan->n_rows_fetched <= SEL_PREFETCH_LIMIT)
1752
|| plan->unique_search || plan->no_prefetch
1753
|| plan->table->big_rows) {
1755
/* No prefetch in operation: go to the next table */
1760
sel_push_prefetched_row(plan);
1762
if (plan->n_rows_prefetched == SEL_MAX_N_PREFETCH) {
1764
/* The prefetch buffer is now full */
1766
sel_pop_prefetched_row(plan);
1772
ut_ad(!search_latch_locked);
1774
if (mtr_has_extra_clust_latch) {
1776
/* We must commit &mtr if we are moving to the next
1777
non-clustered index record, because we could break the
1778
latching order if we would access a different clustered
1779
index page right away without releasing the previous. */
1781
goto commit_mtr_for_a_while;
1785
moved = btr_pcur_move_to_next(&(plan->pcur), &mtr);
1787
moved = btr_pcur_move_to_prev(&(plan->pcur), &mtr);
1792
goto table_exhausted;
1795
cursor_just_opened = FALSE;
1797
/* END OF RECORD LOOP
1798
------------------ */
1802
/* We found a record which satisfies the conditions: we can move to
1803
the next table or return a row in the result set */
1805
ut_ad(btr_pcur_is_on_user_rec(&plan->pcur));
1807
if (plan->unique_search && !node->can_get_updated) {
1809
plan->cursor_at_end = TRUE;
1811
ut_ad(!search_latch_locked);
1813
plan->stored_cursor_rec_processed = TRUE;
1815
btr_pcur_store_position(&(plan->pcur), &mtr);
1820
mtr_has_extra_clust_latch = FALSE;
1823
/* If we use 'goto' to this label, it means that the row was popped
1824
from the prefetched rows stack, and &mtr is already committed */
1826
if (node->fetch_table + 1 == node->n_tables) {
1828
sel_eval_select_list(node);
1830
if (node->is_aggregate) {
1835
sel_assign_into_var_values(node->into_list, node);
1837
thr->run_node = que_node_get_parent(node);
1843
node->fetch_table++;
1845
/* When we move to the next table, we first reset the plan cursor:
1846
we do not care about resetting it when we backtrack from a table */
1848
plan_reset_cursor(sel_node_get_nth_plan(node, node->fetch_table));
1853
/* The table cursor pcur reached the result set end: backtrack to the
1854
previous table in the join if we do not have cached prefetched rows */
1856
plan->cursor_at_end = TRUE;
1860
mtr_has_extra_clust_latch = FALSE;
1862
if (plan->n_rows_prefetched > 0) {
1863
/* The table became exhausted during a prefetch */
1865
sel_pop_prefetched_row(plan);
1867
goto next_table_no_mtr;
1870
table_exhausted_no_mtr:
1871
if (node->fetch_table == 0) {
1874
if (node->is_aggregate && !node->aggregate_already_fetched) {
1876
node->aggregate_already_fetched = TRUE;
1878
sel_assign_into_var_values(node->into_list, node);
1880
thr->run_node = que_node_get_parent(node);
1882
node->state = SEL_NODE_NO_MORE_ROWS;
1884
thr->run_node = que_node_get_parent(node);
1890
node->fetch_table--;
1895
/* Return control for a while to que_run_threads, so that runaway
1896
queries can be canceled. NOTE that when we come here, we must, in a
1897
locking read, have placed the necessary (possibly waiting request)
1898
record lock on the cursor record or its successor: when we reposition
1899
the cursor, this record lock guarantees that nobody can meanwhile have
1900
inserted new records which should have appeared in the result set,
1901
which would result in the phantom problem. */
1903
ut_ad(!search_latch_locked);
1905
plan->stored_cursor_rec_processed = FALSE;
1906
btr_pcur_store_position(&(plan->pcur), &mtr);
1910
#ifdef UNIV_SYNC_DEBUG
1911
ut_ad(sync_thread_levels_empty_gen(TRUE));
1912
#endif /* UNIV_SYNC_DEBUG */
1916
commit_mtr_for_a_while:
1917
/* Stores the cursor position and commits &mtr; this is used if
1918
&mtr may contain latches which would break the latching order if
1919
&mtr would not be committed and the latches released. */
1921
plan->stored_cursor_rec_processed = TRUE;
1923
ut_ad(!search_latch_locked);
1924
btr_pcur_store_position(&(plan->pcur), &mtr);
1928
mtr_has_extra_clust_latch = FALSE;
1930
#ifdef UNIV_SYNC_DEBUG
1931
ut_ad(sync_thread_levels_empty_gen(TRUE));
1932
#endif /* UNIV_SYNC_DEBUG */
1937
/* See the note at stop_for_a_while: the same holds for this case */
1939
ut_ad(!btr_pcur_is_before_first_on_page(&plan->pcur) || !node->asc);
1940
ut_ad(!search_latch_locked);
1942
plan->stored_cursor_rec_processed = FALSE;
1943
btr_pcur_store_position(&(plan->pcur), &mtr);
1947
#ifdef UNIV_SYNC_DEBUG
1948
ut_ad(sync_thread_levels_empty_gen(TRUE));
1949
#endif /* UNIV_SYNC_DEBUG */
1952
if (search_latch_locked) {
1953
rw_lock_s_unlock(&btr_search_latch);
1955
if (UNIV_LIKELY_NULL(heap)) {
1956
mem_heap_free(heap);
1961
/**********************************************************************//**
1962
Performs a select step. This is a high-level function used in SQL execution
1964
@return query thread to run next or NULL */
1969
que_thr_t* thr) /*!< in: query thread */
1972
sym_node_t* table_node;
1978
node = thr->run_node;
1980
ut_ad(que_node_get_type(node) == QUE_NODE_SELECT);
1982
/* If this is a new time this node is executed (or when execution
1983
resumes after wait for a table intention lock), set intention locks
1984
on the tables, or assign a read view */
1986
if (node->into_list && (thr->prev_node == que_node_get_parent(node))) {
1988
node->state = SEL_NODE_OPEN;
1991
if (node->state == SEL_NODE_OPEN) {
1993
/* It may be that the current session has not yet started
1994
its transaction, or it has been committed: */
1996
trx_start_if_not_started(thr_get_trx(thr));
1998
plan_reset_cursor(sel_node_get_nth_plan(node, 0));
2000
if (node->consistent_read) {
2001
/* Assign a read view for the query */
2002
node->read_view = trx_assign_read_view(
2005
if (node->set_x_locks) {
2006
i_lock_mode = LOCK_IX;
2008
i_lock_mode = LOCK_IS;
2011
table_node = node->table_list;
2013
while (table_node) {
2014
err = lock_table(0, table_node->table,
2016
if (err != DB_SUCCESS) {
2017
thr_get_trx(thr)->error_state = err;
2022
table_node = que_node_get_next(table_node);
2026
/* If this is an explicit cursor, copy stored procedure
2027
variable values, so that the values cannot change between
2028
fetches (currently, we copy them also for non-explicit
2031
if (node->explicit_cursor
2032
&& UT_LIST_GET_FIRST(node->copy_variables)) {
2034
row_sel_copy_input_variable_vals(node);
2037
node->state = SEL_NODE_FETCH;
2038
node->fetch_table = 0;
2040
if (node->is_aggregate) {
2041
/* Reset the aggregate total values */
2042
sel_reset_aggregate_vals(node);
2046
err = row_sel(node, thr);
2048
/* NOTE! if queries are parallelized, the following assignment may
2049
have problems; the assignment should be made only if thr is the
2050
only top-level thr in the graph: */
2052
thr->graph->last_sel_node = node;
2054
if (err != DB_SUCCESS) {
2055
thr_get_trx(thr)->error_state = err;
2063
/**********************************************************************//**
2064
Performs a fetch for a cursor.
2065
@return query thread to run next or NULL */
2070
que_thr_t* thr) /*!< in: query thread */
2072
sel_node_t* sel_node;
2077
node = thr->run_node;
2078
sel_node = node->cursor_def;
2080
ut_ad(que_node_get_type(node) == QUE_NODE_FETCH);
2082
if (thr->prev_node != que_node_get_parent(node)) {
2084
if (sel_node->state != SEL_NODE_NO_MORE_ROWS) {
2086
if (node->into_list) {
2087
sel_assign_into_var_values(node->into_list,
2090
void* ret = (*node->func->func)(
2091
sel_node, node->func->arg);
2095
= SEL_NODE_NO_MORE_ROWS;
2100
thr->run_node = que_node_get_parent(node);
2105
/* Make the fetch node the parent of the cursor definition for
2106
the time of the fetch, so that execution knows to return to this
2107
fetch node after a row has been selected or we know that there is
2110
sel_node->common.parent = node;
2112
if (sel_node->state == SEL_NODE_CLOSED) {
2114
"InnoDB: Error: fetch called on a closed cursor\n");
2116
thr_get_trx(thr)->error_state = DB_ERROR;
2121
thr->run_node = sel_node;
2126
/****************************************************************//**
2127
Sample callback function for fetch that prints each row.
2128
@return always returns non-NULL */
2133
void* row, /*!< in: sel_node_t* */
2134
void* user_arg) /*!< in: not used */
2136
sel_node_t* node = row;
2140
UT_NOT_USED(user_arg);
2142
fprintf(stderr, "row_fetch_print: row %p\n", row);
2144
exp = node->select_list;
2147
dfield_t* dfield = que_node_get_val(exp);
2148
const dtype_t* type = dfield_get_type(dfield);
2150
fprintf(stderr, " column %lu:\n", (ulong)i);
2155
if (dfield_get_len(dfield) != UNIV_SQL_NULL) {
2156
ut_print_buf(stderr, dfield_get_data(dfield),
2157
dfield_get_len(dfield));
2160
fputs(" <NULL>;\n", stderr);
2163
exp = que_node_get_next(exp);
2170
/***********************************************************//**
2171
Prints a row in a select result.
2172
@return query thread to run next or NULL */
2177
que_thr_t* thr) /*!< in: query thread */
2179
row_printf_node_t* node;
2180
sel_node_t* sel_node;
2185
node = thr->run_node;
2187
sel_node = node->sel_node;
2189
ut_ad(que_node_get_type(node) == QUE_NODE_ROW_PRINTF);
2191
if (thr->prev_node == que_node_get_parent(node)) {
2193
/* Reset the cursor */
2194
sel_node->state = SEL_NODE_OPEN;
2196
/* Fetch next row to print */
2198
thr->run_node = sel_node;
2203
if (sel_node->state != SEL_NODE_FETCH) {
2205
ut_ad(sel_node->state == SEL_NODE_NO_MORE_ROWS);
2207
/* No more rows to print */
2209
thr->run_node = que_node_get_parent(node);
2214
arg = sel_node->select_list;
2217
dfield_print_also_hex(que_node_get_val(arg));
2219
fputs(" ::: ", stderr);
2221
arg = que_node_get_next(arg);
2226
/* Fetch next row to print */
2228
thr->run_node = sel_node;
2233
/****************************************************************//**
2234
Converts a key value stored in MySQL format to an Innobase dtuple. The last
2235
field of the key value may be just a prefix of a fixed length field: hence
2236
the parameter key_len. But currently we do not allow search keys where the
2237
last field is only a prefix of the full key field len and print a warning if
2238
such appears. A counterpart of this function is
2239
ha_innobase::store_key_val_for_row() in ha_innodb.cc. */
2242
row_sel_convert_mysql_key_to_innobase(
2243
/*==================================*/
2244
dtuple_t* tuple, /*!< in/out: tuple where to build;
2245
NOTE: we assume that the type info
2246
in the tuple is already according
2248
byte* buf, /*!< in: buffer to use in field
2250
ulint buf_len, /*!< in: buffer length */
2251
dict_index_t* index, /*!< in: index of the key value */
2252
const byte* key_ptr, /*!< in: MySQL key value */
2253
ulint key_len, /*!< in: MySQL key value length */
2254
trx_t* trx) /*!< in: transaction */
2256
byte* original_buf = buf;
2257
const byte* original_key_ptr = key_ptr;
2258
dict_field_t* field;
2262
ulint data_field_len;
2264
const byte* key_end;
2267
/* For documentation of the key value storage format in MySQL, see
2268
ha_innobase::store_key_val_for_row() in ha_innodb.cc. */
2270
key_end = key_ptr + key_len;
2272
/* Permit us to access any field in the tuple (ULINT_MAX): */
2274
dtuple_set_n_fields(tuple, ULINT_MAX);
2276
dfield = dtuple_get_nth_field(tuple, 0);
2277
field = dict_index_get_nth_field(index, 0);
2279
if (UNIV_UNLIKELY(dfield_get_type(dfield)->mtype == DATA_SYS)) {
2280
/* A special case: we are looking for a position in the
2281
generated clustered index which InnoDB automatically added
2282
to a table with no primary key: the first and the only
2283
ordering column is ROW_ID which InnoDB stored to the key_ptr
2286
ut_a(key_len == DATA_ROW_ID_LEN);
2288
dfield_set_data(dfield, key_ptr, DATA_ROW_ID_LEN);
2290
dtuple_set_n_fields(tuple, 1);
2295
while (key_ptr < key_end) {
2297
ulint type = dfield_get_type(dfield)->mtype;
2298
ut_a(field->col->mtype == type);
2303
if (!(dfield_get_type(dfield)->prtype & DATA_NOT_NULL)) {
2304
/* The first byte in the field tells if this is
2305
an SQL NULL value */
2309
if (*key_ptr != 0) {
2310
dfield_set_null(dfield);
2316
/* Calculate data length and data field total length */
2318
if (type == DATA_BLOB) {
2319
/* The key field is a column prefix of a BLOB or
2322
ut_a(field->prefix_len > 0);
2324
/* MySQL stores the actual data length to the first 2
2325
bytes after the optional SQL NULL marker byte. The
2326
storage format is little-endian, that is, the most
2327
significant byte at a higher address. In UTF-8, MySQL
2328
seems to reserve field->prefix_len bytes for
2329
storing this field in the key value buffer, even
2330
though the actual value only takes data_len bytes
2333
data_len = key_ptr[data_offset]
2334
+ 256 * key_ptr[data_offset + 1];
2335
data_field_len = data_offset + 2 + field->prefix_len;
2339
/* Now that we know the length, we store the column
2340
value like it would be a fixed char field */
2342
} else if (field->prefix_len > 0) {
2343
/* Looks like MySQL pads unused end bytes in the
2344
prefix with space. Therefore, also in UTF-8, it is ok
2345
to compare with a prefix containing full prefix_len
2346
bytes, and no need to take at most prefix_len / 3
2347
UTF-8 characters from the start.
2348
If the prefix is used as the upper end of a LIKE
2349
'abc%' query, then MySQL pads the end with chars
2350
0xff. TODO: in that case does it any harm to compare
2351
with the full prefix_len bytes. How do characters
2352
0xff in UTF-8 behave? */
2354
data_len = field->prefix_len;
2355
data_field_len = data_offset + data_len;
2357
data_len = dfield_get_type(dfield)->len;
2358
data_field_len = data_offset + data_len;
2362
(dtype_get_mysql_type(dfield_get_type(dfield))
2363
== DATA_MYSQL_TRUE_VARCHAR)
2364
&& UNIV_LIKELY(type != DATA_INT)) {
2365
/* In a MySQL key value format, a true VARCHAR is
2366
always preceded by 2 bytes of a length field.
2367
dfield_get_type(dfield)->len returns the maximum
2368
'payload' len in bytes. That does not include the
2369
2 bytes that tell the actual data length.
2371
We added the check != DATA_INT to make sure we do
2372
not treat MySQL ENUM or SET as a true VARCHAR! */
2375
data_field_len += 2;
2378
/* Storing may use at most data_len bytes of buf */
2380
if (UNIV_LIKELY(!is_null)) {
2381
row_mysql_store_col_in_innobase_format(
2383
FALSE, /* MySQL key value format col */
2384
key_ptr + data_offset, data_len,
2385
dict_table_is_comp(index->table));
2389
key_ptr += data_field_len;
2391
if (UNIV_UNLIKELY(key_ptr > key_end)) {
2392
/* The last field in key was not a complete key field
2395
Print a warning about this! HA_READ_PREFIX_LAST does
2396
not currently work in InnoDB with partial-field key
2397
value prefixes. Since MySQL currently uses a padding
2398
trick to calculate LIKE 'abc%' type queries there
2399
should never be partial-field prefixes in searches. */
2401
ut_print_timestamp(stderr);
2403
fputs(" InnoDB: Warning: using a partial-field"
2404
" key prefix in search.\n"
2405
"InnoDB: ", stderr);
2406
dict_index_name_print(stderr, trx, index);
2407
fprintf(stderr, ". Last data field length %lu bytes,\n"
2408
"InnoDB: key ptr now exceeds"
2409
" key end by %lu bytes.\n"
2410
"InnoDB: Key value in the MySQL format:\n",
2411
(ulong) data_field_len,
2412
(ulong) (key_ptr - key_end));
2414
ut_print_buf(stderr, original_key_ptr, key_len);
2418
ulint len = dfield_get_len(dfield);
2419
dfield_set_len(dfield, len
2420
- (ulint) (key_ptr - key_end));
2429
ut_a(buf <= original_buf + buf_len);
2431
/* We set the length of tuple to n_fields: we assume that the memory
2432
area allocated for it is big enough (usually bigger than n_fields). */
2434
dtuple_set_n_fields(tuple, n_fields);
2437
/**************************************************************//**
2438
Stores the row id to the prebuilt struct. */
2441
row_sel_store_row_id_to_prebuilt(
2442
/*=============================*/
2443
row_prebuilt_t* prebuilt, /*!< in/out: prebuilt */
2444
const rec_t* index_rec, /*!< in: record */
2445
const dict_index_t* index, /*!< in: index of the record */
2446
const ulint* offsets) /*!< in: rec_get_offsets
2447
(index_rec, index) */
2452
ut_ad(rec_offs_validate(index_rec, index, offsets));
2454
data = rec_get_nth_field(
2456
dict_index_get_sys_col_pos(index, DATA_ROW_ID), &len);
2458
if (UNIV_UNLIKELY(len != DATA_ROW_ID_LEN)) {
2460
"InnoDB: Error: Row id field is"
2461
" wrong length %lu in ", (ulong) len);
2462
dict_index_name_print(stderr, prebuilt->trx, index);
2463
fprintf(stderr, "\n"
2464
"InnoDB: Field number %lu, record:\n",
2465
(ulong) dict_index_get_sys_col_pos(index,
2467
rec_print_new(stderr, index_rec, offsets);
2472
ut_memcpy(prebuilt->row_id, data, len);
2475
/**************************************************************//**
2476
Stores a non-SQL-NULL field in the MySQL format. The counterpart of this
2477
function is row_mysql_store_col_in_innobase_format() in row0mysql.c. */
2480
row_sel_field_store_in_mysql_format(
2481
/*================================*/
2482
byte* dest, /*!< in/out: buffer where to store; NOTE
2483
that BLOBs are not in themselves
2484
stored here: the caller must allocate
2485
and copy the BLOB into buffer before,
2486
and pass the pointer to the BLOB in
2488
const mysql_row_templ_t* templ,
2489
/*!< in: MySQL column template.
2490
Its following fields are referenced:
2491
type, is_unsigned, mysql_col_len,
2492
mbminlen, mbmaxlen */
2493
const byte* data, /*!< in: data to store */
2494
ulint len) /*!< in: length of the data */
2500
ut_ad(len != UNIV_SQL_NULL);
2501
UNIV_MEM_ASSERT_RW(data, len);
2503
switch (templ->type) {
2505
/* Convert integer data from Innobase to a little-endian
2506
format, sign bit restored to normal */
2519
if (!templ->is_unsigned) {
2520
dest[len - 1] = (byte) (dest[len - 1] ^ 128);
2523
ut_ad(templ->mysql_col_len == len);
2529
field_end = dest + templ->mysql_col_len;
2531
if (templ->mysql_type == DATA_MYSQL_TRUE_VARCHAR) {
2532
/* This is a >= 5.0.3 type true VARCHAR. Store the
2533
length of the data to the first byte or the first
2534
two bytes of dest. */
2536
dest = row_mysql_store_true_var_len(
2537
dest, len, templ->mysql_length_bytes);
2540
/* Copy the actual data */
2541
ut_memcpy(dest, data, len);
2543
/* Pad with trailing spaces. We pad with spaces also the
2544
unused end of a >= 5.0.3 true VARCHAR column, just in case
2545
MySQL expects its contents to be deterministic. */
2547
pad_ptr = dest + len;
2549
ut_ad(templ->mbminlen <= templ->mbmaxlen);
2551
/* We handle UCS2 charset strings differently. */
2552
if (templ->mbminlen == 2) {
2553
/* A space char is two bytes, 0x0020 in UCS2 */
2556
/* A 0x20 has been stripped from the column.
2559
if (pad_ptr < field_end) {
2565
/* Pad the rest of the string with 0x0020 */
2567
while (pad_ptr < field_end) {
2574
ut_ad(templ->mbminlen == 1);
2577
memset(pad_ptr, 0x20, field_end - pad_ptr);
2582
/* Store a pointer to the BLOB buffer to dest: the BLOB was
2583
already copied to the buffer in row_sel_store_mysql_rec */
2585
row_mysql_store_blob_ref(dest, templ->mysql_col_len, data,
2590
memcpy(dest, data, len);
2592
ut_ad(templ->mysql_col_len >= len);
2593
ut_ad(templ->mbmaxlen >= templ->mbminlen);
2595
ut_ad(templ->mbmaxlen > templ->mbminlen
2596
|| templ->mysql_col_len == len);
2597
/* The following assertion would fail for old tables
2598
containing UTF-8 ENUM columns due to Bug #9526. */
2599
ut_ad(!templ->mbmaxlen
2600
|| !(templ->mysql_col_len % templ->mbmaxlen));
2601
ut_ad(len * templ->mbmaxlen >= templ->mysql_col_len);
2603
if (templ->mbminlen != templ->mbmaxlen) {
2604
/* Pad with spaces. This undoes the stripping
2605
done in row0mysql.ic, function
2606
row_mysql_store_col_in_innobase_format(). */
2608
memset(dest + len, 0x20, templ->mysql_col_len - len);
2614
case DATA_SYS_CHILD:
2616
/* These column types should never be shipped to MySQL. */
2620
case DATA_FIXBINARY:
2624
/* Above are the valid column types for MySQL data. */
2625
#endif /* UNIV_DEBUG */
2626
ut_ad(templ->mysql_col_len == len);
2627
memcpy(dest, data, len);
2631
/**************************************************************//**
2632
Convert a row in the Innobase format to a row in the MySQL format.
2633
Note that the template in prebuilt may advise us to copy only a few
2634
columns to mysql_rec, other columns are left blank. All columns may not
2635
be needed in the query.
2636
@return TRUE if success, FALSE if could not allocate memory for a BLOB
2637
(though we may also assert in that case) */
2640
row_sel_store_mysql_rec(
2641
/*====================*/
2642
byte* mysql_rec, /*!< out: row in the MySQL format */
2643
row_prebuilt_t* prebuilt, /*!< in: prebuilt struct */
2644
const rec_t* rec, /*!< in: Innobase record in the index
2645
which was described in prebuilt's
2646
template; must be protected by
2648
const ulint* offsets) /*!< in: array returned by
2649
rec_get_offsets() */
2651
mysql_row_templ_t* templ;
2652
mem_heap_t* extern_field_heap = NULL;
2658
ut_ad(prebuilt->mysql_template);
2659
ut_ad(prebuilt->default_rec);
2660
ut_ad(rec_offs_validate(rec, NULL, offsets));
2662
if (UNIV_LIKELY_NULL(prebuilt->blob_heap)) {
2663
mem_heap_free(prebuilt->blob_heap);
2664
prebuilt->blob_heap = NULL;
2667
for (i = 0; i < prebuilt->n_template ; i++) {
2669
templ = prebuilt->mysql_template + i;
2671
if (UNIV_UNLIKELY(rec_offs_nth_extern(offsets,
2672
templ->rec_field_no))) {
2674
/* Copy an externally stored field to the temporary
2677
ut_a(!prebuilt->trx->has_search_latch);
2679
if (UNIV_UNLIKELY(templ->type == DATA_BLOB)) {
2680
if (prebuilt->blob_heap == NULL) {
2681
prebuilt->blob_heap = mem_heap_create(
2685
heap = prebuilt->blob_heap;
2688
= mem_heap_create(UNIV_PAGE_SIZE);
2690
heap = extern_field_heap;
2693
/* NOTE: if we are retrieving a big BLOB, we may
2694
already run out of memory in the next call, which
2697
data = btr_rec_copy_externally_stored_field(
2699
dict_table_zip_size(prebuilt->table),
2700
templ->rec_field_no, &len, heap);
2702
ut_a(len != UNIV_SQL_NULL);
2704
/* Field is stored in the row. */
2706
data = rec_get_nth_field(rec, offsets,
2707
templ->rec_field_no, &len);
2709
if (UNIV_UNLIKELY(templ->type == DATA_BLOB)
2710
&& len != UNIV_SQL_NULL) {
2712
/* It is a BLOB field locally stored in the
2713
InnoDB record: we MUST copy its contents to
2714
prebuilt->blob_heap here because later code
2715
assumes all BLOB values have been copied to a
2718
if (prebuilt->blob_heap == NULL) {
2719
prebuilt->blob_heap = mem_heap_create(
2723
data = memcpy(mem_heap_alloc(
2724
prebuilt->blob_heap, len),
2729
if (len != UNIV_SQL_NULL) {
2730
row_sel_field_store_in_mysql_format(
2731
mysql_rec + templ->mysql_col_offset,
2735
if (extern_field_heap) {
2736
mem_heap_free(extern_field_heap);
2737
extern_field_heap = NULL;
2740
if (templ->mysql_null_bit_mask) {
2741
/* It is a nullable column with a non-NULL
2743
mysql_rec[templ->mysql_null_byte_offset]
2744
&= ~(byte) templ->mysql_null_bit_mask;
2747
/* MySQL assumes that the field for an SQL
2748
NULL value is set to the default value. */
2750
UNIV_MEM_ASSERT_RW(prebuilt->default_rec
2751
+ templ->mysql_col_offset,
2752
templ->mysql_col_len);
2753
mysql_rec[templ->mysql_null_byte_offset]
2754
|= (byte) templ->mysql_null_bit_mask;
2755
memcpy(mysql_rec + templ->mysql_col_offset,
2756
(const byte*) prebuilt->default_rec
2757
+ templ->mysql_col_offset,
2758
templ->mysql_col_len);
2765
/*********************************************************************//**
2766
Builds a previous version of a clustered index record for a consistent read
2767
@return DB_SUCCESS or error code */
2770
row_sel_build_prev_vers_for_mysql(
2771
/*==============================*/
2772
read_view_t* read_view, /*!< in: read view */
2773
dict_index_t* clust_index, /*!< in: clustered index */
2774
row_prebuilt_t* prebuilt, /*!< in: prebuilt struct */
2775
const rec_t* rec, /*!< in: record in a clustered index */
2776
ulint** offsets, /*!< in/out: offsets returned by
2777
rec_get_offsets(rec, clust_index) */
2778
mem_heap_t** offset_heap, /*!< in/out: memory heap from which
2779
the offsets are allocated */
2780
rec_t** old_vers, /*!< out: old version, or NULL if the
2781
record does not exist in the view:
2782
i.e., it was freshly inserted
2784
mtr_t* mtr) /*!< in: mtr */
2788
if (prebuilt->old_vers_heap) {
2789
mem_heap_empty(prebuilt->old_vers_heap);
2791
prebuilt->old_vers_heap = mem_heap_create(200);
2794
err = row_vers_build_for_consistent_read(
2795
rec, mtr, clust_index, offsets, read_view, offset_heap,
2796
prebuilt->old_vers_heap, old_vers);
2800
/*********************************************************************//**
2801
Retrieves the clustered index record corresponding to a record in a
2802
non-clustered index. Does the necessary locking. Used in the MySQL
2804
@return DB_SUCCESS or error code */
2807
row_sel_get_clust_rec_for_mysql(
2808
/*============================*/
2809
row_prebuilt_t* prebuilt,/*!< in: prebuilt struct in the handle */
2810
dict_index_t* sec_index,/*!< in: secondary index where rec resides */
2811
const rec_t* rec, /*!< in: record in a non-clustered index; if
2812
this is a locking read, then rec is not
2813
allowed to be delete-marked, and that would
2814
not make sense either */
2815
que_thr_t* thr, /*!< in: query thread */
2816
const rec_t** out_rec,/*!< out: clustered record or an old version of
2817
it, NULL if the old version did not exist
2818
in the read view, i.e., it was a fresh
2820
ulint** offsets,/*!< in: offsets returned by
2821
rec_get_offsets(rec, sec_index);
2822
out: offsets returned by
2823
rec_get_offsets(out_rec, clust_index) */
2824
mem_heap_t** offset_heap,/*!< in/out: memory heap from which
2825
the offsets are allocated */
2826
mtr_t* mtr) /*!< in: mtr used to get access to the
2827
non-clustered record; the same mtr is used to
2828
access the clustered index */
2830
dict_index_t* clust_index;
2831
const rec_t* clust_rec;
2837
trx = thr_get_trx(thr);
2839
row_build_row_ref_in_tuple(prebuilt->clust_ref, rec,
2840
sec_index, *offsets, trx);
2842
clust_index = dict_table_get_first_index(sec_index->table);
2844
btr_pcur_open_with_no_init(clust_index, prebuilt->clust_ref,
2845
PAGE_CUR_LE, BTR_SEARCH_LEAF,
2846
prebuilt->clust_pcur, 0, mtr);
2848
clust_rec = btr_pcur_get_rec(prebuilt->clust_pcur);
2850
prebuilt->clust_pcur->trx_if_known = trx;
2852
/* Note: only if the search ends up on a non-infimum record is the
2853
low_match value the real match to the search tuple */
2855
if (!page_rec_is_user_rec(clust_rec)
2856
|| btr_pcur_get_low_match(prebuilt->clust_pcur)
2857
< dict_index_get_n_unique(clust_index)) {
2859
/* In a rare case it is possible that no clust rec is found
2860
for a delete-marked secondary index record: if in row0umod.c
2861
in row_undo_mod_remove_clust_low() we have already removed
2862
the clust rec, while purge is still cleaning and removing
2863
secondary index records associated with earlier versions of
2864
the clustered index record. In that case we know that the
2865
clustered index record did not exist in the read view of
2868
if (!rec_get_deleted_flag(rec,
2869
dict_table_is_comp(sec_index->table))
2870
|| prebuilt->select_lock_type != LOCK_NONE) {
2871
ut_print_timestamp(stderr);
2872
fputs(" InnoDB: error clustered record"
2873
" for sec rec not found\n"
2874
"InnoDB: ", stderr);
2875
dict_index_name_print(stderr, trx, sec_index);
2877
"InnoDB: sec index record ", stderr);
2878
rec_print(stderr, rec, sec_index);
2880
"InnoDB: clust index record ", stderr);
2881
rec_print(stderr, clust_rec, clust_index);
2883
trx_print(stderr, trx, 600);
2886
"InnoDB: Submit a detailed bug report"
2887
" to http://bugs.mysql.com\n", stderr);
2895
*offsets = rec_get_offsets(clust_rec, clust_index, *offsets,
2896
ULINT_UNDEFINED, offset_heap);
2898
if (prebuilt->select_lock_type != LOCK_NONE) {
2899
/* Try to place a lock on the index record; we are searching
2900
the clust rec with a unique condition, hence
2901
we set a LOCK_REC_NOT_GAP type lock */
2903
err = lock_clust_rec_read_check_and_lock(
2904
0, btr_pcur_get_block(prebuilt->clust_pcur),
2905
clust_rec, clust_index, *offsets,
2906
prebuilt->select_lock_type, LOCK_REC_NOT_GAP, thr);
2907
if (err != DB_SUCCESS) {
2912
/* This is a non-locking consistent read: if necessary, fetch
2913
a previous version of the record */
2917
/* If the isolation level allows reading of uncommitted data,
2918
then we never look for an earlier version */
2920
if (trx->isolation_level > TRX_ISO_READ_UNCOMMITTED
2921
&& !lock_clust_rec_cons_read_sees(
2922
clust_rec, clust_index, *offsets,
2925
/* The following call returns 'offsets' associated with
2927
err = row_sel_build_prev_vers_for_mysql(
2928
trx->read_view, clust_index, prebuilt,
2929
clust_rec, offsets, offset_heap, &old_vers,
2932
if (err != DB_SUCCESS || old_vers == NULL) {
2937
clust_rec = old_vers;
2940
/* If we had to go to an earlier version of row or the
2941
secondary index record is delete marked, then it may be that
2942
the secondary index record corresponding to clust_rec
2943
(or old_vers) is not rec; in that case we must ignore
2944
such row because in our snapshot rec would not have existed.
2945
Remember that from rec we cannot see directly which transaction
2946
id corresponds to it: we have to go to the clustered index
2947
record. A query where we want to fetch all rows where
2948
the secondary index value is in some interval would return
2949
a wrong result if we would not drop rows which we come to
2950
visit through secondary index records that would not really
2951
exist in our snapshot. */
2955
|| trx->isolation_level <= TRX_ISO_READ_UNCOMMITTED
2956
|| rec_get_deleted_flag(rec, dict_table_is_comp(
2958
&& !row_sel_sec_rec_is_for_clust_rec(
2959
rec, sec_index, clust_rec, clust_index)) {
2961
#ifdef UNIV_SEARCH_DEBUG
2963
ut_a(clust_rec == NULL
2964
|| row_sel_sec_rec_is_for_clust_rec(
2965
rec, sec_index, clust_rec, clust_index));
2971
*out_rec = clust_rec;
2973
if (prebuilt->select_lock_type != LOCK_NONE) {
2974
/* We may use the cursor in update or in unlock_row():
2975
store its position */
2977
btr_pcur_store_position(prebuilt->clust_pcur, mtr);
2985
/********************************************************************//**
2986
Restores cursor position after it has been stored. We have to take into
2987
account that the record cursor was positioned on may have been deleted.
2988
Then we may have to move the cursor one step up or down.
2989
@return TRUE if we may need to process the record the cursor is now
2990
positioned on (i.e. we should not go to the next record yet) */
2993
sel_restore_position_for_mysql(
2994
/*===========================*/
2995
ibool* same_user_rec, /*!< out: TRUE if we were able to restore
2996
the cursor on a user record with the
2997
same ordering prefix in in the
2999
ulint latch_mode, /*!< in: latch mode wished in
3001
btr_pcur_t* pcur, /*!< in: cursor whose position
3003
ibool moves_up, /*!< in: TRUE if the cursor moves up
3005
mtr_t* mtr) /*!< in: mtr; CAUTION: may commit
3009
ulint relative_position;
3011
relative_position = pcur->rel_pos;
3013
success = btr_pcur_restore_position(latch_mode, pcur, mtr);
3015
*same_user_rec = success;
3017
if (relative_position == BTR_PCUR_ON) {
3023
btr_pcur_move_to_next(pcur, mtr);
3029
if (relative_position == BTR_PCUR_AFTER
3030
|| relative_position == BTR_PCUR_AFTER_LAST_IN_TREE) {
3036
if (btr_pcur_is_on_user_rec(pcur)) {
3037
btr_pcur_move_to_prev(pcur, mtr);
3043
ut_ad(relative_position == BTR_PCUR_BEFORE
3044
|| relative_position == BTR_PCUR_BEFORE_FIRST_IN_TREE);
3046
if (moves_up && btr_pcur_is_on_user_rec(pcur)) {
3047
btr_pcur_move_to_next(pcur, mtr);
3053
/********************************************************************//**
3054
Pops a cached row for MySQL from the fetch cache. */
3057
row_sel_pop_cached_row_for_mysql(
3058
/*=============================*/
3059
byte* buf, /*!< in/out: buffer where to copy the
3061
row_prebuilt_t* prebuilt) /*!< in: prebuilt struct */
3064
mysql_row_templ_t* templ;
3066
ut_ad(prebuilt->n_fetch_cached > 0);
3067
ut_ad(prebuilt->mysql_prefix_len <= prebuilt->mysql_row_len);
3069
if (UNIV_UNLIKELY(prebuilt->keep_other_fields_on_keyread)) {
3070
/* Copy cache record field by field, don't touch fields that
3071
are not covered by current key */
3072
cached_rec = prebuilt->fetch_cache[
3073
prebuilt->fetch_cache_first];
3075
for (i = 0; i < prebuilt->n_template; i++) {
3076
templ = prebuilt->mysql_template + i;
3077
#if 0 /* Some of the cached_rec may legitimately be uninitialized. */
3078
UNIV_MEM_ASSERT_RW(cached_rec
3079
+ templ->mysql_col_offset,
3080
templ->mysql_col_len);
3082
ut_memcpy(buf + templ->mysql_col_offset,
3083
cached_rec + templ->mysql_col_offset,
3084
templ->mysql_col_len);
3085
/* Copy NULL bit of the current field from cached_rec
3087
if (templ->mysql_null_bit_mask) {
3088
buf[templ->mysql_null_byte_offset]
3089
^= (buf[templ->mysql_null_byte_offset]
3090
^ cached_rec[templ->mysql_null_byte_offset])
3091
& (byte)templ->mysql_null_bit_mask;
3096
#if 0 /* Some of the cached_rec may legitimately be uninitialized. */
3097
UNIV_MEM_ASSERT_RW(prebuilt->fetch_cache
3098
[prebuilt->fetch_cache_first],
3099
prebuilt->mysql_prefix_len);
3102
prebuilt->fetch_cache[prebuilt->fetch_cache_first],
3103
prebuilt->mysql_prefix_len);
3105
prebuilt->n_fetch_cached--;
3106
prebuilt->fetch_cache_first++;
3108
if (prebuilt->n_fetch_cached == 0) {
3109
prebuilt->fetch_cache_first = 0;
3113
/********************************************************************//**
3114
Pushes a row for MySQL to the fetch cache. */
3117
row_sel_push_cache_row_for_mysql(
3118
/*=============================*/
3119
row_prebuilt_t* prebuilt, /*!< in: prebuilt struct */
3120
const rec_t* rec, /*!< in: record to push; must
3121
be protected by a page latch */
3122
const ulint* offsets) /*!<in: rec_get_offsets() */
3127
ut_ad(prebuilt->n_fetch_cached < MYSQL_FETCH_CACHE_SIZE);
3128
ut_ad(rec_offs_validate(rec, NULL, offsets));
3129
ut_a(!prebuilt->templ_contains_blob);
3131
if (prebuilt->fetch_cache[0] == NULL) {
3132
/* Allocate memory for the fetch cache */
3134
for (i = 0; i < MYSQL_FETCH_CACHE_SIZE; i++) {
3136
/* A user has reported memory corruption in these
3137
buffers in Linux. Put magic numbers there to help
3138
to track a possible bug. */
3140
buf = mem_alloc(prebuilt->mysql_row_len + 8);
3142
prebuilt->fetch_cache[i] = buf + 4;
3144
mach_write_to_4(buf, ROW_PREBUILT_FETCH_MAGIC_N);
3145
mach_write_to_4(buf + 4 + prebuilt->mysql_row_len,
3146
ROW_PREBUILT_FETCH_MAGIC_N);
3150
ut_ad(prebuilt->fetch_cache_first == 0);
3151
UNIV_MEM_INVALID(prebuilt->fetch_cache[prebuilt->n_fetch_cached],
3152
prebuilt->mysql_row_len);
3154
if (UNIV_UNLIKELY(!row_sel_store_mysql_rec(
3155
prebuilt->fetch_cache[
3156
prebuilt->n_fetch_cached],
3157
prebuilt, rec, offsets))) {
3161
prebuilt->n_fetch_cached++;
3164
/*********************************************************************//**
3165
Tries to do a shortcut to fetch a clustered index record with a unique key,
3166
using the hash index if possible (not always). We assume that the search
3167
mode is PAGE_CUR_GE, it is a consistent read, there is a read view in trx,
3168
btr search latch has been locked in S-mode.
3169
@return SEL_FOUND, SEL_EXHAUSTED, SEL_RETRY */
3172
row_sel_try_search_shortcut_for_mysql(
3173
/*==================================*/
3174
const rec_t** out_rec,/*!< out: record if found */
3175
row_prebuilt_t* prebuilt,/*!< in: prebuilt struct */
3176
ulint** offsets,/*!< in/out: for rec_get_offsets(*out_rec) */
3177
mem_heap_t** heap, /*!< in/out: heap for rec_get_offsets() */
3178
mtr_t* mtr) /*!< in: started mtr */
3180
dict_index_t* index = prebuilt->index;
3181
const dtuple_t* search_tuple = prebuilt->search_tuple;
3182
btr_pcur_t* pcur = prebuilt->pcur;
3183
trx_t* trx = prebuilt->trx;
3186
ut_ad(dict_index_is_clust(index));
3187
ut_ad(!prebuilt->templ_contains_blob);
3189
#ifndef UNIV_SEARCH_DEBUG
3190
btr_pcur_open_with_no_init(index, search_tuple, PAGE_CUR_GE,
3191
BTR_SEARCH_LEAF, pcur,
3194
#else /* UNIV_SEARCH_DEBUG */
3195
btr_pcur_open_with_no_init(index, search_tuple, PAGE_CUR_GE,
3196
BTR_SEARCH_LEAF, pcur,
3199
#endif /* UNIV_SEARCH_DEBUG */
3200
rec = btr_pcur_get_rec(pcur);
3202
if (!page_rec_is_user_rec(rec)) {
3207
/* As the cursor is now placed on a user record after a search with
3208
the mode PAGE_CUR_GE, the up_match field in the cursor tells how many
3209
fields in the user record matched to the search tuple */
3211
if (btr_pcur_get_up_match(pcur) < dtuple_get_n_fields(search_tuple)) {
3213
return(SEL_EXHAUSTED);
3216
/* This is a non-locking consistent read: if necessary, fetch
3217
a previous version of the record */
3219
*offsets = rec_get_offsets(rec, index, *offsets,
3220
ULINT_UNDEFINED, heap);
3222
if (!lock_clust_rec_cons_read_sees(rec, index,
3223
*offsets, trx->read_view)) {
3228
if (rec_get_deleted_flag(rec, dict_table_is_comp(index->table))) {
3230
return(SEL_EXHAUSTED);
3238
/********************************************************************//**
3239
Searches for rows in the database. This is used in the interface to
3240
MySQL. This function opens a cursor, and also implements fetch next
3241
and fetch prev. NOTE that if we do a search with a full key value
3242
from a unique index (ROW_SEL_EXACT), then we will not store the cursor
3243
position and fetch next or fetch prev must not be tried to the cursor!
3244
@return DB_SUCCESS, DB_RECORD_NOT_FOUND, DB_END_OF_INDEX, DB_DEADLOCK,
3245
DB_LOCK_TABLE_FULL, DB_CORRUPTION, or DB_TOO_BIG_RECORD */
3248
row_search_for_mysql(
3249
/*=================*/
3250
byte* buf, /*!< in/out: buffer for the fetched
3251
row in the MySQL format */
3252
ulint mode, /*!< in: search mode PAGE_CUR_L, ... */
3253
row_prebuilt_t* prebuilt, /*!< in: prebuilt struct for the
3254
table handle; this contains the info
3255
of search_tuple, index; if search
3256
tuple contains 0 fields then we
3257
position the cursor at the start or
3258
the end of the index, depending on
3260
ulint match_mode, /*!< in: 0 or ROW_SEL_EXACT or
3261
ROW_SEL_EXACT_PREFIX */
3262
ulint direction) /*!< in: 0 or ROW_SEL_NEXT or
3263
ROW_SEL_PREV; NOTE: if this is != 0,
3264
then prebuilt must have a pcur
3265
with stored position! In opening of a
3266
cursor 'direction' should be 0. */
3268
dict_index_t* index = prebuilt->index;
3269
ibool comp = dict_table_is_comp(index->table);
3270
const dtuple_t* search_tuple = prebuilt->search_tuple;
3271
btr_pcur_t* pcur = prebuilt->pcur;
3272
trx_t* trx = prebuilt->trx;
3273
dict_index_t* clust_index;
3276
const rec_t* result_rec;
3277
const rec_t* clust_rec;
3278
ulint err = DB_SUCCESS;
3279
ibool unique_search = FALSE;
3280
ibool unique_search_from_clust_index = FALSE;
3281
ibool mtr_has_extra_clust_latch = FALSE;
3282
ibool moves_up = FALSE;
3283
ibool set_also_gap_locks = TRUE;
3284
/* if the query is a plain locking SELECT, and the isolation level
3285
is <= TRX_ISO_READ_COMMITTED, then this is set to FALSE */
3286
ibool did_semi_consistent_read = FALSE;
3287
/* if the returned record was locked and we did a semi-consistent
3288
read (fetch the newest committed version), then this is set to
3290
#ifdef UNIV_SEARCH_DEBUG
3292
#endif /* UNIV_SEARCH_DEBUG */
3294
ibool same_user_rec;
3296
mem_heap_t* heap = NULL;
3297
ulint offsets_[REC_OFFS_NORMAL_SIZE];
3298
ulint* offsets = offsets_;
3300
rec_offs_init(offsets_);
3302
ut_ad(index && pcur && search_tuple);
3303
ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
3305
if (UNIV_UNLIKELY(prebuilt->table->ibd_file_missing)) {
3306
ut_print_timestamp(stderr);
3307
fprintf(stderr, " InnoDB: Error:\n"
3308
"InnoDB: MySQL is trying to use a table handle"
3309
" but the .ibd file for\n"
3310
"InnoDB: table %s does not exist.\n"
3311
"InnoDB: Have you deleted the .ibd file"
3312
" from the database directory under\n"
3313
"InnoDB: the MySQL datadir, or have you used"
3314
" DISCARD TABLESPACE?\n"
3315
"InnoDB: Look from\n"
3316
"InnoDB: " REFMAN "innodb-troubleshooting.html\n"
3317
"InnoDB: how you can resolve the problem.\n",
3318
prebuilt->table->name);
3323
if (UNIV_UNLIKELY(!prebuilt->index_usable)) {
3325
return(DB_MISSING_HISTORY);
3328
if (UNIV_UNLIKELY(prebuilt->magic_n != ROW_PREBUILT_ALLOCATED)) {
3330
"InnoDB: Error: trying to free a corrupt\n"
3331
"InnoDB: table handle. Magic n %lu, table name ",
3332
(ulong) prebuilt->magic_n);
3333
ut_print_name(stderr, trx, TRUE, prebuilt->table->name);
3336
mem_analyze_corruption(prebuilt);
3342
fprintf(stderr, "Match mode %lu\n search tuple ",
3343
(ulong) match_mode);
3344
dtuple_print(search_tuple);
3345
fprintf(stderr, "N tables locked %lu\n",
3346
(ulong) trx->mysql_n_tables_locked);
3348
/*-------------------------------------------------------------*/
3349
/* PHASE 0: Release a possible s-latch we are holding on the
3350
adaptive hash index latch if there is someone waiting behind */
3352
if (UNIV_UNLIKELY(rw_lock_get_writer(&btr_search_latch) != RW_LOCK_NOT_LOCKED)
3353
&& trx->has_search_latch) {
3355
/* There is an x-latch request on the adaptive hash index:
3356
release the s-latch to reduce starvation and wait for
3357
BTR_SEA_TIMEOUT rounds before trying to keep it again over
3360
rw_lock_s_unlock(&btr_search_latch);
3361
trx->has_search_latch = FALSE;
3363
trx->search_latch_timeout = BTR_SEA_TIMEOUT;
3366
/* Reset the new record lock info if srv_locks_unsafe_for_binlog
3367
is set or session is using a READ COMMITED isolation level. Then
3368
we are able to remove the record locks set here on an individual
3370
prebuilt->new_rec_locks = 0;
3372
/*-------------------------------------------------------------*/
3373
/* PHASE 1: Try to pop the row from the prefetch cache */
3375
if (UNIV_UNLIKELY(direction == 0)) {
3376
trx->op_info = "starting index read";
3378
prebuilt->n_rows_fetched = 0;
3379
prebuilt->n_fetch_cached = 0;
3380
prebuilt->fetch_cache_first = 0;
3382
if (prebuilt->sel_graph == NULL) {
3383
/* Build a dummy select query graph */
3384
row_prebuild_sel_graph(prebuilt);
3387
trx->op_info = "fetching rows";
3389
if (prebuilt->n_rows_fetched == 0) {
3390
prebuilt->fetch_direction = direction;
3393
if (UNIV_UNLIKELY(direction != prebuilt->fetch_direction)) {
3394
if (UNIV_UNLIKELY(prebuilt->n_fetch_cached > 0)) {
3396
/* TODO: scrollable cursor: restore cursor to
3397
the place of the latest returned row,
3398
or better: prevent caching for a scroll
3402
prebuilt->n_rows_fetched = 0;
3403
prebuilt->n_fetch_cached = 0;
3404
prebuilt->fetch_cache_first = 0;
3406
} else if (UNIV_LIKELY(prebuilt->n_fetch_cached > 0)) {
3407
row_sel_pop_cached_row_for_mysql(buf, prebuilt);
3409
prebuilt->n_rows_fetched++;
3416
if (prebuilt->fetch_cache_first > 0
3417
&& prebuilt->fetch_cache_first < MYSQL_FETCH_CACHE_SIZE) {
3419
/* The previous returned row was popped from the fetch
3420
cache, but the cache was not full at the time of the
3421
popping: no more rows can exist in the result set */
3423
err = DB_RECORD_NOT_FOUND;
3427
prebuilt->n_rows_fetched++;
3429
if (prebuilt->n_rows_fetched > 1000000000) {
3430
/* Prevent wrap-over */
3431
prebuilt->n_rows_fetched = 500000000;
3434
mode = pcur->search_mode;
3437
/* In a search where at most one record in the index may match, we
3438
can use a LOCK_REC_NOT_GAP type record lock when locking a
3439
non-delete-marked matching record.
3441
Note that in a unique secondary index there may be different
3442
delete-marked versions of a record where only the primary key
3443
values differ: thus in a secondary index we must use next-key
3444
locks when locking delete-marked records. */
3446
if (match_mode == ROW_SEL_EXACT
3447
&& dict_index_is_unique(index)
3448
&& dtuple_get_n_fields(search_tuple)
3449
== dict_index_get_n_unique(index)
3450
&& (dict_index_is_clust(index)
3451
|| !dtuple_contains_null(search_tuple))) {
3453
/* Note above that a UNIQUE secondary index can contain many
3454
rows with the same key value if one of the columns is the SQL
3455
null. A clustered index under MySQL can never contain null
3456
columns because we demand that all the columns in primary key
3459
unique_search = TRUE;
3461
/* Even if the condition is unique, MySQL seems to try to
3462
retrieve also a second row if a primary key contains more than
3465
if (UNIV_UNLIKELY(direction != 0)) {
3467
err = DB_RECORD_NOT_FOUND;
3474
/*-------------------------------------------------------------*/
3475
/* PHASE 2: Try fast adaptive hash index search if possible */
3477
/* Next test if this is the special case where we can use the fast
3478
adaptive hash index to try the search. Since we must release the
3479
search system latch when we retrieve an externally stored field, we
3480
cannot use the adaptive hash index in a search in the case the row
3481
may be long and there may be externally stored fields */
3483
if (UNIV_UNLIKELY(direction == 0)
3485
&& dict_index_is_clust(index)
3486
&& !prebuilt->templ_contains_blob
3487
&& (prebuilt->mysql_row_len < UNIV_PAGE_SIZE / 8)) {
3491
unique_search_from_clust_index = TRUE;
3493
if (trx->mysql_n_tables_locked == 0
3494
&& prebuilt->select_lock_type == LOCK_NONE
3495
&& trx->isolation_level > TRX_ISO_READ_UNCOMMITTED
3496
&& trx->read_view) {
3498
/* This is a SELECT query done as a consistent read,
3499
and the read view has already been allocated:
3500
let us try a search shortcut through the hash
3502
NOTE that we must also test that
3503
mysql_n_tables_locked == 0, because this might
3504
also be INSERT INTO ... SELECT ... or
3505
CREATE TABLE ... SELECT ... . Our algorithm is
3506
NOT prepared to inserts interleaved with the SELECT,
3507
and if we try that, we can deadlock on the adaptive
3508
hash index semaphore! */
3510
#ifndef UNIV_SEARCH_DEBUG
3511
if (!trx->has_search_latch) {
3512
rw_lock_s_lock(&btr_search_latch);
3513
trx->has_search_latch = TRUE;
3516
switch (row_sel_try_search_shortcut_for_mysql(
3517
&rec, prebuilt, &offsets, &heap,
3520
#ifdef UNIV_SEARCH_DEBUG
3521
ut_a(0 == cmp_dtuple_rec(search_tuple,
3524
/* At this point, rec is protected by
3525
a page latch that was acquired by
3526
row_sel_try_search_shortcut_for_mysql().
3527
The latch will not be released until
3528
mtr_commit(&mtr). */
3530
if (!row_sel_store_mysql_rec(buf, prebuilt,
3532
err = DB_TOO_BIG_RECORD;
3534
/* We let the main loop to do the
3536
goto shortcut_fails_too_big_rec;
3541
/* ut_print_name(stderr, index->name);
3542
fputs(" shortcut\n", stderr); */
3547
goto release_search_latch_if_needed;
3552
/* ut_print_name(stderr, index->name);
3553
fputs(" record not found 2\n", stderr); */
3555
err = DB_RECORD_NOT_FOUND;
3556
release_search_latch_if_needed:
3557
if (trx->search_latch_timeout > 0
3558
&& trx->has_search_latch) {
3560
trx->search_latch_timeout--;
3562
rw_lock_s_unlock(&btr_search_latch);
3563
trx->has_search_latch = FALSE;
3566
/* NOTE that we do NOT store the cursor
3576
shortcut_fails_too_big_rec:
3582
/*-------------------------------------------------------------*/
3583
/* PHASE 3: Open or restore index cursor position */
3585
if (trx->has_search_latch) {
3586
rw_lock_s_unlock(&btr_search_latch);
3587
trx->has_search_latch = FALSE;
3590
trx_start_if_not_started(trx);
3592
if (trx->isolation_level <= TRX_ISO_READ_COMMITTED
3593
&& prebuilt->select_lock_type != LOCK_NONE
3594
&& trx->mysql_thd != NULL
3595
&& thd_is_select(trx->mysql_thd)) {
3596
/* It is a plain locking SELECT and the isolation
3597
level is low: do not lock gaps */
3599
set_also_gap_locks = FALSE;
3602
/* Note that if the search mode was GE or G, then the cursor
3603
naturally moves upward (in fetch next) in alphabetical order,
3604
otherwise downward */
3606
if (UNIV_UNLIKELY(direction == 0)) {
3607
if (mode == PAGE_CUR_GE || mode == PAGE_CUR_G) {
3610
} else if (direction == ROW_SEL_NEXT) {
3614
thr = que_fork_get_first_thr(prebuilt->sel_graph);
3616
que_thr_move_to_run_state_for_mysql(thr, trx);
3618
clust_index = dict_table_get_first_index(index->table);
3620
if (UNIV_LIKELY(direction != 0)) {
3621
ibool need_to_process = sel_restore_position_for_mysql(
3622
&same_user_rec, BTR_SEARCH_LEAF,
3623
pcur, moves_up, &mtr);
3625
if (UNIV_UNLIKELY(need_to_process)) {
3626
if (UNIV_UNLIKELY(prebuilt->row_read_type
3627
== ROW_READ_DID_SEMI_CONSISTENT)) {
3628
/* We did a semi-consistent read,
3629
but the record was removed in
3631
prebuilt->row_read_type
3632
= ROW_READ_TRY_SEMI_CONSISTENT;
3634
} else if (UNIV_LIKELY(prebuilt->row_read_type
3635
!= ROW_READ_DID_SEMI_CONSISTENT)) {
3637
/* The cursor was positioned on the record
3638
that we returned previously. If we need
3639
to repeat a semi-consistent read as a
3640
pessimistic locking read, the record
3641
cannot be skipped. */
3646
} else if (dtuple_get_n_fields(search_tuple) > 0) {
3648
btr_pcur_open_with_no_init(index, search_tuple, mode,
3652
pcur->trx_if_known = trx;
3654
rec = btr_pcur_get_rec(pcur);
3657
&& !page_rec_is_supremum(rec)
3658
&& set_also_gap_locks
3659
&& !(srv_locks_unsafe_for_binlog
3660
|| trx->isolation_level <= TRX_ISO_READ_COMMITTED)
3661
&& prebuilt->select_lock_type != LOCK_NONE) {
3663
/* Try to place a gap lock on the next index record
3664
to prevent phantoms in ORDER BY ... DESC queries */
3665
const rec_t* next = page_rec_get_next_const(rec);
3667
offsets = rec_get_offsets(next, index, offsets,
3668
ULINT_UNDEFINED, &heap);
3669
err = sel_set_rec_lock(btr_pcur_get_block(pcur),
3670
next, index, offsets,
3671
prebuilt->select_lock_type,
3674
if (err != DB_SUCCESS) {
3676
goto lock_wait_or_error;
3680
if (mode == PAGE_CUR_G) {
3681
btr_pcur_open_at_index_side(
3682
TRUE, index, BTR_SEARCH_LEAF, pcur, FALSE,
3684
} else if (mode == PAGE_CUR_L) {
3685
btr_pcur_open_at_index_side(
3686
FALSE, index, BTR_SEARCH_LEAF, pcur, FALSE,
3691
if (!prebuilt->sql_stat_start) {
3692
/* No need to set an intention lock or assign a read view */
3694
if (trx->read_view == NULL
3695
&& prebuilt->select_lock_type == LOCK_NONE) {
3697
fputs("InnoDB: Error: MySQL is trying to"
3698
" perform a consistent read\n"
3699
"InnoDB: but the read view is not assigned!\n",
3701
trx_print(stderr, trx, 600);
3702
fputc('\n', stderr);
3705
} else if (prebuilt->select_lock_type == LOCK_NONE) {
3706
/* This is a consistent read */
3707
/* Assign a read view for the query */
3709
trx_assign_read_view(trx);
3710
prebuilt->sql_stat_start = FALSE;
3713
if (prebuilt->select_lock_type == LOCK_S) {
3714
lock_mode = LOCK_IS;
3716
lock_mode = LOCK_IX;
3718
err = lock_table(0, index->table, lock_mode, thr);
3720
if (err != DB_SUCCESS) {
3722
goto lock_wait_or_error;
3724
prebuilt->sql_stat_start = FALSE;
3728
/*-------------------------------------------------------------*/
3729
/* PHASE 4: Look for matching records in a loop */
3731
rec = btr_pcur_get_rec(pcur);
3732
ut_ad(!!page_rec_is_comp(rec) == comp);
3733
#ifdef UNIV_SEARCH_DEBUG
3735
fputs("Using ", stderr);
3736
dict_index_name_print(stderr, index);
3737
fprintf(stderr, " cnt %lu ; Page no %lu\n", cnt,
3738
page_get_page_no(page_align(rec)));
3741
#endif /* UNIV_SEARCH_DEBUG */
3743
if (page_rec_is_infimum(rec)) {
3745
/* The infimum record on a page cannot be in the result set,
3746
and neither can a record lock be placed on it: we skip such
3752
if (page_rec_is_supremum(rec)) {
3754
if (set_also_gap_locks
3755
&& !(srv_locks_unsafe_for_binlog
3756
|| trx->isolation_level <= TRX_ISO_READ_COMMITTED)
3757
&& prebuilt->select_lock_type != LOCK_NONE) {
3759
/* Try to place a lock on the index record */
3761
/* If innodb_locks_unsafe_for_binlog option is used
3762
or this session is using a READ COMMITTED isolation
3763
level we do not lock gaps. Supremum record is really
3764
a gap and therefore we do not set locks there. */
3766
offsets = rec_get_offsets(rec, index, offsets,
3767
ULINT_UNDEFINED, &heap);
3768
err = sel_set_rec_lock(btr_pcur_get_block(pcur),
3769
rec, index, offsets,
3770
prebuilt->select_lock_type,
3771
LOCK_ORDINARY, thr);
3773
if (err != DB_SUCCESS) {
3775
goto lock_wait_or_error;
3778
/* A page supremum record cannot be in the result set: skip
3779
it now that we have placed a possible lock on it */
3784
/*-------------------------------------------------------------*/
3785
/* Do sanity checks in case our cursor has bumped into page
3789
next_offs = rec_get_next_offs(rec, TRUE);
3790
if (UNIV_UNLIKELY(next_offs < PAGE_NEW_SUPREMUM)) {
3795
next_offs = rec_get_next_offs(rec, FALSE);
3796
if (UNIV_UNLIKELY(next_offs < PAGE_OLD_SUPREMUM)) {
3802
if (UNIV_UNLIKELY(next_offs >= UNIV_PAGE_SIZE - PAGE_DIR)) {
3805
if (srv_force_recovery == 0 || moves_up == FALSE) {
3806
ut_print_timestamp(stderr);
3807
buf_page_print(page_align(rec), 0);
3809
"\nInnoDB: rec address %p,"
3810
" buf block fix count %lu\n",
3811
(void*) rec, (ulong)
3812
btr_cur_get_block(btr_pcur_get_btr_cur(pcur))
3813
->page.buf_fix_count);
3815
"InnoDB: Index corruption: rec offs %lu"
3816
" next offs %lu, page no %lu,\n"
3818
(ulong) page_offset(rec),
3820
(ulong) page_get_page_no(page_align(rec)));
3821
dict_index_name_print(stderr, trx, index);
3822
fputs(". Run CHECK TABLE. You may need to\n"
3823
"InnoDB: restore from a backup, or"
3824
" dump + drop + reimport the table.\n",
3827
err = DB_CORRUPTION;
3829
goto lock_wait_or_error;
3831
/* The user may be dumping a corrupt table. Jump
3832
over the corruption to recover as much as possible. */
3835
"InnoDB: Index corruption: rec offs %lu"
3836
" next offs %lu, page no %lu,\n"
3838
(ulong) page_offset(rec),
3840
(ulong) page_get_page_no(page_align(rec)));
3841
dict_index_name_print(stderr, trx, index);
3842
fputs(". We try to skip the rest of the page.\n",
3845
btr_pcur_move_to_last_on_page(pcur, &mtr);
3850
/*-------------------------------------------------------------*/
3852
/* Calculate the 'offsets' associated with 'rec' */
3854
offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
3856
if (UNIV_UNLIKELY(srv_force_recovery > 0)) {
3857
if (!rec_validate(rec, offsets)
3858
|| !btr_index_rec_validate(rec, index, FALSE)) {
3860
"InnoDB: Index corruption: rec offs %lu"
3861
" next offs %lu, page no %lu,\n"
3863
(ulong) page_offset(rec),
3865
(ulong) page_get_page_no(page_align(rec)));
3866
dict_index_name_print(stderr, trx, index);
3867
fputs(". We try to skip the record.\n",
3874
/* Note that we cannot trust the up_match value in the cursor at this
3875
place because we can arrive here after moving the cursor! Thus
3876
we have to recompare rec and search_tuple to determine if they
3879
if (match_mode == ROW_SEL_EXACT) {
3880
/* Test if the index record matches completely to search_tuple
3881
in prebuilt: if not, then we return with DB_RECORD_NOT_FOUND */
3883
/* fputs("Comparing rec and search tuple\n", stderr); */
3885
if (0 != cmp_dtuple_rec(search_tuple, rec, offsets)) {
3887
if (set_also_gap_locks
3888
&& !(srv_locks_unsafe_for_binlog
3889
|| trx->isolation_level
3890
<= TRX_ISO_READ_COMMITTED)
3891
&& prebuilt->select_lock_type != LOCK_NONE) {
3893
/* Try to place a gap lock on the index
3894
record only if innodb_locks_unsafe_for_binlog
3895
option is not set or this session is not
3896
using a READ COMMITTED isolation level. */
3898
err = sel_set_rec_lock(
3899
btr_pcur_get_block(pcur),
3900
rec, index, offsets,
3901
prebuilt->select_lock_type, LOCK_GAP,
3904
if (err != DB_SUCCESS) {
3906
goto lock_wait_or_error;
3910
btr_pcur_store_position(pcur, &mtr);
3912
err = DB_RECORD_NOT_FOUND;
3913
/* ut_print_name(stderr, index->name);
3914
fputs(" record not found 3\n", stderr); */
3919
} else if (match_mode == ROW_SEL_EXACT_PREFIX) {
3921
if (!cmp_dtuple_is_prefix_of_rec(search_tuple, rec, offsets)) {
3923
if (set_also_gap_locks
3924
&& !(srv_locks_unsafe_for_binlog
3925
|| trx->isolation_level
3926
<= TRX_ISO_READ_COMMITTED)
3927
&& prebuilt->select_lock_type != LOCK_NONE) {
3929
/* Try to place a gap lock on the index
3930
record only if innodb_locks_unsafe_for_binlog
3931
option is not set or this session is not
3932
using a READ COMMITTED isolation level. */
3934
err = sel_set_rec_lock(
3935
btr_pcur_get_block(pcur),
3936
rec, index, offsets,
3937
prebuilt->select_lock_type, LOCK_GAP,
3940
if (err != DB_SUCCESS) {
3942
goto lock_wait_or_error;
3946
btr_pcur_store_position(pcur, &mtr);
3948
err = DB_RECORD_NOT_FOUND;
3949
/* ut_print_name(stderr, index->name);
3950
fputs(" record not found 4\n", stderr); */
3956
/* We are ready to look at a possible new index entry in the result
3957
set: the cursor is now placed on a user record */
3959
if (prebuilt->select_lock_type != LOCK_NONE) {
3960
/* Try to place a lock on the index record; note that delete
3961
marked records are a special case in a unique search. If there
3962
is a non-delete marked record, then it is enough to lock its
3963
existence with LOCK_REC_NOT_GAP. */
3965
/* If innodb_locks_unsafe_for_binlog option is used
3966
or this session is using a READ COMMITED isolation
3967
level we lock only the record, i.e., next-key locking is
3972
if (!set_also_gap_locks
3973
|| srv_locks_unsafe_for_binlog
3974
|| trx->isolation_level <= TRX_ISO_READ_COMMITTED
3976
&& !UNIV_UNLIKELY(rec_get_deleted_flag(rec, comp)))) {
3980
lock_type = LOCK_ORDINARY;
3983
/* If we are doing a 'greater or equal than a primary key
3984
value' search from a clustered index, and we find a record
3985
that has that exact primary key value, then there is no need
3986
to lock the gap before the record, because no insert in the
3987
gap can be in our search range. That is, no phantom row can
3990
An example: if col1 is the primary key, the search is WHERE
3991
col1 >= 100, and we find a record where col1 = 100, then no
3992
need to lock the gap before that record. */
3994
if (index == clust_index
3995
&& mode == PAGE_CUR_GE
3997
&& dtuple_get_n_fields_cmp(search_tuple)
3998
== dict_index_get_n_unique(index)
3999
&& 0 == cmp_dtuple_rec(search_tuple, rec, offsets)) {
4001
lock_type = LOCK_REC_NOT_GAP;
4004
err = sel_set_rec_lock(btr_pcur_get_block(pcur),
4005
rec, index, offsets,
4006
prebuilt->select_lock_type,
4010
const rec_t* old_vers;
4012
if (srv_locks_unsafe_for_binlog
4013
|| trx->isolation_level <= TRX_ISO_READ_COMMITTED) {
4014
/* Note that a record of
4015
prebuilt->index was locked. */
4016
prebuilt->new_rec_locks = 1;
4020
if (UNIV_LIKELY(prebuilt->row_read_type
4021
!= ROW_READ_TRY_SEMI_CONSISTENT)
4023
|| index != clust_index) {
4025
goto lock_wait_or_error;
4028
/* The following call returns 'offsets'
4029
associated with 'old_vers' */
4030
err = row_sel_build_committed_vers_for_mysql(
4031
clust_index, prebuilt, rec,
4032
&offsets, &heap, &old_vers, &mtr);
4034
if (err != DB_SUCCESS) {
4036
goto lock_wait_or_error;
4039
mutex_enter(&kernel_mutex);
4040
if (trx->was_chosen_as_deadlock_victim) {
4041
mutex_exit(&kernel_mutex);
4044
goto lock_wait_or_error;
4046
if (UNIV_LIKELY(trx->wait_lock != NULL)) {
4047
lock_cancel_waiting_and_release(
4049
prebuilt->new_rec_locks = 0;
4051
mutex_exit(&kernel_mutex);
4053
/* The lock was granted while we were
4054
searching for the last committed version.
4055
Do a normal locking read. */
4057
offsets = rec_get_offsets(rec, index, offsets,
4061
/* Note that a record of
4062
prebuilt->index was locked. */
4063
prebuilt->new_rec_locks = 1;
4066
mutex_exit(&kernel_mutex);
4068
if (old_vers == NULL) {
4069
/* The row was not yet committed */
4074
did_semi_consistent_read = TRUE;
4079
goto lock_wait_or_error;
4082
/* This is a non-locking consistent read: if necessary, fetch
4083
a previous version of the record */
4085
if (trx->isolation_level == TRX_ISO_READ_UNCOMMITTED) {
4087
/* Do nothing: we let a non-locking SELECT read the
4088
latest version of the record */
4090
} else if (index == clust_index) {
4092
/* Fetch a previous version of the row if the current
4093
one is not visible in the snapshot; if we have a very
4094
high force recovery level set, we try to avoid crashes
4095
by skipping this lookup */
4097
if (UNIV_LIKELY(srv_force_recovery < 5)
4098
&& !lock_clust_rec_cons_read_sees(
4099
rec, index, offsets, trx->read_view)) {
4102
/* The following call returns 'offsets'
4103
associated with 'old_vers' */
4104
err = row_sel_build_prev_vers_for_mysql(
4105
trx->read_view, clust_index,
4106
prebuilt, rec, &offsets, &heap,
4109
if (err != DB_SUCCESS) {
4111
goto lock_wait_or_error;
4114
if (old_vers == NULL) {
4115
/* The row did not exist yet in
4123
} else if (!lock_sec_rec_cons_read_sees(rec, trx->read_view)) {
4124
/* We are looking into a non-clustered index,
4125
and to get the right version of the record we
4126
have to look also into the clustered index: this
4127
is necessary, because we can only get the undo
4128
information via the clustered index record. */
4130
ut_ad(index != clust_index);
4131
goto requires_clust_rec;
4135
/* NOTE that at this point rec can be an old version of a clustered
4136
index record built for a consistent read. We cannot assume after this
4137
point that rec is on a buffer pool page. Functions like
4138
page_rec_is_comp() cannot be used! */
4140
if (UNIV_UNLIKELY(rec_get_deleted_flag(rec, comp))) {
4142
/* The record is delete-marked: we can skip it */
4144
if ((srv_locks_unsafe_for_binlog
4145
|| trx->isolation_level <= TRX_ISO_READ_COMMITTED)
4146
&& prebuilt->select_lock_type != LOCK_NONE
4147
&& !did_semi_consistent_read) {
4149
/* No need to keep a lock on a delete-marked record
4150
if we do not want to use next-key locking. */
4152
row_unlock_for_mysql(prebuilt, TRUE);
4155
/* This is an optimization to skip setting the next key lock
4156
on the record that follows this delete-marked record. This
4157
optimization works because of the unique search criteria
4158
which precludes the presence of a range lock between this
4159
delete marked record and the record following it.
4161
For now this is applicable only to clustered indexes while
4162
doing a unique search. There is scope for further optimization
4163
applicable to unique secondary indexes. Current behaviour is
4164
to widen the scope of a lock on an already delete marked record
4165
if the same record is deleted twice by the same transaction */
4166
if (index == clust_index && unique_search) {
4167
err = DB_RECORD_NOT_FOUND;
4175
/* Get the clustered index record if needed, if we did not do the
4176
search using the clustered index. */
4178
if (index != clust_index && prebuilt->need_to_access_clustered) {
4181
/* We use a 'goto' to the preceding label if a consistent
4182
read of a secondary index record requires us to look up old
4183
versions of the associated clustered index record. */
4185
ut_ad(rec_offs_validate(rec, index, offsets));
4187
/* It was a non-clustered index and we must fetch also the
4188
clustered index record */
4190
mtr_has_extra_clust_latch = TRUE;
4192
/* The following call returns 'offsets' associated with
4193
'clust_rec'. Note that 'clust_rec' can be an old version
4194
built for a consistent read. */
4196
err = row_sel_get_clust_rec_for_mysql(prebuilt, index, rec,
4198
&offsets, &heap, &mtr);
4199
if (err != DB_SUCCESS) {
4201
goto lock_wait_or_error;
4204
if (clust_rec == NULL) {
4205
/* The record did not exist in the read view */
4206
ut_ad(prebuilt->select_lock_type == LOCK_NONE);
4211
if ((srv_locks_unsafe_for_binlog
4212
|| trx->isolation_level <= TRX_ISO_READ_COMMITTED)
4213
&& prebuilt->select_lock_type != LOCK_NONE) {
4214
/* Note that both the secondary index record
4215
and the clustered index record were locked. */
4216
ut_ad(prebuilt->new_rec_locks == 1);
4217
prebuilt->new_rec_locks = 2;
4220
if (UNIV_UNLIKELY(rec_get_deleted_flag(clust_rec, comp))) {
4222
/* The record is delete marked: we can skip it */
4224
if ((srv_locks_unsafe_for_binlog
4225
|| trx->isolation_level <= TRX_ISO_READ_COMMITTED)
4226
&& prebuilt->select_lock_type != LOCK_NONE) {
4228
/* No need to keep a lock on a delete-marked
4229
record if we do not want to use next-key
4232
row_unlock_for_mysql(prebuilt, TRUE);
4238
if (prebuilt->need_to_access_clustered) {
4240
result_rec = clust_rec;
4242
ut_ad(rec_offs_validate(result_rec, clust_index,
4245
/* We used 'offsets' for the clust rec, recalculate
4247
offsets = rec_get_offsets(rec, index, offsets,
4248
ULINT_UNDEFINED, &heap);
4255
/* We found a qualifying record 'result_rec'. At this point,
4256
'offsets' are associated with 'result_rec'. */
4258
ut_ad(rec_offs_validate(result_rec,
4259
result_rec != rec ? clust_index : index,
4262
/* At this point, the clustered index record is protected
4263
by a page latch that was acquired when pcur was positioned.
4264
The latch will not be released until mtr_commit(&mtr). */
4266
if ((match_mode == ROW_SEL_EXACT
4267
|| prebuilt->n_rows_fetched >= MYSQL_FETCH_CACHE_THRESHOLD)
4268
&& prebuilt->select_lock_type == LOCK_NONE
4269
&& !prebuilt->templ_contains_blob
4270
&& !prebuilt->clust_index_was_generated
4271
&& prebuilt->template_type
4272
!= ROW_MYSQL_DUMMY_TEMPLATE) {
4274
/* Inside an update, for example, we do not cache rows,
4275
since we may use the cursor position to do the actual
4276
update, that is why we require ...lock_type == LOCK_NONE.
4277
Since we keep space in prebuilt only for the BLOBs of
4278
a single row, we cannot cache rows in the case there
4279
are BLOBs in the fields to be fetched. In HANDLER we do
4280
not cache rows because there the cursor is a scrollable
4283
row_sel_push_cache_row_for_mysql(prebuilt, result_rec,
4285
if (prebuilt->n_fetch_cached == MYSQL_FETCH_CACHE_SIZE) {
4292
if (prebuilt->template_type == ROW_MYSQL_DUMMY_TEMPLATE) {
4293
memcpy(buf + 4, result_rec
4294
- rec_offs_extra_size(offsets),
4295
rec_offs_size(offsets));
4296
mach_write_to_4(buf,
4297
rec_offs_extra_size(offsets) + 4);
4299
if (!row_sel_store_mysql_rec(buf, prebuilt,
4300
result_rec, offsets)) {
4301
err = DB_TOO_BIG_RECORD;
4303
goto lock_wait_or_error;
4307
if (prebuilt->clust_index_was_generated) {
4308
if (result_rec != rec) {
4309
offsets = rec_get_offsets(
4310
rec, index, offsets, ULINT_UNDEFINED,
4313
row_sel_store_row_id_to_prebuilt(prebuilt, rec,
4318
/* From this point on, 'offsets' are invalid. */
4321
/* We have an optimization to save CPU time: if this is a consistent
4322
read on a unique condition on the clustered index, then we do not
4323
store the pcur position, because any fetch next or prev will anyway
4324
return 'end of file'. Exceptions are locking reads and the MySQL
4325
HANDLER command where the user can move the cursor with PREV or NEXT
4326
even after a unique search. */
4328
if (!unique_search_from_clust_index
4329
|| prebuilt->select_lock_type != LOCK_NONE) {
4331
/* Inside an update always store the cursor position */
4333
btr_pcur_store_position(pcur, &mtr);
4341
/* Reset the old and new "did semi-consistent read" flags. */
4342
if (UNIV_UNLIKELY(prebuilt->row_read_type
4343
== ROW_READ_DID_SEMI_CONSISTENT)) {
4344
prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT;
4346
did_semi_consistent_read = FALSE;
4347
prebuilt->new_rec_locks = 0;
4349
/*-------------------------------------------------------------*/
4350
/* PHASE 5: Move the cursor to the next index record */
4352
if (UNIV_UNLIKELY(mtr_has_extra_clust_latch)) {
4353
/* We must commit mtr if we are moving to the next
4354
non-clustered index record, because we could break the
4355
latching order if we would access a different clustered
4356
index page right away without releasing the previous. */
4358
btr_pcur_store_position(pcur, &mtr);
4361
mtr_has_extra_clust_latch = FALSE;
4364
if (sel_restore_position_for_mysql(&same_user_rec,
4366
pcur, moves_up, &mtr)) {
4367
#ifdef UNIV_SEARCH_DEBUG
4369
#endif /* UNIV_SEARCH_DEBUG */
4376
if (UNIV_UNLIKELY(!btr_pcur_move_to_next(pcur, &mtr))) {
4378
btr_pcur_store_position(pcur, &mtr);
4380
if (match_mode != 0) {
4381
err = DB_RECORD_NOT_FOUND;
4383
err = DB_END_OF_INDEX;
4389
if (UNIV_UNLIKELY(!btr_pcur_move_to_prev(pcur, &mtr))) {
4394
#ifdef UNIV_SEARCH_DEBUG
4396
#endif /* UNIV_SEARCH_DEBUG */
4401
/* Reset the old and new "did semi-consistent read" flags. */
4402
if (UNIV_UNLIKELY(prebuilt->row_read_type
4403
== ROW_READ_DID_SEMI_CONSISTENT)) {
4404
prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT;
4406
did_semi_consistent_read = FALSE;
4408
/*-------------------------------------------------------------*/
4410
btr_pcur_store_position(pcur, &mtr);
4413
mtr_has_extra_clust_latch = FALSE;
4415
trx->error_state = err;
4417
/* The following is a patch for MySQL */
4419
que_thr_stop_for_mysql(thr);
4421
thr->lock_state = QUE_THR_LOCK_ROW;
4423
if (row_mysql_handle_errors(&err, trx, thr, NULL)) {
4424
/* It was a lock wait, and it ended */
4426
thr->lock_state = QUE_THR_LOCK_NOLOCK;
4429
sel_restore_position_for_mysql(&same_user_rec,
4430
BTR_SEARCH_LEAF, pcur,
4433
if ((srv_locks_unsafe_for_binlog
4434
|| trx->isolation_level <= TRX_ISO_READ_COMMITTED)
4435
&& !same_user_rec) {
4437
/* Since we were not able to restore the cursor
4438
on the same user record, we cannot use
4439
row_unlock_for_mysql() to unlock any records, and
4440
we must thus reset the new rec lock info. Since
4441
in lock0lock.c we have blocked the inheriting of gap
4442
X-locks, we actually do not have any new record locks
4445
Note that if we were able to restore on the 'same'
4446
user record, it is still possible that we were actually
4447
waiting on a delete-marked record, and meanwhile
4448
it was removed by purge and inserted again by some
4449
other user. But that is no problem, because in
4450
rec_loop we will again try to set a lock, and
4451
new_rec_lock_info in trx will be right at the end. */
4453
prebuilt->new_rec_locks = 0;
4456
mode = pcur->search_mode;
4461
thr->lock_state = QUE_THR_LOCK_NOLOCK;
4463
#ifdef UNIV_SEARCH_DEBUG
4464
/* fputs("Using ", stderr);
4465
dict_index_name_print(stderr, index);
4466
fprintf(stderr, " cnt %lu ret value %lu err\n", cnt, err); */
4467
#endif /* UNIV_SEARCH_DEBUG */
4471
/*-------------------------------------------------------------*/
4472
que_thr_stop_for_mysql_no_error(thr, trx);
4476
if (prebuilt->n_fetch_cached > 0) {
4477
row_sel_pop_cached_row_for_mysql(buf, prebuilt);
4482
#ifdef UNIV_SEARCH_DEBUG
4483
/* fputs("Using ", stderr);
4484
dict_index_name_print(stderr, index);
4485
fprintf(stderr, " cnt %lu ret value %lu err\n", cnt, err); */
4486
#endif /* UNIV_SEARCH_DEBUG */
4487
if (err == DB_SUCCESS) {
4493
if (UNIV_LIKELY_NULL(heap)) {
4494
mem_heap_free(heap);
4497
/* Set or reset the "did semi-consistent read" flag on return.
4498
The flag did_semi_consistent_read is set if and only if
4499
the record being returned was fetched with a semi-consistent read. */
4500
ut_ad(prebuilt->row_read_type != ROW_READ_WITH_LOCKS
4501
|| !did_semi_consistent_read);
4503
if (UNIV_UNLIKELY(prebuilt->row_read_type != ROW_READ_WITH_LOCKS)) {
4504
if (UNIV_UNLIKELY(did_semi_consistent_read)) {
4505
prebuilt->row_read_type = ROW_READ_DID_SEMI_CONSISTENT;
4507
prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT;
4513
/*******************************************************************//**
4514
Checks if MySQL at the moment is allowed for this table to retrieve a
4515
consistent read result, or store it to the query cache.
4516
@return TRUE if storing or retrieving from the query cache is permitted */
4519
row_search_check_if_query_cache_permitted(
4520
/*======================================*/
4521
trx_t* trx, /*!< in: transaction object */
4522
const char* norm_name) /*!< in: concatenation of database name,
4523
'/' char, table name */
4525
dict_table_t* table;
4528
table = dict_table_get(norm_name, FALSE);
4530
if (table == NULL) {
4535
mutex_enter(&kernel_mutex);
4537
/* Start the transaction if it is not started yet */
4539
trx_start_if_not_started_low(trx);
4541
/* If there are locks on the table or some trx has invalidated the
4542
cache up to our trx id, then ret = FALSE.
4543
We do not check what type locks there are on the table, though only
4544
IX type locks actually would require ret = FALSE. */
4546
if (UT_LIST_GET_LEN(table->locks) == 0
4547
&& ut_dulint_cmp(trx->id,
4548
table->query_cache_inv_trx_id) >= 0) {
4552
/* If the isolation level is high, assign a read view for the
4553
transaction if it does not yet have one */
4555
if (trx->isolation_level >= TRX_ISO_REPEATABLE_READ
4556
&& !trx->read_view) {
4558
trx->read_view = read_view_open_now(
4559
trx->id, trx->global_read_view_heap);
4560
trx->global_read_view = trx->read_view;
4564
mutex_exit(&kernel_mutex);
4569
/*******************************************************************//**
4570
Read the AUTOINC column from the current row. If the value is less than
4571
0 and the type is not unsigned then we reset the value to 0.
4572
@return value read from the column */
4575
row_search_autoinc_read_column(
4576
/*===========================*/
4577
dict_index_t* index, /*!< in: index to read from */
4578
const rec_t* rec, /*!< in: current rec */
4579
ulint col_no, /*!< in: column number */
4580
ulint mtype, /*!< in: column main type */
4581
ibool unsigned_type) /*!< in: signed or unsigned flag */
4586
mem_heap_t* heap = NULL;
4587
ulint offsets_[REC_OFFS_NORMAL_SIZE];
4588
ulint* offsets = offsets_;
4590
rec_offs_init(offsets_);
4592
offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
4594
data = rec_get_nth_field(rec, offsets, col_no, &len);
4596
ut_a(len != UNIV_SQL_NULL);
4600
ut_a(len <= sizeof value);
4601
value = mach_read_int_type(data, len, unsigned_type);
4605
ut_a(len == sizeof(float));
4606
value = (ib_uint64_t) mach_float_read(data);
4610
ut_a(len == sizeof(double));
4611
value = (ib_uint64_t) mach_double_read(data);
4618
if (UNIV_LIKELY_NULL(heap)) {
4619
mem_heap_free(heap);
4622
if (!unsigned_type && (ib_int64_t) value < 0) {
4629
/*******************************************************************//**
4631
@return current rec or NULL */
4634
row_search_autoinc_get_rec(
4635
/*=======================*/
4636
btr_pcur_t* pcur, /*!< in: the current cursor */
4637
mtr_t* mtr) /*!< in: mini transaction */
4640
const rec_t* rec = btr_pcur_get_rec(pcur);
4642
if (page_rec_is_user_rec(rec)) {
4645
} while (btr_pcur_move_to_prev(pcur, mtr));
4650
/*******************************************************************//**
4651
Read the max AUTOINC value from an index.
4652
@return DB_SUCCESS if all OK else error code, DB_RECORD_NOT_FOUND if
4653
column name can't be found in index */
4656
row_search_max_autoinc(
4657
/*===================*/
4658
dict_index_t* index, /*!< in: index to search */
4659
const char* col_name, /*!< in: name of autoinc column */
4660
ib_uint64_t* value) /*!< out: AUTOINC value read */
4664
dict_field_t* dfield = NULL;
4665
ulint error = DB_SUCCESS;
4667
n_cols = dict_index_get_n_ordering_defined_by_user(index);
4669
/* Search the index for the AUTOINC column name */
4670
for (i = 0; i < n_cols; ++i) {
4671
dfield = dict_index_get_nth_field(index, i);
4673
if (strcmp(col_name, dfield->name) == 0) {
4680
/* Must find the AUTOINC column name */
4681
if (i < n_cols && dfield) {
4687
/* Open at the high/right end (FALSE), and INIT
4689
btr_pcur_open_at_index_side(
4690
FALSE, index, BTR_SEARCH_LEAF, &pcur, TRUE, &mtr);
4692
if (page_get_n_recs(btr_pcur_get_page(&pcur)) > 0) {
4695
rec = row_search_autoinc_get_rec(&pcur, &mtr);
4698
ibool unsigned_type = (
4699
dfield->col->prtype & DATA_UNSIGNED);
4701
*value = row_search_autoinc_read_column(
4703
dfield->col->mtype, unsigned_type);
4707
btr_pcur_close(&pcur);
4711
error = DB_RECORD_NOT_FOUND;