~drizzle-trunk/drizzle/development

641.2.2 by Monty Taylor
InnoDB Plugin 1.0.3
1
/*****************************************************************************
2
1819.5.221 by vasil
Merge Revision revid:svn-v4:16c675df-0fcb-4bc9-8058-dcc011a37293:branches/zip:6749 from MySQL InnoDB
3
Copyright (c) 1997, 2010, Innobase Oy. All Rights Reserved.
641.2.2 by Monty Taylor
InnoDB Plugin 1.0.3
4
Copyright (c) 2008, Google Inc.
5
6
Portions of this file contain modifications contributed and copyrighted by
7
Google, Inc. Those modifications are gratefully acknowledged and are described
8
briefly in the InnoDB documentation. The contributions by Google are
9
incorporated with their permission, and subject to the conditions contained in
10
the file COPYING.Google.
11
12
This program is free software; you can redistribute it and/or modify it under
13
the terms of the GNU General Public License as published by the Free Software
14
Foundation; version 2 of the License.
15
16
This program is distributed in the hope that it will be useful, but WITHOUT
17
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
19
20
You should have received a copy of the GNU General Public License along with
1802.10.2 by Monty Taylor
Update all of the copyright headers to include the correct address.
21
this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
22
St, Fifth Floor, Boston, MA 02110-1301 USA
641.2.2 by Monty Taylor
InnoDB Plugin 1.0.3
23
24
*****************************************************************************/
25
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
26
/***************************************************//**
27
@file row/row0sel.c
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
28
Select
29
30
Created 12/19/1997 Heikki Tuuri
31
*******************************************************/
32
33
#include "row0sel.h"
34
35
#ifdef UNIV_NONINL
36
#include "row0sel.ic"
37
#endif
38
39
#include "dict0dict.h"
40
#include "dict0boot.h"
41
#include "trx0undo.h"
42
#include "trx0trx.h"
43
#include "btr0btr.h"
44
#include "btr0cur.h"
45
#include "btr0sea.h"
46
#include "mach0data.h"
47
#include "que0que.h"
48
#include "row0upd.h"
49
#include "row0row.h"
50
#include "row0vers.h"
51
#include "rem0cmp.h"
52
#include "lock0lock.h"
53
#include "eval0eval.h"
54
#include "pars0sym.h"
55
#include "pars0pars.h"
56
#include "row0mysql.h"
57
#include "read0read.h"
58
#include "buf0lru.h"
641.2.1 by Monty Taylor
InnoDB Plugin 1.0.2
59
#include "ha_prototypes.h"
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
60
61
/* Upper bound on the number of rows that are prefetched; the MySQL
interface uses a separate parameter of its own */
#define SEL_MAX_N_PREFETCH	16

/* Prefetching starts once this many rows have been fetched; the MySQL
interface uses a separate parameter of its own */
#define SEL_PREFETCH_LIMIT	1

/* After a select has accessed roughly this many pages it hands control
back to que_run_threads, so that runaway queries can be canceled */
#define SEL_COST_LIMIT		100

/* Result flags for the search shortcut */
#define SEL_FOUND		0
#define SEL_EXHAUSTED		1
#define SEL_RETRY		2
77
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
78
/********************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
79
Returns TRUE if the user-defined column in a secondary index record
80
is alphabetically the same as the corresponding BLOB column in the clustered
81
index record.
82
NOTE: the comparison is NOT done as a binary comparison, but character
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
83
fields are compared with collation!
84
@return	TRUE if the columns are equal */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
85
static
86
ibool
87
row_sel_sec_rec_is_for_blob(
88
/*========================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
89
	ulint		mtype,		/*!< in: main type */
90
	ulint		prtype,		/*!< in: precise type */
1819.9.36 by Marko Mäkelä, Stewart Smith
Merge Revision revid:marko.makela@oracle.com-20100629113248-fvl48lnzr44z94gg from MySQL InnoDB
91
	ulint		mbminmaxlen,	/*!< in: minimum and maximum length of
92
					a multi-byte character */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
93
	const byte*	clust_field,	/*!< in: the locally stored part of
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
94
					the clustered index column, including
95
					the BLOB pointer; the clustered
96
					index record must be covered by
97
					a lock or a page latch to protect it
98
					against deletion (rollback or purge) */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
99
	ulint		clust_len,	/*!< in: length of clust_field */
100
	const byte*	sec_field,	/*!< in: column in secondary index */
101
	ulint		sec_len,	/*!< in: length of sec_field */
102
	ulint		zip_size)	/*!< in: compressed page size, or 0 */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
103
{
104
	ulint	len;
105
	byte	buf[DICT_MAX_INDEX_COL_LEN];
106
107
	len = btr_copy_externally_stored_field_prefix(buf, sizeof buf,
108
						      zip_size,
109
						      clust_field, clust_len);
641.2.1 by Monty Taylor
InnoDB Plugin 1.0.2
110
111
	if (UNIV_UNLIKELY(len == 0)) {
112
		/* The BLOB was being deleted as the server crashed.
113
		There should not be any secondary index records
114
		referring to this clustered index record, because
115
		btr_free_externally_stored_field() is called after all
116
		secondary index entries of the row have been purged. */
117
		return(FALSE);
118
	}
119
1819.9.36 by Marko Mäkelä, Stewart Smith
Merge Revision revid:marko.makela@oracle.com-20100629113248-fvl48lnzr44z94gg from MySQL InnoDB
120
	len = dtype_get_at_most_n_mbchars(prtype, mbminmaxlen,
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
121
					  sec_len, len, (const char*) buf);
122
123
	return(!cmp_data_data(mtype, prtype, buf, len, sec_field, sec_len));
124
}
125
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
126
/********************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
127
Returns TRUE if the user-defined column values in a secondary index record
128
are alphabetically the same as the corresponding columns in the clustered
129
index record.
130
NOTE: the comparison is NOT done as a binary comparison, but character
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
131
fields are compared with collation!
132
@return TRUE if the secondary record is equal to the corresponding
1819.5.165 by marko
Merge Revision revid:svn-v4:16c675df-0fcb-4bc9-8058-dcc011a37293:branches/zip:6426 from MySQL InnoDB
133
fields in the clustered record, when compared with collation;
134
FALSE if not equal or if the clustered record has been marked for deletion */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
135
static
136
ibool
137
row_sel_sec_rec_is_for_clust_rec(
138
/*=============================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
139
	const rec_t*	sec_rec,	/*!< in: secondary index record */
140
	dict_index_t*	sec_index,	/*!< in: secondary index */
141
	const rec_t*	clust_rec,	/*!< in: clustered index record;
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
142
					must be protected by a lock or
143
					a page latch against deletion
144
					in rollback or purge */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
145
	dict_index_t*	clust_index)	/*!< in: clustered index */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
146
{
147
	const byte*	sec_field;
148
	ulint		sec_len;
149
	const byte*	clust_field;
150
	ulint		n;
151
	ulint		i;
152
	mem_heap_t*	heap		= NULL;
153
	ulint		clust_offsets_[REC_OFFS_NORMAL_SIZE];
154
	ulint		sec_offsets_[REC_OFFS_SMALL_SIZE];
155
	ulint*		clust_offs	= clust_offsets_;
156
	ulint*		sec_offs	= sec_offsets_;
157
	ibool		is_equal	= TRUE;
158
159
	rec_offs_init(clust_offsets_);
160
	rec_offs_init(sec_offsets_);
161
162
	if (rec_get_deleted_flag(clust_rec,
163
				 dict_table_is_comp(clust_index->table))) {
164
165
		/* The clustered index record is delete-marked;
166
		it is not visible in the read view.  Besides,
167
		if there are any externally stored columns,
168
		some of them may have already been purged. */
169
		return(FALSE);
170
	}
171
172
	clust_offs = rec_get_offsets(clust_rec, clust_index, clust_offs,
173
				     ULINT_UNDEFINED, &heap);
174
	sec_offs = rec_get_offsets(sec_rec, sec_index, sec_offs,
175
				   ULINT_UNDEFINED, &heap);
176
177
	n = dict_index_get_n_ordering_defined_by_user(sec_index);
178
179
	for (i = 0; i < n; i++) {
180
		const dict_field_t*	ifield;
181
		const dict_col_t*	col;
182
		ulint			clust_pos;
183
		ulint			clust_len;
184
		ulint			len;
185
186
		ifield = dict_index_get_nth_field(sec_index, i);
187
		col = dict_field_get_col(ifield);
188
		clust_pos = dict_col_get_clust_pos(col, clust_index);
189
190
		clust_field = rec_get_nth_field(
191
			clust_rec, clust_offs, clust_pos, &clust_len);
192
		sec_field = rec_get_nth_field(sec_rec, sec_offs, i, &sec_len);
193
194
		len = clust_len;
195
196
		if (ifield->prefix_len > 0 && len != UNIV_SQL_NULL) {
197
198
			if (rec_offs_nth_extern(clust_offs, clust_pos)) {
199
				len -= BTR_EXTERN_FIELD_REF_SIZE;
200
			}
201
202
			len = dtype_get_at_most_n_mbchars(
1819.9.36 by Marko Mäkelä, Stewart Smith
Merge Revision revid:marko.makela@oracle.com-20100629113248-fvl48lnzr44z94gg from MySQL InnoDB
203
				col->prtype, col->mbminmaxlen,
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
204
				ifield->prefix_len, len, (char*) clust_field);
205
206
			if (rec_offs_nth_extern(clust_offs, clust_pos)
207
			    && len < sec_len) {
208
				if (!row_sel_sec_rec_is_for_blob(
209
					    col->mtype, col->prtype,
1819.9.36 by Marko Mäkelä, Stewart Smith
Merge Revision revid:marko.makela@oracle.com-20100629113248-fvl48lnzr44z94gg from MySQL InnoDB
210
					    col->mbminmaxlen,
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
211
					    clust_field, clust_len,
212
					    sec_field, sec_len,
213
					    dict_table_zip_size(
214
						    clust_index->table))) {
215
					goto inequal;
216
				}
217
218
				continue;
219
			}
220
		}
221
222
		if (0 != cmp_data_data(col->mtype, col->prtype,
223
				       clust_field, len,
224
				       sec_field, sec_len)) {
225
inequal:
226
			is_equal = FALSE;
227
			goto func_exit;
228
		}
229
	}
230
231
func_exit:
232
	if (UNIV_LIKELY_NULL(heap)) {
233
		mem_heap_free(heap);
234
	}
235
	return(is_equal);
236
}
237
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
238
/*********************************************************************//**
239
Creates a select node struct.
240
@return	own: select node struct */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
241
UNIV_INTERN
242
sel_node_t*
243
sel_node_create(
244
/*============*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
245
	mem_heap_t*	heap)	/*!< in: memory heap where created */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
246
{
247
	sel_node_t*	node;
248
249
	node = mem_heap_alloc(heap, sizeof(sel_node_t));
250
	node->common.type = QUE_NODE_SELECT;
251
	node->state = SEL_NODE_OPEN;
252
253
	node->plans = NULL;
254
255
	return(node);
256
}
257
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
258
/*********************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
259
Frees the memory private to a select node when a query graph is freed,
260
does not free the heap where the node was originally created. */
261
UNIV_INTERN
262
void
263
sel_node_free_private(
264
/*==================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
265
	sel_node_t*	node)	/*!< in: select node struct */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
266
{
267
	ulint	i;
268
	plan_t*	plan;
269
270
	if (node->plans != NULL) {
271
		for (i = 0; i < node->n_tables; i++) {
272
			plan = sel_node_get_nth_plan(node, i);
273
274
			btr_pcur_close(&(plan->pcur));
275
			btr_pcur_close(&(plan->clust_pcur));
276
277
			if (plan->old_vers_heap) {
278
				mem_heap_free(plan->old_vers_heap);
279
			}
280
		}
281
	}
282
}
283
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
284
/*********************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
285
Evaluates the values in a select list. If there are aggregate functions,
286
their argument value is added to the aggregate total. */
287
UNIV_INLINE
288
void
289
sel_eval_select_list(
290
/*=================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
291
	sel_node_t*	node)	/*!< in: select node */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
292
{
293
	que_node_t*	exp;
294
295
	exp = node->select_list;
296
297
	while (exp) {
298
		eval_exp(exp);
299
300
		exp = que_node_get_next(exp);
301
	}
302
}
303
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
304
/*********************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
305
Assigns the values in the select list to the possible into-variables in
306
SELECT ... INTO ... */
307
UNIV_INLINE
308
void
309
sel_assign_into_var_values(
310
/*=======================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
311
	sym_node_t*	var,	/*!< in: first variable in a list of variables */
312
	sel_node_t*	node)	/*!< in: select node */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
313
{
314
	que_node_t*	exp;
315
316
	if (var == NULL) {
317
318
		return;
319
	}
320
321
	exp = node->select_list;
322
323
	while (var) {
324
		ut_ad(exp);
325
326
		eval_node_copy_val(var->alias, exp);
327
328
		exp = que_node_get_next(exp);
329
		var = que_node_get_next(var);
330
	}
331
}
332
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
333
/*********************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
334
Resets the aggregate value totals in the select list of an aggregate type
335
query. */
336
UNIV_INLINE
337
void
338
sel_reset_aggregate_vals(
339
/*=====================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
340
	sel_node_t*	node)	/*!< in: select node */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
341
{
342
	func_node_t*	func_node;
343
344
	ut_ad(node->is_aggregate);
345
346
	func_node = node->select_list;
347
348
	while (func_node) {
349
		eval_node_set_int_val(func_node, 0);
350
351
		func_node = que_node_get_next(func_node);
352
	}
353
354
	node->aggregate_already_fetched = FALSE;
355
}
356
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
357
/*********************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
358
Copies the input variable values when an explicit cursor is opened. */
359
UNIV_INLINE
360
void
361
row_sel_copy_input_variable_vals(
362
/*=============================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
363
	sel_node_t*	node)	/*!< in: select node */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
364
{
365
	sym_node_t*	var;
366
367
	var = UT_LIST_GET_FIRST(node->copy_variables);
368
369
	while (var) {
370
		eval_node_copy_val(var, var->alias);
371
372
		var->indirection = NULL;
373
374
		var = UT_LIST_GET_NEXT(col_var_list, var);
375
	}
376
}
377
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
378
/*********************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
379
Fetches the column values from a record. */
380
static
381
void
382
row_sel_fetch_columns(
383
/*==================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
384
	dict_index_t*	index,	/*!< in: record index */
385
	const rec_t*	rec,	/*!< in: record in a clustered or non-clustered
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
386
				index; must be protected by a page latch */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
387
	const ulint*	offsets,/*!< in: rec_get_offsets(rec, index) */
388
	sym_node_t*	column)	/*!< in: first column in a column list, or
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
389
				NULL */
390
{
391
	dfield_t*	val;
392
	ulint		index_type;
393
	ulint		field_no;
394
	const byte*	data;
395
	ulint		len;
396
397
	ut_ad(rec_offs_validate(rec, index, offsets));
398
399
	if (dict_index_is_clust(index)) {
400
		index_type = SYM_CLUST_FIELD_NO;
401
	} else {
402
		index_type = SYM_SEC_FIELD_NO;
403
	}
404
405
	while (column) {
406
		mem_heap_t*	heap = NULL;
407
		ibool		needs_copy;
408
409
		field_no = column->field_nos[index_type];
410
411
		if (field_no != ULINT_UNDEFINED) {
412
413
			if (UNIV_UNLIKELY(rec_offs_nth_extern(offsets,
414
							      field_no))) {
415
416
				/* Copy an externally stored field to the
1819.7.58 by Marko Mäkelä
Merge Revision revid:marko.makela@oracle.com-20100629125518-m3am4ia1ffjr0d0j from MySQL InnoDB
417
				temporary heap, if possible. */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
418
419
				heap = mem_heap_create(1);
420
421
				data = btr_rec_copy_externally_stored_field(
422
					rec, offsets,
423
					dict_table_zip_size(index->table),
424
					field_no, &len, heap);
425
1819.7.58 by Marko Mäkelä
Merge Revision revid:marko.makela@oracle.com-20100629125518-m3am4ia1ffjr0d0j from MySQL InnoDB
426
				/* data == NULL means that the
427
				externally stored field was not
1819.7.62 by Marko Mäkelä
Merge Revision revid:marko.makela@oracle.com-20100630093149-wmc37t128gic933v from MySQL InnoDB
428
				written yet. This record
1819.7.58 by Marko Mäkelä
Merge Revision revid:marko.makela@oracle.com-20100629125518-m3am4ia1ffjr0d0j from MySQL InnoDB
429
				should only be seen by
430
				recv_recovery_rollback_active() or any
431
				TRX_ISO_READ_UNCOMMITTED
432
				transactions. The InnoDB SQL parser
433
				(the sole caller of this function)
434
				does not implement READ UNCOMMITTED,
435
				and it is not involved during rollback. */
436
				ut_a(data);
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
437
				ut_a(len != UNIV_SQL_NULL);
438
439
				needs_copy = TRUE;
440
			} else {
441
				data = rec_get_nth_field(rec, offsets,
442
							 field_no, &len);
443
444
				needs_copy = column->copy_val;
445
			}
446
447
			if (needs_copy) {
448
				eval_node_copy_and_alloc_val(column, data,
449
							     len);
450
			} else {
451
				val = que_node_get_val(column);
452
				dfield_set_data(val, data, len);
453
			}
454
455
			if (UNIV_LIKELY_NULL(heap)) {
456
				mem_heap_free(heap);
457
			}
458
		}
459
460
		column = UT_LIST_GET_NEXT(col_var_list, column);
461
	}
462
}
463
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
464
/*********************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
465
Allocates a prefetch buffer for a column when prefetch is first time done. */
466
static
467
void
468
sel_col_prefetch_buf_alloc(
469
/*=======================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
470
	sym_node_t*	column)	/*!< in: symbol table node for a column */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
471
{
472
	sel_buf_t*	sel_buf;
473
	ulint		i;
474
475
	ut_ad(que_node_get_type(column) == QUE_NODE_SYMBOL);
476
477
	column->prefetch_buf = mem_alloc(SEL_MAX_N_PREFETCH
478
					 * sizeof(sel_buf_t));
479
	for (i = 0; i < SEL_MAX_N_PREFETCH; i++) {
480
		sel_buf = column->prefetch_buf + i;
481
482
		sel_buf->data = NULL;
483
484
		sel_buf->val_buf_size = 0;
485
	}
486
}
487
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
488
/*********************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
489
Frees a prefetch buffer for a column, including the dynamically allocated
490
memory for data stored there. */
491
UNIV_INTERN
492
void
493
sel_col_prefetch_buf_free(
494
/*======================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
495
	sel_buf_t*	prefetch_buf)	/*!< in, own: prefetch buffer */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
496
{
497
	sel_buf_t*	sel_buf;
498
	ulint		i;
499
500
	for (i = 0; i < SEL_MAX_N_PREFETCH; i++) {
501
		sel_buf = prefetch_buf + i;
502
503
		if (sel_buf->val_buf_size > 0) {
504
505
			mem_free(sel_buf->data);
506
		}
507
	}
508
}
509
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
510
/*********************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
511
Pops the column values for a prefetched, cached row from the column prefetch
512
buffers and places them to the val fields in the column nodes. */
513
static
514
void
515
sel_pop_prefetched_row(
516
/*===================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
517
	plan_t*	plan)	/*!< in: plan node for a table */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
518
{
519
	sym_node_t*	column;
520
	sel_buf_t*	sel_buf;
521
	dfield_t*	val;
522
	byte*		data;
523
	ulint		len;
524
	ulint		val_buf_size;
525
526
	ut_ad(plan->n_rows_prefetched > 0);
527
528
	column = UT_LIST_GET_FIRST(plan->columns);
529
530
	while (column) {
531
		val = que_node_get_val(column);
532
533
		if (!column->copy_val) {
534
			/* We did not really push any value for the
535
			column */
536
537
			ut_ad(!column->prefetch_buf);
538
			ut_ad(que_node_get_val_buf_size(column) == 0);
539
			ut_d(dfield_set_null(val));
540
541
			goto next_col;
542
		}
543
544
		ut_ad(column->prefetch_buf);
545
		ut_ad(!dfield_is_ext(val));
546
547
		sel_buf = column->prefetch_buf + plan->first_prefetched;
548
549
		data = sel_buf->data;
550
		len = sel_buf->len;
551
		val_buf_size = sel_buf->val_buf_size;
552
553
		/* We must keep track of the allocated memory for
554
		column values to be able to free it later: therefore
555
		we swap the values for sel_buf and val */
556
557
		sel_buf->data = dfield_get_data(val);
558
		sel_buf->len = dfield_get_len(val);
559
		sel_buf->val_buf_size = que_node_get_val_buf_size(column);
560
561
		dfield_set_data(val, data, len);
562
		que_node_set_val_buf_size(column, val_buf_size);
563
next_col:
564
		column = UT_LIST_GET_NEXT(col_var_list, column);
565
	}
566
567
	plan->n_rows_prefetched--;
568
569
	plan->first_prefetched++;
570
}
571
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
572
/*********************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
573
Pushes the column values for a prefetched, cached row to the column prefetch
574
buffers from the val fields in the column nodes. */
575
UNIV_INLINE
576
void
577
sel_push_prefetched_row(
578
/*====================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
579
	plan_t*	plan)	/*!< in: plan node for a table */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
580
{
581
	sym_node_t*	column;
582
	sel_buf_t*	sel_buf;
583
	dfield_t*	val;
584
	byte*		data;
585
	ulint		len;
586
	ulint		pos;
587
	ulint		val_buf_size;
588
589
	if (plan->n_rows_prefetched == 0) {
590
		pos = 0;
591
		plan->first_prefetched = 0;
592
	} else {
593
		pos = plan->n_rows_prefetched;
594
595
		/* We have the convention that pushing new rows starts only
596
		after the prefetch stack has been emptied: */
597
598
		ut_ad(plan->first_prefetched == 0);
599
	}
600
601
	plan->n_rows_prefetched++;
602
603
	ut_ad(pos < SEL_MAX_N_PREFETCH);
604
605
	column = UT_LIST_GET_FIRST(plan->columns);
606
607
	while (column) {
608
		if (!column->copy_val) {
609
			/* There is no sense to push pointers to database
610
			page fields when we do not keep latch on the page! */
611
612
			goto next_col;
613
		}
614
615
		if (!column->prefetch_buf) {
616
			/* Allocate a new prefetch buffer */
617
618
			sel_col_prefetch_buf_alloc(column);
619
		}
620
621
		sel_buf = column->prefetch_buf + pos;
622
623
		val = que_node_get_val(column);
624
625
		data = dfield_get_data(val);
626
		len = dfield_get_len(val);
627
		val_buf_size = que_node_get_val_buf_size(column);
628
629
		/* We must keep track of the allocated memory for
630
		column values to be able to free it later: therefore
631
		we swap the values for sel_buf and val */
632
633
		dfield_set_data(val, sel_buf->data, sel_buf->len);
634
		que_node_set_val_buf_size(column, sel_buf->val_buf_size);
635
636
		sel_buf->data = data;
637
		sel_buf->len = len;
638
		sel_buf->val_buf_size = val_buf_size;
639
next_col:
640
		column = UT_LIST_GET_NEXT(col_var_list, column);
641
	}
642
}
643
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
644
/*********************************************************************//**
645
Builds a previous version of a clustered index record for a consistent read
646
@return	DB_SUCCESS or error code */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
647
static
648
ulint
649
row_sel_build_prev_vers(
650
/*====================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
651
	read_view_t*	read_view,	/*!< in: read view */
652
	dict_index_t*	index,		/*!< in: plan node for table */
653
	rec_t*		rec,		/*!< in: record in a clustered index */
654
	ulint**		offsets,	/*!< in/out: offsets returned by
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
655
					rec_get_offsets(rec, plan->index) */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
656
	mem_heap_t**	offset_heap,	/*!< in/out: memory heap from which
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
657
					the offsets are allocated */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
658
	mem_heap_t**    old_vers_heap,  /*!< out: old version heap to use */
659
	rec_t**		old_vers,	/*!< out: old version, or NULL if the
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
660
					record does not exist in the view:
661
					i.e., it was freshly inserted
662
					afterwards */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
663
	mtr_t*		mtr)		/*!< in: mtr */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
664
{
665
	ulint	err;
666
667
	if (*old_vers_heap) {
668
		mem_heap_empty(*old_vers_heap);
669
	} else {
670
		*old_vers_heap = mem_heap_create(512);
671
	}
672
673
	err = row_vers_build_for_consistent_read(
674
		rec, mtr, index, offsets, read_view, offset_heap,
675
		*old_vers_heap, old_vers);
676
	return(err);
677
}
678
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
679
/*********************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
680
Builds the last committed version of a clustered index record for a
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
681
semi-consistent read.
682
@return	DB_SUCCESS or error code */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
683
static
684
ulint
685
row_sel_build_committed_vers_for_mysql(
686
/*===================================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
687
	dict_index_t*	clust_index,	/*!< in: clustered index */
688
	row_prebuilt_t*	prebuilt,	/*!< in: prebuilt struct */
689
	const rec_t*	rec,		/*!< in: record in a clustered index */
690
	ulint**		offsets,	/*!< in/out: offsets returned by
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
691
					rec_get_offsets(rec, clust_index) */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
692
	mem_heap_t**	offset_heap,	/*!< in/out: memory heap from which
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
693
					the offsets are allocated */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
694
	const rec_t**	old_vers,	/*!< out: old version, or NULL if the
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
695
					record does not exist in the view:
696
					i.e., it was freshly inserted
697
					afterwards */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
698
	mtr_t*		mtr)		/*!< in: mtr */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
699
{
700
	ulint	err;
701
702
	if (prebuilt->old_vers_heap) {
703
		mem_heap_empty(prebuilt->old_vers_heap);
704
	} else {
705
		prebuilt->old_vers_heap = mem_heap_create(200);
706
	}
707
708
	err = row_vers_build_for_semi_consistent_read(
709
		rec, mtr, clust_index, offsets, offset_heap,
710
		prebuilt->old_vers_heap, old_vers);
711
	return(err);
712
}
713
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
714
/*********************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
715
Tests the conditions which determine when the index segment we are searching
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
716
through has been exhausted.
717
@return	TRUE if row passed the tests */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
718
UNIV_INLINE
719
ibool
720
row_sel_test_end_conds(
721
/*===================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
722
	plan_t*	plan)	/*!< in: plan for the table; the column values must
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
723
			already have been retrieved and the right sides of
724
			comparisons evaluated */
725
{
726
	func_node_t*	cond;
727
728
	/* All conditions in end_conds are comparisons of a column to an
729
	expression */
730
731
	cond = UT_LIST_GET_FIRST(plan->end_conds);
732
733
	while (cond) {
734
		/* Evaluate the left side of the comparison, i.e., get the
735
		column value if there is an indirection */
736
737
		eval_sym(cond->args);
738
739
		/* Do the comparison */
740
741
		if (!eval_cmp(cond)) {
742
743
			return(FALSE);
744
		}
745
746
		cond = UT_LIST_GET_NEXT(cond_list, cond);
747
	}
748
749
	return(TRUE);
750
}
751
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
752
/*********************************************************************//**
753
Tests the other conditions.
754
@return	TRUE if row passed the tests */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
755
UNIV_INLINE
756
ibool
757
row_sel_test_other_conds(
758
/*=====================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
759
	plan_t*	plan)	/*!< in: plan for the table; the column values must
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
760
			already have been retrieved */
761
{
762
	func_node_t*	cond;
763
764
	cond = UT_LIST_GET_FIRST(plan->other_conds);
765
766
	while (cond) {
767
		eval_exp(cond);
768
769
		if (!eval_node_get_ibool_val(cond)) {
770
771
			return(FALSE);
772
		}
773
774
		cond = UT_LIST_GET_NEXT(cond_list, cond);
775
	}
776
777
	return(TRUE);
778
}
779
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
780
/*********************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
781
Retrieves the clustered index record corresponding to a record in a
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
782
non-clustered index. Does the necessary locking.
783
@return	DB_SUCCESS or error code */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
784
static
785
ulint
786
row_sel_get_clust_rec(
787
/*==================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
788
	sel_node_t*	node,	/*!< in: select_node */
789
	plan_t*		plan,	/*!< in: plan node for table */
790
	rec_t*		rec,	/*!< in: record in a non-clustered index */
791
	que_thr_t*	thr,	/*!< in: query thread */
792
	rec_t**		out_rec,/*!< out: clustered record or an old version of
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
793
				it, NULL if the old version did not exist
794
				in the read view, i.e., it was a fresh
795
				inserted version */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
796
	mtr_t*		mtr)	/*!< in: mtr used to get access to the
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
797
				non-clustered record; the same mtr is used to
798
				access the clustered index */
799
{
800
	dict_index_t*	index;
801
	rec_t*		clust_rec;
802
	rec_t*		old_vers;
803
	ulint		err;
804
	mem_heap_t*	heap		= NULL;
805
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
806
	ulint*		offsets		= offsets_;
807
	rec_offs_init(offsets_);
808
809
	*out_rec = NULL;
810
811
	offsets = rec_get_offsets(rec,
812
				  btr_pcur_get_btr_cur(&plan->pcur)->index,
813
				  offsets, ULINT_UNDEFINED, &heap);
814
815
	row_build_row_ref_fast(plan->clust_ref, plan->clust_map, rec, offsets);
816
817
	index = dict_table_get_first_index(plan->table);
818
819
	btr_pcur_open_with_no_init(index, plan->clust_ref, PAGE_CUR_LE,
641.2.2 by Monty Taylor
InnoDB Plugin 1.0.3
820
				   BTR_SEARCH_LEAF, &plan->clust_pcur,
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
821
				   0, mtr);
822
823
	clust_rec = btr_pcur_get_rec(&(plan->clust_pcur));
824
825
	/* Note: only if the search ends up on a non-infimum record is the
826
	low_match value the real match to the search tuple */
827
828
	if (!page_rec_is_user_rec(clust_rec)
829
	    || btr_pcur_get_low_match(&(plan->clust_pcur))
830
	    < dict_index_get_n_unique(index)) {
831
832
		ut_a(rec_get_deleted_flag(rec,
833
					  dict_table_is_comp(plan->table)));
834
		ut_a(node->read_view);
835
836
		/* In a rare case it is possible that no clust rec is found
837
		for a delete-marked secondary index record: if in row0umod.c
838
		in row_undo_mod_remove_clust_low() we have already removed
839
		the clust rec, while purge is still cleaning and removing
840
		secondary index records associated with earlier versions of
841
		the clustered index record. In that case we know that the
842
		clustered index record did not exist in the read view of
843
		trx. */
844
845
		goto func_exit;
846
	}
847
848
	offsets = rec_get_offsets(clust_rec, index, offsets,
849
				  ULINT_UNDEFINED, &heap);
850
851
	if (!node->read_view) {
852
		/* Try to place a lock on the index record */
853
854
		/* If innodb_locks_unsafe_for_binlog option is used
855
		or this session is using READ COMMITTED isolation level
856
		we lock only the record, i.e., next-key locking is
857
		not used. */
858
		ulint	lock_type;
859
		trx_t*	trx;
860
861
		trx = thr_get_trx(thr);
862
863
		if (srv_locks_unsafe_for_binlog
1819.7.86 by Vasil Dimov, Stewart Smith
Merge Revision revid:vasil.dimov@oracle.com-20100504105214-ljj5sy3bk21zl7og from MySQL InnoDB
864
		    || trx->isolation_level <= TRX_ISO_READ_COMMITTED) {
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
865
			lock_type = LOCK_REC_NOT_GAP;
866
		} else {
867
			lock_type = LOCK_ORDINARY;
868
		}
869
870
		err = lock_clust_rec_read_check_and_lock(
871
			0, btr_pcur_get_block(&plan->clust_pcur),
872
			clust_rec, index, offsets,
873
			node->row_lock_mode, lock_type, thr);
874
1819.7.159 by Marko Mäkelä, Stewart Smith
Merge Revision revid:marko.makela@oracle.com-20100602113733-fslfv73nhi0d17t4 from MySQL InnoDB
875
		switch (err) {
876
		case DB_SUCCESS:
877
		case DB_SUCCESS_LOCKED_REC:
878
			/* Declare the variable uninitialized in Valgrind.
879
			It should be set to DB_SUCCESS at func_exit. */
880
			UNIV_MEM_INVALID(&err, sizeof err);
881
			break;
882
		default:
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
883
			goto err_exit;
884
		}
885
	} else {
886
		/* This is a non-locking consistent read: if necessary, fetch
887
		a previous version of the record */
888
889
		old_vers = NULL;
890
891
		if (!lock_clust_rec_cons_read_sees(clust_rec, index, offsets,
892
						   node->read_view)) {
893
894
			err = row_sel_build_prev_vers(
895
				node->read_view, index, clust_rec,
896
				&offsets, &heap, &plan->old_vers_heap,
897
				&old_vers, mtr);
898
899
			if (err != DB_SUCCESS) {
900
901
				goto err_exit;
902
			}
903
904
			clust_rec = old_vers;
905
906
			if (clust_rec == NULL) {
907
				goto func_exit;
908
			}
909
		}
910
911
		/* If we had to go to an earlier version of row or the
912
		secondary index record is delete marked, then it may be that
913
		the secondary index record corresponding to clust_rec
914
		(or old_vers) is not rec; in that case we must ignore
915
		such row because in our snapshot rec would not have existed.
916
		Remember that from rec we cannot see directly which transaction
917
		id corresponds to it: we have to go to the clustered index
918
		record. A query where we want to fetch all rows where
919
		the secondary index value is in some interval would return
920
		a wrong result if we would not drop rows which we come to
921
		visit through secondary index records that would not really
922
		exist in our snapshot. */
923
924
		if ((old_vers
925
		     || rec_get_deleted_flag(rec, dict_table_is_comp(
926
						     plan->table)))
927
		    && !row_sel_sec_rec_is_for_clust_rec(rec, plan->index,
928
							 clust_rec, index)) {
929
			goto func_exit;
930
		}
931
	}
932
933
	/* Fetch the columns needed in test conditions.  The clustered
934
	index record is protected by a page latch that was acquired
935
	when plan->clust_pcur was positioned.  The latch will not be
936
	released until mtr_commit(mtr). */
937
1819.7.58 by Marko Mäkelä
Merge Revision revid:marko.makela@oracle.com-20100629125518-m3am4ia1ffjr0d0j from MySQL InnoDB
938
	ut_ad(!rec_get_deleted_flag(clust_rec, rec_offs_comp(offsets)));
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
939
	row_sel_fetch_columns(index, clust_rec, offsets,
940
			      UT_LIST_GET_FIRST(plan->columns));
941
	*out_rec = clust_rec;
942
func_exit:
943
	err = DB_SUCCESS;
944
err_exit:
945
	if (UNIV_LIKELY_NULL(heap)) {
946
		mem_heap_free(heap);
947
	}
948
	return(err);
949
}
950
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
951
/*********************************************************************//**
952
Sets a lock on a record.
1819.7.159 by Marko Mäkelä, Stewart Smith
Merge Revision revid:marko.makela@oracle.com-20100602113733-fslfv73nhi0d17t4 from MySQL InnoDB
953
@return	DB_SUCCESS, DB_SUCCESS_LOCKED_REC, or error code */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
954
UNIV_INLINE
1819.7.159 by Marko Mäkelä, Stewart Smith
Merge Revision revid:marko.makela@oracle.com-20100602113733-fslfv73nhi0d17t4 from MySQL InnoDB
955
enum db_err
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
956
sel_set_rec_lock(
957
/*=============*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
958
	const buf_block_t*	block,	/*!< in: buffer block of rec */
959
	const rec_t*		rec,	/*!< in: record */
960
	dict_index_t*		index,	/*!< in: index */
961
	const ulint*		offsets,/*!< in: rec_get_offsets(rec, index) */
962
	ulint			mode,	/*!< in: lock mode */
963
	ulint			type,	/*!< in: LOCK_ORDINARY, LOCK_GAP, or
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
964
					LOC_REC_NOT_GAP */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
965
	que_thr_t*		thr)	/*!< in: query thread */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
966
{
1819.7.159 by Marko Mäkelä, Stewart Smith
Merge Revision revid:marko.makela@oracle.com-20100602113733-fslfv73nhi0d17t4 from MySQL InnoDB
967
	trx_t*		trx;
968
	enum db_err	err;
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
969
970
	trx = thr_get_trx(thr);
971
972
	if (UT_LIST_GET_LEN(trx->trx_locks) > 10000) {
973
		if (buf_LRU_buf_pool_running_out()) {
974
975
			return(DB_LOCK_TABLE_FULL);
976
		}
977
	}
978
979
	if (dict_index_is_clust(index)) {
980
		err = lock_clust_rec_read_check_and_lock(
981
			0, block, rec, index, offsets, mode, type, thr);
982
	} else {
983
		err = lock_sec_rec_read_check_and_lock(
984
			0, block, rec, index, offsets, mode, type, thr);
985
	}
986
987
	return(err);
988
}
989
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
990
/*********************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
991
Opens a pcur to a table index. */
992
static
993
void
994
row_sel_open_pcur(
995
/*==============*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
996
	plan_t*		plan,		/*!< in: table plan */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
997
	ibool		search_latch_locked,
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
998
					/*!< in: TRUE if the thread currently
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
999
					has the search latch locked in
1000
					s-mode */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1001
	mtr_t*		mtr)		/*!< in: mtr */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1002
{
1003
	dict_index_t*	index;
1004
	func_node_t*	cond;
1005
	que_node_t*	exp;
1006
	ulint		n_fields;
1007
	ulint		has_search_latch = 0;	/* RW_S_LATCH or 0 */
1008
	ulint		i;
1009
1010
	if (search_latch_locked) {
1011
		has_search_latch = RW_S_LATCH;
1012
	}
1013
1014
	index = plan->index;
1015
1016
	/* Calculate the value of the search tuple: the exact match columns
1017
	get their expressions evaluated when we evaluate the right sides of
1018
	end_conds */
1019
1020
	cond = UT_LIST_GET_FIRST(plan->end_conds);
1021
1022
	while (cond) {
1023
		eval_exp(que_node_get_next(cond->args));
1024
1025
		cond = UT_LIST_GET_NEXT(cond_list, cond);
1026
	}
1027
1028
	if (plan->tuple) {
1029
		n_fields = dtuple_get_n_fields(plan->tuple);
1030
1031
		if (plan->n_exact_match < n_fields) {
1032
			/* There is a non-exact match field which must be
1033
			evaluated separately */
1034
1035
			eval_exp(plan->tuple_exps[n_fields - 1]);
1036
		}
1037
1038
		for (i = 0; i < n_fields; i++) {
1039
			exp = plan->tuple_exps[i];
1040
1041
			dfield_copy_data(dtuple_get_nth_field(plan->tuple, i),
1042
					 que_node_get_val(exp));
1043
		}
1044
1045
		/* Open pcur to the index */
1046
1047
		btr_pcur_open_with_no_init(index, plan->tuple, plan->mode,
641.2.2 by Monty Taylor
InnoDB Plugin 1.0.3
1048
					   BTR_SEARCH_LEAF, &plan->pcur,
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1049
					   has_search_latch, mtr);
1050
	} else {
1051
		/* Open the cursor to the start or the end of the index
1052
		(FALSE: no init) */
1053
641.2.2 by Monty Taylor
InnoDB Plugin 1.0.3
1054
		btr_pcur_open_at_index_side(plan->asc, index, BTR_SEARCH_LEAF,
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1055
					    &(plan->pcur), FALSE, mtr);
1056
	}
1057
1058
	ut_ad(plan->n_rows_prefetched == 0);
1059
	ut_ad(plan->n_rows_fetched == 0);
1060
	ut_ad(plan->cursor_at_end == FALSE);
1061
1062
	plan->pcur_is_open = TRUE;
1063
}
1064
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1065
/*********************************************************************//**
1066
Restores a stored pcur position to a table index.
1067
@return TRUE if the cursor should be moved to the next record after we
1068
return from this function (moved to the previous, in the case of a
1069
descending cursor) without processing again the current cursor
1070
record */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1071
static
1072
ibool
1073
row_sel_restore_pcur_pos(
1074
/*=====================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1075
	plan_t*		plan,	/*!< in: table plan */
1076
	mtr_t*		mtr)	/*!< in: mtr */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1077
{
1078
	ibool	equal_position;
1079
	ulint	relative_position;
1080
1081
	ut_ad(!plan->cursor_at_end);
1082
1083
	relative_position = btr_pcur_get_rel_pos(&(plan->pcur));
1084
641.2.2 by Monty Taylor
InnoDB Plugin 1.0.3
1085
	equal_position = btr_pcur_restore_position(BTR_SEARCH_LEAF,
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1086
						   &(plan->pcur), mtr);
1087
1088
	/* If the cursor is traveling upwards, and relative_position is
1089
1090
	(1) BTR_PCUR_BEFORE: this is not allowed, as we did not have a lock
1091
	yet on the successor of the page infimum;
1092
	(2) BTR_PCUR_AFTER: btr_pcur_restore_position placed the cursor on the
1093
	first record GREATER than the predecessor of a page supremum; we have
1094
	not yet processed the cursor record: no need to move the cursor to the
1095
	next record;
1096
	(3) BTR_PCUR_ON: btr_pcur_restore_position placed the cursor on the
1097
	last record LESS or EQUAL to the old stored user record; (a) if
1098
	equal_position is FALSE, this means that the cursor is now on a record
1099
	less than the old user record, and we must move to the next record;
1100
	(b) if equal_position is TRUE, then if
1101
	plan->stored_cursor_rec_processed is TRUE, we must move to the next
1102
	record, else there is no need to move the cursor. */
1103
1104
	if (plan->asc) {
1105
		if (relative_position == BTR_PCUR_ON) {
1106
1107
			if (equal_position) {
1108
1109
				return(plan->stored_cursor_rec_processed);
1110
			}
1111
1112
			return(TRUE);
1113
		}
1114
1115
		ut_ad(relative_position == BTR_PCUR_AFTER
1116
		      || relative_position == BTR_PCUR_AFTER_LAST_IN_TREE);
1117
1118
		return(FALSE);
1119
	}
1120
1121
	/* If the cursor is traveling downwards, and relative_position is
1122
1123
	(1) BTR_PCUR_BEFORE: btr_pcur_restore_position placed the cursor on
1124
	the last record LESS than the successor of a page infimum; we have not
1125
	processed the cursor record: no need to move the cursor;
1126
	(2) BTR_PCUR_AFTER: btr_pcur_restore_position placed the cursor on the
1127
	first record GREATER than the predecessor of a page supremum; we have
1128
	processed the cursor record: we should move the cursor to the previous
1129
	record;
1130
	(3) BTR_PCUR_ON: btr_pcur_restore_position placed the cursor on the
1131
	last record LESS or EQUAL to the old stored user record; (a) if
1132
	equal_position is FALSE, this means that the cursor is now on a record
1133
	less than the old user record, and we need not move to the previous
1134
	record; (b) if equal_position is TRUE, then if
1135
	plan->stored_cursor_rec_processed is TRUE, we must move to the previous
1136
	record, else there is no need to move the cursor. */
1137
1138
	if (relative_position == BTR_PCUR_BEFORE
1139
	    || relative_position == BTR_PCUR_BEFORE_FIRST_IN_TREE) {
1140
1141
		return(FALSE);
1142
	}
1143
1144
	if (relative_position == BTR_PCUR_ON) {
1145
1146
		if (equal_position) {
1147
1148
			return(plan->stored_cursor_rec_processed);
1149
		}
1150
1151
		return(FALSE);
1152
	}
1153
1154
	ut_ad(relative_position == BTR_PCUR_AFTER
1155
	      || relative_position == BTR_PCUR_AFTER_LAST_IN_TREE);
1156
1157
	return(TRUE);
1158
}
1159
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1160
/*********************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1161
Resets a plan cursor to a closed state. */
1162
UNIV_INLINE
1163
void
1164
plan_reset_cursor(
1165
/*==============*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1166
	plan_t*	plan)	/*!< in: plan */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1167
{
1168
	plan->pcur_is_open = FALSE;
1169
	plan->cursor_at_end = FALSE;
1170
	plan->n_rows_fetched = 0;
1171
	plan->n_rows_prefetched = 0;
1172
}
1173
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1174
/*********************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1175
Tries to do a shortcut to fetch a clustered index record with a unique key,
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1176
using the hash index if possible (not always).
1177
@return	SEL_FOUND, SEL_EXHAUSTED, SEL_RETRY */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1178
static
1179
ulint
1180
row_sel_try_search_shortcut(
1181
/*========================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1182
	sel_node_t*	node,	/*!< in: select node for a consistent read */
1183
	plan_t*		plan,	/*!< in: plan for a unique search in clustered
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1184
				index */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1185
	mtr_t*		mtr)	/*!< in: mtr */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1186
{
1187
	dict_index_t*	index;
1188
	rec_t*		rec;
1189
	mem_heap_t*	heap		= NULL;
1190
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
1191
	ulint*		offsets		= offsets_;
1192
	ulint		ret;
1193
	rec_offs_init(offsets_);
1194
1195
	index = plan->index;
1196
1197
	ut_ad(node->read_view);
1198
	ut_ad(plan->unique_search);
1199
	ut_ad(!plan->must_get_clust);
1200
#ifdef UNIV_SYNC_DEBUG
1201
	ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
1202
#endif /* UNIV_SYNC_DEBUG */
1203
641.2.2 by Monty Taylor
InnoDB Plugin 1.0.3
1204
	row_sel_open_pcur(plan, TRUE, mtr);
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1205
1206
	rec = btr_pcur_get_rec(&(plan->pcur));
1207
1208
	if (!page_rec_is_user_rec(rec)) {
1209
1210
		return(SEL_RETRY);
1211
	}
1212
1213
	ut_ad(plan->mode == PAGE_CUR_GE);
1214
1215
	/* As the cursor is now placed on a user record after a search with
1216
	the mode PAGE_CUR_GE, the up_match field in the cursor tells how many
1217
	fields in the user record matched to the search tuple */
1218
1219
	if (btr_pcur_get_up_match(&(plan->pcur)) < plan->n_exact_match) {
1220
1221
		return(SEL_EXHAUSTED);
1222
	}
1223
1224
	/* This is a non-locking consistent read: if necessary, fetch
1225
	a previous version of the record */
1226
1227
	offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
1228
1229
	if (dict_index_is_clust(index)) {
1230
		if (!lock_clust_rec_cons_read_sees(rec, index, offsets,
1231
						   node->read_view)) {
1232
			ret = SEL_RETRY;
1233
			goto func_exit;
1234
		}
1235
	} else if (!lock_sec_rec_cons_read_sees(rec, node->read_view)) {
1236
1237
		ret = SEL_RETRY;
1238
		goto func_exit;
1239
	}
1240
1241
	/* Test the deleted flag. */
1242
1243
	if (rec_get_deleted_flag(rec, dict_table_is_comp(plan->table))) {
1244
1245
		ret = SEL_EXHAUSTED;
1246
		goto func_exit;
1247
	}
1248
1249
	/* Fetch the columns needed in test conditions.  The index
1250
	record is protected by a page latch that was acquired when
1251
	plan->pcur was positioned.  The latch will not be released
1252
	until mtr_commit(mtr). */
1253
1254
	row_sel_fetch_columns(index, rec, offsets,
1255
			      UT_LIST_GET_FIRST(plan->columns));
1256
1257
	/* Test the rest of search conditions */
1258
1259
	if (!row_sel_test_other_conds(plan)) {
1260
1261
		ret = SEL_EXHAUSTED;
1262
		goto func_exit;
1263
	}
1264
641.2.2 by Monty Taylor
InnoDB Plugin 1.0.3
1265
	ut_ad(plan->pcur.latch_mode == BTR_SEARCH_LEAF);
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1266
1267
	plan->n_rows_fetched++;
1268
	ret = SEL_FOUND;
1269
func_exit:
1270
	if (UNIV_LIKELY_NULL(heap)) {
1271
		mem_heap_free(heap);
1272
	}
1273
	return(ret);
1274
}
1275
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1276
/*********************************************************************//**
1277
Performs a select step.
1278
@return	DB_SUCCESS or error code */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1279
static
1280
ulint
1281
row_sel(
1282
/*====*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1283
	sel_node_t*	node,	/*!< in: select node */
1284
	que_thr_t*	thr)	/*!< in: query thread */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1285
{
1286
	dict_index_t*	index;
1287
	plan_t*		plan;
1288
	mtr_t		mtr;
1289
	ibool		moved;
1290
	rec_t*		rec;
1291
	rec_t*		old_vers;
1292
	rec_t*		clust_rec;
1293
	ibool		search_latch_locked;
1294
	ibool		consistent_read;
1295
1296
	/* The following flag becomes TRUE when we are doing a
1297
	consistent read from a non-clustered index and we must look
1298
	at the clustered index to find out the previous delete mark
1299
	state of the non-clustered record: */
1300
1301
	ibool		cons_read_requires_clust_rec	= FALSE;
1302
	ulint		cost_counter			= 0;
1303
	ibool		cursor_just_opened;
1304
	ibool		must_go_to_next;
1305
	ibool		mtr_has_extra_clust_latch	= FALSE;
1306
	/* TRUE if the search was made using
1307
	a non-clustered index, and we had to
1308
	access the clustered record: now &mtr
1309
	contains a clustered index latch, and
1310
	&mtr must be committed before we move
1311
	to the next non-clustered record */
1312
	ulint		found_flag;
1313
	ulint		err;
1314
	mem_heap_t*	heap				= NULL;
1315
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
1316
	ulint*		offsets				= offsets_;
1317
	rec_offs_init(offsets_);
1318
1319
	ut_ad(thr->run_node == node);
1320
1321
	search_latch_locked = FALSE;
1322
1323
	if (node->read_view) {
1324
		/* In consistent reads, we try to do with the hash index and
1325
		not to use the buffer page get. This is to reduce memory bus
1326
		load resulting from semaphore operations. The search latch
1327
		will be s-locked when we access an index with a unique search
1328
		condition, but not locked when we access an index with a
1329
		less selective search condition. */
1330
1331
		consistent_read = TRUE;
1332
	} else {
1333
		consistent_read = FALSE;
1334
	}
1335
1336
table_loop:
1337
	/* TABLE LOOP
1338
	----------
1339
	This is the outer major loop in calculating a join. We come here when
1340
	node->fetch_table changes, and after adding a row to aggregate totals
1341
	and, of course, when this function is called. */
1342
1343
	ut_ad(mtr_has_extra_clust_latch == FALSE);
1344
1345
	plan = sel_node_get_nth_plan(node, node->fetch_table);
1346
	index = plan->index;
1347
1348
	if (plan->n_rows_prefetched > 0) {
1349
		sel_pop_prefetched_row(plan);
1350
1351
		goto next_table_no_mtr;
1352
	}
1353
1354
	if (plan->cursor_at_end) {
1355
		/* The cursor has already reached the result set end: no more
1356
		rows to process for this table cursor, as also the prefetch
1357
		stack was empty */
1358
1359
		ut_ad(plan->pcur_is_open);
1360
1361
		goto table_exhausted_no_mtr;
1362
	}
1363
1364
	/* Open a cursor to index, or restore an open cursor position */
1365
1366
	mtr_start(&mtr);
1367
1368
	if (consistent_read && plan->unique_search && !plan->pcur_is_open
1369
	    && !plan->must_get_clust
1370
	    && !plan->table->big_rows) {
1371
		if (!search_latch_locked) {
1372
			rw_lock_s_lock(&btr_search_latch);
1373
1374
			search_latch_locked = TRUE;
641.2.2 by Monty Taylor
InnoDB Plugin 1.0.3
1375
		} else if (rw_lock_get_writer(&btr_search_latch) == RW_LOCK_WAIT_EX) {
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1376
1377
			/* There is an x-latch request waiting: release the
1378
			s-latch for a moment; as an s-latch here is often
1379
			kept for some 10 searches before being released,
1380
			a waiting x-latch request would block other threads
1381
			from acquiring an s-latch for a long time, lowering
1382
			performance significantly in multiprocessors. */
1383
1384
			rw_lock_s_unlock(&btr_search_latch);
1385
			rw_lock_s_lock(&btr_search_latch);
1386
		}
1387
1388
		found_flag = row_sel_try_search_shortcut(node, plan, &mtr);
1389
1390
		if (found_flag == SEL_FOUND) {
1391
1392
			goto next_table;
1393
1394
		} else if (found_flag == SEL_EXHAUSTED) {
1395
1396
			goto table_exhausted;
1397
		}
1398
1399
		ut_ad(found_flag == SEL_RETRY);
1400
1401
		plan_reset_cursor(plan);
1402
1403
		mtr_commit(&mtr);
1404
		mtr_start(&mtr);
1405
	}
1406
1407
	if (search_latch_locked) {
1408
		rw_lock_s_unlock(&btr_search_latch);
1409
1410
		search_latch_locked = FALSE;
1411
	}
1412
1413
	if (!plan->pcur_is_open) {
1414
		/* Evaluate the expressions to build the search tuple and
1415
		open the cursor */
1416
641.2.2 by Monty Taylor
InnoDB Plugin 1.0.3
1417
		row_sel_open_pcur(plan, search_latch_locked, &mtr);
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1418
1419
		cursor_just_opened = TRUE;
1420
1421
		/* A new search was made: increment the cost counter */
1422
		cost_counter++;
1423
	} else {
1424
		/* Restore pcur position to the index */
1425
641.2.2 by Monty Taylor
InnoDB Plugin 1.0.3
1426
		must_go_to_next = row_sel_restore_pcur_pos(plan, &mtr);
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1427
1428
		cursor_just_opened = FALSE;
1429
1430
		if (must_go_to_next) {
1431
			/* We have already processed the cursor record: move
1432
			to the next */
1433
1434
			goto next_rec;
1435
		}
1436
	}
1437
1438
rec_loop:
1439
	/* RECORD LOOP
1440
	-----------
1441
	In this loop we use pcur and try to fetch a qualifying row, and
1442
	also fill the prefetch buffer for this table if n_rows_fetched has
1443
	exceeded a threshold. While we are inside this loop, the following
1444
	holds:
1445
	(1) &mtr is started,
1446
	(2) pcur is positioned and open.
1447
1448
	NOTE that if cursor_just_opened is TRUE here, it means that we came
1449
	to this point right after row_sel_open_pcur. */
1450
1451
	ut_ad(mtr_has_extra_clust_latch == FALSE);
1452
1453
	rec = btr_pcur_get_rec(&(plan->pcur));
1454
1455
	/* PHASE 1: Set a lock if specified */
1456
1457
	if (!node->asc && cursor_just_opened
1458
	    && !page_rec_is_supremum(rec)) {
1459
1460
		/* When we open a cursor for a descending search, we must set
1461
		a next-key lock on the successor record: otherwise it would
1462
		be possible to insert new records next to the cursor position,
1463
		and it might be that these new records should appear in the
1464
		search result set, resulting in the phantom problem. */
1465
1466
		if (!consistent_read) {
1467
1468
			/* If innodb_locks_unsafe_for_binlog option is used
1469
			or this session is using READ COMMITTED isolation
1470
			level, we lock only the record, i.e., next-key
1471
			locking is not used. */
1472
1473
			rec_t*	next_rec = page_rec_get_next(rec);
1474
			ulint	lock_type;
1475
			trx_t*	trx;
1476
1477
			trx = thr_get_trx(thr);
1478
1479
			offsets = rec_get_offsets(next_rec, index, offsets,
1480
						  ULINT_UNDEFINED, &heap);
1481
1482
			if (srv_locks_unsafe_for_binlog
1483
			    || trx->isolation_level
1819.7.86 by Vasil Dimov, Stewart Smith
Merge Revision revid:vasil.dimov@oracle.com-20100504105214-ljj5sy3bk21zl7og from MySQL InnoDB
1484
			    <= TRX_ISO_READ_COMMITTED) {
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1485
1486
				if (page_rec_is_supremum(next_rec)) {
1487
1488
					goto skip_lock;
1489
				}
1490
1491
				lock_type = LOCK_REC_NOT_GAP;
1492
			} else {
1493
				lock_type = LOCK_ORDINARY;
1494
			}
1495
1496
			err = sel_set_rec_lock(btr_pcur_get_block(&plan->pcur),
1497
					       next_rec, index, offsets,
1498
					       node->row_lock_mode,
1499
					       lock_type, thr);
1500
1819.7.159 by Marko Mäkelä, Stewart Smith
Merge Revision revid:marko.makela@oracle.com-20100602113733-fslfv73nhi0d17t4 from MySQL InnoDB
1501
			switch (err) {
1502
			case DB_SUCCESS_LOCKED_REC:
1503
				err = DB_SUCCESS;
1504
			case DB_SUCCESS:
1505
				break;
1506
			default:
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1507
				/* Note that in this case we will store in pcur
1508
				the PREDECESSOR of the record we are waiting
1509
				the lock for */
1510
				goto lock_wait_or_error;
1511
			}
1512
		}
1513
	}
1514
1515
skip_lock:
1516
	if (page_rec_is_infimum(rec)) {
1517
1518
		/* The infimum record on a page cannot be in the result set,
1519
		and neither can a record lock be placed on it: we skip such
1520
		a record. We also increment the cost counter as we may have
1521
		processed yet another page of index. */
1522
1523
		cost_counter++;
1524
1525
		goto next_rec;
1526
	}
1527
1528
	if (!consistent_read) {
1529
		/* Try to place a lock on the index record */
1530
1531
		/* If innodb_locks_unsafe_for_binlog option is used
1532
		or this session is using READ COMMITTED isolation level,
1533
		we lock only the record, i.e., next-key locking is
1534
		not used. */
1535
1536
		ulint	lock_type;
1537
		trx_t*	trx;
1538
1539
		offsets = rec_get_offsets(rec, index, offsets,
1540
					  ULINT_UNDEFINED, &heap);
1541
1542
		trx = thr_get_trx(thr);
1543
1544
		if (srv_locks_unsafe_for_binlog
1819.7.86 by Vasil Dimov, Stewart Smith
Merge Revision revid:vasil.dimov@oracle.com-20100504105214-ljj5sy3bk21zl7og from MySQL InnoDB
1545
		    || trx->isolation_level <= TRX_ISO_READ_COMMITTED) {
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1546
1547
			if (page_rec_is_supremum(rec)) {
1548
1549
				goto next_rec;
1550
			}
1551
1552
			lock_type = LOCK_REC_NOT_GAP;
1553
		} else {
1554
			lock_type = LOCK_ORDINARY;
1555
		}
1556
1557
		err = sel_set_rec_lock(btr_pcur_get_block(&plan->pcur),
1558
				       rec, index, offsets,
1559
				       node->row_lock_mode, lock_type, thr);
1560
1819.7.159 by Marko Mäkelä, Stewart Smith
Merge Revision revid:marko.makela@oracle.com-20100602113733-fslfv73nhi0d17t4 from MySQL InnoDB
1561
		switch (err) {
1562
		case DB_SUCCESS_LOCKED_REC:
1563
			err = DB_SUCCESS;
1564
		case DB_SUCCESS:
1565
			break;
1566
		default:
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1567
			goto lock_wait_or_error;
1568
		}
1569
	}
1570
1571
	if (page_rec_is_supremum(rec)) {
1572
1573
		/* A page supremum record cannot be in the result set: skip
1574
		it now when we have placed a possible lock on it */
1575
1576
		goto next_rec;
1577
	}
1578
1579
	ut_ad(page_rec_is_user_rec(rec));
1580
1581
	if (cost_counter > SEL_COST_LIMIT) {
1582
1583
		/* Now that we have placed the necessary locks, we can stop
1584
		for a while and store the cursor position; NOTE that if we
1585
		would store the cursor position BEFORE placing a record lock,
1586
		it might happen that the cursor would jump over some records
1587
		that another transaction could meanwhile insert adjacent to
1588
		the cursor: this would result in the phantom problem. */
1589
1590
		goto stop_for_a_while;
1591
	}
1592
1593
	/* PHASE 2: Check a mixed index mix id if needed */
1594
1595
	if (plan->unique_search && cursor_just_opened) {
1596
1597
		ut_ad(plan->mode == PAGE_CUR_GE);
1598
1599
		/* As the cursor is now placed on a user record after a search
1600
		with the mode PAGE_CUR_GE, the up_match field in the cursor
1601
		tells how many fields in the user record matched to the search
1602
		tuple */
1603
1604
		if (btr_pcur_get_up_match(&(plan->pcur))
1605
		    < plan->n_exact_match) {
1606
			goto table_exhausted;
1607
		}
1608
1609
		/* Ok, no need to test end_conds or mix id */
1610
1611
	}
1612
1613
	/* We are ready to look at a possible new index entry in the result
1614
	set: the cursor is now placed on a user record */
1615
1616
	/* PHASE 3: Get previous version in a consistent read */
1617
1618
	cons_read_requires_clust_rec = FALSE;
1619
	offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
1620
1621
	if (consistent_read) {
1622
		/* This is a non-locking consistent read: if necessary, fetch
1623
		a previous version of the record */
1624
1625
		if (dict_index_is_clust(index)) {
1626
1627
			if (!lock_clust_rec_cons_read_sees(rec, index, offsets,
1628
							   node->read_view)) {
1629
1630
				err = row_sel_build_prev_vers(
1631
					node->read_view, index, rec,
1632
					&offsets, &heap, &plan->old_vers_heap,
1633
					&old_vers, &mtr);
1634
1635
				if (err != DB_SUCCESS) {
1636
1637
					goto lock_wait_or_error;
1638
				}
1639
1640
				if (old_vers == NULL) {
1819.7.58 by Marko Mäkelä
Merge Revision revid:marko.makela@oracle.com-20100629125518-m3am4ia1ffjr0d0j from MySQL InnoDB
1641
					/* The record does not exist
1642
					in our read view. Skip it, but
1643
					first attempt to determine
1644
					whether the index segment we
1645
					are searching through has been
1646
					exhausted. */
1647
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1648
					offsets = rec_get_offsets(
1649
						rec, index, offsets,
1650
						ULINT_UNDEFINED, &heap);
1651
1652
					/* Fetch the columns needed in
1653
					test conditions. The clustered
1654
					index record is protected by a
1655
					page latch that was acquired
1656
					by row_sel_open_pcur() or
1657
					row_sel_restore_pcur_pos().
1658
					The latch will not be released
1659
					until mtr_commit(mtr). */
1660
1661
					row_sel_fetch_columns(
1662
						index, rec, offsets,
1663
						UT_LIST_GET_FIRST(
1664
							plan->columns));
1665
1666
					if (!row_sel_test_end_conds(plan)) {
1667
1668
						goto table_exhausted;
1669
					}
1670
1671
					goto next_rec;
1672
				}
1673
1674
				rec = old_vers;
1675
			}
1676
		} else if (!lock_sec_rec_cons_read_sees(rec,
1677
							node->read_view)) {
1678
			cons_read_requires_clust_rec = TRUE;
1679
		}
1680
	}
1681
1682
	/* PHASE 4: Test search end conditions and deleted flag */
1683
1684
	/* Fetch the columns needed in test conditions.  The record is
1685
	protected by a page latch that was acquired by
1686
	row_sel_open_pcur() or row_sel_restore_pcur_pos().  The latch
1687
	will not be released until mtr_commit(mtr). */
1688
1689
	row_sel_fetch_columns(index, rec, offsets,
1690
			      UT_LIST_GET_FIRST(plan->columns));
1691
1692
	/* Test the selection end conditions: these can only contain columns
1693
	which already are found in the index, even though the index might be
1694
	non-clustered */
1695
1696
	if (plan->unique_search && cursor_just_opened) {
1697
1698
		/* No test necessary: the test was already made above */
1699
1700
	} else if (!row_sel_test_end_conds(plan)) {
1701
1702
		goto table_exhausted;
1703
	}
1704
1705
	if (rec_get_deleted_flag(rec, dict_table_is_comp(plan->table))
1706
	    && !cons_read_requires_clust_rec) {
1707
1708
		/* The record is delete marked: we can skip it if this is
1709
		not a consistent read which might see an earlier version
1710
		of a non-clustered index record */
1711
1712
		if (plan->unique_search) {
1713
1714
			goto table_exhausted;
1715
		}
1716
1717
		goto next_rec;
1718
	}
1719
1720
	/* PHASE 5: Get the clustered index record, if needed and if we did
1721
	not do the search using the clustered index */
1722
1723
	if (plan->must_get_clust || cons_read_requires_clust_rec) {
1724
1725
		/* It was a non-clustered index and we must fetch also the
1726
		clustered index record */
1727
1728
		err = row_sel_get_clust_rec(node, plan, rec, thr, &clust_rec,
1729
					    &mtr);
1730
		mtr_has_extra_clust_latch = TRUE;
1731
1819.9.9 by Marko Mäkelä, Stewart Smith
Merge Revision revid:marko.makela@oracle.com-20100602103714-0nwxdqskeb1ihozj from MySQL InnoDB
1732
		switch (err) {
1733
		case DB_SUCCESS_LOCKED_REC:
1734
			err = DB_SUCCESS;
1735
		case DB_SUCCESS:
1736
			break;
1737
		default:
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1738
			goto lock_wait_or_error;
1739
		}
1740
1741
		/* Retrieving the clustered record required a search:
1742
		increment the cost counter */
1743
1744
		cost_counter++;
1745
1746
		if (clust_rec == NULL) {
1747
			/* The record did not exist in the read view */
1748
			ut_ad(consistent_read);
1749
1750
			goto next_rec;
1751
		}
1752
1753
		if (rec_get_deleted_flag(clust_rec,
1754
					 dict_table_is_comp(plan->table))) {
1755
1756
			/* The record is delete marked: we can skip it */
1757
1758
			goto next_rec;
1759
		}
1760
1761
		if (node->can_get_updated) {
1762
1763
			btr_pcur_store_position(&(plan->clust_pcur), &mtr);
1764
		}
1765
	}
1766
1767
	/* PHASE 6: Test the rest of search conditions */
1768
1769
	if (!row_sel_test_other_conds(plan)) {
1770
1771
		if (plan->unique_search) {
1772
1773
			goto table_exhausted;
1774
		}
1775
1776
		goto next_rec;
1777
	}
1778
1779
	/* PHASE 7: We found a new qualifying row for the current table; push
1780
	the row if prefetch is on, or move to the next table in the join */
1781
1782
	plan->n_rows_fetched++;
1783
641.2.2 by Monty Taylor
InnoDB Plugin 1.0.3
1784
	ut_ad(plan->pcur.latch_mode == BTR_SEARCH_LEAF);
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1785
1786
	if ((plan->n_rows_fetched <= SEL_PREFETCH_LIMIT)
1787
	    || plan->unique_search || plan->no_prefetch
1788
	    || plan->table->big_rows) {
1789
1790
		/* No prefetch in operation: go to the next table */
1791
1792
		goto next_table;
1793
	}
1794
1795
	sel_push_prefetched_row(plan);
1796
1797
	if (plan->n_rows_prefetched == SEL_MAX_N_PREFETCH) {
1798
1799
		/* The prefetch buffer is now full */
1800
1801
		sel_pop_prefetched_row(plan);
1802
1803
		goto next_table;
1804
	}
1805
1806
next_rec:
1807
	ut_ad(!search_latch_locked);
1808
1809
	if (mtr_has_extra_clust_latch) {
1810
1811
		/* We must commit &mtr if we are moving to the next
1812
		non-clustered index record, because we could break the
1813
		latching order if we would access a different clustered
1814
		index page right away without releasing the previous. */
1815
1816
		goto commit_mtr_for_a_while;
1817
	}
1818
1819
	if (node->asc) {
1820
		moved = btr_pcur_move_to_next(&(plan->pcur), &mtr);
1821
	} else {
1822
		moved = btr_pcur_move_to_prev(&(plan->pcur), &mtr);
1823
	}
1824
1825
	if (!moved) {
1826
1827
		goto table_exhausted;
1828
	}
1829
1830
	cursor_just_opened = FALSE;
1831
1832
	/* END OF RECORD LOOP
1833
	------------------ */
1834
	goto rec_loop;
1835
1836
next_table:
1837
	/* We found a record which satisfies the conditions: we can move to
1838
	the next table or return a row in the result set */
1839
1840
	ut_ad(btr_pcur_is_on_user_rec(&plan->pcur));
1841
1842
	if (plan->unique_search && !node->can_get_updated) {
1843
1844
		plan->cursor_at_end = TRUE;
1845
	} else {
1846
		ut_ad(!search_latch_locked);
1847
1848
		plan->stored_cursor_rec_processed = TRUE;
1849
1850
		btr_pcur_store_position(&(plan->pcur), &mtr);
1851
	}
1852
1853
	mtr_commit(&mtr);
1854
1855
	mtr_has_extra_clust_latch = FALSE;
1856
1857
next_table_no_mtr:
1858
	/* If we use 'goto' to this label, it means that the row was popped
1859
	from the prefetched rows stack, and &mtr is already committed */
1860
1861
	if (node->fetch_table + 1 == node->n_tables) {
1862
1863
		sel_eval_select_list(node);
1864
1865
		if (node->is_aggregate) {
1866
1867
			goto table_loop;
1868
		}
1869
1870
		sel_assign_into_var_values(node->into_list, node);
1871
1872
		thr->run_node = que_node_get_parent(node);
1873
1874
		err = DB_SUCCESS;
1875
		goto func_exit;
1876
	}
1877
1878
	node->fetch_table++;
1879
1880
	/* When we move to the next table, we first reset the plan cursor:
1881
	we do not care about resetting it when we backtrack from a table */
1882
1883
	plan_reset_cursor(sel_node_get_nth_plan(node, node->fetch_table));
1884
1885
	goto table_loop;
1886
1887
table_exhausted:
1888
	/* The table cursor pcur reached the result set end: backtrack to the
1889
	previous table in the join if we do not have cached prefetched rows */
1890
1891
	plan->cursor_at_end = TRUE;
1892
1893
	mtr_commit(&mtr);
1894
1895
	mtr_has_extra_clust_latch = FALSE;
1896
1897
	if (plan->n_rows_prefetched > 0) {
1898
		/* The table became exhausted during a prefetch */
1899
1900
		sel_pop_prefetched_row(plan);
1901
1902
		goto next_table_no_mtr;
1903
	}
1904
1905
table_exhausted_no_mtr:
1906
	if (node->fetch_table == 0) {
1907
		err = DB_SUCCESS;
1908
1909
		if (node->is_aggregate && !node->aggregate_already_fetched) {
1910
1911
			node->aggregate_already_fetched = TRUE;
1912
1913
			sel_assign_into_var_values(node->into_list, node);
1914
1915
			thr->run_node = que_node_get_parent(node);
1916
		} else {
1917
			node->state = SEL_NODE_NO_MORE_ROWS;
1918
1919
			thr->run_node = que_node_get_parent(node);
1920
		}
1921
1819.9.9 by Marko Mäkelä, Stewart Smith
Merge Revision revid:marko.makela@oracle.com-20100602103714-0nwxdqskeb1ihozj from MySQL InnoDB
1922
		err = DB_SUCCESS;
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1923
		goto func_exit;
1924
	}
1925
1926
	node->fetch_table--;
1927
1928
	goto table_loop;
1929
1930
stop_for_a_while:
1931
	/* Return control for a while to que_run_threads, so that runaway
1932
	queries can be canceled. NOTE that when we come here, we must, in a
1933
	locking read, have placed the necessary (possibly waiting request)
1934
	record lock on the cursor record or its successor: when we reposition
1935
	the cursor, this record lock guarantees that nobody can meanwhile have
1936
	inserted new records which should have appeared in the result set,
1937
	which would result in the phantom problem. */
1938
1939
	ut_ad(!search_latch_locked);
1940
1941
	plan->stored_cursor_rec_processed = FALSE;
1942
	btr_pcur_store_position(&(plan->pcur), &mtr);
1943
1944
	mtr_commit(&mtr);
1945
1946
#ifdef UNIV_SYNC_DEBUG
1947
	ut_ad(sync_thread_levels_empty_gen(TRUE));
1948
#endif /* UNIV_SYNC_DEBUG */
1949
	err = DB_SUCCESS;
1950
	goto func_exit;
1951
1952
commit_mtr_for_a_while:
1953
	/* Stores the cursor position and commits &mtr; this is used if
1954
	&mtr may contain latches which would break the latching order if
1955
	&mtr would not be committed and the latches released. */
1956
1957
	plan->stored_cursor_rec_processed = TRUE;
1958
1959
	ut_ad(!search_latch_locked);
1960
	btr_pcur_store_position(&(plan->pcur), &mtr);
1961
1962
	mtr_commit(&mtr);
1963
1964
	mtr_has_extra_clust_latch = FALSE;
1965
1966
#ifdef UNIV_SYNC_DEBUG
1967
	ut_ad(sync_thread_levels_empty_gen(TRUE));
1968
#endif /* UNIV_SYNC_DEBUG */
1969
1970
	goto table_loop;
1971
1972
lock_wait_or_error:
1973
	/* See the note at stop_for_a_while: the same holds for this case */
1974
1975
	ut_ad(!btr_pcur_is_before_first_on_page(&plan->pcur) || !node->asc);
1976
	ut_ad(!search_latch_locked);
1977
1978
	plan->stored_cursor_rec_processed = FALSE;
1979
	btr_pcur_store_position(&(plan->pcur), &mtr);
1980
1981
	mtr_commit(&mtr);
1982
1983
#ifdef UNIV_SYNC_DEBUG
1984
	ut_ad(sync_thread_levels_empty_gen(TRUE));
1985
#endif /* UNIV_SYNC_DEBUG */
1986
1987
func_exit:
1988
	if (search_latch_locked) {
1989
		rw_lock_s_unlock(&btr_search_latch);
1990
	}
1991
	if (UNIV_LIKELY_NULL(heap)) {
1992
		mem_heap_free(heap);
1993
	}
1994
	return(err);
1995
}
1996
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1997
/**********************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1998
Performs a select step. This is a high-level function used in SQL execution
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1999
graphs.
2000
@return	query thread to run next or NULL */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2001
UNIV_INTERN
2002
que_thr_t*
2003
row_sel_step(
2004
/*=========*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2005
	que_thr_t*	thr)	/*!< in: query thread */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2006
{
2007
	ulint		i_lock_mode;
2008
	sym_node_t*	table_node;
2009
	sel_node_t*	node;
2010
	ulint		err;
2011
2012
	ut_ad(thr);
2013
2014
	node = thr->run_node;
2015
2016
	ut_ad(que_node_get_type(node) == QUE_NODE_SELECT);
2017
2018
	/* If this is a new time this node is executed (or when execution
2019
	resumes after wait for a table intention lock), set intention locks
2020
	on the tables, or assign a read view */
2021
2022
	if (node->into_list && (thr->prev_node == que_node_get_parent(node))) {
2023
2024
		node->state = SEL_NODE_OPEN;
2025
	}
2026
2027
	if (node->state == SEL_NODE_OPEN) {
2028
2029
		/* It may be that the current session has not yet started
2030
		its transaction, or it has been committed: */
2031
2032
		trx_start_if_not_started(thr_get_trx(thr));
2033
2034
		plan_reset_cursor(sel_node_get_nth_plan(node, 0));
2035
2036
		if (node->consistent_read) {
2037
			/* Assign a read view for the query */
2038
			node->read_view = trx_assign_read_view(
2039
				thr_get_trx(thr));
2040
		} else {
2041
			if (node->set_x_locks) {
2042
				i_lock_mode = LOCK_IX;
2043
			} else {
2044
				i_lock_mode = LOCK_IS;
2045
			}
2046
2047
			table_node = node->table_list;
2048
2049
			while (table_node) {
2050
				err = lock_table(0, table_node->table,
2051
						 i_lock_mode, thr);
2052
				if (err != DB_SUCCESS) {
2053
					thr_get_trx(thr)->error_state = err;
2054
2055
					return(NULL);
2056
				}
2057
2058
				table_node = que_node_get_next(table_node);
2059
			}
2060
		}
2061
2062
		/* If this is an explicit cursor, copy stored procedure
2063
		variable values, so that the values cannot change between
2064
		fetches (currently, we copy them also for non-explicit
2065
		cursors) */
2066
2067
		if (node->explicit_cursor
2068
		    && UT_LIST_GET_FIRST(node->copy_variables)) {
2069
2070
			row_sel_copy_input_variable_vals(node);
2071
		}
2072
2073
		node->state = SEL_NODE_FETCH;
2074
		node->fetch_table = 0;
2075
2076
		if (node->is_aggregate) {
2077
			/* Reset the aggregate total values */
2078
			sel_reset_aggregate_vals(node);
2079
		}
1819.9.9 by Marko Mäkelä, Stewart Smith
Merge Revision revid:marko.makela@oracle.com-20100602103714-0nwxdqskeb1ihozj from MySQL InnoDB
2080
2081
		err = DB_SUCCESS;
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2082
	}
2083
2084
	err = row_sel(node, thr);
2085
2086
	/* NOTE! if queries are parallelized, the following assignment may
2087
	have problems; the assignment should be made only if thr is the
2088
	only top-level thr in the graph: */
2089
2090
	thr->graph->last_sel_node = node;
2091
2092
	if (err != DB_SUCCESS) {
2093
		thr_get_trx(thr)->error_state = err;
2094
2095
		return(NULL);
2096
	}
2097
2098
	return(thr);
2099
}
2100
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2101
/**********************************************************************//**
2102
Performs a fetch for a cursor.
2103
@return	query thread to run next or NULL */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2104
UNIV_INTERN
2105
que_thr_t*
2106
fetch_step(
2107
/*=======*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2108
	que_thr_t*	thr)	/*!< in: query thread */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2109
{
2110
	sel_node_t*	sel_node;
2111
	fetch_node_t*	node;
2112
2113
	ut_ad(thr);
2114
2115
	node = thr->run_node;
2116
	sel_node = node->cursor_def;
2117
2118
	ut_ad(que_node_get_type(node) == QUE_NODE_FETCH);
2119
2120
	if (thr->prev_node != que_node_get_parent(node)) {
2121
2122
		if (sel_node->state != SEL_NODE_NO_MORE_ROWS) {
2123
2124
			if (node->into_list) {
2125
				sel_assign_into_var_values(node->into_list,
2126
							   sel_node);
2127
			} else {
2128
				void* ret = (*node->func->func)(
2129
					sel_node, node->func->arg);
2130
2131
				if (!ret) {
2132
					sel_node->state
2133
						= SEL_NODE_NO_MORE_ROWS;
2134
				}
2135
			}
2136
		}
2137
2138
		thr->run_node = que_node_get_parent(node);
2139
2140
		return(thr);
2141
	}
2142
2143
	/* Make the fetch node the parent of the cursor definition for
2144
	the time of the fetch, so that execution knows to return to this
2145
	fetch node after a row has been selected or we know that there is
2146
	no row left */
2147
2148
	sel_node->common.parent = node;
2149
2150
	if (sel_node->state == SEL_NODE_CLOSED) {
2151
		fprintf(stderr,
2152
			"InnoDB: Error: fetch called on a closed cursor\n");
2153
2154
		thr_get_trx(thr)->error_state = DB_ERROR;
2155
2156
		return(NULL);
2157
	}
2158
2159
	thr->run_node = sel_node;
2160
2161
	return(thr);
2162
}
2163
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2164
/****************************************************************//**
2165
Sample callback function for fetch that prints each row.
2166
@return	always returns non-NULL */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2167
UNIV_INTERN
2168
void*
2169
row_fetch_print(
2170
/*============*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2171
	void*	row,		/*!< in:  sel_node_t* */
2172
	void*	user_arg)	/*!< in:  not used */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2173
{
2174
	sel_node_t*	node = row;
2175
	que_node_t*	exp;
2176
	ulint		i = 0;
2177
2178
	UT_NOT_USED(user_arg);
2179
2180
	fprintf(stderr, "row_fetch_print: row %p\n", row);
2181
2182
	exp = node->select_list;
2183
2184
	while (exp) {
2185
		dfield_t*	dfield = que_node_get_val(exp);
2186
		const dtype_t*	type = dfield_get_type(dfield);
2187
2188
		fprintf(stderr, " column %lu:\n", (ulong)i);
2189
2190
		dtype_print(type);
641.2.1 by Monty Taylor
InnoDB Plugin 1.0.2
2191
		putc('\n', stderr);
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2192
2193
		if (dfield_get_len(dfield) != UNIV_SQL_NULL) {
2194
			ut_print_buf(stderr, dfield_get_data(dfield),
2195
				     dfield_get_len(dfield));
641.2.1 by Monty Taylor
InnoDB Plugin 1.0.2
2196
			putc('\n', stderr);
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2197
		} else {
641.2.1 by Monty Taylor
InnoDB Plugin 1.0.2
2198
			fputs(" <NULL>;\n", stderr);
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2199
		}
2200
2201
		exp = que_node_get_next(exp);
2202
		i++;
2203
	}
2204
2205
	return((void*)42);
2206
}
2207
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2208
/***********************************************************//**
2209
Prints a row in a select result.
2210
@return	query thread to run next or NULL */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2211
UNIV_INTERN
2212
que_thr_t*
2213
row_printf_step(
2214
/*============*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2215
	que_thr_t*	thr)	/*!< in: query thread */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2216
{
2217
	row_printf_node_t*	node;
2218
	sel_node_t*		sel_node;
2219
	que_node_t*		arg;
2220
2221
	ut_ad(thr);
2222
2223
	node = thr->run_node;
2224
2225
	sel_node = node->sel_node;
2226
2227
	ut_ad(que_node_get_type(node) == QUE_NODE_ROW_PRINTF);
2228
2229
	if (thr->prev_node == que_node_get_parent(node)) {
2230
2231
		/* Reset the cursor */
2232
		sel_node->state = SEL_NODE_OPEN;
2233
2234
		/* Fetch next row to print */
2235
2236
		thr->run_node = sel_node;
2237
2238
		return(thr);
2239
	}
2240
2241
	if (sel_node->state != SEL_NODE_FETCH) {
2242
2243
		ut_ad(sel_node->state == SEL_NODE_NO_MORE_ROWS);
2244
2245
		/* No more rows to print */
2246
2247
		thr->run_node = que_node_get_parent(node);
2248
2249
		return(thr);
2250
	}
2251
2252
	arg = sel_node->select_list;
2253
2254
	while (arg) {
2255
		dfield_print_also_hex(que_node_get_val(arg));
2256
2257
		fputs(" ::: ", stderr);
2258
2259
		arg = que_node_get_next(arg);
2260
	}
2261
2262
	putc('\n', stderr);
2263
2264
	/* Fetch next row to print */
2265
2266
	thr->run_node = sel_node;
2267
2268
	return(thr);
2269
}
2270
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2271
/****************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2272
Converts a key value stored in MySQL format to an Innobase dtuple. The last
2273
field of the key value may be just a prefix of a fixed length field: hence
2274
the parameter key_len. But currently we do not allow search keys where the
2275
last field is only a prefix of the full key field len and print a warning if
2276
such appears. A counterpart of this function is
2277
ha_innobase::store_key_val_for_row() in ha_innodb.cc. */
2278
UNIV_INTERN
2279
void
2280
row_sel_convert_mysql_key_to_innobase(
2281
/*==================================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2282
	dtuple_t*	tuple,		/*!< in/out: tuple where to build;
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2283
					NOTE: we assume that the type info
2284
					in the tuple is already according
2285
					to index! */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2286
	byte*		buf,		/*!< in: buffer to use in field
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2287
					conversions */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2288
	ulint		buf_len,	/*!< in: buffer length */
2289
	dict_index_t*	index,		/*!< in: index of the key value */
2290
	const byte*	key_ptr,	/*!< in: MySQL key value */
2291
	ulint		key_len,	/*!< in: MySQL key value length */
2292
	trx_t*		trx)		/*!< in: transaction */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2293
{
2294
	byte*		original_buf	= buf;
2295
	const byte*	original_key_ptr = key_ptr;
2296
	dict_field_t*	field;
2297
	dfield_t*	dfield;
2298
	ulint		data_offset;
2299
	ulint		data_len;
2300
	ulint		data_field_len;
2301
	ibool		is_null;
2302
	const byte*	key_end;
2303
	ulint		n_fields = 0;
2304
2305
	/* For documentation of the key value storage format in MySQL, see
2306
	ha_innobase::store_key_val_for_row() in ha_innodb.cc. */
2307
2308
	key_end = key_ptr + key_len;
2309
2310
	/* Permit us to access any field in the tuple (ULINT_MAX): */
2311
2312
	dtuple_set_n_fields(tuple, ULINT_MAX);
2313
2314
	dfield = dtuple_get_nth_field(tuple, 0);
2315
	field = dict_index_get_nth_field(index, 0);
2316
2317
	if (UNIV_UNLIKELY(dfield_get_type(dfield)->mtype == DATA_SYS)) {
2318
		/* A special case: we are looking for a position in the
2319
		generated clustered index which InnoDB automatically added
2320
		to a table with no primary key: the first and the only
2321
		ordering column is ROW_ID which InnoDB stored to the key_ptr
2322
		buffer. */
2323
2324
		ut_a(key_len == DATA_ROW_ID_LEN);
2325
2326
		dfield_set_data(dfield, key_ptr, DATA_ROW_ID_LEN);
2327
2328
		dtuple_set_n_fields(tuple, 1);
2329
2330
		return;
2331
	}
2332
2333
	while (key_ptr < key_end) {
2334
2335
		ulint	type = dfield_get_type(dfield)->mtype;
2336
		ut_a(field->col->mtype == type);
2337
2338
		data_offset = 0;
2339
		is_null = FALSE;
2340
2341
		if (!(dfield_get_type(dfield)->prtype & DATA_NOT_NULL)) {
2342
			/* The first byte in the field tells if this is
2343
			an SQL NULL value */
2344
2345
			data_offset = 1;
2346
2347
			if (*key_ptr != 0) {
2348
				dfield_set_null(dfield);
2349
2350
				is_null = TRUE;
2351
			}
2352
		}
2353
2354
		/* Calculate data length and data field total length */
2355
2356
		if (type == DATA_BLOB) {
2357
			/* The key field is a column prefix of a BLOB or
2358
			TEXT */
2359
2360
			ut_a(field->prefix_len > 0);
2361
2362
			/* MySQL stores the actual data length to the first 2
2363
			bytes after the optional SQL NULL marker byte. The
2364
			storage format is little-endian, that is, the most
2365
			significant byte at a higher address. In UTF-8, MySQL
2366
			seems to reserve field->prefix_len bytes for
2367
			storing this field in the key value buffer, even
2368
			though the actual value only takes data_len bytes
2369
			from the start. */
2370
2371
			data_len = key_ptr[data_offset]
2372
				+ 256 * key_ptr[data_offset + 1];
2373
			data_field_len = data_offset + 2 + field->prefix_len;
2374
2375
			data_offset += 2;
2376
2377
			/* Now that we know the length, we store the column
2378
			value like it would be a fixed char field */
2379
2380
		} else if (field->prefix_len > 0) {
2381
			/* Looks like MySQL pads unused end bytes in the
2382
			prefix with space. Therefore, also in UTF-8, it is ok
2383
			to compare with a prefix containing full prefix_len
2384
			bytes, and no need to take at most prefix_len / 3
2385
			UTF-8 characters from the start.
2386
			If the prefix is used as the upper end of a LIKE
2387
			'abc%' query, then MySQL pads the end with chars
2388
			0xff. TODO: in that case does it any harm to compare
2389
			with the full prefix_len bytes. How do characters
2390
			0xff in UTF-8 behave? */
2391
2392
			data_len = field->prefix_len;
2393
			data_field_len = data_offset + data_len;
2394
		} else {
2395
			data_len = dfield_get_type(dfield)->len;
2396
			data_field_len = data_offset + data_len;
2397
		}
2398
2399
		if (UNIV_UNLIKELY
2400
		    (dtype_get_mysql_type(dfield_get_type(dfield))
2401
		     == DATA_MYSQL_TRUE_VARCHAR)
2402
		    && UNIV_LIKELY(type != DATA_INT)) {
2403
			/* In a MySQL key value format, a true VARCHAR is
2404
			always preceded by 2 bytes of a length field.
2405
			dfield_get_type(dfield)->len returns the maximum
2406
			'payload' len in bytes. That does not include the
2407
			2 bytes that tell the actual data length.
2408
2409
			We added the check != DATA_INT to make sure we do
2410
			not treat MySQL ENUM or SET as a true VARCHAR! */
2411
2412
			data_len += 2;
2413
			data_field_len += 2;
2414
		}
2415
2416
		/* Storing may use at most data_len bytes of buf */
2417
2418
		if (UNIV_LIKELY(!is_null)) {
2419
			row_mysql_store_col_in_innobase_format(
2420
				dfield, buf,
2421
				FALSE, /* MySQL key value format col */
2422
				key_ptr + data_offset, data_len,
2423
				dict_table_is_comp(index->table));
2424
			buf += data_len;
2425
		}
2426
2427
		key_ptr += data_field_len;
2428
2429
		if (UNIV_UNLIKELY(key_ptr > key_end)) {
2430
			/* The last field in key was not a complete key field
2431
			but a prefix of it.
2432
2433
			Print a warning about this! HA_READ_PREFIX_LAST does
2434
			not currently work in InnoDB with partial-field key
2435
			value prefixes. Since MySQL currently uses a padding
2436
			trick to calculate LIKE 'abc%' type queries there
2437
			should never be partial-field prefixes in searches. */
2438
2439
			ut_print_timestamp(stderr);
2440
2441
			fputs("  InnoDB: Warning: using a partial-field"
2442
			      " key prefix in search.\n"
2443
			      "InnoDB: ", stderr);
2444
			dict_index_name_print(stderr, trx, index);
2445
			fprintf(stderr, ". Last data field length %lu bytes,\n"
2446
				"InnoDB: key ptr now exceeds"
2447
				" key end by %lu bytes.\n"
2448
				"InnoDB: Key value in the MySQL format:\n",
2449
				(ulong) data_field_len,
2450
				(ulong) (key_ptr - key_end));
2451
			fflush(stderr);
2452
			ut_print_buf(stderr, original_key_ptr, key_len);
641.2.1 by Monty Taylor
InnoDB Plugin 1.0.2
2453
			putc('\n', stderr);
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2454
2455
			if (!is_null) {
2456
				ulint	len = dfield_get_len(dfield);
2457
				dfield_set_len(dfield, len
2458
					       - (ulint) (key_ptr - key_end));
2459
			}
2460
		}
2461
2462
		n_fields++;
2463
		field++;
2464
		dfield++;
2465
	}
2466
2467
	ut_a(buf <= original_buf + buf_len);
2468
2469
	/* We set the length of tuple to n_fields: we assume that the memory
2470
	area allocated for it is big enough (usually bigger than n_fields). */
2471
2472
	dtuple_set_n_fields(tuple, n_fields);
2473
}
2474
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2475
/**************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2476
Stores the row id to the prebuilt struct. */
2477
static
2478
void
2479
row_sel_store_row_id_to_prebuilt(
2480
/*=============================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2481
	row_prebuilt_t*		prebuilt,	/*!< in/out: prebuilt */
2482
	const rec_t*		index_rec,	/*!< in: record */
2483
	const dict_index_t*	index,		/*!< in: index of the record */
2484
	const ulint*		offsets)	/*!< in: rec_get_offsets
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2485
						(index_rec, index) */
2486
{
2487
	const byte*	data;
2488
	ulint		len;
2489
2490
	ut_ad(rec_offs_validate(index_rec, index, offsets));
2491
2492
	data = rec_get_nth_field(
2493
		index_rec, offsets,
2494
		dict_index_get_sys_col_pos(index, DATA_ROW_ID), &len);
2495
2496
	if (UNIV_UNLIKELY(len != DATA_ROW_ID_LEN)) {
2497
		fprintf(stderr,
2498
			"InnoDB: Error: Row id field is"
2499
			" wrong length %lu in ", (ulong) len);
2500
		dict_index_name_print(stderr, prebuilt->trx, index);
2501
		fprintf(stderr, "\n"
2502
			"InnoDB: Field number %lu, record:\n",
2503
			(ulong) dict_index_get_sys_col_pos(index,
2504
							   DATA_ROW_ID));
2505
		rec_print_new(stderr, index_rec, offsets);
2506
		putc('\n', stderr);
2507
		ut_error;
2508
	}
2509
2510
	ut_memcpy(prebuilt->row_id, data, len);
2511
}
2512
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2513
/**************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2514
Stores a non-SQL-NULL field in the MySQL format. The counterpart of this
2515
function is row_mysql_store_col_in_innobase_format() in row0mysql.c. */
2516
static
2517
void
2518
row_sel_field_store_in_mysql_format(
2519
/*================================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2520
	byte*		dest,	/*!< in/out: buffer where to store; NOTE
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2521
				that BLOBs are not in themselves
2522
				stored here: the caller must allocate
2523
				and copy the BLOB into buffer before,
2524
				and pass the pointer to the BLOB in
2525
				'data' */
2526
	const mysql_row_templ_t* templ,
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2527
				/*!< in: MySQL column template.
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2528
				Its following fields are referenced:
2529
				type, is_unsigned, mysql_col_len,
2530
				mbminlen, mbmaxlen */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2531
	const byte*	data,	/*!< in: data to store */
2532
	ulint		len)	/*!< in: length of the data */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2533
{
2534
	byte*	ptr;
2535
2536
	ut_ad(len != UNIV_SQL_NULL);
1819.7.95 by Marko Mäkelä
Merge Revision revid:marko.makela@oracle.com-20100505101406-u4low2x26q6itck0 from MySQL InnoDB
2537
	UNIV_MEM_ASSERT_RW(data, len);
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2538
2539
	switch (templ->type) {
1819.9.36 by Marko Mäkelä, Stewart Smith
Merge Revision revid:marko.makela@oracle.com-20100629113248-fvl48lnzr44z94gg from MySQL InnoDB
2540
		const byte*	field_end;
2541
		byte*		pad;
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2542
	case DATA_INT:
2543
		/* Convert integer data from Innobase to a little-endian
2544
		format, sign bit restored to normal */
2545
2546
		ptr = dest + len;
2547
2548
		for (;;) {
2549
			ptr--;
2550
			*ptr = *data;
2551
			if (ptr == dest) {
2552
				break;
2553
			}
2554
			data++;
2555
		}
2556
2557
		if (!templ->is_unsigned) {
2558
			dest[len - 1] = (byte) (dest[len - 1] ^ 128);
2559
		}
2560
2561
		ut_ad(templ->mysql_col_len == len);
2562
		break;
2563
2564
	case DATA_VARCHAR:
2565
	case DATA_VARMYSQL:
2566
	case DATA_BINARY:
2567
		field_end = dest + templ->mysql_col_len;
2568
2569
		if (templ->mysql_type == DATA_MYSQL_TRUE_VARCHAR) {
2570
			/* This is a >= 5.0.3 type true VARCHAR. Store the
2571
			length of the data to the first byte or the first
2572
			two bytes of dest. */
2573
2574
			dest = row_mysql_store_true_var_len(
2575
				dest, len, templ->mysql_length_bytes);
2576
		}
2577
2578
		/* Copy the actual data */
2579
		ut_memcpy(dest, data, len);
2580
2581
		/* Pad with trailing spaces. We pad with spaces also the
2582
		unused end of a >= 5.0.3 true VARCHAR column, just in case
2583
		MySQL expects its contents to be deterministic. */
2584
1819.9.36 by Marko Mäkelä, Stewart Smith
Merge Revision revid:marko.makela@oracle.com-20100629113248-fvl48lnzr44z94gg from MySQL InnoDB
2585
		pad = dest + len;
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2586
2587
		ut_ad(templ->mbminlen <= templ->mbmaxlen);
2588
1819.9.36 by Marko Mäkelä, Stewart Smith
Merge Revision revid:marko.makela@oracle.com-20100629113248-fvl48lnzr44z94gg from MySQL InnoDB
2589
		/* We treat some Unicode charset strings specially. */
2590
		switch (templ->mbminlen) {
2591
		case 4:
2592
			/* InnoDB should never have stripped partial
2593
			UTF-32 characters. */
2594
			ut_a(!(len & 3));
2595
			break;
2596
		case 2:
2597
			/* A space char is two bytes,
2598
			0x0020 in UCS2 and UTF-16 */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2599
1819.9.36 by Marko Mäkelä, Stewart Smith
Merge Revision revid:marko.makela@oracle.com-20100629113248-fvl48lnzr44z94gg from MySQL InnoDB
2600
			if (UNIV_UNLIKELY(len & 1)) {
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2601
				/* A 0x20 has been stripped from the column.
2602
				Pad it back. */
2603
1819.9.36 by Marko Mäkelä, Stewart Smith
Merge Revision revid:marko.makela@oracle.com-20100629113248-fvl48lnzr44z94gg from MySQL InnoDB
2604
				if (pad < field_end) {
2605
					*pad++ = 0x20;
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2606
				}
2607
			}
2608
		}
1819.9.36 by Marko Mäkelä, Stewart Smith
Merge Revision revid:marko.makela@oracle.com-20100629113248-fvl48lnzr44z94gg from MySQL InnoDB
2609
2610
		row_mysql_pad_col(templ->mbminlen, pad, field_end - pad);
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2611
		break;
2612
2613
	case DATA_BLOB:
2614
		/* Store a pointer to the BLOB buffer to dest: the BLOB was
2615
		already copied to the buffer in row_sel_store_mysql_rec */
2616
2617
		row_mysql_store_blob_ref(dest, templ->mysql_col_len, data,
2618
					 len);
2619
		break;
2620
2621
	case DATA_MYSQL:
2622
		memcpy(dest, data, len);
2623
2624
		ut_ad(templ->mysql_col_len >= len);
2625
		ut_ad(templ->mbmaxlen >= templ->mbminlen);
2626
2627
		ut_ad(templ->mbmaxlen > templ->mbminlen
2628
		      || templ->mysql_col_len == len);
2629
		/* The following assertion would fail for old tables
2630
		containing UTF-8 ENUM columns due to Bug #9526. */
2631
		ut_ad(!templ->mbmaxlen
2632
		      || !(templ->mysql_col_len % templ->mbmaxlen));
2633
		ut_ad(len * templ->mbmaxlen >= templ->mysql_col_len);
2634
1819.9.36 by Marko Mäkelä, Stewart Smith
Merge Revision revid:marko.makela@oracle.com-20100629113248-fvl48lnzr44z94gg from MySQL InnoDB
2635
		if (templ->mbminlen == 1 && templ->mbmaxlen != 1) {
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2636
			/* Pad with spaces. This undoes the stripping
1819.9.36 by Marko Mäkelä, Stewart Smith
Merge Revision revid:marko.makela@oracle.com-20100629113248-fvl48lnzr44z94gg from MySQL InnoDB
2637
			done in row0mysql.c, function
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2638
			row_mysql_store_col_in_innobase_format(). */
2639
2640
			memset(dest + len, 0x20, templ->mysql_col_len - len);
2641
		}
2642
		break;
2643
2644
	default:
2645
#ifdef UNIV_DEBUG
2646
	case DATA_SYS_CHILD:
2647
	case DATA_SYS:
2648
		/* These column types should never be shipped to MySQL. */
2649
		ut_ad(0);
2650
2651
	case DATA_CHAR:
2652
	case DATA_FIXBINARY:
2653
	case DATA_FLOAT:
2654
	case DATA_DOUBLE:
2655
	case DATA_DECIMAL:
2656
		/* Above are the valid column types for MySQL data. */
2657
#endif /* UNIV_DEBUG */
2658
		ut_ad(templ->mysql_col_len == len);
2659
		memcpy(dest, data, len);
2660
	}
2661
}
2662
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2663
/**************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2664
Convert a row in the Innobase format to a row in the MySQL format.
2665
Note that the template in prebuilt may advise us to copy only a few
2666
columns to mysql_rec, other columns are left blank. All columns may not
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2667
be needed in the query.
1819.7.58 by Marko Mäkelä
Merge Revision revid:marko.makela@oracle.com-20100629125518-m3am4ia1ffjr0d0j from MySQL InnoDB
2668
@return TRUE on success, FALSE if not all columns could be retrieved */
1819.7.68 by Stewart Smith
Merge initial InnoDB+ import.
2669
static
1819.8.2 by Stewart Smith
only warn_unused_result in innobase if GNUC
2670
#ifdef __GNUC__
2671
 __attribute__((warn_unused_result))
2672
#endif
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2673
ibool
2674
row_sel_store_mysql_rec(
2675
/*====================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2676
	byte*		mysql_rec,	/*!< out: row in the MySQL format */
2677
	row_prebuilt_t*	prebuilt,	/*!< in: prebuilt struct */
2678
	const rec_t*	rec,		/*!< in: Innobase record in the index
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2679
					which was described in prebuilt's
2680
					template; must be protected by
2681
					a page latch */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2682
	const ulint*	offsets)	/*!< in: array returned by
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2683
					rec_get_offsets() */
2684
{
2685
	mysql_row_templ_t*	templ;
2686
	mem_heap_t*		extern_field_heap	= NULL;
2687
	mem_heap_t*		heap;
2688
	const byte*		data;
2689
	ulint			len;
2690
	ulint			i;
2691
2692
	ut_ad(prebuilt->mysql_template);
641.2.2 by Monty Taylor
InnoDB Plugin 1.0.3
2693
	ut_ad(prebuilt->default_rec);
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2694
	ut_ad(rec_offs_validate(rec, NULL, offsets));
2695
2696
	if (UNIV_LIKELY_NULL(prebuilt->blob_heap)) {
2697
		mem_heap_free(prebuilt->blob_heap);
2698
		prebuilt->blob_heap = NULL;
2699
	}
2700
1114.1.1 by Monty Taylor
Merged InnoDB Plugin 1.0.4
2701
	for (i = 0; i < prebuilt->n_template ; i++) {
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2702
2703
		templ = prebuilt->mysql_template + i;
2704
2705
		if (UNIV_UNLIKELY(rec_offs_nth_extern(offsets,
2706
						      templ->rec_field_no))) {
2707
2708
			/* Copy an externally stored field to the temporary
2709
			heap */
2710
2711
			ut_a(!prebuilt->trx->has_search_latch);
2712
2713
			if (UNIV_UNLIKELY(templ->type == DATA_BLOB)) {
2714
				if (prebuilt->blob_heap == NULL) {
2715
					prebuilt->blob_heap = mem_heap_create(
2716
						UNIV_PAGE_SIZE);
2717
				}
2718
2719
				heap = prebuilt->blob_heap;
2720
			} else {
2721
				extern_field_heap
2722
					= mem_heap_create(UNIV_PAGE_SIZE);
2723
2724
				heap = extern_field_heap;
2725
			}
2726
2727
			/* NOTE: if we are retrieving a big BLOB, we may
2728
			already run out of memory in the next call, which
2729
			causes an assert */
2730
2731
			data = btr_rec_copy_externally_stored_field(
2732
				rec, offsets,
2733
				dict_table_zip_size(prebuilt->table),
2734
				templ->rec_field_no, &len, heap);
2735
1819.7.58 by Marko Mäkelä
Merge Revision revid:marko.makela@oracle.com-20100629125518-m3am4ia1ffjr0d0j from MySQL InnoDB
2736
			if (UNIV_UNLIKELY(!data)) {
2737
				/* The externally stored field
1819.7.62 by Marko Mäkelä
Merge Revision revid:marko.makela@oracle.com-20100630093149-wmc37t128gic933v from MySQL InnoDB
2738
				was not written yet. This
1819.7.58 by Marko Mäkelä
Merge Revision revid:marko.makela@oracle.com-20100629125518-m3am4ia1ffjr0d0j from MySQL InnoDB
2739
				record should only be seen by
2740
				recv_recovery_rollback_active()
2741
				or any TRX_ISO_READ_UNCOMMITTED
2742
				transactions. */
2743
2744
				if (extern_field_heap) {
2745
					mem_heap_free(extern_field_heap);
2746
				}
2747
2748
				return(FALSE);
2749
			}
2750
1819.9.37 by Marko Mäkelä, Stewart Smith
Merge Revision revid:marko.makela@oracle.com-20100629131907-epjs6h2rv457h7qv from MySQL InnoDB
2751
			if (UNIV_UNLIKELY(!data)) {
2752
				/* The externally stored field
1819.9.40 by Marko Mäkelä, Stewart Smith
Merge Revision revid:marko.makela@oracle.com-20100630095241-bietgukdy2g6k7x4 from MySQL InnoDB
2753
				was not written yet. This
1819.9.37 by Marko Mäkelä, Stewart Smith
Merge Revision revid:marko.makela@oracle.com-20100629131907-epjs6h2rv457h7qv from MySQL InnoDB
2754
				record should only be seen by
2755
				recv_recovery_rollback_active()
2756
				or any TRX_ISO_READ_UNCOMMITTED
2757
				transactions. */
2758
2759
				if (extern_field_heap) {
2760
					mem_heap_free(extern_field_heap);
2761
				}
2762
2763
				return(FALSE);
2764
			}
2765
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2766
			ut_a(len != UNIV_SQL_NULL);
2767
		} else {
2768
			/* Field is stored in the row. */
2769
2770
			data = rec_get_nth_field(rec, offsets,
2771
						 templ->rec_field_no, &len);
2772
2773
			if (UNIV_UNLIKELY(templ->type == DATA_BLOB)
2774
			    && len != UNIV_SQL_NULL) {
2775
2776
				/* It is a BLOB field locally stored in the
2777
				InnoDB record: we MUST copy its contents to
2778
				prebuilt->blob_heap here because later code
2779
				assumes all BLOB values have been copied to a
2780
				safe place. */
2781
2782
				if (prebuilt->blob_heap == NULL) {
2783
					prebuilt->blob_heap = mem_heap_create(
2784
						UNIV_PAGE_SIZE);
2785
				}
2786
2787
				data = memcpy(mem_heap_alloc(
2788
						prebuilt->blob_heap, len),
2789
						data, len);
2790
			}
2791
		}
2792
2793
		if (len != UNIV_SQL_NULL) {
2794
			row_sel_field_store_in_mysql_format(
2795
				mysql_rec + templ->mysql_col_offset,
2796
				templ, data, len);
2797
2798
			/* Cleanup */
2799
			if (extern_field_heap) {
2800
				mem_heap_free(extern_field_heap);
2801
				extern_field_heap = NULL;
2802
			}
2803
2804
			if (templ->mysql_null_bit_mask) {
2805
				/* It is a nullable column with a non-NULL
2806
				value */
2807
				mysql_rec[templ->mysql_null_byte_offset]
2808
					&= ~(byte) templ->mysql_null_bit_mask;
2809
			}
2810
		} else {
641.2.2 by Monty Taylor
InnoDB Plugin 1.0.3
2811
			/* MySQL assumes that the field for an SQL
2812
			NULL value is set to the default value. */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2813
1819.7.95 by Marko Mäkelä
Merge Revision revid:marko.makela@oracle.com-20100505101406-u4low2x26q6itck0 from MySQL InnoDB
2814
			UNIV_MEM_ASSERT_RW(prebuilt->default_rec
2815
					   + templ->mysql_col_offset,
2816
					   templ->mysql_col_len);
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2817
			mysql_rec[templ->mysql_null_byte_offset]
2818
				|= (byte) templ->mysql_null_bit_mask;
641.2.2 by Monty Taylor
InnoDB Plugin 1.0.3
2819
			memcpy(mysql_rec + templ->mysql_col_offset,
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2820
			       (const byte*) prebuilt->default_rec
2821
			       + templ->mysql_col_offset,
641.2.2 by Monty Taylor
InnoDB Plugin 1.0.3
2822
			       templ->mysql_col_len);
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2823
		}
2824
	}
2825
2826
	return(TRUE);
2827
}
2828
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2829
/*********************************************************************//**
2830
Builds a previous version of a clustered index record for a consistent read
2831
@return	DB_SUCCESS or error code */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2832
static
2833
ulint
2834
row_sel_build_prev_vers_for_mysql(
2835
/*==============================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2836
	read_view_t*	read_view,	/*!< in: read view */
2837
	dict_index_t*	clust_index,	/*!< in: clustered index */
2838
	row_prebuilt_t*	prebuilt,	/*!< in: prebuilt struct */
2839
	const rec_t*	rec,		/*!< in: record in a clustered index */
2840
	ulint**		offsets,	/*!< in/out: offsets returned by
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2841
					rec_get_offsets(rec, clust_index) */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2842
	mem_heap_t**	offset_heap,	/*!< in/out: memory heap from which
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2843
					the offsets are allocated */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2844
	rec_t**		old_vers,	/*!< out: old version, or NULL if the
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2845
					record does not exist in the view:
2846
					i.e., it was freshly inserted
2847
					afterwards */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2848
	mtr_t*		mtr)		/*!< in: mtr */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2849
{
2850
	ulint	err;
2851
2852
	if (prebuilt->old_vers_heap) {
2853
		mem_heap_empty(prebuilt->old_vers_heap);
2854
	} else {
2855
		prebuilt->old_vers_heap = mem_heap_create(200);
2856
	}
2857
2858
	err = row_vers_build_for_consistent_read(
2859
		rec, mtr, clust_index, offsets, read_view, offset_heap,
2860
		prebuilt->old_vers_heap, old_vers);
2861
	return(err);
2862
}
2863
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2864
/*********************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2865
Retrieves the clustered index record corresponding to a record in a
2866
non-clustered index. Does the necessary locking. Used in the MySQL
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2867
interface.
1819.7.159 by Marko Mäkelä, Stewart Smith
Merge Revision revid:marko.makela@oracle.com-20100602113733-fslfv73nhi0d17t4 from MySQL InnoDB
2868
@return	DB_SUCCESS, DB_SUCCESS_LOCKED_REC, or error code */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2869
static
1819.7.159 by Marko Mäkelä, Stewart Smith
Merge Revision revid:marko.makela@oracle.com-20100602113733-fslfv73nhi0d17t4 from MySQL InnoDB
2870
enum db_err
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2871
row_sel_get_clust_rec_for_mysql(
2872
/*============================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2873
	row_prebuilt_t*	prebuilt,/*!< in: prebuilt struct in the handle */
2874
	dict_index_t*	sec_index,/*!< in: secondary index where rec resides */
2875
	const rec_t*	rec,	/*!< in: record in a non-clustered index; if
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2876
				this is a locking read, then rec is not
2877
				allowed to be delete-marked, and that would
2878
				not make sense either */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2879
	que_thr_t*	thr,	/*!< in: query thread */
2880
	const rec_t**	out_rec,/*!< out: clustered record or an old version of
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2881
				it, NULL if the old version did not exist
2882
				in the read view, i.e., it was a fresh
2883
				inserted version */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2884
	ulint**		offsets,/*!< in: offsets returned by
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2885
				rec_get_offsets(rec, sec_index);
2886
				out: offsets returned by
2887
				rec_get_offsets(out_rec, clust_index) */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2888
	mem_heap_t**	offset_heap,/*!< in/out: memory heap from which
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2889
				the offsets are allocated */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2890
	mtr_t*		mtr)	/*!< in: mtr used to get access to the
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2891
				non-clustered record; the same mtr is used to
2892
				access the clustered index */
2893
{
2894
	dict_index_t*	clust_index;
2895
	const rec_t*	clust_rec;
2896
	rec_t*		old_vers;
1819.7.159 by Marko Mäkelä, Stewart Smith
Merge Revision revid:marko.makela@oracle.com-20100602113733-fslfv73nhi0d17t4 from MySQL InnoDB
2897
	enum db_err	err;
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2898
	trx_t*		trx;
2899
2900
	*out_rec = NULL;
2901
	trx = thr_get_trx(thr);
2902
2903
	row_build_row_ref_in_tuple(prebuilt->clust_ref, rec,
2904
				   sec_index, *offsets, trx);
2905
2906
	clust_index = dict_table_get_first_index(sec_index->table);
2907
2908
	btr_pcur_open_with_no_init(clust_index, prebuilt->clust_ref,
2909
				   PAGE_CUR_LE, BTR_SEARCH_LEAF,
2910
				   prebuilt->clust_pcur, 0, mtr);
2911
2912
	clust_rec = btr_pcur_get_rec(prebuilt->clust_pcur);
2913
2914
	prebuilt->clust_pcur->trx_if_known = trx;
2915
2916
	/* Note: only if the search ends up on a non-infimum record is the
2917
	low_match value the real match to the search tuple */
2918
2919
	if (!page_rec_is_user_rec(clust_rec)
2920
	    || btr_pcur_get_low_match(prebuilt->clust_pcur)
2921
	    < dict_index_get_n_unique(clust_index)) {
2922
2923
		/* In a rare case it is possible that no clust rec is found
2924
		for a delete-marked secondary index record: if in row0umod.c
2925
		in row_undo_mod_remove_clust_low() we have already removed
2926
		the clust rec, while purge is still cleaning and removing
2927
		secondary index records associated with earlier versions of
2928
		the clustered index record. In that case we know that the
2929
		clustered index record did not exist in the read view of
2930
		trx. */
2931
2932
		if (!rec_get_deleted_flag(rec,
2933
					  dict_table_is_comp(sec_index->table))
2934
		    || prebuilt->select_lock_type != LOCK_NONE) {
2935
			ut_print_timestamp(stderr);
2936
			fputs("  InnoDB: error clustered record"
2937
			      " for sec rec not found\n"
2938
			      "InnoDB: ", stderr);
2939
			dict_index_name_print(stderr, trx, sec_index);
2940
			fputs("\n"
2941
			      "InnoDB: sec index record ", stderr);
2942
			rec_print(stderr, rec, sec_index);
2943
			fputs("\n"
2944
			      "InnoDB: clust index record ", stderr);
2945
			rec_print(stderr, clust_rec, clust_index);
2946
			putc('\n', stderr);
2947
			trx_print(stderr, trx, 600);
2948
2949
			fputs("\n"
2950
			      "InnoDB: Submit a detailed bug report"
2951
			      " to http://bugs.mysql.com\n", stderr);
2952
		}
2953
2954
		clust_rec = NULL;
2955
1819.7.159 by Marko Mäkelä, Stewart Smith
Merge Revision revid:marko.makela@oracle.com-20100602113733-fslfv73nhi0d17t4 from MySQL InnoDB
2956
		err = DB_SUCCESS;
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2957
		goto func_exit;
2958
	}
2959
2960
	*offsets = rec_get_offsets(clust_rec, clust_index, *offsets,
2961
				   ULINT_UNDEFINED, offset_heap);
2962
2963
	if (prebuilt->select_lock_type != LOCK_NONE) {
2964
		/* Try to place a lock on the index record; we are searching
2965
		the clust rec with a unique condition, hence
2966
		we set a LOCK_REC_NOT_GAP type lock */
2967
2968
		err = lock_clust_rec_read_check_and_lock(
2969
			0, btr_pcur_get_block(prebuilt->clust_pcur),
2970
			clust_rec, clust_index, *offsets,
2971
			prebuilt->select_lock_type, LOCK_REC_NOT_GAP, thr);
1819.7.159 by Marko Mäkelä, Stewart Smith
Merge Revision revid:marko.makela@oracle.com-20100602113733-fslfv73nhi0d17t4 from MySQL InnoDB
2972
		switch (err) {
2973
		case DB_SUCCESS:
2974
		case DB_SUCCESS_LOCKED_REC:
2975
			break;
2976
		default:
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2977
			goto err_exit;
2978
		}
2979
	} else {
2980
		/* This is a non-locking consistent read: if necessary, fetch
2981
		a previous version of the record */
2982
2983
		old_vers = NULL;
2984
2985
		/* If the isolation level allows reading of uncommitted data,
2986
		then we never look for an earlier version */
2987
2988
		if (trx->isolation_level > TRX_ISO_READ_UNCOMMITTED
2989
		    && !lock_clust_rec_cons_read_sees(
2990
			    clust_rec, clust_index, *offsets,
2991
			    trx->read_view)) {
2992
2993
			/* The following call returns 'offsets' associated with
2994
			'old_vers' */
2995
			err = row_sel_build_prev_vers_for_mysql(
2996
				trx->read_view, clust_index, prebuilt,
2997
				clust_rec, offsets, offset_heap, &old_vers,
2998
				mtr);
2999
3000
			if (err != DB_SUCCESS || old_vers == NULL) {
3001
3002
				goto err_exit;
3003
			}
3004
3005
			clust_rec = old_vers;
3006
		}
3007
3008
		/* If we had to go to an earlier version of row or the
3009
		secondary index record is delete marked, then it may be that
3010
		the secondary index record corresponding to clust_rec
3011
		(or old_vers) is not rec; in that case we must ignore
3012
		such row because in our snapshot rec would not have existed.
3013
		Remember that from rec we cannot see directly which transaction
3014
		id corresponds to it: we have to go to the clustered index
3015
		record. A query where we want to fetch all rows where
3016
		the secondary index value is in some interval would return
3017
		a wrong result if we would not drop rows which we come to
3018
		visit through secondary index records that would not really
3019
		exist in our snapshot. */
3020
3021
		if (clust_rec
3022
		    && (old_vers
1819.5.169 by marko
Merge Revision revid:svn-v4:16c675df-0fcb-4bc9-8058-dcc011a37293:branches/zip:6447 from MySQL InnoDB
3023
			|| trx->isolation_level <= TRX_ISO_READ_UNCOMMITTED
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3024
			|| rec_get_deleted_flag(rec, dict_table_is_comp(
3025
							sec_index->table)))
3026
		    && !row_sel_sec_rec_is_for_clust_rec(
3027
			    rec, sec_index, clust_rec, clust_index)) {
3028
			clust_rec = NULL;
3029
#ifdef UNIV_SEARCH_DEBUG
3030
		} else {
3031
			ut_a(clust_rec == NULL
3032
			     || row_sel_sec_rec_is_for_clust_rec(
3033
				     rec, sec_index, clust_rec, clust_index));
3034
#endif
3035
		}
1819.7.159 by Marko Mäkelä, Stewart Smith
Merge Revision revid:marko.makela@oracle.com-20100602113733-fslfv73nhi0d17t4 from MySQL InnoDB
3036
3037
		err = DB_SUCCESS;
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3038
	}
3039
3040
func_exit:
3041
	*out_rec = clust_rec;
3042
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
3043
	if (prebuilt->select_lock_type != LOCK_NONE) {
3044
		/* We may use the cursor in update or in unlock_row():
3045
		store its position */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3046
3047
		btr_pcur_store_position(prebuilt->clust_pcur, mtr);
3048
	}
3049
3050
err_exit:
3051
	return(err);
3052
}
3053
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
3054
/********************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3055
Restores cursor position after it has been stored. We have to take into
3056
account that the record cursor was positioned on may have been deleted.
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
3057
Then we may have to move the cursor one step up or down.
3058
@return TRUE if we may need to process the record the cursor is now
3059
positioned on (i.e. we should not go to the next record yet) */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3060
static
3061
ibool
3062
sel_restore_position_for_mysql(
3063
/*===========================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
3064
	ibool*		same_user_rec,	/*!< out: TRUE if we were able to restore
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3065
					the cursor on a user record with the
3066
					same ordering prefix in in the
3067
					B-tree index */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
3068
	ulint		latch_mode,	/*!< in: latch mode wished in
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3069
					restoration */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
3070
	btr_pcur_t*	pcur,		/*!< in: cursor whose position
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3071
					has been stored */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
3072
	ibool		moves_up,	/*!< in: TRUE if the cursor moves up
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3073
					in the index */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
3074
	mtr_t*		mtr)		/*!< in: mtr; CAUTION: may commit
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3075
					mtr temporarily! */
3076
{
3077
	ibool	success;
3078
	ulint	relative_position;
3079
3080
	relative_position = pcur->rel_pos;
3081
3082
	success = btr_pcur_restore_position(latch_mode, pcur, mtr);
3083
3084
	*same_user_rec = success;
3085
3086
	if (relative_position == BTR_PCUR_ON) {
3087
		if (success) {
3088
			return(FALSE);
3089
		}
3090
3091
		if (moves_up) {
3092
			btr_pcur_move_to_next(pcur, mtr);
3093
		}
3094
3095
		return(TRUE);
3096
	}
3097
3098
	if (relative_position == BTR_PCUR_AFTER
3099
	    || relative_position == BTR_PCUR_AFTER_LAST_IN_TREE) {
3100
3101
		if (moves_up) {
3102
			return(TRUE);
3103
		}
3104
3105
		if (btr_pcur_is_on_user_rec(pcur)) {
3106
			btr_pcur_move_to_prev(pcur, mtr);
3107
		}
3108
3109
		return(TRUE);
3110
	}
3111
3112
	ut_ad(relative_position == BTR_PCUR_BEFORE
3113
	      || relative_position == BTR_PCUR_BEFORE_FIRST_IN_TREE);
3114
3115
	if (moves_up && btr_pcur_is_on_user_rec(pcur)) {
3116
		btr_pcur_move_to_next(pcur, mtr);
3117
	}
3118
3119
	return(TRUE);
3120
}
3121
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
3122
/********************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3123
Pops a cached row for MySQL from the fetch cache. */
3124
UNIV_INLINE
3125
void
3126
row_sel_pop_cached_row_for_mysql(
3127
/*=============================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
3128
	byte*		buf,		/*!< in/out: buffer where to copy the
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3129
					row */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
3130
	row_prebuilt_t*	prebuilt)	/*!< in: prebuilt struct */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3131
{
3132
	ulint			i;
3133
	mysql_row_templ_t*	templ;
3134
	byte*			cached_rec;
3135
	ut_ad(prebuilt->n_fetch_cached > 0);
3136
	ut_ad(prebuilt->mysql_prefix_len <= prebuilt->mysql_row_len);
3137
3138
	if (UNIV_UNLIKELY(prebuilt->keep_other_fields_on_keyread)) {
3139
		/* Copy cache record field by field, don't touch fields that
3140
		are not covered by current key */
3141
		cached_rec = prebuilt->fetch_cache[
3142
			prebuilt->fetch_cache_first];
3143
3144
		for (i = 0; i < prebuilt->n_template; i++) {
3145
			templ = prebuilt->mysql_template + i;
1819.7.95 by Marko Mäkelä
Merge Revision revid:marko.makela@oracle.com-20100505101406-u4low2x26q6itck0 from MySQL InnoDB
3146
#if 0 /* Some of the cached_rec may legitimately be uninitialized. */
3147
			UNIV_MEM_ASSERT_RW(cached_rec
3148
					   + templ->mysql_col_offset,
3149
					   templ->mysql_col_len);
3150
#endif
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3151
			ut_memcpy(buf + templ->mysql_col_offset,
3152
				  cached_rec + templ->mysql_col_offset,
3153
				  templ->mysql_col_len);
3154
			/* Copy NULL bit of the current field from cached_rec
3155
			to buf */
3156
			if (templ->mysql_null_bit_mask) {
3157
				buf[templ->mysql_null_byte_offset]
3158
					^= (buf[templ->mysql_null_byte_offset]
3159
					    ^ cached_rec[templ->mysql_null_byte_offset])
3160
					& (byte)templ->mysql_null_bit_mask;
3161
			}
3162
		}
3163
	}
3164
	else {
1819.7.95 by Marko Mäkelä
Merge Revision revid:marko.makela@oracle.com-20100505101406-u4low2x26q6itck0 from MySQL InnoDB
3165
#if 0 /* Some of the cached_rec may legitimately be uninitialized. */
3166
		UNIV_MEM_ASSERT_RW(prebuilt->fetch_cache
3167
				   [prebuilt->fetch_cache_first],
3168
				   prebuilt->mysql_prefix_len);
3169
#endif
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3170
		ut_memcpy(buf,
3171
			  prebuilt->fetch_cache[prebuilt->fetch_cache_first],
3172
			  prebuilt->mysql_prefix_len);
3173
	}
3174
	prebuilt->n_fetch_cached--;
3175
	prebuilt->fetch_cache_first++;
3176
3177
	if (prebuilt->n_fetch_cached == 0) {
3178
		prebuilt->fetch_cache_first = 0;
3179
	}
3180
}
3181
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
3182
/********************************************************************//**
1819.7.58 by Marko Mäkelä
Merge Revision revid:marko.makela@oracle.com-20100629125518-m3am4ia1ffjr0d0j from MySQL InnoDB
3183
Pushes a row for MySQL to the fetch cache.
3184
@return TRUE on success, FALSE if the record contains incomplete BLOBs */
1819.7.68 by Stewart Smith
Merge initial InnoDB+ import.
3185
UNIV_INLINE
1819.8.2 by Stewart Smith
only warn_unused_result in innobase if GNUC
3186
#ifdef __GNUC__
3187
__attribute__((warn_unused_result))
3188
#endif
1819.7.58 by Marko Mäkelä
Merge Revision revid:marko.makela@oracle.com-20100629125518-m3am4ia1ffjr0d0j from MySQL InnoDB
3189
ibool
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3190
row_sel_push_cache_row_for_mysql(
3191
/*=============================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
3192
	row_prebuilt_t*	prebuilt,	/*!< in: prebuilt struct */
3193
	const rec_t*	rec,		/*!< in: record to push; must
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3194
					be protected by a page latch */
1114.1.1 by Monty Taylor
Merged InnoDB Plugin 1.0.4
3195
	const ulint*	offsets)	/*!<in: rec_get_offsets() */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3196
{
3197
	byte*	buf;
3198
	ulint	i;
3199
3200
	ut_ad(prebuilt->n_fetch_cached < MYSQL_FETCH_CACHE_SIZE);
3201
	ut_ad(rec_offs_validate(rec, NULL, offsets));
3202
	ut_a(!prebuilt->templ_contains_blob);
3203
3204
	if (prebuilt->fetch_cache[0] == NULL) {
3205
		/* Allocate memory for the fetch cache */
3206
3207
		for (i = 0; i < MYSQL_FETCH_CACHE_SIZE; i++) {
3208
3209
			/* A user has reported memory corruption in these
3210
			buffers in Linux. Put magic numbers there to help
3211
			to track a possible bug. */
3212
3213
			buf = mem_alloc(prebuilt->mysql_row_len + 8);
3214
3215
			prebuilt->fetch_cache[i] = buf + 4;
3216
3217
			mach_write_to_4(buf, ROW_PREBUILT_FETCH_MAGIC_N);
3218
			mach_write_to_4(buf + 4 + prebuilt->mysql_row_len,
3219
					ROW_PREBUILT_FETCH_MAGIC_N);
3220
		}
3221
	}
3222
3223
	ut_ad(prebuilt->fetch_cache_first == 0);
1819.7.95 by Marko Mäkelä
Merge Revision revid:marko.makela@oracle.com-20100505101406-u4low2x26q6itck0 from MySQL InnoDB
3224
	UNIV_MEM_INVALID(prebuilt->fetch_cache[prebuilt->n_fetch_cached],
3225
			 prebuilt->mysql_row_len);
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3226
3227
	if (UNIV_UNLIKELY(!row_sel_store_mysql_rec(
3228
				  prebuilt->fetch_cache[
3229
					  prebuilt->n_fetch_cached],
3230
				  prebuilt, rec, offsets))) {
1819.7.58 by Marko Mäkelä
Merge Revision revid:marko.makela@oracle.com-20100629125518-m3am4ia1ffjr0d0j from MySQL InnoDB
3231
		return(FALSE);
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3232
	}
3233
3234
	prebuilt->n_fetch_cached++;
1819.7.58 by Marko Mäkelä
Merge Revision revid:marko.makela@oracle.com-20100629125518-m3am4ia1ffjr0d0j from MySQL InnoDB
3235
	return(TRUE);
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3236
}
3237
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
3238
/*********************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3239
Tries to do a shortcut to fetch a clustered index record with a unique key,
3240
using the hash index if possible (not always). We assume that the search
3241
mode is PAGE_CUR_GE, it is a consistent read, there is a read view in trx,
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
3242
btr search latch has been locked in S-mode.
3243
@return	SEL_FOUND, SEL_EXHAUSTED, SEL_RETRY */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3244
static
3245
ulint
3246
row_sel_try_search_shortcut_for_mysql(
3247
/*==================================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
3248
	const rec_t**	out_rec,/*!< out: record if found */
3249
	row_prebuilt_t*	prebuilt,/*!< in: prebuilt struct */
3250
	ulint**		offsets,/*!< in/out: for rec_get_offsets(*out_rec) */
3251
	mem_heap_t**	heap,	/*!< in/out: heap for rec_get_offsets() */
3252
	mtr_t*		mtr)	/*!< in: started mtr */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3253
{
3254
	dict_index_t*	index		= prebuilt->index;
3255
	const dtuple_t*	search_tuple	= prebuilt->search_tuple;
3256
	btr_pcur_t*	pcur		= prebuilt->pcur;
3257
	trx_t*		trx		= prebuilt->trx;
3258
	const rec_t*	rec;
3259
3260
	ut_ad(dict_index_is_clust(index));
3261
	ut_ad(!prebuilt->templ_contains_blob);
3262
1819.5.201 by marko
Merge Revision revid:svn-v4:16c675df-0fcb-4bc9-8058-dcc011a37293:branches/zip:6635 from MySQL InnoDB
3263
#ifndef UNIV_SEARCH_DEBUG
3264
	btr_pcur_open_with_no_init(index, search_tuple, PAGE_CUR_GE,
3265
				   BTR_SEARCH_LEAF, pcur,
3266
				   RW_S_LATCH,
3267
				   mtr);
3268
#else /* UNIV_SEARCH_DEBUG */
1819.5.194 by Stewart Smith
fix gcc warning 'embedding a directive within macro arguments is not portable' in row0sel.c
3269
	btr_pcur_open_with_no_init(index, search_tuple, PAGE_CUR_GE,
3270
				   BTR_SEARCH_LEAF, pcur,
3271
				   0,
3272
				   mtr);
1819.5.201 by marko
Merge Revision revid:svn-v4:16c675df-0fcb-4bc9-8058-dcc011a37293:branches/zip:6635 from MySQL InnoDB
3273
#endif /* UNIV_SEARCH_DEBUG */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3274
	rec = btr_pcur_get_rec(pcur);
3275
3276
	if (!page_rec_is_user_rec(rec)) {
3277
3278
		return(SEL_RETRY);
3279
	}
3280
3281
	/* As the cursor is now placed on a user record after a search with
3282
	the mode PAGE_CUR_GE, the up_match field in the cursor tells how many
3283
	fields in the user record matched to the search tuple */
3284
3285
	if (btr_pcur_get_up_match(pcur) < dtuple_get_n_fields(search_tuple)) {
3286
3287
		return(SEL_EXHAUSTED);
3288
	}
3289
3290
	/* This is a non-locking consistent read: if necessary, fetch
3291
	a previous version of the record */
3292
3293
	*offsets = rec_get_offsets(rec, index, *offsets,
3294
				   ULINT_UNDEFINED, heap);
3295
3296
	if (!lock_clust_rec_cons_read_sees(rec, index,
3297
					   *offsets, trx->read_view)) {
3298
3299
		return(SEL_RETRY);
3300
	}
3301
3302
	if (rec_get_deleted_flag(rec, dict_table_is_comp(index->table))) {
3303
3304
		return(SEL_EXHAUSTED);
3305
	}
3306
3307
	*out_rec = rec;
3308
3309
	return(SEL_FOUND);
3310
}
3311
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
3312
/********************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3313
Searches for rows in the database. This is used in the interface to
3314
MySQL. This function opens a cursor, and also implements fetch next
3315
and fetch prev. NOTE that if we do a search with a full key value
3316
from a unique index (ROW_SEL_EXACT), then we will not store the cursor
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
3317
position and fetch next or fetch prev must not be tried to the cursor!
3318
@return DB_SUCCESS, DB_RECORD_NOT_FOUND, DB_END_OF_INDEX, DB_DEADLOCK,
3319
DB_LOCK_TABLE_FULL, DB_CORRUPTION, or DB_TOO_BIG_RECORD */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3320
UNIV_INTERN
3321
ulint
3322
row_search_for_mysql(
3323
/*=================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
3324
	byte*		buf,		/*!< in/out: buffer for the fetched
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3325
					row in the MySQL format */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
3326
	ulint		mode,		/*!< in: search mode PAGE_CUR_L, ... */
3327
	row_prebuilt_t*	prebuilt,	/*!< in: prebuilt struct for the
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3328
					table handle; this contains the info
3329
					of search_tuple, index; if search
3330
					tuple contains 0 fields then we
3331
					position the cursor at the start or
3332
					the end of the index, depending on
3333
					'mode' */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
3334
	ulint		match_mode,	/*!< in: 0 or ROW_SEL_EXACT or
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3335
					ROW_SEL_EXACT_PREFIX */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
3336
	ulint		direction)	/*!< in: 0 or ROW_SEL_NEXT or
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3337
					ROW_SEL_PREV; NOTE: if this is != 0,
3338
					then prebuilt must have a pcur
3339
					with stored position! In opening of a
3340
					cursor 'direction' should be 0. */
3341
{
3342
	dict_index_t*	index		= prebuilt->index;
3343
	ibool		comp		= dict_table_is_comp(index->table);
3344
	const dtuple_t*	search_tuple	= prebuilt->search_tuple;
3345
	btr_pcur_t*	pcur		= prebuilt->pcur;
3346
	trx_t*		trx		= prebuilt->trx;
3347
	dict_index_t*	clust_index;
3348
	que_thr_t*	thr;
3349
	const rec_t*	rec;
3350
	const rec_t*	result_rec;
3351
	const rec_t*	clust_rec;
3352
	ulint		err				= DB_SUCCESS;
3353
	ibool		unique_search			= FALSE;
3354
	ibool		unique_search_from_clust_index	= FALSE;
3355
	ibool		mtr_has_extra_clust_latch	= FALSE;
3356
	ibool		moves_up			= FALSE;
3357
	ibool		set_also_gap_locks		= TRUE;
3358
	/* if the query is a plain locking SELECT, and the isolation level
3359
	is <= TRX_ISO_READ_COMMITTED, then this is set to FALSE */
3360
	ibool		did_semi_consistent_read	= FALSE;
3361
	/* if the returned record was locked and we did a semi-consistent
3362
	read (fetch the newest committed version), then this is set to
3363
	TRUE */
3364
#ifdef UNIV_SEARCH_DEBUG
3365
	ulint		cnt				= 0;
3366
#endif /* UNIV_SEARCH_DEBUG */
3367
	ulint		next_offs;
3368
	ibool		same_user_rec;
3369
	mtr_t		mtr;
3370
	mem_heap_t*	heap				= NULL;
3371
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
3372
	ulint*		offsets				= offsets_;
1819.9.153 by Jimmy Yang, Stewart Smith
Merge Revision revid:jimmy.yang@oracle.com-20101011123613-guz1qgdktywmel1g from MySQL InnoDB
3373
	ibool		table_lock_waited		= FALSE;
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3374
3375
	rec_offs_init(offsets_);
3376
3377
	ut_ad(index && pcur && search_tuple);
3378
	ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
3379
3380
	if (UNIV_UNLIKELY(prebuilt->table->ibd_file_missing)) {
3381
		ut_print_timestamp(stderr);
3382
		fprintf(stderr, "  InnoDB: Error:\n"
3383
			"InnoDB: MySQL is trying to use a table handle"
3384
			" but the .ibd file for\n"
3385
			"InnoDB: table %s does not exist.\n"
3386
			"InnoDB: Have you deleted the .ibd file"
3387
			" from the database directory under\n"
3388
			"InnoDB: the MySQL datadir, or have you used"
3389
			" DISCARD TABLESPACE?\n"
3390
			"InnoDB: Look from\n"
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
3391
			"InnoDB: " REFMAN "innodb-troubleshooting.html\n"
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3392
			"InnoDB: how you can resolve the problem.\n",
3393
			prebuilt->table->name);
3394
3395
		return(DB_ERROR);
3396
	}
3397
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
3398
	if (UNIV_UNLIKELY(!prebuilt->index_usable)) {
3399
3400
		return(DB_MISSING_HISTORY);
3401
	}
3402
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3403
	if (UNIV_UNLIKELY(prebuilt->magic_n != ROW_PREBUILT_ALLOCATED)) {
3404
		fprintf(stderr,
3405
			"InnoDB: Error: trying to free a corrupt\n"
3406
			"InnoDB: table handle. Magic n %lu, table name ",
3407
			(ulong) prebuilt->magic_n);
3408
		ut_print_name(stderr, trx, TRUE, prebuilt->table->name);
3409
		putc('\n', stderr);
3410
3411
		mem_analyze_corruption(prebuilt);
3412
3413
		ut_error;
3414
	}
3415
3416
#if 0
3417
	fprintf(stderr, "Match mode %lu\n search tuple ",
3418
		(ulong) match_mode);
3419
	dtuple_print(search_tuple);
3420
	fprintf(stderr, "N tables locked %lu\n",
3421
		(ulong) trx->mysql_n_tables_locked);
3422
#endif
3423
	/*-------------------------------------------------------------*/
3424
	/* PHASE 0: Release a possible s-latch we are holding on the
3425
	adaptive hash index latch if there is someone waiting behind */
3426
641.2.2 by Monty Taylor
InnoDB Plugin 1.0.3
3427
	if (UNIV_UNLIKELY(rw_lock_get_writer(&btr_search_latch) != RW_LOCK_NOT_LOCKED)
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3428
	    && trx->has_search_latch) {
3429
3430
		/* There is an x-latch request on the adaptive hash index:
3431
		release the s-latch to reduce starvation and wait for
3432
		BTR_SEA_TIMEOUT rounds before trying to keep it again over
3433
		calls from MySQL */
3434
3435
		rw_lock_s_unlock(&btr_search_latch);
3436
		trx->has_search_latch = FALSE;
3437
3438
		trx->search_latch_timeout = BTR_SEA_TIMEOUT;
3439
	}
3440
3441
	/* Reset the new record lock info if srv_locks_unsafe_for_binlog
3442
	is set or session is using a READ COMMITTED isolation level. Then
3443
	we are able to remove the record locks set here on an individual
3444
	row. */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
3445
	prebuilt->new_rec_locks = 0;
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3446
3447
	/*-------------------------------------------------------------*/
3448
	/* PHASE 1: Try to pop the row from the prefetch cache */
3449
3450
	if (UNIV_UNLIKELY(direction == 0)) {
3451
		trx->op_info = "starting index read";
3452
3453
		prebuilt->n_rows_fetched = 0;
3454
		prebuilt->n_fetch_cached = 0;
3455
		prebuilt->fetch_cache_first = 0;
3456
3457
		if (prebuilt->sel_graph == NULL) {
3458
			/* Build a dummy select query graph */
3459
			row_prebuild_sel_graph(prebuilt);
3460
		}
3461
	} else {
3462
		trx->op_info = "fetching rows";
3463
3464
		if (prebuilt->n_rows_fetched == 0) {
3465
			prebuilt->fetch_direction = direction;
3466
		}
3467
3468
		if (UNIV_UNLIKELY(direction != prebuilt->fetch_direction)) {
3469
			if (UNIV_UNLIKELY(prebuilt->n_fetch_cached > 0)) {
3470
				ut_error;
3471
				/* TODO: scrollable cursor: restore cursor to
3472
				the place of the latest returned row,
3473
				or better: prevent caching for a scroll
3474
				cursor! */
3475
			}
3476
3477
			prebuilt->n_rows_fetched = 0;
3478
			prebuilt->n_fetch_cached = 0;
3479
			prebuilt->fetch_cache_first = 0;
3480
3481
		} else if (UNIV_LIKELY(prebuilt->n_fetch_cached > 0)) {
3482
			row_sel_pop_cached_row_for_mysql(buf, prebuilt);
3483
3484
			prebuilt->n_rows_fetched++;
3485
3486
			srv_n_rows_read++;
3487
			err = DB_SUCCESS;
3488
			goto func_exit;
3489
		}
3490
3491
		if (prebuilt->fetch_cache_first > 0
3492
		    && prebuilt->fetch_cache_first < MYSQL_FETCH_CACHE_SIZE) {
3493
3494
			/* The previous returned row was popped from the fetch
3495
			cache, but the cache was not full at the time of the
3496
			popping: no more rows can exist in the result set */
3497
3498
			err = DB_RECORD_NOT_FOUND;
3499
			goto func_exit;
3500
		}
3501
3502
		prebuilt->n_rows_fetched++;
3503
3504
		if (prebuilt->n_rows_fetched > 1000000000) {
3505
			/* Prevent wrap-over */
3506
			prebuilt->n_rows_fetched = 500000000;
3507
		}
3508
3509
		mode = pcur->search_mode;
3510
	}
3511
3512
	/* In a search where at most one record in the index may match, we
3513
	can use a LOCK_REC_NOT_GAP type record lock when locking a
3514
	non-delete-marked matching record.
3515
3516
	Note that in a unique secondary index there may be different
3517
	delete-marked versions of a record where only the primary key
3518
	values differ: thus in a secondary index we must use next-key
3519
	locks when locking delete-marked records. */
3520
3521
	if (match_mode == ROW_SEL_EXACT
3522
	    && dict_index_is_unique(index)
3523
	    && dtuple_get_n_fields(search_tuple)
3524
	    == dict_index_get_n_unique(index)
3525
	    && (dict_index_is_clust(index)
3526
		|| !dtuple_contains_null(search_tuple))) {
3527
3528
		/* Note above that a UNIQUE secondary index can contain many
3529
		rows with the same key value if one of the columns is the SQL
3530
		null. A clustered index under MySQL can never contain null
3531
		columns because we demand that all the columns in primary key
3532
		are non-null. */
3533
3534
		unique_search = TRUE;
3535
3536
		/* Even if the condition is unique, MySQL seems to try to
3537
		retrieve also a second row if a primary key contains more than
1273.1.23 by Jay Pipes
Finally kills of n_mysql_tables_in_use.
3538
		1 column.*/
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3539
1273.1.23 by Jay Pipes
Finally kills of n_mysql_tables_in_use.
3540
		if (UNIV_UNLIKELY(direction != 0)) {
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3541
3542
			err = DB_RECORD_NOT_FOUND;
3543
			goto func_exit;
3544
		}
3545
	}
3546
3547
	mtr_start(&mtr);
3548
3549
	/*-------------------------------------------------------------*/
3550
	/* PHASE 2: Try fast adaptive hash index search if possible */
3551
3552
	/* Next test if this is the special case where we can use the fast
3553
	adaptive hash index to try the search. Since we must release the
3554
	search system latch when we retrieve an externally stored field, we
3555
	cannot use the adaptive hash index in a search in the case the row
3556
	may be long and there may be externally stored fields */
3557
3558
	if (UNIV_UNLIKELY(direction == 0)
3559
	    && unique_search
3560
	    && dict_index_is_clust(index)
3561
	    && !prebuilt->templ_contains_blob
3562
	    && (prebuilt->mysql_row_len < UNIV_PAGE_SIZE / 8)) {
3563
3564
		mode = PAGE_CUR_GE;
3565
3566
		unique_search_from_clust_index = TRUE;
3567
3568
		if (trx->mysql_n_tables_locked == 0
3569
		    && prebuilt->select_lock_type == LOCK_NONE
3570
		    && trx->isolation_level > TRX_ISO_READ_UNCOMMITTED
3571
		    && trx->read_view) {
3572
3573
			/* This is a SELECT query done as a consistent read,
3574
			and the read view has already been allocated:
3575
			let us try a search shortcut through the hash
3576
			index.
3577
			NOTE that we must also test that
3578
			mysql_n_tables_locked == 0, because this might
3579
			also be INSERT INTO ... SELECT ... or
3580
			CREATE TABLE ... SELECT ... . Our algorithm is
3581
			NOT prepared for inserts interleaved with the SELECT,
3582
			and if we try that, we can deadlock on the adaptive
3583
			hash index semaphore! */
3584
3585
#ifndef UNIV_SEARCH_DEBUG
3586
			if (!trx->has_search_latch) {
3587
				rw_lock_s_lock(&btr_search_latch);
3588
				trx->has_search_latch = TRUE;
3589
			}
3590
#endif
3591
			switch (row_sel_try_search_shortcut_for_mysql(
3592
					&rec, prebuilt, &offsets, &heap,
3593
					&mtr)) {
3594
			case SEL_FOUND:
3595
#ifdef UNIV_SEARCH_DEBUG
3596
				ut_a(0 == cmp_dtuple_rec(search_tuple,
3597
							 rec, offsets));
3598
#endif
3599
				/* At this point, rec is protected by
3600
				a page latch that was acquired by
3601
				row_sel_try_search_shortcut_for_mysql().
3602
				The latch will not be released until
3603
				mtr_commit(&mtr). */
1819.9.84 by Marko Mäkelä, Stewart Smith
Merge Revision revid:marko.makela@oracle.com-20100818110110-zfs0i1vfrccfb4yw from MySQL InnoDB
3604
				ut_ad(!rec_get_deleted_flag(rec, comp));
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3605
3606
				if (!row_sel_store_mysql_rec(buf, prebuilt,
3607
							     rec, offsets)) {
1819.7.62 by Marko Mäkelä
Merge Revision revid:marko.makela@oracle.com-20100630093149-wmc37t128gic933v from MySQL InnoDB
3608
					/* Only fresh inserts may contain
1819.7.58 by Marko Mäkelä
Merge Revision revid:marko.makela@oracle.com-20100629125518-m3am4ia1ffjr0d0j from MySQL InnoDB
3609
					incomplete externally stored
3610
					columns. Pretend that such
3611
					records do not exist. Such
3612
					records may only be accessed
3613
					at the READ UNCOMMITTED
3614
					isolation level or when
3615
					rolling back a recovered
3616
					transaction. Rollback happens
3617
					at a lower level, not here. */
3618
					ut_a(trx->isolation_level
3619
					     == TRX_ISO_READ_UNCOMMITTED);
3620
1819.7.63 by Marko Mäkelä
Merge Revision revid:marko.makela@oracle.com-20100630093847-7gkr1lh3bh2xksy0 from MySQL InnoDB
3621
					/* Proceed as in case SEL_RETRY. */
3622
					break;
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3623
				}
3624
3625
				mtr_commit(&mtr);
3626
3627
				/* ut_print_name(stderr, index->name);
3628
				fputs(" shortcut\n", stderr); */
3629
3630
				srv_n_rows_read++;
3631
3632
				err = DB_SUCCESS;
3633
				goto release_search_latch_if_needed;
3634
3635
			case SEL_EXHAUSTED:
3636
				mtr_commit(&mtr);
3637
3638
				/* ut_print_name(stderr, index->name);
3639
				fputs(" record not found 2\n", stderr); */
3640
3641
				err = DB_RECORD_NOT_FOUND;
3642
release_search_latch_if_needed:
3643
				if (trx->search_latch_timeout > 0
3644
				    && trx->has_search_latch) {
3645
3646
					trx->search_latch_timeout--;
3647
3648
					rw_lock_s_unlock(&btr_search_latch);
3649
					trx->has_search_latch = FALSE;
3650
				}
3651
3652
				/* NOTE that we do NOT store the cursor
3653
				position */
3654
				goto func_exit;
3655
3656
			case SEL_RETRY:
3657
				break;
3658
3659
			default:
3660
				ut_ad(0);
3661
			}
1819.7.63 by Marko Mäkelä
Merge Revision revid:marko.makela@oracle.com-20100630093847-7gkr1lh3bh2xksy0 from MySQL InnoDB
3662
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3663
			mtr_commit(&mtr);
3664
			mtr_start(&mtr);
3665
		}
3666
	}
3667
3668
	/*-------------------------------------------------------------*/
3669
	/* PHASE 3: Open or restore index cursor position */
3670
3671
	if (trx->has_search_latch) {
3672
		rw_lock_s_unlock(&btr_search_latch);
3673
		trx->has_search_latch = FALSE;
3674
	}
3675
1819.7.151 by Marko Mäkelä
Merge Revision revid:marko.makela@oracle.com-20100601114015-n30uofphhbi71z1v from MySQL InnoDB
3676
	ut_ad(prebuilt->sql_stat_start || trx->conc_state == TRX_ACTIVE);
3677
	ut_ad(trx->conc_state == TRX_NOT_STARTED
3678
	      || trx->conc_state == TRX_ACTIVE);
3679
	ut_ad(prebuilt->sql_stat_start
3680
	      || prebuilt->select_lock_type != LOCK_NONE
3681
	      || trx->read_view);
3682
1819.7.168 by Marko Mäkelä
Merge Revision revid:marko.makela@oracle.com-20100525125352-hgafpmqhrrj7pv5i from MySQL InnoDB
3683
	ut_ad(prebuilt->sql_stat_start || trx->conc_state == TRX_ACTIVE);
3684
	ut_ad(trx->conc_state == TRX_NOT_STARTED
3685
	      || trx->conc_state == TRX_ACTIVE);
3686
	ut_ad(prebuilt->sql_stat_start
3687
	      || prebuilt->select_lock_type != LOCK_NONE
3688
	      || trx->read_view);
3689
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3690
	trx_start_if_not_started(trx);
3691
3692
	if (trx->isolation_level <= TRX_ISO_READ_COMMITTED
3693
	    && prebuilt->select_lock_type != LOCK_NONE
3694
	    && trx->mysql_thd != NULL
641.2.1 by Monty Taylor
InnoDB Plugin 1.0.2
3695
	    && thd_is_select(trx->mysql_thd)) {
3696
		/* It is a plain locking SELECT and the isolation
3697
		level is low: do not lock gaps */
3698
3699
		set_also_gap_locks = FALSE;
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3700
	}
3701
3702
	/* Note that if the search mode was GE or G, then the cursor
3703
	naturally moves upward (in fetch next) in alphabetical order,
3704
	otherwise downward */
3705
3706
	if (UNIV_UNLIKELY(direction == 0)) {
3707
		if (mode == PAGE_CUR_GE || mode == PAGE_CUR_G) {
3708
			moves_up = TRUE;
3709
		}
3710
	} else if (direction == ROW_SEL_NEXT) {
3711
		moves_up = TRUE;
3712
	}
3713
3714
	thr = que_fork_get_first_thr(prebuilt->sel_graph);
3715
3716
	que_thr_move_to_run_state_for_mysql(thr, trx);
3717
3718
	clust_index = dict_table_get_first_index(index->table);
3719
1819.9.147 by Marko Mäkelä
Merge Revision revid:marko.makela@oracle.com-20101004100521-qx6fw489swrr29io from MySQL InnoDB
3720
	/* Do some start-of-statement preparations */
3721
3722
	if (!prebuilt->sql_stat_start) {
3723
		/* No need to set an intention lock or assign a read view */
3724
3725
		if (trx->read_view == NULL
3726
		    && prebuilt->select_lock_type == LOCK_NONE) {
3727
3728
			fputs("InnoDB: Error: MySQL is trying to"
3729
			      " perform a consistent read\n"
3730
			      "InnoDB: but the read view is not assigned!\n",
3731
			      stderr);
3732
			trx_print(stderr, trx, 600);
3733
			fputc('\n', stderr);
3734
			ut_error;
3735
		}
3736
	} else if (prebuilt->select_lock_type == LOCK_NONE) {
3737
		/* This is a consistent read */
3738
		/* Assign a read view for the query */
3739
3740
		trx_assign_read_view(trx);
3741
		prebuilt->sql_stat_start = FALSE;
3742
	} else {
1819.9.153 by Jimmy Yang, Stewart Smith
Merge Revision revid:jimmy.yang@oracle.com-20101011123613-guz1qgdktywmel1g from MySQL InnoDB
3743
wait_table_again:
1819.9.147 by Marko Mäkelä
Merge Revision revid:marko.makela@oracle.com-20101004100521-qx6fw489swrr29io from MySQL InnoDB
3744
		err = lock_table(0, index->table,
3745
				 prebuilt->select_lock_type == LOCK_S
3746
				 ? LOCK_IS : LOCK_IX, thr);
3747
3748
		if (err != DB_SUCCESS) {
3749
1819.9.153 by Jimmy Yang, Stewart Smith
Merge Revision revid:jimmy.yang@oracle.com-20101011123613-guz1qgdktywmel1g from MySQL InnoDB
3750
			table_lock_waited = TRUE;
3751
			goto lock_table_wait;
1819.9.147 by Marko Mäkelä
Merge Revision revid:marko.makela@oracle.com-20101004100521-qx6fw489swrr29io from MySQL InnoDB
3752
		}
3753
		prebuilt->sql_stat_start = FALSE;
3754
	}
3755
3756
	/* Open or restore index cursor position */
3757
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3758
	if (UNIV_LIKELY(direction != 0)) {
3759
		ibool	need_to_process = sel_restore_position_for_mysql(
3760
			&same_user_rec, BTR_SEARCH_LEAF,
3761
			pcur, moves_up, &mtr);
3762
3763
		if (UNIV_UNLIKELY(need_to_process)) {
3764
			if (UNIV_UNLIKELY(prebuilt->row_read_type
3765
					  == ROW_READ_DID_SEMI_CONSISTENT)) {
3766
				/* We did a semi-consistent read,
3767
				but the record was removed in
3768
				the meantime. */
3769
				prebuilt->row_read_type
3770
					= ROW_READ_TRY_SEMI_CONSISTENT;
3771
			}
3772
		} else if (UNIV_LIKELY(prebuilt->row_read_type
3773
				       != ROW_READ_DID_SEMI_CONSISTENT)) {
3774
3775
			/* The cursor was positioned on the record
3776
			that we returned previously.  If we need
3777
			to repeat a semi-consistent read as a
3778
			pessimistic locking read, the record
3779
			cannot be skipped. */
3780
3781
			goto next_rec;
3782
		}
3783
3784
	} else if (dtuple_get_n_fields(search_tuple) > 0) {
3785
3786
		btr_pcur_open_with_no_init(index, search_tuple, mode,
3787
					   BTR_SEARCH_LEAF,
3788
					   pcur, 0, &mtr);
3789
3790
		pcur->trx_if_known = trx;
3791
3792
		rec = btr_pcur_get_rec(pcur);
3793
3794
		if (!moves_up
3795
		    && !page_rec_is_supremum(rec)
3796
		    && set_also_gap_locks
3797
		    && !(srv_locks_unsafe_for_binlog
1819.7.86 by Vasil Dimov, Stewart Smith
Merge Revision revid:vasil.dimov@oracle.com-20100504105214-ljj5sy3bk21zl7og from MySQL InnoDB
3798
			 || trx->isolation_level <= TRX_ISO_READ_COMMITTED)
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3799
		    && prebuilt->select_lock_type != LOCK_NONE) {
3800
3801
			/* Try to place a gap lock on the next index record
3802
			to prevent phantoms in ORDER BY ... DESC queries */
3803
			const rec_t*	next = page_rec_get_next_const(rec);
3804
3805
			offsets = rec_get_offsets(next, index, offsets,
3806
						  ULINT_UNDEFINED, &heap);
3807
			err = sel_set_rec_lock(btr_pcur_get_block(pcur),
3808
					       next, index, offsets,
3809
					       prebuilt->select_lock_type,
3810
					       LOCK_GAP, thr);
3811
1819.7.159 by Marko Mäkelä, Stewart Smith
Merge Revision revid:marko.makela@oracle.com-20100602113733-fslfv73nhi0d17t4 from MySQL InnoDB
3812
			switch (err) {
3813
			case DB_SUCCESS_LOCKED_REC:
3814
				err = DB_SUCCESS;
3815
			case DB_SUCCESS:
3816
				break;
3817
			default:
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3818
				goto lock_wait_or_error;
3819
			}
3820
		}
3821
	} else {
3822
		if (mode == PAGE_CUR_G) {
3823
			btr_pcur_open_at_index_side(
3824
				TRUE, index, BTR_SEARCH_LEAF, pcur, FALSE,
3825
				&mtr);
3826
		} else if (mode == PAGE_CUR_L) {
3827
			btr_pcur_open_at_index_side(
3828
				FALSE, index, BTR_SEARCH_LEAF, pcur, FALSE,
3829
				&mtr);
3830
		}
3831
	}
3832
3833
rec_loop:
3834
	/*-------------------------------------------------------------*/
3835
	/* PHASE 4: Look for matching records in a loop */
3836
3837
	rec = btr_pcur_get_rec(pcur);
3838
	ut_ad(!!page_rec_is_comp(rec) == comp);
3839
#ifdef UNIV_SEARCH_DEBUG
3840
	/*
3841
	fputs("Using ", stderr);
3842
	dict_index_name_print(stderr, index);
3843
	fprintf(stderr, " cnt %lu ; Page no %lu\n", cnt,
3844
	page_get_page_no(page_align(rec)));
3845
	rec_print(rec);
3846
	*/
3847
#endif /* UNIV_SEARCH_DEBUG */
3848
3849
	if (page_rec_is_infimum(rec)) {
3850
3851
		/* The infimum record on a page cannot be in the result set,
3852
		and neither can a record lock be placed on it: we skip such
3853
		a record. */
3854
3855
		goto next_rec;
3856
	}
3857
3858
	if (page_rec_is_supremum(rec)) {
3859
3860
		if (set_also_gap_locks
3861
		    && !(srv_locks_unsafe_for_binlog
1819.7.86 by Vasil Dimov, Stewart Smith
Merge Revision revid:vasil.dimov@oracle.com-20100504105214-ljj5sy3bk21zl7og from MySQL InnoDB
3862
			 || trx->isolation_level <= TRX_ISO_READ_COMMITTED)
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3863
		    && prebuilt->select_lock_type != LOCK_NONE) {
3864
3865
			/* Try to place a lock on the index record */
3866
3867
			/* If innodb_locks_unsafe_for_binlog option is used
3868
			or this session is using a READ COMMITTED isolation
3869
			level we do not lock gaps. Supremum record is really
3870
			a gap and therefore we do not set locks there. */
3871
3872
			offsets = rec_get_offsets(rec, index, offsets,
3873
						  ULINT_UNDEFINED, &heap);
3874
			err = sel_set_rec_lock(btr_pcur_get_block(pcur),
3875
					       rec, index, offsets,
3876
					       prebuilt->select_lock_type,
3877
					       LOCK_ORDINARY, thr);
3878
1819.7.159 by Marko Mäkelä, Stewart Smith
Merge Revision revid:marko.makela@oracle.com-20100602113733-fslfv73nhi0d17t4 from MySQL InnoDB
3879
			switch (err) {
3880
			case DB_SUCCESS_LOCKED_REC:
3881
				err = DB_SUCCESS;
3882
			case DB_SUCCESS:
3883
				break;
3884
			default:
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3885
				goto lock_wait_or_error;
3886
			}
3887
		}
3888
		/* A page supremum record cannot be in the result set: skip
3889
		it now that we have placed a possible lock on it */
3890
3891
		goto next_rec;
3892
	}
3893
3894
	/*-------------------------------------------------------------*/
3895
	/* Do sanity checks in case our cursor has bumped into page
3896
	corruption */
3897
3898
	if (comp) {
3899
		next_offs = rec_get_next_offs(rec, TRUE);
3900
		if (UNIV_UNLIKELY(next_offs < PAGE_NEW_SUPREMUM)) {
3901
3902
			goto wrong_offs;
3903
		}
3904
	} else {
3905
		next_offs = rec_get_next_offs(rec, FALSE);
3906
		if (UNIV_UNLIKELY(next_offs < PAGE_OLD_SUPREMUM)) {
3907
3908
			goto wrong_offs;
3909
		}
3910
	}
3911
3912
	if (UNIV_UNLIKELY(next_offs >= UNIV_PAGE_SIZE - PAGE_DIR)) {
3913
3914
wrong_offs:
3915
		if (srv_force_recovery == 0 || moves_up == FALSE) {
3916
			ut_print_timestamp(stderr);
3917
			buf_page_print(page_align(rec), 0);
3918
			fprintf(stderr,
3919
				"\nInnoDB: rec address %p,"
3920
				" buf block fix count %lu\n",
3921
				(void*) rec, (ulong)
3922
				btr_cur_get_block(btr_pcur_get_btr_cur(pcur))
3923
				->page.buf_fix_count);
3924
			fprintf(stderr,
3925
				"InnoDB: Index corruption: rec offs %lu"
3926
				" next offs %lu, page no %lu,\n"
3927
				"InnoDB: ",
3928
				(ulong) page_offset(rec),
3929
				(ulong) next_offs,
3930
				(ulong) page_get_page_no(page_align(rec)));
3931
			dict_index_name_print(stderr, trx, index);
3932
			fputs(". Run CHECK TABLE. You may need to\n"
3933
			      "InnoDB: restore from a backup, or"
3934
			      " dump + drop + reimport the table.\n",
3935
			      stderr);
3936
3937
			err = DB_CORRUPTION;
3938
3939
			goto lock_wait_or_error;
3940
		} else {
3941
			/* The user may be dumping a corrupt table. Jump
3942
			over the corruption to recover as much as possible. */
3943
3944
			fprintf(stderr,
3945
				"InnoDB: Index corruption: rec offs %lu"
3946
				" next offs %lu, page no %lu,\n"
3947
				"InnoDB: ",
3948
				(ulong) page_offset(rec),
3949
				(ulong) next_offs,
3950
				(ulong) page_get_page_no(page_align(rec)));
3951
			dict_index_name_print(stderr, trx, index);
3952
			fputs(". We try to skip the rest of the page.\n",
3953
			      stderr);
3954
3955
			btr_pcur_move_to_last_on_page(pcur, &mtr);
3956
3957
			goto next_rec;
3958
		}
3959
	}
3960
	/*-------------------------------------------------------------*/
3961
3962
	/* Calculate the 'offsets' associated with 'rec' */
3963
3964
	offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
3965
3966
	if (UNIV_UNLIKELY(srv_force_recovery > 0)) {
3967
		if (!rec_validate(rec, offsets)
3968
		    || !btr_index_rec_validate(rec, index, FALSE)) {
3969
			fprintf(stderr,
3970
				"InnoDB: Index corruption: rec offs %lu"
3971
				" next offs %lu, page no %lu,\n"
3972
				"InnoDB: ",
3973
				(ulong) page_offset(rec),
3974
				(ulong) next_offs,
3975
				(ulong) page_get_page_no(page_align(rec)));
3976
			dict_index_name_print(stderr, trx, index);
3977
			fputs(". We try to skip the record.\n",
3978
			      stderr);
3979
3980
			goto next_rec;
3981
		}
3982
	}
3983
3984
	/* Note that we cannot trust the up_match value in the cursor at this
3985
	place because we can arrive here after moving the cursor! Thus
3986
	we have to recompare rec and search_tuple to determine if they
3987
	match enough. */
3988
3989
	if (match_mode == ROW_SEL_EXACT) {
3990
		/* Test if the index record matches completely to search_tuple
3991
		in prebuilt: if not, then we return with DB_RECORD_NOT_FOUND */
3992
3993
		/* fputs("Comparing rec and search tuple\n", stderr); */
3994
3995
		if (0 != cmp_dtuple_rec(search_tuple, rec, offsets)) {
3996
3997
			if (set_also_gap_locks
3998
			    && !(srv_locks_unsafe_for_binlog
3999
				 || trx->isolation_level
1819.7.86 by Vasil Dimov, Stewart Smith
Merge Revision revid:vasil.dimov@oracle.com-20100504105214-ljj5sy3bk21zl7og from MySQL InnoDB
4000
				 <= TRX_ISO_READ_COMMITTED)
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4001
			    && prebuilt->select_lock_type != LOCK_NONE) {
4002
4003
				/* Try to place a gap lock on the index
4004
				record only if innodb_locks_unsafe_for_binlog
4005
				option is not set or this session is not
4006
				using a READ COMMITTED isolation level. */
4007
4008
				err = sel_set_rec_lock(
4009
					btr_pcur_get_block(pcur),
4010
					rec, index, offsets,
4011
					prebuilt->select_lock_type, LOCK_GAP,
4012
					thr);
4013
1819.7.159 by Marko Mäkelä, Stewart Smith
Merge Revision revid:marko.makela@oracle.com-20100602113733-fslfv73nhi0d17t4 from MySQL InnoDB
4014
				switch (err) {
4015
				case DB_SUCCESS_LOCKED_REC:
4016
				case DB_SUCCESS:
4017
					break;
4018
				default:
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4019
					goto lock_wait_or_error;
4020
				}
4021
			}
4022
4023
			btr_pcur_store_position(pcur, &mtr);
4024
4025
			err = DB_RECORD_NOT_FOUND;
4026
			/* ut_print_name(stderr, index->name);
4027
			fputs(" record not found 3\n", stderr); */
4028
4029
			goto normal_return;
4030
		}
4031
4032
	} else if (match_mode == ROW_SEL_EXACT_PREFIX) {
4033
4034
		if (!cmp_dtuple_is_prefix_of_rec(search_tuple, rec, offsets)) {
4035
4036
			if (set_also_gap_locks
4037
			    && !(srv_locks_unsafe_for_binlog
4038
				 || trx->isolation_level
1819.7.86 by Vasil Dimov, Stewart Smith
Merge Revision revid:vasil.dimov@oracle.com-20100504105214-ljj5sy3bk21zl7og from MySQL InnoDB
4039
				 <= TRX_ISO_READ_COMMITTED)
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4040
			    && prebuilt->select_lock_type != LOCK_NONE) {
4041
4042
				/* Try to place a gap lock on the index
4043
				record only if innodb_locks_unsafe_for_binlog
4044
				option is not set or this session is not
4045
				using a READ COMMITTED isolation level. */
4046
4047
				err = sel_set_rec_lock(
4048
					btr_pcur_get_block(pcur),
4049
					rec, index, offsets,
4050
					prebuilt->select_lock_type, LOCK_GAP,
4051
					thr);
4052
1819.7.159 by Marko Mäkelä, Stewart Smith
Merge Revision revid:marko.makela@oracle.com-20100602113733-fslfv73nhi0d17t4 from MySQL InnoDB
4053
				switch (err) {
4054
				case DB_SUCCESS_LOCKED_REC:
4055
				case DB_SUCCESS:
4056
					break;
4057
				default:
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4058
					goto lock_wait_or_error;
4059
				}
4060
			}
4061
4062
			btr_pcur_store_position(pcur, &mtr);
4063
4064
			err = DB_RECORD_NOT_FOUND;
4065
			/* ut_print_name(stderr, index->name);
4066
			fputs(" record not found 4\n", stderr); */
4067
4068
			goto normal_return;
4069
		}
4070
	}
4071
4072
	/* We are ready to look at a possible new index entry in the result
4073
	set: the cursor is now placed on a user record */
4074
4075
	if (prebuilt->select_lock_type != LOCK_NONE) {
4076
		/* Try to place a lock on the index record; note that delete
4077
		marked records are a special case in a unique search. If there
4078
		is a non-delete marked record, then it is enough to lock its
4079
		existence with LOCK_REC_NOT_GAP. */
4080
4081
		/* If innodb_locks_unsafe_for_binlog option is used
4082
		or this session is using a READ COMMITTED isolation
4083
		level we lock only the record, i.e., next-key locking is
4084
		not used. */
4085
4086
		ulint	lock_type;
4087
4088
		if (!set_also_gap_locks
4089
		    || srv_locks_unsafe_for_binlog
1819.7.86 by Vasil Dimov, Stewart Smith
Merge Revision revid:vasil.dimov@oracle.com-20100504105214-ljj5sy3bk21zl7og from MySQL InnoDB
4090
		    || trx->isolation_level <= TRX_ISO_READ_COMMITTED
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4091
		    || (unique_search
4092
			&& !UNIV_UNLIKELY(rec_get_deleted_flag(rec, comp)))) {
4093
4094
			goto no_gap_lock;
4095
		} else {
4096
			lock_type = LOCK_ORDINARY;
4097
		}
4098
4099
		/* If we are doing a 'greater or equal than a primary key
4100
		value' search from a clustered index, and we find a record
4101
		that has that exact primary key value, then there is no need
4102
		to lock the gap before the record, because no insert in the
4103
		gap can be in our search range. That is, no phantom row can
4104
		appear that way.
4105
4106
		An example: if col1 is the primary key, the search is WHERE
4107
		col1 >= 100, and we find a record where col1 = 100, then no
4108
		need to lock the gap before that record. */
4109
4110
		if (index == clust_index
4111
		    && mode == PAGE_CUR_GE
4112
		    && direction == 0
4113
		    && dtuple_get_n_fields_cmp(search_tuple)
4114
		    == dict_index_get_n_unique(index)
4115
		    && 0 == cmp_dtuple_rec(search_tuple, rec, offsets)) {
4116
no_gap_lock:
4117
			lock_type = LOCK_REC_NOT_GAP;
4118
		}
4119
4120
		err = sel_set_rec_lock(btr_pcur_get_block(pcur),
4121
				       rec, index, offsets,
4122
				       prebuilt->select_lock_type,
4123
				       lock_type, thr);
4124
4125
		switch (err) {
4126
			const rec_t*	old_vers;
1819.7.159 by Marko Mäkelä, Stewart Smith
Merge Revision revid:marko.makela@oracle.com-20100602113733-fslfv73nhi0d17t4 from MySQL InnoDB
4127
		case DB_SUCCESS_LOCKED_REC:
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
4128
			if (srv_locks_unsafe_for_binlog
1819.7.159 by Marko Mäkelä, Stewart Smith
Merge Revision revid:marko.makela@oracle.com-20100602113733-fslfv73nhi0d17t4 from MySQL InnoDB
4129
			    || trx->isolation_level
4130
			    <= TRX_ISO_READ_COMMITTED) {
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
4131
				/* Note that a record of
4132
				prebuilt->index was locked. */
4133
				prebuilt->new_rec_locks = 1;
4134
			}
1819.7.159 by Marko Mäkelä, Stewart Smith
Merge Revision revid:marko.makela@oracle.com-20100602113733-fslfv73nhi0d17t4 from MySQL InnoDB
4135
			err = DB_SUCCESS;
4136
		case DB_SUCCESS:
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4137
			break;
4138
		case DB_LOCK_WAIT:
1819.7.159 by Marko Mäkelä, Stewart Smith
Merge Revision revid:marko.makela@oracle.com-20100602113733-fslfv73nhi0d17t4 from MySQL InnoDB
4139
			/* Never unlock rows that were part of a conflict. */
4140
			prebuilt->new_rec_locks = 0;
4141
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4142
			if (UNIV_LIKELY(prebuilt->row_read_type
4143
					!= ROW_READ_TRY_SEMI_CONSISTENT)
1819.7.80 by Vasil Dimov, Stewart Smith
Merge Revision revid:vasil.dimov@oracle.com-20100504095125-bntiicm626cnsvmj from MySQL InnoDB
4144
			    || unique_search
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4145
			    || index != clust_index) {
4146
4147
				goto lock_wait_or_error;
4148
			}
4149
4150
			/* The following call returns 'offsets'
4151
			associated with 'old_vers' */
4152
			err = row_sel_build_committed_vers_for_mysql(
4153
				clust_index, prebuilt, rec,
4154
				&offsets, &heap, &old_vers, &mtr);
4155
1819.9.9 by Marko Mäkelä, Stewart Smith
Merge Revision revid:marko.makela@oracle.com-20100602103714-0nwxdqskeb1ihozj from MySQL InnoDB
4156
			switch (err) {
4157
			case DB_SUCCESS_LOCKED_REC:
4158
				err = DB_SUCCESS;
4159
			case DB_SUCCESS:
4160
				break;
4161
			default:
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4162
				goto lock_wait_or_error;
4163
			}
4164
4165
			mutex_enter(&kernel_mutex);
4166
			if (trx->was_chosen_as_deadlock_victim) {
4167
				mutex_exit(&kernel_mutex);
4168
				err = DB_DEADLOCK;
4169
4170
				goto lock_wait_or_error;
4171
			}
4172
			if (UNIV_LIKELY(trx->wait_lock != NULL)) {
4173
				lock_cancel_waiting_and_release(
4174
					trx->wait_lock);
4175
			} else {
4176
				mutex_exit(&kernel_mutex);
4177
4178
				/* The lock was granted while we were
4179
				searching for the last committed version.
4180
				Do a normal locking read. */
4181
4182
				offsets = rec_get_offsets(rec, index, offsets,
4183
							  ULINT_UNDEFINED,
4184
							  &heap);
4185
				err = DB_SUCCESS;
4186
				break;
4187
			}
4188
			mutex_exit(&kernel_mutex);
4189
4190
			if (old_vers == NULL) {
4191
				/* The row was not yet committed */
4192
4193
				goto next_rec;
4194
			}
4195
4196
			did_semi_consistent_read = TRUE;
4197
			rec = old_vers;
4198
			break;
4199
		default:
4200
4201
			goto lock_wait_or_error;
4202
		}
4203
	} else {
4204
		/* This is a non-locking consistent read: if necessary, fetch
4205
		a previous version of the record */
4206
4207
		if (trx->isolation_level == TRX_ISO_READ_UNCOMMITTED) {
4208
4209
			/* Do nothing: we let a non-locking SELECT read the
4210
			latest version of the record */
4211
4212
		} else if (index == clust_index) {
4213
4214
			/* Fetch a previous version of the row if the current
4215
			one is not visible in the snapshot; if we have a very
4216
			high force recovery level set, we try to avoid crashes
4217
			by skipping this lookup */
4218
4219
			if (UNIV_LIKELY(srv_force_recovery < 5)
4220
			    && !lock_clust_rec_cons_read_sees(
4221
				    rec, index, offsets, trx->read_view)) {
4222
4223
				rec_t*	old_vers;
4224
				/* The following call returns 'offsets'
4225
				associated with 'old_vers' */
4226
				err = row_sel_build_prev_vers_for_mysql(
4227
					trx->read_view, clust_index,
4228
					prebuilt, rec, &offsets, &heap,
4229
					&old_vers, &mtr);
4230
1819.9.9 by Marko Mäkelä, Stewart Smith
Merge Revision revid:marko.makela@oracle.com-20100602103714-0nwxdqskeb1ihozj from MySQL InnoDB
4231
				switch (err) {
4232
				case DB_SUCCESS_LOCKED_REC:
4233
				case DB_SUCCESS:
4234
					break;
4235
				default:
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4236
					goto lock_wait_or_error;
4237
				}
4238
4239
				if (old_vers == NULL) {
4240
					/* The row did not exist yet in
4241
					the read view */
4242
4243
					goto next_rec;
4244
				}
4245
4246
				rec = old_vers;
4247
			}
1819.9.84 by Marko Mäkelä, Stewart Smith
Merge Revision revid:marko.makela@oracle.com-20100818110110-zfs0i1vfrccfb4yw from MySQL InnoDB
4248
		} else {
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4249
			/* We are looking into a non-clustered index,
4250
			and to get the right version of the record we
4251
			have to look also into the clustered index: this
4252
			is necessary, because we can only get the undo
4253
			information via the clustered index record. */
4254
4255
			ut_ad(index != clust_index);
1819.9.84 by Marko Mäkelä, Stewart Smith
Merge Revision revid:marko.makela@oracle.com-20100818110110-zfs0i1vfrccfb4yw from MySQL InnoDB
4256
			ut_ad(!dict_index_is_clust(index));
4257
			if (!lock_sec_rec_cons_read_sees(
4258
				    rec, trx->read_view)) {
4259
				goto requires_clust_rec;
4260
			}
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4261
		}
4262
	}
4263
4264
	/* NOTE that at this point rec can be an old version of a clustered
4265
	index record built for a consistent read. We cannot assume after this
4266
	point that rec is on a buffer pool page. Functions like
4267
	page_rec_is_comp() cannot be used! */
4268
4269
	if (UNIV_UNLIKELY(rec_get_deleted_flag(rec, comp))) {
4270
4271
		/* The record is delete-marked: we can skip it */
4272
4273
		if ((srv_locks_unsafe_for_binlog
1819.7.86 by Vasil Dimov, Stewart Smith
Merge Revision revid:vasil.dimov@oracle.com-20100504105214-ljj5sy3bk21zl7og from MySQL InnoDB
4274
		     || trx->isolation_level <= TRX_ISO_READ_COMMITTED)
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4275
		    && prebuilt->select_lock_type != LOCK_NONE
4276
		    && !did_semi_consistent_read) {
4277
4278
			/* No need to keep a lock on a delete-marked record
4279
			if we do not want to use next-key locking. */
4280
4281
			row_unlock_for_mysql(prebuilt, TRUE);
4282
		}
4283
4284
		/* This is an optimization to skip setting the next key lock
4285
		on the record that follows this delete-marked record. This
4286
		optimization works because of the unique search criteria
4287
		which precludes the presence of a range lock between this
4288
		delete marked record and the record following it.
4289
4290
		For now this is applicable only to clustered indexes while
4291
		doing a unique search. There is scope for further optimization
4292
		applicable to unique secondary indexes. Current behaviour is
4293
		to widen the scope of a lock on an already delete marked record
4294
		if the same record is deleted twice by the same transaction */
4295
		if (index == clust_index && unique_search) {
4296
			err = DB_RECORD_NOT_FOUND;
4297
4298
			goto normal_return;
4299
		}
4300
4301
		goto next_rec;
4302
	}
4303
4304
	/* Get the clustered index record if needed, if we did not do the
4305
	search using the clustered index. */
4306
4307
	if (index != clust_index && prebuilt->need_to_access_clustered) {
4308
4309
requires_clust_rec:
4310
		/* We use a 'goto' to the preceding label if a consistent
4311
		read of a secondary index record requires us to look up old
4312
		versions of the associated clustered index record. */
4313
4314
		ut_ad(rec_offs_validate(rec, index, offsets));
4315
4316
		/* It was a non-clustered index and we must fetch also the
4317
		clustered index record */
4318
4319
		mtr_has_extra_clust_latch = TRUE;
4320
4321
		/* The following call returns 'offsets' associated with
4322
		'clust_rec'. Note that 'clust_rec' can be an old version
4323
		built for a consistent read. */
4324
4325
		err = row_sel_get_clust_rec_for_mysql(prebuilt, index, rec,
4326
						      thr, &clust_rec,
4327
						      &offsets, &heap, &mtr);
1819.7.159 by Marko Mäkelä, Stewart Smith
Merge Revision revid:marko.makela@oracle.com-20100602113733-fslfv73nhi0d17t4 from MySQL InnoDB
4328
		switch (err) {
4329
		case DB_SUCCESS:
4330
			if (clust_rec == NULL) {
4331
				/* The record did not exist in the read view */
4332
				ut_ad(prebuilt->select_lock_type == LOCK_NONE);
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4333
1819.7.159 by Marko Mäkelä, Stewart Smith
Merge Revision revid:marko.makela@oracle.com-20100602113733-fslfv73nhi0d17t4 from MySQL InnoDB
4334
				goto next_rec;
4335
			}
4336
			break;
4337
		case DB_SUCCESS_LOCKED_REC:
4338
			ut_a(clust_rec != NULL);
4339
			if (srv_locks_unsafe_for_binlog
4340
			     || trx->isolation_level
4341
			    <= TRX_ISO_READ_COMMITTED) {
4342
				/* Note that the clustered index record
4343
				was locked. */
4344
				prebuilt->new_rec_locks = 2;
4345
			}
4346
			err = DB_SUCCESS;
4347
			break;
4348
		default:
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4349
			goto lock_wait_or_error;
4350
		}
4351
4352
		if (UNIV_UNLIKELY(rec_get_deleted_flag(clust_rec, comp))) {
4353
4354
			/* The record is delete marked: we can skip it */
4355
4356
			if ((srv_locks_unsafe_for_binlog
1819.7.86 by Vasil Dimov, Stewart Smith
Merge Revision revid:vasil.dimov@oracle.com-20100504105214-ljj5sy3bk21zl7og from MySQL InnoDB
4357
			     || trx->isolation_level <= TRX_ISO_READ_COMMITTED)
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4358
			    && prebuilt->select_lock_type != LOCK_NONE) {
4359
4360
				/* No need to keep a lock on a delete-marked
4361
				record if we do not want to use next-key
4362
				locking. */
4363
4364
				row_unlock_for_mysql(prebuilt, TRUE);
4365
			}
4366
4367
			goto next_rec;
4368
		}
4369
4370
		if (prebuilt->need_to_access_clustered) {
4371
4372
			result_rec = clust_rec;
4373
4374
			ut_ad(rec_offs_validate(result_rec, clust_index,
4375
						offsets));
4376
		} else {
4377
			/* We used 'offsets' for the clust rec, recalculate
4378
			them for 'rec' */
4379
			offsets = rec_get_offsets(rec, index, offsets,
4380
						  ULINT_UNDEFINED, &heap);
4381
			result_rec = rec;
4382
		}
1819.9.84 by Marko Mäkelä, Stewart Smith
Merge Revision revid:marko.makela@oracle.com-20100818110110-zfs0i1vfrccfb4yw from MySQL InnoDB
4383
4384
		/* result_rec can legitimately be delete-marked
4385
		now that it has been established that it points to a
4386
		clustered index record that exists in the read view. */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4387
	} else {
4388
		result_rec = rec;
1819.9.84 by Marko Mäkelä, Stewart Smith
Merge Revision revid:marko.makela@oracle.com-20100818110110-zfs0i1vfrccfb4yw from MySQL InnoDB
4389
		ut_ad(!rec_get_deleted_flag(rec, comp));
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4390
	}
4391
4392
	/* We found a qualifying record 'result_rec'. At this point,
4393
	'offsets' are associated with 'result_rec'. */
4394
4395
	ut_ad(rec_offs_validate(result_rec,
4396
				result_rec != rec ? clust_index : index,
4397
				offsets));
4398
4399
	/* At this point, the clustered index record is protected
4400
	by a page latch that was acquired when pcur was positioned.
4401
	The latch will not be released until mtr_commit(&mtr). */
4402
4403
	if ((match_mode == ROW_SEL_EXACT
4404
	     || prebuilt->n_rows_fetched >= MYSQL_FETCH_CACHE_THRESHOLD)
4405
	    && prebuilt->select_lock_type == LOCK_NONE
4406
	    && !prebuilt->templ_contains_blob
4407
	    && !prebuilt->clust_index_was_generated
4408
	    && prebuilt->template_type
4409
	    != ROW_MYSQL_DUMMY_TEMPLATE) {
4410
4411
		/* Inside an update, for example, we do not cache rows,
4412
		since we may use the cursor position to do the actual
4413
		update, that is why we require ...lock_type == LOCK_NONE.
4414
		Since we keep space in prebuilt only for the BLOBs of
4415
		a single row, we cannot cache rows in the case there
4416
		are BLOBs in the fields to be fetched. In HANDLER we do
4417
		not cache rows because there the cursor is a scrollable
4418
		cursor. */
4419
1819.7.58 by Marko Mäkelä
Merge Revision revid:marko.makela@oracle.com-20100629125518-m3am4ia1ffjr0d0j from MySQL InnoDB
4420
		if (!row_sel_push_cache_row_for_mysql(prebuilt, result_rec,
4421
						      offsets)) {
1819.7.62 by Marko Mäkelä
Merge Revision revid:marko.makela@oracle.com-20100630093149-wmc37t128gic933v from MySQL InnoDB
4422
			/* Only fresh inserts may contain incomplete
4423
			externally stored columns. Pretend that such
4424
			records do not exist. Such records may only be
4425
			accessed at the READ UNCOMMITTED isolation
4426
			level or when rolling back a recovered
4427
			transaction. Rollback happens at a lower
4428
			level, not here. */
1819.7.58 by Marko Mäkelä
Merge Revision revid:marko.makela@oracle.com-20100629125518-m3am4ia1ffjr0d0j from MySQL InnoDB
4429
			ut_a(trx->isolation_level == TRX_ISO_READ_UNCOMMITTED);
4430
		} else if (prebuilt->n_fetch_cached
4431
			   == MYSQL_FETCH_CACHE_SIZE) {
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4432
4433
			goto got_row;
4434
		}
4435
4436
		goto next_rec;
4437
	} else {
4438
		if (prebuilt->template_type == ROW_MYSQL_DUMMY_TEMPLATE) {
4439
			memcpy(buf + 4, result_rec
4440
			       - rec_offs_extra_size(offsets),
4441
			       rec_offs_size(offsets));
4442
			mach_write_to_4(buf,
4443
					rec_offs_extra_size(offsets) + 4);
4444
		} else {
4445
			if (!row_sel_store_mysql_rec(buf, prebuilt,
4446
						     result_rec, offsets)) {
1819.7.62 by Marko Mäkelä
Merge Revision revid:marko.makela@oracle.com-20100630093149-wmc37t128gic933v from MySQL InnoDB
4447
				/* Only fresh inserts may contain
4448
				incomplete externally stored
4449
				columns. Pretend that such records do
4450
				not exist. Such records may only be
4451
				accessed at the READ UNCOMMITTED
1819.7.58 by Marko Mäkelä
Merge Revision revid:marko.makela@oracle.com-20100629125518-m3am4ia1ffjr0d0j from MySQL InnoDB
4452
				isolation level or when rolling back a
1819.7.62 by Marko Mäkelä
Merge Revision revid:marko.makela@oracle.com-20100630093149-wmc37t128gic933v from MySQL InnoDB
4453
				recovered transaction. Rollback
4454
				happens at a lower level, not here. */
1819.7.58 by Marko Mäkelä
Merge Revision revid:marko.makela@oracle.com-20100629125518-m3am4ia1ffjr0d0j from MySQL InnoDB
4455
				ut_a(trx->isolation_level
4456
				     == TRX_ISO_READ_UNCOMMITTED);
4457
				goto next_rec;
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4458
			}
4459
		}
4460
4461
		if (prebuilt->clust_index_was_generated) {
4462
			if (result_rec != rec) {
4463
				offsets = rec_get_offsets(
4464
					rec, index, offsets, ULINT_UNDEFINED,
4465
					&heap);
4466
			}
4467
			row_sel_store_row_id_to_prebuilt(prebuilt, rec,
4468
							 index, offsets);
4469
		}
4470
	}
4471
4472
	/* From this point on, 'offsets' are invalid. */
4473
4474
got_row:
4475
	/* We have an optimization to save CPU time: if this is a consistent
4476
	read on a unique condition on the clustered index, then we do not
4477
	store the pcur position, because any fetch next or prev will anyway
4478
	return 'end of file'. Exceptions are locking reads and the MySQL
4479
	HANDLER command where the user can move the cursor with PREV or NEXT
4480
	even after a unique search. */
4481
4482
	if (!unique_search_from_clust_index
1273.1.23 by Jay Pipes
Finally kills of n_mysql_tables_in_use.
4483
	    || prebuilt->select_lock_type != LOCK_NONE) {
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4484
4485
		/* Inside an update always store the cursor position */
4486
4487
		btr_pcur_store_position(pcur, &mtr);
4488
	}
4489
4490
	err = DB_SUCCESS;
4491
4492
	goto normal_return;
4493
4494
next_rec:
4495
	/* Reset the old and new "did semi-consistent read" flags. */
4496
	if (UNIV_UNLIKELY(prebuilt->row_read_type
4497
			  == ROW_READ_DID_SEMI_CONSISTENT)) {
4498
		prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT;
4499
	}
4500
	did_semi_consistent_read = FALSE;
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
4501
	prebuilt->new_rec_locks = 0;
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4502
4503
	/*-------------------------------------------------------------*/
4504
	/* PHASE 5: Move the cursor to the next index record */
4505
4506
	if (UNIV_UNLIKELY(mtr_has_extra_clust_latch)) {
4507
		/* We must commit mtr if we are moving to the next
4508
		non-clustered index record, because we could break the
4509
		latching order if we would access a different clustered
4510
		index page right away without releasing the previous. */
4511
4512
		btr_pcur_store_position(pcur, &mtr);
4513
4514
		mtr_commit(&mtr);
4515
		mtr_has_extra_clust_latch = FALSE;
4516
4517
		mtr_start(&mtr);
4518
		if (sel_restore_position_for_mysql(&same_user_rec,
4519
						   BTR_SEARCH_LEAF,
4520
						   pcur, moves_up, &mtr)) {
4521
#ifdef UNIV_SEARCH_DEBUG
4522
			cnt++;
4523
#endif /* UNIV_SEARCH_DEBUG */
4524
4525
			goto rec_loop;
4526
		}
4527
	}
4528
4529
	if (moves_up) {
4530
		if (UNIV_UNLIKELY(!btr_pcur_move_to_next(pcur, &mtr))) {
4531
not_moved:
4532
			btr_pcur_store_position(pcur, &mtr);
4533
4534
			if (match_mode != 0) {
4535
				err = DB_RECORD_NOT_FOUND;
4536
			} else {
4537
				err = DB_END_OF_INDEX;
4538
			}
4539
4540
			goto normal_return;
4541
		}
4542
	} else {
4543
		if (UNIV_UNLIKELY(!btr_pcur_move_to_prev(pcur, &mtr))) {
4544
			goto not_moved;
4545
		}
4546
	}
4547
4548
#ifdef UNIV_SEARCH_DEBUG
4549
	cnt++;
4550
#endif /* UNIV_SEARCH_DEBUG */
4551
4552
	goto rec_loop;
4553
4554
lock_wait_or_error:
4555
	/* Reset the old and new "did semi-consistent read" flags. */
4556
	if (UNIV_UNLIKELY(prebuilt->row_read_type
4557
			  == ROW_READ_DID_SEMI_CONSISTENT)) {
4558
		prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT;
4559
	}
4560
	did_semi_consistent_read = FALSE;
4561
4562
	/*-------------------------------------------------------------*/
4563
4564
	btr_pcur_store_position(pcur, &mtr);
4565
1819.9.153 by Jimmy Yang, Stewart Smith
Merge Revision revid:jimmy.yang@oracle.com-20101011123613-guz1qgdktywmel1g from MySQL InnoDB
4566
lock_table_wait:
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4567
	mtr_commit(&mtr);
4568
	mtr_has_extra_clust_latch = FALSE;
4569
4570
	trx->error_state = err;
4571
4572
	/* The following is a patch for MySQL */
4573
4574
	que_thr_stop_for_mysql(thr);
4575
4576
	thr->lock_state = QUE_THR_LOCK_ROW;
4577
4578
	if (row_mysql_handle_errors(&err, trx, thr, NULL)) {
4579
		/* It was a lock wait, and it ended */
4580
4581
		thr->lock_state = QUE_THR_LOCK_NOLOCK;
4582
		mtr_start(&mtr);
4583
1819.9.153 by Jimmy Yang, Stewart Smith
Merge Revision revid:jimmy.yang@oracle.com-20101011123613-guz1qgdktywmel1g from MySQL InnoDB
4584
		/* Table lock waited, go try to obtain table lock
4585
		again */
4586
		if (table_lock_waited) {
4587
			table_lock_waited = FALSE;
4588
4589
			goto wait_table_again;
4590
		}
4591
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4592
		sel_restore_position_for_mysql(&same_user_rec,
4593
					       BTR_SEARCH_LEAF, pcur,
4594
					       moves_up, &mtr);
4595
4596
		if ((srv_locks_unsafe_for_binlog
1819.7.86 by Vasil Dimov, Stewart Smith
Merge Revision revid:vasil.dimov@oracle.com-20100504105214-ljj5sy3bk21zl7og from MySQL InnoDB
4597
		     || trx->isolation_level <= TRX_ISO_READ_COMMITTED)
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4598
		    && !same_user_rec) {
4599
4600
			/* Since we were not able to restore the cursor
4601
			on the same user record, we cannot use
4602
			row_unlock_for_mysql() to unlock any records, and
4603
			we must thus reset the new rec lock info. Since
4604
			in lock0lock.c we have blocked the inheriting of gap
4605
			X-locks, we actually do not have any new record locks
4606
			set in this case.
4607
4608
			Note that if we were able to restore on the 'same'
4609
			user record, it is still possible that we were actually
4610
			waiting on a delete-marked record, and meanwhile
4611
			it was removed by purge and inserted again by some
4612
			other user. But that is no problem, because in
4613
			rec_loop we will again try to set a lock, and
4614
			new_rec_lock_info in trx will be right at the end. */
4615
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
4616
			prebuilt->new_rec_locks = 0;
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4617
		}
4618
4619
		mode = pcur->search_mode;
4620
4621
		goto rec_loop;
4622
	}
4623
4624
	thr->lock_state = QUE_THR_LOCK_NOLOCK;
4625
4626
#ifdef UNIV_SEARCH_DEBUG
4627
	/*	fputs("Using ", stderr);
4628
	dict_index_name_print(stderr, index);
4629
	fprintf(stderr, " cnt %lu ret value %lu err\n", cnt, err); */
4630
#endif /* UNIV_SEARCH_DEBUG */
4631
	goto func_exit;
4632
4633
normal_return:
4634
	/*-------------------------------------------------------------*/
4635
	que_thr_stop_for_mysql_no_error(thr, trx);
4636
4637
	mtr_commit(&mtr);
4638
4639
	if (prebuilt->n_fetch_cached > 0) {
4640
		row_sel_pop_cached_row_for_mysql(buf, prebuilt);
4641
4642
		err = DB_SUCCESS;
4643
	}
4644
4645
#ifdef UNIV_SEARCH_DEBUG
4646
	/*	fputs("Using ", stderr);
4647
	dict_index_name_print(stderr, index);
4648
	fprintf(stderr, " cnt %lu ret value %lu err\n", cnt, err); */
4649
#endif /* UNIV_SEARCH_DEBUG */
4650
	if (err == DB_SUCCESS) {
4651
		srv_n_rows_read++;
4652
	}
4653
4654
func_exit:
4655
	trx->op_info = "";
4656
	if (UNIV_LIKELY_NULL(heap)) {
4657
		mem_heap_free(heap);
4658
	}
4659
4660
	/* Set or reset the "did semi-consistent read" flag on return.
4661
	The flag did_semi_consistent_read is set if and only if
4662
	the record being returned was fetched with a semi-consistent read. */
4663
	ut_ad(prebuilt->row_read_type != ROW_READ_WITH_LOCKS
4664
	      || !did_semi_consistent_read);
4665
4666
	if (UNIV_UNLIKELY(prebuilt->row_read_type != ROW_READ_WITH_LOCKS)) {
4667
		if (UNIV_UNLIKELY(did_semi_consistent_read)) {
4668
			prebuilt->row_read_type = ROW_READ_DID_SEMI_CONSISTENT;
4669
		} else {
4670
			prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT;
4671
		}
4672
	}
4673
	return(err);
4674
}
4675
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
4676
/*******************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4677
Checks if MySQL at the moment is allowed for this table to retrieve a
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
4678
consistent read result, or store it to the query cache.
4679
@return	TRUE if storing or retrieving from the query cache is permitted */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4680
UNIV_INTERN
4681
ibool
4682
row_search_check_if_query_cache_permitted(
4683
/*======================================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
4684
	trx_t*		trx,		/*!< in: transaction object */
4685
	const char*	norm_name)	/*!< in: concatenation of database name,
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4686
					'/' char, table name */
4687
{
4688
	dict_table_t*	table;
4689
	ibool		ret	= FALSE;
4690
4691
	table = dict_table_get(norm_name, FALSE);
4692
4693
	if (table == NULL) {
4694
4695
		return(FALSE);
4696
	}
4697
4698
	mutex_enter(&kernel_mutex);
4699
4700
	/* Start the transaction if it is not started yet */
4701
4702
	trx_start_if_not_started_low(trx);
4703
4704
	/* If there are locks on the table or some trx has invalidated the
4705
	cache up to our trx id, then ret = FALSE.
4706
	We do not check what type locks there are on the table, though only
4707
	IX type locks actually would require ret = FALSE. */
4708
4709
	if (UT_LIST_GET_LEN(table->locks) == 0
1819.9.31 by Marko Mäkelä, Stewart Smith
Merge Revision revid:marko.makela@oracle.com-20100623110659-pk5bqnmo0j7hj6md from MySQL InnoDB
4710
	    && trx->id >= table->query_cache_inv_trx_id) {
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4711
4712
		ret = TRUE;
4713
4714
		/* If the isolation level is high, assign a read view for the
4715
		transaction if it does not yet have one */
4716
4717
		if (trx->isolation_level >= TRX_ISO_REPEATABLE_READ
4718
		    && !trx->read_view) {
4719
4720
			trx->read_view = read_view_open_now(
4721
				trx->id, trx->global_read_view_heap);
4722
			trx->global_read_view = trx->read_view;
4723
		}
4724
	}
4725
4726
	mutex_exit(&kernel_mutex);
4727
4728
	return(ret);
4729
}
4730
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
4731
/*******************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4732
Read the AUTOINC column from the current row. If the value is less than
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
4733
0 and the type is not unsigned then we reset the value to 0.
4734
@return	value read from the column */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4735
static
4736
ib_uint64_t
4737
row_search_autoinc_read_column(
4738
/*===========================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
4739
	dict_index_t*	index,		/*!< in: index to read from */
4740
	const rec_t*	rec,		/*!< in: current rec */
4741
	ulint		col_no,		/*!< in: column number */
1819.5.161 by marko
Merge Revision revid:svn-v4:16c675df-0fcb-4bc9-8058-dcc011a37293:branches/zip:6352 from MySQL InnoDB
4742
	ulint		mtype,		/*!< in: column main type */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
4743
	ibool		unsigned_type)	/*!< in: signed or unsigned flag */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4744
{
4745
	ulint		len;
4746
	const byte*	data;
4747
	ib_uint64_t	value;
4748
	mem_heap_t*	heap = NULL;
4749
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
4750
	ulint*		offsets	= offsets_;
4751
4752
	rec_offs_init(offsets_);
4753
4754
	offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
4755
4756
	data = rec_get_nth_field(rec, offsets, col_no, &len);
4757
4758
	ut_a(len != UNIV_SQL_NULL);
1819.5.161 by marko
Merge Revision revid:svn-v4:16c675df-0fcb-4bc9-8058-dcc011a37293:branches/zip:6352 from MySQL InnoDB
4759
4760
	switch (mtype) {
4761
	case DATA_INT:
4762
		ut_a(len <= sizeof value);
4763
		value = mach_read_int_type(data, len, unsigned_type);
4764
		break;
4765
4766
	case DATA_FLOAT:
4767
		ut_a(len == sizeof(float));
1819.5.263 by mmakela
Original revid:svn-v4:16c675df-0fcb-4bc9-8058-dcc011a37293:branches/zip:6919
4768
		value = (ib_uint64_t) mach_float_read(data);
1819.5.161 by marko
Merge Revision revid:svn-v4:16c675df-0fcb-4bc9-8058-dcc011a37293:branches/zip:6352 from MySQL InnoDB
4769
		break;
4770
4771
	case DATA_DOUBLE:
4772
		ut_a(len == sizeof(double));
1819.5.263 by mmakela
Original revid:svn-v4:16c675df-0fcb-4bc9-8058-dcc011a37293:branches/zip:6919
4773
		value = (ib_uint64_t) mach_double_read(data);
1819.5.161 by marko
Merge Revision revid:svn-v4:16c675df-0fcb-4bc9-8058-dcc011a37293:branches/zip:6352 from MySQL InnoDB
4774
		break;
4775
4776
	default:
4777
		ut_error;
4778
	}
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4779
4780
	if (UNIV_LIKELY_NULL(heap)) {
4781
		mem_heap_free(heap);
4782
	}
4783
4784
	if (!unsigned_type && (ib_int64_t) value < 0) {
4785
		value = 0;
4786
	}
4787
4788
	return(value);
4789
}
4790
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
4791
/*******************************************************************//**
4792
Get the last row.
4793
@return	current rec or NULL */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4794
static
4795
const rec_t*
4796
row_search_autoinc_get_rec(
4797
/*=======================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
4798
	btr_pcur_t*	pcur,		/*!< in: the current cursor */
4799
	mtr_t*		mtr)		/*!< in: mini transaction */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4800
{
4801
	do {
4802
		const rec_t* rec = btr_pcur_get_rec(pcur);
4803
4804
		if (page_rec_is_user_rec(rec)) {
4805
			return(rec);
4806
		}
4807
	} while (btr_pcur_move_to_prev(pcur, mtr));
4808
4809
	return(NULL);
4810
}
4811
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
4812
/*******************************************************************//**
4813
Read the max AUTOINC value from an index.
4814
@return DB_SUCCESS if all OK else error code, DB_RECORD_NOT_FOUND if
4815
column name can't be found in index */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4816
UNIV_INTERN
4817
ulint
4818
row_search_max_autoinc(
4819
/*===================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
4820
	dict_index_t*	index,		/*!< in: index to search */
4821
	const char*	col_name,	/*!< in: name of autoinc column */
4822
	ib_uint64_t*	value)		/*!< out: AUTOINC value read */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4823
{
4824
	ulint		i;
4825
	ulint		n_cols;
4826
	dict_field_t*	dfield = NULL;
4827
	ulint		error = DB_SUCCESS;
4828
4829
	n_cols = dict_index_get_n_ordering_defined_by_user(index);
4830
4831
	/* Search the index for the AUTOINC column name */
4832
	for (i = 0; i < n_cols; ++i) {
4833
		dfield = dict_index_get_nth_field(index, i);
4834
4835
		if (strcmp(col_name, dfield->name) == 0) {
4836
			break;
4837
		}
4838
	}
4839
4840
	*value = 0;
4841
4842
	/* Must find the AUTOINC column name */
4843
	if (i < n_cols && dfield) {
4844
		mtr_t		mtr;
4845
		btr_pcur_t	pcur;
4846
4847
		mtr_start(&mtr);
4848
4849
		/* Open at the high/right end (FALSE), and INIT
4850
		cursor (TRUE) */
4851
		btr_pcur_open_at_index_side(
4852
			FALSE, index, BTR_SEARCH_LEAF, &pcur, TRUE, &mtr);
4853
4854
		if (page_get_n_recs(btr_pcur_get_page(&pcur)) > 0) {
4855
			const rec_t*	rec;
4856
4857
			rec = row_search_autoinc_get_rec(&pcur, &mtr);
4858
4859
			if (rec != NULL) {
4860
				ibool unsigned_type = (
4861
					dfield->col->prtype & DATA_UNSIGNED);
4862
4863
				*value = row_search_autoinc_read_column(
1819.5.161 by marko
Merge Revision revid:svn-v4:16c675df-0fcb-4bc9-8058-dcc011a37293:branches/zip:6352 from MySQL InnoDB
4864
					index, rec, i,
4865
					dfield->col->mtype, unsigned_type);
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4866
			}
4867
		}
4868
4869
		btr_pcur_close(&pcur);
4870
4871
		mtr_commit(&mtr);
4872
	} else {
4873
		error = DB_RECORD_NOT_FOUND;
4874
	}
4875
4876
	return(error);
4877
}