~drizzle-trunk/drizzle/development

1 by brian
clean slate
1
/******************************************************
2
Cursor read
3
4
(c) 1997 Innobase Oy
5
6
Created 2/16/1997 Heikki Tuuri
7
*******************************************************/
8
9
#include "read0read.h"
10
11
#ifdef UNIV_NONINL
12
#include "read0read.ic"
13
#endif
14
15
#include "srv0srv.h"
16
#include "trx0sys.h"
17
18
/*
19
-------------------------------------------------------------------------------
20
FACT A: Cursor read view on a secondary index sees only committed versions
21
-------
22
of the records in the secondary index or those versions of rows created
23
by transaction which created a cursor before cursor was created even
24
if transaction which created the cursor has changed that clustered index page.
25
26
PROOF: We must show that read goes always to the clustered index record
27
to see that record is visible in the cursor read view. Consider e.g.
28
following table and SQL-clauses:
29
30
create table t1(a int not null, b int, primary key(a), index(b));
31
insert into t1 values (1,1),(2,2);
32
commit;
33
34
Now consider that we have a cursor for a query
35
36
select b from t1 where b >= 1;
37
38
This query will use secondary key on the table t1. Now after the first fetch
39
on this cursor if we do an update:
40
41
update t1 set b = 5 where b = 2;
42
43
Now second fetch of the cursor should not see record (2,5) instead it should
44
see record (2,2).
45
46
We also should show that if we have delete t1 where b = 5; we still
47
can see record (2,2).
48
49
When we access a secondary key record, the maximum transaction id is fetched
from this record and this trx_id is compared to up_limit_id in the view.
If the trx_id in the record is greater than or equal to up_limit_id in the
view, the clustered record is accessed. Because the trx_id of the creating
transaction was stored, when this view was created, in the list of
trx_ids not seen by this read view, a previous version of the
record is requested to be built. This is built using the clustered record.
If the secondary key record is delete marked, its corresponding
clustered record can already have been purged only if the record's
trx_id < low_limit_no. Purge can't remove any record deleted by a
transaction which was active when the cursor was created. But we still
may have a deleted secondary key record but no clustered record. This
is not a problem, because this case is handled in the
row_sel_get_clust_rec() function, which is called
whenever we note that this read view does not see the trx_id in the
record. Thus, we see the correct version. Q. E. D.
65
66
-------------------------------------------------------------------------------
67
FACT B: Cursor read view on a clustered index sees only committed versions
68
-------
69
of the records in the clustered index or those versions of rows created
70
by transaction which created a cursor before cursor was created even
71
if transaction which created the cursor has changed that clustered index page.
72
73
PROOF: Consider e.g. the following table and SQL clauses:
74
75
create table t1(a int not null, b int, primary key(a));
76
insert into t1 values (1),(2);
77
commit;
78
79
Now consider that we have a cursor for a query
80
81
select a from t1 where a >= 1;
82
83
This query will use clustered key on the table t1. Now after the first fetch
84
on this cursor if we do an update:
85
86
update t1 set a = 5 where a = 2;
87
88
Now second fetch of the cursor should not see record (5) instead it should
89
see record (2).
90
91
We also should show that if we execute delete t1 where a = 5; after
the cursor is opened, we still can see record (2).
93
94
When accessing a clustered record we always check if this read view sees
the trx_id stored in the clustered record. By default we don't see any changes
if record trx_id >= low_limit_id, i.e. the change was made by a transaction
which started after the transaction which created the cursor. If the row
was changed by such a future transaction, a previous version of the
clustered record is created. Thus we see only the committed version in
this case. We see all changes made by committed transactions, i.e.
record trx_id < up_limit_id. In this case we don't need to do anything:
we already see the correct version of the record. We don't see any changes
made by an active transaction, except the creating transaction. We have stored
the trx_id of the creating transaction in the list of trx_ids when this view
was created. Thus we can easily see if this record was changed by the
creating transaction. Because we already have the clustered record, we can
access roll_ptr. Using this roll_ptr we can fetch the undo record.
We can now check that the undo_no of the undo record is less than the undo_no
of the transaction which created the view when the cursor was created. We see
this clustered record only in the case when the record undo_no is less than
the undo_no in the view. If this is not true, we build a previous version of
the record based on undo_rec. This record is found because purge can't remove
records accessed by an active transaction. Thus we see the correct version.
Q. E. D.
114
-------------------------------------------------------------------------------
115
FACT C: Purge does not remove any delete marked row that is visible
116
-------
117
to cursor view.
118
119
TODO: prove this
120
121
*/
122
123
/*************************************************************************
124
Creates a read view object. */
125
UNIV_INLINE
126
read_view_t*
127
read_view_create_low(
128
/*=================*/
129
				/* out, own: read view struct */
130
	ulint		n,	/* in: number of cells in the trx_ids array */
131
	mem_heap_t*	heap)	/* in: memory heap from which allocated */
132
{
133
	read_view_t*	view;
134
135
	view = mem_heap_alloc(heap, sizeof(read_view_t));
136
137
	view->n_trx_ids = n;
138
	view->trx_ids = mem_heap_alloc(heap, n * sizeof(dulint));
139
140
	return(view);
141
}
142
143
/*************************************************************************
144
Makes a copy of the oldest existing read view, with the exception that also
145
the creating trx of the oldest view is set as not visible in the 'copied'
146
view. Opens a new view if no views currently exist. The view must be closed
147
with ..._close. This is used in purge. */
148
149
read_view_t*
150
read_view_oldest_copy_or_open_new(
151
/*==============================*/
152
					/* out, own: read view struct */
153
	dulint		cr_trx_id,	/* in: trx_id of creating
154
					transaction, or (0, 0) used in purge*/
155
	mem_heap_t*	heap)		/* in: memory heap from which
156
					allocated */
157
{
158
	read_view_t*	old_view;
159
	read_view_t*	view_copy;
160
	ibool		needs_insert	= TRUE;
161
	ulint		insert_done	= 0;
162
	ulint		n;
163
	ulint		i;
164
165
	ut_ad(mutex_own(&kernel_mutex));
166
167
	old_view = UT_LIST_GET_LAST(trx_sys->view_list);
168
169
	if (old_view == NULL) {
170
171
		return(read_view_open_now(cr_trx_id, heap));
172
	}
173
174
	n = old_view->n_trx_ids;
175
176
	if (ut_dulint_cmp(old_view->creator_trx_id,
177
			  ut_dulint_create(0,0)) != 0) {
178
		n++;
179
	} else {
180
		needs_insert = FALSE;
181
	}
182
183
	view_copy = read_view_create_low(n, heap);
184
185
	/* Insert the id of the creator in the right place of the descending
186
	array of ids, if needs_insert is TRUE: */
187
188
	i = 0;
189
	while (i < n) {
190
		if (needs_insert
191
		    && (i >= old_view->n_trx_ids
192
			|| ut_dulint_cmp(old_view->creator_trx_id,
193
					 read_view_get_nth_trx_id(old_view, i))
194
			> 0)) {
195
196
			read_view_set_nth_trx_id(view_copy, i,
197
						 old_view->creator_trx_id);
198
			needs_insert = FALSE;
199
			insert_done = 1;
200
		} else {
201
			read_view_set_nth_trx_id(view_copy, i,
202
						 read_view_get_nth_trx_id(
203
							 old_view,
204
							 i - insert_done));
205
		}
206
207
		i++;
208
	}
209
210
	view_copy->creator_trx_id = cr_trx_id;
211
212
	view_copy->low_limit_no = old_view->low_limit_no;
213
	view_copy->low_limit_id = old_view->low_limit_id;
214
215
	view_copy->can_be_too_old = FALSE;
216
217
	if (n > 0) {
218
		/* The last active transaction has the smallest id: */
219
		view_copy->up_limit_id = read_view_get_nth_trx_id(
220
			view_copy, n - 1);
221
	} else {
222
		view_copy->up_limit_id = old_view->up_limit_id;
223
	}
224
225
	UT_LIST_ADD_LAST(view_list, trx_sys->view_list, view_copy);
226
227
	return(view_copy);
228
}
229
230
/*************************************************************************
231
Opens a read view where exactly the transactions serialized before this
232
point in time are seen in the view. */
233
234
read_view_t*
235
read_view_open_now(
236
/*===============*/
237
					/* out, own: read view struct */
238
	dulint		cr_trx_id,	/* in: trx_id of creating
239
					transaction, or (0, 0) used in
240
					purge */
241
	mem_heap_t*	heap)		/* in: memory heap from which
242
					allocated */
243
{
244
	read_view_t*	view;
245
	trx_t*		trx;
246
	ulint		n;
247
248
	ut_ad(mutex_own(&kernel_mutex));
249
250
	view = read_view_create_low(UT_LIST_GET_LEN(trx_sys->trx_list), heap);
251
252
	view->creator_trx_id = cr_trx_id;
253
	view->type = VIEW_NORMAL;
254
	view->undo_no = ut_dulint_create(0, 0);
255
256
	/* No future transactions should be visible in the view */
257
258
	view->low_limit_no = trx_sys->max_trx_id;
259
	view->low_limit_id = view->low_limit_no;
260
261
	view->can_be_too_old = FALSE;
262
263
	n = 0;
264
	trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
265
266
	/* No active transaction should be visible, except cr_trx */
267
268
	while (trx) {
269
		if (ut_dulint_cmp(trx->id, cr_trx_id) != 0
270
		    && (trx->conc_state == TRX_ACTIVE
271
			|| trx->conc_state == TRX_PREPARED)) {
272
273
			read_view_set_nth_trx_id(view, n, trx->id);
274
275
			n++;
276
277
			/* NOTE that a transaction whose trx number is <
278
			trx_sys->max_trx_id can still be active, if it is
279
			in the middle of its commit! Note that when a
280
			transaction starts, we initialize trx->no to
281
			ut_dulint_max. */
282
283
			if (ut_dulint_cmp(view->low_limit_no, trx->no) > 0) {
284
285
				view->low_limit_no = trx->no;
286
			}
287
		}
288
289
		trx = UT_LIST_GET_NEXT(trx_list, trx);
290
	}
291
292
	view->n_trx_ids = n;
293
294
	if (n > 0) {
295
		/* The last active transaction has the smallest id: */
296
		view->up_limit_id = read_view_get_nth_trx_id(view, n - 1);
297
	} else {
298
		view->up_limit_id = view->low_limit_id;
299
	}
300
301
302
	UT_LIST_ADD_FIRST(view_list, trx_sys->view_list, view);
303
304
	return(view);
305
}
306
307
/*************************************************************************
308
Closes a read view. */
309
310
void
311
read_view_close(
312
/*============*/
313
	read_view_t*	view)	/* in: read view */
314
{
315
	ut_ad(mutex_own(&kernel_mutex));
316
317
	UT_LIST_REMOVE(view_list, trx_sys->view_list, view);
318
}
319
320
/*************************************************************************
321
Closes a consistent read view for MySQL. This function is called at an SQL
322
statement end if the trx isolation level is <= TRX_ISO_READ_COMMITTED. */
323
324
void
325
read_view_close_for_mysql(
326
/*======================*/
327
	trx_t*	trx)	/* in: trx which has a read view */
328
{
329
	ut_a(trx->global_read_view);
330
331
	mutex_enter(&kernel_mutex);
332
333
	read_view_close(trx->global_read_view);
334
335
	mem_heap_empty(trx->global_read_view_heap);
336
337
	trx->read_view = NULL;
338
	trx->global_read_view = NULL;
339
340
	mutex_exit(&kernel_mutex);
341
}
342
343
/*************************************************************************
344
Prints a read view to stderr. */
345
346
void
347
read_view_print(
348
/*============*/
349
	read_view_t*	view)	/* in: read view */
350
{
351
	ulint	n_ids;
352
	ulint	i;
353
354
	if (view->type == VIEW_HIGH_GRANULARITY) {
355
		fprintf(stderr,
356
			"High-granularity read view undo_n:o %lu %lu\n",
357
			(ulong) ut_dulint_get_high(view->undo_no),
358
			(ulong) ut_dulint_get_low(view->undo_no));
359
	} else {
360
		fprintf(stderr, "Normal read view\n");
361
	}
362
363
	fprintf(stderr, "Read view low limit trx n:o %lu %lu\n",
364
		(ulong) ut_dulint_get_high(view->low_limit_no),
365
		(ulong) ut_dulint_get_low(view->low_limit_no));
366
367
	fprintf(stderr, "Read view up limit trx id %lu %lu\n",
368
		(ulong) ut_dulint_get_high(view->up_limit_id),
369
		(ulong) ut_dulint_get_low(view->up_limit_id));
370
371
	fprintf(stderr, "Read view low limit trx id %lu %lu\n",
372
		(ulong) ut_dulint_get_high(view->low_limit_id),
373
		(ulong) ut_dulint_get_low(view->low_limit_id));
374
375
	fprintf(stderr, "Read view individually stored trx ids:\n");
376
377
	n_ids = view->n_trx_ids;
378
379
	for (i = 0; i < n_ids; i++) {
380
		fprintf(stderr, "Read view trx id %lu %lu\n",
381
			(ulong) ut_dulint_get_high(
382
				read_view_get_nth_trx_id(view, i)),
383
			(ulong) ut_dulint_get_low(
384
				read_view_get_nth_trx_id(view, i)));
385
	}
386
}
387
388
/*************************************************************************
389
Create a high-granularity consistent cursor view for mysql to be used
390
in cursors. In this consistent read view modifications done by the
391
creating transaction after the cursor is created or future transactions
392
are not visible. */
393
394
cursor_view_t*
395
read_cursor_view_create_for_mysql(
396
/*==============================*/
397
	trx_t*	cr_trx)	/* in: trx where cursor view is created */
398
{
399
	cursor_view_t*	curview;
400
	read_view_t*	view;
401
	mem_heap_t*	heap;
402
	trx_t*		trx;
403
	ulint		n;
404
405
	ut_a(cr_trx);
406
407
	/* Use larger heap than in trx_create when creating a read_view
408
	because cursors are quite long. */
409
410
	heap = mem_heap_create(512);
411
412
	curview = (cursor_view_t*) mem_heap_alloc(heap, sizeof(cursor_view_t));
413
	curview->heap = heap;
414
415
	/* Drop cursor tables from consideration when evaluating the need of
416
	auto-commit */
417
	curview->n_mysql_tables_in_use = cr_trx->n_mysql_tables_in_use;
418
	cr_trx->n_mysql_tables_in_use = 0;
419
420
	mutex_enter(&kernel_mutex);
421
422
	curview->read_view = read_view_create_low(
423
		UT_LIST_GET_LEN(trx_sys->trx_list), curview->heap);
424
425
	view = curview->read_view;
426
	view->creator_trx_id = cr_trx->id;
427
	view->type = VIEW_HIGH_GRANULARITY;
428
	view->undo_no = cr_trx->undo_no;
429
430
	/* No future transactions should be visible in the view */
431
432
	view->low_limit_no = trx_sys->max_trx_id;
433
	view->low_limit_id = view->low_limit_no;
434
435
	view->can_be_too_old = FALSE;
436
437
	n = 0;
438
	trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
439
440
	/* No active transaction should be visible */
441
442
	while (trx) {
443
444
		if (trx->conc_state == TRX_ACTIVE
445
		    || trx->conc_state == TRX_PREPARED) {
446
447
			read_view_set_nth_trx_id(view, n, trx->id);
448
449
			n++;
450
451
			/* NOTE that a transaction whose trx number is <
452
			trx_sys->max_trx_id can still be active, if it is
453
			in the middle of its commit! Note that when a
454
			transaction starts, we initialize trx->no to
455
			ut_dulint_max. */
456
457
			if (ut_dulint_cmp(view->low_limit_no, trx->no) > 0) {
458
459
				view->low_limit_no = trx->no;
460
			}
461
		}
462
463
		trx = UT_LIST_GET_NEXT(trx_list, trx);
464
	}
465
466
	view->n_trx_ids = n;
467
468
	if (n > 0) {
469
		/* The last active transaction has the smallest id: */
470
		view->up_limit_id = read_view_get_nth_trx_id(view, n - 1);
471
	} else {
472
		view->up_limit_id = view->low_limit_id;
473
	}
474
475
	UT_LIST_ADD_FIRST(view_list, trx_sys->view_list, view);
476
477
	mutex_exit(&kernel_mutex);
478
479
	return(curview);
480
}
481
482
/*************************************************************************
483
Close a given consistent cursor view for mysql and restore global read view
484
back to a transaction read view. */
485
486
void
487
read_cursor_view_close_for_mysql(
488
/*=============================*/
489
	trx_t*		trx,	/* in: trx */
490
	cursor_view_t*	curview)/* in: cursor view to be closed */
491
{
492
	ut_a(curview);
493
	ut_a(curview->read_view);
494
	ut_a(curview->heap);
495
496
	/* Add cursor's tables to the global count of active tables that
497
	belong to this transaction */
498
	trx->n_mysql_tables_in_use += curview->n_mysql_tables_in_use;
499
500
	mutex_enter(&kernel_mutex);
501
502
	read_view_close(curview->read_view);
503
	trx->read_view = trx->global_read_view;
504
505
	mutex_exit(&kernel_mutex);
506
507
	mem_heap_free(curview->heap);
508
}
509
510
/*************************************************************************
511
This function sets a given consistent cursor view to a transaction
512
read view if given consistent cursor view is not NULL. Otherwise, function
513
restores a global read view to a transaction read view. */
514
515
void
516
read_cursor_set_for_mysql(
517
/*======================*/
518
	trx_t*		trx,	/* in: transaction where cursor is set */
519
	cursor_view_t*	curview)/* in: consistent cursor view to be set */
520
{
521
	ut_a(trx);
522
523
	mutex_enter(&kernel_mutex);
524
525
	if (UNIV_LIKELY(curview != NULL)) {
526
		trx->read_view = curview->read_view;
527
	} else {
528
		trx->read_view = trx->global_read_view;
529
	}
530
531
	mutex_exit(&kernel_mutex);
532
}