1
by brian
clean slate |
1 |
/******************************************************
|
2 |
Cursor read
|
|
3 |
||
4 |
(c) 1997 Innobase Oy
|
|
5 |
||
6 |
Created 2/16/1997 Heikki Tuuri
|
|
7 |
*******************************************************/
|
|
8 |
||
9 |
#include "read0read.h" |
|
10 |
||
11 |
#ifdef UNIV_NONINL
|
|
12 |
#include "read0read.ic" |
|
13 |
#endif
|
|
14 |
||
15 |
#include "srv0srv.h" |
|
16 |
#include "trx0sys.h" |
|
17 |
||
18 |
/*
|
|
19 |
-------------------------------------------------------------------------------
|
|
20 |
FACT A: Cursor read view on a secondary index sees only committed versions
|
|
21 |
-------
|
|
22 |
of the records in the secondary index or those versions of rows created
|
|
23 |
by transaction which created a cursor before cursor was created even
|
|
24 |
if transaction which created the cursor has changed that clustered index page.
|
|
25 |
||
26 |
PROOF: We must show that read goes always to the clustered index record
|
|
27 |
to see that record is visible in the cursor read view. Consider e.g.
|
|
28 |
following table and SQL-clauses:
|
|
29 |
||
30 |
create table t1(a int not null, b int, primary key(a), index(b));
|
|
31 |
insert into t1 values (1,1),(2,2);
|
|
32 |
commit;
|
|
33 |
||
34 |
Now consider that we have a cursor for a query
|
|
35 |
||
36 |
select b from t1 where b >= 1;
|
|
37 |
||
38 |
This query will use secondary key on the table t1. Now after the first fetch
|
|
39 |
on this cursor if we do an update:
|
|
40 |
||
41 |
update t1 set b = 5 where b = 2;
|
|
42 |
||
43 |
Now second fetch of the cursor should not see record (2,5) instead it should
|
|
44 |
see record (2,2).
|
|
45 |
||
46 |
We also should show that if we execute delete from t1 where b = 5; we still
|
|
47 |
can see record (2,2).
|
|
48 |
||
49 |
When we access a secondary key record maximum transaction id is fetched
|
|
50 |
from this record and this trx_id is compared to up_limit_id in the view.
|
|
51 |
If trx_id in the record is greater than or equal to up_limit_id in the view
|
|
52 |
cluster record is accessed. Because trx_id of the creating
|
|
53 |
transaction is stored when this view was created to the list of
|
|
54 |
trx_ids not seen by this read view previous version of the
|
|
55 |
record is requested to be built. This is built using the clustered record.
|
|
56 |
If the secondary key record is delete marked its corresponding
|
|
57 |
clustered record can already be purged only if the record's
|
|
58 |
trx_id < low_limit_no. Purge can't remove any record deleted by a
|
|
59 |
transaction which was active when cursor was created. But, we still
|
|
60 |
may have a deleted secondary key record but no clustered record. But,
|
|
61 |
this is not a problem because this case is handled in
|
|
62 |
row_sel_get_clust_rec() function which is called
|
|
63 |
whenever we note that this read view does not see trx_id in the
|
|
64 |
record. Thus, we see correct version. Q. E. D.
|
|
65 |
||
66 |
-------------------------------------------------------------------------------
|
|
67 |
FACT B: Cursor read view on a clustered index sees only committed versions
|
|
68 |
-------
|
|
69 |
of the records in the clustered index or those versions of rows created
|
|
70 |
by transaction which created a cursor before cursor was created even
|
|
71 |
if transaction which created the cursor has changed that clustered index page.
|
|
72 |
||
73 |
PROOF: Consider e.g. the following table and SQL-clauses:
|
|
74 |
||
75 |
create table t1(a int not null, b int, primary key(a));
|
|
76 |
insert into t1 values (1,1),(2,2);
|
|
77 |
commit;
|
|
78 |
||
79 |
Now consider that we have a cursor for a query
|
|
80 |
||
81 |
select a from t1 where a >= 1;
|
|
82 |
||
83 |
This query will use clustered key on the table t1. Now after the first fetch
|
|
84 |
on this cursor if we do an update:
|
|
85 |
||
86 |
update t1 set a = 5 where a = 2;
|
|
87 |
||
88 |
Now second fetch of the cursor should not see record (5) instead it should
|
|
89 |
see record (2).
|
|
90 |
||
91 |
We also should show that if we execute delete from t1 where a = 5; after
|
|
92 |
the cursor is opened we still can see record (2).
|
|
93 |
||
94 |
When accessing clustered record we always check if this read view sees
|
|
95 |
trx_id stored to clustered record. By default we don't see any changes
|
|
96 |
if record trx_id >= low_limit_id i.e. change was made by a transaction
|
|
97 |
which started after transaction which created the cursor. If row
|
|
98 |
was changed by the future transaction a previous version of the
|
|
99 |
clustered record is created. Thus we see only committed version in
|
|
100 |
this case. We see all changes made by committed transactions i.e.
|
|
101 |
record trx_id < up_limit_id. In this case we don't need to do anything,
|
|
102 |
we already see correct version of the record. We don't see any changes
|
|
103 |
made by active transaction except creating transaction. We have stored
|
|
104 |
trx_id of creating transaction to list of trx_ids when this view was
|
|
105 |
created. Thus we can easily see if this record was changed by the
|
|
106 |
creating transaction. Because we already have clustered record we can
|
|
107 |
access roll_ptr. Using this roll_ptr we can fetch undo record.
|
|
108 |
We can now check that undo_no of the undo record is less than undo_no of the
|
|
109 |
transaction which created a view when cursor was created. We see this
|
|
110 |
clustered record only in case when record undo_no is less than undo_no
|
|
111 |
in the view. If this is not true we build based on undo_rec previous
|
|
112 |
version of the record. This record is found because purge can't remove
|
|
113 |
records accessed by active transaction. Thus we see correct version. Q. E. D.
|
|
114 |
-------------------------------------------------------------------------------
|
|
115 |
FACT C: Purge does not remove any delete marked row that is visible
|
|
116 |
-------
|
|
117 |
to cursor view.
|
|
118 |
||
119 |
TODO: prove this
|
|
120 |
||
121 |
*/
|
|
122 |
||
123 |
/*************************************************************************
|
|
124 |
Creates a read view object. */
|
|
125 |
UNIV_INLINE
|
|
126 |
read_view_t* |
|
127 |
read_view_create_low( |
|
128 |
/*=================*/
|
|
129 |
/* out, own: read view struct */
|
|
130 |
ulint n, /* in: number of cells in the trx_ids array */ |
|
131 |
mem_heap_t* heap) /* in: memory heap from which allocated */ |
|
132 |
{
|
|
133 |
read_view_t* view; |
|
134 |
||
135 |
view = mem_heap_alloc(heap, sizeof(read_view_t)); |
|
136 |
||
137 |
view->n_trx_ids = n; |
|
138 |
view->trx_ids = mem_heap_alloc(heap, n * sizeof(dulint)); |
|
139 |
||
140 |
return(view); |
|
141 |
}
|
|
142 |
||
143 |
/*************************************************************************
|
|
144 |
Makes a copy of the oldest existing read view, with the exception that also
|
|
145 |
the creating trx of the oldest view is set as not visible in the 'copied'
|
|
146 |
view. Opens a new view if no views currently exist. The view must be closed
|
|
147 |
with ..._close. This is used in purge. */
|
|
148 |
||
149 |
read_view_t* |
|
150 |
read_view_oldest_copy_or_open_new( |
|
151 |
/*==============================*/
|
|
152 |
/* out, own: read view struct */
|
|
153 |
dulint cr_trx_id, /* in: trx_id of creating |
|
154 |
transaction, or (0, 0) used in purge*/
|
|
155 |
mem_heap_t* heap) /* in: memory heap from which |
|
156 |
allocated */
|
|
157 |
{
|
|
158 |
read_view_t* old_view; |
|
159 |
read_view_t* view_copy; |
|
160 |
ibool needs_insert = TRUE; |
|
161 |
ulint insert_done = 0; |
|
162 |
ulint n; |
|
163 |
ulint i; |
|
164 |
||
165 |
ut_ad(mutex_own(&kernel_mutex)); |
|
166 |
||
167 |
old_view = UT_LIST_GET_LAST(trx_sys->view_list); |
|
168 |
||
169 |
if (old_view == NULL) { |
|
170 |
||
171 |
return(read_view_open_now(cr_trx_id, heap)); |
|
172 |
}
|
|
173 |
||
174 |
n = old_view->n_trx_ids; |
|
175 |
||
176 |
if (ut_dulint_cmp(old_view->creator_trx_id, |
|
177 |
ut_dulint_create(0,0)) != 0) { |
|
178 |
n++; |
|
179 |
} else { |
|
180 |
needs_insert = FALSE; |
|
181 |
}
|
|
182 |
||
183 |
view_copy = read_view_create_low(n, heap); |
|
184 |
||
185 |
/* Insert the id of the creator in the right place of the descending
|
|
186 |
array of ids, if needs_insert is TRUE: */
|
|
187 |
||
188 |
i = 0; |
|
189 |
while (i < n) { |
|
190 |
if (needs_insert |
|
191 |
&& (i >= old_view->n_trx_ids |
|
192 |
|| ut_dulint_cmp(old_view->creator_trx_id, |
|
193 |
read_view_get_nth_trx_id(old_view, i)) |
|
194 |
> 0)) { |
|
195 |
||
196 |
read_view_set_nth_trx_id(view_copy, i, |
|
197 |
old_view->creator_trx_id); |
|
198 |
needs_insert = FALSE; |
|
199 |
insert_done = 1; |
|
200 |
} else { |
|
201 |
read_view_set_nth_trx_id(view_copy, i, |
|
202 |
read_view_get_nth_trx_id( |
|
203 |
old_view, |
|
204 |
i - insert_done)); |
|
205 |
}
|
|
206 |
||
207 |
i++; |
|
208 |
}
|
|
209 |
||
210 |
view_copy->creator_trx_id = cr_trx_id; |
|
211 |
||
212 |
view_copy->low_limit_no = old_view->low_limit_no; |
|
213 |
view_copy->low_limit_id = old_view->low_limit_id; |
|
214 |
||
215 |
view_copy->can_be_too_old = FALSE; |
|
216 |
||
217 |
if (n > 0) { |
|
218 |
/* The last active transaction has the smallest id: */
|
|
219 |
view_copy->up_limit_id = read_view_get_nth_trx_id( |
|
220 |
view_copy, n - 1); |
|
221 |
} else { |
|
222 |
view_copy->up_limit_id = old_view->up_limit_id; |
|
223 |
}
|
|
224 |
||
225 |
UT_LIST_ADD_LAST(view_list, trx_sys->view_list, view_copy); |
|
226 |
||
227 |
return(view_copy); |
|
228 |
}
|
|
229 |
||
230 |
/*************************************************************************
|
|
231 |
Opens a read view where exactly the transactions serialized before this
|
|
232 |
point in time are seen in the view. */
|
|
233 |
||
234 |
read_view_t* |
|
235 |
read_view_open_now( |
|
236 |
/*===============*/
|
|
237 |
/* out, own: read view struct */
|
|
238 |
dulint cr_trx_id, /* in: trx_id of creating |
|
239 |
transaction, or (0, 0) used in
|
|
240 |
purge */
|
|
241 |
mem_heap_t* heap) /* in: memory heap from which |
|
242 |
allocated */
|
|
243 |
{
|
|
244 |
read_view_t* view; |
|
245 |
trx_t* trx; |
|
246 |
ulint n; |
|
247 |
||
248 |
ut_ad(mutex_own(&kernel_mutex)); |
|
249 |
||
250 |
view = read_view_create_low(UT_LIST_GET_LEN(trx_sys->trx_list), heap); |
|
251 |
||
252 |
view->creator_trx_id = cr_trx_id; |
|
253 |
view->type = VIEW_NORMAL; |
|
254 |
view->undo_no = ut_dulint_create(0, 0); |
|
255 |
||
256 |
/* No future transactions should be visible in the view */
|
|
257 |
||
258 |
view->low_limit_no = trx_sys->max_trx_id; |
|
259 |
view->low_limit_id = view->low_limit_no; |
|
260 |
||
261 |
view->can_be_too_old = FALSE; |
|
262 |
||
263 |
n = 0; |
|
264 |
trx = UT_LIST_GET_FIRST(trx_sys->trx_list); |
|
265 |
||
266 |
/* No active transaction should be visible, except cr_trx */
|
|
267 |
||
268 |
while (trx) { |
|
269 |
if (ut_dulint_cmp(trx->id, cr_trx_id) != 0 |
|
270 |
&& (trx->conc_state == TRX_ACTIVE |
|
271 |
|| trx->conc_state == TRX_PREPARED)) { |
|
272 |
||
273 |
read_view_set_nth_trx_id(view, n, trx->id); |
|
274 |
||
275 |
n++; |
|
276 |
||
277 |
/* NOTE that a transaction whose trx number is <
|
|
278 |
trx_sys->max_trx_id can still be active, if it is
|
|
279 |
in the middle of its commit! Note that when a
|
|
280 |
transaction starts, we initialize trx->no to
|
|
281 |
ut_dulint_max. */
|
|
282 |
||
283 |
if (ut_dulint_cmp(view->low_limit_no, trx->no) > 0) { |
|
284 |
||
285 |
view->low_limit_no = trx->no; |
|
286 |
}
|
|
287 |
}
|
|
288 |
||
289 |
trx = UT_LIST_GET_NEXT(trx_list, trx); |
|
290 |
}
|
|
291 |
||
292 |
view->n_trx_ids = n; |
|
293 |
||
294 |
if (n > 0) { |
|
295 |
/* The last active transaction has the smallest id: */
|
|
296 |
view->up_limit_id = read_view_get_nth_trx_id(view, n - 1); |
|
297 |
} else { |
|
298 |
view->up_limit_id = view->low_limit_id; |
|
299 |
}
|
|
300 |
||
301 |
||
302 |
UT_LIST_ADD_FIRST(view_list, trx_sys->view_list, view); |
|
303 |
||
304 |
return(view); |
|
305 |
}
|
|
306 |
||
307 |
/*************************************************************************
|
|
308 |
Closes a read view. */
|
|
309 |
||
310 |
void
|
|
311 |
read_view_close( |
|
312 |
/*============*/
|
|
313 |
read_view_t* view) /* in: read view */ |
|
314 |
{
|
|
315 |
ut_ad(mutex_own(&kernel_mutex)); |
|
316 |
||
317 |
UT_LIST_REMOVE(view_list, trx_sys->view_list, view); |
|
318 |
}
|
|
319 |
||
320 |
/*************************************************************************
|
|
321 |
Closes a consistent read view for MySQL. This function is called at an SQL
|
|
322 |
statement end if the trx isolation level is <= TRX_ISO_READ_COMMITTED. */
|
|
323 |
||
324 |
void
|
|
325 |
read_view_close_for_mysql( |
|
326 |
/*======================*/
|
|
327 |
trx_t* trx) /* in: trx which has a read view */ |
|
328 |
{
|
|
329 |
ut_a(trx->global_read_view); |
|
330 |
||
331 |
mutex_enter(&kernel_mutex); |
|
332 |
||
333 |
read_view_close(trx->global_read_view); |
|
334 |
||
335 |
mem_heap_empty(trx->global_read_view_heap); |
|
336 |
||
337 |
trx->read_view = NULL; |
|
338 |
trx->global_read_view = NULL; |
|
339 |
||
340 |
mutex_exit(&kernel_mutex); |
|
341 |
}
|
|
342 |
||
343 |
/*************************************************************************
|
|
344 |
Prints a read view to stderr. */
|
|
345 |
||
346 |
void
|
|
347 |
read_view_print( |
|
348 |
/*============*/
|
|
349 |
read_view_t* view) /* in: read view */ |
|
350 |
{
|
|
351 |
ulint n_ids; |
|
352 |
ulint i; |
|
353 |
||
354 |
if (view->type == VIEW_HIGH_GRANULARITY) { |
|
355 |
fprintf(stderr, |
|
356 |
"High-granularity read view undo_n:o %lu %lu\n", |
|
357 |
(ulong) ut_dulint_get_high(view->undo_no), |
|
358 |
(ulong) ut_dulint_get_low(view->undo_no)); |
|
359 |
} else { |
|
360 |
fprintf(stderr, "Normal read view\n"); |
|
361 |
}
|
|
362 |
||
363 |
fprintf(stderr, "Read view low limit trx n:o %lu %lu\n", |
|
364 |
(ulong) ut_dulint_get_high(view->low_limit_no), |
|
365 |
(ulong) ut_dulint_get_low(view->low_limit_no)); |
|
366 |
||
367 |
fprintf(stderr, "Read view up limit trx id %lu %lu\n", |
|
368 |
(ulong) ut_dulint_get_high(view->up_limit_id), |
|
369 |
(ulong) ut_dulint_get_low(view->up_limit_id)); |
|
370 |
||
371 |
fprintf(stderr, "Read view low limit trx id %lu %lu\n", |
|
372 |
(ulong) ut_dulint_get_high(view->low_limit_id), |
|
373 |
(ulong) ut_dulint_get_low(view->low_limit_id)); |
|
374 |
||
375 |
fprintf(stderr, "Read view individually stored trx ids:\n"); |
|
376 |
||
377 |
n_ids = view->n_trx_ids; |
|
378 |
||
379 |
for (i = 0; i < n_ids; i++) { |
|
380 |
fprintf(stderr, "Read view trx id %lu %lu\n", |
|
381 |
(ulong) ut_dulint_get_high( |
|
382 |
read_view_get_nth_trx_id(view, i)), |
|
383 |
(ulong) ut_dulint_get_low( |
|
384 |
read_view_get_nth_trx_id(view, i))); |
|
385 |
}
|
|
386 |
}
|
|
387 |
||
388 |
/*************************************************************************
|
|
389 |
Create a high-granularity consistent cursor view for mysql to be used
|
|
390 |
in cursors. In this consistent read view modifications done by the
|
|
391 |
creating transaction after the cursor is created or future transactions
|
|
392 |
are not visible. */
|
|
393 |
||
394 |
cursor_view_t* |
|
395 |
read_cursor_view_create_for_mysql( |
|
396 |
/*==============================*/
|
|
397 |
trx_t* cr_trx) /* in: trx where cursor view is created */ |
|
398 |
{
|
|
399 |
cursor_view_t* curview; |
|
400 |
read_view_t* view; |
|
401 |
mem_heap_t* heap; |
|
402 |
trx_t* trx; |
|
403 |
ulint n; |
|
404 |
||
405 |
ut_a(cr_trx); |
|
406 |
||
407 |
/* Use larger heap than in trx_create when creating a read_view
|
|
408 |
because cursors are quite long. */
|
|
409 |
||
410 |
heap = mem_heap_create(512); |
|
411 |
||
412 |
curview = (cursor_view_t*) mem_heap_alloc(heap, sizeof(cursor_view_t)); |
|
413 |
curview->heap = heap; |
|
414 |
||
415 |
/* Drop cursor tables from consideration when evaluating the need of
|
|
416 |
auto-commit */
|
|
417 |
curview->n_mysql_tables_in_use = cr_trx->n_mysql_tables_in_use; |
|
418 |
cr_trx->n_mysql_tables_in_use = 0; |
|
419 |
||
420 |
mutex_enter(&kernel_mutex); |
|
421 |
||
422 |
curview->read_view = read_view_create_low( |
|
423 |
UT_LIST_GET_LEN(trx_sys->trx_list), curview->heap); |
|
424 |
||
425 |
view = curview->read_view; |
|
426 |
view->creator_trx_id = cr_trx->id; |
|
427 |
view->type = VIEW_HIGH_GRANULARITY; |
|
428 |
view->undo_no = cr_trx->undo_no; |
|
429 |
||
430 |
/* No future transactions should be visible in the view */
|
|
431 |
||
432 |
view->low_limit_no = trx_sys->max_trx_id; |
|
433 |
view->low_limit_id = view->low_limit_no; |
|
434 |
||
435 |
view->can_be_too_old = FALSE; |
|
436 |
||
437 |
n = 0; |
|
438 |
trx = UT_LIST_GET_FIRST(trx_sys->trx_list); |
|
439 |
||
440 |
/* No active transaction should be visible */
|
|
441 |
||
442 |
while (trx) { |
|
443 |
||
444 |
if (trx->conc_state == TRX_ACTIVE |
|
445 |
|| trx->conc_state == TRX_PREPARED) { |
|
446 |
||
447 |
read_view_set_nth_trx_id(view, n, trx->id); |
|
448 |
||
449 |
n++; |
|
450 |
||
451 |
/* NOTE that a transaction whose trx number is <
|
|
452 |
trx_sys->max_trx_id can still be active, if it is
|
|
453 |
in the middle of its commit! Note that when a
|
|
454 |
transaction starts, we initialize trx->no to
|
|
455 |
ut_dulint_max. */
|
|
456 |
||
457 |
if (ut_dulint_cmp(view->low_limit_no, trx->no) > 0) { |
|
458 |
||
459 |
view->low_limit_no = trx->no; |
|
460 |
}
|
|
461 |
}
|
|
462 |
||
463 |
trx = UT_LIST_GET_NEXT(trx_list, trx); |
|
464 |
}
|
|
465 |
||
466 |
view->n_trx_ids = n; |
|
467 |
||
468 |
if (n > 0) { |
|
469 |
/* The last active transaction has the smallest id: */
|
|
470 |
view->up_limit_id = read_view_get_nth_trx_id(view, n - 1); |
|
471 |
} else { |
|
472 |
view->up_limit_id = view->low_limit_id; |
|
473 |
}
|
|
474 |
||
475 |
UT_LIST_ADD_FIRST(view_list, trx_sys->view_list, view); |
|
476 |
||
477 |
mutex_exit(&kernel_mutex); |
|
478 |
||
479 |
return(curview); |
|
480 |
}
|
|
481 |
||
482 |
/*************************************************************************
|
|
483 |
Close a given consistent cursor view for mysql and restore global read view
|
|
484 |
back to a transaction read view. */
|
|
485 |
||
486 |
void
|
|
487 |
read_cursor_view_close_for_mysql( |
|
488 |
/*=============================*/
|
|
489 |
trx_t* trx, /* in: trx */ |
|
490 |
cursor_view_t* curview)/* in: cursor view to be closed */ |
|
491 |
{
|
|
492 |
ut_a(curview); |
|
493 |
ut_a(curview->read_view); |
|
494 |
ut_a(curview->heap); |
|
495 |
||
496 |
/* Add cursor's tables to the global count of active tables that
|
|
497 |
belong to this transaction */
|
|
498 |
trx->n_mysql_tables_in_use += curview->n_mysql_tables_in_use; |
|
499 |
||
500 |
mutex_enter(&kernel_mutex); |
|
501 |
||
502 |
read_view_close(curview->read_view); |
|
503 |
trx->read_view = trx->global_read_view; |
|
504 |
||
505 |
mutex_exit(&kernel_mutex); |
|
506 |
||
507 |
mem_heap_free(curview->heap); |
|
508 |
}
|
|
509 |
||
510 |
/*************************************************************************
|
|
511 |
This function sets a given consistent cursor view to a transaction
|
|
512 |
read view if given consistent cursor view is not NULL. Otherwise, function
|
|
513 |
restores a global read view to a transaction read view. */
|
|
514 |
||
515 |
void
|
|
516 |
read_cursor_set_for_mysql( |
|
517 |
/*======================*/
|
|
518 |
trx_t* trx, /* in: transaction where cursor is set */ |
|
519 |
cursor_view_t* curview)/* in: consistent cursor view to be set */ |
|
520 |
{
|
|
521 |
ut_a(trx); |
|
522 |
||
523 |
mutex_enter(&kernel_mutex); |
|
524 |
||
525 |
if (UNIV_LIKELY(curview != NULL)) { |
|
526 |
trx->read_view = curview->read_view; |
|
527 |
} else { |
|
528 |
trx->read_view = trx->global_read_view; |
|
529 |
}
|
|
530 |
||
531 |
mutex_exit(&kernel_mutex); |
|
532 |
}
|