/******************************************************
Row versions

Created 2/6/1997 Heikki Tuuri
*******************************************************/
12
#include "row0vers.ic"
15
#include "dict0dict.h"
16
#include "dict0boot.h"
18
#include "mach0data.h"
23
#include "trx0purge.h"
29
#include "read0read.h"
30
#include "lock0lock.h"
32
/*********************************************************************
33
Finds out if an active transaction has inserted or modified a secondary
34
index record. NOTE: the kernel mutex is temporarily released in this
38
row_vers_impl_x_locked_off_kernel(
39
/*==============================*/
40
/* out: NULL if committed, else the active
41
transaction; NOTE that the kernel mutex is
42
temporarily released! */
43
rec_t* rec, /* in: record in a secondary index */
44
dict_index_t* index, /* in: the secondary index */
45
const ulint* offsets)/* in: rec_get_offsets(rec, index) */
47
dict_index_t* clust_index;
57
dtuple_t* entry = NULL; /* assignment to eliminate compiler
66
ut_ad(mutex_own(&kernel_mutex));
67
#ifdef UNIV_SYNC_DEBUG
68
ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
69
#endif /* UNIV_SYNC_DEBUG */
71
mutex_exit(&kernel_mutex);
75
/* Search for the clustered index record: this is a time-consuming
76
operation: therefore we release the kernel mutex; also, the release
77
is required by the latching order convention. The latch on the
78
clustered index locks the top of the stack of versions. We also
79
reserve purge_latch to lock the bottom of the version stack. */
81
clust_rec = row_get_clust_rec(BTR_SEARCH_LEAF, rec, index,
84
/* In a rare case it is possible that no clust rec is found
85
for a secondary index record: if in row0umod.c
86
row_undo_mod_remove_clust_low() we have already removed the
87
clust rec, while purge is still cleaning and removing
88
secondary index records associated with earlier versions of
89
the clustered index record. In that case there cannot be
90
any implicit lock on the secondary index record, because
91
an active transaction which has modified the secondary index
92
record has also modified the clustered index record. And in
93
a rollback we always undo the modifications to secondary index
94
records before the clustered index record. */
96
mutex_enter(&kernel_mutex);
102
heap = mem_heap_create(1024);
103
clust_offsets = rec_get_offsets(clust_rec, clust_index, NULL,
104
ULINT_UNDEFINED, &heap);
105
trx_id = row_get_rec_trx_id(clust_rec, clust_index, clust_offsets);
107
mtr_s_lock(&(purge_sys->latch), &mtr);
109
mutex_enter(&kernel_mutex);
112
if (!trx_is_active(trx_id)) {
113
/* The transaction that modified or inserted clust_rec is no
114
longer active: no implicit lock on rec */
118
if (!lock_check_trx_id_sanity(trx_id, clust_rec, clust_index,
119
clust_offsets, TRUE)) {
120
/* Corruption noticed: try to avoid a crash by returning */
124
comp = page_rec_is_comp(rec);
125
ut_ad(index->table == clust_index->table);
126
ut_ad(!!comp == dict_table_is_comp(index->table));
127
ut_ad(!comp == !page_rec_is_comp(clust_rec));
129
/* We look up if some earlier version, which was modified by the trx_id
130
transaction, of the clustered index record would require rec to be in
131
a different state (delete marked or unmarked, or have different field
132
values, or not existing). If there is such a version, then rec was
133
modified by the trx_id transaction, and it has an implicit x-lock on
134
rec. Note that if clust_rec itself would require rec to be in a
135
different state, then the trx_id transaction has not yet had time to
136
modify rec, and does not necessarily have an implicit x-lock on rec. */
138
rec_del = rec_get_deleted_flag(rec, comp);
144
mutex_exit(&kernel_mutex);
146
/* While we retrieve an earlier version of clust_rec, we
147
release the kernel mutex, because it may take time to access
148
the disk. After the release, we have to check if the trx_id
149
transaction is still active. We keep the semaphore in mtr on
150
the clust_rec page, so that no other transaction can update
151
it and get an implicit x-lock on rec. */
154
heap = mem_heap_create(1024);
155
err = trx_undo_prev_version_build(clust_rec, &mtr, version,
156
clust_index, clust_offsets,
157
heap, &prev_version);
158
mem_heap_free(heap2); /* free version and clust_offsets */
161
clust_offsets = rec_get_offsets(
162
prev_version, clust_index, NULL,
163
ULINT_UNDEFINED, &heap);
164
row = row_build(ROW_COPY_POINTERS, clust_index,
165
prev_version, clust_offsets, heap);
166
entry = row_build_index_entry(row, index, heap);
169
mutex_enter(&kernel_mutex);
171
if (!trx_is_active(trx_id)) {
172
/* Transaction no longer active: no implicit x-lock */
177
/* If the transaction is still active, the previous version
178
of clust_rec must be accessible if not a fresh insert; we
179
may assert the following: */
181
ut_ad(err == DB_SUCCESS);
183
if (prev_version == NULL) {
184
/* It was a freshly inserted version: there is an
185
implicit x-lock on rec */
187
trx = trx_get_on_id(trx_id);
192
/* If we get here, we know that the trx_id transaction is
193
still active and it has modified prev_version. Let us check
194
if prev_version would require rec to be in a different
197
vers_del = rec_get_deleted_flag(prev_version, comp);
199
/* We check if entry and rec are identified in the alphabetical
201
if (0 == cmp_dtuple_rec(entry, rec, offsets)) {
202
/* The delete marks of rec and prev_version should be
203
equal for rec to be in the state required by
206
if (rec_del != vers_del) {
207
trx = trx_get_on_id(trx_id);
212
/* It is possible that the row was updated so that the
213
secondary index record remained the same in
214
alphabetical ordering, but the field values changed
215
still. For example, 'abc' -> 'ABC'. Check also that. */
217
dtuple_set_types_binary(entry,
218
dtuple_get_n_fields(entry));
219
if (0 != cmp_dtuple_rec(entry, rec, offsets)) {
221
trx = trx_get_on_id(trx_id);
225
} else if (!rec_del) {
226
/* The delete mark should be set in rec for it to be
227
in the state required by prev_version */
229
trx = trx_get_on_id(trx_id);
234
prev_trx_id = row_get_rec_trx_id(prev_version, clust_index,
237
if (0 != ut_dulint_cmp(trx_id, prev_trx_id)) {
238
/* The versions modified by the trx_id transaction end
239
to prev_version: no implicit x-lock */
244
version = prev_version;
254
/*********************************************************************
255
Finds out if we must preserve a delete marked earlier version of a clustered
256
index record, because it is >= the purge view. */
259
row_vers_must_preserve_del_marked(
260
/*==============================*/
261
/* out: TRUE if earlier version should be preserved */
262
dulint trx_id, /* in: transaction id in the version */
263
mtr_t* mtr) /* in: mtr holding the latch on the clustered index
264
record; it will also hold the latch on purge_view */
266
#ifdef UNIV_SYNC_DEBUG
267
ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
268
#endif /* UNIV_SYNC_DEBUG */
270
mtr_s_lock(&(purge_sys->latch), mtr);
272
if (trx_purge_update_undo_must_exist(trx_id)) {
274
/* A purge operation is not yet allowed to remove this
275
delete marked record */
283
/*********************************************************************
284
Finds out if a version of the record, where the version >= the current
285
purge view, should have ientry as its secondary index entry. We check
286
if there is any not delete marked version of the record where the trx
287
id >= purge view, and the secondary index entry and ientry are identified in
288
the alphabetical ordering; exactly in this case we return TRUE. */
291
row_vers_old_has_index_entry(
292
/*=========================*/
293
/* out: TRUE if earlier version should have */
294
ibool also_curr,/* in: TRUE if also rec is included in the
295
versions to search; otherwise only versions
296
prior to it are searched */
297
rec_t* rec, /* in: record in the clustered index; the
298
caller must have a latch on the page */
299
mtr_t* mtr, /* in: mtr holding the latch on rec; it will
300
also hold the latch on purge_view */
301
dict_index_t* index, /* in: the secondary index */
302
dtuple_t* ientry) /* in: the secondary index entry */
306
dict_index_t* clust_index;
307
ulint* clust_offsets;
315
ut_ad(mtr_memo_contains(mtr, buf_block_align(rec), MTR_MEMO_PAGE_X_FIX)
316
|| mtr_memo_contains(mtr, buf_block_align(rec),
317
MTR_MEMO_PAGE_S_FIX));
318
#ifdef UNIV_SYNC_DEBUG
319
ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
320
#endif /* UNIV_SYNC_DEBUG */
321
mtr_s_lock(&(purge_sys->latch), mtr);
323
clust_index = dict_table_get_first_index(index->table);
325
comp = page_rec_is_comp(rec);
326
ut_ad(!dict_table_is_comp(index->table) == !comp);
327
heap = mem_heap_create(1024);
328
clust_offsets = rec_get_offsets(rec, clust_index, NULL,
329
ULINT_UNDEFINED, &heap);
331
if (also_curr && !rec_get_deleted_flag(rec, comp)) {
332
row = row_build(ROW_COPY_POINTERS, clust_index,
333
rec, clust_offsets, heap);
334
entry = row_build_index_entry(row, index, heap);
336
/* NOTE that we cannot do the comparison as binary
337
fields because the row is maybe being modified so that
338
the clustered index record has already been updated
339
to a different binary value in a char field, but the
340
collation identifies the old and new value anyway! */
342
if (dtuple_datas_are_ordering_equal(ientry, entry)) {
354
heap = mem_heap_create(1024);
355
err = trx_undo_prev_version_build(rec, mtr, version,
356
clust_index, clust_offsets,
357
heap, &prev_version);
358
mem_heap_free(heap2); /* free version and clust_offsets */
360
if (err != DB_SUCCESS || !prev_version) {
361
/* Versions end here */
368
clust_offsets = rec_get_offsets(prev_version, clust_index,
369
NULL, ULINT_UNDEFINED, &heap);
371
if (!rec_get_deleted_flag(prev_version, comp)) {
372
row = row_build(ROW_COPY_POINTERS, clust_index,
373
prev_version, clust_offsets, heap);
374
entry = row_build_index_entry(row, index, heap);
376
/* NOTE that we cannot do the comparison as binary
377
fields because maybe the secondary index record has
378
already been updated to a different binary value in
379
a char field, but the collation identifies the old
380
and new value anyway! */
382
if (dtuple_datas_are_ordering_equal(ientry, entry)) {
390
version = prev_version;
394
/*********************************************************************
395
Constructs the version of a clustered index record which a consistent
396
read should see. We assume that the trx id stored in rec is such that
397
the consistent read should not see rec in its present version. */
400
row_vers_build_for_consistent_read(
401
/*===============================*/
402
/* out: DB_SUCCESS or DB_MISSING_HISTORY */
403
rec_t* rec, /* in: record in a clustered index; the
404
caller must have a latch on the page; this
405
latch locks the top of the stack of versions
407
mtr_t* mtr, /* in: mtr holding the latch on rec */
408
dict_index_t* index, /* in: the clustered index */
409
ulint** offsets,/* in/out: offsets returned by
410
rec_get_offsets(rec, index) */
411
read_view_t* view, /* in: the consistent read view */
412
mem_heap_t** offset_heap,/* in/out: memory heap from which
413
the offsets are allocated */
414
mem_heap_t* in_heap,/* in: memory heap from which the memory for
415
old_vers is allocated; memory for possible
416
intermediate versions is allocated and freed
417
locally within the function */
418
rec_t** old_vers)/* out, own: old version, or NULL if the
419
record does not exist in the view, that is,
420
it was freshly inserted afterwards */
425
mem_heap_t* heap = NULL;
429
ut_ad(index->type & DICT_CLUSTERED);
430
ut_ad(mtr_memo_contains(mtr, buf_block_align(rec), MTR_MEMO_PAGE_X_FIX)
431
|| mtr_memo_contains(mtr, buf_block_align(rec),
432
MTR_MEMO_PAGE_S_FIX));
433
#ifdef UNIV_SYNC_DEBUG
434
ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
435
#endif /* UNIV_SYNC_DEBUG */
437
ut_ad(rec_offs_validate(rec, index, *offsets));
439
trx_id = row_get_rec_trx_id(rec, index, *offsets);
441
ut_ad(!read_view_sees_trx_id(view, trx_id));
443
rw_lock_s_lock(&(purge_sys->latch));
447
mem_heap_t* heap2 = heap;
448
trx_undo_rec_t* undo_rec;
451
heap = mem_heap_create(1024);
453
/* If we have high-granularity consistent read view and
454
creating transaction of the view is the same as trx_id in
455
the record we see this record only in the case when
456
undo_no of the record is < undo_no in the view. */
458
if (view->type == VIEW_HIGH_GRANULARITY
459
&& ut_dulint_cmp(view->creator_trx_id, trx_id) == 0) {
461
roll_ptr = row_get_rec_roll_ptr(version, index,
463
undo_rec = trx_undo_get_undo_rec_low(roll_ptr, heap);
464
undo_no = trx_undo_rec_get_undo_no(undo_rec);
465
mem_heap_empty(heap);
467
if (ut_dulint_cmp(view->undo_no, undo_no) > 0) {
468
/* The view already sees this version: we can
469
copy it to in_heap and return */
471
buf = mem_heap_alloc(in_heap,
472
rec_offs_size(*offsets));
473
*old_vers = rec_copy(buf, version, *offsets);
474
rec_offs_make_valid(*old_vers, index,
482
err = trx_undo_prev_version_build(rec, mtr, version, index,
486
mem_heap_free(heap2); /* free version */
489
if (err != DB_SUCCESS) {
493
if (prev_version == NULL) {
494
/* It was a freshly inserted version */
501
*offsets = rec_get_offsets(prev_version, index, *offsets,
502
ULINT_UNDEFINED, offset_heap);
504
trx_id = row_get_rec_trx_id(prev_version, index, *offsets);
506
if (read_view_sees_trx_id(view, trx_id)) {
508
/* The view already sees this version: we can copy
509
it to in_heap and return */
511
buf = mem_heap_alloc(in_heap, rec_offs_size(*offsets));
512
*old_vers = rec_copy(buf, prev_version, *offsets);
513
rec_offs_make_valid(*old_vers, index, *offsets);
519
version = prev_version;
523
rw_lock_s_unlock(&(purge_sys->latch));
528
/*********************************************************************
529
Constructs the last committed version of a clustered index record,
530
which should be seen by a semi-consistent read. */
533
row_vers_build_for_semi_consistent_read(
534
/*====================================*/
535
/* out: DB_SUCCESS or DB_MISSING_HISTORY */
536
rec_t* rec, /* in: record in a clustered index; the
537
caller must have a latch on the page; this
538
latch locks the top of the stack of versions
540
mtr_t* mtr, /* in: mtr holding the latch on rec */
541
dict_index_t* index, /* in: the clustered index */
542
ulint** offsets,/* in/out: offsets returned by
543
rec_get_offsets(rec, index) */
544
mem_heap_t** offset_heap,/* in/out: memory heap from which
545
the offsets are allocated */
546
mem_heap_t* in_heap,/* in: memory heap from which the memory for
547
old_vers is allocated; memory for possible
548
intermediate versions is allocated and freed
549
locally within the function */
550
rec_t** old_vers)/* out, own: rec, old version, or NULL if the
551
record does not exist in the view, that is,
552
it was freshly inserted afterwards */
555
mem_heap_t* heap = NULL;
558
dulint rec_trx_id = ut_dulint_create(0, 0);
560
ut_ad(index->type & DICT_CLUSTERED);
561
ut_ad(mtr_memo_contains(mtr, buf_block_align(rec), MTR_MEMO_PAGE_X_FIX)
562
|| mtr_memo_contains(mtr, buf_block_align(rec),
563
MTR_MEMO_PAGE_S_FIX));
564
#ifdef UNIV_SYNC_DEBUG
565
ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
566
#endif /* UNIV_SYNC_DEBUG */
568
ut_ad(rec_offs_validate(rec, index, *offsets));
570
rw_lock_s_lock(&(purge_sys->latch));
571
/* The S-latch on purge_sys prevents the purge view from
572
changing. Thus, if we have an uncommitted transaction at
573
this point, then purge cannot remove its undo log even if
574
the transaction could commit now. */
582
dulint version_trx_id;
584
version_trx_id = row_get_rec_trx_id(version, index, *offsets);
585
if (rec == version) {
586
rec_trx_id = version_trx_id;
589
mutex_enter(&kernel_mutex);
590
version_trx = trx_get_on_id(version_trx_id);
591
mutex_exit(&kernel_mutex);
594
|| version_trx->conc_state == TRX_NOT_STARTED
595
|| version_trx->conc_state == TRX_COMMITTED_IN_MEMORY) {
597
/* We found a version that belongs to a
598
committed transaction: return it. */
600
if (rec == version) {
606
/* We assume that a rolled-back transaction stays in
607
TRX_ACTIVE state until all the changes have been
608
rolled back and the transaction is removed from
609
the global list of transactions. */
611
if (!ut_dulint_cmp(rec_trx_id, version_trx_id)) {
612
/* The transaction was committed while
613
we searched for earlier versions.
614
Return the current version as a
615
semi-consistent read. */
618
*offsets = rec_get_offsets(version,
624
buf = mem_heap_alloc(in_heap, rec_offs_size(*offsets));
625
*old_vers = rec_copy(buf, version, *offsets);
626
rec_offs_make_valid(*old_vers, index, *offsets);
633
heap = mem_heap_create(1024);
635
err = trx_undo_prev_version_build(rec, mtr, version, index,
639
mem_heap_free(heap2); /* free version */
642
if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
646
if (prev_version == NULL) {
647
/* It was a freshly inserted version */
654
version = prev_version;
655
*offsets = rec_get_offsets(version, index, *offsets,
656
ULINT_UNDEFINED, offset_heap);
662
rw_lock_s_unlock(&(purge_sys->latch));