/*****************************************************************************

Copyright (C) 1997, 2009, Innobase Oy. All Rights Reserved.

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
St, Fifth Floor, Boston, MA 02110-1301 USA

*****************************************************************************/
/**************************************************//**
@file row/row0vers.c
Row versions

Created 2/6/1997 Heikki Tuuri
*******************************************************/
#include "row0vers.h"

#ifdef UNIV_NONINL
#include "row0vers.ic"
#endif

#include "dict0dict.h"
#include "dict0boot.h"
#include "btr0btr.h"
#include "mach0data.h"
#include "trx0rseg.h"
#include "trx0trx.h"
#include "trx0roll.h"
#include "trx0undo.h"
#include "trx0purge.h"
#include "trx0rec.h"
#include "que0que.h"
#include "row0row.h"
#include "row0upd.h"
#include "rem0cmp.h"
#include "read0read.h"
#include "lock0lock.h"
/*****************************************************************//**
Finds out if an active transaction has inserted or modified a secondary
index record. NOTE: the kernel mutex is temporarily released in this
function!
@return NULL if committed, else the active transaction */
UNIV_INTERN
trx_t*
row_vers_impl_x_locked_off_kernel(
/*==============================*/
	const rec_t*	rec,	/*!< in: record in a secondary index */
	dict_index_t*	index,	/*!< in: the secondary index */
	const ulint*	offsets)/*!< in: rec_get_offsets(rec, index) */
{
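	/* General strategy: find the clustered index record that rec
	belongs to, and walk the stack of versions of that record from
	the newest version backwards, to decide whether the transaction
	whose id is stamped on the clustered index record has modified
	rec and therefore holds an implicit x-lock on it. */
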
	dict_index_t*	clust_index;
	rec_t*		clust_rec;
	ulint*		clust_offsets;
	rec_t*		version;
	trx_id_t	trx_id;
	mem_heap_t*	heap;
	mem_heap_t*	heap2;
	dtuple_t*	row;
	dtuple_t*	entry	= NULL; /* assignment to eliminate compiler
					warning */
	trx_t*		trx;
	ulint		rec_del;
#ifdef UNIV_DEBUG
	ulint		err;
#endif /* UNIV_DEBUG */
	mtr_t		mtr;
	ulint		comp;

	ut_ad(mutex_own(&kernel_mutex));
#ifdef UNIV_SYNC_DEBUG
	ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
#endif /* UNIV_SYNC_DEBUG */

	mutex_exit(&kernel_mutex);

	mtr_start(&mtr);

	/* Search for the clustered index record: this is a time-consuming
	operation: therefore we release the kernel mutex; also, the release
	is required by the latching order convention. The latch on the
	clustered index locks the top of the stack of versions. We also
	reserve purge_latch to lock the bottom of the version stack. */

	clust_rec = row_get_clust_rec(BTR_SEARCH_LEAF, rec, index,
				      &clust_index, &mtr);
	if (!clust_rec) {
		/* In a rare case it is possible that no clust rec is found
		for a secondary index record: if in row0umod.c
		row_undo_mod_remove_clust_low() we have already removed the
		clust rec, while purge is still cleaning and removing
		secondary index records associated with earlier versions of
		the clustered index record. In that case there cannot be
		any implicit lock on the secondary index record, because
		an active transaction which has modified the secondary index
		record has also modified the clustered index record. And in
		a rollback we always undo the modifications to secondary index
		records before the clustered index record. */

		mutex_enter(&kernel_mutex);
		mtr_commit(&mtr);

		return(NULL);
	}

	heap = mem_heap_create(1024);
	clust_offsets = rec_get_offsets(clust_rec, clust_index, NULL,
					ULINT_UNDEFINED, &heap);
	trx_id = row_get_rec_trx_id(clust_rec, clust_index, clust_offsets);
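
	/* trx_id is now the id of the transaction that last modified or
	inserted clust_rec; only that transaction, while it is still
	active, can hold an implicit x-lock on rec. */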

	mtr_s_lock(&(purge_sys->latch), &mtr);

	mutex_enter(&kernel_mutex);
	trx = NULL;

	if (!trx_is_active(trx_id)) {
		/* The transaction that modified or inserted clust_rec is no
		longer active: no implicit lock on rec */

		goto exit_func;
	}

	if (!lock_check_trx_id_sanity(trx_id, clust_rec, clust_index,
				      clust_offsets, TRUE)) {
		/* Corruption noticed: try to avoid a crash by returning */

		goto exit_func;
	}

	comp = page_rec_is_comp(rec);
	ut_ad(index->table == clust_index->table);
	ut_ad(!!comp == dict_table_is_comp(index->table));
	ut_ad(!comp == !page_rec_is_comp(clust_rec));

	/* We look up if some earlier version, which was modified by the trx_id
	transaction, of the clustered index record would require rec to be in
	a different state (delete marked or unmarked, or have different field
	values, or not existing). If there is such a version, then rec was
	modified by the trx_id transaction, and it has an implicit x-lock on
	rec. Note that if clust_rec itself would require rec to be in a
	different state, then the trx_id transaction has not yet had time to
	modify rec, and does not necessarily have an implicit x-lock on rec. */

	rec_del = rec_get_deleted_flag(rec, comp);

	version = clust_rec;

	for (;;) {
		rec_t*		prev_version;
		ulint		vers_del;
		row_ext_t*	ext;
		trx_id_t	prev_trx_id;

		mutex_exit(&kernel_mutex);

		/* While we retrieve an earlier version of clust_rec, we
		release the kernel mutex, because it may take time to access
		the disk. After the release, we have to check if the trx_id
		transaction is still active. We keep the semaphore in mtr on
		the clust_rec page, so that no other transaction can update
		it and get an implicit x-lock on rec. */

		heap2 = heap;
		heap = mem_heap_create(1024);
#ifdef UNIV_DEBUG
		err =
#endif /* UNIV_DEBUG */
		trx_undo_prev_version_build(clust_rec, &mtr, version,
					    clust_index, clust_offsets,
					    heap, &prev_version);
		mem_heap_free(heap2); /* free version and clust_offsets */

		if (prev_version == NULL) {
			mutex_enter(&kernel_mutex);

			if (!trx_is_active(trx_id)) {
				/* Transaction no longer active: no
				implicit x-lock */

				break;
			}

			/* If the transaction is still active,
			clust_rec must be a fresh insert, because no
			previous version was found. */
			ut_ad(err == DB_SUCCESS);

			/* It was a freshly inserted version: there is an
			implicit x-lock on rec */

			trx = trx_get_on_id(trx_id);

			break;
		}
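
		/* A previous version was built: read its delete mark and
		trx id while the kernel mutex is still released. */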

		clust_offsets = rec_get_offsets(prev_version, clust_index,
						NULL, ULINT_UNDEFINED, &heap);

		vers_del = rec_get_deleted_flag(prev_version, comp);
		prev_trx_id = row_get_rec_trx_id(prev_version, clust_index,
						 clust_offsets);

		/* If the trx_id and prev_trx_id are different and if
		the prev_version is marked deleted then the
		prev_trx_id must have already committed for the trx_id
		to be able to modify the row. Therefore, prev_trx_id
		cannot hold any implicit lock. */
		if (vers_del && trx_id != prev_trx_id) {

			mutex_enter(&kernel_mutex);
			break;
		}

		/* The stack of versions is locked by mtr. Thus, it
		is safe to fetch the prefixes for externally stored
		columns. */
		row = row_build(ROW_COPY_POINTERS, clust_index, prev_version,
				clust_offsets, NULL, &ext, heap);
		entry = row_build_index_entry(row, ext, index, heap);
		/* entry may be NULL if a record was inserted in place
		of a deleted record, and the BLOB pointers of the new
		record were not initialized yet. But in that case,
		prev_version should be NULL. */
		ut_a(entry);

		mutex_enter(&kernel_mutex);

		if (!trx_is_active(trx_id)) {
			/* Transaction no longer active: no implicit x-lock */

			break;
		}

		/* If we get here, we know that the trx_id transaction is
		still active and it has modified prev_version. Let us check
		if prev_version would require rec to be in a different
		state. */

		/* The previous version of clust_rec must be
		accessible, because the transaction is still active
		and clust_rec was not a fresh insert. */
		ut_ad(err == DB_SUCCESS);

		/* We check if entry and rec are identified in the alphabetical
		ordering */
		if (0 == cmp_dtuple_rec(entry, rec, offsets)) {
			/* The delete marks of rec and prev_version should be
			equal for rec to be in the state required by
			prev_version */

			if (rec_del != vers_del) {
				trx = trx_get_on_id(trx_id);

				break;
			}

			/* It is possible that the row was updated so that the
			secondary index record remained the same in
			alphabetical ordering, but the field values changed
			still. For example, 'abc' -> 'ABC'. Check also that. */

			dtuple_set_types_binary(entry,
						dtuple_get_n_fields(entry));

			if (0 != cmp_dtuple_rec(entry, rec, offsets)) {

				trx = trx_get_on_id(trx_id);

				break;
			}
		} else if (!rec_del) {
			/* The delete mark should be set in rec for it to be
			in the state required by prev_version */

			trx = trx_get_on_id(trx_id);

			break;
		}

		if (trx_id != prev_trx_id) {
			/* The versions modified by the trx_id transaction end
			to prev_version: no implicit x-lock */

			break;
		}

		version = prev_version;
	}/* for (;;) */

exit_func:
	mtr_commit(&mtr);
	mem_heap_free(heap);

	return(trx);
}
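
/* A condensed sketch of how this function is reached from the lock system
(the caller is lock_sec_rec_some_has_impl_off_kernel() in lock0lock.c in
this source tree; treat the snippet as an illustration, not the exact code):

	trx = row_vers_impl_x_locked_off_kernel(rec, index, offsets);

	if (trx) {
		... trx is active and holds an implicit x-lock on rec;
		the caller may convert it to an explicit lock ...
	}
*/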
/*****************************************************************//**
Finds out if we must preserve a delete marked earlier version of a clustered
index record, because it is >= the purge view.
@return TRUE if earlier version should be preserved */
UNIV_INTERN
ibool
row_vers_must_preserve_del_marked(
/*==============================*/
	trx_id_t	trx_id,	/*!< in: transaction id in the version */
	mtr_t*		mtr)	/*!< in: mtr holding the latch on the
				clustered index record; it will also
				hold the latch on purge_view */
{
#ifdef UNIV_SYNC_DEBUG
	ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
#endif /* UNIV_SYNC_DEBUG */

	mtr_s_lock(&(purge_sys->latch), mtr);

	if (trx_purge_update_undo_must_exist(trx_id)) {

		/* A purge operation is not yet allowed to remove this
		delete marked record */

		return(TRUE);
	}

	return(FALSE);
}
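
/* A minimal usage sketch (hypothetical caller, for illustration only):

	mtr_start(&mtr);
	... latch the page of the delete marked clustered index record ...

	if (row_vers_must_preserve_del_marked(trx_id, &mtr)) {
		... a read view >= the purge view may still need the
		version: purge must not remove it yet ...
	}

	mtr_commit(&mtr);	... also releases purge_sys->latch ...
*/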
/*****************************************************************//**
Finds out if a version of the record, where the version >= the current
purge view, should have ientry as its secondary index entry. We check
if there is any not delete marked version of the record where the trx
id >= purge view, and the secondary index entry and ientry are identified in
the alphabetical ordering; exactly in this case we return TRUE.
@return TRUE if earlier version should have */
UNIV_INTERN
ibool
row_vers_old_has_index_entry(
/*=========================*/
	ibool		also_curr,/*!< in: TRUE if also rec is included in the
				versions to search; otherwise only versions
				prior to it are searched */
	const rec_t*	rec,	/*!< in: record in the clustered index; the
				caller must have a latch on the page */
	mtr_t*		mtr,	/*!< in: mtr holding the latch on rec; it will
				also hold the latch on purge_view */
	dict_index_t*	index,	/*!< in: the secondary index */
	const dtuple_t*	ientry)	/*!< in: the secondary index entry */
{
	const rec_t*	version;
	rec_t*		prev_version;
	dict_index_t*	clust_index;
	ulint*		clust_offsets;
	mem_heap_t*	heap;
	mem_heap_t*	heap2;
	const dtuple_t*	row;
	const dtuple_t*	entry;
	ulint		err;
	ulint		comp;

	ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
	      || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
#ifdef UNIV_SYNC_DEBUG
	ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
#endif /* UNIV_SYNC_DEBUG */
	mtr_s_lock(&(purge_sys->latch), mtr);

	clust_index = dict_table_get_first_index(index->table);

	comp = page_rec_is_comp(rec);
	ut_ad(!dict_table_is_comp(index->table) == !comp);
	heap = mem_heap_create(1024);
	clust_offsets = rec_get_offsets(rec, clust_index, NULL,
					ULINT_UNDEFINED, &heap);
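
	/* If the caller asked for it, first check whether the current
	version of the record matches ientry; only then walk the stack of
	older versions. */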

	if (also_curr && !rec_get_deleted_flag(rec, comp)) {
		row_ext_t*	ext;

		/* The stack of versions is locked by mtr.
		Thus, it is safe to fetch the prefixes for
		externally stored columns. */
		row = row_build(ROW_COPY_POINTERS, clust_index,
				rec, clust_offsets, NULL, &ext, heap);
		entry = row_build_index_entry(row, ext, index, heap);

		/* If entry == NULL, the record contains unset BLOB
		pointers. This must be a freshly inserted record. If
		this is called from
		row_purge_remove_sec_if_poss_low(), the thread will
		hold latches on the clustered index and the secondary
		index. Because the insert works in three steps:

			(1) insert the record to clustered index
			(2) store the BLOBs and update BLOB pointers
			(3) insert records to secondary indexes

		the purge thread can safely ignore freshly inserted
		records and delete the secondary index record. The
		thread that inserted the new record will be inserting
		the secondary index records. */

		/* NOTE that we cannot do the comparison as binary
		fields because the row is maybe being modified so that
		the clustered index record has already been updated to
		a different binary value in a char field, but the
		collation identifies the old and new value anyway! */

		if (entry && !dtuple_coll_cmp(ientry, entry)) {

			mem_heap_free(heap);

			return(TRUE);
		}
	}

	version = rec;

	for (;;) {
		heap2 = heap;
		heap = mem_heap_create(1024);
		err = trx_undo_prev_version_build(rec, mtr, version,
						  clust_index, clust_offsets,
						  heap, &prev_version);
		mem_heap_free(heap2); /* free version and clust_offsets */

		if (err != DB_SUCCESS || !prev_version) {
			/* Versions end here */

			mem_heap_free(heap);

			return(FALSE);
		}

		clust_offsets = rec_get_offsets(prev_version, clust_index,
						NULL, ULINT_UNDEFINED, &heap);

		if (!rec_get_deleted_flag(prev_version, comp)) {
			row_ext_t*	ext;

			/* The stack of versions is locked by mtr.
			Thus, it is safe to fetch the prefixes for
			externally stored columns. */
			row = row_build(ROW_COPY_POINTERS, clust_index,
					prev_version, clust_offsets,
					NULL, &ext, heap);
			entry = row_build_index_entry(row, ext, index, heap);

			/* If entry == NULL, the record contains unset
			BLOB pointers. This must be a freshly
			inserted record that we can safely ignore.
			For the justification, see the comments after
			the previous row_build_index_entry() call. */

			/* NOTE that we cannot do the comparison as binary
			fields because maybe the secondary index record has
			already been updated to a different binary value in
			a char field, but the collation identifies the old
			and new value anyway! */

			if (entry && !dtuple_coll_cmp(ientry, entry)) {

				mem_heap_free(heap);

				return(TRUE);
			}
		}

		version = prev_version;
	}
}
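
/* Purge uses this function to decide whether a secondary index record may
be removed. A hedged sketch of the pattern (hypothetical caller; see
row_purge_remove_sec_if_poss_low() in row0purge.c for the real one):

	if (!row_vers_old_has_index_entry(TRUE, rec, &mtr, index, entry)) {
		... no version visible to the purge view needs entry:
		the secondary index record can be deleted ...
	}
*/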
/*****************************************************************//**
Constructs the version of a clustered index record which a consistent
read should see. We assume that the trx id stored in rec is such that
the consistent read should not see rec in its present version.
@return DB_SUCCESS or DB_MISSING_HISTORY */
UNIV_INTERN
ulint
row_vers_build_for_consistent_read(
/*===============================*/
	const rec_t*	rec,	/*!< in: record in a clustered index; the
				caller must have a latch on the page; this
				latch locks the top of the stack of versions
				of this record! */
	mtr_t*		mtr,	/*!< in: mtr holding the latch on rec */
	dict_index_t*	index,	/*!< in: the clustered index */
	ulint**		offsets,/*!< in/out: offsets returned by
				rec_get_offsets(rec, index) */
	read_view_t*	view,	/*!< in: the consistent read view */
	mem_heap_t**	offset_heap,/*!< in/out: memory heap from which
				the offsets are allocated */
	mem_heap_t*	in_heap,/*!< in: memory heap from which the memory for
				*old_vers is allocated; memory for possible
				intermediate versions is allocated and freed
				locally within the function */
	rec_t**		old_vers)/*!< out, own: old version, or NULL if the
				record does not exist in the view, that is,
				it was freshly inserted afterwards */
{
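	/* General strategy: walk the stack of versions from rec backwards
	using the undo log, until a version is found that the read view
	sees (a copy of it, built in in_heap, is returned), or until the
	versions end (the record was inserted after the view was created,
	and *old_vers is set to NULL). */
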
	const rec_t*	version;
	rec_t*		prev_version;
	trx_id_t	trx_id;
	mem_heap_t*	heap		= NULL;
	byte*		buf;
	ulint		err;

	ut_ad(dict_index_is_clust(index));
	ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
	      || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
#ifdef UNIV_SYNC_DEBUG
	ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
#endif /* UNIV_SYNC_DEBUG */

	ut_ad(rec_offs_validate(rec, index, *offsets));

	trx_id = row_get_rec_trx_id(rec, index, *offsets);

	ut_ad(!read_view_sees_trx_id(view, trx_id));

	rw_lock_s_lock(&(purge_sys->latch));
	version = rec;

	for (;;) {
		mem_heap_t*	heap2	= heap;
		trx_undo_rec_t*	undo_rec;
		roll_ptr_t	roll_ptr;
		undo_no_t	undo_no;
		heap = mem_heap_create(1024);

		/* If we have high-granularity consistent read view and
		creating transaction of the view is the same as trx_id in
		the record we see this record only in the case when
		undo_no of the record is < undo_no in the view. */

		if (view->type == VIEW_HIGH_GRANULARITY
		    && view->creator_trx_id == trx_id) {

			roll_ptr = row_get_rec_roll_ptr(version, index,
							*offsets);
			undo_rec = trx_undo_get_undo_rec_low(roll_ptr, heap);
			undo_no = trx_undo_rec_get_undo_no(undo_rec);
			mem_heap_empty(heap);

			if (view->undo_no > undo_no) {
				/* The view already sees this version: we can
				copy it to in_heap and return */

				buf = mem_heap_alloc(in_heap,
						     rec_offs_size(*offsets));
				*old_vers = rec_copy(buf, version, *offsets);
				rec_offs_make_valid(*old_vers, index,
						    *offsets);
				err = DB_SUCCESS;

				break;
			}
		}

		err = trx_undo_prev_version_build(rec, mtr, version, index,
						  *offsets, heap,
						  &prev_version);
		if (heap2) {
			mem_heap_free(heap2); /* free version */
		}

		if (err != DB_SUCCESS) {
			break;
		}

		if (prev_version == NULL) {
			/* It was a freshly inserted version */
			*old_vers = NULL;
			err = DB_SUCCESS;

			break;
		}

		*offsets = rec_get_offsets(prev_version, index, *offsets,
					   ULINT_UNDEFINED, offset_heap);

		trx_id = row_get_rec_trx_id(prev_version, index, *offsets);

		if (read_view_sees_trx_id(view, trx_id)) {

			/* The view already sees this version: we can copy
			it to in_heap and return */

			buf = mem_heap_alloc(in_heap,
					     rec_offs_size(*offsets));
			*old_vers = rec_copy(buf, prev_version, *offsets);
			rec_offs_make_valid(*old_vers, index, *offsets);
			err = DB_SUCCESS;

			break;
		}

		version = prev_version;
	}/* for (;;) */

	mem_heap_free(heap);
	rw_lock_s_unlock(&(purge_sys->latch));

	return(err);
}
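
/* A condensed sketch of the call pattern in row_search_for_mysql() in
row0sel.c (names follow that caller; the snippet is illustrative, not the
exact code):

	if (!lock_clust_rec_cons_read_sees(rec, index, offsets, read_view)) {

		err = row_vers_build_for_consistent_read(
			rec, &mtr, index, &offsets, read_view,
			&offset_heap, heap, &old_vers);

		if (err == DB_SUCCESS && old_vers == NULL) {
			... the record did not exist in the view:
			skip it ...
		}
	}
*/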
/*****************************************************************//**
Constructs the last committed version of a clustered index record,
which should be seen by a semi-consistent read.
@return DB_SUCCESS or DB_MISSING_HISTORY */
UNIV_INTERN
ulint
row_vers_build_for_semi_consistent_read(
/*====================================*/
	const rec_t*	rec,	/*!< in: record in a clustered index; the
				caller must have a latch on the page; this
				latch locks the top of the stack of versions
				of this record! */
	mtr_t*		mtr,	/*!< in: mtr holding the latch on rec */
	dict_index_t*	index,	/*!< in: the clustered index */
	ulint**		offsets,/*!< in/out: offsets returned by
				rec_get_offsets(rec, index) */
	mem_heap_t**	offset_heap,/*!< in/out: memory heap from which
				the offsets are allocated */
	mem_heap_t*	in_heap,/*!< in: memory heap from which the memory for
				*old_vers is allocated; memory for possible
				intermediate versions is allocated and freed
				locally within the function */
	const rec_t**	old_vers)/*!< out: rec, old version, or NULL if the
				record does not exist in the view, that is,
				it was freshly inserted afterwards */
{
	const rec_t*	version;
	mem_heap_t*	heap		= NULL;
	byte*		buf;
	ulint		err;
	trx_id_t	rec_trx_id	= 0;

	ut_ad(dict_index_is_clust(index));
	ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
	      || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
#ifdef UNIV_SYNC_DEBUG
	ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
#endif /* UNIV_SYNC_DEBUG */

	ut_ad(rec_offs_validate(rec, index, *offsets));

	rw_lock_s_lock(&(purge_sys->latch));
	/* The S-latch on purge_sys prevents the purge view from
	changing. Thus, if we have an uncommitted transaction at
	this point, then purge cannot remove its undo log even if
	the transaction could commit now. */

	version = rec;

	for (;;) {
		trx_t*		version_trx;
		mem_heap_t*	heap2;
		rec_t*		prev_version;
		trx_id_t	version_trx_id;

		version_trx_id = row_get_rec_trx_id(version, index, *offsets);
		if (rec == version) {
			rec_trx_id = version_trx_id;
		}

		mutex_enter(&kernel_mutex);
		version_trx = trx_get_on_id(version_trx_id);
		mutex_exit(&kernel_mutex);

		if (!version_trx
		    || version_trx->conc_state == TRX_NOT_STARTED
		    || version_trx->conc_state == TRX_COMMITTED_IN_MEMORY) {

			/* We found a version that belongs to a
			committed transaction: return it. */

			if (rec == version) {
				*old_vers = rec;
				err = DB_SUCCESS;
				break;
			}

			/* We assume that a rolled-back transaction stays in
			TRX_ACTIVE state until all the changes have been
			rolled back and the transaction is removed from
			the global list of transactions. */

			if (rec_trx_id == version_trx_id) {
				/* The transaction was committed while
				we searched for earlier versions.
				Return the current version as a
				semi-consistent read. */

				version = rec;
				*offsets = rec_get_offsets(version,
							   index, *offsets,
							   ULINT_UNDEFINED,
							   offset_heap);
			}

			buf = mem_heap_alloc(in_heap, rec_offs_size(*offsets));
			*old_vers = rec_copy(buf, version, *offsets);
			rec_offs_make_valid(*old_vers, index, *offsets);
			err = DB_SUCCESS;

			break;
		}

		heap2 = heap;
		heap = mem_heap_create(1024);

		err = trx_undo_prev_version_build(rec, mtr, version, index,
						  *offsets, heap,
						  &prev_version);
		if (heap2) {
			mem_heap_free(heap2); /* free version */
		}

		if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
			break;
		}

		if (prev_version == NULL) {
			/* It was a freshly inserted version */
			*old_vers = NULL;
			err = DB_SUCCESS;

			break;
		}

		version = prev_version;
		*offsets = rec_get_offsets(version, index, *offsets,
					   ULINT_UNDEFINED, offset_heap);
	}/* for (;;) */

	if (heap) {
		mem_heap_free(heap);
	}
	rw_lock_s_unlock(&(purge_sys->latch));

	return(err);
}
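
/* Semi-consistent reads are used for UPDATE-like statements under the
READ COMMITTED isolation level (see the prebuilt->row_read_type handling
in row_search_for_mysql() in row0sel.c): instead of waiting for a lock on
a record whose newest version is uncommitted, the caller reads the last
committed version and, if that version does not match the WHERE condition,
skips the row without locking it. */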