/*****************************************************************************

Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA

*****************************************************************************/

/**************************************************//**
@file row/row0vers.c
Row versions

Created 2/6/1997 Heikki Tuuri
*******************************************************/

#include "row0vers.h"

#ifdef UNIV_NONINL
#include "row0vers.ic"
#endif

#include "dict0dict.h"
#include "dict0boot.h"
#include "btr0btr.h"
#include "mach0data.h"
#include "trx0rseg.h"
#include "trx0trx.h"
#include "trx0roll.h"
#include "trx0undo.h"
#include "trx0purge.h"
#include "trx0rec.h"
#include "que0que.h"
#include "row0row.h"
#include "row0upd.h"
#include "rem0cmp.h"
#include "read0read.h"
#include "lock0lock.h"

/*****************************************************************//**
Finds out if an active transaction has inserted or modified a secondary
index record. NOTE: the kernel mutex is temporarily released in this
function!
@return	NULL if committed, else the active transaction */
UNIV_INTERN
trx_t*
row_vers_impl_x_locked_off_kernel(
/*==============================*/
	const rec_t*	rec,	/*!< in: record in a secondary index */
	dict_index_t*	index,	/*!< in: the secondary index */
	const ulint*	offsets)/*!< in: rec_get_offsets(rec, index) */
{
	dict_index_t*	clust_index;
	rec_t*		clust_rec;
	ulint*		clust_offsets;
	rec_t*		version;
	trx_id_t	trx_id;
	mem_heap_t*	heap;
	mem_heap_t*	heap2;
	dtuple_t*	row;
	dtuple_t*	entry	= NULL; /* assignment to eliminate compiler
					warning */
	trx_t*		trx;
	ulint		rec_del;
	ulint		err;
	mtr_t		mtr;
	ulint		comp;

	ut_ad(mutex_own(&kernel_mutex));
#ifdef UNIV_SYNC_DEBUG
	ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
#endif /* UNIV_SYNC_DEBUG */

	mutex_exit(&kernel_mutex);

	mtr_start(&mtr);

	/* Search for the clustered index record: this is a time-consuming
	operation: therefore we release the kernel mutex; also, the release
	is required by the latching order convention. The latch on the
	clustered index locks the top of the stack of versions. We also
	reserve purge_latch to lock the bottom of the version stack. */

	clust_rec = row_get_clust_rec(BTR_SEARCH_LEAF, rec, index,
				      &clust_index, &mtr);
	if (!clust_rec) {
		/* In a rare case it is possible that no clust rec is found
		for a secondary index record: if in row0umod.c
		row_undo_mod_remove_clust_low() we have already removed the
		clust rec, while purge is still cleaning and removing
		secondary index records associated with earlier versions of
		the clustered index record. In that case there cannot be
		any implicit lock on the secondary index record, because
		an active transaction which has modified the secondary index
		record has also modified the clustered index record. And in
		a rollback we always undo the modifications to secondary index
		records before the clustered index record. */

		mutex_enter(&kernel_mutex);
		mtr_commit(&mtr);

		return(NULL);
	}

	heap = mem_heap_create(1024);
	clust_offsets = rec_get_offsets(clust_rec, clust_index, NULL,
					ULINT_UNDEFINED, &heap);
	trx_id = row_get_rec_trx_id(clust_rec, clust_index, clust_offsets);

	mtr_s_lock(&(purge_sys->latch), &mtr);

	mutex_enter(&kernel_mutex);

	trx = NULL;
	if (!trx_is_active(trx_id)) {
		/* The transaction that modified or inserted clust_rec is no
		longer active: no implicit lock on rec */
		goto exit_func;
	}

	if (!lock_check_trx_id_sanity(trx_id, clust_rec, clust_index,
				      clust_offsets, TRUE)) {
		/* Corruption noticed: try to avoid a crash by returning */
		goto exit_func;
	}

	comp = page_rec_is_comp(rec);
	ut_ad(index->table == clust_index->table);
	ut_ad(!!comp == dict_table_is_comp(index->table));
	ut_ad(!comp == !page_rec_is_comp(clust_rec));

	/* We look up if some earlier version, which was modified by the trx_id
	transaction, of the clustered index record would require rec to be in
	a different state (delete marked or unmarked, or have different field
	values, or not existing). If there is such a version, then rec was
	modified by the trx_id transaction, and it has an implicit x-lock on
	rec. Note that if clust_rec itself would require rec to be in a
	different state, then the trx_id transaction has not yet had time to
	modify rec, and does not necessarily have an implicit x-lock on rec. */

	rec_del = rec_get_deleted_flag(rec, comp);
	trx = NULL;

	version = clust_rec;

	for (;;) {
		rec_t*		prev_version;
		ulint		vers_del;
		row_ext_t*	ext;
		trx_id_t	prev_trx_id;

		mutex_exit(&kernel_mutex);

		/* While we retrieve an earlier version of clust_rec, we
		release the kernel mutex, because it may take time to access
		the disk. After the release, we have to check if the trx_id
		transaction is still active. We keep the semaphore in mtr on
		the clust_rec page, so that no other transaction can update
		it and get an implicit x-lock on rec. */

		heap2 = heap;
		heap = mem_heap_create(1024);
		err = trx_undo_prev_version_build(clust_rec, &mtr, version,
						  clust_index, clust_offsets,
						  heap, &prev_version);
		mem_heap_free(heap2); /* free version and clust_offsets */

		if (prev_version == NULL) {
			mutex_enter(&kernel_mutex);

			if (!trx_is_active(trx_id)) {
				/* Transaction no longer active: no
				implicit x-lock */

				break;
			}

			/* If the transaction is still active,
			clust_rec must be a fresh insert, because no
			previous version was found. */
			ut_ad(err == DB_SUCCESS);

			/* It was a freshly inserted version: there is an
			implicit x-lock on rec */

			trx = trx_get_on_id(trx_id);

			break;
		}

		clust_offsets = rec_get_offsets(prev_version, clust_index,
						NULL, ULINT_UNDEFINED, &heap);

		vers_del = rec_get_deleted_flag(prev_version, comp);
		prev_trx_id = row_get_rec_trx_id(prev_version, clust_index,
						 clust_offsets);

		/* If the trx_id and prev_trx_id are different and if
		the prev_version is marked deleted then the
		prev_trx_id must have already committed for the trx_id
		to be able to modify the row. Therefore, prev_trx_id
		cannot hold any implicit lock. */
		if (vers_del && 0 != ut_dulint_cmp(trx_id, prev_trx_id)) {

			mutex_enter(&kernel_mutex);
			break;
		}

		/* The stack of versions is locked by mtr. Thus, it
		is safe to fetch the prefixes for externally stored
		columns. */
		row = row_build(ROW_COPY_POINTERS, clust_index, prev_version,
				clust_offsets, NULL, &ext, heap);
		entry = row_build_index_entry(row, ext, index, heap);
		/* entry may be NULL if a record was inserted in place
		of a deleted record, and the BLOB pointers of the new
		record were not initialized yet. But in that case,
		prev_version should be NULL. */
		ut_a(entry);

		mutex_enter(&kernel_mutex);

		if (!trx_is_active(trx_id)) {
			/* Transaction no longer active: no implicit x-lock */

			break;
		}

		/* If we get here, we know that the trx_id transaction is
		still active and it has modified prev_version. Let us check
		if prev_version would require rec to be in a different
		state. */

		/* The previous version of clust_rec must be
		accessible, because the transaction is still active
		and clust_rec was not a fresh insert. */
		ut_ad(err == DB_SUCCESS);

		/* We check if entry and rec are identified in the alphabetical
		ordering */
		if (0 == cmp_dtuple_rec(entry, rec, offsets)) {
			/* The delete marks of rec and prev_version should be
			equal for rec to be in the state required by
			prev_version */

			if (rec_del != vers_del) {
				trx = trx_get_on_id(trx_id);

				break;
			}

			/* It is possible that the row was updated so that the
			secondary index record remained the same in
			alphabetical ordering, but the field values changed
			still. For example, 'abc' -> 'ABC'. Check also that. */

			dtuple_set_types_binary(entry,
						dtuple_get_n_fields(entry));

			if (0 != cmp_dtuple_rec(entry, rec, offsets)) {

				trx = trx_get_on_id(trx_id);

				break;
			}
		} else if (!rec_del) {
			/* The delete mark should be set in rec for it to be
			in the state required by prev_version */

			trx = trx_get_on_id(trx_id);

			break;
		}

		if (0 != ut_dulint_cmp(trx_id, prev_trx_id)) {
			/* The versions modified by the trx_id transaction end
			to prev_version: no implicit x-lock */

			break;
		}

		version = prev_version;
	}/* for (;;) */

exit_func:
	mtr_commit(&mtr);
	mem_heap_free(heap);

	return(trx);
}
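
/* A minimal caller sketch (illustrative only, not part of the original
file; compare lock_sec_rec_some_has_impl_off_kernel() in lock0lock.c).
It shows the locking contract: kernel_mutex must be held on entry and
is held again on return, even though the function releases it
internally while walking the version stack. */
#if 0
static
trx_t*
row_vers_example_impl_lock(
/*=======================*/
	const rec_t*	rec,	/* in: secondary index record */
	dict_index_t*	index,	/* in: secondary index of rec */
	const ulint*	offsets)/* in: rec_get_offsets(rec, index) */
{
	trx_t*	impl_trx;

	ut_ad(mutex_own(&kernel_mutex));

	impl_trx = row_vers_impl_x_locked_off_kernel(rec, index, offsets);

	/* NULL: the modifying transaction has committed; otherwise
	impl_trx is the active transaction holding an implicit x-lock
	on rec. kernel_mutex is held again at this point. */
	return(impl_trx);
}
#endif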

/*****************************************************************//**
Finds out if we must preserve a delete marked earlier version of a clustered
index record, because it is >= the purge view.
@return	TRUE if earlier version should be preserved */
UNIV_INTERN
ibool
row_vers_must_preserve_del_marked(
/*==============================*/
	trx_id_t	trx_id,	/*!< in: transaction id in the version */
	mtr_t*		mtr)	/*!< in: mtr holding the latch on the
				clustered index record; it will also
				hold the latch on purge_view */
{
#ifdef UNIV_SYNC_DEBUG
	ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
#endif /* UNIV_SYNC_DEBUG */

	mtr_s_lock(&(purge_sys->latch), mtr);

	if (trx_purge_update_undo_must_exist(trx_id)) {

		/* A purge operation is not yet allowed to remove this
		delete marked record */

		return(TRUE);
	}

	return(FALSE);
}
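
/* A purge-side usage sketch (hypothetical helper, not from the
original file). A delete-marked old version may only be removed once
it is older than the purge view; this wrapper merely names that
decision. */
#if 0
static
ibool
row_vers_example_can_purge(
/*=======================*/
	trx_id_t	trx_id,	/* in: trx id of the delete-marked version */
	mtr_t*		mtr)	/* in: mtr holding the latch on the
				clustered index record */
{
	/* If the update undo log of trx_id must still exist, the
	version is >= the purge view and must be preserved. */
	return(!row_vers_must_preserve_del_marked(trx_id, mtr));
}
#endif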

/*****************************************************************//**
Finds out if a version of the record, where the version >= the current
purge view, should have ientry as its secondary index entry. We check
if there is any not delete marked version of the record where the trx
id >= purge view, and the secondary index entry and ientry are identified in
the alphabetical ordering; exactly in this case we return TRUE.
@return	TRUE if earlier version should have */
UNIV_INTERN
ibool
row_vers_old_has_index_entry(
/*=========================*/
	ibool		also_curr,/*!< in: TRUE if also rec is included in the
				versions to search; otherwise only versions
				prior to it are searched */
	const rec_t*	rec,	/*!< in: record in the clustered index; the
				caller must have a latch on the page */
	mtr_t*		mtr,	/*!< in: mtr holding the latch on rec; it will
				also hold the latch on purge_view */
	dict_index_t*	index,	/*!< in: the secondary index */
	const dtuple_t*	ientry)	/*!< in: the secondary index entry */
{
	const rec_t*	version;
	rec_t*		prev_version;
	dict_index_t*	clust_index;
	ulint*		clust_offsets;
	mem_heap_t*	heap;
	mem_heap_t*	heap2;
	const dtuple_t*	row;
	const dtuple_t*	entry;
	ulint		err;
	ulint		comp;

	ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
	      || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
#ifdef UNIV_SYNC_DEBUG
	ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
#endif /* UNIV_SYNC_DEBUG */

	mtr_s_lock(&(purge_sys->latch), mtr);

	clust_index = dict_table_get_first_index(index->table);

	comp = page_rec_is_comp(rec);
	ut_ad(!dict_table_is_comp(index->table) == !comp);
	heap = mem_heap_create(1024);
	clust_offsets = rec_get_offsets(rec, clust_index, NULL,
					ULINT_UNDEFINED, &heap);

	if (also_curr && !rec_get_deleted_flag(rec, comp)) {
		row_ext_t*	ext;

		/* The stack of versions is locked by mtr.
		Thus, it is safe to fetch the prefixes for
		externally stored columns. */
		row = row_build(ROW_COPY_POINTERS, clust_index,
				rec, clust_offsets, NULL, &ext, heap);
		entry = row_build_index_entry(row, ext, index, heap);

		/* If entry == NULL, the record contains unset BLOB
		pointers. This must be a freshly inserted record. If
		this is called from
		row_purge_remove_sec_if_poss_low(), the thread will
		hold latches on the clustered index and the secondary
		index. Because the insert works in three steps:

			(1) insert the record to clustered index
			(2) store the BLOBs and update BLOB pointers
			(3) insert records to secondary indexes

		the purge thread can safely ignore freshly inserted
		records and delete the secondary index record. The
		thread that inserted the new record will be inserting
		the secondary index records. */

		/* NOTE that we cannot do the comparison as binary
		fields because the row is maybe being modified so that
		the clustered index record has already been updated to
		a different binary value in a char field, but the
		collation identifies the old and new value anyway! */

		if (entry && !dtuple_coll_cmp(ientry, entry)) {

			mem_heap_free(heap);

			return(TRUE);
		}
	}

	version = rec;

	for (;;) {
		heap2 = heap;
		heap = mem_heap_create(1024);
		err = trx_undo_prev_version_build(rec, mtr, version,
						  clust_index, clust_offsets,
						  heap, &prev_version);
		mem_heap_free(heap2); /* free version and clust_offsets */

		if (err != DB_SUCCESS || !prev_version) {
			/* Versions end here */

			mem_heap_free(heap);

			return(FALSE);
		}

		clust_offsets = rec_get_offsets(prev_version, clust_index,
						NULL, ULINT_UNDEFINED, &heap);

		if (!rec_get_deleted_flag(prev_version, comp)) {
			row_ext_t*	ext;

			/* The stack of versions is locked by mtr.
			Thus, it is safe to fetch the prefixes for
			externally stored columns. */
			row = row_build(ROW_COPY_POINTERS, clust_index,
					prev_version, clust_offsets,
					NULL, &ext, heap);
			entry = row_build_index_entry(row, ext, index, heap);

			/* If entry == NULL, the record contains unset
			BLOB pointers. This must be a freshly
			inserted record that we can safely ignore.
			For the justification, see the comments after
			the previous row_build_index_entry() call. */

			/* NOTE that we cannot do the comparison as binary
			fields because maybe the secondary index record has
			already been updated to a different binary value in
			a char field, but the collation identifies the old
			and new value anyway! */

			if (entry && !dtuple_coll_cmp(ientry, entry)) {

				mem_heap_free(heap);

				return(TRUE);
			}
		}

		version = prev_version;
	}
}
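
/* A sketch of how purge consults this function (illustrative only;
compare row_purge_remove_sec_if_poss_low() in row0purge.c, which the
comments above refer to). The secondary index record may be removed
only if no version >= the purge view still needs ientry. */
#if 0
static
ibool
row_vers_example_sec_removable(
/*===========================*/
	const rec_t*	clust_rec,	/* in: clustered index record;
					the caller holds a latch on its
					page via mtr */
	dict_index_t*	sec_index,	/* in: secondary index */
	const dtuple_t*	ientry,		/* in: secondary index entry */
	mtr_t*		mtr)		/* in: mtr holding the latch on
					clust_rec; will also latch
					purge_view */
{
	/* also_curr == TRUE: the current version of clust_rec is
	included in the search. */
	return(!row_vers_old_has_index_entry(TRUE, clust_rec, mtr,
					     sec_index, ientry));
}
#endif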

/*****************************************************************//**
Constructs the version of a clustered index record which a consistent
read should see. We assume that the trx id stored in rec is such that
the consistent read should not see rec in its present version.
@return	DB_SUCCESS or DB_MISSING_HISTORY */
UNIV_INTERN
ulint
row_vers_build_for_consistent_read(
/*===============================*/
	const rec_t*	rec,	/*!< in: record in a clustered index; the
				caller must have a latch on the page; this
				latch locks the top of the stack of versions
				of this record */
	mtr_t*		mtr,	/*!< in: mtr holding the latch on rec */
	dict_index_t*	index,	/*!< in: the clustered index */
	ulint**		offsets,/*!< in/out: offsets returned by
				rec_get_offsets(rec, index) */
	read_view_t*	view,	/*!< in: the consistent read view */
	mem_heap_t**	offset_heap,/*!< in/out: memory heap from which
				the offsets are allocated */
	mem_heap_t*	in_heap,/*!< in: memory heap from which the memory for
				*old_vers is allocated; memory for possible
				intermediate versions is allocated and freed
				locally within the function */
	rec_t**		old_vers)/*!< out, own: old version, or NULL if the
				record does not exist in the view, that is,
				it was freshly inserted afterwards */
{
	const rec_t*	version;
	rec_t*		prev_version;
	trx_id_t	trx_id;
	mem_heap_t*	heap		= NULL;
	byte*		buf;
	ulint		err;

	ut_ad(dict_index_is_clust(index));
	ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
	      || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
#ifdef UNIV_SYNC_DEBUG
	ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
#endif /* UNIV_SYNC_DEBUG */

	ut_ad(rec_offs_validate(rec, index, *offsets));

	trx_id = row_get_rec_trx_id(rec, index, *offsets);

	ut_ad(!read_view_sees_trx_id(view, trx_id));

	rw_lock_s_lock(&(purge_sys->latch));
	version = rec;

	for (;;) {
		mem_heap_t*	heap2	= heap;
		trx_undo_rec_t*	undo_rec;
		roll_ptr_t	roll_ptr;
		undo_no_t	undo_no;
		heap = mem_heap_create(1024);

		/* If we have high-granularity consistent read view and
		creating transaction of the view is the same as trx_id in
		the record we see this record only in the case when
		undo_no of the record is < undo_no in the view. */

		if (view->type == VIEW_HIGH_GRANULARITY
		    && ut_dulint_cmp(view->creator_trx_id, trx_id) == 0) {

			roll_ptr = row_get_rec_roll_ptr(version, index,
							*offsets);
			undo_rec = trx_undo_get_undo_rec_low(roll_ptr, heap);
			undo_no = trx_undo_rec_get_undo_no(undo_rec);
			mem_heap_empty(heap);

			if (ut_dulint_cmp(view->undo_no, undo_no) > 0) {
				/* The view already sees this version: we can
				copy it to in_heap and return */

				buf = mem_heap_alloc(in_heap,
						     rec_offs_size(*offsets));
				*old_vers = rec_copy(buf, version, *offsets);
				rec_offs_make_valid(*old_vers, index,
						    *offsets);
				err = DB_SUCCESS;

				break;
			}
		}

		err = trx_undo_prev_version_build(rec, mtr, version, index,
						  *offsets, heap,
						  &prev_version);
		if (heap2) {
			mem_heap_free(heap2); /* free version */
		}

		if (err != DB_SUCCESS) {
			break;
		}

		if (prev_version == NULL) {
			/* It was a freshly inserted version */
			*old_vers = NULL;
			err = DB_SUCCESS;

			break;
		}

		*offsets = rec_get_offsets(prev_version, index, *offsets,
					   ULINT_UNDEFINED, offset_heap);

		trx_id = row_get_rec_trx_id(prev_version, index, *offsets);

		if (read_view_sees_trx_id(view, trx_id)) {

			/* The view already sees this version: we can copy
			it to in_heap and return */

			buf = mem_heap_alloc(in_heap, rec_offs_size(*offsets));
			*old_vers = rec_copy(buf, prev_version, *offsets);
			rec_offs_make_valid(*old_vers, index, *offsets);
			err = DB_SUCCESS;

			break;
		}

		version = prev_version;
	}/* for (;;) */

	mem_heap_free(heap);
	rw_lock_s_unlock(&(purge_sys->latch));

	return(err);
}
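
/* A consistent-read usage sketch (illustrative only; compare
row_sel_build_prev_vers_for_mysql() in row0sel.c). When the trx id in
rec is not visible to the read view, the caller asks for the newest
version that the view does see; *old_vers == NULL means the row did
not yet exist in the view. */
#if 0
static
ulint
row_vers_example_prev_vers(
/*=======================*/
	read_view_t*	view,	/* in: consistent read view */
	dict_index_t*	clust_index, /* in: clustered index */
	const rec_t*	rec,	/* in: record in clust_index; the caller
				holds a latch on its page via mtr */
	ulint**		offsets,/* in/out: rec_get_offsets(rec, clust_index) */
	mem_heap_t**	offset_heap,/* in/out: heap for the offsets */
	mem_heap_t*	old_vers_heap,/* in: heap for the old version */
	rec_t**		old_vers,/* out, own: visible version, or NULL */
	mtr_t*		mtr)	/* in: mtr holding the latch on rec */
{
	return(row_vers_build_for_consistent_read(
		       rec, mtr, clust_index, offsets, view,
		       offset_heap, old_vers_heap, old_vers));
}
#endif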

/*****************************************************************//**
Constructs the last committed version of a clustered index record,
which should be seen by a semi-consistent read.
@return	DB_SUCCESS or DB_MISSING_HISTORY */
UNIV_INTERN
ulint
row_vers_build_for_semi_consistent_read(
/*====================================*/
	const rec_t*	rec,	/*!< in: record in a clustered index; the
				caller must have a latch on the page; this
				latch locks the top of the stack of versions
				of this record */
	mtr_t*		mtr,	/*!< in: mtr holding the latch on rec */
	dict_index_t*	index,	/*!< in: the clustered index */
	ulint**		offsets,/*!< in/out: offsets returned by
				rec_get_offsets(rec, index) */
	mem_heap_t**	offset_heap,/*!< in/out: memory heap from which
				the offsets are allocated */
	mem_heap_t*	in_heap,/*!< in: memory heap from which the memory for
				*old_vers is allocated; memory for possible
				intermediate versions is allocated and freed
				locally within the function */
	const rec_t**	old_vers)/*!< out: rec, old version, or NULL if the
				record does not exist in the view, that is,
				it was freshly inserted afterwards */
{
	const rec_t*	version;
	mem_heap_t*	heap		= NULL;
	byte*		buf;
	ulint		err;
	trx_id_t	rec_trx_id	= ut_dulint_zero;

	ut_ad(dict_index_is_clust(index));
	ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
	      || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
#ifdef UNIV_SYNC_DEBUG
	ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
#endif /* UNIV_SYNC_DEBUG */

	ut_ad(rec_offs_validate(rec, index, *offsets));

	rw_lock_s_lock(&(purge_sys->latch));
	/* The S-latch on purge_sys prevents the purge view from
	changing. Thus, if we have an uncommitted transaction at
	this point, then purge cannot remove its undo log even if
	the transaction could commit now. */

	version = rec;

	for (;;) {
		trx_t*		version_trx;
		mem_heap_t*	heap2;
		rec_t*		prev_version;
		trx_id_t	version_trx_id;

		version_trx_id = row_get_rec_trx_id(version, index, *offsets);
		if (rec == version) {
			rec_trx_id = version_trx_id;
		}

		mutex_enter(&kernel_mutex);
		version_trx = trx_get_on_id(version_trx_id);
		mutex_exit(&kernel_mutex);

		if (!version_trx
		    || version_trx->conc_state == TRX_NOT_STARTED
		    || version_trx->conc_state == TRX_COMMITTED_IN_MEMORY) {

			/* We found a version that belongs to a
			committed transaction: return it. */

			if (rec == version) {
				*old_vers = rec;
				err = DB_SUCCESS;
				break;
			}

			/* We assume that a rolled-back transaction stays in
			TRX_ACTIVE state until all the changes have been
			rolled back and the transaction is removed from
			the global list of transactions. */

			if (!ut_dulint_cmp(rec_trx_id, version_trx_id)) {
				/* The transaction was committed while
				we searched for earlier versions.
				Return the current version as a
				semi-consistent read. */

				version = rec;
				*offsets = rec_get_offsets(version,
							   index, *offsets,
							   ULINT_UNDEFINED,
							   offset_heap);
			}

			buf = mem_heap_alloc(in_heap, rec_offs_size(*offsets));
			*old_vers = rec_copy(buf, version, *offsets);
			rec_offs_make_valid(*old_vers, index, *offsets);
			err = DB_SUCCESS;

			break;
		}

		heap2 = heap;
		heap = mem_heap_create(1024);

		err = trx_undo_prev_version_build(rec, mtr, version, index,
						  *offsets, heap,
						  &prev_version);
		if (heap2) {
			mem_heap_free(heap2); /* free version */
		}

		if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
			break;
		}

		if (prev_version == NULL) {
			/* It was a freshly inserted version */
			*old_vers = NULL;
			err = DB_SUCCESS;

			break;
		}

		version = prev_version;
		*offsets = rec_get_offsets(version, index, *offsets,
					   ULINT_UNDEFINED, offset_heap);
	}/* for (;;) */

	if (heap) {
		mem_heap_free(heap);
	}
	rw_lock_s_unlock(&(purge_sys->latch));

	return(err);
}
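
/* A semi-consistent-read usage sketch (illustrative only; compare
row_sel_build_committed_vers_for_mysql() in row0sel.c). Instead of
waiting for a row lock, the caller may read the last committed
version; *old_vers may point to rec itself when rec is already
committed. */
#if 0
static
ulint
row_vers_example_last_committed(
/*============================*/
	const rec_t*	rec,	/* in: record in a clustered index; the
				caller holds a latch on its page via mtr */
	dict_index_t*	clust_index, /* in: clustered index */
	ulint**		offsets,/* in/out: rec_get_offsets(rec, clust_index) */
	mem_heap_t**	offset_heap,/* in/out: heap for the offsets */
	mem_heap_t*	in_heap,/* in: heap for the old version */
	const rec_t**	old_vers,/* out: last committed version, or NULL */
	mtr_t*		mtr)	/* in: mtr holding the latch on rec */
{
	return(row_vers_build_for_semi_consistent_read(
		       rec, mtr, clust_index, offsets,
		       offset_heap, in_heap, old_vers));
}
#endif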