1
/*****************************************************************************
3
Copyright (C) 1997, 2010, Innobase Oy. All Rights Reserved.
5
This program is free software; you can redistribute it and/or modify it under
6
the terms of the GNU General Public License as published by the Free Software
7
Foundation; version 2 of the License.
9
This program is distributed in the hope that it will be useful, but WITHOUT
10
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13
You should have received a copy of the GNU General Public License along with
14
this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
15
St, Fifth Floor, Boston, MA 02110-1301 USA
17
*****************************************************************************/
19
/**************************************************//**
23
Created 2/27/1997 Heikki Tuuri
24
*******************************************************/
29
#include "row0umod.ic"
32
#include "dict0dict.h"
33
#include "dict0boot.h"
37
#include "mach0data.h"
47
/* Considerations on undoing a modify operation.
48
(1) Undoing a delete marking: all index records should be found. Some of
49
them may have delete mark already FALSE, if the delete mark operation was
50
stopped midway, or if the undo operation ended prematurely because of a
52
(2) Undoing an update of a delete unmarked record: the newer version of
53
an updated secondary index entry should be removed if no prior version
54
of the clustered index record requires its existence. Otherwise, it should
56
(3) Undoing an update of a delete marked record. In this kind of update a
57
delete marked clustered index record was delete unmarked and possibly also
58
some of its fields were changed. Now, it is possible that the delete marked
59
version has become obsolete at the time the undo is started. */
61
/*************************************************************************
62
IMPORTANT NOTE: Any operation that generates redo MUST check that there
63
is enough space in the redo log before that operation. This is
64
done by calling log_free_check(). The reason for checking the
65
availability of the redo log space before the start of the operation is
66
that we MUST not hold any synchronization objects when performing the
68
If you make a change in this module make sure that no codepath is
69
introduced where a call to log_free_check() is bypassed. */
71
/***********************************************************//**
72
Checks whether the previous version of the clustered index record was also
73
modified or inserted by the same transaction, and its undo number is such
74
that it should be undone in the same rollback.
75
@return TRUE if the previous modify or insert of this row should also be undone */
78
row_undo_mod_undo_also_prev_vers(
79
/*=============================*/
80
undo_node_t* node, /*!< in: row undo node */
81
undo_no_t* undo_no)/*!< out: the undo number */
83
trx_undo_rec_t* undo_rec;
88
if (node->new_trx_id != trx->id) {
94
undo_rec = trx_undo_get_undo_rec_low(node->new_roll_ptr, node->heap);
96
*undo_no = trx_undo_rec_get_undo_no(undo_rec);
98
return(trx->roll_limit <= *undo_no);
101
/***********************************************************//**
102
Undoes a modify in a clustered index record.
103
@return DB_SUCCESS, DB_FAIL, or error code: we may run out of file space */
106
row_undo_mod_clust_low(
107
/*===================*/
108
undo_node_t* node, /*!< in: row undo node */
109
que_thr_t* thr, /*!< in: query thread */
110
mtr_t* mtr, /*!< in: mtr; must be committed before
111
latching any further pages */
112
ulint mode) /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
119
#endif /* UNIV_DEBUG */
121
pcur = &(node->pcur);
122
btr_cur = btr_pcur_get_btr_cur(pcur);
126
#endif /* UNIV_DEBUG */
127
btr_pcur_restore_position(mode, pcur, mtr);
131
if (mode == BTR_MODIFY_LEAF) {
133
err = btr_cur_optimistic_update(BTR_NO_LOCKING_FLAG
134
| BTR_NO_UNDO_LOG_FLAG
136
btr_cur, node->update,
137
node->cmpl_info, thr, mtr);
139
mem_heap_t* heap = NULL;
140
big_rec_t* dummy_big_rec;
142
ut_ad(mode == BTR_MODIFY_TREE);
144
err = btr_cur_pessimistic_update(
146
| BTR_NO_UNDO_LOG_FLAG
148
btr_cur, &heap, &dummy_big_rec, node->update,
149
node->cmpl_info, thr, mtr);
151
ut_a(!dummy_big_rec);
152
if (UNIV_LIKELY_NULL(heap)) {
160
/***********************************************************//**
161
Removes a clustered index record after undo if possible.
162
This is attempted when the record was inserted by updating a
163
delete-marked record and there no longer exist transactions
164
that would see the delete-marked record. In other words, we
165
roll back the insert by purging the record.
166
@return DB_SUCCESS, DB_FAIL, or error code: we may run out of file space */
169
row_undo_mod_remove_clust_low(
170
/*==========================*/
171
undo_node_t* node, /*!< in: row undo node */
172
que_thr_t* thr, /*!< in: query thread */
173
mtr_t* mtr, /*!< in: mtr */
174
ulint mode) /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
181
ut_ad(node->rec_type == TRX_UNDO_UPD_DEL_REC);
182
pcur = &(node->pcur);
183
btr_cur = btr_pcur_get_btr_cur(pcur);
185
success = btr_pcur_restore_position(mode, pcur, mtr);
192
/* Find out if we can remove the whole clustered index record */
194
if (node->rec_type == TRX_UNDO_UPD_DEL_REC
195
&& !row_vers_must_preserve_del_marked(node->new_trx_id, mtr)) {
197
/* Ok, we can remove */
202
if (mode == BTR_MODIFY_LEAF) {
203
success = btr_cur_optimistic_delete(btr_cur, mtr);
211
ut_ad(mode == BTR_MODIFY_TREE);
213
/* This operation is analogous to purge, we can free also
214
inherited externally stored fields */
216
btr_cur_pessimistic_delete(&err, FALSE, btr_cur,
218
? RB_RECOVERY_PURGE_REC
221
/* The delete operation may fail if we have little
222
file space left: TODO: easiest to crash the database
223
and restart with more file space */
229
/***********************************************************//**
230
Undoes a modify in a clustered index record. Also sets the node state for the
232
@return DB_SUCCESS or error code: we may run out of file space */
237
undo_node_t* node, /*!< in: row undo node */
238
que_thr_t* thr) /*!< in: query thread */
245
undo_no_t new_undo_no;
251
/* Check if also the previous version of the clustered index record
252
should be undone in this same rollback operation */
254
more_vers = row_undo_mod_undo_also_prev_vers(node, &new_undo_no);
256
pcur = &(node->pcur);
260
/* Try optimistic processing of the record, keeping changes within
263
err = row_undo_mod_clust_low(node, thr, &mtr, BTR_MODIFY_LEAF);
265
if (err != DB_SUCCESS) {
266
btr_pcur_commit_specify_mtr(pcur, &mtr);
268
/* We may have to modify tree structure: do a pessimistic
269
descent down the index tree */
273
err = row_undo_mod_clust_low(node, thr, &mtr, BTR_MODIFY_TREE);
276
btr_pcur_commit_specify_mtr(pcur, &mtr);
278
if (err == DB_SUCCESS && node->rec_type == TRX_UNDO_UPD_DEL_REC) {
282
err = row_undo_mod_remove_clust_low(node, thr, &mtr,
284
if (err != DB_SUCCESS) {
285
btr_pcur_commit_specify_mtr(pcur, &mtr);
287
/* We may have to modify tree structure: do a
288
pessimistic descent down the index tree */
292
err = row_undo_mod_remove_clust_low(node, thr, &mtr,
296
btr_pcur_commit_specify_mtr(pcur, &mtr);
299
node->state = UNDO_NODE_FETCH_NEXT;
301
trx_undo_rec_release(node->trx, node->undo_no);
303
if (more_vers && err == DB_SUCCESS) {
305
/* Reserve the undo log record to the prior version after
306
committing &mtr: this is necessary to comply with the latching
307
order, as &mtr may contain the fsp latch which is lower in
308
the latch hierarchy than trx->undo_mutex. */
310
success = trx_undo_rec_reserve(node->trx, new_undo_no);
313
node->state = UNDO_NODE_PREV_VERS;
320
/***********************************************************//**
321
Delete marks or removes a secondary index entry if found.
322
@return DB_SUCCESS, DB_FAIL, or DB_OUT_OF_FILE_SPACE */
325
row_undo_mod_del_mark_or_remove_sec_low(
326
/*====================================*/
327
undo_node_t* node, /*!< in: row undo node */
328
que_thr_t* thr, /*!< in: query thread */
329
dict_index_t* index, /*!< in: index */
330
dtuple_t* entry, /*!< in: index entry */
331
ulint mode) /*!< in: latch mode BTR_MODIFY_LEAF or
341
enum row_search_result search_result;
346
btr_cur = btr_pcur_get_btr_cur(&pcur);
348
ut_ad(mode == BTR_MODIFY_TREE || mode == BTR_MODIFY_LEAF);
350
search_result = row_search_index_entry(index, entry, mode,
353
switch (UNIV_EXPECT(search_result, ROW_FOUND)) {
355
/* In crash recovery, the secondary index record may
356
be missing if the UPDATE did not have time to insert
357
the secondary index records before the crash. When we
358
are undoing that UPDATE in crash recovery, the record
361
In normal processing, if an update ends in a deadlock
362
before it has inserted all updated secondary index
363
records, then the undo will not find those records. */
370
case ROW_NOT_DELETED_REF:
371
/* These are invalid outcomes, because the mode passed
372
to row_search_index_entry() did not include any of the
373
flags BTR_INSERT, BTR_DELETE, or BTR_DELETE_MARK. */
377
/* We should remove the index record if no prior version of the row,
378
which cannot be purged yet, requires its existence. If some requires,
379
we should delete mark the record. */
381
mtr_start(&mtr_vers);
383
success = btr_pcur_restore_position(BTR_SEARCH_LEAF, &(node->pcur),
387
old_has = row_vers_old_has_index_entry(FALSE,
388
btr_pcur_get_rec(&(node->pcur)),
389
&mtr_vers, index, entry);
391
err = btr_cur_del_mark_set_sec_rec(BTR_NO_LOCKING_FLAG,
392
btr_cur, TRUE, thr, &mtr);
393
ut_ad(err == DB_SUCCESS);
395
/* Remove the index record */
397
if (mode == BTR_MODIFY_LEAF) {
398
success = btr_cur_optimistic_delete(btr_cur, &mtr);
405
ut_ad(mode == BTR_MODIFY_TREE);
407
/* No need to distinguish RB_RECOVERY_PURGE here,
408
because we are deleting a secondary index record:
409
the distinction between RB_NORMAL and
410
RB_RECOVERY_PURGE only matters when deleting a
411
record that contains externally stored
413
ut_ad(!dict_index_is_clust(index));
414
btr_cur_pessimistic_delete(&err, FALSE, btr_cur,
417
/* The delete operation may fail if we have little
418
file space left: TODO: easiest to crash the database
419
and restart with more file space */
423
btr_pcur_commit_specify_mtr(&(node->pcur), &mtr_vers);
426
btr_pcur_close(&pcur);
432
/***********************************************************//**
433
Delete marks or removes a secondary index entry if found.
434
NOTE that if we updated the fields of a delete-marked secondary index record
435
so that alphabetically they stayed the same, e.g., 'abc' -> 'aBc', we cannot
436
return to the original values because we do not know them. But this should
437
not cause problems because in row0sel.c, in queries we always retrieve the
438
clustered index record or an earlier version of it, if the secondary index
439
record through which we do the search is delete-marked.
440
@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
443
row_undo_mod_del_mark_or_remove_sec(
444
/*================================*/
445
undo_node_t* node, /*!< in: row undo node */
446
que_thr_t* thr, /*!< in: query thread */
447
dict_index_t* index, /*!< in: index */
448
dtuple_t* entry) /*!< in: index entry */
452
err = row_undo_mod_del_mark_or_remove_sec_low(node, thr, index,
453
entry, BTR_MODIFY_LEAF);
454
if (err == DB_SUCCESS) {
459
err = row_undo_mod_del_mark_or_remove_sec_low(node, thr, index,
460
entry, BTR_MODIFY_TREE);
464
/***********************************************************//**
465
Delete unmarks a secondary index entry which must be found. It might not be
466
delete-marked at the moment, but it does no harm to unmark it anyway. We also
467
need to update the fields of the secondary index record if we updated its
468
fields but alphabetically they stayed the same, e.g., 'abc' -> 'aBc'.
469
@return DB_FAIL or DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
472
row_undo_mod_del_unmark_sec_and_undo_update(
473
/*========================================*/
474
ulint mode, /*!< in: search mode: BTR_MODIFY_LEAF or
476
que_thr_t* thr, /*!< in: query thread */
477
dict_index_t* index, /*!< in: index */
478
const dtuple_t* entry) /*!< in: index entry */
484
ulint err = DB_SUCCESS;
485
big_rec_t* dummy_big_rec;
487
trx_t* trx = thr_get_trx(thr);
488
enum row_search_result search_result;
490
/* Ignore indexes that are being created. */
491
if (UNIV_UNLIKELY(*index->name == TEMP_INDEX_PREFIX)) {
499
ut_ad(mode == BTR_MODIFY_TREE || mode == BTR_MODIFY_LEAF);
501
search_result = row_search_index_entry(index, entry, mode,
504
switch (search_result) {
506
case ROW_NOT_DELETED_REF:
507
/* These are invalid outcomes, because the mode passed
508
to row_search_index_entry() did not include any of the
509
flags BTR_INSERT, BTR_DELETE, or BTR_DELETE_MARK. */
512
fputs("InnoDB: error in sec index entry del undo in\n"
514
dict_index_name_print(stderr, trx, index);
516
"InnoDB: tuple ", stderr);
517
dtuple_print(stderr, entry);
519
"InnoDB: record ", stderr);
520
rec_print(stderr, btr_pcur_get_rec(&pcur), index);
522
trx_print(stderr, trx, 0);
524
"InnoDB: Submit a detailed bug report"
525
" to http://bugs.mysql.com\n", stderr);
528
btr_cur = btr_pcur_get_btr_cur(&pcur);
529
err = btr_cur_del_mark_set_sec_rec(BTR_NO_LOCKING_FLAG,
530
btr_cur, FALSE, thr, &mtr);
531
ut_a(err == DB_SUCCESS);
532
heap = mem_heap_create(100);
534
update = row_upd_build_sec_rec_difference_binary(
535
index, entry, btr_cur_get_rec(btr_cur), trx, heap);
536
if (upd_get_n_fields(update) == 0) {
540
} else if (mode == BTR_MODIFY_LEAF) {
541
/* Try an optimistic updating of the record, keeping
542
changes within the page */
544
err = btr_cur_optimistic_update(
545
BTR_KEEP_SYS_FLAG | BTR_NO_LOCKING_FLAG,
546
btr_cur, update, 0, thr, &mtr);
550
case DB_ZIP_OVERFLOW:
554
ut_a(mode == BTR_MODIFY_TREE);
555
err = btr_cur_pessimistic_update(
556
BTR_KEEP_SYS_FLAG | BTR_NO_LOCKING_FLAG,
557
btr_cur, &heap, &dummy_big_rec,
558
update, 0, thr, &mtr);
559
ut_a(!dummy_big_rec);
565
btr_pcur_close(&pcur);
571
/***********************************************************//**
572
Undoes a modify in secondary indexes when undo record type is UPD_DEL.
573
@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
576
row_undo_mod_upd_del_sec(
577
/*=====================*/
578
undo_node_t* node, /*!< in: row undo node */
579
que_thr_t* thr) /*!< in: query thread */
584
ulint err = DB_SUCCESS;
586
ut_ad(node->rec_type == TRX_UNDO_UPD_DEL_REC);
587
heap = mem_heap_create(1024);
589
while (node->index != NULL) {
592
entry = row_build_index_entry(node->row, node->ext,
594
if (UNIV_UNLIKELY(!entry)) {
595
/* The database must have crashed after
596
inserting a clustered index record but before
597
writing all the externally stored columns of
598
that record. Because secondary index entries
599
are inserted after the clustered index record,
600
we may assume that the secondary index record
601
does not exist. However, this situation may
602
only occur during the rollback of incomplete
604
ut_a(thr_is_recv(thr));
606
err = row_undo_mod_del_mark_or_remove_sec(
607
node, thr, index, entry);
609
if (err != DB_SUCCESS) {
615
mem_heap_empty(heap);
617
node->index = dict_table_get_next_index(node->index);
625
/***********************************************************//**
626
Undoes a modify in secondary indexes when undo record type is DEL_MARK.
627
@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
630
row_undo_mod_del_mark_sec(
631
/*======================*/
632
undo_node_t* node, /*!< in: row undo node */
633
que_thr_t* thr) /*!< in: query thread */
640
heap = mem_heap_create(1024);
642
while (node->index != NULL) {
645
entry = row_build_index_entry(node->row, node->ext,
648
err = row_undo_mod_del_unmark_sec_and_undo_update(
649
BTR_MODIFY_LEAF, thr, index, entry);
650
if (err == DB_FAIL) {
651
err = row_undo_mod_del_unmark_sec_and_undo_update(
652
BTR_MODIFY_TREE, thr, index, entry);
655
if (err != DB_SUCCESS) {
662
node->index = dict_table_get_next_index(node->index);
670
/***********************************************************//**
671
Undoes a modify in secondary indexes when undo record type is UPD_EXIST.
672
@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
675
row_undo_mod_upd_exist_sec(
676
/*=======================*/
677
undo_node_t* node, /*!< in: row undo node */
678
que_thr_t* thr) /*!< in: query thread */
685
if (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE) {
686
/* No change in secondary indexes */
691
heap = mem_heap_create(1024);
693
while (node->index != NULL) {
696
if (row_upd_changes_ord_field_binary(node->row, node->index,
699
/* Build the newest version of the index entry */
700
entry = row_build_index_entry(node->row, node->ext,
702
if (UNIV_UNLIKELY(!entry)) {
703
/* The server must have crashed in
704
row_upd_clust_rec_by_insert(), in
705
row_ins_index_entry_low() before
706
btr_store_big_rec_extern_fields()
707
has written the externally stored columns
708
(BLOBs) of the new clustered index entry. */
710
/* The table must be in DYNAMIC or COMPRESSED
711
format. REDUNDANT and COMPACT formats
712
store a local 768-byte prefix of each
713
externally stored column. */
714
ut_a(dict_table_get_format(index->table)
715
>= DICT_TF_FORMAT_ZIP);
717
/* This is only legitimate when
718
rolling back an incomplete transaction
719
after crash recovery. */
720
ut_a(thr_get_trx(thr)->is_recovered);
722
/* The server must have crashed before
723
completing the insert of the new
724
clustered index entry and before
725
inserting to the secondary indexes.
726
Because node->row was not yet written
727
to this index, we can ignore it. But
728
we must restore node->undo_row. */
730
/* NOTE that if we updated the fields of a
731
delete-marked secondary index record so that
732
alphabetically they stayed the same, e.g.,
733
'abc' -> 'aBc', we cannot return to the
734
original values because we do not know them.
735
But this should not cause problems because
736
in row0sel.c, in queries we always retrieve
737
the clustered index record or an earlier
738
version of it, if the secondary index record
739
through which we do the search is
742
err = row_undo_mod_del_mark_or_remove_sec(
743
node, thr, index, entry);
744
if (err != DB_SUCCESS) {
750
mem_heap_empty(heap);
753
/* We may have to update the delete mark in the
754
secondary index record of the previous version of
755
the row. We also need to update the fields of
756
the secondary index record if we updated its fields
757
but alphabetically they stayed the same, e.g.,
759
entry = row_build_index_entry(node->undo_row,
764
err = row_undo_mod_del_unmark_sec_and_undo_update(
765
BTR_MODIFY_LEAF, thr, index, entry);
766
if (err == DB_FAIL) {
767
err = row_undo_mod_del_unmark_sec_and_undo_update(
768
BTR_MODIFY_TREE, thr, index, entry);
771
if (err != DB_SUCCESS) {
778
node->index = dict_table_get_next_index(node->index);
786
/***********************************************************//**
787
Parses the row reference and other info in a modify undo log record. */
790
row_undo_mod_parse_undo_rec(
791
/*========================*/
792
undo_node_t* node, /*!< in: row undo node */
793
que_thr_t* thr) /*!< in: query thread */
795
dict_index_t* clust_index;
808
trx = thr_get_trx(thr);
809
ptr = trx_undo_rec_get_pars(node->undo_rec, &type, &cmpl_info,
810
&dummy_extern, &undo_no, &table_id);
811
node->rec_type = type;
813
node->table = dict_table_get_on_id(table_id, trx);
815
/* TODO: other fixes associated with DROP TABLE + rollback in the
816
same table by another user */
818
if (node->table == NULL) {
819
/* Table was dropped */
823
if (node->table->ibd_file_missing) {
824
/* We skip undo operations to missing .ibd files */
830
clust_index = dict_table_get_first_index(node->table);
832
ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr,
835
ptr = trx_undo_rec_get_row_ref(ptr, clust_index, &(node->ref),
838
trx_undo_update_rec_get_update(ptr, clust_index, type, trx_id,
839
roll_ptr, info_bits, trx,
840
node->heap, &(node->update));
841
node->new_roll_ptr = roll_ptr;
842
node->new_trx_id = trx_id;
843
node->cmpl_info = cmpl_info;
846
/***********************************************************//**
847
Undoes a modify operation on a row of a table.
848
@return DB_SUCCESS or error code */
853
undo_node_t* node, /*!< in: row undo node */
854
que_thr_t* thr) /*!< in: query thread */
859
ut_ad(node->state == UNDO_NODE_MODIFY);
861
row_undo_mod_parse_undo_rec(node, thr);
863
if (!node->table || !row_undo_search_clust_to_pcur(node)) {
864
/* It is already undone, or will be undone by another query
865
thread, or table was dropped */
867
trx_undo_rec_release(node->trx, node->undo_no);
868
node->state = UNDO_NODE_FETCH_NEXT;
873
node->index = dict_table_get_next_index(
874
dict_table_get_first_index(node->table));
876
if (node->rec_type == TRX_UNDO_UPD_EXIST_REC) {
878
err = row_undo_mod_upd_exist_sec(node, thr);
880
} else if (node->rec_type == TRX_UNDO_DEL_MARK_REC) {
882
err = row_undo_mod_del_mark_sec(node, thr);
884
ut_ad(node->rec_type == TRX_UNDO_UPD_DEL_REC);
885
err = row_undo_mod_upd_del_sec(node, thr);
888
if (err != DB_SUCCESS) {
893
err = row_undo_mod_clust(node, thr);