1
/*****************************************************************************
3
Copyright (c) 1997, 2010, Innobase Oy. All Rights Reserved.
5
This program is free software; you can redistribute it and/or modify it under
6
the terms of the GNU General Public License as published by the Free Software
7
Foundation; version 2 of the License.
9
This program is distributed in the hope that it will be useful, but WITHOUT
10
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13
You should have received a copy of the GNU General Public License along with
14
this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
15
St, Fifth Floor, Boston, MA 02110-1301 USA
17
*****************************************************************************/
19
/**************************************************//**
21
Purge obsolete records
23
Created 3/14/1997 Heikki Tuuri
24
*******************************************************/
26
#include "row0purge.h"
29
#include "row0purge.ic"
33
#include "mach0data.h"
38
#include "trx0purge.h"
44
#include "row0mysql.h"
47
/*************************************************************************
48
IMPORTANT NOTE: Any operation that generates redo MUST check that there
49
is enough space in the redo log before that operation. This is
50
done by calling log_free_check(). The reason for checking the
51
availability of the redo log space before the start of the operation is
52
that we MUST not hold any synchronization objects when performing the check.
54
If you make a change in this module make sure that no codepath is
55
introduced where a call to log_free_check() is bypassed. */
57
/*************************************************************************
58
IMPORTANT NOTE: Any operation that generates redo MUST check that there
59
is enough space in the redo log before that operation. This is
60
done by calling log_free_check(). The reason for checking the
61
availability of the redo log space before the start of the operation is
62
that we MUST not hold any synchronization objects when performing the check.
64
If you make a change in this module make sure that no codepath is
65
introduced where a call to log_free_check() is bypassed. */
67
/********************************************************************//**
68
Creates a purge node to a query graph.
69
@return own: purge node */
72
row_purge_node_create(
73
/*==================*/
74
que_thr_t* parent, /*!< in: parent node, i.e., a thr node */
75
mem_heap_t* heap) /*!< in: memory heap where created */
79
ut_ad(parent && heap);
81
node = mem_heap_alloc(heap, sizeof(purge_node_t));
83
node->common.type = QUE_NODE_PURGE;
84
node->common.parent = parent;
86
node->heap = mem_heap_create(256);
91
/***********************************************************//**
92
Repositions the pcur in the purge node on the clustered index record, if found.
94
@return TRUE if the record was found */
97
row_purge_reposition_pcur(
98
/*======================*/
99
ulint mode, /*!< in: latching mode */
100
purge_node_t* node, /*!< in: row purge node */
101
mtr_t* mtr) /*!< in: mtr */
105
if (node->found_clust) {
106
found = btr_pcur_restore_position(mode, &(node->pcur), mtr);
111
found = row_search_on_row_ref(&(node->pcur), mode, node->table,
113
node->found_clust = found;
116
btr_pcur_store_position(&(node->pcur), mtr);
122
/***********************************************************//**
123
Removes a delete marked clustered index record if possible.
124
@return TRUE if success, or if not found, or if modified after the delete marking */
128
row_purge_remove_clust_if_poss_low(
129
/*===============================*/
130
purge_node_t* node, /*!< in: row purge node */
131
ulint mode) /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
140
mem_heap_t* heap = NULL;
141
ulint offsets_[REC_OFFS_NORMAL_SIZE];
142
rec_offs_init(offsets_);
144
index = dict_table_get_first_index(node->table);
146
pcur = &(node->pcur);
147
btr_cur = btr_pcur_get_btr_cur(pcur);
152
success = row_purge_reposition_pcur(mode, node, &mtr);
155
/* The record is already removed */
157
btr_pcur_commit_specify_mtr(pcur, &mtr);
162
rec = btr_pcur_get_rec(pcur);
164
if (node->roll_ptr != row_get_rec_roll_ptr(
165
rec, index, rec_get_offsets(rec, index, offsets_,
166
ULINT_UNDEFINED, &heap))) {
167
if (UNIV_LIKELY_NULL(heap)) {
170
/* Someone else has modified the record later: do not remove */
171
btr_pcur_commit_specify_mtr(pcur, &mtr);
176
if (UNIV_LIKELY_NULL(heap)) {
180
if (mode == BTR_MODIFY_LEAF) {
181
success = btr_cur_optimistic_delete(btr_cur, &mtr);
183
ut_ad(mode == BTR_MODIFY_TREE);
184
btr_cur_pessimistic_delete(&err, FALSE, btr_cur,
187
if (err == DB_SUCCESS) {
189
} else if (err == DB_OUT_OF_FILE_SPACE) {
196
btr_pcur_commit_specify_mtr(pcur, &mtr);
201
/***********************************************************//**
202
Removes a clustered index record if it has not been modified after the delete marking. */
206
row_purge_remove_clust_if_poss(
207
/*===========================*/
208
purge_node_t* node) /*!< in: row purge node */
213
/* fputs("Purge: Removing clustered record\n", stderr); */
215
success = row_purge_remove_clust_if_poss_low(node, BTR_MODIFY_LEAF);
221
success = row_purge_remove_clust_if_poss_low(node, BTR_MODIFY_TREE);
222
/* The delete operation may fail if we have little
223
file space left: TODO: easiest to crash the database
224
and restart with more file space */
226
if (!success && n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) {
229
os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);
237
/***********************************************************//**
238
Determines if it is possible to remove a secondary index entry.
239
Removal is possible if the secondary index entry does not refer to any
240
not delete marked version of a clustered index record where DB_TRX_ID
241
is newer than the purge view.
243
NOTE: This function should only be called by the purge thread, only
244
while holding a latch on the leaf page of the secondary index entry
245
(or keeping the buffer pool watch on the page). It is possible that
246
this function first returns TRUE and then FALSE, if a user transaction
247
inserts a record that the secondary index entry would refer to.
248
However, in that case, the user transaction would also re-insert the
249
secondary index entry after purge has removed it and released the leaf page latch.
251
@return TRUE if the secondary index record can be purged */
256
purge_node_t* node, /*!< in/out: row purge node */
257
dict_index_t* index, /*!< in: secondary index */
258
const dtuple_t* entry) /*!< in: secondary index entry */
263
ut_ad(!dict_index_is_clust(index));
266
can_delete = !row_purge_reposition_pcur(BTR_SEARCH_LEAF, node, &mtr)
267
|| !row_vers_old_has_index_entry(TRUE,
268
btr_pcur_get_rec(&node->pcur),
271
btr_pcur_commit_specify_mtr(&node->pcur, &mtr);
276
/***************************************************************
277
Removes a secondary index entry if possible, by modifying the
278
index tree. Does not try to buffer the delete.
279
@return TRUE if success or if not found */
282
row_purge_remove_sec_if_poss_tree(
283
/*==============================*/
284
purge_node_t* node, /*!< in: row purge node */
285
dict_index_t* index, /*!< in: index */
286
const dtuple_t* entry) /*!< in: index entry */
290
ibool success = TRUE;
293
enum row_search_result search_result;
298
search_result = row_search_index_entry(index, entry, BTR_MODIFY_TREE,
301
switch (search_result) {
303
/* Not found. This is a legitimate condition. In a
304
rollback, InnoDB will remove secondary recs that would
305
be purged anyway. Then the actual purge will not find
306
the secondary index record. Also, the purge itself is
307
eager: if it comes to consider a secondary index
308
record, and notices it does not need to exist in the
309
index, it will remove it. Then if/when the purge
310
comes to consider the secondary index record a second
311
time, it will not exist any more in the index. */
313
/* fputs("PURGE:........sec entry not found\n", stderr); */
314
/* dtuple_print(stderr, entry); */
319
case ROW_NOT_DELETED_REF:
320
/* These are invalid outcomes, because the mode passed
321
to row_search_index_entry() did not include any of the
322
flags BTR_INSERT, BTR_DELETE, or BTR_DELETE_MARK. */
326
btr_cur = btr_pcur_get_btr_cur(&pcur);
328
/* We should remove the index record if no later version of the row,
329
which cannot be purged yet, requires its existence. If some version does,
330
we should do nothing. */
332
if (row_purge_poss_sec(node, index, entry)) {
333
/* Remove the index record, which should have been
334
marked for deletion. */
335
ut_ad(REC_INFO_DELETED_FLAG
336
& rec_get_info_bits(btr_cur_get_rec(btr_cur),
337
dict_table_is_comp(index->table)));
339
btr_cur_pessimistic_delete(&err, FALSE, btr_cur,
341
switch (UNIV_EXPECT(err, DB_SUCCESS)) {
344
case DB_OUT_OF_FILE_SPACE:
353
btr_pcur_close(&pcur);
359
/***************************************************************
360
Removes a secondary index entry without modifying the index tree, if possible.
362
@return TRUE if success or if not found */
365
row_purge_remove_sec_if_poss_leaf(
366
/*==============================*/
367
purge_node_t* node, /*!< in: row purge node */
368
dict_index_t* index, /*!< in: index */
369
const dtuple_t* entry) /*!< in: index entry */
373
enum row_search_result search_result;
379
/* Set the purge node for the call to row_purge_poss_sec(). */
380
pcur.btr_cur.purge_node = node;
381
/* Set the query thread, so that ibuf_insert_low() will be
382
able to invoke thd_get_trx(). */
383
pcur.btr_cur.thr = que_node_get_parent(node);
385
search_result = row_search_index_entry(
386
index, entry, BTR_MODIFY_LEAF | BTR_DELETE, &pcur, &mtr);
388
switch (search_result) {
391
/* Before attempting to purge a record, check
392
if it is safe to do so. */
393
if (row_purge_poss_sec(node, index, entry)) {
394
btr_cur_t* btr_cur = btr_pcur_get_btr_cur(&pcur);
396
/* Only delete-marked records should be purged. */
397
ut_ad(REC_INFO_DELETED_FLAG
399
btr_cur_get_rec(btr_cur),
400
dict_table_is_comp(index->table)));
402
if (!btr_cur_optimistic_delete(btr_cur, &mtr)) {
404
/* The index entry could not be deleted. */
409
/* fall through (the index entry is still needed,
410
or the deletion succeeded) */
411
case ROW_NOT_DELETED_REF:
412
/* The index entry is still needed. */
414
/* The deletion was buffered. */
416
/* The index entry does not exist, nothing to do. */
419
btr_pcur_close(&pcur);
428
/***********************************************************//**
429
Removes a secondary index entry if possible. */
432
row_purge_remove_sec_if_poss(
433
/*=========================*/
434
purge_node_t* node, /*!< in: row purge node */
435
dict_index_t* index, /*!< in: index */
436
dtuple_t* entry) /*!< in: index entry */
441
/* fputs("Purge: Removing secondary record\n", stderr); */
443
if (row_purge_remove_sec_if_poss_leaf(node, index, entry)) {
448
success = row_purge_remove_sec_if_poss_tree(node, index, entry);
449
/* The delete operation may fail if we have little
450
file space left: TODO: easiest to crash the database
451
and restart with more file space */
453
if (!success && n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) {
457
os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);
465
/***********************************************************//**
466
Purges a delete marking of a record. */
471
purge_node_t* node) /*!< in: row purge node */
479
heap = mem_heap_create(1024);
481
while (node->index != NULL) {
484
/* Build the index entry */
485
entry = row_build_index_entry(node->row, NULL, index, heap);
487
row_purge_remove_sec_if_poss(node, index, entry);
489
node->index = dict_table_get_next_index(node->index);
494
row_purge_remove_clust_if_poss(node);
497
/***********************************************************//**
498
Purges an update of an existing record. Also purges an update of a delete
499
marked record if that record contained an externally stored field. */
502
row_purge_upd_exist_or_extern(
503
/*==========================*/
504
purge_node_t* node) /*!< in: row purge node */
518
if (node->rec_type == TRX_UNDO_UPD_DEL_REC) {
520
goto skip_secondaries;
523
heap = mem_heap_create(1024);
525
while (node->index != NULL) {
528
if (row_upd_changes_ord_field_binary(NULL, node->index,
530
/* Build the older version of the index entry */
531
entry = row_build_index_entry(node->row, NULL,
534
row_purge_remove_sec_if_poss(node, index, entry);
537
node->index = dict_table_get_next_index(node->index);
543
/* Free possible externally stored fields */
544
for (i = 0; i < upd_get_n_fields(node->update); i++) {
546
const upd_field_t* ufield
547
= upd_get_nth_field(node->update, i);
549
if (dfield_is_ext(&ufield->new_val)) {
551
ulint internal_offset;
554
/* We use the fact that new_val points to
555
node->undo_rec and get thus the offset of
556
dfield data inside the undo record. Then we
557
can calculate from node->roll_ptr the file
558
address of the new_val data */
562
dfield_get_data(&ufield->new_val))
565
ut_a(internal_offset < UNIV_PAGE_SIZE);
567
trx_undo_decode_roll_ptr(node->roll_ptr,
568
&is_insert, &rseg_id,
572
/* We have to acquire an X-latch to the clustered index tree */
575
index = dict_table_get_first_index(node->table);
577
mtr_x_lock(dict_index_get_lock(index), &mtr);
579
/* NOTE: we must also acquire an X-latch to the
580
root page of the tree. We will need it when we
581
free pages from the tree. If the tree is of height 1,
582
the tree X-latch does NOT protect the root page,
583
because it is also a leaf page. Since we will have a
584
latch on an undo log page, we would break the
585
latching order if we would only later latch the
586
root page of such a tree! */
588
btr_root_get(index, &mtr);
590
/* We assume in purge of externally stored fields
591
that the space id of the undo log record is 0! */
593
block = buf_page_get(0, 0, page_no, RW_X_LATCH, &mtr);
594
buf_block_dbg_add_level(block, SYNC_TRX_UNDO_PAGE);
596
data_field = buf_block_get_frame(block)
597
+ offset + internal_offset;
599
ut_a(dfield_get_len(&ufield->new_val)
600
>= BTR_EXTERN_FIELD_REF_SIZE);
601
btr_free_externally_stored_field(
603
data_field + dfield_get_len(&ufield->new_val)
604
- BTR_EXTERN_FIELD_REF_SIZE,
605
NULL, NULL, NULL, 0, RB_NONE, &mtr);
611
/***********************************************************//**
612
Parses the row reference and other info in a modify undo log record.
613
@return TRUE if purge operation required: NOTE that then the CALLER
614
must unfreeze data dictionary! */
617
row_purge_parse_undo_rec(
618
/*=====================*/
619
purge_node_t* node, /*!< in: row purge node */
620
ibool* updated_extern,
621
/*!< out: TRUE if an externally stored field was updated */
623
que_thr_t* thr) /*!< in: query thread */
625
dict_index_t* clust_index;
638
trx = thr_get_trx(thr);
640
ptr = trx_undo_rec_get_pars(node->undo_rec, &type, &cmpl_info,
641
updated_extern, &undo_no, &table_id);
642
node->rec_type = type;
644
if (type == TRX_UNDO_UPD_DEL_REC && !(*updated_extern)) {
649
ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr,
653
if (type == TRX_UNDO_UPD_EXIST_REC
654
&& cmpl_info & UPD_NODE_NO_ORD_CHANGE && !(*updated_extern)) {
656
/* Purge requires no changes to indexes: we may return */
661
/* Prevent DROP TABLE etc. from running when we are doing the purge */
664
row_mysql_freeze_data_dictionary(trx);
666
mutex_enter(&(dict_sys->mutex));
668
node->table = dict_table_get_on_id_low(table_id);
670
mutex_exit(&(dict_sys->mutex));
672
if (node->table == NULL) {
673
/* The table has been dropped: no need to do purge */
675
row_mysql_unfreeze_data_dictionary(trx);
679
if (node->table->ibd_file_missing) {
680
/* We skip purge of missing .ibd files */
687
clust_index = dict_table_get_first_index(node->table);
689
if (clust_index == NULL) {
690
/* The table was corrupt in the data dictionary */
695
ptr = trx_undo_rec_get_row_ref(ptr, clust_index, &(node->ref),
698
ptr = trx_undo_update_rec_get_update(ptr, clust_index, type, trx_id,
699
roll_ptr, info_bits, trx,
700
node->heap, &(node->update));
702
/* Read to the partial row the fields that occur in indexes */
704
if (!(cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
705
ptr = trx_undo_rec_get_partial_row(
706
ptr, clust_index, &node->row,
707
type == TRX_UNDO_UPD_DEL_REC,
714
/***********************************************************//**
715
Fetches an undo log record and does the purge for the recorded operation.
716
If none left, or the current purge completed, returns the control to the
717
parent node, which is always a query thread node.
718
@return DB_SUCCESS if operation successfully completed, else error code */
723
purge_node_t* node, /*!< in: row purge node */
724
que_thr_t* thr) /*!< in: query thread */
728
ibool updated_extern;
733
trx = thr_get_trx(thr);
735
node->undo_rec = trx_purge_fetch_next_rec(&roll_ptr,
736
&(node->reservation),
738
if (!node->undo_rec) {
739
/* Purge completed for this query thread */
741
thr->run_node = que_node_get_parent(node);
746
node->roll_ptr = roll_ptr;
748
if (node->undo_rec == &trx_purge_dummy_rec) {
749
purge_needed = FALSE;
751
purge_needed = row_purge_parse_undo_rec(node, &updated_extern,
753
/* If purge_needed == TRUE, we must also remember to unfreeze the data dictionary! */
758
node->found_clust = FALSE;
760
node->index = dict_table_get_next_index(
761
dict_table_get_first_index(node->table));
763
if (node->rec_type == TRX_UNDO_DEL_MARK_REC) {
764
row_purge_del_mark(node);
766
} else if (updated_extern
767
|| node->rec_type == TRX_UNDO_UPD_EXIST_REC) {
769
row_purge_upd_exist_or_extern(node);
772
if (node->found_clust) {
773
btr_pcur_close(&(node->pcur));
776
row_mysql_unfreeze_data_dictionary(trx);
779
/* Do some cleanup */
780
trx_purge_rec_release(node->reservation);
781
mem_heap_empty(node->heap);
783
thr->run_node = node;
788
/***********************************************************//**
789
Does the purge operation for a single undo log record. This is a high-level
790
function used in an SQL execution graph.
791
@return query thread to run next or NULL */
796
que_thr_t* thr) /*!< in: query thread */
801
#endif /* UNIV_DEBUG */
805
node = thr->run_node;
807
ut_ad(que_node_get_type(node) == QUE_NODE_PURGE);
811
#endif /* UNIV_DEBUG */
812
row_purge(node, thr);
815
ut_a(err == DB_SUCCESS);
816
#endif /* UNIV_DEBUG */