~drizzle-trunk/drizzle/development

1 by brian
clean slate
1
/******************************************************
2
Purge obsolete records
3
4
(c) 1997 Innobase Oy
5
6
Created 3/14/1997 Heikki Tuuri
7
*******************************************************/
8
9
#include "row0purge.h"
10
11
#ifdef UNIV_NONINL
12
#include "row0purge.ic"
13
#endif
14
15
#include "fsp0fsp.h"
16
#include "mach0data.h"
17
#include "trx0rseg.h"
18
#include "trx0trx.h"
19
#include "trx0roll.h"
20
#include "trx0undo.h"
21
#include "trx0purge.h"
22
#include "trx0rec.h"
23
#include "que0que.h"
24
#include "row0row.h"
25
#include "row0upd.h"
26
#include "row0vers.h"
27
#include "row0mysql.h"
28
#include "log0log.h"
29
30
/************************************************************************
31
Creates a purge node to a query graph. */
32
33
purge_node_t*
34
row_purge_node_create(
35
/*==================*/
36
				/* out, own: purge node */
37
	que_thr_t*	parent,	/* in: parent node, i.e., a thr node */
38
	mem_heap_t*	heap)	/* in: memory heap where created */
39
{
40
	purge_node_t*	node;
41
42
	ut_ad(parent && heap);
43
44
	node = mem_heap_alloc(heap, sizeof(purge_node_t));
45
46
	node->common.type = QUE_NODE_PURGE;
47
	node->common.parent = parent;
48
49
	node->heap = mem_heap_create(256);
50
51
	return(node);
52
}
53
54
/***************************************************************
55
Repositions the pcur in the purge node on the clustered index record,
56
if found. */
57
static
58
ibool
59
row_purge_reposition_pcur(
60
/*======================*/
61
				/* out: TRUE if the record was found */
62
	ulint		mode,	/* in: latching mode */
63
	purge_node_t*	node,	/* in: row purge node */
64
	mtr_t*		mtr)	/* in: mtr */
65
{
66
	ibool	found;
67
68
	if (node->found_clust) {
69
		found = btr_pcur_restore_position(mode, &(node->pcur), mtr);
70
71
		return(found);
72
	}
73
74
	found = row_search_on_row_ref(&(node->pcur), mode, node->table,
75
				      node->ref, mtr);
76
	node->found_clust = found;
77
78
	if (found) {
79
		btr_pcur_store_position(&(node->pcur), mtr);
80
	}
81
82
	return(found);
83
}
84
85
/***************************************************************
86
Removes a delete marked clustered index record if possible. */
87
static
88
ibool
89
row_purge_remove_clust_if_poss_low(
90
/*===============================*/
91
				/* out: TRUE if success, or if not found, or
92
				if modified after the delete marking */
93
	purge_node_t*	node,	/* in: row purge node */
94
	ulint		mode)	/* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
95
{
96
	dict_index_t*	index;
97
	btr_pcur_t*	pcur;
98
	btr_cur_t*	btr_cur;
99
	ibool		success;
100
	ulint		err;
101
	mtr_t		mtr;
102
	rec_t*		rec;
103
	mem_heap_t*	heap		= NULL;
104
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
105
	*offsets_ = (sizeof offsets_) / sizeof *offsets_;
106
107
	index = dict_table_get_first_index(node->table);
108
109
	pcur = &(node->pcur);
110
	btr_cur = btr_pcur_get_btr_cur(pcur);
111
112
	mtr_start(&mtr);
113
114
	success = row_purge_reposition_pcur(mode, node, &mtr);
115
116
	if (!success) {
117
		/* The record is already removed */
118
119
		btr_pcur_commit_specify_mtr(pcur, &mtr);
120
121
		return(TRUE);
122
	}
123
124
	rec = btr_pcur_get_rec(pcur);
125
126
	if (0 != ut_dulint_cmp(node->roll_ptr, row_get_rec_roll_ptr(
127
				       rec, index, rec_get_offsets(
128
					       rec, index, offsets_,
129
					       ULINT_UNDEFINED, &heap)))) {
130
		if (UNIV_LIKELY_NULL(heap)) {
131
			mem_heap_free(heap);
132
		}
133
		/* Someone else has modified the record later: do not remove */
134
		btr_pcur_commit_specify_mtr(pcur, &mtr);
135
136
		return(TRUE);
137
	}
138
139
	if (UNIV_LIKELY_NULL(heap)) {
140
		mem_heap_free(heap);
141
	}
142
143
	if (mode == BTR_MODIFY_LEAF) {
144
		success = btr_cur_optimistic_delete(btr_cur, &mtr);
145
	} else {
146
		ut_ad(mode == BTR_MODIFY_TREE);
147
		btr_cur_pessimistic_delete(&err, FALSE, btr_cur, FALSE, &mtr);
148
149
		if (err == DB_SUCCESS) {
150
			success = TRUE;
151
		} else if (err == DB_OUT_OF_FILE_SPACE) {
152
			success = FALSE;
153
		} else {
154
			ut_error;
155
		}
156
	}
157
158
	btr_pcur_commit_specify_mtr(pcur, &mtr);
159
160
	return(success);
161
}
162
163
/***************************************************************
164
Removes a clustered index record if it has not been modified after the delete
165
marking. */
166
static
167
void
168
row_purge_remove_clust_if_poss(
169
/*===========================*/
170
	purge_node_t*	node)	/* in: row purge node */
171
{
172
	ibool	success;
173
	ulint	n_tries	= 0;
174
175
	/*	fputs("Purge: Removing clustered record\n", stderr); */
176
177
	success = row_purge_remove_clust_if_poss_low(node, BTR_MODIFY_LEAF);
178
	if (success) {
179
180
		return;
181
	}
182
retry:
183
	success = row_purge_remove_clust_if_poss_low(node, BTR_MODIFY_TREE);
184
	/* The delete operation may fail if we have little
185
	file space left: TODO: easiest to crash the database
186
	and restart with more file space */
187
188
	if (!success && n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) {
189
		n_tries++;
190
191
		os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);
192
193
		goto retry;
194
	}
195
196
	ut_a(success);
197
}
198
199
/***************************************************************
200
Removes a secondary index entry if possible. */
201
static
202
ibool
203
row_purge_remove_sec_if_poss_low(
204
/*=============================*/
205
				/* out: TRUE if success or if not found */
206
	purge_node_t*	node,	/* in: row purge node */
207
	dict_index_t*	index,	/* in: index */
208
	dtuple_t*	entry,	/* in: index entry */
209
	ulint		mode)	/* in: latch mode BTR_MODIFY_LEAF or
210
				BTR_MODIFY_TREE */
211
{
212
	btr_pcur_t	pcur;
213
	btr_cur_t*	btr_cur;
214
	ibool		success;
215
	ibool		old_has = 0; /* remove warning */
216
	ibool		found;
217
	ulint		err;
218
	mtr_t		mtr;
219
	mtr_t*		mtr_vers;
220
221
	log_free_check();
222
	mtr_start(&mtr);
223
224
	found = row_search_index_entry(index, entry, mode, &pcur, &mtr);
225
226
	if (!found) {
227
		/* Not found */
228
229
		/* fputs("PURGE:........sec entry not found\n", stderr); */
230
		/* dtuple_print(entry); */
231
232
		btr_pcur_close(&pcur);
233
		mtr_commit(&mtr);
234
235
		return(TRUE);
236
	}
237
238
	btr_cur = btr_pcur_get_btr_cur(&pcur);
239
240
	/* We should remove the index record if no later version of the row,
241
	which cannot be purged yet, requires its existence. If some requires,
242
	we should do nothing. */
243
244
	mtr_vers = mem_alloc(sizeof(mtr_t));
245
246
	mtr_start(mtr_vers);
247
248
	success = row_purge_reposition_pcur(BTR_SEARCH_LEAF, node, mtr_vers);
249
250
	if (success) {
251
		old_has = row_vers_old_has_index_entry(
252
			TRUE, btr_pcur_get_rec(&(node->pcur)),
253
			mtr_vers, index, entry);
254
	}
255
256
	btr_pcur_commit_specify_mtr(&(node->pcur), mtr_vers);
257
258
	mem_free(mtr_vers);
259
260
	if (!success || !old_has) {
261
		/* Remove the index record */
262
263
		if (mode == BTR_MODIFY_LEAF) {
264
			success = btr_cur_optimistic_delete(btr_cur, &mtr);
265
		} else {
266
			ut_ad(mode == BTR_MODIFY_TREE);
267
			btr_cur_pessimistic_delete(&err, FALSE, btr_cur,
268
						   FALSE, &mtr);
269
			if (err == DB_SUCCESS) {
270
				success = TRUE;
271
			} else if (err == DB_OUT_OF_FILE_SPACE) {
272
				success = FALSE;
273
			} else {
274
				ut_error;
275
			}
276
		}
277
	}
278
279
	btr_pcur_close(&pcur);
280
	mtr_commit(&mtr);
281
282
	return(success);
283
}
284
285
/***************************************************************
286
Removes a secondary index entry if possible. */
287
UNIV_INLINE
288
void
289
row_purge_remove_sec_if_poss(
290
/*=========================*/
291
	purge_node_t*	node,	/* in: row purge node */
292
	dict_index_t*	index,	/* in: index */
293
	dtuple_t*	entry)	/* in: index entry */
294
{
295
	ibool	success;
296
	ulint	n_tries		= 0;
297
298
	/*	fputs("Purge: Removing secondary record\n", stderr); */
299
300
	success = row_purge_remove_sec_if_poss_low(node, index, entry,
301
						   BTR_MODIFY_LEAF);
302
	if (success) {
303
304
		return;
305
	}
306
retry:
307
	success = row_purge_remove_sec_if_poss_low(node, index, entry,
308
						   BTR_MODIFY_TREE);
309
	/* The delete operation may fail if we have little
310
	file space left: TODO: easiest to crash the database
311
	and restart with more file space */
312
313
	if (!success && n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) {
314
315
		n_tries++;
316
317
		os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);
318
319
		goto retry;
320
	}
321
322
	ut_a(success);
323
}
324
325
/***************************************************************
326
Purges a delete marking of a record. */
327
static
328
void
329
row_purge_del_mark(
330
/*===============*/
331
	purge_node_t*	node)	/* in: row purge node */
332
{
333
	mem_heap_t*	heap;
334
	dtuple_t*	entry;
335
	dict_index_t*	index;
336
337
	ut_ad(node);
338
339
	heap = mem_heap_create(1024);
340
341
	while (node->index != NULL) {
342
		index = node->index;
343
344
		/* Build the index entry */
345
		entry = row_build_index_entry(node->row, index, heap);
346
347
		row_purge_remove_sec_if_poss(node, index, entry);
348
349
		node->index = dict_table_get_next_index(node->index);
350
	}
351
352
	mem_heap_free(heap);
353
354
	row_purge_remove_clust_if_poss(node);
355
}
356
357
/***************************************************************
358
Purges an update of an existing record. Also purges an update of a delete
359
marked record if that record contained an externally stored field. */
360
static
361
void
362
row_purge_upd_exist_or_extern(
363
/*==========================*/
364
	purge_node_t*	node)	/* in: row purge node */
365
{
366
	mem_heap_t*	heap;
367
	dtuple_t*	entry;
368
	dict_index_t*	index;
369
	upd_field_t*	ufield;
370
	ibool		is_insert;
371
	ulint		rseg_id;
372
	ulint		page_no;
373
	ulint		offset;
374
	ulint		internal_offset;
375
	byte*		data_field;
376
	ulint		data_field_len;
377
	ulint		i;
378
	mtr_t		mtr;
379
380
	ut_ad(node);
381
382
	if (node->rec_type == TRX_UNDO_UPD_DEL_REC) {
383
384
		goto skip_secondaries;
385
	}
386
387
	heap = mem_heap_create(1024);
388
389
	while (node->index != NULL) {
390
		index = node->index;
391
392
		if (row_upd_changes_ord_field_binary(NULL, node->index,
393
						     node->update)) {
394
			/* Build the older version of the index entry */
395
			entry = row_build_index_entry(node->row, index, heap);
396
397
			row_purge_remove_sec_if_poss(node, index, entry);
398
		}
399
400
		node->index = dict_table_get_next_index(node->index);
401
	}
402
403
	mem_heap_free(heap);
404
405
skip_secondaries:
406
	/* Free possible externally stored fields */
407
	for (i = 0; i < upd_get_n_fields(node->update); i++) {
408
409
		ufield = upd_get_nth_field(node->update, i);
410
411
		if (ufield->extern_storage) {
412
			/* We use the fact that new_val points to
413
			node->undo_rec and get thus the offset of
414
			dfield data inside the unod record. Then we
415
			can calculate from node->roll_ptr the file
416
			address of the new_val data */
417
418
			internal_offset = ((byte*)ufield->new_val.data)
419
				- node->undo_rec;
420
421
			ut_a(internal_offset < UNIV_PAGE_SIZE);
422
423
			trx_undo_decode_roll_ptr(node->roll_ptr,
424
						 &is_insert, &rseg_id,
425
						 &page_no, &offset);
426
			mtr_start(&mtr);
427
428
			/* We have to acquire an X-latch to the clustered
429
			index tree */
430
431
			index = dict_table_get_first_index(node->table);
432
433
			mtr_x_lock(dict_index_get_lock(index), &mtr);
434
435
			/* NOTE: we must also acquire an X-latch to the
436
			root page of the tree. We will need it when we
437
			free pages from the tree. If the tree is of height 1,
438
			the tree X-latch does NOT protect the root page,
439
			because it is also a leaf page. Since we will have a
440
			latch on an undo log page, we would break the
441
			latching order if we would only later latch the
442
			root page of such a tree! */
443
444
			btr_root_get(index, &mtr);
445
446
			/* We assume in purge of externally stored fields
447
			that the space id of the undo log record is 0! */
448
449
			data_field = buf_page_get(0, page_no, RW_X_LATCH, &mtr)
450
				+ offset + internal_offset;
451
452
#ifdef UNIV_SYNC_DEBUG
453
			buf_page_dbg_add_level(buf_frame_align(data_field),
454
					       SYNC_TRX_UNDO_PAGE);
455
#endif /* UNIV_SYNC_DEBUG */
456
457
			data_field_len = ufield->new_val.len;
458
459
			btr_free_externally_stored_field(index, data_field,
460
							 data_field_len,
461
							 FALSE, &mtr);
462
			mtr_commit(&mtr);
463
		}
464
	}
465
}
466
467
/***************************************************************
468
Parses the row reference and other info in a modify undo log record. */
469
static
470
ibool
471
row_purge_parse_undo_rec(
472
/*=====================*/
473
				/* out: TRUE if purge operation required:
474
				NOTE that then the CALLER must unfreeze
475
				data dictionary! */
476
	purge_node_t*	node,	/* in: row undo node */
477
	ibool*		updated_extern,
478
				/* out: TRUE if an externally stored field
479
				was updated */
480
	que_thr_t*	thr)	/* in: query thread */
481
{
482
	dict_index_t*	clust_index;
483
	byte*		ptr;
484
	trx_t*		trx;
485
	dulint		undo_no;
486
	dulint		table_id;
487
	dulint		trx_id;
488
	dulint		roll_ptr;
489
	ulint		info_bits;
490
	ulint		type;
491
	ulint		cmpl_info;
492
493
	ut_ad(node && thr);
494
495
	trx = thr_get_trx(thr);
496
497
	ptr = trx_undo_rec_get_pars(node->undo_rec, &type, &cmpl_info,
498
				    updated_extern, &undo_no, &table_id);
499
	node->rec_type = type;
500
501
	if (type == TRX_UNDO_UPD_DEL_REC && !(*updated_extern)) {
502
503
		return(FALSE);
504
	}
505
506
	ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr,
507
					       &info_bits);
508
	node->table = NULL;
509
510
	if (type == TRX_UNDO_UPD_EXIST_REC
511
	    && cmpl_info & UPD_NODE_NO_ORD_CHANGE && !(*updated_extern)) {
512
513
		/* Purge requires no changes to indexes: we may return */
514
515
		return(FALSE);
516
	}
517
518
	/* Prevent DROP TABLE etc. from running when we are doing the purge
519
	for this row */
520
521
	row_mysql_freeze_data_dictionary(trx);
522
523
	mutex_enter(&(dict_sys->mutex));
524
525
	node->table = dict_table_get_on_id_low(table_id);
526
527
	mutex_exit(&(dict_sys->mutex));
528
529
	if (node->table == NULL) {
530
		/* The table has been dropped: no need to do purge */
531
532
		row_mysql_unfreeze_data_dictionary(trx);
533
534
		return(FALSE);
535
	}
536
537
	if (node->table->ibd_file_missing) {
538
		/* We skip purge of missing .ibd files */
539
540
		node->table = NULL;
541
542
		row_mysql_unfreeze_data_dictionary(trx);
543
544
		return(FALSE);
545
	}
546
547
	clust_index = dict_table_get_first_index(node->table);
548
549
	if (clust_index == NULL) {
550
		/* The table was corrupt in the data dictionary */
551
552
		row_mysql_unfreeze_data_dictionary(trx);
553
554
		return(FALSE);
555
	}
556
557
	ptr = trx_undo_rec_get_row_ref(ptr, clust_index, &(node->ref),
558
				       node->heap);
559
560
	ptr = trx_undo_update_rec_get_update(ptr, clust_index, type, trx_id,
561
					     roll_ptr, info_bits, trx,
562
					     node->heap, &(node->update));
563
564
	/* Read to the partial row the fields that occur in indexes */
565
566
	if (!(cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
567
		ptr = trx_undo_rec_get_partial_row(ptr, clust_index,
568
						   &(node->row), node->heap);
569
	}
570
571
	return(TRUE);
572
}
573
574
/***************************************************************
575
Fetches an undo log record and does the purge for the recorded operation.
576
If none left, or the current purge completed, returns the control to the
577
parent node, which is always a query thread node. */
578
static
579
ulint
580
row_purge(
581
/*======*/
582
				/* out: DB_SUCCESS if operation successfully
583
				completed, else error code */
584
	purge_node_t*	node,	/* in: row purge node */
585
	que_thr_t*	thr)	/* in: query thread */
586
{
587
	dulint	roll_ptr;
588
	ibool	purge_needed;
589
	ibool	updated_extern;
590
	trx_t*	trx;
591
592
	ut_ad(node && thr);
593
594
	trx = thr_get_trx(thr);
595
596
	node->undo_rec = trx_purge_fetch_next_rec(&roll_ptr,
597
						  &(node->reservation),
598
						  node->heap);
599
	if (!node->undo_rec) {
600
		/* Purge completed for this query thread */
601
602
		thr->run_node = que_node_get_parent(node);
603
604
		return(DB_SUCCESS);
605
	}
606
607
	node->roll_ptr = roll_ptr;
608
609
	if (node->undo_rec == &trx_purge_dummy_rec) {
610
		purge_needed = FALSE;
611
	} else {
612
		purge_needed = row_purge_parse_undo_rec(node, &updated_extern,
613
							thr);
614
		/* If purge_needed == TRUE, we must also remember to unfreeze
615
		data dictionary! */
616
	}
617
618
	if (purge_needed) {
619
		node->found_clust = FALSE;
620
621
		node->index = dict_table_get_next_index(
622
			dict_table_get_first_index(node->table));
623
624
		if (node->rec_type == TRX_UNDO_DEL_MARK_REC) {
625
			row_purge_del_mark(node);
626
627
		} else if (updated_extern
628
			   || node->rec_type == TRX_UNDO_UPD_EXIST_REC) {
629
630
			row_purge_upd_exist_or_extern(node);
631
		}
632
633
		if (node->found_clust) {
634
			btr_pcur_close(&(node->pcur));
635
		}
636
637
		row_mysql_unfreeze_data_dictionary(trx);
638
	}
639
640
	/* Do some cleanup */
641
	trx_purge_rec_release(node->reservation);
642
	mem_heap_empty(node->heap);
643
644
	thr->run_node = node;
645
646
	return(DB_SUCCESS);
647
}
648
649
/***************************************************************
650
Does the purge operation for a single undo log record. This is a high-level
651
function used in an SQL execution graph. */
652
653
que_thr_t*
654
row_purge_step(
655
/*===========*/
656
				/* out: query thread to run next or NULL */
657
	que_thr_t*	thr)	/* in: query thread */
658
{
659
	purge_node_t*	node;
660
	ulint		err;
661
662
	ut_ad(thr);
663
664
	node = thr->run_node;
665
666
	ut_ad(que_node_get_type(node) == QUE_NODE_PURGE);
667
668
	err = row_purge(node, thr);
669
670
	ut_ad(err == DB_SUCCESS);
671
672
	return(thr);
673
}