~drizzle-trunk/drizzle/development

1 by brian
clean slate
1
/******************************************************
2
Transaction rollback
3
4
(c) 1996 Innobase Oy
5
6
Created 3/26/1996 Heikki Tuuri
7
*******************************************************/
8
9
#include "trx0roll.h"
10
11
#ifdef UNIV_NONINL
12
#include "trx0roll.ic"
13
#endif
14
15
#include "fsp0fsp.h"
16
#include "mach0data.h"
17
#include "trx0rseg.h"
18
#include "trx0trx.h"
19
#include "trx0undo.h"
20
#include "trx0rec.h"
21
#include "que0que.h"
22
#include "usr0sess.h"
23
#include "srv0que.h"
24
#include "srv0start.h"
25
#include "row0undo.h"
26
#include "row0mysql.h"
27
#include "lock0lock.h"
28
#include "pars0pars.h"
29
30
/* This many pages must be undone before a truncate is tried within rollback */
31
#define TRX_ROLL_TRUNC_THRESHOLD	1
32
33
/* In crash recovery, the current trx to be rolled back */
34
trx_t*		trx_roll_crash_recv_trx	= NULL;
35
36
/* In crash recovery we set this to the undo n:o of the current trx to be
37
rolled back. Then we can print how many % the rollback has progressed. */
38
ib_longlong	trx_roll_max_undo_no;
39
40
/* Auxiliary variable which tells the previous progress % we printed */
41
ulint		trx_roll_progress_printed_pct;
42
43
/***********************************************************************
44
Rollback a transaction used in MySQL. */
45
46
int
47
trx_general_rollback_for_mysql(
48
/*===========================*/
49
				/* out: error code or DB_SUCCESS */
50
	trx_t*		trx,	/* in: transaction handle */
51
	ibool		partial,/* in: TRUE if partial rollback requested */
52
	trx_savept_t*	savept)	/* in: pointer to savepoint undo number, if
53
				partial rollback requested */
54
{
55
#ifndef UNIV_HOTBACKUP
56
	mem_heap_t*	heap;
57
	que_thr_t*	thr;
58
	roll_node_t*	roll_node;
59
60
	/* Tell Innobase server that there might be work for
61
	utility threads: */
62
63
	srv_active_wake_master_thread();
64
65
	trx_start_if_not_started(trx);
66
67
	heap = mem_heap_create(512);
68
69
	roll_node = roll_node_create(heap);
70
71
	roll_node->partial = partial;
72
73
	if (partial) {
74
		roll_node->savept = *savept;
75
	}
76
77
	trx->error_state = DB_SUCCESS;
78
79
	thr = pars_complete_graph_for_exec(roll_node, trx, heap);
80
81
	ut_a(thr == que_fork_start_command(que_node_get_parent(thr)));
82
	que_run_threads(thr);
83
84
	mutex_enter(&kernel_mutex);
85
86
	while (trx->que_state != TRX_QUE_RUNNING) {
87
88
		mutex_exit(&kernel_mutex);
89
90
		os_thread_sleep(100000);
91
92
		mutex_enter(&kernel_mutex);
93
	}
94
95
	mutex_exit(&kernel_mutex);
96
97
	mem_heap_free(heap);
98
99
	ut_a(trx->error_state == DB_SUCCESS);
100
101
	/* Tell Innobase server that there might be work for
102
	utility threads: */
103
104
	srv_active_wake_master_thread();
105
106
	return((int) trx->error_state);
107
#else /* UNIV_HOTBACKUP */
108
	/* This function depends on MySQL code that is not included in
109
	InnoDB Hot Backup builds.  Besides, this function should never
110
	be called in InnoDB Hot Backup. */
111
	ut_error;
112
	return(DB_FAIL);
113
#endif /* UNIV_HOTBACKUP */
114
}
115
116
/***********************************************************************
117
Rollback a transaction used in MySQL. */
118
119
int
120
trx_rollback_for_mysql(
121
/*===================*/
122
			/* out: error code or DB_SUCCESS */
123
	trx_t*	trx)	/* in: transaction handle */
124
{
125
	int	err;
126
127
	if (trx->conc_state == TRX_NOT_STARTED) {
128
129
		return(DB_SUCCESS);
130
	}
131
132
	trx->op_info = "rollback";
133
134
	/* If we are doing the XA recovery of prepared transactions, then
135
	the transaction object does not have an InnoDB session object, and we
136
	set a dummy session that we use for all MySQL transactions. */
137
138
	mutex_enter(&kernel_mutex);
139
140
	if (trx->sess == NULL) {
141
		/* Open a dummy session */
142
143
		if (!trx_dummy_sess) {
144
			trx_dummy_sess = sess_open();
145
		}
146
147
		trx->sess = trx_dummy_sess;
148
	}
149
150
	mutex_exit(&kernel_mutex);
151
152
	err = trx_general_rollback_for_mysql(trx, FALSE, NULL);
153
	
154
	trx->op_info = "";
155
156
	return(err);
157
}
158
159
/***********************************************************************
160
Rollback the latest SQL statement for MySQL. */
161
162
int
163
trx_rollback_last_sql_stat_for_mysql(
164
/*=================================*/
165
			/* out: error code or DB_SUCCESS */
166
	trx_t*	trx)	/* in: transaction handle */
167
{
168
	int	err;
169
170
	if (trx->conc_state == TRX_NOT_STARTED) {
171
172
		return(DB_SUCCESS);
173
	}
174
175
	trx->op_info = "rollback of SQL statement";
176
177
	err = trx_general_rollback_for_mysql(trx, TRUE,
178
					     &(trx->last_sql_stat_start));
179
	/* The following call should not be needed, but we play safe: */
180
	trx_mark_sql_stat_end(trx);
181
182
	trx->op_info = "";
183
184
	return(err);
185
}
186
187
/***********************************************************************
188
Frees savepoint structs. */
189
190
void
191
trx_roll_savepoints_free(
192
/*=====================*/
193
	trx_t*			trx,	/* in: transaction handle */
194
	trx_named_savept_t*	savep)	/* in: free all savepoints > this one;
195
					if this is NULL, free all savepoints
196
					of trx */
197
{
198
	trx_named_savept_t*	next_savep;
199
200
	if (savep == NULL) {
201
		savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
202
	} else {
203
		savep = UT_LIST_GET_NEXT(trx_savepoints, savep);
204
	}
205
206
	while (savep != NULL) {
207
		next_savep = UT_LIST_GET_NEXT(trx_savepoints, savep);
208
209
		UT_LIST_REMOVE(trx_savepoints, trx->trx_savepoints, savep);
210
		mem_free(savep->name);
211
		mem_free(savep);
212
213
		savep = next_savep;
214
	}
215
}
216
217
/***********************************************************************
218
Rolls back a transaction back to a named savepoint. Modifications after the
219
savepoint are undone but InnoDB does NOT release the corresponding locks
220
which are stored in memory. If a lock is 'implicit', that is, a new inserted
221
row holds a lock where the lock information is carried by the trx id stored in
222
the row, these locks are naturally released in the rollback. Savepoints which
223
were set after this savepoint are deleted. */
224
225
ulint
226
trx_rollback_to_savepoint_for_mysql(
227
/*================================*/
228
						/* out: if no savepoint
229
						of the name found then
230
						DB_NO_SAVEPOINT,
231
						otherwise DB_SUCCESS */
232
	trx_t*		trx,			/* in: transaction handle */
233
	const char*	savepoint_name,		/* in: savepoint name */
234
	ib_longlong*	mysql_binlog_cache_pos)	/* out: the MySQL binlog cache
235
						position corresponding to this
236
						savepoint; MySQL needs this
237
						information to remove the
238
						binlog entries of the queries
239
						executed after the savepoint */
240
{
241
	trx_named_savept_t*	savep;
242
	ulint			err;
243
244
	savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
245
246
	while (savep != NULL) {
247
		if (0 == ut_strcmp(savep->name, savepoint_name)) {
248
			/* Found */
249
			break;
250
		}
251
		savep = UT_LIST_GET_NEXT(trx_savepoints, savep);
252
	}
253
254
	if (savep == NULL) {
255
256
		return(DB_NO_SAVEPOINT);
257
	}
258
259
	if (trx->conc_state == TRX_NOT_STARTED) {
260
		ut_print_timestamp(stderr);
261
		fputs("  InnoDB: Error: transaction has a savepoint ", stderr);
262
		ut_print_name(stderr, trx, FALSE, savep->name);
263
		fputs(" though it is not started\n", stderr);
264
		return(DB_ERROR);
265
	}
266
267
	/* We can now free all savepoints strictly later than this one */
268
269
	trx_roll_savepoints_free(trx, savep);
270
271
	*mysql_binlog_cache_pos = savep->mysql_binlog_cache_pos;
272
273
	trx->op_info = "rollback to a savepoint";
274
275
	err = trx_general_rollback_for_mysql(trx, TRUE, &(savep->savept));
276
277
	/* Store the current undo_no of the transaction so that we know where
278
	to roll back if we have to roll back the next SQL statement: */
279
280
	trx_mark_sql_stat_end(trx);
281
282
	trx->op_info = "";
283
284
	return(err);
285
}
286
287
/***********************************************************************
288
Creates a named savepoint. If the transaction is not yet started, starts it.
289
If there is already a savepoint of the same name, this call erases that old
290
savepoint and replaces it with a new. Savepoints are deleted in a transaction
291
commit or rollback. */
292
293
ulint
294
trx_savepoint_for_mysql(
295
/*====================*/
296
						/* out: always DB_SUCCESS */
297
	trx_t*		trx,			/* in: transaction handle */
298
	const char*	savepoint_name,		/* in: savepoint name */
299
	ib_longlong	binlog_cache_pos)	/* in: MySQL binlog cache
300
						position corresponding to this
301
						connection at the time of the
302
						savepoint */
303
{
304
	trx_named_savept_t*	savep;
305
306
	ut_a(trx);
307
	ut_a(savepoint_name);
308
309
	trx_start_if_not_started(trx);
310
311
	savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
312
313
	while (savep != NULL) {
314
		if (0 == ut_strcmp(savep->name, savepoint_name)) {
315
			/* Found */
316
			break;
317
		}
318
		savep = UT_LIST_GET_NEXT(trx_savepoints, savep);
319
	}
320
321
	if (savep) {
322
		/* There is a savepoint with the same name: free that */
323
324
		UT_LIST_REMOVE(trx_savepoints, trx->trx_savepoints, savep);
325
326
		mem_free(savep->name);
327
		mem_free(savep);
328
	}
329
330
	/* Create a new savepoint and add it as the last in the list */
331
332
	savep = mem_alloc(sizeof(trx_named_savept_t));
333
334
	savep->name = mem_strdup(savepoint_name);
335
336
	savep->savept = trx_savept_take(trx);
337
338
	savep->mysql_binlog_cache_pos = binlog_cache_pos;
339
340
	UT_LIST_ADD_LAST(trx_savepoints, trx->trx_savepoints, savep);
341
342
	return(DB_SUCCESS);
343
}
344
345
/***********************************************************************
346
Releases a named savepoint. Savepoints which
347
were set after this savepoint are deleted. */
348
349
ulint
350
trx_release_savepoint_for_mysql(
351
/*============================*/
352
						/* out: if no savepoint
353
						of the name found then
354
						DB_NO_SAVEPOINT,
355
						otherwise DB_SUCCESS */
356
	trx_t*		trx,			/* in: transaction handle */
357
	const char*	savepoint_name)		/* in: savepoint name */
358
{
359
	trx_named_savept_t*	savep;
360
361
	savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
362
363
	while (savep != NULL) {
364
		if (0 == ut_strcmp(savep->name, savepoint_name)) {
365
			/* Found */
366
			break;
367
		}
368
		savep = UT_LIST_GET_NEXT(trx_savepoints, savep);
369
	}
370
371
	if (savep == NULL) {
372
373
		return(DB_NO_SAVEPOINT);
374
	}
375
376
	/* We can now free all savepoints strictly later than this one */
377
378
	trx_roll_savepoints_free(trx, savep);
379
380
	/* Now we can free this savepoint too */
381
382
	UT_LIST_REMOVE(trx_savepoints, trx->trx_savepoints, savep);
383
384
	mem_free(savep->name);
385
	mem_free(savep);
386
387
	return(DB_SUCCESS);
388
}
389
390
/***********************************************************************
391
Returns a transaction savepoint taken at this point in time. */
392
393
trx_savept_t
394
trx_savept_take(
395
/*============*/
396
			/* out: savepoint */
397
	trx_t*	trx)	/* in: transaction */
398
{
399
	trx_savept_t	savept;
400
401
	savept.least_undo_no = trx->undo_no;
402
403
	return(savept);
404
}
405
406
/***********************************************************************
407
Rollback or clean up transactions which have no user session. If the
408
transaction already was committed, then we clean up a possible insert
409
undo log. If the transaction was not yet committed, then we roll it back.
410
Note: this is done in a background thread. */
411
412
os_thread_ret_t
413
trx_rollback_or_clean_all_without_sess(
414
/*===================================*/
415
			/* out: a dummy parameter */
416
	void*	arg __attribute__((unused)))
417
			/* in: a dummy parameter required by
418
			os_thread_create */
419
{
420
	mem_heap_t*	heap;
421
	que_fork_t*	fork;
422
	que_thr_t*	thr;
423
	roll_node_t*	roll_node;
424
	trx_t*		trx;
425
	dict_table_t*	table;
426
	ib_longlong	rows_to_undo;
427
	const char*	unit		= "";
428
	int		err;
429
430
	mutex_enter(&kernel_mutex);
431
432
	/* Open a dummy session */
433
434
	if (!trx_dummy_sess) {
435
		trx_dummy_sess = sess_open();
436
	}
437
438
	mutex_exit(&kernel_mutex);
439
440
	if (UT_LIST_GET_FIRST(trx_sys->trx_list)) {
441
442
		fprintf(stderr,
443
			"InnoDB: Starting in background the rollback"
444
			" of uncommitted transactions\n");
445
	} else {
446
		goto leave_function;
447
	}
448
loop:
449
	heap = mem_heap_create(512);
450
451
	mutex_enter(&kernel_mutex);
452
453
	trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
454
455
	while (trx) {
456
		if ((trx->sess || (trx->conc_state == TRX_NOT_STARTED))) {
457
			trx = UT_LIST_GET_NEXT(trx_list, trx);
458
		} else if (trx->conc_state == TRX_PREPARED) {
459
460
			trx->sess = trx_dummy_sess;
461
			trx = UT_LIST_GET_NEXT(trx_list, trx);
462
		} else {
463
			break;
464
		}
465
	}
466
467
	mutex_exit(&kernel_mutex);
468
469
	if (trx == NULL) {
470
		ut_print_timestamp(stderr);
471
		fprintf(stderr,
472
			"  InnoDB: Rollback of non-prepared transactions"
473
			" completed\n");
474
475
		mem_heap_free(heap);
476
477
		goto leave_function;
478
	}
479
480
	trx->sess = trx_dummy_sess;
481
482
	if (trx->conc_state == TRX_COMMITTED_IN_MEMORY) {
483
		fprintf(stderr, "InnoDB: Cleaning up trx with id %lu %lu\n",
484
			(ulong) ut_dulint_get_high(trx->id),
485
			(ulong) ut_dulint_get_low(trx->id));
486
487
		trx_cleanup_at_db_startup(trx);
488
489
		mem_heap_free(heap);
490
491
		goto loop;
492
	}
493
494
	fork = que_fork_create(NULL, NULL, QUE_FORK_RECOVERY, heap);
495
	fork->trx = trx;
496
497
	thr = que_thr_create(fork, heap);
498
499
	roll_node = roll_node_create(heap);
500
501
	thr->child = roll_node;
502
	roll_node->common.parent = thr;
503
504
	mutex_enter(&kernel_mutex);
505
506
	trx->graph = fork;
507
508
	ut_a(thr == que_fork_start_command(fork));
509
510
	trx_roll_crash_recv_trx	= trx;
511
	trx_roll_max_undo_no = ut_conv_dulint_to_longlong(trx->undo_no);
512
	trx_roll_progress_printed_pct = 0;
513
	rows_to_undo = trx_roll_max_undo_no;
514
515
	if (rows_to_undo > 1000000000) {
516
		rows_to_undo = rows_to_undo / 1000000;
517
		unit = "M";
518
	}
519
520
	ut_print_timestamp(stderr);
521
	fprintf(stderr,
522
		"  InnoDB: Rolling back trx with id %lu %lu, %lu%s"
523
		" rows to undo\n",
524
		(ulong) ut_dulint_get_high(trx->id),
525
		(ulong) ut_dulint_get_low(trx->id),
526
		(ulong) rows_to_undo, unit);
527
	mutex_exit(&kernel_mutex);
528
529
	trx->mysql_thread_id = os_thread_get_curr_id();
530
531
	trx->mysql_process_no = os_proc_get_number();
532
533
	if (trx->dict_operation) {
534
		row_mysql_lock_data_dictionary(trx);
535
	}
536
537
	que_run_threads(thr);
538
539
	mutex_enter(&kernel_mutex);
540
541
	while (trx->que_state != TRX_QUE_RUNNING) {
542
543
		mutex_exit(&kernel_mutex);
544
545
		fprintf(stderr,
546
			"InnoDB: Waiting for rollback of trx id %lu to end\n",
547
			(ulong) ut_dulint_get_low(trx->id));
548
		os_thread_sleep(100000);
549
550
		mutex_enter(&kernel_mutex);
551
	}
552
553
	mutex_exit(&kernel_mutex);
554
555
	if (trx->dict_operation) {
556
		/* If the transaction was for a dictionary operation, we
557
		drop the relevant table, if it still exists */
558
559
		fprintf(stderr,
560
			"InnoDB: Dropping table with id %lu %lu"
561
			" in recovery if it exists\n",
562
			(ulong) ut_dulint_get_high(trx->table_id),
563
			(ulong) ut_dulint_get_low(trx->table_id));
564
565
		table = dict_table_get_on_id_low(trx->table_id);
566
567
		if (table) {
568
			fputs("InnoDB: Table found: dropping table ", stderr);
569
			ut_print_name(stderr, trx, TRUE, table->name);
570
			fputs(" in recovery\n", stderr);
571
572
			err = row_drop_table_for_mysql(table->name, trx, TRUE);
573
574
			ut_a(err == (int) DB_SUCCESS);
575
		}
576
	}
577
578
	if (trx->dict_operation) {
579
		row_mysql_unlock_data_dictionary(trx);
580
	}
581
582
	fprintf(stderr, "\nInnoDB: Rolling back of trx id %lu %lu completed\n",
583
		(ulong) ut_dulint_get_high(trx->id),
584
		(ulong) ut_dulint_get_low(trx->id));
585
	mem_heap_free(heap);
586
587
	trx_roll_crash_recv_trx	= NULL;
588
589
	goto loop;
590
591
leave_function:
592
	/* We count the number of threads in os_thread_exit(). A created
593
	thread should always use that to exit and not use return() to exit. */
594
595
	os_thread_exit(NULL);
596
597
	OS_THREAD_DUMMY_RETURN;
598
}
599
600
/***********************************************************************
601
Creates an undo number array. */
602
603
trx_undo_arr_t*
604
trx_undo_arr_create(void)
605
/*=====================*/
606
{
607
	trx_undo_arr_t*	arr;
608
	mem_heap_t*	heap;
609
	ulint		i;
610
611
	heap = mem_heap_create(1024);
612
613
	arr = mem_heap_alloc(heap, sizeof(trx_undo_arr_t));
614
615
	arr->infos = mem_heap_alloc(heap, sizeof(trx_undo_inf_t)
616
				    * UNIV_MAX_PARALLELISM);
617
	arr->n_cells = UNIV_MAX_PARALLELISM;
618
	arr->n_used = 0;
619
620
	arr->heap = heap;
621
622
	for (i = 0; i < UNIV_MAX_PARALLELISM; i++) {
623
624
		(trx_undo_arr_get_nth_info(arr, i))->in_use = FALSE;
625
	}
626
627
	return(arr);
628
}
629
630
/***********************************************************************
631
Frees an undo number array. */
632
633
void
634
trx_undo_arr_free(
635
/*==============*/
636
	trx_undo_arr_t*	arr)	/* in: undo number array */
637
{
638
	ut_ad(arr->n_used == 0);
639
640
	mem_heap_free(arr->heap);
641
}
642
643
/***********************************************************************
644
Stores info of an undo log record to the array if it is not stored yet. */
645
static
646
ibool
647
trx_undo_arr_store_info(
648
/*====================*/
649
			/* out: FALSE if the record already existed in the
650
			array */
651
	trx_t*	trx,	/* in: transaction */
652
	dulint	undo_no)/* in: undo number */
653
{
654
	trx_undo_inf_t*	cell;
655
	trx_undo_inf_t*	stored_here;
656
	trx_undo_arr_t*	arr;
657
	ulint		n_used;
658
	ulint		n;
659
	ulint		i;
660
661
	n = 0;
662
	arr = trx->undo_no_arr;
663
	n_used = arr->n_used;
664
	stored_here = NULL;
665
666
	for (i = 0;; i++) {
667
		cell = trx_undo_arr_get_nth_info(arr, i);
668
669
		if (!cell->in_use) {
670
			if (!stored_here) {
671
				/* Not in use, we may store here */
672
				cell->undo_no = undo_no;
673
				cell->in_use = TRUE;
674
675
				arr->n_used++;
676
677
				stored_here = cell;
678
			}
679
		} else {
680
			n++;
681
682
			if (0 == ut_dulint_cmp(cell->undo_no, undo_no)) {
683
684
				if (stored_here) {
685
					stored_here->in_use = FALSE;
686
					ut_ad(arr->n_used > 0);
687
					arr->n_used--;
688
				}
689
690
				ut_ad(arr->n_used == n_used);
691
692
				return(FALSE);
693
			}
694
		}
695
696
		if (n == n_used && stored_here) {
697
698
			ut_ad(arr->n_used == 1 + n_used);
699
700
			return(TRUE);
701
		}
702
	}
703
}
704
705
/***********************************************************************
706
Removes an undo number from the array. */
707
static
708
void
709
trx_undo_arr_remove_info(
710
/*=====================*/
711
	trx_undo_arr_t*	arr,	/* in: undo number array */
712
	dulint		undo_no)/* in: undo number */
713
{
714
	trx_undo_inf_t*	cell;
715
	ulint		n_used;
716
	ulint		n;
717
	ulint		i;
718
719
	n_used = arr->n_used;
720
	n = 0;
721
722
	for (i = 0;; i++) {
723
		cell = trx_undo_arr_get_nth_info(arr, i);
724
725
		if (cell->in_use
726
		    && 0 == ut_dulint_cmp(cell->undo_no, undo_no)) {
727
728
			cell->in_use = FALSE;
729
730
			ut_ad(arr->n_used > 0);
731
732
			arr->n_used--;
733
734
			return;
735
		}
736
	}
737
}
738
739
/***********************************************************************
740
Gets the biggest undo number in an array. */
741
static
742
dulint
743
trx_undo_arr_get_biggest(
744
/*=====================*/
745
				/* out: biggest value, ut_dulint_zero if
746
				the array is empty */
747
	trx_undo_arr_t*	arr)	/* in: undo number array */
748
{
749
	trx_undo_inf_t*	cell;
750
	ulint		n_used;
751
	dulint		biggest;
752
	ulint		n;
753
	ulint		i;
754
755
	n = 0;
756
	n_used = arr->n_used;
757
	biggest = ut_dulint_zero;
758
759
	for (i = 0;; i++) {
760
		cell = trx_undo_arr_get_nth_info(arr, i);
761
762
		if (cell->in_use) {
763
			n++;
764
			if (ut_dulint_cmp(cell->undo_no, biggest) > 0) {
765
766
				biggest = cell->undo_no;
767
			}
768
		}
769
770
		if (n == n_used) {
771
			return(biggest);
772
		}
773
	}
774
}
775
776
/***************************************************************************
777
Tries truncate the undo logs. */
778
779
void
780
trx_roll_try_truncate(
781
/*==================*/
782
	trx_t*	trx)	/* in: transaction */
783
{
784
	trx_undo_arr_t*	arr;
785
	dulint		limit;
786
	dulint		biggest;
787
788
	ut_ad(mutex_own(&(trx->undo_mutex)));
789
	ut_ad(mutex_own(&((trx->rseg)->mutex)));
790
791
	trx->pages_undone = 0;
792
793
	arr = trx->undo_no_arr;
794
795
	limit = trx->undo_no;
796
797
	if (arr->n_used > 0) {
798
		biggest = trx_undo_arr_get_biggest(arr);
799
800
		if (ut_dulint_cmp(biggest, limit) >= 0) {
801
802
			limit = ut_dulint_add(biggest, 1);
803
		}
804
	}
805
806
	if (trx->insert_undo) {
807
		trx_undo_truncate_end(trx, trx->insert_undo, limit);
808
	}
809
810
	if (trx->update_undo) {
811
		trx_undo_truncate_end(trx, trx->update_undo, limit);
812
	}
813
}
814
815
/***************************************************************************
816
Pops the topmost undo log record in a single undo log and updates the info
817
about the topmost record in the undo log memory struct. */
818
static
819
trx_undo_rec_t*
820
trx_roll_pop_top_rec(
821
/*=================*/
822
				/* out: undo log record, the page s-latched */
823
	trx_t*		trx,	/* in: transaction */
824
	trx_undo_t*	undo,	/* in: undo log */
825
	mtr_t*		mtr)	/* in: mtr */
826
{
827
	page_t*		undo_page;
828
	ulint		offset;
829
	trx_undo_rec_t*	prev_rec;
830
	page_t*		prev_rec_page;
831
832
	ut_ad(mutex_own(&(trx->undo_mutex)));
833
834
	undo_page = trx_undo_page_get_s_latched(undo->space,
835
						undo->top_page_no, mtr);
836
	offset = undo->top_offset;
837
838
	/*	fprintf(stderr, "Thread %lu undoing trx %lu undo record %lu\n",
839
	os_thread_get_curr_id(), ut_dulint_get_low(trx->id),
840
	ut_dulint_get_low(undo->top_undo_no)); */
841
842
	prev_rec = trx_undo_get_prev_rec(undo_page + offset,
843
					 undo->hdr_page_no, undo->hdr_offset,
844
					 mtr);
845
	if (prev_rec == NULL) {
846
847
		undo->empty = TRUE;
848
	} else {
849
		prev_rec_page = buf_frame_align(prev_rec);
850
851
		if (prev_rec_page != undo_page) {
852
853
			trx->pages_undone++;
854
		}
855
856
		undo->top_page_no = buf_frame_get_page_no(prev_rec_page);
857
		undo->top_offset  = prev_rec - prev_rec_page;
858
		undo->top_undo_no = trx_undo_rec_get_undo_no(prev_rec);
859
	}
860
861
	return(undo_page + offset);
862
}
863
864
/************************************************************************
865
Pops the topmost record when the two undo logs of a transaction are seen
866
as a single stack of records ordered by their undo numbers. Inserts the
867
undo number of the popped undo record to the array of currently processed
868
undo numbers in the transaction. When the query thread finishes processing
869
of this undo record, it must be released with trx_undo_rec_release. */
870
871
trx_undo_rec_t*
872
trx_roll_pop_top_rec_of_trx(
873
/*========================*/
874
				/* out: undo log record copied to heap, NULL
875
				if none left, or if the undo number of the
876
				top record would be less than the limit */
877
	trx_t*		trx,	/* in: transaction */
878
	dulint		limit,	/* in: least undo number we need */
879
	dulint*		roll_ptr,/* out: roll pointer to undo record */
880
	mem_heap_t*	heap)	/* in: memory heap where copied */
881
{
882
	trx_undo_t*	undo;
883
	trx_undo_t*	ins_undo;
884
	trx_undo_t*	upd_undo;
885
	trx_undo_rec_t*	undo_rec;
886
	trx_undo_rec_t*	undo_rec_copy;
887
	dulint		undo_no;
888
	ibool		is_insert;
889
	trx_rseg_t*	rseg;
890
	ulint		progress_pct;
891
	mtr_t		mtr;
892
893
	rseg = trx->rseg;
894
try_again:
895
	mutex_enter(&(trx->undo_mutex));
896
897
	if (trx->pages_undone >= TRX_ROLL_TRUNC_THRESHOLD) {
898
		mutex_enter(&(rseg->mutex));
899
900
		trx_roll_try_truncate(trx);
901
902
		mutex_exit(&(rseg->mutex));
903
	}
904
905
	ins_undo = trx->insert_undo;
906
	upd_undo = trx->update_undo;
907
908
	if (!ins_undo || ins_undo->empty) {
909
		undo = upd_undo;
910
	} else if (!upd_undo || upd_undo->empty) {
911
		undo = ins_undo;
912
	} else if (ut_dulint_cmp(upd_undo->top_undo_no,
913
				 ins_undo->top_undo_no) > 0) {
914
		undo = upd_undo;
915
	} else {
916
		undo = ins_undo;
917
	}
918
919
	if (!undo || undo->empty
920
	    || (ut_dulint_cmp(limit, undo->top_undo_no) > 0)) {
921
922
		if ((trx->undo_no_arr)->n_used == 0) {
923
			/* Rollback is ending */
924
925
			mutex_enter(&(rseg->mutex));
926
927
			trx_roll_try_truncate(trx);
928
929
			mutex_exit(&(rseg->mutex));
930
		}
931
932
		mutex_exit(&(trx->undo_mutex));
933
934
		return(NULL);
935
	}
936
937
	if (undo == ins_undo) {
938
		is_insert = TRUE;
939
	} else {
940
		is_insert = FALSE;
941
	}
942
943
	*roll_ptr = trx_undo_build_roll_ptr(is_insert, (undo->rseg)->id,
944
					    undo->top_page_no,
945
					    undo->top_offset);
946
	mtr_start(&mtr);
947
948
	undo_rec = trx_roll_pop_top_rec(trx, undo, &mtr);
949
950
	undo_no = trx_undo_rec_get_undo_no(undo_rec);
951
952
	ut_ad(ut_dulint_cmp(ut_dulint_add(undo_no, 1), trx->undo_no) == 0);
953
954
	/* We print rollback progress info if we are in a crash recovery
955
	and the transaction has at least 1000 row operations to undo. */
956
957
	if (trx == trx_roll_crash_recv_trx && trx_roll_max_undo_no > 1000) {
958
959
		progress_pct = 100 - (ulint)
960
			((ut_conv_dulint_to_longlong(undo_no) * 100)
961
			 / trx_roll_max_undo_no);
962
		if (progress_pct != trx_roll_progress_printed_pct) {
963
			if (trx_roll_progress_printed_pct == 0) {
964
				fprintf(stderr,
965
					"\nInnoDB: Progress in percents:"
966
					" %lu", (ulong) progress_pct);
967
			} else {
968
				fprintf(stderr,
969
					" %lu", (ulong) progress_pct);
970
			}
971
			fflush(stderr);
972
			trx_roll_progress_printed_pct = progress_pct;
973
		}
974
	}
975
976
	trx->undo_no = undo_no;
977
978
	if (!trx_undo_arr_store_info(trx, undo_no)) {
979
		/* A query thread is already processing this undo log record */
980
981
		mutex_exit(&(trx->undo_mutex));
982
983
		mtr_commit(&mtr);
984
985
		goto try_again;
986
	}
987
988
	undo_rec_copy = trx_undo_rec_copy(undo_rec, heap);
989
990
	mutex_exit(&(trx->undo_mutex));
991
992
	mtr_commit(&mtr);
993
994
	return(undo_rec_copy);
995
}
996
997
/************************************************************************
998
Reserves an undo log record for a query thread to undo. This should be
999
called if the query thread gets the undo log record not using the pop
1000
function above. */
1001
1002
ibool
1003
trx_undo_rec_reserve(
1004
/*=================*/
1005
			/* out: TRUE if succeeded */
1006
	trx_t*	trx,	/* in: transaction */
1007
	dulint	undo_no)/* in: undo number of the record */
1008
{
1009
	ibool	ret;
1010
1011
	mutex_enter(&(trx->undo_mutex));
1012
1013
	ret = trx_undo_arr_store_info(trx, undo_no);
1014
1015
	mutex_exit(&(trx->undo_mutex));
1016
1017
	return(ret);
1018
}
1019
1020
/***********************************************************************
1021
Releases a reserved undo record. */
1022
1023
void
1024
trx_undo_rec_release(
1025
/*=================*/
1026
	trx_t*	trx,	/* in: transaction */
1027
	dulint	undo_no)/* in: undo number */
1028
{
1029
	trx_undo_arr_t*	arr;
1030
1031
	mutex_enter(&(trx->undo_mutex));
1032
1033
	arr = trx->undo_no_arr;
1034
1035
	trx_undo_arr_remove_info(arr, undo_no);
1036
1037
	mutex_exit(&(trx->undo_mutex));
1038
}
1039
1040
/*************************************************************************
1041
Starts a rollback operation. */
1042
1043
void
1044
trx_rollback(
1045
/*=========*/
1046
	trx_t*		trx,	/* in: transaction */
1047
	trx_sig_t*	sig,	/* in: signal starting the rollback */
1048
	que_thr_t**	next_thr)/* in/out: next query thread to run;
1049
				if the value which is passed in is
1050
				a pointer to a NULL pointer, then the
1051
				calling function can start running
1052
				a new query thread; if the passed value is
1053
				NULL, the parameter is ignored */
1054
{
1055
	que_t*		roll_graph;
1056
	que_thr_t*	thr;
1057
	/*	que_thr_t*	thr2; */
1058
1059
	ut_ad(mutex_own(&kernel_mutex));
1060
	ut_ad((trx->undo_no_arr == NULL) || ((trx->undo_no_arr)->n_used == 0));
1061
1062
	/* Initialize the rollback field in the transaction */
1063
1064
	if (sig->type == TRX_SIG_TOTAL_ROLLBACK) {
1065
1066
		trx->roll_limit = ut_dulint_zero;
1067
1068
	} else if (sig->type == TRX_SIG_ROLLBACK_TO_SAVEPT) {
1069
1070
		trx->roll_limit = (sig->savept).least_undo_no;
1071
1072
	} else if (sig->type == TRX_SIG_ERROR_OCCURRED) {
1073
1074
		trx->roll_limit = trx->last_sql_stat_start.least_undo_no;
1075
	} else {
1076
		ut_error;
1077
	}
1078
1079
	ut_a(ut_dulint_cmp(trx->roll_limit, trx->undo_no) <= 0);
1080
1081
	trx->pages_undone = 0;
1082
1083
	if (trx->undo_no_arr == NULL) {
1084
		trx->undo_no_arr = trx_undo_arr_create();
1085
	}
1086
1087
	/* Build a 'query' graph which will perform the undo operations */
1088
1089
	roll_graph = trx_roll_graph_build(trx);
1090
1091
	trx->graph = roll_graph;
1092
	trx->que_state = TRX_QUE_ROLLING_BACK;
1093
1094
	thr = que_fork_start_command(roll_graph);
1095
1096
	ut_ad(thr);
1097
1098
	/*	thr2 = que_fork_start_command(roll_graph);
1099
1100
	ut_ad(thr2); */
1101
1102
	if (next_thr && (*next_thr == NULL)) {
1103
		*next_thr = thr;
1104
		/*		srv_que_task_enqueue_low(thr2); */
1105
	} else {
1106
		srv_que_task_enqueue_low(thr);
1107
		/*		srv_que_task_enqueue_low(thr2); */
1108
	}
1109
}
1110
1111
/********************************************************************
1112
Builds an undo 'query' graph for a transaction. The actual rollback is
1113
performed by executing this query graph like a query subprocedure call.
1114
The reply about the completion of the rollback will be sent by this
1115
graph. */
1116
1117
que_t*
1118
trx_roll_graph_build(
1119
/*=================*/
1120
			/* out, own: the query graph */
1121
	trx_t*	trx)	/* in: trx handle */
1122
{
1123
	mem_heap_t*	heap;
1124
	que_fork_t*	fork;
1125
	que_thr_t*	thr;
1126
	/*	que_thr_t*	thr2; */
1127
1128
	ut_ad(mutex_own(&kernel_mutex));
1129
1130
	heap = mem_heap_create(512);
1131
	fork = que_fork_create(NULL, NULL, QUE_FORK_ROLLBACK, heap);
1132
	fork->trx = trx;
1133
1134
	thr = que_thr_create(fork, heap);
1135
	/*	thr2 = que_thr_create(fork, heap); */
1136
1137
	thr->child = row_undo_node_create(trx, thr, heap);
1138
	/*	thr2->child = row_undo_node_create(trx, thr2, heap); */
1139
1140
	return(fork);
1141
}
1142
1143
/*************************************************************************
1144
Finishes error processing after the necessary partial rollback has been
1145
done. */
1146
static
1147
void
1148
trx_finish_error_processing(
1149
/*========================*/
1150
	trx_t*	trx)	/* in: transaction */
1151
{
1152
	trx_sig_t*	sig;
1153
	trx_sig_t*	next_sig;
1154
1155
	ut_ad(mutex_own(&kernel_mutex));
1156
1157
	sig = UT_LIST_GET_FIRST(trx->signals);
1158
1159
	while (sig != NULL) {
1160
		next_sig = UT_LIST_GET_NEXT(signals, sig);
1161
1162
		if (sig->type == TRX_SIG_ERROR_OCCURRED) {
1163
1164
			trx_sig_remove(trx, sig);
1165
		}
1166
1167
		sig = next_sig;
1168
	}
1169
1170
	trx->que_state = TRX_QUE_RUNNING;
1171
}
1172
1173
/*************************************************************************
1174
Finishes a partial rollback operation. */
1175
static
1176
void
1177
trx_finish_partial_rollback_off_kernel(
1178
/*===================================*/
1179
	trx_t*		trx,	/* in: transaction */
1180
	que_thr_t**	next_thr)/* in/out: next query thread to run;
1181
				if the value which is passed in is a pointer
1182
				to a NULL pointer, then the calling function
1183
				can start running a new query thread; if this
1184
				parameter is NULL, it is ignored */
1185
{
1186
	trx_sig_t*	sig;
1187
1188
	ut_ad(mutex_own(&kernel_mutex));
1189
1190
	sig = UT_LIST_GET_FIRST(trx->signals);
1191
1192
	/* Remove the signal from the signal queue and send reply message
1193
	to it */
1194
1195
	trx_sig_reply(sig, next_thr);
1196
	trx_sig_remove(trx, sig);
1197
1198
	trx->que_state = TRX_QUE_RUNNING;
1199
}
1200
1201
/********************************************************************
1202
Finishes a transaction rollback. */
1203
1204
void
1205
trx_finish_rollback_off_kernel(
1206
/*===========================*/
1207
	que_t*		graph,	/* in: undo graph which can now be freed */
1208
	trx_t*		trx,	/* in: transaction */
1209
	que_thr_t**	next_thr)/* in/out: next query thread to run;
1210
				if the value which is passed in is
1211
				a pointer to a NULL pointer, then the
1212
				calling function can start running
1213
				a new query thread; if this parameter is
1214
				NULL, it is ignored */
1215
{
1216
	trx_sig_t*	sig;
1217
	trx_sig_t*	next_sig;
1218
1219
	ut_ad(mutex_own(&kernel_mutex));
1220
1221
	ut_a(trx->undo_no_arr == NULL || trx->undo_no_arr->n_used == 0);
1222
1223
	/* Free the memory reserved by the undo graph */
1224
	que_graph_free(graph);
1225
1226
	sig = UT_LIST_GET_FIRST(trx->signals);
1227
1228
	if (sig->type == TRX_SIG_ROLLBACK_TO_SAVEPT) {
1229
1230
		trx_finish_partial_rollback_off_kernel(trx, next_thr);
1231
1232
		return;
1233
1234
	} else if (sig->type == TRX_SIG_ERROR_OCCURRED) {
1235
1236
		trx_finish_error_processing(trx);
1237
1238
		return;
1239
	}
1240
1241
#ifdef UNIV_DEBUG
1242
	if (lock_print_waits) {
1243
		fprintf(stderr, "Trx %lu rollback finished\n",
1244
			(ulong) ut_dulint_get_low(trx->id));
1245
	}
1246
#endif /* UNIV_DEBUG */
1247
1248
	trx_commit_off_kernel(trx);
1249
1250
	/* Remove all TRX_SIG_TOTAL_ROLLBACK signals from the signal queue and
1251
	send reply messages to them */
1252
1253
	trx->que_state = TRX_QUE_RUNNING;
1254
1255
	while (sig != NULL) {
1256
		next_sig = UT_LIST_GET_NEXT(signals, sig);
1257
1258
		if (sig->type == TRX_SIG_TOTAL_ROLLBACK) {
1259
1260
			trx_sig_reply(sig, next_thr);
1261
1262
			trx_sig_remove(trx, sig);
1263
		}
1264
1265
		sig = next_sig;
1266
	}
1267
}
1268
1269
/*************************************************************************
1270
Creates a rollback command node struct. */
1271
1272
roll_node_t*
1273
roll_node_create(
1274
/*=============*/
1275
				/* out, own: rollback node struct */
1276
	mem_heap_t*	heap)	/* in: mem heap where created */
1277
{
1278
	roll_node_t*	node;
1279
1280
	node = mem_heap_alloc(heap, sizeof(roll_node_t));
1281
	node->common.type = QUE_NODE_ROLLBACK;
1282
	node->state = ROLL_NODE_SEND;
1283
1284
	node->partial = FALSE;
1285
1286
	return(node);
1287
}
1288
1289
/***************************************************************
1290
Performs an execution step for a rollback command node in a query graph. */
1291
1292
que_thr_t*
1293
trx_rollback_step(
1294
/*==============*/
1295
				/* out: query thread to run next, or NULL */
1296
	que_thr_t*	thr)	/* in: query thread */
1297
{
1298
	roll_node_t*	node;
1299
	ulint		sig_no;
1300
	trx_savept_t*	savept;
1301
1302
	node = thr->run_node;
1303
1304
	ut_ad(que_node_get_type(node) == QUE_NODE_ROLLBACK);
1305
1306
	if (thr->prev_node == que_node_get_parent(node)) {
1307
		node->state = ROLL_NODE_SEND;
1308
	}
1309
1310
	if (node->state == ROLL_NODE_SEND) {
1311
		mutex_enter(&kernel_mutex);
1312
1313
		node->state = ROLL_NODE_WAIT;
1314
1315
		if (node->partial) {
1316
			sig_no = TRX_SIG_ROLLBACK_TO_SAVEPT;
1317
			savept = &(node->savept);
1318
		} else {
1319
			sig_no = TRX_SIG_TOTAL_ROLLBACK;
1320
			savept = NULL;
1321
		}
1322
1323
		/* Send a rollback signal to the transaction */
1324
1325
		trx_sig_send(thr_get_trx(thr), sig_no, TRX_SIG_SELF, thr,
1326
			     savept, NULL);
1327
1328
		thr->state = QUE_THR_SIG_REPLY_WAIT;
1329
1330
		mutex_exit(&kernel_mutex);
1331
1332
		return(NULL);
1333
	}
1334
1335
	ut_ad(node->state == ROLL_NODE_WAIT);
1336
1337
	thr->run_node = que_node_get_parent(node);
1338
1339
	return(thr);
1340
}