1
/******************************************************
6
Created 3/26/1996 Heikki Tuuri
7
*******************************************************/
21
#include "trx0purge.h"
25
/* The transaction system */
26
trx_sys_t* trx_sys = NULL;
27
trx_doublewrite_t* trx_doublewrite = NULL;
29
/* The following is set to TRUE when we are upgrading from the old format data
30
files to the new >= 4.1.x multiple tablespaces data file format */
32
ibool trx_doublewrite_must_reset_space_ids = FALSE;
34
/* The following is TRUE when we are using the database in the new format,
35
i.e., we have successfully upgraded, or have created a new database. */
38
ibool trx_sys_multiple_tablespace_format = FALSE;
40
/* In a MySQL replication slave, in crash recovery we store the master log
41
file name and position here. We have successfully got the updates to InnoDB
42
up to this position. If .._pos is -1, it means no crash recovery was needed,
43
or there was no master log position info inside InnoDB. */
45
char trx_sys_mysql_master_log_name[TRX_SYS_MYSQL_LOG_NAME_LEN];
46
ib_longlong trx_sys_mysql_master_log_pos = -1;
48
/* If this MySQL server uses binary logging, after InnoDB has been inited
49
and if it has done a crash recovery, we store the binlog file name and position
50
here. If .._pos is -1, it means there was no binlog position info inside
InnoDB. */
53
char trx_sys_mysql_bin_log_name[TRX_SYS_MYSQL_LOG_NAME_LEN];
54
ib_longlong trx_sys_mysql_bin_log_pos = -1;
57
/********************************************************************
58
Determines if a page number is located inside the doublewrite buffer.
The page number is compared against the two half-open page ranges
[block1, block1 + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) and
[block2, block2 + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) stored in trx_doublewrite. */
61
trx_doublewrite_page_inside(
62
/*========================*/
63
/* out: TRUE if the location is inside
64
the two blocks of the doublewrite buffer */
65
ulint page_no) /* in: page number */
67
/* trx_doublewrite is NULL: there is no doublewrite struct whose block
fields could be consulted */
if (trx_doublewrite == NULL) {
72
/* First block: lower bound inclusive, upper bound exclusive */
if (page_no >= trx_doublewrite->block1
73
&& page_no < trx_doublewrite->block1
74
+ TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
78
/* Second block: the same half-open range test */
if (page_no >= trx_doublewrite->block2
79
&& page_no < trx_doublewrite->block2
80
+ TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
87
/********************************************************************
88
Creates or initializes the doublewrite buffer at a database start.
Allocates the global trx_doublewrite struct, creates its mutex, reads the
start page numbers of the two blocks from the doublewrite header and
allocates the page-aligned write buffer and the buf_block_arr array. */
93
byte* doublewrite) /* in: pointer to the doublewrite buf
94
header on trx sys page */
96
trx_doublewrite = mem_alloc(sizeof(trx_doublewrite_t));
98
/* Since we now start to use the doublewrite buffer, no need to call
99
fsync() after every write to a data file */
101
os_do_not_call_flush_at_each_write = TRUE;
102
#endif /* UNIV_DO_FLUSH */
104
mutex_create(&trx_doublewrite->mutex, SYNC_DOUBLEWRITE);
106
trx_doublewrite->first_free = 0;
108
/* Start page numbers of the two blocks, stored as 4-byte fields in the
doublewrite header */
trx_doublewrite->block1 = mach_read_from_4(
109
doublewrite + TRX_SYS_DOUBLEWRITE_BLOCK1);
110
trx_doublewrite->block2 = mach_read_from_4(
111
doublewrite + TRX_SYS_DOUBLEWRITE_BLOCK2);
112
/* One page more than the 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE pages of the two
blocks is allocated, which leaves room for aligning write_buf to
UNIV_PAGE_SIZE below */
trx_doublewrite->write_buf_unaligned = ut_malloc(
113
(1 + 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) * UNIV_PAGE_SIZE);
115
trx_doublewrite->write_buf = ut_align(
116
trx_doublewrite->write_buf_unaligned, UNIV_PAGE_SIZE);
117
/* One pointer-sized slot for each page of the two blocks */
trx_doublewrite->buf_block_arr = mem_alloc(
118
2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * sizeof(void*));
121
/********************************************************************
122
Marks the trx sys header when we have successfully upgraded to the >= 4.1.x
123
multiple tablespace format. The marker value
TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N is written to the doublewrite header on
the trx sys page, a checkpoint is made, and the global flag
trx_sys_multiple_tablespace_format is set to TRUE. */
126
trx_sys_mark_upgraded_to_multiple_tablespaces(void)
127
/*===============================================*/
133
/* We upgraded to 4.1.x and reset the space id fields in the
134
doublewrite buffer. Let us mark to the trx_sys header that the upgrade
139
page = buf_page_get(TRX_SYS_SPACE, TRX_SYS_PAGE_NO, RW_X_LATCH, &mtr);
140
#ifdef UNIV_SYNC_DEBUG
141
buf_page_dbg_add_level(page, SYNC_NO_ORDER_CHECK);
142
#endif /* UNIV_SYNC_DEBUG */
144
/* The doublewrite header lies TRX_SYS_DOUBLEWRITE bytes into the trx sys
page */
doublewrite = page + TRX_SYS_DOUBLEWRITE;
146
/* Write the marker value into the SPACE_ID_STORED field of that header */
mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED,
147
TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N,
151
/* Flush the modified pages to disk and make a checkpoint */
152
log_make_checkpoint_at(ut_dulint_max, TRUE);
154
/* Record in memory as well that the multiple tablespace format is in use */
trx_sys_multiple_tablespace_format = TRUE;
157
/********************************************************************
158
Creates the doublewrite buffer to a new InnoDB installation. The header of the
159
doublewrite buffer is placed on the trx system header page.
If the header already carries TRX_SYS_DOUBLEWRITE_MAGIC_N, the buffer is not
created again; trx_doublewrite_init() is called on the existing header. */
162
trx_sys_create_doublewrite_buf(void)
163
/*================================*/
175
/* A non-NULL trx_doublewrite means the in-memory doublewrite struct
already exists */
if (trx_doublewrite) {
184
page = buf_page_get(TRX_SYS_SPACE, TRX_SYS_PAGE_NO, RW_X_LATCH, &mtr);
185
#ifdef UNIV_SYNC_DEBUG
186
buf_page_dbg_add_level(page, SYNC_NO_ORDER_CHECK);
187
#endif /* UNIV_SYNC_DEBUG */
189
doublewrite = page + TRX_SYS_DOUBLEWRITE;
191
/* The magic number field of the header tells whether the doublewrite
buffer has been created already */
if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC)
192
== TRX_SYS_DOUBLEWRITE_MAGIC_N) {
193
/* The doublewrite buffer has already been created:
194
just read in some numbers */
196
trx_doublewrite_init(doublewrite);
201
"InnoDB: Doublewrite buffer not found:"
204
/* The bound is built from the page count of the allocation loop below
(2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE + FSP_EXTENT_SIZE / 2) plus a margin
of 100 */
if (buf_pool_get_curr_size()
205
< ((2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
206
+ FSP_EXTENT_SIZE / 2 + 100)
209
"InnoDB: Cannot create doublewrite buffer:"
211
"InnoDB: increase your buffer pool size.\n"
212
"InnoDB: Cannot continue operation.\n");
217
page2 = fseg_create(TRX_SYS_SPACE, TRX_SYS_PAGE_NO,
219
+ TRX_SYS_DOUBLEWRITE_FSEG, &mtr);
221
/* fseg_create acquires a second latch on the page,
222
therefore we must declare it: */
224
#ifdef UNIV_SYNC_DEBUG
225
buf_page_dbg_add_level(page2, SYNC_NO_ORDER_CHECK);
226
#endif /* UNIV_SYNC_DEBUG */
230
"InnoDB: Cannot create doublewrite buffer:"
232
"InnoDB: increase your tablespace size.\n"
233
"InnoDB: Cannot continue operation.\n");
235
/* We exit without committing the mtr to prevent
236
its modifications to the database getting to disk */
241
/* Address of the file segment header kept inside the doublewrite header */
fseg_header = page + TRX_SYS_DOUBLEWRITE
242
+ TRX_SYS_DOUBLEWRITE_FSEG;
245
/* Allocate FSP_EXTENT_SIZE / 2 leading pages plus the pages of the two
doublewrite blocks, one page per iteration */
for (i = 0; i < 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
246
+ FSP_EXTENT_SIZE / 2; i++) {
247
page_no = fseg_alloc_free_page(fseg_header,
250
if (page_no == FIL_NULL) {
252
"InnoDB: Cannot create doublewrite"
253
" buffer: you must\n"
254
"InnoDB: increase your"
255
" tablespace size.\n"
256
"InnoDB: Cannot continue operation.\n"
262
/* We read the allocated pages to the buffer pool;
263
when they are written to disk in a flush, the space
264
id and page number fields are also written to the
265
pages. When we at database startup read pages
266
from the doublewrite buffer, we know that if the
267
space id and page number in them are the same as
268
the page position in the tablespace, then the page
269
has not been written to in doublewrite. */
271
new_page = buf_page_get(TRX_SYS_SPACE, page_no,
273
#ifdef UNIV_SYNC_DEBUG
274
buf_page_dbg_add_level(new_page, SYNC_NO_ORDER_CHECK);
275
#endif /* UNIV_SYNC_DEBUG */
277
/* Make a dummy change to the page to ensure it will
278
be written to disk in a flush */
280
mlog_write_ulint(new_page + FIL_PAGE_DATA,
281
TRX_SYS_DOUBLEWRITE_MAGIC_N,
284
/* The page allocated at i == FSP_EXTENT_SIZE / 2 becomes the start of
block 1, and the page TRX_SYS_DOUBLEWRITE_BLOCK_SIZE iterations later the
start of block 2. Each start page number is written twice: at its normal
offset and again at offset TRX_SYS_DOUBLEWRITE_REPEAT. For every other
page past the first half extent it is asserted that the page number
follows the previous one directly. */
if (i == FSP_EXTENT_SIZE / 2) {
285
mlog_write_ulint(doublewrite
286
+ TRX_SYS_DOUBLEWRITE_BLOCK1,
287
page_no, MLOG_4BYTES, &mtr);
288
mlog_write_ulint(doublewrite
289
+ TRX_SYS_DOUBLEWRITE_REPEAT
290
+ TRX_SYS_DOUBLEWRITE_BLOCK1,
291
page_no, MLOG_4BYTES, &mtr);
292
} else if (i == FSP_EXTENT_SIZE / 2
293
+ TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
294
mlog_write_ulint(doublewrite
295
+ TRX_SYS_DOUBLEWRITE_BLOCK2,
296
page_no, MLOG_4BYTES, &mtr);
297
mlog_write_ulint(doublewrite
298
+ TRX_SYS_DOUBLEWRITE_REPEAT
299
+ TRX_SYS_DOUBLEWRITE_BLOCK2,
300
page_no, MLOG_4BYTES, &mtr);
301
} else if (i > FSP_EXTENT_SIZE / 2) {
302
ut_a(page_no == prev_page_no + 1);
305
prev_page_no = page_no;
308
/* Write the magic number and its copy at TRX_SYS_DOUBLEWRITE_REPEAT */
mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC,
309
TRX_SYS_DOUBLEWRITE_MAGIC_N,
311
mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC
312
+ TRX_SYS_DOUBLEWRITE_REPEAT,
313
TRX_SYS_DOUBLEWRITE_MAGIC_N,
316
/* Write the SPACE_ID_STORED marker, the same field that
trx_sys_mark_upgraded_to_multiple_tablespaces() sets */
mlog_write_ulint(doublewrite
317
+ TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED,
318
TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N,
322
/* Flush the modified pages to disk and make a checkpoint */
323
log_make_checkpoint_at(ut_dulint_max, TRUE);
325
fprintf(stderr, "InnoDB: Doublewrite buffer created\n");
327
trx_sys_multiple_tablespace_format = TRUE;
333
/********************************************************************
334
At a database startup initializes the doublewrite buffer memory structure if
335
we already have a doublewrite buffer created in the data files. If we are
336
upgrading to an InnoDB version which supports multiple tablespaces, then this
337
function performs the necessary update operations. If we are in a crash
338
recovery, this function uses a possible doublewrite buffer to restore
339
half-written pages in the data files. The per-page checks in the loop are
made only when restore_corrupt_pages is TRUE. */
342
trx_sys_doublewrite_init_or_restore_pages(
343
/*======================================*/
344
ibool restore_corrupt_pages)
348
byte* unaligned_read_buf;
351
ulint source_page_no;
358
/* We do the file i/o past the buffer pool */
360
/* Two pages are allocated so that one whole page aligned to UNIV_PAGE_SIZE
fits inside the allocation */
unaligned_read_buf = ut_malloc(2 * UNIV_PAGE_SIZE);
361
read_buf = ut_align(unaligned_read_buf, UNIV_PAGE_SIZE);
363
/* Read the trx sys header to check if we are using the doublewrite
366
fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, TRX_SYS_PAGE_NO, 0,
367
UNIV_PAGE_SIZE, read_buf, NULL);
368
doublewrite = read_buf + TRX_SYS_DOUBLEWRITE;
370
if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC)
371
== TRX_SYS_DOUBLEWRITE_MAGIC_N) {
372
/* The doublewrite buffer has been created */
374
trx_doublewrite_init(doublewrite);
376
/* Cache the block start pages and the write buffer that
trx_doublewrite_init() has just set up */
block1 = trx_doublewrite->block1;
377
block2 = trx_doublewrite->block2;
379
buf = trx_doublewrite->write_buf;
384
if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED)
385
!= TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N) {
387
/* We are upgrading from a version < 4.1.x to a version where
388
multiple tablespaces are supported. We must reset the space id
389
field in the pages in the doublewrite buffer because starting
390
from this version the space id is stored to
391
FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID. */
393
trx_doublewrite_must_reset_space_ids = TRUE;
396
"InnoDB: Resetting space id's in the"
397
" doublewrite buffer\n");
399
trx_sys_multiple_tablespace_format = TRUE;
402
/* Read the pages from the doublewrite buffer to memory */
404
fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, block1, 0,
405
TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
407
/* The second block is read into buf right after the first block */
fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, block2, 0,
408
TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
409
buf + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
411
/* Check if any of these pages is half-written in data files, in the
416
for (i = 0; i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 2; i++) {
418
page_no = mach_read_from_4(page + FIL_PAGE_OFFSET);
420
if (trx_doublewrite_must_reset_space_ids) {
424
+ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 0);
425
/* We do not need to calculate new checksums for the
426
pages because the field .._SPACE_ID does not affect
427
them. Write the page back to where we read it from. */
429
/* Page i of the buffer came from block1 when
i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE, otherwise from block2 */
if (i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
430
source_page_no = block1 + i;
432
source_page_no = block2
433
+ i - TRX_SYS_DOUBLEWRITE_BLOCK_SIZE;
436
/* NOTE(review): the space id is the literal 0 here, while the reads of
the same blocks above pass TRX_SYS_SPACE -- confirm the two are meant to
be the same tablespace */
fil_io(OS_FILE_WRITE, TRUE, 0, source_page_no, 0,
437
UNIV_PAGE_SIZE, page, NULL);
438
/* printf("Resetting space id in page %lu\n",
441
space_id = mach_read_from_4(
442
page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
445
if (!restore_corrupt_pages) {
446
/* The database was shut down gracefully: no need to
449
} else if (!fil_tablespace_exists_in_mem(space_id)) {
450
/* Maybe we have dropped the single-table tablespace
451
and this page once belonged to it: do nothing */
453
} else if (!fil_check_adress_in_tablespace(space_id,
456
"InnoDB: Warning: a page in the"
457
" doublewrite buffer is not within space\n"
458
"InnoDB: bounds; space id %lu"
459
" page number %lu, page %lu in"
460
" doublewrite buf.\n",
461
(ulong) space_id, (ulong) page_no, (ulong) i);
463
/* The location recorded in the page lies inside block1 or block2 of the
system tablespace itself */
} else if (space_id == TRX_SYS_SPACE
464
&& ((page_no >= block1
466
< block1 + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)
467
|| (page_no >= block2
470
+ TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)))) {
472
/* It is an unwritten doublewrite buffer page:
475
/* Read in the actual page from the data files */
477
fil_io(OS_FILE_READ, TRUE, space_id, page_no, 0,
478
UNIV_PAGE_SIZE, read_buf, NULL);
479
/* Check if the page is corrupt */
481
if (buf_page_is_corrupted(read_buf)) {
484
"InnoDB: Warning: database page"
485
" corruption or a failed\n"
486
"InnoDB: file read of page %lu.\n",
489
"InnoDB: Trying to recover it from"
490
" the doublewrite buffer.\n");
492
/* The data file copy is corrupt: check the copy held in the doublewrite
buffer as well */
if (buf_page_is_corrupted(page)) {
494
"InnoDB: Dump of the page:\n");
495
buf_page_print(read_buf);
498
" corresponding page"
499
" in doublewrite buffer:\n");
500
buf_page_print(page);
503
"InnoDB: Also the page in the"
504
" doublewrite buffer"
506
"InnoDB: Cannot continue"
508
"InnoDB: You can try to"
509
" recover the database"
512
"InnoDB: set-variable="
513
"innodb_force_recovery=6\n");
517
/* Write the good page from the
518
doublewrite buffer to the intended
521
fil_io(OS_FILE_WRITE, TRUE, space_id,
523
UNIV_PAGE_SIZE, page, NULL);
525
"InnoDB: Recovered the page from"
526
" the doublewrite buffer.\n");
530
page += UNIV_PAGE_SIZE;
533
fil_flush_file_spaces(FIL_TABLESPACE);
536
ut_free(unaligned_read_buf);
539
/********************************************************************
540
Checks that trx is in the trx list. The list trx_sys->trx_list is walked
from its first element; the kernel mutex must be owned by the caller. */
545
/* out: TRUE if is in */
546
trx_t* in_trx) /* in: trx */
550
/* Assert that the calling thread owns kernel_mutex */
ut_ad(mutex_own(&(kernel_mutex)));
552
/* Start at the head of the list and advance with UT_LIST_GET_NEXT until
the end of the list (NULL) is reached */
trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
554
while (trx != NULL) {
561
trx = UT_LIST_GET_NEXT(trx_list, trx);
567
/*********************************************************************
568
Writes the value of max_trx_id to the file based trx system header.
The kernel mutex must be owned by the caller. */
571
trx_sys_flush_max_trx_id(void)
572
/*==========================*/
574
trx_sysf_t* sys_header;
577
/* Assert that the calling thread owns kernel_mutex */
ut_ad(mutex_own(&kernel_mutex));
581
sys_header = trx_sysf_get(&mtr);
583
/* Store the in-memory counter trx_sys->max_trx_id in the
TRX_SYS_TRX_ID_STORE field of the header */
mlog_write_dulint(sys_header + TRX_SYS_TRX_ID_STORE,
584
trx_sys->max_trx_id, &mtr);
588
/*********************************************************************
589
Updates the offset information about the end of the MySQL binlog entry
590
which corresponds to the transaction just being committed. In a MySQL
591
replication slave updates the latest master binlog position up to which
592
replication has proceeded. The info block is addressed as sys_header + field;
its magic number, file name and the high and low 32 bits of the offset are
written through the given mtr. */
595
trx_sys_update_mysql_binlog_offset(
596
/*===============================*/
597
const char* file_name,/* in: MySQL log file name */
598
ib_longlong offset, /* in: position in that log file */
599
ulint field, /* in: offset of the MySQL log info field in
600
the trx sys header */
601
mtr_t* mtr) /* in: mtr */
603
trx_sysf_t* sys_header;
605
/* The test is >= and not >: the name is written below together with its
terminating NUL, that is 1 + ut_strlen(file_name) bytes */
if (ut_strlen(file_name) >= TRX_SYS_MYSQL_LOG_NAME_LEN) {
607
/* We cannot fit the name to the 512 bytes we have reserved */
612
sys_header = trx_sysf_get(mtr);
614
/* Write the magic number only when the field does not hold it yet */
if (mach_read_from_4(sys_header + field
615
+ TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
616
!= TRX_SYS_MYSQL_LOG_MAGIC_N) {
618
mlog_write_ulint(sys_header + field
619
+ TRX_SYS_MYSQL_LOG_MAGIC_N_FLD,
620
TRX_SYS_MYSQL_LOG_MAGIC_N,
624
/* Check the name currently stored in the header before rewriting it */
if (0 != strcmp((char*) (sys_header + field + TRX_SYS_MYSQL_LOG_NAME),
627
mlog_write_string(sys_header + field
628
+ TRX_SYS_MYSQL_LOG_NAME,
629
(byte*) file_name, 1 + ut_strlen(file_name),
633
/* The high word is written when either the stored high word or the high
32 bits of the new offset are nonzero */
if (mach_read_from_4(sys_header + field
634
+ TRX_SYS_MYSQL_LOG_OFFSET_HIGH) > 0
635
|| (offset >> 32) > 0) {
637
mlog_write_ulint(sys_header + field
638
+ TRX_SYS_MYSQL_LOG_OFFSET_HIGH,
639
(ulint)(offset >> 32),
643
/* Low 32 bits of the offset */
mlog_write_ulint(sys_header + field
644
+ TRX_SYS_MYSQL_LOG_OFFSET_LOW,
645
(ulint)(offset & 0xFFFFFFFFUL),
649
#ifdef UNIV_HOTBACKUP
650
/*********************************************************************
651
Prints to stderr the MySQL binlog info in the system header if the
652
magic number shows it valid. The header is located directly inside the
page buffer passed by the caller. */
655
trx_sys_print_mysql_binlog_offset_from_page(
656
/*========================================*/
657
byte* page) /* in: buffer containing the trx system header page,
658
i.e., page number TRX_SYS_PAGE_NO in the tablespace */
660
trx_sysf_t* sys_header;
662
/* The trx system header starts TRX_SYS bytes into the page; here it is
taken from the given buffer and not fetched with trx_sysf_get() */
sys_header = page + TRX_SYS;
664
/* Print only when the magic number field holds TRX_SYS_MYSQL_LOG_MAGIC_N */
if (mach_read_from_4(sys_header + TRX_SYS_MYSQL_LOG_INFO
665
+ TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
666
== TRX_SYS_MYSQL_LOG_MAGIC_N) {
669
/* Arguments after the format: high 32 bits of the offset, low 32 bits of
the offset, then the name stored in the header */
"ibbackup: Last MySQL binlog file position %lu %lu,"
671
(ulong) mach_read_from_4(
672
sys_header + TRX_SYS_MYSQL_LOG_INFO
673
+ TRX_SYS_MYSQL_LOG_OFFSET_HIGH),
674
(ulong) mach_read_from_4(
675
sys_header + TRX_SYS_MYSQL_LOG_INFO
676
+ TRX_SYS_MYSQL_LOG_OFFSET_LOW),
677
sys_header + TRX_SYS_MYSQL_LOG_INFO
678
+ TRX_SYS_MYSQL_LOG_NAME);
681
#endif /* UNIV_HOTBACKUP */
683
/*********************************************************************
684
Reads the MySQL binlog offset info from the trx system header if
685
the magic number shows it valid, stores it in the global variables
trx_sys_mysql_bin_log_pos and trx_sys_mysql_bin_log_name, and prints the
info to stderr */
688
trx_sys_print_mysql_binlog_offset(void)
689
/*===================================*/
691
trx_sysf_t* sys_header;
693
ulint trx_sys_mysql_bin_log_pos_high;
694
ulint trx_sys_mysql_bin_log_pos_low;
698
sys_header = trx_sysf_get(&mtr);
700
/* Test the magic number field of the binlog info block */
if (mach_read_from_4(sys_header + TRX_SYS_MYSQL_LOG_INFO
701
+ TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
702
!= TRX_SYS_MYSQL_LOG_MAGIC_N) {
709
/* The offset is stored in the header as two 4-byte fields */
trx_sys_mysql_bin_log_pos_high = mach_read_from_4(
710
sys_header + TRX_SYS_MYSQL_LOG_INFO
711
+ TRX_SYS_MYSQL_LOG_OFFSET_HIGH);
712
trx_sys_mysql_bin_log_pos_low = mach_read_from_4(
713
sys_header + TRX_SYS_MYSQL_LOG_INFO
714
+ TRX_SYS_MYSQL_LOG_OFFSET_LOW);
716
/* Combine the two 32-bit halves into the 64-bit global position */
trx_sys_mysql_bin_log_pos
717
= (((ib_longlong)trx_sys_mysql_bin_log_pos_high) << 32)
718
+ (ib_longlong)trx_sys_mysql_bin_log_pos_low;
720
/* Copy the whole fixed-size name field, TRX_SYS_MYSQL_LOG_NAME_LEN bytes */
ut_memcpy(trx_sys_mysql_bin_log_name,
721
sys_header + TRX_SYS_MYSQL_LOG_INFO
722
+ TRX_SYS_MYSQL_LOG_NAME, TRX_SYS_MYSQL_LOG_NAME_LEN);
725
/* NOTE(review): the two ulint values below are matched with %lu without a
(ulong) cast, whereas trx_sys_print_mysql_master_log_pos() casts its
mach_read_from_4() results to (ulong) -- confirm that ulint and unsigned
long have the same width on every target */
"InnoDB: Last MySQL binlog file position %lu %lu,"
727
trx_sys_mysql_bin_log_pos_high, trx_sys_mysql_bin_log_pos_low,
728
trx_sys_mysql_bin_log_name);
733
/*********************************************************************
734
Prints to stderr the MySQL master log offset info in the trx system header if
735
the magic number shows it valid. Also copies the log name and the 64-bit
position into the global variables trx_sys_mysql_master_log_name and
trx_sys_mysql_master_log_pos. */
738
trx_sys_print_mysql_master_log_pos(void)
739
/*====================================*/
741
trx_sysf_t* sys_header;
746
sys_header = trx_sysf_get(&mtr);
748
/* Test the magic number field of the master log info block */
if (mach_read_from_4(sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
749
+ TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
750
!= TRX_SYS_MYSQL_LOG_MAGIC_N) {
758
/* Arguments after the format: high 32 bits of the offset, low 32 bits of
the offset, then the name stored in the header */
"InnoDB: In a MySQL replication slave the last"
759
" master binlog file\n"
760
"InnoDB: position %lu %lu, file name %s\n",
761
(ulong) mach_read_from_4(sys_header
762
+ TRX_SYS_MYSQL_MASTER_LOG_INFO
763
+ TRX_SYS_MYSQL_LOG_OFFSET_HIGH),
764
(ulong) mach_read_from_4(sys_header
765
+ TRX_SYS_MYSQL_MASTER_LOG_INFO
766
+ TRX_SYS_MYSQL_LOG_OFFSET_LOW),
767
sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
768
+ TRX_SYS_MYSQL_LOG_NAME);
769
/* Copy the master log position info to global variables we can
770
use in ha_innobase.cc to initialize glob_mi to right values */
772
ut_memcpy(trx_sys_mysql_master_log_name,
773
sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
774
+ TRX_SYS_MYSQL_LOG_NAME,
775
TRX_SYS_MYSQL_LOG_NAME_LEN);
777
/* Rebuild the 64-bit position from the high and low 32-bit header fields */
trx_sys_mysql_master_log_pos
778
= (((ib_longlong) mach_read_from_4(
779
sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
780
+ TRX_SYS_MYSQL_LOG_OFFSET_HIGH)) << 32)
781
+ ((ib_longlong) mach_read_from_4(
782
sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
783
+ TRX_SYS_MYSQL_LOG_OFFSET_LOW));
787
/********************************************************************
788
Looks for a free slot for a rollback segment in the trx system file copy.
The TRX_SYS_N_RSEGS slots are examined in index order; the kernel mutex must
be owned by the caller. */
791
trx_sysf_rseg_find_free(
792
/*====================*/
793
/* out: slot index or ULINT_UNDEFINED if not found */
794
mtr_t* mtr) /* in: mtr */
796
trx_sysf_t* sys_header;
800
/* Assert that the calling thread owns kernel_mutex */
ut_ad(mutex_own(&(kernel_mutex)));
802
sys_header = trx_sysf_get(mtr);
804
/* A slot whose page number reads FIL_NULL is the one looked for;
trx_sysf_create() resets every slot to that value */
for (i = 0; i < TRX_SYS_N_RSEGS; i++) {
806
page_no = trx_sysf_rseg_get_page_no(sys_header, i, mtr);
808
if (page_no == FIL_NULL) {
814
/* No slot had the page number FIL_NULL */
return(ULINT_UNDEFINED);
817
/*********************************************************************
818
Creates the file page for the transaction system. This function is called only
819
at the database creation, before trx_sys_init. In this file it is invoked
immediately before trx_sys_init_at_db_start(). The kernel mutex is held from
the mutex_enter() call to the mutex_exit() call below. */
824
mtr_t* mtr) /* in: mtr */
826
trx_sysf_t* sys_header;
834
/* Note that below we first reserve the file space x-latch, and
835
then enter the kernel: we must do it in this order to conform
836
to the latching order rules. */
838
mtr_x_lock(fil_space_get_latch(TRX_SYS_SPACE), mtr);
839
mutex_enter(&kernel_mutex);
841
/* Create the trx sys file block in a new allocated file segment */
842
page = fseg_create(TRX_SYS_SPACE, 0, TRX_SYS + TRX_SYS_FSEG_HEADER,
844
/* The page returned for the new segment must be the fixed trx sys page */
ut_a(buf_frame_get_page_no(page) == TRX_SYS_PAGE_NO);
846
#ifdef UNIV_SYNC_DEBUG
847
buf_page_dbg_add_level(page, SYNC_TRX_SYS_HEADER);
848
#endif /* UNIV_SYNC_DEBUG */
850
mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_TYPE_TRX_SYS,
853
/* Reset the doublewrite buffer magic number to zero so that we
854
know that the doublewrite buffer has not yet been created (this
855
suppresses a Valgrind warning) */
857
mlog_write_ulint(page + TRX_SYS_DOUBLEWRITE
858
+ TRX_SYS_DOUBLEWRITE_MAGIC, 0, MLOG_4BYTES, mtr);
860
sys_header = trx_sysf_get(mtr);
862
/* Start counting transaction ids from number 1 up */
863
mlog_write_dulint(sys_header + TRX_SYS_TRX_ID_STORE,
864
ut_dulint_create(0, 1), mtr);
866
/* Reset the rollback segment slots */
867
for (i = 0; i < TRX_SYS_N_RSEGS; i++) {
869
trx_sysf_rseg_set_space(sys_header, i, ULINT_UNDEFINED, mtr);
870
trx_sysf_rseg_set_page_no(sys_header, i, FIL_NULL, mtr);
873
/* The remaining area (up to the page trailer) is uninitialized.
874
Silence Valgrind warnings about it. */
875
UNIV_MEM_VALID(sys_header + (TRX_SYS_RSEGS
876
+ TRX_SYS_N_RSEGS * TRX_SYS_RSEG_SLOT_SIZE
877
+ TRX_SYS_RSEG_SPACE),
878
(UNIV_PAGE_SIZE - FIL_PAGE_DATA_END
880
+ TRX_SYS_N_RSEGS * TRX_SYS_RSEG_SLOT_SIZE
881
+ TRX_SYS_RSEG_SPACE))
882
+ page - sys_header);
884
/* Create the first rollback segment in the SYSTEM tablespace */
885
page_no = trx_rseg_header_create(TRX_SYS_SPACE, ULINT_MAX, &slot_no,
887
/* The first rollback segment must have received the slot
TRX_SYS_SYSTEM_RSEG_ID, and its header page must have been allocated */
ut_a(slot_no == TRX_SYS_SYSTEM_RSEG_ID);
888
ut_a(page_no != FIL_NULL);
890
mutex_exit(&kernel_mutex);
893
/*********************************************************************
894
Creates and initializes the central memory structures for the transaction
895
system. This is called when the database is started. The global trx_sys must
still be NULL on entry; the kernel mutex is held from the mutex_enter() call
to the mutex_exit() call below. */
898
trx_sys_init_at_db_start(void)
899
/*==========================*/
901
trx_sysf_t* sys_header;
902
ib_longlong rows_to_undo = 0;
903
const char* unit = "";
909
ut_ad(trx_sys == NULL);
911
mutex_enter(&kernel_mutex);
913
trx_sys = mem_alloc(sizeof(trx_sys_t));
915
sys_header = trx_sysf_get(&mtr);
917
trx_rseg_list_and_array_init(sys_header, &mtr);
919
trx_sys->latest_rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
921
/* VERY important: after the database is started, max_trx_id value is
922
divisible by TRX_SYS_TRX_ID_WRITE_MARGIN, and the 'if' in
923
trx_sys_get_new_trx_id will evaluate to TRUE when the function
924
is first time called, and the value for trx id will be written
925
to the disk-based header! Thus trx id values will not overlap when
926
the database is repeatedly started! */
928
/* Stored value aligned up to a multiple of TRX_SYS_TRX_ID_WRITE_MARGIN,
plus two further margins */
trx_sys->max_trx_id = ut_dulint_add(
929
ut_dulint_align_up(mtr_read_dulint(
931
+ TRX_SYS_TRX_ID_STORE, &mtr),
932
TRX_SYS_TRX_ID_WRITE_MARGIN),
933
2 * TRX_SYS_TRX_ID_WRITE_MARGIN);
935
UT_LIST_INIT(trx_sys->mysql_trx_list);
936
trx_lists_init_at_db_start();
938
/* If trx_list is not empty, rows_to_undo accumulates a count for every
transaction that is not in the TRX_PREPARED state */
if (UT_LIST_GET_LEN(trx_sys->trx_list) > 0) {
939
trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
943
if ( trx->conc_state != TRX_PREPARED) {
944
rows_to_undo += ut_conv_dulint_to_longlong(
948
trx = UT_LIST_GET_NEXT(trx_list, trx);
955
/* Very large counts are divided by one million before printing; unit is the
suffix printed right after the number */
if (rows_to_undo > 1000000000) {
957
rows_to_undo = rows_to_undo / 1000000;
961
"InnoDB: %lu transaction(s) which must be"
962
" rolled back or cleaned up\n"
963
"InnoDB: in total %lu%s row operations to undo\n",
964
(ulong) UT_LIST_GET_LEN(trx_sys->trx_list),
965
(ulong) rows_to_undo, unit);
967
/* The counter is printed as its high and low parts */
fprintf(stderr, "InnoDB: Trx id counter is %lu %lu\n",
968
(ulong) ut_dulint_get_high(trx_sys->max_trx_id),
969
(ulong) ut_dulint_get_low(trx_sys->max_trx_id));
972
UT_LIST_INIT(trx_sys->view_list);
974
trx_purge_sys_create();
976
mutex_exit(&kernel_mutex);
981
/*********************************************************************
982
Creates and initializes the transaction system at the database creation. */
992
/* First create the trx system page in the data file ... */
trx_sysf_create(&mtr);
996
/* ... then build the in-memory structures, which are read from that page */
trx_sys_init_at_db_start();