1
/******************************************************
6
Created 3/26/1996 Heikki Tuuri
7
*******************************************************/
21
#include "trx0purge.h"
25
/* The file format tag structure with id and name. */
26
struct file_format_struct {
27
ulint id; /* id of the file format */
28
const char* name; /* text representation of the
30
mutex_t mutex; /* covers changes to the above
34
typedef struct file_format_struct file_format_t;
36
/* The transaction system */
37
UNIV_INTERN trx_sys_t* trx_sys = NULL;
38
UNIV_INTERN trx_doublewrite_t* trx_doublewrite = NULL;
40
/* The following is set to TRUE when we are upgrading from the old format data
41
files to the new (>= 4.1.x) multiple tablespaces data file format */
43
UNIV_INTERN ibool trx_doublewrite_must_reset_space_ids = FALSE;
45
/* The following is TRUE when we are using the database in the new format,
46
i.e., we have successfully upgraded, or have created a new database. */
49
UNIV_INTERN ibool trx_sys_multiple_tablespace_format = FALSE;
51
/* In a MySQL replication slave, in crash recovery we store the master log
52
file name and position here. We have successfully got the updates to InnoDB
53
up to this position. If .._pos is -1, it means no crash recovery was needed,
54
or there was no master log position info inside InnoDB. */
56
UNIV_INTERN char trx_sys_mysql_master_log_name[TRX_SYS_MYSQL_LOG_NAME_LEN];
57
UNIV_INTERN ib_int64_t trx_sys_mysql_master_log_pos = -1;
59
/* If this MySQL server uses binary logging, after InnoDB has been inited
60
and if it has done a crash recovery, we store the binlog file name and position
61
here. If .._pos is -1, it means there was no binlog position info inside InnoDB. */
64
UNIV_INTERN char trx_sys_mysql_bin_log_name[TRX_SYS_MYSQL_LOG_NAME_LEN];
65
UNIV_INTERN ib_int64_t trx_sys_mysql_bin_log_pos = -1;
67
/* List of animal names representing file format. */
68
static const char* file_format_name_map[] = {
97
/* The number of elements in the file format name array. */
98
static const ulint FILE_FORMAT_NAME_N
99
= sizeof(file_format_name_map) / sizeof(file_format_name_map[0]);
101
/* This is used to track the maximum file format id known to InnoDB. It's
102
updated via SET GLOBAL innodb_file_format_check = 'x' or when we open
103
or create a table. */
104
static file_format_t file_format_max;
106
/********************************************************************
107
Determines if a page number is located inside the doublewrite buffer,
i.e. in [block1, block1 + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) or in
[block2, block2 + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE), where block1 and block2
are the block start page numbers kept in trx_doublewrite. */
110
trx_doublewrite_page_inside(
111
/*========================*/
112
/* out: TRUE if the location is inside
113
the two blocks of the doublewrite buffer */
114
ulint page_no) /* in: page number */
116
/* The in-memory doublewrite structure has not been allocated, so there
are no block ranges to compare page_no against */
if (trx_doublewrite == NULL) {
121
/* First block: half-open page range starting at block1 */
if (page_no >= trx_doublewrite->block1
122
&& page_no < trx_doublewrite->block1
123
+ TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
127
/* Second block: half-open page range starting at block2 */
if (page_no >= trx_doublewrite->block2
128
&& page_no < trx_doublewrite->block2
129
+ TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
136
/********************************************************************
137
Allocates and initializes the in-memory doublewrite buffer structure
(trx_doublewrite) at a database start. The start page numbers of the two
blocks are read from the doublewrite header passed in. */
140
trx_doublewrite_init(
141
/*=================*/
142
byte* doublewrite) /* in: pointer to the doublewrite buf
143
header on trx sys page */
145
/* Allocate the global in-memory doublewrite structure */
trx_doublewrite = mem_alloc(sizeof(trx_doublewrite_t));
147
/* Since we now start to use the doublewrite buffer, no need to call
148
fsync() after every write to a data file */
150
os_do_not_call_flush_at_each_write = TRUE;
151
#endif /* UNIV_DO_FLUSH */
153
/* Create the mutex embedded in the structure */
mutex_create(&trx_doublewrite->mutex, SYNC_DOUBLEWRITE);
155
trx_doublewrite->first_free = 0;
157
/* Read the 4-byte start page numbers of the two blocks from the
doublewrite header */
trx_doublewrite->block1 = mach_read_from_4(
158
doublewrite + TRX_SYS_DOUBLEWRITE_BLOCK1);
159
trx_doublewrite->block2 = mach_read_from_4(
160
doublewrite + TRX_SYS_DOUBLEWRITE_BLOCK2);
161
/* Room for the pages of both blocks plus one spare page; the spare page
lets write_buf below be placed on a UNIV_PAGE_SIZE boundary inside the
allocation. The raw pointer is kept as well (presumably so that it can be
freed later -- confirm against the shutdown code) */
trx_doublewrite->write_buf_unaligned = ut_malloc(
162
(1 + 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) * UNIV_PAGE_SIZE);
164
trx_doublewrite->write_buf = ut_align(
165
trx_doublewrite->write_buf_unaligned, UNIV_PAGE_SIZE);
166
/* One pointer-sized slot for each page of the two blocks */
trx_doublewrite->buf_block_arr = mem_alloc(
167
2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * sizeof(void*));
170
/********************************************************************
171
Marks the trx sys header when we have successfully upgraded to the >= 4.1.x
172
multiple tablespace format. */
175
trx_sys_mark_upgraded_to_multiple_tablespaces(void)
176
/*===============================================*/
182
/* We upgraded to 4.1.x and reset the space id fields in the
183
doublewrite buffer. Let us mark in the trx_sys header that the upgrade has been done. */
188
block = buf_page_get(TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO,
190
buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
192
doublewrite = buf_block_get_frame(block) + TRX_SYS_DOUBLEWRITE;
194
mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED,
195
TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N,
199
/* Flush the modified pages to disk and make a checkpoint */
200
log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
202
trx_sys_multiple_tablespace_format = TRUE;
205
/********************************************************************
206
Creates the doublewrite buffer to a new InnoDB installation. The header of the
207
doublewrite buffer is placed on the trx system header page. If the magic
number on that page shows that the buffer already exists, only the in-memory
structure is initialized from the header (trx_doublewrite_init). */
210
trx_sys_create_doublewrite_buf(void)
211
/*================================*/
215
buf_block_t* new_block;
223
/* The in-memory doublewrite structure already exists */
if (trx_doublewrite) {
232
/* Fetch the trx system header page; the doublewrite header lives at
offset TRX_SYS_DOUBLEWRITE within its frame */
block = buf_page_get(TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO,
234
buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
236
doublewrite = buf_block_get_frame(block) + TRX_SYS_DOUBLEWRITE;
238
if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC)
239
== TRX_SYS_DOUBLEWRITE_MAGIC_N) {
240
/* The doublewrite buffer has already been created:
241
just read in some numbers */
243
trx_doublewrite_init(doublewrite);
248
"InnoDB: Doublewrite buffer not found:"
251
/* The buffer pool size is compared against a bound derived from the
number of pages allocated by the loop further down
(2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE + FSP_EXTENT_SIZE / 2) plus a margin
of 100 */
if (buf_pool_get_curr_size()
252
< ((2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
253
+ FSP_EXTENT_SIZE / 2 + 100)
256
"InnoDB: Cannot create doublewrite buffer:"
258
"InnoDB: increase your buffer pool size.\n"
259
"InnoDB: Cannot continue operation.\n");
264
/* Create the file segment whose header is stored in the
TRX_SYS_DOUBLEWRITE_FSEG field on the trx system page */
block2 = fseg_create(TRX_SYS_SPACE, TRX_SYS_PAGE_NO,
266
+ TRX_SYS_DOUBLEWRITE_FSEG, &mtr);
268
/* fseg_create acquires a second latch on the page,
269
therefore we must declare it: */
271
/* NOTE(review): block2 is handed to buf_block_dbg_add_level() before the
NULL test just below; if that debug hook ever dereferences its argument it
should be moved after the test -- confirm */
buf_block_dbg_add_level(block2, SYNC_NO_ORDER_CHECK);
273
if (block2 == NULL) {
275
"InnoDB: Cannot create doublewrite buffer:"
277
"InnoDB: increase your tablespace size.\n"
278
"InnoDB: Cannot continue operation.\n");
280
/* We exit without committing the mtr to prevent
281
its modifications to the database getting to disk */
286
fseg_header = buf_block_get_frame(block)
287
+ TRX_SYS_DOUBLEWRITE + TRX_SYS_DOUBLEWRITE_FSEG;
290
/* Allocate 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE + FSP_EXTENT_SIZE / 2 pages
from the segment. The page obtained at i == FSP_EXTENT_SIZE / 2 is recorded
as the start of block 1, and the page obtained at
i == FSP_EXTENT_SIZE / 2 + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE as the start of
block 2 */
for (i = 0; i < 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
291
+ FSP_EXTENT_SIZE / 2; i++) {
292
page_no = fseg_alloc_free_page(fseg_header,
295
if (page_no == FIL_NULL) {
297
"InnoDB: Cannot create doublewrite"
298
" buffer: you must\n"
299
"InnoDB: increase your"
300
" tablespace size.\n"
301
"InnoDB: Cannot continue operation.\n"
307
/* We read the allocated pages to the buffer pool;
308
when they are written to disk in a flush, the space
309
id and page number fields are also written to the
310
pages. When we at database startup read pages
311
from the doublewrite buffer, we know that if the
312
space id and page number in them are the same as
313
the page position in the tablespace, then the page
314
has not been written to in doublewrite. */
316
new_block = buf_page_get(TRX_SYS_SPACE, 0, page_no,
318
buf_block_dbg_add_level(new_block,
319
SYNC_NO_ORDER_CHECK);
321
/* Make a dummy change to the page to ensure it will
322
be written to disk in a flush */
324
mlog_write_ulint(buf_block_get_frame(new_block)
326
TRX_SYS_DOUBLEWRITE_MAGIC_N,
329
/* Start of block 1: the page number is written twice, at the plain
field offset and at the + TRX_SYS_DOUBLEWRITE_REPEAT copy */
if (i == FSP_EXTENT_SIZE / 2) {
330
mlog_write_ulint(doublewrite
331
+ TRX_SYS_DOUBLEWRITE_BLOCK1,
332
page_no, MLOG_4BYTES, &mtr);
333
mlog_write_ulint(doublewrite
334
+ TRX_SYS_DOUBLEWRITE_REPEAT
335
+ TRX_SYS_DOUBLEWRITE_BLOCK1,
336
page_no, MLOG_4BYTES, &mtr);
337
/* Start of block 2: recorded the same way */
} else if (i == FSP_EXTENT_SIZE / 2
338
+ TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
339
mlog_write_ulint(doublewrite
340
+ TRX_SYS_DOUBLEWRITE_BLOCK2,
341
page_no, MLOG_4BYTES, &mtr);
342
mlog_write_ulint(doublewrite
343
+ TRX_SYS_DOUBLEWRITE_REPEAT
344
+ TRX_SYS_DOUBLEWRITE_BLOCK2,
345
page_no, MLOG_4BYTES, &mtr);
346
/* Every other page after the start of block 1 must directly follow the
previously allocated page (this assertion is not evaluated for the two
indexes handled by the branches above) */
} else if (i > FSP_EXTENT_SIZE / 2) {
347
ut_a(page_no == prev_page_no + 1);
350
prev_page_no = page_no;
353
/* Stamp the magic number and its repeat copy into the doublewrite header;
this is the value tested at the top of this function to detect an existing
buffer */
mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC,
354
TRX_SYS_DOUBLEWRITE_MAGIC_N,
356
mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC
357
+ TRX_SYS_DOUBLEWRITE_REPEAT,
358
TRX_SYS_DOUBLEWRITE_MAGIC_N,
361
/* Mark the header with the "space id stored" value as well */
mlog_write_ulint(doublewrite
362
+ TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED,
363
TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N,
367
/* Flush the modified pages to disk and make a checkpoint */
368
log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
370
fprintf(stderr, "InnoDB: Doublewrite buffer created\n");
372
trx_sys_multiple_tablespace_format = TRUE;
378
/********************************************************************
379
At a database startup initializes the doublewrite buffer memory structure if
380
we already have a doublewrite buffer created in the data files. If we are
381
upgrading to an InnoDB version which supports multiple tablespaces, then this
382
function performs the necessary update operations. If we are in a crash
383
recovery, this function uses a possible doublewrite buffer to restore
384
half-written pages in the data files. */
387
trx_sys_doublewrite_init_or_restore_pages(
388
/*======================================*/
389
ibool restore_corrupt_pages)
393
byte* unaligned_read_buf;
396
ulint source_page_no;
403
/* We do the file i/o past the buffer pool */
405
unaligned_read_buf = ut_malloc(2 * UNIV_PAGE_SIZE);
406
read_buf = ut_align(unaligned_read_buf, UNIV_PAGE_SIZE);
408
/* Read the trx sys header to check if we are using the doublewrite buffer */
411
fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, 0,
412
UNIV_PAGE_SIZE, read_buf, NULL);
413
doublewrite = read_buf + TRX_SYS_DOUBLEWRITE;
415
if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC)
416
== TRX_SYS_DOUBLEWRITE_MAGIC_N) {
417
/* The doublewrite buffer has been created */
419
trx_doublewrite_init(doublewrite);
421
block1 = trx_doublewrite->block1;
422
block2 = trx_doublewrite->block2;
424
buf = trx_doublewrite->write_buf;
429
if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED)
430
!= TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N) {
432
/* We are upgrading from a version < 4.1.x to a version where
433
multiple tablespaces are supported. We must reset the space id
434
field in the pages in the doublewrite buffer because starting
435
from this version the space id is stored to
436
FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID. */
438
trx_doublewrite_must_reset_space_ids = TRUE;
441
"InnoDB: Resetting space id's in the"
442
" doublewrite buffer\n");
444
trx_sys_multiple_tablespace_format = TRUE;
447
/* Read the pages from the doublewrite buffer to memory */
449
fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, block1, 0,
450
TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
452
fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, block2, 0,
453
TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
454
buf + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
456
/* Check if any of these pages is half-written in data files, in the intended position */
461
for (i = 0; i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 2; i++) {
463
page_no = mach_read_from_4(page + FIL_PAGE_OFFSET);
465
if (trx_doublewrite_must_reset_space_ids) {
469
+ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 0);
470
/* We do not need to calculate new checksums for the
471
pages because the field .._SPACE_ID does not affect
472
them. Write the page back to where we read it from. */
474
if (i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
475
source_page_no = block1 + i;
477
source_page_no = block2
478
+ i - TRX_SYS_DOUBLEWRITE_BLOCK_SIZE;
481
fil_io(OS_FILE_WRITE, TRUE, 0, 0, source_page_no, 0,
482
UNIV_PAGE_SIZE, page, NULL);
483
/* printf("Resetting space id in page %lu\n", source_page_no); */
486
space_id = mach_read_from_4(
487
page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
490
if (!restore_corrupt_pages) {
491
/* The database was shut down gracefully: no need to recover pages */
494
} else if (!fil_tablespace_exists_in_mem(space_id)) {
495
/* Maybe we have dropped the single-table tablespace
496
and this page once belonged to it: do nothing */
498
} else if (!fil_check_adress_in_tablespace(space_id,
501
"InnoDB: Warning: a page in the"
502
" doublewrite buffer is not within space\n"
503
"InnoDB: bounds; space id %lu"
504
" page number %lu, page %lu in"
505
" doublewrite buf.\n",
506
(ulong) space_id, (ulong) page_no, (ulong) i);
508
} else if (space_id == TRX_SYS_SPACE
509
&& ((page_no >= block1
511
< block1 + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)
512
|| (page_no >= block2
515
+ TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)))) {
517
/* It is an unwritten doublewrite buffer page: do nothing */
520
ulint zip_size = fil_space_get_zip_size(space_id);
522
/* Read in the actual page from the file */
523
fil_io(OS_FILE_READ, TRUE, space_id, zip_size,
525
zip_size ? zip_size : UNIV_PAGE_SIZE,
528
/* Check if the page is corrupt */
531
(buf_page_is_corrupted(read_buf, zip_size))) {
534
"InnoDB: Warning: database page"
535
" corruption or a failed\n"
536
"InnoDB: file read of"
537
" space %lu page %lu.\n"
538
"InnoDB: Trying to recover it from"
539
" the doublewrite buffer.\n",
540
(ulong) space_id, (ulong) page_no);
542
if (buf_page_is_corrupted(page, zip_size)) {
544
"InnoDB: Dump of the page:\n");
545
buf_page_print(read_buf, zip_size);
548
" corresponding page"
549
" in doublewrite buffer:\n");
550
buf_page_print(page, zip_size);
553
"InnoDB: Also the page in the"
554
" doublewrite buffer"
556
"InnoDB: Cannot continue"
558
"InnoDB: You can try to"
559
" recover the database"
562
"InnoDB: set-variable="
563
"innodb_force_recovery=6\n");
567
/* Write the good page from the
568
doublewrite buffer to the intended position */
571
fil_io(OS_FILE_WRITE, TRUE, space_id,
572
zip_size, page_no, 0,
573
zip_size ? zip_size : UNIV_PAGE_SIZE,
576
"InnoDB: Recovered the page from"
577
" the doublewrite buffer.\n");
581
page += UNIV_PAGE_SIZE;
584
fil_flush_file_spaces(FIL_TABLESPACE);
587
ut_free(unaligned_read_buf);
590
/********************************************************************
591
Checks that trx is in the trx list. */
596
/* out: TRUE if is in */
597
trx_t* in_trx) /* in: trx */
601
ut_ad(mutex_own(&(kernel_mutex)));
603
trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
605
while (trx != NULL) {
612
trx = UT_LIST_GET_NEXT(trx_list, trx);
618
/*********************************************************************
619
Writes the value of max_trx_id to the file based trx system header.
Ownership of kernel_mutex by the caller is asserted with ut_ad(). */
622
trx_sys_flush_max_trx_id(void)
623
/*==========================*/
625
trx_sysf_t* sys_header;
628
ut_ad(mutex_own(&kernel_mutex));
632
/* Get the trx system header through the local mini-transaction */
sys_header = trx_sysf_get(&mtr);
634
/* Store trx_sys->max_trx_id into the TRX_SYS_TRX_ID_STORE field */
mlog_write_dulint(sys_header + TRX_SYS_TRX_ID_STORE,
635
trx_sys->max_trx_id, &mtr);
639
/*********************************************************************
640
Updates the offset information about the end of the MySQL binlog entry
641
which corresponds to the transaction just being committed. In a MySQL
642
replication slave updates the latest master binlog position up to which
643
replication has proceeded. The magic number, the file name and the 64-bit
offset (as two 32-bit halves) are written to the log info field that starts
at 'field' in the trx system header. */
646
trx_sys_update_mysql_binlog_offset(
647
/*===============================*/
648
const char* file_name,/* in: MySQL log file name */
649
ib_int64_t offset, /* in: position in that log file */
650
ulint field, /* in: offset of the MySQL log info field in
651
the trx sys header */
652
mtr_t* mtr) /* in: mtr */
654
trx_sysf_t* sys_header;
656
/* The name is stored together with its terminating NUL (see the
1 + ut_strlen() length used below), hence >= rather than > */
if (ut_strlen(file_name) >= TRX_SYS_MYSQL_LOG_NAME_LEN) {
658
/* We cannot fit the name to the 512 bytes we have reserved */
663
sys_header = trx_sysf_get(mtr);
665
/* Stamp the magic number into the field if it is not there yet */
if (mach_read_from_4(sys_header + field
666
+ TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
667
!= TRX_SYS_MYSQL_LOG_MAGIC_N) {
669
mlog_write_ulint(sys_header + field
670
+ TRX_SYS_MYSQL_LOG_MAGIC_N_FLD,
671
TRX_SYS_MYSQL_LOG_MAGIC_N,
675
/* The name currently stored in the header is compared as a C string;
the write below copies file_name including its terminating NUL */
if (0 != strcmp((char*) (sys_header + field + TRX_SYS_MYSQL_LOG_NAME),
678
mlog_write_string(sys_header + field
679
+ TRX_SYS_MYSQL_LOG_NAME,
680
(byte*) file_name, 1 + ut_strlen(file_name),
684
/* The high 32 bits are written only when the stored high word or the
new high word is nonzero */
if (mach_read_from_4(sys_header + field
685
+ TRX_SYS_MYSQL_LOG_OFFSET_HIGH) > 0
686
|| (offset >> 32) > 0) {
688
mlog_write_ulint(sys_header + field
689
+ TRX_SYS_MYSQL_LOG_OFFSET_HIGH,
690
(ulint)(offset >> 32),
694
/* Low 32 bits of the offset */
mlog_write_ulint(sys_header + field
695
+ TRX_SYS_MYSQL_LOG_OFFSET_LOW,
696
(ulint)(offset & 0xFFFFFFFFUL),
700
#ifdef UNIV_HOTBACKUP
701
/*********************************************************************
702
Prints to stderr the MySQL binlog info in the system header if the
703
magic number shows it valid. This variant works directly on a page buffer
supplied by the caller and is compiled only when UNIV_HOTBACKUP is
defined. */
706
trx_sys_print_mysql_binlog_offset_from_page(
707
/*========================================*/
708
const byte* page) /* in: buffer containing the trx
709
system header page, i.e., page number
710
TRX_SYS_PAGE_NO in the tablespace */
712
const trx_sysf_t* sys_header;
714
/* The trx system header starts at offset TRX_SYS within the page */
sys_header = page + TRX_SYS;
716
/* Print only when the binlog info field carries the magic number */
if (mach_read_from_4(sys_header + TRX_SYS_MYSQL_LOG_INFO
717
+ TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
718
== TRX_SYS_MYSQL_LOG_MAGIC_N) {
721
"ibbackup: Last MySQL binlog file position %lu %lu,"
723
(ulong) mach_read_from_4(
724
sys_header + TRX_SYS_MYSQL_LOG_INFO
725
+ TRX_SYS_MYSQL_LOG_OFFSET_HIGH),
726
(ulong) mach_read_from_4(
727
sys_header + TRX_SYS_MYSQL_LOG_INFO
728
+ TRX_SYS_MYSQL_LOG_OFFSET_LOW),
729
sys_header + TRX_SYS_MYSQL_LOG_INFO
730
+ TRX_SYS_MYSQL_LOG_NAME);
733
#endif /* UNIV_HOTBACKUP */
735
/*********************************************************************
736
Reads the MySQL binlog offset info stored in the trx system header, copies
737
it to the globals trx_sys_mysql_bin_log_pos and trx_sys_mysql_bin_log_name,
and prints the info to stderr. The magic number of the info field is
checked first. */
740
trx_sys_print_mysql_binlog_offset(void)
741
/*===================================*/
743
trx_sysf_t* sys_header;
745
ulint trx_sys_mysql_bin_log_pos_high;
746
ulint trx_sys_mysql_bin_log_pos_low;
750
sys_header = trx_sysf_get(&mtr);
752
/* This branch is taken when the magic number does NOT match */
if (mach_read_from_4(sys_header + TRX_SYS_MYSQL_LOG_INFO
753
+ TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
754
!= TRX_SYS_MYSQL_LOG_MAGIC_N) {
761
/* The position is stored as two 32-bit halves */
trx_sys_mysql_bin_log_pos_high = mach_read_from_4(
762
sys_header + TRX_SYS_MYSQL_LOG_INFO
763
+ TRX_SYS_MYSQL_LOG_OFFSET_HIGH);
764
trx_sys_mysql_bin_log_pos_low = mach_read_from_4(
765
sys_header + TRX_SYS_MYSQL_LOG_INFO
766
+ TRX_SYS_MYSQL_LOG_OFFSET_LOW);
768
/* Combine the halves into the 64-bit global position */
trx_sys_mysql_bin_log_pos
769
= (((ib_int64_t)trx_sys_mysql_bin_log_pos_high) << 32)
770
+ (ib_int64_t)trx_sys_mysql_bin_log_pos_low;
772
/* Copy the whole fixed-size name field into the global name buffer */
ut_memcpy(trx_sys_mysql_bin_log_name,
773
sys_header + TRX_SYS_MYSQL_LOG_INFO
774
+ TRX_SYS_MYSQL_LOG_NAME, TRX_SYS_MYSQL_LOG_NAME_LEN);
777
/* NOTE(review): the two ulint halves below are passed for %lu without the
(ulong) cast that trx_sys_print_mysql_master_log_pos() applies to its %lu
arguments -- confirm that ulint and unsigned long have the same width on
every supported platform */
"InnoDB: Last MySQL binlog file position %lu %lu,"
779
trx_sys_mysql_bin_log_pos_high, trx_sys_mysql_bin_log_pos_low,
780
trx_sys_mysql_bin_log_name);
785
/*********************************************************************
786
Prints to stderr the MySQL master log offset info in the trx system header if
787
the magic number shows it valid. The same info is also copied to the globals
trx_sys_mysql_master_log_name and trx_sys_mysql_master_log_pos. */
790
trx_sys_print_mysql_master_log_pos(void)
791
/*====================================*/
793
trx_sysf_t* sys_header;
798
sys_header = trx_sysf_get(&mtr);
800
/* This branch is taken when the magic number does NOT match */
if (mach_read_from_4(sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
801
+ TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
802
!= TRX_SYS_MYSQL_LOG_MAGIC_N) {
810
"InnoDB: In a MySQL replication slave the last"
811
" master binlog file\n"
812
"InnoDB: position %lu %lu, file name %s\n",
813
(ulong) mach_read_from_4(sys_header
814
+ TRX_SYS_MYSQL_MASTER_LOG_INFO
815
+ TRX_SYS_MYSQL_LOG_OFFSET_HIGH),
816
(ulong) mach_read_from_4(sys_header
817
+ TRX_SYS_MYSQL_MASTER_LOG_INFO
818
+ TRX_SYS_MYSQL_LOG_OFFSET_LOW),
819
sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
820
+ TRX_SYS_MYSQL_LOG_NAME);
821
/* Copy the master log position info to global variables we can
822
use in ha_innobase.cc to initialize glob_mi to right values */
824
ut_memcpy(trx_sys_mysql_master_log_name,
825
sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
826
+ TRX_SYS_MYSQL_LOG_NAME,
827
TRX_SYS_MYSQL_LOG_NAME_LEN);
829
/* Combine the stored high and low 32-bit halves into the 64-bit global
position */
trx_sys_mysql_master_log_pos
830
= (((ib_int64_t) mach_read_from_4(
831
sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
832
+ TRX_SYS_MYSQL_LOG_OFFSET_HIGH)) << 32)
833
+ ((ib_int64_t) mach_read_from_4(
834
sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
835
+ TRX_SYS_MYSQL_LOG_OFFSET_LOW));
839
/********************************************************************
840
Looks for a free slot for a rollback segment in the trx system file copy.
The slots 0 .. TRX_SYS_N_RSEGS - 1 are scanned in order and the page number
stored in each slot is tested against FIL_NULL. Ownership of kernel_mutex
by the caller is asserted with ut_ad(). */
843
trx_sysf_rseg_find_free(
844
/*====================*/
845
/* out: slot index or ULINT_UNDEFINED if not found */
846
mtr_t* mtr) /* in: mtr */
848
trx_sysf_t* sys_header;
852
ut_ad(mutex_own(&(kernel_mutex)));
854
sys_header = trx_sysf_get(mtr);
856
for (i = 0; i < TRX_SYS_N_RSEGS; i++) {
858
page_no = trx_sysf_rseg_get_page_no(sys_header, i, mtr);
860
/* FIL_NULL is the page number the slots are reset to when the trx
system page is created */
if (page_no == FIL_NULL) {
866
/* The value documented above for "not found" */
return(ULINT_UNDEFINED);
869
/*********************************************************************
870
Creates the file page for the transaction system. This function is called only
871
at the database creation, before trx_sys_init. */
876
mtr_t* mtr) /* in: mtr */
878
trx_sysf_t* sys_header;
887
/* Note that below we first reserve the file space x-latch, and
888
then enter the kernel: we must do it in this order to conform
889
to the latching order rules. */
891
mtr_x_lock(fil_space_get_latch(TRX_SYS_SPACE, NULL), mtr);
892
mutex_enter(&kernel_mutex);
894
/* Create the trx sys file block in a new allocated file segment */
895
block = fseg_create(TRX_SYS_SPACE, 0, TRX_SYS + TRX_SYS_FSEG_HEADER,
897
buf_block_dbg_add_level(block, SYNC_TRX_SYS_HEADER);
899
ut_a(buf_block_get_page_no(block) == TRX_SYS_PAGE_NO);
901
page = buf_block_get_frame(block);
903
mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_TYPE_TRX_SYS,
906
/* Reset the doublewrite buffer magic number to zero so that we
907
know that the doublewrite buffer has not yet been created (this
908
suppresses a Valgrind warning) */
910
mlog_write_ulint(page + TRX_SYS_DOUBLEWRITE
911
+ TRX_SYS_DOUBLEWRITE_MAGIC, 0, MLOG_4BYTES, mtr);
913
sys_header = trx_sysf_get(mtr);
915
/* Start counting transaction ids from number 1 up */
916
mlog_write_dulint(sys_header + TRX_SYS_TRX_ID_STORE,
917
ut_dulint_create(0, 1), mtr);
919
/* Reset the rollback segment slots */
920
for (i = 0; i < TRX_SYS_N_RSEGS; i++) {
922
trx_sysf_rseg_set_space(sys_header, i, ULINT_UNDEFINED, mtr);
923
trx_sysf_rseg_set_page_no(sys_header, i, FIL_NULL, mtr);
926
/* The remaining area (up to the page trailer) is uninitialized.
927
Silence Valgrind warnings about it. */
928
UNIV_MEM_VALID(sys_header + (TRX_SYS_RSEGS
929
+ TRX_SYS_N_RSEGS * TRX_SYS_RSEG_SLOT_SIZE
930
+ TRX_SYS_RSEG_SPACE),
931
(UNIV_PAGE_SIZE - FIL_PAGE_DATA_END
933
+ TRX_SYS_N_RSEGS * TRX_SYS_RSEG_SLOT_SIZE
934
+ TRX_SYS_RSEG_SPACE))
935
+ page - sys_header);
937
/* Create the first rollback segment in the SYSTEM tablespace */
938
page_no = trx_rseg_header_create(TRX_SYS_SPACE, 0, ULINT_MAX, &slot_no,
940
ut_a(slot_no == TRX_SYS_SYSTEM_RSEG_ID);
941
ut_a(page_no != FIL_NULL);
943
mutex_exit(&kernel_mutex);
946
/*********************************************************************
947
Creates and initializes the central memory structures for the transaction
948
system. This is called when the database is started. The work is done
between mutex_enter() and mutex_exit() on kernel_mutex; trx_sys must still
be NULL on entry (asserted with ut_ad()). */
951
trx_sys_init_at_db_start(void)
952
/*==========================*/
954
trx_sysf_t* sys_header;
955
ib_int64_t rows_to_undo = 0;
956
const char* unit = "";
962
ut_ad(trx_sys == NULL);
964
mutex_enter(&kernel_mutex);
966
/* Allocate the global transaction system structure */
trx_sys = mem_alloc(sizeof(trx_sys_t));
968
sys_header = trx_sysf_get(&mtr);
970
/* Initialize the rollback segment list and array from the header, then
take the first list element as latest_rseg */
trx_rseg_list_and_array_init(sys_header, &mtr);
972
trx_sys->latest_rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
974
/* VERY important: after the database is started, max_trx_id value is
975
divisible by TRX_SYS_TRX_ID_WRITE_MARGIN, and the 'if' in
976
trx_sys_get_new_trx_id will evaluate to TRUE when the function
977
is first time called, and the value for trx id will be written
978
to the disk-based header! Thus trx id values will not overlap when
979
the database is repeatedly started! */
981
/* I.e. the stored id is aligned up to the write margin and then two
further margins are added */
trx_sys->max_trx_id = ut_dulint_add(
982
ut_dulint_align_up(mtr_read_dulint(
984
+ TRX_SYS_TRX_ID_STORE, &mtr),
985
TRX_SYS_TRX_ID_WRITE_MARGIN),
986
2 * TRX_SYS_TRX_ID_WRITE_MARGIN);
988
UT_LIST_INIT(trx_sys->mysql_trx_list);
989
trx_dummy_sess = sess_open();
990
trx_lists_init_at_db_start();
992
/* If trx_lists_init_at_db_start() left transactions in trx_list, walk the
list and accumulate rows_to_undo for every transaction whose state is not
TRX_PREPARED */
if (UT_LIST_GET_LEN(trx_sys->trx_list) > 0) {
993
trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
997
if ( trx->conc_state != TRX_PREPARED) {
998
rows_to_undo += ut_conv_dulint_to_longlong(
1002
trx = UT_LIST_GET_NEXT(trx_list, trx);
1009
/* Large counts are scaled down by 10^6 before printing; 'unit' is printed
right after the number (presumably it is set to a matching suffix on a
line not visible here -- confirm) */
if (rows_to_undo > 1000000000) {
1011
rows_to_undo = rows_to_undo / 1000000;
1015
"InnoDB: %lu transaction(s) which must be"
1016
" rolled back or cleaned up\n"
1017
"InnoDB: in total %lu%s row operations to undo\n",
1018
(ulong) UT_LIST_GET_LEN(trx_sys->trx_list),
1019
(ulong) rows_to_undo, unit);
1021
fprintf(stderr, "InnoDB: Trx id counter is " TRX_ID_FMT "\n",
1022
TRX_ID_PREP_PRINTF(trx_sys->max_trx_id));
1025
UT_LIST_INIT(trx_sys->view_list);
1027
trx_purge_sys_create();
1029
mutex_exit(&kernel_mutex);
1034
/*********************************************************************
1035
Creates and initializes the transaction system at the database creation.
The trx system file page is created first (trx_sysf_create), then the
in-memory structures are built (trx_sys_init_at_db_start). */
1038
trx_sys_create(void)
1039
/*================*/
1045
/* Create the file page inside the local mini-transaction */
trx_sysf_create(&mtr);
1049
/* Build the memory structures from what was just created */
trx_sys_init_at_db_start();
1052
/*********************************************************************
1053
Update the file format tag. */
1056
trx_sys_file_format_max_write(
1057
/*==========================*/
1058
/* out: always TRUE */
1059
ulint format_id, /* in: file format id */
1060
const char** name) /* out: max file format name, can
1066
ulint tag_value_low;
1070
block = buf_page_get(
1071
TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, RW_X_LATCH, &mtr);
1073
file_format_max.id = format_id;
1074
file_format_max.name = trx_sys_file_format_id_to_name(format_id);
1076
ptr = buf_block_get_frame(block) + TRX_SYS_FILE_FORMAT_TAG;
1077
tag_value_low = format_id + TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_LOW;
1080
*name = file_format_max.name;
1085
ut_dulint_create(TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_HIGH,
1094
/*********************************************************************
1095
Read the file format tag. The tag is an 8-byte value at
TRX_SYS_FILE_FORMAT_TAG on the trx system page; the format id is recovered
from its low word and validated against the magic high word and the size
of the format name table. */
1098
trx_sys_file_format_max_read(void)
1099
/*==============================*/
1100
/* out: the file format or
1101
ULINT_UNDEFINED if not set. */
1105
const buf_block_t* block;
1107
dulint file_format_id;
1109
/* Since this is called during the startup phase it's safe to
1110
read the value without a covering mutex. */
1113
/* Fetch the trx system page, requesting RW_X_LATCH */
block = buf_page_get(
1114
TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, RW_X_LATCH, &mtr);
1116
ptr = buf_block_get_frame(block) + TRX_SYS_FILE_FORMAT_TAG;
1117
file_format_id = mach_read_from_8(ptr);
1121
/* The low word is the format id offset by
TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_LOW (that sum is what
trx_sys_file_format_max_write computes); undo the offset here. Assuming
format_id is unsigned, a low word smaller than the magic constant wraps to
a large value that the range test below rejects -- confirm the type */
format_id = file_format_id.low - TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_LOW;
1123
if (file_format_id.high != TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_HIGH
1124
|| format_id >= FILE_FORMAT_NAME_N) {
1126
/* Either it has never been tagged, or garbage in it. */
1127
return(ULINT_UNDEFINED);
1133
/*********************************************************************
1134
Get the name representation of the file format from its id. The id is used
as an index into file_format_name_map. */
1137
trx_sys_file_format_id_to_name(
1138
/*===========================*/
1139
/* out: pointer to the name */
1140
const ulint id) /* in: id of the file format */
1142
/* The id must be a valid index into the name table */
ut_a(id < FILE_FORMAT_NAME_N);
1144
return(file_format_name_map[id]);
1147
/*********************************************************************
1148
Check for the max file format tag stored on disk. Note: If max_format_id
1149
is == DICT_TF_FORMAT_MAX + 1 then we only print a warning. */
1152
trx_sys_file_format_max_check(
1153
/*==========================*/
1154
/* out: DB_SUCCESS or error code */
1155
ulint max_format_id) /* in: max format id to check */
1159
/* Check the file format in the tablespace. Do not try to
1160
recover if the file format is not supported by the engine
1161
unless forced by the user. */
1162
format_id = trx_sys_file_format_max_read();
1163
if (format_id == ULINT_UNDEFINED) {
1164
/* Format ID was not set. Set it to minimum possible value. */
1166
format_id = DICT_TF_FORMAT_51;
1169
ut_print_timestamp(stderr);
1171
" InnoDB: highest supported file format is %s.\n",
1172
trx_sys_file_format_id_to_name(DICT_TF_FORMAT_MAX));
1174
if (format_id > DICT_TF_FORMAT_MAX) {
1176
ut_a(format_id < FILE_FORMAT_NAME_N);
1178
ut_print_timestamp(stderr);
1180
" InnoDB: %s: the system tablespace is in a file "
1181
"format that this version doesn't support - %s\n",
1182
((max_format_id <= DICT_TF_FORMAT_MAX)
1183
? "Error" : "Warning"),
1184
trx_sys_file_format_id_to_name(format_id));
1186
if (max_format_id <= DICT_TF_FORMAT_MAX) {
1191
format_id = (format_id > max_format_id) ? format_id : max_format_id;
1193
/* We don't need a mutex here, as this function should only
1194
be called once at start up. */
1195
file_format_max.id = format_id;
1196
file_format_max.name = trx_sys_file_format_id_to_name(format_id);
1201
/*********************************************************************
1202
Set the file format id unconditionally except if it's already the same value. */
1206
trx_sys_file_format_max_set(
1207
/*========================*/
1208
/* out: TRUE if value updated */
1209
ulint format_id, /* in: file format id */
1210
const char** name) /* out: max file format name or
1211
NULL if not needed. */
1215
ut_a(format_id <= DICT_TF_FORMAT_MAX);
1217
mutex_enter(&file_format_max.mutex);
1219
/* Only update if not already same value. */
1220
if (format_id != file_format_max.id) {
1222
ret = trx_sys_file_format_max_write(format_id, name);
1225
mutex_exit(&file_format_max.mutex);
1230
/************************************************************************
1231
Tags the system table space with minimum format id if it has not been
tagged yet, i.e. if reading the tag yields ULINT_UNDEFINED.
1233
WARNING: This function is only called during the startup and AFTER the
1234
redo log application during recovery has finished. */
1237
trx_sys_file_format_tag_init(void)
1238
/*==============================*/
1242
format_id = trx_sys_file_format_max_read();
1244
/* If format_id is not set then set it to the minimum. */
1245
if (format_id == ULINT_UNDEFINED) {
1246
/* The name output argument is not needed here, hence NULL */
trx_sys_file_format_max_set(DICT_TF_FORMAT_51, NULL);
1250
/************************************************************************
1251
Update the file format tag in the system tablespace only if the given
1252
format id is greater than the known max id. The comparison and the write
are both done while holding file_format_max.mutex. */
1255
trx_sys_file_format_max_upgrade(
1256
/*============================*/
1257
/* out: TRUE if format_id was
1258
bigger than the known max id */
1259
const char** name, /* out: max file format name */
1260
ulint format_id) /* in: file format identifier */
1265
/* file_format_max must have been given a name already
(trx_sys_file_format_init assigns one), and the requested id must be
within the supported range */
ut_a(file_format_max.name != NULL);
1266
ut_a(format_id <= DICT_TF_FORMAT_MAX);
1268
mutex_enter(&file_format_max.mutex);
1270
/* Only ever raise the known maximum, never lower it */
if (format_id > file_format_max.id) {
1272
ret = trx_sys_file_format_max_write(format_id, name);
1275
mutex_exit(&file_format_max.mutex);
1280
/*********************************************************************
1281
Get the name representation of the maximum file format currently recorded
in file_format_max. The name is read without taking
file_format_max.mutex. */
1284
trx_sys_file_format_max_get(void)
1285
/*=============================*/
1286
/* out: pointer to the max format name */
1288
return(file_format_max.name);
1291
/*********************************************************************
1292
Initializes the tablespace tag system: creates file_format_max.mutex and
sets file_format_max to DICT_TF_FORMAT_51 together with the matching
name. */
1295
trx_sys_file_format_init(void)
1296
/*==========================*/
1298
mutex_create(&file_format_max.mutex, SYNC_FILE_FORMAT_TAG);
1300
/* We don't need a mutex here, as this function should only
1301
be called once at start up. */
1302
file_format_max.id = DICT_TF_FORMAT_51;
1304
/* Keep the cached name consistent with the id set above */
file_format_max.name = trx_sys_file_format_id_to_name(
1305
file_format_max.id);
1308
/*********************************************************************
1309
Closes the tablespace tag system. Counterpart of
trx_sys_file_format_init(); the only thing visible in its body is the
comment below. */
1312
trx_sys_file_format_close(void)
1313
/*===========================*/
1315
/* Does nothing at the moment */