1
/******************************************************
6
Created 3/26/1996 Heikki Tuuri
7
*******************************************************/
21
#include "trx0purge.h"
25
/* The file format tag structure with id and name. */
26
struct file_format_struct {
27
uint id; /* id of the file format */
28
const char* name; /* text representation of the
30
mutex_t mutex; /* covers changes to the above
34
typedef struct file_format_struct file_format_t;
36
/* The transaction system */
37
UNIV_INTERN trx_sys_t* trx_sys = NULL;
38
/* The doublewrite buffer memory structure; NULL until
trx_doublewrite_init() allocates it */
UNIV_INTERN trx_doublewrite_t* trx_doublewrite = NULL;
40
/* The following is set to TRUE when we are upgrading from the old format data
41
files to the new (>= 4.1.x) multiple tablespaces data file format */
43
UNIV_INTERN ibool trx_doublewrite_must_reset_space_ids = FALSE;
45
/* The following is TRUE when we are using the database in the new format,
46
i.e., we have successfully upgraded, or have created a new database
49
UNIV_INTERN ibool trx_sys_multiple_tablespace_format = FALSE;
51
/* In a MySQL replication slave, in crash recovery we store the master log
52
file name and position here. We have successfully got the updates to InnoDB
53
up to this position. If .._pos is -1, it means no crash recovery was needed,
54
or there was no master log position info inside InnoDB. */
56
UNIV_INTERN char trx_sys_mysql_master_log_name[TRX_SYS_MYSQL_LOG_NAME_LEN];
57
UNIV_INTERN ib_int64_t trx_sys_mysql_master_log_pos = -1;
59
/* If this MySQL server uses binary logging, after InnoDB has been inited
60
and if it has done a crash recovery, we store the binlog file name and position
61
here. If .._pos is -1, it means there was no binlog position info inside
64
UNIV_INTERN char trx_sys_mysql_bin_log_name[TRX_SYS_MYSQL_LOG_NAME_LEN];
65
UNIV_INTERN ib_int64_t trx_sys_mysql_bin_log_pos = -1;
67
/* List of animal names representing file format. */
68
static const char* file_format_name_map[] = {
97
/* The number of elements in the file format name array. */
98
static const ulint FILE_FORMAT_NAME_N =
99
sizeof(file_format_name_map) / sizeof(file_format_name_map[0]);
101
/* This is used to track the maximum file format id known to InnoDB. It's
102
updated via SET GLOBAL innodb_file_format_check = 'x' or when we open
103
or create a table. */
104
static file_format_t file_format_max;
106
/********************************************************************
107
Determines if a page number is located inside the doublewrite buffer. */
110
trx_doublewrite_page_inside(
111
/*========================*/
112
/* out: TRUE if the location is inside
113
the two blocks of the doublewrite buffer */
114
ulint page_no) /* in: page number */
116
if (trx_doublewrite == NULL) {
121
if (page_no >= trx_doublewrite->block1
122
&& page_no < trx_doublewrite->block1
123
+ TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
127
if (page_no >= trx_doublewrite->block2
128
&& page_no < trx_doublewrite->block2
129
+ TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
136
/********************************************************************
137
Creates or initializes the doublewrite buffer at a database start. */
140
trx_doublewrite_init(
141
/*=================*/
142
byte* doublewrite) /* in: pointer to the doublewrite buf
143
header on trx sys page */
145
trx_doublewrite = mem_alloc(sizeof(trx_doublewrite_t));
147
/* Since we now start to use the doublewrite buffer, no need to call
148
fsync() after every write to a data file */
150
os_do_not_call_flush_at_each_write = TRUE;
151
#endif /* UNIV_DO_FLUSH */
153
mutex_create(&trx_doublewrite->mutex, SYNC_DOUBLEWRITE);
155
trx_doublewrite->first_free = 0;
157
trx_doublewrite->block1 = mach_read_from_4(
158
doublewrite + TRX_SYS_DOUBLEWRITE_BLOCK1);
159
trx_doublewrite->block2 = mach_read_from_4(
160
doublewrite + TRX_SYS_DOUBLEWRITE_BLOCK2);
161
trx_doublewrite->write_buf_unaligned = ut_malloc(
162
(1 + 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) * UNIV_PAGE_SIZE);
164
trx_doublewrite->write_buf = ut_align(
165
trx_doublewrite->write_buf_unaligned, UNIV_PAGE_SIZE);
166
trx_doublewrite->buf_block_arr = mem_alloc(
167
2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * sizeof(void*));
170
/********************************************************************
171
Marks the trx sys header when we have successfully upgraded to the >= 4.1.x
172
multiple tablespace format. */
175
trx_sys_mark_upgraded_to_multiple_tablespaces(void)
176
/*===============================================*/
182
/* We upgraded to 4.1.x and reset the space id fields in the
183
doublewrite buffer. Let us mark to the trx_sys header that the upgrade
188
block = buf_page_get(TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO,
190
#ifdef UNIV_SYNC_DEBUG
191
buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
192
#endif /* UNIV_SYNC_DEBUG */
194
doublewrite = buf_block_get_frame(block) + TRX_SYS_DOUBLEWRITE;
196
mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED,
197
TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N,
201
/* Flush the modified pages to disk and make a checkpoint */
202
log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
204
trx_sys_multiple_tablespace_format = TRUE;
207
/********************************************************************
208
Creates the doublewrite buffer in a new InnoDB installation. The header of the
209
doublewrite buffer is placed on the trx system header page. */
212
trx_sys_create_doublewrite_buf(void)
213
/*================================*/
217
buf_block_t* new_block;
225
if (trx_doublewrite) {
234
block = buf_page_get(TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO,
236
#ifdef UNIV_SYNC_DEBUG
237
buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
238
#endif /* UNIV_SYNC_DEBUG */
240
doublewrite = buf_block_get_frame(block) + TRX_SYS_DOUBLEWRITE;
242
if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC)
243
== TRX_SYS_DOUBLEWRITE_MAGIC_N) {
244
/* The doublewrite buffer has already been created:
245
just read in some numbers */
247
trx_doublewrite_init(doublewrite);
252
"InnoDB: Doublewrite buffer not found:"
255
if (buf_pool_get_curr_size()
256
< ((2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
257
+ FSP_EXTENT_SIZE / 2 + 100)
260
"InnoDB: Cannot create doublewrite buffer:"
262
"InnoDB: increase your buffer pool size.\n"
263
"InnoDB: Cannot continue operation.\n");
268
block2 = fseg_create(TRX_SYS_SPACE, TRX_SYS_PAGE_NO,
270
+ TRX_SYS_DOUBLEWRITE_FSEG, &mtr);
272
/* fseg_create acquires a second latch on the page,
273
therefore we must declare it: */
275
#ifdef UNIV_SYNC_DEBUG
276
buf_block_dbg_add_level(block2, SYNC_NO_ORDER_CHECK);
277
#endif /* UNIV_SYNC_DEBUG */
279
if (block2 == NULL) {
281
"InnoDB: Cannot create doublewrite buffer:"
283
"InnoDB: increase your tablespace size.\n"
284
"InnoDB: Cannot continue operation.\n");
286
/* We exit without committing the mtr to prevent
287
its modifications to the database getting to disk */
292
fseg_header = buf_block_get_frame(block)
293
+ TRX_SYS_DOUBLEWRITE + TRX_SYS_DOUBLEWRITE_FSEG;
296
for (i = 0; i < 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
297
+ FSP_EXTENT_SIZE / 2; i++) {
298
page_no = fseg_alloc_free_page(fseg_header,
301
if (page_no == FIL_NULL) {
303
"InnoDB: Cannot create doublewrite"
304
" buffer: you must\n"
305
"InnoDB: increase your"
306
" tablespace size.\n"
307
"InnoDB: Cannot continue operation.\n"
313
/* We read the allocated pages to the buffer pool;
314
when they are written to disk in a flush, the space
315
id and page number fields are also written to the
316
pages. When we at database startup read pages
317
from the doublewrite buffer, we know that if the
318
space id and page number in them are the same as
319
the page position in the tablespace, then the page
320
has not been written to in doublewrite. */
322
new_block = buf_page_get(TRX_SYS_SPACE, 0, page_no,
324
#ifdef UNIV_SYNC_DEBUG
325
buf_block_dbg_add_level(new_block,
326
SYNC_NO_ORDER_CHECK);
327
#endif /* UNIV_SYNC_DEBUG */
329
/* Make a dummy change to the page to ensure it will
330
be written to disk in a flush */
332
mlog_write_ulint(buf_block_get_frame(new_block)
334
TRX_SYS_DOUBLEWRITE_MAGIC_N,
337
if (i == FSP_EXTENT_SIZE / 2) {
338
mlog_write_ulint(doublewrite
339
+ TRX_SYS_DOUBLEWRITE_BLOCK1,
340
page_no, MLOG_4BYTES, &mtr);
341
mlog_write_ulint(doublewrite
342
+ TRX_SYS_DOUBLEWRITE_REPEAT
343
+ TRX_SYS_DOUBLEWRITE_BLOCK1,
344
page_no, MLOG_4BYTES, &mtr);
345
} else if (i == FSP_EXTENT_SIZE / 2
346
+ TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
347
mlog_write_ulint(doublewrite
348
+ TRX_SYS_DOUBLEWRITE_BLOCK2,
349
page_no, MLOG_4BYTES, &mtr);
350
mlog_write_ulint(doublewrite
351
+ TRX_SYS_DOUBLEWRITE_REPEAT
352
+ TRX_SYS_DOUBLEWRITE_BLOCK2,
353
page_no, MLOG_4BYTES, &mtr);
354
} else if (i > FSP_EXTENT_SIZE / 2) {
355
ut_a(page_no == prev_page_no + 1);
358
prev_page_no = page_no;
361
mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC,
362
TRX_SYS_DOUBLEWRITE_MAGIC_N,
364
mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC
365
+ TRX_SYS_DOUBLEWRITE_REPEAT,
366
TRX_SYS_DOUBLEWRITE_MAGIC_N,
369
mlog_write_ulint(doublewrite
370
+ TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED,
371
TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N,
375
/* Flush the modified pages to disk and make a checkpoint */
376
log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
378
fprintf(stderr, "InnoDB: Doublewrite buffer created\n");
380
trx_sys_multiple_tablespace_format = TRUE;
386
/********************************************************************
387
At a database startup initializes the doublewrite buffer memory structure if
388
we already have a doublewrite buffer created in the data files. If we are
389
upgrading to an InnoDB version which supports multiple tablespaces, then this
390
function performs the necessary update operations. If we are in a crash
391
recovery, this function uses a possible doublewrite buffer to restore
392
half-written pages in the data files. */
395
trx_sys_doublewrite_init_or_restore_pages(
396
/*======================================*/
397
ibool restore_corrupt_pages)
401
byte* unaligned_read_buf;
404
ulint source_page_no;
411
/* We do the file i/o past the buffer pool */
413
unaligned_read_buf = ut_malloc(2 * UNIV_PAGE_SIZE);
414
read_buf = ut_align(unaligned_read_buf, UNIV_PAGE_SIZE);
416
/* Read the trx sys header to check if we are using the doublewrite
419
fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, 0,
420
UNIV_PAGE_SIZE, read_buf, NULL);
421
doublewrite = read_buf + TRX_SYS_DOUBLEWRITE;
423
if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC)
424
== TRX_SYS_DOUBLEWRITE_MAGIC_N) {
425
/* The doublewrite buffer has been created */
427
trx_doublewrite_init(doublewrite);
429
block1 = trx_doublewrite->block1;
430
block2 = trx_doublewrite->block2;
432
buf = trx_doublewrite->write_buf;
437
if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED)
438
!= TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N) {
440
/* We are upgrading from a version < 4.1.x to a version where
441
multiple tablespaces are supported. We must reset the space id
442
field in the pages in the doublewrite buffer because starting
443
from this version the space id is stored to
444
FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID. */
446
trx_doublewrite_must_reset_space_ids = TRUE;
449
"InnoDB: Resetting space id's in the"
450
" doublewrite buffer\n");
452
trx_sys_multiple_tablespace_format = TRUE;
455
/* Read the pages from the doublewrite buffer to memory */
457
fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, block1, 0,
458
TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
460
fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, block2, 0,
461
TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
462
buf + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
464
/* Check if any of these pages is half-written in data files, in the
469
for (i = 0; i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 2; i++) {
471
page_no = mach_read_from_4(page + FIL_PAGE_OFFSET);
473
if (trx_doublewrite_must_reset_space_ids) {
477
+ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 0);
478
/* We do not need to calculate new checksums for the
479
pages because the field .._SPACE_ID does not affect
480
them. Write the page back to where we read it from. */
482
if (i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
483
source_page_no = block1 + i;
485
source_page_no = block2
486
+ i - TRX_SYS_DOUBLEWRITE_BLOCK_SIZE;
489
fil_io(OS_FILE_WRITE, TRUE, 0, 0, source_page_no, 0,
490
UNIV_PAGE_SIZE, page, NULL);
491
/* printf("Resetting space id in page %lu\n",
494
space_id = mach_read_from_4(
495
page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
498
if (!restore_corrupt_pages) {
499
/* The database was shut down gracefully: no need to
502
} else if (!fil_tablespace_exists_in_mem(space_id)) {
503
/* Maybe we have dropped the single-table tablespace
504
and this page once belonged to it: do nothing */
506
} else if (!fil_check_adress_in_tablespace(space_id,
509
"InnoDB: Warning: a page in the"
510
" doublewrite buffer is not within space\n"
511
"InnoDB: bounds; space id %lu"
512
" page number %lu, page %lu in"
513
" doublewrite buf.\n",
514
(ulong) space_id, (ulong) page_no, (ulong) i);
516
} else if (space_id == TRX_SYS_SPACE
517
&& ((page_no >= block1
519
< block1 + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)
520
|| (page_no >= block2
523
+ TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)))) {
525
/* It is an unwritten doublewrite buffer page:
528
ulint zip_size = fil_space_get_zip_size(space_id);
530
/* Read in the actual page from the file */
531
fil_io(OS_FILE_READ, TRUE, space_id, zip_size,
533
zip_size ? zip_size : UNIV_PAGE_SIZE,
536
/* Check if the page is corrupt */
539
(buf_page_is_corrupted(read_buf, zip_size))) {
542
"InnoDB: Warning: database page"
543
" corruption or a failed\n"
544
"InnoDB: file read of"
545
" space %lu page %lu.\n"
546
"InnoDB: Trying to recover it from"
547
" the doublewrite buffer.\n",
548
(ulong) space_id, (ulong) page_no);
550
if (buf_page_is_corrupted(page, zip_size)) {
552
"InnoDB: Dump of the page:\n");
553
buf_page_print(read_buf, zip_size);
556
" corresponding page"
557
" in doublewrite buffer:\n");
558
buf_page_print(page, zip_size);
561
"InnoDB: Also the page in the"
562
" doublewrite buffer"
564
"InnoDB: Cannot continue"
566
"InnoDB: You can try to"
567
" recover the database"
570
"InnoDB: set-variable="
571
"innodb_force_recovery=6\n");
575
/* Write the good page from the
576
doublewrite buffer to the intended
579
fil_io(OS_FILE_WRITE, TRUE, space_id,
580
zip_size, page_no, 0,
581
zip_size ? zip_size : UNIV_PAGE_SIZE,
584
"InnoDB: Recovered the page from"
585
" the doublewrite buffer.\n");
589
page += UNIV_PAGE_SIZE;
592
fil_flush_file_spaces(FIL_TABLESPACE);
595
ut_free(unaligned_read_buf);
598
/********************************************************************
599
Checks that trx is in the trx list. */
604
/* out: TRUE if is in */
605
trx_t* in_trx) /* in: trx */
609
ut_ad(mutex_own(&(kernel_mutex)));
611
trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
613
while (trx != NULL) {
620
trx = UT_LIST_GET_NEXT(trx_list, trx);
626
/*********************************************************************
627
Writes the value of max_trx_id to the file based trx system header. */
630
trx_sys_flush_max_trx_id(void)
631
/*==========================*/
633
trx_sysf_t* sys_header;
636
ut_ad(mutex_own(&kernel_mutex));
640
sys_header = trx_sysf_get(&mtr);
642
mlog_write_dulint(sys_header + TRX_SYS_TRX_ID_STORE,
643
trx_sys->max_trx_id, &mtr);
647
/*********************************************************************
648
Updates the offset information about the end of the MySQL binlog entry
649
which corresponds to the transaction just being committed. In a MySQL
650
replication slave updates the latest master binlog position up to which
651
replication has proceeded. */
654
trx_sys_update_mysql_binlog_offset(
655
/*===============================*/
656
const char* file_name,/* in: MySQL log file name */
657
ib_int64_t offset, /* in: position in that log file */
658
ulint field, /* in: offset of the MySQL log info field in
659
the trx sys header */
660
mtr_t* mtr) /* in: mtr */
662
trx_sysf_t* sys_header;
664
if (ut_strlen(file_name) >= TRX_SYS_MYSQL_LOG_NAME_LEN) {
666
/* We cannot fit the name to the 512 bytes we have reserved */
671
sys_header = trx_sysf_get(mtr);
673
if (mach_read_from_4(sys_header + field
674
+ TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
675
!= TRX_SYS_MYSQL_LOG_MAGIC_N) {
677
mlog_write_ulint(sys_header + field
678
+ TRX_SYS_MYSQL_LOG_MAGIC_N_FLD,
679
TRX_SYS_MYSQL_LOG_MAGIC_N,
683
if (0 != strcmp((char*) (sys_header + field + TRX_SYS_MYSQL_LOG_NAME),
686
mlog_write_string(sys_header + field
687
+ TRX_SYS_MYSQL_LOG_NAME,
688
(byte*) file_name, 1 + ut_strlen(file_name),
692
if (mach_read_from_4(sys_header + field
693
+ TRX_SYS_MYSQL_LOG_OFFSET_HIGH) > 0
694
|| (offset >> 32) > 0) {
696
mlog_write_ulint(sys_header + field
697
+ TRX_SYS_MYSQL_LOG_OFFSET_HIGH,
698
(ulint)(offset >> 32),
702
mlog_write_ulint(sys_header + field
703
+ TRX_SYS_MYSQL_LOG_OFFSET_LOW,
704
(ulint)(offset & 0xFFFFFFFFUL),
708
#ifdef UNIV_HOTBACKUP
709
/*********************************************************************
710
Prints to stderr the MySQL binlog info in the system header if the
711
magic number shows it valid. */
714
trx_sys_print_mysql_binlog_offset_from_page(
715
/*========================================*/
716
const byte* page) /* in: buffer containing the trx
717
system header page, i.e., page number
718
TRX_SYS_PAGE_NO in the tablespace */
720
const trx_sysf_t* sys_header;
722
sys_header = page + TRX_SYS;
724
if (mach_read_from_4(sys_header + TRX_SYS_MYSQL_LOG_INFO
725
+ TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
726
== TRX_SYS_MYSQL_LOG_MAGIC_N) {
729
"ibbackup: Last MySQL binlog file position %lu %lu,"
731
(ulong) mach_read_from_4(
732
sys_header + TRX_SYS_MYSQL_LOG_INFO
733
+ TRX_SYS_MYSQL_LOG_OFFSET_HIGH),
734
(ulong) mach_read_from_4(
735
sys_header + TRX_SYS_MYSQL_LOG_INFO
736
+ TRX_SYS_MYSQL_LOG_OFFSET_LOW),
737
sys_header + TRX_SYS_MYSQL_LOG_INFO
738
+ TRX_SYS_MYSQL_LOG_NAME);
741
#endif /* UNIV_HOTBACKUP */
743
/*********************************************************************
744
Reads the MySQL binlog offset info from the trx system header into the
trx_sys_mysql_bin_log_name and trx_sys_mysql_bin_log_pos globals if
745
the magic number shows it valid, and prints the info to stderr. */
748
trx_sys_print_mysql_binlog_offset(void)
749
/*===================================*/
751
trx_sysf_t* sys_header;
753
ulint trx_sys_mysql_bin_log_pos_high;
754
ulint trx_sys_mysql_bin_log_pos_low;
758
sys_header = trx_sysf_get(&mtr);
760
if (mach_read_from_4(sys_header + TRX_SYS_MYSQL_LOG_INFO
761
+ TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
762
!= TRX_SYS_MYSQL_LOG_MAGIC_N) {
769
trx_sys_mysql_bin_log_pos_high = mach_read_from_4(
770
sys_header + TRX_SYS_MYSQL_LOG_INFO
771
+ TRX_SYS_MYSQL_LOG_OFFSET_HIGH);
772
trx_sys_mysql_bin_log_pos_low = mach_read_from_4(
773
sys_header + TRX_SYS_MYSQL_LOG_INFO
774
+ TRX_SYS_MYSQL_LOG_OFFSET_LOW);
776
trx_sys_mysql_bin_log_pos
777
= (((ib_int64_t)trx_sys_mysql_bin_log_pos_high) << 32)
778
+ (ib_int64_t)trx_sys_mysql_bin_log_pos_low;
780
ut_memcpy(trx_sys_mysql_bin_log_name,
781
sys_header + TRX_SYS_MYSQL_LOG_INFO
782
+ TRX_SYS_MYSQL_LOG_NAME, TRX_SYS_MYSQL_LOG_NAME_LEN);
785
"InnoDB: Last MySQL binlog file position %lu %lu,"
787
trx_sys_mysql_bin_log_pos_high, trx_sys_mysql_bin_log_pos_low,
788
trx_sys_mysql_bin_log_name);
793
/*********************************************************************
794
Prints to stderr the MySQL master log offset info in the trx system header if
795
the magic number shows it valid, and copies it to the
trx_sys_mysql_master_log_name and trx_sys_mysql_master_log_pos globals. */
798
trx_sys_print_mysql_master_log_pos(void)
799
/*====================================*/
801
trx_sysf_t* sys_header;
806
sys_header = trx_sysf_get(&mtr);
808
if (mach_read_from_4(sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
809
+ TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
810
!= TRX_SYS_MYSQL_LOG_MAGIC_N) {
818
"InnoDB: In a MySQL replication slave the last"
819
" master binlog file\n"
820
"InnoDB: position %lu %lu, file name %s\n",
821
(ulong) mach_read_from_4(sys_header
822
+ TRX_SYS_MYSQL_MASTER_LOG_INFO
823
+ TRX_SYS_MYSQL_LOG_OFFSET_HIGH),
824
(ulong) mach_read_from_4(sys_header
825
+ TRX_SYS_MYSQL_MASTER_LOG_INFO
826
+ TRX_SYS_MYSQL_LOG_OFFSET_LOW),
827
sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
828
+ TRX_SYS_MYSQL_LOG_NAME);
829
/* Copy the master log position info to global variables we can
830
use in ha_innobase.cc to initialize glob_mi to right values */
832
ut_memcpy(trx_sys_mysql_master_log_name,
833
sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
834
+ TRX_SYS_MYSQL_LOG_NAME,
835
TRX_SYS_MYSQL_LOG_NAME_LEN);
837
trx_sys_mysql_master_log_pos
838
= (((ib_int64_t) mach_read_from_4(
839
sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
840
+ TRX_SYS_MYSQL_LOG_OFFSET_HIGH)) << 32)
841
+ ((ib_int64_t) mach_read_from_4(
842
sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
843
+ TRX_SYS_MYSQL_LOG_OFFSET_LOW));
847
/********************************************************************
848
Looks for a free slot for a rollback segment in the trx system file copy. */
851
trx_sysf_rseg_find_free(
852
/*====================*/
853
/* out: slot index or ULINT_UNDEFINED if not found */
854
mtr_t* mtr) /* in: mtr */
856
trx_sysf_t* sys_header;
860
ut_ad(mutex_own(&(kernel_mutex)));
862
sys_header = trx_sysf_get(mtr);
864
for (i = 0; i < TRX_SYS_N_RSEGS; i++) {
866
page_no = trx_sysf_rseg_get_page_no(sys_header, i, mtr);
868
if (page_no == FIL_NULL) {
874
return(ULINT_UNDEFINED);
877
/*********************************************************************
878
Creates the file page for the transaction system. This function is called only
879
at the database creation, before trx_sys_init. */
884
mtr_t* mtr) /* in: mtr */
886
trx_sysf_t* sys_header;
895
/* Note that below we first reserve the file space x-latch, and
896
then enter the kernel: we must do it in this order to conform
897
to the latching order rules. */
899
mtr_x_lock(fil_space_get_latch(TRX_SYS_SPACE, NULL), mtr);
900
mutex_enter(&kernel_mutex);
902
/* Create the trx sys file block in a new allocated file segment */
903
block = fseg_create(TRX_SYS_SPACE, 0, TRX_SYS + TRX_SYS_FSEG_HEADER,
905
#ifdef UNIV_SYNC_DEBUG
906
buf_block_dbg_add_level(block, SYNC_TRX_SYS_HEADER);
907
#endif /* UNIV_SYNC_DEBUG */
908
ut_a(buf_block_get_page_no(block) == TRX_SYS_PAGE_NO);
910
page = buf_block_get_frame(block);
912
mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_TYPE_TRX_SYS,
915
/* Reset the doublewrite buffer magic number to zero so that we
916
know that the doublewrite buffer has not yet been created (this
917
suppresses a Valgrind warning) */
919
mlog_write_ulint(page + TRX_SYS_DOUBLEWRITE
920
+ TRX_SYS_DOUBLEWRITE_MAGIC, 0, MLOG_4BYTES, mtr);
922
sys_header = trx_sysf_get(mtr);
924
/* Start counting transaction ids from number 1 up */
925
mlog_write_dulint(sys_header + TRX_SYS_TRX_ID_STORE,
926
ut_dulint_create(0, 1), mtr);
928
/* Reset the rollback segment slots */
929
for (i = 0; i < TRX_SYS_N_RSEGS; i++) {
931
trx_sysf_rseg_set_space(sys_header, i, ULINT_UNDEFINED, mtr);
932
trx_sysf_rseg_set_page_no(sys_header, i, FIL_NULL, mtr);
935
/* The remaining area (up to the page trailer) is uninitialized.
936
Silence Valgrind warnings about it. */
937
UNIV_MEM_VALID(sys_header + (TRX_SYS_RSEGS
938
+ TRX_SYS_N_RSEGS * TRX_SYS_RSEG_SLOT_SIZE
939
+ TRX_SYS_RSEG_SPACE),
940
(UNIV_PAGE_SIZE - FIL_PAGE_DATA_END
942
+ TRX_SYS_N_RSEGS * TRX_SYS_RSEG_SLOT_SIZE
943
+ TRX_SYS_RSEG_SPACE))
944
+ page - sys_header);
946
/* Create the first rollback segment in the SYSTEM tablespace */
947
page_no = trx_rseg_header_create(TRX_SYS_SPACE, 0, ULINT_MAX, &slot_no,
949
ut_a(slot_no == TRX_SYS_SYSTEM_RSEG_ID);
950
ut_a(page_no != FIL_NULL);
952
mutex_exit(&kernel_mutex);
955
/*********************************************************************
956
Creates and initializes the central memory structures for the transaction
957
system. This is called when the database is started. */
960
trx_sys_init_at_db_start(void)
961
/*==========================*/
963
trx_sysf_t* sys_header;
964
ib_int64_t rows_to_undo = 0;
965
const char* unit = "";
971
ut_ad(trx_sys == NULL);
973
mutex_enter(&kernel_mutex);
975
trx_sys = mem_alloc(sizeof(trx_sys_t));
977
sys_header = trx_sysf_get(&mtr);
979
trx_rseg_list_and_array_init(sys_header, &mtr);
981
trx_sys->latest_rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
983
/* VERY important: after the database is started, max_trx_id value is
984
divisible by TRX_SYS_TRX_ID_WRITE_MARGIN, and the 'if' in
985
trx_sys_get_new_trx_id will evaluate to TRUE when the function
986
is first time called, and the value for trx id will be written
987
to the disk-based header! Thus trx id values will not overlap when
988
the database is repeatedly started! */
990
trx_sys->max_trx_id = ut_dulint_add(
991
ut_dulint_align_up(mtr_read_dulint(
993
+ TRX_SYS_TRX_ID_STORE, &mtr),
994
TRX_SYS_TRX_ID_WRITE_MARGIN),
995
2 * TRX_SYS_TRX_ID_WRITE_MARGIN);
997
UT_LIST_INIT(trx_sys->mysql_trx_list);
998
trx_dummy_sess = sess_open();
999
trx_lists_init_at_db_start();
1001
if (UT_LIST_GET_LEN(trx_sys->trx_list) > 0) {
1002
trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
1006
if ( trx->conc_state != TRX_PREPARED) {
1007
rows_to_undo += ut_conv_dulint_to_longlong(
1011
trx = UT_LIST_GET_NEXT(trx_list, trx);
1018
if (rows_to_undo > 1000000000) {
1020
rows_to_undo = rows_to_undo / 1000000;
1024
"InnoDB: %lu transaction(s) which must be"
1025
" rolled back or cleaned up\n"
1026
"InnoDB: in total %lu%s row operations to undo\n",
1027
(ulong) UT_LIST_GET_LEN(trx_sys->trx_list),
1028
(ulong) rows_to_undo, unit);
1030
fprintf(stderr, "InnoDB: Trx id counter is " TRX_ID_FMT "\n",
1031
TRX_ID_PREP_PRINTF(trx_sys->max_trx_id));
1034
UT_LIST_INIT(trx_sys->view_list);
1036
trx_purge_sys_create();
1038
mutex_exit(&kernel_mutex);
1043
/*********************************************************************
1044
Creates and initializes the transaction system at the database creation. */
1047
trx_sys_create(void)
1048
/*================*/
1054
trx_sysf_create(&mtr);
1058
trx_sys_init_at_db_start();
1061
/*********************************************************************
1062
Update the file format tag. */
1065
trx_sys_file_format_max_write(
1066
/*==========================*/
1067
/* out: always TRUE */
1068
ulint format_id, /* in: file format id */
1069
char** name) /* out: max file format name, can
1075
ulint tag_value_low;
1079
block = buf_page_get(
1080
TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, RW_X_LATCH, &mtr);
1082
file_format_max.id = format_id;
1083
file_format_max.name = trx_sys_file_format_id_to_name(format_id);
1085
ptr = buf_block_get_frame(block) + TRX_SYS_FILE_FORMAT_TAG;
1086
tag_value_low = format_id + TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_LOW;
1089
*name = (char*) file_format_max.name;
1094
ut_dulint_create(TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_HIGH,
1103
/*********************************************************************
1104
Read the file format tag. */
1107
trx_sys_file_format_max_read(void)
1108
/*==============================*/
1109
/* out: the file format */
1113
const buf_block_t* block;
1115
dulint file_format_id;
1117
/* Since this is called during the startup phase it's safe to
1118
read the value without a covering mutex. */
1121
block = buf_page_get(
1122
TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, RW_X_LATCH, &mtr);
1124
ptr = buf_block_get_frame(block) + TRX_SYS_FILE_FORMAT_TAG;
1125
file_format_id = mach_read_from_8(ptr);
1129
format_id = file_format_id.low - TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_LOW;
1131
if (file_format_id.high != TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_HIGH
1132
|| format_id >= FILE_FORMAT_NAME_N) {
1134
/* Either it has never been tagged, or garbage in it.
1135
Reset the tag in either case. */
1136
format_id = DICT_TF_FORMAT_51;
1137
trx_sys_file_format_max_write(format_id, NULL);
1143
/*********************************************************************
1144
Get the name representation of the file format from its id. */
1147
trx_sys_file_format_id_to_name(
1148
/*===========================*/
1149
/* out: pointer to the name */
1150
const uint id) /* in: id of the file format */
1152
ut_a(id < FILE_FORMAT_NAME_N);
1154
return(file_format_name_map[id]);
1157
/*********************************************************************
1158
Check for the max file format tag stored on disk. Note: If max_format_id
1159
is == DICT_TF_FORMAT_MAX + 1 then we only print a warning. */
1162
trx_sys_file_format_max_check(
1163
/*==========================*/
1164
/* out: DB_SUCCESS or error code */
1165
ulint max_format_id) /* in: max format id to check */
1169
/* Check the file format in the tablespace. Do not try to
1170
recover if the file format is not supported by the engine
1171
unless forced by the user. */
1172
format_id = trx_sys_file_format_max_read();
1174
ut_print_timestamp(stderr);
1176
" InnoDB: highest supported file format is %s.\n",
1177
trx_sys_file_format_id_to_name(DICT_TF_FORMAT_MAX));
1179
if (format_id > DICT_TF_FORMAT_MAX) {
1181
ut_a(format_id < FILE_FORMAT_NAME_N);
1183
ut_print_timestamp(stderr);
1185
" InnoDB: %s: the system tablespace is in a file "
1186
"format that this version doesn't support - %s\n",
1187
((max_format_id <= DICT_TF_FORMAT_MAX)
1188
? "Error" : "Warning"),
1189
trx_sys_file_format_id_to_name(format_id));
1191
if (max_format_id <= DICT_TF_FORMAT_MAX) {
1196
format_id = (format_id > max_format_id) ? format_id : max_format_id;
1198
/* We don't need a mutex here, as this function should only
1199
be called once at start up. */
1200
file_format_max.id = format_id;
1201
file_format_max.name = trx_sys_file_format_id_to_name(format_id);
1206
/*********************************************************************
1207
Set the file format id unconditionally except if it's already the
1211
trx_sys_file_format_max_set(
1212
/*========================*/
1213
/* out: TRUE if value updated */
1214
ulint format_id, /* in: file format id */
1215
char** name) /* out: max file format name */
1220
ut_a(format_id <= DICT_TF_FORMAT_MAX);
1222
mutex_enter(&file_format_max.mutex);
1224
/* Only update if not already same value. */
1225
if (format_id != file_format_max.id) {
1227
ret = trx_sys_file_format_max_write(format_id, name);
1230
mutex_exit(&file_format_max.mutex);
1235
/************************************************************************
1236
Update the file format tag in the tablespace only if the given format id
1237
is greater than the known max id. */
1240
trx_sys_file_format_max_update(
1241
/*===========================*/
1242
uint flags, /* in: flags of the table.*/
1243
char** name) /* out: max file format name */
1248
format_id = (flags & DICT_TF_FORMAT_MASK) >> DICT_TF_FORMAT_SHIFT;
1251
ut_a(file_format_max.name != NULL);
1252
ut_a(format_id <= DICT_TF_FORMAT_MAX);
1254
mutex_enter(&file_format_max.mutex);
1256
if (format_id > file_format_max.id) {
1258
ret = trx_sys_file_format_max_write(format_id, name);
1261
mutex_exit(&file_format_max.mutex);
1266
/*********************************************************************
1267
Get the name representation of the max file format known to InnoDB. */
1270
trx_sys_file_format_max_get(void)
1271
/*=============================*/
1272
/* out: pointer to the max format name */
1274
return(file_format_max.name);
1277
/*********************************************************************
1278
Initializes the tablespace tag system. */
1281
trx_sys_file_format_init(void)
1282
/*==========================*/
1284
mutex_create(&file_format_max.mutex, SYNC_FILE_FORMAT_TAG);
1286
/* We don't need a mutex here, as this function should only
1287
be called once at start up. */
1288
file_format_max.id = DICT_TF_FORMAT_51;
1290
file_format_max.name = trx_sys_file_format_id_to_name(
1291
file_format_max.id);
1294
/*********************************************************************
1295
Closes the tablespace tag system. */
1298
trx_sys_file_format_close(void)
1299
/*===========================*/
1301
/* Does nothing at the moment */