~drizzle-trunk/drizzle/development

« back to all changes in this revision

Viewing changes to storage/innobase/trx/trx0sys.c

Tags: innodb-plugin-1.0.1
Imported 1.0.1 with clean - with no changes.

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/******************************************************
 
2
Transaction system
 
3
 
 
4
(c) 1996 Innobase Oy
 
5
 
 
6
Created 3/26/1996 Heikki Tuuri
 
7
*******************************************************/
 
8
 
 
9
#include "trx0sys.h"
 
10
 
 
11
#ifdef UNIV_NONINL
 
12
#include "trx0sys.ic"
 
13
#endif
 
14
 
 
15
#include "fsp0fsp.h"
 
16
#include "mtr0mtr.h"
 
17
#include "trx0trx.h"
 
18
#include "trx0rseg.h"
 
19
#include "trx0undo.h"
 
20
#include "srv0srv.h"
 
21
#include "trx0purge.h"
 
22
#include "log0log.h"
 
23
#include "os0file.h"
 
24
 
 
25
/* The file format tag structure with id and name. */
 
26
struct file_format_struct {
 
27
        uint            id;             /* id of the file format */
 
28
        const char*     name;           /* text representation of the
 
29
                                        file format */
 
30
        mutex_t         mutex;          /* covers changes to the above
 
31
                                        fields */
 
32
};
 
33
 
 
34
typedef struct file_format_struct       file_format_t;
 
35
 
 
36
/* The transaction system */
 
37
UNIV_INTERN trx_sys_t*          trx_sys         = NULL;
 
38
UNIV_INTERN trx_doublewrite_t*  trx_doublewrite = NULL;
 
39
 
 
40
/* The following is set to TRUE when we are upgrading from the old format data
 
41
files to the new >= 4.1.x format multiple tablespaces format data files */
 
42
 
 
43
UNIV_INTERN ibool       trx_doublewrite_must_reset_space_ids    = FALSE;
 
44
 
 
45
/* The following is TRUE when we are using the database in the new format,
 
46
i.e., we have successfully upgraded, or have created a new database
 
47
installation */
 
48
 
 
49
UNIV_INTERN ibool       trx_sys_multiple_tablespace_format      = FALSE;
 
50
 
 
51
/* In a MySQL replication slave, in crash recovery we store the master log
 
52
file name and position here. We have successfully got the updates to InnoDB
 
53
up to this position. If .._pos is -1, it means no crash recovery was needed,
 
54
or there was no master log position info inside InnoDB. */
 
55
 
 
56
UNIV_INTERN char        trx_sys_mysql_master_log_name[TRX_SYS_MYSQL_LOG_NAME_LEN];
 
57
UNIV_INTERN ib_int64_t  trx_sys_mysql_master_log_pos    = -1;
 
58
 
 
59
/* If this MySQL server uses binary logging, after InnoDB has been inited
 
60
and if it has done a crash recovery, we store the binlog file name and position
 
61
here. If .._pos is -1, it means there was no binlog position info inside
 
62
InnoDB. */
 
63
 
 
64
UNIV_INTERN char        trx_sys_mysql_bin_log_name[TRX_SYS_MYSQL_LOG_NAME_LEN];
 
65
UNIV_INTERN ib_int64_t  trx_sys_mysql_bin_log_pos       = -1;
 
66
 
 
67
/* Animal names used as the textual representation of file formats: one
name per letter of the alphabet, listed in alphabetical order. */
static const char*      file_format_name_map[] = {
        "Antelope", "Barracuda", "Cheetah",  "Dragon",   "Elk",
        "Fox",      "Gazelle",   "Hornet",   "Impala",   "Jaguar",
        "Kangaroo", "Leopard",   "Moose",    "Nautilus", "Ocelot",
        "Porpoise", "Quail",     "Rabbit",   "Shark",    "Tiger",
        "Urchin",   "Viper",     "Whale",    "Xenops",   "Yak",
        "Zebra"
};
 
96
 
 
97
/* The number of elements in the file format name array. */
 
98
static const ulint      FILE_FORMAT_NAME_N = 
 
99
        sizeof(file_format_name_map) / sizeof(file_format_name_map[0]);
 
100
 
 
101
/* This is used to track the maximum file format id known to InnoDB. It's
 
102
updated via SET GLOBAL innodb_file_format_check = 'x' or when we open
 
103
or create a table. */
 
104
static  file_format_t   file_format_max;
 
105
 
 
106
/********************************************************************
 
107
Determines if a page number is located inside the doublewrite buffer. */
 
108
UNIV_INTERN
 
109
ibool
 
110
trx_doublewrite_page_inside(
 
111
/*========================*/
 
112
                                /* out: TRUE if the location is inside
 
113
                                the two blocks of the doublewrite buffer */
 
114
        ulint   page_no)        /* in: page number */
 
115
{
 
116
        if (trx_doublewrite == NULL) {
 
117
 
 
118
                return(FALSE);
 
119
        }
 
120
 
 
121
        if (page_no >= trx_doublewrite->block1
 
122
            && page_no < trx_doublewrite->block1
 
123
            + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
 
124
                return(TRUE);
 
125
        }
 
126
 
 
127
        if (page_no >= trx_doublewrite->block2
 
128
            && page_no < trx_doublewrite->block2
 
129
            + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
 
130
                return(TRUE);
 
131
        }
 
132
 
 
133
        return(FALSE);
 
134
}
 
135
 
 
136
/********************************************************************
 
137
Creates or initialializes the doublewrite buffer at a database start. */
 
138
static
 
139
void
 
140
trx_doublewrite_init(
 
141
/*=================*/
 
142
        byte*   doublewrite)    /* in: pointer to the doublewrite buf
 
143
                                header on trx sys page */
 
144
{
 
145
        trx_doublewrite = mem_alloc(sizeof(trx_doublewrite_t));
 
146
 
 
147
        /* Since we now start to use the doublewrite buffer, no need to call
 
148
        fsync() after every write to a data file */
 
149
#ifdef UNIV_DO_FLUSH
 
150
        os_do_not_call_flush_at_each_write = TRUE;
 
151
#endif /* UNIV_DO_FLUSH */
 
152
 
 
153
        mutex_create(&trx_doublewrite->mutex, SYNC_DOUBLEWRITE);
 
154
 
 
155
        trx_doublewrite->first_free = 0;
 
156
 
 
157
        trx_doublewrite->block1 = mach_read_from_4(
 
158
                doublewrite + TRX_SYS_DOUBLEWRITE_BLOCK1);
 
159
        trx_doublewrite->block2 = mach_read_from_4(
 
160
                doublewrite + TRX_SYS_DOUBLEWRITE_BLOCK2);
 
161
        trx_doublewrite->write_buf_unaligned = ut_malloc(
 
162
                (1 + 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) * UNIV_PAGE_SIZE);
 
163
 
 
164
        trx_doublewrite->write_buf = ut_align(
 
165
                trx_doublewrite->write_buf_unaligned, UNIV_PAGE_SIZE);
 
166
        trx_doublewrite->buf_block_arr = mem_alloc(
 
167
                2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * sizeof(void*));
 
168
}
 
169
 
 
170
/********************************************************************
 
171
Marks the trx sys header when we have successfully upgraded to the >= 4.1.x
 
172
multiple tablespace format. */
 
173
UNIV_INTERN
 
174
void
 
175
trx_sys_mark_upgraded_to_multiple_tablespaces(void)
 
176
/*===============================================*/
 
177
{
 
178
        buf_block_t*    block;
 
179
        byte*           doublewrite;
 
180
        mtr_t           mtr;
 
181
 
 
182
        /* We upgraded to 4.1.x and reset the space id fields in the
 
183
        doublewrite buffer. Let us mark to the trx_sys header that the upgrade
 
184
        has been done. */
 
185
 
 
186
        mtr_start(&mtr);
 
187
 
 
188
        block = buf_page_get(TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO,
 
189
                             RW_X_LATCH, &mtr);
 
190
#ifdef UNIV_SYNC_DEBUG
 
191
        buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
 
192
#endif /* UNIV_SYNC_DEBUG */
 
193
 
 
194
        doublewrite = buf_block_get_frame(block) + TRX_SYS_DOUBLEWRITE;
 
195
 
 
196
        mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED,
 
197
                         TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N,
 
198
                         MLOG_4BYTES, &mtr);
 
199
        mtr_commit(&mtr);
 
200
 
 
201
        /* Flush the modified pages to disk and make a checkpoint */
 
202
        log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
 
203
 
 
204
        trx_sys_multiple_tablespace_format = TRUE;
 
205
}
 
206
 
 
207
/********************************************************************
 
208
Creates the doublewrite buffer to a new InnoDB installation. The header of the
 
209
doublewrite buffer is placed on the trx system header page. */
 
210
UNIV_INTERN
 
211
void
 
212
trx_sys_create_doublewrite_buf(void)
 
213
/*================================*/
 
214
{
 
215
        buf_block_t*    block;
 
216
        buf_block_t*    block2;
 
217
        buf_block_t*    new_block;
 
218
        byte*   doublewrite;
 
219
        byte*   fseg_header;
 
220
        ulint   page_no;
 
221
        ulint   prev_page_no;
 
222
        ulint   i;
 
223
        mtr_t   mtr;
 
224
 
 
225
        if (trx_doublewrite) {
 
226
                /* Already inited */
 
227
 
 
228
                return;
 
229
        }
 
230
 
 
231
start_again:
 
232
        mtr_start(&mtr);
 
233
 
 
234
        block = buf_page_get(TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO,
 
235
                             RW_X_LATCH, &mtr);
 
236
#ifdef UNIV_SYNC_DEBUG
 
237
        buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
 
238
#endif /* UNIV_SYNC_DEBUG */
 
239
 
 
240
        doublewrite = buf_block_get_frame(block) + TRX_SYS_DOUBLEWRITE;
 
241
 
 
242
        if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC)
 
243
            == TRX_SYS_DOUBLEWRITE_MAGIC_N) {
 
244
                /* The doublewrite buffer has already been created:
 
245
                just read in some numbers */
 
246
 
 
247
                trx_doublewrite_init(doublewrite);
 
248
 
 
249
                mtr_commit(&mtr);
 
250
        } else {
 
251
                fprintf(stderr,
 
252
                        "InnoDB: Doublewrite buffer not found:"
 
253
                        " creating new\n");
 
254
 
 
255
                if (buf_pool_get_curr_size()
 
256
                    < ((2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
 
257
                        + FSP_EXTENT_SIZE / 2 + 100)
 
258
                       * UNIV_PAGE_SIZE)) {
 
259
                        fprintf(stderr,
 
260
                                "InnoDB: Cannot create doublewrite buffer:"
 
261
                                " you must\n"
 
262
                                "InnoDB: increase your buffer pool size.\n"
 
263
                                "InnoDB: Cannot continue operation.\n");
 
264
 
 
265
                        exit(1);
 
266
                }
 
267
 
 
268
                block2 = fseg_create(TRX_SYS_SPACE, TRX_SYS_PAGE_NO,
 
269
                                     TRX_SYS_DOUBLEWRITE
 
270
                                     + TRX_SYS_DOUBLEWRITE_FSEG, &mtr);
 
271
 
 
272
                /* fseg_create acquires a second latch on the page,
 
273
                therefore we must declare it: */
 
274
 
 
275
#ifdef UNIV_SYNC_DEBUG
 
276
                buf_block_dbg_add_level(block2, SYNC_NO_ORDER_CHECK);
 
277
#endif /* UNIV_SYNC_DEBUG */
 
278
 
 
279
                if (block2 == NULL) {
 
280
                        fprintf(stderr,
 
281
                                "InnoDB: Cannot create doublewrite buffer:"
 
282
                                " you must\n"
 
283
                                "InnoDB: increase your tablespace size.\n"
 
284
                                "InnoDB: Cannot continue operation.\n");
 
285
 
 
286
                        /* We exit without committing the mtr to prevent
 
287
                        its modifications to the database getting to disk */
 
288
 
 
289
                        exit(1);
 
290
                }
 
291
 
 
292
                fseg_header = buf_block_get_frame(block)
 
293
                        + TRX_SYS_DOUBLEWRITE + TRX_SYS_DOUBLEWRITE_FSEG;
 
294
                prev_page_no = 0;
 
295
 
 
296
                for (i = 0; i < 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
 
297
                             + FSP_EXTENT_SIZE / 2; i++) {
 
298
                        page_no = fseg_alloc_free_page(fseg_header,
 
299
                                                       prev_page_no + 1,
 
300
                                                       FSP_UP, &mtr);
 
301
                        if (page_no == FIL_NULL) {
 
302
                                fprintf(stderr,
 
303
                                        "InnoDB: Cannot create doublewrite"
 
304
                                        " buffer: you must\n"
 
305
                                        "InnoDB: increase your"
 
306
                                        " tablespace size.\n"
 
307
                                        "InnoDB: Cannot continue operation.\n"
 
308
                                        );
 
309
 
 
310
                                exit(1);
 
311
                        }
 
312
 
 
313
                        /* We read the allocated pages to the buffer pool;
 
314
                        when they are written to disk in a flush, the space
 
315
                        id and page number fields are also written to the
 
316
                        pages. When we at database startup read pages
 
317
                        from the doublewrite buffer, we know that if the
 
318
                        space id and page number in them are the same as
 
319
                        the page position in the tablespace, then the page
 
320
                        has not been written to in doublewrite. */
 
321
 
 
322
                        new_block = buf_page_get(TRX_SYS_SPACE, 0, page_no,
 
323
                                                 RW_X_LATCH, &mtr);
 
324
#ifdef UNIV_SYNC_DEBUG
 
325
                        buf_block_dbg_add_level(new_block,
 
326
                                                SYNC_NO_ORDER_CHECK);
 
327
#endif /* UNIV_SYNC_DEBUG */
 
328
 
 
329
                        /* Make a dummy change to the page to ensure it will
 
330
                        be written to disk in a flush */
 
331
 
 
332
                        mlog_write_ulint(buf_block_get_frame(new_block)
 
333
                                         + FIL_PAGE_DATA,
 
334
                                         TRX_SYS_DOUBLEWRITE_MAGIC_N,
 
335
                                         MLOG_4BYTES, &mtr);
 
336
 
 
337
                        if (i == FSP_EXTENT_SIZE / 2) {
 
338
                                mlog_write_ulint(doublewrite
 
339
                                                 + TRX_SYS_DOUBLEWRITE_BLOCK1,
 
340
                                                 page_no, MLOG_4BYTES, &mtr);
 
341
                                mlog_write_ulint(doublewrite
 
342
                                                 + TRX_SYS_DOUBLEWRITE_REPEAT
 
343
                                                 + TRX_SYS_DOUBLEWRITE_BLOCK1,
 
344
                                                 page_no, MLOG_4BYTES, &mtr);
 
345
                        } else if (i == FSP_EXTENT_SIZE / 2
 
346
                                   + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
 
347
                                mlog_write_ulint(doublewrite
 
348
                                                 + TRX_SYS_DOUBLEWRITE_BLOCK2,
 
349
                                                 page_no, MLOG_4BYTES, &mtr);
 
350
                                mlog_write_ulint(doublewrite
 
351
                                                 + TRX_SYS_DOUBLEWRITE_REPEAT
 
352
                                                 + TRX_SYS_DOUBLEWRITE_BLOCK2,
 
353
                                                 page_no, MLOG_4BYTES, &mtr);
 
354
                        } else if (i > FSP_EXTENT_SIZE / 2) {
 
355
                                ut_a(page_no == prev_page_no + 1);
 
356
                        }
 
357
 
 
358
                        prev_page_no = page_no;
 
359
                }
 
360
 
 
361
                mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC,
 
362
                                 TRX_SYS_DOUBLEWRITE_MAGIC_N,
 
363
                                 MLOG_4BYTES, &mtr);
 
364
                mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC
 
365
                                 + TRX_SYS_DOUBLEWRITE_REPEAT,
 
366
                                 TRX_SYS_DOUBLEWRITE_MAGIC_N,
 
367
                                 MLOG_4BYTES, &mtr);
 
368
 
 
369
                mlog_write_ulint(doublewrite
 
370
                                 + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED,
 
371
                                 TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N,
 
372
                                 MLOG_4BYTES, &mtr);
 
373
                mtr_commit(&mtr);
 
374
 
 
375
                /* Flush the modified pages to disk and make a checkpoint */
 
376
                log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
 
377
 
 
378
                fprintf(stderr, "InnoDB: Doublewrite buffer created\n");
 
379
 
 
380
                trx_sys_multiple_tablespace_format = TRUE;
 
381
 
 
382
                goto start_again;
 
383
        }
 
384
}
 
385
 
 
386
/********************************************************************
 
387
At a database startup initializes the doublewrite buffer memory structure if
 
388
we already have a doublewrite buffer created in the data files. If we are
 
389
upgrading to an InnoDB version which supports multiple tablespaces, then this
 
390
function performs the necessary update operations. If we are in a crash
 
391
recovery, this function uses a possible doublewrite buffer to restore
 
392
half-written pages in the data files. */
 
393
UNIV_INTERN
 
394
void
 
395
trx_sys_doublewrite_init_or_restore_pages(
 
396
/*======================================*/
 
397
        ibool   restore_corrupt_pages)
 
398
{
 
399
        byte*   buf;
 
400
        byte*   read_buf;
 
401
        byte*   unaligned_read_buf;
 
402
        ulint   block1;
 
403
        ulint   block2;
 
404
        ulint   source_page_no;
 
405
        byte*   page;
 
406
        byte*   doublewrite;
 
407
        ulint   space_id;
 
408
        ulint   page_no;
 
409
        ulint   i;
 
410
 
 
411
        /* We do the file i/o past the buffer pool */
 
412
 
 
413
        unaligned_read_buf = ut_malloc(2 * UNIV_PAGE_SIZE);
 
414
        read_buf = ut_align(unaligned_read_buf, UNIV_PAGE_SIZE);
 
415
 
 
416
        /* Read the trx sys header to check if we are using the doublewrite
 
417
        buffer */
 
418
 
 
419
        fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, 0,
 
420
               UNIV_PAGE_SIZE, read_buf, NULL);
 
421
        doublewrite = read_buf + TRX_SYS_DOUBLEWRITE;
 
422
 
 
423
        if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC)
 
424
            == TRX_SYS_DOUBLEWRITE_MAGIC_N) {
 
425
                /* The doublewrite buffer has been created */
 
426
 
 
427
                trx_doublewrite_init(doublewrite);
 
428
 
 
429
                block1 = trx_doublewrite->block1;
 
430
                block2 = trx_doublewrite->block2;
 
431
 
 
432
                buf = trx_doublewrite->write_buf;
 
433
        } else {
 
434
                goto leave_func;
 
435
        }
 
436
 
 
437
        if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED)
 
438
            != TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N) {
 
439
 
 
440
                /* We are upgrading from a version < 4.1.x to a version where
 
441
                multiple tablespaces are supported. We must reset the space id
 
442
                field in the pages in the doublewrite buffer because starting
 
443
                from this version the space id is stored to
 
444
                FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID. */
 
445
 
 
446
                trx_doublewrite_must_reset_space_ids = TRUE;
 
447
 
 
448
                fprintf(stderr,
 
449
                        "InnoDB: Resetting space id's in the"
 
450
                        " doublewrite buffer\n");
 
451
        } else {
 
452
                trx_sys_multiple_tablespace_format = TRUE;
 
453
        }
 
454
 
 
455
        /* Read the pages from the doublewrite buffer to memory */
 
456
 
 
457
        fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, block1, 0,
 
458
               TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
 
459
               buf, NULL);
 
460
        fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, block2, 0,
 
461
               TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
 
462
               buf + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
 
463
               NULL);
 
464
        /* Check if any of these pages is half-written in data files, in the
 
465
        intended position */
 
466
 
 
467
        page = buf;
 
468
 
 
469
        for (i = 0; i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 2; i++) {
 
470
 
 
471
                page_no = mach_read_from_4(page + FIL_PAGE_OFFSET);
 
472
 
 
473
                if (trx_doublewrite_must_reset_space_ids) {
 
474
 
 
475
                        space_id = 0;
 
476
                        mach_write_to_4(page
 
477
                                        + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 0);
 
478
                        /* We do not need to calculate new checksums for the
 
479
                        pages because the field .._SPACE_ID does not affect
 
480
                        them. Write the page back to where we read it from. */
 
481
 
 
482
                        if (i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
 
483
                                source_page_no = block1 + i;
 
484
                        } else {
 
485
                                source_page_no = block2
 
486
                                        + i - TRX_SYS_DOUBLEWRITE_BLOCK_SIZE;
 
487
                        }
 
488
 
 
489
                        fil_io(OS_FILE_WRITE, TRUE, 0, 0, source_page_no, 0,
 
490
                               UNIV_PAGE_SIZE, page, NULL);
 
491
                        /* printf("Resetting space id in page %lu\n",
 
492
                        source_page_no); */
 
493
                } else {
 
494
                        space_id = mach_read_from_4(
 
495
                                page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
 
496
                }
 
497
 
 
498
                if (!restore_corrupt_pages) {
 
499
                        /* The database was shut down gracefully: no need to
 
500
                        restore pages */
 
501
 
 
502
                } else if (!fil_tablespace_exists_in_mem(space_id)) {
 
503
                        /* Maybe we have dropped the single-table tablespace
 
504
                        and this page once belonged to it: do nothing */
 
505
 
 
506
                } else if (!fil_check_adress_in_tablespace(space_id,
 
507
                                                           page_no)) {
 
508
                        fprintf(stderr,
 
509
                                "InnoDB: Warning: a page in the"
 
510
                                " doublewrite buffer is not within space\n"
 
511
                                "InnoDB: bounds; space id %lu"
 
512
                                " page number %lu, page %lu in"
 
513
                                " doublewrite buf.\n",
 
514
                                (ulong) space_id, (ulong) page_no, (ulong) i);
 
515
 
 
516
                } else if (space_id == TRX_SYS_SPACE
 
517
                           && ((page_no >= block1
 
518
                                && page_no
 
519
                                < block1 + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)
 
520
                               || (page_no >= block2
 
521
                                   && page_no
 
522
                                   < (block2
 
523
                                      + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)))) {
 
524
 
 
525
                        /* It is an unwritten doublewrite buffer page:
 
526
                        do nothing */
 
527
                } else {
 
528
                        ulint   zip_size = fil_space_get_zip_size(space_id);
 
529
 
 
530
                        /* Read in the actual page from the file */
 
531
                        fil_io(OS_FILE_READ, TRUE, space_id, zip_size,
 
532
                               page_no, 0,
 
533
                               zip_size ? zip_size : UNIV_PAGE_SIZE,
 
534
                               read_buf, NULL);
 
535
 
 
536
                        /* Check if the page is corrupt */
 
537
 
 
538
                        if (UNIV_UNLIKELY
 
539
                            (buf_page_is_corrupted(read_buf, zip_size))) {
 
540
 
 
541
                                fprintf(stderr,
 
542
                                        "InnoDB: Warning: database page"
 
543
                                        " corruption or a failed\n"
 
544
                                        "InnoDB: file read of"
 
545
                                        " space %lu page %lu.\n"
 
546
                                        "InnoDB: Trying to recover it from"
 
547
                                        " the doublewrite buffer.\n",
 
548
                                        (ulong) space_id, (ulong) page_no);
 
549
 
 
550
                                if (buf_page_is_corrupted(page, zip_size)) {
 
551
                                        fprintf(stderr,
 
552
                                                "InnoDB: Dump of the page:\n");
 
553
                                        buf_page_print(read_buf, zip_size);
 
554
                                        fprintf(stderr,
 
555
                                                "InnoDB: Dump of"
 
556
                                                " corresponding page"
 
557
                                                " in doublewrite buffer:\n");
 
558
                                        buf_page_print(page, zip_size);
 
559
 
 
560
                                        fprintf(stderr,
 
561
                                                "InnoDB: Also the page in the"
 
562
                                                " doublewrite buffer"
 
563
                                                " is corrupt.\n"
 
564
                                                "InnoDB: Cannot continue"
 
565
                                                " operation.\n"
 
566
                                                "InnoDB: You can try to"
 
567
                                                " recover the database"
 
568
                                                " with the my.cnf\n"
 
569
                                                "InnoDB: option:\n"
 
570
                                                "InnoDB: set-variable="
 
571
                                                "innodb_force_recovery=6\n");
 
572
                                        exit(1);
 
573
                                }
 
574
 
 
575
                                /* Write the good page from the
 
576
                                doublewrite buffer to the intended
 
577
                                position */
 
578
 
 
579
                                fil_io(OS_FILE_WRITE, TRUE, space_id,
 
580
                                       zip_size, page_no, 0,
 
581
                                       zip_size ? zip_size : UNIV_PAGE_SIZE,
 
582
                                       page, NULL);
 
583
                                fprintf(stderr,
 
584
                                        "InnoDB: Recovered the page from"
 
585
                                        " the doublewrite buffer.\n");
 
586
                        }
 
587
                }
 
588
 
 
589
                page += UNIV_PAGE_SIZE;
 
590
        }
 
591
 
 
592
        fil_flush_file_spaces(FIL_TABLESPACE);
 
593
 
 
594
leave_func:
 
595
        ut_free(unaligned_read_buf);
 
596
}
 
597
 
 
598
/********************************************************************
 
599
Checks that trx is in the trx list. */
 
600
UNIV_INTERN
 
601
ibool
 
602
trx_in_trx_list(
 
603
/*============*/
 
604
                        /* out: TRUE if is in */
 
605
        trx_t*  in_trx) /* in: trx */
 
606
{
 
607
        trx_t*  trx;
 
608
 
 
609
        ut_ad(mutex_own(&(kernel_mutex)));
 
610
 
 
611
        trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
 
612
 
 
613
        while (trx != NULL) {
 
614
 
 
615
                if (trx == in_trx) {
 
616
 
 
617
                        return(TRUE);
 
618
                }
 
619
 
 
620
                trx = UT_LIST_GET_NEXT(trx_list, trx);
 
621
        }
 
622
 
 
623
        return(FALSE);
 
624
}
 
625
 
 
626
/*********************************************************************
 
627
Writes the value of max_trx_id to the file based trx system header. */
 
628
UNIV_INTERN
 
629
void
 
630
trx_sys_flush_max_trx_id(void)
 
631
/*==========================*/
 
632
{
 
633
        trx_sysf_t*     sys_header;
 
634
        mtr_t           mtr;
 
635
 
 
636
        ut_ad(mutex_own(&kernel_mutex));
 
637
 
 
638
        mtr_start(&mtr);
 
639
 
 
640
        sys_header = trx_sysf_get(&mtr);
 
641
 
 
642
        mlog_write_dulint(sys_header + TRX_SYS_TRX_ID_STORE,
 
643
                          trx_sys->max_trx_id, &mtr);
 
644
        mtr_commit(&mtr);
 
645
}
 
646
 
 
647
/*********************************************************************
 
648
Updates the offset information about the end of the MySQL binlog entry
 
649
which corresponds to the transaction just being committed. In a MySQL
 
650
replication slave updates the latest master binlog position up to which
 
651
replication has proceeded. */
 
652
UNIV_INTERN
 
653
void
 
654
trx_sys_update_mysql_binlog_offset(
 
655
/*===============================*/
 
656
        const char*     file_name,/* in: MySQL log file name */
 
657
        ib_int64_t      offset, /* in: position in that log file */
 
658
        ulint           field,  /* in: offset of the MySQL log info field in
 
659
                                the trx sys header */
 
660
        mtr_t*          mtr)    /* in: mtr */
 
661
{
 
662
        trx_sysf_t*     sys_header;
 
663
 
 
664
        if (ut_strlen(file_name) >= TRX_SYS_MYSQL_LOG_NAME_LEN) {
 
665
 
 
666
                /* We cannot fit the name to the 512 bytes we have reserved */
 
667
 
 
668
                return;
 
669
        }
 
670
 
 
671
        sys_header = trx_sysf_get(mtr);
 
672
 
 
673
        if (mach_read_from_4(sys_header + field
 
674
                             + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
 
675
            != TRX_SYS_MYSQL_LOG_MAGIC_N) {
 
676
 
 
677
                mlog_write_ulint(sys_header + field
 
678
                                 + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD,
 
679
                                 TRX_SYS_MYSQL_LOG_MAGIC_N,
 
680
                                 MLOG_4BYTES, mtr);
 
681
        }
 
682
 
 
683
        if (0 != strcmp((char*) (sys_header + field + TRX_SYS_MYSQL_LOG_NAME),
 
684
                        file_name)) {
 
685
 
 
686
                mlog_write_string(sys_header + field
 
687
                                  + TRX_SYS_MYSQL_LOG_NAME,
 
688
                                  (byte*) file_name, 1 + ut_strlen(file_name),
 
689
                                  mtr);
 
690
        }
 
691
 
 
692
        if (mach_read_from_4(sys_header + field
 
693
                             + TRX_SYS_MYSQL_LOG_OFFSET_HIGH) > 0
 
694
            || (offset >> 32) > 0) {
 
695
 
 
696
                mlog_write_ulint(sys_header + field
 
697
                                 + TRX_SYS_MYSQL_LOG_OFFSET_HIGH,
 
698
                                 (ulint)(offset >> 32),
 
699
                                 MLOG_4BYTES, mtr);
 
700
        }
 
701
 
 
702
        mlog_write_ulint(sys_header + field
 
703
                         + TRX_SYS_MYSQL_LOG_OFFSET_LOW,
 
704
                         (ulint)(offset & 0xFFFFFFFFUL),
 
705
                         MLOG_4BYTES, mtr);
 
706
}
 
707
 
 
708
#ifdef UNIV_HOTBACKUP
 
709
/*********************************************************************
 
710
Prints to stderr the MySQL binlog info in the system header if the
 
711
magic number shows it valid. */
 
712
UNIV_INTERN
 
713
void
 
714
trx_sys_print_mysql_binlog_offset_from_page(
 
715
/*========================================*/
 
716
        const byte*     page)   /* in: buffer containing the trx
 
717
                                system header page, i.e., page number
 
718
                                TRX_SYS_PAGE_NO in the tablespace */
 
719
{
 
720
        const trx_sysf_t*       sys_header;
 
721
 
 
722
        sys_header = page + TRX_SYS;
 
723
 
 
724
        if (mach_read_from_4(sys_header + TRX_SYS_MYSQL_LOG_INFO
 
725
                             + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
 
726
            == TRX_SYS_MYSQL_LOG_MAGIC_N) {
 
727
 
 
728
                fprintf(stderr,
 
729
                        "ibbackup: Last MySQL binlog file position %lu %lu,"
 
730
                        " file name %s\n",
 
731
                        (ulong) mach_read_from_4(
 
732
                                sys_header + TRX_SYS_MYSQL_LOG_INFO
 
733
                                + TRX_SYS_MYSQL_LOG_OFFSET_HIGH),
 
734
                        (ulong) mach_read_from_4(
 
735
                                sys_header + TRX_SYS_MYSQL_LOG_INFO
 
736
                                + TRX_SYS_MYSQL_LOG_OFFSET_LOW),
 
737
                        sys_header + TRX_SYS_MYSQL_LOG_INFO
 
738
                        + TRX_SYS_MYSQL_LOG_NAME);
 
739
        }
 
740
}
 
741
#endif /* UNIV_HOTBACKUP */
 
742
 
 
743
/*********************************************************************
 
744
Stores the MySQL binlog offset info in the trx system header if
 
745
the magic number shows it valid, and print the info to stderr */
 
746
UNIV_INTERN
 
747
void
 
748
trx_sys_print_mysql_binlog_offset(void)
 
749
/*===================================*/
 
750
{
 
751
        trx_sysf_t*     sys_header;
 
752
        mtr_t           mtr;
 
753
        ulint           trx_sys_mysql_bin_log_pos_high;
 
754
        ulint           trx_sys_mysql_bin_log_pos_low;
 
755
 
 
756
        mtr_start(&mtr);
 
757
 
 
758
        sys_header = trx_sysf_get(&mtr);
 
759
 
 
760
        if (mach_read_from_4(sys_header + TRX_SYS_MYSQL_LOG_INFO
 
761
                             + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
 
762
            != TRX_SYS_MYSQL_LOG_MAGIC_N) {
 
763
 
 
764
                mtr_commit(&mtr);
 
765
 
 
766
                return;
 
767
        }
 
768
 
 
769
        trx_sys_mysql_bin_log_pos_high = mach_read_from_4(
 
770
                sys_header + TRX_SYS_MYSQL_LOG_INFO
 
771
                + TRX_SYS_MYSQL_LOG_OFFSET_HIGH);
 
772
        trx_sys_mysql_bin_log_pos_low = mach_read_from_4(
 
773
                sys_header + TRX_SYS_MYSQL_LOG_INFO
 
774
                + TRX_SYS_MYSQL_LOG_OFFSET_LOW);
 
775
 
 
776
        trx_sys_mysql_bin_log_pos
 
777
                = (((ib_int64_t)trx_sys_mysql_bin_log_pos_high) << 32)
 
778
                + (ib_int64_t)trx_sys_mysql_bin_log_pos_low;
 
779
 
 
780
        ut_memcpy(trx_sys_mysql_bin_log_name,
 
781
                  sys_header + TRX_SYS_MYSQL_LOG_INFO
 
782
                  + TRX_SYS_MYSQL_LOG_NAME, TRX_SYS_MYSQL_LOG_NAME_LEN);
 
783
 
 
784
        fprintf(stderr,
 
785
                "InnoDB: Last MySQL binlog file position %lu %lu,"
 
786
                " file name %s\n",
 
787
                trx_sys_mysql_bin_log_pos_high, trx_sys_mysql_bin_log_pos_low,
 
788
                trx_sys_mysql_bin_log_name);
 
789
 
 
790
        mtr_commit(&mtr);
 
791
}
 
792
 
 
793
/*********************************************************************
 
794
Prints to stderr the MySQL master log offset info in the trx system header if
 
795
the magic number shows it valid. */
 
796
UNIV_INTERN
 
797
void
 
798
trx_sys_print_mysql_master_log_pos(void)
 
799
/*====================================*/
 
800
{
 
801
        trx_sysf_t*     sys_header;
 
802
        mtr_t           mtr;
 
803
 
 
804
        mtr_start(&mtr);
 
805
 
 
806
        sys_header = trx_sysf_get(&mtr);
 
807
 
 
808
        if (mach_read_from_4(sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
 
809
                             + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
 
810
            != TRX_SYS_MYSQL_LOG_MAGIC_N) {
 
811
 
 
812
                mtr_commit(&mtr);
 
813
 
 
814
                return;
 
815
        }
 
816
 
 
817
        fprintf(stderr,
 
818
                "InnoDB: In a MySQL replication slave the last"
 
819
                " master binlog file\n"
 
820
                "InnoDB: position %lu %lu, file name %s\n",
 
821
                (ulong) mach_read_from_4(sys_header
 
822
                                         + TRX_SYS_MYSQL_MASTER_LOG_INFO
 
823
                                         + TRX_SYS_MYSQL_LOG_OFFSET_HIGH),
 
824
                (ulong) mach_read_from_4(sys_header
 
825
                                         + TRX_SYS_MYSQL_MASTER_LOG_INFO
 
826
                                         + TRX_SYS_MYSQL_LOG_OFFSET_LOW),
 
827
                sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
 
828
                + TRX_SYS_MYSQL_LOG_NAME);
 
829
        /* Copy the master log position info to global variables we can
 
830
        use in ha_innobase.cc to initialize glob_mi to right values */
 
831
 
 
832
        ut_memcpy(trx_sys_mysql_master_log_name,
 
833
                  sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
 
834
                  + TRX_SYS_MYSQL_LOG_NAME,
 
835
                  TRX_SYS_MYSQL_LOG_NAME_LEN);
 
836
 
 
837
        trx_sys_mysql_master_log_pos
 
838
                = (((ib_int64_t) mach_read_from_4(
 
839
                            sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
 
840
                            + TRX_SYS_MYSQL_LOG_OFFSET_HIGH)) << 32)
 
841
                + ((ib_int64_t) mach_read_from_4(
 
842
                           sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
 
843
                           + TRX_SYS_MYSQL_LOG_OFFSET_LOW));
 
844
        mtr_commit(&mtr);
 
845
}
 
846
 
 
847
/********************************************************************
 
848
Looks for a free slot for a rollback segment in the trx system file copy. */
 
849
UNIV_INTERN
 
850
ulint
 
851
trx_sysf_rseg_find_free(
 
852
/*====================*/
 
853
                        /* out: slot index or ULINT_UNDEFINED if not found */
 
854
        mtr_t*  mtr)    /* in: mtr */
 
855
{
 
856
        trx_sysf_t*     sys_header;
 
857
        ulint           page_no;
 
858
        ulint           i;
 
859
 
 
860
        ut_ad(mutex_own(&(kernel_mutex)));
 
861
 
 
862
        sys_header = trx_sysf_get(mtr);
 
863
 
 
864
        for (i = 0; i < TRX_SYS_N_RSEGS; i++) {
 
865
 
 
866
                page_no = trx_sysf_rseg_get_page_no(sys_header, i, mtr);
 
867
 
 
868
                if (page_no == FIL_NULL) {
 
869
 
 
870
                        return(i);
 
871
                }
 
872
        }
 
873
 
 
874
        return(ULINT_UNDEFINED);
 
875
}
 
876
 
 
877
/*********************************************************************
 
878
Creates the file page for the transaction system. This function is called only
 
879
at the database creation, before trx_sys_init. */
 
880
static
 
881
void
 
882
trx_sysf_create(
 
883
/*============*/
 
884
        mtr_t*  mtr)    /* in: mtr */
 
885
{
 
886
        trx_sysf_t*     sys_header;
 
887
        ulint           slot_no;
 
888
        buf_block_t*    block;
 
889
        page_t*         page;
 
890
        ulint           page_no;
 
891
        ulint           i;
 
892
 
 
893
        ut_ad(mtr);
 
894
 
 
895
        /* Note that below we first reserve the file space x-latch, and
 
896
        then enter the kernel: we must do it in this order to conform
 
897
        to the latching order rules. */
 
898
 
 
899
        mtr_x_lock(fil_space_get_latch(TRX_SYS_SPACE, NULL), mtr);
 
900
        mutex_enter(&kernel_mutex);
 
901
 
 
902
        /* Create the trx sys file block in a new allocated file segment */
 
903
        block = fseg_create(TRX_SYS_SPACE, 0, TRX_SYS + TRX_SYS_FSEG_HEADER,
 
904
                            mtr);
 
905
#ifdef UNIV_SYNC_DEBUG
 
906
        buf_block_dbg_add_level(block, SYNC_TRX_SYS_HEADER);
 
907
#endif /* UNIV_SYNC_DEBUG */
 
908
        ut_a(buf_block_get_page_no(block) == TRX_SYS_PAGE_NO);
 
909
 
 
910
        page = buf_block_get_frame(block);
 
911
 
 
912
        mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_TYPE_TRX_SYS,
 
913
                         MLOG_2BYTES, mtr);
 
914
 
 
915
        /* Reset the doublewrite buffer magic number to zero so that we
 
916
        know that the doublewrite buffer has not yet been created (this
 
917
        suppresses a Valgrind warning) */
 
918
 
 
919
        mlog_write_ulint(page + TRX_SYS_DOUBLEWRITE
 
920
                         + TRX_SYS_DOUBLEWRITE_MAGIC, 0, MLOG_4BYTES, mtr);
 
921
 
 
922
        sys_header = trx_sysf_get(mtr);
 
923
 
 
924
        /* Start counting transaction ids from number 1 up */
 
925
        mlog_write_dulint(sys_header + TRX_SYS_TRX_ID_STORE,
 
926
                          ut_dulint_create(0, 1), mtr);
 
927
 
 
928
        /* Reset the rollback segment slots */
 
929
        for (i = 0; i < TRX_SYS_N_RSEGS; i++) {
 
930
 
 
931
                trx_sysf_rseg_set_space(sys_header, i, ULINT_UNDEFINED, mtr);
 
932
                trx_sysf_rseg_set_page_no(sys_header, i, FIL_NULL, mtr);
 
933
        }
 
934
 
 
935
        /* The remaining area (up to the page trailer) is uninitialized.
 
936
        Silence Valgrind warnings about it. */
 
937
        UNIV_MEM_VALID(sys_header + (TRX_SYS_RSEGS
 
938
                                     + TRX_SYS_N_RSEGS * TRX_SYS_RSEG_SLOT_SIZE
 
939
                                     + TRX_SYS_RSEG_SPACE),
 
940
                       (UNIV_PAGE_SIZE - FIL_PAGE_DATA_END
 
941
                        - (TRX_SYS_RSEGS
 
942
                           + TRX_SYS_N_RSEGS * TRX_SYS_RSEG_SLOT_SIZE
 
943
                           + TRX_SYS_RSEG_SPACE))
 
944
                       + page - sys_header);
 
945
 
 
946
        /* Create the first rollback segment in the SYSTEM tablespace */
 
947
        page_no = trx_rseg_header_create(TRX_SYS_SPACE, 0, ULINT_MAX, &slot_no,
 
948
                                         mtr);
 
949
        ut_a(slot_no == TRX_SYS_SYSTEM_RSEG_ID);
 
950
        ut_a(page_no != FIL_NULL);
 
951
 
 
952
        mutex_exit(&kernel_mutex);
 
953
}
 
954
 
 
955
/*********************************************************************
 
956
Creates and initializes the central memory structures for the transaction
 
957
system. This is called when the database is started. */
 
958
UNIV_INTERN
 
959
void
 
960
trx_sys_init_at_db_start(void)
 
961
/*==========================*/
 
962
{
 
963
        trx_sysf_t*     sys_header;
 
964
        ib_int64_t      rows_to_undo    = 0;
 
965
        const char*     unit            = "";
 
966
        trx_t*          trx;
 
967
        mtr_t           mtr;
 
968
 
 
969
        mtr_start(&mtr);
 
970
 
 
971
        ut_ad(trx_sys == NULL);
 
972
 
 
973
        mutex_enter(&kernel_mutex);
 
974
 
 
975
        trx_sys = mem_alloc(sizeof(trx_sys_t));
 
976
 
 
977
        sys_header = trx_sysf_get(&mtr);
 
978
 
 
979
        trx_rseg_list_and_array_init(sys_header, &mtr);
 
980
 
 
981
        trx_sys->latest_rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
 
982
 
 
983
        /* VERY important: after the database is started, max_trx_id value is
 
984
        divisible by TRX_SYS_TRX_ID_WRITE_MARGIN, and the 'if' in
 
985
        trx_sys_get_new_trx_id will evaluate to TRUE when the function
 
986
        is first time called, and the value for trx id will be written
 
987
        to the disk-based header! Thus trx id values will not overlap when
 
988
        the database is repeatedly started! */
 
989
 
 
990
        trx_sys->max_trx_id = ut_dulint_add(
 
991
                ut_dulint_align_up(mtr_read_dulint(
 
992
                                           sys_header
 
993
                                           + TRX_SYS_TRX_ID_STORE, &mtr),
 
994
                                   TRX_SYS_TRX_ID_WRITE_MARGIN),
 
995
                2 * TRX_SYS_TRX_ID_WRITE_MARGIN);
 
996
 
 
997
        UT_LIST_INIT(trx_sys->mysql_trx_list);
 
998
        trx_dummy_sess = sess_open();
 
999
        trx_lists_init_at_db_start();
 
1000
 
 
1001
        if (UT_LIST_GET_LEN(trx_sys->trx_list) > 0) {
 
1002
                trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
 
1003
 
 
1004
                for (;;) {
 
1005
 
 
1006
                        if ( trx->conc_state != TRX_PREPARED) {
 
1007
                                rows_to_undo += ut_conv_dulint_to_longlong(
 
1008
                                        trx->undo_no);
 
1009
                        }
 
1010
 
 
1011
                        trx = UT_LIST_GET_NEXT(trx_list, trx);
 
1012
 
 
1013
                        if (!trx) {
 
1014
                                break;
 
1015
                        }
 
1016
                }
 
1017
 
 
1018
                if (rows_to_undo > 1000000000) {
 
1019
                        unit = "M";
 
1020
                        rows_to_undo = rows_to_undo / 1000000;
 
1021
                }
 
1022
 
 
1023
                fprintf(stderr,
 
1024
                        "InnoDB: %lu transaction(s) which must be"
 
1025
                        " rolled back or cleaned up\n"
 
1026
                        "InnoDB: in total %lu%s row operations to undo\n",
 
1027
                        (ulong) UT_LIST_GET_LEN(trx_sys->trx_list),
 
1028
                        (ulong) rows_to_undo, unit);
 
1029
 
 
1030
                fprintf(stderr, "InnoDB: Trx id counter is " TRX_ID_FMT "\n",
 
1031
                        TRX_ID_PREP_PRINTF(trx_sys->max_trx_id));
 
1032
        }
 
1033
 
 
1034
        UT_LIST_INIT(trx_sys->view_list);
 
1035
 
 
1036
        trx_purge_sys_create();
 
1037
 
 
1038
        mutex_exit(&kernel_mutex);
 
1039
 
 
1040
        mtr_commit(&mtr);
 
1041
}
 
1042
 
 
1043
/*********************************************************************
 
1044
Creates and initializes the transaction system at the database creation. */
 
1045
UNIV_INTERN
 
1046
void
 
1047
trx_sys_create(void)
 
1048
/*================*/
 
1049
{
 
1050
        mtr_t   mtr;
 
1051
 
 
1052
        mtr_start(&mtr);
 
1053
 
 
1054
        trx_sysf_create(&mtr);
 
1055
 
 
1056
        mtr_commit(&mtr);
 
1057
 
 
1058
        trx_sys_init_at_db_start();
 
1059
}
 
1060
 
 
1061
/*********************************************************************
 
1062
Update the file format tag. */
 
1063
static
 
1064
ibool
 
1065
trx_sys_file_format_max_write(
 
1066
/*==========================*/
 
1067
                                        /* out: always TRUE */
 
1068
        ulint           format_id,      /* in: file format id */
 
1069
        char**          name)           /* out: max file format name, can
 
1070
                                        be NULL */
 
1071
{
 
1072
        mtr_t           mtr;
 
1073
        byte*           ptr;
 
1074
        buf_block_t*    block;
 
1075
        ulint           tag_value_low;
 
1076
 
 
1077
        mtr_start(&mtr);
 
1078
 
 
1079
        block = buf_page_get(
 
1080
                TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, RW_X_LATCH, &mtr);
 
1081
 
 
1082
        file_format_max.id = format_id;
 
1083
        file_format_max.name = trx_sys_file_format_id_to_name(format_id);
 
1084
 
 
1085
        ptr = buf_block_get_frame(block) + TRX_SYS_FILE_FORMAT_TAG;
 
1086
        tag_value_low = format_id + TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_LOW;
 
1087
 
 
1088
        if (name) {
 
1089
                *name = (char*) file_format_max.name;
 
1090
        }
 
1091
 
 
1092
        mlog_write_dulint(
 
1093
                ptr,
 
1094
                ut_dulint_create(TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_HIGH,
 
1095
                                 tag_value_low),
 
1096
                &mtr);
 
1097
 
 
1098
        mtr_commit(&mtr);
 
1099
 
 
1100
        return(TRUE);
 
1101
}
 
1102
 
 
1103
/*********************************************************************
 
1104
Read the file format tag. */
 
1105
static
 
1106
ulint
 
1107
trx_sys_file_format_max_read(void)
 
1108
/*==============================*/
 
1109
                                /* out: the file format */
 
1110
{
 
1111
        mtr_t                   mtr;
 
1112
        const byte*             ptr;
 
1113
        const buf_block_t*      block;
 
1114
        ulint                   format_id;
 
1115
        dulint                  file_format_id;
 
1116
 
 
1117
        /* Since this is called during the startup phase it's safe to
 
1118
        read the value without a covering mutex. */
 
1119
        mtr_start(&mtr);
 
1120
 
 
1121
        block = buf_page_get(
 
1122
                TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, RW_X_LATCH, &mtr);
 
1123
 
 
1124
        ptr = buf_block_get_frame(block) + TRX_SYS_FILE_FORMAT_TAG;
 
1125
        file_format_id = mach_read_from_8(ptr);
 
1126
 
 
1127
        mtr_commit(&mtr);
 
1128
 
 
1129
        format_id = file_format_id.low - TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_LOW;
 
1130
 
 
1131
        if (file_format_id.high != TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_HIGH
 
1132
            || format_id >= FILE_FORMAT_NAME_N) {
 
1133
 
 
1134
                /* Either it has never been tagged, or garbage in it.
 
1135
                Reset the tag in either case. */
 
1136
                format_id = DICT_TF_FORMAT_51;
 
1137
                trx_sys_file_format_max_write(format_id, NULL);
 
1138
        }
 
1139
 
 
1140
        return(format_id);
 
1141
}
 
1142
 
 
1143
/*********************************************************************
 
1144
Get the name representation of the file format from its id. */
 
1145
UNIV_INTERN
 
1146
const char*
 
1147
trx_sys_file_format_id_to_name(
 
1148
/*===========================*/
 
1149
                                /* out: pointer to the name */
 
1150
        const uint      id)     /* in: id of the file format */
 
1151
{
 
1152
        ut_a(id < FILE_FORMAT_NAME_N);
 
1153
 
 
1154
        return(file_format_name_map[id]);
 
1155
}
 
1156
 
 
1157
/*********************************************************************
 
1158
Check for the max file format tag stored on disk. Note: If max_format_id
 
1159
is == DICT_TF_FORMAT_MAX + 1 then we only print a warning. */
 
1160
UNIV_INTERN
 
1161
ulint
 
1162
trx_sys_file_format_max_check(
 
1163
/*==========================*/
 
1164
                                /* out: DB_SUCCESS or error code */
 
1165
        ulint   max_format_id)  /* in: max format id to check */
 
1166
{
 
1167
        ulint   format_id;
 
1168
 
 
1169
        /* Check the file format in the tablespace. Do not try to
 
1170
        recover if the file format is not supported by the engine
 
1171
        unless forced by the user. */
 
1172
        format_id = trx_sys_file_format_max_read();
 
1173
 
 
1174
        ut_print_timestamp(stderr);
 
1175
        fprintf(stderr,
 
1176
                "  InnoDB: highest supported file format is %s.\n",
 
1177
                trx_sys_file_format_id_to_name(DICT_TF_FORMAT_MAX));
 
1178
 
 
1179
        if (format_id > DICT_TF_FORMAT_MAX) {
 
1180
 
 
1181
                ut_a(format_id < FILE_FORMAT_NAME_N);
 
1182
 
 
1183
                ut_print_timestamp(stderr);
 
1184
                fprintf(stderr,
 
1185
                        "  InnoDB: %s: the system tablespace is in a file "
 
1186
                        "format that this version doesn't support - %s\n",
 
1187
                        ((max_format_id <= DICT_TF_FORMAT_MAX)
 
1188
                                ? "Error" : "Warning"),
 
1189
                        trx_sys_file_format_id_to_name(format_id));
 
1190
 
 
1191
                if (max_format_id <= DICT_TF_FORMAT_MAX) {
 
1192
                        return(DB_ERROR);
 
1193
                }
 
1194
        }
 
1195
 
 
1196
        format_id = (format_id > max_format_id) ? format_id : max_format_id;
 
1197
 
 
1198
        /* We don't need a mutex here, as this function should only
 
1199
        be called once at start up. */
 
1200
        file_format_max.id = format_id;
 
1201
        file_format_max.name = trx_sys_file_format_id_to_name(format_id);
 
1202
 
 
1203
        return(DB_SUCCESS);
 
1204
}
 
1205
 
 
1206
/*********************************************************************
 
1207
Set the file format id unconditionally except if it's already the
 
1208
same value. */
 
1209
UNIV_INTERN
 
1210
ibool
 
1211
trx_sys_file_format_max_set(
 
1212
/*========================*/
 
1213
                                        /* out: TRUE if value updated */
 
1214
        ulint           format_id,      /* in: file format id */
 
1215
        char**          name)           /* out: max file format name */
 
1216
{
 
1217
        ibool           ret = FALSE;
 
1218
 
 
1219
        ut_a(name);
 
1220
        ut_a(format_id <= DICT_TF_FORMAT_MAX);
 
1221
 
 
1222
        mutex_enter(&file_format_max.mutex);
 
1223
 
 
1224
        /* Only update if not already same value. */
 
1225
        if (format_id != file_format_max.id) {
 
1226
 
 
1227
                ret = trx_sys_file_format_max_write(format_id, name);
 
1228
        }
 
1229
 
 
1230
        mutex_exit(&file_format_max.mutex);
 
1231
 
 
1232
        return(ret);
 
1233
}
 
1234
 
 
1235
/************************************************************************
 
1236
Update the file format tag in the tablespace only if the given format id
 
1237
is greater than the known max id. */
 
1238
UNIV_INTERN
 
1239
ibool
 
1240
trx_sys_file_format_max_update(
 
1241
/*===========================*/
 
1242
        uint            flags,          /* in: flags of the table.*/
 
1243
        char**          name)           /* out: max file format name */
 
1244
{
 
1245
        ulint           format_id;
 
1246
        ibool           ret = FALSE;
 
1247
 
 
1248
        format_id = (flags & DICT_TF_FORMAT_MASK) >> DICT_TF_FORMAT_SHIFT;
 
1249
 
 
1250
        ut_a(name);
 
1251
        ut_a(file_format_max.name != NULL);
 
1252
        ut_a(format_id <= DICT_TF_FORMAT_MAX);
 
1253
 
 
1254
        mutex_enter(&file_format_max.mutex);
 
1255
 
 
1256
        if (format_id > file_format_max.id) {
 
1257
 
 
1258
                ret = trx_sys_file_format_max_write(format_id, name);
 
1259
        }
 
1260
 
 
1261
        mutex_exit(&file_format_max.mutex);
 
1262
 
 
1263
        return(ret);
 
1264
}
 
1265
 
 
1266
/*********************************************************************
 
1267
Get the name representation of the file format from its id. */
 
1268
UNIV_INTERN
 
1269
const char*
 
1270
trx_sys_file_format_max_get(void)
 
1271
/*=============================*/
 
1272
                                /* out: pointer to the max format name */
 
1273
{
 
1274
        return(file_format_max.name);
 
1275
}
 
1276
 
 
1277
/*********************************************************************
 
1278
Initializes the tablespace tag system. */
 
1279
UNIV_INTERN
 
1280
void
 
1281
trx_sys_file_format_init(void)
 
1282
/*==========================*/
 
1283
{
 
1284
        mutex_create(&file_format_max.mutex, SYNC_FILE_FORMAT_TAG);
 
1285
 
 
1286
        /* We don't need a mutex here, as this function should only
 
1287
        be called once at start up. */
 
1288
        file_format_max.id = DICT_TF_FORMAT_51;
 
1289
 
 
1290
        file_format_max.name = trx_sys_file_format_id_to_name(
 
1291
                file_format_max.id);
 
1292
}
 
1293
 
 
1294
/*********************************************************************
 
1295
Closes the tablespace tag system. */
 
1296
UNIV_INTERN
 
1297
void
 
1298
trx_sys_file_format_close(void)
 
1299
/*===========================*/
 
1300
{
 
1301
        /* Does nothing at the moment */
 
1302
}