~drizzle-trunk/drizzle/development

« back to all changes in this revision

Viewing changes to storage/innobase/trx/trx0sys.c

enable remaining subselect tests, merge with latest from the trunk

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/******************************************************
 
2
Transaction system
 
3
 
 
4
(c) 1996 Innobase Oy
 
5
 
 
6
Created 3/26/1996 Heikki Tuuri
 
7
*******************************************************/
 
8
 
 
9
#include "trx0sys.h"
 
10
 
 
11
#ifdef UNIV_NONINL
 
12
#include "trx0sys.ic"
 
13
#endif
 
14
 
 
15
#include "fsp0fsp.h"
 
16
#include "mtr0mtr.h"
 
17
#include "trx0trx.h"
 
18
#include "trx0rseg.h"
 
19
#include "trx0undo.h"
 
20
#include "srv0srv.h"
 
21
#include "trx0purge.h"
 
22
#include "log0log.h"
 
23
#include "os0file.h"
 
24
 
 
25
/* The file format tag structure with id and name. */
 
26
struct file_format_struct {
 
27
        ulint           id;             /* id of the file format */
 
28
        const char*     name;           /* text representation of the
 
29
                                        file format */
 
30
        mutex_t         mutex;          /* covers changes to the above
 
31
                                        fields */
 
32
};
 
33
 
 
34
typedef struct file_format_struct       file_format_t;
 
35
 
 
36
/* The transaction system */
 
37
UNIV_INTERN trx_sys_t*          trx_sys         = NULL;
 
38
UNIV_INTERN trx_doublewrite_t*  trx_doublewrite = NULL;
 
39
 
 
40
/* The following is set to TRUE when we are upgrading from the old format data
 
41
files to the new >= 4.1.x format multiple tablespaces format data files */
 
42
 
 
43
UNIV_INTERN ibool       trx_doublewrite_must_reset_space_ids    = FALSE;
 
44
 
 
45
/* The following is TRUE when we are using the database in the new format,
 
46
i.e., we have successfully upgraded, or have created a new database
 
47
installation */
 
48
 
 
49
UNIV_INTERN ibool       trx_sys_multiple_tablespace_format      = FALSE;
 
50
 
 
51
/* In a MySQL replication slave, in crash recovery we store the master log
 
52
file name and position here. We have successfully got the updates to InnoDB
 
53
up to this position. If .._pos is -1, it means no crash recovery was needed,
 
54
or there was no master log position info inside InnoDB. */
 
55
 
 
56
UNIV_INTERN char        trx_sys_mysql_master_log_name[TRX_SYS_MYSQL_LOG_NAME_LEN];
 
57
UNIV_INTERN ib_int64_t  trx_sys_mysql_master_log_pos    = -1;
 
58
 
 
59
/* If this MySQL server uses binary logging, after InnoDB has been inited
 
60
and if it has done a crash recovery, we store the binlog file name and position
 
61
here. If .._pos is -1, it means there was no binlog position info inside
 
62
InnoDB. */
 
63
 
 
64
UNIV_INTERN char        trx_sys_mysql_bin_log_name[TRX_SYS_MYSQL_LOG_NAME_LEN];
 
65
UNIV_INTERN ib_int64_t  trx_sys_mysql_bin_log_pos       = -1;
 
66
 
 
67
/* List of animal names representing file format. */
 
68
static const char*      file_format_name_map[] = {
 
69
        "Antelope",
 
70
        "Barracuda",
 
71
        "Cheetah",
 
72
        "Dragon",
 
73
        "Elk",
 
74
        "Fox",
 
75
        "Gazelle",
 
76
        "Hornet",
 
77
        "Impala",
 
78
        "Jaguar",
 
79
        "Kangaroo",
 
80
        "Leopard",
 
81
        "Moose",
 
82
        "Nautilus",
 
83
        "Ocelot",
 
84
        "Porpoise",
 
85
        "Quail",
 
86
        "Rabbit",
 
87
        "Shark",
 
88
        "Tiger",
 
89
        "Urchin",
 
90
        "Viper",
 
91
        "Whale",
 
92
        "Xenops",
 
93
        "Yak",
 
94
        "Zebra"
 
95
};
 
96
 
 
97
/* The number of elements in the file format name array. */
 
98
static const ulint      FILE_FORMAT_NAME_N
 
99
        = sizeof(file_format_name_map) / sizeof(file_format_name_map[0]);
 
100
 
 
101
/* This is used to track the maximum file format id known to InnoDB. It's
 
102
updated via SET GLOBAL innodb_file_format_check = 'x' or when we open
 
103
or create a table. */
 
104
static  file_format_t   file_format_max;
 
105
 
 
106
/********************************************************************
 
107
Determines if a page number is located inside the doublewrite buffer. */
 
108
UNIV_INTERN
 
109
ibool
 
110
trx_doublewrite_page_inside(
 
111
/*========================*/
 
112
                                /* out: TRUE if the location is inside
 
113
                                the two blocks of the doublewrite buffer */
 
114
        ulint   page_no)        /* in: page number */
 
115
{
 
116
        if (trx_doublewrite == NULL) {
 
117
 
 
118
                return(FALSE);
 
119
        }
 
120
 
 
121
        if (page_no >= trx_doublewrite->block1
 
122
            && page_no < trx_doublewrite->block1
 
123
            + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
 
124
                return(TRUE);
 
125
        }
 
126
 
 
127
        if (page_no >= trx_doublewrite->block2
 
128
            && page_no < trx_doublewrite->block2
 
129
            + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
 
130
                return(TRUE);
 
131
        }
 
132
 
 
133
        return(FALSE);
 
134
}
 
135
 
 
136
/********************************************************************
 
137
Creates or initialializes the doublewrite buffer at a database start. */
 
138
static
 
139
void
 
140
trx_doublewrite_init(
 
141
/*=================*/
 
142
        byte*   doublewrite)    /* in: pointer to the doublewrite buf
 
143
                                header on trx sys page */
 
144
{
 
145
        trx_doublewrite = mem_alloc(sizeof(trx_doublewrite_t));
 
146
 
 
147
        /* Since we now start to use the doublewrite buffer, no need to call
 
148
        fsync() after every write to a data file */
 
149
#ifdef UNIV_DO_FLUSH
 
150
        os_do_not_call_flush_at_each_write = TRUE;
 
151
#endif /* UNIV_DO_FLUSH */
 
152
 
 
153
        mutex_create(&trx_doublewrite->mutex, SYNC_DOUBLEWRITE);
 
154
 
 
155
        trx_doublewrite->first_free = 0;
 
156
 
 
157
        trx_doublewrite->block1 = mach_read_from_4(
 
158
                doublewrite + TRX_SYS_DOUBLEWRITE_BLOCK1);
 
159
        trx_doublewrite->block2 = mach_read_from_4(
 
160
                doublewrite + TRX_SYS_DOUBLEWRITE_BLOCK2);
 
161
        trx_doublewrite->write_buf_unaligned = ut_malloc(
 
162
                (1 + 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) * UNIV_PAGE_SIZE);
 
163
 
 
164
        trx_doublewrite->write_buf = ut_align(
 
165
                trx_doublewrite->write_buf_unaligned, UNIV_PAGE_SIZE);
 
166
        trx_doublewrite->buf_block_arr = mem_alloc(
 
167
                2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * sizeof(void*));
 
168
}
 
169
 
 
170
/********************************************************************
 
171
Marks the trx sys header when we have successfully upgraded to the >= 4.1.x
 
172
multiple tablespace format. */
 
173
UNIV_INTERN
 
174
void
 
175
trx_sys_mark_upgraded_to_multiple_tablespaces(void)
 
176
/*===============================================*/
 
177
{
 
178
        buf_block_t*    block;
 
179
        byte*           doublewrite;
 
180
        mtr_t           mtr;
 
181
 
 
182
        /* We upgraded to 4.1.x and reset the space id fields in the
 
183
        doublewrite buffer. Let us mark to the trx_sys header that the upgrade
 
184
        has been done. */
 
185
 
 
186
        mtr_start(&mtr);
 
187
 
 
188
        block = buf_page_get(TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO,
 
189
                             RW_X_LATCH, &mtr);
 
190
        buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
 
191
 
 
192
        doublewrite = buf_block_get_frame(block) + TRX_SYS_DOUBLEWRITE;
 
193
 
 
194
        mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED,
 
195
                         TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N,
 
196
                         MLOG_4BYTES, &mtr);
 
197
        mtr_commit(&mtr);
 
198
 
 
199
        /* Flush the modified pages to disk and make a checkpoint */
 
200
        log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
 
201
 
 
202
        trx_sys_multiple_tablespace_format = TRUE;
 
203
}
 
204
 
 
205
/********************************************************************
 
206
Creates the doublewrite buffer to a new InnoDB installation. The header of the
 
207
doublewrite buffer is placed on the trx system header page. */
 
208
UNIV_INTERN
 
209
void
 
210
trx_sys_create_doublewrite_buf(void)
 
211
/*================================*/
 
212
{
 
213
        buf_block_t*    block;
 
214
        buf_block_t*    block2;
 
215
        buf_block_t*    new_block;
 
216
        byte*   doublewrite;
 
217
        byte*   fseg_header;
 
218
        ulint   page_no;
 
219
        ulint   prev_page_no;
 
220
        ulint   i;
 
221
        mtr_t   mtr;
 
222
 
 
223
        if (trx_doublewrite) {
 
224
                /* Already inited */
 
225
 
 
226
                return;
 
227
        }
 
228
 
 
229
start_again:
 
230
        mtr_start(&mtr);
 
231
 
 
232
        block = buf_page_get(TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO,
 
233
                             RW_X_LATCH, &mtr);
 
234
        buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
 
235
 
 
236
        doublewrite = buf_block_get_frame(block) + TRX_SYS_DOUBLEWRITE;
 
237
 
 
238
        if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC)
 
239
            == TRX_SYS_DOUBLEWRITE_MAGIC_N) {
 
240
                /* The doublewrite buffer has already been created:
 
241
                just read in some numbers */
 
242
 
 
243
                trx_doublewrite_init(doublewrite);
 
244
 
 
245
                mtr_commit(&mtr);
 
246
        } else {
 
247
                fprintf(stderr,
 
248
                        "InnoDB: Doublewrite buffer not found:"
 
249
                        " creating new\n");
 
250
 
 
251
                if (buf_pool_get_curr_size()
 
252
                    < ((2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
 
253
                        + FSP_EXTENT_SIZE / 2 + 100)
 
254
                       * UNIV_PAGE_SIZE)) {
 
255
                        fprintf(stderr,
 
256
                                "InnoDB: Cannot create doublewrite buffer:"
 
257
                                " you must\n"
 
258
                                "InnoDB: increase your buffer pool size.\n"
 
259
                                "InnoDB: Cannot continue operation.\n");
 
260
 
 
261
                        exit(1);
 
262
                }
 
263
 
 
264
                block2 = fseg_create(TRX_SYS_SPACE, TRX_SYS_PAGE_NO,
 
265
                                     TRX_SYS_DOUBLEWRITE
 
266
                                     + TRX_SYS_DOUBLEWRITE_FSEG, &mtr);
 
267
 
 
268
                /* fseg_create acquires a second latch on the page,
 
269
                therefore we must declare it: */
 
270
 
 
271
                buf_block_dbg_add_level(block2, SYNC_NO_ORDER_CHECK);
 
272
 
 
273
                if (block2 == NULL) {
 
274
                        fprintf(stderr,
 
275
                                "InnoDB: Cannot create doublewrite buffer:"
 
276
                                " you must\n"
 
277
                                "InnoDB: increase your tablespace size.\n"
 
278
                                "InnoDB: Cannot continue operation.\n");
 
279
 
 
280
                        /* We exit without committing the mtr to prevent
 
281
                        its modifications to the database getting to disk */
 
282
 
 
283
                        exit(1);
 
284
                }
 
285
 
 
286
                fseg_header = buf_block_get_frame(block)
 
287
                        + TRX_SYS_DOUBLEWRITE + TRX_SYS_DOUBLEWRITE_FSEG;
 
288
                prev_page_no = 0;
 
289
 
 
290
                for (i = 0; i < 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
 
291
                             + FSP_EXTENT_SIZE / 2; i++) {
 
292
                        page_no = fseg_alloc_free_page(fseg_header,
 
293
                                                       prev_page_no + 1,
 
294
                                                       FSP_UP, &mtr);
 
295
                        if (page_no == FIL_NULL) {
 
296
                                fprintf(stderr,
 
297
                                        "InnoDB: Cannot create doublewrite"
 
298
                                        " buffer: you must\n"
 
299
                                        "InnoDB: increase your"
 
300
                                        " tablespace size.\n"
 
301
                                        "InnoDB: Cannot continue operation.\n"
 
302
                                        );
 
303
 
 
304
                                exit(1);
 
305
                        }
 
306
 
 
307
                        /* We read the allocated pages to the buffer pool;
 
308
                        when they are written to disk in a flush, the space
 
309
                        id and page number fields are also written to the
 
310
                        pages. When we at database startup read pages
 
311
                        from the doublewrite buffer, we know that if the
 
312
                        space id and page number in them are the same as
 
313
                        the page position in the tablespace, then the page
 
314
                        has not been written to in doublewrite. */
 
315
 
 
316
                        new_block = buf_page_get(TRX_SYS_SPACE, 0, page_no,
 
317
                                                 RW_X_LATCH, &mtr);
 
318
                        buf_block_dbg_add_level(new_block,
 
319
                                                SYNC_NO_ORDER_CHECK);
 
320
 
 
321
                        /* Make a dummy change to the page to ensure it will
 
322
                        be written to disk in a flush */
 
323
 
 
324
                        mlog_write_ulint(buf_block_get_frame(new_block)
 
325
                                         + FIL_PAGE_DATA,
 
326
                                         TRX_SYS_DOUBLEWRITE_MAGIC_N,
 
327
                                         MLOG_4BYTES, &mtr);
 
328
 
 
329
                        if (i == FSP_EXTENT_SIZE / 2) {
 
330
                                mlog_write_ulint(doublewrite
 
331
                                                 + TRX_SYS_DOUBLEWRITE_BLOCK1,
 
332
                                                 page_no, MLOG_4BYTES, &mtr);
 
333
                                mlog_write_ulint(doublewrite
 
334
                                                 + TRX_SYS_DOUBLEWRITE_REPEAT
 
335
                                                 + TRX_SYS_DOUBLEWRITE_BLOCK1,
 
336
                                                 page_no, MLOG_4BYTES, &mtr);
 
337
                        } else if (i == FSP_EXTENT_SIZE / 2
 
338
                                   + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
 
339
                                mlog_write_ulint(doublewrite
 
340
                                                 + TRX_SYS_DOUBLEWRITE_BLOCK2,
 
341
                                                 page_no, MLOG_4BYTES, &mtr);
 
342
                                mlog_write_ulint(doublewrite
 
343
                                                 + TRX_SYS_DOUBLEWRITE_REPEAT
 
344
                                                 + TRX_SYS_DOUBLEWRITE_BLOCK2,
 
345
                                                 page_no, MLOG_4BYTES, &mtr);
 
346
                        } else if (i > FSP_EXTENT_SIZE / 2) {
 
347
                                ut_a(page_no == prev_page_no + 1);
 
348
                        }
 
349
 
 
350
                        prev_page_no = page_no;
 
351
                }
 
352
 
 
353
                mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC,
 
354
                                 TRX_SYS_DOUBLEWRITE_MAGIC_N,
 
355
                                 MLOG_4BYTES, &mtr);
 
356
                mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC
 
357
                                 + TRX_SYS_DOUBLEWRITE_REPEAT,
 
358
                                 TRX_SYS_DOUBLEWRITE_MAGIC_N,
 
359
                                 MLOG_4BYTES, &mtr);
 
360
 
 
361
                mlog_write_ulint(doublewrite
 
362
                                 + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED,
 
363
                                 TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N,
 
364
                                 MLOG_4BYTES, &mtr);
 
365
                mtr_commit(&mtr);
 
366
 
 
367
                /* Flush the modified pages to disk and make a checkpoint */
 
368
                log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
 
369
 
 
370
                fprintf(stderr, "InnoDB: Doublewrite buffer created\n");
 
371
 
 
372
                trx_sys_multiple_tablespace_format = TRUE;
 
373
 
 
374
                goto start_again;
 
375
        }
 
376
}
 
377
 
 
378
/********************************************************************
 
379
At a database startup initializes the doublewrite buffer memory structure if
 
380
we already have a doublewrite buffer created in the data files. If we are
 
381
upgrading to an InnoDB version which supports multiple tablespaces, then this
 
382
function performs the necessary update operations. If we are in a crash
 
383
recovery, this function uses a possible doublewrite buffer to restore
 
384
half-written pages in the data files. */
 
385
UNIV_INTERN
 
386
void
 
387
trx_sys_doublewrite_init_or_restore_pages(
 
388
/*======================================*/
 
389
        ibool   restore_corrupt_pages)
 
390
{
 
391
        byte*   buf;
 
392
        byte*   read_buf;
 
393
        byte*   unaligned_read_buf;
 
394
        ulint   block1;
 
395
        ulint   block2;
 
396
        ulint   source_page_no;
 
397
        byte*   page;
 
398
        byte*   doublewrite;
 
399
        ulint   space_id;
 
400
        ulint   page_no;
 
401
        ulint   i;
 
402
 
 
403
        /* We do the file i/o past the buffer pool */
 
404
 
 
405
        unaligned_read_buf = ut_malloc(2 * UNIV_PAGE_SIZE);
 
406
        read_buf = ut_align(unaligned_read_buf, UNIV_PAGE_SIZE);
 
407
 
 
408
        /* Read the trx sys header to check if we are using the doublewrite
 
409
        buffer */
 
410
 
 
411
        fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, 0,
 
412
               UNIV_PAGE_SIZE, read_buf, NULL);
 
413
        doublewrite = read_buf + TRX_SYS_DOUBLEWRITE;
 
414
 
 
415
        if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC)
 
416
            == TRX_SYS_DOUBLEWRITE_MAGIC_N) {
 
417
                /* The doublewrite buffer has been created */
 
418
 
 
419
                trx_doublewrite_init(doublewrite);
 
420
 
 
421
                block1 = trx_doublewrite->block1;
 
422
                block2 = trx_doublewrite->block2;
 
423
 
 
424
                buf = trx_doublewrite->write_buf;
 
425
        } else {
 
426
                goto leave_func;
 
427
        }
 
428
 
 
429
        if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED)
 
430
            != TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N) {
 
431
 
 
432
                /* We are upgrading from a version < 4.1.x to a version where
 
433
                multiple tablespaces are supported. We must reset the space id
 
434
                field in the pages in the doublewrite buffer because starting
 
435
                from this version the space id is stored to
 
436
                FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID. */
 
437
 
 
438
                trx_doublewrite_must_reset_space_ids = TRUE;
 
439
 
 
440
                fprintf(stderr,
 
441
                        "InnoDB: Resetting space id's in the"
 
442
                        " doublewrite buffer\n");
 
443
        } else {
 
444
                trx_sys_multiple_tablespace_format = TRUE;
 
445
        }
 
446
 
 
447
        /* Read the pages from the doublewrite buffer to memory */
 
448
 
 
449
        fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, block1, 0,
 
450
               TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
 
451
               buf, NULL);
 
452
        fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, block2, 0,
 
453
               TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
 
454
               buf + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
 
455
               NULL);
 
456
        /* Check if any of these pages is half-written in data files, in the
 
457
        intended position */
 
458
 
 
459
        page = buf;
 
460
 
 
461
        for (i = 0; i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 2; i++) {
 
462
 
 
463
                page_no = mach_read_from_4(page + FIL_PAGE_OFFSET);
 
464
 
 
465
                if (trx_doublewrite_must_reset_space_ids) {
 
466
 
 
467
                        space_id = 0;
 
468
                        mach_write_to_4(page
 
469
                                        + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 0);
 
470
                        /* We do not need to calculate new checksums for the
 
471
                        pages because the field .._SPACE_ID does not affect
 
472
                        them. Write the page back to where we read it from. */
 
473
 
 
474
                        if (i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
 
475
                                source_page_no = block1 + i;
 
476
                        } else {
 
477
                                source_page_no = block2
 
478
                                        + i - TRX_SYS_DOUBLEWRITE_BLOCK_SIZE;
 
479
                        }
 
480
 
 
481
                        fil_io(OS_FILE_WRITE, TRUE, 0, 0, source_page_no, 0,
 
482
                               UNIV_PAGE_SIZE, page, NULL);
 
483
                        /* printf("Resetting space id in page %lu\n",
 
484
                        source_page_no); */
 
485
                } else {
 
486
                        space_id = mach_read_from_4(
 
487
                                page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
 
488
                }
 
489
 
 
490
                if (!restore_corrupt_pages) {
 
491
                        /* The database was shut down gracefully: no need to
 
492
                        restore pages */
 
493
 
 
494
                } else if (!fil_tablespace_exists_in_mem(space_id)) {
 
495
                        /* Maybe we have dropped the single-table tablespace
 
496
                        and this page once belonged to it: do nothing */
 
497
 
 
498
                } else if (!fil_check_adress_in_tablespace(space_id,
 
499
                                                           page_no)) {
 
500
                        fprintf(stderr,
 
501
                                "InnoDB: Warning: a page in the"
 
502
                                " doublewrite buffer is not within space\n"
 
503
                                "InnoDB: bounds; space id %lu"
 
504
                                " page number %lu, page %lu in"
 
505
                                " doublewrite buf.\n",
 
506
                                (ulong) space_id, (ulong) page_no, (ulong) i);
 
507
 
 
508
                } else if (space_id == TRX_SYS_SPACE
 
509
                           && ((page_no >= block1
 
510
                                && page_no
 
511
                                < block1 + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)
 
512
                               || (page_no >= block2
 
513
                                   && page_no
 
514
                                   < (block2
 
515
                                      + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)))) {
 
516
 
 
517
                        /* It is an unwritten doublewrite buffer page:
 
518
                        do nothing */
 
519
                } else {
 
520
                        ulint   zip_size = fil_space_get_zip_size(space_id);
 
521
 
 
522
                        /* Read in the actual page from the file */
 
523
                        fil_io(OS_FILE_READ, TRUE, space_id, zip_size,
 
524
                               page_no, 0,
 
525
                               zip_size ? zip_size : UNIV_PAGE_SIZE,
 
526
                               read_buf, NULL);
 
527
 
 
528
                        /* Check if the page is corrupt */
 
529
 
 
530
                        if (UNIV_UNLIKELY
 
531
                            (buf_page_is_corrupted(read_buf, zip_size))) {
 
532
 
 
533
                                fprintf(stderr,
 
534
                                        "InnoDB: Warning: database page"
 
535
                                        " corruption or a failed\n"
 
536
                                        "InnoDB: file read of"
 
537
                                        " space %lu page %lu.\n"
 
538
                                        "InnoDB: Trying to recover it from"
 
539
                                        " the doublewrite buffer.\n",
 
540
                                        (ulong) space_id, (ulong) page_no);
 
541
 
 
542
                                if (buf_page_is_corrupted(page, zip_size)) {
 
543
                                        fprintf(stderr,
 
544
                                                "InnoDB: Dump of the page:\n");
 
545
                                        buf_page_print(read_buf, zip_size);
 
546
                                        fprintf(stderr,
 
547
                                                "InnoDB: Dump of"
 
548
                                                " corresponding page"
 
549
                                                " in doublewrite buffer:\n");
 
550
                                        buf_page_print(page, zip_size);
 
551
 
 
552
                                        fprintf(stderr,
 
553
                                                "InnoDB: Also the page in the"
 
554
                                                " doublewrite buffer"
 
555
                                                " is corrupt.\n"
 
556
                                                "InnoDB: Cannot continue"
 
557
                                                " operation.\n"
 
558
                                                "InnoDB: You can try to"
 
559
                                                " recover the database"
 
560
                                                " with the my.cnf\n"
 
561
                                                "InnoDB: option:\n"
 
562
                                                "InnoDB: set-variable="
 
563
                                                "innodb_force_recovery=6\n");
 
564
                                        exit(1);
 
565
                                }
 
566
 
 
567
                                /* Write the good page from the
 
568
                                doublewrite buffer to the intended
 
569
                                position */
 
570
 
 
571
                                fil_io(OS_FILE_WRITE, TRUE, space_id,
 
572
                                       zip_size, page_no, 0,
 
573
                                       zip_size ? zip_size : UNIV_PAGE_SIZE,
 
574
                                       page, NULL);
 
575
                                fprintf(stderr,
 
576
                                        "InnoDB: Recovered the page from"
 
577
                                        " the doublewrite buffer.\n");
 
578
                        }
 
579
                }
 
580
 
 
581
                page += UNIV_PAGE_SIZE;
 
582
        }
 
583
 
 
584
        fil_flush_file_spaces(FIL_TABLESPACE);
 
585
 
 
586
leave_func:
 
587
        ut_free(unaligned_read_buf);
 
588
}
 
589
 
 
590
/********************************************************************
 
591
Checks that trx is in the trx list. */
 
592
UNIV_INTERN
 
593
ibool
 
594
trx_in_trx_list(
 
595
/*============*/
 
596
                        /* out: TRUE if is in */
 
597
        trx_t*  in_trx) /* in: trx */
 
598
{
 
599
        trx_t*  trx;
 
600
 
 
601
        ut_ad(mutex_own(&(kernel_mutex)));
 
602
 
 
603
        trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
 
604
 
 
605
        while (trx != NULL) {
 
606
 
 
607
                if (trx == in_trx) {
 
608
 
 
609
                        return(TRUE);
 
610
                }
 
611
 
 
612
                trx = UT_LIST_GET_NEXT(trx_list, trx);
 
613
        }
 
614
 
 
615
        return(FALSE);
 
616
}
 
617
 
 
618
/*********************************************************************
 
619
Writes the value of max_trx_id to the file based trx system header. */
 
620
UNIV_INTERN
 
621
void
 
622
trx_sys_flush_max_trx_id(void)
 
623
/*==========================*/
 
624
{
 
625
        trx_sysf_t*     sys_header;
 
626
        mtr_t           mtr;
 
627
 
 
628
        ut_ad(mutex_own(&kernel_mutex));
 
629
 
 
630
        mtr_start(&mtr);
 
631
 
 
632
        sys_header = trx_sysf_get(&mtr);
 
633
 
 
634
        mlog_write_dulint(sys_header + TRX_SYS_TRX_ID_STORE,
 
635
                          trx_sys->max_trx_id, &mtr);
 
636
        mtr_commit(&mtr);
 
637
}
 
638
 
 
639
/*********************************************************************
 
640
Updates the offset information about the end of the MySQL binlog entry
 
641
which corresponds to the transaction just being committed. In a MySQL
 
642
replication slave updates the latest master binlog position up to which
 
643
replication has proceeded. */
 
644
UNIV_INTERN
 
645
void
 
646
trx_sys_update_mysql_binlog_offset(
 
647
/*===============================*/
 
648
        const char*     file_name,/* in: MySQL log file name */
 
649
        ib_int64_t      offset, /* in: position in that log file */
 
650
        ulint           field,  /* in: offset of the MySQL log info field in
 
651
                                the trx sys header */
 
652
        mtr_t*          mtr)    /* in: mtr */
 
653
{
 
654
        trx_sysf_t*     sys_header;
 
655
 
 
656
        if (ut_strlen(file_name) >= TRX_SYS_MYSQL_LOG_NAME_LEN) {
 
657
 
 
658
                /* We cannot fit the name to the 512 bytes we have reserved */
 
659
 
 
660
                return;
 
661
        }
 
662
 
 
663
        sys_header = trx_sysf_get(mtr);
 
664
 
 
665
        if (mach_read_from_4(sys_header + field
 
666
                             + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
 
667
            != TRX_SYS_MYSQL_LOG_MAGIC_N) {
 
668
 
 
669
                mlog_write_ulint(sys_header + field
 
670
                                 + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD,
 
671
                                 TRX_SYS_MYSQL_LOG_MAGIC_N,
 
672
                                 MLOG_4BYTES, mtr);
 
673
        }
 
674
 
 
675
        if (0 != strcmp((char*) (sys_header + field + TRX_SYS_MYSQL_LOG_NAME),
 
676
                        file_name)) {
 
677
 
 
678
                mlog_write_string(sys_header + field
 
679
                                  + TRX_SYS_MYSQL_LOG_NAME,
 
680
                                  (byte*) file_name, 1 + ut_strlen(file_name),
 
681
                                  mtr);
 
682
        }
 
683
 
 
684
        if (mach_read_from_4(sys_header + field
 
685
                             + TRX_SYS_MYSQL_LOG_OFFSET_HIGH) > 0
 
686
            || (offset >> 32) > 0) {
 
687
 
 
688
                mlog_write_ulint(sys_header + field
 
689
                                 + TRX_SYS_MYSQL_LOG_OFFSET_HIGH,
 
690
                                 (ulint)(offset >> 32),
 
691
                                 MLOG_4BYTES, mtr);
 
692
        }
 
693
 
 
694
        mlog_write_ulint(sys_header + field
 
695
                         + TRX_SYS_MYSQL_LOG_OFFSET_LOW,
 
696
                         (ulint)(offset & 0xFFFFFFFFUL),
 
697
                         MLOG_4BYTES, mtr);
 
698
}
 
699
 
 
700
#ifdef UNIV_HOTBACKUP
 
701
/*********************************************************************
 
702
Prints to stderr the MySQL binlog info in the system header if the
 
703
magic number shows it valid. */
 
704
UNIV_INTERN
 
705
void
 
706
trx_sys_print_mysql_binlog_offset_from_page(
 
707
/*========================================*/
 
708
        const byte*     page)   /* in: buffer containing the trx
 
709
                                system header page, i.e., page number
 
710
                                TRX_SYS_PAGE_NO in the tablespace */
 
711
{
 
712
        const trx_sysf_t*       sys_header;
 
713
 
 
714
        sys_header = page + TRX_SYS;
 
715
 
 
716
        if (mach_read_from_4(sys_header + TRX_SYS_MYSQL_LOG_INFO
 
717
                             + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
 
718
            == TRX_SYS_MYSQL_LOG_MAGIC_N) {
 
719
 
 
720
                fprintf(stderr,
 
721
                        "ibbackup: Last MySQL binlog file position %lu %lu,"
 
722
                        " file name %s\n",
 
723
                        (ulong) mach_read_from_4(
 
724
                                sys_header + TRX_SYS_MYSQL_LOG_INFO
 
725
                                + TRX_SYS_MYSQL_LOG_OFFSET_HIGH),
 
726
                        (ulong) mach_read_from_4(
 
727
                                sys_header + TRX_SYS_MYSQL_LOG_INFO
 
728
                                + TRX_SYS_MYSQL_LOG_OFFSET_LOW),
 
729
                        sys_header + TRX_SYS_MYSQL_LOG_INFO
 
730
                        + TRX_SYS_MYSQL_LOG_NAME);
 
731
        }
 
732
}
 
733
#endif /* UNIV_HOTBACKUP */
 
734
 
 
735
/*********************************************************************
 
736
Stores the MySQL binlog offset info in the trx system header if
 
737
the magic number shows it valid, and print the info to stderr */
 
738
UNIV_INTERN
 
739
void
 
740
trx_sys_print_mysql_binlog_offset(void)
 
741
/*===================================*/
 
742
{
 
743
        trx_sysf_t*     sys_header;
 
744
        mtr_t           mtr;
 
745
        ulint           trx_sys_mysql_bin_log_pos_high;
 
746
        ulint           trx_sys_mysql_bin_log_pos_low;
 
747
 
 
748
        mtr_start(&mtr);
 
749
 
 
750
        sys_header = trx_sysf_get(&mtr);
 
751
 
 
752
        if (mach_read_from_4(sys_header + TRX_SYS_MYSQL_LOG_INFO
 
753
                             + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
 
754
            != TRX_SYS_MYSQL_LOG_MAGIC_N) {
 
755
 
 
756
                mtr_commit(&mtr);
 
757
 
 
758
                return;
 
759
        }
 
760
 
 
761
        trx_sys_mysql_bin_log_pos_high = mach_read_from_4(
 
762
                sys_header + TRX_SYS_MYSQL_LOG_INFO
 
763
                + TRX_SYS_MYSQL_LOG_OFFSET_HIGH);
 
764
        trx_sys_mysql_bin_log_pos_low = mach_read_from_4(
 
765
                sys_header + TRX_SYS_MYSQL_LOG_INFO
 
766
                + TRX_SYS_MYSQL_LOG_OFFSET_LOW);
 
767
 
 
768
        trx_sys_mysql_bin_log_pos
 
769
                = (((ib_int64_t)trx_sys_mysql_bin_log_pos_high) << 32)
 
770
                + (ib_int64_t)trx_sys_mysql_bin_log_pos_low;
 
771
 
 
772
        ut_memcpy(trx_sys_mysql_bin_log_name,
 
773
                  sys_header + TRX_SYS_MYSQL_LOG_INFO
 
774
                  + TRX_SYS_MYSQL_LOG_NAME, TRX_SYS_MYSQL_LOG_NAME_LEN);
 
775
 
 
776
        fprintf(stderr,
 
777
                "InnoDB: Last MySQL binlog file position %lu %lu,"
 
778
                " file name %s\n",
 
779
                trx_sys_mysql_bin_log_pos_high, trx_sys_mysql_bin_log_pos_low,
 
780
                trx_sys_mysql_bin_log_name);
 
781
 
 
782
        mtr_commit(&mtr);
 
783
}
 
784
 
 
785
/*********************************************************************
 
786
Prints to stderr the MySQL master log offset info in the trx system header if
 
787
the magic number shows it valid. */
 
788
UNIV_INTERN
 
789
void
 
790
trx_sys_print_mysql_master_log_pos(void)
 
791
/*====================================*/
 
792
{
 
793
        trx_sysf_t*     sys_header;
 
794
        mtr_t           mtr;
 
795
 
 
796
        mtr_start(&mtr);
 
797
 
 
798
        sys_header = trx_sysf_get(&mtr);
 
799
 
 
800
        if (mach_read_from_4(sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
 
801
                             + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
 
802
            != TRX_SYS_MYSQL_LOG_MAGIC_N) {
 
803
 
 
804
                mtr_commit(&mtr);
 
805
 
 
806
                return;
 
807
        }
 
808
 
 
809
        fprintf(stderr,
 
810
                "InnoDB: In a MySQL replication slave the last"
 
811
                " master binlog file\n"
 
812
                "InnoDB: position %lu %lu, file name %s\n",
 
813
                (ulong) mach_read_from_4(sys_header
 
814
                                         + TRX_SYS_MYSQL_MASTER_LOG_INFO
 
815
                                         + TRX_SYS_MYSQL_LOG_OFFSET_HIGH),
 
816
                (ulong) mach_read_from_4(sys_header
 
817
                                         + TRX_SYS_MYSQL_MASTER_LOG_INFO
 
818
                                         + TRX_SYS_MYSQL_LOG_OFFSET_LOW),
 
819
                sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
 
820
                + TRX_SYS_MYSQL_LOG_NAME);
 
821
        /* Copy the master log position info to global variables we can
 
822
        use in ha_innobase.cc to initialize glob_mi to right values */
 
823
 
 
824
        ut_memcpy(trx_sys_mysql_master_log_name,
 
825
                  sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
 
826
                  + TRX_SYS_MYSQL_LOG_NAME,
 
827
                  TRX_SYS_MYSQL_LOG_NAME_LEN);
 
828
 
 
829
        trx_sys_mysql_master_log_pos
 
830
                = (((ib_int64_t) mach_read_from_4(
 
831
                            sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
 
832
                            + TRX_SYS_MYSQL_LOG_OFFSET_HIGH)) << 32)
 
833
                + ((ib_int64_t) mach_read_from_4(
 
834
                           sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
 
835
                           + TRX_SYS_MYSQL_LOG_OFFSET_LOW));
 
836
        mtr_commit(&mtr);
 
837
}
 
838
 
 
839
/********************************************************************
 
840
Looks for a free slot for a rollback segment in the trx system file copy. */
 
841
UNIV_INTERN
 
842
ulint
 
843
trx_sysf_rseg_find_free(
 
844
/*====================*/
 
845
                        /* out: slot index or ULINT_UNDEFINED if not found */
 
846
        mtr_t*  mtr)    /* in: mtr */
 
847
{
 
848
        trx_sysf_t*     sys_header;
 
849
        ulint           page_no;
 
850
        ulint           i;
 
851
 
 
852
        ut_ad(mutex_own(&(kernel_mutex)));
 
853
 
 
854
        sys_header = trx_sysf_get(mtr);
 
855
 
 
856
        for (i = 0; i < TRX_SYS_N_RSEGS; i++) {
 
857
 
 
858
                page_no = trx_sysf_rseg_get_page_no(sys_header, i, mtr);
 
859
 
 
860
                if (page_no == FIL_NULL) {
 
861
 
 
862
                        return(i);
 
863
                }
 
864
        }
 
865
 
 
866
        return(ULINT_UNDEFINED);
 
867
}
 
868
 
 
869
/*********************************************************************
 
870
Creates the file page for the transaction system. This function is called only
 
871
at the database creation, before trx_sys_init. */
 
872
static
 
873
void
 
874
trx_sysf_create(
 
875
/*============*/
 
876
        mtr_t*  mtr)    /* in: mtr */
 
877
{
 
878
        trx_sysf_t*     sys_header;
 
879
        ulint           slot_no;
 
880
        buf_block_t*    block;
 
881
        page_t*         page;
 
882
        ulint           page_no;
 
883
        ulint           i;
 
884
 
 
885
        ut_ad(mtr);
 
886
 
 
887
        /* Note that below we first reserve the file space x-latch, and
 
888
        then enter the kernel: we must do it in this order to conform
 
889
        to the latching order rules. */
 
890
 
 
891
        mtr_x_lock(fil_space_get_latch(TRX_SYS_SPACE, NULL), mtr);
 
892
        mutex_enter(&kernel_mutex);
 
893
 
 
894
        /* Create the trx sys file block in a new allocated file segment */
 
895
        block = fseg_create(TRX_SYS_SPACE, 0, TRX_SYS + TRX_SYS_FSEG_HEADER,
 
896
                            mtr);
 
897
        buf_block_dbg_add_level(block, SYNC_TRX_SYS_HEADER);
 
898
 
 
899
        ut_a(buf_block_get_page_no(block) == TRX_SYS_PAGE_NO);
 
900
 
 
901
        page = buf_block_get_frame(block);
 
902
 
 
903
        mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_TYPE_TRX_SYS,
 
904
                         MLOG_2BYTES, mtr);
 
905
 
 
906
        /* Reset the doublewrite buffer magic number to zero so that we
 
907
        know that the doublewrite buffer has not yet been created (this
 
908
        suppresses a Valgrind warning) */
 
909
 
 
910
        mlog_write_ulint(page + TRX_SYS_DOUBLEWRITE
 
911
                         + TRX_SYS_DOUBLEWRITE_MAGIC, 0, MLOG_4BYTES, mtr);
 
912
 
 
913
        sys_header = trx_sysf_get(mtr);
 
914
 
 
915
        /* Start counting transaction ids from number 1 up */
 
916
        mlog_write_dulint(sys_header + TRX_SYS_TRX_ID_STORE,
 
917
                          ut_dulint_create(0, 1), mtr);
 
918
 
 
919
        /* Reset the rollback segment slots */
 
920
        for (i = 0; i < TRX_SYS_N_RSEGS; i++) {
 
921
 
 
922
                trx_sysf_rseg_set_space(sys_header, i, ULINT_UNDEFINED, mtr);
 
923
                trx_sysf_rseg_set_page_no(sys_header, i, FIL_NULL, mtr);
 
924
        }
 
925
 
 
926
        /* The remaining area (up to the page trailer) is uninitialized.
 
927
        Silence Valgrind warnings about it. */
 
928
        UNIV_MEM_VALID(sys_header + (TRX_SYS_RSEGS
 
929
                                     + TRX_SYS_N_RSEGS * TRX_SYS_RSEG_SLOT_SIZE
 
930
                                     + TRX_SYS_RSEG_SPACE),
 
931
                       (UNIV_PAGE_SIZE - FIL_PAGE_DATA_END
 
932
                        - (TRX_SYS_RSEGS
 
933
                           + TRX_SYS_N_RSEGS * TRX_SYS_RSEG_SLOT_SIZE
 
934
                           + TRX_SYS_RSEG_SPACE))
 
935
                       + page - sys_header);
 
936
 
 
937
        /* Create the first rollback segment in the SYSTEM tablespace */
 
938
        page_no = trx_rseg_header_create(TRX_SYS_SPACE, 0, ULINT_MAX, &slot_no,
 
939
                                         mtr);
 
940
        ut_a(slot_no == TRX_SYS_SYSTEM_RSEG_ID);
 
941
        ut_a(page_no != FIL_NULL);
 
942
 
 
943
        mutex_exit(&kernel_mutex);
 
944
}
 
945
 
 
946
/*********************************************************************
 
947
Creates and initializes the central memory structures for the transaction
 
948
system. This is called when the database is started. */
 
949
UNIV_INTERN
 
950
void
 
951
trx_sys_init_at_db_start(void)
 
952
/*==========================*/
 
953
{
 
954
        trx_sysf_t*     sys_header;
 
955
        ib_int64_t      rows_to_undo    = 0;
 
956
        const char*     unit            = "";
 
957
        trx_t*          trx;
 
958
        mtr_t           mtr;
 
959
 
 
960
        mtr_start(&mtr);
 
961
 
 
962
        ut_ad(trx_sys == NULL);
 
963
 
 
964
        mutex_enter(&kernel_mutex);
 
965
 
 
966
        trx_sys = mem_alloc(sizeof(trx_sys_t));
 
967
 
 
968
        sys_header = trx_sysf_get(&mtr);
 
969
 
 
970
        trx_rseg_list_and_array_init(sys_header, &mtr);
 
971
 
 
972
        trx_sys->latest_rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
 
973
 
 
974
        /* VERY important: after the database is started, max_trx_id value is
 
975
        divisible by TRX_SYS_TRX_ID_WRITE_MARGIN, and the 'if' in
 
976
        trx_sys_get_new_trx_id will evaluate to TRUE when the function
 
977
        is first time called, and the value for trx id will be written
 
978
        to the disk-based header! Thus trx id values will not overlap when
 
979
        the database is repeatedly started! */
 
980
 
 
981
        trx_sys->max_trx_id = ut_dulint_add(
 
982
                ut_dulint_align_up(mtr_read_dulint(
 
983
                                           sys_header
 
984
                                           + TRX_SYS_TRX_ID_STORE, &mtr),
 
985
                                   TRX_SYS_TRX_ID_WRITE_MARGIN),
 
986
                2 * TRX_SYS_TRX_ID_WRITE_MARGIN);
 
987
 
 
988
        UT_LIST_INIT(trx_sys->mysql_trx_list);
 
989
        trx_dummy_sess = sess_open();
 
990
        trx_lists_init_at_db_start();
 
991
 
 
992
        if (UT_LIST_GET_LEN(trx_sys->trx_list) > 0) {
 
993
                trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
 
994
 
 
995
                for (;;) {
 
996
 
 
997
                        if ( trx->conc_state != TRX_PREPARED) {
 
998
                                rows_to_undo += ut_conv_dulint_to_longlong(
 
999
                                        trx->undo_no);
 
1000
                        }
 
1001
 
 
1002
                        trx = UT_LIST_GET_NEXT(trx_list, trx);
 
1003
 
 
1004
                        if (!trx) {
 
1005
                                break;
 
1006
                        }
 
1007
                }
 
1008
 
 
1009
                if (rows_to_undo > 1000000000) {
 
1010
                        unit = "M";
 
1011
                        rows_to_undo = rows_to_undo / 1000000;
 
1012
                }
 
1013
 
 
1014
                fprintf(stderr,
 
1015
                        "InnoDB: %lu transaction(s) which must be"
 
1016
                        " rolled back or cleaned up\n"
 
1017
                        "InnoDB: in total %lu%s row operations to undo\n",
 
1018
                        (ulong) UT_LIST_GET_LEN(trx_sys->trx_list),
 
1019
                        (ulong) rows_to_undo, unit);
 
1020
 
 
1021
                fprintf(stderr, "InnoDB: Trx id counter is " TRX_ID_FMT "\n",
 
1022
                        TRX_ID_PREP_PRINTF(trx_sys->max_trx_id));
 
1023
        }
 
1024
 
 
1025
        UT_LIST_INIT(trx_sys->view_list);
 
1026
 
 
1027
        trx_purge_sys_create();
 
1028
 
 
1029
        mutex_exit(&kernel_mutex);
 
1030
 
 
1031
        mtr_commit(&mtr);
 
1032
}
 
1033
 
 
1034
/*********************************************************************
 
1035
Creates and initializes the transaction system at the database creation. */
 
1036
UNIV_INTERN
 
1037
void
 
1038
trx_sys_create(void)
 
1039
/*================*/
 
1040
{
 
1041
        mtr_t   mtr;
 
1042
 
 
1043
        mtr_start(&mtr);
 
1044
 
 
1045
        trx_sysf_create(&mtr);
 
1046
 
 
1047
        mtr_commit(&mtr);
 
1048
 
 
1049
        trx_sys_init_at_db_start();
 
1050
}
 
1051
 
 
1052
/*********************************************************************
 
1053
Update the file format tag. */
 
1054
static
 
1055
ibool
 
1056
trx_sys_file_format_max_write(
 
1057
/*==========================*/
 
1058
                                        /* out: always TRUE */
 
1059
        ulint           format_id,      /* in: file format id */
 
1060
        const char**    name)           /* out: max file format name, can
 
1061
                                        be NULL */
 
1062
{
 
1063
        mtr_t           mtr;
 
1064
        byte*           ptr;
 
1065
        buf_block_t*    block;
 
1066
        ulint           tag_value_low;
 
1067
 
 
1068
        mtr_start(&mtr);
 
1069
 
 
1070
        block = buf_page_get(
 
1071
                TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, RW_X_LATCH, &mtr);
 
1072
 
 
1073
        file_format_max.id = format_id;
 
1074
        file_format_max.name = trx_sys_file_format_id_to_name(format_id);
 
1075
 
 
1076
        ptr = buf_block_get_frame(block) + TRX_SYS_FILE_FORMAT_TAG;
 
1077
        tag_value_low = format_id + TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_LOW;
 
1078
 
 
1079
        if (name) {
 
1080
                *name = file_format_max.name;
 
1081
        }
 
1082
 
 
1083
        mlog_write_dulint(
 
1084
                ptr,
 
1085
                ut_dulint_create(TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_HIGH,
 
1086
                                 tag_value_low),
 
1087
                &mtr);
 
1088
 
 
1089
        mtr_commit(&mtr);
 
1090
 
 
1091
        return(TRUE);
 
1092
}
 
1093
 
 
1094
/*********************************************************************
 
1095
Read the file format tag. */
 
1096
static
 
1097
ulint
 
1098
trx_sys_file_format_max_read(void)
 
1099
/*==============================*/
 
1100
                                /* out: the file format or
 
1101
                                ULINT_UNDEFINED if not set. */
 
1102
{
 
1103
        mtr_t                   mtr;
 
1104
        const byte*             ptr;
 
1105
        const buf_block_t*      block;
 
1106
        ulint                   format_id;
 
1107
        dulint                  file_format_id;
 
1108
 
 
1109
        /* Since this is called during the startup phase it's safe to
 
1110
        read the value without a covering mutex. */
 
1111
        mtr_start(&mtr);
 
1112
 
 
1113
        block = buf_page_get(
 
1114
                TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, RW_X_LATCH, &mtr);
 
1115
 
 
1116
        ptr = buf_block_get_frame(block) + TRX_SYS_FILE_FORMAT_TAG;
 
1117
        file_format_id = mach_read_from_8(ptr);
 
1118
 
 
1119
        mtr_commit(&mtr);
 
1120
 
 
1121
        format_id = file_format_id.low - TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_LOW;
 
1122
 
 
1123
        if (file_format_id.high != TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_HIGH
 
1124
            || format_id >= FILE_FORMAT_NAME_N) {
 
1125
 
 
1126
                /* Either it has never been tagged, or garbage in it. */
 
1127
                return(ULINT_UNDEFINED);
 
1128
        }
 
1129
 
 
1130
        return(format_id);
 
1131
}
 
1132
 
 
1133
/*********************************************************************
 
1134
Get the name representation of the file format from its id. */
 
1135
UNIV_INTERN
 
1136
const char*
 
1137
trx_sys_file_format_id_to_name(
 
1138
/*===========================*/
 
1139
                                /* out: pointer to the name */
 
1140
        const ulint     id)     /* in: id of the file format */
 
1141
{
 
1142
        ut_a(id < FILE_FORMAT_NAME_N);
 
1143
 
 
1144
        return(file_format_name_map[id]);
 
1145
}
 
1146
 
 
1147
/*********************************************************************
 
1148
Check for the max file format tag stored on disk. Note: If max_format_id
 
1149
is == DICT_TF_FORMAT_MAX + 1 then we only print a warning. */
 
1150
UNIV_INTERN
 
1151
ulint
 
1152
trx_sys_file_format_max_check(
 
1153
/*==========================*/
 
1154
                                /* out: DB_SUCCESS or error code */
 
1155
        ulint   max_format_id)  /* in: max format id to check */
 
1156
{
 
1157
        ulint   format_id;
 
1158
 
 
1159
        /* Check the file format in the tablespace. Do not try to
 
1160
        recover if the file format is not supported by the engine
 
1161
        unless forced by the user. */
 
1162
        format_id = trx_sys_file_format_max_read();
 
1163
        if (format_id == ULINT_UNDEFINED) {
 
1164
                /* Format ID was not set. Set it to minimum possible
 
1165
                value. */
 
1166
                format_id = DICT_TF_FORMAT_51;
 
1167
        }
 
1168
 
 
1169
        ut_print_timestamp(stderr);
 
1170
        fprintf(stderr,
 
1171
                "  InnoDB: highest supported file format is %s.\n",
 
1172
                trx_sys_file_format_id_to_name(DICT_TF_FORMAT_MAX));
 
1173
 
 
1174
        if (format_id > DICT_TF_FORMAT_MAX) {
 
1175
 
 
1176
                ut_a(format_id < FILE_FORMAT_NAME_N);
 
1177
 
 
1178
                ut_print_timestamp(stderr);
 
1179
                fprintf(stderr,
 
1180
                        "  InnoDB: %s: the system tablespace is in a file "
 
1181
                        "format that this version doesn't support - %s\n",
 
1182
                        ((max_format_id <= DICT_TF_FORMAT_MAX)
 
1183
                                ? "Error" : "Warning"),
 
1184
                        trx_sys_file_format_id_to_name(format_id));
 
1185
 
 
1186
                if (max_format_id <= DICT_TF_FORMAT_MAX) {
 
1187
                        return(DB_ERROR);
 
1188
                }
 
1189
        }
 
1190
 
 
1191
        format_id = (format_id > max_format_id) ? format_id : max_format_id;
 
1192
 
 
1193
        /* We don't need a mutex here, as this function should only
 
1194
        be called once at start up. */
 
1195
        file_format_max.id = format_id;
 
1196
        file_format_max.name = trx_sys_file_format_id_to_name(format_id);
 
1197
 
 
1198
        return(DB_SUCCESS);
 
1199
}
 
1200
 
 
1201
/*********************************************************************
 
1202
Set the file format id unconditionally except if it's already the
 
1203
same value. */
 
1204
UNIV_INTERN
 
1205
ibool
 
1206
trx_sys_file_format_max_set(
 
1207
/*========================*/
 
1208
                                        /* out: TRUE if value updated */
 
1209
        ulint           format_id,      /* in: file format id */
 
1210
        const char**    name)           /* out: max file format name or
 
1211
                                        NULL if not needed. */
 
1212
{
 
1213
        ibool           ret = FALSE;
 
1214
 
 
1215
        ut_a(format_id <= DICT_TF_FORMAT_MAX);
 
1216
 
 
1217
        mutex_enter(&file_format_max.mutex);
 
1218
 
 
1219
        /* Only update if not already same value. */
 
1220
        if (format_id != file_format_max.id) {
 
1221
 
 
1222
                ret = trx_sys_file_format_max_write(format_id, name);
 
1223
        }
 
1224
 
 
1225
        mutex_exit(&file_format_max.mutex);
 
1226
 
 
1227
        return(ret);
 
1228
}
 
1229
 
 
1230
/************************************************************************
 
1231
Tags the system table space with minimum format id if it has not been
 
1232
tagged yet.
 
1233
WARNING: This function is only called during the startup and AFTER the
 
1234
redo log application during recovery has finished. */
 
1235
UNIV_INTERN
 
1236
void
 
1237
trx_sys_file_format_tag_init(void)
 
1238
/*==============================*/
 
1239
{
 
1240
        ulint   format_id;
 
1241
 
 
1242
        format_id = trx_sys_file_format_max_read();
 
1243
 
 
1244
        /* If format_id is not set then set it to the minimum. */
 
1245
        if (format_id == ULINT_UNDEFINED) {
 
1246
                trx_sys_file_format_max_set(DICT_TF_FORMAT_51, NULL);
 
1247
        }
 
1248
}
 
1249
 
 
1250
/************************************************************************
 
1251
Update the file format tag in the system tablespace only if the given
 
1252
format id is greater than the known max id. */
 
1253
UNIV_INTERN
 
1254
ibool
 
1255
trx_sys_file_format_max_upgrade(
 
1256
/*============================*/
 
1257
                                        /* out: TRUE if format_id was
 
1258
                                        bigger than the known max id */
 
1259
        const char**    name,           /* out: max file format name */
 
1260
        ulint           format_id)      /* in: file format identifier */
 
1261
{
 
1262
        ibool           ret = FALSE;
 
1263
 
 
1264
        ut_a(name);
 
1265
        ut_a(file_format_max.name != NULL);
 
1266
        ut_a(format_id <= DICT_TF_FORMAT_MAX);
 
1267
 
 
1268
        mutex_enter(&file_format_max.mutex);
 
1269
 
 
1270
        if (format_id > file_format_max.id) {
 
1271
 
 
1272
                ret = trx_sys_file_format_max_write(format_id, name);
 
1273
        }
 
1274
 
 
1275
        mutex_exit(&file_format_max.mutex);
 
1276
 
 
1277
        return(ret);
 
1278
}
 
1279
 
 
1280
/*********************************************************************
 
1281
Get the name representation of the file format from its id. */
 
1282
UNIV_INTERN
 
1283
const char*
 
1284
trx_sys_file_format_max_get(void)
 
1285
/*=============================*/
 
1286
                                /* out: pointer to the max format name */
 
1287
{
 
1288
        return(file_format_max.name);
 
1289
}
 
1290
 
 
1291
/*********************************************************************
 
1292
Initializes the tablespace tag system. */
 
1293
UNIV_INTERN
 
1294
void
 
1295
trx_sys_file_format_init(void)
 
1296
/*==========================*/
 
1297
{
 
1298
        mutex_create(&file_format_max.mutex, SYNC_FILE_FORMAT_TAG);
 
1299
 
 
1300
        /* We don't need a mutex here, as this function should only
 
1301
        be called once at start up. */
 
1302
        file_format_max.id = DICT_TF_FORMAT_51;
 
1303
 
 
1304
        file_format_max.name = trx_sys_file_format_id_to_name(
 
1305
                file_format_max.id);
 
1306
}
 
1307
 
 
1308
/*********************************************************************
 
1309
Closes the tablespace tag system. */
 
1310
UNIV_INTERN
 
1311
void
 
1312
trx_sys_file_format_close(void)
 
1313
/*===========================*/
 
1314
{
 
1315
        /* Does nothing at the moment */
 
1316
}