~drizzle-trunk/drizzle/development

« back to all changes in this revision

Viewing changes to storage/innobase/trx/trx0sys.c

  • Committer: Brian Aker
  • Date: 2010-12-18 18:24:57 UTC
  • mfrom: (1999.6.3 trunk)
  • Revision ID: brian@tangent.org-20101218182457-yi1wd0so2hml1k1w
Merge in Lee's copyright header fix

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
/******************************************************
2
 
Transaction system
3
 
 
4
 
(c) 1996 Innobase Oy
5
 
 
6
 
Created 3/26/1996 Heikki Tuuri
7
 
*******************************************************/
8
 
 
9
 
#include "trx0sys.h"
10
 
 
11
 
#ifdef UNIV_NONINL
12
 
#include "trx0sys.ic"
13
 
#endif
14
 
 
15
 
#include "fsp0fsp.h"
16
 
#include "mtr0mtr.h"
17
 
#include "trx0trx.h"
18
 
#include "trx0rseg.h"
19
 
#include "trx0undo.h"
20
 
#include "srv0srv.h"
21
 
#include "trx0purge.h"
22
 
#include "log0log.h"
23
 
#include "os0file.h"
24
 
 
25
 
/* The transaction system: file-scope pointer to the trx system object;
it is defined as NULL here. */
26
 
trx_sys_t*              trx_sys         = NULL;
27
 
/* The doublewrite buffer object; it stays NULL until
trx_doublewrite_init() in this file allocates it. */
trx_doublewrite_t*      trx_doublewrite = NULL;
28
 
 
29
 
/* Set to TRUE when we are upgrading from the old data file format to the
multiple tablespaces data file format used by versions >= 4.1.x.
trx_sys_doublewrite_init_or_restore_pages() sets it when the doublewrite
header does not carry the "space id stored" marker. */
31
 
 
32
 
ibool                   trx_doublewrite_must_reset_space_ids    = FALSE;
33
 
 
34
 
/* TRUE when we are using the database in the new (multiple tablespaces)
format, i.e., we have successfully upgraded, or have created a new database
installation */
37
 
 
38
 
ibool                   trx_sys_multiple_tablespace_format      = FALSE;
39
 
 
40
 
/* In a MySQL replication slave, in crash recovery we store the master log
file name and position here. We have successfully got the updates to InnoDB
up to this position. If .._pos is -1, it means that no crash recovery was
needed, or that there was no master log position info inside InnoDB. */
44
 
 
45
 
char            trx_sys_mysql_master_log_name[TRX_SYS_DRIZZLE_LOG_NAME_LEN];
46
 
ib_longlong     trx_sys_mysql_master_log_pos    = -1;
47
 
 
48
 
/* If this MySQL server uses binary logging, then after InnoDB has been
inited, and if it has done a crash recovery, we store the binlog file name
and position here. If .._pos is -1, it means that there was no binlog
position info inside InnoDB. */
52
 
 
53
 
char            trx_sys_mysql_bin_log_name[TRX_SYS_DRIZZLE_LOG_NAME_LEN];
54
 
ib_longlong     trx_sys_mysql_bin_log_pos       = -1;
55
 
 
56
 
 
57
 
/********************************************************************
58
 
Determines if a page number is located inside the doublewrite buffer. */
59
 
 
60
 
ibool
61
 
trx_doublewrite_page_inside(
62
 
/*========================*/
63
 
                                /* out: TRUE if the location is inside
64
 
                                the two blocks of the doublewrite buffer */
65
 
        ulint   page_no)        /* in: page number */
66
 
{
67
 
        if (trx_doublewrite == NULL) {
68
 
 
69
 
                return(FALSE);
70
 
        }
71
 
 
72
 
        if (page_no >= trx_doublewrite->block1
73
 
            && page_no < trx_doublewrite->block1
74
 
            + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
75
 
                return(TRUE);
76
 
        }
77
 
 
78
 
        if (page_no >= trx_doublewrite->block2
79
 
            && page_no < trx_doublewrite->block2
80
 
            + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
81
 
                return(TRUE);
82
 
        }
83
 
 
84
 
        return(FALSE);
85
 
}
86
 
 
87
 
/********************************************************************
88
 
Creates or initialializes the doublewrite buffer at a database start. */
89
 
static
90
 
void
91
 
trx_doublewrite_init(
92
 
/*=================*/
93
 
        byte*   doublewrite)    /* in: pointer to the doublewrite buf
94
 
                                header on trx sys page */
95
 
{
96
 
        trx_doublewrite = mem_alloc(sizeof(trx_doublewrite_t));
97
 
 
98
 
        /* Since we now start to use the doublewrite buffer, no need to call
99
 
        fsync() after every write to a data file */
100
 
#ifdef UNIV_DO_FLUSH
101
 
        os_do_not_call_flush_at_each_write = TRUE;
102
 
#endif /* UNIV_DO_FLUSH */
103
 
 
104
 
        mutex_create(&trx_doublewrite->mutex, SYNC_DOUBLEWRITE);
105
 
 
106
 
        trx_doublewrite->first_free = 0;
107
 
 
108
 
        trx_doublewrite->block1 = mach_read_from_4(
109
 
                doublewrite + TRX_SYS_DOUBLEWRITE_BLOCK1);
110
 
        trx_doublewrite->block2 = mach_read_from_4(
111
 
                doublewrite + TRX_SYS_DOUBLEWRITE_BLOCK2);
112
 
        trx_doublewrite->write_buf_unaligned = ut_malloc(
113
 
                (1 + 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) * UNIV_PAGE_SIZE);
114
 
 
115
 
        trx_doublewrite->write_buf = ut_align(
116
 
                trx_doublewrite->write_buf_unaligned, UNIV_PAGE_SIZE);
117
 
        trx_doublewrite->buf_block_arr = mem_alloc(
118
 
                2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * sizeof(void*));
119
 
}
120
 
 
121
 
/********************************************************************
122
 
Marks the trx sys header when we have successfully upgraded to the >= 4.1.x
123
 
multiple tablespace format. */
124
 
 
125
 
void
126
 
trx_sys_mark_upgraded_to_multiple_tablespaces(void)
127
 
/*===============================================*/
128
 
{
129
 
        page_t* page;
130
 
        byte*   doublewrite;
131
 
        mtr_t   mtr;
132
 
 
133
 
        /* We upgraded to 4.1.x and reset the space id fields in the
134
 
        doublewrite buffer. Let us mark to the trx_sys header that the upgrade
135
 
        has been done. */
136
 
 
137
 
        mtr_start(&mtr);
138
 
 
139
 
        page = buf_page_get(TRX_SYS_SPACE, TRX_SYS_PAGE_NO, RW_X_LATCH, &mtr);
140
 
#ifdef UNIV_SYNC_DEBUG
141
 
        buf_page_dbg_add_level(page, SYNC_NO_ORDER_CHECK);
142
 
#endif /* UNIV_SYNC_DEBUG */
143
 
 
144
 
        doublewrite = page + TRX_SYS_DOUBLEWRITE;
145
 
 
146
 
        mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED,
147
 
                         TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N,
148
 
                         MLOG_4BYTES, &mtr);
149
 
        mtr_commit(&mtr);
150
 
 
151
 
        /* Flush the modified pages to disk and make a checkpoint */
152
 
        log_make_checkpoint_at(ut_dulint_max, TRUE);
153
 
 
154
 
        trx_sys_multiple_tablespace_format = TRUE;
155
 
}
156
 
 
157
 
/********************************************************************
158
 
Creates the doublewrite buffer to a new InnoDB installation. The header of the
159
 
doublewrite buffer is placed on the trx system header page. */
160
 
 
161
 
void
162
 
trx_sys_create_doublewrite_buf(void)
163
 
/*================================*/
164
 
{
165
 
        page_t* page;
166
 
        page_t* page2;
167
 
        page_t* new_page;
168
 
        byte*   doublewrite;
169
 
        byte*   fseg_header;
170
 
        ulint   page_no;
171
 
        ulint   prev_page_no;
172
 
        ulint   i;
173
 
        mtr_t   mtr;
174
 
 
175
 
        if (trx_doublewrite) {
176
 
                /* Already inited */
177
 
 
178
 
                return;
179
 
        }
180
 
 
181
 
start_again:
182
 
        mtr_start(&mtr);
183
 
 
184
 
        page = buf_page_get(TRX_SYS_SPACE, TRX_SYS_PAGE_NO, RW_X_LATCH, &mtr);
185
 
#ifdef UNIV_SYNC_DEBUG
186
 
        buf_page_dbg_add_level(page, SYNC_NO_ORDER_CHECK);
187
 
#endif /* UNIV_SYNC_DEBUG */
188
 
 
189
 
        doublewrite = page + TRX_SYS_DOUBLEWRITE;
190
 
 
191
 
        if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC)
192
 
            == TRX_SYS_DOUBLEWRITE_MAGIC_N) {
193
 
                /* The doublewrite buffer has already been created:
194
 
                just read in some numbers */
195
 
 
196
 
                trx_doublewrite_init(doublewrite);
197
 
 
198
 
                mtr_commit(&mtr);
199
 
        } else {
200
 
                fprintf(stderr,
201
 
                        "InnoDB: Doublewrite buffer not found:"
202
 
                        " creating new\n");
203
 
 
204
 
                if (buf_pool_get_curr_size()
205
 
                    < ((2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
206
 
                        + FSP_EXTENT_SIZE / 2 + 100)
207
 
                       * UNIV_PAGE_SIZE)) {
208
 
                        fprintf(stderr,
209
 
                                "InnoDB: Cannot create doublewrite buffer:"
210
 
                                " you must\n"
211
 
                                "InnoDB: increase your buffer pool size.\n"
212
 
                                "InnoDB: Cannot continue operation.\n");
213
 
 
214
 
                        exit(1);
215
 
                }
216
 
 
217
 
                page2 = fseg_create(TRX_SYS_SPACE, TRX_SYS_PAGE_NO,
218
 
                                    TRX_SYS_DOUBLEWRITE
219
 
                                    + TRX_SYS_DOUBLEWRITE_FSEG, &mtr);
220
 
 
221
 
                /* fseg_create acquires a second latch on the page,
222
 
                therefore we must declare it: */
223
 
 
224
 
#ifdef UNIV_SYNC_DEBUG
225
 
                buf_page_dbg_add_level(page2, SYNC_NO_ORDER_CHECK);
226
 
#endif /* UNIV_SYNC_DEBUG */
227
 
 
228
 
                if (page2 == NULL) {
229
 
                        fprintf(stderr,
230
 
                                "InnoDB: Cannot create doublewrite buffer:"
231
 
                                " you must\n"
232
 
                                "InnoDB: increase your tablespace size.\n"
233
 
                                "InnoDB: Cannot continue operation.\n");
234
 
 
235
 
                        /* We exit without committing the mtr to prevent
236
 
                        its modifications to the database getting to disk */
237
 
 
238
 
                        exit(1);
239
 
                }
240
 
 
241
 
                fseg_header = page + TRX_SYS_DOUBLEWRITE
242
 
                        + TRX_SYS_DOUBLEWRITE_FSEG;
243
 
                prev_page_no = 0;
244
 
 
245
 
                for (i = 0; i < 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
246
 
                             + FSP_EXTENT_SIZE / 2; i++) {
247
 
                        page_no = fseg_alloc_free_page(fseg_header,
248
 
                                                       prev_page_no + 1,
249
 
                                                       FSP_UP, &mtr);
250
 
                        if (page_no == FIL_NULL) {
251
 
                                fprintf(stderr,
252
 
                                        "InnoDB: Cannot create doublewrite"
253
 
                                        " buffer: you must\n"
254
 
                                        "InnoDB: increase your"
255
 
                                        " tablespace size.\n"
256
 
                                        "InnoDB: Cannot continue operation.\n"
257
 
                                        );
258
 
 
259
 
                                exit(1);
260
 
                        }
261
 
 
262
 
                        /* We read the allocated pages to the buffer pool;
263
 
                        when they are written to disk in a flush, the space
264
 
                        id and page number fields are also written to the
265
 
                        pages. When we at database startup read pages
266
 
                        from the doublewrite buffer, we know that if the
267
 
                        space id and page number in them are the same as
268
 
                        the page position in the tablespace, then the page
269
 
                        has not been written to in doublewrite. */
270
 
 
271
 
                        new_page = buf_page_get(TRX_SYS_SPACE, page_no,
272
 
                                                RW_X_LATCH, &mtr);
273
 
#ifdef UNIV_SYNC_DEBUG
274
 
                        buf_page_dbg_add_level(new_page, SYNC_NO_ORDER_CHECK);
275
 
#endif /* UNIV_SYNC_DEBUG */
276
 
 
277
 
                        /* Make a dummy change to the page to ensure it will
278
 
                        be written to disk in a flush */
279
 
 
280
 
                        mlog_write_ulint(new_page + FIL_PAGE_DATA,
281
 
                                         TRX_SYS_DOUBLEWRITE_MAGIC_N,
282
 
                                         MLOG_4BYTES, &mtr);
283
 
 
284
 
                        if (i == FSP_EXTENT_SIZE / 2) {
285
 
                                mlog_write_ulint(doublewrite
286
 
                                                 + TRX_SYS_DOUBLEWRITE_BLOCK1,
287
 
                                                 page_no, MLOG_4BYTES, &mtr);
288
 
                                mlog_write_ulint(doublewrite
289
 
                                                 + TRX_SYS_DOUBLEWRITE_REPEAT
290
 
                                                 + TRX_SYS_DOUBLEWRITE_BLOCK1,
291
 
                                                 page_no, MLOG_4BYTES, &mtr);
292
 
                        } else if (i == FSP_EXTENT_SIZE / 2
293
 
                                   + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
294
 
                                mlog_write_ulint(doublewrite
295
 
                                                 + TRX_SYS_DOUBLEWRITE_BLOCK2,
296
 
                                                 page_no, MLOG_4BYTES, &mtr);
297
 
                                mlog_write_ulint(doublewrite
298
 
                                                 + TRX_SYS_DOUBLEWRITE_REPEAT
299
 
                                                 + TRX_SYS_DOUBLEWRITE_BLOCK2,
300
 
                                                 page_no, MLOG_4BYTES, &mtr);
301
 
                        } else if (i > FSP_EXTENT_SIZE / 2) {
302
 
                                ut_a(page_no == prev_page_no + 1);
303
 
                        }
304
 
 
305
 
                        prev_page_no = page_no;
306
 
                }
307
 
 
308
 
                mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC,
309
 
                                 TRX_SYS_DOUBLEWRITE_MAGIC_N,
310
 
                                 MLOG_4BYTES, &mtr);
311
 
                mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC
312
 
                                 + TRX_SYS_DOUBLEWRITE_REPEAT,
313
 
                                 TRX_SYS_DOUBLEWRITE_MAGIC_N,
314
 
                                 MLOG_4BYTES, &mtr);
315
 
 
316
 
                mlog_write_ulint(doublewrite
317
 
                                 + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED,
318
 
                                 TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N,
319
 
                                 MLOG_4BYTES, &mtr);
320
 
                mtr_commit(&mtr);
321
 
 
322
 
                /* Flush the modified pages to disk and make a checkpoint */
323
 
                log_make_checkpoint_at(ut_dulint_max, TRUE);
324
 
 
325
 
                fprintf(stderr, "InnoDB: Doublewrite buffer created\n");
326
 
 
327
 
                trx_sys_multiple_tablespace_format = TRUE;
328
 
 
329
 
                goto start_again;
330
 
        }
331
 
}
332
 
 
333
 
/********************************************************************
334
 
At a database startup initializes the doublewrite buffer memory structure if
335
 
we already have a doublewrite buffer created in the data files. If we are
336
 
upgrading to an InnoDB version which supports multiple tablespaces, then this
337
 
function performs the necessary update operations. If we are in a crash
338
 
recovery, this function uses a possible doublewrite buffer to restore
339
 
half-written pages in the data files. */
340
 
 
341
 
void
342
 
trx_sys_doublewrite_init_or_restore_pages(
343
 
/*======================================*/
344
 
        ibool   restore_corrupt_pages)
345
 
{
346
 
        byte*   buf;
347
 
        byte*   read_buf;
348
 
        byte*   unaligned_read_buf;
349
 
        ulint   block1;
350
 
        ulint   block2;
351
 
        ulint   source_page_no;
352
 
        byte*   page;
353
 
        byte*   doublewrite;
354
 
        ulint   space_id;
355
 
        ulint   page_no;
356
 
        ulint   i;
357
 
 
358
 
        /* We do the file i/o past the buffer pool */
359
 
 
360
 
        unaligned_read_buf = ut_malloc(2 * UNIV_PAGE_SIZE);
361
 
        read_buf = ut_align(unaligned_read_buf, UNIV_PAGE_SIZE);
362
 
 
363
 
        /* Read the trx sys header to check if we are using the doublewrite
364
 
        buffer */
365
 
 
366
 
        fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, TRX_SYS_PAGE_NO, 0,
367
 
               UNIV_PAGE_SIZE, read_buf, NULL);
368
 
        doublewrite = read_buf + TRX_SYS_DOUBLEWRITE;
369
 
 
370
 
        if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC)
371
 
            == TRX_SYS_DOUBLEWRITE_MAGIC_N) {
372
 
                /* The doublewrite buffer has been created */
373
 
 
374
 
                trx_doublewrite_init(doublewrite);
375
 
 
376
 
                block1 = trx_doublewrite->block1;
377
 
                block2 = trx_doublewrite->block2;
378
 
 
379
 
                buf = trx_doublewrite->write_buf;
380
 
        } else {
381
 
                goto leave_func;
382
 
        }
383
 
 
384
 
        if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED)
385
 
            != TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N) {
386
 
 
387
 
                /* We are upgrading from a version < 4.1.x to a version where
388
 
                multiple tablespaces are supported. We must reset the space id
389
 
                field in the pages in the doublewrite buffer because starting
390
 
                from this version the space id is stored to
391
 
                FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID. */
392
 
 
393
 
                trx_doublewrite_must_reset_space_ids = TRUE;
394
 
 
395
 
                fprintf(stderr,
396
 
                        "InnoDB: Resetting space id's in the"
397
 
                        " doublewrite buffer\n");
398
 
        } else {
399
 
                trx_sys_multiple_tablespace_format = TRUE;
400
 
        }
401
 
 
402
 
        /* Read the pages from the doublewrite buffer to memory */
403
 
 
404
 
        fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, block1, 0,
405
 
               TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
406
 
               buf, NULL);
407
 
        fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, block2, 0,
408
 
               TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
409
 
               buf + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
410
 
               NULL);
411
 
        /* Check if any of these pages is half-written in data files, in the
412
 
        intended position */
413
 
 
414
 
        page = buf;
415
 
 
416
 
        for (i = 0; i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 2; i++) {
417
 
 
418
 
                page_no = mach_read_from_4(page + FIL_PAGE_OFFSET);
419
 
 
420
 
                if (trx_doublewrite_must_reset_space_ids) {
421
 
 
422
 
                        space_id = 0;
423
 
                        mach_write_to_4(page
424
 
                                        + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 0);
425
 
                        /* We do not need to calculate new checksums for the
426
 
                        pages because the field .._SPACE_ID does not affect
427
 
                        them. Write the page back to where we read it from. */
428
 
 
429
 
                        if (i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
430
 
                                source_page_no = block1 + i;
431
 
                        } else {
432
 
                                source_page_no = block2
433
 
                                        + i - TRX_SYS_DOUBLEWRITE_BLOCK_SIZE;
434
 
                        }
435
 
 
436
 
                        fil_io(OS_FILE_WRITE, TRUE, 0, source_page_no, 0,
437
 
                               UNIV_PAGE_SIZE, page, NULL);
438
 
                        /* printf("Resetting space id in page %lu\n",
439
 
                        source_page_no); */
440
 
                } else {
441
 
                        space_id = mach_read_from_4(
442
 
                                page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
443
 
                }
444
 
 
445
 
                if (!restore_corrupt_pages) {
446
 
                        /* The database was shut down gracefully: no need to
447
 
                        restore pages */
448
 
 
449
 
                } else if (!fil_tablespace_exists_in_mem(space_id)) {
450
 
                        /* Maybe we have dropped the single-table tablespace
451
 
                        and this page once belonged to it: do nothing */
452
 
 
453
 
                } else if (!fil_check_adress_in_tablespace(space_id,
454
 
                                                           page_no)) {
455
 
                        fprintf(stderr,
456
 
                                "InnoDB: Warning: a page in the"
457
 
                                " doublewrite buffer is not within space\n"
458
 
                                "InnoDB: bounds; space id %lu"
459
 
                                " page number %lu, page %lu in"
460
 
                                " doublewrite buf.\n",
461
 
                                (ulong) space_id, (ulong) page_no, (ulong) i);
462
 
 
463
 
                } else if (space_id == TRX_SYS_SPACE
464
 
                           && ((page_no >= block1
465
 
                                && page_no
466
 
                                < block1 + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)
467
 
                               || (page_no >= block2
468
 
                                   && page_no
469
 
                                   < (block2
470
 
                                      + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)))) {
471
 
 
472
 
                        /* It is an unwritten doublewrite buffer page:
473
 
                        do nothing */
474
 
                } else {
475
 
                        /* Read in the actual page from the data files */
476
 
 
477
 
                        fil_io(OS_FILE_READ, TRUE, space_id, page_no, 0,
478
 
                               UNIV_PAGE_SIZE, read_buf, NULL);
479
 
                        /* Check if the page is corrupt */
480
 
 
481
 
                        if (buf_page_is_corrupted(read_buf)) {
482
 
 
483
 
                                fprintf(stderr,
484
 
                                        "InnoDB: Warning: database page"
485
 
                                        " corruption or a failed\n"
486
 
                                        "InnoDB: file read of page %lu.\n",
487
 
                                        (ulong) page_no);
488
 
                                fprintf(stderr,
489
 
                                        "InnoDB: Trying to recover it from"
490
 
                                        " the doublewrite buffer.\n");
491
 
 
492
 
                                if (buf_page_is_corrupted(page)) {
493
 
                                        fprintf(stderr,
494
 
                                                "InnoDB: Dump of the page:\n");
495
 
                                        buf_page_print(read_buf);
496
 
                                        fprintf(stderr,
497
 
                                                "InnoDB: Dump of"
498
 
                                                " corresponding page"
499
 
                                                " in doublewrite buffer:\n");
500
 
                                        buf_page_print(page);
501
 
 
502
 
                                        fprintf(stderr,
503
 
                                                "InnoDB: Also the page in the"
504
 
                                                " doublewrite buffer"
505
 
                                                " is corrupt.\n"
506
 
                                                "InnoDB: Cannot continue"
507
 
                                                " operation.\n"
508
 
                                                "InnoDB: You can try to"
509
 
                                                " recover the database"
510
 
                                                " with the my.cnf\n"
511
 
                                                "InnoDB: option:\n"
512
 
                                                "InnoDB: set-variable="
513
 
                                                "innodb_force_recovery=6\n");
514
 
                                        exit(1);
515
 
                                }
516
 
 
517
 
                                /* Write the good page from the
518
 
                                doublewrite buffer to the intended
519
 
                                position */
520
 
 
521
 
                                fil_io(OS_FILE_WRITE, TRUE, space_id,
522
 
                                       page_no, 0,
523
 
                                       UNIV_PAGE_SIZE, page, NULL);
524
 
                                fprintf(stderr,
525
 
                                        "InnoDB: Recovered the page from"
526
 
                                        " the doublewrite buffer.\n");
527
 
                        }
528
 
                }
529
 
 
530
 
                page += UNIV_PAGE_SIZE;
531
 
        }
532
 
 
533
 
        fil_flush_file_spaces(FIL_TABLESPACE);
534
 
 
535
 
leave_func:
536
 
        ut_free(unaligned_read_buf);
537
 
}
538
 
 
539
 
/********************************************************************
540
 
Checks that trx is in the trx list. */
541
 
 
542
 
ibool
543
 
trx_in_trx_list(
544
 
/*============*/
545
 
                        /* out: TRUE if is in */
546
 
        trx_t*  in_trx) /* in: trx */
547
 
{
548
 
        trx_t*  trx;
549
 
 
550
 
        ut_ad(mutex_own(&(kernel_mutex)));
551
 
 
552
 
        trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
553
 
 
554
 
        while (trx != NULL) {
555
 
 
556
 
                if (trx == in_trx) {
557
 
 
558
 
                        return(TRUE);
559
 
                }
560
 
 
561
 
                trx = UT_LIST_GET_NEXT(trx_list, trx);
562
 
        }
563
 
 
564
 
        return(FALSE);
565
 
}
566
 
 
567
 
/*********************************************************************
568
 
Writes the value of max_trx_id to the file based trx system header. */
569
 
 
570
 
void
571
 
trx_sys_flush_max_trx_id(void)
572
 
/*==========================*/
573
 
{
574
 
        trx_sysf_t*     sys_header;
575
 
        mtr_t           mtr;
576
 
 
577
 
        ut_ad(mutex_own(&kernel_mutex));
578
 
 
579
 
        mtr_start(&mtr);
580
 
 
581
 
        sys_header = trx_sysf_get(&mtr);
582
 
 
583
 
        mlog_write_dulint(sys_header + TRX_SYS_TRX_ID_STORE,
584
 
                          trx_sys->max_trx_id, &mtr);
585
 
        mtr_commit(&mtr);
586
 
}
587
 
 
588
 
/*********************************************************************
589
 
Updates the offset information about the end of the MySQL binlog entry
590
 
which corresponds to the transaction just being committed. In a MySQL
591
 
replication slave updates the latest master binlog position up to which
592
 
replication has proceeded. */
593
 
 
594
 
void
595
 
trx_sys_update_mysql_binlog_offset(
596
 
/*===============================*/
597
 
        const char*     file_name,/* in: MySQL log file name */
598
 
        ib_longlong     offset, /* in: position in that log file */
599
 
        ulint           field,  /* in: offset of the MySQL log info field in
600
 
                                the trx sys header */
601
 
        mtr_t*          mtr)    /* in: mtr */
602
 
{
603
 
        trx_sysf_t*     sys_header;
604
 
 
605
 
        if (ut_strlen(file_name) >= TRX_SYS_DRIZZLE_LOG_NAME_LEN) {
606
 
 
607
 
                /* We cannot fit the name to the 512 bytes we have reserved */
608
 
 
609
 
                return;
610
 
        }
611
 
 
612
 
        sys_header = trx_sysf_get(mtr);
613
 
 
614
 
        if (mach_read_from_4(sys_header + field
615
 
                             + TRX_SYS_DRIZZLE_LOG_MAGIC_N_FLD)
616
 
            != TRX_SYS_DRIZZLE_LOG_MAGIC_N) {
617
 
 
618
 
                mlog_write_ulint(sys_header + field
619
 
                                 + TRX_SYS_DRIZZLE_LOG_MAGIC_N_FLD,
620
 
                                 TRX_SYS_DRIZZLE_LOG_MAGIC_N,
621
 
                                 MLOG_4BYTES, mtr);
622
 
        }
623
 
 
624
 
        if (0 != strcmp((char*) (sys_header + field + TRX_SYS_DRIZZLE_LOG_NAME),
625
 
                        file_name)) {
626
 
 
627
 
                mlog_write_string(sys_header + field
628
 
                                  + TRX_SYS_DRIZZLE_LOG_NAME,
629
 
                                  (byte*) file_name, 1 + ut_strlen(file_name),
630
 
                                  mtr);
631
 
        }
632
 
 
633
 
        if (mach_read_from_4(sys_header + field
634
 
                             + TRX_SYS_DRIZZLE_LOG_OFFSET_HIGH) > 0
635
 
            || (offset >> 32) > 0) {
636
 
 
637
 
                mlog_write_ulint(sys_header + field
638
 
                                 + TRX_SYS_DRIZZLE_LOG_OFFSET_HIGH,
639
 
                                 (ulint)(offset >> 32),
640
 
                                 MLOG_4BYTES, mtr);
641
 
        }
642
 
 
643
 
        mlog_write_ulint(sys_header + field
644
 
                         + TRX_SYS_DRIZZLE_LOG_OFFSET_LOW,
645
 
                         (ulint)(offset & 0xFFFFFFFFUL),
646
 
                         MLOG_4BYTES, mtr);
647
 
}
648
 
 
649
 
#ifdef UNIV_HOTBACKUP
/*********************************************************************
Reports the MySQL binlog position recorded in a trx system header page.
The position (high and low 32-bit words) and the binlog file name are
written to stderr, but only when the binlog info magic number stored in
the header equals TRX_SYS_DRIZZLE_LOG_MAGIC_N; otherwise nothing is
printed. */

void
trx_sys_print_mysql_binlog_offset_from_page(
/*========================================*/
        byte*   page)   /* in: buffer containing the trx system header page,
                        i.e., page number TRX_SYS_PAGE_NO in the tablespace */
{
        /* Start of the binlog info area inside the trx system header */
        trx_sysf_t*     binlog_info = page + TRX_SYS
                                      + TRX_SYS_DRIZZLE_LOG_INFO;

        if (mach_read_from_4(binlog_info + TRX_SYS_DRIZZLE_LOG_MAGIC_N_FLD)
            != TRX_SYS_DRIZZLE_LOG_MAGIC_N) {

                /* The magic number is missing: no binlog info to report */
                return;
        }

        fprintf(stderr,
                "ibbackup: Last MySQL binlog file position %lu %lu,"
                " file name %s\n",
                (ulong) mach_read_from_4(
                        binlog_info + TRX_SYS_DRIZZLE_LOG_OFFSET_HIGH),
                (ulong) mach_read_from_4(
                        binlog_info + TRX_SYS_DRIZZLE_LOG_OFFSET_LOW),
                binlog_info + TRX_SYS_DRIZZLE_LOG_NAME);
}
#endif /* UNIV_HOTBACKUP */
682
 
 
683
 
/*********************************************************************
684
 
Stores the MySQL binlog offset info in the trx system header if
685
 
the magic number shows it valid, and print the info to stderr */
686
 
 
687
 
void
688
 
trx_sys_print_mysql_binlog_offset(void)
689
 
/*===================================*/
690
 
{
691
 
        trx_sysf_t*     sys_header;
692
 
        mtr_t           mtr;
693
 
        ulint           trx_sys_mysql_bin_log_pos_high;
694
 
        ulint           trx_sys_mysql_bin_log_pos_low;
695
 
 
696
 
        mtr_start(&mtr);
697
 
 
698
 
        sys_header = trx_sysf_get(&mtr);
699
 
 
700
 
        if (mach_read_from_4(sys_header + TRX_SYS_DRIZZLE_LOG_INFO
701
 
                             + TRX_SYS_DRIZZLE_LOG_MAGIC_N_FLD)
702
 
            != TRX_SYS_DRIZZLE_LOG_MAGIC_N) {
703
 
 
704
 
                mtr_commit(&mtr);
705
 
 
706
 
                return;
707
 
        }
708
 
 
709
 
        trx_sys_mysql_bin_log_pos_high = mach_read_from_4(
710
 
                sys_header + TRX_SYS_DRIZZLE_LOG_INFO
711
 
                + TRX_SYS_DRIZZLE_LOG_OFFSET_HIGH);
712
 
        trx_sys_mysql_bin_log_pos_low = mach_read_from_4(
713
 
                sys_header + TRX_SYS_DRIZZLE_LOG_INFO
714
 
                + TRX_SYS_DRIZZLE_LOG_OFFSET_LOW);
715
 
 
716
 
        trx_sys_mysql_bin_log_pos
717
 
                = (((ib_longlong)trx_sys_mysql_bin_log_pos_high) << 32)
718
 
                + (ib_longlong)trx_sys_mysql_bin_log_pos_low;
719
 
 
720
 
        ut_memcpy(trx_sys_mysql_bin_log_name,
721
 
                  sys_header + TRX_SYS_DRIZZLE_LOG_INFO
722
 
                  + TRX_SYS_DRIZZLE_LOG_NAME, TRX_SYS_DRIZZLE_LOG_NAME_LEN);
723
 
 
724
 
        fprintf(stderr,
725
 
                "InnoDB: Last MySQL binlog file position %lu %lu,"
726
 
                " file name %s\n",
727
 
                trx_sys_mysql_bin_log_pos_high, trx_sys_mysql_bin_log_pos_low,
728
 
                trx_sys_mysql_bin_log_name);
729
 
 
730
 
        mtr_commit(&mtr);
731
 
}
732
 
 
733
 
/*********************************************************************
734
 
Prints to stderr the MySQL master log offset info in the trx system header if
735
 
the magic number shows it valid. */
736
 
 
737
 
void
738
 
trx_sys_print_mysql_master_log_pos(void)
739
 
/*====================================*/
740
 
{
741
 
        trx_sysf_t*     sys_header;
742
 
        mtr_t           mtr;
743
 
 
744
 
        mtr_start(&mtr);
745
 
 
746
 
        sys_header = trx_sysf_get(&mtr);
747
 
 
748
 
        if (mach_read_from_4(sys_header + TRX_SYS_DRIZZLE_MASTER_LOG_INFO
749
 
                             + TRX_SYS_DRIZZLE_LOG_MAGIC_N_FLD)
750
 
            != TRX_SYS_DRIZZLE_LOG_MAGIC_N) {
751
 
 
752
 
                mtr_commit(&mtr);
753
 
 
754
 
                return;
755
 
        }
756
 
 
757
 
        fprintf(stderr,
758
 
                "InnoDB: In a MySQL replication slave the last"
759
 
                " master binlog file\n"
760
 
                "InnoDB: position %lu %lu, file name %s\n",
761
 
                (ulong) mach_read_from_4(sys_header
762
 
                                         + TRX_SYS_DRIZZLE_MASTER_LOG_INFO
763
 
                                         + TRX_SYS_DRIZZLE_LOG_OFFSET_HIGH),
764
 
                (ulong) mach_read_from_4(sys_header
765
 
                                         + TRX_SYS_DRIZZLE_MASTER_LOG_INFO
766
 
                                         + TRX_SYS_DRIZZLE_LOG_OFFSET_LOW),
767
 
                sys_header + TRX_SYS_DRIZZLE_MASTER_LOG_INFO
768
 
                + TRX_SYS_DRIZZLE_LOG_NAME);
769
 
        /* Copy the master log position info to global variables we can
770
 
        use in ha_innobase.cc to initialize glob_mi to right values */
771
 
 
772
 
        ut_memcpy(trx_sys_mysql_master_log_name,
773
 
                  sys_header + TRX_SYS_DRIZZLE_MASTER_LOG_INFO
774
 
                  + TRX_SYS_DRIZZLE_LOG_NAME,
775
 
                  TRX_SYS_DRIZZLE_LOG_NAME_LEN);
776
 
 
777
 
        trx_sys_mysql_master_log_pos
778
 
                = (((ib_longlong) mach_read_from_4(
779
 
                            sys_header + TRX_SYS_DRIZZLE_MASTER_LOG_INFO
780
 
                            + TRX_SYS_DRIZZLE_LOG_OFFSET_HIGH)) << 32)
781
 
                + ((ib_longlong) mach_read_from_4(
782
 
                           sys_header + TRX_SYS_DRIZZLE_MASTER_LOG_INFO
783
 
                           + TRX_SYS_DRIZZLE_LOG_OFFSET_LOW));
784
 
        mtr_commit(&mtr);
785
 
}
786
 
 
787
 
/********************************************************************
788
 
Looks for a free slot for a rollback segment in the trx system file copy. */
789
 
 
790
 
ulint
791
 
trx_sysf_rseg_find_free(
792
 
/*====================*/
793
 
                        /* out: slot index or ULINT_UNDEFINED if not found */
794
 
        mtr_t*  mtr)    /* in: mtr */
795
 
{
796
 
        trx_sysf_t*     sys_header;
797
 
        ulint           page_no;
798
 
        ulint           i;
799
 
 
800
 
        ut_ad(mutex_own(&(kernel_mutex)));
801
 
 
802
 
        sys_header = trx_sysf_get(mtr);
803
 
 
804
 
        for (i = 0; i < TRX_SYS_N_RSEGS; i++) {
805
 
 
806
 
                page_no = trx_sysf_rseg_get_page_no(sys_header, i, mtr);
807
 
 
808
 
                if (page_no == FIL_NULL) {
809
 
 
810
 
                        return(i);
811
 
                }
812
 
        }
813
 
 
814
 
        return(ULINT_UNDEFINED);
815
 
}
816
 
 
817
 
/*********************************************************************
818
 
Creates the file page for the transaction system. This function is called only
819
 
at the database creation, before trx_sys_init. */
820
 
static
821
 
void
822
 
trx_sysf_create(
823
 
/*============*/
824
 
        mtr_t*  mtr)    /* in: mtr */
825
 
{
826
 
        trx_sysf_t*     sys_header;
827
 
        ulint           slot_no;
828
 
        page_t*         page;
829
 
        ulint           page_no;
830
 
        ulint           i;
831
 
 
832
 
        ut_ad(mtr);
833
 
 
834
 
        /* Note that below we first reserve the file space x-latch, and
835
 
        then enter the kernel: we must do it in this order to conform
836
 
        to the latching order rules. */
837
 
 
838
 
        mtr_x_lock(fil_space_get_latch(TRX_SYS_SPACE), mtr);
839
 
        mutex_enter(&kernel_mutex);
840
 
 
841
 
        /* Create the trx sys file block in a new allocated file segment */
842
 
        page = fseg_create(TRX_SYS_SPACE, 0, TRX_SYS + TRX_SYS_FSEG_HEADER,
843
 
                           mtr);
844
 
        ut_a(buf_frame_get_page_no(page) == TRX_SYS_PAGE_NO);
845
 
 
846
 
#ifdef UNIV_SYNC_DEBUG
847
 
        buf_page_dbg_add_level(page, SYNC_TRX_SYS_HEADER);
848
 
#endif /* UNIV_SYNC_DEBUG */
849
 
 
850
 
        mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_TYPE_TRX_SYS,
851
 
                         MLOG_2BYTES, mtr);
852
 
 
853
 
        /* Reset the doublewrite buffer magic number to zero so that we
854
 
        know that the doublewrite buffer has not yet been created (this
855
 
        suppresses a Valgrind warning) */
856
 
 
857
 
        mlog_write_ulint(page + TRX_SYS_DOUBLEWRITE
858
 
                         + TRX_SYS_DOUBLEWRITE_MAGIC, 0, MLOG_4BYTES, mtr);
859
 
 
860
 
        sys_header = trx_sysf_get(mtr);
861
 
 
862
 
        /* Start counting transaction ids from number 1 up */
863
 
        mlog_write_dulint(sys_header + TRX_SYS_TRX_ID_STORE,
864
 
                          ut_dulint_create(0, 1), mtr);
865
 
 
866
 
        /* Reset the rollback segment slots */
867
 
        for (i = 0; i < TRX_SYS_N_RSEGS; i++) {
868
 
 
869
 
                trx_sysf_rseg_set_space(sys_header, i, ULINT_UNDEFINED, mtr);
870
 
                trx_sysf_rseg_set_page_no(sys_header, i, FIL_NULL, mtr);
871
 
        }
872
 
 
873
 
        /* The remaining area (up to the page trailer) is uninitialized.
874
 
        Silence Valgrind warnings about it. */
875
 
        UNIV_MEM_VALID(sys_header + (TRX_SYS_RSEGS
876
 
                                     + TRX_SYS_N_RSEGS * TRX_SYS_RSEG_SLOT_SIZE
877
 
                                     + TRX_SYS_RSEG_SPACE),
878
 
                       (UNIV_PAGE_SIZE - FIL_PAGE_DATA_END
879
 
                        - (TRX_SYS_RSEGS
880
 
                           + TRX_SYS_N_RSEGS * TRX_SYS_RSEG_SLOT_SIZE
881
 
                           + TRX_SYS_RSEG_SPACE))
882
 
                       + page - sys_header);
883
 
 
884
 
        /* Create the first rollback segment in the SYSTEM tablespace */
885
 
        page_no = trx_rseg_header_create(TRX_SYS_SPACE, ULINT_MAX, &slot_no,
886
 
                                         mtr);
887
 
        ut_a(slot_no == TRX_SYS_SYSTEM_RSEG_ID);
888
 
        ut_a(page_no != FIL_NULL);
889
 
 
890
 
        mutex_exit(&kernel_mutex);
891
 
}
892
 
 
893
 
/*********************************************************************
894
 
Creates and initializes the central memory structures for the transaction
895
 
system. This is called when the database is started. */
896
 
 
897
 
void
898
 
trx_sys_init_at_db_start(void)
899
 
/*==========================*/
900
 
{
901
 
        trx_sysf_t*     sys_header;
902
 
        ib_longlong     rows_to_undo    = 0;
903
 
        const char*     unit            = "";
904
 
        trx_t*          trx;
905
 
        mtr_t           mtr;
906
 
 
907
 
        mtr_start(&mtr);
908
 
 
909
 
        ut_ad(trx_sys == NULL);
910
 
 
911
 
        mutex_enter(&kernel_mutex);
912
 
 
913
 
        trx_sys = mem_alloc(sizeof(trx_sys_t));
914
 
 
915
 
        sys_header = trx_sysf_get(&mtr);
916
 
 
917
 
        trx_rseg_list_and_array_init(sys_header, &mtr);
918
 
 
919
 
        trx_sys->latest_rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
920
 
 
921
 
        /* VERY important: after the database is started, max_trx_id value is
922
 
        divisible by TRX_SYS_TRX_ID_WRITE_MARGIN, and the 'if' in
923
 
        trx_sys_get_new_trx_id will evaluate to TRUE when the function
924
 
        is first time called, and the value for trx id will be written
925
 
        to the disk-based header! Thus trx id values will not overlap when
926
 
        the database is repeatedly started! */
927
 
 
928
 
        trx_sys->max_trx_id = ut_dulint_add(
929
 
                ut_dulint_align_up(mtr_read_dulint(
930
 
                                           sys_header
931
 
                                           + TRX_SYS_TRX_ID_STORE, &mtr),
932
 
                                   TRX_SYS_TRX_ID_WRITE_MARGIN),
933
 
                2 * TRX_SYS_TRX_ID_WRITE_MARGIN);
934
 
 
935
 
        UT_LIST_INIT(trx_sys->mysql_trx_list);
936
 
        trx_lists_init_at_db_start();
937
 
 
938
 
        if (UT_LIST_GET_LEN(trx_sys->trx_list) > 0) {
939
 
                trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
940
 
 
941
 
                for (;;) {
942
 
 
943
 
                        if ( trx->conc_state != TRX_PREPARED) {
944
 
                                rows_to_undo += ut_conv_dulint_to_longlong(
945
 
                                        trx->undo_no);
946
 
                        }
947
 
 
948
 
                        trx = UT_LIST_GET_NEXT(trx_list, trx);
949
 
 
950
 
                        if (!trx) {
951
 
                                break;
952
 
                        }
953
 
                }
954
 
 
955
 
                if (rows_to_undo > 1000000000) {
956
 
                        unit = "M";
957
 
                        rows_to_undo = rows_to_undo / 1000000;
958
 
                }
959
 
 
960
 
                fprintf(stderr,
961
 
                        "InnoDB: %lu transaction(s) which must be"
962
 
                        " rolled back or cleaned up\n"
963
 
                        "InnoDB: in total %lu%s row operations to undo\n",
964
 
                        (ulong) UT_LIST_GET_LEN(trx_sys->trx_list),
965
 
                        (ulong) rows_to_undo, unit);
966
 
 
967
 
                fprintf(stderr, "InnoDB: Trx id counter is %lu %lu\n",
968
 
                        (ulong) ut_dulint_get_high(trx_sys->max_trx_id),
969
 
                        (ulong) ut_dulint_get_low(trx_sys->max_trx_id));
970
 
        }
971
 
 
972
 
        UT_LIST_INIT(trx_sys->view_list);
973
 
 
974
 
        trx_purge_sys_create();
975
 
 
976
 
        mutex_exit(&kernel_mutex);
977
 
 
978
 
        mtr_commit(&mtr);
979
 
}
980
 
 
981
 
/*********************************************************************
982
 
Creates and initializes the transaction system at the database creation. */
983
 
 
984
 
void
985
 
trx_sys_create(void)
986
 
/*================*/
987
 
{
988
 
        mtr_t   mtr;
989
 
 
990
 
        mtr_start(&mtr);
991
 
 
992
 
        trx_sysf_create(&mtr);
993
 
 
994
 
        mtr_commit(&mtr);
995
 
 
996
 
        trx_sys_init_at_db_start();
997
 
}