~drizzle-trunk/drizzle/development

« back to all changes in this revision

Viewing changes to storage/innobase/log/log0recv.c

  • Committer: Brian Aker
  • Date: 2008-07-08 16:17:31 UTC
  • Revision ID: brian@tangent.org-20080708161731-io36j7igglok79py
DATE cleanup.

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/******************************************************
 
2
Recovery
 
3
 
 
4
(c) 1997 Innobase Oy
 
5
 
 
6
Created 9/20/1997 Heikki Tuuri
 
7
*******************************************************/
 
8
 
 
9
#include "log0recv.h"
 
10
 
 
11
#ifdef UNIV_NONINL
 
12
#include "log0recv.ic"
 
13
#endif
 
14
 
 
15
#include "mem0mem.h"
 
16
#include "buf0buf.h"
 
17
#include "buf0flu.h"
 
18
#include "buf0rea.h"
 
19
#include "srv0srv.h"
 
20
#include "srv0start.h"
 
21
#include "mtr0mtr.h"
 
22
#include "mtr0log.h"
 
23
#include "page0page.h"
 
24
#include "page0cur.h"
 
25
#include "btr0btr.h"
 
26
#include "btr0cur.h"
 
27
#include "ibuf0ibuf.h"
 
28
#include "trx0undo.h"
 
29
#include "trx0rec.h"
 
30
#include "trx0roll.h"
 
31
#include "btr0cur.h"
 
32
#include "btr0cur.h"
 
33
#include "btr0cur.h"
 
34
#include "dict0boot.h"
 
35
#include "fil0fil.h"
 
36
#include "sync0sync.h"
 
37
 
 
38
#ifdef UNIV_HOTBACKUP
 
39
/* This is set to FALSE if the backup was originally taken with the
 
40
ibbackup --include regexp option: then we do not want to create tables in
 
41
directories which were not included */
 
42
ibool   recv_replay_file_ops    = TRUE;
 
43
#endif /* UNIV_HOTBACKUP */
 
44
 
 
45
/* Log records are stored in the hash table in chunks at most of this size;
 
46
this must be less than UNIV_PAGE_SIZE as it is stored in the buffer pool */
 
47
#define RECV_DATA_BLOCK_SIZE    (MEM_MAX_ALLOC_IN_BUF - sizeof(recv_data_t))
 
48
 
 
49
/* Read-ahead area in applying log records to file pages */
 
50
#define RECV_READ_AHEAD_AREA    32
 
51
 
 
52
/* The recovery system object: recv_sys_create() allocates it while this is
still NULL, and the recv_sys_* functions in this file operate on it */
recv_sys_t*     recv_sys = NULL;
 
53
ibool           recv_recovery_on = FALSE;
 
54
ibool           recv_recovery_from_backup_on = FALSE;
 
55
 
 
56
/* recv_init_crash_recovery() may be called only while this is FALSE (see
the comment on its prototype below) */
ibool           recv_needed_recovery = FALSE;
 
57
 
 
58
ibool           recv_lsn_checks_on = FALSE;
 
59
 
 
60
/* There are two conditions under which we scan the logs, the first
 
61
is normal startup and the second is when we do a recovery from an
 
62
archive.
 
63
This flag is set if we are doing a scan from the last checkpoint during
 
64
startup. If we find log entries that were written after the last checkpoint
 
65
we know that the server was not cleanly shutdown. We must then initialize
 
66
the crash recovery environment before attempting to store these entries in
 
67
the log hash table. */
 
68
ibool   recv_log_scan_is_startup_type = FALSE;
 
69
 
 
70
/* If the following is TRUE, the buffer pool file pages must be invalidated
 
71
after recovery and no ibuf operations are allowed; this becomes TRUE if
 
72
the log record hash table becomes too full, and log records must be merged
 
73
to file pages already before the recovery is finished: in this case no
 
74
ibuf operations are allowed, as they could modify the pages read in the
 
75
buffer pool before the pages have been recovered to the up-to-date state */
 
76
 
 
77
/* Recovery is running and no operations on the log files are allowed
 
78
yet: the variable name is misleading */
 
79
 
 
80
ibool   recv_no_ibuf_operations = FALSE;
 
81
 
 
82
/* The following counter is used to decide when to print info on
 
83
log scan */
 
84
ulint   recv_scan_print_counter = 0;
 
85
 
 
86
ibool   recv_is_from_backup     = FALSE;
 
87
#ifdef UNIV_HOTBACKUP
 
88
ibool   recv_is_making_a_backup = FALSE;
 
89
#else
 
90
# define recv_is_making_a_backup FALSE
 
91
#endif /* UNIV_HOTBACKUP */
 
92
 
 
93
ulint   recv_previous_parsed_rec_type   = 999999;
 
94
ulint   recv_previous_parsed_rec_offset = 0;
 
95
ulint   recv_previous_parsed_rec_is_multi = 0;
 
96
 
 
97
ulint   recv_max_parsed_page_no         = 0;
 
98
 
 
99
/* This many frames must be left free in the buffer pool when we scan
 
100
the log and store the scanned log records in the buffer pool: we will
 
101
use these free frames to read in pages when we start applying the
 
102
log records to the database. */
 
103
 
 
104
ulint   recv_n_pool_free_frames         = 256;
 
105
 
 
106
/* The maximum lsn we see for a page during the recovery process. If this
 
107
is bigger than the lsn we are able to scan up to, that is an indication that
 
108
the recovery failed and the database may be corrupt. */
 
109
 
 
110
dulint  recv_max_page_lsn;
 
111
 
 
112
/* prototypes */
 
113
 
 
114
/***********************************************************
 
115
Initialize crash recovery environment. Can be called iff
 
116
recv_needed_recovery == FALSE. */
 
117
static
 
118
void
 
119
recv_init_crash_recovery(void);
 
120
/*===========================*/
 
121
 
 
122
/************************************************************
 
123
Creates the recovery system. */
 
124
 
 
125
void
 
126
recv_sys_create(void)
 
127
/*=================*/
 
128
{
 
129
        if (recv_sys != NULL) {
 
130
 
 
131
                return;
 
132
        }
 
133
 
 
134
        recv_sys = mem_alloc(sizeof(recv_sys_t));
 
135
 
 
136
        mutex_create(&recv_sys->mutex, SYNC_RECV);
 
137
 
 
138
        recv_sys->heap = NULL;
 
139
        recv_sys->addr_hash = NULL;
 
140
}
 
141
 
 
142
/************************************************************
 
143
Inits the recovery system for a recovery operation. */
 
144
 
 
145
void
 
146
recv_sys_init(
 
147
/*==========*/
 
148
        ibool   recover_from_backup,    /* in: TRUE if this is called
 
149
                                        to recover from a hot backup */
 
150
        ulint   available_memory)       /* in: available memory in bytes */
 
151
{
 
152
        if (recv_sys->heap != NULL) {
 
153
 
 
154
                return;
 
155
        }
 
156
 
 
157
        mutex_enter(&(recv_sys->mutex));
 
158
 
 
159
        if (!recover_from_backup) {
 
160
                recv_sys->heap = mem_heap_create_in_buffer(256);
 
161
        } else {
 
162
                recv_sys->heap = mem_heap_create(256);
 
163
                recv_is_from_backup = TRUE;
 
164
        }
 
165
 
 
166
        recv_sys->buf = ut_malloc(RECV_PARSING_BUF_SIZE);
 
167
        recv_sys->len = 0;
 
168
        recv_sys->recovered_offset = 0;
 
169
 
 
170
        recv_sys->addr_hash = hash_create(available_memory / 64);
 
171
        recv_sys->n_addrs = 0;
 
172
 
 
173
        recv_sys->apply_log_recs = FALSE;
 
174
        recv_sys->apply_batch_on = FALSE;
 
175
 
 
176
        recv_sys->last_block_buf_start = mem_alloc(2 * OS_FILE_LOG_BLOCK_SIZE);
 
177
 
 
178
        recv_sys->last_block = ut_align(recv_sys->last_block_buf_start,
 
179
                                        OS_FILE_LOG_BLOCK_SIZE);
 
180
        recv_sys->found_corrupt_log = FALSE;
 
181
 
 
182
        recv_max_page_lsn = ut_dulint_zero;
 
183
 
 
184
        mutex_exit(&(recv_sys->mutex));
 
185
}
 
186
 
 
187
/************************************************************
 
188
Empties the hash table when it has been fully processed. */
 
189
static
 
190
void
 
191
recv_sys_empty_hash(void)
 
192
/*=====================*/
 
193
{
 
194
        ut_ad(mutex_own(&(recv_sys->mutex)));
 
195
 
 
196
        if (recv_sys->n_addrs != 0) {
 
197
                fprintf(stderr,
 
198
                        "InnoDB: Error: %lu pages with log records"
 
199
                        " were left unprocessed!\n"
 
200
                        "InnoDB: Maximum page number with"
 
201
                        " log records on it %lu\n",
 
202
                        (ulong) recv_sys->n_addrs,
 
203
                        (ulong) recv_max_parsed_page_no);
 
204
                ut_error;
 
205
        }
 
206
 
 
207
        hash_table_free(recv_sys->addr_hash);
 
208
        mem_heap_empty(recv_sys->heap);
 
209
 
 
210
        recv_sys->addr_hash = hash_create(buf_pool_get_curr_size() / 256);
 
211
}
 
212
 
 
213
#ifndef UNIV_LOG_DEBUG
 
214
/************************************************************
 
215
Frees the recovery system. */
 
216
static
 
217
void
 
218
recv_sys_free(void)
 
219
/*===============*/
 
220
{
 
221
        mutex_enter(&(recv_sys->mutex));
 
222
 
 
223
        hash_table_free(recv_sys->addr_hash);
 
224
        mem_heap_free(recv_sys->heap);
 
225
        ut_free(recv_sys->buf);
 
226
        mem_free(recv_sys->last_block_buf_start);
 
227
 
 
228
        recv_sys->addr_hash = NULL;
 
229
        recv_sys->heap = NULL;
 
230
 
 
231
        mutex_exit(&(recv_sys->mutex));
 
232
}
 
233
#endif /* UNIV_LOG_DEBUG */
 
234
 
 
235
/************************************************************
 
236
Truncates possible corrupted or extra records from a log group. */
 
237
static
 
238
void
 
239
recv_truncate_group(
 
240
/*================*/
 
241
        log_group_t*    group,          /* in: log group */
 
242
        dulint          recovered_lsn,  /* in: recovery succeeded up to this
 
243
                                        lsn */
 
244
        dulint          limit_lsn,      /* in: this was the limit for
 
245
                                        recovery */
 
246
        dulint          checkpoint_lsn, /* in: recovery was started from this
 
247
                                        checkpoint */
 
248
        dulint          archived_lsn)   /* in: the log has been archived up to
 
249
                                        this lsn */
 
250
{
 
251
        dulint  start_lsn;
 
252
        dulint  end_lsn;
 
253
        dulint  finish_lsn1;
 
254
        dulint  finish_lsn2;
 
255
        dulint  finish_lsn;
 
256
        ulint   len;
 
257
        ulint   i;
 
258
 
 
259
        if (ut_dulint_cmp(archived_lsn, ut_dulint_max) == 0) {
 
260
                /* Checkpoint was taken in the NOARCHIVELOG mode */
 
261
                archived_lsn = checkpoint_lsn;
 
262
        }
 
263
 
 
264
        finish_lsn1 = ut_dulint_add(ut_dulint_align_down(
 
265
                                            archived_lsn,
 
266
                                            OS_FILE_LOG_BLOCK_SIZE),
 
267
                                    log_group_get_capacity(group));
 
268
 
 
269
        finish_lsn2 = ut_dulint_add(ut_dulint_align_up(
 
270
                                            recovered_lsn,
 
271
                                            OS_FILE_LOG_BLOCK_SIZE),
 
272
                                    recv_sys->last_log_buf_size);
 
273
 
 
274
        if (ut_dulint_cmp(limit_lsn, ut_dulint_max) != 0) {
 
275
                /* We do not know how far we should erase log records: erase
 
276
                as much as possible */
 
277
 
 
278
                finish_lsn = finish_lsn1;
 
279
        } else {
 
280
                /* It is enough to erase the length of the log buffer */
 
281
                finish_lsn = ut_dulint_get_min(finish_lsn1, finish_lsn2);
 
282
        }
 
283
 
 
284
        ut_a(RECV_SCAN_SIZE <= log_sys->buf_size);
 
285
 
 
286
        /* Write the log buffer full of zeros */
 
287
        for (i = 0; i < RECV_SCAN_SIZE; i++) {
 
288
 
 
289
                *(log_sys->buf + i) = '\0';
 
290
        }
 
291
 
 
292
        start_lsn = ut_dulint_align_down(recovered_lsn,
 
293
                                         OS_FILE_LOG_BLOCK_SIZE);
 
294
 
 
295
        if (ut_dulint_cmp(start_lsn, recovered_lsn) != 0) {
 
296
                /* Copy the last incomplete log block to the log buffer and
 
297
                edit its data length: */
 
298
 
 
299
                ut_memcpy(log_sys->buf, recv_sys->last_block,
 
300
                          OS_FILE_LOG_BLOCK_SIZE);
 
301
                log_block_set_data_len(log_sys->buf, ut_dulint_minus(
 
302
                                               recovered_lsn, start_lsn));
 
303
        }
 
304
 
 
305
        if (ut_dulint_cmp(start_lsn, finish_lsn) >= 0) {
 
306
 
 
307
                return;
 
308
        }
 
309
 
 
310
        for (;;) {
 
311
                end_lsn = ut_dulint_add(start_lsn, RECV_SCAN_SIZE);
 
312
 
 
313
                if (ut_dulint_cmp(end_lsn, finish_lsn) > 0) {
 
314
 
 
315
                        end_lsn = finish_lsn;
 
316
                }
 
317
 
 
318
                len = ut_dulint_minus(end_lsn, start_lsn);
 
319
 
 
320
                log_group_write_buf(group, log_sys->buf, len, start_lsn, 0);
 
321
                if (ut_dulint_cmp(end_lsn, finish_lsn) >= 0) {
 
322
 
 
323
                        return;
 
324
                }
 
325
 
 
326
                /* Write the log buffer full of zeros */
 
327
                for (i = 0; i < RECV_SCAN_SIZE; i++) {
 
328
 
 
329
                        *(log_sys->buf + i) = '\0';
 
330
                }
 
331
 
 
332
                start_lsn = end_lsn;
 
333
        }
 
334
}
 
335
 
 
336
/************************************************************
 
337
Copies the log segment between group->recovered_lsn and recovered_lsn from the
 
338
most up-to-date log group to group, so that it contains the latest log data. */
 
339
static
 
340
void
 
341
recv_copy_group(
 
342
/*============*/
 
343
        log_group_t*    up_to_date_group,       /* in: the most up-to-date log
 
344
                                                group */
 
345
        log_group_t*    group,                  /* in: copy to this log
 
346
                                                group */
 
347
        dulint          recovered_lsn)          /* in: recovery succeeded up
 
348
                                                to this lsn */
 
349
{
 
350
        dulint  start_lsn;
 
351
        dulint  end_lsn;
 
352
        ulint   len;
 
353
 
 
354
        if (ut_dulint_cmp(group->scanned_lsn, recovered_lsn) >= 0) {
 
355
 
 
356
                return;
 
357
        }
 
358
 
 
359
        ut_a(RECV_SCAN_SIZE <= log_sys->buf_size);
 
360
 
 
361
        start_lsn = ut_dulint_align_down(group->scanned_lsn,
 
362
                                         OS_FILE_LOG_BLOCK_SIZE);
 
363
        for (;;) {
 
364
                end_lsn = ut_dulint_add(start_lsn, RECV_SCAN_SIZE);
 
365
 
 
366
                if (ut_dulint_cmp(end_lsn, recovered_lsn) > 0) {
 
367
                        end_lsn = ut_dulint_align_up(recovered_lsn,
 
368
                                                     OS_FILE_LOG_BLOCK_SIZE);
 
369
                }
 
370
 
 
371
                log_group_read_log_seg(LOG_RECOVER, log_sys->buf,
 
372
                                       up_to_date_group, start_lsn, end_lsn);
 
373
 
 
374
                len = ut_dulint_minus(end_lsn, start_lsn);
 
375
 
 
376
                log_group_write_buf(group, log_sys->buf, len, start_lsn, 0);
 
377
 
 
378
                if (ut_dulint_cmp(end_lsn, recovered_lsn) >= 0) {
 
379
 
 
380
                        return;
 
381
                }
 
382
 
 
383
                start_lsn = end_lsn;
 
384
        }
 
385
}
 
386
 
 
387
/************************************************************
 
388
Copies a log segment from the most up-to-date log group to the other log
 
389
groups, so that they all contain the latest log data. Also writes the info
 
390
about the latest checkpoint to the groups, and inits the fields in the group
 
391
memory structs to up-to-date values. */
 
392
static
 
393
void
 
394
recv_synchronize_groups(
 
395
/*====================*/
 
396
        log_group_t*    up_to_date_group)       /* in: the most up-to-date
 
397
                                                log group */
 
398
{
 
399
        log_group_t*    group;
 
400
        dulint          start_lsn;
 
401
        dulint          end_lsn;
 
402
        dulint          recovered_lsn;
 
403
        dulint          limit_lsn;
 
404
 
 
405
        recovered_lsn = recv_sys->recovered_lsn;
 
406
        limit_lsn = recv_sys->limit_lsn;
 
407
 
 
408
        /* Read the last recovered log block to the recovery system buffer:
 
409
        the block is always incomplete */
 
410
 
 
411
        start_lsn = ut_dulint_align_down(recovered_lsn,
 
412
                                         OS_FILE_LOG_BLOCK_SIZE);
 
413
        end_lsn = ut_dulint_align_up(recovered_lsn, OS_FILE_LOG_BLOCK_SIZE);
 
414
 
 
415
        ut_a(ut_dulint_cmp(start_lsn, end_lsn) != 0);
 
416
 
 
417
        log_group_read_log_seg(LOG_RECOVER, recv_sys->last_block,
 
418
                               up_to_date_group, start_lsn, end_lsn);
 
419
 
 
420
        group = UT_LIST_GET_FIRST(log_sys->log_groups);
 
421
 
 
422
        while (group) {
 
423
                if (group != up_to_date_group) {
 
424
 
 
425
                        /* Copy log data if needed */
 
426
 
 
427
                        recv_copy_group(group, up_to_date_group,
 
428
                                        recovered_lsn);
 
429
                }
 
430
 
 
431
                /* Update the fields in the group struct to correspond to
 
432
                recovered_lsn */
 
433
 
 
434
                log_group_set_fields(group, recovered_lsn);
 
435
 
 
436
                group = UT_LIST_GET_NEXT(log_groups, group);
 
437
        }
 
438
 
 
439
        /* Copy the checkpoint info to the groups; remember that we have
 
440
        incremented checkpoint_no by one, and the info will not be written
 
441
        over the max checkpoint info, thus making the preservation of max
 
442
        checkpoint info on disk certain */
 
443
 
 
444
        log_groups_write_checkpoint_info();
 
445
 
 
446
        mutex_exit(&(log_sys->mutex));
 
447
 
 
448
        /* Wait for the checkpoint write to complete */
 
449
        rw_lock_s_lock(&(log_sys->checkpoint_lock));
 
450
        rw_lock_s_unlock(&(log_sys->checkpoint_lock));
 
451
 
 
452
        mutex_enter(&(log_sys->mutex));
 
453
}
 
454
 
 
455
/***************************************************************************
 
456
Checks the consistency of the checkpoint info */
 
457
static
 
458
ibool
 
459
recv_check_cp_is_consistent(
 
460
/*========================*/
 
461
                        /* out: TRUE if ok */
 
462
        byte*   buf)    /* in: buffer containing checkpoint info */
 
463
{
 
464
        ulint   fold;
 
465
 
 
466
        fold = ut_fold_binary(buf, LOG_CHECKPOINT_CHECKSUM_1);
 
467
 
 
468
        if ((fold & 0xFFFFFFFFUL) != mach_read_from_4(
 
469
                    buf + LOG_CHECKPOINT_CHECKSUM_1)) {
 
470
                return(FALSE);
 
471
        }
 
472
 
 
473
        fold = ut_fold_binary(buf + LOG_CHECKPOINT_LSN,
 
474
                              LOG_CHECKPOINT_CHECKSUM_2 - LOG_CHECKPOINT_LSN);
 
475
 
 
476
        if ((fold & 0xFFFFFFFFUL) != mach_read_from_4(
 
477
                    buf + LOG_CHECKPOINT_CHECKSUM_2)) {
 
478
                return(FALSE);
 
479
        }
 
480
 
 
481
        return(TRUE);
 
482
}
 
483
 
 
484
/************************************************************
 
485
Looks for the maximum consistent checkpoint from the log groups. */
 
486
static
 
487
ulint
 
488
recv_find_max_checkpoint(
 
489
/*=====================*/
 
490
                                        /* out: error code or DB_SUCCESS */
 
491
        log_group_t**   max_group,      /* out: max group */
 
492
        ulint*          max_field)      /* out: LOG_CHECKPOINT_1 or
 
493
                                        LOG_CHECKPOINT_2 */
 
494
{
 
495
        log_group_t*    group;
 
496
        dulint          max_no;
 
497
        dulint          checkpoint_no;
 
498
        ulint           field;
 
499
        byte*           buf;
 
500
 
 
501
        group = UT_LIST_GET_FIRST(log_sys->log_groups);
 
502
 
 
503
        max_no = ut_dulint_zero;
 
504
        *max_group = NULL;
 
505
        *max_field = 0;
 
506
 
 
507
        buf = log_sys->checkpoint_buf;
 
508
 
 
509
        while (group) {
 
510
                group->state = LOG_GROUP_CORRUPTED;
 
511
 
 
512
                for (field = LOG_CHECKPOINT_1; field <= LOG_CHECKPOINT_2;
 
513
                     field += LOG_CHECKPOINT_2 - LOG_CHECKPOINT_1) {
 
514
 
 
515
                        log_group_read_checkpoint_info(group, field);
 
516
 
 
517
                        if (!recv_check_cp_is_consistent(buf)) {
 
518
#ifdef UNIV_DEBUG
 
519
                                if (log_debug_writes) {
 
520
                                        fprintf(stderr,
 
521
                                                "InnoDB: Checkpoint in group"
 
522
                                                " %lu at %lu invalid, %lu\n",
 
523
                                                (ulong) group->id,
 
524
                                                (ulong) field,
 
525
                                                (ulong) mach_read_from_4(
 
526
                                                        buf
 
527
                                                        + LOG_CHECKPOINT_CHECKSUM_1));
 
528
 
 
529
                                }
 
530
#endif /* UNIV_DEBUG */
 
531
                                goto not_consistent;
 
532
                        }
 
533
 
 
534
                        group->state = LOG_GROUP_OK;
 
535
 
 
536
                        group->lsn = mach_read_from_8(
 
537
                                buf + LOG_CHECKPOINT_LSN);
 
538
                        group->lsn_offset = mach_read_from_4(
 
539
                                buf + LOG_CHECKPOINT_OFFSET);
 
540
                        checkpoint_no = mach_read_from_8(
 
541
                                buf + LOG_CHECKPOINT_NO);
 
542
 
 
543
#ifdef UNIV_DEBUG
 
544
                        if (log_debug_writes) {
 
545
                                fprintf(stderr,
 
546
                                        "InnoDB: Checkpoint number %lu"
 
547
                                        " found in group %lu\n",
 
548
                                        (ulong) ut_dulint_get_low(
 
549
                                                checkpoint_no),
 
550
                                        (ulong) group->id);
 
551
                        }
 
552
#endif /* UNIV_DEBUG */
 
553
 
 
554
                        if (ut_dulint_cmp(checkpoint_no, max_no) >= 0) {
 
555
                                *max_group = group;
 
556
                                *max_field = field;
 
557
                                max_no = checkpoint_no;
 
558
                        }
 
559
 
 
560
not_consistent:
 
561
                        ;
 
562
                }
 
563
 
 
564
                group = UT_LIST_GET_NEXT(log_groups, group);
 
565
        }
 
566
 
 
567
        if (*max_group == NULL) {
 
568
 
 
569
                fprintf(stderr,
 
570
                        "InnoDB: No valid checkpoint found.\n"
 
571
                        "InnoDB: If this error appears when you are"
 
572
                        " creating an InnoDB database,\n"
 
573
                        "InnoDB: the problem may be that during"
 
574
                        " an earlier attempt you managed\n"
 
575
                        "InnoDB: to create the InnoDB data files,"
 
576
                        " but log file creation failed.\n"
 
577
                        "InnoDB: If that is the case, please refer to\n"
 
578
                        "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
 
579
                        "error-creating-innodb.html\n");
 
580
                return(DB_ERROR);
 
581
        }
 
582
 
 
583
        return(DB_SUCCESS);
 
584
}
 
585
 
 
586
/***********************************************************************
 
587
Reads the checkpoint info needed in hot backup. */
 
588
 
 
589
ibool
 
590
recv_read_cp_info_for_backup(
 
591
/*=========================*/
 
592
                        /* out: TRUE if success */
 
593
        byte*   hdr,    /* in: buffer containing the log group header */
 
594
        dulint* lsn,    /* out: checkpoint lsn */
 
595
        ulint*  offset, /* out: checkpoint offset in the log group */
 
596
        ulint*  fsp_limit,/* out: fsp limit of space 0, 1000000000 if the
 
597
                        database is running with < version 3.23.50 of InnoDB */
 
598
        dulint* cp_no,  /* out: checkpoint number */
 
599
        dulint* first_header_lsn)
 
600
                        /* out: lsn of of the start of the first log file */
 
601
{
 
602
        ulint   max_cp          = 0;
 
603
        dulint  max_cp_no       = ut_dulint_zero;
 
604
        byte*   cp_buf;
 
605
 
 
606
        cp_buf = hdr + LOG_CHECKPOINT_1;
 
607
 
 
608
        if (recv_check_cp_is_consistent(cp_buf)) {
 
609
                max_cp_no = mach_read_from_8(cp_buf + LOG_CHECKPOINT_NO);
 
610
                max_cp = LOG_CHECKPOINT_1;
 
611
        }
 
612
 
 
613
        cp_buf = hdr + LOG_CHECKPOINT_2;
 
614
 
 
615
        if (recv_check_cp_is_consistent(cp_buf)) {
 
616
                if (ut_dulint_cmp(mach_read_from_8(cp_buf + LOG_CHECKPOINT_NO),
 
617
                                  max_cp_no) > 0) {
 
618
                        max_cp = LOG_CHECKPOINT_2;
 
619
                }
 
620
        }
 
621
 
 
622
        if (max_cp == 0) {
 
623
                return(FALSE);
 
624
        }
 
625
 
 
626
        cp_buf = hdr + max_cp;
 
627
 
 
628
        *lsn = mach_read_from_8(cp_buf + LOG_CHECKPOINT_LSN);
 
629
        *offset = mach_read_from_4(cp_buf + LOG_CHECKPOINT_OFFSET);
 
630
 
 
631
        /* If the user is running a pre-3.23.50 version of InnoDB, its
 
632
        checkpoint data does not contain the fsp limit info */
 
633
        if (mach_read_from_4(cp_buf + LOG_CHECKPOINT_FSP_MAGIC_N)
 
634
            == LOG_CHECKPOINT_FSP_MAGIC_N_VAL) {
 
635
 
 
636
                *fsp_limit = mach_read_from_4(
 
637
                        cp_buf + LOG_CHECKPOINT_FSP_FREE_LIMIT);
 
638
 
 
639
                if (*fsp_limit == 0) {
 
640
                        *fsp_limit = 1000000000;
 
641
                }
 
642
        } else {
 
643
                *fsp_limit = 1000000000;
 
644
        }
 
645
 
 
646
        /*      fprintf(stderr, "fsp limit %lu MB\n", *fsp_limit); */
 
647
 
 
648
        *cp_no = mach_read_from_8(cp_buf + LOG_CHECKPOINT_NO);
 
649
 
 
650
        *first_header_lsn = mach_read_from_8(hdr + LOG_FILE_START_LSN);
 
651
 
 
652
        return(TRUE);
 
653
}
 
654
 
 
655
/**********************************************************
 
656
Checks the 4-byte checksum to the trailer checksum field of a log block.
 
657
We also accept a log block in the old format < InnoDB-3.23.52 where the
 
658
checksum field contains the log block number. */
 
659
static
 
660
ibool
 
661
log_block_checksum_is_ok_or_old_format(
 
662
/*===================================*/
 
663
                        /* out: TRUE if ok, or if the log block may be in the
 
664
                        format of InnoDB version < 3.23.52 */
 
665
        byte*   block)  /* in: pointer to a log block */
 
666
{
 
667
#ifdef UNIV_LOG_DEBUG
 
668
        return(TRUE);
 
669
#endif /* UNIV_LOG_DEBUG */
 
670
        if (log_block_calc_checksum(block) == log_block_get_checksum(block)) {
 
671
 
 
672
                return(TRUE);
 
673
        }
 
674
 
 
675
        if (log_block_get_hdr_no(block) == log_block_get_checksum(block)) {
 
676
 
 
677
                /* We assume the log block is in the format of
 
678
                InnoDB version < 3.23.52 and the block is ok */
 
679
#if 0
 
680
                fprintf(stderr,
 
681
                        "InnoDB: Scanned old format < InnoDB-3.23.52"
 
682
                        " log block number %lu\n",
 
683
                        log_block_get_hdr_no(block));
 
684
#endif
 
685
                return(TRUE);
 
686
        }
 
687
 
 
688
        return(FALSE);
 
689
}
 
690
 
 
691
/***********************************************************************
 
692
Scans the log segment and n_bytes_scanned is set to the length of valid
 
693
log scanned. */
 
694
 
 
695
void
 
696
recv_scan_log_seg_for_backup(
 
697
/*=========================*/
 
698
        byte*           buf,            /* in: buffer containing log data */
 
699
        ulint           buf_len,        /* in: data length in that buffer */
 
700
        dulint*         scanned_lsn,    /* in/out: lsn of buffer start,
 
701
                                        we return scanned lsn */
 
702
        ulint*          scanned_checkpoint_no,
 
703
                                        /* in/out: 4 lowest bytes of the
 
704
                                        highest scanned checkpoint number so
 
705
                                        far */
 
706
        ulint*          n_bytes_scanned)/* out: how much we were able to
 
707
                                        scan, smaller than buf_len if log
 
708
                                        data ended here */
 
709
{
 
710
        ulint   data_len;
 
711
        byte*   log_block;
 
712
        ulint   no;
 
713
 
 
714
        *n_bytes_scanned = 0;
 
715
 
 
716
        for (log_block = buf; log_block < buf + buf_len;
 
717
             log_block += OS_FILE_LOG_BLOCK_SIZE) {
 
718
 
 
719
                no = log_block_get_hdr_no(log_block);
 
720
 
 
721
#if 0
 
722
                fprintf(stderr, "Log block header no %lu\n", no);
 
723
#endif
 
724
 
 
725
                if (no != log_block_convert_lsn_to_no(*scanned_lsn)
 
726
                    || !log_block_checksum_is_ok_or_old_format(log_block)) {
 
727
#if 0
 
728
                        fprintf(stderr,
 
729
                                "Log block n:o %lu, scanned lsn n:o %lu\n",
 
730
                                no, log_block_convert_lsn_to_no(*scanned_lsn));
 
731
#endif
 
732
                        /* Garbage or an incompletely written log block */
 
733
 
 
734
                        log_block += OS_FILE_LOG_BLOCK_SIZE;
 
735
#if 0
 
736
                        fprintf(stderr,
 
737
                                "Next log block n:o %lu\n",
 
738
                                log_block_get_hdr_no(log_block));
 
739
#endif
 
740
                        break;
 
741
                }
 
742
 
 
743
                if (*scanned_checkpoint_no > 0
 
744
                    && log_block_get_checkpoint_no(log_block)
 
745
                    < *scanned_checkpoint_no
 
746
                    && *scanned_checkpoint_no
 
747
                    - log_block_get_checkpoint_no(log_block)
 
748
                    > 0x80000000UL) {
 
749
 
 
750
                        /* Garbage from a log buffer flush which was made
 
751
                        before the most recent database recovery */
 
752
#if 0
 
753
                        fprintf(stderr,
 
754
                                "Scanned cp n:o %lu, block cp n:o %lu\n",
 
755
                                *scanned_checkpoint_no,
 
756
                                log_block_get_checkpoint_no(log_block));
 
757
#endif
 
758
                        break;
 
759
                }
 
760
 
 
761
                data_len = log_block_get_data_len(log_block);
 
762
 
 
763
                *scanned_checkpoint_no
 
764
                        = log_block_get_checkpoint_no(log_block);
 
765
                *scanned_lsn = ut_dulint_add(*scanned_lsn, data_len);
 
766
 
 
767
                *n_bytes_scanned += data_len;
 
768
 
 
769
                if (data_len < OS_FILE_LOG_BLOCK_SIZE) {
 
770
                        /* Log data ends here */
 
771
 
 
772
#if 0
 
773
                        fprintf(stderr, "Log block data len %lu\n",
 
774
                                data_len);
 
775
#endif
 
776
                        break;
 
777
                }
 
778
        }
 
779
}
 
780
 
 
781
/***********************************************************************
 
782
Tries to parse a single log record body and also applies it to a page if
 
783
specified. File ops are parsed, but not applied in this function. */
 
784
static
 
785
byte*
 
786
recv_parse_or_apply_log_rec_body(
 
787
/*=============================*/
 
788
                        /* out: log record end, NULL if not a complete
 
789
                        record */
 
790
        byte    type,   /* in: type */
 
791
        byte*   ptr,    /* in: pointer to a buffer */
 
792
        byte*   end_ptr,/* in: pointer to the buffer end */
 
793
        page_t* page,   /* in: buffer page or NULL; if not NULL, then the log
 
794
                        record is applied to the page, and the log record
 
795
                        should be complete then */
 
796
        mtr_t*  mtr)    /* in: mtr or NULL; should be non-NULL if and only if
 
797
                        page is non-NULL */
 
798
{
 
799
        dict_index_t*   index = NULL;
 
800
 
 
801
        switch (type) {
 
802
        case MLOG_1BYTE: case MLOG_2BYTES: case MLOG_4BYTES: case MLOG_8BYTES:
 
803
                ptr = mlog_parse_nbytes(type, ptr, end_ptr, page);
 
804
                break;
 
805
        case MLOG_REC_INSERT: case MLOG_COMP_REC_INSERT:
 
806
                if (NULL != (ptr = mlog_parse_index(
 
807
                                     ptr, end_ptr,
 
808
                                     type == MLOG_COMP_REC_INSERT,
 
809
                                     &index))) {
 
810
                        ut_a(!page
 
811
                             || (ibool)!!page_is_comp(page)
 
812
                             == dict_table_is_comp(index->table));
 
813
                        ptr = page_cur_parse_insert_rec(FALSE, ptr, end_ptr,
 
814
                                                        index, page, mtr);
 
815
                }
 
816
                break;
 
817
        case MLOG_REC_CLUST_DELETE_MARK: case MLOG_COMP_REC_CLUST_DELETE_MARK:
 
818
                if (NULL != (ptr = mlog_parse_index(
 
819
                                     ptr, end_ptr,
 
820
                                     type == MLOG_COMP_REC_CLUST_DELETE_MARK,
 
821
                                     &index))) {
 
822
                        ut_a(!page
 
823
                             || (ibool)!!page_is_comp(page)
 
824
                             == dict_table_is_comp(index->table));
 
825
                        ptr = btr_cur_parse_del_mark_set_clust_rec(
 
826
                                ptr, end_ptr, index, page);
 
827
                }
 
828
                break;
 
829
        case MLOG_COMP_REC_SEC_DELETE_MARK:
 
830
                /* This log record type is obsolete, but we process it for
 
831
                backward compatibility with MySQL 5.0.3 and 5.0.4. */
 
832
                ut_a(!page || page_is_comp(page));
 
833
                ptr = mlog_parse_index(ptr, end_ptr, TRUE, &index);
 
834
                if (!ptr) {
 
835
                        break;
 
836
                }
 
837
                /* Fall through */
 
838
        case MLOG_REC_SEC_DELETE_MARK:
 
839
                ptr = btr_cur_parse_del_mark_set_sec_rec(ptr, end_ptr, page);
 
840
                break;
 
841
        case MLOG_REC_UPDATE_IN_PLACE: case MLOG_COMP_REC_UPDATE_IN_PLACE:
 
842
                if (NULL != (ptr = mlog_parse_index(
 
843
                                     ptr, end_ptr,
 
844
                                     type == MLOG_COMP_REC_UPDATE_IN_PLACE,
 
845
                                     &index))) {
 
846
                        ut_a(!page
 
847
                             || (ibool)!!page_is_comp(page)
 
848
                             == dict_table_is_comp(index->table));
 
849
                        ptr = btr_cur_parse_update_in_place(ptr, end_ptr,
 
850
                                                            page, index);
 
851
                }
 
852
                break;
 
853
        case MLOG_LIST_END_DELETE: case MLOG_COMP_LIST_END_DELETE:
 
854
        case MLOG_LIST_START_DELETE: case MLOG_COMP_LIST_START_DELETE:
 
855
                if (NULL != (ptr = mlog_parse_index(
 
856
                                     ptr, end_ptr,
 
857
                                     type == MLOG_COMP_LIST_END_DELETE
 
858
                                     || type == MLOG_COMP_LIST_START_DELETE,
 
859
                                     &index))) {
 
860
                        ut_a(!page
 
861
                             || (ibool)!!page_is_comp(page)
 
862
                             == dict_table_is_comp(index->table));
 
863
                        ptr = page_parse_delete_rec_list(type, ptr, end_ptr,
 
864
                                                         index, page, mtr);
 
865
                }
 
866
                break;
 
867
        case MLOG_LIST_END_COPY_CREATED: case MLOG_COMP_LIST_END_COPY_CREATED:
 
868
                if (NULL != (ptr = mlog_parse_index(
 
869
                                     ptr, end_ptr,
 
870
                                     type == MLOG_COMP_LIST_END_COPY_CREATED,
 
871
                                     &index))) {
 
872
                        ut_a(!page
 
873
                             || (ibool)!!page_is_comp(page)
 
874
                             == dict_table_is_comp(index->table));
 
875
                        ptr = page_parse_copy_rec_list_to_created_page(
 
876
                                ptr, end_ptr, index, page, mtr);
 
877
                }
 
878
                break;
 
879
        case MLOG_PAGE_REORGANIZE: case MLOG_COMP_PAGE_REORGANIZE:
 
880
                if (NULL != (ptr = mlog_parse_index(
 
881
                                     ptr, end_ptr,
 
882
                                     type == MLOG_COMP_PAGE_REORGANIZE,
 
883
                                     &index))) {
 
884
                        ut_a(!page
 
885
                             || (ibool)!!page_is_comp(page)
 
886
                             == dict_table_is_comp(index->table));
 
887
                        ptr = btr_parse_page_reorganize(ptr, end_ptr, index,
 
888
                                                        page, mtr);
 
889
                }
 
890
                break;
 
891
        case MLOG_PAGE_CREATE: case MLOG_COMP_PAGE_CREATE:
 
892
                ptr = page_parse_create(ptr, end_ptr,
 
893
                                        type == MLOG_COMP_PAGE_CREATE,
 
894
                                        page, mtr);
 
895
                break;
 
896
        case MLOG_UNDO_INSERT:
 
897
                ptr = trx_undo_parse_add_undo_rec(ptr, end_ptr, page);
 
898
                break;
 
899
        case MLOG_UNDO_ERASE_END:
 
900
                ptr = trx_undo_parse_erase_page_end(ptr, end_ptr, page, mtr);
 
901
                break;
 
902
        case MLOG_UNDO_INIT:
 
903
                ptr = trx_undo_parse_page_init(ptr, end_ptr, page, mtr);
 
904
                break;
 
905
        case MLOG_UNDO_HDR_DISCARD:
 
906
                ptr = trx_undo_parse_discard_latest(ptr, end_ptr, page, mtr);
 
907
                break;
 
908
        case MLOG_UNDO_HDR_CREATE:
 
909
        case MLOG_UNDO_HDR_REUSE:
 
910
                ptr = trx_undo_parse_page_header(type, ptr, end_ptr,
 
911
                                                 page, mtr);
 
912
                break;
 
913
        case MLOG_REC_MIN_MARK: case MLOG_COMP_REC_MIN_MARK:
 
914
                ptr = btr_parse_set_min_rec_mark(
 
915
                        ptr, end_ptr, type == MLOG_COMP_REC_MIN_MARK,
 
916
                        page, mtr);
 
917
                break;
 
918
        case MLOG_REC_DELETE: case MLOG_COMP_REC_DELETE:
 
919
                if (NULL != (ptr = mlog_parse_index(
 
920
                                     ptr, end_ptr,
 
921
                                     type == MLOG_COMP_REC_DELETE,
 
922
                                     &index))) {
 
923
                        ut_a(!page
 
924
                             || (ibool)!!page_is_comp(page)
 
925
                             == dict_table_is_comp(index->table));
 
926
                        ptr = page_cur_parse_delete_rec(ptr, end_ptr,
 
927
                                                        index, page, mtr);
 
928
                }
 
929
                break;
 
930
        case MLOG_IBUF_BITMAP_INIT:
 
931
                ptr = ibuf_parse_bitmap_init(ptr, end_ptr, page, mtr);
 
932
                break;
 
933
        case MLOG_INIT_FILE_PAGE:
 
934
                ptr = fsp_parse_init_file_page(ptr, end_ptr, page);
 
935
                break;
 
936
        case MLOG_WRITE_STRING:
 
937
                ptr = mlog_parse_string(ptr, end_ptr, page);
 
938
                break;
 
939
        case MLOG_FILE_CREATE:
 
940
        case MLOG_FILE_RENAME:
 
941
        case MLOG_FILE_DELETE:
 
942
                ptr = fil_op_log_parse_or_replay(ptr, end_ptr, type, FALSE,
 
943
                                                 ULINT_UNDEFINED);
 
944
                break;
 
945
        default:
 
946
                ptr = NULL;
 
947
                recv_sys->found_corrupt_log = TRUE;
 
948
        }
 
949
 
 
950
        if (index) {
 
951
                dict_table_t*   table = index->table;
 
952
 
 
953
                dict_mem_index_free(index);
 
954
                dict_mem_table_free(table);
 
955
        }
 
956
 
 
957
        return(ptr);
 
958
}
 
959
 
 
960
/*************************************************************************
 
961
Calculates the fold value of a page file address: used in inserting or
 
962
searching for a log record in the hash table. */
 
963
UNIV_INLINE
 
964
ulint
 
965
recv_fold(
 
966
/*======*/
 
967
                        /* out: folded value */
 
968
        ulint   space,  /* in: space */
 
969
        ulint   page_no)/* in: page number */
 
970
{
 
971
        return(ut_fold_ulint_pair(space, page_no));
 
972
}
 
973
 
 
974
/*************************************************************************
 
975
Calculates the hash value of a page file address: used in inserting or
 
976
searching for a log record in the hash table. */
 
977
UNIV_INLINE
 
978
ulint
 
979
recv_hash(
 
980
/*======*/
 
981
                        /* out: folded value */
 
982
        ulint   space,  /* in: space */
 
983
        ulint   page_no)/* in: page number */
 
984
{
 
985
        return(hash_calc_hash(recv_fold(space, page_no), recv_sys->addr_hash));
 
986
}
 
987
 
 
988
/*************************************************************************
 
989
Gets the hashed file address struct for a page. */
 
990
static
 
991
recv_addr_t*
 
992
recv_get_fil_addr_struct(
 
993
/*=====================*/
 
994
                        /* out: file address struct, NULL if not found from
 
995
                        the hash table */
 
996
        ulint   space,  /* in: space id */
 
997
        ulint   page_no)/* in: page number */
 
998
{
 
999
        recv_addr_t*    recv_addr;
 
1000
 
 
1001
        recv_addr = HASH_GET_FIRST(recv_sys->addr_hash,
 
1002
                                   recv_hash(space, page_no));
 
1003
        while (recv_addr) {
 
1004
                if ((recv_addr->space == space)
 
1005
                    && (recv_addr->page_no == page_no)) {
 
1006
 
 
1007
                        break;
 
1008
                }
 
1009
 
 
1010
                recv_addr = HASH_GET_NEXT(addr_hash, recv_addr);
 
1011
        }
 
1012
 
 
1013
        return(recv_addr);
 
1014
}
 
1015
 
 
1016
/***********************************************************************
 
1017
Adds a new log record to the hash table of log records. */
 
1018
static
 
1019
void
 
1020
recv_add_to_hash_table(
 
1021
/*===================*/
 
1022
        byte    type,           /* in: log record type */
 
1023
        ulint   space,          /* in: space id */
 
1024
        ulint   page_no,        /* in: page number */
 
1025
        byte*   body,           /* in: log record body */
 
1026
        byte*   rec_end,        /* in: log record end */
 
1027
        dulint  start_lsn,      /* in: start lsn of the mtr */
 
1028
        dulint  end_lsn)        /* in: end lsn of the mtr */
 
1029
{
 
1030
        recv_t*         recv;
 
1031
        ulint           len;
 
1032
        recv_data_t*    recv_data;
 
1033
        recv_data_t**   prev_field;
 
1034
        recv_addr_t*    recv_addr;
 
1035
 
 
1036
        if (fil_tablespace_deleted_or_being_deleted_in_mem(space, -1)) {
 
1037
                /* The tablespace does not exist any more: do not store the
 
1038
                log record */
 
1039
 
 
1040
                return;
 
1041
        }
 
1042
 
 
1043
        len = rec_end - body;
 
1044
 
 
1045
        recv = mem_heap_alloc(recv_sys->heap, sizeof(recv_t));
 
1046
        recv->type = type;
 
1047
        recv->len = rec_end - body;
 
1048
        recv->start_lsn = start_lsn;
 
1049
        recv->end_lsn = end_lsn;
 
1050
 
 
1051
        recv_addr = recv_get_fil_addr_struct(space, page_no);
 
1052
 
 
1053
        if (recv_addr == NULL) {
 
1054
                recv_addr = mem_heap_alloc(recv_sys->heap,
 
1055
                                           sizeof(recv_addr_t));
 
1056
                recv_addr->space = space;
 
1057
                recv_addr->page_no = page_no;
 
1058
                recv_addr->state = RECV_NOT_PROCESSED;
 
1059
 
 
1060
                UT_LIST_INIT(recv_addr->rec_list);
 
1061
 
 
1062
                HASH_INSERT(recv_addr_t, addr_hash, recv_sys->addr_hash,
 
1063
                            recv_fold(space, page_no), recv_addr);
 
1064
                recv_sys->n_addrs++;
 
1065
#if 0
 
1066
                fprintf(stderr, "Inserting log rec for space %lu, page %lu\n",
 
1067
                        space, page_no);
 
1068
#endif
 
1069
        }
 
1070
 
 
1071
        UT_LIST_ADD_LAST(rec_list, recv_addr->rec_list, recv);
 
1072
 
 
1073
        prev_field = &(recv->data);
 
1074
 
 
1075
        /* Store the log record body in chunks of less than UNIV_PAGE_SIZE:
 
1076
        recv_sys->heap grows into the buffer pool, and bigger chunks could not
 
1077
        be allocated */
 
1078
 
 
1079
        while (rec_end > body) {
 
1080
 
 
1081
                len = rec_end - body;
 
1082
 
 
1083
                if (len > RECV_DATA_BLOCK_SIZE) {
 
1084
                        len = RECV_DATA_BLOCK_SIZE;
 
1085
                }
 
1086
 
 
1087
                recv_data = mem_heap_alloc(recv_sys->heap,
 
1088
                                           sizeof(recv_data_t) + len);
 
1089
                *prev_field = recv_data;
 
1090
 
 
1091
                ut_memcpy(((byte*)recv_data) + sizeof(recv_data_t), body, len);
 
1092
 
 
1093
                prev_field = &(recv_data->next);
 
1094
 
 
1095
                body += len;
 
1096
        }
 
1097
 
 
1098
        *prev_field = NULL;
 
1099
}
 
1100
 
 
1101
/*************************************************************************
 
1102
Copies the log record body from recv to buf. */
 
1103
static
 
1104
void
 
1105
recv_data_copy_to_buf(
 
1106
/*==================*/
 
1107
        byte*   buf,    /* in: buffer of length at least recv->len */
 
1108
        recv_t* recv)   /* in: log record */
 
1109
{
 
1110
        recv_data_t*    recv_data;
 
1111
        ulint           part_len;
 
1112
        ulint           len;
 
1113
 
 
1114
        len = recv->len;
 
1115
        recv_data = recv->data;
 
1116
 
 
1117
        while (len > 0) {
 
1118
                if (len > RECV_DATA_BLOCK_SIZE) {
 
1119
                        part_len = RECV_DATA_BLOCK_SIZE;
 
1120
                } else {
 
1121
                        part_len = len;
 
1122
                }
 
1123
 
 
1124
                ut_memcpy(buf, ((byte*)recv_data) + sizeof(recv_data_t),
 
1125
                          part_len);
 
1126
                buf += part_len;
 
1127
                len -= part_len;
 
1128
 
 
1129
                recv_data = recv_data->next;
 
1130
        }
 
1131
}
 
1132
 
 
1133
/****************************************************************************
 
1134
Applies the hashed log records to the page, if the page lsn is less than the
 
1135
lsn of a log record. This can be called when a buffer page has just been
 
1136
read in, or also for a page already in the buffer pool. */
 
1137
 
 
1138
void
 
1139
recv_recover_page(
 
1140
/*==============*/
 
1141
        ibool   recover_backup, /* in: TRUE if we are recovering a backup
 
1142
                                page: then we do not acquire any latches
 
1143
                                since the page was read in outside the
 
1144
                                buffer pool */
 
1145
        ibool   just_read_in,   /* in: TRUE if the i/o-handler calls this for
 
1146
                                a freshly read page */
 
1147
        page_t* page,           /* in: buffer page */
 
1148
        ulint   space,          /* in: space id */
 
1149
        ulint   page_no)        /* in: page number */
 
1150
{
 
1151
        buf_block_t*    block           = NULL;
 
1152
        recv_addr_t*    recv_addr;
 
1153
        recv_t*         recv;
 
1154
        byte*           buf;
 
1155
        dulint          start_lsn;
 
1156
        dulint          end_lsn;
 
1157
        dulint          page_lsn;
 
1158
        dulint          page_newest_lsn;
 
1159
        ibool           modification_to_page;
 
1160
        ibool           success;
 
1161
        mtr_t           mtr;
 
1162
 
 
1163
        mutex_enter(&(recv_sys->mutex));
 
1164
 
 
1165
        if (recv_sys->apply_log_recs == FALSE) {
 
1166
 
 
1167
                /* Log records should not be applied now */
 
1168
 
 
1169
                mutex_exit(&(recv_sys->mutex));
 
1170
 
 
1171
                return;
 
1172
        }
 
1173
 
 
1174
        recv_addr = recv_get_fil_addr_struct(space, page_no);
 
1175
 
 
1176
        if ((recv_addr == NULL)
 
1177
            || (recv_addr->state == RECV_BEING_PROCESSED)
 
1178
            || (recv_addr->state == RECV_PROCESSED)) {
 
1179
 
 
1180
                mutex_exit(&(recv_sys->mutex));
 
1181
 
 
1182
                return;
 
1183
        }
 
1184
 
 
1185
#if 0
 
1186
        fprintf(stderr, "Recovering space %lu, page %lu\n", space, page_no);
 
1187
#endif
 
1188
 
 
1189
        recv_addr->state = RECV_BEING_PROCESSED;
 
1190
 
 
1191
        mutex_exit(&(recv_sys->mutex));
 
1192
 
 
1193
        mtr_start(&mtr);
 
1194
        mtr_set_log_mode(&mtr, MTR_LOG_NONE);
 
1195
 
 
1196
        if (!recover_backup) {
 
1197
                block = buf_block_align(page);
 
1198
 
 
1199
                if (just_read_in) {
 
1200
                        /* Move the ownership of the x-latch on the
 
1201
                        page to this OS thread, so that we can acquire
 
1202
                        a second x-latch on it. This is needed for the
 
1203
                        operations to the page to pass the debug
 
1204
                        checks. */
 
1205
 
 
1206
                        rw_lock_x_lock_move_ownership(&(block->lock));
 
1207
                }
 
1208
 
 
1209
                success = buf_page_get_known_nowait(RW_X_LATCH, page,
 
1210
                                                    BUF_KEEP_OLD,
 
1211
                                                    __FILE__, __LINE__,
 
1212
                                                    &mtr);
 
1213
                ut_a(success);
 
1214
 
 
1215
#ifdef UNIV_SYNC_DEBUG
 
1216
                buf_page_dbg_add_level(page, SYNC_NO_ORDER_CHECK);
 
1217
#endif /* UNIV_SYNC_DEBUG */
 
1218
        }
 
1219
 
 
1220
        /* Read the newest modification lsn from the page */
 
1221
        page_lsn = mach_read_from_8(page + FIL_PAGE_LSN);
 
1222
 
 
1223
        if (!recover_backup) {
 
1224
                /* It may be that the page has been modified in the buffer
 
1225
                pool: read the newest modification lsn there */
 
1226
 
 
1227
                page_newest_lsn = buf_frame_get_newest_modification(page);
 
1228
 
 
1229
                if (!ut_dulint_is_zero(page_newest_lsn)) {
 
1230
 
 
1231
                        page_lsn = page_newest_lsn;
 
1232
                }
 
1233
        } else {
 
1234
                /* In recovery from a backup we do not really use the buffer
 
1235
                pool */
 
1236
 
 
1237
                page_newest_lsn = ut_dulint_zero;
 
1238
        }
 
1239
 
 
1240
        modification_to_page = FALSE;
 
1241
        start_lsn = end_lsn = ut_dulint_zero;
 
1242
 
 
1243
        recv = UT_LIST_GET_FIRST(recv_addr->rec_list);
 
1244
 
 
1245
        while (recv) {
 
1246
                end_lsn = recv->end_lsn;
 
1247
 
 
1248
                if (recv->len > RECV_DATA_BLOCK_SIZE) {
 
1249
                        /* We have to copy the record body to a separate
 
1250
                        buffer */
 
1251
 
 
1252
                        buf = mem_alloc(recv->len);
 
1253
 
 
1254
                        recv_data_copy_to_buf(buf, recv);
 
1255
                } else {
 
1256
                        buf = ((byte*)(recv->data)) + sizeof(recv_data_t);
 
1257
                }
 
1258
 
 
1259
                if (recv->type == MLOG_INIT_FILE_PAGE) {
 
1260
                        page_lsn = page_newest_lsn;
 
1261
 
 
1262
                        mach_write_to_8(page + UNIV_PAGE_SIZE
 
1263
                                        - FIL_PAGE_END_LSN_OLD_CHKSUM,
 
1264
                                        ut_dulint_zero);
 
1265
                        mach_write_to_8(page + FIL_PAGE_LSN, ut_dulint_zero);
 
1266
                }
 
1267
 
 
1268
                if (ut_dulint_cmp(recv->start_lsn, page_lsn) >= 0) {
 
1269
 
 
1270
                        if (!modification_to_page) {
 
1271
 
 
1272
                                modification_to_page = TRUE;
 
1273
                                start_lsn = recv->start_lsn;
 
1274
                        }
 
1275
 
 
1276
#ifdef UNIV_DEBUG
 
1277
                        if (log_debug_writes) {
 
1278
                                fprintf(stderr,
 
1279
                                        "InnoDB: Applying log rec"
 
1280
                                        " type %lu len %lu"
 
1281
                                        " to space %lu page no %lu\n",
 
1282
                                        (ulong) recv->type, (ulong) recv->len,
 
1283
                                        (ulong) recv_addr->space,
 
1284
                                        (ulong) recv_addr->page_no);
 
1285
                        }
 
1286
#endif /* UNIV_DEBUG */
 
1287
 
 
1288
                        recv_parse_or_apply_log_rec_body(recv->type, buf,
 
1289
                                                         buf + recv->len,
 
1290
                                                         page, &mtr);
 
1291
                        mach_write_to_8(page + UNIV_PAGE_SIZE
 
1292
                                        - FIL_PAGE_END_LSN_OLD_CHKSUM,
 
1293
                                        ut_dulint_add(recv->start_lsn,
 
1294
                                                      recv->len));
 
1295
                        mach_write_to_8(page + FIL_PAGE_LSN,
 
1296
                                        ut_dulint_add(recv->start_lsn,
 
1297
                                                      recv->len));
 
1298
                }
 
1299
 
 
1300
                if (recv->len > RECV_DATA_BLOCK_SIZE) {
 
1301
                        mem_free(buf);
 
1302
                }
 
1303
 
 
1304
                recv = UT_LIST_GET_NEXT(rec_list, recv);
 
1305
        }
 
1306
 
 
1307
        mutex_enter(&(recv_sys->mutex));
 
1308
 
 
1309
        if (ut_dulint_cmp(recv_max_page_lsn, page_lsn) < 0) {
 
1310
                recv_max_page_lsn = page_lsn;
 
1311
        }
 
1312
 
 
1313
        recv_addr->state = RECV_PROCESSED;
 
1314
 
 
1315
        ut_a(recv_sys->n_addrs);
 
1316
        recv_sys->n_addrs--;
 
1317
 
 
1318
        mutex_exit(&(recv_sys->mutex));
 
1319
 
 
1320
        if (!recover_backup && modification_to_page) {
 
1321
                ut_a(block);
 
1322
 
 
1323
                buf_flush_recv_note_modification(block, start_lsn, end_lsn);
 
1324
        }
 
1325
 
 
1326
        /* Make sure that committing mtr does not change the modification
 
1327
        lsn values of page */
 
1328
 
 
1329
        mtr.modifications = FALSE;
 
1330
 
 
1331
        mtr_commit(&mtr);
 
1332
}
 
1333
 
 
1334
/***********************************************************************
 
1335
Reads in pages which have hashed log records, from an area around a given
 
1336
page number. */
 
1337
static
 
1338
ulint
 
1339
recv_read_in_area(
 
1340
/*==============*/
 
1341
                        /* out: number of pages found */
 
1342
        ulint   space,  /* in: space */
 
1343
        ulint   page_no)/* in: page number */
 
1344
{
 
1345
        recv_addr_t* recv_addr;
 
1346
        ulint   page_nos[RECV_READ_AHEAD_AREA];
 
1347
        ulint   low_limit;
 
1348
        ulint   n;
 
1349
 
 
1350
        low_limit = page_no - (page_no % RECV_READ_AHEAD_AREA);
 
1351
 
 
1352
        n = 0;
 
1353
 
 
1354
        for (page_no = low_limit; page_no < low_limit + RECV_READ_AHEAD_AREA;
 
1355
             page_no++) {
 
1356
                recv_addr = recv_get_fil_addr_struct(space, page_no);
 
1357
 
 
1358
                if (recv_addr && !buf_page_peek(space, page_no)) {
 
1359
 
 
1360
                        mutex_enter(&(recv_sys->mutex));
 
1361
 
 
1362
                        if (recv_addr->state == RECV_NOT_PROCESSED) {
 
1363
                                recv_addr->state = RECV_BEING_READ;
 
1364
 
 
1365
                                page_nos[n] = page_no;
 
1366
 
 
1367
                                n++;
 
1368
                        }
 
1369
 
 
1370
                        mutex_exit(&(recv_sys->mutex));
 
1371
                }
 
1372
        }
 
1373
 
 
1374
        buf_read_recv_pages(FALSE, space, page_nos, n);
 
1375
        /*
 
1376
        fprintf(stderr, "Recv pages at %lu n %lu\n", page_nos[0], n);
 
1377
        */
 
1378
        return(n);
 
1379
}
 
1380
 
 
1381
/***********************************************************************
 
1382
Empties the hash table of stored log records, applying them to appropriate
 
1383
pages. */
 
1384
 
 
1385
void
 
1386
recv_apply_hashed_log_recs(
 
1387
/*=======================*/
 
1388
        ibool   allow_ibuf)     /* in: if TRUE, also ibuf operations are
 
1389
                                allowed during the application; if FALSE,
 
1390
                                no ibuf operations are allowed, and after
 
1391
                                the application all file pages are flushed to
 
1392
                                disk and invalidated in buffer pool: this
 
1393
                                alternative means that no new log records
 
1394
                                can be generated during the application;
 
1395
                                the caller must in this case own the log
 
1396
                                mutex */
 
1397
{
 
1398
        recv_addr_t* recv_addr;
 
1399
        page_t* page;
 
1400
        ulint   i;
 
1401
        ulint   space;
 
1402
        ulint   page_no;
 
1403
        ulint   n_pages;
 
1404
        ibool   has_printed     = FALSE;
 
1405
        mtr_t   mtr;
 
1406
loop:
 
1407
        mutex_enter(&(recv_sys->mutex));
 
1408
 
 
1409
        if (recv_sys->apply_batch_on) {
 
1410
 
 
1411
                mutex_exit(&(recv_sys->mutex));
 
1412
 
 
1413
                os_thread_sleep(500000);
 
1414
 
 
1415
                goto loop;
 
1416
        }
 
1417
 
 
1418
        ut_ad(!allow_ibuf == mutex_own(&log_sys->mutex));
 
1419
 
 
1420
        if (!allow_ibuf) {
 
1421
                recv_no_ibuf_operations = TRUE;
 
1422
        }
 
1423
 
 
1424
        recv_sys->apply_log_recs = TRUE;
 
1425
        recv_sys->apply_batch_on = TRUE;
 
1426
 
 
1427
        for (i = 0; i < hash_get_n_cells(recv_sys->addr_hash); i++) {
 
1428
 
 
1429
                recv_addr = HASH_GET_FIRST(recv_sys->addr_hash, i);
 
1430
 
 
1431
                while (recv_addr) {
 
1432
                        space = recv_addr->space;
 
1433
                        page_no = recv_addr->page_no;
 
1434
 
 
1435
                        if (recv_addr->state == RECV_NOT_PROCESSED) {
 
1436
                                if (!has_printed) {
 
1437
                                        ut_print_timestamp(stderr);
 
1438
                                        fputs("  InnoDB: Starting an"
 
1439
                                              " apply batch of log records"
 
1440
                                              " to the database...\n"
 
1441
                                              "InnoDB: Progress in percents: ",
 
1442
                                              stderr);
 
1443
                                        has_printed = TRUE;
 
1444
                                }
 
1445
 
 
1446
                                mutex_exit(&(recv_sys->mutex));
 
1447
 
 
1448
                                if (buf_page_peek(space, page_no)) {
 
1449
 
 
1450
                                        mtr_start(&mtr);
 
1451
 
 
1452
                                        page = buf_page_get(space, page_no,
 
1453
                                                            RW_X_LATCH, &mtr);
 
1454
 
 
1455
#ifdef UNIV_SYNC_DEBUG
 
1456
                                        buf_page_dbg_add_level(
 
1457
                                                page, SYNC_NO_ORDER_CHECK);
 
1458
#endif /* UNIV_SYNC_DEBUG */
 
1459
                                        recv_recover_page(FALSE, FALSE, page,
 
1460
                                                          space, page_no);
 
1461
                                        mtr_commit(&mtr);
 
1462
                                } else {
 
1463
                                        recv_read_in_area(space, page_no);
 
1464
                                }
 
1465
 
 
1466
                                mutex_enter(&(recv_sys->mutex));
 
1467
                        }
 
1468
 
 
1469
                        recv_addr = HASH_GET_NEXT(addr_hash, recv_addr);
 
1470
                }
 
1471
 
 
1472
                if (has_printed
 
1473
                    && (i * 100) / hash_get_n_cells(recv_sys->addr_hash)
 
1474
                    != ((i + 1) * 100)
 
1475
                    / hash_get_n_cells(recv_sys->addr_hash)) {
 
1476
 
 
1477
                        fprintf(stderr, "%lu ", (ulong)
 
1478
                                ((i * 100)
 
1479
                                 / hash_get_n_cells(recv_sys->addr_hash)));
 
1480
                }
 
1481
        }
 
1482
 
 
1483
        /* Wait until all the pages have been processed */
 
1484
 
 
1485
        while (recv_sys->n_addrs != 0) {
 
1486
 
 
1487
                mutex_exit(&(recv_sys->mutex));
 
1488
 
 
1489
                os_thread_sleep(500000);
 
1490
 
 
1491
                mutex_enter(&(recv_sys->mutex));
 
1492
        }
 
1493
 
 
1494
        if (has_printed) {
 
1495
 
 
1496
                fprintf(stderr, "\n");
 
1497
        }
 
1498
 
 
1499
        if (!allow_ibuf) {
 
1500
                /* Flush all the file pages to disk and invalidate them in
 
1501
                the buffer pool */
 
1502
 
 
1503
                mutex_exit(&(recv_sys->mutex));
 
1504
                mutex_exit(&(log_sys->mutex));
 
1505
 
 
1506
                n_pages = buf_flush_batch(BUF_FLUSH_LIST, ULINT_MAX,
 
1507
                                          ut_dulint_max);
 
1508
                ut_a(n_pages != ULINT_UNDEFINED);
 
1509
 
 
1510
                buf_flush_wait_batch_end(BUF_FLUSH_LIST);
 
1511
 
 
1512
                buf_pool_invalidate();
 
1513
 
 
1514
                mutex_enter(&(log_sys->mutex));
 
1515
                mutex_enter(&(recv_sys->mutex));
 
1516
 
 
1517
                recv_no_ibuf_operations = FALSE;
 
1518
        }
 
1519
 
 
1520
        recv_sys->apply_log_recs = FALSE;
 
1521
        recv_sys->apply_batch_on = FALSE;
 
1522
 
 
1523
        recv_sys_empty_hash();
 
1524
 
 
1525
        if (has_printed) {
 
1526
                fprintf(stderr, "InnoDB: Apply batch completed\n");
 
1527
        }
 
1528
 
 
1529
        mutex_exit(&(recv_sys->mutex));
 
1530
}
 
1531
 
 
1532
 
 
1533
/***********************************************************************
 
1534
Tries to parse a single log record and returns its length. */
 
1535
static
 
1536
ulint
 
1537
recv_parse_log_rec(
 
1538
/*===============*/
 
1539
                        /* out: length of the record, or 0 if the record was
 
1540
                        not complete */
 
1541
        byte*   ptr,    /* in: pointer to a buffer */
 
1542
        byte*   end_ptr,/* in: pointer to the buffer end */
 
1543
        byte*   type,   /* out: type */
 
1544
        ulint*  space,  /* out: space id */
 
1545
        ulint*  page_no,/* out: page number */
 
1546
        byte**  body)   /* out: log record body start */
 
1547
{
 
1548
        byte*   new_ptr;
 
1549
 
 
1550
        *body = NULL;
 
1551
 
 
1552
        if (ptr == end_ptr) {
 
1553
 
 
1554
                return(0);
 
1555
        }
 
1556
 
 
1557
        if (*ptr == MLOG_MULTI_REC_END) {
 
1558
 
 
1559
                *type = *ptr;
 
1560
 
 
1561
                return(1);
 
1562
        }
 
1563
 
 
1564
        if (*ptr == MLOG_DUMMY_RECORD) {
 
1565
                *type = *ptr;
 
1566
 
 
1567
                *space = ULINT_UNDEFINED - 1; /* For debugging */
 
1568
 
 
1569
                return(1);
 
1570
        }
 
1571
 
 
1572
        new_ptr = mlog_parse_initial_log_record(ptr, end_ptr, type, space,
 
1573
                                                page_no);
 
1574
        *body = new_ptr;
 
1575
 
 
1576
        if (UNIV_UNLIKELY(!new_ptr)) {
 
1577
 
 
1578
                return(0);
 
1579
        }
 
1580
 
 
1581
        /* Check that page_no is sensible */
 
1582
 
 
1583
        if (UNIV_UNLIKELY(*page_no > 0x8FFFFFFFUL)) {
 
1584
 
 
1585
                recv_sys->found_corrupt_log = TRUE;
 
1586
 
 
1587
                return(0);
 
1588
        }
 
1589
 
 
1590
        new_ptr = recv_parse_or_apply_log_rec_body(*type, new_ptr, end_ptr,
 
1591
                                                   NULL, NULL);
 
1592
        if (UNIV_UNLIKELY(new_ptr == NULL)) {
 
1593
 
 
1594
                return(0);
 
1595
        }
 
1596
 
 
1597
        if (*page_no > recv_max_parsed_page_no) {
 
1598
                recv_max_parsed_page_no = *page_no;
 
1599
        }
 
1600
 
 
1601
        return(new_ptr - ptr);
 
1602
}
 
1603
 
 
1604
/***********************************************************
 
1605
Calculates the new value for lsn when more data is added to the log. */
 
1606
static
 
1607
dulint
 
1608
recv_calc_lsn_on_data_add(
 
1609
/*======================*/
 
1610
        dulint  lsn,    /* in: old lsn */
 
1611
        ulint   len)    /* in: this many bytes of data is added, log block
 
1612
                        headers not included */
 
1613
{
 
1614
        ulint   frag_len;
 
1615
        ulint   lsn_len;
 
1616
 
 
1617
        frag_len = (ut_dulint_get_low(lsn) % OS_FILE_LOG_BLOCK_SIZE)
 
1618
                - LOG_BLOCK_HDR_SIZE;
 
1619
        ut_ad(frag_len < OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE
 
1620
              - LOG_BLOCK_TRL_SIZE);
 
1621
        lsn_len = len + ((len + frag_len)
 
1622
                         / (OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE
 
1623
                            - LOG_BLOCK_TRL_SIZE))
 
1624
                * (LOG_BLOCK_HDR_SIZE + LOG_BLOCK_TRL_SIZE);
 
1625
 
 
1626
        return(ut_dulint_add(lsn, lsn_len));
 
1627
}
 
1628
 
 
1629
/***********************************************************
 
1630
Prints diagnostic info of corrupt log. */
 
1631
static
 
1632
void
 
1633
recv_report_corrupt_log(
 
1634
/*====================*/
 
1635
        byte*   ptr,    /* in: pointer to corrupt log record */
 
1636
        byte    type,   /* in: type of the record */
 
1637
        ulint   space,  /* in: space id, this may also be garbage */
 
1638
        ulint   page_no)/* in: page number, this may also be garbage */
 
1639
{
 
1640
        fprintf(stderr,
 
1641
                "InnoDB: ############### CORRUPT LOG RECORD FOUND\n"
 
1642
                "InnoDB: Log record type %lu, space id %lu, page number %lu\n"
 
1643
                "InnoDB: Log parsing proceeded successfully up to %lu %lu\n"
 
1644
                "InnoDB: Previous log record type %lu, is multi %lu\n"
 
1645
                "InnoDB: Recv offset %lu, prev %lu\n",
 
1646
                (ulong) type, (ulong) space, (ulong) page_no,
 
1647
                (ulong) ut_dulint_get_high(recv_sys->recovered_lsn),
 
1648
                (ulong) ut_dulint_get_low(recv_sys->recovered_lsn),
 
1649
                (ulong) recv_previous_parsed_rec_type,
 
1650
                (ulong) recv_previous_parsed_rec_is_multi,
 
1651
                (ulong) (ptr - recv_sys->buf),
 
1652
                (ulong) recv_previous_parsed_rec_offset);
 
1653
 
 
1654
        if ((ulint)(ptr - recv_sys->buf + 100)
 
1655
            > recv_previous_parsed_rec_offset
 
1656
            && (ulint)(ptr - recv_sys->buf + 100
 
1657
                       - recv_previous_parsed_rec_offset)
 
1658
            < 200000) {
 
1659
                fputs("InnoDB: Hex dump of corrupt log starting"
 
1660
                      " 100 bytes before the start\n"
 
1661
                      "InnoDB: of the previous log rec,\n"
 
1662
                      "InnoDB: and ending 100 bytes after the start"
 
1663
                      " of the corrupt rec:\n",
 
1664
                      stderr);
 
1665
 
 
1666
                ut_print_buf(stderr,
 
1667
                             recv_sys->buf
 
1668
                             + recv_previous_parsed_rec_offset - 100,
 
1669
                             ptr - recv_sys->buf + 200
 
1670
                             - recv_previous_parsed_rec_offset);
 
1671
                putc('\n', stderr);
 
1672
        }
 
1673
 
 
1674
        fputs("InnoDB: WARNING: the log file may have been corrupt and it\n"
 
1675
              "InnoDB: is possible that the log scan did not proceed\n"
 
1676
              "InnoDB: far enough in recovery! Please run CHECK TABLE\n"
 
1677
              "InnoDB: on your InnoDB tables to check that they are ok!\n"
 
1678
              "InnoDB: If mysqld crashes after this recovery, look at\n"
 
1679
              "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
 
1680
              "forcing-recovery.html\n"
 
1681
              "InnoDB: about forcing recovery.\n", stderr);
 
1682
 
 
1683
        fflush(stderr);
 
1684
}
 
1685
 
 
1686
/***********************************************************
 
1687
Parses log records from a buffer and stores them to a hash table to wait
 
1688
merging to file pages. */
 
1689
static
 
1690
ibool
 
1691
recv_parse_log_recs(
 
1692
/*================*/
 
1693
                                /* out: currently always returns FALSE */
 
1694
        ibool   store_to_hash)  /* in: TRUE if the records should be stored
 
1695
                                to the hash table; this is set to FALSE if just
 
1696
                                debug checking is needed */
 
1697
{
 
1698
        byte*   ptr;
 
1699
        byte*   end_ptr;
 
1700
        ulint   single_rec;
 
1701
        ulint   len;
 
1702
        ulint   total_len;
 
1703
        dulint  new_recovered_lsn;
 
1704
        dulint  old_lsn;
 
1705
        byte    type;
 
1706
        ulint   space;
 
1707
        ulint   page_no;
 
1708
        byte*   body;
 
1709
        ulint   n_recs;
 
1710
 
 
1711
        ut_ad(mutex_own(&(log_sys->mutex)));
 
1712
        ut_ad(!ut_dulint_is_zero(recv_sys->parse_start_lsn));
 
1713
loop:
 
1714
        ptr = recv_sys->buf + recv_sys->recovered_offset;
 
1715
 
 
1716
        end_ptr = recv_sys->buf + recv_sys->len;
 
1717
 
 
1718
        if (ptr == end_ptr) {
 
1719
 
 
1720
                return(FALSE);
 
1721
        }
 
1722
 
 
1723
        single_rec = (ulint)*ptr & MLOG_SINGLE_REC_FLAG;
 
1724
 
 
1725
        if (single_rec || *ptr == MLOG_DUMMY_RECORD) {
 
1726
                /* The mtr only modified a single page, or this is a file op */
 
1727
 
 
1728
                old_lsn = recv_sys->recovered_lsn;
 
1729
 
 
1730
                /* Try to parse a log record, fetching its type, space id,
 
1731
                page no, and a pointer to the body of the log record */
 
1732
 
 
1733
                len = recv_parse_log_rec(ptr, end_ptr, &type, &space,
 
1734
                                         &page_no, &body);
 
1735
 
 
1736
                if (len == 0 || recv_sys->found_corrupt_log) {
 
1737
                        if (recv_sys->found_corrupt_log) {
 
1738
 
 
1739
                                recv_report_corrupt_log(ptr,
 
1740
                                                        type, space, page_no);
 
1741
                        }
 
1742
 
 
1743
                        return(FALSE);
 
1744
                }
 
1745
 
 
1746
                new_recovered_lsn = recv_calc_lsn_on_data_add(old_lsn, len);
 
1747
 
 
1748
                if (ut_dulint_cmp(new_recovered_lsn, recv_sys->scanned_lsn)
 
1749
                    > 0) {
 
1750
                        /* The log record filled a log block, and we require
 
1751
                        that also the next log block should have been scanned
 
1752
                        in */
 
1753
 
 
1754
                        return(FALSE);
 
1755
                }
 
1756
 
 
1757
                recv_previous_parsed_rec_type = (ulint)type;
 
1758
                recv_previous_parsed_rec_offset = recv_sys->recovered_offset;
 
1759
                recv_previous_parsed_rec_is_multi = 0;
 
1760
 
 
1761
                recv_sys->recovered_offset += len;
 
1762
                recv_sys->recovered_lsn = new_recovered_lsn;
 
1763
 
 
1764
#ifdef UNIV_DEBUG
 
1765
                if (log_debug_writes) {
 
1766
                        fprintf(stderr,
 
1767
                                "InnoDB: Parsed a single log rec"
 
1768
                                " type %lu len %lu space %lu page no %lu\n",
 
1769
                                (ulong) type, (ulong) len, (ulong) space,
 
1770
                                (ulong) page_no);
 
1771
                }
 
1772
#endif /* UNIV_DEBUG */
 
1773
 
 
1774
                if (type == MLOG_DUMMY_RECORD) {
 
1775
                        /* Do nothing */
 
1776
 
 
1777
                } else if (store_to_hash && (type == MLOG_FILE_CREATE
 
1778
                                             || type == MLOG_FILE_RENAME
 
1779
                                             || type == MLOG_FILE_DELETE)) {
 
1780
#ifdef UNIV_HOTBACKUP
 
1781
                        if (recv_replay_file_ops) {
 
1782
 
 
1783
                                /* In ibbackup --apply-log, replay an .ibd file
 
1784
                                operation, if possible; note that
 
1785
                                fil_path_to_mysql_datadir is set in ibbackup to
 
1786
                                point to the datadir we should use there */
 
1787
 
 
1788
                                if (NULL == fil_op_log_parse_or_replay(
 
1789
                                            body, end_ptr, type, TRUE,
 
1790
                                            space)) {
 
1791
                                        fprintf(stderr,
 
1792
                                                "InnoDB: Error: file op"
 
1793
                                                " log record of type %lu"
 
1794
                                                " space %lu not complete in\n"
 
1795
                                                "InnoDB: the replay phase."
 
1796
                                                " Path %s\n",
 
1797
                                                (ulint)type, space,
 
1798
                                                (char*)(body + 2));
 
1799
 
 
1800
                                        ut_a(0);
 
1801
                                }
 
1802
                        }
 
1803
#endif
 
1804
                        /* In normal mysqld crash recovery we do not try to
 
1805
                        replay file operations */
 
1806
                } else if (store_to_hash) {
 
1807
                        recv_add_to_hash_table(type, space, page_no, body,
 
1808
                                               ptr + len, old_lsn,
 
1809
                                               recv_sys->recovered_lsn);
 
1810
                } else {
 
1811
#ifdef UNIV_LOG_DEBUG
 
1812
                        recv_check_incomplete_log_recs(ptr, len);
 
1813
#endif/* UNIV_LOG_DEBUG */
 
1814
                }
 
1815
        } else {
 
1816
                /* Check that all the records associated with the single mtr
 
1817
                are included within the buffer */
 
1818
 
 
1819
                total_len = 0;
 
1820
                n_recs = 0;
 
1821
 
 
1822
                for (;;) {
 
1823
                        len = recv_parse_log_rec(ptr, end_ptr, &type, &space,
 
1824
                                                 &page_no, &body);
 
1825
                        if (len == 0 || recv_sys->found_corrupt_log) {
 
1826
 
 
1827
                                if (recv_sys->found_corrupt_log) {
 
1828
 
 
1829
                                        recv_report_corrupt_log(
 
1830
                                                ptr, type, space, page_no);
 
1831
                                }
 
1832
 
 
1833
                                return(FALSE);
 
1834
                        }
 
1835
 
 
1836
                        recv_previous_parsed_rec_type = (ulint)type;
 
1837
                        recv_previous_parsed_rec_offset
 
1838
                                = recv_sys->recovered_offset + total_len;
 
1839
                        recv_previous_parsed_rec_is_multi = 1;
 
1840
 
 
1841
                        if ((!store_to_hash) && (type != MLOG_MULTI_REC_END)) {
 
1842
#ifdef UNIV_LOG_DEBUG
 
1843
                                recv_check_incomplete_log_recs(ptr, len);
 
1844
#endif /* UNIV_LOG_DEBUG */
 
1845
                        }
 
1846
 
 
1847
#ifdef UNIV_DEBUG
 
1848
                        if (log_debug_writes) {
 
1849
                                fprintf(stderr,
 
1850
                                        "InnoDB: Parsed a multi log rec"
 
1851
                                        " type %lu len %lu"
 
1852
                                        " space %lu page no %lu\n",
 
1853
                                        (ulong) type, (ulong) len,
 
1854
                                        (ulong) space, (ulong) page_no);
 
1855
                        }
 
1856
#endif /* UNIV_DEBUG */
 
1857
 
 
1858
                        total_len += len;
 
1859
                        n_recs++;
 
1860
 
 
1861
                        ptr += len;
 
1862
 
 
1863
                        if (type == MLOG_MULTI_REC_END) {
 
1864
 
 
1865
                                /* Found the end mark for the records */
 
1866
 
 
1867
                                break;
 
1868
                        }
 
1869
                }
 
1870
 
 
1871
                new_recovered_lsn = recv_calc_lsn_on_data_add(
 
1872
                        recv_sys->recovered_lsn, total_len);
 
1873
 
 
1874
                if (ut_dulint_cmp(new_recovered_lsn, recv_sys->scanned_lsn)
 
1875
                    > 0) {
 
1876
                        /* The log record filled a log block, and we require
 
1877
                        that also the next log block should have been scanned
 
1878
                        in */
 
1879
 
 
1880
                        return(FALSE);
 
1881
                }
 
1882
 
 
1883
                /* Add all the records to the hash table */
 
1884
 
 
1885
                ptr = recv_sys->buf + recv_sys->recovered_offset;
 
1886
 
 
1887
                for (;;) {
 
1888
                        old_lsn = recv_sys->recovered_lsn;
 
1889
                        len = recv_parse_log_rec(ptr, end_ptr, &type, &space,
 
1890
                                                 &page_no, &body);
 
1891
                        if (recv_sys->found_corrupt_log) {
 
1892
 
 
1893
                                recv_report_corrupt_log(ptr,
 
1894
                                                        type, space, page_no);
 
1895
                        }
 
1896
 
 
1897
                        ut_a(len != 0);
 
1898
                        ut_a(0 == ((ulint)*ptr & MLOG_SINGLE_REC_FLAG));
 
1899
 
 
1900
                        recv_sys->recovered_offset += len;
 
1901
                        recv_sys->recovered_lsn
 
1902
                                = recv_calc_lsn_on_data_add(old_lsn, len);
 
1903
                        if (type == MLOG_MULTI_REC_END) {
 
1904
 
 
1905
                                /* Found the end mark for the records */
 
1906
 
 
1907
                                break;
 
1908
                        }
 
1909
 
 
1910
                        if (store_to_hash) {
 
1911
                                recv_add_to_hash_table(type, space, page_no,
 
1912
                                                       body, ptr + len,
 
1913
                                                       old_lsn,
 
1914
                                                       new_recovered_lsn);
 
1915
                        }
 
1916
 
 
1917
                        ptr += len;
 
1918
                }
 
1919
        }
 
1920
 
 
1921
        goto loop;
 
1922
}
 
1923
 
 
1924
/***********************************************************
 
1925
Adds data from a new log block to the parsing buffer of recv_sys if
 
1926
recv_sys->parse_start_lsn is non-zero. */
 
1927
static
 
1928
ibool
 
1929
recv_sys_add_to_parsing_buf(
 
1930
/*========================*/
 
1931
                                /* out: TRUE if more data added */
 
1932
        byte*   log_block,      /* in: log block */
 
1933
        dulint  scanned_lsn)    /* in: lsn of how far we were able to find
 
1934
                                data in this log block */
 
1935
{
 
1936
        ulint   more_len;
 
1937
        ulint   data_len;
 
1938
        ulint   start_offset;
 
1939
        ulint   end_offset;
 
1940
 
 
1941
        ut_ad(ut_dulint_cmp(scanned_lsn, recv_sys->scanned_lsn) >= 0);
 
1942
 
 
1943
        if (ut_dulint_is_zero(recv_sys->parse_start_lsn)) {
 
1944
                /* Cannot start parsing yet because no start point for
 
1945
                it found */
 
1946
 
 
1947
                return(FALSE);
 
1948
        }
 
1949
 
 
1950
        data_len = log_block_get_data_len(log_block);
 
1951
 
 
1952
        if (ut_dulint_cmp(recv_sys->parse_start_lsn, scanned_lsn) >= 0) {
 
1953
 
 
1954
                return(FALSE);
 
1955
 
 
1956
        } else if (ut_dulint_cmp(recv_sys->scanned_lsn, scanned_lsn) >= 0) {
 
1957
 
 
1958
                return(FALSE);
 
1959
 
 
1960
        } else if (ut_dulint_cmp(recv_sys->parse_start_lsn,
 
1961
                                 recv_sys->scanned_lsn) > 0) {
 
1962
                more_len = ut_dulint_minus(scanned_lsn,
 
1963
                                           recv_sys->parse_start_lsn);
 
1964
        } else {
 
1965
                more_len = ut_dulint_minus(scanned_lsn, recv_sys->scanned_lsn);
 
1966
        }
 
1967
 
 
1968
        if (more_len == 0) {
 
1969
 
 
1970
                return(FALSE);
 
1971
        }
 
1972
 
 
1973
        ut_ad(data_len >= more_len);
 
1974
 
 
1975
        start_offset = data_len - more_len;
 
1976
 
 
1977
        if (start_offset < LOG_BLOCK_HDR_SIZE) {
 
1978
                start_offset = LOG_BLOCK_HDR_SIZE;
 
1979
        }
 
1980
 
 
1981
        end_offset = data_len;
 
1982
 
 
1983
        if (end_offset > OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) {
 
1984
                end_offset = OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE;
 
1985
        }
 
1986
 
 
1987
        ut_ad(start_offset <= end_offset);
 
1988
 
 
1989
        if (start_offset < end_offset) {
 
1990
                ut_memcpy(recv_sys->buf + recv_sys->len,
 
1991
                          log_block + start_offset, end_offset - start_offset);
 
1992
 
 
1993
                recv_sys->len += end_offset - start_offset;
 
1994
 
 
1995
                ut_a(recv_sys->len <= RECV_PARSING_BUF_SIZE);
 
1996
        }
 
1997
 
 
1998
        return(TRUE);
 
1999
}
 
2000
 
 
2001
/***********************************************************
 
2002
Moves the parsing buffer data left to the buffer start. */
 
2003
static
 
2004
void
 
2005
recv_sys_justify_left_parsing_buf(void)
 
2006
/*===================================*/
 
2007
{
 
2008
        ut_memmove(recv_sys->buf, recv_sys->buf + recv_sys->recovered_offset,
 
2009
                   recv_sys->len - recv_sys->recovered_offset);
 
2010
 
 
2011
        recv_sys->len -= recv_sys->recovered_offset;
 
2012
 
 
2013
        recv_sys->recovered_offset = 0;
 
2014
}
 
2015
 
 
2016
/***********************************************************
 
2017
Scans log from a buffer and stores new log data to the parsing buffer. Parses
 
2018
and hashes the log records if new data found. */
 
2019
 
 
2020
ibool
 
2021
recv_scan_log_recs(
 
2022
/*===============*/
 
2023
                                /* out: TRUE if limit_lsn has been reached, or
 
2024
                                not able to scan any more in this log group */
 
2025
        ibool   apply_automatically,/* in: TRUE if we want this function to
 
2026
                                apply log records automatically when the
 
2027
                                hash table becomes full; in the hot backup tool
 
2028
                                the tool does the applying, not this
 
2029
                                function */
 
2030
        ulint   available_memory,/* in: we let the hash table of recs to grow
 
2031
                                to this size, at the maximum */
 
2032
        ibool   store_to_hash,  /* in: TRUE if the records should be stored
 
2033
                                to the hash table; this is set to FALSE if just
 
2034
                                debug checking is needed */
 
2035
        byte*   buf,            /* in: buffer containing a log segment or
 
2036
                                garbage */
 
2037
        ulint   len,            /* in: buffer length */
 
2038
        dulint  start_lsn,      /* in: buffer start lsn */
 
2039
        dulint* contiguous_lsn, /* in/out: it is known that all log groups
 
2040
                                contain contiguous log data up to this lsn */
 
2041
        dulint* group_scanned_lsn)/* out: scanning succeeded up to this lsn */
 
2042
{
 
2043
        byte*   log_block;
 
2044
        ulint   no;
 
2045
        dulint  scanned_lsn;
 
2046
        ibool   finished;
 
2047
        ulint   data_len;
 
2048
        ibool   more_data;
 
2049
 
 
2050
        ut_ad(ut_dulint_get_low(start_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0);
 
2051
        ut_ad(len % OS_FILE_LOG_BLOCK_SIZE == 0);
 
2052
        ut_ad(len > 0);
 
2053
        ut_a(apply_automatically <= TRUE);
 
2054
        ut_a(store_to_hash <= TRUE);
 
2055
 
 
2056
        finished = FALSE;
 
2057
 
 
2058
        log_block = buf;
 
2059
        scanned_lsn = start_lsn;
 
2060
        more_data = FALSE;
 
2061
 
 
2062
        while (log_block < buf + len && !finished) {
 
2063
 
 
2064
                no = log_block_get_hdr_no(log_block);
 
2065
                /*
 
2066
                fprintf(stderr, "Log block header no %lu\n", no);
 
2067
 
 
2068
                fprintf(stderr, "Scanned lsn no %lu\n",
 
2069
                log_block_convert_lsn_to_no(scanned_lsn));
 
2070
                */
 
2071
                if (no != log_block_convert_lsn_to_no(scanned_lsn)
 
2072
                    || !log_block_checksum_is_ok_or_old_format(log_block)) {
 
2073
 
 
2074
                        if (no == log_block_convert_lsn_to_no(scanned_lsn)
 
2075
                            && !log_block_checksum_is_ok_or_old_format(
 
2076
                                    log_block)) {
 
2077
                                fprintf(stderr,
 
2078
                                        "InnoDB: Log block no %lu at"
 
2079
                                        " lsn %lu %lu has\n"
 
2080
                                        "InnoDB: ok header, but checksum field"
 
2081
                                        " contains %lu, should be %lu\n",
 
2082
                                        (ulong) no,
 
2083
                                        (ulong) ut_dulint_get_high(
 
2084
                                                scanned_lsn),
 
2085
                                        (ulong) ut_dulint_get_low(scanned_lsn),
 
2086
                                        (ulong) log_block_get_checksum(
 
2087
                                                log_block),
 
2088
                                        (ulong) log_block_calc_checksum(
 
2089
                                                log_block));
 
2090
                        }
 
2091
 
 
2092
                        /* Garbage or an incompletely written log block */
 
2093
 
 
2094
                        finished = TRUE;
 
2095
 
 
2096
                        break;
 
2097
                }
 
2098
 
 
2099
                if (log_block_get_flush_bit(log_block)) {
 
2100
                        /* This block was a start of a log flush operation:
 
2101
                        we know that the previous flush operation must have
 
2102
                        been completed for all log groups before this block
 
2103
                        can have been flushed to any of the groups. Therefore,
 
2104
                        we know that log data is contiguous up to scanned_lsn
 
2105
                        in all non-corrupt log groups. */
 
2106
 
 
2107
                        if (ut_dulint_cmp(scanned_lsn, *contiguous_lsn) > 0) {
 
2108
                                *contiguous_lsn = scanned_lsn;
 
2109
                        }
 
2110
                }
 
2111
 
 
2112
                data_len = log_block_get_data_len(log_block);
 
2113
 
 
2114
                if ((store_to_hash || (data_len == OS_FILE_LOG_BLOCK_SIZE))
 
2115
                    && (ut_dulint_cmp(ut_dulint_add(scanned_lsn, data_len),
 
2116
                                      recv_sys->scanned_lsn) > 0)
 
2117
                    && (recv_sys->scanned_checkpoint_no > 0)
 
2118
                    && (log_block_get_checkpoint_no(log_block)
 
2119
                        < recv_sys->scanned_checkpoint_no)
 
2120
                    && (recv_sys->scanned_checkpoint_no
 
2121
                        - log_block_get_checkpoint_no(log_block)
 
2122
                        > 0x80000000UL)) {
 
2123
 
 
2124
                        /* Garbage from a log buffer flush which was made
 
2125
                        before the most recent database recovery */
 
2126
 
 
2127
                        finished = TRUE;
 
2128
#ifdef UNIV_LOG_DEBUG
 
2129
                        /* This is not really an error, but currently
 
2130
                        we stop here in the debug version: */
 
2131
 
 
2132
                        ut_error;
 
2133
#endif
 
2134
                        break;
 
2135
                }
 
2136
 
 
2137
                if (ut_dulint_is_zero(recv_sys->parse_start_lsn)
 
2138
                    && (log_block_get_first_rec_group(log_block) > 0)) {
 
2139
 
 
2140
                        /* We found a point from which to start the parsing
 
2141
                        of log records */
 
2142
 
 
2143
                        recv_sys->parse_start_lsn
 
2144
                                = ut_dulint_add(scanned_lsn,
 
2145
                                                log_block_get_first_rec_group(
 
2146
                                                        log_block));
 
2147
                        recv_sys->scanned_lsn = recv_sys->parse_start_lsn;
 
2148
                        recv_sys->recovered_lsn = recv_sys->parse_start_lsn;
 
2149
                }
 
2150
 
 
2151
                scanned_lsn = ut_dulint_add(scanned_lsn, data_len);
 
2152
 
 
2153
                if (ut_dulint_cmp(scanned_lsn, recv_sys->scanned_lsn) > 0) {
 
2154
 
 
2155
                        /* We have found more entries. If this scan is
 
2156
                        of startup type, we must initiate crash recovery
 
2157
                        environment before parsing these log records. */
 
2158
 
 
2159
                        if (recv_log_scan_is_startup_type
 
2160
                            && !recv_needed_recovery) {
 
2161
 
 
2162
                                fprintf(stderr,
 
2163
                                        "InnoDB: Log scan progressed"
 
2164
                                        " past the checkpoint lsn %lu %lu\n",
 
2165
                                        (ulong) ut_dulint_get_high(
 
2166
                                                recv_sys->scanned_lsn),
 
2167
                                        (ulong) ut_dulint_get_low(
 
2168
                                                recv_sys->scanned_lsn));
 
2169
                                recv_init_crash_recovery();
 
2170
                        }
 
2171
 
 
2172
                        /* We were able to find more log data: add it to the
 
2173
                        parsing buffer if parse_start_lsn is already
 
2174
                        non-zero */
 
2175
 
 
2176
                        if (recv_sys->len + 4 * OS_FILE_LOG_BLOCK_SIZE
 
2177
                            >= RECV_PARSING_BUF_SIZE) {
 
2178
                                fprintf(stderr,
 
2179
                                        "InnoDB: Error: log parsing"
 
2180
                                        " buffer overflow."
 
2181
                                        " Recovery may have failed!\n");
 
2182
 
 
2183
                                recv_sys->found_corrupt_log = TRUE;
 
2184
 
 
2185
                        } else if (!recv_sys->found_corrupt_log) {
 
2186
                                more_data = recv_sys_add_to_parsing_buf(
 
2187
                                        log_block, scanned_lsn);
 
2188
                        }
 
2189
 
 
2190
                        recv_sys->scanned_lsn = scanned_lsn;
 
2191
                        recv_sys->scanned_checkpoint_no
 
2192
                                = log_block_get_checkpoint_no(log_block);
 
2193
                }
 
2194
 
 
2195
                if (data_len < OS_FILE_LOG_BLOCK_SIZE) {
 
2196
                        /* Log data for this group ends here */
 
2197
 
 
2198
                        finished = TRUE;
 
2199
                } else {
 
2200
                        log_block += OS_FILE_LOG_BLOCK_SIZE;
 
2201
                }
 
2202
        }
 
2203
 
 
2204
        *group_scanned_lsn = scanned_lsn;
 
2205
 
 
2206
        if (recv_needed_recovery
 
2207
            || (recv_is_from_backup && !recv_is_making_a_backup)) {
 
2208
                recv_scan_print_counter++;
 
2209
 
 
2210
                if (finished || (recv_scan_print_counter % 80 == 0)) {
 
2211
 
 
2212
                        fprintf(stderr,
 
2213
                                "InnoDB: Doing recovery: scanned up to"
 
2214
                                " log sequence number %lu %lu\n",
 
2215
                                (ulong) ut_dulint_get_high(*group_scanned_lsn),
 
2216
                                (ulong) ut_dulint_get_low(*group_scanned_lsn));
 
2217
                }
 
2218
        }
 
2219
 
 
2220
        if (more_data && !recv_sys->found_corrupt_log) {
 
2221
                /* Try to parse more log records */
 
2222
 
 
2223
                recv_parse_log_recs(store_to_hash);
 
2224
 
 
2225
                if (store_to_hash && mem_heap_get_size(recv_sys->heap)
 
2226
                    > available_memory
 
2227
                    && apply_automatically) {
 
2228
 
 
2229
                        /* Hash table of log records has grown too big:
 
2230
                        empty it; FALSE means no ibuf operations
 
2231
                        allowed, as we cannot add new records to the
 
2232
                        log yet: they would be produced by ibuf
 
2233
                        operations */
 
2234
 
 
2235
                        recv_apply_hashed_log_recs(FALSE);
 
2236
                }
 
2237
 
 
2238
                if (recv_sys->recovered_offset > RECV_PARSING_BUF_SIZE / 4) {
 
2239
                        /* Move parsing buffer data to the buffer start */
 
2240
 
 
2241
                        recv_sys_justify_left_parsing_buf();
 
2242
                }
 
2243
        }
 
2244
 
 
2245
        return(finished);
 
2246
}
 
2247
 
 
2248
/***********************************************************
 
2249
Scans log from a buffer and stores new log data to the parsing buffer. Parses
 
2250
and hashes the log records if new data found. */
 
2251
static
 
2252
void
 
2253
recv_group_scan_log_recs(
 
2254
/*=====================*/
 
2255
        log_group_t* group,     /* in: log group */
 
2256
        dulint* contiguous_lsn, /* in/out: it is known that all log groups
 
2257
                                contain contiguous log data up to this lsn */
 
2258
        dulint* group_scanned_lsn)/* out: scanning succeeded up to this lsn */
 
2259
{
 
2260
        ibool   finished;
 
2261
        dulint  start_lsn;
 
2262
        dulint  end_lsn;
 
2263
 
 
2264
        finished = FALSE;
 
2265
 
 
2266
        start_lsn = *contiguous_lsn;
 
2267
 
 
2268
        while (!finished) {
 
2269
                end_lsn = ut_dulint_add(start_lsn, RECV_SCAN_SIZE);
 
2270
 
 
2271
                log_group_read_log_seg(LOG_RECOVER, log_sys->buf,
 
2272
                                       group, start_lsn, end_lsn);
 
2273
 
 
2274
                finished = recv_scan_log_recs(
 
2275
                        TRUE, (buf_pool->n_frames - recv_n_pool_free_frames)
 
2276
                        * UNIV_PAGE_SIZE, TRUE, log_sys->buf, RECV_SCAN_SIZE,
 
2277
                        start_lsn, contiguous_lsn, group_scanned_lsn);
 
2278
                start_lsn = end_lsn;
 
2279
        }
 
2280
 
 
2281
#ifdef UNIV_DEBUG
 
2282
        if (log_debug_writes) {
 
2283
                fprintf(stderr,
 
2284
                        "InnoDB: Scanned group %lu up to"
 
2285
                        " log sequence number %lu %lu\n",
 
2286
                        (ulong) group->id,
 
2287
                        (ulong) ut_dulint_get_high(*group_scanned_lsn),
 
2288
                        (ulong) ut_dulint_get_low(*group_scanned_lsn));
 
2289
        }
 
2290
#endif /* UNIV_DEBUG */
 
2291
}
 
2292
 
 
2293
/***********************************************************
 
2294
Initialize crash recovery environment. Can be called iff
 
2295
recv_needed_recovery == FALSE. */
 
2296
static
 
2297
void
 
2298
recv_init_crash_recovery(void)
 
2299
/*==========================*/
 
2300
{
 
2301
        ut_a(!recv_needed_recovery);
 
2302
 
 
2303
        recv_needed_recovery = TRUE;
 
2304
 
 
2305
        ut_print_timestamp(stderr);
 
2306
 
 
2307
        fprintf(stderr,
 
2308
                "  InnoDB: Database was not"
 
2309
                " shut down normally!\n"
 
2310
                "InnoDB: Starting crash recovery.\n");
 
2311
 
 
2312
        fprintf(stderr,
 
2313
                "InnoDB: Reading tablespace information"
 
2314
                " from the .ibd files...\n");
 
2315
 
 
2316
        fil_load_single_table_tablespaces();
 
2317
 
 
2318
        /* If we are using the doublewrite method, we will
 
2319
        check if there are half-written pages in data files,
 
2320
        and restore them from the doublewrite buffer if
 
2321
        possible */
 
2322
 
 
2323
        if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) {
 
2324
 
 
2325
                fprintf(stderr,
 
2326
                        "InnoDB: Restoring possible"
 
2327
                        " half-written data pages from"
 
2328
                        " the doublewrite\n"
 
2329
                        "InnoDB: buffer...\n");
 
2330
                trx_sys_doublewrite_init_or_restore_pages(TRUE);
 
2331
        }
 
2332
}
 
2333
 
 
2334
/************************************************************
 
2335
Recovers from a checkpoint. When this function returns, the database is able
 
2336
to start processing of new user transactions, but the function
 
2337
recv_recovery_from_checkpoint_finish should be called later to complete
 
2338
the recovery and free the resources used in it. */
 
2339
 
 
2340
ulint
 
2341
recv_recovery_from_checkpoint_start(
 
2342
/*================================*/
 
2343
                                /* out: error code or DB_SUCCESS */
 
2344
        ulint   type,           /* in: LOG_CHECKPOINT or LOG_ARCHIVE */
 
2345
        dulint  limit_lsn,      /* in: recover up to this lsn if possible */
 
2346
        dulint  min_flushed_lsn,/* in: min flushed lsn from data files */
 
2347
        dulint  max_flushed_lsn)/* in: max flushed lsn from data files */
 
2348
{
 
2349
        log_group_t*    group;
 
2350
        log_group_t*    max_cp_group;
 
2351
        log_group_t*    up_to_date_group;
 
2352
        ulint           max_cp_field;
 
2353
        dulint          checkpoint_lsn;
 
2354
        dulint          checkpoint_no;
 
2355
        dulint          old_scanned_lsn;
 
2356
        dulint          group_scanned_lsn;
 
2357
        dulint          contiguous_lsn;
 
2358
        dulint          archived_lsn;
 
2359
        ulint           capacity;
 
2360
        byte*           buf;
 
2361
        byte            log_hdr_buf[LOG_FILE_HDR_SIZE];
 
2362
        ulint           err;
 
2363
 
 
2364
        ut_ad((type != LOG_CHECKPOINT)
 
2365
              || (ut_dulint_cmp(limit_lsn, ut_dulint_max) == 0));
 
2366
 
 
2367
        if (type == LOG_CHECKPOINT) {
 
2368
                recv_sys_create();
 
2369
                recv_sys_init(FALSE, buf_pool_get_curr_size());
 
2370
        }
 
2371
 
 
2372
        if (srv_force_recovery >= SRV_FORCE_NO_LOG_REDO) {
 
2373
                fprintf(stderr,
 
2374
                        "InnoDB: The user has set SRV_FORCE_NO_LOG_REDO on\n");
 
2375
                fprintf(stderr,
 
2376
                        "InnoDB: Skipping log redo\n");
 
2377
 
 
2378
                return(DB_SUCCESS);
 
2379
        }
 
2380
 
 
2381
        recv_recovery_on = TRUE;
 
2382
 
 
2383
        recv_sys->limit_lsn = limit_lsn;
 
2384
 
 
2385
        mutex_enter(&(log_sys->mutex));
 
2386
 
 
2387
        /* Look for the latest checkpoint from any of the log groups */
 
2388
 
 
2389
        err = recv_find_max_checkpoint(&max_cp_group, &max_cp_field);
 
2390
 
 
2391
        if (err != DB_SUCCESS) {
 
2392
 
 
2393
                mutex_exit(&(log_sys->mutex));
 
2394
 
 
2395
                return(err);
 
2396
        }
 
2397
 
 
2398
        log_group_read_checkpoint_info(max_cp_group, max_cp_field);
 
2399
 
 
2400
        buf = log_sys->checkpoint_buf;
 
2401
 
 
2402
        checkpoint_lsn = mach_read_from_8(buf + LOG_CHECKPOINT_LSN);
 
2403
        checkpoint_no = mach_read_from_8(buf + LOG_CHECKPOINT_NO);
 
2404
        archived_lsn = mach_read_from_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN);
 
2405
 
 
2406
        /* Read the first log file header to print a note if this is
 
2407
        a recovery from a restored InnoDB Hot Backup */
 
2408
 
 
2409
        fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE, max_cp_group->space_id,
 
2410
               0, 0, LOG_FILE_HDR_SIZE,
 
2411
               log_hdr_buf, max_cp_group);
 
2412
 
 
2413
        if (0 == ut_memcmp(log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP,
 
2414
                           (byte*)"ibbackup", (sizeof "ibbackup") - 1)) {
 
2415
                /* This log file was created by ibbackup --restore: print
 
2416
                a note to the user about it */
 
2417
 
 
2418
                fprintf(stderr,
 
2419
                        "InnoDB: The log file was created by"
 
2420
                        " ibbackup --apply-log at\n"
 
2421
                        "InnoDB: %s\n",
 
2422
                        log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP);
 
2423
                fprintf(stderr,
 
2424
                        "InnoDB: NOTE: the following crash recovery"
 
2425
                        " is part of a normal restore.\n");
 
2426
 
 
2427
                /* Wipe over the label now */
 
2428
 
 
2429
                memset(log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP,
 
2430
                       ' ', 4);
 
2431
                /* Write to the log file to wipe over the label */
 
2432
                fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE,
 
2433
                       max_cp_group->space_id,
 
2434
                       0, 0, OS_FILE_LOG_BLOCK_SIZE,
 
2435
                       log_hdr_buf, max_cp_group);
 
2436
        }
 
2437
 
 
2438
#ifdef UNIV_LOG_ARCHIVE
 
2439
        group = UT_LIST_GET_FIRST(log_sys->log_groups);
 
2440
 
 
2441
        while (group) {
 
2442
                log_checkpoint_get_nth_group_info(buf, group->id,
 
2443
                                                  &(group->archived_file_no),
 
2444
                                                  &(group->archived_offset));
 
2445
 
 
2446
                group = UT_LIST_GET_NEXT(log_groups, group);
 
2447
        }
 
2448
#endif /* UNIV_LOG_ARCHIVE */
 
2449
 
 
2450
        if (type == LOG_CHECKPOINT) {
 
2451
                /* Start reading the log groups from the checkpoint lsn up. The
 
2452
                variable contiguous_lsn contains an lsn up to which the log is
 
2453
                known to be contiguously written to all log groups. */
 
2454
 
 
2455
                recv_sys->parse_start_lsn = checkpoint_lsn;
 
2456
                recv_sys->scanned_lsn = checkpoint_lsn;
 
2457
                recv_sys->scanned_checkpoint_no = 0;
 
2458
                recv_sys->recovered_lsn = checkpoint_lsn;
 
2459
 
 
2460
                srv_start_lsn = checkpoint_lsn;
 
2461
        }
 
2462
 
 
2463
        contiguous_lsn = ut_dulint_align_down(recv_sys->scanned_lsn,
 
2464
                                              OS_FILE_LOG_BLOCK_SIZE);
 
2465
        if (type == LOG_ARCHIVE) {
 
2466
                /* Try to recover the remaining part from logs: first from
 
2467
                the logs of the archived group */
 
2468
 
 
2469
                group = recv_sys->archive_group;
 
2470
                capacity = log_group_get_capacity(group);
 
2471
 
 
2472
                if ((ut_dulint_cmp(recv_sys->scanned_lsn, ut_dulint_add(
 
2473
                                           checkpoint_lsn, capacity)) > 0)
 
2474
                    || (ut_dulint_cmp(checkpoint_lsn, ut_dulint_add(
 
2475
                                              recv_sys->scanned_lsn, capacity))
 
2476
                        > 0)) {
 
2477
 
 
2478
                        mutex_exit(&(log_sys->mutex));
 
2479
 
 
2480
                        /* The group does not contain enough log: probably
 
2481
                        an archived log file was missing or corrupt */
 
2482
 
 
2483
                        return(DB_ERROR);
 
2484
                }
 
2485
 
 
2486
                recv_group_scan_log_recs(group, &contiguous_lsn,
 
2487
                                         &group_scanned_lsn);
 
2488
                if (ut_dulint_cmp(recv_sys->scanned_lsn, checkpoint_lsn) < 0) {
 
2489
 
 
2490
                        mutex_exit(&(log_sys->mutex));
 
2491
 
 
2492
                        /* The group did not contain enough log: an archived
 
2493
                        log file was missing or invalid, or the log group
 
2494
                        was corrupt */
 
2495
 
 
2496
                        return(DB_ERROR);
 
2497
                }
 
2498
 
 
2499
                group->scanned_lsn = group_scanned_lsn;
 
2500
                up_to_date_group = group;
 
2501
        } else {
 
2502
                up_to_date_group = max_cp_group;
 
2503
        }
 
2504
 
 
2505
        ut_ad(RECV_SCAN_SIZE <= log_sys->buf_size);
 
2506
 
 
2507
        group = UT_LIST_GET_FIRST(log_sys->log_groups);
 
2508
 
 
2509
        if ((type == LOG_ARCHIVE) && (group == recv_sys->archive_group)) {
 
2510
                group = UT_LIST_GET_NEXT(log_groups, group);
 
2511
        }
 
2512
 
 
2513
        /* Set the flag to publish that we are doing startup scan. */
 
2514
        recv_log_scan_is_startup_type = (type == LOG_CHECKPOINT);
 
2515
        while (group) {
 
2516
                old_scanned_lsn = recv_sys->scanned_lsn;
 
2517
 
 
2518
                recv_group_scan_log_recs(group, &contiguous_lsn,
 
2519
                                         &group_scanned_lsn);
 
2520
                group->scanned_lsn = group_scanned_lsn;
 
2521
 
 
2522
                if (ut_dulint_cmp(old_scanned_lsn, group_scanned_lsn) < 0) {
 
2523
                        /* We found a more up-to-date group */
 
2524
 
 
2525
                        up_to_date_group = group;
 
2526
                }
 
2527
 
 
2528
                if ((type == LOG_ARCHIVE)
 
2529
                    && (group == recv_sys->archive_group)) {
 
2530
                        group = UT_LIST_GET_NEXT(log_groups, group);
 
2531
                }
 
2532
 
 
2533
                group = UT_LIST_GET_NEXT(log_groups, group);
 
2534
        }
 
2535
 
 
2536
        /* Done with startup scan. Clear the flag. */
 
2537
        recv_log_scan_is_startup_type = FALSE;
 
2538
        if (type == LOG_CHECKPOINT) {
 
2539
                /* NOTE: we always do a 'recovery' at startup, but only if
 
2540
                there is something wrong we will print a message to the
 
2541
                user about recovery: */
 
2542
 
 
2543
                if (ut_dulint_cmp(checkpoint_lsn, max_flushed_lsn) != 0
 
2544
                    || ut_dulint_cmp(checkpoint_lsn, min_flushed_lsn) != 0) {
 
2545
 
 
2546
                        if (ut_dulint_cmp(checkpoint_lsn, max_flushed_lsn)
 
2547
                            < 0) {
 
2548
                                fprintf(stderr,
 
2549
                                        "InnoDB: #########################"
 
2550
                                        "#################################\n"
 
2551
                                        "InnoDB:                          "
 
2552
                                        "WARNING!\n"
 
2553
                                        "InnoDB: The log sequence number"
 
2554
                                        " in ibdata files is higher\n"
 
2555
                                        "InnoDB: than the log sequence number"
 
2556
                                        " in the ib_logfiles! Are you sure\n"
 
2557
                                        "InnoDB: you are using the right"
 
2558
                                        " ib_logfiles to start up"
 
2559
                                        " the database?\n"
 
2560
                                        "InnoDB: Log sequence number in"
 
2561
                                        " ib_logfiles is %lu %lu, log\n"
 
2562
                                        "InnoDB: sequence numbers stamped"
 
2563
                                        " to ibdata file headers are between\n"
 
2564
                                        "InnoDB: %lu %lu and %lu %lu.\n"
 
2565
                                        "InnoDB: #########################"
 
2566
                                        "#################################\n",
 
2567
                                        (ulong) ut_dulint_get_high(
 
2568
                                                checkpoint_lsn),
 
2569
                                        (ulong) ut_dulint_get_low(
 
2570
                                                checkpoint_lsn),
 
2571
                                        (ulong) ut_dulint_get_high(
 
2572
                                                min_flushed_lsn),
 
2573
                                        (ulong) ut_dulint_get_low(
 
2574
                                                min_flushed_lsn),
 
2575
                                        (ulong) ut_dulint_get_high(
 
2576
                                                max_flushed_lsn),
 
2577
                                        (ulong) ut_dulint_get_low(
 
2578
                                                max_flushed_lsn));
 
2579
 
 
2580
 
 
2581
                        }
 
2582
 
 
2583
                        if (!recv_needed_recovery) {
 
2584
                                fprintf(stderr,
 
2585
                                        "InnoDB: The log sequence number"
 
2586
                                        " in ibdata files does not match\n"
 
2587
                                        "InnoDB: the log sequence number"
 
2588
                                        " in the ib_logfiles!\n");
 
2589
                                recv_init_crash_recovery();
 
2590
                        }
 
2591
 
 
2592
                }
 
2593
                if (!recv_needed_recovery) {
 
2594
                        /* Init the doublewrite buffer memory structure */
 
2595
                        trx_sys_doublewrite_init_or_restore_pages(FALSE);
 
2596
                }
 
2597
        }
 
2598
 
 
2599
        /* We currently have only one log group */
 
2600
        if (ut_dulint_cmp(group_scanned_lsn, checkpoint_lsn) < 0) {
 
2601
                ut_print_timestamp(stderr);
 
2602
                fprintf(stderr,
 
2603
                        "  InnoDB: ERROR: We were only able to scan the log"
 
2604
                        " up to\n"
 
2605
                        "InnoDB: %lu %lu, but a checkpoint was at %lu %lu.\n"
 
2606
                        "InnoDB: It is possible that"
 
2607
                        " the database is now corrupt!\n",
 
2608
                        (ulong) ut_dulint_get_high(group_scanned_lsn),
 
2609
                        (ulong) ut_dulint_get_low(group_scanned_lsn),
 
2610
                        (ulong) ut_dulint_get_high(checkpoint_lsn),
 
2611
                        (ulong) ut_dulint_get_low(checkpoint_lsn));
 
2612
        }
 
2613
 
 
2614
        if (ut_dulint_cmp(group_scanned_lsn, recv_max_page_lsn) < 0) {
 
2615
                ut_print_timestamp(stderr);
 
2616
                fprintf(stderr,
 
2617
                        "  InnoDB: ERROR: We were only able to scan the log"
 
2618
                        " up to %lu %lu\n"
 
2619
                        "InnoDB: but a database page a had an lsn %lu %lu."
 
2620
                        " It is possible that the\n"
 
2621
                        "InnoDB: database is now corrupt!\n",
 
2622
                        (ulong) ut_dulint_get_high(group_scanned_lsn),
 
2623
                        (ulong) ut_dulint_get_low(group_scanned_lsn),
 
2624
                        (ulong) ut_dulint_get_high(recv_max_page_lsn),
 
2625
                        (ulong) ut_dulint_get_low(recv_max_page_lsn));
 
2626
        }
 
2627
 
 
2628
        if (ut_dulint_cmp(recv_sys->recovered_lsn, checkpoint_lsn) < 0) {
 
2629
 
 
2630
                mutex_exit(&(log_sys->mutex));
 
2631
 
 
2632
                if (ut_dulint_cmp(recv_sys->recovered_lsn, limit_lsn) >= 0) {
 
2633
 
 
2634
                        return(DB_SUCCESS);
 
2635
                }
 
2636
 
 
2637
                ut_error;
 
2638
 
 
2639
                return(DB_ERROR);
 
2640
        }
 
2641
 
 
2642
        /* Synchronize the uncorrupted log groups to the most up-to-date log
 
2643
        group; we also copy checkpoint info to groups */
 
2644
 
 
2645
        log_sys->next_checkpoint_lsn = checkpoint_lsn;
 
2646
        log_sys->next_checkpoint_no = ut_dulint_add(checkpoint_no, 1);
 
2647
 
 
2648
#ifdef UNIV_LOG_ARCHIVE
 
2649
        log_sys->archived_lsn = archived_lsn;
 
2650
#endif /* UNIV_LOG_ARCHIVE */
 
2651
 
 
2652
        recv_synchronize_groups(up_to_date_group);
 
2653
 
 
2654
        if (!recv_needed_recovery) {
 
2655
                ut_a(ut_dulint_cmp(checkpoint_lsn,
 
2656
                                   recv_sys->recovered_lsn) == 0);
 
2657
 
 
2658
        } else {
 
2659
                srv_start_lsn = recv_sys->recovered_lsn;
 
2660
        }
 
2661
 
 
2662
        log_sys->lsn = recv_sys->recovered_lsn;
 
2663
 
 
2664
        ut_memcpy(log_sys->buf, recv_sys->last_block, OS_FILE_LOG_BLOCK_SIZE);
 
2665
 
 
2666
        log_sys->buf_free = ut_dulint_get_low(log_sys->lsn)
 
2667
                % OS_FILE_LOG_BLOCK_SIZE;
 
2668
        log_sys->buf_next_to_write = log_sys->buf_free;
 
2669
        log_sys->written_to_some_lsn = log_sys->lsn;
 
2670
        log_sys->written_to_all_lsn = log_sys->lsn;
 
2671
 
 
2672
        log_sys->last_checkpoint_lsn = checkpoint_lsn;
 
2673
 
 
2674
        log_sys->next_checkpoint_no = ut_dulint_add(checkpoint_no, 1);
 
2675
 
 
2676
#ifdef UNIV_LOG_ARCHIVE
 
2677
        if (ut_dulint_cmp(archived_lsn, ut_dulint_max) == 0) {
 
2678
 
 
2679
                log_sys->archiving_state = LOG_ARCH_OFF;
 
2680
        }
 
2681
#endif /* UNIV_LOG_ARCHIVE */
 
2682
 
 
2683
        mutex_enter(&(recv_sys->mutex));
 
2684
 
 
2685
        recv_sys->apply_log_recs = TRUE;
 
2686
 
 
2687
        mutex_exit(&(recv_sys->mutex));
 
2688
 
 
2689
        mutex_exit(&(log_sys->mutex));
 
2690
 
 
2691
        recv_lsn_checks_on = TRUE;
 
2692
 
 
2693
        /* The database is now ready to start almost normal processing of user
 
2694
        transactions: transaction rollbacks and the application of the log
 
2695
        records in the hash table can be run in background. */
 
2696
 
 
2697
        return(DB_SUCCESS);
 
2698
}
 
2699
 
 
2700
/************************************************************
 
2701
Completes recovery from a checkpoint. */
 
2702
 
 
2703
void
 
2704
recv_recovery_from_checkpoint_finish(void)
 
2705
/*======================================*/
 
2706
{
 
2707
        int             i;
 
2708
 
 
2709
        /* Apply the hashed log records to the respective file pages */
 
2710
 
 
2711
        if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) {
 
2712
 
 
2713
                recv_apply_hashed_log_recs(TRUE);
 
2714
        }
 
2715
 
 
2716
#ifdef UNIV_DEBUG
 
2717
        if (log_debug_writes) {
 
2718
                fprintf(stderr,
 
2719
                        "InnoDB: Log records applied to the database\n");
 
2720
        }
 
2721
#endif /* UNIV_DEBUG */
 
2722
 
 
2723
        if (recv_needed_recovery) {
 
2724
                trx_sys_print_mysql_master_log_pos();
 
2725
                trx_sys_print_mysql_binlog_offset();
 
2726
        }
 
2727
 
 
2728
        if (recv_sys->found_corrupt_log) {
 
2729
 
 
2730
                fprintf(stderr,
 
2731
                        "InnoDB: WARNING: the log file may have been"
 
2732
                        " corrupt and it\n"
 
2733
                        "InnoDB: is possible that the log scan or parsing"
 
2734
                        " did not proceed\n"
 
2735
                        "InnoDB: far enough in recovery. Please run"
 
2736
                        " CHECK TABLE\n"
 
2737
                        "InnoDB: on your InnoDB tables to check that"
 
2738
                        " they are ok!\n"
 
2739
                        "InnoDB: It may be safest to recover your"
 
2740
                        " InnoDB database from\n"
 
2741
                        "InnoDB: a backup!\n");
 
2742
        }
 
2743
 
 
2744
        /* Free the resources of the recovery system */
 
2745
 
 
2746
        recv_recovery_on = FALSE;
 
2747
 
 
2748
#ifndef UNIV_LOG_DEBUG
 
2749
        recv_sys_free();
 
2750
#endif
 
2751
 
 
2752
#ifdef UNIV_SYNC_DEBUG
 
2753
        /* Wait for a while so that created threads have time to suspend
 
2754
        themselves before we switch the latching order checks on */
 
2755
        os_thread_sleep(1000000);
 
2756
 
 
2757
        /* Switch latching order checks on in sync0sync.c */
 
2758
        sync_order_checks_on = TRUE;
 
2759
#endif
 
2760
        if (srv_force_recovery < SRV_FORCE_NO_TRX_UNDO) {
 
2761
                /* Rollback the uncommitted transactions which have no user
 
2762
                session */
 
2763
 
 
2764
                os_thread_create(trx_rollback_or_clean_all_without_sess,
 
2765
                                 (void *)&i, NULL);
 
2766
        }
 
2767
}
 
2768
 
 
2769
/**********************************************************
 
2770
Resets the logs. The contents of log files will be lost! */
 
2771
 
 
2772
void
 
2773
recv_reset_logs(
 
2774
/*============*/
 
2775
        dulint  lsn,            /* in: reset to this lsn rounded up to
 
2776
                                be divisible by OS_FILE_LOG_BLOCK_SIZE,
 
2777
                                after which we add LOG_BLOCK_HDR_SIZE */
 
2778
#ifdef UNIV_LOG_ARCHIVE
 
2779
        ulint   arch_log_no,    /* in: next archived log file number */
 
2780
#endif /* UNIV_LOG_ARCHIVE */
 
2781
        ibool   new_logs_created)/* in: TRUE if resetting logs is done
 
2782
                                at the log creation; FALSE if it is done
 
2783
                                after archive recovery */
 
2784
{
 
2785
        log_group_t*    group;
 
2786
 
 
2787
        ut_ad(mutex_own(&(log_sys->mutex)));
 
2788
 
 
2789
        log_sys->lsn = ut_dulint_align_up(lsn, OS_FILE_LOG_BLOCK_SIZE);
 
2790
 
 
2791
        group = UT_LIST_GET_FIRST(log_sys->log_groups);
 
2792
 
 
2793
        while (group) {
 
2794
                group->lsn = log_sys->lsn;
 
2795
                group->lsn_offset = LOG_FILE_HDR_SIZE;
 
2796
#ifdef UNIV_LOG_ARCHIVE
 
2797
                group->archived_file_no = arch_log_no;
 
2798
                group->archived_offset = 0;
 
2799
#endif /* UNIV_LOG_ARCHIVE */
 
2800
 
 
2801
                if (!new_logs_created) {
 
2802
                        recv_truncate_group(group, group->lsn, group->lsn,
 
2803
                                            group->lsn, group->lsn);
 
2804
                }
 
2805
 
 
2806
                group = UT_LIST_GET_NEXT(log_groups, group);
 
2807
        }
 
2808
 
 
2809
        log_sys->buf_next_to_write = 0;
 
2810
        log_sys->written_to_some_lsn = log_sys->lsn;
 
2811
        log_sys->written_to_all_lsn = log_sys->lsn;
 
2812
 
 
2813
        log_sys->next_checkpoint_no = ut_dulint_zero;
 
2814
        log_sys->last_checkpoint_lsn = ut_dulint_zero;
 
2815
 
 
2816
#ifdef UNIV_LOG_ARCHIVE
 
2817
        log_sys->archived_lsn = log_sys->lsn;
 
2818
#endif /* UNIV_LOG_ARCHIVE */
 
2819
 
 
2820
        log_block_init(log_sys->buf, log_sys->lsn);
 
2821
        log_block_set_first_rec_group(log_sys->buf, LOG_BLOCK_HDR_SIZE);
 
2822
 
 
2823
        log_sys->buf_free = LOG_BLOCK_HDR_SIZE;
 
2824
        log_sys->lsn = ut_dulint_add(log_sys->lsn, LOG_BLOCK_HDR_SIZE);
 
2825
 
 
2826
        mutex_exit(&(log_sys->mutex));
 
2827
 
 
2828
        /* Reset the checkpoint fields in logs */
 
2829
 
 
2830
        log_make_checkpoint_at(ut_dulint_max, TRUE);
 
2831
        log_make_checkpoint_at(ut_dulint_max, TRUE);
 
2832
 
 
2833
        mutex_enter(&(log_sys->mutex));
 
2834
}
 
2835
 
 
2836
#ifdef UNIV_HOTBACKUP
 
2837
/**********************************************************
 
2838
Creates new log files after a backup has been restored. */
 
2839
 
 
2840
void
 
2841
recv_reset_log_files_for_backup(
 
2842
/*============================*/
 
2843
        const char*     log_dir,        /* in: log file directory path */
 
2844
        ulint           n_log_files,    /* in: number of log files */
 
2845
        ulint           log_file_size,  /* in: log file size */
 
2846
        dulint          lsn)            /* in: new start lsn, must be
 
2847
                                        divisible by OS_FILE_LOG_BLOCK_SIZE */
 
2848
{
 
2849
        os_file_t       log_file;
 
2850
        ibool           success;
 
2851
        byte*           buf;
 
2852
        ulint           i;
 
2853
        ulint           log_dir_len;
 
2854
        char            name[5000];
 
2855
        static const char ib_logfile_basename[] = "ib_logfile";
 
2856
 
 
2857
        log_dir_len = strlen(log_dir);
 
2858
        /* full path name of ib_logfile consists of log dir path + basename
 
2859
        + number. This must fit in the name buffer.
 
2860
        */
 
2861
        ut_a(log_dir_len + strlen(ib_logfile_basename) + 11  < sizeof(name));
 
2862
 
 
2863
        buf = ut_malloc(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
 
2864
        memset(buf, '\0', LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
 
2865
 
 
2866
        for (i = 0; i < n_log_files; i++) {
 
2867
 
 
2868
                sprintf(name, "%s%s%lu", log_dir,
 
2869
                        ib_logfile_basename, (ulong)i);
 
2870
 
 
2871
                log_file = os_file_create_simple(name, OS_FILE_CREATE,
 
2872
                                                 OS_FILE_READ_WRITE, &success);
 
2873
                if (!success) {
 
2874
                        fprintf(stderr,
 
2875
                                "InnoDB: Cannot create %s. Check that"
 
2876
                                " the file does not exist yet.\n", name);
 
2877
 
 
2878
                        exit(1);
 
2879
                }
 
2880
 
 
2881
                fprintf(stderr,
 
2882
                        "Setting log file size to %lu %lu\n",
 
2883
                        (ulong) ut_get_high32(log_file_size),
 
2884
                        (ulong) log_file_size & 0xFFFFFFFFUL);
 
2885
 
 
2886
                success = os_file_set_size(name, log_file,
 
2887
                                           log_file_size & 0xFFFFFFFFUL,
 
2888
                                           ut_get_high32(log_file_size));
 
2889
 
 
2890
                if (!success) {
 
2891
                        fprintf(stderr,
 
2892
                                "InnoDB: Cannot set %s size to %lu %lu\n",
 
2893
                                name, (ulong) ut_get_high32(log_file_size),
 
2894
                                (ulong) (log_file_size & 0xFFFFFFFFUL));
 
2895
                        exit(1);
 
2896
                }
 
2897
 
 
2898
                os_file_flush(log_file);
 
2899
                os_file_close(log_file);
 
2900
        }
 
2901
 
 
2902
        /* We pretend there is a checkpoint at lsn + LOG_BLOCK_HDR_SIZE */
 
2903
 
 
2904
        log_reset_first_header_and_checkpoint(buf, lsn);
 
2905
 
 
2906
        log_block_init_in_old_format(buf + LOG_FILE_HDR_SIZE, lsn);
 
2907
        log_block_set_first_rec_group(buf + LOG_FILE_HDR_SIZE,
 
2908
                                      LOG_BLOCK_HDR_SIZE);
 
2909
        sprintf(name, "%s%s%lu", log_dir, ib_logfile_basename, (ulong)0);
 
2910
 
 
2911
        log_file = os_file_create_simple(name, OS_FILE_OPEN,
 
2912
                                         OS_FILE_READ_WRITE, &success);
 
2913
        if (!success) {
 
2914
                fprintf(stderr, "InnoDB: Cannot open %s.\n", name);
 
2915
 
 
2916
                exit(1);
 
2917
        }
 
2918
 
 
2919
        os_file_write(name, log_file, buf, 0, 0,
 
2920
                      LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
 
2921
        os_file_flush(log_file);
 
2922
        os_file_close(log_file);
 
2923
 
 
2924
        ut_free(buf);
 
2925
}
 
2926
#endif /* UNIV_HOTBACKUP */
 
2927
 
 
2928
#ifdef UNIV_LOG_ARCHIVE
 
2929
/**********************************************************
 
2930
Reads from the archive of a log group and performs recovery. */
 
2931
static
 
2932
ibool
 
2933
log_group_recover_from_archive_file(
 
2934
/*================================*/
 
2935
                                        /* out: TRUE if no more complete
 
2936
                                        consistent archive files */
 
2937
        log_group_t*    group)          /* in: log group */
 
2938
{
 
2939
        os_file_t file_handle;
 
2940
        dulint  start_lsn;
 
2941
        dulint  file_end_lsn;
 
2942
        dulint  dummy_lsn;
 
2943
        dulint  scanned_lsn;
 
2944
        ulint   len;
 
2945
        ibool   ret;
 
2946
        byte*   buf;
 
2947
        ulint   read_offset;
 
2948
        ulint   file_size;
 
2949
        ulint   file_size_high;
 
2950
        int     input_char;
 
2951
        char    name[10000];
 
2952
 
 
2953
        ut_a(0);
 
2954
 
 
2955
try_open_again:
 
2956
        buf = log_sys->buf;
 
2957
 
 
2958
        /* Add the file to the archive file space; open the file */
 
2959
 
 
2960
        log_archived_file_name_gen(name, group->id, group->archived_file_no);
 
2961
 
 
2962
        file_handle = os_file_create(name, OS_FILE_OPEN,
 
2963
                                     OS_FILE_LOG, OS_FILE_AIO, &ret);
 
2964
 
 
2965
        if (ret == FALSE) {
 
2966
ask_again:
 
2967
                fprintf(stderr,
 
2968
                        "InnoDB: Do you want to copy additional"
 
2969
                        " archived log files\n"
 
2970
                        "InnoDB: to the directory\n");
 
2971
                fprintf(stderr,
 
2972
                        "InnoDB: or were these all the files needed"
 
2973
                        " in recovery?\n");
 
2974
                fprintf(stderr,
 
2975
                        "InnoDB: (Y == copy more files; N == this is all)?");
 
2976
 
 
2977
                input_char = getchar();
 
2978
 
 
2979
                if (input_char == (int) 'N') {
 
2980
 
 
2981
                        return(TRUE);
 
2982
                } else if (input_char == (int) 'Y') {
 
2983
 
 
2984
                        goto try_open_again;
 
2985
                } else {
 
2986
                        goto ask_again;
 
2987
                }
 
2988
        }
 
2989
 
 
2990
        ret = os_file_get_size(file_handle, &file_size, &file_size_high);
 
2991
        ut_a(ret);
 
2992
 
 
2993
        ut_a(file_size_high == 0);
 
2994
 
 
2995
        fprintf(stderr, "InnoDB: Opened archived log file %s\n", name);
 
2996
 
 
2997
        ret = os_file_close(file_handle);
 
2998
 
 
2999
        if (file_size < LOG_FILE_HDR_SIZE) {
 
3000
                fprintf(stderr,
 
3001
                        "InnoDB: Archive file header incomplete %s\n", name);
 
3002
 
 
3003
                return(TRUE);
 
3004
        }
 
3005
 
 
3006
        ut_a(ret);
 
3007
 
 
3008
        /* Add the archive file as a node to the space */
 
3009
 
 
3010
        fil_node_create(name, 1 + file_size / UNIV_PAGE_SIZE,
 
3011
                        group->archive_space_id, FALSE);
 
3012
#if RECV_SCAN_SIZE < LOG_FILE_HDR_SIZE
 
3013
# error "RECV_SCAN_SIZE < LOG_FILE_HDR_SIZE"
 
3014
#endif
 
3015
 
 
3016
        /* Read the archive file header */
 
3017
        fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE, group->archive_space_id, 0, 0,
 
3018
               LOG_FILE_HDR_SIZE, buf, NULL);
 
3019
 
 
3020
        /* Check if the archive file header is consistent */
 
3021
 
 
3022
        if (mach_read_from_4(buf + LOG_GROUP_ID) != group->id
 
3023
            || mach_read_from_4(buf + LOG_FILE_NO)
 
3024
            != group->archived_file_no) {
 
3025
                fprintf(stderr,
 
3026
                        "InnoDB: Archive file header inconsistent %s\n", name);
 
3027
 
 
3028
                return(TRUE);
 
3029
        }
 
3030
 
 
3031
        if (!mach_read_from_4(buf + LOG_FILE_ARCH_COMPLETED)) {
 
3032
                fprintf(stderr,
 
3033
                        "InnoDB: Archive file not completely written %s\n",
 
3034
                        name);
 
3035
 
 
3036
                return(TRUE);
 
3037
        }
 
3038
 
 
3039
        start_lsn = mach_read_from_8(buf + LOG_FILE_START_LSN);
 
3040
        file_end_lsn = mach_read_from_8(buf + LOG_FILE_END_LSN);
 
3041
 
 
3042
        if (ut_dulint_is_zero(recv_sys->scanned_lsn)) {
 
3043
 
 
3044
                if (ut_dulint_cmp(recv_sys->parse_start_lsn, start_lsn) < 0) {
 
3045
                        fprintf(stderr,
 
3046
                                "InnoDB: Archive log file %s"
 
3047
                                " starts from too big a lsn\n",
 
3048
                                name);
 
3049
                        return(TRUE);
 
3050
                }
 
3051
 
 
3052
                recv_sys->scanned_lsn = start_lsn;
 
3053
        }
 
3054
 
 
3055
        if (ut_dulint_cmp(recv_sys->scanned_lsn, start_lsn) != 0) {
 
3056
 
 
3057
                fprintf(stderr,
 
3058
                        "InnoDB: Archive log file %s starts from"
 
3059
                        " a wrong lsn\n",
 
3060
                        name);
 
3061
                return(TRUE);
 
3062
        }
 
3063
 
 
3064
        read_offset = LOG_FILE_HDR_SIZE;
 
3065
 
 
3066
        for (;;) {
 
3067
                len = RECV_SCAN_SIZE;
 
3068
 
 
3069
                if (read_offset + len > file_size) {
 
3070
                        len = ut_calc_align_down(file_size - read_offset,
 
3071
                                                 OS_FILE_LOG_BLOCK_SIZE);
 
3072
                }
 
3073
 
 
3074
                if (len == 0) {
 
3075
 
 
3076
                        break;
 
3077
                }
 
3078
 
 
3079
#ifdef UNIV_DEBUG
 
3080
                if (log_debug_writes) {
 
3081
                        fprintf(stderr,
 
3082
                                "InnoDB: Archive read starting at"
 
3083
                                " lsn %lu %lu, len %lu from file %s\n",
 
3084
                                (ulong) ut_dulint_get_high(start_lsn),
 
3085
                                (ulong) ut_dulint_get_low(start_lsn),
 
3086
                                (ulong) len, name);
 
3087
                }
 
3088
#endif /* UNIV_DEBUG */
 
3089
 
 
3090
                fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE,
 
3091
                       group->archive_space_id, read_offset / UNIV_PAGE_SIZE,
 
3092
                       read_offset % UNIV_PAGE_SIZE, len, buf, NULL);
 
3093
 
 
3094
                ret = recv_scan_log_recs(
 
3095
                        TRUE, (buf_pool->n_frames - recv_n_pool_free_frames)
 
3096
                        * UNIV_PAGE_SIZE, TRUE, buf, len, start_lsn,
 
3097
                        &dummy_lsn, &scanned_lsn);
 
3098
 
 
3099
                if (ut_dulint_cmp(scanned_lsn, file_end_lsn) == 0) {
 
3100
 
 
3101
                        return(FALSE);
 
3102
                }
 
3103
 
 
3104
                if (ret) {
 
3105
                        fprintf(stderr,
 
3106
                                "InnoDB: Archive log file %s"
 
3107
                                " does not scan right\n",
 
3108
                                name);
 
3109
                        return(TRUE);
 
3110
                }
 
3111
 
 
3112
                read_offset += len;
 
3113
                start_lsn = ut_dulint_add(start_lsn, len);
 
3114
 
 
3115
                ut_ad(ut_dulint_cmp(start_lsn, scanned_lsn) == 0);
 
3116
        }
 
3117
 
 
3118
        return(FALSE);
 
3119
}
 
3120
 
 
3121
/************************************************************
 
3122
Recovers from archived log files, and also from log files, if they exist. */
 
3123
 
 
3124
ulint
 
3125
recv_recovery_from_archive_start(
 
3126
/*=============================*/
 
3127
                                /* out: error code or DB_SUCCESS */
 
3128
        dulint  min_flushed_lsn,/* in: min flushed lsn field from the
 
3129
                                data files */
 
3130
        dulint  limit_lsn,      /* in: recover up to this lsn if possible */
 
3131
        ulint   first_log_no)   /* in: number of the first archived log file
 
3132
                                to use in the recovery; the file will be
 
3133
                                searched from INNOBASE_LOG_ARCH_DIR specified
 
3134
                                in server config file */
 
3135
{
 
3136
        log_group_t*    group;
 
3137
        ulint           group_id;
 
3138
        ulint           trunc_len;
 
3139
        ibool           ret;
 
3140
        ulint           err;
 
3141
 
 
3142
        ut_a(0);
 
3143
 
 
3144
        recv_sys_create();
 
3145
        recv_sys_init(FALSE, buf_pool_get_curr_size());
 
3146
 
 
3147
        recv_recovery_on = TRUE;
 
3148
        recv_recovery_from_backup_on = TRUE;
 
3149
 
 
3150
        recv_sys->limit_lsn = limit_lsn;
 
3151
 
 
3152
        group_id = 0;
 
3153
 
 
3154
        group = UT_LIST_GET_FIRST(log_sys->log_groups);
 
3155
 
 
3156
        while (group) {
 
3157
                if (group->id == group_id) {
 
3158
 
 
3159
                        break;
 
3160
                }
 
3161
 
 
3162
                group = UT_LIST_GET_NEXT(log_groups, group);
 
3163
        }
 
3164
 
 
3165
        if (!group) {
 
3166
                fprintf(stderr,
 
3167
                        "InnoDB: There is no log group defined with id %lu!\n",
 
3168
                        (ulong) group_id);
 
3169
                return(DB_ERROR);
 
3170
        }
 
3171
 
 
3172
        group->archived_file_no = first_log_no;
 
3173
 
 
3174
        recv_sys->parse_start_lsn = min_flushed_lsn;
 
3175
 
 
3176
        recv_sys->scanned_lsn = ut_dulint_zero;
 
3177
        recv_sys->scanned_checkpoint_no = 0;
 
3178
        recv_sys->recovered_lsn = recv_sys->parse_start_lsn;
 
3179
 
 
3180
        recv_sys->archive_group = group;
 
3181
 
 
3182
        ret = FALSE;
 
3183
 
 
3184
        mutex_enter(&(log_sys->mutex));
 
3185
 
 
3186
        while (!ret) {
 
3187
                ret = log_group_recover_from_archive_file(group);
 
3188
 
 
3189
                /* Close and truncate a possible processed archive file
 
3190
                from the file space */
 
3191
 
 
3192
                trunc_len = UNIV_PAGE_SIZE
 
3193
                        * fil_space_get_size(group->archive_space_id);
 
3194
                if (trunc_len > 0) {
 
3195
                        fil_space_truncate_start(group->archive_space_id,
 
3196
                                                 trunc_len);
 
3197
                }
 
3198
 
 
3199
                group->archived_file_no++;
 
3200
        }
 
3201
 
 
3202
        if (ut_dulint_cmp(recv_sys->recovered_lsn, limit_lsn) < 0) {
 
3203
 
 
3204
                if (ut_dulint_is_zero(recv_sys->scanned_lsn)) {
 
3205
 
 
3206
                        recv_sys->scanned_lsn = recv_sys->parse_start_lsn;
 
3207
                }
 
3208
 
 
3209
                mutex_exit(&(log_sys->mutex));
 
3210
 
 
3211
                err = recv_recovery_from_checkpoint_start(LOG_ARCHIVE,
 
3212
                                                          limit_lsn,
 
3213
                                                          ut_dulint_max,
 
3214
                                                          ut_dulint_max);
 
3215
                if (err != DB_SUCCESS) {
 
3216
 
 
3217
                        return(err);
 
3218
                }
 
3219
 
 
3220
                mutex_enter(&(log_sys->mutex));
 
3221
        }
 
3222
 
 
3223
        if (ut_dulint_cmp(limit_lsn, ut_dulint_max) != 0) {
 
3224
 
 
3225
                recv_apply_hashed_log_recs(FALSE);
 
3226
 
 
3227
                recv_reset_logs(recv_sys->recovered_lsn, 0, FALSE);
 
3228
        }
 
3229
 
 
3230
        mutex_exit(&(log_sys->mutex));
 
3231
 
 
3232
        return(DB_SUCCESS);
 
3233
}
 
3234
 
 
3235
/************************************************************
 
3236
Completes recovery from archive. */
 
3237
 
 
3238
void
 
3239
recv_recovery_from_archive_finish(void)
 
3240
/*===================================*/
 
3241
{
 
3242
        recv_recovery_from_checkpoint_finish();
 
3243
 
 
3244
        recv_recovery_from_backup_on = FALSE;
 
3245
}
 
3246
#endif /* UNIV_LOG_ARCHIVE */