~drizzle-trunk/drizzle/development

« back to all changes in this revision

Viewing changes to plugin/innobase/log/log0recv.c

Merged Nathan from lp:~nlws/drizzle/fix-string-c-ptr-overrun

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/*****************************************************************************
 
2
 
 
3
Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
 
4
 
 
5
This program is free software; you can redistribute it and/or modify it under
 
6
the terms of the GNU General Public License as published by the Free Software
 
7
Foundation; version 2 of the License.
 
8
 
 
9
This program is distributed in the hope that it will be useful, but WITHOUT
 
10
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 
11
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 
12
 
 
13
You should have received a copy of the GNU General Public License along with
 
14
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 
15
Place, Suite 330, Boston, MA 02111-1307 USA
 
16
 
 
17
*****************************************************************************/
 
18
 
 
19
/******************************************************
 
20
Recovery
 
21
 
 
22
Created 9/20/1997 Heikki Tuuri
 
23
*******************************************************/
 
24
 
 
25
#include "log0recv.h"
 
26
 
 
27
#ifdef UNIV_NONINL
 
28
#include "log0recv.ic"
 
29
#endif
 
30
 
 
31
#include "mem0mem.h"
 
32
#include "buf0buf.h"
 
33
#include "buf0flu.h"
 
34
#include "buf0rea.h"
 
35
#include "srv0srv.h"
 
36
#include "srv0start.h"
 
37
#include "mtr0log.h"
 
38
#include "page0cur.h"
 
39
#include "page0zip.h"
 
40
#include "btr0cur.h"
 
41
#include "ibuf0ibuf.h"
 
42
#include "trx0undo.h"
 
43
#include "trx0rec.h"
 
44
#include "trx0roll.h"
 
45
#include "row0merge.h"
 
46
 
 
47
#ifdef UNIV_HOTBACKUP
 
48
/* This is set to FALSE if the backup was originally taken with the
 
49
ibbackup --include regexp option: then we do not want to create tables in
 
50
directories which were not included */
 
51
UNIV_INTERN ibool       recv_replay_file_ops    = TRUE;
 
52
#endif /* UNIV_HOTBACKUP */
 
53
 
 
54
/* Log records are stored in the hash table in chunks at most of this size;
 
55
this must be less than UNIV_PAGE_SIZE as it is stored in the buffer pool */
 
56
#define RECV_DATA_BLOCK_SIZE    (MEM_MAX_ALLOC_IN_BUF - sizeof(recv_data_t))
 
57
 
 
58
/* Read-ahead area in applying log records to file pages */
 
59
#define RECV_READ_AHEAD_AREA    32
 
60
 
 
61
UNIV_INTERN recv_sys_t* recv_sys = NULL;
 
62
UNIV_INTERN ibool       recv_recovery_on = FALSE;
 
63
UNIV_INTERN ibool       recv_recovery_from_backup_on = FALSE;
 
64
 
 
65
UNIV_INTERN ibool       recv_needed_recovery = FALSE;
 
66
 
 
67
UNIV_INTERN ibool       recv_lsn_checks_on = FALSE;
 
68
 
 
69
/* There are two conditions under which we scan the logs, the first
 
70
is normal startup and the second is when we do a recovery from an
 
71
archive.
 
72
This flag is set if we are doing a scan from the last checkpoint during
 
73
startup. If we find log entries that were written after the last checkpoint
 
74
we know that the server was not cleanly shutdown. We must then initialize
 
75
the crash recovery environment before attempting to store these entries in
 
76
the log hash table. */
 
77
UNIV_INTERN ibool       recv_log_scan_is_startup_type = FALSE;
 
78
 
 
79
/* If the following is TRUE, the buffer pool file pages must be invalidated
 
80
after recovery and no ibuf operations are allowed; this becomes TRUE if
 
81
the log record hash table becomes too full, and log records must be merged
 
82
to file pages already before the recovery is finished: in this case no
 
83
ibuf operations are allowed, as they could modify the pages read in the
 
84
buffer pool before the pages have been recovered to the up-to-date state */
 
85
 
 
86
/* Recovery is running and no operations on the log files are allowed
 
87
yet: the variable name is misleading */
 
88
 
 
89
UNIV_INTERN ibool       recv_no_ibuf_operations = FALSE;
 
90
 
 
91
/* The following counter is used to decide when to print info on
 
92
log scan */
 
93
UNIV_INTERN ulint       recv_scan_print_counter = 0;
 
94
 
 
95
UNIV_INTERN ibool       recv_is_from_backup     = FALSE;
 
96
#ifdef UNIV_HOTBACKUP
 
97
UNIV_INTERN ibool       recv_is_making_a_backup = FALSE;
 
98
#else
 
99
# define recv_is_making_a_backup FALSE
 
100
#endif /* UNIV_HOTBACKUP */
 
101
 
 
102
UNIV_INTERN ulint       recv_previous_parsed_rec_type   = 999999;
 
103
UNIV_INTERN ulint       recv_previous_parsed_rec_offset = 0;
 
104
UNIV_INTERN ulint       recv_previous_parsed_rec_is_multi = 0;
 
105
 
 
106
UNIV_INTERN ulint       recv_max_parsed_page_no         = 0;
 
107
 
 
108
/* This many frames must be left free in the buffer pool when we scan
 
109
the log and store the scanned log records in the buffer pool: we will
 
110
use these free frames to read in pages when we start applying the
 
111
log records to the database. */
 
112
 
 
113
UNIV_INTERN ulint       recv_n_pool_free_frames         = 256;
 
114
 
 
115
/* The maximum lsn we see for a page during the recovery process. If this
 
116
is bigger than the lsn we are able to scan up to, that is an indication that
 
117
the recovery failed and the database may be corrupt. */
 
118
 
 
119
UNIV_INTERN ib_uint64_t recv_max_page_lsn;
 
120
 
 
121
/* prototypes */
 
122
 
 
123
/***********************************************************
 
124
Initialize crash recovery environment. Can be called iff
 
125
recv_needed_recovery == FALSE. */
 
126
static
 
127
void
 
128
recv_init_crash_recovery(void);
 
129
/*===========================*/
 
130
 
 
131
/************************************************************
 
132
Creates the recovery system. */
 
133
UNIV_INTERN
 
134
void
 
135
recv_sys_create(void)
 
136
/*=================*/
 
137
{
 
138
        if (recv_sys != NULL) {
 
139
 
 
140
                return;
 
141
        }
 
142
 
 
143
        recv_sys = mem_alloc(sizeof(recv_sys_t));
 
144
 
 
145
        mutex_create(&recv_sys->mutex, SYNC_RECV);
 
146
 
 
147
        recv_sys->heap = NULL;
 
148
        recv_sys->addr_hash = NULL;
 
149
}
 
150
 
 
151
/************************************************************
 
152
Inits the recovery system for a recovery operation. */
 
153
UNIV_INTERN
 
154
void
 
155
recv_sys_init(
 
156
/*==========*/
 
157
        ibool   recover_from_backup,    /* in: TRUE if this is called
 
158
                                        to recover from a hot backup */
 
159
        ulint   available_memory)       /* in: available memory in bytes */
 
160
{
 
161
        if (recv_sys->heap != NULL) {
 
162
 
 
163
                return;
 
164
        }
 
165
 
 
166
        mutex_enter(&(recv_sys->mutex));
 
167
 
 
168
        if (!recover_from_backup) {
 
169
                recv_sys->heap = mem_heap_create_in_buffer(256);
 
170
        } else {
 
171
                recv_sys->heap = mem_heap_create(256);
 
172
                recv_is_from_backup = TRUE;
 
173
        }
 
174
 
 
175
        recv_sys->buf = ut_malloc(RECV_PARSING_BUF_SIZE);
 
176
        recv_sys->len = 0;
 
177
        recv_sys->recovered_offset = 0;
 
178
 
 
179
        recv_sys->addr_hash = hash_create(available_memory / 64);
 
180
        recv_sys->n_addrs = 0;
 
181
 
 
182
        recv_sys->apply_log_recs = FALSE;
 
183
        recv_sys->apply_batch_on = FALSE;
 
184
 
 
185
        recv_sys->last_block_buf_start = mem_alloc(2 * OS_FILE_LOG_BLOCK_SIZE);
 
186
 
 
187
        recv_sys->last_block = ut_align(recv_sys->last_block_buf_start,
 
188
                                        OS_FILE_LOG_BLOCK_SIZE);
 
189
        recv_sys->found_corrupt_log = FALSE;
 
190
 
 
191
        recv_max_page_lsn = 0;
 
192
 
 
193
        mutex_exit(&(recv_sys->mutex));
 
194
}
 
195
 
 
196
/************************************************************
 
197
Empties the hash table when it has been fully processed. */
 
198
static
 
199
void
 
200
recv_sys_empty_hash(void)
 
201
/*=====================*/
 
202
{
 
203
        ut_ad(mutex_own(&(recv_sys->mutex)));
 
204
 
 
205
        if (recv_sys->n_addrs != 0) {
 
206
                fprintf(stderr,
 
207
                        "InnoDB: Error: %lu pages with log records"
 
208
                        " were left unprocessed!\n"
 
209
                        "InnoDB: Maximum page number with"
 
210
                        " log records on it %lu\n",
 
211
                        (ulong) recv_sys->n_addrs,
 
212
                        (ulong) recv_max_parsed_page_no);
 
213
                ut_error;
 
214
        }
 
215
 
 
216
        hash_table_free(recv_sys->addr_hash);
 
217
        mem_heap_empty(recv_sys->heap);
 
218
 
 
219
        recv_sys->addr_hash = hash_create(buf_pool_get_curr_size() / 256);
 
220
}
 
221
 
 
222
#ifndef UNIV_LOG_DEBUG
 
223
/************************************************************
 
224
Frees the recovery system. */
 
225
static
 
226
void
 
227
recv_sys_free(void)
 
228
/*===============*/
 
229
{
 
230
        mutex_enter(&(recv_sys->mutex));
 
231
 
 
232
        hash_table_free(recv_sys->addr_hash);
 
233
        mem_heap_free(recv_sys->heap);
 
234
        ut_free(recv_sys->buf);
 
235
        mem_free(recv_sys->last_block_buf_start);
 
236
 
 
237
        recv_sys->addr_hash = NULL;
 
238
        recv_sys->heap = NULL;
 
239
 
 
240
        mutex_exit(&(recv_sys->mutex));
 
241
}
 
242
#endif /* UNIV_LOG_DEBUG */
 
243
 
 
244
/************************************************************
 
245
Truncates possible corrupted or extra records from a log group. */
 
246
static
 
247
void
 
248
recv_truncate_group(
 
249
/*================*/
 
250
        log_group_t*    group,          /* in: log group */
 
251
        ib_uint64_t     recovered_lsn,  /* in: recovery succeeded up to this
 
252
                                        lsn */
 
253
        ib_uint64_t     limit_lsn,      /* in: this was the limit for
 
254
                                        recovery */
 
255
        ib_uint64_t     checkpoint_lsn, /* in: recovery was started from this
 
256
                                        checkpoint */
 
257
        ib_uint64_t     archived_lsn)   /* in: the log has been archived up to
 
258
                                        this lsn */
 
259
{
 
260
        ib_uint64_t     start_lsn;
 
261
        ib_uint64_t     end_lsn;
 
262
        ib_uint64_t     finish_lsn1;
 
263
        ib_uint64_t     finish_lsn2;
 
264
        ib_uint64_t     finish_lsn;
 
265
        ulint           len;
 
266
        ulint           i;
 
267
 
 
268
        if (archived_lsn == IB_ULONGLONG_MAX) {
 
269
                /* Checkpoint was taken in the NOARCHIVELOG mode */
 
270
                archived_lsn = checkpoint_lsn;
 
271
        }
 
272
 
 
273
        finish_lsn1 = ut_uint64_align_down(archived_lsn,
 
274
                                           OS_FILE_LOG_BLOCK_SIZE)
 
275
                + log_group_get_capacity(group);
 
276
 
 
277
        finish_lsn2 = ut_uint64_align_up(recovered_lsn,
 
278
                                         OS_FILE_LOG_BLOCK_SIZE)
 
279
                + recv_sys->last_log_buf_size;
 
280
 
 
281
        if (limit_lsn != IB_ULONGLONG_MAX) {
 
282
                /* We do not know how far we should erase log records: erase
 
283
                as much as possible */
 
284
 
 
285
                finish_lsn = finish_lsn1;
 
286
        } else {
 
287
                /* It is enough to erase the length of the log buffer */
 
288
                finish_lsn = finish_lsn1 < finish_lsn2
 
289
                        ? finish_lsn1 : finish_lsn2;
 
290
        }
 
291
 
 
292
        ut_a(RECV_SCAN_SIZE <= log_sys->buf_size);
 
293
 
 
294
        /* Write the log buffer full of zeros */
 
295
        for (i = 0; i < RECV_SCAN_SIZE; i++) {
 
296
 
 
297
                *(log_sys->buf + i) = '\0';
 
298
        }
 
299
 
 
300
        start_lsn = ut_uint64_align_down(recovered_lsn,
 
301
                                         OS_FILE_LOG_BLOCK_SIZE);
 
302
 
 
303
        if (start_lsn != recovered_lsn) {
 
304
                /* Copy the last incomplete log block to the log buffer and
 
305
                edit its data length: */
 
306
 
 
307
                ut_memcpy(log_sys->buf, recv_sys->last_block,
 
308
                          OS_FILE_LOG_BLOCK_SIZE);
 
309
                log_block_set_data_len(log_sys->buf,
 
310
                                       (ulint) (recovered_lsn - start_lsn));
 
311
        }
 
312
 
 
313
        if (start_lsn >= finish_lsn) {
 
314
 
 
315
                return;
 
316
        }
 
317
 
 
318
        for (;;) {
 
319
                end_lsn = start_lsn + RECV_SCAN_SIZE;
 
320
 
 
321
                if (end_lsn > finish_lsn) {
 
322
 
 
323
                        end_lsn = finish_lsn;
 
324
                }
 
325
 
 
326
                len = (ulint) (end_lsn - start_lsn);
 
327
 
 
328
                log_group_write_buf(group, log_sys->buf, len, start_lsn, 0);
 
329
                if (end_lsn >= finish_lsn) {
 
330
 
 
331
                        return;
 
332
                }
 
333
 
 
334
                /* Write the log buffer full of zeros */
 
335
                for (i = 0; i < RECV_SCAN_SIZE; i++) {
 
336
 
 
337
                        *(log_sys->buf + i) = '\0';
 
338
                }
 
339
 
 
340
                start_lsn = end_lsn;
 
341
        }
 
342
}
 
343
 
 
344
/************************************************************
 
345
Copies the log segment between group->recovered_lsn and recovered_lsn from the
 
346
most up-to-date log group to group, so that it contains the latest log data. */
 
347
static
 
348
void
 
349
recv_copy_group(
 
350
/*============*/
 
351
        log_group_t*    up_to_date_group,       /* in: the most up-to-date log
 
352
                                                group */
 
353
        log_group_t*    group,                  /* in: copy to this log
 
354
                                                group */
 
355
        ib_uint64_t     recovered_lsn)          /* in: recovery succeeded up
 
356
                                                to this lsn */
 
357
{
 
358
        ib_uint64_t     start_lsn;
 
359
        ib_uint64_t     end_lsn;
 
360
        ulint           len;
 
361
 
 
362
        if (group->scanned_lsn >= recovered_lsn) {
 
363
 
 
364
                return;
 
365
        }
 
366
 
 
367
        ut_a(RECV_SCAN_SIZE <= log_sys->buf_size);
 
368
 
 
369
        start_lsn = ut_uint64_align_down(group->scanned_lsn,
 
370
                                         OS_FILE_LOG_BLOCK_SIZE);
 
371
        for (;;) {
 
372
                end_lsn = start_lsn + RECV_SCAN_SIZE;
 
373
 
 
374
                if (end_lsn > recovered_lsn) {
 
375
                        end_lsn = ut_uint64_align_up(recovered_lsn,
 
376
                                                     OS_FILE_LOG_BLOCK_SIZE);
 
377
                }
 
378
 
 
379
                log_group_read_log_seg(LOG_RECOVER, log_sys->buf,
 
380
                                       up_to_date_group, start_lsn, end_lsn);
 
381
 
 
382
                len = (ulint) (end_lsn - start_lsn);
 
383
 
 
384
                log_group_write_buf(group, log_sys->buf, len, start_lsn, 0);
 
385
 
 
386
                if (end_lsn >= recovered_lsn) {
 
387
 
 
388
                        return;
 
389
                }
 
390
 
 
391
                start_lsn = end_lsn;
 
392
        }
 
393
}
 
394
 
 
395
/************************************************************
 
396
Copies a log segment from the most up-to-date log group to the other log
 
397
groups, so that they all contain the latest log data. Also writes the info
 
398
about the latest checkpoint to the groups, and inits the fields in the group
 
399
memory structs to up-to-date values. */
 
400
static
 
401
void
 
402
recv_synchronize_groups(
 
403
/*====================*/
 
404
        log_group_t*    up_to_date_group)       /* in: the most up-to-date
 
405
                                                log group */
 
406
{
 
407
        log_group_t*    group;
 
408
        ib_uint64_t     start_lsn;
 
409
        ib_uint64_t     end_lsn;
 
410
        ib_uint64_t     recovered_lsn;
 
411
        ib_uint64_t     limit_lsn;
 
412
 
 
413
        recovered_lsn = recv_sys->recovered_lsn;
 
414
        limit_lsn = recv_sys->limit_lsn;
 
415
 
 
416
        /* Read the last recovered log block to the recovery system buffer:
 
417
        the block is always incomplete */
 
418
 
 
419
        start_lsn = ut_uint64_align_down(recovered_lsn,
 
420
                                         OS_FILE_LOG_BLOCK_SIZE);
 
421
        end_lsn = ut_uint64_align_up(recovered_lsn, OS_FILE_LOG_BLOCK_SIZE);
 
422
 
 
423
        ut_a(start_lsn != end_lsn);
 
424
 
 
425
        log_group_read_log_seg(LOG_RECOVER, recv_sys->last_block,
 
426
                               up_to_date_group, start_lsn, end_lsn);
 
427
 
 
428
        group = UT_LIST_GET_FIRST(log_sys->log_groups);
 
429
 
 
430
        while (group) {
 
431
                if (group != up_to_date_group) {
 
432
 
 
433
                        /* Copy log data if needed */
 
434
 
 
435
                        recv_copy_group(group, up_to_date_group,
 
436
                                        recovered_lsn);
 
437
                }
 
438
 
 
439
                /* Update the fields in the group struct to correspond to
 
440
                recovered_lsn */
 
441
 
 
442
                log_group_set_fields(group, recovered_lsn);
 
443
 
 
444
                group = UT_LIST_GET_NEXT(log_groups, group);
 
445
        }
 
446
 
 
447
        /* Copy the checkpoint info to the groups; remember that we have
 
448
        incremented checkpoint_no by one, and the info will not be written
 
449
        over the max checkpoint info, thus making the preservation of max
 
450
        checkpoint info on disk certain */
 
451
 
 
452
        log_groups_write_checkpoint_info();
 
453
 
 
454
        mutex_exit(&(log_sys->mutex));
 
455
 
 
456
        /* Wait for the checkpoint write to complete */
 
457
        rw_lock_s_lock(&(log_sys->checkpoint_lock));
 
458
        rw_lock_s_unlock(&(log_sys->checkpoint_lock));
 
459
 
 
460
        mutex_enter(&(log_sys->mutex));
 
461
}
 
462
 
 
463
/***************************************************************************
 
464
Checks the consistency of the checkpoint info */
 
465
static
 
466
ibool
 
467
recv_check_cp_is_consistent(
 
468
/*========================*/
 
469
                        /* out: TRUE if ok */
 
470
        byte*   buf)    /* in: buffer containing checkpoint info */
 
471
{
 
472
        ulint   fold;
 
473
 
 
474
        fold = ut_fold_binary(buf, LOG_CHECKPOINT_CHECKSUM_1);
 
475
 
 
476
        if ((fold & 0xFFFFFFFFUL) != mach_read_from_4(
 
477
                    buf + LOG_CHECKPOINT_CHECKSUM_1)) {
 
478
                return(FALSE);
 
479
        }
 
480
 
 
481
        fold = ut_fold_binary(buf + LOG_CHECKPOINT_LSN,
 
482
                              LOG_CHECKPOINT_CHECKSUM_2 - LOG_CHECKPOINT_LSN);
 
483
 
 
484
        if ((fold & 0xFFFFFFFFUL) != mach_read_from_4(
 
485
                    buf + LOG_CHECKPOINT_CHECKSUM_2)) {
 
486
                return(FALSE);
 
487
        }
 
488
 
 
489
        return(TRUE);
 
490
}
 
491
 
 
492
/************************************************************
 
493
Looks for the maximum consistent checkpoint from the log groups. */
 
494
static
 
495
ulint
 
496
recv_find_max_checkpoint(
 
497
/*=====================*/
 
498
                                        /* out: error code or DB_SUCCESS */
 
499
        log_group_t**   max_group,      /* out: max group */
 
500
        ulint*          max_field)      /* out: LOG_CHECKPOINT_1 or
 
501
                                        LOG_CHECKPOINT_2 */
 
502
{
 
503
        log_group_t*    group;
 
504
        ib_uint64_t     max_no;
 
505
        ib_uint64_t     checkpoint_no;
 
506
        ulint           field;
 
507
        byte*           buf;
 
508
 
 
509
        group = UT_LIST_GET_FIRST(log_sys->log_groups);
 
510
 
 
511
        max_no = 0;
 
512
        *max_group = NULL;
 
513
        *max_field = 0;
 
514
 
 
515
        buf = log_sys->checkpoint_buf;
 
516
 
 
517
        while (group) {
 
518
                group->state = LOG_GROUP_CORRUPTED;
 
519
 
 
520
                for (field = LOG_CHECKPOINT_1; field <= LOG_CHECKPOINT_2;
 
521
                     field += LOG_CHECKPOINT_2 - LOG_CHECKPOINT_1) {
 
522
 
 
523
                        log_group_read_checkpoint_info(group, field);
 
524
 
 
525
                        if (!recv_check_cp_is_consistent(buf)) {
 
526
#ifdef UNIV_DEBUG
 
527
                                if (log_debug_writes) {
 
528
                                        fprintf(stderr,
 
529
                                                "InnoDB: Checkpoint in group"
 
530
                                                " %lu at %lu invalid, %lu\n",
 
531
                                                (ulong) group->id,
 
532
                                                (ulong) field,
 
533
                                                (ulong) mach_read_from_4(
 
534
                                                        buf
 
535
                                                        + LOG_CHECKPOINT_CHECKSUM_1));
 
536
 
 
537
                                }
 
538
#endif /* UNIV_DEBUG */
 
539
                                goto not_consistent;
 
540
                        }
 
541
 
 
542
                        group->state = LOG_GROUP_OK;
 
543
 
 
544
                        group->lsn = mach_read_ull(
 
545
                                buf + LOG_CHECKPOINT_LSN);
 
546
                        group->lsn_offset = mach_read_from_4(
 
547
                                buf + LOG_CHECKPOINT_OFFSET);
 
548
                        checkpoint_no = mach_read_ull(
 
549
                                buf + LOG_CHECKPOINT_NO);
 
550
 
 
551
#ifdef UNIV_DEBUG
 
552
                        if (log_debug_writes) {
 
553
                                fprintf(stderr,
 
554
                                        "InnoDB: Checkpoint number %lu"
 
555
                                        " found in group %lu\n",
 
556
                                        (ulong) checkpoint_no,
 
557
                                        (ulong) group->id);
 
558
                        }
 
559
#endif /* UNIV_DEBUG */
 
560
 
 
561
                        if (checkpoint_no >= max_no) {
 
562
                                *max_group = group;
 
563
                                *max_field = field;
 
564
                                max_no = checkpoint_no;
 
565
                        }
 
566
 
 
567
not_consistent:
 
568
                        ;
 
569
                }
 
570
 
 
571
                group = UT_LIST_GET_NEXT(log_groups, group);
 
572
        }
 
573
 
 
574
        if (*max_group == NULL) {
 
575
 
 
576
                fprintf(stderr,
 
577
                        "InnoDB: No valid checkpoint found.\n"
 
578
                        "InnoDB: If this error appears when you are"
 
579
                        " creating an InnoDB database,\n"
 
580
                        "InnoDB: the problem may be that during"
 
581
                        " an earlier attempt you managed\n"
 
582
                        "InnoDB: to create the InnoDB data files,"
 
583
                        " but log file creation failed.\n"
 
584
                        "InnoDB: If that is the case, please refer to\n"
 
585
                        "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
 
586
                        "error-creating-innodb.html\n");
 
587
                return(DB_ERROR);
 
588
        }
 
589
 
 
590
        return(DB_SUCCESS);
 
591
}
 
592
 
 
593
#ifdef UNIV_HOTBACKUP
/***********************************************************************
Reads the checkpoint info needed in hot backup. Of the two checkpoint
fields in the log group header, the consistent one with the bigger
checkpoint number is used; if the numbers are equal, LOG_CHECKPOINT_1 is
used. */
UNIV_INTERN
ibool
recv_read_cp_info_for_backup(
/*=========================*/
                                /* out: TRUE if success, FALSE if neither
                                checkpoint field is consistent */
        byte*           hdr,    /* in: buffer containing the log group
                                header */
        ib_uint64_t*    lsn,    /* out: checkpoint lsn */
        ulint*          offset, /* out: checkpoint offset in the log group */
        ulint*          fsp_limit,/* out: fsp limit of space 0,
                                1000000000 if the database is running
                                with < version 3.23.50 of InnoDB */
        ib_uint64_t*    cp_no,  /* out: checkpoint number */
        ib_uint64_t*    first_header_lsn)
                                /* out: lsn of the start of the
                                first log file */
{
        ulint           max_cp          = 0;    /* 0 = none found yet */
        ib_uint64_t     max_cp_no       = 0;
        ib_uint64_t     cp_no_2;
        byte*           cp_buf;

        cp_buf = hdr + LOG_CHECKPOINT_1;

        if (recv_check_cp_is_consistent(cp_buf)) {
                max_cp_no = mach_read_ull(cp_buf + LOG_CHECKPOINT_NO);
                max_cp = LOG_CHECKPOINT_1;
        }

        cp_buf = hdr + LOG_CHECKPOINT_2;

        if (recv_check_cp_is_consistent(cp_buf)) {
                cp_no_2 = mach_read_ull(cp_buf + LOG_CHECKPOINT_NO);

                /* Take checkpoint 2 when it is newer than checkpoint 1,
                and also when checkpoint 1 was not usable at all: a bare
                "> max_cp_no" test would wrongly reject a consistent
                checkpoint 2 whose number is 0 */
                if (max_cp == 0 || cp_no_2 > max_cp_no) {
                        max_cp = LOG_CHECKPOINT_2;
                        max_cp_no = cp_no_2;
                }
        }

        if (max_cp == 0) {
                /* Neither checkpoint field is consistent */
                return(FALSE);
        }

        cp_buf = hdr + max_cp;

        *lsn = mach_read_ull(cp_buf + LOG_CHECKPOINT_LSN);
        *offset = mach_read_from_4(cp_buf + LOG_CHECKPOINT_OFFSET);

        /* If the user is running a pre-3.23.50 version of InnoDB, its
        checkpoint data does not contain the fsp limit info */
        if (mach_read_from_4(cp_buf + LOG_CHECKPOINT_FSP_MAGIC_N)
            == LOG_CHECKPOINT_FSP_MAGIC_N_VAL) {

                *fsp_limit = mach_read_from_4(
                        cp_buf + LOG_CHECKPOINT_FSP_FREE_LIMIT);

                if (*fsp_limit == 0) {
                        /* A stored limit of 0 is replaced by the same
                        default as when the info is missing */
                        *fsp_limit = 1000000000;
                }
        } else {
                *fsp_limit = 1000000000;
        }

        *cp_no = mach_read_ull(cp_buf + LOG_CHECKPOINT_NO);

        *first_header_lsn = mach_read_ull(hdr + LOG_FILE_START_LSN);

        return(TRUE);
}
#endif /* UNIV_HOTBACKUP */
 
665
 
 
666
/**********************************************************
 
667
Checks the 4-byte checksum to the trailer checksum field of a log block.
 
668
We also accept a log block in the old format < InnoDB-3.23.52 where the
 
669
checksum field contains the log block number. */
 
670
static
 
671
ibool
 
672
log_block_checksum_is_ok_or_old_format(
 
673
/*===================================*/
 
674
                        /* out: TRUE if ok, or if the log block may be in the
 
675
                        format of InnoDB version < 3.23.52 */
 
676
        byte*   block)  /* in: pointer to a log block */
 
677
{
 
678
#ifdef UNIV_LOG_DEBUG
 
679
        return(TRUE);
 
680
#endif /* UNIV_LOG_DEBUG */
 
681
        if (log_block_calc_checksum(block) == log_block_get_checksum(block)) {
 
682
 
 
683
                return(TRUE);
 
684
        }
 
685
 
 
686
        if (log_block_get_hdr_no(block) == log_block_get_checksum(block)) {
 
687
 
 
688
                /* We assume the log block is in the format of
 
689
                InnoDB version < 3.23.52 and the block is ok */
 
690
#if 0
 
691
                fprintf(stderr,
 
692
                        "InnoDB: Scanned old format < InnoDB-3.23.52"
 
693
                        " log block number %lu\n",
 
694
                        log_block_get_hdr_no(block));
 
695
#endif
 
696
                return(TRUE);
 
697
        }
 
698
 
 
699
        return(FALSE);
 
700
}
 
701
 
 
702
#ifdef UNIV_HOTBACKUP
/***********************************************************************
Scans the log segment block by block and sets n_bytes_scanned to the
length of valid log scanned. The scan stops at the first block whose
header number does not match the block number derived from *scanned_lsn,
whose checksum is not acceptable, whose checkpoint number is far behind
the highest one scanned so far, or which is only partially filled. */
UNIV_INTERN
void
recv_scan_log_seg_for_backup(
/*=========================*/
        byte*           buf,            /* in: buffer containing log data */
        ulint           buf_len,        /* in: data length in that buffer */
        ib_uint64_t*    scanned_lsn,    /* in/out: lsn of buffer start,
                                        we return scanned lsn */
        ulint*          scanned_checkpoint_no,
                                        /* in/out: 4 lowest bytes of the
                                        highest scanned checkpoint number so
                                        far */
        ulint*          n_bytes_scanned)/* out: how much we were able to
                                        scan, smaller than buf_len if log
                                        data ended here */
{
        byte*   block_ptr;
        ulint   block_cp_no;
        ulint   block_data_len;

        *n_bytes_scanned = 0;

        for (block_ptr = buf;
             block_ptr < buf + buf_len;
             block_ptr += OS_FILE_LOG_BLOCK_SIZE) {

                if (log_block_get_hdr_no(block_ptr)
                    != log_block_convert_lsn_to_no(*scanned_lsn)
                    || !log_block_checksum_is_ok_or_old_format(block_ptr)) {

                        /* Garbage or an incompletely written log block */
                        break;
                }

                block_cp_no = log_block_get_checkpoint_no(block_ptr);

                if (*scanned_checkpoint_no > 0
                    && block_cp_no < *scanned_checkpoint_no
                    && *scanned_checkpoint_no - block_cp_no
                    > 0x80000000UL) {

                        /* Garbage from a log buffer flush which was made
                        before the most recent database recovery */
                        break;
                }

                block_data_len = log_block_get_data_len(block_ptr);

                *scanned_checkpoint_no = block_cp_no;
                *scanned_lsn += block_data_len;
                *n_bytes_scanned += block_data_len;

                if (block_data_len < OS_FILE_LOG_BLOCK_SIZE) {
                        /* Log data ends here */
                        break;
                }
        }
}
#endif /* UNIV_HOTBACKUP */
 
793
 
 
794
/***********************************************************************
 
795
Tries to parse a single log record body and also applies it to a page if
 
796
specified. File ops are parsed, but not applied in this function. */
 
797
static
 
798
byte*
 
799
recv_parse_or_apply_log_rec_body(
 
800
/*=============================*/
 
801
                                /* out: log record end, NULL if not a
 
802
                                complete record */
 
803
        byte            type,   /* in: type */
 
804
        byte*           ptr,    /* in: pointer to a buffer */
 
805
        byte*           end_ptr,/* in: pointer to the buffer end */
 
806
        buf_block_t*    block,  /* in/out: buffer block or NULL; if
 
807
                                not NULL, then the log record is
 
808
                                applied to the page, and the log
 
809
                                record should be complete then */
 
810
        mtr_t*          mtr)    /* in: mtr or NULL; should be non-NULL
 
811
                                if and only if block is non-NULL */
 
812
{
 
813
        dict_index_t*   index   = NULL;
 
814
        page_t*         page;
 
815
        page_zip_des_t* page_zip;
 
816
 
 
817
        ut_ad(!block == !mtr);
 
818
 
 
819
        if (block) {
 
820
                page = block->frame;
 
821
                page_zip = buf_block_get_page_zip(block);
 
822
        } else {
 
823
                page = NULL;
 
824
                page_zip = NULL;
 
825
        }
 
826
 
 
827
        switch (type) {
 
828
        case MLOG_1BYTE: case MLOG_2BYTES: case MLOG_4BYTES: case MLOG_8BYTES:
 
829
                ptr = mlog_parse_nbytes(type, ptr, end_ptr, page, page_zip);
 
830
                break;
 
831
        case MLOG_REC_INSERT: case MLOG_COMP_REC_INSERT:
 
832
                if (NULL != (ptr = mlog_parse_index(
 
833
                                     ptr, end_ptr,
 
834
                                     type == MLOG_COMP_REC_INSERT,
 
835
                                     &index))) {
 
836
                        ut_a(!page
 
837
                             || (ibool)!!page_is_comp(page)
 
838
                             == dict_table_is_comp(index->table));
 
839
                        ptr = page_cur_parse_insert_rec(FALSE, ptr, end_ptr,
 
840
                                                        block, index, mtr);
 
841
                }
 
842
                break;
 
843
        case MLOG_REC_CLUST_DELETE_MARK: case MLOG_COMP_REC_CLUST_DELETE_MARK:
 
844
                if (NULL != (ptr = mlog_parse_index(
 
845
                                     ptr, end_ptr,
 
846
                                     type == MLOG_COMP_REC_CLUST_DELETE_MARK,
 
847
                                     &index))) {
 
848
                        ut_a(!page
 
849
                             || (ibool)!!page_is_comp(page)
 
850
                             == dict_table_is_comp(index->table));
 
851
                        ptr = btr_cur_parse_del_mark_set_clust_rec(
 
852
                                ptr, end_ptr, page, page_zip, index);
 
853
                }
 
854
                break;
 
855
        case MLOG_COMP_REC_SEC_DELETE_MARK:
 
856
                /* This log record type is obsolete, but we process it for
 
857
                backward compatibility with MySQL 5.0.3 and 5.0.4. */
 
858
                ut_a(!page || page_is_comp(page));
 
859
                ut_a(!page_zip);
 
860
                ptr = mlog_parse_index(ptr, end_ptr, TRUE, &index);
 
861
                if (!ptr) {
 
862
                        break;
 
863
                }
 
864
                /* Fall through */
 
865
        case MLOG_REC_SEC_DELETE_MARK:
 
866
                ptr = btr_cur_parse_del_mark_set_sec_rec(ptr, end_ptr,
 
867
                                                         page, page_zip);
 
868
                break;
 
869
        case MLOG_REC_UPDATE_IN_PLACE: case MLOG_COMP_REC_UPDATE_IN_PLACE:
 
870
                if (NULL != (ptr = mlog_parse_index(
 
871
                                     ptr, end_ptr,
 
872
                                     type == MLOG_COMP_REC_UPDATE_IN_PLACE,
 
873
                                     &index))) {
 
874
                        ut_a(!page
 
875
                             || (ibool)!!page_is_comp(page)
 
876
                             == dict_table_is_comp(index->table));
 
877
                        ptr = btr_cur_parse_update_in_place(ptr, end_ptr, page,
 
878
                                                            page_zip, index);
 
879
                }
 
880
                break;
 
881
        case MLOG_LIST_END_DELETE: case MLOG_COMP_LIST_END_DELETE:
 
882
        case MLOG_LIST_START_DELETE: case MLOG_COMP_LIST_START_DELETE:
 
883
                if (NULL != (ptr = mlog_parse_index(
 
884
                                     ptr, end_ptr,
 
885
                                     type == MLOG_COMP_LIST_END_DELETE
 
886
                                     || type == MLOG_COMP_LIST_START_DELETE,
 
887
                                     &index))) {
 
888
                        ut_a(!page
 
889
                             || (ibool)!!page_is_comp(page)
 
890
                             == dict_table_is_comp(index->table));
 
891
                        ptr = page_parse_delete_rec_list(type, ptr, end_ptr,
 
892
                                                         block, index, mtr);
 
893
                }
 
894
                break;
 
895
        case MLOG_LIST_END_COPY_CREATED: case MLOG_COMP_LIST_END_COPY_CREATED:
 
896
                if (NULL != (ptr = mlog_parse_index(
 
897
                                     ptr, end_ptr,
 
898
                                     type == MLOG_COMP_LIST_END_COPY_CREATED,
 
899
                                     &index))) {
 
900
                        ut_a(!page
 
901
                             || (ibool)!!page_is_comp(page)
 
902
                             == dict_table_is_comp(index->table));
 
903
                        ptr = page_parse_copy_rec_list_to_created_page(
 
904
                                ptr, end_ptr, block, index, mtr);
 
905
                }
 
906
                break;
 
907
        case MLOG_PAGE_REORGANIZE: case MLOG_COMP_PAGE_REORGANIZE:
 
908
                if (NULL != (ptr = mlog_parse_index(
 
909
                                     ptr, end_ptr,
 
910
                                     type == MLOG_COMP_PAGE_REORGANIZE,
 
911
                                     &index))) {
 
912
                        ut_a(!page
 
913
                             || (ibool)!!page_is_comp(page)
 
914
                             == dict_table_is_comp(index->table));
 
915
                        ptr = btr_parse_page_reorganize(ptr, end_ptr, index,
 
916
                                                        block, mtr);
 
917
                }
 
918
                break;
 
919
        case MLOG_PAGE_CREATE: case MLOG_COMP_PAGE_CREATE:
 
920
                ut_a(!page_zip);
 
921
                ptr = page_parse_create(ptr, end_ptr,
 
922
                                        type == MLOG_COMP_PAGE_CREATE,
 
923
                                        block, mtr);
 
924
                break;
 
925
        case MLOG_UNDO_INSERT:
 
926
                ptr = trx_undo_parse_add_undo_rec(ptr, end_ptr, page);
 
927
                break;
 
928
        case MLOG_UNDO_ERASE_END:
 
929
                ptr = trx_undo_parse_erase_page_end(ptr, end_ptr, page, mtr);
 
930
                break;
 
931
        case MLOG_UNDO_INIT:
 
932
                ptr = trx_undo_parse_page_init(ptr, end_ptr, page, mtr);
 
933
                break;
 
934
        case MLOG_UNDO_HDR_DISCARD:
 
935
                ptr = trx_undo_parse_discard_latest(ptr, end_ptr, page, mtr);
 
936
                break;
 
937
        case MLOG_UNDO_HDR_CREATE:
 
938
        case MLOG_UNDO_HDR_REUSE:
 
939
                ptr = trx_undo_parse_page_header(type, ptr, end_ptr,
 
940
                                                 page, mtr);
 
941
                break;
 
942
        case MLOG_REC_MIN_MARK: case MLOG_COMP_REC_MIN_MARK:
 
943
                /* On a compressed page, MLOG_COMP_REC_MIN_MARK
 
944
                will be followed by MLOG_COMP_REC_DELETE
 
945
                or MLOG_ZIP_WRITE_HEADER(FIL_PAGE_PREV, FIL_NULL)
 
946
                in the same mini-transaction. */
 
947
                ut_a(type == MLOG_COMP_REC_MIN_MARK || !page_zip);
 
948
                ptr = btr_parse_set_min_rec_mark(
 
949
                        ptr, end_ptr, type == MLOG_COMP_REC_MIN_MARK,
 
950
                        page, mtr);
 
951
                break;
 
952
        case MLOG_REC_DELETE: case MLOG_COMP_REC_DELETE:
 
953
                if (NULL != (ptr = mlog_parse_index(
 
954
                                     ptr, end_ptr,
 
955
                                     type == MLOG_COMP_REC_DELETE,
 
956
                                     &index))) {
 
957
                        ut_a(!page
 
958
                             || (ibool)!!page_is_comp(page)
 
959
                             == dict_table_is_comp(index->table));
 
960
                        ptr = page_cur_parse_delete_rec(ptr, end_ptr,
 
961
                                                        block, index, mtr);
 
962
                }
 
963
                break;
 
964
        case MLOG_IBUF_BITMAP_INIT:
 
965
                ptr = ibuf_parse_bitmap_init(ptr, end_ptr, block, mtr);
 
966
                break;
 
967
        case MLOG_INIT_FILE_PAGE:
 
968
                ptr = fsp_parse_init_file_page(ptr, end_ptr, block);
 
969
                break;
 
970
        case MLOG_WRITE_STRING:
 
971
                ptr = mlog_parse_string(ptr, end_ptr, page, page_zip);
 
972
                break;
 
973
        case MLOG_FILE_CREATE:
 
974
        case MLOG_FILE_RENAME:
 
975
        case MLOG_FILE_DELETE:
 
976
        case MLOG_FILE_CREATE2:
 
977
                ptr = fil_op_log_parse_or_replay(ptr, end_ptr, type, 0);
 
978
                break;
 
979
        case MLOG_ZIP_WRITE_NODE_PTR:
 
980
                ptr = page_zip_parse_write_node_ptr(ptr, end_ptr,
 
981
                                                    page, page_zip);
 
982
                break;
 
983
        case MLOG_ZIP_WRITE_BLOB_PTR:
 
984
                ptr = page_zip_parse_write_blob_ptr(ptr, end_ptr,
 
985
                                                    page, page_zip);
 
986
                break;
 
987
        case MLOG_ZIP_WRITE_HEADER:
 
988
                ptr = page_zip_parse_write_header(ptr, end_ptr,
 
989
                                                  page, page_zip);
 
990
                break;
 
991
        case MLOG_ZIP_PAGE_COMPRESS:
 
992
                ptr = page_zip_parse_compress(ptr, end_ptr,
 
993
                                              page, page_zip);
 
994
                break;
 
995
        default:
 
996
                ptr = NULL;
 
997
                recv_sys->found_corrupt_log = TRUE;
 
998
        }
 
999
 
 
1000
        if (index) {
 
1001
                dict_table_t*   table = index->table;
 
1002
 
 
1003
                dict_mem_index_free(index);
 
1004
                dict_mem_table_free(table);
 
1005
        }
 
1006
 
 
1007
        return(ptr);
 
1008
}
 
1009
 
 
1010
/*************************************************************************
 
1011
Calculates the fold value of a page file address: used in inserting or
 
1012
searching for a log record in the hash table. */
 
1013
UNIV_INLINE
 
1014
ulint
 
1015
recv_fold(
 
1016
/*======*/
 
1017
                        /* out: folded value */
 
1018
        ulint   space,  /* in: space */
 
1019
        ulint   page_no)/* in: page number */
 
1020
{
 
1021
        return(ut_fold_ulint_pair(space, page_no));
 
1022
}
 
1023
 
 
1024
/*************************************************************************
 
1025
Calculates the hash value of a page file address: used in inserting or
 
1026
searching for a log record in the hash table. */
 
1027
UNIV_INLINE
 
1028
ulint
 
1029
recv_hash(
 
1030
/*======*/
 
1031
                        /* out: folded value */
 
1032
        ulint   space,  /* in: space */
 
1033
        ulint   page_no)/* in: page number */
 
1034
{
 
1035
        return(hash_calc_hash(recv_fold(space, page_no), recv_sys->addr_hash));
 
1036
}
 
1037
 
 
1038
/*************************************************************************
 
1039
Gets the hashed file address struct for a page. */
 
1040
static
 
1041
recv_addr_t*
 
1042
recv_get_fil_addr_struct(
 
1043
/*=====================*/
 
1044
                        /* out: file address struct, NULL if not found from
 
1045
                        the hash table */
 
1046
        ulint   space,  /* in: space id */
 
1047
        ulint   page_no)/* in: page number */
 
1048
{
 
1049
        recv_addr_t*    recv_addr;
 
1050
 
 
1051
        recv_addr = HASH_GET_FIRST(recv_sys->addr_hash,
 
1052
                                   recv_hash(space, page_no));
 
1053
        while (recv_addr) {
 
1054
                if ((recv_addr->space == space)
 
1055
                    && (recv_addr->page_no == page_no)) {
 
1056
 
 
1057
                        break;
 
1058
                }
 
1059
 
 
1060
                recv_addr = HASH_GET_NEXT(addr_hash, recv_addr);
 
1061
        }
 
1062
 
 
1063
        return(recv_addr);
 
1064
}
 
1065
 
 
1066
/***********************************************************************
 
1067
Adds a new log record to the hash table of log records. */
 
1068
static
 
1069
void
 
1070
recv_add_to_hash_table(
 
1071
/*===================*/
 
1072
        byte            type,           /* in: log record type */
 
1073
        ulint           space,          /* in: space id */
 
1074
        ulint           page_no,        /* in: page number */
 
1075
        byte*           body,           /* in: log record body */
 
1076
        byte*           rec_end,        /* in: log record end */
 
1077
        ib_uint64_t     start_lsn,      /* in: start lsn of the mtr */
 
1078
        ib_uint64_t     end_lsn)        /* in: end lsn of the mtr */
 
1079
{
 
1080
        recv_t*         recv;
 
1081
        ulint           len;
 
1082
        recv_data_t*    recv_data;
 
1083
        recv_data_t**   prev_field;
 
1084
        recv_addr_t*    recv_addr;
 
1085
 
 
1086
        if (fil_tablespace_deleted_or_being_deleted_in_mem(space, -1)) {
 
1087
                /* The tablespace does not exist any more: do not store the
 
1088
                log record */
 
1089
 
 
1090
                return;
 
1091
        }
 
1092
 
 
1093
        len = rec_end - body;
 
1094
 
 
1095
        recv = mem_heap_alloc(recv_sys->heap, sizeof(recv_t));
 
1096
        recv->type = type;
 
1097
        recv->len = rec_end - body;
 
1098
        recv->start_lsn = start_lsn;
 
1099
        recv->end_lsn = end_lsn;
 
1100
 
 
1101
        recv_addr = recv_get_fil_addr_struct(space, page_no);
 
1102
 
 
1103
        if (recv_addr == NULL) {
 
1104
                recv_addr = mem_heap_alloc(recv_sys->heap,
 
1105
                                           sizeof(recv_addr_t));
 
1106
                recv_addr->space = space;
 
1107
                recv_addr->page_no = page_no;
 
1108
                recv_addr->state = RECV_NOT_PROCESSED;
 
1109
 
 
1110
                UT_LIST_INIT(recv_addr->rec_list);
 
1111
 
 
1112
                HASH_INSERT(recv_addr_t, addr_hash, recv_sys->addr_hash,
 
1113
                            recv_fold(space, page_no), recv_addr);
 
1114
                recv_sys->n_addrs++;
 
1115
#if 0
 
1116
                fprintf(stderr, "Inserting log rec for space %lu, page %lu\n",
 
1117
                        space, page_no);
 
1118
#endif
 
1119
        }
 
1120
 
 
1121
        UT_LIST_ADD_LAST(rec_list, recv_addr->rec_list, recv);
 
1122
 
 
1123
        prev_field = &(recv->data);
 
1124
 
 
1125
        /* Store the log record body in chunks of less than UNIV_PAGE_SIZE:
 
1126
        recv_sys->heap grows into the buffer pool, and bigger chunks could not
 
1127
        be allocated */
 
1128
 
 
1129
        while (rec_end > body) {
 
1130
 
 
1131
                len = rec_end - body;
 
1132
 
 
1133
                if (len > RECV_DATA_BLOCK_SIZE) {
 
1134
                        len = RECV_DATA_BLOCK_SIZE;
 
1135
                }
 
1136
 
 
1137
                recv_data = mem_heap_alloc(recv_sys->heap,
 
1138
                                           sizeof(recv_data_t) + len);
 
1139
                *prev_field = recv_data;
 
1140
 
 
1141
                ut_memcpy(((byte*)recv_data) + sizeof(recv_data_t), body, len);
 
1142
 
 
1143
                prev_field = &(recv_data->next);
 
1144
 
 
1145
                body += len;
 
1146
        }
 
1147
 
 
1148
        *prev_field = NULL;
 
1149
}
 
1150
 
 
1151
/*************************************************************************
 
1152
Copies the log record body from recv to buf. */
 
1153
static
 
1154
void
 
1155
recv_data_copy_to_buf(
 
1156
/*==================*/
 
1157
        byte*   buf,    /* in: buffer of length at least recv->len */
 
1158
        recv_t* recv)   /* in: log record */
 
1159
{
 
1160
        recv_data_t*    recv_data;
 
1161
        ulint           part_len;
 
1162
        ulint           len;
 
1163
 
 
1164
        len = recv->len;
 
1165
        recv_data = recv->data;
 
1166
 
 
1167
        while (len > 0) {
 
1168
                if (len > RECV_DATA_BLOCK_SIZE) {
 
1169
                        part_len = RECV_DATA_BLOCK_SIZE;
 
1170
                } else {
 
1171
                        part_len = len;
 
1172
                }
 
1173
 
 
1174
                ut_memcpy(buf, ((byte*)recv_data) + sizeof(recv_data_t),
 
1175
                          part_len);
 
1176
                buf += part_len;
 
1177
                len -= part_len;
 
1178
 
 
1179
                recv_data = recv_data->next;
 
1180
        }
 
1181
}
 
1182
 
 
1183
/****************************************************************************
 
1184
Applies the hashed log records to the page, if the page lsn is less than the
 
1185
lsn of a log record. This can be called when a buffer page has just been
 
1186
read in, or also for a page already in the buffer pool. */
 
1187
UNIV_INTERN
 
1188
void
 
1189
recv_recover_page(
 
1190
/*==============*/
 
1191
        ibool           recover_backup,
 
1192
                                /* in: TRUE if we are recovering a backup
 
1193
                                page: then we do not acquire any latches
 
1194
                                since the page was read in outside the
 
1195
                                buffer pool */
 
1196
        ibool           just_read_in,
 
1197
                                /* in: TRUE if the i/o-handler calls this for
 
1198
                                a freshly read page */
 
1199
        buf_block_t*    block)  /* in: buffer block */
 
1200
{
 
1201
        page_t*         page;
 
1202
        recv_addr_t*    recv_addr;
 
1203
        recv_t*         recv;
 
1204
        byte*           buf;
 
1205
        ib_uint64_t     start_lsn;
 
1206
        ib_uint64_t     end_lsn;
 
1207
        ib_uint64_t     page_lsn;
 
1208
        ib_uint64_t     page_newest_lsn;
 
1209
        ibool           modification_to_page;
 
1210
        ibool           success;
 
1211
        mtr_t           mtr;
 
1212
 
 
1213
        mutex_enter(&(recv_sys->mutex));
 
1214
 
 
1215
        if (recv_sys->apply_log_recs == FALSE) {
 
1216
 
 
1217
                /* Log records should not be applied now */
 
1218
 
 
1219
                mutex_exit(&(recv_sys->mutex));
 
1220
 
 
1221
                return;
 
1222
        }
 
1223
 
 
1224
        recv_addr = recv_get_fil_addr_struct(buf_block_get_space(block),
 
1225
                                             buf_block_get_page_no(block));
 
1226
 
 
1227
        if ((recv_addr == NULL)
 
1228
            || (recv_addr->state == RECV_BEING_PROCESSED)
 
1229
            || (recv_addr->state == RECV_PROCESSED)) {
 
1230
 
 
1231
                mutex_exit(&(recv_sys->mutex));
 
1232
 
 
1233
                return;
 
1234
        }
 
1235
 
 
1236
#if 0
 
1237
        fprintf(stderr, "Recovering space %lu, page %lu\n",
 
1238
                buf_block_get_space(block), buf_block_get_page_no(block));
 
1239
#endif
 
1240
 
 
1241
        recv_addr->state = RECV_BEING_PROCESSED;
 
1242
 
 
1243
        mutex_exit(&(recv_sys->mutex));
 
1244
 
 
1245
        mtr_start(&mtr);
 
1246
        mtr_set_log_mode(&mtr, MTR_LOG_NONE);
 
1247
 
 
1248
        page = block->frame;
 
1249
 
 
1250
        if (!recover_backup) {
 
1251
                if (just_read_in) {
 
1252
                        /* Move the ownership of the x-latch on the
 
1253
                        page to this OS thread, so that we can acquire
 
1254
                        a second x-latch on it. This is needed for the
 
1255
                        operations to the page to pass the debug
 
1256
                        checks. */
 
1257
 
 
1258
                        rw_lock_x_lock_move_ownership(&(block->lock));
 
1259
                }
 
1260
 
 
1261
                success = buf_page_get_known_nowait(RW_X_LATCH, block,
 
1262
                                                    BUF_KEEP_OLD,
 
1263
                                                    __FILE__, __LINE__,
 
1264
                                                    &mtr);
 
1265
                ut_a(success);
 
1266
 
 
1267
                buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
 
1268
        }
 
1269
 
 
1270
        /* Read the newest modification lsn from the page */
 
1271
        page_lsn = mach_read_ull(page + FIL_PAGE_LSN);
 
1272
 
 
1273
        if (!recover_backup) {
 
1274
                /* It may be that the page has been modified in the buffer
 
1275
                pool: read the newest modification lsn there */
 
1276
 
 
1277
                page_newest_lsn
 
1278
                        = buf_page_get_newest_modification(&block->page);
 
1279
 
 
1280
                if (page_newest_lsn) {
 
1281
 
 
1282
                        page_lsn = page_newest_lsn;
 
1283
                }
 
1284
        } else {
 
1285
                /* In recovery from a backup we do not really use the buffer
 
1286
                pool */
 
1287
 
 
1288
                page_newest_lsn = 0;
 
1289
        }
 
1290
 
 
1291
        modification_to_page = FALSE;
 
1292
        start_lsn = end_lsn = 0;
 
1293
 
 
1294
        recv = UT_LIST_GET_FIRST(recv_addr->rec_list);
 
1295
 
 
1296
        while (recv) {
 
1297
                end_lsn = recv->end_lsn;
 
1298
 
 
1299
                if (recv->len > RECV_DATA_BLOCK_SIZE) {
 
1300
                        /* We have to copy the record body to a separate
 
1301
                        buffer */
 
1302
 
 
1303
                        buf = mem_alloc(recv->len);
 
1304
 
 
1305
                        recv_data_copy_to_buf(buf, recv);
 
1306
                } else {
 
1307
                        buf = ((byte*)(recv->data)) + sizeof(recv_data_t);
 
1308
                }
 
1309
 
 
1310
                if (recv->type == MLOG_INIT_FILE_PAGE) {
 
1311
                        page_lsn = page_newest_lsn;
 
1312
 
 
1313
                        mach_write_ull(page + UNIV_PAGE_SIZE
 
1314
                                       - FIL_PAGE_END_LSN_OLD_CHKSUM, 0);
 
1315
                        mach_write_ull(page + FIL_PAGE_LSN, 0);
 
1316
                }
 
1317
 
 
1318
                if (recv->start_lsn >= page_lsn) {
 
1319
 
 
1320
                        if (!modification_to_page) {
 
1321
 
 
1322
                                modification_to_page = TRUE;
 
1323
                                start_lsn = recv->start_lsn;
 
1324
                        }
 
1325
 
 
1326
#ifdef UNIV_DEBUG
 
1327
                        if (log_debug_writes) {
 
1328
                                fprintf(stderr,
 
1329
                                        "InnoDB: Applying log rec"
 
1330
                                        " type %lu len %lu"
 
1331
                                        " to space %lu page no %lu\n",
 
1332
                                        (ulong) recv->type, (ulong) recv->len,
 
1333
                                        (ulong) recv_addr->space,
 
1334
                                        (ulong) recv_addr->page_no);
 
1335
                        }
 
1336
#endif /* UNIV_DEBUG */
 
1337
 
 
1338
                        recv_parse_or_apply_log_rec_body(recv->type, buf,
 
1339
                                                         buf + recv->len,
 
1340
                                                         block, &mtr);
 
1341
                        mach_write_ull(page + UNIV_PAGE_SIZE
 
1342
                                       - FIL_PAGE_END_LSN_OLD_CHKSUM,
 
1343
                                       recv->start_lsn + recv->len);
 
1344
                        mach_write_ull(page + FIL_PAGE_LSN,
 
1345
                                       recv->start_lsn + recv->len);
 
1346
                }
 
1347
 
 
1348
                if (recv->len > RECV_DATA_BLOCK_SIZE) {
 
1349
                        mem_free(buf);
 
1350
                }
 
1351
 
 
1352
                recv = UT_LIST_GET_NEXT(rec_list, recv);
 
1353
        }
 
1354
 
 
1355
#ifdef UNIV_ZIP_DEBUG
 
1356
        if (fil_page_get_type(page) == FIL_PAGE_INDEX) {
 
1357
                page_zip_des_t* page_zip = buf_block_get_page_zip(block);
 
1358
 
 
1359
                if (page_zip) {
 
1360
                        ut_a(page_zip_validate_low(page_zip, page, FALSE));
 
1361
                }
 
1362
        }
 
1363
#endif /* UNIV_ZIP_DEBUG */
 
1364
 
 
1365
        mutex_enter(&(recv_sys->mutex));
 
1366
 
 
1367
        if (recv_max_page_lsn < page_lsn) {
 
1368
                recv_max_page_lsn = page_lsn;
 
1369
        }
 
1370
 
 
1371
        recv_addr->state = RECV_PROCESSED;
 
1372
 
 
1373
        ut_a(recv_sys->n_addrs);
 
1374
        recv_sys->n_addrs--;
 
1375
 
 
1376
        mutex_exit(&(recv_sys->mutex));
 
1377
 
 
1378
        if (!recover_backup && modification_to_page) {
 
1379
                ut_a(block);
 
1380
 
 
1381
                buf_flush_recv_note_modification(block, start_lsn, end_lsn);
 
1382
        }
 
1383
 
 
1384
        /* Make sure that committing mtr does not change the modification
 
1385
        lsn values of page */
 
1386
 
 
1387
        mtr.modifications = FALSE;
 
1388
 
 
1389
        mtr_commit(&mtr);
 
1390
}
 
1391
 
 
1392
/***********************************************************************
 
1393
Reads in pages which have hashed log records, from an area around a given
 
1394
page number. */
 
1395
static
 
1396
ulint
 
1397
recv_read_in_area(
 
1398
/*==============*/
 
1399
                        /* out: number of pages found */
 
1400
        ulint   space,  /* in: space */
 
1401
        ulint   zip_size,/* in: compressed page size in bytes, or 0 */
 
1402
        ulint   page_no)/* in: page number */
 
1403
{
 
1404
        recv_addr_t* recv_addr;
 
1405
        ulint   page_nos[RECV_READ_AHEAD_AREA];
 
1406
        ulint   low_limit;
 
1407
        ulint   n;
 
1408
 
 
1409
        low_limit = page_no - (page_no % RECV_READ_AHEAD_AREA);
 
1410
 
 
1411
        n = 0;
 
1412
 
 
1413
        for (page_no = low_limit; page_no < low_limit + RECV_READ_AHEAD_AREA;
 
1414
             page_no++) {
 
1415
                recv_addr = recv_get_fil_addr_struct(space, page_no);
 
1416
 
 
1417
                if (recv_addr && !buf_page_peek(space, page_no)) {
 
1418
 
 
1419
                        mutex_enter(&(recv_sys->mutex));
 
1420
 
 
1421
                        if (recv_addr->state == RECV_NOT_PROCESSED) {
 
1422
                                recv_addr->state = RECV_BEING_READ;
 
1423
 
 
1424
                                page_nos[n] = page_no;
 
1425
 
 
1426
                                n++;
 
1427
                        }
 
1428
 
 
1429
                        mutex_exit(&(recv_sys->mutex));
 
1430
                }
 
1431
        }
 
1432
 
 
1433
        buf_read_recv_pages(FALSE, space, zip_size, page_nos, n);
 
1434
        /*
 
1435
        fprintf(stderr, "Recv pages at %lu n %lu\n", page_nos[0], n);
 
1436
        */
 
1437
        return(n);
 
1438
}
 
1439
 
 
1440
/***********************************************************************
 
1441
Empties the hash table of stored log records, applying them to appropriate
 
1442
pages. */
 
1443
UNIV_INTERN
 
1444
void
 
1445
recv_apply_hashed_log_recs(
 
1446
/*=======================*/
 
1447
        ibool   allow_ibuf)     /* in: if TRUE, also ibuf operations are
 
1448
                                allowed during the application; if FALSE,
 
1449
                                no ibuf operations are allowed, and after
 
1450
                                the application all file pages are flushed to
 
1451
                                disk and invalidated in buffer pool: this
 
1452
                                alternative means that no new log records
 
1453
                                can be generated during the application;
 
1454
                                the caller must in this case own the log
 
1455
                                mutex */
 
1456
{
 
1457
        recv_addr_t* recv_addr;
 
1458
        ulint   i;
 
1459
        ulint   n_pages;
 
1460
        ibool   has_printed     = FALSE;
 
1461
        mtr_t   mtr;
 
1462
loop:
 
1463
        mutex_enter(&(recv_sys->mutex));
 
1464
 
 
1465
        if (recv_sys->apply_batch_on) {
 
1466
 
 
1467
                mutex_exit(&(recv_sys->mutex));
 
1468
 
 
1469
                os_thread_sleep(500000);
 
1470
 
 
1471
                goto loop;
 
1472
        }
 
1473
 
 
1474
        ut_ad(!allow_ibuf == mutex_own(&log_sys->mutex));
 
1475
 
 
1476
        if (!allow_ibuf) {
 
1477
                recv_no_ibuf_operations = TRUE;
 
1478
        }
 
1479
 
 
1480
        recv_sys->apply_log_recs = TRUE;
 
1481
        recv_sys->apply_batch_on = TRUE;
 
1482
 
 
1483
        for (i = 0; i < hash_get_n_cells(recv_sys->addr_hash); i++) {
 
1484
 
 
1485
                recv_addr = HASH_GET_FIRST(recv_sys->addr_hash, i);
 
1486
 
 
1487
                while (recv_addr) {
 
1488
                        ulint   space = recv_addr->space;
 
1489
                        ulint   zip_size = fil_space_get_zip_size(space);
 
1490
                        ulint   page_no = recv_addr->page_no;
 
1491
 
 
1492
                        if (recv_addr->state == RECV_NOT_PROCESSED) {
 
1493
                                if (!has_printed) {
 
1494
                                        ut_print_timestamp(stderr);
 
1495
                                        fputs("  InnoDB: Starting an"
 
1496
                                              " apply batch of log records"
 
1497
                                              " to the database...\n"
 
1498
                                              "InnoDB: Progress in percents: ",
 
1499
                                              stderr);
 
1500
                                        has_printed = TRUE;
 
1501
                                }
 
1502
 
 
1503
                                mutex_exit(&(recv_sys->mutex));
 
1504
 
 
1505
                                if (buf_page_peek(space, page_no)) {
 
1506
                                        buf_block_t*    block;
 
1507
 
 
1508
                                        mtr_start(&mtr);
 
1509
 
 
1510
                                        block = buf_page_get(
 
1511
                                                space, zip_size, page_no,
 
1512
                                                RW_X_LATCH, &mtr);
 
1513
                                        buf_block_dbg_add_level(
 
1514
                                                block, SYNC_NO_ORDER_CHECK);
 
1515
 
 
1516
                                        recv_recover_page(FALSE, FALSE, block);
 
1517
                                        mtr_commit(&mtr);
 
1518
                                } else {
 
1519
                                        recv_read_in_area(space, zip_size,
 
1520
                                                          page_no);
 
1521
                                }
 
1522
 
 
1523
                                mutex_enter(&(recv_sys->mutex));
 
1524
                        }
 
1525
 
 
1526
                        recv_addr = HASH_GET_NEXT(addr_hash, recv_addr);
 
1527
                }
 
1528
 
 
1529
                if (has_printed
 
1530
                    && (i * 100) / hash_get_n_cells(recv_sys->addr_hash)
 
1531
                    != ((i + 1) * 100)
 
1532
                    / hash_get_n_cells(recv_sys->addr_hash)) {
 
1533
 
 
1534
                        fprintf(stderr, "%lu ", (ulong)
 
1535
                                ((i * 100)
 
1536
                                 / hash_get_n_cells(recv_sys->addr_hash)));
 
1537
                }
 
1538
        }
 
1539
 
 
1540
        /* Wait until all the pages have been processed */
 
1541
 
 
1542
        while (recv_sys->n_addrs != 0) {
 
1543
 
 
1544
                mutex_exit(&(recv_sys->mutex));
 
1545
 
 
1546
                os_thread_sleep(500000);
 
1547
 
 
1548
                mutex_enter(&(recv_sys->mutex));
 
1549
        }
 
1550
 
 
1551
        if (has_printed) {
 
1552
 
 
1553
                fprintf(stderr, "\n");
 
1554
        }
 
1555
 
 
1556
        if (!allow_ibuf) {
 
1557
                /* Flush all the file pages to disk and invalidate them in
 
1558
                the buffer pool */
 
1559
 
 
1560
                mutex_exit(&(recv_sys->mutex));
 
1561
                mutex_exit(&(log_sys->mutex));
 
1562
 
 
1563
                n_pages = buf_flush_batch(BUF_FLUSH_LIST, ULINT_MAX,
 
1564
                                          IB_ULONGLONG_MAX);
 
1565
                ut_a(n_pages != ULINT_UNDEFINED);
 
1566
 
 
1567
                buf_flush_wait_batch_end(BUF_FLUSH_LIST);
 
1568
 
 
1569
                buf_pool_invalidate();
 
1570
 
 
1571
                mutex_enter(&(log_sys->mutex));
 
1572
                mutex_enter(&(recv_sys->mutex));
 
1573
 
 
1574
                recv_no_ibuf_operations = FALSE;
 
1575
        }
 
1576
 
 
1577
        recv_sys->apply_log_recs = FALSE;
 
1578
        recv_sys->apply_batch_on = FALSE;
 
1579
 
 
1580
        recv_sys_empty_hash();
 
1581
 
 
1582
        if (has_printed) {
 
1583
                fprintf(stderr, "InnoDB: Apply batch completed\n");
 
1584
        }
 
1585
 
 
1586
        mutex_exit(&(recv_sys->mutex));
 
1587
}
 
1588
 
 
1589
#ifdef UNIV_HOTBACKUP
 
1590
/***********************************************************************
 
1591
Applies log records in the hash table to a backup. */
 
1592
UNIV_INTERN
 
1593
void
 
1594
recv_apply_log_recs_for_backup(void)
 
1595
/*================================*/
 
1596
{
 
1597
        recv_addr_t*    recv_addr;
 
1598
        ulint           n_hash_cells;
 
1599
        buf_block_t*    block;
 
1600
        ulint           actual_size;
 
1601
        ibool           success;
 
1602
        ulint           error;
 
1603
        ulint           i;
 
1604
 
 
1605
        recv_sys->apply_log_recs = TRUE;
 
1606
        recv_sys->apply_batch_on = TRUE;
 
1607
 
 
1608
        block = buf_LRU_get_free_block(UNIV_PAGE_SIZE);
 
1609
 
 
1610
        fputs("InnoDB: Starting an apply batch of log records"
 
1611
              " to the database...\n"
 
1612
              "InnoDB: Progress in percents: ", stderr);
 
1613
 
 
1614
        n_hash_cells = hash_get_n_cells(recv_sys->addr_hash);
 
1615
 
 
1616
        for (i = 0; i < n_hash_cells; i++) {
 
1617
                /* The address hash table is externally chained */
 
1618
                recv_addr = hash_get_nth_cell(recv_sys->addr_hash, i)->node;
 
1619
 
 
1620
                while (recv_addr != NULL) {
 
1621
 
 
1622
                        ulint   zip_size
 
1623
                                = fil_space_get_zip_size(recv_addr->space);
 
1624
 
 
1625
                        if (zip_size == ULINT_UNDEFINED) {
 
1626
#if 0
 
1627
                                fprintf(stderr,
 
1628
                                        "InnoDB: Warning: cannot apply"
 
1629
                                        " log record to"
 
1630
                                        " tablespace %lu page %lu,\n"
 
1631
                                        "InnoDB: because tablespace with"
 
1632
                                        " that id does not exist.\n",
 
1633
                                        recv_addr->space, recv_addr->page_no);
 
1634
#endif
 
1635
                                recv_addr->state = RECV_PROCESSED;
 
1636
 
 
1637
                                ut_a(recv_sys->n_addrs);
 
1638
                                recv_sys->n_addrs--;
 
1639
 
 
1640
                                goto skip_this_recv_addr;
 
1641
                        }
 
1642
 
 
1643
                        /* We simulate a page read made by the buffer pool, to
 
1644
                        make sure the recovery apparatus works ok. We must init
 
1645
                        the block. */
 
1646
 
 
1647
                        buf_page_init_for_backup_restore(
 
1648
                                recv_addr->space, recv_addr->page_no,
 
1649
                                zip_size, block);
 
1650
 
 
1651
                        /* Extend the tablespace's last file if the page_no
 
1652
                        does not fall inside its bounds; we assume the last
 
1653
                        file is auto-extending, and ibbackup copied the file
 
1654
                        when it still was smaller */
 
1655
 
 
1656
                        success = fil_extend_space_to_desired_size(
 
1657
                                &actual_size,
 
1658
                                recv_addr->space, recv_addr->page_no + 1);
 
1659
                        if (!success) {
 
1660
                                fprintf(stderr,
 
1661
                                        "InnoDB: Fatal error: cannot extend"
 
1662
                                        " tablespace %lu to hold %lu pages\n",
 
1663
                                        recv_addr->space, recv_addr->page_no);
 
1664
 
 
1665
                                exit(1);
 
1666
                        }
 
1667
 
 
1668
                        /* Read the page from the tablespace file using the
 
1669
                        fil0fil.c routines */
 
1670
 
 
1671
                        if (zip_size) {
 
1672
                                error = fil_io(OS_FILE_READ, TRUE,
 
1673
                                               recv_addr->space, zip_size,
 
1674
                                               recv_addr->page_no, 0, zip_size,
 
1675
                                               block->page.zip.data, NULL);
 
1676
                        } else {
 
1677
                                error = fil_io(OS_FILE_READ, TRUE,
 
1678
                                               recv_addr->space, 0,
 
1679
                                               recv_addr->page_no, 0,
 
1680
                                               UNIV_PAGE_SIZE,
 
1681
                                               block->frame, NULL);
 
1682
                        }
 
1683
 
 
1684
                        if (error != DB_SUCCESS) {
 
1685
                                fprintf(stderr,
 
1686
                                        "InnoDB: Fatal error: cannot read"
 
1687
                                        " from tablespace"
 
1688
                                        " %lu page number %lu\n",
 
1689
                                        (ulong) recv_addr->space,
 
1690
                                        (ulong) recv_addr->page_no);
 
1691
 
 
1692
                                exit(1);
 
1693
                        }
 
1694
 
 
1695
                        /* Apply the log records to this page */
 
1696
                        recv_recover_page(TRUE, FALSE, block);
 
1697
 
 
1698
                        /* Write the page back to the tablespace file using the
 
1699
                        fil0fil.c routines */
 
1700
 
 
1701
                        buf_flush_init_for_writing(
 
1702
                                block->frame, buf_block_get_page_zip(block),
 
1703
                                mach_read_ull(block->frame + FIL_PAGE_LSN));
 
1704
 
 
1705
                        if (zip_size) {
 
1706
                                error = fil_io(OS_FILE_WRITE, TRUE,
 
1707
                                               recv_addr->space, zip_size,
 
1708
                                               recv_addr->page_no, 0,
 
1709
                                               zip_size,
 
1710
                                               block->page.zip.data, NULL);
 
1711
                        } else {
 
1712
                                error = fil_io(OS_FILE_WRITE, TRUE,
 
1713
                                               recv_addr->space, 0,
 
1714
                                               recv_addr->page_no, 0,
 
1715
                                               UNIV_PAGE_SIZE,
 
1716
                                               block->frame, NULL);
 
1717
                        }
 
1718
skip_this_recv_addr:
 
1719
                        recv_addr = HASH_GET_NEXT(addr_hash, recv_addr);
 
1720
                }
 
1721
 
 
1722
                if ((100 * i) / n_hash_cells
 
1723
                    != (100 * (i + 1)) / n_hash_cells) {
 
1724
                        fprintf(stderr, "%lu ",
 
1725
                                (ulong) ((100 * i) / n_hash_cells));
 
1726
                        fflush(stderr);
 
1727
                }
 
1728
        }
 
1729
 
 
1730
        buf_block_free(block);
 
1731
        recv_sys_empty_hash();
 
1732
}
 
1733
#endif /* UNIV_HOTBACKUP */
 
1734
 
 
1735
/***********************************************************************
 
1736
Tries to parse a single log record and returns its length. */
 
1737
static
 
1738
ulint
 
1739
recv_parse_log_rec(
 
1740
/*===============*/
 
1741
                        /* out: length of the record, or 0 if the record was
 
1742
                        not complete */
 
1743
        byte*   ptr,    /* in: pointer to a buffer */
 
1744
        byte*   end_ptr,/* in: pointer to the buffer end */
 
1745
        byte*   type,   /* out: type */
 
1746
        ulint*  space,  /* out: space id */
 
1747
        ulint*  page_no,/* out: page number */
 
1748
        byte**  body)   /* out: log record body start */
 
1749
{
 
1750
        byte*   new_ptr;
 
1751
 
 
1752
        *body = NULL;
 
1753
 
 
1754
        if (ptr == end_ptr) {
 
1755
 
 
1756
                return(0);
 
1757
        }
 
1758
 
 
1759
        if (*ptr == MLOG_MULTI_REC_END) {
 
1760
 
 
1761
                *type = *ptr;
 
1762
 
 
1763
                return(1);
 
1764
        }
 
1765
 
 
1766
        if (*ptr == MLOG_DUMMY_RECORD) {
 
1767
                *type = *ptr;
 
1768
 
 
1769
                *space = ULINT_UNDEFINED - 1; /* For debugging */
 
1770
 
 
1771
                return(1);
 
1772
        }
 
1773
 
 
1774
        new_ptr = mlog_parse_initial_log_record(ptr, end_ptr, type, space,
 
1775
                                                page_no);
 
1776
        *body = new_ptr;
 
1777
 
 
1778
        if (UNIV_UNLIKELY(!new_ptr)) {
 
1779
 
 
1780
                return(0);
 
1781
        }
 
1782
 
 
1783
        /* Check that page_no is sensible */
 
1784
 
 
1785
        if (UNIV_UNLIKELY(*page_no > 0x8FFFFFFFUL)) {
 
1786
 
 
1787
                recv_sys->found_corrupt_log = TRUE;
 
1788
 
 
1789
                return(0);
 
1790
        }
 
1791
 
 
1792
        new_ptr = recv_parse_or_apply_log_rec_body(*type, new_ptr, end_ptr,
 
1793
                                                   NULL, NULL);
 
1794
        if (UNIV_UNLIKELY(new_ptr == NULL)) {
 
1795
 
 
1796
                return(0);
 
1797
        }
 
1798
 
 
1799
        if (*page_no > recv_max_parsed_page_no) {
 
1800
                recv_max_parsed_page_no = *page_no;
 
1801
        }
 
1802
 
 
1803
        return(new_ptr - ptr);
 
1804
}
 
1805
 
 
1806
/***********************************************************
 
1807
Calculates the new value for lsn when more data is added to the log. */
 
1808
static
 
1809
ib_uint64_t
 
1810
recv_calc_lsn_on_data_add(
 
1811
/*======================*/
 
1812
        ib_uint64_t     lsn,    /* in: old lsn */
 
1813
        ib_uint64_t     len)    /* in: this many bytes of data is
 
1814
                                added, log block headers not included */
 
1815
{
 
1816
        ulint   frag_len;
 
1817
        ulint   lsn_len;
 
1818
 
 
1819
        frag_len = (((ulint) lsn) % OS_FILE_LOG_BLOCK_SIZE)
 
1820
                - LOG_BLOCK_HDR_SIZE;
 
1821
        ut_ad(frag_len < OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE
 
1822
              - LOG_BLOCK_TRL_SIZE);
 
1823
        lsn_len = (ulint) len;
 
1824
        lsn_len += (lsn_len + frag_len)
 
1825
                / (OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE
 
1826
                   - LOG_BLOCK_TRL_SIZE)
 
1827
                * (LOG_BLOCK_HDR_SIZE + LOG_BLOCK_TRL_SIZE);
 
1828
 
 
1829
        return(lsn + lsn_len);
 
1830
}
 
1831
 
 
1832
#ifdef UNIV_LOG_DEBUG
 
1833
/***********************************************************
 
1834
Checks that the parser recognizes incomplete initial segments of a log
 
1835
record as incomplete. */
 
1836
static
 
1837
void
 
1838
recv_check_incomplete_log_recs(
 
1839
/*===========================*/
 
1840
        byte*   ptr,    /* in: pointer to a complete log record */
 
1841
        ulint   len)    /* in: length of the log record */
 
1842
{
 
1843
        ulint   i;
 
1844
        byte    type;
 
1845
        ulint   space;
 
1846
        ulint   page_no;
 
1847
        byte*   body;
 
1848
 
 
1849
        for (i = 0; i < len; i++) {
 
1850
                ut_a(0 == recv_parse_log_rec(ptr, ptr + i, &type, &space,
 
1851
                                             &page_no, &body));
 
1852
        }
 
1853
}
 
1854
#endif /* UNIV_LOG_DEBUG */
 
1855
 
 
1856
/***********************************************************
 
1857
Prints diagnostic info of corrupt log. */
 
1858
static
 
1859
void
 
1860
recv_report_corrupt_log(
 
1861
/*====================*/
 
1862
        byte*   ptr,    /* in: pointer to corrupt log record */
 
1863
        byte    type,   /* in: type of the record */
 
1864
        ulint   space,  /* in: space id, this may also be garbage */
 
1865
        ulint   page_no)/* in: page number, this may also be garbage */
 
1866
{
 
1867
        fprintf(stderr,
 
1868
                "InnoDB: ############### CORRUPT LOG RECORD FOUND\n"
 
1869
                "InnoDB: Log record type %lu, space id %lu, page number %lu\n"
 
1870
                "InnoDB: Log parsing proceeded successfully up to %"PRIu64"\n"
 
1871
                "InnoDB: Previous log record type %lu, is multi %lu\n"
 
1872
                "InnoDB: Recv offset %lu, prev %lu\n",
 
1873
                (ulong) type, (ulong) space, (ulong) page_no,
 
1874
                recv_sys->recovered_lsn,
 
1875
                (ulong) recv_previous_parsed_rec_type,
 
1876
                (ulong) recv_previous_parsed_rec_is_multi,
 
1877
                (ulong) (ptr - recv_sys->buf),
 
1878
                (ulong) recv_previous_parsed_rec_offset);
 
1879
 
 
1880
        if ((ulint)(ptr - recv_sys->buf + 100)
 
1881
            > recv_previous_parsed_rec_offset
 
1882
            && (ulint)(ptr - recv_sys->buf + 100
 
1883
                       - recv_previous_parsed_rec_offset)
 
1884
            < 200000) {
 
1885
                fputs("InnoDB: Hex dump of corrupt log starting"
 
1886
                      " 100 bytes before the start\n"
 
1887
                      "InnoDB: of the previous log rec,\n"
 
1888
                      "InnoDB: and ending 100 bytes after the start"
 
1889
                      " of the corrupt rec:\n",
 
1890
                      stderr);
 
1891
 
 
1892
                ut_print_buf(stderr,
 
1893
                             recv_sys->buf
 
1894
                             + recv_previous_parsed_rec_offset - 100,
 
1895
                             ptr - recv_sys->buf + 200
 
1896
                             - recv_previous_parsed_rec_offset);
 
1897
                putc('\n', stderr);
 
1898
        }
 
1899
 
 
1900
        fputs("InnoDB: WARNING: the log file may have been corrupt and it\n"
 
1901
              "InnoDB: is possible that the log scan did not proceed\n"
 
1902
              "InnoDB: far enough in recovery! Please run CHECK TABLE\n"
 
1903
              "InnoDB: on your InnoDB tables to check that they are ok!\n"
 
1904
              "InnoDB: If mysqld crashes after this recovery, look at\n"
 
1905
              "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
 
1906
              "forcing-recovery.html\n"
 
1907
              "InnoDB: about forcing recovery.\n", stderr);
 
1908
 
 
1909
        fflush(stderr);
 
1910
}
 
1911
 
 
1912
/***********************************************************
 
1913
Parses log records from a buffer and stores them to a hash table to wait
 
1914
merging to file pages. */
 
1915
static
 
1916
ibool
 
1917
recv_parse_log_recs(
 
1918
/*================*/
 
1919
                                /* out: currently always returns FALSE */
 
1920
        ibool   store_to_hash)  /* in: TRUE if the records should be stored
 
1921
                                to the hash table; this is set to FALSE if just
 
1922
                                debug checking is needed */
 
1923
{
 
1924
        byte*           ptr;
 
1925
        byte*           end_ptr;
 
1926
        ulint           single_rec;
 
1927
        ulint           len;
 
1928
        ulint           total_len;
 
1929
        ib_uint64_t     new_recovered_lsn;
 
1930
        ib_uint64_t     old_lsn;
 
1931
        byte            type;
 
1932
        ulint           space;
 
1933
        ulint           page_no;
 
1934
        byte*           body;
 
1935
        ulint           n_recs;
 
1936
 
 
1937
        ut_ad(mutex_own(&(log_sys->mutex)));
 
1938
        ut_ad(recv_sys->parse_start_lsn != 0);
 
1939
loop:
 
1940
        ptr = recv_sys->buf + recv_sys->recovered_offset;
 
1941
 
 
1942
        end_ptr = recv_sys->buf + recv_sys->len;
 
1943
 
 
1944
        if (ptr == end_ptr) {
 
1945
 
 
1946
                return(FALSE);
 
1947
        }
 
1948
 
 
1949
        single_rec = (ulint)*ptr & MLOG_SINGLE_REC_FLAG;
 
1950
 
 
1951
        if (single_rec || *ptr == MLOG_DUMMY_RECORD) {
 
1952
                /* The mtr only modified a single page, or this is a file op */
 
1953
 
 
1954
                old_lsn = recv_sys->recovered_lsn;
 
1955
 
 
1956
                /* Try to parse a log record, fetching its type, space id,
 
1957
                page no, and a pointer to the body of the log record */
 
1958
 
 
1959
                len = recv_parse_log_rec(ptr, end_ptr, &type, &space,
 
1960
                                         &page_no, &body);
 
1961
 
 
1962
                if (len == 0 || recv_sys->found_corrupt_log) {
 
1963
                        if (recv_sys->found_corrupt_log) {
 
1964
 
 
1965
                                recv_report_corrupt_log(ptr,
 
1966
                                                        type, space, page_no);
 
1967
                        }
 
1968
 
 
1969
                        return(FALSE);
 
1970
                }
 
1971
 
 
1972
                new_recovered_lsn = recv_calc_lsn_on_data_add(old_lsn, len);
 
1973
 
 
1974
                if (new_recovered_lsn > recv_sys->scanned_lsn) {
 
1975
                        /* The log record filled a log block, and we require
 
1976
                        that also the next log block should have been scanned
 
1977
                        in */
 
1978
 
 
1979
                        return(FALSE);
 
1980
                }
 
1981
 
 
1982
                recv_previous_parsed_rec_type = (ulint)type;
 
1983
                recv_previous_parsed_rec_offset = recv_sys->recovered_offset;
 
1984
                recv_previous_parsed_rec_is_multi = 0;
 
1985
 
 
1986
                recv_sys->recovered_offset += len;
 
1987
                recv_sys->recovered_lsn = new_recovered_lsn;
 
1988
 
 
1989
#ifdef UNIV_DEBUG
 
1990
                if (log_debug_writes) {
 
1991
                        fprintf(stderr,
 
1992
                                "InnoDB: Parsed a single log rec"
 
1993
                                " type %lu len %lu space %lu page no %lu\n",
 
1994
                                (ulong) type, (ulong) len, (ulong) space,
 
1995
                                (ulong) page_no);
 
1996
                }
 
1997
#endif /* UNIV_DEBUG */
 
1998
 
 
1999
                if (type == MLOG_DUMMY_RECORD) {
 
2000
                        /* Do nothing */
 
2001
 
 
2002
                } else if (!store_to_hash) {
 
2003
                        /* In debug checking, update a replicate page
 
2004
                        according to the log record, and check that it
 
2005
                        becomes identical with the original page */
 
2006
#ifdef UNIV_LOG_DEBUG
 
2007
                        recv_check_incomplete_log_recs(ptr, len);
 
2008
#endif/* UNIV_LOG_DEBUG */
 
2009
 
 
2010
                } else if (type == MLOG_FILE_CREATE
 
2011
                           || type == MLOG_FILE_CREATE2
 
2012
                           || type == MLOG_FILE_RENAME
 
2013
                           || type == MLOG_FILE_DELETE) {
 
2014
                        ut_a(space);
 
2015
#ifdef UNIV_HOTBACKUP
 
2016
                        if (recv_replay_file_ops) {
 
2017
 
 
2018
                                /* In ibbackup --apply-log, replay an .ibd file
 
2019
                                operation, if possible; note that
 
2020
                                fil_path_to_mysql_datadir is set in ibbackup to
 
2021
                                point to the datadir we should use there */
 
2022
 
 
2023
                                if (NULL == fil_op_log_parse_or_replay(
 
2024
                                            body, end_ptr, type, space)) {
 
2025
                                        fprintf(stderr,
 
2026
                                                "InnoDB: Error: file op"
 
2027
                                                " log record of type %lu"
 
2028
                                                " space %lu not complete in\n"
 
2029
                                                "InnoDB: the replay phase."
 
2030
                                                " Path %s\n",
 
2031
                                                (ulint)type, space,
 
2032
                                                (char*)(body + 2));
 
2033
 
 
2034
                                        ut_error;
 
2035
                                }
 
2036
                        }
 
2037
#endif
 
2038
                        /* In normal mysqld crash recovery we do not try to
 
2039
                        replay file operations */
 
2040
                } else {
 
2041
                        recv_add_to_hash_table(type, space, page_no, body,
 
2042
                                               ptr + len, old_lsn,
 
2043
                                               recv_sys->recovered_lsn);
 
2044
                }
 
2045
        } else {
 
2046
                /* Check that all the records associated with the single mtr
 
2047
                are included within the buffer */
 
2048
 
 
2049
                total_len = 0;
 
2050
                n_recs = 0;
 
2051
 
 
2052
                for (;;) {
 
2053
                        len = recv_parse_log_rec(ptr, end_ptr, &type, &space,
 
2054
                                                 &page_no, &body);
 
2055
                        if (len == 0 || recv_sys->found_corrupt_log) {
 
2056
 
 
2057
                                if (recv_sys->found_corrupt_log) {
 
2058
 
 
2059
                                        recv_report_corrupt_log(
 
2060
                                                ptr, type, space, page_no);
 
2061
                                }
 
2062
 
 
2063
                                return(FALSE);
 
2064
                        }
 
2065
 
 
2066
                        recv_previous_parsed_rec_type = (ulint)type;
 
2067
                        recv_previous_parsed_rec_offset
 
2068
                                = recv_sys->recovered_offset + total_len;
 
2069
                        recv_previous_parsed_rec_is_multi = 1;
 
2070
 
 
2071
                        if ((!store_to_hash) && (type != MLOG_MULTI_REC_END)) {
 
2072
#ifdef UNIV_LOG_DEBUG
 
2073
                                recv_check_incomplete_log_recs(ptr, len);
 
2074
#endif /* UNIV_LOG_DEBUG */
 
2075
                        }
 
2076
 
 
2077
#ifdef UNIV_DEBUG
 
2078
                        if (log_debug_writes) {
 
2079
                                fprintf(stderr,
 
2080
                                        "InnoDB: Parsed a multi log rec"
 
2081
                                        " type %lu len %lu"
 
2082
                                        " space %lu page no %lu\n",
 
2083
                                        (ulong) type, (ulong) len,
 
2084
                                        (ulong) space, (ulong) page_no);
 
2085
                        }
 
2086
#endif /* UNIV_DEBUG */
 
2087
 
 
2088
                        total_len += len;
 
2089
                        n_recs++;
 
2090
 
 
2091
                        ptr += len;
 
2092
 
 
2093
                        if (type == MLOG_MULTI_REC_END) {
 
2094
 
 
2095
                                /* Found the end mark for the records */
 
2096
 
 
2097
                                break;
 
2098
                        }
 
2099
                }
 
2100
 
 
2101
                new_recovered_lsn = recv_calc_lsn_on_data_add(
 
2102
                        recv_sys->recovered_lsn, total_len);
 
2103
 
 
2104
                if (new_recovered_lsn > recv_sys->scanned_lsn) {
 
2105
                        /* The log record filled a log block, and we require
 
2106
                        that also the next log block should have been scanned
 
2107
                        in */
 
2108
 
 
2109
                        return(FALSE);
 
2110
                }
 
2111
 
 
2112
                /* Add all the records to the hash table */
 
2113
 
 
2114
                ptr = recv_sys->buf + recv_sys->recovered_offset;
 
2115
 
 
2116
                for (;;) {
 
2117
                        old_lsn = recv_sys->recovered_lsn;
 
2118
                        len = recv_parse_log_rec(ptr, end_ptr, &type, &space,
 
2119
                                                 &page_no, &body);
 
2120
                        if (recv_sys->found_corrupt_log) {
 
2121
 
 
2122
                                recv_report_corrupt_log(ptr,
 
2123
                                                        type, space, page_no);
 
2124
                        }
 
2125
 
 
2126
                        ut_a(len != 0);
 
2127
                        ut_a(0 == ((ulint)*ptr & MLOG_SINGLE_REC_FLAG));
 
2128
 
 
2129
                        recv_sys->recovered_offset += len;
 
2130
                        recv_sys->recovered_lsn
 
2131
                                = recv_calc_lsn_on_data_add(old_lsn, len);
 
2132
                        if (type == MLOG_MULTI_REC_END) {
 
2133
 
 
2134
                                /* Found the end mark for the records */
 
2135
 
 
2136
                                break;
 
2137
                        }
 
2138
 
 
2139
                        if (store_to_hash) {
 
2140
                                recv_add_to_hash_table(type, space, page_no,
 
2141
                                                       body, ptr + len,
 
2142
                                                       old_lsn,
 
2143
                                                       new_recovered_lsn);
 
2144
                        }
 
2145
 
 
2146
                        ptr += len;
 
2147
                }
 
2148
        }
 
2149
 
 
2150
        goto loop;
 
2151
}
 
2152
 
 
2153
/***********************************************************
 
2154
Adds data from a new log block to the parsing buffer of recv_sys if
 
2155
recv_sys->parse_start_lsn is non-zero. */
 
2156
static
 
2157
ibool
 
2158
recv_sys_add_to_parsing_buf(
 
2159
/*========================*/
 
2160
                                        /* out: TRUE if more data added */
 
2161
        byte*           log_block,      /* in: log block */
 
2162
        ib_uint64_t     scanned_lsn)    /* in: lsn of how far we were able
 
2163
                                        to find data in this log block */
 
2164
{
 
2165
        ulint   more_len;
 
2166
        ulint   data_len;
 
2167
        ulint   start_offset;
 
2168
        ulint   end_offset;
 
2169
 
 
2170
        ut_ad(scanned_lsn >= recv_sys->scanned_lsn);
 
2171
 
 
2172
        if (!recv_sys->parse_start_lsn) {
 
2173
                /* Cannot start parsing yet because no start point for
 
2174
                it found */
 
2175
 
 
2176
                return(FALSE);
 
2177
        }
 
2178
 
 
2179
        data_len = log_block_get_data_len(log_block);
 
2180
 
 
2181
        if (recv_sys->parse_start_lsn >= scanned_lsn) {
 
2182
 
 
2183
                return(FALSE);
 
2184
 
 
2185
        } else if (recv_sys->scanned_lsn >= scanned_lsn) {
 
2186
 
 
2187
                return(FALSE);
 
2188
 
 
2189
        } else if (recv_sys->parse_start_lsn > recv_sys->scanned_lsn) {
 
2190
                more_len = (ulint) (scanned_lsn - recv_sys->parse_start_lsn);
 
2191
        } else {
 
2192
                more_len = (ulint) (scanned_lsn - recv_sys->scanned_lsn);
 
2193
        }
 
2194
 
 
2195
        if (more_len == 0) {
 
2196
 
 
2197
                return(FALSE);
 
2198
        }
 
2199
 
 
2200
        ut_ad(data_len >= more_len);
 
2201
 
 
2202
        start_offset = data_len - more_len;
 
2203
 
 
2204
        if (start_offset < LOG_BLOCK_HDR_SIZE) {
 
2205
                start_offset = LOG_BLOCK_HDR_SIZE;
 
2206
        }
 
2207
 
 
2208
        end_offset = data_len;
 
2209
 
 
2210
        if (end_offset > OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) {
 
2211
                end_offset = OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE;
 
2212
        }
 
2213
 
 
2214
        ut_ad(start_offset <= end_offset);
 
2215
 
 
2216
        if (start_offset < end_offset) {
 
2217
                ut_memcpy(recv_sys->buf + recv_sys->len,
 
2218
                          log_block + start_offset, end_offset - start_offset);
 
2219
 
 
2220
                recv_sys->len += end_offset - start_offset;
 
2221
 
 
2222
                ut_a(recv_sys->len <= RECV_PARSING_BUF_SIZE);
 
2223
        }
 
2224
 
 
2225
        return(TRUE);
 
2226
}
 
2227
 
 
2228
/***********************************************************
 
2229
Moves the parsing buffer data left to the buffer start. */
 
2230
static
 
2231
void
 
2232
recv_sys_justify_left_parsing_buf(void)
 
2233
/*===================================*/
 
2234
{
 
2235
        ut_memmove(recv_sys->buf, recv_sys->buf + recv_sys->recovered_offset,
 
2236
                   recv_sys->len - recv_sys->recovered_offset);
 
2237
 
 
2238
        recv_sys->len -= recv_sys->recovered_offset;
 
2239
 
 
2240
        recv_sys->recovered_offset = 0;
 
2241
}
 
2242
 
 
2243
/***********************************************************
 
2244
Scans log from a buffer and stores new log data to the parsing buffer. Parses
 
2245
and hashes the log records if new data found. */
 
2246
UNIV_INTERN
 
2247
ibool
 
2248
recv_scan_log_recs(
 
2249
/*===============*/
 
2250
                                        /* out: TRUE if limit_lsn has been
 
2251
                                        reached, or not able to scan any more
 
2252
                                        in this log group */
 
2253
        ibool           apply_automatically,/* in: TRUE if we want this
 
2254
                                        function to apply log records
 
2255
                                        automatically when the hash table
 
2256
                                        becomes full; in the hot backup tool
 
2257
                                        the tool does the applying, not this
 
2258
                                        function */
 
2259
        ulint           available_memory,/* in: we let the hash table of recs
 
2260
                                        to grow to this size, at the maximum */
 
2261
        ibool           store_to_hash,  /* in: TRUE if the records should be
 
2262
                                        stored to the hash table; this is set
 
2263
                                        to FALSE if just debug checking is
 
2264
                                        needed */
 
2265
        byte*           buf,            /* in: buffer containing a log segment
 
2266
                                        or garbage */
 
2267
        ulint           len,            /* in: buffer length */
 
2268
        ib_uint64_t     start_lsn,      /* in: buffer start lsn */
 
2269
        ib_uint64_t*    contiguous_lsn, /* in/out: it is known that all log
 
2270
                                        groups contain contiguous log data up
 
2271
                                        to this lsn */
 
2272
        ib_uint64_t*    group_scanned_lsn)/* out: scanning succeeded up to
 
2273
                                        this lsn */
 
2274
{
 
2275
        byte*           log_block;
 
2276
        ulint           no;
 
2277
        ib_uint64_t     scanned_lsn;
 
2278
        ibool           finished;
 
2279
        ulint           data_len;
 
2280
        ibool           more_data;
 
2281
 
 
2282
        ut_ad(start_lsn % OS_FILE_LOG_BLOCK_SIZE == 0);
 
2283
        ut_ad(len % OS_FILE_LOG_BLOCK_SIZE == 0);
 
2284
        ut_ad(len > 0);
 
2285
        ut_a(apply_automatically <= TRUE);
 
2286
        ut_a(store_to_hash <= TRUE);
 
2287
 
 
2288
        finished = FALSE;
 
2289
 
 
2290
        log_block = buf;
 
2291
        scanned_lsn = start_lsn;
 
2292
        more_data = FALSE;
 
2293
 
 
2294
        while (log_block < buf + len && !finished) {
 
2295
 
 
2296
                no = log_block_get_hdr_no(log_block);
 
2297
                /*
 
2298
                fprintf(stderr, "Log block header no %lu\n", no);
 
2299
 
 
2300
                fprintf(stderr, "Scanned lsn no %lu\n",
 
2301
                log_block_convert_lsn_to_no(scanned_lsn));
 
2302
                */
 
2303
                if (no != log_block_convert_lsn_to_no(scanned_lsn)
 
2304
                    || !log_block_checksum_is_ok_or_old_format(log_block)) {
 
2305
 
 
2306
                        if (no == log_block_convert_lsn_to_no(scanned_lsn)
 
2307
                            && !log_block_checksum_is_ok_or_old_format(
 
2308
                                    log_block)) {
 
2309
                                fprintf(stderr,
 
2310
                                        "InnoDB: Log block no %lu at"
 
2311
                                        " lsn %"PRIu64" has\n"
 
2312
                                        "InnoDB: ok header, but checksum field"
 
2313
                                        " contains %lu, should be %lu\n",
 
2314
                                        (ulong) no,
 
2315
                                        scanned_lsn,
 
2316
                                        (ulong) log_block_get_checksum(
 
2317
                                                log_block),
 
2318
                                        (ulong) log_block_calc_checksum(
 
2319
                                                log_block));
 
2320
                        }
 
2321
 
 
2322
                        /* Garbage or an incompletely written log block */
 
2323
 
 
2324
                        finished = TRUE;
 
2325
 
 
2326
                        break;
 
2327
                }
 
2328
 
 
2329
                if (log_block_get_flush_bit(log_block)) {
 
2330
                        /* This block was a start of a log flush operation:
 
2331
                        we know that the previous flush operation must have
 
2332
                        been completed for all log groups before this block
 
2333
                        can have been flushed to any of the groups. Therefore,
 
2334
                        we know that log data is contiguous up to scanned_lsn
 
2335
                        in all non-corrupt log groups. */
 
2336
 
 
2337
                        if (scanned_lsn > *contiguous_lsn) {
 
2338
                                *contiguous_lsn = scanned_lsn;
 
2339
                        }
 
2340
                }
 
2341
 
 
2342
                data_len = log_block_get_data_len(log_block);
 
2343
 
 
2344
                if ((store_to_hash || (data_len == OS_FILE_LOG_BLOCK_SIZE))
 
2345
                    && scanned_lsn + data_len > recv_sys->scanned_lsn
 
2346
                    && (recv_sys->scanned_checkpoint_no > 0)
 
2347
                    && (log_block_get_checkpoint_no(log_block)
 
2348
                        < recv_sys->scanned_checkpoint_no)
 
2349
                    && (recv_sys->scanned_checkpoint_no
 
2350
                        - log_block_get_checkpoint_no(log_block)
 
2351
                        > 0x80000000UL)) {
 
2352
 
 
2353
                        /* Garbage from a log buffer flush which was made
 
2354
                        before the most recent database recovery */
 
2355
 
 
2356
                        finished = TRUE;
 
2357
#ifdef UNIV_LOG_DEBUG
 
2358
                        /* This is not really an error, but currently
 
2359
                        we stop here in the debug version: */
 
2360
 
 
2361
                        ut_error;
 
2362
#endif
 
2363
                        break;
 
2364
                }
 
2365
 
 
2366
                if (!recv_sys->parse_start_lsn
 
2367
                    && (log_block_get_first_rec_group(log_block) > 0)) {
 
2368
 
 
2369
                        /* We found a point from which to start the parsing
 
2370
                        of log records */
 
2371
 
 
2372
                        recv_sys->parse_start_lsn = scanned_lsn
 
2373
                                + log_block_get_first_rec_group(log_block);
 
2374
                        recv_sys->scanned_lsn = recv_sys->parse_start_lsn;
 
2375
                        recv_sys->recovered_lsn = recv_sys->parse_start_lsn;
 
2376
                }
 
2377
 
 
2378
                scanned_lsn += data_len;
 
2379
 
 
2380
                if (scanned_lsn > recv_sys->scanned_lsn) {
 
2381
 
 
2382
                        /* We have found more entries. If this scan is
 
2383
                        of startup type, we must initiate crash recovery
 
2384
                        environment before parsing these log records. */
 
2385
 
 
2386
                        if (recv_log_scan_is_startup_type
 
2387
                            && !recv_needed_recovery) {
 
2388
 
 
2389
                                fprintf(stderr,
 
2390
                                        "InnoDB: Log scan progressed"
 
2391
                                        " past the checkpoint lsn %"PRIu64"\n",
 
2392
                                        recv_sys->scanned_lsn);
 
2393
                                recv_init_crash_recovery();
 
2394
                        }
 
2395
 
 
2396
                        /* We were able to find more log data: add it to the
 
2397
                        parsing buffer if parse_start_lsn is already
 
2398
                        non-zero */
 
2399
 
 
2400
                        if (recv_sys->len + 4 * OS_FILE_LOG_BLOCK_SIZE
 
2401
                            >= RECV_PARSING_BUF_SIZE) {
 
2402
                                fprintf(stderr,
 
2403
                                        "InnoDB: Error: log parsing"
 
2404
                                        " buffer overflow."
 
2405
                                        " Recovery may have failed!\n");
 
2406
 
 
2407
                                recv_sys->found_corrupt_log = TRUE;
 
2408
 
 
2409
                        } else if (!recv_sys->found_corrupt_log) {
 
2410
                                more_data = recv_sys_add_to_parsing_buf(
 
2411
                                        log_block, scanned_lsn);
 
2412
                        }
 
2413
 
 
2414
                        recv_sys->scanned_lsn = scanned_lsn;
 
2415
                        recv_sys->scanned_checkpoint_no
 
2416
                                = log_block_get_checkpoint_no(log_block);
 
2417
                }
 
2418
 
 
2419
                if (data_len < OS_FILE_LOG_BLOCK_SIZE) {
 
2420
                        /* Log data for this group ends here */
 
2421
 
 
2422
                        finished = TRUE;
 
2423
                } else {
 
2424
                        log_block += OS_FILE_LOG_BLOCK_SIZE;
 
2425
                }
 
2426
        }
 
2427
 
 
2428
        *group_scanned_lsn = scanned_lsn;
 
2429
 
 
2430
        if (recv_needed_recovery
 
2431
            || (recv_is_from_backup && !recv_is_making_a_backup)) {
 
2432
                recv_scan_print_counter++;
 
2433
 
 
2434
                if (finished || (recv_scan_print_counter % 80 == 0)) {
 
2435
 
 
2436
                        fprintf(stderr,
 
2437
                                "InnoDB: Doing recovery: scanned up to"
 
2438
                                " log sequence number %"PRIu64"\n",
 
2439
                                *group_scanned_lsn);
 
2440
                }
 
2441
        }
 
2442
 
 
2443
        if (more_data && !recv_sys->found_corrupt_log) {
 
2444
                /* Try to parse more log records */
 
2445
 
 
2446
                recv_parse_log_recs(store_to_hash);
 
2447
 
 
2448
                if (store_to_hash && mem_heap_get_size(recv_sys->heap)
 
2449
                    > available_memory
 
2450
                    && apply_automatically) {
 
2451
 
 
2452
                        /* Hash table of log records has grown too big:
 
2453
                        empty it; FALSE means no ibuf operations
 
2454
                        allowed, as we cannot add new records to the
 
2455
                        log yet: they would be produced by ibuf
 
2456
                        operations */
 
2457
 
 
2458
                        recv_apply_hashed_log_recs(FALSE);
 
2459
                }
 
2460
 
 
2461
                if (recv_sys->recovered_offset > RECV_PARSING_BUF_SIZE / 4) {
 
2462
                        /* Move parsing buffer data to the buffer start */
 
2463
 
 
2464
                        recv_sys_justify_left_parsing_buf();
 
2465
                }
 
2466
        }
 
2467
 
 
2468
        return(finished);
 
2469
}
 
2470
 
 
2471
/***********************************************************
 
2472
Scans log from a buffer and stores new log data to the parsing buffer. Parses
 
2473
and hashes the log records if new data found. */
 
2474
static
 
2475
void
 
2476
recv_group_scan_log_recs(
 
2477
/*=====================*/
 
2478
        log_group_t*    group,          /* in: log group */
 
2479
        ib_uint64_t*    contiguous_lsn, /* in/out: it is known that all log
 
2480
                                        groups contain contiguous log data up
 
2481
                                        to this lsn */
 
2482
        ib_uint64_t*    group_scanned_lsn)/* out: scanning succeeded up to
 
2483
                                        this lsn */
 
2484
{
 
2485
        ibool           finished;
 
2486
        ib_uint64_t     start_lsn;
 
2487
        ib_uint64_t     end_lsn;
 
2488
 
 
2489
        finished = FALSE;
 
2490
 
 
2491
        start_lsn = *contiguous_lsn;
 
2492
 
 
2493
        while (!finished) {
 
2494
                end_lsn = start_lsn + RECV_SCAN_SIZE;
 
2495
 
 
2496
                log_group_read_log_seg(LOG_RECOVER, log_sys->buf,
 
2497
                                       group, start_lsn, end_lsn);
 
2498
 
 
2499
                finished = recv_scan_log_recs(
 
2500
                        TRUE, (buf_pool->curr_size - recv_n_pool_free_frames)
 
2501
                        * UNIV_PAGE_SIZE, TRUE, log_sys->buf, RECV_SCAN_SIZE,
 
2502
                        start_lsn, contiguous_lsn, group_scanned_lsn);
 
2503
                start_lsn = end_lsn;
 
2504
        }
 
2505
 
 
2506
#ifdef UNIV_DEBUG
 
2507
        if (log_debug_writes) {
 
2508
                fprintf(stderr,
 
2509
                        "InnoDB: Scanned group %lu up to"
 
2510
                        " log sequence number %"PRIu64"\n",
 
2511
                        (ulong) group->id,
 
2512
                        *group_scanned_lsn);
 
2513
        }
 
2514
#endif /* UNIV_DEBUG */
 
2515
}
 
2516
 
 
2517
/***********************************************************
 
2518
Initialize crash recovery environment. Can be called iff
 
2519
recv_needed_recovery == FALSE. */
 
2520
static
 
2521
void
 
2522
recv_init_crash_recovery(void)
 
2523
/*==========================*/
 
2524
{
 
2525
        ut_a(!recv_needed_recovery);
 
2526
 
 
2527
        recv_needed_recovery = TRUE;
 
2528
 
 
2529
        ut_print_timestamp(stderr);
 
2530
 
 
2531
        fprintf(stderr,
 
2532
                "  InnoDB: Database was not"
 
2533
                " shut down normally!\n"
 
2534
                "InnoDB: Starting crash recovery.\n");
 
2535
 
 
2536
        fprintf(stderr,
 
2537
                "InnoDB: Reading tablespace information"
 
2538
                " from the .ibd files...\n");
 
2539
 
 
2540
        fil_load_single_table_tablespaces();
 
2541
 
 
2542
        /* If we are using the doublewrite method, we will
 
2543
        check if there are half-written pages in data files,
 
2544
        and restore them from the doublewrite buffer if
 
2545
        possible */
 
2546
 
 
2547
        if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) {
 
2548
 
 
2549
                fprintf(stderr,
 
2550
                        "InnoDB: Restoring possible"
 
2551
                        " half-written data pages from"
 
2552
                        " the doublewrite\n"
 
2553
                        "InnoDB: buffer...\n");
 
2554
                trx_sys_doublewrite_init_or_restore_pages(TRUE);
 
2555
        }
 
2556
}
 
2557
 
 
2558
/************************************************************
 
2559
Recovers from a checkpoint. When this function returns, the database is able
 
2560
to start processing of new user transactions, but the function
 
2561
recv_recovery_from_checkpoint_finish should be called later to complete
 
2562
the recovery and free the resources used in it. */
 
2563
UNIV_INTERN
 
2564
ulint
 
2565
recv_recovery_from_checkpoint_start_func(
 
2566
/*=====================================*/
 
2567
                                        /* out: error code or DB_SUCCESS */
 
2568
#ifdef UNIV_LOG_ARCHIVE
 
2569
        ulint           type,           /* in: LOG_CHECKPOINT or LOG_ARCHIVE */
 
2570
        ib_uint64_t     limit_lsn,      /* in: recover up to this lsn
 
2571
                                        if possible */
 
2572
#endif /* UNIV_LOG_ARCHIVE */
 
2573
        ib_uint64_t     min_flushed_lsn,/* in: min flushed lsn from
 
2574
                                        data files */
 
2575
        ib_uint64_t     max_flushed_lsn)/* in: max flushed lsn from
 
2576
                                        data files */
 
2577
{
 
2578
        log_group_t*    group;
 
2579
        log_group_t*    max_cp_group;
 
2580
        log_group_t*    up_to_date_group;
 
2581
        ulint           max_cp_field;
 
2582
        ib_uint64_t     checkpoint_lsn;
 
2583
        ib_uint64_t     checkpoint_no;
 
2584
        ib_uint64_t     old_scanned_lsn;
 
2585
        ib_uint64_t     group_scanned_lsn= 0;
 
2586
        ib_uint64_t     contiguous_lsn;
 
2587
        ib_uint64_t     archived_lsn;
 
2588
        byte*           buf;
 
2589
        byte            log_hdr_buf[LOG_FILE_HDR_SIZE];
 
2590
        ulint           err;
 
2591
 
 
2592
#ifdef UNIV_LOG_ARCHIVE
 
2593
        ut_ad(type != LOG_CHECKPOINT || limit_lsn == IB_ULONGLONG_MAX);
 
2594
# define TYPE_CHECKPOINT        (type == LOG_CHECKPOINT)
 
2595
# define LIMIT_LSN              limit_lsn
 
2596
#else /* UNIV_LOG_ARCHIVE */
 
2597
# define TYPE_CHECKPOINT        1
 
2598
# define LIMIT_LSN              IB_ULONGLONG_MAX
 
2599
#endif /* UNIV_LOG_ARCHIVE */
 
2600
 
 
2601
        if (TYPE_CHECKPOINT) {
 
2602
                recv_sys_create();
 
2603
                recv_sys_init(FALSE, buf_pool_get_curr_size());
 
2604
        }
 
2605
 
 
2606
        if (srv_force_recovery >= SRV_FORCE_NO_LOG_REDO) {
 
2607
                fprintf(stderr,
 
2608
                        "InnoDB: The user has set SRV_FORCE_NO_LOG_REDO on\n");
 
2609
                fprintf(stderr,
 
2610
                        "InnoDB: Skipping log redo\n");
 
2611
 
 
2612
                return(DB_SUCCESS);
 
2613
        }
 
2614
 
 
2615
        recv_recovery_on = TRUE;
 
2616
 
 
2617
        recv_sys->limit_lsn = LIMIT_LSN;
 
2618
 
 
2619
        mutex_enter(&(log_sys->mutex));
 
2620
 
 
2621
        /* Look for the latest checkpoint from any of the log groups */
 
2622
 
 
2623
        err = recv_find_max_checkpoint(&max_cp_group, &max_cp_field);
 
2624
 
 
2625
        if (err != DB_SUCCESS) {
 
2626
 
 
2627
                mutex_exit(&(log_sys->mutex));
 
2628
 
 
2629
                return(err);
 
2630
        }
 
2631
 
 
2632
        log_group_read_checkpoint_info(max_cp_group, max_cp_field);
 
2633
 
 
2634
        buf = log_sys->checkpoint_buf;
 
2635
 
 
2636
        checkpoint_lsn = mach_read_ull(buf + LOG_CHECKPOINT_LSN);
 
2637
        checkpoint_no = mach_read_ull(buf + LOG_CHECKPOINT_NO);
 
2638
        archived_lsn = mach_read_ull(buf + LOG_CHECKPOINT_ARCHIVED_LSN);
 
2639
 
 
2640
        /* Read the first log file header to print a note if this is
 
2641
        a recovery from a restored InnoDB Hot Backup */
 
2642
 
 
2643
        fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE, max_cp_group->space_id, 0,
 
2644
               0, 0, LOG_FILE_HDR_SIZE,
 
2645
               log_hdr_buf, max_cp_group);
 
2646
 
 
2647
        if (0 == ut_memcmp(log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP,
 
2648
                           (byte*)"ibbackup", (sizeof "ibbackup") - 1)) {
 
2649
                /* This log file was created by ibbackup --restore: print
 
2650
                a note to the user about it */
 
2651
 
 
2652
                fprintf(stderr,
 
2653
                        "InnoDB: The log file was created by"
 
2654
                        " ibbackup --apply-log at\n"
 
2655
                        "InnoDB: %s\n",
 
2656
                        log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP);
 
2657
                fprintf(stderr,
 
2658
                        "InnoDB: NOTE: the following crash recovery"
 
2659
                        " is part of a normal restore.\n");
 
2660
 
 
2661
                /* Wipe over the label now */
 
2662
 
 
2663
                memset(log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP,
 
2664
                       ' ', 4);
 
2665
                /* Write to the log file to wipe over the label */
 
2666
                fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE,
 
2667
                       max_cp_group->space_id, 0,
 
2668
                       0, 0, OS_FILE_LOG_BLOCK_SIZE,
 
2669
                       log_hdr_buf, max_cp_group);
 
2670
        }
 
2671
 
 
2672
#ifdef UNIV_LOG_ARCHIVE
 
2673
        group = UT_LIST_GET_FIRST(log_sys->log_groups);
 
2674
 
 
2675
        while (group) {
 
2676
                log_checkpoint_get_nth_group_info(buf, group->id,
 
2677
                                                  &(group->archived_file_no),
 
2678
                                                  &(group->archived_offset));
 
2679
 
 
2680
                group = UT_LIST_GET_NEXT(log_groups, group);
 
2681
        }
 
2682
#endif /* UNIV_LOG_ARCHIVE */
 
2683
 
 
2684
        if (TYPE_CHECKPOINT) {
 
2685
                /* Start reading the log groups from the checkpoint lsn up. The
 
2686
                variable contiguous_lsn contains an lsn up to which the log is
 
2687
                known to be contiguously written to all log groups. */
 
2688
 
 
2689
                recv_sys->parse_start_lsn = checkpoint_lsn;
 
2690
                recv_sys->scanned_lsn = checkpoint_lsn;
 
2691
                recv_sys->scanned_checkpoint_no = 0;
 
2692
                recv_sys->recovered_lsn = checkpoint_lsn;
 
2693
 
 
2694
                srv_start_lsn = checkpoint_lsn;
 
2695
        }
 
2696
 
 
2697
        contiguous_lsn = ut_uint64_align_down(recv_sys->scanned_lsn,
 
2698
                                              OS_FILE_LOG_BLOCK_SIZE);
 
2699
        if (TYPE_CHECKPOINT) {
 
2700
                up_to_date_group = max_cp_group;
 
2701
#ifdef UNIV_LOG_ARCHIVE
 
2702
        } else {
 
2703
                ulint   capacity;
 
2704
 
 
2705
                /* Try to recover the remaining part from logs: first from
 
2706
                the logs of the archived group */
 
2707
 
 
2708
                group = recv_sys->archive_group;
 
2709
                capacity = log_group_get_capacity(group);
 
2710
 
 
2711
                if (recv_sys->scanned_lsn > checkpoint_lsn + capacity
 
2712
                    || checkpoint_lsn > recv_sys->scanned_lsn + capacity) {
 
2713
 
 
2714
                        mutex_exit(&(log_sys->mutex));
 
2715
 
 
2716
                        /* The group does not contain enough log: probably
 
2717
                        an archived log file was missing or corrupt */
 
2718
 
 
2719
                        return(DB_ERROR);
 
2720
                }
 
2721
 
 
2722
                recv_group_scan_log_recs(group, &contiguous_lsn,
 
2723
                                         &group_scanned_lsn);
 
2724
                if (recv_sys->scanned_lsn < checkpoint_lsn) {
 
2725
 
 
2726
                        mutex_exit(&(log_sys->mutex));
 
2727
 
 
2728
                        /* The group did not contain enough log: an archived
 
2729
                        log file was missing or invalid, or the log group
 
2730
                        was corrupt */
 
2731
 
 
2732
                        return(DB_ERROR);
 
2733
                }
 
2734
 
 
2735
                group->scanned_lsn = group_scanned_lsn;
 
2736
                up_to_date_group = group;
 
2737
#endif /* UNIV_LOG_ARCHIVE */
 
2738
        }
 
2739
 
 
2740
        ut_ad(RECV_SCAN_SIZE <= log_sys->buf_size);
 
2741
 
 
2742
        group = UT_LIST_GET_FIRST(log_sys->log_groups);
 
2743
 
 
2744
#ifdef UNIV_LOG_ARCHIVE
 
2745
        if ((type == LOG_ARCHIVE) && (group == recv_sys->archive_group)) {
 
2746
                group = UT_LIST_GET_NEXT(log_groups, group);
 
2747
        }
 
2748
#endif /* UNIV_LOG_ARCHIVE */
 
2749
 
 
2750
        /* Set the flag to publish that we are doing startup scan. */
 
2751
        recv_log_scan_is_startup_type = TYPE_CHECKPOINT;
 
2752
        while (group) {
 
2753
                old_scanned_lsn = recv_sys->scanned_lsn;
 
2754
 
 
2755
                recv_group_scan_log_recs(group, &contiguous_lsn,
 
2756
                                         &group_scanned_lsn);
 
2757
                group->scanned_lsn = group_scanned_lsn;
 
2758
 
 
2759
                if (old_scanned_lsn < group_scanned_lsn) {
 
2760
                        /* We found a more up-to-date group */
 
2761
 
 
2762
                        up_to_date_group = group;
 
2763
                }
 
2764
 
 
2765
#ifdef UNIV_LOG_ARCHIVE
 
2766
                if ((type == LOG_ARCHIVE)
 
2767
                    && (group == recv_sys->archive_group)) {
 
2768
                        group = UT_LIST_GET_NEXT(log_groups, group);
 
2769
                }
 
2770
#endif /* UNIV_LOG_ARCHIVE */
 
2771
 
 
2772
                group = UT_LIST_GET_NEXT(log_groups, group);
 
2773
        }
 
2774
 
 
2775
        /* Done with startup scan. Clear the flag. */
 
2776
        recv_log_scan_is_startup_type = FALSE;
 
2777
        if (TYPE_CHECKPOINT) {
 
2778
                /* NOTE: we always do a 'recovery' at startup, but only if
 
2779
                there is something wrong we will print a message to the
 
2780
                user about recovery: */
 
2781
 
 
2782
                if (checkpoint_lsn != max_flushed_lsn
 
2783
                    || checkpoint_lsn != min_flushed_lsn) {
 
2784
 
 
2785
                        if (checkpoint_lsn < max_flushed_lsn) {
 
2786
                                fprintf(stderr,
 
2787
                                        "InnoDB: #########################"
 
2788
                                        "#################################\n"
 
2789
                                        "InnoDB:                          "
 
2790
                                        "WARNING!\n"
 
2791
                                        "InnoDB: The log sequence number"
 
2792
                                        " in ibdata files is higher\n"
 
2793
                                        "InnoDB: than the log sequence number"
 
2794
                                        " in the ib_logfiles! Are you sure\n"
 
2795
                                        "InnoDB: you are using the right"
 
2796
                                        " ib_logfiles to start up"
 
2797
                                        " the database?\n"
 
2798
                                        "InnoDB: Log sequence number in"
 
2799
                                        " ib_logfiles is %"PRIu64", log\n"
 
2800
                                        "InnoDB: sequence numbers stamped"
 
2801
                                        " to ibdata file headers are between\n"
 
2802
                                        "InnoDB: %"PRIu64" and %"PRIu64".\n"
 
2803
                                        "InnoDB: #########################"
 
2804
                                        "#################################\n",
 
2805
                                        checkpoint_lsn,
 
2806
                                        min_flushed_lsn,
 
2807
                                        max_flushed_lsn);
 
2808
                        }
 
2809
 
 
2810
                        if (!recv_needed_recovery) {
 
2811
                                fprintf(stderr,
 
2812
                                        "InnoDB: The log sequence number"
 
2813
                                        " in ibdata files does not match\n"
 
2814
                                        "InnoDB: the log sequence number"
 
2815
                                        " in the ib_logfiles!\n");
 
2816
                                recv_init_crash_recovery();
 
2817
                        }
 
2818
                }
 
2819
 
 
2820
                if (!recv_needed_recovery) {
 
2821
                        /* Init the doublewrite buffer memory structure */
 
2822
                        trx_sys_doublewrite_init_or_restore_pages(FALSE);
 
2823
                }
 
2824
        }
 
2825
 
 
2826
        /* We currently have only one log group */
 
2827
        if (group_scanned_lsn < checkpoint_lsn) {
 
2828
                ut_print_timestamp(stderr);
 
2829
                fprintf(stderr,
 
2830
                        "  InnoDB: ERROR: We were only able to scan the log"
 
2831
                        " up to\n"
 
2832
                        "InnoDB: %"PRIu64", but a checkpoint was at %"PRIu64".\n"
 
2833
                        "InnoDB: It is possible that"
 
2834
                        " the database is now corrupt!\n",
 
2835
                        group_scanned_lsn,
 
2836
                        checkpoint_lsn);
 
2837
        }
 
2838
 
 
2839
        if (group_scanned_lsn < recv_max_page_lsn) {
 
2840
                ut_print_timestamp(stderr);
 
2841
                fprintf(stderr,
 
2842
                        "  InnoDB: ERROR: We were only able to scan the log"
 
2843
                        " up to %"PRIu64"\n"
 
2844
                        "InnoDB: but a database page a had an lsn %"PRIu64"."
 
2845
                        " It is possible that the\n"
 
2846
                        "InnoDB: database is now corrupt!\n",
 
2847
                        group_scanned_lsn,
 
2848
                        recv_max_page_lsn);
 
2849
        }
 
2850
 
 
2851
        if (recv_sys->recovered_lsn < checkpoint_lsn) {
 
2852
 
 
2853
                mutex_exit(&(log_sys->mutex));
 
2854
 
 
2855
                if (recv_sys->recovered_lsn >= LIMIT_LSN) {
 
2856
 
 
2857
                        return(DB_SUCCESS);
 
2858
                }
 
2859
 
 
2860
                ut_error;
 
2861
 
 
2862
                return(DB_ERROR);
 
2863
        }
 
2864
 
 
2865
        /* Synchronize the uncorrupted log groups to the most up-to-date log
 
2866
        group; we also copy checkpoint info to groups */
 
2867
 
 
2868
        log_sys->next_checkpoint_lsn = checkpoint_lsn;
 
2869
        log_sys->next_checkpoint_no = checkpoint_no + 1;
 
2870
 
 
2871
#ifdef UNIV_LOG_ARCHIVE
 
2872
        log_sys->archived_lsn = archived_lsn;
 
2873
#endif /* UNIV_LOG_ARCHIVE */
 
2874
 
 
2875
        recv_synchronize_groups(up_to_date_group);
 
2876
 
 
2877
        if (!recv_needed_recovery) {
 
2878
                ut_a(checkpoint_lsn == recv_sys->recovered_lsn);
 
2879
        } else {
 
2880
                srv_start_lsn = recv_sys->recovered_lsn;
 
2881
        }
 
2882
 
 
2883
        log_sys->lsn = recv_sys->recovered_lsn;
 
2884
 
 
2885
        ut_memcpy(log_sys->buf, recv_sys->last_block, OS_FILE_LOG_BLOCK_SIZE);
 
2886
 
 
2887
        log_sys->buf_free = (ulint) log_sys->lsn % OS_FILE_LOG_BLOCK_SIZE;
 
2888
        log_sys->buf_next_to_write = log_sys->buf_free;
 
2889
        log_sys->written_to_some_lsn = log_sys->lsn;
 
2890
        log_sys->written_to_all_lsn = log_sys->lsn;
 
2891
 
 
2892
        log_sys->last_checkpoint_lsn = checkpoint_lsn;
 
2893
 
 
2894
        log_sys->next_checkpoint_no = checkpoint_no + 1;
 
2895
 
 
2896
#ifdef UNIV_LOG_ARCHIVE
 
2897
        if (archived_lsn == IB_ULONGLONG_MAX) {
 
2898
 
 
2899
                log_sys->archiving_state = LOG_ARCH_OFF;
 
2900
        }
 
2901
#endif /* UNIV_LOG_ARCHIVE */
 
2902
 
 
2903
        mutex_enter(&(recv_sys->mutex));
 
2904
 
 
2905
        recv_sys->apply_log_recs = TRUE;
 
2906
 
 
2907
        mutex_exit(&(recv_sys->mutex));
 
2908
 
 
2909
        mutex_exit(&(log_sys->mutex));
 
2910
 
 
2911
        recv_lsn_checks_on = TRUE;
 
2912
 
 
2913
        /* The database is now ready to start almost normal processing of user
 
2914
        transactions: transaction rollbacks and the application of the log
 
2915
        records in the hash table can be run in background. */
 
2916
 
 
2917
        return(DB_SUCCESS);
 
2918
 
 
2919
#undef TYPE_CHECKPOINT
 
2920
#undef LIMIT_LSN
 
2921
}
 
2922
 
 
2923
/************************************************************
 
2924
Completes recovery from a checkpoint. */
 
2925
UNIV_INTERN
 
2926
void
 
2927
recv_recovery_from_checkpoint_finish(void)
 
2928
/*======================================*/
 
2929
{
 
2930
        int             i;
 
2931
 
 
2932
        /* Apply the hashed log records to the respective file pages */
 
2933
 
 
2934
        if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) {
 
2935
 
 
2936
                recv_apply_hashed_log_recs(TRUE);
 
2937
        }
 
2938
 
 
2939
#ifdef UNIV_DEBUG
 
2940
        if (log_debug_writes) {
 
2941
                fprintf(stderr,
 
2942
                        "InnoDB: Log records applied to the database\n");
 
2943
        }
 
2944
#endif /* UNIV_DEBUG */
 
2945
 
 
2946
        if (recv_needed_recovery) {
 
2947
                trx_sys_print_mysql_master_log_pos();
 
2948
                trx_sys_print_mysql_binlog_offset();
 
2949
        }
 
2950
 
 
2951
        if (recv_sys->found_corrupt_log) {
 
2952
 
 
2953
                fprintf(stderr,
 
2954
                        "InnoDB: WARNING: the log file may have been"
 
2955
                        " corrupt and it\n"
 
2956
                        "InnoDB: is possible that the log scan or parsing"
 
2957
                        " did not proceed\n"
 
2958
                        "InnoDB: far enough in recovery. Please run"
 
2959
                        " CHECK TABLE\n"
 
2960
                        "InnoDB: on your InnoDB tables to check that"
 
2961
                        " they are ok!\n"
 
2962
                        "InnoDB: It may be safest to recover your"
 
2963
                        " InnoDB database from\n"
 
2964
                        "InnoDB: a backup!\n");
 
2965
        }
 
2966
 
 
2967
        /* Free the resources of the recovery system */
 
2968
 
 
2969
        recv_recovery_on = FALSE;
 
2970
 
 
2971
#ifndef UNIV_LOG_DEBUG
 
2972
        recv_sys_free();
 
2973
#endif
 
2974
 
 
2975
        /* Drop partially created indexes. */
 
2976
        row_merge_drop_temp_indexes();
 
2977
 
 
2978
#ifdef UNIV_SYNC_DEBUG
 
2979
        /* Wait for a while so that created threads have time to suspend
 
2980
        themselves before we switch the latching order checks on */
 
2981
        os_thread_sleep(1000000);
 
2982
 
 
2983
        /* Switch latching order checks on in sync0sync.c */
 
2984
        sync_order_checks_on = TRUE;
 
2985
#endif
 
2986
        if (srv_force_recovery < SRV_FORCE_NO_TRX_UNDO) {
 
2987
                /* Rollback the uncommitted transactions which have no user
 
2988
                session */
 
2989
 
 
2990
                os_thread_create(trx_rollback_or_clean_all_recovered,
 
2991
                                 (void *)&i, NULL);
 
2992
        }
 
2993
}
 
2994
 
 
2995
/**********************************************************
 
2996
Resets the logs. The contents of log files will be lost! */
 
2997
UNIV_INTERN
 
2998
void
 
2999
recv_reset_logs(
 
3000
/*============*/
 
3001
        ib_uint64_t     lsn,            /* in: reset to this lsn
 
3002
                                        rounded up to be divisible by
 
3003
                                        OS_FILE_LOG_BLOCK_SIZE, after
 
3004
                                        which we add
 
3005
                                        LOG_BLOCK_HDR_SIZE */
 
3006
#ifdef UNIV_LOG_ARCHIVE
 
3007
        ulint           arch_log_no,    /* in: next archived log file number */
 
3008
#endif /* UNIV_LOG_ARCHIVE */
 
3009
        ibool           new_logs_created)/* in: TRUE if resetting logs
 
3010
                                        is done at the log creation;
 
3011
                                        FALSE if it is done after
 
3012
                                        archive recovery */
 
3013
{
 
3014
        log_group_t*    group;
 
3015
 
 
3016
        ut_ad(mutex_own(&(log_sys->mutex)));
 
3017
 
 
3018
        log_sys->lsn = ut_uint64_align_up(lsn, OS_FILE_LOG_BLOCK_SIZE);
 
3019
 
 
3020
        group = UT_LIST_GET_FIRST(log_sys->log_groups);
 
3021
 
 
3022
        while (group) {
 
3023
                group->lsn = log_sys->lsn;
 
3024
                group->lsn_offset = LOG_FILE_HDR_SIZE;
 
3025
#ifdef UNIV_LOG_ARCHIVE
 
3026
                group->archived_file_no = arch_log_no;
 
3027
                group->archived_offset = 0;
 
3028
#endif /* UNIV_LOG_ARCHIVE */
 
3029
 
 
3030
                if (!new_logs_created) {
 
3031
                        recv_truncate_group(group, group->lsn, group->lsn,
 
3032
                                            group->lsn, group->lsn);
 
3033
                }
 
3034
 
 
3035
                group = UT_LIST_GET_NEXT(log_groups, group);
 
3036
        }
 
3037
 
 
3038
        log_sys->buf_next_to_write = 0;
 
3039
        log_sys->written_to_some_lsn = log_sys->lsn;
 
3040
        log_sys->written_to_all_lsn = log_sys->lsn;
 
3041
 
 
3042
        log_sys->next_checkpoint_no = 0;
 
3043
        log_sys->last_checkpoint_lsn = 0;
 
3044
 
 
3045
#ifdef UNIV_LOG_ARCHIVE
 
3046
        log_sys->archived_lsn = log_sys->lsn;
 
3047
#endif /* UNIV_LOG_ARCHIVE */
 
3048
 
 
3049
        log_block_init(log_sys->buf, log_sys->lsn);
 
3050
        log_block_set_first_rec_group(log_sys->buf, LOG_BLOCK_HDR_SIZE);
 
3051
 
 
3052
        log_sys->buf_free = LOG_BLOCK_HDR_SIZE;
 
3053
        log_sys->lsn += LOG_BLOCK_HDR_SIZE;
 
3054
 
 
3055
        mutex_exit(&(log_sys->mutex));
 
3056
 
 
3057
        /* Reset the checkpoint fields in logs */
 
3058
 
 
3059
        log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
 
3060
        log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
 
3061
 
 
3062
        mutex_enter(&(log_sys->mutex));
 
3063
}
 
3064
 
 
3065
#ifdef UNIV_HOTBACKUP
 
3066
/**********************************************************
 
3067
Creates new log files after a backup has been restored. */
 
3068
UNIV_INTERN
 
3069
void
 
3070
recv_reset_log_files_for_backup(
 
3071
/*============================*/
 
3072
        const char*     log_dir,        /* in: log file directory path */
 
3073
        ulint           n_log_files,    /* in: number of log files */
 
3074
        ulint           log_file_size,  /* in: log file size */
 
3075
        ib_uint64_t     lsn)            /* in: new start lsn, must be
 
3076
                                        divisible by OS_FILE_LOG_BLOCK_SIZE */
 
3077
{
 
3078
        os_file_t       log_file;
 
3079
        ibool           success;
 
3080
        byte*           buf;
 
3081
        ulint           i;
 
3082
        ulint           log_dir_len;
 
3083
        char            name[5000];
 
3084
        static const char ib_logfile_basename[] = "ib_logfile";
 
3085
 
 
3086
        log_dir_len = strlen(log_dir);
 
3087
        /* full path name of ib_logfile consists of log dir path + basename
 
3088
        + number. This must fit in the name buffer.
 
3089
        */
 
3090
        ut_a(log_dir_len + strlen(ib_logfile_basename) + 11  < sizeof(name));
 
3091
 
 
3092
        buf = ut_malloc(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
 
3093
        memset(buf, '\0', LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
 
3094
 
 
3095
        for (i = 0; i < n_log_files; i++) {
 
3096
 
 
3097
                sprintf(name, "%s%s%lu", log_dir,
 
3098
                        ib_logfile_basename, (ulong)i);
 
3099
 
 
3100
                log_file = os_file_create_simple(name, OS_FILE_CREATE,
 
3101
                                                 OS_FILE_READ_WRITE, &success);
 
3102
                if (!success) {
 
3103
                        fprintf(stderr,
 
3104
                                "InnoDB: Cannot create %s. Check that"
 
3105
                                " the file does not exist yet.\n", name);
 
3106
 
 
3107
                        exit(1);
 
3108
                }
 
3109
 
 
3110
                fprintf(stderr,
 
3111
                        "Setting log file size to %lu %lu\n",
 
3112
                        (ulong) ut_get_high32(log_file_size),
 
3113
                        (ulong) log_file_size & 0xFFFFFFFFUL);
 
3114
 
 
3115
                success = os_file_set_size(name, log_file,
 
3116
                                           log_file_size & 0xFFFFFFFFUL,
 
3117
                                           ut_get_high32(log_file_size));
 
3118
 
 
3119
                if (!success) {
 
3120
                        fprintf(stderr,
 
3121
                                "InnoDB: Cannot set %s size to %lu %lu\n",
 
3122
                                name, (ulong) ut_get_high32(log_file_size),
 
3123
                                (ulong) (log_file_size & 0xFFFFFFFFUL));
 
3124
                        exit(1);
 
3125
                }
 
3126
 
 
3127
                os_file_flush(log_file);
 
3128
                os_file_close(log_file);
 
3129
        }
 
3130
 
 
3131
        /* We pretend there is a checkpoint at lsn + LOG_BLOCK_HDR_SIZE */
 
3132
 
 
3133
        log_reset_first_header_and_checkpoint(buf, lsn);
 
3134
 
 
3135
        log_block_init_in_old_format(buf + LOG_FILE_HDR_SIZE, lsn);
 
3136
        log_block_set_first_rec_group(buf + LOG_FILE_HDR_SIZE,
 
3137
                                      LOG_BLOCK_HDR_SIZE);
 
3138
        sprintf(name, "%s%s%lu", log_dir, ib_logfile_basename, (ulong)0);
 
3139
 
 
3140
        log_file = os_file_create_simple(name, OS_FILE_OPEN,
 
3141
                                         OS_FILE_READ_WRITE, &success);
 
3142
        if (!success) {
 
3143
                fprintf(stderr, "InnoDB: Cannot open %s.\n", name);
 
3144
 
 
3145
                exit(1);
 
3146
        }
 
3147
 
 
3148
        os_file_write(name, log_file, buf, 0, 0,
 
3149
                      LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
 
3150
        os_file_flush(log_file);
 
3151
        os_file_close(log_file);
 
3152
 
 
3153
        ut_free(buf);
 
3154
}
 
3155
#endif /* UNIV_HOTBACKUP */
 
3156
 
 
3157
#ifdef UNIV_LOG_ARCHIVE
 
3158
/**********************************************************
 
3159
Reads from the archive of a log group and performs recovery. */
 
3160
static
 
3161
ibool
 
3162
log_group_recover_from_archive_file(
 
3163
/*================================*/
 
3164
                                        /* out: TRUE if no more complete
 
3165
                                        consistent archive files */
 
3166
        log_group_t*    group)          /* in: log group */
 
3167
{
 
3168
        os_file_t       file_handle;
 
3169
        ib_uint64_t     start_lsn;
 
3170
        ib_uint64_t     file_end_lsn;
 
3171
        ib_uint64_t     dummy_lsn;
 
3172
        ib_uint64_t     scanned_lsn;
 
3173
        ulint           len;
 
3174
        ibool           ret;
 
3175
        byte*           buf;
 
3176
        ulint           read_offset;
 
3177
        ulint           file_size;
 
3178
        ulint           file_size_high;
 
3179
        int             input_char;
 
3180
        char            name[10000];
 
3181
 
 
3182
        ut_a(0);
 
3183
 
 
3184
try_open_again:
 
3185
        buf = log_sys->buf;
 
3186
 
 
3187
        /* Add the file to the archive file space; open the file */
 
3188
 
 
3189
        log_archived_file_name_gen(name, group->id, group->archived_file_no);
 
3190
 
 
3191
        file_handle = os_file_create(name, OS_FILE_OPEN,
 
3192
                                     OS_FILE_LOG, OS_FILE_AIO, &ret);
 
3193
 
 
3194
        if (ret == FALSE) {
 
3195
ask_again:
 
3196
                fprintf(stderr,
 
3197
                        "InnoDB: Do you want to copy additional"
 
3198
                        " archived log files\n"
 
3199
                        "InnoDB: to the directory\n");
 
3200
                fprintf(stderr,
 
3201
                        "InnoDB: or were these all the files needed"
 
3202
                        " in recovery?\n");
 
3203
                fprintf(stderr,
 
3204
                        "InnoDB: (Y == copy more files; N == this is all)?");
 
3205
 
 
3206
                input_char = getchar();
 
3207
 
 
3208
                if (input_char == (int) 'N') {
 
3209
 
 
3210
                        return(TRUE);
 
3211
                } else if (input_char == (int) 'Y') {
 
3212
 
 
3213
                        goto try_open_again;
 
3214
                } else {
 
3215
                        goto ask_again;
 
3216
                }
 
3217
        }
 
3218
 
 
3219
        ret = os_file_get_size(file_handle, &file_size, &file_size_high);
 
3220
        ut_a(ret);
 
3221
 
 
3222
        ut_a(file_size_high == 0);
 
3223
 
 
3224
        fprintf(stderr, "InnoDB: Opened archived log file %s\n", name);
 
3225
 
 
3226
        ret = os_file_close(file_handle);
 
3227
 
 
3228
        if (file_size < LOG_FILE_HDR_SIZE) {
 
3229
                fprintf(stderr,
 
3230
                        "InnoDB: Archive file header incomplete %s\n", name);
 
3231
 
 
3232
                return(TRUE);
 
3233
        }
 
3234
 
 
3235
        ut_a(ret);
 
3236
 
 
3237
        /* Add the archive file as a node to the space */
 
3238
 
 
3239
        fil_node_create(name, 1 + file_size / UNIV_PAGE_SIZE,
 
3240
                        group->archive_space_id, FALSE);
 
3241
#if RECV_SCAN_SIZE < LOG_FILE_HDR_SIZE
 
3242
# error "RECV_SCAN_SIZE < LOG_FILE_HDR_SIZE"
 
3243
#endif
 
3244
 
 
3245
        /* Read the archive file header */
 
3246
        fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE, group->archive_space_id, 0, 0,
 
3247
               LOG_FILE_HDR_SIZE, buf, NULL);
 
3248
 
 
3249
        /* Check if the archive file header is consistent */
 
3250
 
 
3251
        if (mach_read_from_4(buf + LOG_GROUP_ID) != group->id
 
3252
            || mach_read_from_4(buf + LOG_FILE_NO)
 
3253
            != group->archived_file_no) {
 
3254
                fprintf(stderr,
 
3255
                        "InnoDB: Archive file header inconsistent %s\n", name);
 
3256
 
 
3257
                return(TRUE);
 
3258
        }
 
3259
 
 
3260
        if (!mach_read_from_4(buf + LOG_FILE_ARCH_COMPLETED)) {
 
3261
                fprintf(stderr,
 
3262
                        "InnoDB: Archive file not completely written %s\n",
 
3263
                        name);
 
3264
 
 
3265
                return(TRUE);
 
3266
        }
 
3267
 
 
3268
        start_lsn = mach_read_ull(buf + LOG_FILE_START_LSN);
 
3269
        file_end_lsn = mach_read_ull(buf + LOG_FILE_END_LSN);
 
3270
 
 
3271
        if (!recv_sys->scanned_lsn) {
 
3272
 
 
3273
                if (recv_sys->parse_start_lsn < start_lsn) {
 
3274
                        fprintf(stderr,
 
3275
                                "InnoDB: Archive log file %s"
 
3276
                                " starts from too big a lsn\n",
 
3277
                                name);
 
3278
                        return(TRUE);
 
3279
                }
 
3280
 
 
3281
                recv_sys->scanned_lsn = start_lsn;
 
3282
        }
 
3283
 
 
3284
        if (recv_sys->scanned_lsn != start_lsn) {
 
3285
 
 
3286
                fprintf(stderr,
 
3287
                        "InnoDB: Archive log file %s starts from"
 
3288
                        " a wrong lsn\n",
 
3289
                        name);
 
3290
                return(TRUE);
 
3291
        }
 
3292
 
 
3293
        read_offset = LOG_FILE_HDR_SIZE;
 
3294
 
 
3295
        for (;;) {
 
3296
                len = RECV_SCAN_SIZE;
 
3297
 
 
3298
                if (read_offset + len > file_size) {
 
3299
                        len = ut_calc_align_down(file_size - read_offset,
 
3300
                                                 OS_FILE_LOG_BLOCK_SIZE);
 
3301
                }
 
3302
 
 
3303
                if (len == 0) {
 
3304
 
 
3305
                        break;
 
3306
                }
 
3307
 
 
3308
#ifdef UNIV_DEBUG
 
3309
                if (log_debug_writes) {
 
3310
                        fprintf(stderr,
 
3311
                                "InnoDB: Archive read starting at"
 
3312
                                " lsn %"PRIu64", len %lu from file %s\n",
 
3313
                                start_lsn,
 
3314
                                (ulong) len, name);
 
3315
                }
 
3316
#endif /* UNIV_DEBUG */
 
3317
 
 
3318
                fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE,
 
3319
                       group->archive_space_id, read_offset / UNIV_PAGE_SIZE,
 
3320
                       read_offset % UNIV_PAGE_SIZE, len, buf, NULL);
 
3321
 
 
3322
                ret = recv_scan_log_recs(
 
3323
                        TRUE, (buf_pool->n_frames - recv_n_pool_free_frames)
 
3324
                        * UNIV_PAGE_SIZE, TRUE, buf, len, start_lsn,
 
3325
                        &dummy_lsn, &scanned_lsn);
 
3326
 
 
3327
                if (scanned_lsn == file_end_lsn) {
 
3328
 
 
3329
                        return(FALSE);
 
3330
                }
 
3331
 
 
3332
                if (ret) {
 
3333
                        fprintf(stderr,
 
3334
                                "InnoDB: Archive log file %s"
 
3335
                                " does not scan right\n",
 
3336
                                name);
 
3337
                        return(TRUE);
 
3338
                }
 
3339
 
 
3340
                read_offset += len;
 
3341
                start_lsn += len;
 
3342
 
 
3343
                ut_ad(start_lsn == scanned_lsn);
 
3344
        }
 
3345
 
 
3346
        return(FALSE);
 
3347
}
 
3348
 
 
3349
/************************************************************
 
3350
Recovers from archived log files, and also from log files, if they exist. */
 
3351
UNIV_INTERN
 
3352
ulint
 
3353
recv_recovery_from_archive_start(
 
3354
/*=============================*/
 
3355
                                        /* out: error code or DB_SUCCESS */
 
3356
        ib_uint64_t     min_flushed_lsn,/* in: min flushed lsn field from the
 
3357
                                        data files */
 
3358
        ib_uint64_t     limit_lsn,      /* in: recover up to this lsn if
 
3359
                                        possible */
 
3360
        ulint           first_log_no)   /* in: number of the first archived
 
3361
                                        log file to use in the recovery; the
 
3362
                                        file will be searched from
 
3363
                                        INNOBASE_LOG_ARCH_DIR specified in
 
3364
                                        server config file */
 
3365
{
 
3366
        log_group_t*    group;
 
3367
        ulint           group_id;
 
3368
        ulint           trunc_len;
 
3369
        ibool           ret;
 
3370
        ulint           err;
 
3371
 
 
3372
        ut_a(0);
 
3373
 
 
3374
        recv_sys_create();
 
3375
        recv_sys_init(FALSE, buf_pool_get_curr_size());
 
3376
 
 
3377
        recv_recovery_on = TRUE;
 
3378
        recv_recovery_from_backup_on = TRUE;
 
3379
 
 
3380
        recv_sys->limit_lsn = limit_lsn;
 
3381
 
 
3382
        group_id = 0;
 
3383
 
 
3384
        group = UT_LIST_GET_FIRST(log_sys->log_groups);
 
3385
 
 
3386
        while (group) {
 
3387
                if (group->id == group_id) {
 
3388
 
 
3389
                        break;
 
3390
                }
 
3391
 
 
3392
                group = UT_LIST_GET_NEXT(log_groups, group);
 
3393
        }
 
3394
 
 
3395
        if (!group) {
 
3396
                fprintf(stderr,
 
3397
                        "InnoDB: There is no log group defined with id %lu!\n",
 
3398
                        (ulong) group_id);
 
3399
                return(DB_ERROR);
 
3400
        }
 
3401
 
 
3402
        group->archived_file_no = first_log_no;
 
3403
 
 
3404
        recv_sys->parse_start_lsn = min_flushed_lsn;
 
3405
 
 
3406
        recv_sys->scanned_lsn = 0;
 
3407
        recv_sys->scanned_checkpoint_no = 0;
 
3408
        recv_sys->recovered_lsn = recv_sys->parse_start_lsn;
 
3409
 
 
3410
        recv_sys->archive_group = group;
 
3411
 
 
3412
        ret = FALSE;
 
3413
 
 
3414
        mutex_enter(&(log_sys->mutex));
 
3415
 
 
3416
        while (!ret) {
 
3417
                ret = log_group_recover_from_archive_file(group);
 
3418
 
 
3419
                /* Close and truncate a possible processed archive file
 
3420
                from the file space */
 
3421
 
 
3422
                trunc_len = UNIV_PAGE_SIZE
 
3423
                        * fil_space_get_size(group->archive_space_id);
 
3424
                if (trunc_len > 0) {
 
3425
                        fil_space_truncate_start(group->archive_space_id,
 
3426
                                                 trunc_len);
 
3427
                }
 
3428
 
 
3429
                group->archived_file_no++;
 
3430
        }
 
3431
 
 
3432
        if (recv_sys->recovered_lsn < limit_lsn) {
 
3433
 
 
3434
                if (!recv_sys->scanned_lsn) {
 
3435
 
 
3436
                        recv_sys->scanned_lsn = recv_sys->parse_start_lsn;
 
3437
                }
 
3438
 
 
3439
                mutex_exit(&(log_sys->mutex));
 
3440
 
 
3441
                err = recv_recovery_from_checkpoint_start(LOG_ARCHIVE,
 
3442
                                                          limit_lsn,
 
3443
                                                          IB_ULONGLONG_MAX,
 
3444
                                                          IB_ULONGLONG_MAX);
 
3445
                if (err != DB_SUCCESS) {
 
3446
 
 
3447
                        return(err);
 
3448
                }
 
3449
 
 
3450
                mutex_enter(&(log_sys->mutex));
 
3451
        }
 
3452
 
 
3453
        if (limit_lsn != IB_ULONGLONG_MAX) {
 
3454
 
 
3455
                recv_apply_hashed_log_recs(FALSE);
 
3456
 
 
3457
                recv_reset_logs(recv_sys->recovered_lsn, 0, FALSE);
 
3458
        }
 
3459
 
 
3460
        mutex_exit(&(log_sys->mutex));
 
3461
 
 
3462
        return(DB_SUCCESS);
 
3463
}
 
3464
 
 
3465
/************************************************************
 
3466
Completes recovery from archive. */
 
3467
UNIV_INTERN
 
3468
void
 
3469
recv_recovery_from_archive_finish(void)
 
3470
/*===================================*/
 
3471
{
 
3472
        recv_recovery_from_checkpoint_finish();
 
3473
 
 
3474
        recv_recovery_from_backup_on = FALSE;
 
3475
}
 
3476
#endif /* UNIV_LOG_ARCHIVE */