1
/******************************************************
6
Created 9/20/1997 Heikki Tuuri
7
*******************************************************/
12
#include "log0recv.ic"
20
#include "srv0start.h"
23
#include "page0page.h"
27
#include "ibuf0ibuf.h"
34
#include "dict0boot.h"
36
#include "sync0sync.h"
39
/* This is set to FALSE if the backup was originally taken with the
40
ibbackup --include regexp option: then we do not want to create tables in
41
directories which were not included */
42
ibool recv_replay_file_ops = TRUE;
43
#endif /* UNIV_HOTBACKUP */
45
/* Log records are stored in the hash table in chunks at most of this size;
46
this must be less than UNIV_PAGE_SIZE as it is stored in the buffer pool */
47
#define RECV_DATA_BLOCK_SIZE (MEM_MAX_ALLOC_IN_BUF - sizeof(recv_data_t))
49
/* Read-ahead area in applying log records to file pages */
50
#define RECV_READ_AHEAD_AREA 32
52
recv_sys_t* recv_sys = NULL;
53
ibool recv_recovery_on = FALSE;
54
ibool recv_recovery_from_backup_on = FALSE;
56
ibool recv_needed_recovery = FALSE;
58
ibool recv_lsn_checks_on = FALSE;
60
/* There are two conditions under which we scan the logs, the first
61
is normal startup and the second is when we do a recovery from an
63
This flag is set if we are doing a scan from the last checkpoint during
64
startup. If we find log entries that were written after the last checkpoint
65
we know that the server was not cleanly shutdown. We must then initialize
66
the crash recovery environment before attempting to store these entries in
67
the log hash table. */
68
ibool recv_log_scan_is_startup_type = FALSE;
70
/* If the following is TRUE, the buffer pool file pages must be invalidated
71
after recovery and no ibuf operations are allowed; this becomes TRUE if
72
the log record hash table becomes too full, and log records must be merged
73
to file pages already before the recovery is finished: in this case no
74
ibuf operations are allowed, as they could modify the pages read in the
75
buffer pool before the pages have been recovered to the up-to-date state */
77
/* Recovery is running and no operations on the log files are allowed
78
yet: the variable name is misleading */
80
ibool recv_no_ibuf_operations = FALSE;
82
/* The following counter is used to decide when to print info on log scans */
84
ulint recv_scan_print_counter = 0;
86
ibool recv_is_from_backup = FALSE;
88
ibool recv_is_making_a_backup = FALSE;
90
# define recv_is_making_a_backup FALSE
91
#endif /* UNIV_HOTBACKUP */
93
ulint recv_previous_parsed_rec_type = 999999;
94
ulint recv_previous_parsed_rec_offset = 0;
95
ulint recv_previous_parsed_rec_is_multi = 0;
97
ulint recv_max_parsed_page_no = 0;
99
/* This many frames must be left free in the buffer pool when we scan
100
the log and store the scanned log records in the buffer pool: we will
101
use these free frames to read in pages when we start applying the
102
log records to the database. */
104
ulint recv_n_pool_free_frames = 256;
106
/* The maximum lsn we see for a page during the recovery process. If this
107
is bigger than the lsn we are able to scan up to, that is an indication that
108
the recovery failed and the database may be corrupt. */
110
dulint recv_max_page_lsn;
114
/***********************************************************
115
Initialize crash recovery environment. Can be called iff
116
recv_needed_recovery == FALSE. */
119
recv_init_crash_recovery(void);
120
/*===========================*/
122
/************************************************************
123
Creates the recovery system. */
126
recv_sys_create(void)
127
/*=================*/
129
/* Nothing more is needed when the global recovery system struct already
exists (the statement guarded by this test is not visible in this excerpt;
presumably an early return -- TODO confirm) */
if (recv_sys != NULL) {
134
/* Allocate the global recv_sys struct and create its mutex, passing
SYNC_RECV to mutex_create() */
recv_sys = mem_alloc(sizeof(recv_sys_t));
136
mutex_create(&recv_sys->mutex, SYNC_RECV);
138
/* The heap and the address hash table are left unset (NULL) here */
recv_sys->heap = NULL;
139
recv_sys->addr_hash = NULL;
142
/************************************************************
143
Inits the recovery system for a recovery operation. */
148
ibool recover_from_backup, /* in: TRUE if this is called
149
to recover from a hot backup */
150
ulint available_memory) /* in: available memory in bytes */
152
if (recv_sys->heap != NULL) {
157
mutex_enter(&(recv_sys->mutex));
159
if (!recover_from_backup) {
160
recv_sys->heap = mem_heap_create_in_buffer(256);
162
recv_sys->heap = mem_heap_create(256);
163
recv_is_from_backup = TRUE;
166
recv_sys->buf = ut_malloc(RECV_PARSING_BUF_SIZE);
168
recv_sys->recovered_offset = 0;
170
recv_sys->addr_hash = hash_create(available_memory / 64);
171
recv_sys->n_addrs = 0;
173
recv_sys->apply_log_recs = FALSE;
174
recv_sys->apply_batch_on = FALSE;
176
recv_sys->last_block_buf_start = mem_alloc(2 * OS_FILE_LOG_BLOCK_SIZE);
178
recv_sys->last_block = ut_align(recv_sys->last_block_buf_start,
179
OS_FILE_LOG_BLOCK_SIZE);
180
recv_sys->found_corrupt_log = FALSE;
182
recv_max_page_lsn = ut_dulint_zero;
184
mutex_exit(&(recv_sys->mutex));
187
/************************************************************
188
Empties the hash table when it has been fully processed. */
191
recv_sys_empty_hash(void)
192
/*=====================*/
194
/* Debug assertion: the calling thread must own recv_sys->mutex */
ut_ad(mutex_own(&(recv_sys->mutex)));
196
/* A non-zero n_addrs means some pages still have log records attached:
the message below reports that count together with
recv_max_parsed_page_no (the call that prints it and whatever follows it
inside this branch are not visible in this excerpt) */
if (recv_sys->n_addrs != 0) {
198
"InnoDB: Error: %lu pages with log records"
199
" were left unprocessed!\n"
200
"InnoDB: Maximum page number with"
201
" log records on it %lu\n",
202
(ulong) recv_sys->n_addrs,
203
(ulong) recv_max_parsed_page_no);
207
/* Free the old hash table and empty the heap, then create a fresh hash
table whose size argument is derived from the current buffer pool size */
hash_table_free(recv_sys->addr_hash);
208
mem_heap_empty(recv_sys->heap);
210
recv_sys->addr_hash = hash_create(buf_pool_get_curr_size() / 256);
213
#ifndef UNIV_LOG_DEBUG
214
/************************************************************
215
Frees the recovery system. */
221
mutex_enter(&(recv_sys->mutex));
223
hash_table_free(recv_sys->addr_hash);
224
mem_heap_free(recv_sys->heap);
225
ut_free(recv_sys->buf);
226
mem_free(recv_sys->last_block_buf_start);
228
recv_sys->addr_hash = NULL;
229
recv_sys->heap = NULL;
231
mutex_exit(&(recv_sys->mutex));
233
#endif /* UNIV_LOG_DEBUG */
235
/************************************************************
236
Truncates possible corrupted or extra records from a log group. */
241
log_group_t* group, /* in: log group */
242
dulint recovered_lsn, /* in: recovery succeeded up to this
244
dulint limit_lsn, /* in: this was the limit for
246
dulint checkpoint_lsn, /* in: recovery was started from this
248
dulint archived_lsn) /* in: the log has been archived up to
259
if (ut_dulint_cmp(archived_lsn, ut_dulint_max) == 0) {
260
/* Checkpoint was taken in the NOARCHIVELOG mode */
261
archived_lsn = checkpoint_lsn;
264
finish_lsn1 = ut_dulint_add(ut_dulint_align_down(
266
OS_FILE_LOG_BLOCK_SIZE),
267
log_group_get_capacity(group));
269
finish_lsn2 = ut_dulint_add(ut_dulint_align_up(
271
OS_FILE_LOG_BLOCK_SIZE),
272
recv_sys->last_log_buf_size);
274
if (ut_dulint_cmp(limit_lsn, ut_dulint_max) != 0) {
275
/* We do not know how far we should erase log records: erase
276
as much as possible */
278
finish_lsn = finish_lsn1;
280
/* It is enough to erase the length of the log buffer */
281
finish_lsn = ut_dulint_get_min(finish_lsn1, finish_lsn2);
284
ut_a(RECV_SCAN_SIZE <= log_sys->buf_size);
286
/* Write the log buffer full of zeros */
287
for (i = 0; i < RECV_SCAN_SIZE; i++) {
289
*(log_sys->buf + i) = '\0';
292
start_lsn = ut_dulint_align_down(recovered_lsn,
293
OS_FILE_LOG_BLOCK_SIZE);
295
if (ut_dulint_cmp(start_lsn, recovered_lsn) != 0) {
296
/* Copy the last incomplete log block to the log buffer and
297
edit its data length: */
299
ut_memcpy(log_sys->buf, recv_sys->last_block,
300
OS_FILE_LOG_BLOCK_SIZE);
301
log_block_set_data_len(log_sys->buf, ut_dulint_minus(
302
recovered_lsn, start_lsn));
305
if (ut_dulint_cmp(start_lsn, finish_lsn) >= 0) {
311
end_lsn = ut_dulint_add(start_lsn, RECV_SCAN_SIZE);
313
if (ut_dulint_cmp(end_lsn, finish_lsn) > 0) {
315
end_lsn = finish_lsn;
318
len = ut_dulint_minus(end_lsn, start_lsn);
320
log_group_write_buf(group, log_sys->buf, len, start_lsn, 0);
321
if (ut_dulint_cmp(end_lsn, finish_lsn) >= 0) {
326
/* Write the log buffer full of zeros */
327
for (i = 0; i < RECV_SCAN_SIZE; i++) {
329
*(log_sys->buf + i) = '\0';
336
/************************************************************
337
Copies the log segment between group->recovered_lsn and recovered_lsn from the
338
most up-to-date log group to group, so that it contains the latest log data. */
343
log_group_t* up_to_date_group, /* in: the most up-to-date log
345
log_group_t* group, /* in: copy to this log
347
dulint recovered_lsn) /* in: recovery succeeded up
354
if (ut_dulint_cmp(group->scanned_lsn, recovered_lsn) >= 0) {
359
ut_a(RECV_SCAN_SIZE <= log_sys->buf_size);
361
start_lsn = ut_dulint_align_down(group->scanned_lsn,
362
OS_FILE_LOG_BLOCK_SIZE);
364
end_lsn = ut_dulint_add(start_lsn, RECV_SCAN_SIZE);
366
if (ut_dulint_cmp(end_lsn, recovered_lsn) > 0) {
367
end_lsn = ut_dulint_align_up(recovered_lsn,
368
OS_FILE_LOG_BLOCK_SIZE);
371
log_group_read_log_seg(LOG_RECOVER, log_sys->buf,
372
up_to_date_group, start_lsn, end_lsn);
374
len = ut_dulint_minus(end_lsn, start_lsn);
376
log_group_write_buf(group, log_sys->buf, len, start_lsn, 0);
378
if (ut_dulint_cmp(end_lsn, recovered_lsn) >= 0) {
387
/************************************************************
388
Copies a log segment from the most up-to-date log group to the other log
389
groups, so that they all contain the latest log data. Also writes the info
390
about the latest checkpoint to the groups, and inits the fields in the group
391
memory structs to up-to-date values. */
394
recv_synchronize_groups(
395
/*====================*/
396
log_group_t* up_to_date_group) /* in: the most up-to-date
402
dulint recovered_lsn;
405
recovered_lsn = recv_sys->recovered_lsn;
406
limit_lsn = recv_sys->limit_lsn;
408
/* Read the last recovered log block to the recovery system buffer:
409
the block is always incomplete */
411
start_lsn = ut_dulint_align_down(recovered_lsn,
412
OS_FILE_LOG_BLOCK_SIZE);
413
end_lsn = ut_dulint_align_up(recovered_lsn, OS_FILE_LOG_BLOCK_SIZE);
415
ut_a(ut_dulint_cmp(start_lsn, end_lsn) != 0);
417
log_group_read_log_seg(LOG_RECOVER, recv_sys->last_block,
418
up_to_date_group, start_lsn, end_lsn);
420
group = UT_LIST_GET_FIRST(log_sys->log_groups);
423
if (group != up_to_date_group) {
425
/* Copy log data if needed */
427
recv_copy_group(group, up_to_date_group,
431
/* Update the fields in the group struct to correspond to recovered_lsn */
434
log_group_set_fields(group, recovered_lsn);
436
group = UT_LIST_GET_NEXT(log_groups, group);
439
/* Copy the checkpoint info to the groups; remember that we have
440
incremented checkpoint_no by one, and the info will not be written
441
over the max checkpoint info, thus making the preservation of max
442
checkpoint info on disk certain */
444
log_groups_write_checkpoint_info();
446
mutex_exit(&(log_sys->mutex));
448
/* Wait for the checkpoint write to complete */
449
rw_lock_s_lock(&(log_sys->checkpoint_lock));
450
rw_lock_s_unlock(&(log_sys->checkpoint_lock));
452
mutex_enter(&(log_sys->mutex));
455
/***************************************************************************
456
Checks the consistency of the checkpoint info */
459
recv_check_cp_is_consistent(
460
/*========================*/
461
/* out: TRUE if ok */
462
byte* buf) /* in: buffer containing checkpoint info */
466
/* First checksum: fold the first LOG_CHECKPOINT_CHECKSUM_1 bytes of buf
and compare the low 32 bits of the fold with the 4-byte value stored at
offset LOG_CHECKPOINT_CHECKSUM_1 (the body of the mismatch branch is not
visible in this excerpt) */
fold = ut_fold_binary(buf, LOG_CHECKPOINT_CHECKSUM_1);
468
if ((fold & 0xFFFFFFFFUL) != mach_read_from_4(
469
buf + LOG_CHECKPOINT_CHECKSUM_1)) {
473
/* Second checksum: fold the bytes from offset LOG_CHECKPOINT_LSN up to
offset LOG_CHECKPOINT_CHECKSUM_2 and compare the low 32 bits with the
4-byte value stored at offset LOG_CHECKPOINT_CHECKSUM_2 */
fold = ut_fold_binary(buf + LOG_CHECKPOINT_LSN,
474
LOG_CHECKPOINT_CHECKSUM_2 - LOG_CHECKPOINT_LSN);
476
if ((fold & 0xFFFFFFFFUL) != mach_read_from_4(
477
buf + LOG_CHECKPOINT_CHECKSUM_2)) {
484
/************************************************************
485
Looks for the maximum consistent checkpoint from the log groups. */
488
recv_find_max_checkpoint(
489
/*=====================*/
490
/* out: error code or DB_SUCCESS */
491
log_group_t** max_group, /* out: max group */
492
ulint* max_field) /* out: LOG_CHECKPOINT_1 or
497
dulint checkpoint_no;
501
group = UT_LIST_GET_FIRST(log_sys->log_groups);
503
max_no = ut_dulint_zero;
507
buf = log_sys->checkpoint_buf;
510
group->state = LOG_GROUP_CORRUPTED;
512
for (field = LOG_CHECKPOINT_1; field <= LOG_CHECKPOINT_2;
513
field += LOG_CHECKPOINT_2 - LOG_CHECKPOINT_1) {
515
log_group_read_checkpoint_info(group, field);
517
if (!recv_check_cp_is_consistent(buf)) {
519
if (log_debug_writes) {
521
"InnoDB: Checkpoint in group"
522
" %lu at %lu invalid, %lu\n",
525
(ulong) mach_read_from_4(
527
+ LOG_CHECKPOINT_CHECKSUM_1));
530
#endif /* UNIV_DEBUG */
534
group->state = LOG_GROUP_OK;
536
group->lsn = mach_read_from_8(
537
buf + LOG_CHECKPOINT_LSN);
538
group->lsn_offset = mach_read_from_4(
539
buf + LOG_CHECKPOINT_OFFSET);
540
checkpoint_no = mach_read_from_8(
541
buf + LOG_CHECKPOINT_NO);
544
if (log_debug_writes) {
546
"InnoDB: Checkpoint number %lu"
547
" found in group %lu\n",
548
(ulong) ut_dulint_get_low(
552
#endif /* UNIV_DEBUG */
554
if (ut_dulint_cmp(checkpoint_no, max_no) >= 0) {
557
max_no = checkpoint_no;
564
group = UT_LIST_GET_NEXT(log_groups, group);
567
if (*max_group == NULL) {
570
"InnoDB: No valid checkpoint found.\n"
571
"InnoDB: If this error appears when you are"
572
" creating an InnoDB database,\n"
573
"InnoDB: the problem may be that during"
574
" an earlier attempt you managed\n"
575
"InnoDB: to create the InnoDB data files,"
576
" but log file creation failed.\n"
577
"InnoDB: If that is the case, please refer to\n"
578
"InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
579
"error-creating-innodb.html\n");
586
/***********************************************************************
587
Reads the checkpoint info needed in hot backup. */
590
recv_read_cp_info_for_backup(
591
/*=========================*/
592
/* out: TRUE if success */
593
byte* hdr, /* in: buffer containing the log group header */
594
dulint* lsn, /* out: checkpoint lsn */
595
ulint* offset, /* out: checkpoint offset in the log group */
596
ulint* fsp_limit,/* out: fsp limit of space 0, 1000000000 if the
597
database is running with < version 3.23.50 of InnoDB */
598
dulint* cp_no, /* out: checkpoint number */
599
dulint* first_header_lsn)
600
/* out: lsn of the start of the first log file */
603
dulint max_cp_no = ut_dulint_zero;
606
/* Look at the checkpoint slot at offset LOG_CHECKPOINT_1 of the header:
if it passes the consistency check, remember its checkpoint number and
select this slot */
cp_buf = hdr + LOG_CHECKPOINT_1;
608
if (recv_check_cp_is_consistent(cp_buf)) {
609
max_cp_no = mach_read_from_8(cp_buf + LOG_CHECKPOINT_NO);
610
max_cp = LOG_CHECKPOINT_1;
613
/* Look at the slot at offset LOG_CHECKPOINT_2: if it is consistent, its
checkpoint number is compared (the second operand of the comparison is
not visible in this excerpt; presumably max_cp_no -- TODO confirm) to
decide whether this slot is selected instead */
cp_buf = hdr + LOG_CHECKPOINT_2;
615
if (recv_check_cp_is_consistent(cp_buf)) {
616
if (ut_dulint_cmp(mach_read_from_8(cp_buf + LOG_CHECKPOINT_NO),
618
max_cp = LOG_CHECKPOINT_2;
626
/* Read the output values from the selected checkpoint slot */
cp_buf = hdr + max_cp;
628
*lsn = mach_read_from_8(cp_buf + LOG_CHECKPOINT_LSN);
629
*offset = mach_read_from_4(cp_buf + LOG_CHECKPOINT_OFFSET);
631
/* If the user is running a pre-3.23.50 version of InnoDB, its
632
checkpoint data does not contain the fsp limit info */
633
/* The fsp limit is read only when the magic number field matches; a
stored value of 0 is replaced by 1000000000 */
if (mach_read_from_4(cp_buf + LOG_CHECKPOINT_FSP_MAGIC_N)
634
== LOG_CHECKPOINT_FSP_MAGIC_N_VAL) {
636
*fsp_limit = mach_read_from_4(
637
cp_buf + LOG_CHECKPOINT_FSP_FREE_LIMIT);
639
if (*fsp_limit == 0) {
640
*fsp_limit = 1000000000;
643
*fsp_limit = 1000000000;
646
/* fprintf(stderr, "fsp limit %lu MB\n", *fsp_limit); */
648
*cp_no = mach_read_from_8(cp_buf + LOG_CHECKPOINT_NO);
650
/* This value is read from the log group header itself, not from the
checkpoint slot */
*first_header_lsn = mach_read_from_8(hdr + LOG_FILE_START_LSN);
655
/**********************************************************
656
Checks the 4-byte checksum to the trailer checksum field of a log block.
657
We also accept a log block in the old format < InnoDB-3.23.52 where the
658
checksum field contains the log block number. */
661
log_block_checksum_is_ok_or_old_format(
662
/*===================================*/
663
/* out: TRUE if ok, or if the log block may be in the
664
format of InnoDB version < 3.23.52 */
665
byte* block) /* in: pointer to a log block */
667
#ifdef UNIV_LOG_DEBUG
669
#endif /* UNIV_LOG_DEBUG */
670
/* Current format: the checksum recalculated from the block equals the
checksum stored in the block (the statements inside the branches of this
function are not all visible in this excerpt) */
if (log_block_calc_checksum(block) == log_block_get_checksum(block)) {
675
/* Old format: the checksum field holds the block number that is also
stored in the block header */
if (log_block_get_hdr_no(block) == log_block_get_checksum(block)) {
677
/* We assume the log block is in the format of
678
InnoDB version < 3.23.52 and the block is ok */
681
"InnoDB: Scanned old format < InnoDB-3.23.52"
682
" log block number %lu\n",
683
log_block_get_hdr_no(block));
691
/***********************************************************************
692
Scans the log segment and n_bytes_scanned is set to the length of valid
log scanned. */
696
recv_scan_log_seg_for_backup(
697
/*=========================*/
698
byte* buf, /* in: buffer containing log data */
699
ulint buf_len, /* in: data length in that buffer */
700
dulint* scanned_lsn, /* in/out: lsn of buffer start,
701
we return scanned lsn */
702
ulint* scanned_checkpoint_no,
703
/* in/out: 4 lowest bytes of the
704
highest scanned checkpoint number so
706
ulint* n_bytes_scanned)/* out: how much we were able to
707
scan, smaller than buf_len if log
714
*n_bytes_scanned = 0;
716
for (log_block = buf; log_block < buf + buf_len;
717
log_block += OS_FILE_LOG_BLOCK_SIZE) {
719
no = log_block_get_hdr_no(log_block);
722
fprintf(stderr, "Log block header no %lu\n", no);
725
if (no != log_block_convert_lsn_to_no(*scanned_lsn)
726
|| !log_block_checksum_is_ok_or_old_format(log_block)) {
729
"Log block n:o %lu, scanned lsn n:o %lu\n",
730
no, log_block_convert_lsn_to_no(*scanned_lsn));
732
/* Garbage or an incompletely written log block */
734
log_block += OS_FILE_LOG_BLOCK_SIZE;
737
"Next log block n:o %lu\n",
738
log_block_get_hdr_no(log_block));
743
if (*scanned_checkpoint_no > 0
744
&& log_block_get_checkpoint_no(log_block)
745
< *scanned_checkpoint_no
746
&& *scanned_checkpoint_no
747
- log_block_get_checkpoint_no(log_block)
750
/* Garbage from a log buffer flush which was made
751
before the most recent database recovery */
754
"Scanned cp n:o %lu, block cp n:o %lu\n",
755
*scanned_checkpoint_no,
756
log_block_get_checkpoint_no(log_block));
761
data_len = log_block_get_data_len(log_block);
763
*scanned_checkpoint_no
764
= log_block_get_checkpoint_no(log_block);
765
*scanned_lsn = ut_dulint_add(*scanned_lsn, data_len);
767
*n_bytes_scanned += data_len;
769
if (data_len < OS_FILE_LOG_BLOCK_SIZE) {
770
/* Log data ends here */
773
fprintf(stderr, "Log block data len %lu\n",
781
/***********************************************************************
782
Tries to parse a single log record body and also applies it to a page if
783
specified. File ops are parsed, but not applied in this function. */
786
recv_parse_or_apply_log_rec_body(
787
/*=============================*/
788
/* out: log record end, NULL if not a complete record */
790
byte type, /* in: type */
791
byte* ptr, /* in: pointer to a buffer */
792
byte* end_ptr,/* in: pointer to the buffer end */
793
page_t* page, /* in: buffer page or NULL; if not NULL, then the log
794
record is applied to the page, and the log record
795
should be complete then */
796
mtr_t* mtr) /* in: mtr or NULL; should be non-NULL if and only if
799
dict_index_t* index = NULL;
802
case MLOG_1BYTE: case MLOG_2BYTES: case MLOG_4BYTES: case MLOG_8BYTES:
803
ptr = mlog_parse_nbytes(type, ptr, end_ptr, page);
805
case MLOG_REC_INSERT: case MLOG_COMP_REC_INSERT:
806
if (NULL != (ptr = mlog_parse_index(
808
type == MLOG_COMP_REC_INSERT,
811
|| (ibool)!!page_is_comp(page)
812
== dict_table_is_comp(index->table));
813
ptr = page_cur_parse_insert_rec(FALSE, ptr, end_ptr,
817
case MLOG_REC_CLUST_DELETE_MARK: case MLOG_COMP_REC_CLUST_DELETE_MARK:
818
if (NULL != (ptr = mlog_parse_index(
820
type == MLOG_COMP_REC_CLUST_DELETE_MARK,
823
|| (ibool)!!page_is_comp(page)
824
== dict_table_is_comp(index->table));
825
ptr = btr_cur_parse_del_mark_set_clust_rec(
826
ptr, end_ptr, index, page);
829
case MLOG_COMP_REC_SEC_DELETE_MARK:
830
/* This log record type is obsolete, but we process it for
831
backward compatibility with MySQL 5.0.3 and 5.0.4. */
832
ut_a(!page || page_is_comp(page));
833
ptr = mlog_parse_index(ptr, end_ptr, TRUE, &index);
838
case MLOG_REC_SEC_DELETE_MARK:
839
ptr = btr_cur_parse_del_mark_set_sec_rec(ptr, end_ptr, page);
841
case MLOG_REC_UPDATE_IN_PLACE: case MLOG_COMP_REC_UPDATE_IN_PLACE:
842
if (NULL != (ptr = mlog_parse_index(
844
type == MLOG_COMP_REC_UPDATE_IN_PLACE,
847
|| (ibool)!!page_is_comp(page)
848
== dict_table_is_comp(index->table));
849
ptr = btr_cur_parse_update_in_place(ptr, end_ptr,
853
case MLOG_LIST_END_DELETE: case MLOG_COMP_LIST_END_DELETE:
854
case MLOG_LIST_START_DELETE: case MLOG_COMP_LIST_START_DELETE:
855
if (NULL != (ptr = mlog_parse_index(
857
type == MLOG_COMP_LIST_END_DELETE
858
|| type == MLOG_COMP_LIST_START_DELETE,
861
|| (ibool)!!page_is_comp(page)
862
== dict_table_is_comp(index->table));
863
ptr = page_parse_delete_rec_list(type, ptr, end_ptr,
867
case MLOG_LIST_END_COPY_CREATED: case MLOG_COMP_LIST_END_COPY_CREATED:
868
if (NULL != (ptr = mlog_parse_index(
870
type == MLOG_COMP_LIST_END_COPY_CREATED,
873
|| (ibool)!!page_is_comp(page)
874
== dict_table_is_comp(index->table));
875
ptr = page_parse_copy_rec_list_to_created_page(
876
ptr, end_ptr, index, page, mtr);
879
case MLOG_PAGE_REORGANIZE: case MLOG_COMP_PAGE_REORGANIZE:
880
if (NULL != (ptr = mlog_parse_index(
882
type == MLOG_COMP_PAGE_REORGANIZE,
885
|| (ibool)!!page_is_comp(page)
886
== dict_table_is_comp(index->table));
887
ptr = btr_parse_page_reorganize(ptr, end_ptr, index,
891
case MLOG_PAGE_CREATE: case MLOG_COMP_PAGE_CREATE:
892
ptr = page_parse_create(ptr, end_ptr,
893
type == MLOG_COMP_PAGE_CREATE,
896
case MLOG_UNDO_INSERT:
897
ptr = trx_undo_parse_add_undo_rec(ptr, end_ptr, page);
899
case MLOG_UNDO_ERASE_END:
900
ptr = trx_undo_parse_erase_page_end(ptr, end_ptr, page, mtr);
903
ptr = trx_undo_parse_page_init(ptr, end_ptr, page, mtr);
905
case MLOG_UNDO_HDR_DISCARD:
906
ptr = trx_undo_parse_discard_latest(ptr, end_ptr, page, mtr);
908
case MLOG_UNDO_HDR_CREATE:
909
case MLOG_UNDO_HDR_REUSE:
910
ptr = trx_undo_parse_page_header(type, ptr, end_ptr,
913
case MLOG_REC_MIN_MARK: case MLOG_COMP_REC_MIN_MARK:
914
ptr = btr_parse_set_min_rec_mark(
915
ptr, end_ptr, type == MLOG_COMP_REC_MIN_MARK,
918
case MLOG_REC_DELETE: case MLOG_COMP_REC_DELETE:
919
if (NULL != (ptr = mlog_parse_index(
921
type == MLOG_COMP_REC_DELETE,
924
|| (ibool)!!page_is_comp(page)
925
== dict_table_is_comp(index->table));
926
ptr = page_cur_parse_delete_rec(ptr, end_ptr,
930
case MLOG_IBUF_BITMAP_INIT:
931
ptr = ibuf_parse_bitmap_init(ptr, end_ptr, page, mtr);
933
case MLOG_INIT_FILE_PAGE:
934
ptr = fsp_parse_init_file_page(ptr, end_ptr, page);
936
case MLOG_WRITE_STRING:
937
ptr = mlog_parse_string(ptr, end_ptr, page);
939
case MLOG_FILE_CREATE:
940
case MLOG_FILE_RENAME:
941
case MLOG_FILE_DELETE:
942
ptr = fil_op_log_parse_or_replay(ptr, end_ptr, type, FALSE,
947
recv_sys->found_corrupt_log = TRUE;
951
dict_table_t* table = index->table;
953
dict_mem_index_free(index);
954
dict_mem_table_free(table);
960
/*************************************************************************
961
Calculates the fold value of a page file address: used in inserting or
962
searching for a log record in the hash table. */
967
/* out: folded value */
968
ulint space, /* in: space */
969
ulint page_no)/* in: page number */
971
return(ut_fold_ulint_pair(space, page_no));
974
/*************************************************************************
975
Calculates the hash value of a page file address: used in inserting or
976
searching for a log record in the hash table. */
981
/* out: folded value */
982
ulint space, /* in: space */
983
ulint page_no)/* in: page number */
985
return(hash_calc_hash(recv_fold(space, page_no), recv_sys->addr_hash));
988
/*************************************************************************
989
Gets the hashed file address struct for a page. */
992
recv_get_fil_addr_struct(
993
/*=====================*/
994
/* out: file address struct, NULL if not found from the hash table */
996
ulint space, /* in: space id */
997
ulint page_no)/* in: page number */
999
recv_addr_t* recv_addr;
1001
recv_addr = HASH_GET_FIRST(recv_sys->addr_hash,
1002
recv_hash(space, page_no));
1004
if ((recv_addr->space == space)
1005
&& (recv_addr->page_no == page_no)) {
1010
recv_addr = HASH_GET_NEXT(addr_hash, recv_addr);
1016
/***********************************************************************
1017
Adds a new log record to the hash table of log records. */
1020
recv_add_to_hash_table(
1021
/*===================*/
1022
byte type, /* in: log record type */
1023
ulint space, /* in: space id */
1024
ulint page_no, /* in: page number */
1025
byte* body, /* in: log record body */
1026
byte* rec_end, /* in: log record end */
1027
dulint start_lsn, /* in: start lsn of the mtr */
1028
dulint end_lsn) /* in: end lsn of the mtr */
1032
recv_data_t* recv_data;
1033
recv_data_t** prev_field;
1034
recv_addr_t* recv_addr;
1036
if (fil_tablespace_deleted_or_being_deleted_in_mem(space, -1)) {
1037
/* The tablespace does not exist any more: do not store the log record */
1043
len = rec_end - body;
1045
recv = mem_heap_alloc(recv_sys->heap, sizeof(recv_t));
1047
recv->len = rec_end - body;
1048
recv->start_lsn = start_lsn;
1049
recv->end_lsn = end_lsn;
1051
recv_addr = recv_get_fil_addr_struct(space, page_no);
1053
if (recv_addr == NULL) {
1054
recv_addr = mem_heap_alloc(recv_sys->heap,
1055
sizeof(recv_addr_t));
1056
recv_addr->space = space;
1057
recv_addr->page_no = page_no;
1058
recv_addr->state = RECV_NOT_PROCESSED;
1060
UT_LIST_INIT(recv_addr->rec_list);
1062
HASH_INSERT(recv_addr_t, addr_hash, recv_sys->addr_hash,
1063
recv_fold(space, page_no), recv_addr);
1064
recv_sys->n_addrs++;
1066
fprintf(stderr, "Inserting log rec for space %lu, page %lu\n",
1071
UT_LIST_ADD_LAST(rec_list, recv_addr->rec_list, recv);
1073
prev_field = &(recv->data);
1075
/* Store the log record body in chunks of less than UNIV_PAGE_SIZE:
1076
recv_sys->heap grows into the buffer pool, and bigger chunks could not
be allocated */
1079
while (rec_end > body) {
1081
len = rec_end - body;
1083
if (len > RECV_DATA_BLOCK_SIZE) {
1084
len = RECV_DATA_BLOCK_SIZE;
1087
recv_data = mem_heap_alloc(recv_sys->heap,
1088
sizeof(recv_data_t) + len);
1089
*prev_field = recv_data;
1091
ut_memcpy(((byte*)recv_data) + sizeof(recv_data_t), body, len);
1093
prev_field = &(recv_data->next);
1101
/*************************************************************************
1102
Copies the log record body from recv to buf. */
1105
recv_data_copy_to_buf(
1106
/*==================*/
1107
byte* buf, /* in: buffer of length at least recv->len */
1108
recv_t* recv) /* in: log record */
1110
recv_data_t* recv_data;
1115
/* Start from the first data chunk attached to the record */
recv_data = recv->data;
1118
/* At most RECV_DATA_BLOCK_SIZE bytes are taken from one chunk (the
surrounding loop and the else branch are not visible in this excerpt) */
if (len > RECV_DATA_BLOCK_SIZE) {
1119
part_len = RECV_DATA_BLOCK_SIZE;
1124
/* The bytes to copy start immediately after the recv_data_t header of
the chunk */
ut_memcpy(buf, ((byte*)recv_data) + sizeof(recv_data_t),
1129
/* Move on to the next chunk in the chain */
recv_data = recv_data->next;
1133
/****************************************************************************
1134
Applies the hashed log records to the page, if the page lsn is less than the
1135
lsn of a log record. This can be called when a buffer page has just been
1136
read in, or also for a page already in the buffer pool. */
1141
ibool recover_backup, /* in: TRUE if we are recovering a backup
1142
page: then we do not acquire any latches
1143
since the page was read in outside the buffer pool */
1145
ibool just_read_in, /* in: TRUE if the i/o-handler calls this for
1146
a freshly read page */
1147
page_t* page, /* in: buffer page */
1148
ulint space, /* in: space id */
1149
ulint page_no) /* in: page number */
1151
buf_block_t* block = NULL;
1152
recv_addr_t* recv_addr;
1158
dulint page_newest_lsn;
1159
ibool modification_to_page;
1163
mutex_enter(&(recv_sys->mutex));
1165
if (recv_sys->apply_log_recs == FALSE) {
1167
/* Log records should not be applied now */
1169
mutex_exit(&(recv_sys->mutex));
1174
recv_addr = recv_get_fil_addr_struct(space, page_no);
1176
if ((recv_addr == NULL)
1177
|| (recv_addr->state == RECV_BEING_PROCESSED)
1178
|| (recv_addr->state == RECV_PROCESSED)) {
1180
mutex_exit(&(recv_sys->mutex));
1186
fprintf(stderr, "Recovering space %lu, page %lu\n", space, page_no);
1189
recv_addr->state = RECV_BEING_PROCESSED;
1191
mutex_exit(&(recv_sys->mutex));
1194
mtr_set_log_mode(&mtr, MTR_LOG_NONE);
1196
if (!recover_backup) {
1197
block = buf_block_align(page);
1200
/* Move the ownership of the x-latch on the
1201
page to this OS thread, so that we can acquire
1202
a second x-latch on it. This is needed for the
1203
operations to the page to pass the debug checks */
1206
rw_lock_x_lock_move_ownership(&(block->lock));
1209
success = buf_page_get_known_nowait(RW_X_LATCH, page,
1215
#ifdef UNIV_SYNC_DEBUG
1216
buf_page_dbg_add_level(page, SYNC_NO_ORDER_CHECK);
1217
#endif /* UNIV_SYNC_DEBUG */
1220
/* Read the newest modification lsn from the page */
1221
page_lsn = mach_read_from_8(page + FIL_PAGE_LSN);
1223
if (!recover_backup) {
1224
/* It may be that the page has been modified in the buffer
1225
pool: read the newest modification lsn there */
1227
page_newest_lsn = buf_frame_get_newest_modification(page);
1229
if (!ut_dulint_is_zero(page_newest_lsn)) {
1231
page_lsn = page_newest_lsn;
1234
/* In recovery from a backup we do not really use the buffer pool */
1237
page_newest_lsn = ut_dulint_zero;
1240
modification_to_page = FALSE;
1241
start_lsn = end_lsn = ut_dulint_zero;
1243
recv = UT_LIST_GET_FIRST(recv_addr->rec_list);
1246
end_lsn = recv->end_lsn;
1248
if (recv->len > RECV_DATA_BLOCK_SIZE) {
1249
/* We have to copy the record body to a separate buffer */
1252
buf = mem_alloc(recv->len);
1254
recv_data_copy_to_buf(buf, recv);
1256
buf = ((byte*)(recv->data)) + sizeof(recv_data_t);
1259
if (recv->type == MLOG_INIT_FILE_PAGE) {
1260
page_lsn = page_newest_lsn;
1262
mach_write_to_8(page + UNIV_PAGE_SIZE
1263
- FIL_PAGE_END_LSN_OLD_CHKSUM,
1265
mach_write_to_8(page + FIL_PAGE_LSN, ut_dulint_zero);
1268
if (ut_dulint_cmp(recv->start_lsn, page_lsn) >= 0) {
1270
if (!modification_to_page) {
1272
modification_to_page = TRUE;
1273
start_lsn = recv->start_lsn;
1277
if (log_debug_writes) {
1279
"InnoDB: Applying log rec"
1281
" to space %lu page no %lu\n",
1282
(ulong) recv->type, (ulong) recv->len,
1283
(ulong) recv_addr->space,
1284
(ulong) recv_addr->page_no);
1286
#endif /* UNIV_DEBUG */
1288
recv_parse_or_apply_log_rec_body(recv->type, buf,
1291
mach_write_to_8(page + UNIV_PAGE_SIZE
1292
- FIL_PAGE_END_LSN_OLD_CHKSUM,
1293
ut_dulint_add(recv->start_lsn,
1295
mach_write_to_8(page + FIL_PAGE_LSN,
1296
ut_dulint_add(recv->start_lsn,
1300
if (recv->len > RECV_DATA_BLOCK_SIZE) {
1304
recv = UT_LIST_GET_NEXT(rec_list, recv);
1307
mutex_enter(&(recv_sys->mutex));
1309
if (ut_dulint_cmp(recv_max_page_lsn, page_lsn) < 0) {
1310
recv_max_page_lsn = page_lsn;
1313
recv_addr->state = RECV_PROCESSED;
1315
ut_a(recv_sys->n_addrs);
1316
recv_sys->n_addrs--;
1318
mutex_exit(&(recv_sys->mutex));
1320
if (!recover_backup && modification_to_page) {
1323
buf_flush_recv_note_modification(block, start_lsn, end_lsn);
1326
/* Make sure that committing mtr does not change the modification
1327
lsn values of page */
1329
mtr.modifications = FALSE;
1334
/***********************************************************************
1335
Reads in pages which have hashed log records, from an area around a given
page number. */
1341
/* out: number of pages found */
1342
ulint space, /* in: space */
1343
ulint page_no)/* in: page number */
1345
recv_addr_t* recv_addr;
1346
ulint page_nos[RECV_READ_AHEAD_AREA];
1350
low_limit = page_no - (page_no % RECV_READ_AHEAD_AREA);
1354
for (page_no = low_limit; page_no < low_limit + RECV_READ_AHEAD_AREA;
1356
recv_addr = recv_get_fil_addr_struct(space, page_no);
1358
if (recv_addr && !buf_page_peek(space, page_no)) {
1360
mutex_enter(&(recv_sys->mutex));
1362
if (recv_addr->state == RECV_NOT_PROCESSED) {
1363
recv_addr->state = RECV_BEING_READ;
1365
page_nos[n] = page_no;
1370
mutex_exit(&(recv_sys->mutex));
1374
buf_read_recv_pages(FALSE, space, page_nos, n);
1376
fprintf(stderr, "Recv pages at %lu n %lu\n", page_nos[0], n);
1381
/***********************************************************************
1382
Empties the hash table of stored log records, applying them to appropriate
pages. */
1386
recv_apply_hashed_log_recs(
1387
/*=======================*/
1388
ibool allow_ibuf) /* in: if TRUE, also ibuf operations are
1389
allowed during the application; if FALSE,
1390
no ibuf operations are allowed, and after
1391
the application all file pages are flushed to
1392
disk and invalidated in buffer pool: this
1393
alternative means that no new log records
1394
can be generated during the application;
1395
the caller must in this case own the log mutex */
1398
recv_addr_t* recv_addr;
1404
ibool has_printed = FALSE;
1407
mutex_enter(&(recv_sys->mutex));
1409
if (recv_sys->apply_batch_on) {
1411
mutex_exit(&(recv_sys->mutex));
1413
os_thread_sleep(500000);
1418
ut_ad(!allow_ibuf == mutex_own(&log_sys->mutex));
1421
recv_no_ibuf_operations = TRUE;
1424
recv_sys->apply_log_recs = TRUE;
1425
recv_sys->apply_batch_on = TRUE;
1427
for (i = 0; i < hash_get_n_cells(recv_sys->addr_hash); i++) {
1429
recv_addr = HASH_GET_FIRST(recv_sys->addr_hash, i);
1432
space = recv_addr->space;
1433
page_no = recv_addr->page_no;
1435
if (recv_addr->state == RECV_NOT_PROCESSED) {
1437
ut_print_timestamp(stderr);
1438
fputs(" InnoDB: Starting an"
1439
" apply batch of log records"
1440
" to the database...\n"
1441
"InnoDB: Progress in percents: ",
1446
mutex_exit(&(recv_sys->mutex));
1448
if (buf_page_peek(space, page_no)) {
1452
page = buf_page_get(space, page_no,
1455
#ifdef UNIV_SYNC_DEBUG
1456
buf_page_dbg_add_level(
1457
page, SYNC_NO_ORDER_CHECK);
1458
#endif /* UNIV_SYNC_DEBUG */
1459
recv_recover_page(FALSE, FALSE, page,
1463
recv_read_in_area(space, page_no);
1466
mutex_enter(&(recv_sys->mutex));
1469
recv_addr = HASH_GET_NEXT(addr_hash, recv_addr);
1473
&& (i * 100) / hash_get_n_cells(recv_sys->addr_hash)
1475
/ hash_get_n_cells(recv_sys->addr_hash)) {
1477
fprintf(stderr, "%lu ", (ulong)
1479
/ hash_get_n_cells(recv_sys->addr_hash)));
1483
/* Wait until all the pages have been processed */
1485
while (recv_sys->n_addrs != 0) {
1487
mutex_exit(&(recv_sys->mutex));
1489
os_thread_sleep(500000);
1491
mutex_enter(&(recv_sys->mutex));
1496
fprintf(stderr, "\n");
1500
/* Flush all the file pages to disk and invalidate them in
1503
mutex_exit(&(recv_sys->mutex));
1504
mutex_exit(&(log_sys->mutex));
1506
n_pages = buf_flush_batch(BUF_FLUSH_LIST, ULINT_MAX,
1508
ut_a(n_pages != ULINT_UNDEFINED);
1510
buf_flush_wait_batch_end(BUF_FLUSH_LIST);
1512
buf_pool_invalidate();
1514
mutex_enter(&(log_sys->mutex));
1515
mutex_enter(&(recv_sys->mutex));
1517
recv_no_ibuf_operations = FALSE;
1520
recv_sys->apply_log_recs = FALSE;
1521
recv_sys->apply_batch_on = FALSE;
1523
recv_sys_empty_hash();
1526
fprintf(stderr, "InnoDB: Apply batch completed\n");
1529
mutex_exit(&(recv_sys->mutex));
1533
/***********************************************************************
1534
Tries to parse a single log record and returns its length. */
1539
/* out: length of the record, or 0 if the record was
1541
byte* ptr, /* in: pointer to a buffer */
1542
byte* end_ptr,/* in: pointer to the buffer end */
1543
byte* type, /* out: type */
1544
ulint* space, /* out: space id */
1545
ulint* page_no,/* out: page number */
1546
byte** body) /* out: log record body start */
1552
if (ptr == end_ptr) {
1557
if (*ptr == MLOG_MULTI_REC_END) {
1564
if (*ptr == MLOG_DUMMY_RECORD) {
1567
*space = ULINT_UNDEFINED - 1; /* For debugging */
1572
new_ptr = mlog_parse_initial_log_record(ptr, end_ptr, type, space,
1576
if (UNIV_UNLIKELY(!new_ptr)) {
1581
/* Check that page_no is sensible */
1583
if (UNIV_UNLIKELY(*page_no > 0x8FFFFFFFUL)) {
1585
recv_sys->found_corrupt_log = TRUE;
1590
new_ptr = recv_parse_or_apply_log_rec_body(*type, new_ptr, end_ptr,
1592
if (UNIV_UNLIKELY(new_ptr == NULL)) {
1597
if (*page_no > recv_max_parsed_page_no) {
1598
recv_max_parsed_page_no = *page_no;
1601
return(new_ptr - ptr);
1604
/***********************************************************
1605
Calculates the new value for lsn when more data is added to the log.
Besides the len data bytes, the result also counts LOG_BLOCK_HDR_SIZE
+ LOG_BLOCK_TRL_SIZE bytes for every log block payload area that the
added data fills up. Returns lsn advanced by that total. */
1608
recv_calc_lsn_on_data_add(
1609
/*======================*/
1610
dulint lsn, /* in: old lsn */
1611
ulint len) /* in: this many bytes of data is added, log block
1612
headers not included */
1617
frag_len = (ut_dulint_get_low(lsn) % OS_FILE_LOG_BLOCK_SIZE)
1618
- LOG_BLOCK_HDR_SIZE; /* payload bytes already in use in the log block that contains lsn */
1619
ut_ad(frag_len < OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE
1620
- LOG_BLOCK_TRL_SIZE); /* asserts that lsn points inside the payload area of its block */
1621
lsn_len = len + ((len + frag_len)
1622
/ (OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE
1623
- LOG_BLOCK_TRL_SIZE))
1624
* (LOG_BLOCK_HDR_SIZE + LOG_BLOCK_TRL_SIZE); /* one header + trailer is added per payload area that gets filled */
1626
return(ut_dulint_add(lsn, lsn_len));
1629
/***********************************************************
1630
Prints diagnostic info of corrupt log: the type, space id and page number
parsed for the corrupt record, the lsn up to which parsing had proceeded
(recv_sys->recovered_lsn), what is remembered about the previously parsed
record, a hex dump of the parsing buffer around the two records, and a
warning that advises running CHECK TABLE. */
1633
recv_report_corrupt_log(
1634
/*====================*/
1635
byte* ptr, /* in: pointer to corrupt log record */
1636
byte type, /* in: type of the record */
1637
ulint space, /* in: space id, this may also be garbage */
1638
ulint page_no)/* in: page number, this may also be garbage */
1641
"InnoDB: ############### CORRUPT LOG RECORD FOUND\n"
1642
"InnoDB: Log record type %lu, space id %lu, page number %lu\n"
1643
"InnoDB: Log parsing proceeded successfully up to %lu %lu\n"
1644
"InnoDB: Previous log record type %lu, is multi %lu\n"
1645
"InnoDB: Recv offset %lu, prev %lu\n",
1646
(ulong) type, (ulong) space, (ulong) page_no,
1647
(ulong) ut_dulint_get_high(recv_sys->recovered_lsn),
1648
(ulong) ut_dulint_get_low(recv_sys->recovered_lsn),
1649
(ulong) recv_previous_parsed_rec_type,
1650
(ulong) recv_previous_parsed_rec_is_multi,
1651
(ulong) (ptr - recv_sys->buf), /* offset of the corrupt record within the parsing buffer */
1652
(ulong) recv_previous_parsed_rec_offset);
1654
if ((ulint)(ptr - recv_sys->buf + 100)
1655
> recv_previous_parsed_rec_offset
1656
&& (ulint)(ptr - recv_sys->buf + 100
1657
- recv_previous_parsed_rec_offset)
1659
fputs("InnoDB: Hex dump of corrupt log starting"
1660
" 100 bytes before the start\n"
1661
"InnoDB: of the previous log rec,\n"
1662
"InnoDB: and ending 100 bytes after the start"
1663
" of the corrupt rec:\n",
1666
ut_print_buf(stderr,
1668
+ recv_previous_parsed_rec_offset - 100,
1669
ptr - recv_sys->buf + 200
1670
- recv_previous_parsed_rec_offset); /* dump length: distance between the two record starts plus 100 bytes on each side */
1674
fputs("InnoDB: WARNING: the log file may have been corrupt and it\n"
1675
"InnoDB: is possible that the log scan did not proceed\n"
1676
"InnoDB: far enough in recovery! Please run CHECK TABLE\n"
1677
"InnoDB: on your InnoDB tables to check that they are ok!\n"
1678
"InnoDB: If mysqld crashes after this recovery, look at\n"
1679
"InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
1680
"forcing-recovery.html\n"
1681
"InnoDB: about forcing recovery.\n", stderr);
1686
/***********************************************************
1687
Parses log records from a buffer and stores them to a hash table to wait
1688
merging to file pages. */
1691
recv_parse_log_recs(
1692
/*================*/
1693
/* out: currently always returns FALSE */
1694
ibool store_to_hash) /* in: TRUE if the records should be stored
1695
to the hash table; this is set to FALSE if just
1696
debug checking is needed */
1703
dulint new_recovered_lsn;
1711
ut_ad(mutex_own(&(log_sys->mutex)));
1712
ut_ad(!ut_dulint_is_zero(recv_sys->parse_start_lsn));
1714
ptr = recv_sys->buf + recv_sys->recovered_offset;
1716
end_ptr = recv_sys->buf + recv_sys->len;
1718
if (ptr == end_ptr) {
1723
single_rec = (ulint)*ptr & MLOG_SINGLE_REC_FLAG;
1725
if (single_rec || *ptr == MLOG_DUMMY_RECORD) {
1726
/* The mtr only modified a single page, or this is a file op */
1728
old_lsn = recv_sys->recovered_lsn;
1730
/* Try to parse a log record, fetching its type, space id,
1731
page no, and a pointer to the body of the log record */
1733
len = recv_parse_log_rec(ptr, end_ptr, &type, &space,
1736
if (len == 0 || recv_sys->found_corrupt_log) {
1737
if (recv_sys->found_corrupt_log) {
1739
recv_report_corrupt_log(ptr,
1740
type, space, page_no);
1746
new_recovered_lsn = recv_calc_lsn_on_data_add(old_lsn, len);
1748
if (ut_dulint_cmp(new_recovered_lsn, recv_sys->scanned_lsn)
1750
/* The log record filled a log block, and we require
1751
that also the next log block should have been scanned
1757
recv_previous_parsed_rec_type = (ulint)type;
1758
recv_previous_parsed_rec_offset = recv_sys->recovered_offset;
1759
recv_previous_parsed_rec_is_multi = 0;
1761
recv_sys->recovered_offset += len;
1762
recv_sys->recovered_lsn = new_recovered_lsn;
1765
if (log_debug_writes) {
1767
"InnoDB: Parsed a single log rec"
1768
" type %lu len %lu space %lu page no %lu\n",
1769
(ulong) type, (ulong) len, (ulong) space,
1772
#endif /* UNIV_DEBUG */
1774
if (type == MLOG_DUMMY_RECORD) {
1777
} else if (store_to_hash && (type == MLOG_FILE_CREATE
1778
|| type == MLOG_FILE_RENAME
1779
|| type == MLOG_FILE_DELETE)) {
1780
#ifdef UNIV_HOTBACKUP
1781
if (recv_replay_file_ops) {
1783
/* In ibbackup --apply-log, replay an .ibd file
1784
operation, if possible; note that
1785
fil_path_to_mysql_datadir is set in ibbackup to
1786
point to the datadir we should use there */
1788
if (NULL == fil_op_log_parse_or_replay(
1789
body, end_ptr, type, TRUE,
1792
"InnoDB: Error: file op"
1793
" log record of type %lu"
1794
" space %lu not complete in\n"
1795
"InnoDB: the replay phase."
1804
/* In normal mysqld crash recovery we do not try to
1805
replay file operations */
1806
} else if (store_to_hash) {
1807
recv_add_to_hash_table(type, space, page_no, body,
1809
recv_sys->recovered_lsn);
1811
#ifdef UNIV_LOG_DEBUG
1812
recv_check_incomplete_log_recs(ptr, len);
1813
#endif/* UNIV_LOG_DEBUG */
1816
/* Check that all the records associated with the single mtr
1817
are included within the buffer */
1823
len = recv_parse_log_rec(ptr, end_ptr, &type, &space,
1825
if (len == 0 || recv_sys->found_corrupt_log) {
1827
if (recv_sys->found_corrupt_log) {
1829
recv_report_corrupt_log(
1830
ptr, type, space, page_no);
1836
recv_previous_parsed_rec_type = (ulint)type;
1837
recv_previous_parsed_rec_offset
1838
= recv_sys->recovered_offset + total_len;
1839
recv_previous_parsed_rec_is_multi = 1;
1841
if ((!store_to_hash) && (type != MLOG_MULTI_REC_END)) {
1842
#ifdef UNIV_LOG_DEBUG
1843
recv_check_incomplete_log_recs(ptr, len);
1844
#endif /* UNIV_LOG_DEBUG */
1848
if (log_debug_writes) {
1850
"InnoDB: Parsed a multi log rec"
1852
" space %lu page no %lu\n",
1853
(ulong) type, (ulong) len,
1854
(ulong) space, (ulong) page_no);
1856
#endif /* UNIV_DEBUG */
1863
if (type == MLOG_MULTI_REC_END) {
1865
/* Found the end mark for the records */
1871
new_recovered_lsn = recv_calc_lsn_on_data_add(
1872
recv_sys->recovered_lsn, total_len);
1874
if (ut_dulint_cmp(new_recovered_lsn, recv_sys->scanned_lsn)
1876
/* The log record filled a log block, and we require
1877
that also the next log block should have been scanned
1883
/* Add all the records to the hash table */
1885
ptr = recv_sys->buf + recv_sys->recovered_offset;
1888
old_lsn = recv_sys->recovered_lsn;
1889
len = recv_parse_log_rec(ptr, end_ptr, &type, &space,
1891
if (recv_sys->found_corrupt_log) {
1893
recv_report_corrupt_log(ptr,
1894
type, space, page_no);
1898
ut_a(0 == ((ulint)*ptr & MLOG_SINGLE_REC_FLAG));
1900
recv_sys->recovered_offset += len;
1901
recv_sys->recovered_lsn
1902
= recv_calc_lsn_on_data_add(old_lsn, len);
1903
if (type == MLOG_MULTI_REC_END) {
1905
/* Found the end mark for the records */
1910
if (store_to_hash) {
1911
recv_add_to_hash_table(type, space, page_no,
1924
/***********************************************************
1925
Adds data from a new log block to the parsing buffer of recv_sys if
1926
recv_sys->parse_start_lsn is non-zero. */
1929
recv_sys_add_to_parsing_buf(
1930
/*========================*/
1931
/* out: TRUE if more data added */
1932
byte* log_block, /* in: log block */
1933
dulint scanned_lsn) /* in: lsn of how far we were able to find
1934
data in this log block */
1941
ut_ad(ut_dulint_cmp(scanned_lsn, recv_sys->scanned_lsn) >= 0);
1943
if (ut_dulint_is_zero(recv_sys->parse_start_lsn)) {
1944
/* Cannot start parsing yet because no start point for
1950
data_len = log_block_get_data_len(log_block);
1952
if (ut_dulint_cmp(recv_sys->parse_start_lsn, scanned_lsn) >= 0) {
1956
} else if (ut_dulint_cmp(recv_sys->scanned_lsn, scanned_lsn) >= 0) {
1960
} else if (ut_dulint_cmp(recv_sys->parse_start_lsn,
1961
recv_sys->scanned_lsn) > 0) {
1962
more_len = ut_dulint_minus(scanned_lsn,
1963
recv_sys->parse_start_lsn);
1965
more_len = ut_dulint_minus(scanned_lsn, recv_sys->scanned_lsn);
1968
if (more_len == 0) {
1973
ut_ad(data_len >= more_len);
1975
start_offset = data_len - more_len;
1977
if (start_offset < LOG_BLOCK_HDR_SIZE) {
1978
start_offset = LOG_BLOCK_HDR_SIZE;
1981
end_offset = data_len;
1983
if (end_offset > OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) {
1984
end_offset = OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE;
1987
ut_ad(start_offset <= end_offset);
1989
if (start_offset < end_offset) {
1990
ut_memcpy(recv_sys->buf + recv_sys->len,
1991
log_block + start_offset, end_offset - start_offset);
1993
recv_sys->len += end_offset - start_offset;
1995
ut_a(recv_sys->len <= RECV_PARSING_BUF_SIZE);
2001
/***********************************************************
2002
Moves the parsing buffer data left to the buffer start: the bytes of
recv_sys->buf from recv_sys->recovered_offset up to recv_sys->len are
moved to offset 0, and len and recovered_offset are rebased to match. */
2005
recv_sys_justify_left_parsing_buf(void)
2006
/*===================================*/
2008
ut_memmove(recv_sys->buf, recv_sys->buf + recv_sys->recovered_offset,
2009
recv_sys->len - recv_sys->recovered_offset); /* source and destination lie in the same buffer, hence a memmove-style copy */
2011
recv_sys->len -= recv_sys->recovered_offset; /* only the moved bytes remain in the buffer */
2013
recv_sys->recovered_offset = 0; /* the moved data now begins at the buffer start */
2016
/***********************************************************
2017
Scans log from a buffer and stores new log data to the parsing buffer. Parses
2018
and hashes the log records if new data found. */
2023
/* out: TRUE if limit_lsn has been reached, or
2024
not able to scan any more in this log group */
2025
ibool apply_automatically,/* in: TRUE if we want this function to
2026
apply log records automatically when the
2027
hash table becomes full; in the hot backup tool
2028
the tool does the applying, not this
2030
ulint available_memory,/* in: we let the hash table of recs to grow
2031
to this size, at the maximum */
2032
ibool store_to_hash, /* in: TRUE if the records should be stored
2033
to the hash table; this is set to FALSE if just
2034
debug checking is needed */
2035
byte* buf, /* in: buffer containing a log segment or
2037
ulint len, /* in: buffer length */
2038
dulint start_lsn, /* in: buffer start lsn */
2039
dulint* contiguous_lsn, /* in/out: it is known that all log groups
2040
contain contiguous log data up to this lsn */
2041
dulint* group_scanned_lsn)/* out: scanning succeeded up to this lsn */
2050
ut_ad(ut_dulint_get_low(start_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0);
2051
ut_ad(len % OS_FILE_LOG_BLOCK_SIZE == 0);
2053
ut_a(apply_automatically <= TRUE);
2054
ut_a(store_to_hash <= TRUE);
2059
scanned_lsn = start_lsn;
2062
while (log_block < buf + len && !finished) {
2064
no = log_block_get_hdr_no(log_block);
2066
fprintf(stderr, "Log block header no %lu\n", no);
2068
fprintf(stderr, "Scanned lsn no %lu\n",
2069
log_block_convert_lsn_to_no(scanned_lsn));
2071
if (no != log_block_convert_lsn_to_no(scanned_lsn)
2072
|| !log_block_checksum_is_ok_or_old_format(log_block)) {
2074
if (no == log_block_convert_lsn_to_no(scanned_lsn)
2075
&& !log_block_checksum_is_ok_or_old_format(
2078
"InnoDB: Log block no %lu at"
2079
" lsn %lu %lu has\n"
2080
"InnoDB: ok header, but checksum field"
2081
" contains %lu, should be %lu\n",
2083
(ulong) ut_dulint_get_high(
2085
(ulong) ut_dulint_get_low(scanned_lsn),
2086
(ulong) log_block_get_checksum(
2088
(ulong) log_block_calc_checksum(
2092
/* Garbage or an incompletely written log block */
2099
if (log_block_get_flush_bit(log_block)) {
2100
/* This block was a start of a log flush operation:
2101
we know that the previous flush operation must have
2102
been completed for all log groups before this block
2103
can have been flushed to any of the groups. Therefore,
2104
we know that log data is contiguous up to scanned_lsn
2105
in all non-corrupt log groups. */
2107
if (ut_dulint_cmp(scanned_lsn, *contiguous_lsn) > 0) {
2108
*contiguous_lsn = scanned_lsn;
2112
data_len = log_block_get_data_len(log_block);
2114
if ((store_to_hash || (data_len == OS_FILE_LOG_BLOCK_SIZE))
2115
&& (ut_dulint_cmp(ut_dulint_add(scanned_lsn, data_len),
2116
recv_sys->scanned_lsn) > 0)
2117
&& (recv_sys->scanned_checkpoint_no > 0)
2118
&& (log_block_get_checkpoint_no(log_block)
2119
< recv_sys->scanned_checkpoint_no)
2120
&& (recv_sys->scanned_checkpoint_no
2121
- log_block_get_checkpoint_no(log_block)
2124
/* Garbage from a log buffer flush which was made
2125
before the most recent database recovery */
2128
#ifdef UNIV_LOG_DEBUG
2129
/* This is not really an error, but currently
2130
we stop here in the debug version: */
2137
if (ut_dulint_is_zero(recv_sys->parse_start_lsn)
2138
&& (log_block_get_first_rec_group(log_block) > 0)) {
2140
/* We found a point from which to start the parsing
2143
recv_sys->parse_start_lsn
2144
= ut_dulint_add(scanned_lsn,
2145
log_block_get_first_rec_group(
2147
recv_sys->scanned_lsn = recv_sys->parse_start_lsn;
2148
recv_sys->recovered_lsn = recv_sys->parse_start_lsn;
2151
scanned_lsn = ut_dulint_add(scanned_lsn, data_len);
2153
if (ut_dulint_cmp(scanned_lsn, recv_sys->scanned_lsn) > 0) {
2155
/* We have found more entries. If this scan is
2156
of startup type, we must initiate crash recovery
2157
environment before parsing these log records. */
2159
if (recv_log_scan_is_startup_type
2160
&& !recv_needed_recovery) {
2163
"InnoDB: Log scan progressed"
2164
" past the checkpoint lsn %lu %lu\n",
2165
(ulong) ut_dulint_get_high(
2166
recv_sys->scanned_lsn),
2167
(ulong) ut_dulint_get_low(
2168
recv_sys->scanned_lsn));
2169
recv_init_crash_recovery();
2172
/* We were able to find more log data: add it to the
2173
parsing buffer if parse_start_lsn is already
2176
if (recv_sys->len + 4 * OS_FILE_LOG_BLOCK_SIZE
2177
>= RECV_PARSING_BUF_SIZE) {
2179
"InnoDB: Error: log parsing"
2181
" Recovery may have failed!\n");
2183
recv_sys->found_corrupt_log = TRUE;
2185
} else if (!recv_sys->found_corrupt_log) {
2186
more_data = recv_sys_add_to_parsing_buf(
2187
log_block, scanned_lsn);
2190
recv_sys->scanned_lsn = scanned_lsn;
2191
recv_sys->scanned_checkpoint_no
2192
= log_block_get_checkpoint_no(log_block);
2195
if (data_len < OS_FILE_LOG_BLOCK_SIZE) {
2196
/* Log data for this group ends here */
2200
log_block += OS_FILE_LOG_BLOCK_SIZE;
2204
*group_scanned_lsn = scanned_lsn;
2206
if (recv_needed_recovery
2207
|| (recv_is_from_backup && !recv_is_making_a_backup)) {
2208
recv_scan_print_counter++;
2210
if (finished || (recv_scan_print_counter % 80 == 0)) {
2213
"InnoDB: Doing recovery: scanned up to"
2214
" log sequence number %lu %lu\n",
2215
(ulong) ut_dulint_get_high(*group_scanned_lsn),
2216
(ulong) ut_dulint_get_low(*group_scanned_lsn));
2220
if (more_data && !recv_sys->found_corrupt_log) {
2221
/* Try to parse more log records */
2223
recv_parse_log_recs(store_to_hash);
2225
if (store_to_hash && mem_heap_get_size(recv_sys->heap)
2227
&& apply_automatically) {
2229
/* Hash table of log records has grown too big:
2230
empty it; FALSE means no ibuf operations
2231
allowed, as we cannot add new records to the
2232
log yet: they would be produced by ibuf
2235
recv_apply_hashed_log_recs(FALSE);
2238
if (recv_sys->recovered_offset > RECV_PARSING_BUF_SIZE / 4) {
2239
/* Move parsing buffer data to the buffer start */
2241
recv_sys_justify_left_parsing_buf();
2248
/***********************************************************
2249
Scans the log of one log group, starting from *contiguous_lsn. A segment
2250
of RECV_SCAN_SIZE bytes is read from the group into log_sys->buf and
passed to recv_scan_log_recs; start_lsn is then advanced to the end of
that segment. */
2253
recv_group_scan_log_recs(
2254
/*=====================*/
2255
log_group_t* group, /* in: log group */
2256
dulint* contiguous_lsn, /* in/out: it is known that all log groups
2257
contain contiguous log data up to this lsn */
2258
dulint* group_scanned_lsn)/* out: scanning succeeded up to this lsn */
2266
start_lsn = *contiguous_lsn;
2269
end_lsn = ut_dulint_add(start_lsn, RECV_SCAN_SIZE);
2271
log_group_read_log_seg(LOG_RECOVER, log_sys->buf,
2272
group, start_lsn, end_lsn); /* fills log_sys->buf with the segment [start_lsn, end_lsn) of this group */
2274
finished = recv_scan_log_recs(
2275
TRUE, (buf_pool->n_frames - recv_n_pool_free_frames)
2276
* UNIV_PAGE_SIZE, TRUE, log_sys->buf, RECV_SCAN_SIZE,
2277
start_lsn, contiguous_lsn, group_scanned_lsn); /* leading args: apply_automatically = TRUE, available_memory in bytes, store_to_hash = TRUE */
2278
start_lsn = end_lsn; /* the next segment starts where this one ended */
2282
if (log_debug_writes) {
2284
"InnoDB: Scanned group %lu up to"
2285
" log sequence number %lu %lu\n",
2287
(ulong) ut_dulint_get_high(*group_scanned_lsn),
2288
(ulong) ut_dulint_get_low(*group_scanned_lsn));
2290
#endif /* UNIV_DEBUG */
2293
/***********************************************************
2294
Initialize crash recovery environment. Can be called iff
2295
recv_needed_recovery == FALSE. Sets recv_needed_recovery to TRUE, prints
the 'not shut down normally' notice, loads the single-table tablespaces
and, when srv_force_recovery < SRV_FORCE_NO_LOG_REDO, calls
trx_sys_doublewrite_init_or_restore_pages(TRUE). */
2298
recv_init_crash_recovery(void)
2299
/*==========================*/
2301
ut_a(!recv_needed_recovery); /* enforces the calling precondition stated in the header */
2303
recv_needed_recovery = TRUE; /* records that crash recovery has been started */
2305
ut_print_timestamp(stderr);
2308
" InnoDB: Database was not"
2309
" shut down normally!\n"
2310
"InnoDB: Starting crash recovery.\n");
2313
"InnoDB: Reading tablespace information"
2314
" from the .ibd files...\n");
2316
fil_load_single_table_tablespaces(); /* the step announced by the message just above */
2318
/* If we are using the doublewrite method, we will
2319
check if there are half-written pages in data files,
2320
and restore them from the doublewrite buffer if
2323
if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) {
2326
"InnoDB: Restoring possible"
2327
" half-written data pages from"
2328
" the doublewrite\n"
2329
"InnoDB: buffer...\n");
2330
trx_sys_doublewrite_init_or_restore_pages(TRUE);
2334
/************************************************************
2335
Recovers from a checkpoint. When this function returns, the database is able
2336
to start processing of new user transactions, but the function
2337
recv_recovery_from_checkpoint_finish should be called later to complete
2338
the recovery and free the resources used in it. */
2341
recv_recovery_from_checkpoint_start(
2342
/*================================*/
2343
/* out: error code or DB_SUCCESS */
2344
ulint type, /* in: LOG_CHECKPOINT or LOG_ARCHIVE */
2345
dulint limit_lsn, /* in: recover up to this lsn if possible */
2346
dulint min_flushed_lsn,/* in: min flushed lsn from data files */
2347
dulint max_flushed_lsn)/* in: max flushed lsn from data files */
2350
log_group_t* max_cp_group;
2351
log_group_t* up_to_date_group;
2353
dulint checkpoint_lsn;
2354
dulint checkpoint_no;
2355
dulint old_scanned_lsn;
2356
dulint group_scanned_lsn;
2357
dulint contiguous_lsn;
2358
dulint archived_lsn;
2361
byte log_hdr_buf[LOG_FILE_HDR_SIZE];
2364
ut_ad((type != LOG_CHECKPOINT)
2365
|| (ut_dulint_cmp(limit_lsn, ut_dulint_max) == 0));
2367
if (type == LOG_CHECKPOINT) {
2369
recv_sys_init(FALSE, buf_pool_get_curr_size());
2372
if (srv_force_recovery >= SRV_FORCE_NO_LOG_REDO) {
2374
"InnoDB: The user has set SRV_FORCE_NO_LOG_REDO on\n");
2376
"InnoDB: Skipping log redo\n");
2381
recv_recovery_on = TRUE;
2383
recv_sys->limit_lsn = limit_lsn;
2385
mutex_enter(&(log_sys->mutex));
2387
/* Look for the latest checkpoint from any of the log groups */
2389
err = recv_find_max_checkpoint(&max_cp_group, &max_cp_field);
2391
if (err != DB_SUCCESS) {
2393
mutex_exit(&(log_sys->mutex));
2398
log_group_read_checkpoint_info(max_cp_group, max_cp_field);
2400
buf = log_sys->checkpoint_buf;
2402
checkpoint_lsn = mach_read_from_8(buf + LOG_CHECKPOINT_LSN);
2403
checkpoint_no = mach_read_from_8(buf + LOG_CHECKPOINT_NO);
2404
archived_lsn = mach_read_from_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN);
2406
/* Read the first log file header to print a note if this is
2407
a recovery from a restored InnoDB Hot Backup */
2409
fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE, max_cp_group->space_id,
2410
0, 0, LOG_FILE_HDR_SIZE,
2411
log_hdr_buf, max_cp_group);
2413
if (0 == ut_memcmp(log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP,
2414
(byte*)"ibbackup", (sizeof "ibbackup") - 1)) {
2415
/* This log file was created by ibbackup --restore: print
2416
a note to the user about it */
2419
"InnoDB: The log file was created by"
2420
" ibbackup --apply-log at\n"
2422
log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP);
2424
"InnoDB: NOTE: the following crash recovery"
2425
" is part of a normal restore.\n");
2427
/* Wipe over the label now */
2429
memset(log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP,
2431
/* Write to the log file to wipe over the label */
2432
fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE,
2433
max_cp_group->space_id,
2434
0, 0, OS_FILE_LOG_BLOCK_SIZE,
2435
log_hdr_buf, max_cp_group);
2438
#ifdef UNIV_LOG_ARCHIVE
2439
group = UT_LIST_GET_FIRST(log_sys->log_groups);
2442
log_checkpoint_get_nth_group_info(buf, group->id,
2443
&(group->archived_file_no),
2444
&(group->archived_offset));
2446
group = UT_LIST_GET_NEXT(log_groups, group);
2448
#endif /* UNIV_LOG_ARCHIVE */
2450
if (type == LOG_CHECKPOINT) {
2451
/* Start reading the log groups from the checkpoint lsn up. The
2452
variable contiguous_lsn contains an lsn up to which the log is
2453
known to be contiguously written to all log groups. */
2455
recv_sys->parse_start_lsn = checkpoint_lsn;
2456
recv_sys->scanned_lsn = checkpoint_lsn;
2457
recv_sys->scanned_checkpoint_no = 0;
2458
recv_sys->recovered_lsn = checkpoint_lsn;
2460
srv_start_lsn = checkpoint_lsn;
2463
contiguous_lsn = ut_dulint_align_down(recv_sys->scanned_lsn,
2464
OS_FILE_LOG_BLOCK_SIZE);
2465
if (type == LOG_ARCHIVE) {
2466
/* Try to recover the remaining part from logs: first from
2467
the logs of the archived group */
2469
group = recv_sys->archive_group;
2470
capacity = log_group_get_capacity(group);
2472
if ((ut_dulint_cmp(recv_sys->scanned_lsn, ut_dulint_add(
2473
checkpoint_lsn, capacity)) > 0)
2474
|| (ut_dulint_cmp(checkpoint_lsn, ut_dulint_add(
2475
recv_sys->scanned_lsn, capacity))
2478
mutex_exit(&(log_sys->mutex));
2480
/* The group does not contain enough log: probably
2481
an archived log file was missing or corrupt */
2486
recv_group_scan_log_recs(group, &contiguous_lsn,
2487
&group_scanned_lsn);
2488
if (ut_dulint_cmp(recv_sys->scanned_lsn, checkpoint_lsn) < 0) {
2490
mutex_exit(&(log_sys->mutex));
2492
/* The group did not contain enough log: an archived
2493
log file was missing or invalid, or the log group
2499
group->scanned_lsn = group_scanned_lsn;
2500
up_to_date_group = group;
2502
up_to_date_group = max_cp_group;
2505
ut_ad(RECV_SCAN_SIZE <= log_sys->buf_size);
2507
group = UT_LIST_GET_FIRST(log_sys->log_groups);
2509
if ((type == LOG_ARCHIVE) && (group == recv_sys->archive_group)) {
2510
group = UT_LIST_GET_NEXT(log_groups, group);
2513
/* Set the flag to publish that we are doing startup scan. */
2514
recv_log_scan_is_startup_type = (type == LOG_CHECKPOINT);
2516
old_scanned_lsn = recv_sys->scanned_lsn;
2518
recv_group_scan_log_recs(group, &contiguous_lsn,
2519
&group_scanned_lsn);
2520
group->scanned_lsn = group_scanned_lsn;
2522
if (ut_dulint_cmp(old_scanned_lsn, group_scanned_lsn) < 0) {
2523
/* We found a more up-to-date group */
2525
up_to_date_group = group;
2528
if ((type == LOG_ARCHIVE)
2529
&& (group == recv_sys->archive_group)) {
2530
group = UT_LIST_GET_NEXT(log_groups, group);
2533
group = UT_LIST_GET_NEXT(log_groups, group);
2536
/* Done with startup scan. Clear the flag. */
2537
recv_log_scan_is_startup_type = FALSE;
2538
if (type == LOG_CHECKPOINT) {
2539
/* NOTE: we always do a 'recovery' at startup, but only if
2540
there is something wrong we will print a message to the
2541
user about recovery: */
2543
if (ut_dulint_cmp(checkpoint_lsn, max_flushed_lsn) != 0
2544
|| ut_dulint_cmp(checkpoint_lsn, min_flushed_lsn) != 0) {
2546
if (ut_dulint_cmp(checkpoint_lsn, max_flushed_lsn)
2549
"InnoDB: #########################"
2550
"#################################\n"
2553
"InnoDB: The log sequence number"
2554
" in ibdata files is higher\n"
2555
"InnoDB: than the log sequence number"
2556
" in the ib_logfiles! Are you sure\n"
2557
"InnoDB: you are using the right"
2558
" ib_logfiles to start up"
2560
"InnoDB: Log sequence number in"
2561
" ib_logfiles is %lu %lu, log\n"
2562
"InnoDB: sequence numbers stamped"
2563
" to ibdata file headers are between\n"
2564
"InnoDB: %lu %lu and %lu %lu.\n"
2565
"InnoDB: #########################"
2566
"#################################\n",
2567
(ulong) ut_dulint_get_high(
2569
(ulong) ut_dulint_get_low(
2571
(ulong) ut_dulint_get_high(
2573
(ulong) ut_dulint_get_low(
2575
(ulong) ut_dulint_get_high(
2577
(ulong) ut_dulint_get_low(
2583
if (!recv_needed_recovery) {
2585
"InnoDB: The log sequence number"
2586
" in ibdata files does not match\n"
2587
"InnoDB: the log sequence number"
2588
" in the ib_logfiles!\n");
2589
recv_init_crash_recovery();
2593
if (!recv_needed_recovery) {
2594
/* Init the doublewrite buffer memory structure */
2595
trx_sys_doublewrite_init_or_restore_pages(FALSE);
2599
/* We currently have only one log group */
2600
if (ut_dulint_cmp(group_scanned_lsn, checkpoint_lsn) < 0) {
2601
ut_print_timestamp(stderr);
2603
" InnoDB: ERROR: We were only able to scan the log"
2605
"InnoDB: %lu %lu, but a checkpoint was at %lu %lu.\n"
2606
"InnoDB: It is possible that"
2607
" the database is now corrupt!\n",
2608
(ulong) ut_dulint_get_high(group_scanned_lsn),
2609
(ulong) ut_dulint_get_low(group_scanned_lsn),
2610
(ulong) ut_dulint_get_high(checkpoint_lsn),
2611
(ulong) ut_dulint_get_low(checkpoint_lsn));
2614
if (ut_dulint_cmp(group_scanned_lsn, recv_max_page_lsn) < 0) {
2615
ut_print_timestamp(stderr);
2617
" InnoDB: ERROR: We were only able to scan the log"
2619
"InnoDB: but a database page a had an lsn %lu %lu."
2620
" It is possible that the\n"
2621
"InnoDB: database is now corrupt!\n",
2622
(ulong) ut_dulint_get_high(group_scanned_lsn),
2623
(ulong) ut_dulint_get_low(group_scanned_lsn),
2624
(ulong) ut_dulint_get_high(recv_max_page_lsn),
2625
(ulong) ut_dulint_get_low(recv_max_page_lsn));
2628
if (ut_dulint_cmp(recv_sys->recovered_lsn, checkpoint_lsn) < 0) {
2630
mutex_exit(&(log_sys->mutex));
2632
if (ut_dulint_cmp(recv_sys->recovered_lsn, limit_lsn) >= 0) {
2642
/* Synchronize the uncorrupted log groups to the most up-to-date log
2643
group; we also copy checkpoint info to groups */
2645
log_sys->next_checkpoint_lsn = checkpoint_lsn;
2646
log_sys->next_checkpoint_no = ut_dulint_add(checkpoint_no, 1);
2648
#ifdef UNIV_LOG_ARCHIVE
2649
log_sys->archived_lsn = archived_lsn;
2650
#endif /* UNIV_LOG_ARCHIVE */
2652
recv_synchronize_groups(up_to_date_group);
2654
if (!recv_needed_recovery) {
2655
ut_a(ut_dulint_cmp(checkpoint_lsn,
2656
recv_sys->recovered_lsn) == 0);
2659
srv_start_lsn = recv_sys->recovered_lsn;
2662
log_sys->lsn = recv_sys->recovered_lsn;
2664
ut_memcpy(log_sys->buf, recv_sys->last_block, OS_FILE_LOG_BLOCK_SIZE);
2666
log_sys->buf_free = ut_dulint_get_low(log_sys->lsn)
2667
% OS_FILE_LOG_BLOCK_SIZE;
2668
log_sys->buf_next_to_write = log_sys->buf_free;
2669
log_sys->written_to_some_lsn = log_sys->lsn;
2670
log_sys->written_to_all_lsn = log_sys->lsn;
2672
log_sys->last_checkpoint_lsn = checkpoint_lsn;
2674
log_sys->next_checkpoint_no = ut_dulint_add(checkpoint_no, 1);
2676
#ifdef UNIV_LOG_ARCHIVE
2677
if (ut_dulint_cmp(archived_lsn, ut_dulint_max) == 0) {
2679
log_sys->archiving_state = LOG_ARCH_OFF;
2681
#endif /* UNIV_LOG_ARCHIVE */
2683
mutex_enter(&(recv_sys->mutex));
2685
recv_sys->apply_log_recs = TRUE;
2687
mutex_exit(&(recv_sys->mutex));
2689
mutex_exit(&(log_sys->mutex));
2691
recv_lsn_checks_on = TRUE;
2693
/* The database is now ready to start almost normal processing of user
2694
transactions: transaction rollbacks and the application of the log
2695
records in the hash table can be run in background. */
2700
/************************************************************
2701
Completes recovery from a checkpoint: applies the hashed log records
unless srv_force_recovery >= SRV_FORCE_NO_LOG_REDO, prints the MySQL
master log and binlog positions if crash recovery was needed, warns if
a corrupt log was found, and clears recv_recovery_on. */
2704
recv_recovery_from_checkpoint_finish(void)
2705
/*======================================*/
2709
/* Apply the hashed log records to the respective file pages */
2711
if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) {
2713
recv_apply_hashed_log_recs(TRUE); /* TRUE = allow_ibuf: ibuf operations are allowed during the application */
2717
if (log_debug_writes) {
2719
"InnoDB: Log records applied to the database\n");
2721
#endif /* UNIV_DEBUG */
2723
if (recv_needed_recovery) {
2724
trx_sys_print_mysql_master_log_pos();
2725
trx_sys_print_mysql_binlog_offset();
2728
if (recv_sys->found_corrupt_log) {
2731
"InnoDB: WARNING: the log file may have been"
2733
"InnoDB: is possible that the log scan or parsing"
2734
" did not proceed\n"
2735
"InnoDB: far enough in recovery. Please run"
2737
"InnoDB: on your InnoDB tables to check that"
2739
"InnoDB: It may be safest to recover your"
2740
" InnoDB database from\n"
2741
"InnoDB: a backup!\n");
2744
/* Free the resources of the recovery system */
2746
recv_recovery_on = FALSE;
2748
#ifndef UNIV_LOG_DEBUG
2752
#ifdef UNIV_SYNC_DEBUG
2753
/* Wait for a while so that created threads have time to suspend
2754
themselves before we switch the latching order checks on */
2755
os_thread_sleep(1000000); /* NOTE(review): argument is presumably in microseconds, i.e. one second; confirm */
2757
/* Switch latching order checks on in sync0sync.c */
2758
sync_order_checks_on = TRUE;
2760
if (srv_force_recovery < SRV_FORCE_NO_TRX_UNDO) {
2761
/* Rollback the uncommitted transactions which have no user
2764
os_thread_create(trx_rollback_or_clean_all_without_sess,
2769
/**********************************************************
2770
Resets the logs. The contents of log files will be lost!
The caller must hold log_sys->mutex: ownership is asserted on entry, the
mutex is released around the two log_make_checkpoint_at() calls and is
acquired again before the visible statements end. log_sys->lsn is set to
lsn aligned up to OS_FILE_LOG_BLOCK_SIZE, copied into the log groups that
are visited, and finally advanced by LOG_BLOCK_HDR_SIZE after the first
block of the log buffer has been initialized. */
2775
dulint lsn, /* in: reset to this lsn rounded up to
2776
be divisible by OS_FILE_LOG_BLOCK_SIZE,
2777
after which we add LOG_BLOCK_HDR_SIZE */
2778
#ifdef UNIV_LOG_ARCHIVE
2779
ulint arch_log_no, /* in: next archived log file number */
2780
#endif /* UNIV_LOG_ARCHIVE */
2781
ibool new_logs_created)/* in: TRUE if resetting logs is done
2782
at the log creation; FALSE if it is done
2783
after archive recovery */
2787
/* Assert that the caller already owns the log system mutex */
ut_ad(mutex_own(&(log_sys->mutex)));
2789
/* The new system lsn is the requested lsn rounded up to a log block
boundary */
log_sys->lsn = ut_dulint_align_up(lsn, OS_FILE_LOG_BLOCK_SIZE);
2791
group = UT_LIST_GET_FIRST(log_sys->log_groups);
2794
/* A group starts at the new lsn, positioned right after the log file
header */
group->lsn = log_sys->lsn;
2795
group->lsn_offset = LOG_FILE_HDR_SIZE;
2796
#ifdef UNIV_LOG_ARCHIVE
2797
group->archived_file_no = arch_log_no;
2798
group->archived_offset = 0;
2799
#endif /* UNIV_LOG_ARCHIVE */
2801
/* When the log files were not freshly created, recv_truncate_group()
is called with the group lsn passed for every lsn argument */
if (!new_logs_created) {
2802
recv_truncate_group(group, group->lsn, group->lsn,
2803
group->lsn, group->lsn);
2806
group = UT_LIST_GET_NEXT(log_groups, group);
2809
/* Reset the write bookkeeping: buffer write offset back to 0 and both
written-to lsn fields to the new lsn */
log_sys->buf_next_to_write = 0;
2810
log_sys->written_to_some_lsn = log_sys->lsn;
2811
log_sys->written_to_all_lsn = log_sys->lsn;
2813
/* Checkpoint numbering and the last checkpoint lsn start over from
zero */
log_sys->next_checkpoint_no = ut_dulint_zero;
2814
log_sys->last_checkpoint_lsn = ut_dulint_zero;
2816
#ifdef UNIV_LOG_ARCHIVE
2817
log_sys->archived_lsn = log_sys->lsn;
2818
#endif /* UNIV_LOG_ARCHIVE */
2820
/* Initialize the block at the start of the log buffer for the new lsn;
its first record group and the buffer free offset both sit right after
the block header, and the system lsn is advanced past that header */
log_block_init(log_sys->buf, log_sys->lsn);
2821
log_block_set_first_rec_group(log_sys->buf, LOG_BLOCK_HDR_SIZE);
2823
log_sys->buf_free = LOG_BLOCK_HDR_SIZE;
2824
log_sys->lsn = ut_dulint_add(log_sys->lsn, LOG_BLOCK_HDR_SIZE);
2826
/* The checkpoint calls below run without the log mutex held */
mutex_exit(&(log_sys->mutex));
2828
/* Reset the checkpoint fields in logs */
2830
/* NOTE(review): the call is made twice -- presumably so that both
checkpoint slots in the log files get overwritten; confirm against
log_make_checkpoint_at() */
log_make_checkpoint_at(ut_dulint_max, TRUE);
2831
log_make_checkpoint_at(ut_dulint_max, TRUE);
2833
/* Re-acquire the mutex so the caller gets it back in the state it
passed in */
mutex_enter(&(log_sys->mutex));
2836
#ifdef UNIV_HOTBACKUP
2837
/**********************************************************
2838
Creates new log files after a backup has been restored.
Each of the n_log_files files is named by concatenating log_dir, the base
name "ib_logfile" and the file index; log_dir is joined without any
separator, so it presumably has to end in a path separator already
(confirm against the callers). A file is created with OS_FILE_CREATE, set
to log_file_size (passed to os_file_set_size() as a low 32-bit part and
the value of ut_get_high32()), flushed and closed. Afterwards a zero-filled
buffer of LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE bytes is filled in
with a log file header, a checkpoint and one old-format log block for lsn,
and written to the start of file number 0. */
2841
recv_reset_log_files_for_backup(
2842
/*============================*/
2843
const char* log_dir, /* in: log file directory path */
2844
ulint n_log_files, /* in: number of log files */
2845
ulint log_file_size, /* in: log file size */
2846
dulint lsn) /* in: new start lsn, must be
2847
divisible by OS_FILE_LOG_BLOCK_SIZE */
2855
static const char ib_logfile_basename[] = "ib_logfile";
2857
log_dir_len = strlen(log_dir);
2858
/* full path name of ib_logfile consists of log dir path + basename
2859
+ number. This must fit in the name buffer.
2861
ut_a(log_dir_len + strlen(ib_logfile_basename) + 11 < sizeof(name));
2863
buf = ut_malloc(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
2864
memset(buf, '\0', LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
2866
for (i = 0; i < n_log_files; i++) {
2868
sprintf(name, "%s%s%lu", log_dir,
2869
ib_logfile_basename, (ulong)i);
2871
log_file = os_file_create_simple(name, OS_FILE_CREATE,
2872
OS_FILE_READ_WRITE, &success);
2875
"InnoDB: Cannot create %s. Check that"
2876
" the file does not exist yet.\n", name);
2882
"Setting log file size to %lu %lu\n",
2883
(ulong) ut_get_high32(log_file_size),
2884
(ulong) log_file_size & 0xFFFFFFFFUL);
2886
success = os_file_set_size(name, log_file,
2887
log_file_size & 0xFFFFFFFFUL,
2888
ut_get_high32(log_file_size));
2892
"InnoDB: Cannot set %s size to %lu %lu\n",
2893
name, (ulong) ut_get_high32(log_file_size),
2894
(ulong) (log_file_size & 0xFFFFFFFFUL));
2898
os_file_flush(log_file);
2899
os_file_close(log_file);
2902
/* We pretend there is a checkpoint at lsn + LOG_BLOCK_HDR_SIZE */
2904
/* Fill the first LOG_FILE_HDR_SIZE bytes of buf with the file header
and checkpoint information for lsn */
log_reset_first_header_and_checkpoint(buf, lsn);
2906
/* The log block directly after the header is initialized in the old
block format; its first record group starts right after the block
header */
log_block_init_in_old_format(buf + LOG_FILE_HDR_SIZE, lsn);
2907
log_block_set_first_rec_group(buf + LOG_FILE_HDR_SIZE,
2908
LOG_BLOCK_HDR_SIZE);
2909
/* Only file number 0 receives the header and the first block */
sprintf(name, "%s%s%lu", log_dir, ib_logfile_basename, (ulong)0);
2911
log_file = os_file_create_simple(name, OS_FILE_OPEN,
2912
OS_FILE_READ_WRITE, &success);
2914
fprintf(stderr, "InnoDB: Cannot open %s.\n", name);
2919
/* Write the prepared header and block at offset 0 (both offset
arguments are zero), then flush and close the file */
os_file_write(name, log_file, buf, 0, 0,
2920
LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
2921
os_file_flush(log_file);
2922
os_file_close(log_file);
2926
#endif /* UNIV_HOTBACKUP */
2928
#ifdef UNIV_LOG_ARCHIVE
2929
/**********************************************************
2930
Reads from the archive of a log group and performs recovery.
The archived file is named from group->id and group->archived_file_no.
Its header is validated (group id, file number, completion flag) and the
start and end lsn are read from it; the data after the header is then
read through fil_io(), at most RECV_SCAN_SIZE bytes at a time, and handed
to recv_scan_log_recs(). */
2933
log_group_recover_from_archive_file(
2934
/*================================*/
2935
/* out: TRUE if no more complete
2936
consistent archive files */
2937
log_group_t* group) /* in: log group */
2939
os_file_t file_handle;
2941
dulint file_end_lsn;
2949
ulint file_size_high;
2958
/* Add the file to the archive file space; open the file */
2960
log_archived_file_name_gen(name, group->id, group->archived_file_no);
2962
file_handle = os_file_create(name, OS_FILE_OPEN,
2963
OS_FILE_LOG, OS_FILE_AIO, &ret);
2968
"InnoDB: Do you want to copy additional"
2969
" archived log files\n"
2970
"InnoDB: to the directory\n");
2972
"InnoDB: or were these all the files needed"
2975
"InnoDB: (Y == copy more files; N == this is all)?");
2977
/* The answer is read as a single character from standard input; 'Y'
jumps back to try_open_again to retry opening the file */
input_char = getchar();
2979
if (input_char == (int) 'N') {
2982
} else if (input_char == (int) 'Y') {
2984
goto try_open_again;
2990
ret = os_file_get_size(file_handle, &file_size, &file_size_high);
2993
/* The high part of the reported file size is required to be zero */
ut_a(file_size_high == 0);
2995
fprintf(stderr, "InnoDB: Opened archived log file %s\n", name);
2997
/* The handle is closed again here; the reads below go through
fil_io() on group->archive_space_id instead */
ret = os_file_close(file_handle);
2999
/* A file shorter than the log file header cannot be used */
if (file_size < LOG_FILE_HDR_SIZE) {
3001
"InnoDB: Archive file header incomplete %s\n", name);
3008
/* Add the archive file as a node to the space */
3010
/* The node size is given in pages: 1 + file_size / UNIV_PAGE_SIZE */
fil_node_create(name, 1 + file_size / UNIV_PAGE_SIZE,
3011
group->archive_space_id, FALSE);
3012
/* Compile-time guard: RECV_SCAN_SIZE must not be smaller than
LOG_FILE_HDR_SIZE */
#if RECV_SCAN_SIZE < LOG_FILE_HDR_SIZE
3013
# error "RECV_SCAN_SIZE < LOG_FILE_HDR_SIZE"
3016
/* Read the archive file header */
3017
fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE, group->archive_space_id, 0, 0,
3018
LOG_FILE_HDR_SIZE, buf, NULL);
3020
/* Check if the archive file header is consistent */
3022
/* Both the group id and the file number stored in the header must
match what this group expects */
if (mach_read_from_4(buf + LOG_GROUP_ID) != group->id
3023
|| mach_read_from_4(buf + LOG_FILE_NO)
3024
!= group->archived_file_no) {
3026
"InnoDB: Archive file header inconsistent %s\n", name);
3031
/* A zero LOG_FILE_ARCH_COMPLETED field marks a file whose archiving
did not finish */
if (!mach_read_from_4(buf + LOG_FILE_ARCH_COMPLETED)) {
3033
"InnoDB: Archive file not completely written %s\n",
3039
/* The lsn range covered by this file is stored in its header */
start_lsn = mach_read_from_8(buf + LOG_FILE_START_LSN);
3040
file_end_lsn = mach_read_from_8(buf + LOG_FILE_END_LSN);
3042
/* scanned_lsn is still zero, so no lsn has been scanned yet: the file
must not start after recv_sys->parse_start_lsn, and scanning begins at
the start lsn of the file */
if (ut_dulint_is_zero(recv_sys->scanned_lsn)) {
3044
if (ut_dulint_cmp(recv_sys->parse_start_lsn, start_lsn) < 0) {
3046
"InnoDB: Archive log file %s"
3047
" starts from too big a lsn\n",
3052
recv_sys->scanned_lsn = start_lsn;
3055
/* The file has to start exactly at the lsn scanned so far */
if (ut_dulint_cmp(recv_sys->scanned_lsn, start_lsn) != 0) {
3058
"InnoDB: Archive log file %s starts from"
3064
/* Log data begins right after the file header */
read_offset = LOG_FILE_HDR_SIZE;
3067
/* Read RECV_SCAN_SIZE bytes, or, near the end of the file, whatever
remains rounded down to a whole number of log blocks */
len = RECV_SCAN_SIZE;
3069
if (read_offset + len > file_size) {
3070
len = ut_calc_align_down(file_size - read_offset,
3071
OS_FILE_LOG_BLOCK_SIZE);
3080
if (log_debug_writes) {
3082
"InnoDB: Archive read starting at"
3083
" lsn %lu %lu, len %lu from file %s\n",
3084
(ulong) ut_dulint_get_high(start_lsn),
3085
(ulong) ut_dulint_get_low(start_lsn),
3088
#endif /* UNIV_DEBUG */
3090
/* fil_io() addresses the read as a page number plus a byte offset
within that page */
fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE,
3091
group->archive_space_id, read_offset / UNIV_PAGE_SIZE,
3092
read_offset % UNIV_PAGE_SIZE, len, buf, NULL);
3094
/* The second argument is a byte count derived from the buffer pool
frames that are not reserved by recv_n_pool_free_frames */
ret = recv_scan_log_recs(
3095
TRUE, (buf_pool->n_frames - recv_n_pool_free_frames)
3096
* UNIV_PAGE_SIZE, TRUE, buf, len, start_lsn,
3097
&dummy_lsn, &scanned_lsn);
3099
/* Compare the lsn reached by the scan with the end lsn recorded in
the file header */
if (ut_dulint_cmp(scanned_lsn, file_end_lsn) == 0) {
3106
"InnoDB: Archive log file %s"
3107
" does not scan right\n",
3113
/* Advance by the amount just read; the scan is expected to have
reached exactly this lsn */
start_lsn = ut_dulint_add(start_lsn, len);
3115
ut_ad(ut_dulint_cmp(start_lsn, scanned_lsn) == 0);
3121
/************************************************************
3122
Recovers from archived log files, and also from log files, if they exist.
The recovery system is initialized and both recv_recovery_on and
recv_recovery_from_backup_on are switched on. The log group matching
group_id is looked up, archived files are processed starting from
first_log_no via log_group_recover_from_archive_file(), and if
recv_sys->recovered_lsn is still below limit_lsn, recovery continues with
recv_recovery_from_checkpoint_start(). When limit_lsn is not
ut_dulint_max the hashed log records are applied and the logs are reset
at the recovered lsn. */
3125
recv_recovery_from_archive_start(
3126
/*=============================*/
3127
/* out: error code or DB_SUCCESS */
3128
dulint min_flushed_lsn,/* in: min flushed lsn field from the
3130
dulint limit_lsn, /* in: recover up to this lsn if possible */
3131
ulint first_log_no) /* in: number of the first archived log file
3132
to use in the recovery; the file will be
3133
searched from INNOBASE_LOG_ARCH_DIR specified
3134
in server config file */
3145
recv_sys_init(FALSE, buf_pool_get_curr_size());
3147
/* Both flags are raised for archive recovery;
recv_recovery_from_backup_on is cleared again by
recv_recovery_from_archive_finish() */
recv_recovery_on = TRUE;
3148
recv_recovery_from_backup_on = TRUE;
3150
recv_sys->limit_lsn = limit_lsn;
3154
/* Look through the log groups for the one whose id equals group_id */
group = UT_LIST_GET_FIRST(log_sys->log_groups);
3157
if (group->id == group_id) {
3162
group = UT_LIST_GET_NEXT(log_groups, group);
3167
"InnoDB: There is no log group defined with id %lu!\n",
3172
/* Archived files are consumed starting from the caller-supplied file
number */
group->archived_file_no = first_log_no;
3174
/* Parsing starts at min_flushed_lsn; nothing has been scanned yet, and
the recovered lsn starts out equal to the parse start lsn */
recv_sys->parse_start_lsn = min_flushed_lsn;
3176
recv_sys->scanned_lsn = ut_dulint_zero;
3177
recv_sys->scanned_checkpoint_no = 0;
3178
recv_sys->recovered_lsn = recv_sys->parse_start_lsn;
3180
recv_sys->archive_group = group;
3184
/* The archive file processing below runs with the log mutex held */
mutex_enter(&(log_sys->mutex));
3187
ret = log_group_recover_from_archive_file(group);
3189
/* Close and truncate a possible processed archive file
3190
from the file space */
3192
/* Length in bytes of what the archive space currently holds */
trunc_len = UNIV_PAGE_SIZE
3193
* fil_space_get_size(group->archive_space_id);
3194
if (trunc_len > 0) {
3195
fil_space_truncate_start(group->archive_space_id,
3199
/* Move on to the next archived file number */
group->archived_file_no++;
3202
/* The archived files did not reach limit_lsn: continue from the log
files, starting the scan at parse_start_lsn if no lsn was scanned */
if (ut_dulint_cmp(recv_sys->recovered_lsn, limit_lsn) < 0) {
3204
if (ut_dulint_is_zero(recv_sys->scanned_lsn)) {
3206
recv_sys->scanned_lsn = recv_sys->parse_start_lsn;
3209
/* recv_recovery_from_checkpoint_start() is called without the log
mutex; it is taken again afterwards */
mutex_exit(&(log_sys->mutex));
3211
err = recv_recovery_from_checkpoint_start(LOG_ARCHIVE,
3215
if (err != DB_SUCCESS) {
3220
mutex_enter(&(log_sys->mutex));
3223
/* A limit_lsn other than ut_dulint_max: apply the hashed records and
reset the logs to start at the recovered lsn */
if (ut_dulint_cmp(limit_lsn, ut_dulint_max) != 0) {
3225
recv_apply_hashed_log_recs(FALSE);
3227
recv_reset_logs(recv_sys->recovered_lsn, 0, FALSE);
3230
mutex_exit(&(log_sys->mutex));
3235
/************************************************************
3236
Completes recovery from archive: runs the same completion step as
recovery from a checkpoint and then clears the
recv_recovery_from_backup_on flag. */
3239
recv_recovery_from_archive_finish(void)
3240
/*===================================*/
3242
/* Archive recovery is finished off by the checkpoint-recovery
completion routine */
recv_recovery_from_checkpoint_finish();
3244
/* Recovery from a backup/archive is no longer in progress */
recv_recovery_from_backup_on = FALSE;
3246
#endif /* UNIV_LOG_ARCHIVE */