1
/******************************************************
6
Created 9/20/1997 Heikki Tuuri
7
*******************************************************/
12
#include "log0recv.ic"
20
#include "srv0start.h"
23
#include "page0page.h"
27
#include "ibuf0ibuf.h"
34
#include "dict0boot.h"
36
#include "sync0sync.h"
39
/* This is set to FALSE if the backup was originally taken with the
40
ibbackup --include regexp option: then we do not want to create tables in
41
directories which were not included */
42
ibool recv_replay_file_ops = TRUE;
43
#endif /* UNIV_HOTBACKUP */
45
/* Log records are stored in the hash table in chunks of at most this size;
46
this must be less than UNIV_PAGE_SIZE as it is stored in the buffer pool */
47
#define RECV_DATA_BLOCK_SIZE (MEM_MAX_ALLOC_IN_BUF - sizeof(recv_data_t))
49
/* Read-ahead area in applying log records to file pages */
50
#define RECV_READ_AHEAD_AREA 32
52
recv_sys_t* recv_sys = NULL;
53
ibool recv_recovery_on = FALSE;
54
ibool recv_recovery_from_backup_on = FALSE;
56
ibool recv_needed_recovery = FALSE;
58
ibool recv_lsn_checks_on = FALSE;
60
/* There are two conditions under which we scan the logs, the first
61
is normal startup and the second is when we do a recovery from an archive.
63
This flag is set if we are doing a scan from the last checkpoint during
64
startup. If we find log entries that were written after the last checkpoint
65
we know that the server was not cleanly shut down. We must then initialize
66
the crash recovery environment before attempting to store these entries in
67
the log hash table. */
68
ibool recv_log_scan_is_startup_type = FALSE;
70
/* If the following is TRUE, the buffer pool file pages must be invalidated
71
after recovery and no ibuf operations are allowed; this becomes TRUE if
72
the log record hash table becomes too full, and log records must be merged
73
to file pages already before the recovery is finished: in this case no
74
ibuf operations are allowed, as they could modify the pages read in the
75
buffer pool before the pages have been recovered to the up-to-date state */
77
/* Recovery is running and no operations on the log files are allowed
78
yet: the variable name is misleading */
80
ibool recv_no_ibuf_operations = FALSE;
82
/* The following counter is used to decide when to print info on the log scan */
84
ulint recv_scan_print_counter = 0;
86
ibool recv_is_from_backup = FALSE;
88
ibool recv_is_making_a_backup = FALSE;
90
# define recv_is_making_a_backup FALSE
91
#endif /* UNIV_HOTBACKUP */
93
ulint recv_previous_parsed_rec_type = 999999;
94
ulint recv_previous_parsed_rec_offset = 0;
95
ulint recv_previous_parsed_rec_is_multi = 0;
97
ulint recv_max_parsed_page_no = 0;
99
/* This many frames must be left free in the buffer pool when we scan
100
the log and store the scanned log records in the buffer pool: we will
101
use these free frames to read in pages when we start applying the
102
log records to the database. */
104
ulint recv_n_pool_free_frames = 256;
106
/* The maximum lsn we see for a page during the recovery process. If this
107
is bigger than the lsn we are able to scan up to, that is an indication that
108
the recovery failed and the database may be corrupt. */
110
dulint recv_max_page_lsn;
114
/***********************************************************
115
Initialize crash recovery environment. Can be called iff
116
recv_needed_recovery == FALSE. */
119
recv_init_crash_recovery(void);
120
/*===========================*/
122
/************************************************************
123
Creates the recovery system. */
126
recv_sys_create(void)
127
/*=================*/
129
if (recv_sys != NULL) {
134
recv_sys = mem_alloc(sizeof(recv_sys_t));
136
mutex_create(&recv_sys->mutex, SYNC_RECV);
138
recv_sys->heap = NULL;
139
recv_sys->addr_hash = NULL;
142
/************************************************************
143
Inits the recovery system for a recovery operation. */
148
ibool recover_from_backup, /* in: TRUE if this is called
149
to recover from a hot backup */
150
ulint available_memory) /* in: available memory in bytes */
152
if (recv_sys->heap != NULL) {
157
mutex_enter(&(recv_sys->mutex));
159
if (!recover_from_backup) {
160
recv_sys->heap = mem_heap_create_in_buffer(256);
162
recv_sys->heap = mem_heap_create(256);
163
recv_is_from_backup = TRUE;
166
recv_sys->buf = ut_malloc(RECV_PARSING_BUF_SIZE);
168
recv_sys->recovered_offset = 0;
170
recv_sys->addr_hash = hash_create(available_memory / 64);
171
recv_sys->n_addrs = 0;
173
recv_sys->apply_log_recs = FALSE;
174
recv_sys->apply_batch_on = FALSE;
176
recv_sys->last_block_buf_start = mem_alloc(2 * OS_FILE_LOG_BLOCK_SIZE);
178
recv_sys->last_block = ut_align(recv_sys->last_block_buf_start,
179
OS_FILE_LOG_BLOCK_SIZE);
180
recv_sys->found_corrupt_log = FALSE;
182
recv_max_page_lsn = ut_dulint_zero;
184
mutex_exit(&(recv_sys->mutex));
187
/************************************************************
188
Empties the hash table when it has been fully processed. */
191
recv_sys_empty_hash(void)
192
/*=====================*/
194
ut_ad(mutex_own(&(recv_sys->mutex)));
196
if (recv_sys->n_addrs != 0) {
198
"InnoDB: Error: %lu pages with log records"
199
" were left unprocessed!\n"
200
"InnoDB: Maximum page number with"
201
" log records on it %lu\n",
202
(ulong) recv_sys->n_addrs,
203
(ulong) recv_max_parsed_page_no);
207
hash_table_free(recv_sys->addr_hash);
208
mem_heap_empty(recv_sys->heap);
210
recv_sys->addr_hash = hash_create(buf_pool_get_curr_size() / 256);
213
#ifndef UNIV_LOG_DEBUG
214
/************************************************************
215
Frees the recovery system. */
221
mutex_enter(&(recv_sys->mutex));
223
hash_table_free(recv_sys->addr_hash);
224
mem_heap_free(recv_sys->heap);
225
ut_free(recv_sys->buf);
226
mem_free(recv_sys->last_block_buf_start);
228
recv_sys->addr_hash = NULL;
229
recv_sys->heap = NULL;
231
mutex_exit(&(recv_sys->mutex));
233
#endif /* UNIV_LOG_DEBUG */
235
/************************************************************
236
Truncates possible corrupted or extra records from a log group. */
241
log_group_t* group, /* in: log group */
242
dulint recovered_lsn, /* in: recovery succeeded up to this
244
dulint limit_lsn, /* in: this was the limit for
246
dulint checkpoint_lsn, /* in: recovery was started from this
248
dulint archived_lsn) /* in: the log has been archived up to
259
if (ut_dulint_cmp(archived_lsn, ut_dulint_max) == 0) {
260
/* Checkpoint was taken in the NOARCHIVELOG mode */
261
archived_lsn = checkpoint_lsn;
264
finish_lsn1 = ut_dulint_add(ut_dulint_align_down(
266
OS_FILE_LOG_BLOCK_SIZE),
267
log_group_get_capacity(group));
269
finish_lsn2 = ut_dulint_add(ut_dulint_align_up(
271
OS_FILE_LOG_BLOCK_SIZE),
272
recv_sys->last_log_buf_size);
274
if (ut_dulint_cmp(limit_lsn, ut_dulint_max) != 0) {
275
/* We do not know how far we should erase log records: erase
276
as much as possible */
278
finish_lsn = finish_lsn1;
280
/* It is enough to erase the length of the log buffer */
281
finish_lsn = ut_dulint_get_min(finish_lsn1, finish_lsn2);
284
ut_a(RECV_SCAN_SIZE <= log_sys->buf_size);
286
/* Write the log buffer full of zeros */
287
for (i = 0; i < RECV_SCAN_SIZE; i++) {
289
*(log_sys->buf + i) = '\0';
292
start_lsn = ut_dulint_align_down(recovered_lsn,
293
OS_FILE_LOG_BLOCK_SIZE);
295
if (ut_dulint_cmp(start_lsn, recovered_lsn) != 0) {
296
/* Copy the last incomplete log block to the log buffer and
297
edit its data length: */
299
ut_memcpy(log_sys->buf, recv_sys->last_block,
300
OS_FILE_LOG_BLOCK_SIZE);
301
log_block_set_data_len(log_sys->buf, ut_dulint_minus(
302
recovered_lsn, start_lsn));
305
if (ut_dulint_cmp(start_lsn, finish_lsn) >= 0) {
311
end_lsn = ut_dulint_add(start_lsn, RECV_SCAN_SIZE);
313
if (ut_dulint_cmp(end_lsn, finish_lsn) > 0) {
315
end_lsn = finish_lsn;
318
len = ut_dulint_minus(end_lsn, start_lsn);
320
log_group_write_buf(group, log_sys->buf, len, start_lsn, 0);
321
if (ut_dulint_cmp(end_lsn, finish_lsn) >= 0) {
326
/* Write the log buffer full of zeros */
327
for (i = 0; i < RECV_SCAN_SIZE; i++) {
329
*(log_sys->buf + i) = '\0';
336
/************************************************************
337
Copies the log segment between group->recovered_lsn and recovered_lsn from the
338
most up-to-date log group to group, so that it contains the latest log data. */
343
log_group_t* up_to_date_group, /* in: the most up-to-date log
345
log_group_t* group, /* in: copy to this log
347
dulint recovered_lsn) /* in: recovery succeeded up
354
if (ut_dulint_cmp(group->scanned_lsn, recovered_lsn) >= 0) {
359
ut_a(RECV_SCAN_SIZE <= log_sys->buf_size);
361
start_lsn = ut_dulint_align_down(group->scanned_lsn,
362
OS_FILE_LOG_BLOCK_SIZE);
364
end_lsn = ut_dulint_add(start_lsn, RECV_SCAN_SIZE);
366
if (ut_dulint_cmp(end_lsn, recovered_lsn) > 0) {
367
end_lsn = ut_dulint_align_up(recovered_lsn,
368
OS_FILE_LOG_BLOCK_SIZE);
371
log_group_read_log_seg(LOG_RECOVER, log_sys->buf,
372
up_to_date_group, start_lsn, end_lsn);
374
len = ut_dulint_minus(end_lsn, start_lsn);
376
log_group_write_buf(group, log_sys->buf, len, start_lsn, 0);
378
if (ut_dulint_cmp(end_lsn, recovered_lsn) >= 0) {
387
/************************************************************
388
Copies a log segment from the most up-to-date log group to the other log
389
groups, so that they all contain the latest log data. Also writes the info
390
about the latest checkpoint to the groups, and inits the fields in the group
391
memory structs to up-to-date values. */
394
recv_synchronize_groups(
395
/*====================*/
396
log_group_t* up_to_date_group) /* in: the most up-to-date
402
dulint recovered_lsn;
405
recovered_lsn = recv_sys->recovered_lsn;
406
limit_lsn = recv_sys->limit_lsn;
408
/* Read the last recovered log block to the recovery system buffer:
409
the block is always incomplete */
411
start_lsn = ut_dulint_align_down(recovered_lsn,
412
OS_FILE_LOG_BLOCK_SIZE);
413
end_lsn = ut_dulint_align_up(recovered_lsn, OS_FILE_LOG_BLOCK_SIZE);
415
ut_a(ut_dulint_cmp(start_lsn, end_lsn) != 0);
417
log_group_read_log_seg(LOG_RECOVER, recv_sys->last_block,
418
up_to_date_group, start_lsn, end_lsn);
420
group = UT_LIST_GET_FIRST(log_sys->log_groups);
423
if (group != up_to_date_group) {
425
/* Copy log data if needed */
427
recv_copy_group(group, up_to_date_group,
431
/* Update the fields in the group struct to correspond to
434
log_group_set_fields(group, recovered_lsn);
436
group = UT_LIST_GET_NEXT(log_groups, group);
439
/* Copy the checkpoint info to the groups; remember that we have
440
incremented checkpoint_no by one, and the info will not be written
441
over the max checkpoint info, thus making the preservation of max
442
checkpoint info on disk certain */
444
log_groups_write_checkpoint_info();
446
mutex_exit(&(log_sys->mutex));
448
/* Wait for the checkpoint write to complete */
449
rw_lock_s_lock(&(log_sys->checkpoint_lock));
450
rw_lock_s_unlock(&(log_sys->checkpoint_lock));
452
mutex_enter(&(log_sys->mutex));
455
/***************************************************************************
456
Checks the consistency of the checkpoint info */
459
recv_check_cp_is_consistent(
460
/*========================*/
461
/* out: TRUE if ok */
462
byte* buf) /* in: buffer containing checkpoint info */
466
fold = ut_fold_binary(buf, LOG_CHECKPOINT_CHECKSUM_1);
468
if ((fold & 0xFFFFFFFFUL) != mach_read_from_4(
469
buf + LOG_CHECKPOINT_CHECKSUM_1)) {
473
fold = ut_fold_binary(buf + LOG_CHECKPOINT_LSN,
474
LOG_CHECKPOINT_CHECKSUM_2 - LOG_CHECKPOINT_LSN);
476
if ((fold & 0xFFFFFFFFUL) != mach_read_from_4(
477
buf + LOG_CHECKPOINT_CHECKSUM_2)) {
484
/************************************************************
485
Looks for the maximum consistent checkpoint from the log groups. */
488
recv_find_max_checkpoint(
489
/*=====================*/
490
/* out: error code or DB_SUCCESS */
491
log_group_t** max_group, /* out: max group */
492
ulint* max_field) /* out: LOG_CHECKPOINT_1 or
497
dulint checkpoint_no;
501
group = UT_LIST_GET_FIRST(log_sys->log_groups);
503
max_no = ut_dulint_zero;
507
buf = log_sys->checkpoint_buf;
510
group->state = LOG_GROUP_CORRUPTED;
512
for (field = LOG_CHECKPOINT_1; field <= LOG_CHECKPOINT_2;
513
field += LOG_CHECKPOINT_2 - LOG_CHECKPOINT_1) {
515
log_group_read_checkpoint_info(group, field);
517
if (!recv_check_cp_is_consistent(buf)) {
519
if (log_debug_writes) {
521
"InnoDB: Checkpoint in group"
522
" %lu at %lu invalid, %lu\n",
525
(ulong) mach_read_from_4(
527
+ LOG_CHECKPOINT_CHECKSUM_1));
530
#endif /* UNIV_DEBUG */
534
group->state = LOG_GROUP_OK;
536
group->lsn = mach_read_from_8(
537
buf + LOG_CHECKPOINT_LSN);
538
group->lsn_offset = mach_read_from_4(
539
buf + LOG_CHECKPOINT_OFFSET);
540
checkpoint_no = mach_read_from_8(
541
buf + LOG_CHECKPOINT_NO);
544
if (log_debug_writes) {
546
"InnoDB: Checkpoint number %lu"
547
" found in group %lu\n",
548
(ulong) ut_dulint_get_low(
552
#endif /* UNIV_DEBUG */
554
if (ut_dulint_cmp(checkpoint_no, max_no) >= 0) {
557
max_no = checkpoint_no;
564
group = UT_LIST_GET_NEXT(log_groups, group);
567
if (*max_group == NULL) {
570
"InnoDB: No valid checkpoint found.\n"
571
"InnoDB: If this error appears when you are"
572
" creating an InnoDB database,\n"
573
"InnoDB: the problem may be that during"
574
" an earlier attempt you managed\n"
575
"InnoDB: to create the InnoDB data files,"
576
" but log file creation failed.\n"
577
"InnoDB: If that is the case, please refer to\n"
578
"InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
579
"error-creating-innodb.html\n");
586
/***********************************************************************
587
Reads the checkpoint info needed in hot backup. */
590
recv_read_cp_info_for_backup(
591
/*=========================*/
592
/* out: TRUE if success */
593
byte* hdr, /* in: buffer containing the log group header */
594
dulint* lsn, /* out: checkpoint lsn */
595
ulint* offset, /* out: checkpoint offset in the log group */
596
ulint* fsp_limit,/* out: fsp limit of space 0, 1000000000 if the
597
database is running with < version 3.23.50 of InnoDB */
598
dulint* cp_no, /* out: checkpoint number */
599
dulint* first_header_lsn)
600
/* out: lsn of the start of the first log file */
603
dulint max_cp_no = ut_dulint_zero;
606
cp_buf = hdr + LOG_CHECKPOINT_1;
608
if (recv_check_cp_is_consistent(cp_buf)) {
609
max_cp_no = mach_read_from_8(cp_buf + LOG_CHECKPOINT_NO);
610
max_cp = LOG_CHECKPOINT_1;
613
cp_buf = hdr + LOG_CHECKPOINT_2;
615
if (recv_check_cp_is_consistent(cp_buf)) {
616
if (ut_dulint_cmp(mach_read_from_8(cp_buf + LOG_CHECKPOINT_NO),
618
max_cp = LOG_CHECKPOINT_2;
626
cp_buf = hdr + max_cp;
628
*lsn = mach_read_from_8(cp_buf + LOG_CHECKPOINT_LSN);
629
*offset = mach_read_from_4(cp_buf + LOG_CHECKPOINT_OFFSET);
631
/* If the user is running a pre-3.23.50 version of InnoDB, its
632
checkpoint data does not contain the fsp limit info */
633
if (mach_read_from_4(cp_buf + LOG_CHECKPOINT_FSP_MAGIC_N)
634
== LOG_CHECKPOINT_FSP_MAGIC_N_VAL) {
636
*fsp_limit = mach_read_from_4(
637
cp_buf + LOG_CHECKPOINT_FSP_FREE_LIMIT);
639
if (*fsp_limit == 0) {
640
*fsp_limit = 1000000000;
643
*fsp_limit = 1000000000;
646
/* fprintf(stderr, "fsp limit %lu MB\n", *fsp_limit); */
648
*cp_no = mach_read_from_8(cp_buf + LOG_CHECKPOINT_NO);
650
*first_header_lsn = mach_read_from_8(hdr + LOG_FILE_START_LSN);
655
/**********************************************************
656
Compares the calculated 4-byte checksum to the trailer checksum field of a log block.
657
We also accept a log block in the old format < InnoDB-3.23.52 where the
658
checksum field contains the log block number. */
661
log_block_checksum_is_ok_or_old_format(
662
/*===================================*/
663
/* out: TRUE if ok, or if the log block may be in the
664
format of InnoDB version < 3.23.52 */
665
byte* block) /* in: pointer to a log block */
667
#ifdef UNIV_LOG_DEBUG
669
#endif /* UNIV_LOG_DEBUG */
670
if (log_block_calc_checksum(block) == log_block_get_checksum(block)) {
675
if (log_block_get_hdr_no(block) == log_block_get_checksum(block)) {
677
/* We assume the log block is in the format of
678
InnoDB version < 3.23.52 and the block is ok */
681
"InnoDB: Scanned old format < InnoDB-3.23.52"
682
" log block number %lu\n",
683
log_block_get_hdr_no(block));
691
/***********************************************************************
692
Scans the log segment; n_bytes_scanned is set to the length of the valid log data scanned. */
696
recv_scan_log_seg_for_backup(
697
/*=========================*/
698
byte* buf, /* in: buffer containing log data */
699
ulint buf_len, /* in: data length in that buffer */
700
dulint* scanned_lsn, /* in/out: lsn of buffer start,
701
we return scanned lsn */
702
ulint* scanned_checkpoint_no,
703
/* in/out: 4 lowest bytes of the
704
highest scanned checkpoint number so
706
ulint* n_bytes_scanned)/* out: how much we were able to
707
scan, smaller than buf_len if log
714
*n_bytes_scanned = 0;
716
for (log_block = buf; log_block < buf + buf_len;
717
log_block += OS_FILE_LOG_BLOCK_SIZE) {
719
no = log_block_get_hdr_no(log_block);
722
fprintf(stderr, "Log block header no %lu\n", no);
725
if (no != log_block_convert_lsn_to_no(*scanned_lsn)
726
|| !log_block_checksum_is_ok_or_old_format(log_block)) {
729
"Log block n:o %lu, scanned lsn n:o %lu\n",
730
no, log_block_convert_lsn_to_no(*scanned_lsn));
732
/* Garbage or an incompletely written log block */
734
log_block += OS_FILE_LOG_BLOCK_SIZE;
737
"Next log block n:o %lu\n",
738
log_block_get_hdr_no(log_block));
743
if (*scanned_checkpoint_no > 0
744
&& log_block_get_checkpoint_no(log_block)
745
< *scanned_checkpoint_no
746
&& *scanned_checkpoint_no
747
- log_block_get_checkpoint_no(log_block)
750
/* Garbage from a log buffer flush which was made
751
before the most recent database recovery */
754
"Scanned cp n:o %lu, block cp n:o %lu\n",
755
*scanned_checkpoint_no,
756
log_block_get_checkpoint_no(log_block));
761
data_len = log_block_get_data_len(log_block);
763
*scanned_checkpoint_no
764
= log_block_get_checkpoint_no(log_block);
765
*scanned_lsn = ut_dulint_add(*scanned_lsn, data_len);
767
*n_bytes_scanned += data_len;
769
if (data_len < OS_FILE_LOG_BLOCK_SIZE) {
770
/* Log data ends here */
773
fprintf(stderr, "Log block data len %lu\n",
781
/***********************************************************************
782
Tries to parse a single log record body and also applies it to a page if
783
specified. File ops are parsed, but not applied in this function. */
786
recv_parse_or_apply_log_rec_body(
787
/*=============================*/
788
/* out: log record end, NULL if not a complete
790
byte type, /* in: type */
791
byte* ptr, /* in: pointer to a buffer */
792
byte* end_ptr,/* in: pointer to the buffer end */
793
page_t* page, /* in: buffer page or NULL; if not NULL, then the log
794
record is applied to the page, and the log record
795
should be complete then */
796
mtr_t* mtr) /* in: mtr or NULL; should be non-NULL if and only if
799
dict_index_t* index = NULL;
802
case MLOG_1BYTE: case MLOG_2BYTES: case MLOG_4BYTES: case MLOG_8BYTES:
803
ptr = mlog_parse_nbytes(type, ptr, end_ptr, page);
805
case MLOG_REC_INSERT: case MLOG_COMP_REC_INSERT:
806
if (NULL != (ptr = mlog_parse_index(
808
type == MLOG_COMP_REC_INSERT,
811
|| (ibool)!!page_is_comp(page)
812
== dict_table_is_comp(index->table));
813
ptr = page_cur_parse_insert_rec(FALSE, ptr, end_ptr,
817
case MLOG_REC_CLUST_DELETE_MARK: case MLOG_COMP_REC_CLUST_DELETE_MARK:
818
if (NULL != (ptr = mlog_parse_index(
820
type == MLOG_COMP_REC_CLUST_DELETE_MARK,
823
|| (ibool)!!page_is_comp(page)
824
== dict_table_is_comp(index->table));
825
ptr = btr_cur_parse_del_mark_set_clust_rec(
826
ptr, end_ptr, index, page);
829
case MLOG_COMP_REC_SEC_DELETE_MARK:
830
/* This log record type is obsolete, but we process it for
831
backward compatibility with MySQL 5.0.3 and 5.0.4. */
832
ut_a(!page || page_is_comp(page));
833
ptr = mlog_parse_index(ptr, end_ptr, TRUE, &index);
838
case MLOG_REC_SEC_DELETE_MARK:
839
ptr = btr_cur_parse_del_mark_set_sec_rec(ptr, end_ptr, page);
841
case MLOG_REC_UPDATE_IN_PLACE: case MLOG_COMP_REC_UPDATE_IN_PLACE:
842
if (NULL != (ptr = mlog_parse_index(
844
type == MLOG_COMP_REC_UPDATE_IN_PLACE,
847
|| (ibool)!!page_is_comp(page)
848
== dict_table_is_comp(index->table));
849
ptr = btr_cur_parse_update_in_place(ptr, end_ptr,
853
case MLOG_LIST_END_DELETE: case MLOG_COMP_LIST_END_DELETE:
854
case MLOG_LIST_START_DELETE: case MLOG_COMP_LIST_START_DELETE:
855
if (NULL != (ptr = mlog_parse_index(
857
type == MLOG_COMP_LIST_END_DELETE
858
|| type == MLOG_COMP_LIST_START_DELETE,
861
|| (ibool)!!page_is_comp(page)
862
== dict_table_is_comp(index->table));
863
ptr = page_parse_delete_rec_list(type, ptr, end_ptr,
867
case MLOG_LIST_END_COPY_CREATED: case MLOG_COMP_LIST_END_COPY_CREATED:
868
if (NULL != (ptr = mlog_parse_index(
870
type == MLOG_COMP_LIST_END_COPY_CREATED,
873
|| (ibool)!!page_is_comp(page)
874
== dict_table_is_comp(index->table));
875
ptr = page_parse_copy_rec_list_to_created_page(
876
ptr, end_ptr, index, page, mtr);
879
case MLOG_PAGE_REORGANIZE: case MLOG_COMP_PAGE_REORGANIZE:
880
if (NULL != (ptr = mlog_parse_index(
882
type == MLOG_COMP_PAGE_REORGANIZE,
885
|| (ibool)!!page_is_comp(page)
886
== dict_table_is_comp(index->table));
887
ptr = btr_parse_page_reorganize(ptr, end_ptr, index,
891
case MLOG_PAGE_CREATE: case MLOG_COMP_PAGE_CREATE:
892
ptr = page_parse_create(ptr, end_ptr,
893
type == MLOG_COMP_PAGE_CREATE,
896
case MLOG_UNDO_INSERT:
897
ptr = trx_undo_parse_add_undo_rec(ptr, end_ptr, page);
899
case MLOG_UNDO_ERASE_END:
900
ptr = trx_undo_parse_erase_page_end(ptr, end_ptr, page, mtr);
903
ptr = trx_undo_parse_page_init(ptr, end_ptr, page, mtr);
905
case MLOG_UNDO_HDR_DISCARD:
906
ptr = trx_undo_parse_discard_latest(ptr, end_ptr, page, mtr);
908
case MLOG_UNDO_HDR_CREATE:
909
case MLOG_UNDO_HDR_REUSE:
910
ptr = trx_undo_parse_page_header(type, ptr, end_ptr,
913
case MLOG_REC_MIN_MARK: case MLOG_COMP_REC_MIN_MARK:
914
ptr = btr_parse_set_min_rec_mark(
915
ptr, end_ptr, type == MLOG_COMP_REC_MIN_MARK,
918
case MLOG_REC_DELETE: case MLOG_COMP_REC_DELETE:
919
if (NULL != (ptr = mlog_parse_index(
921
type == MLOG_COMP_REC_DELETE,
924
|| (ibool)!!page_is_comp(page)
925
== dict_table_is_comp(index->table));
926
ptr = page_cur_parse_delete_rec(ptr, end_ptr,
930
case MLOG_IBUF_BITMAP_INIT:
931
ptr = ibuf_parse_bitmap_init(ptr, end_ptr, page, mtr);
933
case MLOG_INIT_FILE_PAGE:
934
ptr = fsp_parse_init_file_page(ptr, end_ptr, page);
936
case MLOG_WRITE_STRING:
937
ptr = mlog_parse_string(ptr, end_ptr, page);
939
case MLOG_FILE_CREATE:
940
case MLOG_FILE_RENAME:
941
case MLOG_FILE_DELETE:
942
ptr = fil_op_log_parse_or_replay(ptr, end_ptr, type, FALSE,
947
recv_sys->found_corrupt_log = TRUE;
951
dict_table_t* table = index->table;
953
dict_mem_index_free(index);
954
dict_mem_table_free(table);
960
/*************************************************************************
961
Calculates the fold value of a page file address: used in inserting or
962
searching for a log record in the hash table. */
967
/* out: folded value */
968
ulint space, /* in: space */
969
ulint page_no)/* in: page number */
971
return(ut_fold_ulint_pair(space, page_no));
974
/*************************************************************************
975
Calculates the hash value of a page file address: used in inserting or
976
searching for a log record in the hash table. */
981
/* out: folded value */
982
ulint space, /* in: space */
983
ulint page_no)/* in: page number */
985
return(hash_calc_hash(recv_fold(space, page_no), recv_sys->addr_hash));
988
/*************************************************************************
989
Gets the hashed file address struct for a page. */
992
recv_get_fil_addr_struct(
993
/*=====================*/
994
/* out: file address struct, NULL if not found from
996
ulint space, /* in: space id */
997
ulint page_no)/* in: page number */
999
recv_addr_t* recv_addr;
1001
recv_addr = HASH_GET_FIRST(recv_sys->addr_hash,
1002
recv_hash(space, page_no));
1004
if ((recv_addr->space == space)
1005
&& (recv_addr->page_no == page_no)) {
1010
recv_addr = HASH_GET_NEXT(addr_hash, recv_addr);
1016
/***********************************************************************
1017
Adds a new log record to the hash table of log records. */
1020
recv_add_to_hash_table(
1021
/*===================*/
1022
byte type, /* in: log record type */
1023
ulint space, /* in: space id */
1024
ulint page_no, /* in: page number */
1025
byte* body, /* in: log record body */
1026
byte* rec_end, /* in: log record end */
1027
dulint start_lsn, /* in: start lsn of the mtr */
1028
dulint end_lsn) /* in: end lsn of the mtr */
1032
recv_data_t* recv_data;
1033
recv_data_t** prev_field;
1034
recv_addr_t* recv_addr;
1036
if (fil_tablespace_deleted_or_being_deleted_in_mem(space, -1)) {
1037
/* The tablespace does not exist any more: do not store the
1043
len = rec_end - body;
1045
recv = mem_heap_alloc(recv_sys->heap, sizeof(recv_t));
1047
recv->len = rec_end - body;
1048
recv->start_lsn = start_lsn;
1049
recv->end_lsn = end_lsn;
1051
recv_addr = recv_get_fil_addr_struct(space, page_no);
1053
if (recv_addr == NULL) {
1054
recv_addr = mem_heap_alloc(recv_sys->heap,
1055
sizeof(recv_addr_t));
1056
recv_addr->space = space;
1057
recv_addr->page_no = page_no;
1058
recv_addr->state = RECV_NOT_PROCESSED;
1060
UT_LIST_INIT(recv_addr->rec_list);
1062
HASH_INSERT(recv_addr_t, addr_hash, recv_sys->addr_hash,
1063
recv_fold(space, page_no), recv_addr);
1064
recv_sys->n_addrs++;
1066
fprintf(stderr, "Inserting log rec for space %lu, page %lu\n",
1071
UT_LIST_ADD_LAST(rec_list, recv_addr->rec_list, recv);
1073
prev_field = &(recv->data);
1075
/* Store the log record body in chunks of less than UNIV_PAGE_SIZE:
1076
recv_sys->heap grows into the buffer pool, and bigger chunks could not
1079
while (rec_end > body) {
1081
len = rec_end - body;
1083
if (len > RECV_DATA_BLOCK_SIZE) {
1084
len = RECV_DATA_BLOCK_SIZE;
1087
recv_data = mem_heap_alloc(recv_sys->heap,
1088
sizeof(recv_data_t) + len);
1089
*prev_field = recv_data;
1091
ut_memcpy(((byte*)recv_data) + sizeof(recv_data_t), body, len);
1093
prev_field = &(recv_data->next);
1101
/*************************************************************************
1102
Copies the log record body from recv to buf. */
1105
recv_data_copy_to_buf(
1106
/*==================*/
1107
byte* buf, /* in: buffer of length at least recv->len */
1108
recv_t* recv) /* in: log record */
1110
recv_data_t* recv_data;
1115
recv_data = recv->data;
1118
if (len > RECV_DATA_BLOCK_SIZE) {
1119
part_len = RECV_DATA_BLOCK_SIZE;
1124
ut_memcpy(buf, ((byte*)recv_data) + sizeof(recv_data_t),
1129
recv_data = recv_data->next;
1133
/****************************************************************************
1134
Applies the hashed log records to the page, if the page lsn is less than the
1135
lsn of a log record. This can be called when a buffer page has just been
1136
read in, or also for a page already in the buffer pool. */
1141
ibool recover_backup, /* in: TRUE if we are recovering a backup
1142
page: then we do not acquire any latches
1143
since the page was read in outside the
1145
ibool just_read_in, /* in: TRUE if the i/o-handler calls this for
1146
a freshly read page */
1147
page_t* page, /* in: buffer page */
1148
ulint space, /* in: space id */
1149
ulint page_no) /* in: page number */
1151
buf_block_t* block = NULL;
1152
recv_addr_t* recv_addr;
1158
dulint page_newest_lsn;
1159
ibool modification_to_page;
1163
mutex_enter(&(recv_sys->mutex));
1165
if (recv_sys->apply_log_recs == FALSE) {
1167
/* Log records should not be applied now */
1169
mutex_exit(&(recv_sys->mutex));
1174
recv_addr = recv_get_fil_addr_struct(space, page_no);
1176
if ((recv_addr == NULL)
1177
|| (recv_addr->state == RECV_BEING_PROCESSED)
1178
|| (recv_addr->state == RECV_PROCESSED)) {
1180
mutex_exit(&(recv_sys->mutex));
1186
fprintf(stderr, "Recovering space %lu, page %lu\n", space, page_no);
1189
recv_addr->state = RECV_BEING_PROCESSED;
1191
mutex_exit(&(recv_sys->mutex));
1194
mtr_set_log_mode(&mtr, MTR_LOG_NONE);
1196
if (!recover_backup) {
1197
block = buf_block_align(page);
1200
/* Move the ownership of the x-latch on the
1201
page to this OS thread, so that we can acquire
1202
a second x-latch on it. This is needed for the
1203
operations to the page to pass the debug
1206
rw_lock_x_lock_move_ownership(&(block->lock));
1209
success = buf_page_get_known_nowait(RW_X_LATCH, page,
1215
#ifdef UNIV_SYNC_DEBUG
1216
buf_page_dbg_add_level(page, SYNC_NO_ORDER_CHECK);
1217
#endif /* UNIV_SYNC_DEBUG */
1220
/* Read the newest modification lsn from the page */
1221
page_lsn = mach_read_from_8(page + FIL_PAGE_LSN);
1223
if (!recover_backup) {
1224
/* It may be that the page has been modified in the buffer
1225
pool: read the newest modification lsn there */
1227
page_newest_lsn = buf_frame_get_newest_modification(page);
1229
if (!ut_dulint_is_zero(page_newest_lsn)) {
1231
page_lsn = page_newest_lsn;
1234
/* In recovery from a backup we do not really use the buffer
1237
page_newest_lsn = ut_dulint_zero;
1240
modification_to_page = FALSE;
1241
start_lsn = end_lsn = ut_dulint_zero;
1243
recv = UT_LIST_GET_FIRST(recv_addr->rec_list);
1246
end_lsn = recv->end_lsn;
1248
if (recv->len > RECV_DATA_BLOCK_SIZE) {
1249
/* We have to copy the record body to a separate
1252
buf = mem_alloc(recv->len);
1254
recv_data_copy_to_buf(buf, recv);
1256
buf = ((byte*)(recv->data)) + sizeof(recv_data_t);
1259
if (recv->type == MLOG_INIT_FILE_PAGE) {
1260
page_lsn = page_newest_lsn;
1262
mach_write_to_8(page + UNIV_PAGE_SIZE
1263
- FIL_PAGE_END_LSN_OLD_CHKSUM,
1265
mach_write_to_8(page + FIL_PAGE_LSN, ut_dulint_zero);
1268
if (ut_dulint_cmp(recv->start_lsn, page_lsn) >= 0) {
1270
if (!modification_to_page) {
1272
modification_to_page = TRUE;
1273
start_lsn = recv->start_lsn;
1277
if (log_debug_writes) {
1279
"InnoDB: Applying log rec"
1281
" to space %lu page no %lu\n",
1282
(ulong) recv->type, (ulong) recv->len,
1283
(ulong) recv_addr->space,
1284
(ulong) recv_addr->page_no);
1286
#endif /* UNIV_DEBUG */
1288
recv_parse_or_apply_log_rec_body(recv->type, buf,
1291
mach_write_to_8(page + UNIV_PAGE_SIZE
1292
- FIL_PAGE_END_LSN_OLD_CHKSUM,
1293
ut_dulint_add(recv->start_lsn,
1295
mach_write_to_8(page + FIL_PAGE_LSN,
1296
ut_dulint_add(recv->start_lsn,
1300
if (recv->len > RECV_DATA_BLOCK_SIZE) {
1304
recv = UT_LIST_GET_NEXT(rec_list, recv);
1307
mutex_enter(&(recv_sys->mutex));
1309
if (ut_dulint_cmp(recv_max_page_lsn, page_lsn) < 0) {
1310
recv_max_page_lsn = page_lsn;
1313
recv_addr->state = RECV_PROCESSED;
1315
ut_a(recv_sys->n_addrs);
1316
recv_sys->n_addrs--;
1318
mutex_exit(&(recv_sys->mutex));
1320
if (!recover_backup && modification_to_page) {
1323
buf_flush_recv_note_modification(block, start_lsn, end_lsn);
1326
/* Make sure that committing mtr does not change the modification
1327
lsn values of page */
1329
mtr.modifications = FALSE;
1334
/***********************************************************************
1335
Reads in pages which have hashed log records, from an area around a given page number. */
1341
/* out: number of pages found */
1342
ulint space, /* in: space */
1343
ulint page_no)/* in: page number */
1345
recv_addr_t* recv_addr;
1346
ulint page_nos[RECV_READ_AHEAD_AREA];
1350
low_limit = page_no - (page_no % RECV_READ_AHEAD_AREA);
1354
for (page_no = low_limit; page_no < low_limit + RECV_READ_AHEAD_AREA;
1356
recv_addr = recv_get_fil_addr_struct(space, page_no);
1358
if (recv_addr && !buf_page_peek(space, page_no)) {
1360
mutex_enter(&(recv_sys->mutex));
1362
if (recv_addr->state == RECV_NOT_PROCESSED) {
1363
recv_addr->state = RECV_BEING_READ;
1365
page_nos[n] = page_no;
1370
mutex_exit(&(recv_sys->mutex));
1374
buf_read_recv_pages(FALSE, space, page_nos, n);
1376
fprintf(stderr, "Recv pages at %lu n %lu\n", page_nos[0], n);
1381
/***********************************************************************
1382
Empties the hash table of stored log records, applying them to the appropriate pages. */
1386
recv_apply_hashed_log_recs(
1387
/*=======================*/
1388
ibool allow_ibuf) /* in: if TRUE, also ibuf operations are
1389
allowed during the application; if FALSE,
1390
no ibuf operations are allowed, and after
1391
the application all file pages are flushed to
1392
disk and invalidated in buffer pool: this
1393
alternative means that no new log records
1394
can be generated during the application;
1395
the caller must in this case own the log
1398
recv_addr_t* recv_addr;
1404
ibool has_printed = FALSE;
1407
mutex_enter(&(recv_sys->mutex));
1409
if (recv_sys->apply_batch_on) {
1411
mutex_exit(&(recv_sys->mutex));
1413
os_thread_sleep(500000);
1418
ut_ad(!allow_ibuf == mutex_own(&log_sys->mutex));
1421
recv_no_ibuf_operations = TRUE;
1424
recv_sys->apply_log_recs = TRUE;
1425
recv_sys->apply_batch_on = TRUE;
1427
for (i = 0; i < hash_get_n_cells(recv_sys->addr_hash); i++) {
1429
recv_addr = HASH_GET_FIRST(recv_sys->addr_hash, i);
1432
space = recv_addr->space;
1433
page_no = recv_addr->page_no;
1435
if (recv_addr->state == RECV_NOT_PROCESSED) {
1437
ut_print_timestamp(stderr);
1438
fputs(" InnoDB: Starting an"
1439
" apply batch of log records"
1440
" to the database...\n"
1441
"InnoDB: Progress in percents: ",
1446
mutex_exit(&(recv_sys->mutex));
1448
if (buf_page_peek(space, page_no)) {
1452
page = buf_page_get(space, page_no,
1455
#ifdef UNIV_SYNC_DEBUG
1456
buf_page_dbg_add_level(
1457
page, SYNC_NO_ORDER_CHECK);
1458
#endif /* UNIV_SYNC_DEBUG */
1459
recv_recover_page(FALSE, FALSE, page,
1463
recv_read_in_area(space, page_no);
1466
mutex_enter(&(recv_sys->mutex));
1469
recv_addr = HASH_GET_NEXT(addr_hash, recv_addr);
1473
&& (i * 100) / hash_get_n_cells(recv_sys->addr_hash)
1475
/ hash_get_n_cells(recv_sys->addr_hash)) {
1477
fprintf(stderr, "%lu ", (ulong)
1479
/ hash_get_n_cells(recv_sys->addr_hash)));
1483
/* Wait until all the pages have been processed */
1485
while (recv_sys->n_addrs != 0) {
1487
mutex_exit(&(recv_sys->mutex));
1489
os_thread_sleep(500000);
1491
mutex_enter(&(recv_sys->mutex));
1496
fprintf(stderr, "\n");
1500
/* Flush all the file pages to disk and invalidate them in
1503
mutex_exit(&(recv_sys->mutex));
1504
mutex_exit(&(log_sys->mutex));
1506
n_pages = buf_flush_batch(BUF_FLUSH_LIST, ULINT_MAX,
1508
ut_a(n_pages != ULINT_UNDEFINED);
1510
buf_flush_wait_batch_end(BUF_FLUSH_LIST);
1512
buf_pool_invalidate();
1514
mutex_enter(&(log_sys->mutex));
1515
mutex_enter(&(recv_sys->mutex));
1517
recv_no_ibuf_operations = FALSE;
1520
recv_sys->apply_log_recs = FALSE;
1521
recv_sys->apply_batch_on = FALSE;
1523
recv_sys_empty_hash();
1526
fprintf(stderr, "InnoDB: Apply batch completed\n");
1529
mutex_exit(&(recv_sys->mutex));
1532
/* This page is allocated from the buffer pool and used in the function
1534
static page_t* recv_backup_application_page = NULL;
1536
/***********************************************************************
1537
Applies log records in the hash table to a backup. */
1540
recv_apply_log_recs_for_backup(void)
1541
/*================================*/
1543
recv_addr_t* recv_addr;
1551
recv_sys->apply_log_recs = TRUE;
1552
recv_sys->apply_batch_on = TRUE;
1554
if (recv_backup_application_page == NULL) {
1555
recv_backup_application_page = buf_frame_alloc();
1558
page = recv_backup_application_page;
1560
fputs("InnoDB: Starting an apply batch of log records"
1561
" to the database...\n"
1562
"InnoDB: Progress in percents: ", stderr);
1564
n_hash_cells = hash_get_n_cells(recv_sys->addr_hash);
1566
for (i = 0; i < n_hash_cells; i++) {
1567
/* The address hash table is externally chained */
1568
recv_addr = hash_get_nth_cell(recv_sys->addr_hash, i)->node;
1570
while (recv_addr != NULL) {
1572
if (!fil_tablespace_exists_in_mem(recv_addr->space)) {
1575
"InnoDB: Warning: cannot apply"
1577
" tablespace %lu page %lu,\n"
1578
"InnoDB: because tablespace with"
1579
" that id does not exist.\n",
1580
recv_addr->space, recv_addr->page_no);
1582
recv_addr->state = RECV_PROCESSED;
1584
ut_a(recv_sys->n_addrs);
1585
recv_sys->n_addrs--;
1587
goto skip_this_recv_addr;
1590
/* We simulate a page read made by the buffer pool, to
1591
make sure the recovery apparatus works ok, for
1592
example, the buf_frame_align() function. We must init
1593
the block corresponding to buf_pool->frame_zero
1596
buf_page_init_for_backup_restore(
1597
recv_addr->space, recv_addr->page_no,
1598
buf_block_align(page));
1600
/* Extend the tablespace's last file if the page_no
1601
does not fall inside its bounds; we assume the last
1602
file is auto-extending, and ibbackup copied the file
1603
when it still was smaller */
1605
success = fil_extend_space_to_desired_size(
1607
recv_addr->space, recv_addr->page_no + 1);
1610
"InnoDB: Fatal error: cannot extend"
1611
" tablespace %lu to hold %lu pages\n",
1612
recv_addr->space, recv_addr->page_no);
1617
/* Read the page from the tablespace file using the
1618
fil0fil.c routines */
1620
error = fil_io(OS_FILE_READ, TRUE, recv_addr->space,
1621
recv_addr->page_no, 0, UNIV_PAGE_SIZE,
1623
if (error != DB_SUCCESS) {
1625
"InnoDB: Fatal error: cannot read"
1627
" %lu page number %lu\n",
1628
(ulong) recv_addr->space,
1629
(ulong) recv_addr->page_no);
1634
/* Apply the log records to this page */
1635
recv_recover_page(TRUE, FALSE, page, recv_addr->space,
1636
recv_addr->page_no);
1638
/* Write the page back to the tablespace file using the
1639
fil0fil.c routines */
1641
buf_flush_init_for_writing(
1642
page, mach_read_from_8(page + FIL_PAGE_LSN),
1643
recv_addr->space, recv_addr->page_no);
1645
error = fil_io(OS_FILE_WRITE, TRUE, recv_addr->space,
1646
recv_addr->page_no, 0, UNIV_PAGE_SIZE,
1648
skip_this_recv_addr:
1649
recv_addr = HASH_GET_NEXT(addr_hash, recv_addr);
1652
if ((100 * i) / n_hash_cells
1653
!= (100 * (i + 1)) / n_hash_cells) {
1654
fprintf(stderr, "%lu ",
1655
(ulong) ((100 * i) / n_hash_cells));
1660
recv_sys_empty_hash();
1663
/***********************************************************************
1664
Tries to parse a single log record and returns its length. */
1669
/* out: length of the record, or 0 if the record was
1671
byte* ptr, /* in: pointer to a buffer */
1672
byte* end_ptr,/* in: pointer to the buffer end */
1673
byte* type, /* out: type */
1674
ulint* space, /* out: space id */
1675
ulint* page_no,/* out: page number */
1676
byte** body) /* out: log record body start */
1682
if (ptr == end_ptr) {
1687
if (*ptr == MLOG_MULTI_REC_END) {
1694
if (*ptr == MLOG_DUMMY_RECORD) {
1697
*space = ULINT_UNDEFINED - 1; /* For debugging */
1702
new_ptr = mlog_parse_initial_log_record(ptr, end_ptr, type, space,
1706
if (UNIV_UNLIKELY(!new_ptr)) {
1711
/* Check that page_no is sensible */
1713
if (UNIV_UNLIKELY(*page_no > 0x8FFFFFFFUL)) {
1715
recv_sys->found_corrupt_log = TRUE;
1720
new_ptr = recv_parse_or_apply_log_rec_body(*type, new_ptr, end_ptr,
1722
if (UNIV_UNLIKELY(new_ptr == NULL)) {
1727
if (*page_no > recv_max_parsed_page_no) {
1728
recv_max_parsed_page_no = *page_no;
1731
return(new_ptr - ptr);
1734
/***********************************************************
1735
Calculates the new value for lsn when more data is added to the log. */
1738
recv_calc_lsn_on_data_add(
1739
/*======================*/
1740
dulint lsn, /* in: old lsn */
1741
ulint len) /* in: this many bytes of data is added, log block
1742
headers not included */
1747
frag_len = (ut_dulint_get_low(lsn) % OS_FILE_LOG_BLOCK_SIZE)
1748
- LOG_BLOCK_HDR_SIZE;
1749
ut_ad(frag_len < OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE
1750
- LOG_BLOCK_TRL_SIZE);
1751
lsn_len = len + ((len + frag_len)
1752
/ (OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE
1753
- LOG_BLOCK_TRL_SIZE))
1754
* (LOG_BLOCK_HDR_SIZE + LOG_BLOCK_TRL_SIZE);
1756
return(ut_dulint_add(lsn, lsn_len));
1759
/***********************************************************
1760
Checks that the parser recognizes incomplete initial segments of a log
1761
record as incomplete. */
1764
recv_check_incomplete_log_recs(
1765
/*===========================*/
1766
byte* ptr, /* in: pointer to a complete log record */
1767
ulint len) /* in: length of the log record */
1775
for (i = 0; i < len; i++) {
1776
ut_a(0 == recv_parse_log_rec(ptr, ptr + i, &type, &space,
1781
/***********************************************************
1782
Prints diagnostic info of corrupt log. */
1785
recv_report_corrupt_log(
1786
/*====================*/
1787
byte* ptr, /* in: pointer to corrupt log record */
1788
byte type, /* in: type of the record */
1789
ulint space, /* in: space id, this may also be garbage */
1790
ulint page_no)/* in: page number, this may also be garbage */
1793
"InnoDB: ############### CORRUPT LOG RECORD FOUND\n"
1794
"InnoDB: Log record type %lu, space id %lu, page number %lu\n"
1795
"InnoDB: Log parsing proceeded successfully up to %lu %lu\n"
1796
"InnoDB: Previous log record type %lu, is multi %lu\n"
1797
"InnoDB: Recv offset %lu, prev %lu\n",
1798
(ulong) type, (ulong) space, (ulong) page_no,
1799
(ulong) ut_dulint_get_high(recv_sys->recovered_lsn),
1800
(ulong) ut_dulint_get_low(recv_sys->recovered_lsn),
1801
(ulong) recv_previous_parsed_rec_type,
1802
(ulong) recv_previous_parsed_rec_is_multi,
1803
(ulong) (ptr - recv_sys->buf),
1804
(ulong) recv_previous_parsed_rec_offset);
1806
if ((ulint)(ptr - recv_sys->buf + 100)
1807
> recv_previous_parsed_rec_offset
1808
&& (ulint)(ptr - recv_sys->buf + 100
1809
- recv_previous_parsed_rec_offset)
1811
fputs("InnoDB: Hex dump of corrupt log starting"
1812
" 100 bytes before the start\n"
1813
"InnoDB: of the previous log rec,\n"
1814
"InnoDB: and ending 100 bytes after the start"
1815
" of the corrupt rec:\n",
1818
ut_print_buf(stderr,
1820
+ recv_previous_parsed_rec_offset - 100,
1821
ptr - recv_sys->buf + 200
1822
- recv_previous_parsed_rec_offset);
1826
fputs("InnoDB: WARNING: the log file may have been corrupt and it\n"
1827
"InnoDB: is possible that the log scan did not proceed\n"
1828
"InnoDB: far enough in recovery! Please run CHECK TABLE\n"
1829
"InnoDB: on your InnoDB tables to check that they are ok!\n"
1830
"InnoDB: If mysqld crashes after this recovery, look at\n"
1831
"InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
1832
"forcing-recovery.html\n"
1833
"InnoDB: about forcing recovery.\n", stderr);
1838
/***********************************************************
1839
Parses log records from a buffer and stores them to a hash table to wait
1840
merging to file pages. */
1843
recv_parse_log_recs(
1844
/*================*/
1845
/* out: currently always returns FALSE */
1846
ibool store_to_hash) /* in: TRUE if the records should be stored
1847
to the hash table; this is set to FALSE if just
1848
debug checking is needed */
1855
dulint new_recovered_lsn;
1863
ut_ad(mutex_own(&(log_sys->mutex)));
1864
ut_ad(!ut_dulint_is_zero(recv_sys->parse_start_lsn));
1866
ptr = recv_sys->buf + recv_sys->recovered_offset;
1868
end_ptr = recv_sys->buf + recv_sys->len;
1870
if (ptr == end_ptr) {
1875
single_rec = (ulint)*ptr & MLOG_SINGLE_REC_FLAG;
1877
if (single_rec || *ptr == MLOG_DUMMY_RECORD) {
1878
/* The mtr only modified a single page, or this is a file op */
1880
old_lsn = recv_sys->recovered_lsn;
1882
/* Try to parse a log record, fetching its type, space id,
1883
page no, and a pointer to the body of the log record */
1885
len = recv_parse_log_rec(ptr, end_ptr, &type, &space,
1888
if (len == 0 || recv_sys->found_corrupt_log) {
1889
if (recv_sys->found_corrupt_log) {
1891
recv_report_corrupt_log(ptr,
1892
type, space, page_no);
1898
new_recovered_lsn = recv_calc_lsn_on_data_add(old_lsn, len);
1900
if (ut_dulint_cmp(new_recovered_lsn, recv_sys->scanned_lsn)
1902
/* The log record filled a log block, and we require
1903
that also the next log block should have been scanned
1909
recv_previous_parsed_rec_type = (ulint)type;
1910
recv_previous_parsed_rec_offset = recv_sys->recovered_offset;
1911
recv_previous_parsed_rec_is_multi = 0;
1913
recv_sys->recovered_offset += len;
1914
recv_sys->recovered_lsn = new_recovered_lsn;
1917
if (log_debug_writes) {
1919
"InnoDB: Parsed a single log rec"
1920
" type %lu len %lu space %lu page no %lu\n",
1921
(ulong) type, (ulong) len, (ulong) space,
1924
#endif /* UNIV_DEBUG */
1926
if (type == MLOG_DUMMY_RECORD) {
1929
} else if (store_to_hash && (type == MLOG_FILE_CREATE
1930
|| type == MLOG_FILE_RENAME
1931
|| type == MLOG_FILE_DELETE)) {
1932
#ifdef UNIV_HOTBACKUP
1933
if (recv_replay_file_ops) {
1935
/* In ibbackup --apply-log, replay an .ibd file
1936
operation, if possible; note that
1937
fil_path_to_mysql_datadir is set in ibbackup to
1938
point to the datadir we should use there */
1940
if (NULL == fil_op_log_parse_or_replay(
1941
body, end_ptr, type, TRUE,
1944
"InnoDB: Error: file op"
1945
" log record of type %lu"
1946
" space %lu not complete in\n"
1947
"InnoDB: the replay phase."
1956
/* In normal mysqld crash recovery we do not try to
1957
replay file operations */
1958
} else if (store_to_hash) {
1959
recv_add_to_hash_table(type, space, page_no, body,
1961
recv_sys->recovered_lsn);
1963
#ifdef UNIV_LOG_DEBUG
1964
recv_check_incomplete_log_recs(ptr, len);
1965
#endif/* UNIV_LOG_DEBUG */
1968
/* Check that all the records associated with the single mtr
1969
are included within the buffer */
1975
len = recv_parse_log_rec(ptr, end_ptr, &type, &space,
1977
if (len == 0 || recv_sys->found_corrupt_log) {
1979
if (recv_sys->found_corrupt_log) {
1981
recv_report_corrupt_log(
1982
ptr, type, space, page_no);
1988
recv_previous_parsed_rec_type = (ulint)type;
1989
recv_previous_parsed_rec_offset
1990
= recv_sys->recovered_offset + total_len;
1991
recv_previous_parsed_rec_is_multi = 1;
1993
if ((!store_to_hash) && (type != MLOG_MULTI_REC_END)) {
1994
#ifdef UNIV_LOG_DEBUG
1995
recv_check_incomplete_log_recs(ptr, len);
1996
#endif /* UNIV_LOG_DEBUG */
2000
if (log_debug_writes) {
2002
"InnoDB: Parsed a multi log rec"
2004
" space %lu page no %lu\n",
2005
(ulong) type, (ulong) len,
2006
(ulong) space, (ulong) page_no);
2008
#endif /* UNIV_DEBUG */
2015
if (type == MLOG_MULTI_REC_END) {
2017
/* Found the end mark for the records */
2023
new_recovered_lsn = recv_calc_lsn_on_data_add(
2024
recv_sys->recovered_lsn, total_len);
2026
if (ut_dulint_cmp(new_recovered_lsn, recv_sys->scanned_lsn)
2028
/* The log record filled a log block, and we require
2029
that also the next log block should have been scanned
2035
/* Add all the records to the hash table */
2037
ptr = recv_sys->buf + recv_sys->recovered_offset;
2040
old_lsn = recv_sys->recovered_lsn;
2041
len = recv_parse_log_rec(ptr, end_ptr, &type, &space,
2043
if (recv_sys->found_corrupt_log) {
2045
recv_report_corrupt_log(ptr,
2046
type, space, page_no);
2050
ut_a(0 == ((ulint)*ptr & MLOG_SINGLE_REC_FLAG));
2052
recv_sys->recovered_offset += len;
2053
recv_sys->recovered_lsn
2054
= recv_calc_lsn_on_data_add(old_lsn, len);
2055
if (type == MLOG_MULTI_REC_END) {
2057
/* Found the end mark for the records */
2062
if (store_to_hash) {
2063
recv_add_to_hash_table(type, space, page_no,
2076
/***********************************************************
2077
Adds data from a new log block to the parsing buffer of recv_sys if
2078
recv_sys->parse_start_lsn is non-zero. */
2081
recv_sys_add_to_parsing_buf(
2082
/*========================*/
2083
/* out: TRUE if more data added */
2084
byte* log_block, /* in: log block */
2085
dulint scanned_lsn) /* in: lsn of how far we were able to find
2086
data in this log block */
2093
ut_ad(ut_dulint_cmp(scanned_lsn, recv_sys->scanned_lsn) >= 0);
2095
if (ut_dulint_is_zero(recv_sys->parse_start_lsn)) {
2096
/* Cannot start parsing yet because no start point for
2102
data_len = log_block_get_data_len(log_block);
2104
if (ut_dulint_cmp(recv_sys->parse_start_lsn, scanned_lsn) >= 0) {
2108
} else if (ut_dulint_cmp(recv_sys->scanned_lsn, scanned_lsn) >= 0) {
2112
} else if (ut_dulint_cmp(recv_sys->parse_start_lsn,
2113
recv_sys->scanned_lsn) > 0) {
2114
more_len = ut_dulint_minus(scanned_lsn,
2115
recv_sys->parse_start_lsn);
2117
more_len = ut_dulint_minus(scanned_lsn, recv_sys->scanned_lsn);
2120
if (more_len == 0) {
2125
ut_ad(data_len >= more_len);
2127
start_offset = data_len - more_len;
2129
if (start_offset < LOG_BLOCK_HDR_SIZE) {
2130
start_offset = LOG_BLOCK_HDR_SIZE;
2133
end_offset = data_len;
2135
if (end_offset > OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) {
2136
end_offset = OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE;
2139
ut_ad(start_offset <= end_offset);
2141
if (start_offset < end_offset) {
2142
ut_memcpy(recv_sys->buf + recv_sys->len,
2143
log_block + start_offset, end_offset - start_offset);
2145
recv_sys->len += end_offset - start_offset;
2147
ut_a(recv_sys->len <= RECV_PARSING_BUF_SIZE);
2153
/***********************************************************
2154
Moves the parsing buffer data left to the buffer start. */
2157
recv_sys_justify_left_parsing_buf(void)
2158
/*===================================*/
2160
ut_memmove(recv_sys->buf, recv_sys->buf + recv_sys->recovered_offset,
2161
recv_sys->len - recv_sys->recovered_offset);
2163
recv_sys->len -= recv_sys->recovered_offset;
2165
recv_sys->recovered_offset = 0;
2168
/***********************************************************
2169
Scans log from a buffer and stores new log data to the parsing buffer. Parses
2170
and hashes the log records if new data found. */
2175
/* out: TRUE if limit_lsn has been reached, or
2176
not able to scan any more in this log group */
2177
ibool apply_automatically,/* in: TRUE if we want this function to
2178
apply log records automatically when the
2179
hash table becomes full; in the hot backup tool
2180
the tool does the applying, not this
2182
ulint available_memory,/* in: we let the hash table of recs to grow
2183
to this size, at the maximum */
2184
ibool store_to_hash, /* in: TRUE if the records should be stored
2185
to the hash table; this is set to FALSE if just
2186
debug checking is needed */
2187
byte* buf, /* in: buffer containing a log segment or
2189
ulint len, /* in: buffer length */
2190
dulint start_lsn, /* in: buffer start lsn */
2191
dulint* contiguous_lsn, /* in/out: it is known that all log groups
2192
contain contiguous log data up to this lsn */
2193
dulint* group_scanned_lsn)/* out: scanning succeeded up to this lsn */
2202
ut_ad(ut_dulint_get_low(start_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0);
2203
ut_ad(len % OS_FILE_LOG_BLOCK_SIZE == 0);
2205
ut_a(apply_automatically <= TRUE);
2206
ut_a(store_to_hash <= TRUE);
2211
scanned_lsn = start_lsn;
2214
while (log_block < buf + len && !finished) {
2216
no = log_block_get_hdr_no(log_block);
2218
fprintf(stderr, "Log block header no %lu\n", no);
2220
fprintf(stderr, "Scanned lsn no %lu\n",
2221
log_block_convert_lsn_to_no(scanned_lsn));
2223
if (no != log_block_convert_lsn_to_no(scanned_lsn)
2224
|| !log_block_checksum_is_ok_or_old_format(log_block)) {
2226
if (no == log_block_convert_lsn_to_no(scanned_lsn)
2227
&& !log_block_checksum_is_ok_or_old_format(
2230
"InnoDB: Log block no %lu at"
2231
" lsn %lu %lu has\n"
2232
"InnoDB: ok header, but checksum field"
2233
" contains %lu, should be %lu\n",
2235
(ulong) ut_dulint_get_high(
2237
(ulong) ut_dulint_get_low(scanned_lsn),
2238
(ulong) log_block_get_checksum(
2240
(ulong) log_block_calc_checksum(
2244
/* Garbage or an incompletely written log block */
2251
if (log_block_get_flush_bit(log_block)) {
2252
/* This block was a start of a log flush operation:
2253
we know that the previous flush operation must have
2254
been completed for all log groups before this block
2255
can have been flushed to any of the groups. Therefore,
2256
we know that log data is contiguous up to scanned_lsn
2257
in all non-corrupt log groups. */
2259
if (ut_dulint_cmp(scanned_lsn, *contiguous_lsn) > 0) {
2260
*contiguous_lsn = scanned_lsn;
2264
data_len = log_block_get_data_len(log_block);
2266
if ((store_to_hash || (data_len == OS_FILE_LOG_BLOCK_SIZE))
2267
&& (ut_dulint_cmp(ut_dulint_add(scanned_lsn, data_len),
2268
recv_sys->scanned_lsn) > 0)
2269
&& (recv_sys->scanned_checkpoint_no > 0)
2270
&& (log_block_get_checkpoint_no(log_block)
2271
< recv_sys->scanned_checkpoint_no)
2272
&& (recv_sys->scanned_checkpoint_no
2273
- log_block_get_checkpoint_no(log_block)
2276
/* Garbage from a log buffer flush which was made
2277
before the most recent database recovery */
2280
#ifdef UNIV_LOG_DEBUG
2281
/* This is not really an error, but currently
2282
we stop here in the debug version: */
2289
if (ut_dulint_is_zero(recv_sys->parse_start_lsn)
2290
&& (log_block_get_first_rec_group(log_block) > 0)) {
2292
/* We found a point from which to start the parsing
2295
recv_sys->parse_start_lsn
2296
= ut_dulint_add(scanned_lsn,
2297
log_block_get_first_rec_group(
2299
recv_sys->scanned_lsn = recv_sys->parse_start_lsn;
2300
recv_sys->recovered_lsn = recv_sys->parse_start_lsn;
2303
scanned_lsn = ut_dulint_add(scanned_lsn, data_len);
2305
if (ut_dulint_cmp(scanned_lsn, recv_sys->scanned_lsn) > 0) {
2307
/* We have found more entries. If this scan is
2308
of startup type, we must initialize the crash recovery
2309
environment before parsing these log records. */
2311
if (recv_log_scan_is_startup_type
2312
&& !recv_needed_recovery) {
2315
"InnoDB: Log scan progressed"
2316
" past the checkpoint lsn %lu %lu\n",
2317
(ulong) ut_dulint_get_high(
2318
recv_sys->scanned_lsn),
2319
(ulong) ut_dulint_get_low(
2320
recv_sys->scanned_lsn));
2321
recv_init_crash_recovery();
2324
/* We were able to find more log data: add it to the
2325
parsing buffer if parse_start_lsn is already
2328
if (recv_sys->len + 4 * OS_FILE_LOG_BLOCK_SIZE
2329
>= RECV_PARSING_BUF_SIZE) {
2331
"InnoDB: Error: log parsing"
2333
" Recovery may have failed!\n");
2335
recv_sys->found_corrupt_log = TRUE;
2337
} else if (!recv_sys->found_corrupt_log) {
2338
more_data = recv_sys_add_to_parsing_buf(
2339
log_block, scanned_lsn);
2342
recv_sys->scanned_lsn = scanned_lsn;
2343
recv_sys->scanned_checkpoint_no
2344
= log_block_get_checkpoint_no(log_block);
2347
if (data_len < OS_FILE_LOG_BLOCK_SIZE) {
2348
/* Log data for this group ends here */
2352
log_block += OS_FILE_LOG_BLOCK_SIZE;
2356
*group_scanned_lsn = scanned_lsn;
2358
if (recv_needed_recovery
2359
|| (recv_is_from_backup && !recv_is_making_a_backup)) {
2360
recv_scan_print_counter++;
2362
if (finished || (recv_scan_print_counter % 80 == 0)) {
2365
"InnoDB: Doing recovery: scanned up to"
2366
" log sequence number %lu %lu\n",
2367
(ulong) ut_dulint_get_high(*group_scanned_lsn),
2368
(ulong) ut_dulint_get_low(*group_scanned_lsn));
2372
if (more_data && !recv_sys->found_corrupt_log) {
2373
/* Try to parse more log records */
2375
recv_parse_log_recs(store_to_hash);
2377
if (store_to_hash && mem_heap_get_size(recv_sys->heap)
2379
&& apply_automatically) {
2381
/* Hash table of log records has grown too big:
2382
empty it; FALSE means no ibuf operations
2383
allowed, as we cannot add new records to the
2384
log yet: they would be produced by ibuf
2387
recv_apply_hashed_log_recs(FALSE);
2390
if (recv_sys->recovered_offset > RECV_PARSING_BUF_SIZE / 4) {
2391
/* Move parsing buffer data to the buffer start */
2393
recv_sys_justify_left_parsing_buf();
2400
/***********************************************************
2401
Reads log data of a log group into the log buffer in segments of RECV_SCAN_SIZE and scans it, storing new log data to the parsing buffer. Parses
2402
and hashes the log records if new data found. */
2405
recv_group_scan_log_recs(
2406
/*=====================*/
2407
log_group_t* group, /* in: log group */
2408
dulint* contiguous_lsn, /* in/out: it is known that all log groups
2409
contain contiguous log data up to this lsn */
2410
dulint* group_scanned_lsn)/* out: scanning succeeded up to this lsn */
2418
start_lsn = *contiguous_lsn;
2421
end_lsn = ut_dulint_add(start_lsn, RECV_SCAN_SIZE);
2423
log_group_read_log_seg(LOG_RECOVER, log_sys->buf,
2424
group, start_lsn, end_lsn);
2426
finished = recv_scan_log_recs(
2427
TRUE, (buf_pool->n_frames - recv_n_pool_free_frames)
2428
* UNIV_PAGE_SIZE, TRUE, log_sys->buf, RECV_SCAN_SIZE,
2429
start_lsn, contiguous_lsn, group_scanned_lsn);
2430
start_lsn = end_lsn;
2434
if (log_debug_writes) {
2436
"InnoDB: Scanned group %lu up to"
2437
" log sequence number %lu %lu\n",
2439
(ulong) ut_dulint_get_high(*group_scanned_lsn),
2440
(ulong) ut_dulint_get_low(*group_scanned_lsn));
2442
#endif /* UNIV_DEBUG */
2445
/***********************************************************
2446
Initialize crash recovery environment. Can be called iff
2447
recv_needed_recovery == FALSE. */
2450
recv_init_crash_recovery(void)
2451
/*==========================*/
2453
ut_a(!recv_needed_recovery);
2455
recv_needed_recovery = TRUE;
2457
ut_print_timestamp(stderr);
2460
" InnoDB: Database was not"
2461
" shut down normally!\n"
2462
"InnoDB: Starting crash recovery.\n");
2465
"InnoDB: Reading tablespace information"
2466
" from the .ibd files...\n");
2468
fil_load_single_table_tablespaces();
2470
/* If we are using the doublewrite method, we will
2471
check if there are half-written pages in data files,
2472
and restore them from the doublewrite buffer if
2475
if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) {
2478
"InnoDB: Restoring possible"
2479
" half-written data pages from"
2480
" the doublewrite\n"
2481
"InnoDB: buffer...\n");
2482
trx_sys_doublewrite_init_or_restore_pages(TRUE);
2486
/************************************************************
2487
Recovers from a checkpoint. When this function returns, the database is able
2488
to start processing of new user transactions, but the function
2489
recv_recovery_from_checkpoint_finish should be called later to complete
2490
the recovery and free the resources used in it. */
2493
recv_recovery_from_checkpoint_start(
2494
/*================================*/
2495
/* out: error code or DB_SUCCESS */
2496
ulint type, /* in: LOG_CHECKPOINT or LOG_ARCHIVE */
2497
dulint limit_lsn, /* in: recover up to this lsn if possible */
2498
dulint min_flushed_lsn,/* in: min flushed lsn from data files */
2499
dulint max_flushed_lsn)/* in: max flushed lsn from data files */
2502
log_group_t* max_cp_group;
2503
log_group_t* up_to_date_group;
2505
dulint checkpoint_lsn;
2506
dulint checkpoint_no;
2507
dulint old_scanned_lsn;
2508
dulint group_scanned_lsn;
2509
dulint contiguous_lsn;
2510
dulint archived_lsn;
2513
byte log_hdr_buf[LOG_FILE_HDR_SIZE];
2516
ut_ad((type != LOG_CHECKPOINT)
2517
|| (ut_dulint_cmp(limit_lsn, ut_dulint_max) == 0));
2519
if (type == LOG_CHECKPOINT) {
2521
recv_sys_init(FALSE, buf_pool_get_curr_size());
2524
if (srv_force_recovery >= SRV_FORCE_NO_LOG_REDO) {
2526
"InnoDB: The user has set SRV_FORCE_NO_LOG_REDO on\n");
2528
"InnoDB: Skipping log redo\n");
2533
recv_recovery_on = TRUE;
2535
recv_sys->limit_lsn = limit_lsn;
2537
mutex_enter(&(log_sys->mutex));
2539
/* Look for the latest checkpoint from any of the log groups */
2541
err = recv_find_max_checkpoint(&max_cp_group, &max_cp_field);
2543
if (err != DB_SUCCESS) {
2545
mutex_exit(&(log_sys->mutex));
2550
log_group_read_checkpoint_info(max_cp_group, max_cp_field);
2552
buf = log_sys->checkpoint_buf;
2554
checkpoint_lsn = mach_read_from_8(buf + LOG_CHECKPOINT_LSN);
2555
checkpoint_no = mach_read_from_8(buf + LOG_CHECKPOINT_NO);
2556
archived_lsn = mach_read_from_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN);
2558
/* Read the first log file header to print a note if this is
2559
a recovery from a restored InnoDB Hot Backup */
2561
fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE, max_cp_group->space_id,
2562
0, 0, LOG_FILE_HDR_SIZE,
2563
log_hdr_buf, max_cp_group);
2565
if (0 == ut_memcmp(log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP,
2566
(byte*)"ibbackup", (sizeof "ibbackup") - 1)) {
2567
/* This log file was created by ibbackup --apply-log: print
2568
a note to the user about it */
2571
"InnoDB: The log file was created by"
2572
" ibbackup --apply-log at\n"
2574
log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP);
2576
"InnoDB: NOTE: the following crash recovery"
2577
" is part of a normal restore.\n");
2579
/* Wipe over the label now */
2581
memset(log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP,
2583
/* Write to the log file to wipe over the label */
2584
fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE,
2585
max_cp_group->space_id,
2586
0, 0, OS_FILE_LOG_BLOCK_SIZE,
2587
log_hdr_buf, max_cp_group);
2590
#ifdef UNIV_LOG_ARCHIVE
2591
group = UT_LIST_GET_FIRST(log_sys->log_groups);
2594
log_checkpoint_get_nth_group_info(buf, group->id,
2595
&(group->archived_file_no),
2596
&(group->archived_offset));
2598
group = UT_LIST_GET_NEXT(log_groups, group);
2600
#endif /* UNIV_LOG_ARCHIVE */
2602
if (type == LOG_CHECKPOINT) {
2603
/* Start reading the log groups from the checkpoint lsn up. The
2604
variable contiguous_lsn contains an lsn up to which the log is
2605
known to be contiguously written to all log groups. */
2607
recv_sys->parse_start_lsn = checkpoint_lsn;
2608
recv_sys->scanned_lsn = checkpoint_lsn;
2609
recv_sys->scanned_checkpoint_no = 0;
2610
recv_sys->recovered_lsn = checkpoint_lsn;
2612
srv_start_lsn = checkpoint_lsn;
2615
contiguous_lsn = ut_dulint_align_down(recv_sys->scanned_lsn,
2616
OS_FILE_LOG_BLOCK_SIZE);
2617
if (type == LOG_ARCHIVE) {
2618
/* Try to recover the remaining part from logs: first from
2619
the logs of the archived group */
2621
group = recv_sys->archive_group;
2622
capacity = log_group_get_capacity(group);
2624
if ((ut_dulint_cmp(recv_sys->scanned_lsn, ut_dulint_add(
2625
checkpoint_lsn, capacity)) > 0)
2626
|| (ut_dulint_cmp(checkpoint_lsn, ut_dulint_add(
2627
recv_sys->scanned_lsn, capacity))
2630
mutex_exit(&(log_sys->mutex));
2632
/* The group does not contain enough log: probably
2633
an archived log file was missing or corrupt */
2638
recv_group_scan_log_recs(group, &contiguous_lsn,
2639
&group_scanned_lsn);
2640
if (ut_dulint_cmp(recv_sys->scanned_lsn, checkpoint_lsn) < 0) {
2642
mutex_exit(&(log_sys->mutex));
2644
/* The group did not contain enough log: an archived
2645
log file was missing or invalid, or the log group
2651
group->scanned_lsn = group_scanned_lsn;
2652
up_to_date_group = group;
2654
up_to_date_group = max_cp_group;
2657
ut_ad(RECV_SCAN_SIZE <= log_sys->buf_size);
2659
group = UT_LIST_GET_FIRST(log_sys->log_groups);
2661
if ((type == LOG_ARCHIVE) && (group == recv_sys->archive_group)) {
2662
group = UT_LIST_GET_NEXT(log_groups, group);
2665
/* Set the flag to publish that we are doing startup scan. */
2666
recv_log_scan_is_startup_type = (type == LOG_CHECKPOINT);
2668
old_scanned_lsn = recv_sys->scanned_lsn;
2670
recv_group_scan_log_recs(group, &contiguous_lsn,
2671
&group_scanned_lsn);
2672
group->scanned_lsn = group_scanned_lsn;
2674
if (ut_dulint_cmp(old_scanned_lsn, group_scanned_lsn) < 0) {
2675
/* We found a more up-to-date group */
2677
up_to_date_group = group;
2680
if ((type == LOG_ARCHIVE)
2681
&& (group == recv_sys->archive_group)) {
2682
group = UT_LIST_GET_NEXT(log_groups, group);
2685
group = UT_LIST_GET_NEXT(log_groups, group);
2688
/* Done with startup scan. Clear the flag. */
2689
recv_log_scan_is_startup_type = FALSE;
2690
if (type == LOG_CHECKPOINT) {
2691
/* NOTE: we always do a 'recovery' at startup, but we print
2692
a message to the user about recovery only if there is
2693
something wrong: */
2695
if (ut_dulint_cmp(checkpoint_lsn, max_flushed_lsn) != 0
2696
|| ut_dulint_cmp(checkpoint_lsn, min_flushed_lsn) != 0) {
2698
if (ut_dulint_cmp(checkpoint_lsn, max_flushed_lsn)
2701
"InnoDB: #########################"
2702
"#################################\n"
2705
"InnoDB: The log sequence number"
2706
" in ibdata files is higher\n"
2707
"InnoDB: than the log sequence number"
2708
" in the ib_logfiles! Are you sure\n"
2709
"InnoDB: you are using the right"
2710
" ib_logfiles to start up"
2712
"InnoDB: Log sequence number in"
2713
" ib_logfiles is %lu %lu, log\n"
2714
"InnoDB: sequence numbers stamped"
2715
" to ibdata file headers are between\n"
2716
"InnoDB: %lu %lu and %lu %lu.\n"
2717
"InnoDB: #########################"
2718
"#################################\n",
2719
(ulong) ut_dulint_get_high(
2721
(ulong) ut_dulint_get_low(
2723
(ulong) ut_dulint_get_high(
2725
(ulong) ut_dulint_get_low(
2727
(ulong) ut_dulint_get_high(
2729
(ulong) ut_dulint_get_low(
2735
if (!recv_needed_recovery) {
2737
"InnoDB: The log sequence number"
2738
" in ibdata files does not match\n"
2739
"InnoDB: the log sequence number"
2740
" in the ib_logfiles!\n");
2741
recv_init_crash_recovery();
2745
if (!recv_needed_recovery) {
2746
/* Init the doublewrite buffer memory structure */
2747
trx_sys_doublewrite_init_or_restore_pages(FALSE);
2751
/* We currently have only one log group */
2752
if (ut_dulint_cmp(group_scanned_lsn, checkpoint_lsn) < 0) {
2753
ut_print_timestamp(stderr);
2755
" InnoDB: ERROR: We were only able to scan the log"
2757
"InnoDB: %lu %lu, but a checkpoint was at %lu %lu.\n"
2758
"InnoDB: It is possible that"
2759
" the database is now corrupt!\n",
2760
(ulong) ut_dulint_get_high(group_scanned_lsn),
2761
(ulong) ut_dulint_get_low(group_scanned_lsn),
2762
(ulong) ut_dulint_get_high(checkpoint_lsn),
2763
(ulong) ut_dulint_get_low(checkpoint_lsn));
2766
if (ut_dulint_cmp(group_scanned_lsn, recv_max_page_lsn) < 0) {
2767
ut_print_timestamp(stderr);
2769
" InnoDB: ERROR: We were only able to scan the log"
2771
"InnoDB: but a database page a had an lsn %lu %lu."
2772
" It is possible that the\n"
2773
"InnoDB: database is now corrupt!\n",
2774
(ulong) ut_dulint_get_high(group_scanned_lsn),
2775
(ulong) ut_dulint_get_low(group_scanned_lsn),
2776
(ulong) ut_dulint_get_high(recv_max_page_lsn),
2777
(ulong) ut_dulint_get_low(recv_max_page_lsn));
2780
if (ut_dulint_cmp(recv_sys->recovered_lsn, checkpoint_lsn) < 0) {
2782
mutex_exit(&(log_sys->mutex));
2784
if (ut_dulint_cmp(recv_sys->recovered_lsn, limit_lsn) >= 0) {
2794
/* Synchronize the uncorrupted log groups to the most up-to-date log
2795
group; we also copy checkpoint info to groups */
2797
log_sys->next_checkpoint_lsn = checkpoint_lsn;
2798
log_sys->next_checkpoint_no = ut_dulint_add(checkpoint_no, 1);
2800
#ifdef UNIV_LOG_ARCHIVE
2801
log_sys->archived_lsn = archived_lsn;
2802
#endif /* UNIV_LOG_ARCHIVE */
2804
recv_synchronize_groups(up_to_date_group);
2806
if (!recv_needed_recovery) {
2807
ut_a(ut_dulint_cmp(checkpoint_lsn,
2808
recv_sys->recovered_lsn) == 0);
2811
srv_start_lsn = recv_sys->recovered_lsn;
2814
log_sys->lsn = recv_sys->recovered_lsn;
2816
ut_memcpy(log_sys->buf, recv_sys->last_block, OS_FILE_LOG_BLOCK_SIZE);
2818
log_sys->buf_free = ut_dulint_get_low(log_sys->lsn)
2819
% OS_FILE_LOG_BLOCK_SIZE;
2820
log_sys->buf_next_to_write = log_sys->buf_free;
2821
log_sys->written_to_some_lsn = log_sys->lsn;
2822
log_sys->written_to_all_lsn = log_sys->lsn;
2824
log_sys->last_checkpoint_lsn = checkpoint_lsn;
2826
log_sys->next_checkpoint_no = ut_dulint_add(checkpoint_no, 1);
2828
#ifdef UNIV_LOG_ARCHIVE
2829
if (ut_dulint_cmp(archived_lsn, ut_dulint_max) == 0) {
2831
log_sys->archiving_state = LOG_ARCH_OFF;
2833
#endif /* UNIV_LOG_ARCHIVE */
2835
mutex_enter(&(recv_sys->mutex));
2837
recv_sys->apply_log_recs = TRUE;
2839
mutex_exit(&(recv_sys->mutex));
2841
mutex_exit(&(log_sys->mutex));
2843
recv_lsn_checks_on = TRUE;
2845
/* The database is now ready to start almost normal processing of user
2846
transactions: transaction rollbacks and the application of the log
2847
records in the hash table can be run in background. */
2852
/************************************************************
2853
Completes recovery from a checkpoint. */
/* NOTE(review): this listing is an excerpt; the return type, the braces and
several statements of this function are not visible here. Visible steps, in
order: apply the hashed log records unless srv_force_recovery is at or above
SRV_FORCE_NO_LOG_REDO; when recv_needed_recovery is set, print the MySQL
master log position and the binlog offset; when recv_sys->found_corrupt_log
is set, print a warning; finally clear recv_recovery_on. */
2856
recv_recovery_from_checkpoint_finish(void)
2857
/*======================================*/
2861
/* Apply the hashed log records to the respective file pages */
2863
if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) {
2865
recv_apply_hashed_log_recs(TRUE);
2869
/* Diagnostic output, compiled only under UNIV_DEBUG (see the #endif below) */
if (log_debug_writes) {
2871
"InnoDB: Log records applied to the database\n");
2873
#endif /* UNIV_DEBUG */
2875
/* Only when recovery was actually needed: report the MySQL master log
position and the binlog offset */
if (recv_needed_recovery) {
2876
trx_sys_print_mysql_master_log_pos();
2877
trx_sys_print_mysql_binlog_offset();
2880
/* Corruption was flagged on the recovery system: warn the user and
suggest checking the tables or restoring from a backup */
if (recv_sys->found_corrupt_log) {
2883
"InnoDB: WARNING: the log file may have been"
2885
"InnoDB: is possible that the log scan or parsing"
2886
" did not proceed\n"
2887
"InnoDB: far enough in recovery. Please run"
2889
"InnoDB: on your InnoDB tables to check that"
2891
"InnoDB: It may be safest to recover your"
2892
" InnoDB database from\n"
2893
"InnoDB: a backup!\n");
2896
/* Free the resources of the recovery system */
2898
recv_recovery_on = FALSE;
2900
#ifndef UNIV_LOG_DEBUG
2904
#ifdef UNIV_SYNC_DEBUG
2905
/* Wait for a while so that created threads have time to suspend
2906
themselves before we switch the latching order checks on */
2907
/* NOTE(review): the argument is presumably in microseconds, i.e. one
second; confirm against os_thread_sleep */
os_thread_sleep(1000000);
2909
/* Switch latching order checks on in sync0sync.c */
2910
sync_order_checks_on = TRUE;
2912
if (srv_force_recovery < SRV_FORCE_NO_TRX_UNDO) {
2913
/* Rollback the uncommitted transactions which have no user session */
2916
os_thread_create(trx_rollback_or_clean_all_without_sess,
2921
/**********************************************************
2922
Resets the logs. The contents of log files will be lost! */
/* NOTE(review): this listing is an excerpt; the function name line, the
local declarations, the braces and the loop statement are not visible here.
On entry the caller owns log_sys->mutex (asserted below); the mutex is
released around the two checkpoint calls at the end and taken again before
the function finishes. */
2927
dulint lsn, /* in: reset to this lsn rounded up to
2928
be divisible by OS_FILE_LOG_BLOCK_SIZE,
2929
after which we add LOG_BLOCK_HDR_SIZE */
2930
#ifdef UNIV_LOG_ARCHIVE
2931
ulint arch_log_no, /* in: next archived log file number */
2932
#endif /* UNIV_LOG_ARCHIVE */
2933
ibool new_logs_created)/* in: TRUE if resetting logs is done
2934
at the log creation; FALSE if it is done
2935
after archive recovery */
2939
/* Debug assertion: the caller must hold the log system mutex */
ut_ad(mutex_own(&(log_sys->mutex)));
2941
/* Start from lsn rounded up to a multiple of OS_FILE_LOG_BLOCK_SIZE */
log_sys->lsn = ut_dulint_align_up(lsn, OS_FILE_LOG_BLOCK_SIZE);
2943
/* Point the log group at the new lsn, placed right after the log file
header. NOTE(review): presumably repeated for every group in
log_sys->log_groups; the loop statement is not visible in this excerpt */
group = UT_LIST_GET_FIRST(log_sys->log_groups);
2946
group->lsn = log_sys->lsn;
2947
group->lsn_offset = LOG_FILE_HDR_SIZE;
2948
#ifdef UNIV_LOG_ARCHIVE
2949
group->archived_file_no = arch_log_no;
2950
group->archived_offset = 0;
2951
#endif /* UNIV_LOG_ARCHIVE */
2953
/* Existing log files (the after-archive-recovery case, per the parameter
comment) are truncated; all four lsn arguments are the group's new lsn */
if (!new_logs_created) {
2954
recv_truncate_group(group, group->lsn, group->lsn,
2955
group->lsn, group->lsn);
2958
group = UT_LIST_GET_NEXT(log_groups, group);
2961
/* Nothing is pending in the buffer: both written-to marks are set to the
new lsn, and the checkpoint number and last checkpoint lsn are zeroed */
log_sys->buf_next_to_write = 0;
2962
log_sys->written_to_some_lsn = log_sys->lsn;
2963
log_sys->written_to_all_lsn = log_sys->lsn;
2965
log_sys->next_checkpoint_no = ut_dulint_zero;
2966
log_sys->last_checkpoint_lsn = ut_dulint_zero;
2968
#ifdef UNIV_LOG_ARCHIVE
2969
log_sys->archived_lsn = log_sys->lsn;
2970
#endif /* UNIV_LOG_ARCHIVE */
2972
/* Initialize the first block in the log buffer, then move the free
offset and the lsn past the block header (the LOG_BLOCK_HDR_SIZE addition
mentioned in the lsn parameter comment) */
log_block_init(log_sys->buf, log_sys->lsn);
2973
log_block_set_first_rec_group(log_sys->buf, LOG_BLOCK_HDR_SIZE);
2975
log_sys->buf_free = LOG_BLOCK_HDR_SIZE;
2976
log_sys->lsn = ut_dulint_add(log_sys->lsn, LOG_BLOCK_HDR_SIZE);
2978
/* The checkpoint calls below run with the log mutex released */
mutex_exit(&(log_sys->mutex));
2980
/* Reset the checkpoint fields in logs */
2982
/* NOTE(review): the call is made twice on purpose in the original text;
presumably so that more than one checkpoint slot is rewritten -- confirm
against log_make_checkpoint_at */
log_make_checkpoint_at(ut_dulint_max, TRUE);
2983
log_make_checkpoint_at(ut_dulint_max, TRUE);
2985
/* Re-acquire the mutex so the caller still owns it afterwards */
mutex_enter(&(log_sys->mutex));
2988
#ifdef UNIV_HOTBACKUP
2989
/**********************************************************
2990
Creates new log files after a backup has been restored. */
2993
recv_reset_log_files_for_backup(
2994
/*============================*/
2995
const char* log_dir, /* in: log file directory path */
2996
ulint n_log_files, /* in: number of log files */
2997
ulint log_file_size, /* in: log file size */
2998
dulint lsn) /* in: new start lsn, must be
2999
divisible by OS_FILE_LOG_BLOCK_SIZE */
3007
static const char ib_logfile_basename[] = "ib_logfile";
3009
log_dir_len = strlen(log_dir);
3010
/* full path name of ib_logfile consists of log dir path + basename
3011
+ number. This must fit in the name buffer. */
3013
ut_a(log_dir_len + strlen(ib_logfile_basename) + 11 < sizeof(name));
3015
buf = ut_malloc(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
3016
memset(buf, '\0', LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
3018
for (i = 0; i < n_log_files; i++) {
3020
sprintf(name, "%s%s%lu", log_dir,
3021
ib_logfile_basename, (ulong)i);
3023
log_file = os_file_create_simple(name, OS_FILE_CREATE,
3024
OS_FILE_READ_WRITE, &success);
3027
"InnoDB: Cannot create %s. Check that"
3028
" the file does not exist yet.\n", name);
3034
"Setting log file size to %lu %lu\n",
3035
(ulong) ut_get_high32(log_file_size),
3036
(ulong) log_file_size & 0xFFFFFFFFUL);
3038
success = os_file_set_size(name, log_file,
3039
log_file_size & 0xFFFFFFFFUL,
3040
ut_get_high32(log_file_size));
3044
"InnoDB: Cannot set %s size to %lu %lu\n",
3045
name, (ulong) ut_get_high32(log_file_size),
3046
(ulong) (log_file_size & 0xFFFFFFFFUL));
3050
os_file_flush(log_file);
3051
os_file_close(log_file);
3054
/* We pretend there is a checkpoint at lsn + LOG_BLOCK_HDR_SIZE */
3056
log_reset_first_header_and_checkpoint(buf, lsn);
3058
log_block_init_in_old_format(buf + LOG_FILE_HDR_SIZE, lsn);
3059
log_block_set_first_rec_group(buf + LOG_FILE_HDR_SIZE,
3060
LOG_BLOCK_HDR_SIZE);
3061
sprintf(name, "%s%s%lu", log_dir, ib_logfile_basename, (ulong)0);
3063
log_file = os_file_create_simple(name, OS_FILE_OPEN,
3064
OS_FILE_READ_WRITE, &success);
3066
fprintf(stderr, "InnoDB: Cannot open %s.\n", name);
3071
os_file_write(name, log_file, buf, 0, 0,
3072
LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
3073
os_file_flush(log_file);
3074
os_file_close(log_file);
3078
#endif /* UNIV_HOTBACKUP */
3080
#ifdef UNIV_LOG_ARCHIVE
3081
/**********************************************************
3082
Reads from the archive of a log group and performs recovery. */
/* NOTE(review): this listing is an excerpt; the return type, most local
declarations, the braces, the labels, the loop statement and the return
statements are not visible here. Visible steps, in order: generate the
archived file name from group->id and group->archived_file_no and open it;
optionally ask the user on stdin whether more files will be supplied; read
the file size; add the file as a node to group->archive_space_id; read and
validate the file header; then read log data with fil_io and hand it to
recv_scan_log_recs. */
3085
log_group_recover_from_archive_file(
3086
/*================================*/
3087
/* out: TRUE if no more complete
3088
consistent archive files */
3089
log_group_t* group) /* in: log group */
3091
os_file_t file_handle;
3093
dulint file_end_lsn;
3101
ulint file_size_high;
3110
/* Add the file to the archive file space; open the file */
3112
log_archived_file_name_gen(name, group->id, group->archived_file_no);
3114
file_handle = os_file_create(name, OS_FILE_OPEN,
3115
OS_FILE_LOG, OS_FILE_AIO, &ret);
3120
"InnoDB: Do you want to copy additional"
3121
" archived log files\n"
3122
"InnoDB: to the directory\n");
3124
"InnoDB: or were these all the files needed"
3127
"InnoDB: (Y == copy more files; N == this is all)?");
3129
/* Interactive prompt: a single character answer is read from stdin;
'Y' retries the open via the try_open_again label */
input_char = getchar();
3131
if (input_char == (int) 'N') {
3134
} else if (input_char == (int) 'Y') {
3136
goto try_open_again;
3142
ret = os_file_get_size(file_handle, &file_size, &file_size_high);
3145
/* Hard assertion: the high word of the file size must be zero */
ut_a(file_size_high == 0);
3147
fprintf(stderr, "InnoDB: Opened archived log file %s\n", name);
3149
/* The handle is closed again here; the reads below go through fil_io on
group->archive_space_id instead */
ret = os_file_close(file_handle);
3151
/* The file is too short to hold even a log file header */
if (file_size < LOG_FILE_HDR_SIZE) {
3153
"InnoDB: Archive file header incomplete %s\n", name);
3160
/* Add the archive file as a node to the space */
3162
fil_node_create(name, 1 + file_size / UNIV_PAGE_SIZE,
3163
group->archive_space_id, FALSE);
3164
/* Compile-time check. NOTE(review): presumably buf holds RECV_SCAN_SIZE
bytes, so the LOG_FILE_HDR_SIZE header read below must fit -- confirm */
#if RECV_SCAN_SIZE < LOG_FILE_HDR_SIZE
3165
# error "RECV_SCAN_SIZE < LOG_FILE_HDR_SIZE"
3168
/* Read the archive file header */
3169
fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE, group->archive_space_id, 0, 0,
3170
LOG_FILE_HDR_SIZE, buf, NULL);
3172
/* Check if the archive file header is consistent */
3174
if (mach_read_from_4(buf + LOG_GROUP_ID) != group->id
3175
|| mach_read_from_4(buf + LOG_FILE_NO)
3176
!= group->archived_file_no) {
3178
"InnoDB: Archive file header inconsistent %s\n", name);
3183
/* The header field LOG_FILE_ARCH_COMPLETED is zero when the file was not
completely written */
if (!mach_read_from_4(buf + LOG_FILE_ARCH_COMPLETED)) {
3185
"InnoDB: Archive file not completely written %s\n",
3191
/* The lsn range of this file, as recorded in its header */
start_lsn = mach_read_from_8(buf + LOG_FILE_START_LSN);
3192
file_end_lsn = mach_read_from_8(buf + LOG_FILE_END_LSN);
3194
/* Nothing has been scanned yet: the file must not start after the lsn
where parsing has to begin */
if (ut_dulint_is_zero(recv_sys->scanned_lsn)) {
3196
if (ut_dulint_cmp(recv_sys->parse_start_lsn, start_lsn) < 0) {
3198
"InnoDB: Archive log file %s"
3199
" starts from too big a lsn\n",
3204
recv_sys->scanned_lsn = start_lsn;
3207
/* The file must start exactly at the lsn scanned so far */
if (ut_dulint_cmp(recv_sys->scanned_lsn, start_lsn) != 0) {
3210
"InnoDB: Archive log file %s starts from"
3216
/* Log data begins right after the file header */
read_offset = LOG_FILE_HDR_SIZE;
3219
/* Read RECV_SCAN_SIZE bytes at a time; at the end of the file the length
is cut to the remaining bytes, aligned down to whole log blocks */
len = RECV_SCAN_SIZE;
3221
if (read_offset + len > file_size) {
3222
len = ut_calc_align_down(file_size - read_offset,
3223
OS_FILE_LOG_BLOCK_SIZE);
3232
/* Diagnostic output, compiled only under UNIV_DEBUG (see the #endif below) */
if (log_debug_writes) {
3234
"InnoDB: Archive read starting at"
3235
" lsn %lu %lu, len %lu from file %s\n",
3236
(ulong) ut_dulint_get_high(start_lsn),
3237
(ulong) ut_dulint_get_low(start_lsn),
3240
#endif /* UNIV_DEBUG */
3242
/* The byte offset is passed to fil_io split into a page number and a
byte offset within that page */
fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE,
3243
group->archive_space_id, read_offset / UNIV_PAGE_SIZE,
3244
read_offset % UNIV_PAGE_SIZE, len, buf, NULL);
3246
/* Scan the bytes just read; the second argument is a byte count derived
from the buffer pool frames minus recv_n_pool_free_frames */
ret = recv_scan_log_recs(
3247
TRUE, (buf_pool->n_frames - recv_n_pool_free_frames)
3248
* UNIV_PAGE_SIZE, TRUE, buf, len, start_lsn,
3249
&dummy_lsn, &scanned_lsn);
3251
/* Scanning reached exactly the end lsn recorded in the file header */
if (ut_dulint_cmp(scanned_lsn, file_end_lsn) == 0) {
3258
"InnoDB: Archive log file %s"
3259
" does not scan right\n",
3265
/* Advance by the bytes just read; the debug assertion checks that the
scanner ended at the same lsn */
start_lsn = ut_dulint_add(start_lsn, len);
3267
ut_ad(ut_dulint_cmp(start_lsn, scanned_lsn) == 0);
3273
/************************************************************
3274
Recovers from archived log files, and also from log files, if they exist. */
3277
recv_recovery_from_archive_start(
3278
/*=============================*/
3279
/* out: error code or DB_SUCCESS */
3280
dulint min_flushed_lsn,/* in: min flushed lsn field from the data files */
3282
dulint limit_lsn, /* in: recover up to this lsn if possible */
3283
ulint first_log_no) /* in: number of the first archived log file
3284
to use in the recovery; the file will be
3285
searched from INNOBASE_LOG_ARCH_DIR specified
3286
in server config file */
3297
recv_sys_init(FALSE, buf_pool_get_curr_size());
3299
recv_recovery_on = TRUE;
3300
recv_recovery_from_backup_on = TRUE;
3302
recv_sys->limit_lsn = limit_lsn;
3306
group = UT_LIST_GET_FIRST(log_sys->log_groups);
3309
if (group->id == group_id) {
3314
group = UT_LIST_GET_NEXT(log_groups, group);
3319
"InnoDB: There is no log group defined with id %lu!\n",
3324
group->archived_file_no = first_log_no;
3326
recv_sys->parse_start_lsn = min_flushed_lsn;
3328
recv_sys->scanned_lsn = ut_dulint_zero;
3329
recv_sys->scanned_checkpoint_no = 0;
3330
recv_sys->recovered_lsn = recv_sys->parse_start_lsn;
3332
recv_sys->archive_group = group;
3336
mutex_enter(&(log_sys->mutex));
3339
ret = log_group_recover_from_archive_file(group);
3341
/* Close and truncate a possible processed archive file
3342
from the file space */
3344
trunc_len = UNIV_PAGE_SIZE
3345
* fil_space_get_size(group->archive_space_id);
3346
if (trunc_len > 0) {
3347
fil_space_truncate_start(group->archive_space_id,
3351
group->archived_file_no++;
3354
if (ut_dulint_cmp(recv_sys->recovered_lsn, limit_lsn) < 0) {
3356
if (ut_dulint_is_zero(recv_sys->scanned_lsn)) {
3358
recv_sys->scanned_lsn = recv_sys->parse_start_lsn;
3361
mutex_exit(&(log_sys->mutex));
3363
err = recv_recovery_from_checkpoint_start(LOG_ARCHIVE,
3367
if (err != DB_SUCCESS) {
3372
mutex_enter(&(log_sys->mutex));
3375
if (ut_dulint_cmp(limit_lsn, ut_dulint_max) != 0) {
3377
recv_apply_hashed_log_recs(FALSE);
3379
recv_reset_logs(recv_sys->recovered_lsn, 0, FALSE);
3382
mutex_exit(&(log_sys->mutex));
3387
/************************************************************
3388
Completes recovery from archive. */
/* NOTE(review): this listing is an excerpt; the return type and the braces
of this function are not visible here. */
3391
recv_recovery_from_archive_finish(void)
3392
/*===================================*/
3394
/* Archive recovery finishes with the same steps as checkpoint recovery */
recv_recovery_from_checkpoint_finish();
3396
/* Clear the flag that recv_recovery_from_archive_start set to TRUE */
recv_recovery_from_backup_on = FALSE;
3398
#endif /* UNIV_LOG_ARCHIVE */