1
/******************************************************
6
Created 9/20/1997 Heikki Tuuri
7
*******************************************************/
12
#include "log0recv.ic"
20
#include "srv0start.h"
23
#include "page0page.h"
28
#include "ibuf0ibuf.h"
35
#include "dict0boot.h"
37
#include "sync0sync.h"
38
#include "row0merge.h"
41
/* This is set to FALSE if the backup was originally taken with the
42
ibbackup --include regexp option: then we do not want to create tables in
43
directories which were not included */
44
UNIV_INTERN ibool recv_replay_file_ops = TRUE;
45
#endif /* UNIV_HOTBACKUP */
47
/* Log records are stored in the hash table in chunks at most of this size;
48
this must be less than UNIV_PAGE_SIZE as it is stored in the buffer pool */
49
#define RECV_DATA_BLOCK_SIZE (MEM_MAX_ALLOC_IN_BUF - sizeof(recv_data_t))
51
/* Read-ahead area in applying log records to file pages */
52
#define RECV_READ_AHEAD_AREA 32
54
/* The recovery system struct; allocated by recv_sys_create() */
UNIV_INTERN recv_sys_t* recv_sys = NULL;
55
/* NOTE(review): presumably TRUE while log recovery is running; this flag
is neither read nor written in the visible part of the file -- confirm */
UNIV_INTERN ibool recv_recovery_on = FALSE;
56
/* NOTE(review): presumably the counterpart of recv_recovery_on for a
recovery from a backup; no use is visible in this part of the file --
confirm against its users */
UNIV_INTERN ibool recv_recovery_from_backup_on = FALSE;
58
/* recv_init_crash_recovery() may be called only while this is FALSE
(see the comment on the declaration of that function) */
UNIV_INTERN ibool recv_needed_recovery = FALSE;
60
/* NOTE(review): presumably enables lsn sanity checks made elsewhere; no
use is visible in this part of the file -- confirm */
UNIV_INTERN ibool recv_lsn_checks_on = FALSE;
62
/* There are two conditions under which we scan the logs, the first
63
is normal startup and the second is when we do a recovery from an archive.
65
This flag is set if we are doing a scan from the last checkpoint during
66
startup. If we find log entries that were written after the last checkpoint
67
we know that the server was not cleanly shutdown. We must then initialize
68
the crash recovery environment before attempting to store these entries in
69
the log hash table. */
70
UNIV_INTERN ibool recv_log_scan_is_startup_type = FALSE;
72
/* If the following is TRUE, the buffer pool file pages must be invalidated
73
after recovery and no ibuf operations are allowed; this becomes TRUE if
74
the log record hash table becomes too full, and log records must be merged
75
to file pages already before the recovery is finished: in this case no
76
ibuf operations are allowed, as they could modify the pages read in the
77
buffer pool before the pages have been recovered to the up-to-date state */
79
/* Recovery is running and no operations on the log files are allowed
80
yet: the variable name is misleading */
82
UNIV_INTERN ibool recv_no_ibuf_operations = FALSE;
84
/* The following counter is used to decide when to print info on
log scan */
86
UNIV_INTERN ulint recv_scan_print_counter = 0;
88
UNIV_INTERN ibool recv_is_from_backup = FALSE;
90
UNIV_INTERN ibool recv_is_making_a_backup = FALSE;
92
# define recv_is_making_a_backup FALSE
93
#endif /* UNIV_HOTBACKUP */
95
/* NOTE(review): the three recv_previous_parsed_rec_* variables presumably
record the most recently parsed log record for diagnostics; they are not
used in the visible part of the file -- confirm */
UNIV_INTERN ulint recv_previous_parsed_rec_type = 999999;
96
UNIV_INTERN ulint recv_previous_parsed_rec_offset = 0;
97
UNIV_INTERN ulint recv_previous_parsed_rec_is_multi = 0;
99
/* Printed by recv_sys_empty_hash() as the "Maximum page number with log
records on it" when pages were left unprocessed */
UNIV_INTERN ulint recv_max_parsed_page_no = 0;
101
/* This many frames must be left free in the buffer pool when we scan
102
the log and store the scanned log records in the buffer pool: we will
103
use these free frames to read in pages when we start applying the
104
log records to the database. */
106
UNIV_INTERN ulint recv_n_pool_free_frames = 256;
108
/* The maximum lsn we see for a page during the recovery process. If this
109
is bigger than the lsn we are able to scan up to, that is an indication that
110
the recovery failed and the database may be corrupt. */
112
UNIV_INTERN ib_uint64_t recv_max_page_lsn;
116
/***********************************************************
117
Initialize crash recovery environment. Can be called iff
118
recv_needed_recovery == FALSE. */
121
recv_init_crash_recovery(void);
122
/*===========================*/
124
/************************************************************
125
Creates the recovery system: allocates the global recv_sys struct, creates
its mutex and sets its heap and addr_hash fields to NULL. */
128
recv_sys_create(void)
129
/*=================*/
131
/* NOTE(review): the body of this branch is not visible here; presumably
the function returns early when recv_sys has already been created */
if (recv_sys != NULL) {
136
recv_sys = mem_alloc(sizeof(recv_sys_t));
138
mutex_create(&recv_sys->mutex, SYNC_RECV);
140
/* The heap and the hash table are created later, when the recovery
system is initialized for a recovery operation */
recv_sys->heap = NULL;
141
recv_sys->addr_hash = NULL;
144
/************************************************************
145
Inits the recovery system for a recovery operation: under recv_sys->mutex
it creates the heap, the parsing buffer, the address hash table and the
aligned last-block buffer, and resets the counters and flags. */
150
ibool recover_from_backup, /* in: TRUE if this is called
151
to recover from a hot backup */
152
ulint available_memory) /* in: available memory in bytes */
154
/* NOTE(review): the body of this branch is not visible here; a non-NULL
heap presumably means the system is already initialized */
if (recv_sys->heap != NULL) {
159
mutex_enter(&(recv_sys->mutex));
161
/* In a normal recovery the heap is created in the buffer pool.
NOTE(review): the mem_heap_create() and recv_is_from_backup lines below
presumably belong to the hot backup branch, whose 'else' line is not
visible here */
if (!recover_from_backup) {
162
recv_sys->heap = mem_heap_create_in_buffer(256);
164
recv_sys->heap = mem_heap_create(256);
165
recv_is_from_backup = TRUE;
168
recv_sys->buf = ut_malloc(RECV_PARSING_BUF_SIZE);
170
recv_sys->recovered_offset = 0;
172
/* The size of the hash table is derived from the available memory */
recv_sys->addr_hash = hash_create(available_memory / 64);
173
recv_sys->n_addrs = 0;
175
recv_sys->apply_log_recs = FALSE;
176
recv_sys->apply_batch_on = FALSE;
178
/* Two log blocks are allocated so that last_block can be aligned to
OS_FILE_LOG_BLOCK_SIZE inside the allocation; last_block_buf_start keeps
the pointer that must be freed */
recv_sys->last_block_buf_start = mem_alloc(2 * OS_FILE_LOG_BLOCK_SIZE);
180
recv_sys->last_block = ut_align(recv_sys->last_block_buf_start,
181
OS_FILE_LOG_BLOCK_SIZE);
182
recv_sys->found_corrupt_log = FALSE;
184
recv_max_page_lsn = 0;
186
mutex_exit(&(recv_sys->mutex));
189
/************************************************************
190
Empties the hash table when it has been fully processed: frees the address
hash table, empties the heap and creates a new hash table. An error message
is built if some pages with log records were left unprocessed. */
193
recv_sys_empty_hash(void)
194
/*=====================*/
196
/* The caller must own the recovery system mutex */
ut_ad(mutex_own(&(recv_sys->mutex)));
198
/* n_addrs counts the pages that still have unprocessed log records */
if (recv_sys->n_addrs != 0) {
200
"InnoDB: Error: %lu pages with log records"
201
" were left unprocessed!\n"
202
"InnoDB: Maximum page number with"
203
" log records on it %lu\n",
204
(ulong) recv_sys->n_addrs,
205
(ulong) recv_max_parsed_page_no);
209
hash_table_free(recv_sys->addr_hash);
210
mem_heap_empty(recv_sys->heap);
212
/* The new table is sized from the current size of the buffer pool */
recv_sys->addr_hash = hash_create(buf_pool_get_curr_size() / 256);
215
#ifndef UNIV_LOG_DEBUG
216
/************************************************************
217
Frees the recovery system: under recv_sys->mutex releases the hash table,
the heap, the parsing buffer and the last-block buffer. The recv_sys struct
itself and its mutex are not freed in the visible code. */
223
mutex_enter(&(recv_sys->mutex));
225
hash_table_free(recv_sys->addr_hash);
226
mem_heap_free(recv_sys->heap);
227
ut_free(recv_sys->buf);
228
/* Free through the original, unaligned pointer; last_block points inside
this allocation */
mem_free(recv_sys->last_block_buf_start);
230
/* Reset the pointers: the init code tests recv_sys->heap against NULL */
recv_sys->addr_hash = NULL;
231
recv_sys->heap = NULL;
233
mutex_exit(&(recv_sys->mutex));
235
#endif /* UNIV_LOG_DEBUG */
237
/************************************************************
238
Truncates possible corrupted or extra records from a log group. The group
is overwritten, starting from the log block that contains recovered_lsn and
up to a computed finish lsn, using log_sys->buf as the write buffer: the
last incomplete block is kept with an adjusted data length and everything
after it is written as zeros. */
243
log_group_t* group, /* in: log group */
244
ib_uint64_t recovered_lsn, /* in: recovery succeeded up to this
246
ib_uint64_t limit_lsn, /* in: this was the limit for
248
ib_uint64_t checkpoint_lsn, /* in: recovery was started from this
250
ib_uint64_t archived_lsn) /* in: the log has been archived up to
253
ib_uint64_t start_lsn;
255
ib_uint64_t finish_lsn1;
256
ib_uint64_t finish_lsn2;
257
ib_uint64_t finish_lsn;
261
if (archived_lsn == IB_ULONGLONG_MAX) {
262
/* Checkpoint was taken in the NOARCHIVELOG mode */
263
archived_lsn = checkpoint_lsn;
266
/* First candidate for the end of the erased area: one group capacity
past the block-aligned archived lsn */
finish_lsn1 = ut_uint64_align_down(archived_lsn,
267
OS_FILE_LOG_BLOCK_SIZE)
268
+ log_group_get_capacity(group);
270
/* Second candidate: the block-aligned recovered lsn plus
recv_sys->last_log_buf_size */
finish_lsn2 = ut_uint64_align_up(recovered_lsn,
271
OS_FILE_LOG_BLOCK_SIZE)
272
+ recv_sys->last_log_buf_size;
274
if (limit_lsn != IB_ULONGLONG_MAX) {
275
/* We do not know how far we should erase log records: erase
276
as much as possible */
278
finish_lsn = finish_lsn1;
280
/* It is enough to erase the length of the log buffer */
281
finish_lsn = finish_lsn1 < finish_lsn2
282
? finish_lsn1 : finish_lsn2;
285
/* The code below fills and writes RECV_SCAN_SIZE bytes of log_sys->buf */
ut_a(RECV_SCAN_SIZE <= log_sys->buf_size);
287
/* Write the log buffer full of zeros */
288
for (i = 0; i < RECV_SCAN_SIZE; i++) {
290
*(log_sys->buf + i) = '\0';
293
start_lsn = ut_uint64_align_down(recovered_lsn,
294
OS_FILE_LOG_BLOCK_SIZE);
296
if (start_lsn != recovered_lsn) {
297
/* Copy the last incomplete log block to the log buffer and
298
edit its data length: */
300
ut_memcpy(log_sys->buf, recv_sys->last_block,
301
OS_FILE_LOG_BLOCK_SIZE);
302
log_block_set_data_len(log_sys->buf,
303
(ulint) (recovered_lsn - start_lsn));
306
/* NOTE(review): the body of this branch is not visible here; presumably
there is nothing to erase when the start is at or past finish_lsn */
if (start_lsn >= finish_lsn) {
312
/* Each piece written to the group is at most RECV_SCAN_SIZE bytes and is
clamped so that it does not extend past finish_lsn */
end_lsn = start_lsn + RECV_SCAN_SIZE;
314
if (end_lsn > finish_lsn) {
316
end_lsn = finish_lsn;
319
len = (ulint) (end_lsn - start_lsn);
321
log_group_write_buf(group, log_sys->buf, len, start_lsn, 0);
322
if (end_lsn >= finish_lsn) {
327
/* Write the log buffer full of zeros */
328
for (i = 0; i < RECV_SCAN_SIZE; i++) {
330
*(log_sys->buf + i) = '\0';
337
/************************************************************
338
Copies the log segment between group->scanned_lsn and recovered_lsn from the
339
most up-to-date log group to group, so that it contains the latest log data.
Nothing needs copying when group->scanned_lsn has already reached
recovered_lsn. The data is read from up_to_date_group into log_sys->buf and
written to group in pieces of at most RECV_SCAN_SIZE bytes; the first piece
starts at the log block boundary at or below group->scanned_lsn and the last
one ends at the block boundary at or above recovered_lsn. */
344
log_group_t* up_to_date_group, /* in: the most up-to-date log
346
log_group_t* group, /* in: copy to this log
348
ib_uint64_t recovered_lsn) /* in: recovery succeeded up
351
ib_uint64_t start_lsn;
355
if (group->scanned_lsn >= recovered_lsn) {
360
ut_a(RECV_SCAN_SIZE <= log_sys->buf_size);
362
start_lsn = ut_uint64_align_down(group->scanned_lsn,
363
OS_FILE_LOG_BLOCK_SIZE);
365
end_lsn = start_lsn + RECV_SCAN_SIZE;
367
if (end_lsn > recovered_lsn) {
368
end_lsn = ut_uint64_align_up(recovered_lsn,
369
OS_FILE_LOG_BLOCK_SIZE);
372
log_group_read_log_seg(LOG_RECOVER, log_sys->buf,
373
up_to_date_group, start_lsn, end_lsn);
375
len = (ulint) (end_lsn - start_lsn);
377
log_group_write_buf(group, log_sys->buf, len, start_lsn, 0);
379
if (end_lsn >= recovered_lsn) {
388
/************************************************************
389
Copies a log segment from the most up-to-date log group to the other log
390
groups, so that they all contain the latest log data. Also writes the info
391
about the latest checkpoint to the groups, and inits the fields in the group
392
memory structs to up-to-date values. */
395
recv_synchronize_groups(
396
/*====================*/
397
log_group_t* up_to_date_group) /* in: the most up-to-date
401
ib_uint64_t start_lsn;
403
ib_uint64_t recovered_lsn;
404
ib_uint64_t limit_lsn;
406
recovered_lsn = recv_sys->recovered_lsn;
407
limit_lsn = recv_sys->limit_lsn;
409
/* Read the last recovered log block to the recovery system buffer:
410
the block is always incomplete */
412
start_lsn = ut_uint64_align_down(recovered_lsn,
413
OS_FILE_LOG_BLOCK_SIZE);
414
end_lsn = ut_uint64_align_up(recovered_lsn, OS_FILE_LOG_BLOCK_SIZE);
416
/* recovered_lsn must not lie exactly on a log block boundary: the aligned
down and aligned up values would then coincide and there would be no
incomplete block to read */
ut_a(start_lsn != end_lsn);
418
log_group_read_log_seg(LOG_RECOVER, recv_sys->last_block,
419
up_to_date_group, start_lsn, end_lsn);
421
group = UT_LIST_GET_FIRST(log_sys->log_groups);
424
if (group != up_to_date_group) {
426
/* Copy log data if needed. recv_copy_group() takes the source (the
most up-to-date group) as its first parameter and the destination
group as its second one */
428
recv_copy_group(up_to_date_group, group,
432
/* Update the fields in the group struct to correspond to
435
log_group_set_fields(group, recovered_lsn);
437
group = UT_LIST_GET_NEXT(log_groups, group);
440
/* Copy the checkpoint info to the groups; remember that we have
441
incremented checkpoint_no by one, and the info will not be written
442
over the max checkpoint info, thus making the preservation of max
443
checkpoint info on disk certain */
445
log_groups_write_checkpoint_info();
447
/* The log mutex is released for the duration of the wait and is taken
again below */
mutex_exit(&(log_sys->mutex));
449
/* Wait for the checkpoint write to complete */
450
rw_lock_s_lock(&(log_sys->checkpoint_lock));
451
rw_lock_s_unlock(&(log_sys->checkpoint_lock));
453
mutex_enter(&(log_sys->mutex));
456
/***************************************************************************
457
Checks the consistency of the checkpoint info by recomputing the two
checksums stored in the checkpoint buffer. */
460
recv_check_cp_is_consistent(
461
/*========================*/
462
/* out: TRUE if ok */
463
byte* buf) /* in: buffer containing checkpoint info */
467
/* First checksum: fold of the bytes that precede the field at offset
LOG_CHECKPOINT_CHECKSUM_1; its low 32 bits must equal the 4-byte value
stored in that field */
fold = ut_fold_binary(buf, LOG_CHECKPOINT_CHECKSUM_1);
469
if ((fold & 0xFFFFFFFFUL) != mach_read_from_4(
470
buf + LOG_CHECKPOINT_CHECKSUM_1)) {
474
/* Second checksum: fold of the bytes from LOG_CHECKPOINT_LSN up to, but
not including, the field at LOG_CHECKPOINT_CHECKSUM_2 */
fold = ut_fold_binary(buf + LOG_CHECKPOINT_LSN,
475
LOG_CHECKPOINT_CHECKSUM_2 - LOG_CHECKPOINT_LSN);
477
if ((fold & 0xFFFFFFFFUL) != mach_read_from_4(
478
buf + LOG_CHECKPOINT_CHECKSUM_2)) {
485
/************************************************************
486
Looks for the maximum consistent checkpoint from the log groups: for each
group both checkpoint fields are read into log_sys->checkpoint_buf and
checked with recv_check_cp_is_consistent(), and the checkpoint with the
highest checkpoint number is remembered. */
489
recv_find_max_checkpoint(
490
/*=====================*/
491
/* out: error code or DB_SUCCESS */
492
log_group_t** max_group, /* out: max group */
493
ulint* max_field) /* out: LOG_CHECKPOINT_1 or
498
ib_uint64_t checkpoint_no;
502
group = UT_LIST_GET_FIRST(log_sys->log_groups);
508
buf = log_sys->checkpoint_buf;
511
group->state = LOG_GROUP_CORRUPTED;
513
for (field = LOG_CHECKPOINT_1; field <= LOG_CHECKPOINT_2;
514
field += LOG_CHECKPOINT_2 - LOG_CHECKPOINT_1) {
516
log_group_read_checkpoint_info(group, field);
518
if (!recv_check_cp_is_consistent(buf)) {
520
if (log_debug_writes) {
522
"InnoDB: Checkpoint in group"
523
" %lu at %lu invalid, %lu\n",
526
(ulong) mach_read_from_4(
528
+ LOG_CHECKPOINT_CHECKSUM_1));
531
#endif /* UNIV_DEBUG */
535
/* The checkpoint is consistent: mark the group as usable and take the
lsn, the lsn offset and the checkpoint number from the buffer */
group->state = LOG_GROUP_OK;
537
group->lsn = mach_read_ull(
538
buf + LOG_CHECKPOINT_LSN);
539
group->lsn_offset = mach_read_from_4(
540
buf + LOG_CHECKPOINT_OFFSET);
541
checkpoint_no = mach_read_ull(
542
buf + LOG_CHECKPOINT_NO);
545
if (log_debug_writes) {
547
"InnoDB: Checkpoint number %lu"
548
" found in group %lu\n",
549
(ulong) checkpoint_no,
552
#endif /* UNIV_DEBUG */
554
/* Because the comparison is >=, a later checkpoint with an equal number
replaces the one found earlier */
if (checkpoint_no >= max_no) {
557
max_no = checkpoint_no;
564
group = UT_LIST_GET_NEXT(log_groups, group);
567
/* No consistent checkpoint was found in any of the groups */
if (*max_group == NULL) {
570
"InnoDB: No valid checkpoint found.\n"
571
"InnoDB: If this error appears when you are"
572
" creating an InnoDB database,\n"
573
"InnoDB: the problem may be that during"
574
" an earlier attempt you managed\n"
575
"InnoDB: to create the InnoDB data files,"
576
" but log file creation failed.\n"
577
"InnoDB: If that is the case, please refer to\n"
578
"InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
579
"error-creating-innodb.html\n");
586
/***********************************************************************
587
Reads the checkpoint info needed in hot backup: selects, from the two
checkpoint fields in the log group header, the consistent one with the
higher checkpoint number and returns its lsn, offset, fsp limit and number,
together with the start lsn stored in the header. */
590
recv_read_cp_info_for_backup(
591
/*=========================*/
592
/* out: TRUE if success */
593
byte* hdr, /* in: buffer containing the log group
595
ib_uint64_t* lsn, /* out: checkpoint lsn */
596
ulint* offset, /* out: checkpoint offset in the log group */
597
ulint* fsp_limit,/* out: fsp limit of space 0,
598
1000000000 if the database is running
599
with < version 3.23.50 of InnoDB */
600
ib_uint64_t* cp_no, /* out: checkpoint number */
601
ib_uint64_t* first_header_lsn)
602
/* out: lsn of the start of the
606
ib_uint64_t max_cp_no = 0;
609
cp_buf = hdr + LOG_CHECKPOINT_1;
611
if (recv_check_cp_is_consistent(cp_buf)) {
612
max_cp_no = mach_read_ull(cp_buf + LOG_CHECKPOINT_NO);
613
max_cp = LOG_CHECKPOINT_1;
616
cp_buf = hdr + LOG_CHECKPOINT_2;
618
if (recv_check_cp_is_consistent(cp_buf)) {
619
if (mach_read_ull(cp_buf + LOG_CHECKPOINT_NO) > max_cp_no) {
620
max_cp = LOG_CHECKPOINT_2;
628
cp_buf = hdr + max_cp;
630
*lsn = mach_read_ull(cp_buf + LOG_CHECKPOINT_LSN);
631
*offset = mach_read_from_4(cp_buf + LOG_CHECKPOINT_OFFSET);
633
/* If the user is running a pre-3.23.50 version of InnoDB, its
634
checkpoint data does not contain the fsp limit info */
635
/* The fsp limit is taken from the checkpoint only when the magic number
field holds the expected value */
if (mach_read_from_4(cp_buf + LOG_CHECKPOINT_FSP_MAGIC_N)
636
== LOG_CHECKPOINT_FSP_MAGIC_N_VAL) {
638
*fsp_limit = mach_read_from_4(
639
cp_buf + LOG_CHECKPOINT_FSP_FREE_LIMIT);
641
/* A stored limit of 0 is replaced by the 1000000000 value documented
for the fsp_limit parameter */
if (*fsp_limit == 0) {
642
*fsp_limit = 1000000000;
645
/* NOTE(review): presumably the branch for a missing magic number; its
'else' line is not visible here */
*fsp_limit = 1000000000;
648
/* fprintf(stderr, "fsp limit %lu MB\n", *fsp_limit); */
650
*cp_no = mach_read_ull(cp_buf + LOG_CHECKPOINT_NO);
652
/* This value comes from the group header itself, not from the selected
checkpoint field */
*first_header_lsn = mach_read_ull(hdr + LOG_FILE_START_LSN);
657
/**********************************************************
658
Checks the 4-byte checksum to the trailer checksum field of a log block.
659
We also accept a log block in the old format < InnoDB-3.23.52 where the
660
checksum field contains the log block number. */
663
log_block_checksum_is_ok_or_old_format(
664
/*===================================*/
665
/* out: TRUE if ok, or if the log block may be in the
666
format of InnoDB version < 3.23.52 */
667
byte* block) /* in: pointer to a log block */
669
/* NOTE(review): the statement guarded by UNIV_LOG_DEBUG is not visible
here */
#ifdef UNIV_LOG_DEBUG
671
#endif /* UNIV_LOG_DEBUG */
672
/* Normal case: the checksum calculated from the block equals the
checksum stored in the block */
if (log_block_calc_checksum(block) == log_block_get_checksum(block)) {
677
/* Old format: the checksum field holds the block number from the block
header instead of a checksum */
if (log_block_get_hdr_no(block) == log_block_get_checksum(block)) {
679
/* We assume the log block is in the format of
680
InnoDB version < 3.23.52 and the block is ok */
683
"InnoDB: Scanned old format < InnoDB-3.23.52"
684
" log block number %lu\n",
685
log_block_get_hdr_no(block));
693
/***********************************************************************
694
Scans the log segment and n_bytes_scanned is set to the length of valid
log scanned. */
698
recv_scan_log_seg_for_backup(
699
/*=========================*/
700
byte* buf, /* in: buffer containing log data */
701
ulint buf_len, /* in: data length in that buffer */
702
ib_uint64_t* scanned_lsn, /* in/out: lsn of buffer start,
703
we return scanned lsn */
704
ulint* scanned_checkpoint_no,
705
/* in/out: 4 lowest bytes of the
706
highest scanned checkpoint number so
708
ulint* n_bytes_scanned)/* out: how much we were able to
709
scan, smaller than buf_len if log
716
*n_bytes_scanned = 0;
718
for (log_block = buf; log_block < buf + buf_len;
719
log_block += OS_FILE_LOG_BLOCK_SIZE) {
721
no = log_block_get_hdr_no(log_block);
724
fprintf(stderr, "Log block header no %lu\n", no);
727
if (no != log_block_convert_lsn_to_no(*scanned_lsn)
728
|| !log_block_checksum_is_ok_or_old_format(log_block)) {
731
"Log block n:o %lu, scanned lsn n:o %lu\n",
732
no, log_block_convert_lsn_to_no(*scanned_lsn));
734
/* Garbage or an incompletely written log block */
736
log_block += OS_FILE_LOG_BLOCK_SIZE;
739
"Next log block n:o %lu\n",
740
log_block_get_hdr_no(log_block));
745
/* A block whose checkpoint number is lower than the highest number scanned
so far is treated as stale; the last term of this condition is not visible
here */
if (*scanned_checkpoint_no > 0
746
&& log_block_get_checkpoint_no(log_block)
747
< *scanned_checkpoint_no
748
&& *scanned_checkpoint_no
749
- log_block_get_checkpoint_no(log_block)
752
/* Garbage from a log buffer flush which was made
753
before the most recent database recovery */
756
"Scanned cp n:o %lu, block cp n:o %lu\n",
757
*scanned_checkpoint_no,
758
log_block_get_checkpoint_no(log_block));
763
/* The block is accepted: remember its checkpoint number and advance both
the scanned lsn and the scanned byte count by the data length of the
block */
data_len = log_block_get_data_len(log_block);
765
*scanned_checkpoint_no
766
= log_block_get_checkpoint_no(log_block);
767
*scanned_lsn += data_len;
769
*n_bytes_scanned += data_len;
771
if (data_len < OS_FILE_LOG_BLOCK_SIZE) {
772
/* Log data ends here */
775
fprintf(stderr, "Log block data len %lu\n",
783
/***********************************************************************
784
Tries to parse a single log record body and also applies it to a page if
785
specified. File ops are parsed, but not applied in this function. The
record type selects the type-specific parser that consumes the body. */
788
recv_parse_or_apply_log_rec_body(
789
/*=============================*/
790
/* out: log record end, NULL if not a
792
byte type, /* in: type */
793
byte* ptr, /* in: pointer to a buffer */
794
byte* end_ptr,/* in: pointer to the buffer end */
795
buf_block_t* block, /* in/out: buffer block or NULL; if
796
not NULL, then the log record is
797
applied to the page, and the log
798
record should be complete then */
799
mtr_t* mtr) /* in: mtr or NULL; should be non-NULL
800
if and only if block is non-NULL */
802
/* index stays NULL unless one of the cases below sets it through
mlog_parse_index(); it is released at the end of the function */
dict_index_t* index = NULL;
804
page_zip_des_t* page_zip;
806
/* block and mtr must be either both NULL or both non-NULL */
ut_ad(!block == !mtr);
810
page_zip = buf_block_get_page_zip(block);
817
/* In the cases below that call mlog_parse_index(), the visible argument
tells whether the record type is the MLOG_COMP_* variant; the rest of the
body is then handed to the type-specific parser. Several argument lines
and the statements between the cases are not visible here */
case MLOG_1BYTE: case MLOG_2BYTES: case MLOG_4BYTES: case MLOG_8BYTES:
818
ptr = mlog_parse_nbytes(type, ptr, end_ptr, page, page_zip);
820
case MLOG_REC_INSERT: case MLOG_COMP_REC_INSERT:
821
if (NULL != (ptr = mlog_parse_index(
823
type == MLOG_COMP_REC_INSERT,
826
|| (ibool)!!page_is_comp(page)
827
== dict_table_is_comp(index->table));
828
ptr = page_cur_parse_insert_rec(FALSE, ptr, end_ptr,
832
case MLOG_REC_CLUST_DELETE_MARK: case MLOG_COMP_REC_CLUST_DELETE_MARK:
833
if (NULL != (ptr = mlog_parse_index(
835
type == MLOG_COMP_REC_CLUST_DELETE_MARK,
838
|| (ibool)!!page_is_comp(page)
839
== dict_table_is_comp(index->table));
840
ptr = btr_cur_parse_del_mark_set_clust_rec(
841
ptr, end_ptr, page, page_zip, index);
844
case MLOG_COMP_REC_SEC_DELETE_MARK:
845
/* This log record type is obsolete, but we process it for
846
backward compatibility with MySQL 5.0.3 and 5.0.4. */
847
ut_a(!page || page_is_comp(page));
849
ptr = mlog_parse_index(ptr, end_ptr, TRUE, &index);
854
case MLOG_REC_SEC_DELETE_MARK:
855
ptr = btr_cur_parse_del_mark_set_sec_rec(ptr, end_ptr,
858
case MLOG_REC_UPDATE_IN_PLACE: case MLOG_COMP_REC_UPDATE_IN_PLACE:
859
if (NULL != (ptr = mlog_parse_index(
861
type == MLOG_COMP_REC_UPDATE_IN_PLACE,
864
|| (ibool)!!page_is_comp(page)
865
== dict_table_is_comp(index->table));
866
ptr = btr_cur_parse_update_in_place(ptr, end_ptr, page,
870
case MLOG_LIST_END_DELETE: case MLOG_COMP_LIST_END_DELETE:
871
case MLOG_LIST_START_DELETE: case MLOG_COMP_LIST_START_DELETE:
872
if (NULL != (ptr = mlog_parse_index(
874
type == MLOG_COMP_LIST_END_DELETE
875
|| type == MLOG_COMP_LIST_START_DELETE,
878
|| (ibool)!!page_is_comp(page)
879
== dict_table_is_comp(index->table));
880
ptr = page_parse_delete_rec_list(type, ptr, end_ptr,
884
case MLOG_LIST_END_COPY_CREATED: case MLOG_COMP_LIST_END_COPY_CREATED:
885
if (NULL != (ptr = mlog_parse_index(
887
type == MLOG_COMP_LIST_END_COPY_CREATED,
890
|| (ibool)!!page_is_comp(page)
891
== dict_table_is_comp(index->table));
892
ptr = page_parse_copy_rec_list_to_created_page(
893
ptr, end_ptr, block, index, mtr);
896
case MLOG_PAGE_REORGANIZE: case MLOG_COMP_PAGE_REORGANIZE:
897
if (NULL != (ptr = mlog_parse_index(
899
type == MLOG_COMP_PAGE_REORGANIZE,
902
|| (ibool)!!page_is_comp(page)
903
== dict_table_is_comp(index->table));
904
ptr = btr_parse_page_reorganize(ptr, end_ptr, index,
908
case MLOG_PAGE_CREATE: case MLOG_COMP_PAGE_CREATE:
910
ptr = page_parse_create(ptr, end_ptr,
911
type == MLOG_COMP_PAGE_CREATE,
914
case MLOG_UNDO_INSERT:
915
ptr = trx_undo_parse_add_undo_rec(ptr, end_ptr, page);
917
case MLOG_UNDO_ERASE_END:
918
ptr = trx_undo_parse_erase_page_end(ptr, end_ptr, page, mtr);
921
/* NOTE(review): the case label that belongs to the next statement is not
visible here */
ptr = trx_undo_parse_page_init(ptr, end_ptr, page, mtr);
923
case MLOG_UNDO_HDR_DISCARD:
924
ptr = trx_undo_parse_discard_latest(ptr, end_ptr, page, mtr);
926
case MLOG_UNDO_HDR_CREATE:
927
case MLOG_UNDO_HDR_REUSE:
928
ptr = trx_undo_parse_page_header(type, ptr, end_ptr,
931
case MLOG_REC_MIN_MARK: case MLOG_COMP_REC_MIN_MARK:
932
/* On a compressed page, MLOG_COMP_REC_MIN_MARK
933
will be followed by MLOG_COMP_REC_DELETE
934
or MLOG_ZIP_WRITE_HEADER(FIL_PAGE_PREV, FIL_NULL)
935
in the same mini-transaction. */
936
ut_a(type == MLOG_COMP_REC_MIN_MARK || !page_zip);
937
ptr = btr_parse_set_min_rec_mark(
938
ptr, end_ptr, type == MLOG_COMP_REC_MIN_MARK,
941
case MLOG_REC_DELETE: case MLOG_COMP_REC_DELETE:
942
if (NULL != (ptr = mlog_parse_index(
944
type == MLOG_COMP_REC_DELETE,
947
|| (ibool)!!page_is_comp(page)
948
== dict_table_is_comp(index->table));
949
ptr = page_cur_parse_delete_rec(ptr, end_ptr,
953
case MLOG_IBUF_BITMAP_INIT:
954
ptr = ibuf_parse_bitmap_init(ptr, end_ptr, block, mtr);
956
case MLOG_INIT_FILE_PAGE:
957
ptr = fsp_parse_init_file_page(ptr, end_ptr, block);
959
case MLOG_WRITE_STRING:
960
ptr = mlog_parse_string(ptr, end_ptr, page, page_zip);
962
/* File operation records: according to the function comment these are
parsed but not applied here */
case MLOG_FILE_CREATE:
963
case MLOG_FILE_RENAME:
964
case MLOG_FILE_DELETE:
965
case MLOG_FILE_CREATE2:
966
ptr = fil_op_log_parse_or_replay(ptr, end_ptr, type, 0);
968
case MLOG_ZIP_WRITE_NODE_PTR:
969
ptr = page_zip_parse_write_node_ptr(ptr, end_ptr,
972
case MLOG_ZIP_WRITE_BLOB_PTR:
973
ptr = page_zip_parse_write_blob_ptr(ptr, end_ptr,
976
case MLOG_ZIP_WRITE_HEADER:
977
ptr = page_zip_parse_write_header(ptr, end_ptr,
980
case MLOG_ZIP_PAGE_COMPRESS:
981
ptr = page_zip_parse_compress(ptr, end_ptr,
986
/* NOTE(review): presumably the default case for an unknown record type;
its label is not visible here */
recv_sys->found_corrupt_log = TRUE;
990
/* Release the index and its table; the table pointer is saved first
because the index is freed before the table */
dict_table_t* table = index->table;
992
dict_mem_index_free(index);
993
dict_mem_table_free(table);
999
/*************************************************************************
1000
Calculates the fold value of a page file address: used in inserting or
1001
searching for a log record in the hash table. The fold is computed from
the (space, page_no) pair with ut_fold_ulint_pair(). */
1006
/* out: folded value */
1007
ulint space, /* in: space */
1008
ulint page_no)/* in: page number */
1010
return(ut_fold_ulint_pair(space, page_no));
1013
/*************************************************************************
1014
Calculates the hash value of a page file address: used in inserting or
1015
searching for a log record in the hash table. The address is first folded
with recv_fold() and the fold is then mapped with hash_calc_hash() for
recv_sys->addr_hash. */
1020
/* out: hash value of the folded address */
1021
ulint space, /* in: space */
1022
ulint page_no)/* in: page number */
1024
return(hash_calc_hash(recv_fold(space, page_no), recv_sys->addr_hash));
1027
/*************************************************************************
1028
Gets the hashed file address struct for a page by searching
recv_sys->addr_hash for an entry with the same space id and page number. */
1031
recv_get_fil_addr_struct(
1032
/*=====================*/
1033
/* out: file address struct, NULL if not found from
1035
ulint space, /* in: space id */
1036
ulint page_no)/* in: page number */
1038
recv_addr_t* recv_addr;
1040
/* Start from the first struct stored under the hash value of the
address */
recv_addr = HASH_GET_FIRST(recv_sys->addr_hash,
1041
recv_hash(space, page_no));
1043
/* A match requires both the space id and the page number to be equal */
if ((recv_addr->space == space)
1044
&& (recv_addr->page_no == page_no)) {
1049
/* No match: move on to the next struct through the addr_hash link */
recv_addr = HASH_GET_NEXT(addr_hash, recv_addr);
1055
/***********************************************************************
1056
Adds a new log record to the hash table of log records. A recv_t describing
the record is allocated from recv_sys->heap and appended to the rec_list of
the page's recv_addr_t; if the page has no recv_addr_t yet, one is allocated
in state RECV_NOT_PROCESSED, inserted into recv_sys->addr_hash and counted
in recv_sys->n_addrs. The body is copied to the heap in chunks of at most
RECV_DATA_BLOCK_SIZE bytes; each chunk follows a recv_data_t header and the
chunks are linked through the next field, starting from recv->data. The
record is checked first against tablespaces that are deleted or being
deleted. */
1059
recv_add_to_hash_table(
1060
/*===================*/
1061
byte type, /* in: log record type */
1062
ulint space, /* in: space id */
1063
ulint page_no, /* in: page number */
1064
byte* body, /* in: log record body */
1065
byte* rec_end, /* in: log record end */
1066
ib_uint64_t start_lsn, /* in: start lsn of the mtr */
1067
ib_uint64_t end_lsn) /* in: end lsn of the mtr */
1071
recv_data_t* recv_data;
1072
recv_data_t** prev_field;
1073
recv_addr_t* recv_addr;
1075
if (fil_tablespace_deleted_or_being_deleted_in_mem(space, -1)) {
1076
/* The tablespace does not exist any more: do not store the
1082
len = rec_end - body;
1084
recv = mem_heap_alloc(recv_sys->heap, sizeof(recv_t));
1086
recv->len = rec_end - body;
1087
recv->start_lsn = start_lsn;
1088
recv->end_lsn = end_lsn;
1090
recv_addr = recv_get_fil_addr_struct(space, page_no);
1092
if (recv_addr == NULL) {
1093
recv_addr = mem_heap_alloc(recv_sys->heap,
1094
sizeof(recv_addr_t));
1095
recv_addr->space = space;
1096
recv_addr->page_no = page_no;
1097
recv_addr->state = RECV_NOT_PROCESSED;
1099
UT_LIST_INIT(recv_addr->rec_list);
1101
HASH_INSERT(recv_addr_t, addr_hash, recv_sys->addr_hash,
1102
recv_fold(space, page_no), recv_addr);
1103
recv_sys->n_addrs++;
1105
fprintf(stderr, "Inserting log rec for space %lu, page %lu\n",
1110
UT_LIST_ADD_LAST(rec_list, recv_addr->rec_list, recv);
1112
prev_field = &(recv->data);
1114
/* Store the log record body in chunks of less than UNIV_PAGE_SIZE:
1115
recv_sys->heap grows into the buffer pool, and bigger chunks could not
1118
while (rec_end > body) {
1120
len = rec_end - body;
1122
if (len > RECV_DATA_BLOCK_SIZE) {
1123
len = RECV_DATA_BLOCK_SIZE;
1126
recv_data = mem_heap_alloc(recv_sys->heap,
1127
sizeof(recv_data_t) + len);
1128
*prev_field = recv_data;
1130
ut_memcpy(((byte*)recv_data) + sizeof(recv_data_t), body, len);
1132
prev_field = &(recv_data->next);
1140
/*************************************************************************
1141
Copies the log record body from recv to buf by walking the chain of data
chunks that starts at recv->data. */
1144
recv_data_copy_to_buf(
1145
/*==================*/
1146
byte* buf, /* in: buffer of length at least recv->len */
1147
recv_t* recv) /* in: log record */
1149
recv_data_t* recv_data;
1154
recv_data = recv->data;
1157
/* A chunk holds at most RECV_DATA_BLOCK_SIZE bytes of the body */
if (len > RECV_DATA_BLOCK_SIZE) {
1158
part_len = RECV_DATA_BLOCK_SIZE;
1163
/* The payload of a chunk is stored right after its recv_data_t header */
ut_memcpy(buf, ((byte*)recv_data) + sizeof(recv_data_t),
1168
recv_data = recv_data->next;
1172
/****************************************************************************
1172
Applies the hashed log records to the page, if the page lsn is less than the
1173
lsn of a log record. This can be called when a buffer page has just been
1174
read in, or also for a page already in the buffer pool. */
1180
ibool recover_backup,
1181
/* in: TRUE if we are recovering a backup
1182
page: then we do not acquire any latches
1183
since the page was read in outside the
1186
/* in: TRUE if the i/o-handler calls this for
1187
a freshly read page */
1188
buf_block_t* block) /* in: buffer block */
1191
recv_addr_t* recv_addr;
1194
ib_uint64_t start_lsn;
1195
ib_uint64_t end_lsn;
1196
ib_uint64_t page_lsn;
1197
ib_uint64_t page_newest_lsn;
1198
ibool modification_to_page;
1202
mutex_enter(&(recv_sys->mutex));
1204
if (recv_sys->apply_log_recs == FALSE) {
1206
/* Log records should not be applied now */
1208
mutex_exit(&(recv_sys->mutex));
1213
/* Look up the log records hashed for the page of this block */
recv_addr = recv_get_fil_addr_struct(buf_block_get_space(block),
1214
buf_block_get_page_no(block));
1216
/* Nothing to do when no records are hashed for the page, or when the
page is already being processed or has been processed */
if ((recv_addr == NULL)
1217
|| (recv_addr->state == RECV_BEING_PROCESSED)
1218
|| (recv_addr->state == RECV_PROCESSED)) {
1220
mutex_exit(&(recv_sys->mutex));
1226
fprintf(stderr, "Recovering space %lu, page %lu\n",
1227
buf_block_get_space(block), buf_block_get_page_no(block));
1230
/* The state is changed while the mutex is still held, so that the check
above rejects any other caller for the same page */
recv_addr->state = RECV_BEING_PROCESSED;
1232
mutex_exit(&(recv_sys->mutex));
1235
/* The mini-transaction used for applying the records runs in the
MTR_LOG_NONE mode */
mtr_set_log_mode(&mtr, MTR_LOG_NONE);
1237
page = block->frame;
1239
if (!recover_backup) {
1241
/* Move the ownership of the x-latch on the
1242
page to this OS thread, so that we can acquire
1243
a second x-latch on it. This is needed for the
1244
operations to the page to pass the debug
1247
rw_lock_x_lock_move_ownership(&(block->lock));
1250
success = buf_page_get_known_nowait(RW_X_LATCH, block,
1256
#ifdef UNIV_SYNC_DEBUG
1257
buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
1258
#endif /* UNIV_SYNC_DEBUG */
1261
/* Read the newest modification lsn from the page */
1262
page_lsn = mach_read_ull(page + FIL_PAGE_LSN);
1264
if (!recover_backup) {
1265
/* It may be that the page has been modified in the buffer
1266
pool: read the newest modification lsn there */
1269
= buf_page_get_newest_modification(&block->page);
1271
/* A nonzero value from the buffer pool overrides the lsn read from the
page frame */
if (page_newest_lsn) {
1273
page_lsn = page_newest_lsn;
1276
/* In recovery from a backup we do not really use the buffer
1279
page_newest_lsn = 0;
1282
modification_to_page = FALSE;
1283
start_lsn = end_lsn = 0;
1285
recv = UT_LIST_GET_FIRST(recv_addr->rec_list);
1288
end_lsn = recv->end_lsn;
1290
if (recv->len > RECV_DATA_BLOCK_SIZE) {
1291
/* We have to copy the record body to a separate
1294
buf = mem_alloc(recv->len);
1296
recv_data_copy_to_buf(buf, recv);
1298
buf = ((byte*)(recv->data)) + sizeof(recv_data_t);
1301
if (recv->type == MLOG_INIT_FILE_PAGE) {
1302
page_lsn = page_newest_lsn;
1304
mach_write_ull(page + UNIV_PAGE_SIZE
1305
- FIL_PAGE_END_LSN_OLD_CHKSUM, 0);
1306
mach_write_ull(page + FIL_PAGE_LSN, 0);
1309
if (recv->start_lsn >= page_lsn) {
1311
if (!modification_to_page) {
1313
modification_to_page = TRUE;
1314
start_lsn = recv->start_lsn;
1318
if (log_debug_writes) {
1320
"InnoDB: Applying log rec"
1322
" to space %lu page no %lu\n",
1323
(ulong) recv->type, (ulong) recv->len,
1324
(ulong) recv_addr->space,
1325
(ulong) recv_addr->page_no);
1327
#endif /* UNIV_DEBUG */
1329
recv_parse_or_apply_log_rec_body(recv->type, buf,
1332
/* After the record has been applied both lsn fields of the page are set
to the start lsn of the record plus its length */
mach_write_ull(page + UNIV_PAGE_SIZE
1333
- FIL_PAGE_END_LSN_OLD_CHKSUM,
1334
recv->start_lsn + recv->len);
1335
mach_write_ull(page + FIL_PAGE_LSN,
1336
recv->start_lsn + recv->len);
1339
/* NOTE(review): the body of this branch is not visible here; presumably
it frees the separate buffer allocated above for a long record */
if (recv->len > RECV_DATA_BLOCK_SIZE) {
1343
recv = UT_LIST_GET_NEXT(rec_list, recv);
1346
mutex_enter(&(recv_sys->mutex));
1348
/* Keep track of the maximum page lsn seen during the recovery */
if (recv_max_page_lsn < page_lsn) {
1349
recv_max_page_lsn = page_lsn;
1352
recv_addr->state = RECV_PROCESSED;
1354
/* The page is done: one address less is waiting to be processed. The
counter must be nonzero before it is decremented */
ut_a(recv_sys->n_addrs);
1355
recv_sys->n_addrs--;
1357
mutex_exit(&(recv_sys->mutex));
1359
/* The modification is reported with the start lsn of the first applied
record and the end lsn of the last record in the list */
if (!recover_backup && modification_to_page) {
1362
buf_flush_recv_note_modification(block, start_lsn, end_lsn);
1365
/* Make sure that committing mtr does not change the modification
1366
lsn values of page */
1368
mtr.modifications = FALSE;
1373
/***********************************************************************
1374
Reads in pages which have hashed log records, from an area around a given
1380
/* out: number of pages found */
1381
ulint space, /* in: space */
1382
ulint zip_size,/* in: compressed page size in bytes, or 0 */
1383
ulint page_no)/* in: page number */
1385
recv_addr_t* recv_addr;
1386
ulint page_nos[RECV_READ_AHEAD_AREA];
1390
low_limit = page_no - (page_no % RECV_READ_AHEAD_AREA);
1394
for (page_no = low_limit; page_no < low_limit + RECV_READ_AHEAD_AREA;
1396
recv_addr = recv_get_fil_addr_struct(space, page_no);
1398
if (recv_addr && !buf_page_peek(space, page_no)) {
1400
mutex_enter(&(recv_sys->mutex));
1402
if (recv_addr->state == RECV_NOT_PROCESSED) {
1403
recv_addr->state = RECV_BEING_READ;
1405
page_nos[n] = page_no;
1410
mutex_exit(&(recv_sys->mutex));
1414
buf_read_recv_pages(FALSE, space, zip_size, page_nos, n);
1416
fprintf(stderr, "Recv pages at %lu n %lu\n", page_nos[0], n);
1421
/***********************************************************************
1422
Empties the hash table of stored log records, applying them to the appropriate pages. */
1426
recv_apply_hashed_log_recs(
1427
/*=======================*/
1428
ibool allow_ibuf) /* in: if TRUE, also ibuf operations are
1429
allowed during the application; if FALSE,
1430
no ibuf operations are allowed, and after
1431
the application all file pages are flushed to
1432
disk and invalidated in buffer pool: this
1433
alternative means that no new log records
1434
can be generated during the application;
1435
the caller must in this case own the log mutex */
1438
recv_addr_t* recv_addr;
1441
ibool has_printed = FALSE;
1444
mutex_enter(&(recv_sys->mutex));
1446
if (recv_sys->apply_batch_on) {
1448
mutex_exit(&(recv_sys->mutex));
1450
os_thread_sleep(500000);
1455
ut_ad(!allow_ibuf == mutex_own(&log_sys->mutex));
1458
recv_no_ibuf_operations = TRUE;
1461
recv_sys->apply_log_recs = TRUE;
1462
recv_sys->apply_batch_on = TRUE;
1464
for (i = 0; i < hash_get_n_cells(recv_sys->addr_hash); i++) {
1466
recv_addr = HASH_GET_FIRST(recv_sys->addr_hash, i);
1469
ulint space = recv_addr->space;
1470
ulint zip_size = fil_space_get_zip_size(space);
1471
ulint page_no = recv_addr->page_no;
1473
if (recv_addr->state == RECV_NOT_PROCESSED) {
1475
ut_print_timestamp(stderr);
1476
fputs(" InnoDB: Starting an"
1477
" apply batch of log records"
1478
" to the database...\n"
1479
"InnoDB: Progress in percents: ",
1484
mutex_exit(&(recv_sys->mutex));
1486
if (buf_page_peek(space, page_no)) {
1491
block = buf_page_get(
1492
space, zip_size, page_no,
1494
#ifdef UNIV_SYNC_DEBUG
1495
buf_block_dbg_add_level(
1496
block, SYNC_NO_ORDER_CHECK);
1497
#endif /* UNIV_SYNC_DEBUG */
1498
recv_recover_page(FALSE, FALSE, block);
1501
recv_read_in_area(space, zip_size,
1505
mutex_enter(&(recv_sys->mutex));
1508
recv_addr = HASH_GET_NEXT(addr_hash, recv_addr);
1512
&& (i * 100) / hash_get_n_cells(recv_sys->addr_hash)
1514
/ hash_get_n_cells(recv_sys->addr_hash)) {
1516
fprintf(stderr, "%lu ", (ulong)
1518
/ hash_get_n_cells(recv_sys->addr_hash)));
1522
/* Wait until all the pages have been processed */
1524
while (recv_sys->n_addrs != 0) {
1526
mutex_exit(&(recv_sys->mutex));
1528
os_thread_sleep(500000);
1530
mutex_enter(&(recv_sys->mutex));
1535
fprintf(stderr, "\n");
1539
/* Flush all the file pages to disk and invalidate them in the buffer pool */
1542
mutex_exit(&(recv_sys->mutex));
1543
mutex_exit(&(log_sys->mutex));
1545
n_pages = buf_flush_batch(BUF_FLUSH_LIST, ULINT_MAX,
1547
ut_a(n_pages != ULINT_UNDEFINED);
1549
buf_flush_wait_batch_end(BUF_FLUSH_LIST);
1551
buf_pool_invalidate();
1553
mutex_enter(&(log_sys->mutex));
1554
mutex_enter(&(recv_sys->mutex));
1556
recv_no_ibuf_operations = FALSE;
1559
recv_sys->apply_log_recs = FALSE;
1560
recv_sys->apply_batch_on = FALSE;
1562
recv_sys_empty_hash();
1565
fprintf(stderr, "InnoDB: Apply batch completed\n");
1568
mutex_exit(&(recv_sys->mutex));
1571
#ifdef UNIV_HOTBACKUP
1572
/***********************************************************************
1573
Applies log records in the hash table to a backup. */
1576
recv_apply_log_recs_for_backup(void)
1577
/*================================*/
1579
recv_addr_t* recv_addr;
1587
recv_sys->apply_log_recs = TRUE;
1588
recv_sys->apply_batch_on = TRUE;
1590
block = buf_LRU_get_free_block(UNIV_PAGE_SIZE);
1592
fputs("InnoDB: Starting an apply batch of log records"
1593
" to the database...\n"
1594
"InnoDB: Progress in percents: ", stderr);
1596
n_hash_cells = hash_get_n_cells(recv_sys->addr_hash);
1598
for (i = 0; i < n_hash_cells; i++) {
1599
/* The address hash table is externally chained */
1600
recv_addr = hash_get_nth_cell(recv_sys->addr_hash, i)->node;
1602
while (recv_addr != NULL) {
1605
= fil_space_get_zip_size(recv_addr->space);
1607
if (zip_size == ULINT_UNDEFINED) {
1610
"InnoDB: Warning: cannot apply"
1612
" tablespace %lu page %lu,\n"
1613
"InnoDB: because tablespace with"
1614
" that id does not exist.\n",
1615
recv_addr->space, recv_addr->page_no);
1617
recv_addr->state = RECV_PROCESSED;
1619
ut_a(recv_sys->n_addrs);
1620
recv_sys->n_addrs--;
1622
goto skip_this_recv_addr;
1625
/* We simulate a page read made by the buffer pool, to
1626
make sure the recovery apparatus works ok. We must init
1629
buf_page_init_for_backup_restore(
1630
recv_addr->space, recv_addr->page_no,
1633
/* Extend the tablespace's last file if the page_no
1634
does not fall inside its bounds; we assume the last
1635
file is auto-extending, and ibbackup copied the file
1636
when it still was smaller */
1638
success = fil_extend_space_to_desired_size(
1640
recv_addr->space, recv_addr->page_no + 1);
1643
"InnoDB: Fatal error: cannot extend"
1644
" tablespace %lu to hold %lu pages\n",
1645
recv_addr->space, recv_addr->page_no);
1650
/* Read the page from the tablespace file using the
1651
fil0fil.c routines */
1654
error = fil_io(OS_FILE_READ, TRUE,
1655
recv_addr->space, zip_size,
1656
recv_addr->page_no, 0, zip_size,
1657
block->page.zip.data, NULL);
1659
error = fil_io(OS_FILE_READ, TRUE,
1660
recv_addr->space, 0,
1661
recv_addr->page_no, 0,
1663
block->frame, NULL);
1666
if (error != DB_SUCCESS) {
1668
"InnoDB: Fatal error: cannot read"
1670
" %lu page number %lu\n",
1671
(ulong) recv_addr->space,
1672
(ulong) recv_addr->page_no);
1677
/* Apply the log records to this page */
1678
recv_recover_page(TRUE, FALSE, block);
1680
/* Write the page back to the tablespace file using the
1681
fil0fil.c routines */
1683
buf_flush_init_for_writing(
1684
block->frame, buf_block_get_page_zip(block),
1685
mach_read_ull(block->frame + FIL_PAGE_LSN));
1688
error = fil_io(OS_FILE_WRITE, TRUE,
1689
recv_addr->space, zip_size,
1690
recv_addr->page_no, 0,
1692
block->page.zip.data, NULL);
1694
error = fil_io(OS_FILE_WRITE, TRUE,
1695
recv_addr->space, 0,
1696
recv_addr->page_no, 0,
1698
block->frame, NULL);
1700
skip_this_recv_addr:
1701
recv_addr = HASH_GET_NEXT(addr_hash, recv_addr);
1704
if ((100 * i) / n_hash_cells
1705
!= (100 * (i + 1)) / n_hash_cells) {
1706
fprintf(stderr, "%lu ",
1707
(ulong) ((100 * i) / n_hash_cells));
1712
buf_block_free(block);
1713
recv_sys_empty_hash();
1715
#endif /* UNIV_HOTBACKUP */
1717
/***********************************************************************
1718
Tries to parse a single log record and returns its length. */
1723
/* out: length of the record, or 0 if the record was
1725
byte* ptr, /* in: pointer to a buffer */
1726
byte* end_ptr,/* in: pointer to the buffer end */
1727
byte* type, /* out: type */
1728
ulint* space, /* out: space id */
1729
ulint* page_no,/* out: page number */
1730
byte** body) /* out: log record body start */
1736
if (ptr == end_ptr) {
1741
if (*ptr == MLOG_MULTI_REC_END) {
1748
if (*ptr == MLOG_DUMMY_RECORD) {
1751
*space = ULINT_UNDEFINED - 1; /* For debugging */
1756
new_ptr = mlog_parse_initial_log_record(ptr, end_ptr, type, space,
1760
if (UNIV_UNLIKELY(!new_ptr)) {
1765
/* Check that page_no is sensible */
1767
if (UNIV_UNLIKELY(*page_no > 0x8FFFFFFFUL)) {
1769
recv_sys->found_corrupt_log = TRUE;
1774
new_ptr = recv_parse_or_apply_log_rec_body(*type, new_ptr, end_ptr,
1776
if (UNIV_UNLIKELY(new_ptr == NULL)) {
1781
if (*page_no > recv_max_parsed_page_no) {
1782
recv_max_parsed_page_no = *page_no;
1785
return(new_ptr - ptr);
1788
/***********************************************************
1789
Calculates the new value for lsn when more data is added to the log. */
/* The result is lsn + len, increased by LOG_BLOCK_HDR_SIZE +
LOG_BLOCK_TRL_SIZE for every whole block payload (OS_FILE_LOG_BLOCK_SIZE
minus header and trailer) contained in frag_len + len, where frag_len is
the offset of lsn inside its OS_FILE_LOG_BLOCK_SIZE-sized block minus
LOG_BLOCK_HDR_SIZE. */
1792
recv_calc_lsn_on_data_add(
1793
/*======================*/
1794
ib_uint64_t lsn, /* in: old lsn */
1795
ib_uint64_t len) /* in: this many bytes of data is
1796
added, log block headers not included */
/* frag_len: position of lsn within its block, counted from the end of the
block header. */
1801
frag_len = (((ulint) lsn) % OS_FILE_LOG_BLOCK_SIZE)
1802
- LOG_BLOCK_HDR_SIZE;
/* Debug check that frag_len falls inside the payload area of a block;
presumably this also catches the subtraction above wrapping around when
lsn points into a block header -- TODO confirm frag_len is unsigned. */
1803
ut_ad(frag_len < OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE
1804
- LOG_BLOCK_TRL_SIZE);
/* NOTE(review): len is an ib_uint64_t but is narrowed to ulint here; if
ulint is narrower than 64 bits, a len that does not fit would be silently
truncated -- confirm that callers only pass small lengths. */
1805
lsn_len = (ulint) len;
/* Add one header + trailer for each whole block payload covered by
frag_len + len. */
1806
lsn_len += (lsn_len + frag_len)
1807
/ (OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE
1808
- LOG_BLOCK_TRL_SIZE)
1809
* (LOG_BLOCK_HDR_SIZE + LOG_BLOCK_TRL_SIZE);
1811
return(lsn + lsn_len);
1814
#ifdef UNIV_LOG_DEBUG
1815
/***********************************************************
1816
Checks that the parser recognizes incomplete initial segments of a log
1817
record as incomplete. */
/* This self-check is compiled only when UNIV_LOG_DEBUG is defined. */
1820
recv_check_incomplete_log_recs(
1821
/*===========================*/
1822
byte* ptr, /* in: pointer to a complete log record */
1823
ulint len) /* in: length of the log record */
/* For every i in [0, len) the parser is given the truncated range
[ptr, ptr + i) and must return 0; any other return value trips ut_a(). */
1831
for (i = 0; i < len; i++) {
1832
ut_a(0 == recv_parse_log_rec(ptr, ptr + i, &type, &space,
1836
#endif /* UNIV_LOG_DEBUG */
1838
/***********************************************************
1839
Prints diagnostic info of corrupt log. */
/* The report consists of a summary message built from the arguments and
from recv_sys / recv_previous_parsed_rec_* state, an optional hex dump of
the parsing buffer around the previous and the corrupt record, and a
warning text written to stderr that points the user to CHECK TABLE and to
the forcing-recovery documentation. */
1842
recv_report_corrupt_log(
1843
/*====================*/
1844
byte* ptr, /* in: pointer to corrupt log record */
1845
byte type, /* in: type of the record */
1846
ulint space, /* in: space id, this may also be garbage */
1847
ulint page_no)/* in: page number, this may also be garbage */
/* Summary message: the type, space id and page number exactly as passed
in, recv_sys->recovered_lsn, the type and multi-record flag remembered for
the previously parsed record, the offset of ptr from recv_sys->buf, and
recv_previous_parsed_rec_offset. */
1850
"InnoDB: ############### CORRUPT LOG RECORD FOUND\n"
1851
"InnoDB: Log record type %lu, space id %lu, page number %lu\n"
1852
"InnoDB: Log parsing proceeded successfully up to %"PRIu64"\n"
1853
"InnoDB: Previous log record type %lu, is multi %lu\n"
1854
"InnoDB: Recv offset %lu, prev %lu\n",
1855
(ulong) type, (ulong) space, (ulong) page_no,
1856
recv_sys->recovered_lsn,
1857
(ulong) recv_previous_parsed_rec_type,
1858
(ulong) recv_previous_parsed_rec_is_multi,
1859
(ulong) (ptr - recv_sys->buf),
1860
(ulong) recv_previous_parsed_rec_offset);
/* Guard for the hex dump. The first comparison requires the offset of ptr
plus 100 to exceed recv_previous_parsed_rec_offset, so the length handed to
ut_print_buf() below (offset of ptr + 200 - previous offset) is more than
100 bytes.
NOTE(review): the dump start is computed with
recv_previous_parsed_rec_offset - 100; this presumably assumes that at
least 100 bytes precede the previous record in the buffer -- confirm. */
1862
if ((ulint)(ptr - recv_sys->buf + 100)
1863
> recv_previous_parsed_rec_offset
1864
&& (ulint)(ptr - recv_sys->buf + 100
1865
- recv_previous_parsed_rec_offset)
1867
fputs("InnoDB: Hex dump of corrupt log starting"
1868
" 100 bytes before the start\n"
1869
"InnoDB: of the previous log rec,\n"
1870
"InnoDB: and ending 100 bytes after the start"
1871
" of the corrupt rec:\n",
1874
ut_print_buf(stderr,
1876
+ recv_previous_parsed_rec_offset - 100,
1877
ptr - recv_sys->buf + 200
1878
- recv_previous_parsed_rec_offset);
/* Closing warning for the operator, written to stderr. */
1882
fputs("InnoDB: WARNING: the log file may have been corrupt and it\n"
1883
"InnoDB: is possible that the log scan did not proceed\n"
1884
"InnoDB: far enough in recovery! Please run CHECK TABLE\n"
1885
"InnoDB: on your InnoDB tables to check that they are ok!\n"
1886
"InnoDB: If mysqld crashes after this recovery, look at\n"
1887
"InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
1888
"forcing-recovery.html\n"
1889
"InnoDB: about forcing recovery.\n", stderr);
1894
/***********************************************************
1895
Parses log records from a buffer and stores them to a hash table to wait
1896
merging to file pages. */
1899
recv_parse_log_recs(
1900
/*================*/
1901
/* out: currently always returns FALSE */
1902
ibool store_to_hash) /* in: TRUE if the records should be stored
1903
to the hash table; this is set to FALSE if just
1904
debug checking is needed */
1911
ib_uint64_t new_recovered_lsn;
1912
ib_uint64_t old_lsn;
1919
ut_ad(mutex_own(&(log_sys->mutex)));
1920
ut_ad(recv_sys->parse_start_lsn != 0);
1922
ptr = recv_sys->buf + recv_sys->recovered_offset;
1924
end_ptr = recv_sys->buf + recv_sys->len;
1926
if (ptr == end_ptr) {
1931
single_rec = (ulint)*ptr & MLOG_SINGLE_REC_FLAG;
1933
if (single_rec || *ptr == MLOG_DUMMY_RECORD) {
1934
/* The mtr only modified a single page, or this is a file op */
1936
old_lsn = recv_sys->recovered_lsn;
1938
/* Try to parse a log record, fetching its type, space id,
1939
page no, and a pointer to the body of the log record */
1941
len = recv_parse_log_rec(ptr, end_ptr, &type, &space,
1944
if (len == 0 || recv_sys->found_corrupt_log) {
1945
if (recv_sys->found_corrupt_log) {
1947
recv_report_corrupt_log(ptr,
1948
type, space, page_no);
1954
new_recovered_lsn = recv_calc_lsn_on_data_add(old_lsn, len);
1956
if (new_recovered_lsn > recv_sys->scanned_lsn) {
1957
/* The log record filled a log block, and we require
1958
that also the next log block should have been scanned in */
1964
recv_previous_parsed_rec_type = (ulint)type;
1965
recv_previous_parsed_rec_offset = recv_sys->recovered_offset;
1966
recv_previous_parsed_rec_is_multi = 0;
1968
recv_sys->recovered_offset += len;
1969
recv_sys->recovered_lsn = new_recovered_lsn;
1972
if (log_debug_writes) {
1974
"InnoDB: Parsed a single log rec"
1975
" type %lu len %lu space %lu page no %lu\n",
1976
(ulong) type, (ulong) len, (ulong) space,
1979
#endif /* UNIV_DEBUG */
1981
if (type == MLOG_DUMMY_RECORD) {
1984
} else if (!store_to_hash) {
1985
/* In debug checking, update a replicate page
1986
according to the log record, and check that it
1987
becomes identical with the original page */
1988
#ifdef UNIV_LOG_DEBUG
1989
recv_check_incomplete_log_recs(ptr, len);
1990
#endif/* UNIV_LOG_DEBUG */
1992
} else if (type == MLOG_FILE_CREATE
1993
|| type == MLOG_FILE_CREATE2
1994
|| type == MLOG_FILE_RENAME
1995
|| type == MLOG_FILE_DELETE) {
1997
#ifdef UNIV_HOTBACKUP
1998
if (recv_replay_file_ops) {
2000
/* In ibbackup --apply-log, replay an .ibd file
2001
operation, if possible; note that
2002
fil_path_to_mysql_datadir is set in ibbackup to
2003
point to the datadir we should use there */
2005
if (NULL == fil_op_log_parse_or_replay(
2006
body, end_ptr, type, space)) {
2008
"InnoDB: Error: file op"
2009
" log record of type %lu"
2010
" space %lu not complete in\n"
2011
"InnoDB: the replay phase."
2020
/* In normal mysqld crash recovery we do not try to
2021
replay file operations */
2023
recv_add_to_hash_table(type, space, page_no, body,
2025
recv_sys->recovered_lsn);
2028
/* Check that all the records associated with the single mtr
2029
are included within the buffer */
2035
len = recv_parse_log_rec(ptr, end_ptr, &type, &space,
2037
if (len == 0 || recv_sys->found_corrupt_log) {
2039
if (recv_sys->found_corrupt_log) {
2041
recv_report_corrupt_log(
2042
ptr, type, space, page_no);
2048
recv_previous_parsed_rec_type = (ulint)type;
2049
recv_previous_parsed_rec_offset
2050
= recv_sys->recovered_offset + total_len;
2051
recv_previous_parsed_rec_is_multi = 1;
2053
if ((!store_to_hash) && (type != MLOG_MULTI_REC_END)) {
2054
#ifdef UNIV_LOG_DEBUG
2055
recv_check_incomplete_log_recs(ptr, len);
2056
#endif /* UNIV_LOG_DEBUG */
2060
if (log_debug_writes) {
2062
"InnoDB: Parsed a multi log rec"
2064
" space %lu page no %lu\n",
2065
(ulong) type, (ulong) len,
2066
(ulong) space, (ulong) page_no);
2068
#endif /* UNIV_DEBUG */
2075
if (type == MLOG_MULTI_REC_END) {
2077
/* Found the end mark for the records */
2083
new_recovered_lsn = recv_calc_lsn_on_data_add(
2084
recv_sys->recovered_lsn, total_len);
2086
if (new_recovered_lsn > recv_sys->scanned_lsn) {
2087
/* The log record filled a log block, and we require
2088
that also the next log block should have been scanned in */
2094
/* Add all the records to the hash table */
2096
ptr = recv_sys->buf + recv_sys->recovered_offset;
2099
old_lsn = recv_sys->recovered_lsn;
2100
len = recv_parse_log_rec(ptr, end_ptr, &type, &space,
2102
if (recv_sys->found_corrupt_log) {
2104
recv_report_corrupt_log(ptr,
2105
type, space, page_no);
2109
ut_a(0 == ((ulint)*ptr & MLOG_SINGLE_REC_FLAG));
2111
recv_sys->recovered_offset += len;
2112
recv_sys->recovered_lsn
2113
= recv_calc_lsn_on_data_add(old_lsn, len);
2114
if (type == MLOG_MULTI_REC_END) {
2116
/* Found the end mark for the records */
2121
if (store_to_hash) {
2122
recv_add_to_hash_table(type, space, page_no,
2135
/***********************************************************
2136
Adds data from a new log block to the parsing buffer of recv_sys if
2137
recv_sys->parse_start_lsn is non-zero. */
2140
recv_sys_add_to_parsing_buf(
2141
/*========================*/
2142
/* out: TRUE if more data added */
2143
byte* log_block, /* in: log block */
2144
ib_uint64_t scanned_lsn) /* in: lsn of how far we were able
2145
to find data in this log block */
2152
ut_ad(scanned_lsn >= recv_sys->scanned_lsn);
2154
if (!recv_sys->parse_start_lsn) {
2155
/* Cannot start parsing yet because no start point for it has been found */
2161
data_len = log_block_get_data_len(log_block);
2163
if (recv_sys->parse_start_lsn >= scanned_lsn) {
2167
} else if (recv_sys->scanned_lsn >= scanned_lsn) {
2171
} else if (recv_sys->parse_start_lsn > recv_sys->scanned_lsn) {
2172
more_len = (ulint) (scanned_lsn - recv_sys->parse_start_lsn);
2174
more_len = (ulint) (scanned_lsn - recv_sys->scanned_lsn);
2177
if (more_len == 0) {
2182
ut_ad(data_len >= more_len);
2184
start_offset = data_len - more_len;
2186
if (start_offset < LOG_BLOCK_HDR_SIZE) {
2187
start_offset = LOG_BLOCK_HDR_SIZE;
2190
end_offset = data_len;
2192
if (end_offset > OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) {
2193
end_offset = OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE;
2196
ut_ad(start_offset <= end_offset);
2198
if (start_offset < end_offset) {
2199
ut_memcpy(recv_sys->buf + recv_sys->len,
2200
log_block + start_offset, end_offset - start_offset);
2202
recv_sys->len += end_offset - start_offset;
2204
ut_a(recv_sys->len <= RECV_PARSING_BUF_SIZE);
2210
/***********************************************************
2211
Moves the parsing buffer data left to the buffer start. */
/* The first recv_sys->recovered_offset bytes of recv_sys->buf are dropped:
the remaining recv_sys->len - recv_sys->recovered_offset bytes are moved to
offset 0, recv_sys->len shrinks by the same amount, and
recv_sys->recovered_offset becomes 0. */
2214
recv_sys_justify_left_parsing_buf(void)
2215
/*===================================*/
/* Source and destination both lie inside recv_sys->buf and may overlap. */
2217
ut_memmove(recv_sys->buf, recv_sys->buf + recv_sys->recovered_offset,
2218
recv_sys->len - recv_sys->recovered_offset);
/* Order matters: len is adjusted using recovered_offset before that field
is reset below. */
2220
recv_sys->len -= recv_sys->recovered_offset;
2222
recv_sys->recovered_offset = 0;
2225
/***********************************************************
2226
Scans log from a buffer and stores new log data to the parsing buffer. Parses
2227
and hashes the log records if new data found. */
2232
/* out: TRUE if limit_lsn has been
2233
reached, or not able to scan any more
2234
in this log group */
2235
ibool apply_automatically,/* in: TRUE if we want this
2236
function to apply log records
2237
automatically when the hash table
2238
becomes full; in the hot backup tool
2239
the tool does the applying, not this function */
2241
ulint available_memory,/* in: we let the hash table of recs
2242
to grow to this size, at the maximum */
2243
ibool store_to_hash, /* in: TRUE if the records should be
2244
stored to the hash table; this is set
2245
to FALSE if just debug checking is needed */
2247
byte* buf, /* in: buffer containing a log segment
2249
ulint len, /* in: buffer length */
2250
ib_uint64_t start_lsn, /* in: buffer start lsn */
2251
ib_uint64_t* contiguous_lsn, /* in/out: it is known that all log
2252
groups contain contiguous log data up
2254
ib_uint64_t* group_scanned_lsn)/* out: scanning succeeded up to
2259
ib_uint64_t scanned_lsn;
2264
ut_ad(start_lsn % OS_FILE_LOG_BLOCK_SIZE == 0);
2265
ut_ad(len % OS_FILE_LOG_BLOCK_SIZE == 0);
2267
ut_a(apply_automatically <= TRUE);
2268
ut_a(store_to_hash <= TRUE);
2273
scanned_lsn = start_lsn;
2276
while (log_block < buf + len && !finished) {
2278
no = log_block_get_hdr_no(log_block);
2280
fprintf(stderr, "Log block header no %lu\n", no);
2282
fprintf(stderr, "Scanned lsn no %lu\n",
2283
log_block_convert_lsn_to_no(scanned_lsn));
2285
if (no != log_block_convert_lsn_to_no(scanned_lsn)
2286
|| !log_block_checksum_is_ok_or_old_format(log_block)) {
2288
if (no == log_block_convert_lsn_to_no(scanned_lsn)
2289
&& !log_block_checksum_is_ok_or_old_format(
2292
"InnoDB: Log block no %lu at"
2293
" lsn %"PRIu64" has\n"
2294
"InnoDB: ok header, but checksum field"
2295
" contains %lu, should be %lu\n",
2298
(ulong) log_block_get_checksum(
2300
(ulong) log_block_calc_checksum(
2304
/* Garbage or an incompletely written log block */
2311
if (log_block_get_flush_bit(log_block)) {
2312
/* This block was a start of a log flush operation:
2313
we know that the previous flush operation must have
2314
been completed for all log groups before this block
2315
can have been flushed to any of the groups. Therefore,
2316
we know that log data is contiguous up to scanned_lsn
2317
in all non-corrupt log groups. */
2319
if (scanned_lsn > *contiguous_lsn) {
2320
*contiguous_lsn = scanned_lsn;
2324
data_len = log_block_get_data_len(log_block);
2326
if ((store_to_hash || (data_len == OS_FILE_LOG_BLOCK_SIZE))
2327
&& scanned_lsn + data_len > recv_sys->scanned_lsn
2328
&& (recv_sys->scanned_checkpoint_no > 0)
2329
&& (log_block_get_checkpoint_no(log_block)
2330
< recv_sys->scanned_checkpoint_no)
2331
&& (recv_sys->scanned_checkpoint_no
2332
- log_block_get_checkpoint_no(log_block)
2335
/* Garbage from a log buffer flush which was made
2336
before the most recent database recovery */
2339
#ifdef UNIV_LOG_DEBUG
2340
/* This is not really an error, but currently
2341
we stop here in the debug version: */
2348
if (!recv_sys->parse_start_lsn
2349
&& (log_block_get_first_rec_group(log_block) > 0)) {
2351
/* We found a point from which to start the parsing
2354
recv_sys->parse_start_lsn = scanned_lsn
2355
+ log_block_get_first_rec_group(log_block);
2356
recv_sys->scanned_lsn = recv_sys->parse_start_lsn;
2357
recv_sys->recovered_lsn = recv_sys->parse_start_lsn;
2360
scanned_lsn += data_len;
2362
if (scanned_lsn > recv_sys->scanned_lsn) {
2364
/* We have found more entries. If this scan is
2365
of startup type, we must initiate crash recovery
2366
environment before parsing these log records. */
2368
if (recv_log_scan_is_startup_type
2369
&& !recv_needed_recovery) {
2372
"InnoDB: Log scan progressed"
2373
" past the checkpoint lsn %"PRIu64"\n",
2374
recv_sys->scanned_lsn);
2375
recv_init_crash_recovery();
2378
/* We were able to find more log data: add it to the
2379
parsing buffer if parse_start_lsn is already
2382
if (recv_sys->len + 4 * OS_FILE_LOG_BLOCK_SIZE
2383
>= RECV_PARSING_BUF_SIZE) {
2385
"InnoDB: Error: log parsing"
2387
" Recovery may have failed!\n");
2389
recv_sys->found_corrupt_log = TRUE;
2391
} else if (!recv_sys->found_corrupt_log) {
2392
more_data = recv_sys_add_to_parsing_buf(
2393
log_block, scanned_lsn);
2396
recv_sys->scanned_lsn = scanned_lsn;
2397
recv_sys->scanned_checkpoint_no
2398
= log_block_get_checkpoint_no(log_block);
2401
if (data_len < OS_FILE_LOG_BLOCK_SIZE) {
2402
/* Log data for this group ends here */
2406
log_block += OS_FILE_LOG_BLOCK_SIZE;
2410
*group_scanned_lsn = scanned_lsn;
2412
if (recv_needed_recovery
2413
|| (recv_is_from_backup && !recv_is_making_a_backup)) {
2414
recv_scan_print_counter++;
2416
if (finished || (recv_scan_print_counter % 80 == 0)) {
2419
"InnoDB: Doing recovery: scanned up to"
2420
" log sequence number %"PRIu64"\n",
2421
*group_scanned_lsn);
2425
if (more_data && !recv_sys->found_corrupt_log) {
2426
/* Try to parse more log records */
2428
recv_parse_log_recs(store_to_hash);
2430
if (store_to_hash && mem_heap_get_size(recv_sys->heap)
2432
&& apply_automatically) {
2434
/* Hash table of log records has grown too big:
2435
empty it; FALSE means no ibuf operations
2436
allowed, as we cannot add new records to the
2437
log yet: they would be produced by ibuf
2440
recv_apply_hashed_log_recs(FALSE);
2443
if (recv_sys->recovered_offset > RECV_PARSING_BUF_SIZE / 4) {
2444
/* Move parsing buffer data to the buffer start */
2446
recv_sys_justify_left_parsing_buf();
2453
/***********************************************************
2454
Scans log from a buffer and stores new log data to the parsing buffer. Parses
2455
and hashes the log records if new data found. */
2458
recv_group_scan_log_recs(
2459
/*=====================*/
2460
log_group_t* group, /* in: log group */
2461
ib_uint64_t* contiguous_lsn, /* in/out: it is known that all log
2462
groups contain contiguous log data up
2464
ib_uint64_t* group_scanned_lsn)/* out: scanning succeeded up to
2468
ib_uint64_t start_lsn;
2469
ib_uint64_t end_lsn;
2473
start_lsn = *contiguous_lsn;
2476
end_lsn = start_lsn + RECV_SCAN_SIZE;
2478
log_group_read_log_seg(LOG_RECOVER, log_sys->buf,
2479
group, start_lsn, end_lsn);
2481
finished = recv_scan_log_recs(
2482
TRUE, (buf_pool->curr_size - recv_n_pool_free_frames)
2483
* UNIV_PAGE_SIZE, TRUE, log_sys->buf, RECV_SCAN_SIZE,
2484
start_lsn, contiguous_lsn, group_scanned_lsn);
2485
start_lsn = end_lsn;
2489
if (log_debug_writes) {
2491
"InnoDB: Scanned group %lu up to"
2492
" log sequence number %"PRIu64"\n",
2494
*group_scanned_lsn);
2496
#endif /* UNIV_DEBUG */
2499
/***********************************************************
2500
Initialize crash recovery environment. Can be called iff
2501
recv_needed_recovery == FALSE. */
/* In order: asserts that recv_needed_recovery is not yet set and then sets
it to TRUE, prints a timestamp to stderr followed by the crash recovery
notices, calls fil_load_single_table_tablespaces(), and finally calls
trx_sys_doublewrite_init_or_restore_pages(TRUE); that last call appears
under a srv_force_recovery < SRV_FORCE_NO_LOG_REDO test. */
2504
recv_init_crash_recovery(void)
2505
/*==========================*/
/* Asserts the precondition from the header; the flag is flipped right
afterwards, so a second call would fail this assertion. */
2507
ut_a(!recv_needed_recovery);
2509
recv_needed_recovery = TRUE;
2511
ut_print_timestamp(stderr);
2514
" InnoDB: Database was not"
2515
" shut down normally!\n"
2516
"InnoDB: Starting crash recovery.\n");
2519
"InnoDB: Reading tablespace information"
2520
" from the .ibd files...\n");
2522
fil_load_single_table_tablespaces();
2524
/* If we are using the doublewrite method, we will
2525
check if there are half-written pages in data files,
2526
and restore them from the doublewrite buffer if
2529
if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) {
2532
"InnoDB: Restoring possible"
2533
" half-written data pages from"
2534
" the doublewrite\n"
2535
"InnoDB: buffer...\n");
2536
trx_sys_doublewrite_init_or_restore_pages(TRUE);
2540
/************************************************************
2541
Recovers from a checkpoint. When this function returns, the database is able
2542
to start processing of new user transactions, but the function
2543
recv_recovery_from_checkpoint_finish should be called later to complete
2544
the recovery and free the resources used in it. */
2547
recv_recovery_from_checkpoint_start(
2548
/*================================*/
2549
/* out: error code or DB_SUCCESS */
2550
ulint type, /* in: LOG_CHECKPOINT or LOG_ARCHIVE */
2551
ib_uint64_t limit_lsn, /* in: recover up to this lsn
2553
ib_uint64_t min_flushed_lsn,/* in: min flushed lsn from
2555
ib_uint64_t max_flushed_lsn)/* in: max flushed lsn from
2559
log_group_t* max_cp_group;
2560
log_group_t* up_to_date_group;
2562
ib_uint64_t checkpoint_lsn;
2563
ib_uint64_t checkpoint_no;
2564
ib_uint64_t old_scanned_lsn;
2565
ib_uint64_t group_scanned_lsn;
2566
ib_uint64_t contiguous_lsn;
2567
ib_uint64_t archived_lsn;
2570
byte log_hdr_buf[LOG_FILE_HDR_SIZE];
2573
ut_ad(type != LOG_CHECKPOINT || limit_lsn == IB_ULONGLONG_MAX);
2575
if (type == LOG_CHECKPOINT) {
2577
recv_sys_init(FALSE, buf_pool_get_curr_size());
2580
if (srv_force_recovery >= SRV_FORCE_NO_LOG_REDO) {
2582
"InnoDB: The user has set SRV_FORCE_NO_LOG_REDO on\n");
2584
"InnoDB: Skipping log redo\n");
2589
recv_recovery_on = TRUE;
2591
recv_sys->limit_lsn = limit_lsn;
2593
mutex_enter(&(log_sys->mutex));
2595
/* Look for the latest checkpoint from any of the log groups */
2597
err = recv_find_max_checkpoint(&max_cp_group, &max_cp_field);
2599
if (err != DB_SUCCESS) {
2601
mutex_exit(&(log_sys->mutex));
2606
log_group_read_checkpoint_info(max_cp_group, max_cp_field);
2608
buf = log_sys->checkpoint_buf;
2610
checkpoint_lsn = mach_read_ull(buf + LOG_CHECKPOINT_LSN);
2611
checkpoint_no = mach_read_ull(buf + LOG_CHECKPOINT_NO);
2612
archived_lsn = mach_read_ull(buf + LOG_CHECKPOINT_ARCHIVED_LSN);
2614
/* Read the first log file header to print a note if this is
2615
a recovery from a restored InnoDB Hot Backup */
2617
fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE, max_cp_group->space_id, 0,
2618
0, 0, LOG_FILE_HDR_SIZE,
2619
log_hdr_buf, max_cp_group);
2621
if (0 == ut_memcmp(log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP,
2622
(byte*)"ibbackup", (sizeof "ibbackup") - 1)) {
2623
/* This log file was created by ibbackup --restore: print
2624
a note to the user about it */
2627
"InnoDB: The log file was created by"
2628
" ibbackup --apply-log at\n"
2630
log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP);
2632
"InnoDB: NOTE: the following crash recovery"
2633
" is part of a normal restore.\n");
2635
/* Wipe over the label now */
2637
memset(log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP,
2639
/* Write to the log file to wipe over the label */
2640
fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE,
2641
max_cp_group->space_id, 0,
2642
0, 0, OS_FILE_LOG_BLOCK_SIZE,
2643
log_hdr_buf, max_cp_group);
2646
#ifdef UNIV_LOG_ARCHIVE
2647
group = UT_LIST_GET_FIRST(log_sys->log_groups);
2650
log_checkpoint_get_nth_group_info(buf, group->id,
2651
&(group->archived_file_no),
2652
&(group->archived_offset));
2654
group = UT_LIST_GET_NEXT(log_groups, group);
2656
#endif /* UNIV_LOG_ARCHIVE */
2658
if (type == LOG_CHECKPOINT) {
2659
/* Start reading the log groups from the checkpoint lsn up. The
2660
variable contiguous_lsn contains an lsn up to which the log is
2661
known to be contiguously written to all log groups. */
2663
recv_sys->parse_start_lsn = checkpoint_lsn;
2664
recv_sys->scanned_lsn = checkpoint_lsn;
2665
recv_sys->scanned_checkpoint_no = 0;
2666
recv_sys->recovered_lsn = checkpoint_lsn;
2668
srv_start_lsn = checkpoint_lsn;
2671
contiguous_lsn = ut_uint64_align_down(recv_sys->scanned_lsn,
2672
OS_FILE_LOG_BLOCK_SIZE);
2673
if (type == LOG_ARCHIVE) {
2674
/* Try to recover the remaining part from logs: first from
2675
the logs of the archived group */
2677
group = recv_sys->archive_group;
2678
capacity = log_group_get_capacity(group);
2680
if (recv_sys->scanned_lsn > checkpoint_lsn + capacity
2681
|| checkpoint_lsn > recv_sys->scanned_lsn + capacity) {
2683
mutex_exit(&(log_sys->mutex));
2685
/* The group does not contain enough log: probably
2686
an archived log file was missing or corrupt */
2691
recv_group_scan_log_recs(group, &contiguous_lsn,
2692
&group_scanned_lsn);
2693
if (recv_sys->scanned_lsn < checkpoint_lsn) {
2695
mutex_exit(&(log_sys->mutex));
2697
/* The group did not contain enough log: an archived
2698
log file was missing or invalid, or the log group was corrupt */
2704
group->scanned_lsn = group_scanned_lsn;
2705
up_to_date_group = group;
2707
up_to_date_group = max_cp_group;
2710
ut_ad(RECV_SCAN_SIZE <= log_sys->buf_size);
2712
group = UT_LIST_GET_FIRST(log_sys->log_groups);
2714
if ((type == LOG_ARCHIVE) && (group == recv_sys->archive_group)) {
2715
group = UT_LIST_GET_NEXT(log_groups, group);
2718
/* Set the flag to publish that we are doing startup scan. */
2719
recv_log_scan_is_startup_type = (type == LOG_CHECKPOINT);
2721
old_scanned_lsn = recv_sys->scanned_lsn;
2723
recv_group_scan_log_recs(group, &contiguous_lsn,
2724
&group_scanned_lsn);
2725
group->scanned_lsn = group_scanned_lsn;
2727
if (old_scanned_lsn < group_scanned_lsn) {
2728
/* We found a more up-to-date group */
2730
up_to_date_group = group;
2733
if ((type == LOG_ARCHIVE)
2734
&& (group == recv_sys->archive_group)) {
2735
group = UT_LIST_GET_NEXT(log_groups, group);
2738
group = UT_LIST_GET_NEXT(log_groups, group);
2741
/* Done with startup scan. Clear the flag. */
2742
recv_log_scan_is_startup_type = FALSE;
2743
if (type == LOG_CHECKPOINT) {
2744
/* NOTE: we always do a 'recovery' at startup, but only if
2745
there is something wrong we will print a message to the
2746
user about recovery: */
2748
if (checkpoint_lsn != max_flushed_lsn
2749
|| checkpoint_lsn != min_flushed_lsn) {
2751
if (checkpoint_lsn < max_flushed_lsn) {
2753
"InnoDB: #########################"
2754
"#################################\n"
2757
"InnoDB: The log sequence number"
2758
" in ibdata files is higher\n"
2759
"InnoDB: than the log sequence number"
2760
" in the ib_logfiles! Are you sure\n"
2761
"InnoDB: you are using the right"
2762
" ib_logfiles to start up"
2764
"InnoDB: Log sequence number in"
2765
" ib_logfiles is %"PRIu64", log\n"
2766
"InnoDB: sequence numbers stamped"
2767
" to ibdata file headers are between\n"
2768
"InnoDB: %"PRIu64" and %"PRIu64".\n"
2769
"InnoDB: #########################"
2770
"#################################\n",
2776
if (!recv_needed_recovery) {
2778
"InnoDB: The log sequence number"
2779
" in ibdata files does not match\n"
2780
"InnoDB: the log sequence number"
2781
" in the ib_logfiles!\n");
2782
recv_init_crash_recovery();
2786
if (!recv_needed_recovery) {
2787
/* Init the doublewrite buffer memory structure */
2788
trx_sys_doublewrite_init_or_restore_pages(FALSE);
2792
/* We currently have only one log group */
2793
if (group_scanned_lsn < checkpoint_lsn) {
2794
ut_print_timestamp(stderr);
2796
" InnoDB: ERROR: We were only able to scan the log"
2798
"InnoDB: %"PRIu64", but a checkpoint was at %"PRIu64".\n"
2799
"InnoDB: It is possible that"
2800
" the database is now corrupt!\n",
2805
if (group_scanned_lsn < recv_max_page_lsn) {
2806
ut_print_timestamp(stderr);
2808
" InnoDB: ERROR: We were only able to scan the log"
2809
" up to %"PRIu64"\n"
2810
"InnoDB: but a database page a had an lsn %"PRIu64"."
2811
" It is possible that the\n"
2812
"InnoDB: database is now corrupt!\n",
2817
if (recv_sys->recovered_lsn < checkpoint_lsn) {
2819
mutex_exit(&(log_sys->mutex));
2821
if (recv_sys->recovered_lsn >= limit_lsn) {
2831
/* Synchronize the uncorrupted log groups to the most up-to-date log
2832
group; we also copy checkpoint info to groups */
2834
log_sys->next_checkpoint_lsn = checkpoint_lsn;
2835
log_sys->next_checkpoint_no = checkpoint_no + 1;
2837
#ifdef UNIV_LOG_ARCHIVE
2838
log_sys->archived_lsn = archived_lsn;
2839
#endif /* UNIV_LOG_ARCHIVE */
2841
recv_synchronize_groups(up_to_date_group);
2843
if (!recv_needed_recovery) {
2844
ut_a(checkpoint_lsn == recv_sys->recovered_lsn);
2846
srv_start_lsn = recv_sys->recovered_lsn;
2849
log_sys->lsn = recv_sys->recovered_lsn;
2851
ut_memcpy(log_sys->buf, recv_sys->last_block, OS_FILE_LOG_BLOCK_SIZE);
2853
log_sys->buf_free = (ulint) log_sys->lsn % OS_FILE_LOG_BLOCK_SIZE;
2854
log_sys->buf_next_to_write = log_sys->buf_free;
2855
log_sys->written_to_some_lsn = log_sys->lsn;
2856
log_sys->written_to_all_lsn = log_sys->lsn;
2858
log_sys->last_checkpoint_lsn = checkpoint_lsn;
2860
log_sys->next_checkpoint_no = checkpoint_no + 1;
2862
#ifdef UNIV_LOG_ARCHIVE
2863
if (archived_lsn == IB_ULONGLONG_MAX) {
2865
log_sys->archiving_state = LOG_ARCH_OFF;
2867
#endif /* UNIV_LOG_ARCHIVE */
2869
mutex_enter(&(recv_sys->mutex));
2871
recv_sys->apply_log_recs = TRUE;
2873
mutex_exit(&(recv_sys->mutex));
2875
mutex_exit(&(log_sys->mutex));
2877
recv_lsn_checks_on = TRUE;
2879
/* The database is now ready to start almost normal processing of user
2880
transactions: transaction rollbacks and the application of the log
2881
records in the hash table can be run in background. */
2886
/************************************************************
2887
Completes recovery from a checkpoint. */
2890
recv_recovery_from_checkpoint_finish(void)
2891
/*======================================*/
2895
/* Apply the hashed log records to the respective file pages */
2897
if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) {
2899
recv_apply_hashed_log_recs(TRUE);
2903
if (log_debug_writes) {
2905
"InnoDB: Log records applied to the database\n");
2907
#endif /* UNIV_DEBUG */
2909
if (recv_needed_recovery) {
2910
trx_sys_print_mysql_master_log_pos();
2911
trx_sys_print_mysql_binlog_offset();
2914
if (recv_sys->found_corrupt_log) {
2917
"InnoDB: WARNING: the log file may have been"
2919
"InnoDB: is possible that the log scan or parsing"
2920
" did not proceed\n"
2921
"InnoDB: far enough in recovery. Please run"
2923
"InnoDB: on your InnoDB tables to check that"
2925
"InnoDB: It may be safest to recover your"
2926
" InnoDB database from\n"
2927
"InnoDB: a backup!\n");
2930
/* Free the resources of the recovery system */
2932
recv_recovery_on = FALSE;
2934
#ifndef UNIV_LOG_DEBUG
2938
/* Drop partially created indexes. */
2939
row_merge_drop_temp_indexes();
2941
#ifdef UNIV_SYNC_DEBUG
2942
/* Wait for a while so that created threads have time to suspend
2943
themselves before we switch the latching order checks on */
2944
os_thread_sleep(1000000);
2946
/* Switch latching order checks on in sync0sync.c */
2947
sync_order_checks_on = TRUE;
2949
if (srv_force_recovery < SRV_FORCE_NO_TRX_UNDO) {
2950
/* Roll back the uncommitted transactions which have no user session */
2953
os_thread_create(trx_rollback_or_clean_all_recovered,
2958
/**********************************************************
2959
Resets the logs. The contents of log files will be lost! */
2964
ib_uint64_t lsn, /* in: reset to this lsn
2965
rounded up to be divisible by
2966
OS_FILE_LOG_BLOCK_SIZE, after which we add
2968
LOG_BLOCK_HDR_SIZE */
2969
#ifdef UNIV_LOG_ARCHIVE
2970
ulint arch_log_no, /* in: next archived log file number */
2971
#endif /* UNIV_LOG_ARCHIVE */
2972
ibool new_logs_created)/* in: TRUE if resetting logs
2973
is done at the log creation;
2974
FALSE if it is done after archive recovery */
2979
ut_ad(mutex_own(&(log_sys->mutex)));
2981
log_sys->lsn = ut_uint64_align_up(lsn, OS_FILE_LOG_BLOCK_SIZE);
2983
group = UT_LIST_GET_FIRST(log_sys->log_groups);
2986
group->lsn = log_sys->lsn;
2987
group->lsn_offset = LOG_FILE_HDR_SIZE;
2988
#ifdef UNIV_LOG_ARCHIVE
2989
group->archived_file_no = arch_log_no;
2990
group->archived_offset = 0;
2991
#endif /* UNIV_LOG_ARCHIVE */
2993
if (!new_logs_created) {
2994
recv_truncate_group(group, group->lsn, group->lsn,
2995
group->lsn, group->lsn);
2998
group = UT_LIST_GET_NEXT(log_groups, group);
3001
log_sys->buf_next_to_write = 0;
3002
log_sys->written_to_some_lsn = log_sys->lsn;
3003
log_sys->written_to_all_lsn = log_sys->lsn;
3005
log_sys->next_checkpoint_no = 0;
3006
log_sys->last_checkpoint_lsn = 0;
3008
#ifdef UNIV_LOG_ARCHIVE
3009
log_sys->archived_lsn = log_sys->lsn;
3010
#endif /* UNIV_LOG_ARCHIVE */
3012
log_block_init(log_sys->buf, log_sys->lsn);
3013
log_block_set_first_rec_group(log_sys->buf, LOG_BLOCK_HDR_SIZE);
3015
log_sys->buf_free = LOG_BLOCK_HDR_SIZE;
3016
log_sys->lsn += LOG_BLOCK_HDR_SIZE;
3018
mutex_exit(&(log_sys->mutex));
3020
/* Reset the checkpoint fields in logs */
3022
log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
3023
log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
3025
mutex_enter(&(log_sys->mutex));
3028
#ifdef UNIV_HOTBACKUP
3029
/**********************************************************
3030
Creates new log files after a backup has been restored. */
3033
recv_reset_log_files_for_backup(
3034
/*============================*/
3035
const char* log_dir, /* in: log file directory path */
3036
ulint n_log_files, /* in: number of log files */
3037
ulint log_file_size, /* in: log file size */
3038
ib_uint64_t lsn) /* in: new start lsn, must be
3039
divisible by OS_FILE_LOG_BLOCK_SIZE */
3047
static const char ib_logfile_basename[] = "ib_logfile";
3049
log_dir_len = strlen(log_dir);
3050
/* full path name of ib_logfile consists of log dir path + basename
3051
+ number. This must fit in the name buffer. */
3053
ut_a(log_dir_len + strlen(ib_logfile_basename) + 11 < sizeof(name));
3055
buf = ut_malloc(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
3056
memset(buf, '\0', LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
3058
for (i = 0; i < n_log_files; i++) {
3060
sprintf(name, "%s%s%lu", log_dir,
3061
ib_logfile_basename, (ulong)i);
3063
log_file = os_file_create_simple(name, OS_FILE_CREATE,
3064
OS_FILE_READ_WRITE, &success);
3067
"InnoDB: Cannot create %s. Check that"
3068
" the file does not exist yet.\n", name);
3074
"Setting log file size to %lu %lu\n",
3075
(ulong) ut_get_high32(log_file_size),
3076
(ulong) log_file_size & 0xFFFFFFFFUL);
3078
success = os_file_set_size(name, log_file,
3079
log_file_size & 0xFFFFFFFFUL,
3080
ut_get_high32(log_file_size));
3084
"InnoDB: Cannot set %s size to %lu %lu\n",
3085
name, (ulong) ut_get_high32(log_file_size),
3086
(ulong) (log_file_size & 0xFFFFFFFFUL));
3090
os_file_flush(log_file);
3091
os_file_close(log_file);
3094
/* We pretend there is a checkpoint at lsn + LOG_BLOCK_HDR_SIZE */
3096
log_reset_first_header_and_checkpoint(buf, lsn);
3098
log_block_init_in_old_format(buf + LOG_FILE_HDR_SIZE, lsn);
3099
log_block_set_first_rec_group(buf + LOG_FILE_HDR_SIZE,
3100
LOG_BLOCK_HDR_SIZE);
3101
sprintf(name, "%s%s%lu", log_dir, ib_logfile_basename, (ulong)0);
3103
log_file = os_file_create_simple(name, OS_FILE_OPEN,
3104
OS_FILE_READ_WRITE, &success);
3106
fprintf(stderr, "InnoDB: Cannot open %s.\n", name);
3111
os_file_write(name, log_file, buf, 0, 0,
3112
LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
3113
os_file_flush(log_file);
3114
os_file_close(log_file);
3118
#endif /* UNIV_HOTBACKUP */
3120
#ifdef UNIV_LOG_ARCHIVE
3121
/**********************************************************
Reads from the archive of a log group and performs recovery.
The archived file is named from group->id and group->archived_file_no,
added as a node to the group's archive file space, its header is checked
against the group, and log data starting right after the file header is
read with fil_io() and handed to recv_scan_log_recs(). */
3125
log_group_recover_from_archive_file(
3126
/*================================*/
3127
/* out: TRUE if no more complete
3128
consistent archive files */
3129
log_group_t* group) /* in: log group */
3131
os_file_t file_handle;
3132
ib_uint64_t start_lsn;
3133
ib_uint64_t file_end_lsn;
3134
ib_uint64_t dummy_lsn;
3135
ib_uint64_t scanned_lsn;
3141
ulint file_size_high;
3150
/* Add the file to the archive file space; open the file */
3152
log_archived_file_name_gen(name, group->id, group->archived_file_no);
3154
file_handle = os_file_create(name, OS_FILE_OPEN,
3155
OS_FILE_LOG, OS_FILE_AIO, &ret);
3160
"InnoDB: Do you want to copy additional"
3161
" archived log files\n"
3162
"InnoDB: to the directory\n");
3164
"InnoDB: or were these all the files needed"
3167
"InnoDB: (Y == copy more files; N == this is all)?");
3169
/* The answer to the prompt above is read from standard input */
input_char = getchar();
3171
if (input_char == (int) 'N') {
3174
} else if (input_char == (int) 'Y') {
3176
goto try_open_again;
3182
ret = os_file_get_size(file_handle, &file_size, &file_size_high);
3185
/* The high word of the file size is required to be zero */
ut_a(file_size_high == 0);
3187
fprintf(stderr, "InnoDB: Opened archived log file %s\n", name);
3189
ret = os_file_close(file_handle);
3191
/* A file shorter than one log file header cannot be used */
if (file_size < LOG_FILE_HDR_SIZE) {
3193
"InnoDB: Archive file header incomplete %s\n", name);
3200
/* Add the archive file as a node to the space */
3202
/* The size argument is computed in units of UNIV_PAGE_SIZE, plus one */
fil_node_create(name, 1 + file_size / UNIV_PAGE_SIZE,
3203
group->archive_space_id, FALSE);
3204
#if RECV_SCAN_SIZE < LOG_FILE_HDR_SIZE
3205
# error "RECV_SCAN_SIZE < LOG_FILE_HDR_SIZE"
3208
/* Read the archive file header */
3209
fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE, group->archive_space_id, 0, 0,
3210
LOG_FILE_HDR_SIZE, buf, NULL);
3212
/* Check if the archive file header is consistent */
3214
if (mach_read_from_4(buf + LOG_GROUP_ID) != group->id
3215
|| mach_read_from_4(buf + LOG_FILE_NO)
3216
!= group->archived_file_no) {
3218
"InnoDB: Archive file header inconsistent %s\n", name);
3223
if (!mach_read_from_4(buf + LOG_FILE_ARCH_COMPLETED)) {
3225
"InnoDB: Archive file not completely written %s\n",
3231
/* The lsn range covered by this file is stored in its header */
start_lsn = mach_read_ull(buf + LOG_FILE_START_LSN);
3232
file_end_lsn = mach_read_ull(buf + LOG_FILE_END_LSN);
3234
/* scanned_lsn == 0 means nothing has been scanned yet
(recv_recovery_from_archive_start() zeroes it): the file must then
start at or before parse_start_lsn */
if (!recv_sys->scanned_lsn) {
3236
if (recv_sys->parse_start_lsn < start_lsn) {
3238
"InnoDB: Archive log file %s"
3239
" starts from too big a lsn\n",
3244
recv_sys->scanned_lsn = start_lsn;
3247
/* The file must begin exactly at the lsn the scan has reached */
if (recv_sys->scanned_lsn != start_lsn) {
3250
"InnoDB: Archive log file %s starts from"
3256
/* Log data begins right after the file header */
read_offset = LOG_FILE_HDR_SIZE;
3259
len = RECV_SCAN_SIZE;
3261
/* Do not read past the end of the file; the shortened length is
aligned down to a multiple of OS_FILE_LOG_BLOCK_SIZE */
if (read_offset + len > file_size) {
3262
len = ut_calc_align_down(file_size - read_offset,
3263
OS_FILE_LOG_BLOCK_SIZE);
3272
if (log_debug_writes) {
3274
"InnoDB: Archive read starting at"
3275
" lsn %"PRIu64", len %lu from file %s\n",
3279
#endif /* UNIV_DEBUG */
3281
/* read_offset is split into a page number and a byte offset within
that page */
fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE,
3282
group->archive_space_id, read_offset / UNIV_PAGE_SIZE,
3283
read_offset % UNIV_PAGE_SIZE, len, buf, NULL);
3285
ret = recv_scan_log_recs(
3286
TRUE, (buf_pool->n_frames - recv_n_pool_free_frames)
3287
* UNIV_PAGE_SIZE, TRUE, buf, len, start_lsn,
3288
&dummy_lsn, &scanned_lsn);
3290
/* Compare the scanned lsn with the end lsn recorded in the file header */
if (scanned_lsn == file_end_lsn) {
3297
"InnoDB: Archive log file %s"
3298
" does not scan right\n",
3306
ut_ad(start_lsn == scanned_lsn);
3312
/************************************************************
3313
Recovers from archived log files, and also from log files, if they exist. */
3316
recv_recovery_from_archive_start(
3317
/*=============================*/
3318
/* out: error code or DB_SUCCESS */
3319
ib_uint64_t min_flushed_lsn,/* in: min flushed lsn field from the data files */
3321
ib_uint64_t limit_lsn, /* in: recover up to this lsn if possible */
3323
ulint first_log_no) /* in: number of the first archived
3324
log file to use in the recovery; the
3325
file will be searched from
3326
INNOBASE_LOG_ARCH_DIR specified in
3327
server config file */
3338
recv_sys_init(FALSE, buf_pool_get_curr_size());
3340
recv_recovery_on = TRUE;
3341
recv_recovery_from_backup_on = TRUE;
3343
recv_sys->limit_lsn = limit_lsn;
3347
group = UT_LIST_GET_FIRST(log_sys->log_groups);
3350
if (group->id == group_id) {
3355
group = UT_LIST_GET_NEXT(log_groups, group);
3360
"InnoDB: There is no log group defined with id %lu!\n",
3365
group->archived_file_no = first_log_no;
3367
recv_sys->parse_start_lsn = min_flushed_lsn;
3369
recv_sys->scanned_lsn = 0;
3370
recv_sys->scanned_checkpoint_no = 0;
3371
recv_sys->recovered_lsn = recv_sys->parse_start_lsn;
3373
recv_sys->archive_group = group;
3377
mutex_enter(&(log_sys->mutex));
3380
ret = log_group_recover_from_archive_file(group);
3382
/* Close and truncate a possible processed archive file
3383
from the file space */
3385
trunc_len = UNIV_PAGE_SIZE
3386
* fil_space_get_size(group->archive_space_id);
3387
if (trunc_len > 0) {
3388
fil_space_truncate_start(group->archive_space_id,
3392
group->archived_file_no++;
3395
if (recv_sys->recovered_lsn < limit_lsn) {
3397
if (!recv_sys->scanned_lsn) {
3399
recv_sys->scanned_lsn = recv_sys->parse_start_lsn;
3402
mutex_exit(&(log_sys->mutex));
3404
err = recv_recovery_from_checkpoint_start(LOG_ARCHIVE,
3408
if (err != DB_SUCCESS) {
3413
mutex_enter(&(log_sys->mutex));
3416
if (limit_lsn != IB_ULONGLONG_MAX) {
3418
recv_apply_hashed_log_recs(FALSE);
3420
recv_reset_logs(recv_sys->recovered_lsn, 0, FALSE);
3423
mutex_exit(&(log_sys->mutex));
3428
/************************************************************
Completes recovery from archive: runs the completion step of checkpoint
recovery and then clears recv_recovery_from_backup_on. */
3432
recv_recovery_from_archive_finish(void)
3433
/*===================================*/
3435
recv_recovery_from_checkpoint_finish();
3437
/* Clear the flag that recv_recovery_from_archive_start() set to TRUE */
recv_recovery_from_backup_on = FALSE;
3439
#endif /* UNIV_LOG_ARCHIVE */