1
/*****************************************************************************
3
Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
5
This program is free software; you can redistribute it and/or modify it under
6
the terms of the GNU General Public License as published by the Free Software
7
Foundation; version 2 of the License.
9
This program is distributed in the hope that it will be useful, but WITHOUT
10
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13
You should have received a copy of the GNU General Public License along with
14
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15
Place, Suite 330, Boston, MA 02111-1307 USA
17
*****************************************************************************/
19
/******************************************************
22
Created 9/20/1997 Heikki Tuuri
23
*******************************************************/
28
#include "log0recv.ic"
36
#include "srv0start.h"
41
#include "ibuf0ibuf.h"
45
#include "row0merge.h"
48
/* This is set to FALSE if the backup was originally taken with the
49
ibbackup --include regexp option: then we do not want to create tables in
50
directories which were not included */
51
UNIV_INTERN ibool recv_replay_file_ops = TRUE;
52
#endif /* UNIV_HOTBACKUP */
54
/* Log records are stored in the hash table in chunks at most of this size;
55
this must be less than UNIV_PAGE_SIZE as it is stored in the buffer pool */
56
#define RECV_DATA_BLOCK_SIZE (MEM_MAX_ALLOC_IN_BUF - sizeof(recv_data_t))
58
/* Read-ahead area in applying log records to file pages */
59
#define RECV_READ_AHEAD_AREA 32
61
UNIV_INTERN recv_sys_t* recv_sys = NULL;
62
UNIV_INTERN ibool recv_recovery_on = FALSE;
63
UNIV_INTERN ibool recv_recovery_from_backup_on = FALSE;
65
UNIV_INTERN ibool recv_needed_recovery = FALSE;
67
UNIV_INTERN ibool recv_lsn_checks_on = FALSE;
69
/* There are two conditions under which we scan the logs, the first
70
is normal startup and the second is when we do a recovery from an archive.
72
This flag is set if we are doing a scan from the last checkpoint during
73
startup. If we find log entries that were written after the last checkpoint
74
we know that the server was not shut down cleanly. We must then initialize
75
the crash recovery environment before attempting to store these entries in
76
the log hash table. */
77
UNIV_INTERN ibool recv_log_scan_is_startup_type = FALSE;
79
/* If the following is TRUE, the buffer pool file pages must be invalidated
80
after recovery and no ibuf operations are allowed; this becomes TRUE if
81
the log record hash table becomes too full, and log records must be merged
82
to file pages already before the recovery is finished: in this case no
83
ibuf operations are allowed, as they could modify the pages read in the
84
buffer pool before the pages have been recovered to the up-to-date state */
86
/* Recovery is running and no operations on the log files are allowed
87
yet: the variable name is misleading */
89
UNIV_INTERN ibool recv_no_ibuf_operations = FALSE;
91
/* The following counter is used to decide when to print info on the log scan */
93
UNIV_INTERN ulint recv_scan_print_counter = 0;
95
UNIV_INTERN ibool recv_is_from_backup = FALSE;
97
UNIV_INTERN ibool recv_is_making_a_backup = FALSE;
99
# define recv_is_making_a_backup FALSE
100
#endif /* UNIV_HOTBACKUP */
102
UNIV_INTERN ulint recv_previous_parsed_rec_type = 999999;
103
UNIV_INTERN ulint recv_previous_parsed_rec_offset = 0;
104
UNIV_INTERN ulint recv_previous_parsed_rec_is_multi = 0;
106
UNIV_INTERN ulint recv_max_parsed_page_no = 0;
108
/* This many frames must be left free in the buffer pool when we scan
109
the log and store the scanned log records in the buffer pool: we will
110
use these free frames to read in pages when we start applying the
111
log records to the database. */
113
UNIV_INTERN ulint recv_n_pool_free_frames = 256;
115
/* The maximum lsn we see for a page during the recovery process. If this
116
is bigger than the lsn we are able to scan up to, that is an indication that
117
the recovery failed and the database may be corrupt. */
119
UNIV_INTERN ib_uint64_t recv_max_page_lsn;
123
/***********************************************************
124
Initialize crash recovery environment. Can be called iff
125
recv_needed_recovery == FALSE. */
128
recv_init_crash_recovery(void);
129
/*===========================*/
131
/************************************************************
Creates the recovery system object. A non-NULL recv_sys is checked for
first; otherwise the struct is allocated with mem_alloc(), its mutex is
created with the SYNC_RECV argument, and the heap and addr_hash members
are set to NULL. */
135
recv_sys_create(void)
136
/*=================*/
138
if (recv_sys != NULL) {
/* NOTE(review): the body of this branch is not visible in this
excerpt; presumably it leaves an already created recovery system
untouched - confirm against the full file. */
143
recv_sys = mem_alloc(sizeof(recv_sys_t));
145
mutex_create(&recv_sys->mutex, SYNC_RECV);
147
/* No memory heap or address hash table exists yet. */
recv_sys->heap = NULL;
148
recv_sys->addr_hash = NULL;
151
/************************************************************
Inits the recovery system for a recovery operation: creates the memory
heap, allocates the parsing buffer and the last-block buffer, creates the
address hash table and resets the bookkeeping fields of recv_sys. The
work after the initial heap check is done between mutex_enter() and
mutex_exit() on recv_sys->mutex. */
157
ibool recover_from_backup, /* in: TRUE if this is called
to recover from a hot backup */
159
ulint available_memory) /* in: available memory in bytes */
161
if (recv_sys->heap != NULL) {
/* NOTE(review): the body of this branch is not visible in this
excerpt; a non-NULL heap presumably means the system has already
been initialized - confirm. */
166
mutex_enter(&(recv_sys->mutex));
168
if (!recover_from_backup) {
169
/* Not a hot backup recovery: the heap is created with
mem_heap_create_in_buffer(). */
recv_sys->heap = mem_heap_create_in_buffer(256);
171
/* NOTE(review): the lines separating this from the branch above are
not visible here; presumably this is the hot backup case, which also
sets recv_is_from_backup - confirm. */
recv_sys->heap = mem_heap_create(256);
172
recv_is_from_backup = TRUE;
175
/* Buffer of RECV_PARSING_BUF_SIZE bytes; nothing recovered from it yet. */
recv_sys->buf = ut_malloc(RECV_PARSING_BUF_SIZE);
177
recv_sys->recovered_offset = 0;
179
/* The size argument of the hash table is derived from available_memory. */
recv_sys->addr_hash = hash_create(available_memory / 64);
180
recv_sys->n_addrs = 0;
182
recv_sys->apply_log_recs = FALSE;
183
recv_sys->apply_batch_on = FALSE;
185
/* Two log blocks' worth of memory is allocated, and last_block is set
to the result of ut_align() on that memory with OS_FILE_LOG_BLOCK_SIZE. */
recv_sys->last_block_buf_start = mem_alloc(2 * OS_FILE_LOG_BLOCK_SIZE);
187
recv_sys->last_block = ut_align(recv_sys->last_block_buf_start,
188
OS_FILE_LOG_BLOCK_SIZE);
189
recv_sys->found_corrupt_log = FALSE;
191
recv_max_page_lsn = 0;
193
mutex_exit(&(recv_sys->mutex));
196
/************************************************************
Empties the hash table when it has been fully processed. Ownership of
recv_sys->mutex is asserted through ut_ad(). If n_addrs is still nonzero,
an error message with the number of unprocessed pages and
recv_max_parsed_page_no is formatted. The old hash table is then freed,
the heap is emptied, and a new hash table is created whose size argument
is derived from buf_pool_get_curr_size(). */
200
recv_sys_empty_hash(void)
201
/*=====================*/
203
ut_ad(mutex_own(&(recv_sys->mutex)));
205
if (recv_sys->n_addrs != 0) {
/* Some pages still have log records that were not processed.
NOTE(review): the call that consumes the format string below is not
visible in this excerpt. */
207
"InnoDB: Error: %lu pages with log records"
208
" were left unprocessed!\n"
209
"InnoDB: Maximum page number with"
210
" log records on it %lu\n",
211
(ulong) recv_sys->n_addrs,
212
(ulong) recv_max_parsed_page_no);
216
/* Drop the old table and the heap contents, then start over with a
fresh table. */
hash_table_free(recv_sys->addr_hash);
217
mem_heap_empty(recv_sys->heap);
219
recv_sys->addr_hash = hash_create(buf_pool_get_curr_size() / 256);
222
#ifndef UNIV_LOG_DEBUG
223
/************************************************************
Frees the recovery system: between mutex_enter() and mutex_exit() on
recv_sys->mutex, frees the address hash table, the memory heap, the
buffer recv_sys->buf and the last-block buffer, and then resets addr_hash
and heap to NULL. Compiled only when UNIV_LOG_DEBUG is not defined. */
230
mutex_enter(&(recv_sys->mutex));
232
hash_table_free(recv_sys->addr_hash);
233
mem_heap_free(recv_sys->heap);
234
ut_free(recv_sys->buf);
235
mem_free(recv_sys->last_block_buf_start);
237
/* Only addr_hash and heap are reset here; buf and
last_block_buf_start keep their old pointer values. */
recv_sys->addr_hash = NULL;
238
recv_sys->heap = NULL;
240
mutex_exit(&(recv_sys->mutex));
242
#endif /* UNIV_LOG_DEBUG */
244
/************************************************************
245
Truncates possible corrupted or extra records from a log group. */
250
log_group_t* group, /* in: log group */
251
ib_uint64_t recovered_lsn, /* in: recovery succeeded up to this lsn */
253
ib_uint64_t limit_lsn, /* in: this was the limit for recovery */
255
ib_uint64_t checkpoint_lsn, /* in: recovery was started from this checkpoint lsn */
257
ib_uint64_t archived_lsn) /* in: the log has been archived up to this lsn */
260
ib_uint64_t start_lsn;
262
ib_uint64_t finish_lsn1;
263
ib_uint64_t finish_lsn2;
264
ib_uint64_t finish_lsn;
268
if (archived_lsn == IB_ULONGLONG_MAX) {
269
/* Checkpoint was taken in the NOARCHIVELOG mode */
270
archived_lsn = checkpoint_lsn;
273
finish_lsn1 = ut_uint64_align_down(archived_lsn,
274
OS_FILE_LOG_BLOCK_SIZE)
275
+ log_group_get_capacity(group);
277
finish_lsn2 = ut_uint64_align_up(recovered_lsn,
278
OS_FILE_LOG_BLOCK_SIZE)
279
+ recv_sys->last_log_buf_size;
281
if (limit_lsn != IB_ULONGLONG_MAX) {
282
/* We do not know how far we should erase log records: erase
283
as much as possible */
285
finish_lsn = finish_lsn1;
287
/* It is enough to erase the length of the log buffer */
288
finish_lsn = finish_lsn1 < finish_lsn2
289
? finish_lsn1 : finish_lsn2;
292
ut_a(RECV_SCAN_SIZE <= log_sys->buf_size);
294
/* Write the log buffer full of zeros */
295
for (i = 0; i < RECV_SCAN_SIZE; i++) {
297
*(log_sys->buf + i) = '\0';
300
start_lsn = ut_uint64_align_down(recovered_lsn,
301
OS_FILE_LOG_BLOCK_SIZE);
303
if (start_lsn != recovered_lsn) {
304
/* Copy the last incomplete log block to the log buffer and
305
edit its data length: */
307
ut_memcpy(log_sys->buf, recv_sys->last_block,
308
OS_FILE_LOG_BLOCK_SIZE);
309
log_block_set_data_len(log_sys->buf,
310
(ulint) (recovered_lsn - start_lsn));
313
if (start_lsn >= finish_lsn) {
319
end_lsn = start_lsn + RECV_SCAN_SIZE;
321
if (end_lsn > finish_lsn) {
323
end_lsn = finish_lsn;
326
len = (ulint) (end_lsn - start_lsn);
328
log_group_write_buf(group, log_sys->buf, len, start_lsn, 0);
329
if (end_lsn >= finish_lsn) {
334
/* Write the log buffer full of zeros */
335
for (i = 0; i < RECV_SCAN_SIZE; i++) {
337
*(log_sys->buf + i) = '\0';
344
/************************************************************
345
Copies the log segment between group->recovered_lsn and recovered_lsn from the
346
most up-to-date log group to group, so that it contains the latest log data. */
351
log_group_t* up_to_date_group, /* in: the most up-to-date log
353
log_group_t* group, /* in: copy to this log
355
ib_uint64_t recovered_lsn) /* in: recovery succeeded up
358
ib_uint64_t start_lsn;
362
if (group->scanned_lsn >= recovered_lsn) {
367
ut_a(RECV_SCAN_SIZE <= log_sys->buf_size);
369
start_lsn = ut_uint64_align_down(group->scanned_lsn,
370
OS_FILE_LOG_BLOCK_SIZE);
372
end_lsn = start_lsn + RECV_SCAN_SIZE;
374
if (end_lsn > recovered_lsn) {
375
end_lsn = ut_uint64_align_up(recovered_lsn,
376
OS_FILE_LOG_BLOCK_SIZE);
379
log_group_read_log_seg(LOG_RECOVER, log_sys->buf,
380
up_to_date_group, start_lsn, end_lsn);
382
len = (ulint) (end_lsn - start_lsn);
384
log_group_write_buf(group, log_sys->buf, len, start_lsn, 0);
386
if (end_lsn >= recovered_lsn) {
395
/************************************************************
396
Copies a log segment from the most up-to-date log group to the other log
397
groups, so that they all contain the latest log data. Also writes the info
398
about the latest checkpoint to the groups, and inits the fields in the group
399
memory structs to up-to-date values. */
402
recv_synchronize_groups(
403
/*====================*/
404
log_group_t* up_to_date_group) /* in: the most up-to-date
408
ib_uint64_t start_lsn;
410
ib_uint64_t recovered_lsn;
411
ib_uint64_t limit_lsn;
413
recovered_lsn = recv_sys->recovered_lsn;
414
limit_lsn = recv_sys->limit_lsn;
416
/* Read the last recovered log block to the recovery system buffer:
417
the block is always incomplete */
419
start_lsn = ut_uint64_align_down(recovered_lsn,
420
OS_FILE_LOG_BLOCK_SIZE);
421
end_lsn = ut_uint64_align_up(recovered_lsn, OS_FILE_LOG_BLOCK_SIZE);
423
ut_a(start_lsn != end_lsn);
425
log_group_read_log_seg(LOG_RECOVER, recv_sys->last_block,
426
up_to_date_group, start_lsn, end_lsn);
428
group = UT_LIST_GET_FIRST(log_sys->log_groups);
431
if (group != up_to_date_group) {
433
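/* Copy log data if needed. NOTE(review): the call below passes
(group, up_to_date_group, ...), whereas the parameter list documented for
the group copy routine earlier in this file names up_to_date_group first
and the destination group second - confirm the argument order. */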
435
recv_copy_group(group, up_to_date_group,
439
/* Update the fields in the group struct to correspond to
442
log_group_set_fields(group, recovered_lsn);
444
group = UT_LIST_GET_NEXT(log_groups, group);
447
/* Copy the checkpoint info to the groups; remember that we have
448
incremented checkpoint_no by one, and the info will not be written
449
over the max checkpoint info, thus making the preservation of max
450
checkpoint info on disk certain */
452
log_groups_write_checkpoint_info();
454
mutex_exit(&(log_sys->mutex));
456
/* Wait for the checkpoint write to complete */
457
rw_lock_s_lock(&(log_sys->checkpoint_lock));
458
rw_lock_s_unlock(&(log_sys->checkpoint_lock));
460
mutex_enter(&(log_sys->mutex));
463
/***************************************************************************
Checks the consistency of the checkpoint info by verifying two stored
checksums. The low 32 bits of ut_fold_binary() over buf with length
LOG_CHECKPOINT_CHECKSUM_1 are compared with the 4-byte value read at
offset LOG_CHECKPOINT_CHECKSUM_1, and the low 32 bits of the fold that
starts at offset LOG_CHECKPOINT_LSN with length
LOG_CHECKPOINT_CHECKSUM_2 - LOG_CHECKPOINT_LSN are compared with the
4-byte value read at offset LOG_CHECKPOINT_CHECKSUM_2. */
467
recv_check_cp_is_consistent(
468
/*========================*/
469
/* out: TRUE if ok */
470
byte* buf) /* in: buffer containing checkpoint info */
474
/* First checksum. */
fold = ut_fold_binary(buf, LOG_CHECKPOINT_CHECKSUM_1);
476
if ((fold & 0xFFFFFFFFUL) != mach_read_from_4(
477
buf + LOG_CHECKPOINT_CHECKSUM_1)) {
/* NOTE(review): the body of this mismatch branch is not visible in
this excerpt; presumably the check fails here - confirm. */
481
/* Second checksum. */
fold = ut_fold_binary(buf + LOG_CHECKPOINT_LSN,
482
LOG_CHECKPOINT_CHECKSUM_2 - LOG_CHECKPOINT_LSN);
484
if ((fold & 0xFFFFFFFFUL) != mach_read_from_4(
485
buf + LOG_CHECKPOINT_CHECKSUM_2)) {
492
/************************************************************
493
Looks for the maximum consistent checkpoint from the log groups. */
496
recv_find_max_checkpoint(
497
/*=====================*/
498
/* out: error code or DB_SUCCESS */
499
log_group_t** max_group, /* out: max group */
500
ulint* max_field) /* out: LOG_CHECKPOINT_1 or
505
ib_uint64_t checkpoint_no;
509
group = UT_LIST_GET_FIRST(log_sys->log_groups);
515
buf = log_sys->checkpoint_buf;
518
group->state = LOG_GROUP_CORRUPTED;
520
for (field = LOG_CHECKPOINT_1; field <= LOG_CHECKPOINT_2;
521
field += LOG_CHECKPOINT_2 - LOG_CHECKPOINT_1) {
523
log_group_read_checkpoint_info(group, field);
525
if (!recv_check_cp_is_consistent(buf)) {
527
if (log_debug_writes) {
529
"InnoDB: Checkpoint in group"
530
" %lu at %lu invalid, %lu\n",
533
(ulong) mach_read_from_4(
535
+ LOG_CHECKPOINT_CHECKSUM_1));
538
#endif /* UNIV_DEBUG */
542
group->state = LOG_GROUP_OK;
544
group->lsn = mach_read_ull(
545
buf + LOG_CHECKPOINT_LSN);
546
group->lsn_offset = mach_read_from_4(
547
buf + LOG_CHECKPOINT_OFFSET);
548
checkpoint_no = mach_read_ull(
549
buf + LOG_CHECKPOINT_NO);
552
if (log_debug_writes) {
554
"InnoDB: Checkpoint number %lu"
555
" found in group %lu\n",
556
(ulong) checkpoint_no,
559
#endif /* UNIV_DEBUG */
561
if (checkpoint_no >= max_no) {
564
max_no = checkpoint_no;
571
group = UT_LIST_GET_NEXT(log_groups, group);
574
if (*max_group == NULL) {
577
"InnoDB: No valid checkpoint found.\n"
578
"InnoDB: If this error appears when you are"
579
" creating an InnoDB database,\n"
580
"InnoDB: the problem may be that during"
581
" an earlier attempt you managed\n"
582
"InnoDB: to create the InnoDB data files,"
583
" but log file creation failed.\n"
584
"InnoDB: If that is the case, please refer to\n"
585
"InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
586
"error-creating-innodb.html\n");
593
#ifdef UNIV_HOTBACKUP
594
/***********************************************************************
595
Reads the checkpoint info needed in hot backup. */
598
recv_read_cp_info_for_backup(
599
/*=========================*/
600
/* out: TRUE if success */
601
byte* hdr, /* in: buffer containing the log group
603
ib_uint64_t* lsn, /* out: checkpoint lsn */
604
ulint* offset, /* out: checkpoint offset in the log group */
605
ulint* fsp_limit,/* out: fsp limit of space 0,
606
1000000000 if the database is running
607
with < version 3.23.50 of InnoDB */
608
ib_uint64_t* cp_no, /* out: checkpoint number */
609
ib_uint64_t* first_header_lsn)
610
/* out: lsn of the start of the log file, read from hdr */
614
ib_uint64_t max_cp_no = 0;
617
cp_buf = hdr + LOG_CHECKPOINT_1;
619
if (recv_check_cp_is_consistent(cp_buf)) {
620
max_cp_no = mach_read_ull(cp_buf + LOG_CHECKPOINT_NO);
621
max_cp = LOG_CHECKPOINT_1;
624
cp_buf = hdr + LOG_CHECKPOINT_2;
626
if (recv_check_cp_is_consistent(cp_buf)) {
627
if (mach_read_ull(cp_buf + LOG_CHECKPOINT_NO) > max_cp_no) {
628
max_cp = LOG_CHECKPOINT_2;
636
cp_buf = hdr + max_cp;
638
*lsn = mach_read_ull(cp_buf + LOG_CHECKPOINT_LSN);
639
*offset = mach_read_from_4(cp_buf + LOG_CHECKPOINT_OFFSET);
641
/* If the user is running a pre-3.23.50 version of InnoDB, its
642
checkpoint data does not contain the fsp limit info */
643
if (mach_read_from_4(cp_buf + LOG_CHECKPOINT_FSP_MAGIC_N)
644
== LOG_CHECKPOINT_FSP_MAGIC_N_VAL) {
646
*fsp_limit = mach_read_from_4(
647
cp_buf + LOG_CHECKPOINT_FSP_FREE_LIMIT);
649
if (*fsp_limit == 0) {
650
*fsp_limit = 1000000000;
653
*fsp_limit = 1000000000;
656
/* fprintf(stderr, "fsp limit %lu MB\n", *fsp_limit); */
658
*cp_no = mach_read_ull(cp_buf + LOG_CHECKPOINT_NO);
660
*first_header_lsn = mach_read_ull(hdr + LOG_FILE_START_LSN);
664
#endif /* UNIV_HOTBACKUP */
666
/**********************************************************
Checks whether the checksum calculated for a log block
(log_block_calc_checksum) equals the 4-byte checksum stored in the
trailer checksum field of the block (log_block_get_checksum).
We also accept a log block in the old format < InnoDB-3.23.52 where the
checksum field contains the log block number. */
672
log_block_checksum_is_ok_or_old_format(
673
/*===================================*/
674
/* out: TRUE if ok, or if the log block may be in the
format of InnoDB version < 3.23.52 */
676
byte* block) /* in: pointer to a log block */
678
#ifdef UNIV_LOG_DEBUG
680
#endif /* UNIV_LOG_DEBUG */
681
/* First test: the stored checksum equals the calculated one. */
if (log_block_calc_checksum(block) == log_block_get_checksum(block)) {
686
/* Second test: the checksum field equals the block number from the
block header. */
if (log_block_get_hdr_no(block) == log_block_get_checksum(block)) {
688
/* We assume the log block is in the format of
InnoDB version < 3.23.52 and the block is ok */
692
"InnoDB: Scanned old format < InnoDB-3.23.52"
693
" log block number %lu\n",
694
log_block_get_hdr_no(block));
702
#ifdef UNIV_HOTBACKUP
703
/***********************************************************************
704
Scans the log segment and n_bytes_scanned is set to the length of valid
708
recv_scan_log_seg_for_backup(
709
/*=========================*/
710
byte* buf, /* in: buffer containing log data */
711
ulint buf_len, /* in: data length in that buffer */
712
ib_uint64_t* scanned_lsn, /* in/out: lsn of buffer start,
713
we return scanned lsn */
714
ulint* scanned_checkpoint_no,
715
/* in/out: 4 lowest bytes of the
716
highest scanned checkpoint number so
718
ulint* n_bytes_scanned)/* out: how much we were able to
719
scan, smaller than buf_len if log
726
*n_bytes_scanned = 0;
728
for (log_block = buf; log_block < buf + buf_len;
729
log_block += OS_FILE_LOG_BLOCK_SIZE) {
731
no = log_block_get_hdr_no(log_block);
734
fprintf(stderr, "Log block header no %lu\n", no);
737
if (no != log_block_convert_lsn_to_no(*scanned_lsn)
738
|| !log_block_checksum_is_ok_or_old_format(log_block)) {
741
"Log block n:o %lu, scanned lsn n:o %lu\n",
742
no, log_block_convert_lsn_to_no(*scanned_lsn));
744
/* Garbage or an incompletely written log block */
746
log_block += OS_FILE_LOG_BLOCK_SIZE;
749
"Next log block n:o %lu\n",
750
log_block_get_hdr_no(log_block));
755
if (*scanned_checkpoint_no > 0
756
&& log_block_get_checkpoint_no(log_block)
757
< *scanned_checkpoint_no
758
&& *scanned_checkpoint_no
759
- log_block_get_checkpoint_no(log_block)
762
/* Garbage from a log buffer flush which was made
763
before the most recent database recovery */
766
"Scanned cp n:o %lu, block cp n:o %lu\n",
767
*scanned_checkpoint_no,
768
log_block_get_checkpoint_no(log_block));
773
data_len = log_block_get_data_len(log_block);
775
*scanned_checkpoint_no
776
= log_block_get_checkpoint_no(log_block);
777
*scanned_lsn += data_len;
779
*n_bytes_scanned += data_len;
781
if (data_len < OS_FILE_LOG_BLOCK_SIZE) {
782
/* Log data ends here */
785
fprintf(stderr, "Log block data len %lu\n",
792
#endif /* UNIV_HOTBACKUP */
794
/***********************************************************************
795
Tries to parse a single log record body and also applies it to a page if
796
specified. File ops are parsed, but not applied in this function. */
799
recv_parse_or_apply_log_rec_body(
800
/*=============================*/
801
/* out: log record end, NULL if not a
803
byte type, /* in: type */
804
byte* ptr, /* in: pointer to a buffer */
805
byte* end_ptr,/* in: pointer to the buffer end */
806
buf_block_t* block, /* in/out: buffer block or NULL; if
807
not NULL, then the log record is
808
applied to the page, and the log
809
record should be complete then */
810
mtr_t* mtr) /* in: mtr or NULL; should be non-NULL
811
if and only if block is non-NULL */
813
dict_index_t* index = NULL;
815
page_zip_des_t* page_zip;
817
ut_ad(!block == !mtr);
821
page_zip = buf_block_get_page_zip(block);
828
case MLOG_1BYTE: case MLOG_2BYTES: case MLOG_4BYTES: case MLOG_8BYTES:
829
ptr = mlog_parse_nbytes(type, ptr, end_ptr, page, page_zip);
831
case MLOG_REC_INSERT: case MLOG_COMP_REC_INSERT:
832
if (NULL != (ptr = mlog_parse_index(
834
type == MLOG_COMP_REC_INSERT,
837
|| (ibool)!!page_is_comp(page)
838
== dict_table_is_comp(index->table));
839
ptr = page_cur_parse_insert_rec(FALSE, ptr, end_ptr,
843
case MLOG_REC_CLUST_DELETE_MARK: case MLOG_COMP_REC_CLUST_DELETE_MARK:
844
if (NULL != (ptr = mlog_parse_index(
846
type == MLOG_COMP_REC_CLUST_DELETE_MARK,
849
|| (ibool)!!page_is_comp(page)
850
== dict_table_is_comp(index->table));
851
ptr = btr_cur_parse_del_mark_set_clust_rec(
852
ptr, end_ptr, page, page_zip, index);
855
case MLOG_COMP_REC_SEC_DELETE_MARK:
856
/* This log record type is obsolete, but we process it for
857
backward compatibility with MySQL 5.0.3 and 5.0.4. */
858
ut_a(!page || page_is_comp(page));
860
ptr = mlog_parse_index(ptr, end_ptr, TRUE, &index);
865
case MLOG_REC_SEC_DELETE_MARK:
866
ptr = btr_cur_parse_del_mark_set_sec_rec(ptr, end_ptr,
869
case MLOG_REC_UPDATE_IN_PLACE: case MLOG_COMP_REC_UPDATE_IN_PLACE:
870
if (NULL != (ptr = mlog_parse_index(
872
type == MLOG_COMP_REC_UPDATE_IN_PLACE,
875
|| (ibool)!!page_is_comp(page)
876
== dict_table_is_comp(index->table));
877
ptr = btr_cur_parse_update_in_place(ptr, end_ptr, page,
881
case MLOG_LIST_END_DELETE: case MLOG_COMP_LIST_END_DELETE:
882
case MLOG_LIST_START_DELETE: case MLOG_COMP_LIST_START_DELETE:
883
if (NULL != (ptr = mlog_parse_index(
885
type == MLOG_COMP_LIST_END_DELETE
886
|| type == MLOG_COMP_LIST_START_DELETE,
889
|| (ibool)!!page_is_comp(page)
890
== dict_table_is_comp(index->table));
891
ptr = page_parse_delete_rec_list(type, ptr, end_ptr,
895
case MLOG_LIST_END_COPY_CREATED: case MLOG_COMP_LIST_END_COPY_CREATED:
896
if (NULL != (ptr = mlog_parse_index(
898
type == MLOG_COMP_LIST_END_COPY_CREATED,
901
|| (ibool)!!page_is_comp(page)
902
== dict_table_is_comp(index->table));
903
ptr = page_parse_copy_rec_list_to_created_page(
904
ptr, end_ptr, block, index, mtr);
907
case MLOG_PAGE_REORGANIZE: case MLOG_COMP_PAGE_REORGANIZE:
908
if (NULL != (ptr = mlog_parse_index(
910
type == MLOG_COMP_PAGE_REORGANIZE,
913
|| (ibool)!!page_is_comp(page)
914
== dict_table_is_comp(index->table));
915
ptr = btr_parse_page_reorganize(ptr, end_ptr, index,
919
case MLOG_PAGE_CREATE: case MLOG_COMP_PAGE_CREATE:
921
ptr = page_parse_create(ptr, end_ptr,
922
type == MLOG_COMP_PAGE_CREATE,
925
case MLOG_UNDO_INSERT:
926
ptr = trx_undo_parse_add_undo_rec(ptr, end_ptr, page);
928
case MLOG_UNDO_ERASE_END:
929
ptr = trx_undo_parse_erase_page_end(ptr, end_ptr, page, mtr);
932
ptr = trx_undo_parse_page_init(ptr, end_ptr, page, mtr);
934
case MLOG_UNDO_HDR_DISCARD:
935
ptr = trx_undo_parse_discard_latest(ptr, end_ptr, page, mtr);
937
case MLOG_UNDO_HDR_CREATE:
938
case MLOG_UNDO_HDR_REUSE:
939
ptr = trx_undo_parse_page_header(type, ptr, end_ptr,
942
case MLOG_REC_MIN_MARK: case MLOG_COMP_REC_MIN_MARK:
943
/* On a compressed page, MLOG_COMP_REC_MIN_MARK
944
will be followed by MLOG_COMP_REC_DELETE
945
or MLOG_ZIP_WRITE_HEADER(FIL_PAGE_PREV, FIL_NULL)
946
in the same mini-transaction. */
947
ut_a(type == MLOG_COMP_REC_MIN_MARK || !page_zip);
948
ptr = btr_parse_set_min_rec_mark(
949
ptr, end_ptr, type == MLOG_COMP_REC_MIN_MARK,
952
case MLOG_REC_DELETE: case MLOG_COMP_REC_DELETE:
953
if (NULL != (ptr = mlog_parse_index(
955
type == MLOG_COMP_REC_DELETE,
958
|| (ibool)!!page_is_comp(page)
959
== dict_table_is_comp(index->table));
960
ptr = page_cur_parse_delete_rec(ptr, end_ptr,
964
case MLOG_IBUF_BITMAP_INIT:
965
ptr = ibuf_parse_bitmap_init(ptr, end_ptr, block, mtr);
967
case MLOG_INIT_FILE_PAGE:
968
ptr = fsp_parse_init_file_page(ptr, end_ptr, block);
970
case MLOG_WRITE_STRING:
971
ptr = mlog_parse_string(ptr, end_ptr, page, page_zip);
973
case MLOG_FILE_CREATE:
974
case MLOG_FILE_RENAME:
975
case MLOG_FILE_DELETE:
976
case MLOG_FILE_CREATE2:
977
ptr = fil_op_log_parse_or_replay(ptr, end_ptr, type, 0);
979
case MLOG_ZIP_WRITE_NODE_PTR:
980
ptr = page_zip_parse_write_node_ptr(ptr, end_ptr,
983
case MLOG_ZIP_WRITE_BLOB_PTR:
984
ptr = page_zip_parse_write_blob_ptr(ptr, end_ptr,
987
case MLOG_ZIP_WRITE_HEADER:
988
ptr = page_zip_parse_write_header(ptr, end_ptr,
991
case MLOG_ZIP_PAGE_COMPRESS:
992
ptr = page_zip_parse_compress(ptr, end_ptr,
997
recv_sys->found_corrupt_log = TRUE;
1001
dict_table_t* table = index->table;
1003
dict_mem_index_free(index);
1004
dict_mem_table_free(table);
1010
/*************************************************************************
Calculates the fold value of a page file address: used in inserting or
searching for a log record in the hash table. The value is the result of
ut_fold_ulint_pair() on the space and the page number. */
1017
/* out: folded value */
1018
ulint space, /* in: space */
1019
ulint page_no)/* in: page number */
1021
return(ut_fold_ulint_pair(space, page_no));
1024
/*************************************************************************
Calculates the hash value of a page file address: used in inserting or
searching for a log record in the hash table. The fold value from
recv_fold() is passed to hash_calc_hash() together with
recv_sys->addr_hash. */
1031
/* out: hash value returned by hash_calc_hash() */
1032
ulint space, /* in: space */
1033
ulint page_no)/* in: page number */
1035
return(hash_calc_hash(recv_fold(space, page_no), recv_sys->addr_hash));
1038
/*************************************************************************
1039
Gets the hashed file address struct for a page. */
1042
recv_get_fil_addr_struct(
1043
/*=====================*/
1044
/* out: file address struct, NULL if not found from the hash table */
1046
ulint space, /* in: space id */
1047
ulint page_no)/* in: page number */
1049
recv_addr_t* recv_addr;
1051
recv_addr = HASH_GET_FIRST(recv_sys->addr_hash,
1052
recv_hash(space, page_no));
1054
if ((recv_addr->space == space)
1055
&& (recv_addr->page_no == page_no)) {
1060
recv_addr = HASH_GET_NEXT(addr_hash, recv_addr);
1066
/***********************************************************************
1067
Adds a new log record to the hash table of log records. */
1070
recv_add_to_hash_table(
1071
/*===================*/
1072
byte type, /* in: log record type */
1073
ulint space, /* in: space id */
1074
ulint page_no, /* in: page number */
1075
byte* body, /* in: log record body */
1076
byte* rec_end, /* in: log record end */
1077
ib_uint64_t start_lsn, /* in: start lsn of the mtr */
1078
ib_uint64_t end_lsn) /* in: end lsn of the mtr */
1082
recv_data_t* recv_data;
1083
recv_data_t** prev_field;
1084
recv_addr_t* recv_addr;
1086
if (fil_tablespace_deleted_or_being_deleted_in_mem(space, -1)) {
1087
/* The tablespace does not exist any more: do not store the
1093
len = rec_end - body;
1095
recv = mem_heap_alloc(recv_sys->heap, sizeof(recv_t));
1097
recv->len = rec_end - body;
1098
recv->start_lsn = start_lsn;
1099
recv->end_lsn = end_lsn;
1101
recv_addr = recv_get_fil_addr_struct(space, page_no);
1103
if (recv_addr == NULL) {
1104
recv_addr = mem_heap_alloc(recv_sys->heap,
1105
sizeof(recv_addr_t));
1106
recv_addr->space = space;
1107
recv_addr->page_no = page_no;
1108
recv_addr->state = RECV_NOT_PROCESSED;
1110
UT_LIST_INIT(recv_addr->rec_list);
1112
HASH_INSERT(recv_addr_t, addr_hash, recv_sys->addr_hash,
1113
recv_fold(space, page_no), recv_addr);
1114
recv_sys->n_addrs++;
1116
fprintf(stderr, "Inserting log rec for space %lu, page %lu\n",
1121
UT_LIST_ADD_LAST(rec_list, recv_addr->rec_list, recv);
1123
prev_field = &(recv->data);
1125
/* Store the log record body in chunks of less than UNIV_PAGE_SIZE:
1126
recv_sys->heap grows into the buffer pool, and bigger chunks could not
1129
while (rec_end > body) {
1131
len = rec_end - body;
1133
if (len > RECV_DATA_BLOCK_SIZE) {
1134
len = RECV_DATA_BLOCK_SIZE;
1137
recv_data = mem_heap_alloc(recv_sys->heap,
1138
sizeof(recv_data_t) + len);
1139
*prev_field = recv_data;
1141
ut_memcpy(((byte*)recv_data) + sizeof(recv_data_t), body, len);
1143
prev_field = &(recv_data->next);
1151
/*************************************************************************
Copies the log record body from recv to buf. The body is read from a chain
of recv_data_t chunks that starts at recv->data and is followed through
the next pointers; the bytes of each chunk are taken from directly after
its recv_data_t header, and a part length is capped at
RECV_DATA_BLOCK_SIZE. */
1155
recv_data_copy_to_buf(
1156
/*==================*/
1157
byte* buf, /* in: buffer of length at least recv->len */
1158
recv_t* recv) /* in: log record */
1160
recv_data_t* recv_data;
1165
/* Start from the first chunk of the record body. */
recv_data = recv->data;
1168
/* NOTE(review): the declarations of len and part_len, the loop around
the following statements and the remaining branch are not visible in
this excerpt. */
if (len > RECV_DATA_BLOCK_SIZE) {
1169
part_len = RECV_DATA_BLOCK_SIZE;
1174
/* The chunk payload begins right after the recv_data_t header. */
ut_memcpy(buf, ((byte*)recv_data) + sizeof(recv_data_t),
1179
recv_data = recv_data->next;
1183
/****************************************************************************
1184
Applies the hashed log records to the page, if the page lsn is less than the
1185
lsn of a log record. This can be called when a buffer page has just been
1186
read in, or also for a page already in the buffer pool. */
1191
ibool recover_backup,
1192
/* in: TRUE if we are recovering a backup
1193
page: then we do not acquire any latches
1194
since the page was read in outside the
1197
/* in: TRUE if the i/o-handler calls this for
1198
a freshly read page */
1199
buf_block_t* block) /* in: buffer block */
1202
recv_addr_t* recv_addr;
1205
ib_uint64_t start_lsn;
1206
ib_uint64_t end_lsn;
1207
ib_uint64_t page_lsn;
1208
ib_uint64_t page_newest_lsn;
1209
ibool modification_to_page;
1213
mutex_enter(&(recv_sys->mutex));
1215
if (recv_sys->apply_log_recs == FALSE) {
1217
/* Log records should not be applied now */
1219
mutex_exit(&(recv_sys->mutex));
1224
recv_addr = recv_get_fil_addr_struct(buf_block_get_space(block),
1225
buf_block_get_page_no(block));
1227
if ((recv_addr == NULL)
1228
|| (recv_addr->state == RECV_BEING_PROCESSED)
1229
|| (recv_addr->state == RECV_PROCESSED)) {
1231
mutex_exit(&(recv_sys->mutex));
1237
fprintf(stderr, "Recovering space %lu, page %lu\n",
1238
buf_block_get_space(block), buf_block_get_page_no(block));
1241
recv_addr->state = RECV_BEING_PROCESSED;
1243
mutex_exit(&(recv_sys->mutex));
1246
mtr_set_log_mode(&mtr, MTR_LOG_NONE);
1248
page = block->frame;
1250
if (!recover_backup) {
1252
/* Move the ownership of the x-latch on the
1253
page to this OS thread, so that we can acquire
1254
a second x-latch on it. This is needed for the
1255
operations to the page to pass the debug
1258
rw_lock_x_lock_move_ownership(&(block->lock));
1261
success = buf_page_get_known_nowait(RW_X_LATCH, block,
1267
buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
1270
/* Read the newest modification lsn from the page */
1271
page_lsn = mach_read_ull(page + FIL_PAGE_LSN);
1273
if (!recover_backup) {
1274
/* It may be that the page has been modified in the buffer
1275
pool: read the newest modification lsn there */
1278
= buf_page_get_newest_modification(&block->page);
1280
if (page_newest_lsn) {
1282
page_lsn = page_newest_lsn;
1285
/* In recovery from a backup we do not really use the buffer
1288
page_newest_lsn = 0;
1291
modification_to_page = FALSE;
1292
start_lsn = end_lsn = 0;
1294
recv = UT_LIST_GET_FIRST(recv_addr->rec_list);
1297
end_lsn = recv->end_lsn;
1299
if (recv->len > RECV_DATA_BLOCK_SIZE) {
1300
/* We have to copy the record body to a separate
1303
buf = mem_alloc(recv->len);
1305
recv_data_copy_to_buf(buf, recv);
1307
buf = ((byte*)(recv->data)) + sizeof(recv_data_t);
1310
if (recv->type == MLOG_INIT_FILE_PAGE) {
1311
page_lsn = page_newest_lsn;
1313
mach_write_ull(page + UNIV_PAGE_SIZE
1314
- FIL_PAGE_END_LSN_OLD_CHKSUM, 0);
1315
mach_write_ull(page + FIL_PAGE_LSN, 0);
1318
if (recv->start_lsn >= page_lsn) {
1320
if (!modification_to_page) {
1322
modification_to_page = TRUE;
1323
start_lsn = recv->start_lsn;
1327
if (log_debug_writes) {
1329
"InnoDB: Applying log rec"
1331
" to space %lu page no %lu\n",
1332
(ulong) recv->type, (ulong) recv->len,
1333
(ulong) recv_addr->space,
1334
(ulong) recv_addr->page_no);
1336
#endif /* UNIV_DEBUG */
1338
recv_parse_or_apply_log_rec_body(recv->type, buf,
1341
mach_write_ull(page + UNIV_PAGE_SIZE
1342
- FIL_PAGE_END_LSN_OLD_CHKSUM,
1343
recv->start_lsn + recv->len);
1344
mach_write_ull(page + FIL_PAGE_LSN,
1345
recv->start_lsn + recv->len);
1348
if (recv->len > RECV_DATA_BLOCK_SIZE) {
1352
recv = UT_LIST_GET_NEXT(rec_list, recv);
1355
#ifdef UNIV_ZIP_DEBUG
1356
if (fil_page_get_type(page) == FIL_PAGE_INDEX) {
1357
page_zip_des_t* page_zip = buf_block_get_page_zip(block);
1360
ut_a(page_zip_validate_low(page_zip, page, FALSE));
1363
#endif /* UNIV_ZIP_DEBUG */
1365
mutex_enter(&(recv_sys->mutex));
1367
if (recv_max_page_lsn < page_lsn) {
1368
recv_max_page_lsn = page_lsn;
1371
recv_addr->state = RECV_PROCESSED;
1373
ut_a(recv_sys->n_addrs);
1374
recv_sys->n_addrs--;
1376
mutex_exit(&(recv_sys->mutex));
1378
if (!recover_backup && modification_to_page) {
1381
buf_flush_recv_note_modification(block, start_lsn, end_lsn);
1384
/* Make sure that committing mtr does not change the modification
1385
lsn values of page */
1387
mtr.modifications = FALSE;
1392
/* Read-ahead used while applying log records: from the aligned area of
RECV_READ_AHEAD_AREA pages that contains page_no, collects the pages that
have a hashed record struct, for which buf_page_peek() is false, and whose
state is still RECV_NOT_PROCESSED; marks them RECV_BEING_READ and hands the
collected page numbers to buf_read_recv_pages().
NOTE(review): in this copy the function name line, the braces, some
declarations and the return statement are missing; the comments below
describe only the statements that are visible. */
/***********************************************************************
1393
Reads in pages which have hashed log records, from an area around a given
1399
/* out: number of pages found */
1400
ulint space, /* in: space */
1401
ulint zip_size,/* in: compressed page size in bytes, or 0 */
1402
ulint page_no)/* in: page number */
1404
recv_addr_t* recv_addr;
1405
ulint page_nos[RECV_READ_AHEAD_AREA];
1409
/* First page number of the aligned area that contains page_no */
low_limit = page_no - (page_no % RECV_READ_AHEAD_AREA);
1413
/* Visit every page number in the area; page_no is reused as the loop
variable */
for (page_no = low_limit; page_no < low_limit + RECV_READ_AHEAD_AREA;
1415
recv_addr = recv_get_fil_addr_struct(space, page_no);
1417
/* Candidate only if a record struct exists for the page and
buf_page_peek() is false (presumably: the page is not yet in the buffer
pool -- confirm against buf_page_peek()) */
if (recv_addr && !buf_page_peek(space, page_no)) {
1419
/* The state is re-checked and changed under recv_sys->mutex */
mutex_enter(&(recv_sys->mutex));
1421
if (recv_addr->state == RECV_NOT_PROCESSED) {
1422
recv_addr->state = RECV_BEING_READ;
1424
/* Remember the page number for the batched read below */
page_nos[n] = page_no;
1429
mutex_exit(&(recv_sys->mutex));
1433
/* One read request for all n collected page numbers */
buf_read_recv_pages(FALSE, space, zip_size, page_nos, n);
1435
/* NOTE(review): debug trace.  The lines around it are missing, so confirm
whether it is actually compiled.  If it is, note that page_nos[0] is only
assigned when at least one page was collected, and that the ulint
arguments are passed to %lu without the (ulong) casts used elsewhere in
this file. */
fprintf(stderr, "Recv pages at %lu n %lu\n", page_nos[0], n);
1440
/* Applies the log records stored in recv_sys->addr_hash to their pages.
Visible steps: if another batch has recv_sys->apply_batch_on set, release
recv_sys->mutex and sleep (the retry itself is not visible in this copy);
set apply_log_recs and apply_batch_on; walk every hash cell and, for each
address in state RECV_NOT_PROCESSED, call recv_recover_page() when
buf_page_peek() is true for the page and recv_read_in_area() otherwise;
print the progress; wait until recv_sys->n_addrs is 0; flush the flush
list and invalidate the buffer pool with recv_sys->mutex and
log_sys->mutex released; clear the flags; empty the hash.
The assertion !allow_ibuf == mutex_own(&log_sys->mutex) documents that the
caller owns the log mutex exactly when allow_ibuf is FALSE.
NOTE(review): braces, several conditions and some comment terminators are
missing from this copy, so the exact nesting -- in particular which
statements depend on allow_ibuf -- must be checked against a complete
copy of the file. */
/***********************************************************************
1441
Empties the hash table of stored log records, applying them to appropriate
1445
recv_apply_hashed_log_recs(
1446
/*=======================*/
1447
ibool allow_ibuf) /* in: if TRUE, also ibuf operations are
1448
allowed during the application; if FALSE,
1449
no ibuf operations are allowed, and after
1450
the application all file pages are flushed to
1451
disk and invalidated in buffer pool: this
1452
alternative means that no new log records
1453
can be generated during the application;
1454
the caller must in this case own the log
1457
recv_addr_t* recv_addr;
1460
ibool has_printed = FALSE;
1463
mutex_enter(&(recv_sys->mutex));
1465
if (recv_sys->apply_batch_on) {
1467
mutex_exit(&(recv_sys->mutex));
1469
os_thread_sleep(500000);
1474
ut_ad(!allow_ibuf == mutex_own(&log_sys->mutex));
1477
recv_no_ibuf_operations = TRUE;
1480
recv_sys->apply_log_recs = TRUE;
1481
recv_sys->apply_batch_on = TRUE;
1483
for (i = 0; i < hash_get_n_cells(recv_sys->addr_hash); i++) {
1485
recv_addr = HASH_GET_FIRST(recv_sys->addr_hash, i);
1488
ulint space = recv_addr->space;
1489
ulint zip_size = fil_space_get_zip_size(space);
1490
ulint page_no = recv_addr->page_no;
1492
if (recv_addr->state == RECV_NOT_PROCESSED) {
1494
ut_print_timestamp(stderr);
1495
fputs(" InnoDB: Starting an"
1496
" apply batch of log records"
1497
" to the database...\n"
1498
"InnoDB: Progress in percents: ",
1503
mutex_exit(&(recv_sys->mutex));
1505
if (buf_page_peek(space, page_no)) {
1510
block = buf_page_get(
1511
space, zip_size, page_no,
1513
buf_block_dbg_add_level(
1514
block, SYNC_NO_ORDER_CHECK);
1516
recv_recover_page(FALSE, FALSE, block);
1519
recv_read_in_area(space, zip_size,
1523
mutex_enter(&(recv_sys->mutex));
1526
recv_addr = HASH_GET_NEXT(addr_hash, recv_addr);
1530
&& (i * 100) / hash_get_n_cells(recv_sys->addr_hash)
1532
/ hash_get_n_cells(recv_sys->addr_hash)) {
1534
fprintf(stderr, "%lu ", (ulong)
1536
/ hash_get_n_cells(recv_sys->addr_hash)));
1540
/* Wait until all the pages have been processed */
/* recv_sys->mutex is released around each sleep: the code that decrements
recv_sys->n_addrs (for example the page recovery routine) does so while
holding this mutex */
1542
while (recv_sys->n_addrs != 0) {
1544
mutex_exit(&(recv_sys->mutex));
1546
os_thread_sleep(500000);
1548
mutex_enter(&(recv_sys->mutex));
1553
fprintf(stderr, "\n");
1557
/* Flush all the file pages to disk and invalidate them in
1560
mutex_exit(&(recv_sys->mutex));
1561
mutex_exit(&(log_sys->mutex));
1563
n_pages = buf_flush_batch(BUF_FLUSH_LIST, ULINT_MAX,
1565
ut_a(n_pages != ULINT_UNDEFINED);
1567
buf_flush_wait_batch_end(BUF_FLUSH_LIST);
1569
buf_pool_invalidate();
1571
mutex_enter(&(log_sys->mutex));
1572
mutex_enter(&(recv_sys->mutex));
1574
recv_no_ibuf_operations = FALSE;
1577
recv_sys->apply_log_recs = FALSE;
1578
recv_sys->apply_batch_on = FALSE;
1580
recv_sys_empty_hash();
1583
fprintf(stderr, "InnoDB: Apply batch completed\n");
1586
mutex_exit(&(recv_sys->mutex));
1589
#ifdef UNIV_HOTBACKUP
1590
/***********************************************************************
1591
Applies log records in the hash table to a backup. */
/* Visible steps (UNIV_HOTBACKUP builds only): take one block with
buf_LRU_get_free_block(); for every address chained in every cell of
recv_sys->addr_hash, skip it with a warning if its tablespace does not
exist, otherwise initialise the block for that page, extend the
tablespace to page_no + 1 pages, read the page with fil_io(), apply the
records with recv_recover_page(TRUE, FALSE, block), prepare the page with
buf_flush_init_for_writing() and write it back with fil_io(); finally free
the block and empty the hash.
NOTE(review): braces, the fprintf() openers, the conditions that select
between the compressed and uncompressed I/O variants, and the handling
after the "Fatal error" messages are missing from this copy. */
1594
recv_apply_log_recs_for_backup(void)
1595
/*================================*/
1597
recv_addr_t* recv_addr;
1605
recv_sys->apply_log_recs = TRUE;
1606
recv_sys->apply_batch_on = TRUE;
1608
/* A single block is reused for every page of the batch; it is released
with buf_block_free() after the loop */
block = buf_LRU_get_free_block(UNIV_PAGE_SIZE);
1610
fputs("InnoDB: Starting an apply batch of log records"
1611
" to the database...\n"
1612
"InnoDB: Progress in percents: ", stderr);
1614
n_hash_cells = hash_get_n_cells(recv_sys->addr_hash);
1616
for (i = 0; i < n_hash_cells; i++) {
1617
/* The address hash table is externally chained */
1618
recv_addr = hash_get_nth_cell(recv_sys->addr_hash, i)->node;
1620
while (recv_addr != NULL) {
1623
= fil_space_get_zip_size(recv_addr->space);
1625
/* ULINT_UNDEFINED from fil_space_get_zip_size() is handled as "the
tablespace does not exist": the address is counted as processed and the
page is skipped */
if (zip_size == ULINT_UNDEFINED) {
1628
"InnoDB: Warning: cannot apply"
1630
" tablespace %lu page %lu,\n"
1631
"InnoDB: because tablespace with"
1632
" that id does not exist.\n",
1633
recv_addr->space, recv_addr->page_no);
1635
recv_addr->state = RECV_PROCESSED;
1637
ut_a(recv_sys->n_addrs);
1638
recv_sys->n_addrs--;
1640
goto skip_this_recv_addr;
1643
/* We simulate a page read made by the buffer pool, to
1644
make sure the recovery apparatus works ok. We must init
1647
buf_page_init_for_backup_restore(
1648
recv_addr->space, recv_addr->page_no,
1651
/* Extend the tablespace's last file if the page_no
1652
does not fall inside its bounds; we assume the last
1653
file is auto-extending, and ibbackup copied the file
1654
when it still was smaller */
1656
/* page_no + 1 is the smallest size in pages that contains page_no */
success = fil_extend_space_to_desired_size(
1658
recv_addr->space, recv_addr->page_no + 1);
1661
/* NOTE(review): the message below prints page_no as the number of pages,
while the size requested above is page_no + 1 */
"InnoDB: Fatal error: cannot extend"
1662
" tablespace %lu to hold %lu pages\n",
1663
recv_addr->space, recv_addr->page_no);
1668
/* Read the page from the tablespace file using the
1669
fil0fil.c routines */
1672
/* Two read variants are visible: the first passes zip_size and reads into
block->page.zip.data, the second passes 0 and reads into block->frame.
The condition that chooses between them is missing here (presumably
zip_size != 0 -- confirm). */
error = fil_io(OS_FILE_READ, TRUE,
1673
recv_addr->space, zip_size,
1674
recv_addr->page_no, 0, zip_size,
1675
block->page.zip.data, NULL);
1677
error = fil_io(OS_FILE_READ, TRUE,
1678
recv_addr->space, 0,
1679
recv_addr->page_no, 0,
1681
block->frame, NULL);
1684
if (error != DB_SUCCESS) {
1686
"InnoDB: Fatal error: cannot read"
1688
" %lu page number %lu\n",
1689
(ulong) recv_addr->space,
1690
(ulong) recv_addr->page_no);
1695
/* Apply the log records to this page */
1696
recv_recover_page(TRUE, FALSE, block);
1698
/* Write the page back to the tablespace file using the
1699
fil0fil.c routines */
1701
/* The lsn handed over is the one currently stored at FIL_PAGE_LSN in the
frame */
buf_flush_init_for_writing(
1702
block->frame, buf_block_get_page_zip(block),
1703
mach_read_ull(block->frame + FIL_PAGE_LSN));
1706
/* Same two variants as for the read above; the result of these writes is
stored in error, but no check of it is visible in this copy */
error = fil_io(OS_FILE_WRITE, TRUE,
1707
recv_addr->space, zip_size,
1708
recv_addr->page_no, 0,
1710
block->page.zip.data, NULL);
1712
error = fil_io(OS_FILE_WRITE, TRUE,
1713
recv_addr->space, 0,
1714
recv_addr->page_no, 0,
1716
block->frame, NULL);
1718
skip_this_recv_addr:
1719
recv_addr = HASH_GET_NEXT(addr_hash, recv_addr);
1722
/* Print the percentage only when it changes between cell i and i + 1 */
if ((100 * i) / n_hash_cells
1723
!= (100 * (i + 1)) / n_hash_cells) {
1724
fprintf(stderr, "%lu ",
1725
(ulong) ((100 * i) / n_hash_cells));
1730
buf_block_free(block);
1731
recv_sys_empty_hash();
1733
#endif /* UNIV_HOTBACKUP */
1735
/* Parses one log record that starts at ptr and must end before end_ptr.
Visible behaviour: an empty buffer (ptr == end_ptr) and the one-byte types
MLOG_MULTI_REC_END and MLOG_DUMMY_RECORD are tested before the generic
path; otherwise mlog_parse_initial_log_record() reads the record header
and recv_parse_or_apply_log_rec_body() is run over the body; a page number
above 0x8FFFFFFF sets recv_sys->found_corrupt_log;
recv_max_parsed_page_no is raised to the largest page number seen; the
result is new_ptr - ptr, the number of bytes the record occupies.
NOTE(review): the function name line and the statements of the early
exits are missing from this copy.  Callers in this file compare the result
with 0 to detect a record that is not completely in the buffer. */
/***********************************************************************
1736
Tries to parse a single log record and returns its length. */
1741
/* out: length of the record, or 0 if the record was
1743
byte* ptr, /* in: pointer to a buffer */
1744
byte* end_ptr,/* in: pointer to the buffer end */
1745
byte* type, /* out: type */
1746
ulint* space, /* out: space id */
1747
ulint* page_no,/* out: page number */
1748
byte** body) /* out: log record body start */
1754
/* Nothing left in the buffer */
if (ptr == end_ptr) {
1759
/* End marker of a multi-record group */
if (*ptr == MLOG_MULTI_REC_END) {
1766
if (*ptr == MLOG_DUMMY_RECORD) {
1769
*space = ULINT_UNDEFINED - 1; /* For debugging */
1774
/* Generic path: header first, then the type-specific body */
new_ptr = mlog_parse_initial_log_record(ptr, end_ptr, type, space,
1778
if (UNIV_UNLIKELY(!new_ptr)) {
1783
/* Check that page_no is sensible */
1785
if (UNIV_UNLIKELY(*page_no > 0x8FFFFFFFUL)) {
1787
recv_sys->found_corrupt_log = TRUE;
1792
new_ptr = recv_parse_or_apply_log_rec_body(*type, new_ptr, end_ptr,
1794
if (UNIV_UNLIKELY(new_ptr == NULL)) {
1799
/* Keep track of the largest page number that has been parsed */
if (*page_no > recv_max_parsed_page_no) {
1800
recv_max_parsed_page_no = *page_no;
1803
/* Number of bytes consumed by this record */
return(new_ptr - ptr);
1806
/* Returns lsn advanced by len bytes of log data.  The formula adds, on top
of len, LOG_BLOCK_HDR_SIZE + LOG_BLOCK_TRL_SIZE once for every block
payload (OS_FILE_LOG_BLOCK_SIZE minus header and trailer) that the new
data fills, counted from the position of lsn inside its block.
NOTE(review): lsn and len are ib_uint64_t but are narrowed to ulint by
the casts below.  If ulint is narrower than 64 bits, a len above its range
would be truncated; confirm that callers never pass such a value.  The
narrowing of lsn only matters if OS_FILE_LOG_BLOCK_SIZE does not divide
the range of ulint -- TODO confirm. */
/***********************************************************
1807
Calculates the new value for lsn when more data is added to the log. */
1810
recv_calc_lsn_on_data_add(
1811
/*======================*/
1812
ib_uint64_t lsn, /* in: old lsn */
1813
ib_uint64_t len) /* in: this many bytes of data is
1814
added, log block headers not included */
1819
/* Offset of lsn inside the payload of its log block: the offset within
the block minus the block header */
frag_len = (((ulint) lsn) % OS_FILE_LOG_BLOCK_SIZE)
1820
- LOG_BLOCK_HDR_SIZE;
1821
/* Asserts that lsn points into the payload area; an unsigned wrap of the
subtraction above would also violate this bound */
ut_ad(frag_len < OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE
1822
- LOG_BLOCK_TRL_SIZE);
1823
lsn_len = (ulint) len;
1824
/* (lsn_len + frag_len) divided by the payload size of a block is the
number of block boundaries crossed; each one costs a header and a
trailer */
lsn_len += (lsn_len + frag_len)
1825
/ (OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE
1826
- LOG_BLOCK_TRL_SIZE)
1827
* (LOG_BLOCK_HDR_SIZE + LOG_BLOCK_TRL_SIZE);
1829
return(lsn + lsn_len);
1832
#ifdef UNIV_LOG_DEBUG
1833
/***********************************************************
1834
Checks that the parser recognizes incomplete initial segments of a log
1835
record as incomplete. */
/* Compiled only when UNIV_LOG_DEBUG is defined.  The check is done with
ut_a(), so a prefix that the parser accepts fails the assertion. */
1838
recv_check_incomplete_log_recs(
1839
/*===========================*/
1840
byte* ptr, /* in: pointer to a complete log record */
1841
ulint len) /* in: length of the log record */
1849
/* Offer every proper prefix [ptr, ptr + i), i = 0 .. len - 1, to the
parser; each one must be reported as incomplete (return value 0) */
for (i = 0; i < len; i++) {
1850
ut_a(0 == recv_parse_log_rec(ptr, ptr + i, &type, &space,
1854
#endif /* UNIV_LOG_DEBUG */
1856
/* Writes a report about a corrupt log record to stderr: the type, space
id and page number passed in, recv_sys->recovered_lsn, the type, multi
flag and buffer offset of the previously parsed record, and the offset of
ptr inside recv_sys->buf; then, when the guard below holds, a hex dump of
the parsing buffer; finally a fixed warning text.
NOTE(review): the fprintf() opener, the upper bound of the dump guard and
the braces are missing from this copy. */
/***********************************************************
1857
Prints diagnostic info of corrupt log. */
1860
recv_report_corrupt_log(
1861
/*====================*/
1862
byte* ptr, /* in: pointer to corrupt log record */
1863
byte type, /* in: type of the record */
1864
ulint space, /* in: space id, this may also be garbage */
1865
ulint page_no)/* in: page number, this may also be garbage */
1868
"InnoDB: ############### CORRUPT LOG RECORD FOUND\n"
1869
"InnoDB: Log record type %lu, space id %lu, page number %lu\n"
1870
"InnoDB: Log parsing proceeded successfully up to %"PRIu64"\n"
1871
"InnoDB: Previous log record type %lu, is multi %lu\n"
1872
"InnoDB: Recv offset %lu, prev %lu\n",
1873
(ulong) type, (ulong) space, (ulong) page_no,
1874
recv_sys->recovered_lsn,
1875
(ulong) recv_previous_parsed_rec_type,
1876
(ulong) recv_previous_parsed_rec_is_multi,
1877
(ulong) (ptr - recv_sys->buf),
1878
(ulong) recv_previous_parsed_rec_offset);
1880
/* Guard for the hex dump: the offset of ptr plus 100 must exceed the
offset of the previous record, and that difference is compared with a
limit whose line is missing here.
NOTE(review): the dump below starts at recv_sys->buf +
recv_previous_parsed_rec_offset - 100 and ends 100 bytes after ptr.
Nothing visible checks that the previous offset is at least 100 or that
ptr + 100 stays inside the buffer -- confirm in a complete copy. */
if ((ulint)(ptr - recv_sys->buf + 100)
1881
> recv_previous_parsed_rec_offset
1882
&& (ulint)(ptr - recv_sys->buf + 100
1883
- recv_previous_parsed_rec_offset)
1885
fputs("InnoDB: Hex dump of corrupt log starting"
1886
" 100 bytes before the start\n"
1887
"InnoDB: of the previous log rec,\n"
1888
"InnoDB: and ending 100 bytes after the start"
1889
" of the corrupt rec:\n",
1892
/* Dump length = distance from the previous record to ptr, plus 100 bytes
on each side */
ut_print_buf(stderr,
1894
+ recv_previous_parsed_rec_offset - 100,
1895
ptr - recv_sys->buf + 200
1896
- recv_previous_parsed_rec_offset);
1900
fputs("InnoDB: WARNING: the log file may have been corrupt and it\n"
1901
"InnoDB: is possible that the log scan did not proceed\n"
1902
"InnoDB: far enough in recovery! Please run CHECK TABLE\n"
1903
"InnoDB: on your InnoDB tables to check that they are ok!\n"
1904
"InnoDB: If mysqld crashes after this recovery, look at\n"
1905
"InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
1906
"forcing-recovery.html\n"
1907
"InnoDB: about forcing recovery.\n", stderr);
1912
/* Parses the records that lie between recv_sys->recovered_offset and
recv_sys->len in recv_sys->buf.  Two paths are visible:
- a record whose first byte carries MLOG_SINGLE_REC_FLAG, or an
MLOG_DUMMY_RECORD, is parsed on its own; recovered_offset and
recovered_lsn are advanced and, depending on store_to_hash and the record
type, the record is ignored, checked, replayed as a file operation
(UNIV_HOTBACKUP builds with recv_replay_file_ops set) or added to the hash
table;
- otherwise the records of one mini-transaction are first parsed up to
their MLOG_MULTI_REC_END marker to make sure the whole group is in the
buffer, and are then parsed a second time and added to the hash table.
In both paths parsing stops when recv_parse_log_rec() returns 0 or
recv_sys->found_corrupt_log is set, and when the new lsn would pass
recv_sys->scanned_lsn.
NOTE(review): the loop statements, the returns, the braces and some
comment terminators are missing from this copy. */
/***********************************************************
1913
Parses log records from a buffer and stores them to a hash table to wait
1914
merging to file pages. */
1917
recv_parse_log_recs(
1918
/*================*/
1919
/* out: currently always returns FALSE */
1920
ibool store_to_hash) /* in: TRUE if the records should be stored
1921
to the hash table; this is set to FALSE if just
1922
debug checking is needed */
1929
ib_uint64_t new_recovered_lsn;
1930
ib_uint64_t old_lsn;
1937
ut_ad(mutex_own(&(log_sys->mutex)));
1938
ut_ad(recv_sys->parse_start_lsn != 0);
1940
/* Parsing resumes at recovered_offset; end_ptr is the end of the data
currently held in the parsing buffer */
ptr = recv_sys->buf + recv_sys->recovered_offset;
1942
end_ptr = recv_sys->buf + recv_sys->len;
1944
if (ptr == end_ptr) {
1949
/* Non-zero when the first byte of the record carries
MLOG_SINGLE_REC_FLAG */
single_rec = (ulint)*ptr & MLOG_SINGLE_REC_FLAG;
1951
if (single_rec || *ptr == MLOG_DUMMY_RECORD) {
1952
/* The mtr only modified a single page, or this is a file op */
1954
old_lsn = recv_sys->recovered_lsn;
1956
/* Try to parse a log record, fetching its type, space id,
1957
page no, and a pointer to the body of the log record */
1959
len = recv_parse_log_rec(ptr, end_ptr, &type, &space,
1962
if (len == 0 || recv_sys->found_corrupt_log) {
1963
if (recv_sys->found_corrupt_log) {
1965
recv_report_corrupt_log(ptr,
1966
type, space, page_no);
1972
/* lsn after this record, with log block headers and trailers accounted
for */
new_recovered_lsn = recv_calc_lsn_on_data_add(old_lsn, len);
1974
if (new_recovered_lsn > recv_sys->scanned_lsn) {
1975
/* The log record filled a log block, and we require
1976
that also the next log block should have been scanned
1982
recv_previous_parsed_rec_type = (ulint)type;
1983
recv_previous_parsed_rec_offset = recv_sys->recovered_offset;
1984
recv_previous_parsed_rec_is_multi = 0;
1986
recv_sys->recovered_offset += len;
1987
recv_sys->recovered_lsn = new_recovered_lsn;
1990
if (log_debug_writes) {
1992
"InnoDB: Parsed a single log rec"
1993
" type %lu len %lu space %lu page no %lu\n",
1994
(ulong) type, (ulong) len, (ulong) space,
1997
#endif /* UNIV_DEBUG */
1999
if (type == MLOG_DUMMY_RECORD) {
2002
} else if (!store_to_hash) {
2003
/* In debug checking, update a replicate page
2004
according to the log record, and check that it
2005
becomes identical with the original page */
2006
#ifdef UNIV_LOG_DEBUG
2007
recv_check_incomplete_log_recs(ptr, len);
2008
#endif/* UNIV_LOG_DEBUG */
2010
/* File operation records are not stored per page */
} else if (type == MLOG_FILE_CREATE
2011
|| type == MLOG_FILE_CREATE2
2012
|| type == MLOG_FILE_RENAME
2013
|| type == MLOG_FILE_DELETE) {
2015
#ifdef UNIV_HOTBACKUP
2016
if (recv_replay_file_ops) {
2018
/* In ibbackup --apply-log, replay an .ibd file
2019
operation, if possible; note that
2020
fil_path_to_mysql_datadir is set in ibbackup to
2021
point to the datadir we should use there */
2023
if (NULL == fil_op_log_parse_or_replay(
2024
body, end_ptr, type, space)) {
2026
"InnoDB: Error: file op"
2027
" log record of type %lu"
2028
" space %lu not complete in\n"
2029
"InnoDB: the replay phase."
2038
/* In normal mysqld crash recovery we do not try to
2039
replay file operations */
2041
/* The lsn stored with the record is recovered_lsn, which was already
advanced past this record above */
recv_add_to_hash_table(type, space, page_no, body,
2043
recv_sys->recovered_lsn);
2046
/* Check that all the records associated with the single mtr
2047
are included within the buffer */
2053
/* First pass over the group: recovered_offset is not advanced here; the
offsets recorded below are recovered_offset + total_len */
len = recv_parse_log_rec(ptr, end_ptr, &type, &space,
2055
if (len == 0 || recv_sys->found_corrupt_log) {
2057
if (recv_sys->found_corrupt_log) {
2059
recv_report_corrupt_log(
2060
ptr, type, space, page_no);
2066
recv_previous_parsed_rec_type = (ulint)type;
2067
recv_previous_parsed_rec_offset
2068
= recv_sys->recovered_offset + total_len;
2069
recv_previous_parsed_rec_is_multi = 1;
2071
if ((!store_to_hash) && (type != MLOG_MULTI_REC_END)) {
2072
#ifdef UNIV_LOG_DEBUG
2073
recv_check_incomplete_log_recs(ptr, len);
2074
#endif /* UNIV_LOG_DEBUG */
2078
if (log_debug_writes) {
2080
"InnoDB: Parsed a multi log rec"
2082
" space %lu page no %lu\n",
2083
(ulong) type, (ulong) len,
2084
(ulong) space, (ulong) page_no);
2086
#endif /* UNIV_DEBUG */
2093
if (type == MLOG_MULTI_REC_END) {
2095
/* Found the end mark for the records */
2101
/* lsn at the end of the whole group; it must not pass scanned_lsn */
new_recovered_lsn = recv_calc_lsn_on_data_add(
2102
recv_sys->recovered_lsn, total_len);
2104
if (new_recovered_lsn > recv_sys->scanned_lsn) {
2105
/* The log record filled a log block, and we require
2106
that also the next log block should have been scanned
2112
/* Add all the records to the hash table */
2114
/* Second pass: start again at recovered_offset; this time
recovered_offset and recovered_lsn are advanced record by record */
ptr = recv_sys->buf + recv_sys->recovered_offset;
2117
old_lsn = recv_sys->recovered_lsn;
2118
len = recv_parse_log_rec(ptr, end_ptr, &type, &space,
2120
if (recv_sys->found_corrupt_log) {
2122
recv_report_corrupt_log(ptr,
2123
type, space, page_no);
2127
/* A record inside a multi-record group must not carry the single-record
flag */
ut_a(0 == ((ulint)*ptr & MLOG_SINGLE_REC_FLAG));
2129
recv_sys->recovered_offset += len;
2130
recv_sys->recovered_lsn
2131
= recv_calc_lsn_on_data_add(old_lsn, len);
2132
if (type == MLOG_MULTI_REC_END) {
2134
/* Found the end mark for the records */
2139
if (store_to_hash) {
2140
recv_add_to_hash_table(type, space, page_no,
2153
/* Appends the not yet buffered part of one log block to recv_sys->buf.
Visible logic: nothing can be added while recv_sys->parse_start_lsn is 0,
when parse_start_lsn >= scanned_lsn, or when recv_sys->scanned_lsn >=
scanned_lsn.  Otherwise more_len, the number of new bytes, is scanned_lsn
minus parse_start_lsn (if parse_start_lsn is ahead of
recv_sys->scanned_lsn) or minus recv_sys->scanned_lsn.  The copy covers
[start_offset, end_offset) of the block, where start_offset is
data_len - more_len raised to at least LOG_BLOCK_HDR_SIZE and end_offset
is data_len lowered to at most OS_FILE_LOG_BLOCK_SIZE -
LOG_BLOCK_TRL_SIZE, so that block header and trailer bytes are never
copied.  recv_sys->len grows by the number of bytes copied.
NOTE(review): the assertion recv_sys->len <= RECV_PARSING_BUF_SIZE comes
after the ut_memcpy(), so it detects an overflow of the parsing buffer
rather than preventing it; the space check has to be done by the caller.
The return statements and braces are missing from this copy. */
/***********************************************************
2154
Adds data from a new log block to the parsing buffer of recv_sys if
2155
recv_sys->parse_start_lsn is non-zero. */
2158
recv_sys_add_to_parsing_buf(
2159
/*========================*/
2160
/* out: TRUE if more data added */
2161
byte* log_block, /* in: log block */
2162
ib_uint64_t scanned_lsn) /* in: lsn of how far we were able
2163
to find data in this log block */
2170
ut_ad(scanned_lsn >= recv_sys->scanned_lsn);
2172
if (!recv_sys->parse_start_lsn) {
2173
/* Cannot start parsing yet because no start point for
2179
data_len = log_block_get_data_len(log_block);
2181
if (recv_sys->parse_start_lsn >= scanned_lsn) {
2185
} else if (recv_sys->scanned_lsn >= scanned_lsn) {
2189
} else if (recv_sys->parse_start_lsn > recv_sys->scanned_lsn) {
2190
more_len = (ulint) (scanned_lsn - recv_sys->parse_start_lsn);
2192
more_len = (ulint) (scanned_lsn - recv_sys->scanned_lsn);
2195
if (more_len == 0) {
2200
ut_ad(data_len >= more_len);
2202
start_offset = data_len - more_len;
2204
if (start_offset < LOG_BLOCK_HDR_SIZE) {
2205
start_offset = LOG_BLOCK_HDR_SIZE;
2208
end_offset = data_len;
2210
if (end_offset > OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) {
2211
end_offset = OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE;
2214
ut_ad(start_offset <= end_offset);
2216
if (start_offset < end_offset) {
2217
ut_memcpy(recv_sys->buf + recv_sys->len,
2218
log_block + start_offset, end_offset - start_offset);
2220
recv_sys->len += end_offset - start_offset;
2222
ut_a(recv_sys->len <= RECV_PARSING_BUF_SIZE);
2228
/***********************************************************
2229
Moves the parsing buffer data left to the buffer start. */
/* Discards the already parsed prefix of recv_sys->buf: the
len - recovered_offset bytes that follow recovered_offset are moved to the
start of the buffer, len is reduced by recovered_offset, and
recovered_offset becomes 0. */
2232
recv_sys_justify_left_parsing_buf(void)
2233
/*===================================*/
2235
/* Source and destination lie in the same buffer and may overlap, hence a
move and not a copy */
ut_memmove(recv_sys->buf, recv_sys->buf + recv_sys->recovered_offset,
2236
recv_sys->len - recv_sys->recovered_offset);
2238
recv_sys->len -= recv_sys->recovered_offset;
2240
recv_sys->recovered_offset = 0;
2243
/* Scans the log blocks of buf one OS_FILE_LOG_BLOCK_SIZE block at a time.
Visible steps for each block: compare the block number in the header with
the number derived from scanned_lsn and verify the checksum, treating a
mismatch as garbage or an incompletely written block; raise
*contiguous_lsn when the flush bit is set; reject blocks whose checkpoint
number is older than the last scanned one; set recv_sys->parse_start_lsn
from the first record group offset when no start point exists yet;
advance scanned_lsn by the data length of the block; when that passes
recv_sys->scanned_lsn, start crash recovery if this is a startup scan,
and append the block to the parsing buffer unless the buffer is nearly
full (which sets recv_sys->found_corrupt_log).
After the loop: store scanned_lsn in *group_scanned_lsn, print progress,
parse the new data with recv_parse_log_recs(), apply the hashed records
with recv_apply_hashed_log_recs(FALSE) when the hash has grown too big and
apply_automatically is set, and left-justify the parsing buffer once
recovered_offset exceeds a quarter of RECV_PARSING_BUF_SIZE.
NOTE(review): the function name line, several parameter comment
terminators, the braces, the statements that set "finished" and the return
are missing from this copy. */
/***********************************************************
2244
Scans log from a buffer and stores new log data to the parsing buffer. Parses
2245
and hashes the log records if new data found. */
2250
/* out: TRUE if limit_lsn has been
2251
reached, or not able to scan any more
2252
in this log group */
2253
ibool apply_automatically,/* in: TRUE if we want this
2254
function to apply log records
2255
automatically when the hash table
2256
becomes full; in the hot backup tool
2257
the tool does the applying, not this
2259
ulint available_memory,/* in: we let the hash table of recs
2260
to grow to this size, at the maximum */
2261
ibool store_to_hash, /* in: TRUE if the records should be
2262
stored to the hash table; this is set
2263
to FALSE if just debug checking is
2265
byte* buf, /* in: buffer containing a log segment
2267
ulint len, /* in: buffer length */
2268
ib_uint64_t start_lsn, /* in: buffer start lsn */
2269
ib_uint64_t* contiguous_lsn, /* in/out: it is known that all log
2270
groups contain contiguous log data up
2272
ib_uint64_t* group_scanned_lsn)/* out: scanning succeeded up to
2277
ib_uint64_t scanned_lsn;
2282
ut_ad(start_lsn % OS_FILE_LOG_BLOCK_SIZE == 0);
2283
ut_ad(len % OS_FILE_LOG_BLOCK_SIZE == 0);
2285
ut_a(apply_automatically <= TRUE);
2286
ut_a(store_to_hash <= TRUE);
2291
scanned_lsn = start_lsn;
2294
while (log_block < buf + len && !finished) {
2296
no = log_block_get_hdr_no(log_block);
2298
fprintf(stderr, "Log block header no %lu\n", no);
2300
fprintf(stderr, "Scanned lsn no %lu\n",
2301
log_block_convert_lsn_to_no(scanned_lsn));
2303
if (no != log_block_convert_lsn_to_no(scanned_lsn)
2304
|| !log_block_checksum_is_ok_or_old_format(log_block)) {
2306
if (no == log_block_convert_lsn_to_no(scanned_lsn)
2307
&& !log_block_checksum_is_ok_or_old_format(
2310
"InnoDB: Log block no %lu at"
2311
" lsn %"PRIu64" has\n"
2312
"InnoDB: ok header, but checksum field"
2313
" contains %lu, should be %lu\n",
2316
(ulong) log_block_get_checksum(
2318
(ulong) log_block_calc_checksum(
2322
/* Garbage or an incompletely written log block */
2329
if (log_block_get_flush_bit(log_block)) {
2330
/* This block was a start of a log flush operation:
2331
we know that the previous flush operation must have
2332
been completed for all log groups before this block
2333
can have been flushed to any of the groups. Therefore,
2334
we know that log data is contiguous up to scanned_lsn
2335
in all non-corrupt log groups. */
2337
if (scanned_lsn > *contiguous_lsn) {
2338
*contiguous_lsn = scanned_lsn;
2342
/* Number of bytes in use in this block; a value below
OS_FILE_LOG_BLOCK_SIZE is taken further down as the end of the log data
of this group */
data_len = log_block_get_data_len(log_block);
2344
/* Stale block test: applies when records are being stored, or when the
block is full, and the block would advance recv_sys->scanned_lsn, and its
checkpoint number is lower than the last scanned one.  The final
comparison of the difference of the checkpoint numbers is missing from
this copy. */
if ((store_to_hash || (data_len == OS_FILE_LOG_BLOCK_SIZE))
2345
&& scanned_lsn + data_len > recv_sys->scanned_lsn
2346
&& (recv_sys->scanned_checkpoint_no > 0)
2347
&& (log_block_get_checkpoint_no(log_block)
2348
< recv_sys->scanned_checkpoint_no)
2349
&& (recv_sys->scanned_checkpoint_no
2350
- log_block_get_checkpoint_no(log_block)
2353
/* Garbage from a log buffer flush which was made
2354
before the most recent database recovery */
2357
#ifdef UNIV_LOG_DEBUG
2358
/* This is not really an error, but currently
2359
we stop here in the debug version: */
2366
/* parse_start_lsn == 0 means that no parse start point is known yet; a
non-zero first record group offset in this block provides one */
if (!recv_sys->parse_start_lsn
2367
&& (log_block_get_first_rec_group(log_block) > 0)) {
2369
/* We found a point from which to start the parsing
2372
recv_sys->parse_start_lsn = scanned_lsn
2373
+ log_block_get_first_rec_group(log_block);
2374
recv_sys->scanned_lsn = recv_sys->parse_start_lsn;
2375
recv_sys->recovered_lsn = recv_sys->parse_start_lsn;
2378
scanned_lsn += data_len;
2380
if (scanned_lsn > recv_sys->scanned_lsn) {
2382
/* We have found more entries. If this scan is
2383
of startup type, we must initiate crash recovery
2384
environment before parsing these log records. */
2386
/* recv_init_crash_recovery() sets recv_needed_recovery, so this branch is
taken at most once */
if (recv_log_scan_is_startup_type
2387
&& !recv_needed_recovery) {
2390
"InnoDB: Log scan progressed"
2391
" past the checkpoint lsn %"PRIu64"\n",
2392
recv_sys->scanned_lsn);
2393
recv_init_crash_recovery();
2396
/* We were able to find more log data: add it to the
2397
parsing buffer if parse_start_lsn is already
2400
if (recv_sys->len + 4 * OS_FILE_LOG_BLOCK_SIZE
2401
>= RECV_PARSING_BUF_SIZE) {
2403
"InnoDB: Error: log parsing"
2405
" Recovery may have failed!\n");
2407
recv_sys->found_corrupt_log = TRUE;
2409
} else if (!recv_sys->found_corrupt_log) {
2410
more_data = recv_sys_add_to_parsing_buf(
2411
log_block, scanned_lsn);
2414
recv_sys->scanned_lsn = scanned_lsn;
2415
recv_sys->scanned_checkpoint_no
2416
= log_block_get_checkpoint_no(log_block);
2419
if (data_len < OS_FILE_LOG_BLOCK_SIZE) {
2420
/* Log data for this group ends here */
2424
log_block += OS_FILE_LOG_BLOCK_SIZE;
2428
*group_scanned_lsn = scanned_lsn;
2430
/* Progress output: the counter is incremented on each call that gets
here, and the message is printed when the scan has finished or on every
80th count */
if (recv_needed_recovery
2431
|| (recv_is_from_backup && !recv_is_making_a_backup)) {
2432
recv_scan_print_counter++;
2434
if (finished || (recv_scan_print_counter % 80 == 0)) {
2437
"InnoDB: Doing recovery: scanned up to"
2438
" log sequence number %"PRIu64"\n",
2439
*group_scanned_lsn);
2443
if (more_data && !recv_sys->found_corrupt_log) {
2444
/* Try to parse more log records */
2446
recv_parse_log_recs(store_to_hash);
2448
/* The heap size of the hashed records is tested here; the comparison
operand is missing from this copy (presumably available_memory --
confirm) */
if (store_to_hash && mem_heap_get_size(recv_sys->heap)
2450
&& apply_automatically) {
2452
/* Hash table of log records has grown too big:
2453
empty it; FALSE means no ibuf operations
2454
allowed, as we cannot add new records to the
2455
log yet: they would be produced by ibuf
2458
recv_apply_hashed_log_recs(FALSE);
2461
if (recv_sys->recovered_offset > RECV_PARSING_BUF_SIZE / 4) {
2462
/* Move parsing buffer data to the buffer start */
2464
recv_sys_justify_left_parsing_buf();
2471
/***********************************************************
2472
Scans log from a buffer and stores new log data to the parsing buffer. Parses
2473
and hashes the log records if new data found. */
/* NOTE(review): the banner text above is identical to the banner of the
buffer scanning function that is called below and does not describe this
function.  What is visible here: starting at *contiguous_lsn, a segment
of RECV_SCAN_SIZE bytes is read from the log group into log_sys->buf with
log_group_read_log_seg(LOG_RECOVER, ...) and handed to
recv_scan_log_recs() with apply_automatically and store_to_hash both TRUE
and with (buf_pool->curr_size - recv_n_pool_free_frames) * UNIV_PAGE_SIZE
as the memory limit; start_lsn then moves on to end_lsn.  The loop
statement is missing from this copy; presumably the steps repeat until
recv_scan_log_recs() returns TRUE -- confirm.  With log_debug_writes set,
a message with the group and *group_scanned_lsn is printed at the end
(UNIV_DEBUG section). */
2476
recv_group_scan_log_recs(
2477
/*=====================*/
2478
log_group_t* group, /* in: log group */
2479
ib_uint64_t* contiguous_lsn, /* in/out: it is known that all log
2480
groups contain contiguous log data up
2482
ib_uint64_t* group_scanned_lsn)/* out: scanning succeeded up to
2486
ib_uint64_t start_lsn;
2487
ib_uint64_t end_lsn;
2491
start_lsn = *contiguous_lsn;
2494
end_lsn = start_lsn + RECV_SCAN_SIZE;
2496
log_group_read_log_seg(LOG_RECOVER, log_sys->buf,
2497
group, start_lsn, end_lsn);
2499
finished = recv_scan_log_recs(
2500
TRUE, (buf_pool->curr_size - recv_n_pool_free_frames)
2501
* UNIV_PAGE_SIZE, TRUE, log_sys->buf, RECV_SCAN_SIZE,
2502
start_lsn, contiguous_lsn, group_scanned_lsn);
2503
start_lsn = end_lsn;
2507
if (log_debug_writes) {
2509
"InnoDB: Scanned group %lu up to"
2510
" log sequence number %"PRIu64"\n",
2512
*group_scanned_lsn);
2514
#endif /* UNIV_DEBUG */
2517
/***********************************************************
2518
Initialize crash recovery environment. Can be called iff
2519
recv_needed_recovery == FALSE. */
/* Visible steps: assert that recv_needed_recovery is not set yet and set
it; print a timestamp and the "Database was not shut down normally!"
notice; announce and run fil_load_single_table_tablespaces(); and, when
srv_force_recovery < SRV_FORCE_NO_LOG_REDO, announce and run
trx_sys_doublewrite_init_or_restore_pages(TRUE) to restore possibly
half-written data pages from the doublewrite buffer.
NOTE(review): the fprintf() openers and the braces are missing from this
copy. */
2522
recv_init_crash_recovery(void)
2523
/*==========================*/
2525
/* The precondition from the banner is enforced with ut_a() */
ut_a(!recv_needed_recovery);
2527
recv_needed_recovery = TRUE;
2529
ut_print_timestamp(stderr);
2532
" InnoDB: Database was not"
2533
" shut down normally!\n"
2534
"InnoDB: Starting crash recovery.\n");
2537
"InnoDB: Reading tablespace information"
2538
" from the .ibd files...\n");
2540
fil_load_single_table_tablespaces();
2542
/* If we are using the doublewrite method, we will
2543
check if there are half-written pages in data files,
2544
and restore them from the doublewrite buffer if
2547
if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) {
2550
"InnoDB: Restoring possible"
2551
" half-written data pages from"
2552
" the doublewrite\n"
2553
"InnoDB: buffer...\n");
2554
trx_sys_doublewrite_init_or_restore_pages(TRUE);
2558
/************************************************************
2559
Recovers from a checkpoint. When this function returns, the database is able
2560
to start processing of new user transactions, but the function
2561
recv_recovery_from_checkpoint_finish should be called later to complete
2562
the recovery and free the resources used in it. */
2565
recv_recovery_from_checkpoint_start_func(
2566
/*=====================================*/
2567
/* out: error code or DB_SUCCESS */
2568
#ifdef UNIV_LOG_ARCHIVE
2569
ulint type, /* in: LOG_CHECKPOINT or LOG_ARCHIVE */
2570
ib_uint64_t limit_lsn, /* in: recover up to this lsn
2572
#endif /* UNIV_LOG_ARCHIVE */
2573
ib_uint64_t min_flushed_lsn,/* in: min flushed lsn from
2575
ib_uint64_t max_flushed_lsn)/* in: max flushed lsn from
2579
log_group_t* max_cp_group;
2580
log_group_t* up_to_date_group;
2582
ib_uint64_t checkpoint_lsn;
2583
ib_uint64_t checkpoint_no;
2584
ib_uint64_t old_scanned_lsn;
2585
ib_uint64_t group_scanned_lsn= 0;
2586
ib_uint64_t contiguous_lsn;
2587
ib_uint64_t archived_lsn;
2589
byte log_hdr_buf[LOG_FILE_HDR_SIZE];
2592
#ifdef UNIV_LOG_ARCHIVE
2593
ut_ad(type != LOG_CHECKPOINT || limit_lsn == IB_ULONGLONG_MAX);
2594
# define TYPE_CHECKPOINT (type == LOG_CHECKPOINT)
2595
# define LIMIT_LSN limit_lsn
2596
#else /* UNIV_LOG_ARCHIVE */
2597
# define TYPE_CHECKPOINT 1
2598
# define LIMIT_LSN IB_ULONGLONG_MAX
2599
#endif /* UNIV_LOG_ARCHIVE */
2601
if (TYPE_CHECKPOINT) {
2603
recv_sys_init(FALSE, buf_pool_get_curr_size());
2606
if (srv_force_recovery >= SRV_FORCE_NO_LOG_REDO) {
2608
"InnoDB: The user has set SRV_FORCE_NO_LOG_REDO on\n");
2610
"InnoDB: Skipping log redo\n");
2615
recv_recovery_on = TRUE;
2617
recv_sys->limit_lsn = LIMIT_LSN;
2619
mutex_enter(&(log_sys->mutex));
2621
/* Look for the latest checkpoint from any of the log groups */
2623
err = recv_find_max_checkpoint(&max_cp_group, &max_cp_field);
2625
if (err != DB_SUCCESS) {
2627
mutex_exit(&(log_sys->mutex));
2632
log_group_read_checkpoint_info(max_cp_group, max_cp_field);
2634
buf = log_sys->checkpoint_buf;
2636
checkpoint_lsn = mach_read_ull(buf + LOG_CHECKPOINT_LSN);
2637
checkpoint_no = mach_read_ull(buf + LOG_CHECKPOINT_NO);
2638
archived_lsn = mach_read_ull(buf + LOG_CHECKPOINT_ARCHIVED_LSN);
2640
/* Read the first log file header to print a note if this is
2641
a recovery from a restored InnoDB Hot Backup */
2643
fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE, max_cp_group->space_id, 0,
2644
0, 0, LOG_FILE_HDR_SIZE,
2645
log_hdr_buf, max_cp_group);
2647
if (0 == ut_memcmp(log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP,
2648
(byte*)"ibbackup", (sizeof "ibbackup") - 1)) {
2649
/* This log file was created by ibbackup --restore: print
2650
a note to the user about it */
2653
"InnoDB: The log file was created by"
2654
" ibbackup --apply-log at\n"
2656
log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP);
2658
"InnoDB: NOTE: the following crash recovery"
2659
" is part of a normal restore.\n");
2661
/* Wipe over the label now */
2663
memset(log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP,
2665
/* Write to the log file to wipe over the label */
2666
fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE,
2667
max_cp_group->space_id, 0,
2668
0, 0, OS_FILE_LOG_BLOCK_SIZE,
2669
log_hdr_buf, max_cp_group);
2672
#ifdef UNIV_LOG_ARCHIVE
2673
group = UT_LIST_GET_FIRST(log_sys->log_groups);
2676
log_checkpoint_get_nth_group_info(buf, group->id,
2677
&(group->archived_file_no),
2678
&(group->archived_offset));
2680
group = UT_LIST_GET_NEXT(log_groups, group);
2682
#endif /* UNIV_LOG_ARCHIVE */
2684
if (TYPE_CHECKPOINT) {
2685
/* Start reading the log groups from the checkpoint lsn up. The
2686
variable contiguous_lsn contains an lsn up to which the log is
2687
known to be contiguously written to all log groups. */
2689
recv_sys->parse_start_lsn = checkpoint_lsn;
2690
recv_sys->scanned_lsn = checkpoint_lsn;
2691
recv_sys->scanned_checkpoint_no = 0;
2692
recv_sys->recovered_lsn = checkpoint_lsn;
2694
srv_start_lsn = checkpoint_lsn;
2697
contiguous_lsn = ut_uint64_align_down(recv_sys->scanned_lsn,
2698
OS_FILE_LOG_BLOCK_SIZE);
2699
if (TYPE_CHECKPOINT) {
2700
up_to_date_group = max_cp_group;
2701
#ifdef UNIV_LOG_ARCHIVE
2705
/* Try to recover the remaining part from logs: first from
2706
the logs of the archived group */
2708
group = recv_sys->archive_group;
2709
capacity = log_group_get_capacity(group);
2711
if (recv_sys->scanned_lsn > checkpoint_lsn + capacity
2712
|| checkpoint_lsn > recv_sys->scanned_lsn + capacity) {
2714
mutex_exit(&(log_sys->mutex));
2716
/* The group does not contain enough log: probably
2717
an archived log file was missing or corrupt */
2722
recv_group_scan_log_recs(group, &contiguous_lsn,
2723
&group_scanned_lsn);
2724
if (recv_sys->scanned_lsn < checkpoint_lsn) {
2726
mutex_exit(&(log_sys->mutex));
2728
/* The group did not contain enough log: an archived
2729
log file was missing or invalid, or the log group
2735
group->scanned_lsn = group_scanned_lsn;
2736
up_to_date_group = group;
2737
#endif /* UNIV_LOG_ARCHIVE */
2740
ut_ad(RECV_SCAN_SIZE <= log_sys->buf_size);
2742
group = UT_LIST_GET_FIRST(log_sys->log_groups);
2744
#ifdef UNIV_LOG_ARCHIVE
2745
if ((type == LOG_ARCHIVE) && (group == recv_sys->archive_group)) {
2746
group = UT_LIST_GET_NEXT(log_groups, group);
2748
#endif /* UNIV_LOG_ARCHIVE */
2750
/* Set the flag to publish that we are doing startup scan. */
2751
recv_log_scan_is_startup_type = TYPE_CHECKPOINT;
2753
old_scanned_lsn = recv_sys->scanned_lsn;
2755
recv_group_scan_log_recs(group, &contiguous_lsn,
2756
&group_scanned_lsn);
2757
group->scanned_lsn = group_scanned_lsn;
2759
if (old_scanned_lsn < group_scanned_lsn) {
2760
/* We found a more up-to-date group */
2762
up_to_date_group = group;
2765
#ifdef UNIV_LOG_ARCHIVE
2766
if ((type == LOG_ARCHIVE)
2767
&& (group == recv_sys->archive_group)) {
2768
group = UT_LIST_GET_NEXT(log_groups, group);
2770
#endif /* UNIV_LOG_ARCHIVE */
2772
group = UT_LIST_GET_NEXT(log_groups, group);
2775
/* Done with startup scan. Clear the flag. */
2776
recv_log_scan_is_startup_type = FALSE;
2777
if (TYPE_CHECKPOINT) {
2778
/* NOTE: we always do a 'recovery' at startup, but only if
2779
there is something wrong we will print a message to the
2780
user about recovery: */
2782
if (checkpoint_lsn != max_flushed_lsn
2783
|| checkpoint_lsn != min_flushed_lsn) {
2785
if (checkpoint_lsn < max_flushed_lsn) {
2787
"InnoDB: #########################"
2788
"#################################\n"
2791
"InnoDB: The log sequence number"
2792
" in ibdata files is higher\n"
2793
"InnoDB: than the log sequence number"
2794
" in the ib_logfiles! Are you sure\n"
2795
"InnoDB: you are using the right"
2796
" ib_logfiles to start up"
2798
"InnoDB: Log sequence number in"
2799
" ib_logfiles is %"PRIu64", log\n"
2800
"InnoDB: sequence numbers stamped"
2801
" to ibdata file headers are between\n"
2802
"InnoDB: %"PRIu64" and %"PRIu64".\n"
2803
"InnoDB: #########################"
2804
"#################################\n",
2810
if (!recv_needed_recovery) {
2812
"InnoDB: The log sequence number"
2813
" in ibdata files does not match\n"
2814
"InnoDB: the log sequence number"
2815
" in the ib_logfiles!\n");
2816
recv_init_crash_recovery();
2820
if (!recv_needed_recovery) {
2821
/* Init the doublewrite buffer memory structure */
2822
trx_sys_doublewrite_init_or_restore_pages(FALSE);
2826
/* We currently have only one log group */
2827
if (group_scanned_lsn < checkpoint_lsn) {
2828
ut_print_timestamp(stderr);
2830
" InnoDB: ERROR: We were only able to scan the log"
2832
"InnoDB: %"PRIu64", but a checkpoint was at %"PRIu64".\n"
2833
"InnoDB: It is possible that"
2834
" the database is now corrupt!\n",
2839
if (group_scanned_lsn < recv_max_page_lsn) {
2840
ut_print_timestamp(stderr);
2842
" InnoDB: ERROR: We were only able to scan the log"
2843
" up to %"PRIu64"\n"
2844
"InnoDB: but a database page a had an lsn %"PRIu64"."
2845
" It is possible that the\n"
2846
"InnoDB: database is now corrupt!\n",
2851
if (recv_sys->recovered_lsn < checkpoint_lsn) {
2853
mutex_exit(&(log_sys->mutex));
2855
if (recv_sys->recovered_lsn >= LIMIT_LSN) {
2865
/* Synchronize the uncorrupted log groups to the most up-to-date log
2866
group; we also copy checkpoint info to groups */
2868
log_sys->next_checkpoint_lsn = checkpoint_lsn;
2869
log_sys->next_checkpoint_no = checkpoint_no + 1;
2871
#ifdef UNIV_LOG_ARCHIVE
2872
log_sys->archived_lsn = archived_lsn;
2873
#endif /* UNIV_LOG_ARCHIVE */
2875
recv_synchronize_groups(up_to_date_group);
2877
if (!recv_needed_recovery) {
2878
ut_a(checkpoint_lsn == recv_sys->recovered_lsn);
2880
srv_start_lsn = recv_sys->recovered_lsn;
2883
log_sys->lsn = recv_sys->recovered_lsn;
2885
ut_memcpy(log_sys->buf, recv_sys->last_block, OS_FILE_LOG_BLOCK_SIZE);
2887
log_sys->buf_free = (ulint) log_sys->lsn % OS_FILE_LOG_BLOCK_SIZE;
2888
log_sys->buf_next_to_write = log_sys->buf_free;
2889
log_sys->written_to_some_lsn = log_sys->lsn;
2890
log_sys->written_to_all_lsn = log_sys->lsn;
2892
log_sys->last_checkpoint_lsn = checkpoint_lsn;
2894
log_sys->next_checkpoint_no = checkpoint_no + 1;
2896
#ifdef UNIV_LOG_ARCHIVE
2897
if (archived_lsn == IB_ULONGLONG_MAX) {
2899
log_sys->archiving_state = LOG_ARCH_OFF;
2901
#endif /* UNIV_LOG_ARCHIVE */
2903
mutex_enter(&(recv_sys->mutex));
2905
recv_sys->apply_log_recs = TRUE;
2907
mutex_exit(&(recv_sys->mutex));
2909
mutex_exit(&(log_sys->mutex));
2911
recv_lsn_checks_on = TRUE;
2913
/* The database is now ready to start almost normal processing of user
2914
transactions: transaction rollbacks and the application of the log
2915
records in the hash table can be run in background. */
2919
#undef TYPE_CHECKPOINT
2923
/************************************************************
2924
Completes recovery from a checkpoint. In the order visible here: applies
the hashed log records unless srv_force_recovery is at or above
SRV_FORCE_NO_LOG_REDO, calls the two trx_sys_print_mysql_*() functions
if recv_needed_recovery is set, tests recv_sys->found_corrupt_log (the
warning text below belongs to that branch), clears recv_recovery_on,
drops partially created indexes, and hands
trx_rollback_or_clean_all_recovered to os_thread_create() unless
srv_force_recovery is at or above SRV_FORCE_NO_TRX_UNDO. */
2927
recv_recovery_from_checkpoint_finish(void)
2928
/*======================================*/
2932
/* Apply the hashed log records to the respective file pages */
2934
/* Redo application is skipped once srv_force_recovery reaches
SRV_FORCE_NO_LOG_REDO */
if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) {
2936
recv_apply_hashed_log_recs(TRUE);
2940
if (log_debug_writes) {
2942
"InnoDB: Log records applied to the database\n");
2944
#endif /* UNIV_DEBUG */
2946
/* Both trx_sys_print_mysql_*() calls run only when recv_needed_recovery
is set */
if (recv_needed_recovery) {
2947
trx_sys_print_mysql_master_log_pos();
2948
trx_sys_print_mysql_binlog_offset();
2951
/* found_corrupt_log is a field of recv_sys; the message that follows
advises checking the tables or restoring from a backup */
if (recv_sys->found_corrupt_log) {
2954
"InnoDB: WARNING: the log file may have been"
2956
"InnoDB: is possible that the log scan or parsing"
2957
" did not proceed\n"
2958
"InnoDB: far enough in recovery. Please run"
2960
"InnoDB: on your InnoDB tables to check that"
2962
"InnoDB: It may be safest to recover your"
2963
" InnoDB database from\n"
2964
"InnoDB: a backup!\n");
2967
/* Free the resources of the recovery system */
2969
recv_recovery_on = FALSE;
2971
#ifndef UNIV_LOG_DEBUG
2975
/* Drop partially created indexes. */
2976
row_merge_drop_temp_indexes();
2978
#ifdef UNIV_SYNC_DEBUG
2979
/* Wait for a while so that created threads have time to suspend
2980
themselves before we switch the latching order checks on */
2981
/* NOTE(review): 1000000 is presumably a microsecond count, i.e. one
second -- confirm against os_thread_sleep() */
os_thread_sleep(1000000);
2983
/* Switch latching order checks on in sync0sync.c */
2984
sync_order_checks_on = TRUE;
2986
/* The rollback thread is not started once srv_force_recovery reaches
SRV_FORCE_NO_TRX_UNDO */
if (srv_force_recovery < SRV_FORCE_NO_TRX_UNDO) {
2987
/* Rollback the uncommitted transactions which have no user
2990
os_thread_create(trx_rollback_or_clean_all_recovered,
2995
/**********************************************************
2996
Resets the logs. The contents of log files will be lost!
The caller must own log_sys->mutex on entry (checked with ut_ad); the
mutex is released around the two log_make_checkpoint_at() calls and
acquired again before the end of the function. log_sys->lsn is set to
lsn aligned up to OS_FILE_LOG_BLOCK_SIZE, every log group gets that lsn
with lsn_offset = LOG_FILE_HDR_SIZE, and after the first log block has
been initialized log_sys->lsn is advanced by LOG_BLOCK_HDR_SIZE. */
3001
ib_uint64_t lsn, /* in: reset to this lsn
3002
rounded up to be divisible by
3003
OS_FILE_LOG_BLOCK_SIZE, after
3005
LOG_BLOCK_HDR_SIZE */
3006
#ifdef UNIV_LOG_ARCHIVE
3007
ulint arch_log_no, /* in: next archived log file number */
3008
#endif /* UNIV_LOG_ARCHIVE */
3009
ibool new_logs_created)/* in: TRUE if resetting logs
3010
is done at the log creation;
3011
FALSE if it is done after
3016
ut_ad(mutex_own(&(log_sys->mutex)));
3018
log_sys->lsn = ut_uint64_align_up(lsn, OS_FILE_LOG_BLOCK_SIZE);
3020
group = UT_LIST_GET_FIRST(log_sys->log_groups);
3023
group->lsn = log_sys->lsn;
3024
group->lsn_offset = LOG_FILE_HDR_SIZE;
3025
#ifdef UNIV_LOG_ARCHIVE
3026
group->archived_file_no = arch_log_no;
3027
group->archived_offset = 0;
3028
#endif /* UNIV_LOG_ARCHIVE */
3030
/* recv_truncate_group() is called only for log files that already
existed (new_logs_created == FALSE); the group lsn is passed for each
of its lsn arguments */
if (!new_logs_created) {
3031
recv_truncate_group(group, group->lsn, group->lsn,
3032
group->lsn, group->lsn);
3035
group = UT_LIST_GET_NEXT(log_groups, group);
3038
/* Restart the log buffer bookkeeping: nothing is pending for write,
both written-to lsns equal the new lsn, and the checkpoint number and
last checkpoint lsn start again from zero */
log_sys->buf_next_to_write = 0;
3039
log_sys->written_to_some_lsn = log_sys->lsn;
3040
log_sys->written_to_all_lsn = log_sys->lsn;
3042
log_sys->next_checkpoint_no = 0;
3043
log_sys->last_checkpoint_lsn = 0;
3045
#ifdef UNIV_LOG_ARCHIVE
3046
log_sys->archived_lsn = log_sys->lsn;
3047
#endif /* UNIV_LOG_ARCHIVE */
3049
/* Initialize the first block in the log buffer; buf_free and lsn are
then moved past the block header */
log_block_init(log_sys->buf, log_sys->lsn);
3050
log_block_set_first_rec_group(log_sys->buf, LOG_BLOCK_HDR_SIZE);
3052
log_sys->buf_free = LOG_BLOCK_HDR_SIZE;
3053
log_sys->lsn += LOG_BLOCK_HDR_SIZE;
3055
/* The mutex is not held while the checkpoints are made.
NOTE(review): presumably log_make_checkpoint_at() acquires it itself --
confirm */
mutex_exit(&(log_sys->mutex));
3057
/* Reset the checkpoint fields in logs */
3059
/* NOTE(review): the call is made twice on purpose as far as can be
told; presumably one call per checkpoint field -- confirm against
log_make_checkpoint_at() */
log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
3060
log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
3062
/* Restore the locking state the caller had on entry */
mutex_enter(&(log_sys->mutex));
3065
#ifdef UNIV_HOTBACKUP
3066
/**********************************************************
3067
Creates new log files after a backup has been restored.
For each index i below n_log_files a file named log_dir followed by
ib_logfile and i is created with OS_FILE_CREATE, sized with
os_file_set_size(), flushed and closed. Afterwards a zero-filled buffer
of LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE bytes receives the first
file header, checkpoint info and one initialized log block, and is
written to file number 0, which is reopened with OS_FILE_OPEN for that
purpose. The ut_a() check requires the directory path plus the basename
plus 11 further bytes to fit in the name buffer (NOTE(review): 11 is
presumably room for the decimal file number and the terminating NUL --
confirm). */
3070
recv_reset_log_files_for_backup(
3071
/*============================*/
3072
const char* log_dir, /* in: log file directory path */
3073
ulint n_log_files, /* in: number of log files */
3074
ulint log_file_size, /* in: log file size */
3075
ib_uint64_t lsn) /* in: new start lsn, must be
3076
divisible by OS_FILE_LOG_BLOCK_SIZE */
3084
/* Common part of every log file name; the file number is appended to
it by the sprintf() calls below */
static const char ib_logfile_basename[] = "ib_logfile";
3086
log_dir_len = strlen(log_dir);
3087
/* full path name of ib_logfile consists of log dir path + basename
3088
+ number. This must fit in the name buffer.
3090
ut_a(log_dir_len + strlen(ib_logfile_basename) + 11 < sizeof(name));
3092
buf = ut_malloc(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
3093
memset(buf, '\0', LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
3095
for (i = 0; i < n_log_files; i++) {
3097
sprintf(name, "%s%s%lu", log_dir,
3098
ib_logfile_basename, (ulong)i);
3100
log_file = os_file_create_simple(name, OS_FILE_CREATE,
3101
OS_FILE_READ_WRITE, &success);
3104
"InnoDB: Cannot create %s. Check that"
3105
" the file does not exist yet.\n", name);
3111
"Setting log file size to %lu %lu\n",
3112
(ulong) ut_get_high32(log_file_size),
3113
(ulong) log_file_size & 0xFFFFFFFFUL);
3115
success = os_file_set_size(name, log_file,
3116
log_file_size & 0xFFFFFFFFUL,
3117
ut_get_high32(log_file_size));
3121
"InnoDB: Cannot set %s size to %lu %lu\n",
3122
name, (ulong) ut_get_high32(log_file_size),
3123
(ulong) (log_file_size & 0xFFFFFFFFUL));
3127
os_file_flush(log_file);
3128
os_file_close(log_file);
3131
/* We pretend there is a checkpoint at lsn + LOG_BLOCK_HDR_SIZE */
3133
log_reset_first_header_and_checkpoint(buf, lsn);
3135
/* The first log block is placed directly after the file header in buf */
log_block_init_in_old_format(buf + LOG_FILE_HDR_SIZE, lsn);
3136
log_block_set_first_rec_group(buf + LOG_FILE_HDR_SIZE,
3137
LOG_BLOCK_HDR_SIZE);
3138
/* File number 0 was created by the loop above, so it is opened rather
than created here */
sprintf(name, "%s%s%lu", log_dir, ib_logfile_basename, (ulong)0);
3140
log_file = os_file_create_simple(name, OS_FILE_OPEN,
3141
OS_FILE_READ_WRITE, &success);
3143
fprintf(stderr, "InnoDB: Cannot open %s.\n", name);
3148
/* NOTE(review): the two 0 arguments are presumably the low and high
words of the file offset, i.e. the write goes to the start of the file
-- confirm against os_file_write() */
os_file_write(name, log_file, buf, 0, 0,
3149
LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
3150
os_file_flush(log_file);
3151
os_file_close(log_file);
3155
#endif /* UNIV_HOTBACKUP */
3157
#ifdef UNIV_LOG_ARCHIVE
3158
/**********************************************************
3159
Reads from the archive of a log group and performs recovery.
The archived file named after group->id and group->archived_file_no is
opened to obtain its size, closed again, added as a node to the archive
space of the group, and then read through fil_io(). Its header must
carry the expected group id and file number and a nonzero
LOG_FILE_ARCH_COMPLETED field, and its start lsn must agree with
recv_sys->scanned_lsn (or, when nothing has been scanned yet, must not
exceed recv_sys->parse_start_lsn). The contents after the header are
read in pieces of at most RECV_SCAN_SIZE bytes and passed to
recv_scan_log_recs(). */
3162
log_group_recover_from_archive_file(
3163
/*================================*/
3164
/* out: TRUE if no more complete
3165
consistent archive files */
3166
log_group_t* group) /* in: log group */
3168
os_file_t file_handle;
3169
ib_uint64_t start_lsn;
3170
ib_uint64_t file_end_lsn;
3171
ib_uint64_t dummy_lsn;
3172
ib_uint64_t scanned_lsn;
3178
ulint file_size_high;
3187
/* Add the file to the archive file space; open the file */
3189
log_archived_file_name_gen(name, group->id, group->archived_file_no);
3191
file_handle = os_file_create(name, OS_FILE_OPEN,
3192
OS_FILE_LOG, OS_FILE_AIO, &ret);
3197
"InnoDB: Do you want to copy additional"
3198
" archived log files\n"
3199
"InnoDB: to the directory\n");
3201
"InnoDB: or were these all the files needed"
3204
"InnoDB: (Y == copy more files; N == this is all)?");
3206
/* The answer to the question above is read from standard input */
input_char = getchar();
3208
if (input_char == (int) 'N') {
3211
} else if (input_char == (int) 'Y') {
3213
/* Y: jump back to the try_open_again label */
goto try_open_again;
3219
ret = os_file_get_size(file_handle, &file_size, &file_size_high);
3222
/* The high word of the size must be zero, so the whole size is held
in file_size */
ut_a(file_size_high == 0);
3224
fprintf(stderr, "InnoDB: Opened archived log file %s\n", name);
3226
/* The handle is closed right away; the reads below go through fil_io()
on group->archive_space_id instead */
ret = os_file_close(file_handle);
3228
/* A file shorter than the log file header cannot be used */
if (file_size < LOG_FILE_HDR_SIZE) {
3230
"InnoDB: Archive file header incomplete %s\n", name);
3237
/* Add the archive file as a node to the space */
3239
fil_node_create(name, 1 + file_size / UNIV_PAGE_SIZE,
3240
group->archive_space_id, FALSE);
3241
/* Compile-time check. NOTE(review): presumably buf holds
RECV_SCAN_SIZE bytes, so the header read below must not be larger --
confirm where buf is allocated */
#if RECV_SCAN_SIZE < LOG_FILE_HDR_SIZE
3242
# error "RECV_SCAN_SIZE < LOG_FILE_HDR_SIZE"
3245
/* Read the archive file header */
3246
fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE, group->archive_space_id, 0, 0,
3247
LOG_FILE_HDR_SIZE, buf, NULL);
3249
/* Check if the archive file header is consistent */
3251
if (mach_read_from_4(buf + LOG_GROUP_ID) != group->id
3252
|| mach_read_from_4(buf + LOG_FILE_NO)
3253
!= group->archived_file_no) {
3255
"InnoDB: Archive file header inconsistent %s\n", name);
3260
/* A zero LOG_FILE_ARCH_COMPLETED field marks a file that was not
completely written */
if (!mach_read_from_4(buf + LOG_FILE_ARCH_COMPLETED)) {
3262
"InnoDB: Archive file not completely written %s\n",
3268
start_lsn = mach_read_ull(buf + LOG_FILE_START_LSN);
3269
file_end_lsn = mach_read_ull(buf + LOG_FILE_END_LSN);
3271
/* scanned_lsn is still zero (recv_recovery_from_archive_start() sets
it to 0): the start lsn of this file becomes the scan position, but it
must not lie beyond parse_start_lsn */
if (!recv_sys->scanned_lsn) {
3273
if (recv_sys->parse_start_lsn < start_lsn) {
3275
"InnoDB: Archive log file %s"
3276
" starts from too big a lsn\n",
3281
recv_sys->scanned_lsn = start_lsn;
3284
/* The file has to start exactly at the lsn scanned so far */
if (recv_sys->scanned_lsn != start_lsn) {
3287
"InnoDB: Archive log file %s starts from"
3293
/* Reading starts right after the file header */
read_offset = LOG_FILE_HDR_SIZE;
3296
len = RECV_SCAN_SIZE;
3298
/* Near the end of the file the read is shortened to what is left,
rounded down to a multiple of OS_FILE_LOG_BLOCK_SIZE */
if (read_offset + len > file_size) {
3299
len = ut_calc_align_down(file_size - read_offset,
3300
OS_FILE_LOG_BLOCK_SIZE);
3309
if (log_debug_writes) {
3311
"InnoDB: Archive read starting at"
3312
" lsn %"PRIu64", len %lu from file %s\n",
3316
#endif /* UNIV_DEBUG */
3318
/* read_offset is split into a page number and a byte offset within
that page */
fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE,
3319
group->archive_space_id, read_offset / UNIV_PAGE_SIZE,
3320
read_offset % UNIV_PAGE_SIZE, len, buf, NULL);
3322
/* The second argument is the byte size of the buffer pool frames not
counted in recv_n_pool_free_frames */
ret = recv_scan_log_recs(
3323
TRUE, (buf_pool->n_frames - recv_n_pool_free_frames)
3324
* UNIV_PAGE_SIZE, TRUE, buf, len, start_lsn,
3325
&dummy_lsn, &scanned_lsn);
3327
/* scanned_lsn has reached the LOG_FILE_END_LSN value read from the
header */
if (scanned_lsn == file_end_lsn) {
3334
"InnoDB: Archive log file %s"
3335
" does not scan right\n",
3343
ut_ad(start_lsn == scanned_lsn);
3349
/************************************************************
3350
Recovers from archived log files, and also from log files, if they exist.
Sets recv_recovery_on and recv_recovery_from_backup_on to TRUE; the
latter is set back to FALSE by recv_recovery_from_archive_finish().
Archived files are processed with log_group_recover_from_archive_file(),
starting from file number first_log_no. If recv_sys->recovered_lsn is
still below limit_lsn afterwards, recovery continues through
recv_recovery_from_checkpoint_start(LOG_ARCHIVE, ...). */
3353
recv_recovery_from_archive_start(
3354
/*=============================*/
3355
/* out: error code or DB_SUCCESS */
3356
ib_uint64_t min_flushed_lsn,/* in: min flushed lsn field from the
3358
ib_uint64_t limit_lsn, /* in: recover up to this lsn if
3360
ulint first_log_no) /* in: number of the first archived
3361
log file to use in the recovery; the
3362
file will be searched from
3363
INNOBASE_LOG_ARCH_DIR specified in
3364
server config file */
3375
recv_sys_init(FALSE, buf_pool_get_curr_size());
3377
/* Both flags are raised for the duration of the archive recovery */
recv_recovery_on = TRUE;
3378
recv_recovery_from_backup_on = TRUE;
3380
recv_sys->limit_lsn = limit_lsn;
3384
/* Walk the list of log groups looking for the one whose id equals
group_id */
group = UT_LIST_GET_FIRST(log_sys->log_groups);
3387
if (group->id == group_id) {
3392
group = UT_LIST_GET_NEXT(log_groups, group);
3397
"InnoDB: There is no log group defined with id %lu!\n",
3402
group->archived_file_no = first_log_no;
3404
/* Parsing and the recovered lsn both start at min_flushed_lsn; the
scanned lsn and checkpoint number start at zero */
recv_sys->parse_start_lsn = min_flushed_lsn;
3406
recv_sys->scanned_lsn = 0;
3407
recv_sys->scanned_checkpoint_no = 0;
3408
recv_sys->recovered_lsn = recv_sys->parse_start_lsn;
3410
recv_sys->archive_group = group;
3414
mutex_enter(&(log_sys->mutex));
3417
/* According to the header comment of the callee, TRUE means that there
are no more complete consistent archive files */
ret = log_group_recover_from_archive_file(group);
3419
/* Close and truncate a possible processed archive file
3420
from the file space */
3422
trunc_len = UNIV_PAGE_SIZE
3423
* fil_space_get_size(group->archive_space_id);
3424
if (trunc_len > 0) {
3425
fil_space_truncate_start(group->archive_space_id,
3429
/* Move on to the next archived file number */
group->archived_file_no++;
3432
/* The archived files did not bring recovered_lsn up to limit_lsn */
if (recv_sys->recovered_lsn < limit_lsn) {
3434
/* scanned_lsn is still zero: use parse_start_lsn as the scan
position */
if (!recv_sys->scanned_lsn) {
3436
recv_sys->scanned_lsn = recv_sys->parse_start_lsn;
3439
/* log_sys->mutex is released before
recv_recovery_from_checkpoint_start() is called and acquired again
afterwards */
mutex_exit(&(log_sys->mutex));
3441
err = recv_recovery_from_checkpoint_start(LOG_ARCHIVE,
3445
if (err != DB_SUCCESS) {
3450
mutex_enter(&(log_sys->mutex));
3453
/* NOTE(review): IB_ULONGLONG_MAX presumably stands for no recovery
limit, so this branch is taken when a limit was requested -- confirm
with the callers */
if (limit_lsn != IB_ULONGLONG_MAX) {
3455
recv_apply_hashed_log_recs(FALSE);
3457
/* The logs are reset at the recovered lsn; FALSE says the log files
were not newly created */
recv_reset_logs(recv_sys->recovered_lsn, 0, FALSE);
3460
mutex_exit(&(log_sys->mutex));
3465
/************************************************************
3466
Completes recovery from archive: calls
recv_recovery_from_checkpoint_finish() and then sets
recv_recovery_from_backup_on back to FALSE (it is set to TRUE in
recv_recovery_from_archive_start()). */
3469
recv_recovery_from_archive_finish(void)
3470
/*===================================*/
3472
/* The finishing steps are shared with checkpoint recovery */
recv_recovery_from_checkpoint_finish();
3474
/* Recovery from backup is over */
recv_recovery_from_backup_on = FALSE;
3476
#endif /* UNIV_LOG_ARCHIVE */