/******************************************************
(c) 1995-1997 Innobase Oy

Created 12/9/1995 Heikki Tuuri
*******************************************************/
#include "dict0boot.h"
#include "srv0start.h"
/*
General philosophy of InnoDB redo-logs:

1) Every change to the contents of a data page must be done
through mtr, which in mtr_commit() writes log records
to the InnoDB redo log.

2) Normally these changes are performed using mlog_write_ulint()
or a similar function.

3) In some page-level operations only a code number of a
C function and its parameters are written to the log to
reduce the size of the log.

  3a) You should not add parameters to these kinds of functions
  (e.g. trx_undo_header_create(), trx_undo_insert_header_reuse()).

  3b) You should not add functionality which either changes the
  behaviour compared with the old one or depends on data outside
  of the page. These kinds of functions should implement a
  self-contained page transformation, and they should be left
  unchanged unless you have very essential reasons to change the
  log semantics or format.
*/
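/* As an illustration of point 2) above, a typical page change under a
mini-transaction is roughly of the following form (a sketch only: the
page fetch is elided, and ptr and val stand for a pointer into the page
and the new field value):

	mtr_t	mtr;

	mtr_start(&mtr);

	... fetch the page and set ptr to the field to be changed ...

	mlog_write_ulint(ptr, val, MLOG_4BYTES, &mtr);

	mtr_commit(&mtr);

It is mtr_commit() which appends the corresponding log records to the
log buffer of this module; see mtr0mtr.c and mtr0log.c. */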
/* Current free limit of space 0; protected by the log sys mutex; 0 means
that the limit has not yet been initialized */
ulint	log_fsp_current_free_limit = 0;
/* Global log system variable */
log_t*	log_sys	= NULL;

#ifdef UNIV_DEBUG
ibool	log_do_write = TRUE;

ibool	log_debug_writes = FALSE;
#endif /* UNIV_DEBUG */
/* These control how often we print warnings if the last checkpoint is too
old */
ibool	log_has_printed_chkp_warning = FALSE;
time_t	log_last_warning_time;
#ifdef UNIV_LOG_ARCHIVE
/* Pointer to this variable is used as the i/o-message when we do i/o to an
archive */
byte	log_archive_io;
#endif /* UNIV_LOG_ARCHIVE */
/* A margin for free space in the log buffer before a log entry is catenated */
#define LOG_BUF_WRITE_MARGIN	(4 * OS_FILE_LOG_BLOCK_SIZE)

/* Margins for free space in the log buffer after a log entry is catenated */
#define LOG_BUF_FLUSH_RATIO	2
#define LOG_BUF_FLUSH_MARGIN	(LOG_BUF_WRITE_MARGIN + 4 * UNIV_PAGE_SIZE)

/* Margin for the free space in the smallest log group, before a new query
step which modifies the database, is started */
#define LOG_CHECKPOINT_FREE_PER_THREAD	(4 * UNIV_PAGE_SIZE)
#define LOG_CHECKPOINT_EXTRA_FREE	(8 * UNIV_PAGE_SIZE)

/* This parameter controls asynchronous making of a new checkpoint; the value
should be bigger than LOG_POOL_PREFLUSH_RATIO_SYNC */
#define LOG_POOL_CHECKPOINT_RATIO_ASYNC	32

/* This parameter controls synchronous preflushing of modified buffer pages */
#define LOG_POOL_PREFLUSH_RATIO_SYNC	16

/* The same ratio for asynchronous preflushing; this value should be less than
the previous */
#define LOG_POOL_PREFLUSH_RATIO_ASYNC	8

/* Extra margin, in addition to one log file, used in archiving */
#define LOG_ARCHIVE_EXTRA_MARGIN	(4 * UNIV_PAGE_SIZE)

/* This parameter controls asynchronous writing to the archive */
#define LOG_ARCHIVE_RATIO_ASYNC		16

/* Codes used in unlocking flush latches */
#define LOG_UNLOCK_NONE_FLUSHED_LOCK	1
#define LOG_UNLOCK_FLUSH_LOCK		2

/* States of an archiving operation */
#define	LOG_ARCHIVE_READ	1
#define	LOG_ARCHIVE_WRITE	2
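/* For concreteness (assuming the usual values OS_FILE_LOG_BLOCK_SIZE = 512
and UNIV_PAGE_SIZE = 16384; these may differ in a given build), the buffer
margins above work out to

	LOG_BUF_WRITE_MARGIN = 4 * 512          =  2048 bytes
	LOG_BUF_FLUSH_MARGIN = 2048 + 4 * 16384 = 67584 bytes

and log_init() below sets log_sys->max_buf_free to
buf_size / LOG_BUF_FLUSH_RATIO - LOG_BUF_FLUSH_MARGIN, i.e. a flush of the
log buffer is considered once somewhat less than half of it is in use. */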
/**********************************************************
Completes a checkpoint write i/o to a log file. */
static
void
log_io_complete_checkpoint(void);
/*============================*/
#ifdef UNIV_LOG_ARCHIVE
/**********************************************************
Completes an archiving i/o. */
static
void
log_io_complete_archive(void);
/*=========================*/
#endif /* UNIV_LOG_ARCHIVE */
/********************************************************************
Sets the global variable log_fsp_current_free_limit. Also makes a checkpoint,
so that we know that the limit has been written to a log checkpoint field
on disk. */

void
log_fsp_current_free_limit_set_and_checkpoint(
/*==========================================*/
	ulint	limit)	/* in: limit to set */
{
	ibool	success;

	mutex_enter(&(log_sys->mutex));

	log_fsp_current_free_limit = limit;

	mutex_exit(&(log_sys->mutex));

	/* Try to make a synchronous checkpoint */

	success = FALSE;

	while (!success) {
		success = log_checkpoint(TRUE, TRUE);
	}
}
/********************************************************************
Returns the oldest modified block lsn in the pool, or log_sys->lsn if none
exists. */
dulint
log_buf_pool_get_oldest_modification(void)
/*======================================*/
{
	dulint	lsn;

	ut_ad(mutex_own(&(log_sys->mutex)));

	lsn = buf_pool_get_oldest_modification();

	if (ut_dulint_is_zero(lsn)) {

		lsn = log_sys->lsn;
	}

	return(lsn);
}
/****************************************************************
Opens the log for log_write_low. The log must be closed with log_close and
released with log_release. */

dulint
log_reserve_and_open(
/*=================*/
			/* out: start lsn of the log record */
	ulint	len)	/* in: length of data to be catenated */
{
	log_t*	log	= log_sys;
	ulint	len_upper_limit;
#ifdef UNIV_LOG_ARCHIVE
	ulint	archived_lsn_age;
	ulint	dummy;
#endif /* UNIV_LOG_ARCHIVE */
#ifdef UNIV_DEBUG
	ulint	count		= 0;
#endif /* UNIV_DEBUG */

	ut_a(len < log->buf_size / 2);
loop:
	mutex_enter(&(log->mutex));

	/* Calculate an upper limit for the space the string may take in the
	log buffer */

	len_upper_limit = LOG_BUF_WRITE_MARGIN + (5 * len) / 4;

	if (log->buf_free + len_upper_limit > log->buf_size) {

		mutex_exit(&(log->mutex));

		/* Not enough free space, do a synchronous flush of the log
		buffer */

		log_buffer_flush_to_disk();

		srv_log_waits++;

		ut_ad(++count < 50);

		goto loop;
	}
#ifdef UNIV_LOG_ARCHIVE
	if (log->archiving_state != LOG_ARCH_OFF) {

		archived_lsn_age = ut_dulint_minus(log->lsn,
						   log->archived_lsn);
		if (archived_lsn_age + len_upper_limit
		    > log->max_archived_lsn_age) {
			/* Not enough free archived space in log groups: do a
			synchronous archive write batch: */

			mutex_exit(&(log->mutex));

			ut_ad(len_upper_limit <= log->max_archived_lsn_age);

			log_archive_do(TRUE, &dummy);

			ut_ad(++count < 50);

			goto loop;
		}
	}
#endif /* UNIV_LOG_ARCHIVE */

#ifdef UNIV_LOG_DEBUG
	log->old_buf_free = log->buf_free;
	log->old_lsn = log->lsn;
#endif /* UNIV_LOG_DEBUG */

	return(log->lsn);
}
/****************************************************************
Writes to the log the string given. It is assumed that the caller holds the
log mutex. */

void
log_write_low(
/*==========*/
	byte*	str,		/* in: string */
	ulint	str_len)	/* in: string length */
{
	log_t*	log	= log_sys;
	ulint	len;
	ulint	data_len;
	byte*	log_block;

	ut_ad(mutex_own(&(log->mutex)));
part_loop:
	/* Calculate a part length */

	data_len = (log->buf_free % OS_FILE_LOG_BLOCK_SIZE) + str_len;

	if (data_len <= OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) {

		/* The string fits within the current log block */

		len = str_len;
	} else {
		data_len = OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE;

		len = OS_FILE_LOG_BLOCK_SIZE
			- (log->buf_free % OS_FILE_LOG_BLOCK_SIZE)
			- LOG_BLOCK_TRL_SIZE;
	}

	ut_memcpy(log->buf + log->buf_free, str, len);

	str_len -= len;
	str = str + len;

	log_block = ut_align_down(log->buf + log->buf_free,
				  OS_FILE_LOG_BLOCK_SIZE);
	log_block_set_data_len(log_block, data_len);

	if (data_len == OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) {
		/* This block became full */
		log_block_set_data_len(log_block, OS_FILE_LOG_BLOCK_SIZE);
		log_block_set_checkpoint_no(log_block,
					    log_sys->next_checkpoint_no);
		len += LOG_BLOCK_HDR_SIZE + LOG_BLOCK_TRL_SIZE;

		log->lsn = ut_dulint_add(log->lsn, len);

		/* Initialize the next block header */
		log_block_init(log_block + OS_FILE_LOG_BLOCK_SIZE, log->lsn);
	} else {
		log->lsn = ut_dulint_add(log->lsn, len);
	}

	log->buf_free += len;

	ut_ad(log->buf_free <= log->buf_size);

	if (str_len > 0) {
		goto part_loop;
	}

	srv_log_write_requests++;
}
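/* A worked example of the block splitting in log_write_low() (illustrative
numbers only, with LOG_BLOCK_HDR_SIZE = 12 and LOG_BLOCK_TRL_SIZE = 4):
if log->buf_free % OS_FILE_LOG_BLOCK_SIZE == 300 and str_len == 400, then
data_len would be 700, which exceeds 512 - 4, so only 512 - 300 - 4 = 208
bytes are copied, the block is marked full, lsn and buf_free advance by
208 + 12 + 4 = 224, the next block header is initialized, and the
remaining 192 bytes are written by the next round of part_loop. */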
324
/****************************************************************
333
ulint first_rec_group;
336
log_t* log = log_sys;
337
ulint checkpoint_age;
339
ut_ad(mutex_own(&(log->mutex)));
343
log_block = ut_align_down(log->buf + log->buf_free,
344
OS_FILE_LOG_BLOCK_SIZE);
345
first_rec_group = log_block_get_first_rec_group(log_block);
347
if (first_rec_group == 0) {
348
/* We initialized a new log block which was not written
349
full by the current mtr: the next mtr log record group
350
will start within this block at the offset data_len */
352
log_block_set_first_rec_group(
353
log_block, log_block_get_data_len(log_block));
356
if (log->buf_free > log->max_buf_free) {
358
log->check_flush_or_checkpoint = TRUE;
361
checkpoint_age = ut_dulint_minus(lsn, log->last_checkpoint_lsn);
363
if (checkpoint_age >= log->log_group_capacity) {
364
/* TODO: split btr_store_big_rec_extern_fields() into small
365
steps so that we can release all latches in the middle, and
366
call log_free_check() to ensure we never write over log written
367
after the latest checkpoint. In principle, we should split all
368
big_rec operations, but other operations are smaller. */
370
if (!log_has_printed_chkp_warning
371
|| difftime(time(NULL), log_last_warning_time) > 15) {
373
log_has_printed_chkp_warning = TRUE;
374
log_last_warning_time = time(NULL);
376
ut_print_timestamp(stderr);
378
" InnoDB: ERROR: the age of the last"
379
" checkpoint is %lu,\n"
"InnoDB: which exceeds the log group"
" capacity %lu.\n"
"InnoDB: If you are using big"
383
" BLOB or TEXT rows, you must set the\n"
384
"InnoDB: combined size of log files"
385
" at least 10 times bigger than the\n"
386
"InnoDB: largest such row.\n",
387
(ulong) checkpoint_age,
388
(ulong) log->log_group_capacity);
392
if (checkpoint_age <= log->max_modified_age_async) {
397
oldest_lsn = buf_pool_get_oldest_modification();
399
if (ut_dulint_is_zero(oldest_lsn)
400
|| (ut_dulint_minus(lsn, oldest_lsn)
401
> log->max_modified_age_async)
402
|| checkpoint_age > log->max_checkpoint_age_async) {
404
log->check_flush_or_checkpoint = TRUE;
408
#ifdef UNIV_LOG_DEBUG
409
log_check_log_recs(log->buf + log->old_buf_free,
410
log->buf_free - log->old_buf_free, log->old_lsn);
416
#ifdef UNIV_LOG_ARCHIVE
417
/**********************************************************
418
Pads the current log block full with dummy log records. Used in producing
419
consistent archived log files. */
422
log_pad_current_log_block(void)
423
/*===========================*/
425
byte b = MLOG_DUMMY_RECORD;
430
/* We retrieve lsn only because otherwise gcc crashed on HP-UX */
431
lsn = log_reserve_and_open(OS_FILE_LOG_BLOCK_SIZE);
433
pad_length = OS_FILE_LOG_BLOCK_SIZE
434
- (log_sys->buf_free % OS_FILE_LOG_BLOCK_SIZE)
435
- LOG_BLOCK_TRL_SIZE;
437
for (i = 0; i < pad_length; i++) {
438
log_write_low(&b, 1);
446
ut_a((ut_dulint_get_low(lsn) % OS_FILE_LOG_BLOCK_SIZE)
447
== LOG_BLOCK_HDR_SIZE);
449
#endif /* UNIV_LOG_ARCHIVE */
451
/**********************************************************
452
Calculates the data capacity of a log group, when the log file headers are not
456
log_group_get_capacity(
457
/*===================*/
458
/* out: capacity in bytes */
459
log_group_t* group) /* in: log group */
461
ut_ad(mutex_own(&(log_sys->mutex)));
463
return((group->file_size - LOG_FILE_HDR_SIZE) * group->n_files);
466
/**********************************************************
Calculates the offset within a log group, when the log file headers are not
included. */
471
log_group_calc_size_offset(
472
/*=======================*/
473
/* out: size offset (<= offset) */
474
ulint offset, /* in: real offset within the log group */
475
log_group_t* group) /* in: log group */
477
ut_ad(mutex_own(&(log_sys->mutex)));
479
return(offset - LOG_FILE_HDR_SIZE * (1 + offset / group->file_size));
482
/**********************************************************
Calculates the offset within a log group, when the log file headers are
included. */
487
log_group_calc_real_offset(
488
/*=======================*/
489
/* out: real offset (>= offset) */
490
ulint offset, /* in: size offset within the log group */
491
log_group_t* group) /* in: log group */
493
ut_ad(mutex_own(&(log_sys->mutex)));
495
return(offset + LOG_FILE_HDR_SIZE
496
* (1 + offset / (group->file_size - LOG_FILE_HDR_SIZE)));
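/* Example of the two mappings above (illustrative numbers only): with
file_size = 1048576 and LOG_FILE_HDR_SIZE = 2048, the real offset 1050624
(the first data byte of the second file in the group) maps to the size
offset 1050624 - 2048 * (1 + 1) = 1046528, and feeding 1046528 back into
log_group_calc_real_offset() gives
1046528 + 2048 * (1 + 1046528 / 1046528) = 1050624 again. */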
499
/**********************************************************
500
Calculates the offset of an lsn within a log group. */
503
log_group_calc_lsn_offset(
504
/*======================*/
505
/* out: offset within the log group */
506
dulint lsn, /* in: lsn, must be within 4 GB of
508
log_group_t* group) /* in: log group */
511
ib_longlong gr_lsn_size_offset;
512
ib_longlong difference;
513
ib_longlong group_size;
516
ut_ad(mutex_own(&(log_sys->mutex)));
518
/* If total log file size is > 2 GB we can easily get overflows
519
with 32-bit integers. Use 64-bit integers instead. */
523
gr_lsn_size_offset = (ib_longlong)
524
log_group_calc_size_offset(group->lsn_offset, group);
526
group_size = (ib_longlong) log_group_get_capacity(group);
528
if (ut_dulint_cmp(lsn, gr_lsn) >= 0) {
530
difference = (ib_longlong) ut_dulint_minus(lsn, gr_lsn);
532
difference = (ib_longlong) ut_dulint_minus(gr_lsn, lsn);
534
difference = difference % group_size;
536
difference = group_size - difference;
539
offset = (gr_lsn_size_offset + difference) % group_size;
541
ut_a(offset < (((ib_longlong) 1) << 32)); /* offset must be < 4 GB */
544
"Offset is %lu gr_lsn_offset is %lu difference is %lu\n",
545
(ulint)offset,(ulint)gr_lsn_size_offset, (ulint)difference);
548
return(log_group_calc_real_offset((ulint)offset, group));
551
/***********************************************************************
552
Calculates where in log files we find a specified lsn. */
555
log_calc_where_lsn_is(
556
/*==================*/
557
/* out: log file number */
558
ib_longlong* log_file_offset, /* out: offset in that file
559
(including the header) */
560
dulint first_header_lsn, /* in: first log file start
562
dulint lsn, /* in: lsn whose position to
564
ulint n_log_files, /* in: total number of log
566
ib_longlong log_file_size) /* in: log file size
567
(including the header) */
570
ib_longlong ib_first_header_lsn;
571
ib_longlong capacity = log_file_size - LOG_FILE_HDR_SIZE;
573
ib_longlong add_this_many;
575
ib_lsn = ut_conv_dulint_to_longlong(lsn);
576
ib_first_header_lsn = ut_conv_dulint_to_longlong(first_header_lsn);
578
if (ib_lsn < ib_first_header_lsn) {
579
add_this_many = 1 + (ib_first_header_lsn - ib_lsn)
580
/ (capacity * (ib_longlong)n_log_files);
581
ib_lsn += add_this_many
582
* capacity * (ib_longlong)n_log_files;
585
ut_a(ib_lsn >= ib_first_header_lsn);
587
file_no = ((ulint)((ib_lsn - ib_first_header_lsn) / capacity))
589
*log_file_offset = (ib_lsn - ib_first_header_lsn) % capacity;
591
*log_file_offset = *log_file_offset + LOG_FILE_HDR_SIZE;
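/* Illustration with made-up numbers: let n_log_files = 2,
capacity = log_file_size - LOG_FILE_HDR_SIZE = 1048576 and
first_header_lsn = 8192. An lsn of 8192 + 1048576 + 1000 lies
1048576 + 1000 bytes past the start of the data area, so it falls into
file number 1 (the second file), and *log_file_offset becomes
1000 + LOG_FILE_HDR_SIZE. */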
596
/************************************************************
597
Sets the field values in group to correspond to a given lsn. For this function
598
to work, the values must already be correctly initialized to correspond to
599
some lsn, for instance, a checkpoint lsn. */
602
log_group_set_fields(
603
/*=================*/
604
log_group_t* group, /* in: group */
605
dulint lsn) /* in: lsn for which the values should be
608
group->lsn_offset = log_group_calc_lsn_offset(lsn, group);
612
/*********************************************************************
613
Calculates the recommended highest values for lsn - last_checkpoint_lsn,
614
lsn - buf_get_oldest_modification(), and lsn - max_archive_lsn_age. */
617
log_calc_max_ages(void)
618
/*===================*/
619
/* out: error value FALSE if the smallest log group is
620
too small to accommodate the number of OS threads in
621
the database server */
626
ibool success = TRUE;
627
ulint smallest_capacity;
628
ulint archive_margin;
629
ulint smallest_archive_margin;
631
ut_ad(!mutex_own(&(log_sys->mutex)));
633
mutex_enter(&(log_sys->mutex));
635
group = UT_LIST_GET_FIRST(log_sys->log_groups);
639
smallest_capacity = ULINT_MAX;
640
smallest_archive_margin = ULINT_MAX;
643
if (log_group_get_capacity(group) < smallest_capacity) {
645
smallest_capacity = log_group_get_capacity(group);
648
archive_margin = log_group_get_capacity(group)
649
- (group->file_size - LOG_FILE_HDR_SIZE)
650
- LOG_ARCHIVE_EXTRA_MARGIN;
652
if (archive_margin < smallest_archive_margin) {
654
smallest_archive_margin = archive_margin;
657
group = UT_LIST_GET_NEXT(log_groups, group);
660
/* Add extra safety */
661
smallest_capacity = smallest_capacity - smallest_capacity / 10;
663
/* For each OS thread we must reserve so much free space in the
664
smallest log group that it can accommodate the log entries produced
665
by single query steps: running out of free log space is a serious
666
system error which requires rebooting the database. */
668
free = LOG_CHECKPOINT_FREE_PER_THREAD * (10 + srv_thread_concurrency)
669
+ LOG_CHECKPOINT_EXTRA_FREE;
670
if (free >= smallest_capacity / 2) {
675
margin = smallest_capacity - free;
678
margin = ut_min(margin, log_sys->adm_checkpoint_interval);
680
margin = margin - margin / 10; /* Add still some extra safety */
682
log_sys->log_group_capacity = smallest_capacity;
684
log_sys->max_modified_age_async = margin
685
- margin / LOG_POOL_PREFLUSH_RATIO_ASYNC;
686
log_sys->max_modified_age_sync = margin
687
- margin / LOG_POOL_PREFLUSH_RATIO_SYNC;
689
log_sys->max_checkpoint_age_async = margin - margin
690
/ LOG_POOL_CHECKPOINT_RATIO_ASYNC;
691
log_sys->max_checkpoint_age = margin;
693
#ifdef UNIV_LOG_ARCHIVE
694
log_sys->max_archived_lsn_age = smallest_archive_margin;
696
log_sys->max_archived_lsn_age_async = smallest_archive_margin
697
- smallest_archive_margin / LOG_ARCHIVE_RATIO_ASYNC;
698
#endif /* UNIV_LOG_ARCHIVE */
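/* Illustration of the margin arithmetic above (made-up sizes, assuming
16 kB pages): with one group of two 5 MB ib_logfiles, smallest_capacity
is about 2 * (5 MB - LOG_FILE_HDR_SIZE), cut by 10% to roughly 9 MB.
With srv_thread_concurrency = 8, free = 4 * 16384 * (10 + 8) + 8 * 16384
= 1310720 bytes, so margin is roughly 9 MB - 1.25 MB, cut by a further
10% to about 7 MB. Of that margin, max_modified_age_async is
margin - margin / 8, max_modified_age_sync is margin - margin / 16,
max_checkpoint_age_async is margin - margin / 32, and max_checkpoint_age
is the margin itself. */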
700
mutex_exit(&(log_sys->mutex));
704
"InnoDB: Error: ib_logfiles are too small"
705
" for innodb_thread_concurrency %lu.\n"
706
"InnoDB: The combined size of ib_logfiles"
707
" should be bigger than\n"
708
"InnoDB: 200 kB * innodb_thread_concurrency.\n"
709
"InnoDB: To get mysqld to start up, set"
710
" innodb_thread_concurrency in my.cnf\n"
711
"InnoDB: to a lower value, for example, to 8."
712
" After an ERROR-FREE shutdown\n"
713
"InnoDB: of mysqld you can adjust the size of"
714
" ib_logfiles, as explained in\n"
715
"InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
716
"adding-and-removing.html\n"
717
"InnoDB: Cannot continue operation."
718
" Calling exit(1).\n",
719
(ulong)srv_thread_concurrency);
727
/**********************************************************
728
Initializes the log. */
736
log_sys = mem_alloc(sizeof(log_t));
738
mutex_create(&log_sys->mutex, SYNC_LOG);
740
mutex_enter(&(log_sys->mutex));
742
/* Start the lsn from one log block from zero: this way every
743
log record has a start lsn != zero, a fact which we will use */
745
log_sys->lsn = LOG_START_LSN;
747
ut_a(LOG_BUFFER_SIZE >= 16 * OS_FILE_LOG_BLOCK_SIZE);
748
ut_a(LOG_BUFFER_SIZE >= 4 * UNIV_PAGE_SIZE);
750
buf = ut_malloc(LOG_BUFFER_SIZE + OS_FILE_LOG_BLOCK_SIZE);
751
log_sys->buf = ut_align(buf, OS_FILE_LOG_BLOCK_SIZE);
753
log_sys->buf_size = LOG_BUFFER_SIZE;
755
memset(log_sys->buf, '\0', LOG_BUFFER_SIZE);
757
log_sys->max_buf_free = log_sys->buf_size / LOG_BUF_FLUSH_RATIO
758
- LOG_BUF_FLUSH_MARGIN;
759
log_sys->check_flush_or_checkpoint = TRUE;
760
UT_LIST_INIT(log_sys->log_groups);
762
log_sys->n_log_ios = 0;
764
log_sys->n_log_ios_old = log_sys->n_log_ios;
765
log_sys->last_printout_time = time(NULL);
766
/*----------------------------*/
768
log_sys->buf_next_to_write = 0;
770
log_sys->write_lsn = ut_dulint_zero;
771
log_sys->current_flush_lsn = ut_dulint_zero;
772
log_sys->flushed_to_disk_lsn = ut_dulint_zero;
774
log_sys->written_to_some_lsn = log_sys->lsn;
775
log_sys->written_to_all_lsn = log_sys->lsn;
777
log_sys->n_pending_writes = 0;
779
log_sys->no_flush_event = os_event_create(NULL);
781
os_event_set(log_sys->no_flush_event);
783
log_sys->one_flushed_event = os_event_create(NULL);
785
os_event_set(log_sys->one_flushed_event);
787
/*----------------------------*/
788
log_sys->adm_checkpoint_interval = ULINT_MAX;
790
log_sys->next_checkpoint_no = ut_dulint_zero;
791
log_sys->last_checkpoint_lsn = log_sys->lsn;
792
log_sys->n_pending_checkpoint_writes = 0;
794
rw_lock_create(&log_sys->checkpoint_lock, SYNC_NO_ORDER_CHECK);
796
log_sys->checkpoint_buf
797
= ut_align(mem_alloc(2 * OS_FILE_LOG_BLOCK_SIZE),
798
OS_FILE_LOG_BLOCK_SIZE);
799
memset(log_sys->checkpoint_buf, '\0', OS_FILE_LOG_BLOCK_SIZE);
800
/*----------------------------*/
802
#ifdef UNIV_LOG_ARCHIVE
803
/* Under MySQL, log archiving is always off */
804
log_sys->archiving_state = LOG_ARCH_OFF;
805
log_sys->archived_lsn = log_sys->lsn;
806
log_sys->next_archived_lsn = ut_dulint_zero;
808
log_sys->n_pending_archive_ios = 0;
810
rw_lock_create(&log_sys->archive_lock, SYNC_NO_ORDER_CHECK);
812
log_sys->archive_buf = NULL;
815
ut_malloc(LOG_ARCHIVE_BUF_SIZE
816
+ OS_FILE_LOG_BLOCK_SIZE),
817
OS_FILE_LOG_BLOCK_SIZE); */
818
log_sys->archive_buf_size = 0;
820
/* memset(log_sys->archive_buf, '\0', LOG_ARCHIVE_BUF_SIZE); */
822
log_sys->archiving_on = os_event_create(NULL);
823
#endif /* UNIV_LOG_ARCHIVE */
825
/*----------------------------*/
827
log_block_init(log_sys->buf, log_sys->lsn);
828
log_block_set_first_rec_group(log_sys->buf, LOG_BLOCK_HDR_SIZE);
830
log_sys->buf_free = LOG_BLOCK_HDR_SIZE;
831
log_sys->lsn = ut_dulint_add(LOG_START_LSN, LOG_BLOCK_HDR_SIZE);
833
mutex_exit(&(log_sys->mutex));
835
#ifdef UNIV_LOG_DEBUG
837
recv_sys_init(FALSE, buf_pool_get_curr_size());
839
recv_sys->parse_start_lsn = log_sys->lsn;
840
recv_sys->scanned_lsn = log_sys->lsn;
841
recv_sys->scanned_checkpoint_no = 0;
842
recv_sys->recovered_lsn = log_sys->lsn;
843
recv_sys->limit_lsn = ut_dulint_max;
847
/**********************************************************************
848
Inits a log group to the log system. */
853
ulint id, /* in: group id */
854
ulint n_files, /* in: number of log files */
855
ulint file_size, /* in: log file size in bytes */
856
ulint space_id, /* in: space id of the file space
857
which contains the log files of this
859
ulint archive_space_id __attribute__((unused)))
860
/* in: space id of the file space
861
which contains some archived log
862
files for this group; currently, only
863
for the first log group this is
870
group = mem_alloc(sizeof(log_group_t));
873
group->n_files = n_files;
874
group->file_size = file_size;
875
group->space_id = space_id;
876
group->state = LOG_GROUP_OK;
877
group->lsn = LOG_START_LSN;
878
group->lsn_offset = LOG_FILE_HDR_SIZE;
879
group->n_pending_writes = 0;
881
group->file_header_bufs = mem_alloc(sizeof(byte*) * n_files);
882
#ifdef UNIV_LOG_ARCHIVE
883
group->archive_file_header_bufs = mem_alloc(sizeof(byte*) * n_files);
884
#endif /* UNIV_LOG_ARCHIVE */
886
for (i = 0; i < n_files; i++) {
887
*(group->file_header_bufs + i) = ut_align(
888
mem_alloc(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE),
889
OS_FILE_LOG_BLOCK_SIZE);
891
memset(*(group->file_header_bufs + i), '\0',
894
#ifdef UNIV_LOG_ARCHIVE
895
*(group->archive_file_header_bufs + i) = ut_align(
896
mem_alloc(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE),
897
OS_FILE_LOG_BLOCK_SIZE);
898
memset(*(group->archive_file_header_bufs + i), '\0',
900
#endif /* UNIV_LOG_ARCHIVE */
903
#ifdef UNIV_LOG_ARCHIVE
904
group->archive_space_id = archive_space_id;
906
group->archived_file_no = 0;
907
group->archived_offset = 0;
908
#endif /* UNIV_LOG_ARCHIVE */
910
group->checkpoint_buf = ut_align(
911
mem_alloc(2 * OS_FILE_LOG_BLOCK_SIZE), OS_FILE_LOG_BLOCK_SIZE);
913
memset(group->checkpoint_buf, '\0', OS_FILE_LOG_BLOCK_SIZE);
915
UT_LIST_ADD_LAST(log_groups, log_sys->log_groups, group);
917
ut_a(log_calc_max_ages());
920
/**********************************************************************
921
Does the unlockings needed in flush i/o completion. */
924
log_flush_do_unlocks(
925
/*=================*/
926
ulint code) /* in: any ORed combination of LOG_UNLOCK_FLUSH_LOCK
927
and LOG_UNLOCK_NONE_FLUSHED_LOCK */
929
ut_ad(mutex_own(&(log_sys->mutex)));
931
/* NOTE that we must own the log mutex when doing the setting of the
932
events: this is because transactions will wait for these events to
933
be set, and at that moment the log flush they were waiting for must
934
have ended. If the log mutex were not reserved here, the i/o-thread
935
calling this function might be preempted for a while, and when it
936
resumed execution, it might be that a new flush had been started, and
937
this function would erroneously signal the NEW flush as completed.
938
Thus, the changes in the state of these events are performed
939
atomically in conjunction with the changes in the state of
940
log_sys->n_pending_writes etc. */
942
if (code & LOG_UNLOCK_NONE_FLUSHED_LOCK) {
943
os_event_set(log_sys->one_flushed_event);
946
if (code & LOG_UNLOCK_FLUSH_LOCK) {
947
os_event_set(log_sys->no_flush_event);
951
/**********************************************************************
952
Checks if a flush is completed for a log group and does the completion
956
log_group_check_flush_completion(
957
/*=============================*/
958
/* out: LOG_UNLOCK_NONE_FLUSHED_LOCK or 0 */
959
log_group_t* group) /* in: log group */
961
ut_ad(mutex_own(&(log_sys->mutex)));
963
if (!log_sys->one_flushed && group->n_pending_writes == 0) {
965
if (log_debug_writes) {
967
"Log flushed first to group %lu\n",
970
#endif /* UNIV_DEBUG */
971
log_sys->written_to_some_lsn = log_sys->write_lsn;
972
log_sys->one_flushed = TRUE;
974
return(LOG_UNLOCK_NONE_FLUSHED_LOCK);
978
if (log_debug_writes && (group->n_pending_writes == 0)) {
980
fprintf(stderr, "Log flushed to group %lu\n",
983
#endif /* UNIV_DEBUG */
987
/**********************************************************
988
Checks if a flush is completed and does the completion routine if yes. */
991
log_sys_check_flush_completion(void)
992
/*================================*/
993
/* out: LOG_UNLOCK_FLUSH_LOCK or 0 */
998
ut_ad(mutex_own(&(log_sys->mutex)));
1000
if (log_sys->n_pending_writes == 0) {
1002
log_sys->written_to_all_lsn = log_sys->write_lsn;
1003
log_sys->buf_next_to_write = log_sys->write_end_offset;
1005
if (log_sys->write_end_offset > log_sys->max_buf_free / 2) {
1006
/* Move the log buffer content to the start of the
1009
move_start = ut_calc_align_down(
1010
log_sys->write_end_offset,
1011
OS_FILE_LOG_BLOCK_SIZE);
1012
move_end = ut_calc_align(log_sys->buf_free,
1013
OS_FILE_LOG_BLOCK_SIZE);
1015
ut_memmove(log_sys->buf, log_sys->buf + move_start,
1016
move_end - move_start);
1017
log_sys->buf_free -= move_start;
1019
log_sys->buf_next_to_write -= move_start;
1022
return(LOG_UNLOCK_FLUSH_LOCK);
1028
/**********************************************************
1029
Completes an i/o to a log file. */
1034
log_group_t* group) /* in: log group or a dummy pointer */
1038
#ifdef UNIV_LOG_ARCHIVE
1039
if ((byte*)group == &log_archive_io) {
1040
/* It was an archive write */
1042
log_io_complete_archive();
1046
#endif /* UNIV_LOG_ARCHIVE */
1048
if ((ulint)group & 0x1UL) {
1049
/* It was a checkpoint write */
1050
group = (log_group_t*)((ulint)group - 1);
1052
if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC
1053
&& srv_unix_file_flush_method != SRV_UNIX_NOSYNC) {
1055
fil_flush(group->space_id);
1059
if (log_debug_writes) {
1061
"Checkpoint info written to group %lu\n",
1064
#endif /* UNIV_DEBUG */
1065
log_io_complete_checkpoint();
1070
ut_error; /* We currently use synchronous writing of the
1071
logs and cannot end up here! */
1073
if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC
1074
&& srv_unix_file_flush_method != SRV_UNIX_NOSYNC
1075
&& srv_flush_log_at_trx_commit != 2) {
1077
fil_flush(group->space_id);
1080
mutex_enter(&(log_sys->mutex));
1082
ut_a(group->n_pending_writes > 0);
1083
ut_a(log_sys->n_pending_writes > 0);
1085
group->n_pending_writes--;
1086
log_sys->n_pending_writes--;
1088
unlock = log_group_check_flush_completion(group);
1089
unlock = unlock | log_sys_check_flush_completion();
1091
log_flush_do_unlocks(unlock);
1093
mutex_exit(&(log_sys->mutex));
1096
/**********************************************************
1097
Writes a log file header to a log file space. */
1100
log_group_file_header_flush(
1101
/*========================*/
1102
log_group_t* group, /* in: log group */
1103
ulint nth_file, /* in: header to the nth file in the
1105
dulint start_lsn) /* in: log file data starts at this
1111
ut_ad(mutex_own(&(log_sys->mutex)));
1112
ut_a(nth_file < group->n_files);
1114
buf = *(group->file_header_bufs + nth_file);
1116
mach_write_to_4(buf + LOG_GROUP_ID, group->id);
1117
mach_write_to_8(buf + LOG_FILE_START_LSN, start_lsn);
1119
/* Wipe over possible label of ibbackup --restore */
1120
memcpy(buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP, " ", 4);
1122
dest_offset = nth_file * group->file_size;
1125
if (log_debug_writes) {
1127
"Writing log file header to group %lu file %lu\n",
1128
(ulong) group->id, (ulong) nth_file);
1130
#endif /* UNIV_DEBUG */
1132
log_sys->n_log_ios++;
1134
srv_os_log_pending_writes++;
1136
fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE, group->space_id,
1137
dest_offset / UNIV_PAGE_SIZE,
1138
dest_offset % UNIV_PAGE_SIZE,
1139
OS_FILE_LOG_BLOCK_SIZE,
1142
srv_os_log_pending_writes--;
1146
/**********************************************************
1147
Stores a 4-byte checksum to the trailer checksum field of a log block
1148
before writing it to a log file. This checksum is used in recovery to
1149
check the consistency of a log block. */
1152
log_block_store_checksum(
1153
/*=====================*/
1154
byte* block) /* in/out: pointer to a log block */
1156
log_block_set_checksum(block, log_block_calc_checksum(block));
1159
/**********************************************************
1160
Writes a buffer to a log file group. */
1163
log_group_write_buf(
1164
/*================*/
1165
log_group_t* group, /* in: log group */
1166
byte* buf, /* in: buffer */
1167
ulint len, /* in: buffer len; must be divisible
1168
by OS_FILE_LOG_BLOCK_SIZE */
1169
dulint start_lsn, /* in: start lsn of the buffer; must
1171
OS_FILE_LOG_BLOCK_SIZE */
1172
ulint new_data_offset)/* in: start offset of new data in
1173
buf: this parameter is used to decide
1174
if we have to write a new log file
1182
ut_ad(mutex_own(&(log_sys->mutex)));
1183
ut_a(len % OS_FILE_LOG_BLOCK_SIZE == 0);
1184
ut_a(ut_dulint_get_low(start_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0);
1186
if (new_data_offset == 0) {
1187
write_header = TRUE;
1189
write_header = FALSE;
1197
next_offset = log_group_calc_lsn_offset(start_lsn, group);
1199
if ((next_offset % group->file_size == LOG_FILE_HDR_SIZE)
1201
/* We start to write a new log file instance in the group */
1203
log_group_file_header_flush(group,
1204
next_offset / group->file_size,
1206
srv_os_log_written+= OS_FILE_LOG_BLOCK_SIZE;
1210
if ((next_offset % group->file_size) + len > group->file_size) {
1212
write_len = group->file_size
1213
- (next_offset % group->file_size);
1219
if (log_debug_writes) {
1222
"Writing log file segment to group %lu"
1223
" offset %lu len %lu\n"
1224
"start lsn %lu %lu\n"
1225
"First block n:o %lu last block n:o %lu\n",
1226
(ulong) group->id, (ulong) next_offset,
1228
(ulong) ut_dulint_get_high(start_lsn),
1229
(ulong) ut_dulint_get_low(start_lsn),
1230
(ulong) log_block_get_hdr_no(buf),
1231
(ulong) log_block_get_hdr_no(
1232
buf + write_len - OS_FILE_LOG_BLOCK_SIZE));
1233
ut_a(log_block_get_hdr_no(buf)
1234
== log_block_convert_lsn_to_no(start_lsn));
1236
for (i = 0; i < write_len / OS_FILE_LOG_BLOCK_SIZE; i++) {
1238
ut_a(log_block_get_hdr_no(buf) + i
1239
== log_block_get_hdr_no(
1240
buf + i * OS_FILE_LOG_BLOCK_SIZE));
1243
#endif /* UNIV_DEBUG */
1244
/* Calculate the checksums for each log block and write them to
1245
the trailer fields of the log blocks */
1247
for (i = 0; i < write_len / OS_FILE_LOG_BLOCK_SIZE; i++) {
1248
log_block_store_checksum(buf + i * OS_FILE_LOG_BLOCK_SIZE);
1252
log_sys->n_log_ios++;
1254
srv_os_log_pending_writes++;
1256
fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE, group->space_id,
1257
next_offset / UNIV_PAGE_SIZE,
1258
next_offset % UNIV_PAGE_SIZE, write_len, buf, group);
1260
srv_os_log_pending_writes--;
1262
srv_os_log_written+= write_len;
1266
if (write_len < len) {
1267
start_lsn = ut_dulint_add(start_lsn, write_len);
1271
write_header = TRUE;
1277
/**********************************************************
1278
This function is called, e.g., when a transaction wants to commit. It checks
1279
that the log has been written to the log file up to the last log entry written
1280
by the transaction. If there is a flush running, it waits and checks if the
1281
flush flushed enough. If not, starts a new flush. */
1286
dulint lsn, /* in: log sequence number up to which the log should
1287
be written, ut_dulint_max if not specified */
1288
ulint wait, /* in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP,
1289
or LOG_WAIT_ALL_GROUPS */
1290
ibool flush_to_disk)
1291
/* in: TRUE if we want the written log also to be
1300
ulint loop_count = 0;
1301
#endif /* UNIV_DEBUG */
1304
if (recv_no_ibuf_operations) {
1305
/* Recovery is running and no operations on the log files are
1306
allowed yet (the variable name .._no_ibuf_.. is misleading) */
1315
ut_ad(loop_count < 5);
1318
if (loop_count > 2) {
1319
fprintf(stderr, "Log loop count %lu\n", loop_count);
1324
mutex_enter(&(log_sys->mutex));
1327
&& ut_dulint_cmp(log_sys->flushed_to_disk_lsn, lsn) >= 0) {
1329
mutex_exit(&(log_sys->mutex));
1335
&& (ut_dulint_cmp(log_sys->written_to_all_lsn, lsn) >= 0
1336
|| (ut_dulint_cmp(log_sys->written_to_some_lsn, lsn)
1338
&& wait != LOG_WAIT_ALL_GROUPS))) {
1340
mutex_exit(&(log_sys->mutex));
1345
if (log_sys->n_pending_writes > 0) {
1346
/* A write (+ possibly flush to disk) is running */
1349
&& ut_dulint_cmp(log_sys->current_flush_lsn, lsn)
1351
/* The write + flush will write enough: wait for it to
1358
&& ut_dulint_cmp(log_sys->write_lsn, lsn) >= 0) {
1359
/* The write will write enough: wait for it to
1365
mutex_exit(&(log_sys->mutex));
1367
/* Wait for the write to complete and try to start a new
1370
os_event_wait(log_sys->no_flush_event);
1376
&& log_sys->buf_free == log_sys->buf_next_to_write) {
1377
/* Nothing to write and no flush to disk requested */
1379
mutex_exit(&(log_sys->mutex));
1385
if (log_debug_writes) {
1387
"Writing log from %lu %lu up to lsn %lu %lu\n",
1388
(ulong) ut_dulint_get_high(
1389
log_sys->written_to_all_lsn),
1390
(ulong) ut_dulint_get_low(
1391
log_sys->written_to_all_lsn),
1392
(ulong) ut_dulint_get_high(log_sys->lsn),
1393
(ulong) ut_dulint_get_low(log_sys->lsn));
1395
#endif /* UNIV_DEBUG */
1396
log_sys->n_pending_writes++;
1398
group = UT_LIST_GET_FIRST(log_sys->log_groups);
1399
group->n_pending_writes++; /* We assume here that we have only
1402
os_event_reset(log_sys->no_flush_event);
1403
os_event_reset(log_sys->one_flushed_event);
1405
start_offset = log_sys->buf_next_to_write;
1406
end_offset = log_sys->buf_free;
1408
area_start = ut_calc_align_down(start_offset, OS_FILE_LOG_BLOCK_SIZE);
1409
area_end = ut_calc_align(end_offset, OS_FILE_LOG_BLOCK_SIZE);
1411
ut_ad(area_end - area_start > 0);
1413
log_sys->write_lsn = log_sys->lsn;
1415
if (flush_to_disk) {
1416
log_sys->current_flush_lsn = log_sys->lsn;
1419
log_sys->one_flushed = FALSE;
1421
log_block_set_flush_bit(log_sys->buf + area_start, TRUE);
1422
log_block_set_checkpoint_no(
1423
log_sys->buf + area_end - OS_FILE_LOG_BLOCK_SIZE,
1424
log_sys->next_checkpoint_no);
1426
/* Copy the last, incompletely written, log block a log block length
1427
up, so that when the flush operation writes from the log buffer, the
1428
segment to write will not be changed by writers to the log */
1430
ut_memcpy(log_sys->buf + area_end,
1431
log_sys->buf + area_end - OS_FILE_LOG_BLOCK_SIZE,
1432
OS_FILE_LOG_BLOCK_SIZE);
1434
log_sys->buf_free += OS_FILE_LOG_BLOCK_SIZE;
1435
log_sys->write_end_offset = log_sys->buf_free;
1437
group = UT_LIST_GET_FIRST(log_sys->log_groups);
1439
/* Do the write to the log files */
1442
log_group_write_buf(
1443
group, log_sys->buf + area_start,
1444
area_end - area_start,
1445
ut_dulint_align_down(log_sys->written_to_all_lsn,
1446
OS_FILE_LOG_BLOCK_SIZE),
1447
start_offset - area_start);
1449
log_group_set_fields(group, log_sys->write_lsn);
1451
group = UT_LIST_GET_NEXT(log_groups, group);
1454
mutex_exit(&(log_sys->mutex));
1456
if (srv_unix_file_flush_method == SRV_UNIX_O_DSYNC) {
1457
/* O_DSYNC means the OS did not buffer the log file at all:
1458
so we have also flushed to disk what we have written */
1460
log_sys->flushed_to_disk_lsn = log_sys->write_lsn;
1462
} else if (flush_to_disk) {
1464
group = UT_LIST_GET_FIRST(log_sys->log_groups);
1466
fil_flush(group->space_id);
1467
log_sys->flushed_to_disk_lsn = log_sys->write_lsn;
1470
mutex_enter(&(log_sys->mutex));
1472
group = UT_LIST_GET_FIRST(log_sys->log_groups);
1474
ut_a(group->n_pending_writes == 1);
1475
ut_a(log_sys->n_pending_writes == 1);
1477
group->n_pending_writes--;
1478
log_sys->n_pending_writes--;
1480
unlock = log_group_check_flush_completion(group);
1481
unlock = unlock | log_sys_check_flush_completion();
1483
log_flush_do_unlocks(unlock);
1485
mutex_exit(&(log_sys->mutex));
1490
mutex_exit(&(log_sys->mutex));
1492
if (wait == LOG_WAIT_ONE_GROUP) {
1493
os_event_wait(log_sys->one_flushed_event);
1494
} else if (wait == LOG_WAIT_ALL_GROUPS) {
1495
os_event_wait(log_sys->no_flush_event);
1497
ut_ad(wait == LOG_NO_WAIT);
1501
/********************************************************************
Does a synchronous flush of the log buffer to disk. */
1505
log_buffer_flush_to_disk(void)
1506
/*==========================*/
1510
mutex_enter(&(log_sys->mutex));
1514
mutex_exit(&(log_sys->mutex));
1516
log_write_up_to(lsn, LOG_WAIT_ALL_GROUPS, TRUE);
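/* A minimal sketch of how a caller such as the transaction commit path
might use log_write_up_to() (the surrounding commit logic is elided, and
lsn is assumed to hold the end lsn of the commit log record):

	if (srv_flush_log_at_trx_commit == 1) {
		log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
	} else {
		log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
	}

that is, the committing transaction waits for the write (and optionally
the flush to disk) of its own log records, while bulk flushing is left to
log_buffer_flush_to_disk() and the checkpoint machinery. */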
1519
/********************************************************************
1520
Tries to establish a big enough margin of free space in the log buffer, such
1521
that a new log entry can be catenated without an immediate need for a flush. */
1524
log_flush_margin(void)
1525
/*==================*/
1527
ibool do_flush = FALSE;
1528
log_t* log = log_sys;
1531
mutex_enter(&(log->mutex));
1533
if (log->buf_free > log->max_buf_free) {
1535
if (log->n_pending_writes > 0) {
1536
/* A flush is running: hope that it will provide enough
1544
mutex_exit(&(log->mutex));
1547
log_write_up_to(lsn, LOG_NO_WAIT, FALSE);
1551
/********************************************************************
1552
Advances the smallest lsn for which there are unflushed dirty blocks in the
1553
buffer pool. NOTE: this function may only be called if the calling thread owns
1554
no synchronization objects! */
1557
log_preflush_pool_modified_pages(
1558
/*=============================*/
1559
/* out: FALSE if there was a flush batch of
1560
the same type running, which means that we
1561
could not start this flush batch */
1562
dulint new_oldest, /* in: try to advance oldest_modified_lsn
1563
at least to this lsn */
1564
ibool sync) /* in: TRUE if synchronous operation is
1569
if (recv_recovery_on) {
1570
/* If the recovery is running, we must first apply all
1571
log records to their respective file pages to get the
1572
right modify lsn values to these pages: otherwise, there
1573
might be pages on disk which are not yet recovered to the
1574
current lsn, and even after calling this function, we could
1575
not know how up-to-date the disk version of the database is,
1576
and we could not make a new checkpoint on the basis of the
1577
info on the buffer pool only. */
1579
recv_apply_hashed_log_recs(TRUE);
1582
n_pages = buf_flush_batch(BUF_FLUSH_LIST, ULINT_MAX, new_oldest);
1585
buf_flush_wait_batch_end(BUF_FLUSH_LIST);
1588
if (n_pages == ULINT_UNDEFINED) {
1596
/**********************************************************
1597
Completes a checkpoint. */
1600
log_complete_checkpoint(void)
1601
/*=========================*/
1603
ut_ad(mutex_own(&(log_sys->mutex)));
1604
ut_ad(log_sys->n_pending_checkpoint_writes == 0);
1606
log_sys->next_checkpoint_no
1607
= ut_dulint_add(log_sys->next_checkpoint_no, 1);
1609
log_sys->last_checkpoint_lsn = log_sys->next_checkpoint_lsn;
1611
rw_lock_x_unlock_gen(&(log_sys->checkpoint_lock), LOG_CHECKPOINT);
1614
/**********************************************************
1615
Completes an asynchronous checkpoint info write i/o to a log file. */
1618
log_io_complete_checkpoint(void)
1619
/*============================*/
1621
mutex_enter(&(log_sys->mutex));
1623
ut_ad(log_sys->n_pending_checkpoint_writes > 0);
1625
log_sys->n_pending_checkpoint_writes--;
1627
if (log_sys->n_pending_checkpoint_writes == 0) {
1628
log_complete_checkpoint();
1631
mutex_exit(&(log_sys->mutex));
1634
/***********************************************************************
1635
Writes info to a checkpoint about a log group. */
1638
log_checkpoint_set_nth_group_info(
1639
/*==============================*/
1640
byte* buf, /* in: buffer for checkpoint info */
1641
ulint n, /* in: nth slot */
1642
ulint file_no,/* in: archived file number */
1643
ulint offset) /* in: archived file offset */
1645
ut_ad(n < LOG_MAX_N_GROUPS);
1647
mach_write_to_4(buf + LOG_CHECKPOINT_GROUP_ARRAY
1648
+ 8 * n + LOG_CHECKPOINT_ARCHIVED_FILE_NO, file_no);
1649
mach_write_to_4(buf + LOG_CHECKPOINT_GROUP_ARRAY
1650
+ 8 * n + LOG_CHECKPOINT_ARCHIVED_OFFSET, offset);
1653
/***********************************************************************
1654
Gets info from a checkpoint about a log group. */
1657
log_checkpoint_get_nth_group_info(
1658
/*==============================*/
1659
byte* buf, /* in: buffer containing checkpoint info */
1660
ulint n, /* in: nth slot */
1661
ulint* file_no,/* out: archived file number */
1662
ulint* offset) /* out: archived file offset */
1664
ut_ad(n < LOG_MAX_N_GROUPS);
1666
*file_no = mach_read_from_4(buf + LOG_CHECKPOINT_GROUP_ARRAY
1667
+ 8 * n + LOG_CHECKPOINT_ARCHIVED_FILE_NO);
1668
*offset = mach_read_from_4(buf + LOG_CHECKPOINT_GROUP_ARRAY
1669
+ 8 * n + LOG_CHECKPOINT_ARCHIVED_OFFSET);
1672
/**********************************************************
1673
Writes the checkpoint info to a log group header. */
1676
log_group_checkpoint(
1677
/*=================*/
1678
log_group_t* group) /* in: log group */
1680
log_group_t* group2;
1681
#ifdef UNIV_LOG_ARCHIVE
1682
dulint archived_lsn;
1683
dulint next_archived_lsn;
1684
#endif /* UNIV_LOG_ARCHIVE */
1690
ut_ad(mutex_own(&(log_sys->mutex)));
1691
#if LOG_CHECKPOINT_SIZE > OS_FILE_LOG_BLOCK_SIZE
1692
# error "LOG_CHECKPOINT_SIZE > OS_FILE_LOG_BLOCK_SIZE"
1695
buf = group->checkpoint_buf;
1697
mach_write_to_8(buf + LOG_CHECKPOINT_NO, log_sys->next_checkpoint_no);
1698
mach_write_to_8(buf + LOG_CHECKPOINT_LSN,
1699
log_sys->next_checkpoint_lsn);
1701
mach_write_to_4(buf + LOG_CHECKPOINT_OFFSET,
1702
log_group_calc_lsn_offset(
1703
log_sys->next_checkpoint_lsn, group));
1705
mach_write_to_4(buf + LOG_CHECKPOINT_LOG_BUF_SIZE, log_sys->buf_size);
1707
#ifdef UNIV_LOG_ARCHIVE
1708
if (log_sys->archiving_state == LOG_ARCH_OFF) {
1709
archived_lsn = ut_dulint_max;
1711
archived_lsn = log_sys->archived_lsn;
1713
if (0 != ut_dulint_cmp(archived_lsn,
1714
log_sys->next_archived_lsn)) {
1715
next_archived_lsn = log_sys->next_archived_lsn;
1716
/* For debugging only */
1720
mach_write_to_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN, archived_lsn);
1721
#else /* UNIV_LOG_ARCHIVE */
1722
mach_write_to_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN, ut_dulint_max);
1723
#endif /* UNIV_LOG_ARCHIVE */
1725
for (i = 0; i < LOG_MAX_N_GROUPS; i++) {
1726
log_checkpoint_set_nth_group_info(buf, i, 0, 0);
1729
group2 = UT_LIST_GET_FIRST(log_sys->log_groups);
1732
log_checkpoint_set_nth_group_info(buf, group2->id,
1733
#ifdef UNIV_LOG_ARCHIVE
1734
group2->archived_file_no,
1735
group2->archived_offset
1736
#else /* UNIV_LOG_ARCHIVE */
1738
#endif /* UNIV_LOG_ARCHIVE */
1741
group2 = UT_LIST_GET_NEXT(log_groups, group2);
1744
fold = ut_fold_binary(buf, LOG_CHECKPOINT_CHECKSUM_1);
1745
mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_1, fold);
1747
fold = ut_fold_binary(buf + LOG_CHECKPOINT_LSN,
1748
LOG_CHECKPOINT_CHECKSUM_2 - LOG_CHECKPOINT_LSN);
1749
mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_2, fold);
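/* The reader side is expected to recompute these same two folds over the
corresponding byte ranges when it reads a checkpoint header, and to reject
the checkpoint if either stored checksum differs; see
recv_check_cp_is_consistent() in log0recv.c. */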
1751
/* Starting from InnoDB-3.23.50, we also write info on allocated
1752
size in the tablespace */
1754
mach_write_to_4(buf + LOG_CHECKPOINT_FSP_FREE_LIMIT,
1755
log_fsp_current_free_limit);
1757
mach_write_to_4(buf + LOG_CHECKPOINT_FSP_MAGIC_N,
1758
LOG_CHECKPOINT_FSP_MAGIC_N_VAL);
1760
/* We alternate the physical place of the checkpoint info in the first
1763
if (ut_dulint_get_low(log_sys->next_checkpoint_no) % 2 == 0) {
1764
write_offset = LOG_CHECKPOINT_1;
1766
write_offset = LOG_CHECKPOINT_2;
1770
if (log_sys->n_pending_checkpoint_writes == 0) {
1772
rw_lock_x_lock_gen(&(log_sys->checkpoint_lock),
1776
log_sys->n_pending_checkpoint_writes++;
1778
log_sys->n_log_ios++;
1780
/* We send as the last parameter the group machine address
1781
added with 1, as we want to distinguish between a normal log
1782
file write and a checkpoint field write */
1784
fil_io(OS_FILE_WRITE | OS_FILE_LOG, FALSE, group->space_id,
1785
write_offset / UNIV_PAGE_SIZE,
1786
write_offset % UNIV_PAGE_SIZE,
1787
OS_FILE_LOG_BLOCK_SIZE,
1788
buf, ((byte*)group + 1));
1790
ut_ad(((ulint)group & 0x1UL) == 0);
1794
/**********************************************************
1795
Writes info to a buffer of a log group when log files are created in
1796
backup restoration. */
1799
log_reset_first_header_and_checkpoint(
1800
/*==================================*/
1801
byte* hdr_buf,/* in: buffer which will be written to the start
1802
of the first log file */
1803
dulint start) /* in: lsn of the start of the first log file;
1804
we pretend that there is a checkpoint at
1805
start + LOG_BLOCK_HDR_SIZE */
1811
mach_write_to_4(hdr_buf + LOG_GROUP_ID, 0);
1812
mach_write_to_8(hdr_buf + LOG_FILE_START_LSN, start);
1814
lsn = ut_dulint_add(start, LOG_BLOCK_HDR_SIZE);
1816
/* Write the label of ibbackup --restore */
1817
strcpy((char*) hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP,
1819
ut_sprintf_timestamp((char*) hdr_buf
1820
+ (LOG_FILE_WAS_CREATED_BY_HOT_BACKUP
1821
+ (sizeof "ibbackup ") - 1));
1822
buf = hdr_buf + LOG_CHECKPOINT_1;
1824
mach_write_to_8(buf + LOG_CHECKPOINT_NO, ut_dulint_zero);
1825
mach_write_to_8(buf + LOG_CHECKPOINT_LSN, lsn);
1827
mach_write_to_4(buf + LOG_CHECKPOINT_OFFSET,
1828
LOG_FILE_HDR_SIZE + LOG_BLOCK_HDR_SIZE);
1830
mach_write_to_4(buf + LOG_CHECKPOINT_LOG_BUF_SIZE, 2 * 1024 * 1024);
1832
mach_write_to_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN, ut_dulint_max);
1834
fold = ut_fold_binary(buf, LOG_CHECKPOINT_CHECKSUM_1);
1835
mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_1, fold);
1837
fold = ut_fold_binary(buf + LOG_CHECKPOINT_LSN,
1838
LOG_CHECKPOINT_CHECKSUM_2 - LOG_CHECKPOINT_LSN);
1839
mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_2, fold);
1841
/* Starting from InnoDB-3.23.50, we should also write info on
allocated size in the tablespace, but unfortunately we do not
know it here */
1846
/**********************************************************
1847
Reads a checkpoint info from a log group header to log_sys->checkpoint_buf. */
1850
log_group_read_checkpoint_info(
1851
/*===========================*/
1852
log_group_t* group, /* in: log group */
1853
ulint field) /* in: LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2 */
1855
ut_ad(mutex_own(&(log_sys->mutex)));
1857
log_sys->n_log_ios++;
1859
fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE, group->space_id,
1860
field / UNIV_PAGE_SIZE, field % UNIV_PAGE_SIZE,
1861
OS_FILE_LOG_BLOCK_SIZE, log_sys->checkpoint_buf, NULL);
1864
/**********************************************************
1865
Writes checkpoint info to groups. */
1868
log_groups_write_checkpoint_info(void)
1869
/*==================================*/
1873
ut_ad(mutex_own(&(log_sys->mutex)));
1875
group = UT_LIST_GET_FIRST(log_sys->log_groups);
1878
log_group_checkpoint(group);
1880
group = UT_LIST_GET_NEXT(log_groups, group);
1884
/**********************************************************
Makes a checkpoint. Note that this function does not flush dirty
blocks from the buffer pool: it only checks what the lsn of the oldest
modification in the buffer pool is, and writes information about that lsn
to the log files. Use log_make_checkpoint_at to flush also the pool. */
1893
/* out: TRUE if success, FALSE if a checkpoint
1894
write was already running */
1895
ibool sync, /* in: TRUE if synchronous operation is
1897
ibool write_always) /* in: the function normally checks if the
new checkpoint would have a greater
1899
lsn than the previous one: if not, then no
1900
physical write is done; by setting this
1901
parameter TRUE, a physical write will always be
1902
made to log files */
1906
if (recv_recovery_is_on()) {
1907
recv_apply_hashed_log_recs(TRUE);
1910
if (srv_unix_file_flush_method != SRV_UNIX_NOSYNC) {
1911
fil_flush_file_spaces(FIL_TABLESPACE);
1914
mutex_enter(&(log_sys->mutex));
1916
oldest_lsn = log_buf_pool_get_oldest_modification();
1918
mutex_exit(&(log_sys->mutex));
/* Because the log also contains headers and dummy log records,
1921
if the buffer pool contains no dirty buffers, oldest_lsn
1922
gets the value log_sys->lsn from the previous function,
1923
and we must make sure that the log is flushed up to that
1924
lsn. If there are dirty buffers in the buffer pool, then our
1925
write-ahead-logging algorithm ensures that the log has been flushed
1926
up to oldest_lsn. */
1928
log_write_up_to(oldest_lsn, LOG_WAIT_ALL_GROUPS, TRUE);
1930
mutex_enter(&(log_sys->mutex));
1933
&& ut_dulint_cmp(log_sys->last_checkpoint_lsn, oldest_lsn) >= 0) {
1935
mutex_exit(&(log_sys->mutex));
1940
ut_ad(ut_dulint_cmp(log_sys->written_to_all_lsn, oldest_lsn) >= 0);
1942
if (log_sys->n_pending_checkpoint_writes > 0) {
1943
/* A checkpoint write is running */
1945
mutex_exit(&(log_sys->mutex));
1948
/* Wait for the checkpoint write to complete */
1949
rw_lock_s_lock(&(log_sys->checkpoint_lock));
1950
rw_lock_s_unlock(&(log_sys->checkpoint_lock));
1956
log_sys->next_checkpoint_lsn = oldest_lsn;
1959
if (log_debug_writes) {
1960
fprintf(stderr, "Making checkpoint no %lu at lsn %lu %lu\n",
1961
(ulong) ut_dulint_get_low(log_sys->next_checkpoint_no),
1962
(ulong) ut_dulint_get_high(oldest_lsn),
1963
(ulong) ut_dulint_get_low(oldest_lsn));
1965
#endif /* UNIV_DEBUG */
1967
log_groups_write_checkpoint_info();
1969
mutex_exit(&(log_sys->mutex));
1972
/* Wait for the checkpoint write to complete */
1973
rw_lock_s_lock(&(log_sys->checkpoint_lock));
1974
rw_lock_s_unlock(&(log_sys->checkpoint_lock));
1980
/********************************************************************
1981
Makes a checkpoint at a given lsn or later. */
1984
log_make_checkpoint_at(
1985
/*===================*/
1986
dulint lsn, /* in: make a checkpoint at this or a later
1987
lsn, if ut_dulint_max, makes a checkpoint at
1989
ibool write_always) /* in: the function normally checks if the
new checkpoint would have a greater
1991
lsn than the previous one: if not, then no
1992
physical write is done; by setting this
1993
parameter TRUE, a physical write will always be
1994
made to log files */
1998
/* Preflush pages synchronously */
2003
success = log_preflush_pool_modified_pages(lsn, TRUE);
2009
success = log_checkpoint(TRUE, write_always);
2013
/********************************************************************
2014
Tries to establish a big enough margin of free space in the log groups, such
2015
that a new log entry can be catenated without an immediate need for a
2016
checkpoint. NOTE: this function may only be called if the calling thread
2017
owns no synchronization objects! */
2020
log_checkpoint_margin(void)
2021
/*=======================*/
2023
log_t* log = log_sys;
2025
ulint checkpoint_age;
2029
ibool checkpoint_sync;
2030
ibool do_checkpoint;
2034
checkpoint_sync = FALSE;
2035
do_checkpoint = FALSE;
2037
mutex_enter(&(log->mutex));
2039
if (log->check_flush_or_checkpoint == FALSE) {
2040
mutex_exit(&(log->mutex));
2045
oldest_lsn = log_buf_pool_get_oldest_modification();
2047
age = ut_dulint_minus(log->lsn, oldest_lsn);
2049
if (age > log->max_modified_age_sync) {
2051
/* A flush is urgent: we have to do a synchronous preflush */
2054
advance = 2 * (age - log->max_modified_age_sync);
2055
} else if (age > log->max_modified_age_async) {
2057
/* A flush is not urgent: we do an asynchronous preflush */
2058
advance = age - log->max_modified_age_async;
2063
checkpoint_age = ut_dulint_minus(log->lsn, log->last_checkpoint_lsn);
2065
if (checkpoint_age > log->max_checkpoint_age) {
2066
/* A checkpoint is urgent: we do it synchronously */
2068
checkpoint_sync = TRUE;
2070
do_checkpoint = TRUE;
2072
} else if (checkpoint_age > log->max_checkpoint_age_async) {
2073
/* A checkpoint is not urgent: do it asynchronously */
2075
do_checkpoint = TRUE;
2077
log->check_flush_or_checkpoint = FALSE;
2079
log->check_flush_or_checkpoint = FALSE;
2082
mutex_exit(&(log->mutex));
2085
dulint new_oldest = ut_dulint_add(oldest_lsn, advance);
2087
success = log_preflush_pool_modified_pages(new_oldest, sync);
2089
/* If the flush succeeded, this thread has done its part
2090
and can proceed. If it did not succeed, there was another
2091
thread doing a flush at the same time. If sync was FALSE,
2092
the flush was not urgent, and we let this thread proceed.
2093
Otherwise, we let it start from the beginning again. */
2095
if (sync && !success) {
2096
mutex_enter(&(log->mutex));
2098
log->check_flush_or_checkpoint = TRUE;
2100
mutex_exit(&(log->mutex));
2105
if (do_checkpoint) {
2106
log_checkpoint(checkpoint_sync, FALSE);
2108
if (checkpoint_sync) {
2115
/**********************************************************
2116
Reads a specified log segment to a buffer. */
2119
log_group_read_log_seg(
2120
/*===================*/
2121
ulint type, /* in: LOG_ARCHIVE or LOG_RECOVER */
2122
byte* buf, /* in: buffer where to read */
2123
log_group_t* group, /* in: log group */
2124
dulint start_lsn, /* in: read area start */
2125
dulint end_lsn) /* in: read area end */
2128
ulint source_offset;
2131
ut_ad(mutex_own(&(log_sys->mutex)));
2135
if (type == LOG_RECOVER) {
2139
source_offset = log_group_calc_lsn_offset(start_lsn, group);
2141
len = ut_dulint_minus(end_lsn, start_lsn);
2145
if ((source_offset % group->file_size) + len > group->file_size) {
2147
len = group->file_size - (source_offset % group->file_size);
2150
#ifdef UNIV_LOG_ARCHIVE
2151
if (type == LOG_ARCHIVE) {
2153
log_sys->n_pending_archive_ios++;
2155
#endif /* UNIV_LOG_ARCHIVE */
2157
log_sys->n_log_ios++;
2159
fil_io(OS_FILE_READ | OS_FILE_LOG, sync, group->space_id,
2160
source_offset / UNIV_PAGE_SIZE, source_offset % UNIV_PAGE_SIZE,
2163
start_lsn = ut_dulint_add(start_lsn, len);
2166
if (ut_dulint_cmp(start_lsn, end_lsn) != 0) {
2172
#ifdef UNIV_LOG_ARCHIVE
2173
/**********************************************************
2174
Generates an archived log file name. */
2177
log_archived_file_name_gen(
2178
/*=======================*/
2179
char* buf, /* in: buffer where to write */
2180
ulint id __attribute__((unused)),
2182
currently we only archive the first group */
2183
ulint file_no)/* in: file number */
2185
sprintf(buf, "%sib_arch_log_%010lu", srv_arch_dir, (ulong) file_no);
2188
/**********************************************************
2189
Writes a log file header to a log file space. */
2192
log_group_archive_file_header_write(
2193
/*================================*/
2194
log_group_t* group, /* in: log group */
2195
ulint nth_file, /* in: header to the nth file in the
2196
archive log file space */
2197
ulint file_no, /* in: archived file number */
2198
dulint start_lsn) /* in: log file data starts at this
2204
ut_ad(mutex_own(&(log_sys->mutex)));
2206
ut_a(nth_file < group->n_files);
2208
buf = *(group->archive_file_header_bufs + nth_file);
2210
mach_write_to_4(buf + LOG_GROUP_ID, group->id);
2211
mach_write_to_8(buf + LOG_FILE_START_LSN, start_lsn);
2212
mach_write_to_4(buf + LOG_FILE_NO, file_no);
2214
mach_write_to_4(buf + LOG_FILE_ARCH_COMPLETED, FALSE);
2216
dest_offset = nth_file * group->file_size;
2218
log_sys->n_log_ios++;
2220
fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE, group->archive_space_id,
2221
dest_offset / UNIV_PAGE_SIZE,
2222
dest_offset % UNIV_PAGE_SIZE,
2223
2 * OS_FILE_LOG_BLOCK_SIZE,
2224
buf, &log_archive_io);
2227
/**********************************************************
2228
Writes a log file header to a completed archived log file. */
2231
log_group_archive_completed_header_write(
2232
/*=====================================*/
2233
log_group_t* group, /* in: log group */
2234
ulint nth_file, /* in: header to the nth file in the
2235
archive log file space */
2236
dulint end_lsn) /* in: end lsn of the file */
2241
ut_ad(mutex_own(&(log_sys->mutex)));
2242
ut_a(nth_file < group->n_files);
2244
buf = *(group->archive_file_header_bufs + nth_file);
2246
mach_write_to_4(buf + LOG_FILE_ARCH_COMPLETED, TRUE);
2247
mach_write_to_8(buf + LOG_FILE_END_LSN, end_lsn);
2249
dest_offset = nth_file * group->file_size + LOG_FILE_ARCH_COMPLETED;
2251
log_sys->n_log_ios++;
2253
fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE, group->archive_space_id,
2254
dest_offset / UNIV_PAGE_SIZE,
2255
dest_offset % UNIV_PAGE_SIZE,
2256
OS_FILE_LOG_BLOCK_SIZE,
2257
buf + LOG_FILE_ARCH_COMPLETED,
2261
/**********************************************************
Does the archive writes for a single log group. */
static
void
log_group_archive(
/*==============*/
	log_group_t*	group)	/* in: log group */
{
	os_file_t	file_handle;
	dulint		start_lsn;
	dulint		end_lsn;
	char		name[1024];
	byte*		buf;
	ulint		len;
	ibool		ret;
	ulint		next_offset;
	ulint		n_files;
	ulint		open_mode;

	ut_ad(mutex_own(&(log_sys->mutex)));

	start_lsn = log_sys->archived_lsn;

	ut_a(ut_dulint_get_low(start_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0);

	end_lsn = log_sys->next_archived_lsn;

	ut_a(ut_dulint_get_low(end_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0);

	buf = log_sys->archive_buf;

	n_files = 0;

	next_offset = group->archived_offset;
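	/* Loop over the archive buffer, writing at most up to the end of
	one archive log file per round. A new file is added to the archive
	file space whenever the previous file has been written full, or
	when the space is still empty. */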
loop:
	if ((next_offset % group->file_size == 0)
	    || (fil_space_get_size(group->archive_space_id) == 0)) {

		/* Add the file to the archive file space; create or open the
		file */

		if (next_offset % group->file_size == 0) {
			open_mode = OS_FILE_CREATE;
		} else {
			open_mode = OS_FILE_OPEN;
		}

		log_archived_file_name_gen(name, group->id,
					   group->archived_file_no + n_files);

		file_handle = os_file_create(name, open_mode, OS_FILE_AIO,
					     OS_DATA_FILE, &ret);

		if (!ret && (open_mode == OS_FILE_CREATE)) {
			file_handle = os_file_create(
				name, OS_FILE_OPEN, OS_FILE_AIO,
				OS_DATA_FILE, &ret);
		}

		if (!ret) {
			fprintf(stderr,
				"InnoDB: Cannot create or open"
				" archive log file %s.\n"
				"InnoDB: Cannot continue operation.\n"
				"InnoDB: Check that the log archive"
				" directory exists,\n"
				"InnoDB: you have access rights to it, and\n"
				"InnoDB: there is space available.\n", name);

			exit(1);
		}

#ifdef UNIV_DEBUG
		if (log_debug_writes) {
			fprintf(stderr, "Created archive file %s\n", name);
		}
#endif /* UNIV_DEBUG */

		ret = os_file_close(file_handle);

		/* Add the archive file as a node to the space */

		fil_node_create(name, group->file_size / UNIV_PAGE_SIZE,
				group->archive_space_id, FALSE);

		if (next_offset % group->file_size == 0) {
			log_group_archive_file_header_write(
				group, n_files,
				group->archived_file_no + n_files,
				start_lsn);

			next_offset += LOG_FILE_HDR_SIZE;
		}
	}

	len = ut_dulint_minus(end_lsn, start_lsn);

	if (group->file_size < (next_offset % group->file_size) + len) {

		len = group->file_size - (next_offset % group->file_size);
	}

#ifdef UNIV_DEBUG
	if (log_debug_writes) {
		fprintf(stderr,
			"Archiving starting at lsn %lu %lu, len %lu"
			" to group %lu\n",
			(ulong) ut_dulint_get_high(start_lsn),
			(ulong) ut_dulint_get_low(start_lsn),
			(ulong) len, (ulong) group->id);
	}
#endif /* UNIV_DEBUG */

	log_sys->n_pending_archive_ios++;

	log_sys->n_log_ios++;
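	/* The write length is rounded up to a multiple of
	OS_FILE_LOG_BLOCK_SIZE, because log i/o is always done in complete
	log blocks. */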
	fil_io(OS_FILE_WRITE | OS_FILE_LOG, FALSE, group->archive_space_id,
	       next_offset / UNIV_PAGE_SIZE, next_offset % UNIV_PAGE_SIZE,
	       ut_calc_align(len, OS_FILE_LOG_BLOCK_SIZE), buf,
	       &log_archive_io);

	start_lsn = ut_dulint_add(start_lsn, len);
	next_offset += len;
	buf += len;

	if (next_offset % group->file_size == 0) {
		n_files++;

		next_offset = 0;
	}

	if (ut_dulint_cmp(end_lsn, start_lsn) != 0) {

		goto loop;
	}

	group->next_archived_file_no = group->archived_file_no + n_files;
	group->next_archived_offset = next_offset % group->file_size;

	ut_a(group->next_archived_offset % OS_FILE_LOG_BLOCK_SIZE == 0);
}
/*********************************************************
(Writes to the archive of each log group.) Currently, only the first
group is archived. */
static
void
log_archive_groups(void)
/*====================*/
{
	log_group_t*	group;

	ut_ad(mutex_own(&(log_sys->mutex)));

	group = UT_LIST_GET_FIRST(log_sys->log_groups);

	log_group_archive(group);
}
/*********************************************************
Completes the archiving write phase for (each log group), currently,
the first log group. */
static
void
log_archive_write_complete_groups(void)
/*===================================*/
{
	log_group_t*	group;
	ulint		end_offset;
	ulint		trunc_files;
	ulint		n_files;
	dulint		start_lsn;
	dulint		end_lsn;
	ulint		i;

	ut_ad(mutex_own(&(log_sys->mutex)));

	group = UT_LIST_GET_FIRST(log_sys->log_groups);

	group->archived_file_no = group->next_archived_file_no;
	group->archived_offset = group->next_archived_offset;

	/* Truncate from the archive file space all but the last
	file, or if it has been written full, all files */

	n_files = (UNIV_PAGE_SIZE
		   * fil_space_get_size(group->archive_space_id))
		/ group->file_size;

	end_offset = group->archived_offset;

	if (end_offset % group->file_size == 0) {

		trunc_files = n_files;
	} else {
		trunc_files = n_files - 1;
	}

#ifdef UNIV_DEBUG
	if (log_debug_writes && trunc_files) {
		fprintf(stderr,
			"Complete file(s) archived to group %lu\n",
			(ulong) group->id);
	}
#endif /* UNIV_DEBUG */
	/* Calculate the archive file space start lsn */
	start_lsn = ut_dulint_subtract(
		log_sys->next_archived_lsn,
		end_offset - LOG_FILE_HDR_SIZE + trunc_files
		* (group->file_size - LOG_FILE_HDR_SIZE));
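	/* Each archive file holds file_size - LOG_FILE_HDR_SIZE bytes of
	log. The start lsn of the space is therefore obtained by stepping
	back from next_archived_lsn over the log bytes in the last file and
	over the trunc_files complete files that are about to be
	truncated. */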
	end_lsn = start_lsn;

	for (i = 0; i < trunc_files; i++) {

		end_lsn = ut_dulint_add(end_lsn,
					group->file_size - LOG_FILE_HDR_SIZE);

		/* Write a notice to the headers of archived log
		files that the file write has been completed */

		log_group_archive_completed_header_write(group, i, end_lsn);
	}

	fil_space_truncate_start(group->archive_space_id,
				 trunc_files * group->file_size);

#ifdef UNIV_DEBUG
	if (log_debug_writes) {
		fputs("Archiving writes completed\n", stderr);
	}
#endif /* UNIV_DEBUG */
}
/**********************************************************
Completes an archiving i/o. */
static
void
log_archive_check_completion_low(void)
/*==================================*/
{
	ut_ad(mutex_own(&(log_sys->mutex)));
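	/* Archiving is a two-phase operation: first the log segment is read
	from the log files into the archive buffer, then the buffer is
	written out to the archive files. When the pending i/os of the
	current phase have completed, we either start the write phase or
	finish the operation. */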
	if (log_sys->n_pending_archive_ios == 0
	    && log_sys->archiving_phase == LOG_ARCHIVE_READ) {

#ifdef UNIV_DEBUG
		if (log_debug_writes) {
			fputs("Archiving read completed\n", stderr);
		}
#endif /* UNIV_DEBUG */

		/* Archive buffer has now been read in: start archive writes */

		log_sys->archiving_phase = LOG_ARCHIVE_WRITE;

		log_archive_groups();
	}

	if (log_sys->n_pending_archive_ios == 0
	    && log_sys->archiving_phase == LOG_ARCHIVE_WRITE) {

		log_archive_write_complete_groups();

		log_sys->archived_lsn = log_sys->next_archived_lsn;

		rw_lock_x_unlock_gen(&(log_sys->archive_lock), LOG_ARCHIVE);
	}
}
/**********************************************************
Completes an archiving i/o. */
static
void
log_io_complete_archive(void)
/*=========================*/
{
	log_group_t*	group;

	mutex_enter(&(log_sys->mutex));

	group = UT_LIST_GET_FIRST(log_sys->log_groups);

	mutex_exit(&(log_sys->mutex));

	fil_flush(group->archive_space_id);

	mutex_enter(&(log_sys->mutex));

	ut_ad(log_sys->n_pending_archive_ios > 0);

	log_sys->n_pending_archive_ios--;

	log_archive_check_completion_low();

	mutex_exit(&(log_sys->mutex));
}
/************************************************************************
Starts an archiving operation. */

ibool
log_archive_do(
/*===========*/
			/* out: TRUE if succeed, FALSE if an archiving
			operation was already running */
	ibool	sync,	/* in: TRUE if synchronous operation is desired */
	ulint*	n_bytes)/* out: archive log buffer size, 0 if nothing to
			archive */
{
	ibool	calc_new_limit;
	dulint	start_lsn;
	dulint	limit_lsn;

	calc_new_limit = TRUE;
loop:
	mutex_enter(&(log_sys->mutex));

	if (log_sys->archiving_state == LOG_ARCH_OFF) {
		mutex_exit(&(log_sys->mutex));

		*n_bytes = 0;

		return(TRUE);

	} else if (log_sys->archiving_state == LOG_ARCH_STOPPED
		   || log_sys->archiving_state == LOG_ARCH_STOPPING2) {

		mutex_exit(&(log_sys->mutex));

		os_event_wait(log_sys->archiving_on);

		mutex_enter(&(log_sys->mutex));
	}

	start_lsn = log_sys->archived_lsn;

	if (calc_new_limit) {
		ut_a(log_sys->archive_buf_size % OS_FILE_LOG_BLOCK_SIZE == 0);
		limit_lsn = ut_dulint_add(start_lsn,
					  log_sys->archive_buf_size);

		*n_bytes = log_sys->archive_buf_size;
		if (ut_dulint_cmp(limit_lsn, log_sys->lsn) >= 0) {

			limit_lsn = ut_dulint_align_down(
				log_sys->lsn, OS_FILE_LOG_BLOCK_SIZE);
		}
	}

	if (ut_dulint_cmp(log_sys->archived_lsn, limit_lsn) >= 0) {

		mutex_exit(&(log_sys->mutex));

		*n_bytes = 0;

		return(TRUE);
	}

	if (ut_dulint_cmp(log_sys->written_to_all_lsn, limit_lsn) < 0) {

		mutex_exit(&(log_sys->mutex));

		log_write_up_to(limit_lsn, LOG_WAIT_ALL_GROUPS, TRUE);

		calc_new_limit = FALSE;

		goto loop;
	}

	if (log_sys->n_pending_archive_ios > 0) {
		/* An archiving operation is running */

		mutex_exit(&(log_sys->mutex));

		if (sync) {
			rw_lock_s_lock(&(log_sys->archive_lock));
			rw_lock_s_unlock(&(log_sys->archive_lock));
		}

		*n_bytes = log_sys->archive_buf_size;

		return(FALSE);
	}
	rw_lock_x_lock_gen(&(log_sys->archive_lock), LOG_ARCHIVE);

	log_sys->archiving_phase = LOG_ARCHIVE_READ;

	log_sys->next_archived_lsn = limit_lsn;

#ifdef UNIV_DEBUG
	if (log_debug_writes) {
		fprintf(stderr,
			"Archiving from lsn %lu %lu to lsn %lu %lu\n",
			(ulong) ut_dulint_get_high(log_sys->archived_lsn),
			(ulong) ut_dulint_get_low(log_sys->archived_lsn),
			(ulong) ut_dulint_get_high(limit_lsn),
			(ulong) ut_dulint_get_low(limit_lsn));
	}
#endif /* UNIV_DEBUG */
	/* Read the log segment to the archive buffer */

	log_group_read_log_seg(LOG_ARCHIVE, log_sys->archive_buf,
			       UT_LIST_GET_FIRST(log_sys->log_groups),
			       start_lsn, limit_lsn);

	mutex_exit(&(log_sys->mutex));

	if (sync) {
		rw_lock_s_lock(&(log_sys->archive_lock));
		rw_lock_s_unlock(&(log_sys->archive_lock));
	}

	*n_bytes = log_sys->archive_buf_size;

	return(TRUE);
}
/********************************************************************
Writes the log contents to the archive at least up to the lsn when this
function was called. */

void
log_archive_all(void)
/*=================*/
{
	dulint	present_lsn;
	ulint	dummy;

	mutex_enter(&(log_sys->mutex));

	if (log_sys->archiving_state == LOG_ARCH_OFF) {
		mutex_exit(&(log_sys->mutex));

		return;
	}

	present_lsn = log_sys->lsn;

	mutex_exit(&(log_sys->mutex));

	log_pad_current_log_block();

	for (;;) {
		mutex_enter(&(log_sys->mutex));

		if (ut_dulint_cmp(present_lsn, log_sys->archived_lsn) <= 0) {

			mutex_exit(&(log_sys->mutex));

			return;
		}

		mutex_exit(&(log_sys->mutex));

		log_archive_do(TRUE, &dummy);
	}
}
/*********************************************************
Closes the possible open archive log file (for each group) the first group,
and if it was open, increments the group file count by 2, if desired. */
static
void
log_archive_close_groups(
/*=====================*/
	ibool	increment_file_count)	/* in: TRUE if we want to increment
					the file count */
{
	log_group_t*	group;
	ulint		trunc_len;

	ut_ad(mutex_own(&(log_sys->mutex)));

	if (log_sys->archiving_state == LOG_ARCH_OFF) {

		return;
	}

	group = UT_LIST_GET_FIRST(log_sys->log_groups);

	trunc_len = UNIV_PAGE_SIZE
		* fil_space_get_size(group->archive_space_id);

	if (trunc_len > 0) {
		ut_a(trunc_len == group->file_size);

		/* Write a notice to the headers of archived log
		files that the file write has been completed */

		log_group_archive_completed_header_write(
			group, 0, log_sys->archived_lsn);

		fil_space_truncate_start(group->archive_space_id,
					 trunc_len);

		if (increment_file_count) {
			group->archived_offset = 0;
			group->archived_file_no += 2;
		}

#ifdef UNIV_DEBUG
		if (log_debug_writes) {
			fprintf(stderr,
				"Incrementing arch file no to %lu"
				" in log group %lu\n",
				(ulong) group->archived_file_no + 2,
				(ulong) group->id);
		}
#endif /* UNIV_DEBUG */
	}
}
/********************************************************************
Writes the log contents to the archive up to the lsn when this function was
called, and stops the archiving. When archiving is started again, the archived
log file numbers start from 2 higher, so that the archiving will not write
again to the archived log files which exist when this function returns. */

ulint
log_archive_stop(void)
/*==================*/
			/* out: DB_SUCCESS or DB_ERROR */
{
	ibool	success;

	mutex_enter(&(log_sys->mutex));

	if (log_sys->archiving_state != LOG_ARCH_ON) {

		mutex_exit(&(log_sys->mutex));

		return(DB_ERROR);
	}

	log_sys->archiving_state = LOG_ARCH_STOPPING;

	mutex_exit(&(log_sys->mutex));

	log_archive_all();

	mutex_enter(&(log_sys->mutex));

	log_sys->archiving_state = LOG_ARCH_STOPPING2;
	os_event_reset(log_sys->archiving_on);

	mutex_exit(&(log_sys->mutex));

	/* Wait for a possible archiving operation to end */

	rw_lock_s_lock(&(log_sys->archive_lock));
	rw_lock_s_unlock(&(log_sys->archive_lock));

	mutex_enter(&(log_sys->mutex));

	/* Close all archived log files, incrementing the file count by 2,
	if appropriate */

	log_archive_close_groups(TRUE);

	mutex_exit(&(log_sys->mutex));

	/* Make a checkpoint, so that if recovery is needed, the file numbers
	of new archived log files will start from the right value */

	success = FALSE;

	while (!success) {
		success = log_checkpoint(TRUE, TRUE);
	}

	mutex_enter(&(log_sys->mutex));

	log_sys->archiving_state = LOG_ARCH_STOPPED;

	mutex_exit(&(log_sys->mutex));

	return(DB_SUCCESS);
}
/********************************************************************
Starts again archiving which has been stopped. */

ulint
log_archive_start(void)
/*===================*/
			/* out: DB_SUCCESS or DB_ERROR */
{
	mutex_enter(&(log_sys->mutex));

	if (log_sys->archiving_state != LOG_ARCH_STOPPED) {

		mutex_exit(&(log_sys->mutex));

		return(DB_ERROR);
	}

	log_sys->archiving_state = LOG_ARCH_ON;

	os_event_set(log_sys->archiving_on);

	mutex_exit(&(log_sys->mutex));

	return(DB_SUCCESS);
}
/********************************************************************
Stop archiving the log so that a gap may occur in the archived log files. */

ulint
log_archive_noarchivelog(void)
/*==========================*/
			/* out: DB_SUCCESS or DB_ERROR */
{
loop:
	mutex_enter(&(log_sys->mutex));

	if (log_sys->archiving_state == LOG_ARCH_STOPPED
	    || log_sys->archiving_state == LOG_ARCH_OFF) {

		log_sys->archiving_state = LOG_ARCH_OFF;

		os_event_set(log_sys->archiving_on);

		mutex_exit(&(log_sys->mutex));

		return(DB_SUCCESS);
	}

	mutex_exit(&(log_sys->mutex));

	log_archive_stop();

	os_thread_sleep(500000);

	goto loop;
}
/********************************************************************
Start archiving the log so that a gap may occur in the archived log files. */

ulint
log_archive_archivelog(void)
/*========================*/
			/* out: DB_SUCCESS or DB_ERROR */
{
	mutex_enter(&(log_sys->mutex));

	if (log_sys->archiving_state == LOG_ARCH_OFF) {

		log_sys->archiving_state = LOG_ARCH_ON;

		log_sys->archived_lsn
			= ut_dulint_align_down(log_sys->lsn,
					       OS_FILE_LOG_BLOCK_SIZE);
		mutex_exit(&(log_sys->mutex));

		return(DB_SUCCESS);
	}

	mutex_exit(&(log_sys->mutex));

	return(DB_ERROR);
}
/********************************************************************
Tries to establish a big enough margin of free space in the log groups, such
that a new log entry can be catenated without an immediate need for
archiving. */
static
void
log_archive_margin(void)
/*====================*/
{
	log_t*	log	= log_sys;
	ulint	age;
	ibool	sync;
	ulint	dummy;
loop:
	mutex_enter(&(log->mutex));

	if (log->archiving_state == LOG_ARCH_OFF) {
		mutex_exit(&(log->mutex));

		return;
	}

	age = ut_dulint_minus(log->lsn, log->archived_lsn);

	if (age > log->max_archived_lsn_age) {

		/* An archiving is urgent: we have to do synchronous i/o */

		sync = TRUE;

	} else if (age > log->max_archived_lsn_age_async) {

		/* An archiving is not urgent: we do asynchronous i/o */

		sync = FALSE;
	} else {
		/* No archiving required yet */

		mutex_exit(&(log->mutex));

		return;
	}

	mutex_exit(&(log->mutex));

	log_archive_do(sync, &dummy);

	if (sync == TRUE) {
		/* Check again that enough was written to the archive */

		goto loop;
	}
}

#endif /* UNIV_LOG_ARCHIVE */
/************************************************************************
Checks that there is enough free space in the log to start a new query step.
Flushes the log buffer or makes a new checkpoint if necessary. NOTE: this
function may only be called if the calling thread owns no synchronization
objects! */

void
log_check_margins(void)
/*===================*/
{
loop:
	log_flush_margin();

	log_checkpoint_margin();

#ifdef UNIV_LOG_ARCHIVE
	log_archive_margin();
#endif /* UNIV_LOG_ARCHIVE */

	mutex_enter(&(log_sys->mutex));

	if (log_sys->check_flush_or_checkpoint) {

		mutex_exit(&(log_sys->mutex));

		goto loop;
	}

	mutex_exit(&(log_sys->mutex));
}
/********************************************************************
Makes a checkpoint at the latest lsn and writes it to first page of each
data file in the database, so that we know that the file spaces contain
all modifications up to that lsn. This can only be called at database
shutdown. This function also writes all log in log files to the log archive. */

void
logs_empty_and_mark_files_at_shutdown(void)
/*=======================================*/
{
	dulint	lsn;
	ulint	arch_log_no;

	if (srv_print_verbose_log) {
		ut_print_timestamp(stderr);
		fprintf(stderr, " InnoDB: Starting shutdown...\n");
	}

	/* Wait until the master thread and all other operations are idle: our
	algorithm only works if the server is idle at shutdown */

	srv_shutdown_state = SRV_SHUTDOWN_CLEANUP;
loop:
	os_thread_sleep(100000);

	mutex_enter(&kernel_mutex);

	/* We need the monitor threads to stop before we proceed with a
	normal shutdown. In case of very fast shutdown, however, we can
	proceed without waiting for monitor threads. */

	if (srv_fast_shutdown < 2
	    && (srv_error_monitor_active
		|| srv_lock_timeout_and_monitor_active)) {

		mutex_exit(&kernel_mutex);

		goto loop;
	}

	/* Check that there are no longer transactions. We need this wait even
	for the 'very fast' shutdown, because the InnoDB layer may have
	committed or prepared transactions and we don't want to lose them. */

	if (trx_n_mysql_transactions > 0
	    || UT_LIST_GET_LEN(trx_sys->trx_list) > 0) {

		mutex_exit(&kernel_mutex);

		goto loop;
	}
	if (srv_fast_shutdown == 2) {
		/* In this fastest shutdown we do not flush the buffer pool:
		it is essentially a 'crash' of the InnoDB server. Make sure
		that the log is all flushed to disk, so that we can recover
		all committed transactions in a crash recovery. We must not
		write the lsn stamps to the data files, since at a startup
		InnoDB deduces from the stamps if the previous shutdown was
		clean. */

		log_buffer_flush_to_disk();

		return; /* We SKIP ALL THE REST !! */
	}

	/* Check that the master thread is suspended */

	if (srv_n_threads_active[SRV_MASTER] != 0) {

		mutex_exit(&kernel_mutex);

		goto loop;
	}

	mutex_exit(&kernel_mutex);

	mutex_enter(&(log_sys->mutex));

	if (log_sys->n_pending_checkpoint_writes
#ifdef UNIV_LOG_ARCHIVE
	    || log_sys->n_pending_archive_ios
#endif /* UNIV_LOG_ARCHIVE */
	    || log_sys->n_pending_writes) {

		mutex_exit(&(log_sys->mutex));

		goto loop;
	}

	mutex_exit(&(log_sys->mutex));

	if (!buf_pool_check_no_pending_io()) {

		goto loop;
	}

#ifdef UNIV_LOG_ARCHIVE
	log_archive_all();
#endif /* UNIV_LOG_ARCHIVE */
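	/* Write a checkpoint at the very latest lsn, so that after a clean
	shutdown crash recovery has nothing to apply. */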
	log_make_checkpoint_at(ut_dulint_max, TRUE);

	mutex_enter(&(log_sys->mutex));

	lsn = log_sys->lsn;

	if ((ut_dulint_cmp(lsn, log_sys->last_checkpoint_lsn) != 0)
#ifdef UNIV_LOG_ARCHIVE
	    || (srv_log_archive_on
		&& ut_dulint_cmp(lsn,
				 ut_dulint_add(log_sys->archived_lsn,
					       LOG_BLOCK_HDR_SIZE))
		!= 0)
#endif /* UNIV_LOG_ARCHIVE */
	    ) {

		mutex_exit(&(log_sys->mutex));

		goto loop;
	}

	arch_log_no = 0;

#ifdef UNIV_LOG_ARCHIVE
	arch_log_no
		= UT_LIST_GET_FIRST(log_sys->log_groups)->archived_file_no;

	if (0 == UT_LIST_GET_FIRST(log_sys->log_groups)->archived_offset) {

		arch_log_no--;
	}

	log_archive_close_groups(TRUE);
#endif /* UNIV_LOG_ARCHIVE */

	mutex_exit(&(log_sys->mutex));

	mutex_enter(&kernel_mutex);

	/* Check that the master thread has stayed suspended */
	if (srv_n_threads_active[SRV_MASTER] != 0) {
		fprintf(stderr,
			"InnoDB: Warning: the master thread woke up"
			" during shutdown\n");

		mutex_exit(&kernel_mutex);

		goto loop;
	}
	mutex_exit(&kernel_mutex);

	fil_flush_file_spaces(FIL_TABLESPACE);
	fil_flush_file_spaces(FIL_LOG);

	/* The call fil_write_flushed_lsn_to_data_files() will pass the buffer
	pool: therefore it is essential that the buffer pool has been
	completely flushed to disk! (We do not call fil_write... if the
	'very fast' shutdown is enabled.) */

	if (!buf_all_freed()) {

		goto loop;
	}

	srv_shutdown_state = SRV_SHUTDOWN_LAST_PHASE;

	/* Make some checks that the server really is quiet */
	ut_a(srv_n_threads_active[SRV_MASTER] == 0);
	ut_a(buf_all_freed());
	ut_a(0 == ut_dulint_cmp(lsn, log_sys->lsn));

	if (ut_dulint_cmp(lsn, srv_start_lsn) < 0) {
		fprintf(stderr,
			"InnoDB: Error: log sequence number"
			" at shutdown %lu %lu\n"
			"InnoDB: is lower than at startup %lu %lu!\n",
			(ulong) ut_dulint_get_high(lsn),
			(ulong) ut_dulint_get_low(lsn),
			(ulong) ut_dulint_get_high(srv_start_lsn),
			(ulong) ut_dulint_get_low(srv_start_lsn));
	}

	srv_shutdown_lsn = lsn;

	fil_write_flushed_lsn_to_data_files(lsn, arch_log_no);

	fil_flush_file_spaces(FIL_TABLESPACE);

	fil_close_all_files();

	/* Make some checks that the server really is quiet */
	ut_a(srv_n_threads_active[SRV_MASTER] == 0);
	ut_a(buf_all_freed());
	ut_a(0 == ut_dulint_cmp(lsn, log_sys->lsn));
}
/**********************************************************
Checks by parsing that the catenated log segment for a single mtr is
consistent. */

ibool
log_check_log_recs(
/*===============*/
	byte*	buf,		/* in: pointer to the start of the log segment
				in the log_sys->buf log buffer */
	ulint	len,		/* in: segment length in bytes */
	dulint	buf_start_lsn)	/* in: buffer start lsn */
{
	dulint	contiguous_lsn;
	dulint	scanned_lsn;
	byte*	start;
	byte*	end;
	byte*	buf1;
	byte*	scan_buf;

	ut_ad(mutex_own(&(log_sys->mutex)));

	start = ut_align_down(buf, OS_FILE_LOG_BLOCK_SIZE);
	end = ut_align(buf + len, OS_FILE_LOG_BLOCK_SIZE);

	buf1 = mem_alloc((end - start) + OS_FILE_LOG_BLOCK_SIZE);
	scan_buf = ut_align(buf1, OS_FILE_LOG_BLOCK_SIZE);

	ut_memcpy(scan_buf, start, end - start);

	recv_scan_log_recs(TRUE,
			   (buf_pool->n_frames
			   - recv_n_pool_free_frames) * UNIV_PAGE_SIZE,
			   FALSE, scan_buf, end - start,
			   ut_dulint_align_down(buf_start_lsn,
						OS_FILE_LOG_BLOCK_SIZE),
			   &contiguous_lsn, &scanned_lsn);

	ut_a(ut_dulint_cmp(scanned_lsn, ut_dulint_add(buf_start_lsn, len))
	     == 0);
	ut_a(ut_dulint_cmp(recv_sys->recovered_lsn, scanned_lsn) == 0);

	mem_free(buf1);

	return(TRUE);
}
/**********************************************************
Peeks the current lsn. */

ibool
log_peek_lsn(
/*=========*/
			/* out: TRUE if success, FALSE if could not get the
			log system mutex */
	dulint*	lsn)	/* out: if returns TRUE, current lsn is here */
{
	if (0 == mutex_enter_nowait(&(log_sys->mutex))) {
		*lsn = log_sys->lsn;

		mutex_exit(&(log_sys->mutex));

		return(TRUE);
	}

	return(FALSE);
}
/**********************************************************
Prints info of the log. */

void
log_print(
/*======*/
	FILE*	file)	/* in: file where to print */
{
	double	time_elapsed;
	time_t	current_time;

	mutex_enter(&(log_sys->mutex));

	fprintf(file,
		"Log sequence number %lu %lu\n"
		"Log flushed up to %lu %lu\n"
		"Last checkpoint at %lu %lu\n",
		(ulong) ut_dulint_get_high(log_sys->lsn),
		(ulong) ut_dulint_get_low(log_sys->lsn),
		(ulong) ut_dulint_get_high(log_sys->flushed_to_disk_lsn),
		(ulong) ut_dulint_get_low(log_sys->flushed_to_disk_lsn),
		(ulong) ut_dulint_get_high(log_sys->last_checkpoint_lsn),
		(ulong) ut_dulint_get_low(log_sys->last_checkpoint_lsn));

	current_time = time(NULL);

	time_elapsed = 0.001 + difftime(current_time,
					log_sys->last_printout_time);
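	/* 0.001 is added to the elapsed time above so that the i/o rate
	below never divides by zero, even if this function is called twice
	within the same second. */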
"%lu pending log writes, %lu pending chkp writes\n"
3312
"%lu log i/o's done, %.2f log i/o's/second\n",
3313
(ulong) log_sys->n_pending_writes,
3314
(ulong) log_sys->n_pending_checkpoint_writes,
3315
(ulong) log_sys->n_log_ios,
3316
((log_sys->n_log_ios - log_sys->n_log_ios_old)
3319
log_sys->n_log_ios_old = log_sys->n_log_ios;
3320
log_sys->last_printout_time = current_time;
3322
mutex_exit(&(log_sys->mutex));
3325
/**************************************************************************
Refreshes the statistics used to print per-second averages. */

void
log_refresh_stats(void)
/*===================*/
{
	log_sys->n_log_ios_old = log_sys->n_log_ios;
	log_sys->last_printout_time = time(NULL);
}