1
/******************************************************
2
The database buffer buf_pool flush algorithm
4
(c) 1995-2001 Innobase Oy
6
Created 11/11/1995 Heikki Tuuri
7
*******************************************************/
18
#include "page0page.h"
23
#include "ibuf0ibuf.h"
29
/* When flushed, dirty blocks are searched in neighborhoods of this size, and
30
flushed along with the original page. */
32
#define BUF_FLUSH_AREA ut_min(BUF_READ_AHEAD_AREA,\
33
buf_pool->curr_size / 16)
35
/**********************************************************************
36
Validates the flush list. */
39
buf_flush_validate_low(void);
40
/*========================*/
43
/************************************************************************
44
Inserts a modified block into the flush list. */
47
/* NOTE(review): corrupted extraction -- the bare numeric lines are
line-number artifacts and several original lines (return type, braces)
are missing.  Code tokens left byte-identical; comments only added. */
buf_flush_insert_into_flush_list(
48
/*=============================*/
49
buf_block_t* block) /* in: block which is modified */
51
/* Caller must hold the buffer pool mutex. */
ut_ad(mutex_own(&(buf_pool->mutex)));
52
ut_a(block->state == BUF_BLOCK_FILE_PAGE);
54
/* The flush list is ordered by descending oldest_modification: the
new head must not have an older lsn than the current first element. */
ut_ad((UT_LIST_GET_FIRST(buf_pool->flush_list) == NULL)
55
|| (ut_dulint_cmp((UT_LIST_GET_FIRST(buf_pool->flush_list))
56
->oldest_modification,
57
block->oldest_modification) <= 0));
59
UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, block);
61
ut_ad(buf_flush_validate_low());
64
/************************************************************************
65
Inserts a modified block into the flush list in the right sorted position.
66
This function is used by recovery, because there the modifications do not
67
necessarily come in the order of lsn's. */
70
/* NOTE(review): corrupted extraction -- bare numeric lines are artifacts;
the return-type line, braces and the declaration of the local cursors
(b, prev_b) are missing.  Code tokens left byte-identical. */
buf_flush_insert_sorted_into_flush_list(
71
/*====================================*/
72
buf_block_t* block) /* in: block which is modified */
77
/* Caller must hold the buffer pool mutex. */
ut_ad(mutex_own(&(buf_pool->mutex)));
80
/* Walk from the head until the first element whose
oldest_modification is not newer than the inserted block's. */
b = UT_LIST_GET_FIRST(buf_pool->flush_list);
82
while (b && (ut_dulint_cmp(b->oldest_modification,
83
block->oldest_modification) > 0)) {
85
b = UT_LIST_GET_NEXT(flush_list, b);
89
UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, block);
91
UT_LIST_INSERT_AFTER(flush_list, buf_pool->flush_list, prev_b,
95
ut_ad(buf_flush_validate_low());
98
/************************************************************************
99
Returns TRUE if the file page block is immediately suitable for replacement,
100
i.e., the transition FILE_PAGE => NOT_USED allowed. */
103
/* NOTE(review): corrupted extraction -- bare numeric lines are artifacts;
the return-type line, braces, a fprintf(stderr, ...) line inside the error
branch, and the return statements are missing.  Code left byte-identical. */
buf_flush_ready_for_replace(
104
/*========================*/
105
/* out: TRUE if can replace immediately */
106
buf_block_t* block) /* in: buffer control block, must be in state
107
BUF_BLOCK_FILE_PAGE and in the LRU list */
109
/* Caller must hold both the buffer pool mutex and the block mutex. */
ut_ad(mutex_own(&(buf_pool->mutex)));
110
ut_ad(mutex_own(&block->mutex));
111
if (block->state != BUF_BLOCK_FILE_PAGE) {
112
ut_print_timestamp(stderr);
114
" InnoDB: Error: buffer block state %lu"
115
" in the LRU list!\n",
116
(ulong)block->state);
117
ut_print_buf(stderr, block, sizeof(buf_block_t));
122
/* Replaceable only if the block is clean (zero oldest_modification),
not buffer-fixed, and not I/O-fixed. */
if ((ut_dulint_cmp(block->oldest_modification, ut_dulint_zero) > 0)
123
|| (block->buf_fix_count != 0)
124
|| (block->io_fix != 0)) {
132
/************************************************************************
133
Returns TRUE if the block is modified and ready for flushing. */
136
/* NOTE(review): corrupted extraction -- bare numeric lines are artifacts;
the return-type line, braces and the return statements are missing.
Code tokens left byte-identical; comments only added. */
buf_flush_ready_for_flush(
137
/*======================*/
138
/* out: TRUE if can flush immediately */
139
buf_block_t* block, /* in: buffer control block, must be in state
140
BUF_BLOCK_FILE_PAGE */
141
ulint flush_type)/* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
143
/* Caller must hold both the buffer pool mutex and the block mutex. */
ut_ad(mutex_own(&(buf_pool->mutex)));
144
ut_ad(mutex_own(&(block->mutex)));
145
ut_a(block->state == BUF_BLOCK_FILE_PAGE);
147
/* The block must be dirty and not already under I/O. */
if ((ut_dulint_cmp(block->oldest_modification, ut_dulint_zero) > 0)
148
&& (block->io_fix == 0)) {
149
if (flush_type != BUF_FLUSH_LRU) {
153
} else if (block->buf_fix_count == 0) {
155
/* If we are flushing the LRU list, to avoid deadlocks
156
we require the block not to be bufferfixed, and hence
166
/************************************************************************
167
Updates the flush system data structures when a write is completed. */
170
/* NOTE(review): corrupted extraction -- bare numeric lines are artifacts;
the return-type line and braces are missing.  Code left byte-identical. */
buf_flush_write_complete(
171
/*=====================*/
172
buf_block_t* block) /* in: pointer to the block in question */
175
#ifdef UNIV_SYNC_DEBUG
176
ut_ad(mutex_own(&(buf_pool->mutex)));
177
#endif /* UNIV_SYNC_DEBUG */
178
ut_a(block->state == BUF_BLOCK_FILE_PAGE);
180
/* Mark the block clean and unlink it from the flush list. */
block->oldest_modification = ut_dulint_zero;
182
UT_LIST_REMOVE(flush_list, buf_pool->flush_list, block);
184
ut_d(UT_LIST_VALIDATE(flush_list, buf_block_t, buf_pool->flush_list));
186
/* One fewer write pending for this flush type. */
(buf_pool->n_flush[block->flush_type])--;
188
if (block->flush_type == BUF_FLUSH_LRU) {
189
/* Put the block to the end of the LRU list to wait to be
190
moved to the free list */
192
buf_LRU_make_block_old(block);
194
buf_pool->LRU_flush_ended++;
197
/* fprintf(stderr, "n pending flush %lu\n",
198
buf_pool->n_flush[block->flush_type]); */
200
/* Signal waiters when the whole batch of this type has ended. */
if ((buf_pool->n_flush[block->flush_type] == 0)
201
&& (buf_pool->init_flush[block->flush_type] == FALSE)) {
203
/* The running flush batch has ended */
205
os_event_set(buf_pool->no_flush[block->flush_type]);
209
/************************************************************************
210
Flushes possible buffered writes from the doublewrite memory buffer to disk,
211
and also wakes up the aio thread if simulated aio is used. It is very
212
important to call this function after a batch of writes has been posted,
213
and also when we may have to wait for a page latch! Otherwise a deadlock
214
of threads can occur. */
217
/* NOTE(review): corrupted extraction -- bare numeric lines are artifacts;
the return-type line, braces, local declarations, several fprintf(stderr,
lines and ut_error/return statements are missing.  Code tokens left
byte-identical; comments only added. */
buf_flush_buffered_writes(void)
218
/*===========================*/
226
/* Nothing to do if the doublewrite buffer is disabled or absent:
just wake the simulated aio handler threads. */
if (!srv_use_doublewrite_buf || trx_doublewrite == NULL) {
227
os_aio_simulated_wake_handler_threads();
232
mutex_enter(&(trx_doublewrite->mutex));
234
/* Write first to doublewrite buffer blocks. We use synchronous
235
aio and thus know that file write has been completed when the
238
if (trx_doublewrite->first_free == 0) {
240
mutex_exit(&(trx_doublewrite->mutex));
245
/* Sanity-check every buffered page before posting it. */
for (i = 0; i < trx_doublewrite->first_free; i++) {
247
block = trx_doublewrite->buf_block_arr[i];
248
ut_a(block->state == BUF_BLOCK_FILE_PAGE);
250
/* Compare the low 4 bytes of the header lsn with the trailer lsn:
a mismatch indicates a torn/corrupt page image. */
if (mach_read_from_4(block->frame + FIL_PAGE_LSN + 4)
251
!= mach_read_from_4(block->frame + UNIV_PAGE_SIZE
252
- FIL_PAGE_END_LSN_OLD_CHKSUM + 4)) {
253
ut_print_timestamp(stderr);
255
" InnoDB: ERROR: The page to be written"
257
"InnoDB: The lsn fields do not match!"
258
" Noticed in the buffer pool\n"
259
"InnoDB: before posting to the"
260
" doublewrite buffer.\n");
263
if (block->check_index_page_at_flush
264
&& !page_simple_validate(block->frame)) {
266
buf_page_print(block->frame);
268
ut_print_timestamp(stderr);
270
" InnoDB: Apparent corruption of an"
271
" index page n:o %lu in space %lu\n"
272
"InnoDB: to be written to data file."
273
" We intentionally crash server\n"
274
"InnoDB: to prevent corrupt data"
275
" from ending up in data\n"
277
(ulong) block->offset, (ulong) block->space);
283
/* increment the doublewrite flushed pages counter */
284
srv_dblwr_pages_written+= trx_doublewrite->first_free;
287
/* Post the first doublewrite block (at most
TRX_SYS_DOUBLEWRITE_BLOCK_SIZE pages). */
if (trx_doublewrite->first_free > TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
288
len = TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE;
290
len = trx_doublewrite->first_free * UNIV_PAGE_SIZE;
293
fil_io(OS_FILE_WRITE,
295
trx_doublewrite->block1, 0, len,
296
(void*)trx_doublewrite->write_buf, NULL);
298
write_buf = trx_doublewrite->write_buf;
300
/* Re-verify lsn header/trailer consistency of what was written. */
for (len2 = 0; len2 + UNIV_PAGE_SIZE <= len; len2 += UNIV_PAGE_SIZE) {
301
if (mach_read_from_4(write_buf + len2 + FIL_PAGE_LSN + 4)
302
!= mach_read_from_4(write_buf + len2 + UNIV_PAGE_SIZE
303
- FIL_PAGE_END_LSN_OLD_CHKSUM + 4)) {
304
ut_print_timestamp(stderr);
306
" InnoDB: ERROR: The page to be written"
308
"InnoDB: The lsn fields do not match!"
309
" Noticed in the doublewrite block1.\n");
313
/* Post the second doublewrite block if more pages remain. */
if (trx_doublewrite->first_free > TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
314
len = (trx_doublewrite->first_free
315
- TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) * UNIV_PAGE_SIZE;
317
fil_io(OS_FILE_WRITE,
319
trx_doublewrite->block2, 0, len,
320
(void*)(trx_doublewrite->write_buf
321
+ TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
325
write_buf = trx_doublewrite->write_buf
326
+ TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE;
327
for (len2 = 0; len2 + UNIV_PAGE_SIZE <= len;
328
len2 += UNIV_PAGE_SIZE) {
329
if (mach_read_from_4(write_buf + len2
331
!= mach_read_from_4(write_buf + len2
333
- FIL_PAGE_END_LSN_OLD_CHKSUM
335
ut_print_timestamp(stderr);
337
" InnoDB: ERROR: The page to be"
338
" written seems corrupt!\n"
339
"InnoDB: The lsn fields do not match!"
341
" the doublewrite block2.\n");
346
/* Now flush the doublewrite buffer data to disk */
348
fil_flush(TRX_SYS_SPACE);
350
/* We know that the writes have been flushed to disk now
351
and in recovery we will find them in the doublewrite buffer
352
blocks. Next do the writes to the intended positions. */
354
for (i = 0; i < trx_doublewrite->first_free; i++) {
355
block = trx_doublewrite->buf_block_arr[i];
357
if (mach_read_from_4(block->frame + FIL_PAGE_LSN + 4)
358
!= mach_read_from_4(block->frame + UNIV_PAGE_SIZE
359
- FIL_PAGE_END_LSN_OLD_CHKSUM + 4)) {
360
ut_print_timestamp(stderr);
362
" InnoDB: ERROR: The page to be written"
364
"InnoDB: The lsn fields do not match!"
365
" Noticed in the buffer pool\n"
366
"InnoDB: after posting and flushing"
367
" the doublewrite buffer.\n"
368
"InnoDB: Page buf fix count %lu,"
369
" io fix %lu, state %lu\n",
370
(ulong)block->buf_fix_count,
371
(ulong)block->io_fix,
372
(ulong)block->state);
374
ut_a(block->state == BUF_BLOCK_FILE_PAGE);
376
/* Asynchronous write to the page's real tablespace location;
the simulated aio handler is woken explicitly below. */
fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
377
FALSE, block->space, block->offset, 0, UNIV_PAGE_SIZE,
378
(void*)block->frame, (void*)block);
381
/* Wake possible simulated aio thread to actually post the
382
writes to the operating system */
384
os_aio_simulated_wake_handler_threads();
386
/* Wait that all async writes to tablespaces have been posted to
389
os_aio_wait_until_no_pending_writes();
391
/* Now we flush the data to disk (for example, with fsync) */
393
fil_flush_file_spaces(FIL_TABLESPACE);
395
/* We can now reuse the doublewrite memory buffer: */
397
trx_doublewrite->first_free = 0;
399
mutex_exit(&(trx_doublewrite->mutex));
402
/************************************************************************
403
Posts a buffer page for writing. If the doublewrite memory buffer is
404
full, calls buf_flush_buffered_writes and waits for free space to
408
/* NOTE(review): corrupted extraction -- bare numeric lines are artifacts;
the return-type line, braces and a goto/retry line appear to be missing.
Code tokens left byte-identical; comments only added. */
buf_flush_post_to_doublewrite_buf(
409
/*==============================*/
410
buf_block_t* block) /* in: buffer block to write */
413
mutex_enter(&(trx_doublewrite->mutex));
415
ut_a(block->state == BUF_BLOCK_FILE_PAGE);
417
/* If the doublewrite buffer is already full, flush it first. */
if (trx_doublewrite->first_free
418
>= 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
419
mutex_exit(&(trx_doublewrite->mutex));
421
buf_flush_buffered_writes();
426
/* Copy the page image into the next free doublewrite slot and
record the block so the real write can be posted later. */
ut_memcpy(trx_doublewrite->write_buf
427
+ UNIV_PAGE_SIZE * trx_doublewrite->first_free,
428
block->frame, UNIV_PAGE_SIZE);
430
trx_doublewrite->buf_block_arr[trx_doublewrite->first_free] = block;
432
trx_doublewrite->first_free++;
434
/* If this post filled the buffer, flush it immediately. */
if (trx_doublewrite->first_free
435
>= 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
436
mutex_exit(&(trx_doublewrite->mutex));
438
buf_flush_buffered_writes();
443
mutex_exit(&(trx_doublewrite->mutex));
446
/************************************************************************
447
Initializes a page for writing to the tablespace. */
450
/* NOTE(review): corrupted extraction -- bare numeric lines are artifacts;
the return-type line, braces, the second argument of the trailer lsn
write, and the srv_use_checksums condition lines are missing.  Code
tokens left byte-identical; comments only added. */
buf_flush_init_for_writing(
451
/*=======================*/
452
byte* page, /* in: page */
453
dulint newest_lsn, /* in: newest modification lsn to the page */
454
ulint space, /* in: space id */
455
ulint page_no) /* in: page number */
457
/* Write the newest modification lsn to the page header and trailer */
458
mach_write_to_8(page + FIL_PAGE_LSN, newest_lsn);
460
mach_write_to_8(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
462
/* Write the page number and the space id */
464
mach_write_to_4(page + FIL_PAGE_OFFSET, page_no);
465
mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, space);
467
/* Store the new formula checksum */
469
mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM,
471
? buf_calc_page_new_checksum(page)
472
: BUF_NO_CHECKSUM_MAGIC);
474
/* We overwrite the first 4 bytes of the end lsn field to store
475
the old formula checksum. Since it depends also on the field
476
FIL_PAGE_SPACE_OR_CHKSUM, it has to be calculated after storing the
477
new formula checksum. */
479
mach_write_to_4(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
481
? buf_calc_page_old_checksum(page)
482
: BUF_NO_CHECKSUM_MAGIC);
485
/************************************************************************
486
Does an asynchronous write of a buffer page. NOTE: in simulated aio and
487
also when the doublewrite buffer is used, we must call
488
buf_flush_buffered_writes after we have posted a batch of writes! */
491
/* NOTE(review): corrupted extraction -- bare numeric lines are artifacts;
the return-type line, braces, #endif lines and the stderr argument of
fputs are missing.  Code tokens left byte-identical; comments only added. */
buf_flush_write_block_low(
492
/*======================*/
493
buf_block_t* block) /* in: buffer block to write */
495
#ifdef UNIV_LOG_DEBUG
496
static ibool univ_log_debug_warned;
497
#endif /* UNIV_LOG_DEBUG */
498
ut_a(block->state == BUF_BLOCK_FILE_PAGE);
500
#ifdef UNIV_IBUF_DEBUG
501
ut_a(ibuf_count_get(block->space, block->offset) == 0);
503
ut_ad(!ut_dulint_is_zero(block->newest_modification));
505
#ifdef UNIV_LOG_DEBUG
506
if (!univ_log_debug_warned) {
507
univ_log_debug_warned = TRUE;
508
fputs("Warning: cannot force log to disk if"
509
" UNIV_LOG_DEBUG is defined!\n"
510
"Crash recovery will not work!\n",
514
/* Force the log to the disk before writing the modified block */
515
log_write_up_to(block->newest_modification, LOG_WAIT_ALL_GROUPS, TRUE);
517
/* Stamp lsn, page number, space id and checksums into the frame. */
buf_flush_init_for_writing(block->frame, block->newest_modification,
518
block->space, block->offset);
519
/* Write directly if the doublewrite buffer is unavailable;
otherwise route the page through the doublewrite buffer. */
if (!srv_use_doublewrite_buf || !trx_doublewrite) {
520
fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
521
FALSE, block->space, block->offset, 0, UNIV_PAGE_SIZE,
522
(void*)block->frame, (void*)block);
524
buf_flush_post_to_doublewrite_buf(block);
528
/************************************************************************
529
Writes a page asynchronously from the buffer buf_pool to a file, if it can be
530
found in the buf_pool and it is in a flushable state. NOTE: in simulated aio
531
we must call os_aio_simulated_wake_handler_threads after we have posted a batch
537
/* NOTE(review): corrupted extraction -- bare numeric lines are artifacts.
The function-name line itself is missing here; judging from the call in
buf_flush_try_neighbors below, this is buf_flush_try_page -- confirm
against the original source.  Return type, braces, locals, fprintf lines
and return statements are also missing.  Code left byte-identical. */
/* out: 1 if a page was flushed, 0 otherwise */
538
ulint space, /* in: space id */
539
ulint offset, /* in: page offset */
540
ulint flush_type) /* in: BUF_FLUSH_LRU, BUF_FLUSH_LIST, or
541
BUF_FLUSH_SINGLE_PAGE */
546
ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST
547
|| flush_type == BUF_FLUSH_SINGLE_PAGE);
549
mutex_enter(&(buf_pool->mutex));
551
block = buf_page_hash_get(space, offset);
553
ut_a(!block || block->state == BUF_BLOCK_FILE_PAGE);
556
mutex_exit(&(buf_pool->mutex));
560
mutex_enter(&block->mutex);
562
/* Case 1: flush-list flush. */
if (flush_type == BUF_FLUSH_LIST
563
&& buf_flush_ready_for_flush(block, flush_type)) {
565
block->io_fix = BUF_IO_WRITE;
567
/* If AWE is enabled and the page is not mapped to a frame,
570
if (block->frame == NULL) {
573
/* We set second parameter TRUE because the block is
574
in the LRU list and we must put it to
575
awe_LRU_free_mapped list once mapped to a frame */
577
buf_awe_map_page_to_frame(block, TRUE);
580
block->flush_type = flush_type;
582
if (buf_pool->n_flush[flush_type] == 0) {
584
os_event_reset(buf_pool->no_flush[flush_type]);
587
(buf_pool->n_flush[flush_type])++;
591
/* If the simulated aio thread is not running, we must
592
not wait for any latch, as we may end up in a deadlock:
593
if buf_fix_count == 0, then we know we need not wait */
595
if (block->buf_fix_count == 0) {
596
rw_lock_s_lock_gen(&(block->lock), BUF_IO_WRITE);
601
mutex_exit(&block->mutex);
602
mutex_exit(&(buf_pool->mutex));
605
buf_flush_buffered_writes();
607
rw_lock_s_lock_gen(&(block->lock), BUF_IO_WRITE);
611
if (buf_debug_prints) {
613
"Flushing page space %lu, page no %lu \n",
614
(ulong) block->space, (ulong) block->offset);
616
#endif /* UNIV_DEBUG */
618
buf_flush_write_block_low(block);
622
/* Case 2: LRU flush. */
} else if (flush_type == BUF_FLUSH_LRU
623
&& buf_flush_ready_for_flush(block, flush_type)) {
626
Because any thread may call the LRU flush, even when owning
627
locks on pages, to avoid deadlocks, we must make sure that the
628
s-lock is acquired on the page without waiting: this is
629
accomplished because in the if-condition above we require
630
the page not to be bufferfixed (in function
631
..._ready_for_flush). */
633
block->io_fix = BUF_IO_WRITE;
635
/* If AWE is enabled and the page is not mapped to a frame,
638
if (block->frame == NULL) {
641
/* We set second parameter TRUE because the block is
642
in the LRU list and we must put it to
643
awe_LRU_free_mapped list once mapped to a frame */
645
buf_awe_map_page_to_frame(block, TRUE);
648
block->flush_type = flush_type;
650
if (buf_pool->n_flush[flush_type] == 0) {
652
os_event_reset(buf_pool->no_flush[flush_type]);
655
(buf_pool->n_flush[flush_type])++;
657
rw_lock_s_lock_gen(&(block->lock), BUF_IO_WRITE);
659
/* Note that the s-latch is acquired before releasing the
660
buf_pool mutex: this ensures that the latch is acquired
663
mutex_exit(&block->mutex);
664
mutex_exit(&(buf_pool->mutex));
666
buf_flush_write_block_low(block);
670
/* Case 3: single-page flush. */
} else if (flush_type == BUF_FLUSH_SINGLE_PAGE
671
&& buf_flush_ready_for_flush(block, flush_type)) {
673
block->io_fix = BUF_IO_WRITE;
675
/* If AWE is enabled and the page is not mapped to a frame,
678
if (block->frame == NULL) {
681
/* We set second parameter TRUE because the block is
682
in the LRU list and we must put it to
683
awe_LRU_free_mapped list once mapped to a frame */
685
buf_awe_map_page_to_frame(block, TRUE);
688
block->flush_type = flush_type;
690
if (buf_pool->n_flush[block->flush_type] == 0) {
692
os_event_reset(buf_pool->no_flush[block->flush_type]);
695
(buf_pool->n_flush[flush_type])++;
697
mutex_exit(&block->mutex);
698
mutex_exit(&(buf_pool->mutex));
700
rw_lock_s_lock_gen(&(block->lock), BUF_IO_WRITE);
703
if (buf_debug_prints) {
705
"Flushing single page space %lu,"
707
(ulong) block->space,
708
(ulong) block->offset);
710
#endif /* UNIV_DEBUG */
712
buf_flush_write_block_low(block);
717
mutex_exit(&block->mutex);
718
mutex_exit(&(buf_pool->mutex));
723
/***************************************************************
724
Flushes to disk all flushable pages within the flush area. */
727
/* NOTE(review): corrupted extraction -- bare numeric lines are artifacts;
the return-type line, braces, locals (i, low, high, count, block) and
some condition/argument lines are missing.  Code left byte-identical. */
buf_flush_try_neighbors(
728
/*====================*/
729
/* out: number of pages flushed */
730
ulint space, /* in: space id */
731
ulint offset, /* in: page offset */
732
ulint flush_type) /* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
739
ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
741
/* Compute the BUF_FLUSH_AREA-aligned neighborhood around offset. */
low = (offset / BUF_FLUSH_AREA) * BUF_FLUSH_AREA;
742
high = (offset / BUF_FLUSH_AREA + 1) * BUF_FLUSH_AREA;
744
if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) {
745
/* If there is little space, it is better not to flush any
746
block except from the end of the LRU list */
752
/* fprintf(stderr, "Flush area: low %lu high %lu\n", low, high); */
754
/* Clamp the neighborhood to the actual tablespace size. */
if (high > fil_space_get_size(space)) {
755
high = fil_space_get_size(space);
758
mutex_enter(&(buf_pool->mutex));
760
for (i = low; i < high; i++) {
762
block = buf_page_hash_get(space, i);
763
ut_a(!block || block->state == BUF_BLOCK_FILE_PAGE);
769
} else if (flush_type == BUF_FLUSH_LRU && i != offset
772
/* We avoid flushing 'non-old' blocks in an LRU flush,
773
because the flushed blocks are soon freed */
778
mutex_enter(&block->mutex);
780
if (buf_flush_ready_for_flush(block, flush_type)
781
&& (i == offset || block->buf_fix_count == 0)) {
782
/* We only try to flush those
783
neighbors != offset where the buf fix count is
784
zero, as we then know that we probably can
785
latch the page without a semaphore wait.
786
Semaphore waits are expensive because we must
787
flush the doublewrite buffer before we start
790
mutex_exit(&block->mutex);
792
mutex_exit(&(buf_pool->mutex));
794
/* Note: as we release the buf_pool mutex
795
above, in buf_flush_try_page we cannot be sure
796
the page is still in a flushable state:
797
therefore we check it again inside that
800
count += buf_flush_try_page(space, i,
803
mutex_enter(&(buf_pool->mutex));
805
mutex_exit(&block->mutex);
810
mutex_exit(&(buf_pool->mutex));
815
/***********************************************************************
816
This utility flushes dirty blocks from the end of the LRU list or flush_list.
817
NOTE 1: in the case of an LRU flush the calling thread may own latches to
818
pages: to avoid deadlocks, this function must be written so that it cannot
819
end up waiting for these latches! NOTE 2: in the case of a flush list flush,
820
the calling thread is not allowed to own any latches on pages! */
825
/* NOTE(review): corrupted extraction -- bare numeric lines are artifacts.
The function-name line is missing; judging from the call in
buf_flush_free_margin below, this is buf_flush_batch -- confirm against
the original source.  Return type, braces, locals, the outer batch loop
header, some conditions and the final return are also missing. */
/* out: number of blocks for which the write
826
request was queued; ULINT_UNDEFINED if there
827
was a flush of the same type already running */
828
ulint flush_type, /* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST; if
829
BUF_FLUSH_LIST, then the caller must not own
830
any latches on pages */
831
ulint min_n, /* in: wished minimum number of blocks flushed
832
(it is not guaranteed that the actual number
833
is that big, though) */
834
dulint lsn_limit) /* in the case BUF_FLUSH_LIST all blocks whose
835
oldest_modification is smaller than this
836
should be flushed (if their number does not
837
exceed min_n), otherwise ignored */
840
ulint page_count = 0;
841
ulint old_page_count;
846
ut_ad((flush_type == BUF_FLUSH_LRU)
847
|| (flush_type == BUF_FLUSH_LIST));
848
#ifdef UNIV_SYNC_DEBUG
849
ut_ad((flush_type != BUF_FLUSH_LIST)
850
|| sync_thread_levels_empty_gen(TRUE));
851
#endif /* UNIV_SYNC_DEBUG */
852
mutex_enter(&(buf_pool->mutex));
854
/* Only one batch of each flush type may run at a time. */
if ((buf_pool->n_flush[flush_type] > 0)
855
|| (buf_pool->init_flush[flush_type] == TRUE)) {
857
/* There is already a flush batch of the same type running */
859
mutex_exit(&(buf_pool->mutex));
861
return(ULINT_UNDEFINED);
864
(buf_pool->init_flush)[flush_type] = TRUE;
867
/* If we have flushed enough, leave the loop */
868
if (page_count >= min_n) {
873
/* Start from the end of the list looking for a suitable
874
block to be flushed. */
876
if (flush_type == BUF_FLUSH_LRU) {
877
block = UT_LIST_GET_LAST(buf_pool->LRU);
879
ut_ad(flush_type == BUF_FLUSH_LIST);
881
block = UT_LIST_GET_LAST(buf_pool->flush_list);
883
|| (ut_dulint_cmp(block->oldest_modification,
885
/* We have flushed enough */
893
/* Note that after finding a single flushable page, we try to
894
flush also all its neighbors, and after that start from the
895
END of the LRU list or flush list again: the list may change
896
during the flushing and we cannot safely preserve within this
897
function a pointer to a block in the list! */
899
while ((block != NULL) && !found) {
900
ut_a(block->state == BUF_BLOCK_FILE_PAGE);
902
mutex_enter(&block->mutex);
904
if (buf_flush_ready_for_flush(block, flush_type)) {
907
space = block->space;
908
offset = block->offset;
910
mutex_exit(&block->mutex);
911
mutex_exit(&(buf_pool->mutex));
913
old_page_count = page_count;
915
/* Try to flush also all the neighbors */
916
page_count += buf_flush_try_neighbors(
917
space, offset, flush_type);
919
"Flush type %lu, page no %lu, neighb %lu\n",
921
page_count - old_page_count); */
923
mutex_enter(&(buf_pool->mutex));
925
} else if (flush_type == BUF_FLUSH_LRU) {
927
mutex_exit(&block->mutex);
929
block = UT_LIST_GET_PREV(LRU, block);
931
ut_ad(flush_type == BUF_FLUSH_LIST);
933
mutex_exit(&block->mutex);
935
block = UT_LIST_GET_PREV(flush_list, block);
939
/* If we could not find anything to flush, leave the loop */
946
(buf_pool->init_flush)[flush_type] = FALSE;
948
if ((buf_pool->n_flush[flush_type] == 0)
949
&& (buf_pool->init_flush[flush_type] == FALSE)) {
951
/* The running flush batch has ended */
953
os_event_set(buf_pool->no_flush[flush_type]);
956
mutex_exit(&(buf_pool->mutex));
958
/* Push any posted writes through the doublewrite buffer. */
buf_flush_buffered_writes();
961
if (buf_debug_prints && page_count > 0) {
962
ut_a(flush_type == BUF_FLUSH_LRU
963
|| flush_type == BUF_FLUSH_LIST);
964
fprintf(stderr, flush_type == BUF_FLUSH_LRU
965
? "Flushed %lu pages in LRU flush\n"
966
: "Flushed %lu pages in flush list flush\n",
969
#endif /* UNIV_DEBUG */
971
srv_buf_pool_flushed += page_count;
976
/**********************************************************************
977
Waits until a flush batch of the given type ends */
980
/* NOTE(review): corrupted extraction -- bare numeric lines are artifacts;
the return-type line and braces are missing.  Code left byte-identical.
Blocks on the per-type no_flush event, which buf_flush_write_complete
sets when the batch's last write finishes. */
buf_flush_wait_batch_end(
981
/*=====================*/
982
ulint type) /* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
984
ut_ad((type == BUF_FLUSH_LRU) || (type == BUF_FLUSH_LIST));
986
os_event_wait(buf_pool->no_flush[type]);
989
/**********************************************************************
990
Gives a recommendation of how many blocks should be flushed to establish
991
a big enough margin of replaceable blocks near the end of the LRU list
992
and in the free list. */
995
/* NOTE(review): corrupted extraction -- bare numeric lines are artifacts;
the return-type line, braces, the block/distance locals, loop-body
increments and return statements are missing.  Code left byte-identical. */
buf_flush_LRU_recommendation(void)
996
/*==============================*/
997
/* out: number of blocks which should be flushed
998
from the end of the LRU list */
1001
ulint n_replaceable;
1004
mutex_enter(&(buf_pool->mutex));
1006
/* Start from the free-list length, then count replaceable blocks
found while walking backwards from the LRU tail. */
n_replaceable = UT_LIST_GET_LEN(buf_pool->free);
1008
block = UT_LIST_GET_LAST(buf_pool->LRU);
1010
while ((block != NULL)
1011
&& (n_replaceable < BUF_FLUSH_FREE_BLOCK_MARGIN
1012
+ BUF_FLUSH_EXTRA_MARGIN)
1013
&& (distance < BUF_LRU_FREE_SEARCH_LEN)) {
1015
mutex_enter(&block->mutex);
1017
if (buf_flush_ready_for_replace(block)) {
1021
mutex_exit(&block->mutex);
1025
block = UT_LIST_GET_PREV(LRU, block);
1028
mutex_exit(&(buf_pool->mutex));
1030
/* Enough replaceable blocks already: no flushing recommended. */
if (n_replaceable >= BUF_FLUSH_FREE_BLOCK_MARGIN) {
1035
return(BUF_FLUSH_FREE_BLOCK_MARGIN + BUF_FLUSH_EXTRA_MARGIN
1039
/*************************************************************************
1040
Flushes pages from the end of the LRU list if there is too small a margin
1041
of replaceable pages there or in the free list. VERY IMPORTANT: this function
1042
is called also by threads which have locks on pages. To avoid deadlocks, we
1043
flush only pages such that the s-lock required for flushing can be acquired
1044
immediately, without waiting. */
1047
/* NOTE(review): corrupted extraction -- bare numeric lines are artifacts;
the return-type line, braces, locals (n_to_flush, n_flushed) and the
lsn_limit argument of buf_flush_batch are missing.  Code left
byte-identical; comments only added. */
buf_flush_free_margin(void)
1048
/*=======================*/
1053
n_to_flush = buf_flush_LRU_recommendation();
1055
if (n_to_flush > 0) {
1056
n_flushed = buf_flush_batch(BUF_FLUSH_LRU, n_to_flush,
1058
/* ULINT_UNDEFINED means another LRU batch was in progress. */
if (n_flushed == ULINT_UNDEFINED) {
1059
/* There was an LRU type flush batch already running;
1060
let us wait for it to end */
1062
buf_flush_wait_batch_end(BUF_FLUSH_LRU);
1067
/**********************************************************************
1068
Validates the flush list. */
1071
/* NOTE(review): corrupted extraction -- bare numeric lines are artifacts;
the return-type line, braces, the om local declaration, part of the
ordering assertion and the return statement are missing.  Code left
byte-identical; comments only added. */
buf_flush_validate_low(void)
1072
/*========================*/
1073
/* out: TRUE if ok */
1078
UT_LIST_VALIDATE(flush_list, buf_block_t, buf_pool->flush_list);
1080
block = UT_LIST_GET_FIRST(buf_pool->flush_list);
1082
/* Check every element is a dirty file page and that the list stays
ordered by oldest_modification. */
while (block != NULL) {
1083
om = block->oldest_modification;
1084
ut_a(block->state == BUF_BLOCK_FILE_PAGE);
1085
ut_a(ut_dulint_cmp(om, ut_dulint_zero) > 0);
1087
block = UT_LIST_GET_NEXT(flush_list, block);
1090
ut_a(ut_dulint_cmp(om, block->oldest_modification)
1098
/**********************************************************************
1099
Validates the flush list. */
1102
buf_flush_validate(void)
1103
/*====================*/
1104
/* out: TRUE if ok */
1108
mutex_enter(&(buf_pool->mutex));
1110
ret = buf_flush_validate_low();
1112
mutex_exit(&(buf_pool->mutex));