29
12
#include "buf0flu.ic"
35
#ifndef UNIV_HOTBACKUP
36
16
#include "ut0byte.h"
37
17
#include "ut0lst.h"
38
18
#include "page0page.h"
39
20
#include "fil0fil.h"
40
22
#include "buf0lru.h"
41
23
#include "buf0rea.h"
42
24
#include "ibuf0ibuf.h"
43
25
#include "log0log.h"
44
26
#include "os0file.h"
45
27
#include "trx0sys.h"
47
31
/**********************************************************************
48
These statistics are generated for heuristics used in estimating the
49
rate at which we should flush the dirty blocks to avoid bursty IO
50
activity. Note that the rate of flushing not only depends on how many
51
dirty pages we have in the buffer pool but it is also a function of
52
how much redo the workload is generating and at what rate. */
55
/** Number of intervals for which we keep the history of these stats.
56
Each interval is 1 second, defined by the rate at which
57
srv_error_monitor_thread() calls buf_flush_stat_update(). */
58
#define BUF_FLUSH_STAT_N_INTERVAL 20
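/* Illustrative note (assumes the 1-second sampling described above): with
BUF_FLUSH_STAT_N_INTERVAL = 20 and one sample per second, the statistics
below describe a sliding window of roughly the last 20 seconds of redo and
LRU-flush activity. */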
60
/** Sampled values buf_flush_stat_cur.
61
Not protected by any mutex. Updated by buf_flush_stat_update(). */
62
static buf_flush_stat_t buf_flush_stat_arr[BUF_FLUSH_STAT_N_INTERVAL];
64
/** Cursor to buf_flush_stat_arr[]. Updated in a round-robin fashion. */
65
static ulint buf_flush_stat_arr_ind;
67
/** Values at start of the current interval. Reset by
68
buf_flush_stat_update(). */
69
static buf_flush_stat_t buf_flush_stat_cur;
71
/** Running sum of past values of buf_flush_stat_cur.
72
Updated by buf_flush_stat_update(). Not protected by any mutex. */
73
static buf_flush_stat_t buf_flush_stat_sum;
75
/** Number of pages flushed through non flush_list flushes. */
76
static ulint buf_lru_flush_page_count = 0;
80
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
81
/******************************************************************//**
82
Validates the flush list.
@return TRUE if ok */
86
buf_flush_validate_low(
87
/*===================*/
88
buf_pool_t* buf_pool); /*!< in: Buffer pool instance */
89
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
91
/******************************************************************//**
92
Inserts a block into the flush_rbt and returns a pointer to its
93
predecessor or NULL if no predecessor. The ordering is maintained
94
on the basis of the <oldest_modification, space, offset> key.
95
@return pointer to the predecessor or NULL if no predecessor. */
98
buf_flush_insert_in_flush_rbt(
99
/*==========================*/
100
buf_page_t* bpage) /*!< in: bpage to be inserted. */
102
const ib_rbt_node_t* c_node;
103
const ib_rbt_node_t* p_node;
104
buf_page_t* prev = NULL;
105
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
107
ut_ad(buf_flush_list_mutex_own(buf_pool));
109
/* Insert this buffer into the rbt. */
110
c_node = rbt_insert(buf_pool->flush_rbt, &bpage, &bpage);
111
ut_a(c_node != NULL);
113
/* Get the predecessor. */
114
p_node = rbt_prev(buf_pool->flush_rbt, c_node);
116
if (p_node != NULL) {
118
buf_page_t** value = rbt_value(buf_page_t*, p_node);
prev = *value;
126
/*********************************************************//**
127
Delete a bpage from the flush_rbt. */
130
buf_flush_delete_from_flush_rbt(
131
/*============================*/
132
buf_page_t* bpage) /*!< in: bpage to be removed. */
136
#endif /* UNIV_DEBUG */
137
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
139
ut_ad(buf_flush_list_mutex_own(buf_pool));
143
#endif /* UNIV_DEBUG */
144
rbt_delete(buf_pool->flush_rbt, &bpage);
148
/*****************************************************************//**
149
Compare two modified blocks in the buffer pool. The key for comparison
151
key = <oldest_modification, space, offset>
152
This comparison is used to maintain the ordering of blocks in the
buf_pool->flush_list.
154
Note that for the purpose of flush_rbt, we only need to order blocks
155
on the oldest_modification. The other two fields are used to uniquely
identify the blocks.
157
@return < 0 if b2 < b1, 0 if b2 == b1, > 0 if b2 > b1 */
162
const void* p1, /*!< in: block1 */
163
const void* p2) /*!< in: block2 */
166
const buf_page_t* b1 = *(const buf_page_t**) p1;
167
const buf_page_t* b2 = *(const buf_page_t**) p2;
169
buf_pool_t* buf_pool = buf_pool_from_bpage(b1);
170
#endif /* UNIV_DEBUG */
175
ut_ad(buf_flush_list_mutex_own(buf_pool));
177
ut_ad(b1->in_flush_list);
178
ut_ad(b2->in_flush_list);
180
if (b2->oldest_modification > b1->oldest_modification) {
182
} else if (b2->oldest_modification < b1->oldest_modification) {
186
/* If oldest_modification is same then decide on the space. */
187
ret = (int)(b2->space - b1->space);
189
/* Or else decide ordering on the offset field. */
190
return(ret ? ret : (int)(b2->offset - b1->offset));
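/* Illustrative example (hypothetical pages, not taken from a real trace):
if b1 has <oldest_modification, space, offset> = <100, 0, 7> and b2 has
<100, 0, 9>, the oldest_modification and space comparisons both yield 0,
so the result is (int)(9 - 7) > 0, i.e. b2 sorts after b1 purely on the
offset tie-breaker. */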
193
/********************************************************************//**
194
Initialize the red-black tree to speed up insertions into the flush_list
195
during recovery process. Should be called at the start of recovery
196
process before any page has been read/written. */
199
buf_flush_init_flush_rbt(void)
200
/*==========================*/
204
for (i = 0; i < srv_buf_pool_instances; i++) {
205
buf_pool_t* buf_pool;
207
buf_pool = buf_pool_from_array(i);
209
buf_flush_list_mutex_enter(buf_pool);
211
/* Create red black tree for speedy insertions in flush list. */
212
buf_pool->flush_rbt = rbt_create(
213
sizeof(buf_page_t*), buf_flush_block_cmp);
215
buf_flush_list_mutex_exit(buf_pool);
219
/********************************************************************//**
220
Frees up the red-black tree. */
223
buf_flush_free_flush_rbt(void)
224
/*==========================*/
228
for (i = 0; i < srv_buf_pool_instances; i++) {
229
buf_pool_t* buf_pool;
231
buf_pool = buf_pool_from_array(i);
233
buf_flush_list_mutex_enter(buf_pool);
235
#ifdef UNIV_DEBUG_VALGRIND
237
ulint zip_size = buf_block_get_zip_size(block);
239
if (UNIV_UNLIKELY(zip_size)) {
240
UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size);
242
UNIV_MEM_ASSERT_RW(block->frame, UNIV_PAGE_SIZE);
245
#endif /* UNIV_DEBUG_VALGRIND */
246
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
247
ut_a(buf_flush_validate_low(buf_pool));
248
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
250
rbt_free(buf_pool->flush_rbt);
251
buf_pool->flush_rbt = NULL;
253
buf_flush_list_mutex_exit(buf_pool);
257
/********************************************************************//**
Inserts a modified block into the flush list. */
261
44
buf_flush_insert_into_flush_list(
262
45
/*=============================*/
263
buf_pool_t* buf_pool, /*!< buffer pool instance */
264
buf_block_t* block, /*!< in/out: block which is modified */
265
ib_uint64_t lsn) /*!< in: oldest modification */
267
ut_ad(!buf_pool_mutex_own(buf_pool));
268
ut_ad(log_flush_order_mutex_own());
269
ut_ad(mutex_own(&block->mutex));
271
buf_flush_list_mutex_enter(buf_pool);
273
49
ut_ad((UT_LIST_GET_FIRST(buf_pool->flush_list) == NULL)
274
50
|| (UT_LIST_GET_FIRST(buf_pool->flush_list)->oldest_modification
51
<= bpage->oldest_modification));
277
/* If we are in the recovery then we need to update the flush
278
red-black tree as well. */
279
if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
280
buf_flush_list_mutex_exit(buf_pool);
281
buf_flush_insert_sorted_into_flush_list(buf_pool, block, lsn);
53
switch (buf_page_get_state(bpage)) {
54
case BUF_BLOCK_ZIP_PAGE:
55
mutex_enter(&buf_pool_zip_mutex);
56
buf_page_set_state(bpage, BUF_BLOCK_ZIP_DIRTY);
57
mutex_exit(&buf_pool_zip_mutex);
58
UT_LIST_REMOVE(list, buf_pool->zip_clean, bpage);
60
case BUF_BLOCK_ZIP_DIRTY:
61
case BUF_BLOCK_FILE_PAGE:
62
ut_ad(bpage->in_LRU_list);
63
ut_ad(bpage->in_page_hash);
64
ut_ad(!bpage->in_zip_hash);
65
ut_ad(!bpage->in_flush_list);
66
ut_d(bpage->in_flush_list = TRUE);
67
UT_LIST_ADD_FIRST(list, buf_pool->flush_list, bpage);
69
case BUF_BLOCK_ZIP_FREE:
70
case BUF_BLOCK_NOT_USED:
71
case BUF_BLOCK_READY_FOR_USE:
72
case BUF_BLOCK_MEMORY:
73
case BUF_BLOCK_REMOVE_HASH:
285
ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
286
ut_ad(!block->page.in_flush_list);
288
ut_d(block->page.in_flush_list = TRUE);
289
block->page.oldest_modification = lsn;
290
UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
292
#ifdef UNIV_DEBUG_VALGRIND
294
ulint zip_size = buf_block_get_zip_size(block);
296
if (UNIV_UNLIKELY(zip_size)) {
297
UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size);
299
UNIV_MEM_ASSERT_RW(block->frame, UNIV_PAGE_SIZE);
302
#endif /* UNIV_DEBUG_VALGRIND */
303
78
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
304
ut_a(buf_flush_validate_low(buf_pool));
305
80
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
307
buf_flush_list_mutex_exit(buf_pool);
310
/********************************************************************//**
83
/************************************************************************
311
84
Inserts a modified block into the flush list in the right sorted position.
312
85
This function is used by recovery, because there the modifications do not
313
86
necessarily come in the order of lsn's. */
316
89
buf_flush_insert_sorted_into_flush_list(
317
90
/*====================================*/
318
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
319
buf_block_t* block, /*!< in/out: block which is modified */
320
ib_uint64_t lsn) /*!< in: oldest modification */
322
93
buf_page_t* prev_b;
325
ut_ad(!buf_pool_mutex_own(buf_pool));
326
ut_ad(log_flush_order_mutex_own());
327
ut_ad(mutex_own(&block->mutex));
328
ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
330
buf_flush_list_mutex_enter(buf_pool);
332
/* The field in_LRU_list is protected by buf_pool_mutex, which
333
we are not holding. However, while a block is in the flush
334
list, it is dirty and cannot be discarded, not from the
335
page_hash or from the LRU list. At most, the uncompressed
336
page frame of a compressed block may be discarded or created
337
(copying the block->page to or from a buf_page_t that is
338
dynamically allocated from buf_buddy_alloc()). Because those
339
transitions hold block->mutex and the flush list mutex (via
340
buf_flush_relocate_on_flush_list()), there is no possibility
341
of a race condition in the assertions below. */
342
ut_ad(block->page.in_LRU_list);
343
ut_ad(block->page.in_page_hash);
344
/* buf_buddy_block_register() will take a block in the
345
BUF_BLOCK_MEMORY state, not a file page. */
346
ut_ad(!block->page.in_zip_hash);
348
ut_ad(!block->page.in_flush_list);
349
ut_d(block->page.in_flush_list = TRUE);
350
block->page.oldest_modification = lsn;
352
#ifdef UNIV_DEBUG_VALGRIND
354
ulint zip_size = buf_block_get_zip_size(block);
356
if (UNIV_UNLIKELY(zip_size)) {
357
UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size);
359
UNIV_MEM_ASSERT_RW(block->frame, UNIV_PAGE_SIZE);
362
#endif /* UNIV_DEBUG_VALGRIND */
96
ut_ad(buf_pool_mutex_own());
98
switch (buf_page_get_state(bpage)) {
99
case BUF_BLOCK_ZIP_PAGE:
100
mutex_enter(&buf_pool_zip_mutex);
101
buf_page_set_state(bpage, BUF_BLOCK_ZIP_DIRTY);
102
mutex_exit(&buf_pool_zip_mutex);
103
UT_LIST_REMOVE(list, buf_pool->zip_clean, bpage);
105
case BUF_BLOCK_ZIP_DIRTY:
106
case BUF_BLOCK_FILE_PAGE:
107
ut_ad(bpage->in_LRU_list);
108
ut_ad(bpage->in_page_hash);
109
ut_ad(!bpage->in_zip_hash);
110
ut_ad(!bpage->in_flush_list);
111
ut_d(bpage->in_flush_list = TRUE);
113
case BUF_BLOCK_ZIP_FREE:
114
case BUF_BLOCK_NOT_USED:
115
case BUF_BLOCK_READY_FOR_USE:
116
case BUF_BLOCK_MEMORY:
117
case BUF_BLOCK_REMOVE_HASH:
378
/* For the most part when this function is called the flush_rbt
379
should not be NULL. In a very rare boundary case it is possible
380
that the flush_rbt has already been freed by the recovery thread
381
before the last page was hooked up in the flush_list by the
382
io-handler thread. In that case we'll just do a simple
383
linear search in the else block. */
384
if (buf_pool->flush_rbt) {
386
prev_b = buf_flush_insert_in_flush_rbt(&block->page);
390
b = UT_LIST_GET_FIRST(buf_pool->flush_list);
392
while (b && b->oldest_modification
393
> block->page.oldest_modification) {
394
ut_ad(b->in_flush_list);
396
b = UT_LIST_GET_NEXT(list, b);
123
b = UT_LIST_GET_FIRST(buf_pool->flush_list);
125
while (b && b->oldest_modification > bpage->oldest_modification) {
126
ut_ad(b->in_flush_list);
128
b = UT_LIST_GET_NEXT(list, b);
400
131
if (prev_b == NULL) {
401
UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
132
UT_LIST_ADD_FIRST(list, buf_pool->flush_list, bpage);
403
134
UT_LIST_INSERT_AFTER(list, buf_pool->flush_list,
404
prev_b, &block->page);
407
138
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
408
ut_a(buf_flush_validate_low(buf_pool));
409
140
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
411
buf_flush_list_mutex_exit(buf_pool);
414
/********************************************************************//**
143
/************************************************************************
415
144
Returns TRUE if the file page block is immediately suitable for replacement,
416
i.e., the transition FILE_PAGE => NOT_USED allowed.
417
@return TRUE if can replace immediately */
420
148
buf_flush_ready_for_replace(
421
149
/*========================*/
422
buf_page_t* bpage) /*!< in: buffer control block, must be
423
152
buf_page_in_file(bpage) and in the LRU list */
426
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
427
ut_ad(buf_pool_mutex_own(buf_pool));
429
155
ut_ad(mutex_own(buf_page_get_mutex(bpage)));
430
156
ut_ad(bpage->in_LRU_list);
526
/* If the flush_rbt is active then delete from there as well. */
527
if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
528
buf_flush_delete_from_flush_rbt(bpage);
531
/* Must be done after we have removed it from the flush_rbt
532
because we assert on in_flush_list in comparison function. */
533
ut_d(bpage->in_flush_list = FALSE);
535
245
bpage->oldest_modification = 0;
537
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
538
ut_a(buf_flush_validate_low(buf_pool));
539
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
541
buf_flush_list_mutex_exit(buf_pool);
544
/*******************************************************************//**
545
Relocates a buffer control block on the flush_list.
546
Note that it is assumed that the contents of bpage have already been
copied to dpage.
548
IMPORTANT: When this function is called bpage and dpage are not
549
exact copies of each other. For example, they both will have different
550
::state. Also the ::list pointers in dpage may be stale. We need to
551
use the current list node (bpage) to do the list manipulation because
552
the list pointers could have changed between the time that we copied
553
the contents of bpage to the dpage and the flush list manipulation
below. */
557
buf_flush_relocate_on_flush_list(
558
/*=============================*/
559
buf_page_t* bpage, /*!< in/out: control block being moved */
560
buf_page_t* dpage) /*!< in/out: destination block */
563
buf_page_t* prev_b = NULL;
564
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
566
ut_ad(buf_pool_mutex_own(buf_pool));
567
/* Must reside in the same buffer pool. */
568
ut_ad(buf_pool == buf_pool_from_bpage(dpage));
570
ut_ad(mutex_own(buf_page_get_mutex(bpage)));
572
buf_flush_list_mutex_enter(buf_pool);
574
/* FIXME: At this point we have both buf_pool and flush_list
575
mutexes. Theoretically removal of a block from flush list is
576
only covered by flush_list mutex but currently we do
577
have buf_pool mutex in buf_flush_remove() therefore this block
578
is guaranteed to be in the flush list. We need to check if
579
this will work without the assumption of block removing code
580
having the buf_pool mutex. */
581
ut_ad(bpage->in_flush_list);
582
ut_ad(dpage->in_flush_list);
584
/* If recovery is active we must swap the control blocks in
585
the flush_rbt as well. */
586
if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
587
buf_flush_delete_from_flush_rbt(bpage);
588
prev_b = buf_flush_insert_in_flush_rbt(dpage);
591
/* Must be done after we have removed it from the flush_rbt
592
because we assert on in_flush_list in comparison function. */
593
ut_d(bpage->in_flush_list = FALSE);
595
prev = UT_LIST_GET_PREV(list, bpage);
596
UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
599
ut_ad(prev->in_flush_list);
600
UT_LIST_INSERT_AFTER(
602
buf_pool->flush_list,
607
buf_pool->flush_list,
611
/* Just an extra check. Previous in flush_list
612
should be the same control block as in flush_rbt. */
613
ut_a(!buf_pool->flush_rbt || prev_b == prev);
615
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
616
ut_a(buf_flush_validate_low(buf_pool));
617
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
619
buf_flush_list_mutex_exit(buf_pool);
622
/********************************************************************//**
247
ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list));
250
/************************************************************************
623
251
Updates the flush system data structures when a write is completed. */
626
254
buf_flush_write_complete(
627
255
/*=====================*/
628
buf_page_t* bpage) /*!< in: pointer to the block in question */
256
buf_page_t* bpage) /* in: pointer to the block in question */
630
258
enum buf_flush flush_type;
631
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1155
/********************************************************************//**
1156
Writes a flushable page asynchronously from the buffer pool to a file.
1157
NOTE: in simulated aio we must call
1158
os_aio_simulated_wake_handler_threads after we have posted a batch of
1159
writes! NOTE: buf_pool->mutex and buf_page_get_mutex(bpage) must be
1160
held upon entering this function, and they will be released by this
function. */
1166
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
1167
buf_page_t* bpage, /*!< in: buffer control block */
1168
enum buf_flush flush_type) /*!< in: BUF_FLUSH_LRU
1169
or BUF_FLUSH_LIST */
1171
767
mutex_t* block_mutex;
1172
ibool is_uncompressed;
1174
ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
1175
ut_ad(buf_pool_mutex_own(buf_pool));
1176
ut_ad(buf_page_in_file(bpage));
770
ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST
771
|| flush_type == BUF_FLUSH_SINGLE_PAGE);
773
buf_pool_mutex_enter();
775
bpage = buf_page_hash_get(space, offset);
778
buf_pool_mutex_exit();
782
ut_a(buf_page_in_file(bpage));
1178
783
block_mutex = buf_page_get_mutex(bpage);
1179
ut_ad(mutex_own(block_mutex));
1181
ut_ad(buf_flush_ready_for_flush(bpage, flush_type));
1183
buf_page_set_io_fix(bpage, BUF_IO_WRITE);
1185
buf_page_set_flush_type(bpage, flush_type);
1187
if (buf_pool->n_flush[flush_type] == 0) {
1189
os_event_reset(buf_pool->no_flush[flush_type]);
785
mutex_enter(block_mutex);
787
if (!buf_flush_ready_for_flush(bpage, flush_type)) {
788
mutex_exit(block_mutex);
789
buf_pool_mutex_exit();
1192
buf_pool->n_flush[flush_type]++;
1194
is_uncompressed = (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
1195
ut_ad(is_uncompressed == (block_mutex != &buf_pool->zip_mutex));
1197
793
switch (flush_type) {
1199
794
case BUF_FLUSH_LIST:
795
buf_page_set_io_fix(bpage, BUF_IO_WRITE);
797
buf_page_set_flush_type(bpage, flush_type);
799
if (buf_pool->n_flush[flush_type] == 0) {
801
os_event_reset(buf_pool->no_flush[flush_type]);
804
buf_pool->n_flush[flush_type]++;
1200
806
/* If the simulated aio thread is not running, we must
1201
807
not wait for any latch, as we may end up in a deadlock:
1202
808
if buf_fix_count == 0, then we know we need not wait */
1204
is_s_latched = (bpage->buf_fix_count == 0);
1205
if (is_s_latched && is_uncompressed) {
810
locked = bpage->buf_fix_count == 0;
812
&& buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE) {
1206
813
rw_lock_s_lock_gen(&((buf_block_t*) bpage)->lock,
1210
817
mutex_exit(block_mutex);
1211
buf_pool_mutex_exit(buf_pool);
1213
/* Even though bpage is not protected by any mutex at
1214
this point, it is safe to access bpage, because it is
1215
io_fixed and oldest_modification != 0. Thus, it
1216
cannot be relocated in the buffer pool or removed from
1217
flush_list or LRU_list. */
1219
if (!is_s_latched) {
818
buf_pool_mutex_exit();
1220
821
buf_flush_buffered_writes();
1222
if (is_uncompressed) {
823
if (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE) {
1223
824
rw_lock_s_lock_gen(&((buf_block_t*) bpage)
1224
825
->lock, BUF_IO_WRITE);
1365
967
if (buf_flush_ready_for_flush(bpage, flush_type)
1366
968
&& (i == offset || !bpage->buf_fix_count)) {
1367
969
/* We only try to flush those
1368
neighbors != offset where the buf fix
1369
count is zero, as we then know that we
1370
probably can latch the page without a
1371
semaphore wait. Semaphore waits are
1372
expensive because we must flush the
1373
doublewrite buffer before we start the
IO. */
1376
buf_flush_page(buf_pool, bpage, flush_type);
1377
ut_ad(!mutex_own(block_mutex));
1378
ut_ad(!buf_pool_mutex_own(buf_pool));
977
buf_pool_mutex_exit();
979
mutex_exit(block_mutex);
981
/* Note: as we release the buf_pool mutex
982
above, in buf_flush_try_page we cannot be sure
983
the page is still in a flushable state:
984
therefore we check it again inside that
987
count += buf_flush_try_page(space, i,
990
buf_pool_mutex_enter();
1382
992
mutex_exit(block_mutex);
1385
buf_pool_mutex_exit(buf_pool);
1391
/********************************************************************//**
1392
Check if the block is modified and ready for flushing. If the block
is ready to flush then flush the page and try to flush its neighbors.
1395
@return TRUE if buf_pool mutex was not released during this function.
1396
This does not guarantee that any pages were written.
The number of pages written is added to *count. */
1400
buf_flush_page_and_try_neighbors(
1401
/*=============================*/
1402
buf_page_t* bpage, /*!< in: buffer control block,
1404
buf_page_in_file(bpage) */
1405
enum buf_flush flush_type, /*!< in: BUF_FLUSH_LRU
1406
or BUF_FLUSH_LIST */
1407
ulint n_to_flush, /*!< in: number of pages to
1409
ulint* count) /*!< in/out: number of pages
1412
mutex_t* block_mutex;
1413
ibool flushed = FALSE;
1415
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1416
#endif /* UNIV_DEBUG */
1418
ut_ad(buf_pool_mutex_own(buf_pool));
1420
block_mutex = buf_page_get_mutex(bpage);
1421
mutex_enter(block_mutex);
1423
ut_a(buf_page_in_file(bpage));
1425
if (buf_flush_ready_for_flush(bpage, flush_type)) {
1428
buf_pool_t* buf_pool;
1430
buf_pool = buf_pool_from_bpage(bpage);
1432
buf_pool_mutex_exit(buf_pool);
1434
/* These fields are protected by both the
1435
buffer pool mutex and block mutex. */
1436
space = buf_page_get_space(bpage);
1437
offset = buf_page_get_page_no(bpage);
1439
mutex_exit(block_mutex);
1441
/* Try to flush also all the neighbors */
1442
*count += buf_flush_try_neighbors(space,
1448
buf_pool_mutex_enter(buf_pool);
1451
mutex_exit(block_mutex);
1454
ut_ad(buf_pool_mutex_own(buf_pool));
1459
/*******************************************************************//**
1460
This utility flushes dirty blocks from the end of the LRU list.
1461
In the case of an LRU flush the calling thread may own latches to
1462
pages: to avoid deadlocks, this function must be written so that it
1463
cannot end up waiting for these latches!
1464
@return number of blocks for which the write request was queued. */
1467
buf_flush_LRU_list_batch(
1468
/*=====================*/
1469
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
1470
ulint max) /*!< in: max of blocks to flush */
1475
ut_ad(buf_pool_mutex_own(buf_pool));
1478
/* Start from the end of the list looking for a
1479
suitable block to be flushed. */
1480
bpage = UT_LIST_GET_LAST(buf_pool->LRU);
1482
/* Iterate backwards over the LRU list till we find
a page that isn't ready for flushing. */
1484
while (bpage != NULL
1485
&& !buf_flush_page_and_try_neighbors(
1486
bpage, BUF_FLUSH_LRU, max, &count)) {
1488
bpage = UT_LIST_GET_PREV(LRU, bpage);
1490
} while (bpage != NULL && count < max);
1492
/* We keep track of all flushes happening as part of LRU
1493
flush. When estimating the desired rate at which flush_list
1494
should be flushed, we factor in this value. */
1495
buf_lru_flush_page_count += count;
1497
ut_ad(buf_pool_mutex_own(buf_pool));
1502
/*******************************************************************//**
1503
This utility flushes dirty blocks from the end of the flush_list.
1504
The calling thread is not allowed to own any latches on pages!
1505
@return number of blocks for which the write request was queued;
1506
ULINT_UNDEFINED if there was a flush of the same type already running */
1510
buf_flush_flush_list_batch(
1511
/*=======================*/
1512
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
1513
ulint min_n, /*!< in: wished minimum number
1514
of blocks flushed (it is not
1515
guaranteed that the actual
1516
number is that big, though) */
1517
ib_uint64_t lsn_limit) /*!< all blocks whose
1518
oldest_modification is smaller
1519
than this should be flushed (if
1520
their number does not exceed
min_n), otherwise ignored */
1527
ut_ad(buf_pool_mutex_own(buf_pool));
1529
/* If we have flushed enough, leave the loop */
1531
/* Start from the end of the list looking for a suitable
1532
block to be flushed. */
1534
buf_flush_list_mutex_enter(buf_pool);
1536
/* We use len here because theoretically insertions can
1537
happen in the flush_list below while we are traversing
1538
it for a suitable candidate for flushing. We'd like to
1539
set a limit on how far we are willing to traverse
the list. */
1541
len = UT_LIST_GET_LEN(buf_pool->flush_list);
1542
bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
1545
ut_a(bpage->oldest_modification > 0);
1548
if (!bpage || bpage->oldest_modification >= lsn_limit) {
1550
/* We have flushed enough */
1551
buf_flush_list_mutex_exit(buf_pool);
1555
ut_a(bpage->oldest_modification > 0);
1557
ut_ad(bpage->in_flush_list);
1559
buf_flush_list_mutex_exit(buf_pool);
1561
/* The list may change during the flushing and we cannot
1562
safely preserve within this function a pointer to a
1563
block in the list! */
1564
while (bpage != NULL
1566
&& !buf_flush_page_and_try_neighbors(
1567
bpage, BUF_FLUSH_LIST, min_n, &count)) {
1569
buf_flush_list_mutex_enter(buf_pool);
1571
/* If we are here that means that buf_pool->mutex
1572
was not released in buf_flush_page_and_try_neighbors()
1573
above and this guarantees that bpage didn't get
1574
relocated since we released the flush_list
1575
mutex above. There is a chance, however, that
1576
the bpage got removed from flush_list (not
1577
currently possible because flush_list_remove()
1578
also obtains buf_pool mutex but that may change
1579
in future). To avoid this scenario we check
1580
the oldest_modification and if it is zero
1581
we start all over again. */
1582
if (bpage->oldest_modification == 0) {
1583
buf_flush_list_mutex_exit(buf_pool);
1587
bpage = UT_LIST_GET_PREV(list, bpage);
1589
ut_ad(!bpage || bpage->in_flush_list);
1591
buf_flush_list_mutex_exit(buf_pool);
1596
} while (count < min_n && bpage != NULL && len > 0);
1598
ut_ad(buf_pool_mutex_own(buf_pool));
1603
/*******************************************************************//**
997
buf_pool_mutex_exit();
1002
/***********************************************************************
1604
1003
This utility flushes dirty blocks from the end of the LRU list or flush_list.
1605
1004
NOTE 1: in the case of an LRU flush the calling thread may own latches to
1606
1005
pages: to avoid deadlocks, this function must be written so that it cannot
1607
1006
end up waiting for these latches! NOTE 2: in the case of a flush list flush,
1608
the calling thread is not allowed to own any latches on pages!
1609
@return number of blocks for which the write request was queued;
1610
ULINT_UNDEFINED if there was a flush of the same type already running */
1613
1010
buf_flush_batch(
1614
1011
/*============*/
1615
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
1616
enum buf_flush flush_type, /*!< in: BUF_FLUSH_LRU or
1012
/* out: number of blocks for which the
1013
write request was queued;
1014
ULINT_UNDEFINED if there was a flush
1015
of the same type already running */
1016
enum buf_flush flush_type, /* in: BUF_FLUSH_LRU or
1617
1017
BUF_FLUSH_LIST; if BUF_FLUSH_LIST,
1618
1018
then the caller must not own any
1619
1019
latches on pages */
1620
ulint min_n, /*!< in: wished minimum number of blocks
1621
1021
flushed (it is not guaranteed that the
1622
1022
actual number is that big, though) */
1623
ib_uint64_t lsn_limit) /*!< in: in the case of BUF_FLUSH_LIST
1624
all blocks whose oldest_modification is
1023
ib_uint64_t lsn_limit) /* in the case BUF_FLUSH_LIST all
1024
blocks whose oldest_modification is
1625
1025
smaller than this should be flushed
1626
1026
(if their number does not exceed
1627
1027
min_n), otherwise ignored */
1030
ulint page_count = 0;
1031
ulint old_page_count;
1631
ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
1035
ut_ad((flush_type == BUF_FLUSH_LRU)
1036
|| (flush_type == BUF_FLUSH_LIST));
1632
1037
#ifdef UNIV_SYNC_DEBUG
1633
1038
ut_ad((flush_type != BUF_FLUSH_LIST)
1634
1039
|| sync_thread_levels_empty_gen(TRUE));
1635
1040
#endif /* UNIV_SYNC_DEBUG */
1637
buf_pool_mutex_enter(buf_pool);
1639
/* Note: The buffer pool mutex is released and reacquired within
1640
the flush functions. */
1641
switch(flush_type) {
1643
count = buf_flush_LRU_list_batch(buf_pool, min_n);
1645
case BUF_FLUSH_LIST:
1646
count = buf_flush_flush_list_batch(buf_pool, min_n, lsn_limit);
1652
buf_pool_mutex_exit(buf_pool);
1654
buf_flush_buffered_writes();
1657
if (buf_debug_prints && count > 0) {
1658
fprintf(stderr, flush_type == BUF_FLUSH_LRU
1659
? "Flushed %lu pages in LRU flush\n"
1660
: "Flushed %lu pages in flush list flush\n",
1663
#endif /* UNIV_DEBUG */
1665
srv_buf_pool_flushed += count;
1670
/******************************************************************//**
1671
Gather the aggregated stats for both flush list and LRU list flushing */
1676
enum buf_flush flush_type, /*!< in: type of flush */
1677
ulint page_count) /*!< in: number of pages flushed */
1679
buf_flush_buffered_writes();
1681
ut_a(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
1041
buf_pool_mutex_enter();
1043
if ((buf_pool->n_flush[flush_type] > 0)
1044
|| (buf_pool->init_flush[flush_type] == TRUE)) {
1046
/* There is already a flush batch of the same type running */
1048
buf_pool_mutex_exit();
1050
return(ULINT_UNDEFINED);
1053
buf_pool->init_flush[flush_type] = TRUE;
1057
/* If we have flushed enough, leave the loop */
1058
if (page_count >= min_n) {
1063
/* Start from the end of the list looking for a suitable
1064
block to be flushed. */
1066
if (flush_type == BUF_FLUSH_LRU) {
1067
bpage = UT_LIST_GET_LAST(buf_pool->LRU);
1069
ut_ad(flush_type == BUF_FLUSH_LIST);
1071
bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
1073
|| bpage->oldest_modification >= lsn_limit) {
1074
/* We have flushed enough */
1078
ut_ad(bpage->in_flush_list);
1081
/* Note that after finding a single flushable page, we try to
1082
flush also all its neighbors, and after that start from the
1083
END of the LRU list or flush list again: the list may change
1084
during the flushing and we cannot safely preserve within this
1085
function a pointer to a block in the list! */
1088
mutex_t* block_mutex = buf_page_get_mutex(bpage);
1090
ut_a(buf_page_in_file(bpage));
1092
mutex_enter(block_mutex);
1094
if (buf_flush_ready_for_flush(bpage, flush_type)) {
1096
space = buf_page_get_space(bpage);
1097
offset = buf_page_get_page_no(bpage);
1099
buf_pool_mutex_exit();
1100
mutex_exit(block_mutex);
1102
old_page_count = page_count;
1104
/* Try to flush also all the neighbors */
1105
page_count += buf_flush_try_neighbors(
1106
space, offset, flush_type);
1108
"Flush type %lu, page no %lu, neighb %lu\n",
1110
page_count - old_page_count); */
1112
buf_pool_mutex_enter();
1115
} else if (flush_type == BUF_FLUSH_LRU) {
1117
mutex_exit(block_mutex);
1119
bpage = UT_LIST_GET_PREV(LRU, bpage);
1121
ut_ad(flush_type == BUF_FLUSH_LIST);
1123
mutex_exit(block_mutex);
1125
bpage = UT_LIST_GET_PREV(list, bpage);
1126
ut_ad(!bpage || bpage->in_flush_list);
1128
} while (bpage != NULL);
1130
/* If we could not find anything to flush, leave the loop */
1135
buf_pool->init_flush[flush_type] = FALSE;
1137
if ((buf_pool->n_flush[flush_type] == 0)
1138
&& (buf_pool->init_flush[flush_type] == FALSE)) {
1140
/* The running flush batch has ended */
1142
os_event_set(buf_pool->no_flush[flush_type]);
1145
buf_pool_mutex_exit();
1147
buf_flush_buffered_writes();
1683
1149
#ifdef UNIV_DEBUG
1684
1150
if (buf_debug_prints && page_count > 0) {
1151
ut_a(flush_type == BUF_FLUSH_LRU
1152
|| flush_type == BUF_FLUSH_LIST);
1685
1153
fprintf(stderr, flush_type == BUF_FLUSH_LRU
1686
1154
? "Flushed %lu pages in LRU flush\n"
1687
1155
: "Flushed %lu pages in flush list flush\n",
1692
1160
srv_buf_pool_flushed += page_count;
1694
if (flush_type == BUF_FLUSH_LRU) {
1695
/* We keep track of all flushes happening as part of LRU
1696
flush. When estimating the desired rate at which flush_list
1697
should be flushed we factor in this value. */
1698
buf_lru_flush_page_count += page_count;
1702
/******************************************************************//**
1703
Start a buffer flush batch for LRU or flush list */
1708
buf_pool_t* buf_pool, /*!< buffer pool instance */
1709
enum buf_flush flush_type) /*!< in: BUF_FLUSH_LRU
1710
or BUF_FLUSH_LIST */
1712
buf_pool_mutex_enter(buf_pool);
1714
if (buf_pool->n_flush[flush_type] > 0
1715
|| buf_pool->init_flush[flush_type] == TRUE) {
1717
/* There is already a flush batch of the same type running */
1719
buf_pool_mutex_exit(buf_pool);
1724
buf_pool->init_flush[flush_type] = TRUE;
1726
buf_pool_mutex_exit(buf_pool);
1731
/******************************************************************//**
1732
End a buffer flush batch for LRU or flush list */
1737
buf_pool_t* buf_pool, /*!< buffer pool instance */
1738
enum buf_flush flush_type) /*!< in: BUF_FLUSH_LRU
1739
or BUF_FLUSH_LIST */
1741
buf_pool_mutex_enter(buf_pool);
1743
buf_pool->init_flush[flush_type] = FALSE;
1745
if (buf_pool->n_flush[flush_type] == 0) {
1747
/* The running flush batch has ended */
1749
os_event_set(buf_pool->no_flush[flush_type]);
1752
buf_pool_mutex_exit(buf_pool);
1755
/******************************************************************//**
1165
/**********************************************************************
1756
1166
Waits until a flush batch of the given type ends */
1759
1169
buf_flush_wait_batch_end(
1760
1170
/*=====================*/
1761
buf_pool_t* buf_pool, /*!< buffer pool instance */
1762
enum buf_flush type) /*!< in: BUF_FLUSH_LRU
1763
or BUF_FLUSH_LIST */
1765
ut_ad(type == BUF_FLUSH_LRU || type == BUF_FLUSH_LIST);
1767
if (buf_pool == NULL) {
1770
for (i = 0; i < srv_buf_pool_instances; ++i) {
1771
buf_pool_t* buf_pool;
1773
buf_pool = buf_pool_from_array(i);
1775
os_event_wait(buf_pool->no_flush[type]);
1778
os_event_wait(buf_pool->no_flush[type]);
1782
/*******************************************************************//**
1783
This utility flushes dirty blocks from the end of the LRU list.
1784
NOTE: The calling thread may own latches to pages: to avoid deadlocks,
1785
this function must be written so that it cannot end up waiting for these
1787
@return number of blocks for which the write request was queued;
1788
ULINT_UNDEFINED if there was a flush of the same type already running */
1793
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
1794
ulint min_n) /*!< in: wished minimum number of blocks
1795
flushed (it is not guaranteed that the
1796
actual number is that big, though) */
1800
if (!buf_flush_start(buf_pool, BUF_FLUSH_LRU)) {
1801
return(ULINT_UNDEFINED);
1804
page_count = buf_flush_batch(buf_pool, BUF_FLUSH_LRU, min_n, 0);
1806
buf_flush_end(buf_pool, BUF_FLUSH_LRU);
1808
buf_flush_common(BUF_FLUSH_LRU, page_count);
1813
/*******************************************************************//**
1814
This utility flushes dirty blocks from the end of the flush list of
1815
all buffer pool instances.
1816
NOTE: The calling thread is not allowed to own any latches on pages!
1817
@return number of blocks for which the write request was queued;
1818
ULINT_UNDEFINED if there was a flush of the same type already running */
1823
ulint min_n, /*!< in: wished minimum number of blocks
1824
flushed (it is not guaranteed that the
1825
actual number is that big, though) */
1826
ib_uint64_t lsn_limit) /*!< in the case BUF_FLUSH_LIST all
1827
blocks whose oldest_modification is
1828
smaller than this should be flushed
1829
(if their number does not exceed
1830
min_n), otherwise ignored */
1833
ulint total_page_count = 0;
1834
ibool skipped = FALSE;
1836
if (min_n != ULINT_MAX) {
1837
/* Ensure that flushing is spread evenly amongst the
1838
buffer pool instances. When min_n is ULINT_MAX
1839
we need to flush everything up to the lsn limit
1840
so no limit here. */
1841
min_n = (min_n + srv_buf_pool_instances - 1)
1842
/ srv_buf_pool_instances;
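/* Illustrative example (hypothetical numbers): with min_n = 10 and
srv_buf_pool_instances = 4, the expression above gives (10 + 4 - 1) / 4 = 3,
i.e. the per-instance quota is the ceiling of min_n / srv_buf_pool_instances,
so the instances together still cover at least min_n pages. */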
1845
/* Flush to lsn_limit in all buffer pool instances */
1846
for (i = 0; i < srv_buf_pool_instances; i++) {
1847
buf_pool_t* buf_pool;
1848
ulint page_count = 0;
1850
buf_pool = buf_pool_from_array(i);
1852
if (!buf_flush_start(buf_pool, BUF_FLUSH_LIST)) {
1853
/* We have two choices here. If lsn_limit was
1854
specified then skipping an instance of buffer
1855
pool means we cannot guarantee that all pages
1856
up to lsn_limit have been flushed. We can
1857
return right now with failure or we can try
1858
to flush remaining buffer pools up to the
1859
lsn_limit. We attempt to flush other buffer
1860
pools based on the assumption that it will
1861
help in the retry which will follow the failure. */
1868
page_count = buf_flush_batch(
1869
buf_pool, BUF_FLUSH_LIST, min_n, lsn_limit);
1871
buf_flush_end(buf_pool, BUF_FLUSH_LIST);
1873
buf_flush_common(BUF_FLUSH_LIST, page_count);
1875
total_page_count += page_count;
1878
return(lsn_limit != IB_ULONGLONG_MAX && skipped
1879
? ULINT_UNDEFINED : total_page_count);
1882
/******************************************************************//**
1883
1179
Gives a recommendation of how many blocks should be flushed to establish
1884
1180
a big enough margin of replaceable blocks near the end of the LRU list
1885
and in the free list.
1886
@return number of blocks which should be flushed from the end of the
LRU list */
1890
buf_flush_LRU_recommendation(
1891
/*=========================*/
1892
buf_pool_t* buf_pool) /*!< in: Buffer pool instance */
1894
1189
buf_page_t* bpage;
1895
1190
ulint n_replaceable;
1896
1191
ulint distance = 0;
1898
buf_pool_mutex_enter(buf_pool);
1900
1195
n_replaceable = UT_LIST_GET_LEN(buf_pool->free);
1902
1197
bpage = UT_LIST_GET_LAST(buf_pool->LRU);
1904
1199
while ((bpage != NULL)
1905
&& (n_replaceable < BUF_FLUSH_FREE_BLOCK_MARGIN(buf_pool)
1906
+ BUF_FLUSH_EXTRA_MARGIN(buf_pool))
1907
&& (distance < BUF_LRU_FREE_SEARCH_LEN(buf_pool))) {
1200
&& (n_replaceable < BUF_FLUSH_FREE_BLOCK_MARGIN
1201
+ BUF_FLUSH_EXTRA_MARGIN)
1202
&& (distance < BUF_LRU_FREE_SEARCH_LEN)) {
1909
1204
mutex_t* block_mutex = buf_page_get_mutex(bpage);
1941
1235
immediately, without waiting. */
1944
buf_flush_free_margin(
1945
/*==================*/
1946
buf_pool_t* buf_pool) /*!< in: Buffer pool instance */
1948
1241
ulint n_to_flush;
1950
n_to_flush = buf_flush_LRU_recommendation(buf_pool);
1952
1246
if (n_to_flush > 0) {
1955
n_flushed = buf_flush_LRU(buf_pool, n_to_flush);
1957
1248
if (n_flushed == ULINT_UNDEFINED) {
1958
1249
/* There was an LRU type flush batch already running;
1959
1250
let us wait for it to end */
1961
buf_flush_wait_batch_end(buf_pool, BUF_FLUSH_LRU);
1966
/*********************************************************************//**
1967
Flushes pages from the end of all the LRU lists. */
1970
buf_flush_free_margins(void)
1971
/*========================*/
1975
for (i = 0; i < srv_buf_pool_instances; i++) {
1976
buf_pool_t* buf_pool;
1978
buf_pool = buf_pool_from_array(i);
1980
buf_flush_free_margin(buf_pool);
1984
/*********************************************************************
1985
Update the historical stats that we are collecting for flush rate
1986
heuristics at the end of each interval.
1987
The flush rate heuristic depends on (a) the rate of redo log generation and
(b) the rate at which LRU flushing is happening. */
1991
buf_flush_stat_update(void)
1992
/*=======================*/
1994
buf_flush_stat_t* item;
1995
ib_uint64_t lsn_diff;
1999
lsn = log_get_lsn();
2000
if (buf_flush_stat_cur.redo == 0) {
2001
/* First time around. Just update the current LSN
and return. */
2003
buf_flush_stat_cur.redo = lsn;
2007
item = &buf_flush_stat_arr[buf_flush_stat_arr_ind];
2009
/* values for this interval */
2010
lsn_diff = lsn - buf_flush_stat_cur.redo;
2011
n_flushed = buf_lru_flush_page_count
2012
- buf_flush_stat_cur.n_flushed;
2014
/* add the current value and subtract the obsolete entry. */
2015
buf_flush_stat_sum.redo += lsn_diff - item->redo;
2016
buf_flush_stat_sum.n_flushed += n_flushed - item->n_flushed;
2018
/* put current entry in the array. */
2019
item->redo = lsn_diff;
2020
item->n_flushed = n_flushed;
2022
/* update the index */
2023
buf_flush_stat_arr_ind++;
2024
buf_flush_stat_arr_ind %= BUF_FLUSH_STAT_N_INTERVAL;
2026
/* reset the current entry. */
2027
buf_flush_stat_cur.redo = lsn;
2028
buf_flush_stat_cur.n_flushed = buf_lru_flush_page_count;
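/* Illustrative example (hypothetical values, using a shortened history of
3 intervals instead of BUF_FLUSH_STAT_N_INTERVAL): if the stored per-interval
redo amounts are 5, 7 and 9, the running sum is 21. When a new interval
produces 4 and overwrites the slot holding 5, the sum becomes 21 + 4 - 5 = 20,
so buf_flush_stat_sum always reflects the last N intervals without rescanning
the whole array. */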
2031
/*********************************************************************
2032
Determines the fraction of dirty pages that need to be flushed based
2033
on the speed at which we generate redo log. Note that if redo log
2034
is generated at a significant rate without corresponding increase
2035
in the number of dirty pages (for example, an in-memory workload)
2036
it can cause IO bursts of flushing. This function implements heuristics
2037
to avoid this burstiness.
2038
@return number of dirty pages to be flushed / second */
2041
buf_flush_get_desired_flush_rate(void)
2042
/*==================================*/
2049
ulint lru_flush_avg;
2050
ib_uint64_t lsn = log_get_lsn();
2051
ulint log_capacity = log_get_capacity();
2053
/* log_capacity should never be zero after the initialization
2054
of log subsystem. */
2055
ut_ad(log_capacity != 0);
2057
/* Get total number of dirty pages. It is OK to access
2058
flush_list without holding any mutex as we are using this
2059
only for heuristics. */
2060
for (i = 0; i < srv_buf_pool_instances; i++) {
2061
buf_pool_t* buf_pool;
2063
buf_pool = buf_pool_from_array(i);
2064
n_dirty += UT_LIST_GET_LEN(buf_pool->flush_list);
2067
/* An overflow can happen if we generate more than 2^32 bytes
2068
of redo in this interval i.e.: 4G of redo in 1 second. We can
2069
safely consider this as infinity because if we ever come close
2070
to 4G we'll start a synchronous flush of dirty pages. */
2071
/* redo_avg below is the average at which redo is generated in the
past BUF_FLUSH_STAT_N_INTERVAL intervals + the redo generated in the
current interval. */
2074
redo_avg = (ulint) (buf_flush_stat_sum.redo
2075
/ BUF_FLUSH_STAT_N_INTERVAL
2076
+ (lsn - buf_flush_stat_cur.redo));
2078
/* An overflow can happen possibly if we flush more than 2^32
2079
pages in BUF_FLUSH_STAT_N_INTERVAL. This is a very very
2080
unlikely scenario. Even when this happens it means that our
2081
flush rate will be off the mark. It won't affect correctness
2082
of any subsystem. */
2083
/* lru_flush_avg below is rate at which pages are flushed as
2084
part of LRU flush in past BUF_FLUSH_STAT_N_INTERVAL + the
2085
number of pages flushed in the current interval. */
2086
lru_flush_avg = buf_flush_stat_sum.n_flushed
2087
/ BUF_FLUSH_STAT_N_INTERVAL
2088
+ (buf_lru_flush_page_count
2089
- buf_flush_stat_cur.n_flushed);
2091
n_flush_req = (n_dirty * redo_avg) / log_capacity;
2093
/* The number of pages that we want to flush from the flush
2094
list is the difference between the required rate and the
2095
number of pages that we are historically flushing from the
LRU list. */
2097
rate = n_flush_req - lru_flush_avg;
2098
return(rate > 0 ? (ulint) rate : 0);
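/* Illustrative calculation (hypothetical values): with n_dirty = 10000
pages, redo_avg = 2 MB/s of redo, log_capacity = 100 MB and
lru_flush_avg = 50 pages/s, the code above gives
n_flush_req = (10000 * 2) / 100 = 200 pages/s and
rate = 200 - 50 = 150, so about 150 pages/s would be requested from the
flush_list on top of the ongoing LRU flushing. */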
2101
1257
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
2102
/******************************************************************//**
2103
Validates the flush list.
2104
@return TRUE if ok */
2107
buf_flush_validate_low(
2108
/*===================*/
2109
buf_pool_t* buf_pool) /*!< in: Buffer pool instance */
2112
const ib_rbt_node_t* rnode = NULL;
2114
ut_ad(buf_flush_list_mutex_own(buf_pool));
2116
UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list,
2117
ut_ad(ut_list_node_313->in_flush_list));
2119
1270
bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
2121
/* If we are in recovery mode i.e.: flush_rbt != NULL
2122
then each block in the flush_list must also be present
2123
in the flush_rbt. */
2124
if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
2125
rnode = rbt_first(buf_pool->flush_rbt);
2128
1272
while (bpage != NULL) {
2129
1273
const ib_uint64_t om = bpage->oldest_modification;
2131
ut_ad(buf_pool_from_bpage(bpage) == buf_pool);
2133
1274
ut_ad(bpage->in_flush_list);
2135
/* A page in flush_list can be in BUF_BLOCK_REMOVE_HASH
2136
state. This happens when a page is in the middle of
2137
being relocated. In that case the original descriptor
2138
can have this state and still be in the flush list
2139
waiting to acquire the flush_list_mutex to complete
the relocation. */
2141
ut_a(buf_page_in_file(bpage)
2142
|| buf_page_get_state(bpage) == BUF_BLOCK_REMOVE_HASH);
2145
if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
2146
buf_page_t** prpage;
2149
prpage = rbt_value(buf_page_t*, rnode);
2152
ut_a(*prpage == bpage);
2153
rnode = rbt_next(buf_pool->flush_rbt, rnode);
2156
1278
bpage = UT_LIST_GET_NEXT(list, bpage);
2158
1280
ut_a(!bpage || om >= bpage->oldest_modification);
2161
/* By this time we must have exhausted the traversal of
2162
flush_rbt (if active) as well. */
2163
ut_a(rnode == NULL);
2168
/******************************************************************//**
2169
Validates the flush list.
2170
@return TRUE if ok */
2175
buf_pool_t* buf_pool) /*!< buffer pool instance */
2179
buf_flush_list_mutex_enter(buf_pool);
2181
ret = buf_flush_validate_low(buf_pool);
2183
buf_flush_list_mutex_exit(buf_pool);
2187
1304
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
2188
#endif /* !UNIV_HOTBACKUP */