#include "buf0flu.ic"

#ifndef UNIV_HOTBACKUP
#include "ut0byte.h"
#include "ut0lst.h"
#include "page0page.h"
#include "fil0fil.h"
#include "buf0lru.h"
#include "buf0rea.h"
#include "ibuf0ibuf.h"
#include "log0log.h"
#include "os0file.h"
#include "trx0sys.h"

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG

/**********************************************************************
These statistics are generated for heuristics used in estimating the
rate at which we should flush the dirty blocks to avoid bursty IO
activity. Note that the rate of flushing not only depends on how many
dirty pages we have in the buffer pool but it is also a function of
how much redo the workload is generating and at what rate. */

/** Number of intervals for which we keep the history of these stats.
Each interval is 1 second, defined by the rate at which
srv_error_monitor_thread() calls buf_flush_stat_update(). */
#define BUF_FLUSH_STAT_N_INTERVAL 20
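
/* Illustrative sketch (not part of the original source): the statistics
below are kept as a BUF_FLUSH_STAT_N_INTERVAL-slot ring of one-second
samples plus an incrementally maintained running sum, which is how a
sliding average can be computed in O(1) per update. The struct and
function names in this sketch are hypothetical; the code is kept out of
the build with #if 0. */
#if 0
struct sample_ring {
	unsigned long	vals[BUF_FLUSH_STAT_N_INTERVAL];
	unsigned long	sum;	/* running sum of all slots in vals[] */
	unsigned	ind;	/* next slot to overwrite (round-robin) */
};

/* Record one interval's value: add the new sample to the running sum
and subtract the obsolete slot that is about to be overwritten, the
same way buf_flush_stat_sum is maintained in buf_flush_stat_update(). */
static void
sample_ring_push(struct sample_ring* r, unsigned long v)
{
	r->sum += v - r->vals[r->ind];
	r->vals[r->ind] = v;
	r->ind = (r->ind + 1) % BUF_FLUSH_STAT_N_INTERVAL;
}

/* Average over the last BUF_FLUSH_STAT_N_INTERVAL one-second samples. */
static unsigned long
sample_ring_avg(const struct sample_ring* r)
{
	return(r->sum / BUF_FLUSH_STAT_N_INTERVAL);
}
#endif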

/** Sampled values of buf_flush_stat_cur.
Not protected by any mutex. Updated by buf_flush_stat_update(). */
static buf_flush_stat_t buf_flush_stat_arr[BUF_FLUSH_STAT_N_INTERVAL];

/** Cursor to buf_flush_stat_arr[]. Updated in a round-robin fashion. */
static ulint buf_flush_stat_arr_ind;

/** Values at start of the current interval. Reset by
buf_flush_stat_update(). */
static buf_flush_stat_t buf_flush_stat_cur;

/** Running sum of past values of buf_flush_stat_cur.
Updated by buf_flush_stat_update(). Not protected by any mutex. */
static buf_flush_stat_t buf_flush_stat_sum;

/** Number of pages flushed through non flush_list flushes. */
static ulint buf_lru_flush_page_count = 0;

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/******************************************************************//**
Validates the flush list. */
buf_flush_validate_low(
/*===================*/
	buf_pool_t* buf_pool);	/*!< in: Buffer pool instance */
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

/******************************************************************//**
Inserts a block in the flush_rbt and returns a pointer to its
predecessor or NULL if no predecessor. The ordering is maintained
on the basis of the <oldest_modification, space, offset> key.
@return pointer to the predecessor or NULL if no predecessor. */
buf_flush_insert_in_flush_rbt(
/*==========================*/
	buf_page_t* bpage)	/*!< in: bpage to be inserted. */
102
const ib_rbt_node_t* c_node;
103
const ib_rbt_node_t* p_node;
104
buf_page_t* prev = NULL;
105
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
107
ut_ad(buf_flush_list_mutex_own(buf_pool));
109
/* Insert this buffer into the rbt. */
110
c_node = rbt_insert(buf_pool->flush_rbt, &bpage, &bpage);
111
ut_a(c_node != NULL);
113
/* Get the predecessor. */
114
p_node = rbt_prev(buf_pool->flush_rbt, c_node);
116
if (p_node != NULL) {
118
value = rbt_value(buf_page_t*, p_node);

/*********************************************************//**
Delete a bpage from the flush_rbt. */
buf_flush_delete_from_flush_rbt(
/*============================*/
	buf_page_t* bpage)	/*!< in: bpage to be removed. */
136
#endif /* UNIV_DEBUG */
137
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
139
ut_ad(buf_flush_list_mutex_own(buf_pool));
143
#endif /* UNIV_DEBUG */
144
rbt_delete(buf_pool->flush_rbt, &bpage);

/*****************************************************************//**
Compare two modified blocks in the buffer pool. The key for comparison
is:
key = <oldest_modification, space, offset>
This comparison is used to maintain ordering of blocks in the
flush_rbt.
Note that for the purpose of flush_rbt, we only need to order blocks
on the oldest_modification. The other two fields are used to uniquely
distinguish the blocks.
@return < 0 if b2 < b1, 0 if b2 == b1, > 0 if b2 > b1 */
162
const void* p1, /*!< in: block1 */
163
const void* p2) /*!< in: block2 */
166
const buf_page_t* b1 = *(const buf_page_t**) p1;
167
const buf_page_t* b2 = *(const buf_page_t**) p2;
169
buf_pool_t* buf_pool = buf_pool_from_bpage(b1);
170
#endif /* UNIV_DEBUG */
175
ut_ad(buf_flush_list_mutex_own(buf_pool));
177
ut_ad(b1->in_flush_list);
178
ut_ad(b2->in_flush_list);
180
if (b2->oldest_modification > b1->oldest_modification) {
182
} else if (b2->oldest_modification < b1->oldest_modification) {
186
/* If oldest_modification is same then decide on the space. */
187
ret = (int)(b2->space - b1->space);
189
/* Or else decide ordering on the offset field. */
190
return(ret ? ret : (int)(b2->offset - b1->offset));
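
/* Illustrative sketch (not part of the original source): the same
<oldest_modification, space, offset> ordering expressed over a plain
struct, to make the tie-breaking order used by buf_flush_block_cmp()
explicit. The type and function names are hypothetical; the snippet is
kept out of the build with #if 0. */
#if 0
struct flush_key {
	unsigned long long	oldest_modification;
	unsigned long		space;
	unsigned long		offset;
};

/* Mirrors the convention documented above: returns > 0 if b > a,
< 0 if b < a, and 0 only when all three fields match, so distinct
dirty pages never compare equal even when they share an
oldest_modification. */
static int
flush_key_cmp(const struct flush_key* a, const struct flush_key* b)
{
	if (b->oldest_modification > a->oldest_modification) {
		return(1);
	} else if (b->oldest_modification < a->oldest_modification) {
		return(-1);
	} else if (b->space != a->space) {
		return((int) (b->space - a->space));
	}

	return((int) (b->offset - a->offset));
}
#endif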

/********************************************************************//**
Initialize the red-black tree to speed up insertions into the flush_list
during the recovery process. Should be called at the start of the
recovery process before any page has been read/written. */
buf_flush_init_flush_rbt(void)
/*==========================*/
204
for (i = 0; i < srv_buf_pool_instances; i++) {
205
buf_pool_t* buf_pool;
207
buf_pool = buf_pool_from_array(i);
209
buf_flush_list_mutex_enter(buf_pool);
211
/* Create red black tree for speedy insertions in flush list. */
212
buf_pool->flush_rbt = rbt_create(
213
sizeof(buf_page_t*), buf_flush_block_cmp);
215
buf_flush_list_mutex_exit(buf_pool);
219
/********************************************************************//**
220
Frees up the red-black tree. */
223
buf_flush_free_flush_rbt(void)
224
/*==========================*/
228
for (i = 0; i < srv_buf_pool_instances; i++) {
229
buf_pool_t* buf_pool;
231
buf_pool = buf_pool_from_array(i);
233
buf_flush_list_mutex_enter(buf_pool);
235
#ifdef UNIV_DEBUG_VALGRIND
237
ulint zip_size = buf_block_get_zip_size(block);
239
if (UNIV_UNLIKELY(zip_size)) {
240
UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size);
242
UNIV_MEM_ASSERT_RW(block->frame, UNIV_PAGE_SIZE);
245
#endif /* UNIV_DEBUG_VALGRIND */
246
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
247
ut_a(buf_flush_validate_low(buf_pool));
248
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
250
rbt_free(buf_pool->flush_rbt);
251
buf_pool->flush_rbt = NULL;
253
buf_flush_list_mutex_exit(buf_pool);
257
/********************************************************************//**
35
buf_flush_validate_low(void);
36
/*========================*/
38
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
40
/************************************************************************
258
41
Inserts a modified block into the flush list. */
261
44
buf_flush_insert_into_flush_list(
262
45
/*=============================*/
263
buf_pool_t* buf_pool, /*!< buffer pool instance */
264
buf_block_t* block, /*!< in/out: block which is modified */
265
ib_uint64_t lsn) /*!< in: oldest modification */
46
buf_page_t* bpage) /* in: block which is modified */
267
ut_ad(!buf_pool_mutex_own(buf_pool));
268
ut_ad(log_flush_order_mutex_own());
269
ut_ad(mutex_own(&block->mutex));
271
buf_flush_list_mutex_enter(buf_pool);
48
ut_ad(buf_pool_mutex_own());
273
49
ut_ad((UT_LIST_GET_FIRST(buf_pool->flush_list) == NULL)
274
50
|| (UT_LIST_GET_FIRST(buf_pool->flush_list)->oldest_modification
51
<= bpage->oldest_modification));
277
/* If we are in the recovery then we need to update the flush
278
red-black tree as well. */
279
if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
280
buf_flush_list_mutex_exit(buf_pool);
281
buf_flush_insert_sorted_into_flush_list(buf_pool, block, lsn);
53
switch (buf_page_get_state(bpage)) {
54
case BUF_BLOCK_ZIP_PAGE:
55
mutex_enter(&buf_pool_zip_mutex);
56
buf_page_set_state(bpage, BUF_BLOCK_ZIP_DIRTY);
57
mutex_exit(&buf_pool_zip_mutex);
58
UT_LIST_REMOVE(list, buf_pool->zip_clean, bpage);
60
case BUF_BLOCK_ZIP_DIRTY:
61
case BUF_BLOCK_FILE_PAGE:
62
ut_ad(bpage->in_LRU_list);
63
ut_ad(bpage->in_page_hash);
64
ut_ad(!bpage->in_zip_hash);
65
ut_ad(!bpage->in_flush_list);
66
ut_d(bpage->in_flush_list = TRUE);
67
UT_LIST_ADD_FIRST(list, buf_pool->flush_list, bpage);
69
case BUF_BLOCK_ZIP_FREE:
70
case BUF_BLOCK_NOT_USED:
71
case BUF_BLOCK_READY_FOR_USE:
72
case BUF_BLOCK_MEMORY:
73
case BUF_BLOCK_REMOVE_HASH:
285
ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
286
ut_ad(!block->page.in_flush_list);
288
ut_d(block->page.in_flush_list = TRUE);
289
block->page.oldest_modification = lsn;
290
UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
292
#ifdef UNIV_DEBUG_VALGRIND
294
ulint zip_size = buf_block_get_zip_size(block);
296
if (UNIV_UNLIKELY(zip_size)) {
297
UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size);
299
UNIV_MEM_ASSERT_RW(block->frame, UNIV_PAGE_SIZE);
302
#endif /* UNIV_DEBUG_VALGRIND */
303
78
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
304
ut_a(buf_flush_validate_low(buf_pool));
79
ut_a(buf_flush_validate_low());
305
80
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
307
buf_flush_list_mutex_exit(buf_pool);
310
/********************************************************************//**
83
/************************************************************************
311
84
Inserts a modified block into the flush list in the right sorted position.
312
85
This function is used by recovery, because there the modifications do not
313
86
necessarily come in the order of lsn's. */
316
89
buf_flush_insert_sorted_into_flush_list(
317
90
/*====================================*/
318
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
319
buf_block_t* block, /*!< in/out: block which is modified */
320
ib_uint64_t lsn) /*!< in: oldest modification */
91
buf_page_t* bpage) /* in: block which is modified */
322
93
buf_page_t* prev_b;
325
ut_ad(!buf_pool_mutex_own(buf_pool));
326
ut_ad(log_flush_order_mutex_own());
327
ut_ad(mutex_own(&block->mutex));
328
ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
330
buf_flush_list_mutex_enter(buf_pool);

	/* The field in_LRU_list is protected by buf_pool->mutex, which
	we are not holding. However, while a block is in the flush
	list, it is dirty and cannot be discarded, neither from the
	page_hash nor from the LRU list. At most, the uncompressed
	page frame of a compressed block may be discarded or created
	(copying the block->page to or from a buf_page_t that is
	dynamically allocated from buf_buddy_alloc()). Because those
	transitions hold block->mutex and the flush list mutex (via
	buf_flush_relocate_on_flush_list()), there is no possibility
	of a race condition in the assertions below. */
342
ut_ad(block->page.in_LRU_list);
343
ut_ad(block->page.in_page_hash);
344
/* buf_buddy_block_register() will take a block in the
345
BUF_BLOCK_MEMORY state, not a file page. */
346
ut_ad(!block->page.in_zip_hash);
348
ut_ad(!block->page.in_flush_list);
349
ut_d(block->page.in_flush_list = TRUE);
350
block->page.oldest_modification = lsn;
352
#ifdef UNIV_DEBUG_VALGRIND
354
ulint zip_size = buf_block_get_zip_size(block);
356
if (UNIV_UNLIKELY(zip_size)) {
357
UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size);
359
UNIV_MEM_ASSERT_RW(block->frame, UNIV_PAGE_SIZE);
362
#endif /* UNIV_DEBUG_VALGRIND */
364
#ifdef UNIV_DEBUG_VALGRIND
366
ulint zip_size = buf_block_get_zip_size(block);
368
if (UNIV_UNLIKELY(zip_size)) {
369
UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size);
371
UNIV_MEM_ASSERT_RW(block->frame, UNIV_PAGE_SIZE);
374
#endif /* UNIV_DEBUG_VALGRIND */
96
ut_ad(buf_pool_mutex_own());
98
switch (buf_page_get_state(bpage)) {
99
case BUF_BLOCK_ZIP_PAGE:
100
mutex_enter(&buf_pool_zip_mutex);
101
buf_page_set_state(bpage, BUF_BLOCK_ZIP_DIRTY);
102
mutex_exit(&buf_pool_zip_mutex);
103
UT_LIST_REMOVE(list, buf_pool->zip_clean, bpage);
105
case BUF_BLOCK_ZIP_DIRTY:
106
case BUF_BLOCK_FILE_PAGE:
107
ut_ad(bpage->in_LRU_list);
108
ut_ad(bpage->in_page_hash);
109
ut_ad(!bpage->in_zip_hash);
110
ut_ad(!bpage->in_flush_list);
111
ut_d(bpage->in_flush_list = TRUE);
113
case BUF_BLOCK_ZIP_FREE:
114
case BUF_BLOCK_NOT_USED:
115
case BUF_BLOCK_READY_FOR_USE:
116
case BUF_BLOCK_MEMORY:
117
case BUF_BLOCK_REMOVE_HASH:

	/* For the most part when this function is called the flush_rbt
	should not be NULL. In a very rare boundary case it is possible
	that the flush_rbt has already been freed by the recovery thread
	before the last page was hooked up in the flush_list by the
	io-handler thread. In that case we'll just do a simple
	linear search in the else block. */
384
if (buf_pool->flush_rbt) {
386
prev_b = buf_flush_insert_in_flush_rbt(&block->page);
390
b = UT_LIST_GET_FIRST(buf_pool->flush_list);
392
while (b && b->oldest_modification
393
> block->page.oldest_modification) {
394
ut_ad(b->in_flush_list);
396
b = UT_LIST_GET_NEXT(list, b);
123
b = UT_LIST_GET_FIRST(buf_pool->flush_list);
125
while (b && b->oldest_modification > bpage->oldest_modification) {
126
ut_ad(b->in_flush_list);
128
b = UT_LIST_GET_NEXT(list, b);
400
131
if (prev_b == NULL) {
401
UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
132
UT_LIST_ADD_FIRST(list, buf_pool->flush_list, bpage);
403
134
UT_LIST_INSERT_AFTER(list, buf_pool->flush_list,
404
prev_b, &block->page);
407
138
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
408
ut_a(buf_flush_validate_low(buf_pool));
139
ut_a(buf_flush_validate_low());
409
140
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
411
buf_flush_list_mutex_exit(buf_pool);

/********************************************************************//**
Returns TRUE if the file page block is immediately suitable for replacement,
i.e., the transition FILE_PAGE => NOT_USED is allowed.
@return TRUE if can replace immediately */
buf_flush_ready_for_replace(
/*========================*/
	buf_page_t* bpage)	/*!< in: buffer control block, must be
				buf_page_in_file(bpage) and in the LRU list */
426
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
427
ut_ad(buf_pool_mutex_own(buf_pool));
154
ut_ad(buf_pool_mutex_own());
429
155
ut_ad(mutex_own(buf_page_get_mutex(bpage)));
430
156
ut_ad(bpage->in_LRU_list);
526
/* If the flush_rbt is active then delete from there as well. */
527
if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
528
buf_flush_delete_from_flush_rbt(bpage);
531
/* Must be done after we have removed it from the flush_rbt
532
because we assert on in_flush_list in comparison function. */
533
ut_d(bpage->in_flush_list = FALSE);
535
245
bpage->oldest_modification = 0;
537
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
538
ut_a(buf_flush_validate_low(buf_pool));
539
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
541
buf_flush_list_mutex_exit(buf_pool);

/*******************************************************************//**
Relocates a buffer control block on the flush_list.
Note that it is assumed that the contents of bpage have already been
copied to dpage.
IMPORTANT: When this function is called bpage and dpage are not
exact copies of each other. For example, they both will have different
::state. Also the ::list pointers in dpage may be stale. We need to
use the current list node (bpage) to do the list manipulation because
the list pointers could have changed between the time that we copied
the contents of bpage to the dpage and the flush list manipulation
below. */
buf_flush_relocate_on_flush_list(
558
/*=============================*/
559
buf_page_t* bpage, /*!< in/out: control block being moved */
560
buf_page_t* dpage) /*!< in/out: destination block */
563
buf_page_t* prev_b = NULL;
564
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
566
ut_ad(buf_pool_mutex_own(buf_pool));
567
/* Must reside in the same buffer pool. */
568
ut_ad(buf_pool == buf_pool_from_bpage(dpage));
570
ut_ad(mutex_own(buf_page_get_mutex(bpage)));
572
buf_flush_list_mutex_enter(buf_pool);

	/* FIXME: At this point we have both buf_pool and flush_list
	mutexes. Theoretically removal of a block from flush list is
	only covered by the flush_list mutex, but currently we do
	hold the buf_pool mutex in buf_flush_remove(); therefore this
	block is guaranteed to be in the flush list. We need to check
	whether this will work without the assumption that the
	block-removing code holds the buf_pool mutex. */
581
ut_ad(bpage->in_flush_list);
582
ut_ad(dpage->in_flush_list);
584
/* If recovery is active we must swap the control blocks in
585
the flush_rbt as well. */
586
if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
587
buf_flush_delete_from_flush_rbt(bpage);
588
prev_b = buf_flush_insert_in_flush_rbt(dpage);
591
/* Must be done after we have removed it from the flush_rbt
592
because we assert on in_flush_list in comparison function. */
593
ut_d(bpage->in_flush_list = FALSE);
595
prev = UT_LIST_GET_PREV(list, bpage);
596
UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
599
ut_ad(prev->in_flush_list);
600
UT_LIST_INSERT_AFTER(
602
buf_pool->flush_list,
607
buf_pool->flush_list,
611
/* Just an extra check. Previous in flush_list
612
should be the same control block as in flush_rbt. */
613
ut_a(!buf_pool->flush_rbt || prev_b == prev);
615
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
616
ut_a(buf_flush_validate_low(buf_pool));
617
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
619
buf_flush_list_mutex_exit(buf_pool);
622
/********************************************************************//**
247
ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list));
250
/************************************************************************
623
251
Updates the flush system data structures when a write is completed. */
626
254
buf_flush_write_complete(
627
255
/*=====================*/
628
buf_page_t* bpage) /*!< in: pointer to the block in question */
256
buf_page_t* bpage) /* in: pointer to the block in question */
630
258
enum buf_flush flush_type;
631
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1155
# if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
1156
/********************************************************************//**
1157
Writes a flushable page asynchronously from the buffer pool to a file.
1158
NOTE: buf_pool->mutex and block->mutex must be held upon entering this
1159
function, and they will be released by this function after flushing.
1160
This is loosely based on buf_flush_batch() and buf_flush_page().
1161
@return TRUE if the page was flushed and the mutexes released */
750
/************************************************************************
751
Writes a page asynchronously from the buffer buf_pool to a file, if it can be
752
found in the buf_pool and it is in a flushable state. NOTE: in simulated aio
753
we must call os_aio_simulated_wake_handler_threads after we have posted a batch
1165
758
/*===============*/
1166
buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */
1167
buf_block_t* block) /*!< in/out: buffer control block */
1169
ut_ad(buf_pool_mutex_own(buf_pool));
1170
ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
1171
ut_ad(mutex_own(&block->mutex));
1173
if (!buf_flush_ready_for_flush(&block->page, BUF_FLUSH_LRU)) {
1177
if (buf_pool->n_flush[BUF_FLUSH_LRU] > 0
1178
|| buf_pool->init_flush[BUF_FLUSH_LRU]) {
1179
/* There is already a flush batch of the same type running */
1183
buf_pool->init_flush[BUF_FLUSH_LRU] = TRUE;
1185
buf_page_set_io_fix(&block->page, BUF_IO_WRITE);
1187
buf_page_set_flush_type(&block->page, BUF_FLUSH_LRU);
1189
if (buf_pool->n_flush[BUF_FLUSH_LRU]++ == 0) {
1191
os_event_reset(buf_pool->no_flush[BUF_FLUSH_LRU]);

	Because any thread may call the LRU flush, even when owning
	locks on pages, to avoid deadlocks, we must make sure that the
	s-lock is acquired on the page without waiting: this is
	accomplished because buf_flush_ready_for_flush() must hold,
	and that requires the page not to be bufferfixed. */
1201
rw_lock_s_lock_gen(&block->lock, BUF_IO_WRITE);
1203
/* Note that the s-latch is acquired before releasing the
1204
buf_pool mutex: this ensures that the latch is acquired
1207
mutex_exit(&block->mutex);
1208
buf_pool_mutex_exit(buf_pool);
1210
/* Even though block is not protected by any mutex at this
1211
point, it is safe to access block, because it is io_fixed and
1212
oldest_modification != 0. Thus, it cannot be relocated in the
1213
buffer pool or removed from flush_list or LRU_list. */
1215
buf_flush_write_block_low(&block->page);
1217
buf_pool_mutex_enter(buf_pool);
1218
buf_pool->init_flush[BUF_FLUSH_LRU] = FALSE;
1220
if (buf_pool->n_flush[BUF_FLUSH_LRU] == 0) {
1221
/* The running flush batch has ended */
1222
os_event_set(buf_pool->no_flush[BUF_FLUSH_LRU]);
1225
buf_pool_mutex_exit(buf_pool);
1226
buf_flush_buffered_writes();
1230
# endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
1232
/********************************************************************//**
1233
Writes a flushable page asynchronously from the buffer pool to a file.
1234
NOTE: in simulated aio we must call
1235
os_aio_simulated_wake_handler_threads after we have posted a batch of
1236
writes! NOTE: buf_pool->mutex and buf_page_get_mutex(bpage) must be
1237
held upon entering this function, and they will be released by this
1243
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
1244
buf_page_t* bpage, /*!< in: buffer control block */
1245
enum buf_flush flush_type) /*!< in: BUF_FLUSH_LRU
1246
or BUF_FLUSH_LIST */
759
/* out: 1 if a page was
760
flushed, 0 otherwise */
761
ulint space, /* in: space id */
762
ulint offset, /* in: page offset */
763
enum buf_flush flush_type) /* in: BUF_FLUSH_LRU, BUF_FLUSH_LIST,
764
or BUF_FLUSH_SINGLE_PAGE */
1248
767
mutex_t* block_mutex;
1249
ibool is_uncompressed;
1251
ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
1252
ut_ad(buf_pool_mutex_own(buf_pool));
1253
ut_ad(buf_page_in_file(bpage));
770
ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST
771
|| flush_type == BUF_FLUSH_SINGLE_PAGE);
773
buf_pool_mutex_enter();
775
bpage = buf_page_hash_get(space, offset);
778
buf_pool_mutex_exit();
782
ut_a(buf_page_in_file(bpage));
1255
783
block_mutex = buf_page_get_mutex(bpage);
1256
ut_ad(mutex_own(block_mutex));
1258
ut_ad(buf_flush_ready_for_flush(bpage, flush_type));
1260
buf_page_set_io_fix(bpage, BUF_IO_WRITE);
1262
buf_page_set_flush_type(bpage, flush_type);
1264
if (buf_pool->n_flush[flush_type] == 0) {
1266
os_event_reset(buf_pool->no_flush[flush_type]);
785
mutex_enter(block_mutex);
787
if (!buf_flush_ready_for_flush(bpage, flush_type)) {
788
mutex_exit(block_mutex);
789
buf_pool_mutex_exit();
1269
buf_pool->n_flush[flush_type]++;
1271
is_uncompressed = (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
1272
ut_ad(is_uncompressed == (block_mutex != &buf_pool->zip_mutex));
1274
793
switch (flush_type) {
1276
794
case BUF_FLUSH_LIST:
795
buf_page_set_io_fix(bpage, BUF_IO_WRITE);
797
buf_page_set_flush_type(bpage, flush_type);
799
if (buf_pool->n_flush[flush_type] == 0) {
801
os_event_reset(buf_pool->no_flush[flush_type]);
804
buf_pool->n_flush[flush_type]++;
1277
806
/* If the simulated aio thread is not running, we must
1278
807
not wait for any latch, as we may end up in a deadlock:
1279
808
if buf_fix_count == 0, then we know we need not wait */
1281
is_s_latched = (bpage->buf_fix_count == 0);
1282
if (is_s_latched && is_uncompressed) {
810
locked = bpage->buf_fix_count == 0;
812
&& buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE) {
1283
813
rw_lock_s_lock_gen(&((buf_block_t*) bpage)->lock,
1287
817
mutex_exit(block_mutex);
1288
buf_pool_mutex_exit(buf_pool);
1290
/* Even though bpage is not protected by any mutex at
1291
this point, it is safe to access bpage, because it is
1292
io_fixed and oldest_modification != 0. Thus, it
1293
cannot be relocated in the buffer pool or removed from
1294
flush_list or LRU_list. */
1296
if (!is_s_latched) {
818
buf_pool_mutex_exit();
1297
821
buf_flush_buffered_writes();
1299
if (is_uncompressed) {
823
if (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE) {
1300
824
rw_lock_s_lock_gen(&((buf_block_t*) bpage)
1301
825
->lock, BUF_IO_WRITE);
1442
967
if (buf_flush_ready_for_flush(bpage, flush_type)
1443
968
&& (i == offset || !bpage->buf_fix_count)) {
		/* We only try to flush those
		neighbors != offset where the buf fix
		count is zero, as we then know that we
		probably can latch the page without a
		semaphore wait. Semaphore waits are
		expensive because we must flush the
		doublewrite buffer before we start
		the write. */
1453
buf_flush_page(buf_pool, bpage, flush_type);
1454
ut_ad(!mutex_own(block_mutex));
1455
ut_ad(!buf_pool_mutex_own(buf_pool));
977
buf_pool_mutex_exit();
979
mutex_exit(block_mutex);
981
/* Note: as we release the buf_pool mutex
982
above, in buf_flush_try_page we cannot be sure
983
the page is still in a flushable state:
984
therefore we check it again inside that
987
count += buf_flush_try_page(space, i,
990
buf_pool_mutex_enter();
1459
992
mutex_exit(block_mutex);
1462
buf_pool_mutex_exit(buf_pool);

/********************************************************************//**
Check if the block is modified and ready for flushing. If the block
is ready to flush then flush the page and try to flush its neighbors.
@return TRUE if buf_pool mutex was not released during this function.
This does not guarantee that some pages were written as well.
The number of pages written is added to the count. */
1477
buf_flush_page_and_try_neighbors(
1478
/*=============================*/
1479
buf_page_t* bpage, /*!< in: buffer control block,
1481
buf_page_in_file(bpage) */
1482
enum buf_flush flush_type, /*!< in: BUF_FLUSH_LRU
1483
or BUF_FLUSH_LIST */
1484
ulint n_to_flush, /*!< in: number of pages to
1486
ulint* count) /*!< in/out: number of pages
1489
mutex_t* block_mutex;
1490
ibool flushed = FALSE;
1492
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1493
#endif /* UNIV_DEBUG */
1495
ut_ad(buf_pool_mutex_own(buf_pool));
1497
block_mutex = buf_page_get_mutex(bpage);
1498
mutex_enter(block_mutex);
1500
ut_a(buf_page_in_file(bpage));
1502
if (buf_flush_ready_for_flush(bpage, flush_type)) {
1505
buf_pool_t* buf_pool;
1507
buf_pool = buf_pool_from_bpage(bpage);
1509
buf_pool_mutex_exit(buf_pool);
1511
/* These fields are protected by both the
1512
buffer pool mutex and block mutex. */
1513
space = buf_page_get_space(bpage);
1514
offset = buf_page_get_page_no(bpage);
1516
mutex_exit(block_mutex);
1518
/* Try to flush also all the neighbors */
1519
*count += buf_flush_try_neighbors(space,
1525
buf_pool_mutex_enter(buf_pool);
1528
mutex_exit(block_mutex);
1531
ut_ad(buf_pool_mutex_own(buf_pool));
1536
/*******************************************************************//**
1537
This utility flushes dirty blocks from the end of the LRU list.
1538
In the case of an LRU flush the calling thread may own latches to
1539
pages: to avoid deadlocks, this function must be written so that it
1540
cannot end up waiting for these latches!
1541
@return number of blocks for which the write request was queued. */
1544
buf_flush_LRU_list_batch(
1545
/*=====================*/
1546
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
1547
ulint max) /*!< in: max of blocks to flush */
1552
ut_ad(buf_pool_mutex_own(buf_pool));
1555
/* Start from the end of the list looking for a
1556
suitable block to be flushed. */
1557
bpage = UT_LIST_GET_LAST(buf_pool->LRU);
1559
/* Iterate backwards over the flush list till we find
1560
a page that isn't ready for flushing. */
1561
while (bpage != NULL
1562
&& !buf_flush_page_and_try_neighbors(
1563
bpage, BUF_FLUSH_LRU, max, &count)) {
1565
bpage = UT_LIST_GET_PREV(LRU, bpage);
1567
} while (bpage != NULL && count < max);
1569
/* We keep track of all flushes happening as part of LRU
1570
flush. When estimating the desired rate at which flush_list
1571
should be flushed, we factor in this value. */
1572
buf_lru_flush_page_count += count;
1574
ut_ad(buf_pool_mutex_own(buf_pool));
1579
/*******************************************************************//**
1580
This utility flushes dirty blocks from the end of the flush_list.
1581
the calling thread is not allowed to own any latches on pages!
1582
@return number of blocks for which the write request was queued;
1583
ULINT_UNDEFINED if there was a flush of the same type already
1587
buf_flush_flush_list_batch(
1588
/*=======================*/
1589
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
ulint min_n, /*!< in: wished minimum number
1591
of blocks flushed (it is not
1592
guaranteed that the actual
1593
number is that big, though) */
1594
ib_uint64_t lsn_limit) /*!< all blocks whose
1595
oldest_modification is smaller
1596
than this should be flushed (if
1597
their number does not exceed
1604
ut_ad(buf_pool_mutex_own(buf_pool));
1606
/* If we have flushed enough, leave the loop */
1608
/* Start from the end of the list looking for a suitable
1609
block to be flushed. */
1611
buf_flush_list_mutex_enter(buf_pool);
1613
/* We use len here because theoretically insertions can
1614
happen in the flush_list below while we are traversing
1615
it for a suitable candidate for flushing. We'd like to
1616
set a limit on how farther we are willing to traverse
1618
len = UT_LIST_GET_LEN(buf_pool->flush_list);
1619
bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
1622
ut_a(bpage->oldest_modification > 0);
1625
if (!bpage || bpage->oldest_modification >= lsn_limit) {
1627
/* We have flushed enough */
1628
buf_flush_list_mutex_exit(buf_pool);
1632
ut_a(bpage->oldest_modification > 0);
1634
ut_ad(bpage->in_flush_list);
1636
buf_flush_list_mutex_exit(buf_pool);
1638
/* The list may change during the flushing and we cannot
1639
safely preserve within this function a pointer to a
1640
block in the list! */
1641
while (bpage != NULL
1643
&& !buf_flush_page_and_try_neighbors(
1644
bpage, BUF_FLUSH_LIST, min_n, &count)) {
1646
buf_flush_list_mutex_enter(buf_pool);
1648
/* If we are here that means that buf_pool->mutex
1649
was not released in buf_flush_page_and_try_neighbors()
1650
above and this guarantees that bpage didn't get
1651
relocated since we released the flush_list
1652
mutex above. There is a chance, however, that
1653
the bpage got removed from flush_list (not
1654
currently possible because flush_list_remove()
1655
also obtains buf_pool mutex but that may change
1656
in future). To avoid this scenario we check
1657
the oldest_modification and if it is zero
1658
we start all over again. */
1659
if (bpage->oldest_modification == 0) {
1660
buf_flush_list_mutex_exit(buf_pool);
1664
bpage = UT_LIST_GET_PREV(list, bpage);
1666
ut_ad(!bpage || bpage->in_flush_list);
1668
buf_flush_list_mutex_exit(buf_pool);
1673
} while (count < min_n && bpage != NULL && len > 0);
1675
ut_ad(buf_pool_mutex_own(buf_pool));

	buf_pool_mutex_exit();

/*******************************************************************//**
This utility flushes dirty blocks from the end of the LRU list or flush_list.
NOTE 1: in the case of an LRU flush the calling thread may own latches to
pages: to avoid deadlocks, this function must be written so that it cannot
end up waiting for these latches! NOTE 2: in the case of a flush list flush,
the calling thread is not allowed to own any latches on pages!
@return number of blocks for which the write request was queued;
ULINT_UNDEFINED if there was a flush of the same type already running */
1690
1010
buf_flush_batch(
1691
1011
/*============*/
1692
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
1693
enum buf_flush flush_type, /*!< in: BUF_FLUSH_LRU or
1012
/* out: number of blocks for which the
1013
write request was queued;
1014
ULINT_UNDEFINED if there was a flush
1015
of the same type already running */
1016
enum buf_flush flush_type, /* in: BUF_FLUSH_LRU or
1694
1017
BUF_FLUSH_LIST; if BUF_FLUSH_LIST,
1695
1018
then the caller must not own any
1696
1019
latches on pages */
	ulint min_n, /*!< in: wished minimum number of blocks
	flushed (it is not guaranteed that the
	actual number is that big, though) */
1700
ib_uint64_t lsn_limit) /*!< in: in the case of BUF_FLUSH_LIST
1701
all blocks whose oldest_modification is
1023
ib_uint64_t lsn_limit) /* in the case BUF_FLUSH_LIST all
1024
blocks whose oldest_modification is
1702
1025
smaller than this should be flushed
1703
1026
(if their number does not exceed
1704
1027
min_n), otherwise ignored */
1030
ulint page_count = 0;
1031
ulint old_page_count;
1708
ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
1035
ut_ad((flush_type == BUF_FLUSH_LRU)
1036
|| (flush_type == BUF_FLUSH_LIST));
1709
1037
#ifdef UNIV_SYNC_DEBUG
1710
1038
ut_ad((flush_type != BUF_FLUSH_LIST)
1711
1039
|| sync_thread_levels_empty_gen(TRUE));
1712
1040
#endif /* UNIV_SYNC_DEBUG */
1714
buf_pool_mutex_enter(buf_pool);
1716
/* Note: The buffer pool mutex is released and reacquired within
1717
the flush functions. */
1718
switch(flush_type) {
1720
count = buf_flush_LRU_list_batch(buf_pool, min_n);
1722
case BUF_FLUSH_LIST:
1723
count = buf_flush_flush_list_batch(buf_pool, min_n, lsn_limit);
1729
buf_pool_mutex_exit(buf_pool);
1731
buf_flush_buffered_writes();
1734
if (buf_debug_prints && count > 0) {
1735
fprintf(stderr, flush_type == BUF_FLUSH_LRU
1736
? "Flushed %lu pages in LRU flush\n"
1737
: "Flushed %lu pages in flush list flush\n",
1740
#endif /* UNIV_DEBUG */
1742
srv_buf_pool_flushed += count;
1747
/******************************************************************//**
1748
Gather the aggregated stats for both flush list and LRU list flushing */
1753
enum buf_flush flush_type, /*!< in: type of flush */
1754
ulint page_count) /*!< in: number of pages flushed */
1756
buf_flush_buffered_writes();
1758
ut_a(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
1041
buf_pool_mutex_enter();
1043
if ((buf_pool->n_flush[flush_type] > 0)
1044
|| (buf_pool->init_flush[flush_type] == TRUE)) {
1046
/* There is already a flush batch of the same type running */
1048
buf_pool_mutex_exit();
1050
return(ULINT_UNDEFINED);
1053
buf_pool->init_flush[flush_type] = TRUE;
1055
bool done_with_loop= false;
1056
for (;done_with_loop != true;) {
1058
/* If we have flushed enough, leave the loop */
1059
if (page_count >= min_n) {
1064
/* Start from the end of the list looking for a suitable
1065
block to be flushed. */
1067
if (flush_type == BUF_FLUSH_LRU) {
1068
bpage = UT_LIST_GET_LAST(buf_pool->LRU);
1070
ut_ad(flush_type == BUF_FLUSH_LIST);
1072
bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
1074
|| bpage->oldest_modification >= lsn_limit) {
1075
/* We have flushed enough */
1079
ut_ad(bpage->in_flush_list);
1082
/* Note that after finding a single flushable page, we try to
1083
flush also all its neighbors, and after that start from the
1084
END of the LRU list or flush list again: the list may change
1085
during the flushing and we cannot safely preserve within this
1086
function a pointer to a block in the list! */
1089
mutex_t* block_mutex = buf_page_get_mutex(bpage);
1091
ut_a(buf_page_in_file(bpage));
1093
mutex_enter(block_mutex);
1095
if (buf_flush_ready_for_flush(bpage, flush_type)) {
1097
space = buf_page_get_space(bpage);
1098
offset = buf_page_get_page_no(bpage);
1100
buf_pool_mutex_exit();
1101
mutex_exit(block_mutex);
1103
old_page_count = page_count;
1105
/* Try to flush also all the neighbors */
1106
page_count += buf_flush_try_neighbors(
1107
space, offset, flush_type);
1109
"Flush type %lu, page no %lu, neighb %lu\n",
1111
page_count - old_page_count); */
1113
buf_pool_mutex_enter();
1116
} else if (flush_type == BUF_FLUSH_LRU) {
1118
mutex_exit(block_mutex);
1120
bpage = UT_LIST_GET_PREV(LRU, bpage);
1122
ut_ad(flush_type == BUF_FLUSH_LIST);
1124
mutex_exit(block_mutex);
1126
bpage = UT_LIST_GET_PREV(list, bpage);
1127
ut_ad(!bpage || bpage->in_flush_list);
1129
} while (bpage != NULL);
1131
/* If we could not find anything to flush, leave the loop */
1133
done_with_loop= true;
1137
buf_pool->init_flush[flush_type] = FALSE;
1139
if ((buf_pool->n_flush[flush_type] == 0)
1140
&& (buf_pool->init_flush[flush_type] == FALSE)) {
1142
/* The running flush batch has ended */
1144
os_event_set(buf_pool->no_flush[flush_type]);
1147
buf_pool_mutex_exit();
1149
buf_flush_buffered_writes();
1760
1151
#ifdef UNIV_DEBUG
1761
1152
if (buf_debug_prints && page_count > 0) {
1153
ut_a(flush_type == BUF_FLUSH_LRU
1154
|| flush_type == BUF_FLUSH_LIST);
1762
1155
fprintf(stderr, flush_type == BUF_FLUSH_LRU
1763
1156
? "Flushed %lu pages in LRU flush\n"
1764
1157
: "Flushed %lu pages in flush list flush\n",
1769
1162
srv_buf_pool_flushed += page_count;
1771
if (flush_type == BUF_FLUSH_LRU) {
1772
/* We keep track of all flushes happening as part of LRU
1773
flush. When estimating the desired rate at which flush_list
1774
should be flushed we factor in this value. */
1775
buf_lru_flush_page_count += page_count;
1779
/******************************************************************//**
1780
Start a buffer flush batch for LRU or flush list */
1785
buf_pool_t* buf_pool, /*!< buffer pool instance */
1786
enum buf_flush flush_type) /*!< in: BUF_FLUSH_LRU
1787
or BUF_FLUSH_LIST */
1789
buf_pool_mutex_enter(buf_pool);
1791
if (buf_pool->n_flush[flush_type] > 0
1792
|| buf_pool->init_flush[flush_type] == TRUE) {
1794
/* There is already a flush batch of the same type running */
1796
buf_pool_mutex_exit(buf_pool);
1801
buf_pool->init_flush[flush_type] = TRUE;
1803
buf_pool_mutex_exit(buf_pool);
1808
/******************************************************************//**
1809
End a buffer flush batch for LRU or flush list */
1814
buf_pool_t* buf_pool, /*!< buffer pool instance */
1815
enum buf_flush flush_type) /*!< in: BUF_FLUSH_LRU
1816
or BUF_FLUSH_LIST */
1818
buf_pool_mutex_enter(buf_pool);
1820
buf_pool->init_flush[flush_type] = FALSE;
1822
if (buf_pool->n_flush[flush_type] == 0) {
1824
/* The running flush batch has ended */
1826
os_event_set(buf_pool->no_flush[flush_type]);
1829
buf_pool_mutex_exit(buf_pool);
1832
/******************************************************************//**
1167
/**********************************************************************
1833
1168
Waits until a flush batch of the given type ends */
1836
1171
buf_flush_wait_batch_end(
1837
1172
/*=====================*/
1838
buf_pool_t* buf_pool, /*!< buffer pool instance */
1839
enum buf_flush type) /*!< in: BUF_FLUSH_LRU
1840
or BUF_FLUSH_LIST */
1842
ut_ad(type == BUF_FLUSH_LRU || type == BUF_FLUSH_LIST);
1844
if (buf_pool == NULL) {
1847
for (i = 0; i < srv_buf_pool_instances; ++i) {
1848
buf_pool_t* i_buf_pool = buf_pool_from_array(i);
1850
os_event_wait(i_buf_pool->no_flush[type]);
1853
os_event_wait(buf_pool->no_flush[type]);
1857
/*******************************************************************//**
1858
This utility flushes dirty blocks from the end of the LRU list.
1859
NOTE: The calling thread may own latches to pages: to avoid deadlocks,
1860
this function must be written so that it cannot end up waiting for these
1862
@return number of blocks for which the write request was queued;
1863
ULINT_UNDEFINED if there was a flush of the same type already running */
1868
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
	ulint min_n) /*!< in: wished minimum number of blocks
	flushed (it is not guaranteed that the
	actual number is that big, though) */
1875
if (!buf_flush_start(buf_pool, BUF_FLUSH_LRU)) {
1876
return(ULINT_UNDEFINED);
1879
page_count = buf_flush_batch(buf_pool, BUF_FLUSH_LRU, min_n, 0);
1881
buf_flush_end(buf_pool, BUF_FLUSH_LRU);
1883
buf_flush_common(BUF_FLUSH_LRU, page_count);
1888
/*******************************************************************//**
1889
This utility flushes dirty blocks from the end of the flush list of
1890
all buffer pool instances.
1891
NOTE: The calling thread is not allowed to own any latches on pages!
1892
@return number of blocks for which the write request was queued;
1893
ULINT_UNDEFINED if there was a flush of the same type already running */
	ulint min_n, /*!< in: wished minimum number of blocks
	flushed (it is not guaranteed that the
	actual number is that big, though) */
	ib_uint64_t lsn_limit) /*!< in the case BUF_FLUSH_LIST all
	blocks whose oldest_modification is
	smaller than this should be flushed
	(if their number does not exceed
	min_n), otherwise ignored */
1908
ulint total_page_count = 0;
1909
ibool skipped = FALSE;

	if (min_n != ULINT_MAX) {
		/* Ensure that flushing is spread evenly amongst the
		buffer pool instances. When min_n is ULINT_MAX
		we need to flush everything up to the lsn limit
		so no limit here. */
		min_n = (min_n + srv_buf_pool_instances - 1)
			/ srv_buf_pool_instances;
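
		/* Worked example (illustrative only, not part of the
		original source): the expression above is a ceiling
		division, so the request is rounded up per instance.
		For min_n = 100 and srv_buf_pool_instances = 8 it gives
		(100 + 8 - 1) / 8 = 13 pages per instance, i.e. up to
		104 pages in total rather than silently flushing fewer
		than the requested 100. */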
1920
/* Flush to lsn_limit in all buffer pool instances */
1921
for (i = 0; i < srv_buf_pool_instances; i++) {
1922
buf_pool_t* buf_pool;
1923
ulint page_count = 0;
1925
buf_pool = buf_pool_from_array(i);
1927
if (!buf_flush_start(buf_pool, BUF_FLUSH_LIST)) {
1928
/* We have two choices here. If lsn_limit was
1929
specified then skipping an instance of buffer
1930
pool means we cannot guarantee that all pages
1931
up to lsn_limit has been flushed. We can
1932
return right now with failure or we can try
1933
to flush remaining buffer pools up to the
1934
lsn_limit. We attempt to flush other buffer
1935
pools based on the assumption that it will
1936
help in the retry which will follow the
1943
page_count = buf_flush_batch(
1944
buf_pool, BUF_FLUSH_LIST, min_n, lsn_limit);
1946
buf_flush_end(buf_pool, BUF_FLUSH_LIST);
1948
buf_flush_common(BUF_FLUSH_LIST, page_count);
1950
total_page_count += page_count;
1953
return(lsn_limit != IB_ULONGLONG_MAX && skipped
1954
? ULINT_UNDEFINED : total_page_count);
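
/* Usage sketch (illustrative only, not part of the original source): how
a caller might drive the multi-instance flush-list flush defined above
(assumed here to be named buf_flush_list()) and handle the
ULINT_UNDEFINED result by waiting for the running batch and retrying.
Kept out of the build with #if 0. */
#if 0
static void
flush_up_to_lsn(ib_uint64_t lsn_limit)
{
	ulint	n_flushed;

	for (;;) {
		n_flushed = buf_flush_list(ULINT_MAX, lsn_limit);

		if (n_flushed != ULINT_UNDEFINED) {
			/* No buffer pool instance was skipped, so all
			pages older than lsn_limit have been queued. */
			return;
		}

		/* A flush-list batch was already running in at least
		one instance; wait for all instances and retry. */
		buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
	}
}
#endif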
1957
/******************************************************************//**
1173
enum buf_flush type) /* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
1175
ut_ad((type == BUF_FLUSH_LRU) || (type == BUF_FLUSH_LIST));
1177
os_event_wait(buf_pool->no_flush[type]);
1180
/**********************************************************************
1958
1181
Gives a recommendation of how many blocks should be flushed to establish
1959
1182
a big enough margin of replaceable blocks near the end of the LRU list
1960
and in the free list.
1961
@return number of blocks which should be flushed from the end of the
1183
and in the free list. */
1965
buf_flush_LRU_recommendation(
1966
/*=========================*/
1967
buf_pool_t* buf_pool) /*!< in: Buffer pool instance */
1186
buf_flush_LRU_recommendation(void)
1187
/*==============================*/
1188
/* out: number of blocks which should be flushed
1189
from the end of the LRU list */
1969
1191
buf_page_t* bpage;
1970
1192
ulint n_replaceable;
1971
1193
ulint distance = 0;
1973
buf_pool_mutex_enter(buf_pool);
1195
buf_pool_mutex_enter();
1975
1197
n_replaceable = UT_LIST_GET_LEN(buf_pool->free);
1977
1199
bpage = UT_LIST_GET_LAST(buf_pool->LRU);
1979
1201
while ((bpage != NULL)
1980
&& (n_replaceable < BUF_FLUSH_FREE_BLOCK_MARGIN(buf_pool)
1981
+ BUF_FLUSH_EXTRA_MARGIN(buf_pool))
1982
&& (distance < BUF_LRU_FREE_SEARCH_LEN(buf_pool))) {
1202
&& (n_replaceable < BUF_FLUSH_FREE_BLOCK_MARGIN
1203
+ BUF_FLUSH_EXTRA_MARGIN)
1204
&& (distance < BUF_LRU_FREE_SEARCH_LEN)) {
1984
1206
mutex_t* block_mutex = buf_page_get_mutex(bpage);
2016
1237
immediately, without waiting. */
2019
buf_flush_free_margin(
2020
/*==================*/
2021
buf_pool_t* buf_pool) /*!< in: Buffer pool instance */
1240
buf_flush_free_margin(void)
1241
/*=======================*/
2023
1243
ulint n_to_flush;
2025
n_to_flush = buf_flush_LRU_recommendation(buf_pool);
1246
n_to_flush = buf_flush_LRU_recommendation();
2027
1248
if (n_to_flush > 0) {
2030
n_flushed = buf_flush_LRU(buf_pool, n_to_flush);
1249
n_flushed = buf_flush_batch(BUF_FLUSH_LRU, n_to_flush, 0);
2032
1250
if (n_flushed == ULINT_UNDEFINED) {
2033
1251
/* There was an LRU type flush batch already running;
2034
1252
let us wait for it to end */
2036
buf_flush_wait_batch_end(buf_pool, BUF_FLUSH_LRU);
1254
buf_flush_wait_batch_end(BUF_FLUSH_LRU);
2041
/*********************************************************************//**
2042
Flushes pages from the end of all the LRU lists. */
2045
buf_flush_free_margins(void)
2046
/*========================*/
2050
for (i = 0; i < srv_buf_pool_instances; i++) {
2051
buf_pool_t* buf_pool;
2053
buf_pool = buf_pool_from_array(i);
2055
buf_flush_free_margin(buf_pool);

/*********************************************************************
Update the historical stats that we are collecting for flush rate
heuristics at the end of each interval.
The flush rate heuristic depends on (a) the rate of redo log generation
and (b) the rate at which LRU flush is happening. */
2066
buf_flush_stat_update(void)
2067
/*=======================*/
2069
buf_flush_stat_t* item;
2070
ib_uint64_t lsn_diff;
2074
lsn = log_get_lsn();
2075
if (buf_flush_stat_cur.redo == 0) {
2076
/* First time around. Just update the current LSN
2078
buf_flush_stat_cur.redo = lsn;
2082
item = &buf_flush_stat_arr[buf_flush_stat_arr_ind];
2084
/* values for this interval */
2085
lsn_diff = lsn - buf_flush_stat_cur.redo;
2086
n_flushed = buf_lru_flush_page_count
2087
- buf_flush_stat_cur.n_flushed;
2089
/* add the current value and subtract the obsolete entry. */
2090
buf_flush_stat_sum.redo += lsn_diff - item->redo;
2091
buf_flush_stat_sum.n_flushed += n_flushed - item->n_flushed;
2093
/* put current entry in the array. */
2094
item->redo = lsn_diff;
2095
item->n_flushed = n_flushed;
2097
/* update the index */
2098
buf_flush_stat_arr_ind++;
2099
buf_flush_stat_arr_ind %= BUF_FLUSH_STAT_N_INTERVAL;
2101
/* reset the current entry. */
2102
buf_flush_stat_cur.redo = lsn;
2103
buf_flush_stat_cur.n_flushed = buf_lru_flush_page_count;

/*********************************************************************
Determines the fraction of dirty pages that need to be flushed based
on the speed at which we generate redo log. Note that if redo log
is generated at a significant rate without a corresponding increase
in the number of dirty pages (for example, an in-memory workload)
it can cause IO bursts of flushing. This function implements heuristics
to avoid this burstiness.
@return number of dirty pages to be flushed / second */
2116
buf_flush_get_desired_flush_rate(void)
2117
/*==================================*/
2124
ulint lru_flush_avg;
2125
ib_uint64_t lsn = log_get_lsn();
2126
ulint log_capacity = log_get_capacity();
2128
/* log_capacity should never be zero after the initialization
2129
of log subsystem. */
2130
ut_ad(log_capacity != 0);
2132
/* Get total number of dirty pages. It is OK to access
2133
flush_list without holding any mutex as we are using this
2134
only for heuristics. */
2135
for (i = 0; i < srv_buf_pool_instances; i++) {
2136
buf_pool_t* buf_pool;
2138
buf_pool = buf_pool_from_array(i);
2139
n_dirty += UT_LIST_GET_LEN(buf_pool->flush_list);

	/* An overflow can happen if we generate more than 2^32 bytes
	of redo in this interval i.e.: 4G of redo in 1 second. We can
	safely consider this as infinity because if we ever come close
	to 4G we'll start a synchronous flush of dirty pages. */
	/* redo_avg below is the average rate at which redo is generated
	in the past BUF_FLUSH_STAT_N_INTERVAL intervals + redo generated
	in the current interval. */
	redo_avg = (ulint) (buf_flush_stat_sum.redo
			    / BUF_FLUSH_STAT_N_INTERVAL
			    + (lsn - buf_flush_stat_cur.redo));

	/* An overflow can possibly happen if we flush more than 2^32
	pages in BUF_FLUSH_STAT_N_INTERVAL. This is a very, very
	unlikely scenario. Even when this happens it means that our
	flush rate will be off the mark. It won't affect correctness
	of any subsystem. */
	/* lru_flush_avg below is the rate at which pages are flushed as
	part of LRU flush in the past BUF_FLUSH_STAT_N_INTERVAL intervals
	+ the number of pages flushed in the current interval. */
	lru_flush_avg = buf_flush_stat_sum.n_flushed
			/ BUF_FLUSH_STAT_N_INTERVAL
			+ (buf_lru_flush_page_count
			   - buf_flush_stat_cur.n_flushed);

	n_flush_req = (n_dirty * redo_avg) / log_capacity;

	/* The number of pages that we want to flush from the flush
	list is the difference between the required rate and the
	number of pages that we are historically flushing from the
	LRU list in past intervals. */
	rate = n_flush_req - lru_flush_avg;

	return(rate > 0 ? (ulint) rate : 0);
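
	/* Worked example (illustrative only, not part of the original
	source): with n_dirty = 10000 dirty pages across all buffer pool
	instances, redo_avg equivalent to 2MB of redo per second,
	log_capacity = 100MB and lru_flush_avg = 50 pages/s, the formula
	above gives n_flush_req = (10000 * 2MB) / 100MB = 200 pages/s,
	so the function returns rate = 200 - 50 = 150 pages to flush
	from the flush_list per second. */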

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/******************************************************************//**
Validates the flush list.
@return TRUE if ok */
buf_flush_validate_low(
/*===================*/
	buf_pool_t* buf_pool)	/*!< in: Buffer pool instance */
2187
const ib_rbt_node_t* rnode = NULL;
2189
ut_ad(buf_flush_list_mutex_own(buf_pool));
2191
UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list,
2192
ut_ad(ut_list_node_313->in_flush_list));
1270
UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list);
2194
1272
bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
2196
/* If we are in recovery mode i.e.: flush_rbt != NULL
2197
then each block in the flush_list must also be present
2198
in the flush_rbt. */
2199
if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
2200
rnode = rbt_first(buf_pool->flush_rbt);
2203
1274
while (bpage != NULL) {
2204
1275
const ib_uint64_t om = bpage->oldest_modification;
2206
ut_ad(buf_pool_from_bpage(bpage) == buf_pool);
2208
1276
ut_ad(bpage->in_flush_list);
2210
/* A page in buf_pool->flush_list can be in
2211
BUF_BLOCK_REMOVE_HASH state. This happens when a page
2212
is in the middle of being relocated. In that case the
2213
original descriptor can have this state and still be
2214
in the flush list waiting to acquire the
2215
buf_pool->flush_list_mutex to complete the relocation. */
2216
ut_a(buf_page_in_file(bpage)
2217
|| buf_page_get_state(bpage) == BUF_BLOCK_REMOVE_HASH);
1277
ut_a(buf_page_in_file(bpage));
2220
if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
2221
buf_page_t** prpage;
2224
prpage = rbt_value(buf_page_t*, rnode);
2227
ut_a(*prpage == bpage);
2228
rnode = rbt_next(buf_pool->flush_rbt, rnode);
2231
1280
bpage = UT_LIST_GET_NEXT(list, bpage);
2233
1282
ut_a(!bpage || om >= bpage->oldest_modification);
2236
/* By this time we must have exhausted the traversal of
2237
flush_rbt (if active) as well. */
2238
ut_a(rnode == NULL);
2243
/******************************************************************//**
2244
Validates the flush list.
2245
@return TRUE if ok */
1288
/**********************************************************************
1289
Validates the flush list. */
2250
buf_pool_t* buf_pool) /*!< buffer pool instance */
1292
buf_flush_validate(void)
1293
/*====================*/
1294
/* out: TRUE if ok */
2254
buf_flush_list_mutex_enter(buf_pool);
2256
ret = buf_flush_validate_low(buf_pool);
2258
buf_flush_list_mutex_exit(buf_pool);
1298
buf_pool_mutex_enter();
1300
ret = buf_flush_validate_low();
1302
buf_pool_mutex_exit();
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#endif /* !UNIV_HOTBACKUP */