/******************************************************************//**
Validates the flush list.
@return TRUE if ok */
static
ibool
buf_flush_validate_low(
/*===================*/
	buf_pool_t*	buf_pool);	/*!< in: Buffer pool instance */
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
/******************************************************************//**
Inserts a block into the flush_rbt and returns a pointer to its
predecessor, or NULL if there is no predecessor. The ordering is
maintained on the basis of the <oldest_modification, space, offset> key.
@return pointer to the predecessor or NULL if no predecessor */
static
buf_page_t*
buf_flush_insert_in_flush_rbt(
/*==========================*/
	buf_page_t*	bpage)	/*!< in: bpage to be inserted. */
{
	const ib_rbt_node_t*	c_node;
	const ib_rbt_node_t*	p_node;
	buf_page_t*		prev = NULL;
	buf_pool_t*		buf_pool = buf_pool_from_bpage(bpage);

	ut_ad(buf_flush_list_mutex_own(buf_pool));

	/* Insert this buffer into the rbt. */
	c_node = rbt_insert(buf_pool->flush_rbt, &bpage, &bpage);
	ut_a(c_node != NULL);

	/* Get the predecessor. */
	p_node = rbt_prev(buf_pool->flush_rbt, c_node);

	if (p_node != NULL) {
		buf_page_t**	value;

		value = rbt_value(buf_page_t*, p_node);
		prev = *value;
	}

	return(prev);
}
/*********************************************************//**
Delete a bpage from the flush_rbt. */
static
void
buf_flush_delete_from_flush_rbt(
/*============================*/
	buf_page_t*	bpage)	/*!< in: bpage to be removed. */
{
#ifdef UNIV_DEBUG
	ibool		ret = FALSE;
#endif /* UNIV_DEBUG */
	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);

	ut_ad(buf_flush_list_mutex_own(buf_pool));
#ifdef UNIV_DEBUG
	ret =
#endif /* UNIV_DEBUG */
	rbt_delete(buf_pool->flush_rbt, &bpage);
	ut_ad(ret);
}
/*****************************************************************//**
Compare two modified blocks in the buffer pool. The key for comparison
is:
key = <oldest_modification, space, offset>
This comparison is used to maintain ordering of blocks in the
buf_pool->flush_rbt.
Note that for the purpose of flush_rbt, we only need to order blocks
on the oldest_modification. The other two fields are used to uniquely
identify the blocks.
@return < 0 if b2 < b1, 0 if b2 == b1, > 0 if b2 > b1 */
static
int
buf_flush_block_cmp(
/*================*/
	const void*	p1,	/*!< in: block1 */
	const void*	p2)	/*!< in: block2 */
{
	int			ret;
	const buf_page_t*	b1 = *(const buf_page_t**) p1;
	const buf_page_t*	b2 = *(const buf_page_t**) p2;
#ifdef UNIV_DEBUG
	buf_pool_t*		buf_pool = buf_pool_from_bpage(b1);
#endif /* UNIV_DEBUG */

	ut_ad(buf_flush_list_mutex_own(buf_pool));

	ut_ad(b1->in_flush_list);
	ut_ad(b2->in_flush_list);

	if (b2->oldest_modification > b1->oldest_modification) {
		return(1);
	} else if (b2->oldest_modification < b1->oldest_modification) {
		return(-1);
	}

	/* If oldest_modification is same then decide on the space. */
	ret = (int)(b2->space - b1->space);

	/* Or else decide ordering on the offset field. */
	return(ret ? ret : (int)(b2->offset - b1->offset));
}
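
/* Added illustration (not part of the original buf0flu.c): a standalone,
hypothetical analogue of buf_flush_block_cmp showing the same composite
<oldest_modification, space, offset> key. The real function above compares
b2 against b1; this sketch uses plain ascending order and standard C types
so that it can be handed to qsort() as-is. All demo_* names are made up. */

typedef struct demo_page_struct {
	unsigned long long	oldest_modification;	/* primary key part */
	unsigned long		space;			/* tie-breaker 1 */
	unsigned long		offset;			/* tie-breaker 2 */
} demo_page_t;

/* qsort()-style comparator: order by oldest_modification first, then by
space, then by offset, mirroring the ordering kept in the flush_rbt. */
static int
demo_page_cmp(const void* p1, const void* p2)
{
	const demo_page_t*	a = (const demo_page_t*) p1;
	const demo_page_t*	b = (const demo_page_t*) p2;

	if (a->oldest_modification != b->oldest_modification) {
		return(a->oldest_modification < b->oldest_modification
		       ? -1 : 1);
	}

	if (a->space != b->space) {
		return(a->space < b->space ? -1 : 1);
	}

	if (a->offset != b->offset) {
		return(a->offset < b->offset ? -1 : 1);
	}

	return(0);
}

/* Usage sketch: qsort(pages, n_pages, sizeof(demo_page_t), demo_page_cmp); */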
/********************************************************************//**
Initialize the red-black tree to speed up insertions into the flush_list
during the recovery process. Should be called at the start of the recovery
process before any page has been read/written. */
UNIV_INTERN
void
buf_flush_init_flush_rbt(void)
/*==========================*/
{
	ulint	i;

	for (i = 0; i < srv_buf_pool_instances; i++) {
		buf_pool_t*	buf_pool;

		buf_pool = buf_pool_from_array(i);

		buf_flush_list_mutex_enter(buf_pool);

		/* Create red black tree for speedy insertions in flush list. */
		buf_pool->flush_rbt = rbt_create(
			sizeof(buf_page_t*), buf_flush_block_cmp);

		buf_flush_list_mutex_exit(buf_pool);
	}
}
/********************************************************************//**
Frees up the red-black tree. */
UNIV_INTERN
void
buf_flush_free_flush_rbt(void)
/*==========================*/
{
	ulint	i;

	for (i = 0; i < srv_buf_pool_instances; i++) {
		buf_pool_t*	buf_pool;

		buf_pool = buf_pool_from_array(i);

		buf_flush_list_mutex_enter(buf_pool);

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
		ut_a(buf_flush_validate_low(buf_pool));
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

		rbt_free(buf_pool->flush_rbt);
		buf_pool->flush_rbt = NULL;

		buf_flush_list_mutex_exit(buf_pool);
	}
}
/********************************************************************//**
Inserts a modified block into the flush list. */
UNIV_INTERN
void
buf_flush_insert_into_flush_list(
/*=============================*/
	buf_pool_t*	buf_pool,	/*!< buffer pool instance */
	buf_block_t*	block,		/*!< in/out: block which is modified */
	ib_uint64_t	lsn)		/*!< in: oldest modification */
{
	ut_ad(!buf_pool_mutex_own(buf_pool));
	ut_ad(log_flush_order_mutex_own());
	ut_ad(mutex_own(&block->mutex));

	buf_flush_list_mutex_enter(buf_pool);

	ut_ad((UT_LIST_GET_FIRST(buf_pool->flush_list) == NULL)
	      || (UT_LIST_GET_FIRST(buf_pool->flush_list)->oldest_modification
		  <= lsn));

	/* If we are in the recovery then we need to update the flush
	red-black tree as well. */
	if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
		buf_flush_list_mutex_exit(buf_pool);
		buf_flush_insert_sorted_into_flush_list(buf_pool, block, lsn);
		return;
	}

	ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
	ut_ad(!block->page.in_flush_list);

	ut_d(block->page.in_flush_list = TRUE);
	block->page.oldest_modification = lsn;
	UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);

#ifdef UNIV_DEBUG_VALGRIND
	{
		ulint	zip_size = buf_block_get_zip_size(block);

		if (UNIV_UNLIKELY(zip_size)) {
			UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size);
		} else {
			UNIV_MEM_ASSERT_RW(block->frame, UNIV_PAGE_SIZE);
		}
	}
#endif /* UNIV_DEBUG_VALGRIND */
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
	ut_a(buf_flush_validate_low(buf_pool));
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

	buf_flush_list_mutex_exit(buf_pool);
}
/********************************************************************//**
Inserts a modified block into the flush list in the right sorted position.
This function is used by recovery, because there the modifications do not
necessarily come in the order of lsn's. */
UNIV_INTERN
void
buf_flush_insert_sorted_into_flush_list(
/*====================================*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	buf_block_t*	block,		/*!< in/out: block which is modified */
	ib_uint64_t	lsn)		/*!< in: oldest modification */
{
	buf_page_t*	prev_b;
	buf_page_t*	b;

	ut_ad(!buf_pool_mutex_own(buf_pool));
	ut_ad(log_flush_order_mutex_own());
	ut_ad(mutex_own(&block->mutex));
	ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);

	buf_flush_list_mutex_enter(buf_pool);

	/* The field in_LRU_list is protected by buf_pool->mutex, which
	we are not holding. However, while a block is in the flush
	list, it is dirty and cannot be discarded, not from the
	page_hash or from the LRU list. At most, the uncompressed
	page frame of a compressed block may be discarded or created
	(copying the block->page to or from a buf_page_t that is
	dynamically allocated from buf_buddy_alloc()). Because those
	transitions hold block->mutex and the flush list mutex (via
	buf_flush_relocate_on_flush_list()), there is no possibility
	of a race condition in the assertions below. */
	ut_ad(block->page.in_LRU_list);
	ut_ad(block->page.in_page_hash);
	/* buf_buddy_block_register() will take a block in the
	BUF_BLOCK_MEMORY state, not a file page. */
	ut_ad(!block->page.in_zip_hash);

	ut_ad(!block->page.in_flush_list);
	ut_d(block->page.in_flush_list = TRUE);
	block->page.oldest_modification = lsn;

#ifdef UNIV_DEBUG_VALGRIND
	{
		ulint	zip_size = buf_block_get_zip_size(block);

		if (UNIV_UNLIKELY(zip_size)) {
			UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size);
		} else {
			UNIV_MEM_ASSERT_RW(block->frame, UNIV_PAGE_SIZE);
		}
	}
#endif /* UNIV_DEBUG_VALGRIND */

	prev_b = NULL;

	/* For the most part when this function is called the flush_rbt
	should not be NULL. In a very rare boundary case it is possible
	that the flush_rbt has already been freed by the recovery thread
	before the last page was hooked up in the flush_list by the
	io-handler thread. In that case we'll just do a simple
	linear search in the else block. */
	if (buf_pool->flush_rbt) {

		prev_b = buf_flush_insert_in_flush_rbt(&block->page);
	} else {

		b = UT_LIST_GET_FIRST(buf_pool->flush_list);

		while (b && b->oldest_modification
		       > block->page.oldest_modification) {
			ut_ad(b->in_flush_list);
			prev_b = b;
			b = UT_LIST_GET_NEXT(list, b);
		}
	}

	if (prev_b == NULL) {
		UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
	} else {
		UT_LIST_INSERT_AFTER(list, buf_pool->flush_list,
				     prev_b, &block->page);
	}

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
	ut_a(buf_flush_validate_low(buf_pool));
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

	buf_flush_list_mutex_exit(buf_pool);
}
	/* If the flush_rbt is active then delete from there as well. */
	if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
		buf_flush_delete_from_flush_rbt(bpage);
	}

	/* Must be done after we have removed it from the flush_rbt
	because we assert on in_flush_list in comparison function. */
	ut_d(bpage->in_flush_list = FALSE);

	bpage->oldest_modification = 0;

	ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list,
			      ut_ad(ut_list_node_313->in_flush_list)));

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
	ut_a(buf_flush_validate_low(buf_pool));
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

	buf_flush_list_mutex_exit(buf_pool);
}
/*******************************************************************//**
Relocates a buffer control block on the flush_list.
Note that it is assumed that the contents of bpage have already been
copied to dpage.
IMPORTANT: When this function is called bpage and dpage are not
exact copies of each other. For example, they both will have different
::state. Also the ::list pointers in dpage may be stale. We need to
use the current list node (bpage) to do the list manipulation because
the list pointers could have changed between the time that we copied
the contents of bpage to the dpage and the flush list manipulation
below. */
UNIV_INTERN
void
buf_flush_relocate_on_flush_list(
/*=============================*/
	buf_page_t*	bpage,	/*!< in/out: control block being moved */
	buf_page_t*	dpage)	/*!< in/out: destination block */
{
	buf_page_t*	prev;
	buf_page_t*	prev_b = NULL;
	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);

	ut_ad(buf_pool_mutex_own(buf_pool));
	/* Must reside in the same buffer pool. */
	ut_ad(buf_pool == buf_pool_from_bpage(dpage));

	ut_ad(mutex_own(buf_page_get_mutex(bpage)));

	buf_flush_list_mutex_enter(buf_pool);

	/* FIXME: At this point we have both buf_pool and flush_list
	mutexes. Theoretically removal of a block from flush list is
	only covered by flush_list mutex but currently we do
	have buf_pool mutex in buf_flush_remove() therefore this block
	is guaranteed to be in the flush list. We need to check if
	this will work without the assumption of block removing code
	having the buf_pool mutex. */
	ut_ad(bpage->in_flush_list);
	ut_ad(dpage->in_flush_list);

	/* If recovery is active we must swap the control blocks in
	the flush_rbt as well. */
	if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
		buf_flush_delete_from_flush_rbt(bpage);
		prev_b = buf_flush_insert_in_flush_rbt(dpage);
	}

	/* Must be done after we have removed it from the flush_rbt
	because we assert on in_flush_list in comparison function. */
	ut_d(bpage->in_flush_list = FALSE);

	prev = UT_LIST_GET_PREV(list, bpage);
	UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);

	if (prev) {
		ut_ad(prev->in_flush_list);
		UT_LIST_INSERT_AFTER(
			list,
			buf_pool->flush_list,
			prev, dpage);
	} else {
		UT_LIST_ADD_FIRST(
			list,
			buf_pool->flush_list,
			dpage);
	}

	/* Just an extra check. Previous in flush_list
	should be the same control block as in flush_rbt. */
	ut_a(!buf_pool->flush_rbt || prev_b == prev);

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
	ut_a(buf_flush_validate_low(buf_pool));
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

	buf_flush_list_mutex_exit(buf_pool);
}
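
/* Added illustration (not part of the original buf0flu.c): a hypothetical,
standalone sketch of the list surgery buf_flush_relocate_on_flush_list()
performs above with UT_LIST_REMOVE/UT_LIST_INSERT_AFTER - unlink "src" and
put its copy "dst" in exactly the same position, trusting only the
neighbours' pointers and never the possibly stale links stored in dst.
All demo_* names are made up. */

typedef struct demo_node_struct demo_node_t;
struct demo_node_struct {
	demo_node_t*	prev;
	demo_node_t*	next;
};

typedef struct demo_list_struct {
	demo_node_t*	first;
	demo_node_t*	last;
} demo_list_t;

static void
demo_list_relocate(demo_list_t* list, demo_node_t* src, demo_node_t* dst)
{
	/* Take the link values from the node that is currently in the
	list, not from the copy, whose links may be stale. */
	dst->prev = src->prev;
	dst->next = src->next;

	if (src->prev != NULL) {
		src->prev->next = dst;	/* src was in the middle or at the tail */
	} else {
		list->first = dst;	/* src was the list head */
	}

	if (src->next != NULL) {
		src->next->prev = dst;
	} else {
		list->last = dst;	/* src was the list tail */
	}
}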
# if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
/********************************************************************//**
Writes a flushable page asynchronously from the buffer pool to a file.
NOTE: buf_pool->mutex and block->mutex must be held upon entering this
function, and they will be released by this function after flushing.
This is loosely based on buf_flush_batch() and buf_flush_page().
@return TRUE if the page was flushed and the mutexes released */
UNIV_INTERN
ibool
buf_flush_page_try(
/*===============*/
	buf_pool_t*	buf_pool,	/*!< in/out: buffer pool instance */
	buf_block_t*	block)		/*!< in/out: buffer control block */
{
	ut_ad(buf_pool_mutex_own(buf_pool));
	ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
	ut_ad(mutex_own(&block->mutex));

	if (!buf_flush_ready_for_flush(&block->page, BUF_FLUSH_LRU)) {
		return(FALSE);
	}

	if (buf_pool->n_flush[BUF_FLUSH_LRU] > 0
	    || buf_pool->init_flush[BUF_FLUSH_LRU]) {
		/* There is already a flush batch of the same type running */
		return(FALSE);
	}

	buf_pool->init_flush[BUF_FLUSH_LRU] = TRUE;

	buf_page_set_io_fix(&block->page, BUF_IO_WRITE);

	buf_page_set_flush_type(&block->page, BUF_FLUSH_LRU);

	if (buf_pool->n_flush[BUF_FLUSH_LRU]++ == 0) {

		os_event_reset(buf_pool->no_flush[BUF_FLUSH_LRU]);
	}

	/* Because any thread may call the LRU flush, even when owning
	locks on pages, to avoid deadlocks, we must make sure that the
	s-lock is acquired on the page without waiting: this is
	accomplished because buf_flush_ready_for_flush() must hold,
	and that requires the page not to be bufferfixed. */

	rw_lock_s_lock_gen(&block->lock, BUF_IO_WRITE);

	/* Note that the s-latch is acquired before releasing the
	buf_pool mutex: this ensures that the latch is acquired
	immediately. */

	mutex_exit(&block->mutex);
	buf_pool_mutex_exit(buf_pool);

	/* Even though block is not protected by any mutex at this
	point, it is safe to access block, because it is io_fixed and
	oldest_modification != 0. Thus, it cannot be relocated in the
	buffer pool or removed from flush_list or LRU_list. */

	buf_flush_write_block_low(&block->page);

	buf_pool_mutex_enter(buf_pool);
	buf_pool->init_flush[BUF_FLUSH_LRU] = FALSE;

	if (buf_pool->n_flush[BUF_FLUSH_LRU] == 0) {

		/* The running flush batch has ended */
		os_event_set(buf_pool->no_flush[BUF_FLUSH_LRU]);
	}

	buf_pool_mutex_exit(buf_pool);
	buf_flush_buffered_writes();

	return(TRUE);
}
# endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
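
/* Added illustration (not part of InnoDB): the init_flush/n_flush/no_flush
protocol used above - the no_flush event is reset when the first write of a
batch is queued and set again when the count of in-flight writes returns to
zero - modelled with POSIX threads as a counter plus a condition variable.
All names are hypothetical; this sketch is meant to be compiled separately,
and the struct must be initialised with PTHREAD_MUTEX_INITIALIZER and
PTHREAD_COND_INITIALIZER (or the corresponding *_init() calls). */

#include <pthread.h>

typedef struct flush_counter_struct {
	pthread_mutex_t	mutex;		/* protects n_pending */
	pthread_cond_t	no_flush;	/* signalled when n_pending == 0 */
	int		n_pending;	/* writes in flight */
} flush_counter_t;

/* Called before queuing a write: analogue of n_flush[]++ plus the implicit
"reset" of the event (waiters now block until the count drops to zero). */
static void
flush_counter_add(flush_counter_t* fc)
{
	pthread_mutex_lock(&fc->mutex);
	fc->n_pending++;
	pthread_mutex_unlock(&fc->mutex);
}

/* Called from the i/o completion path: analogue of decrementing n_flush[]
and doing os_event_set() when the running batch has ended. */
static void
flush_counter_done(flush_counter_t* fc)
{
	pthread_mutex_lock(&fc->mutex);
	if (--fc->n_pending == 0) {
		pthread_cond_broadcast(&fc->no_flush);
	}
	pthread_mutex_unlock(&fc->mutex);
}

/* Analogue of buf_flush_wait_batch_end(): block until no writes remain. */
static void
flush_counter_wait(flush_counter_t* fc)
{
	pthread_mutex_lock(&fc->mutex);
	while (fc->n_pending > 0) {
		pthread_cond_wait(&fc->no_flush, &fc->mutex);
	}
	pthread_mutex_unlock(&fc->mutex);
}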
/********************************************************************//**
Writes a flushable page asynchronously from the buffer pool to a file.
NOTE: in simulated aio we must call
os_aio_simulated_wake_handler_threads after we have posted a batch of
writes! NOTE: buf_pool->mutex and buf_page_get_mutex(bpage) must be
held upon entering this function, and they will be released by this
function. */
UNIV_INTERN
void
buf_flush_page(
/*===========*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	buf_page_t*	bpage,		/*!< in: buffer control block */
	enum buf_flush	flush_type)	/*!< in: BUF_FLUSH_LRU
					or BUF_FLUSH_LIST */

/***********************************************************//**
Flushes to disk all flushable pages within the flush area.
@return number of pages flushed */
static
ulint
buf_flush_try_neighbors(
/*====================*/
	ulint		space,		/*!< in: space id */
	ulint		offset,		/*!< in: page offset */
	enum buf_flush	flush_type,	/*!< in: BUF_FLUSH_LRU or
					BUF_FLUSH_LIST */
	ulint		n_flushed,	/*!< in: number of pages
					flushed so far in this batch */
	ulint		n_to_flush)	/*!< in: maximum number of pages
					we are allowed to flush */
{
	ulint		i;
	ulint		low;
	ulint		high;
	ulint		count = 0;
	buf_pool_t*	buf_pool = buf_pool_get(space, offset);

	ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);

	if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) {
		/* If there is little space, it is better not to flush
		any block except from the end of the LRU list */

		low = offset;
		high = offset + 1;
	} else {
		/* When flushed, dirty blocks are searched in
		neighborhoods of this size, and flushed along with the
		original page. */

		ulint	buf_flush_area;

		buf_flush_area = ut_min(
			BUF_READ_AHEAD_AREA(buf_pool),
			buf_pool->curr_size / 16);

		low = (offset / buf_flush_area) * buf_flush_area;
		high = (offset / buf_flush_area + 1) * buf_flush_area;
	}
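
	/* Added worked example (not in the original source): with
	buf_flush_area = 64 and offset = 1000, the range computed above is
	low = (1000 / 64) * 64 = 960 and high = (1000 / 64 + 1) * 64 = 1024,
	i.e. the aligned block of 64 pages [960, 1024) containing the
	original page. */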
		if (buf_flush_ready_for_flush(bpage, flush_type)
		    && (i == offset || !bpage->buf_fix_count)) {

			/* We only try to flush those
			neighbors != offset where the buf fix
			count is zero, as we then know that we
			probably can latch the page without a
			semaphore wait. Semaphore waits are
			expensive because we must flush the
			doublewrite buffer before we start
			waiting. */

			buf_flush_page(buf_pool, bpage, flush_type);

			ut_ad(!mutex_own(block_mutex));
			ut_ad(!buf_pool_mutex_own(buf_pool));

			count++;
		} else {
			mutex_exit(block_mutex);
		}

		buf_pool_mutex_exit(buf_pool);
/********************************************************************//**
Check if the block is modified and ready for flushing. If the block
is ready to flush then flush the page and try to flush its neighbors.
@return TRUE if buf_pool mutex was not released during this function.
This does not guarantee that some pages were written as well.
The number of pages written is added to the count. */
static
ibool
buf_flush_page_and_try_neighbors(
/*=============================*/
	buf_page_t*	bpage,		/*!< in: buffer control block,
					must be buf_page_in_file(bpage) */
	enum buf_flush	flush_type,	/*!< in: BUF_FLUSH_LRU
					or BUF_FLUSH_LIST */
	ulint		n_to_flush,	/*!< in: number of pages to
					flush */
	ulint*		count)		/*!< in/out: number of pages
					flushed */
{
	mutex_t*	block_mutex;
	ibool		flushed = FALSE;
#ifdef UNIV_DEBUG
	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);
#endif /* UNIV_DEBUG */

	ut_ad(buf_pool_mutex_own(buf_pool));

	block_mutex = buf_page_get_mutex(bpage);
	mutex_enter(block_mutex);

	ut_a(buf_page_in_file(bpage));

	if (buf_flush_ready_for_flush(bpage, flush_type)) {
		ulint		space;
		ulint		offset;
		buf_pool_t*	buf_pool;

		buf_pool = buf_pool_from_bpage(bpage);

		buf_pool_mutex_exit(buf_pool);

		/* These fields are protected by both the
		buffer pool mutex and block mutex. */
		space = buf_page_get_space(bpage);
		offset = buf_page_get_page_no(bpage);

		mutex_exit(block_mutex);

		/* Try to flush also all the neighbors */
		*count += buf_flush_try_neighbors(space,
						  offset,
						  flush_type,
						  *count,
						  n_to_flush);

		buf_pool_mutex_enter(buf_pool);
		flushed = TRUE;
	} else {
		mutex_exit(block_mutex);
	}

	ut_ad(buf_pool_mutex_own(buf_pool));

	return(flushed);
}
/*******************************************************************//**
This utility flushes dirty blocks from the end of the LRU list.
In the case of an LRU flush the calling thread may own latches to
pages: to avoid deadlocks, this function must be written so that it
cannot end up waiting for these latches!
@return number of blocks for which the write request was queued. */
static
ulint
buf_flush_LRU_list_batch(
/*=====================*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	ulint		max)		/*!< in: max of blocks to flush */
{
	buf_page_t*	bpage;
	ulint		count = 0;

	ut_ad(buf_pool_mutex_own(buf_pool));

	do {
		/* Start from the end of the list looking for a
		suitable block to be flushed. */
		bpage = UT_LIST_GET_LAST(buf_pool->LRU);

		/* Iterate backwards over the LRU list till we find
		a page that isn't ready for flushing. */
		while (bpage != NULL
		       && !buf_flush_page_and_try_neighbors(
				bpage, BUF_FLUSH_LRU, max, &count)) {

			bpage = UT_LIST_GET_PREV(LRU, bpage);
		}
	} while (bpage != NULL && count < max);

	/* We keep track of all flushes happening as part of LRU
	flush. When estimating the desired rate at which flush_list
	should be flushed, we factor in this value. */
	buf_lru_flush_page_count += count;

	ut_ad(buf_pool_mutex_own(buf_pool));

	return(count);
}
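
/* Added illustration (not part of the original buf0flu.c): the scan shape
used by buf_flush_LRU_list_batch() above, as a hypothetical standalone
sketch - walk from the tail towards the head until one page is flushed (or
the list is exhausted), then restart from the tail, until the budget is
used up or nothing is flushable any more. try_flush() stands in for
buf_flush_page_and_try_neighbors(); all demo_* names are made up. */

typedef struct demo_lru_node_struct demo_lru_node_t;
struct demo_lru_node_struct {
	demo_lru_node_t*	prev;	/* towards the head of the list */
};

static unsigned long
demo_lru_scan(demo_lru_node_t* tail, unsigned long max,
	      int (*try_flush)(demo_lru_node_t*, unsigned long*))
{
	unsigned long		count = 0;
	demo_lru_node_t*	node;

	do {
		/* Always restart from the end of the list: the list may
		have changed while the previous page was being written. */
		node = tail;

		while (node != NULL && !try_flush(node, &count)) {
			node = node->prev;
		}
		/* node == NULL here means no flushable page was found. */
	} while (node != NULL && count < max);

	return(count);
}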
/*******************************************************************//**
This utility flushes dirty blocks from the end of the flush_list.
The calling thread is not allowed to own any latches on pages!
@return number of blocks for which the write request was queued;
ULINT_UNDEFINED if there was a flush of the same type already running */
static
ulint
buf_flush_flush_list_batch(
/*=======================*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	ulint		min_n,		/*!< in: wished minimum number
					of blocks flushed (it is not
					guaranteed that the actual
					number is that big, though) */
	ib_uint64_t	lsn_limit)	/*!< all blocks whose
					oldest_modification is smaller
					than this should be flushed (if
					their number does not exceed
					min_n), otherwise ignored */
{
	ulint		len;
	buf_page_t*	bpage;
	ulint		count = 0;

	ut_ad(buf_pool_mutex_own(buf_pool));

	/* If we have flushed enough, leave the loop */
	do {
		/* Start from the end of the list looking for a suitable
		block to be flushed. */

		buf_flush_list_mutex_enter(buf_pool);

		/* We use len here because theoretically insertions can
		happen in the flush_list below while we are traversing
		it for a suitable candidate for flushing. We'd like to
		set a limit on how far we are willing to traverse
		the list. */
		len = UT_LIST_GET_LEN(buf_pool->flush_list);
		bpage = UT_LIST_GET_LAST(buf_pool->flush_list);

		if (bpage) {
			ut_a(bpage->oldest_modification > 0);
		}

		if (!bpage || bpage->oldest_modification >= lsn_limit) {

			/* We have flushed enough */
			buf_flush_list_mutex_exit(buf_pool);
			break;
		}

		ut_a(bpage->oldest_modification > 0);

		ut_ad(bpage->in_flush_list);

		buf_flush_list_mutex_exit(buf_pool);

		/* The list may change during the flushing and we cannot
		safely preserve within this function a pointer to a
		block in the list! */
		while (bpage != NULL
		       && !buf_flush_page_and_try_neighbors(
				bpage, BUF_FLUSH_LIST, min_n, &count)) {

			buf_flush_list_mutex_enter(buf_pool);

			/* If we are here that means that buf_pool->mutex
			was not released in buf_flush_page_and_try_neighbors()
			above and this guarantees that bpage didn't get
			relocated since we released the flush_list
			mutex above. There is a chance, however, that
			the bpage got removed from flush_list (not
			currently possible because flush_list_remove()
			also obtains buf_pool mutex but that may change
			in future). To avoid this scenario we check
			the oldest_modification and if it is zero
			we start all over again. */
			if (bpage->oldest_modification == 0) {

				buf_flush_list_mutex_exit(buf_pool);
				break;
			}

			bpage = UT_LIST_GET_PREV(list, bpage);

			ut_ad(!bpage || bpage->in_flush_list);

			buf_flush_list_mutex_exit(buf_pool);

			--len;
		}

	} while (count < min_n && bpage != NULL && len > 0);

	ut_ad(buf_pool_mutex_own(buf_pool));

	return(count);
}
/*******************************************************************//**
This utility flushes dirty blocks from the end of the LRU list or flush_list.
NOTE 1: in the case of an LRU flush the calling thread may own latches to
pages: to avoid deadlocks, this function must be written so that it cannot
end up waiting for these latches! NOTE 2: in the case of a flush list flush,
the calling thread is not allowed to own any latches on pages!
@return number of blocks for which the write request was queued */
static
ulint
buf_flush_batch(
/*============*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	enum buf_flush	flush_type,	/*!< in: BUF_FLUSH_LRU or
					BUF_FLUSH_LIST */
	ulint		min_n,		/*!< in: wished minimum number of blocks
					flushed (it is not guaranteed that the
					actual number is that big, though) */
	ib_uint64_t	lsn_limit)	/*!< in: in the case of BUF_FLUSH_LIST
					all blocks whose oldest_modification is
					smaller than this should be flushed
					(if their number does not exceed
					min_n), otherwise ignored */
{
	ulint		count = 0;

	ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
#ifdef UNIV_SYNC_DEBUG
	ut_ad((flush_type != BUF_FLUSH_LIST)
	      || sync_thread_levels_empty_gen(TRUE));
#endif /* UNIV_SYNC_DEBUG */

	buf_pool_mutex_enter(buf_pool);

	/* Note: The buffer pool mutex is released and reacquired within
	the flush functions. */
	switch (flush_type) {
	case BUF_FLUSH_LRU:
		count = buf_flush_LRU_list_batch(buf_pool, min_n);
		break;
	case BUF_FLUSH_LIST:
		count = buf_flush_flush_list_batch(buf_pool, min_n, lsn_limit);
		break;
	default:
		ut_error;
	}

	buf_pool_mutex_exit(buf_pool);

	buf_flush_buffered_writes();

#ifdef UNIV_DEBUG
	if (buf_debug_prints && count > 0) {
		fprintf(stderr, flush_type == BUF_FLUSH_LRU
			? "Flushed %lu pages in LRU flush\n"
			: "Flushed %lu pages in flush list flush\n",
			(ulong) count);
	}
#endif /* UNIV_DEBUG */

	srv_buf_pool_flushed += count;

	return(count);
}
/******************************************************************//**
Gather the aggregated stats for both flush list and LRU list flushing */
static
void
buf_flush_common(
/*=============*/
	enum buf_flush	flush_type,	/*!< in: type of flush */
	ulint		page_count)	/*!< in: number of pages flushed */
{
	buf_flush_buffered_writes();

	ut_a(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);

#ifdef UNIV_DEBUG
	if (buf_debug_prints && page_count > 0) {
		fprintf(stderr, flush_type == BUF_FLUSH_LRU
			? "Flushed %lu pages in LRU flush\n"
			: "Flushed %lu pages in flush list flush\n",
			(ulong) page_count);
	}
#endif /* UNIV_DEBUG */

	srv_buf_pool_flushed += page_count;

	if (flush_type == BUF_FLUSH_LRU) {
		/* We keep track of all flushes happening as part of LRU
		flush. When estimating the desired rate at which flush_list
		should be flushed we factor in this value. */
		buf_lru_flush_page_count += page_count;
	}
}
/******************************************************************//**
Start a buffer flush batch for LRU or flush list */
static
ibool
buf_flush_start(
/*============*/
	buf_pool_t*	buf_pool,	/*!< buffer pool instance */
	enum buf_flush	flush_type)	/*!< in: BUF_FLUSH_LRU
					or BUF_FLUSH_LIST */
{
	buf_pool_mutex_enter(buf_pool);

	if (buf_pool->n_flush[flush_type] > 0
	    || buf_pool->init_flush[flush_type] == TRUE) {
		/* There is already a flush batch of the same type running */
		buf_pool_mutex_exit(buf_pool);
		return(FALSE);
	}

	buf_pool->init_flush[flush_type] = TRUE;

	buf_pool_mutex_exit(buf_pool);

	return(TRUE);
}
/******************************************************************//**
End a buffer flush batch for LRU or flush list */
static
void
buf_flush_end(
/*==========*/
	buf_pool_t*	buf_pool,	/*!< buffer pool instance */
	enum buf_flush	flush_type)	/*!< in: BUF_FLUSH_LRU
					or BUF_FLUSH_LIST */
{
	buf_pool_mutex_enter(buf_pool);

	buf_pool->init_flush[flush_type] = FALSE;

	if (buf_pool->n_flush[flush_type] == 0) {
		/* The running flush batch has ended */
		os_event_set(buf_pool->no_flush[flush_type]);
	}

	buf_pool_mutex_exit(buf_pool);
}
/******************************************************************//**
Waits until a flush batch of the given type ends */
UNIV_INTERN
void
buf_flush_wait_batch_end(
/*=====================*/
	buf_pool_t*	buf_pool,	/*!< buffer pool instance */
	enum buf_flush	type)		/*!< in: BUF_FLUSH_LRU
					or BUF_FLUSH_LIST */
{
	ut_ad(type == BUF_FLUSH_LRU || type == BUF_FLUSH_LIST);

	if (buf_pool == NULL) {
		ulint	i;

		for (i = 0; i < srv_buf_pool_instances; ++i) {
			buf_pool_t*	i_buf_pool = buf_pool_from_array(i);

			os_event_wait(i_buf_pool->no_flush[type]);
		}
	} else {
		os_event_wait(buf_pool->no_flush[type]);
	}
}
/*******************************************************************//**
This utility flushes dirty blocks from the end of the LRU list.
NOTE: The calling thread may own latches to pages: to avoid deadlocks,
this function must be written so that it cannot end up waiting for these
latches!
@return number of blocks for which the write request was queued;
ULINT_UNDEFINED if there was a flush of the same type already running */
UNIV_INTERN
ulint
buf_flush_LRU(
/*==========*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	ulint		min_n)		/*!< in: wished minimum number of blocks
					flushed (it is not guaranteed that the
					actual number is that big, though) */
{
	ulint		page_count;

	if (!buf_flush_start(buf_pool, BUF_FLUSH_LRU)) {
		return(ULINT_UNDEFINED);
	}

	page_count = buf_flush_batch(buf_pool, BUF_FLUSH_LRU, min_n, 0);

	buf_flush_end(buf_pool, BUF_FLUSH_LRU);

	buf_flush_common(BUF_FLUSH_LRU, page_count);

	return(page_count);
}
/*******************************************************************//**
This utility flushes dirty blocks from the end of the flush list of
all buffer pool instances.
NOTE: The calling thread is not allowed to own any latches on pages!
@return number of blocks for which the write request was queued;
ULINT_UNDEFINED if there was a flush of the same type already running */
UNIV_INTERN
ulint
buf_flush_list(
/*===========*/
	ulint		min_n,		/*!< in: wished minimum number of blocks
					flushed (it is not guaranteed that the
					actual number is that big, though) */
	ib_uint64_t	lsn_limit)	/*!< in: in the case of BUF_FLUSH_LIST,
					all blocks whose oldest_modification is
					smaller than this should be flushed
					(if their number does not exceed
					min_n), otherwise ignored */
{
	ulint		i;
	ulint		total_page_count = 0;
	ibool		skipped = FALSE;

	if (min_n != ULINT_MAX) {
		/* Ensure that flushing is spread evenly amongst the
		buffer pool instances. When min_n is ULINT_MAX
		we need to flush everything up to the lsn limit
		so no limit here. */
		min_n = (min_n + srv_buf_pool_instances - 1)
			 / srv_buf_pool_instances;
	}
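
	/* Added worked example (not in the original source): with
	srv_buf_pool_instances = 8 and min_n = 100, the ceiling division
	above yields (100 + 7) / 8 = 13, so each instance is asked to flush
	at least 13 pages rather than the full 100. */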
	/* Flush to lsn_limit in all buffer pool instances */
	for (i = 0; i < srv_buf_pool_instances; i++) {
		buf_pool_t*	buf_pool;
		ulint		page_count = 0;

		buf_pool = buf_pool_from_array(i);

		if (!buf_flush_start(buf_pool, BUF_FLUSH_LIST)) {
			/* We have two choices here. If lsn_limit was
			specified then skipping an instance of buffer
			pool means we cannot guarantee that all pages
			up to lsn_limit have been flushed. We can
			return right now with failure or we can try
			to flush remaining buffer pools up to the
			lsn_limit. We attempt to flush other buffer
			pools based on the assumption that it will
			help in the retry which will follow the
			failure. */
			skipped = TRUE;

			continue;
		}

		page_count = buf_flush_batch(
			buf_pool, BUF_FLUSH_LIST, min_n, lsn_limit);

		buf_flush_end(buf_pool, BUF_FLUSH_LIST);

		buf_flush_common(BUF_FLUSH_LIST, page_count);

		total_page_count += page_count;
	}

	return(lsn_limit != IB_ULONGLONG_MAX && skipped
	       ? ULINT_UNDEFINED : total_page_count);
}
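
/* Added illustration (not part of the original buf0flu.c): a hypothetical
caller of buf_flush_list() showing how the ULINT_UNDEFINED return value is
meant to be handled - wait for the already-running flush-list batches in
every instance (buf_pool == NULL) and then retry. The demo_flush_up_to
name is made up for this note. */
static
void
demo_flush_up_to(
/*=============*/
	ib_uint64_t	new_oldest)	/*!< in: flush up to this lsn */
{
	ulint	n_pages;

	do {
		n_pages = buf_flush_list(ULINT_MAX, new_oldest);

		if (n_pages == ULINT_UNDEFINED) {
			/* A flush-list batch was already running in some
			instance; wait for all of them to end and retry. */
			buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
		}
	} while (n_pages == ULINT_UNDEFINED);
}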
/******************************************************************//**
Gives a recommendation of how many blocks should be flushed to establish
a big enough margin of replaceable blocks near the end of the LRU list
and in the free list.
@return number of blocks which should be flushed from the end of the
LRU list */

/*********************************************************************//**
Flushes pages from the end of the LRU list if there is too small a margin
of replaceable pages there or in the free list. VERY IMPORTANT: this
function is called also by threads which have locks on pages. To avoid
deadlocks, we flush only pages such that the s-lock required for
flushing can be acquired immediately, without waiting. */
UNIV_INTERN
void
buf_flush_free_margin(
/*==================*/
	buf_pool_t*	buf_pool)	/*!< in: Buffer pool instance */
{
	ulint	n_to_flush;

	n_to_flush = buf_flush_LRU_recommendation(buf_pool);

	if (n_to_flush > 0) {
		ulint	n_flushed;

		n_flushed = buf_flush_LRU(buf_pool, n_to_flush);

		if (n_flushed == ULINT_UNDEFINED) {
			/* There was an LRU type flush batch already running;
			let us wait for it to end */

			buf_flush_wait_batch_end(buf_pool, BUF_FLUSH_LRU);
		}
	}
}
/*********************************************************************//**
Flushes pages from the end of all the LRU lists. */
UNIV_INTERN
void
buf_flush_free_margins(void)
/*========================*/
{
	ulint	i;

	for (i = 0; i < srv_buf_pool_instances; i++) {
		buf_pool_t*	buf_pool;

		buf_pool = buf_pool_from_array(i);

		buf_flush_free_margin(buf_pool);
	}
}
/*********************************************************************
Update the historical stats that we are collecting for flush rate
heuristics at the end of each interval.
Flush rate heuristic depends on (a) rate of redo log generation and
(b) the rate at which LRU flush is happening. */

/******************************************************************//**
Validates the flush list.
@return TRUE if ok */
static
ibool
buf_flush_validate_low(
/*===================*/
	buf_pool_t*	buf_pool)	/*!< in: Buffer pool instance */
{
	buf_page_t*		bpage;
	const ib_rbt_node_t*	rnode = NULL;

	ut_ad(buf_flush_list_mutex_own(buf_pool));

	UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list,
			 ut_ad(ut_list_node_313->in_flush_list));

	bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);

	/* If we are in recovery mode i.e.: flush_rbt != NULL
	then each block in the flush_list must also be present
	in the flush_rbt. */
	if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
		rnode = rbt_first(buf_pool->flush_rbt);
	}

	while (bpage != NULL) {
		const ib_uint64_t om = bpage->oldest_modification;

		ut_ad(buf_pool_from_bpage(bpage) == buf_pool);

		ut_ad(bpage->in_flush_list);

		/* A page in buf_pool->flush_list can be in
		BUF_BLOCK_REMOVE_HASH state. This happens when a page
		is in the middle of being relocated. In that case the
		original descriptor can have this state and still be
		in the flush list waiting to acquire the
		buf_pool->flush_list_mutex to complete the relocation. */
		ut_a(buf_page_in_file(bpage)
		     || buf_page_get_state(bpage) == BUF_BLOCK_REMOVE_HASH);

		if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
			buf_page_t**	prpage;

			ut_a(rnode);
			prpage = rbt_value(buf_page_t*, rnode);

			ut_a(*prpage == bpage);
			rnode = rbt_next(buf_pool->flush_rbt, rnode);
		}

		bpage = UT_LIST_GET_NEXT(list, bpage);

		ut_a(!bpage || om >= bpage->oldest_modification);
	}

	/* By this time we must have exhausted the traversal of
	flush_rbt (if active) as well. */
	ut_a(rnode == NULL);

	return(TRUE);
}
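
/* Added illustration (not part of the original buf0flu.c): the core
invariant asserted in the loop above, as a hypothetical standalone check -
walking the flush list from the head, oldest_modification must never
increase. All demo_* names are made up. */

typedef struct demo_flush_node_struct demo_flush_node_t;
struct demo_flush_node_struct {
	demo_flush_node_t*	next;
	unsigned long long	oldest_modification;
};

/* Returns 1 when the list is ordered by descending oldest_modification
from head to tail, 0 otherwise. */
static int
demo_flush_list_sorted(const demo_flush_node_t* node)
{
	while (node != NULL && node->next != NULL) {
		if (node->oldest_modification
		    < node->next->oldest_modification) {
			return(0);
		}

		node = node->next;
	}

	return(1);
}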