#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/******************************************************************//**
Validates the flush list.
@return TRUE if ok */
static
ibool
buf_flush_validate_low(
/*===================*/
	buf_pool_t*	buf_pool);	/*!< in: Buffer pool instance */
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
/******************************************************************//**
Inserts a block in the flush_rbt and returns a pointer to its
predecessor or NULL if no predecessor. The ordering is maintained
on the basis of the <oldest_modification, space, offset> key.
@return pointer to the predecessor or NULL if no predecessor. */
static
buf_page_t*
buf_flush_insert_in_flush_rbt(
/*==========================*/
	buf_page_t*	bpage)	/*!< in: bpage to be inserted. */
{
	const ib_rbt_node_t*	c_node;
	const ib_rbt_node_t*	p_node;
	buf_page_t*		prev = NULL;
	buf_pool_t*		buf_pool = buf_pool_from_bpage(bpage);

	ut_ad(buf_flush_list_mutex_own(buf_pool));

	/* Insert this buffer into the rbt. */
	c_node = rbt_insert(buf_pool->flush_rbt, &bpage, &bpage);
	ut_a(c_node != NULL);

	/* Get the predecessor. */
	p_node = rbt_prev(buf_pool->flush_rbt, c_node);

	if (p_node != NULL) {
		buf_page_t**	value;

		value = rbt_value(buf_page_t*, p_node);
		ut_a(*value != NULL);

		prev = *value;
	}

	return(prev);
}
/*********************************************************//**
Deletes a bpage from the flush_rbt. */
static
void
buf_flush_delete_from_flush_rbt(
/*============================*/
	buf_page_t*	bpage)	/*!< in: bpage to be removed. */
{
#ifdef UNIV_DEBUG
	ibool		ret = FALSE;
#endif /* UNIV_DEBUG */
	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);

	ut_ad(buf_flush_list_mutex_own(buf_pool));

#ifdef UNIV_DEBUG
	ret =
#endif /* UNIV_DEBUG */
	rbt_delete(buf_pool->flush_rbt, &bpage);
	ut_ad(ret);
}
/*****************************************************************//**
Compare two modified blocks in the buffer pool. The key for comparison
is:
key = <oldest_modification, space, offset>
This comparison is used to maintain ordering of blocks in the
buf_pool->flush_rbt.
Note that for the purpose of flush_rbt, we only need to order blocks
on the oldest_modification. The other two fields are used to uniquely
identify the blocks.
@return < 0 if b2 < b1, 0 if b2 == b1, > 0 if b2 > b1 */
static
int
buf_flush_block_cmp(
/*================*/
	const void*	p1,		/*!< in: block1 */
	const void*	p2)		/*!< in: block2 */
{
	int			ret;
	const buf_page_t*	b1 = *(const buf_page_t**) p1;
	const buf_page_t*	b2 = *(const buf_page_t**) p2;
#ifdef UNIV_DEBUG
	buf_pool_t*		buf_pool = buf_pool_from_bpage(b1);
#endif /* UNIV_DEBUG */

	ut_ad(b1 != NULL);
	ut_ad(b2 != NULL);

	ut_ad(buf_flush_list_mutex_own(buf_pool));

	ut_ad(b1->in_flush_list);
	ut_ad(b2->in_flush_list);

	if (b2->oldest_modification > b1->oldest_modification) {
		return(1);
	} else if (b2->oldest_modification < b1->oldest_modification) {
		return(-1);
	}

	/* If oldest_modification is same then decide on the space. */
	ret = (int)(b2->space - b1->space);

	/* Or else decide ordering on the offset field. */
	return(ret ? ret : (int)(b2->offset - b1->offset));
}
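/* Illustrative sketch (not part of the original file): how the
<oldest_modification, space, offset> key behaves for two hypothetical
dirty pages.  The field values below are invented and only demonstrate
the ordering rule implemented by buf_flush_block_cmp(). */
#if 0
	/* p1: oldest_modification = 100, space = 5, offset = 7
	   p2: oldest_modification = 100, space = 5, offset = 9 */
	const buf_page_t*	a1 = &p1;
	const buf_page_t*	a2 = &p2;
	int			order;

	/* The comparator goes through double indirection because the
	rbt stores buf_page_t* values.  With equal oldest_modification
	and equal space, the result is (int)(9 - 7) > 0, i.e. p2 sorts
	after p1 in the flush_rbt. */
	order = buf_flush_block_cmp(&a1, &a2);
#endif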
/********************************************************************//**
Initialize the red-black tree to speed up insertions into the flush_list
during the recovery process. Should be called at the start of the recovery
process before any page has been read/written. */
UNIV_INTERN
void
buf_flush_init_flush_rbt(void)
/*==========================*/
{
	ulint	i;

	for (i = 0; i < srv_buf_pool_instances; i++) {
		buf_pool_t*	buf_pool;

		buf_pool = buf_pool_from_array(i);

		buf_flush_list_mutex_enter(buf_pool);

		/* Create red black tree for speedy insertions in flush list. */
		buf_pool->flush_rbt = rbt_create(
			sizeof(buf_page_t*), buf_flush_block_cmp);

		buf_flush_list_mutex_exit(buf_pool);
	}
}
/********************************************************************//**
Frees up the red-black tree. */
UNIV_INTERN
void
buf_flush_free_flush_rbt(void)
/*==========================*/
{
	ulint	i;

	for (i = 0; i < srv_buf_pool_instances; i++) {
		buf_pool_t*	buf_pool;

		buf_pool = buf_pool_from_array(i);

		buf_flush_list_mutex_enter(buf_pool);

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
		ut_a(buf_flush_validate_low(buf_pool));
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

		rbt_free(buf_pool->flush_rbt);
		buf_pool->flush_rbt = NULL;

		buf_flush_list_mutex_exit(buf_pool);
	}
}
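/* A minimal sketch (not from this file) of the assumed call pattern for
the two functions above during crash recovery; the real call sites live
in the recovery code and are only summarized here. */
#if 0
	/* Before any page is read or written for redo application: */
	buf_flush_init_flush_rbt();

	/* ... recovery applies redo records; dirty pages enter the
	flush list through buf_flush_insert_sorted_into_flush_list(),
	which consults buf_pool->flush_rbt ... */

	/* Once recovery no longer needs sorted inserts, the tree is
	discarded and buf_pool->flush_rbt becomes NULL again: */
	buf_flush_free_flush_rbt();
#endif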
/********************************************************************//**
Inserts a modified block into the flush list. */
UNIV_INTERN
void
buf_flush_insert_into_flush_list(
/*=============================*/
	buf_pool_t*	buf_pool,	/*!< buffer pool instance */
	buf_block_t*	block,		/*!< in/out: block which is modified */
	ib_uint64_t	lsn)		/*!< in: oldest modification */
{
	ut_ad(!buf_pool_mutex_own(buf_pool));
	ut_ad(log_flush_order_mutex_own());
	ut_ad(mutex_own(&block->mutex));

	buf_flush_list_mutex_enter(buf_pool);

	ut_ad((UT_LIST_GET_FIRST(buf_pool->flush_list) == NULL)
	      || (UT_LIST_GET_FIRST(buf_pool->flush_list)->oldest_modification
		  <= lsn));

	/* If we are in recovery then we need to update the flush
	red-black tree as well. */
	if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
		buf_flush_list_mutex_exit(buf_pool);
		buf_flush_insert_sorted_into_flush_list(buf_pool, block, lsn);
		return;
	}

	ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
	ut_ad(block->page.in_LRU_list);
	ut_ad(block->page.in_page_hash);
	ut_ad(!block->page.in_zip_hash);
	ut_ad(!block->page.in_flush_list);

	ut_d(block->page.in_flush_list = TRUE);
	block->page.oldest_modification = lsn;
	UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);

#ifdef UNIV_DEBUG_VALGRIND
	{
		ulint	zip_size = buf_block_get_zip_size(block);

		if (UNIV_UNLIKELY(zip_size)) {
			UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size);
		} else {
			UNIV_MEM_ASSERT_RW(block->frame, UNIV_PAGE_SIZE);
		}
	}
#endif /* UNIV_DEBUG_VALGRIND */

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
	ut_a(buf_flush_validate_low(buf_pool));
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

	buf_flush_list_mutex_exit(buf_pool);
}
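/* Sketch of the locking protocol that the asserts above require from a
caller (assumed here; presumably the mini-transaction commit path, e.g.
buf_flush_note_modification()).  'block' and 'start_lsn' are placeholders. */
#if 0
	mutex_enter(&block->mutex);
	log_flush_order_mutex_enter();

	/* The buffer pool mutex must NOT be held at this point. */
	buf_flush_insert_into_flush_list(
		buf_pool_from_block(block), block, start_lsn);

	log_flush_order_mutex_exit();
	mutex_exit(&block->mutex);
#endif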
/********************************************************************//**
Inserts a modified block into the flush list in the right sorted position.
This function is used by recovery, because there the modifications do not
necessarily come in the order of lsn's. */
UNIV_INTERN
void
buf_flush_insert_sorted_into_flush_list(
/*====================================*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	buf_block_t*	block,		/*!< in/out: block which is modified */
	ib_uint64_t	lsn)		/*!< in: oldest modification */
{
	buf_page_t*	prev_b;
	buf_page_t*	b;

	ut_ad(!buf_pool_mutex_own(buf_pool));
	ut_ad(log_flush_order_mutex_own());
	ut_ad(mutex_own(&block->mutex));
	ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);

	buf_flush_list_mutex_enter(buf_pool);

	/* The field in_LRU_list is protected by buf_pool_mutex, which
	we are not holding. However, while a block is in the flush
	list, it is dirty and cannot be discarded, not from the
	page_hash or from the LRU list. At most, the uncompressed
	page frame of a compressed block may be discarded or created
	(copying the block->page to or from a buf_page_t that is
	dynamically allocated from buf_buddy_alloc()). Because those
	transitions hold block->mutex and the flush list mutex (via
	buf_flush_relocate_on_flush_list()), there is no possibility
	of a race condition in the assertions below. */
	ut_ad(block->page.in_LRU_list);
	ut_ad(block->page.in_page_hash);
	/* buf_buddy_block_register() will take a block in the
	BUF_BLOCK_MEMORY state, not a file page. */
	ut_ad(!block->page.in_zip_hash);

	ut_ad(!block->page.in_flush_list);
	ut_d(block->page.in_flush_list = TRUE);
	block->page.oldest_modification = lsn;

#ifdef UNIV_DEBUG_VALGRIND
	{
		ulint	zip_size = buf_block_get_zip_size(block);

		if (UNIV_UNLIKELY(zip_size)) {
			UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size);
		} else {
			UNIV_MEM_ASSERT_RW(block->frame, UNIV_PAGE_SIZE);
		}
	}
#endif /* UNIV_DEBUG_VALGRIND */

	prev_b = NULL;

	/* For the most part when this function is called the flush_rbt
	should not be NULL. In a very rare boundary case it is possible
	that the flush_rbt has already been freed by the recovery thread
	before the last page was hooked up in the flush_list by the
	io-handler thread. In that case we'll just do a simple
	linear search in the else block. */
	if (buf_pool->flush_rbt) {

		prev_b = buf_flush_insert_in_flush_rbt(&block->page);
	} else {

		b = UT_LIST_GET_FIRST(buf_pool->flush_list);

		while (b && b->oldest_modification
		       > block->page.oldest_modification) {
			ut_ad(b->in_flush_list);
			prev_b = b;
			b = UT_LIST_GET_NEXT(list, b);
		}
	}

	if (prev_b == NULL) {
		UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
	} else {
		UT_LIST_INSERT_AFTER(list, buf_pool->flush_list,
				     prev_b, &block->page);
	}

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
	ut_a(buf_flush_validate_low(buf_pool));
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

	buf_flush_list_mutex_exit(buf_pool);
}
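/* Worked example (invented values) of why the sorted variant is needed
during recovery: suppose the flush list currently holds pages with
oldest_modification 500, 300 and 100, ordered from the head down, and an
io-handler thread now adds a page whose first modification has lsn 250.
A plain UT_LIST_ADD_FIRST() would violate the descending order that
buf_flush_validate_low() checks, so the page is inserted after the
predecessor found via the flush_rbt (or via the linear search above):

	head -> 500 -> 300 -> [250] -> 100 -> NULL                      */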
	/* If the flush_rbt is active then delete from there as well. */
	if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
		buf_flush_delete_from_flush_rbt(bpage);
	}

	/* Must be done after we have removed it from the flush_rbt
	because we assert on in_flush_list in comparison function. */
	ut_d(bpage->in_flush_list = FALSE);

	bpage->oldest_modification = 0;

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
	ut_a(buf_flush_validate_low(buf_pool));
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

	buf_flush_list_mutex_exit(buf_pool);
}
/*******************************************************************//**
Relocates a buffer control block on the flush_list.
Note that it is assumed that the contents of bpage have already been
copied to dpage.
IMPORTANT: When this function is called bpage and dpage are not
exact copies of each other. For example, they both will have different
::state. Also the ::list pointers in dpage may be stale. We need to
use the current list node (bpage) to do the list manipulation because
the list pointers could have changed between the time that we copied
the contents of bpage to the dpage and the flush list manipulation
below. */
UNIV_INTERN
void
buf_flush_relocate_on_flush_list(
/*=============================*/
	buf_page_t*	bpage,	/*!< in/out: control block being moved */
	buf_page_t*	dpage)	/*!< in/out: destination block */
{
	buf_page_t*	prev;
	buf_page_t*	prev_b = NULL;
	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);

	ut_ad(buf_pool_mutex_own(buf_pool));
	/* Must reside in the same buffer pool. */
	ut_ad(buf_pool == buf_pool_from_bpage(dpage));

	ut_ad(mutex_own(buf_page_get_mutex(bpage)));

	buf_flush_list_mutex_enter(buf_pool);

	/* FIXME: At this point we have both buf_pool and flush_list
	mutexes. Theoretically removal of a block from flush list is
	only covered by flush_list mutex but currently we do
	have buf_pool mutex in buf_flush_remove() therefore this block
	is guaranteed to be in the flush list. We need to check if
	this will work without the assumption of block removing code
	having the buf_pool mutex. */
	ut_ad(bpage->in_flush_list);
	ut_ad(dpage->in_flush_list);

	/* If recovery is active we must swap the control blocks in
	the flush_rbt as well. */
	if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
		buf_flush_delete_from_flush_rbt(bpage);
		prev_b = buf_flush_insert_in_flush_rbt(dpage);
	}

	/* Must be done after we have removed it from the flush_rbt
	because we assert on in_flush_list in comparison function. */
	ut_d(bpage->in_flush_list = FALSE);

	prev = UT_LIST_GET_PREV(list, bpage);
	UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);

	if (prev) {
		ut_ad(prev->in_flush_list);
		UT_LIST_INSERT_AFTER(
			list,
			buf_pool->flush_list,
			prev, dpage);
	} else {
		UT_LIST_ADD_FIRST(
			list,
			buf_pool->flush_list,
			dpage);
	}

	/* Just an extra check. Previous in flush_list
	should be the same control block as in flush_rbt. */
	ut_a(!buf_pool->flush_rbt || prev_b == prev);

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
	ut_a(buf_flush_validate_low(buf_pool));
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

	buf_flush_list_mutex_exit(buf_pool);
}
/********************************************************************//**
Flushes to disk all flushable pages within the flush area.
@return number of pages flushed */
static
ulint
buf_flush_try_neighbors(
/*====================*/
	ulint		space,		/*!< in: space id */
	ulint		offset,		/*!< in: page offset */
	enum buf_flush	flush_type,	/*!< in: BUF_FLUSH_LRU or
					BUF_FLUSH_LIST */
	ulint		n_flushed,	/*!< in: number of pages
					flushed so far in this batch */
	ulint		n_to_flush)	/*!< in: maximum number of pages
					we are allowed to flush */
{
	ulint		i;
	ulint		low;
	ulint		high;
	ulint		count = 0;
	buf_pool_t*	buf_pool = buf_pool_get(space, offset);

	ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);

	if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) {
		/* If there is little space, it is better not to flush
		any block except from the end of the LRU list */

		low = offset;
		high = offset + 1;
	} else {
		/* When flushed, dirty blocks are searched in
		neighborhoods of this size, and flushed along with the
		original page. */

		ulint	buf_flush_area;

		buf_flush_area = ut_min(
			BUF_READ_AHEAD_AREA(buf_pool),
			buf_pool->curr_size / 16);

		low = (offset / buf_flush_area) * buf_flush_area;
		high = (offset / buf_flush_area + 1) * buf_flush_area;
	}
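/* Worked example with invented numbers for the neighborhood computed
above: if BUF_READ_AHEAD_AREA(buf_pool) is 64 pages and
buf_pool->curr_size / 16 is larger, then for offset 1000:

	buf_flush_area = 64
	low  = (1000 / 64) * 64       =  960
	high = (1000 / 64 + 1) * 64   = 1024

so the pages in [960, 1024) are inspected and any that are ready are
flushed together with the originally requested page. */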
		if (buf_flush_ready_for_flush(bpage, flush_type)
		    && (i == offset || !bpage->buf_fix_count)) {
			/* We only try to flush those
			neighbors != offset where the buf fix
			count is zero, as we then know that we
			probably can latch the page without a
			semaphore wait. Semaphore waits are
			expensive because we must flush the
			doublewrite buffer before we start
			waiting. */

			buf_flush_page(buf_pool, bpage, flush_type);
			ut_ad(!mutex_own(block_mutex));
			ut_ad(!buf_pool_mutex_own(buf_pool));
			count++;
		} else {
			mutex_exit(block_mutex);
			buf_pool_mutex_exit(buf_pool);
		}
/********************************************************************//**
Check if the block is modified and ready for flushing. If the block
is ready to flush then flush the page and try to flush its neighbors.
@return TRUE if buf_pool mutex was not released during this function.
This does not guarantee that some pages were written as well.
The number of pages written is added to the count. */
static
ibool
buf_flush_page_and_try_neighbors(
/*=============================*/
	buf_page_t*	bpage,		/*!< in: buffer control block,
					must be
					buf_page_in_file(bpage) */
	enum buf_flush	flush_type,	/*!< in: BUF_FLUSH_LRU
					or BUF_FLUSH_LIST */
	ulint		n_to_flush,	/*!< in: number of pages to
					flush */
	ulint*		count)		/*!< in/out: number of pages
					flushed */
{
	mutex_t*	block_mutex;
	ibool		flushed = FALSE;
#ifdef UNIV_DEBUG
	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);
#endif /* UNIV_DEBUG */

	ut_ad(buf_pool_mutex_own(buf_pool));

	block_mutex = buf_page_get_mutex(bpage);
	mutex_enter(block_mutex);

	ut_a(buf_page_in_file(bpage));

	if (buf_flush_ready_for_flush(bpage, flush_type)) {
		ulint		space;
		ulint		offset;
		buf_pool_t*	buf_pool;

		buf_pool = buf_pool_from_bpage(bpage);

		buf_pool_mutex_exit(buf_pool);

		/* These fields are protected by both the
		buffer pool mutex and block mutex. */
		space = buf_page_get_space(bpage);
		offset = buf_page_get_page_no(bpage);

		mutex_exit(block_mutex);

		/* Try to flush also all the neighbors */
		*count += buf_flush_try_neighbors(space,
						  offset,
						  flush_type,
						  *count,
						  n_to_flush);

		buf_pool_mutex_enter(buf_pool);
		flushed = TRUE;
	} else {
		mutex_exit(block_mutex);
	}

	ut_ad(buf_pool_mutex_own(buf_pool));

	return(flushed);
}
/*******************************************************************//**
This utility flushes dirty blocks from the end of the LRU list.
In the case of an LRU flush the calling thread may own latches to
pages: to avoid deadlocks, this function must be written so that it
cannot end up waiting for these latches!
@return number of blocks for which the write request was queued. */
static
ulint
buf_flush_LRU_list_batch(
/*=====================*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	ulint		max)		/*!< in: max of blocks to flush */
{
	buf_page_t*	bpage;
	ulint		count = 0;

	ut_ad(buf_pool_mutex_own(buf_pool));

	do {
		/* Start from the end of the list looking for a
		suitable block to be flushed. */
		bpage = UT_LIST_GET_LAST(buf_pool->LRU);

		/* Iterate backwards over the flush list till we find
		a page that isn't ready for flushing. */
		while (bpage != NULL
		       && !buf_flush_page_and_try_neighbors(
				bpage, BUF_FLUSH_LRU, max, &count)) {

			bpage = UT_LIST_GET_PREV(LRU, bpage);
		}
	} while (bpage != NULL && count < max);

	/* We keep track of all flushes happening as part of LRU
	flush. When estimating the desired rate at which flush_list
	should be flushed, we factor in this value. */
	buf_lru_flush_page_count += count;

	ut_ad(buf_pool_mutex_own(buf_pool));

	return(count);
}
/*******************************************************************//**
This utility flushes dirty blocks from the end of the flush_list.
The calling thread is not allowed to own any latches on pages!
@return number of blocks for which the write request was queued;
ULINT_UNDEFINED if there was a flush of the same type already running */
static
ulint
buf_flush_flush_list_batch(
/*=======================*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	ulint		min_n,		/*!< in: wished minimum number
					of blocks flushed (it is not
					guaranteed that the actual
					number is that big, though) */
	ib_uint64_t	lsn_limit)	/*!< all blocks whose
					oldest_modification is smaller
					than this should be flushed (if
					their number does not exceed
					min_n), otherwise ignored */
{
	ulint		len;
	buf_page_t*	bpage;
	ulint		count = 0;

	ut_ad(buf_pool_mutex_own(buf_pool));

	do {
		/* If we have flushed enough, leave the loop */

		/* Start from the end of the list looking for a suitable
		block to be flushed. */

		buf_flush_list_mutex_enter(buf_pool);

		/* We use len here because theoretically insertions can
		happen in the flush_list below while we are traversing
		it for a suitable candidate for flushing. We'd like to
		set a limit on how far we are willing to traverse
		the list. */
		len = UT_LIST_GET_LEN(buf_pool->flush_list);
		bpage = UT_LIST_GET_LAST(buf_pool->flush_list);

		if (bpage) {
			ut_a(bpage->oldest_modification > 0);
		}

		if (!bpage || bpage->oldest_modification >= lsn_limit) {

			/* We have flushed enough */
			buf_flush_list_mutex_exit(buf_pool);
			break;
		}

		ut_a(bpage->oldest_modification > 0);

		ut_ad(bpage->in_flush_list);

		buf_flush_list_mutex_exit(buf_pool);

		/* The list may change during the flushing and we cannot
		safely preserve within this function a pointer to a
		block in the list! */
		while (bpage != NULL
		       && len > 0
		       && !buf_flush_page_and_try_neighbors(
				bpage, BUF_FLUSH_LIST, min_n, &count)) {

			buf_flush_list_mutex_enter(buf_pool);

			/* If we are here that means that buf_pool->mutex
			was not released in buf_flush_page_and_try_neighbors()
			above and this guarantees that bpage didn't get
			relocated since we released the flush_list
			mutex above. There is a chance, however, that
			the bpage got removed from flush_list (not
			currently possible because flush_list_remove()
			also obtains buf_pool mutex but that may change
			in future). To avoid this scenario we check
			the oldest_modification and if it is zero
			we start all over again. */
			if (bpage->oldest_modification == 0) {
				buf_flush_list_mutex_exit(buf_pool);
				break;
			}

			bpage = UT_LIST_GET_PREV(list, bpage);

			ut_ad(!bpage || bpage->in_flush_list);

			buf_flush_list_mutex_exit(buf_pool);

			--len;
		}

	} while (count < min_n && bpage != NULL && len > 0);

	ut_ad(buf_pool_mutex_own(buf_pool));

	return(count);
}
/*******************************************************************//**
This utility flushes dirty blocks from the end of the LRU list or
the flush_list.
@return number of blocks for which the write request was queued */
static
ulint
buf_flush_batch(
/*============*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	enum buf_flush	flush_type,	/*!< in: BUF_FLUSH_LRU or
					BUF_FLUSH_LIST */
	ulint		min_n,		/*!< in: wished minimum number of blocks
					flushed (it is not guaranteed that the
					actual number is that big, though) */
	ib_uint64_t	lsn_limit)	/*!< in: in the case of BUF_FLUSH_LIST
					all blocks whose oldest_modification is
					smaller than this should be flushed
					(if their number does not exceed
					min_n), otherwise ignored */
{
	ulint		count = 0;

	ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
#ifdef UNIV_SYNC_DEBUG
	ut_ad((flush_type != BUF_FLUSH_LIST)
	      || sync_thread_levels_empty_gen(TRUE));
#endif /* UNIV_SYNC_DEBUG */

	buf_pool_mutex_enter(buf_pool);

	/* Note: The buffer pool mutex is released and reacquired within
	the flush functions. */
	switch (flush_type) {
	case BUF_FLUSH_LRU:
		count = buf_flush_LRU_list_batch(buf_pool, min_n);
		break;
	case BUF_FLUSH_LIST:
		count = buf_flush_flush_list_batch(buf_pool, min_n, lsn_limit);
		break;
	default:
		ut_error;
	}

	buf_pool_mutex_exit(buf_pool);

	buf_flush_buffered_writes();

#ifdef UNIV_DEBUG
	if (buf_debug_prints && count > 0) {
		fprintf(stderr, flush_type == BUF_FLUSH_LRU
			? "Flushed %lu pages in LRU flush\n"
			: "Flushed %lu pages in flush list flush\n",
			(ulong) count);
	}
#endif /* UNIV_DEBUG */

	srv_buf_pool_flushed += count;

	return(count);
}
/******************************************************************//**
Gather the aggregated stats for both flush list and LRU list flushing */
static
void
buf_flush_common(
/*=============*/
	enum buf_flush	flush_type,	/*!< in: type of flush */
	ulint		page_count)	/*!< in: number of pages flushed */
{
	buf_flush_buffered_writes();

	ut_a(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);

#ifdef UNIV_DEBUG
	if (buf_debug_prints && page_count > 0) {
		fprintf(stderr, flush_type == BUF_FLUSH_LRU
			? "Flushed %lu pages in LRU flush\n"
			: "Flushed %lu pages in flush list flush\n",
			(ulong) page_count);
	}
#endif /* UNIV_DEBUG */

	srv_buf_pool_flushed += page_count;

	if (flush_type == BUF_FLUSH_LRU) {
		/* We keep track of all flushes happening as part of LRU
		flush. When estimating the desired rate at which flush_list
		should be flushed we factor in this value. */
		buf_lru_flush_page_count += page_count;
	}
}
/******************************************************************//**
Start a buffer flush batch for LRU or flush list */
static
ibool
buf_flush_start(
/*============*/
	buf_pool_t*	buf_pool,	/*!< buffer pool instance */
	enum buf_flush	flush_type)	/*!< in: BUF_FLUSH_LRU
					or BUF_FLUSH_LIST */
{
	buf_pool_mutex_enter(buf_pool);

	if (buf_pool->n_flush[flush_type] > 0
	    || buf_pool->init_flush[flush_type] == TRUE) {

		/* There is already a flush batch of the same type running */

		buf_pool_mutex_exit(buf_pool);

		return(FALSE);
	}

	buf_pool->init_flush[flush_type] = TRUE;

	buf_pool_mutex_exit(buf_pool);

	return(TRUE);
}
/******************************************************************//**
End a buffer flush batch for LRU or flush list */
static
void
buf_flush_end(
/*==========*/
	buf_pool_t*	buf_pool,	/*!< buffer pool instance */
	enum buf_flush	flush_type)	/*!< in: BUF_FLUSH_LRU
					or BUF_FLUSH_LIST */
{
	buf_pool_mutex_enter(buf_pool);

	buf_pool->init_flush[flush_type] = FALSE;

	if (buf_pool->n_flush[flush_type] == 0) {

		/* The running flush batch has ended */

		os_event_set(buf_pool->no_flush[flush_type]);
	}

	buf_pool_mutex_exit(buf_pool);
}
/******************************************************************//**
Waits until a flush batch of the given type ends */
UNIV_INTERN
void
buf_flush_wait_batch_end(
/*=====================*/
	buf_pool_t*	buf_pool,	/*!< buffer pool instance */
	enum buf_flush	type)		/*!< in: BUF_FLUSH_LRU
					or BUF_FLUSH_LIST */
{
	ut_ad(type == BUF_FLUSH_LRU || type == BUF_FLUSH_LIST);

	if (buf_pool == NULL) {
		ulint	i;

		for (i = 0; i < srv_buf_pool_instances; ++i) {
			buf_pool_t*	buf_pool;

			buf_pool = buf_pool_from_array(i);

			os_event_wait(buf_pool->no_flush[type]);
		}
	} else {
		os_event_wait(buf_pool->no_flush[type]);
	}
}
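/* Usage sketch (assumed from the branch above): passing NULL waits for
the given batch type to end in every buffer pool instance, while passing
a specific instance waits only for that one. */
#if 0
	/* Wait for LRU batches in all instances. */
	buf_flush_wait_batch_end(NULL, BUF_FLUSH_LRU);

	/* Wait only for the flush-list batch of instance 0. */
	buf_flush_wait_batch_end(buf_pool_from_array(0), BUF_FLUSH_LIST);
#endif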
/*******************************************************************//**
This utility flushes dirty blocks from the end of the LRU list.
NOTE: The calling thread may own latches to pages: to avoid deadlocks,
this function must be written so that it cannot end up waiting for these
latches!
@return number of blocks for which the write request was queued;
ULINT_UNDEFINED if there was a flush of the same type already running */
UNIV_INTERN
ulint
buf_flush_LRU(
/*==========*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	ulint		min_n)		/*!< in: wished minimum number of blocks
					flushed (it is not guaranteed that the
					actual number is that big, though) */
{
	ulint		page_count;

	if (!buf_flush_start(buf_pool, BUF_FLUSH_LRU)) {
		return(ULINT_UNDEFINED);
	}

	page_count = buf_flush_batch(buf_pool, BUF_FLUSH_LRU, min_n, 0);

	buf_flush_end(buf_pool, BUF_FLUSH_LRU);

	buf_flush_common(BUF_FLUSH_LRU, page_count);

	return(page_count);
}
/*******************************************************************//**
This utility flushes dirty blocks from the end of the flush list of
all buffer pool instances.
NOTE: The calling thread is not allowed to own any latches on pages!
@return number of blocks for which the write request was queued;
ULINT_UNDEFINED if there was a flush of the same type already running */
UNIV_INTERN
ulint
buf_flush_list(
/*===========*/
	ulint		min_n,		/*!< in: wished minimum number of blocks
					flushed (it is not guaranteed that the
					actual number is that big, though) */
	ib_uint64_t	lsn_limit)	/*!< in the case BUF_FLUSH_LIST all
					blocks whose oldest_modification is
					smaller than this should be flushed
					(if their number does not exceed
					min_n), otherwise ignored */
{
	ulint		i;
	ulint		total_page_count = 0;
	ibool		skipped = FALSE;

	if (min_n != ULINT_MAX) {
		/* Ensure that flushing is spread evenly amongst the
		buffer pool instances. When min_n is ULINT_MAX
		we need to flush everything up to the lsn limit
		so no limit here. */
		min_n = (min_n + srv_buf_pool_instances - 1)
			 / srv_buf_pool_instances;
	}

	/* Flush to lsn_limit in all buffer pool instances */
	for (i = 0; i < srv_buf_pool_instances; i++) {
		buf_pool_t*	buf_pool;
		ulint		page_count = 0;

		buf_pool = buf_pool_from_array(i);

		if (!buf_flush_start(buf_pool, BUF_FLUSH_LIST)) {
			/* We have two choices here. If lsn_limit was
			specified then skipping an instance of buffer
			pool means we cannot guarantee that all pages
			up to lsn_limit have been flushed. We can
			return right now with failure or we can try
			to flush remaining buffer pools up to the
			lsn_limit. We attempt to flush other buffer
			pools based on the assumption that it will
			help in the retry which will follow the
			failure. */
			skipped = TRUE;

			continue;
		}

		page_count = buf_flush_batch(
			buf_pool, BUF_FLUSH_LIST, min_n, lsn_limit);

		buf_flush_end(buf_pool, BUF_FLUSH_LIST);

		buf_flush_common(BUF_FLUSH_LIST, page_count);

		total_page_count += page_count;
	}

	return(lsn_limit != IB_ULONGLONG_MAX && skipped
	       ? ULINT_UNDEFINED : total_page_count);
}
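/* Worked example (invented numbers) for the per-instance split above:
with srv_buf_pool_instances == 4 and a caller asking for min_n == 10
pages in total, the rounded-up division

	min_n = (10 + 4 - 1) / 4 = 3

asks each instance for 3 pages (up to 12 in total), so the work is
spread evenly instead of being taken entirely from the first instance. */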
/******************************************************************//**
Gives a recommendation of how many blocks should be flushed to establish
a big enough margin of replaceable blocks near the end of the LRU list
and in the free list.
@return number of blocks which should be flushed from the end of the
LRU list */
/*********************************************************************//**
Flushes pages from the end of the LRU list if there is too small a margin
of replaceable pages there. Only 'old' pages are flushed, using
asynchronous i/o, so that the i/o handler thread can release the page
latch immediately, without waiting. */
UNIV_INTERN
void
buf_flush_free_margin(
/*==================*/
	buf_pool_t*	buf_pool)	/*!< in: Buffer pool instance */
{
	ulint	n_to_flush;

	n_to_flush = buf_flush_LRU_recommendation(buf_pool);

	if (n_to_flush > 0) {
		ulint	n_flushed;

		n_flushed = buf_flush_LRU(buf_pool, n_to_flush);

		if (n_flushed == ULINT_UNDEFINED) {
			/* There was an LRU type flush batch already running;
			let us wait for it to end */

			buf_flush_wait_batch_end(buf_pool, BUF_FLUSH_LRU);
		}
	}
}
/*********************************************************************//**
Flushes pages from the end of all the LRU lists. */
UNIV_INTERN
void
buf_flush_free_margins(void)
/*========================*/
{
	ulint	i;

	for (i = 0; i < srv_buf_pool_instances; i++) {
		buf_pool_t*	buf_pool;

		buf_pool = buf_pool_from_array(i);

		buf_flush_free_margin(buf_pool);
	}
}
/*********************************************************************
Update the historical stats that we are collecting for flush rate
heuristics at the end of each interval. */
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/******************************************************************//**
Validates the flush list.
@return TRUE if ok */
static
ibool
buf_flush_validate_low(
/*===================*/
	buf_pool_t*	buf_pool)	/*!< in: Buffer pool instance */
{
	buf_page_t*		bpage;
	const ib_rbt_node_t*	rnode = NULL;

	ut_ad(buf_flush_list_mutex_own(buf_pool));

	UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list,
			 ut_ad(ut_list_node_313->in_flush_list));

	bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);

	/* If we are in recovery mode i.e.: flush_rbt != NULL
	then each block in the flush_list must also be present
	in the flush_rbt. */
	if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
		rnode = rbt_first(buf_pool->flush_rbt);
	}

	while (bpage != NULL) {
		const ib_uint64_t om = bpage->oldest_modification;

		ut_ad(buf_pool_from_bpage(bpage) == buf_pool);

		ut_ad(bpage->in_flush_list);

		/* A page in flush_list can be in BUF_BLOCK_REMOVE_HASH
		state. This happens when a page is in the middle of
		being relocated. In that case the original descriptor
		can have this state and still be in the flush list
		waiting to acquire the flush_list_mutex to complete
		the relocation. */
		ut_a(buf_page_in_file(bpage)
		     || buf_page_get_state(bpage) == BUF_BLOCK_REMOVE_HASH);

		if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
			buf_page_t**	prpage;

			ut_a(rnode);
			prpage = rbt_value(buf_page_t*, rnode);

			ut_a(prpage);
			ut_a(*prpage == bpage);
			rnode = rbt_next(buf_pool->flush_rbt, rnode);
		}

		bpage = UT_LIST_GET_NEXT(list, bpage);

		ut_a(!bpage || om >= bpage->oldest_modification);
	}

	/* By this time we must have exhausted the traversal of
	flush_rbt (if active) as well. */
	ut_a(rnode == NULL);

	return(TRUE);
}
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */