@return	TRUE if ok */
static
ibool
buf_flush_validate_low(
/*===================*/
	buf_pool_t*	buf_pool);	/*!< in: Buffer pool instance */
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
/******************************************************************//**
Inserts a block into the flush_rbt and returns a pointer to its
predecessor or NULL if no predecessor. The ordering is maintained
on the basis of the <oldest_modification, space, offset> key.
@return	pointer to the predecessor or NULL if no predecessor. */
static
buf_page_t*
buf_flush_insert_in_flush_rbt(
/*==========================*/
	buf_page_t*	bpage)	/*!< in: bpage to be inserted. */
{
	const ib_rbt_node_t*	c_node;
	const ib_rbt_node_t*	p_node;
	buf_page_t*		prev = NULL;
	buf_pool_t*		buf_pool = buf_pool_from_bpage(bpage);

	ut_ad(buf_flush_list_mutex_own(buf_pool));

	/* Insert this buffer into the rbt. */
	c_node = rbt_insert(buf_pool->flush_rbt, &bpage, &bpage);
	ut_a(c_node != NULL);

	/* Get the predecessor. */
	p_node = rbt_prev(buf_pool->flush_rbt, c_node);

	if (p_node != NULL) {
		buf_page_t**	value;

		value = rbt_value(buf_page_t*, p_node);
		prev = *value;
	}

	return(prev);
}
/*********************************************************//**
Deletes a bpage from the flush_rbt. */
static
void
buf_flush_delete_from_flush_rbt(
/*============================*/
	buf_page_t*	bpage)	/*!< in: bpage to be removed. */
{
#ifdef UNIV_DEBUG
	ibool		ret = FALSE;
#endif /* UNIV_DEBUG */
	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);

	ut_ad(buf_flush_list_mutex_own(buf_pool));

#ifdef UNIV_DEBUG
	ret =
#endif /* UNIV_DEBUG */
	rbt_delete(buf_pool->flush_rbt, &bpage);

	ut_ad(ret);
}
/*****************************************************************//**
Compare two modified blocks in the buffer pool. The key for comparison
is:
key = <oldest_modification, space, offset>
This comparison is used to maintain the ordering of blocks in the
flush_rbt.
Note that for the purpose of flush_rbt, we only need to order blocks
on the oldest_modification. The other two fields are used to uniquely
identify the blocks.
@return	< 0 if b2 < b1, 0 if b2 == b1, > 0 if b2 > b1 */
static
int
buf_flush_block_cmp(
/*================*/
	const void*	p1,		/*!< in: block1 */
	const void*	p2)		/*!< in: block2 */
{
	int			ret;
	const buf_page_t*	b1 = *(const buf_page_t**) p1;
	const buf_page_t*	b2 = *(const buf_page_t**) p2;
#ifdef UNIV_DEBUG
	buf_pool_t*		buf_pool = buf_pool_from_bpage(b1);
#endif /* UNIV_DEBUG */

	ut_ad(buf_flush_list_mutex_own(buf_pool));

	ut_ad(b1->in_flush_list);
	ut_ad(b2->in_flush_list);

	if (b2->oldest_modification > b1->oldest_modification) {
		return(1);
	} else if (b2->oldest_modification < b1->oldest_modification) {
		return(-1);
	}

	/* If oldest_modification is same then decide on the space. */
	ret = (int)(b2->space - b1->space);

	/* Or else decide ordering on the offset field. */
	return(ret ? ret : (int)(b2->offset - b1->offset));
}
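
/* Editor's note: a minimal standalone sketch (not InnoDB code) of the same
<oldest_modification, space, offset> comparison idea, written against a
hypothetical example_page_t struct so it can be compiled and tested in
isolation, e.g. over an array of example_page_t pointers with qsort().
All names below are illustrative. */
typedef struct example_page_struct {
	unsigned long long	oldest_modification;	/* LSN of oldest change */
	unsigned long		space;			/* tablespace id */
	unsigned long		offset;			/* page number */
} example_page_t;

/* Order pages primarily by oldest_modification; space and offset only
break ties so that two distinct pages never compare as equal. */
static int
example_page_cmp(const void* p1, const void* p2)
{
	const example_page_t*	b1 = *(const example_page_t**) p1;
	const example_page_t*	b2 = *(const example_page_t**) p2;

	if (b2->oldest_modification > b1->oldest_modification) {
		return(1);
	} else if (b2->oldest_modification < b1->oldest_modification) {
		return(-1);
	}

	if (b2->space != b1->space) {
		return(b2->space > b1->space ? 1 : -1);
	}

	return(b2->offset > b1->offset
	       ? 1 : (b2->offset < b1->offset ? -1 : 0));
}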
/********************************************************************//**
Initialize the red-black tree to speed up insertions into the flush_list
during recovery process. Should be called at the start of recovery
process before any page has been read/written. */
UNIV_INTERN
void
buf_flush_init_flush_rbt(void)
/*==========================*/
{
	ulint	i;

	for (i = 0; i < srv_buf_pool_instances; i++) {
		buf_pool_t*	buf_pool;

		buf_pool = buf_pool_from_array(i);

		buf_flush_list_mutex_enter(buf_pool);

		/* Create red black tree for speedy insertions in flush list. */
		buf_pool->flush_rbt = rbt_create(
			sizeof(buf_page_t*), buf_flush_block_cmp);

		buf_flush_list_mutex_exit(buf_pool);
	}
}
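
/* Editor's note: the loops above and below visit every buffer pool
instance via buf_pool_from_array(i), while page-level code later picks a
single instance per page with buf_pool_get(space, offset).  The standalone
sketch below (not InnoDB code) shows the general idea of statically
partitioning pages across N instances; the mixing function is illustrative
only and is not the hash that buf_pool_get() really uses. */
static unsigned long
example_pick_instance(
	unsigned long	space,		/* tablespace id */
	unsigned long	offset,		/* page number inside the space */
	unsigned long	n_instances)	/* how many buffer pool instances */
{
	/* Fold the two ids together so consecutive pages of one
	tablespace spread over the instances, then reduce modulo the
	instance count. */
	unsigned long	fold = space * 1000003UL + offset;

	return(fold % n_instances);
}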
/********************************************************************//**
Frees up the red-black tree. */
UNIV_INTERN
void
buf_flush_free_flush_rbt(void)
/*==========================*/
{
	ulint	i;

	for (i = 0; i < srv_buf_pool_instances; i++) {
		buf_pool_t*	buf_pool;

		buf_pool = buf_pool_from_array(i);

		buf_flush_list_mutex_enter(buf_pool);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
		ut_a(buf_flush_validate_low(buf_pool));
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

		rbt_free(buf_pool->flush_rbt);
		buf_pool->flush_rbt = NULL;

		buf_flush_list_mutex_exit(buf_pool);
	}
}
/********************************************************************//**
Inserts a modified block into the flush list. */
UNIV_INTERN
void
buf_flush_insert_into_flush_list(
/*=============================*/
	buf_pool_t*	buf_pool,	/*!< buffer pool instance */
	buf_block_t*	block,		/*!< in/out: block which is modified */
	ib_uint64_t	lsn)		/*!< in: oldest modification */
{
	ut_ad(!buf_pool_mutex_own(buf_pool));
	ut_ad(log_flush_order_mutex_own());
	ut_ad(mutex_own(&block->mutex));

	buf_flush_list_mutex_enter(buf_pool);
	ut_ad((UT_LIST_GET_FIRST(buf_pool->flush_list) == NULL)
	      || (UT_LIST_GET_FIRST(buf_pool->flush_list)->oldest_modification
		  <= lsn));
	/* If we are in the recovery then we need to update the flush
	red-black tree as well. */
	if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
		buf_flush_list_mutex_exit(buf_pool);
		buf_flush_insert_sorted_into_flush_list(buf_pool, block, lsn);
		return;
	}
	ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
	ut_ad(block->page.in_LRU_list);
	ut_ad(block->page.in_page_hash);
	ut_ad(!block->page.in_zip_hash);
	ut_ad(!block->page.in_flush_list);

	ut_d(block->page.in_flush_list = TRUE);
	block->page.oldest_modification = lsn;
	UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
#ifdef UNIV_DEBUG_VALGRIND
	{
		ulint	zip_size = buf_block_get_zip_size(block);

		if (UNIV_UNLIKELY(zip_size)) {
			UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size);
		} else {
			UNIV_MEM_ASSERT_RW(block->frame, UNIV_PAGE_SIZE);
		}
	}
#endif /* UNIV_DEBUG_VALGRIND */
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
	ut_a(buf_flush_validate_low(buf_pool));
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

	buf_flush_list_mutex_exit(buf_pool);
}
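
/* Editor's note: outside of recovery every new entry carries the current
(highest) oldest_modification, so adding at the list head is enough to keep
the flush_list ordered by decreasing oldest_modification.  A standalone
sketch (not InnoDB code) of that invariant, using a hypothetical
example_node_t singly linked list: */
typedef struct example_node_struct {
	unsigned long long		oldest_modification;
	struct example_node_struct*	next;
} example_node_t;

/* Push a newly modified node at the head.  The caller is expected to pass
an LSN >= the current head's LSN; that precondition is what keeps the list
sorted in descending order.  Returns the new head, or the old head
unchanged if the precondition does not hold. */
static example_node_t*
example_flush_list_add_first(
	example_node_t*		head,
	example_node_t*		node,
	unsigned long long	lsn)
{
	if (head != NULL && head->oldest_modification > lsn) {
		/* Would break the descending order: reject. */
		return(head);
	}

	node->oldest_modification = lsn;
	node->next = head;

	return(node);
}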
/********************************************************************//**
Inserts a modified block into the flush list in the right sorted position.
This function is used by recovery, because there the modifications do not
necessarily come in the order of lsn's. */
static
void
buf_flush_insert_sorted_into_flush_list(
/*====================================*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	buf_block_t*	block,		/*!< in/out: block which is modified */
	ib_uint64_t	lsn)		/*!< in: oldest modification */
{
	buf_page_t*	prev_b;
	buf_page_t*	b;

	ut_ad(!buf_pool_mutex_own(buf_pool));
	ut_ad(log_flush_order_mutex_own());
	ut_ad(mutex_own(&block->mutex));
	ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);

	buf_flush_list_mutex_enter(buf_pool);
	/* The field in_LRU_list is protected by buf_pool_mutex, which
	we are not holding. However, while a block is in the flush
	list, it is dirty and cannot be discarded, not from the
	page_hash or from the LRU list. At most, the uncompressed
	page frame of a compressed block may be discarded or created
	(copying the block->page to or from a buf_page_t that is
	dynamically allocated from buf_buddy_alloc()). Because those
	transitions hold block->mutex and the flush list mutex (via
	buf_flush_relocate_on_flush_list()), there is no possibility
	of a race condition in the assertions below. */
	ut_ad(block->page.in_LRU_list);
	ut_ad(block->page.in_page_hash);
	/* buf_buddy_block_register() will take a block in the
	BUF_BLOCK_MEMORY state, not a file page. */
	ut_ad(!block->page.in_zip_hash);

	ut_ad(!block->page.in_flush_list);
	ut_d(block->page.in_flush_list = TRUE);
	block->page.oldest_modification = lsn;
#ifdef UNIV_DEBUG_VALGRIND
	{
		ulint	zip_size = buf_block_get_zip_size(block);

		if (UNIV_UNLIKELY(zip_size)) {
			UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size);
		} else {
			UNIV_MEM_ASSERT_RW(block->frame, UNIV_PAGE_SIZE);
		}
	}
#endif /* UNIV_DEBUG_VALGRIND */
	prev_b = NULL;

	/* For the most part when this function is called the flush_rbt
	should not be NULL. In a very rare boundary case it is possible
	that the flush_rbt has already been freed by the recovery thread
	before the last page was hooked up in the flush_list by the
	io-handler thread. In that case we'll just do a simple
	linear search in the else block. */
	if (buf_pool->flush_rbt) {

		prev_b = buf_flush_insert_in_flush_rbt(&block->page);
	} else {

		b = UT_LIST_GET_FIRST(buf_pool->flush_list);

		while (b && b->oldest_modification
		       > block->page.oldest_modification) {
			ut_ad(b->in_flush_list);
			prev_b = b;
			b = UT_LIST_GET_NEXT(list, b);
		}
	}
	if (prev_b == NULL) {
		UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
	} else {
		UT_LIST_INSERT_AFTER(list, buf_pool->flush_list,
				     prev_b, &block->page);
	}

	buf_flush_list_mutex_exit(buf_pool);
}
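
/* Editor's note: during recovery the lsn's do not arrive in order, so the
block has to be placed behind every entry with a larger oldest_modification.
A standalone sketch (not InnoDB code) of the same linear scan over the
example_node_t list defined earlier in this file; in the real code the
flush_rbt usually answers the "who is my predecessor?" question instead of
this O(n) walk. */
static example_node_t*
example_flush_list_insert_sorted(
	example_node_t*	head,
	example_node_t*	node)	/* node->oldest_modification already set */
{
	example_node_t*	prev = NULL;
	example_node_t*	b = head;

	/* Walk past all entries that are newer than the node. */
	while (b != NULL
	       && b->oldest_modification > node->oldest_modification) {
		prev = b;
		b = b->next;
	}

	node->next = b;

	if (prev == NULL) {
		return(node);		/* new head */
	}

	prev->next = node;
	return(head);
}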
	/* If the flush_rbt is active then delete from there as well. */
	if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
		buf_flush_delete_from_flush_rbt(bpage);
	}

	/* Must be done after we have removed it from the flush_rbt
	because we assert on in_flush_list in comparison function. */
	ut_d(bpage->in_flush_list = FALSE);

	bpage->oldest_modification = 0;

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
	ut_a(buf_flush_validate_low(buf_pool));
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

	buf_flush_list_mutex_exit(buf_pool);
}
/*******************************************************************//**
Relocates a buffer control block on the flush_list.
Note that it is assumed that the contents of bpage have already been
copied to dpage.
IMPORTANT: When this function is called bpage and dpage are not
exact copies of each other. For example, they both will have different
::state. Also the ::list pointers in dpage may be stale. We need to
use the current list node (bpage) to do the list manipulation because
the list pointers could have changed between the time that we copied
the contents of bpage to the dpage and the flush list manipulation
below. */
UNIV_INTERN
void
buf_flush_relocate_on_flush_list(
/*=============================*/
	buf_page_t*	bpage,	/*!< in/out: control block being moved */
	buf_page_t*	dpage)	/*!< in/out: destination block */
{
	buf_page_t*	prev;
	buf_page_t*	prev_b = NULL;
	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);

	ut_ad(buf_pool_mutex_own(buf_pool));

	/* Must reside in the same buffer pool. */
	ut_ad(buf_pool == buf_pool_from_bpage(dpage));

	ut_ad(mutex_own(buf_page_get_mutex(bpage)));

	buf_flush_list_mutex_enter(buf_pool);
	/* FIXME: At this point we have both buf_pool and flush_list
	mutexes. Theoretically removal of a block from flush list is
	only covered by flush_list mutex but currently we do
	have buf_pool mutex in buf_flush_remove() therefore this block
	is guaranteed to be in the flush list. We need to check if
	this will work without the assumption of block removing code
	having the buf_pool mutex. */
	ut_ad(bpage->in_flush_list);
	ut_ad(dpage->in_flush_list);
	/* If recovery is active we must swap the control blocks in
	the flush_rbt as well. */
	if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
		buf_flush_delete_from_flush_rbt(bpage);
		prev_b = buf_flush_insert_in_flush_rbt(dpage);
	}

	/* Must be done after we have removed it from the flush_rbt
	because we assert on in_flush_list in comparison function. */
	ut_d(bpage->in_flush_list = FALSE);
	prev = UT_LIST_GET_PREV(list, bpage);
	UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);

	if (prev) {
		ut_ad(prev->in_flush_list);
		UT_LIST_INSERT_AFTER(
			list,
			buf_pool->flush_list,
			prev, dpage);
	} else {
		UT_LIST_ADD_FIRST(
			list,
			buf_pool->flush_list,
			dpage);
	}

	/* Just an extra check. Previous in flush_list
	should be the same control block as in flush_rbt. */
	ut_a(!buf_pool->flush_rbt || prev_b == prev);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
	ut_a(buf_flush_validate_low(buf_pool));
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

	buf_flush_list_mutex_exit(buf_pool);
}
/********************************************************************//**
Flushes to disk all flushable pages within the flush area.
@return	number of pages flushed */
static
ulint
buf_flush_try_neighbors(
/*====================*/
	ulint		space,		/*!< in: space id */
	ulint		offset,		/*!< in: page offset */
	enum buf_flush	flush_type,	/*!< in: BUF_FLUSH_LRU or
					BUF_FLUSH_LIST */
	ulint		n_flushed,	/*!< in: number of pages
					flushed so far in this batch */
	ulint		n_to_flush)	/*!< in: maximum number of pages
					we are allowed to flush */
{
	ulint		i;
	ulint		low;
	ulint		high;
	ulint		count = 0;
	buf_pool_t*	buf_pool = buf_pool_get(space, offset);

	ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);

	if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) {
		/* If there is little space, it is better not to flush
		any block except from the end of the LRU list */

		low = offset;
		high = offset + 1;
	} else {
		/* When flushed, dirty blocks are searched in
		neighborhoods of this size, and flushed along with the
		original page. */

		ulint	buf_flush_area;

		buf_flush_area	= ut_min(
			BUF_READ_AHEAD_AREA(buf_pool),
			buf_pool->curr_size / 16);

		low = (offset / buf_flush_area) * buf_flush_area;
		high = (offset / buf_flush_area + 1) * buf_flush_area;
	}
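
/* Editor's note: a worked example of the low/high arithmetic above.
Assuming BUF_READ_AHEAD_AREA(buf_pool) resolves to 64 and
buf_pool->curr_size / 16 is larger, buf_flush_area == 64; offset 200 then
gives low = (200 / 64) * 64 = 192 and high = (200 / 64 + 1) * 64 = 256,
i.e. the 64-page block [192, 256) that contains the page.  Standalone
helper (not InnoDB code) computing the same thing: */
static void
example_flush_neighborhood(
	unsigned long	offset,		/* page number */
	unsigned long	flush_area,	/* neighborhood size in pages */
	unsigned long*	low,		/* out: first page of the area */
	unsigned long*	high)		/* out: one past the last page */
{
	*low = (offset / flush_area) * flush_area;
	*high = (offset / flush_area + 1) * flush_area;
}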
		if (buf_flush_ready_for_flush(bpage, flush_type)
		    && (i == offset || !bpage->buf_fix_count)) {
			/* We only try to flush those
			neighbors != offset where the buf fix
			count is zero, as we then know that we
			probably can latch the page without a
			semaphore wait. Semaphore waits are
			expensive because we must flush the
			doublewrite buffer before we start
			the write. */
			buf_flush_page(buf_pool, bpage, flush_type);
			ut_ad(!mutex_own(block_mutex));
			ut_ad(!buf_pool_mutex_own(buf_pool));
			count++;

			buf_pool_mutex_enter(buf_pool);
		} else {
			mutex_exit(block_mutex);
		}
	}

	buf_pool_mutex_exit(buf_pool);

	return(count);
}
/********************************************************************//**
Check if the block is modified and ready for flushing. If the block
is ready to flush then flush the page and try to flush its neighbors.
@return	TRUE if the buf_pool mutex was released during this function.
This does not guarantee that some pages were written as well.
The number of pages written is added to *count. */
static
ibool
buf_flush_page_and_try_neighbors(
/*=============================*/
	buf_page_t*	bpage,		/*!< in: buffer control block,
					must be
					buf_page_in_file(bpage) */
	enum buf_flush	flush_type,	/*!< in: BUF_FLUSH_LRU
					or BUF_FLUSH_LIST */
	ulint		n_to_flush,	/*!< in: number of pages to
					flush */
	ulint*		count)		/*!< in/out: number of pages
					flushed */
{
	mutex_t*	block_mutex;
	ibool		flushed = FALSE;
#ifdef UNIV_DEBUG
	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);
#endif /* UNIV_DEBUG */

	ut_ad(buf_pool_mutex_own(buf_pool));

	block_mutex = buf_page_get_mutex(bpage);
	mutex_enter(block_mutex);

	ut_a(buf_page_in_file(bpage));

	if (buf_flush_ready_for_flush(bpage, flush_type)) {
		ulint		space;
		ulint		offset;
		buf_pool_t*	buf_pool;

		buf_pool = buf_pool_from_bpage(bpage);

		buf_pool_mutex_exit(buf_pool);

		/* These fields are protected by both the
		buffer pool mutex and block mutex. */
		space = buf_page_get_space(bpage);
		offset = buf_page_get_page_no(bpage);

		mutex_exit(block_mutex);

		/* Try to flush also all the neighbors */
		*count += buf_flush_try_neighbors(space,
						  offset,
						  flush_type,
						  *count,
						  n_to_flush);

		buf_pool_mutex_enter(buf_pool);
		flushed = TRUE;
	} else {
		mutex_exit(block_mutex);
	}

	ut_ad(buf_pool_mutex_own(buf_pool));

	return(flushed);
}
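
/* Editor's note: the function above copies space/offset while still holding
the page mutex, drops the buffer pool mutex before doing the actual (slow)
flushing, and re-acquires it afterwards.  A minimal pthread sketch (not
InnoDB code) of that "release a hot lock around slow work" pattern, with
hypothetical names.  The caller is assumed to enter and leave with
hot_mutex held, mirroring buf_flush_page_and_try_neighbors(). */
#include <pthread.h>

static void
example_do_slow_work_outside_lock(
	pthread_mutex_t*	hot_mutex,	/* protects shared state */
	void			(*slow_work)(unsigned long),
	const unsigned long*	shared_page_no)	/* protected by hot_mutex */
{
	unsigned long	page_no;

	/* Copy out what the slow work needs while still protected. */
	page_no = *shared_page_no;

	/* Drop the lock so other threads are not blocked for the
	duration of the slow operation (in InnoDB: the page write and
	possibly a doublewrite buffer flush). */
	pthread_mutex_unlock(hot_mutex);

	slow_work(page_no);

	/* Re-acquire before returning; the caller must not assume the
	shared state is unchanged across the unlocked window. */
	pthread_mutex_lock(hot_mutex);
}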
/*******************************************************************//**
This utility flushes dirty blocks from the end of the LRU list.
In the case of an LRU flush the calling thread may own latches to
pages: to avoid deadlocks, this function must be written so that it
cannot end up waiting for these latches!
@return	number of blocks for which the write request was queued. */
static
ulint
buf_flush_LRU_list_batch(
/*=====================*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	ulint		max)		/*!< in: max of blocks to flush */
{
	ulint		count = 0;
	buf_page_t*	bpage;

	ut_ad(buf_pool_mutex_own(buf_pool));

	do {
		/* Start from the end of the list looking for a
		suitable block to be flushed. */
		bpage = UT_LIST_GET_LAST(buf_pool->LRU);

		/* Iterate backwards over the flush list till we find
		a page that isn't ready for flushing. */
		while (bpage != NULL
		       && !buf_flush_page_and_try_neighbors(
				bpage, BUF_FLUSH_LRU, max, &count)) {

			bpage = UT_LIST_GET_PREV(LRU, bpage);
		}
	} while (bpage != NULL && count < max);

	/* We keep track of all flushes happening as part of LRU
	flush. When estimating the desired rate at which flush_list
	should be flushed, we factor in this value. */
	buf_lru_flush_page_count += count;

	ut_ad(buf_pool_mutex_own(buf_pool));

	return(count);
}
/*******************************************************************//**
1503
This utility flushes dirty blocks from the end of the flush_list.
1504
the calling thread is not allowed to own any latches on pages!
1505
@return number of blocks for which the write request was queued;
1506
ULINT_UNDEFINED if there was a flush of the same type already
1510
buf_flush_flush_list_batch(
1511
/*=======================*/
1512
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
1513
ulint min_n, /*!< in: wished minimum mumber
1514
of blocks flushed (it is not
1515
guaranteed that the actual
1516
number is that big, though) */
1517
ib_uint64_t lsn_limit) /*!< all blocks whose
1518
oldest_modification is smaller
1519
than this should be flushed (if
1520
their number does not exceed
1527
	ut_ad(buf_pool_mutex_own(buf_pool));

	do {
		/* If we have flushed enough, leave the loop */
		if (count >= min_n) {

			break;
		}

		/* Start from the end of the list looking for a suitable
		block to be flushed. */

		buf_flush_list_mutex_enter(buf_pool);

		/* We use len here because theoretically insertions can
		happen in the flush_list below while we are traversing
		it for a suitable candidate for flushing. We'd like to
		set a limit on how far we are willing to traverse
		the list. */
		len = UT_LIST_GET_LEN(buf_pool->flush_list);
		bpage = UT_LIST_GET_LAST(buf_pool->flush_list);

		if (bpage) {
			ut_a(bpage->oldest_modification > 0);
		}
		if (!bpage || bpage->oldest_modification >= lsn_limit) {

			/* We have flushed enough */
			buf_flush_list_mutex_exit(buf_pool);
			break;
		}

		ut_a(bpage->oldest_modification > 0);

		ut_ad(bpage->in_flush_list);

		buf_flush_list_mutex_exit(buf_pool);
		/* The list may change during the flushing and we cannot
		safely preserve within this function a pointer to a
		block in the list! */
		while (bpage != NULL
		       && !buf_flush_page_and_try_neighbors(
				bpage, BUF_FLUSH_LIST, min_n, &count)) {

			buf_flush_list_mutex_enter(buf_pool);

			/* If we are here that means that buf_pool->mutex
			was not released in buf_flush_page_and_try_neighbors()
			above and this guarantees that bpage didn't get
			relocated since we released the flush_list
			mutex above. There is a chance, however, that
			the bpage got removed from flush_list (not
			currently possible because flush_list_remove()
			also obtains buf_pool mutex but that may change
			in future). To avoid this scenario we check
			the oldest_modification and if it is zero
			we start all over again. */
			if (bpage->oldest_modification == 0) {
				buf_flush_list_mutex_exit(buf_pool);
				break;
			}

			bpage = UT_LIST_GET_PREV(list, bpage);

			ut_ad(!bpage || bpage->in_flush_list);

			buf_flush_list_mutex_exit(buf_pool);
		}

	} while (count < min_n && bpage != NULL && len > 0);

	ut_ad(buf_pool_mutex_own(buf_pool));

	return(count);
}
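
/* Editor's note: the flush-list variant above additionally stops as soon as
the tail entry is already newer than lsn_limit.  Because the list is kept in
decreasing oldest_modification order, the tail is the oldest entry: once it
is >= lsn_limit, every remaining entry is too.  Standalone sketch (not
InnoDB code) of just that cutoff test, using the example_node_t type
defined earlier in this file: */
static int
example_flush_list_done(
	const example_node_t*	tail,		/* oldest entry, or NULL */
	unsigned long long	lsn_limit)	/* flush strictly older only */
{
	return(tail == NULL || tail->oldest_modification >= lsn_limit);
}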
static
ulint
buf_flush_batch(
/*============*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	enum buf_flush	flush_type,	/*!< in: BUF_FLUSH_LRU or
					BUF_FLUSH_LIST */
	ulint		min_n,		/*!< in: wished minimum number of blocks
					flushed (it is not guaranteed that the
					actual number is that big, though) */
	ib_uint64_t	lsn_limit)	/*!< in: in the case of BUF_FLUSH_LIST
					all blocks whose oldest_modification is
					smaller than this should be flushed
					(if their number does not exceed
					min_n), otherwise ignored */
{
	ulint		count	= 0;
	ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
#ifdef UNIV_SYNC_DEBUG
	ut_ad((flush_type != BUF_FLUSH_LIST)
	      || sync_thread_levels_empty_gen(TRUE));
#endif /* UNIV_SYNC_DEBUG */
	buf_pool_mutex_enter(buf_pool);

	/* Note: The buffer pool mutex is released and reacquired within
	the flush functions. */
	switch(flush_type) {
	case BUF_FLUSH_LRU:
		count = buf_flush_LRU_list_batch(buf_pool, min_n);
		break;
	case BUF_FLUSH_LIST:
		count = buf_flush_flush_list_batch(buf_pool, min_n, lsn_limit);
		break;
	default:
		ut_error;
	}

	buf_pool_mutex_exit(buf_pool);

	buf_flush_buffered_writes();

#ifdef UNIV_DEBUG
	if (buf_debug_prints && count > 0) {
		fprintf(stderr, flush_type == BUF_FLUSH_LRU
			? "Flushed %lu pages in LRU flush\n"
			: "Flushed %lu pages in flush list flush\n",
			(ulong) count);
	}
#endif /* UNIV_DEBUG */

	srv_buf_pool_flushed += count;

	return(count);
}
/******************************************************************//**
1671
Gather the aggregated stats for both flush list and LRU list flushing */
1676
enum buf_flush flush_type, /*!< in: type of flush */
1677
ulint page_count) /*!< in: number of pages flushed */
1679
buf_flush_buffered_writes();
1681
ut_a(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
1032
buf_pool_mutex_enter();
1034
if ((buf_pool->n_flush[flush_type] > 0)
1035
|| (buf_pool->init_flush[flush_type] == TRUE)) {
1037
/* There is already a flush batch of the same type running */
1039
buf_pool_mutex_exit();
1041
return(ULINT_UNDEFINED);
1044
buf_pool->init_flush[flush_type] = TRUE;
1046
bool done_with_loop= false;
1047
for (;done_with_loop != true;) {
1049
/* If we have flushed enough, leave the loop */
1050
if (page_count >= min_n) {
1055
/* Start from the end of the list looking for a suitable
1056
block to be flushed. */
1058
if (flush_type == BUF_FLUSH_LRU) {
1059
bpage = UT_LIST_GET_LAST(buf_pool->LRU);
1061
ut_ad(flush_type == BUF_FLUSH_LIST);
1063
bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
1065
|| bpage->oldest_modification >= lsn_limit) {
1066
/* We have flushed enough */
1070
ut_ad(bpage->in_flush_list);
1073
/* Note that after finding a single flushable page, we try to
1074
flush also all its neighbors, and after that start from the
1075
END of the LRU list or flush list again: the list may change
1076
during the flushing and we cannot safely preserve within this
1077
function a pointer to a block in the list! */
1080
mutex_t*block_mutex = buf_page_get_mutex(bpage);
1083
ut_a(buf_page_in_file(bpage));
1085
mutex_enter(block_mutex);
1086
ready = buf_flush_ready_for_flush(bpage, flush_type);
1087
mutex_exit(block_mutex);
1090
space = buf_page_get_space(bpage);
1091
offset = buf_page_get_page_no(bpage);
1093
buf_pool_mutex_exit();
1095
old_page_count = page_count;
1097
/* Try to flush also all the neighbors */
1098
page_count += buf_flush_try_neighbors(
1099
space, offset, flush_type);
1101
"Flush type %lu, page no %lu, neighb %lu\n",
1103
page_count - old_page_count); */
1105
buf_pool_mutex_enter();
1108
} else if (flush_type == BUF_FLUSH_LRU) {
1109
bpage = UT_LIST_GET_PREV(LRU, bpage);
1111
ut_ad(flush_type == BUF_FLUSH_LIST);
1113
bpage = UT_LIST_GET_PREV(list, bpage);
1114
ut_ad(!bpage || bpage->in_flush_list);
1116
} while (bpage != NULL);
1118
/* If we could not find anything to flush, leave the loop */
1120
done_with_loop= true;
1124
buf_pool->init_flush[flush_type] = FALSE;
1126
if (buf_pool->n_flush[flush_type] == 0) {
1128
/* The running flush batch has ended */
1130
os_event_set(buf_pool->no_flush[flush_type]);
1133
buf_pool_mutex_exit();
1135
buf_flush_buffered_writes();
#ifdef UNIV_DEBUG
	if (buf_debug_prints && page_count > 0) {
		fprintf(stderr, flush_type == BUF_FLUSH_LRU
			? "Flushed %lu pages in LRU flush\n"
			: "Flushed %lu pages in flush list flush\n",
			(ulong) page_count);
	}
#endif /* UNIV_DEBUG */

	srv_buf_pool_flushed += page_count;

	if (flush_type == BUF_FLUSH_LRU) {
		/* We keep track of all flushes happening as part of LRU
		flush. When estimating the desired rate at which flush_list
		should be flushed we factor in this value. */
		buf_lru_flush_page_count += page_count;
	}
}
/******************************************************************//**
Start a buffer flush batch for LRU or flush list */
static
ibool
buf_flush_start(
/*============*/
	buf_pool_t*	buf_pool,	/*!< buffer pool instance */
	enum buf_flush	flush_type)	/*!< in: BUF_FLUSH_LRU
					or BUF_FLUSH_LIST */
{
	buf_pool_mutex_enter(buf_pool);

	if (buf_pool->n_flush[flush_type] > 0
	    || buf_pool->init_flush[flush_type] == TRUE) {

		/* There is already a flush batch of the same type running */

		buf_pool_mutex_exit(buf_pool);

		return(FALSE);
	}

	buf_pool->init_flush[flush_type] = TRUE;

	buf_pool_mutex_exit(buf_pool);

	return(TRUE);
}
/******************************************************************//**
End a buffer flush batch for LRU or flush list */
static
void
buf_flush_end(
/*==========*/
	buf_pool_t*	buf_pool,	/*!< buffer pool instance */
	enum buf_flush	flush_type)	/*!< in: BUF_FLUSH_LRU
					or BUF_FLUSH_LIST */
{
	buf_pool_mutex_enter(buf_pool);

	buf_pool->init_flush[flush_type] = FALSE;

	if (buf_pool->n_flush[flush_type] == 0) {

		/* The running flush batch has ended */

		os_event_set(buf_pool->no_flush[flush_type]);
	}

	buf_pool_mutex_exit(buf_pool);
}
/******************************************************************//**
Waits until a flush batch of the given type ends */
UNIV_INTERN
void
buf_flush_wait_batch_end(
/*=====================*/
	buf_pool_t*	buf_pool,	/*!< buffer pool instance */
	enum buf_flush	type)		/*!< in: BUF_FLUSH_LRU
					or BUF_FLUSH_LIST */
{
	ut_ad(type == BUF_FLUSH_LRU || type == BUF_FLUSH_LIST);

	if (buf_pool == NULL) {
		ulint	i;

		for (i = 0; i < srv_buf_pool_instances; ++i) {
			buf_pool_t*	buf_pool;

			buf_pool = buf_pool_from_array(i);

			os_event_wait(buf_pool->no_flush[type]);
		}
	} else {
		os_event_wait(buf_pool->no_flush[type]);
	}
}
/*******************************************************************//**
This utility flushes dirty blocks from the end of the LRU list.
NOTE: The calling thread may own latches to pages: to avoid deadlocks,
this function must be written so that it cannot end up waiting for these
latches!
@return	number of blocks for which the write request was queued;
ULINT_UNDEFINED if there was a flush of the same type already running */
UNIV_INTERN
ulint
buf_flush_LRU(
/*==========*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	ulint		min_n)		/*!< in: wished minimum number of blocks
					flushed (it is not guaranteed that the
					actual number is that big, though) */
{
	ulint		page_count;

	if (!buf_flush_start(buf_pool, BUF_FLUSH_LRU)) {
		return(ULINT_UNDEFINED);
	}

	page_count = buf_flush_batch(buf_pool, BUF_FLUSH_LRU, min_n, 0);

	buf_flush_end(buf_pool, BUF_FLUSH_LRU);

	buf_flush_common(BUF_FLUSH_LRU, page_count);

	return(page_count);
}
/*******************************************************************//**
This utility flushes dirty blocks from the end of the flush list of
all buffer pool instances.
NOTE: The calling thread is not allowed to own any latches on pages!
@return	number of blocks for which the write request was queued;
ULINT_UNDEFINED if there was a flush of the same type already running */
UNIV_INTERN
ulint
buf_flush_list(
/*===========*/
	ulint		min_n,		/*!< in: wished minimum number of blocks
					flushed (it is not guaranteed that the
					actual number is that big, though) */
	ib_uint64_t	lsn_limit)	/*!< in the case BUF_FLUSH_LIST all
					blocks whose oldest_modification is
					smaller than this should be flushed
					(if their number does not exceed
					min_n), otherwise ignored */
{
	ulint		i;
	ulint		total_page_count = 0;
	ibool		skipped = FALSE;

	if (min_n != ULINT_MAX) {
		/* Ensure that flushing is spread evenly amongst the
		buffer pool instances. When min_n is ULINT_MAX
		we need to flush everything up to the lsn limit
		so no limit here. */
		min_n = (min_n + srv_buf_pool_instances - 1)
			/ srv_buf_pool_instances;
	}
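
/* Editor's note: a worked example of the rounding above.  With
srv_buf_pool_instances == 4 and min_n == 10, (10 + 4 - 1) / 4 == 3, so each
instance is asked for ceil(10 / 4) == 3 pages and the four instances
together cover at least the requested 10.  Standalone helper (not InnoDB
code): */
static unsigned long
example_per_instance_quota(
	unsigned long	min_n,		/* requested total, != ULINT_MAX */
	unsigned long	n_instances)	/* number of buffer pool instances */
{
	/* Ceiling division: never under-shoots the requested total when
	multiplied back by n_instances. */
	return((min_n + n_instances - 1) / n_instances);
}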
	/* Flush to lsn_limit in all buffer pool instances */
	for (i = 0; i < srv_buf_pool_instances; i++) {
		buf_pool_t*	buf_pool;
		ulint		page_count = 0;

		buf_pool = buf_pool_from_array(i);

		if (!buf_flush_start(buf_pool, BUF_FLUSH_LIST)) {
			/* We have two choices here. If lsn_limit was
			specified then skipping an instance of buffer
			pool means we cannot guarantee that all pages
			up to lsn_limit have been flushed. We can
			return right now with failure or we can try
			to flush remaining buffer pools up to the
			lsn_limit. We attempt to flush other buffer
			pools based on the assumption that it will
			help in the retry which will follow. */
			skipped = TRUE;

			continue;
		}

		page_count = buf_flush_batch(
			buf_pool, BUF_FLUSH_LIST, min_n, lsn_limit);

		buf_flush_end(buf_pool, BUF_FLUSH_LIST);

		buf_flush_common(BUF_FLUSH_LIST, page_count);

		total_page_count += page_count;
	}

	return(lsn_limit != IB_ULONGLONG_MAX && skipped
	       ? ULINT_UNDEFINED : total_page_count);
}
/******************************************************************//**
Gives a recommendation of how many blocks should be flushed to establish
a big enough margin of replaceable blocks near the end of the LRU list. */

/*********************************************************************//**
Flushes pages from the end of the LRU list if there is too small a margin
of replaceable pages there; to avoid deadlocks, only pages that the calling
thread can flush
immediately, without waiting. */
UNIV_INTERN
void
buf_flush_free_margin(
/*==================*/
	buf_pool_t*	buf_pool)	/*!< in: Buffer pool instance */
{
	ulint	n_to_flush;

	n_to_flush = buf_flush_LRU_recommendation(buf_pool);

	if (n_to_flush > 0) {
		ulint	n_flushed;

		n_flushed = buf_flush_LRU(buf_pool, n_to_flush);

		if (n_flushed == ULINT_UNDEFINED) {
			/* There was an LRU type flush batch already running;
			let us wait for it to end */

			buf_flush_wait_batch_end(buf_pool, BUF_FLUSH_LRU);
		}
	}
}
/*********************************************************************//**
Flushes pages from the end of all the LRU lists. */
UNIV_INTERN
void
buf_flush_free_margins(void)
/*========================*/
{
	ulint	i;

	for (i = 0; i < srv_buf_pool_instances; i++) {
		buf_pool_t*	buf_pool;

		buf_pool = buf_pool_from_array(i);

		buf_flush_free_margin(buf_pool);
	}
}
/*********************************************************************
Update the historical stats that we are collecting for flush rate
heuristics at the end of each interval. */
/******************************************************************//**
Validates the flush list.
@return	TRUE if ok */
static
ibool
buf_flush_validate_low(
/*===================*/
	buf_pool_t*	buf_pool)	/*!< in: Buffer pool instance */
{
	buf_page_t*		bpage;
	const ib_rbt_node_t*	rnode = NULL;

	ut_ad(buf_flush_list_mutex_own(buf_pool));

	UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list,
			 ut_ad(ut_list_node_313->in_flush_list));

	bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
	/* If we are in recovery mode i.e.: flush_rbt != NULL
	then each block in the flush_list must also be present
	in the flush_rbt. */
	if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
		rnode = rbt_first(buf_pool->flush_rbt);
	}

	while (bpage != NULL) {
		const ib_uint64_t om = bpage->oldest_modification;

		ut_ad(buf_pool_from_bpage(bpage) == buf_pool);

		ut_ad(bpage->in_flush_list);

		/* A page in flush_list can be in BUF_BLOCK_REMOVE_HASH
		state. This happens when a page is in the middle of
		being relocated. In that case the original descriptor
		can have this state and still be in the flush list
		waiting to acquire the flush_list_mutex to complete
		the relocation. */
		ut_a(buf_page_in_file(bpage)
		     || buf_page_get_state(bpage) == BUF_BLOCK_REMOVE_HASH);

		if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
			buf_page_t**	prpage;

			prpage = rbt_value(buf_page_t*, rnode);

			ut_a(*prpage == bpage);
			rnode = rbt_next(buf_pool->flush_rbt, rnode);
		}

		bpage = UT_LIST_GET_NEXT(list, bpage);

		ut_a(!bpage || om >= bpage->oldest_modification);
	}

	/* By this time we must have exhausted the traversal of
	flush_rbt (if active) as well. */
	ut_a(rnode == NULL);

	return(TRUE);
}
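
/* Editor's note: a standalone sketch (not InnoDB code) of the core invariant
checked above: walking from the newest entry to the oldest, the
oldest_modification values must never increase.  Returns nonzero when the
list (example_node_t, defined earlier in this file) is correctly ordered. */
static int
example_flush_list_is_sorted(
	const example_node_t*	head)	/* newest entry first */
{
	const example_node_t*	node;

	for (node = head; node != NULL && node->next != NULL;
	     node = node->next) {

		if (node->oldest_modification
		    < node->next->oldest_modification) {

			return(0);	/* order violated */
		}
	}

	return(1);
}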