Viewing changes to plugin/innobase/buf/buf0flu.c

  • Committer: Monty Taylor
  • Date: 2010-11-26 22:50:54 UTC
  • mfrom: (1953.1.6 build)
  • Revision ID: mordred@inaugust.com-20101126225054-sg90svw8579t5p3i
Stewart - InnoDB 1.1.1
Monty - Fixed some autoconf tests which were returning false positives.

@@ -83 +83 @@
 @return TRUE if ok */
 static
 ibool
-buf_flush_validate_low(void);
-/*========================*/
+buf_flush_validate_low(
+/*===================*/
+        buf_pool_t*     buf_pool);      /*!< in: Buffer pool instance */
 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

-/********************************************************************//**
+/******************************************************************//**
 Insert a block in the flush_rbt and returns a pointer to its
 predecessor or NULL if no predecessor. The ordering is maintained
 on the basis of the <oldest_modification, space, offset> key.
-@return pointer to the predecessor or NULL if no predecessor. */
+@return pointer to the predecessor or NULL if no predecessor. */
 static
 buf_page_t*
 buf_flush_insert_in_flush_rbt(
 /*==========================*/
-        buf_page_t*     bpage)          /*!< in: bpage to be inserted. */
+        buf_page_t*     bpage)  /*!< in: bpage to be inserted. */
 {
-        buf_page_t*             prev = NULL;
         const ib_rbt_node_t*    c_node;
         const ib_rbt_node_t*    p_node;
+        buf_page_t*             prev = NULL;
+        buf_pool_t*             buf_pool = buf_pool_from_bpage(bpage);

-        ut_ad(buf_pool_mutex_own());
+        ut_ad(buf_flush_list_mutex_own(buf_pool));

         /* Insert this buffer into the rbt. */
         c_node = rbt_insert(buf_pool->flush_rbt, &bpage, &bpage);
@@ -119 +121 @@
         return(prev);
 }

-/********************************************************************//**
+/*********************************************************//**
 Delete a bpage from the flush_rbt. */
 static
 void
 buf_flush_delete_from_flush_rbt(
 /*============================*/
-        buf_page_t*     bpage)          /*!< in: bpage to be removed. */
+        buf_page_t*     bpage)  /*!< in: bpage to be removed. */
 {
-
-        ibool   ret = FALSE;
-
-        ut_ad(buf_pool_mutex_own());
+        ibool           ret = FALSE;
+        buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
+
+        ut_ad(buf_flush_list_mutex_own(buf_pool));
+
         ret = rbt_delete(buf_pool->flush_rbt, &bpage);
         ut_ad(ret);
 }

@@ -138 +141 @@
-/********************************************************************//**
+/*****************************************************************//**
 Compare two modified blocks in the buffer pool. The key for comparison
 is:
 key = <oldest_modification, space, offset>
@@ -144 +147 @@
 Note that for the purpose of flush_rbt, we only need to order blocks
 on the oldest_modification. The other two fields are used to uniquely
 identify the blocks.
-@return < 0 if b2 < b1, 0 if b2 == b1, > 0 if b2 > b1 */
+@return  < 0 if b2 < b1, 0 if b2 == b1, > 0 if b2 > b1 */
 static
 int
 buf_flush_block_cmp(
@@ -152 +155 @@
         const void*     p1,             /*!< in: block1 */
         const void*     p2)             /*!< in: block2 */
 {
-        int             ret;
-        const buf_page_t* b1;
-        const buf_page_t* b2;
-
-        ut_ad(p1 != NULL);
-        ut_ad(p2 != NULL);
-
-        b1 = *(const buf_page_t**) p1;
-        b2 = *(const buf_page_t**) p2;
+        int                     ret;
+        const buf_page_t*       b1 = *(const buf_page_t**) p1;
+        const buf_page_t*       b2 = *(const buf_page_t**) p2;
+#ifdef UNIV_DEBUG
+        buf_pool_t*             buf_pool = buf_pool_from_bpage(b1);
+#endif /* UNIV_DEBUG */

         ut_ad(b1 != NULL);
         ut_ad(b2 != NULL);

+        ut_ad(buf_flush_list_mutex_own(buf_pool));
+
         ut_ad(b1->in_flush_list);
         ut_ad(b2->in_flush_list);

-        if (b2->oldest_modification
-            > b1->oldest_modification) {
+        if (b2->oldest_modification > b1->oldest_modification) {
                 return(1);
-        }
-
-        if (b2->oldest_modification
-            < b1->oldest_modification) {
+        } else if (b2->oldest_modification < b1->oldest_modification) {
                 return(-1);
         }
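The comparator above implements the three-part key <oldest_modification,
space, offset>: only oldest_modification determines flush order, while space
and offset merely make the key unique so that distinct pages never compare
equal. A minimal standalone sketch of that ordering (hypothetical
illustration with plain C types, not code from this change):

    #include <stdint.h>

    /* Sketch: the three-part ordering key used by the flush_rbt.
    Hypothetical standalone types, not the InnoDB definitions. */
    struct flush_key {
            uint64_t        oldest_modification;    /* primary sort field */
            uint32_t        space;                  /* tie-breaker only */
            uint32_t        offset;                 /* tie-breaker only */
    };

    static int
    flush_key_cmp(const struct flush_key* a, const struct flush_key* b)
    {
            if (a->oldest_modification != b->oldest_modification) {
                    return(a->oldest_modification
                           < b->oldest_modification ? -1 : 1);
            }
            if (a->space != b->space) {
                    return(a->space < b->space ? -1 : 1);
            }
            if (a->offset != b->offset) {
                    return(a->offset < b->offset ? -1 : 1);
            }
            return(0);
    }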
 
@@ -194 +192 @@
 buf_flush_init_flush_rbt(void)
 /*==========================*/
 {
-        buf_pool_mutex_enter();
-
-        /* Create red black tree for speedy insertions in flush list. */
-        buf_pool->flush_rbt = rbt_create(sizeof(buf_page_t*),
-                                         buf_flush_block_cmp);
-        buf_pool_mutex_exit();
+        ulint   i;
+
+        for (i = 0; i < srv_buf_pool_instances; i++) {
+                buf_pool_t*     buf_pool;
+
+                buf_pool = buf_pool_from_array(i);
+
+                buf_flush_list_mutex_enter(buf_pool);
+
+                /* Create red black tree for speedy insertions in flush list. */
+                buf_pool->flush_rbt = rbt_create(
+                        sizeof(buf_page_t*), buf_flush_block_cmp);
+
+                buf_flush_list_mutex_exit(buf_pool);
+        }
 }
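This hunk shows the pattern that recurs throughout this revision: code that
used to touch the single global buf_pool now iterates over every buffer pool
instance, taking that instance's flush-list mutex. A condensed sketch of the
pattern (hypothetical helper, assuming the srv_buf_pool_instances counter and
buf_pool_from_array() seen in the diff):

    /* Sketch: apply fn to each buffer pool instance under its
    flush-list mutex. Hypothetical helper, not in the source. */
    static void
    for_each_flush_list(void (*fn)(buf_pool_t*))
    {
            ulint   i;

            for (i = 0; i < srv_buf_pool_instances; i++) {
                    buf_pool_t*     buf_pool = buf_pool_from_array(i);

                    buf_flush_list_mutex_enter(buf_pool);
                    fn(buf_pool);
                    buf_flush_list_mutex_exit(buf_pool);
            }
    }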

 /********************************************************************//**
@@ -209 +216 @@
 buf_flush_free_flush_rbt(void)
 /*==========================*/
 {
-        buf_pool_mutex_enter();
-
+        ulint   i;
+
+        for (i = 0; i < srv_buf_pool_instances; i++) {
+                buf_pool_t*     buf_pool;
+
+                buf_pool = buf_pool_from_array(i);
+
+                buf_flush_list_mutex_enter(buf_pool);
+
+#ifdef UNIV_DEBUG_VALGRIND
+        {
+                ulint   zip_size = buf_block_get_zip_size(block);
+
+                if (UNIV_UNLIKELY(zip_size)) {
+                        UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size);
+                } else {
+                        UNIV_MEM_ASSERT_RW(block->frame, UNIV_PAGE_SIZE);
+                }
+        }
+#endif /* UNIV_DEBUG_VALGRIND */
 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-        ut_a(buf_flush_validate_low());
+                ut_a(buf_flush_validate_low(buf_pool));
 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

-        rbt_free(buf_pool->flush_rbt);
-        buf_pool->flush_rbt = NULL;
+                rbt_free(buf_pool->flush_rbt);
+                buf_pool->flush_rbt = NULL;

-        buf_pool_mutex_exit();
+                buf_flush_list_mutex_exit(buf_pool);
+        }
 }

 /********************************************************************//**
@@ -227 +253 @@
 void
 buf_flush_insert_into_flush_list(
 /*=============================*/
-        buf_block_t*    block)  /*!< in/out: block which is modified */
+        buf_pool_t*     buf_pool,       /*!< buffer pool instance */
+        buf_block_t*    block,          /*!< in/out: block which is modified */
+        ib_uint64_t     lsn)            /*!< in: oldest modification */
 {
-        ut_ad(buf_pool_mutex_own());
+        ut_ad(!buf_pool_mutex_own(buf_pool));
+        ut_ad(log_flush_order_mutex_own());
+        ut_ad(mutex_own(&block->mutex));
+
+        buf_flush_list_mutex_enter(buf_pool);
+
         ut_ad((UT_LIST_GET_FIRST(buf_pool->flush_list) == NULL)
               || (UT_LIST_GET_FIRST(buf_pool->flush_list)->oldest_modification
-                  <= block->page.oldest_modification));
+                  <= lsn));

         /* If we are in the recovery then we need to update the flush
         red-black tree as well. */
         if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
-                buf_flush_insert_sorted_into_flush_list(block);
+                buf_flush_list_mutex_exit(buf_pool);
+                buf_flush_insert_sorted_into_flush_list(buf_pool, block, lsn);
                 return;
         }

         ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
-        ut_ad(block->page.in_LRU_list);
-        ut_ad(block->page.in_page_hash);
-        ut_ad(!block->page.in_zip_hash);
         ut_ad(!block->page.in_flush_list);
+
         ut_d(block->page.in_flush_list = TRUE);
+        block->page.oldest_modification = lsn;
         UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);

+#ifdef UNIV_DEBUG_VALGRIND
+        {
+                ulint   zip_size = buf_block_get_zip_size(block);
+
+                if (UNIV_UNLIKELY(zip_size)) {
+                        UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size);
+                } else {
+                        UNIV_MEM_ASSERT_RW(block->frame, UNIV_PAGE_SIZE);
+                }
+        }
+#endif /* UNIV_DEBUG_VALGRIND */
 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-        ut_a(buf_flush_validate_low());
+        ut_a(buf_flush_validate_low(buf_pool));
 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+
+        buf_flush_list_mutex_exit(buf_pool);
 }
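The new signature above also changes the locking contract: the caller no
longer holds the buffer pool mutex; instead it holds the log flush-order
mutex and the block mutex, and the function itself takes the flush-list
mutex. A sketch of a caller honoring that contract (hypothetical caller,
inferred from the assertions in the hunk, not code from this change):

    /* Sketch: add a freshly modified block to the flush list under
    the new protocol. Hypothetical function name. */
    static void
    note_block_dirty(buf_pool_t* buf_pool, buf_block_t* block,
                     ib_uint64_t lsn)
    {
            ut_ad(log_flush_order_mutex_own());     /* keeps LSN order */
            ut_ad(mutex_own(&block->mutex));        /* protects the block */

            /* The flush list now has its own mutex; the buffer pool
            mutex is deliberately not taken on this path. */
            buf_flush_insert_into_flush_list(buf_pool, block, lsn);
    }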

 /********************************************************************//**
@@ -262 +308 @@
 void
 buf_flush_insert_sorted_into_flush_list(
 /*====================================*/
-        buf_block_t*    block)  /*!< in/out: block which is modified */
+        buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
+        buf_block_t*    block,          /*!< in/out: block which is modified */
+        ib_uint64_t     lsn)            /*!< in: oldest modification */
 {
         buf_page_t*     prev_b;
         buf_page_t*     b;

-        ut_ad(buf_pool_mutex_own());
+        ut_ad(!buf_pool_mutex_own(buf_pool));
+        ut_ad(log_flush_order_mutex_own());
+        ut_ad(mutex_own(&block->mutex));
         ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);

+        buf_flush_list_mutex_enter(buf_pool);
+
+        /* The field in_LRU_list is protected by buf_pool_mutex, which
+        we are not holding.  However, while a block is in the flush
+        list, it is dirty and cannot be discarded, not from the
+        page_hash or from the LRU list.  At most, the uncompressed
+        page frame of a compressed block may be discarded or created
+        (copying the block->page to or from a buf_page_t that is
+        dynamically allocated from buf_buddy_alloc()).  Because those
+        transitions hold block->mutex and the flush list mutex (via
+        buf_flush_relocate_on_flush_list()), there is no possibility
+        of a race condition in the assertions below. */
         ut_ad(block->page.in_LRU_list);
         ut_ad(block->page.in_page_hash);
+        /* buf_buddy_block_register() will take a block in the
+        BUF_BLOCK_MEMORY state, not a file page. */
         ut_ad(!block->page.in_zip_hash);
+
         ut_ad(!block->page.in_flush_list);
         ut_d(block->page.in_flush_list = TRUE);
+        block->page.oldest_modification = lsn;
+
+#ifdef UNIV_DEBUG_VALGRIND
+        {
+                ulint   zip_size = buf_block_get_zip_size(block);
+
+                if (UNIV_UNLIKELY(zip_size)) {
+                        UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size);
+                } else {
+                        UNIV_MEM_ASSERT_RW(block->frame, UNIV_PAGE_SIZE);
+                }
+        }
+#endif /* UNIV_DEBUG_VALGRIND */
+
+#ifdef UNIV_DEBUG_VALGRIND
+        {
+                ulint   zip_size = buf_block_get_zip_size(block);
+
+                if (UNIV_UNLIKELY(zip_size)) {
+                        UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size);
+                } else {
+                        UNIV_MEM_ASSERT_RW(block->frame, UNIV_PAGE_SIZE);
+                }
+        }
+#endif /* UNIV_DEBUG_VALGRIND */

         prev_b = NULL;

@@ -308 +398 @@
         }

 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-        ut_a(buf_flush_validate_low());
+        ut_a(buf_flush_validate_low(buf_pool));
 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+
+        buf_flush_list_mutex_exit(buf_pool);
 }
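The long comment added to buf_flush_insert_sorted_into_flush_list() above
carries the key invariant behind the new locking scheme: a page on the flush
list has oldest_modification != 0, and a dirty page can be neither evicted
from the page_hash nor dropped from the LRU list, so its identity stays
stable even though the buffer pool mutex is no longer held. Expressed as a
check (hypothetical helper, condensing assertions used throughout the diff):

    /* Sketch: what "safe to touch without buf_pool mutex" means for
    a flush-list page. Hypothetical helper, not in the source. */
    static void
    assert_flush_list_page_stable(const buf_page_t* bpage)
    {
            ut_ad(bpage->in_flush_list);            /* on the list... */
            ut_ad(bpage->oldest_modification != 0); /* ...hence dirty */
            ut_ad(bpage->in_LRU_list);              /* not evicted */
            ut_ad(bpage->in_page_hash);             /* still reachable */
    }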

 /********************************************************************//**
@@ -323 +415 @@
         buf_page_t*     bpage)  /*!< in: buffer control block, must be
                                 buf_page_in_file(bpage) and in the LRU list */
 {
-        ut_ad(buf_pool_mutex_own());
+#ifdef UNIV_DEBUG
+        buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
+        ut_ad(buf_pool_mutex_own(buf_pool));
+#endif
         ut_ad(mutex_own(buf_page_get_mutex(bpage)));
         ut_ad(bpage->in_LRU_list);

@@ -356 +451 @@
                                 buf_page_in_file(bpage) */
         enum buf_flush  flush_type)/*!< in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
 {
+#ifdef UNIV_DEBUG
+        buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
+        ut_ad(buf_pool_mutex_own(buf_pool));
+#endif
         ut_a(buf_page_in_file(bpage));
-        ut_ad(buf_pool_mutex_own());
         ut_ad(mutex_own(buf_page_get_mutex(bpage)));
         ut_ad(flush_type == BUF_FLUSH_LRU || BUF_FLUSH_LIST);

@@ -390 +488 @@
 /*=============*/
         buf_page_t*     bpage)  /*!< in: pointer to the block in question */
 {
-        ut_ad(buf_pool_mutex_own());
+        buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
+
+        ut_ad(buf_pool_mutex_own(buf_pool));
         ut_ad(mutex_own(buf_page_get_mutex(bpage)));
         ut_ad(bpage->in_flush_list);

+        buf_flush_list_mutex_enter(buf_pool);
+
         switch (buf_page_get_state(bpage)) {
         case BUF_BLOCK_ZIP_PAGE:
-                /* clean compressed pages should not be on the flush list */
+                /* Clean compressed pages should not be on the flush list */
         case BUF_BLOCK_ZIP_FREE:
         case BUF_BLOCK_NOT_USED:
         case BUF_BLOCK_READY_FOR_USE:
@@ -414 +516 @@
                 break;
         }

-        /* If the flush_rbt is active then delete from it as well. */
+        /* If the flush_rbt is active then delete from there as well. */
         if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
                 buf_flush_delete_from_flush_rbt(bpage);
         }
@@ -425 +527 @@

         bpage->oldest_modification = 0;

-        ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list,
-                              ut_ad(ut_list_node_313->in_flush_list)));
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+        ut_a(buf_flush_validate_low(buf_pool));
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+
+        buf_flush_list_mutex_exit(buf_pool);
 }

-/********************************************************************//**
+/*******************************************************************//**
 Relocates a buffer control block on the flush_list.
-Note that it is assumed that the contents of bpage has already been
-copied to dpage. */
+Note that it is assumed that the contents of bpage have already been
+copied to dpage.
+IMPORTANT: When this function is called bpage and dpage are not
+exact copies of each other. For example, they both will have different
+::state. Also the ::list pointers in dpage may be stale. We need to
+use the current list node (bpage) to do the list manipulation because
+the list pointers could have changed between the time that we copied
+the contents of bpage to the dpage and the flush list manipulation
+below. */
 UNIV_INTERN
 void
 buf_flush_relocate_on_flush_list(
@@ -440 +552 @@
         buf_page_t*     bpage,  /*!< in/out: control block being moved */
         buf_page_t*     dpage)  /*!< in/out: destination block */
 {
-        buf_page_t* prev;
-        buf_page_t* prev_b = NULL;
+        buf_page_t*     prev;
+        buf_page_t*     prev_b = NULL;
+        buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);

-        ut_ad(buf_pool_mutex_own());
+        ut_ad(buf_pool_mutex_own(buf_pool));
+        /* Must reside in the same buffer pool. */
+        ut_ad(buf_pool == buf_pool_from_bpage(dpage));

         ut_ad(mutex_own(buf_page_get_mutex(bpage)));

+        buf_flush_list_mutex_enter(buf_pool);
+
+        /* FIXME: At this point we have both buf_pool and flush_list
+        mutexes. Theoretically removal of a block from flush list is
+        only covered by flush_list mutex but currently we do
+        have buf_pool mutex in buf_flush_remove() therefore this block
+        is guaranteed to be in the flush list. We need to check if
+        this will work without the assumption of block removing code
+        having the buf_pool mutex. */
         ut_ad(bpage->in_flush_list);
         ut_ad(dpage->in_flush_list);

@@ -482 +606 @@
         ut_a(!buf_pool->flush_rbt || prev_b == prev);

 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-        ut_a(buf_flush_validate_low());
+        ut_a(buf_flush_validate_low(buf_pool));
 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+
+        buf_flush_list_mutex_exit(buf_pool);
 }

 /********************************************************************//**
@@ -495 +621 @@
         buf_page_t*     bpage)  /*!< in: pointer to the block in question */
 {
         enum buf_flush  flush_type;
+        buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);

         ut_ad(bpage);

@@ -515 +642 @@
         /* fprintf(stderr, "n pending flush %lu\n",
         buf_pool->n_flush[flush_type]); */

-        if ((buf_pool->n_flush[flush_type] == 0)
-            && (buf_pool->init_flush[flush_type] == FALSE)) {
+        if (buf_pool->n_flush[flush_type] == 0
+            && buf_pool->init_flush[flush_type] == FALSE) {

                 /* The running flush batch has ended */

@@ -809 +936 @@
         zip_size = buf_page_get_zip_size(bpage);

         if (UNIV_UNLIKELY(zip_size)) {
+                UNIV_MEM_ASSERT_RW(bpage->zip.data, zip_size);
                 /* Copy the compressed page and clear the rest. */
                 memcpy(trx_doublewrite->write_buf
                        + UNIV_PAGE_SIZE * trx_doublewrite->first_free,
@@ -818 +946 @@
                        + zip_size, 0, UNIV_PAGE_SIZE - zip_size);
         } else {
                 ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
+                UNIV_MEM_ASSERT_RW(((buf_block_t*) bpage)->frame,
+                                   UNIV_PAGE_SIZE);

                 memcpy(trx_doublewrite->write_buf
                        + UNIV_PAGE_SIZE * trx_doublewrite->first_free,
@@ -932 +1062 @@
 {
         ulint   zip_size        = buf_page_get_zip_size(bpage);
         page_t* frame           = NULL;
+
+#ifdef UNIV_DEBUG
+        buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
+        ut_ad(!buf_pool_mutex_own(buf_pool));
+#endif
+
 #ifdef UNIV_LOG_DEBUG
         static ibool univ_log_debug_warned;
 #endif /* UNIV_LOG_DEBUG */
@@ -943 +1079 @@
         io_fixed and oldest_modification != 0.  Thus, it cannot be
         relocated in the buffer pool or removed from flush_list or
         LRU_list. */
-        ut_ad(!buf_pool_mutex_own());
+        ut_ad(!buf_pool_mutex_own(buf_pool));
+        ut_ad(!buf_flush_list_mutex_own(buf_pool));
         ut_ad(!mutex_own(buf_page_get_mutex(bpage)));
         ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_WRITE);
         ut_ad(bpage->oldest_modification != 0);
@@ -1012 +1149 @@
 Writes a flushable page asynchronously from the buffer pool to a file.
 NOTE: in simulated aio we must call
 os_aio_simulated_wake_handler_threads after we have posted a batch of
-writes! NOTE: buf_pool_mutex and buf_page_get_mutex(bpage) must be
+writes! NOTE: buf_pool->mutex and buf_page_get_mutex(bpage) must be
 held upon entering this function, and they will be released by this
 function. */
 static
 void
 buf_flush_page(
 /*===========*/
+        buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
         buf_page_t*     bpage,          /*!< in: buffer control block */
         enum buf_flush  flush_type)     /*!< in: BUF_FLUSH_LRU
                                         or BUF_FLUSH_LIST */
@@ -1027 +1165 @@
         ibool           is_uncompressed;

         ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
-        ut_ad(buf_pool_mutex_own());
+        ut_ad(buf_pool_mutex_own(buf_pool));
         ut_ad(buf_page_in_file(bpage));

         block_mutex = buf_page_get_mutex(bpage);
@@ -1047 +1185 @@
         buf_pool->n_flush[flush_type]++;

         is_uncompressed = (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
-        ut_ad(is_uncompressed == (block_mutex != &buf_pool_zip_mutex));
+        ut_ad(is_uncompressed == (block_mutex != &buf_pool->zip_mutex));

         switch (flush_type) {
                 ibool   is_s_latched;
@@ -1063 +1201 @@
                 }

                 mutex_exit(block_mutex);
-                buf_pool_mutex_exit();
+                buf_pool_mutex_exit(buf_pool);

                 /* Even though bpage is not protected by any mutex at
                 this point, it is safe to access bpage, because it is
@@ -1100 +1238 @@
                 immediately. */

                 mutex_exit(block_mutex);
-                buf_pool_mutex_exit();
+                buf_pool_mutex_exit(buf_pool);
                 break;

         default:
@@ -1134 +1272 @@
         enum buf_flush  flush_type)     /*!< in: BUF_FLUSH_LRU or
                                         BUF_FLUSH_LIST */
 {
-        buf_page_t*     bpage;
-        ulint           low, high;
-        ulint           count           = 0;
         ulint           i;
+        ulint           low;
+        ulint           high;
+        ulint           count = 0;
+        buf_pool_t*     buf_pool = buf_pool_get(space, offset);

         ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);

         if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) {
-                /* If there is little space, it is better not to flush any
-                block except from the end of the LRU list */
+                /* If there is little space, it is better not to flush
+                any block except from the end of the LRU list */

                 low = offset;
                 high = offset + 1;
         } else {
-                /* When flushed, dirty blocks are searched in neighborhoods of
-                this size, and flushed along with the original page. */
+                /* When flushed, dirty blocks are searched in
+                neighborhoods of this size, and flushed along with the
+                original page. */

-                ulint   buf_flush_area  = ut_min(BUF_READ_AHEAD_AREA,
-                                                 buf_pool->curr_size / 16);
+                ulint   buf_flush_area;
+
+                buf_flush_area  = ut_min(
+                        BUF_READ_AHEAD_AREA(buf_pool),
+                        buf_pool->curr_size / 16);

                 low = (offset / buf_flush_area) * buf_flush_area;
                 high = (offset / buf_flush_area + 1) * buf_flush_area;
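As a concrete instance of the neighborhood arithmetic above: if
buf_flush_area works out to 64 pages and the dirty page has offset 100, then
low = (100 / 64) * 64 = 64 and high = (100 / 64 + 1) * 64 = 128, so the
flush considers page numbers 64 through 127 around the original page. The
numbers are illustrative only; the real area depends on
BUF_READ_AHEAD_AREA(buf_pool) and buf_pool->curr_size.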
@@ -1164 +1307 @@
                 high = fil_space_get_size(space);
         }

-        buf_pool_mutex_enter();
-
         for (i = low; i < high; i++) {

-                bpage = buf_page_hash_get(space, i);
+                buf_page_t*     bpage;
+
+                buf_pool = buf_pool_get(space, i);
+
+                buf_pool_mutex_enter(buf_pool);
+
+                /* We only want to flush pages from this buffer pool. */
+                bpage = buf_page_hash_get(buf_pool, space, i);

                 if (!bpage) {

+                        buf_pool_mutex_exit(buf_pool);
                         continue;
                 }

@@ -1190 +1339 @@
                         if (buf_flush_ready_for_flush(bpage, flush_type)
                             && (i == offset || !bpage->buf_fix_count)) {
                                 /* We only try to flush those
-                                neighbors != offset where the buf fix count is
-                                zero, as we then know that we probably can
-                                latch the page without a semaphore wait.
-                                Semaphore waits are expensive because we must
-                                flush the doublewrite buffer before we start
+                                neighbors != offset where the buf fix
+                                count is zero, as we then know that we
+                                probably can latch the page without a
+                                semaphore wait. Semaphore waits are
+                                expensive because we must flush the
+                                doublewrite buffer before we start
                                 waiting. */

-                                buf_flush_page(bpage, flush_type);
+                                buf_flush_page(buf_pool, bpage, flush_type);
                                 ut_ad(!mutex_own(block_mutex));
+                                ut_ad(!buf_pool_mutex_own(buf_pool));
                                 count++;
-
-                                buf_pool_mutex_enter();
+                                continue;
                         } else {
                                 mutex_exit(block_mutex);
                         }
                 }
-        }
-
-        buf_pool_mutex_exit();
+                buf_pool_mutex_exit(buf_pool);
+        }
+
+        return(count);
+}
+
+/********************************************************************//**
+Check if the block is modified and ready for flushing. If the block
+is ready to flush then flush the page and try to flush its neighbors.
+
+@return TRUE if buf_pool mutex was not released during this function.
+This does not guarantee that some pages were written as well.
+The number of pages written is added to the count. */
+static
+ibool
+buf_flush_page_and_try_neighbors(
+/*=============================*/
+        buf_page_t*     bpage,          /*!< in: buffer control block,
+                                        must be
+                                        buf_page_in_file(bpage) */
+        enum buf_flush  flush_type,     /*!< in: BUF_FLUSH_LRU
+                                        or BUF_FLUSH_LIST */
+        ulint*          count)          /*!< in/out: number of pages
+                                        flushed */
+{
+        mutex_t*        block_mutex;
+        ibool           flushed = FALSE;
+#ifdef UNIV_DEBUG
+        buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
+#endif /* UNIV_DEBUG */
+
+        ut_ad(buf_pool_mutex_own(buf_pool));
+
+        block_mutex = buf_page_get_mutex(bpage);
+        mutex_enter(block_mutex);
+
+        ut_a(buf_page_in_file(bpage));
+
+        if (buf_flush_ready_for_flush(bpage, flush_type)) {
+                ulint           space;
+                ulint           offset;
+                buf_pool_t*     buf_pool;
+
+                buf_pool = buf_pool_from_bpage(bpage);
+
+                buf_pool_mutex_exit(buf_pool);
+
+                /* These fields are protected by both the
+                buffer pool mutex and block mutex. */
+                space = buf_page_get_space(bpage);
+                offset = buf_page_get_page_no(bpage);
+
+                mutex_exit(block_mutex);
+
+                /* Try to flush also all the neighbors */
+                *count += buf_flush_try_neighbors(space, offset, flush_type);
+
+                buf_pool_mutex_enter(buf_pool);
+                flushed = TRUE;
+        } else {
+                mutex_exit(block_mutex);
+        }
+
+        ut_ad(buf_pool_mutex_own(buf_pool));
+
+        return(flushed);
+}
+
+/*******************************************************************//**
+This utility flushes dirty blocks from the end of the LRU list.
+In the case of an LRU flush the calling thread may own latches to
+pages: to avoid deadlocks, this function must be written so that it
+cannot end up waiting for these latches!
+@return number of blocks for which the write request was queued. */
+static
+ulint
+buf_flush_LRU_list_batch(
+/*=====================*/
+        buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
+        ulint           max)            /*!< in: max of blocks to flush */
+{
+        buf_page_t*     bpage;
+        ulint           count = 0;
+
+        ut_ad(buf_pool_mutex_own(buf_pool));
+
+        do {
+                /* Start from the end of the list looking for a
+                suitable block to be flushed. */
+                bpage = UT_LIST_GET_LAST(buf_pool->LRU);
+
+                /* Iterate backwards over the flush list till we find
+                a page that isn't ready for flushing. */
+                while (bpage != NULL
+                       && !buf_flush_page_and_try_neighbors(
+                                bpage, BUF_FLUSH_LRU, &count)) {
+
+                        bpage = UT_LIST_GET_PREV(LRU, bpage);
+                }
+        } while (bpage != NULL && count < max);
+
+        /* We keep track of all flushes happening as part of LRU
+        flush. When estimating the desired rate at which flush_list
+        should be flushed, we factor in this value. */
+        buf_lru_flush_page_count += count;
+
+        ut_ad(buf_pool_mutex_own(buf_pool));
+
+        return(count);
+}
+
+/*******************************************************************//**
+This utility flushes dirty blocks from the end of the flush_list.
+The calling thread is not allowed to own any latches on pages!
+@return number of blocks for which the write request was queued;
+ULINT_UNDEFINED if there was a flush of the same type already
+running */
+static
+ulint
+buf_flush_flush_list_batch(
+/*=======================*/
+        buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
+        ulint           min_n,          /*!< in: wished minimum number
+                                        of blocks flushed (it is not
+                                        guaranteed that the actual
+                                        number is that big, though) */
+        ib_uint64_t     lsn_limit)      /*!< all blocks whose
+                                        oldest_modification is smaller
+                                        than this should be flushed (if
+                                        their number does not exceed
+                                        min_n) */
+{
+        ulint           len;
+        buf_page_t*     bpage;
+        ulint           count = 0;
+
+        ut_ad(buf_pool_mutex_own(buf_pool));
+
+        /* If we have flushed enough, leave the loop */
+        do {
+                /* Start from the end of the list looking for a suitable
+                block to be flushed. */
+
+                buf_flush_list_mutex_enter(buf_pool);
+
+                /* We use len here because theoretically insertions can
+                happen in the flush_list below while we are traversing
+                it for a suitable candidate for flushing. We'd like to
+                set a limit on how far we are willing to traverse
+                the list. */
+                len = UT_LIST_GET_LEN(buf_pool->flush_list);
+                bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
+
+                if (bpage) {
+                        ut_a(bpage->oldest_modification > 0);
+                }
+
+                if (!bpage || bpage->oldest_modification >= lsn_limit) {
+
+                        /* We have flushed enough */
+                        buf_flush_list_mutex_exit(buf_pool);
+                        break;
+                }
+
+                ut_a(bpage->oldest_modification > 0);
+
+                ut_ad(bpage->in_flush_list);
+
+                buf_flush_list_mutex_exit(buf_pool);
+
+                /* The list may change during the flushing and we cannot
+                safely preserve within this function a pointer to a
+                block in the list! */
+                while (bpage != NULL
+                       && len > 0
+                       && !buf_flush_page_and_try_neighbors(
+                                bpage, BUF_FLUSH_LIST, &count)) {
+
+                        buf_flush_list_mutex_enter(buf_pool);
+
+                        /* If we are here that means that buf_pool->mutex
+                        was not released in buf_flush_page_and_try_neighbors()
+                        above and this guarantees that bpage didn't get
+                        relocated since we released the flush_list
+                        mutex above. There is a chance, however, that
+                        the bpage got removed from flush_list (not
+                        currently possible because flush_list_remove()
+                        also obtains buf_pool mutex but that may change
+                        in future). To avoid this scenario we check
+                        the oldest_modification and if it is zero
+                        we start all over again. */
+                        if (bpage->oldest_modification == 0) {
+                                buf_flush_list_mutex_exit(buf_pool);
+                                break;
+                        }
+
+                        bpage = UT_LIST_GET_PREV(list, bpage);
+
+                        ut_ad(!bpage || bpage->in_flush_list);
+
+                        buf_flush_list_mutex_exit(buf_pool);
+
+                        --len;
+                }
+
+        } while (count < min_n && bpage != NULL && len > 0);
+
+        ut_ad(buf_pool_mutex_own(buf_pool));

         return(count);
 }
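The two batch functions above split what was previously one loop in
buf_flush_batch(): an LRU scan bounded by a page count, and a flush_list
scan bounded both by the count and by the list length captured up front
(len), so that concurrent insertions at the head cannot make the traversal
unbounded. A minimal standalone sketch of that length-bounded backward scan
(hypothetical types, illustrating the idiom only):

    #include <stddef.h>

    /* Sketch: walk a list backwards, visiting at most the number of
    nodes present when the scan began. Hypothetical node type. */
    typedef struct node node_t;
    struct node {
            node_t* prev;
    };

    static unsigned long
    scan_bounded(node_t* tail, unsigned long len, int (*visit)(node_t*))
    {
            unsigned long   visited = 0;

            while (tail != NULL && len > 0 && !visit(tail)) {
                    tail = tail->prev;      /* move toward the head */
                    --len;                  /* consume the budget */
                    ++visited;
            }

            return(visited);
    }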
@@ -1221 +1576 @@
 the calling thread is not allowed to own any latches on pages!
 @return number of blocks for which the write request was queued;
 ULINT_UNDEFINED if there was a flush of the same type already running */
-UNIV_INTERN
+static
 ulint
 buf_flush_batch(
 /*============*/
+        buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
         enum buf_flush  flush_type,     /*!< in: BUF_FLUSH_LRU or
                                         BUF_FLUSH_LIST; if BUF_FLUSH_LIST,
                                         then the caller must not own any
@@ -1232 +1588 @@
         ulint           min_n,          /*!< in: wished minimum number of blocks
                                         flushed (it is not guaranteed that the
                                         actual number is that big, though) */
-        ib_uint64_t     lsn_limit)      /*!< in the case BUF_FLUSH_LIST all
-                                        blocks whose oldest_modification is
+        ib_uint64_t     lsn_limit)      /*!< in: in the case of BUF_FLUSH_LIST
+                                        all blocks whose oldest_modification is
                                         smaller than this should be flushed
                                         (if their number does not exceed
                                         min_n), otherwise ignored */
 {
-        buf_page_t*     bpage;
-        ulint           page_count      = 0;
-        ulint           old_page_count;
-        ulint           space;
-        ulint           offset;
+        ulint           count   = 0;

-        ut_ad((flush_type == BUF_FLUSH_LRU)
-              || (flush_type == BUF_FLUSH_LIST));
+        ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
 #ifdef UNIV_SYNC_DEBUG
         ut_ad((flush_type != BUF_FLUSH_LIST)
               || sync_thread_levels_empty_gen(TRUE));
 #endif /* UNIV_SYNC_DEBUG */
-        buf_pool_mutex_enter();
-
-        if ((buf_pool->n_flush[flush_type] > 0)
-            || (buf_pool->init_flush[flush_type] == TRUE)) {
-
-                /* There is already a flush batch of the same type running */
-
-                buf_pool_mutex_exit();
-
-                return(ULINT_UNDEFINED);
-        }
-
-        buf_pool->init_flush[flush_type] = TRUE;
-
-        bool done_with_loop= false;
-        for (;done_with_loop != true;) {
-flush_next:
-                /* If we have flushed enough, leave the loop */
-                if (page_count >= min_n) {
-
-                        break;
-                }
-
-                /* Start from the end of the list looking for a suitable
-                block to be flushed. */
-
-                if (flush_type == BUF_FLUSH_LRU) {
-                        bpage = UT_LIST_GET_LAST(buf_pool->LRU);
-                } else {
-                        ut_ad(flush_type == BUF_FLUSH_LIST);
-
-                        bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
-                        if (!bpage
-                            || bpage->oldest_modification >= lsn_limit) {
-                                /* We have flushed enough */
-
-                                break;
-                        }
-                        ut_ad(bpage->in_flush_list);
-                }
-
-                /* Note that after finding a single flushable page, we try to
-                flush also all its neighbors, and after that start from the
-                END of the LRU list or flush list again: the list may change
-                during the flushing and we cannot safely preserve within this
-                function a pointer to a block in the list! */
-
-                do {
-                        mutex_t*block_mutex = buf_page_get_mutex(bpage);
-                        ibool   ready;
-
-                        ut_a(buf_page_in_file(bpage));
-
-                        mutex_enter(block_mutex);
-                        ready = buf_flush_ready_for_flush(bpage, flush_type);
-                        mutex_exit(block_mutex);
-
-                        if (ready) {
-                                space = buf_page_get_space(bpage);
-                                offset = buf_page_get_page_no(bpage);
-
-                                buf_pool_mutex_exit();
-
-                                old_page_count = page_count;
-
-                                /* Try to flush also all the neighbors */
-                                page_count += buf_flush_try_neighbors(
-                                        space, offset, flush_type);
-                                /* fprintf(stderr,
-                                "Flush type %lu, page no %lu, neighb %lu\n",
-                                flush_type, offset,
-                                page_count - old_page_count); */
-
-                                buf_pool_mutex_enter();
-                                goto flush_next;
-
-                        } else if (flush_type == BUF_FLUSH_LRU) {
-                                bpage = UT_LIST_GET_PREV(LRU, bpage);
-                        } else {
-                                ut_ad(flush_type == BUF_FLUSH_LIST);
-
-                                bpage = UT_LIST_GET_PREV(list, bpage);
-                                ut_ad(!bpage || bpage->in_flush_list);
-                        }
-                } while (bpage != NULL);
-
-                /* If we could not find anything to flush, leave the loop */
-
-                done_with_loop= true;
-
-        }
-
-        buf_pool->init_flush[flush_type] = FALSE;
-
-        if (buf_pool->n_flush[flush_type] == 0) {
-
-                /* The running flush batch has ended */
-
-                os_event_set(buf_pool->no_flush[flush_type]);
-        }
-
-        buf_pool_mutex_exit();
-
-        buf_flush_buffered_writes();
+
+        buf_pool_mutex_enter(buf_pool);
+
+        /* Note: The buffer pool mutex is released and reacquired within
+        the flush functions. */
+        switch(flush_type) {
+        case BUF_FLUSH_LRU:
+                count = buf_flush_LRU_list_batch(buf_pool, min_n);
+                break;
+        case BUF_FLUSH_LIST:
+                count = buf_flush_flush_list_batch(buf_pool, min_n, lsn_limit);
+                break;
+        default:
+                ut_error;
+        }
+
+        buf_pool_mutex_exit(buf_pool);
+
+        buf_flush_buffered_writes();
+
+#ifdef UNIV_DEBUG
+        if (buf_debug_prints && count > 0) {
+                fprintf(stderr, flush_type == BUF_FLUSH_LRU
+                        ? "Flushed %lu pages in LRU flush\n"
+                        : "Flushed %lu pages in flush list flush\n",
+                        (ulong) count);
+        }
+#endif /* UNIV_DEBUG */
+
+        srv_buf_pool_flushed += count;
+
+        return(count);
+}
+
+/******************************************************************//**
+Gather the aggregated stats for both flush list and LRU list flushing */
+static
+void
+buf_flush_common(
+/*=============*/
+        enum buf_flush  flush_type,     /*!< in: type of flush */
+        ulint           page_count)     /*!< in: number of pages flushed */
+{
+        buf_flush_buffered_writes();
+
+        ut_a(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);

 #ifdef UNIV_DEBUG
         if (buf_debug_prints && page_count > 0) {
-                ut_a(flush_type == BUF_FLUSH_LRU
-                     || flush_type == BUF_FLUSH_LIST);
                 fprintf(stderr, flush_type == BUF_FLUSH_LRU
                         ? "Flushed %lu pages in LRU flush\n"
                         : "Flushed %lu pages in flush list flush\n",
@@ -1368 +1659 @@

         srv_buf_pool_flushed += page_count;

-        /* We keep track of all flushes happening as part of LRU
-        flush. When estimating the desired rate at which flush_list
-        should be flushed we factor in this value. */
         if (flush_type == BUF_FLUSH_LRU) {
+                /* We keep track of all flushes happening as part of LRU
+                flush. When estimating the desired rate at which flush_list
+                should be flushed we factor in this value. */
                 buf_lru_flush_page_count += page_count;
         }
-
-        return(page_count);
+}
+
+/******************************************************************//**
+Start a buffer flush batch for LRU or flush list */
+static
+ibool
+buf_flush_start(
+/*============*/
+        buf_pool_t*     buf_pool,       /*!< buffer pool instance */
+        enum buf_flush  flush_type)     /*!< in: BUF_FLUSH_LRU
+                                        or BUF_FLUSH_LIST */
+{
+        buf_pool_mutex_enter(buf_pool);
+
+        if (buf_pool->n_flush[flush_type] > 0
+           || buf_pool->init_flush[flush_type] == TRUE) {
+
+                /* There is already a flush batch of the same type running */
+
+                buf_pool_mutex_exit(buf_pool);
+
+                return(FALSE);
+        }
+
+        buf_pool->init_flush[flush_type] = TRUE;
+
+        buf_pool_mutex_exit(buf_pool);
+
+        return(TRUE);
+}
+
+/******************************************************************//**
+End a buffer flush batch for LRU or flush list */
+static
+void
+buf_flush_end(
+/*==========*/
+        buf_pool_t*     buf_pool,       /*!< buffer pool instance */
+        enum buf_flush  flush_type)     /*!< in: BUF_FLUSH_LRU
+                                        or BUF_FLUSH_LIST */
+{
+        buf_pool_mutex_enter(buf_pool);
+
+        buf_pool->init_flush[flush_type] = FALSE;
+
+        if (buf_pool->n_flush[flush_type] == 0) {
+
+                /* The running flush batch has ended */
+
+                os_event_set(buf_pool->no_flush[flush_type]);
+        }
+
+        buf_pool_mutex_exit(buf_pool);
 }
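buf_flush_start() and buf_flush_end() factor the old inline batch
bookkeeping into a reusable bracket: start fails if a batch of the same type
is already running, and end signals waiters on no_flush[]. The calling
pattern, condensed from buf_flush_LRU() further down in this diff:

    /* Sketch: the start/batch/end bracket around one flush batch. */
    if (!buf_flush_start(buf_pool, BUF_FLUSH_LRU)) {
            return(ULINT_UNDEFINED);        /* same batch type running */
    }

    page_count = buf_flush_batch(buf_pool, BUF_FLUSH_LRU, min_n, 0);

    buf_flush_end(buf_pool, BUF_FLUSH_LRU);
    buf_flush_common(BUF_FLUSH_LRU, page_count);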

 /******************************************************************//**
@@ -1384 +1726 @@
 void
 buf_flush_wait_batch_end(
 /*=====================*/
-        enum buf_flush  type)   /*!< in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
-{
-        ut_ad((type == BUF_FLUSH_LRU) || (type == BUF_FLUSH_LIST));
-
-        os_event_wait(buf_pool->no_flush[type]);
-}
-
+        buf_pool_t*     buf_pool,       /*!< buffer pool instance */
+        enum buf_flush  type)           /*!< in: BUF_FLUSH_LRU
+                                        or BUF_FLUSH_LIST */
+{
+        ut_ad(type == BUF_FLUSH_LRU || type == BUF_FLUSH_LIST);
+
+        if (buf_pool == NULL) {
+                ulint   i;
+
+                for (i = 0; i < srv_buf_pool_instances; ++i) {
+                        buf_pool_t*     buf_pool;
+
+                        buf_pool = buf_pool_from_array(i);
+
+                        os_event_wait(buf_pool->no_flush[type]);
+                }
+        } else {
+                os_event_wait(buf_pool->no_flush[type]);
+        }
+}
+
+/*******************************************************************//**
+This utility flushes dirty blocks from the end of the LRU list.
+NOTE: The calling thread may own latches to pages: to avoid deadlocks,
+this function must be written so that it cannot end up waiting for these
+latches!
+@return number of blocks for which the write request was queued;
+ULINT_UNDEFINED if there was a flush of the same type already running */
+UNIV_INTERN
+ulint
+buf_flush_LRU(
+/*==========*/
+        buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
+        ulint           min_n)          /*!< in: wished minimum number of blocks
+                                        flushed (it is not guaranteed that the
+                                        actual number is that big, though) */
+{
+        ulint           page_count;
+
+        if (!buf_flush_start(buf_pool, BUF_FLUSH_LRU)) {
+                return(ULINT_UNDEFINED);
+        }
+
+        page_count = buf_flush_batch(buf_pool, BUF_FLUSH_LRU, min_n, 0);
+
+        buf_flush_end(buf_pool, BUF_FLUSH_LRU);
+
+        buf_flush_common(BUF_FLUSH_LRU, page_count);
+
+        return(page_count);
+}
+
+/*******************************************************************//**
+This utility flushes dirty blocks from the end of the flush list of
+all buffer pool instances.
+NOTE: The calling thread is not allowed to own any latches on pages!
+@return number of blocks for which the write request was queued;
+ULINT_UNDEFINED if there was a flush of the same type already running */
+UNIV_INTERN
+ulint
+buf_flush_list(
+/*===========*/
+        ulint           min_n,          /*!< in: wished minimum number of blocks
+                                        flushed (it is not guaranteed that the
+                                        actual number is that big, though) */
+        ib_uint64_t     lsn_limit)      /*!< in the case BUF_FLUSH_LIST all
+                                        blocks whose oldest_modification is
+                                        smaller than this should be flushed
+                                        (if their number does not exceed
+                                        min_n), otherwise ignored */
+{
+        ulint           i;
+        ulint           total_page_count = 0;
+        ibool           skipped = FALSE;
+
+        if (min_n != ULINT_MAX) {
+                /* Ensure that flushing is spread evenly amongst the
+                buffer pool instances. When min_n is ULINT_MAX
+                we need to flush everything up to the lsn limit
+                so no limit here. */
+                min_n = (min_n + srv_buf_pool_instances - 1)
+                         / srv_buf_pool_instances;
+        }
+
+        /* Flush to lsn_limit in all buffer pool instances */
+        for (i = 0; i < srv_buf_pool_instances; i++) {
+                buf_pool_t*     buf_pool;
+                ulint           page_count = 0;
+
+                buf_pool = buf_pool_from_array(i);
+
+                if (!buf_flush_start(buf_pool, BUF_FLUSH_LIST)) {
+                        /* We have two choices here. If lsn_limit was
+                        specified then skipping an instance of buffer
+                        pool means we cannot guarantee that all pages
+                        up to lsn_limit have been flushed. We can
+                        return right now with failure or we can try
+                        to flush remaining buffer pools up to the
+                        lsn_limit. We attempt to flush other buffer
+                        pools based on the assumption that it will
+                        help in the retry which will follow the
+                        failure. */
+                        skipped = TRUE;
+
+                        continue;
+                }
+
+                page_count = buf_flush_batch(
+                        buf_pool, BUF_FLUSH_LIST, min_n, lsn_limit);
+
+                buf_flush_end(buf_pool, BUF_FLUSH_LIST);
+
+                buf_flush_common(BUF_FLUSH_LIST, page_count);
+
+                total_page_count += page_count;
+        }
+
+        return(lsn_limit != IB_ULONGLONG_MAX && skipped
+               ? ULINT_UNDEFINED : total_page_count);
+}
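The division of min_n above is a ceiling division, so the per-instance quota
never rounds down to nothing. For example, with srv_buf_pool_instances == 4
and min_n == 100, each instance is asked to flush (100 + 4 - 1) / 4 == 25
pages; with min_n == 10 and 4 instances each gets (10 + 3) / 4 == 3, for at
least 12 pages in total rather than the 8 that plain truncating division
would request (numbers illustrative).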
 
1849
 
1394
1850
/******************************************************************//**
1395
1851
Gives a recommendation of how many blocks should be flushed to establish
1396
1852
a big enough margin of replaceable blocks near the end of the LRU list
1399
1855
LRU list */
1400
1856
static
1401
1857
ulint
1402
 
buf_flush_LRU_recommendation(void)
1403
 
/*==============================*/
 
1858
buf_flush_LRU_recommendation(
 
1859
/*=========================*/
 
1860
        buf_pool_t*     buf_pool)               /*!< in: Buffer pool instance */
1404
1861
{
1405
1862
        buf_page_t*     bpage;
1406
1863
        ulint           n_replaceable;
1407
1864
        ulint           distance        = 0;
1408
1865
 
1409
 
        buf_pool_mutex_enter();
 
1866
        buf_pool_mutex_enter(buf_pool);
1410
1867
 
1411
1868
        n_replaceable = UT_LIST_GET_LEN(buf_pool->free);
1412
1869
 
1413
1870
        bpage = UT_LIST_GET_LAST(buf_pool->LRU);
1414
1871
 
1415
1872
        while ((bpage != NULL)
1416
 
               && (n_replaceable < BUF_FLUSH_FREE_BLOCK_MARGIN
1417
 
                   + BUF_FLUSH_EXTRA_MARGIN)
1418
 
               && (distance < BUF_LRU_FREE_SEARCH_LEN)) {
 
1873
               && (n_replaceable < BUF_FLUSH_FREE_BLOCK_MARGIN(buf_pool)
 
1874
                   + BUF_FLUSH_EXTRA_MARGIN(buf_pool))
 
1875
               && (distance < BUF_LRU_FREE_SEARCH_LEN(buf_pool))) {
1419
1876
 
1420
1877
                mutex_t* block_mutex = buf_page_get_mutex(bpage);
1421
1878
 
1432
1889
                bpage = UT_LIST_GET_PREV(LRU, bpage);
1433
1890
        }
1434
1891
 
1435
 
        buf_pool_mutex_exit();
 
1892
        buf_pool_mutex_exit(buf_pool);
1436
1893
 
1437
 
        if (n_replaceable >= BUF_FLUSH_FREE_BLOCK_MARGIN) {
 
1894
        if (n_replaceable >= BUF_FLUSH_FREE_BLOCK_MARGIN(buf_pool)) {
1438
1895
 
1439
1896
                return(0);
1440
1897
        }
1441
1898
 
1442
 
        return(BUF_FLUSH_FREE_BLOCK_MARGIN + BUF_FLUSH_EXTRA_MARGIN
 
1899
        return(BUF_FLUSH_FREE_BLOCK_MARGIN(buf_pool)
 
1900
               + BUF_FLUSH_EXTRA_MARGIN(buf_pool)
1443
1901
               - n_replaceable);
1444
1902
}
1445
1903
 
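[Reviewer note] The recommendation itself is simple arithmetic: once the number of replaceable pages (free list plus the clean tail of the LRU) drops below the free-block margin, the function asks for enough flushes to restore the margin plus some headroom. A sketch with hypothetical margin values standing in for the BUF_FLUSH_FREE_BLOCK_MARGIN and BUF_FLUSH_EXTRA_MARGIN macros, which this patch parameterizes on the buffer pool instance:

#include <stdio.h>

static unsigned long
lru_recommendation(
        unsigned long   n_replaceable,  /* free + clean LRU tail */
        unsigned long   free_margin,    /* hypothetical margin value */
        unsigned long   extra_margin)   /* hypothetical headroom value */
{
        if (n_replaceable >= free_margin) {
                return(0);      /* margin already satisfied */
        }
        return(free_margin + extra_margin - n_replaceable);
}

int main(void)
{
        /* e.g. margin 64, headroom 116, 40 replaceable -> flush 140 */
        printf("%lu\n", lru_recommendation(40, 64, 116));
        return(0);
}
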
1451
1909
immediately, without waiting. */
1452
1910
UNIV_INTERN
1453
1911
void
1454
 
buf_flush_free_margin(void)
1455
 
/*=======================*/
 
1912
buf_flush_free_margin(
 
1913
/*==================*/
 
1914
        buf_pool_t*     buf_pool)               /*!< in: Buffer pool instance */
1456
1915
{
1457
1916
        ulint   n_to_flush;
1458
 
        ulint   n_flushed;
1459
1917
 
1460
 
        n_to_flush = buf_flush_LRU_recommendation();
 
1918
        n_to_flush = buf_flush_LRU_recommendation(buf_pool);
1461
1919
 
1462
1920
        if (n_to_flush > 0) {
1463
 
                n_flushed = buf_flush_batch(BUF_FLUSH_LRU, n_to_flush, 0);
 
1921
                ulint   n_flushed;
 
1922
 
 
1923
                n_flushed = buf_flush_LRU(buf_pool, n_to_flush);
 
1924
 
1464
1925
                if (n_flushed == ULINT_UNDEFINED) {
1465
1926
                        /* There was an LRU type flush batch already running;
1466
1927
                        let us wait for it to end */
1467
1928
 
1468
 
                        buf_flush_wait_batch_end(BUF_FLUSH_LRU);
 
1929
                        buf_flush_wait_batch_end(buf_pool, BUF_FLUSH_LRU);
1469
1930
                }
1470
1931
        }
1471
1932
}
1472
1933
 
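[Reviewer note] The ULINT_UNDEFINED branch above is a back-off protocol: when another thread already owns the LRU batch for this instance, the caller waits for that batch to finish instead of stacking a second one. A self-contained sketch of the protocol; try_lru_flush and wait_for_lru_batch are illustrative stand-ins, not InnoDB functions:

#include <stdio.h>

#define FLUSH_ALREADY_RUNNING   ((unsigned long) -1)    /* cf. ULINT_UNDEFINED */

/* Stub: pretend a batch is already in progress. */
static unsigned long
try_lru_flush(int pool_id, unsigned long n)
{
        (void) pool_id;
        (void) n;
        return(FLUSH_ALREADY_RUNNING);
}

static void
wait_for_lru_batch(int pool_id)
{
        printf("waiting for LRU batch on instance %d\n", pool_id);
}

static void
free_margin(int pool_id, unsigned long n_to_flush)
{
        if (n_to_flush == 0) {
                return;
        }
        if (try_lru_flush(pool_id, n_to_flush) == FLUSH_ALREADY_RUNNING) {
                wait_for_lru_batch(pool_id);    /* don't stack a second batch */
        }
}

int main(void)
{
        free_margin(0, 140);
        return(0);
}
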
 
1934
/*********************************************************************//**
 
1935
Flushes pages from the end of all the LRU lists. */
 
1936
UNIV_INTERN
 
1937
void
 
1938
buf_flush_free_margins(void)
 
1939
/*========================*/
 
1940
{
 
1941
        ulint   i;
 
1942
 
 
1943
        for (i = 0; i < srv_buf_pool_instances; i++) {
 
1944
                buf_pool_t*     buf_pool;
 
1945
 
 
1946
                buf_pool = buf_pool_from_array(i);
 
1947
 
 
1948
                buf_flush_free_margin(buf_pool);
 
1949
        }
 
1950
}
 
1951
 
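[Reviewer note] buf_flush_free_margins() is the new plural entry point: with multiple buffer pool instances, call sites that used to act on "the" buffer pool now walk the whole array via buf_pool_from_array(). A trivial sketch of the iteration pattern with stand-in types:

#include <stdio.h>

#define N_INSTANCES     4       /* stand-in for srv_buf_pool_instances */

struct pool { int id; };        /* stand-in for buf_pool_t */

static struct pool pools[N_INSTANCES];

static void
flush_free_margin(struct pool* p)
{
        printf("flush LRU tail of instance %d\n", p->id);
}

int main(void)
{
        int     i;

        for (i = 0; i < N_INSTANCES; i++) {
                pools[i].id = i;
                flush_free_margin(&pools[i]);   /* one margin flush each */
        }
        return(0);
}
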
1473
1952
/*********************************************************************
1474
1953
Update the historical stats that we are collecting for flush rate
1475
1954
heuristics at the end of each interval.
1530
2009
buf_flush_get_desired_flush_rate(void)
1531
2010
/*==================================*/
1532
2011
{
1533
 
        ulint                   redo_avg;
1534
 
        ulint                   lru_flush_avg;
1535
 
        ulint                   n_dirty;
1536
 
        ulint                   n_flush_req;
1537
 
        lint                    rate;
1538
 
        ib_uint64_t             lsn = log_get_lsn();
1539
 
        ulint                   log_capacity = log_get_capacity();
 
2012
        ulint           i;
 
2013
        lint            rate;
 
2014
        ulint           redo_avg;
 
2015
        ulint           n_dirty = 0;
 
2016
        ulint           n_flush_req;
 
2017
        ulint           lru_flush_avg;
 
2018
        ib_uint64_t     lsn = log_get_lsn();
 
2019
        ulint           log_capacity = log_get_capacity();
1540
2020
 
1541
2021
        /* log_capacity should never be zero after the initialization
1542
2022
        of log subsystem. */
1543
2023
        ut_ad(log_capacity != 0);
1544
2024
 
1545
2025
        /* Get total number of dirty pages. It is OK to access
1546
 
        flush_list without holding any mtex as we are using this
 
2026
        flush_list without holding any mutex as we are using this
1547
2027
        only for heuristics. */
1548
 
        n_dirty = UT_LIST_GET_LEN(buf_pool->flush_list);
 
2028
        for (i = 0; i < srv_buf_pool_instances; i++) {
 
2029
                buf_pool_t*     buf_pool;
 
2030
 
 
2031
                buf_pool = buf_pool_from_array(i);
 
2032
                n_dirty += UT_LIST_GET_LEN(buf_pool->flush_list);
 
2033
        }
1549
2034
 
1550
2035
        /* An overflow can happen if we generate more than 2^32 bytes
1551
2036
        of redo in this interval i.e.: 4G of redo in 1 second. We can
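[Reviewer note] Two details in the hunk above: the dirty-page count is now a sum of per-instance flush-list lengths, and it is still read without taking any mutex because the value only feeds a heuristic, so a racy snapshot is acceptable. A sketch of the unlocked sum with a stand-in struct:

#include <stdio.h>

struct pool_stub { unsigned long flush_list_len; };     /* stand-in */

static unsigned long
total_dirty(const struct pool_stub* pools, unsigned long n_instances)
{
        unsigned long   n_dirty = 0;
        unsigned long   i;

        /* Unlocked, racy reads: fine here, because the result only
        steers the flush-rate heuristic, exactly as the comment argues. */
        for (i = 0; i < n_instances; i++) {
                n_dirty += pools[i].flush_list_len;
        }
        return(n_dirty);
}

int main(void)
{
        struct pool_stub pools[3] = {{120}, {75}, {310}};

        printf("%lu dirty pages\n", total_dirty(pools, 3));     /* 505 */
        return(0);
}
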
1587
2072
@return TRUE if ok */
1588
2073
static
1589
2074
ibool
1590
 
buf_flush_validate_low(void)
1591
 
/*========================*/
 
2075
buf_flush_validate_low(
 
2076
/*===================*/
 
2077
        buf_pool_t*     buf_pool)               /*!< in: Buffer pool instance */
1592
2078
{
1593
2079
        buf_page_t*             bpage;
1594
2080
        const ib_rbt_node_t*    rnode = NULL;
1595
2081
 
 
2082
        ut_ad(buf_flush_list_mutex_own(buf_pool));
 
2083
 
1596
2084
        UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list,
1597
2085
                         ut_ad(ut_list_node_313->in_flush_list));
1598
2086
 
1607
2095
 
1608
2096
        while (bpage != NULL) {
1609
2097
                const ib_uint64_t om = bpage->oldest_modification;
 
2098
 
 
2099
                ut_ad(buf_pool_from_bpage(bpage) == buf_pool);
 
2100
 
1610
2101
                ut_ad(bpage->in_flush_list);
1611
 
                ut_a(buf_page_in_file(bpage));
 
2102
 
 
2103
                /* A page in flush_list can be in BUF_BLOCK_REMOVE_HASH
 
2104
                state. This happens when a page is in the middle of
 
2105
                being relocated. In that case the original descriptor
 
2106
                can have this state and still be in the flush list
 
2107
                waiting to acquire the flush_list_mutex to complete
 
2108
                the relocation. */
 
2109
                ut_a(buf_page_in_file(bpage)
 
2110
                     || buf_page_get_state(bpage) == BUF_BLOCK_REMOVE_HASH);
1612
2111
                ut_a(om > 0);
1613
2112
 
1614
2113
                if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
 
2114
                        buf_page_t* rpage;
 
2115
 
1615
2116
                        ut_a(rnode);
1616
 
                        buf_page_t* rpage = *rbt_value(buf_page_t*,
1617
 
                                                       rnode);
 
2117
                        rpage = *rbt_value(buf_page_t*, rnode);
 
2118
 
1618
2119
                        ut_a(rpage);
1619
2120
                        ut_a(rpage == bpage);
1620
2121
                        rnode = rbt_next(buf_pool->flush_rbt, rnode);
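[Reviewer note] The checks above assert that the flush_rbt, when present, is an exact ordered mirror of the flush list: the two structures are walked in lockstep and must yield the same page at every position. A compact sketch of such a lockstep check, with a sorted array standing in for the red-black tree:

#include <assert.h>
#include <stddef.h>

struct page {
        unsigned long long      oldest_modification;
        struct page*            next;   /* flush-list order */
};

static void
validate_lockstep(struct page* head, struct page** index, size_t n)
{
        struct page*    p;
        size_t          i = 0;

        for (p = head; p != NULL; p = p->next, i++) {
                assert(p->oldest_modification > 0);     /* dirty pages only */
                assert(i < n && index[i] == p);         /* same page, same slot */
        }
        assert(i == n);         /* neither side has extras */
}

int main(void)
{
        struct page     b = {100, NULL};
        struct page     a = {200, &b};
        struct page*    index[2];

        index[0] = &a;
        index[1] = &b;
        validate_lockstep(&a, index, 2);
        return(0);
}
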
1637
2138
@return TRUE if ok */
1638
2139
UNIV_INTERN
1639
2140
ibool
1640
 
buf_flush_validate(void)
1641
 
/*====================*/
 
2141
buf_flush_validate(
 
2142
/*===============*/
 
2143
        buf_pool_t*     buf_pool)       /*!< buffer pool instance */
1642
2144
{
1643
2145
        ibool   ret;
1644
2146
 
1645
 
        buf_pool_mutex_enter();
1646
 
 
1647
 
        ret = buf_flush_validate_low();
1648
 
 
1649
 
        buf_pool_mutex_exit();
 
2147
        buf_flush_list_mutex_enter(buf_pool);
 
2148
 
 
2149
        ret = buf_flush_validate_low(buf_pool);
 
2150
 
 
2151
        buf_flush_list_mutex_exit(buf_pool);
1650
2152
 
1651
2153
        return(ret);
1652
2154
}
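
[Reviewer note] The locking change in this last wrapper is the theme of the whole patch in miniature: validation used to take the single global buf_pool mutex, while the per-instance rewrite guards each flush list with its own dedicated mutex, and buf_flush_validate_low() now asserts that this finer-grained lock is held. A pthreads sketch of the acquire/delegate/release pattern with stand-in types (not the InnoDB mutex API):

#include <pthread.h>
#include <stdio.h>

struct pool_stub {
        pthread_mutex_t flush_list_mutex;       /* stand-in for the new mutex */
};

/* Worker: assumes flush_list_mutex is already held by the caller. */
static int
validate_low(struct pool_stub* p)
{
        (void) p;       /* ... lockstep checks would run here ... */
        return(1);
}

/* Public entry point: acquire, delegate, release. */
static int
validate(struct pool_stub* p)
{
        int     ret;

        pthread_mutex_lock(&p->flush_list_mutex);
        ret = validate_low(p);
        pthread_mutex_unlock(&p->flush_list_mutex);
        return(ret);
}

int main(void)
{
        struct pool_stub p;

        pthread_mutex_init(&p.flush_list_mutex, NULL);
        printf("valid: %d\n", validate(&p));
        pthread_mutex_destroy(&p.flush_list_mutex);
        return(0);
}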