~drizzle-trunk/drizzle/development


Viewing changes to plugin/innobase/buf/buf0flu.c

  • Committer: Lee Bieber
  • Date: 2010-11-20 01:33:21 UTC
  • mfrom: (1878.10.4 drizzle_bug665252)
  • Revision ID: kalebral@gmail.com-20101120013321-7nk9lq4nnr20zk6b
Merge Billy - removed my_getsysdate, my_micro_time, and my_micro_time_and_time, replacing them with boost::date_time for compatibility.
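For context, a minimal sketch of the boost::date_time approach that replaces the removed helpers (the function name below is illustrative, not code from this merge):

#include <boost/date_time/posix_time/posix_time.hpp>
#include <stdint.h>

/* Hypothetical stand-in for the removed my_micro_time(): returns
microseconds elapsed since the Unix epoch via boost::posix_time. */
static uint64_t get_micro_time(void)
{
        namespace pt = boost::posix_time;

        /* Wall-clock time with microsecond resolution. */
        pt::ptime now = pt::microsec_clock::universal_time();
        static const pt::ptime epoch(boost::gregorian::date(1970, 1, 1));

        /* time_duration::total_microseconds() yields a 64-bit count. */
        return (now - epoch).total_microseconds();
}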


=== modified file 'plugin/innobase/buf/buf0flu.c'
@@ -83 +83 @@
 @return TRUE if ok */
 static
 ibool
-buf_flush_validate_low(
-/*===================*/
-        buf_pool_t*     buf_pool);      /*!< in: Buffer pool instance */
+buf_flush_validate_low(void);
+/*========================*/
 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
 
-/******************************************************************//**
+/********************************************************************//**
 Insert a block in the flush_rbt and returns a pointer to its
 predecessor or NULL if no predecessor. The ordering is maintained
 on the basis of the <oldest_modification, space, offset> key.
-@return pointer to the predecessor or NULL if no predecessor. */
+@return pointer to the predecessor or NULL if no predecessor. */
 static
 buf_page_t*
 buf_flush_insert_in_flush_rbt(
 /*==========================*/
-        buf_page_t*     bpage)  /*!< in: bpage to be inserted. */
+        buf_page_t*     bpage)          /*!< in: bpage to be inserted. */
 {
+        buf_page_t*             prev = NULL;
         const ib_rbt_node_t*    c_node;
         const ib_rbt_node_t*    p_node;
-        buf_page_t*             prev = NULL;
-        buf_pool_t*             buf_pool = buf_pool_from_bpage(bpage);
 
-        ut_ad(buf_flush_list_mutex_own(buf_pool));
+        ut_ad(buf_pool_mutex_own());
 
         /* Insert this buffer into the rbt. */
         c_node = rbt_insert(buf_pool->flush_rbt, &bpage, &bpage);
@@ -114 +112 @@
         p_node = rbt_prev(buf_pool->flush_rbt, c_node);
 
         if (p_node != NULL) {
-                buf_page_t**    value;
-                value = rbt_value(buf_page_t*, p_node);
-                prev = *value;
+                prev = *rbt_value(buf_page_t*, p_node);
                 ut_a(prev != NULL);
         }
 
         return(prev);
 }
 
-/*********************************************************//**
+/********************************************************************//**
 Delete a bpage from the flush_rbt. */
 static
 void
 buf_flush_delete_from_flush_rbt(
 /*============================*/
-        buf_page_t*     bpage)  /*!< in: bpage to be removed. */
+        buf_page_t*     bpage)          /*!< in: bpage to be removed. */
 {
-#ifdef UNIV_DEBUG
-        ibool           ret = FALSE;
-#endif /* UNIV_DEBUG */
-        buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
-
-        ut_ad(buf_flush_list_mutex_own(buf_pool));
-
-#ifdef UNIV_DEBUG
-        ret =
-#endif /* UNIV_DEBUG */
-        rbt_delete(buf_pool->flush_rbt, &bpage);
+
+        ibool   ret = FALSE;
+
+        ut_ad(buf_pool_mutex_own());
+        ret = rbt_delete(buf_pool->flush_rbt, &bpage);
         ut_ad(ret);
 }
 
-/*****************************************************************//**
+/********************************************************************//**
 Compare two modified blocks in the buffer pool. The key for comparison
 is:
 key = <oldest_modification, space, offset>
@@ -154 +144 @@
 Note that for the purpose of flush_rbt, we only need to order blocks
 on the oldest_modification. The other two fields are used to uniquely
 identify the blocks.
-@return  < 0 if b2 < b1, 0 if b2 == b1, > 0 if b2 > b1 */
+@return < 0 if b2 < b1, 0 if b2 == b1, > 0 if b2 > b1 */
 static
 int
 buf_flush_block_cmp(
@@ -162 +152 @@
         const void*     p1,             /*!< in: block1 */
         const void*     p2)             /*!< in: block2 */
 {
-        int                     ret;
-        const buf_page_t*       b1 = *(const buf_page_t**) p1;
-        const buf_page_t*       b2 = *(const buf_page_t**) p2;
-#ifdef UNIV_DEBUG
-        buf_pool_t*             buf_pool = buf_pool_from_bpage(b1);
-#endif /* UNIV_DEBUG */
+        int             ret;
+        const buf_page_t* b1;
+        const buf_page_t* b2;
+
+        ut_ad(p1 != NULL);
+        ut_ad(p2 != NULL);
+
+        b1 = *(const buf_page_t**) p1;
+        b2 = *(const buf_page_t**) p2;
 
         ut_ad(b1 != NULL);
         ut_ad(b2 != NULL);
 
-        ut_ad(buf_flush_list_mutex_own(buf_pool));
-
         ut_ad(b1->in_flush_list);
         ut_ad(b2->in_flush_list);
 
-        if (b2->oldest_modification > b1->oldest_modification) {
+        if (b2->oldest_modification
+            > b1->oldest_modification) {
                 return(1);
-        } else if (b2->oldest_modification < b1->oldest_modification) {
+        }
+
+        if (b2->oldest_modification
+            < b1->oldest_modification) {
                 return(-1);
         }
 
@@ -199 +194 @@
 buf_flush_init_flush_rbt(void)
 /*==========================*/
 {
-        ulint   i;
-
-        for (i = 0; i < srv_buf_pool_instances; i++) {
-                buf_pool_t*     buf_pool;
-
-                buf_pool = buf_pool_from_array(i);
-
-                buf_flush_list_mutex_enter(buf_pool);
-
-                /* Create red black tree for speedy insertions in flush list. */
-                buf_pool->flush_rbt = rbt_create(
-                        sizeof(buf_page_t*), buf_flush_block_cmp);
-
-                buf_flush_list_mutex_exit(buf_pool);
-        }
+        buf_pool_mutex_enter();
+
+        /* Create red black tree for speedy insertions in flush list. */
+        buf_pool->flush_rbt = rbt_create(sizeof(buf_page_t*),
+                                         buf_flush_block_cmp);
+        buf_pool_mutex_exit();
 }
 
 /********************************************************************//**
@@ -223 +209 @@
 buf_flush_free_flush_rbt(void)
 /*==========================*/
 {
-        ulint   i;
-
-        for (i = 0; i < srv_buf_pool_instances; i++) {
-                buf_pool_t*     buf_pool;
-
-                buf_pool = buf_pool_from_array(i);
-
-                buf_flush_list_mutex_enter(buf_pool);
-
-#ifdef UNIV_DEBUG_VALGRIND
-        {
-                ulint   zip_size = buf_block_get_zip_size(block);
-
-                if (UNIV_UNLIKELY(zip_size)) {
-                        UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size);
-                } else {
-                        UNIV_MEM_ASSERT_RW(block->frame, UNIV_PAGE_SIZE);
-                }
-        }
-#endif /* UNIV_DEBUG_VALGRIND */
+        buf_pool_mutex_enter();
+
 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-                ut_a(buf_flush_validate_low(buf_pool));
+        ut_a(buf_flush_validate_low());
 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
 
-                rbt_free(buf_pool->flush_rbt);
-                buf_pool->flush_rbt = NULL;
+        rbt_free(buf_pool->flush_rbt);
+        buf_pool->flush_rbt = NULL;
 
-                buf_flush_list_mutex_exit(buf_pool);
-        }
+        buf_pool_mutex_exit();
 }
 
 /********************************************************************//**
@@ -260 +227 @@
 void
 buf_flush_insert_into_flush_list(
 /*=============================*/
-        buf_pool_t*     buf_pool,       /*!< buffer pool instance */
-        buf_block_t*    block,          /*!< in/out: block which is modified */
-        ib_uint64_t     lsn)            /*!< in: oldest modification */
+        buf_block_t*    block)  /*!< in/out: block which is modified */
 {
-        ut_ad(!buf_pool_mutex_own(buf_pool));
-        ut_ad(log_flush_order_mutex_own());
-        ut_ad(mutex_own(&block->mutex));
-
-        buf_flush_list_mutex_enter(buf_pool);
-
+        ut_ad(buf_pool_mutex_own());
         ut_ad((UT_LIST_GET_FIRST(buf_pool->flush_list) == NULL)
               || (UT_LIST_GET_FIRST(buf_pool->flush_list)->oldest_modification
-                  <= lsn));
+                  <= block->page.oldest_modification));
 
         /* If we are in the recovery then we need to update the flush
         red-black tree as well. */
         if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
-                buf_flush_list_mutex_exit(buf_pool);
-                buf_flush_insert_sorted_into_flush_list(buf_pool, block, lsn);
+                buf_flush_insert_sorted_into_flush_list(block);
                 return;
         }
 
         ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+        ut_ad(block->page.in_LRU_list);
+        ut_ad(block->page.in_page_hash);
+        ut_ad(!block->page.in_zip_hash);
         ut_ad(!block->page.in_flush_list);
-
         ut_d(block->page.in_flush_list = TRUE);
-        block->page.oldest_modification = lsn;
         UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
 
-#ifdef UNIV_DEBUG_VALGRIND
-        {
-                ulint   zip_size = buf_block_get_zip_size(block);
-
-                if (UNIV_UNLIKELY(zip_size)) {
-                        UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size);
-                } else {
-                        UNIV_MEM_ASSERT_RW(block->frame, UNIV_PAGE_SIZE);
-                }
-        }
-#endif /* UNIV_DEBUG_VALGRIND */
 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-        ut_a(buf_flush_validate_low(buf_pool));
+        ut_a(buf_flush_validate_low());
 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-
-        buf_flush_list_mutex_exit(buf_pool);
 }
 
 /********************************************************************//**
@@ -315 +262 @@
 void
 buf_flush_insert_sorted_into_flush_list(
 /*====================================*/
-        buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
-        buf_block_t*    block,          /*!< in/out: block which is modified */
-        ib_uint64_t     lsn)            /*!< in: oldest modification */
+        buf_block_t*    block)  /*!< in/out: block which is modified */
 {
         buf_page_t*     prev_b;
         buf_page_t*     b;
 
-        ut_ad(!buf_pool_mutex_own(buf_pool));
-        ut_ad(log_flush_order_mutex_own());
-        ut_ad(mutex_own(&block->mutex));
+        ut_ad(buf_pool_mutex_own());
         ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
 
-        buf_flush_list_mutex_enter(buf_pool);
-
-        /* The field in_LRU_list is protected by buf_pool_mutex, which
-        we are not holding.  However, while a block is in the flush
-        list, it is dirty and cannot be discarded, not from the
-        page_hash or from the LRU list.  At most, the uncompressed
-        page frame of a compressed block may be discarded or created
-        (copying the block->page to or from a buf_page_t that is
-        dynamically allocated from buf_buddy_alloc()).  Because those
-        transitions hold block->mutex and the flush list mutex (via
-        buf_flush_relocate_on_flush_list()), there is no possibility
-        of a race condition in the assertions below. */
         ut_ad(block->page.in_LRU_list);
         ut_ad(block->page.in_page_hash);
-        /* buf_buddy_block_register() will take a block in the
-        BUF_BLOCK_MEMORY state, not a file page. */
         ut_ad(!block->page.in_zip_hash);
-
         ut_ad(!block->page.in_flush_list);
         ut_d(block->page.in_flush_list = TRUE);
-        block->page.oldest_modification = lsn;
-
-#ifdef UNIV_DEBUG_VALGRIND
-        {
-                ulint   zip_size = buf_block_get_zip_size(block);
-
-                if (UNIV_UNLIKELY(zip_size)) {
-                        UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size);
-                } else {
-                        UNIV_MEM_ASSERT_RW(block->frame, UNIV_PAGE_SIZE);
-                }
-        }
-#endif /* UNIV_DEBUG_VALGRIND */
-
-#ifdef UNIV_DEBUG_VALGRIND
-        {
-                ulint   zip_size = buf_block_get_zip_size(block);
-
-                if (UNIV_UNLIKELY(zip_size)) {
-                        UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size);
-                } else {
-                        UNIV_MEM_ASSERT_RW(block->frame, UNIV_PAGE_SIZE);
-                }
-        }
-#endif /* UNIV_DEBUG_VALGRIND */
 
         prev_b = NULL;
 
@@ -405 +308 @@
         }
 
 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-        ut_a(buf_flush_validate_low(buf_pool));
+        ut_a(buf_flush_validate_low());
 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-
-        buf_flush_list_mutex_exit(buf_pool);
 }
 
 /********************************************************************//**
@@ -422 +323 @@
         buf_page_t*     bpage)  /*!< in: buffer control block, must be
                                 buf_page_in_file(bpage) and in the LRU list */
 {
-#ifdef UNIV_DEBUG
-        buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
-        ut_ad(buf_pool_mutex_own(buf_pool));
-#endif
+        ut_ad(buf_pool_mutex_own());
         ut_ad(mutex_own(buf_page_get_mutex(bpage)));
         ut_ad(bpage->in_LRU_list);
 
@@ -458 +356 @@
                                 buf_page_in_file(bpage) */
         enum buf_flush  flush_type)/*!< in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
 {
-#ifdef UNIV_DEBUG
-        buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
-        ut_ad(buf_pool_mutex_own(buf_pool));
-#endif
         ut_a(buf_page_in_file(bpage));
+        ut_ad(buf_pool_mutex_own());
         ut_ad(mutex_own(buf_page_get_mutex(bpage)));
         ut_ad(flush_type == BUF_FLUSH_LRU || BUF_FLUSH_LIST);
 
@@ -495 +390 @@
 /*=============*/
         buf_page_t*     bpage)  /*!< in: pointer to the block in question */
 {
-        buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
-
-        ut_ad(buf_pool_mutex_own(buf_pool));
+        ut_ad(buf_pool_mutex_own());
         ut_ad(mutex_own(buf_page_get_mutex(bpage)));
         ut_ad(bpage->in_flush_list);
 
-        buf_flush_list_mutex_enter(buf_pool);
-
         switch (buf_page_get_state(bpage)) {
         case BUF_BLOCK_ZIP_PAGE:
-                /* Clean compressed pages should not be on the flush list */
+                /* clean compressed pages should not be on the flush list */
         case BUF_BLOCK_ZIP_FREE:
         case BUF_BLOCK_NOT_USED:
         case BUF_BLOCK_READY_FOR_USE:
@@ -523 +414 @@
                 break;
         }
 
-        /* If the flush_rbt is active then delete from there as well. */
+        /* If the flush_rbt is active then delete from it as well. */
         if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
                 buf_flush_delete_from_flush_rbt(bpage);
         }
@@ -534 +425 @@
 
         bpage->oldest_modification = 0;
 
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-        ut_a(buf_flush_validate_low(buf_pool));
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-
-        buf_flush_list_mutex_exit(buf_pool);
+        ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list,
+                              ut_ad(ut_list_node_313->in_flush_list)));
 }
 
-/*******************************************************************//**
+/********************************************************************//**
 Relocates a buffer control block on the flush_list.
-Note that it is assumed that the contents of bpage have already been
-copied to dpage.
-IMPORTANT: When this function is called bpage and dpage are not
-exact copies of each other. For example, they both will have different
-::state. Also the ::list pointers in dpage may be stale. We need to
-use the current list node (bpage) to do the list manipulation because
-the list pointers could have changed between the time that we copied
-the contents of bpage to the dpage and the flush list manipulation
-below. */
+Note that it is assumed that the contents of bpage has already been
+copied to dpage. */
 UNIV_INTERN
 void
 buf_flush_relocate_on_flush_list(
@@ -559 +440 @@
         buf_page_t*     bpage,  /*!< in/out: control block being moved */
         buf_page_t*     dpage)  /*!< in/out: destination block */
 {
-        buf_page_t*     prev;
-        buf_page_t*     prev_b = NULL;
-        buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
+        buf_page_t* prev;
+        buf_page_t* prev_b = NULL;
 
-        ut_ad(buf_pool_mutex_own(buf_pool));
-        /* Must reside in the same buffer pool. */
-        ut_ad(buf_pool == buf_pool_from_bpage(dpage));
+        ut_ad(buf_pool_mutex_own());
 
         ut_ad(mutex_own(buf_page_get_mutex(bpage)));
 
-        buf_flush_list_mutex_enter(buf_pool);
-
-        /* FIXME: At this point we have both buf_pool and flush_list
-        mutexes. Theoretically removal of a block from flush list is
-        only covered by flush_list mutex but currently we do
-        have buf_pool mutex in buf_flush_remove() therefore this block
-        is guaranteed to be in the flush list. We need to check if
-        this will work without the assumption of block removing code
-        having the buf_pool mutex. */
         ut_ad(bpage->in_flush_list);
         ut_ad(dpage->in_flush_list);
 
@@ -613 +482 @@
         ut_a(!buf_pool->flush_rbt || prev_b == prev);
 
 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-        ut_a(buf_flush_validate_low(buf_pool));
+        ut_a(buf_flush_validate_low());
 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-
-        buf_flush_list_mutex_exit(buf_pool);
 }
 
 /********************************************************************//**
@@ -628 +495 @@
         buf_page_t*     bpage)  /*!< in: pointer to the block in question */
 {
         enum buf_flush  flush_type;
-        buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
 
         ut_ad(bpage);
 
@@ -649 +515 @@
         /* fprintf(stderr, "n pending flush %lu\n",
         buf_pool->n_flush[flush_type]); */
 
-        if (buf_pool->n_flush[flush_type] == 0
-            && buf_pool->init_flush[flush_type] == FALSE) {
+        if ((buf_pool->n_flush[flush_type] == 0)
+            && (buf_pool->init_flush[flush_type] == FALSE)) {
 
                 /* The running flush batch has ended */
 
@@ -943 +809 @@
         zip_size = buf_page_get_zip_size(bpage);
 
         if (UNIV_UNLIKELY(zip_size)) {
-                UNIV_MEM_ASSERT_RW(bpage->zip.data, zip_size);
                 /* Copy the compressed page and clear the rest. */
                 memcpy(trx_doublewrite->write_buf
                        + UNIV_PAGE_SIZE * trx_doublewrite->first_free,
@@ -953 +818 @@
                        + zip_size, 0, UNIV_PAGE_SIZE - zip_size);
         } else {
                 ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
-                UNIV_MEM_ASSERT_RW(((buf_block_t*) bpage)->frame,
-                                   UNIV_PAGE_SIZE);
 
                 memcpy(trx_doublewrite->write_buf
                        + UNIV_PAGE_SIZE * trx_doublewrite->first_free,
@@ -1010 +873 @@
                 case FIL_PAGE_TYPE_ZBLOB:
                 case FIL_PAGE_TYPE_ZBLOB2:
                 case FIL_PAGE_INDEX:
-                        mach_write_to_8(page_zip->data
-                                        + FIL_PAGE_LSN, newest_lsn);
+                        mach_write_ull(page_zip->data
+                                       + FIL_PAGE_LSN, newest_lsn);
                         memset(page_zip->data + FIL_PAGE_FILE_FLUSH_LSN, 0, 8);
                         mach_write_to_4(page_zip->data
                                         + FIL_PAGE_SPACE_OR_CHKSUM,
@@ -1033 +896 @@
         }
 
         /* Write the newest modification lsn to the page header and trailer */
-        mach_write_to_8(page + FIL_PAGE_LSN, newest_lsn);
+        mach_write_ull(page + FIL_PAGE_LSN, newest_lsn);
 
-        mach_write_to_8(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
-                        newest_lsn);
+        mach_write_ull(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
+                       newest_lsn);
 
         /* Store the new formula checksum */
 
@@ -1069 +932 @@
 {
         ulint   zip_size        = buf_page_get_zip_size(bpage);
         page_t* frame           = NULL;
-
-#ifdef UNIV_DEBUG
-        buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
-        ut_ad(!buf_pool_mutex_own(buf_pool));
-#endif
-
 #ifdef UNIV_LOG_DEBUG
         static ibool univ_log_debug_warned;
 #endif /* UNIV_LOG_DEBUG */
@@ -1086 +943 @@
         io_fixed and oldest_modification != 0.  Thus, it cannot be
         relocated in the buffer pool or removed from flush_list or
         LRU_list. */
-        ut_ad(!buf_pool_mutex_own(buf_pool));
-        ut_ad(!buf_flush_list_mutex_own(buf_pool));
+        ut_ad(!buf_pool_mutex_own());
         ut_ad(!mutex_own(buf_page_get_mutex(bpage)));
         ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_WRITE);
         ut_ad(bpage->oldest_modification != 0);
@@ -1124 +980 @@
                         ut_a(mach_read_from_4(frame + FIL_PAGE_SPACE_OR_CHKSUM)
                              == page_zip_calc_checksum(frame, zip_size));
                 }
-                mach_write_to_8(frame + FIL_PAGE_LSN,
-                                bpage->newest_modification);
+                mach_write_ull(frame + FIL_PAGE_LSN,
+                               bpage->newest_modification);
                 memset(frame + FIL_PAGE_FILE_FLUSH_LSN, 0, 8);
                 break;
         case BUF_BLOCK_FILE_PAGE:
@@ -1156 +1012 @@
 Writes a flushable page asynchronously from the buffer pool to a file.
 NOTE: in simulated aio we must call
 os_aio_simulated_wake_handler_threads after we have posted a batch of
-writes! NOTE: buf_pool->mutex and buf_page_get_mutex(bpage) must be
+writes! NOTE: buf_pool_mutex and buf_page_get_mutex(bpage) must be
 held upon entering this function, and they will be released by this
 function. */
 static
 void
 buf_flush_page(
 /*===========*/
-        buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
         buf_page_t*     bpage,          /*!< in: buffer control block */
         enum buf_flush  flush_type)     /*!< in: BUF_FLUSH_LRU
                                         or BUF_FLUSH_LIST */
@@ -1172 +1027 @@
         ibool           is_uncompressed;
 
         ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
-        ut_ad(buf_pool_mutex_own(buf_pool));
+        ut_ad(buf_pool_mutex_own());
         ut_ad(buf_page_in_file(bpage));
 
         block_mutex = buf_page_get_mutex(bpage);
@@ -1192 +1047 @@
         buf_pool->n_flush[flush_type]++;
 
         is_uncompressed = (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
-        ut_ad(is_uncompressed == (block_mutex != &buf_pool->zip_mutex));
+        ut_ad(is_uncompressed == (block_mutex != &buf_pool_zip_mutex));
 
         switch (flush_type) {
                 ibool   is_s_latched;
@@ -1208 +1063 @@
                 }
 
                 mutex_exit(block_mutex);
-                buf_pool_mutex_exit(buf_pool);
+                buf_pool_mutex_exit();
 
                 /* Even though bpage is not protected by any mutex at
                 this point, it is safe to access bpage, because it is
@@ -1245 +1100 @@
                 immediately. */
 
                 mutex_exit(block_mutex);
-                buf_pool_mutex_exit(buf_pool);
+                buf_pool_mutex_exit();
                 break;
 
         default:
@@ -1276 +1131 @@
 /*====================*/
         ulint           space,          /*!< in: space id */
         ulint           offset,         /*!< in: page offset */
-        enum buf_flush  flush_type,     /*!< in: BUF_FLUSH_LRU or
+        enum buf_flush  flush_type)     /*!< in: BUF_FLUSH_LRU or
                                         BUF_FLUSH_LIST */
-        ulint           n_flushed,      /*!< in: number of pages
-                                        flushed so far in this batch */
-        ulint           n_to_flush)     /*!< in: maximum number of pages
-                                        we are allowed to flush */
 {
+        buf_page_t*     bpage;
+        ulint           low, high;
+        ulint           count           = 0;
         ulint           i;
-        ulint           low;
-        ulint           high;
-        ulint           count = 0;
-        buf_pool_t*     buf_pool = buf_pool_get(space, offset);
 
         ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
 
         if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) {
-                /* If there is little space, it is better not to flush
-                any block except from the end of the LRU list */
+                /* If there is little space, it is better not to flush any
+                block except from the end of the LRU list */
 
                 low = offset;
                 high = offset + 1;
         } else {
-                /* When flushed, dirty blocks are searched in
-                neighborhoods of this size, and flushed along with the
-                original page. */
+                /* When flushed, dirty blocks are searched in neighborhoods of
+                this size, and flushed along with the original page. */
 
-                ulint   buf_flush_area;
-
-                buf_flush_area  = ut_min(
-                        BUF_READ_AHEAD_AREA(buf_pool),
-                        buf_pool->curr_size / 16);
+                ulint   buf_flush_area  = ut_min(BUF_READ_AHEAD_AREA,
+                                                 buf_pool->curr_size / 16);
 
                 low = (offset / buf_flush_area) * buf_flush_area;
                 high = (offset / buf_flush_area + 1) * buf_flush_area;
@@ -1318 +1164 @@
                 high = fil_space_get_size(space);
         }
 
+        buf_pool_mutex_enter();
+
         for (i = low; i < high; i++) {
 
-                buf_page_t*     bpage;
-
-                if ((count + n_flushed) >= n_to_flush) {
-
-                        /* We have already flushed enough pages and
-                        should call it a day. There is, however, one
-                        exception. If the page whose neighbors we
-                        are flushing has not been flushed yet then
-                        we'll try to flush the victim that we
-                        selected originally. */
-                        if (i <= offset) {
-                                i = offset;
-                        } else {
-                                break;
-                        }
-                }
-
-                buf_pool = buf_pool_get(space, i);
-
-                buf_pool_mutex_enter(buf_pool);
-
-                /* We only want to flush pages from this buffer pool. */
-                bpage = buf_page_hash_get(buf_pool, space, i);
+                bpage = buf_page_hash_get(space, i);
 
                 if (!bpage) {
 
-                        buf_pool_mutex_exit(buf_pool);
                         continue;
                 }
 
@@ -1365 +1190 @@
                         if (buf_flush_ready_for_flush(bpage, flush_type)
                             && (i == offset || !bpage->buf_fix_count)) {
                                 /* We only try to flush those
-                                neighbors != offset where the buf fix
-                                count is zero, as we then know that we
-                                probably can latch the page without a
-                                semaphore wait. Semaphore waits are
-                                expensive because we must flush the
-                                doublewrite buffer before we start
+                                neighbors != offset where the buf fix count is
+                                zero, as we then know that we probably can
+                                latch the page without a semaphore wait.
+                                Semaphore waits are expensive because we must
+                                flush the doublewrite buffer before we start
                                 waiting. */
 
-                                buf_flush_page(buf_pool, bpage, flush_type);
+                                buf_flush_page(bpage, flush_type);
                                 ut_ad(!mutex_own(block_mutex));
-                                ut_ad(!buf_pool_mutex_own(buf_pool));
                                 count++;
-                                continue;
+
+                                buf_pool_mutex_enter();
                         } else {
                                 mutex_exit(block_mutex);
                         }
                 }
-                buf_pool_mutex_exit(buf_pool);
-        }
-
-        return(count);
-}
-
-/********************************************************************//**
-Check if the block is modified and ready for flushing. If the the block
-is ready to flush then flush the page and try o flush its neighbors.
-
-@return TRUE if buf_pool mutex was not released during this function.
-This does not guarantee that some pages were written as well.
-Number of pages written are incremented to the count. */
-static
-ibool
-buf_flush_page_and_try_neighbors(
-/*=============================*/
-        buf_page_t*     bpage,          /*!< in: buffer control block,
-                                        must be
-                                        buf_page_in_file(bpage) */
-        enum buf_flush  flush_type,     /*!< in: BUF_FLUSH_LRU
-                                        or BUF_FLUSH_LIST */
-        ulint           n_to_flush,     /*!< in: number of pages to
-                                        flush */
-        ulint*          count)          /*!< in/out: number of pages
-                                        flushed */
-{
-        mutex_t*        block_mutex;
-        ibool           flushed = FALSE;
-#ifdef UNIV_DEBUG
-        buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
-#endif /* UNIV_DEBUG */
-
-        ut_ad(buf_pool_mutex_own(buf_pool));
-
-        block_mutex = buf_page_get_mutex(bpage);
-        mutex_enter(block_mutex);
-
-        ut_a(buf_page_in_file(bpage));
-
-        if (buf_flush_ready_for_flush(bpage, flush_type)) {
-                ulint           space;
-                ulint           offset;
-                buf_pool_t*     buf_pool;
-
-                buf_pool = buf_pool_from_bpage(bpage);
-
-                buf_pool_mutex_exit(buf_pool);
-
-                /* These fields are protected by both the
-                buffer pool mutex and block mutex. */
-                space = buf_page_get_space(bpage);
-                offset = buf_page_get_page_no(bpage);
-
-                mutex_exit(block_mutex);
-
-                /* Try to flush also all the neighbors */
-                *count += buf_flush_try_neighbors(space,
-                                                  offset,
-                                                  flush_type,
-                                                  *count,
-                                                  n_to_flush);
-
-                buf_pool_mutex_enter(buf_pool);
-                flushed = TRUE;
-        } else {
-                mutex_exit(block_mutex);
-        }
-
-        ut_ad(buf_pool_mutex_own(buf_pool));
-
-        return(flushed);
-}
-
-/*******************************************************************//**
-This utility flushes dirty blocks from the end of the LRU list.
-In the case of an LRU flush the calling thread may own latches to
-pages: to avoid deadlocks, this function must be written so that it
-cannot end up waiting for these latches!
-@return number of blocks for which the write request was queued. */
-static
-ulint
-buf_flush_LRU_list_batch(
-/*=====================*/
-        buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
-        ulint           max)            /*!< in: max of blocks to flush */
-{
-        buf_page_t*     bpage;
-        ulint           count = 0;
-
-        ut_ad(buf_pool_mutex_own(buf_pool));
-
-        do {
-                /* Start from the end of the list looking for a
-                suitable block to be flushed. */
-                bpage = UT_LIST_GET_LAST(buf_pool->LRU);
-
-                /* Iterate backwards over the flush list till we find
-                a page that isn't ready for flushing. */
-                while (bpage != NULL
-                       && !buf_flush_page_and_try_neighbors(
-                                bpage, BUF_FLUSH_LRU, max, &count)) {
-
-                        bpage = UT_LIST_GET_PREV(LRU, bpage);
-                }
-        } while (bpage != NULL && count < max);
-
-        /* We keep track of all flushes happening as part of LRU
-        flush. When estimating the desired rate at which flush_list
-        should be flushed, we factor in this value. */
-        buf_lru_flush_page_count += count;
-
-        ut_ad(buf_pool_mutex_own(buf_pool));
-
-        return(count);
-}
-
-/*******************************************************************//**
-This utility flushes dirty blocks from the end of the flush_list.
-the calling thread is not allowed to own any latches on pages!
-@return number of blocks for which the write request was queued;
-ULINT_UNDEFINED if there was a flush of the same type already
-running */
-static
-ulint
-buf_flush_flush_list_batch(
-/*=======================*/
-        buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
-        ulint           min_n,          /*!< in: wished minimum mumber
-                                        of blocks flushed (it is not
-                                        guaranteed that the actual
-                                        number is that big, though) */
-        ib_uint64_t     lsn_limit)      /*!< all blocks whose
-                                        oldest_modification is smaller
-                                        than this should be flushed (if
-                                        their number does not exceed
-                                        min_n) */
-{
-        ulint           len;
-        buf_page_t*     bpage;
-        ulint           count = 0;
-
-        ut_ad(buf_pool_mutex_own(buf_pool));
-
-        /* If we have flushed enough, leave the loop */
-        do {
-                /* Start from the end of the list looking for a suitable
-                block to be flushed. */
-
-                buf_flush_list_mutex_enter(buf_pool);
-
-                /* We use len here because theoretically insertions can
-                happen in the flush_list below while we are traversing
-                it for a suitable candidate for flushing. We'd like to
-                set a limit on how farther we are willing to traverse
-                the list. */
-                len = UT_LIST_GET_LEN(buf_pool->flush_list);
-                bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
-
-                if (bpage) {
-                        ut_a(bpage->oldest_modification > 0);
-                }
-
-                if (!bpage || bpage->oldest_modification >= lsn_limit) {
-
-                        /* We have flushed enough */
-                        buf_flush_list_mutex_exit(buf_pool);
-                        break;
-                }
-
-                ut_a(bpage->oldest_modification > 0);
-
-                ut_ad(bpage->in_flush_list);
-
-                buf_flush_list_mutex_exit(buf_pool);
-
-                /* The list may change during the flushing and we cannot
-                safely preserve within this function a pointer to a
-                block in the list! */
-                while (bpage != NULL
-                       && len > 0
-                       && !buf_flush_page_and_try_neighbors(
-                                bpage, BUF_FLUSH_LIST, min_n, &count)) {
-
-                        buf_flush_list_mutex_enter(buf_pool);
-
-                        /* If we are here that means that buf_pool->mutex
-                         was not released in buf_flush_page_and_try_neighbors()
-                        above and this guarantees that bpage didn't get
-                        relocated since we released the flush_list
-                        mutex above. There is a chance, however, that
-                        the bpage got removed from flush_list (not
-                        currently possible because flush_list_remove()
-                        also obtains buf_pool mutex but that may change
-                        in future). To avoid this scenario we check
-                        the oldest_modification and if it is zero
-                        we start all over again. */
-                        if (bpage->oldest_modification == 0) {
-                                buf_flush_list_mutex_exit(buf_pool);
-                                break;
-                        }
-
-                        bpage = UT_LIST_GET_PREV(list, bpage);
-
-                        ut_ad(!bpage || bpage->in_flush_list);
-
-                        buf_flush_list_mutex_exit(buf_pool);
-
-                        --len;
-                }
-
-        } while (count < min_n && bpage != NULL && len > 0);
-
-        ut_ad(buf_pool_mutex_own(buf_pool));
+        }
+
+        buf_pool_mutex_exit();
 
         return(count);
 }
1608
1221
the calling thread is not allowed to own any latches on pages!
1609
1222
@return number of blocks for which the write request was queued;
1610
1223
ULINT_UNDEFINED if there was a flush of the same type already running */
1611
 
static
 
1224
UNIV_INTERN
1612
1225
ulint
1613
1226
buf_flush_batch(
1614
1227
/*============*/
1615
 
        buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
1616
1228
        enum buf_flush  flush_type,     /*!< in: BUF_FLUSH_LRU or
1617
1229
                                        BUF_FLUSH_LIST; if BUF_FLUSH_LIST,
1618
1230
                                        then the caller must not own any
1620
1232
        ulint           min_n,          /*!< in: wished minimum mumber of blocks
1621
1233
                                        flushed (it is not guaranteed that the
1622
1234
                                        actual number is that big, though) */
1623
 
        ib_uint64_t     lsn_limit)      /*!< in: in the case of BUF_FLUSH_LIST
1624
 
                                        all blocks whose oldest_modification is
 
1235
        ib_uint64_t     lsn_limit)      /*!< in the case BUF_FLUSH_LIST all
 
1236
                                        blocks whose oldest_modification is
1625
1237
                                        smaller than this should be flushed
1626
1238
                                        (if their number does not exceed
1627
1239
                                        min_n), otherwise ignored */
1628
1240
{
1629
 
        ulint           count   = 0;
 
1241
        buf_page_t*     bpage;
 
1242
        ulint           page_count      = 0;
 
1243
        ulint           old_page_count;
 
1244
        ulint           space;
 
1245
        ulint           offset;
1630
1246
 
1631
 
        ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
 
1247
        ut_ad((flush_type == BUF_FLUSH_LRU)
 
1248
              || (flush_type == BUF_FLUSH_LIST));
1632
1249
#ifdef UNIV_SYNC_DEBUG
1633
1250
        ut_ad((flush_type != BUF_FLUSH_LIST)
1634
1251
              || sync_thread_levels_empty_gen(TRUE));
1635
1252
#endif /* UNIV_SYNC_DEBUG */
1636
 
 
1637
 
        buf_pool_mutex_enter(buf_pool);
1638
 
 
1639
 
        /* Note: The buffer pool mutex is released and reacquired within
1640
 
        the flush functions. */
1641
 
        switch(flush_type) {
1642
 
        case BUF_FLUSH_LRU:
1643
 
                count = buf_flush_LRU_list_batch(buf_pool, min_n);
1644
 
                break;
1645
 
        case BUF_FLUSH_LIST:
1646
 
                count = buf_flush_flush_list_batch(buf_pool, min_n, lsn_limit);
1647
 
                break;
1648
 
        default:
1649
 
                ut_error;
1650
 
        }
1651
 
 
1652
 
        buf_pool_mutex_exit(buf_pool);
1653
 
 
1654
 
        buf_flush_buffered_writes();
1655
 
 
1656
 
#ifdef UNIV_DEBUG
1657
 
        if (buf_debug_prints && count > 0) {
1658
 
                fprintf(stderr, flush_type == BUF_FLUSH_LRU
1659
 
                        ? "Flushed %lu pages in LRU flush\n"
1660
 
                        : "Flushed %lu pages in flush list flush\n",
1661
 
                        (ulong) count);
1662
 
        }
1663
 
#endif /* UNIV_DEBUG */
1664
 
 
1665
 
        srv_buf_pool_flushed += count;
1666
 
 
1667
 
        return(count);
1668
 
}
1669
 
 
1670
 
/******************************************************************//**
1671
 
Gather the aggregated stats for both flush list and LRU list flushing */
1672
 
static
1673
 
void
1674
 
buf_flush_common(
1675
 
/*=============*/
1676
 
        enum buf_flush  flush_type,     /*!< in: type of flush */
1677
 
        ulint           page_count)     /*!< in: number of pages flushed */
1678
 
{
1679
 
        buf_flush_buffered_writes();
1680
 
 
1681
 
        ut_a(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
 
1253
        buf_pool_mutex_enter();
 
1254
 
 
1255
        if ((buf_pool->n_flush[flush_type] > 0)
 
1256
            || (buf_pool->init_flush[flush_type] == TRUE)) {
 
1257
 
 
1258
                /* There is already a flush batch of the same type running */
 
1259
 
 
1260
                buf_pool_mutex_exit();
 
1261
 
 
1262
                return(ULINT_UNDEFINED);
 
1263
        }
 
1264
 
 
1265
        buf_pool->init_flush[flush_type] = TRUE;
 
1266
 
 
1267
        bool done_with_loop= false;
 
1268
        for (;done_with_loop != true;) {
 
1269
flush_next:
 
1270
                /* If we have flushed enough, leave the loop */
 
1271
                if (page_count >= min_n) {
 
1272
 
 
1273
                        break;
 
1274
                }
 
1275
 
 
1276
                /* Start from the end of the list looking for a suitable
 
1277
                block to be flushed. */
 
1278
 
 
1279
                if (flush_type == BUF_FLUSH_LRU) {
 
1280
                        bpage = UT_LIST_GET_LAST(buf_pool->LRU);
 
1281
                } else {
 
1282
                        ut_ad(flush_type == BUF_FLUSH_LIST);
 
1283
 
 
1284
                        bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
 
1285
                        if (!bpage
 
1286
                            || bpage->oldest_modification >= lsn_limit) {
 
1287
                                /* We have flushed enough */
 
1288
 
 
1289
                                break;
 
1290
                        }
 
1291
                        ut_ad(bpage->in_flush_list);
 
1292
                }
 
1293
 
 
1294
                /* Note that after finding a single flushable page, we try to
 
1295
                flush also all its neighbors, and after that start from the
 
1296
                END of the LRU list or flush list again: the list may change
 
1297
                during the flushing and we cannot safely preserve within this
 
1298
                function a pointer to a block in the list! */
 
1299
 
 
1300
                do {
 
1301
                        mutex_t*block_mutex = buf_page_get_mutex(bpage);
 
1302
                        ibool   ready;
 
1303
 
 
1304
                        ut_a(buf_page_in_file(bpage));
 
1305
 
 
1306
                        mutex_enter(block_mutex);
 
1307
                        ready = buf_flush_ready_for_flush(bpage, flush_type);
 
1308
                        mutex_exit(block_mutex);
 
1309
 
 
1310
                        if (ready) {
 
1311
                                space = buf_page_get_space(bpage);
 
1312
                                offset = buf_page_get_page_no(bpage);
 
1313
 
 
1314
                                buf_pool_mutex_exit();
 
1315
 
 
1316
                                old_page_count = page_count;
 
1317
 
 
1318
                                /* Try to flush also all the neighbors */
 
1319
                                page_count += buf_flush_try_neighbors(
 
1320
                                        space, offset, flush_type);
 
1321
                                /* fprintf(stderr,
 
1322
                                "Flush type %lu, page no %lu, neighb %lu\n",
 
1323
                                flush_type, offset,
 
1324
                                page_count - old_page_count); */
 
1325
 
 
1326
                                buf_pool_mutex_enter();
 
1327
                                goto flush_next;
 
1328
 
 
1329
                        } else if (flush_type == BUF_FLUSH_LRU) {
 
1330
                                bpage = UT_LIST_GET_PREV(LRU, bpage);
 
1331
                        } else {
 
1332
                                ut_ad(flush_type == BUF_FLUSH_LIST);
 
1333
 
 
1334
                                bpage = UT_LIST_GET_PREV(list, bpage);
 
1335
                                ut_ad(!bpage || bpage->in_flush_list);
 
1336
                        }
 
1337
                } while (bpage != NULL);
 
1338
 
 
1339
                /* If we could not find anything to flush, leave the loop */
 
1340
 
 
1341
                done_with_loop= true;
 
1342
 
 
1343
        }
 
1344
 
 
1345
        buf_pool->init_flush[flush_type] = FALSE;
 
1346
 
 
1347
        if (buf_pool->n_flush[flush_type] == 0) {
 
1348
 
 
1349
                /* The running flush batch has ended */
 
1350
 
 
1351
                os_event_set(buf_pool->no_flush[flush_type]);
 
1352
        }
 
1353
 
 
1354
        buf_pool_mutex_exit();
 
1355
 
 
1356
        buf_flush_buffered_writes();
1682
1357
 
1683
1358
#ifdef UNIV_DEBUG
1684
1359
        if (buf_debug_prints && page_count > 0) {
 
1360
                ut_a(flush_type == BUF_FLUSH_LRU
 
1361
                     || flush_type == BUF_FLUSH_LIST);
1685
1362
                fprintf(stderr, flush_type == BUF_FLUSH_LRU
1686
1363
                        ? "Flushed %lu pages in LRU flush\n"
1687
1364
                        : "Flushed %lu pages in flush list flush\n",
1691
1368
 
1692
1369
        srv_buf_pool_flushed += page_count;
1693
1370
 
 
1371
        /* We keep track of all flushes happening as part of LRU
 
1372
        flush. When estimating the desired rate at which flush_list
 
1373
        should be flushed we factor in this value. */
1694
1374
        if (flush_type == BUF_FLUSH_LRU) {
1695
 
                /* We keep track of all flushes happening as part of LRU
1696
 
                flush. When estimating the desired rate at which flush_list
1697
 
                should be flushed we factor in this value. */
1698
1375
                buf_lru_flush_page_count += page_count;
1699
1376
        }
1700
 
}
1701
 
 
1702
 
/******************************************************************//**
1703
 
Start a buffer flush batch for LRU or flush list */
1704
 
static
1705
 
ibool
1706
 
buf_flush_start(
1707
 
/*============*/
1708
 
        buf_pool_t*     buf_pool,       /*!< buffer pool instance */
1709
 
        enum buf_flush  flush_type)     /*!< in: BUF_FLUSH_LRU
1710
 
                                        or BUF_FLUSH_LIST */
1711
 
{
1712
 
        buf_pool_mutex_enter(buf_pool);
1713
 
 
1714
 
        if (buf_pool->n_flush[flush_type] > 0
1715
 
           || buf_pool->init_flush[flush_type] == TRUE) {
1716
 
 
1717
 
                /* There is already a flush batch of the same type running */
1718
 
 
1719
 
                buf_pool_mutex_exit(buf_pool);
1720
 
 
1721
 
                return(FALSE);
1722
 
        }
1723
 
 
1724
 
        buf_pool->init_flush[flush_type] = TRUE;
1725
 
 
1726
 
        buf_pool_mutex_exit(buf_pool);
1727
 
 
1728
 
        return(TRUE);
1729
 
}
1730
 
 
1731
 
/******************************************************************//**
1732
 
End a buffer flush batch for LRU or flush list */
1733
 
static
1734
 
void
1735
 
buf_flush_end(
1736
 
/*==========*/
1737
 
        buf_pool_t*     buf_pool,       /*!< buffer pool instance */
1738
 
        enum buf_flush  flush_type)     /*!< in: BUF_FLUSH_LRU
1739
 
                                        or BUF_FLUSH_LIST */
1740
 
{
1741
 
        buf_pool_mutex_enter(buf_pool);
1742
 
 
1743
 
        buf_pool->init_flush[flush_type] = FALSE;
1744
 
 
1745
 
        if (buf_pool->n_flush[flush_type] == 0) {
1746
 
 
1747
 
                /* The running flush batch has ended */
1748
 
 
1749
 
                os_event_set(buf_pool->no_flush[flush_type]);
1750
 
        }
1751
 
 
1752
 
        buf_pool_mutex_exit(buf_pool);
 
1377
 
 
1378
        return(page_count);
1753
1379
}
1754
1380
 
1755
1381
/******************************************************************//**
1758
1384
void
1759
1385
buf_flush_wait_batch_end(
1760
1386
/*=====================*/
1761
 
        buf_pool_t*     buf_pool,       /*!< buffer pool instance */
1762
 
        enum buf_flush  type)           /*!< in: BUF_FLUSH_LRU
1763
 
                                        or BUF_FLUSH_LIST */
1764
 
{
1765
 
        ut_ad(type == BUF_FLUSH_LRU || type == BUF_FLUSH_LIST);
1766
 
 
1767
 
        if (buf_pool == NULL) {
1768
 
                ulint   i;
1769
 
 
1770
 
                for (i = 0; i < srv_buf_pool_instances; ++i) {
1771
 
                        buf_pool_t*     buf_pool;
1772
 
 
1773
 
                        buf_pool = buf_pool_from_array(i);
1774
 
 
1775
 
                        os_event_wait(buf_pool->no_flush[type]);
1776
 
                }
1777
 
        } else {
1778
 
                os_event_wait(buf_pool->no_flush[type]);
1779
 
        }
1780
 
}
1781
 
 
1782
 
/*******************************************************************//**
1783
 
This utility flushes dirty blocks from the end of the LRU list.
1784
 
NOTE: The calling thread may own latches to pages: to avoid deadlocks,
1785
 
this function must be written so that it cannot end up waiting for these
1786
 
latches!
1787
 
@return number of blocks for which the write request was queued;
1788
 
ULINT_UNDEFINED if there was a flush of the same type already running */
1789
 
UNIV_INTERN
1790
 
ulint
1791
 
buf_flush_LRU(
1792
 
/*==========*/
1793
 
        buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
1794
 
        ulint           min_n)          /*!< in: wished minimum mumber of blocks
1795
 
                                        flushed (it is not guaranteed that the
1796
 
                                        actual number is that big, though) */
1797
 
{
1798
 
        ulint           page_count;
1799
 
 
1800
 
        if (!buf_flush_start(buf_pool, BUF_FLUSH_LRU)) {
1801
 
                return(ULINT_UNDEFINED);
1802
 
        }
1803
 
 
1804
 
        page_count = buf_flush_batch(buf_pool, BUF_FLUSH_LRU, min_n, 0);
1805
 
 
1806
 
        buf_flush_end(buf_pool, BUF_FLUSH_LRU);
1807
 
 
1808
 
        buf_flush_common(BUF_FLUSH_LRU, page_count);
1809
 
 
1810
 
        return(page_count);
1811
 
}
1812
 
 
1813
 
/*******************************************************************//**
1814
 
This utility flushes dirty blocks from the end of the flush list of
1815
 
all buffer pool instances.
1816
 
NOTE: The calling thread is not allowed to own any latches on pages!
1817
 
@return number of blocks for which the write request was queued;
1818
 
ULINT_UNDEFINED if there was a flush of the same type already running */
1819
 
UNIV_INTERN
1820
 
ulint
1821
 
buf_flush_list(
1822
 
/*===========*/
1823
 
        ulint           min_n,          /*!< in: wished minimum mumber of blocks
1824
 
                                        flushed (it is not guaranteed that the
1825
 
                                        actual number is that big, though) */
1826
 
        ib_uint64_t     lsn_limit)      /*!< in: in the case BUF_FLUSH_LIST all
1827
 
                                        blocks whose oldest_modification is
1828
 
                                        smaller than this should be flushed
1829
 
                                        (if their number does not exceed
1830
 
                                        min_n), otherwise ignored */
1831
 
{
1832
 
        ulint           i;
1833
 
        ulint           total_page_count = 0;
1834
 
        ibool           skipped = FALSE;
1835
 
 
1836
 
        if (min_n != ULINT_MAX) {
1837
 
                /* Ensure that flushing is spread evenly amongst the
1838
 
                buffer pool instances. When min_n is ULINT_MAX
1839
 
                we need to flush everything up to the lsn limit
1840
 
                so no limit here. */
1841
 
                min_n = (min_n + srv_buf_pool_instances - 1)
1842
 
                         / srv_buf_pool_instances;
1843
 
        }
1844
 
 
1845
 
        /* Flush to lsn_limit in all buffer pool instances */
1846
 
        for (i = 0; i < srv_buf_pool_instances; i++) {
1847
 
                buf_pool_t*     buf_pool;
1848
 
                ulint           page_count = 0;
1849
 
 
1850
 
                buf_pool = buf_pool_from_array(i);
1851
 
 
1852
 
                if (!buf_flush_start(buf_pool, BUF_FLUSH_LIST)) {
1853
 
                        /* We have two choices here. If lsn_limit was
1854
 
                        specified then skipping an instance of buffer
1855
 
                        pool means we cannot guarantee that all pages
1856
 
                        up to lsn_limit have been flushed. We can
1857
 
                        return right now with failure or we can try
1858
 
                        to flush remaining buffer pools up to the
1859
 
                        lsn_limit. We attempt to flush other buffer
1860
 
                        pools based on the assumption that it will
1861
 
                        help in the retry which will follow the
1862
 
                        failure. */
1863
 
                        skipped = TRUE;
1864
 
 
1865
 
                        continue;
1866
 
                }
1867
 
 
1868
 
                page_count = buf_flush_batch(
1869
 
                        buf_pool, BUF_FLUSH_LIST, min_n, lsn_limit);
1870
 
 
1871
 
                buf_flush_end(buf_pool, BUF_FLUSH_LIST);
1872
 
 
1873
 
                buf_flush_common(BUF_FLUSH_LIST, page_count);
1874
 
 
1875
 
                total_page_count += page_count;
1876
 
        }
1877
 
 
1878
 
        return(lsn_limit != IB_ULONGLONG_MAX && skipped
1879
 
               ? ULINT_UNDEFINED : total_page_count);
1880
 
}
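Two details here are easy to miss. The min_n split is a ceiling division: with min_n = 1000 and srv_buf_pool_instances = 4, each instance is asked for (1000 + 4 - 1) / 4 = 250 pages, so the rounded-up per-instance shares always cover the original request. And ULINT_UNDEFINED is reported only when an lsn_limit was given and some instance had to be skipped; a purely size-driven flush just returns the pages queued so far. A hedged caller sketch, where the target LSN and the retry loop are illustrative rather than taken from this revision:

        {
                ib_uint64_t     lsn_limit = log_get_lsn();

                /* Flush every block older than lsn_limit, retrying
                while a competing flush-list batch forces one of the
                instances to be skipped. */
                while (buf_flush_list(ULINT_MAX, lsn_limit)
                       == ULINT_UNDEFINED) {

                        buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
                }
        }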
1881
 
 
 
1387
        enum buf_flush  type)   /*!< in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
 
1388
{
 
1389
        ut_ad((type == BUF_FLUSH_LRU) || (type == BUF_FLUSH_LIST));
 
1390
 
 
1391
        os_event_wait(buf_pool->no_flush[type]);
 
1392
}
 
1393
 
1882
1394
/******************************************************************//**
1883
1395
Gives a recommendation of how many blocks should be flushed to establish
1884
1396
a big enough margin of replaceable blocks near the end of the LRU list
1885
1397
and in the free list.
1886
1398
@return number of blocks which should be flushed from the end of the
1887
1399
LRU list */
1888
1400
static
1889
1401
ulint
1890
 
buf_flush_LRU_recommendation(
1891
 
/*=========================*/
1892
 
        buf_pool_t*     buf_pool)               /*!< in: Buffer pool instance */
 
1402
buf_flush_LRU_recommendation(void)
 
1403
/*==============================*/
1893
1404
{
1894
1405
        buf_page_t*     bpage;
1895
1406
        ulint           n_replaceable;
1896
1407
        ulint           distance        = 0;
1897
1408
 
1898
 
        buf_pool_mutex_enter(buf_pool);
 
1409
        buf_pool_mutex_enter();
1899
1410
 
1900
1411
        n_replaceable = UT_LIST_GET_LEN(buf_pool->free);
1901
1412
 
1902
1413
        bpage = UT_LIST_GET_LAST(buf_pool->LRU);
1903
1414
 
1904
1415
        while ((bpage != NULL)
1905
 
               && (n_replaceable < BUF_FLUSH_FREE_BLOCK_MARGIN(buf_pool)
1906
 
                   + BUF_FLUSH_EXTRA_MARGIN(buf_pool))
1907
 
               && (distance < BUF_LRU_FREE_SEARCH_LEN(buf_pool))) {
 
1416
               && (n_replaceable < BUF_FLUSH_FREE_BLOCK_MARGIN
 
1417
                   + BUF_FLUSH_EXTRA_MARGIN)
 
1418
               && (distance < BUF_LRU_FREE_SEARCH_LEN)) {
1908
1419
 
1909
1420
                mutex_t* block_mutex = buf_page_get_mutex(bpage);
1910
1421
 
1921
1432
                bpage = UT_LIST_GET_PREV(LRU, bpage);
1922
1433
        }
1923
1434
 
1924
 
        buf_pool_mutex_exit(buf_pool);
 
1435
        buf_pool_mutex_exit();
1925
1436
 
1926
 
        if (n_replaceable >= BUF_FLUSH_FREE_BLOCK_MARGIN(buf_pool)) {
 
1437
        if (n_replaceable >= BUF_FLUSH_FREE_BLOCK_MARGIN) {
1927
1438
 
1928
1439
                return(0);
1929
1440
        }
1930
1441
 
1931
 
        return(BUF_FLUSH_FREE_BLOCK_MARGIN(buf_pool)
1932
 
               + BUF_FLUSH_EXTRA_MARGIN(buf_pool)
 
1442
        return(BUF_FLUSH_FREE_BLOCK_MARGIN + BUF_FLUSH_EXTRA_MARGIN
1933
1443
               - n_replaceable);
1934
1444
}
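The return value is the shortfall against the combined margins: at or above BUF_FLUSH_FREE_BLOCK_MARGIN(buf_pool) nothing needs flushing, and below it the caller is asked for enough pages to reach the free margin plus the extra margin. A worked sketch with invented margin values (the real ones come from the macros, not from this example):

        {
                ulint   free_margin = 160;      /* stand-in for
                                                BUF_FLUSH_FREE_BLOCK_MARGIN() */
                ulint   extra_margin = 140;     /* stand-in for
                                                BUF_FLUSH_EXTRA_MARGIN() */
                ulint   n_replaceable = 120;    /* free blocks plus the
                                                clean tail of the LRU */
                ulint   n_to_flush;

                /* 120 is below the free margin, so ask for enough
                flushes to reach both margins: 160 + 140 - 120 = 180. */
                n_to_flush = (n_replaceable >= free_margin)
                        ? 0
                        : free_margin + extra_margin - n_replaceable;
        }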
1935
1445
 
1936
1446
/*********************************************************************//**
1937
1447
Flushes pages from the end of the LRU list if there is too small a margin
1938
1448
of replaceable pages there or in the free list. VERY IMPORTANT: this function
1939
1449
is called also by threads which have locks on pages. To avoid deadlocks, we
1940
1450
flush only pages such that the s-lock required for flushing can be acquired
1941
1451
immediately, without waiting. */
1942
1452
UNIV_INTERN
1943
1453
void
1944
 
buf_flush_free_margin(
1945
 
/*==================*/
1946
 
        buf_pool_t*     buf_pool)               /*!< in: Buffer pool instance */
 
1454
buf_flush_free_margin(void)
 
1455
/*=======================*/
1947
1456
{
1948
1457
        ulint   n_to_flush;
 
1458
        ulint   n_flushed;
1949
1459
 
1950
 
        n_to_flush = buf_flush_LRU_recommendation(buf_pool);
 
1460
        n_to_flush = buf_flush_LRU_recommendation();
1951
1461
 
1952
1462
        if (n_to_flush > 0) {
1953
 
                ulint   n_flushed;
1954
 
 
1955
 
                n_flushed = buf_flush_LRU(buf_pool, n_to_flush);
1956
 
 
 
1463
                n_flushed = buf_flush_batch(BUF_FLUSH_LRU, n_to_flush, 0);
1957
1464
                if (n_flushed == ULINT_UNDEFINED) {
1958
1465
                        /* There was an LRU type flush batch already running;
1959
1466
                        let us wait for it to end */
1960
1467
 
1961
 
                        buf_flush_wait_batch_end(buf_pool, BUF_FLUSH_LRU);
 
1468
                        buf_flush_wait_batch_end(BUF_FLUSH_LRU);
1962
1469
                }
1963
1470
        }
1964
1471
}
1965
1472
 
1966
 
/*********************************************************************//**
1967
 
Flushes pages from the end of all the LRU lists. */
1968
 
UNIV_INTERN
1969
 
void
1970
 
buf_flush_free_margins(void)
1971
 
/*========================*/
1972
 
{
1973
 
        ulint   i;
1974
 
 
1975
 
        for (i = 0; i < srv_buf_pool_instances; i++) {
1976
 
                buf_pool_t*     buf_pool;
1977
 
 
1978
 
                buf_pool = buf_pool_from_array(i);
1979
 
 
1980
 
                buf_flush_free_margin(buf_pool);
1981
 
        }
1982
 
}
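buf_flush_free_margins is just the all-instances wrapper over the single-instance margin flush. As a hedged sketch, a background thread might keep the margins topped up like this; the loop and the sleep interval are illustrative and not part of this change:

        for (;;) {
                /* Top up the free margin of every buffer pool
                instance, then back off for a while. */
                buf_flush_free_margins();

                os_thread_sleep(1000000);       /* 1 second, in
                                                microseconds; arbitrary */
        }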
1983
 
 
1984
1473
/*********************************************************************
1985
1474
Update the historical stats that we are collecting for flush rate
1986
1475
heuristics at the end of each interval.
2041
1530
buf_flush_get_desired_flush_rate(void)
2042
1531
/*==================================*/
2043
1532
{
2044
 
        ulint           i;
2045
 
        lint            rate;
2046
 
        ulint           redo_avg;
2047
 
        ulint           n_dirty = 0;
2048
 
        ulint           n_flush_req;
2049
 
        ulint           lru_flush_avg;
2050
 
        ib_uint64_t     lsn = log_get_lsn();
2051
 
        ulint           log_capacity = log_get_capacity();
 
1533
        ulint                   redo_avg;
 
1534
        ulint                   lru_flush_avg;
 
1535
        ulint                   n_dirty;
 
1536
        ulint                   n_flush_req;
 
1537
        lint                    rate;
 
1538
        ib_uint64_t             lsn = log_get_lsn();
 
1539
        ulint                   log_capacity = log_get_capacity();
2052
1540
 
2053
1541
        /* log_capacity should never be zero after the initialization
2054
1542
        of the log subsystem. */
2055
1543
        ut_ad(log_capacity != 0);
2056
1544
 
2057
1545
        /* Get total number of dirty pages. It is OK to access
2058
 
        flush_list without holding any mutex as we are using this
 
1546
        flush_list without holding any mtex as we are using this
2059
1547
        only for heuristics. */
2060
 
        for (i = 0; i < srv_buf_pool_instances; i++) {
2061
 
                buf_pool_t*     buf_pool;
2062
 
 
2063
 
                buf_pool = buf_pool_from_array(i);
2064
 
                n_dirty += UT_LIST_GET_LEN(buf_pool->flush_list);
2065
 
        }
 
1548
        n_dirty = UT_LIST_GET_LEN(buf_pool->flush_list);
2066
1549
 
2067
1550
        /* An overflow can happen if we generate more than 2^32 bytes
2068
1551
        of redo in this interval i.e.: 4G of redo in 1 second. We can
2104
1587
@return TRUE if ok */
2105
1588
static
2106
1589
ibool
2107
 
buf_flush_validate_low(
2108
 
/*===================*/
2109
 
        buf_pool_t*     buf_pool)               /*!< in: Buffer pool instance */
 
1590
buf_flush_validate_low(void)
 
1591
/*========================*/
2110
1592
{
2111
1593
        buf_page_t*             bpage;
2112
1594
        const ib_rbt_node_t*    rnode = NULL;
2113
1595
 
2114
 
        ut_ad(buf_flush_list_mutex_own(buf_pool));
2115
 
 
2116
1596
        UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list,
2117
1597
                         ut_ad(ut_list_node_313->in_flush_list));
2118
1598
 
2127
1607
 
2128
1608
        while (bpage != NULL) {
2129
1609
                const ib_uint64_t om = bpage->oldest_modification;
2130
 
 
2131
 
                ut_ad(buf_pool_from_bpage(bpage) == buf_pool);
2132
 
 
2133
1610
                ut_ad(bpage->in_flush_list);
2134
 
 
2135
 
                /* A page in flush_list can be in BUF_BLOCK_REMOVE_HASH
2136
 
                state. This happens when a page is in the middle of
2137
 
                being relocated. In that case the original descriptor
2138
 
                can have this state and still be in the flush list
2139
 
                waiting to acquire the flush_list_mutex to complete
2140
 
                the relocation. */
2141
 
                ut_a(buf_page_in_file(bpage)
2142
 
                     || buf_page_get_state(bpage) == BUF_BLOCK_REMOVE_HASH);
 
1611
                ut_a(buf_page_in_file(bpage));
2143
1612
                ut_a(om > 0);
2144
1613
 
2145
1614
                if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
2146
 
                        buf_page_t** prpage;
2147
 
 
2148
1615
                        ut_a(rnode);
2149
 
                        prpage = rbt_value(buf_page_t*, rnode);
2150
 
 
2151
 
                        ut_a(*prpage);
2152
 
                        ut_a(*prpage == bpage);
 
1616
                        buf_page_t* rpage = *rbt_value(buf_page_t*,
 
1617
                                                       rnode);
 
1618
                        ut_a(rpage);
 
1619
                        ut_a(rpage == bpage);
2153
1620
                        rnode = rbt_next(buf_pool->flush_rbt, rnode);
2154
1621
                }
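One subtlety in this loop: rbt_value() hands back a pointer to the slot where the value was copied into the node, and since the stored value is itself a buf_page_t*, reading it back takes a double indirection (the prpage variable above). A minimal sketch, assuming ut0rbt's rbt_first() to obtain a starting node:

        {
                const ib_rbt_node_t*    node;
                buf_page_t**            slot;

                /* Fetch the first tree node and recover the stored
                buf_page_t* through the extra level of indirection. */
                node = rbt_first(buf_pool->flush_rbt);

                if (node != NULL) {
                        slot = rbt_value(buf_page_t*, node);
                        ut_a(*slot != NULL);
                }
        }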
2155
1622
 
2170
1637
@return TRUE if ok */
2171
1638
UNIV_INTERN
2172
1639
ibool
2173
 
buf_flush_validate(
2174
 
/*===============*/
2175
 
        buf_pool_t*     buf_pool)       /*!< buffer pool instance */
 
1640
buf_flush_validate(void)
 
1641
/*====================*/
2176
1642
{
2177
1643
        ibool   ret;
2178
1644
 
2179
 
        buf_flush_list_mutex_enter(buf_pool);
2180
 
 
2181
 
        ret = buf_flush_validate_low(buf_pool);
2182
 
 
2183
 
        buf_flush_list_mutex_exit(buf_pool);
 
1645
        buf_pool_mutex_enter();
 
1646
 
 
1647
        ret = buf_flush_validate_low();
 
1648
 
 
1649
        buf_pool_mutex_exit();
2184
1650
 
2185
1651
        return(ret);
2186
1652
}
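Both validate routines are debug-only consistency checks, so the expected call site wraps them in a debug assertion that costs nothing in release builds. A hedged usage sketch, assuming a UNIV_DEBUG build so that the functions are compiled in:

        /* ut_ad() only evaluates its argument when UNIV_DEBUG is
        defined, so release binaries skip the full flush-list walk. */
        ut_ad(buf_flush_validate(buf_pool));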