~drizzle-trunk/drizzle/development


Viewing changes to storage/innobase/buf/buf0flu.c

  • Committer: Brian Aker
  • Date: 2009-02-21 00:18:15 UTC
  • Revision ID: brian@tangent.org-20090221001815-x20e8h71e984lvs1
Completion (?) of uint conversion.

1
 
/*****************************************************************************
2
 
 
3
 
Copyright (C) 1995, 2010, Innobase Oy. All Rights Reserved.
4
 
 
5
 
This program is free software; you can redistribute it and/or modify it under
6
 
the terms of the GNU General Public License as published by the Free Software
7
 
Foundation; version 2 of the License.
8
 
 
9
 
This program is distributed in the hope that it will be useful, but WITHOUT
10
 
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11
 
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
12
 
 
13
 
You should have received a copy of the GNU General Public License along with
14
 
this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
15
 
St, Fifth Floor, Boston, MA 02110-1301 USA
16
 
 
17
 
*****************************************************************************/
18
 
 
19
 
/**************************************************//**
20
 
@file buf/buf0flu.c
 
1
/******************************************************
21
2
The database buffer buf_pool flush algorithm
22
3
 
 
4
(c) 1995-2001 Innobase Oy
 
5
 
23
6
Created 11/11/1995 Heikki Tuuri
24
7
*******************************************************/
25
8
 
27
10
 
28
11
#ifdef UNIV_NONINL
29
12
#include "buf0flu.ic"
 
13
#include "trx0sys.h"
30
14
#endif
31
15
 
32
 
#include "buf0buf.h"
33
 
#include "srv0srv.h"
34
 
#include "page0zip.h"
35
 
#ifndef UNIV_HOTBACKUP
36
16
#include "ut0byte.h"
37
17
#include "ut0lst.h"
38
18
#include "page0page.h"
 
19
#include "page0zip.h"
39
20
#include "fil0fil.h"
 
21
#include "buf0buf.h"
40
22
#include "buf0lru.h"
41
23
#include "buf0rea.h"
42
24
#include "ibuf0ibuf.h"
43
25
#include "log0log.h"
44
26
#include "os0file.h"
45
27
#include "trx0sys.h"
 
28
#include "srv0srv.h"
46
29
 
 
30
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
47
31
/**********************************************************************
48
 
These statistics are generated for heuristics used in estimating the
49
 
rate at which we should flush the dirty blocks to avoid bursty IO
50
 
activity. Note that the rate of flushing not only depends on how many
51
 
dirty pages we have in the buffer pool but it is also a function of
52
 
how much redo the workload is generating and at what rate. */
53
 
/* @{ */
54
 
 
55
 
/** Number of intervals for which we keep the history of these stats.
56
 
Each interval is 1 second, defined by the rate at which
57
 
srv_error_monitor_thread() calls buf_flush_stat_update(). */
58
 
#define BUF_FLUSH_STAT_N_INTERVAL 20
59
 
 
60
 
/** Sampled values buf_flush_stat_cur.
61
 
Not protected by any mutex.  Updated by buf_flush_stat_update(). */
62
 
static buf_flush_stat_t buf_flush_stat_arr[BUF_FLUSH_STAT_N_INTERVAL];
63
 
 
64
 
/** Cursor to buf_flush_stat_arr[]. Updated in a round-robin fashion. */
65
 
static ulint            buf_flush_stat_arr_ind;
66
 
 
67
 
/** Values at start of the current interval. Reset by
68
 
buf_flush_stat_update(). */
69
 
static buf_flush_stat_t buf_flush_stat_cur;
70
 
 
71
 
/** Running sum of past values of buf_flush_stat_cur.
72
 
Updated by buf_flush_stat_update(). Not protected by any mutex. */
73
 
static buf_flush_stat_t buf_flush_stat_sum;
74
 
 
75
 
/** Number of pages flushed through non flush_list flushes. */
76
 
static ulint buf_lru_flush_page_count = 0;
77
 
 
78
 
/* @} */
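/* Illustrative sketch only (not part of this revision): the declarations
above describe a 20-slot ring buffer that buf_flush_stat_update() samples
once per second, advancing buf_flush_stat_arr_ind round-robin and keeping a
running total in buf_flush_stat_sum.  Assuming a hypothetical n_flushed
counter inside buf_flush_stat_t, a per-second average over the kept history
could be computed roughly like this:

        ulint   avg = 0;
        ulint   i;

        for (i = 0; i < BUF_FLUSH_STAT_N_INTERVAL; i++) {
                avg += buf_flush_stat_arr[i].n_flushed;
        }
        avg /= BUF_FLUSH_STAT_N_INTERVAL;

The real sampling and averaging logic is not shown in this hunk. */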
79
 
 
80
 
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
81
 
/******************************************************************//**
82
 
Validates the flush list.
83
 
@return TRUE if ok */
 
32
Validates the flush list. */
84
33
static
85
34
ibool
86
 
buf_flush_validate_low(
87
 
/*===================*/
88
 
        buf_pool_t*     buf_pool);      /*!< in: Buffer pool instance */
89
 
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
90
 
 
91
 
/******************************************************************//**
92
 
Insert a block in the flush_rbt and returns a pointer to its
93
 
predecessor or NULL if no predecessor. The ordering is maintained
94
 
on the basis of the <oldest_modification, space, offset> key.
95
 
@return pointer to the predecessor or NULL if no predecessor. */
96
 
static
97
 
buf_page_t*
98
 
buf_flush_insert_in_flush_rbt(
99
 
/*==========================*/
100
 
        buf_page_t*     bpage)  /*!< in: bpage to be inserted. */
101
 
{
102
 
        const ib_rbt_node_t*    c_node;
103
 
        const ib_rbt_node_t*    p_node;
104
 
        buf_page_t*             prev = NULL;
105
 
        buf_pool_t*             buf_pool = buf_pool_from_bpage(bpage);
106
 
 
107
 
        ut_ad(buf_flush_list_mutex_own(buf_pool));
108
 
 
109
 
        /* Insert this buffer into the rbt. */
110
 
        c_node = rbt_insert(buf_pool->flush_rbt, &bpage, &bpage);
111
 
        ut_a(c_node != NULL);
112
 
 
113
 
        /* Get the predecessor. */
114
 
        p_node = rbt_prev(buf_pool->flush_rbt, c_node);
115
 
 
116
 
        if (p_node != NULL) {
117
 
                buf_page_t**    value;
118
 
                value = rbt_value(buf_page_t*, p_node);
119
 
                prev = *value;
120
 
                ut_a(prev != NULL);
121
 
        }
122
 
 
123
 
        return(prev);
124
 
}
125
 
 
126
 
/*********************************************************//**
127
 
Delete a bpage from the flush_rbt. */
128
 
static
129
 
void
130
 
buf_flush_delete_from_flush_rbt(
131
 
/*============================*/
132
 
        buf_page_t*     bpage)  /*!< in: bpage to be removed. */
133
 
{
134
 
#ifdef UNIV_DEBUG
135
 
        ibool           ret = FALSE;
136
 
#endif /* UNIV_DEBUG */
137
 
        buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
138
 
 
139
 
        ut_ad(buf_flush_list_mutex_own(buf_pool));
140
 
 
141
 
#ifdef UNIV_DEBUG
142
 
        ret =
143
 
#endif /* UNIV_DEBUG */
144
 
        rbt_delete(buf_pool->flush_rbt, &bpage);
145
 
        ut_ad(ret);
146
 
}
147
 
 
148
 
/*****************************************************************//**
149
 
Compare two modified blocks in the buffer pool. The key for comparison
150
 
is:
151
 
key = <oldest_modification, space, offset>
152
 
This comparison is used to maintain ordering of blocks in the
153
 
buf_pool->flush_rbt.
154
 
Note that for the purpose of flush_rbt, we only need to order blocks
155
 
on the oldest_modification. The other two fields are used to uniquely
156
 
identify the blocks.
157
 
@return  < 0 if b2 < b1, 0 if b2 == b1, > 0 if b2 > b1 */
158
 
static
159
 
int
160
 
buf_flush_block_cmp(
161
 
/*================*/
162
 
        const void*     p1,             /*!< in: block1 */
163
 
        const void*     p2)             /*!< in: block2 */
164
 
{
165
 
        int                     ret;
166
 
        const buf_page_t*       b1 = *(const buf_page_t**) p1;
167
 
        const buf_page_t*       b2 = *(const buf_page_t**) p2;
168
 
#ifdef UNIV_DEBUG
169
 
        buf_pool_t*             buf_pool = buf_pool_from_bpage(b1);
170
 
#endif /* UNIV_DEBUG */
171
 
 
172
 
        ut_ad(b1 != NULL);
173
 
        ut_ad(b2 != NULL);
174
 
 
175
 
        ut_ad(buf_flush_list_mutex_own(buf_pool));
176
 
 
177
 
        ut_ad(b1->in_flush_list);
178
 
        ut_ad(b2->in_flush_list);
179
 
 
180
 
        if (b2->oldest_modification > b1->oldest_modification) {
181
 
                return(1);
182
 
        } else if (b2->oldest_modification < b1->oldest_modification) {
183
 
                return(-1);
184
 
        }
185
 
 
186
 
        /* If oldest_modification is same then decide on the space. */
187
 
        ret = (int)(b2->space - b1->space);
188
 
 
189
 
        /* Or else decide ordering on the offset field. */
190
 
        return(ret ? ret : (int)(b2->offset - b1->offset));
191
 
}
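/* Worked example (illustrative, not from the original source): given two
dirty pages with equal oldest_modification and equal space id, say
b1->offset == 7 and b2->offset == 9, the space comparison yields 0, so the
function returns (int)(b2->offset - b1->offset) == 2 > 0, meaning b2 orders
after b1 under the <oldest_modification, space, offset> key described
above. */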
192
 
 
193
 
/********************************************************************//**
194
 
Initialize the red-black tree to speed up insertions into the flush_list
195
 
during recovery process. Should be called at the start of recovery
196
 
process before any page has been read/written. */
197
 
UNIV_INTERN
198
 
void
199
 
buf_flush_init_flush_rbt(void)
200
 
/*==========================*/
201
 
{
202
 
        ulint   i;
203
 
 
204
 
        for (i = 0; i < srv_buf_pool_instances; i++) {
205
 
                buf_pool_t*     buf_pool;
206
 
 
207
 
                buf_pool = buf_pool_from_array(i);
208
 
 
209
 
                buf_flush_list_mutex_enter(buf_pool);
210
 
 
211
 
                /* Create red black tree for speedy insertions in flush list. */
212
 
                buf_pool->flush_rbt = rbt_create(
213
 
                        sizeof(buf_page_t*), buf_flush_block_cmp);
214
 
 
215
 
                buf_flush_list_mutex_exit(buf_pool);
216
 
        }
217
 
}
218
 
 
219
 
/********************************************************************//**
220
 
Frees up the red-black tree. */
221
 
UNIV_INTERN
222
 
void
223
 
buf_flush_free_flush_rbt(void)
224
 
/*==========================*/
225
 
{
226
 
        ulint   i;
227
 
 
228
 
        for (i = 0; i < srv_buf_pool_instances; i++) {
229
 
                buf_pool_t*     buf_pool;
230
 
 
231
 
                buf_pool = buf_pool_from_array(i);
232
 
 
233
 
                buf_flush_list_mutex_enter(buf_pool);
234
 
 
235
 
#ifdef UNIV_DEBUG_VALGRIND
236
 
        {
237
 
                ulint   zip_size = buf_block_get_zip_size(block);
238
 
 
239
 
                if (UNIV_UNLIKELY(zip_size)) {
240
 
                        UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size);
241
 
                } else {
242
 
                        UNIV_MEM_ASSERT_RW(block->frame, UNIV_PAGE_SIZE);
243
 
                }
244
 
        }
245
 
#endif /* UNIV_DEBUG_VALGRIND */
246
 
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
247
 
                ut_a(buf_flush_validate_low(buf_pool));
248
 
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
249
 
 
250
 
                rbt_free(buf_pool->flush_rbt);
251
 
                buf_pool->flush_rbt = NULL;
252
 
 
253
 
                buf_flush_list_mutex_exit(buf_pool);
254
 
        }
255
 
}
256
 
 
257
 
/********************************************************************//**
 
35
buf_flush_validate_low(void);
 
36
/*========================*/
 
37
                /* out: TRUE if ok */
 
38
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
 
39
 
 
40
/************************************************************************
258
41
Inserts a modified block into the flush list. */
259
42
UNIV_INTERN
260
43
void
261
44
buf_flush_insert_into_flush_list(
262
45
/*=============================*/
263
 
        buf_pool_t*     buf_pool,       /*!< buffer pool instance */
264
 
        buf_block_t*    block,          /*!< in/out: block which is modified */
265
 
        ib_uint64_t     lsn)            /*!< in: oldest modification */
 
46
        buf_page_t*     bpage)  /* in: block which is modified */
266
47
{
267
 
        ut_ad(!buf_pool_mutex_own(buf_pool));
268
 
        ut_ad(log_flush_order_mutex_own());
269
 
        ut_ad(mutex_own(&block->mutex));
270
 
 
271
 
        buf_flush_list_mutex_enter(buf_pool);
272
 
 
 
48
        ut_ad(buf_pool_mutex_own());
273
49
        ut_ad((UT_LIST_GET_FIRST(buf_pool->flush_list) == NULL)
274
50
              || (UT_LIST_GET_FIRST(buf_pool->flush_list)->oldest_modification
275
 
                  <= lsn));
 
51
                  <= bpage->oldest_modification));
276
52
 
277
 
        /* If we are in the recovery then we need to update the flush
278
 
        red-black tree as well. */
279
 
        if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
280
 
                buf_flush_list_mutex_exit(buf_pool);
281
 
                buf_flush_insert_sorted_into_flush_list(buf_pool, block, lsn);
 
53
        switch (buf_page_get_state(bpage)) {
 
54
        case BUF_BLOCK_ZIP_PAGE:
 
55
                mutex_enter(&buf_pool_zip_mutex);
 
56
                buf_page_set_state(bpage, BUF_BLOCK_ZIP_DIRTY);
 
57
                mutex_exit(&buf_pool_zip_mutex);
 
58
                UT_LIST_REMOVE(list, buf_pool->zip_clean, bpage);
 
59
                /* fall through */
 
60
        case BUF_BLOCK_ZIP_DIRTY:
 
61
        case BUF_BLOCK_FILE_PAGE:
 
62
                ut_ad(bpage->in_LRU_list);
 
63
                ut_ad(bpage->in_page_hash);
 
64
                ut_ad(!bpage->in_zip_hash);
 
65
                ut_ad(!bpage->in_flush_list);
 
66
                ut_d(bpage->in_flush_list = TRUE);
 
67
                UT_LIST_ADD_FIRST(list, buf_pool->flush_list, bpage);
 
68
                break;
 
69
        case BUF_BLOCK_ZIP_FREE:
 
70
        case BUF_BLOCK_NOT_USED:
 
71
        case BUF_BLOCK_READY_FOR_USE:
 
72
        case BUF_BLOCK_MEMORY:
 
73
        case BUF_BLOCK_REMOVE_HASH:
 
74
                ut_error;
282
75
                return;
283
76
        }
284
77
 
285
 
        ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
286
 
        ut_ad(!block->page.in_flush_list);
287
 
 
288
 
        ut_d(block->page.in_flush_list = TRUE);
289
 
        block->page.oldest_modification = lsn;
290
 
        UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
291
 
 
292
 
#ifdef UNIV_DEBUG_VALGRIND
293
 
        {
294
 
                ulint   zip_size = buf_block_get_zip_size(block);
295
 
 
296
 
                if (UNIV_UNLIKELY(zip_size)) {
297
 
                        UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size);
298
 
                } else {
299
 
                        UNIV_MEM_ASSERT_RW(block->frame, UNIV_PAGE_SIZE);
300
 
                }
301
 
        }
302
 
#endif /* UNIV_DEBUG_VALGRIND */
303
78
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
304
 
        ut_a(buf_flush_validate_low(buf_pool));
 
79
        ut_a(buf_flush_validate_low());
305
80
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
306
 
 
307
 
        buf_flush_list_mutex_exit(buf_pool);
308
81
}
309
82
 
310
 
/********************************************************************//**
 
83
/************************************************************************
311
84
Inserts a modified block into the flush list in the right sorted position.
312
85
This function is used by recovery, because there the modifications do not
313
86
necessarily come in the order of lsn's. */
315
88
void
316
89
buf_flush_insert_sorted_into_flush_list(
317
90
/*====================================*/
318
 
        buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
319
 
        buf_block_t*    block,          /*!< in/out: block which is modified */
320
 
        ib_uint64_t     lsn)            /*!< in: oldest modification */
 
91
        buf_page_t*     bpage)  /* in: block which is modified */
321
92
{
322
93
        buf_page_t*     prev_b;
323
94
        buf_page_t*     b;
324
95
 
325
 
        ut_ad(!buf_pool_mutex_own(buf_pool));
326
 
        ut_ad(log_flush_order_mutex_own());
327
 
        ut_ad(mutex_own(&block->mutex));
328
 
        ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
329
 
 
330
 
        buf_flush_list_mutex_enter(buf_pool);
331
 
 
332
 
        /* The field in_LRU_list is protected by buf_pool->mutex, which
333
 
        we are not holding.  However, while a block is in the flush
334
 
        list, it is dirty and cannot be discarded, not from the
335
 
        page_hash or from the LRU list.  At most, the uncompressed
336
 
        page frame of a compressed block may be discarded or created
337
 
        (copying the block->page to or from a buf_page_t that is
338
 
        dynamically allocated from buf_buddy_alloc()).  Because those
339
 
        transitions hold block->mutex and the flush list mutex (via
340
 
        buf_flush_relocate_on_flush_list()), there is no possibility
341
 
        of a race condition in the assertions below. */
342
 
        ut_ad(block->page.in_LRU_list);
343
 
        ut_ad(block->page.in_page_hash);
344
 
        /* buf_buddy_block_register() will take a block in the
345
 
        BUF_BLOCK_MEMORY state, not a file page. */
346
 
        ut_ad(!block->page.in_zip_hash);
347
 
 
348
 
        ut_ad(!block->page.in_flush_list);
349
 
        ut_d(block->page.in_flush_list = TRUE);
350
 
        block->page.oldest_modification = lsn;
351
 
 
352
 
#ifdef UNIV_DEBUG_VALGRIND
353
 
        {
354
 
                ulint   zip_size = buf_block_get_zip_size(block);
355
 
 
356
 
                if (UNIV_UNLIKELY(zip_size)) {
357
 
                        UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size);
358
 
                } else {
359
 
                        UNIV_MEM_ASSERT_RW(block->frame, UNIV_PAGE_SIZE);
360
 
                }
361
 
        }
362
 
#endif /* UNIV_DEBUG_VALGRIND */
363
 
 
364
 
#ifdef UNIV_DEBUG_VALGRIND
365
 
        {
366
 
                ulint   zip_size = buf_block_get_zip_size(block);
367
 
 
368
 
                if (UNIV_UNLIKELY(zip_size)) {
369
 
                        UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size);
370
 
                } else {
371
 
                        UNIV_MEM_ASSERT_RW(block->frame, UNIV_PAGE_SIZE);
372
 
                }
373
 
        }
374
 
#endif /* UNIV_DEBUG_VALGRIND */
 
96
        ut_ad(buf_pool_mutex_own());
 
97
 
 
98
        switch (buf_page_get_state(bpage)) {
 
99
        case BUF_BLOCK_ZIP_PAGE:
 
100
                mutex_enter(&buf_pool_zip_mutex);
 
101
                buf_page_set_state(bpage, BUF_BLOCK_ZIP_DIRTY);
 
102
                mutex_exit(&buf_pool_zip_mutex);
 
103
                UT_LIST_REMOVE(list, buf_pool->zip_clean, bpage);
 
104
                /* fall through */
 
105
        case BUF_BLOCK_ZIP_DIRTY:
 
106
        case BUF_BLOCK_FILE_PAGE:
 
107
                ut_ad(bpage->in_LRU_list);
 
108
                ut_ad(bpage->in_page_hash);
 
109
                ut_ad(!bpage->in_zip_hash);
 
110
                ut_ad(!bpage->in_flush_list);
 
111
                ut_d(bpage->in_flush_list = TRUE);
 
112
                break;
 
113
        case BUF_BLOCK_ZIP_FREE:
 
114
        case BUF_BLOCK_NOT_USED:
 
115
        case BUF_BLOCK_READY_FOR_USE:
 
116
        case BUF_BLOCK_MEMORY:
 
117
        case BUF_BLOCK_REMOVE_HASH:
 
118
                ut_error;
 
119
                return;
 
120
        }
375
121
 
376
122
        prev_b = NULL;
377
 
 
378
 
        /* For the most part when this function is called the flush_rbt
379
 
        should not be NULL. In a very rare boundary case it is possible
380
 
        that the flush_rbt has already been freed by the recovery thread
381
 
        before the last page was hooked up in the flush_list by the
382
 
        io-handler thread. In that case we'll just do a simple
383
 
        linear search in the else block. */
384
 
        if (buf_pool->flush_rbt) {
385
 
 
386
 
                prev_b = buf_flush_insert_in_flush_rbt(&block->page);
387
 
 
388
 
        } else {
389
 
 
390
 
                b = UT_LIST_GET_FIRST(buf_pool->flush_list);
391
 
 
392
 
                while (b && b->oldest_modification
393
 
                       > block->page.oldest_modification) {
394
 
                        ut_ad(b->in_flush_list);
395
 
                        prev_b = b;
396
 
                        b = UT_LIST_GET_NEXT(list, b);
397
 
                }
 
123
        b = UT_LIST_GET_FIRST(buf_pool->flush_list);
 
124
 
 
125
        while (b && b->oldest_modification > bpage->oldest_modification) {
 
126
                ut_ad(b->in_flush_list);
 
127
                prev_b = b;
 
128
                b = UT_LIST_GET_NEXT(list, b);
398
129
        }
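        /* Illustrative example (not in the original source): the flush_list
        is kept in descending order of oldest_modification.  If it currently
        holds pages with LSNs 90, 70 and 40 and the new page has lsn == 60,
        then prev_b (whether found through the flush_rbt or through the
        linear search above) ends up pointing at the LSN-70 page, so the
        code below inserts the new page between 70 and 40. */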
399
130
 
400
131
        if (prev_b == NULL) {
401
 
                UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
 
132
                UT_LIST_ADD_FIRST(list, buf_pool->flush_list, bpage);
402
133
        } else {
403
134
                UT_LIST_INSERT_AFTER(list, buf_pool->flush_list,
404
 
                                     prev_b, &block->page);
 
135
                                     prev_b, bpage);
405
136
        }
406
137
 
407
138
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
408
 
        ut_a(buf_flush_validate_low(buf_pool));
 
139
        ut_a(buf_flush_validate_low());
409
140
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
410
 
 
411
 
        buf_flush_list_mutex_exit(buf_pool);
412
141
}
413
142
 
414
 
/********************************************************************//**
 
143
/************************************************************************
415
144
Returns TRUE if the file page block is immediately suitable for replacement,
416
 
i.e., the transition FILE_PAGE => NOT_USED is allowed.
417
 
@return TRUE if can replace immediately */
 
145
i.e., the transition FILE_PAGE => NOT_USED allowed. */
418
146
UNIV_INTERN
419
147
ibool
420
148
buf_flush_ready_for_replace(
421
149
/*========================*/
422
 
        buf_page_t*     bpage)  /*!< in: buffer control block, must be
 
150
                                /* out: TRUE if can replace immediately */
 
151
        buf_page_t*     bpage)  /* in: buffer control block, must be
423
152
                                buf_page_in_file(bpage) and in the LRU list */
424
153
{
425
 
#ifdef UNIV_DEBUG
426
 
        buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
427
 
        ut_ad(buf_pool_mutex_own(buf_pool));
428
 
#endif
 
154
        ut_ad(buf_pool_mutex_own());
429
155
        ut_ad(mutex_own(buf_page_get_mutex(bpage)));
430
156
        ut_ad(bpage->in_LRU_list);
431
157
 
447
173
        return(FALSE);
448
174
}
449
175
 
450
 
/********************************************************************//**
451
 
Returns TRUE if the block is modified and ready for flushing.
452
 
@return TRUE if can flush immediately */
 
176
/************************************************************************
 
177
Returns TRUE if the block is modified and ready for flushing. */
453
178
UNIV_INLINE
454
179
ibool
455
180
buf_flush_ready_for_flush(
456
181
/*======================*/
457
 
        buf_page_t*     bpage,  /*!< in: buffer control block, must be
 
182
                                /* out: TRUE if can flush immediately */
 
183
        buf_page_t*     bpage,  /* in: buffer control block, must be
458
184
                                buf_page_in_file(bpage) */
459
 
        enum buf_flush  flush_type)/*!< in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
 
185
        enum buf_flush  flush_type)/* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
460
186
{
461
 
#ifdef UNIV_DEBUG
462
 
        buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
463
 
        ut_ad(buf_pool_mutex_own(buf_pool));
464
 
#endif
465
187
        ut_a(buf_page_in_file(bpage));
 
188
        ut_ad(buf_pool_mutex_own());
466
189
        ut_ad(mutex_own(buf_page_get_mutex(bpage)));
467
 
        ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
468
190
 
469
191
        if (bpage->oldest_modification != 0
470
192
            && buf_page_get_io_fix(bpage) == BUF_IO_NONE) {
487
209
        return(FALSE);
488
210
}
489
211
 
490
 
/********************************************************************//**
 
212
/************************************************************************
491
213
Remove a block from the flush list of modified blocks. */
492
214
UNIV_INTERN
493
215
void
494
216
buf_flush_remove(
495
217
/*=============*/
496
 
        buf_page_t*     bpage)  /*!< in: pointer to the block in question */
 
218
        buf_page_t*     bpage)  /* in: pointer to the block in question */
497
219
{
498
 
        buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
499
 
 
500
 
        ut_ad(buf_pool_mutex_own(buf_pool));
 
220
        ut_ad(buf_pool_mutex_own());
501
221
        ut_ad(mutex_own(buf_page_get_mutex(bpage)));
502
222
        ut_ad(bpage->in_flush_list);
503
 
 
504
 
        buf_flush_list_mutex_enter(buf_pool);
 
223
        ut_d(bpage->in_flush_list = FALSE);
505
224
 
506
225
        switch (buf_page_get_state(bpage)) {
507
226
        case BUF_BLOCK_ZIP_PAGE:
508
 
                /* Clean compressed pages should not be on the flush list */
 
227
                /* clean compressed pages should not be on the flush list */
509
228
        case BUF_BLOCK_ZIP_FREE:
510
229
        case BUF_BLOCK_NOT_USED:
511
230
        case BUF_BLOCK_READY_FOR_USE:
523
242
                break;
524
243
        }
525
244
 
526
 
        /* If the flush_rbt is active then delete from there as well. */
527
 
        if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
528
 
                buf_flush_delete_from_flush_rbt(bpage);
529
 
        }
530
 
 
531
 
        /* Must be done after we have removed it from the flush_rbt
532
 
        because we assert on in_flush_list in comparison function. */
533
 
        ut_d(bpage->in_flush_list = FALSE);
534
 
 
535
245
        bpage->oldest_modification = 0;
536
246
 
537
 
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
538
 
        ut_a(buf_flush_validate_low(buf_pool));
539
 
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
540
 
 
541
 
        buf_flush_list_mutex_exit(buf_pool);
542
 
}
543
 
 
544
 
/*******************************************************************//**
545
 
Relocates a buffer control block on the flush_list.
546
 
Note that it is assumed that the contents of bpage have already been
547
 
copied to dpage.
548
 
IMPORTANT: When this function is called bpage and dpage are not
549
 
exact copies of each other. For example, they both will have different
550
 
::state. Also the ::list pointers in dpage may be stale. We need to
551
 
use the current list node (bpage) to do the list manipulation because
552
 
the list pointers could have changed between the time that we copied
553
 
the contents of bpage to the dpage and the flush list manipulation
554
 
below. */
555
 
UNIV_INTERN
556
 
void
557
 
buf_flush_relocate_on_flush_list(
558
 
/*=============================*/
559
 
        buf_page_t*     bpage,  /*!< in/out: control block being moved */
560
 
        buf_page_t*     dpage)  /*!< in/out: destination block */
561
 
{
562
 
        buf_page_t*     prev;
563
 
        buf_page_t*     prev_b = NULL;
564
 
        buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
565
 
 
566
 
        ut_ad(buf_pool_mutex_own(buf_pool));
567
 
        /* Must reside in the same buffer pool. */
568
 
        ut_ad(buf_pool == buf_pool_from_bpage(dpage));
569
 
 
570
 
        ut_ad(mutex_own(buf_page_get_mutex(bpage)));
571
 
 
572
 
        buf_flush_list_mutex_enter(buf_pool);
573
 
 
574
 
        /* FIXME: At this point we have both buf_pool and flush_list
575
 
        mutexes. Theoretically removal of a block from flush list is
576
 
        only covered by flush_list mutex but currently we do
577
 
        have buf_pool mutex in buf_flush_remove() therefore this block
578
 
        is guaranteed to be in the flush list. We need to check if
579
 
        this will work without the assumption of block removing code
580
 
        having the buf_pool mutex. */
581
 
        ut_ad(bpage->in_flush_list);
582
 
        ut_ad(dpage->in_flush_list);
583
 
 
584
 
        /* If recovery is active we must swap the control blocks in
585
 
        the flush_rbt as well. */
586
 
        if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
587
 
                buf_flush_delete_from_flush_rbt(bpage);
588
 
                prev_b = buf_flush_insert_in_flush_rbt(dpage);
589
 
        }
590
 
 
591
 
        /* Must be done after we have removed it from the flush_rbt
592
 
        because we assert on in_flush_list in comparison function. */
593
 
        ut_d(bpage->in_flush_list = FALSE);
594
 
 
595
 
        prev = UT_LIST_GET_PREV(list, bpage);
596
 
        UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
597
 
 
598
 
        if (prev) {
599
 
                ut_ad(prev->in_flush_list);
600
 
                UT_LIST_INSERT_AFTER(
601
 
                        list,
602
 
                        buf_pool->flush_list,
603
 
                        prev, dpage);
604
 
        } else {
605
 
                UT_LIST_ADD_FIRST(
606
 
                        list,
607
 
                        buf_pool->flush_list,
608
 
                        dpage);
609
 
        }
610
 
 
611
 
        /* Just an extra check. Previous in flush_list
612
 
        should be the same control block as in flush_rbt. */
613
 
        ut_a(!buf_pool->flush_rbt || prev_b == prev);
614
 
 
615
 
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
616
 
        ut_a(buf_flush_validate_low(buf_pool));
617
 
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
618
 
 
619
 
        buf_flush_list_mutex_exit(buf_pool);
620
 
}
621
 
 
622
 
/********************************************************************//**
 
247
        ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list));
 
248
}
 
249
 
 
250
/************************************************************************
623
251
Updates the flush system data structures when a write is completed. */
624
252
UNIV_INTERN
625
253
void
626
254
buf_flush_write_complete(
627
255
/*=====================*/
628
 
        buf_page_t*     bpage)  /*!< in: pointer to the block in question */
 
256
        buf_page_t*     bpage)  /* in: pointer to the block in question */
629
257
{
630
258
        enum buf_flush  flush_type;
631
 
        buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
632
259
 
633
260
        ut_ad(bpage);
634
261
 
649
276
        /* fprintf(stderr, "n pending flush %lu\n",
650
277
        buf_pool->n_flush[flush_type]); */
651
278
 
652
 
        if (buf_pool->n_flush[flush_type] == 0
653
 
            && buf_pool->init_flush[flush_type] == FALSE) {
 
279
        if ((buf_pool->n_flush[flush_type] == 0)
 
280
            && (buf_pool->init_flush[flush_type] == FALSE)) {
654
281
 
655
282
                /* The running flush batch has ended */
656
283
 
658
285
        }
659
286
}
660
287
 
661
 
/********************************************************************//**
662
 
Flush a batch of writes to the datafiles that have already been
663
 
written by the OS. */
664
 
static
665
 
void
666
 
buf_flush_sync_datafiles(void)
667
 
/*==========================*/
668
 
{
669
 
        /* Wake possible simulated aio thread to actually post the
670
 
        writes to the operating system */
671
 
        os_aio_simulated_wake_handler_threads();
672
 
 
673
 
        /* Wait that all async writes to tablespaces have been posted to
674
 
        the OS */
675
 
        os_aio_wait_until_no_pending_writes();
676
 
 
677
 
        /* Now we flush the data to disk (for example, with fsync) */
678
 
        fil_flush_file_spaces(FIL_TABLESPACE);
679
 
 
680
 
        return;
681
 
}
682
 
 
683
 
/********************************************************************//**
 
288
/************************************************************************
684
289
Flushes possible buffered writes from the doublewrite memory buffer to disk,
685
290
and also wakes up the aio thread if simulated aio is used. It is very
686
291
important to call this function after a batch of writes has been posted,
697
302
        ulint           i;
698
303
 
699
304
        if (!srv_use_doublewrite_buf || trx_doublewrite == NULL) {
700
 
                /* Sync the writes to the disk. */
701
 
                buf_flush_sync_datafiles();
 
305
                os_aio_simulated_wake_handler_threads();
 
306
 
702
307
                return;
703
308
        }
704
309
 
906
511
                buf_LRU_stat_inc_io();
907
512
        }
908
513
 
909
 
        /* Sync the writes to the disk. */
910
 
        buf_flush_sync_datafiles();
 
514
        /* Wake possible simulated aio thread to actually post the
 
515
        writes to the operating system */
 
516
 
 
517
        os_aio_simulated_wake_handler_threads();
 
518
 
 
519
        /* Wait that all async writes to tablespaces have been posted to
 
520
        the OS */
 
521
 
 
522
        os_aio_wait_until_no_pending_writes();
 
523
 
 
524
        /* Now we flush the data to disk (for example, with fsync) */
 
525
 
 
526
        fil_flush_file_spaces(FIL_TABLESPACE);
911
527
 
912
528
        /* We can now reuse the doublewrite memory buffer: */
 
529
 
913
530
        trx_doublewrite->first_free = 0;
914
531
 
915
532
        mutex_exit(&(trx_doublewrite->mutex));
916
533
}
917
534
 
918
 
/********************************************************************//**
 
535
/************************************************************************
919
536
Posts a buffer page for writing. If the doublewrite memory buffer is
920
537
full, calls buf_flush_buffered_writes and waits for free space to
921
538
appear. */
923
540
void
924
541
buf_flush_post_to_doublewrite_buf(
925
542
/*==============================*/
926
 
        buf_page_t*     bpage)  /*!< in: buffer block to write */
 
543
        buf_page_t*     bpage)  /* in: buffer block to write */
927
544
{
928
545
        ulint   zip_size;
929
546
try_again:
943
560
        zip_size = buf_page_get_zip_size(bpage);
944
561
 
945
562
        if (UNIV_UNLIKELY(zip_size)) {
946
 
                UNIV_MEM_ASSERT_RW(bpage->zip.data, zip_size);
947
563
                /* Copy the compressed page and clear the rest. */
948
564
                memcpy(trx_doublewrite->write_buf
949
565
                       + UNIV_PAGE_SIZE * trx_doublewrite->first_free,
953
569
                       + zip_size, 0, UNIV_PAGE_SIZE - zip_size);
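                /* Worked example (illustrative): the copy and zero-fill
                above fill one doublewrite slot.  With zip_size == 8192 and
                the default 16 KiB UNIV_PAGE_SIZE, the first 8192 bytes of
                the slot receive the compressed page and bytes [8192, 16384)
                are cleared, so the slot never carries stale data. */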
954
570
        } else {
955
571
                ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
956
 
                UNIV_MEM_ASSERT_RW(((buf_block_t*) bpage)->frame,
957
 
                                   UNIV_PAGE_SIZE);
958
572
 
959
573
                memcpy(trx_doublewrite->write_buf
960
574
                       + UNIV_PAGE_SIZE * trx_doublewrite->first_free,
976
590
 
977
591
        mutex_exit(&(trx_doublewrite->mutex));
978
592
}
979
 
#endif /* !UNIV_HOTBACKUP */
980
593
 
981
 
/********************************************************************//**
 
594
/************************************************************************
982
595
Initializes a page for writing to the tablespace. */
983
596
UNIV_INTERN
984
597
void
985
598
buf_flush_init_for_writing(
986
599
/*=======================*/
987
 
        byte*           page,           /*!< in/out: page */
988
 
        void*           page_zip_,      /*!< in/out: compressed page, or NULL */
989
 
        ib_uint64_t     newest_lsn)     /*!< in: newest modification lsn
 
600
        byte*           page,           /* in/out: page */
 
601
        void*           page_zip_,      /* in/out: compressed page, or NULL */
 
602
        ib_uint64_t     newest_lsn)     /* in: newest modification lsn
990
603
                                        to the page */
991
604
{
992
605
        ut_ad(page);
993
606
 
994
607
        if (page_zip_) {
995
 
                page_zip_des_t* page_zip = static_cast<page_zip_des_t *>(page_zip_);
 
608
                page_zip_des_t* page_zip = page_zip_;
996
609
                ulint           zip_size = page_zip_get_size(page_zip);
997
610
                ut_ad(zip_size);
998
611
                ut_ad(ut_is_2pow(zip_size));
1010
623
                case FIL_PAGE_TYPE_ZBLOB:
1011
624
                case FIL_PAGE_TYPE_ZBLOB2:
1012
625
                case FIL_PAGE_INDEX:
1013
 
                        mach_write_to_8(page_zip->data
1014
 
                                        + FIL_PAGE_LSN, newest_lsn);
 
626
                        mach_write_ull(page_zip->data
 
627
                                       + FIL_PAGE_LSN, newest_lsn);
1015
628
                        memset(page_zip->data + FIL_PAGE_FILE_FLUSH_LSN, 0, 8);
1016
629
                        mach_write_to_4(page_zip->data
1017
630
                                        + FIL_PAGE_SPACE_OR_CHKSUM,
1033
646
        }
1034
647
 
1035
648
        /* Write the newest modification lsn to the page header and trailer */
1036
 
        mach_write_to_8(page + FIL_PAGE_LSN, newest_lsn);
 
649
        mach_write_ull(page + FIL_PAGE_LSN, newest_lsn);
1037
650
 
1038
 
        mach_write_to_8(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
1039
 
                        newest_lsn);
 
651
        mach_write_ull(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
 
652
                       newest_lsn);
1040
653
 
1041
654
        /* Store the new formula checksum */
1042
655
 
1056
669
                        : BUF_NO_CHECKSUM_MAGIC);
1057
670
}
1058
671
 
1059
 
#ifndef UNIV_HOTBACKUP
1060
 
/********************************************************************//**
 
672
/************************************************************************
1061
673
Does an asynchronous write of a buffer page. NOTE: in simulated aio and
1062
674
also when the doublewrite buffer is used, we must call
1063
675
buf_flush_buffered_writes after we have posted a batch of writes! */
1065
677
void
1066
678
buf_flush_write_block_low(
1067
679
/*======================*/
1068
 
        buf_page_t*     bpage)  /*!< in: buffer block to write */
 
680
        buf_page_t*     bpage)  /* in: buffer block to write */
1069
681
{
1070
682
        ulint   zip_size        = buf_page_get_zip_size(bpage);
1071
683
        page_t* frame           = NULL;
1072
 
 
1073
 
#ifdef UNIV_DEBUG
1074
 
        buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
1075
 
        ut_ad(!buf_pool_mutex_own(buf_pool));
1076
 
#endif
1077
 
 
1078
684
#ifdef UNIV_LOG_DEBUG
1079
685
        static ibool univ_log_debug_warned;
1080
686
#endif /* UNIV_LOG_DEBUG */
1081
687
 
1082
688
        ut_ad(buf_page_in_file(bpage));
1083
689
 
1084
 
        /* We are not holding buf_pool->mutex or block_mutex here.
1085
 
        Nevertheless, it is safe to access bpage, because it is
1086
 
        io_fixed and oldest_modification != 0.  Thus, it cannot be
1087
 
        relocated in the buffer pool or removed from flush_list or
1088
 
        LRU_list. */
1089
 
        ut_ad(!buf_pool_mutex_own(buf_pool));
1090
 
        ut_ad(!buf_flush_list_mutex_own(buf_pool));
1091
 
        ut_ad(!mutex_own(buf_page_get_mutex(bpage)));
1092
 
        ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_WRITE);
1093
 
        ut_ad(bpage->oldest_modification != 0);
1094
 
 
1095
690
#ifdef UNIV_IBUF_COUNT_DEBUG
1096
691
        ut_a(ibuf_count_get(bpage->space, bpage->offset) == 0);
1097
692
#endif
1124
719
                        ut_a(mach_read_from_4(frame + FIL_PAGE_SPACE_OR_CHKSUM)
1125
720
                             == page_zip_calc_checksum(frame, zip_size));
1126
721
                }
1127
 
                mach_write_to_8(frame + FIL_PAGE_LSN,
1128
 
                                bpage->newest_modification);
 
722
                mach_write_ull(frame + FIL_PAGE_LSN,
 
723
                               bpage->newest_modification);
1129
724
                memset(frame + FIL_PAGE_FILE_FLUSH_LSN, 0, 8);
1130
725
                break;
1131
726
        case BUF_BLOCK_FILE_PAGE:
1152
747
        }
1153
748
}
1154
749
 
1155
 
# if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
1156
 
/********************************************************************//**
1157
 
Writes a flushable page asynchronously from the buffer pool to a file.
1158
 
NOTE: buf_pool->mutex and block->mutex must be held upon entering this
1159
 
function, and they will be released by this function after flushing.
1160
 
This is loosely based on buf_flush_batch() and buf_flush_page().
1161
 
@return TRUE if the page was flushed and the mutexes released */
1162
 
UNIV_INTERN
1163
 
ibool
1164
 
buf_flush_page_try(
 
750
/************************************************************************
 
751
Writes a page asynchronously from the buffer buf_pool to a file, if it can be
 
752
found in the buf_pool and it is in a flushable state. NOTE: in simulated aio
 
753
we must call os_aio_simulated_wake_handler_threads after we have posted a batch
 
754
of writes! */
 
755
static
 
756
ulint
 
757
buf_flush_try_page(
1165
758
/*===============*/
1166
 
        buf_pool_t*     buf_pool,       /*!< in/out: buffer pool instance */
1167
 
        buf_block_t*    block)          /*!< in/out: buffer control block */
1168
 
{
1169
 
        ut_ad(buf_pool_mutex_own(buf_pool));
1170
 
        ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
1171
 
        ut_ad(mutex_own(&block->mutex));
1172
 
 
1173
 
        if (!buf_flush_ready_for_flush(&block->page, BUF_FLUSH_LRU)) {
1174
 
                return(FALSE);
1175
 
        }
1176
 
 
1177
 
        if (buf_pool->n_flush[BUF_FLUSH_LRU] > 0
1178
 
            || buf_pool->init_flush[BUF_FLUSH_LRU]) {
1179
 
                /* There is already a flush batch of the same type running */
1180
 
                return(FALSE);
1181
 
        }
1182
 
 
1183
 
        buf_pool->init_flush[BUF_FLUSH_LRU] = TRUE;
1184
 
 
1185
 
        buf_page_set_io_fix(&block->page, BUF_IO_WRITE);
1186
 
 
1187
 
        buf_page_set_flush_type(&block->page, BUF_FLUSH_LRU);
1188
 
 
1189
 
        if (buf_pool->n_flush[BUF_FLUSH_LRU]++ == 0) {
1190
 
 
1191
 
                os_event_reset(buf_pool->no_flush[BUF_FLUSH_LRU]);
1192
 
        }
1193
 
 
1194
 
        /* VERY IMPORTANT:
1195
 
        Because any thread may call the LRU flush, even when owning
1196
 
        locks on pages, to avoid deadlocks, we must make sure that the
1197
 
        s-lock is acquired on the page without waiting: this is
1198
 
        accomplished because buf_flush_ready_for_flush() must hold,
1199
 
        and that requires the page not to be bufferfixed. */
1200
 
 
1201
 
        rw_lock_s_lock_gen(&block->lock, BUF_IO_WRITE);
1202
 
 
1203
 
        /* Note that the s-latch is acquired before releasing the
1204
 
        buf_pool mutex: this ensures that the latch is acquired
1205
 
        immediately. */
1206
 
 
1207
 
        mutex_exit(&block->mutex);
1208
 
        buf_pool_mutex_exit(buf_pool);
1209
 
 
1210
 
        /* Even though block is not protected by any mutex at this
1211
 
        point, it is safe to access block, because it is io_fixed and
1212
 
        oldest_modification != 0.  Thus, it cannot be relocated in the
1213
 
        buffer pool or removed from flush_list or LRU_list. */
1214
 
 
1215
 
        buf_flush_write_block_low(&block->page);
1216
 
 
1217
 
        buf_pool_mutex_enter(buf_pool);
1218
 
        buf_pool->init_flush[BUF_FLUSH_LRU] = FALSE;
1219
 
 
1220
 
        if (buf_pool->n_flush[BUF_FLUSH_LRU] == 0) {
1221
 
                /* The running flush batch has ended */
1222
 
                os_event_set(buf_pool->no_flush[BUF_FLUSH_LRU]);
1223
 
        }
1224
 
 
1225
 
        buf_pool_mutex_exit(buf_pool);
1226
 
        buf_flush_buffered_writes();
1227
 
 
1228
 
        return(TRUE);
1229
 
}
1230
 
# endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
1231
 
 
1232
 
/********************************************************************//**
1233
 
Writes a flushable page asynchronously from the buffer pool to a file.
1234
 
NOTE: in simulated aio we must call
1235
 
os_aio_simulated_wake_handler_threads after we have posted a batch of
1236
 
writes! NOTE: buf_pool->mutex and buf_page_get_mutex(bpage) must be
1237
 
held upon entering this function, and they will be released by this
1238
 
function. */
1239
 
static
1240
 
void
1241
 
buf_flush_page(
1242
 
/*===========*/
1243
 
        buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
1244
 
        buf_page_t*     bpage,          /*!< in: buffer control block */
1245
 
        enum buf_flush  flush_type)     /*!< in: BUF_FLUSH_LRU
1246
 
                                        or BUF_FLUSH_LIST */
1247
 
{
 
759
                                        /* out: 1 if a page was
 
760
                                        flushed, 0 otherwise */
 
761
        ulint           space,          /* in: space id */
 
762
        ulint           offset,         /* in: page offset */
 
763
        enum buf_flush  flush_type)     /* in: BUF_FLUSH_LRU, BUF_FLUSH_LIST,
 
764
                                        or BUF_FLUSH_SINGLE_PAGE */
 
765
{
 
766
        buf_page_t*     bpage;
1248
767
        mutex_t*        block_mutex;
1249
 
        ibool           is_uncompressed;
1250
 
 
1251
 
        ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
1252
 
        ut_ad(buf_pool_mutex_own(buf_pool));
1253
 
        ut_ad(buf_page_in_file(bpage));
1254
 
 
 
768
        ibool           locked;
 
769
 
 
770
        ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST
 
771
              || flush_type == BUF_FLUSH_SINGLE_PAGE);
 
772
 
 
773
        buf_pool_mutex_enter();
 
774
 
 
775
        bpage = buf_page_hash_get(space, offset);
 
776
 
 
777
        if (!bpage) {
 
778
                buf_pool_mutex_exit();
 
779
                return(0);
 
780
        }
 
781
 
 
782
        ut_a(buf_page_in_file(bpage));
1255
783
        block_mutex = buf_page_get_mutex(bpage);
1256
 
        ut_ad(mutex_own(block_mutex));
1257
 
 
1258
 
        ut_ad(buf_flush_ready_for_flush(bpage, flush_type));
1259
 
 
1260
 
        buf_page_set_io_fix(bpage, BUF_IO_WRITE);
1261
 
 
1262
 
        buf_page_set_flush_type(bpage, flush_type);
1263
 
 
1264
 
        if (buf_pool->n_flush[flush_type] == 0) {
1265
 
 
1266
 
                os_event_reset(buf_pool->no_flush[flush_type]);
 
784
 
 
785
        mutex_enter(block_mutex);
 
786
 
 
787
        if (!buf_flush_ready_for_flush(bpage, flush_type)) {
 
788
                mutex_exit(block_mutex);
 
789
                buf_pool_mutex_exit();
 
790
                return(0);
1267
791
        }
1268
792
 
1269
 
        buf_pool->n_flush[flush_type]++;
1270
 
 
1271
 
        is_uncompressed = (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
1272
 
        ut_ad(is_uncompressed == (block_mutex != &buf_pool->zip_mutex));
1273
 
 
1274
793
        switch (flush_type) {
1275
 
                ibool   is_s_latched;
1276
794
        case BUF_FLUSH_LIST:
 
795
                buf_page_set_io_fix(bpage, BUF_IO_WRITE);
 
796
 
 
797
                buf_page_set_flush_type(bpage, flush_type);
 
798
 
 
799
                if (buf_pool->n_flush[flush_type] == 0) {
 
800
 
 
801
                        os_event_reset(buf_pool->no_flush[flush_type]);
 
802
                }
 
803
 
 
804
                buf_pool->n_flush[flush_type]++;
 
805
 
1277
806
                /* If the simulated aio thread is not running, we must
1278
807
                not wait for any latch, as we may end up in a deadlock:
1279
808
                if buf_fix_count == 0, then we know we need not wait */
1280
809
 
1281
 
                is_s_latched = (bpage->buf_fix_count == 0);
1282
 
                if (is_s_latched && is_uncompressed) {
 
810
                locked = bpage->buf_fix_count == 0;
 
811
                if (locked
 
812
                    && buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE) {
1283
813
                        rw_lock_s_lock_gen(&((buf_block_t*) bpage)->lock,
1284
814
                                           BUF_IO_WRITE);
1285
815
                }
1286
816
 
1287
817
                mutex_exit(block_mutex);
1288
 
                buf_pool_mutex_exit(buf_pool);
1289
 
 
1290
 
                /* Even though bpage is not protected by any mutex at
1291
 
                this point, it is safe to access bpage, because it is
1292
 
                io_fixed and oldest_modification != 0.  Thus, it
1293
 
                cannot be relocated in the buffer pool or removed from
1294
 
                flush_list or LRU_list. */
1295
 
 
1296
 
                if (!is_s_latched) {
 
818
                buf_pool_mutex_exit();
 
819
 
 
820
                if (!locked) {
1297
821
                        buf_flush_buffered_writes();
1298
822
 
1299
 
                        if (is_uncompressed) {
 
823
                        if (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE) {
1300
824
                                rw_lock_s_lock_gen(&((buf_block_t*) bpage)
1301
825
                                                   ->lock, BUF_IO_WRITE);
1302
826
                        }
1309
833
                Because any thread may call the LRU flush, even when owning
1310
834
                locks on pages, to avoid deadlocks, we must make sure that the
1311
835
                s-lock is acquired on the page without waiting: this is
1312
 
                accomplished because buf_flush_ready_for_flush() must hold,
1313
 
                and that requires the page not to be bufferfixed. */
1314
 
 
1315
 
                if (is_uncompressed) {
 
836
                accomplished because in the if-condition above we require
 
837
                the page not to be bufferfixed (in function
 
838
                ..._ready_for_flush). */
 
839
 
 
840
                buf_page_set_io_fix(bpage, BUF_IO_WRITE);
 
841
 
 
842
                buf_page_set_flush_type(bpage, flush_type);
 
843
 
 
844
                if (buf_pool->n_flush[flush_type] == 0) {
 
845
 
 
846
                        os_event_reset(buf_pool->no_flush[flush_type]);
 
847
                }
 
848
 
 
849
                buf_pool->n_flush[flush_type]++;
 
850
 
 
851
                if (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE) {
1316
852
                        rw_lock_s_lock_gen(&((buf_block_t*) bpage)->lock,
1317
853
                                           BUF_IO_WRITE);
1318
854
                }
1322
858
                immediately. */
1323
859
 
1324
860
                mutex_exit(block_mutex);
1325
 
                buf_pool_mutex_exit(buf_pool);
 
861
                buf_pool_mutex_exit();
 
862
                break;
 
863
 
 
864
        case BUF_FLUSH_SINGLE_PAGE:
 
865
                buf_page_set_io_fix(bpage, BUF_IO_WRITE);
 
866
 
 
867
                buf_page_set_flush_type(bpage, flush_type);
 
868
 
 
869
                if (buf_pool->n_flush[flush_type] == 0) {
 
870
 
 
871
                        os_event_reset(buf_pool->no_flush[flush_type]);
 
872
                }
 
873
 
 
874
                buf_pool->n_flush[flush_type]++;
 
875
 
 
876
                mutex_exit(block_mutex);
 
877
                buf_pool_mutex_exit();
 
878
 
 
879
                if (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE) {
 
880
                        rw_lock_s_lock_gen(&((buf_block_t*) bpage)->lock,
 
881
                                           BUF_IO_WRITE);
 
882
                }
1326
883
                break;
1327
884
 
1328
885
        default:
1329
886
                ut_error;
1330
887
        }
1331
888
 
1332
 
        /* Even though bpage is not protected by any mutex at this
1333
 
        point, it is safe to access bpage, because it is io_fixed and
1334
 
        oldest_modification != 0.  Thus, it cannot be relocated in the
1335
 
        buffer pool or removed from flush_list or LRU_list. */
1336
 
 
1337
889
#ifdef UNIV_DEBUG
1338
890
        if (buf_debug_prints) {
1339
891
                fprintf(stderr,
1342
894
        }
1343
895
#endif /* UNIV_DEBUG */
1344
896
        buf_flush_write_block_low(bpage);
 
897
 
 
898
        return(1);
1345
899
}
1346
900
 
1347
 
/***********************************************************//**
1348
 
Flushes to disk all flushable pages within the flush area.
1349
 
@return number of pages flushed */
 
901
/***************************************************************
 
902
Flushes to disk all flushable pages within the flush area. */
1350
903
static
1351
904
ulint
1352
905
buf_flush_try_neighbors(
1353
906
/*====================*/
1354
 
        ulint           space,          /*!< in: space id */
1355
 
        ulint           offset,         /*!< in: page offset */
1356
 
        enum buf_flush  flush_type,     /*!< in: BUF_FLUSH_LRU or
 
907
                                        /* out: number of pages flushed */
 
908
        ulint           space,          /* in: space id */
 
909
        ulint           offset,         /* in: page offset */
 
910
        enum buf_flush  flush_type)     /* in: BUF_FLUSH_LRU or
1357
911
                                        BUF_FLUSH_LIST */
1358
 
        ulint           n_flushed,      /*!< in: number of pages
1359
 
                                        flushed so far in this batch */
1360
 
        ulint           n_to_flush)     /*!< in: maximum number of pages
1361
 
                                        we are allowed to flush */
1362
912
{
 
913
        buf_page_t*     bpage;
 
914
        ulint           low, high;
 
915
        ulint           count           = 0;
1363
916
        ulint           i;
1364
 
        ulint           low;
1365
 
        ulint           high;
1366
 
        ulint           count = 0;
1367
 
        buf_pool_t*     buf_pool = buf_pool_get(space, offset);
1368
917
 
1369
918
        ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
1370
919
 
1371
920
        if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) {
1372
 
                /* If there is little space, it is better not to flush
1373
 
                any block except from the end of the LRU list */
 
921
                /* If there is little space, it is better not to flush any
 
922
                block except from the end of the LRU list */
1374
923
 
1375
924
                low = offset;
1376
925
                high = offset + 1;
1377
926
        } else {
1378
 
                /* When flushed, dirty blocks are searched in
1379
 
                neighborhoods of this size, and flushed along with the
1380
 
                original page. */
 
927
                /* When flushed, dirty blocks are searched in neighborhoods of
 
928
                this size, and flushed along with the original page. */
1381
929
 
1382
 
                ulint   buf_flush_area;
1383
 
        
1384
 
                buf_flush_area  = ut_min(
1385
 
                        BUF_READ_AHEAD_AREA(buf_pool),
1386
 
                        buf_pool->curr_size / 16);
 
930
                ulint   buf_flush_area  = ut_min(BUF_READ_AHEAD_AREA,
 
931
                                                 buf_pool->curr_size / 16);
1387
932
 
1388
933
                low = (offset / buf_flush_area) * buf_flush_area;
1389
934
                high = (offset / buf_flush_area + 1) * buf_flush_area;
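                /* Worked example (illustrative): with buf_flush_area == 64
                and offset == 1000, low == (1000 / 64) * 64 == 960 and
                high == (1000 / 64 + 1) * 64 == 1024, so the loop below
                scans page offsets [960, 1024) of this tablespace for dirty
                neighbors to flush together. */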
1395
940
                high = fil_space_get_size(space);
1396
941
        }

        for (i = low; i < high; i++) {

                buf_page_t*     bpage;

                if ((count + n_flushed) >= n_to_flush) {

                        /* We have already flushed enough pages and
                        should call it a day. There is, however, one
                        exception. If the page whose neighbors we
                        are flushing has not been flushed yet then
                        we'll try to flush the victim that we
                        selected originally. */
                        if (i <= offset) {
                                i = offset;
                        } else {
                                break;
                        }
                }

                buf_pool = buf_pool_get(space, i);

                buf_pool_mutex_enter(buf_pool);

                /* We only want to flush pages from this buffer pool. */
                bpage = buf_page_hash_get(buf_pool, space, i);

                if (!bpage) {

                        buf_pool_mutex_exit(buf_pool);
                        continue;
                }

                ut_a(buf_page_in_file(bpage));

                /* We avoid flushing 'non-old' blocks in an LRU flush,
                because the flushed blocks are soon freed */

                if (flush_type != BUF_FLUSH_LRU
                    || i == offset
                    || buf_page_is_old(bpage)) {
                        mutex_t* block_mutex = buf_page_get_mutex(bpage);

                        mutex_enter(block_mutex);

                        if (buf_flush_ready_for_flush(bpage, flush_type)
                            && (i == offset || !bpage->buf_fix_count)) {
                                /* We only try to flush those
                                neighbors != offset where the buf fix
                                count is zero, as we then know that we
                                probably can latch the page without a
                                semaphore wait. Semaphore waits are
                                expensive because we must flush the
                                doublewrite buffer before we start
                                waiting. */

                                buf_flush_page(buf_pool, bpage, flush_type);
                                ut_ad(!mutex_own(block_mutex));
                                ut_ad(!buf_pool_mutex_own(buf_pool));
                                count++;
                                continue;
                        } else {
                                mutex_exit(block_mutex);
                        }
                }

                buf_pool_mutex_exit(buf_pool);
        }

        return(count);
}

/********************************************************************//**
Check if the block is modified and ready for flushing. If the block
is ready to flush then flush the page and try to flush its neighbors.
@return TRUE if the buf_pool mutex was not released during this function;
this does not guarantee that any pages were written. The number of pages
written is added to *count. */
static
ibool
buf_flush_page_and_try_neighbors(
/*=============================*/
        buf_page_t*     bpage,          /*!< in: buffer control block,
                                        must be
                                        buf_page_in_file(bpage) */
        enum buf_flush  flush_type,     /*!< in: BUF_FLUSH_LRU
                                        or BUF_FLUSH_LIST */
        ulint           n_to_flush,     /*!< in: number of pages to
                                        flush */
        ulint*          count)          /*!< in/out: number of pages
                                        flushed */
{
        mutex_t*        block_mutex;
        ibool           flushed = FALSE;
#ifdef UNIV_DEBUG
        buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
#endif /* UNIV_DEBUG */

        ut_ad(buf_pool_mutex_own(buf_pool));

        block_mutex = buf_page_get_mutex(bpage);
        mutex_enter(block_mutex);

        ut_a(buf_page_in_file(bpage));

        if (buf_flush_ready_for_flush(bpage, flush_type)) {
                ulint           space;
                ulint           offset;
                buf_pool_t*     buf_pool;

                buf_pool = buf_pool_from_bpage(bpage);

                buf_pool_mutex_exit(buf_pool);

                /* These fields are protected by both the
                buffer pool mutex and block mutex. */
                space = buf_page_get_space(bpage);
                offset = buf_page_get_page_no(bpage);

                mutex_exit(block_mutex);

                /* Try to flush also all the neighbors */
                *count += buf_flush_try_neighbors(space,
                                                  offset,
                                                  flush_type,
                                                  *count,
                                                  n_to_flush);

                buf_pool_mutex_enter(buf_pool);
                flushed = TRUE;
        } else {
                mutex_exit(block_mutex);
        }

        ut_ad(buf_pool_mutex_own(buf_pool));

        return(flushed);
}
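
/* The callers below (buf_flush_LRU_list_batch() and
buf_flush_flush_list_batch()) drive their scans with this return value:
FALSE means the buffer pool mutex was never released and it is safe to
step to the previous list node, while TRUE means something was flushed
and the scan must restart from the tail of the list. */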

/*******************************************************************//**
This utility flushes dirty blocks from the end of the LRU list.
In the case of an LRU flush the calling thread may own latches to
pages: to avoid deadlocks, this function must be written so that it
cannot end up waiting for these latches!
@return number of blocks for which the write request was queued. */
static
ulint
buf_flush_LRU_list_batch(
/*=====================*/
        buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
        ulint           max)            /*!< in: max of blocks to flush */
{
        buf_page_t*     bpage;
        ulint           count = 0;

        ut_ad(buf_pool_mutex_own(buf_pool));

        do {
                /* Start from the end of the list looking for a
                suitable block to be flushed. */
                bpage = UT_LIST_GET_LAST(buf_pool->LRU);

                /* Iterate backwards over the LRU list till we find
                a block that isn't ready for flushing. */
                while (bpage != NULL
                       && !buf_flush_page_and_try_neighbors(
                                bpage, BUF_FLUSH_LRU, max, &count)) {

                        bpage = UT_LIST_GET_PREV(LRU, bpage);
                }
        } while (bpage != NULL && count < max);

        /* We keep track of all flushes happening as part of LRU
        flush. When estimating the desired rate at which flush_list
        should be flushed, we factor in this value. */
        buf_lru_flush_page_count += count;

        ut_ad(buf_pool_mutex_own(buf_pool));

        return(count);
}

/*******************************************************************//**
This utility flushes dirty blocks from the end of the flush_list.
The calling thread is not allowed to own any latches on pages!
@return number of blocks for which the write request was queued;
ULINT_UNDEFINED if there was a flush of the same type already
running */
static
ulint
buf_flush_flush_list_batch(
/*=======================*/
        buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
        ulint           min_n,          /*!< in: wished minimum number
                                        of blocks flushed (it is not
                                        guaranteed that the actual
                                        number is that big, though) */
        ib_uint64_t     lsn_limit)      /*!< all blocks whose
                                        oldest_modification is smaller
                                        than this should be flushed (if
                                        their number does not exceed
                                        min_n) */
{
        ulint           len;
        buf_page_t*     bpage;
        ulint           count = 0;

        ut_ad(buf_pool_mutex_own(buf_pool));

        /* If we have flushed enough, leave the loop */
        do {
                /* Start from the end of the list looking for a suitable
                block to be flushed. */

                buf_flush_list_mutex_enter(buf_pool);

                /* We use len here because theoretically insertions can
                happen in the flush_list below while we are traversing
                it for a suitable candidate for flushing. We'd like to
                set a limit on how far we are willing to traverse
                the list. */
                len = UT_LIST_GET_LEN(buf_pool->flush_list);
                bpage = UT_LIST_GET_LAST(buf_pool->flush_list);

                if (bpage) {
                        ut_a(bpage->oldest_modification > 0);
                }

                if (!bpage || bpage->oldest_modification >= lsn_limit) {

                        /* We have flushed enough */
                        buf_flush_list_mutex_exit(buf_pool);
                        break;
                }

                ut_a(bpage->oldest_modification > 0);

                ut_ad(bpage->in_flush_list);

                buf_flush_list_mutex_exit(buf_pool);

                /* The list may change during the flushing and we cannot
                safely preserve within this function a pointer to a
                block in the list! */
                while (bpage != NULL
                       && len > 0
                       && !buf_flush_page_and_try_neighbors(
                                bpage, BUF_FLUSH_LIST, min_n, &count)) {

                        buf_flush_list_mutex_enter(buf_pool);

                        /* If we are here that means that buf_pool->mutex
                        was not released in buf_flush_page_and_try_neighbors()
                        above and this guarantees that bpage didn't get
                        relocated since we released the flush_list
                        mutex above. There is a chance, however, that
                        the bpage got removed from flush_list (not
                        currently possible because flush_list_remove()
                        also obtains buf_pool mutex but that may change
                        in future). To avoid this scenario we check
                        the oldest_modification and if it is zero
                        we start all over again. */
                        if (bpage->oldest_modification == 0) {
                                buf_flush_list_mutex_exit(buf_pool);
                                break;
                        }

                        bpage = UT_LIST_GET_PREV(list, bpage);

                        ut_ad(!bpage || bpage->in_flush_list);

                        buf_flush_list_mutex_exit(buf_pool);

                        --len;
                }

        } while (count < min_n && bpage != NULL && len > 0);

        ut_ad(buf_pool_mutex_own(buf_pool));

        return(count);
}
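
/* Note on the termination conditions above: the scan stops when the
block at the tail already has oldest_modification >= lsn_limit (nothing
older is left to flush for this instance), when min_n pages have been
queued, or when len steps have been taken, which bounds the traversal
even if new blocks keep being inserted into the flush_list while it is
being walked. */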

/*******************************************************************//**
This utility flushes dirty blocks from the end of the LRU list or flush_list.
NOTE 1: in the case of an LRU flush the calling thread may own latches to
pages: to avoid deadlocks, this function must be written so that it cannot
end up waiting for these latches! NOTE 2: in the case of a flush list flush,
the calling thread is not allowed to own any latches on pages!
@return number of blocks for which the write request was queued;
ULINT_UNDEFINED if there was a flush of the same type already running */
static
ulint
buf_flush_batch(
/*============*/
        buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
        enum buf_flush  flush_type,     /*!< in: BUF_FLUSH_LRU or
                                        BUF_FLUSH_LIST; if BUF_FLUSH_LIST,
                                        then the caller must not own any
                                        latches on pages */
        ulint           min_n,          /*!< in: wished minimum number of blocks
                                        flushed (it is not guaranteed that the
                                        actual number is that big, though) */
        ib_uint64_t     lsn_limit)      /*!< in: in the case of BUF_FLUSH_LIST
                                        all blocks whose oldest_modification is
                                        smaller than this should be flushed
                                        (if their number does not exceed
                                        min_n), otherwise ignored */
{
        ulint           count   = 0;

        ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
#ifdef UNIV_SYNC_DEBUG
        ut_ad((flush_type != BUF_FLUSH_LIST)
              || sync_thread_levels_empty_gen(TRUE));
#endif /* UNIV_SYNC_DEBUG */

        buf_pool_mutex_enter(buf_pool);

        /* Note: The buffer pool mutex is released and reacquired within
        the flush functions. */
        switch(flush_type) {
        case BUF_FLUSH_LRU:
                count = buf_flush_LRU_list_batch(buf_pool, min_n);
                break;
        case BUF_FLUSH_LIST:
                count = buf_flush_flush_list_batch(buf_pool, min_n, lsn_limit);
                break;
        default:
                ut_error;
        }

        buf_pool_mutex_exit(buf_pool);

        buf_flush_buffered_writes();

#ifdef UNIV_DEBUG
        if (buf_debug_prints && count > 0) {
                fprintf(stderr, flush_type == BUF_FLUSH_LRU
                        ? "Flushed %lu pages in LRU flush\n"
                        : "Flushed %lu pages in flush list flush\n",
                        (ulong) count);
        }
#endif /* UNIV_DEBUG */

        srv_buf_pool_flushed += count;

        return(count);
}
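
/* Illustrative call (hypothetical values): to request that up to 100
dirty pages of one instance be written out regardless of their LSN, a
caller could do

        n = buf_flush_batch(buf_pool, BUF_FLUSH_LIST, 100, IB_ULONGLONG_MAX);

buf_flush_LRU() and buf_flush_list() below are the real entry points;
they wrap this call in buf_flush_start()/buf_flush_end() and report the
totals through buf_flush_common(). */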

/******************************************************************//**
Gather the aggregated stats for both flush list and LRU list flushing */
static
void
buf_flush_common(
/*=============*/
        enum buf_flush  flush_type,     /*!< in: type of flush */
        ulint           page_count)     /*!< in: number of pages flushed */
{
        buf_flush_buffered_writes();

        ut_a(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);

#ifdef UNIV_DEBUG
        if (buf_debug_prints && page_count > 0) {
                fprintf(stderr, flush_type == BUF_FLUSH_LRU
                        ? "Flushed %lu pages in LRU flush\n"
                        : "Flushed %lu pages in flush list flush\n",
                        (ulong) page_count);
        }
#endif /* UNIV_DEBUG */

        srv_buf_pool_flushed += page_count;

        if (flush_type == BUF_FLUSH_LRU) {
                /* We keep track of all flushes happening as part of LRU
                flush. When estimating the desired rate at which flush_list
                should be flushed we factor in this value. */
                buf_lru_flush_page_count += page_count;
        }
}

/******************************************************************//**
Start a buffer flush batch for LRU or flush list */
static
ibool
buf_flush_start(
/*============*/
        buf_pool_t*     buf_pool,       /*!< buffer pool instance */
        enum buf_flush  flush_type)     /*!< in: BUF_FLUSH_LRU
                                        or BUF_FLUSH_LIST */
{
        buf_pool_mutex_enter(buf_pool);

        if (buf_pool->n_flush[flush_type] > 0
           || buf_pool->init_flush[flush_type] == TRUE) {

                /* There is already a flush batch of the same type running */

                buf_pool_mutex_exit(buf_pool);

                return(FALSE);
        }

        buf_pool->init_flush[flush_type] = TRUE;

        buf_pool_mutex_exit(buf_pool);

        return(TRUE);
}

/******************************************************************//**
End a buffer flush batch for LRU or flush list */
static
void
buf_flush_end(
/*==========*/
        buf_pool_t*     buf_pool,       /*!< buffer pool instance */
        enum buf_flush  flush_type)     /*!< in: BUF_FLUSH_LRU
                                        or BUF_FLUSH_LIST */
{
        buf_pool_mutex_enter(buf_pool);

        buf_pool->init_flush[flush_type] = FALSE;

        if (buf_pool->n_flush[flush_type] == 0) {

                /* The running flush batch has ended */

                os_event_set(buf_pool->no_flush[flush_type]);
        }

        buf_pool_mutex_exit(buf_pool);
}

/******************************************************************//**
Waits until a flush batch of the given type ends */
UNIV_INTERN
void
buf_flush_wait_batch_end(
/*=====================*/
        buf_pool_t*     buf_pool,       /*!< buffer pool instance */
        enum buf_flush  type)           /*!< in: BUF_FLUSH_LRU
                                        or BUF_FLUSH_LIST */
{
        ut_ad(type == BUF_FLUSH_LRU || type == BUF_FLUSH_LIST);

        if (buf_pool == NULL) {
                ulint   i;

                for (i = 0; i < srv_buf_pool_instances; ++i) {
                        buf_pool_t*     i_buf_pool = buf_pool_from_array(i);

                        os_event_wait(i_buf_pool->no_flush[type]);
                }
        } else {
                os_event_wait(buf_pool->no_flush[type]);
        }
}
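
/* The batch protocol: buf_flush_start() refuses to start a second batch
of the same type for the same instance (init_flush[] marks a batch that
is being set up), buf_flush_end() clears that flag and, if no page
writes are still pending (n_flush[] == 0), signals the no_flush[] event
that buf_flush_wait_batch_end() blocks on; passing buf_pool == NULL
waits for the batch to end in every buffer pool instance. */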

/*******************************************************************//**
This utility flushes dirty blocks from the end of the LRU list.
NOTE: The calling thread may own latches to pages: to avoid deadlocks,
this function must be written so that it cannot end up waiting for these
latches!
@return number of blocks for which the write request was queued;
ULINT_UNDEFINED if there was a flush of the same type already running */
UNIV_INTERN
ulint
buf_flush_LRU(
/*==========*/
        buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
        ulint           min_n)          /*!< in: wished minimum number of blocks
                                        flushed (it is not guaranteed that the
                                        actual number is that big, though) */
{
        ulint           page_count;

        if (!buf_flush_start(buf_pool, BUF_FLUSH_LRU)) {
                return(ULINT_UNDEFINED);
        }

        page_count = buf_flush_batch(buf_pool, BUF_FLUSH_LRU, min_n, 0);

        buf_flush_end(buf_pool, BUF_FLUSH_LRU);

        buf_flush_common(BUF_FLUSH_LRU, page_count);

        return(page_count);
}
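
/* For an LRU batch the lsn_limit argument of buf_flush_batch() is
ignored, so buf_flush_LRU() simply passes 0; only BUF_FLUSH_LIST batches
use it to bound which blocks must be flushed. */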

/*******************************************************************//**
This utility flushes dirty blocks from the end of the flush list of
all buffer pool instances.
NOTE: The calling thread is not allowed to own any latches on pages!
@return number of blocks for which the write request was queued;
ULINT_UNDEFINED if there was a flush of the same type already running */
UNIV_INTERN
ulint
buf_flush_list(
/*===========*/
        ulint           min_n,          /*!< in: wished minimum number of blocks
                                        flushed (it is not guaranteed that the
                                        actual number is that big, though) */
        ib_uint64_t     lsn_limit)      /*!< in the case BUF_FLUSH_LIST all
                                        blocks whose oldest_modification is
                                        smaller than this should be flushed
                                        (if their number does not exceed
                                        min_n), otherwise ignored */
{
        ulint           i;
        ulint           total_page_count = 0;
        ibool           skipped = FALSE;

        if (min_n != ULINT_MAX) {
                /* Ensure that flushing is spread evenly amongst the
                buffer pool instances. When min_n is ULINT_MAX
                we need to flush everything up to the lsn limit
                so no limit here. */
                min_n = (min_n + srv_buf_pool_instances - 1)
                         / srv_buf_pool_instances;
        }

        /* Flush to lsn_limit in all buffer pool instances */
        for (i = 0; i < srv_buf_pool_instances; i++) {
                buf_pool_t*     buf_pool;
                ulint           page_count = 0;

                buf_pool = buf_pool_from_array(i);

                if (!buf_flush_start(buf_pool, BUF_FLUSH_LIST)) {
                        /* We have two choices here. If lsn_limit was
                        specified then skipping an instance of buffer
                        pool means we cannot guarantee that all pages
                        up to lsn_limit have been flushed. We can
                        return right now with failure or we can try
                        to flush remaining buffer pools up to the
                        lsn_limit. We attempt to flush other buffer
                        pools based on the assumption that it will
                        help in the retry which will follow the
                        failure. */
                        skipped = TRUE;

                        continue;
                }

                page_count = buf_flush_batch(
                        buf_pool, BUF_FLUSH_LIST, min_n, lsn_limit);

                buf_flush_end(buf_pool, BUF_FLUSH_LIST);

                buf_flush_common(BUF_FLUSH_LIST, page_count);

                total_page_count += page_count;
        }

        return(lsn_limit != IB_ULONGLONG_MAX && skipped
               ? ULINT_UNDEFINED : total_page_count);
}
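
/* Hypothetical usage sketch (the callers live outside this file): a
thread that needs every modification older than some target LSN to be
on disk could issue

        n = buf_flush_list(ULINT_MAX, target_lsn);

and, if this returns ULINT_UNDEFINED because an instance was skipped,
wait with buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST) and retry. */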

/*********************************************************************//**
Gives a recommendation of how many blocks should be flushed to establish
a big enough margin of replaceable blocks near the end of the LRU list
and in the free list.
@return number of blocks which should be flushed from the end of the
LRU list */
static
ulint
buf_flush_LRU_recommendation(
/*=========================*/
        buf_pool_t*     buf_pool)               /*!< in: Buffer pool instance */
{
        buf_page_t*     bpage;
        ulint           n_replaceable;
        ulint           distance        = 0;

        buf_pool_mutex_enter(buf_pool);

        n_replaceable = UT_LIST_GET_LEN(buf_pool->free);

        bpage = UT_LIST_GET_LAST(buf_pool->LRU);

        while ((bpage != NULL)
               && (n_replaceable < BUF_FLUSH_FREE_BLOCK_MARGIN(buf_pool)
                   + BUF_FLUSH_EXTRA_MARGIN(buf_pool))
               && (distance < BUF_LRU_FREE_SEARCH_LEN(buf_pool))) {

                mutex_t* block_mutex = buf_page_get_mutex(bpage);

                mutex_enter(block_mutex);

                if (buf_flush_ready_for_replace(bpage)) {
                        n_replaceable++;
                }

                mutex_exit(block_mutex);

                distance++;

                bpage = UT_LIST_GET_PREV(LRU, bpage);
        }

        buf_pool_mutex_exit(buf_pool);

        if (n_replaceable >= BUF_FLUSH_FREE_BLOCK_MARGIN(buf_pool)) {

                return(0);
        }

        return(BUF_FLUSH_FREE_BLOCK_MARGIN(buf_pool)
               + BUF_FLUSH_EXTRA_MARGIN(buf_pool)
               - n_replaceable);
}
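
/* Example with made-up margins: if BUF_FLUSH_FREE_BLOCK_MARGIN(buf_pool)
plus BUF_FLUSH_EXTRA_MARGIN(buf_pool) came to 120 blocks and only 90
replaceable blocks were found, the recommendation is to flush 30 blocks;
once n_replaceable reaches the free-block margin alone, nothing is
recommended. */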

/*********************************************************************//**
Flushes pages from the end of the LRU list if there is too small a margin
of replaceable pages there or in the free list. VERY IMPORTANT: this function
is called also by threads which have locks on pages. To avoid deadlocks, we
flush only pages such that the s-lock required for flushing can be acquired
immediately, without waiting. */
UNIV_INTERN
void
buf_flush_free_margin(
/*==================*/
        buf_pool_t*     buf_pool)               /*!< in: Buffer pool instance */
{
        ulint   n_to_flush;

        n_to_flush = buf_flush_LRU_recommendation(buf_pool);

        if (n_to_flush > 0) {
                ulint   n_flushed;

                n_flushed = buf_flush_LRU(buf_pool, n_to_flush);

                if (n_flushed == ULINT_UNDEFINED) {
                        /* There was an LRU type flush batch already running;
                        let us wait for it to end */

                        buf_flush_wait_batch_end(buf_pool, BUF_FLUSH_LRU);
                }
        }
}

/*********************************************************************//**
Flushes pages from the end of all the LRU lists. */
UNIV_INTERN
void
buf_flush_free_margins(void)
/*========================*/
{
        ulint   i;

        for (i = 0; i < srv_buf_pool_instances; i++) {
                buf_pool_t*     buf_pool;

                buf_pool = buf_pool_from_array(i);

                buf_flush_free_margin(buf_pool);
        }
}

/*********************************************************************
Update the historical stats that we are collecting for flush rate
heuristics at the end of each interval.
Flush rate heuristic depends on (a) rate of redo log generation and
(b) the rate at which LRU flush is happening. */
UNIV_INTERN
void
buf_flush_stat_update(void)
/*=======================*/
{
        buf_flush_stat_t*       item;
        ib_uint64_t             lsn_diff;
        ib_uint64_t             lsn;
        ulint                   n_flushed;

        lsn = log_get_lsn();
        if (buf_flush_stat_cur.redo == 0) {
                /* First time around. Just update the current LSN
                and return. */
                buf_flush_stat_cur.redo = lsn;
                return;
        }

        item = &buf_flush_stat_arr[buf_flush_stat_arr_ind];

        /* values for this interval */
        lsn_diff = lsn - buf_flush_stat_cur.redo;
        n_flushed = buf_lru_flush_page_count
                    - buf_flush_stat_cur.n_flushed;

        /* add the current value and subtract the obsolete entry. */
        buf_flush_stat_sum.redo += lsn_diff - item->redo;
        buf_flush_stat_sum.n_flushed += n_flushed - item->n_flushed;

        /* put current entry in the array. */
        item->redo = lsn_diff;
        item->n_flushed = n_flushed;

        /* update the index */
        buf_flush_stat_arr_ind++;
        buf_flush_stat_arr_ind %= BUF_FLUSH_STAT_N_INTERVAL;

        /* reset the current entry. */
        buf_flush_stat_cur.redo = lsn;
        buf_flush_stat_cur.n_flushed = buf_lru_flush_page_count;
}
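
/* buf_flush_stat_arr[] is treated as a circular buffer: each call
overwrites the oldest slot with the redo bytes and LRU page flushes of
the interval that just ended, and buf_flush_stat_sum is adjusted by the
difference between the new values and the slot being evicted, so the
running sums always cover the last BUF_FLUSH_STAT_N_INTERVAL completed
intervals. */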

/*********************************************************************
Determines the fraction of dirty pages that need to be flushed based
on the speed at which we generate redo log. Note that if redo log
is generated at a significant rate without corresponding increase
in the number of dirty pages (for example, an in-memory workload)
it can cause IO bursts of flushing. This function implements heuristics
to avoid this burstiness.
@return number of dirty pages to be flushed / second */
UNIV_INTERN
ulint
buf_flush_get_desired_flush_rate(void)
/*==================================*/
{
        ulint           i;
        lint            rate;
        ulint           redo_avg;
        ulint           n_dirty = 0;
        ulint           n_flush_req;
        ulint           lru_flush_avg;
        ib_uint64_t     lsn = log_get_lsn();
        ulint           log_capacity = log_get_capacity();

        /* log_capacity should never be zero after the initialization
        of log subsystem. */
        ut_ad(log_capacity != 0);

        /* Get total number of dirty pages. It is OK to access
        flush_list without holding any mutex as we are using this
        only for heuristics. */
        for (i = 0; i < srv_buf_pool_instances; i++) {
                buf_pool_t*     buf_pool;

                buf_pool = buf_pool_from_array(i);
                n_dirty += UT_LIST_GET_LEN(buf_pool->flush_list);
        }

        /* An overflow can happen if we generate more than 2^32 bytes
        of redo in this interval i.e.: 4G of redo in 1 second. We can
        safely consider this as infinity because if we ever come close
        to 4G we'll start a synchronous flush of dirty pages. */
        /* redo_avg below is average at which redo is generated in
        past BUF_FLUSH_STAT_N_INTERVAL + redo generated in the current
        interval. */
        redo_avg = (ulint) (buf_flush_stat_sum.redo
                            / BUF_FLUSH_STAT_N_INTERVAL
                            + (lsn - buf_flush_stat_cur.redo));

        /* An overflow can happen possibly if we flush more than 2^32
        pages in BUF_FLUSH_STAT_N_INTERVAL. This is a very very
        unlikely scenario. Even when this happens it means that our
        flush rate will be off the mark. It won't affect correctness
        of any subsystem. */
        /* lru_flush_avg below is rate at which pages are flushed as
        part of LRU flush in past BUF_FLUSH_STAT_N_INTERVAL + the
        number of pages flushed in the current interval. */
        lru_flush_avg = buf_flush_stat_sum.n_flushed
                        / BUF_FLUSH_STAT_N_INTERVAL
                        + (buf_lru_flush_page_count
                           - buf_flush_stat_cur.n_flushed);

        n_flush_req = (n_dirty * redo_avg) / log_capacity;

        /* The number of pages that we want to flush from the flush
        list is the difference between the required rate and the
        number of pages that we are historically flushing from the
        LRU list */
        rate = n_flush_req - lru_flush_avg;
        return(rate > 0 ? (ulint) rate : 0);
}
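
/* Illustrative arithmetic (made-up numbers): with n_dirty = 10000 dirty
pages, an average redo rate of redo_avg = 1 MB per interval and
log_capacity = 100 MB, n_flush_req = 10000 * 1 / 100 = 100 pages.  If
LRU flushing already averages lru_flush_avg = 30 pages per interval, the
function asks for only 100 - 30 = 70 flush_list pages per second. */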

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/******************************************************************//**
Validates the flush list.
@return TRUE if ok */
static
ibool
buf_flush_validate_low(
/*===================*/
        buf_pool_t*     buf_pool)               /*!< in: Buffer pool instance */
{
        buf_page_t*             bpage;
        const ib_rbt_node_t*    rnode = NULL;

        ut_ad(buf_flush_list_mutex_own(buf_pool));

        UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list,
                         ut_ad(ut_list_node_313->in_flush_list));

        bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);

        /* If we are in recovery mode i.e.: flush_rbt != NULL
        then each block in the flush_list must also be present
        in the flush_rbt. */
        if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
                rnode = rbt_first(buf_pool->flush_rbt);
        }

        while (bpage != NULL) {
                const ib_uint64_t om = bpage->oldest_modification;

                ut_ad(buf_pool_from_bpage(bpage) == buf_pool);

                ut_ad(bpage->in_flush_list);

                /* A page in buf_pool->flush_list can be in
                BUF_BLOCK_REMOVE_HASH state. This happens when a page
                is in the middle of being relocated. In that case the
                original descriptor can have this state and still be
                in the flush list waiting to acquire the
                buf_pool->flush_list_mutex to complete the relocation. */
                ut_a(buf_page_in_file(bpage)
                     || buf_page_get_state(bpage) == BUF_BLOCK_REMOVE_HASH);
                ut_a(om > 0);

                if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
                        buf_page_t** prpage;

                        ut_a(rnode);
                        prpage = rbt_value(buf_page_t*, rnode);

                        ut_a(*prpage);
                        ut_a(*prpage == bpage);
                        rnode = rbt_next(buf_pool->flush_rbt, rnode);
                }

                bpage = UT_LIST_GET_NEXT(list, bpage);

                ut_a(!bpage || om >= bpage->oldest_modification);
        }

        /* By this time we must have exhausted the traversal of
        flush_rbt (if active) as well. */
        ut_a(rnode == NULL);

        return(TRUE);
}
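
/* Besides checking flush_list membership and the recovery-time
flush_rbt, the loop above asserts that oldest_modification never
increases while walking from the head of the flush_list towards its
tail, i.e. the blocks with the oldest modifications sit at the tail.
buf_flush_flush_list_batch() relies on that ordering when it flushes
from the tail until it reaches a block whose oldest_modification is
already >= lsn_limit. */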

/******************************************************************//**
Validates the flush list.
@return TRUE if ok */
UNIV_INTERN
ibool
buf_flush_validate(
/*===============*/
        buf_pool_t*     buf_pool)       /*!< buffer pool instance */
{
        ibool   ret;

        buf_flush_list_mutex_enter(buf_pool);

        ret = buf_flush_validate_low(buf_pool);

        buf_flush_list_mutex_exit(buf_pool);

        return(ret);
}
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#endif /* !UNIV_HOTBACKUP */