~drizzle-trunk/drizzle/development

« back to all changes in this revision

Viewing changes to storage/innobase/buf/buf0flu.c

Merge Revision revid:marko.makela@oracle.com-20100514133144-fe0l0b89tea4x4uu from MySQL InnoDB

Original revid:marko.makela@oracle.com-20100514133144-fe0l0b89tea4x4uu

Original Authors: Marko Mäkelä <marko.makela@oracle.com>
Original commit message:
Merge from mysql-5.1-innodb:

Post-merge fixes: Remove the MYSQL_VERSION_ID checks, because they only
apply to the InnoDB Plugin. Fix potential race condition accessing
trx->op_info and trx->detailed_error.
------------------------------------------------------------
revno: 3466
revision-id: marko.makela@oracle.com-20100514130815-ym7j7cfu88ro6km4
parent: marko.makela@oracle.com-20100514130228-n3n42nw7ht78k0wn
committer: Marko Mäkelä <marko.makela@oracle.com>
branch nick: mysql-5.1-innodb2
timestamp: Fri 2010-05-14 16:08:15 +0300
message:
  Make the InnoDB FOREIGN KEY parser understand multi-statements. (Bug #48024)
  Also make InnoDB treat /*/ as only the start of a comment. (Bug #53644).

  This fixes the bugs in the InnoDB Plugin.

  ha_innodb.h: Use trx_query_string() instead of trx_query() when
  available (MySQL 5.1.42 or later).

  innobase_get_stmt(): New function, to retrieve the currently running
  SQL statement.

  struct trx_struct: Remove mysql_query_str. Use innobase_get_stmt() instead.

  dict_strip_comments(): Add and observe the parameter sql_length. Treat
  /*/ as the start of a comment.

  dict_create_foreign_constraints(), row_table_add_foreign_constraints():
  Add the parameter sql_length.

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
/******************************************************
2
 
The database buffer buf_pool flush algorithm
3
 
 
4
 
(c) 1995-2001 Innobase Oy
5
 
 
6
 
Created 11/11/1995 Heikki Tuuri
7
 
*******************************************************/
8
 
 
9
 
#include "buf0flu.h"
10
 
 
11
 
#ifdef UNIV_NONINL
12
 
#include "buf0flu.ic"
13
 
#include "trx0sys.h"
14
 
#endif
15
 
 
16
 
#include "ut0byte.h"
17
 
#include "ut0lst.h"
18
 
#include "page0page.h"
19
 
#include "page0zip.h"
20
 
#include "fil0fil.h"
21
 
#include "buf0buf.h"
22
 
#include "buf0lru.h"
23
 
#include "buf0rea.h"
24
 
#include "ibuf0ibuf.h"
25
 
#include "log0log.h"
26
 
#include "os0file.h"
27
 
#include "trx0sys.h"
28
 
#include "srv0srv.h"
29
 
 
30
 
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/**********************************************************************
Validates the flush list (debug builds only). */
static
ibool
buf_flush_validate_low(void);
/*========================*/
		/* out: TRUE if ok */
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
39
 
 
40
 
/************************************************************************
41
 
Inserts a modified block into the flush list. */
42
 
UNIV_INTERN
43
 
void
44
 
buf_flush_insert_into_flush_list(
45
 
/*=============================*/
46
 
        buf_page_t*     bpage)  /* in: block which is modified */
47
 
{
48
 
        ut_ad(buf_pool_mutex_own());
49
 
        ut_ad((UT_LIST_GET_FIRST(buf_pool->flush_list) == NULL)
50
 
              || (UT_LIST_GET_FIRST(buf_pool->flush_list)->oldest_modification
51
 
                  <= bpage->oldest_modification));
52
 
 
53
 
        switch (buf_page_get_state(bpage)) {
54
 
        case BUF_BLOCK_ZIP_PAGE:
55
 
                mutex_enter(&buf_pool_zip_mutex);
56
 
                buf_page_set_state(bpage, BUF_BLOCK_ZIP_DIRTY);
57
 
                mutex_exit(&buf_pool_zip_mutex);
58
 
                UT_LIST_REMOVE(list, buf_pool->zip_clean, bpage);
59
 
                /* fall through */
60
 
        case BUF_BLOCK_ZIP_DIRTY:
61
 
        case BUF_BLOCK_FILE_PAGE:
62
 
                ut_ad(bpage->in_LRU_list);
63
 
                ut_ad(bpage->in_page_hash);
64
 
                ut_ad(!bpage->in_zip_hash);
65
 
                ut_ad(!bpage->in_flush_list);
66
 
                ut_d(bpage->in_flush_list = TRUE);
67
 
                UT_LIST_ADD_FIRST(list, buf_pool->flush_list, bpage);
68
 
                break;
69
 
        case BUF_BLOCK_ZIP_FREE:
70
 
        case BUF_BLOCK_NOT_USED:
71
 
        case BUF_BLOCK_READY_FOR_USE:
72
 
        case BUF_BLOCK_MEMORY:
73
 
        case BUF_BLOCK_REMOVE_HASH:
74
 
                ut_error;
75
 
                return;
76
 
        }
77
 
 
78
 
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
79
 
        ut_a(buf_flush_validate_low());
80
 
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
81
 
}
82
 
 
83
 
/************************************************************************
84
 
Inserts a modified block into the flush list in the right sorted position.
85
 
This function is used by recovery, because there the modifications do not
86
 
necessarily come in the order of lsn's. */
87
 
UNIV_INTERN
88
 
void
89
 
buf_flush_insert_sorted_into_flush_list(
90
 
/*====================================*/
91
 
        buf_page_t*     bpage)  /* in: block which is modified */
92
 
{
93
 
        buf_page_t*     prev_b;
94
 
        buf_page_t*     b;
95
 
 
96
 
        ut_ad(buf_pool_mutex_own());
97
 
 
98
 
        switch (buf_page_get_state(bpage)) {
99
 
        case BUF_BLOCK_ZIP_PAGE:
100
 
                mutex_enter(&buf_pool_zip_mutex);
101
 
                buf_page_set_state(bpage, BUF_BLOCK_ZIP_DIRTY);
102
 
                mutex_exit(&buf_pool_zip_mutex);
103
 
                UT_LIST_REMOVE(list, buf_pool->zip_clean, bpage);
104
 
                /* fall through */
105
 
        case BUF_BLOCK_ZIP_DIRTY:
106
 
        case BUF_BLOCK_FILE_PAGE:
107
 
                ut_ad(bpage->in_LRU_list);
108
 
                ut_ad(bpage->in_page_hash);
109
 
                ut_ad(!bpage->in_zip_hash);
110
 
                ut_ad(!bpage->in_flush_list);
111
 
                ut_d(bpage->in_flush_list = TRUE);
112
 
                break;
113
 
        case BUF_BLOCK_ZIP_FREE:
114
 
        case BUF_BLOCK_NOT_USED:
115
 
        case BUF_BLOCK_READY_FOR_USE:
116
 
        case BUF_BLOCK_MEMORY:
117
 
        case BUF_BLOCK_REMOVE_HASH:
118
 
                ut_error;
119
 
                return;
120
 
        }
121
 
 
122
 
        prev_b = NULL;
123
 
        b = UT_LIST_GET_FIRST(buf_pool->flush_list);
124
 
 
125
 
        while (b && b->oldest_modification > bpage->oldest_modification) {
126
 
                ut_ad(b->in_flush_list);
127
 
                prev_b = b;
128
 
                b = UT_LIST_GET_NEXT(list, b);
129
 
        }
130
 
 
131
 
        if (prev_b == NULL) {
132
 
                UT_LIST_ADD_FIRST(list, buf_pool->flush_list, bpage);
133
 
        } else {
134
 
                UT_LIST_INSERT_AFTER(list, buf_pool->flush_list,
135
 
                                     prev_b, bpage);
136
 
        }
137
 
 
138
 
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
139
 
        ut_a(buf_flush_validate_low());
140
 
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
141
 
}
142
 
 
143
 
/************************************************************************
144
 
Returns TRUE if the file page block is immediately suitable for replacement,
145
 
i.e., the transition FILE_PAGE => NOT_USED allowed. */
146
 
UNIV_INTERN
147
 
ibool
148
 
buf_flush_ready_for_replace(
149
 
/*========================*/
150
 
                                /* out: TRUE if can replace immediately */
151
 
        buf_page_t*     bpage)  /* in: buffer control block, must be
152
 
                                buf_page_in_file(bpage) and in the LRU list */
153
 
{
154
 
        ut_ad(buf_pool_mutex_own());
155
 
        ut_ad(mutex_own(buf_page_get_mutex(bpage)));
156
 
        ut_ad(bpage->in_LRU_list);
157
 
 
158
 
        if (UNIV_LIKELY(buf_page_in_file(bpage))) {
159
 
 
160
 
                return(bpage->oldest_modification == 0
161
 
                       && buf_page_get_io_fix(bpage) == BUF_IO_NONE
162
 
                       && bpage->buf_fix_count == 0);
163
 
        }
164
 
 
165
 
        ut_print_timestamp(stderr);
166
 
        fprintf(stderr,
167
 
                "  InnoDB: Error: buffer block state %lu"
168
 
                " in the LRU list!\n",
169
 
                (ulong) buf_page_get_state(bpage));
170
 
        ut_print_buf(stderr, bpage, sizeof(buf_page_t));
171
 
 
172
 
        return(FALSE);
173
 
}
174
 
 
175
 
/************************************************************************
176
 
Returns TRUE if the block is modified and ready for flushing. */
177
 
UNIV_INLINE
178
 
ibool
179
 
buf_flush_ready_for_flush(
180
 
/*======================*/
181
 
                                /* out: TRUE if can flush immediately */
182
 
        buf_page_t*     bpage,  /* in: buffer control block, must be
183
 
                                buf_page_in_file(bpage) */
184
 
        enum buf_flush  flush_type)/* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
185
 
{
186
 
        ut_a(buf_page_in_file(bpage));
187
 
        ut_ad(buf_pool_mutex_own());
188
 
        ut_ad(mutex_own(buf_page_get_mutex(bpage)));
189
 
 
190
 
        if (bpage->oldest_modification != 0
191
 
            && buf_page_get_io_fix(bpage) == BUF_IO_NONE) {
192
 
                ut_ad(bpage->in_flush_list);
193
 
 
194
 
                if (flush_type != BUF_FLUSH_LRU) {
195
 
 
196
 
                        return(TRUE);
197
 
 
198
 
                } else if (bpage->buf_fix_count == 0) {
199
 
 
200
 
                        /* If we are flushing the LRU list, to avoid deadlocks
201
 
                        we require the block not to be bufferfixed, and hence
202
 
                        not latched. */
203
 
 
204
 
                        return(TRUE);
205
 
                }
206
 
        }
207
 
 
208
 
        return(FALSE);
209
 
}
210
 
 
211
 
/************************************************************************
212
 
Remove a block from the flush list of modified blocks. */
213
 
UNIV_INTERN
214
 
void
215
 
buf_flush_remove(
216
 
/*=============*/
217
 
        buf_page_t*     bpage)  /* in: pointer to the block in question */
218
 
{
219
 
        ut_ad(buf_pool_mutex_own());
220
 
        ut_ad(mutex_own(buf_page_get_mutex(bpage)));
221
 
        ut_ad(bpage->in_flush_list);
222
 
        ut_d(bpage->in_flush_list = FALSE);
223
 
 
224
 
        switch (buf_page_get_state(bpage)) {
225
 
        case BUF_BLOCK_ZIP_PAGE:
226
 
                /* clean compressed pages should not be on the flush list */
227
 
        case BUF_BLOCK_ZIP_FREE:
228
 
        case BUF_BLOCK_NOT_USED:
229
 
        case BUF_BLOCK_READY_FOR_USE:
230
 
        case BUF_BLOCK_MEMORY:
231
 
        case BUF_BLOCK_REMOVE_HASH:
232
 
                ut_error;
233
 
                return;
234
 
        case BUF_BLOCK_ZIP_DIRTY:
235
 
                buf_page_set_state(bpage, BUF_BLOCK_ZIP_PAGE);
236
 
                UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
237
 
                buf_LRU_insert_zip_clean(bpage);
238
 
                break;
239
 
        case BUF_BLOCK_FILE_PAGE:
240
 
                UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
241
 
                break;
242
 
        }
243
 
 
244
 
        bpage->oldest_modification = 0;
245
 
 
246
 
        ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list));
247
 
}
248
 
 
249
 
/************************************************************************
250
 
Updates the flush system data structures when a write is completed. */
251
 
UNIV_INTERN
252
 
void
253
 
buf_flush_write_complete(
254
 
/*=====================*/
255
 
        buf_page_t*     bpage)  /* in: pointer to the block in question */
256
 
{
257
 
        enum buf_flush  flush_type;
258
 
 
259
 
        ut_ad(bpage);
260
 
 
261
 
        buf_flush_remove(bpage);
262
 
 
263
 
        flush_type = buf_page_get_flush_type(bpage);
264
 
        buf_pool->n_flush[flush_type]--;
265
 
 
266
 
        if (flush_type == BUF_FLUSH_LRU) {
267
 
                /* Put the block to the end of the LRU list to wait to be
268
 
                moved to the free list */
269
 
 
270
 
                buf_LRU_make_block_old(bpage);
271
 
 
272
 
                buf_pool->LRU_flush_ended++;
273
 
        }
274
 
 
275
 
        /* fprintf(stderr, "n pending flush %lu\n",
276
 
        buf_pool->n_flush[flush_type]); */
277
 
 
278
 
        if ((buf_pool->n_flush[flush_type] == 0)
279
 
            && (buf_pool->init_flush[flush_type] == FALSE)) {
280
 
 
281
 
                /* The running flush batch has ended */
282
 
 
283
 
                os_event_set(buf_pool->no_flush[flush_type]);
284
 
        }
285
 
}
286
 
 
287
 
/************************************************************************
288
 
Flushes possible buffered writes from the doublewrite memory buffer to disk,
289
 
and also wakes up the aio thread if simulated aio is used. It is very
290
 
important to call this function after a batch of writes has been posted,
291
 
and also when we may have to wait for a page latch! Otherwise a deadlock
292
 
of threads can occur. */
293
 
static
294
 
void
295
 
buf_flush_buffered_writes(void)
296
 
/*===========================*/
297
 
{
298
 
        byte*           write_buf;
299
 
        ulint           len;
300
 
        ulint           len2;
301
 
        ulint           i;
302
 
 
303
 
        if (!srv_use_doublewrite_buf || trx_doublewrite == NULL) {
304
 
                os_aio_simulated_wake_handler_threads();
305
 
 
306
 
                return;
307
 
        }
308
 
 
309
 
        mutex_enter(&(trx_doublewrite->mutex));
310
 
 
311
 
        /* Write first to doublewrite buffer blocks. We use synchronous
312
 
        aio and thus know that file write has been completed when the
313
 
        control returns. */
314
 
 
315
 
        if (trx_doublewrite->first_free == 0) {
316
 
 
317
 
                mutex_exit(&(trx_doublewrite->mutex));
318
 
 
319
 
                return;
320
 
        }
321
 
 
322
 
        for (i = 0; i < trx_doublewrite->first_free; i++) {
323
 
 
324
 
                const buf_block_t*      block;
325
 
 
326
 
                block = (buf_block_t*) trx_doublewrite->buf_block_arr[i];
327
 
 
328
 
                if (buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE
329
 
                    || block->page.zip.data) {
330
 
                        /* No simple validate for compressed pages exists. */
331
 
                        continue;
332
 
                }
333
 
 
334
 
                if (UNIV_UNLIKELY
335
 
                    (memcmp(block->frame + (FIL_PAGE_LSN + 4),
336
 
                            block->frame + (UNIV_PAGE_SIZE
337
 
                                            - FIL_PAGE_END_LSN_OLD_CHKSUM + 4),
338
 
                            4))) {
339
 
                        ut_print_timestamp(stderr);
340
 
                        fprintf(stderr,
341
 
                                "  InnoDB: ERROR: The page to be written"
342
 
                                " seems corrupt!\n"
343
 
                                "InnoDB: The lsn fields do not match!"
344
 
                                " Noticed in the buffer pool\n"
345
 
                                "InnoDB: before posting to the"
346
 
                                " doublewrite buffer.\n");
347
 
                }
348
 
 
349
 
                if (!block->check_index_page_at_flush) {
350
 
                } else if (page_is_comp(block->frame)) {
351
 
                        if (UNIV_UNLIKELY
352
 
                            (!page_simple_validate_new(block->frame))) {
353
 
corrupted_page:
354
 
                                buf_page_print(block->frame, 0);
355
 
 
356
 
                                ut_print_timestamp(stderr);
357
 
                                fprintf(stderr,
358
 
                                        "  InnoDB: Apparent corruption of an"
359
 
                                        " index page n:o %lu in space %lu\n"
360
 
                                        "InnoDB: to be written to data file."
361
 
                                        " We intentionally crash server\n"
362
 
                                        "InnoDB: to prevent corrupt data"
363
 
                                        " from ending up in data\n"
364
 
                                        "InnoDB: files.\n",
365
 
                                        (ulong) buf_block_get_page_no(block),
366
 
                                        (ulong) buf_block_get_space(block));
367
 
 
368
 
                                ut_error;
369
 
                        }
370
 
                } else if (UNIV_UNLIKELY
371
 
                           (!page_simple_validate_old(block->frame))) {
372
 
 
373
 
                        goto corrupted_page;
374
 
                }
375
 
        }
376
 
 
377
 
        /* increment the doublewrite flushed pages counter */
378
 
        srv_dblwr_pages_written+= trx_doublewrite->first_free;
379
 
        srv_dblwr_writes++;
380
 
 
381
 
        len = ut_min(TRX_SYS_DOUBLEWRITE_BLOCK_SIZE,
382
 
                     trx_doublewrite->first_free) * UNIV_PAGE_SIZE;
383
 
 
384
 
        write_buf = trx_doublewrite->write_buf;
385
 
        i = 0;
386
 
 
387
 
        fil_io(OS_FILE_WRITE, TRUE, TRX_SYS_SPACE, 0,
388
 
               trx_doublewrite->block1, 0, len,
389
 
               (void*) write_buf, NULL);
390
 
 
391
 
        for (len2 = 0; len2 + UNIV_PAGE_SIZE <= len;
392
 
             len2 += UNIV_PAGE_SIZE, i++) {
393
 
                const buf_block_t* block = (buf_block_t*)
394
 
                        trx_doublewrite->buf_block_arr[i];
395
 
 
396
 
                if (UNIV_LIKELY(!block->page.zip.data)
397
 
                    && UNIV_LIKELY(buf_block_get_state(block)
398
 
                                   == BUF_BLOCK_FILE_PAGE)
399
 
                    && UNIV_UNLIKELY
400
 
                    (memcmp(write_buf + len2 + (FIL_PAGE_LSN + 4),
401
 
                            write_buf + len2
402
 
                            + (UNIV_PAGE_SIZE
403
 
                               - FIL_PAGE_END_LSN_OLD_CHKSUM + 4), 4))) {
404
 
                        ut_print_timestamp(stderr);
405
 
                        fprintf(stderr,
406
 
                                "  InnoDB: ERROR: The page to be written"
407
 
                                " seems corrupt!\n"
408
 
                                "InnoDB: The lsn fields do not match!"
409
 
                                " Noticed in the doublewrite block1.\n");
410
 
                }
411
 
        }
412
 
 
413
 
        if (trx_doublewrite->first_free <= TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
414
 
                goto flush;
415
 
        }
416
 
 
417
 
        len = (trx_doublewrite->first_free - TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)
418
 
                * UNIV_PAGE_SIZE;
419
 
 
420
 
        write_buf = trx_doublewrite->write_buf
421
 
                + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE;
422
 
        ut_ad(i == TRX_SYS_DOUBLEWRITE_BLOCK_SIZE);
423
 
 
424
 
        fil_io(OS_FILE_WRITE, TRUE, TRX_SYS_SPACE, 0,
425
 
               trx_doublewrite->block2, 0, len,
426
 
               (void*) write_buf, NULL);
427
 
 
428
 
        for (len2 = 0; len2 + UNIV_PAGE_SIZE <= len;
429
 
             len2 += UNIV_PAGE_SIZE, i++) {
430
 
                const buf_block_t* block = (buf_block_t*)
431
 
                        trx_doublewrite->buf_block_arr[i];
432
 
 
433
 
                if (UNIV_LIKELY(!block->page.zip.data)
434
 
                    && UNIV_LIKELY(buf_block_get_state(block)
435
 
                                   == BUF_BLOCK_FILE_PAGE)
436
 
                    && UNIV_UNLIKELY
437
 
                    (memcmp(write_buf + len2 + (FIL_PAGE_LSN + 4),
438
 
                            write_buf + len2
439
 
                            + (UNIV_PAGE_SIZE
440
 
                               - FIL_PAGE_END_LSN_OLD_CHKSUM + 4), 4))) {
441
 
                        ut_print_timestamp(stderr);
442
 
                        fprintf(stderr,
443
 
                                "  InnoDB: ERROR: The page to be"
444
 
                                " written seems corrupt!\n"
445
 
                                "InnoDB: The lsn fields do not match!"
446
 
                                " Noticed in"
447
 
                                " the doublewrite block2.\n");
448
 
                }
449
 
        }
450
 
 
451
 
flush:
452
 
        /* Now flush the doublewrite buffer data to disk */
453
 
 
454
 
        fil_flush(TRX_SYS_SPACE);
455
 
 
456
 
        /* We know that the writes have been flushed to disk now
457
 
        and in recovery we will find them in the doublewrite buffer
458
 
        blocks. Next do the writes to the intended positions. */
459
 
 
460
 
        for (i = 0; i < trx_doublewrite->first_free; i++) {
461
 
                const buf_block_t* block = (buf_block_t*)
462
 
                        trx_doublewrite->buf_block_arr[i];
463
 
 
464
 
                ut_a(buf_page_in_file(&block->page));
465
 
                if (UNIV_LIKELY_NULL(block->page.zip.data)) {
466
 
                        fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
467
 
                               FALSE, buf_page_get_space(&block->page),
468
 
                               buf_page_get_zip_size(&block->page),
469
 
                               buf_page_get_page_no(&block->page), 0,
470
 
                               buf_page_get_zip_size(&block->page),
471
 
                               (void*)block->page.zip.data,
472
 
                               (void*)block);
473
 
 
474
 
                        /* Increment the counter of I/O operations used
475
 
                        for selecting LRU policy. */
476
 
                        buf_LRU_stat_inc_io();
477
 
 
478
 
                        continue;
479
 
                }
480
 
 
481
 
                ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
482
 
 
483
 
                if (UNIV_UNLIKELY(memcmp(block->frame + (FIL_PAGE_LSN + 4),
484
 
                                         block->frame
485
 
                                         + (UNIV_PAGE_SIZE
486
 
                                            - FIL_PAGE_END_LSN_OLD_CHKSUM + 4),
487
 
                                         4))) {
488
 
                        ut_print_timestamp(stderr);
489
 
                        fprintf(stderr,
490
 
                                "  InnoDB: ERROR: The page to be written"
491
 
                                " seems corrupt!\n"
492
 
                                "InnoDB: The lsn fields do not match!"
493
 
                                " Noticed in the buffer pool\n"
494
 
                                "InnoDB: after posting and flushing"
495
 
                                " the doublewrite buffer.\n"
496
 
                                "InnoDB: Page buf fix count %lu,"
497
 
                                " io fix %lu, state %lu\n",
498
 
                                (ulong)block->page.buf_fix_count,
499
 
                                (ulong)buf_block_get_io_fix(block),
500
 
                                (ulong)buf_block_get_state(block));
501
 
                }
502
 
 
503
 
                fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
504
 
                       FALSE, buf_block_get_space(block), 0,
505
 
                       buf_block_get_page_no(block), 0, UNIV_PAGE_SIZE,
506
 
                       (void*)block->frame, (void*)block);
507
 
 
508
 
                /* Increment the counter of I/O operations used
509
 
                for selecting LRU policy. */
510
 
                buf_LRU_stat_inc_io();
511
 
        }
512
 
 
513
 
        /* Wake possible simulated aio thread to actually post the
514
 
        writes to the operating system */
515
 
 
516
 
        os_aio_simulated_wake_handler_threads();
517
 
 
518
 
        /* Wait that all async writes to tablespaces have been posted to
519
 
        the OS */
520
 
 
521
 
        os_aio_wait_until_no_pending_writes();
522
 
 
523
 
        /* Now we flush the data to disk (for example, with fsync) */
524
 
 
525
 
        fil_flush_file_spaces(FIL_TABLESPACE);
526
 
 
527
 
        /* We can now reuse the doublewrite memory buffer: */
528
 
 
529
 
        trx_doublewrite->first_free = 0;
530
 
 
531
 
        mutex_exit(&(trx_doublewrite->mutex));
532
 
}
533
 
 
534
 
/************************************************************************
535
 
Posts a buffer page for writing. If the doublewrite memory buffer is
536
 
full, calls buf_flush_buffered_writes and waits for for free space to
537
 
appear. */
538
 
static
539
 
void
540
 
buf_flush_post_to_doublewrite_buf(
541
 
/*==============================*/
542
 
        buf_page_t*     bpage)  /* in: buffer block to write */
543
 
{
544
 
        ulint   zip_size;
545
 
try_again:
546
 
        mutex_enter(&(trx_doublewrite->mutex));
547
 
 
548
 
        ut_a(buf_page_in_file(bpage));
549
 
 
550
 
        if (trx_doublewrite->first_free
551
 
            >= 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
552
 
                mutex_exit(&(trx_doublewrite->mutex));
553
 
 
554
 
                buf_flush_buffered_writes();
555
 
 
556
 
                goto try_again;
557
 
        }
558
 
 
559
 
        zip_size = buf_page_get_zip_size(bpage);
560
 
 
561
 
        if (UNIV_UNLIKELY(zip_size)) {
562
 
                /* Copy the compressed page and clear the rest. */
563
 
                memcpy(trx_doublewrite->write_buf
564
 
                       + UNIV_PAGE_SIZE * trx_doublewrite->first_free,
565
 
                       bpage->zip.data, zip_size);
566
 
                memset(trx_doublewrite->write_buf
567
 
                       + UNIV_PAGE_SIZE * trx_doublewrite->first_free
568
 
                       + zip_size, 0, UNIV_PAGE_SIZE - zip_size);
569
 
        } else {
570
 
                ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
571
 
 
572
 
                memcpy(trx_doublewrite->write_buf
573
 
                       + UNIV_PAGE_SIZE * trx_doublewrite->first_free,
574
 
                       ((buf_block_t*) bpage)->frame, UNIV_PAGE_SIZE);
575
 
        }
576
 
 
577
 
        trx_doublewrite->buf_block_arr[trx_doublewrite->first_free] = bpage;
578
 
 
579
 
        trx_doublewrite->first_free++;
580
 
 
581
 
        if (trx_doublewrite->first_free
582
 
            >= 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
583
 
                mutex_exit(&(trx_doublewrite->mutex));
584
 
 
585
 
                buf_flush_buffered_writes();
586
 
 
587
 
                return;
588
 
        }
589
 
 
590
 
        mutex_exit(&(trx_doublewrite->mutex));
591
 
}
592
 
 
593
 
/************************************************************************
594
 
Initializes a page for writing to the tablespace. */
595
 
UNIV_INTERN
596
 
void
597
 
buf_flush_init_for_writing(
598
 
/*=======================*/
599
 
        byte*           page,           /* in/out: page */
600
 
        void*           page_zip_,      /* in/out: compressed page, or NULL */
601
 
        ib_uint64_t     newest_lsn)     /* in: newest modification lsn
602
 
                                        to the page */
603
 
{
604
 
        ut_ad(page);
605
 
 
606
 
        if (page_zip_) {
607
 
                page_zip_des_t* page_zip = page_zip_;
608
 
                ulint           zip_size = page_zip_get_size(page_zip);
609
 
                ut_ad(zip_size);
610
 
                ut_ad(ut_is_2pow(zip_size));
611
 
                ut_ad(zip_size <= UNIV_PAGE_SIZE);
612
 
 
613
 
                switch (UNIV_EXPECT(fil_page_get_type(page), FIL_PAGE_INDEX)) {
614
 
                case FIL_PAGE_TYPE_ALLOCATED:
615
 
                case FIL_PAGE_INODE:
616
 
                case FIL_PAGE_IBUF_BITMAP:
617
 
                case FIL_PAGE_TYPE_FSP_HDR:
618
 
                case FIL_PAGE_TYPE_XDES:
619
 
                        /* These are essentially uncompressed pages. */
620
 
                        memcpy(page_zip->data, page, zip_size);
621
 
                        /* fall through */
622
 
                case FIL_PAGE_TYPE_ZBLOB:
623
 
                case FIL_PAGE_TYPE_ZBLOB2:
624
 
                case FIL_PAGE_INDEX:
625
 
                        mach_write_ull(page_zip->data
626
 
                                       + FIL_PAGE_LSN, newest_lsn);
627
 
                        memset(page_zip->data + FIL_PAGE_FILE_FLUSH_LSN, 0, 8);
628
 
                        mach_write_to_4(page_zip->data
629
 
                                        + FIL_PAGE_SPACE_OR_CHKSUM,
630
 
                                        srv_use_checksums
631
 
                                        ? page_zip_calc_checksum(
632
 
                                                page_zip->data, zip_size)
633
 
                                        : BUF_NO_CHECKSUM_MAGIC);
634
 
                        return;
635
 
                }
636
 
 
637
 
                ut_error;
638
 
        }
639
 
 
640
 
        /* Write the newest modification lsn to the page header and trailer */
641
 
        mach_write_ull(page + FIL_PAGE_LSN, newest_lsn);
642
 
 
643
 
        mach_write_ull(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
644
 
                       newest_lsn);
645
 
 
646
 
        /* Store the new formula checksum */
647
 
 
648
 
        mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM,
649
 
                        srv_use_checksums
650
 
                        ? buf_calc_page_new_checksum(page)
651
 
                        : BUF_NO_CHECKSUM_MAGIC);
652
 
 
653
 
        /* We overwrite the first 4 bytes of the end lsn field to store
654
 
        the old formula checksum. Since it depends also on the field
655
 
        FIL_PAGE_SPACE_OR_CHKSUM, it has to be calculated after storing the
656
 
        new formula checksum. */
657
 
 
658
 
        mach_write_to_4(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
659
 
                        srv_use_checksums
660
 
                        ? buf_calc_page_old_checksum(page)
661
 
                        : BUF_NO_CHECKSUM_MAGIC);
662
 
}
663
 
 
664
 
/************************************************************************
Does an asynchronous write of a buffer page. NOTE: in simulated aio and
also when the doublewrite buffer is used, we must call
buf_flush_buffered_writes after we have posted a batch of writes! */
static
void
buf_flush_write_block_low(
/*======================*/
	buf_page_t*	bpage)	/* in: buffer block to write */
{
	ulint	zip_size	= buf_page_get_zip_size(bpage);
	page_t*	frame		= NULL;
#ifdef UNIV_LOG_DEBUG
	/* Emit the "no crash recovery" warning only once per process */
	static ibool univ_log_debug_warned;
#endif /* UNIV_LOG_DEBUG */

	ut_ad(buf_page_in_file(bpage));

#ifdef UNIV_IBUF_COUNT_DEBUG
	ut_a(ibuf_count_get(bpage->space, bpage->offset) == 0);
#endif
	/* Only dirty pages reach this function */
	ut_ad(bpage->newest_modification != 0);

#ifdef UNIV_LOG_DEBUG
	if (!univ_log_debug_warned) {
		univ_log_debug_warned = TRUE;
		fputs("Warning: cannot force log to disk if"
		      " UNIV_LOG_DEBUG is defined!\n"
		      "Crash recovery will not work!\n",
		      stderr);
	}
#else
	/* Force the log to the disk before writing the modified block:
	this write-ahead-logging step must precede the page write */
	log_write_up_to(bpage->newest_modification, LOG_WAIT_ALL_GROUPS, TRUE);
#endif
	/* Prepare the page frame for writing, depending on the block
	state; only file-resident states are legal here */
	switch (buf_page_get_state(bpage)) {
	case BUF_BLOCK_ZIP_FREE:
	case BUF_BLOCK_ZIP_PAGE: /* The page should be dirty. */
	case BUF_BLOCK_NOT_USED:
	case BUF_BLOCK_READY_FOR_USE:
	case BUF_BLOCK_MEMORY:
	case BUF_BLOCK_REMOVE_HASH:
		ut_error;
		break;
	case BUF_BLOCK_ZIP_DIRTY:
		/* Compressed-only page: write the compressed frame */
		frame = bpage->zip.data;
		if (UNIV_LIKELY(srv_use_checksums)) {
			/* The stored checksum must still match the
			compressed page contents */
			ut_a(mach_read_from_4(frame + FIL_PAGE_SPACE_OR_CHKSUM)
			     == page_zip_calc_checksum(frame, zip_size));
		}
		mach_write_ull(frame + FIL_PAGE_LSN,
			       bpage->newest_modification);
		memset(frame + FIL_PAGE_FILE_FLUSH_LSN, 0, 8);
		break;
	case BUF_BLOCK_FILE_PAGE:
		/* Prefer the compressed frame for the actual I/O when
		one exists; fall back to the uncompressed frame */
		frame = bpage->zip.data;
		if (!frame) {
			frame = ((buf_block_t*) bpage)->frame;
		}

		/* Stamp LSN and checksums on the uncompressed frame
		(and recompress into &bpage->zip when present) */
		buf_flush_init_for_writing(((buf_block_t*) bpage)->frame,
					   bpage->zip.data
					   ? &bpage->zip : NULL,
					   bpage->newest_modification);
		break;
	}

	if (!srv_use_doublewrite_buf || !trx_doublewrite) {
		/* Post the write directly to the data file */
		fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
		       FALSE, buf_page_get_space(bpage), zip_size,
		       buf_page_get_page_no(bpage), 0,
		       zip_size ? zip_size : UNIV_PAGE_SIZE,
		       frame, bpage);
	} else {
		/* Route the page through the doublewrite buffer to
		protect against torn page writes */
		buf_flush_post_to_doublewrite_buf(bpage);
	}
}
741
 
 
742
 
/************************************************************************
Writes a page asynchronously from the buffer buf_pool to a file, if it can be
found in the buf_pool and it is in a flushable state. NOTE: in simulated aio
we must call os_aio_simulated_wake_handler_threads after we have posted a batch
of writes! */
static
ulint
buf_flush_try_page(
/*===============*/
					/* out: 1 if a page was
					flushed, 0 otherwise */
	ulint		space,		/* in: space id */
	ulint		offset,		/* in: page offset */
	enum buf_flush	flush_type)	/* in: BUF_FLUSH_LRU, BUF_FLUSH_LIST,
					or BUF_FLUSH_SINGLE_PAGE */
{
	buf_page_t*	bpage;
	mutex_t*	block_mutex;
	ibool		locked;

	ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST
	      || flush_type == BUF_FLUSH_SINGLE_PAGE);

	buf_pool_mutex_enter();

	/* Look the page up again under the buf_pool mutex: the caller
	released the mutex, so the page may be gone or changed state */
	bpage = buf_page_hash_get(space, offset);

	if (!bpage) {
		buf_pool_mutex_exit();
		return(0);
	}

	ut_a(buf_page_in_file(bpage));
	block_mutex = buf_page_get_mutex(bpage);

	mutex_enter(block_mutex);

	if (!buf_flush_ready_for_flush(bpage, flush_type)) {
		/* Not dirty, already being flushed, or buffer-fixed in a
		way that forbids this flush type: give up */
		mutex_exit(block_mutex);
		buf_pool_mutex_exit();
		return(0);
	}

	/* The three flush types differ only in how and when the page
	s-latch is acquired; each branch io-fixes the page and registers
	it in the running flush batch counters first */
	switch (flush_type) {
	case BUF_FLUSH_LIST:
		buf_page_set_io_fix(bpage, BUF_IO_WRITE);

		buf_page_set_flush_type(bpage, flush_type);

		if (buf_pool->n_flush[flush_type] == 0) {

			/* First page of a new batch: mark the batch as
			running so waiters block on the event */
			os_event_reset(buf_pool->no_flush[flush_type]);
		}

		buf_pool->n_flush[flush_type]++;

		/* If the simulated aio thread is not running, we must
		not wait for any latch, as we may end up in a deadlock:
		if buf_fix_count == 0, then we know we need not wait */

		locked = bpage->buf_fix_count == 0;
		if (locked
		    && buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE) {
			rw_lock_s_lock_gen(&((buf_block_t*) bpage)->lock,
					   BUF_IO_WRITE);
		}

		mutex_exit(block_mutex);
		buf_pool_mutex_exit();

		if (!locked) {
			/* The page was buffer-fixed: flush pending
			buffered writes first so the fixing thread can
			proceed, then it is safe to wait for the latch */
			buf_flush_buffered_writes();

			if (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE) {
				rw_lock_s_lock_gen(&((buf_block_t*) bpage)
						   ->lock, BUF_IO_WRITE);
			}
		}

		break;

	case BUF_FLUSH_LRU:
		/* VERY IMPORTANT:
		Because any thread may call the LRU flush, even when owning
		locks on pages, to avoid deadlocks, we must make sure that the
		s-lock is acquired on the page without waiting: this is
		accomplished because in the if-condition above we require
		the page not to be bufferfixed (in function
		..._ready_for_flush). */

		buf_page_set_io_fix(bpage, BUF_IO_WRITE);

		buf_page_set_flush_type(bpage, flush_type);

		if (buf_pool->n_flush[flush_type] == 0) {

			os_event_reset(buf_pool->no_flush[flush_type]);
		}

		buf_pool->n_flush[flush_type]++;

		if (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE) {
			rw_lock_s_lock_gen(&((buf_block_t*) bpage)->lock,
					   BUF_IO_WRITE);
		}

		/* Note that the s-latch is acquired before releasing the
		buf_pool mutex: this ensures that the latch is acquired
		immediately. */

		mutex_exit(block_mutex);
		buf_pool_mutex_exit();
		break;

	case BUF_FLUSH_SINGLE_PAGE:
		buf_page_set_io_fix(bpage, BUF_IO_WRITE);

		buf_page_set_flush_type(bpage, flush_type);

		if (buf_pool->n_flush[flush_type] == 0) {

			os_event_reset(buf_pool->no_flush[flush_type]);
		}

		buf_pool->n_flush[flush_type]++;

		mutex_exit(block_mutex);
		buf_pool_mutex_exit();

		/* Here the s-latch may be waited for, since the caller of
		a single-page flush owns no page latches */
		if (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE) {
			rw_lock_s_lock_gen(&((buf_block_t*) bpage)->lock,
					   BUF_IO_WRITE);
		}
		break;

	default:
		ut_error;
	}

#ifdef UNIV_DEBUG
	if (buf_debug_prints) {
		fprintf(stderr,
			"Flushing %u space %u page %u\n",
			flush_type, bpage->space, bpage->offset);
	}
#endif /* UNIV_DEBUG */
	/* All mutexes are released; post the asynchronous write */
	buf_flush_write_block_low(bpage);

	return(1);
}
892
 
 
893
 
/***************************************************************
Flushes to disk all flushable pages within the flush area. */
static
ulint
buf_flush_try_neighbors(
/*====================*/
					/* out: number of pages flushed */
	ulint		space,		/* in: space id */
	ulint		offset,		/* in: page offset */
	enum buf_flush	flush_type)	/* in: BUF_FLUSH_LRU or
					BUF_FLUSH_LIST */
{
	buf_page_t*	bpage;
	ulint		low, high;	/* page number range [low, high) to scan */
	ulint		count		= 0;
	ulint		i;

	ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);

	if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) {
		/* If there is little space, it is better not to flush any
		block except from the end of the LRU list */

		low = offset;
		high = offset + 1;
	} else {
		/* When flushed, dirty blocks are searched in neighborhoods of
		this size, and flushed along with the original page. */

		ulint	buf_flush_area	= ut_min(BUF_READ_AHEAD_AREA,
						 buf_pool->curr_size / 16);

		/* Align the range to the flush-area boundary that
		contains the requested page */
		low = (offset / buf_flush_area) * buf_flush_area;
		high = (offset / buf_flush_area + 1) * buf_flush_area;
	}

	/* fprintf(stderr, "Flush area: low %lu high %lu\n", low, high); */

	/* Do not scan past the end of the tablespace */
	if (high > fil_space_get_size(space)) {
		high = fil_space_get_size(space);
	}

	buf_pool_mutex_enter();

	for (i = low; i < high; i++) {

		bpage = buf_page_hash_get(space, i);
		ut_a(!bpage || buf_page_in_file(bpage));

		if (!bpage) {

			continue;

		} else if (flush_type == BUF_FLUSH_LRU && i != offset
			   && !buf_page_is_old(bpage)) {

			/* We avoid flushing 'non-old' blocks in an LRU flush,
			because the flushed blocks are soon freed */

			continue;
		} else {

			mutex_t* block_mutex = buf_page_get_mutex(bpage);

			mutex_enter(block_mutex);

			if (buf_flush_ready_for_flush(bpage, flush_type)
			    && (i == offset || !bpage->buf_fix_count)) {
				/* We only try to flush those
				neighbors != offset where the buf fix count is
				zero, as we then know that we probably can
				latch the page without a semaphore wait.
				Semaphore waits are expensive because we must
				flush the doublewrite buffer before we start
				waiting. */

				buf_pool_mutex_exit();

				mutex_exit(block_mutex);

				/* Note: as we release the buf_pool mutex
				above, in buf_flush_try_page we cannot be sure
				the page is still in a flushable state:
				therefore we check it again inside that
				function. */

				count += buf_flush_try_page(space, i,
							    flush_type);

				buf_pool_mutex_enter();
			} else {
				mutex_exit(block_mutex);
			}
		}
	}

	buf_pool_mutex_exit();

	return(count);
}
993
 
 
994
 
/***********************************************************************
This utility flushes dirty blocks from the end of the LRU list or flush_list.
NOTE 1: in the case of an LRU flush the calling thread may own latches to
pages: to avoid deadlocks, this function must be written so that it cannot
end up waiting for these latches! NOTE 2: in the case of a flush list flush,
the calling thread is not allowed to own any latches on pages! */
UNIV_INTERN
ulint
buf_flush_batch(
/*============*/
					/* out: number of blocks for which the
					write request was queued;
					ULINT_UNDEFINED if there was a flush
					of the same type already running */
	enum buf_flush	flush_type,	/* in: BUF_FLUSH_LRU or
					BUF_FLUSH_LIST; if BUF_FLUSH_LIST,
					then the caller must not own any
					latches on pages */
	ulint		min_n,		/* in: wished minimum number of blocks
					flushed (it is not guaranteed that the
					actual number is that big, though) */
	ib_uint64_t	lsn_limit)	/* in the case BUF_FLUSH_LIST all
					blocks whose oldest_modification is
					smaller than this should be flushed
					(if their number does not exceed
					min_n), otherwise ignored */
{
	buf_page_t*	bpage;
	ulint		page_count	= 0;
	ulint		old_page_count;	/* NOTE(review): only read by the
					commented-out debug fprintf below */
	ulint		space;
	ulint		offset;

	ut_ad((flush_type == BUF_FLUSH_LRU)
	      || (flush_type == BUF_FLUSH_LIST));
#ifdef UNIV_SYNC_DEBUG
	ut_ad((flush_type != BUF_FLUSH_LIST)
	      || sync_thread_levels_empty_gen(TRUE));
#endif /* UNIV_SYNC_DEBUG */
	buf_pool_mutex_enter();

	if ((buf_pool->n_flush[flush_type] > 0)
	    || (buf_pool->init_flush[flush_type] == TRUE)) {

		/* There is already a flush batch of the same type running */

		buf_pool_mutex_exit();

		return(ULINT_UNDEFINED);
	}

	/* Claim the batch: blocks concurrent batches of this type until
	reset below */
	buf_pool->init_flush[flush_type] = TRUE;

	for (;;) {
flush_next:
		/* If we have flushed enough, leave the loop */
		if (page_count >= min_n) {

			break;
		}

		/* Start from the end of the list looking for a suitable
		block to be flushed. */

		if (flush_type == BUF_FLUSH_LRU) {
			bpage = UT_LIST_GET_LAST(buf_pool->LRU);
		} else {
			ut_ad(flush_type == BUF_FLUSH_LIST);

			bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
			if (!bpage
			    || bpage->oldest_modification >= lsn_limit) {
				/* We have flushed enough */

				break;
			}
			ut_ad(bpage->in_flush_list);
		}

		/* Note that after finding a single flushable page, we try to
		flush also all its neighbors, and after that start from the
		END of the LRU list or flush list again: the list may change
		during the flushing and we cannot safely preserve within this
		function a pointer to a block in the list! */

		do {
			mutex_t* block_mutex = buf_page_get_mutex(bpage);

			ut_a(buf_page_in_file(bpage));

			mutex_enter(block_mutex);

			if (buf_flush_ready_for_flush(bpage, flush_type)) {

				/* Record the page id before dropping the
				mutexes: bpage may change state afterwards */
				space = buf_page_get_space(bpage);
				offset = buf_page_get_page_no(bpage);

				buf_pool_mutex_exit();
				mutex_exit(block_mutex);

				old_page_count = page_count;

				/* Try to flush also all the neighbors */
				page_count += buf_flush_try_neighbors(
					space, offset, flush_type);
				/* fprintf(stderr,
				"Flush type %lu, page no %lu, neighb %lu\n",
				flush_type, offset,
				page_count - old_page_count); */

				buf_pool_mutex_enter();
				/* Restart the scan from the list end: the
				list may have changed while unlocked */
				goto flush_next;

			} else if (flush_type == BUF_FLUSH_LRU) {

				mutex_exit(block_mutex);

				bpage = UT_LIST_GET_PREV(LRU, bpage);
			} else {
				ut_ad(flush_type == BUF_FLUSH_LIST);

				mutex_exit(block_mutex);

				bpage = UT_LIST_GET_PREV(list, bpage);
				ut_ad(!bpage || bpage->in_flush_list);
			}
		} while (bpage != NULL);

		/* If we could not find anything to flush, leave the loop */

		break;
	}

	buf_pool->init_flush[flush_type] = FALSE;

	if ((buf_pool->n_flush[flush_type] == 0)
	    && (buf_pool->init_flush[flush_type] == FALSE)) {

		/* The running flush batch has ended */

		os_event_set(buf_pool->no_flush[flush_type]);
	}

	buf_pool_mutex_exit();

	/* Ensure all posted writes actually reach the OS (needed for
	simulated aio and the doublewrite buffer) */
	buf_flush_buffered_writes();

#ifdef UNIV_DEBUG
	if (buf_debug_prints && page_count > 0) {
		ut_a(flush_type == BUF_FLUSH_LRU
		     || flush_type == BUF_FLUSH_LIST);
		fprintf(stderr, flush_type == BUF_FLUSH_LRU
			? "Flushed %lu pages in LRU flush\n"
			: "Flushed %lu pages in flush list flush\n",
			(ulong) page_count);
	}
#endif /* UNIV_DEBUG */

	srv_buf_pool_flushed += page_count;

	return(page_count);
}
1156
 
 
1157
 
/**********************************************************************
1158
 
Waits until a flush batch of the given type ends */
1159
 
UNIV_INTERN
1160
 
void
1161
 
buf_flush_wait_batch_end(
1162
 
/*=====================*/
1163
 
        enum buf_flush  type)   /* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
1164
 
{
1165
 
        ut_ad((type == BUF_FLUSH_LRU) || (type == BUF_FLUSH_LIST));
1166
 
 
1167
 
        os_event_wait(buf_pool->no_flush[type]);
1168
 
}
1169
 
 
1170
 
/**********************************************************************
1171
 
Gives a recommendation of how many blocks should be flushed to establish
1172
 
a big enough margin of replaceable blocks near the end of the LRU list
1173
 
and in the free list. */
1174
 
static
1175
 
ulint
1176
 
buf_flush_LRU_recommendation(void)
1177
 
/*==============================*/
1178
 
                        /* out: number of blocks which should be flushed
1179
 
                        from the end of the LRU list */
1180
 
{
1181
 
        buf_page_t*     bpage;
1182
 
        ulint           n_replaceable;
1183
 
        ulint           distance        = 0;
1184
 
 
1185
 
        buf_pool_mutex_enter();
1186
 
 
1187
 
        n_replaceable = UT_LIST_GET_LEN(buf_pool->free);
1188
 
 
1189
 
        bpage = UT_LIST_GET_LAST(buf_pool->LRU);
1190
 
 
1191
 
        while ((bpage != NULL)
1192
 
               && (n_replaceable < BUF_FLUSH_FREE_BLOCK_MARGIN
1193
 
                   + BUF_FLUSH_EXTRA_MARGIN)
1194
 
               && (distance < BUF_LRU_FREE_SEARCH_LEN)) {
1195
 
 
1196
 
                mutex_t* block_mutex = buf_page_get_mutex(bpage);
1197
 
 
1198
 
                mutex_enter(block_mutex);
1199
 
 
1200
 
                if (buf_flush_ready_for_replace(bpage)) {
1201
 
                        n_replaceable++;
1202
 
                }
1203
 
 
1204
 
                mutex_exit(block_mutex);
1205
 
 
1206
 
                distance++;
1207
 
 
1208
 
                bpage = UT_LIST_GET_PREV(LRU, bpage);
1209
 
        }
1210
 
 
1211
 
        buf_pool_mutex_exit();
1212
 
 
1213
 
        if (n_replaceable >= BUF_FLUSH_FREE_BLOCK_MARGIN) {
1214
 
 
1215
 
                return(0);
1216
 
        }
1217
 
 
1218
 
        return(BUF_FLUSH_FREE_BLOCK_MARGIN + BUF_FLUSH_EXTRA_MARGIN
1219
 
               - n_replaceable);
1220
 
}
1221
 
 
1222
 
/*************************************************************************
1223
 
Flushes pages from the end of the LRU list if there is too small a margin
1224
 
of replaceable pages there or in the free list. VERY IMPORTANT: this function
1225
 
is called also by threads which have locks on pages. To avoid deadlocks, we
1226
 
flush only pages such that the s-lock required for flushing can be acquired
1227
 
immediately, without waiting. */
1228
 
UNIV_INTERN
1229
 
void
1230
 
buf_flush_free_margin(void)
1231
 
/*=======================*/
1232
 
{
1233
 
        ulint   n_to_flush;
1234
 
        ulint   n_flushed;
1235
 
 
1236
 
        n_to_flush = buf_flush_LRU_recommendation();
1237
 
 
1238
 
        if (n_to_flush > 0) {
1239
 
                n_flushed = buf_flush_batch(BUF_FLUSH_LRU, n_to_flush, 0);
1240
 
                if (n_flushed == ULINT_UNDEFINED) {
1241
 
                        /* There was an LRU type flush batch already running;
1242
 
                        let us wait for it to end */
1243
 
 
1244
 
                        buf_flush_wait_batch_end(BUF_FLUSH_LRU);
1245
 
                }
1246
 
        }
1247
 
}
1248
 
 
1249
 
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
1250
 
/**********************************************************************
1251
 
Validates the flush list. */
1252
 
static
1253
 
ibool
1254
 
buf_flush_validate_low(void)
1255
 
/*========================*/
1256
 
                /* out: TRUE if ok */
1257
 
{
1258
 
        buf_page_t*     bpage;
1259
 
 
1260
 
        UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list);
1261
 
 
1262
 
        bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
1263
 
 
1264
 
        while (bpage != NULL) {
1265
 
                const ib_uint64_t om = bpage->oldest_modification;
1266
 
                ut_ad(bpage->in_flush_list);
1267
 
                ut_a(buf_page_in_file(bpage));
1268
 
                ut_a(om > 0);
1269
 
 
1270
 
                bpage = UT_LIST_GET_NEXT(list, bpage);
1271
 
 
1272
 
                ut_a(!bpage || om >= bpage->oldest_modification);
1273
 
        }
1274
 
 
1275
 
        return(TRUE);
1276
 
}
1277
 
 
1278
 
/**********************************************************************
1279
 
Validates the flush list. */
1280
 
UNIV_INTERN
1281
 
ibool
1282
 
buf_flush_validate(void)
1283
 
/*====================*/
1284
 
                /* out: TRUE if ok */
1285
 
{
1286
 
        ibool   ret;
1287
 
 
1288
 
        buf_pool_mutex_enter();
1289
 
 
1290
 
        ret = buf_flush_validate_low();
1291
 
 
1292
 
        buf_pool_mutex_exit();
1293
 
 
1294
 
        return(ret);
1295
 
}
1296
 
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */