~drizzle-trunk/drizzle/development

« back to all changes in this revision

Viewing changes to storage/innobase/buf/buf0flu.c

Imported InnoDB plugin with changes.

Show diffs side-by-side

added added

removed removed

Lines of Context:
16
16
#include "ut0byte.h"
17
17
#include "ut0lst.h"
18
18
#include "page0page.h"
 
19
#include "page0zip.h"
19
20
#include "fil0fil.h"
20
21
#include "buf0buf.h"
21
22
#include "buf0lru.h"
26
27
#include "trx0sys.h"
27
28
#include "srv0srv.h"
28
29
 
29
 
/* When flushed, dirty blocks are searched in neighborhoods of this size, and
30
 
flushed along with the original page. */
31
 
 
32
 
#define BUF_FLUSH_AREA          ut_min(BUF_READ_AHEAD_AREA,\
33
 
                buf_pool->curr_size / 16)
34
 
 
 
30
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
35
31
/**********************************************************************
36
32
Validates the flush list. */
37
33
static
39
35
buf_flush_validate_low(void);
40
36
/*========================*/
41
37
                /* out: TRUE if ok */
 
38
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
42
39
 
43
40
/************************************************************************
44
41
Inserts a modified block into the flush list. */
45
 
 
 
42
UNIV_INTERN
46
43
void
47
44
buf_flush_insert_into_flush_list(
48
45
/*=============================*/
49
 
        buf_block_t*    block)  /* in: block which is modified */
 
46
        buf_page_t*     bpage)  /* in: block which is modified */
50
47
{
51
 
        ut_ad(mutex_own(&(buf_pool->mutex)));
52
 
        ut_a(block->state == BUF_BLOCK_FILE_PAGE);
53
 
 
 
48
        ut_ad(buf_pool_mutex_own());
54
49
        ut_ad((UT_LIST_GET_FIRST(buf_pool->flush_list) == NULL)
55
 
              || (ut_dulint_cmp((UT_LIST_GET_FIRST(buf_pool->flush_list))
56
 
                                ->oldest_modification,
57
 
                                block->oldest_modification) <= 0));
58
 
 
59
 
        UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, block);
60
 
 
61
 
        ut_ad(buf_flush_validate_low());
 
50
              || (UT_LIST_GET_FIRST(buf_pool->flush_list)->oldest_modification
 
51
                  <= bpage->oldest_modification));
 
52
 
 
53
        switch (buf_page_get_state(bpage)) {
 
54
        case BUF_BLOCK_ZIP_PAGE:
 
55
                mutex_enter(&buf_pool_zip_mutex);
 
56
                buf_page_set_state(bpage, BUF_BLOCK_ZIP_DIRTY);
 
57
                mutex_exit(&buf_pool_zip_mutex);
 
58
                UT_LIST_REMOVE(list, buf_pool->zip_clean, bpage);
 
59
                /* fall through */
 
60
        case BUF_BLOCK_ZIP_DIRTY:
 
61
        case BUF_BLOCK_FILE_PAGE:
 
62
                ut_ad(bpage->in_LRU_list);
 
63
                ut_ad(bpage->in_page_hash);
 
64
                ut_ad(!bpage->in_zip_hash);
 
65
                ut_ad(!bpage->in_flush_list);
 
66
                ut_d(bpage->in_flush_list = TRUE);
 
67
                UT_LIST_ADD_FIRST(list, buf_pool->flush_list, bpage);
 
68
                break;
 
69
        case BUF_BLOCK_ZIP_FREE:
 
70
        case BUF_BLOCK_NOT_USED:
 
71
        case BUF_BLOCK_READY_FOR_USE:
 
72
        case BUF_BLOCK_MEMORY:
 
73
        case BUF_BLOCK_REMOVE_HASH:
 
74
                ut_error;
 
75
                return;
 
76
        }
 
77
 
 
78
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 
79
        ut_a(buf_flush_validate_low());
 
80
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
62
81
}
63
82
 
64
83
/************************************************************************
65
84
Inserts a modified block into the flush list in the right sorted position.
66
85
This function is used by recovery, because there the modifications do not
67
86
necessarily come in the order of lsn's. */
68
 
 
 
87
UNIV_INTERN
69
88
void
70
89
buf_flush_insert_sorted_into_flush_list(
71
90
/*====================================*/
72
 
        buf_block_t*    block)  /* in: block which is modified */
 
91
        buf_page_t*     bpage)  /* in: block which is modified */
73
92
{
74
 
        buf_block_t*    prev_b;
75
 
        buf_block_t*    b;
76
 
 
77
 
        ut_ad(mutex_own(&(buf_pool->mutex)));
 
93
        buf_page_t*     prev_b;
 
94
        buf_page_t*     b;
 
95
 
 
96
        ut_ad(buf_pool_mutex_own());
 
97
 
 
98
        switch (buf_page_get_state(bpage)) {
 
99
        case BUF_BLOCK_ZIP_PAGE:
 
100
                mutex_enter(&buf_pool_zip_mutex);
 
101
                buf_page_set_state(bpage, BUF_BLOCK_ZIP_DIRTY);
 
102
                mutex_exit(&buf_pool_zip_mutex);
 
103
                UT_LIST_REMOVE(list, buf_pool->zip_clean, bpage);
 
104
                /* fall through */
 
105
        case BUF_BLOCK_ZIP_DIRTY:
 
106
        case BUF_BLOCK_FILE_PAGE:
 
107
                ut_ad(bpage->in_LRU_list);
 
108
                ut_ad(bpage->in_page_hash);
 
109
                ut_ad(!bpage->in_zip_hash);
 
110
                ut_ad(!bpage->in_flush_list);
 
111
                ut_d(bpage->in_flush_list = TRUE);
 
112
                break;
 
113
        case BUF_BLOCK_ZIP_FREE:
 
114
        case BUF_BLOCK_NOT_USED:
 
115
        case BUF_BLOCK_READY_FOR_USE:
 
116
        case BUF_BLOCK_MEMORY:
 
117
        case BUF_BLOCK_REMOVE_HASH:
 
118
                ut_error;
 
119
                return;
 
120
        }
78
121
 
79
122
        prev_b = NULL;
80
123
        b = UT_LIST_GET_FIRST(buf_pool->flush_list);
81
124
 
82
 
        while (b && (ut_dulint_cmp(b->oldest_modification,
83
 
                                   block->oldest_modification) > 0)) {
 
125
        while (b && b->oldest_modification > bpage->oldest_modification) {
 
126
                ut_ad(b->in_flush_list);
84
127
                prev_b = b;
85
 
                b = UT_LIST_GET_NEXT(flush_list, b);
 
128
                b = UT_LIST_GET_NEXT(list, b);
86
129
        }
87
130
 
88
131
        if (prev_b == NULL) {
89
 
                UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, block);
 
132
                UT_LIST_ADD_FIRST(list, buf_pool->flush_list, bpage);
90
133
        } else {
91
 
                UT_LIST_INSERT_AFTER(flush_list, buf_pool->flush_list, prev_b,
92
 
                                     block);
 
134
                UT_LIST_INSERT_AFTER(list, buf_pool->flush_list,
 
135
                                     prev_b, bpage);
93
136
        }
94
137
 
95
 
        ut_ad(buf_flush_validate_low());
 
138
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 
139
        ut_a(buf_flush_validate_low());
 
140
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
96
141
}
97
142
 
98
143
/************************************************************************
99
144
Returns TRUE if the file page block is immediately suitable for replacement,
100
145
i.e., the transition FILE_PAGE => NOT_USED allowed. */
101
 
 
 
146
UNIV_INTERN
102
147
ibool
103
148
buf_flush_ready_for_replace(
104
149
/*========================*/
105
150
                                /* out: TRUE if can replace immediately */
106
 
        buf_block_t*    block)  /* in: buffer control block, must be in state
107
 
                                BUF_BLOCK_FILE_PAGE and in the LRU list */
 
151
        buf_page_t*     bpage)  /* in: buffer control block, must be
 
152
                                buf_page_in_file(bpage) and in the LRU list */
108
153
{
109
 
        ut_ad(mutex_own(&(buf_pool->mutex)));
110
 
        ut_ad(mutex_own(&block->mutex));
111
 
        if (block->state != BUF_BLOCK_FILE_PAGE) {
112
 
                ut_print_timestamp(stderr);
113
 
                fprintf(stderr,
114
 
                        "  InnoDB: Error: buffer block state %lu"
115
 
                        " in the LRU list!\n",
116
 
                        (ulong)block->state);
117
 
                ut_print_buf(stderr, block, sizeof(buf_block_t));
118
 
 
119
 
                return(FALSE);
120
 
        }
121
 
 
122
 
        if ((ut_dulint_cmp(block->oldest_modification, ut_dulint_zero) > 0)
123
 
            || (block->buf_fix_count != 0)
124
 
            || (block->io_fix != 0)) {
125
 
 
126
 
                return(FALSE);
127
 
        }
128
 
 
129
 
        return(TRUE);
 
154
        ut_ad(buf_pool_mutex_own());
 
155
        ut_ad(mutex_own(buf_page_get_mutex(bpage)));
 
156
        ut_ad(bpage->in_LRU_list);
 
157
 
 
158
        if (UNIV_LIKELY(buf_page_in_file(bpage))) {
 
159
 
 
160
                return(bpage->oldest_modification == 0
 
161
                       && buf_page_get_io_fix(bpage) == BUF_IO_NONE
 
162
                       && bpage->buf_fix_count == 0);
 
163
        }
 
164
 
 
165
        ut_print_timestamp(stderr);
 
166
        fprintf(stderr,
 
167
                "  InnoDB: Error: buffer block state %lu"
 
168
                " in the LRU list!\n",
 
169
                (ulong) buf_page_get_state(bpage));
 
170
        ut_print_buf(stderr, bpage, sizeof(buf_page_t));
 
171
 
 
172
        return(FALSE);
130
173
}
131
174
 
132
175
/************************************************************************
136
179
buf_flush_ready_for_flush(
137
180
/*======================*/
138
181
                                /* out: TRUE if can flush immediately */
139
 
        buf_block_t*    block,  /* in: buffer control block, must be in state
140
 
                                BUF_BLOCK_FILE_PAGE */
141
 
        ulint           flush_type)/* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
 
182
        buf_page_t*     bpage,  /* in: buffer control block, must be
 
183
                                buf_page_in_file(bpage) */
 
184
        enum buf_flush  flush_type)/* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
142
185
{
143
 
        ut_ad(mutex_own(&(buf_pool->mutex)));
144
 
        ut_ad(mutex_own(&(block->mutex)));
145
 
        ut_a(block->state == BUF_BLOCK_FILE_PAGE);
146
 
 
147
 
        if ((ut_dulint_cmp(block->oldest_modification, ut_dulint_zero) > 0)
148
 
            && (block->io_fix == 0)) {
 
186
        ut_a(buf_page_in_file(bpage));
 
187
        ut_ad(buf_pool_mutex_own());
 
188
        ut_ad(mutex_own(buf_page_get_mutex(bpage)));
 
189
 
 
190
        if (bpage->oldest_modification != 0
 
191
            && buf_page_get_io_fix(bpage) == BUF_IO_NONE) {
 
192
                ut_ad(bpage->in_flush_list);
 
193
 
149
194
                if (flush_type != BUF_FLUSH_LRU) {
150
195
 
151
196
                        return(TRUE);
152
197
 
153
 
                } else if (block->buf_fix_count == 0) {
 
198
                } else if (bpage->buf_fix_count == 0) {
154
199
 
155
200
                        /* If we are flushing the LRU list, to avoid deadlocks
156
201
                        we require the block not to be bufferfixed, and hence
164
209
}
165
210
 
166
211
/************************************************************************
 
212
Remove a block from the flush list of modified blocks. */
 
213
UNIV_INTERN
 
214
void
 
215
buf_flush_remove(
 
216
/*=============*/
 
217
        buf_page_t*     bpage)  /* in: pointer to the block in question */
 
218
{
 
219
        ut_ad(buf_pool_mutex_own());
 
220
        ut_ad(mutex_own(buf_page_get_mutex(bpage)));
 
221
        ut_ad(bpage->in_flush_list);
 
222
        ut_d(bpage->in_flush_list = FALSE);
 
223
 
 
224
        switch (buf_page_get_state(bpage)) {
 
225
        case BUF_BLOCK_ZIP_PAGE:
 
226
                /* clean compressed pages should not be on the flush list */
 
227
        case BUF_BLOCK_ZIP_FREE:
 
228
        case BUF_BLOCK_NOT_USED:
 
229
        case BUF_BLOCK_READY_FOR_USE:
 
230
        case BUF_BLOCK_MEMORY:
 
231
        case BUF_BLOCK_REMOVE_HASH:
 
232
                ut_error;
 
233
                return;
 
234
        case BUF_BLOCK_ZIP_DIRTY:
 
235
                buf_page_set_state(bpage, BUF_BLOCK_ZIP_PAGE);
 
236
                UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
 
237
                buf_LRU_insert_zip_clean(bpage);
 
238
                break;
 
239
        case BUF_BLOCK_FILE_PAGE:
 
240
                UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
 
241
                break;
 
242
        }
 
243
 
 
244
        bpage->oldest_modification = 0;
 
245
 
 
246
        ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list));
 
247
}
 
248
 
 
249
/************************************************************************
167
250
Updates the flush system data structures when a write is completed. */
168
 
 
 
251
UNIV_INTERN
169
252
void
170
253
buf_flush_write_complete(
171
254
/*=====================*/
172
 
        buf_block_t*    block)  /* in: pointer to the block in question */
 
255
        buf_page_t*     bpage)  /* in: pointer to the block in question */
173
256
{
174
 
        ut_ad(block);
175
 
#ifdef UNIV_SYNC_DEBUG
176
 
        ut_ad(mutex_own(&(buf_pool->mutex)));
177
 
#endif /* UNIV_SYNC_DEBUG */
178
 
        ut_a(block->state == BUF_BLOCK_FILE_PAGE);
179
 
 
180
 
        block->oldest_modification = ut_dulint_zero;
181
 
 
182
 
        UT_LIST_REMOVE(flush_list, buf_pool->flush_list, block);
183
 
 
184
 
        ut_d(UT_LIST_VALIDATE(flush_list, buf_block_t, buf_pool->flush_list));
185
 
 
186
 
        (buf_pool->n_flush[block->flush_type])--;
187
 
 
188
 
        if (block->flush_type == BUF_FLUSH_LRU) {
 
257
        enum buf_flush  flush_type;
 
258
 
 
259
        ut_ad(bpage);
 
260
 
 
261
        buf_flush_remove(bpage);
 
262
 
 
263
        flush_type = buf_page_get_flush_type(bpage);
 
264
        buf_pool->n_flush[flush_type]--;
 
265
 
 
266
        if (flush_type == BUF_FLUSH_LRU) {
189
267
                /* Put the block to the end of the LRU list to wait to be
190
268
                moved to the free list */
191
269
 
192
 
                buf_LRU_make_block_old(block);
 
270
                buf_LRU_make_block_old(bpage);
193
271
 
194
272
                buf_pool->LRU_flush_ended++;
195
273
        }
196
274
 
197
275
        /* fprintf(stderr, "n pending flush %lu\n",
198
 
        buf_pool->n_flush[block->flush_type]); */
 
276
        buf_pool->n_flush[flush_type]); */
199
277
 
200
 
        if ((buf_pool->n_flush[block->flush_type] == 0)
201
 
            && (buf_pool->init_flush[block->flush_type] == FALSE)) {
 
278
        if ((buf_pool->n_flush[flush_type] == 0)
 
279
            && (buf_pool->init_flush[flush_type] == FALSE)) {
202
280
 
203
281
                /* The running flush batch has ended */
204
282
 
205
 
                os_event_set(buf_pool->no_flush[block->flush_type]);
 
283
                os_event_set(buf_pool->no_flush[flush_type]);
206
284
        }
207
285
}
208
286
 
217
295
buf_flush_buffered_writes(void)
218
296
/*===========================*/
219
297
{
220
 
        buf_block_t*    block;
221
298
        byte*           write_buf;
222
299
        ulint           len;
223
300
        ulint           len2;
244
321
 
245
322
        for (i = 0; i < trx_doublewrite->first_free; i++) {
246
323
 
247
 
                block = trx_doublewrite->buf_block_arr[i];
248
 
                ut_a(block->state == BUF_BLOCK_FILE_PAGE);
249
 
 
250
 
                if (mach_read_from_4(block->frame + FIL_PAGE_LSN + 4)
251
 
                    != mach_read_from_4(block->frame + UNIV_PAGE_SIZE
252
 
                                        - FIL_PAGE_END_LSN_OLD_CHKSUM + 4)) {
 
324
                const buf_block_t*      block;
 
325
 
 
326
                block = (buf_block_t*) trx_doublewrite->buf_block_arr[i];
 
327
 
 
328
                if (buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE
 
329
                    || block->page.zip.data) {
 
330
                        /* No simple validate for compressed pages exists. */
 
331
                        continue;
 
332
                }
 
333
 
 
334
                if (UNIV_UNLIKELY
 
335
                    (memcmp(block->frame + (FIL_PAGE_LSN + 4),
 
336
                            block->frame + (UNIV_PAGE_SIZE
 
337
                                            - FIL_PAGE_END_LSN_OLD_CHKSUM + 4),
 
338
                            4))) {
253
339
                        ut_print_timestamp(stderr);
254
340
                        fprintf(stderr,
255
341
                                "  InnoDB: ERROR: The page to be written"
260
346
                                " doublewrite buffer.\n");
261
347
                }
262
348
 
263
 
                if (block->check_index_page_at_flush
264
 
                    && !page_simple_validate(block->frame)) {
265
 
 
266
 
                        buf_page_print(block->frame);
267
 
 
268
 
                        ut_print_timestamp(stderr);
269
 
                        fprintf(stderr,
270
 
                                "  InnoDB: Apparent corruption of an"
271
 
                                " index page n:o %lu in space %lu\n"
272
 
                                "InnoDB: to be written to data file."
273
 
                                " We intentionally crash server\n"
274
 
                                "InnoDB: to prevent corrupt data"
275
 
                                " from ending up in data\n"
276
 
                                "InnoDB: files.\n",
277
 
                                (ulong) block->offset, (ulong) block->space);
278
 
 
279
 
                        ut_error;
 
349
                if (!block->check_index_page_at_flush) {
 
350
                } else if (page_is_comp(block->frame)) {
 
351
                        if (UNIV_UNLIKELY
 
352
                            (!page_simple_validate_new(block->frame))) {
 
353
corrupted_page:
 
354
                                buf_page_print(block->frame, 0);
 
355
 
 
356
                                ut_print_timestamp(stderr);
 
357
                                fprintf(stderr,
 
358
                                        "  InnoDB: Apparent corruption of an"
 
359
                                        " index page n:o %lu in space %lu\n"
 
360
                                        "InnoDB: to be written to data file."
 
361
                                        " We intentionally crash server\n"
 
362
                                        "InnoDB: to prevent corrupt data"
 
363
                                        " from ending up in data\n"
 
364
                                        "InnoDB: files.\n",
 
365
                                        (ulong) buf_block_get_page_no(block),
 
366
                                        (ulong) buf_block_get_space(block));
 
367
 
 
368
                                ut_error;
 
369
                        }
 
370
                } else if (UNIV_UNLIKELY
 
371
                           (!page_simple_validate_old(block->frame))) {
 
372
 
 
373
                        goto corrupted_page;
280
374
                }
281
375
        }
282
376
 
284
378
        srv_dblwr_pages_written+= trx_doublewrite->first_free;
285
379
        srv_dblwr_writes++;
286
380
 
287
 
        if (trx_doublewrite->first_free > TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
288
 
                len = TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE;
289
 
        } else {
290
 
                len = trx_doublewrite->first_free * UNIV_PAGE_SIZE;
291
 
        }
292
 
 
293
 
        fil_io(OS_FILE_WRITE,
294
 
               TRUE, TRX_SYS_SPACE,
 
381
        len = ut_min(TRX_SYS_DOUBLEWRITE_BLOCK_SIZE,
 
382
                     trx_doublewrite->first_free) * UNIV_PAGE_SIZE;
 
383
 
 
384
        write_buf = trx_doublewrite->write_buf;
 
385
        i = 0;
 
386
 
 
387
        fil_io(OS_FILE_WRITE, TRUE, TRX_SYS_SPACE, 0,
295
388
               trx_doublewrite->block1, 0, len,
296
 
               (void*)trx_doublewrite->write_buf, NULL);
297
 
 
298
 
        write_buf = trx_doublewrite->write_buf;
299
 
 
300
 
        for (len2 = 0; len2 + UNIV_PAGE_SIZE <= len; len2 += UNIV_PAGE_SIZE) {
301
 
                if (mach_read_from_4(write_buf + len2 + FIL_PAGE_LSN + 4)
302
 
                    != mach_read_from_4(write_buf + len2 + UNIV_PAGE_SIZE
303
 
                                        - FIL_PAGE_END_LSN_OLD_CHKSUM + 4)) {
 
389
               (void*) write_buf, NULL);
 
390
 
 
391
        for (len2 = 0; len2 + UNIV_PAGE_SIZE <= len;
 
392
             len2 += UNIV_PAGE_SIZE, i++) {
 
393
                const buf_block_t* block = (buf_block_t*)
 
394
                        trx_doublewrite->buf_block_arr[i];
 
395
 
 
396
                if (UNIV_LIKELY(!block->page.zip.data)
 
397
                    && UNIV_LIKELY(buf_block_get_state(block)
 
398
                                   == BUF_BLOCK_FILE_PAGE)
 
399
                    && UNIV_UNLIKELY
 
400
                    (memcmp(write_buf + len2 + (FIL_PAGE_LSN + 4),
 
401
                            write_buf + len2
 
402
                            + (UNIV_PAGE_SIZE
 
403
                               - FIL_PAGE_END_LSN_OLD_CHKSUM + 4), 4))) {
304
404
                        ut_print_timestamp(stderr);
305
405
                        fprintf(stderr,
306
406
                                "  InnoDB: ERROR: The page to be written"
310
410
                }
311
411
        }
312
412
 
313
 
        if (trx_doublewrite->first_free > TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
314
 
                len = (trx_doublewrite->first_free
315
 
                       - TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) * UNIV_PAGE_SIZE;
316
 
 
317
 
                fil_io(OS_FILE_WRITE,
318
 
                       TRUE, TRX_SYS_SPACE,
319
 
                       trx_doublewrite->block2, 0, len,
320
 
                       (void*)(trx_doublewrite->write_buf
321
 
                               + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
322
 
                               * UNIV_PAGE_SIZE),
323
 
                       NULL);
324
 
 
325
 
                write_buf = trx_doublewrite->write_buf
326
 
                        + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE;
327
 
                for (len2 = 0; len2 + UNIV_PAGE_SIZE <= len;
328
 
                     len2 += UNIV_PAGE_SIZE) {
329
 
                        if (mach_read_from_4(write_buf + len2
330
 
                                             + FIL_PAGE_LSN + 4)
331
 
                            != mach_read_from_4(write_buf + len2
332
 
                                                + UNIV_PAGE_SIZE
333
 
                                                - FIL_PAGE_END_LSN_OLD_CHKSUM
334
 
                                                + 4)) {
335
 
                                ut_print_timestamp(stderr);
336
 
                                fprintf(stderr,
337
 
                                        "  InnoDB: ERROR: The page to be"
338
 
                                        " written seems corrupt!\n"
339
 
                                        "InnoDB: The lsn fields do not match!"
340
 
                                        " Noticed in"
341
 
                                        " the doublewrite block2.\n");
342
 
                        }
 
413
        if (trx_doublewrite->first_free <= TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
 
414
                goto flush;
 
415
        }
 
416
 
 
417
        len = (trx_doublewrite->first_free - TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)
 
418
                * UNIV_PAGE_SIZE;
 
419
 
 
420
        write_buf = trx_doublewrite->write_buf
 
421
                + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE;
 
422
        ut_ad(i == TRX_SYS_DOUBLEWRITE_BLOCK_SIZE);
 
423
 
 
424
        fil_io(OS_FILE_WRITE, TRUE, TRX_SYS_SPACE, 0,
 
425
               trx_doublewrite->block2, 0, len,
 
426
               (void*) write_buf, NULL);
 
427
 
 
428
        for (len2 = 0; len2 + UNIV_PAGE_SIZE <= len;
 
429
             len2 += UNIV_PAGE_SIZE, i++) {
 
430
                const buf_block_t* block = (buf_block_t*)
 
431
                        trx_doublewrite->buf_block_arr[i];
 
432
 
 
433
                if (UNIV_LIKELY(!block->page.zip.data)
 
434
                    && UNIV_LIKELY(buf_block_get_state(block)
 
435
                                   == BUF_BLOCK_FILE_PAGE)
 
436
                    && UNIV_UNLIKELY
 
437
                    (memcmp(write_buf + len2 + (FIL_PAGE_LSN + 4),
 
438
                            write_buf + len2
 
439
                            + (UNIV_PAGE_SIZE
 
440
                               - FIL_PAGE_END_LSN_OLD_CHKSUM + 4), 4))) {
 
441
                        ut_print_timestamp(stderr);
 
442
                        fprintf(stderr,
 
443
                                "  InnoDB: ERROR: The page to be"
 
444
                                " written seems corrupt!\n"
 
445
                                "InnoDB: The lsn fields do not match!"
 
446
                                " Noticed in"
 
447
                                " the doublewrite block2.\n");
343
448
                }
344
449
        }
345
450
 
 
451
flush:
346
452
        /* Now flush the doublewrite buffer data to disk */
347
453
 
348
454
        fil_flush(TRX_SYS_SPACE);
352
458
        blocks. Next do the writes to the intended positions. */
353
459
 
354
460
        for (i = 0; i < trx_doublewrite->first_free; i++) {
355
 
                block = trx_doublewrite->buf_block_arr[i];
356
 
 
357
 
                if (mach_read_from_4(block->frame + FIL_PAGE_LSN + 4)
358
 
                    != mach_read_from_4(block->frame + UNIV_PAGE_SIZE
359
 
                                        - FIL_PAGE_END_LSN_OLD_CHKSUM + 4)) {
 
461
                const buf_block_t* block = (buf_block_t*)
 
462
                        trx_doublewrite->buf_block_arr[i];
 
463
 
 
464
                ut_a(buf_page_in_file(&block->page));
 
465
                if (UNIV_LIKELY_NULL(block->page.zip.data)) {
 
466
                        fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
 
467
                               FALSE, buf_page_get_space(&block->page),
 
468
                               buf_page_get_zip_size(&block->page),
 
469
                               buf_page_get_page_no(&block->page), 0,
 
470
                               buf_page_get_zip_size(&block->page),
 
471
                               (void*)block->page.zip.data,
 
472
                               (void*)block);
 
473
 
 
474
                        /* Increment the counter of I/O operations used
 
475
                        for selecting LRU policy. */
 
476
                        buf_LRU_stat_inc_io();
 
477
 
 
478
                        continue;
 
479
                }
 
480
 
 
481
                ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
 
482
 
 
483
                if (UNIV_UNLIKELY(memcmp(block->frame + (FIL_PAGE_LSN + 4),
 
484
                                         block->frame
 
485
                                         + (UNIV_PAGE_SIZE
 
486
                                            - FIL_PAGE_END_LSN_OLD_CHKSUM + 4),
 
487
                                         4))) {
360
488
                        ut_print_timestamp(stderr);
361
489
                        fprintf(stderr,
362
490
                                "  InnoDB: ERROR: The page to be written"
367
495
                                " the doublewrite buffer.\n"
368
496
                                "InnoDB: Page buf fix count %lu,"
369
497
                                " io fix %lu, state %lu\n",
370
 
                                (ulong)block->buf_fix_count,
371
 
                                (ulong)block->io_fix,
372
 
                                (ulong)block->state);
 
498
                                (ulong)block->page.buf_fix_count,
 
499
                                (ulong)buf_block_get_io_fix(block),
 
500
                                (ulong)buf_block_get_state(block));
373
501
                }
374
 
                ut_a(block->state == BUF_BLOCK_FILE_PAGE);
375
502
 
376
503
                fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
377
 
                       FALSE, block->space, block->offset, 0, UNIV_PAGE_SIZE,
 
504
                       FALSE, buf_block_get_space(block), 0,
 
505
                       buf_block_get_page_no(block), 0, UNIV_PAGE_SIZE,
378
506
                       (void*)block->frame, (void*)block);
 
507
 
 
508
                /* Increment the counter of I/O operations used
 
509
                for selecting LRU policy. */
 
510
                buf_LRU_stat_inc_io();
379
511
        }
380
512
 
381
513
        /* Wake possible simulated aio thread to actually post the
407
539
void
408
540
buf_flush_post_to_doublewrite_buf(
409
541
/*==============================*/
410
 
        buf_block_t*    block)  /* in: buffer block to write */
 
542
        buf_page_t*     bpage)  /* in: buffer block to write */
411
543
{
 
544
        ulint   zip_size;
412
545
try_again:
413
546
        mutex_enter(&(trx_doublewrite->mutex));
414
547
 
415
 
        ut_a(block->state == BUF_BLOCK_FILE_PAGE);
 
548
        ut_a(buf_page_in_file(bpage));
416
549
 
417
550
        if (trx_doublewrite->first_free
418
551
            >= 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
423
556
                goto try_again;
424
557
        }
425
558
 
426
 
        ut_memcpy(trx_doublewrite->write_buf
427
 
                  + UNIV_PAGE_SIZE * trx_doublewrite->first_free,
428
 
                  block->frame, UNIV_PAGE_SIZE);
429
 
 
430
 
        trx_doublewrite->buf_block_arr[trx_doublewrite->first_free] = block;
 
559
        zip_size = buf_page_get_zip_size(bpage);
 
560
 
 
561
        if (UNIV_UNLIKELY(zip_size)) {
 
562
                /* Copy the compressed page and clear the rest. */
 
563
                memcpy(trx_doublewrite->write_buf
 
564
                       + UNIV_PAGE_SIZE * trx_doublewrite->first_free,
 
565
                       bpage->zip.data, zip_size);
 
566
                memset(trx_doublewrite->write_buf
 
567
                       + UNIV_PAGE_SIZE * trx_doublewrite->first_free
 
568
                       + zip_size, 0, UNIV_PAGE_SIZE - zip_size);
 
569
        } else {
 
570
                ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
 
571
 
 
572
                memcpy(trx_doublewrite->write_buf
 
573
                       + UNIV_PAGE_SIZE * trx_doublewrite->first_free,
 
574
                       ((buf_block_t*) bpage)->frame, UNIV_PAGE_SIZE);
 
575
        }
 
576
 
 
577
        trx_doublewrite->buf_block_arr[trx_doublewrite->first_free] = bpage;
431
578
 
432
579
        trx_doublewrite->first_free++;
433
580
 
445
592
 
446
593
/************************************************************************
447
594
Initializes a page for writing to the tablespace. */
448
 
 
 
595
UNIV_INTERN
449
596
void
450
597
buf_flush_init_for_writing(
451
598
/*=======================*/
452
 
        byte*   page,           /* in: page */
453
 
        dulint  newest_lsn,     /* in: newest modification lsn to the page */
454
 
        ulint   space,          /* in: space id */
455
 
        ulint   page_no)        /* in: page number */
 
599
        byte*           page,           /* in/out: page */
 
600
        void*           page_zip_,      /* in/out: compressed page, or NULL */
 
601
        ib_uint64_t     newest_lsn)     /* in: newest modification lsn
 
602
                                        to the page */
456
603
{
 
604
        ut_ad(page);
 
605
 
 
606
        if (page_zip_) {
 
607
                page_zip_des_t* page_zip = page_zip_;
 
608
                ulint           zip_size = page_zip_get_size(page_zip);
 
609
                ut_ad(zip_size);
 
610
                ut_ad(ut_is_2pow(zip_size));
 
611
                ut_ad(zip_size <= UNIV_PAGE_SIZE);
 
612
 
 
613
                switch (UNIV_EXPECT(fil_page_get_type(page), FIL_PAGE_INDEX)) {
 
614
                case FIL_PAGE_TYPE_ALLOCATED:
 
615
                case FIL_PAGE_INODE:
 
616
                case FIL_PAGE_IBUF_BITMAP:
 
617
                case FIL_PAGE_TYPE_FSP_HDR:
 
618
                case FIL_PAGE_TYPE_XDES:
 
619
                        /* These are essentially uncompressed pages. */
 
620
                        memcpy(page_zip->data, page, zip_size);
 
621
                        /* fall through */
 
622
                case FIL_PAGE_TYPE_ZBLOB:
 
623
                case FIL_PAGE_TYPE_ZBLOB2:
 
624
                case FIL_PAGE_INDEX:
 
625
                        mach_write_ull(page_zip->data
 
626
                                       + FIL_PAGE_LSN, newest_lsn);
 
627
                        memset(page_zip->data + FIL_PAGE_FILE_FLUSH_LSN, 0, 8);
 
628
                        mach_write_to_4(page_zip->data
 
629
                                        + FIL_PAGE_SPACE_OR_CHKSUM,
 
630
                                        srv_use_checksums
 
631
                                        ? page_zip_calc_checksum(
 
632
                                                page_zip->data, zip_size)
 
633
                                        : BUF_NO_CHECKSUM_MAGIC);
 
634
                        return;
 
635
                }
 
636
 
 
637
                ut_error;
 
638
        }
 
639
 
457
640
        /* Write the newest modification lsn to the page header and trailer */
458
 
        mach_write_to_8(page + FIL_PAGE_LSN, newest_lsn);
459
 
 
460
 
        mach_write_to_8(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
461
 
                        newest_lsn);
462
 
        /* Write the page number and the space id */
463
 
 
464
 
        mach_write_to_4(page + FIL_PAGE_OFFSET, page_no);
465
 
        mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, space);
 
641
        mach_write_ull(page + FIL_PAGE_LSN, newest_lsn);
 
642
 
 
643
        mach_write_ull(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
 
644
                       newest_lsn);
466
645
 
467
646
        /* Store the new formula checksum */
468
647
 
490
669
void
491
670
buf_flush_write_block_low(
492
671
/*======================*/
493
 
        buf_block_t*    block)  /* in: buffer block to write */
 
672
        buf_page_t*     bpage)  /* in: buffer block to write */
494
673
{
 
674
        ulint   zip_size        = buf_page_get_zip_size(bpage);
 
675
        page_t* frame           = NULL;
495
676
#ifdef UNIV_LOG_DEBUG
496
677
        static ibool univ_log_debug_warned;
497
678
#endif /* UNIV_LOG_DEBUG */
498
 
        ut_a(block->state == BUF_BLOCK_FILE_PAGE);
499
 
 
500
 
#ifdef UNIV_IBUF_DEBUG
501
 
        ut_a(ibuf_count_get(block->space, block->offset) == 0);
 
679
 
 
680
        ut_ad(buf_page_in_file(bpage));
 
681
 
 
682
#ifdef UNIV_IBUF_COUNT_DEBUG
 
683
        ut_a(ibuf_count_get(bpage->space, bpage->offset) == 0);
502
684
#endif
503
 
        ut_ad(!ut_dulint_is_zero(block->newest_modification));
 
685
        ut_ad(bpage->newest_modification != 0);
504
686
 
505
687
#ifdef UNIV_LOG_DEBUG
506
688
        if (!univ_log_debug_warned) {
512
694
        }
513
695
#else
514
696
        /* Force the log to the disk before writing the modified block */
515
 
        log_write_up_to(block->newest_modification, LOG_WAIT_ALL_GROUPS, TRUE);
 
697
        log_write_up_to(bpage->newest_modification, LOG_WAIT_ALL_GROUPS, TRUE);
516
698
#endif
517
 
        buf_flush_init_for_writing(block->frame, block->newest_modification,
518
 
                                   block->space, block->offset);
 
699
        switch (buf_page_get_state(bpage)) {
 
700
        case BUF_BLOCK_ZIP_FREE:
 
701
        case BUF_BLOCK_ZIP_PAGE: /* The page should be dirty. */
 
702
        case BUF_BLOCK_NOT_USED:
 
703
        case BUF_BLOCK_READY_FOR_USE:
 
704
        case BUF_BLOCK_MEMORY:
 
705
        case BUF_BLOCK_REMOVE_HASH:
 
706
                ut_error;
 
707
                break;
 
708
        case BUF_BLOCK_ZIP_DIRTY:
 
709
                frame = bpage->zip.data;
 
710
                if (UNIV_LIKELY(srv_use_checksums)) {
 
711
                        ut_a(mach_read_from_4(frame + FIL_PAGE_SPACE_OR_CHKSUM)
 
712
                             == page_zip_calc_checksum(frame, zip_size));
 
713
                }
 
714
                mach_write_ull(frame + FIL_PAGE_LSN,
 
715
                               bpage->newest_modification);
 
716
                memset(frame + FIL_PAGE_FILE_FLUSH_LSN, 0, 8);
 
717
                break;
 
718
        case BUF_BLOCK_FILE_PAGE:
 
719
                frame = bpage->zip.data;
 
720
                if (!frame) {
 
721
                        frame = ((buf_block_t*) bpage)->frame;
 
722
                }
 
723
 
 
724
                buf_flush_init_for_writing(((buf_block_t*) bpage)->frame,
 
725
                                           bpage->zip.data
 
726
                                           ? &bpage->zip : NULL,
 
727
                                           bpage->newest_modification);
 
728
                break;
 
729
        }
 
730
 
519
731
        if (!srv_use_doublewrite_buf || !trx_doublewrite) {
520
732
                fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
521
 
                       FALSE, block->space, block->offset, 0, UNIV_PAGE_SIZE,
522
 
                       (void*)block->frame, (void*)block);
 
733
                       FALSE, buf_page_get_space(bpage), zip_size,
 
734
                       buf_page_get_page_no(bpage), 0,
 
735
                       zip_size ? zip_size : UNIV_PAGE_SIZE,
 
736
                       frame, bpage);
523
737
        } else {
524
 
                buf_flush_post_to_doublewrite_buf(block);
 
738
                buf_flush_post_to_doublewrite_buf(bpage);
525
739
        }
526
740
}
527
741
 
534
748
ulint
535
749
buf_flush_try_page(
536
750
/*===============*/
537
 
                                /* out: 1 if a page was flushed, 0 otherwise */
538
 
        ulint   space,          /* in: space id */
539
 
        ulint   offset,         /* in: page offset */
540
 
        ulint   flush_type)     /* in: BUF_FLUSH_LRU, BUF_FLUSH_LIST, or
541
 
                                BUF_FLUSH_SINGLE_PAGE */
 
751
                                        /* out: 1 if a page was
 
752
                                        flushed, 0 otherwise */
 
753
        ulint           space,          /* in: space id */
 
754
        ulint           offset,         /* in: page offset */
 
755
        enum buf_flush  flush_type)     /* in: BUF_FLUSH_LRU, BUF_FLUSH_LIST,
 
756
                                        or BUF_FLUSH_SINGLE_PAGE */
542
757
{
543
 
        buf_block_t*    block;
 
758
        buf_page_t*     bpage;
 
759
        mutex_t*        block_mutex;
544
760
        ibool           locked;
545
761
 
546
762
        ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST
547
763
              || flush_type == BUF_FLUSH_SINGLE_PAGE);
548
764
 
549
 
        mutex_enter(&(buf_pool->mutex));
550
 
 
551
 
        block = buf_page_hash_get(space, offset);
552
 
 
553
 
        ut_a(!block || block->state == BUF_BLOCK_FILE_PAGE);
554
 
 
555
 
        if (!block) {
556
 
                mutex_exit(&(buf_pool->mutex));
557
 
                return(0);
558
 
        }
559
 
 
560
 
        mutex_enter(&block->mutex);
561
 
 
562
 
        if (flush_type == BUF_FLUSH_LIST
563
 
            && buf_flush_ready_for_flush(block, flush_type)) {
564
 
 
565
 
                block->io_fix = BUF_IO_WRITE;
566
 
 
567
 
                /* If AWE is enabled and the page is not mapped to a frame,
568
 
                then map it */
569
 
 
570
 
                if (block->frame == NULL) {
571
 
                        ut_a(srv_use_awe);
572
 
 
573
 
                        /* We set second parameter TRUE because the block is
574
 
                        in the LRU list and we must put it to
575
 
                        awe_LRU_free_mapped list once mapped to a frame */
576
 
 
577
 
                        buf_awe_map_page_to_frame(block, TRUE);
578
 
                }
579
 
 
580
 
                block->flush_type = flush_type;
 
765
        buf_pool_mutex_enter();
 
766
 
 
767
        bpage = buf_page_hash_get(space, offset);
 
768
 
 
769
        if (!bpage) {
 
770
                buf_pool_mutex_exit();
 
771
                return(0);
 
772
        }
 
773
 
 
774
        ut_a(buf_page_in_file(bpage));
 
775
        block_mutex = buf_page_get_mutex(bpage);
 
776
 
 
777
        mutex_enter(block_mutex);
 
778
 
 
779
        if (!buf_flush_ready_for_flush(bpage, flush_type)) {
 
780
                mutex_exit(block_mutex);
 
781
                buf_pool_mutex_exit();
 
782
                return(0);
 
783
        }
 
784
 
 
785
        switch (flush_type) {
 
786
        case BUF_FLUSH_LIST:
 
787
                buf_page_set_io_fix(bpage, BUF_IO_WRITE);
 
788
 
 
789
                buf_page_set_flush_type(bpage, flush_type);
581
790
 
582
791
                if (buf_pool->n_flush[flush_type] == 0) {
583
792
 
584
793
                        os_event_reset(buf_pool->no_flush[flush_type]);
585
794
                }
586
795
 
587
 
                (buf_pool->n_flush[flush_type])++;
588
 
 
589
 
                locked = FALSE;
 
796
                buf_pool->n_flush[flush_type]++;
590
797
 
591
798
                /* If the simulated aio thread is not running, we must
592
799
                not wait for any latch, as we may end up in a deadlock:
593
800
                if buf_fix_count == 0, then we know we need not wait */
594
801
 
595
 
                if (block->buf_fix_count == 0) {
596
 
                        rw_lock_s_lock_gen(&(block->lock), BUF_IO_WRITE);
597
 
 
598
 
                        locked = TRUE;
 
802
                locked = bpage->buf_fix_count == 0;
 
803
                if (locked
 
804
                    && buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE) {
 
805
                        rw_lock_s_lock_gen(&((buf_block_t*) bpage)->lock,
 
806
                                           BUF_IO_WRITE);
599
807
                }
600
808
 
601
 
                mutex_exit(&block->mutex);
602
 
                mutex_exit(&(buf_pool->mutex));
 
809
                mutex_exit(block_mutex);
 
810
                buf_pool_mutex_exit();
603
811
 
604
812
                if (!locked) {
605
813
                        buf_flush_buffered_writes();
606
814
 
607
 
                        rw_lock_s_lock_gen(&(block->lock), BUF_IO_WRITE);
608
 
                }
609
 
 
610
 
#ifdef UNIV_DEBUG
611
 
                if (buf_debug_prints) {
612
 
                        fprintf(stderr,
613
 
                                "Flushing page space %lu, page no %lu \n",
614
 
                                (ulong) block->space, (ulong) block->offset);
615
 
                }
616
 
#endif /* UNIV_DEBUG */
617
 
 
618
 
                buf_flush_write_block_low(block);
619
 
 
620
 
                return(1);
621
 
 
622
 
        } else if (flush_type == BUF_FLUSH_LRU
623
 
                   && buf_flush_ready_for_flush(block, flush_type)) {
624
 
 
 
815
                        if (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE) {
 
816
                                rw_lock_s_lock_gen(&((buf_block_t*) bpage)
 
817
                                                   ->lock, BUF_IO_WRITE);
 
818
                        }
 
819
                }
 
820
 
 
821
                break;
 
822
 
 
823
        case BUF_FLUSH_LRU:
625
824
                /* VERY IMPORTANT:
626
825
                Because any thread may call the LRU flush, even when owning
627
826
                locks on pages, to avoid deadlocks, we must make sure that the
630
829
                the page not to be bufferfixed (in function
631
830
                ..._ready_for_flush). */
632
831
 
633
 
                block->io_fix = BUF_IO_WRITE;
634
 
 
635
 
                /* If AWE is enabled and the page is not mapped to a frame,
636
 
                then map it */
637
 
 
638
 
                if (block->frame == NULL) {
639
 
                        ut_a(srv_use_awe);
640
 
 
641
 
                        /* We set second parameter TRUE because the block is
642
 
                        in the LRU list and we must put it to
643
 
                        awe_LRU_free_mapped list once mapped to a frame */
644
 
 
645
 
                        buf_awe_map_page_to_frame(block, TRUE);
646
 
                }
647
 
 
648
 
                block->flush_type = flush_type;
 
832
                buf_page_set_io_fix(bpage, BUF_IO_WRITE);
 
833
 
 
834
                buf_page_set_flush_type(bpage, flush_type);
649
835
 
650
836
                if (buf_pool->n_flush[flush_type] == 0) {
651
837
 
652
838
                        os_event_reset(buf_pool->no_flush[flush_type]);
653
839
                }
654
840
 
655
 
                (buf_pool->n_flush[flush_type])++;
 
841
                buf_pool->n_flush[flush_type]++;
656
842
 
657
 
                rw_lock_s_lock_gen(&(block->lock), BUF_IO_WRITE);
 
843
                if (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE) {
 
844
                        rw_lock_s_lock_gen(&((buf_block_t*) bpage)->lock,
 
845
                                           BUF_IO_WRITE);
 
846
                }
658
847
 
659
848
                /* Note that the s-latch is acquired before releasing the
660
849
                buf_pool mutex: this ensures that the latch is acquired
661
850
                immediately. */
662
851
 
663
 
                mutex_exit(&block->mutex);
664
 
                mutex_exit(&(buf_pool->mutex));
665
 
 
666
 
                buf_flush_write_block_low(block);
667
 
 
668
 
                return(1);
669
 
 
670
 
        } else if (flush_type == BUF_FLUSH_SINGLE_PAGE
671
 
                   && buf_flush_ready_for_flush(block, flush_type)) {
672
 
 
673
 
                block->io_fix = BUF_IO_WRITE;
674
 
 
675
 
                /* If AWE is enabled and the page is not mapped to a frame,
676
 
                then map it */
677
 
 
678
 
                if (block->frame == NULL) {
679
 
                        ut_a(srv_use_awe);
680
 
 
681
 
                        /* We set second parameter TRUE because the block is
682
 
                        in the LRU list and we must put it to
683
 
                        awe_LRU_free_mapped list once mapped to a frame */
684
 
 
685
 
                        buf_awe_map_page_to_frame(block, TRUE);
686
 
                }
687
 
 
688
 
                block->flush_type = flush_type;
689
 
 
690
 
                if (buf_pool->n_flush[block->flush_type] == 0) {
691
 
 
692
 
                        os_event_reset(buf_pool->no_flush[block->flush_type]);
693
 
                }
694
 
 
695
 
                (buf_pool->n_flush[flush_type])++;
696
 
 
697
 
                mutex_exit(&block->mutex);
698
 
                mutex_exit(&(buf_pool->mutex));
699
 
 
700
 
                rw_lock_s_lock_gen(&(block->lock), BUF_IO_WRITE);
 
852
                mutex_exit(block_mutex);
 
853
                buf_pool_mutex_exit();
 
854
                break;
 
855
 
 
856
        case BUF_FLUSH_SINGLE_PAGE:
 
857
                buf_page_set_io_fix(bpage, BUF_IO_WRITE);
 
858
 
 
859
                buf_page_set_flush_type(bpage, flush_type);
 
860
 
 
861
                if (buf_pool->n_flush[flush_type] == 0) {
 
862
 
 
863
                        os_event_reset(buf_pool->no_flush[flush_type]);
 
864
                }
 
865
 
 
866
                buf_pool->n_flush[flush_type]++;
 
867
 
 
868
                mutex_exit(block_mutex);
 
869
                buf_pool_mutex_exit();
 
870
 
 
871
                if (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE) {
 
872
                        rw_lock_s_lock_gen(&((buf_block_t*) bpage)->lock,
 
873
                                           BUF_IO_WRITE);
 
874
                }
 
875
                break;
 
876
 
 
877
        default:
 
878
                ut_error;
 
879
        }
701
880
 
702
881
#ifdef UNIV_DEBUG
703
 
                if (buf_debug_prints) {
704
 
                        fprintf(stderr,
705
 
                                "Flushing single page space %lu,"
706
 
                                " page no %lu \n",
707
 
                                (ulong) block->space,
708
 
                                (ulong) block->offset);
709
 
                }
 
882
        if (buf_debug_prints) {
 
883
                fprintf(stderr,
 
884
                        "Flushing %u space %u page %u\n",
 
885
                        flush_type, bpage->space, bpage->offset);
 
886
        }
710
887
#endif /* UNIV_DEBUG */
711
 
 
712
 
                buf_flush_write_block_low(block);
713
 
 
714
 
                return(1);
715
 
        }
716
 
 
717
 
        mutex_exit(&block->mutex);
718
 
        mutex_exit(&(buf_pool->mutex));
719
 
 
720
 
        return(0);
 
888
        buf_flush_write_block_low(bpage);
 
889
 
 
890
        return(1);
721
891
}
722
892
 
723
893
/***************************************************************
726
896
ulint
727
897
buf_flush_try_neighbors(
728
898
/*====================*/
729
 
                                /* out: number of pages flushed */
730
 
        ulint   space,          /* in: space id */
731
 
        ulint   offset,         /* in: page offset */
732
 
        ulint   flush_type)     /* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
 
899
                                        /* out: number of pages flushed */
 
900
        ulint           space,          /* in: space id */
 
901
        ulint           offset,         /* in: page offset */
 
902
        enum buf_flush  flush_type)     /* in: BUF_FLUSH_LRU or
 
903
                                        BUF_FLUSH_LIST */
733
904
{
734
 
        buf_block_t*    block;
 
905
        buf_page_t*     bpage;
735
906
        ulint           low, high;
736
907
        ulint           count           = 0;
737
908
        ulint           i;
738
909
 
739
910
        ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
740
911
 
741
 
        low = (offset / BUF_FLUSH_AREA) * BUF_FLUSH_AREA;
742
 
        high = (offset / BUF_FLUSH_AREA + 1) * BUF_FLUSH_AREA;
743
 
 
744
912
        if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) {
745
913
                /* If there is little space, it is better not to flush any
746
914
                block except from the end of the LRU list */
747
915
 
748
916
                low = offset;
749
917
                high = offset + 1;
 
918
        } else {
 
919
                /* When flushed, dirty blocks are searched in neighborhoods of
 
920
                this size, and flushed along with the original page. */
 
921
 
 
922
                ulint   buf_flush_area  = ut_min(BUF_READ_AHEAD_AREA,
 
923
                                                 buf_pool->curr_size / 16);
 
924
 
 
925
                low = (offset / buf_flush_area) * buf_flush_area;
 
926
                high = (offset / buf_flush_area + 1) * buf_flush_area;
750
927
        }
751
928
 
752
929
        /* fprintf(stderr, "Flush area: low %lu high %lu\n", low, high); */
755
932
                high = fil_space_get_size(space);
756
933
        }
757
934
 
758
 
        mutex_enter(&(buf_pool->mutex));
 
935
        buf_pool_mutex_enter();
759
936
 
760
937
        for (i = low; i < high; i++) {
761
938
 
762
 
                block = buf_page_hash_get(space, i);
763
 
                ut_a(!block || block->state == BUF_BLOCK_FILE_PAGE);
 
939
                bpage = buf_page_hash_get(space, i);
 
940
                ut_a(!bpage || buf_page_in_file(bpage));
764
941
 
765
 
                if (!block) {
 
942
                if (!bpage) {
766
943
 
767
944
                        continue;
768
945
 
769
946
                } else if (flush_type == BUF_FLUSH_LRU && i != offset
770
 
                           && !block->old) {
 
947
                           && !buf_page_is_old(bpage)) {
771
948
 
772
949
                        /* We avoid flushing 'non-old' blocks in an LRU flush,
773
950
                        because the flushed blocks are soon freed */
775
952
                        continue;
776
953
                } else {
777
954
 
778
 
                        mutex_enter(&block->mutex);
779
 
 
780
 
                        if (buf_flush_ready_for_flush(block, flush_type)
781
 
                            && (i == offset || block->buf_fix_count == 0)) {
 
955
                        mutex_t* block_mutex = buf_page_get_mutex(bpage);
 
956
 
 
957
                        mutex_enter(block_mutex);
 
958
 
 
959
                        if (buf_flush_ready_for_flush(bpage, flush_type)
 
960
                            && (i == offset || !bpage->buf_fix_count)) {
782
961
                                /* We only try to flush those
783
962
                                neighbors != offset where the buf fix count is
784
963
                                zero, as we then know that we probably can
787
966
                                flush the doublewrite buffer before we start
788
967
                                waiting. */
789
968
 
790
 
                                mutex_exit(&block->mutex);
 
969
                                buf_pool_mutex_exit();
791
970
 
792
 
                                mutex_exit(&(buf_pool->mutex));
 
971
                                mutex_exit(block_mutex);
793
972
 
794
973
                                /* Note: as we release the buf_pool mutex
795
974
                                above, in buf_flush_try_page we cannot be sure
800
979
                                count += buf_flush_try_page(space, i,
801
980
                                                            flush_type);
802
981
 
803
 
                                mutex_enter(&(buf_pool->mutex));
 
982
                                buf_pool_mutex_enter();
804
983
                        } else {
805
 
                                mutex_exit(&block->mutex);
 
984
                                mutex_exit(block_mutex);
806
985
                        }
807
986
                }
808
987
        }
809
988
 
810
 
        mutex_exit(&(buf_pool->mutex));
 
989
        buf_pool_mutex_exit();
811
990
 
812
991
        return(count);
813
992
}
818
997
pages: to avoid deadlocks, this function must be written so that it cannot
819
998
end up waiting for these latches! NOTE 2: in the case of a flush list flush,
820
999
the calling thread is not allowed to own any latches on pages! */
821
 
 
 
1000
UNIV_INTERN
822
1001
ulint
823
1002
buf_flush_batch(
824
1003
/*============*/
825
 
                                /* out: number of blocks for which the write
826
 
                                request was queued; ULINT_UNDEFINED if there
827
 
                                was a flush of the same type already running */
828
 
        ulint   flush_type,     /* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST; if
829
 
                                BUF_FLUSH_LIST, then the caller must not own
830
 
                                any latches on pages */
831
 
        ulint   min_n,          /* in: wished minimum number of blocks flushed
832
 
                                (it is not guaranteed that the actual number
833
 
                                is that big, though) */
834
 
        dulint  lsn_limit)      /* in the case BUF_FLUSH_LIST all blocks whose
835
 
                                oldest_modification is smaller than this
836
 
                                should be flushed (if their number does not
837
 
                                exceed min_n), otherwise ignored */
 
1004
                                        /* out: number of blocks for which the
 
1005
                                        write request was queued;
 
1006
                                        ULINT_UNDEFINED if there was a flush
 
1007
                                        of the same type already running */
 
1008
        enum buf_flush  flush_type,     /* in: BUF_FLUSH_LRU or
 
1009
                                        BUF_FLUSH_LIST; if BUF_FLUSH_LIST,
 
1010
                                        then the caller must not own any
 
1011
                                        latches on pages */
 
1012
        ulint           min_n,          /* in: wished minimum number of blocks
 
1013
                                        flushed (it is not guaranteed that the
 
1014
                                        actual number is that big, though) */
 
1015
        ib_uint64_t     lsn_limit)      /* in the case BUF_FLUSH_LIST all
 
1016
                                        blocks whose oldest_modification is
 
1017
                                        smaller than this should be flushed
 
1018
                                        (if their number does not exceed
 
1019
                                        min_n), otherwise ignored */
838
1020
{
839
 
        buf_block_t*    block;
 
1021
        buf_page_t*     bpage;
840
1022
        ulint           page_count      = 0;
841
1023
        ulint           old_page_count;
842
1024
        ulint           space;
843
1025
        ulint           offset;
844
 
        ibool           found;
845
1026
 
846
1027
        ut_ad((flush_type == BUF_FLUSH_LRU)
847
1028
              || (flush_type == BUF_FLUSH_LIST));
849
1030
        ut_ad((flush_type != BUF_FLUSH_LIST)
850
1031
              || sync_thread_levels_empty_gen(TRUE));
851
1032
#endif /* UNIV_SYNC_DEBUG */
852
 
        mutex_enter(&(buf_pool->mutex));
 
1033
        buf_pool_mutex_enter();
853
1034
 
854
1035
        if ((buf_pool->n_flush[flush_type] > 0)
855
1036
            || (buf_pool->init_flush[flush_type] == TRUE)) {
856
1037
 
857
1038
                /* There is already a flush batch of the same type running */
858
1039
 
859
 
                mutex_exit(&(buf_pool->mutex));
 
1040
                buf_pool_mutex_exit();
860
1041
 
861
1042
                return(ULINT_UNDEFINED);
862
1043
        }
863
1044
 
864
 
        (buf_pool->init_flush)[flush_type] = TRUE;
 
1045
        buf_pool->init_flush[flush_type] = TRUE;
865
1046
 
866
1047
        for (;;) {
 
1048
flush_next:
867
1049
                /* If we have flushed enough, leave the loop */
868
1050
                if (page_count >= min_n) {
869
1051
 
874
1056
                block to be flushed. */
875
1057
 
876
1058
                if (flush_type == BUF_FLUSH_LRU) {
877
 
                        block = UT_LIST_GET_LAST(buf_pool->LRU);
 
1059
                        bpage = UT_LIST_GET_LAST(buf_pool->LRU);
878
1060
                } else {
879
1061
                        ut_ad(flush_type == BUF_FLUSH_LIST);
880
1062
 
881
 
                        block = UT_LIST_GET_LAST(buf_pool->flush_list);
882
 
                        if (!block
883
 
                            || (ut_dulint_cmp(block->oldest_modification,
884
 
                                              lsn_limit) >= 0)) {
 
1063
                        bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
 
1064
                        if (!bpage
 
1065
                            || bpage->oldest_modification >= lsn_limit) {
885
1066
                                /* We have flushed enough */
886
1067
 
887
1068
                                break;
888
1069
                        }
 
1070
                        ut_ad(bpage->in_flush_list);
889
1071
                }
890
1072
 
891
 
                found = FALSE;
892
 
 
893
1073
                /* Note that after finding a single flushable page, we try to
894
1074
                flush also all its neighbors, and after that start from the
895
1075
                END of the LRU list or flush list again: the list may change
896
1076
                during the flushing and we cannot safely preserve within this
897
1077
                function a pointer to a block in the list! */
898
1078
 
899
 
                while ((block != NULL) && !found) {
900
 
                        ut_a(block->state == BUF_BLOCK_FILE_PAGE);
901
 
 
902
 
                        mutex_enter(&block->mutex);
903
 
 
904
 
                        if (buf_flush_ready_for_flush(block, flush_type)) {
905
 
 
906
 
                                found = TRUE;
907
 
                                space = block->space;
908
 
                                offset = block->offset;
909
 
 
910
 
                                mutex_exit(&block->mutex);
911
 
                                mutex_exit(&(buf_pool->mutex));
 
1079
                do {
 
1080
                        mutex_t* block_mutex = buf_page_get_mutex(bpage);
 
1081
 
 
1082
                        ut_a(buf_page_in_file(bpage));
 
1083
 
 
1084
                        mutex_enter(block_mutex);
 
1085
 
 
1086
                        if (buf_flush_ready_for_flush(bpage, flush_type)) {
 
1087
 
 
1088
                                space = buf_page_get_space(bpage);
 
1089
                                offset = buf_page_get_page_no(bpage);
 
1090
 
 
1091
                                buf_pool_mutex_exit();
 
1092
                                mutex_exit(block_mutex);
912
1093
 
913
1094
                                old_page_count = page_count;
914
1095
 
920
1101
                                flush_type, offset,
921
1102
                                page_count - old_page_count); */
922
1103
 
923
 
                                mutex_enter(&(buf_pool->mutex));
 
1104
                                buf_pool_mutex_enter();
 
1105
                                goto flush_next;
924
1106
 
925
1107
                        } else if (flush_type == BUF_FLUSH_LRU) {
926
1108
 
927
 
                                mutex_exit(&block->mutex);
 
1109
                                mutex_exit(block_mutex);
928
1110
 
929
 
                                block = UT_LIST_GET_PREV(LRU, block);
 
1111
                                bpage = UT_LIST_GET_PREV(LRU, bpage);
930
1112
                        } else {
931
1113
                                ut_ad(flush_type == BUF_FLUSH_LIST);
932
1114
 
933
 
                                mutex_exit(&block->mutex);
 
1115
                                mutex_exit(block_mutex);
934
1116
 
935
 
                                block = UT_LIST_GET_PREV(flush_list, block);
 
1117
                                bpage = UT_LIST_GET_PREV(list, bpage);
 
1118
                                ut_ad(!bpage || bpage->in_flush_list);
936
1119
                        }
937
 
                }
 
1120
                } while (bpage != NULL);
938
1121
 
939
1122
                /* If we could not find anything to flush, leave the loop */
940
1123
 
941
 
                if (!found) {
942
 
                        break;
943
 
                }
 
1124
                break;
944
1125
        }
945
1126
 
946
 
        (buf_pool->init_flush)[flush_type] = FALSE;
 
1127
        buf_pool->init_flush[flush_type] = FALSE;
947
1128
 
948
1129
        if ((buf_pool->n_flush[flush_type] == 0)
949
1130
            && (buf_pool->init_flush[flush_type] == FALSE)) {
953
1134
                os_event_set(buf_pool->no_flush[flush_type]);
954
1135
        }
955
1136
 
956
 
        mutex_exit(&(buf_pool->mutex));
 
1137
        buf_pool_mutex_exit();
957
1138
 
958
1139
        buf_flush_buffered_writes();
959
1140
 
975
1156
 
976
1157
/**********************************************************************
977
1158
Waits until a flush batch of the given type ends */
978
 
 
 
1159
UNIV_INTERN
979
1160
void
980
1161
buf_flush_wait_batch_end(
981
1162
/*=====================*/
982
 
        ulint   type)   /* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
 
1163
        enum buf_flush  type)   /* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
983
1164
{
984
1165
        ut_ad((type == BUF_FLUSH_LRU) || (type == BUF_FLUSH_LIST));
985
1166
 
997
1178
                        /* out: number of blocks which should be flushed
998
1179
                        from the end of the LRU list */
999
1180
{
1000
 
        buf_block_t*    block;
 
1181
        buf_page_t*     bpage;
1001
1182
        ulint           n_replaceable;
1002
1183
        ulint           distance        = 0;
1003
1184
 
1004
 
        mutex_enter(&(buf_pool->mutex));
 
1185
        buf_pool_mutex_enter();
1005
1186
 
1006
1187
        n_replaceable = UT_LIST_GET_LEN(buf_pool->free);
1007
1188
 
1008
 
        block = UT_LIST_GET_LAST(buf_pool->LRU);
 
1189
        bpage = UT_LIST_GET_LAST(buf_pool->LRU);
1009
1190
 
1010
 
        while ((block != NULL)
 
1191
        while ((bpage != NULL)
1011
1192
               && (n_replaceable < BUF_FLUSH_FREE_BLOCK_MARGIN
1012
1193
                   + BUF_FLUSH_EXTRA_MARGIN)
1013
1194
               && (distance < BUF_LRU_FREE_SEARCH_LEN)) {
1014
1195
 
1015
 
                mutex_enter(&block->mutex);
1016
 
 
1017
 
                if (buf_flush_ready_for_replace(block)) {
 
1196
                mutex_t* block_mutex = buf_page_get_mutex(bpage);
 
1197
 
 
1198
                mutex_enter(block_mutex);
 
1199
 
 
1200
                if (buf_flush_ready_for_replace(bpage)) {
1018
1201
                        n_replaceable++;
1019
1202
                }
1020
1203
 
1021
 
                mutex_exit(&block->mutex);
 
1204
                mutex_exit(block_mutex);
1022
1205
 
1023
1206
                distance++;
1024
1207
 
1025
 
                block = UT_LIST_GET_PREV(LRU, block);
 
1208
                bpage = UT_LIST_GET_PREV(LRU, bpage);
1026
1209
        }
1027
1210
 
1028
 
        mutex_exit(&(buf_pool->mutex));
 
1211
        buf_pool_mutex_exit();
1029
1212
 
1030
1213
        if (n_replaceable >= BUF_FLUSH_FREE_BLOCK_MARGIN) {
1031
1214
 
1042
1225
is called also by threads which have locks on pages. To avoid deadlocks, we
1043
1226
flush only pages such that the s-lock required for flushing can be acquired
1044
1227
immediately, without waiting. */
1045
 
 
 
1228
UNIV_INTERN
1046
1229
void
1047
1230
buf_flush_free_margin(void)
1048
1231
/*=======================*/
1053
1236
        n_to_flush = buf_flush_LRU_recommendation();
1054
1237
 
1055
1238
        if (n_to_flush > 0) {
1056
 
                n_flushed = buf_flush_batch(BUF_FLUSH_LRU, n_to_flush,
1057
 
                                            ut_dulint_zero);
 
1239
                n_flushed = buf_flush_batch(BUF_FLUSH_LRU, n_to_flush, 0);
1058
1240
                if (n_flushed == ULINT_UNDEFINED) {
1059
1241
                        /* There was an LRU type flush batch already running;
1060
1242
                        let us wait for it to end */
1064
1246
        }
1065
1247
}
1066
1248
 
 
1249
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
1067
1250
/**********************************************************************
1068
1251
Validates the flush list. */
1069
1252
static
1072
1255
/*========================*/
1073
1256
                /* out: TRUE if ok */
1074
1257
{
1075
 
        buf_block_t*    block;
1076
 
        dulint          om;
1077
 
 
1078
 
        UT_LIST_VALIDATE(flush_list, buf_block_t, buf_pool->flush_list);
1079
 
 
1080
 
        block = UT_LIST_GET_FIRST(buf_pool->flush_list);
1081
 
 
1082
 
        while (block != NULL) {
1083
 
                om = block->oldest_modification;
1084
 
                ut_a(block->state == BUF_BLOCK_FILE_PAGE);
1085
 
                ut_a(ut_dulint_cmp(om, ut_dulint_zero) > 0);
1086
 
 
1087
 
                block = UT_LIST_GET_NEXT(flush_list, block);
1088
 
 
1089
 
                if (block) {
1090
 
                        ut_a(ut_dulint_cmp(om, block->oldest_modification)
1091
 
                             >= 0);
1092
 
                }
 
1258
        buf_page_t*     bpage;
 
1259
 
 
1260
        UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list);
 
1261
 
 
1262
        bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
 
1263
 
 
1264
        while (bpage != NULL) {
 
1265
                const ib_uint64_t om = bpage->oldest_modification;
 
1266
                ut_ad(bpage->in_flush_list);
 
1267
                ut_a(buf_page_in_file(bpage));
 
1268
                ut_a(om > 0);
 
1269
 
 
1270
                bpage = UT_LIST_GET_NEXT(list, bpage);
 
1271
 
 
1272
                ut_a(!bpage || om >= bpage->oldest_modification);
1093
1273
        }
1094
1274
 
1095
1275
        return(TRUE);
1097
1277
 
1098
1278
/**********************************************************************
1099
1279
Validates the flush list. */
1100
 
 
 
1280
UNIV_INTERN
1101
1281
ibool
1102
1282
buf_flush_validate(void)
1103
1283
/*====================*/
1105
1285
{
1106
1286
        ibool   ret;
1107
1287
 
1108
 
        mutex_enter(&(buf_pool->mutex));
 
1288
        buf_pool_mutex_enter();
1109
1289
 
1110
1290
        ret = buf_flush_validate_low();
1111
1291
 
1112
 
        mutex_exit(&(buf_pool->mutex));
 
1292
        buf_pool_mutex_exit();
1113
1293
 
1114
1294
        return(ret);
1115
1295
}
 
1296
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */