39
35
buf_flush_validate_low(void);
40
36
/*========================*/
41
37
/* out: TRUE if ok */
38
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
43
40
/************************************************************************
44
41
Inserts a modified block into the flush list. */
47
44
buf_flush_insert_into_flush_list(
48
45
/*=============================*/
49
buf_block_t* block) /* in: block which is modified */
46
buf_page_t* bpage) /* in: block which is modified */
51
ut_ad(mutex_own(&(buf_pool->mutex)));
52
ut_a(block->state == BUF_BLOCK_FILE_PAGE);
48
ut_ad(buf_pool_mutex_own());
54
49
ut_ad((UT_LIST_GET_FIRST(buf_pool->flush_list) == NULL)
55
|| (ut_dulint_cmp((UT_LIST_GET_FIRST(buf_pool->flush_list))
56
->oldest_modification,
57
block->oldest_modification) <= 0));
59
UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, block);
61
ut_ad(buf_flush_validate_low());
50
|| (UT_LIST_GET_FIRST(buf_pool->flush_list)->oldest_modification
51
<= bpage->oldest_modification));
53
switch (buf_page_get_state(bpage)) {
54
case BUF_BLOCK_ZIP_PAGE:
55
mutex_enter(&buf_pool_zip_mutex);
56
buf_page_set_state(bpage, BUF_BLOCK_ZIP_DIRTY);
57
mutex_exit(&buf_pool_zip_mutex);
58
UT_LIST_REMOVE(list, buf_pool->zip_clean, bpage);
60
case BUF_BLOCK_ZIP_DIRTY:
61
case BUF_BLOCK_FILE_PAGE:
62
ut_ad(bpage->in_LRU_list);
63
ut_ad(bpage->in_page_hash);
64
ut_ad(!bpage->in_zip_hash);
65
ut_ad(!bpage->in_flush_list);
66
ut_d(bpage->in_flush_list = TRUE);
67
UT_LIST_ADD_FIRST(list, buf_pool->flush_list, bpage);
69
case BUF_BLOCK_ZIP_FREE:
70
case BUF_BLOCK_NOT_USED:
71
case BUF_BLOCK_READY_FOR_USE:
72
case BUF_BLOCK_MEMORY:
73
case BUF_BLOCK_REMOVE_HASH:
78
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
79
ut_a(buf_flush_validate_low());
80
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
64
83
/************************************************************************
65
84
Inserts a modified block into the flush list in the right sorted position.
66
85
This function is used by recovery, because there the modifications do not
67
86
necessarily come in the order of lsn's. */
70
89
buf_flush_insert_sorted_into_flush_list(
71
90
/*====================================*/
72
buf_block_t* block) /* in: block which is modified */
91
buf_page_t* bpage) /* in: block which is modified */
77
ut_ad(mutex_own(&(buf_pool->mutex)));
96
ut_ad(buf_pool_mutex_own());
98
switch (buf_page_get_state(bpage)) {
99
case BUF_BLOCK_ZIP_PAGE:
100
mutex_enter(&buf_pool_zip_mutex);
101
buf_page_set_state(bpage, BUF_BLOCK_ZIP_DIRTY);
102
mutex_exit(&buf_pool_zip_mutex);
103
UT_LIST_REMOVE(list, buf_pool->zip_clean, bpage);
105
case BUF_BLOCK_ZIP_DIRTY:
106
case BUF_BLOCK_FILE_PAGE:
107
ut_ad(bpage->in_LRU_list);
108
ut_ad(bpage->in_page_hash);
109
ut_ad(!bpage->in_zip_hash);
110
ut_ad(!bpage->in_flush_list);
111
ut_d(bpage->in_flush_list = TRUE);
113
case BUF_BLOCK_ZIP_FREE:
114
case BUF_BLOCK_NOT_USED:
115
case BUF_BLOCK_READY_FOR_USE:
116
case BUF_BLOCK_MEMORY:
117
case BUF_BLOCK_REMOVE_HASH:
80
123
b = UT_LIST_GET_FIRST(buf_pool->flush_list);
82
while (b && (ut_dulint_cmp(b->oldest_modification,
83
block->oldest_modification) > 0)) {
125
while (b && b->oldest_modification > bpage->oldest_modification) {
126
ut_ad(b->in_flush_list);
85
b = UT_LIST_GET_NEXT(flush_list, b);
128
b = UT_LIST_GET_NEXT(list, b);
88
131
if (prev_b == NULL) {
89
UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, block);
132
UT_LIST_ADD_FIRST(list, buf_pool->flush_list, bpage);
91
UT_LIST_INSERT_AFTER(flush_list, buf_pool->flush_list, prev_b,
134
UT_LIST_INSERT_AFTER(list, buf_pool->flush_list,
95
ut_ad(buf_flush_validate_low());
138
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
139
ut_a(buf_flush_validate_low());
140
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
98
143
/************************************************************************
99
144
Returns TRUE if the file page block is immediately suitable for replacement,
100
145
i.e., the transition FILE_PAGE => NOT_USED allowed. */
103
148
buf_flush_ready_for_replace(
104
149
/*========================*/
105
150
/* out: TRUE if can replace immediately */
106
buf_block_t* block) /* in: buffer control block, must be in state
107
BUF_BLOCK_FILE_PAGE and in the LRU list */
151
buf_page_t* bpage) /* in: buffer control block, must be
152
buf_page_in_file(bpage) and in the LRU list */
109
ut_ad(mutex_own(&(buf_pool->mutex)));
110
ut_ad(mutex_own(&block->mutex));
111
if (block->state != BUF_BLOCK_FILE_PAGE) {
112
ut_print_timestamp(stderr);
114
" InnoDB: Error: buffer block state %lu"
115
" in the LRU list!\n",
116
(ulong)block->state);
117
ut_print_buf(stderr, block, sizeof(buf_block_t));
122
if ((ut_dulint_cmp(block->oldest_modification, ut_dulint_zero) > 0)
123
|| (block->buf_fix_count != 0)
124
|| (block->io_fix != 0)) {
154
ut_ad(buf_pool_mutex_own());
155
ut_ad(mutex_own(buf_page_get_mutex(bpage)));
156
ut_ad(bpage->in_LRU_list);
158
if (UNIV_LIKELY(buf_page_in_file(bpage))) {
160
return(bpage->oldest_modification == 0
161
&& buf_page_get_io_fix(bpage) == BUF_IO_NONE
162
&& bpage->buf_fix_count == 0);
165
ut_print_timestamp(stderr);
167
" InnoDB: Error: buffer block state %lu"
168
" in the LRU list!\n",
169
(ulong) buf_page_get_state(bpage));
170
ut_print_buf(stderr, bpage, sizeof(buf_page_t));
132
175
/************************************************************************
136
179
buf_flush_ready_for_flush(
137
180
/*======================*/
138
181
/* out: TRUE if can flush immediately */
139
buf_block_t* block, /* in: buffer control block, must be in state
140
BUF_BLOCK_FILE_PAGE */
141
ulint flush_type)/* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
182
buf_page_t* bpage, /* in: buffer control block, must be
183
buf_page_in_file(bpage) */
184
enum buf_flush flush_type)/* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
143
ut_ad(mutex_own(&(buf_pool->mutex)));
144
ut_ad(mutex_own(&(block->mutex)));
145
ut_a(block->state == BUF_BLOCK_FILE_PAGE);
147
if ((ut_dulint_cmp(block->oldest_modification, ut_dulint_zero) > 0)
148
&& (block->io_fix == 0)) {
186
ut_a(buf_page_in_file(bpage));
187
ut_ad(buf_pool_mutex_own());
188
ut_ad(mutex_own(buf_page_get_mutex(bpage)));
190
if (bpage->oldest_modification != 0
191
&& buf_page_get_io_fix(bpage) == BUF_IO_NONE) {
192
ut_ad(bpage->in_flush_list);
149
194
if (flush_type != BUF_FLUSH_LRU) {
153
} else if (block->buf_fix_count == 0) {
198
} else if (bpage->buf_fix_count == 0) {
155
200
/* If we are flushing the LRU list, to avoid deadlocks
156
201
we require the block not to be bufferfixed, and hence
166
211
/************************************************************************
212
Remove a block from the flush list of modified blocks. */
217
buf_page_t* bpage) /* in: pointer to the block in question */
219
ut_ad(buf_pool_mutex_own());
220
ut_ad(mutex_own(buf_page_get_mutex(bpage)));
221
ut_ad(bpage->in_flush_list);
222
ut_d(bpage->in_flush_list = FALSE);
224
switch (buf_page_get_state(bpage)) {
225
case BUF_BLOCK_ZIP_PAGE:
226
/* clean compressed pages should not be on the flush list */
227
case BUF_BLOCK_ZIP_FREE:
228
case BUF_BLOCK_NOT_USED:
229
case BUF_BLOCK_READY_FOR_USE:
230
case BUF_BLOCK_MEMORY:
231
case BUF_BLOCK_REMOVE_HASH:
234
case BUF_BLOCK_ZIP_DIRTY:
235
buf_page_set_state(bpage, BUF_BLOCK_ZIP_PAGE);
236
UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
237
buf_LRU_insert_zip_clean(bpage);
239
case BUF_BLOCK_FILE_PAGE:
240
UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
244
bpage->oldest_modification = 0;
246
ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list));
249
/************************************************************************
167
250
Updates the flush system data structures when a write is completed. */
170
253
buf_flush_write_complete(
171
254
/*=====================*/
172
buf_block_t* block) /* in: pointer to the block in question */
255
buf_page_t* bpage) /* in: pointer to the block in question */
175
#ifdef UNIV_SYNC_DEBUG
176
ut_ad(mutex_own(&(buf_pool->mutex)));
177
#endif /* UNIV_SYNC_DEBUG */
178
ut_a(block->state == BUF_BLOCK_FILE_PAGE);
180
block->oldest_modification = ut_dulint_zero;
182
UT_LIST_REMOVE(flush_list, buf_pool->flush_list, block);
184
ut_d(UT_LIST_VALIDATE(flush_list, buf_block_t, buf_pool->flush_list));
186
(buf_pool->n_flush[block->flush_type])--;
188
if (block->flush_type == BUF_FLUSH_LRU) {
257
enum buf_flush flush_type;
261
buf_flush_remove(bpage);
263
flush_type = buf_page_get_flush_type(bpage);
264
buf_pool->n_flush[flush_type]--;
266
if (flush_type == BUF_FLUSH_LRU) {
189
267
/* Put the block to the end of the LRU list to wait to be
190
268
moved to the free list */
192
buf_LRU_make_block_old(block);
270
buf_LRU_make_block_old(bpage);
194
272
buf_pool->LRU_flush_ended++;
197
275
/* fprintf(stderr, "n pending flush %lu\n",
198
buf_pool->n_flush[block->flush_type]); */
276
buf_pool->n_flush[flush_type]); */
200
if ((buf_pool->n_flush[block->flush_type] == 0)
201
&& (buf_pool->init_flush[block->flush_type] == FALSE)) {
278
if ((buf_pool->n_flush[flush_type] == 0)
279
&& (buf_pool->init_flush[flush_type] == FALSE)) {
203
281
/* The running flush batch has ended */
205
os_event_set(buf_pool->no_flush[block->flush_type]);
283
os_event_set(buf_pool->no_flush[flush_type]);
260
346
" doublewrite buffer.\n");
263
if (block->check_index_page_at_flush
264
&& !page_simple_validate(block->frame)) {
266
buf_page_print(block->frame);
268
ut_print_timestamp(stderr);
270
" InnoDB: Apparent corruption of an"
271
" index page n:o %lu in space %lu\n"
272
"InnoDB: to be written to data file."
273
" We intentionally crash server\n"
274
"InnoDB: to prevent corrupt data"
275
" from ending up in data\n"
277
(ulong) block->offset, (ulong) block->space);
349
if (!block->check_index_page_at_flush) {
350
} else if (page_is_comp(block->frame)) {
352
(!page_simple_validate_new(block->frame))) {
354
buf_page_print(block->frame, 0);
356
ut_print_timestamp(stderr);
358
" InnoDB: Apparent corruption of an"
359
" index page n:o %lu in space %lu\n"
360
"InnoDB: to be written to data file."
361
" We intentionally crash server\n"
362
"InnoDB: to prevent corrupt data"
363
" from ending up in data\n"
365
(ulong) buf_block_get_page_no(block),
366
(ulong) buf_block_get_space(block));
370
} else if (UNIV_UNLIKELY
371
(!page_simple_validate_old(block->frame))) {
284
378
srv_dblwr_pages_written+= trx_doublewrite->first_free;
285
379
srv_dblwr_writes++;
287
if (trx_doublewrite->first_free > TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
288
len = TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE;
290
len = trx_doublewrite->first_free * UNIV_PAGE_SIZE;
293
fil_io(OS_FILE_WRITE,
381
len = ut_min(TRX_SYS_DOUBLEWRITE_BLOCK_SIZE,
382
trx_doublewrite->first_free) * UNIV_PAGE_SIZE;
384
write_buf = trx_doublewrite->write_buf;
387
fil_io(OS_FILE_WRITE, TRUE, TRX_SYS_SPACE, 0,
295
388
trx_doublewrite->block1, 0, len,
296
(void*)trx_doublewrite->write_buf, NULL);
298
write_buf = trx_doublewrite->write_buf;
300
for (len2 = 0; len2 + UNIV_PAGE_SIZE <= len; len2 += UNIV_PAGE_SIZE) {
301
if (mach_read_from_4(write_buf + len2 + FIL_PAGE_LSN + 4)
302
!= mach_read_from_4(write_buf + len2 + UNIV_PAGE_SIZE
303
- FIL_PAGE_END_LSN_OLD_CHKSUM + 4)) {
389
(void*) write_buf, NULL);
391
for (len2 = 0; len2 + UNIV_PAGE_SIZE <= len;
392
len2 += UNIV_PAGE_SIZE, i++) {
393
const buf_block_t* block = (buf_block_t*)
394
trx_doublewrite->buf_block_arr[i];
396
if (UNIV_LIKELY(!block->page.zip.data)
397
&& UNIV_LIKELY(buf_block_get_state(block)
398
== BUF_BLOCK_FILE_PAGE)
400
(memcmp(write_buf + len2 + (FIL_PAGE_LSN + 4),
403
- FIL_PAGE_END_LSN_OLD_CHKSUM + 4), 4))) {
304
404
ut_print_timestamp(stderr);
306
406
" InnoDB: ERROR: The page to be written"
313
if (trx_doublewrite->first_free > TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
314
len = (trx_doublewrite->first_free
315
- TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) * UNIV_PAGE_SIZE;
317
fil_io(OS_FILE_WRITE,
319
trx_doublewrite->block2, 0, len,
320
(void*)(trx_doublewrite->write_buf
321
+ TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
325
write_buf = trx_doublewrite->write_buf
326
+ TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE;
327
for (len2 = 0; len2 + UNIV_PAGE_SIZE <= len;
328
len2 += UNIV_PAGE_SIZE) {
329
if (mach_read_from_4(write_buf + len2
331
!= mach_read_from_4(write_buf + len2
333
- FIL_PAGE_END_LSN_OLD_CHKSUM
335
ut_print_timestamp(stderr);
337
" InnoDB: ERROR: The page to be"
338
" written seems corrupt!\n"
339
"InnoDB: The lsn fields do not match!"
341
" the doublewrite block2.\n");
413
if (trx_doublewrite->first_free <= TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
417
len = (trx_doublewrite->first_free - TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)
420
write_buf = trx_doublewrite->write_buf
421
+ TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE;
422
ut_ad(i == TRX_SYS_DOUBLEWRITE_BLOCK_SIZE);
424
fil_io(OS_FILE_WRITE, TRUE, TRX_SYS_SPACE, 0,
425
trx_doublewrite->block2, 0, len,
426
(void*) write_buf, NULL);
428
for (len2 = 0; len2 + UNIV_PAGE_SIZE <= len;
429
len2 += UNIV_PAGE_SIZE, i++) {
430
const buf_block_t* block = (buf_block_t*)
431
trx_doublewrite->buf_block_arr[i];
433
if (UNIV_LIKELY(!block->page.zip.data)
434
&& UNIV_LIKELY(buf_block_get_state(block)
435
== BUF_BLOCK_FILE_PAGE)
437
(memcmp(write_buf + len2 + (FIL_PAGE_LSN + 4),
440
- FIL_PAGE_END_LSN_OLD_CHKSUM + 4), 4))) {
441
ut_print_timestamp(stderr);
443
" InnoDB: ERROR: The page to be"
444
" written seems corrupt!\n"
445
"InnoDB: The lsn fields do not match!"
447
" the doublewrite block2.\n");
346
452
/* Now flush the doublewrite buffer data to disk */
348
454
fil_flush(TRX_SYS_SPACE);
352
458
blocks. Next do the writes to the intended positions. */
354
460
for (i = 0; i < trx_doublewrite->first_free; i++) {
355
block = trx_doublewrite->buf_block_arr[i];
357
if (mach_read_from_4(block->frame + FIL_PAGE_LSN + 4)
358
!= mach_read_from_4(block->frame + UNIV_PAGE_SIZE
359
- FIL_PAGE_END_LSN_OLD_CHKSUM + 4)) {
461
const buf_block_t* block = (buf_block_t*)
462
trx_doublewrite->buf_block_arr[i];
464
ut_a(buf_page_in_file(&block->page));
465
if (UNIV_LIKELY_NULL(block->page.zip.data)) {
466
fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
467
FALSE, buf_page_get_space(&block->page),
468
buf_page_get_zip_size(&block->page),
469
buf_page_get_page_no(&block->page), 0,
470
buf_page_get_zip_size(&block->page),
471
(void*)block->page.zip.data,
474
/* Increment the counter of I/O operations used
475
for selecting LRU policy. */
476
buf_LRU_stat_inc_io();
481
ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
483
if (UNIV_UNLIKELY(memcmp(block->frame + (FIL_PAGE_LSN + 4),
486
- FIL_PAGE_END_LSN_OLD_CHKSUM + 4),
360
488
ut_print_timestamp(stderr);
362
490
" InnoDB: ERROR: The page to be written"
367
495
" the doublewrite buffer.\n"
368
496
"InnoDB: Page buf fix count %lu,"
369
497
" io fix %lu, state %lu\n",
370
(ulong)block->buf_fix_count,
371
(ulong)block->io_fix,
372
(ulong)block->state);
498
(ulong)block->page.buf_fix_count,
499
(ulong)buf_block_get_io_fix(block),
500
(ulong)buf_block_get_state(block));
374
ut_a(block->state == BUF_BLOCK_FILE_PAGE);
376
503
fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
377
FALSE, block->space, block->offset, 0, UNIV_PAGE_SIZE,
504
FALSE, buf_block_get_space(block), 0,
505
buf_block_get_page_no(block), 0, UNIV_PAGE_SIZE,
378
506
(void*)block->frame, (void*)block);
508
/* Increment the counter of I/O operations used
509
for selecting LRU policy. */
510
buf_LRU_stat_inc_io();
381
513
/* Wake possible simulated aio thread to actually post the
426
ut_memcpy(trx_doublewrite->write_buf
427
+ UNIV_PAGE_SIZE * trx_doublewrite->first_free,
428
block->frame, UNIV_PAGE_SIZE);
430
trx_doublewrite->buf_block_arr[trx_doublewrite->first_free] = block;
559
zip_size = buf_page_get_zip_size(bpage);
561
if (UNIV_UNLIKELY(zip_size)) {
562
/* Copy the compressed page and clear the rest. */
563
memcpy(trx_doublewrite->write_buf
564
+ UNIV_PAGE_SIZE * trx_doublewrite->first_free,
565
bpage->zip.data, zip_size);
566
memset(trx_doublewrite->write_buf
567
+ UNIV_PAGE_SIZE * trx_doublewrite->first_free
568
+ zip_size, 0, UNIV_PAGE_SIZE - zip_size);
570
ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
572
memcpy(trx_doublewrite->write_buf
573
+ UNIV_PAGE_SIZE * trx_doublewrite->first_free,
574
((buf_block_t*) bpage)->frame, UNIV_PAGE_SIZE);
577
trx_doublewrite->buf_block_arr[trx_doublewrite->first_free] = bpage;
432
579
trx_doublewrite->first_free++;
446
593
/************************************************************************
447
594
Initializes a page for writing to the tablespace. */
450
597
buf_flush_init_for_writing(
451
598
/*=======================*/
452
byte* page, /* in: page */
453
dulint newest_lsn, /* in: newest modification lsn to the page */
454
ulint space, /* in: space id */
455
ulint page_no) /* in: page number */
599
byte* page, /* in/out: page */
600
void* page_zip_, /* in/out: compressed page, or NULL */
601
ib_uint64_t newest_lsn) /* in: newest modification lsn
607
page_zip_des_t* page_zip = page_zip_;
608
ulint zip_size = page_zip_get_size(page_zip);
610
ut_ad(ut_is_2pow(zip_size));
611
ut_ad(zip_size <= UNIV_PAGE_SIZE);
613
switch (UNIV_EXPECT(fil_page_get_type(page), FIL_PAGE_INDEX)) {
614
case FIL_PAGE_TYPE_ALLOCATED:
616
case FIL_PAGE_IBUF_BITMAP:
617
case FIL_PAGE_TYPE_FSP_HDR:
618
case FIL_PAGE_TYPE_XDES:
619
/* These are essentially uncompressed pages. */
620
memcpy(page_zip->data, page, zip_size);
622
case FIL_PAGE_TYPE_ZBLOB:
623
case FIL_PAGE_TYPE_ZBLOB2:
625
mach_write_ull(page_zip->data
626
+ FIL_PAGE_LSN, newest_lsn);
627
memset(page_zip->data + FIL_PAGE_FILE_FLUSH_LSN, 0, 8);
628
mach_write_to_4(page_zip->data
629
+ FIL_PAGE_SPACE_OR_CHKSUM,
631
? page_zip_calc_checksum(
632
page_zip->data, zip_size)
633
: BUF_NO_CHECKSUM_MAGIC);
457
640
/* Write the newest modification lsn to the page header and trailer */
458
mach_write_to_8(page + FIL_PAGE_LSN, newest_lsn);
460
mach_write_to_8(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
462
/* Write the page number and the space id */
464
mach_write_to_4(page + FIL_PAGE_OFFSET, page_no);
465
mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, space);
641
mach_write_ull(page + FIL_PAGE_LSN, newest_lsn);
643
mach_write_ull(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
467
646
/* Store the new formula checksum */
514
696
/* Force the log to the disk before writing the modified block */
515
log_write_up_to(block->newest_modification, LOG_WAIT_ALL_GROUPS, TRUE);
697
log_write_up_to(bpage->newest_modification, LOG_WAIT_ALL_GROUPS, TRUE);
517
buf_flush_init_for_writing(block->frame, block->newest_modification,
518
block->space, block->offset);
699
switch (buf_page_get_state(bpage)) {
700
case BUF_BLOCK_ZIP_FREE:
701
case BUF_BLOCK_ZIP_PAGE: /* The page should be dirty. */
702
case BUF_BLOCK_NOT_USED:
703
case BUF_BLOCK_READY_FOR_USE:
704
case BUF_BLOCK_MEMORY:
705
case BUF_BLOCK_REMOVE_HASH:
708
case BUF_BLOCK_ZIP_DIRTY:
709
frame = bpage->zip.data;
710
if (UNIV_LIKELY(srv_use_checksums)) {
711
ut_a(mach_read_from_4(frame + FIL_PAGE_SPACE_OR_CHKSUM)
712
== page_zip_calc_checksum(frame, zip_size));
714
mach_write_ull(frame + FIL_PAGE_LSN,
715
bpage->newest_modification);
716
memset(frame + FIL_PAGE_FILE_FLUSH_LSN, 0, 8);
718
case BUF_BLOCK_FILE_PAGE:
719
frame = bpage->zip.data;
721
frame = ((buf_block_t*) bpage)->frame;
724
buf_flush_init_for_writing(((buf_block_t*) bpage)->frame,
726
? &bpage->zip : NULL,
727
bpage->newest_modification);
519
731
if (!srv_use_doublewrite_buf || !trx_doublewrite) {
520
732
fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
521
FALSE, block->space, block->offset, 0, UNIV_PAGE_SIZE,
522
(void*)block->frame, (void*)block);
733
FALSE, buf_page_get_space(bpage), zip_size,
734
buf_page_get_page_no(bpage), 0,
735
zip_size ? zip_size : UNIV_PAGE_SIZE,
524
buf_flush_post_to_doublewrite_buf(block);
738
buf_flush_post_to_doublewrite_buf(bpage);
535
749
buf_flush_try_page(
536
750
/*===============*/
537
/* out: 1 if a page was flushed, 0 otherwise */
538
ulint space, /* in: space id */
539
ulint offset, /* in: page offset */
540
ulint flush_type) /* in: BUF_FLUSH_LRU, BUF_FLUSH_LIST, or
541
BUF_FLUSH_SINGLE_PAGE */
751
/* out: 1 if a page was
752
flushed, 0 otherwise */
753
ulint space, /* in: space id */
754
ulint offset, /* in: page offset */
755
enum buf_flush flush_type) /* in: BUF_FLUSH_LRU, BUF_FLUSH_LIST,
756
or BUF_FLUSH_SINGLE_PAGE */
759
mutex_t* block_mutex;
546
762
ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST
547
763
|| flush_type == BUF_FLUSH_SINGLE_PAGE);
549
mutex_enter(&(buf_pool->mutex));
551
block = buf_page_hash_get(space, offset);
553
ut_a(!block || block->state == BUF_BLOCK_FILE_PAGE);
556
mutex_exit(&(buf_pool->mutex));
560
mutex_enter(&block->mutex);
562
if (flush_type == BUF_FLUSH_LIST
563
&& buf_flush_ready_for_flush(block, flush_type)) {
565
block->io_fix = BUF_IO_WRITE;
567
/* If AWE is enabled and the page is not mapped to a frame,
570
if (block->frame == NULL) {
573
/* We set second parameter TRUE because the block is
574
in the LRU list and we must put it to
575
awe_LRU_free_mapped list once mapped to a frame */
577
buf_awe_map_page_to_frame(block, TRUE);
580
block->flush_type = flush_type;
765
buf_pool_mutex_enter();
767
bpage = buf_page_hash_get(space, offset);
770
buf_pool_mutex_exit();
774
ut_a(buf_page_in_file(bpage));
775
block_mutex = buf_page_get_mutex(bpage);
777
mutex_enter(block_mutex);
779
if (!buf_flush_ready_for_flush(bpage, flush_type)) {
780
mutex_exit(block_mutex);
781
buf_pool_mutex_exit();
785
switch (flush_type) {
787
buf_page_set_io_fix(bpage, BUF_IO_WRITE);
789
buf_page_set_flush_type(bpage, flush_type);
582
791
if (buf_pool->n_flush[flush_type] == 0) {
584
793
os_event_reset(buf_pool->no_flush[flush_type]);
587
(buf_pool->n_flush[flush_type])++;
796
buf_pool->n_flush[flush_type]++;
591
798
/* If the simulated aio thread is not running, we must
592
799
not wait for any latch, as we may end up in a deadlock:
593
800
if buf_fix_count == 0, then we know we need not wait */
595
if (block->buf_fix_count == 0) {
596
rw_lock_s_lock_gen(&(block->lock), BUF_IO_WRITE);
802
locked = bpage->buf_fix_count == 0;
804
&& buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE) {
805
rw_lock_s_lock_gen(&((buf_block_t*) bpage)->lock,
601
mutex_exit(&block->mutex);
602
mutex_exit(&(buf_pool->mutex));
809
mutex_exit(block_mutex);
810
buf_pool_mutex_exit();
605
813
buf_flush_buffered_writes();
607
rw_lock_s_lock_gen(&(block->lock), BUF_IO_WRITE);
611
if (buf_debug_prints) {
613
"Flushing page space %lu, page no %lu \n",
614
(ulong) block->space, (ulong) block->offset);
616
#endif /* UNIV_DEBUG */
618
buf_flush_write_block_low(block);
622
} else if (flush_type == BUF_FLUSH_LRU
623
&& buf_flush_ready_for_flush(block, flush_type)) {
815
if (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE) {
816
rw_lock_s_lock_gen(&((buf_block_t*) bpage)
817
->lock, BUF_IO_WRITE);
625
824
/* VERY IMPORTANT:
626
825
Because any thread may call the LRU flush, even when owning
627
826
locks on pages, to avoid deadlocks, we must make sure that the
630
829
the page not to be bufferfixed (in function
631
830
..._ready_for_flush). */
633
block->io_fix = BUF_IO_WRITE;
635
/* If AWE is enabled and the page is not mapped to a frame,
638
if (block->frame == NULL) {
641
/* We set second parameter TRUE because the block is
642
in the LRU list and we must put it to
643
awe_LRU_free_mapped list once mapped to a frame */
645
buf_awe_map_page_to_frame(block, TRUE);
648
block->flush_type = flush_type;
832
buf_page_set_io_fix(bpage, BUF_IO_WRITE);
834
buf_page_set_flush_type(bpage, flush_type);
650
836
if (buf_pool->n_flush[flush_type] == 0) {
652
838
os_event_reset(buf_pool->no_flush[flush_type]);
655
(buf_pool->n_flush[flush_type])++;
841
buf_pool->n_flush[flush_type]++;
657
rw_lock_s_lock_gen(&(block->lock), BUF_IO_WRITE);
843
if (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE) {
844
rw_lock_s_lock_gen(&((buf_block_t*) bpage)->lock,
659
848
/* Note that the s-latch is acquired before releasing the
660
849
buf_pool mutex: this ensures that the latch is acquired
663
mutex_exit(&block->mutex);
664
mutex_exit(&(buf_pool->mutex));
666
buf_flush_write_block_low(block);
670
} else if (flush_type == BUF_FLUSH_SINGLE_PAGE
671
&& buf_flush_ready_for_flush(block, flush_type)) {
673
block->io_fix = BUF_IO_WRITE;
675
/* If AWE is enabled and the page is not mapped to a frame,
678
if (block->frame == NULL) {
681
/* We set second parameter TRUE because the block is
682
in the LRU list and we must put it to
683
awe_LRU_free_mapped list once mapped to a frame */
685
buf_awe_map_page_to_frame(block, TRUE);
688
block->flush_type = flush_type;
690
if (buf_pool->n_flush[block->flush_type] == 0) {
692
os_event_reset(buf_pool->no_flush[block->flush_type]);
695
(buf_pool->n_flush[flush_type])++;
697
mutex_exit(&block->mutex);
698
mutex_exit(&(buf_pool->mutex));
700
rw_lock_s_lock_gen(&(block->lock), BUF_IO_WRITE);
852
mutex_exit(block_mutex);
853
buf_pool_mutex_exit();
856
case BUF_FLUSH_SINGLE_PAGE:
857
buf_page_set_io_fix(bpage, BUF_IO_WRITE);
859
buf_page_set_flush_type(bpage, flush_type);
861
if (buf_pool->n_flush[flush_type] == 0) {
863
os_event_reset(buf_pool->no_flush[flush_type]);
866
buf_pool->n_flush[flush_type]++;
868
mutex_exit(block_mutex);
869
buf_pool_mutex_exit();
871
if (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE) {
872
rw_lock_s_lock_gen(&((buf_block_t*) bpage)->lock,
702
881
#ifdef UNIV_DEBUG
703
if (buf_debug_prints) {
705
"Flushing single page space %lu,"
707
(ulong) block->space,
708
(ulong) block->offset);
882
if (buf_debug_prints) {
884
"Flushing %u space %u page %u\n",
885
flush_type, bpage->space, bpage->offset);
710
887
#endif /* UNIV_DEBUG */
712
buf_flush_write_block_low(block);
717
mutex_exit(&block->mutex);
718
mutex_exit(&(buf_pool->mutex));
888
buf_flush_write_block_low(bpage);
723
893
/***************************************************************
727
897
buf_flush_try_neighbors(
728
898
/*====================*/
729
/* out: number of pages flushed */
730
ulint space, /* in: space id */
731
ulint offset, /* in: page offset */
732
ulint flush_type) /* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
899
/* out: number of pages flushed */
900
ulint space, /* in: space id */
901
ulint offset, /* in: page offset */
902
enum buf_flush flush_type) /* in: BUF_FLUSH_LRU or
739
910
ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
741
low = (offset / BUF_FLUSH_AREA) * BUF_FLUSH_AREA;
742
high = (offset / BUF_FLUSH_AREA + 1) * BUF_FLUSH_AREA;
744
912
if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) {
745
913
/* If there is little space, it is better not to flush any
746
914
block except from the end of the LRU list */
749
917
high = offset + 1;
919
/* When flushed, dirty blocks are searched in neighborhoods of
920
this size, and flushed along with the original page. */
922
ulint buf_flush_area = ut_min(BUF_READ_AHEAD_AREA,
923
buf_pool->curr_size / 16);
925
low = (offset / buf_flush_area) * buf_flush_area;
926
high = (offset / buf_flush_area + 1) * buf_flush_area;
752
929
/* fprintf(stderr, "Flush area: low %lu high %lu\n", low, high); */
818
997
pages: to avoid deadlocks, this function must be written so that it cannot
819
998
end up waiting for these latches! NOTE 2: in the case of a flush list flush,
820
999
the calling thread is not allowed to own any latches on pages! */
823
1002
buf_flush_batch(
824
1003
/*============*/
825
/* out: number of blocks for which the write
826
request was queued; ULINT_UNDEFINED if there
827
was a flush of the same type already running */
828
ulint flush_type, /* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST; if
829
BUF_FLUSH_LIST, then the caller must not own
830
any latches on pages */
831
ulint min_n, /* in: wished minimum number of blocks flushed
832
(it is not guaranteed that the actual number
833
is that big, though) */
834
dulint lsn_limit) /* in: in the case BUF_FLUSH_LIST all blocks whose
835
oldest_modification is smaller than this
836
should be flushed (if their number does not
837
exceed min_n), otherwise ignored */
1004
/* out: number of blocks for which the
1005
write request was queued;
1006
ULINT_UNDEFINED if there was a flush
1007
of the same type already running */
1008
enum buf_flush flush_type, /* in: BUF_FLUSH_LRU or
1009
BUF_FLUSH_LIST; if BUF_FLUSH_LIST,
1010
then the caller must not own any
1012
ulint min_n, /* in: wished minimum number of blocks
1013
flushed (it is not guaranteed that the
1014
actual number is that big, though) */
1015
ib_uint64_t lsn_limit) /* in: in the case BUF_FLUSH_LIST all
1016
blocks whose oldest_modification is
1017
smaller than this should be flushed
1018
(if their number does not exceed
1019
min_n), otherwise ignored */
840
1022
ulint page_count = 0;
841
1023
ulint old_page_count;
846
1027
ut_ad((flush_type == BUF_FLUSH_LRU)
847
1028
|| (flush_type == BUF_FLUSH_LIST));
874
1056
block to be flushed. */
876
1058
if (flush_type == BUF_FLUSH_LRU) {
877
block = UT_LIST_GET_LAST(buf_pool->LRU);
1059
bpage = UT_LIST_GET_LAST(buf_pool->LRU);
879
1061
ut_ad(flush_type == BUF_FLUSH_LIST);
881
block = UT_LIST_GET_LAST(buf_pool->flush_list);
883
|| (ut_dulint_cmp(block->oldest_modification,
1063
bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
1065
|| bpage->oldest_modification >= lsn_limit) {
885
1066
/* We have flushed enough */
1070
ut_ad(bpage->in_flush_list);
893
1073
/* Note that after finding a single flushable page, we try to
894
1074
flush also all its neighbors, and after that start from the
895
1075
END of the LRU list or flush list again: the list may change
896
1076
during the flushing and we cannot safely preserve within this
897
1077
function a pointer to a block in the list! */
899
while ((block != NULL) && !found) {
900
ut_a(block->state == BUF_BLOCK_FILE_PAGE);
902
mutex_enter(&block->mutex);
904
if (buf_flush_ready_for_flush(block, flush_type)) {
907
space = block->space;
908
offset = block->offset;
910
mutex_exit(&block->mutex);
911
mutex_exit(&(buf_pool->mutex));
1080
mutex_t* block_mutex = buf_page_get_mutex(bpage);
1082
ut_a(buf_page_in_file(bpage));
1084
mutex_enter(block_mutex);
1086
if (buf_flush_ready_for_flush(bpage, flush_type)) {
1088
space = buf_page_get_space(bpage);
1089
offset = buf_page_get_page_no(bpage);
1091
buf_pool_mutex_exit();
1092
mutex_exit(block_mutex);
913
1094
old_page_count = page_count;