/******************************************************
The database buffer buf_pool flush algorithm

(c) 1995-2001 Innobase Oy

Created 11/11/1995 Heikki Tuuri
*******************************************************/

#include "buf0flu.h"

#ifdef UNIV_NONINL
#include "buf0flu.ic"
#include "trx0sys.h"
#endif

#include "ut0byte.h"
#include "ut0lst.h"
#include "page0page.h"
#include "page0zip.h"
#include "fil0fil.h"
#include "buf0buf.h"
#include "buf0lru.h"
#include "buf0rea.h"
#include "ibuf0ibuf.h"
#include "log0log.h"
#include "os0file.h"
#include "trx0sys.h"
#include "srv0srv.h"

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/**********************************************************************
Validates the flush list. */
static
ibool
buf_flush_validate_low(void);
/*========================*/
		/* out: TRUE if ok */
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

/************************************************************************
Inserts a modified block into the flush list. */
UNIV_INTERN
void
buf_flush_insert_into_flush_list(
/*=============================*/
	buf_page_t*	bpage)	/* in: block which is modified */
{
	ut_ad(buf_pool_mutex_own());
	ut_ad((UT_LIST_GET_FIRST(buf_pool->flush_list) == NULL)
	      || (UT_LIST_GET_FIRST(buf_pool->flush_list)->oldest_modification
		  <= bpage->oldest_modification));

	switch (buf_page_get_state(bpage)) {
	case BUF_BLOCK_ZIP_PAGE:
		mutex_enter(&buf_pool_zip_mutex);
		buf_page_set_state(bpage, BUF_BLOCK_ZIP_DIRTY);
		mutex_exit(&buf_pool_zip_mutex);
		UT_LIST_REMOVE(list, buf_pool->zip_clean, bpage);
		/* fall through */
	case BUF_BLOCK_ZIP_DIRTY:
	case BUF_BLOCK_FILE_PAGE:
		ut_ad(bpage->in_LRU_list);
		ut_ad(bpage->in_page_hash);
		ut_ad(!bpage->in_zip_hash);
		ut_ad(!bpage->in_flush_list);
		ut_d(bpage->in_flush_list = TRUE);
		UT_LIST_ADD_FIRST(list, buf_pool->flush_list, bpage);
		break;
	case BUF_BLOCK_ZIP_FREE:
	case BUF_BLOCK_NOT_USED:
	case BUF_BLOCK_READY_FOR_USE:
	case BUF_BLOCK_MEMORY:
	case BUF_BLOCK_REMOVE_HASH:
		ut_error;
		return;
	}

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
	ut_a(buf_flush_validate_low());
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
}

/************************************************************************
Inserts a modified block into the flush list in the right sorted position.
This function is used by recovery, because there the modifications do not
necessarily come in the order of lsn's. */
UNIV_INTERN
void
buf_flush_insert_sorted_into_flush_list(
/*====================================*/
	buf_page_t*	bpage)	/* in: block which is modified */
{
	buf_page_t*	prev_b;
	buf_page_t*	b;

	ut_ad(buf_pool_mutex_own());

	switch (buf_page_get_state(bpage)) {
	case BUF_BLOCK_ZIP_PAGE:
		mutex_enter(&buf_pool_zip_mutex);
		buf_page_set_state(bpage, BUF_BLOCK_ZIP_DIRTY);
		mutex_exit(&buf_pool_zip_mutex);
		UT_LIST_REMOVE(list, buf_pool->zip_clean, bpage);
		/* fall through */
	case BUF_BLOCK_ZIP_DIRTY:
	case BUF_BLOCK_FILE_PAGE:
		ut_ad(bpage->in_LRU_list);
		ut_ad(bpage->in_page_hash);
		ut_ad(!bpage->in_zip_hash);
		ut_ad(!bpage->in_flush_list);
		ut_d(bpage->in_flush_list = TRUE);
		break;
	case BUF_BLOCK_ZIP_FREE:
	case BUF_BLOCK_NOT_USED:
	case BUF_BLOCK_READY_FOR_USE:
	case BUF_BLOCK_MEMORY:
	case BUF_BLOCK_REMOVE_HASH:
		ut_error;
		return;
	}

	prev_b = NULL;
	b = UT_LIST_GET_FIRST(buf_pool->flush_list);

	while (b && b->oldest_modification > bpage->oldest_modification) {
		ut_ad(b->in_flush_list);
		prev_b = b;
		b = UT_LIST_GET_NEXT(list, b);
	}

	if (prev_b == NULL) {
		UT_LIST_ADD_FIRST(list, buf_pool->flush_list, bpage);
	} else {
		UT_LIST_INSERT_AFTER(list, buf_pool->flush_list,
				     prev_b, bpage);
	}

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
	ut_a(buf_flush_validate_low());
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
}
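
/* Cost note, evident from the loop above: the flush list is kept in
decreasing order of oldest_modification, so this sorted insert is a
linear scan from the list head.  Outside of recovery that cost is
avoided: buf_flush_insert_into_flush_list() adds at the head in O(1),
which is valid because normal modifications arrive in increasing lsn
order. */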

/************************************************************************
Returns TRUE if the file page block is immediately suitable for replacement,
i.e., the transition FILE_PAGE => NOT_USED is allowed. */
UNIV_INTERN
ibool
buf_flush_ready_for_replace(
/*========================*/
				/* out: TRUE if can replace immediately */
	buf_page_t*	bpage)	/* in: buffer control block, must be
				buf_page_in_file(bpage) and in the LRU list */
{
	ut_ad(buf_pool_mutex_own());
	ut_ad(mutex_own(buf_page_get_mutex(bpage)));
	ut_ad(bpage->in_LRU_list);

	if (UNIV_LIKELY(buf_page_in_file(bpage))) {

		return(bpage->oldest_modification == 0
		       && buf_page_get_io_fix(bpage) == BUF_IO_NONE
		       && bpage->buf_fix_count == 0);
	}

	ut_print_timestamp(stderr);
	fprintf(stderr,
		"  InnoDB: Error: buffer block state %lu"
		" in the LRU list!\n",
		(ulong) buf_page_get_state(bpage));
	ut_print_buf(stderr, bpage, sizeof(buf_page_t));
	putc('\n', stderr);

	return(FALSE);
}

/************************************************************************
Returns TRUE if the block is modified and ready for flushing. */
UNIV_INLINE
ibool
buf_flush_ready_for_flush(
/*======================*/
				/* out: TRUE if can flush immediately */
	buf_page_t*	bpage,	/* in: buffer control block, must be
				buf_page_in_file(bpage) */
	enum buf_flush	flush_type)/* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
{
	ut_a(buf_page_in_file(bpage));
	ut_ad(buf_pool_mutex_own());
	ut_ad(mutex_own(buf_page_get_mutex(bpage)));

	if (bpage->oldest_modification != 0
	    && buf_page_get_io_fix(bpage) == BUF_IO_NONE) {
		ut_ad(bpage->in_flush_list);

		if (flush_type != BUF_FLUSH_LRU) {

			return(TRUE);

		} else if (bpage->buf_fix_count == 0) {

			/* If we are flushing the LRU list, to avoid deadlocks
			we require the block not to be bufferfixed, and hence
			not latched. */

			return(TRUE);
		}
	}

	return(FALSE);
}

/************************************************************************
Remove a block from the flush list of modified blocks. */
UNIV_INTERN
void
buf_flush_remove(
/*=============*/
	buf_page_t*	bpage)	/* in: pointer to the block in question */
{
	ut_ad(buf_pool_mutex_own());
	ut_ad(mutex_own(buf_page_get_mutex(bpage)));
	ut_ad(bpage->in_flush_list);
	ut_d(bpage->in_flush_list = FALSE);

	switch (buf_page_get_state(bpage)) {
	case BUF_BLOCK_ZIP_PAGE:
		/* clean compressed pages should not be on the flush list */
	case BUF_BLOCK_ZIP_FREE:
	case BUF_BLOCK_NOT_USED:
	case BUF_BLOCK_READY_FOR_USE:
	case BUF_BLOCK_MEMORY:
	case BUF_BLOCK_REMOVE_HASH:
		ut_error;
		return;
	case BUF_BLOCK_ZIP_DIRTY:
		buf_page_set_state(bpage, BUF_BLOCK_ZIP_PAGE);
		UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
		buf_LRU_insert_zip_clean(bpage);
		break;
	case BUF_BLOCK_FILE_PAGE:
		UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
		break;
	}

	bpage->oldest_modification = 0;

	ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list));
}

/************************************************************************
Updates the flush system data structures when a write is completed. */
UNIV_INTERN
void
buf_flush_write_complete(
/*=====================*/
	buf_page_t*	bpage)	/* in: pointer to the block in question */
{
	enum buf_flush	flush_type;

	ut_ad(bpage);

	buf_flush_remove(bpage);

	flush_type = buf_page_get_flush_type(bpage);
	buf_pool->n_flush[flush_type]--;

	if (flush_type == BUF_FLUSH_LRU) {
		/* Put the block to the end of the LRU list to wait to be
		moved to the free list */

		buf_LRU_make_block_old(bpage);

		buf_pool->LRU_flush_ended++;
	}

	/* fprintf(stderr, "n pending flush %lu\n",
	buf_pool->n_flush[flush_type]); */

	if ((buf_pool->n_flush[flush_type] == 0)
	    && (buf_pool->init_flush[flush_type] == FALSE)) {

		/* The running flush batch has ended */

		os_event_set(buf_pool->no_flush[flush_type]);
	}
}
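
/* A sketch of the doublewrite scheme implemented below: the buffer
trx_doublewrite->write_buf accumulates up to
2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE page images.  These are first
written synchronously, as two contiguous chunks, to the reserved areas
block1 and block2 in the system tablespace, and fil_flush() is called
on that tablespace.  Only then are the pages posted asynchronously to
their real locations.  If the server crashes in the middle of a page
write, recovery can restore the page from the doublewrite area, so a
torn write never destroys the only copy of a page. */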

/************************************************************************
Flushes possible buffered writes from the doublewrite memory buffer to disk,
and also wakes up the aio thread if simulated aio is used. It is very
important to call this function after a batch of writes has been posted,
and also when we may have to wait for a page latch! Otherwise a deadlock
of threads can occur. */
static
void
buf_flush_buffered_writes(void)
/*===========================*/
{
	byte*		write_buf;
	ulint		len;
	ulint		len2;
	ulint		i;

	if (!srv_use_doublewrite_buf || trx_doublewrite == NULL) {
		os_aio_simulated_wake_handler_threads();

		return;
	}

	mutex_enter(&(trx_doublewrite->mutex));

	/* Write first to doublewrite buffer blocks. We use synchronous
	aio and thus know that file write has been completed when the
	control returns. */

	if (trx_doublewrite->first_free == 0) {

		mutex_exit(&(trx_doublewrite->mutex));

		return;
	}

	for (i = 0; i < trx_doublewrite->first_free; i++) {

		const buf_block_t*	block;

		block = (buf_block_t*) trx_doublewrite->buf_block_arr[i];

		if (buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE
		    || block->page.zip.data) {
			/* No simple validate for compressed pages exists. */
			continue;
		}

		if (UNIV_UNLIKELY
		    (memcmp(block->frame + (FIL_PAGE_LSN + 4),
			    block->frame + (UNIV_PAGE_SIZE
					    - FIL_PAGE_END_LSN_OLD_CHKSUM + 4),
			    4))) {
			ut_print_timestamp(stderr);
			fprintf(stderr,
				"  InnoDB: ERROR: The page to be written"
				" seems corrupt!\n"
				"InnoDB: The lsn fields do not match!"
				" Noticed in the buffer pool\n"
				"InnoDB: before posting to the"
				" doublewrite buffer.\n");
		}

		if (!block->check_index_page_at_flush) {
		} else if (page_is_comp(block->frame)) {
			if (UNIV_UNLIKELY
			    (!page_simple_validate_new(block->frame))) {
corrupted_page:
				buf_page_print(block->frame, 0);

				ut_print_timestamp(stderr);
				fprintf(stderr,
					"  InnoDB: Apparent corruption of an"
					" index page n:o %lu in space %lu\n"
					"InnoDB: to be written to data file."
					" We intentionally crash server\n"
					"InnoDB: to prevent corrupt data"
					" from ending up in data\n"
					"InnoDB: files.\n",
					(ulong) buf_block_get_page_no(block),
					(ulong) buf_block_get_space(block));

				ut_error;
			}
		} else if (UNIV_UNLIKELY
			   (!page_simple_validate_old(block->frame))) {

			goto corrupted_page;
		}
	}

	/* increment the doublewrite flushed pages counter */
	srv_dblwr_pages_written+= trx_doublewrite->first_free;
	srv_dblwr_writes++;

	len = ut_min(TRX_SYS_DOUBLEWRITE_BLOCK_SIZE,
		     trx_doublewrite->first_free) * UNIV_PAGE_SIZE;

	write_buf = trx_doublewrite->write_buf;
	i = 0;

	fil_io(OS_FILE_WRITE, TRUE, TRX_SYS_SPACE, 0,
	       trx_doublewrite->block1, 0, len,
	       (void*) write_buf, NULL);

	for (len2 = 0; len2 + UNIV_PAGE_SIZE <= len;
	     len2 += UNIV_PAGE_SIZE, i++) {
		const buf_block_t* block = (buf_block_t*)
			trx_doublewrite->buf_block_arr[i];

		if (UNIV_LIKELY(!block->page.zip.data)
		    && UNIV_LIKELY(buf_block_get_state(block)
				   == BUF_BLOCK_FILE_PAGE)
		    && UNIV_UNLIKELY
		    (memcmp(write_buf + len2 + (FIL_PAGE_LSN + 4),
			    write_buf + len2
			    + (UNIV_PAGE_SIZE
			       - FIL_PAGE_END_LSN_OLD_CHKSUM + 4), 4))) {
			ut_print_timestamp(stderr);
			fprintf(stderr,
				"  InnoDB: ERROR: The page to be written"
				" seems corrupt!\n"
				"InnoDB: The lsn fields do not match!"
				" Noticed in the doublewrite block1.\n");
		}
	}

	if (trx_doublewrite->first_free <= TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
		goto flush;
	}

	len = (trx_doublewrite->first_free - TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)
		* UNIV_PAGE_SIZE;

	write_buf = trx_doublewrite->write_buf
		+ TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE;
	ut_ad(i == TRX_SYS_DOUBLEWRITE_BLOCK_SIZE);

	fil_io(OS_FILE_WRITE, TRUE, TRX_SYS_SPACE, 0,
	       trx_doublewrite->block2, 0, len,
	       (void*) write_buf, NULL);

	for (len2 = 0; len2 + UNIV_PAGE_SIZE <= len;
	     len2 += UNIV_PAGE_SIZE, i++) {
		const buf_block_t* block = (buf_block_t*)
			trx_doublewrite->buf_block_arr[i];

		if (UNIV_LIKELY(!block->page.zip.data)
		    && UNIV_LIKELY(buf_block_get_state(block)
				   == BUF_BLOCK_FILE_PAGE)
		    && UNIV_UNLIKELY
		    (memcmp(write_buf + len2 + (FIL_PAGE_LSN + 4),
			    write_buf + len2
			    + (UNIV_PAGE_SIZE
			       - FIL_PAGE_END_LSN_OLD_CHKSUM + 4), 4))) {
			ut_print_timestamp(stderr);
			fprintf(stderr,
				"  InnoDB: ERROR: The page to be"
				" written seems corrupt!\n"
				"InnoDB: The lsn fields do not match!"
				" Noticed in"
				" the doublewrite block2.\n");
		}
	}

flush:
	/* Now flush the doublewrite buffer data to disk */

	fil_flush(TRX_SYS_SPACE);

	/* We know that the writes have been flushed to disk now
	and in recovery we will find them in the doublewrite buffer
	blocks. Next do the writes to the intended positions. */

	for (i = 0; i < trx_doublewrite->first_free; i++) {
		const buf_block_t* block = (buf_block_t*)
			trx_doublewrite->buf_block_arr[i];

		ut_a(buf_page_in_file(&block->page));
		if (UNIV_LIKELY_NULL(block->page.zip.data)) {
			fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
			       FALSE, buf_page_get_space(&block->page),
			       buf_page_get_zip_size(&block->page),
			       buf_page_get_page_no(&block->page), 0,
			       buf_page_get_zip_size(&block->page),
			       (void*)block->page.zip.data,
			       (void*)block);

			/* Increment the counter of I/O operations used
			for selecting LRU policy. */
			buf_LRU_stat_inc_io();

			continue;
		}

		ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);

		if (UNIV_UNLIKELY(memcmp(block->frame + (FIL_PAGE_LSN + 4),
					 block->frame
					 + (UNIV_PAGE_SIZE
					    - FIL_PAGE_END_LSN_OLD_CHKSUM + 4),
					 4))) {
			ut_print_timestamp(stderr);
			fprintf(stderr,
				"  InnoDB: ERROR: The page to be written"
				" seems corrupt!\n"
				"InnoDB: The lsn fields do not match!"
				" Noticed in the buffer pool\n"
				"InnoDB: after posting and flushing"
				" the doublewrite buffer.\n"
				"InnoDB: Page buf fix count %lu,"
				" io fix %lu, state %lu\n",
				(ulong)block->page.buf_fix_count,
				(ulong)buf_block_get_io_fix(block),
				(ulong)buf_block_get_state(block));
		}

		fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
		       FALSE, buf_block_get_space(block), 0,
		       buf_block_get_page_no(block), 0, UNIV_PAGE_SIZE,
		       (void*)block->frame, (void*)block);

		/* Increment the counter of I/O operations used
		for selecting LRU policy. */
		buf_LRU_stat_inc_io();
	}

	/* Wake possible simulated aio thread to actually post the
	writes to the operating system */

	os_aio_simulated_wake_handler_threads();

	/* Wait until all async writes to tablespaces have been posted to
	the OS */

	os_aio_wait_until_no_pending_writes();

	/* Now we flush the data to disk (for example, with fsync) */

	fil_flush_file_spaces(FIL_TABLESPACE);

	/* We can now reuse the doublewrite memory buffer: */

	trx_doublewrite->first_free = 0;

	mutex_exit(&(trx_doublewrite->mutex));
}

/************************************************************************
Posts a buffer page for writing. If the doublewrite memory buffer is
full, calls buf_flush_buffered_writes and waits for free space to
appear. */
static
void
buf_flush_post_to_doublewrite_buf(
/*==============================*/
	buf_page_t*	bpage)	/* in: buffer block to write */
{
	ulint	zip_size;
try_again:
	mutex_enter(&(trx_doublewrite->mutex));

	ut_a(buf_page_in_file(bpage));

	if (trx_doublewrite->first_free
	    >= 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
		mutex_exit(&(trx_doublewrite->mutex));

		buf_flush_buffered_writes();

		goto try_again;
	}

	zip_size = buf_page_get_zip_size(bpage);

	if (UNIV_UNLIKELY(zip_size)) {
		/* Copy the compressed page and clear the rest. */
		memcpy(trx_doublewrite->write_buf
		       + UNIV_PAGE_SIZE * trx_doublewrite->first_free,
		       bpage->zip.data, zip_size);
		memset(trx_doublewrite->write_buf
		       + UNIV_PAGE_SIZE * trx_doublewrite->first_free
		       + zip_size, 0, UNIV_PAGE_SIZE - zip_size);
	} else {
		ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);

		memcpy(trx_doublewrite->write_buf
		       + UNIV_PAGE_SIZE * trx_doublewrite->first_free,
		       ((buf_block_t*) bpage)->frame, UNIV_PAGE_SIZE);
	}

	trx_doublewrite->buf_block_arr[trx_doublewrite->first_free] = bpage;

	trx_doublewrite->first_free++;

	if (trx_doublewrite->first_free
	    >= 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
		mutex_exit(&(trx_doublewrite->mutex));

		buf_flush_buffered_writes();

		return;
	}

	mutex_exit(&(trx_doublewrite->mutex));
}
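
/* Page image layout assumed by buf_flush_init_for_writing() below
(the standard InnoDB page format): the header stores the full 8-byte
lsn at FIL_PAGE_LSN and the new-formula checksum at
FIL_PAGE_SPACE_OR_CHKSUM; the 8-byte trailer at
UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM stores the old-formula
checksum in its first 4 bytes and the low 4 bytes of the lsn in its
last 4.  That trailing lsn fragment is what the memcmp() corruption
checks in buf_flush_buffered_writes() compare against
FIL_PAGE_LSN + 4. */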

/************************************************************************
Initializes a page for writing to the tablespace. */
UNIV_INTERN
void
buf_flush_init_for_writing(
/*=======================*/
	byte*		page,		/* in/out: page */
	void*		page_zip_,	/* in/out: compressed page, or NULL */
	ib_uint64_t	newest_lsn)	/* in: newest modification lsn
					to the page */
{
	ut_ad(page);

	if (page_zip_) {
		page_zip_des_t*	page_zip = page_zip_;
		ulint		zip_size = page_zip_get_size(page_zip);
		ut_ad(zip_size);
		ut_ad(ut_is_2pow(zip_size));
		ut_ad(zip_size <= UNIV_PAGE_SIZE);

		switch (UNIV_EXPECT(fil_page_get_type(page), FIL_PAGE_INDEX)) {
		case FIL_PAGE_TYPE_ALLOCATED:
		case FIL_PAGE_INODE:
		case FIL_PAGE_IBUF_BITMAP:
		case FIL_PAGE_TYPE_FSP_HDR:
		case FIL_PAGE_TYPE_XDES:
			/* These are essentially uncompressed pages. */
			memcpy(page_zip->data, page, zip_size);
			/* fall through */
		case FIL_PAGE_TYPE_ZBLOB:
		case FIL_PAGE_TYPE_ZBLOB2:
		case FIL_PAGE_INDEX:
			mach_write_ull(page_zip->data
				       + FIL_PAGE_LSN, newest_lsn);
			memset(page_zip->data + FIL_PAGE_FILE_FLUSH_LSN, 0, 8);
			mach_write_to_4(page_zip->data
					+ FIL_PAGE_SPACE_OR_CHKSUM,
					srv_use_checksums
					? page_zip_calc_checksum(
						page_zip->data, zip_size)
					: BUF_NO_CHECKSUM_MAGIC);
			return;
		}

		ut_print_timestamp(stderr);
		fputs("  InnoDB: ERROR: The compressed page to be written"
		      " seems corrupt:", stderr);
		ut_print_buf(stderr, page, zip_size);
		fputs("\nInnoDB: Possibly older version of the page:", stderr);
		ut_print_buf(stderr, page_zip->data, zip_size);
		putc('\n', stderr);
		ut_error;
	}

	/* Write the newest modification lsn to the page header and trailer */
	mach_write_ull(page + FIL_PAGE_LSN, newest_lsn);

	mach_write_ull(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
		       newest_lsn);

	/* Store the new formula checksum */

	mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM,
			srv_use_checksums
			? buf_calc_page_new_checksum(page)
			: BUF_NO_CHECKSUM_MAGIC);

	/* We overwrite the first 4 bytes of the end lsn field to store
	the old formula checksum. Since it depends also on the field
	FIL_PAGE_SPACE_OR_CHKSUM, it has to be calculated after storing the
	new formula checksum. */

	mach_write_to_4(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
			srv_use_checksums
			? buf_calc_page_old_checksum(page)
			: BUF_NO_CHECKSUM_MAGIC);
}

/************************************************************************
Does an asynchronous write of a buffer page. NOTE: in simulated aio and
also when the doublewrite buffer is used, we must call
buf_flush_buffered_writes after we have posted a batch of writes! */
static
void
buf_flush_write_block_low(
/*======================*/
	buf_page_t*	bpage)	/* in: buffer block to write */
{
	ulint	zip_size	= buf_page_get_zip_size(bpage);
	page_t*	frame		= NULL;
#ifdef UNIV_LOG_DEBUG
	static ibool univ_log_debug_warned;
#endif /* UNIV_LOG_DEBUG */

	ut_ad(buf_page_in_file(bpage));

#ifdef UNIV_IBUF_COUNT_DEBUG
	ut_a(ibuf_count_get(bpage->space, bpage->offset) == 0);
#endif
	ut_ad(bpage->newest_modification != 0);

#ifdef UNIV_LOG_DEBUG
	if (!univ_log_debug_warned) {
		univ_log_debug_warned = TRUE;
		fputs("Warning: cannot force log to disk if"
		      " UNIV_LOG_DEBUG is defined!\n"
		      "Crash recovery will not work!\n",
		      stderr);
	}
#else
	/* Force the log to the disk before writing the modified block */
	log_write_up_to(bpage->newest_modification, LOG_WAIT_ALL_GROUPS, TRUE);
#endif
	switch (buf_page_get_state(bpage)) {
	case BUF_BLOCK_ZIP_FREE:
	case BUF_BLOCK_ZIP_PAGE: /* The page should be dirty. */
	case BUF_BLOCK_NOT_USED:
	case BUF_BLOCK_READY_FOR_USE:
	case BUF_BLOCK_MEMORY:
	case BUF_BLOCK_REMOVE_HASH:
		ut_error;
		break;
	case BUF_BLOCK_ZIP_DIRTY:
		frame = bpage->zip.data;
		if (UNIV_LIKELY(srv_use_checksums)) {
			ut_a(mach_read_from_4(frame + FIL_PAGE_SPACE_OR_CHKSUM)
			     == page_zip_calc_checksum(frame, zip_size));
		}
		mach_write_ull(frame + FIL_PAGE_LSN,
			       bpage->newest_modification);
		memset(frame + FIL_PAGE_FILE_FLUSH_LSN, 0, 8);
		break;
	case BUF_BLOCK_FILE_PAGE:
		frame = bpage->zip.data;
		if (!frame) {
			frame = ((buf_block_t*) bpage)->frame;
		}

		buf_flush_init_for_writing(((buf_block_t*) bpage)->frame,
					   bpage->zip.data
					   ? &bpage->zip : NULL,
					   bpage->newest_modification);
		break;
	}

	if (!srv_use_doublewrite_buf || !trx_doublewrite) {
		fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
		       FALSE, buf_page_get_space(bpage), zip_size,
		       buf_page_get_page_no(bpage), 0,
		       zip_size ? zip_size : UNIV_PAGE_SIZE,
		       frame, bpage);
	} else {
		buf_flush_post_to_doublewrite_buf(bpage);
	}
}

/************************************************************************
Writes a page asynchronously from the buffer buf_pool to a file, if it can be
found in the buf_pool and it is in a flushable state. NOTE: in simulated aio
we must call os_aio_simulated_wake_handler_threads after we have posted a batch
of writes! */
static
ulint
buf_flush_try_page(
/*===============*/
					/* out: 1 if a page was
					flushed, 0 otherwise */
	ulint		space,		/* in: space id */
	ulint		offset,		/* in: page offset */
	enum buf_flush	flush_type)	/* in: BUF_FLUSH_LRU, BUF_FLUSH_LIST,
					or BUF_FLUSH_SINGLE_PAGE */
{
	buf_page_t*	bpage;
	mutex_t*	block_mutex;
	ibool		locked;

	ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST
	      || flush_type == BUF_FLUSH_SINGLE_PAGE);

	buf_pool_mutex_enter();

	bpage = buf_page_hash_get(space, offset);

	if (!bpage) {
		buf_pool_mutex_exit();
		return(0);
	}

	ut_a(buf_page_in_file(bpage));
	block_mutex = buf_page_get_mutex(bpage);

	mutex_enter(block_mutex);

	if (!buf_flush_ready_for_flush(bpage, flush_type)) {
		mutex_exit(block_mutex);
		buf_pool_mutex_exit();
		return(0);
	}

	switch (flush_type) {
	case BUF_FLUSH_LIST:
		buf_page_set_io_fix(bpage, BUF_IO_WRITE);

		buf_page_set_flush_type(bpage, flush_type);

		if (buf_pool->n_flush[flush_type] == 0) {

			os_event_reset(buf_pool->no_flush[flush_type]);
		}

		buf_pool->n_flush[flush_type]++;

		/* If the simulated aio thread is not running, we must
		not wait for any latch, as we may end up in a deadlock:
		if buf_fix_count == 0, then we know we need not wait */

		locked = bpage->buf_fix_count == 0;
		if (locked
		    && buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE) {
			rw_lock_s_lock_gen(&((buf_block_t*) bpage)->lock,
					   BUF_IO_WRITE);
		}

		mutex_exit(block_mutex);
		buf_pool_mutex_exit();

		if (!locked) {
			buf_flush_buffered_writes();

			if (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE) {
				rw_lock_s_lock_gen(&((buf_block_t*) bpage)
						   ->lock, BUF_IO_WRITE);
			}
		}

		break;

	case BUF_FLUSH_LRU:
		/* VERY IMPORTANT:
		Because any thread may call the LRU flush, even when owning
		locks on pages, to avoid deadlocks, we must make sure that the
		s-lock is acquired on the page without waiting: this is
		accomplished because in the if-condition above we require
		the page not to be bufferfixed (in function
		..._ready_for_flush). */

		buf_page_set_io_fix(bpage, BUF_IO_WRITE);

		buf_page_set_flush_type(bpage, flush_type);

		if (buf_pool->n_flush[flush_type] == 0) {

			os_event_reset(buf_pool->no_flush[flush_type]);
		}

		buf_pool->n_flush[flush_type]++;

		if (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE) {
			rw_lock_s_lock_gen(&((buf_block_t*) bpage)->lock,
					   BUF_IO_WRITE);
		}

		/* Note that the s-latch is acquired before releasing the
		buf_pool mutex: this ensures that the latch is acquired
		immediately. */

		mutex_exit(block_mutex);
		buf_pool_mutex_exit();
		break;

	case BUF_FLUSH_SINGLE_PAGE:
		buf_page_set_io_fix(bpage, BUF_IO_WRITE);

		buf_page_set_flush_type(bpage, flush_type);

		if (buf_pool->n_flush[flush_type] == 0) {

			os_event_reset(buf_pool->no_flush[flush_type]);
		}

		buf_pool->n_flush[flush_type]++;

		mutex_exit(block_mutex);
		buf_pool_mutex_exit();

		if (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE) {
			rw_lock_s_lock_gen(&((buf_block_t*) bpage)->lock,
					   BUF_IO_WRITE);
		}
		break;

	default:
		ut_error;
	}

#ifdef UNIV_DEBUG
	if (buf_debug_prints) {
		fprintf(stderr,
			"Flushing %u space %u page %u\n",
			flush_type, bpage->space, bpage->offset);
	}
#endif /* UNIV_DEBUG */
	buf_flush_write_block_low(bpage);

	return(1);
}

/***************************************************************
Flushes to disk all flushable pages within the flush area. */
static
ulint
buf_flush_try_neighbors(
/*====================*/
					/* out: number of pages flushed */
	ulint		space,		/* in: space id */
	ulint		offset,		/* in: page offset */
	enum buf_flush	flush_type)	/* in: BUF_FLUSH_LRU or
					BUF_FLUSH_LIST */
{
	buf_page_t*	bpage;
	ulint		low, high;
	ulint		count		= 0;
	ulint		i;

	ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);

	if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) {
		/* If there is little space, it is better not to flush any
		block except from the end of the LRU list */

		low = offset;
		high = offset + 1;
	} else {
		/* When flushed, dirty blocks are searched in neighborhoods of
		this size, and flushed along with the original page. */

		ulint	buf_flush_area	= ut_min(BUF_READ_AHEAD_AREA,
						 buf_pool->curr_size / 16);

		low = (offset / buf_flush_area) * buf_flush_area;
		high = (offset / buf_flush_area + 1) * buf_flush_area;
	}
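
	/* For example, with buf_flush_area == 64 and offset == 100,
	integer division gives low == 64 and high == 128, i.e. the
	64-page neighborhood [64, 128) that contains page 100. */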

	/* fprintf(stderr, "Flush area: low %lu high %lu\n", low, high); */

	if (high > fil_space_get_size(space)) {
		high = fil_space_get_size(space);
	}

	buf_pool_mutex_enter();

	for (i = low; i < high; i++) {

		bpage = buf_page_hash_get(space, i);
		ut_a(!bpage || buf_page_in_file(bpage));

		if (!bpage) {

			continue;

		} else if (flush_type == BUF_FLUSH_LRU && i != offset
			   && !buf_page_is_old(bpage)) {

			/* We avoid flushing 'non-old' blocks in an LRU flush,
			because the flushed blocks are soon freed */

			continue;
		} else {

			mutex_t* block_mutex = buf_page_get_mutex(bpage);

			mutex_enter(block_mutex);

			if (buf_flush_ready_for_flush(bpage, flush_type)
			    && (i == offset || !bpage->buf_fix_count)) {
				/* We only try to flush those
				neighbors != offset where the buf fix count is
				zero, as we then know that we probably can
				latch the page without a semaphore wait.
				Semaphore waits are expensive because we must
				flush the doublewrite buffer before we start
				waiting. */

				buf_pool_mutex_exit();

				mutex_exit(block_mutex);

				/* Note: as we release the buf_pool mutex
				above, in buf_flush_try_page we cannot be sure
				the page is still in a flushable state:
				therefore we check it again inside that
				function. */

				count += buf_flush_try_page(space, i,
							    flush_type);

				buf_pool_mutex_enter();
			} else {
				mutex_exit(block_mutex);
			}
		}
	}

	buf_pool_mutex_exit();

	return(count);
}

/***********************************************************************
This utility flushes dirty blocks from the end of the LRU list or flush_list.
NOTE 1: in the case of an LRU flush the calling thread may own latches to
pages: to avoid deadlocks, this function must be written so that it cannot
end up waiting for these latches! NOTE 2: in the case of a flush list flush,
the calling thread is not allowed to own any latches on pages! */
UNIV_INTERN
ulint
buf_flush_batch(
/*============*/
					/* out: number of blocks for which the
					write request was queued;
					ULINT_UNDEFINED if there was a flush
					of the same type already running */
	enum buf_flush	flush_type,	/* in: BUF_FLUSH_LRU or
					BUF_FLUSH_LIST; if BUF_FLUSH_LIST,
					then the caller must not own any
					latches on pages */
	ulint		min_n,		/* in: wished minimum number of blocks
					flushed (it is not guaranteed that the
					actual number is that big, though) */
	ib_uint64_t	lsn_limit)	/* in: in the case of BUF_FLUSH_LIST,
					all blocks whose oldest_modification
					is smaller than this should be flushed
					(if their number does not exceed
					min_n), otherwise ignored */
{
	buf_page_t*	bpage;
	ulint		page_count	= 0;
	ulint		old_page_count;
	ulint		space;
	ulint		offset;

	ut_ad((flush_type == BUF_FLUSH_LRU)
	      || (flush_type == BUF_FLUSH_LIST));
#ifdef UNIV_SYNC_DEBUG
	ut_ad((flush_type != BUF_FLUSH_LIST)
	      || sync_thread_levels_empty_gen(TRUE));
#endif /* UNIV_SYNC_DEBUG */
	buf_pool_mutex_enter();

	if ((buf_pool->n_flush[flush_type] > 0)
	    || (buf_pool->init_flush[flush_type] == TRUE)) {

		/* There is already a flush batch of the same type running */

		buf_pool_mutex_exit();

		return(ULINT_UNDEFINED);
	}

	buf_pool->init_flush[flush_type] = TRUE;

	for (;;) {
flush_next:
		/* If we have flushed enough, leave the loop */
		if (page_count >= min_n) {

			break;
		}

		/* Start from the end of the list looking for a suitable
		block to be flushed. */

		if (flush_type == BUF_FLUSH_LRU) {
			bpage = UT_LIST_GET_LAST(buf_pool->LRU);
		} else {
			ut_ad(flush_type == BUF_FLUSH_LIST);

			bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
			if (!bpage
			    || bpage->oldest_modification >= lsn_limit) {
				/* We have flushed enough */

				break;
			}
			ut_ad(bpage->in_flush_list);
		}

		/* Note that after finding a single flushable page, we try to
		flush also all its neighbors, and after that start from the
		END of the LRU list or flush list again: the list may change
		during the flushing and we cannot safely preserve within this
		function a pointer to a block in the list! */

		do {
			mutex_t* block_mutex = buf_page_get_mutex(bpage);

			ut_a(buf_page_in_file(bpage));

			mutex_enter(block_mutex);

			if (buf_flush_ready_for_flush(bpage, flush_type)) {

				space = buf_page_get_space(bpage);
				offset = buf_page_get_page_no(bpage);

				buf_pool_mutex_exit();
				mutex_exit(block_mutex);

				old_page_count = page_count;

				/* Try to flush also all the neighbors */
				page_count += buf_flush_try_neighbors(
					space, offset, flush_type);
				/* fprintf(stderr,
				"Flush type %lu, page no %lu, neighb %lu\n",
				flush_type, offset,
				page_count - old_page_count); */

				buf_pool_mutex_enter();
				goto flush_next;

			} else if (flush_type == BUF_FLUSH_LRU) {

				mutex_exit(block_mutex);

				bpage = UT_LIST_GET_PREV(LRU, bpage);
			} else {
				ut_ad(flush_type == BUF_FLUSH_LIST);

				mutex_exit(block_mutex);

				bpage = UT_LIST_GET_PREV(list, bpage);
				ut_ad(!bpage || bpage->in_flush_list);
			}
		} while (bpage != NULL);

		/* If we could not find anything to flush, leave the loop */

		break;
	}

	buf_pool->init_flush[flush_type] = FALSE;

	if ((buf_pool->n_flush[flush_type] == 0)
	    && (buf_pool->init_flush[flush_type] == FALSE)) {

		/* The running flush batch has ended */

		os_event_set(buf_pool->no_flush[flush_type]);
	}

	buf_pool_mutex_exit();

	buf_flush_buffered_writes();

#ifdef UNIV_DEBUG
	if (buf_debug_prints && page_count > 0) {
		ut_a(flush_type == BUF_FLUSH_LRU
		     || flush_type == BUF_FLUSH_LIST);
		fprintf(stderr, flush_type == BUF_FLUSH_LRU
			? "Flushed %lu pages in LRU flush\n"
			: "Flushed %lu pages in flush list flush\n",
			(ulong) page_count);
	}
#endif /* UNIV_DEBUG */

	srv_buf_pool_flushed += page_count;

	return(page_count);
}
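
/* Illustrative call patterns (the checkpoint caller lives outside this
file): an LRU margin flush ignores lsn_limit, e.g.
buf_flush_batch(BUF_FLUSH_LRU, n_to_flush, 0), while a checkpoint-style
flush passes a huge min_n together with a real limit, e.g.
buf_flush_batch(BUF_FLUSH_LIST, ULINT_MAX, new_oldest_lsn), to force
out every block modified before new_oldest_lsn. */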

/**********************************************************************
Waits until a flush batch of the given type ends. */
UNIV_INTERN
void
buf_flush_wait_batch_end(
/*=====================*/
	enum buf_flush	type)	/* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
{
	ut_ad((type == BUF_FLUSH_LRU) || (type == BUF_FLUSH_LIST));

	os_event_wait(buf_pool->no_flush[type]);
}

/**********************************************************************
Gives a recommendation of how many blocks should be flushed to establish
a big enough margin of replaceable blocks near the end of the LRU list
and in the free list. */
static
ulint
buf_flush_LRU_recommendation(void)
/*==============================*/
			/* out: number of blocks which should be flushed
			from the end of the LRU list */
{
	buf_page_t*	bpage;
	ulint		n_replaceable;
	ulint		distance	= 0;

	buf_pool_mutex_enter();

	n_replaceable = UT_LIST_GET_LEN(buf_pool->free);

	bpage = UT_LIST_GET_LAST(buf_pool->LRU);

	while ((bpage != NULL)
	       && (n_replaceable < BUF_FLUSH_FREE_BLOCK_MARGIN
		   + BUF_FLUSH_EXTRA_MARGIN)
	       && (distance < BUF_LRU_FREE_SEARCH_LEN)) {

		mutex_t* block_mutex = buf_page_get_mutex(bpage);

		mutex_enter(block_mutex);

		if (buf_flush_ready_for_replace(bpage)) {
			n_replaceable++;
		}

		mutex_exit(block_mutex);

		distance++;

		bpage = UT_LIST_GET_PREV(LRU, bpage);
	}

	buf_pool_mutex_exit();

	if (n_replaceable >= BUF_FLUSH_FREE_BLOCK_MARGIN) {

		return(0);
	}

	return(BUF_FLUSH_FREE_BLOCK_MARGIN + BUF_FLUSH_EXTRA_MARGIN
	       - n_replaceable);
}
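
/* In effect: if at least BUF_FLUSH_FREE_BLOCK_MARGIN blocks are free
or immediately replaceable, nothing needs to be done; otherwise the
recommendation tops the margin up to
BUF_FLUSH_FREE_BLOCK_MARGIN + BUF_FLUSH_EXTRA_MARGIN.  The extra
margin gives some slack, so that buf_flush_free_margin() is not
re-triggered by the very next page allocation after a flush. */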

/*************************************************************************
Flushes pages from the end of the LRU list if there is too small a margin
of replaceable pages there or in the free list. VERY IMPORTANT: this function
is called also by threads which have locks on pages. To avoid deadlocks, we
flush only pages such that the s-lock required for flushing can be acquired
immediately, without waiting. */
UNIV_INTERN
void
buf_flush_free_margin(void)
/*=======================*/
{
	ulint	n_to_flush;
	ulint	n_flushed;

	n_to_flush = buf_flush_LRU_recommendation();

	if (n_to_flush > 0) {
		n_flushed = buf_flush_batch(BUF_FLUSH_LRU, n_to_flush, 0);
		if (n_flushed == ULINT_UNDEFINED) {
			/* There was an LRU type flush batch already running;
			let us wait for it to end */

			buf_flush_wait_batch_end(BUF_FLUSH_LRU);
		}
	}
}

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/**********************************************************************
Validates the flush list. */
static
ibool
buf_flush_validate_low(void)
/*========================*/
		/* out: TRUE if ok */
{
	buf_page_t*	bpage;

	UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list);

	bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);

	while (bpage != NULL) {
		const ib_uint64_t om = bpage->oldest_modification;
		ut_ad(bpage->in_flush_list);
		ut_a(buf_page_in_file(bpage));
		ut_a(om > 0);

		bpage = UT_LIST_GET_NEXT(list, bpage);

		ut_a(!bpage || om >= bpage->oldest_modification);
	}

	return(TRUE);
}

/**********************************************************************
Validates the flush list. */
UNIV_INTERN
ibool
buf_flush_validate(void)
/*====================*/
		/* out: TRUE if ok */
{
	ibool	ret;

	buf_pool_mutex_enter();

	ret = buf_flush_validate_low();

	buf_pool_mutex_exit();

	return(ret);
}
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */