 /* There must be at least this many pages in buf_pool in the area to start
 a random read-ahead */
-#define BUF_READ_AHEAD_RANDOM_THRESHOLD (5 + BUF_READ_AHEAD_RANDOM_AREA / 8)
+#define BUF_READ_AHEAD_RANDOM_THRESHOLD (5 + buf_read_ahead_random_area / 8)

 /* The linear read-ahead area size */
 #define BUF_READ_AHEAD_LINEAR_AREA BUF_READ_AHEAD_AREA

 /* The linear read-ahead threshold */
-#define BUF_READ_AHEAD_LINEAR_THRESHOLD (3 * BUF_READ_AHEAD_LINEAR_AREA / 8)
+#define LINEAR_AREA_THRESHOLD_COEF 5 / 8
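
The two changed defines above alter how the thresholds are expressed: the random threshold is now derived from the run-time value buf_read_ahead_random_area rather than the BUF_READ_AHEAD_RANDOM_AREA macro, and the fixed linear threshold is replaced by a 5/8 coefficient applied to the area size where the check is made. A minimal standalone sketch of the arithmetic, assuming an illustrative area of 64 pages (the real size comes from BUF_READ_AHEAD_AREA and depends on the buffer pool); note that LINEAR_AREA_THRESHOLD_COEF expands textually to 5 / 8, so an expression like area * LINEAR_AREA_THRESHOLD_COEF parses as (area * 5) / 8 rather than area * 0:

#include <stdio.h>

#define AREA                            64      /* hypothetical read-ahead area size */
#define RANDOM_THRESHOLD                (5 + AREA / 8)
#define LINEAR_AREA_THRESHOLD_COEF      5 / 8

int
main(void)
{
        /* 5 + 64 / 8 = 13 pages of the area must have been accessed recently */
        printf("random threshold: %d pages\n", RANDOM_THRESHOLD);
        /* 64 * 5 / 8 = 40 out-of-order pages are tolerated by the linear check */
        printf("linear failure limit: %d pages\n", AREA * LINEAR_AREA_THRESHOLD_COEF);
        return(0);
}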

 /* If there are buf_pool->curr_size per the number below pending reads, then
 read-ahead is not done: this is to prevent flooding the buffer pool with

 ORed to OS_AIO_SIMULATED_WAKE_LATER (see below
 at read-ahead functions) */
 ulint space, /* in: space id */
-ib_longlong tablespace_version, /* in: if the space memory object has
+ulint zip_size,/* in: compressed page size, or 0 */
+ibool unzip, /* in: TRUE=request uncompressed page */
+ib_int64_t tablespace_version, /* in: if the space memory object has
 this timestamp different from what we are giving here,
 treat the tablespace as dropped; this is a timestamp we
 use to stop dangling page reads from a tablespace
 which we have DISCARDed + IMPORTed back */
 ulint offset) /* in: page number */

-if (ibuf_bitmap_page(offset) || trx_sys_hdr_page(space, offset)) {
+if (ibuf_bitmap_page(zip_size, offset)
+|| trx_sys_hdr_page(space, offset)) {
 /* Trx sys header is so low in the latching order that we play
 safe and do not leave the i/o-completion to an asynchronous

 or is being dropped; if we succeed in initing the page in the buffer
 pool for read, then DISCARD cannot proceed until the read has
-block = buf_page_init_for_read(err, mode, space, tablespace_version,
+bpage = buf_page_init_for_read(err, mode, space, zip_size, unzip,
+tablespace_version, offset);

-ut_a(block->state == BUF_BLOCK_FILE_PAGE);
-*err = fil_io(OS_FILE_READ | wake_later,
-offset, 0, UNIV_PAGE_SIZE,
-(void*)block->frame, (void*)block);
+ut_ad(buf_page_in_file(bpage));
+*err = fil_io(OS_FILE_READ | wake_later,
+sync, space, zip_size, offset, 0, zip_size,
+bpage->zip.data, bpage);
+ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
+*err = fil_io(OS_FILE_READ | wake_later,
+sync, space, 0, offset, 0, UNIV_PAGE_SIZE,
+((buf_block_t*) bpage)->frame, bpage);
 ut_a(*err == DB_SUCCESS);

 /* The i/o is already completed when we arrive from
-buf_page_io_complete(block);
+buf_page_io_complete(bpage);
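
In the new version the read itself depends on whether the tablespace is compressed: a compressed page is read as zip_size bytes into bpage->zip.data, an uncompressed page as a full UNIV_PAGE_SIZE frame. A minimal standalone sketch of that size selection, with PAGE_SIZE and page_io_size() as illustrative stand-ins rather than InnoDB names:

#include <stdio.h>
#include <stddef.h>

#define PAGE_SIZE 16384 /* stand-in for UNIV_PAGE_SIZE */

/* Number of bytes a single page read must transfer. */
static size_t
page_io_size(size_t zip_size) /* compressed page size, or 0 if uncompressed */
{
        return(zip_size != 0 ? zip_size : PAGE_SIZE);
}

int
main(void)
{
        printf("uncompressed: %zu bytes, 8K-compressed: %zu bytes\n",
               page_io_size(0), page_io_size(8192));
        return(0);
}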

 the page at the given page number does not get
 read even if we return a value > 0! */
 ulint space, /* in: space id */
+ulint zip_size,/* in: compressed page size in bytes, or 0 */
 ulint offset) /* in: page number of a page which the current thread
 wants to access */

-ib_longlong tablespace_version;
+ib_int64_t tablespace_version;
 ulint recent_blocks = 0;
 ulint LRU_recent_limit;
+ulint buf_read_ahead_random_area;

 if (srv_startup_is_before_trx_rollback_phase) {
 /* No read-ahead to avoid thread deadlocks */

-if (ibuf_bitmap_page(offset) || trx_sys_hdr_page(space, offset)) {
+if (ibuf_bitmap_page(zip_size, offset)
+|| trx_sys_hdr_page(space, offset)) {
 /* If it is an ibuf bitmap page or trx sys hdr, we do
 no read-ahead, as that could break the ibuf page access

 tablespace_version = fil_space_get_version(space);

-low = (offset / BUF_READ_AHEAD_RANDOM_AREA)
-* BUF_READ_AHEAD_RANDOM_AREA;
-high = (offset / BUF_READ_AHEAD_RANDOM_AREA + 1)
-* BUF_READ_AHEAD_RANDOM_AREA;
+buf_read_ahead_random_area = BUF_READ_AHEAD_RANDOM_AREA;
+
+low = (offset / buf_read_ahead_random_area)
+* buf_read_ahead_random_area;
+high = (offset / buf_read_ahead_random_area + 1)
+* buf_read_ahead_random_area;

 if (high > fil_space_get_size(space)) {
 high = fil_space_get_size(space);
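
The low/high computation above is unchanged in substance: integer division rounds the requested page number down to the start of its read-ahead area, adding one area gives the exclusive upper bound, and the bound is clamped to the tablespace size. A small standalone illustration, assuming a 64-page area and a 1000-page tablespace:

#include <stdio.h>

int
main(void)
{
        unsigned long area = 64;          /* hypothetical read-ahead area */
        unsigned long space_size = 1000;  /* hypothetical tablespace size in pages */
        unsigned long offset = 990;       /* requested page number */
        unsigned long low = (offset / area) * area;       /* 960 */
        unsigned long high = (offset / area + 1) * area;  /* 1024 */

        if (high > space_size) {
                high = space_size; /* do not read past the end of the space */
        }

        printf("read-ahead area of page %lu: [%lu, %lu)\n", offset, low, high);
        return(0);
}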

 LRU_recent_limit = buf_LRU_get_recent_limit();

-mutex_enter(&(buf_pool->mutex));
+buf_pool_mutex_enter();

 if (buf_pool->n_pend_reads
 > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
-mutex_exit(&(buf_pool->mutex));
+buf_pool_mutex_exit();

 that is, reside near the start of the LRU list. */

 for (i = low; i < high; i++) {
-block = buf_page_hash_get(space, i);
+const buf_page_t* bpage = buf_page_hash_get(space, i);

-&& (block->LRU_position > LRU_recent_limit)
-&& block->accessed) {
+&& buf_page_is_accessed(bpage)
+&& (buf_page_get_LRU_position(bpage) > LRU_recent_limit)) {

+if (recent_blocks >= BUF_READ_AHEAD_RANDOM_THRESHOLD) {
+buf_pool_mutex_exit();

-mutex_exit(&(buf_pool->mutex));
-if (recent_blocks < BUF_READ_AHEAD_RANDOM_THRESHOLD) {
+buf_pool_mutex_exit();
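
The change above restructures the threshold test: the old code counted recent pages over the whole area and compared the count after the scan, while the new code stops scanning and releases the buffer pool mutex as soon as the count reaches BUF_READ_AHEAD_RANDOM_THRESHOLD, and a scan that never reaches the threshold gives up. A minimal standalone sketch of the early-exit form, with is_recent() and THRESHOLD as illustrative stand-ins:

#include <stdbool.h>
#include <stdio.h>

#define THRESHOLD 13 /* stand-in for BUF_READ_AHEAD_RANDOM_THRESHOLD */

/* Dummy recency test so the sketch is self-contained: pretend every
fourth page of the area was accessed recently. */
static bool
is_recent(unsigned long page_no)
{
        return(page_no % 4 == 0);
}

/* TRUE when random read-ahead should be triggered for the area. */
static bool
should_read_ahead(unsigned long low, unsigned long high)
{
        unsigned long i;
        unsigned long recent = 0;

        for (i = low; i < high; i++) {
                if (is_recent(i) && ++recent >= THRESHOLD) {
                        return(true); /* enough recent pages: stop scanning */
                }
        }

        return(false);
}

int
main(void)
{
        printf("%d\n", should_read_ahead(960, 1024));
        return(0);
}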

 /* Read all the suitable blocks within the area */

 if (ibuf_inside()) {

 /* It is only sensible to do read-ahead in the non-sync aio
 mode: hence FALSE as the first parameter */

-if (!ibuf_bitmap_page(i)) {
+if (!ibuf_bitmap_page(zip_size, i)) {
 count += buf_read_page_low(

 ibuf_mode | OS_AIO_SIMULATED_WAKE_LATER,
-space, tablespace_version, i);
+space, zip_size, FALSE,
+tablespace_version, i);
 if (err == DB_TABLESPACE_DELETED) {
 ut_print_timestamp(stderr);
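
The loop above posts each suitable page as an asynchronous read and ORs OS_AIO_SIMULATED_WAKE_LATER into the mode, so the simulated aio handler can be woken once after the whole batch has been queued instead of once per request. A standalone sketch of that batching pattern, with post_async_read() and wake_io_handler() as illustrative stand-ins rather than InnoDB's aio calls:

#include <stdio.h>

/* Illustrative stand-ins: queue one asynchronous page read, and wake the
i/o handler so it can pick up (and merge) everything queued so far. */
static void
post_async_read(unsigned long page_no)
{
        printf("queued read of page %lu\n", page_no);
}

static void
wake_io_handler(void)
{
        printf("i/o handler woken once for the whole batch\n");
}

int
main(void)
{
        unsigned long i;

        for (i = 960; i < 1024; i++) {
                post_async_read(i); /* "wake later": do not signal yet */
        }

        wake_io_handler(); /* single wake-up after the batch */
        return(0);
}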

 an exclusive lock on the buffer frame. The flag is cleared and the x-lock
 released by the i/o-handler thread. Does a random read-ahead if it seems

 /* out: number of page read requests issued: this can
 be > 1 if read-ahead occurred */
 ulint space, /* in: space id */
+ulint zip_size,/* in: compressed page size in bytes, or 0 */
 ulint offset) /* in: page number */

-ib_longlong tablespace_version;
+ib_int64_t tablespace_version;

 tablespace_version = fil_space_get_version(space);

-count = buf_read_ahead_random(space, offset);
+count = buf_read_ahead_random(space, zip_size, offset);

 /* We do the i/o in the synchronous aio mode to save thread
 switches: hence TRUE */

 count2 = buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space,
 tablespace_version, offset);
 srv_buf_pool_reads+= count2;

 if (err == DB_TABLESPACE_DELETED) {
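
The function above first lets random read-ahead queue asynchronous reads for neighbouring pages and then reads the requested page itself synchronously, so the caller can use it immediately; the return value counts every read request issued, which is why it can be greater than one. A standalone sketch of that two-step shape, with both helpers below being illustrative stand-ins:

#include <stdio.h>

/* Illustrative stand-ins for the two steps: speculative asynchronous
read-ahead around the page, then a synchronous read of the page itself. */
static unsigned long
read_ahead_random(unsigned long space, unsigned long zip_size,
                  unsigned long offset)
{
        (void) space; (void) zip_size; (void) offset;
        return(3); /* pretend three neighbours were queued */
}

static unsigned long
read_page_sync(unsigned long space, unsigned long zip_size,
               unsigned long offset)
{
        (void) space; (void) zip_size; (void) offset;
        return(1); /* the requested page itself */
}

int
main(void)
{
        unsigned long count = read_ahead_random(0, 0, 990);

        count += read_page_sync(0, 0, 990);
        printf("read requests issued: %lu\n", count); /* can be > 1 */
        return(0);
}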

 NOTE 3: the calling thread must want access to the page given: this rule is
 set to prevent unintended read-aheads performed by ibuf routines, a situation
 which could result in a deadlock if the OS does not support asynchronous io. */

 buf_read_ahead_linear(
 /*==================*/
 /* out: number of page read requests issued */
 ulint space, /* in: space id */
+ulint zip_size,/* in: compressed page size in bytes, or 0 */
 ulint offset) /* in: page number of a page; NOTE: the current thread
 must want access to this page (see NOTE 3 above) */

-ib_longlong tablespace_version;
+ib_int64_t tablespace_version;

 buf_frame_t* frame;
-buf_block_t* pred_block = NULL;
+buf_page_t* pred_bpage = NULL;
 ulint pred_offset;
 ulint succ_offset;
+const ulint buf_read_ahead_linear_area
+= BUF_READ_AHEAD_LINEAR_AREA;

-if (srv_startup_is_before_trx_rollback_phase) {
+if (UNIV_UNLIKELY(srv_startup_is_before_trx_rollback_phase)) {
 /* No read-ahead to avoid thread deadlocks */

-if (ibuf_bitmap_page(offset) || trx_sys_hdr_page(space, offset)) {
-/* If it is an ibuf bitmap page or trx sys hdr, we do
-no read-ahead, as that could break the ibuf page access

-low = (offset / BUF_READ_AHEAD_LINEAR_AREA)
-* BUF_READ_AHEAD_LINEAR_AREA;
-high = (offset / BUF_READ_AHEAD_LINEAR_AREA + 1)
-* BUF_READ_AHEAD_LINEAR_AREA;
+low = (offset / buf_read_ahead_linear_area)
+* buf_read_ahead_linear_area;
+high = (offset / buf_read_ahead_linear_area + 1)
+* buf_read_ahead_linear_area;

 if ((offset != low) && (offset != high - 1)) {
 /* This is not a border page of the area: return */
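
Linear read-ahead is only evaluated when the requested page is the first or the last page of its area, so the scan of the area's access history runs only at area borders rather than on every page access. A standalone sketch of the border test, assuming an illustrative 64-page area:

#include <stdio.h>

static int
is_area_border(unsigned long offset, unsigned long area)
{
        unsigned long low = (offset / area) * area; /* first page of the area */
        unsigned long high = low + area;            /* one past the last page */

        return(offset == low || offset == high - 1);
}

int
main(void)
{
        printf("page 960: %d, page 1000: %d, page 1023: %d\n",
               is_area_border(960, 64), is_area_border(1000, 64),
               is_area_border(1023, 64)); /* 1, 0, 1 */
        return(0);
}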

+if (ibuf_bitmap_page(zip_size, offset)
+|| trx_sys_hdr_page(space, offset)) {
+/* If it is an ibuf bitmap page or trx sys hdr, we do
+no read-ahead, as that could break the ibuf page access

 /* Remember the tablespace version before we ask the tablespace size
 below: if DISCARD + IMPORT changes the actual .ibd file meanwhile, we
 do not try to read outside the bounds of the tablespace! */

 tablespace_version = fil_space_get_version(space);

-mutex_enter(&(buf_pool->mutex));
+buf_pool_mutex_enter();

 if (high > fil_space_get_size(space)) {
-mutex_exit(&(buf_pool->mutex));
+buf_pool_mutex_exit();
 /* The area is not whole, return */

 for (i = low; i < high; i++) {
-block = buf_page_hash_get(space, i);
+bpage = buf_page_hash_get(space, i);

-if ((block == NULL) || !block->accessed) {
+if ((bpage == NULL) || !buf_page_is_accessed(bpage)) {
 /* Not accessed */

-} else if (pred_block
-&& (ut_ulint_cmp(block->LRU_position,
-pred_block->LRU_position)
+} else if (pred_bpage
+&& (ut_ulint_cmp(
+buf_page_get_LRU_position(bpage),
+buf_page_get_LRU_position(pred_bpage))
 != asc_or_desc)) {
 /* Accesses not in the right order */
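
The comparison above checks that the pages of the area were accessed in one consistent direction: asc_or_desc holds the expected three-way comparison result (+1 or -1, chosen from which border of the area the caller hit), and every neighbouring pair whose LRU positions compare differently counts as a failure. A standalone sketch of that test, with cmp_ulint() mirroring ut_ulint_cmp()'s -1/0/+1 result:

#include <stdio.h>

/* Three-way comparison mirroring ut_ulint_cmp(): -1, 0 or +1. */
static int
cmp_ulint(unsigned long a, unsigned long b)
{
        return(a < b ? -1 : a > b ? 1 : 0);
}

/* Nonzero when a neighbouring pair of pages breaks the expected access
direction; asc_or_desc is +1 or -1. */
static int
order_broken(unsigned long lru_pos, unsigned long pred_lru_pos, int asc_or_desc)
{
        return(cmp_ulint(lru_pos, pred_lru_pos) != asc_or_desc);
}

int
main(void)
{
        /* Positions 10 then 20 look ascending: not broken for +1, broken for -1. */
        printf("%d %d\n", order_broken(20, 10, 1), order_broken(20, 10, -1));
        return(0);
}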

-if (fail_count > BUF_READ_AHEAD_LINEAR_AREA
-- BUF_READ_AHEAD_LINEAR_THRESHOLD) {
+if (fail_count > buf_read_ahead_linear_area
+* LINEAR_AREA_THRESHOLD_COEF) {
 /* Too many failures: return */

-mutex_exit(&(buf_pool->mutex));
+buf_pool_mutex_exit();
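
The rewritten condition keeps the old limit: the previous code allowed up to BUF_READ_AHEAD_LINEAR_AREA - 3 * BUF_READ_AHEAD_LINEAR_AREA / 8 failures, the new one allows buf_read_ahead_linear_area * 5 / 8, and the two agree whenever the area size is a multiple of 8. A quick standalone check of that equivalence:

#include <assert.h>
#include <stdio.h>

int
main(void)
{
        unsigned long area;

        for (area = 8; area <= 256; area += 8) {
                /* old limit: area - 3 * area / 8; new limit: area * 5 / 8 */
                assert(area - 3 * area / 8 == area * 5 / 8);
        }

        printf("old and new linear failure limits agree for areas divisible by 8\n");
        return(0);
}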

 /* If we got this far, we know that enough pages in the area have
 been accessed in the right order: linear read-ahead can be sensible */

-block = buf_page_hash_get(space, offset);
+bpage = buf_page_hash_get(space, offset);

-mutex_exit(&(buf_pool->mutex));
+buf_pool_mutex_exit();

-frame = block->frame;
+switch (buf_page_get_state(bpage)) {
+case BUF_BLOCK_ZIP_PAGE:
+frame = bpage->zip.data;
+case BUF_BLOCK_FILE_PAGE:
+frame = ((buf_block_t*) bpage)->frame;
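
With the compressed buffer pool the descriptor found in the page hash may be a compressed-only page or a fully uncompressed block, so the code above selects where the page image lives from the page state instead of always taking block->frame. A standalone sketch of the same dispatch, using an illustrative struct rather than InnoDB's buf_page_t:

#include <stdio.h>

typedef enum { PAGE_ZIP_ONLY, PAGE_UNCOMPRESSED } page_state_t;

typedef struct {
        page_state_t state;      /* which image is present */
        unsigned char* zip_data; /* compressed page image, if any */
        unsigned char* frame;    /* uncompressed frame, if any */
} page_desc_t;

/* Pointer to the bytes the read-ahead code should parse page headers from. */
static const unsigned char*
page_image(const page_desc_t* page)
{
        switch (page->state) {
        case PAGE_ZIP_ONLY:
                return(page->zip_data);
        case PAGE_UNCOMPRESSED:
                return(page->frame);
        }

        return(NULL); /* unreachable for valid states */
}

int
main(void)
{
        unsigned char zip[8192];
        unsigned char frame[16384];
        page_desc_t a = { PAGE_ZIP_ONLY, zip, NULL };
        page_desc_t b = { PAGE_UNCOMPRESSED, NULL, frame };

        printf("%p %p\n", (void*) page_image(&a), (void*) page_image(&b));
        return(0);
}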

 /* Read the natural predecessor and successor page addresses from
 the page; NOTE that because the calling thread may have an x-latch

-low = (new_offset / BUF_READ_AHEAD_LINEAR_AREA)
-* BUF_READ_AHEAD_LINEAR_AREA;
-high = (new_offset / BUF_READ_AHEAD_LINEAR_AREA + 1)
-* BUF_READ_AHEAD_LINEAR_AREA;
+low = (new_offset / buf_read_ahead_linear_area)
+* buf_read_ahead_linear_area;
+high = (new_offset / buf_read_ahead_linear_area + 1)
+* buf_read_ahead_linear_area;

 if ((new_offset != low) && (new_offset != high - 1)) {
 /* This is not a border page of the area: return */

 /* It is only sensible to do read-ahead in the non-sync
 aio mode: hence FALSE as the first parameter */

-if (!ibuf_bitmap_page(i)) {
+if (!ibuf_bitmap_page(zip_size, i)) {
 count += buf_read_page_low(

 ibuf_mode | OS_AIO_SIMULATED_WAKE_LATER,
-space, tablespace_version, i);
+space, zip_size, FALSE, tablespace_version, i);
 if (err == DB_TABLESPACE_DELETED) {
 ut_print_timestamp(stderr);
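
As in the random read-ahead loop, a DB_TABLESPACE_DELETED result here is not a hard error: the read was speculative, so the code prints a timestamped warning and continues with the remaining pages of the area. A standalone sketch of that tolerant loop, with issue_read() and the error codes as illustrative stand-ins:

#include <stdio.h>
#include <time.h>

enum { SUCCESS = 0, TABLESPACE_DELETED = 1 }; /* illustrative error codes */

/* Dummy read issuer: pretend the tablespace vanished under page 963. */
static int
issue_read(unsigned long page_no)
{
        return(page_no == 963 ? TABLESPACE_DELETED : SUCCESS);
}

int
main(void)
{
        unsigned long i;
        unsigned long count = 0;

        for (i = 960; i < 968; i++) {
                int err = issue_read(i);

                if (err == TABLESPACE_DELETED) {
                        /* Speculative read: report it and keep going. */
                        fprintf(stderr, "%lu: read-ahead hit a dropped"
                                " tablespace at page %lu\n",
                                (unsigned long) time(NULL), i);
                        continue;
                }

                count++;
        }

        printf("%lu read-ahead requests issued\n", count);
        return(0);
}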

 Issues read requests for pages which the ibuf module wants to read in, in
 order to contract the insert buffer tree. Technically, this function is like
 a read-ahead function. */

 buf_read_ibuf_merge_pages(
 /*======================*/
-ibool sync, /* in: TRUE if the caller wants this function
-to wait for the highest address page to get
-read in, before this function returns */
-ulint* space_ids, /* in: array of space ids */
-ib_longlong* space_versions,/* in: the spaces must have this version
-number (timestamp), otherwise we discard the
-read; we use this to cancel reads if
-DISCARD + IMPORT may have changed the
-ulint* page_nos, /* in: array of page numbers to read, with the
-highest page number the last in the array */
-ulint n_stored) /* in: number of page numbers in the array */
+ibool sync, /* in: TRUE if the caller
+wants this function to wait
+for the highest address page
+to get read in, before this
+const ulint* space_ids, /* in: array of space ids */
+const ib_int64_t* space_versions,/* in: the spaces must have
+(timestamp), otherwise we
+discard the read; we use this
+to cancel reads if DISCARD +
+IMPORT may have changed the
+const ulint* page_nos, /* in: array of page numbers
+to read, with the highest page
+number the last in the
+ulint n_stored) /* in: number of elements

 ut_ad(!ibuf_inside());

 for (i = 0; i < n_stored; i++) {
-buf_read_page_low(&err,
-(i + 1 == n_stored) && sync,
-space_ids[i], space_versions[i],
+ulint zip_size = fil_space_get_zip_size(space_ids[i]);
+if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
+goto tablespace_deleted;
+buf_read_page_low(&err, sync && (i + 1 == n_stored),
+BUF_READ_ANY_PAGE, space_ids[i],
+zip_size, TRUE, space_versions[i],

-if (err == DB_TABLESPACE_DELETED) {
+if (UNIV_UNLIKELY(err == DB_TABLESPACE_DELETED)) {
 /* We have deleted or are deleting the single-table
 tablespace: remove the entries for that page */

 ibuf_merge_or_delete_for_page(NULL, space_ids[i],
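
The new loop body looks up each space's compressed page size before issuing the read; a fil_space_get_zip_size() result of ULINT_UNDEFINED means the tablespace no longer exists, and such an entry is routed to the deleted-tablespace handling instead of being read. Only the read for the last array element is synchronous when the caller asked to wait. A standalone sketch of that per-entry handling (the sketch simply skips dropped spaces), with lookup_zip_size(), SIZE_UNDEFINED and issue_read() as illustrative stand-ins:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define SIZE_UNDEFINED ((size_t) -1) /* stand-in for ULINT_UNDEFINED */

/* Dummy lookup: pretend space 7 was dropped, space 3 is 8K-compressed. */
static size_t
lookup_zip_size(unsigned long space_id)
{
        if (space_id == 7) {
                return(SIZE_UNDEFINED);
        }
        return(space_id == 3 ? 8192 : 0);
}

/* Dummy read issuer. */
static void
issue_read(bool sync, unsigned long space_id, size_t zip_size,
           unsigned long page_no)
{
        printf("space %lu page %lu zip_size %zu%s\n",
               space_id, page_no, zip_size, sync ? " (sync)" : "");
}

int
main(void)
{
        const unsigned long space_ids[] = { 3, 7, 0 };
        const unsigned long page_nos[] = { 100, 200, 300 };
        const size_t n = 3;
        const bool sync = true;
        size_t i;

        for (i = 0; i < n; i++) {
                size_t zip_size = lookup_zip_size(space_ids[i]);

                if (zip_size == SIZE_UNDEFINED) {
                        continue; /* tablespace dropped: the sketch skips it */
                }

                /* Wait only for the very last page when sync was requested. */
                issue_read(sync && (i + 1 == n), space_ids[i], zip_size,
                           page_nos[i]);
        }

        return(0);
}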

 /************************************************************************
 Issues read requests for pages which recovery wants to read in. */

 buf_read_recv_pages(
 /*================*/
-ibool sync, /* in: TRUE if the caller wants this function
-to wait for the highest address page to get
-read in, before this function returns */
-ulint space, /* in: space id */
-ulint* page_nos, /* in: array of page numbers to read, with the
-highest page number the last in the array */
-ulint n_stored) /* in: number of page numbers in the array */
+ibool sync, /* in: TRUE if the caller
+wants this function to wait
+for the highest address page
+to get read in, before this
+ulint space, /* in: space id */
+ulint zip_size, /* in: compressed page size in
+const ulint* page_nos, /* in: array of page numbers
+to read, with the highest page
+number the last in the
+ulint n_stored) /* in: number of page numbers

-ib_longlong tablespace_version;
+ib_int64_t tablespace_version;

+zip_size = fil_space_get_zip_size(space);
 tablespace_version = fil_space_get_version(space);

 for (i = 0; i < n_stored; i++) {

 os_aio_print_debug = FALSE;

 if ((i + 1 == n_stored) && sync) {
-buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE,
-space, tablespace_version,
+buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space,
+zip_size, TRUE, tablespace_version,

 buf_read_page_low(&err, FALSE, BUF_READ_ANY_PAGE
 | OS_AIO_SIMULATED_WAKE_LATER,
-space, tablespace_version,
+space, zip_size, TRUE,
+tablespace_version, page_nos[i]);
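
buf_read_recv_pages() issues its batch the same way as the other batch readers: every recovery read except possibly the last is queued asynchronously with the wake-later flag, and only the read of the highest page number is made synchronous when sync was requested, so the function returns only after that page has been read in. A standalone sketch of the loop shape, with post_read() as an illustrative stand-in:

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-in: issue one page read, either waiting for it to
complete (sync) or just queueing it for the i/o handler. */
static void
post_read(bool sync, unsigned long page_no)
{
        printf("%s read of page %lu\n", sync ? "synchronous" : "queued", page_no);
}

/* Read a batch of recovery pages; wait only for the last one if requested. */
static void
read_recv_pages(bool sync, const unsigned long* page_nos, unsigned long n)
{
        unsigned long i;

        for (i = 0; i < n; i++) {
                if ((i + 1 == n) && sync) {
                        post_read(true, page_nos[i]);
                } else {
                        post_read(false, page_nos[i]);
                }
        }
}

int
main(void)
{
        const unsigned long pages[] = { 10, 11, 12, 40 };

        read_recv_pages(true, pages, 4);
        return(0);
}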