 /* There must be at least this many pages in buf_pool in the area to start
 a random read-ahead */
-#define BUF_READ_AHEAD_RANDOM_THRESHOLD (5 + BUF_READ_AHEAD_RANDOM_AREA / 8)
+#define BUF_READ_AHEAD_RANDOM_THRESHOLD (5 + buf_read_ahead_random_area / 8)

 /* The linear read-ahead area size */
 #define BUF_READ_AHEAD_LINEAR_AREA BUF_READ_AHEAD_AREA

 /* The linear read-ahead threshold */
-#define BUF_READ_AHEAD_LINEAR_THRESHOLD (3 * BUF_READ_AHEAD_LINEAR_AREA / 8)
+#define LINEAR_AREA_THRESHOLD_COEF 5 / 8

 /* If there are buf_pool->curr_size per the number below pending reads, then
 read-ahead is not done: this is to prevent flooding the buffer pool with
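As a quick sanity check on the two thresholds above (an illustration only; the 64-page area is an assumed value, not something fixed by this patch): with a 64-page area the random threshold is 5 + 64 / 8 = 13 recently accessed pages, and linear read-ahead tolerates at most 64 * 5 / 8 = 40 misordered pages in the area. Note that LINEAR_AREA_THRESHOLD_COEF expands textually, so buf_read_ahead_linear_area * LINEAR_AREA_THRESHOLD_COEF groups as (area * 5) / 8 rather than area * (5 / 8), which would be zero in integer arithmetic.

/* Illustration only -- not part of the patch; the area size is assumed. */
#include <stdio.h>

#define LINEAR_AREA_THRESHOLD_COEF 5 / 8

int
main(void)
{
        unsigned long area = 64;        /* assumed read-ahead area size */

        /* random read-ahead: this many recently accessed pages must
        already sit in the area before the rest of it is read in */
        unsigned long random_threshold = 5 + area / 8;

        /* linear read-ahead: give up once more than this many pages in
        the area fail the access-order check */
        unsigned long max_failures = area * LINEAR_AREA_THRESHOLD_COEF;

        printf("%lu %lu\n", random_threshold, max_failures);   /* 13 40 */
        return(0);
}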
...
                 ORed to OS_AIO_SIMULATED_WAKE_LATER (see below
                 at read-ahead functions) */
         ulint space, /* in: space id */
-        ib_longlong tablespace_version, /* in: if the space memory object has
+        ulint zip_size,/* in: compressed page size, or 0 */
+        ibool unzip, /* in: TRUE=request uncompressed page */
+        ib_int64_t tablespace_version, /* in: if the space memory object has
                         this timestamp different from what we are giving here,
                         treat the tablespace as dropped; this is a timestamp we
                         use to stop dangling page reads from a tablespace
                         which we have DISCARDed + IMPORTed back */
         ulint offset) /* in: page number */
...
-        if (ibuf_bitmap_page(offset) || trx_sys_hdr_page(space, offset)) {
+        if (ibuf_bitmap_page(zip_size, offset)
+            || trx_sys_hdr_page(space, offset)) {

                 /* Trx sys header is so low in the latching order that we play
                 safe and do not leave the i/o-completion to an asynchronous
...
         or is being dropped; if we succeed in initing the page in the buffer
         pool for read, then DISCARD cannot proceed until the read has
...
-        block = buf_page_init_for_read(err, mode, space, tablespace_version,
+        bpage = buf_page_init_for_read(err, mode, space, zip_size, unzip,
+                                       tablespace_version, offset);
...
-        ut_a(block->state == BUF_BLOCK_FILE_PAGE);
-        *err = fil_io(OS_FILE_READ | wake_later,
-                      offset, 0, UNIV_PAGE_SIZE,
-                      (void*)block->frame, (void*)block);
+        ut_ad(buf_page_in_file(bpage));
+        *err = fil_io(OS_FILE_READ | wake_later,
+                      sync, space, zip_size, offset, 0, zip_size,
+                      bpage->zip.data, bpage);
+        ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
+        *err = fil_io(OS_FILE_READ | wake_later,
+                      sync, space, 0, offset, 0, UNIV_PAGE_SIZE,
+                      ((buf_block_t*) bpage)->frame, bpage);

         ut_a(*err == DB_SUCCESS);
...
         /* The i/o is already completed when we arrive from
-        buf_page_io_complete(block);
+        buf_page_io_complete(bpage);
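A minimal sketch of the buffer/length choice the two new fil_io() calls above make (the type and helper below are illustrative stand-ins, not InnoDB definitions, and the page size is assumed): a compressed tablespace reads only zip_size bytes into the compressed-page buffer, while an uncompressed one reads a full page into the block frame.

/* Sketch only -- not part of the patch. */
#include <stddef.h>

#define UNIV_PAGE_SIZE_SKETCH 16384     /* assumed uncompressed page size */

struct read_dest {
        void* buf;      /* where the read places the page */
        size_t len;     /* how many bytes to read from the file */
};

static struct read_dest
choose_read_dest(size_t zip_size, void* zip_data, void* frame)
{
        struct read_dest d;

        if (zip_size) {
                /* compressed tablespace: read the compressed page only,
                like bpage->zip.data in the patch */
                d.buf = zip_data;
                d.len = zip_size;
        } else {
                /* uncompressed tablespace: read a whole page into the
                frame, like ((buf_block_t*) bpage)->frame */
                d.buf = frame;
                d.len = UNIV_PAGE_SIZE_SKETCH;
        }

        return(d);
}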
...
                 the page at the given page number does not get
                 read even if we return a value > 0! */
         ulint space, /* in: space id */
+        ulint zip_size,/* in: compressed page size in bytes, or 0 */
         ulint offset) /* in: page number of a page which the current thread
                         wants to access */
-        ib_longlong tablespace_version;
+        ib_int64_t tablespace_version;
         ulint recent_blocks = 0;
         ulint LRU_recent_limit;
+        ulint buf_read_ahead_random_area;

         if (srv_startup_is_before_trx_rollback_phase) {
                 /* No read-ahead to avoid thread deadlocks */
...
-        if (ibuf_bitmap_page(offset) || trx_sys_hdr_page(space, offset)) {
+        if (ibuf_bitmap_page(zip_size, offset)
+            || trx_sys_hdr_page(space, offset)) {

                 /* If it is an ibuf bitmap page or trx sys hdr, we do
                 no read-ahead, as that could break the ibuf page access
...
         tablespace_version = fil_space_get_version(space);

-        low = (offset / BUF_READ_AHEAD_RANDOM_AREA)
-                * BUF_READ_AHEAD_RANDOM_AREA;
-        high = (offset / BUF_READ_AHEAD_RANDOM_AREA + 1)
-                * BUF_READ_AHEAD_RANDOM_AREA;
+        buf_read_ahead_random_area = BUF_READ_AHEAD_RANDOM_AREA;
+        low = (offset / buf_read_ahead_random_area)
+                * buf_read_ahead_random_area;
+        high = (offset / buf_read_ahead_random_area + 1)
+                * buf_read_ahead_random_area;

         if (high > fil_space_get_size(space)) {
                 high = fil_space_get_size(space);
...
         LRU_recent_limit = buf_LRU_get_recent_limit();

-        mutex_enter(&(buf_pool->mutex));
+        buf_pool_mutex_enter();

         if (buf_pool->n_pend_reads
             > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
-                mutex_exit(&(buf_pool->mutex));
+                buf_pool_mutex_exit();
...
         that is, reside near the start of the LRU list. */

         for (i = low; i < high; i++) {
-                block = buf_page_hash_get(space, i);
+                const buf_page_t* bpage = buf_page_hash_get(space, i);
...
-                    && (block->LRU_position > LRU_recent_limit)
-                    && block->accessed) {
+                    && buf_page_is_accessed(bpage)
+                    && (buf_page_get_LRU_position(bpage) > LRU_recent_limit)) {
+                        if (recent_blocks >= BUF_READ_AHEAD_RANDOM_THRESHOLD) {
+                                buf_pool_mutex_exit();
...
-        mutex_exit(&(buf_pool->mutex));
-        if (recent_blocks < BUF_READ_AHEAD_RANDOM_THRESHOLD) {
+        buf_pool_mutex_exit();
...
         /* Read all the suitable blocks within the area */

         if (ibuf_inside()) {
...
                 /* It is only sensible to do read-ahead in the non-sync aio
                 mode: hence FALSE as the first parameter */

-                if (!ibuf_bitmap_page(i)) {
+                if (!ibuf_bitmap_page(zip_size, i)) {
                         count += buf_read_page_low(
...
                                 ibuf_mode | OS_AIO_SIMULATED_WAKE_LATER,
-                                space, tablespace_version, i);
+                                space, zip_size, FALSE,
+                                tablespace_version, i);

                         if (err == DB_TABLESPACE_DELETED) {
                                 ut_print_timestamp(stderr);
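For reference, a worked example of the area arithmetic above (illustrative values; the 64-page area is assumed): for offset 100, low = (100 / 64) * 64 = 64 and high = (100 / 64 + 1) * 64 = 128, so the loop scans pages 64..127 and, once enough of them count as recently accessed, the whole area is queued for asynchronous reads.

/* Illustration only: the random read-ahead area for one offset. */
#include <assert.h>

int
main(void)
{
        unsigned long area = 64;        /* assumed read-ahead area size */
        unsigned long offset = 100;     /* page the caller wants */

        unsigned long low = (offset / area) * area;
        unsigned long high = (offset / area + 1) * area;

        /* the candidate area is [low, high), clamped later to the
        actual tablespace size */
        assert(low == 64 && high == 128);
        return(0);
}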
...
 an exclusive lock on the buffer frame. The flag is cleared and the x-lock
 released by the i/o-handler thread. Does a random read-ahead if it seems
...
                         /* out: number of page read requests issued: this can
                         be > 1 if read-ahead occurred */
         ulint space, /* in: space id */
+        ulint zip_size,/* in: compressed page size in bytes, or 0 */
         ulint offset) /* in: page number */
-        ib_longlong tablespace_version;
+        ib_int64_t tablespace_version;
...
         tablespace_version = fil_space_get_version(space);

-        count = buf_read_ahead_random(space, offset);
+        count = buf_read_ahead_random(space, zip_size, offset);

         /* We do the i/o in the synchronous aio mode to save thread
         switches: hence TRUE */

         count2 = buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space,
                                    tablespace_version, offset);
         srv_buf_pool_reads+= count2;

         if (err == DB_TABLESPACE_DELETED) {
...
 NOTE 3: the calling thread must want access to the page given: this rule is
 set to prevent unintended read-aheads performed by ibuf routines, a situation
 which could result in a deadlock if the OS does not support asynchronous io. */

 buf_read_ahead_linear(
 /*==================*/
                         /* out: number of page read requests issued */
         ulint space, /* in: space id */
+        ulint zip_size,/* in: compressed page size in bytes, or 0 */
         ulint offset) /* in: page number of a page; NOTE: the current thread
                         must want access to this page (see NOTE 3 above) */
-        ib_longlong tablespace_version;
+        ib_int64_t tablespace_version;
         buf_frame_t* frame;
-        buf_block_t* pred_block = NULL;
+        buf_page_t* pred_bpage = NULL;
         ulint pred_offset;
         ulint succ_offset;
...
+        const ulint buf_read_ahead_linear_area
+                = BUF_READ_AHEAD_LINEAR_AREA;
...
-        if (srv_startup_is_before_trx_rollback_phase) {
+        if (UNIV_UNLIKELY(srv_startup_is_before_trx_rollback_phase)) {
                 /* No read-ahead to avoid thread deadlocks */
...
-        if (ibuf_bitmap_page(offset) || trx_sys_hdr_page(space, offset)) {
-                /* If it is an ibuf bitmap page or trx sys hdr, we do
-                no read-ahead, as that could break the ibuf page access
...
-        low = (offset / BUF_READ_AHEAD_LINEAR_AREA)
-                * BUF_READ_AHEAD_LINEAR_AREA;
-        high = (offset / BUF_READ_AHEAD_LINEAR_AREA + 1)
-                * BUF_READ_AHEAD_LINEAR_AREA;
+        low = (offset / buf_read_ahead_linear_area)
+                * buf_read_ahead_linear_area;
+        high = (offset / buf_read_ahead_linear_area + 1)
+                * buf_read_ahead_linear_area;

         if ((offset != low) && (offset != high - 1)) {
                 /* This is not a border page of the area: return */
...
+        if (ibuf_bitmap_page(zip_size, offset)
+            || trx_sys_hdr_page(space, offset)) {
+                /* If it is an ibuf bitmap page or trx sys hdr, we do
+                no read-ahead, as that could break the ibuf page access
...
         /* Remember the tablespace version before we ask the tablespace size
         below: if DISCARD + IMPORT changes the actual .ibd file meanwhile, we
         do not try to read outside the bounds of the tablespace! */

         tablespace_version = fil_space_get_version(space);

-        mutex_enter(&(buf_pool->mutex));
+        buf_pool_mutex_enter();

         if (high > fil_space_get_size(space)) {
-                mutex_exit(&(buf_pool->mutex));
+                buf_pool_mutex_exit();
                 /* The area is not whole, return */
...
         for (i = low; i < high; i++) {
-                block = buf_page_hash_get(space, i);
+                bpage = buf_page_hash_get(space, i);

-                if ((block == NULL) || !block->accessed) {
+                if ((bpage == NULL) || !buf_page_is_accessed(bpage)) {
                         /* Not accessed */
...
-                } else if (pred_block
-                           && (ut_ulint_cmp(block->LRU_position,
-                                            pred_block->LRU_position)
+                } else if (pred_bpage
+                                   buf_page_get_LRU_position(bpage),
+                                   buf_page_get_LRU_position(pred_bpage))
                                != asc_or_desc)) {
                         /* Accesses not in the right order */
...
-        if (fail_count > BUF_READ_AHEAD_LINEAR_AREA
-            - BUF_READ_AHEAD_LINEAR_THRESHOLD) {
+        if (fail_count > buf_read_ahead_linear_area
+            * LINEAR_AREA_THRESHOLD_COEF) {
                 /* Too many failures: return */

-                mutex_exit(&(buf_pool->mutex));
+                buf_pool_mutex_exit();
...
         /* If we got this far, we know that enough pages in the area have
         been accessed in the right order: linear read-ahead can be sensible */

-        block = buf_page_hash_get(space, offset);
+        bpage = buf_page_hash_get(space, offset);
...
-        mutex_exit(&(buf_pool->mutex));
+        buf_pool_mutex_exit();
...
-        frame = block->frame;
+        switch (buf_page_get_state(bpage)) {
+        case BUF_BLOCK_ZIP_PAGE:
+                frame = bpage->zip.data;
+        case BUF_BLOCK_FILE_PAGE:
+                frame = ((buf_block_t*) bpage)->frame;

         /* Read the natural predecessor and successor page addresses from
         the page; NOTE that because the calling thread may have an x-latch
...
-        low = (new_offset / BUF_READ_AHEAD_LINEAR_AREA)
-                * BUF_READ_AHEAD_LINEAR_AREA;
-        high = (new_offset / BUF_READ_AHEAD_LINEAR_AREA + 1)
-                * BUF_READ_AHEAD_LINEAR_AREA;
+        low = (new_offset / buf_read_ahead_linear_area)
+                * buf_read_ahead_linear_area;
+        high = (new_offset / buf_read_ahead_linear_area + 1)
+                * buf_read_ahead_linear_area;

         if ((new_offset != low) && (new_offset != high - 1)) {
                 /* This is not a border page of the area: return */
...
                 /* It is only sensible to do read-ahead in the non-sync
                 aio mode: hence FALSE as the first parameter */

-                if (!ibuf_bitmap_page(i)) {
+                if (!ibuf_bitmap_page(zip_size, i)) {
                         count += buf_read_page_low(
...
                                 ibuf_mode | OS_AIO_SIMULATED_WAKE_LATER,
-                                space, tablespace_version, i);
+                                space, zip_size, FALSE, tablespace_version, i);

                         if (err == DB_TABLESPACE_DELETED) {
                                 ut_print_timestamp(stderr);
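A small worked example of the border-page test that gates linear read-ahead above (illustrative values; the 64-page area is assumed): within the area [64, 128) only pages 64 and 127 trigger the check, so an access to an interior page such as 100 returns immediately.

/* Illustration only: the linear read-ahead border test. */
#include <assert.h>

static int
is_border_page(unsigned long offset, unsigned long area)
{
        unsigned long low = (offset / area) * area;
        unsigned long high = (offset / area + 1) * area;

        /* linear read-ahead is only evaluated at the edges of the area */
        return(offset == low || offset == high - 1);
}

int
main(void)
{
        assert(is_border_page(64, 64));         /* first page of [64, 128) */
        assert(is_border_page(127, 64));        /* last page of [64, 128) */
        assert(!is_border_page(100, 64));       /* interior page: no check */
        return(0);
}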
...
 Issues read requests for pages which the ibuf module wants to read in, in
 order to contract the insert buffer tree. Technically, this function is like
 a read-ahead function. */

 buf_read_ibuf_merge_pages(
 /*======================*/
-        ibool sync, /* in: TRUE if the caller wants this function
-                        to wait for the highest address page to get
-                        read in, before this function returns */
-        ulint* space_ids, /* in: array of space ids */
-        ib_longlong* space_versions,/* in: the spaces must have this version
-                        number (timestamp), otherwise we discard the
-                        read; we use this to cancel reads if
-                        DISCARD + IMPORT may have changed the
-        ulint* page_nos, /* in: array of page numbers to read, with the
-                        highest page number the last in the array */
-        ulint n_stored) /* in: number of page numbers in the array */
+        ibool sync, /* in: TRUE if the caller
+                        wants this function to wait
+                        for the highest address page
+                        to get read in, before this
+        const ulint* space_ids, /* in: array of space ids */
+        const ib_int64_t* space_versions,/* in: the spaces must have
+                        (timestamp), otherwise we
+                        discard the read; we use this
+                        to cancel reads if DISCARD +
+                        IMPORT may have changed the
+        const ulint* page_nos, /* in: array of page numbers
+                        to read, with the highest page
+                        number the last in the
+        ulint n_stored) /* in: number of elements
...
         ut_ad(!ibuf_inside());
...
         for (i = 0; i < n_stored; i++) {
-                buf_read_page_low(&err,
-                                  (i + 1 == n_stored) && sync,
-                                  space_ids[i], space_versions[i],
+                ulint zip_size = fil_space_get_zip_size(space_ids[i]);
+                if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
+                        goto tablespace_deleted;
+                buf_read_page_low(&err, sync && (i + 1 == n_stored),
+                                  BUF_READ_ANY_PAGE, space_ids[i],
+                                  zip_size, TRUE, space_versions[i],

-                if (err == DB_TABLESPACE_DELETED) {
+                if (UNIV_UNLIKELY(err == DB_TABLESPACE_DELETED)) {
                         /* We have deleted or are deleting the single-table
                         tablespace: remove the entries for that page */

                         ibuf_merge_or_delete_for_page(NULL, space_ids[i],
...
 /************************************************************************
 Issues read requests for pages which recovery wants to read in. */

 buf_read_recv_pages(
 /*================*/
-        ibool sync, /* in: TRUE if the caller wants this function
-                        to wait for the highest address page to get
-                        read in, before this function returns */
-        ulint space, /* in: space id */
-        ulint* page_nos, /* in: array of page numbers to read, with the
-                        highest page number the last in the array */
-        ulint n_stored) /* in: number of page numbers in the array */
+        ibool sync, /* in: TRUE if the caller
+                        wants this function to wait
+                        for the highest address page
+                        to get read in, before this
+        ulint space, /* in: space id */
+        ulint zip_size, /* in: compressed page size in
+        const ulint* page_nos, /* in: array of page numbers
+                        to read, with the highest page
+                        number the last in the
+        ulint n_stored) /* in: number of page numbers
...
-        ib_longlong tablespace_version;
+        ib_int64_t tablespace_version;
...
+        zip_size = fil_space_get_zip_size(space);
         tablespace_version = fil_space_get_version(space);
...
         for (i = 0; i < n_stored; i++) {
...
                 os_aio_print_debug = FALSE;

                 if ((i + 1 == n_stored) && sync) {
-                        buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE,
-                                          space, tablespace_version,
+                        buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space,
+                                          zip_size, TRUE, tablespace_version,
...
                         buf_read_page_low(&err, FALSE, BUF_READ_ANY_PAGE
                                           | OS_AIO_SIMULATED_WAKE_LATER,
-                                          space, tablespace_version,
+                                          space, zip_size, TRUE,
+                                          tablespace_version, page_nos[i]);
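Both batch readers above guard against tablespaces that vanish while a batch is being issued: in buf_read_ibuf_merge_pages() the new zip-size lookup doubles as an existence check (fil_space_get_zip_size() returns ULINT_UNDEFINED when the space no longer exists, and the loop jumps to tablespace_deleted, presumably the same cleanup used when a read returns DB_TABLESPACE_DELETED). A minimal sketch of that pattern, with space_zip_size_stub() standing in for fil_space_get_zip_size() (illustration only, not InnoDB code):

/* Sketch only: skipping reads for spaces that no longer exist. */
#include <stdio.h>

#define ULINT_UNDEFINED_SKETCH ((unsigned long) -1)

static unsigned long
space_zip_size_stub(unsigned long space_id)
{
        /* pretend space 42 was dropped; everything else is an
        uncompressed tablespace (zip size 0) */
        return(space_id == 42 ? ULINT_UNDEFINED_SKETCH : 0);
}

int
main(void)
{
        unsigned long ids[3] = { 7, 42, 9 };
        unsigned long i;

        for (i = 0; i < 3; i++) {
                unsigned long zip_size = space_zip_size_stub(ids[i]);

                if (zip_size == ULINT_UNDEFINED_SKETCH) {
                        /* tablespace deleted: drop its pending entries
                        instead of issuing a read */
                        printf("space %lu: skipped\n", ids[i]);
                        continue;
                }

                printf("space %lu: read issued (zip_size %lu)\n",
                       ids[i], zip_size);
        }

        return(0);
}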