38
21
#include "srv0start.h"
39
22
#include "srv0srv.h"
41
/** The linear read-ahead area size */
24
/* The size in blocks of the area where the random read-ahead algorithm counts
25
the accessed pages when deciding whether to read-ahead */
26
#define BUF_READ_AHEAD_RANDOM_AREA BUF_READ_AHEAD_AREA
28
/* There must be at least this many pages in buf_pool in the area to start
29
a random read-ahead */
30
/* NOTE: this macro expands to an expression over the identifier
buf_read_ahead_random_area, so it can only be used where a variable of
that name is in scope and has already been assigned (the random
read-ahead routine declares it as a local ulint and sets it from
BUF_READ_AHEAD_RANDOM_AREA before evaluating the threshold). */
#define BUF_READ_AHEAD_RANDOM_THRESHOLD (5 + buf_read_ahead_random_area / 8)
32
/* The linear read-ahead area size */
42
33
#define BUF_READ_AHEAD_LINEAR_AREA BUF_READ_AHEAD_AREA
44
/** If there are buf_pool->curr_size per the number below pending reads, then
35
/* The linear read-ahead threshold */
36
/* NOTE: the expansion is intentionally NOT parenthesized. The macro is
used as a multiplier, e.g. (area * LINEAR_AREA_THRESHOLD_COEF), which
expands to (area * 5 / 8) and is evaluated left to right. Writing it as
(5 / 8) would be an integer division yielding 0. Do not use this macro
in any position other than directly after a '*' operand. */
#define LINEAR_AREA_THRESHOLD_COEF 5 / 8
38
/* If there are buf_pool->curr_size per the number below pending reads, then
45
39
read-ahead is not done: this is to prevent flooding the buffer pool with
46
40
i/o-fixed buffer blocks */
47
41
#define BUF_READ_AHEAD_PEND_LIMIT 2
49
/********************************************************************//**
43
/************************************************************************
50
44
Low-level function which reads a page asynchronously from a file to the
51
45
buffer buf_pool if it is not already there, in which case does nothing.
52
46
Sets the io_fix flag and sets an exclusive lock on the buffer frame. The
53
flag is cleared and the x-lock released by an i/o-handler thread.
54
@return 1 if a read request was queued, 0 if the page already resided
55
in buf_pool, or if the page is in the doublewrite buffer blocks in
56
which case it is never read into the pool, or if the tablespace does
57
not exist or is being dropped
58
@return 1 if read request is issued. 0 if it is not */
47
flag is cleared and the x-lock released by an i/o-handler thread. */
63
ulint* err, /*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED if we are
52
/* out: 1 if a read request was queued, 0 if the page
53
already resided in buf_pool, or if the page is in
54
the doublewrite buffer blocks in which case it is never
55
read into the pool, or if the tablespace does not
56
exist or is being dropped */
57
ulint* err, /* out: DB_SUCCESS or DB_TABLESPACE_DELETED if we are
64
58
trying to read from a non-existent tablespace, or a
65
59
tablespace which is just now being dropped */
66
ibool sync, /*!< in: TRUE if synchronous aio is desired */
67
ulint mode, /*!< in: BUF_READ_IBUF_PAGES_ONLY, ...,
60
ibool sync, /* in: TRUE if synchronous aio is desired */
61
ulint mode, /* in: BUF_READ_IBUF_PAGES_ONLY, ...,
68
62
ORed to OS_AIO_SIMULATED_WAKE_LATER (see below
69
63
at read-ahead functions) */
70
ulint space, /*!< in: space id */
71
ulint zip_size,/*!< in: compressed page size, or 0 */
72
ibool unzip, /*!< in: TRUE=request uncompressed page */
73
ib_int64_t tablespace_version, /*!< in: if the space memory object has
64
ulint space, /* in: space id */
65
ulint zip_size,/* in: compressed page size, or 0 */
66
ibool unzip, /* in: TRUE=request uncompressed page */
67
ib_int64_t tablespace_version, /* in: if the space memory object has
74
68
this timestamp different from what we are giving here,
75
69
treat the tablespace as dropped; this is a timestamp we
76
70
use to stop dangling page reads from a tablespace
77
71
which we have DISCARDed + IMPORTed back */
78
ulint offset) /*!< in: page number */
72
ulint offset) /* in: page number */
160
/********************************************************************//**
154
/************************************************************************
155
Applies a random read-ahead in buf_pool if there are at least a threshold
156
value of accessed pages from the random read-ahead area. Does not read any
157
page, not even the one at the position (space, offset), if the read-ahead
158
mechanism is not activated. NOTE 1: the calling thread may own latches on
159
pages: to avoid deadlocks this function must be written such that it cannot
160
end up waiting for these latches! NOTE 2: the calling thread must want
161
access to the page given: this rule is set to prevent unintended read-aheads
162
performed by ibuf routines, a situation which could result in a deadlock if
163
the OS does not support asynchronous i/o. */
166
buf_read_ahead_random(
167
/*==================*/
168
/* out: number of page read requests issued; NOTE
169
that if we read ibuf pages, it may happen that
170
the page at the given page number does not get
171
read even if we return a value > 0! */
172
ulint space, /* in: space id */
173
ulint zip_size,/* in: compressed page size in bytes, or 0 */
174
ulint offset) /* in: page number of a page which the current thread
177
ib_int64_t tablespace_version;
178
ulint recent_blocks = 0;
180
ulint LRU_recent_limit;
185
ulint buf_read_ahead_random_area;
187
if (srv_startup_is_before_trx_rollback_phase) {
188
/* No read-ahead to avoid thread deadlocks */
192
if (ibuf_bitmap_page(zip_size, offset)
193
|| trx_sys_hdr_page(space, offset)) {
195
/* If it is an ibuf bitmap page or trx sys hdr, we do
196
no read-ahead, as that could break the ibuf page access
202
/* Remember the tablespace version before we ask the tablespace size
203
below: if DISCARD + IMPORT changes the actual .ibd file meanwhile, we
204
do not try to read outside the bounds of the tablespace! */
206
tablespace_version = fil_space_get_version(space);
208
buf_read_ahead_random_area = BUF_READ_AHEAD_RANDOM_AREA;
210
low = (offset / buf_read_ahead_random_area)
211
* buf_read_ahead_random_area;
212
high = (offset / buf_read_ahead_random_area + 1)
213
* buf_read_ahead_random_area;
214
if (high > fil_space_get_size(space)) {
216
high = fil_space_get_size(space);
219
/* Get the minimum LRU_position field value for an initial segment
220
of the LRU list, to determine which blocks have recently been added
221
to the start of the list. */
223
LRU_recent_limit = buf_LRU_get_recent_limit();
225
buf_pool_mutex_enter();
227
if (buf_pool->n_pend_reads
228
> buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
229
buf_pool_mutex_exit();
234
/* Count how many blocks in the area have been recently accessed,
235
that is, reside near the start of the LRU list. */
237
for (i = low; i < high; i++) {
238
const buf_page_t* bpage = buf_page_hash_get(space, i);
241
&& buf_page_is_accessed(bpage)
242
&& (buf_page_get_LRU_position(bpage) > LRU_recent_limit)) {
246
if (recent_blocks >= BUF_READ_AHEAD_RANDOM_THRESHOLD) {
248
buf_pool_mutex_exit();
254
buf_pool_mutex_exit();
259
/* Read all the suitable blocks within the area */
262
ibuf_mode = BUF_READ_IBUF_PAGES_ONLY;
264
ibuf_mode = BUF_READ_ANY_PAGE;
269
for (i = low; i < high; i++) {
270
/* It is only sensible to do read-ahead in the non-sync aio
271
mode: hence FALSE as the first parameter */
273
if (!ibuf_bitmap_page(zip_size, i)) {
274
count += buf_read_page_low(
276
ibuf_mode | OS_AIO_SIMULATED_WAKE_LATER,
277
space, zip_size, FALSE,
278
tablespace_version, i);
279
if (err == DB_TABLESPACE_DELETED) {
280
ut_print_timestamp(stderr);
282
" InnoDB: Warning: in random"
283
" readahead trying to access\n"
284
"InnoDB: tablespace %lu page %lu,\n"
285
"InnoDB: but the tablespace does not"
286
" exist or is just being dropped.\n",
287
(ulong) space, (ulong) i);
292
/* In simulated aio we wake the aio handler threads only after
293
queuing all aio requests, in native aio the following call does
296
os_aio_simulated_wake_handler_threads();
299
if (buf_debug_prints && (count > 0)) {
301
"Random read-ahead space %lu offset %lu pages %lu\n",
302
(ulong) space, (ulong) offset,
305
#endif /* UNIV_DEBUG */
307
++srv_read_ahead_rnd;
311
/************************************************************************
161
312
High-level function which reads a page asynchronously from a file to the
162
313
buffer buf_pool if it is not already there. Sets the io_fix flag and sets
163
314
an exclusive lock on the buffer frame. The flag is cleared and the x-lock
164
released by the i/o-handler thread.
165
@return TRUE if page has been read in, FALSE in case of failure */
315
released by the i/o-handler thread. Does a random read-ahead if it seems
170
ulint space, /*!< in: space id */
171
ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
172
ulint offset) /*!< in: page number */
321
/* out: number of page read requests issued: this can
322
be > 1 if read-ahead occurred */
323
ulint space, /* in: space id */
324
ulint zip_size,/* in: compressed page size in bytes, or 0 */
325
ulint offset) /* in: page number */
174
buf_pool_t* buf_pool = buf_pool_get(space, offset);
175
327
ib_int64_t tablespace_version;
179
332
tablespace_version = fil_space_get_version(space);
334
count = buf_read_ahead_random(space, zip_size, offset);
181
336
/* We do the i/o in the synchronous aio mode to save thread
182
337
switches: hence TRUE */
184
count = buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space,
186
tablespace_version, offset);
187
srv_buf_pool_reads += count;
339
count2 = buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space,
341
tablespace_version, offset);
342
srv_buf_pool_reads+= count2;
188
343
if (err == DB_TABLESPACE_DELETED) {
189
344
ut_print_timestamp(stderr);
314
467
asc_or_desc = -1;
317
/* How many out of order accessed pages can we ignore
318
when working out the access pattern for linear readahead */
319
threshold = ut_min((64 - srv_read_ahead_threshold),
320
BUF_READ_AHEAD_AREA(buf_pool));
324
472
for (i = low; i < high; i++) {
325
bpage = buf_page_hash_get(buf_pool, space, i);
473
bpage = buf_page_hash_get(space, i);
327
if (bpage == NULL || !buf_page_is_accessed(bpage)) {
475
if ((bpage == NULL) || !buf_page_is_accessed(bpage)) {
328
476
/* Not accessed */
331
} else if (pred_bpage) {
332
/* Note that buf_page_is_accessed() returns
333
the time of the first access. If some blocks
334
of the extent existed in the buffer pool at
335
the time of a linear access pattern, the first
336
access times may be nonmonotonic, even though
337
the latest access times were linear. The
338
threshold (srv_read_ahead_threshold) should help
339
a little against this. */
340
int res = ut_ulint_cmp(
341
buf_page_is_accessed(bpage),
342
buf_page_is_accessed(pred_bpage));
479
} else if (pred_bpage
481
buf_page_get_LRU_position(bpage),
482
buf_page_get_LRU_position(pred_bpage))
343
484
/* Accesses not in the right order */
344
if (res != 0 && res != asc_or_desc) {
349
if (fail_count > threshold) {
350
/* Too many failures: return */
351
buf_pool_mutex_exit(buf_pool);
355
if (bpage && buf_page_is_accessed(bpage)) {
356
487
pred_bpage = bpage;
491
if (fail_count > buf_read_ahead_linear_area
492
* LINEAR_AREA_THRESHOLD_COEF) {
493
/* Too many failures: return */
495
buf_pool_mutex_exit();
360
500
/* If we got this far, we know that enough pages in the area have
361
501
been accessed in the right order: linear read-ahead can be sensible */
363
bpage = buf_page_hash_get(buf_pool, space, offset);
503
bpage = buf_page_hash_get(space, offset);
365
505
if (bpage == NULL) {
366
buf_pool_mutex_exit(buf_pool);
506
buf_pool_mutex_exit();
495
635
buf_read_ibuf_merge_pages(
496
636
/*======================*/
497
ibool sync, /*!< in: TRUE if the caller
637
ibool sync, /* in: TRUE if the caller
498
638
wants this function to wait
499
639
for the highest address page
500
640
to get read in, before this
501
641
function returns */
502
const ulint* space_ids, /*!< in: array of space ids */
503
const ib_int64_t* space_versions,/*!< in: the spaces must have
642
const ulint* space_ids, /* in: array of space ids */
643
const ib_int64_t* space_versions,/* in: the spaces must have
504
644
this version number
505
645
(timestamp), otherwise we
506
646
discard the read; we use this
507
647
to cancel reads if DISCARD +
508
648
IMPORT may have changed the
509
649
tablespace size */
510
const ulint* page_nos, /*!< in: array of page numbers
650
const ulint* page_nos, /* in: array of page numbers
511
651
to read, with the highest page
512
652
number the last in the
514
ulint n_stored) /*!< in: number of elements
654
ulint n_stored) /* in: number of elements
568
704
#endif /* UNIV_DEBUG */
571
/********************************************************************//**
707
/************************************************************************
572
708
Issues read requests for pages which recovery wants to read in. */
575
711
buf_read_recv_pages(
576
712
/*================*/
577
ibool sync, /*!< in: TRUE if the caller
713
ibool sync, /* in: TRUE if the caller
578
714
wants this function to wait
579
715
for the highest address page
580
716
to get read in, before this
581
717
function returns */
582
ulint space, /*!< in: space id */
583
ulint zip_size, /*!< in: compressed page size in
718
ulint space, /* in: space id */
719
ulint zip_size, /* in: compressed page size in
585
const ulint* page_nos, /*!< in: array of page numbers
721
const ulint* page_nos, /* in: array of page numbers
586
722
to read, with the highest page
587
723
number the last in the
589
ulint n_stored) /*!< in: number of page numbers
725
ulint n_stored) /* in: number of page numbers
592
728
ib_int64_t tablespace_version;