/*****************************************************************************

Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
Copyright (c) 2008, Google Inc.

Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
briefly in the InnoDB documentation. The contributions by Google are
incorporated with their permission, and subject to the conditions contained in
the file COPYING.Google.

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
St, Fifth Floor, Boston, MA 02110-1301 USA

*****************************************************************************/

/**************************************************//**
The database buffer buf_pool

Created 11/5/1995 Heikki Tuuri
*******************************************************/
#ifndef UNIV_HOTBACKUP
#include "buf0buddy.h"
#include "lock0lock.h"
#include "ibuf0ibuf.h"
#endif /* !UNIV_HOTBACKUP */
#include "dict0dict.h"
/*
		IMPLEMENTATION OF THE BUFFER POOL
		=================================

Performance improvement:
------------------------
Thread scheduling in NT may be so slow that the OS wait mechanism should
not be used even in waiting for disk reads to complete.
Rather, we should put waiting query threads to the queue of
waiting jobs, and let the OS thread do something useful while the i/o
is processed. In this way we could remove most OS thread switches in
an i/o-intensive benchmark like TPC-C.

A possibility is to put a user space thread library between the database
and NT. User space thread libraries might be very fast.

SQL Server 7.0 can be configured to use 'fibers' which are lightweight
threads in NT. These should be studied.
Buffer frames and blocks
------------------------
Following the terminology of Gray and Reuter, we call the memory
blocks where file pages are loaded buffer frames. For each buffer
frame there is a control block, or shortly, a block, in the buffer
control array. The control info which does not need to be stored
in the file along with the file page, resides in the control block.
The buffer buf_pool contains a single mutex which protects all the
control data structures of the buf_pool. The content of a buffer frame is
protected by a separate read-write lock in its control block, though.
These locks can be locked and unlocked without owning the buf_pool->mutex.
The OS events in the buf_pool struct can be waited for without owning the
buf_pool->mutex.

The buf_pool->mutex is a hot-spot in main memory, causing a lot of
memory bus traffic on multiprocessor systems when processors
alternately access the mutex. On our Pentium, the mutex is accessed
maybe every 10 microseconds. We gave up the solution to have mutexes
for each control block, for instance, because it seemed to be
complicated.

A solution to reduce mutex contention of the buf_pool->mutex is to
create a separate mutex for the page hash table. On Pentium,
accessing the hash table takes 2 microseconds, about half
of the total buf_pool->mutex hold time.
The control block contains, for instance, the bufferfix count
which is incremented when a thread wants a file page to be fixed
in a buffer frame. The bufferfix operation does not lock the
contents of the frame, however. For this purpose, the control
block contains a read-write lock.
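
A minimal sketch of this split, using toy types rather than the real
buf_block_t and its latching API (illustrative only):

	typedef struct toy_block_struct {
		ulint		buf_fix_count;
		rw_lock_t	lock;
	} toy_block_t;

	static void toy_fix(toy_block_t* b)	{ b->buf_fix_count++; }
	static void toy_unfix(toy_block_t* b)	{ b->buf_fix_count--; }

A positive buf_fix_count only pins the block in the buffer pool; a
thread that reads or modifies the frame contents must additionally
acquire b->lock in shared or exclusive mode.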
The buffer frames have to be aligned so that the start memory
address of a frame is divisible by the universal page size, which
is a power of 2.
We intend to make the buffer buf_pool size on-line reconfigurable,
that is, the buf_pool size can be changed without closing the database.
Then the database administrator may adjust it to be bigger
at night, for example. The control block array must
contain enough control blocks for the maximum buffer buf_pool size
which is used in the particular database.
If the buf_pool size is cut, we exploit the virtual memory mechanism of
the OS, and just refrain from using frames at high addresses. Then the OS
can swap them to disk.
The control blocks containing file pages are put to a hash table
according to the file address of the page.
We could speed up the access to an individual page by using
"pointer swizzling": we could replace the page references on
non-leaf index pages by direct pointers to the page, if it exists
in the buf_pool. We could make a separate hash table where we could
chain all the page references in non-leaf pages residing in the buf_pool,
using the page reference as the hash key,
and at the time of reading of a page update the pointers accordingly.
Drawbacks of this solution are added complexity and,
possibly, extra space required on non-leaf pages for memory pointers.
A simpler solution is just to speed up the hash table mechanism
in the database, using tables whose size is a power of 2.
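
With a power-of-2 table size n, the bucket for a page can be computed
with a mask instead of a division. A sketch, assuming the fold formula
of buf_page_address_fold() (illustrative only):

	static ulint
	toy_hash_bucket(ulint space, ulint offset, ulint n)
	{
		ulint	fold = (space << 20) + space + offset;

		return(fold & (n - 1));
	}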
There are several lists of control blocks.

The free list (buf_pool->free) contains blocks which are currently not
used.

The common LRU list contains all the blocks holding a file page
except those for which the bufferfix count is non-zero.
The pages are in the LRU list roughly in the order of the last
access to the page, so that the oldest pages are at the end of the
list. We also keep a pointer to near the end of the LRU list,
which we can use when we want to artificially age a page in the
buf_pool. This is used if we know that some page is not needed
again for some time: we insert the block right after the pointer,
causing it to be replaced sooner than would normally be the case.
Currently this aging mechanism is used for the read-ahead of pages,
and it can also be used when there is a scan of a full
table which cannot fit in the memory. Putting the pages near the
end of the LRU list, we make sure that most of the buf_pool stays
in the main memory, undisturbed.
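
A sketch of this "artificial aging" insert, with toy doubly-linked-list
fields instead of the UT_LIST_* macros (illustrative only):

	static void
	toy_insert_after_old_pointer(toy_page_t* old_ptr, toy_page_t* page)
	{
		page->prev = old_ptr;
		page->next = old_ptr->next;
		old_ptr->next = page;
		if (page->next != NULL) {
			page->next->prev = page;
		}
	}

Because the page lands near the tail instead of at the head, it becomes
one of the first candidates for replacement.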
The unzip_LRU list contains a subset of the common LRU list. The
blocks on the unzip_LRU list hold a compressed file page and the
corresponding uncompressed page frame. A block is in unzip_LRU if and
only if the predicate buf_page_belongs_to_unzip_LRU(&block->page)
holds. The blocks in unzip_LRU will be in the same order as they are in
the common LRU list. That is, each manipulation of the common LRU
list will result in the same manipulation of the unzip_LRU list.
The chain of modified blocks (buf_pool->flush_list) contains the blocks
holding file pages that have been modified in the memory
but not written to disk yet. The block with the oldest modification
which has not yet been written to disk is at the end of the chain.
The access to this list is protected by flush_list_mutex.
The chain of unmodified compressed blocks (buf_pool->zip_clean)
contains the control blocks (buf_page_t) of those compressed pages
that are not in buf_pool->flush_list and for which no uncompressed
page has been allocated in the buffer pool. The control blocks for
uncompressed pages are accessible via buf_block_t objects that are
reachable via buf_pool->chunks[].
The chains of free memory blocks (buf_pool->zip_free[]) are used by
the buddy allocator (buf0buddy.c) to keep track of currently unused
memory blocks of size sizeof(buf_page_t)..UNIV_PAGE_SIZE / 2. These
blocks are inside the UNIV_PAGE_SIZE-sized memory blocks of type
BUF_BLOCK_MEMORY that the buddy allocator requests from the buffer
pool. The buddy allocator is solely used for allocating control
blocks for compressed pages (buf_page_t) and compressed page frames.
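
In a binary buddy allocator, the buddy of a sub-block of size s at
byte offset off within its parent page is the sub-block at offset
off XOR s; freeing both allows them to be coalesced into a block of
size 2 * s. A sketch (not the actual buf0buddy.c code):

	static byte*
	toy_buddy_of(byte* page, byte* blk, ulint size)
	{
		ulint	offset = (ulint) (blk - page);

		return(page + (offset ^ size));
	}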
Loading a file page
-------------------
First, a victim block for replacement has to be found in the
buf_pool. It is taken from the free list or searched for from the
end of the LRU-list. An exclusive lock is reserved for the frame,
the io_fix field is set in the block fixing the block in buf_pool,
and the io-operation for loading the page is queued. The io-handler thread
releases the X-lock on the frame and resets the io_fix field
when the io operation completes.

A thread may request the above operation using the function
buf_page_get(). It may then continue to request a lock on the frame.
The lock is granted when the io-handler releases the x-lock.
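
The protocol in outline (pseudo-code; the real read path lives in
buf0rea.c):

	block = take from free list, or evict from the LRU tail
	rw_lock_x_lock(&block->lock);
	buf_page_set_io_fix(&block->page, BUF_IO_READ);
	queue the asynchronous read and return

	io-handler thread, on completion:
	buf_page_set_io_fix(&block->page, BUF_IO_NONE);
	rw_lock_x_unlock(&block->lock);

A reader that calls buf_page_get() then simply requests an S- or
X-lock on block->lock; the request is granted only after the
io-handler has released its exclusive lock, so no extra wait queue
is needed.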
Read-ahead
----------
The read-ahead mechanism is intended to be intelligent and
isolated from the semantically higher levels of the database
index management. From the higher level we only need the
information if a file page has a natural successor or
predecessor page. On the leaf level of a B-tree index,
these are the next and previous pages in the natural
order of the pages.
Let us first explain the read-ahead mechanism when the leaves
of a B-tree are scanned in an ascending or descending order.
When a read page is the first time referenced in the buf_pool,
the buffer manager checks if it is at the border of a so-called
linear read-ahead area. The tablespace is divided into these
areas of size 64 blocks, for example. So if the page is at the
border of such an area, the read-ahead mechanism checks if
all the other blocks in the area have been accessed in an
ascending or descending order. If this is the case, the system
looks at the natural successor or predecessor of the page,
checks if that is at the border of another area, and in this case
issues read-requests for all the pages in that area. Maybe
we could relax the condition that all the pages in the area
have to be accessed: if data is deleted from a table, there may
appear holes of unused pages in the area.
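
Because the area size is a power of 2, the border test is a cheap
modulo. A sketch, assuming a 64-page area (the real area size is
computed in buf0rea.c):

	static ibool
	toy_at_area_border(ulint page_no)
	{
		ulint	area = 64;

		return(page_no % area == 0 || page_no % area == area - 1);
	}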
A different read-ahead mechanism is used when there appears
to be a random access pattern to a file.
If a new page is referenced in the buf_pool, and several pages
of its random access area (for instance, 32 consecutive pages
in a tablespace) have recently been referenced, we may predict
that the whole area may be needed in the near future, and issue
the read requests for the whole area.
*/
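
/* A sketch of the random read-ahead trigger described above
(illustrative only; the real logic, including its thresholds and
checks for i/o-fixed pages, is in buf0rea.c):

	static ibool
	toy_trigger_random_readahead(const ibool* recently_accessed,
				     ulint area, ulint threshold)
	{
		ulint	i;
		ulint	n_accessed = 0;

		for (i = 0; i < area; i++) {
			if (recently_accessed[i]) {
				n_accessed++;
			}
		}

		return(n_accessed >= threshold);
	}
*/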
#ifndef UNIV_HOTBACKUP
/** Value in microseconds */
static const int WAIT_FOR_READ	= 5000;
/** Number of attempts made to read in a page in the buffer pool */
static const ulint	BUF_PAGE_READ_MAX_RETRIES = 100;

/** The buffer pools of the database */
UNIV_INTERN buf_pool_t*	buf_pool_ptr[MAX_BUFFER_POOLS];
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
static ulint	buf_dbg_counter	= 0; /*!< This is used to insert validation
					operations in execution in the
					debug version */
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#ifdef UNIV_DEBUG
/** If this is set TRUE, the program prints info whenever
read-ahead or flush occurs */
UNIV_INTERN ibool	buf_debug_prints = FALSE;
#endif /* UNIV_DEBUG */
#ifdef UNIV_PFS_RWLOCK
/* Keys to register buffer block related rwlocks and mutexes with
performance schema */
UNIV_INTERN mysql_pfs_key_t	buf_block_lock_key;
# ifdef UNIV_SYNC_DEBUG
UNIV_INTERN mysql_pfs_key_t	buf_block_debug_latch_key;
# endif /* UNIV_SYNC_DEBUG */
#endif /* UNIV_PFS_RWLOCK */

#ifdef UNIV_PFS_MUTEX
UNIV_INTERN mysql_pfs_key_t	buffer_block_mutex_key;
UNIV_INTERN mysql_pfs_key_t	buf_pool_mutex_key;
UNIV_INTERN mysql_pfs_key_t	buf_pool_zip_mutex_key;
UNIV_INTERN mysql_pfs_key_t	flush_list_mutex_key;
#endif /* UNIV_PFS_MUTEX */

#if defined UNIV_PFS_MUTEX || defined UNIV_PFS_RWLOCK
# ifndef PFS_SKIP_BUFFER_MUTEX_RWLOCK

/* Buffer block mutexes and rwlocks can be registered
in one group rather than individually. If PFS_GROUP_BUFFER_SYNC
is defined, register buffer block mutex and rwlock
in one group after their initialization. */
#  define PFS_GROUP_BUFFER_SYNC

/* This define caps the number of mutexes/rwlocks that can
be registered with performance schema. Developers can
modify this define if necessary. Please note, this would
be effective only if PFS_GROUP_BUFFER_SYNC is defined. */
#  define PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER	ULINT_MAX

# endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
#endif /* UNIV_PFS_MUTEX || UNIV_PFS_RWLOCK */
/** A chunk of buffers. The buffer pool is allocated in chunks. */
struct buf_chunk_struct{
	ulint		mem_size;	/*!< allocated size of the chunk */
	ulint		size;		/*!< size of frames[] and blocks[] */
	void*		mem;		/*!< pointer to the memory area which
					was allocated for the frames */
	buf_block_t*	blocks;		/*!< array of buffer control blocks */
};
#endif /* !UNIV_HOTBACKUP */
/********************************************************************//**
Gets the smallest oldest_modification lsn for any page in the pool. Returns
zero if all modified pages have been flushed to disk.
@return	oldest modification in pool, zero if none */
UNIV_INTERN
ib_uint64_t
buf_pool_get_oldest_modification(void)
/*==================================*/
{
	ulint		i;
	buf_page_t*	bpage;
	ib_uint64_t	lsn = 0;
	ib_uint64_t	oldest_lsn = 0;

	/* When we traverse all the flush lists we don't want another
	thread to add a dirty page to any flush list. */
	log_flush_order_mutex_enter();

	for (i = 0; i < srv_buf_pool_instances; i++) {
		buf_pool_t*	buf_pool;

		buf_pool = buf_pool_from_array(i);

		buf_flush_list_mutex_enter(buf_pool);

		bpage = UT_LIST_GET_LAST(buf_pool->flush_list);

		if (bpage != NULL) {
			ut_ad(bpage->in_flush_list);
			lsn = bpage->oldest_modification;
		}

		buf_flush_list_mutex_exit(buf_pool);

		if (!oldest_lsn || oldest_lsn > lsn) {
			oldest_lsn = lsn;
		}
	}

	log_flush_order_mutex_exit();

	/* The returned answer may be out of date: the flush_list can
	change after the mutex has been released. */

	return(oldest_lsn);
}
/********************************************************************//**
Get total buffer pool statistics. */
UNIV_INTERN
void
buf_get_total_list_len(
/*===================*/
	ulint*		LRU_len,	/*!< out: length of all LRU lists */
	ulint*		free_len,	/*!< out: length of all free lists */
	ulint*		flush_list_len)	/*!< out: length of all flush lists */
{
	ulint		i;

	*LRU_len = 0;
	*free_len = 0;
	*flush_list_len = 0;

	for (i = 0; i < srv_buf_pool_instances; i++) {
		buf_pool_t*	buf_pool;

		buf_pool = buf_pool_from_array(i);
		*LRU_len += UT_LIST_GET_LEN(buf_pool->LRU);
		*free_len += UT_LIST_GET_LEN(buf_pool->free);
		*flush_list_len += UT_LIST_GET_LEN(buf_pool->flush_list);
	}
}
/********************************************************************//**
Get total buffer pool statistics. */
UNIV_INTERN
void
buf_get_total_stat(
/*===============*/
	buf_pool_stat_t*	tot_stat)	/*!< out: buffer pool stats */
{
	ulint			i;

	memset(tot_stat, 0, sizeof(*tot_stat));

	for (i = 0; i < srv_buf_pool_instances; i++) {
		buf_pool_stat_t*buf_stat;
		buf_pool_t*	buf_pool;

		buf_pool = buf_pool_from_array(i);

		buf_stat = &buf_pool->stat;
		tot_stat->n_page_gets += buf_stat->n_page_gets;
		tot_stat->n_pages_read += buf_stat->n_pages_read;
		tot_stat->n_pages_written += buf_stat->n_pages_written;
		tot_stat->n_pages_created += buf_stat->n_pages_created;
		tot_stat->n_ra_pages_read += buf_stat->n_ra_pages_read;
		tot_stat->n_ra_pages_evicted += buf_stat->n_ra_pages_evicted;
		tot_stat->n_pages_made_young += buf_stat->n_pages_made_young;

		tot_stat->n_pages_not_made_young +=
			buf_stat->n_pages_not_made_young;
	}
}
/********************************************************************//**
Allocates a buffer block.
@return	own: the allocated block, in state BUF_BLOCK_MEMORY */
UNIV_INTERN
buf_block_t*
buf_block_alloc(
/*============*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	ulint		zip_size)	/*!< in: compressed page size in bytes,
					or 0 if uncompressed tablespace */
{
	buf_block_t*	block;
	ulint		index;
	static ulint	buf_pool_index;

	if (buf_pool == NULL) {
		/* We are allocating memory from any buffer pool; ensure
		we spread the allocations over all buffer pool instances. */
		index = buf_pool_index++ % srv_buf_pool_instances;
		buf_pool = buf_pool_from_array(index);
	}

	block = buf_LRU_get_free_block(buf_pool, zip_size);

	buf_block_set_state(block, BUF_BLOCK_MEMORY);

	return(block);
}
/********************************************************************//**
Calculates a page checksum which is stored to the page when it is written
to a file. Note that we must be careful to calculate the same value on
32-bit and 64-bit architectures.
@return	checksum */
UNIV_INTERN
ulint
buf_calc_page_new_checksum(
/*=======================*/
	const byte*	page)	/*!< in: buffer page */
{
	ulint checksum;

	/* Since the field FIL_PAGE_FILE_FLUSH_LSN, and in versions <= 4.1.x
	..._ARCH_LOG_NO, are written outside the buffer pool to the first
	pages of data files, we have to skip them in the page checksum
	calculation.
	We must also skip the field FIL_PAGE_SPACE_OR_CHKSUM where the
	checksum is stored, and also the last 8 bytes of page because
	there we store the old formula checksum. */

	checksum = ut_fold_binary(page + FIL_PAGE_OFFSET,
				  FIL_PAGE_FILE_FLUSH_LSN - FIL_PAGE_OFFSET)
		+ ut_fold_binary(page + FIL_PAGE_DATA,
				 UNIV_PAGE_SIZE - FIL_PAGE_DATA
				 - FIL_PAGE_END_LSN_OLD_CHKSUM);
	checksum = checksum & 0xFFFFFFFFUL;

	return(checksum);
}
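
/* The fold is masked to 32 bits because the result must fit in the
4-byte field FIL_PAGE_SPACE_OR_CHKSUM on both 32-bit and 64-bit
builds, where ulint has different widths. A toy illustration of the
store/verify round trip (not the real page write path in buf0flu.c):

	ulint	c = buf_calc_page_new_checksum(page);

	mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM, c);
	ut_a(mach_read_from_4(page + FIL_PAGE_SPACE_OR_CHKSUM) == c);
*/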
/********************************************************************//**
In versions < 4.0.14 and < 4.1.1 there was a bug that the checksum only
looked at the first few bytes of the page. This calculates that old
checksum.
NOTE: we must first store the new formula checksum to
FIL_PAGE_SPACE_OR_CHKSUM before calculating and storing this old checksum
because this takes that field as an input!
@return	checksum */
UNIV_INTERN
ulint
buf_calc_page_old_checksum(
/*=======================*/
	const byte*	page)	/*!< in: buffer page */
{
	ulint checksum;

	checksum = ut_fold_binary(page, FIL_PAGE_FILE_FLUSH_LSN);

	checksum = checksum & 0xFFFFFFFFUL;

	return(checksum);
}
/********************************************************************//**
Checks if a page is corrupt.
@return	TRUE if corrupted */
UNIV_INTERN
ibool
buf_page_is_corrupted(
/*==================*/
	const byte*	read_buf,	/*!< in: a database page */
	ulint		zip_size)	/*!< in: size of compressed page;
					0 for uncompressed pages */
{
	ulint		checksum_field;
	ulint		old_checksum_field;

	if (UNIV_LIKELY(!zip_size)
	    && memcmp(read_buf + FIL_PAGE_LSN + 4,
		      read_buf + UNIV_PAGE_SIZE
		      - FIL_PAGE_END_LSN_OLD_CHKSUM + 4, 4)) {

		/* Stored log sequence numbers at the start and the end
		of page do not match */

		return(TRUE);
	}

#ifndef UNIV_HOTBACKUP
	if (recv_lsn_checks_on) {
		ib_uint64_t	current_lsn;

		if (log_peek_lsn(&current_lsn)
		    && UNIV_UNLIKELY(current_lsn
				     < mach_read_from_8(read_buf
							+ FIL_PAGE_LSN))) {
			ut_print_timestamp(stderr);

			fprintf(stderr,
				" InnoDB: Error: page %lu log sequence number"
				" %"PRIu64"\n"
				"InnoDB: is in the future! Current system "
				"log sequence number %"PRIu64".\n"
				"InnoDB: Your database may be corrupt or "
				"you may have copied the InnoDB\n"
				"InnoDB: tablespace but not the InnoDB "
				"log files. See\n"
				"InnoDB: " REFMAN "forcing-recovery.html\n"
				"InnoDB: for more information.\n",
				(ulong) mach_read_from_4(read_buf
							 + FIL_PAGE_OFFSET),
				mach_read_from_8(read_buf + FIL_PAGE_LSN),
				current_lsn);
		}
	}
#endif /* !UNIV_HOTBACKUP */

	/* If we use checksums validation, make additional check before
	returning TRUE to ensure that the checksum is not equal to
	BUF_NO_CHECKSUM_MAGIC which might be stored by InnoDB with checksums
	disabled. Otherwise, skip checksum calculation and return FALSE */

	if (UNIV_LIKELY(srv_use_checksums)) {
		checksum_field = mach_read_from_4(read_buf
						  + FIL_PAGE_SPACE_OR_CHKSUM);

		if (UNIV_UNLIKELY(zip_size)) {
			return(checksum_field != BUF_NO_CHECKSUM_MAGIC
			       && checksum_field
			       != page_zip_calc_checksum(read_buf, zip_size));
		}

		old_checksum_field = mach_read_from_4(
			read_buf + UNIV_PAGE_SIZE
			- FIL_PAGE_END_LSN_OLD_CHKSUM);

		/* There are 2 valid formulas for old_checksum_field:

		1. Very old versions of InnoDB only stored 8 byte lsn to the
		start and the end of the page.

		2. Newer InnoDB versions store the old formula checksum
		there. */

		if (old_checksum_field != mach_read_from_4(read_buf
							   + FIL_PAGE_LSN)
		    && old_checksum_field != BUF_NO_CHECKSUM_MAGIC
		    && old_checksum_field
		    != buf_calc_page_old_checksum(read_buf)) {

			return(TRUE);
		}

		/* InnoDB versions < 4.0.14 and < 4.1.1 stored the space id
		(always equal to 0), to FIL_PAGE_SPACE_OR_CHKSUM */

		if (checksum_field != 0
		    && checksum_field != BUF_NO_CHECKSUM_MAGIC
		    && checksum_field
		    != buf_calc_page_new_checksum(read_buf)) {

			return(TRUE);
		}
	}

	return(FALSE);
}
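
/* Condensed, the checksum acceptance rule that buf_page_is_corrupted()
applies to an uncompressed page can be written as a single predicate
(sketch, illustrative only):

	static ibool
	toy_checksums_ok(const byte* page)
	{
		ulint	stored_new = mach_read_from_4(
			page + FIL_PAGE_SPACE_OR_CHKSUM);
		ulint	stored_old = mach_read_from_4(
			page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM);

		return((stored_old == mach_read_from_4(page + FIL_PAGE_LSN)
			|| stored_old == BUF_NO_CHECKSUM_MAGIC
			|| stored_old == buf_calc_page_old_checksum(page))
		       && (stored_new == 0
			   || stored_new == BUF_NO_CHECKSUM_MAGIC
			   || stored_new == buf_calc_page_new_checksum(page)));
	}
*/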
/********************************************************************//**
Prints a page to stderr. */
UNIV_INTERN
void
buf_page_print(
/*===========*/
	const byte*	read_buf,	/*!< in: a database page */
	ulint		zip_size)	/*!< in: compressed page size, or
					0 for uncompressed pages */
{
#ifndef UNIV_HOTBACKUP
	dict_index_t*	index;
#endif /* !UNIV_HOTBACKUP */
	ulint		checksum;
	ulint		old_checksum;
	ulint		size	= zip_size;

	if (!size) {
		size = UNIV_PAGE_SIZE;
	}

	ut_print_timestamp(stderr);
	fprintf(stderr, " InnoDB: Page dump in ascii and hex (%lu bytes):\n",
		(ulong) size);
	ut_print_buf(stderr, read_buf, size);
	fputs("\nInnoDB: End of page dump\n", stderr);
	if (zip_size) {
		/* Print compressed page. */

		switch (fil_page_get_type(read_buf)) {
		case FIL_PAGE_TYPE_ZBLOB:
		case FIL_PAGE_TYPE_ZBLOB2:
			checksum = srv_use_checksums
				? page_zip_calc_checksum(read_buf, zip_size)
				: BUF_NO_CHECKSUM_MAGIC;
			ut_print_timestamp(stderr);
			fprintf(stderr,
				" InnoDB: Compressed BLOB page"
				" checksum %lu, stored %lu\n"
				"InnoDB: Page lsn %lu %lu\n"
				"InnoDB: Page number (if stored"
				" to page already) %lu,\n"
				"InnoDB: space id (if stored"
				" to page already) %lu\n",
				(ulong) checksum,
				(ulong) mach_read_from_4(
					read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
				(ulong) mach_read_from_4(
					read_buf + FIL_PAGE_LSN),
				(ulong) mach_read_from_4(
					read_buf + (FIL_PAGE_LSN + 4)),
				(ulong) mach_read_from_4(
					read_buf + FIL_PAGE_OFFSET),
				(ulong) mach_read_from_4(
					read_buf
					+ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
			return;
		default:
			ut_print_timestamp(stderr);
			fprintf(stderr,
				" InnoDB: unknown page type %lu,"
				" assuming FIL_PAGE_INDEX\n",
				fil_page_get_type(read_buf));
			/* fall through */
		case FIL_PAGE_INDEX:
			checksum = srv_use_checksums
				? page_zip_calc_checksum(read_buf, zip_size)
				: BUF_NO_CHECKSUM_MAGIC;

			ut_print_timestamp(stderr);
			fprintf(stderr,
				" InnoDB: Compressed page checksum %lu,"
				" stored %lu\n"
				"InnoDB: Page lsn %lu %lu\n"
				"InnoDB: Page number (if stored"
				" to page already) %lu,\n"
				"InnoDB: space id (if stored"
				" to page already) %lu\n",
				(ulong) checksum,
				(ulong) mach_read_from_4(
					read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
				(ulong) mach_read_from_4(
					read_buf + FIL_PAGE_LSN),
				(ulong) mach_read_from_4(
					read_buf + (FIL_PAGE_LSN + 4)),
				(ulong) mach_read_from_4(
					read_buf + FIL_PAGE_OFFSET),
				(ulong) mach_read_from_4(
					read_buf
					+ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
			return;
		case FIL_PAGE_TYPE_XDES:
			/* This is an uncompressed page. */
			break;
		}
	}
	checksum = srv_use_checksums
		? buf_calc_page_new_checksum(read_buf) : BUF_NO_CHECKSUM_MAGIC;
	old_checksum = srv_use_checksums
		? buf_calc_page_old_checksum(read_buf) : BUF_NO_CHECKSUM_MAGIC;

	ut_print_timestamp(stderr);
	fprintf(stderr,
		" InnoDB: Page checksum %lu, prior-to-4.0.14-form"
		" checksum %lu\n"
		"InnoDB: stored checksum %lu, prior-to-4.0.14-form"
		" stored checksum %lu\n"
		"InnoDB: Page lsn %lu %lu, low 4 bytes of lsn"
		" at page end %lu\n"
		"InnoDB: Page number (if stored to page already) %lu,\n"
		"InnoDB: space id (if created with >= MySQL-4.1.1"
		" and stored already) %lu\n",
		(ulong) checksum, (ulong) old_checksum,
		(ulong) mach_read_from_4(read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
		(ulong) mach_read_from_4(read_buf + UNIV_PAGE_SIZE
					 - FIL_PAGE_END_LSN_OLD_CHKSUM),
		(ulong) mach_read_from_4(read_buf + FIL_PAGE_LSN),
		(ulong) mach_read_from_4(read_buf + FIL_PAGE_LSN + 4),
		(ulong) mach_read_from_4(read_buf + UNIV_PAGE_SIZE
					 - FIL_PAGE_END_LSN_OLD_CHKSUM + 4),
		(ulong) mach_read_from_4(read_buf + FIL_PAGE_OFFSET),
		(ulong) mach_read_from_4(read_buf
					 + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
#ifndef UNIV_HOTBACKUP
	if (mach_read_from_2(read_buf + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE)
	    == TRX_UNDO_INSERT) {
		fprintf(stderr,
			"InnoDB: Page may be an insert undo log page\n");
	} else if (mach_read_from_2(read_buf + TRX_UNDO_PAGE_HDR
				    + TRX_UNDO_PAGE_TYPE)
		   == TRX_UNDO_UPDATE) {
		fprintf(stderr,
			"InnoDB: Page may be an update undo log page\n");
	}
#endif /* !UNIV_HOTBACKUP */
	switch (fil_page_get_type(read_buf)) {
		index_id_t	index_id;
	case FIL_PAGE_INDEX:
		index_id = btr_page_get_index_id(read_buf);
		fprintf(stderr,
			"InnoDB: Page may be an index page where"
			" index id is %llu\n",
			(ullint) index_id);
#ifndef UNIV_HOTBACKUP
		index = dict_index_find_on_id_low(index_id);
		if (index) {
			fputs("InnoDB: (", stderr);
			dict_index_name_print(stderr, NULL, index);
			fputs(")\n", stderr);
		}
#endif /* !UNIV_HOTBACKUP */
		break;
	case FIL_PAGE_INODE:
		fputs("InnoDB: Page may be an 'inode' page\n", stderr);
		break;
	case FIL_PAGE_IBUF_FREE_LIST:
		fputs("InnoDB: Page may be an insert buffer free list page\n",
		      stderr);
		break;
	case FIL_PAGE_TYPE_ALLOCATED:
		fputs("InnoDB: Page may be a freshly allocated page\n",
		      stderr);
		break;
	case FIL_PAGE_IBUF_BITMAP:
		fputs("InnoDB: Page may be an insert buffer bitmap page\n",
		      stderr);
		break;
	case FIL_PAGE_TYPE_SYS:
		fputs("InnoDB: Page may be a system page\n",
		      stderr);
		break;
	case FIL_PAGE_TYPE_TRX_SYS:
		fputs("InnoDB: Page may be a transaction system page\n",
		      stderr);
		break;
	case FIL_PAGE_TYPE_FSP_HDR:
		fputs("InnoDB: Page may be a file space header page\n",
		      stderr);
		break;
	case FIL_PAGE_TYPE_XDES:
		fputs("InnoDB: Page may be an extent descriptor page\n",
		      stderr);
		break;
	case FIL_PAGE_TYPE_BLOB:
		fputs("InnoDB: Page may be a BLOB page\n",
		      stderr);
		break;
	case FIL_PAGE_TYPE_ZBLOB:
	case FIL_PAGE_TYPE_ZBLOB2:
		fputs("InnoDB: Page may be a compressed BLOB page\n",
		      stderr);
		break;
	}
}
#ifndef UNIV_HOTBACKUP
# ifdef PFS_GROUP_BUFFER_SYNC
/********************************************************************//**
This function registers mutexes and rwlocks in buffer blocks with
performance schema. If PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER is
defined to be a value less than chunk->size, then only mutexes
and rwlocks in the first PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER
blocks are registered. */
static
void
pfs_register_buffer_block(
/*======================*/
	buf_chunk_t*	chunk)		/*!< in/out: chunk of buffers */
{
	ulint		i;
	ulint		num_to_register;
	buf_block_t*	block;

	block = chunk->blocks;

	num_to_register = ut_min(chunk->size,
				 PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER);

	for (i = 0; i < num_to_register; i++) {
		mutex_t*	mutex;
		rw_lock_t*	rwlock;

# ifdef UNIV_PFS_MUTEX
		mutex = &block->mutex;
		ut_a(!mutex->pfs_psi);
		mutex->pfs_psi = (PSI_server)
			? PSI_server->init_mutex(buffer_block_mutex_key, mutex)
			: NULL;
# endif /* UNIV_PFS_MUTEX */

# ifdef UNIV_PFS_RWLOCK
		rwlock = &block->lock;
		ut_a(!rwlock->pfs_psi);
		rwlock->pfs_psi = (PSI_server)
			? PSI_server->init_rwlock(buf_block_lock_key, rwlock)
			: NULL;
# endif /* UNIV_PFS_RWLOCK */

		block++;
	}
}
# endif /* PFS_GROUP_BUFFER_SYNC */
/********************************************************************//**
Initializes a buffer control block when the buf_pool is created. */
static
void
buf_block_init(
/*===========*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	buf_block_t*	block,		/*!< in: pointer to control block */
	byte*		frame)		/*!< in: pointer to buffer frame */
{
	UNIV_MEM_DESC(frame, UNIV_PAGE_SIZE, block);

	block->frame = frame;

	block->page.buf_pool = buf_pool;
	block->page.state = BUF_BLOCK_NOT_USED;
	block->page.buf_fix_count = 0;
	block->page.io_fix = BUF_IO_NONE;

	block->modify_clock = 0;

#ifdef UNIV_DEBUG_FILE_ACCESSES
	block->page.file_page_was_freed = FALSE;
#endif /* UNIV_DEBUG_FILE_ACCESSES */

	block->check_index_page_at_flush = FALSE;

	block->is_hashed = FALSE;

#ifdef UNIV_DEBUG
	block->page.in_page_hash = FALSE;
	block->page.in_zip_hash = FALSE;
	block->page.in_flush_list = FALSE;
	block->page.in_free_list = FALSE;
	block->page.in_LRU_list = FALSE;
	block->in_unzip_LRU_list = FALSE;
#endif /* UNIV_DEBUG */
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
	block->n_pointers = 0;
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
	page_zip_des_init(&block->page.zip);

#if defined PFS_SKIP_BUFFER_MUTEX_RWLOCK || defined PFS_GROUP_BUFFER_SYNC
	/* If PFS_SKIP_BUFFER_MUTEX_RWLOCK is defined, skip registration
	of buffer block mutex/rwlock with performance schema. If
	PFS_GROUP_BUFFER_SYNC is defined, skip the registration
	since buffer block mutex/rwlock will be registered later in
	pfs_register_buffer_block() */

	mutex_create(PFS_NOT_INSTRUMENTED, &block->mutex, SYNC_BUF_BLOCK);
	rw_lock_create(PFS_NOT_INSTRUMENTED, &block->lock, SYNC_LEVEL_VARYING);
#else /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */
	mutex_create(buffer_block_mutex_key, &block->mutex, SYNC_BUF_BLOCK);
	rw_lock_create(buf_block_lock_key, &block->lock, SYNC_LEVEL_VARYING);
#endif /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */

	ut_ad(rw_lock_validate(&(block->lock)));

#ifdef UNIV_SYNC_DEBUG
	rw_lock_create(buf_block_debug_latch_key,
		       &block->debug_latch, SYNC_NO_ORDER_CHECK);
#endif /* UNIV_SYNC_DEBUG */
}
/********************************************************************//**
Allocates a chunk of buffer frames.
@return	chunk, or NULL on failure */
static
buf_chunk_t*
buf_chunk_init(
/*===========*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	buf_chunk_t*	chunk,		/*!< out: chunk of buffers */
	ulint		mem_size)	/*!< in: requested size in bytes */
{
	buf_block_t*	block;
	byte*		frame;
	ulint		i;

	/* Round down to a multiple of page size,
	although it already should be. */
	mem_size = ut_2pow_round(mem_size, UNIV_PAGE_SIZE);
	/* Reserve space for the block descriptors. */
	mem_size += ut_2pow_round((mem_size / UNIV_PAGE_SIZE) * (sizeof *block)
				  + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE);

	chunk->mem_size = mem_size;
	chunk->mem = os_mem_alloc_large(&chunk->mem_size);

	if (UNIV_UNLIKELY(chunk->mem == NULL)) {

		return(NULL);
	}

	/* Allocate the block descriptors from
	the start of the memory block. */
	chunk->blocks = chunk->mem;

	/* Align a pointer to the first frame. Note that when
	os_large_page_size is smaller than UNIV_PAGE_SIZE,
	we may allocate one fewer block than requested. When
	it is bigger, we may allocate more blocks than requested. */

	frame = ut_align(chunk->mem, UNIV_PAGE_SIZE);
	chunk->size = chunk->mem_size / UNIV_PAGE_SIZE
		- (frame != chunk->mem);

	/* Subtract the space needed for block descriptors. */
	{
		ulint	size = chunk->size;

		while (frame < (byte*) (chunk->blocks + size)) {
			frame += UNIV_PAGE_SIZE;
			size--;
		}

		chunk->size = size;
	}

	/* Init block structs and assign frames for them. Then we
	assign the frames to the first blocks (we already mapped the
	memory above). */

	block = chunk->blocks;

	for (i = chunk->size; i--; ) {

		buf_block_init(buf_pool, block, frame);

		/* Wipe contents of frame to eliminate a Purify warning */
		memset(block->frame, '\0', UNIV_PAGE_SIZE);

		/* Add the block to the free list */
		UT_LIST_ADD_LAST(list, buf_pool->free, (&block->page));

		ut_d(block->page.in_free_list = TRUE);
		ut_ad(buf_pool_from_block(block) == buf_pool);

		block++;
		frame += UNIV_PAGE_SIZE;
	}

#ifdef PFS_GROUP_BUFFER_SYNC
	pfs_register_buffer_block(chunk);
#endif /* PFS_GROUP_BUFFER_SYNC */

	return(chunk);
}
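
/* Worked example of the arithmetic above, assuming UNIV_PAGE_SIZE =
16384 and a 16 MB request: mem_size is first rounded to 1024 pages,
then enough whole pages to hold 1024 block descriptors are added; the
descriptors sit at the start of chunk->mem and the frames start at the
first page-aligned address past chunk->mem, so chunk->size ends up
somewhat below 1024 once the descriptor pages are subtracted. For a
power-of-2 page size, ut_align() behaves like this sketch:

	frame = (byte*) (((ulint) chunk->mem + UNIV_PAGE_SIZE - 1)
			 & ~((ulint) (UNIV_PAGE_SIZE - 1)));
*/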
/*********************************************************************//**
Finds a block in the given buffer chunk that points to a
given compressed page.
@return	buffer block pointing to the compressed page, or NULL */
static
buf_block_t*
buf_chunk_contains_zip(
/*===================*/
	buf_chunk_t*	chunk,	/*!< in: chunk being checked */
	const void*	data)	/*!< in: pointer to compressed page */
{
	buf_block_t*	block;
	ulint		i;

	block = chunk->blocks;

	for (i = chunk->size; i--; block++) {
		if (block->page.zip.data == data) {

			return(block);
		}
	}

	return(NULL);
}
#ifdef UNIV_DEBUG
/*********************************************************************//**
Finds a block in the buffer pool that points to a
given compressed page.
@return	buffer block pointing to the compressed page, or NULL */
UNIV_INTERN
buf_block_t*
buf_pool_contains_zip(
/*==================*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	const void*	data)		/*!< in: pointer to compressed page */
{
	ulint		n;
	buf_chunk_t*	chunk = buf_pool->chunks;

	ut_ad(buf_pool_mutex_own(buf_pool));
	for (n = buf_pool->n_chunks; n--; chunk++) {

		buf_block_t* block = buf_chunk_contains_zip(chunk, data);

		if (block) {

			return(block);
		}
	}

	return(NULL);
}
#endif /* UNIV_DEBUG */
/*********************************************************************//**
Checks that all file pages in the buffer chunk are in a replaceable state.
@return	address of a non-free block, or NULL if all freed */
static
const buf_block_t*
buf_chunk_not_freed(
/*================*/
	buf_chunk_t*	chunk)	/*!< in: chunk being checked */
{
	buf_block_t*	block;
	ulint		i;

	block = chunk->blocks;

	for (i = chunk->size; i--; block++) {
		ibool	ready;

		switch (buf_block_get_state(block)) {
		case BUF_BLOCK_ZIP_FREE:
		case BUF_BLOCK_ZIP_PAGE:
		case BUF_BLOCK_ZIP_DIRTY:
			/* The uncompressed buffer pool should never
			contain compressed block descriptors. */
			ut_error;
			break;
		case BUF_BLOCK_NOT_USED:
		case BUF_BLOCK_READY_FOR_USE:
		case BUF_BLOCK_MEMORY:
		case BUF_BLOCK_REMOVE_HASH:
			/* Skip blocks that are not being used for
			file pages. */
			break;
		case BUF_BLOCK_FILE_PAGE:
			mutex_enter(&block->mutex);
			ready = buf_flush_ready_for_replace(&block->page);
			mutex_exit(&block->mutex);

			if (!ready) {

				return(block);
			}

			break;
		}
	}

	return(NULL);
}
/*********************************************************************//**
Checks that all blocks in the buffer chunk are in BUF_BLOCK_NOT_USED state.
@return	TRUE if all freed */
static
ibool
buf_chunk_all_free(
/*===============*/
	const buf_chunk_t*	chunk)	/*!< in: chunk being checked */
{
	const buf_block_t*	block;
	ulint			i;

	block = chunk->blocks;

	for (i = chunk->size; i--; block++) {

		if (buf_block_get_state(block) != BUF_BLOCK_NOT_USED) {

			return(FALSE);
		}
	}

	return(TRUE);
}
/********************************************************************//**
Frees a chunk of buffer frames. */
static
void
buf_chunk_free(
/*===========*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	buf_chunk_t*	chunk)		/*!< out: chunk of buffers */
{
	buf_block_t*		block;
	const buf_block_t*	block_end;

	ut_ad(buf_pool_mutex_own(buf_pool));

	block_end = chunk->blocks + chunk->size;

	for (block = chunk->blocks; block < block_end; block++) {
		ut_a(buf_block_get_state(block) == BUF_BLOCK_NOT_USED);
		ut_a(!block->page.zip.data);

		ut_ad(!block->page.in_LRU_list);
		ut_ad(!block->in_unzip_LRU_list);
		ut_ad(!block->page.in_flush_list);
		/* Remove the block from the free list. */
		ut_ad(block->page.in_free_list);
		UT_LIST_REMOVE(list, buf_pool->free, (&block->page));

		/* Free the latches. */
		mutex_free(&block->mutex);
		rw_lock_free(&block->lock);
#ifdef UNIV_SYNC_DEBUG
		rw_lock_free(&block->debug_latch);
#endif /* UNIV_SYNC_DEBUG */
		UNIV_MEM_UNDESC(block);
	}

	os_mem_free_large(chunk->mem, chunk->mem_size);
}
/********************************************************************//**
Set buffer pool size variables after resizing it. */
static
void
buf_pool_set_sizes(void)
/*====================*/
{
	ulint	i;
	ulint	curr_size = 0;

	buf_pool_mutex_enter_all();

	for (i = 0; i < srv_buf_pool_instances; i++) {
		buf_pool_t*	buf_pool;

		buf_pool = buf_pool_from_array(i);
		curr_size += buf_pool->curr_pool_size;
	}

	srv_buf_pool_curr_size = curr_size;
	srv_buf_pool_old_size = srv_buf_pool_size;

	buf_pool_mutex_exit_all();
}
/********************************************************************//**
Initialize a buffer pool instance.
@return	DB_SUCCESS if all goes well. */
static
ulint
buf_pool_init_instance(
/*===================*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	ulint		buf_pool_size,	/*!< in: size in bytes */
	ulint		instance_no)	/*!< in: id of the instance */
{
	ulint		i;
	buf_chunk_t*	chunk;

	/* 1. Initialize general fields
	------------------------------- */
	mutex_create(buf_pool_mutex_key,
		     &buf_pool->mutex, SYNC_BUF_POOL);
	mutex_create(buf_pool_zip_mutex_key,
		     &buf_pool->zip_mutex, SYNC_BUF_BLOCK);

	buf_pool_mutex_enter(buf_pool);

	if (buf_pool_size > 0) {
		buf_pool->n_chunks = 1;
		buf_pool->chunks = chunk = mem_zalloc(sizeof *chunk);

		UT_LIST_INIT(buf_pool->free);

		if (!buf_chunk_init(buf_pool, chunk, buf_pool_size)) {
			mem_free(chunk);

			buf_pool_mutex_exit(buf_pool);

			return(DB_ERROR);
		}

		buf_pool->instance_no = instance_no;
		buf_pool->old_pool_size = buf_pool_size;
		buf_pool->curr_size = chunk->size;
		buf_pool->curr_pool_size = buf_pool->curr_size * UNIV_PAGE_SIZE;

		buf_pool->page_hash = hash_create(2 * buf_pool->curr_size);
		buf_pool->zip_hash = hash_create(2 * buf_pool->curr_size);

		buf_pool->last_printout_time = ut_time();
	}
	/* 2. Initialize flushing fields
	-------------------------------- */

	mutex_create(flush_list_mutex_key, &buf_pool->flush_list_mutex,
		     SYNC_BUF_FLUSH_LIST);

	for (i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) {
		buf_pool->no_flush[i] = os_event_create(NULL);
	}

	/* 3. Initialize LRU fields
	--------------------------- */

	/* All fields are initialized by mem_zalloc(). */

	buf_pool_mutex_exit(buf_pool);

	return(DB_SUCCESS);
}
/********************************************************************//**
Free one buffer pool instance. */
static
void
buf_pool_free_instance(
/*===================*/
	buf_pool_t*	buf_pool)	/* in,own: buffer pool instance
					to free */
{
	buf_chunk_t*	chunk;
	buf_chunk_t*	chunks;

	chunks = buf_pool->chunks;
	chunk = chunks + buf_pool->n_chunks;

	while (--chunk >= chunks) {
		/* Bypass the checks of buf_chunk_free(), since they
		would fail at shutdown. */
		os_mem_free_large(chunk->mem, chunk->mem_size);
	}

	mem_free(buf_pool->chunks);
	hash_table_free(buf_pool->page_hash);
	hash_table_free(buf_pool->zip_hash);
}
/********************************************************************//**
Creates the buffer pool.
@return	DB_SUCCESS if success, DB_ERROR if not enough memory or error */
UNIV_INTERN
ulint
buf_pool_init(
/*==========*/
	ulint	total_size,	/*!< in: size of the total pool in bytes */
	ulint	n_instances)	/*!< in: number of instances */
{
	ulint	i;

	/* We create an extra buffer pool instance, this instance is used
	for flushing the flush lists, to keep track of n_flush for all
	the buffer pools and also used as a waiting object during flushing. */
	for (i = 0; i < n_instances; i++) {
		buf_pool_t*	ptr;
		ulint		size;

		ptr = mem_zalloc(sizeof(*ptr));

		size = total_size / n_instances;

		buf_pool_ptr[i] = ptr;

		if (buf_pool_init_instance(ptr, size, i) != DB_SUCCESS) {

			mem_free(buf_pool_ptr[i]);
			buf_pool_ptr[i] = NULL;

			/* Free all the instances created so far. */
			buf_pool_free(i);

			return(DB_ERROR);
		}
	}

	buf_pool_set_sizes();
	buf_LRU_old_ratio_update(100 * 3/ 8, FALSE);

	btr_search_sys_create(buf_pool_get_curr_size() / sizeof(void*) / 64);

	return(DB_SUCCESS);
}
/********************************************************************//**
Frees the buffer pool at shutdown. This must not be invoked before
freeing all mutexes. */
UNIV_INTERN
void
buf_pool_free(
/*==========*/
	ulint	n_instances)	/*!< in: number of instances to free */
{
	ulint	i;

	for (i = 0; i < n_instances; i++) {
		buf_pool_free_instance(buf_pool_from_array(i));
		buf_pool_ptr[i] = NULL;
	}
}
/********************************************************************//**
Drops adaptive hash index for a buffer pool instance. */
static
void
buf_pool_drop_hash_index_instance(
/*==============================*/
	buf_pool_t*	buf_pool,		/*!< in: buffer pool instance */
	ibool*		released_search_latch)	/*!< out: flag for signalling
						whether the search latch was
						released */
{
	buf_chunk_t*	chunks	= buf_pool->chunks;
	buf_chunk_t*	chunk	= chunks + buf_pool->n_chunks;

	while (--chunk >= chunks) {
		ulint		i;
		buf_block_t*	block = chunk->blocks;

		for (i = chunk->size; i--; block++) {
			/* block->is_hashed cannot be modified
			when we have an x-latch on btr_search_latch;
			see the comment in buf0buf.h */

			if (!block->is_hashed) {
				continue;
			}

			/* To follow the latching order, we
			have to release btr_search_latch
			before acquiring block->lock. */
			rw_lock_x_unlock(&btr_search_latch);
			/* When we release the search latch,
			we must rescan all blocks, because
			some may become hashed again. */
			*released_search_latch = TRUE;

			rw_lock_x_lock(&block->lock);

			/* This should be guaranteed by the
			callers, which will be holding
			btr_search_enabled_mutex. */
			ut_ad(!btr_search_enabled);

			/* Because we did not buffer-fix the
			block by calling buf_block_get_gen(),
			it is possible that the block has been
			allocated for some other use after
			btr_search_latch was released above.
			We do not care which file page the
			block is mapped to. All we want to do
			is to drop any hash entries referring
			to the page. */

			/* It is possible that
			block->page.state != BUF_FILE_PAGE.
			Even that does not matter, because
			btr_search_drop_page_hash_index() will
			check block->is_hashed before doing
			anything. block->is_hashed can only
			be set on uncompressed file pages. */

			btr_search_drop_page_hash_index(block);

			rw_lock_x_unlock(&block->lock);

			rw_lock_x_lock(&btr_search_latch);

			ut_ad(!btr_search_enabled);
		}
	}
}
/********************************************************************//**
Drops the adaptive hash index. To prevent a livelock, this function
is only to be called while holding btr_search_latch and while
btr_search_enabled == FALSE. */
UNIV_INTERN
void
buf_pool_drop_hash_index(void)
/*==========================*/
{
	ibool	released_search_latch;

#ifdef UNIV_SYNC_DEBUG
	ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
	ut_ad(!btr_search_enabled);

	do {
		ulint	i;

		released_search_latch = FALSE;

		for (i = 0; i < srv_buf_pool_instances; i++) {
			buf_pool_t*	buf_pool;

			buf_pool = buf_pool_from_array(i);

			buf_pool_drop_hash_index_instance(
				buf_pool, &released_search_latch);
		}

	} while (released_search_latch);
}
/********************************************************************//**
Relocate a buffer control block. Relocates the block on the LRU list
and in buf_pool->page_hash. Does not relocate bpage->list.
The caller must take care of relocating bpage->list. */
UNIV_INTERN
void
buf_relocate(
/*=========*/
	buf_page_t*	bpage,	/*!< in/out: control block being relocated;
				buf_page_get_state(bpage) must be
				BUF_BLOCK_ZIP_DIRTY or BUF_BLOCK_ZIP_PAGE */
	buf_page_t*	dpage)	/*!< in/out: destination control block */
{
	buf_page_t*	b;
	ulint		fold;
	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);

	ut_ad(buf_pool_mutex_own(buf_pool));
	ut_ad(mutex_own(buf_page_get_mutex(bpage)));
	ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
	ut_a(bpage->buf_fix_count == 0);
	ut_ad(bpage->in_LRU_list);
	ut_ad(!bpage->in_zip_hash);
	ut_ad(bpage->in_page_hash);
	ut_ad(bpage == buf_page_hash_get(buf_pool,
					 bpage->space, bpage->offset));
	ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
#ifdef UNIV_DEBUG
	switch (buf_page_get_state(bpage)) {
	case BUF_BLOCK_ZIP_FREE:
	case BUF_BLOCK_NOT_USED:
	case BUF_BLOCK_READY_FOR_USE:
	case BUF_BLOCK_FILE_PAGE:
	case BUF_BLOCK_MEMORY:
	case BUF_BLOCK_REMOVE_HASH:
		ut_error;
	case BUF_BLOCK_ZIP_DIRTY:
	case BUF_BLOCK_ZIP_PAGE:
		break;
	}
#endif /* UNIV_DEBUG */

	memcpy(dpage, bpage, sizeof *dpage);

	ut_d(bpage->in_LRU_list = FALSE);
	ut_d(bpage->in_page_hash = FALSE);

	/* relocate buf_pool->LRU */
	b = UT_LIST_GET_PREV(LRU, bpage);
	UT_LIST_REMOVE(LRU, buf_pool->LRU, bpage);

	if (b) {
		UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU, b, dpage);
	} else {
		UT_LIST_ADD_FIRST(LRU, buf_pool->LRU, dpage);
	}

	if (UNIV_UNLIKELY(buf_pool->LRU_old == bpage)) {
		buf_pool->LRU_old = dpage;
#ifdef UNIV_LRU_DEBUG
		/* buf_pool->LRU_old must be the first item in the LRU list
		whose "old" flag is set. */
		ut_a(buf_pool->LRU_old->old);
		ut_a(!UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)
		     || !UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)->old);
		ut_a(!UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)
		     || UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)->old);
	} else {
		/* Check that the "old" flag is consistent in
		the block and its neighbours. */
		buf_page_set_old(dpage, buf_page_is_old(dpage));
#endif /* UNIV_LRU_DEBUG */
	}

	ut_d(UT_LIST_VALIDATE(LRU, buf_page_t, buf_pool->LRU,
			      ut_ad(ut_list_node_313->in_LRU_list)));

	/* relocate buf_pool->page_hash */
	fold = buf_page_address_fold(bpage->space, bpage->offset);

	HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, bpage);
	HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold, dpage);
}
/********************************************************************//**
Shrinks a buffer pool instance. */
static
void
buf_pool_shrink_instance(
/*=====================*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	ulint		chunk_size)	/*!< in: number of pages to remove */
{
	buf_chunk_t*	chunks;
	buf_chunk_t*	chunk;
	ulint		max_size;
	ulint		max_free_size;
	buf_chunk_t*	max_chunk;
	buf_chunk_t*	max_free_chunk;

	ut_ad(!buf_pool_mutex_own(buf_pool));

try_again:
	btr_search_disable(); /* Empty the adaptive hash index again */
	buf_pool_mutex_enter(buf_pool);

shrink_again:
	if (buf_pool->n_chunks <= 1) {

		/* Cannot shrink if there is only one chunk */
		goto func_done;
	}

	/* Search for the largest free chunk
	not larger than the size difference */
	chunks = buf_pool->chunks;
	chunk = chunks + buf_pool->n_chunks;
	max_size = max_free_size = 0;
	max_chunk = max_free_chunk = NULL;

	while (--chunk >= chunks) {
		if (chunk->size <= chunk_size
		    && chunk->size > max_free_size) {
			if (chunk->size > max_size) {
				max_size = chunk->size;
				max_chunk = chunk;
			}

			if (buf_chunk_all_free(chunk)) {
				max_free_size = chunk->size;
				max_free_chunk = chunk;
			}
		}
	}

	if (!max_free_size) {

		ulint		dirty	= 0;
		ulint		nonfree	= 0;
		buf_block_t*	block;
		buf_block_t*	bend;

		/* Cannot shrink: try again later
		(do not assign srv_buf_pool_old_size) */
		if (!max_chunk) {

			goto func_exit;
		}

		block = max_chunk->blocks;
		bend = block + max_chunk->size;

		/* Move the blocks of chunk to the end of the
		LRU list and try to flush them. */
		for (; block < bend; block++) {
			switch (buf_block_get_state(block)) {
			case BUF_BLOCK_NOT_USED:
				continue;
			case BUF_BLOCK_FILE_PAGE:
				break;
			default:
				nonfree++;
				continue;
			}

			mutex_enter(&block->mutex);
			/* The following calls will temporarily
			release block->mutex and buf_pool->mutex.
			Therefore, we have to always retry,
			even if !dirty && !nonfree. */

			if (!buf_flush_ready_for_replace(&block->page)) {

				buf_LRU_make_block_old(&block->page);
				dirty++;
			} else if (buf_LRU_free_block(&block->page, TRUE, NULL)
				   != BUF_LRU_FREED) {
				nonfree++;
			}

			mutex_exit(&block->mutex);
		}

		buf_pool_mutex_exit(buf_pool);

		/* Request for a flush of the chunk if it helps.
		Do not flush if there are non-free blocks, since
		flushing will not make the chunk freeable. */
		if (nonfree) {
			/* Avoid busy-waiting. */
			os_thread_sleep(100000);
		} else if (dirty
			   && buf_flush_LRU(buf_pool, dirty)
			   == ULINT_UNDEFINED) {

			buf_flush_wait_batch_end(buf_pool, BUF_FLUSH_LRU);
		}

		goto try_again;
	}

	max_size = max_free_size;
	max_chunk = max_free_chunk;

	buf_pool->old_pool_size = buf_pool->curr_pool_size;

	/* Rewrite buf_pool->chunks. Copy everything but max_chunk. */
	chunks = mem_alloc((buf_pool->n_chunks - 1) * sizeof *chunks);
	memcpy(chunks, buf_pool->chunks,
	       (max_chunk - buf_pool->chunks) * sizeof *chunks);
	memcpy(chunks + (max_chunk - buf_pool->chunks),
	       max_chunk + 1,
	       (buf_pool->chunks + buf_pool->n_chunks
		- (max_chunk + 1)) * sizeof *chunks);
	ut_a(buf_pool->curr_size > max_chunk->size);
	buf_pool->curr_size -= max_chunk->size;
	buf_pool->curr_pool_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
	chunk_size -= max_chunk->size;
	buf_chunk_free(buf_pool, max_chunk);
	mem_free(buf_pool->chunks);
	buf_pool->chunks = chunks;
	buf_pool->n_chunks--;

	/* Allow a slack of one megabyte. */
	if (chunk_size > 1048576 / UNIV_PAGE_SIZE) {

		goto shrink_again;
	}
	goto func_exit;

func_done:
	buf_pool->old_pool_size = buf_pool->curr_pool_size;
func_exit:
	buf_pool_mutex_exit(buf_pool);
	btr_search_enable();
}
/********************************************************************//**
Shrinks the buffer pool. */
static
void
buf_pool_shrink(
/*============*/
	ulint	chunk_size)	/*!< in: number of pages to remove */
{
	ulint	i;

	for (i = 0; i < srv_buf_pool_instances; i++) {
		buf_pool_t*	buf_pool;
		ulint		instance_chunk_size;

		instance_chunk_size = chunk_size / srv_buf_pool_instances;
		buf_pool = buf_pool_from_array(i);
		buf_pool_shrink_instance(buf_pool, instance_chunk_size);
	}

	buf_pool_set_sizes();
}
/********************************************************************//**
Rebuild buf_pool->page_hash for a buffer pool instance. */
static
void
buf_pool_page_hash_rebuild_instance(
/*================================*/
	buf_pool_t*	buf_pool)	/*!< in: buffer pool instance */
{
	ulint		i;
	ulint		n_chunks;
	buf_chunk_t*	chunk;
	buf_page_t*	b;
	hash_table_t*	zip_hash;
	hash_table_t*	page_hash;

	buf_pool_mutex_enter(buf_pool);

	/* Free, create, and populate the hash table. */
	hash_table_free(buf_pool->page_hash);
	buf_pool->page_hash = page_hash = hash_create(2 * buf_pool->curr_size);
	zip_hash = hash_create(2 * buf_pool->curr_size);

	HASH_MIGRATE(buf_pool->zip_hash, zip_hash, buf_page_t, hash,
		     BUF_POOL_ZIP_FOLD_BPAGE);

	hash_table_free(buf_pool->zip_hash);
	buf_pool->zip_hash = zip_hash;

	/* Insert the uncompressed file pages to buf_pool->page_hash. */

	chunk = buf_pool->chunks;
	n_chunks = buf_pool->n_chunks;

	for (i = 0; i < n_chunks; i++, chunk++) {
		ulint		j;
		buf_block_t*	block = chunk->blocks;

		for (j = 0; j < chunk->size; j++, block++) {
			if (buf_block_get_state(block)
			    == BUF_BLOCK_FILE_PAGE) {
				ut_ad(!block->page.in_zip_hash);
				ut_ad(block->page.in_page_hash);

				HASH_INSERT(buf_page_t, hash, page_hash,
					    buf_page_address_fold(
						    block->page.space,
						    block->page.offset),
					    &block->page);
			}
		}
	}

	/* Insert the compressed-only pages to buf_pool->page_hash.
	All such blocks are either in buf_pool->zip_clean or
	in buf_pool->flush_list. */

	for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
	     b = UT_LIST_GET_NEXT(list, b)) {
		ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
		ut_ad(!b->in_flush_list);
		ut_ad(b->in_LRU_list);
		ut_ad(b->in_page_hash);
		ut_ad(!b->in_zip_hash);

		HASH_INSERT(buf_page_t, hash, page_hash,
			    buf_page_address_fold(b->space, b->offset), b);
	}

	buf_flush_list_mutex_enter(buf_pool);
	for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
	     b = UT_LIST_GET_NEXT(list, b)) {
		ut_ad(b->in_flush_list);
		ut_ad(b->in_LRU_list);
		ut_ad(b->in_page_hash);
		ut_ad(!b->in_zip_hash);

		switch (buf_page_get_state(b)) {
		case BUF_BLOCK_ZIP_DIRTY:
			HASH_INSERT(buf_page_t, hash, page_hash,
				    buf_page_address_fold(b->space,
							  b->offset), b);
			break;
		case BUF_BLOCK_FILE_PAGE:
			/* uncompressed page */
			break;
		case BUF_BLOCK_ZIP_FREE:
		case BUF_BLOCK_ZIP_PAGE:
		case BUF_BLOCK_NOT_USED:
		case BUF_BLOCK_READY_FOR_USE:
		case BUF_BLOCK_MEMORY:
		case BUF_BLOCK_REMOVE_HASH:
			ut_error;
			break;
		}
	}

	buf_flush_list_mutex_exit(buf_pool);
	buf_pool_mutex_exit(buf_pool);
}
/****************************************************************//**
Determine if a block is a sentinel for a buffer pool watch.
@return	TRUE if a sentinel for a buffer pool watch, FALSE if not */
UNIV_INTERN
ibool
buf_pool_watch_is_sentinel(
/*=======================*/
	buf_pool_t*		buf_pool,	/*!< buffer pool instance */
	const buf_page_t*	bpage)		/*!< in: block */
{
	ut_ad(buf_page_in_file(bpage));

	if (bpage < &buf_pool->watch[0]
	    || bpage >= &buf_pool->watch[BUF_POOL_WATCH_SIZE]) {

		ut_ad(buf_page_get_state(bpage) != BUF_BLOCK_ZIP_PAGE
		      || bpage->zip.data != NULL);

		return(FALSE);
	}

	ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_PAGE);
	ut_ad(!bpage->in_zip_hash);
	ut_ad(bpage->in_page_hash);
	ut_ad(bpage->zip.data == NULL);
	ut_ad(bpage->buf_fix_count > 0);

	return(TRUE);
}
/****************************************************************//**
Add watch for the given page to be read in. Caller must have the buffer pool
mutex reserved.
@return	NULL if watch set, block if the page is in the buffer pool */
UNIV_INTERN
buf_page_t*
buf_pool_watch_set(
/*===============*/
	ulint	space,	/*!< in: space id */
	ulint	offset,	/*!< in: page number */
	ulint	fold)	/*!< in: buf_page_address_fold(space, offset) */
{
	buf_page_t*	bpage;
	ulint		i;
	buf_pool_t*	buf_pool = buf_pool_get(space, offset);

	ut_ad(buf_pool_mutex_own(buf_pool));

	bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);

	if (UNIV_LIKELY_NULL(bpage)) {
		if (!buf_pool_watch_is_sentinel(buf_pool, bpage)) {
			/* The page was loaded meanwhile. */
			return(bpage);
		}
		/* Add to an existing watch. */
		bpage->buf_fix_count++;
		return(NULL);
	}

	for (i = 0; i < BUF_POOL_WATCH_SIZE; i++) {
		bpage = &buf_pool->watch[i];

		ut_ad(bpage->access_time == 0);
		ut_ad(bpage->newest_modification == 0);
		ut_ad(bpage->oldest_modification == 0);
		ut_ad(bpage->zip.data == NULL);
		ut_ad(!bpage->in_zip_hash);

		switch (bpage->state) {
		case BUF_BLOCK_POOL_WATCH:
			ut_ad(!bpage->in_page_hash);
			ut_ad(bpage->buf_fix_count == 0);

			/* bpage is pointing to buf_pool_watch[],
			which is protected by buf_pool_mutex.
			Normally, buf_page_t objects are protected by
			buf_block_t::mutex or buf_pool->zip_mutex or both. */

			bpage->state = BUF_BLOCK_ZIP_PAGE;
			bpage->space = space;
			bpage->offset = offset;
			bpage->buf_fix_count = 1;

			ut_d(bpage->in_page_hash = TRUE);
			HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
				    fold, bpage);
			return(NULL);
		case BUF_BLOCK_ZIP_PAGE:
			/* This watch is in use for some other page. */
			ut_ad(bpage->in_page_hash);
			ut_ad(bpage->buf_fix_count > 0);
			break;
		default:
			ut_error;
		}
	}

	/* Allocation failed. Either the maximum number of purge
	threads should never exceed BUF_POOL_WATCH_SIZE, or this code
	should be modified to return a special non-NULL value and the
	caller should purge the record directly. */
	ut_error;

	/* Fix compiler warning */
	return(NULL);
}
/********************************************************************//**
Rebuild buf_pool->page_hash. */
static
void
buf_pool_page_hash_rebuild(void)
/*============================*/
{
	ulint	i;

	for (i = 0; i < srv_buf_pool_instances; i++) {
		buf_pool_page_hash_rebuild_instance(buf_pool_from_array(i));
	}
}
/********************************************************************//**
Increase the buffer pool size of one buffer pool instance. */
static
void
buf_pool_increase_instance(
/*=======================*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	ulint		change_size)	/*!< in: size of the chunk to add,
					in bytes */
{
	buf_chunk_t*	chunks;
	buf_chunk_t*	chunk;

	buf_pool_mutex_enter(buf_pool);
	chunks = mem_alloc((buf_pool->n_chunks + 1) * sizeof *chunks);

	memcpy(chunks, buf_pool->chunks, buf_pool->n_chunks * sizeof *chunks);

	chunk = &chunks[buf_pool->n_chunks];

	if (!buf_chunk_init(buf_pool, chunk, change_size)) {
		mem_free(chunks);
	} else {
		buf_pool->old_pool_size = buf_pool->curr_pool_size;
		buf_pool->curr_size += chunk->size;
		buf_pool->curr_pool_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
		mem_free(buf_pool->chunks);
		buf_pool->chunks = chunks;
		buf_pool->n_chunks++;
	}

	buf_pool_mutex_exit(buf_pool);
}
/********************************************************************//**
Increase the buffer pool size. */
static
void
buf_pool_increase(
/*==============*/
	ulint	change_size)	/*!< in: total size increase in bytes */
{
	ulint	i;

	for (i = 0; i < srv_buf_pool_instances; i++) {
		buf_pool_increase_instance(
			buf_pool_from_array(i),
			change_size / srv_buf_pool_instances);
	}

	buf_pool_set_sizes();
}
/********************************************************************//**
Resizes the buffer pool. */
UNIV_INTERN
void
buf_pool_resize(void)
/*=================*/
{
	ulint	change_size;
	ulint	min_change_size = 1048576 * srv_buf_pool_instances;

	buf_pool_mutex_enter_all();

	if (srv_buf_pool_old_size == srv_buf_pool_size) {

		buf_pool_mutex_exit_all();

		return;

	} else if (srv_buf_pool_curr_size + min_change_size
		   > srv_buf_pool_size) {

		change_size = (srv_buf_pool_curr_size - srv_buf_pool_size)
			    / UNIV_PAGE_SIZE;

		buf_pool_mutex_exit_all();

		/* Disable adaptive hash indexes and empty the index
		in order to free up memory in the buffer pool chunks. */
		buf_pool_shrink(change_size);

	} else if (srv_buf_pool_curr_size + min_change_size
		   < srv_buf_pool_size) {

		/* Enlarge the buffer pool by at least one megabyte */

		change_size = srv_buf_pool_size - srv_buf_pool_curr_size;

		buf_pool_mutex_exit_all();

		buf_pool_increase(change_size);
	} else {
		srv_buf_pool_size = srv_buf_pool_old_size;

		buf_pool_mutex_exit_all();

		return;
	}

	buf_pool_page_hash_rebuild();
}
/****************************************************************//**
Remove the sentinel block for the watch before replacing it with a real block.
buf_page_watch_clear() or buf_page_watch_occurred() will notice that
the block has been replaced with the real block.
@return	reference count, to be added to the replacement block */
static
ulint
buf_pool_watch_remove(
/*==================*/
	buf_pool_t*	buf_pool,	/*!< buffer pool instance */
	ulint		fold,		/*!< in: buf_page_address_fold(
					space, offset) */
	buf_page_t*	watch)		/*!< in/out: sentinel for watch */
{
	ulint	buf_fix_count;

	ut_ad(buf_pool_mutex_own(buf_pool));

	HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, watch);
	ut_d(watch->in_page_hash = FALSE);

	buf_fix_count = watch->buf_fix_count;
	watch->buf_fix_count = 0;
	watch->state = BUF_BLOCK_POOL_WATCH;

	return(buf_fix_count);
}
2057
/****************************************************************//**
Stop watching if the page has been read in.
buf_pool_watch_set(space,offset) must have returned NULL before. */
buf_pool_watch_unset(
/*=================*/
    ulint   space,  /*!< in: space id */
    ulint   offset) /*!< in: page number */
    buf_pool_t* buf_pool = buf_pool_get(space, offset);
    ulint       fold = buf_page_address_fold(space, offset);

    buf_pool_mutex_enter(buf_pool);
    bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
    /* The page must exist because buf_pool_watch_set()
    increments buf_fix_count. */

    if (UNIV_UNLIKELY(!buf_pool_watch_is_sentinel(buf_pool, bpage))) {
        mutex_t* mutex = buf_page_get_mutex(bpage);

        ut_a(bpage->buf_fix_count > 0);
        bpage->buf_fix_count--;

        ut_a(bpage->buf_fix_count > 0);

        if (UNIV_LIKELY(!--bpage->buf_fix_count)) {
            buf_pool_watch_remove(buf_pool, fold, bpage);

    buf_pool_mutex_exit(buf_pool);
/****************************************************************//**
Check if the page has been read in.
This may only be called after buf_pool_watch_set(space,offset)
has returned NULL and before invoking buf_pool_watch_unset(space,offset).
@return FALSE if the given page was not read in, TRUE if it was */
buf_pool_watch_occurred(
/*====================*/
    ulint   space,  /*!< in: space id */
    ulint   offset) /*!< in: page number */
    buf_pool_t* buf_pool = buf_pool_get(space, offset);
    ulint       fold = buf_page_address_fold(space, offset);

    buf_pool_mutex_enter(buf_pool);

    bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
    /* The page must exist because buf_pool_watch_set()
    increments buf_fix_count. */

    ret = !buf_pool_watch_is_sentinel(buf_pool, bpage);
    buf_pool_mutex_exit(buf_pool);
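#if 0
/* A minimal usage sketch of the watch API above (added for
illustration, not part of the original file; the helper name, and the
assumption that the caller holds buf_pool->mutex around
buf_pool_watch_set() as buf_page_get_gen() does, are ours). It shows
the contract: buf_pool_watch_set() returning NULL arms a sentinel,
after which buf_pool_watch_occurred() may be consulted, and
buf_pool_watch_unset() must always be called to drop the reference. */
static ibool
watch_usage_sketch(ulint space, ulint offset)
{
    buf_pool_t* buf_pool = buf_pool_get(space, offset);
    ulint       fold = buf_page_address_fold(space, offset);
    ibool       read_in;

    buf_pool_mutex_enter(buf_pool);

    if (buf_pool_watch_set(space, offset, fold) != NULL) {
        /* The page was already resident; no watch was armed. */
        buf_pool_mutex_exit(buf_pool);
        return(TRUE);
    }

    buf_pool_mutex_exit(buf_pool);

    /* ... work that may race with a concurrent read of the page ... */

    read_in = buf_pool_watch_occurred(space, offset);
    buf_pool_watch_unset(space, offset);

    return(read_in);
}
#endif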
/********************************************************************//**
Moves a page to the start of the buffer pool LRU list. This high-level
function can be used to prevent an important page from slipping out of
the buffer pool. */
buf_page_make_young(
/*================*/
    buf_page_t* bpage)  /*!< in: buffer block of a file page */
    buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);

    buf_pool_mutex_enter(buf_pool);

    ut_a(buf_page_in_file(bpage));

    buf_LRU_make_block_young(bpage);

    buf_pool_mutex_exit(buf_pool);

/********************************************************************//**
Sets the time of the first access of a page and moves a page to the
start of the buffer pool LRU list if it is too old. This high-level
function can be used to prevent an important page from slipping
out of the buffer pool. */
buf_page_set_accessed_make_young(
/*=============================*/
    buf_page_t* bpage,       /*!< in/out: buffer block of a file page */
    unsigned    access_time) /*!< in: bpage->access_time
                             read under mutex protection,
                             or 0 if unknown */
    buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);

    ut_ad(!buf_pool_mutex_own(buf_pool));
    ut_a(buf_page_in_file(bpage));

    if (buf_page_peek_if_too_old(bpage)) {
        buf_pool_mutex_enter(buf_pool);
        buf_LRU_make_block_young(bpage);
        buf_pool_mutex_exit(buf_pool);
    } else if (!access_time) {
        ulint   time_ms = ut_time_ms();
        buf_pool_mutex_enter(buf_pool);
        buf_page_set_accessed(bpage, time_ms);
        buf_pool_mutex_exit(buf_pool);
/********************************************************************//**
Resets the check_index_page_at_flush field of a page if found in the buffer
pool. */
buf_reset_check_index_page_at_flush(
/*================================*/
    ulint   space,  /*!< in: space id */
    ulint   offset) /*!< in: page number */
    buf_pool_t* buf_pool = buf_pool_get(space, offset);

    buf_pool_mutex_enter(buf_pool);

    block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset);

    if (block && buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE) {
        ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page));
        block->check_index_page_at_flush = FALSE;

    buf_pool_mutex_exit(buf_pool);
/********************************************************************//**
Returns the current state of is_hashed of a page. FALSE if the page is
not in the pool. NOTE that this operation does not fix the page in the
pool if it is found there.
@return TRUE if page hash index is built in search system */
buf_page_peek_if_search_hashed(
/*===========================*/
    ulint   space,  /*!< in: space id */
    ulint   offset) /*!< in: page number */
    buf_pool_t* buf_pool = buf_pool_get(space, offset);

    buf_pool_mutex_enter(buf_pool);

    block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset);

    if (!block || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {

        ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page));
        is_hashed = block->is_hashed;

    buf_pool_mutex_exit(buf_pool);
#ifdef UNIV_DEBUG_FILE_ACCESSES
/********************************************************************//**
Sets file_page_was_freed TRUE if the page is found in the buffer pool.
This function should be called when we free a file page and want the
debug version to check that it is not accessed any more unless
reallocated.
@return control block if found in page hash table, otherwise NULL */
buf_page_set_file_page_was_freed(
/*=============================*/
    ulint   space,  /*!< in: space id */
    ulint   offset) /*!< in: page number */
    buf_pool_t* buf_pool = buf_pool_get(space, offset);

    buf_pool_mutex_enter(buf_pool);

    bpage = buf_page_hash_get(buf_pool, space, offset);

        ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
        bpage->file_page_was_freed = TRUE;

    buf_pool_mutex_exit(buf_pool);

/********************************************************************//**
Sets file_page_was_freed FALSE if the page is found in the buffer pool.
This function should be called when we free a file page and want the
debug version to check that it is not accessed any more unless
reallocated.
@return control block if found in page hash table, otherwise NULL */
buf_page_reset_file_page_was_freed(
/*===============================*/
    ulint   space,  /*!< in: space id */
    ulint   offset) /*!< in: page number */
    buf_pool_t* buf_pool = buf_pool_get(space, offset);

    buf_pool_mutex_enter(buf_pool);

    bpage = buf_page_hash_get(buf_pool, space, offset);

        ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
        bpage->file_page_was_freed = FALSE;

    buf_pool_mutex_exit(buf_pool);

#endif /* UNIV_DEBUG_FILE_ACCESSES */
/********************************************************************//**
Get read access to a compressed page (usually of type
FIL_PAGE_TYPE_ZBLOB or FIL_PAGE_TYPE_ZBLOB2).
The page must be released with buf_page_release_zip().
NOTE: the page is not protected by any latch. Mutual exclusion has to
be implemented at a higher level. In other words, all possible
accesses to a given page through this function must be protected by
the same set of mutexes or latches.
@return pointer to the block */
buf_page_get_zip(
/*=============*/
    ulint   space,   /*!< in: space id */
    ulint   zip_size,/*!< in: compressed page size */
    ulint   offset)  /*!< in: page number */
    mutex_t*    block_mutex;

    unsigned    access_time;
    buf_pool_t* buf_pool = buf_pool_get(space, offset);

#ifndef UNIV_LOG_DEBUG
    ut_ad(!ibuf_inside());

    buf_pool->stat.n_page_gets++;

    buf_pool_mutex_enter(buf_pool);

    bpage = buf_page_hash_get(buf_pool, space, offset);

    ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));

    /* Page not in buf_pool: needs to be read from file */

    buf_pool_mutex_exit(buf_pool);

    buf_read_page(space, zip_size, offset);

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
    ut_a(++buf_dbg_counter % 37 || buf_validate());
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

    if (UNIV_UNLIKELY(!bpage->zip.data)) {
        /* There is no compressed page. */

        buf_pool_mutex_exit(buf_pool);

    ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));

    switch (buf_page_get_state(bpage)) {
    case BUF_BLOCK_NOT_USED:
    case BUF_BLOCK_READY_FOR_USE:
    case BUF_BLOCK_MEMORY:
    case BUF_BLOCK_REMOVE_HASH:
    case BUF_BLOCK_ZIP_FREE:

    case BUF_BLOCK_ZIP_PAGE:
    case BUF_BLOCK_ZIP_DIRTY:
        block_mutex = &buf_pool->zip_mutex;
        mutex_enter(block_mutex);
        bpage->buf_fix_count++;

    case BUF_BLOCK_FILE_PAGE:
        block_mutex = &((buf_block_t*) bpage)->mutex;
        mutex_enter(block_mutex);

        /* Discard the uncompressed page frame if possible. */
        if (buf_LRU_free_block(bpage, FALSE, NULL)

            mutex_exit(block_mutex);

        buf_block_buf_fix_inc((buf_block_t*) bpage,
                              __FILE__, __LINE__);

    must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ;
    access_time = buf_page_is_accessed(bpage);

    buf_pool_mutex_exit(buf_pool);

    mutex_exit(block_mutex);

    buf_page_set_accessed_make_young(bpage, access_time);

#ifdef UNIV_DEBUG_FILE_ACCESSES
    ut_a(!bpage->file_page_was_freed);

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
    ut_a(++buf_dbg_counter % 5771 || buf_validate());
    ut_a(bpage->buf_fix_count > 0);
    ut_a(buf_page_in_file(bpage));
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

    /* Let us wait until the read operation completes */

        enum buf_io_fix io_fix;

        mutex_enter(block_mutex);
        io_fix = buf_page_get_io_fix(bpage);
        mutex_exit(block_mutex);

        if (io_fix == BUF_IO_READ) {

            os_thread_sleep(WAIT_FOR_READ);

#ifdef UNIV_IBUF_COUNT_DEBUG
    ut_a(ibuf_count_get(buf_page_get_space(bpage),
                        buf_page_get_page_no(bpage)) == 0);
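#if 0
/* Usage sketch for buf_page_get_zip() (added for illustration; the
helper name is an assumption). The caller provides mutual exclusion
at a higher level, reads the compressed frame, and must pair the get
with buf_page_release_zip() as required by the comment above. */
static void
read_zip_page_sketch(ulint space, ulint zip_size, ulint offset)
{
    buf_page_t* bpage = buf_page_get_zip(space, zip_size, offset);

    /* ... read from bpage->zip.data; no page latch is held here ... */

    buf_page_release_zip(bpage);
}
#endif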
/********************************************************************//**
Initialize some fields of a control block. */
buf_block_init_low(
/*===============*/
    buf_block_t*    block)  /*!< in: block to init */
    block->check_index_page_at_flush = FALSE;
    block->index = NULL;

    block->n_hash_helps = 0;
    block->is_hashed = FALSE;
    block->n_fields = 1;

    block->left_side = TRUE;

#endif /* !UNIV_HOTBACKUP */
/********************************************************************//**
Decompress a block.
@return TRUE if successful */
buf_zip_decompress(
/*===============*/
    buf_block_t* block, /*!< in/out: block */
    ibool        check) /*!< in: TRUE=verify the page checksum */
    const byte* frame = block->page.zip.data;
    ulint       stamp_checksum = mach_read_from_4(
        frame + FIL_PAGE_SPACE_OR_CHKSUM);

    ut_ad(buf_block_get_zip_size(block));
    ut_a(buf_block_get_space(block) != 0);

    if (UNIV_LIKELY(check && stamp_checksum != BUF_NO_CHECKSUM_MAGIC)) {
        ulint   calc_checksum = page_zip_calc_checksum(
            frame, page_zip_get_size(&block->page.zip));

        if (UNIV_UNLIKELY(stamp_checksum != calc_checksum)) {
            ut_print_timestamp(stderr);
                " InnoDB: compressed page checksum mismatch"
                " (space %u page %u): %lu != %lu\n",
                block->page.space, block->page.offset,
                stamp_checksum, calc_checksum);

    switch (fil_page_get_type(frame)) {
    case FIL_PAGE_INDEX:
        if (page_zip_decompress(&block->page.zip,
                                block->frame, TRUE)) {

            "InnoDB: unable to decompress space %lu page %lu\n",
            (ulong) block->page.space,
            (ulong) block->page.offset);

    case FIL_PAGE_TYPE_ALLOCATED:
    case FIL_PAGE_INODE:
    case FIL_PAGE_IBUF_BITMAP:
    case FIL_PAGE_TYPE_FSP_HDR:
    case FIL_PAGE_TYPE_XDES:
    case FIL_PAGE_TYPE_ZBLOB:
    case FIL_PAGE_TYPE_ZBLOB2:
        /* Copy to uncompressed storage. */
        memcpy(block->frame, frame,
               buf_block_get_zip_size(block));

        ut_print_timestamp(stderr);
            " InnoDB: unknown compressed page"
            fil_page_get_type(frame));
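/* Illustrative note (not in the original source): for an 8 KiB
compressed page, the check above compares the stamp read from
frame + FIL_PAGE_SPACE_OR_CHKSUM against
page_zip_calc_checksum(frame, 8192), and is skipped when the stamp
equals BUF_NO_CHECKSUM_MAGIC, i.e. for pages written with checksums
disabled. */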
#ifndef UNIV_HOTBACKUP
/*******************************************************************//**
Gets the block to whose frame the pointer is pointing to if found
in this buffer pool instance.
@return pointer to block */
buf_block_align_instance(
/*=====================*/
    buf_pool_t* buf_pool, /*!< in: buffer in which the block
                          resides */
    const byte* ptr)      /*!< in: pointer to a frame */
    /* TODO: protect buf_pool->chunks with a mutex (it will
    currently remain constant after buf_pool_init()) */
    for (chunk = buf_pool->chunks, i = buf_pool->n_chunks; i--; chunk++) {
        lint    offs = ptr - chunk->blocks->frame;

        if (UNIV_UNLIKELY(offs < 0)) {

        offs >>= UNIV_PAGE_SIZE_SHIFT;

        if (UNIV_LIKELY((ulint) offs < chunk->size)) {
            buf_block_t*    block = &chunk->blocks[offs];

            /* The function buf_chunk_init() invokes
            buf_block_init() so that block[n].frame ==
            block->frame + n * UNIV_PAGE_SIZE. Check it. */
            ut_ad(block->frame == page_align(ptr));

            /* A thread that updates these fields must
            hold buf_pool->mutex and block->mutex. Acquire
            only the latter. */
            mutex_enter(&block->mutex);

            switch (buf_block_get_state(block)) {
            case BUF_BLOCK_ZIP_FREE:
            case BUF_BLOCK_ZIP_PAGE:
            case BUF_BLOCK_ZIP_DIRTY:
                /* These types should only be used in
                the compressed buffer pool, whose
                memory is allocated from
                buf_pool->chunks, in UNIV_PAGE_SIZE
                blocks flagged as BUF_BLOCK_MEMORY. */

            case BUF_BLOCK_NOT_USED:
            case BUF_BLOCK_READY_FOR_USE:
            case BUF_BLOCK_MEMORY:
                /* Some data structures contain
                "guess" pointers to file pages. The
                file pages may have been freed and
                reused. Do not complain. */

            case BUF_BLOCK_REMOVE_HASH:
                /* buf_LRU_block_remove_hashed_page()
                will overwrite the FIL_PAGE_OFFSET and
                FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID with
                0xff and set the state to
                BUF_BLOCK_REMOVE_HASH. */
                ut_ad(page_get_space_id(page_align(ptr))
                      == 0xffffffff);
                ut_ad(page_get_page_no(page_align(ptr))
                      == 0xffffffff);

            case BUF_BLOCK_FILE_PAGE:
                ut_ad(block->page.space
                      == page_get_space_id(page_align(ptr)));
                ut_ad(block->page.offset
                      == page_get_page_no(page_align(ptr)));

            mutex_exit(&block->mutex);
#endif /* UNIV_DEBUG */
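#if 0
/* Sketch (added for illustration; the helper is hypothetical) of the
pointer arithmetic used by buf_block_align_instance() above: within
one chunk, the frame index of ptr is its byte offset from the first
frame shifted down by UNIV_PAGE_SIZE_SHIFT, e.g. with 16 KiB pages a
pointer into the third frame gives offs >> 14 == 2. */
static ulint
frame_index_in_chunk_sketch(const buf_chunk_t* chunk, const byte* ptr)
{
    lint    offs = ptr - chunk->blocks->frame;

    ut_a(offs >= 0);
    offs >>= UNIV_PAGE_SIZE_SHIFT;  /* divide by UNIV_PAGE_SIZE */
    ut_a((ulint) offs < chunk->size);

    return((ulint) offs);
}
#endif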
/*******************************************************************//**
Gets the block to whose frame the pointer is pointing to.
@return pointer to block, never NULL */
buf_block_align(
/*============*/
    const byte* ptr)    /*!< in: pointer to a frame */
    for (i = 0; i < srv_buf_pool_instances; i++) {

        block = buf_block_align_instance(
            buf_pool_from_array(i), ptr);

    /* The block should always be found. */
/********************************************************************//**
Find out if a pointer belongs to a buf_block_t. It can be a pointer to
the buf_block_t itself or a member of it. This function checks one of
the buffer pool instances.
@return TRUE if ptr belongs to a buf_block_t struct */
buf_pointer_is_block_field_instance(
/*================================*/
    buf_pool_t* buf_pool, /*!< in: buffer pool instance */
    const void* ptr)      /*!< in: pointer not dereferenced */
    const buf_chunk_t*          chunk = buf_pool->chunks;
    const buf_chunk_t* const    echunk = chunk + buf_pool->n_chunks;

    /* TODO: protect buf_pool->chunks with a mutex (it will
    currently remain constant after buf_pool_init()) */
    while (chunk < echunk) {
        if (ptr >= (void *)chunk->blocks
            && ptr < (void *)(chunk->blocks + chunk->size)) {

/********************************************************************//**
Find out if a pointer belongs to a buf_block_t. It can be a pointer to
the buf_block_t itself or a member of it.
@return TRUE if ptr belongs to a buf_block_t struct */
buf_pointer_is_block_field(
/*=======================*/
    const void* ptr)    /*!< in: pointer not dereferenced */
    for (i = 0; i < srv_buf_pool_instances; i++) {

        found = buf_pointer_is_block_field_instance(
            buf_pool_from_array(i), ptr);
/********************************************************************//**
Find out if a buffer block was created by buf_chunk_init().
@return TRUE if "block" has been added to buf_pool->free by buf_chunk_init() */
buf_block_is_uncompressed(
/*======================*/
    buf_pool_t*        buf_pool, /*!< in: buffer pool instance */
    const buf_block_t* block)    /*!< in: pointer to block,
                                 not dereferenced */
    ut_ad(buf_pool_mutex_own(buf_pool));

    if (UNIV_UNLIKELY((((ulint) block) % sizeof *block) != 0)) {
        /* The pointer should be aligned. */

    return(buf_pointer_is_block_field_instance(buf_pool, (void *)block));
/********************************************************************//**
This is the general function used to get access to a database page.
@return pointer to the block or NULL */
buf_page_get_gen(
/*=============*/
    ulint        space,   /*!< in: space id */
    ulint        zip_size,/*!< in: compressed page size in bytes
                          or 0 for uncompressed pages */
    ulint        offset,  /*!< in: page number */
    ulint        rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
    buf_block_t* guess,   /*!< in: guessed block or NULL */
    ulint        mode,    /*!< in: BUF_GET, BUF_GET_IF_IN_POOL,
                          BUF_GET_NO_LATCH, or
                          BUF_GET_IF_IN_POOL_OR_WATCH */
    const char*  file,    /*!< in: file name */
    ulint        line,    /*!< in: line where called */
    mtr_t*       mtr)     /*!< in: mini-transaction */
    unsigned    access_time;

    buf_pool_t* buf_pool = buf_pool_get(space, offset);

    ut_ad(mtr->state == MTR_ACTIVE);
    ut_ad((rw_latch == RW_S_LATCH)
          || (rw_latch == RW_X_LATCH)
          || (rw_latch == RW_NO_LATCH));
    ut_ad((mode != BUF_GET_NO_LATCH) || (rw_latch == RW_NO_LATCH));
    ut_ad(mode == BUF_GET
          || mode == BUF_GET_IF_IN_POOL
          || mode == BUF_GET_NO_LATCH
          || mode == BUF_GET_IF_IN_POOL_OR_WATCH);
    ut_ad(zip_size == fil_space_get_zip_size(space));
    ut_ad(ut_is_2pow(zip_size));
#ifndef UNIV_LOG_DEBUG
    ut_ad(!ibuf_inside() || ibuf_page(space, zip_size, offset, NULL));

    buf_pool->stat.n_page_gets++;
    fold = buf_page_address_fold(space, offset);

    buf_pool_mutex_enter(buf_pool);

        /* If the guess is a compressed page descriptor that
        has been allocated by buf_buddy_alloc(), it may have
        been invalidated by buf_buddy_relocate(). In that
        case, block could point to something that happens to
        contain the expected bits in block->page. Similarly,
        the guess may be pointing to a buffer pool chunk that
        has been released when resizing the buffer pool. */

        if (!buf_block_is_uncompressed(buf_pool, block)
            || offset != block->page.offset
            || space != block->page.space
            || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {

            block = guess = NULL;

            ut_ad(!block->page.in_zip_hash);
            ut_ad(block->page.in_page_hash);

    if (block == NULL) {
        block = (buf_block_t*) buf_page_hash_get_low(
            buf_pool, space, offset, fold);

    if (block && buf_pool_watch_is_sentinel(buf_pool, &block->page)) {

    if (block == NULL) {
        /* Page not in buf_pool: needs to be read from file */

        if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
            block = (buf_block_t*) buf_pool_watch_set(
                space, offset, fold);

            if (UNIV_LIKELY_NULL(block)) {

        buf_pool_mutex_exit(buf_pool);

        if (mode == BUF_GET_IF_IN_POOL
            || mode == BUF_GET_IF_IN_POOL_OR_WATCH) {

        if (buf_read_page(space, zip_size, offset)) {

        } else if (retries < BUF_PAGE_READ_MAX_RETRIES) {

            fprintf(stderr, "InnoDB: Error: Unable"
                " to read tablespace %lu page no"
                " %lu into the buffer pool after"
                "InnoDB: The most probable cause"
                " of this error may be that the"
                " table has been corrupted.\n"
                "InnoDB: You can try to fix this"
                " innodb_force_recovery.\n"
                "InnoDB: Please see reference manual"
                " for more details.\n"
                "InnoDB: Aborting...\n",
                BUF_PAGE_READ_MAX_RETRIES);

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
        ut_a(++buf_dbg_counter % 37 || buf_validate());
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
    ut_ad(page_zip_get_size(&block->page.zip) == zip_size);

    must_read = buf_block_get_io_fix(block) == BUF_IO_READ;

    if (must_read && mode == BUF_GET_IF_IN_POOL) {

        /* The page is being read to buffer pool,
        but we cannot wait around for the read to
        complete. */

        buf_pool_mutex_exit(buf_pool);

    switch (buf_block_get_state(block)) {

    case BUF_BLOCK_FILE_PAGE:

    case BUF_BLOCK_ZIP_PAGE:
    case BUF_BLOCK_ZIP_DIRTY:
        bpage = &block->page;
        /* Protect bpage->buf_fix_count. */
        mutex_enter(&buf_pool->zip_mutex);

        if (bpage->buf_fix_count
            || buf_page_get_io_fix(bpage) != BUF_IO_NONE) {

            /* This condition often occurs when the buffer
            is not buffer-fixed, but I/O-fixed by
            buf_page_init_for_read(). */
            mutex_exit(&buf_pool->zip_mutex);

            /* The block is buffer-fixed or I/O-fixed.
            Try again later. */
            buf_pool_mutex_exit(buf_pool);
            os_thread_sleep(WAIT_FOR_READ);

        /* Allocate an uncompressed page. */
        buf_pool_mutex_exit(buf_pool);
        mutex_exit(&buf_pool->zip_mutex);

        block = buf_LRU_get_free_block(buf_pool, 0);

        buf_pool_mutex_enter(buf_pool);
        mutex_enter(&block->mutex);

            buf_page_t* hash_bpage;

            hash_bpage = buf_page_hash_get_low(
                buf_pool, space, offset, fold);

            if (UNIV_UNLIKELY(bpage != hash_bpage)) {
                /* The buf_pool->page_hash was modified
                while buf_pool->mutex was released.
                Free the block that was allocated. */

                buf_LRU_block_free_non_file_page(block);
                mutex_exit(&block->mutex);

                block = (buf_block_t*) hash_bpage;

        if (UNIV_UNLIKELY
            (bpage->buf_fix_count
             || buf_page_get_io_fix(bpage) != BUF_IO_NONE)) {

            /* The block was buffer-fixed or I/O-fixed
            while buf_pool->mutex was not held by this thread.
            Free the block that was allocated and try again.
            This should be extremely unlikely. */

            buf_LRU_block_free_non_file_page(block);
            mutex_exit(&block->mutex);

            goto wait_until_unfixed;

        /* Move the compressed page from bpage to block,
        and uncompress it. */

        mutex_enter(&buf_pool->zip_mutex);

        buf_relocate(bpage, &block->page);
        buf_block_init_low(block);
        block->lock_hash_val = lock_rec_hash(space, offset);

        UNIV_MEM_DESC(&block->page.zip.data,
                      page_zip_get_size(&block->page.zip), block);

        if (buf_page_get_state(&block->page)
            == BUF_BLOCK_ZIP_PAGE) {
            UT_LIST_REMOVE(list, buf_pool->zip_clean,
                           &block->page);

            ut_ad(!block->page.in_flush_list);

            /* Relocate buf_pool->flush_list. */
            buf_flush_relocate_on_flush_list(bpage,
                                             &block->page);

        /* Buffer-fix, I/O-fix, and X-latch the block
        for the duration of the decompression.
        Also add the block to the unzip_LRU list. */
        block->page.state = BUF_BLOCK_FILE_PAGE;

        /* Insert at the front of unzip_LRU list */
        buf_unzip_LRU_add_block(block, FALSE);

        block->page.buf_fix_count = 1;
        buf_block_set_io_fix(block, BUF_IO_READ);
        rw_lock_x_lock_func(&block->lock, 0, file, line);

        UNIV_MEM_INVALID(bpage, sizeof *bpage);

        mutex_exit(&block->mutex);
        mutex_exit(&buf_pool->zip_mutex);
        buf_pool->n_pend_unzip++;

        buf_buddy_free(buf_pool, bpage, sizeof *bpage);

        buf_pool_mutex_exit(buf_pool);

        /* Decompress the page and apply buffered operations
        while not holding buf_pool->mutex or block->mutex. */
        success = buf_zip_decompress(block, srv_use_checksums);

        if (UNIV_LIKELY(!recv_no_ibuf_operations)) {
            ibuf_merge_or_delete_for_page(block, space, offset,
                                          zip_size, TRUE);

        /* Unfix and unlatch the block. */
        buf_pool_mutex_enter(buf_pool);
        mutex_enter(&block->mutex);
        block->page.buf_fix_count--;
        buf_block_set_io_fix(block, BUF_IO_NONE);
        mutex_exit(&block->mutex);
        buf_pool->n_pend_unzip--;
        rw_lock_x_unlock(&block->lock);
    case BUF_BLOCK_ZIP_FREE:
    case BUF_BLOCK_NOT_USED:
    case BUF_BLOCK_READY_FOR_USE:
    case BUF_BLOCK_MEMORY:
    case BUF_BLOCK_REMOVE_HASH:

    ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);

    mutex_enter(&block->mutex);
#if UNIV_WORD_SIZE == 4
    /* On 32-bit systems, there is no padding in buf_page_t. On
    other systems, Valgrind could complain about uninitialized pad
    bytes. */
    UNIV_MEM_ASSERT_RW(&block->page, sizeof block->page);

    buf_block_buf_fix_inc(block, file, line);

    mutex_exit(&block->mutex);

    /* Check if this is the first access to the page */

    access_time = buf_page_is_accessed(&block->page);

    buf_pool_mutex_exit(buf_pool);

    buf_page_set_accessed_make_young(&block->page, access_time);

#ifdef UNIV_DEBUG_FILE_ACCESSES
    ut_a(!block->page.file_page_was_freed);

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
    ut_a(++buf_dbg_counter % 5771 || buf_validate());
    ut_a(block->page.buf_fix_count > 0);
    ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

    /* Let us wait until the read operation completes */

        enum buf_io_fix io_fix;

        mutex_enter(&block->mutex);
        io_fix = buf_block_get_io_fix(block);
        mutex_exit(&block->mutex);

        if (io_fix == BUF_IO_READ) {

            os_thread_sleep(WAIT_FOR_READ);

        fix_type = MTR_MEMO_BUF_FIX;

        rw_lock_s_lock_func(&(block->lock), 0, file, line);

        fix_type = MTR_MEMO_PAGE_S_FIX;

        ut_ad(rw_latch == RW_X_LATCH);
        rw_lock_x_lock_func(&(block->lock), 0, file, line);

        fix_type = MTR_MEMO_PAGE_X_FIX;

    mtr_memo_push(mtr, block, fix_type);

    /* In the case of a first access, try to apply linear
    read-ahead */

    buf_read_ahead_linear(space, zip_size, offset);

#ifdef UNIV_IBUF_COUNT_DEBUG
    ut_a(ibuf_count_get(buf_block_get_space(block),
                        buf_block_get_page_no(block)) == 0);
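#if 0
/* Usage sketch for buf_page_get_gen() (added for illustration; the
helper is hypothetical, and real callers normally go through wrapper
macros rather than spelling out every argument). BUF_GET waits until
the page is resident, so the return value is non-NULL here; the
S-latch and the buffer-fix are released by mtr_commit(). */
static void
read_page_sketch(ulint space, ulint zip_size, ulint offset, mtr_t* mtr)
{
    buf_block_t* block = buf_page_get_gen(
        space, zip_size, offset, RW_S_LATCH,
        NULL,   /* no guessed block */
        BUF_GET, __FILE__, __LINE__, mtr);

    ut_a(block != NULL);

    /* ... read from block->frame while the mtr holds the latch ... */
}
#endif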
/********************************************************************//**
This is the general function used to get optimistic access to a database
page.
@return TRUE if success */
buf_page_optimistic_get(
/*====================*/
    ulint        rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */
    buf_block_t* block,   /*!< in: guessed buffer block */
    ib_uint64_t  modify_clock,/*!< in: modify clock value if mode is
                              ..._GUESS_ON_CLOCK */
    const char*  file,    /*!< in: file name */
    ulint        line,    /*!< in: line where called */
    mtr_t*       mtr)     /*!< in: mini-transaction */
    buf_pool_t* buf_pool;
    unsigned    access_time;

    ut_ad(mtr->state == MTR_ACTIVE);
    ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));

    mutex_enter(&block->mutex);

    if (UNIV_UNLIKELY(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE)) {

        mutex_exit(&block->mutex);

    buf_block_buf_fix_inc(block, file, line);

    mutex_exit(&block->mutex);

    /* Check if this is the first access to the page.
    We do a dirty read on purpose, to avoid mutex contention.
    This field is only used for heuristic purposes; it does not
    affect correctness. */

    access_time = buf_page_is_accessed(&block->page);
    buf_page_set_accessed_make_young(&block->page, access_time);

    ut_ad(!ibuf_inside()
          || ibuf_page(buf_block_get_space(block),
                       buf_block_get_zip_size(block),
                       buf_block_get_page_no(block), NULL));

    if (rw_latch == RW_S_LATCH) {
        success = rw_lock_s_lock_nowait(&(block->lock),
                                        file, line);
        fix_type = MTR_MEMO_PAGE_S_FIX;

        success = rw_lock_x_lock_func_nowait(&(block->lock),
                                             file, line);
        fix_type = MTR_MEMO_PAGE_X_FIX;

    if (UNIV_UNLIKELY(!success)) {
        mutex_enter(&block->mutex);
        buf_block_buf_fix_dec(block);
        mutex_exit(&block->mutex);

    if (UNIV_UNLIKELY(modify_clock != block->modify_clock)) {
        buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);

        if (rw_latch == RW_S_LATCH) {
            rw_lock_s_unlock(&(block->lock));

            rw_lock_x_unlock(&(block->lock));

        mutex_enter(&block->mutex);
        buf_block_buf_fix_dec(block);
        mutex_exit(&block->mutex);

    mtr_memo_push(mtr, block, fix_type);

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
    ut_a(++buf_dbg_counter % 5771 || buf_validate());
    ut_a(block->page.buf_fix_count > 0);
    ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

#ifdef UNIV_DEBUG_FILE_ACCESSES
    ut_a(block->page.file_page_was_freed == FALSE);

    if (UNIV_UNLIKELY(!access_time)) {
        /* In the case of a first access, try to apply linear
        read-ahead */

        buf_read_ahead_linear(buf_block_get_space(block),
                              buf_block_get_zip_size(block),
                              buf_block_get_page_no(block));

#ifdef UNIV_IBUF_COUNT_DEBUG
    ut_a(ibuf_count_get(buf_block_get_space(block),
                        buf_block_get_page_no(block)) == 0);

    buf_pool = buf_pool_from_block(block);
    buf_pool->stat.n_page_gets++;
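#if 0
/* Sketch of the optimistic re-access pattern that
buf_page_optimistic_get() serves (added for illustration; the helper
is hypothetical). The caller saved block->modify_clock while it still
held a latch on the page; if the block was evicted or modified in the
meantime, the clock comparison fails, FALSE is returned, and the
caller must fall back to a regular lookup by page number. */
static ibool
reaccess_page_sketch(buf_block_t* block, ib_uint64_t saved_modify_clock,
                     mtr_t* mtr)
{
    return(buf_page_optimistic_get(RW_S_LATCH, block,
                                   saved_modify_clock,
                                   __FILE__, __LINE__, mtr));
}
#endif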
/********************************************************************//**
This is used to get access to a known database page, when no waiting can be
done. For example, if a search in an adaptive hash index leads us to this
page.
@return TRUE if success */
buf_page_get_known_nowait(
/*======================*/
    ulint        rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */
    buf_block_t* block,   /*!< in: the known page */
    ulint        mode,    /*!< in: BUF_MAKE_YOUNG or BUF_KEEP_OLD */
    const char*  file,    /*!< in: file name */
    ulint        line,    /*!< in: line where called */
    mtr_t*       mtr)     /*!< in: mini-transaction */
    buf_pool_t* buf_pool;

    ut_ad(mtr->state == MTR_ACTIVE);
    ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));

    mutex_enter(&block->mutex);

    if (buf_block_get_state(block) == BUF_BLOCK_REMOVE_HASH) {
        /* Another thread is just freeing the block from the LRU list
        of the buffer pool: do not try to access this page; this
        attempt to access the page can only come through the hash
        index because when the buffer block state is ..._REMOVE_HASH,
        we have already removed it from the page address hash table
        of the buffer pool. */

        mutex_exit(&block->mutex);

    ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);

    buf_block_buf_fix_inc(block, file, line);

    mutex_exit(&block->mutex);

    buf_pool = buf_pool_from_block(block);

    if (mode == BUF_MAKE_YOUNG && buf_page_peek_if_too_old(&block->page)) {
        buf_pool_mutex_enter(buf_pool);
        buf_LRU_make_block_young(&block->page);
        buf_pool_mutex_exit(buf_pool);
    } else if (!buf_page_is_accessed(&block->page)) {
        /* Above, we do a dirty read on purpose, to avoid
        mutex contention. The field buf_page_t::access_time
        is only used for heuristic purposes. Writes to the
        field must be protected by mutex, however. */
        ulint   time_ms = ut_time_ms();

        buf_pool_mutex_enter(buf_pool);
        buf_page_set_accessed(&block->page, time_ms);
        buf_pool_mutex_exit(buf_pool);

    ut_ad(!ibuf_inside() || (mode == BUF_KEEP_OLD));

    if (rw_latch == RW_S_LATCH) {
        success = rw_lock_s_lock_nowait(&(block->lock),
                                        file, line);
        fix_type = MTR_MEMO_PAGE_S_FIX;

        success = rw_lock_x_lock_func_nowait(&(block->lock),
                                             file, line);
        fix_type = MTR_MEMO_PAGE_X_FIX;

        mutex_enter(&block->mutex);
        buf_block_buf_fix_dec(block);
        mutex_exit(&block->mutex);

    mtr_memo_push(mtr, block, fix_type);

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
    ut_a(++buf_dbg_counter % 5771 || buf_validate());
    ut_a(block->page.buf_fix_count > 0);
    ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#ifdef UNIV_DEBUG_FILE_ACCESSES
    ut_a(block->page.file_page_was_freed == FALSE);

#ifdef UNIV_IBUF_COUNT_DEBUG
    ut_a((mode == BUF_KEEP_OLD)
         || (ibuf_count_get(buf_block_get_space(block),
                            buf_block_get_page_no(block)) == 0));

    buf_pool->stat.n_page_gets++;
/*******************************************************************//**
Given a tablespace id and page number tries to get that page. If the
page is not in the buffer pool it is not loaded and NULL is returned.
Suitable for using when holding the kernel mutex.
@return pointer to a page or NULL */
buf_page_try_get_func(
/*==================*/
    ulint       space_id,/*!< in: tablespace id */
    ulint       page_no, /*!< in: page number */
    const char* file,    /*!< in: file name */
    ulint       line,    /*!< in: line where called */
    mtr_t*      mtr)     /*!< in: mini-transaction */
    buf_pool_t* buf_pool = buf_pool_get(space_id, page_no);

    ut_ad(mtr->state == MTR_ACTIVE);

    buf_pool_mutex_enter(buf_pool);
    block = buf_block_hash_get(buf_pool, space_id, page_no);

    if (!block || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
        buf_pool_mutex_exit(buf_pool);

    ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page));

    mutex_enter(&block->mutex);
    buf_pool_mutex_exit(buf_pool);

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
    ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
    ut_a(buf_block_get_space(block) == space_id);
    ut_a(buf_block_get_page_no(block) == page_no);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

    buf_block_buf_fix_inc(block, file, line);
    mutex_exit(&block->mutex);

    fix_type = MTR_MEMO_PAGE_S_FIX;
    success = rw_lock_s_lock_nowait(&block->lock, file, line);

        /* Let us try to get an X-latch. If the current thread
        is holding an X-latch on the page, we cannot get an
        S-latch. */

        fix_type = MTR_MEMO_PAGE_X_FIX;
        success = rw_lock_x_lock_func_nowait(&block->lock,
                                             file, line);

        mutex_enter(&block->mutex);
        buf_block_buf_fix_dec(block);
        mutex_exit(&block->mutex);

    mtr_memo_push(mtr, block, fix_type);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
    ut_a(++buf_dbg_counter % 5771 || buf_validate());
    ut_a(block->page.buf_fix_count > 0);
    ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#ifdef UNIV_DEBUG_FILE_ACCESSES
    ut_a(block->page.file_page_was_freed == FALSE);
#endif /* UNIV_DEBUG_FILE_ACCESSES */
    buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);

    buf_pool->stat.n_page_gets++;

#ifdef UNIV_IBUF_COUNT_DEBUG
    ut_a(ibuf_count_get(buf_block_get_space(block),
                        buf_block_get_page_no(block)) == 0);
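#if 0
/* Usage sketch for buf_page_try_get_func() (added for illustration;
the helper is hypothetical). Because the function never waits, it is
suitable in contexts such as holding the kernel mutex, as noted above;
a NULL return simply means the page is skipped. */
static void
peek_page_sketch(ulint space_id, ulint page_no, mtr_t* mtr)
{
    buf_block_t* block = buf_page_try_get_func(
        space_id, page_no, __FILE__, __LINE__, mtr);

    if (block == NULL) {
        return; /* not resident, or the latch was not available */
    }

    /* ... inspect block->frame; released at mtr_commit() ... */
}
#endif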
/********************************************************************//**
Initialize some fields of a control block. */
buf_page_init_low(
/*==============*/
    buf_page_t* bpage)  /*!< in: block to init */
    bpage->flush_type = BUF_FLUSH_LRU;
    bpage->io_fix = BUF_IO_NONE;
    bpage->buf_fix_count = 0;
    bpage->freed_page_clock = 0;
    bpage->access_time = 0;
    bpage->newest_modification = 0;
    bpage->oldest_modification = 0;
    HASH_INVALIDATE(bpage, hash);
#ifdef UNIV_DEBUG_FILE_ACCESSES
    bpage->file_page_was_freed = FALSE;
#endif /* UNIV_DEBUG_FILE_ACCESSES */
/********************************************************************//**
Initializes a page in the buffer pool. */
buf_page_init(
/*==========*/
    ulint        space,  /*!< in: space id */
    ulint        offset, /*!< in: offset of the page within space
                         in units of a page */
    ulint        fold,   /*!< in: buf_page_address_fold(space,offset) */
    buf_block_t* block)  /*!< in: block to init */
    buf_page_t* hash_page;
    buf_pool_t* buf_pool = buf_pool_get(space, offset);

    ut_ad(buf_pool_mutex_own(buf_pool));
    ut_ad(mutex_own(&(block->mutex)));
    ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);

    /* Set the state of the block */
    buf_block_set_file_page(block, space, offset);

#ifdef UNIV_DEBUG_VALGRIND

    /* Silence valid Valgrind warnings about uninitialized
    data being written to data files. There are some unused
    bytes on some pages that InnoDB does not initialize. */
    UNIV_MEM_VALID(block->frame, UNIV_PAGE_SIZE);

#endif /* UNIV_DEBUG_VALGRIND */

    buf_block_init_low(block);

    block->lock_hash_val = lock_rec_hash(space, offset);

    buf_page_init_low(&block->page);

    /* Insert into the hash table of file pages */

    hash_page = buf_page_hash_get_low(buf_pool, space, offset, fold);

    if (UNIV_LIKELY(!hash_page)) {
    } else if (buf_pool_watch_is_sentinel(buf_pool, hash_page)) {
        /* Preserve the reference count. */
        ulint   buf_fix_count = hash_page->buf_fix_count;

        ut_a(buf_fix_count > 0);
        block->page.buf_fix_count += buf_fix_count;
        buf_pool_watch_remove(buf_pool, fold, hash_page);

            "InnoDB: Error: page %lu %lu already found"
            " in the hash table: %p, %p\n",
            (const void*) hash_page, (const void*) block);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
        mutex_exit(&block->mutex);
        buf_pool_mutex_exit(buf_pool);

#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

    ut_ad(!block->page.in_zip_hash);
    ut_ad(!block->page.in_page_hash);
    ut_d(block->page.in_page_hash = TRUE);
    HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
                fold, &block->page);
/********************************************************************//**
Initializes a page for reading into the buffer pool. If the page is
(1) already in buf_pool, or
(2) if we specify to read only ibuf pages and the page is not an ibuf page, or
(3) if the space is deleted or being deleted,
then this function does nothing.
Sets the io_fix flag to BUF_IO_READ and sets a non-recursive exclusive lock
on the buffer frame. The io-handler must take care that the flag is cleared
and the lock released later.
@return pointer to the block or NULL */
buf_page_init_for_read(
/*===================*/
    ulint*     err,     /*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED */
    ulint      mode,    /*!< in: BUF_READ_IBUF_PAGES_ONLY, ... */
    ulint      space,   /*!< in: space id */
    ulint      zip_size,/*!< in: compressed page size, or 0 */
    ibool      unzip,   /*!< in: TRUE=request uncompressed page */
    ib_int64_t tablespace_version,
                        /*!< in: prevents reading from a wrong
                        version of the tablespace in case we have done
                        DISCARD + IMPORT */
    ulint      offset)  /*!< in: page number */
    buf_page_t* bpage = NULL;
    buf_page_t* watch_page;

    buf_pool_t* buf_pool = buf_pool_get(space, offset);

    if (mode == BUF_READ_IBUF_PAGES_ONLY) {
        /* It is a read-ahead within an ibuf routine */

        ut_ad(!ibuf_bitmap_page(zip_size, offset));
        ut_ad(ibuf_inside());

        if (!recv_no_ibuf_operations
            && !ibuf_page(space, zip_size, offset, &mtr)) {

        ut_ad(mode == BUF_READ_ANY_PAGE);

    if (zip_size && UNIV_LIKELY(!unzip)
        && UNIV_LIKELY(!recv_recovery_is_on())) {

        block = buf_LRU_get_free_block(buf_pool, 0);

        ut_ad(buf_pool_from_block(block) == buf_pool);

    fold = buf_page_address_fold(space, offset);

    buf_pool_mutex_enter(buf_pool);

    watch_page = buf_page_hash_get_low(buf_pool, space, offset, fold);
    if (watch_page && !buf_pool_watch_is_sentinel(buf_pool, watch_page)) {
        /* The page is already in the buffer pool. */

        mutex_enter(&block->mutex);
        buf_LRU_block_free_non_file_page(block);
        mutex_exit(&block->mutex);

    if (fil_tablespace_deleted_or_being_deleted_in_mem(
            space, tablespace_version)) {
        /* The page belongs to a space which has been
        deleted or is being deleted. */
        *err = DB_TABLESPACE_DELETED;

        bpage = &block->page;
        mutex_enter(&block->mutex);

        ut_ad(buf_pool_from_bpage(bpage) == buf_pool);

        buf_page_init(space, offset, fold, block);

        /* The block must be put to the LRU list, to the old blocks */
        buf_LRU_add_block(bpage, TRUE/* to old blocks */);

        /* We set a pass-type x-lock on the frame because then
        the same thread which called for the read operation
        (and is running now at this point of code) can wait
        for the read to complete by waiting for the x-lock on
        the frame; if the x-lock were recursive, the same
        thread would illegally get the x-lock before the page
        read is completed. The x-lock is cleared by the
        io-handler thread. */

        rw_lock_x_lock_gen(&block->lock, BUF_IO_READ);
        buf_page_set_io_fix(bpage, BUF_IO_READ);

        if (UNIV_UNLIKELY(zip_size)) {
            page_zip_set_size(&block->page.zip, zip_size);

            /* buf_pool->mutex may be released and
            reacquired by buf_buddy_alloc(). Thus, we
            must release block->mutex in order not to
            break the latching order in the reacquisition
            of buf_pool->mutex. We also must defer this
            operation until after the block descriptor has
            been added to buf_pool->LRU and
            buf_pool->page_hash. */
            mutex_exit(&block->mutex);
            data = buf_buddy_alloc(buf_pool, zip_size, &lru);
            mutex_enter(&block->mutex);
            block->page.zip.data = data;

            /* To maintain the invariant
            block->in_unzip_LRU_list
            == buf_page_belongs_to_unzip_LRU(&block->page)
            we have to add this block to unzip_LRU
            after block->page.zip.data is set. */
            ut_ad(buf_page_belongs_to_unzip_LRU(&block->page));
            buf_unzip_LRU_add_block(block, TRUE);

        mutex_exit(&block->mutex);

        /* Defer buf_buddy_alloc() until after the block has
        been found not to exist. The buf_buddy_alloc() and
        buf_buddy_free() calls may be expensive because of
        buf_buddy_relocate(). */

        /* The compressed page must be allocated before the
        control block (bpage), in order to avoid the
        invocation of buf_buddy_relocate_block() on
        uninitialized data. */
        data = buf_buddy_alloc(buf_pool, zip_size, &lru);
        bpage = buf_buddy_alloc(buf_pool, sizeof *bpage, &lru);

        /* Initialize the buf_pool pointer. */
        bpage->buf_pool = buf_pool;

        /* If buf_buddy_alloc() allocated storage from the LRU list,
        it released and reacquired buf_pool->mutex. Thus, we must
        check the page_hash again, as it may have been modified. */
        if (UNIV_UNLIKELY(lru)) {

            watch_page = buf_page_hash_get_low(
                buf_pool, space, offset, fold);

                && !buf_pool_watch_is_sentinel(buf_pool,

                /* The block was added by some other thread. */

                buf_buddy_free(buf_pool, bpage, sizeof *bpage);
                buf_buddy_free(buf_pool, data, zip_size);

        page_zip_des_init(&bpage->zip);
        page_zip_set_size(&bpage->zip, zip_size);
        bpage->zip.data = data;

        mutex_enter(&buf_pool->zip_mutex);
        UNIV_MEM_DESC(bpage->zip.data,
                      page_zip_get_size(&bpage->zip), bpage);

        buf_page_init_low(bpage);

        bpage->state = BUF_BLOCK_ZIP_PAGE;
        bpage->space = space;
        bpage->offset = offset;

        bpage->in_page_hash = FALSE;
        bpage->in_zip_hash = FALSE;
        bpage->in_flush_list = FALSE;
        bpage->in_free_list = FALSE;
        bpage->in_LRU_list = FALSE;
#endif /* UNIV_DEBUG */

        ut_d(bpage->in_page_hash = TRUE);

        if (UNIV_LIKELY_NULL(watch_page)) {
            /* Preserve the reference count. */
            ulint   buf_fix_count = watch_page->buf_fix_count;
            ut_a(buf_fix_count > 0);
            bpage->buf_fix_count += buf_fix_count;
            ut_ad(buf_pool_watch_is_sentinel(buf_pool, watch_page));
            buf_pool_watch_remove(buf_pool, fold, watch_page);

        HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold,
                    bpage);

        /* The block must be put to the LRU list, to the old blocks */
        buf_LRU_add_block(bpage, TRUE/* to old blocks */);
        buf_LRU_insert_zip_clean(bpage);

        buf_page_set_io_fix(bpage, BUF_IO_READ);

        mutex_exit(&buf_pool->zip_mutex);

    buf_pool->n_pend_reads++;

    buf_pool_mutex_exit(buf_pool);

    if (mode == BUF_READ_IBUF_PAGES_ONLY) {

    ut_ad(!bpage || buf_page_in_file(bpage));
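#if 0
/* Sketch of the read path served by buf_page_init_for_read() (added
for illustration; the real driver lives in buf0rea.c and the helper
here is hypothetical). A NULL return means the read must not be
issued; otherwise the page is I/O-fixed and X-latched, and
buf_page_io_complete() must eventually clear both. */
static void
read_one_page_sketch(ulint space, ulint zip_size, ulint offset,
                     ib_int64_t tablespace_version)
{
    ulint       err;
    buf_page_t* bpage = buf_page_init_for_read(
        &err, BUF_READ_ANY_PAGE, space, zip_size,
        FALSE,  /* no uncompressed copy requested */
        tablespace_version, offset);

    if (bpage == NULL) {
        return; /* already in pool, not wanted, or space deleted */
    }

    /* ... issue the file read into the frame here ... */

    buf_page_io_complete(bpage);
}
#endif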
/********************************************************************//**
Initializes a page in the buffer pool. The page is usually not read
from a file even if it cannot be found in the buffer pool. This is one
of the functions which perform a state transition on a block from
NOT_USED to FILE_PAGE (the other is buf_page_get_gen).
@return pointer to the block, page bufferfixed */
buf_page_create(
/*============*/
    ulint  space,   /*!< in: space id */
    ulint  offset,  /*!< in: offset of the page within space
                    in units of a page */
    ulint  zip_size,/*!< in: compressed page size, or 0 */
    mtr_t* mtr)     /*!< in: mini-transaction handle */
    buf_block_t* free_block = NULL;
    ulint        time_ms = ut_time_ms();
    buf_pool_t*  buf_pool = buf_pool_get(space, offset);

    ut_ad(mtr->state == MTR_ACTIVE);
    ut_ad(space || !zip_size);

    free_block = buf_LRU_get_free_block(buf_pool, 0);

    fold = buf_page_address_fold(space, offset);

    buf_pool_mutex_enter(buf_pool);

    block = (buf_block_t*) buf_page_hash_get_low(
        buf_pool, space, offset, fold);

    if (block
        && buf_page_in_file(&block->page)
        && !buf_pool_watch_is_sentinel(buf_pool, &block->page)) {
#ifdef UNIV_IBUF_COUNT_DEBUG
        ut_a(ibuf_count_get(space, offset) == 0);

#ifdef UNIV_DEBUG_FILE_ACCESSES
        block->page.file_page_was_freed = FALSE;
#endif /* UNIV_DEBUG_FILE_ACCESSES */

        /* Page can be found in buf_pool */
        buf_pool_mutex_exit(buf_pool);

        buf_block_free(free_block);

        return(buf_page_get_with_no_latch(space, zip_size,
                                          offset, mtr));

    /* If we get here, the page was not in buf_pool: init it there */

    if (buf_debug_prints) {
        fprintf(stderr, "Creating space %lu page %lu to buffer\n",
            (ulong) space, (ulong) offset);
#endif /* UNIV_DEBUG */

    mutex_enter(&block->mutex);

    buf_page_init(space, offset, fold, block);

    /* The block must be put to the LRU list */
    buf_LRU_add_block(&block->page, FALSE);

    buf_block_buf_fix_inc(block, __FILE__, __LINE__);
    buf_pool->stat.n_pages_created++;

        /* Prevent race conditions during buf_buddy_alloc(),
        which may release and reacquire buf_pool->mutex,
        by IO-fixing and X-latching the block. */

        buf_page_set_io_fix(&block->page, BUF_IO_READ);
        rw_lock_x_lock(&block->lock);

        page_zip_set_size(&block->page.zip, zip_size);
        mutex_exit(&block->mutex);
        /* buf_pool->mutex may be released and reacquired by
        buf_buddy_alloc(). Thus, we must release block->mutex
        in order not to break the latching order in
        the reacquisition of buf_pool->mutex. We also must
        defer this operation until after the block descriptor
        has been added to buf_pool->LRU and buf_pool->page_hash. */
        data = buf_buddy_alloc(buf_pool, zip_size, &lru);
        mutex_enter(&block->mutex);
        block->page.zip.data = data;

        /* To maintain the invariant
        block->in_unzip_LRU_list
        == buf_page_belongs_to_unzip_LRU(&block->page)
        we have to add this block to unzip_LRU after
        block->page.zip.data is set. */
        ut_ad(buf_page_belongs_to_unzip_LRU(&block->page));
        buf_unzip_LRU_add_block(block, FALSE);

        buf_page_set_io_fix(&block->page, BUF_IO_NONE);
        rw_lock_x_unlock(&block->lock);

    buf_page_set_accessed(&block->page, time_ms);

    buf_pool_mutex_exit(buf_pool);

    mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);

    mutex_exit(&block->mutex);

    /* Delete possible entries for the page from the insert buffer:
    such can exist if the page belonged to an index which was dropped */

    ibuf_merge_or_delete_for_page(NULL, space, offset, zip_size, TRUE);

    /* Flush pages from the end of the LRU list if necessary */
    buf_flush_free_margin(buf_pool);

    frame = block->frame;

    memset(frame + FIL_PAGE_PREV, 0xff, 4);
    memset(frame + FIL_PAGE_NEXT, 0xff, 4);
    mach_write_to_2(frame + FIL_PAGE_TYPE, FIL_PAGE_TYPE_ALLOCATED);

    /* Reset to zero the file flush lsn field in the page; if the first
    page of an ibdata file is 'created' in this function into the buffer
    pool then we lose the original contents of the file flush lsn stamp.
    Then InnoDB could in a crash recovery print a big, false, corruption
    warning if the stamp contains an lsn bigger than the ib_logfile lsn. */

    memset(frame + FIL_PAGE_FILE_FLUSH_LSN, 0, 8);

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
    ut_a(++buf_dbg_counter % 357 || buf_validate());
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#ifdef UNIV_IBUF_COUNT_DEBUG
    ut_a(ibuf_count_get(buf_block_get_space(block),
                        buf_block_get_page_no(block)) == 0);
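#if 0
/* Usage sketch for buf_page_create() (added for illustration; the
helper is hypothetical). This is the path taken when a brand-new page
is allocated in a file segment: the contents come from the caller,
not from disk, and the buffer-fix taken by buf_page_create() is
released when the mini-transaction commits. */
static void
create_page_sketch(ulint space, ulint offset, ulint zip_size)
{
    mtr_t        mtr;
    buf_block_t* block;

    mtr_start(&mtr);

    block = buf_page_create(space, offset, zip_size, &mtr);

    /* ... format block->frame under the mtr ... */

    mtr_commit(&mtr);
}
#endif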
/********************************************************************//**
Completes an asynchronous read or write request of a file page to or from
the buffer pool. */
buf_page_io_complete(
/*=================*/
    buf_page_t* bpage)  /*!< in: pointer to the block in question */
    enum buf_io_fix io_type;
    buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
    const ibool     uncompressed = (buf_page_get_state(bpage)
                                    == BUF_BLOCK_FILE_PAGE);

    ut_a(buf_page_in_file(bpage));

    /* We do not need protect io_fix here by mutex to read
    it because this is the only function where we can change the value
    from BUF_IO_READ or BUF_IO_WRITE to some other value, and our code
    ensures that this is the only thread that handles the i/o for this
    block. */

    io_type = buf_page_get_io_fix(bpage);
    ut_ad(io_type == BUF_IO_READ || io_type == BUF_IO_WRITE);

    if (io_type == BUF_IO_READ) {

        ulint   read_space_id;

        if (buf_page_get_zip_size(bpage)) {
            frame = bpage->zip.data;
            buf_pool->n_pend_unzip++;

                && !buf_zip_decompress((buf_block_t*) bpage,

                buf_pool->n_pend_unzip--;

            buf_pool->n_pend_unzip--;

            frame = ((buf_block_t*) bpage)->frame;

        /* If this page is not uninitialized and not in the
        doublewrite buffer, then the page number and space id
        should be the same as in block. */
        read_page_no = mach_read_from_4(frame + FIL_PAGE_OFFSET);
        read_space_id = mach_read_from_4(
            frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);

        if (bpage->space == TRX_SYS_SPACE
            && trx_doublewrite_page_inside(bpage->offset)) {

            ut_print_timestamp(stderr);
                " InnoDB: Error: reading page %lu\n"
                "InnoDB: which is in the"
                " doublewrite buffer!\n",
                (ulong) bpage->offset);
        } else if (!read_space_id && !read_page_no) {
            /* This is likely an uninitialized page. */
        } else if ((bpage->space
                    && bpage->space != read_space_id)
                   || bpage->offset != read_page_no) {
            /* We did not compare space_id to read_space_id
            if bpage->space == 0, because the field on the
            page may contain garbage in MySQL < 4.1.1,
            which only supported bpage->space == 0. */

            ut_print_timestamp(stderr);
                " InnoDB: Error: space id and page n:o"
                " stored in the page\n"
                "InnoDB: read in are %lu:%lu,"
                " should be %lu:%lu!\n",
                (ulong) read_space_id, (ulong) read_page_no,
                (ulong) bpage->space,
                (ulong) bpage->offset);

        /* From version 3.23.38 up we store the page checksum
        to the 4 first bytes of the page end lsn field */

        if (buf_page_is_corrupted(frame,
                                  buf_page_get_zip_size(bpage))) {

                "InnoDB: Database page corruption on disk"
                "InnoDB: file read of page %lu.\n"
                "InnoDB: You may have to recover"
                " from a backup.\n",
                (ulong) bpage->offset);
            buf_page_print(frame, buf_page_get_zip_size(bpage));

                "InnoDB: Database page corruption on disk"
                "InnoDB: file read of page %lu.\n"
                "InnoDB: You may have to recover"
                " from a backup.\n",
                (ulong) bpage->offset);
            fputs("InnoDB: It is also possible that"
                  "InnoDB: system has corrupted its"
                  "InnoDB: and rebooting your computer"
                  "InnoDB: If the corrupt page is an index page\n"
                  "InnoDB: you can also try to"
                  " fix the corruption\n"
                  "InnoDB: by dumping, dropping,"
                  " and reimporting\n"
                  "InnoDB: the corrupt table."
                  " You can use CHECK\n"
                  "InnoDB: TABLE to scan your"
                  " table for corruption.\n"
                  REFMAN "forcing-recovery.html\n"
                  "InnoDB: about forcing recovery.\n", stderr);

            if (srv_force_recovery < SRV_FORCE_IGNORE_CORRUPT) {
                fputs("InnoDB: Ending processing because of"
                      " a corrupt database page.\n",

        if (recv_recovery_is_on()) {
            /* Pages must be uncompressed for crash recovery. */

            recv_recover_page(TRUE, (buf_block_t*) bpage);

        if (uncompressed && !recv_no_ibuf_operations) {
            ibuf_merge_or_delete_for_page(
                (buf_block_t*) bpage, bpage->space,
                bpage->offset, buf_page_get_zip_size(bpage),

    buf_pool_mutex_enter(buf_pool);
    mutex_enter(buf_page_get_mutex(bpage));

#ifdef UNIV_IBUF_COUNT_DEBUG
    if (io_type == BUF_IO_WRITE || uncompressed) {
        /* For BUF_IO_READ of compressed-only blocks, the
        buffered operations will be merged by buf_page_get_gen()
        after the block has been uncompressed. */
        ut_a(ibuf_count_get(bpage->space, bpage->offset) == 0);

    /* Because this thread which does the unlocking is not the same that
    did the locking, we use a pass value != 0 in unlock, which simply
    removes the newest lock debug record, without checking the thread
    id. */

    buf_page_set_io_fix(bpage, BUF_IO_NONE);

        /* NOTE that the call to ibuf may have moved the ownership of
        the x-latch to this OS thread: do not let this confuse you in
        debugging! */

        ut_ad(buf_pool->n_pend_reads > 0);
        buf_pool->n_pend_reads--;
        buf_pool->stat.n_pages_read++;

            rw_lock_x_unlock_gen(&((buf_block_t*) bpage)->lock,

        /* Write means a flush operation: call the completion
        routine in the flush system */

        buf_flush_write_complete(bpage);

            rw_lock_s_unlock_gen(&((buf_block_t*) bpage)->lock,

        buf_pool->stat.n_pages_written++;

    if (buf_debug_prints) {
        fprintf(stderr, "Has %s page space %lu page no %lu\n",
            io_type == BUF_IO_READ ? "read" : "written",
            (ulong) buf_page_get_space(bpage),
            (ulong) buf_page_get_page_no(bpage));
#endif /* UNIV_DEBUG */

    mutex_exit(buf_page_get_mutex(bpage));
    buf_pool_mutex_exit(buf_pool);
/*********************************************************************//**
Asserts that all file pages in the buffer are in a replaceable state. */
buf_all_freed_instance(
/*===================*/
    buf_pool_t* buf_pool)   /*!< in: buffer pool instance */
    buf_pool_mutex_enter(buf_pool);

    chunk = buf_pool->chunks;

    for (i = buf_pool->n_chunks; i--; chunk++) {

        const buf_block_t* block = buf_chunk_not_freed(chunk);

        if (UNIV_LIKELY_NULL(block)) {
                "Page %lu %lu still fixed or dirty\n",
                (ulong) block->page.space,
                (ulong) block->page.offset);

    buf_pool_mutex_exit(buf_pool);
/*********************************************************************//**
Invalidates file pages in one buffer pool instance */
buf_pool_invalidate_instance(
/*=========================*/
    buf_pool_t* buf_pool)   /*!< in: buffer pool instance */
    buf_pool_mutex_enter(buf_pool);

    for (i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) {

        /* As this function is called during startup and
        during redo application phase during recovery, InnoDB
        is single threaded (apart from IO helper threads) at
        this stage. No new write batch can be in initialization
        stage at this point. */
        ut_ad(buf_pool->init_flush[i] == FALSE);

        /* However, it is possible that a write batch that has
        been posted earlier is still not complete. For buffer
        pool invalidation to proceed we must ensure there is NO
        write activity happening. */
        if (buf_pool->n_flush[i] > 0) {
            buf_pool_mutex_exit(buf_pool);
            buf_flush_wait_batch_end(buf_pool, i);
            buf_pool_mutex_enter(buf_pool);

    buf_pool_mutex_exit(buf_pool);

    ut_ad(buf_all_freed_instance(buf_pool));

        freed = buf_LRU_search_and_free_block(buf_pool, 100);

    buf_pool_mutex_enter(buf_pool);

    ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0);
    ut_ad(UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0);

    buf_pool->freed_page_clock = 0;
    buf_pool->LRU_old = NULL;
    buf_pool->LRU_old_len = 0;
    buf_pool->LRU_flush_ended = 0;

    memset(&buf_pool->stat, 0x00, sizeof(buf_pool->stat));
    buf_refresh_io_stats(buf_pool);

    buf_pool_mutex_exit(buf_pool);
/*********************************************************************//**
Invalidates the file pages in the buffer pool when an archive recovery is
completed. All the file pages buffered must be in a replaceable state when
this function is called: not latched and not modified. */
buf_pool_invalidate(void)
/*=====================*/
    for (i = 0; i < srv_buf_pool_instances; i++) {
        buf_pool_invalidate_instance(buf_pool_from_array(i));

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/*********************************************************************//**
Validates data in one buffer pool instance
@return	TRUE */
static
ibool
buf_pool_validate_instance(
/*=======================*/
	buf_pool_t*	buf_pool)	/*!< in: buffer pool instance */
{
	buf_page_t*	b;
	buf_chunk_t*	chunk;
	ulint		i;
	ulint		n_single_flush	= 0;
	ulint		n_lru_flush	= 0;
	ulint		n_list_flush	= 0;
	ulint		n_lru		= 0;
	ulint		n_flush		= 0;
	ulint		n_free		= 0;
	ulint		n_zip		= 0;

	ut_ad(buf_pool);

	buf_pool_mutex_enter(buf_pool);

	chunk = buf_pool->chunks;

	/* Check the uncompressed blocks. */

	for (i = buf_pool->n_chunks; i--; chunk++) {

		ulint		j;
		buf_block_t*	block = chunk->blocks;

		for (j = chunk->size; j--; block++) {

			mutex_enter(&block->mutex);

			switch (buf_block_get_state(block)) {
			case BUF_BLOCK_ZIP_FREE:
			case BUF_BLOCK_ZIP_PAGE:
			case BUF_BLOCK_ZIP_DIRTY:
				/* These should only occur on
				zip_clean, zip_free[], or flush_list. */
				ut_error;
				break;

			case BUF_BLOCK_FILE_PAGE:
				ut_a(buf_page_hash_get(buf_pool,
						       buf_block_get_space(
							       block),
						       buf_block_get_page_no(
							       block))
				     == &block->page);

#ifdef UNIV_IBUF_COUNT_DEBUG
				ut_a(buf_page_get_io_fix(&block->page)
				     == BUF_IO_READ
				     || !ibuf_count_get(buf_block_get_space(
								block),
							buf_block_get_page_no(
								block)));
#endif
				switch (buf_page_get_io_fix(&block->page)) {
				case BUF_IO_NONE:
					break;

				case BUF_IO_WRITE:
					switch (buf_page_get_flush_type(
							&block->page)) {
					case BUF_FLUSH_LRU:
						n_lru_flush++;
						ut_a(rw_lock_is_locked(
							     &block->lock,
							     RW_LOCK_SHARED));
						break;
					case BUF_FLUSH_LIST:
						n_list_flush++;
						break;
					case BUF_FLUSH_SINGLE_PAGE:
						n_single_flush++;
						break;
					default:
						ut_error;
					}

					break;

				case BUF_IO_READ:

					ut_a(rw_lock_is_locked(&block->lock,
							       RW_LOCK_EX));
					break;
				}

				n_lru++;
				break;

			case BUF_BLOCK_NOT_USED:
				n_free++;
				break;

			case BUF_BLOCK_READY_FOR_USE:
			case BUF_BLOCK_MEMORY:
			case BUF_BLOCK_REMOVE_HASH:
				/* do nothing */
				break;
			}

			mutex_exit(&block->mutex);
		}
	}

	mutex_enter(&buf_pool->zip_mutex);

	/* Check clean compressed-only blocks. */

	for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
	     b = UT_LIST_GET_NEXT(list, b)) {
		ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
		switch (buf_page_get_io_fix(b)) {
		case BUF_IO_NONE:
			/* All clean blocks should be I/O-unfixed. */
			break;
		case BUF_IO_READ:
			/* In buf_LRU_free_block(), we temporarily set
			b->io_fix = BUF_IO_READ for a newly allocated
			control block in order to prevent
			buf_page_get_gen() from decompressing the block. */
			break;
		default:
			ut_error;
			break;
		}

		/* It is OK to read oldest_modification here because
		we have acquired buf_pool->zip_mutex above which acts
		as the 'block->mutex' for these bpages. */
		ut_a(!b->oldest_modification);
		ut_a(buf_page_hash_get(buf_pool, b->space, b->offset) == b);

		n_lru++;
		n_zip++;
	}

	/* Check dirty blocks. */

	buf_flush_list_mutex_enter(buf_pool);
	for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
	     b = UT_LIST_GET_NEXT(list, b)) {
		ut_ad(b->in_flush_list);
		ut_a(b->oldest_modification);
		n_flush++;

		switch (buf_page_get_state(b)) {
		case BUF_BLOCK_ZIP_DIRTY:
			n_lru++;
			n_zip++;
			switch (buf_page_get_io_fix(b)) {
			case BUF_IO_NONE:
			case BUF_IO_READ:
				break;
			case BUF_IO_WRITE:
				switch (buf_page_get_flush_type(b)) {
				case BUF_FLUSH_LRU:
					n_lru_flush++;
					break;
				case BUF_FLUSH_LIST:
					n_list_flush++;
					break;
				case BUF_FLUSH_SINGLE_PAGE:
					n_single_flush++;
					break;
				default:
					ut_error;
				}
				break;
			}
			break;
		case BUF_BLOCK_FILE_PAGE:
			/* uncompressed page */
			break;
		case BUF_BLOCK_ZIP_FREE:
		case BUF_BLOCK_ZIP_PAGE:
		case BUF_BLOCK_NOT_USED:
		case BUF_BLOCK_READY_FOR_USE:
		case BUF_BLOCK_MEMORY:
		case BUF_BLOCK_REMOVE_HASH:
			ut_error;
			break;
		}
		ut_a(buf_page_hash_get(buf_pool, b->space, b->offset) == b);
	}

	ut_a(UT_LIST_GET_LEN(buf_pool->flush_list) == n_flush);

	buf_flush_list_mutex_exit(buf_pool);

	mutex_exit(&buf_pool->zip_mutex);

	if (n_lru + n_free > buf_pool->curr_size + n_zip) {
		fprintf(stderr, "n LRU %lu, n free %lu, pool %lu zip %lu\n",
			(ulong) n_lru, (ulong) n_free,
			(ulong) buf_pool->curr_size, (ulong) n_zip);
		ut_error;
	}

	ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == n_lru);
	if (UT_LIST_GET_LEN(buf_pool->free) != n_free) {
		fprintf(stderr, "Free list len %lu, free blocks %lu\n",
			(ulong) UT_LIST_GET_LEN(buf_pool->free),
			(ulong) n_free);
		ut_error;
	}

	ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_single_flush);
	ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
	ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush);

	buf_pool_mutex_exit(buf_pool);

	ut_a(buf_LRU_validate());
	ut_a(buf_flush_validate(buf_pool));

	return(TRUE);
}
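
/* The counters accumulated in the scans above (n_lru, n_free, n_zip,
n_flush and the per-flush-type counters) must agree with the lengths of
the LRU, free and flush_list lists and with the buf_pool->n_flush[]
bookkeeping; any mismatch means the lists and the control blocks have
diverged. */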
/*********************************************************************//**
Validates the buffer buf_pool data structure.
@return	TRUE */
UNIV_INTERN
ibool
buf_validate(void)
/*==============*/
{
	ulint	i;

	for (i = 0; i < srv_buf_pool_instances; i++) {
		buf_pool_t*	buf_pool;

		buf_pool = buf_pool_from_array(i);

		buf_pool_validate_instance(buf_pool);
	}
	return(TRUE);
}

#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

#if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/*********************************************************************//**
Prints info of the buffer buf_pool data structure for one instance. */
static
void
buf_print_instance(
/*===============*/
	buf_pool_t*	buf_pool)
{
	index_id_t*	index_ids;
	ulint*		counts;
	ulint		size;
	ulint		i;
	ulint		j;
	index_id_t	id;
	ulint		n_found;
	buf_chunk_t*	chunk;
	dict_index_t*	index;

	ut_ad(buf_pool);

	size = buf_pool->curr_size;

	index_ids = mem_alloc(size * sizeof *index_ids);
	counts = mem_alloc(sizeof(ulint) * size);

	buf_pool_mutex_enter(buf_pool);
	buf_flush_list_mutex_enter(buf_pool);

	fprintf(stderr,
		"buf_pool size %lu\n"
		"database pages %lu\n"
		"free pages %lu\n"
		"modified database pages %lu\n"
		"n pending decompressions %lu\n"
		"n pending reads %lu\n"
		"n pending flush LRU %lu list %lu single page %lu\n"
		"pages made young %lu, not young %lu\n"
		"pages read %lu, created %lu, written %lu\n",
		(ulong) size,
		(ulong) UT_LIST_GET_LEN(buf_pool->LRU),
		(ulong) UT_LIST_GET_LEN(buf_pool->free),
		(ulong) UT_LIST_GET_LEN(buf_pool->flush_list),
		(ulong) buf_pool->n_pend_unzip,
		(ulong) buf_pool->n_pend_reads,
		(ulong) buf_pool->n_flush[BUF_FLUSH_LRU],
		(ulong) buf_pool->n_flush[BUF_FLUSH_LIST],
		(ulong) buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE],
		(ulong) buf_pool->stat.n_pages_made_young,
		(ulong) buf_pool->stat.n_pages_not_made_young,
		(ulong) buf_pool->stat.n_pages_read,
		(ulong) buf_pool->stat.n_pages_created,
		(ulong) buf_pool->stat.n_pages_written);

	buf_flush_list_mutex_exit(buf_pool);

	/* Count the number of blocks belonging to each index in the buffer */

	n_found = 0;

	chunk = buf_pool->chunks;

	for (i = buf_pool->n_chunks; i--; chunk++) {
		buf_block_t*	block		= chunk->blocks;
		ulint		n_blocks	= chunk->size;

		for (; n_blocks--; block++) {
			const buf_frame_t* frame = block->frame;

			if (fil_page_get_type(frame) == FIL_PAGE_INDEX) {

				id = btr_page_get_index_id(frame);

				/* Look for the id in the index_ids array */
				j = 0;

				while (j < n_found) {

					if (index_ids[j] == id) {
						counts[j]++;

						break;
					}
					j++;
				}

				if (j == n_found) {
					n_found++;
					index_ids[j] = id;
					counts[j] = 1;
				}
			}
		}
	}

	buf_pool_mutex_exit(buf_pool);

	for (i = 0; i < n_found; i++) {
		index = dict_index_get_if_in_cache(index_ids[i]);

		fprintf(stderr,
			"Block count for index %llu in buffer is about %lu",
			(ullint) index_ids[i],
			(ulong) counts[i]);

		if (index) {
			putc(' ', stderr);
			dict_index_name_print(stderr, NULL, index);
		}

		putc('\n', stderr);
	}

	mem_free(index_ids);
	mem_free(counts);

	ut_a(buf_pool_validate_instance(buf_pool));
}
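
/* The parallel arrays index_ids[] and counts[] above form a small
open-coded frequency map: for each FIL_PAGE_INDEX page the index id is
searched linearly; on a hit its count is incremented, otherwise a new
entry is appended.  For example, index pages with ids {5, 7, 5} produce
index_ids = {5, 7} and counts = {2, 1}. */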
/*********************************************************************//**
Prints info of the buffer buf_pool data structure. */
UNIV_INTERN
void
buf_print(void)
/*===========*/
{
	ulint	i;

	for (i = 0; i < srv_buf_pool_instances; i++) {
		buf_pool_t*	buf_pool;

		buf_pool = buf_pool_from_array(i);
		buf_print_instance(buf_pool);
	}
}
#endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */

#ifdef UNIV_DEBUG
/*********************************************************************//**
Returns the number of latched pages in the buffer pool.
@return	number of latched pages */
static
ulint
buf_get_latched_pages_number_instance(
/*==================================*/
	buf_pool_t*	buf_pool)	/*!< in: buffer pool instance */
{
	buf_page_t*	b;
	ulint		i;
	buf_chunk_t*	chunk;
	ulint		fixed_pages_number = 0;

	buf_pool_mutex_enter(buf_pool);

	chunk = buf_pool->chunks;

	for (i = buf_pool->n_chunks; i--; chunk++) {
		buf_block_t*	block;
		ulint		j;

		block = chunk->blocks;

		for (j = chunk->size; j--; block++) {
			if (buf_block_get_state(block)
			    != BUF_BLOCK_FILE_PAGE) {

				continue;
			}

			mutex_enter(&block->mutex);

			if (block->page.buf_fix_count != 0
			    || buf_page_get_io_fix(&block->page)
			    != BUF_IO_NONE) {
				fixed_pages_number++;
			}

			mutex_exit(&block->mutex);
		}
	}

	mutex_enter(&buf_pool->zip_mutex);

	/* Traverse the lists of clean and dirty compressed-only blocks. */

	for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
	     b = UT_LIST_GET_NEXT(list, b)) {
		ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
		ut_a(buf_page_get_io_fix(b) != BUF_IO_WRITE);

		if (b->buf_fix_count != 0
		    || buf_page_get_io_fix(b) != BUF_IO_NONE) {
			fixed_pages_number++;
		}
	}

	buf_flush_list_mutex_enter(buf_pool);
	for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
	     b = UT_LIST_GET_NEXT(list, b)) {
		ut_ad(b->in_flush_list);

		switch (buf_page_get_state(b)) {
		case BUF_BLOCK_ZIP_DIRTY:
			if (b->buf_fix_count != 0
			    || buf_page_get_io_fix(b) != BUF_IO_NONE) {
				fixed_pages_number++;
			}
			break;
		case BUF_BLOCK_FILE_PAGE:
			/* uncompressed page */
			break;
		case BUF_BLOCK_ZIP_FREE:
		case BUF_BLOCK_ZIP_PAGE:
		case BUF_BLOCK_NOT_USED:
		case BUF_BLOCK_READY_FOR_USE:
		case BUF_BLOCK_MEMORY:
		case BUF_BLOCK_REMOVE_HASH:
			ut_error;
			break;
		}
	}

	buf_flush_list_mutex_exit(buf_pool);
	mutex_exit(&buf_pool->zip_mutex);
	buf_pool_mutex_exit(buf_pool);

	return(fixed_pages_number);
}
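
/* A page counts as latched if it is buffer-fixed (buf_fix_count != 0)
or has I/O in progress (io_fix != BUF_IO_NONE).  Blocks that are not in
BUF_BLOCK_FILE_PAGE state are skipped in the chunk scan, so only file
pages and the compressed-only lists contribute to the total. */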
/*********************************************************************//**
Returns the number of latched pages in all the buffer pools.
@return	number of latched pages */
UNIV_INTERN
ulint
buf_get_latched_pages_number(void)
/*==============================*/
{
	ulint	i;
	ulint	total_latched_pages = 0;

	for (i = 0; i < srv_buf_pool_instances; i++) {
		buf_pool_t*	buf_pool;

		buf_pool = buf_pool_from_array(i);

		total_latched_pages += buf_get_latched_pages_number_instance(
			buf_pool);
	}

	return(total_latched_pages);
}

#endif /* UNIV_DEBUG */

/*********************************************************************//**
Returns the number of pending buf pool ios.
@return	number of pending I/O operations */
UNIV_INTERN
ulint
buf_get_n_pending_ios(void)
/*=======================*/
{
	ulint	i;
	ulint	pend_ios = 0;

	for (i = 0; i < srv_buf_pool_instances; i++) {
		buf_pool_t*	buf_pool;

		buf_pool = buf_pool_from_array(i);

		pend_ios +=
			buf_pool->n_pend_reads
			+ buf_pool->n_flush[BUF_FLUSH_LRU]
			+ buf_pool->n_flush[BUF_FLUSH_LIST]
			+ buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE];
	}

	return(pend_ios);
}
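
/* A minimal usage sketch (illustrative only; buf_wait_for_pending_ios()
is a hypothetical helper, not part of the upstream code): a caller that
must wait for all buffer pool I/O to drain could poll this counter.
os_thread_sleep() takes microseconds. */
#if 0
static
void
buf_wait_for_pending_ios(void)
/*==========================*/
{
	while (buf_get_n_pending_ios() > 0) {
		os_thread_sleep(100000);	/* sleep 0.1 s, then re-check */
	}
}
#endif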

/*********************************************************************//**
Returns the ratio in percents of modified pages in the buffer pool /
database pages in the buffer pool.
@return	modified page percentage ratio */
UNIV_INTERN
ulint
buf_get_modified_ratio_pct(void)
/*============================*/
{
	ulint	ratio;
	ulint	lru_len = 0;
	ulint	free_len = 0;
	ulint	flush_list_len = 0;

	buf_get_total_list_len(&lru_len, &free_len, &flush_list_len);

	ratio = (100 * flush_list_len) / (1 + lru_len + free_len);

	/* 1 + is there to avoid division by zero */

	return(ratio);
}
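
/* Worked example (illustrative): with flush_list_len = 300,
lru_len = 2000 and free_len = 699, the ratio is
(100 * 300) / (1 + 2000 + 699) = 30000 / 2700 = 11 in integer
arithmetic, i.e. about 11% of the pool is dirty. */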

/*********************************************************************//**
Prints info of the buffer i/o. */
static
void
buf_print_io_instance(
/*==================*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	FILE*		file)		/*!< in/out: buffer where to print */
{
	time_t	current_time;
	double	time_elapsed;
	ulint	n_gets_diff;

	ut_ad(buf_pool);

	buf_pool_mutex_enter(buf_pool);
	buf_flush_list_mutex_enter(buf_pool);

	fprintf(file,
		"Buffer pool size %lu\n"
		"Free buffers %lu\n"
		"Database pages %lu\n"
		"Old database pages %lu\n"
		"Modified db pages %lu\n"
		"Pending reads %lu\n"
		"Pending writes: LRU %lu, flush list %lu, single page %lu\n",
		(ulong) buf_pool->curr_size,
		(ulong) UT_LIST_GET_LEN(buf_pool->free),
		(ulong) UT_LIST_GET_LEN(buf_pool->LRU),
		(ulong) buf_pool->LRU_old_len,
		(ulong) UT_LIST_GET_LEN(buf_pool->flush_list),
		(ulong) buf_pool->n_pend_reads,
		(ulong) buf_pool->n_flush[BUF_FLUSH_LRU]
		+ buf_pool->init_flush[BUF_FLUSH_LRU],
		(ulong) buf_pool->n_flush[BUF_FLUSH_LIST]
		+ buf_pool->init_flush[BUF_FLUSH_LIST],
		(ulong) buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]);

	buf_flush_list_mutex_exit(buf_pool);

	current_time = time(NULL);
	time_elapsed = 0.001 + difftime(current_time,
					buf_pool->last_printout_time);

	fprintf(file,
		"Pages made young %lu, not young %lu\n"
		"%.2f youngs/s, %.2f non-youngs/s\n"
		"Pages read %lu, created %lu, written %lu\n"
		"%.2f reads/s, %.2f creates/s, %.2f writes/s\n",
		(ulong) buf_pool->stat.n_pages_made_young,
		(ulong) buf_pool->stat.n_pages_not_made_young,
		(buf_pool->stat.n_pages_made_young
		 - buf_pool->old_stat.n_pages_made_young)
		/ time_elapsed,
		(buf_pool->stat.n_pages_not_made_young
		 - buf_pool->old_stat.n_pages_not_made_young)
		/ time_elapsed,
		(ulong) buf_pool->stat.n_pages_read,
		(ulong) buf_pool->stat.n_pages_created,
		(ulong) buf_pool->stat.n_pages_written,
		(buf_pool->stat.n_pages_read
		 - buf_pool->old_stat.n_pages_read)
		/ time_elapsed,
		(buf_pool->stat.n_pages_created
		 - buf_pool->old_stat.n_pages_created)
		/ time_elapsed,
		(buf_pool->stat.n_pages_written
		 - buf_pool->old_stat.n_pages_written)
		/ time_elapsed);

	n_gets_diff = buf_pool->stat.n_page_gets
		    - buf_pool->old_stat.n_page_gets;

	if (n_gets_diff) {
		fprintf(file,
			"Buffer pool hit rate %lu / 1000,"
			" young-making rate %lu / 1000 not %lu / 1000\n",
			(ulong)
			(1000 - ((1000 * (buf_pool->stat.n_pages_read
					  - buf_pool->old_stat.n_pages_read))
				 / (buf_pool->stat.n_page_gets
				    - buf_pool->old_stat.n_page_gets))),
			(ulong)
			(1000 * (buf_pool->stat.n_pages_made_young
				 - buf_pool->old_stat.n_pages_made_young)
			 / n_gets_diff),
			(ulong)
			(1000 * (buf_pool->stat.n_pages_not_made_young
				 - buf_pool->old_stat.n_pages_not_made_young)
			 / n_gets_diff));
	} else {
		fputs("No buffer pool page gets since the last printout\n",
		      file);
	}

	/* Statistics about read ahead algorithm */
	fprintf(file, "Pages read ahead %.2f/s,"
		" evicted without access %.2f/s\n",
		(buf_pool->stat.n_ra_pages_read
		 - buf_pool->old_stat.n_ra_pages_read)
		/ time_elapsed,
		(buf_pool->stat.n_ra_pages_evicted
		 - buf_pool->old_stat.n_ra_pages_evicted)
		/ time_elapsed);

	/* Print some values to help us with visualizing what is
	happening with LRU eviction. */
	fprintf(file,
		"LRU len: %lu, unzip_LRU len: %lu\n"
		"I/O sum[%lu]:cur[%lu], unzip sum[%lu]:cur[%lu]\n",
		UT_LIST_GET_LEN(buf_pool->LRU),
		UT_LIST_GET_LEN(buf_pool->unzip_LRU),
		buf_LRU_stat_sum.io, buf_LRU_stat_cur.io,
		buf_LRU_stat_sum.unzip, buf_LRU_stat_cur.unzip);

	buf_refresh_io_stats(buf_pool);
	buf_pool_mutex_exit(buf_pool);
}
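
/* Worked example for the hit rate printed above (illustrative): if
1000 page gets and 50 physical reads happened since the last printout,
the hit rate is 1000 - (1000 * 50) / 1000 = 950 / 1000, i.e. 95% of
page requests were served from the pool without a read. */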

/*********************************************************************//**
Prints info of the buffer i/o. */
UNIV_INTERN
void
buf_print_io(
/*=========*/
	FILE*	file)	/*!< in/out: buffer where to print */
{
	ulint	i;

	for (i = 0; i < srv_buf_pool_instances; i++) {
		buf_pool_t*	buf_pool;

		buf_pool = buf_pool_from_array(i);
		buf_print_io_instance(buf_pool, file);
	}
}

/**********************************************************************//**
Refreshes the statistics used to print per-second averages. */
UNIV_INTERN
void
buf_refresh_io_stats(
/*=================*/
	buf_pool_t*	buf_pool)	/*!< in: buffer pool instance */
{
	buf_pool->last_printout_time = ut_time();
	buf_pool->old_stat = buf_pool->stat;
}
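
/* buf_refresh_io_stats() snapshots the cumulative counters into
old_stat, so the next buf_print_io_instance() call can report
per-second deltas as (stat.X - old_stat.X) / time_elapsed. */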

/**********************************************************************//**
Refreshes the statistics used to print per-second averages, for all
buffer pool instances. */
UNIV_INTERN
void
buf_refresh_io_stats_all(void)
/*==========================*/
{
	ulint	i;

	for (i = 0; i < srv_buf_pool_instances; i++) {
		buf_pool_t*	buf_pool;

		buf_pool = buf_pool_from_array(i);

		buf_refresh_io_stats(buf_pool);
	}
}

/**********************************************************************//**
Check if all pages in all buffer pools are in a replaceable state.
@return	FALSE if not */
UNIV_INTERN
ibool
buf_all_freed(void)
/*===============*/
{
	ulint	i;

	for (i = 0; i < srv_buf_pool_instances; i++) {
		buf_pool_t*	buf_pool;

		buf_pool = buf_pool_from_array(i);

		if (!buf_all_freed_instance(buf_pool)) {
			return(FALSE);
		}
	}

	return(TRUE);
}

/*********************************************************************//**
Checks that there currently are no pending i/o-operations for the buffer
pool.
@return	TRUE if there is no pending i/o */
UNIV_INTERN
ibool
buf_pool_check_no_pending_io(void)
/*==============================*/
{
	ulint		i;
	ibool		ret = TRUE;

	buf_pool_mutex_enter_all();

	for (i = 0; i < srv_buf_pool_instances && ret; i++) {
		const buf_pool_t*	buf_pool;

		buf_pool = buf_pool_from_array(i);

		if (buf_pool->n_pend_reads
		    + buf_pool->n_flush[BUF_FLUSH_LRU]
		    + buf_pool->n_flush[BUF_FLUSH_LIST]
		    + buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]) {

			ret = FALSE;
		}
	}

	buf_pool_mutex_exit_all();

	return(ret);
}
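
/* An illustrative caller (hypothetical fragment, not upstream code):
spin until all pending reads and flushes have drained, for example
before freeing the buffer pool at shutdown. */
#if 0
	while (!buf_pool_check_no_pending_io()) {
		os_thread_sleep(100000);	/* 100 ms between checks */
	}
#endif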

#if 0
Code currently not used
/*********************************************************************//**
Gets the current length of the free list of buffer blocks.
@return	length of the free list */
UNIV_INTERN
ulint
buf_get_free_list_len(void)
/*=======================*/
{
	ulint	len;

	buf_pool_mutex_enter(buf_pool);

	len = UT_LIST_GET_LEN(buf_pool->free);

	buf_pool_mutex_exit(buf_pool);

	return(len);
}
#endif

#else /* !UNIV_HOTBACKUP */
/********************************************************************//**
Inits a page to the buffer buf_pool, for use in ibbackup --restore. */
UNIV_INTERN
void
buf_page_init_for_backup_restore(
/*=============================*/
	ulint		space,	/*!< in: space id */
	ulint		offset,	/*!< in: offset of the page within space
				in units of a page */
	ulint		zip_size,/*!< in: compressed page size in bytes
				or 0 for uncompressed pages */
	buf_block_t*	block)	/*!< in: block to init */
{
	block->page.state	= BUF_BLOCK_FILE_PAGE;
	block->page.space	= space;
	block->page.offset	= offset;

	page_zip_des_init(&block->page.zip);

	/* We assume that block->page.data has been allocated
	with zip_size == UNIV_PAGE_SIZE. */
	ut_ad(zip_size <= UNIV_PAGE_SIZE);
	ut_ad(ut_is_2pow(zip_size));
	page_zip_set_size(&block->page.zip, zip_size);
	if (zip_size) {
		block->page.zip.data = block->frame + UNIV_PAGE_SIZE;
	}
}
#endif /* !UNIV_HOTBACKUP */