/*****************************************************************************

Copyright (C) 1995, 2010, Innobase Oy. All Rights Reserved.
Copyright (C) 2008, Google Inc.

Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
briefly in the InnoDB documentation. The contributions by Google are
incorporated with their permission, and subject to the conditions contained in
the file COPYING.Google.

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
St, Fifth Floor, Boston, MA 02110-1301 USA

*****************************************************************************/

/**************************************************//**
The database buffer buf_pool

Created 11/5/1995 Heikki Tuuri
*******************************************************/
#ifndef UNIV_HOTBACKUP
#include "buf0buddy.h"
#include "lock0lock.h"
#include "ibuf0ibuf.h"
#endif /* !UNIV_HOTBACKUP */
#include "dict0dict.h"
/*
		IMPLEMENTATION OF THE BUFFER POOL
		=================================

Performance improvement:
------------------------
Thread scheduling in NT may be so slow that the OS wait mechanism should
not be used even in waiting for disk reads to complete.
Rather, we should put waiting query threads to the queue of
waiting jobs, and let the OS thread do something useful while the i/o
is processed. In this way we could remove most OS thread switches in
an i/o-intensive benchmark like TPC-C.

A possibility is to put a user space thread library between the database
and NT. User space thread libraries might be very fast.

SQL Server 7.0 can be configured to use 'fibers', which are lightweight
threads in NT. These should be studied.
		Buffer frames and blocks
		------------------------
Following the terminology of Gray and Reuter, we call the memory
blocks where file pages are loaded buffer frames. For each buffer
frame there is a control block, or shortly, a block, in the buffer
control array. The control info which does not need to be stored
in the file along with the file page resides in the control block.

The buffer buf_pool contains a single mutex which protects all the
control data structures of the buf_pool. The contents of a buffer frame are
protected by a separate read-write lock in its control block, though.
These locks can be locked and unlocked without owning the buf_pool->mutex.
The OS events in the buf_pool struct can be waited for without owning the
buf_pool->mutex.

The buf_pool->mutex is a hot-spot in main memory, causing a lot of
memory bus traffic on multiprocessor systems when processors
alternately access the mutex. On our Pentium, the mutex is accessed
maybe every 10 microseconds. We gave up the solution of having a mutex
for each control block, for instance, because it seemed too complicated.

A solution to reduce contention on the buf_pool->mutex is to
create a separate mutex for the page hash table. On the Pentium,
accessing the hash table takes 2 microseconds, about half
of the total buf_pool->mutex hold time.

The control block contains, for instance, the bufferfix count,
which is incremented when a thread wants a file page to be fixed
in a buffer frame. The bufferfix operation does not lock the
contents of the frame, however. For this purpose, the control
block contains a read-write lock.
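
As a hedged sketch of this split (buf_fix_block() and buf_unfix_block()
below are illustrative stand-ins, not the real buf0buf API; the rw-lock
calls are the real sync0rw ones), a reader first pins the block and then
latches its contents:

	buf_fix_block(block);		(increment the bufferfix count)
	rw_lock_s_lock(&block->lock);	(latch the frame contents)
	(... read data from block->frame ...)
	rw_lock_s_unlock(&block->lock);
	buf_unfix_block(block);		(release the pin)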
The buffer frames have to be aligned so that the start memory
address of a frame is divisible by the universal page size, which
is a power of 2.

We intend to make the buffer buf_pool size on-line reconfigurable,
that is, the buf_pool size can be changed without closing the database.
Then the database administrator may adjust it to be bigger
at night, for example. The control block array must
contain enough control blocks for the maximum buffer buf_pool size
which is used in the particular database.
If the buf_pool size is cut, we exploit the virtual memory mechanism of
the OS, and just refrain from using frames at high addresses. Then the OS
can swap them to disk.
The control blocks containing file pages are put to a hash table
according to the file address of the page.
We could speed up the access to an individual page by using
"pointer swizzling": we could replace the page references on
non-leaf index pages by direct pointers to the page, if it exists
in the buf_pool. We could make a separate hash table where we could
chain all the page references in non-leaf pages residing in the buf_pool,
using the page reference as the hash key,
and at the time of reading of a page update the pointers accordingly.
Drawbacks of this solution are added complexity and,
possibly, extra space required on non-leaf pages for memory pointers.
A simpler solution is just to speed up the hash table mechanism
in the database, using tables whose size is a power of 2.
		Lists of blocks
		---------------

There are several lists of control blocks.

The free list (buf_pool->free) contains blocks which are currently not
used.

The common LRU list contains all the blocks holding a file page
except those for which the bufferfix count is non-zero.
The pages are in the LRU list roughly in the order of the last
access to the page, so that the oldest pages are at the end of the
list. We also keep a pointer to near the end of the LRU list,
which we can use when we want to artificially age a page in the
buf_pool. This is used if we know that some page is not needed
again for some time: we insert the block right after the pointer,
causing it to be replaced sooner than would normally be the case.
Currently this aging mechanism is used by the read-ahead mechanism
and when scanning a full table that cannot fit in memory. By putting
such pages near the end of the LRU list, we make sure that most of
the buf_pool stays in main memory, undisturbed.
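
A hedged sketch of that insertion point (using the UT_LIST macros this
file already uses and the buf_pool->LRU_old midpoint pointer; this is
not the literal buf0lru.c code):

	if (page_is_unlikely_to_be_reused) {
		(insert behind the midpoint, to be replaced sooner)
		UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU,
				     buf_pool->LRU_old, &block->page);
	} else {
		UT_LIST_ADD_FIRST(LRU, buf_pool->LRU, &block->page);
	}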
The unzip_LRU list contains a subset of the common LRU list. The
blocks on the unzip_LRU list hold a compressed file page and the
corresponding uncompressed page frame. A block is in unzip_LRU if and
only if the predicate buf_page_belongs_to_unzip_LRU(&block->page)
holds. The blocks in unzip_LRU will be in the same order as they are in
the common LRU list. That is, each manipulation of the common LRU
list will result in the same manipulation of the unzip_LRU list.
The chain of modified blocks (buf_pool->flush_list) contains the blocks
holding file pages that have been modified in the memory
but not written to disk yet. The block with the oldest modification
which has not yet been written to disk is at the end of the chain.
The access to this list is protected by buf_pool->flush_list_mutex.

The chain of unmodified compressed blocks (buf_pool->zip_clean)
contains the control blocks (buf_page_t) of those compressed pages
that are not in buf_pool->flush_list and for which no uncompressed
page has been allocated in the buffer pool. The control blocks for
uncompressed pages are accessible via buf_block_t objects that are
reachable via buf_pool->chunks[].

The chains of free memory blocks (buf_pool->zip_free[]) are used by
the buddy allocator (buf0buddy.c) to keep track of currently unused
memory blocks of size sizeof(buf_page_t)..UNIV_PAGE_SIZE / 2. These
blocks are inside the UNIV_PAGE_SIZE-sized memory blocks of type
BUF_BLOCK_MEMORY that the buddy allocator requests from the buffer
pool. The buddy allocator is solely used for allocating control
blocks for compressed pages (buf_page_t) and compressed page frames.
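
A hedged sketch of the size-class arithmetic (BUF_BUDDY_LOW is the real
smallest buddy block size from buf0buddy.h; the index variable is
illustrative, the real arithmetic lives in buf0buddy.c):

	ulint	i = 0;
	ulint	class_size = BUF_BUDDY_LOW;

	while (class_size < size) {	(round up to a power of 2)
		class_size <<= 1;
		i++;
	}
	(allocate from, or free to, buf_pool->zip_free[i])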
		Loading a file page
		-------------------

First, a victim block for replacement has to be found in the
buf_pool. It is taken from the free list or searched for from the
end of the LRU-list. An exclusive lock is reserved for the frame,
the io_fix field is set in the block fixing the block in buf_pool,
and the io-operation for loading the page is queued. The io-handler thread
releases the X-lock on the frame and resets the io_fix field
when the io operation completes.

A thread may request the above operation using the function
buf_page_get(). It may then continue to request a lock on the frame.
The lock is granted when the io-handler releases the x-lock.
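
Condensed into a hedged sketch (the io_fix setters and constants are the
real buf0buf names; find_victim() and queue_read() are illustrative
placeholders for the free-list/LRU search and the i/o queueing):

	block = find_victim(buf_pool);		(free list or LRU tail)
	rw_lock_x_lock(&block->lock);		(exclusive latch on frame)
	buf_block_set_io_fix(block, BUF_IO_READ); (fix block in buf_pool)
	queue_read(block);			(asynchronous page read)
	(... later, in the io-handler thread, when the read completes:)
	buf_block_set_io_fix(block, BUF_IO_NONE);
	rw_lock_x_unlock(&block->lock);		(waiting threads proceed)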
		Read-ahead mechanism
		--------------------

The read-ahead mechanism is intended to be intelligent and
isolated from the semantically higher levels of the database
index management. From the higher level we only need the
information if a file page has a natural successor or
predecessor page. On the leaf level of a B-tree index,
these are the next and previous pages in the natural
order of the pages.

Let us first explain the read-ahead mechanism when the leafs
of a B-tree are scanned in an ascending or descending order.
When a read page is referenced for the first time in the buf_pool,
the buffer manager checks if it is at the border of a so-called
linear read-ahead area. The tablespace is divided into these
areas of size 64 blocks, for example. So if the page is at the
border of such an area, the read-ahead mechanism checks if
all the other blocks in the area have been accessed in an
ascending or descending order. If this is the case, the system
looks at the natural successor or predecessor of the page,
checks if that is at the border of another area, and in this case
issues read-requests for all the pages in that area. Maybe
we could relax the condition that all the pages in the area
have to be accessed: if data is deleted from a table, there may
appear holes of unused pages in the area.
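
In outline, the border check is simple modular arithmetic (a hedged
sketch; the real logic lives in buf_read_ahead_linear() in buf0rea.c,
and linear_area_size stands in for the configured area size):

	area_start = page_no - (page_no % linear_area_size);

	if (page_no == area_start
	    || page_no == area_start + linear_area_size - 1) {
		(page is at the border of its area: if the other pages
		of the area were accessed in an ascending or descending
		order, issue reads for the whole neighbouring area)
	}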
A different read-ahead mechanism is used when there appears
to be a random access pattern to a file.
If a new page is referenced in the buf_pool, and several pages
of its random access area (for instance, 32 consecutive pages
in a tablespace) have recently been referenced, we may predict
that the whole area may be needed in the near future, and issue
the read requests for the whole area.
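
Sketched trigger (the threshold name is illustrative; the real check is
in buf_read_ahead_random() in buf0rea.c):

	recent = n_recently_accessed_pages_in_area(space, area);

	if (recent >= RANDOM_READ_AHEAD_THRESHOLD) {
		(issue read requests for every page of the area)
	}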
*/

#ifndef UNIV_HOTBACKUP
/** Value in microseconds */
static const int WAIT_FOR_READ	= 5000;
/** Number of attempts made to read in a page in the buffer pool */
static const ulint BUF_PAGE_READ_MAX_RETRIES = 100;

/** The buffer pools of the database */
UNIV_INTERN buf_pool_t*	buf_pool_ptr;

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
static ulint	buf_dbg_counter	= 0; /*!< This is used to insert validation
					operations in execution in the
					debug version */
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#ifdef UNIV_DEBUG
/** If this is set TRUE, the program prints info whenever
read-ahead or flush occurs */
UNIV_INTERN ibool	buf_debug_prints = FALSE;
#endif /* UNIV_DEBUG */
#ifdef UNIV_PFS_RWLOCK
/* Keys to register buffer block related rwlocks and mutexes with
performance schema */
UNIV_INTERN mysql_pfs_key_t	buf_block_lock_key;
# ifdef UNIV_SYNC_DEBUG
UNIV_INTERN mysql_pfs_key_t	buf_block_debug_latch_key;
# endif /* UNIV_SYNC_DEBUG */
#endif /* UNIV_PFS_RWLOCK */

#ifdef UNIV_PFS_MUTEX
UNIV_INTERN mysql_pfs_key_t	buffer_block_mutex_key;
UNIV_INTERN mysql_pfs_key_t	buf_pool_mutex_key;
UNIV_INTERN mysql_pfs_key_t	buf_pool_zip_mutex_key;
UNIV_INTERN mysql_pfs_key_t	flush_list_mutex_key;
#endif /* UNIV_PFS_MUTEX */

#if defined UNIV_PFS_MUTEX || defined UNIV_PFS_RWLOCK
# ifndef PFS_SKIP_BUFFER_MUTEX_RWLOCK

/* Buffer block mutexes and rwlocks can be registered
in one group rather than individually. If PFS_GROUP_BUFFER_SYNC
is defined, register buffer block mutex and rwlock
in one group after their initialization. */
#  define PFS_GROUP_BUFFER_SYNC

/* This define caps the number of mutexes/rwlocks that can
be registered with performance schema. Developers can
modify this define if necessary. Please note, this would
be effective only if PFS_GROUP_BUFFER_SYNC is defined. */
#  define PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER	ULINT_MAX

# endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
#endif /* UNIV_PFS_MUTEX || UNIV_PFS_RWLOCK */
/** A chunk of buffers. The buffer pool is allocated in chunks. */
struct buf_chunk_struct{
	ulint		mem_size;	/*!< allocated size of the chunk */
	ulint		size;		/*!< size of frames[] and blocks[] */
	void*		mem;		/*!< pointer to the memory area which
					was allocated for the frames */
	buf_block_t*	blocks;		/*!< array of buffer control blocks */
};
#endif /* !UNIV_HOTBACKUP */
/********************************************************************//**
Gets the smallest oldest_modification lsn for any page in the pool. Returns
zero if all modified pages have been flushed to disk.
@return oldest modification in pool, zero if none */
UNIV_INTERN
ib_uint64_t
buf_pool_get_oldest_modification(void)
/*==================================*/
{
	ulint		i;
	buf_page_t*	bpage;
	ib_uint64_t	lsn = 0;
	ib_uint64_t	oldest_lsn = 0;

	/* When we traverse all the flush lists we don't want another
	thread to add a dirty page to any flush list. */
	log_flush_order_mutex_enter();

	for (i = 0; i < srv_buf_pool_instances; i++) {
		buf_pool_t*	buf_pool;

		buf_pool = buf_pool_from_array(i);

		buf_flush_list_mutex_enter(buf_pool);

		bpage = UT_LIST_GET_LAST(buf_pool->flush_list);

		if (bpage != NULL) {
			ut_ad(bpage->in_flush_list);
			lsn = bpage->oldest_modification;
		}

		buf_flush_list_mutex_exit(buf_pool);

		if (!oldest_lsn || oldest_lsn > lsn) {
			oldest_lsn = lsn;
		}
	}

	log_flush_order_mutex_exit();

	/* The returned answer may be out of date: the flush_list can
	change after the mutex has been released. */

	return(oldest_lsn);
}
/********************************************************************//**
Get total buffer pool statistics. */
UNIV_INTERN
void
buf_get_total_list_len(
/*===================*/
	ulint*		LRU_len,	/*!< out: length of all LRU lists */
	ulint*		free_len,	/*!< out: length of all free lists */
	ulint*		flush_list_len)	/*!< out: length of all flush lists */
{
	ulint		i;

	*LRU_len = 0;
	*free_len = 0;
	*flush_list_len = 0;

	for (i = 0; i < srv_buf_pool_instances; i++) {
		buf_pool_t*	buf_pool;

		buf_pool = buf_pool_from_array(i);

		*LRU_len += UT_LIST_GET_LEN(buf_pool->LRU);
		*free_len += UT_LIST_GET_LEN(buf_pool->free);
		*flush_list_len += UT_LIST_GET_LEN(buf_pool->flush_list);
	}
}
/********************************************************************//**
Get total buffer pool statistics. */
UNIV_INTERN
void
buf_get_total_stat(
/*===============*/
	buf_pool_stat_t*	tot_stat)	/*!< out: buffer pool stats */
{
	ulint			i;

	memset(tot_stat, 0, sizeof(*tot_stat));

	for (i = 0; i < srv_buf_pool_instances; i++) {
		buf_pool_stat_t*	buf_stat;
		buf_pool_t*		buf_pool;

		buf_pool = buf_pool_from_array(i);

		buf_stat = &buf_pool->stat;

		tot_stat->n_page_gets += buf_stat->n_page_gets;
		tot_stat->n_pages_read += buf_stat->n_pages_read;
		tot_stat->n_pages_written += buf_stat->n_pages_written;
		tot_stat->n_pages_created += buf_stat->n_pages_created;
		tot_stat->n_ra_pages_read += buf_stat->n_ra_pages_read;
		tot_stat->n_ra_pages_evicted += buf_stat->n_ra_pages_evicted;
		tot_stat->n_pages_made_young += buf_stat->n_pages_made_young;

		tot_stat->n_pages_not_made_young +=
			buf_stat->n_pages_not_made_young;
	}
}
/********************************************************************//**
Allocates a buffer block.
@return own: the allocated block, in state BUF_BLOCK_MEMORY */
UNIV_INTERN
buf_block_t*
buf_block_alloc(
/*============*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	ulint		zip_size)	/*!< in: compressed page size in bytes,
					or 0 if uncompressed tablespace */
{
	buf_block_t*	block;
	ulint		index;
	static ulint	buf_pool_index;

	if (buf_pool == NULL) {
		/* We are allocating memory from any buffer pool, ensure
		we spread the grace on all buffer pool instances. */
		index = buf_pool_index++ % srv_buf_pool_instances;
		buf_pool = buf_pool_from_array(index);
	}

	block = buf_LRU_get_free_block(buf_pool, zip_size);

	buf_block_set_state(block, BUF_BLOCK_MEMORY);

	return(block);
}
/********************************************************************//**
Calculates a page checksum which is stored to the page when it is written
to a file. Note that we must be careful to calculate the same value on
32-bit and 64-bit architectures.
@return checksum */
UNIV_INTERN
ulint
buf_calc_page_new_checksum(
/*=======================*/
	const byte*	page)	/*!< in: buffer page */
{
	ulint checksum;

	/* Since the field FIL_PAGE_FILE_FLUSH_LSN, and in versions <= 4.1.x
	..._ARCH_LOG_NO, are written outside the buffer pool to the first
	pages of data files, we have to skip them in the page checksum
	calculation.
	We must also skip the field FIL_PAGE_SPACE_OR_CHKSUM where the
	checksum is stored, and also the last 8 bytes of the page because
	there we store the old formula checksum. */

	checksum = ut_fold_binary(page + FIL_PAGE_OFFSET,
				  FIL_PAGE_FILE_FLUSH_LSN - FIL_PAGE_OFFSET)
		+ ut_fold_binary(page + FIL_PAGE_DATA,
				 UNIV_PAGE_SIZE - FIL_PAGE_DATA
				 - FIL_PAGE_END_LSN_OLD_CHKSUM);
	checksum = checksum & 0xFFFFFFFFUL;

	return(checksum);
}
/********************************************************************//**
In versions < 4.0.14 and < 4.1.1 there was a bug that the checksum only
looked at the first few bytes of the page. This calculates that old
checksum.
NOTE: we must first store the new formula checksum to
FIL_PAGE_SPACE_OR_CHKSUM before calculating and storing this old checksum
because this takes that field as an input!
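
As a hedged illustration of that ordering (a sketch, not the actual
page-flush code; mach_write_to_4() and the field offsets are the real
mach0data/fil0fil names):

	checksum = buf_calc_page_new_checksum(page);
	mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM, checksum);

	(only now, because the old formula folds the bytes just written:)

	checksum = buf_calc_page_old_checksum(page);
	mach_write_to_4(page + UNIV_PAGE_SIZE
			- FIL_PAGE_END_LSN_OLD_CHKSUM, checksum);

@return checksum */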
UNIV_INTERN
ulint
buf_calc_page_old_checksum(
/*=======================*/
	const byte*	page)	/*!< in: buffer page */
{
	ulint checksum;

	checksum = ut_fold_binary(page, FIL_PAGE_FILE_FLUSH_LSN);

	checksum = checksum & 0xFFFFFFFFUL;

	return(checksum);
}
/********************************************************************//**
Checks if a page is corrupt.
@return TRUE if corrupted */
UNIV_INTERN
ibool
buf_page_is_corrupted(
/*==================*/
	const byte*	read_buf,	/*!< in: a database page */
	ulint		zip_size)	/*!< in: size of compressed page;
					0 for uncompressed pages */
{
	ulint		checksum_field;
	ulint		old_checksum_field;

	if (UNIV_LIKELY(!zip_size)
	    && memcmp(read_buf + FIL_PAGE_LSN + 4,
		      read_buf + UNIV_PAGE_SIZE
		      - FIL_PAGE_END_LSN_OLD_CHKSUM + 4, 4)) {

		/* Stored log sequence numbers at the start and the end
		of page do not match */

		return(TRUE);
	}

#ifndef UNIV_HOTBACKUP
	if (recv_lsn_checks_on) {
		ib_uint64_t	current_lsn;

		if (log_peek_lsn(&current_lsn)
		    && UNIV_UNLIKELY(current_lsn
				     < mach_read_from_8(read_buf
							+ FIL_PAGE_LSN))) {
			ut_print_timestamp(stderr);

			fprintf(stderr,
				" InnoDB: Error: page %lu log sequence number"
				" %"PRIu64"\n"
				"InnoDB: is in the future! Current system "
				"log sequence number %"PRIu64".\n"
				"InnoDB: Your database may be corrupt or "
				"you may have copied the InnoDB\n"
				"InnoDB: tablespace but not the InnoDB "
				"log files. See\n"
				"InnoDB: " REFMAN "forcing-recovery.html\n"
				"InnoDB: for more information.\n",
				(ulong) mach_read_from_4(read_buf
							 + FIL_PAGE_OFFSET),
				mach_read_from_8(read_buf + FIL_PAGE_LSN),
				current_lsn);
		}
	}
#endif /* !UNIV_HOTBACKUP */

	/* If we use checksums validation, make additional check before
	returning TRUE to ensure that the checksum is not equal to
	BUF_NO_CHECKSUM_MAGIC which might be stored by InnoDB with checksums
	disabled. Otherwise, skip checksum calculation and return FALSE */

	if (UNIV_LIKELY(srv_use_checksums)) {
		checksum_field = mach_read_from_4(read_buf
						  + FIL_PAGE_SPACE_OR_CHKSUM);

		if (UNIV_UNLIKELY(zip_size)) {
			return(checksum_field != BUF_NO_CHECKSUM_MAGIC
			       && checksum_field
			       != page_zip_calc_checksum(read_buf, zip_size));
		}

		old_checksum_field = mach_read_from_4(
			read_buf + UNIV_PAGE_SIZE
			- FIL_PAGE_END_LSN_OLD_CHKSUM);

		/* There are 2 valid formulas for old_checksum_field:

		1. Very old versions of InnoDB only stored 8 byte lsn to the
		start and the end of the page.

		2. Newer InnoDB versions store the old formula checksum
		there. */

		if (old_checksum_field != mach_read_from_4(read_buf
							   + FIL_PAGE_LSN)
		    && old_checksum_field != BUF_NO_CHECKSUM_MAGIC
		    && old_checksum_field
		    != buf_calc_page_old_checksum(read_buf)) {

			return(TRUE);
		}

		/* InnoDB versions < 4.0.14 and < 4.1.1 stored the space id
		(always equal to 0), to FIL_PAGE_SPACE_OR_CHKSUM */

		if (checksum_field != 0
		    && checksum_field != BUF_NO_CHECKSUM_MAGIC
		    && checksum_field
		    != buf_calc_page_new_checksum(read_buf)) {

			return(TRUE);
		}
	}

	return(FALSE);
}
/********************************************************************//**
Prints a page to stderr. */
UNIV_INTERN
void
buf_page_print(
/*===========*/
	const byte*	read_buf,	/*!< in: a database page */
	ulint		zip_size)	/*!< in: compressed page size, or
					0 for uncompressed pages */
{
#ifndef UNIV_HOTBACKUP
	dict_index_t*	index;
#endif /* !UNIV_HOTBACKUP */
	ulint		checksum;
	ulint		old_checksum;
	ulint		size	= zip_size;

	if (!size) {
		size = UNIV_PAGE_SIZE;
	}

	ut_print_timestamp(stderr);
	fprintf(stderr, " InnoDB: Page dump in ascii and hex (%lu bytes):\n",
		(ulong) size);
	ut_print_buf(stderr, read_buf, size);
	fputs("\nInnoDB: End of page dump\n", stderr);
	if (zip_size) {
		/* Print compressed page. */

		switch (fil_page_get_type(read_buf)) {
		case FIL_PAGE_TYPE_ZBLOB:
		case FIL_PAGE_TYPE_ZBLOB2:
			checksum = srv_use_checksums
				? page_zip_calc_checksum(read_buf, zip_size)
				: BUF_NO_CHECKSUM_MAGIC;
			ut_print_timestamp(stderr);
			fprintf(stderr,
				" InnoDB: Compressed BLOB page"
				" checksum %lu, stored %lu\n"
				"InnoDB: Page lsn %lu %lu\n"
				"InnoDB: Page number (if stored"
				" to page already) %lu,\n"
				"InnoDB: space id (if stored"
				" to page already) %lu\n",
				(ulong) checksum,
				(ulong) mach_read_from_4(
					read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
				(ulong) mach_read_from_4(
					read_buf + FIL_PAGE_LSN),
				(ulong) mach_read_from_4(
					read_buf + (FIL_PAGE_LSN + 4)),
				(ulong) mach_read_from_4(
					read_buf + FIL_PAGE_OFFSET),
				(ulong) mach_read_from_4(
					read_buf
					+ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
			return;
		default:
			ut_print_timestamp(stderr);
			fprintf(stderr,
				" InnoDB: unknown page type %lu,"
				" assuming FIL_PAGE_INDEX\n",
				fil_page_get_type(read_buf));
			/* fall through */
		case FIL_PAGE_INDEX:
			checksum = srv_use_checksums
				? page_zip_calc_checksum(read_buf, zip_size)
				: BUF_NO_CHECKSUM_MAGIC;

			ut_print_timestamp(stderr);
			fprintf(stderr,
				" InnoDB: Compressed page checksum %lu,"
				" stored %lu\n"
				"InnoDB: Page lsn %lu %lu\n"
				"InnoDB: Page number (if stored"
				" to page already) %lu,\n"
				"InnoDB: space id (if stored"
				" to page already) %lu\n",
				(ulong) checksum,
				(ulong) mach_read_from_4(
					read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
				(ulong) mach_read_from_4(
					read_buf + FIL_PAGE_LSN),
				(ulong) mach_read_from_4(
					read_buf + (FIL_PAGE_LSN + 4)),
				(ulong) mach_read_from_4(
					read_buf + FIL_PAGE_OFFSET),
				(ulong) mach_read_from_4(
					read_buf
					+ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
			return;
		case FIL_PAGE_TYPE_XDES:
			/* This is an uncompressed page. */
			break;
		}
	}
	checksum = srv_use_checksums
		? buf_calc_page_new_checksum(read_buf) : BUF_NO_CHECKSUM_MAGIC;
	old_checksum = srv_use_checksums
		? buf_calc_page_old_checksum(read_buf) : BUF_NO_CHECKSUM_MAGIC;

	ut_print_timestamp(stderr);
	fprintf(stderr,
		" InnoDB: Page checksum %lu, prior-to-4.0.14-form"
		" checksum %lu\n"
		"InnoDB: stored checksum %lu, prior-to-4.0.14-form"
		" stored checksum %lu\n"
		"InnoDB: Page lsn %lu %lu, low 4 bytes of lsn"
		" at page end %lu\n"
		"InnoDB: Page number (if stored to page already) %lu,\n"
		"InnoDB: space id (if created with >= MySQL-4.1.1"
		" and stored already) %lu\n",
		(ulong) checksum, (ulong) old_checksum,
		(ulong) mach_read_from_4(read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
		(ulong) mach_read_from_4(read_buf + UNIV_PAGE_SIZE
					 - FIL_PAGE_END_LSN_OLD_CHKSUM),
		(ulong) mach_read_from_4(read_buf + FIL_PAGE_LSN),
		(ulong) mach_read_from_4(read_buf + FIL_PAGE_LSN + 4),
		(ulong) mach_read_from_4(read_buf + UNIV_PAGE_SIZE
					 - FIL_PAGE_END_LSN_OLD_CHKSUM + 4),
		(ulong) mach_read_from_4(read_buf + FIL_PAGE_OFFSET),
		(ulong) mach_read_from_4(read_buf
					 + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
#ifndef UNIV_HOTBACKUP
	if (mach_read_from_2(read_buf + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE)
	    == TRX_UNDO_INSERT) {
		fprintf(stderr,
			"InnoDB: Page may be an insert undo log page\n");
	} else if (mach_read_from_2(read_buf + TRX_UNDO_PAGE_HDR
				    + TRX_UNDO_PAGE_TYPE)
		   == TRX_UNDO_UPDATE) {
		fprintf(stderr,
			"InnoDB: Page may be an update undo log page\n");
	}
#endif /* !UNIV_HOTBACKUP */
	switch (fil_page_get_type(read_buf)) {
		index_id_t	index_id;
	case FIL_PAGE_INDEX:
		index_id = btr_page_get_index_id(read_buf);
		fprintf(stderr,
			"InnoDB: Page may be an index page where"
			" index id is %llu\n",
			(ullint) index_id);
#ifndef UNIV_HOTBACKUP
		index = dict_index_find_on_id_low(index_id);
		if (index) {
			fputs("InnoDB: (", stderr);
			dict_index_name_print(stderr, NULL, index);
			fputs(")\n", stderr);
		}
#endif /* !UNIV_HOTBACKUP */
		break;
	case FIL_PAGE_INODE:
		fputs("InnoDB: Page may be an 'inode' page\n", stderr);
		break;
	case FIL_PAGE_IBUF_FREE_LIST:
		fputs("InnoDB: Page may be an insert buffer free list page\n",
		      stderr);
		break;
	case FIL_PAGE_TYPE_ALLOCATED:
		fputs("InnoDB: Page may be a freshly allocated page\n",
		      stderr);
		break;
	case FIL_PAGE_IBUF_BITMAP:
		fputs("InnoDB: Page may be an insert buffer bitmap page\n",
		      stderr);
		break;
	case FIL_PAGE_TYPE_SYS:
		fputs("InnoDB: Page may be a system page\n",
		      stderr);
		break;
	case FIL_PAGE_TYPE_TRX_SYS:
		fputs("InnoDB: Page may be a transaction system page\n",
		      stderr);
		break;
	case FIL_PAGE_TYPE_FSP_HDR:
		fputs("InnoDB: Page may be a file space header page\n",
		      stderr);
		break;
	case FIL_PAGE_TYPE_XDES:
		fputs("InnoDB: Page may be an extent descriptor page\n",
		      stderr);
		break;
	case FIL_PAGE_TYPE_BLOB:
		fputs("InnoDB: Page may be a BLOB page\n",
		      stderr);
		break;
	case FIL_PAGE_TYPE_ZBLOB:
	case FIL_PAGE_TYPE_ZBLOB2:
		fputs("InnoDB: Page may be a compressed BLOB page\n",
		      stderr);
		break;
	}
}
#ifndef UNIV_HOTBACKUP

# ifdef PFS_GROUP_BUFFER_SYNC
/********************************************************************//**
This function registers mutexes and rwlocks in buffer blocks with
performance schema. If PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER is
defined to be a value less than chunk->size, then only mutexes
and rwlocks in the first PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER
blocks are registered. */
static
void
pfs_register_buffer_block(
/*======================*/
	buf_chunk_t*	chunk)	/*!< in/out: chunk of buffers */
{
	ulint		i;
	ulint		num_to_register;
	buf_block_t*	block;

	block = chunk->blocks;

	num_to_register = ut_min(chunk->size,
				 PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER);

	for (i = 0; i < num_to_register; i++) {
		mutex_t*	mutex;
		rw_lock_t*	rwlock;

# ifdef UNIV_PFS_MUTEX
		mutex = &block->mutex;
		ut_a(!mutex->pfs_psi);
		mutex->pfs_psi = (PSI_server)
			? PSI_server->init_mutex(buffer_block_mutex_key, mutex)
			: NULL;
# endif /* UNIV_PFS_MUTEX */

# ifdef UNIV_PFS_RWLOCK
		rwlock = &block->lock;
		ut_a(!rwlock->pfs_psi);
		rwlock->pfs_psi = (PSI_server)
			? PSI_server->init_rwlock(buf_block_lock_key, rwlock)
			: NULL;
# endif /* UNIV_PFS_RWLOCK */
		block++;
	}
}
# endif /* PFS_GROUP_BUFFER_SYNC */
/********************************************************************//**
Initializes a buffer control block when the buf_pool is created. */
static
void
buf_block_init(
/*===========*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	buf_block_t*	block,		/*!< in: pointer to control block */
	byte*		frame)		/*!< in: pointer to buffer frame */
{
	UNIV_MEM_DESC(frame, UNIV_PAGE_SIZE, block);

	block->frame = frame;

	block->page.buf_pool_index = buf_pool_index(buf_pool);
	block->page.state = BUF_BLOCK_NOT_USED;
	block->page.buf_fix_count = 0;
	block->page.io_fix = BUF_IO_NONE;

	block->modify_clock = 0;

#ifdef UNIV_DEBUG_FILE_ACCESSES
	block->page.file_page_was_freed = FALSE;
#endif /* UNIV_DEBUG_FILE_ACCESSES */

	block->check_index_page_at_flush = FALSE;
	block->index = NULL;

	block->is_hashed = FALSE;

#ifdef UNIV_DEBUG
	block->page.in_page_hash = FALSE;
	block->page.in_zip_hash = FALSE;
	block->page.in_flush_list = FALSE;
	block->page.in_free_list = FALSE;
	block->page.in_LRU_list = FALSE;
	block->in_unzip_LRU_list = FALSE;
#endif /* UNIV_DEBUG */
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
	block->n_pointers = 0;
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
	page_zip_des_init(&block->page.zip);

#if defined PFS_SKIP_BUFFER_MUTEX_RWLOCK || defined PFS_GROUP_BUFFER_SYNC
	/* If PFS_SKIP_BUFFER_MUTEX_RWLOCK is defined, skip registration
	of buffer block mutex/rwlock with performance schema. If
	PFS_GROUP_BUFFER_SYNC is defined, skip the registration
	since buffer block mutex/rwlock will be registered later in
	pfs_register_buffer_block() */

	mutex_create(PFS_NOT_INSTRUMENTED, &block->mutex, SYNC_BUF_BLOCK);
	rw_lock_create(PFS_NOT_INSTRUMENTED, &block->lock, SYNC_LEVEL_VARYING);
#else /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */
	mutex_create(buffer_block_mutex_key, &block->mutex, SYNC_BUF_BLOCK);
	rw_lock_create(buf_block_lock_key, &block->lock, SYNC_LEVEL_VARYING);
#endif /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */

	ut_ad(rw_lock_validate(&(block->lock)));

#ifdef UNIV_SYNC_DEBUG
	rw_lock_create(buf_block_debug_latch_key,
		       &block->debug_latch, SYNC_NO_ORDER_CHECK);
#endif /* UNIV_SYNC_DEBUG */
}
/********************************************************************//**
Allocates a chunk of buffer frames.
@return chunk, or NULL on failure */
static
buf_chunk_t*
buf_chunk_init(
/*===========*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	buf_chunk_t*	chunk,		/*!< out: chunk of buffers */
	ulint		mem_size)	/*!< in: requested size in bytes */
{
	buf_block_t*	block;
	byte*		frame;
	ulint		i;

	/* Round down to a multiple of page size,
	although it already should be. */
	mem_size = ut_2pow_round(mem_size, UNIV_PAGE_SIZE);
	/* Reserve space for the block descriptors. */
	mem_size += ut_2pow_round((mem_size / UNIV_PAGE_SIZE) * (sizeof *block)
				  + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE);

	chunk->mem_size = mem_size;
	chunk->mem = os_mem_alloc_large(&chunk->mem_size);

	if (UNIV_UNLIKELY(chunk->mem == NULL)) {

		return(NULL);
	}

	/* Allocate the block descriptors from
	the start of the memory block. */
	chunk->blocks = static_cast<buf_block_struct *>(chunk->mem);

	/* Align a pointer to the first frame. Note that when
	os_large_page_size is smaller than UNIV_PAGE_SIZE,
	we may allocate one fewer block than requested. When
	it is bigger, we may allocate more blocks than requested. */

	frame = static_cast<unsigned char *>(
		ut_align(chunk->mem, UNIV_PAGE_SIZE));
	chunk->size = chunk->mem_size / UNIV_PAGE_SIZE
		- (frame != chunk->mem);

	/* Subtract the space needed for block descriptors. */
	{
		ulint	size = chunk->size;

		while (frame < (byte*) (chunk->blocks + size)) {
			frame += UNIV_PAGE_SIZE;
			size--;
		}

		chunk->size = size;
	}

	/* Init block structs and assign frames for them. Then we
	assign the frames to the first blocks (we already mapped the
	memory above). */

	block = chunk->blocks;

	for (i = chunk->size; i--; ) {

		buf_block_init(buf_pool, block, frame);

		/* Wipe contents of frame to eliminate a Purify warning */
		memset(block->frame, '\0', UNIV_PAGE_SIZE);

		/* Add the block to the free list */
		UT_LIST_ADD_LAST(list, buf_pool->free, (&block->page));

		ut_d(block->page.in_free_list = TRUE);
		ut_ad(buf_pool_from_block(block) == buf_pool);

		block++;
		frame += UNIV_PAGE_SIZE;
	}

#ifdef PFS_GROUP_BUFFER_SYNC
	pfs_register_buffer_block(chunk);
#endif /* PFS_GROUP_BUFFER_SYNC */
	return(chunk);
}
/*********************************************************************//**
Finds a block in the given buffer chunk that points to a
given compressed page.
@return buffer block pointing to the compressed page, or NULL */
static
buf_block_t*
buf_chunk_contains_zip(
/*===================*/
	buf_chunk_t*	chunk,	/*!< in: chunk being checked */
	const void*	data)	/*!< in: pointer to compressed page */
{
	buf_block_t*	block;
	ulint		i;

	block = chunk->blocks;

	for (i = chunk->size; i--; block++) {
		if (block->page.zip.data == data) {

			return(block);
		}
	}

	return(NULL);
}
/*********************************************************************//**
Finds a block in the buffer pool that points to a
given compressed page.
@return buffer block pointing to the compressed page, or NULL */
UNIV_INTERN
buf_block_t*
buf_pool_contains_zip(
/*==================*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	const void*	data)		/*!< in: pointer to compressed page */
{
	ulint		n;
	buf_chunk_t*	chunk = buf_pool->chunks;

	ut_ad(buf_pool_mutex_own(buf_pool));
	for (n = buf_pool->n_chunks; n--; chunk++) {

		buf_block_t* block = buf_chunk_contains_zip(chunk, data);

		if (block) {
			return(block);
		}
	}

	return(NULL);
}
#endif /* UNIV_DEBUG */
/*********************************************************************//**
Checks that all file pages in the buffer chunk are in a replaceable state.
@return address of a non-free block, or NULL if all freed */
static
const buf_block_t*
buf_chunk_not_freed(
/*================*/
	buf_chunk_t*	chunk)	/*!< in: chunk being checked */
{
	buf_block_t*	block;
	ulint		i;

	block = chunk->blocks;

	for (i = chunk->size; i--; block++) {
		ibool	ready;

		switch (buf_block_get_state(block)) {
		case BUF_BLOCK_ZIP_FREE:
		case BUF_BLOCK_ZIP_PAGE:
		case BUF_BLOCK_ZIP_DIRTY:
			/* The uncompressed buffer pool should never
			contain compressed block descriptors. */
			ut_error;
			break;
		case BUF_BLOCK_NOT_USED:
		case BUF_BLOCK_READY_FOR_USE:
		case BUF_BLOCK_MEMORY:
		case BUF_BLOCK_REMOVE_HASH:
			/* Skip blocks that are not being used for
			file pages. */
			break;
		case BUF_BLOCK_FILE_PAGE:
			mutex_enter(&block->mutex);
			ready = buf_flush_ready_for_replace(&block->page);
			mutex_exit(&block->mutex);

			if (!ready) {

				return(block);
			}

			break;
		}
	}

	return(NULL);
}
/*********************************************************************//**
Checks that all blocks in the buffer chunk are in BUF_BLOCK_NOT_USED state.
@return TRUE if all freed */
static
ibool
buf_chunk_all_free(
/*===============*/
	const buf_chunk_t*	chunk)	/*!< in: chunk being checked */
{
	const buf_block_t*	block;
	ulint			i;

	block = chunk->blocks;

	for (i = chunk->size; i--; block++) {

		if (buf_block_get_state(block) != BUF_BLOCK_NOT_USED) {

			return(FALSE);
		}
	}

	return(TRUE);
}
/********************************************************************//**
Frees a chunk of buffer frames. */
static
void
buf_chunk_free(
/*===========*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	buf_chunk_t*	chunk)		/*!< out: chunk of buffers */
{
	buf_block_t*		block;
	const buf_block_t*	block_end;

	ut_ad(buf_pool_mutex_own(buf_pool));

	block_end = chunk->blocks + chunk->size;

	for (block = chunk->blocks; block < block_end; block++) {
		ut_a(buf_block_get_state(block) == BUF_BLOCK_NOT_USED);
		ut_a(!block->page.zip.data);

		ut_ad(!block->page.in_LRU_list);
		ut_ad(!block->in_unzip_LRU_list);
		ut_ad(!block->page.in_flush_list);
		/* Remove the block from the free list. */
		ut_ad(block->page.in_free_list);
		UT_LIST_REMOVE(list, buf_pool->free, (&block->page));

		/* Free the latches. */
		mutex_free(&block->mutex);
		rw_lock_free(&block->lock);
#ifdef UNIV_SYNC_DEBUG
		rw_lock_free(&block->debug_latch);
#endif /* UNIV_SYNC_DEBUG */
		UNIV_MEM_UNDESC(block);
	}

	os_mem_free_large(chunk->mem, chunk->mem_size);
}
/********************************************************************//**
Set buffer pool size variables after resizing it */
static
void
buf_pool_set_sizes(void)
/*====================*/
{
	ulint	i;
	ulint	curr_size = 0;

	buf_pool_mutex_enter_all();

	for (i = 0; i < srv_buf_pool_instances; i++) {
		buf_pool_t*	buf_pool;

		buf_pool = buf_pool_from_array(i);
		curr_size += buf_pool->curr_pool_size;
	}

	srv_buf_pool_curr_size = curr_size;
	srv_buf_pool_old_size = srv_buf_pool_size;

	buf_pool_mutex_exit_all();
}
/********************************************************************//**
Initialize a buffer pool instance.
@return DB_SUCCESS if all goes well. */
static
ulint
buf_pool_init_instance(
/*===================*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	ulint		buf_pool_size,	/*!< in: size in bytes */
	ulint		instance_no)	/*!< in: id of the instance */
{
	ulint		i;
	buf_chunk_t*	chunk;

	/* 1. Initialize general fields
	------------------------------- */
	mutex_create(buf_pool_mutex_key,
		     &buf_pool->mutex, SYNC_BUF_POOL);
	mutex_create(buf_pool_zip_mutex_key,
		     &buf_pool->zip_mutex, SYNC_BUF_BLOCK);

	buf_pool_mutex_enter(buf_pool);

	if (buf_pool_size > 0) {
		buf_pool->n_chunks = 1;
		void* chunk_ptr = mem_zalloc((sizeof *chunk));
		buf_pool->chunks = chunk
			= static_cast<buf_chunk_t *>(chunk_ptr);

		UT_LIST_INIT(buf_pool->free);

		if (!buf_chunk_init(buf_pool, chunk, buf_pool_size)) {
			mem_free(chunk);

			buf_pool_mutex_exit(buf_pool);

			return(DB_ERROR);
		}

		buf_pool->instance_no = instance_no;
		buf_pool->old_pool_size = buf_pool_size;
		buf_pool->curr_size = chunk->size;
		buf_pool->curr_pool_size = buf_pool->curr_size * UNIV_PAGE_SIZE;

		buf_pool->page_hash = hash_create(2 * buf_pool->curr_size);
		buf_pool->zip_hash = hash_create(2 * buf_pool->curr_size);

		buf_pool->last_printout_time = ut_time();
	}
	/* 2. Initialize flushing fields
	-------------------------------- */

	mutex_create(flush_list_mutex_key, &buf_pool->flush_list_mutex,
		     SYNC_BUF_FLUSH_LIST);

	for (i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) {
		buf_pool->no_flush[i] = os_event_create(NULL);
	}

	/* 3. Initialize LRU fields
	--------------------------- */

	/* All fields are initialized by mem_zalloc(). */

	buf_pool_mutex_exit(buf_pool);

	return(DB_SUCCESS);
}
/********************************************************************//**
Free one buffer pool instance. */
static
void
buf_pool_free_instance(
/*===================*/
	buf_pool_t*	buf_pool)	/* in,own: buffer pool instance
					to free */
{
	buf_chunk_t*	chunk;
	buf_chunk_t*	chunks;

	chunks = buf_pool->chunks;
	chunk = chunks + buf_pool->n_chunks;

	while (--chunk >= chunks) {
		/* Bypass the checks of buf_chunk_free(), since they
		would fail at shutdown. */
		os_mem_free_large(chunk->mem, chunk->mem_size);
	}

	mem_free(buf_pool->chunks);
	hash_table_free(buf_pool->page_hash);
	hash_table_free(buf_pool->zip_hash);
}
/********************************************************************//**
Creates the buffer pool.
@return DB_SUCCESS if success, DB_ERROR if not enough memory or error */
UNIV_INTERN
ulint
buf_pool_init(
/*==========*/
	ulint	total_size,	/*!< in: size of the total pool in bytes */
	ulint	n_instances)	/*!< in: number of instances */
{
	ulint		i;
	const ulint	size	= total_size / n_instances;

	ut_ad(n_instances > 0);
	ut_ad(n_instances <= MAX_BUFFER_POOLS);
	ut_ad(n_instances == srv_buf_pool_instances);

	/* We create an extra buffer pool instance, this instance is used
	for flushing the flush lists, to keep track of n_flush for all
	the buffer pools and also used as a waiting object during flushing. */
	void* buf_pool_void_ptr = mem_zalloc(n_instances * sizeof *buf_pool_ptr);
	buf_pool_ptr = static_cast<buf_pool_struct *>(buf_pool_void_ptr);

	for (i = 0; i < n_instances; i++) {
		buf_pool_t*	ptr	= &buf_pool_ptr[i];

		if (buf_pool_init_instance(ptr, size, i) != DB_SUCCESS) {

			/* Free all the instances created so far. */
			buf_pool_free(i);

			return(DB_ERROR);
		}
	}

	buf_pool_set_sizes();
	buf_LRU_old_ratio_update(100 * 3 / 8, FALSE);

	btr_search_sys_create(buf_pool_get_curr_size() / sizeof(void*) / 64);

	return(DB_SUCCESS);
}
/********************************************************************//**
Frees the buffer pool at shutdown. This must not be invoked before
freeing all mutexes. */
UNIV_INTERN
void
buf_pool_free(
/*==========*/
	ulint	n_instances)	/*!< in: number of instances to free */
{
	ulint	i;

	for (i = 0; i < n_instances; i++) {
		buf_pool_free_instance(buf_pool_from_array(i));
	}

	mem_free(buf_pool_ptr);
	buf_pool_ptr = NULL;
}
/********************************************************************//**
Drops adaptive hash index for a buffer pool instance. */
static
void
buf_pool_drop_hash_index_instance(
/*==============================*/
	buf_pool_t*	buf_pool,		/*!< in: buffer pool instance */
	ibool*		released_search_latch)	/*!< out: flag for signalling
						whether the search latch was
						released */
{
	buf_chunk_t*	chunks	= buf_pool->chunks;
	buf_chunk_t*	chunk	= chunks + buf_pool->n_chunks;

	while (--chunk >= chunks) {
		ulint		i;
		buf_block_t*	block	= chunk->blocks;

		for (i = chunk->size; i--; block++) {
			/* block->is_hashed cannot be modified
			when we have an x-latch on btr_search_latch;
			see the comment in buf0buf.h */

			if (!block->is_hashed) {
				continue;
			}

			/* To follow the latching order, we
			have to release btr_search_latch
			before acquiring block->latch. */
			rw_lock_x_unlock(&btr_search_latch);
			/* When we release the search latch,
			we must rescan all blocks, because
			some may become hashed again. */
			*released_search_latch = TRUE;

			rw_lock_x_lock(&block->lock);

			/* This should be guaranteed by the
			callers, which will be holding
			btr_search_enabled_mutex. */
			ut_ad(!btr_search_enabled);

			/* Because we did not buffer-fix the
			block by calling buf_block_get_gen(),
			it is possible that the block has been
			allocated for some other use after
			btr_search_latch was released above.
			We do not care which file page the
			block is mapped to. All we want to do
			is to drop any hash entries referring
			to the page. */

			/* It is possible that
			block->page.state != BUF_FILE_PAGE.
			Even that does not matter, because
			btr_search_drop_page_hash_index() will
			check block->is_hashed before doing
			anything. block->is_hashed can only
			be set on uncompressed file pages. */

			btr_search_drop_page_hash_index(block);

			rw_lock_x_unlock(&block->lock);

			rw_lock_x_lock(&btr_search_latch);

			ut_ad(!btr_search_enabled);
		}
	}
}
/********************************************************************//**
Drops the adaptive hash index. To prevent a livelock, this function
is only to be called while holding btr_search_latch and while
btr_search_enabled == FALSE. */
UNIV_INTERN
void
buf_pool_drop_hash_index(void)
/*==========================*/
{
	ibool	released_search_latch;

#ifdef UNIV_SYNC_DEBUG
	ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
	ut_ad(!btr_search_enabled);

	do {
		ulint	i;

		released_search_latch = FALSE;

		for (i = 0; i < srv_buf_pool_instances; i++) {
			buf_pool_t*	buf_pool;

			buf_pool = buf_pool_from_array(i);

			buf_pool_drop_hash_index_instance(
				buf_pool, &released_search_latch);
		}

	} while (released_search_latch);
}
/********************************************************************//**
Relocate a buffer control block. Relocates the block on the LRU list
and in buf_pool->page_hash. Does not relocate bpage->list.
The caller must take care of relocating bpage->list. */
UNIV_INTERN
void
buf_relocate(
/*=========*/
	buf_page_t*	bpage,	/*!< in/out: control block being relocated;
				buf_page_get_state(bpage) must be
				BUF_BLOCK_ZIP_DIRTY or BUF_BLOCK_ZIP_PAGE */
	buf_page_t*	dpage)	/*!< in/out: destination control block */
{
	buf_page_t*	b;
	ulint		fold;
	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);

	ut_ad(buf_pool_mutex_own(buf_pool));
	ut_ad(mutex_own(buf_page_get_mutex(bpage)));
	ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
	ut_a(bpage->buf_fix_count == 0);
	ut_ad(bpage->in_LRU_list);
	ut_ad(!bpage->in_zip_hash);
	ut_ad(bpage->in_page_hash);
	ut_ad(bpage == buf_page_hash_get(buf_pool,
					 bpage->space, bpage->offset));
	ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
#ifdef UNIV_DEBUG
	switch (buf_page_get_state(bpage)) {
	case BUF_BLOCK_ZIP_FREE:
	case BUF_BLOCK_NOT_USED:
	case BUF_BLOCK_READY_FOR_USE:
	case BUF_BLOCK_FILE_PAGE:
	case BUF_BLOCK_MEMORY:
	case BUF_BLOCK_REMOVE_HASH:
		ut_error;
	case BUF_BLOCK_ZIP_DIRTY:
	case BUF_BLOCK_ZIP_PAGE:
		break;
	}
#endif /* UNIV_DEBUG */

	memcpy(dpage, bpage, sizeof *dpage);

	ut_d(bpage->in_LRU_list = FALSE);
	ut_d(bpage->in_page_hash = FALSE);

	/* relocate buf_pool->LRU */
	b = UT_LIST_GET_PREV(LRU, bpage);
	UT_LIST_REMOVE(LRU, buf_pool->LRU, bpage);

	if (b) {
		UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU, b, dpage);
	} else {
		UT_LIST_ADD_FIRST(LRU, buf_pool->LRU, dpage);
	}

	if (UNIV_UNLIKELY(buf_pool->LRU_old == bpage)) {
		buf_pool->LRU_old = dpage;
#ifdef UNIV_LRU_DEBUG
		/* buf_pool->LRU_old must be the first item in the LRU list
		whose "old" flag is set. */
		ut_a(buf_pool->LRU_old->old);
		ut_a(!UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)
		     || !UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)->old);
		ut_a(!UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)
		     || UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)->old);
	} else {
		/* Check that the "old" flag is consistent in
		the block and its neighbours. */
		buf_page_set_old(dpage, buf_page_is_old(dpage));
#endif /* UNIV_LRU_DEBUG */
	}

	ut_d(UT_LIST_VALIDATE(LRU, buf_page_t, buf_pool->LRU,
			      ut_ad(ut_list_node_313->in_LRU_list)));

	/* relocate buf_pool->page_hash */
	fold = buf_page_address_fold(bpage->space, bpage->offset);

	HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, bpage);
	HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold, dpage);
}
/********************************************************************//**
Shrinks a buffer pool instance. */
static
void
buf_pool_shrink_instance(
/*=====================*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	ulint		chunk_size)	/*!< in: number of pages to remove */
{
	buf_chunk_t*	chunks;
	buf_chunk_t*	chunk;
	ulint		max_size;
	ulint		max_free_size;
	buf_chunk_t*	max_chunk;
	buf_chunk_t*	max_free_chunk;

	ut_ad(!buf_pool_mutex_own(buf_pool));

try_again:
	btr_search_disable(); /* Empty the adaptive hash index again */
	buf_pool_mutex_enter(buf_pool);

shrink_again:
	if (buf_pool->n_chunks <= 1) {

		/* Cannot shrink if there is only one chunk */
		goto func_done;
	}

	/* Search for the largest free chunk
	not larger than the size difference */
	chunks = buf_pool->chunks;
	chunk = chunks + buf_pool->n_chunks;
	max_size = max_free_size = 0;
	max_chunk = max_free_chunk = NULL;

	while (--chunk >= chunks) {
		if (chunk->size <= chunk_size
		    && chunk->size > max_free_size) {
			if (chunk->size > max_size) {
				max_size = chunk->size;
				max_chunk = chunk;
			}

			if (buf_chunk_all_free(chunk)) {
				max_free_size = chunk->size;
				max_free_chunk = chunk;
			}
		}
	}

	if (!max_free_size) {

		ulint		dirty	= 0;
		ulint		nonfree	= 0;
		buf_block_t*	block;
		buf_block_t*	bend;

		/* Cannot shrink: try again later
		(do not assign srv_buf_pool_old_size) */
		if (!max_chunk) {

			goto func_exit;
		}

		block = max_chunk->blocks;
		bend = block + max_chunk->size;

		/* Move the blocks of chunk to the end of the
		LRU list and try to flush them. */
		for (; block < bend; block++) {
			switch (buf_block_get_state(block)) {
			case BUF_BLOCK_NOT_USED:
				continue;
			case BUF_BLOCK_FILE_PAGE:
				break;
			default:
				nonfree++;
				continue;
			}

			mutex_enter(&block->mutex);
			/* The following calls will temporarily
			release block->mutex and buf_pool->mutex.
			Therefore, we have to always retry,
			even if !dirty && !nonfree. */

			if (!buf_flush_ready_for_replace(&block->page)) {

				buf_LRU_make_block_old(&block->page);
				dirty++;
			} else if (buf_LRU_free_block(&block->page, TRUE, NULL)
				   != BUF_LRU_FREED) {
				nonfree++;
			}

			mutex_exit(&block->mutex);
		}

		buf_pool_mutex_exit(buf_pool);

		/* Request for a flush of the chunk if it helps.
		Do not flush if there are non-free blocks, since
		flushing will not make the chunk freeable. */
		if (nonfree) {
			/* Avoid busy-waiting. */
			os_thread_sleep(100000);
		} else if (dirty
			   && buf_flush_LRU(buf_pool, dirty)
			   == ULINT_UNDEFINED) {

			buf_flush_wait_batch_end(buf_pool, BUF_FLUSH_LRU);
		}

		goto try_again;
	}

	max_size = max_free_size;
	max_chunk = max_free_chunk;

	buf_pool->old_pool_size = buf_pool->curr_pool_size;

	/* Rewrite buf_pool->chunks. Copy everything but max_chunk. */
	chunks = static_cast<buf_chunk_t *>(
		mem_alloc((buf_pool->n_chunks - 1) * sizeof *chunks));
	memcpy(chunks, buf_pool->chunks,
	       (max_chunk - buf_pool->chunks) * sizeof *chunks);
	memcpy(chunks + (max_chunk - buf_pool->chunks),
	       max_chunk + 1,
	       buf_pool->chunks + buf_pool->n_chunks
	       - (max_chunk + 1));
	ut_a(buf_pool->curr_size > max_chunk->size);
	buf_pool->curr_size -= max_chunk->size;
	buf_pool->curr_pool_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
	chunk_size -= max_chunk->size;
	buf_chunk_free(buf_pool, max_chunk);
	mem_free(buf_pool->chunks);
	buf_pool->chunks = chunks;
	buf_pool->n_chunks--;

	/* Allow a slack of one megabyte. */
	if (chunk_size > 1048576 / UNIV_PAGE_SIZE) {

		goto shrink_again;
	}
	goto func_exit;

func_done:
	buf_pool->old_pool_size = buf_pool->curr_pool_size;
func_exit:
	buf_pool_mutex_exit(buf_pool);
	btr_search_enable();
}
/********************************************************************//**
Shrinks the buffer pool. */
static
void
buf_pool_shrink(
/*============*/
	ulint	chunk_size)	/*!< in: number of pages to remove */
{
	ulint	i;

	for (i = 0; i < srv_buf_pool_instances; i++) {
		buf_pool_t*	buf_pool;
		ulint		instance_chunk_size;

		instance_chunk_size = chunk_size / srv_buf_pool_instances;
		buf_pool = buf_pool_from_array(i);
		buf_pool_shrink_instance(buf_pool, instance_chunk_size);
	}

	buf_pool_set_sizes();
}
/********************************************************************//**
Rebuild buf_pool->page_hash for a buffer pool instance. */
static
void
buf_pool_page_hash_rebuild_instance(
/*================================*/
	buf_pool_t*	buf_pool)	/*!< in: buffer pool instance */
{
	ulint		i;
	buf_page_t*	b;
	buf_chunk_t*	chunk;
	ulint		n_chunks;
	hash_table_t*	zip_hash;
	hash_table_t*	page_hash;

	buf_pool_mutex_enter(buf_pool);

	/* Free, create, and populate the hash table. */
	hash_table_free(buf_pool->page_hash);
	buf_pool->page_hash = page_hash = hash_create(2 * buf_pool->curr_size);
	zip_hash = hash_create(2 * buf_pool->curr_size);

	HASH_MIGRATE(buf_pool->zip_hash, zip_hash, buf_page_t, hash,
		     BUF_POOL_ZIP_FOLD_BPAGE);

	hash_table_free(buf_pool->zip_hash);
	buf_pool->zip_hash = zip_hash;

	/* Insert the uncompressed file pages to buf_pool->page_hash. */

	chunk = buf_pool->chunks;
	n_chunks = buf_pool->n_chunks;

	for (i = 0; i < n_chunks; i++, chunk++) {
		ulint		j;
		buf_block_t*	block = chunk->blocks;

		for (j = 0; j < chunk->size; j++, block++) {
			if (buf_block_get_state(block)
			    == BUF_BLOCK_FILE_PAGE) {
				ut_ad(!block->page.in_zip_hash);
				ut_ad(block->page.in_page_hash);

				HASH_INSERT(buf_page_t, hash, page_hash,
					    buf_page_address_fold(
						    block->page.space,
						    block->page.offset),
					    &block->page);
			}
		}
	}

	/* Insert the compressed-only pages to buf_pool->page_hash.
	All such blocks are either in buf_pool->zip_clean or
	in buf_pool->flush_list. */

	for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
	     b = UT_LIST_GET_NEXT(list, b)) {
		ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
		ut_ad(!b->in_flush_list);
		ut_ad(b->in_LRU_list);
		ut_ad(b->in_page_hash);
		ut_ad(!b->in_zip_hash);

		HASH_INSERT(buf_page_t, hash, page_hash,
			    buf_page_address_fold(b->space, b->offset), b);
	}

	buf_flush_list_mutex_enter(buf_pool);
	for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
	     b = UT_LIST_GET_NEXT(list, b)) {
		ut_ad(b->in_flush_list);
		ut_ad(b->in_LRU_list);
		ut_ad(b->in_page_hash);
		ut_ad(!b->in_zip_hash);

		switch (buf_page_get_state(b)) {
		case BUF_BLOCK_ZIP_DIRTY:
			HASH_INSERT(buf_page_t, hash, page_hash,
				    buf_page_address_fold(b->space,
							  b->offset), b);
			break;
		case BUF_BLOCK_FILE_PAGE:
			/* uncompressed page */
			break;
		case BUF_BLOCK_ZIP_FREE:
		case BUF_BLOCK_ZIP_PAGE:
		case BUF_BLOCK_NOT_USED:
		case BUF_BLOCK_READY_FOR_USE:
		case BUF_BLOCK_MEMORY:
		case BUF_BLOCK_REMOVE_HASH:
			ut_error;
			break;
		}
	}

	buf_flush_list_mutex_exit(buf_pool);
	buf_pool_mutex_exit(buf_pool);
}
/********************************************************************
Determine if a block is a sentinel for a buffer pool watch.
@return TRUE if a sentinel for a buffer pool watch, FALSE if not */
UNIV_INTERN
ibool
buf_pool_watch_is_sentinel(
/*=======================*/
	buf_pool_t*		buf_pool,	/*!< buffer pool instance */
	const buf_page_t*	bpage)		/*!< in: block */
{
	ut_ad(buf_page_in_file(bpage));

	if (bpage < &buf_pool->watch[0]
	    || bpage >= &buf_pool->watch[BUF_POOL_WATCH_SIZE]) {

		ut_ad(buf_page_get_state(bpage) != BUF_BLOCK_ZIP_PAGE
		      || bpage->zip.data != NULL);

		return(FALSE);
	}

	ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_PAGE);
	ut_ad(!bpage->in_zip_hash);
	ut_ad(bpage->in_page_hash);
	ut_ad(bpage->zip.data == NULL);
	ut_ad(bpage->buf_fix_count > 0);

	return(TRUE);
}
/****************************************************************//**
Add watch for the given page to be read in. Caller must have the buffer pool
mutex reserved.
@return NULL if watch set, block if the page is in the buffer pool */
UNIV_INTERN
buf_page_t*
buf_pool_watch_set(
/*===============*/
	ulint	space,	/*!< in: space id */
	ulint	offset,	/*!< in: page number */
	ulint	fold)	/*!< in: buf_page_address_fold(space, offset) */
{
	buf_page_t*	bpage;
	ulint		i;
	buf_pool_t*	buf_pool = buf_pool_get(space, offset);

	ut_ad(buf_pool_mutex_own(buf_pool));

	bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);

	if (UNIV_LIKELY_NULL(bpage)) {
		if (!buf_pool_watch_is_sentinel(buf_pool, bpage)) {
			/* The page was loaded meanwhile. */
			return(bpage);
		}
		/* Add to an existing watch. */
		bpage->buf_fix_count++;
		return(NULL);
	}

	for (i = 0; i < BUF_POOL_WATCH_SIZE; i++) {
		bpage = &buf_pool->watch[i];

		ut_ad(bpage->access_time == 0);
		ut_ad(bpage->newest_modification == 0);
		ut_ad(bpage->oldest_modification == 0);
		ut_ad(bpage->zip.data == NULL);
		ut_ad(!bpage->in_zip_hash);

		switch (bpage->state) {
		case BUF_BLOCK_POOL_WATCH:
			ut_ad(!bpage->in_page_hash);
			ut_ad(bpage->buf_fix_count == 0);

			/* bpage is pointing to buf_pool->watch[],
			which is protected by buf_pool->mutex.
			Normally, buf_page_t objects are protected by
			buf_block_t::mutex or buf_pool->zip_mutex or both. */

			bpage->state = BUF_BLOCK_ZIP_PAGE;
			bpage->space = space;
			bpage->offset = offset;
			bpage->buf_fix_count = 1;

			ut_d(bpage->in_page_hash = TRUE);
			HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
				    fold, bpage);
			return(NULL);
		case BUF_BLOCK_ZIP_PAGE:
			ut_ad(bpage->in_page_hash);
			ut_ad(bpage->buf_fix_count > 0);
			break;
		default:
			ut_error;
		}
	}

	/* Allocation failed. Either the maximum number of purge
	threads should never exceed BUF_POOL_WATCH_SIZE, or this code
	should be modified to return a special non-NULL value and the
	caller should purge the record directly. */
	ut_error;

	/* Fix compiler warning */
	return(NULL);
}
/********************************************************************//**
Rebuild buf_pool->page_hash. */
static
void
buf_pool_page_hash_rebuild(void)
/*============================*/
{
	ulint	i;

	for (i = 0; i < srv_buf_pool_instances; i++) {
		buf_pool_page_hash_rebuild_instance(buf_pool_from_array(i));
	}
}
/********************************************************************//**
Increase the buffer pool size of one buffer pool instance. */
static
void
buf_pool_increase_instance(
/*=======================*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	ulint		change_size)	/*!< in: new size of the pool */
{
	buf_chunk_t*	chunks;
	buf_chunk_t*	chunk;

	buf_pool_mutex_enter(buf_pool);
	chunks = static_cast<buf_chunk_t *>(
		mem_alloc((buf_pool->n_chunks + 1) * sizeof *chunks));

	memcpy(chunks, buf_pool->chunks, buf_pool->n_chunks * sizeof *chunks);

	chunk = &chunks[buf_pool->n_chunks];

	if (!buf_chunk_init(buf_pool, chunk, change_size)) {
		mem_free(chunks);
	} else {
		buf_pool->old_pool_size = buf_pool->curr_pool_size;
		buf_pool->curr_size += chunk->size;
		buf_pool->curr_pool_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
		mem_free(buf_pool->chunks);
		buf_pool->chunks = chunks;
		buf_pool->n_chunks++;
	}

	buf_pool_mutex_exit(buf_pool);
}
/********************************************************************//**
Increase the buffer pool size. */
static
void
buf_pool_increase(
/*==============*/
	ulint	change_size)	/*!< in: number of bytes to add */
{
	ulint	i;

	for (i = 0; i < srv_buf_pool_instances; i++) {
		buf_pool_increase_instance(
			buf_pool_from_array(i),
			change_size / srv_buf_pool_instances);
	}

	buf_pool_set_sizes();
}
/********************************************************************//**
Resizes the buffer pool. */
UNIV_INTERN
void
buf_pool_resize(void)
/*=================*/
{
	ulint	change_size;
	ulint	min_change_size = 1048576 * srv_buf_pool_instances;

	buf_pool_mutex_enter_all();

	if (srv_buf_pool_old_size == srv_buf_pool_size) {

		buf_pool_mutex_exit_all();

		return;

	} else if (srv_buf_pool_curr_size + min_change_size
		   > srv_buf_pool_size) {

		change_size = (srv_buf_pool_curr_size - srv_buf_pool_size)
			    / UNIV_PAGE_SIZE;

		buf_pool_mutex_exit_all();

		/* Disable adaptive hash indexes and empty the index
		in order to free up memory in the buffer pool chunks. */
		buf_pool_shrink(change_size);

	} else if (srv_buf_pool_curr_size + min_change_size
		   < srv_buf_pool_size) {

		/* Enlarge the buffer pool by at least one megabyte */

		change_size = srv_buf_pool_size - srv_buf_pool_curr_size;

		buf_pool_mutex_exit_all();

		buf_pool_increase(change_size);
	} else {
		srv_buf_pool_size = srv_buf_pool_old_size;

		buf_pool_mutex_exit_all();

		return;
	}

	buf_pool_page_hash_rebuild();
}
/****************************************************************//**
Remove the sentinel block for the watch before replacing it with a real block.
buf_page_watch_clear() or buf_page_watch_occurred() will notice that
the block has been replaced with the real block.
@return reference count, to be added to the replacement block */
static
void
buf_pool_watch_remove(
/*==================*/
	buf_pool_t*	buf_pool,	/*!< buffer pool instance */
	ulint		fold,		/*!< in: buf_page_address_fold(
					space, offset) */
	buf_page_t*	watch)		/*!< in/out: sentinel for watch */
{
	ut_ad(buf_pool_mutex_own(buf_pool));

	HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, watch);
	ut_d(watch->in_page_hash = FALSE);

	watch->buf_fix_count = 0;
	watch->state = BUF_BLOCK_POOL_WATCH;
}
2057
/****************************************************************//**
2058
Stop watching if the page has been read in.
2059
buf_pool_watch_set(space,offset) must have returned NULL before. */
2062
buf_pool_watch_unset(
2063
/*=================*/
2064
ulint space, /*!< in: space id */
2065
ulint offset) /*!< in: page number */
2068
buf_pool_t* buf_pool = buf_pool_get(space, offset);
2069
ulint fold = buf_page_address_fold(space, offset);
2071
buf_pool_mutex_enter(buf_pool);
2072
bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
2073
/* The page must exist because buf_pool_watch_set()
2074
increments buf_fix_count. */
2077
if (UNIV_UNLIKELY(!buf_pool_watch_is_sentinel(buf_pool, bpage))) {
2078
mutex_t* mutex = buf_page_get_mutex(bpage);
2081
ut_a(bpage->buf_fix_count > 0);
2082
bpage->buf_fix_count--;
2085
ut_a(bpage->buf_fix_count > 0);
2087
if (UNIV_LIKELY(!--bpage->buf_fix_count)) {
2088
buf_pool_watch_remove(buf_pool, fold, bpage);
2092
buf_pool_mutex_exit(buf_pool);
2095
/****************************************************************//**
2096
Check if the page has been read in.
2097
This may only be called after buf_pool_watch_set(space,offset)
2098
has returned NULL and before invoking buf_pool_watch_unset(space,offset).
2099
@return FALSE if the given page was not read in, TRUE if it was */
2102
buf_pool_watch_occurred(
2103
/*====================*/
2104
ulint space, /*!< in: space id */
2105
ulint offset) /*!< in: page number */
2109
buf_pool_t* buf_pool = buf_pool_get(space, offset);
2110
ulint fold = buf_page_address_fold(space, offset);
2112
buf_pool_mutex_enter(buf_pool);
2114
bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
2115
/* The page must exist because buf_pool_watch_set()
2116
increments buf_fix_count. */
2118
ret = !buf_pool_watch_is_sentinel(buf_pool, bpage);
2119
buf_pool_mutex_exit(buf_pool);
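/* Illustrative sketch of the watch protocol (hypothetical caller, not
compiled): buf_pool_watch_set() returning NULL means the page was not
resident and a sentinel was installed; the caller can later test whether
a read slipped in, and must always unset the watch:

	if (buf_pool_watch_set(space, offset, fold) == NULL) {
		... do work that must detect a concurrent read ...
		if (buf_pool_watch_occurred(space, offset)) {
			... the page was read in meanwhile ...
		}
		buf_pool_watch_unset(space, offset);
	}
*/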
2124
/********************************************************************//**
Moves a page to the start of the buffer pool LRU list. This high-level
function can be used to prevent an important page from slipping out of
the buffer pool. */
buf_page_make_young(
/*================*/
buf_page_t* bpage) /*!< in: buffer block of a file page */
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
buf_pool_mutex_enter(buf_pool);
ut_a(buf_page_in_file(bpage));
buf_LRU_make_block_young(bpage);
buf_pool_mutex_exit(buf_pool);
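/* Illustrative usage (hypothetical caller, not compiled): a caller that
wants to keep a hot page near the head of the LRU list passes its
control block; the function takes buf_pool->mutex itself, so the caller
must not hold it:

	buf_page_make_young(&block->page);
*/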
2145
/********************************************************************//**
Sets the time of the first access of a page and moves a page to the
start of the buffer pool LRU list if it is too old. This high-level
function can be used to prevent an important page from slipping
out of the buffer pool. */
buf_page_set_accessed_make_young(
/*=============================*/
buf_page_t* bpage, /*!< in/out: buffer block of a
file page */
unsigned access_time) /*!< in: bpage->access_time
read under mutex protection,
or 0 if unknown */
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
ut_ad(!buf_pool_mutex_own(buf_pool));
ut_a(buf_page_in_file(bpage));
if (buf_page_peek_if_too_old(bpage)) {
buf_pool_mutex_enter(buf_pool);
buf_LRU_make_block_young(bpage);
buf_pool_mutex_exit(buf_pool);
} else if (!access_time) {
ulint time_ms = ut_time_ms();
buf_pool_mutex_enter(buf_pool);
buf_page_set_accessed(bpage, time_ms);
buf_pool_mutex_exit(buf_pool);
/********************************************************************//**
Resets the check_index_page_at_flush field of a page if found in the buffer
pool. */
buf_reset_check_index_page_at_flush(
/*================================*/
ulint space, /*!< in: space id */
ulint offset) /*!< in: page number */
buf_pool_t* buf_pool = buf_pool_get(space, offset);
buf_pool_mutex_enter(buf_pool);
block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset);
if (block && buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE) {
ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page));
block->check_index_page_at_flush = FALSE;
buf_pool_mutex_exit(buf_pool);
2202
/********************************************************************//**
Returns the current state of is_hashed of a page. FALSE if the page is
not in the pool. NOTE that this operation does not fix the page in the
pool if it is found there.
@return TRUE if page hash index is built in search system */
buf_page_peek_if_search_hashed(
/*===========================*/
ulint space, /*!< in: space id */
ulint offset) /*!< in: page number */
buf_pool_t* buf_pool = buf_pool_get(space, offset);
buf_pool_mutex_enter(buf_pool);
block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset);
if (!block || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page));
is_hashed = block->is_hashed;
buf_pool_mutex_exit(buf_pool);
#ifdef UNIV_DEBUG_FILE_ACCESSES
/********************************************************************//**
Sets file_page_was_freed TRUE if the page is found in the buffer pool.
This function should be called when we free a file page and want the
debug version to check that it is not accessed any more unless
reallocated.
@return control block if found in page hash table, otherwise NULL */
buf_page_set_file_page_was_freed(
/*=============================*/
ulint space, /*!< in: space id */
ulint offset) /*!< in: page number */
buf_pool_t* buf_pool = buf_pool_get(space, offset);
buf_pool_mutex_enter(buf_pool);
bpage = buf_page_hash_get(buf_pool, space, offset);
ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
bpage->file_page_was_freed = TRUE;
buf_pool_mutex_exit(buf_pool);
/********************************************************************//**
Sets file_page_was_freed FALSE if the page is found in the buffer pool.
This function should be called when we free a file page and want the
debug version to check that it is not accessed any more unless
reallocated.
@return control block if found in page hash table, otherwise NULL */
buf_page_reset_file_page_was_freed(
/*===============================*/
ulint space, /*!< in: space id */
ulint offset) /*!< in: page number */
buf_pool_t* buf_pool = buf_pool_get(space, offset);
buf_pool_mutex_enter(buf_pool);
bpage = buf_page_hash_get(buf_pool, space, offset);
ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
bpage->file_page_was_freed = FALSE;
buf_pool_mutex_exit(buf_pool);
#endif /* UNIV_DEBUG_FILE_ACCESSES */
2296
/********************************************************************//**
Get read access to a compressed page (usually of type
FIL_PAGE_TYPE_ZBLOB or FIL_PAGE_TYPE_ZBLOB2).
The page must be released with buf_page_release_zip().
NOTE: the page is not protected by any latch. Mutual exclusion has to
be implemented at a higher level. In other words, all possible
accesses to a given page through this function must be protected by
the same set of mutexes or latches.
@return pointer to the block */
ulint space, /*!< in: space id */
ulint zip_size,/*!< in: compressed page size */
ulint offset) /*!< in: page number */
mutex_t* block_mutex;
unsigned access_time;
buf_pool_t* buf_pool = buf_pool_get(space, offset);
#ifndef UNIV_LOG_DEBUG
ut_ad(!ibuf_inside());
buf_pool->stat.n_page_gets++;
buf_pool_mutex_enter(buf_pool);
bpage = buf_page_hash_get(buf_pool, space, offset);
ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
/* Page not in buf_pool: needs to be read from file */
buf_pool_mutex_exit(buf_pool);
buf_read_page(space, zip_size, offset);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(++buf_dbg_counter % 37 || buf_validate());
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
if (UNIV_UNLIKELY(!bpage->zip.data)) {
/* There is no compressed page. */
buf_pool_mutex_exit(buf_pool);
ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
switch (buf_page_get_state(bpage)) {
case BUF_BLOCK_NOT_USED:
case BUF_BLOCK_READY_FOR_USE:
case BUF_BLOCK_MEMORY:
case BUF_BLOCK_REMOVE_HASH:
case BUF_BLOCK_ZIP_FREE:
case BUF_BLOCK_ZIP_PAGE:
case BUF_BLOCK_ZIP_DIRTY:
block_mutex = &buf_pool->zip_mutex;
mutex_enter(block_mutex);
bpage->buf_fix_count++;
case BUF_BLOCK_FILE_PAGE:
block_mutex = &((buf_block_t*) bpage)->mutex;
mutex_enter(block_mutex);
/* Discard the uncompressed page frame if possible. */
if (buf_LRU_free_block(bpage, FALSE, NULL)
mutex_exit(block_mutex);
buf_block_buf_fix_inc((buf_block_t*) bpage,
__FILE__, __LINE__);
must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ;
access_time = buf_page_is_accessed(bpage);
buf_pool_mutex_exit(buf_pool);
mutex_exit(block_mutex);
buf_page_set_accessed_make_young(bpage, access_time);
#ifdef UNIV_DEBUG_FILE_ACCESSES
ut_a(!bpage->file_page_was_freed);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(++buf_dbg_counter % 5771 || buf_validate());
ut_a(bpage->buf_fix_count > 0);
ut_a(buf_page_in_file(bpage));
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
/* Let us wait until the read operation
completes */
enum buf_io_fix io_fix;
mutex_enter(block_mutex);
io_fix = buf_page_get_io_fix(bpage);
mutex_exit(block_mutex);
if (io_fix == BUF_IO_READ) {
os_thread_sleep(WAIT_FOR_READ);
#ifdef UNIV_IBUF_COUNT_DEBUG
ut_a(ibuf_count_get(buf_page_get_space(bpage),
buf_page_get_page_no(bpage)) == 0);
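/* Illustrative usage (hypothetical caller, not compiled): the returned
block stays buffer-fixed until released, and the caller must provide
its own mutual exclusion as noted above:

	bpage = buf_page_get_zip(space, zip_size, offset);
	... read the compressed frame through bpage->zip.data ...
	buf_page_release_zip(bpage);
*/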
2433
/********************************************************************//**
Initialize some fields of a control block. */
buf_block_t* block) /*!< in: block to init */
block->check_index_page_at_flush = FALSE;
block->index = NULL;
block->n_hash_helps = 0;
block->is_hashed = FALSE;
block->n_fields = 1;
block->left_side = TRUE;
#endif /* !UNIV_HOTBACKUP */
/********************************************************************//**
@return TRUE if successful */
buf_block_t* block, /*!< in/out: block */
ibool check) /*!< in: TRUE=verify the page checksum */
const byte* frame = block->page.zip.data;
ulint stamp_checksum = mach_read_from_4(
frame + FIL_PAGE_SPACE_OR_CHKSUM);
ut_ad(buf_block_get_zip_size(block));
ut_a(buf_block_get_space(block) != 0);
if (UNIV_LIKELY(check && stamp_checksum != BUF_NO_CHECKSUM_MAGIC)) {
ulint calc_checksum = page_zip_calc_checksum(
frame, page_zip_get_size(&block->page.zip));
if (UNIV_UNLIKELY(stamp_checksum != calc_checksum)) {
ut_print_timestamp(stderr);
" InnoDB: compressed page checksum mismatch"
" (space %u page %u): %lu != %lu\n",
block->page.space, block->page.offset,
stamp_checksum, calc_checksum);
switch (fil_page_get_type(frame)) {
case FIL_PAGE_INDEX:
if (page_zip_decompress(&block->page.zip,
block->frame, TRUE)) {
"InnoDB: unable to decompress space %lu page %lu\n",
(ulong) block->page.space,
(ulong) block->page.offset);
case FIL_PAGE_TYPE_ALLOCATED:
case FIL_PAGE_INODE:
case FIL_PAGE_IBUF_BITMAP:
case FIL_PAGE_TYPE_FSP_HDR:
case FIL_PAGE_TYPE_XDES:
case FIL_PAGE_TYPE_ZBLOB:
case FIL_PAGE_TYPE_ZBLOB2:
/* Copy to uncompressed storage. */
memcpy(block->frame, frame,
buf_block_get_zip_size(block));
ut_print_timestamp(stderr);
" InnoDB: unknown compressed page"
fil_page_get_type(frame));
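/* Illustrative sketch (not compiled): the checksum test above reads the
stamp from the first 4 bytes of the compressed frame and compares it
with a freshly computed value, skipping the test when the stamp is
BUF_NO_CHECKSUM_MAGIC:

	stamp = mach_read_from_4(frame + FIL_PAGE_SPACE_OR_CHKSUM);
	if (check && stamp != BUF_NO_CHECKSUM_MAGIC
	    && stamp != page_zip_calc_checksum(frame, zip_size)) {
		... checksum mismatch: refuse to decompress ...
	}
*/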
2518
#ifndef UNIV_HOTBACKUP
/*******************************************************************//**
Gets the block whose frame the pointer points to, if found
in this buffer pool instance.
@return pointer to block */
buf_block_align_instance(
/*=====================*/
buf_pool_t* buf_pool, /*!< in: buffer in which the block
resides */
const byte* ptr) /*!< in: pointer to a frame */
/* TODO: protect buf_pool->chunks with a mutex (it will
currently remain constant after buf_pool_init()) */
for (chunk = buf_pool->chunks, i = buf_pool->n_chunks; i--; chunk++) {
lint offs = ptr - chunk->blocks->frame;
if (UNIV_UNLIKELY(offs < 0)) {
offs >>= UNIV_PAGE_SIZE_SHIFT;
if (UNIV_LIKELY((ulint) offs < chunk->size)) {
buf_block_t* block = &chunk->blocks[offs];
/* The function buf_chunk_init() invokes
buf_block_init() so that block[n].frame ==
block->frame + n * UNIV_PAGE_SIZE. Check it. */
ut_ad(block->frame == page_align(ptr));
/* A thread that updates these fields must
hold buf_pool->mutex and block->mutex. Acquire
only the latter. */
mutex_enter(&block->mutex);
switch (buf_block_get_state(block)) {
case BUF_BLOCK_ZIP_FREE:
case BUF_BLOCK_ZIP_PAGE:
case BUF_BLOCK_ZIP_DIRTY:
/* These types should only be used in
the compressed buffer pool, whose
memory is allocated from
buf_pool->chunks, in UNIV_PAGE_SIZE
blocks flagged as BUF_BLOCK_MEMORY. */
case BUF_BLOCK_NOT_USED:
case BUF_BLOCK_READY_FOR_USE:
case BUF_BLOCK_MEMORY:
/* Some data structures contain
"guess" pointers to file pages. The
file pages may have been freed and
reused. Do not complain. */
case BUF_BLOCK_REMOVE_HASH:
/* buf_LRU_block_remove_hashed_page()
will overwrite the FIL_PAGE_OFFSET and
FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID with
0xff and set the state to
BUF_BLOCK_REMOVE_HASH. */
ut_ad(page_get_space_id(page_align(ptr))
ut_ad(page_get_page_no(page_align(ptr))
case BUF_BLOCK_FILE_PAGE:
ut_ad(block->page.space
== page_get_space_id(page_align(ptr)));
ut_ad(block->page.offset
== page_get_page_no(page_align(ptr)));
mutex_exit(&block->mutex);
#endif /* UNIV_DEBUG */
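/* Illustrative sketch (not compiled): the chunk scan above maps a frame
pointer back to its control block with plain pointer arithmetic.
Because buf_chunk_init() lays frames out contiguously, so that
chunk->blocks[n].frame == chunk->blocks->frame + n * UNIV_PAGE_SIZE,
the block index is the byte offset shifted down by UNIV_PAGE_SIZE_SHIFT:

	offs = (ptr - chunk->blocks->frame) >> UNIV_PAGE_SIZE_SHIFT;
	block = &chunk->blocks[offs];	(valid when offs < chunk->size)
*/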
2607
/*******************************************************************//**
Gets the block whose frame the pointer points to.
@return pointer to block, never NULL */
const byte* ptr) /*!< in: pointer to a frame */
for (i = 0; i < srv_buf_pool_instances; i++) {
block = buf_block_align_instance(
buf_pool_from_array(i), ptr);
/* The block should always be found. */
/********************************************************************//**
Find out if a pointer belongs to a buf_block_t. It can be a pointer to
the buf_block_t itself or a member of it. This function checks one of
the buffer pool instances.
@return TRUE if ptr belongs to a buf_block_t struct */
buf_pointer_is_block_field_instance(
/*================================*/
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
const void* ptr) /*!< in: pointer not dereferenced */
const buf_chunk_t* chunk = buf_pool->chunks;
const buf_chunk_t* const echunk = chunk + buf_pool->n_chunks;
/* TODO: protect buf_pool->chunks with a mutex (it will
currently remain constant after buf_pool_init()) */
while (chunk < echunk) {
if (ptr >= (void *)chunk->blocks
&& ptr < (void *)(chunk->blocks + chunk->size)) {
/********************************************************************//**
Find out if a pointer belongs to a buf_block_t. It can be a pointer to
the buf_block_t itself or a member of it.
@return TRUE if ptr belongs to a buf_block_t struct */
buf_pointer_is_block_field(
/*=======================*/
const void* ptr) /*!< in: pointer not dereferenced */
for (i = 0; i < srv_buf_pool_instances; i++) {
found = buf_pointer_is_block_field_instance(
buf_pool_from_array(i), ptr);
/********************************************************************//**
Find out if a buffer block was created by buf_chunk_init().
@return TRUE if "block" has been added to buf_pool->free by buf_chunk_init() */
buf_block_is_uncompressed(
/*======================*/
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
const buf_block_t* block) /*!< in: pointer to block,
not dereferenced */
ut_ad(buf_pool_mutex_own(buf_pool));
if (UNIV_UNLIKELY((((ulint) block) % sizeof *block) != 0)) {
/* The pointer should be aligned. */
return(buf_pointer_is_block_field_instance(buf_pool, (void *)block));
2709
/********************************************************************//**
This is the general function used to get access to a database page.
@return pointer to the block or NULL */
ulint space, /*!< in: space id */
ulint zip_size,/*!< in: compressed page size in bytes
or 0 for uncompressed pages */
ulint offset, /*!< in: page number */
ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
buf_block_t* guess, /*!< in: guessed block or NULL */
ulint mode, /*!< in: BUF_GET, BUF_GET_IF_IN_POOL,
BUF_GET_NO_LATCH, or
BUF_GET_IF_IN_POOL_OR_WATCH */
const char* file, /*!< in: file name */
ulint line, /*!< in: line where called */
mtr_t* mtr) /*!< in: mini-transaction */
unsigned access_time;
buf_pool_t* buf_pool = buf_pool_get(space, offset);
ut_ad(mtr->state == MTR_ACTIVE);
ut_ad((rw_latch == RW_S_LATCH)
|| (rw_latch == RW_X_LATCH)
|| (rw_latch == RW_NO_LATCH));
ut_ad((mode != BUF_GET_NO_LATCH) || (rw_latch == RW_NO_LATCH));
ut_ad(mode == BUF_GET
|| mode == BUF_GET_IF_IN_POOL
|| mode == BUF_GET_NO_LATCH
|| mode == BUF_GET_IF_IN_POOL_OR_WATCH);
ut_ad(zip_size == fil_space_get_zip_size(space));
ut_ad(ut_is_2pow(zip_size));
#ifndef UNIV_LOG_DEBUG
ut_ad(!ibuf_inside() || ibuf_page(space, zip_size, offset, NULL));
buf_pool->stat.n_page_gets++;
fold = buf_page_address_fold(space, offset);
buf_pool_mutex_enter(buf_pool);
/* If the guess is a compressed page descriptor that
has been allocated by buf_buddy_alloc(), it may have
been invalidated by buf_buddy_relocate(). In that
case, block could point to something that happens to
contain the expected bits in block->page. Similarly,
the guess may be pointing to a buffer pool chunk that
has been released when resizing the buffer pool. */
if (!buf_block_is_uncompressed(buf_pool, block)
|| offset != block->page.offset
|| space != block->page.space
|| buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
block = guess = NULL;
ut_ad(!block->page.in_zip_hash);
ut_ad(block->page.in_page_hash);
if (block == NULL) {
block = (buf_block_t*) buf_page_hash_get_low(
buf_pool, space, offset, fold);
if (block && buf_pool_watch_is_sentinel(buf_pool, &block->page)) {
if (block == NULL) {
/* Page not in buf_pool: needs to be read from file */
if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
block = (buf_block_t*) buf_pool_watch_set(
space, offset, fold);
if (UNIV_LIKELY_NULL(block)) {
buf_pool_mutex_exit(buf_pool);
if (mode == BUF_GET_IF_IN_POOL
|| mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
if (buf_read_page(space, zip_size, offset)) {
} else if (retries < BUF_PAGE_READ_MAX_RETRIES) {
fprintf(stderr, "InnoDB: Error: Unable"
" to read tablespace %lu page no"
" %lu into the buffer pool after"
"InnoDB: The most probable cause"
" of this error may be that the"
" table has been corrupted.\n"
"InnoDB: You can try to fix this"
" innodb_force_recovery.\n"
"InnoDB: Please see reference manual"
" for more details.\n"
"InnoDB: Aborting...\n",
BUF_PAGE_READ_MAX_RETRIES);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(++buf_dbg_counter % 37 || buf_validate());
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
2841
ut_ad(page_zip_get_size(&block->page.zip) == zip_size);
must_read = buf_block_get_io_fix(block) == BUF_IO_READ;
if (must_read && mode == BUF_GET_IF_IN_POOL) {
/* The page is being read to buffer pool,
but we cannot wait around for the read to
complete. */
buf_pool_mutex_exit(buf_pool);
switch (buf_block_get_state(block)) {
case BUF_BLOCK_FILE_PAGE:
case BUF_BLOCK_ZIP_PAGE:
case BUF_BLOCK_ZIP_DIRTY:
bpage = &block->page;
/* Protect bpage->buf_fix_count. */
mutex_enter(&buf_pool->zip_mutex);
if (bpage->buf_fix_count
|| buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
/* This condition often occurs when the buffer
is not buffer-fixed, but I/O-fixed by
buf_page_init_for_read(). */
mutex_exit(&buf_pool->zip_mutex);
/* The block is buffer-fixed or I/O-fixed.
Try again later. */
buf_pool_mutex_exit(buf_pool);
os_thread_sleep(WAIT_FOR_READ);
/* Allocate an uncompressed page. */
buf_pool_mutex_exit(buf_pool);
mutex_exit(&buf_pool->zip_mutex);
block = buf_LRU_get_free_block(buf_pool, 0);
buf_pool_mutex_enter(buf_pool);
mutex_enter(&block->mutex);
buf_page_t* hash_bpage;
hash_bpage = buf_page_hash_get_low(
buf_pool, space, offset, fold);
if (UNIV_UNLIKELY(bpage != hash_bpage)) {
/* The buf_pool->page_hash was modified
while buf_pool->mutex was released.
Free the block that was allocated. */
buf_LRU_block_free_non_file_page(block);
mutex_exit(&block->mutex);
block = (buf_block_t*) hash_bpage;
(bpage->buf_fix_count
|| buf_page_get_io_fix(bpage) != BUF_IO_NONE)) {
/* The block was buffer-fixed or I/O-fixed
while buf_pool->mutex was not held by this thread.
Free the block that was allocated and try again.
This should be extremely unlikely. */
buf_LRU_block_free_non_file_page(block);
mutex_exit(&block->mutex);
goto wait_until_unfixed;
/* Move the compressed page from bpage to block,
and uncompress it. */
mutex_enter(&buf_pool->zip_mutex);
buf_relocate(bpage, &block->page);
buf_block_init_low(block);
block->lock_hash_val = lock_rec_hash(space, offset);
UNIV_MEM_DESC(&block->page.zip.data,
page_zip_get_size(&block->page.zip), block);
if (buf_page_get_state(&block->page)
== BUF_BLOCK_ZIP_PAGE) {
UT_LIST_REMOVE(list, buf_pool->zip_clean,
ut_ad(!block->page.in_flush_list);
/* Relocate buf_pool->flush_list. */
buf_flush_relocate_on_flush_list(bpage,
/* Buffer-fix, I/O-fix, and X-latch the block
for the duration of the decompression.
Also add the block to the unzip_LRU list. */
block->page.state = BUF_BLOCK_FILE_PAGE;
/* Insert at the front of unzip_LRU list */
buf_unzip_LRU_add_block(block, FALSE);
block->page.buf_fix_count = 1;
buf_block_set_io_fix(block, BUF_IO_READ);
rw_lock_x_lock_func(&block->lock, 0, file, line);
UNIV_MEM_INVALID(bpage, sizeof *bpage);
mutex_exit(&block->mutex);
mutex_exit(&buf_pool->zip_mutex);
buf_pool->n_pend_unzip++;
buf_buddy_free(buf_pool, bpage, sizeof *bpage);
buf_pool_mutex_exit(buf_pool);
/* Decompress the page and apply buffered operations
while not holding buf_pool->mutex or block->mutex. */
success = buf_zip_decompress(block, srv_use_checksums);
if (UNIV_LIKELY(!recv_no_ibuf_operations)) {
ibuf_merge_or_delete_for_page(block, space, offset,
/* Unfix and unlatch the block. */
buf_pool_mutex_enter(buf_pool);
mutex_enter(&block->mutex);
block->page.buf_fix_count--;
buf_block_set_io_fix(block, BUF_IO_NONE);
mutex_exit(&block->mutex);
buf_pool->n_pend_unzip--;
rw_lock_x_unlock(&block->lock);
2993
case BUF_BLOCK_ZIP_FREE:
case BUF_BLOCK_NOT_USED:
case BUF_BLOCK_READY_FOR_USE:
case BUF_BLOCK_MEMORY:
case BUF_BLOCK_REMOVE_HASH:
ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
mutex_enter(&block->mutex);
#if UNIV_WORD_SIZE == 4
/* On 32-bit systems, there is no padding in buf_page_t. On
other systems, Valgrind could complain about uninitialized pad
bytes. */
UNIV_MEM_ASSERT_RW(&block->page, sizeof block->page);
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
if ((mode == BUF_GET_IF_IN_POOL || mode == BUF_GET_IF_IN_POOL_OR_WATCH)
/* Try to evict the block from the buffer pool, to use the
insert buffer (change buffer) as much as possible. */
if (buf_LRU_free_block(&block->page, TRUE, NULL)
mutex_exit(&block->mutex);
if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
/* Set the watch, as it would have
been set if the page were not in the
buffer pool in the first place. */
block = (buf_block_t*) buf_pool_watch_set(
space, offset, fold);
if (UNIV_LIKELY_NULL(block)) {
/* The page entered the buffer
pool for some reason. Try to
evict it again. */
buf_pool_mutex_exit(buf_pool);
"innodb_change_buffering_debug evict %u %u\n",
(unsigned) space, (unsigned) offset);
} else if (buf_flush_page_try(buf_pool, block)) {
"innodb_change_buffering_debug flush %u %u\n",
(unsigned) space, (unsigned) offset);
/* Failed to evict the page; change it directly */
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
buf_block_buf_fix_inc(block, file, line);
mutex_exit(&block->mutex);
/* Check if this is the first access to the page */
access_time = buf_page_is_accessed(&block->page);
buf_pool_mutex_exit(buf_pool);
buf_page_set_accessed_make_young(&block->page, access_time);
#ifdef UNIV_DEBUG_FILE_ACCESSES
ut_a(!block->page.file_page_was_freed);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(++buf_dbg_counter % 5771 || buf_validate());
ut_a(block->page.buf_fix_count > 0);
ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
/* Let us wait until the read operation
completes */
enum buf_io_fix io_fix;
mutex_enter(&block->mutex);
io_fix = buf_block_get_io_fix(block);
mutex_exit(&block->mutex);
if (io_fix == BUF_IO_READ) {
os_thread_sleep(WAIT_FOR_READ);
fix_type = MTR_MEMO_BUF_FIX;
rw_lock_s_lock_func(&(block->lock), 0, file, line);
fix_type = MTR_MEMO_PAGE_S_FIX;
ut_ad(rw_latch == RW_X_LATCH);
rw_lock_x_lock_func(&(block->lock), 0, file, line);
fix_type = MTR_MEMO_PAGE_X_FIX;
mtr_memo_push(mtr, block, fix_type);
/* In the case of a first access, try to apply linear
read-ahead. */
buf_read_ahead_linear(space, zip_size, offset);
#ifdef UNIV_IBUF_COUNT_DEBUG
ut_a(ibuf_count_get(buf_block_get_space(block),
buf_block_get_page_no(block)) == 0);
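/* Illustrative usage (hypothetical caller, not compiled): callers
normally go through the buf_page_get() macro, which supplies __FILE__
and __LINE__, inside a mini-transaction:

	mtr_start(&mtr);
	block = buf_page_get(space, zip_size, offset, RW_S_LATCH, &mtr);
	... access the page through block->frame ...
	mtr_commit(&mtr);	releases the latch and the buffer-fix
*/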
3129
/********************************************************************//**
This is the general function used to get optimistic access to a database
page.
@return TRUE if success */
buf_page_optimistic_get(
/*====================*/
ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */
buf_block_t* block, /*!< in: guessed buffer block */
ib_uint64_t modify_clock,/*!< in: modify clock value if mode is
..._GUESS_ON_CLOCK */
const char* file, /*!< in: file name */
ulint line, /*!< in: line where called */
mtr_t* mtr) /*!< in: mini-transaction */
buf_pool_t* buf_pool;
unsigned access_time;
ut_ad(mtr->state == MTR_ACTIVE);
ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
mutex_enter(&block->mutex);
if (UNIV_UNLIKELY(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE)) {
mutex_exit(&block->mutex);
buf_block_buf_fix_inc(block, file, line);
mutex_exit(&block->mutex);
/* Check if this is the first access to the page.
We do a dirty read on purpose, to avoid mutex contention.
This field is only used for heuristic purposes; it does not
affect correctness. */
access_time = buf_page_is_accessed(&block->page);
buf_page_set_accessed_make_young(&block->page, access_time);
ut_ad(!ibuf_inside()
|| ibuf_page(buf_block_get_space(block),
buf_block_get_zip_size(block),
buf_block_get_page_no(block), NULL));
if (rw_latch == RW_S_LATCH) {
success = rw_lock_s_lock_nowait(&(block->lock),
fix_type = MTR_MEMO_PAGE_S_FIX;
success = rw_lock_x_lock_func_nowait(&(block->lock),
fix_type = MTR_MEMO_PAGE_X_FIX;
if (UNIV_UNLIKELY(!success)) {
mutex_enter(&block->mutex);
buf_block_buf_fix_dec(block);
mutex_exit(&block->mutex);
if (UNIV_UNLIKELY(modify_clock != block->modify_clock)) {
buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
if (rw_latch == RW_S_LATCH) {
rw_lock_s_unlock(&(block->lock));
rw_lock_x_unlock(&(block->lock));
mutex_enter(&block->mutex);
buf_block_buf_fix_dec(block);
mutex_exit(&block->mutex);
mtr_memo_push(mtr, block, fix_type);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(++buf_dbg_counter % 5771 || buf_validate());
ut_a(block->page.buf_fix_count > 0);
ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#ifdef UNIV_DEBUG_FILE_ACCESSES
ut_a(block->page.file_page_was_freed == FALSE);
if (UNIV_UNLIKELY(!access_time)) {
/* In the case of a first access, try to apply linear
read-ahead. */
buf_read_ahead_linear(buf_block_get_space(block),
buf_block_get_zip_size(block),
buf_block_get_page_no(block));
#ifdef UNIV_IBUF_COUNT_DEBUG
ut_a(ibuf_count_get(buf_block_get_space(block),
buf_block_get_page_no(block)) == 0);
buf_pool = buf_pool_from_block(block);
buf_pool->stat.n_page_gets++;
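/* Illustrative usage (hypothetical caller, not compiled): a caller that
remembered a block pointer together with its modify clock can retry the
access without a page hash lookup, falling back to buf_page_get() when
the guess fails:

	saved_clock = buf_block_get_modify_clock(block);
	... later ...
	if (!buf_page_optimistic_get(RW_S_LATCH, block, saved_clock,
				     __FILE__, __LINE__, &mtr)) {
		... the block was evicted or modified: do a full lookup ...
	}
*/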
3245
/********************************************************************//**
This is used to get access to a known database page, when no waiting can be
done. For example, if a search in an adaptive hash index leads us to this
page.
@return TRUE if success */
buf_page_get_known_nowait(
/*======================*/
ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */
buf_block_t* block, /*!< in: the known page */
ulint mode, /*!< in: BUF_MAKE_YOUNG or BUF_KEEP_OLD */
const char* file, /*!< in: file name */
ulint line, /*!< in: line where called */
mtr_t* mtr) /*!< in: mini-transaction */
buf_pool_t* buf_pool;
ut_ad(mtr->state == MTR_ACTIVE);
ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
mutex_enter(&block->mutex);
if (buf_block_get_state(block) == BUF_BLOCK_REMOVE_HASH) {
/* Another thread is just freeing the block from the LRU list
of the buffer pool: do not try to access this page; this
attempt to access the page can only come through the hash
index because when the buffer block state is ..._REMOVE_HASH,
we have already removed it from the page address hash table
of the buffer pool. */
mutex_exit(&block->mutex);
ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
buf_block_buf_fix_inc(block, file, line);
mutex_exit(&block->mutex);
buf_pool = buf_pool_from_block(block);
if (mode == BUF_MAKE_YOUNG && buf_page_peek_if_too_old(&block->page)) {
buf_pool_mutex_enter(buf_pool);
buf_LRU_make_block_young(&block->page);
buf_pool_mutex_exit(buf_pool);
} else if (!buf_page_is_accessed(&block->page)) {
/* Above, we do a dirty read on purpose, to avoid
mutex contention. The field buf_page_t::access_time
is only used for heuristic purposes. Writes to the
field must be protected by mutex, however. */
ulint time_ms = ut_time_ms();
buf_pool_mutex_enter(buf_pool);
buf_page_set_accessed(&block->page, time_ms);
buf_pool_mutex_exit(buf_pool);
ut_ad(!ibuf_inside() || (mode == BUF_KEEP_OLD));
if (rw_latch == RW_S_LATCH) {
success = rw_lock_s_lock_nowait(&(block->lock),
fix_type = MTR_MEMO_PAGE_S_FIX;
success = rw_lock_x_lock_func_nowait(&(block->lock),
fix_type = MTR_MEMO_PAGE_X_FIX;
mutex_enter(&block->mutex);
buf_block_buf_fix_dec(block);
mutex_exit(&block->mutex);
mtr_memo_push(mtr, block, fix_type);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(++buf_dbg_counter % 5771 || buf_validate());
ut_a(block->page.buf_fix_count > 0);
ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#ifdef UNIV_DEBUG_FILE_ACCESSES
ut_a(block->page.file_page_was_freed == FALSE);
#ifdef UNIV_IBUF_COUNT_DEBUG
ut_a((mode == BUF_KEEP_OLD)
|| (ibuf_count_get(buf_block_get_space(block),
buf_block_get_page_no(block)) == 0));
buf_pool->stat.n_page_gets++;
3349
/*******************************************************************//**
Given a tablespace id and page number, tries to get that page. If the
page is not in the buffer pool, it is not loaded and NULL is returned.
Suitable for use when holding the kernel mutex.
@return pointer to a page or NULL */
buf_page_try_get_func(
/*==================*/
ulint space_id,/*!< in: tablespace id */
ulint page_no,/*!< in: page number */
const char* file, /*!< in: file name */
ulint line, /*!< in: line where called */
mtr_t* mtr) /*!< in: mini-transaction */
buf_pool_t* buf_pool = buf_pool_get(space_id, page_no);
ut_ad(mtr->state == MTR_ACTIVE);
buf_pool_mutex_enter(buf_pool);
block = buf_block_hash_get(buf_pool, space_id, page_no);
if (!block || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
buf_pool_mutex_exit(buf_pool);
ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page));
mutex_enter(&block->mutex);
buf_pool_mutex_exit(buf_pool);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
ut_a(buf_block_get_space(block) == space_id);
ut_a(buf_block_get_page_no(block) == page_no);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
buf_block_buf_fix_inc(block, file, line);
mutex_exit(&block->mutex);
fix_type = MTR_MEMO_PAGE_S_FIX;
success = rw_lock_s_lock_nowait(&block->lock, file, line);
/* Let us try to get an X-latch. If the current thread
is holding an X-latch on the page, we cannot get an
S-latch. */
fix_type = MTR_MEMO_PAGE_X_FIX;
success = rw_lock_x_lock_func_nowait(&block->lock,
mutex_enter(&block->mutex);
buf_block_buf_fix_dec(block);
mutex_exit(&block->mutex);
mtr_memo_push(mtr, block, fix_type);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(++buf_dbg_counter % 5771 || buf_validate());
ut_a(block->page.buf_fix_count > 0);
ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#ifdef UNIV_DEBUG_FILE_ACCESSES
ut_a(block->page.file_page_was_freed == FALSE);
#endif /* UNIV_DEBUG_FILE_ACCESSES */
buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
buf_pool->stat.n_page_gets++;
#ifdef UNIV_IBUF_COUNT_DEBUG
ut_a(ibuf_count_get(buf_block_get_space(block),
buf_block_get_page_no(block)) == 0);
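/* Illustrative usage (hypothetical caller, not compiled): callers use
the buf_page_try_get() macro, which never blocks and never issues I/O:

	block = buf_page_try_get(space_id, page_no, &mtr);
	if (block == NULL) {
		... the page is not resident; the caller must cope ...
	}
*/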
3436
/********************************************************************//**
Initialize some fields of a control block. */
buf_page_t* bpage) /*!< in: block to init */
bpage->flush_type = BUF_FLUSH_LRU;
bpage->io_fix = BUF_IO_NONE;
bpage->buf_fix_count = 0;
bpage->freed_page_clock = 0;
bpage->access_time = 0;
bpage->newest_modification = 0;
bpage->oldest_modification = 0;
HASH_INVALIDATE(bpage, hash);
#ifdef UNIV_DEBUG_FILE_ACCESSES
bpage->file_page_was_freed = FALSE;
#endif /* UNIV_DEBUG_FILE_ACCESSES */
/********************************************************************//**
Inits a page in the buffer pool. */
ulint space, /*!< in: space id */
ulint offset, /*!< in: offset of the page within space
in units of a page */
ulint fold, /*!< in: buf_page_address_fold(space,offset) */
buf_block_t* block) /*!< in: block to init */
buf_page_t* hash_page;
buf_pool_t* buf_pool = buf_pool_get(space, offset);
ut_ad(buf_pool_mutex_own(buf_pool));
ut_ad(mutex_own(&(block->mutex)));
ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
/* Set the state of the block */
buf_block_set_file_page(block, space, offset);
#ifdef UNIV_DEBUG_VALGRIND
/* Silence valid Valgrind warnings about uninitialized
data being written to data files. There are some unused
bytes on some pages that InnoDB does not initialize. */
UNIV_MEM_VALID(block->frame, UNIV_PAGE_SIZE);
#endif /* UNIV_DEBUG_VALGRIND */
buf_block_init_low(block);
block->lock_hash_val = lock_rec_hash(space, offset);
buf_page_init_low(&block->page);
/* Insert into the hash table of file pages */
hash_page = buf_page_hash_get_low(buf_pool, space, offset, fold);
if (UNIV_LIKELY(!hash_page)) {
} else if (buf_pool_watch_is_sentinel(buf_pool, hash_page)) {
/* Preserve the reference count. */
ulint buf_fix_count = hash_page->buf_fix_count;
ut_a(buf_fix_count > 0);
block->page.buf_fix_count += buf_fix_count;
buf_pool_watch_remove(buf_pool, fold, hash_page);
"InnoDB: Error: page %lu %lu already found"
" in the hash table: %p, %p\n",
(const void*) hash_page, (const void*) block);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
mutex_exit(&block->mutex);
buf_pool_mutex_exit(buf_pool);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
ut_ad(!block->page.in_zip_hash);
ut_ad(!block->page.in_page_hash);
ut_d(block->page.in_page_hash = TRUE);
HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
fold, &block->page);
3531
/********************************************************************//**
Function which inits a page for read to the buffer buf_pool. If the page is
(1) already in buf_pool, or
(2) if we specify to read only ibuf pages and the page is not an ibuf page, or
(3) if the space is deleted or being deleted,
then this function does nothing.
Sets the io_fix flag to BUF_IO_READ and sets a non-recursive exclusive lock
on the buffer frame. The io-handler must take care that the flag is cleared
and the lock released later.
@return pointer to the block or NULL */
buf_page_init_for_read(
/*===================*/
ulint* err, /*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED */
ulint mode, /*!< in: BUF_READ_IBUF_PAGES_ONLY, ... */
ulint space, /*!< in: space id */
ulint zip_size,/*!< in: compressed page size, or 0 */
ibool unzip, /*!< in: TRUE=request uncompressed page */
ib_int64_t tablespace_version,
/*!< in: prevents reading from a wrong
version of the tablespace in case we have done
DISCARD + IMPORT */
ulint offset) /*!< in: page number */
buf_page_t* bpage = NULL;
buf_page_t* watch_page;
buf_pool_t* buf_pool = buf_pool_get(space, offset);
if (mode == BUF_READ_IBUF_PAGES_ONLY) {
/* It is a read-ahead within an ibuf routine */
ut_ad(!ibuf_bitmap_page(zip_size, offset));
ut_ad(ibuf_inside());
if (!recv_no_ibuf_operations
&& !ibuf_page(space, zip_size, offset, &mtr)) {
ut_ad(mode == BUF_READ_ANY_PAGE);
if (zip_size && UNIV_LIKELY(!unzip)
&& UNIV_LIKELY(!recv_recovery_is_on())) {
block = buf_LRU_get_free_block(buf_pool, 0);
ut_ad(buf_pool_from_block(block) == buf_pool);
fold = buf_page_address_fold(space, offset);
buf_pool_mutex_enter(buf_pool);
watch_page = buf_page_hash_get_low(buf_pool, space, offset, fold);
if (watch_page && !buf_pool_watch_is_sentinel(buf_pool, watch_page)) {
/* The page is already in the buffer pool. */
mutex_enter(&block->mutex);
buf_LRU_block_free_non_file_page(block);
mutex_exit(&block->mutex);
if (fil_tablespace_deleted_or_being_deleted_in_mem(
space, tablespace_version)) {
/* The page belongs to a space which has been
deleted or is being deleted. */
*err = DB_TABLESPACE_DELETED;
bpage = &block->page;
mutex_enter(&block->mutex);
ut_ad(buf_pool_from_bpage(bpage) == buf_pool);
buf_page_init(space, offset, fold, block);
/* The block must be put to the LRU list, to the old blocks */
buf_LRU_add_block(bpage, TRUE/* to old blocks */);
/* We set a pass-type x-lock on the frame because then
the same thread which called for the read operation
(and is running now at this point of code) can wait
for the read to complete by waiting for the x-lock on
the frame; if the x-lock were recursive, the same
thread would illegally get the x-lock before the page
read is completed. The x-lock is cleared by the
io-handler thread. */
rw_lock_x_lock_gen(&block->lock, BUF_IO_READ);
buf_page_set_io_fix(bpage, BUF_IO_READ);
3648
if (UNIV_UNLIKELY(zip_size)) {
page_zip_set_size(&block->page.zip, zip_size);
/* buf_pool->mutex may be released and
reacquired by buf_buddy_alloc(). Thus, we
must release block->mutex in order not to
break the latching order in the reacquisition
of buf_pool->mutex. We also must defer this
operation until after the block descriptor has
been added to buf_pool->LRU and
buf_pool->page_hash. */
mutex_exit(&block->mutex);
data = buf_buddy_alloc(buf_pool, zip_size, &lru);
mutex_enter(&block->mutex);
block->page.zip.data = static_cast<unsigned char *>(data);
/* To maintain the invariant
block->in_unzip_LRU_list
== buf_page_belongs_to_unzip_LRU(&block->page)
we have to add this block to unzip_LRU
after block->page.zip.data is set. */
ut_ad(buf_page_belongs_to_unzip_LRU(&block->page));
buf_unzip_LRU_add_block(block, TRUE);
mutex_exit(&block->mutex);
/* Defer buf_buddy_alloc() until after the block has
been found not to exist. The buf_buddy_alloc() and
buf_buddy_free() calls may be expensive because of
buf_buddy_relocate(). */
/* The compressed page must be allocated before the
control block (bpage), in order to avoid the
invocation of buf_buddy_relocate_block() on
uninitialized data. */
data = buf_buddy_alloc(buf_pool, zip_size, &lru);
bpage = static_cast<buf_page_struct*>(
buf_buddy_alloc(buf_pool, sizeof *bpage, &lru));
/* Initialize the buf_pool pointer. */
bpage->buf_pool_index = buf_pool_index(buf_pool);
/* If buf_buddy_alloc() allocated storage from the LRU list,
it released and reacquired buf_pool->mutex. Thus, we must
check the page_hash again, as it may have been modified. */
if (UNIV_UNLIKELY(lru)) {
watch_page = buf_page_hash_get_low(
buf_pool, space, offset, fold);
&& !buf_pool_watch_is_sentinel(buf_pool,
/* The block was added by some other thread. */
buf_buddy_free(buf_pool, bpage, sizeof *bpage);
buf_buddy_free(buf_pool, data, zip_size);
page_zip_des_init(&bpage->zip);
page_zip_set_size(&bpage->zip, zip_size);
bpage->zip.data = static_cast<unsigned char *>(data);
mutex_enter(&buf_pool->zip_mutex);
UNIV_MEM_DESC(bpage->zip.data,
page_zip_get_size(&bpage->zip), bpage);
buf_page_init_low(bpage);
bpage->state = BUF_BLOCK_ZIP_PAGE;
bpage->space = space;
bpage->offset = offset;
bpage->in_page_hash = FALSE;
bpage->in_zip_hash = FALSE;
bpage->in_flush_list = FALSE;
bpage->in_free_list = FALSE;
bpage->in_LRU_list = FALSE;
#endif /* UNIV_DEBUG */
ut_d(bpage->in_page_hash = TRUE);
if (UNIV_LIKELY_NULL(watch_page)) {
/* Preserve the reference count. */
ulint buf_fix_count = watch_page->buf_fix_count;
ut_a(buf_fix_count > 0);
bpage->buf_fix_count += buf_fix_count;
ut_ad(buf_pool_watch_is_sentinel(buf_pool, watch_page));
buf_pool_watch_remove(buf_pool, fold, watch_page);
HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold,
/* The block must be put to the LRU list, to the old blocks */
buf_LRU_add_block(bpage, TRUE/* to old blocks */);
buf_LRU_insert_zip_clean(bpage);
buf_page_set_io_fix(bpage, BUF_IO_READ);
mutex_exit(&buf_pool->zip_mutex);
buf_pool->n_pend_reads++;
buf_pool_mutex_exit(buf_pool);
if (mode == BUF_READ_IBUF_PAGES_ONLY) {
ut_ad(!bpage || buf_page_in_file(bpage));
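/* Illustrative sketch (not compiled): the read functions in buf0rea.c
are the typical callers; they allocate the block here and then post the
actual I/O, e.g.:

	bpage = buf_page_init_for_read(&err, mode, space, zip_size,
				       unzip, tablespace_version, offset);
	if (bpage != NULL) {
		... issue the asynchronous read; buf_page_io_complete()
		clears the BUF_IO_READ fix and releases the x-lock ...
	}
*/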
3771
/********************************************************************//**
Initializes a page in the buffer pool. The page is usually not read
from a file even if it cannot be found in the buffer pool. This is one
of the functions which perform a state transition NOT_USED =>
FILE_PAGE on a block (the other is buf_page_get_gen).
@return pointer to the block, page bufferfixed */
ulint space, /*!< in: space id */
ulint offset, /*!< in: offset of the page within space in units of
a page */
ulint zip_size,/*!< in: compressed page size, or 0 */
mtr_t* mtr) /*!< in: mini-transaction handle */
buf_block_t* free_block = NULL;
ulint time_ms = ut_time_ms();
buf_pool_t* buf_pool = buf_pool_get(space, offset);
ut_ad(mtr->state == MTR_ACTIVE);
ut_ad(space || !zip_size);
free_block = buf_LRU_get_free_block(buf_pool, 0);
fold = buf_page_address_fold(space, offset);
buf_pool_mutex_enter(buf_pool);
block = (buf_block_t*) buf_page_hash_get_low(
buf_pool, space, offset, fold);
&& buf_page_in_file(&block->page)
&& !buf_pool_watch_is_sentinel(buf_pool, &block->page)) {
#ifdef UNIV_IBUF_COUNT_DEBUG
ut_a(ibuf_count_get(space, offset) == 0);
#ifdef UNIV_DEBUG_FILE_ACCESSES
block->page.file_page_was_freed = FALSE;
#endif /* UNIV_DEBUG_FILE_ACCESSES */
/* Page can be found in buf_pool */
buf_pool_mutex_exit(buf_pool);
buf_block_free(free_block);
return(buf_page_get_with_no_latch(space, zip_size,
/* If we get here, the page was not in buf_pool: init it there */
if (buf_debug_prints) {
fprintf(stderr, "Creating space %lu page %lu to buffer\n",
(ulong) space, (ulong) offset);
#endif /* UNIV_DEBUG */
3837
mutex_enter(&block->mutex);
buf_page_init(space, offset, fold, block);
/* The block must be put to the LRU list */
buf_LRU_add_block(&block->page, FALSE);
buf_block_buf_fix_inc(block, __FILE__, __LINE__);
buf_pool->stat.n_pages_created++;
/* Prevent race conditions during buf_buddy_alloc(),
which may release and reacquire buf_pool->mutex,
by IO-fixing and X-latching the block. */
buf_page_set_io_fix(&block->page, BUF_IO_READ);
rw_lock_x_lock(&block->lock);
page_zip_set_size(&block->page.zip, zip_size);
mutex_exit(&block->mutex);
/* buf_pool->mutex may be released and reacquired by
buf_buddy_alloc(). Thus, we must release block->mutex
in order not to break the latching order in
the reacquisition of buf_pool->mutex. We also must
defer this operation until after the block descriptor
has been added to buf_pool->LRU and buf_pool->page_hash. */
data = buf_buddy_alloc(buf_pool, zip_size, &lru);
mutex_enter(&block->mutex);
block->page.zip.data = static_cast<unsigned char *>(data);
/* To maintain the invariant
block->in_unzip_LRU_list
== buf_page_belongs_to_unzip_LRU(&block->page)
we have to add this block to unzip_LRU after
block->page.zip.data is set. */
ut_ad(buf_page_belongs_to_unzip_LRU(&block->page));
buf_unzip_LRU_add_block(block, FALSE);
buf_page_set_io_fix(&block->page, BUF_IO_NONE);
rw_lock_x_unlock(&block->lock);
buf_page_set_accessed(&block->page, time_ms);
buf_pool_mutex_exit(buf_pool);
mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);
mutex_exit(&block->mutex);
/* Delete possible entries for the page from the insert buffer:
such can exist if the page belonged to an index which was dropped */
ibuf_merge_or_delete_for_page(NULL, space, offset, zip_size, TRUE);
/* Flush pages from the end of the LRU list if necessary */
buf_flush_free_margin(buf_pool);
frame = block->frame;
memset(frame + FIL_PAGE_PREV, 0xff, 4);
memset(frame + FIL_PAGE_NEXT, 0xff, 4);
mach_write_to_2(frame + FIL_PAGE_TYPE, FIL_PAGE_TYPE_ALLOCATED);
/* Reset to zero the file flush lsn field in the page; if the first
page of an ibdata file is 'created' in this function into the buffer
pool then we lose the original contents of the file flush lsn stamp.
Then InnoDB could in a crash recovery print a big, false, corruption
warning if the stamp contains an lsn bigger than the ib_logfile lsn. */
memset(frame + FIL_PAGE_FILE_FLUSH_LSN, 0, 8);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(++buf_dbg_counter % 357 || buf_validate());
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#ifdef UNIV_IBUF_COUNT_DEBUG
ut_a(ibuf_count_get(buf_block_get_space(block),
buf_block_get_page_no(block)) == 0);
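/* Illustrative usage (hypothetical caller, not compiled):
buf_page_create() is for brand new pages allocated inside a tablespace,
whose old on-disk contents are irrelevant, so no read is needed:

	block = buf_page_create(space, offset, zip_size, &mtr);
	... the block is buffer-fixed; latch it before writing to it ...
*/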
3922
/********************************************************************//**
Completes an asynchronous read or write request of a file page to or from
the buffer pool. */
buf_page_io_complete(
/*=================*/
buf_page_t* bpage) /*!< in: pointer to the block in question */
enum buf_io_fix io_type;
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
const ibool uncompressed = (buf_page_get_state(bpage)
== BUF_BLOCK_FILE_PAGE);
ut_a(buf_page_in_file(bpage));
/* We do not need to protect io_fix here with a mutex to read
it because this is the only function where we can change the value
from BUF_IO_READ or BUF_IO_WRITE to some other value, and our code
ensures that this is the only thread that handles the i/o for this
block. */
io_type = buf_page_get_io_fix(bpage);
ut_ad(io_type == BUF_IO_READ || io_type == BUF_IO_WRITE);
if (io_type == BUF_IO_READ) {
ulint read_space_id;
if (buf_page_get_zip_size(bpage)) {
frame = bpage->zip.data;
buf_pool->n_pend_unzip++;
&& !buf_zip_decompress((buf_block_t*) bpage,
buf_pool->n_pend_unzip--;
buf_pool->n_pend_unzip--;
frame = ((buf_block_t*) bpage)->frame;
/* If this page is not uninitialized and not in the
doublewrite buffer, then the page number and space id
should be the same as in block. */
read_page_no = mach_read_from_4(frame + FIL_PAGE_OFFSET);
read_space_id = mach_read_from_4(
frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
if (bpage->space == TRX_SYS_SPACE
&& trx_doublewrite_page_inside(bpage->offset)) {
ut_print_timestamp(stderr);
" InnoDB: Error: reading page %lu\n"
"InnoDB: which is in the"
" doublewrite buffer!\n",
(ulong) bpage->offset);
} else if (!read_space_id && !read_page_no) {
/* This is likely an uninitialized page. */
} else if ((bpage->space
&& bpage->space != read_space_id)
|| bpage->offset != read_page_no) {
/* We did not compare space_id to read_space_id
if bpage->space == 0, because the field on the
page may contain garbage in MySQL < 4.1.1,
which only supported bpage->space == 0. */
ut_print_timestamp(stderr);
" InnoDB: Error: space id and page n:o"
" stored in the page\n"
"InnoDB: read in are %lu:%lu,"
" should be %lu:%lu!\n",
(ulong) read_space_id, (ulong) read_page_no,
(ulong) bpage->space,
(ulong) bpage->offset);
4005
/* From version 3.23.38 up we store the page checksum
in the first 4 bytes of the page end lsn field */
if (buf_page_is_corrupted(frame,
buf_page_get_zip_size(bpage))) {
"InnoDB: Database page corruption on disk"
"InnoDB: file read of page %lu.\n"
"InnoDB: You may have to recover"
" from a backup.\n",
(ulong) bpage->offset);
buf_page_print(frame, buf_page_get_zip_size(bpage));
"InnoDB: Database page corruption on disk"
"InnoDB: file read of page %lu.\n"
"InnoDB: You may have to recover"
" from a backup.\n",
(ulong) bpage->offset);
fputs("InnoDB: It is also possible that"
"InnoDB: system has corrupted its"
"InnoDB: and rebooting your computer"
"InnoDB: If the corrupt page is an index page\n"
"InnoDB: you can also try to"
" fix the corruption\n"
"InnoDB: by dumping, dropping,"
" and reimporting\n"
"InnoDB: the corrupt table."
" You can use CHECK\n"
"InnoDB: TABLE to scan your"
" table for corruption.\n"
REFMAN "forcing-recovery.html\n"
"InnoDB: about forcing recovery.\n", stderr);
if (srv_force_recovery < SRV_FORCE_IGNORE_CORRUPT) {
fputs("InnoDB: Ending processing because of"
" a corrupt database page.\n",
if (recv_recovery_is_on()) {
/* Pages must be uncompressed for crash recovery. */
recv_recover_page(TRUE, (buf_block_t*) bpage);
if (uncompressed && !recv_no_ibuf_operations) {
ibuf_merge_or_delete_for_page(
(buf_block_t*) bpage, bpage->space,
bpage->offset, buf_page_get_zip_size(bpage),
buf_pool_mutex_enter(buf_pool);
mutex_enter(buf_page_get_mutex(bpage));
#ifdef UNIV_IBUF_COUNT_DEBUG
if (io_type == BUF_IO_WRITE || uncompressed) {
/* For BUF_IO_READ of compressed-only blocks, the
buffered operations will be merged by buf_page_get_gen()
after the block has been uncompressed. */
ut_a(ibuf_count_get(bpage->space, bpage->offset) == 0);
/* Because this thread which does the unlocking is not the same that
did the locking, we use a pass value != 0 in unlock, which simply
removes the newest lock debug record, without checking the thread
id. */
buf_page_set_io_fix(bpage, BUF_IO_NONE);
/* NOTE that the call to ibuf may have moved the ownership of
the x-latch to this OS thread: do not let this confuse you in
debugging! */
ut_ad(buf_pool->n_pend_reads > 0);
buf_pool->n_pend_reads--;
buf_pool->stat.n_pages_read++;
rw_lock_x_unlock_gen(&((buf_block_t*) bpage)->lock,
/* Write means a flush operation: call the completion
routine in the flush system */
buf_flush_write_complete(bpage);
rw_lock_s_unlock_gen(&((buf_block_t*) bpage)->lock,
buf_pool->stat.n_pages_written++;
if (buf_debug_prints) {
fprintf(stderr, "Has %s page space %lu page no %lu\n",
io_type == BUF_IO_READ ? "read" : "written",
(ulong) buf_page_get_space(bpage),
(ulong) buf_page_get_page_no(bpage));
#endif /* UNIV_DEBUG */
mutex_exit(buf_page_get_mutex(bpage));
buf_pool_mutex_exit(buf_pool);
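/* Illustrative sketch (not compiled): the sanity check above for a
completed read compares the page header fields against the control
block:

	read_page_no  = mach_read_from_4(frame + FIL_PAGE_OFFSET);
	read_space_id = mach_read_from_4(
		frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);

They must match bpage->offset and bpage->space, except for
uninitialized pages (both fields zero) and for system tablespace pages
written before MySQL 4.1.1, where the space id field may contain
garbage. */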
4135
/*********************************************************************//**
Asserts that all file pages in the buffer are in a replaceable state.
@return TRUE */
buf_all_freed_instance(
/*===================*/
buf_pool_t* buf_pool) /*!< in: buffer pool instance */
buf_pool_mutex_enter(buf_pool);
chunk = buf_pool->chunks;
for (i = buf_pool->n_chunks; i--; chunk++) {
const buf_block_t* block = buf_chunk_not_freed(chunk);
if (UNIV_LIKELY_NULL(block)) {
"Page %lu %lu still fixed or dirty\n",
(ulong) block->page.space,
(ulong) block->page.offset);
buf_pool_mutex_exit(buf_pool);
4171
/*********************************************************************//**
Invalidates file pages in one buffer pool instance */
buf_pool_invalidate_instance(
/*=========================*/
buf_pool_t* buf_pool) /*!< in: buffer pool instance */
buf_pool_mutex_enter(buf_pool);
for (i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) {
/* As this function is called during startup and
during the redo application phase of recovery, InnoDB
is single threaded (apart from IO helper threads) at
this stage. No new write batch can be in initialization
stage at this point. */
ut_ad(buf_pool->init_flush[i] == FALSE);
/* However, it is possible that a write batch that has
been posted earlier is still not complete. For buffer
pool invalidation to proceed we must ensure there is NO
write activity happening. */
if (buf_pool->n_flush[i] > 0) {
buf_pool_mutex_exit(buf_pool);
buf_flush_wait_batch_end(buf_pool, static_cast<buf_flush>(i));
buf_pool_mutex_enter(buf_pool);
buf_pool_mutex_exit(buf_pool);
ut_ad(buf_all_freed_instance(buf_pool));
freed = buf_LRU_search_and_free_block(buf_pool, 100);
buf_pool_mutex_enter(buf_pool);
ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0);
ut_ad(UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0);
buf_pool->freed_page_clock = 0;
buf_pool->LRU_old = NULL;
buf_pool->LRU_old_len = 0;
buf_pool->LRU_flush_ended = 0;
memset(&buf_pool->stat, 0x00, sizeof(buf_pool->stat));
buf_refresh_io_stats(buf_pool);
buf_pool_mutex_exit(buf_pool);
4230
/*********************************************************************//**
4231
Invalidates the file pages in the buffer pool when an archive recovery is
4232
completed. All the file pages buffered must be in a replaceable state when
4233
this function is called: not latched and not modified. */
4236
buf_pool_invalidate(void)
4237
/*=====================*/
4241
for (i = 0; i < srv_buf_pool_instances; i++) {
4242
buf_pool_invalidate_instance(buf_pool_from_array(i));
4246
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
4247
/*********************************************************************//**
4248
Validates data in one buffer pool instance
4252
buf_pool_validate_instance(
4253
/*=======================*/
4254
buf_pool_t* buf_pool) /*!< in: buffer pool instance */
4259
ulint n_single_flush = 0;
4260
ulint n_lru_flush = 0;
4261
ulint n_list_flush = 0;
4269
buf_pool_mutex_enter(buf_pool);
4271
chunk = buf_pool->chunks;
4273
/* Check the uncompressed blocks. */
4275
for (i = buf_pool->n_chunks; i--; chunk++) {
4278
buf_block_t* block = chunk->blocks;
4280
for (j = chunk->size; j--; block++) {
4282
mutex_enter(&block->mutex);
4284
switch (buf_block_get_state(block)) {
4285
case BUF_BLOCK_ZIP_FREE:
4286
case BUF_BLOCK_ZIP_PAGE:
4287
case BUF_BLOCK_ZIP_DIRTY:
4288
/* These should only occur on
4289
zip_clean, zip_free[], or flush_list. */
4293
case BUF_BLOCK_FILE_PAGE:
4294
ut_a(buf_page_hash_get(buf_pool,
4295
buf_block_get_space(
4297
buf_block_get_page_no(
4301
#ifdef UNIV_IBUF_COUNT_DEBUG
4302
ut_a(buf_page_get_io_fix(&block->page)
4304
|| !ibuf_count_get(buf_block_get_space(
4306
buf_block_get_page_no(
4309
switch (buf_page_get_io_fix(&block->page)) {
4314
switch (buf_page_get_flush_type(
4318
ut_a(rw_lock_is_locked(
4322
case BUF_FLUSH_LIST:
4325
case BUF_FLUSH_SINGLE_PAGE:
4336
ut_a(rw_lock_is_locked(&block->lock,
4344
case BUF_BLOCK_NOT_USED:
4348
case BUF_BLOCK_READY_FOR_USE:
4349
case BUF_BLOCK_MEMORY:
4350
case BUF_BLOCK_REMOVE_HASH:
4355
mutex_exit(&block->mutex);
4359
mutex_enter(&buf_pool->zip_mutex);
4361
/* Check clean compressed-only blocks. */
4363
for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
4364
b = UT_LIST_GET_NEXT(list, b)) {
4365
ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
4366
switch (buf_page_get_io_fix(b)) {
4368
/* All clean blocks should be I/O-unfixed. */
4371
/* In buf_LRU_free_block(), we temporarily set
4372
b->io_fix = BUF_IO_READ for a newly allocated
4373
control block in order to prevent
4374
buf_page_get_gen() from decompressing the block. */
4381
/* It is OK to read oldest_modification here because
4382
we have acquired buf_pool->zip_mutex above which acts
4383
as the 'block->mutex' for these bpages. */
4384
ut_a(!b->oldest_modification);
4385
ut_a(buf_page_hash_get(buf_pool, b->space, b->offset) == b);
4391
/* Check dirty blocks. */
4393
buf_flush_list_mutex_enter(buf_pool);
4394
for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
4395
b = UT_LIST_GET_NEXT(list, b)) {
4396
ut_ad(b->in_flush_list);
4397
ut_a(b->oldest_modification);
4400
switch (buf_page_get_state(b)) {
4401
case BUF_BLOCK_ZIP_DIRTY:
4404
switch (buf_page_get_io_fix(b)) {
4409
switch (buf_page_get_flush_type(b)) {
4413
case BUF_FLUSH_LIST:
4416
case BUF_FLUSH_SINGLE_PAGE:
4425
case BUF_BLOCK_FILE_PAGE:
4426
/* uncompressed page */
4428
case BUF_BLOCK_ZIP_FREE:
4429
case BUF_BLOCK_ZIP_PAGE:
4430
case BUF_BLOCK_NOT_USED:
4431
case BUF_BLOCK_READY_FOR_USE:
4432
case BUF_BLOCK_MEMORY:
4433
case BUF_BLOCK_REMOVE_HASH:
4437
ut_a(buf_page_hash_get(buf_pool, b->space, b->offset) == b);
4440
ut_a(UT_LIST_GET_LEN(buf_pool->flush_list) == n_flush);
4442
buf_flush_list_mutex_exit(buf_pool);
4444
mutex_exit(&buf_pool->zip_mutex);
4446
if (n_lru + n_free > buf_pool->curr_size + n_zip) {
4447
fprintf(stderr, "n LRU %lu, n free %lu, pool %lu zip %lu\n",
4448
(ulong) n_lru, (ulong) n_free,
4449
(ulong) buf_pool->curr_size, (ulong) n_zip);
4453
ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == n_lru);
4454
if (UT_LIST_GET_LEN(buf_pool->free) != n_free) {
4455
fprintf(stderr, "Free list len %lu, free blocks %lu\n",
4456
(ulong) UT_LIST_GET_LEN(buf_pool->free),
4461
ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_single_flush);
4462
ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
4463
ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush);
4465
buf_pool_mutex_exit(buf_pool);
4467
ut_a(buf_LRU_validate());
4468
ut_a(buf_flush_validate(buf_pool));
4473
/*********************************************************************//**
4474
Validates the buffer buf_pool data structure.
4483
for (i = 0; i < srv_buf_pool_instances; i++) {
4484
buf_pool_t* buf_pool;
4486
buf_pool = buf_pool_from_array(i);
4488
buf_pool_validate_instance(buf_pool);
4493
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
4495
#if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
4496
/*********************************************************************//**
4497
Prints info of the buffer buf_pool data structure for one instance. */
4502
buf_pool_t* buf_pool)
4504
index_id_t* index_ids;
4512
dict_index_t* index;
4516
size = buf_pool->curr_size;
4518
index_ids = mem_alloc(size * sizeof *index_ids);
4519
counts = mem_alloc(sizeof(ulint) * size);
4521
buf_pool_mutex_enter(buf_pool);
4522
buf_flush_list_mutex_enter(buf_pool);
4525
"buf_pool size %lu\n"
4526
"database pages %lu\n"
4528
"modified database pages %lu\n"
4529
"n pending decompressions %lu\n"
4530
"n pending reads %lu\n"
4531
"n pending flush LRU %lu list %lu single page %lu\n"
4532
"pages made young %lu, not young %lu\n"
4533
"pages read %lu, created %lu, written %lu\n",
4535
(ulong) UT_LIST_GET_LEN(buf_pool->LRU),
4536
(ulong) UT_LIST_GET_LEN(buf_pool->free),
4537
(ulong) UT_LIST_GET_LEN(buf_pool->flush_list),
4538
(ulong) buf_pool->n_pend_unzip,
4539
(ulong) buf_pool->n_pend_reads,
4540
(ulong) buf_pool->n_flush[BUF_FLUSH_LRU],
4541
(ulong) buf_pool->n_flush[BUF_FLUSH_LIST],
4542
(ulong) buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE],
4543
(ulong) buf_pool->stat.n_pages_made_young,
4544
(ulong) buf_pool->stat.n_pages_not_made_young,
4545
(ulong) buf_pool->stat.n_pages_read,
4546
(ulong) buf_pool->stat.n_pages_created,
4547
(ulong) buf_pool->stat.n_pages_written);
4549
buf_flush_list_mutex_exit(buf_pool);
4551
/* Count the number of blocks belonging to each index in the buffer */
4555
chunk = buf_pool->chunks;
4557
for (i = buf_pool->n_chunks; i--; chunk++) {
4558
buf_block_t* block = chunk->blocks;
4559
ulint n_blocks = chunk->size;
4561
for (; n_blocks--; block++) {
4562
const buf_frame_t* frame = block->frame;
4564
if (fil_page_get_type(frame) == FIL_PAGE_INDEX) {
4566
id = btr_page_get_index_id(frame);
4568
/* Look for the id in the index_ids array */
4571
while (j < n_found) {
4573
if (index_ids[j] == id) {
4590
buf_pool_mutex_exit(buf_pool);
4592
for (i = 0; i < n_found; i++) {
4593
index = dict_index_get_if_in_cache(index_ids[i]);
4596
"Block count for index %llu in buffer is about %lu",
4597
(ullint) index_ids[i],
4602
dict_index_name_print(stderr, NULL, index);
4608
mem_free(index_ids);
4611
ut_a(buf_pool_validate_instance(buf_pool));
4614
/*********************************************************************//**
4615
Prints info of the buffer buf_pool data structure. */
4623
for (i = 0; i < srv_buf_pool_instances; i++) {
4624
buf_pool_t* buf_pool;
4626
buf_pool = buf_pool_from_array(i);
4627
buf_print_instance(buf_pool);
4630
#endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */
4633
/*********************************************************************//**
4634
Returns the number of latched pages in the buffer pool.
4635
@return number of latched pages */
4638
buf_get_latched_pages_number_instance(
4639
/*==================================*/
4640
buf_pool_t* buf_pool) /*!< in: buffer pool instance */
4645
ulint fixed_pages_number = 0;
4647
buf_pool_mutex_enter(buf_pool);
4649
chunk = buf_pool->chunks;
4651
for (i = buf_pool->n_chunks; i--; chunk++) {
4655
block = chunk->blocks;
4657
for (j = chunk->size; j--; block++) {
4658
if (buf_block_get_state(block)
4659
!= BUF_BLOCK_FILE_PAGE) {
4664
mutex_enter(&block->mutex);
4666
if (block->page.buf_fix_count != 0
4667
|| buf_page_get_io_fix(&block->page)
4669
fixed_pages_number++;
4672
mutex_exit(&block->mutex);
4676
mutex_enter(&buf_pool->zip_mutex);
4678
/* Traverse the lists of clean and dirty compressed-only blocks. */
4680
for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
4681
b = UT_LIST_GET_NEXT(list, b)) {
4682
ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
4683
ut_a(buf_page_get_io_fix(b) != BUF_IO_WRITE);
4685
if (b->buf_fix_count != 0
4686
|| buf_page_get_io_fix(b) != BUF_IO_NONE) {
4687
fixed_pages_number++;
4691
buf_flush_list_mutex_enter(buf_pool);
4692
for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
4693
b = UT_LIST_GET_NEXT(list, b)) {
4694
ut_ad(b->in_flush_list);
4696
switch (buf_page_get_state(b)) {
4697
case BUF_BLOCK_ZIP_DIRTY:
4698
if (b->buf_fix_count != 0
4699
|| buf_page_get_io_fix(b) != BUF_IO_NONE) {
4700
fixed_pages_number++;
4703
case BUF_BLOCK_FILE_PAGE:
4704
/* uncompressed page */
4706
case BUF_BLOCK_ZIP_FREE:
4707
case BUF_BLOCK_ZIP_PAGE:
4708
case BUF_BLOCK_NOT_USED:
4709
case BUF_BLOCK_READY_FOR_USE:
4710
case BUF_BLOCK_MEMORY:
4711
case BUF_BLOCK_REMOVE_HASH:
4717
buf_flush_list_mutex_exit(buf_pool);
4718
mutex_exit(&buf_pool->zip_mutex);
4719
buf_pool_mutex_exit(buf_pool);
4721
return(fixed_pages_number);
4724
/*********************************************************************//**
4725
Returns the number of latched pages in all the buffer pools.
4726
@return number of latched pages */
4729
buf_get_latched_pages_number(void)
4730
/*==============================*/
4733
ulint total_latched_pages = 0;
4735
for (i = 0; i < srv_buf_pool_instances; i++) {
4736
buf_pool_t* buf_pool;
4738
buf_pool = buf_pool_from_array(i);
4740
total_latched_pages += buf_get_latched_pages_number_instance(
4744
return(total_latched_pages);
4747
#endif /* UNIV_DEBUG */
4749
/*********************************************************************//**
4750
Returns the number of pending buf pool ios.
4751
@return number of pending I/O operations */
4754
buf_get_n_pending_ios(void)
4755
/*=======================*/
4760
for (i = 0; i < srv_buf_pool_instances; i++) {
4761
buf_pool_t* buf_pool;
4763
buf_pool = buf_pool_from_array(i);
4766
buf_pool->n_pend_reads
4767
+ buf_pool->n_flush[BUF_FLUSH_LRU]
4768
+ buf_pool->n_flush[BUF_FLUSH_LIST]
4769
+ buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE];
4775
/*********************************************************************//**
4776
Returns the ratio in percents of modified pages in the buffer pool /
4777
database pages in the buffer pool.
4778
@return modified page percentage ratio */
4781
buf_get_modified_ratio_pct(void)
4782
/*============================*/
4787
ulint flush_list_len = 0;
4789
buf_get_total_list_len(&lru_len, &free_len, &flush_list_len);
4791
ratio = (100 * flush_list_len) / (1 + lru_len + free_len);
4793
/* 1 + is there to avoid division by zero */
4798
/*********************************************************************//**
4799
Prints info of the buffer i/o. */
4802
buf_print_io_instance(
4803
/*==================*/
4804
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
4805
FILE* file) /*!< in/out: buffer where to print */
4807
time_t current_time;
4808
double time_elapsed;
4813
buf_pool_mutex_enter(buf_pool);
4814
buf_flush_list_mutex_enter(buf_pool);
4817
"Buffer pool size %lu\n"
4818
"Free buffers %lu\n"
4819
"Database pages %lu\n"
4820
"Old database pages %lu\n"
4821
"Modified db pages %lu\n"
4822
"Pending reads %lu\n"
4823
"Pending writes: LRU %lu, flush list %lu, single page %lu\n",
4824
(ulong) buf_pool->curr_size,
4825
(ulong) UT_LIST_GET_LEN(buf_pool->free),
4826
(ulong) UT_LIST_GET_LEN(buf_pool->LRU),
4827
(ulong) buf_pool->LRU_old_len,
4828
(ulong) UT_LIST_GET_LEN(buf_pool->flush_list),
4829
(ulong) buf_pool->n_pend_reads,
4830
(ulong) buf_pool->n_flush[BUF_FLUSH_LRU]
4831
+ buf_pool->init_flush[BUF_FLUSH_LRU],
4832
(ulong) buf_pool->n_flush[BUF_FLUSH_LIST]
4833
+ buf_pool->init_flush[BUF_FLUSH_LIST],
4834
(ulong) buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]);
4836
buf_flush_list_mutex_exit(buf_pool);
4838
current_time = time(NULL);
4839
time_elapsed = 0.001 + difftime(current_time,
4840
buf_pool->last_printout_time);
4843
"Pages made young %lu, not young %lu\n"
4844
"%.2f youngs/s, %.2f non-youngs/s\n"
4845
"Pages read %lu, created %lu, written %lu\n"
4846
"%.2f reads/s, %.2f creates/s, %.2f writes/s\n",
4847
(ulong) buf_pool->stat.n_pages_made_young,
4848
(ulong) buf_pool->stat.n_pages_not_made_young,
4849
(buf_pool->stat.n_pages_made_young
4850
- buf_pool->old_stat.n_pages_made_young)
4852
(buf_pool->stat.n_pages_not_made_young
4853
- buf_pool->old_stat.n_pages_not_made_young)
4855
(ulong) buf_pool->stat.n_pages_read,
4856
(ulong) buf_pool->stat.n_pages_created,
4857
(ulong) buf_pool->stat.n_pages_written,
4858
(buf_pool->stat.n_pages_read
4859
- buf_pool->old_stat.n_pages_read)
4861
(buf_pool->stat.n_pages_created
4862
- buf_pool->old_stat.n_pages_created)
4864
(buf_pool->stat.n_pages_written
4865
- buf_pool->old_stat.n_pages_written)
4868
n_gets_diff = buf_pool->stat.n_page_gets
4869
- buf_pool->old_stat.n_page_gets;
4873
"Buffer pool hit rate %lu / 1000,"
4874
" young-making rate %lu / 1000 not %lu / 1000\n",
4876
(1000 - ((1000 * (buf_pool->stat.n_pages_read
4877
- buf_pool->old_stat.n_pages_read))
4878
/ (buf_pool->stat.n_page_gets
4879
- buf_pool->old_stat.n_page_gets))),
4881
(1000 * (buf_pool->stat.n_pages_made_young
4882
- buf_pool->old_stat.n_pages_made_young)
4885
(1000 * (buf_pool->stat.n_pages_not_made_young
4886
- buf_pool->old_stat.n_pages_not_made_young)
4889
fputs("No buffer pool page gets since the last printout\n",
4893
/* Statistics about read ahead algorithm */
4894
fprintf(file, "Pages read ahead %.2f/s,"
4895
" evicted without access %.2f/s\n",
4896
(buf_pool->stat.n_ra_pages_read
4897
- buf_pool->old_stat.n_ra_pages_read)
4899
(buf_pool->stat.n_ra_pages_evicted
4900
- buf_pool->old_stat.n_ra_pages_evicted)
4903
/* Print some values to help us with visualizing what is
4904
happening with LRU eviction. */
4906
"LRU len: %lu, unzip_LRU len: %lu\n"
4907
"I/O sum[%lu]:cur[%lu], unzip sum[%lu]:cur[%lu]\n",
4908
static_cast<ulint>(UT_LIST_GET_LEN(buf_pool->LRU)),
4909
static_cast<ulint>(UT_LIST_GET_LEN(buf_pool->unzip_LRU)),
4910
buf_LRU_stat_sum.io, buf_LRU_stat_cur.io,
4911
buf_LRU_stat_sum.unzip, buf_LRU_stat_cur.unzip);
4913
buf_refresh_io_stats(buf_pool);
4914
buf_pool_mutex_exit(buf_pool);
4917
/*********************************************************************//**
4918
Prints info of the buffer i/o. */
4923
FILE* file) /*!< in/out: buffer where to print */
4927
for (i = 0; i < srv_buf_pool_instances; i++) {
4928
buf_pool_t* buf_pool;
4930
buf_pool = buf_pool_from_array(i);
4931
buf_print_io_instance(buf_pool, file);
4935
/**********************************************************************//**
4936
Refreshes the statistics used to print per-second averages. */
4939
buf_refresh_io_stats(
4940
/*=================*/
4941
buf_pool_t* buf_pool) /*!< in: buffer pool instance */
4943
buf_pool->last_printout_time = ut_time();
4944
buf_pool->old_stat = buf_pool->stat;
4947
/**********************************************************************//**
4948
Refreshes the statistics used to print per-second averages. */
4951
buf_refresh_io_stats_all(void)
4952
/*==========================*/
4956
for (i = 0; i < srv_buf_pool_instances; i++) {
4957
buf_pool_t* buf_pool;
4959
buf_pool = buf_pool_from_array(i);
4961
buf_refresh_io_stats(buf_pool);
4965
/**********************************************************************//**
4966
Check if all pages in all buffer pools are in a replacable state.
4967
@return FALSE if not */
4975
for (i = 0; i < srv_buf_pool_instances; i++) {
4976
buf_pool_t* buf_pool;
4978
buf_pool = buf_pool_from_array(i);
4980
if (!buf_all_freed_instance(buf_pool)) {
4988
/*********************************************************************//**
4989
Checks that there currently are no pending i/o-operations for the buffer
4991
@return TRUE if there is no pending i/o */
4994
buf_pool_check_no_pending_io(void)
4995
/*==============================*/
5000
buf_pool_mutex_enter_all();
5002
for (i = 0; i < srv_buf_pool_instances && ret; i++) {
5003
const buf_pool_t* buf_pool;
5005
buf_pool = buf_pool_from_array(i);
5007
if (buf_pool->n_pend_reads
5008
+ buf_pool->n_flush[BUF_FLUSH_LRU]
5009
+ buf_pool->n_flush[BUF_FLUSH_LIST]
5010
+ buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]) {
5016
buf_pool_mutex_exit_all();
5022
Code currently not used
5023
/*********************************************************************//**
5024
Gets the current length of the free list of buffer blocks.
5025
@return length of the free list */
5028
buf_get_free_list_len(void)
5029
/*=======================*/
5033
buf_pool_mutex_enter(buf_pool);
5035
len = UT_LIST_GET_LEN(buf_pool->free);
5037
buf_pool_mutex_exit(buf_pool);
5043
#else /* !UNIV_HOTBACKUP */
5044
/********************************************************************//**
5045
Inits a page to the buffer buf_pool, for use in ibbackup --restore. */
5048
buf_page_init_for_backup_restore(
5049
/*=============================*/
5050
ulint space, /*!< in: space id */
5051
ulint offset, /*!< in: offset of the page within space
5052
in units of a page */
5053
ulint zip_size,/*!< in: compressed page size in bytes
5054
or 0 for uncompressed pages */
5055
buf_block_t* block) /*!< in: block to init */
5057
block->page.state = BUF_BLOCK_FILE_PAGE;
5058
block->page.space = space;
5059
block->page.offset = offset;
5061
page_zip_des_init(&block->page.zip);
5063
/* We assume that block->page.data has been allocated
5064
with zip_size == UNIV_PAGE_SIZE. */
5065
ut_ad(zip_size <= UNIV_PAGE_SIZE);
5066
ut_ad(ut_is_2pow(zip_size));
5067
page_zip_set_size(&block->page.zip, zip_size);
5069
block->page.zip.data = block->frame + UNIV_PAGE_SIZE;
5072
#endif /* !UNIV_HOTBACKUP */