/*****************************************************************************

Copyright (C) 1995, 2010, Innobase Oy. All Rights Reserved.
Copyright (C) 2008, Google Inc.

Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
briefly in the InnoDB documentation. The contributions by Google are
incorporated with their permission, and subject to the conditions contained in
the file COPYING.Google.

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
St, Fifth Floor, Boston, MA 02110-1301 USA

*****************************************************************************/

/**************************************************//**
The database buffer buf_pool

Created 11/5/1995 Heikki Tuuri
*******************************************************/
#ifndef UNIV_HOTBACKUP
#include "buf0buddy.h"
#include "lock0lock.h"
#include "ibuf0ibuf.h"
#endif /* !UNIV_HOTBACKUP */

#include "dict0dict.h"

#include <drizzled/errmsg_print.h>

/*
IMPLEMENTATION OF THE BUFFER POOL
=================================

Performance improvement:
------------------------
Thread scheduling in NT may be so slow that the OS wait mechanism should
not be used even in waiting for disk reads to complete.
Rather, we should put waiting query threads to the queue of
waiting jobs, and let the OS thread do something useful while the i/o
is processed. In this way we could remove most OS thread switches in
an i/o-intensive benchmark like TPC-C.

A possibility is to put a user space thread library between the database
and NT. User space thread libraries might be very fast.

SQL Server 7.0 can be configured to use 'fibers' which are lightweight
threads in NT. These should be studied.

Buffer frames and blocks
------------------------
Following the terminology of Gray and Reuter, we call the memory
blocks where file pages are loaded buffer frames. For each buffer
frame there is a control block, or, for short, a block, in the buffer
control array. The control info which does not need to be stored
in the file along with the file page resides in the control block.
The buffer buf_pool contains a single mutex which protects all the
control data structures of the buf_pool. The content of a buffer frame is
protected by a separate read-write lock in its control block, though.
These locks can be locked and unlocked without owning the buf_pool->mutex.
The OS events in the buf_pool struct can be waited for without owning the
buf_pool->mutex.

The buf_pool->mutex is a hot-spot in main memory, causing a lot of
memory bus traffic on multiprocessor systems when processors
alternately access the mutex. On our Pentium, the mutex is accessed
maybe every 10 microseconds. We gave up the solution to have mutexes
for each control block, for instance, because it seemed to be
too complicated.

A solution to reduce mutex contention of the buf_pool->mutex is to
create a separate mutex for the page hash table. On Pentium,
accessing the hash table takes 2 microseconds, about half
of the total buf_pool->mutex hold time.
The control block contains, for instance, the bufferfix count
which is incremented when a thread wants a file page to be fixed
in a buffer frame. The bufferfix operation does not lock the
contents of the frame, however. For this purpose, the control
block contains a read-write lock.

The buffer frames have to be aligned so that the start memory
address of a frame is divisible by the universal page size, which
is a power of 2.

We intend to make the buffer buf_pool size on-line reconfigurable,
that is, the buf_pool size can be changed without closing the database.
Then the database administrator may adjust it to be bigger
at night, for example. The control block array must
contain enough control blocks for the maximum buffer buf_pool size
which is used in the particular database.
If the buf_pool size is cut, we exploit the virtual memory mechanism of
the OS, and just refrain from using frames at high addresses. Then the OS
can swap them to disk.
The control blocks containing file pages are put to a hash table
according to the file address of the page.
We could speed up the access to an individual page by using
"pointer swizzling": we could replace the page references on
non-leaf index pages by direct pointers to the page, if it exists
in the buf_pool. We could make a separate hash table where we could
chain all the page references in non-leaf pages residing in the buf_pool,
using the page reference as the hash key,
and at the time of reading of a page update the pointers accordingly.
Drawbacks of this solution are added complexity and,
possibly, extra space required on non-leaf pages for memory pointers.
A simpler solution is just to speed up the hash table mechanism
in the database, using tables whose size is a power of 2.
Lists of blocks
---------------

There are several lists of control blocks.

The free list (buf_pool->free) contains blocks which are currently not
in use.

The common LRU list contains all the blocks holding a file page
except those for which the bufferfix count is non-zero.
The pages are in the LRU list roughly in the order of the last
access to the page, so that the oldest pages are at the end of the
list. We also keep a pointer to near the end of the LRU list,
which we can use when we want to artificially age a page in the
buf_pool. This is used if we know that some page is not needed
again for some time: we insert the block right after the pointer,
causing it to be replaced sooner than would normally be the case.
Currently this aging mechanism is used by the read-ahead mechanism
for pages, and it can also be used when there is a scan of a full
table which cannot fit in the memory. By putting the pages near the
end of the LRU list, we make sure that most of the buf_pool stays
in the main memory, undisturbed.
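
A minimal sketch (not from the original source) of the insertion described
above, using the UT_LIST primitives and the buf_pool->LRU_old pointer that
appear later in this file:

	if (buf_pool->LRU_old != NULL) {
		link the page behind the aging pointer, so that it
		is replaced sooner than a page added to the list head:

		UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU,
				     buf_pool->LRU_old, bpage);
	} else {
		UT_LIST_ADD_FIRST(LRU, buf_pool->LRU, bpage);
	}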
The unzip_LRU list contains a subset of the common LRU list. The
blocks on the unzip_LRU list hold a compressed file page and the
corresponding uncompressed page frame. A block is in unzip_LRU if and
only if the predicate buf_page_belongs_to_unzip_LRU(&block->page)
holds. The blocks in unzip_LRU will be in the same order as they are in
the common LRU list. That is, each manipulation of the common LRU
list will result in the same manipulation of the unzip_LRU list.

The chain of modified blocks (buf_pool->flush_list) contains the blocks
holding file pages that have been modified in the memory
but not written to disk yet. The block with the oldest modification
which has not yet been written to disk is at the end of the chain.
The access to this list is protected by buf_pool->flush_list_mutex.

The chain of unmodified compressed blocks (buf_pool->zip_clean)
contains the control blocks (buf_page_t) of those compressed pages
that are not in buf_pool->flush_list and for which no uncompressed
page has been allocated in the buffer pool. The control blocks for
uncompressed pages are accessible via buf_block_t objects that are
reachable via buf_pool->chunks[].

The chains of free memory blocks (buf_pool->zip_free[]) are used by
the buddy allocator (buf0buddy.c) to keep track of currently unused
memory blocks of size sizeof(buf_page_t)..UNIV_PAGE_SIZE / 2. These
blocks are inside the UNIV_PAGE_SIZE-sized memory blocks of type
BUF_BLOCK_MEMORY that the buddy allocator requests from the buffer
pool. The buddy allocator is solely used for allocating control
blocks for compressed pages (buf_page_t) and compressed page frames.
Loading a file page
-------------------

First, a victim block for replacement has to be found in the
buf_pool. It is taken from the free list or searched for from the
end of the LRU-list. An exclusive lock is reserved for the frame,
the io_fix field is set in the block fixing the block in buf_pool,
and the io-operation for loading the page is queued. The io-handler thread
releases the X-lock on the frame and resets the io_fix field
when the io operation completes.

A thread may request the above operation using the function
buf_page_get(). It may then continue to request a lock on the frame.
The lock is granted when the io-handler releases the x-lock.
Read-ahead mechanism
--------------------

The read-ahead mechanism is intended to be intelligent and
isolated from the semantically higher levels of the database
index management. From the higher level we only need the
information whether a file page has a natural successor or
predecessor page. On the leaf level of a B-tree index,
these are the next and previous pages in the natural
order of the pages.

Let us first explain the read-ahead mechanism when the leaves
of a B-tree are scanned in an ascending or descending order.
When a read page is referenced in the buf_pool for the first time,
the buffer manager checks if it is at the border of a so-called
linear read-ahead area. The tablespace is divided into these
areas of size 64 blocks, for example. So if the page is at the
border of such an area, the read-ahead mechanism checks if
all the other blocks in the area have been accessed in an
ascending or descending order. If this is the case, the system
looks at the natural successor or predecessor of the page,
checks if that is at the border of another area, and in this case
issues read-requests for all the pages in that area. Maybe
we could relax the condition that all the pages in the area
have to be accessed: if data is deleted from a table, there may
appear holes of unused pages in the area.

A different read-ahead mechanism is used when there appears
to be a random access pattern to a file.
If a new page is referenced in the buf_pool, and several pages
of its random access area (for instance, 32 consecutive pages
in a tablespace) have recently been referenced, we may predict
that the whole area may be needed in the near future, and issue
the read requests for the whole area.
*/
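
/* As an illustration (not from the original source), the border test for a
linear read-ahead area of 64 pages reduces to modular arithmetic on the page
number; the helper name below is hypothetical. A page is at the low border
when it is the first page of its area, and at the high border when it is the
last:

	ibool
	buf_read_ahead_is_area_border(ulint offset)
	{
		const ulint	area = 64;

		return(offset % area == 0 || (offset + 1) % area == 0);
	}
*/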
#ifndef UNIV_HOTBACKUP
/** Value in microseconds */
static const int WAIT_FOR_READ = 5000;
/** Number of attempts made to read in a page in the buffer pool */
static const ulint BUF_PAGE_READ_MAX_RETRIES = 100;

/** The buffer pools of the database */
UNIV_INTERN buf_pool_t* buf_pool_ptr;

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
static ulint	buf_dbg_counter	= 0; /*!< This is used to insert validation
					operations in execution in the
					debug version */
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#ifdef UNIV_DEBUG
/** If this is set TRUE, the program prints info whenever
read-ahead or flush occurs */
UNIV_INTERN ibool	buf_debug_prints = FALSE;
#endif /* UNIV_DEBUG */
#ifdef UNIV_PFS_RWLOCK
/* Keys to register buffer block related rwlocks and mutexes with
performance schema */
UNIV_INTERN mysql_pfs_key_t	buf_block_lock_key;
# ifdef UNIV_SYNC_DEBUG
UNIV_INTERN mysql_pfs_key_t	buf_block_debug_latch_key;
# endif /* UNIV_SYNC_DEBUG */
#endif /* UNIV_PFS_RWLOCK */

#ifdef UNIV_PFS_MUTEX
UNIV_INTERN mysql_pfs_key_t	buffer_block_mutex_key;
UNIV_INTERN mysql_pfs_key_t	buf_pool_mutex_key;
UNIV_INTERN mysql_pfs_key_t	buf_pool_zip_mutex_key;
UNIV_INTERN mysql_pfs_key_t	flush_list_mutex_key;
#endif /* UNIV_PFS_MUTEX */

#if defined UNIV_PFS_MUTEX || defined UNIV_PFS_RWLOCK
# ifndef PFS_SKIP_BUFFER_MUTEX_RWLOCK

/* Buffer block mutexes and rwlocks can be registered
in one group rather than individually. If PFS_GROUP_BUFFER_SYNC
is defined, register buffer block mutex and rwlock
in one group after their initialization. */
#  define PFS_GROUP_BUFFER_SYNC

/* This define caps the number of mutexes/rwlocks that can
be registered with performance schema. Developers can
modify this define if necessary. Please note, this would
be effective only if PFS_GROUP_BUFFER_SYNC is defined. */
#  define PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER	ULINT_MAX

# endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
#endif /* UNIV_PFS_MUTEX || UNIV_PFS_RWLOCK */
/** A chunk of buffers. The buffer pool is allocated in chunks. */
struct buf_chunk_struct{
	ulint		mem_size;	/*!< allocated size of the chunk */
	ulint		size;		/*!< size of frames[] and blocks[] */
	void*		mem;		/*!< pointer to the memory area which
					was allocated for the frames */
	buf_block_t*	blocks;		/*!< array of buffer control blocks */
};
#endif /* !UNIV_HOTBACKUP */
/********************************************************************//**
Gets the smallest oldest_modification lsn for any page in the pool. Returns
zero if all modified pages have been flushed to disk.
@return oldest modification in pool, zero if none */
UNIV_INTERN
ib_uint64_t
buf_pool_get_oldest_modification(void)
/*==================================*/
{
	ulint		i;
	buf_page_t*	bpage;
	ib_uint64_t	lsn = 0;
	ib_uint64_t	oldest_lsn = 0;

	/* When we traverse all the flush lists we don't want another
	thread to add a dirty page to any flush list. */
	log_flush_order_mutex_enter();

	for (i = 0; i < srv_buf_pool_instances; i++) {
		buf_pool_t*	buf_pool;

		buf_pool = buf_pool_from_array(i);

		buf_flush_list_mutex_enter(buf_pool);

		bpage = UT_LIST_GET_LAST(buf_pool->flush_list);

		if (bpage != NULL) {
			ut_ad(bpage->in_flush_list);
			lsn = bpage->oldest_modification;
		}

		buf_flush_list_mutex_exit(buf_pool);

		if (!oldest_lsn || oldest_lsn > lsn) {
			oldest_lsn = lsn;
		}
	}

	log_flush_order_mutex_exit();

	/* The returned answer may be out of date: the flush_list can
	change after the mutex has been released. */

	return(oldest_lsn);
}
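
/* Illustration (not part of the original source): how a caller might
interpret the return value; zero means every modified page has already been
flushed, so no redo log is needed to recover buffer pool contents:

	ib_uint64_t	oldest = buf_pool_get_oldest_modification();

	if (oldest == 0) {
		... all modifications are already durable on disk ...
	} else {
		... redo log records from 'oldest' onwards may still
		be needed to recover unflushed pages ...
	}
*/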
/********************************************************************//**
Get total buffer pool statistics. */
UNIV_INTERN
void
buf_get_total_list_len(
/*===================*/
	ulint*	LRU_len,	/*!< out: length of all LRU lists */
	ulint*	free_len,	/*!< out: length of all free lists */
	ulint*	flush_list_len)	/*!< out: length of all flush lists */
{
	ulint	i;

	*LRU_len = 0;
	*free_len = 0;
	*flush_list_len = 0;

	for (i = 0; i < srv_buf_pool_instances; i++) {
		buf_pool_t*	buf_pool;

		buf_pool = buf_pool_from_array(i);
		*LRU_len += UT_LIST_GET_LEN(buf_pool->LRU);
		*free_len += UT_LIST_GET_LEN(buf_pool->free);
		*flush_list_len += UT_LIST_GET_LEN(buf_pool->flush_list);
	}
}
/********************************************************************//**
Get total buffer pool statistics. */
UNIV_INTERN
void
buf_get_total_stat(
/*===============*/
	buf_pool_stat_t*	tot_stat)	/*!< out: buffer pool stats */
{
	ulint	i;

	memset(tot_stat, 0, sizeof(*tot_stat));

	for (i = 0; i < srv_buf_pool_instances; i++) {
		buf_pool_stat_t* buf_stat;
		buf_pool_t*	buf_pool;

		buf_pool = buf_pool_from_array(i);

		buf_stat = &buf_pool->stat;
		tot_stat->n_page_gets += buf_stat->n_page_gets;
		tot_stat->n_pages_read += buf_stat->n_pages_read;
		tot_stat->n_pages_written += buf_stat->n_pages_written;
		tot_stat->n_pages_created += buf_stat->n_pages_created;
		tot_stat->n_ra_pages_read += buf_stat->n_ra_pages_read;
		tot_stat->n_ra_pages_evicted += buf_stat->n_ra_pages_evicted;
		tot_stat->n_pages_made_young += buf_stat->n_pages_made_young;

		tot_stat->n_pages_not_made_young +=
			buf_stat->n_pages_not_made_young;
	}
}
/********************************************************************//**
Allocates a buffer block.
@return own: the allocated block, in state BUF_BLOCK_MEMORY */
UNIV_INTERN
buf_block_t*
buf_block_alloc(
/*============*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	ulint		zip_size)	/*!< in: compressed page size in bytes,
					or 0 if uncompressed tablespace */
{
	buf_block_t*	block;
	ulint		index;
	static ulint	buf_pool_index;

	if (buf_pool == NULL) {
		/* We are allocating memory from any buffer pool, ensure
		we spread the grace on all buffer pool instances. */
		index = buf_pool_index++ % srv_buf_pool_instances;
		buf_pool = buf_pool_from_array(index);
	}

	block = buf_LRU_get_free_block(buf_pool, zip_size);

	buf_block_set_state(block, BUF_BLOCK_MEMORY);

	return(block);
}
/********************************************************************//**
Calculates a page checksum which is stored to the page when it is written
to a file. Note that we must be careful to calculate the same value on
32-bit and 64-bit architectures.
@return checksum */
UNIV_INTERN
ulint
buf_calc_page_new_checksum(
/*=======================*/
	const byte*	page)	/*!< in: buffer page */
{
	ulint	checksum;

	/* Since the field FIL_PAGE_FILE_FLUSH_LSN, and in versions <= 4.1.x
	..._ARCH_LOG_NO, are written outside the buffer pool to the first
	pages of data files, we have to skip them in the page checksum
	calculation.
	We must also skip the field FIL_PAGE_SPACE_OR_CHKSUM where the
	checksum is stored, and also the last 8 bytes of the page because
	there we store the old formula checksum. */

	checksum = ut_fold_binary(page + FIL_PAGE_OFFSET,
				  FIL_PAGE_FILE_FLUSH_LSN - FIL_PAGE_OFFSET)
		+ ut_fold_binary(page + FIL_PAGE_DATA,
				 UNIV_PAGE_SIZE - FIL_PAGE_DATA
				 - FIL_PAGE_END_LSN_OLD_CHKSUM);
	checksum = checksum & 0xFFFFFFFFUL;

	return(checksum);
}
/********************************************************************//**
In versions < 4.0.14 and < 4.1.1 there was a bug that the checksum only
looked at the first few bytes of the page. This calculates that old
checksum.
NOTE: we must first store the new formula checksum to
FIL_PAGE_SPACE_OR_CHKSUM before calculating and storing this old checksum
because this takes that field as an input!
@return checksum */
UNIV_INTERN
ulint
buf_calc_page_old_checksum(
/*=======================*/
	const byte*	page)	/*!< in: buffer page */
{
	ulint	checksum;

	checksum = ut_fold_binary(page, FIL_PAGE_FILE_FLUSH_LSN);

	checksum = checksum & 0xFFFFFFFFUL;

	return(checksum);
}
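
/* Illustration (not from the original source): the NOTE above dictates the
order in which the two checksum fields must be written to a page before it
goes to disk. mach_write_to_4() is the store counterpart of the
mach_read_from_4() used elsewhere in this file, and
buf_page_store_checksums() is a hypothetical helper name:

	void
	buf_page_store_checksums(byte* page)
	{
		mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM,
				buf_calc_page_new_checksum(page));

		mach_write_to_4(page + UNIV_PAGE_SIZE
				- FIL_PAGE_END_LSN_OLD_CHKSUM,
				buf_calc_page_old_checksum(page));
	}

The new formula must be stored first because the old formula folds the
start of the page, which includes FIL_PAGE_SPACE_OR_CHKSUM. */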
/********************************************************************//**
Checks if a page is corrupt.
@return TRUE if corrupted */
UNIV_INTERN
ibool
buf_page_is_corrupted(
/*==================*/
	const byte*	read_buf,	/*!< in: a database page */
	ulint		zip_size)	/*!< in: size of compressed page;
					0 for uncompressed pages */
{
	ulint	checksum_field;
	ulint	old_checksum_field;

	if (UNIV_LIKELY(!zip_size)
	    && memcmp(read_buf + FIL_PAGE_LSN + 4,
		      read_buf + UNIV_PAGE_SIZE
		      - FIL_PAGE_END_LSN_OLD_CHKSUM + 4, 4)) {

		/* Stored log sequence numbers at the start and the end
		of page do not match */

		return(TRUE);
	}

#ifndef UNIV_HOTBACKUP
	if (recv_lsn_checks_on) {
		ib_uint64_t	current_lsn;

		if (log_peek_lsn(&current_lsn)
		    && UNIV_UNLIKELY(current_lsn
				     < mach_read_from_8(read_buf
							+ FIL_PAGE_LSN))) {
			ut_print_timestamp(stderr);

			drizzled::errmsg_printf(drizzled::error::INFO,
				"InnoDB: Error: page %lu log sequence number %"PRIu64". "
				"InnoDB: is in the future! Current system log sequence number %"PRIu64". "
				"Your database may be corrupt or you may have copied the InnoDB tablespace but not the InnoDB log files. See "
				" " REFMAN "forcing-recovery.html for more information. ",
				(ulong) mach_read_from_4(read_buf
							 + FIL_PAGE_OFFSET),
				mach_read_from_8(read_buf + FIL_PAGE_LSN),
				current_lsn);
		}
	}
#endif
	/* If we use checksums validation, make additional check before
	returning TRUE to ensure that the checksum is not equal to
	BUF_NO_CHECKSUM_MAGIC which might be stored by InnoDB with checksums
	disabled. Otherwise, skip checksum calculation and return FALSE */

	if (UNIV_LIKELY(srv_use_checksums)) {
		checksum_field = mach_read_from_4(read_buf
						  + FIL_PAGE_SPACE_OR_CHKSUM);

		if (UNIV_UNLIKELY(zip_size)) {
			return(checksum_field != BUF_NO_CHECKSUM_MAGIC
			       && checksum_field
			       != page_zip_calc_checksum(read_buf, zip_size));
		}

		old_checksum_field = mach_read_from_4(
			read_buf + UNIV_PAGE_SIZE
			- FIL_PAGE_END_LSN_OLD_CHKSUM);

		/* There are 2 valid formulas for old_checksum_field:

		1. Very old versions of InnoDB only stored 8 byte lsn to the
		start and the end of the page.

		2. Newer InnoDB versions store the old formula checksum
		there. */

		if (old_checksum_field != mach_read_from_4(read_buf
							   + FIL_PAGE_LSN)
		    && old_checksum_field != BUF_NO_CHECKSUM_MAGIC
		    && old_checksum_field
		    != buf_calc_page_old_checksum(read_buf)) {

			return(TRUE);
		}

		/* InnoDB versions < 4.0.14 and < 4.1.1 stored the space id
		(always equal to 0), to FIL_PAGE_SPACE_OR_CHKSUM */

		if (checksum_field != 0
		    && checksum_field != BUF_NO_CHECKSUM_MAGIC
		    && checksum_field
		    != buf_calc_page_new_checksum(read_buf)) {

			return(TRUE);
		}
	}

	return(FALSE);
}
/********************************************************************//**
Prints a page to stderr. */
UNIV_INTERN
void
buf_page_print(
/*===========*/
	const byte*	read_buf,	/*!< in: a database page */
	ulint		zip_size)	/*!< in: compressed page size, or
					0 for uncompressed pages */
{
#ifndef UNIV_HOTBACKUP
	dict_index_t*	index;
#endif /* !UNIV_HOTBACKUP */
	ulint		checksum;
	ulint		old_checksum;
	ulint		size	= zip_size;

	if (!size) {
		size = UNIV_PAGE_SIZE;
	}

	ut_print_timestamp(stderr);
	fprintf(stderr, " InnoDB: Page dump in ascii and hex (%lu bytes):\n",
		(ulong) size);
	ut_print_buf(stderr, read_buf, size);
	fputs("\nInnoDB: End of page dump\n", stderr);
	if (zip_size) {
		/* Print compressed page. */

		switch (fil_page_get_type(read_buf)) {
		case FIL_PAGE_TYPE_ZBLOB:
		case FIL_PAGE_TYPE_ZBLOB2:
			checksum = srv_use_checksums
				? page_zip_calc_checksum(read_buf, zip_size)
				: BUF_NO_CHECKSUM_MAGIC;
			ut_print_timestamp(stderr);
			fprintf(stderr,
				"  InnoDB: Compressed BLOB page"
				" checksum %lu, stored %lu\n"
				"InnoDB: Page lsn %lu %lu\n"
				"InnoDB: Page number (if stored"
				" to page already) %lu,\n"
				"InnoDB: space id (if stored"
				" to page already) %lu\n",
				(ulong) checksum,
				(ulong) mach_read_from_4(
					read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
				(ulong) mach_read_from_4(
					read_buf + FIL_PAGE_LSN),
				(ulong) mach_read_from_4(
					read_buf + (FIL_PAGE_LSN + 4)),
				(ulong) mach_read_from_4(
					read_buf + FIL_PAGE_OFFSET),
				(ulong) mach_read_from_4(
					read_buf
					+ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
			return;
		default:
			ut_print_timestamp(stderr);
			fprintf(stderr,
				"  InnoDB: unknown page type %lu,"
				" assuming FIL_PAGE_INDEX\n",
				fil_page_get_type(read_buf));
			/* fall through */
		case FIL_PAGE_INDEX:
			checksum = srv_use_checksums
				? page_zip_calc_checksum(read_buf, zip_size)
				: BUF_NO_CHECKSUM_MAGIC;

			ut_print_timestamp(stderr);
			fprintf(stderr,
				"  InnoDB: Compressed page checksum %lu,"
				" stored %lu\n"
				"InnoDB: Page lsn %lu %lu\n"
				"InnoDB: Page number (if stored"
				" to page already) %lu,\n"
				"InnoDB: space id (if stored"
				" to page already) %lu\n",
				(ulong) checksum,
				(ulong) mach_read_from_4(
					read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
				(ulong) mach_read_from_4(
					read_buf + FIL_PAGE_LSN),
				(ulong) mach_read_from_4(
					read_buf + (FIL_PAGE_LSN + 4)),
				(ulong) mach_read_from_4(
					read_buf + FIL_PAGE_OFFSET),
				(ulong) mach_read_from_4(
					read_buf
					+ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
			return;
		case FIL_PAGE_TYPE_XDES:
			/* This is an uncompressed page. */
			break;
		}
	}

	checksum = srv_use_checksums
		? buf_calc_page_new_checksum(read_buf) : BUF_NO_CHECKSUM_MAGIC;
	old_checksum = srv_use_checksums
		? buf_calc_page_old_checksum(read_buf) : BUF_NO_CHECKSUM_MAGIC;

	ut_print_timestamp(stderr);
	fprintf(stderr,
		"  InnoDB: Page checksum %lu, prior-to-4.0.14-form"
		" checksum %lu\n"
		"InnoDB: stored checksum %lu, prior-to-4.0.14-form"
		" stored checksum %lu\n"
		"InnoDB: Page lsn %lu %lu, low 4 bytes of lsn"
		" at page end %lu\n"
		"InnoDB: Page number (if stored to page already) %lu,\n"
		"InnoDB: space id (if created with >= MySQL-4.1.1"
		" and stored already) %lu\n",
		(ulong) checksum, (ulong) old_checksum,
		(ulong) mach_read_from_4(read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
		(ulong) mach_read_from_4(read_buf + UNIV_PAGE_SIZE
					 - FIL_PAGE_END_LSN_OLD_CHKSUM),
		(ulong) mach_read_from_4(read_buf + FIL_PAGE_LSN),
		(ulong) mach_read_from_4(read_buf + FIL_PAGE_LSN + 4),
		(ulong) mach_read_from_4(read_buf + UNIV_PAGE_SIZE
					 - FIL_PAGE_END_LSN_OLD_CHKSUM + 4),
		(ulong) mach_read_from_4(read_buf + FIL_PAGE_OFFSET),
		(ulong) mach_read_from_4(read_buf
					 + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
#ifndef UNIV_HOTBACKUP
	if (mach_read_from_2(read_buf + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE)
	    == TRX_UNDO_INSERT) {
		fprintf(stderr,
			"InnoDB: Page may be an insert undo log page\n");
	} else if (mach_read_from_2(read_buf + TRX_UNDO_PAGE_HDR
				    + TRX_UNDO_PAGE_TYPE)
		   == TRX_UNDO_UPDATE) {
		fprintf(stderr,
			"InnoDB: Page may be an update undo log page\n");
	}
#endif /* !UNIV_HOTBACKUP */
	switch (fil_page_get_type(read_buf)) {
		index_id_t	index_id;
	case FIL_PAGE_INDEX:
		index_id = btr_page_get_index_id(read_buf);
		fprintf(stderr,
			"InnoDB: Page may be an index page where"
			" index id is %llu\n",
			(ullint) index_id);
#ifndef UNIV_HOTBACKUP
		index = dict_index_find_on_id_low(index_id);
		if (index) {
			fputs("InnoDB: (", stderr);
			dict_index_name_print(stderr, NULL, index);
			fputs(")\n", stderr);
		}
#endif /* !UNIV_HOTBACKUP */
		break;
	case FIL_PAGE_INODE:
		fputs("InnoDB: Page may be an 'inode' page\n", stderr);
		break;
	case FIL_PAGE_IBUF_FREE_LIST:
		fputs("InnoDB: Page may be an insert buffer free list page\n",
		      stderr);
		break;
	case FIL_PAGE_TYPE_ALLOCATED:
		fputs("InnoDB: Page may be a freshly allocated page\n",
		      stderr);
		break;
	case FIL_PAGE_IBUF_BITMAP:
		fputs("InnoDB: Page may be an insert buffer bitmap page\n",
		      stderr);
		break;
	case FIL_PAGE_TYPE_SYS:
		fputs("InnoDB: Page may be a system page\n",
		      stderr);
		break;
	case FIL_PAGE_TYPE_TRX_SYS:
		fputs("InnoDB: Page may be a transaction system page\n",
		      stderr);
		break;
	case FIL_PAGE_TYPE_FSP_HDR:
		fputs("InnoDB: Page may be a file space header page\n",
		      stderr);
		break;
	case FIL_PAGE_TYPE_XDES:
		fputs("InnoDB: Page may be an extent descriptor page\n",
		      stderr);
		break;
	case FIL_PAGE_TYPE_BLOB:
		fputs("InnoDB: Page may be a BLOB page\n",
		      stderr);
		break;
	case FIL_PAGE_TYPE_ZBLOB:
	case FIL_PAGE_TYPE_ZBLOB2:
		fputs("InnoDB: Page may be a compressed BLOB page\n",
		      stderr);
		break;
	}
}
#ifndef UNIV_HOTBACKUP

# ifdef PFS_GROUP_BUFFER_SYNC
/********************************************************************//**
This function registers mutexes and rwlocks in buffer blocks with
performance schema. If PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER is
defined to be a value less than chunk->size, then only mutexes
and rwlocks in the first PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER
blocks are registered. */
static
void
pfs_register_buffer_block(
/*======================*/
	buf_chunk_t*	chunk)		/*!< in/out: chunk of buffers */
{
	ulint		i;
	ulint		num_to_register;
	buf_block_t*	block;

	block = chunk->blocks;

	num_to_register = ut_min(chunk->size,
				 PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER);

	for (i = 0; i < num_to_register; i++) {
		mutex_t*	mutex;
		rw_lock_t*	rwlock;

# ifdef UNIV_PFS_MUTEX
		mutex = &block->mutex;
		ut_a(!mutex->pfs_psi);
		mutex->pfs_psi = (PSI_server)
			? PSI_server->init_mutex(buffer_block_mutex_key, mutex)
			: NULL;
# endif /* UNIV_PFS_MUTEX */

# ifdef UNIV_PFS_RWLOCK
		rwlock = &block->lock;
		ut_a(!rwlock->pfs_psi);
		rwlock->pfs_psi = (PSI_server)
			? PSI_server->init_rwlock(buf_block_lock_key, rwlock)
			: NULL;
# endif /* UNIV_PFS_RWLOCK */
		block++;
	}
}
# endif /* PFS_GROUP_BUFFER_SYNC */
/********************************************************************//**
Initializes a buffer control block when the buf_pool is created. */
static
void
buf_block_init(
/*===========*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	buf_block_t*	block,		/*!< in: pointer to control block */
	byte*		frame)		/*!< in: pointer to buffer frame */
{
	UNIV_MEM_DESC(frame, UNIV_PAGE_SIZE, block);

	block->frame = frame;

	block->page.buf_pool_index = buf_pool_index(buf_pool);
	block->page.state = BUF_BLOCK_NOT_USED;
	block->page.buf_fix_count = 0;
	block->page.io_fix = BUF_IO_NONE;

	block->modify_clock = 0;

#ifdef UNIV_DEBUG_FILE_ACCESSES
	block->page.file_page_was_freed = FALSE;
#endif /* UNIV_DEBUG_FILE_ACCESSES */

	block->check_index_page_at_flush = FALSE;
	block->index = NULL;

	block->is_hashed = FALSE;

#ifdef UNIV_DEBUG
	block->page.in_page_hash = FALSE;
	block->page.in_zip_hash = FALSE;
	block->page.in_flush_list = FALSE;
	block->page.in_free_list = FALSE;
	block->page.in_LRU_list = FALSE;
	block->in_unzip_LRU_list = FALSE;
#endif /* UNIV_DEBUG */
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
	block->n_pointers = 0;
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
	page_zip_des_init(&block->page.zip);

#if defined PFS_SKIP_BUFFER_MUTEX_RWLOCK || defined PFS_GROUP_BUFFER_SYNC
	/* If PFS_SKIP_BUFFER_MUTEX_RWLOCK is defined, skip registration
	of buffer block mutex/rwlock with performance schema. If
	PFS_GROUP_BUFFER_SYNC is defined, skip the registration
	since buffer block mutex/rwlock will be registered later in
	pfs_register_buffer_block() */

	mutex_create(PFS_NOT_INSTRUMENTED, &block->mutex, SYNC_BUF_BLOCK);
	rw_lock_create(PFS_NOT_INSTRUMENTED, &block->lock, SYNC_LEVEL_VARYING);
#else /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */
	mutex_create(buffer_block_mutex_key, &block->mutex, SYNC_BUF_BLOCK);
	rw_lock_create(buf_block_lock_key, &block->lock, SYNC_LEVEL_VARYING);
#endif /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */

	ut_ad(rw_lock_validate(&(block->lock)));

#ifdef UNIV_SYNC_DEBUG
	rw_lock_create(buf_block_debug_latch_key,
		       &block->debug_latch, SYNC_NO_ORDER_CHECK);
#endif /* UNIV_SYNC_DEBUG */
}
/********************************************************************//**
Allocates a chunk of buffer frames.
@return chunk, or NULL on failure */
static
buf_chunk_t*
buf_chunk_init(
/*===========*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	buf_chunk_t*	chunk,		/*!< out: chunk of buffers */
	ulint		mem_size)	/*!< in: requested size in bytes */
{
	buf_block_t*	block;
	byte*		frame;
	ulint		i;

	/* Round down to a multiple of page size,
	although it already should be. */
	mem_size = ut_2pow_round(mem_size, UNIV_PAGE_SIZE);
	/* Reserve space for the block descriptors. */
	mem_size += ut_2pow_round((mem_size / UNIV_PAGE_SIZE) * (sizeof *block)
				  + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE);

	chunk->mem_size = mem_size;
	chunk->mem = os_mem_alloc_large(&chunk->mem_size);

	if (UNIV_UNLIKELY(chunk->mem == NULL)) {

		return(NULL);
	}

	/* Allocate the block descriptors from
	the start of the memory block. */
	chunk->blocks = static_cast<buf_block_struct *>(chunk->mem);

	/* Align a pointer to the first frame. Note that when
	os_large_page_size is smaller than UNIV_PAGE_SIZE,
	we may allocate one fewer block than requested. When
	it is bigger, we may allocate more blocks than requested. */

	frame = static_cast<unsigned char *>(ut_align(chunk->mem, UNIV_PAGE_SIZE));
	chunk->size = chunk->mem_size / UNIV_PAGE_SIZE
		- (frame != chunk->mem);

	/* Subtract the space needed for block descriptors. */
	{
		ulint	size = chunk->size;

		while (frame < (byte*) (chunk->blocks + size)) {
			frame += UNIV_PAGE_SIZE;
			size--;
		}

		chunk->size = size;
	}

	/* Init block structs and assign frames for them. Then we
	assign the frames to the first blocks (we already mapped the
	memory above). */

	block = chunk->blocks;

	for (i = chunk->size; i--; ) {

		buf_block_init(buf_pool, block, frame);

		/* Wipe contents of frame to eliminate a Purify warning */
		memset(block->frame, '\0', UNIV_PAGE_SIZE);

		/* Add the block to the free list */
		UT_LIST_ADD_LAST(list, buf_pool->free, (&block->page));

		ut_d(block->page.in_free_list = TRUE);
		ut_ad(buf_pool_from_block(block) == buf_pool);

		block++;
		frame += UNIV_PAGE_SIZE;
	}

#ifdef PFS_GROUP_BUFFER_SYNC
	pfs_register_buffer_block(chunk);
#endif
	return(chunk);
}
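
/* Illustration (not in the original): the frame alignment performed by
ut_align() above amounts to rounding the address up to the next multiple of
UNIV_PAGE_SIZE, which is a power of two; frame_align() is a hypothetical
name:

	byte*
	frame_align(void* ptr)
	{
		return((byte*) ((((ulint) ptr) + UNIV_PAGE_SIZE - 1)
				& ~((ulint) (UNIV_PAGE_SIZE - 1))));
	}
*/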
/*********************************************************************//**
Finds a block in the given buffer chunk that points to a
given compressed page.
@return buffer block pointing to the compressed page, or NULL */
static
buf_block_t*
buf_chunk_contains_zip(
/*===================*/
	buf_chunk_t*	chunk,	/*!< in: chunk being checked */
	const void*	data)	/*!< in: pointer to compressed page */
{
	buf_block_t*	block;
	ulint		i;

	block = chunk->blocks;

	for (i = chunk->size; i--; block++) {
		if (block->page.zip.data == data) {

			return(block);
		}
	}

	return(NULL);
}
#ifdef UNIV_DEBUG
/*********************************************************************//**
Finds a block in the buffer pool that points to a
given compressed page.
@return buffer block pointing to the compressed page, or NULL */
UNIV_INTERN
buf_block_t*
buf_pool_contains_zip(
/*==================*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	const void*	data)		/*!< in: pointer to compressed page */
{
	ulint		n;
	buf_chunk_t*	chunk = buf_pool->chunks;

	ut_ad(buf_pool);
	ut_ad(buf_pool_mutex_own(buf_pool));
	for (n = buf_pool->n_chunks; n--; chunk++) {

		buf_block_t* block = buf_chunk_contains_zip(chunk, data);

		if (block) {
			return(block);
		}
	}

	return(NULL);
}
#endif /* UNIV_DEBUG */
/*********************************************************************//**
Checks that all file pages in the buffer chunk are in a replaceable state.
@return address of a non-free block, or NULL if all freed */
static
const buf_block_t*
buf_chunk_not_freed(
/*================*/
	buf_chunk_t*	chunk)	/*!< in: chunk being checked */
{
	buf_block_t*	block;
	ulint		i;

	block = chunk->blocks;

	for (i = chunk->size; i--; block++) {
		ibool	ready;

		switch (buf_block_get_state(block)) {
		case BUF_BLOCK_ZIP_FREE:
		case BUF_BLOCK_ZIP_PAGE:
		case BUF_BLOCK_ZIP_DIRTY:
			/* The uncompressed buffer pool should never
			contain compressed block descriptors. */
			ut_error;
			break;
		case BUF_BLOCK_NOT_USED:
		case BUF_BLOCK_READY_FOR_USE:
		case BUF_BLOCK_MEMORY:
		case BUF_BLOCK_REMOVE_HASH:
			/* Skip blocks that are not being used for
			file pages. */
			break;
		case BUF_BLOCK_FILE_PAGE:
			mutex_enter(&block->mutex);
			ready = buf_flush_ready_for_replace(&block->page);
			mutex_exit(&block->mutex);

			if (!ready) {

				return(block);
			}

			break;
		}
	}

	return(NULL);
}
/*********************************************************************//**
Checks that all blocks in the buffer chunk are in BUF_BLOCK_NOT_USED state.
@return TRUE if all freed */
static
ibool
buf_chunk_all_free(
/*===============*/
	const buf_chunk_t*	chunk)	/*!< in: chunk being checked */
{
	const buf_block_t*	block;
	ulint			i;

	block = chunk->blocks;

	for (i = chunk->size; i--; block++) {

		if (buf_block_get_state(block) != BUF_BLOCK_NOT_USED) {

			return(FALSE);
		}
	}

	return(TRUE);
}
/********************************************************************//**
Frees a chunk of buffer frames. */
static
void
buf_chunk_free(
/*===========*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	buf_chunk_t*	chunk)		/*!< out: chunk of buffers */
{
	buf_block_t*		block;
	const buf_block_t*	block_end;

	ut_ad(buf_pool_mutex_own(buf_pool));

	block_end = chunk->blocks + chunk->size;

	for (block = chunk->blocks; block < block_end; block++) {
		ut_a(buf_block_get_state(block) == BUF_BLOCK_NOT_USED);
		ut_a(!block->page.zip.data);

		ut_ad(!block->page.in_LRU_list);
		ut_ad(!block->in_unzip_LRU_list);
		ut_ad(!block->page.in_flush_list);
		/* Remove the block from the free list. */
		ut_ad(block->page.in_free_list);
		UT_LIST_REMOVE(list, buf_pool->free, (&block->page));

		/* Free the latches. */
		mutex_free(&block->mutex);
		rw_lock_free(&block->lock);
#ifdef UNIV_SYNC_DEBUG
		rw_lock_free(&block->debug_latch);
#endif /* UNIV_SYNC_DEBUG */
		UNIV_MEM_UNDESC(block);
	}

	os_mem_free_large(chunk->mem, chunk->mem_size);
}
/********************************************************************//**
Set buffer pool size variables after resizing it. */
static
void
buf_pool_set_sizes(void)
/*====================*/
{
	ulint	i;
	ulint	curr_size = 0;

	buf_pool_mutex_enter_all();

	for (i = 0; i < srv_buf_pool_instances; i++) {
		buf_pool_t*	buf_pool;

		buf_pool = buf_pool_from_array(i);
		curr_size += buf_pool->curr_pool_size;
	}

	srv_buf_pool_curr_size = curr_size;
	srv_buf_pool_old_size = srv_buf_pool_size;

	buf_pool_mutex_exit_all();
}
/********************************************************************//**
Initialize a buffer pool instance.
@return DB_SUCCESS if all goes well. */
static
ulint
buf_pool_init_instance(
/*===================*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	ulint		buf_pool_size,	/*!< in: size in bytes */
	ulint		instance_no)	/*!< in: id of the instance */
{
	ulint		i;
	buf_chunk_t*	chunk;

	/* 1. Initialize general fields
	------------------------------- */
	mutex_create(buf_pool_mutex_key,
		     &buf_pool->mutex, SYNC_BUF_POOL);
	mutex_create(buf_pool_zip_mutex_key,
		     &buf_pool->zip_mutex, SYNC_BUF_BLOCK);

	buf_pool_mutex_enter(buf_pool);

	if (buf_pool_size > 0) {
		buf_pool->n_chunks = 1;
		void *chunk_ptr= mem_zalloc((sizeof *chunk));
		buf_pool->chunks = chunk = static_cast<buf_chunk_t *>(chunk_ptr);

		UT_LIST_INIT(buf_pool->free);

		if (!buf_chunk_init(buf_pool, chunk, buf_pool_size)) {
			mem_free(chunk);

			buf_pool_mutex_exit(buf_pool);

			return(DB_ERROR);
		}

		buf_pool->instance_no = instance_no;
		buf_pool->old_pool_size = buf_pool_size;
		buf_pool->curr_size = chunk->size;
		buf_pool->curr_pool_size = buf_pool->curr_size * UNIV_PAGE_SIZE;

		buf_pool->page_hash = hash_create(2 * buf_pool->curr_size);
		buf_pool->zip_hash = hash_create(2 * buf_pool->curr_size);

		buf_pool->last_printout_time = ut_time();
	}
	/* 2. Initialize flushing fields
	-------------------------------- */

	mutex_create(flush_list_mutex_key, &buf_pool->flush_list_mutex,
		     SYNC_BUF_FLUSH_LIST);

	for (i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) {
		buf_pool->no_flush[i] = os_event_create(NULL);
	}

	/* 3. Initialize LRU fields
	--------------------------- */

	/* All fields are initialized by mem_zalloc(). */

	buf_pool_mutex_exit(buf_pool);

	return(DB_SUCCESS);
}
/********************************************************************//**
Free one buffer pool instance. */
static
void
buf_pool_free_instance(
/*===================*/
	buf_pool_t*	buf_pool)	/* in,own: buffer pool instance
					to free */
{
	buf_chunk_t*	chunk;
	buf_chunk_t*	chunks;

	chunks = buf_pool->chunks;
	chunk = chunks + buf_pool->n_chunks;

	while (--chunk >= chunks) {
		/* Bypass the checks of buf_chunk_free(), since they
		would fail at shutdown. */
		os_mem_free_large(chunk->mem, chunk->mem_size);
	}

	mem_free(buf_pool->chunks);
	hash_table_free(buf_pool->page_hash);
	hash_table_free(buf_pool->zip_hash);
}
/********************************************************************//**
Creates the buffer pool.
@return DB_SUCCESS if success, DB_ERROR if not enough memory or error */
UNIV_INTERN
ulint
buf_pool_init(
/*==========*/
	ulint	total_size,	/*!< in: size of the total pool in bytes */
	ulint	n_instances)	/*!< in: number of instances */
{
	ulint		i;
	const ulint	size	= total_size / n_instances;

	ut_ad(n_instances > 0);
	ut_ad(n_instances <= MAX_BUFFER_POOLS);
	ut_ad(n_instances == srv_buf_pool_instances);

	/* We create an extra buffer pool instance, this instance is used
	for flushing the flush lists, to keep track of n_flush for all
	the buffer pools and also used as a waiting object during flushing. */
	void *buf_pool_void_ptr= mem_zalloc(n_instances * sizeof *buf_pool_ptr);
	buf_pool_ptr = static_cast<buf_pool_struct *>(buf_pool_void_ptr);

	for (i = 0; i < n_instances; i++) {
		buf_pool_t*	ptr	= &buf_pool_ptr[i];

		if (buf_pool_init_instance(ptr, size, i) != DB_SUCCESS) {

			/* Free all the instances created so far. */
			buf_pool_free(i);

			return(DB_ERROR);
		}
	}

	buf_pool_set_sizes();
	buf_LRU_old_ratio_update(100 * 3 / 8, FALSE);

	btr_search_sys_create(buf_pool_get_curr_size() / sizeof(void*) / 64);

	return(DB_SUCCESS);
}
/********************************************************************//**
Frees the buffer pool at shutdown. This must not be invoked before
freeing all mutexes. */
UNIV_INTERN
void
buf_pool_free(
/*==========*/
	ulint	n_instances)	/*!< in: number of instances to free */
{
	ulint	i;

	for (i = 0; i < n_instances; i++) {
		buf_pool_free_instance(buf_pool_from_array(i));
	}

	mem_free(buf_pool_ptr);
	buf_pool_ptr = NULL;
}
/********************************************************************//**
Drops the adaptive hash index for a buffer pool instance. */
static
void
buf_pool_drop_hash_index_instance(
/*==============================*/
	buf_pool_t*	buf_pool,		/*!< in: buffer pool instance */
	ibool*		released_search_latch)	/*!< out: flag for signalling
						whether the search latch was
						released */
{
	buf_chunk_t*	chunks	= buf_pool->chunks;
	buf_chunk_t*	chunk	= chunks + buf_pool->n_chunks;

	while (--chunk >= chunks) {
		ulint		i;
		buf_block_t*	block	= chunk->blocks;

		for (i = chunk->size; i--; block++) {
			/* block->is_hashed cannot be modified
			when we have an x-latch on btr_search_latch;
			see the comment in buf0buf.h */

			if (!block->is_hashed) {
				continue;
			}

			/* To follow the latching order, we
			have to release btr_search_latch
			before acquiring block->latch. */
			rw_lock_x_unlock(&btr_search_latch);
			/* When we release the search latch,
			we must rescan all blocks, because
			some may become hashed again. */
			*released_search_latch = TRUE;

			rw_lock_x_lock(&block->lock);

			/* This should be guaranteed by the
			callers, which will be holding
			btr_search_enabled_mutex. */
			ut_ad(!btr_search_enabled);

			/* Because we did not buffer-fix the
			block by calling buf_block_get_gen(),
			it is possible that the block has been
			allocated for some other use after
			btr_search_latch was released above.
			We do not care which file page the
			block is mapped to. All we want to do
			is to drop any hash entries referring
			to the page. */

			/* It is possible that
			block->page.state != BUF_FILE_PAGE.
			Even that does not matter, because
			btr_search_drop_page_hash_index() will
			check block->is_hashed before doing
			anything. block->is_hashed can only
			be set on uncompressed file pages. */

			btr_search_drop_page_hash_index(block);

			rw_lock_x_unlock(&block->lock);

			rw_lock_x_lock(&btr_search_latch);

			ut_ad(!btr_search_enabled);
		}
	}
}
/********************************************************************//**
Drops the adaptive hash index. To prevent a livelock, this function
is only to be called while holding btr_search_latch and while
btr_search_enabled == FALSE. */
UNIV_INTERN
void
buf_pool_drop_hash_index(void)
/*==========================*/
{
	ibool	released_search_latch;

#ifdef UNIV_SYNC_DEBUG
	ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
	ut_ad(!btr_search_enabled);

	do {
		ulint	i;

		released_search_latch = FALSE;

		for (i = 0; i < srv_buf_pool_instances; i++) {
			buf_pool_t*	buf_pool;

			buf_pool = buf_pool_from_array(i);

			buf_pool_drop_hash_index_instance(
				buf_pool, &released_search_latch);
		}

	} while (released_search_latch);
}
/********************************************************************//**
Relocate a buffer control block. Relocates the block on the LRU list
and in buf_pool->page_hash. Does not relocate bpage->list.
The caller must take care of relocating bpage->list. */
UNIV_INTERN
void
buf_relocate(
/*=========*/
	buf_page_t*	bpage,	/*!< in/out: control block being relocated;
				buf_page_get_state(bpage) must be
				BUF_BLOCK_ZIP_DIRTY or BUF_BLOCK_ZIP_PAGE */
	buf_page_t*	dpage)	/*!< in/out: destination control block */
{
	buf_page_t*	b;
	ulint		fold;
	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);

	ut_ad(buf_pool_mutex_own(buf_pool));
	ut_ad(mutex_own(buf_page_get_mutex(bpage)));
	ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
	ut_a(bpage->buf_fix_count == 0);
	ut_ad(bpage->in_LRU_list);
	ut_ad(!bpage->in_zip_hash);
	ut_ad(bpage->in_page_hash);
	ut_ad(bpage == buf_page_hash_get(buf_pool,
					 bpage->space, bpage->offset));
	ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
#ifdef UNIV_DEBUG
	switch (buf_page_get_state(bpage)) {
	case BUF_BLOCK_ZIP_FREE:
	case BUF_BLOCK_NOT_USED:
	case BUF_BLOCK_READY_FOR_USE:
	case BUF_BLOCK_FILE_PAGE:
	case BUF_BLOCK_MEMORY:
	case BUF_BLOCK_REMOVE_HASH:
		ut_error;
	case BUF_BLOCK_ZIP_DIRTY:
	case BUF_BLOCK_ZIP_PAGE:
		break;
	}
#endif /* UNIV_DEBUG */

	memcpy(dpage, bpage, sizeof *dpage);

	ut_d(bpage->in_LRU_list = FALSE);
	ut_d(bpage->in_page_hash = FALSE);

	/* relocate buf_pool->LRU */
	b = UT_LIST_GET_PREV(LRU, bpage);
	UT_LIST_REMOVE(LRU, buf_pool->LRU, bpage);

	if (b) {
		UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU, b, dpage);
	} else {
		UT_LIST_ADD_FIRST(LRU, buf_pool->LRU, dpage);
	}

	if (UNIV_UNLIKELY(buf_pool->LRU_old == bpage)) {
		buf_pool->LRU_old = dpage;
#ifdef UNIV_LRU_DEBUG
		/* buf_pool->LRU_old must be the first item in the LRU list
		whose "old" flag is set. */
		ut_a(buf_pool->LRU_old->old);
		ut_a(!UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)
		     || !UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)->old);
		ut_a(!UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)
		     || UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)->old);
	} else {
		/* Check that the "old" flag is consistent in
		the block and its neighbours. */
		buf_page_set_old(dpage, buf_page_is_old(dpage));
#endif /* UNIV_LRU_DEBUG */
	}

	ut_d(UT_LIST_VALIDATE(LRU, buf_page_t, buf_pool->LRU,
			      ut_ad(ut_list_node_313->in_LRU_list)));

	/* relocate buf_pool->page_hash */
	fold = buf_page_address_fold(bpage->space, bpage->offset);

	HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, bpage);
	HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold, dpage);
}
/********************************************************************//**
Shrinks a buffer pool instance. */
static
void
buf_pool_shrink_instance(
/*=====================*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	ulint		chunk_size)	/*!< in: number of pages to remove */
{
	buf_chunk_t*	chunks;
	buf_chunk_t*	chunk;
	ulint		max_size;
	ulint		max_free_size;
	buf_chunk_t*	max_chunk;
	buf_chunk_t*	max_free_chunk;

	ut_ad(!buf_pool_mutex_own(buf_pool));

try_again:
	btr_search_disable(); /* Empty the adaptive hash index again */
	buf_pool_mutex_enter(buf_pool);

shrink_again:
	if (buf_pool->n_chunks <= 1) {

		/* Cannot shrink if there is only one chunk */
		goto func_done;
	}

	/* Search for the largest free chunk
	not larger than the size difference */
	chunks = buf_pool->chunks;
	chunk = chunks + buf_pool->n_chunks;
	max_size = max_free_size = 0;
	max_chunk = max_free_chunk = NULL;

	while (--chunk >= chunks) {
		if (chunk->size <= chunk_size
		    && chunk->size > max_free_size) {
			if (chunk->size > max_size) {
				max_size = chunk->size;
				max_chunk = chunk;
			}

			if (buf_chunk_all_free(chunk)) {
				max_free_size = chunk->size;
				max_free_chunk = chunk;
			}
		}
	}

	if (!max_free_size) {

		ulint		dirty	= 0;
		ulint		nonfree	= 0;
		buf_block_t*	block;
		buf_block_t*	bend;

		/* Cannot shrink: try again later
		(do not assign srv_buf_pool_old_size) */
		if (!max_chunk) {

			goto func_exit;
		}

		block = max_chunk->blocks;
		bend = block + max_chunk->size;

		/* Move the blocks of chunk to the end of the
		LRU list and try to flush them. */
		for (; block < bend; block++) {
			switch (buf_block_get_state(block)) {
			case BUF_BLOCK_NOT_USED:
				continue;
			case BUF_BLOCK_FILE_PAGE:
				break;
			default:
				nonfree++;
				continue;
			}

			mutex_enter(&block->mutex);
			/* The following calls will temporarily
			release block->mutex and buf_pool->mutex.
			Therefore, we have to always retry,
			even if !dirty && !nonfree. */

			if (!buf_flush_ready_for_replace(&block->page)) {

				buf_LRU_make_block_old(&block->page);
				dirty++;
			} else if (buf_LRU_free_block(&block->page, TRUE, NULL)
				   != BUF_LRU_FREED) {
				nonfree++;
			}

			mutex_exit(&block->mutex);
		}

		buf_pool_mutex_exit(buf_pool);

		/* Request for a flush of the chunk if it helps.
		Do not flush if there are non-free blocks, since
		flushing will not make the chunk freeable. */
		if (nonfree) {
			/* Avoid busy-waiting. */
			os_thread_sleep(100000);
		} else if (dirty
			   && buf_flush_LRU(buf_pool, dirty)
			      == ULINT_UNDEFINED) {

			buf_flush_wait_batch_end(buf_pool, BUF_FLUSH_LRU);
		}

		goto try_again;
	}

	max_size = max_free_size;
	max_chunk = max_free_chunk;

	buf_pool->old_pool_size = buf_pool->curr_pool_size;

	/* Rewrite buf_pool->chunks. Copy everything but max_chunk. */
	chunks = static_cast<buf_chunk_t *>(mem_alloc((buf_pool->n_chunks - 1) * sizeof *chunks));
	memcpy(chunks, buf_pool->chunks,
	       (max_chunk - buf_pool->chunks) * sizeof *chunks);
	memcpy(chunks + (max_chunk - buf_pool->chunks),
	       max_chunk + 1,
	       (buf_pool->chunks + buf_pool->n_chunks
		- (max_chunk + 1)) * sizeof *chunks);
	ut_a(buf_pool->curr_size > max_chunk->size);
	buf_pool->curr_size -= max_chunk->size;
	buf_pool->curr_pool_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
	chunk_size -= max_chunk->size;
	buf_chunk_free(buf_pool, max_chunk);
	mem_free(buf_pool->chunks);
	buf_pool->chunks = chunks;
	buf_pool->n_chunks--;

	/* Allow a slack of one megabyte. */
	if (chunk_size > 1048576 / UNIV_PAGE_SIZE) {

		goto shrink_again;
	}
	goto func_exit;

func_done:
	buf_pool->old_pool_size = buf_pool->curr_pool_size;
func_exit:
	buf_pool_mutex_exit(buf_pool);
	btr_search_enable();
}
/********************************************************************//**
Shrinks the buffer pool. */
static
void
buf_pool_shrink(
/*============*/
	ulint	chunk_size)	/*!< in: number of pages to remove */
{
	ulint	i;

	for (i = 0; i < srv_buf_pool_instances; i++) {
		buf_pool_t*	buf_pool;
		ulint		instance_chunk_size;

		instance_chunk_size = chunk_size / srv_buf_pool_instances;
		buf_pool = buf_pool_from_array(i);
		buf_pool_shrink_instance(buf_pool, instance_chunk_size);
	}

	buf_pool_set_sizes();
}
/********************************************************************//**
Rebuild buf_pool->page_hash for a buffer pool instance. */
static
void
buf_pool_page_hash_rebuild_instance(
/*================================*/
	buf_pool_t*	buf_pool)	/*!< in: buffer pool instance */
{
	ulint		i;
	buf_page_t*	b;
	buf_chunk_t*	chunk;
	ulint		n_chunks;
	hash_table_t*	zip_hash;
	hash_table_t*	page_hash;

	buf_pool_mutex_enter(buf_pool);

	/* Free, create, and populate the hash table. */
	hash_table_free(buf_pool->page_hash);
	buf_pool->page_hash = page_hash = hash_create(2 * buf_pool->curr_size);
	zip_hash = hash_create(2 * buf_pool->curr_size);

	HASH_MIGRATE(buf_pool->zip_hash, zip_hash, buf_page_t, hash,
		     BUF_POOL_ZIP_FOLD_BPAGE);

	hash_table_free(buf_pool->zip_hash);
	buf_pool->zip_hash = zip_hash;

	/* Insert the uncompressed file pages to buf_pool->page_hash. */

	chunk = buf_pool->chunks;
	n_chunks = buf_pool->n_chunks;

	for (i = 0; i < n_chunks; i++, chunk++) {
		ulint		j;
		buf_block_t*	block = chunk->blocks;

		for (j = 0; j < chunk->size; j++, block++) {
			if (buf_block_get_state(block)
			    == BUF_BLOCK_FILE_PAGE) {
				ut_ad(!block->page.in_zip_hash);
				ut_ad(block->page.in_page_hash);

				HASH_INSERT(buf_page_t, hash, page_hash,
					    buf_page_address_fold(
						    block->page.space,
						    block->page.offset),
					    &block->page);
			}
		}
	}

	/* Insert the compressed-only pages to buf_pool->page_hash.
	All such blocks are either in buf_pool->zip_clean or
	in buf_pool->flush_list. */

	for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
	     b = UT_LIST_GET_NEXT(list, b)) {
		ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
		ut_ad(!b->in_flush_list);
		ut_ad(b->in_LRU_list);
		ut_ad(b->in_page_hash);
		ut_ad(!b->in_zip_hash);

		HASH_INSERT(buf_page_t, hash, page_hash,
			    buf_page_address_fold(b->space, b->offset), b);
	}

	buf_flush_list_mutex_enter(buf_pool);
	for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
	     b = UT_LIST_GET_NEXT(list, b)) {
		ut_ad(b->in_flush_list);
		ut_ad(b->in_LRU_list);
		ut_ad(b->in_page_hash);
		ut_ad(!b->in_zip_hash);

		switch (buf_page_get_state(b)) {
		case BUF_BLOCK_ZIP_DIRTY:
			HASH_INSERT(buf_page_t, hash, page_hash,
				    buf_page_address_fold(b->space,
							  b->offset), b);
			break;
		case BUF_BLOCK_FILE_PAGE:
			/* uncompressed page */
			break;
		case BUF_BLOCK_ZIP_FREE:
		case BUF_BLOCK_ZIP_PAGE:
		case BUF_BLOCK_NOT_USED:
		case BUF_BLOCK_READY_FOR_USE:
		case BUF_BLOCK_MEMORY:
		case BUF_BLOCK_REMOVE_HASH:
			ut_error;
			break;
		}
	}

	buf_flush_list_mutex_exit(buf_pool);
	buf_pool_mutex_exit(buf_pool);
}
/********************************************************************//**
Determine if a block is a sentinel for a buffer pool watch.
@return TRUE if a sentinel for a buffer pool watch, FALSE if not */
UNIV_INTERN
ibool
buf_pool_watch_is_sentinel(
/*=======================*/
	buf_pool_t*		buf_pool,	/*!< buffer pool instance */
	const buf_page_t*	bpage)		/*!< in: block */
{
	ut_ad(buf_page_in_file(bpage));

	if (bpage < &buf_pool->watch[0]
	    || bpage >= &buf_pool->watch[BUF_POOL_WATCH_SIZE]) {

		ut_ad(buf_page_get_state(bpage) != BUF_BLOCK_ZIP_PAGE
		      || bpage->zip.data != NULL);

		return(FALSE);
	}

	ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_PAGE);
	ut_ad(!bpage->in_zip_hash);
	ut_ad(bpage->in_page_hash);
	ut_ad(bpage->zip.data == NULL);
	ut_ad(bpage->buf_fix_count > 0);

	return(TRUE);
}
/****************************************************************//**
Add watch for the given page to be read in. Caller must have the buffer pool
mutex reserved.
@return NULL if watch set, block if the page is in the buffer pool */
UNIV_INTERN
buf_page_t*
buf_pool_watch_set(
/*===============*/
	ulint	space,	/*!< in: space id */
	ulint	offset,	/*!< in: page number */
	ulint	fold)	/*!< in: buf_page_address_fold(space, offset) */
{
	buf_page_t*	bpage;
	ulint		i;
	buf_pool_t*	buf_pool = buf_pool_get(space, offset);

	ut_ad(buf_pool_mutex_own(buf_pool));

	bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);

	if (UNIV_LIKELY_NULL(bpage)) {
		if (!buf_pool_watch_is_sentinel(buf_pool, bpage)) {
			/* The page was loaded meanwhile. */
			return(bpage);
		}
		/* Add to an existing watch. */
		bpage->buf_fix_count++;
		return(NULL);
	}

	for (i = 0; i < BUF_POOL_WATCH_SIZE; i++) {
		bpage = &buf_pool->watch[i];

		ut_ad(bpage->access_time == 0);
		ut_ad(bpage->newest_modification == 0);
		ut_ad(bpage->oldest_modification == 0);
		ut_ad(bpage->zip.data == NULL);
		ut_ad(!bpage->in_zip_hash);

		switch (bpage->state) {
		case BUF_BLOCK_POOL_WATCH:
			ut_ad(!bpage->in_page_hash);
			ut_ad(bpage->buf_fix_count == 0);

			/* bpage is pointing to buf_pool->watch[],
			which is protected by buf_pool->mutex.
			Normally, buf_page_t objects are protected by
			buf_block_t::mutex or buf_pool->zip_mutex or both. */

			bpage->state = BUF_BLOCK_ZIP_PAGE;
			bpage->space = space;
			bpage->offset = offset;
			bpage->buf_fix_count = 1;

			ut_d(bpage->in_page_hash = TRUE);
			HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
				    fold, bpage);
			return(NULL);
		case BUF_BLOCK_ZIP_PAGE:
			ut_ad(bpage->in_page_hash);
			ut_ad(bpage->buf_fix_count > 0);
			break;
		default:
			ut_error;
		}
	}

	/* Allocation failed. Either the maximum number of purge
	threads should never exceed BUF_POOL_WATCH_SIZE, or this code
	should be modified to return a special non-NULL value and the
	caller should purge the record directly. */
	ut_error;

	/* Fix compiler warning */
	return(NULL);
}
/********************************************************************//**
Rebuild buf_pool->page_hash. */
static
void
buf_pool_page_hash_rebuild(void)
/*============================*/
{
	ulint	i;

	for (i = 0; i < srv_buf_pool_instances; i++) {
		buf_pool_page_hash_rebuild_instance(buf_pool_from_array(i));
	}
}
/********************************************************************//**
Increase the buffer pool size of one buffer pool instance. */
static
void
buf_pool_increase_instance(
/*=======================*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	ulint		change_size)	/*!< in: new size of the pool */
{
	buf_chunk_t*	chunks;
	buf_chunk_t*	chunk;

	buf_pool_mutex_enter(buf_pool);
	chunks = static_cast<buf_chunk_t *>(mem_alloc((buf_pool->n_chunks + 1) * sizeof *chunks));

	memcpy(chunks, buf_pool->chunks, buf_pool->n_chunks * sizeof *chunks);

	chunk = &chunks[buf_pool->n_chunks];

	if (!buf_chunk_init(buf_pool, chunk, change_size)) {
		mem_free(chunks);
	} else {
		buf_pool->old_pool_size = buf_pool->curr_pool_size;
		buf_pool->curr_size += chunk->size;
		buf_pool->curr_pool_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
		mem_free(buf_pool->chunks);
		buf_pool->chunks = chunks;
		buf_pool->n_chunks++;
	}

	buf_pool_mutex_exit(buf_pool);
}
/********************************************************************//**
Increase the buffer pool size. */
static
void
buf_pool_increase(
/*==============*/
	ulint	change_size)	/*!< in: increase by this many bytes */
{
	ulint	i;

	for (i = 0; i < srv_buf_pool_instances; i++) {
		buf_pool_increase_instance(
			buf_pool_from_array(i),
			change_size / srv_buf_pool_instances);
	}

	buf_pool_set_sizes();
}
/********************************************************************//**
Resizes the buffer pool. */
UNIV_INTERN
void
buf_pool_resize(void)
/*=================*/
{
	ulint	change_size;
	ulint	min_change_size = 1048576 * srv_buf_pool_instances;

	buf_pool_mutex_enter_all();

	if (srv_buf_pool_old_size == srv_buf_pool_size) {

		buf_pool_mutex_exit_all();

		return;

	} else if (srv_buf_pool_curr_size + min_change_size
		   > srv_buf_pool_size) {

		change_size = (srv_buf_pool_curr_size - srv_buf_pool_size)
			    / UNIV_PAGE_SIZE;

		buf_pool_mutex_exit_all();

		/* Disable adaptive hash indexes and empty the index
		in order to free up memory in the buffer pool chunks. */
		buf_pool_shrink(change_size);

	} else if (srv_buf_pool_curr_size + min_change_size
		   < srv_buf_pool_size) {

		/* Enlarge the buffer pool by at least one megabyte */

		change_size = srv_buf_pool_size - srv_buf_pool_curr_size;

		buf_pool_mutex_exit_all();

		buf_pool_increase(change_size);
	} else {
		srv_buf_pool_size = srv_buf_pool_old_size;

		buf_pool_mutex_exit_all();

		return;
	}

	buf_pool_page_hash_rebuild();
}
/****************************************************************//**
Remove the sentinel block for the watch before replacing it with a real block.
buf_page_watch_clear() or buf_page_watch_occurred() will notice that
the block has been replaced with the real block.
@return reference count, to be added to the replacement block */
static
void
buf_pool_watch_remove(
/*==================*/
	buf_pool_t*	buf_pool,	/*!< buffer pool instance */
	ulint		fold,		/*!< in: buf_page_address_fold(
					space, offset) */
	buf_page_t*	watch)		/*!< in/out: sentinel for watch */
{
	ut_ad(buf_pool_mutex_own(buf_pool));

	HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, watch);
	ut_d(watch->in_page_hash = FALSE);
	watch->buf_fix_count = 0;
	watch->state = BUF_BLOCK_POOL_WATCH;
}
2053
/****************************************************************//**
2054
Stop watching if the page has been read in.
2055
buf_pool_watch_set(space,offset) must have returned NULL before. */
2058
buf_pool_watch_unset(
2059
/*=================*/
2060
ulint space, /*!< in: space id */
2061
ulint offset) /*!< in: page number */
2064
buf_pool_t* buf_pool = buf_pool_get(space, offset);
2065
ulint fold = buf_page_address_fold(space, offset);
2067
buf_pool_mutex_enter(buf_pool);
2068
bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
2069
/* The page must exist because buf_pool_watch_set()
2070
increments buf_fix_count. */
2073
if (UNIV_UNLIKELY(!buf_pool_watch_is_sentinel(buf_pool, bpage))) {
2074
mutex_t* mutex = buf_page_get_mutex(bpage);
2077
ut_a(bpage->buf_fix_count > 0);
2078
bpage->buf_fix_count--;
2081
ut_a(bpage->buf_fix_count > 0);
2083
if (UNIV_LIKELY(!--bpage->buf_fix_count)) {
2084
buf_pool_watch_remove(buf_pool, fold, bpage);
2088
buf_pool_mutex_exit(buf_pool);
2091
/****************************************************************//**
2092
Check if the page has been read in.
2093
This may only be called after buf_pool_watch_set(space,offset)
2094
has returned NULL and before invoking buf_pool_watch_unset(space,offset).
2095
@return FALSE if the given page was not read in, TRUE if it was */
2098
buf_pool_watch_occurred(
2099
/*====================*/
2100
ulint space, /*!< in: space id */
2101
ulint offset) /*!< in: page number */
2105
buf_pool_t* buf_pool = buf_pool_get(space, offset);
2106
ulint fold = buf_page_address_fold(space, offset);
2108
buf_pool_mutex_enter(buf_pool);
2110
bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
2111
/* The page must exist because buf_pool_watch_set()
2112
increments buf_fix_count. */
2114
ret = !buf_pool_watch_is_sentinel(buf_pool, bpage);
2115
buf_pool_mutex_exit(buf_pool);
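/* Usage sketch (illustrative only, not part of this file): the watch
protocol is set -> check -> unset. A purge thread would use it roughly
like this, with fold = buf_page_address_fold(space, offset):

	if (buf_pool_watch_set(space, offset, fold) == NULL) {
		... the page was not resident; decide whether to purge ...
		if (buf_pool_watch_occurred(space, offset)) {
			... the page was read in meanwhile; fall back ...
		}
		buf_pool_watch_unset(space, offset);
	}
*/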
/********************************************************************//**
Moves a page to the start of the buffer pool LRU list. This high-level
function can be used to prevent an important page from slipping out of
the buffer pool. */
buf_page_make_young(
/*================*/
buf_page_t* bpage) /*!< in: buffer block of a file page */
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
buf_pool_mutex_enter(buf_pool);
ut_a(buf_page_in_file(bpage));
buf_LRU_make_block_young(bpage);
buf_pool_mutex_exit(buf_pool);
/********************************************************************//**
Sets the time of the first access of a page and moves a page to the
start of the buffer pool LRU list if it is too old. This high-level
function can be used to prevent an important page from slipping
out of the buffer pool. */
buf_page_set_accessed_make_young(
/*=============================*/
buf_page_t* bpage, /*!< in/out: buffer block of a file page */
unsigned access_time) /*!< in: bpage->access_time
read under mutex protection, or 0 if unknown */
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
ut_ad(!buf_pool_mutex_own(buf_pool));
ut_a(buf_page_in_file(bpage));
if (buf_page_peek_if_too_old(bpage)) {
buf_pool_mutex_enter(buf_pool);
buf_LRU_make_block_young(bpage);
buf_pool_mutex_exit(buf_pool);
} else if (!access_time) {
ulint time_ms = ut_time_ms();
buf_pool_mutex_enter(buf_pool);
buf_page_set_accessed(bpage, time_ms);
buf_pool_mutex_exit(buf_pool);
/********************************************************************//**
Resets the check_index_page_at_flush field of a page if found in the buffer
pool. */
buf_reset_check_index_page_at_flush(
/*================================*/
ulint space, /*!< in: space id */
ulint offset) /*!< in: page number */
buf_pool_t* buf_pool = buf_pool_get(space, offset);
buf_pool_mutex_enter(buf_pool);
block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset);
if (block && buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE) {
ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page));
block->check_index_page_at_flush = FALSE;
buf_pool_mutex_exit(buf_pool);
/********************************************************************//**
Returns the current state of is_hashed of a page. FALSE if the page is
not in the pool. NOTE that this operation does not fix the page in the
pool if it is found there.
@return TRUE if page hash index is built in search system */
buf_page_peek_if_search_hashed(
/*===========================*/
ulint space, /*!< in: space id */
ulint offset) /*!< in: page number */
buf_pool_t* buf_pool = buf_pool_get(space, offset);
buf_pool_mutex_enter(buf_pool);
block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset);
if (!block || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page));
is_hashed = block->is_hashed;
buf_pool_mutex_exit(buf_pool);
#ifdef UNIV_DEBUG_FILE_ACCESSES
/********************************************************************//**
Sets file_page_was_freed TRUE if the page is found in the buffer pool.
This function should be called when we free a file page and want the
debug version to check that it is not accessed any more unless
reallocated.
@return control block if found in page hash table, otherwise NULL */
buf_page_set_file_page_was_freed(
/*=============================*/
ulint space, /*!< in: space id */
ulint offset) /*!< in: page number */
buf_pool_t* buf_pool = buf_pool_get(space, offset);
buf_pool_mutex_enter(buf_pool);
bpage = buf_page_hash_get(buf_pool, space, offset);
ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
bpage->file_page_was_freed = TRUE;
buf_pool_mutex_exit(buf_pool);
/********************************************************************//**
Sets file_page_was_freed FALSE if the page is found in the buffer pool.
This function should be called when we free a file page and want the
debug version to check that it is not accessed any more unless
reallocated.
@return control block if found in page hash table, otherwise NULL */
buf_page_reset_file_page_was_freed(
/*===============================*/
ulint space, /*!< in: space id */
ulint offset) /*!< in: page number */
buf_pool_t* buf_pool = buf_pool_get(space, offset);
buf_pool_mutex_enter(buf_pool);
bpage = buf_page_hash_get(buf_pool, space, offset);
ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
bpage->file_page_was_freed = FALSE;
buf_pool_mutex_exit(buf_pool);
#endif /* UNIV_DEBUG_FILE_ACCESSES */
/********************************************************************//**
Get read access to a compressed page (usually of type
FIL_PAGE_TYPE_ZBLOB or FIL_PAGE_TYPE_ZBLOB2).
The page must be released with buf_page_release_zip().
NOTE: the page is not protected by any latch. Mutual exclusion has to
be implemented at a higher level. In other words, all possible
accesses to a given page through this function must be protected by
the same set of mutexes or latches.
@return pointer to the block */
ulint space, /*!< in: space id */
ulint zip_size,/*!< in: compressed page size */
ulint offset) /*!< in: page number */
mutex_t* block_mutex;
unsigned access_time;
buf_pool_t* buf_pool = buf_pool_get(space, offset);
#ifndef UNIV_LOG_DEBUG
ut_ad(!ibuf_inside());
buf_pool->stat.n_page_gets++;
buf_pool_mutex_enter(buf_pool);
bpage = buf_page_hash_get(buf_pool, space, offset);
ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
/* Page not in buf_pool: needs to be read from file */
buf_pool_mutex_exit(buf_pool);
buf_read_page(space, zip_size, offset);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(++buf_dbg_counter % 37 || buf_validate());
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
if (UNIV_UNLIKELY(!bpage->zip.data)) {
/* There is no compressed page. */
buf_pool_mutex_exit(buf_pool);
ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
switch (buf_page_get_state(bpage)) {
case BUF_BLOCK_NOT_USED:
case BUF_BLOCK_READY_FOR_USE:
case BUF_BLOCK_MEMORY:
case BUF_BLOCK_REMOVE_HASH:
case BUF_BLOCK_ZIP_FREE:
case BUF_BLOCK_ZIP_PAGE:
case BUF_BLOCK_ZIP_DIRTY:
block_mutex = &buf_pool->zip_mutex;
mutex_enter(block_mutex);
bpage->buf_fix_count++;
case BUF_BLOCK_FILE_PAGE:
block_mutex = &((buf_block_t*) bpage)->mutex;
mutex_enter(block_mutex);
/* Discard the uncompressed page frame if possible. */
if (buf_LRU_free_block(bpage, FALSE, NULL)
mutex_exit(block_mutex);
buf_block_buf_fix_inc((buf_block_t*) bpage,
__FILE__, __LINE__);
must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ;
access_time = buf_page_is_accessed(bpage);
buf_pool_mutex_exit(buf_pool);
mutex_exit(block_mutex);
buf_page_set_accessed_make_young(bpage, access_time);
#ifdef UNIV_DEBUG_FILE_ACCESSES
ut_a(!bpage->file_page_was_freed);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(++buf_dbg_counter % 5771 || buf_validate());
ut_a(bpage->buf_fix_count > 0);
ut_a(buf_page_in_file(bpage));
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
/* Let us wait until the read operation completes */
enum buf_io_fix io_fix;
mutex_enter(block_mutex);
io_fix = buf_page_get_io_fix(bpage);
mutex_exit(block_mutex);
if (io_fix == BUF_IO_READ) {
os_thread_sleep(WAIT_FOR_READ);
#ifdef UNIV_IBUF_COUNT_DEBUG
ut_a(ibuf_count_get(buf_page_get_space(bpage),
buf_page_get_page_no(bpage)) == 0);
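/* Usage sketch (illustrative only; the getter is assumed to be the
usual buf_page_get_zip() whose declaration is elided above): access to
a compressed-only page, released with buf_page_release_zip():

	buf_page_t*	bpage;

	bpage = buf_page_get_zip(space, zip_size, offset);
	... read bpage->zip.data under higher-level mutual exclusion ...
	buf_page_release_zip(bpage);
*/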
/********************************************************************//**
Initialize some fields of a control block. */
buf_block_t* block) /*!< in: block to init */
block->check_index_page_at_flush = FALSE;
block->index = NULL;
block->n_hash_helps = 0;
block->is_hashed = FALSE;
block->n_fields = 1;
block->left_side = TRUE;
#endif /* !UNIV_HOTBACKUP */
/********************************************************************//**
@return TRUE if successful */
buf_block_t* block, /*!< in/out: block */
ibool check) /*!< in: TRUE=verify the page checksum */
const byte* frame = block->page.zip.data;
ulint stamp_checksum = mach_read_from_4(
frame + FIL_PAGE_SPACE_OR_CHKSUM);
ut_ad(buf_block_get_zip_size(block));
ut_a(buf_block_get_space(block) != 0);
if (UNIV_LIKELY(check && stamp_checksum != BUF_NO_CHECKSUM_MAGIC)) {
ulint calc_checksum = page_zip_calc_checksum(
frame, page_zip_get_size(&block->page.zip));
if (UNIV_UNLIKELY(stamp_checksum != calc_checksum)) {
ut_print_timestamp(stderr);
" InnoDB: compressed page checksum mismatch"
" (space %u page %u): %lu != %lu\n",
block->page.space, block->page.offset,
stamp_checksum, calc_checksum);
switch (fil_page_get_type(frame)) {
case FIL_PAGE_INDEX:
if (page_zip_decompress(&block->page.zip,
block->frame, TRUE)) {
"InnoDB: unable to decompress space %lu page %lu\n",
(ulong) block->page.space,
(ulong) block->page.offset);
case FIL_PAGE_TYPE_ALLOCATED:
case FIL_PAGE_INODE:
case FIL_PAGE_IBUF_BITMAP:
case FIL_PAGE_TYPE_FSP_HDR:
case FIL_PAGE_TYPE_XDES:
case FIL_PAGE_TYPE_ZBLOB:
case FIL_PAGE_TYPE_ZBLOB2:
/* Copy to uncompressed storage. */
memcpy(block->frame, frame,
buf_block_get_zip_size(block));
ut_print_timestamp(stderr);
" InnoDB: unknown compressed page"
fil_page_get_type(frame));
#ifndef UNIV_HOTBACKUP
/*******************************************************************//**
Gets the block whose frame the pointer is pointing to, if found
in this buffer pool instance.
@return pointer to block */
buf_block_align_instance(
/*=====================*/
buf_pool_t* buf_pool, /*!< in: buffer in which the block resides */
const byte* ptr) /*!< in: pointer to a frame */
/* TODO: protect buf_pool->chunks with a mutex (it will
currently remain constant after buf_pool_init()) */
for (chunk = buf_pool->chunks, i = buf_pool->n_chunks; i--; chunk++) {
lint offs = ptr - chunk->blocks->frame;
if (UNIV_UNLIKELY(offs < 0)) {
offs >>= UNIV_PAGE_SIZE_SHIFT;
if (UNIV_LIKELY((ulint) offs < chunk->size)) {
buf_block_t* block = &chunk->blocks[offs];
/* The function buf_chunk_init() invokes
buf_block_init() so that block[n].frame ==
block->frame + n * UNIV_PAGE_SIZE. Check it. */
ut_ad(block->frame == page_align(ptr));
/* A thread that updates these fields must
hold buf_pool->mutex and block->mutex. Acquire
only the latter. */
mutex_enter(&block->mutex);
switch (buf_block_get_state(block)) {
case BUF_BLOCK_ZIP_FREE:
case BUF_BLOCK_ZIP_PAGE:
case BUF_BLOCK_ZIP_DIRTY:
/* These types should only be used in
the compressed buffer pool, whose
memory is allocated from
buf_pool->chunks, in UNIV_PAGE_SIZE
blocks flagged as BUF_BLOCK_MEMORY. */
case BUF_BLOCK_NOT_USED:
case BUF_BLOCK_READY_FOR_USE:
case BUF_BLOCK_MEMORY:
/* Some data structures contain
"guess" pointers to file pages. The
file pages may have been freed and
reused. Do not complain. */
case BUF_BLOCK_REMOVE_HASH:
/* buf_LRU_block_remove_hashed_page()
will overwrite the FIL_PAGE_OFFSET and
FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID with
0xff and set the state to
BUF_BLOCK_REMOVE_HASH. */
ut_ad(page_get_space_id(page_align(ptr))
== 0xffffffff);
ut_ad(page_get_page_no(page_align(ptr))
== 0xffffffff);
case BUF_BLOCK_FILE_PAGE:
ut_ad(block->page.space
== page_get_space_id(page_align(ptr)));
ut_ad(block->page.offset
== page_get_page_no(page_align(ptr)));
mutex_exit(&block->mutex);
#endif /* UNIV_DEBUG */
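/* Illustrative note (not part of the build): the frame-to-block
mapping above is pure arithmetic. Within a chunk, frame n lives at
chunk->blocks->frame + n * UNIV_PAGE_SIZE, so for a pointer ptr

	offs = (ptr - chunk->blocks->frame) >> UNIV_PAGE_SIZE_SHIFT;

names the owning control block &chunk->blocks[offs], provided
0 <= offs && offs < chunk->size. */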
/*******************************************************************//**
Gets the block whose frame the pointer is pointing to.
@return pointer to block, never NULL */
const byte* ptr) /*!< in: pointer to a frame */
for (i = 0; i < srv_buf_pool_instances; i++) {
block = buf_block_align_instance(
buf_pool_from_array(i), ptr);
/* The block should always be found. */
/********************************************************************//**
Find out if a pointer belongs to a buf_block_t. It can be a pointer to
the buf_block_t itself or a member of it. This function checks one of
the buffer pool instances.
@return TRUE if ptr belongs to a buf_block_t struct */
buf_pointer_is_block_field_instance(
/*================================*/
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
const void* ptr) /*!< in: pointer not dereferenced */
const buf_chunk_t* chunk = buf_pool->chunks;
const buf_chunk_t* const echunk = chunk + buf_pool->n_chunks;
/* TODO: protect buf_pool->chunks with a mutex (it will
currently remain constant after buf_pool_init()) */
while (chunk < echunk) {
if (ptr >= (void *)chunk->blocks
&& ptr < (void *)(chunk->blocks + chunk->size)) {
/********************************************************************//**
Find out if a pointer belongs to a buf_block_t. It can be a pointer to
the buf_block_t itself or a member of it.
@return TRUE if ptr belongs to a buf_block_t struct */
buf_pointer_is_block_field(
/*=======================*/
const void* ptr) /*!< in: pointer not dereferenced */
for (i = 0; i < srv_buf_pool_instances; i++) {
found = buf_pointer_is_block_field_instance(
buf_pool_from_array(i), ptr);
/********************************************************************//**
Find out if a buffer block was created by buf_chunk_init().
@return TRUE if "block" has been added to buf_pool->free by buf_chunk_init() */
buf_block_is_uncompressed(
/*======================*/
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
const buf_block_t* block) /*!< in: pointer to block,
not dereferenced */
ut_ad(buf_pool_mutex_own(buf_pool));
if (UNIV_UNLIKELY((((ulint) block) % sizeof *block) != 0)) {
/* The pointer should be aligned. */
return(buf_pointer_is_block_field_instance(buf_pool, (void *)block));
/********************************************************************//**
This is the general function used to get access to a database page.
@return pointer to the block or NULL */
ulint space, /*!< in: space id */
ulint zip_size,/*!< in: compressed page size in bytes
or 0 for uncompressed pages */
ulint offset, /*!< in: page number */
ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
buf_block_t* guess, /*!< in: guessed block or NULL */
ulint mode, /*!< in: BUF_GET, BUF_GET_IF_IN_POOL,
BUF_GET_NO_LATCH, or
BUF_GET_IF_IN_POOL_OR_WATCH */
const char* file, /*!< in: file name */
ulint line, /*!< in: line where called */
mtr_t* mtr) /*!< in: mini-transaction */
unsigned access_time;
buf_pool_t* buf_pool = buf_pool_get(space, offset);
ut_ad(mtr->state == MTR_ACTIVE);
ut_ad((rw_latch == RW_S_LATCH)
|| (rw_latch == RW_X_LATCH)
|| (rw_latch == RW_NO_LATCH));
ut_ad((mode != BUF_GET_NO_LATCH) || (rw_latch == RW_NO_LATCH));
ut_ad(mode == BUF_GET
|| mode == BUF_GET_IF_IN_POOL
|| mode == BUF_GET_NO_LATCH
|| mode == BUF_GET_IF_IN_POOL_OR_WATCH);
ut_ad(zip_size == fil_space_get_zip_size(space));
ut_ad(ut_is_2pow(zip_size));
#ifndef UNIV_LOG_DEBUG
ut_ad(!ibuf_inside() || ibuf_page(space, zip_size, offset, NULL));
buf_pool->stat.n_page_gets++;
fold = buf_page_address_fold(space, offset);
buf_pool_mutex_enter(buf_pool);
/* If the guess is a compressed page descriptor that
has been allocated by buf_buddy_alloc(), it may have
been invalidated by buf_buddy_relocate(). In that
case, block could point to something that happens to
contain the expected bits in block->page. Similarly,
the guess may be pointing to a buffer pool chunk that
has been released when resizing the buffer pool. */
if (!buf_block_is_uncompressed(buf_pool, block)
|| offset != block->page.offset
|| space != block->page.space
|| buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
block = guess = NULL;
ut_ad(!block->page.in_zip_hash);
ut_ad(block->page.in_page_hash);
if (block == NULL) {
block = (buf_block_t*) buf_page_hash_get_low(
buf_pool, space, offset, fold);
if (block && buf_pool_watch_is_sentinel(buf_pool, &block->page)) {
if (block == NULL) {
/* Page not in buf_pool: needs to be read from file */
if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
block = (buf_block_t*) buf_pool_watch_set(
space, offset, fold);
if (UNIV_LIKELY_NULL(block)) {
buf_pool_mutex_exit(buf_pool);
if (mode == BUF_GET_IF_IN_POOL
|| mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
if (buf_read_page(space, zip_size, offset)) {
} else if (retries < BUF_PAGE_READ_MAX_RETRIES) {
fprintf(stderr, "InnoDB: Error: Unable"
" to read tablespace %lu page no"
" %lu into the buffer pool after"
"InnoDB: The most probable cause"
" of this error may be that the"
" table has been corrupted.\n"
"InnoDB: You can try to fix this"
" innodb_force_recovery.\n"
"InnoDB: Please see reference manual"
" for more details.\n"
"InnoDB: Aborting...\n",
BUF_PAGE_READ_MAX_RETRIES);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(++buf_dbg_counter % 37 || buf_validate());
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
ut_ad(page_zip_get_size(&block->page.zip) == zip_size);
must_read = buf_block_get_io_fix(block) == BUF_IO_READ;
if (must_read && mode == BUF_GET_IF_IN_POOL) {
/* The page is being read to buffer pool,
but we cannot wait around for the read to
complete. */
buf_pool_mutex_exit(buf_pool);
switch (buf_block_get_state(block)) {
case BUF_BLOCK_FILE_PAGE:
case BUF_BLOCK_ZIP_PAGE:
case BUF_BLOCK_ZIP_DIRTY:
bpage = &block->page;
/* Protect bpage->buf_fix_count. */
mutex_enter(&buf_pool->zip_mutex);
if (bpage->buf_fix_count
|| buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
/* This condition often occurs when the buffer
is not buffer-fixed, but I/O-fixed by
buf_page_init_for_read(). */
mutex_exit(&buf_pool->zip_mutex);
/* The block is buffer-fixed or I/O-fixed.
Try again later. */
buf_pool_mutex_exit(buf_pool);
os_thread_sleep(WAIT_FOR_READ);
/* Allocate an uncompressed page. */
buf_pool_mutex_exit(buf_pool);
mutex_exit(&buf_pool->zip_mutex);
block = buf_LRU_get_free_block(buf_pool, 0);
buf_pool_mutex_enter(buf_pool);
mutex_enter(&block->mutex);
buf_page_t* hash_bpage;
hash_bpage = buf_page_hash_get_low(
buf_pool, space, offset, fold);
if (UNIV_UNLIKELY(bpage != hash_bpage)) {
/* The buf_pool->page_hash was modified
while buf_pool->mutex was released.
Free the block that was allocated. */
buf_LRU_block_free_non_file_page(block);
mutex_exit(&block->mutex);
block = (buf_block_t*) hash_bpage;
(bpage->buf_fix_count
|| buf_page_get_io_fix(bpage) != BUF_IO_NONE)) {
/* The block was buffer-fixed or I/O-fixed
while buf_pool->mutex was not held by this thread.
Free the block that was allocated and try again.
This should be extremely unlikely. */
buf_LRU_block_free_non_file_page(block);
mutex_exit(&block->mutex);
goto wait_until_unfixed;
/* Move the compressed page from bpage to block,
and uncompress it. */
mutex_enter(&buf_pool->zip_mutex);
buf_relocate(bpage, &block->page);
buf_block_init_low(block);
block->lock_hash_val = lock_rec_hash(space, offset);
UNIV_MEM_DESC(&block->page.zip.data,
page_zip_get_size(&block->page.zip), block);
if (buf_page_get_state(&block->page)
== BUF_BLOCK_ZIP_PAGE) {
UT_LIST_REMOVE(list, buf_pool->zip_clean,
ut_ad(!block->page.in_flush_list);
/* Relocate buf_pool->flush_list. */
buf_flush_relocate_on_flush_list(bpage,
/* Buffer-fix, I/O-fix, and X-latch the block
for the duration of the decompression.
Also add the block to the unzip_LRU list. */
block->page.state = BUF_BLOCK_FILE_PAGE;
/* Insert at the front of unzip_LRU list */
buf_unzip_LRU_add_block(block, FALSE);
block->page.buf_fix_count = 1;
buf_block_set_io_fix(block, BUF_IO_READ);
rw_lock_x_lock_func(&block->lock, 0, file, line);
UNIV_MEM_INVALID(bpage, sizeof *bpage);
mutex_exit(&block->mutex);
mutex_exit(&buf_pool->zip_mutex);
buf_pool->n_pend_unzip++;
buf_buddy_free(buf_pool, bpage, sizeof *bpage);
buf_pool_mutex_exit(buf_pool);
/* Decompress the page and apply buffered operations
while not holding buf_pool->mutex or block->mutex. */
success = buf_zip_decompress(block, srv_use_checksums);
if (UNIV_LIKELY(!recv_no_ibuf_operations)) {
ibuf_merge_or_delete_for_page(block, space, offset,
/* Unfix and unlatch the block. */
buf_pool_mutex_enter(buf_pool);
mutex_enter(&block->mutex);
block->page.buf_fix_count--;
buf_block_set_io_fix(block, BUF_IO_NONE);
mutex_exit(&block->mutex);
buf_pool->n_pend_unzip--;
rw_lock_x_unlock(&block->lock);
case BUF_BLOCK_ZIP_FREE:
case BUF_BLOCK_NOT_USED:
case BUF_BLOCK_READY_FOR_USE:
case BUF_BLOCK_MEMORY:
case BUF_BLOCK_REMOVE_HASH:
ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
mutex_enter(&block->mutex);
#if UNIV_WORD_SIZE == 4
/* On 32-bit systems, there is no padding in buf_page_t. On
other systems, Valgrind could complain about uninitialized pad
bytes. */
UNIV_MEM_ASSERT_RW(&block->page, sizeof block->page);
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
if ((mode == BUF_GET_IF_IN_POOL || mode == BUF_GET_IF_IN_POOL_OR_WATCH)
/* Try to evict the block from the buffer pool, to use the
insert buffer (change buffer) as much as possible. */
if (buf_LRU_free_block(&block->page, TRUE, NULL)
mutex_exit(&block->mutex);
if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
/* Set the watch, as it would have
been set if the page were not in the
buffer pool in the first place. */
block = (buf_block_t*) buf_pool_watch_set(
space, offset, fold);
if (UNIV_LIKELY_NULL(block)) {
/* The page entered the buffer
pool for some reason. Try to
evict it again. */
buf_pool_mutex_exit(buf_pool);
"innodb_change_buffering_debug evict %u %u\n",
(unsigned) space, (unsigned) offset);
} else if (buf_flush_page_try(buf_pool, block)) {
"innodb_change_buffering_debug flush %u %u\n",
(unsigned) space, (unsigned) offset);
/* Failed to evict the page; change it directly */
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
buf_block_buf_fix_inc(block, file, line);
mutex_exit(&block->mutex);
/* Check if this is the first access to the page */
access_time = buf_page_is_accessed(&block->page);
buf_pool_mutex_exit(buf_pool);
buf_page_set_accessed_make_young(&block->page, access_time);
#ifdef UNIV_DEBUG_FILE_ACCESSES
ut_a(!block->page.file_page_was_freed);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(++buf_dbg_counter % 5771 || buf_validate());
ut_a(block->page.buf_fix_count > 0);
ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
/* Let us wait until the read operation completes */
enum buf_io_fix io_fix;
mutex_enter(&block->mutex);
io_fix = buf_block_get_io_fix(block);
mutex_exit(&block->mutex);
if (io_fix == BUF_IO_READ) {
os_thread_sleep(WAIT_FOR_READ);
fix_type = MTR_MEMO_BUF_FIX;
rw_lock_s_lock_func(&(block->lock), 0, file, line);
fix_type = MTR_MEMO_PAGE_S_FIX;
ut_ad(rw_latch == RW_X_LATCH);
rw_lock_x_lock_func(&(block->lock), 0, file, line);
fix_type = MTR_MEMO_PAGE_X_FIX;
mtr_memo_push(mtr, block, fix_type);
/* In the case of a first access, try to apply linear
read-ahead */
buf_read_ahead_linear(space, zip_size, offset);
#ifdef UNIV_IBUF_COUNT_DEBUG
ut_a(ibuf_count_get(buf_block_get_space(block),
buf_block_get_page_no(block)) == 0);
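/* Usage sketch (illustrative only): callers normally reach this
function through the buf_page_get() macro, which supplies a NULL
guess, mode BUF_GET, and the caller's __FILE__ and __LINE__, all
inside a mini-transaction. mtr_commit() releases the latch and the
buffer fix:

	mtr_t		mtr;
	buf_block_t*	block;

	mtr_start(&mtr);
	block = buf_page_get(space, zip_size, offset, RW_S_LATCH, &mtr);
	... read block->frame under the S-latch ...
	mtr_commit(&mtr);
*/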
/********************************************************************//**
This is the general function used to get optimistic access to a database
page.
@return TRUE if success */
buf_page_optimistic_get(
/*====================*/
ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */
buf_block_t* block, /*!< in: guessed buffer block */
ib_uint64_t modify_clock,/*!< in: modify clock value if mode is
..._GUESS_ON_CLOCK */
const char* file, /*!< in: file name */
ulint line, /*!< in: line where called */
mtr_t* mtr) /*!< in: mini-transaction */
buf_pool_t* buf_pool;
unsigned access_time;
ut_ad(mtr->state == MTR_ACTIVE);
ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
mutex_enter(&block->mutex);
if (UNIV_UNLIKELY(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE)) {
mutex_exit(&block->mutex);
buf_block_buf_fix_inc(block, file, line);
mutex_exit(&block->mutex);
/* Check if this is the first access to the page.
We do a dirty read on purpose, to avoid mutex contention.
This field is only used for heuristic purposes; it does not
affect correctness. */
access_time = buf_page_is_accessed(&block->page);
buf_page_set_accessed_make_young(&block->page, access_time);
ut_ad(!ibuf_inside()
|| ibuf_page(buf_block_get_space(block),
buf_block_get_zip_size(block),
buf_block_get_page_no(block), NULL));
if (rw_latch == RW_S_LATCH) {
success = rw_lock_s_lock_nowait(&(block->lock),
fix_type = MTR_MEMO_PAGE_S_FIX;
success = rw_lock_x_lock_func_nowait(&(block->lock),
fix_type = MTR_MEMO_PAGE_X_FIX;
if (UNIV_UNLIKELY(!success)) {
mutex_enter(&block->mutex);
buf_block_buf_fix_dec(block);
mutex_exit(&block->mutex);
if (UNIV_UNLIKELY(modify_clock != block->modify_clock)) {
buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
if (rw_latch == RW_S_LATCH) {
rw_lock_s_unlock(&(block->lock));
rw_lock_x_unlock(&(block->lock));
mutex_enter(&block->mutex);
buf_block_buf_fix_dec(block);
mutex_exit(&block->mutex);
mtr_memo_push(mtr, block, fix_type);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(++buf_dbg_counter % 5771 || buf_validate());
ut_a(block->page.buf_fix_count > 0);
ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#ifdef UNIV_DEBUG_FILE_ACCESSES
ut_a(block->page.file_page_was_freed == FALSE);
if (UNIV_UNLIKELY(!access_time)) {
/* In the case of a first access, try to apply linear
read-ahead */
buf_read_ahead_linear(buf_block_get_space(block),
buf_block_get_zip_size(block),
buf_block_get_page_no(block));
#ifdef UNIV_IBUF_COUNT_DEBUG
ut_a(ibuf_count_get(buf_block_get_space(block),
buf_block_get_page_no(block)) == 0);
buf_pool = buf_pool_from_block(block);
buf_pool->stat.n_page_gets++;
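/* Usage sketch (illustrative only): the optimistic protocol is to
save the modify clock while the page is latched, release the latch,
and later revalidate the guess. buf_block_get_modify_clock() is
assumed to be the accessor for block->modify_clock:

	ib_uint64_t	modify_clock = buf_block_get_modify_clock(block);
	... release the latch and do other work ...
	if (buf_page_optimistic_get(RW_S_LATCH, block, modify_clock,
				    __FILE__, __LINE__, &mtr)) {
		... the guess held; the block is S-latched again ...
	} else {
		... the guess failed; look the page up by its address ...
	}
*/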
/********************************************************************//**
This is used to get access to a known database page, when no waiting can be
done. For example, if a search in an adaptive hash index leads us to this
frame.
@return TRUE if success */
buf_page_get_known_nowait(
/*======================*/
ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */
buf_block_t* block, /*!< in: the known page */
ulint mode, /*!< in: BUF_MAKE_YOUNG or BUF_KEEP_OLD */
const char* file, /*!< in: file name */
ulint line, /*!< in: line where called */
mtr_t* mtr) /*!< in: mini-transaction */
buf_pool_t* buf_pool;
ut_ad(mtr->state == MTR_ACTIVE);
ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
mutex_enter(&block->mutex);
if (buf_block_get_state(block) == BUF_BLOCK_REMOVE_HASH) {
/* Another thread is just freeing the block from the LRU list
of the buffer pool: do not try to access this page; this
attempt to access the page can only come through the hash
index because when the buffer block state is ..._REMOVE_HASH,
we have already removed it from the page address hash table
of the buffer pool. */
mutex_exit(&block->mutex);
ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
buf_block_buf_fix_inc(block, file, line);
mutex_exit(&block->mutex);
buf_pool = buf_pool_from_block(block);
if (mode == BUF_MAKE_YOUNG && buf_page_peek_if_too_old(&block->page)) {
buf_pool_mutex_enter(buf_pool);
buf_LRU_make_block_young(&block->page);
buf_pool_mutex_exit(buf_pool);
} else if (!buf_page_is_accessed(&block->page)) {
/* Above, we do a dirty read on purpose, to avoid
mutex contention. The field buf_page_t::access_time
is only used for heuristic purposes. Writes to the
field must be protected by mutex, however. */
ulint time_ms = ut_time_ms();
buf_pool_mutex_enter(buf_pool);
buf_page_set_accessed(&block->page, time_ms);
buf_pool_mutex_exit(buf_pool);
ut_ad(!ibuf_inside() || (mode == BUF_KEEP_OLD));
if (rw_latch == RW_S_LATCH) {
success = rw_lock_s_lock_nowait(&(block->lock),
fix_type = MTR_MEMO_PAGE_S_FIX;
success = rw_lock_x_lock_func_nowait(&(block->lock),
fix_type = MTR_MEMO_PAGE_X_FIX;
mutex_enter(&block->mutex);
buf_block_buf_fix_dec(block);
mutex_exit(&block->mutex);
mtr_memo_push(mtr, block, fix_type);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(++buf_dbg_counter % 5771 || buf_validate());
ut_a(block->page.buf_fix_count > 0);
ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#ifdef UNIV_DEBUG_FILE_ACCESSES
ut_a(block->page.file_page_was_freed == FALSE);
#ifdef UNIV_IBUF_COUNT_DEBUG
ut_a((mode == BUF_KEEP_OLD)
|| (ibuf_count_get(buf_block_get_space(block),
buf_block_get_page_no(block)) == 0));
buf_pool->stat.n_page_gets++;
/*******************************************************************//**
Given a tablespace id and page number tries to get that page. If the
page is not in the buffer pool it is not loaded and NULL is returned.
Suitable for using when holding the kernel mutex.
@return pointer to a page or NULL */
buf_page_try_get_func(
/*==================*/
ulint space_id,/*!< in: tablespace id */
ulint page_no,/*!< in: page number */
const char* file, /*!< in: file name */
ulint line, /*!< in: line where called */
mtr_t* mtr) /*!< in: mini-transaction */
buf_pool_t* buf_pool = buf_pool_get(space_id, page_no);
ut_ad(mtr->state == MTR_ACTIVE);
buf_pool_mutex_enter(buf_pool);
block = buf_block_hash_get(buf_pool, space_id, page_no);
if (!block || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
buf_pool_mutex_exit(buf_pool);
ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page));
mutex_enter(&block->mutex);
buf_pool_mutex_exit(buf_pool);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
ut_a(buf_block_get_space(block) == space_id);
ut_a(buf_block_get_page_no(block) == page_no);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
buf_block_buf_fix_inc(block, file, line);
mutex_exit(&block->mutex);
fix_type = MTR_MEMO_PAGE_S_FIX;
success = rw_lock_s_lock_nowait(&block->lock, file, line);
/* Let us try to get an X-latch. If the current thread
is holding an X-latch on the page, we cannot get an
S-latch. */
fix_type = MTR_MEMO_PAGE_X_FIX;
success = rw_lock_x_lock_func_nowait(&block->lock,
mutex_enter(&block->mutex);
buf_block_buf_fix_dec(block);
mutex_exit(&block->mutex);
mtr_memo_push(mtr, block, fix_type);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(++buf_dbg_counter % 5771 || buf_validate());
ut_a(block->page.buf_fix_count > 0);
ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#ifdef UNIV_DEBUG_FILE_ACCESSES
ut_a(block->page.file_page_was_freed == FALSE);
#endif /* UNIV_DEBUG_FILE_ACCESSES */
buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
buf_pool->stat.n_page_gets++;
#ifdef UNIV_IBUF_COUNT_DEBUG
ut_a(ibuf_count_get(buf_block_get_space(block),
buf_block_get_page_no(block)) == 0);
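/* Usage sketch (illustrative only): callers are assumed to use a
buf_page_try_get() convenience macro that supplies __FILE__ and
__LINE__, e.g. while holding the kernel mutex:

	buf_block_t*	block = buf_page_try_get(space_id, page_no, &mtr);

	if (block != NULL) {
		... the page was resident and is now latched ...
	}
*/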
/********************************************************************//**
Initialize some fields of a control block. */
buf_page_t* bpage) /*!< in: block to init */
bpage->flush_type = BUF_FLUSH_LRU;
bpage->io_fix = BUF_IO_NONE;
bpage->buf_fix_count = 0;
bpage->freed_page_clock = 0;
bpage->access_time = 0;
bpage->newest_modification = 0;
bpage->oldest_modification = 0;
HASH_INVALIDATE(bpage, hash);
#ifdef UNIV_DEBUG_FILE_ACCESSES
bpage->file_page_was_freed = FALSE;
#endif /* UNIV_DEBUG_FILE_ACCESSES */
/********************************************************************//**
Inits a page to the buffer buf_pool. */
ulint space, /*!< in: space id */
ulint offset, /*!< in: offset of the page within space
in units of a page */
ulint fold, /*!< in: buf_page_address_fold(space,offset) */
buf_block_t* block) /*!< in: block to init */
buf_page_t* hash_page;
buf_pool_t* buf_pool = buf_pool_get(space, offset);
ut_ad(buf_pool_mutex_own(buf_pool));
ut_ad(mutex_own(&(block->mutex)));
ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
/* Set the state of the block */
buf_block_set_file_page(block, space, offset);
#ifdef UNIV_DEBUG_VALGRIND
/* Silence valid Valgrind warnings about uninitialized
data being written to data files. There are some unused
bytes on some pages that InnoDB does not initialize. */
UNIV_MEM_VALID(block->frame, UNIV_PAGE_SIZE);
#endif /* UNIV_DEBUG_VALGRIND */
buf_block_init_low(block);
block->lock_hash_val = lock_rec_hash(space, offset);
buf_page_init_low(&block->page);
/* Insert into the hash table of file pages */
hash_page = buf_page_hash_get_low(buf_pool, space, offset, fold);
if (UNIV_LIKELY(!hash_page)) {
} else if (buf_pool_watch_is_sentinel(buf_pool, hash_page)) {
/* Preserve the reference count. */
ulint buf_fix_count = hash_page->buf_fix_count;
ut_a(buf_fix_count > 0);
block->page.buf_fix_count += buf_fix_count;
buf_pool_watch_remove(buf_pool, fold, hash_page);
"InnoDB: Error: page %lu %lu already found"
" in the hash table: %p, %p\n",
(const void*) hash_page, (const void*) block);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
mutex_exit(&block->mutex);
buf_pool_mutex_exit(buf_pool);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
ut_ad(!block->page.in_zip_hash);
ut_ad(!block->page.in_page_hash);
ut_d(block->page.in_page_hash = TRUE);
HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
fold, &block->page);
/********************************************************************//**
Function which inits a page for read to the buffer buf_pool. If the page is
(1) already in buf_pool, or
(2) if we specify to read only ibuf pages and the page is not an ibuf page, or
(3) if the space is deleted or being deleted,
then this function does nothing.
Sets the io_fix flag to BUF_IO_READ and sets a non-recursive exclusive lock
on the buffer frame. The io-handler must take care that the flag is cleared
and the lock released later.
@return pointer to the block or NULL */
buf_page_init_for_read(
/*===================*/
ulint* err, /*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED */
ulint mode, /*!< in: BUF_READ_IBUF_PAGES_ONLY, ... */
ulint space, /*!< in: space id */
ulint zip_size,/*!< in: compressed page size, or 0 */
ibool unzip, /*!< in: TRUE=request uncompressed page */
ib_int64_t tablespace_version,
/*!< in: prevents reading from a wrong
version of the tablespace in case we have done
DISCARD + IMPORT */
ulint offset) /*!< in: page number */
buf_page_t* bpage = NULL;
buf_page_t* watch_page;
buf_pool_t* buf_pool = buf_pool_get(space, offset);
if (mode == BUF_READ_IBUF_PAGES_ONLY) {
/* It is a read-ahead within an ibuf routine */
ut_ad(!ibuf_bitmap_page(zip_size, offset));
ut_ad(ibuf_inside());
if (!recv_no_ibuf_operations
&& !ibuf_page(space, zip_size, offset, &mtr)) {
ut_ad(mode == BUF_READ_ANY_PAGE);
if (zip_size && UNIV_LIKELY(!unzip)
&& UNIV_LIKELY(!recv_recovery_is_on())) {
block = buf_LRU_get_free_block(buf_pool, 0);
ut_ad(buf_pool_from_block(block) == buf_pool);
fold = buf_page_address_fold(space, offset);
buf_pool_mutex_enter(buf_pool);
watch_page = buf_page_hash_get_low(buf_pool, space, offset, fold);
if (watch_page && !buf_pool_watch_is_sentinel(buf_pool, watch_page)) {
/* The page is already in the buffer pool. */
mutex_enter(&block->mutex);
buf_LRU_block_free_non_file_page(block);
mutex_exit(&block->mutex);
if (fil_tablespace_deleted_or_being_deleted_in_mem(
space, tablespace_version)) {
/* The page belongs to a space which has been
deleted or is being deleted. */
*err = DB_TABLESPACE_DELETED;
bpage = &block->page;
mutex_enter(&block->mutex);
ut_ad(buf_pool_from_bpage(bpage) == buf_pool);
buf_page_init(space, offset, fold, block);
/* The block must be put to the LRU list, to the old blocks */
buf_LRU_add_block(bpage, TRUE/* to old blocks */);
/* We set a pass-type x-lock on the frame because then
the same thread which called for the read operation
(and is running now at this point of code) can wait
for the read to complete by waiting for the x-lock on
the frame; if the x-lock were recursive, the same
thread would illegally get the x-lock before the page
read is completed. The x-lock is cleared by the
io-handler thread. */
rw_lock_x_lock_gen(&block->lock, BUF_IO_READ);
buf_page_set_io_fix(bpage, BUF_IO_READ);
if (UNIV_UNLIKELY(zip_size)) {
page_zip_set_size(&block->page.zip, zip_size);
/* buf_pool->mutex may be released and
reacquired by buf_buddy_alloc(). Thus, we
must release block->mutex in order not to
break the latching order in the reacquisition
of buf_pool->mutex. We also must defer this
operation until after the block descriptor has
been added to buf_pool->LRU and
buf_pool->page_hash. */
mutex_exit(&block->mutex);
data = buf_buddy_alloc(buf_pool, zip_size, &lru);
mutex_enter(&block->mutex);
block->page.zip.data = static_cast<unsigned char *>(data);
/* To maintain the invariant
block->in_unzip_LRU_list
== buf_page_belongs_to_unzip_LRU(&block->page)
we have to add this block to unzip_LRU
after block->page.zip.data is set. */
ut_ad(buf_page_belongs_to_unzip_LRU(&block->page));
buf_unzip_LRU_add_block(block, TRUE);
mutex_exit(&block->mutex);
/* Defer buf_buddy_alloc() until after the block has
been found not to exist. The buf_buddy_alloc() and
buf_buddy_free() calls may be expensive because of
buf_buddy_relocate(). */
/* The compressed page must be allocated before the
control block (bpage), in order to avoid the
invocation of buf_buddy_relocate_block() on
uninitialized data. */
data = buf_buddy_alloc(buf_pool, zip_size, &lru);
bpage = static_cast<buf_page_struct *>(buf_buddy_alloc(buf_pool, sizeof *bpage, &lru));
/* Initialize the buf_pool pointer. */
bpage->buf_pool_index = buf_pool_index(buf_pool);
/* If buf_buddy_alloc() allocated storage from the LRU list,
it released and reacquired buf_pool->mutex. Thus, we must
check the page_hash again, as it may have been modified. */
if (UNIV_UNLIKELY(lru)) {
watch_page = buf_page_hash_get_low(
buf_pool, space, offset, fold);
&& !buf_pool_watch_is_sentinel(buf_pool,
/* The block was added by some other thread. */
buf_buddy_free(buf_pool, bpage, sizeof *bpage);
buf_buddy_free(buf_pool, data, zip_size);
page_zip_des_init(&bpage->zip);
page_zip_set_size(&bpage->zip, zip_size);
bpage->zip.data = static_cast<unsigned char *>(data);
mutex_enter(&buf_pool->zip_mutex);
UNIV_MEM_DESC(bpage->zip.data,
page_zip_get_size(&bpage->zip), bpage);
buf_page_init_low(bpage);
bpage->state = BUF_BLOCK_ZIP_PAGE;
bpage->space = space;
bpage->offset = offset;
bpage->in_page_hash = FALSE;
bpage->in_zip_hash = FALSE;
bpage->in_flush_list = FALSE;
bpage->in_free_list = FALSE;
bpage->in_LRU_list = FALSE;
#endif /* UNIV_DEBUG */
ut_d(bpage->in_page_hash = TRUE);
if (UNIV_LIKELY_NULL(watch_page)) {
/* Preserve the reference count. */
ulint buf_fix_count = watch_page->buf_fix_count;
ut_a(buf_fix_count > 0);
bpage->buf_fix_count += buf_fix_count;
ut_ad(buf_pool_watch_is_sentinel(buf_pool, watch_page));
buf_pool_watch_remove(buf_pool, fold, watch_page);
HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold,
/* The block must be put to the LRU list, to the old blocks */
buf_LRU_add_block(bpage, TRUE/* to old blocks */);
buf_LRU_insert_zip_clean(bpage);
buf_page_set_io_fix(bpage, BUF_IO_READ);
mutex_exit(&buf_pool->zip_mutex);
buf_pool->n_pend_reads++;
buf_pool_mutex_exit(buf_pool);
if (mode == BUF_READ_IBUF_PAGES_ONLY) {
ut_ad(!bpage || buf_page_in_file(bpage));
/********************************************************************//**
Initializes a page to the buffer buf_pool. The page is usually not read
from a file even if it cannot be found in the buffer buf_pool. This is one
of the functions which perform a state transition on a block: NOT_USED =>
FILE_PAGE (the other is buf_page_get_gen).
@return pointer to the block, page bufferfixed */
ulint space, /*!< in: space id */
ulint offset, /*!< in: offset of the page within space in units of
a page */
ulint zip_size,/*!< in: compressed page size, or 0 */
mtr_t* mtr) /*!< in: mini-transaction handle */
buf_block_t* free_block = NULL;
ulint time_ms = ut_time_ms();
buf_pool_t* buf_pool = buf_pool_get(space, offset);
ut_ad(mtr->state == MTR_ACTIVE);
ut_ad(space || !zip_size);
free_block = buf_LRU_get_free_block(buf_pool, 0);
fold = buf_page_address_fold(space, offset);
buf_pool_mutex_enter(buf_pool);
block = (buf_block_t*) buf_page_hash_get_low(
buf_pool, space, offset, fold);
&& buf_page_in_file(&block->page)
&& !buf_pool_watch_is_sentinel(buf_pool, &block->page)) {
#ifdef UNIV_IBUF_COUNT_DEBUG
ut_a(ibuf_count_get(space, offset) == 0);
#ifdef UNIV_DEBUG_FILE_ACCESSES
block->page.file_page_was_freed = FALSE;
#endif /* UNIV_DEBUG_FILE_ACCESSES */
/* Page can be found in buf_pool */
buf_pool_mutex_exit(buf_pool);
buf_block_free(free_block);
return(buf_page_get_with_no_latch(space, zip_size,
/* If we get here, the page was not in buf_pool: init it there */
if (buf_debug_prints) {
fprintf(stderr, "Creating space %lu page %lu to buffer\n",
(ulong) space, (ulong) offset);
#endif /* UNIV_DEBUG */
mutex_enter(&block->mutex);
buf_page_init(space, offset, fold, block);
/* The block must be put to the LRU list */
buf_LRU_add_block(&block->page, FALSE);
buf_block_buf_fix_inc(block, __FILE__, __LINE__);
buf_pool->stat.n_pages_created++;
/* Prevent race conditions during buf_buddy_alloc(),
which may release and reacquire buf_pool->mutex,
by IO-fixing and X-latching the block. */
buf_page_set_io_fix(&block->page, BUF_IO_READ);
rw_lock_x_lock(&block->lock);
page_zip_set_size(&block->page.zip, zip_size);
mutex_exit(&block->mutex);
/* buf_pool->mutex may be released and reacquired by
buf_buddy_alloc(). Thus, we must release block->mutex
in order not to break the latching order in
the reacquisition of buf_pool->mutex. We also must
defer this operation until after the block descriptor
has been added to buf_pool->LRU and buf_pool->page_hash. */
data = buf_buddy_alloc(buf_pool, zip_size, &lru);
mutex_enter(&block->mutex);
block->page.zip.data = static_cast<unsigned char *>(data);
/* To maintain the invariant
block->in_unzip_LRU_list
== buf_page_belongs_to_unzip_LRU(&block->page)
we have to add this block to unzip_LRU after
block->page.zip.data is set. */
ut_ad(buf_page_belongs_to_unzip_LRU(&block->page));
buf_unzip_LRU_add_block(block, FALSE);
buf_page_set_io_fix(&block->page, BUF_IO_NONE);
rw_lock_x_unlock(&block->lock);
buf_page_set_accessed(&block->page, time_ms);
buf_pool_mutex_exit(buf_pool);
mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);
mutex_exit(&block->mutex);
/* Delete possible entries for the page from the insert buffer:
such can exist if the page belonged to an index which was dropped */
ibuf_merge_or_delete_for_page(NULL, space, offset, zip_size, TRUE);
/* Flush pages from the end of the LRU list if necessary */
buf_flush_free_margin(buf_pool);
frame = block->frame;
memset(frame + FIL_PAGE_PREV, 0xff, 4);
memset(frame + FIL_PAGE_NEXT, 0xff, 4);
mach_write_to_2(frame + FIL_PAGE_TYPE, FIL_PAGE_TYPE_ALLOCATED);
/* Reset to zero the file flush lsn field in the page; if the first
page of an ibdata file is 'created' in this function into the buffer
pool then we lose the original contents of the file flush lsn stamp.
Then InnoDB could in a crash recovery print a big, false, corruption
warning if the stamp contains an lsn bigger than the ib_logfile lsn. */
memset(frame + FIL_PAGE_FILE_FLUSH_LSN, 0, 8);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(++buf_dbg_counter % 357 || buf_validate());
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#ifdef UNIV_IBUF_COUNT_DEBUG
ut_a(ibuf_count_get(buf_block_get_space(block),
buf_block_get_page_no(block)) == 0);
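/* Usage sketch (illustrative only): creating a brand-new page, for
example when a file segment is extended, instead of reading it from
disk:

	mtr_t		mtr;
	buf_block_t*	block;

	mtr_start(&mtr);
	block = buf_page_create(space, offset, zip_size, &mtr);
	... initialize block->frame and redo-log the initialization ...
	mtr_commit(&mtr);
*/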
3918
/********************************************************************//**
3919
Completes an asynchronous read or write request of a file page to or from
3923
buf_page_io_complete(
3924
/*=================*/
3925
buf_page_t* bpage) /*!< in: pointer to the block in question */
3927
enum buf_io_fix io_type;
3928
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3929
const ibool uncompressed = (buf_page_get_state(bpage)
3930
== BUF_BLOCK_FILE_PAGE);
3932
ut_a(buf_page_in_file(bpage));
3934
/* We do not need protect io_fix here by mutex to read
3935
it because this is the only function where we can change the value
3936
from BUF_IO_READ or BUF_IO_WRITE to some other value, and our code
3937
ensures that this is the only thread that handles the i/o for this
3940
io_type = buf_page_get_io_fix(bpage);
3941
ut_ad(io_type == BUF_IO_READ || io_type == BUF_IO_WRITE);
3943
if (io_type == BUF_IO_READ) {
3945
ulint read_space_id;
3948
if (buf_page_get_zip_size(bpage)) {
3949
frame = bpage->zip.data;
3950
buf_pool->n_pend_unzip++;
3952
&& !buf_zip_decompress((buf_block_t*) bpage,
3955
buf_pool->n_pend_unzip--;
3958
buf_pool->n_pend_unzip--;
3961
frame = ((buf_block_t*) bpage)->frame;
3964
/* If this page is not uninitialized and not in the
3965
doublewrite buffer, then the page number and space id
3966
should be the same as in block. */
3967
read_page_no = mach_read_from_4(frame + FIL_PAGE_OFFSET);
3968
read_space_id = mach_read_from_4(
3969
frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
3971
if (bpage->space == TRX_SYS_SPACE
3972
&& trx_doublewrite_page_inside(bpage->offset)) {
3974
ut_print_timestamp(stderr);
3976
" InnoDB: Error: reading page %lu\n"
3977
"InnoDB: which is in the"
3978
" doublewrite buffer!\n",
3979
(ulong) bpage->offset);
3980
} else if (!read_space_id && !read_page_no) {
3981
/* This is likely an uninitialized page. */
3982
} else if ((bpage->space
3983
&& bpage->space != read_space_id)
3984
|| bpage->offset != read_page_no) {
3985
/* We did not compare space_id to read_space_id
3986
if bpage->space == 0, because the field on the
3987
page may contain garbage in MySQL < 4.1.1,
3988
which only supported bpage->space == 0. */
3990
ut_print_timestamp(stderr);
3992
" InnoDB: Error: space id and page n:o"
3993
" stored in the page\n"
3994
"InnoDB: read in are %lu:%lu,"
3995
" should be %lu:%lu!\n",
3996
(ulong) read_space_id, (ulong) read_page_no,
3997
(ulong) bpage->space,
3998
(ulong) bpage->offset);
4001
/* From version 3.23.38 up we store the page checksum
4002
to the 4 first bytes of the page end lsn field */
4004
if (buf_page_is_corrupted(frame,
4005
buf_page_get_zip_size(bpage))) {
4008
"InnoDB: Database page corruption on disk"
4010
"InnoDB: file read of page %lu.\n"
4011
"InnoDB: You may have to recover"
4012
" from a backup.\n",
4013
(ulong) bpage->offset);
4014
buf_page_print(frame, buf_page_get_zip_size(bpage));
4016
"InnoDB: Database page corruption on disk"
4018
"InnoDB: file read of page %lu.\n"
4019
"InnoDB: You may have to recover"
4020
" from a backup.\n",
4021
(ulong) bpage->offset);
4022
fputs("InnoDB: It is also possible that"
4024
"InnoDB: system has corrupted its"
4026
"InnoDB: and rebooting your computer"
4029
"InnoDB: If the corrupt page is an index page\n"
4030
"InnoDB: you can also try to"
4031
" fix the corruption\n"
4032
"InnoDB: by dumping, dropping,"
4033
" and reimporting\n"
4034
"InnoDB: the corrupt table."
4035
" You can use CHECK\n"
4036
"InnoDB: TABLE to scan your"
4037
" table for corruption.\n"
4039
REFMAN "forcing-recovery.html\n"
4040
"InnoDB: about forcing recovery.\n", stderr);
4042
if (srv_force_recovery < SRV_FORCE_IGNORE_CORRUPT) {
4043
fputs("InnoDB: Ending processing because of"
4044
" a corrupt database page.\n",
4050
if (recv_recovery_is_on()) {
4051
/* Pages must be uncompressed for crash recovery. */
4053
recv_recover_page(TRUE, (buf_block_t*) bpage);
4056
if (uncompressed && !recv_no_ibuf_operations) {
4057
ibuf_merge_or_delete_for_page(
4058
(buf_block_t*) bpage, bpage->space,
4059
bpage->offset, buf_page_get_zip_size(bpage),
4064
buf_pool_mutex_enter(buf_pool);
4065
mutex_enter(buf_page_get_mutex(bpage));
4067
#ifdef UNIV_IBUF_COUNT_DEBUG
4068
if (io_type == BUF_IO_WRITE || uncompressed) {
4069
/* For BUF_IO_READ of compressed-only blocks, the
4070
buffered operations will be merged by buf_page_get_gen()
4071
after the block has been uncompressed. */
4072
ut_a(ibuf_count_get(bpage->space, bpage->offset) == 0);
4075
/* Because this thread which does the unlocking is not the same that
4076
did the locking, we use a pass value != 0 in unlock, which simply
4077
removes the newest lock debug record, without checking the thread
4080
buf_page_set_io_fix(bpage, BUF_IO_NONE);
4084
/* NOTE that the call to ibuf may have moved the ownership of
4085
the x-latch to this OS thread: do not let this confuse you in
4088
ut_ad(buf_pool->n_pend_reads > 0);
4089
buf_pool->n_pend_reads--;
4090
buf_pool->stat.n_pages_read++;
4093
rw_lock_x_unlock_gen(&((buf_block_t*) bpage)->lock,
4100
/* Write means a flush operation: call the completion
4101
routine in the flush system */
4103
buf_flush_write_complete(bpage);
4106
rw_lock_s_unlock_gen(&((buf_block_t*) bpage)->lock,
4110
buf_pool->stat.n_pages_written++;
4119
if (buf_debug_prints) {
4120
fprintf(stderr, "Has %s page space %lu page no %lu\n",
4121
io_type == BUF_IO_READ ? "read" : "written",
4122
(ulong) buf_page_get_space(bpage),
4123
(ulong) buf_page_get_page_no(bpage));
4125
#endif /* UNIV_DEBUG */
4127
mutex_exit(buf_page_get_mutex(bpage));
4128
buf_pool_mutex_exit(buf_pool);
4131
/*********************************************************************//**
4132
Asserts that all file pages in the buffer are in a replaceable state.
4136
buf_all_freed_instance(
4137
/*===================*/
4138
buf_pool_t* buf_pool) /*!< in: buffer pool instancce */
4145
buf_pool_mutex_enter(buf_pool);
4147
chunk = buf_pool->chunks;
4149
for (i = buf_pool->n_chunks; i--; chunk++) {
4151
const buf_block_t* block = buf_chunk_not_freed(chunk);
4153
if (UNIV_LIKELY_NULL(block)) {
4155
"Page %lu %lu still fixed or dirty\n",
4156
(ulong) block->page.space,
4157
(ulong) block->page.offset);
4162
buf_pool_mutex_exit(buf_pool);
4167
/*********************************************************************//**
Invalidates file pages in one buffer pool instance */
static
void
buf_pool_invalidate_instance(
/*=========================*/
	buf_pool_t*	buf_pool)	/*!< in: buffer pool instance */
{
	ibool	freed;
	ulint	i;

	buf_pool_mutex_enter(buf_pool);

	for (i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) {

		/* As this function is called during startup and
		during redo application phase during recovery, InnoDB
		is single threaded (apart from IO helper threads) at
		this stage. No new write batch can be in initialization
		stage at this point. */

		ut_ad(buf_pool->init_flush[i] == FALSE);

		/* However, it is possible that a write batch that has
		been posted earlier is still not complete. For buffer
		pool invalidation to proceed we must ensure there is NO
		write activity happening. */
		if (buf_pool->n_flush[i] > 0) {
			buf_pool_mutex_exit(buf_pool);
			buf_flush_wait_batch_end(buf_pool, static_cast<buf_flush>(i));
			buf_pool_mutex_enter(buf_pool);
		}
	}

	buf_pool_mutex_exit(buf_pool);

	ut_ad(buf_all_freed_instance(buf_pool));

	freed = TRUE;

	while (freed) {
		freed = buf_LRU_search_and_free_block(buf_pool, 100);
	}

	buf_pool_mutex_enter(buf_pool);

	ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0);
	ut_ad(UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0);

	buf_pool->freed_page_clock = 0;
	buf_pool->LRU_old = NULL;
	buf_pool->LRU_old_len = 0;
	buf_pool->LRU_flush_ended = 0;

	memset(&buf_pool->stat, 0x00, sizeof(buf_pool->stat));
	buf_refresh_io_stats(buf_pool);

	buf_pool_mutex_exit(buf_pool);
}
/*********************************************************************//**
Invalidates the file pages in the buffer pool when an archive recovery is
completed. All the file pages buffered must be in a replaceable state when
this function is called: not latched and not modified. */
UNIV_INTERN
void
buf_pool_invalidate(void)
/*=====================*/
{
	ulint	i;

	for (i = 0; i < srv_buf_pool_instances; i++) {
		buf_pool_invalidate_instance(buf_pool_from_array(i));
	}
}
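/* Illustrative sketch (not from the original source): the precondition
stated above, that every page is already replaceable, can be asserted by a
caller with buf_all_freed() before invalidating, e.g. at the end of
recovery: */
#if 0
	ut_ad(buf_all_freed());

	buf_pool_invalidate();
#endif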
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/*********************************************************************//**
Validates data in one buffer pool instance
@return	TRUE */
static
ibool
buf_pool_validate_instance(
/*=======================*/
	buf_pool_t*	buf_pool)	/*!< in: buffer pool instance */
{
	buf_page_t*	b;
	buf_chunk_t*	chunk;
	ulint		i;
	ulint		n_single_flush	= 0;
	ulint		n_lru_flush	= 0;
	ulint		n_list_flush	= 0;
	ulint		n_lru		= 0;
	ulint		n_flush		= 0;
	ulint		n_free		= 0;
	ulint		n_zip		= 0;

	ut_ad(buf_pool);

	buf_pool_mutex_enter(buf_pool);

	chunk = buf_pool->chunks;

	/* Check the uncompressed blocks. */

	for (i = buf_pool->n_chunks; i--; chunk++) {

		ulint		j;
		buf_block_t*	block = chunk->blocks;

		for (j = chunk->size; j--; block++) {

			mutex_enter(&block->mutex);

			switch (buf_block_get_state(block)) {
			case BUF_BLOCK_ZIP_FREE:
			case BUF_BLOCK_ZIP_PAGE:
			case BUF_BLOCK_ZIP_DIRTY:
				/* These should only occur on
				zip_clean, zip_free[], or flush_list. */
				ut_error;
				break;

			case BUF_BLOCK_FILE_PAGE:
				ut_a(buf_page_hash_get(buf_pool,
						       buf_block_get_space(
							       block),
						       buf_block_get_page_no(
							       block))
				     == &block->page);

#ifdef UNIV_IBUF_COUNT_DEBUG
				ut_a(buf_page_get_io_fix(&block->page)
				     == BUF_IO_READ
				     || !ibuf_count_get(buf_block_get_space(
								block),
							buf_block_get_page_no(
								block)));
#endif
				switch (buf_page_get_io_fix(&block->page)) {
				case BUF_IO_NONE:
					break;

				case BUF_IO_WRITE:
					switch (buf_page_get_flush_type(
							&block->page)) {
					case BUF_FLUSH_LRU:
						n_lru_flush++;
						ut_a(rw_lock_is_locked(
							     &block->lock,
							     RW_LOCK_SHARED));
						break;
					case BUF_FLUSH_LIST:
						n_list_flush++;
						break;
					case BUF_FLUSH_SINGLE_PAGE:
						n_single_flush++;
						break;
					default:
						ut_error;
					}

					break;

				case BUF_IO_READ:

					ut_a(rw_lock_is_locked(&block->lock,
							       RW_LOCK_EX));
					break;
				}

				n_lru++;
				break;

			case BUF_BLOCK_NOT_USED:
				n_free++;
				break;

			case BUF_BLOCK_READY_FOR_USE:
			case BUF_BLOCK_MEMORY:
			case BUF_BLOCK_REMOVE_HASH:
				/* do nothing */
				break;
			}

			mutex_exit(&block->mutex);
		}
	}

	mutex_enter(&buf_pool->zip_mutex);

	/* Check clean compressed-only blocks. */

	for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
	     b = UT_LIST_GET_NEXT(list, b)) {
		ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
		switch (buf_page_get_io_fix(b)) {
		case BUF_IO_NONE:
			/* All clean blocks should be I/O-unfixed. */
			break;
		case BUF_IO_READ:
			/* In buf_LRU_free_block(), we temporarily set
			b->io_fix = BUF_IO_READ for a newly allocated
			control block in order to prevent
			buf_page_get_gen() from decompressing the block. */
			break;
		default:
			ut_error;
			break;
		}

		/* It is OK to read oldest_modification here because
		we have acquired buf_pool->zip_mutex above which acts
		as the 'block->mutex' for these bpages. */
		ut_a(!b->oldest_modification);
		ut_a(buf_page_hash_get(buf_pool, b->space, b->offset) == b);

		n_lru++;
		n_zip++;
	}

	/* Check dirty blocks. */

	buf_flush_list_mutex_enter(buf_pool);
	for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
	     b = UT_LIST_GET_NEXT(list, b)) {
		ut_ad(b->in_flush_list);
		ut_a(b->oldest_modification);
		n_flush++;

		switch (buf_page_get_state(b)) {
		case BUF_BLOCK_ZIP_DIRTY:
			n_lru++;
			n_zip++;
			switch (buf_page_get_io_fix(b)) {
			case BUF_IO_NONE:
			case BUF_IO_READ:
				break;
			case BUF_IO_WRITE:
				switch (buf_page_get_flush_type(b)) {
				case BUF_FLUSH_LRU:
					n_lru_flush++;
					break;
				case BUF_FLUSH_LIST:
					n_list_flush++;
					break;
				case BUF_FLUSH_SINGLE_PAGE:
					n_single_flush++;
					break;
				default:
					ut_error;
				}
				break;
			}
			break;
		case BUF_BLOCK_FILE_PAGE:
			/* uncompressed page */
			break;
		case BUF_BLOCK_ZIP_FREE:
		case BUF_BLOCK_ZIP_PAGE:
		case BUF_BLOCK_NOT_USED:
		case BUF_BLOCK_READY_FOR_USE:
		case BUF_BLOCK_MEMORY:
		case BUF_BLOCK_REMOVE_HASH:
			ut_error;
			break;
		}
		ut_a(buf_page_hash_get(buf_pool, b->space, b->offset) == b);
	}

	ut_a(UT_LIST_GET_LEN(buf_pool->flush_list) == n_flush);

	buf_flush_list_mutex_exit(buf_pool);

	mutex_exit(&buf_pool->zip_mutex);

	if (n_lru + n_free > buf_pool->curr_size + n_zip) {
		fprintf(stderr, "n LRU %lu, n free %lu, pool %lu zip %lu\n",
			(ulong) n_lru, (ulong) n_free,
			(ulong) buf_pool->curr_size, (ulong) n_zip);
		ut_error;
	}

	ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == n_lru);
	if (UT_LIST_GET_LEN(buf_pool->free) != n_free) {
		fprintf(stderr, "Free list len %lu, free blocks %lu\n",
			(ulong) UT_LIST_GET_LEN(buf_pool->free),
			(ulong) n_free);
		ut_error;
	}

	ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_single_flush);
	ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
	ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush);

	buf_pool_mutex_exit(buf_pool);

	ut_a(buf_LRU_validate());
	ut_a(buf_flush_validate(buf_pool));

	return(TRUE);
}
/*********************************************************************//**
Validates the buffer buf_pool data structure.
@return	TRUE */
UNIV_INTERN
ibool
buf_validate(void)
/*==============*/
{
	ulint	i;

	for (i = 0; i < srv_buf_pool_instances; i++) {
		buf_pool_t*	buf_pool;

		buf_pool = buf_pool_from_array(i);

		buf_pool_validate_instance(buf_pool);
	}
	return(TRUE);
}
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
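/* Illustrative sketch (not from the original source): buf_validate() is
compiled only in debug builds, so call sites guard it the same way the
block above is guarded: */
#if 0
# if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
	ut_a(buf_validate());
# endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#endif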
#if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/*********************************************************************//**
Prints info of the buffer buf_pool data structure for one instance. */
static
void
buf_print_instance(
/*===============*/
	buf_pool_t*	buf_pool)
{
	index_id_t*	index_ids;
	ulint*		counts;
	ulint		size;
	ulint		i;
	ulint		j;
	index_id_t	id;
	ulint		n_found;
	buf_chunk_t*	chunk;
	dict_index_t*	index;

	ut_ad(buf_pool);

	size = buf_pool->curr_size;

	index_ids = mem_alloc(size * sizeof *index_ids);
	counts = mem_alloc(sizeof(ulint) * size);

	buf_pool_mutex_enter(buf_pool);
	buf_flush_list_mutex_enter(buf_pool);

	fprintf(stderr,
		"buf_pool size %lu\n"
		"database pages %lu\n"
		"free pages %lu\n"
		"modified database pages %lu\n"
		"n pending decompressions %lu\n"
		"n pending reads %lu\n"
		"n pending flush LRU %lu list %lu single page %lu\n"
		"pages made young %lu, not young %lu\n"
		"pages read %lu, created %lu, written %lu\n",
		(ulong) size,
		(ulong) UT_LIST_GET_LEN(buf_pool->LRU),
		(ulong) UT_LIST_GET_LEN(buf_pool->free),
		(ulong) UT_LIST_GET_LEN(buf_pool->flush_list),
		(ulong) buf_pool->n_pend_unzip,
		(ulong) buf_pool->n_pend_reads,
		(ulong) buf_pool->n_flush[BUF_FLUSH_LRU],
		(ulong) buf_pool->n_flush[BUF_FLUSH_LIST],
		(ulong) buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE],
		(ulong) buf_pool->stat.n_pages_made_young,
		(ulong) buf_pool->stat.n_pages_not_made_young,
		(ulong) buf_pool->stat.n_pages_read,
		(ulong) buf_pool->stat.n_pages_created,
		(ulong) buf_pool->stat.n_pages_written);

	buf_flush_list_mutex_exit(buf_pool);

	/* Count the number of blocks belonging to each index in the buffer */

	n_found = 0;

	chunk = buf_pool->chunks;

	for (i = buf_pool->n_chunks; i--; chunk++) {
		buf_block_t*	block		= chunk->blocks;
		ulint		n_blocks	= chunk->size;

		for (; n_blocks--; block++) {
			const buf_frame_t* frame = block->frame;

			if (fil_page_get_type(frame) == FIL_PAGE_INDEX) {

				id = btr_page_get_index_id(frame);

				/* Look for the id in the index_ids array */
				j = 0;

				while (j < n_found) {

					if (index_ids[j] == id) {
						counts[j]++;

						break;
					}
					j++;
				}

				if (j == n_found) {
					n_found++;
					index_ids[j] = id;
					counts[j] = 1;
				}
			}
		}
	}

	buf_pool_mutex_exit(buf_pool);

	for (i = 0; i < n_found; i++) {
		index = dict_index_get_if_in_cache(index_ids[i]);

		fprintf(stderr,
			"Block count for index %llu in buffer is about %lu",
			(ullint) index_ids[i],
			(ulong) counts[i]);

		if (index) {
			putc(' ', stderr);
			dict_index_name_print(stderr, NULL, index);
		}

		putc('\n', stderr);
	}

	mem_free(index_ids);
	mem_free(counts);

	ut_a(buf_pool_validate_instance(buf_pool));
}

/*********************************************************************//**
Prints info of the buffer buf_pool data structure. */
UNIV_INTERN
void
buf_print(void)
/*===========*/
{
	ulint	i;

	for (i = 0; i < srv_buf_pool_instances; i++) {
		buf_pool_t*	buf_pool;

		buf_pool = buf_pool_from_array(i);
		buf_print_instance(buf_pool);
	}
}
#endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */
#ifdef UNIV_DEBUG
/*********************************************************************//**
Returns the number of latched pages in the buffer pool.
@return	number of latched pages */
static
ulint
buf_get_latched_pages_number_instance(
/*==================================*/
	buf_pool_t*	buf_pool)	/*!< in: buffer pool instance */
{
	buf_page_t*	b;
	ulint		i;
	buf_chunk_t*	chunk;
	ulint		fixed_pages_number = 0;

	buf_pool_mutex_enter(buf_pool);

	chunk = buf_pool->chunks;

	for (i = buf_pool->n_chunks; i--; chunk++) {
		buf_block_t*	block;
		ulint		j;

		block = chunk->blocks;

		for (j = chunk->size; j--; block++) {
			if (buf_block_get_state(block)
			    != BUF_BLOCK_FILE_PAGE) {

				continue;
			}

			mutex_enter(&block->mutex);

			if (block->page.buf_fix_count != 0
			    || buf_page_get_io_fix(&block->page)
			    != BUF_IO_NONE) {
				fixed_pages_number++;
			}

			mutex_exit(&block->mutex);
		}
	}

	mutex_enter(&buf_pool->zip_mutex);

	/* Traverse the lists of clean and dirty compressed-only blocks. */

	for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
	     b = UT_LIST_GET_NEXT(list, b)) {
		ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
		ut_a(buf_page_get_io_fix(b) != BUF_IO_WRITE);

		if (b->buf_fix_count != 0
		    || buf_page_get_io_fix(b) != BUF_IO_NONE) {
			fixed_pages_number++;
		}
	}

	buf_flush_list_mutex_enter(buf_pool);
	for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
	     b = UT_LIST_GET_NEXT(list, b)) {
		ut_ad(b->in_flush_list);

		switch (buf_page_get_state(b)) {
		case BUF_BLOCK_ZIP_DIRTY:
			if (b->buf_fix_count != 0
			    || buf_page_get_io_fix(b) != BUF_IO_NONE) {
				fixed_pages_number++;
			}
			break;
		case BUF_BLOCK_FILE_PAGE:
			/* uncompressed page */
			break;
		case BUF_BLOCK_ZIP_FREE:
		case BUF_BLOCK_ZIP_PAGE:
		case BUF_BLOCK_NOT_USED:
		case BUF_BLOCK_READY_FOR_USE:
		case BUF_BLOCK_MEMORY:
		case BUF_BLOCK_REMOVE_HASH:
			ut_error;
			break;
		}
	}

	buf_flush_list_mutex_exit(buf_pool);
	mutex_exit(&buf_pool->zip_mutex);
	buf_pool_mutex_exit(buf_pool);

	return(fixed_pages_number);
}

/*********************************************************************//**
Returns the number of latched pages in all the buffer pools.
@return	number of latched pages */
UNIV_INTERN
ulint
buf_get_latched_pages_number(void)
/*==============================*/
{
	ulint	i;
	ulint	total_latched_pages = 0;

	for (i = 0; i < srv_buf_pool_instances; i++) {
		buf_pool_t*	buf_pool;

		buf_pool = buf_pool_from_array(i);

		total_latched_pages += buf_get_latched_pages_number_instance(
			buf_pool);
	}

	return(total_latched_pages);
}

#endif /* UNIV_DEBUG */
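/* Illustrative sketch (not from the original source): one plausible
debug-build use of the counter above is a quiescence check, once no user
threads are active and no I/O is pending, nothing should remain latched: */
#if 0
# ifdef UNIV_DEBUG
	ut_a(buf_get_latched_pages_number() == 0);
# endif /* UNIV_DEBUG */
#endif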
/*********************************************************************//**
Returns the number of pending buf pool I/Os.
@return	number of pending I/O operations */
UNIV_INTERN
ulint
buf_get_n_pending_ios(void)
/*=======================*/
{
	ulint	i;
	ulint	pend_ios = 0;

	for (i = 0; i < srv_buf_pool_instances; i++) {
		buf_pool_t*	buf_pool;

		buf_pool = buf_pool_from_array(i);

		pend_ios +=
			buf_pool->n_pend_reads
			+ buf_pool->n_flush[BUF_FLUSH_LRU]
			+ buf_pool->n_flush[BUF_FLUSH_LIST]
			+ buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE];
	}

	return(pend_ios);
}
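/* Illustrative sketch (not from the original source): the counter above
can be polled to wait for in-flight buffer pool I/O to drain: */
#if 0
	while (buf_get_n_pending_ios() > 0) {
		os_thread_sleep(100000);	/* sleep 0.1 s, then retry */
	}
#endif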
/*********************************************************************//**
Returns the ratio in percents of modified pages in the buffer pool /
database pages in the buffer pool.
@return	modified page percentage ratio */
UNIV_INTERN
ulint
buf_get_modified_ratio_pct(void)
/*============================*/
{
	ulint	ratio;
	ulint	lru_len = 0;
	ulint	free_len = 0;
	ulint	flush_list_len = 0;

	buf_get_total_list_len(&lru_len, &free_len, &flush_list_len);

	ratio = (100 * flush_list_len) / (1 + lru_len + free_len);

	/* 1 + is there to avoid division by zero */

	return(ratio);
}
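/* Worked example (illustrative, not from the original source): with
flush_list_len == 300 modified pages and lru_len + free_len == 999 buffered
pages, the result is (100 * 300) / (1 + 999) == 30, i.e. about 30 per cent
of the pool is dirty. The arithmetic is integer division, so the percentage
is truncated, and the 1 + in the divisor makes an empty pool yield 0
instead of dividing by zero. */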
/*********************************************************************//**
Prints info of the buffer i/o. */
static
void
buf_print_io_instance(
/*==================*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	FILE*		file)		/*!< in/out: buffer where to print */
{
	time_t	current_time;
	double	time_elapsed;
	ulint	n_gets_diff;

	ut_ad(buf_pool);

	buf_pool_mutex_enter(buf_pool);
	buf_flush_list_mutex_enter(buf_pool);

	fprintf(file,
		"Buffer pool size %lu\n"
		"Free buffers %lu\n"
		"Database pages %lu\n"
		"Old database pages %lu\n"
		"Modified db pages %lu\n"
		"Pending reads %lu\n"
		"Pending writes: LRU %lu, flush list %lu, single page %lu\n",
		(ulong) buf_pool->curr_size,
		(ulong) UT_LIST_GET_LEN(buf_pool->free),
		(ulong) UT_LIST_GET_LEN(buf_pool->LRU),
		(ulong) buf_pool->LRU_old_len,
		(ulong) UT_LIST_GET_LEN(buf_pool->flush_list),
		(ulong) buf_pool->n_pend_reads,
		(ulong) buf_pool->n_flush[BUF_FLUSH_LRU]
		+ buf_pool->init_flush[BUF_FLUSH_LRU],
		(ulong) buf_pool->n_flush[BUF_FLUSH_LIST]
		+ buf_pool->init_flush[BUF_FLUSH_LIST],
		(ulong) buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]);

	buf_flush_list_mutex_exit(buf_pool);

	current_time = time(NULL);
	time_elapsed = 0.001 + difftime(current_time,
					buf_pool->last_printout_time);

	fprintf(file,
		"Pages made young %lu, not young %lu\n"
		"%.2f youngs/s, %.2f non-youngs/s\n"
		"Pages read %lu, created %lu, written %lu\n"
		"%.2f reads/s, %.2f creates/s, %.2f writes/s\n",
		(ulong) buf_pool->stat.n_pages_made_young,
		(ulong) buf_pool->stat.n_pages_not_made_young,
		(buf_pool->stat.n_pages_made_young
		 - buf_pool->old_stat.n_pages_made_young)
		/ time_elapsed,
		(buf_pool->stat.n_pages_not_made_young
		 - buf_pool->old_stat.n_pages_not_made_young)
		/ time_elapsed,
		(ulong) buf_pool->stat.n_pages_read,
		(ulong) buf_pool->stat.n_pages_created,
		(ulong) buf_pool->stat.n_pages_written,
		(buf_pool->stat.n_pages_read
		 - buf_pool->old_stat.n_pages_read)
		/ time_elapsed,
		(buf_pool->stat.n_pages_created
		 - buf_pool->old_stat.n_pages_created)
		/ time_elapsed,
		(buf_pool->stat.n_pages_written
		 - buf_pool->old_stat.n_pages_written)
		/ time_elapsed);

	n_gets_diff = buf_pool->stat.n_page_gets
		    - buf_pool->old_stat.n_page_gets;

	if (n_gets_diff) {
		fprintf(file,
			"Buffer pool hit rate %lu / 1000,"
			" young-making rate %lu / 1000 not %lu / 1000\n",
			(ulong)
			(1000 - ((1000 * (buf_pool->stat.n_pages_read
					  - buf_pool->old_stat.n_pages_read))
				 / (buf_pool->stat.n_page_gets
				    - buf_pool->old_stat.n_page_gets))),
			(ulong)
			(1000 * (buf_pool->stat.n_pages_made_young
				 - buf_pool->old_stat.n_pages_made_young)
			 / n_gets_diff),
			(ulong)
			(1000 * (buf_pool->stat.n_pages_not_made_young
				 - buf_pool->old_stat.n_pages_not_made_young)
			 / n_gets_diff));
	} else {
		fputs("No buffer pool page gets since the last printout\n",
		      file);
	}

	/* Statistics about read ahead algorithm */
	fprintf(file, "Pages read ahead %.2f/s,"
		" evicted without access %.2f/s\n",
		(buf_pool->stat.n_ra_pages_read
		 - buf_pool->old_stat.n_ra_pages_read)
		/ time_elapsed,
		(buf_pool->stat.n_ra_pages_evicted
		 - buf_pool->old_stat.n_ra_pages_evicted)
		/ time_elapsed);

	/* Print some values to help us with visualizing what is
	happening with LRU eviction. */
	fprintf(file,
		"LRU len: %lu, unzip_LRU len: %lu\n"
		"I/O sum[%lu]:cur[%lu], unzip sum[%lu]:cur[%lu]\n",
		static_cast<ulint>(UT_LIST_GET_LEN(buf_pool->LRU)),
		static_cast<ulint>(UT_LIST_GET_LEN(buf_pool->unzip_LRU)),
		buf_LRU_stat_sum.io, buf_LRU_stat_cur.io,
		buf_LRU_stat_sum.unzip, buf_LRU_stat_cur.unzip);

	buf_refresh_io_stats(buf_pool);
	buf_pool_mutex_exit(buf_pool);
}
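/* Worked example (illustrative, not from the original source) for the hit
rate printed above: if n_page_gets grew by 10000 since the last printout
while n_pages_read grew by 150, the hit rate is
1000 - (1000 * 150) / 10000 == 985 per 1000, i.e. 98.5 per cent of page
requests were served without a physical read. The per-second figures divide
counter deltas by time_elapsed, which is padded with 0.001 s so that two
printouts within the same second cannot divide by zero. */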
/*********************************************************************//**
Prints info of the buffer i/o. */
UNIV_INTERN
void
buf_print_io(
/*=========*/
	FILE*	file)	/*!< in/out: buffer where to print */
{
	ulint	i;

	for (i = 0; i < srv_buf_pool_instances; i++) {
		buf_pool_t*	buf_pool;

		buf_pool = buf_pool_from_array(i);
		buf_print_io_instance(buf_pool, file);
	}
}
/**********************************************************************//**
Refreshes the statistics used to print per-second averages. */
UNIV_INTERN
void
buf_refresh_io_stats(
/*=================*/
	buf_pool_t*	buf_pool)	/*!< in: buffer pool instance */
{
	buf_pool->last_printout_time = ut_time();
	buf_pool->old_stat = buf_pool->stat;
}

/**********************************************************************//**
Refreshes the statistics used to print per-second averages in all
buffer pool instances. */
UNIV_INTERN
void
buf_refresh_io_stats_all(void)
/*==========================*/
{
	ulint	i;

	for (i = 0; i < srv_buf_pool_instances; i++) {
		buf_pool_t*	buf_pool;

		buf_pool = buf_pool_from_array(i);

		buf_refresh_io_stats(buf_pool);
	}
}
/**********************************************************************//**
Checks if all pages in all buffer pools are in a replaceable state.
@return	FALSE if not */
UNIV_INTERN
ibool
buf_all_freed(void)
/*===============*/
{
	ulint	i;

	for (i = 0; i < srv_buf_pool_instances; i++) {
		buf_pool_t*	buf_pool;

		buf_pool = buf_pool_from_array(i);

		if (!buf_all_freed_instance(buf_pool)) {
			return(FALSE);
		}
	}

	return(TRUE);
}
/*********************************************************************//**
Checks that there currently are no pending i/o-operations for the buffer
pool.
@return	TRUE if there is no pending i/o */
UNIV_INTERN
ibool
buf_pool_check_no_pending_io(void)
/*==============================*/
{
	ulint	i;
	ibool	ret = TRUE;

	buf_pool_mutex_enter_all();

	for (i = 0; i < srv_buf_pool_instances && ret; i++) {
		const buf_pool_t*	buf_pool;

		buf_pool = buf_pool_from_array(i);

		if (buf_pool->n_pend_reads
		    + buf_pool->n_flush[BUF_FLUSH_LRU]
		    + buf_pool->n_flush[BUF_FLUSH_LIST]
		    + buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]) {

			ret = FALSE;
		}
	}

	buf_pool_mutex_exit_all();

	return(ret);
}
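/* Illustrative note (not from the original source): unlike
buf_get_n_pending_ios(), which reads the counters without latching, the
function above holds all buffer pool mutexes, so the answer is a consistent
snapshot across instances. A shutdown path could assert on it: */
#if 0
	ut_a(buf_pool_check_no_pending_io());
#endif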
#if 0
Code currently not used
/*********************************************************************//**
Gets the current length of the free list of buffer blocks.
@return	length of the free list */
UNIV_INTERN
ulint
buf_get_free_list_len(void)
/*=======================*/
{
	ulint	len;

	buf_pool_mutex_enter(buf_pool);

	len = UT_LIST_GET_LEN(buf_pool->free);

	buf_pool_mutex_exit(buf_pool);

	return(len);
}
#endif
#else /* !UNIV_HOTBACKUP */
/********************************************************************//**
Inits a page to the buffer buf_pool, for use in ibbackup --restore. */
UNIV_INTERN
void
buf_page_init_for_backup_restore(
/*=============================*/
	ulint		space,	/*!< in: space id */
	ulint		offset,	/*!< in: offset of the page within space
				in units of a page */
	ulint		zip_size,/*!< in: compressed page size in bytes
				or 0 for uncompressed pages */
	buf_block_t*	block)	/*!< in: block to init */
{
	block->page.state	= BUF_BLOCK_FILE_PAGE;
	block->page.space	= space;
	block->page.offset	= offset;

	page_zip_des_init(&block->page.zip);

	/* We assume that block->page.data has been allocated
	with zip_size == UNIV_PAGE_SIZE. */
	ut_ad(zip_size <= UNIV_PAGE_SIZE);
	ut_ad(ut_is_2pow(zip_size));
	page_zip_set_size(&block->page.zip, zip_size);
	if (zip_size) {
		block->page.zip.data = block->frame + UNIV_PAGE_SIZE;
	}
}
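/* Illustrative note (not from the original source): zip_size must be 0
(uncompressed) or a power of two not exceeding UNIV_PAGE_SIZE, i.e. one of
1024, 2048, 4096, 8192 or 16384 with the default 16 kB page; note that
ut_is_2pow(0) also holds, so 0 passes the assertion above. A hypothetical
restore-time caller (space_id, page_no and block are placeholders) might
do: */
#if 0
	buf_page_init_for_backup_restore(space_id, page_no,
					 8192 /* 8 kB compressed */, block);
#endif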
#endif /* !UNIV_HOTBACKUP */