/* Innobase relational database engine; Copyright (C) 2001 Innobase Oy

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License 2
as published by the Free Software Foundation in June 1991.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License 2
along with this program (in file COPYING); if not, write to the Free
Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */

/******************************************************
The database buffer buf_pool

Created 11/5/1995 Heikki Tuuri
*******************************************************/

#include "buf0buddy.h"
#include "lock0lock.h"
#include "ibuf0ibuf.h"
#include "dict0dict.h"

/*
IMPLEMENTATION OF THE BUFFER POOL
=================================

Performance improvement:
------------------------
Thread scheduling in NT may be so slow that the OS wait mechanism should
not be used even in waiting for disk reads to complete.
Rather, we should put waiting query threads to the queue of
waiting jobs, and let the OS thread do something useful while the i/o
is processed. In this way we could remove most OS thread switches in
an i/o-intensive benchmark like TPC-C.

A possibility is to put a user space thread library between the database
and NT. User space thread libraries might be very fast.

SQL Server 7.0 can be configured to use 'fibers' which are lightweight
threads in NT. These should be studied.

Buffer frames and blocks
------------------------
Following the terminology of Gray and Reuter, we call the memory
blocks where file pages are loaded buffer frames. For each buffer
frame there is a control block, or shortly, a block, in the buffer
control array. The control info which does not need to be stored
in the file along with the file page, resides in the control block.

Buffer pool struct
------------------
The buffer buf_pool contains a single mutex which protects all the
control data structures of the buf_pool. The content of a buffer frame is
protected by a separate read-write lock in its control block, though.
These locks can be locked and unlocked without owning the buf_pool mutex.
The OS events in the buf_pool struct can be waited for without owning the
buf_pool mutex.

The buf_pool mutex is a hot-spot in main memory, causing a lot of
memory bus traffic on multiprocessor systems when processors
alternately access the mutex. On our Pentium, the mutex is accessed
maybe every 10 microseconds. We gave up the solution to have mutexes
for each control block, for instance, because it seemed to be
ineffective.

A solution to reduce mutex contention of the buf_pool mutex is to
create a separate mutex for the page hash table. On Pentium,
accessing the hash table takes 2 microseconds, about half
of the total buf_pool mutex hold time.

Control blocks
--------------

The control block contains, for instance, the bufferfix count
which is incremented when a thread wants a file page to be fixed
in a buffer frame. The bufferfix operation does not lock the
contents of the frame, however. For this purpose, the control
block contains a read-write lock.

The buffer frames have to be aligned so that the start memory
address of a frame is divisible by the universal page size, which
is a power of 2.

We intend to make the buffer buf_pool size on-line reconfigurable,
that is, the buf_pool size can be changed without closing the database.
Then the database administrator may adjust it to be bigger
at night, for example. The control block array must
contain enough control blocks for the maximum buffer buf_pool size
which is used in the particular database.
If the buf_pool size is cut, we exploit the virtual memory mechanism of
the OS, and just refrain from using frames at high addresses. Then the OS
can swap them to disk.

The control blocks containing file pages are put to a hash table
according to the file address of the page.
We could speed up the access to an individual page by using
"pointer swizzling": we could replace the page references on
non-leaf index pages by direct pointers to the page, if it exists
in the buf_pool. We could make a separate hash table where we could
chain all the page references in non-leaf pages residing in the buf_pool,
using the page reference as the hash key,
and at the time of reading of a page update the pointers accordingly.
Drawbacks of this solution are added complexity and,
possibly, extra space required on non-leaf pages for memory pointers.
A simpler solution is just to speed up the hash table mechanism
in the database, using tables whose size is a power of 2.

Lists of blocks
---------------

There are several lists of control blocks.

The free list (buf_pool->free) contains blocks which are currently not
used.

The common LRU list contains all the blocks holding a file page
except those for which the bufferfix count is non-zero.
The pages are in the LRU list roughly in the order of the last
access to the page, so that the oldest pages are at the end of the
list. We also keep a pointer to near the end of the LRU list,
which we can use when we want to artificially age a page in the
buf_pool. This is used if we know that some page is not needed
again for some time: we insert the block right after the pointer,
causing it to be replaced sooner than would normally be the case.
Currently this aging mechanism is used by the read-ahead mechanism
of pages, and it can also be used when there is a scan of a full
table which cannot fit in the memory. Putting the pages near the end
of the LRU list, we make sure that most of the buf_pool stays in the
main memory, undisturbed.

The unzip_LRU list contains a subset of the common LRU list. The
blocks on the unzip_LRU list hold a compressed file page and the
corresponding uncompressed page frame. A block is in unzip_LRU if and
only if the predicate buf_page_belongs_to_unzip_LRU(&block->page)
holds. The blocks in unzip_LRU will be in same order as they are in
the common LRU list. That is, each manipulation of the common LRU
list will result in the same manipulation of the unzip_LRU list.

The chain of modified blocks (buf_pool->flush_list) contains the blocks
holding file pages that have been modified in the memory
but not written to disk yet. The block with the oldest modification
which has not yet been written to disk is at the end of the chain.

The chain of unmodified compressed blocks (buf_pool->zip_clean)
contains the control blocks (buf_page_t) of those compressed pages
that are not in buf_pool->flush_list and for which no uncompressed
page has been allocated in the buffer pool. The control blocks for
uncompressed pages are accessible via buf_block_t objects that are
reachable via buf_pool->chunks[].

The chains of free memory blocks (buf_pool->zip_free[]) are used by
the buddy allocator (buf0buddy.c) to keep track of currently unused
memory blocks of size sizeof(buf_page_t)..UNIV_PAGE_SIZE / 2. These
blocks are inside the UNIV_PAGE_SIZE-sized memory blocks of type
BUF_BLOCK_MEMORY that the buddy allocator requests from the buffer
pool. The buddy allocator is solely used for allocating control
blocks for compressed pages (buf_page_t) and compressed page frames.

Loading a file page
-------------------

First, a victim block for replacement has to be found in the
buf_pool. It is taken from the free list or searched for from the
end of the LRU-list. An exclusive lock is reserved for the frame,
the io_fix field is set in the block fixing the block in buf_pool,
and the io-operation for loading the page is queued. The io-handler thread
releases the X-lock on the frame and resets the io_fix field
when the io operation completes.

A thread may request the above operation using the function
buf_page_get(). It may then continue to request a lock on the frame.
The lock is granted when the io-handler releases the x-lock.

Read-ahead
----------

The read-ahead mechanism is intended to be intelligent and
isolated from the semantically higher levels of the database
index management. From the higher level we only need the
information if a file page has a natural successor or
predecessor page. On the leaf level of a B-tree index,
these are the next and previous pages in the natural
order of the pages.

Let us first explain the read-ahead mechanism when the leaves
of a B-tree are scanned in an ascending or descending order.
When a page is referenced in the buf_pool for the first time,
the buffer manager checks if it is at the border of a so-called
linear read-ahead area. The tablespace is divided into these
areas of size 64 blocks, for example. So if the page is at the
border of such an area, the read-ahead mechanism checks if
all the other blocks in the area have been accessed in an
ascending or descending order. If this is the case, the system
looks at the natural successor or predecessor of the page,
checks if that is at the border of another area, and in this case
issues read-requests for all the pages in that area. Maybe
we could relax the condition that all the pages in the area
have to be accessed: if data is deleted from a table, there may
appear holes of unused pages in the area. (A sketch of the border
check follows this comment.)

A different read-ahead mechanism is used when there appears
to be a random access pattern to a file.
If a new page is referenced in the buf_pool, and several pages
of its random access area (for instance, 32 consecutive pages
in a tablespace) have recently been referenced, we may predict
that the whole area may be needed in the near future, and issue
the read requests for the whole area.
*/
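
/* Editorial illustration (not in the original file): the linear
read-ahead border check described above reduces to simple modular
arithmetic. The helper and the area-size constant below are assumed
names for illustration only; the real logic lives in
buf_read_ahead_linear() in buf0rea.c. */
#if 0	/* sketch only, not compiled */
#define READ_AHEAD_AREA	64	/* assumed pages per linear area */

/* Nonzero iff page_no is the lowest or highest page of its area;
read-ahead is only considered at these borders. */
static int
page_is_at_area_border(ulint page_no)
{
	ulint	low = page_no - (page_no % READ_AHEAD_AREA);

	return(page_no == low || page_no == low + READ_AHEAD_AREA - 1);
}
#endif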

/* Value in microseconds */
static const int WAIT_FOR_READ = 5000;

/* The buffer buf_pool of the database */
UNIV_INTERN buf_pool_t* buf_pool = NULL;

/* mutex protecting the buffer pool struct and control blocks, except the
read-write lock in them */
UNIV_INTERN mutex_t buf_pool_mutex;
/* mutex protecting the control blocks of compressed-only pages
(of type buf_page_t, not buf_block_t) */
UNIV_INTERN mutex_t buf_pool_zip_mutex;

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
static ulint buf_dbg_counter = 0; /* This is used to insert validation
operations in execution in the
debug version */
/** Flag to forbid the release of the buffer pool mutex.
Protected by buf_pool->mutex. */
UNIV_INTERN ulint buf_pool_mutex_exit_forbidden = 0;
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#ifdef UNIV_DEBUG
/* If this is set TRUE, the program prints info whenever
read-ahead or flush occurs */
UNIV_INTERN ibool buf_debug_prints = FALSE;
#endif /* UNIV_DEBUG */

/* A chunk of buffers. The buffer pool is allocated in chunks. */
struct buf_chunk_struct{
ulint mem_size; /* allocated size of the chunk */
ulint size; /* size of frames[] and blocks[] */
void* mem; /* pointer to the memory area which
was allocated for the frames */
buf_block_t* blocks; /* array of buffer control blocks */
};

/************************************************************************
Calculates a page checksum which is stored to the page when it is written
to a file. Note that we must be careful to calculate the same value on
32-bit and 64-bit architectures. */
UNIV_INTERN
ulint
buf_calc_page_new_checksum(
/*=======================*/
/* out: checksum */
const byte* page) /* in: buffer page */
{
ulint checksum;

/* Since the field FIL_PAGE_FILE_FLUSH_LSN, and in versions <= 4.1.x
..._ARCH_LOG_NO, are written outside the buffer pool to the first
pages of data files, we have to skip them in the page checksum
calculation.
We must also skip the field FIL_PAGE_SPACE_OR_CHKSUM where the
checksum is stored, and also the last 8 bytes of page because
there we store the old formula checksum. */

checksum = ut_fold_binary(page + FIL_PAGE_OFFSET,
FIL_PAGE_FILE_FLUSH_LSN - FIL_PAGE_OFFSET)
+ ut_fold_binary(page + FIL_PAGE_DATA,
UNIV_PAGE_SIZE - FIL_PAGE_DATA
- FIL_PAGE_END_LSN_OLD_CHKSUM);
checksum = checksum & 0xFFFFFFFFUL;

return(checksum);
}

/************************************************************************
In versions < 4.0.14 and < 4.1.1 there was a bug that the checksum only
looked at the first few bytes of the page. This calculates that old
checksum.
NOTE: we must first store the new formula checksum to
FIL_PAGE_SPACE_OR_CHKSUM before calculating and storing this old checksum
because this takes that field as an input! */
UNIV_INTERN
ulint
buf_calc_page_old_checksum(
/*=======================*/
/* out: checksum */
const byte* page) /* in: buffer page */
{
ulint checksum;

checksum = ut_fold_binary(page, FIL_PAGE_FILE_FLUSH_LSN);

checksum = checksum & 0xFFFFFFFFUL;

return(checksum);
}
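
/* Editorial illustration (not in the original file): how a page
written by a current server would be validated against both checksum
fields. buf_page_is_corrupted() below is the authoritative check;
this sketch ignores the BUF_NO_CHECKSUM_MAGIC and legacy LSN-only
cases that it must also accept. */
#if 0	/* sketch only, not compiled */
static ibool
page_checksums_match(const byte* page)
{
	ulint	stored_new = mach_read_from_4(
		page + FIL_PAGE_SPACE_OR_CHKSUM);
	ulint	stored_old = mach_read_from_4(
		page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM);

	return(stored_new == buf_calc_page_new_checksum(page)
	       && stored_old == buf_calc_page_old_checksum(page));
}
#endif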

/************************************************************************
Checks if a page is corrupt. */
UNIV_INTERN
ibool
buf_page_is_corrupted(
/*==================*/
/* out: TRUE if corrupted */
const byte* read_buf, /* in: a database page */
ulint zip_size) /* in: size of compressed page;
0 for uncompressed pages */
{
ulint checksum_field;
ulint old_checksum_field;
#ifndef UNIV_HOTBACKUP
ib_uint64_t current_lsn;
#endif
if (UNIV_LIKELY(!zip_size)
&& memcmp(read_buf + FIL_PAGE_LSN + 4,
read_buf + UNIV_PAGE_SIZE
- FIL_PAGE_END_LSN_OLD_CHKSUM + 4, 4)) {

/* Stored log sequence numbers at the start and the end
of page do not match */

return(TRUE);
}

#ifndef UNIV_HOTBACKUP
if (recv_lsn_checks_on && log_peek_lsn(&current_lsn)) {
if (current_lsn < mach_read_ull(read_buf + FIL_PAGE_LSN)) {
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: Error: page %lu log sequence number"
" %"PRIu64"\n"
"InnoDB: is in the future! Current system "
"log sequence number %"PRIu64".\n"
"InnoDB: Your database may be corrupt or "
"you may have copied the InnoDB\n"
"InnoDB: tablespace but not the InnoDB "
"log files. See\n"
"InnoDB: http://dev.mysql.com/doc/refman/"
"5.1/en/forcing-recovery.html\n"
"InnoDB: for more information.\n",
(ulong) mach_read_from_4(read_buf
+ FIL_PAGE_OFFSET),
mach_read_ull(read_buf + FIL_PAGE_LSN),
current_lsn);
}
}
#endif

/* If we use checksums validation, make additional check before
returning TRUE to ensure that the checksum is not equal to
BUF_NO_CHECKSUM_MAGIC which might be stored by InnoDB with checksums
disabled. Otherwise, skip checksum calculation and return FALSE */

if (UNIV_LIKELY(srv_use_checksums)) {
checksum_field = mach_read_from_4(read_buf
+ FIL_PAGE_SPACE_OR_CHKSUM);

if (UNIV_UNLIKELY(zip_size)) {
return(checksum_field != BUF_NO_CHECKSUM_MAGIC
&& checksum_field
!= page_zip_calc_checksum(read_buf, zip_size));
}

old_checksum_field = mach_read_from_4(
read_buf + UNIV_PAGE_SIZE
- FIL_PAGE_END_LSN_OLD_CHKSUM);

/* There are 2 valid formulas for old_checksum_field:

1. Very old versions of InnoDB only stored 8 byte lsn to the
start and the end of the page.

2. Newer InnoDB versions store the old formula checksum
there. */

if (old_checksum_field != mach_read_from_4(read_buf
+ FIL_PAGE_LSN)
&& old_checksum_field != BUF_NO_CHECKSUM_MAGIC
&& old_checksum_field
!= buf_calc_page_old_checksum(read_buf)) {

return(TRUE);
}

/* InnoDB versions < 4.0.14 and < 4.1.1 stored the space id
(always equal to 0), to FIL_PAGE_SPACE_OR_CHKSUM */

if (checksum_field != 0
&& checksum_field != BUF_NO_CHECKSUM_MAGIC
&& checksum_field
!= buf_calc_page_new_checksum(read_buf)) {

return(TRUE);
}
}

return(FALSE);
}

/************************************************************************
Prints a page to stderr. */
UNIV_INTERN
void
buf_page_print(
/*===========*/
const byte* read_buf, /* in: a database page */
ulint zip_size) /* in: compressed page size, or
0 for uncompressed pages */
{
dict_index_t* index;
ulint checksum;
ulint old_checksum;
ulint size = zip_size;

if (!size) {
size = UNIV_PAGE_SIZE;
}

ut_print_timestamp(stderr);
fprintf(stderr, " InnoDB: Page dump in ascii and hex (%lu bytes):\n",
(ulong) size);
ut_print_buf(stderr, read_buf, size);
fputs("InnoDB: End of page dump\n", stderr);

if (zip_size) {
/* Print compressed page. */

switch (fil_page_get_type(read_buf)) {
case FIL_PAGE_TYPE_ZBLOB:
case FIL_PAGE_TYPE_ZBLOB2:
checksum = srv_use_checksums
? page_zip_calc_checksum(read_buf, zip_size)
: BUF_NO_CHECKSUM_MAGIC;
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: Compressed BLOB page"
" checksum %lu, stored %lu\n"
"InnoDB: Page lsn %lu %lu\n"
"InnoDB: Page number (if stored"
" to page already) %lu,\n"
"InnoDB: space id (if stored"
" to page already) %lu\n",
(ulong) checksum,
(ulong) mach_read_from_4(
read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
(ulong) mach_read_from_4(
read_buf + FIL_PAGE_LSN),
(ulong) mach_read_from_4(
read_buf + (FIL_PAGE_LSN + 4)),
(ulong) mach_read_from_4(
read_buf + FIL_PAGE_OFFSET),
(ulong) mach_read_from_4(
read_buf
+ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
return;
default:
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: unknown page type %lu,"
" assuming FIL_PAGE_INDEX\n",
fil_page_get_type(read_buf));
/* fall through */
case FIL_PAGE_INDEX:
checksum = srv_use_checksums
? page_zip_calc_checksum(read_buf, zip_size)
: BUF_NO_CHECKSUM_MAGIC;

ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: Compressed page checksum %lu,"
" stored %lu\n"
"InnoDB: Page lsn %lu %lu\n"
"InnoDB: Page number (if stored"
" to page already) %lu,\n"
"InnoDB: space id (if stored"
" to page already) %lu\n",
(ulong) checksum,
(ulong) mach_read_from_4(
read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
(ulong) mach_read_from_4(
read_buf + FIL_PAGE_LSN),
(ulong) mach_read_from_4(
read_buf + (FIL_PAGE_LSN + 4)),
(ulong) mach_read_from_4(
read_buf + FIL_PAGE_OFFSET),
(ulong) mach_read_from_4(
read_buf
+ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
return;
case FIL_PAGE_TYPE_XDES:
/* This is an uncompressed page. */
break;
}
}

checksum = srv_use_checksums
? buf_calc_page_new_checksum(read_buf) : BUF_NO_CHECKSUM_MAGIC;
old_checksum = srv_use_checksums
? buf_calc_page_old_checksum(read_buf) : BUF_NO_CHECKSUM_MAGIC;

ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: Page checksum %lu, prior-to-4.0.14-form"
" checksum %lu\n"
"InnoDB: stored checksum %lu, prior-to-4.0.14-form"
" stored checksum %lu\n"
"InnoDB: Page lsn %lu %lu, low 4 bytes of lsn"
" at page end %lu\n"
"InnoDB: Page number (if stored to page already) %lu,\n"
"InnoDB: space id (if created with >= MySQL-4.1.1"
" and stored already) %lu\n",
(ulong) checksum, (ulong) old_checksum,
(ulong) mach_read_from_4(read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
(ulong) mach_read_from_4(read_buf + UNIV_PAGE_SIZE
- FIL_PAGE_END_LSN_OLD_CHKSUM),
(ulong) mach_read_from_4(read_buf + FIL_PAGE_LSN),
(ulong) mach_read_from_4(read_buf + FIL_PAGE_LSN + 4),
(ulong) mach_read_from_4(read_buf + UNIV_PAGE_SIZE
- FIL_PAGE_END_LSN_OLD_CHKSUM + 4),
(ulong) mach_read_from_4(read_buf + FIL_PAGE_OFFSET),
(ulong) mach_read_from_4(read_buf
+ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));

if (mach_read_from_2(read_buf + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE)
== TRX_UNDO_INSERT) {
fprintf(stderr,
"InnoDB: Page may be an insert undo log page\n");
} else if (mach_read_from_2(read_buf + TRX_UNDO_PAGE_HDR
+ TRX_UNDO_PAGE_TYPE)
== TRX_UNDO_UPDATE) {
fprintf(stderr,
"InnoDB: Page may be an update undo log page\n");
}

switch (fil_page_get_type(read_buf)) {
case FIL_PAGE_INDEX:
fprintf(stderr,
"InnoDB: Page may be an index page where"
" index id is %lu %lu\n",
(ulong) ut_dulint_get_high(
btr_page_get_index_id(read_buf)),
(ulong) ut_dulint_get_low(
btr_page_get_index_id(read_buf)));

#ifdef UNIV_HOTBACKUP
/* If the code is in ibbackup, dict_sys may be uninitialized,
i.e., NULL */

if (dict_sys == NULL) {
break;
}
#endif /* UNIV_HOTBACKUP */

index = dict_index_find_on_id_low(
btr_page_get_index_id(read_buf));
if (index) {
fputs("InnoDB: (", stderr);
dict_index_name_print(stderr, NULL, index);
fputs(")\n", stderr);
}
break;
case FIL_PAGE_INODE:
fputs("InnoDB: Page may be an 'inode' page\n", stderr);
break;
case FIL_PAGE_IBUF_FREE_LIST:
fputs("InnoDB: Page may be an insert buffer free list page\n",
stderr);
break;
case FIL_PAGE_TYPE_ALLOCATED:
fputs("InnoDB: Page may be a freshly allocated page\n",
stderr);
break;
case FIL_PAGE_IBUF_BITMAP:
fputs("InnoDB: Page may be an insert buffer bitmap page\n",
stderr);
break;
case FIL_PAGE_TYPE_SYS:
fputs("InnoDB: Page may be a system page\n",
stderr);
break;
case FIL_PAGE_TYPE_TRX_SYS:
fputs("InnoDB: Page may be a transaction system page\n",
stderr);
break;
case FIL_PAGE_TYPE_FSP_HDR:
fputs("InnoDB: Page may be a file space header page\n",
stderr);
break;
case FIL_PAGE_TYPE_XDES:
fputs("InnoDB: Page may be an extent descriptor page\n",
stderr);
break;
case FIL_PAGE_TYPE_BLOB:
fputs("InnoDB: Page may be a BLOB page\n",
stderr);
break;
case FIL_PAGE_TYPE_ZBLOB:
case FIL_PAGE_TYPE_ZBLOB2:
fputs("InnoDB: Page may be a compressed BLOB page\n",
stderr);
break;
}
}

/************************************************************************
Initializes a buffer control block when the buf_pool is created. */
static
void
buf_block_init(
/*===========*/
buf_block_t* block, /* in: pointer to control block */
byte* frame) /* in: pointer to buffer frame */
{
UNIV_MEM_DESC(frame, UNIV_PAGE_SIZE, block);

block->frame = frame;

block->page.state = BUF_BLOCK_NOT_USED;
block->page.buf_fix_count = 0;
block->page.io_fix = BUF_IO_NONE;

block->modify_clock = 0;

#ifdef UNIV_DEBUG_FILE_ACCESSES
block->page.file_page_was_freed = FALSE;
#endif /* UNIV_DEBUG_FILE_ACCESSES */

block->check_index_page_at_flush = FALSE;

#ifdef UNIV_DEBUG
block->page.in_page_hash = FALSE;
block->page.in_zip_hash = FALSE;
block->page.in_flush_list = FALSE;
block->page.in_free_list = FALSE;
block->page.in_LRU_list = FALSE;
block->in_unzip_LRU_list = FALSE;
block->n_pointers = 0;
#endif /* UNIV_DEBUG */
page_zip_des_init(&block->page.zip);

mutex_create(&block->mutex, SYNC_BUF_BLOCK);

rw_lock_create(&block->lock, SYNC_LEVEL_VARYING);
ut_ad(rw_lock_validate(&(block->lock)));

#ifdef UNIV_SYNC_DEBUG
rw_lock_create(&block->debug_latch, SYNC_NO_ORDER_CHECK);
#endif /* UNIV_SYNC_DEBUG */
}

/************************************************************************
Allocates a chunk of buffer frames. */
static
buf_chunk_t*
buf_chunk_init(
/*===========*/
/* out: chunk, or NULL on failure */
buf_chunk_t* chunk, /* out: chunk of buffers */
ulint mem_size) /* in: requested size in bytes */
{
buf_block_t* block;
byte* frame;
ulint i;

/* Round down to a multiple of page size,
although it already should be. */
mem_size = ut_2pow_round(mem_size, UNIV_PAGE_SIZE);
/* Reserve space for the block descriptors. */
mem_size += ut_2pow_round((mem_size / UNIV_PAGE_SIZE) * (sizeof *block)
+ (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE);
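
/* Editorial note (not in the original): a worked example of the
descriptor reservation above, under assumed sizes. If UNIV_PAGE_SIZE
is 16384 and sizeof(buf_block_t) were 256 bytes, a 64 MB request
(4096 pages) needs 4096 * 256 = 1 MB of descriptor space, so the
second ut_2pow_round() adds the equivalent of 64 pages, rounded up
to a whole page. */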

chunk->mem_size = mem_size;
chunk->mem = os_mem_alloc_large(&chunk->mem_size);

if (UNIV_UNLIKELY(chunk->mem == NULL)) {

return(NULL);
}

/* Allocate the block descriptors from
the start of the memory block. */
chunk->blocks = chunk->mem;

/* Align a pointer to the first frame. Note that when
os_large_page_size is smaller than UNIV_PAGE_SIZE,
we may allocate one fewer block than requested. When
it is bigger, we may allocate more blocks than requested. */

frame = ut_align(chunk->mem, UNIV_PAGE_SIZE);
chunk->size = chunk->mem_size / UNIV_PAGE_SIZE
- (frame != chunk->mem);

/* Subtract the space needed for block descriptors. */
{
ulint size = chunk->size;

while (frame < (byte*) (chunk->blocks + size)) {
frame += UNIV_PAGE_SIZE;
size--;
}

chunk->size = size;
}

/* Init block structs and assign frames for them. Then we
assign the frames to the first blocks (we already mapped the
memory above). */

block = chunk->blocks;

for (i = chunk->size; i--; ) {

buf_block_init(block, frame);

/* Wipe contents of frame to eliminate a Purify warning */
memset(block->frame, '\0', UNIV_PAGE_SIZE);

/* Add the block to the free list */
UT_LIST_ADD_LAST(list, buf_pool->free, (&block->page));
ut_d(block->page.in_free_list = TRUE);

block++;
frame += UNIV_PAGE_SIZE;
}

return(chunk);
}

#ifdef UNIV_DEBUG
/*************************************************************************
Finds a block in the given buffer chunk that points to a
given compressed page. */
static
buf_block_t*
buf_chunk_contains_zip(
/*===================*/
/* out: buffer block pointing to
the compressed page, or NULL */
buf_chunk_t* chunk, /* in: chunk being checked */
const void* data) /* in: pointer to compressed page */
{
buf_block_t* block;
ulint i;

ut_ad(buf_pool);
ut_ad(buf_pool_mutex_own());

block = chunk->blocks;

for (i = chunk->size; i--; block++) {
if (block->page.zip.data == data) {

return(block);
}
}

return(NULL);
}

/*************************************************************************
Finds a block in the buffer pool that points to a
given compressed page. */
UNIV_INTERN
buf_block_t*
buf_pool_contains_zip(
/*==================*/
/* out: buffer block pointing to
the compressed page, or NULL */
const void* data) /* in: pointer to compressed page */
{
ulint n;
buf_chunk_t* chunk = buf_pool->chunks;

for (n = buf_pool->n_chunks; n--; chunk++) {
buf_block_t* block = buf_chunk_contains_zip(chunk, data);

if (block) {

return(block);
}
}

return(NULL);
}
#endif /* UNIV_DEBUG */

/*************************************************************************
Checks that all file pages in the buffer chunk are in a replaceable state. */
static
const buf_block_t*
buf_chunk_not_freed(
/*================*/
/* out: address of a non-free block,
or NULL if all freed */
buf_chunk_t* chunk) /* in: chunk being checked */
{
buf_block_t* block;
ulint i;

ut_ad(buf_pool);
ut_ad(buf_pool_mutex_own());

block = chunk->blocks;

for (i = chunk->size; i--; block++) {
mutex_enter(&block->mutex);

if (buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE
&& !buf_flush_ready_for_replace(&block->page)) {

mutex_exit(&block->mutex);
return(block);
}

mutex_exit(&block->mutex);
}

return(NULL);
}

/*************************************************************************
Checks that all blocks in the buffer chunk are in BUF_BLOCK_NOT_USED state. */
static
ibool
buf_chunk_all_free(
/*===============*/
/* out: TRUE if all freed */
const buf_chunk_t* chunk) /* in: chunk being checked */
{
const buf_block_t* block;
ulint i;

ut_ad(buf_pool);
ut_ad(buf_pool_mutex_own());

block = chunk->blocks;

for (i = chunk->size; i--; block++) {

if (buf_block_get_state(block) != BUF_BLOCK_NOT_USED) {

return(FALSE);
}
}

return(TRUE);
}

/************************************************************************
Frees a chunk of buffer frames. */
static
void
buf_chunk_free(
/*===========*/
buf_chunk_t* chunk) /* out: chunk of buffers */
{
buf_block_t* block;
const buf_block_t* block_end;

ut_ad(buf_pool_mutex_own());

block_end = chunk->blocks + chunk->size;

for (block = chunk->blocks; block < block_end; block++) {
ut_a(buf_block_get_state(block) == BUF_BLOCK_NOT_USED);
ut_a(!block->page.zip.data);

ut_ad(!block->page.in_LRU_list);
ut_ad(!block->in_unzip_LRU_list);
ut_ad(!block->page.in_flush_list);
/* Remove the block from the free list. */
ut_ad(block->page.in_free_list);
UT_LIST_REMOVE(list, buf_pool->free, (&block->page));

/* Free the latches. */
mutex_free(&block->mutex);
rw_lock_free(&block->lock);
#ifdef UNIV_SYNC_DEBUG
rw_lock_free(&block->debug_latch);
#endif /* UNIV_SYNC_DEBUG */
UNIV_MEM_UNDESC(block);
}

os_mem_free_large(chunk->mem, chunk->mem_size);
}

/************************************************************************
Creates the buffer pool. */
UNIV_INTERN
buf_pool_t*
buf_pool_init(void)
/*===============*/
/* out, own: buf_pool object, NULL if not
enough memory or error */
{
buf_chunk_t* chunk;
ulint i;

buf_pool = mem_zalloc(sizeof(buf_pool_t));

/* 1. Initialize general fields
------------------------------- */
mutex_create(&buf_pool_mutex, SYNC_BUF_POOL);
mutex_create(&buf_pool_zip_mutex, SYNC_BUF_BLOCK);

buf_pool_mutex_enter();

buf_pool->n_chunks = 1;
buf_pool->chunks = chunk = mem_alloc(sizeof *chunk);

UT_LIST_INIT(buf_pool->free);

if (!buf_chunk_init(chunk, srv_buf_pool_size)) {
mem_free(chunk);
mem_free(buf_pool);
buf_pool = NULL;
return(NULL);
}

srv_buf_pool_old_size = srv_buf_pool_size;
buf_pool->curr_size = chunk->size;
srv_buf_pool_curr_size = buf_pool->curr_size * UNIV_PAGE_SIZE;

buf_pool->page_hash = hash_create(2 * buf_pool->curr_size);
buf_pool->zip_hash = hash_create(2 * buf_pool->curr_size);

buf_pool->last_printout_time = time(NULL);

/* 2. Initialize flushing fields
-------------------------------- */

for (i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) {
buf_pool->no_flush[i] = os_event_create(NULL);
}

buf_pool->ulint_clock = 1;

/* 3. Initialize LRU fields
--------------------------- */
/* All fields are initialized by mem_zalloc(). */

buf_pool_mutex_exit();

btr_search_sys_create(buf_pool->curr_size
* UNIV_PAGE_SIZE / sizeof(void*) / 64);

/* 4. Initialize the buddy allocator fields */
/* All fields are initialized by mem_zalloc(). */

return(buf_pool);
}
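
/* Editorial illustration (not in the original): the intended startup
order, assuming srv_buf_pool_size has already been set from the
configuration. buf_pool_init() returns NULL on allocation failure,
so the caller must check the result. */
#if 0	/* sketch only, not compiled */
void
example_startup(void)
{
	if (buf_pool_init() == NULL) {
		/* not enough memory for the configured
		srv_buf_pool_size; abort startup */
	}
}
#endif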

/************************************************************************
Frees the buffer pool at shutdown. This must not be invoked before
freeing all mutexes. */
UNIV_INTERN
void
buf_pool_free(void)
/*===============*/
{
buf_chunk_t* chunk;
buf_chunk_t* chunks;

chunks = buf_pool->chunks;
chunk = chunks + buf_pool->n_chunks;

while (--chunk >= chunks) {
/* Bypass the checks of buf_chunk_free(), since they
would fail at shutdown. */
os_mem_free_large(chunk->mem, chunk->mem_size);
}

buf_pool->n_chunks = 0;
}

/************************************************************************
Relocate a buffer control block. Relocates the block on the LRU list
and in buf_pool->page_hash. Does not relocate bpage->list.
The caller must take care of relocating bpage->list. */
UNIV_INTERN
void
buf_relocate(
/*=========*/
buf_page_t* bpage, /* in/out: control block being relocated;
buf_page_get_state(bpage) must be
BUF_BLOCK_ZIP_DIRTY or BUF_BLOCK_ZIP_PAGE */
buf_page_t* dpage) /* in/out: destination control block */
{
buf_page_t* b;
ulint fold;

ut_ad(buf_pool_mutex_own());
ut_ad(mutex_own(buf_page_get_mutex(bpage)));
ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
ut_a(bpage->buf_fix_count == 0);
ut_ad(bpage->in_LRU_list);
ut_ad(!bpage->in_zip_hash);
ut_ad(bpage->in_page_hash);
ut_ad(bpage == buf_page_hash_get(bpage->space, bpage->offset));
#ifdef UNIV_DEBUG
switch (buf_page_get_state(bpage)) {
case BUF_BLOCK_ZIP_FREE:
case BUF_BLOCK_NOT_USED:
case BUF_BLOCK_READY_FOR_USE:
case BUF_BLOCK_FILE_PAGE:
case BUF_BLOCK_MEMORY:
case BUF_BLOCK_REMOVE_HASH:
ut_error;
case BUF_BLOCK_ZIP_DIRTY:
case BUF_BLOCK_ZIP_PAGE:
break;
}
#endif /* UNIV_DEBUG */

memcpy(dpage, bpage, sizeof *dpage);

ut_d(bpage->in_LRU_list = FALSE);
ut_d(bpage->in_page_hash = FALSE);

/* relocate buf_pool->LRU */
b = UT_LIST_GET_PREV(LRU, bpage);
UT_LIST_REMOVE(LRU, buf_pool->LRU, bpage);

if (b) {
UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU, b, dpage);
} else {
UT_LIST_ADD_FIRST(LRU, buf_pool->LRU, dpage);
}

if (UNIV_UNLIKELY(buf_pool->LRU_old == bpage)) {
buf_pool->LRU_old = dpage;
}

ut_d(UT_LIST_VALIDATE(LRU, buf_page_t, buf_pool->LRU));

/* relocate buf_pool->page_hash */
fold = buf_page_address_fold(bpage->space, bpage->offset);

HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, bpage);
HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold, dpage);

UNIV_MEM_INVALID(bpage, sizeof *bpage);
}

/************************************************************************
Shrinks the buffer pool. */
static
void
buf_pool_shrink(
/*============*/
/* out: TRUE if shrunk */
ulint chunk_size) /* in: number of pages to remove */
{
buf_chunk_t* chunks;
buf_chunk_t* chunk;
ulint max_size;
ulint max_free_size;
buf_chunk_t* max_chunk;
buf_chunk_t* max_free_chunk;

ut_ad(!buf_pool_mutex_own());

try_again:
btr_search_disable(); /* Empty the adaptive hash index again */
buf_pool_mutex_enter();

shrink_again:
if (buf_pool->n_chunks <= 1) {

/* Cannot shrink if there is only one chunk */
goto func_done;
}

/* Search for the largest free chunk
not larger than the size difference */
chunks = buf_pool->chunks;
chunk = chunks + buf_pool->n_chunks;
max_size = max_free_size = 0;
max_chunk = max_free_chunk = NULL;

while (--chunk >= chunks) {
if (chunk->size <= chunk_size
&& chunk->size > max_free_size) {
if (chunk->size > max_size) {
max_size = chunk->size;
max_chunk = chunk;
}

if (buf_chunk_all_free(chunk)) {
max_free_size = chunk->size;
max_free_chunk = chunk;
}
}
}

if (!max_free_size) {

ulint dirty = 0;
ulint nonfree = 0;
buf_block_t* block;
buf_block_t* bend;

/* Cannot shrink: try again later
(do not assign srv_buf_pool_old_size) */
if (!max_chunk) {

goto func_exit;
}

block = max_chunk->blocks;
bend = block + max_chunk->size;

/* Move the blocks of chunk to the end of the
LRU list and try to flush them. */
for (; block < bend; block++) {
switch (buf_block_get_state(block)) {
case BUF_BLOCK_NOT_USED:
continue;
case BUF_BLOCK_FILE_PAGE:
break;
default:
nonfree++;
continue;
}

mutex_enter(&block->mutex);
/* The following calls will temporarily
release block->mutex and buf_pool_mutex.
Therefore, we have to always retry,
even if !dirty && !nonfree. */

if (!buf_flush_ready_for_replace(&block->page)) {

buf_LRU_make_block_old(&block->page);
dirty++;
} else if (buf_LRU_free_block(&block->page, TRUE, NULL)
!= BUF_LRU_FREED) {
nonfree++;
}

mutex_exit(&block->mutex);
}

buf_pool_mutex_exit();

/* Request for a flush of the chunk if it helps.
Do not flush if there are non-free blocks, since
flushing will not make the chunk freeable. */
if (nonfree) {
/* Avoid busy-waiting. */
os_thread_sleep(100000);
} else if (dirty
&& buf_flush_batch(BUF_FLUSH_LRU, dirty, 0)
== ULINT_UNDEFINED) {

buf_flush_wait_batch_end(BUF_FLUSH_LRU);
}

goto try_again;
}

max_size = max_free_size;
max_chunk = max_free_chunk;

srv_buf_pool_old_size = srv_buf_pool_size;

/* Rewrite buf_pool->chunks. Copy everything but max_chunk. */
chunks = mem_alloc((buf_pool->n_chunks - 1) * sizeof *chunks);
memcpy(chunks, buf_pool->chunks,
(max_chunk - buf_pool->chunks) * sizeof *chunks);
memcpy(chunks + (max_chunk - buf_pool->chunks),
max_chunk + 1,
buf_pool->chunks + buf_pool->n_chunks
- (max_chunk + 1));
ut_a(buf_pool->curr_size > max_chunk->size);
buf_pool->curr_size -= max_chunk->size;
srv_buf_pool_curr_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
chunk_size -= max_chunk->size;
buf_chunk_free(max_chunk);
mem_free(buf_pool->chunks);
buf_pool->chunks = chunks;
buf_pool->n_chunks--;

/* Allow a slack of one megabyte. */
if (chunk_size > 1048576 / UNIV_PAGE_SIZE) {

goto shrink_again;
}

func_done:
srv_buf_pool_old_size = srv_buf_pool_size;
func_exit:
buf_pool_mutex_exit();
btr_search_enable();
}

/************************************************************************
Rebuild buf_pool->page_hash. */
static
void
buf_pool_page_hash_rebuild(void)
/*============================*/
{
ulint i;
ulint n_chunks;
buf_chunk_t* chunk;
hash_table_t* page_hash;
hash_table_t* zip_hash;
buf_page_t* b;

buf_pool_mutex_enter();

/* Free, create, and populate the hash table. */
hash_table_free(buf_pool->page_hash);
buf_pool->page_hash = page_hash = hash_create(2 * buf_pool->curr_size);
zip_hash = hash_create(2 * buf_pool->curr_size);

HASH_MIGRATE(buf_pool->zip_hash, zip_hash, buf_page_t, hash,
BUF_POOL_ZIP_FOLD_BPAGE);

hash_table_free(buf_pool->zip_hash);
buf_pool->zip_hash = zip_hash;

/* Insert the uncompressed file pages to buf_pool->page_hash. */

chunk = buf_pool->chunks;
n_chunks = buf_pool->n_chunks;

for (i = 0; i < n_chunks; i++, chunk++) {
ulint j;
buf_block_t* block = chunk->blocks;

for (j = 0; j < chunk->size; j++, block++) {
if (buf_block_get_state(block)
== BUF_BLOCK_FILE_PAGE) {
ut_ad(!block->page.in_zip_hash);
ut_ad(block->page.in_page_hash);

HASH_INSERT(buf_page_t, hash, page_hash,
buf_page_address_fold(
block->page.space,
block->page.offset),
&block->page);
}
}
}

/* Insert the compressed-only pages to buf_pool->page_hash.
All such blocks are either in buf_pool->zip_clean or
in buf_pool->flush_list. */

for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
b = UT_LIST_GET_NEXT(list, b)) {
ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
ut_ad(!b->in_flush_list);
ut_ad(b->in_LRU_list);
ut_ad(b->in_page_hash);
ut_ad(!b->in_zip_hash);

HASH_INSERT(buf_page_t, hash, page_hash,
buf_page_address_fold(b->space, b->offset), b);
}

for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
b = UT_LIST_GET_NEXT(list, b)) {
ut_ad(b->in_flush_list);
ut_ad(b->in_LRU_list);
ut_ad(b->in_page_hash);
ut_ad(!b->in_zip_hash);

switch (buf_page_get_state(b)) {
case BUF_BLOCK_ZIP_DIRTY:
HASH_INSERT(buf_page_t, hash, page_hash,
buf_page_address_fold(b->space,
b->offset), b);
break;
case BUF_BLOCK_FILE_PAGE:
/* uncompressed page */
break;
case BUF_BLOCK_ZIP_FREE:
case BUF_BLOCK_ZIP_PAGE:
case BUF_BLOCK_NOT_USED:
case BUF_BLOCK_READY_FOR_USE:
case BUF_BLOCK_MEMORY:
case BUF_BLOCK_REMOVE_HASH:
ut_error;
break;
}
}

buf_pool_mutex_exit();
}

/************************************************************************
Resizes the buffer pool. */
UNIV_INTERN
void
buf_pool_resize(void)
/*=================*/
{
buf_pool_mutex_enter();

if (srv_buf_pool_old_size == srv_buf_pool_size) {

buf_pool_mutex_exit();
return;
}

if (srv_buf_pool_curr_size + 1048576 > srv_buf_pool_size) {

buf_pool_mutex_exit();

/* Disable adaptive hash indexes and empty the index
in order to free up memory in the buffer pool chunks. */
buf_pool_shrink((srv_buf_pool_curr_size - srv_buf_pool_size)
/ UNIV_PAGE_SIZE);
} else if (srv_buf_pool_curr_size + 1048576 < srv_buf_pool_size) {

/* Enlarge the buffer pool by at least one megabyte */

ulint mem_size
= srv_buf_pool_size - srv_buf_pool_curr_size;
buf_chunk_t* chunks;
buf_chunk_t* chunk;

chunks = mem_alloc((buf_pool->n_chunks + 1) * sizeof *chunks);

memcpy(chunks, buf_pool->chunks, buf_pool->n_chunks
* sizeof *chunks);

chunk = &chunks[buf_pool->n_chunks];

if (!buf_chunk_init(chunk, mem_size)) {
mem_free(chunks);
} else {
buf_pool->curr_size += chunk->size;
srv_buf_pool_curr_size = buf_pool->curr_size
* UNIV_PAGE_SIZE;
mem_free(buf_pool->chunks);
buf_pool->chunks = chunks;
buf_pool->n_chunks++;
}

srv_buf_pool_old_size = srv_buf_pool_size;
buf_pool_mutex_exit();
}

buf_pool_page_hash_rebuild();
}
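
/* Editorial illustration (not in the original): how a resize is
driven. The caller changes srv_buf_pool_size and then invokes
buf_pool_resize(), which shrinks or enlarges in whole chunks; a
difference of less than one megabyte is left alone. */
#if 0	/* sketch only, not compiled */
void
example_resize(ulint new_size_in_bytes)
{
	srv_buf_pool_size = new_size_in_bytes;
	buf_pool_resize();
}
#endif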

/************************************************************************
Moves the block to the start of the LRU list if there is a danger
that the block would drift out of the buffer pool. */
UNIV_INLINE
void
buf_block_make_young(
/*=================*/
buf_page_t* bpage) /* in: block to make younger */
{
ut_ad(!buf_pool_mutex_own());

/* Note that we read freed_page_clock's without holding any mutex:
this is allowed since the result is used only in heuristics */

if (buf_page_peek_if_too_old(bpage)) {

buf_pool_mutex_enter();
/* There has been freeing activity in the LRU list:
best to move to the head of the LRU list */

buf_LRU_make_block_young(bpage);
buf_pool_mutex_exit();
}
}

/************************************************************************
Moves a page to the start of the buffer pool LRU list. This high-level
function can be used to prevent an important page from slipping out of
the buffer pool. */
UNIV_INTERN
void
buf_page_make_young(
/*================*/
buf_page_t* bpage) /* in: buffer block of a file page */
{
buf_pool_mutex_enter();

ut_a(buf_page_in_file(bpage));

buf_LRU_make_block_young(bpage);

buf_pool_mutex_exit();
}

/************************************************************************
Resets the check_index_page_at_flush field of a page if found in the buffer
pool. */
UNIV_INTERN
void
buf_reset_check_index_page_at_flush(
/*================================*/
ulint space, /* in: space id */
ulint offset) /* in: page number */
{
buf_block_t* block;

buf_pool_mutex_enter();

block = (buf_block_t*) buf_page_hash_get(space, offset);

if (block && buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE) {
block->check_index_page_at_flush = FALSE;
}

buf_pool_mutex_exit();
}

/************************************************************************
Returns the current state of is_hashed of a page. FALSE if the page is
not in the pool. NOTE that this operation does not fix the page in the
pool if it is found there. */
UNIV_INTERN
ibool
buf_page_peek_if_search_hashed(
/*===========================*/
/* out: TRUE if page hash index is built in search
system */
ulint space, /* in: space id */
ulint offset) /* in: page number */
{
buf_block_t* block;
ibool is_hashed;

buf_pool_mutex_enter();

block = (buf_block_t*) buf_page_hash_get(space, offset);

if (!block || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
is_hashed = FALSE;
} else {
is_hashed = block->is_hashed;
}

buf_pool_mutex_exit();

return(is_hashed);
}

#ifdef UNIV_DEBUG_FILE_ACCESSES
/************************************************************************
Sets file_page_was_freed TRUE if the page is found in the buffer pool.
This function should be called when we free a file page and want the
debug version to check that it is not accessed any more unless
reallocated. */
UNIV_INTERN
buf_page_t*
buf_page_set_file_page_was_freed(
/*=============================*/
/* out: control block if found in page hash table,
otherwise NULL */
ulint space, /* in: space id */
ulint offset) /* in: page number */
{
buf_page_t* bpage;

buf_pool_mutex_enter();

bpage = buf_page_hash_get(space, offset);

if (bpage) {
bpage->file_page_was_freed = TRUE;
}

buf_pool_mutex_exit();

return(bpage);
}

/************************************************************************
Sets file_page_was_freed FALSE if the page is found in the buffer pool.
This function should be called when we free a file page and want the
debug version to check that it is not accessed any more unless
reallocated. */
UNIV_INTERN
buf_page_t*
buf_page_reset_file_page_was_freed(
/*===============================*/
/* out: control block if found in page hash table,
otherwise NULL */
ulint space, /* in: space id */
ulint offset) /* in: page number */
{
buf_page_t* bpage;

buf_pool_mutex_enter();

bpage = buf_page_hash_get(space, offset);

if (bpage) {
bpage->file_page_was_freed = FALSE;
}

buf_pool_mutex_exit();

return(bpage);
}
#endif /* UNIV_DEBUG_FILE_ACCESSES */

/************************************************************************
Get read access to a compressed page (usually of type
FIL_PAGE_TYPE_ZBLOB or FIL_PAGE_TYPE_ZBLOB2).
The page must be released with buf_page_release_zip().
NOTE: the page is not protected by any latch. Mutual exclusion has to
be implemented at a higher level. In other words, all possible
accesses to a given page through this function must be protected by
the same set of mutexes or latches. */
UNIV_INTERN
buf_page_t*
buf_page_get_zip(
/*=============*/
/* out: pointer to the block */
ulint space, /* in: space id */
ulint zip_size,/* in: compressed page size */
ulint offset) /* in: page number */
{
buf_page_t* bpage;
mutex_t* block_mutex;
ibool must_read;

#ifndef UNIV_LOG_DEBUG
ut_ad(!ibuf_inside());
#endif
buf_pool->n_page_gets++;

for (;;) {
buf_pool_mutex_enter();
lookup:
bpage = buf_page_hash_get(space, offset);
if (bpage) {
break;
}

/* Page not in buf_pool: needs to be read from file */

buf_pool_mutex_exit();

buf_read_page(space, zip_size, offset);

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(++buf_dbg_counter % 37 || buf_validate());
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
}

if (UNIV_UNLIKELY(!bpage->zip.data)) {
/* There is no compressed page. */
buf_pool_mutex_exit();
return(NULL);
}

block_mutex = buf_page_get_mutex(bpage);
mutex_enter(block_mutex);

switch (buf_page_get_state(bpage)) {
case BUF_BLOCK_NOT_USED:
case BUF_BLOCK_READY_FOR_USE:
case BUF_BLOCK_MEMORY:
case BUF_BLOCK_REMOVE_HASH:
case BUF_BLOCK_ZIP_FREE:
ut_error;
break;
case BUF_BLOCK_ZIP_PAGE:
case BUF_BLOCK_ZIP_DIRTY:
bpage->buf_fix_count++;
break;
case BUF_BLOCK_FILE_PAGE:
/* Discard the uncompressed page frame if possible. */
if (buf_LRU_free_block(bpage, FALSE, NULL)
== BUF_LRU_FREED) {

mutex_exit(block_mutex);
goto lookup;
}

buf_block_buf_fix_inc((buf_block_t*) bpage,
__FILE__, __LINE__);
break;
}

must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ;

buf_pool_mutex_exit();

buf_page_set_accessed(bpage, TRUE);

mutex_exit(block_mutex);

buf_block_make_young(bpage);

#ifdef UNIV_DEBUG_FILE_ACCESSES
ut_a(!bpage->file_page_was_freed);
#endif

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(++buf_dbg_counter % 5771 || buf_validate());
ut_a(bpage->buf_fix_count > 0);
ut_a(buf_page_in_file(bpage));
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

if (must_read) {
/* Let us wait until the read operation
completes */

for (;;) {
enum buf_io_fix io_fix;

mutex_enter(block_mutex);
io_fix = buf_page_get_io_fix(bpage);
mutex_exit(block_mutex);

if (io_fix == BUF_IO_READ) {

os_thread_sleep(WAIT_FOR_READ);
} else {
break;
}
}
}

#ifdef UNIV_IBUF_COUNT_DEBUG
ut_a(ibuf_count_get(buf_page_get_space(bpage),
buf_page_get_page_no(bpage)) == 0);
#endif
return(bpage);
}
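
/* Editorial illustration (not in the original): typical use of
buf_page_get_zip(). The caller copies what it needs out of the
compressed frame and then releases the page with
buf_page_release_zip(), as the function comment above requires. */
#if 0	/* sketch only, not compiled */
void
example_read_zip_page(ulint space, ulint zip_size, ulint offset)
{
	buf_page_t*	bpage = buf_page_get_zip(space, zip_size, offset);

	if (bpage != NULL) {
		/* ... read from bpage->zip.data ... */
		buf_page_release_zip(bpage);
	}
}
#endif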

/************************************************************************
Initialize some fields of a control block. */
UNIV_INLINE
void
buf_block_init_low(
/*===============*/
buf_block_t* block) /* in: block to init */
{
block->check_index_page_at_flush = FALSE;
block->index = NULL;

block->n_hash_helps = 0;
block->is_hashed = FALSE;
block->n_fields = 1;
block->n_bytes = 0;
block->left_side = TRUE;
}

/************************************************************************
Decompress a block. */
static
ibool
buf_zip_decompress(
/*===============*/
/* out: TRUE if successful */
buf_block_t* block, /* in/out: block */
ibool check) /* in: TRUE=verify the page checksum */
{
const byte* frame = block->page.zip.data;

ut_ad(buf_block_get_zip_size(block));
ut_a(buf_block_get_space(block) != 0);

if (UNIV_LIKELY(check)) {
ulint stamp_checksum = mach_read_from_4(
frame + FIL_PAGE_SPACE_OR_CHKSUM);
ulint calc_checksum = page_zip_calc_checksum(
frame, page_zip_get_size(&block->page.zip));

if (UNIV_UNLIKELY(stamp_checksum != calc_checksum)) {
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: compressed page checksum mismatch"
" (space %u page %u): %lu != %lu\n",
block->page.space, block->page.offset,
stamp_checksum, calc_checksum);
return(FALSE);
}
}

switch (fil_page_get_type(frame)) {
case FIL_PAGE_INDEX:
if (page_zip_decompress(&block->page.zip,
block->frame)) {
return(TRUE);
}

fprintf(stderr,
"InnoDB: unable to decompress space %lu page %lu\n",
(ulong) block->page.space,
(ulong) block->page.offset);
return(FALSE);

case FIL_PAGE_TYPE_ALLOCATED:
case FIL_PAGE_INODE:
case FIL_PAGE_IBUF_BITMAP:
case FIL_PAGE_TYPE_FSP_HDR:
case FIL_PAGE_TYPE_XDES:
case FIL_PAGE_TYPE_ZBLOB:
case FIL_PAGE_TYPE_ZBLOB2:
/* Copy to uncompressed storage. */
memcpy(block->frame, frame,
buf_block_get_zip_size(block));
return(TRUE);
}

ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: unknown compressed page"
" type %lu\n",
fil_page_get_type(frame));
return(FALSE);
}

/************************************************************************
Find out if a buffer block was created by buf_chunk_init(). */
static
ibool
buf_block_is_uncompressed(
/*======================*/
/* out: TRUE if "block" has
been added to buf_pool->free
by buf_chunk_init() */
const buf_block_t* block) /* in: pointer to block,
not dereferenced */
{
const buf_chunk_t* chunk = buf_pool->chunks;
const buf_chunk_t* const echunk = chunk + buf_pool->n_chunks;

ut_ad(buf_pool_mutex_own());

if (UNIV_UNLIKELY((((ulint) block) % sizeof *block) != 0)) {
/* The pointer should be aligned. */
return(FALSE);
}

while (chunk < echunk) {
if (block >= chunk->blocks
&& block < chunk->blocks + chunk->size) {

return(TRUE);
}

chunk++;
}

return(FALSE);
}

/************************************************************************
This is the general function used to get access to a database page. */
UNIV_INTERN
buf_block_t*
buf_page_get_gen(
/*=============*/
/* out: pointer to the block or NULL */
ulint space, /* in: space id */
ulint zip_size,/* in: compressed page size in bytes
or 0 for uncompressed pages */
ulint offset, /* in: page number */
ulint rw_latch,/* in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
buf_block_t* guess, /* in: guessed block or NULL */
ulint mode, /* in: BUF_GET, BUF_GET_IF_IN_POOL,
BUF_GET_NO_LATCH, BUF_GET_NOWAIT */
const char* file, /* in: file name */
ulint line, /* in: line where called */
mtr_t* mtr) /* in: mini-transaction */
{
buf_block_t* block;
ibool accessed;
ulint fix_type;
ibool must_read;

ut_ad(mtr);
ut_ad((rw_latch == RW_S_LATCH)
|| (rw_latch == RW_X_LATCH)
|| (rw_latch == RW_NO_LATCH));
ut_ad((mode != BUF_GET_NO_LATCH) || (rw_latch == RW_NO_LATCH));
ut_ad((mode == BUF_GET) || (mode == BUF_GET_IF_IN_POOL)
|| (mode == BUF_GET_NO_LATCH) || (mode == BUF_GET_NOWAIT));
ut_ad(zip_size == fil_space_get_zip_size(space));
#ifndef UNIV_LOG_DEBUG
ut_ad(!ibuf_inside() || ibuf_page(space, zip_size, offset));
#endif
buf_pool->n_page_gets++;
loop:
block = guess;
buf_pool_mutex_enter();

if (block) {
/* If the guess is a compressed page descriptor that
has been allocated by buf_buddy_alloc(), it may have
been invalidated by buf_buddy_relocate(). In that
case, block could point to something that happens to
contain the expected bits in block->page. Similarly,
the guess may be pointing to a buffer pool chunk that
has been released when resizing the buffer pool. */

if (!buf_block_is_uncompressed(block)
|| offset != block->page.offset
|| space != block->page.space
|| buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {

block = guess = NULL;
} else {
ut_ad(!block->page.in_zip_hash);
ut_ad(block->page.in_page_hash);
}
}

if (block == NULL) {
block = (buf_block_t*) buf_page_hash_get(space, offset);
}

loop2:
if (block == NULL) {
/* Page not in buf_pool: needs to be read from file */

buf_pool_mutex_exit();

if (mode == BUF_GET_IF_IN_POOL) {

return(NULL);
}

buf_read_page(space, zip_size, offset);

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(++buf_dbg_counter % 37 || buf_validate());
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
goto loop;
}

ut_ad(page_zip_get_size(&block->page.zip) == zip_size);

must_read = buf_block_get_io_fix(block) == BUF_IO_READ;

if (must_read && mode == BUF_GET_IF_IN_POOL) {
/* The page is only being read to buffer */
buf_pool_mutex_exit();

return(NULL);
}

switch (buf_block_get_state(block)) {
buf_page_t* bpage;
ibool success;

case BUF_BLOCK_FILE_PAGE:
break;

case BUF_BLOCK_ZIP_PAGE:
case BUF_BLOCK_ZIP_DIRTY:
bpage = &block->page;

if (bpage->buf_fix_count
|| buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
/* This condition often occurs when the buffer
is not buffer-fixed, but I/O-fixed by
buf_page_init_for_read(). */
wait_until_unfixed:
/* The block is buffer-fixed or I/O-fixed.
Try again later. */
buf_pool_mutex_exit();
os_thread_sleep(WAIT_FOR_READ);

goto loop;
}

/* Allocate an uncompressed page. */
buf_pool_mutex_exit();

block = buf_LRU_get_free_block(0);
ut_a(block);

buf_pool_mutex_enter();
mutex_enter(&block->mutex);

{
buf_page_t* hash_bpage
= buf_page_hash_get(space, offset);

if (UNIV_UNLIKELY(bpage != hash_bpage)) {
/* The buf_pool->page_hash was modified
while buf_pool_mutex was released.
Free the block that was allocated. */

buf_LRU_block_free_non_file_page(block);
mutex_exit(&block->mutex);

block = (buf_block_t*) hash_bpage;
goto loop2;
}
}

if (UNIV_UNLIKELY
(bpage->buf_fix_count
|| buf_page_get_io_fix(bpage) != BUF_IO_NONE)) {

/* The block was buffer-fixed or I/O-fixed
while buf_pool_mutex was not held by this thread.
Free the block that was allocated and try again.
This should be extremely unlikely. */

buf_LRU_block_free_non_file_page(block);
mutex_exit(&block->mutex);

goto wait_until_unfixed;
}

/* Move the compressed page from bpage to block,
and uncompress it. */

mutex_enter(&buf_pool_zip_mutex);

buf_relocate(bpage, &block->page);
buf_block_init_low(block);
block->lock_hash_val = lock_rec_hash(space, offset);

UNIV_MEM_DESC(&block->page.zip.data,
page_zip_get_size(&block->page.zip), block);

if (buf_page_get_state(&block->page)
== BUF_BLOCK_ZIP_PAGE) {
UT_LIST_REMOVE(list, buf_pool->zip_clean,
&block->page);
ut_ad(!block->page.in_flush_list);
} else {
/* Relocate buf_pool->flush_list. */
buf_page_t* b;

b = UT_LIST_GET_PREV(list, &block->page);
ut_ad(block->page.in_flush_list);
UT_LIST_REMOVE(list, buf_pool->flush_list,
&block->page);

if (b) {
UT_LIST_INSERT_AFTER(
list, buf_pool->flush_list, b,
&block->page);
} else {
UT_LIST_ADD_FIRST(
list, buf_pool->flush_list,
&block->page);
}
}

/* Buffer-fix, I/O-fix, and X-latch the block
for the duration of the decompression.
Also add the block to the unzip_LRU list. */
block->page.state = BUF_BLOCK_FILE_PAGE;

/* Insert at the front of unzip_LRU list */
buf_unzip_LRU_add_block(block, FALSE);

block->page.buf_fix_count = 1;
buf_block_set_io_fix(block, BUF_IO_READ);
buf_pool->n_pend_unzip++;
rw_lock_x_lock(&block->lock);
mutex_exit(&block->mutex);
mutex_exit(&buf_pool_zip_mutex);

buf_buddy_free(bpage, sizeof *bpage);

buf_pool_mutex_exit();

/* Decompress the page and apply buffered operations
while not holding buf_pool_mutex or block->mutex. */
success = buf_zip_decompress(block, srv_use_checksums);

if (UNIV_LIKELY(success)) {
ibuf_merge_or_delete_for_page(block, space, offset,
zip_size, TRUE);
}

/* Unfix and unlatch the block. */
buf_pool_mutex_enter();
mutex_enter(&block->mutex);
buf_pool->n_pend_unzip--;
block->page.buf_fix_count--;
buf_block_set_io_fix(block, BUF_IO_NONE);
mutex_exit(&block->mutex);
rw_lock_x_unlock(&block->lock);

if (UNIV_UNLIKELY(!success)) {

buf_pool_mutex_exit();
return(NULL);
}

break;

case BUF_BLOCK_ZIP_FREE:
case BUF_BLOCK_NOT_USED:
case BUF_BLOCK_READY_FOR_USE:
case BUF_BLOCK_MEMORY:
case BUF_BLOCK_REMOVE_HASH:
ut_error;
break;
}

ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);

mutex_enter(&block->mutex);
UNIV_MEM_ASSERT_RW(&block->page, sizeof block->page);

buf_block_buf_fix_inc(block, file, line);
buf_pool_mutex_exit();

/* Check if this is the first access to the page */

accessed = buf_page_is_accessed(&block->page);

buf_page_set_accessed(&block->page, TRUE);

mutex_exit(&block->mutex);

buf_block_make_young(&block->page);

#ifdef UNIV_DEBUG_FILE_ACCESSES
ut_a(!block->page.file_page_was_freed);
#endif

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(++buf_dbg_counter % 5771 || buf_validate());
ut_a(block->page.buf_fix_count > 0);
ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

if (mode == BUF_GET_NOWAIT) {
ibool success;

if (rw_latch == RW_S_LATCH) {
success = rw_lock_s_lock_func_nowait(&(block->lock),
file, line);
fix_type = MTR_MEMO_PAGE_S_FIX;
} else {
ut_ad(rw_latch == RW_X_LATCH);
success = rw_lock_x_lock_func_nowait(&(block->lock),
file, line);
fix_type = MTR_MEMO_PAGE_X_FIX;
}

if (!success) {
mutex_enter(&block->mutex);
buf_block_buf_fix_dec(block);
mutex_exit(&block->mutex);

return(NULL);
}
} else if (rw_latch == RW_NO_LATCH) {

if (must_read) {
/* Let us wait until the read operation
completes */

for (;;) {
enum buf_io_fix io_fix;

mutex_enter(&block->mutex);
io_fix = buf_block_get_io_fix(block);
mutex_exit(&block->mutex);

if (io_fix == BUF_IO_READ) {

os_thread_sleep(WAIT_FOR_READ);
} else {
break;
}
}
}

fix_type = MTR_MEMO_BUF_FIX;
} else if (rw_latch == RW_S_LATCH) {

rw_lock_s_lock_func(&(block->lock), 0, file, line);

fix_type = MTR_MEMO_PAGE_S_FIX;
} else {
rw_lock_x_lock_func(&(block->lock), 0, file, line);

fix_type = MTR_MEMO_PAGE_X_FIX;
}

mtr_memo_push(mtr, block, fix_type);

if (!accessed) {
/* In the case of a first access, try to apply linear
read-ahead */

buf_read_ahead_linear(space, zip_size, offset);
}

#ifdef UNIV_IBUF_COUNT_DEBUG
ut_a(ibuf_count_get(buf_block_get_space(block),
buf_block_get_page_no(block)) == 0);
#endif
return(block);
}
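
/* Editorial illustration (not in the original file): a typical call.
In this source tree buf_page_get() is a macro over buf_page_get_gen()
(see buf0buf.h); the sketch calls the _gen function directly so all
parameters are visible. */
#if 0	/* sketch only, not compiled */
void
example_fetch(ulint space, ulint zip_size, ulint page_no, mtr_t* mtr)
{
	buf_block_t*	block = buf_page_get_gen(
		space, zip_size, page_no, RW_S_LATCH,
		NULL /* no guess */, BUF_GET,
		__FILE__, __LINE__, mtr);

	/* ... access the frame via buf_block_get_frame(block);
	the S-latch is released when the mini-transaction commits. */
}
#endif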

/************************************************************************
This is the general function used to get optimistic access to a database
page. */
UNIV_INTERN
ibool
buf_page_optimistic_get_func(
/*=========================*/
/* out: TRUE if success */
ulint rw_latch,/* in: RW_S_LATCH, RW_X_LATCH */
buf_block_t* block, /* in: guessed buffer block */
ib_uint64_t modify_clock,/* in: modify clock value if mode is
..._GUESS_ON_CLOCK */
const char* file, /* in: file name */
ulint line, /* in: line where called */
mtr_t* mtr) /* in: mini-transaction */
{
ibool accessed;
ibool success;
ulint fix_type;

ut_ad(mtr && block);
ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));

mutex_enter(&block->mutex);

if (UNIV_UNLIKELY(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE)) {

mutex_exit(&block->mutex);

return(FALSE);
}

buf_block_buf_fix_inc(block, file, line);
accessed = buf_page_is_accessed(&block->page);
buf_page_set_accessed(&block->page, TRUE);

mutex_exit(&block->mutex);

buf_block_make_young(&block->page);

/* Check if this is the first access to the page */

ut_ad(!ibuf_inside()
|| ibuf_page(buf_block_get_space(block),
buf_block_get_zip_size(block),
buf_block_get_page_no(block)));

if (rw_latch == RW_S_LATCH) {
success = rw_lock_s_lock_func_nowait(&(block->lock),
file, line);
fix_type = MTR_MEMO_PAGE_S_FIX;
} else {
success = rw_lock_x_lock_func_nowait(&(block->lock),
file, line);
fix_type = MTR_MEMO_PAGE_X_FIX;
}

if (UNIV_UNLIKELY(!success)) {
mutex_enter(&block->mutex);
buf_block_buf_fix_dec(block);
mutex_exit(&block->mutex);

return(FALSE);
}

if (UNIV_UNLIKELY(modify_clock != block->modify_clock)) {
#ifdef UNIV_SYNC_DEBUG
buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
#endif /* UNIV_SYNC_DEBUG */
if (rw_latch == RW_S_LATCH) {
rw_lock_s_unlock(&(block->lock));
} else {
rw_lock_x_unlock(&(block->lock));
}

mutex_enter(&block->mutex);
buf_block_buf_fix_dec(block);
mutex_exit(&block->mutex);

return(FALSE);
}

mtr_memo_push(mtr, block, fix_type);

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(++buf_dbg_counter % 5771 || buf_validate());
ut_a(block->page.buf_fix_count > 0);
ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

#ifdef UNIV_DEBUG_FILE_ACCESSES
ut_a(block->page.file_page_was_freed == FALSE);
#endif
if (UNIV_UNLIKELY(!accessed)) {
/* In the case of a first access, try to apply linear
read-ahead */

buf_read_ahead_linear(buf_block_get_space(block),
buf_block_get_zip_size(block),
buf_block_get_page_no(block));
}

#ifdef UNIV_IBUF_COUNT_DEBUG
ut_a(ibuf_count_get(buf_block_get_space(block),
buf_block_get_page_no(block)) == 0);
#endif
buf_pool->n_page_gets++;

return(TRUE);
}
2241
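/* Illustrative sketch (not part of the original source): an optimistic
caller records block->modify_clock while it still holds a latch on the
block, releases the latch, and later revalidates its guess; the get fails
if the block was modified or evicted in between. Compiled out with #if 0;
buf_example_optimistic_refetch() is a hypothetical name. */
#if 0
static ibool
buf_example_optimistic_refetch(buf_block_t* guess, ib_uint64_t saved_clock,
			       mtr_t* mtr)
{
	/* Returns TRUE only if the guessed block still contains the
	same page and saved_clock still matches its modify clock. */
	return(buf_page_optimistic_get_func(RW_S_LATCH, guess, saved_clock,
					    __FILE__, __LINE__, mtr));
}
#endif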
/************************************************************************
This is used to get access to a known database page, when no waiting can be
done. For example, if a search in an adaptive hash index leads us to this
page. */
UNIV_INTERN
ibool
buf_page_get_known_nowait(
/*======================*/
				/* out: TRUE if success */
	ulint		rw_latch,/* in: RW_S_LATCH, RW_X_LATCH */
	buf_block_t*	block,	/* in: the known page */
	ulint		mode,	/* in: BUF_MAKE_YOUNG or BUF_KEEP_OLD */
	const char*	file,	/* in: file name */
	ulint		line,	/* in: line where called */
	mtr_t*		mtr)	/* in: mini-transaction */
{
	ibool		success;
	ulint		fix_type;

	ut_ad(mtr);
	ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));

	mutex_enter(&block->mutex);

	if (buf_block_get_state(block) == BUF_BLOCK_REMOVE_HASH) {
		/* Another thread is just freeing the block from the LRU list
		of the buffer pool: do not try to access this page; this
		attempt to access the page can only come through the hash
		index because when the buffer block state is ..._REMOVE_HASH,
		we have already removed it from the page address hash table
		of the buffer pool. */

		mutex_exit(&block->mutex);

		return(FALSE);
	}

	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);

	buf_block_buf_fix_inc(block, file, line);

	mutex_exit(&block->mutex);

	if (mode == BUF_MAKE_YOUNG) {
		buf_block_make_young(&block->page);
	}

	ut_ad(!ibuf_inside() || (mode == BUF_KEEP_OLD));

	if (rw_latch == RW_S_LATCH) {
		success = rw_lock_s_lock_func_nowait(&(block->lock),
						     file, line);
		fix_type = MTR_MEMO_PAGE_S_FIX;
	} else {
		success = rw_lock_x_lock_func_nowait(&(block->lock),
						     file, line);
		fix_type = MTR_MEMO_PAGE_X_FIX;
	}

	if (!success) {
		mutex_enter(&block->mutex);
		buf_block_buf_fix_dec(block);
		mutex_exit(&block->mutex);

		return(FALSE);
	}

	mtr_memo_push(mtr, block, fix_type);

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
	ut_a(++buf_dbg_counter % 5771 || buf_validate());
	ut_a(block->page.buf_fix_count > 0);
	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#ifdef UNIV_DEBUG_FILE_ACCESSES
	ut_a(block->page.file_page_was_freed == FALSE);
#endif

#ifdef UNIV_IBUF_COUNT_DEBUG
	ut_a((mode == BUF_KEEP_OLD)
	     || (ibuf_count_get(buf_block_get_space(block),
				buf_block_get_page_no(block)) == 0));
#endif
	buf_pool->n_page_gets++;

	return(TRUE);
}
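/* Illustrative sketch (not part of the original source): the adaptive hash
index stores raw block pointers, so the block it points at may be in the
middle of eviction; buf_page_get_known_nowait() fails instead of waiting.
Compiled out with #if 0; buf_example_hash_guess() is a hypothetical name. */
#if 0
static ibool
buf_example_hash_guess(buf_block_t* block, mtr_t* mtr)
{
	/* BUF_MAKE_YOUNG moves the page toward the young end of the
	LRU list; ibuf routines would pass BUF_KEEP_OLD instead. */
	return(buf_page_get_known_nowait(RW_S_LATCH, block, BUF_MAKE_YOUNG,
					 __FILE__, __LINE__, mtr));
}
#endif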
/***********************************************************************
Given a tablespace id and page number tries to get that page. If the
page is not in the buffer pool it is not loaded and NULL is returned.
Suitable for using when holding the kernel mutex. */
UNIV_INTERN
const buf_block_t*
buf_page_try_get_func(
/*==================*/
				/* out: pointer to a page or NULL */
	ulint		space_id,/* in: tablespace id */
	ulint		page_no,/* in: page number */
	const char*	file,	/* in: file name */
	ulint		line,	/* in: line where called */
	mtr_t*		mtr)	/* in: mini-transaction */
{
	buf_block_t*	block;
	ibool		success;
	ulint		fix_type;

	buf_pool_mutex_enter();
	block = buf_block_hash_get(space_id, page_no);

	if (!block) {
		buf_pool_mutex_exit();
		return(NULL);
	}

	mutex_enter(&block->mutex);
	buf_pool_mutex_exit();

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
	ut_a(buf_block_get_space(block) == space_id);
	ut_a(buf_block_get_page_no(block) == page_no);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

	buf_block_buf_fix_inc(block, file, line);
	mutex_exit(&block->mutex);

	fix_type = MTR_MEMO_PAGE_S_FIX;
	success = rw_lock_s_lock_func_nowait(&block->lock, file, line);

	if (!success) {
		/* Let us try to get an X-latch. If the current thread
		is holding an X-latch on the page, we cannot get an
		S-latch. */

		fix_type = MTR_MEMO_PAGE_X_FIX;
		success = rw_lock_x_lock_func_nowait(&block->lock,
						     file, line);
	}

	if (!success) {
		mutex_enter(&block->mutex);
		buf_block_buf_fix_dec(block);
		mutex_exit(&block->mutex);

		return(NULL);
	}

	mtr_memo_push(mtr, block, fix_type);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
	ut_a(++buf_dbg_counter % 5771 || buf_validate());
	ut_a(block->page.buf_fix_count > 0);
	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#ifdef UNIV_DEBUG_FILE_ACCESSES
	ut_a(block->page.file_page_was_freed == FALSE);
#endif /* UNIV_DEBUG_FILE_ACCESSES */
#ifdef UNIV_SYNC_DEBUG
	buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
#endif /* UNIV_SYNC_DEBUG */
	buf_pool->n_page_gets++;

	return(block);
}
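/* Illustrative sketch (not part of the original source): because this
function never does i/o and never waits, it can be used to peek at a page
while holding the kernel mutex. Compiled out with #if 0;
buf_example_peek_page() is a hypothetical name. */
#if 0
static const buf_block_t*
buf_example_peek_page(ulint space_id, ulint page_no, mtr_t* mtr)
{
	/* NULL means the page is not resident (or could not be
	latched without waiting); no read is triggered. */
	return(buf_page_try_get_func(space_id, page_no,
				     __FILE__, __LINE__, mtr));
}
#endif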
/************************************************************************
Initialize some fields of a control block. */
UNIV_INLINE
void
buf_page_init_low(
/*==============*/
	buf_page_t*	bpage)	/* in: block to init */
{
	bpage->flush_type = BUF_FLUSH_LRU;
	bpage->accessed = FALSE;
	bpage->io_fix = BUF_IO_NONE;
	bpage->buf_fix_count = 0;
	bpage->freed_page_clock = 0;
	bpage->newest_modification = 0;
	bpage->oldest_modification = 0;
	HASH_INVALIDATE(bpage, hash);
#ifdef UNIV_DEBUG_FILE_ACCESSES
	bpage->file_page_was_freed = FALSE;
#endif /* UNIV_DEBUG_FILE_ACCESSES */
}
#ifdef UNIV_HOTBACKUP
/************************************************************************
Inits a page to the buffer buf_pool, for use in ibbackup --restore. */
UNIV_INTERN
void
buf_page_init_for_backup_restore(
/*=============================*/
	ulint		space,	/* in: space id */
	ulint		offset,	/* in: offset of the page within space
				in units of a page */
	ulint		zip_size,/* in: compressed page size in bytes
				or 0 for uncompressed pages */
	buf_block_t*	block)	/* in: block to init */
{
	buf_block_init_low(block);

	block->lock_hash_val = 0;

	buf_page_init_low(&block->page);
	block->page.state = BUF_BLOCK_FILE_PAGE;
	block->page.space = space;
	block->page.offset = offset;

	page_zip_des_init(&block->page.zip);

	/* We assume that block->page.data has been allocated
	with zip_size == UNIV_PAGE_SIZE. */
	ut_ad(zip_size <= UNIV_PAGE_SIZE);
	ut_ad(ut_is_2pow(zip_size));
	page_zip_set_size(&block->page.zip, zip_size);
}
#endif /* UNIV_HOTBACKUP */
/************************************************************************
Inits a page to the buffer buf_pool. */
static
void
buf_page_init(
/*==========*/
	ulint		space,	/* in: space id */
	ulint		offset,	/* in: offset of the page within space
				in units of a page */
	buf_block_t*	block)	/* in: block to init */
{
	buf_page_t*	hash_page;

	ut_ad(buf_pool_mutex_own());
	ut_ad(mutex_own(&(block->mutex)));
	ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);

	/* Set the state of the block */
	buf_block_set_file_page(block, space, offset);

#ifdef UNIV_DEBUG_VALGRIND
	if (!space) {
		/* Silence valid Valgrind warnings about uninitialized
		data being written to data files. There are some unused
		bytes on some pages that InnoDB does not initialize. */
		UNIV_MEM_VALID(block->frame, UNIV_PAGE_SIZE);
	}
#endif /* UNIV_DEBUG_VALGRIND */

	buf_block_init_low(block);

	block->lock_hash_val = lock_rec_hash(space, offset);

	/* Insert into the hash table of file pages */

	hash_page = buf_page_hash_get(space, offset);

	if (UNIV_LIKELY_NULL(hash_page)) {
		fprintf(stderr,
			"InnoDB: Error: page %lu %lu already found"
			" in the hash table: %p, %p\n",
			(ulong) space,
			(ulong) offset,
			(const void*) hash_page, (const void*) block);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
		mutex_exit(&block->mutex);
		buf_pool_mutex_exit();
		buf_print();
		buf_LRU_print();
		buf_validate();
		buf_LRU_validate();
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
		ut_error;
	}

	buf_page_init_low(&block->page);

	ut_ad(!block->page.in_zip_hash);
	ut_ad(!block->page.in_page_hash);
	ut_d(block->page.in_page_hash = TRUE);
	HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
		    buf_page_address_fold(space, offset), &block->page);
}
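/* Illustrative sketch (not part of the original source): lookups use the
same fold as the HASH_INSERT above, via buf_page_hash_get(); a caller that
only wants to know whether a page is resident can do the following.
Compiled out with #if 0; buf_example_page_is_cached() is a hypothetical
name. */
#if 0
static ibool
buf_example_page_is_cached(ulint space, ulint offset)
{
	ibool	cached;

	/* The page hash is protected by buf_pool_mutex. */
	buf_pool_mutex_enter();
	cached = (buf_page_hash_get(space, offset) != NULL);
	buf_pool_mutex_exit();

	return(cached);
}
#endif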
/************************************************************************
Function which inits a page for read to the buffer buf_pool. If the page is
(1) already in buf_pool, or
(2) if we specify to read only ibuf pages and the page is not an ibuf page, or
(3) if the space is deleted or being deleted,
then this function does nothing.
Sets the io_fix flag to BUF_IO_READ and sets a non-recursive exclusive lock
on the buffer frame. The io-handler must take care that the flag is cleared
and the lock released later. */
UNIV_INTERN
buf_page_t*
buf_page_init_for_read(
/*===================*/
				/* out: pointer to the block or NULL */
	ulint*		err,	/* out: DB_SUCCESS or DB_TABLESPACE_DELETED */
	ulint		mode,	/* in: BUF_READ_IBUF_PAGES_ONLY, ... */
	ulint		space,	/* in: space id */
	ulint		zip_size,/* in: compressed page size, or 0 */
	ibool		unzip,	/* in: TRUE=request uncompressed page */
	ib_int64_t	tablespace_version,/* in: prevents reading from a wrong
				version of the tablespace in case we have done
				DISCARD + IMPORT */
	ulint		offset)	/* in: page number */
{
	buf_block_t*	block;
	buf_page_t*	bpage;
	mtr_t		mtr;
	ibool		lru	= FALSE;
	void*		data;

	ut_ad(buf_pool);

	*err = DB_SUCCESS;

	if (mode == BUF_READ_IBUF_PAGES_ONLY) {
		/* It is a read-ahead within an ibuf routine */

		ut_ad(!ibuf_bitmap_page(zip_size, offset));
		ut_ad(ibuf_inside());

		mtr_start(&mtr);

		if (!ibuf_page_low(space, zip_size, offset, &mtr)) {

			mtr_commit(&mtr);

			return(NULL);
		}
	} else {
		ut_ad(mode == BUF_READ_ANY_PAGE);
	}

	if (zip_size && UNIV_LIKELY(!unzip)
	    && UNIV_LIKELY(!recv_recovery_is_on())) {
		block = NULL;
	} else {
		block = buf_LRU_get_free_block(0);
		ut_ad(block);
	}

	buf_pool_mutex_enter();

	if (buf_page_hash_get(space, offset)) {
		/* The page is already in the buffer pool. */
err_exit:
		if (block) {
			mutex_enter(&block->mutex);
			buf_LRU_block_free_non_file_page(block);
			mutex_exit(&block->mutex);
		}
err_exit2:
		buf_pool_mutex_exit();

		if (mode == BUF_READ_IBUF_PAGES_ONLY) {

			mtr_commit(&mtr);
		}

		return(NULL);
	}

	if (fil_tablespace_deleted_or_being_deleted_in_mem(
		    space, tablespace_version)) {
		/* The page belongs to a space which has been
		deleted or is being deleted. */
		*err = DB_TABLESPACE_DELETED;

		goto err_exit;
	}

	if (block) {
		bpage = &block->page;
		mutex_enter(&block->mutex);
		buf_page_init(space, offset, block);

		/* The block must be put to the LRU list, to the old blocks */
		buf_LRU_add_block(bpage, TRUE/* to old blocks */);

		/* We set a pass-type x-lock on the frame because then
		the same thread which called for the read operation
		(and is running now at this point of code) can wait
		for the read to complete by waiting for the x-lock on
		the frame; if the x-lock were recursive, the same
		thread would illegally get the x-lock before the page
		read is completed. The x-lock is cleared by the
		io-handler thread. */

		rw_lock_x_lock_gen(&block->lock, BUF_IO_READ);
		buf_page_set_io_fix(bpage, BUF_IO_READ);

		if (UNIV_UNLIKELY(zip_size)) {
			page_zip_set_size(&block->page.zip, zip_size);

			/* buf_pool_mutex may be released and
			reacquired by buf_buddy_alloc(). Thus, we
			must release block->mutex in order not to
			break the latching order in the reacquisition
			of buf_pool_mutex. We also must defer this
			operation until after the block descriptor has
			been added to buf_pool->LRU and
			buf_pool->page_hash. */
			mutex_exit(&block->mutex);
			data = buf_buddy_alloc(zip_size, &lru);
			mutex_enter(&block->mutex);
			block->page.zip.data = data;

			/* To maintain the invariant
			block->in_unzip_LRU_list
			== buf_page_belongs_to_unzip_LRU(&block->page)
			we have to add this block to unzip_LRU
			after block->page.zip.data is set. */
			ut_ad(buf_page_belongs_to_unzip_LRU(&block->page));
			buf_unzip_LRU_add_block(block, TRUE);
		}

		mutex_exit(&block->mutex);
	} else {
		/* Defer buf_buddy_alloc() until after the block has
		been found not to exist. The buf_buddy_alloc() and
		buf_buddy_free() calls may be expensive because of
		buf_buddy_relocate(). */

		/* The compressed page must be allocated before the
		control block (bpage), in order to avoid the
		invocation of buf_buddy_relocate_block() on
		uninitialized data. */
		data = buf_buddy_alloc(zip_size, &lru);
		bpage = buf_buddy_alloc(sizeof *bpage, &lru);

		/* If buf_buddy_alloc() allocated storage from the LRU list,
		it released and reacquired buf_pool_mutex. Thus, we must
		check the page_hash again, as it may have been modified. */
		if (UNIV_UNLIKELY(lru)
		    && UNIV_LIKELY_NULL(buf_page_hash_get(space, offset))) {

			/* The block was added by some other thread. */
			buf_buddy_free(bpage, sizeof *bpage);
			buf_buddy_free(data, zip_size);

			goto err_exit2;
		}

		page_zip_des_init(&bpage->zip);
		page_zip_set_size(&bpage->zip, zip_size);
		bpage->zip.data = data;

		mutex_enter(&buf_pool_zip_mutex);
		UNIV_MEM_DESC(bpage->zip.data,
			      page_zip_get_size(&bpage->zip), bpage);
		buf_page_init_low(bpage);
		bpage->state = BUF_BLOCK_ZIP_PAGE;
		bpage->space = space;
		bpage->offset = offset;

#ifdef UNIV_DEBUG
		bpage->in_page_hash = FALSE;
		bpage->in_zip_hash = FALSE;
		bpage->in_flush_list = FALSE;
		bpage->in_free_list = FALSE;
		bpage->in_LRU_list = FALSE;
#endif /* UNIV_DEBUG */

		ut_d(bpage->in_page_hash = TRUE);
		HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
			    buf_page_address_fold(space, offset), bpage);

		/* The block must be put to the LRU list, to the old blocks */
		buf_LRU_add_block(bpage, TRUE/* to old blocks */);
		buf_LRU_insert_zip_clean(bpage);

		buf_page_set_io_fix(bpage, BUF_IO_READ);

		mutex_exit(&buf_pool_zip_mutex);
	}

	buf_pool->n_pend_reads++;
	buf_pool_mutex_exit();

	if (mode == BUF_READ_IBUF_PAGES_ONLY) {

		mtr_commit(&mtr);
	}

	ut_ad(buf_page_in_file(bpage));
	return(bpage);
}
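/* Illustrative sketch (not part of the original source): the pass-type
x-lock described above means the thread that issued the read can wait for
completion simply by latching the frame, because the io-handler releases
the BUF_IO_READ x-lock only when the read has finished. Compiled out with
#if 0; buf_example_wait_for_read() is a hypothetical name. */
#if 0
static void
buf_example_wait_for_read(buf_block_t* block)
{
	/* Blocks until buf_page_io_complete() has released the x-lock
	that buf_page_init_for_read() set with BUF_IO_READ. */
	rw_lock_s_lock(&block->lock);
	rw_lock_s_unlock(&block->lock);
}
#endif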
/************************************************************************
Initializes a page to the buffer buf_pool. The page is usually not read
from a file even if it cannot be found in the buffer buf_pool. This is one
of the functions which perform to a block a state transition NOT_USED =>
FILE_PAGE (the other is buf_page_get_gen). */
UNIV_INTERN
buf_block_t*
buf_page_create(
/*============*/
			/* out: pointer to the block, page bufferfixed */
	ulint	space,	/* in: space id */
	ulint	offset,	/* in: offset of the page within space in units of
			a page */
	ulint	zip_size,/* in: compressed page size, or 0 */
	mtr_t*	mtr)	/* in: mini-transaction handle */
{
	buf_frame_t*	frame;
	buf_block_t*	block;
	buf_block_t*	free_block	= NULL;

	ut_ad(mtr);
	ut_ad(space || !zip_size);

	free_block = buf_LRU_get_free_block(0);

	buf_pool_mutex_enter();

	block = (buf_block_t*) buf_page_hash_get(space, offset);

	if (block && buf_page_in_file(&block->page)) {
#ifdef UNIV_IBUF_COUNT_DEBUG
		ut_a(ibuf_count_get(space, offset) == 0);
#endif
#ifdef UNIV_DEBUG_FILE_ACCESSES
		block->page.file_page_was_freed = FALSE;
#endif /* UNIV_DEBUG_FILE_ACCESSES */

		/* Page can be found in buf_pool */
		buf_pool_mutex_exit();

		buf_block_free(free_block);

		return(buf_page_get_with_no_latch(space, zip_size,
						  offset, mtr));
	}

	/* If we get here, the page was not in buf_pool: init it there */

#ifdef UNIV_DEBUG
	if (buf_debug_prints) {
		fprintf(stderr, "Creating space %lu page %lu to buffer\n",
			(ulong) space, (ulong) offset);
	}
#endif /* UNIV_DEBUG */

	block = free_block;

	mutex_enter(&block->mutex);

	buf_page_init(space, offset, block);

	/* The block must be put to the LRU list */
	buf_LRU_add_block(&block->page, FALSE);

	buf_block_buf_fix_inc(block, __FILE__, __LINE__);
	buf_pool->n_pages_created++;

	if (zip_size) {
		void*	data;
		ibool	lru;

		/* Prevent race conditions during buf_buddy_alloc(),
		which may release and reacquire buf_pool_mutex,
		by IO-fixing and X-latching the block. */

		buf_page_set_io_fix(&block->page, BUF_IO_READ);
		rw_lock_x_lock(&block->lock);

		page_zip_set_size(&block->page.zip, zip_size);
		mutex_exit(&block->mutex);
		/* buf_pool_mutex may be released and reacquired by
		buf_buddy_alloc(). Thus, we must release block->mutex
		in order not to break the latching order in
		the reacquisition of buf_pool_mutex. We also must
		defer this operation until after the block descriptor
		has been added to buf_pool->LRU and buf_pool->page_hash. */
		data = buf_buddy_alloc(zip_size, &lru);
		mutex_enter(&block->mutex);
		block->page.zip.data = data;

		/* To maintain the invariant
		block->in_unzip_LRU_list
		== buf_page_belongs_to_unzip_LRU(&block->page)
		we have to add this block to unzip_LRU after
		block->page.zip.data is set. */
		ut_ad(buf_page_belongs_to_unzip_LRU(&block->page));
		buf_unzip_LRU_add_block(block, FALSE);

		buf_page_set_io_fix(&block->page, BUF_IO_NONE);
		rw_lock_x_unlock(&block->lock);
	}

	buf_pool_mutex_exit();

	mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);

	buf_page_set_accessed(&block->page, TRUE);

	mutex_exit(&block->mutex);

	/* Delete possible entries for the page from the insert buffer:
	such can exist if the page belonged to an index which was dropped */

	ibuf_merge_or_delete_for_page(NULL, space, offset, zip_size, TRUE);

	/* Flush pages from the end of the LRU list if necessary */
	buf_flush_free_margin();

	frame = block->frame;

	memset(frame + FIL_PAGE_PREV, 0xff, 4);
	memset(frame + FIL_PAGE_NEXT, 0xff, 4);
	mach_write_to_2(frame + FIL_PAGE_TYPE, FIL_PAGE_TYPE_ALLOCATED);

	/* Reset to zero the file flush lsn field in the page; if the first
	page of an ibdata file is 'created' in this function into the buffer
	pool then we lose the original contents of the file flush lsn stamp.
	Then InnoDB could in a crash recovery print a big, false, corruption
	warning if the stamp contains an lsn bigger than the ib_logfile lsn. */

	memset(frame + FIL_PAGE_FILE_FLUSH_LSN, 0, 8);

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
	ut_a(++buf_dbg_counter % 357 || buf_validate());
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#ifdef UNIV_IBUF_COUNT_DEBUG
	ut_a(ibuf_count_get(buf_block_get_space(block),
			    buf_block_get_page_no(block)) == 0);
#endif
	return(block);
}
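/* Illustrative sketch (not part of the original source): creating a fresh
page, e.g. when a tablespace is extended, follows a pattern similar to
this; buf_page_create() only buffer-fixes the block, so the caller still
latches it before formatting the frame. Compiled out with #if 0;
buf_example_new_page() is a hypothetical name. */
#if 0
static buf_block_t*
buf_example_new_page(ulint space, ulint offset, ulint zip_size, mtr_t* mtr)
{
	buf_block_t*	block;

	/* The block comes back bufferfixed (MTR_MEMO_BUF_FIX). */
	block = buf_page_create(space, offset, zip_size, mtr);

	/* X-latch it before writing to block->frame. */
	block = buf_page_get_gen(space, zip_size, offset, RW_X_LATCH,
				 NULL, BUF_GET, __FILE__, __LINE__, mtr);

	return(block);
}
#endif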
/************************************************************************
Completes an asynchronous read or write request of a file page to or from
the buffer pool. */
UNIV_INTERN
void
buf_page_io_complete(
/*=================*/
	buf_page_t*	bpage)	/* in: pointer to the block in question */
{
	enum buf_io_fix	io_type;
	const ibool	uncompressed = (buf_page_get_state(bpage)
					== BUF_BLOCK_FILE_PAGE);

	ut_a(buf_page_in_file(bpage));

	/* We do not need protect io_fix here by mutex to read
	it because this is the only function where we can change the value
	from BUF_IO_READ or BUF_IO_WRITE to some other value, and our code
	ensures that this is the only thread that handles the i/o for this
	block. */

	io_type = buf_page_get_io_fix(bpage);
	ut_ad(io_type == BUF_IO_READ || io_type == BUF_IO_WRITE);

	if (io_type == BUF_IO_READ) {
		ulint	read_page_no;
		ulint	read_space_id;
		byte*	frame;

		if (buf_page_get_zip_size(bpage)) {
			frame = bpage->zip.data;
			buf_pool->n_pend_unzip++;
			if (uncompressed
			    && !buf_zip_decompress((buf_block_t*) bpage,
						   FALSE)) {

				buf_pool->n_pend_unzip--;
				goto corrupt;
			}
			buf_pool->n_pend_unzip--;
		} else {
			ut_a(uncompressed);
			frame = ((buf_block_t*) bpage)->frame;
		}

		/* If this page is not uninitialized and not in the
		doublewrite buffer, then the page number and space id
		should be the same as in block. */
		read_page_no = mach_read_from_4(frame + FIL_PAGE_OFFSET);
		read_space_id = mach_read_from_4(
			frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);

		if (bpage->space == TRX_SYS_SPACE
		    && trx_doublewrite_page_inside(bpage->offset)) {

			ut_print_timestamp(stderr);
			fprintf(stderr,
				"  InnoDB: Error: reading page %lu\n"
				"InnoDB: which is in the"
				" doublewrite buffer!\n",
				(ulong) bpage->offset);
		} else if (!read_space_id && !read_page_no) {
			/* This is likely an uninitialized page. */
		} else if ((bpage->space
			    && bpage->space != read_space_id)
			   || bpage->offset != read_page_no) {
			/* We did not compare space_id to read_space_id
			if bpage->space == 0, because the field on the
			page may contain garbage in MySQL < 4.1.1,
			which only supported bpage->space == 0. */

			ut_print_timestamp(stderr);
			fprintf(stderr,
				"  InnoDB: Error: space id and page n:o"
				" stored in the page\n"
				"InnoDB: read in are %lu:%lu,"
				" should be %lu:%lu!\n",
				(ulong) read_space_id, (ulong) read_page_no,
				(ulong) bpage->space,
				(ulong) bpage->offset);
		}

		/* From version 3.23.38 up we store the page checksum
		to the 4 first bytes of the page end lsn field */

		if (buf_page_is_corrupted(frame,
					  buf_page_get_zip_size(bpage))) {
corrupt:
			fprintf(stderr,
				"InnoDB: Database page corruption on disk"
				" or a failed\n"
				"InnoDB: file read of page %lu.\n"
				"InnoDB: You may have to recover"
				" from a backup.\n",
				(ulong) bpage->offset);
			buf_page_print(frame, buf_page_get_zip_size(bpage));
			fprintf(stderr,
				"InnoDB: Database page corruption on disk"
				" or a failed\n"
				"InnoDB: file read of page %lu.\n"
				"InnoDB: You may have to recover"
				" from a backup.\n",
				(ulong) bpage->offset);
			fputs("InnoDB: It is also possible that"
			      " your operating\n"
			      "InnoDB: system has corrupted its"
			      " own file cache\n"
			      "InnoDB: and rebooting your computer"
			      " removes the\n"
			      "InnoDB: error.\n"
			      "InnoDB: If the corrupt page is an index page\n"
			      "InnoDB: you can also try to"
			      " fix the corruption\n"
			      "InnoDB: by dumping, dropping,"
			      " and reimporting\n"
			      "InnoDB: the corrupt table."
			      " You can use CHECK\n"
			      "InnoDB: TABLE to scan your"
			      " table for corruption.\n"
			      "InnoDB: See also"
			      " http://dev.mysql.com/doc/refman/5.1/en/"
			      "forcing-recovery.html\n"
			      "InnoDB: about forcing recovery.\n", stderr);

			if (srv_force_recovery < SRV_FORCE_IGNORE_CORRUPT) {
				fputs("InnoDB: Ending processing because of"
				      " a corrupt database page.\n",
				      stderr);
				exit(1);
			}
		}

		if (recv_recovery_is_on()) {
			/* Pages must be uncompressed for crash recovery. */
			ut_a(uncompressed);
			recv_recover_page(FALSE, TRUE, (buf_block_t*) bpage);
		}

		if (uncompressed && !recv_no_ibuf_operations) {
			ibuf_merge_or_delete_for_page(
				(buf_block_t*) bpage, bpage->space,
				bpage->offset, buf_page_get_zip_size(bpage),
				TRUE);
		}
	}

	buf_pool_mutex_enter();
	mutex_enter(buf_page_get_mutex(bpage));

#ifdef UNIV_IBUF_COUNT_DEBUG
	if (io_type == BUF_IO_WRITE || uncompressed) {
		/* For BUF_IO_READ of compressed-only blocks, the
		buffered operations will be merged by buf_page_get_gen()
		after the block has been uncompressed. */
		ut_a(ibuf_count_get(bpage->space, bpage->offset) == 0);
	}
#endif
	/* Because this thread which does the unlocking is not the same that
	did the locking, we use a pass value != 0 in unlock, which simply
	removes the newest lock debug record, without checking the thread
	id. */

	buf_page_set_io_fix(bpage, BUF_IO_NONE);

	switch (io_type) {
	case BUF_IO_READ:
		/* NOTE that the call to ibuf may have moved the ownership of
		the x-latch to this OS thread: do not let this confuse you in
		debugging! */

		ut_ad(buf_pool->n_pend_reads > 0);
		buf_pool->n_pend_reads--;
		buf_pool->n_pages_read++;

		if (uncompressed) {
			rw_lock_x_unlock_gen(&((buf_block_t*) bpage)->lock,
					     BUF_IO_READ);
		}

		break;

	case BUF_IO_WRITE:
		/* Write means a flush operation: call the completion
		routine in the flush system */

		buf_flush_write_complete(bpage);

		if (uncompressed) {
			rw_lock_s_unlock_gen(&((buf_block_t*) bpage)->lock,
					     BUF_IO_WRITE);
		}

		buf_pool->n_pages_written++;

		break;

	default:
		ut_error;
	}

	mutex_exit(buf_page_get_mutex(bpage));
	buf_pool_mutex_exit();

#ifdef UNIV_DEBUG
	if (buf_debug_prints) {
		fprintf(stderr, "Has %s page space %lu page no %lu\n",
			io_type == BUF_IO_READ ? "read" : "written",
			(ulong) buf_page_get_space(bpage),
			(ulong) buf_page_get_page_no(bpage));
	}
#endif /* UNIV_DEBUG */
}
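/* Illustrative sketch (not part of the original source): the asynchronous
i/o handler threads end up here; each waits on its i/o segment, and
fil_aio_wait() (in fil0fil.c) calls buf_page_io_complete() for the buffer
page the finished request belongs to. Compiled out with #if 0;
buf_example_io_handler_loop() is a hypothetical name. */
#if 0
static void
buf_example_io_handler_loop(ulint segment)
{
	for (;;) {
		/* Completes one request: clears the io_fix and releases
		the frame lock via buf_page_io_complete(). */
		fil_aio_wait(segment);
	}
}
#endif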
/*************************************************************************
Invalidates the file pages in the buffer pool when an archive recovery is
completed. All the file pages buffered must be in a replaceable state when
this function is called: not latched and not modified. */
UNIV_INTERN
void
buf_pool_invalidate(void)
/*=====================*/
{
	ibool	freed;

	ut_ad(buf_all_freed());

	freed = TRUE;

	while (freed) {
		freed = buf_LRU_search_and_free_block(100);
	}

	buf_pool_mutex_enter();

	ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0);
	ut_ad(UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0);

	buf_pool_mutex_exit();
}
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/*************************************************************************
Validates the buffer buf_pool data structure. */
UNIV_INTERN
ibool
buf_validate(void)
/*==============*/
{
	buf_page_t*	b;
	buf_chunk_t*	chunk;
	ulint		i;
	ulint		n_single_flush	= 0;
	ulint		n_lru_flush	= 0;
	ulint		n_list_flush	= 0;
	ulint		n_lru		= 0;
	ulint		n_flush		= 0;
	ulint		n_free		= 0;
	ulint		n_zip		= 0;

	ut_ad(buf_pool);

	buf_pool_mutex_enter();

	chunk = buf_pool->chunks;

	/* Check the uncompressed blocks. */

	for (i = buf_pool->n_chunks; i--; chunk++) {

		ulint		j;
		buf_block_t*	block = chunk->blocks;

		for (j = chunk->size; j--; block++) {

			mutex_enter(&block->mutex);

			switch (buf_block_get_state(block)) {
			case BUF_BLOCK_ZIP_FREE:
			case BUF_BLOCK_ZIP_PAGE:
			case BUF_BLOCK_ZIP_DIRTY:
				/* These should only occur on
				zip_clean, zip_free[], or flush_list. */
				ut_error;
				break;

			case BUF_BLOCK_FILE_PAGE:
				ut_a(buf_page_hash_get(buf_block_get_space(
							       block),
						       buf_block_get_page_no(
							       block))
				     == &block->page);

#ifdef UNIV_IBUF_COUNT_DEBUG
				ut_a(buf_page_get_io_fix(&block->page)
				     == BUF_IO_READ
				     || !ibuf_count_get(buf_block_get_space(
								block),
							buf_block_get_page_no(
								block)));
#endif
				switch (buf_page_get_io_fix(&block->page)) {
				case BUF_IO_NONE:
					break;

				case BUF_IO_WRITE:
					switch (buf_page_get_flush_type(
							&block->page)) {
					case BUF_FLUSH_LRU:
						n_lru_flush++;
						ut_a(rw_lock_is_locked(
							     &block->lock,
							     RW_LOCK_SHARED));
						break;
					case BUF_FLUSH_LIST:
						n_list_flush++;
						break;
					case BUF_FLUSH_SINGLE_PAGE:
						n_single_flush++;
						break;
					default:
						ut_error;
					}

					break;

				case BUF_IO_READ:
					ut_a(rw_lock_is_locked(&block->lock,
							       RW_LOCK_EX));
					break;
				}

				n_lru++;

				if (block->page.oldest_modification > 0) {
					n_flush++;
				}

				break;

			case BUF_BLOCK_NOT_USED:
				n_free++;
				break;

			case BUF_BLOCK_READY_FOR_USE:
			case BUF_BLOCK_MEMORY:
			case BUF_BLOCK_REMOVE_HASH:
				/* do nothing */
				break;
			}

			mutex_exit(&block->mutex);
		}
	}

	mutex_enter(&buf_pool_zip_mutex);

	/* Check clean compressed-only blocks. */

	for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
	     b = UT_LIST_GET_NEXT(list, b)) {
		ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
		switch (buf_page_get_io_fix(b)) {
		case BUF_IO_NONE:
			/* All clean blocks should be I/O-unfixed. */
			break;
		case BUF_IO_READ:
			/* In buf_LRU_free_block(), we temporarily set
			b->io_fix = BUF_IO_READ for a newly allocated
			control block in order to prevent
			buf_page_get_gen() from decompressing the block. */
			break;
		default:
			ut_error;
			break;
		}
		ut_a(!b->oldest_modification);
		ut_a(buf_page_hash_get(b->space, b->offset) == b);

		n_lru++;
		n_zip++;
	}

	/* Check dirty compressed-only blocks. */

	for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
	     b = UT_LIST_GET_NEXT(list, b)) {
		ut_ad(b->in_flush_list);

		switch (buf_page_get_state(b)) {
		case BUF_BLOCK_ZIP_DIRTY:
			ut_a(b->oldest_modification);
			n_lru++;
			n_flush++;
			n_zip++;
			switch (buf_page_get_io_fix(b)) {
			case BUF_IO_NONE:
			case BUF_IO_READ:
				break;

			case BUF_IO_WRITE:
				switch (buf_page_get_flush_type(b)) {
				case BUF_FLUSH_LRU:
					n_lru_flush++;
					break;
				case BUF_FLUSH_LIST:
					n_list_flush++;
					break;
				case BUF_FLUSH_SINGLE_PAGE:
					n_single_flush++;
					break;
				default:
					ut_error;
				}
				break;
			}
			break;
		case BUF_BLOCK_FILE_PAGE:
			/* uncompressed page */
			break;
		case BUF_BLOCK_ZIP_FREE:
		case BUF_BLOCK_ZIP_PAGE:
		case BUF_BLOCK_NOT_USED:
		case BUF_BLOCK_READY_FOR_USE:
		case BUF_BLOCK_MEMORY:
		case BUF_BLOCK_REMOVE_HASH:
			ut_error;
			break;
		}
		ut_a(buf_page_hash_get(b->space, b->offset) == b);
	}

	mutex_exit(&buf_pool_zip_mutex);

	if (n_lru + n_free > buf_pool->curr_size + n_zip) {
		fprintf(stderr, "n LRU %lu, n free %lu, pool %lu zip %lu\n",
			(ulong) n_lru, (ulong) n_free,
			(ulong) buf_pool->curr_size, (ulong) n_zip);
		ut_error;
	}

	ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == n_lru);
	if (UT_LIST_GET_LEN(buf_pool->free) != n_free) {
		fprintf(stderr, "Free list len %lu, free blocks %lu\n",
			(ulong) UT_LIST_GET_LEN(buf_pool->free),
			(ulong) n_free);
		ut_error;
	}
	ut_a(UT_LIST_GET_LEN(buf_pool->flush_list) == n_flush);

	ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_single_flush);
	ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
	ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush);

	buf_pool_mutex_exit();

	ut_a(buf_LRU_validate());
	ut_a(buf_flush_validate());

	return(TRUE);
}
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/*************************************************************************
Prints info of the buffer buf_pool data structure. */
UNIV_INTERN
void
buf_print(void)
/*===========*/
{
	dulint*		index_ids;
	ulint*		counts;
	ulint		size;
	ulint		i;
	ulint		j;
	dulint		id;
	ulint		n_found;
	buf_chunk_t*	chunk;
	dict_index_t*	index;

	ut_ad(buf_pool);

	size = buf_pool->curr_size;

	index_ids = mem_alloc(sizeof(dulint) * size);
	counts = mem_alloc(sizeof(ulint) * size);

	buf_pool_mutex_enter();

	fprintf(stderr,
		"buf_pool size %lu\n"
		"database pages %lu\n"
		"free pages %lu\n"
		"modified database pages %lu\n"
		"n pending decompressions %lu\n"
		"n pending reads %lu\n"
		"n pending flush LRU %lu list %lu single page %lu\n"
		"pages read %lu, created %lu, written %lu\n",
		(ulong) size,
		(ulong) UT_LIST_GET_LEN(buf_pool->LRU),
		(ulong) UT_LIST_GET_LEN(buf_pool->free),
		(ulong) UT_LIST_GET_LEN(buf_pool->flush_list),
		(ulong) buf_pool->n_pend_unzip,
		(ulong) buf_pool->n_pend_reads,
		(ulong) buf_pool->n_flush[BUF_FLUSH_LRU],
		(ulong) buf_pool->n_flush[BUF_FLUSH_LIST],
		(ulong) buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE],
		(ulong) buf_pool->n_pages_read,
		(ulong) buf_pool->n_pages_created,
		(ulong) buf_pool->n_pages_written);

	/* Count the number of blocks belonging to each index in the buffer */

	n_found = 0;

	chunk = buf_pool->chunks;

	for (i = buf_pool->n_chunks; i--; chunk++) {
		buf_block_t*	block		= chunk->blocks;
		ulint		n_blocks	= chunk->size;

		for (; n_blocks--; block++) {
			const buf_frame_t* frame = block->frame;

			if (fil_page_get_type(frame) == FIL_PAGE_INDEX) {

				id = btr_page_get_index_id(frame);

				/* Look for the id in the index_ids array */
				j = 0;

				while (j < n_found) {

					if (ut_dulint_cmp(index_ids[j],
							  id) == 0) {
						counts[j]++;

						break;
					}
					j++;
				}

				if (j == n_found) {
					n_found++;
					index_ids[j] = id;
					counts[j] = 1;
				}
			}
		}
	}

	buf_pool_mutex_exit();

	for (i = 0; i < n_found; i++) {
		index = dict_index_get_if_in_cache(index_ids[i]);

		fprintf(stderr,
			"Block count for index %lu in buffer is about %lu",
			(ulong) ut_dulint_get_low(index_ids[i]),
			(ulong) counts[i]);

		if (index) {
			putc(' ', stderr);
			dict_index_name_print(stderr, NULL, index);
		}

		putc('\n', stderr);
	}

	mem_free(index_ids);
	mem_free(counts);

	ut_a(buf_validate());
}
#endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */
/*************************************************************************
Returns the number of latched pages in the buffer pool. */
UNIV_INTERN
ulint
buf_get_latched_pages_number(void)
/*==============================*/
{
	buf_page_t*	b;
	buf_chunk_t*	chunk;
	ulint		i;
	ulint		fixed_pages_number = 0;

	buf_pool_mutex_enter();

	chunk = buf_pool->chunks;

	for (i = buf_pool->n_chunks; i--; chunk++) {
		buf_block_t*	block;
		ulint		j;

		block = chunk->blocks;

		for (j = chunk->size; j--; block++) {
			if (buf_block_get_state(block)
			    != BUF_BLOCK_FILE_PAGE) {

				continue;
			}

			mutex_enter(&block->mutex);

			if (block->page.buf_fix_count != 0
			    || buf_page_get_io_fix(&block->page)
			    != BUF_IO_NONE) {
				fixed_pages_number++;
			}

			mutex_exit(&block->mutex);
		}
	}

	mutex_enter(&buf_pool_zip_mutex);

	/* Traverse the lists of clean and dirty compressed-only blocks. */

	for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
	     b = UT_LIST_GET_NEXT(list, b)) {
		ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
		ut_a(buf_page_get_io_fix(b) != BUF_IO_WRITE);

		if (b->buf_fix_count != 0
		    || buf_page_get_io_fix(b) != BUF_IO_NONE) {
			fixed_pages_number++;
		}
	}

	for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
	     b = UT_LIST_GET_NEXT(list, b)) {
		ut_ad(b->in_flush_list);

		switch (buf_page_get_state(b)) {
		case BUF_BLOCK_ZIP_DIRTY:
			if (b->buf_fix_count != 0
			    || buf_page_get_io_fix(b) != BUF_IO_NONE) {
				fixed_pages_number++;
			}
			break;
		case BUF_BLOCK_FILE_PAGE:
			/* uncompressed page */
			break;
		case BUF_BLOCK_ZIP_FREE:
		case BUF_BLOCK_ZIP_PAGE:
		case BUF_BLOCK_NOT_USED:
		case BUF_BLOCK_READY_FOR_USE:
		case BUF_BLOCK_MEMORY:
		case BUF_BLOCK_REMOVE_HASH:
			ut_error;
			break;
		}
	}

	mutex_exit(&buf_pool_zip_mutex);
	buf_pool_mutex_exit();

	return(fixed_pages_number);
}
/*************************************************************************
Returns the number of pending buf pool ios. */
UNIV_INTERN
ulint
buf_get_n_pending_ios(void)
/*=======================*/
{
	return(buf_pool->n_pend_reads
	       + buf_pool->n_flush[BUF_FLUSH_LRU]
	       + buf_pool->n_flush[BUF_FLUSH_LIST]
	       + buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]);
}
/*************************************************************************
Returns the ratio in percents of modified pages in the buffer pool /
database pages in the buffer pool. */
UNIV_INTERN
ulint
buf_get_modified_ratio_pct(void)
/*============================*/
{
	ulint	ratio;

	buf_pool_mutex_enter();

	ratio = (100 * UT_LIST_GET_LEN(buf_pool->flush_list))
		/ (1 + UT_LIST_GET_LEN(buf_pool->LRU)
		   + UT_LIST_GET_LEN(buf_pool->free));

	/* 1 + is there to avoid division by zero */

	buf_pool_mutex_exit();

	return(ratio);
}
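/* Worked example (not part of the original source): with 300 pages on the
flush_list, 900 pages on the LRU list and 99 blocks on the free list, the
result is (100 * 300) / (1 + 900 + 99) = 30000 / 1000 = 30 percent. */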
/*************************************************************************
Prints info of the buffer i/o. */
UNIV_INTERN
void
buf_print_io(
/*=========*/
	FILE*	file)	/* in/out: buffer where to print */
{
	time_t	current_time;
	double	time_elapsed;
	ulint	size;

	ut_ad(buf_pool);
	size = buf_pool->curr_size;

	buf_pool_mutex_enter();

	fprintf(file,
		"Buffer pool size %lu\n"
		"Free buffers %lu\n"
		"Database pages %lu\n"
		"Modified db pages %lu\n"
		"Pending reads %lu\n"
		"Pending writes: LRU %lu, flush list %lu, single page %lu\n",
		(ulong) size,
		(ulong) UT_LIST_GET_LEN(buf_pool->free),
		(ulong) UT_LIST_GET_LEN(buf_pool->LRU),
		(ulong) UT_LIST_GET_LEN(buf_pool->flush_list),
		(ulong) buf_pool->n_pend_reads,
		(ulong) (buf_pool->n_flush[BUF_FLUSH_LRU]
			 + buf_pool->init_flush[BUF_FLUSH_LRU]),
		(ulong) (buf_pool->n_flush[BUF_FLUSH_LIST]
			 + buf_pool->init_flush[BUF_FLUSH_LIST]),
		(ulong) buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]);

	current_time = time(NULL);
	time_elapsed = 0.001 + difftime(current_time,
					buf_pool->last_printout_time);
	buf_pool->last_printout_time = current_time;

	fprintf(file,
		"Pages read %lu, created %lu, written %lu\n"
		"%.2f reads/s, %.2f creates/s, %.2f writes/s\n",
		(ulong) buf_pool->n_pages_read,
		(ulong) buf_pool->n_pages_created,
		(ulong) buf_pool->n_pages_written,
		(buf_pool->n_pages_read - buf_pool->n_pages_read_old)
		/ time_elapsed,
		(buf_pool->n_pages_created - buf_pool->n_pages_created_old)
		/ time_elapsed,
		(buf_pool->n_pages_written - buf_pool->n_pages_written_old)
		/ time_elapsed);

	if (buf_pool->n_page_gets > buf_pool->n_page_gets_old) {
		fprintf(file, "Buffer pool hit rate %lu / 1000\n",
			(ulong)
			(1000 - ((1000 * (buf_pool->n_pages_read
					  - buf_pool->n_pages_read_old))
				 / (buf_pool->n_page_gets
				    - buf_pool->n_page_gets_old))));
	} else {
		fputs("No buffer pool page gets since the last printout\n",
		      file);
	}

	buf_pool->n_page_gets_old = buf_pool->n_page_gets;
	buf_pool->n_pages_read_old = buf_pool->n_pages_read;
	buf_pool->n_pages_created_old = buf_pool->n_pages_created;
	buf_pool->n_pages_written_old = buf_pool->n_pages_written;

	/* Print some values to help us with visualizing what is
	happening with LRU eviction. */
	fprintf(file,
		"LRU len: %lu, unzip_LRU len: %lu\n"
		"I/O sum[%lu]:cur[%lu], unzip sum[%lu]:cur[%lu]\n",
		UT_LIST_GET_LEN(buf_pool->LRU),
		UT_LIST_GET_LEN(buf_pool->unzip_LRU),
		buf_LRU_stat_sum.io, buf_LRU_stat_cur.io,
		buf_LRU_stat_sum.unzip, buf_LRU_stat_cur.unzip);

	buf_pool_mutex_exit();
}
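/* Worked example (not part of the original source): if 5000 page gets and
250 disk reads happened since the last printout, the hit rate is
1000 - (1000 * 250) / 5000 = 1000 - 50 = 950, printed as "950 / 1000". */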
/**************************************************************************
Refreshes the statistics used to print per-second averages. */
UNIV_INTERN
void
buf_refresh_io_stats(void)
/*======================*/
{
	buf_pool->last_printout_time = time(NULL);
	buf_pool->n_page_gets_old = buf_pool->n_page_gets;
	buf_pool->n_pages_read_old = buf_pool->n_pages_read;
	buf_pool->n_pages_created_old = buf_pool->n_pages_created;
	buf_pool->n_pages_written_old = buf_pool->n_pages_written;
}
/*************************************************************************
Checks that all file pages in the buffer are in a replaceable state. */
UNIV_INTERN
ibool
buf_all_freed(void)
/*===============*/
{
	buf_chunk_t*	chunk;
	ulint		i;

	ut_ad(buf_pool);

	buf_pool_mutex_enter();

	chunk = buf_pool->chunks;

	for (i = buf_pool->n_chunks; i--; chunk++) {

		const buf_block_t* block = buf_chunk_not_freed(chunk);

		if (UNIV_LIKELY_NULL(block)) {
			fprintf(stderr,
				"Page %lu %lu still fixed or dirty\n",
				(ulong) block->page.space,
				(ulong) block->page.offset);
			ut_error;
		}
	}

	buf_pool_mutex_exit();

	return(TRUE);
}
/*************************************************************************
Checks that there currently are no pending i/o-operations for the buffer
pool. */
UNIV_INTERN
ibool
buf_pool_check_no_pending_io(void)
/*==============================*/
				/* out: TRUE if there is no pending i/o */
{
	ibool	ret;

	buf_pool_mutex_enter();

	if (buf_pool->n_pend_reads + buf_pool->n_flush[BUF_FLUSH_LRU]
	    + buf_pool->n_flush[BUF_FLUSH_LIST]
	    + buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]) {
		ret = FALSE;
	} else {
		ret = TRUE;
	}

	buf_pool_mutex_exit();

	return(ret);
}
/*************************************************************************
Gets the current length of the free list of buffer blocks. */
UNIV_INTERN
ulint
buf_get_free_list_len(void)
/*=======================*/
{
	ulint	len;

	buf_pool_mutex_enter();

	len = UT_LIST_GET_LEN(buf_pool->free);

	buf_pool_mutex_exit();

	return(len);
}