~drizzle-trunk/drizzle/development

« back to all changes in this revision

Viewing changes to plugin/innobase/buf/buf0buf.cc

Merged vcol stuff.

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
/*****************************************************************************
2
 
 
3
 
Copyright (C) 1995, 2010, Innobase Oy. All Rights Reserved.
4
 
Copyright (C) 2008, Google Inc.
5
 
 
6
 
Portions of this file contain modifications contributed and copyrighted by
7
 
Google, Inc. Those modifications are gratefully acknowledged and are described
8
 
briefly in the InnoDB documentation. The contributions by Google are
9
 
incorporated with their permission, and subject to the conditions contained in
10
 
the file COPYING.Google.
11
 
 
12
 
This program is free software; you can redistribute it and/or modify it under
13
 
the terms of the GNU General Public License as published by the Free Software
14
 
Foundation; version 2 of the License.
15
 
 
16
 
This program is distributed in the hope that it will be useful, but WITHOUT
17
 
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18
 
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
19
 
 
20
 
You should have received a copy of the GNU General Public License along with
21
 
this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
22
 
St, Fifth Floor, Boston, MA 02110-1301 USA
23
 
 
24
 
*****************************************************************************/
25
 
 
26
 
/**************************************************//**
27
 
@file buf/buf0buf.c
28
 
The database buffer buf_pool
29
 
 
30
 
Created 11/5/1995 Heikki Tuuri
31
 
*******************************************************/
32
 
 
33
 
#include "buf0buf.h"
34
 
 
35
 
#ifdef UNIV_NONINL
36
 
#include "buf0buf.ic"
37
 
#endif
38
 
 
39
 
#include "mem0mem.h"
40
 
#include "btr0btr.h"
41
 
#include "fil0fil.h"
42
 
#ifndef UNIV_HOTBACKUP
43
 
#include "buf0buddy.h"
44
 
#include "lock0lock.h"
45
 
#include "btr0sea.h"
46
 
#include "ibuf0ibuf.h"
47
 
#include "trx0undo.h"
48
 
#include "log0log.h"
49
 
#endif /* !UNIV_HOTBACKUP */
50
 
#include "srv0srv.h"
51
 
#include "dict0dict.h"
52
 
#include "log0recv.h"
53
 
#include "page0zip.h"
54
 
 
55
 
/*
56
 
                IMPLEMENTATION OF THE BUFFER POOL
57
 
                =================================
58
 
 
59
 
Performance improvement:
60
 
------------------------
61
 
Thread scheduling in NT may be so slow that the OS wait mechanism should
62
 
not be used even in waiting for disk reads to complete.
63
 
Rather, we should put waiting query threads to the queue of
64
 
waiting jobs, and let the OS thread do something useful while the i/o
65
 
is processed. In this way we could remove most OS thread switches in
66
 
an i/o-intensive benchmark like TPC-C.
67
 
 
68
 
A possibility is to put a user space thread library between the database
69
 
and NT. User space thread libraries might be very fast.
70
 
 
71
 
SQL Server 7.0 can be configured to use 'fibers' which are lightweight
72
 
threads in NT. These should be studied.
73
 
 
74
 
                Buffer frames and blocks
75
 
                ------------------------
76
 
Following the terminology of Gray and Reuter, we call the memory
77
 
blocks where file pages are loaded buffer frames. For each buffer
78
 
frame there is a control block, or shortly, a block, in the buffer
79
 
control array. The control info which does not need to be stored
80
 
in the file along with the file page, resides in the control block.
81
 
 
82
 
                Buffer pool struct
83
 
                ------------------
84
 
The buffer buf_pool contains a single mutex which protects all the
85
 
control data structures of the buf_pool. The content of a buffer frame is
86
 
protected by a separate read-write lock in its control block, though.
87
 
These locks can be locked and unlocked without owning the buf_pool->mutex.
88
 
The OS events in the buf_pool struct can be waited for without owning the
89
 
buf_pool->mutex.
90
 
 
91
 
The buf_pool->mutex is a hot-spot in main memory, causing a lot of
92
 
memory bus traffic on multiprocessor systems when processors
93
 
alternately access the mutex. On our Pentium, the mutex is accessed
94
 
maybe every 10 microseconds. We gave up the solution to have mutexes
95
 
for each control block, for instance, because it seemed to be
96
 
complicated.
97
 
 
98
 
A solution to reduce mutex contention of the buf_pool->mutex is to
99
 
create a separate mutex for the page hash table. On Pentium,
100
 
accessing the hash table takes 2 microseconds, about half
101
 
of the total buf_pool->mutex hold time.
102
 
 
103
 
                Control blocks
104
 
                --------------
105
 
 
106
 
The control block contains, for instance, the bufferfix count
107
 
which is incremented when a thread wants a file page to be fixed
108
 
in a buffer frame. The bufferfix operation does not lock the
109
 
contents of the frame, however. For this purpose, the control
110
 
block contains a read-write lock.
111
 
 
112
 
The buffer frames have to be aligned so that the start memory
113
 
address of a frame is divisible by the universal page size, which
114
 
is a power of two.
115
 
 
116
 
We intend to make the buffer buf_pool size on-line reconfigurable,
117
 
that is, the buf_pool size can be changed without closing the database.
118
 
Then the database administrator may adjust it to be bigger
119
 
at night, for example. The control block array must
120
 
contain enough control blocks for the maximum buffer buf_pool size
121
 
which is used in the particular database.
122
 
If the buf_pool size is cut, we exploit the virtual memory mechanism of
123
 
the OS, and just refrain from using frames at high addresses. Then the OS
124
 
can swap them to disk.
125
 
 
126
 
The control blocks containing file pages are put to a hash table
127
 
according to the file address of the page.
128
 
We could speed up the access to an individual page by using
129
 
"pointer swizzling": we could replace the page references on
130
 
non-leaf index pages by direct pointers to the page, if it exists
131
 
in the buf_pool. We could make a separate hash table where we could
132
 
chain all the page references in non-leaf pages residing in the buf_pool,
133
 
using the page reference as the hash key,
134
 
and at the time of reading of a page update the pointers accordingly.
135
 
Drawbacks of this solution are added complexity and,
136
 
possibly, extra space required on non-leaf pages for memory pointers.
137
 
A simpler solution is just to speed up the hash table mechanism
138
 
in the database, using tables whose size is a power of 2.
139
 
 
140
 
                Lists of blocks
141
 
                ---------------
142
 
 
143
 
There are several lists of control blocks.
144
 
 
145
 
The free list (buf_pool->free) contains blocks which are currently not
146
 
used.
147
 
 
148
 
The common LRU list contains all the blocks holding a file page
149
 
except those for which the bufferfix count is non-zero.
150
 
The pages are in the LRU list roughly in the order of the last
151
 
access to the page, so that the oldest pages are at the end of the
152
 
list. We also keep a pointer to near the end of the LRU list,
153
 
which we can use when we want to artificially age a page in the
154
 
buf_pool. This is used if we know that some page is not needed
155
 
again for some time: we insert the block right after the pointer,
156
 
causing it to be replaced sooner than would normally be the case.
157
 
Currently this aging mechanism is used for read-ahead mechanism
158
 
of pages, and it can also be used when there is a scan of a full
159
 
table which cannot fit in the memory. Putting the pages near the
160
 
end of the LRU list, we make sure that most of the buf_pool stays
161
 
in the main memory, undisturbed.
162
 
 
163
 
The unzip_LRU list contains a subset of the common LRU list.  The
164
 
blocks on the unzip_LRU list hold a compressed file page and the
165
 
corresponding uncompressed page frame.  A block is in unzip_LRU if and
166
 
only if the predicate buf_page_belongs_to_unzip_LRU(&block->page)
167
 
holds.  The blocks in unzip_LRU will be in same order as they are in
168
 
the common LRU list.  That is, each manipulation of the common LRU
169
 
list will result in the same manipulation of the unzip_LRU list.
170
 
 
171
 
The chain of modified blocks (buf_pool->flush_list) contains the blocks
172
 
holding file pages that have been modified in the memory
173
 
but not written to disk yet. The block with the oldest modification
174
 
which has not yet been written to disk is at the end of the chain.
175
 
The access to this list is protected by buf_pool->flush_list_mutex.
176
 
 
177
 
The chain of unmodified compressed blocks (buf_pool->zip_clean)
178
 
contains the control blocks (buf_page_t) of those compressed pages
179
 
that are not in buf_pool->flush_list and for which no uncompressed
180
 
page has been allocated in the buffer pool.  The control blocks for
181
 
uncompressed pages are accessible via buf_block_t objects that are
182
 
reachable via buf_pool->chunks[].
183
 
 
184
 
The chains of free memory blocks (buf_pool->zip_free[]) are used by
185
 
the buddy allocator (buf0buddy.c) to keep track of currently unused
186
 
memory blocks of size sizeof(buf_page_t)..UNIV_PAGE_SIZE / 2.  These
187
 
blocks are inside the UNIV_PAGE_SIZE-sized memory blocks of type
188
 
BUF_BLOCK_MEMORY that the buddy allocator requests from the buffer
189
 
pool.  The buddy allocator is solely used for allocating control
190
 
blocks for compressed pages (buf_page_t) and compressed page frames.
191
 
 
192
 
                Loading a file page
193
 
                -------------------
194
 
 
195
 
First, a victim block for replacement has to be found in the
196
 
buf_pool. It is taken from the free list or searched for from the
197
 
end of the LRU-list. An exclusive lock is reserved for the frame,
198
 
the io_fix field is set in the block fixing the block in buf_pool,
199
 
and the io-operation for loading the page is queued. The io-handler thread
200
 
releases the X-lock on the frame and resets the io_fix field
201
 
when the io operation completes.
202
 
 
203
 
A thread may request the above operation using the function
204
 
buf_page_get(). It may then continue to request a lock on the frame.
205
 
The lock is granted when the io-handler releases the x-lock.
206
 
 
207
 
                Read-ahead
208
 
                ----------
209
 
 
210
 
The read-ahead mechanism is intended to be intelligent and
211
 
isolated from the semantically higher levels of the database
212
 
index management. From the higher level we only need the
213
 
information if a file page has a natural successor or
214
 
predecessor page. On the leaf level of a B-tree index,
215
 
these are the next and previous pages in the natural
216
 
order of the pages.
217
 
 
218
 
Let us first explain the read-ahead mechanism when the leafs
219
 
of a B-tree are scanned in an ascending or descending order.
220
 
When a read page is the first time referenced in the buf_pool,
221
 
the buffer manager checks if it is at the border of a so-called
222
 
linear read-ahead area. The tablespace is divided into these
223
 
areas of size 64 blocks, for example. So if the page is at the
224
 
border of such an area, the read-ahead mechanism checks if
225
 
all the other blocks in the area have been accessed in an
226
 
ascending or descending order. If this is the case, the system
227
 
looks at the natural successor or predecessor of the page,
228
 
checks if that is at the border of another area, and in this case
229
 
issues read-requests for all the pages in that area. Maybe
230
 
we could relax the condition that all the pages in the area
231
 
have to be accessed: if data is deleted from a table, there may
232
 
appear holes of unused pages in the area.
233
 
 
234
 
A different read-ahead mechanism is used when there appears
235
 
to be a random access pattern to a file.
236
 
If a new page is referenced in the buf_pool, and several pages
237
 
of its random access area (for instance, 32 consecutive pages
238
 
in a tablespace) have recently been referenced, we may predict
239
 
that the whole area may be needed in the near future, and issue
240
 
the read requests for the whole area.
241
 
*/
242
 
 
243
 
#ifndef UNIV_HOTBACKUP
244
 
/** Value in microseconds */
245
 
static const int WAIT_FOR_READ  = 5000;
246
 
/** Number of attempts made to read in a page in the buffer pool */
247
 
static const ulint BUF_PAGE_READ_MAX_RETRIES = 100;
248
 
 
249
 
/** The buffer pools of the database */
250
 
UNIV_INTERN buf_pool_t* buf_pool_ptr;
251
 
 
252
 
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
253
 
static ulint    buf_dbg_counter = 0; /*!< This is used to insert validation
254
 
                                        operations in execution in the
255
 
                                        debug version */
256
 
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
257
 
#ifdef UNIV_DEBUG
258
 
/** If this is set TRUE, the program prints info whenever
259
 
read-ahead or flush occurs */
260
 
UNIV_INTERN ibool               buf_debug_prints = FALSE;
261
 
#endif /* UNIV_DEBUG */
262
 
 
263
 
#ifdef UNIV_PFS_RWLOCK
264
 
/* Keys to register buffer block related rwlocks and mutexes with
265
 
performance schema */
266
 
UNIV_INTERN mysql_pfs_key_t     buf_block_lock_key;
267
 
# ifdef UNIV_SYNC_DEBUG
268
 
UNIV_INTERN mysql_pfs_key_t     buf_block_debug_latch_key;
269
 
# endif /* UNIV_SYNC_DEBUG */
270
 
#endif /* UNIV_PFS_RWLOCK */
271
 
 
272
 
#ifdef UNIV_PFS_MUTEX
273
 
UNIV_INTERN mysql_pfs_key_t     buffer_block_mutex_key;
274
 
UNIV_INTERN mysql_pfs_key_t     buf_pool_mutex_key;
275
 
UNIV_INTERN mysql_pfs_key_t     buf_pool_zip_mutex_key;
276
 
UNIV_INTERN mysql_pfs_key_t     flush_list_mutex_key;
277
 
#endif /* UNIV_PFS_MUTEX */
278
 
 
279
 
#if defined UNIV_PFS_MUTEX || defined UNIV_PFS_RWLOCK
280
 
# ifndef PFS_SKIP_BUFFER_MUTEX_RWLOCK
281
 
 
282
 
/* Buffer block mutexes and rwlocks can be registered
283
 
in one group rather than individually. If PFS_GROUP_BUFFER_SYNC
284
 
is defined, register buffer block mutex and rwlock
285
 
in one group after their initialization. */
286
 
#  define PFS_GROUP_BUFFER_SYNC
287
 
 
288
 
/* This define caps the number of mutexes/rwlocks can
289
 
be registered with performance schema. Developers can
290
 
modify this define if necessary. Please note, this would
291
 
be effective only if PFS_GROUP_BUFFER_SYNC is defined. */
292
 
#  define PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER    ULINT_MAX
293
 
 
294
 
# endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
295
 
#endif /* UNIV_PFS_MUTEX || UNIV_PFS_RWLOCK */
296
 
 
297
 
/** A chunk of buffers.  The buffer pool is allocated in chunks. */
struct buf_chunk_struct{
	ulint		mem_size;	/*!< allocated size of the chunk;
					NOTE(review): presumably >= the
					space needed for frames[] alone
					(alignment/overhead) — confirm at
					the allocation site */
	ulint		size;		/*!< size of frames[] and blocks[],
					i.e. number of pages in the chunk */
	void*		mem;		/*!< pointer to the memory area which
					was allocated for the frames */
	buf_block_t*	blocks;		/*!< array of buffer control blocks,
					one per frame */
};
305
 
#endif /* !UNIV_HOTBACKUP */
306
 
 
307
 
/********************************************************************//**
308
 
Gets the smallest oldest_modification lsn for any page in the pool. Returns
309
 
zero if all modified pages have been flushed to disk.
310
 
@return oldest modification in pool, zero if none */
311
 
UNIV_INTERN
312
 
ib_uint64_t
313
 
buf_pool_get_oldest_modification(void)
314
 
/*==================================*/
315
 
{
316
 
        ulint           i;
317
 
        buf_page_t*     bpage;
318
 
        ib_uint64_t     lsn = 0;
319
 
        ib_uint64_t     oldest_lsn = 0;
320
 
 
321
 
        /* When we traverse all the flush lists we don't want another
322
 
        thread to add a dirty page to any flush list. */
323
 
        log_flush_order_mutex_enter();
324
 
 
325
 
        for (i = 0; i < srv_buf_pool_instances; i++) {
326
 
                buf_pool_t*     buf_pool;
327
 
 
328
 
                buf_pool = buf_pool_from_array(i);
329
 
 
330
 
                buf_flush_list_mutex_enter(buf_pool);
331
 
 
332
 
                bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
333
 
 
334
 
                if (bpage != NULL) {
335
 
                        ut_ad(bpage->in_flush_list);
336
 
                        lsn = bpage->oldest_modification;
337
 
                }
338
 
 
339
 
                buf_flush_list_mutex_exit(buf_pool);
340
 
 
341
 
                if (!oldest_lsn || oldest_lsn > lsn) {
342
 
                        oldest_lsn = lsn;
343
 
                }
344
 
        }
345
 
 
346
 
        log_flush_order_mutex_exit();
347
 
 
348
 
        /* The returned answer may be out of date: the flush_list can
349
 
        change after the mutex has been released. */
350
 
 
351
 
        return(oldest_lsn);
352
 
}
353
 
 
354
 
/********************************************************************//**
355
 
Get total buffer pool statistics. */
356
 
UNIV_INTERN
357
 
void
358
 
buf_get_total_list_len(
359
 
/*===================*/
360
 
        ulint*          LRU_len,        /*!< out: length of all LRU lists */
361
 
        ulint*          free_len,       /*!< out: length of all free lists */
362
 
        ulint*          flush_list_len) /*!< out: length of all flush lists */
363
 
{
364
 
        ulint           i;
365
 
 
366
 
        *LRU_len = 0;
367
 
        *free_len = 0;
368
 
        *flush_list_len = 0;
369
 
 
370
 
        for (i = 0; i < srv_buf_pool_instances; i++) {
371
 
                buf_pool_t*     buf_pool;
372
 
 
373
 
                buf_pool = buf_pool_from_array(i);
374
 
                *LRU_len += UT_LIST_GET_LEN(buf_pool->LRU);
375
 
                *free_len += UT_LIST_GET_LEN(buf_pool->free);
376
 
                *flush_list_len += UT_LIST_GET_LEN(buf_pool->flush_list);
377
 
        }
378
 
}
379
 
 
380
 
/********************************************************************//**
381
 
Get total buffer pool statistics. */
382
 
UNIV_INTERN
383
 
void
384
 
buf_get_total_stat(
385
 
/*===============*/
386
 
        buf_pool_stat_t*        tot_stat)       /*!< out: buffer pool stats */
387
 
{
388
 
        ulint                   i;
389
 
 
390
 
        memset(tot_stat, 0, sizeof(*tot_stat));
391
 
 
392
 
        for (i = 0; i < srv_buf_pool_instances; i++) {
393
 
                buf_pool_stat_t*buf_stat;
394
 
                buf_pool_t*     buf_pool;
395
 
 
396
 
                buf_pool = buf_pool_from_array(i);
397
 
 
398
 
                buf_stat = &buf_pool->stat;
399
 
                tot_stat->n_page_gets += buf_stat->n_page_gets;
400
 
                tot_stat->n_pages_read += buf_stat->n_pages_read;
401
 
                tot_stat->n_pages_written += buf_stat->n_pages_written;
402
 
                tot_stat->n_pages_created += buf_stat->n_pages_created;
403
 
                tot_stat->n_ra_pages_read += buf_stat->n_ra_pages_read;
404
 
                tot_stat->n_ra_pages_evicted += buf_stat->n_ra_pages_evicted;
405
 
                tot_stat->n_pages_made_young += buf_stat->n_pages_made_young;
406
 
 
407
 
                tot_stat->n_pages_not_made_young +=
408
 
                        buf_stat->n_pages_not_made_young;
409
 
        }
410
 
}
411
 
 
412
 
/********************************************************************//**
Allocates a buffer block.
@return own: the allocated block, in state BUF_BLOCK_MEMORY */
UNIV_INTERN
buf_block_t*
buf_block_alloc(
/*============*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	ulint		zip_size)	/*!< in: compressed page size in bytes,
					or 0 if uncompressed tablespace */
{
	buf_block_t*	block;
	ulint		index;
	static ulint	buf_pool_index;	/* round-robin cursor used only
					when the caller does not name an
					instance */

	if (buf_pool == NULL) {
		/* We are allocating memory from any buffer pool, ensure
		we spread the grace on all buffer pool instances. */
		/* NOTE(review): buf_pool_index is incremented without any
		synchronization; presumably a lost increment is considered
		harmless since it only skews which instance gets picked —
		confirm this is intentional. */
		index = buf_pool_index++ % srv_buf_pool_instances;
		buf_pool = buf_pool_from_array(index);
	}

	/* Take a free block (or evict one from the LRU tail) ... */
	block = buf_LRU_get_free_block(buf_pool, zip_size);

	/* ... and hand it out in the BUF_BLOCK_MEMORY state, i.e. not
	associated with any file page yet. */
	buf_block_set_state(block, BUF_BLOCK_MEMORY);

	return(block);
}
440
 
 
441
 
/********************************************************************//**
442
 
Calculates a page checksum which is stored to the page when it is written
443
 
to a file. Note that we must be careful to calculate the same value on
444
 
32-bit and 64-bit architectures.
445
 
@return checksum */
446
 
UNIV_INTERN
447
 
ulint
448
 
buf_calc_page_new_checksum(
449
 
/*=======================*/
450
 
        const byte*     page)   /*!< in: buffer page */
451
 
{
452
 
        ulint checksum;
453
 
 
454
 
        /* Since the field FIL_PAGE_FILE_FLUSH_LSN, and in versions <= 4.1.x
455
 
        ..._ARCH_LOG_NO, are written outside the buffer pool to the first
456
 
        pages of data files, we have to skip them in the page checksum
457
 
        calculation.
458
 
        We must also skip the field FIL_PAGE_SPACE_OR_CHKSUM where the
459
 
        checksum is stored, and also the last 8 bytes of page because
460
 
        there we store the old formula checksum. */
461
 
 
462
 
        checksum = ut_fold_binary(page + FIL_PAGE_OFFSET,
463
 
                                  FIL_PAGE_FILE_FLUSH_LSN - FIL_PAGE_OFFSET)
464
 
                + ut_fold_binary(page + FIL_PAGE_DATA,
465
 
                                 UNIV_PAGE_SIZE - FIL_PAGE_DATA
466
 
                                 - FIL_PAGE_END_LSN_OLD_CHKSUM);
467
 
        checksum = checksum & 0xFFFFFFFFUL;
468
 
 
469
 
        return(checksum);
470
 
}
471
 
 
472
 
/********************************************************************//**
473
 
In versions < 4.0.14 and < 4.1.1 there was a bug that the checksum only
474
 
looked at the first few bytes of the page. This calculates that old
475
 
checksum.
476
 
NOTE: we must first store the new formula checksum to
477
 
FIL_PAGE_SPACE_OR_CHKSUM before calculating and storing this old checksum
478
 
because this takes that field as an input!
479
 
@return checksum */
480
 
UNIV_INTERN
481
 
ulint
482
 
buf_calc_page_old_checksum(
483
 
/*=======================*/
484
 
        const byte*     page)   /*!< in: buffer page */
485
 
{
486
 
        ulint checksum;
487
 
 
488
 
        checksum = ut_fold_binary(page, FIL_PAGE_FILE_FLUSH_LSN);
489
 
 
490
 
        checksum = checksum & 0xFFFFFFFFUL;
491
 
 
492
 
        return(checksum);
493
 
}
494
 
 
495
 
/********************************************************************//**
Checks if a page is corrupt.  A page is judged corrupt if (a) for an
uncompressed page the lsn stamped at the page start disagrees with the
copy at the page end, or (b) checksum validation is enabled and neither
the stored new-formula nor old-formula checksum matches any accepted
value.
@return TRUE if corrupted */
UNIV_INTERN
ibool
buf_page_is_corrupted(
/*==================*/
	const byte*	read_buf,	/*!< in: a database page */
	ulint		zip_size)	/*!< in: size of compressed page;
					0 for uncompressed pages */
{
	ulint		checksum_field;
	ulint		old_checksum_field;

	/* Compressed pages do not carry the trailing lsn copy, so this
	quick check applies to uncompressed pages only. */
	if (UNIV_LIKELY(!zip_size)
	    && memcmp(read_buf + FIL_PAGE_LSN + 4,
		      read_buf + UNIV_PAGE_SIZE
		      - FIL_PAGE_END_LSN_OLD_CHKSUM + 4, 4)) {

		/* Stored log sequence numbers at the start and the end
		of page do not match */

		return(TRUE);
	}

#ifndef UNIV_HOTBACKUP
	/* Diagnostic only: a page lsn ahead of the current log lsn means
	mismatched tablespace/log files.  This branch prints a warning but
	deliberately does NOT return TRUE. */
	if (recv_lsn_checks_on) {
		ib_uint64_t	current_lsn;

		if (log_peek_lsn(&current_lsn)
		    && UNIV_UNLIKELY
		    (current_lsn
		     < mach_read_from_8(read_buf + FIL_PAGE_LSN))) {
			ut_print_timestamp(stderr);

			fprintf(stderr,
				"  InnoDB: Error: page %lu log sequence number"
				" %"PRIu64"\n"
				"InnoDB: is in the future! Current system "
				"log sequence number %"PRIu64".\n"
				"InnoDB: Your database may be corrupt or "
				"you may have copied the InnoDB\n"
				"InnoDB: tablespace but not the InnoDB "
				"log files. See\n"
				"InnoDB: " REFMAN "forcing-recovery.html\n"
				"InnoDB: for more information.\n",
				(ulong) mach_read_from_4(read_buf
							 + FIL_PAGE_OFFSET),
				mach_read_from_8(read_buf + FIL_PAGE_LSN),
				current_lsn);
		}
	}
#endif

	/* If we use checksums validation, make additional check before
	returning TRUE to ensure that the checksum is not equal to
	BUF_NO_CHECKSUM_MAGIC which might be stored by InnoDB with checksums
	disabled. Otherwise, skip checksum calculation and return FALSE */

	if (UNIV_LIKELY(srv_use_checksums)) {
		checksum_field = mach_read_from_4(read_buf
						  + FIL_PAGE_SPACE_OR_CHKSUM);

		/* Compressed pages have only the single stored checksum
		field; compare it against the zip checksum and we are
		done. */
		if (UNIV_UNLIKELY(zip_size)) {
			return(checksum_field != BUF_NO_CHECKSUM_MAGIC
			       && checksum_field
			       != page_zip_calc_checksum(read_buf, zip_size));
		}

		old_checksum_field = mach_read_from_4(
			read_buf + UNIV_PAGE_SIZE
			- FIL_PAGE_END_LSN_OLD_CHKSUM);

		/* There are 2 valid formulas for old_checksum_field:

		1. Very old versions of InnoDB only stored 8 byte lsn to the
		start and the end of the page.

		2. Newer InnoDB versions store the old formula checksum
		there. */

		if (old_checksum_field != mach_read_from_4(read_buf
							   + FIL_PAGE_LSN)
		    && old_checksum_field != BUF_NO_CHECKSUM_MAGIC
		    && old_checksum_field
		    != buf_calc_page_old_checksum(read_buf)) {

			return(TRUE);
		}

		/* InnoDB versions < 4.0.14 and < 4.1.1 stored the space id
		(always equal to 0), to FIL_PAGE_SPACE_OR_CHKSUM */

		/* Accept 0 (legacy space id), the no-checksum magic, or
		the new-formula checksum; anything else is corruption. */
		if (checksum_field != 0
		    && checksum_field != BUF_NO_CHECKSUM_MAGIC
		    && checksum_field
		    != buf_calc_page_new_checksum(read_buf)) {

			return(TRUE);
		}
	}

	return(FALSE);
}
599
 
 
600
 
/********************************************************************//**
601
 
Prints a page to stderr. */
602
 
UNIV_INTERN
603
 
void
604
 
buf_page_print(
605
 
/*===========*/
606
 
        const byte*     read_buf,       /*!< in: a database page */
607
 
        ulint           zip_size)       /*!< in: compressed page size, or
608
 
                                0 for uncompressed pages */
609
 
{
610
 
#ifndef UNIV_HOTBACKUP
611
 
        dict_index_t*   index;
612
 
#endif /* !UNIV_HOTBACKUP */
613
 
        ulint           checksum;
614
 
        ulint           old_checksum;
615
 
        ulint           size    = zip_size;
616
 
 
617
 
        if (!size) {
618
 
                size = UNIV_PAGE_SIZE;
619
 
        }
620
 
 
621
 
        ut_print_timestamp(stderr);
622
 
        fprintf(stderr, "  InnoDB: Page dump in ascii and hex (%lu bytes):\n",
623
 
                (ulong) size);
624
 
        ut_print_buf(stderr, read_buf, size);
625
 
        fputs("\nInnoDB: End of page dump\n", stderr);
626
 
 
627
 
        if (zip_size) {
628
 
                /* Print compressed page. */
629
 
 
630
 
                switch (fil_page_get_type(read_buf)) {
631
 
                case FIL_PAGE_TYPE_ZBLOB:
632
 
                case FIL_PAGE_TYPE_ZBLOB2:
633
 
                        checksum = srv_use_checksums
634
 
                                ? page_zip_calc_checksum(read_buf, zip_size)
635
 
                                : BUF_NO_CHECKSUM_MAGIC;
636
 
                        ut_print_timestamp(stderr);
637
 
                        fprintf(stderr,
638
 
                                "  InnoDB: Compressed BLOB page"
639
 
                                " checksum %lu, stored %lu\n"
640
 
                                "InnoDB: Page lsn %lu %lu\n"
641
 
                                "InnoDB: Page number (if stored"
642
 
                                " to page already) %lu,\n"
643
 
                                "InnoDB: space id (if stored"
644
 
                                " to page already) %lu\n",
645
 
                                (ulong) checksum,
646
 
                                (ulong) mach_read_from_4(
647
 
                                        read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
648
 
                                (ulong) mach_read_from_4(
649
 
                                        read_buf + FIL_PAGE_LSN),
650
 
                                (ulong) mach_read_from_4(
651
 
                                        read_buf + (FIL_PAGE_LSN + 4)),
652
 
                                (ulong) mach_read_from_4(
653
 
                                        read_buf + FIL_PAGE_OFFSET),
654
 
                                (ulong) mach_read_from_4(
655
 
                                        read_buf
656
 
                                        + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
657
 
                        return;
658
 
                default:
659
 
                        ut_print_timestamp(stderr);
660
 
                        fprintf(stderr,
661
 
                                "  InnoDB: unknown page type %lu,"
662
 
                                " assuming FIL_PAGE_INDEX\n",
663
 
                                fil_page_get_type(read_buf));
664
 
                        /* fall through */
665
 
                case FIL_PAGE_INDEX:
666
 
                        checksum = srv_use_checksums
667
 
                                ? page_zip_calc_checksum(read_buf, zip_size)
668
 
                                : BUF_NO_CHECKSUM_MAGIC;
669
 
 
670
 
                        ut_print_timestamp(stderr);
671
 
                        fprintf(stderr,
672
 
                                "  InnoDB: Compressed page checksum %lu,"
673
 
                                " stored %lu\n"
674
 
                                "InnoDB: Page lsn %lu %lu\n"
675
 
                                "InnoDB: Page number (if stored"
676
 
                                " to page already) %lu,\n"
677
 
                                "InnoDB: space id (if stored"
678
 
                                " to page already) %lu\n",
679
 
                                (ulong) checksum,
680
 
                                (ulong) mach_read_from_4(
681
 
                                        read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
682
 
                                (ulong) mach_read_from_4(
683
 
                                        read_buf + FIL_PAGE_LSN),
684
 
                                (ulong) mach_read_from_4(
685
 
                                        read_buf + (FIL_PAGE_LSN + 4)),
686
 
                                (ulong) mach_read_from_4(
687
 
                                        read_buf + FIL_PAGE_OFFSET),
688
 
                                (ulong) mach_read_from_4(
689
 
                                        read_buf
690
 
                                        + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
691
 
                        return;
692
 
                case FIL_PAGE_TYPE_XDES:
693
 
                        /* This is an uncompressed page. */
694
 
                        break;
695
 
                }
696
 
        }
697
 
 
698
 
        checksum = srv_use_checksums
699
 
                ? buf_calc_page_new_checksum(read_buf) : BUF_NO_CHECKSUM_MAGIC;
700
 
        old_checksum = srv_use_checksums
701
 
                ? buf_calc_page_old_checksum(read_buf) : BUF_NO_CHECKSUM_MAGIC;
702
 
 
703
 
        ut_print_timestamp(stderr);
704
 
        fprintf(stderr,
705
 
                "  InnoDB: Page checksum %lu, prior-to-4.0.14-form"
706
 
                " checksum %lu\n"
707
 
                "InnoDB: stored checksum %lu, prior-to-4.0.14-form"
708
 
                " stored checksum %lu\n"
709
 
                "InnoDB: Page lsn %lu %lu, low 4 bytes of lsn"
710
 
                " at page end %lu\n"
711
 
                "InnoDB: Page number (if stored to page already) %lu,\n"
712
 
                "InnoDB: space id (if created with >= MySQL-4.1.1"
713
 
                " and stored already) %lu\n",
714
 
                (ulong) checksum, (ulong) old_checksum,
715
 
                (ulong) mach_read_from_4(read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
716
 
                (ulong) mach_read_from_4(read_buf + UNIV_PAGE_SIZE
717
 
                                         - FIL_PAGE_END_LSN_OLD_CHKSUM),
718
 
                (ulong) mach_read_from_4(read_buf + FIL_PAGE_LSN),
719
 
                (ulong) mach_read_from_4(read_buf + FIL_PAGE_LSN + 4),
720
 
                (ulong) mach_read_from_4(read_buf + UNIV_PAGE_SIZE
721
 
                                         - FIL_PAGE_END_LSN_OLD_CHKSUM + 4),
722
 
                (ulong) mach_read_from_4(read_buf + FIL_PAGE_OFFSET),
723
 
                (ulong) mach_read_from_4(read_buf
724
 
                                         + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
725
 
 
726
 
#ifndef UNIV_HOTBACKUP
727
 
        if (mach_read_from_2(read_buf + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE)
728
 
            == TRX_UNDO_INSERT) {
729
 
                fprintf(stderr,
730
 
                        "InnoDB: Page may be an insert undo log page\n");
731
 
        } else if (mach_read_from_2(read_buf + TRX_UNDO_PAGE_HDR
732
 
                                    + TRX_UNDO_PAGE_TYPE)
733
 
                   == TRX_UNDO_UPDATE) {
734
 
                fprintf(stderr,
735
 
                        "InnoDB: Page may be an update undo log page\n");
736
 
        }
737
 
#endif /* !UNIV_HOTBACKUP */
738
 
 
739
 
        switch (fil_page_get_type(read_buf)) {
740
 
                index_id_t      index_id;
741
 
        case FIL_PAGE_INDEX:
742
 
                index_id = btr_page_get_index_id(read_buf);
743
 
                fprintf(stderr,
744
 
                        "InnoDB: Page may be an index page where"
745
 
                        " index id is %llu\n",
746
 
                        (ullint) index_id);
747
 
#ifndef UNIV_HOTBACKUP
748
 
                index = dict_index_find_on_id_low(index_id);
749
 
                if (index) {
750
 
                        fputs("InnoDB: (", stderr);
751
 
                        dict_index_name_print(stderr, NULL, index);
752
 
                        fputs(")\n", stderr);
753
 
                }
754
 
#endif /* !UNIV_HOTBACKUP */
755
 
                break;
756
 
        case FIL_PAGE_INODE:
757
 
                fputs("InnoDB: Page may be an 'inode' page\n", stderr);
758
 
                break;
759
 
        case FIL_PAGE_IBUF_FREE_LIST:
760
 
                fputs("InnoDB: Page may be an insert buffer free list page\n",
761
 
                      stderr);
762
 
                break;
763
 
        case FIL_PAGE_TYPE_ALLOCATED:
764
 
                fputs("InnoDB: Page may be a freshly allocated page\n",
765
 
                      stderr);
766
 
                break;
767
 
        case FIL_PAGE_IBUF_BITMAP:
768
 
                fputs("InnoDB: Page may be an insert buffer bitmap page\n",
769
 
                      stderr);
770
 
                break;
771
 
        case FIL_PAGE_TYPE_SYS:
772
 
                fputs("InnoDB: Page may be a system page\n",
773
 
                      stderr);
774
 
                break;
775
 
        case FIL_PAGE_TYPE_TRX_SYS:
776
 
                fputs("InnoDB: Page may be a transaction system page\n",
777
 
                      stderr);
778
 
                break;
779
 
        case FIL_PAGE_TYPE_FSP_HDR:
780
 
                fputs("InnoDB: Page may be a file space header page\n",
781
 
                      stderr);
782
 
                break;
783
 
        case FIL_PAGE_TYPE_XDES:
784
 
                fputs("InnoDB: Page may be an extent descriptor page\n",
785
 
                      stderr);
786
 
                break;
787
 
        case FIL_PAGE_TYPE_BLOB:
788
 
                fputs("InnoDB: Page may be a BLOB page\n",
789
 
                      stderr);
790
 
                break;
791
 
        case FIL_PAGE_TYPE_ZBLOB:
792
 
        case FIL_PAGE_TYPE_ZBLOB2:
793
 
                fputs("InnoDB: Page may be a compressed BLOB page\n",
794
 
                      stderr);
795
 
                break;
796
 
        }
797
 
}
798
 
 
799
 
#ifndef UNIV_HOTBACKUP
800
 
 
801
 
# ifdef PFS_GROUP_BUFFER_SYNC
802
 
/********************************************************************//**
This function registers mutexes and rwlocks in buffer blocks with
performance schema. If PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER is
defined to be a value less than chunk->size, then only mutexes
and rwlocks in the first PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER
blocks are registered. */
static
void
pfs_register_buffer_block(
/*======================*/
	buf_chunk_t*	chunk)		/*!< in/out: chunk of buffers */
{
	ulint		i;
	ulint		num_to_register;
	buf_block_t*	block;

	block = chunk->blocks;

	/* Cap the number of instrumented blocks so that PFS overhead
	stays bounded for very large buffer pools. */
	num_to_register = ut_min(chunk->size,
				 PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER);

	for (i = 0; i < num_to_register; i++) {
		mutex_t*	mutex;
		rw_lock_t*	rwlock;

#  ifdef UNIV_PFS_MUTEX
		mutex = &block->mutex;
		/* The block mutex must not already be registered. */
		ut_a(!mutex->pfs_psi);
		/* PSI_server is NULL when the performance schema
		server is not available; register nothing then. */
		mutex->pfs_psi = (PSI_server)
			? PSI_server->init_mutex(buffer_block_mutex_key, mutex)
			: NULL;
#  endif /* UNIV_PFS_MUTEX */

#  ifdef UNIV_PFS_RWLOCK
		rwlock = &block->lock;
		ut_a(!rwlock->pfs_psi);
		rwlock->pfs_psi = (PSI_server)
			? PSI_server->init_rwlock(buf_block_lock_key, rwlock)
			: NULL;
#  endif /* UNIV_PFS_RWLOCK */
		block++;
	}
}
845
 
# endif /* PFS_GROUP_BUFFER_SYNC */
846
 
 
847
 
/********************************************************************//**
Initializes a buffer control block when the buf_pool is created.
Sets the block to the BUF_BLOCK_NOT_USED state, clears all bookkeeping
fields and creates the block mutex and rw-lock. */
static
void
buf_block_init(
/*===========*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	buf_block_t*	block,		/*!< in: pointer to control block */
	byte*		frame)		/*!< in: pointer to buffer frame */
{
	/* Tell the memory checker that this frame belongs to block. */
	UNIV_MEM_DESC(frame, UNIV_PAGE_SIZE, block);

	block->frame = frame;

	/* Remember which buffer pool instance owns this block. */
	block->page.buf_pool_index = buf_pool_index(buf_pool);
	block->page.state = BUF_BLOCK_NOT_USED;
	block->page.buf_fix_count = 0;
	block->page.io_fix = BUF_IO_NONE;

	block->modify_clock = 0;

#ifdef UNIV_DEBUG_FILE_ACCESSES
	block->page.file_page_was_freed = FALSE;
#endif /* UNIV_DEBUG_FILE_ACCESSES */

	block->check_index_page_at_flush = FALSE;
	block->index = NULL;

	block->is_hashed = FALSE;

#ifdef UNIV_DEBUG
	/* Debug-only list membership flags; a fresh block is on no list. */
	block->page.in_page_hash = FALSE;
	block->page.in_zip_hash = FALSE;
	block->page.in_flush_list = FALSE;
	block->page.in_free_list = FALSE;
	block->page.in_LRU_list = FALSE;
	block->in_unzip_LRU_list = FALSE;
#endif /* UNIV_DEBUG */
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
	block->n_pointers = 0;
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
	page_zip_des_init(&block->page.zip);

#if defined PFS_SKIP_BUFFER_MUTEX_RWLOCK || defined PFS_GROUP_BUFFER_SYNC
	/* If PFS_SKIP_BUFFER_MUTEX_RWLOCK is defined, skip registration
	of buffer block mutex/rwlock with performance schema. If
	PFS_GROUP_BUFFER_SYNC is defined, skip the registration
	since buffer block mutex/rwlock will be registered later in
	pfs_register_buffer_block() */

	mutex_create(PFS_NOT_INSTRUMENTED, &block->mutex, SYNC_BUF_BLOCK);
	rw_lock_create(PFS_NOT_INSTRUMENTED, &block->lock, SYNC_LEVEL_VARYING);
#else /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */
	mutex_create(buffer_block_mutex_key, &block->mutex, SYNC_BUF_BLOCK);
	rw_lock_create(buf_block_lock_key, &block->lock, SYNC_LEVEL_VARYING);
#endif /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */

	ut_ad(rw_lock_validate(&(block->lock)));

#ifdef UNIV_SYNC_DEBUG
	rw_lock_create(buf_block_debug_latch_key,
		       &block->debug_latch, SYNC_NO_ORDER_CHECK);
#endif /* UNIV_SYNC_DEBUG */
}
911
 
 
912
 
/********************************************************************//**
Allocates a chunk of buffer frames.  The chunk memory holds the block
descriptor array at its start, followed by the page-aligned frames;
every block is initialized and pushed onto buf_pool->free.
@return chunk, or NULL on failure */
static
buf_chunk_t*
buf_chunk_init(
/*===========*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	buf_chunk_t*	chunk,		/*!< out: chunk of buffers */
	ulint		mem_size)	/*!< in: requested size in bytes */
{
	buf_block_t*	block;
	byte*		frame;
	ulint		i;

	/* Round down to a multiple of page size,
	although it already should be. */
	mem_size = ut_2pow_round(mem_size, UNIV_PAGE_SIZE);
	/* Reserve space for the block descriptors. */
	mem_size += ut_2pow_round((mem_size / UNIV_PAGE_SIZE) * (sizeof *block)
				  + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE);

	chunk->mem_size = mem_size;
	chunk->mem = os_mem_alloc_large(&chunk->mem_size);

	if (UNIV_UNLIKELY(chunk->mem == NULL)) {

		return(NULL);
	}

	/* Allocate the block descriptors from
	the start of the memory block. */
	chunk->blocks = static_cast<buf_block_struct *>(chunk->mem);

	/* Align a pointer to the first frame.  Note that when
	os_large_page_size is smaller than UNIV_PAGE_SIZE,
	we may allocate one fewer block than requested.  When
	it is bigger, we may allocate more blocks than requested. */

	frame = static_cast<unsigned char *>(ut_align(chunk->mem, UNIV_PAGE_SIZE));
	chunk->size = chunk->mem_size / UNIV_PAGE_SIZE
		- (frame != chunk->mem);

	/* Subtract the space needed for block descriptors. */
	{
		ulint	size = chunk->size;

		/* Advance frame past the descriptor array, giving up one
		frame's worth of capacity for each page it overlaps. */
		while (frame < (byte*) (chunk->blocks + size)) {
			frame += UNIV_PAGE_SIZE;
			size--;
		}

		chunk->size = size;
	}

	/* Init block structs and assign frames for them. Then we
	assign the frames to the first blocks (we already mapped the
	memory above). */

	block = chunk->blocks;

	for (i = chunk->size; i--; ) {

		buf_block_init(buf_pool, block, frame);

#ifdef HAVE_VALGRIND
		/* Wipe contents of frame to eliminate a Purify warning */
		memset(block->frame, '\0', UNIV_PAGE_SIZE);
#endif
		/* Add the block to the free list */
		UT_LIST_ADD_LAST(list, buf_pool->free, (&block->page));

		ut_d(block->page.in_free_list = TRUE);
		ut_ad(buf_pool_from_block(block) == buf_pool);

		block++;
		frame += UNIV_PAGE_SIZE;
	}

#ifdef PFS_GROUP_BUFFER_SYNC
	pfs_register_buffer_block(chunk);
#endif
	return(chunk);
}
996
 
 
997
 
#ifdef UNIV_DEBUG
998
 
/*********************************************************************//**
999
 
Finds a block in the given buffer chunk that points to a
1000
 
given compressed page.
1001
 
@return buffer block pointing to the compressed page, or NULL */
1002
 
static
1003
 
buf_block_t*
1004
 
buf_chunk_contains_zip(
1005
 
/*===================*/
1006
 
        buf_chunk_t*    chunk,  /*!< in: chunk being checked */
1007
 
        const void*     data)   /*!< in: pointer to compressed page */
1008
 
{
1009
 
        buf_block_t*    block;
1010
 
        ulint           i;
1011
 
 
1012
 
        block = chunk->blocks;
1013
 
 
1014
 
        for (i = chunk->size; i--; block++) {
1015
 
                if (block->page.zip.data == data) {
1016
 
 
1017
 
                        return(block);
1018
 
                }
1019
 
        }
1020
 
 
1021
 
        return(NULL);
1022
 
}
1023
 
 
1024
 
/*********************************************************************//**
1025
 
Finds a block in the buffer pool that points to a
1026
 
given compressed page.
1027
 
@return buffer block pointing to the compressed page, or NULL */
1028
 
UNIV_INTERN
1029
 
buf_block_t*
1030
 
buf_pool_contains_zip(
1031
 
/*==================*/
1032
 
        buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
1033
 
        const void*     data)           /*!< in: pointer to compressed page */
1034
 
{
1035
 
        ulint           n;
1036
 
        buf_chunk_t*    chunk = buf_pool->chunks;
1037
 
 
1038
 
        ut_ad(buf_pool);
1039
 
        ut_ad(buf_pool_mutex_own(buf_pool));
1040
 
        for (n = buf_pool->n_chunks; n--; chunk++) {
1041
 
 
1042
 
                buf_block_t* block = buf_chunk_contains_zip(chunk, data);
1043
 
 
1044
 
                if (block) {
1045
 
                        return(block);
1046
 
                }
1047
 
        }
1048
 
 
1049
 
        return(NULL);
1050
 
}
1051
 
#endif /* UNIV_DEBUG */
1052
 
 
1053
 
/*********************************************************************//**
Checks that all file pages in the buffer chunk are in a replaceable state.
@return address of a non-free block, or NULL if all freed */
static
const buf_block_t*
buf_chunk_not_freed(
/*================*/
	buf_chunk_t*	chunk)	/*!< in: chunk being checked */
{
	buf_block_t*	block;
	ulint		i;

	block = chunk->blocks;

	for (i = chunk->size; i--; block++) {
		ibool	ready;

		switch (buf_block_get_state(block)) {
		case BUF_BLOCK_ZIP_FREE:
		case BUF_BLOCK_ZIP_PAGE:
		case BUF_BLOCK_ZIP_DIRTY:
			/* The uncompressed buffer pool should never
			contain compressed block descriptors. */
			ut_error;
			break;
		case BUF_BLOCK_NOT_USED:
		case BUF_BLOCK_READY_FOR_USE:
		case BUF_BLOCK_MEMORY:
		case BUF_BLOCK_REMOVE_HASH:
			/* Skip blocks that are not being used for
			file pages. */
			break;
		case BUF_BLOCK_FILE_PAGE:
			/* Hold the block mutex so the replaceability
			check cannot race with a concurrent state change. */
			mutex_enter(&block->mutex);
			ready = buf_flush_ready_for_replace(&block->page);
			mutex_exit(&block->mutex);

			if (!ready) {

				/* Found a page that could not yet be
				replaced; report it to the caller. */
				return(block);
			}

			break;
		}
	}

	return(NULL);
}
1101
 
 
1102
 
/*********************************************************************//**
1103
 
Checks that all blocks in the buffer chunk are in BUF_BLOCK_NOT_USED state.
1104
 
@return TRUE if all freed */
1105
 
static
1106
 
ibool
1107
 
buf_chunk_all_free(
1108
 
/*===============*/
1109
 
        const buf_chunk_t*      chunk)  /*!< in: chunk being checked */
1110
 
{
1111
 
        const buf_block_t*      block;
1112
 
        ulint                   i;
1113
 
 
1114
 
        block = chunk->blocks;
1115
 
 
1116
 
        for (i = chunk->size; i--; block++) {
1117
 
 
1118
 
                if (buf_block_get_state(block) != BUF_BLOCK_NOT_USED) {
1119
 
 
1120
 
                        return(FALSE);
1121
 
                }
1122
 
        }
1123
 
 
1124
 
        return(TRUE);
1125
 
}
1126
 
 
1127
 
/********************************************************************//**
Frees a chunk of buffer frames.  All blocks must be unused; each block
is removed from the free list and its latches destroyed before the
chunk's large memory allocation is released. */
static
void
buf_chunk_free(
/*===========*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	buf_chunk_t*	chunk)		/*!< out: chunk of buffers */
{
	buf_block_t*		block;
	const buf_block_t*	block_end;

	ut_ad(buf_pool_mutex_own(buf_pool));

	block_end = chunk->blocks + chunk->size;

	for (block = chunk->blocks; block < block_end; block++) {
		/* Every block must be free and hold no compressed page. */
		ut_a(buf_block_get_state(block) == BUF_BLOCK_NOT_USED);
		ut_a(!block->page.zip.data);

		ut_ad(!block->page.in_LRU_list);
		ut_ad(!block->in_unzip_LRU_list);
		ut_ad(!block->page.in_flush_list);
		/* Remove the block from the free list. */
		ut_ad(block->page.in_free_list);
		UT_LIST_REMOVE(list, buf_pool->free, (&block->page));

		/* Free the latches. */
		mutex_free(&block->mutex);
		rw_lock_free(&block->lock);
#ifdef UNIV_SYNC_DEBUG
		rw_lock_free(&block->debug_latch);
#endif /* UNIV_SYNC_DEBUG */
		UNIV_MEM_UNDESC(block);
	}

	/* Release the block descriptors and frames in one shot; they
	live in the single large allocation made by buf_chunk_init(). */
	os_mem_free_large(chunk->mem, chunk->mem_size);
}
1165
 
 
1166
 
/********************************************************************//**
1167
 
Set buffer pool size variables after resizing it */
1168
 
static
1169
 
void
1170
 
buf_pool_set_sizes(void)
1171
 
/*====================*/
1172
 
{
1173
 
        ulint   i;
1174
 
        ulint   curr_size = 0;
1175
 
 
1176
 
        buf_pool_mutex_enter_all();
1177
 
 
1178
 
        for (i = 0; i < srv_buf_pool_instances; i++) {
1179
 
                buf_pool_t*     buf_pool;
1180
 
 
1181
 
                buf_pool = buf_pool_from_array(i);
1182
 
                curr_size += buf_pool->curr_pool_size;
1183
 
        }
1184
 
 
1185
 
        srv_buf_pool_curr_size = curr_size;
1186
 
        srv_buf_pool_old_size = srv_buf_pool_size;
1187
 
 
1188
 
        buf_pool_mutex_exit_all();
1189
 
}
1190
 
 
1191
 
/********************************************************************//**
1192
 
Initialize a buffer pool instance.
1193
 
@return DB_SUCCESS if all goes well. */
1194
 
static
1195
 
ulint
1196
 
buf_pool_init_instance(
1197
 
/*===================*/
1198
 
        buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
1199
 
        ulint           buf_pool_size,  /*!< in: size in bytes */
1200
 
        ulint           instance_no)    /*!< in: id of the instance */
1201
 
{
1202
 
        ulint           i;
1203
 
        buf_chunk_t*    chunk;
1204
 
 
1205
 
        /* 1. Initialize general fields
1206
 
        ------------------------------- */
1207
 
        mutex_create(buf_pool_mutex_key,
1208
 
                     &buf_pool->mutex, SYNC_BUF_POOL);
1209
 
        mutex_create(buf_pool_zip_mutex_key,
1210
 
                     &buf_pool->zip_mutex, SYNC_BUF_BLOCK);
1211
 
 
1212
 
        buf_pool_mutex_enter(buf_pool);
1213
 
 
1214
 
        if (buf_pool_size > 0) {
1215
 
                buf_pool->n_chunks = 1;
1216
 
                void *chunk_ptr= mem_zalloc((sizeof *chunk));
1217
 
                buf_pool->chunks = chunk = static_cast<buf_chunk_t *>(chunk_ptr);
1218
 
 
1219
 
                UT_LIST_INIT(buf_pool->free);
1220
 
 
1221
 
                if (!buf_chunk_init(buf_pool, chunk, buf_pool_size)) {
1222
 
                        mem_free(chunk);
1223
 
                        mem_free(buf_pool);
1224
 
 
1225
 
                        buf_pool_mutex_exit(buf_pool);
1226
 
 
1227
 
                        return(DB_ERROR);
1228
 
                }
1229
 
 
1230
 
                buf_pool->instance_no = instance_no;
1231
 
                buf_pool->old_pool_size = buf_pool_size;
1232
 
                buf_pool->curr_size = chunk->size;
1233
 
                buf_pool->curr_pool_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
1234
 
 
1235
 
                buf_pool->page_hash = hash_create(2 * buf_pool->curr_size);
1236
 
                buf_pool->zip_hash = hash_create(2 * buf_pool->curr_size);
1237
 
                
1238
 
                buf_pool->last_printout_time = ut_time();
1239
 
        }
1240
 
        /* 2. Initialize flushing fields
1241
 
        -------------------------------- */
1242
 
 
1243
 
        mutex_create(flush_list_mutex_key, &buf_pool->flush_list_mutex,
1244
 
                     SYNC_BUF_FLUSH_LIST);
1245
 
 
1246
 
        for (i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) {
1247
 
                buf_pool->no_flush[i] = os_event_create(NULL);
1248
 
        }
1249
 
 
1250
 
        /* 3. Initialize LRU fields
1251
 
        --------------------------- */
1252
 
 
1253
 
        /* All fields are initialized by mem_zalloc(). */
1254
 
 
1255
 
        buf_pool_mutex_exit(buf_pool);
1256
 
 
1257
 
        return(DB_SUCCESS);
1258
 
}
1259
 
 
1260
 
/********************************************************************//**
free one buffer pool instance */
static
void
buf_pool_free_instance(
/*===================*/
	buf_pool_t*	buf_pool)	/* in,own: buffer pool instance
					to free */
{
	buf_chunk_t*	chunk;
	buf_chunk_t*	chunks;

	chunks = buf_pool->chunks;
	chunk = chunks + buf_pool->n_chunks;

	/* Walk the chunk array backwards and release each large
	allocation directly. */
	while (--chunk >= chunks) {
		/* Bypass the checks of buf_chunk_free(), since they
		would fail at shutdown. */
		os_mem_free_large(chunk->mem, chunk->mem_size);
	}

	/* Release the chunk descriptor array and the hash tables.
	NOTE(review): the per-block and per-pool mutexes are not freed
	here — presumably acceptable at shutdown only; confirm callers. */
	mem_free(buf_pool->chunks);
	hash_table_free(buf_pool->page_hash);
	hash_table_free(buf_pool->zip_hash);
}
1285
 
 
1286
 
/********************************************************************//**
Creates the buffer pool.
@return DB_SUCCESS if success, DB_ERROR if not enough memory or error */
UNIV_INTERN
ulint
buf_pool_init(
/*==========*/
	ulint	total_size,	/*!< in: size of the total pool in bytes */
	ulint	n_instances)	/*!< in: number of instances */
{
	ulint		i;
	const ulint	size	= total_size / n_instances;

	ut_ad(n_instances > 0);
	ut_ad(n_instances <= MAX_BUFFER_POOLS);
	ut_ad(n_instances == srv_buf_pool_instances);

	/* We create an extra buffer pool instance, this instance is used
	for flushing the flush lists, to keep track of n_flush for all
	the buffer pools and also used as a waiting object during flushing. */
	/* NOTE(review): the mem_zalloc() result is not NULL-checked;
	presumably the allocator aborts on OOM — confirm. */
	void *buf_pool_void_ptr= mem_zalloc(n_instances * sizeof *buf_pool_ptr);
	buf_pool_ptr = static_cast<buf_pool_struct *>(buf_pool_void_ptr);

	for (i = 0; i < n_instances; i++) {
		buf_pool_t*	ptr	= &buf_pool_ptr[i];

		if (buf_pool_init_instance(ptr, size, i) != DB_SUCCESS) {

			/* Free all the instances created so far. */
			buf_pool_free(i);

			return(DB_ERROR);
		}
	}

	/* Publish the global size variables and set the default
	old-blocks ratio (3/8 of the LRU list). */
	buf_pool_set_sizes();
	buf_LRU_old_ratio_update(100 * 3/ 8, FALSE);

	btr_search_sys_create(buf_pool_get_curr_size() / sizeof(void*) / 64);

	return(DB_SUCCESS);
}
1328
 
 
1329
 
/********************************************************************//**
1330
 
Frees the buffer pool at shutdown.  This must not be invoked before
1331
 
freeing all mutexes. */
1332
 
UNIV_INTERN
1333
 
void
1334
 
buf_pool_free(
1335
 
/*==========*/
1336
 
        ulint   n_instances)    /*!< in: numbere of instances to free */
1337
 
{
1338
 
        ulint   i;
1339
 
 
1340
 
        for (i = 0; i < n_instances; i++) {
1341
 
                buf_pool_free_instance(buf_pool_from_array(i));
1342
 
        }
1343
 
 
1344
 
        mem_free(buf_pool_ptr);
1345
 
        buf_pool_ptr = NULL;
1346
 
}
1347
 
 
1348
 
/********************************************************************//**
Drops adaptive hash index for a buffer pool instance.  For every hashed
block, btr_search_latch is temporarily released (to respect the latching
order) before taking the block's rw-lock, so the caller is told via
released_search_latch that a full rescan is required. */
static
void
buf_pool_drop_hash_index_instance(
/*==============================*/
	buf_pool_t*	buf_pool,		/*!< in: buffer pool instance */
	ibool*		released_search_latch)	/*!< out: flag for signalling
						whether the search latch was
						released */
{
	buf_chunk_t*	chunks	= buf_pool->chunks;
	buf_chunk_t*	chunk	= chunks + buf_pool->n_chunks;

	while (--chunk >= chunks) {
		ulint		i;
		buf_block_t*	block	= chunk->blocks;

		for (i = chunk->size; i--; block++) {
			/* block->is_hashed cannot be modified
			when we have an x-latch on btr_search_latch;
			see the comment in buf0buf.h */

			if (!block->is_hashed) {
				continue;
			}

			/* To follow the latching order, we
			have to release btr_search_latch
			before acquiring block->latch. */
			rw_lock_x_unlock(&btr_search_latch);
			/* When we release the search latch,
			we must rescan all blocks, because
			some may become hashed again. */
			*released_search_latch = TRUE;

			rw_lock_x_lock(&block->lock);

			/* This should be guaranteed by the
			callers, which will be holding
			btr_search_enabled_mutex. */
			ut_ad(!btr_search_enabled);

			/* Because we did not buffer-fix the
			block by calling buf_block_get_gen(),
			it is possible that the block has been
			allocated for some other use after
			btr_search_latch was released above.
			We do not care which file page the
			block is mapped to.  All we want to do
			is to drop any hash entries referring
			to the page. */

			/* It is possible that
			block->page.state != BUF_FILE_PAGE.
			Even that does not matter, because
			btr_search_drop_page_hash_index() will
			check block->is_hashed before doing
			anything.  block->is_hashed can only
			be set on uncompressed file pages. */

			btr_search_drop_page_hash_index(block);

			rw_lock_x_unlock(&block->lock);

			/* Re-acquire the search latch before
			continuing the scan. */
			rw_lock_x_lock(&btr_search_latch);

			ut_ad(!btr_search_enabled);
		}
	}
}
1419
 
 
1420
 
/********************************************************************//**
Drops the adaptive hash index.  To prevent a livelock, this function
is only to be called while holding btr_search_latch and while
btr_search_enabled == FALSE. */
UNIV_INTERN
void
buf_pool_drop_hash_index(void)
/*==========================*/
{
	ibool		released_search_latch;

#ifdef UNIV_SYNC_DEBUG
	ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
	ut_ad(!btr_search_enabled);

	/* Keep scanning all instances until one complete pass finishes
	without any instance having released the search latch; only then
	is it certain that no block became hashed again mid-scan. */
	do {
		ulint	i;

		released_search_latch = FALSE;

		for (i = 0; i < srv_buf_pool_instances; i++) {
			buf_pool_t*	buf_pool;

			buf_pool = buf_pool_from_array(i);

			buf_pool_drop_hash_index_instance(
				buf_pool, &released_search_latch);
		}

	} while (released_search_latch);
}
1452
 
 
1453
 
/********************************************************************//**
Relocate a buffer control block.  Relocates the block on the LRU list
and in buf_pool->page_hash.  Does not relocate bpage->list.
The caller must take care of relocating bpage->list. */
UNIV_INTERN
void
buf_relocate(
/*=========*/
	buf_page_t*	bpage,	/*!< in/out: control block being relocated;
				buf_page_get_state(bpage) must be
				BUF_BLOCK_ZIP_DIRTY or BUF_BLOCK_ZIP_PAGE */
	buf_page_t*	dpage)	/*!< in/out: destination control block */
{
	buf_page_t*	b;
	ulint		fold;
	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);

	/* The caller must hold the pool mutex and the block mutex,
	and the page must not be I/O-fixed or buffer-fixed. */
	ut_ad(buf_pool_mutex_own(buf_pool));
	ut_ad(mutex_own(buf_page_get_mutex(bpage)));
	ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
	ut_a(bpage->buf_fix_count == 0);
	ut_ad(bpage->in_LRU_list);
	ut_ad(!bpage->in_zip_hash);
	ut_ad(bpage->in_page_hash);
	ut_ad(bpage == buf_page_hash_get(buf_pool,
					 bpage->space, bpage->offset));
	ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
#ifdef UNIV_DEBUG
	/* Only compressed-page control blocks may be relocated. */
	switch (buf_page_get_state(bpage)) {
	case BUF_BLOCK_ZIP_FREE:
	case BUF_BLOCK_NOT_USED:
	case BUF_BLOCK_READY_FOR_USE:
	case BUF_BLOCK_FILE_PAGE:
	case BUF_BLOCK_MEMORY:
	case BUF_BLOCK_REMOVE_HASH:
		ut_error;
	case BUF_BLOCK_ZIP_DIRTY:
	case BUF_BLOCK_ZIP_PAGE:
		break;
	}
#endif /* UNIV_DEBUG */

	/* Copy the whole descriptor; dpage takes over all fields. */
	memcpy(dpage, bpage, sizeof *dpage);

	ut_d(bpage->in_LRU_list = FALSE);
	ut_d(bpage->in_page_hash = FALSE);

	/* relocate buf_pool->LRU: splice dpage into the exact
	position that bpage occupied. */
	b = UT_LIST_GET_PREV(LRU, bpage);
	UT_LIST_REMOVE(LRU, buf_pool->LRU, bpage);

	if (b) {
		UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU, b, dpage);
	} else {
		UT_LIST_ADD_FIRST(LRU, buf_pool->LRU, dpage);
	}

	if (UNIV_UNLIKELY(buf_pool->LRU_old == bpage)) {
		buf_pool->LRU_old = dpage;
#ifdef UNIV_LRU_DEBUG
		/* buf_pool->LRU_old must be the first item in the LRU list
		whose "old" flag is set. */
		ut_a(buf_pool->LRU_old->old);
		ut_a(!UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)
		     || !UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)->old);
		ut_a(!UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)
		     || UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)->old);
	} else {
		/* Check that the "old" flag is consistent in
		the block and its neighbours. */
		/* NOTE: this else-branch exists only when UNIV_LRU_DEBUG
		is defined — the "#ifdef" above swallows the "} else {"
		in non-debug builds, so buf_page_set_old() is a
		debug-only consistency check here. */
		buf_page_set_old(dpage, buf_page_is_old(dpage));
#endif /* UNIV_LRU_DEBUG */
	}

	ut_d(UT_LIST_VALIDATE(LRU, buf_page_t, buf_pool->LRU,
			      ut_ad(ut_list_node_313->in_LRU_list)));

	/* relocate buf_pool->page_hash: both blocks map to the same
	(space, offset), hence the same fold value. */
	fold = buf_page_address_fold(bpage->space, bpage->offset);

	HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, bpage);
	HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold, dpage);
}
1536
 
 
1537
 
/********************************************************************//**
1538
 
Shrinks a buffer pool instance. */
1539
 
static
1540
 
void
1541
 
buf_pool_shrink_instance(
1542
 
/*=====================*/
1543
 
        buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
1544
 
        ulint           chunk_size)     /*!< in: number of pages to remove */
1545
 
{
1546
 
        buf_chunk_t*    chunks;
1547
 
        buf_chunk_t*    chunk;
1548
 
        ulint           max_size;
1549
 
        ulint           max_free_size;
1550
 
        buf_chunk_t*    max_chunk;
1551
 
        buf_chunk_t*    max_free_chunk;
1552
 
 
1553
 
        ut_ad(!buf_pool_mutex_own(buf_pool));
1554
 
 
1555
 
try_again:
1556
 
        btr_search_disable(); /* Empty the adaptive hash index again */
1557
 
        buf_pool_mutex_enter(buf_pool);
1558
 
 
1559
 
shrink_again:
1560
 
        if (buf_pool->n_chunks <= 1) {
1561
 
 
1562
 
                /* Cannot shrink if there is only one chunk */
1563
 
                goto func_done;
1564
 
        }
1565
 
 
1566
 
        /* Search for the largest free chunk
1567
 
        not larger than the size difference */
1568
 
        chunks = buf_pool->chunks;
1569
 
        chunk = chunks + buf_pool->n_chunks;
1570
 
        max_size = max_free_size = 0;
1571
 
        max_chunk = max_free_chunk = NULL;
1572
 
 
1573
 
        while (--chunk >= chunks) {
1574
 
                if (chunk->size <= chunk_size
1575
 
                    && chunk->size > max_free_size) {
1576
 
                        if (chunk->size > max_size) {
1577
 
                                max_size = chunk->size;
1578
 
                                max_chunk = chunk;
1579
 
                        }
1580
 
 
1581
 
                        if (buf_chunk_all_free(chunk)) {
1582
 
                                max_free_size = chunk->size;
1583
 
                                max_free_chunk = chunk;
1584
 
                        }
1585
 
                }
1586
 
        }
1587
 
 
1588
 
        if (!max_free_size) {
1589
 
 
1590
 
                ulint           dirty   = 0;
1591
 
                ulint           nonfree = 0;
1592
 
                buf_block_t*    block;
1593
 
                buf_block_t*    bend;
1594
 
 
1595
 
                /* Cannot shrink: try again later
1596
 
                (do not assign srv_buf_pool_old_size) */
1597
 
                if (!max_chunk) {
1598
 
 
1599
 
                        goto func_exit;
1600
 
                }
1601
 
 
1602
 
                block = max_chunk->blocks;
1603
 
                bend = block + max_chunk->size;
1604
 
 
1605
 
                /* Move the blocks of chunk to the end of the
1606
 
                LRU list and try to flush them. */
1607
 
                for (; block < bend; block++) {
1608
 
                        switch (buf_block_get_state(block)) {
1609
 
                        case BUF_BLOCK_NOT_USED:
1610
 
                                continue;
1611
 
                        case BUF_BLOCK_FILE_PAGE:
1612
 
                                break;
1613
 
                        default:
1614
 
                                nonfree++;
1615
 
                                continue;
1616
 
                        }
1617
 
 
1618
 
                        mutex_enter(&block->mutex);
1619
 
                        /* The following calls will temporarily
1620
 
                        release block->mutex and buf_pool->mutex.
1621
 
                        Therefore, we have to always retry,
1622
 
                        even if !dirty && !nonfree. */
1623
 
 
1624
 
                        if (!buf_flush_ready_for_replace(&block->page)) {
1625
 
 
1626
 
                                buf_LRU_make_block_old(&block->page);
1627
 
                                dirty++;
1628
 
                        } else if (buf_LRU_free_block(&block->page, TRUE, NULL)
1629
 
                                   != BUF_LRU_FREED) {
1630
 
                                nonfree++;
1631
 
                        }
1632
 
 
1633
 
                        mutex_exit(&block->mutex);
1634
 
                }
1635
 
 
1636
 
                buf_pool_mutex_exit(buf_pool);
1637
 
 
1638
 
                /* Request for a flush of the chunk if it helps.
1639
 
                Do not flush if there are non-free blocks, since
1640
 
                flushing will not make the chunk freeable. */
1641
 
                if (nonfree) {
1642
 
                        /* Avoid busy-waiting. */
1643
 
                        os_thread_sleep(100000);
1644
 
                } else if (dirty
1645
 
                           && buf_flush_LRU(buf_pool, dirty)
1646
 
                              == ULINT_UNDEFINED) {
1647
 
 
1648
 
                        buf_flush_wait_batch_end(buf_pool, BUF_FLUSH_LRU);
1649
 
                }
1650
 
 
1651
 
                goto try_again;
1652
 
        }
1653
 
 
1654
 
        max_size = max_free_size;
1655
 
        max_chunk = max_free_chunk;
1656
 
 
1657
 
        buf_pool->old_pool_size = buf_pool->curr_pool_size;
1658
 
 
1659
 
        /* Rewrite buf_pool->chunks.  Copy everything but max_chunk. */
1660
 
        chunks = static_cast<buf_chunk_t *>(mem_alloc((buf_pool->n_chunks - 1) * sizeof *chunks));
1661
 
        memcpy(chunks, buf_pool->chunks,
1662
 
               (max_chunk - buf_pool->chunks) * sizeof *chunks);
1663
 
        memcpy(chunks + (max_chunk - buf_pool->chunks),
1664
 
               max_chunk + 1,
1665
 
               buf_pool->chunks + buf_pool->n_chunks
1666
 
               - (max_chunk + 1));
1667
 
        ut_a(buf_pool->curr_size > max_chunk->size);
1668
 
        buf_pool->curr_size -= max_chunk->size;
1669
 
        buf_pool->curr_pool_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
1670
 
        chunk_size -= max_chunk->size;
1671
 
        buf_chunk_free(buf_pool, max_chunk);
1672
 
        mem_free(buf_pool->chunks);
1673
 
        buf_pool->chunks = chunks;
1674
 
        buf_pool->n_chunks--;
1675
 
 
1676
 
        /* Allow a slack of one megabyte. */
1677
 
        if (chunk_size > 1048576 / UNIV_PAGE_SIZE) {
1678
 
 
1679
 
                goto shrink_again;
1680
 
        }
1681
 
        goto func_exit;
1682
 
 
1683
 
func_done:
1684
 
        buf_pool->old_pool_size = buf_pool->curr_pool_size;
1685
 
func_exit:
1686
 
        buf_pool_mutex_exit(buf_pool);
1687
 
        btr_search_enable();
1688
 
}
1689
 
 
1690
 
/********************************************************************//**
1691
 
Shrinks the buffer pool. */
1692
 
static
1693
 
void
1694
 
buf_pool_shrink(
1695
 
/*============*/
1696
 
        ulint   chunk_size)     /*!< in: number of pages to remove */
1697
 
{
1698
 
        ulint   i;
1699
 
 
1700
 
        for (i = 0; i < srv_buf_pool_instances; i++) {
1701
 
                buf_pool_t*     buf_pool;
1702
 
                ulint           instance_chunk_size;
1703
 
 
1704
 
                instance_chunk_size = chunk_size / srv_buf_pool_instances;
1705
 
                buf_pool = buf_pool_from_array(i);
1706
 
                buf_pool_shrink_instance(buf_pool, instance_chunk_size);
1707
 
        }
1708
 
 
1709
 
        buf_pool_set_sizes();
1710
 
}
1711
 
 
1712
 
/********************************************************************//**
Rebuild buf_pool->page_hash for a buffer pool instance. */
static
void
buf_pool_page_hash_rebuild_instance(
/*================================*/
	buf_pool_t*	buf_pool)		/*!< in: buffer pool instance */
{
	ulint		i;
	buf_page_t*	b;
	buf_chunk_t*	chunk;
	ulint		n_chunks;
	hash_table_t*	zip_hash;
	hash_table_t*	page_hash;

	buf_pool_mutex_enter(buf_pool);

	/* Free, create, and populate the hash table. */
	hash_table_free(buf_pool->page_hash);
	buf_pool->page_hash = page_hash = hash_create(2 * buf_pool->curr_size);
	zip_hash = hash_create(2 * buf_pool->curr_size);

	/* The zip_hash can be migrated wholesale; its fold function
	depends only on the block pointer, not on (space, offset). */
	HASH_MIGRATE(buf_pool->zip_hash, zip_hash, buf_page_t, hash,
		     BUF_POOL_ZIP_FOLD_BPAGE);

	hash_table_free(buf_pool->zip_hash);
	buf_pool->zip_hash = zip_hash;

	/* Insert the uncompressed file pages to buf_pool->page_hash. */

	chunk = buf_pool->chunks;
	n_chunks = buf_pool->n_chunks;

	/* Walk every block of every chunk; only blocks that currently
	hold a file page belong in the page hash. */
	for (i = 0; i < n_chunks; i++, chunk++) {
		ulint		j;
		buf_block_t*	block = chunk->blocks;

		for (j = 0; j < chunk->size; j++, block++) {
			if (buf_block_get_state(block)
			    == BUF_BLOCK_FILE_PAGE) {
				ut_ad(!block->page.in_zip_hash);
				ut_ad(block->page.in_page_hash);

				HASH_INSERT(buf_page_t, hash, page_hash,
					    buf_page_address_fold(
						    block->page.space,
						    block->page.offset),
					    &block->page);
			}
		}
	}

	/* Insert the compressed-only pages to buf_pool->page_hash.
	All such blocks are either in buf_pool->zip_clean or
	in buf_pool->flush_list. */

	for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
	     b = UT_LIST_GET_NEXT(list, b)) {
		ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
		ut_ad(!b->in_flush_list);
		ut_ad(b->in_LRU_list);
		ut_ad(b->in_page_hash);
		ut_ad(!b->in_zip_hash);

		HASH_INSERT(buf_page_t, hash, page_hash,
			    buf_page_address_fold(b->space, b->offset), b);
	}

	/* The flush list holds both compressed-only dirty pages (which
	must be re-inserted) and uncompressed pages (already inserted in
	the chunk walk above). */
	buf_flush_list_mutex_enter(buf_pool);
	for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
	     b = UT_LIST_GET_NEXT(list, b)) {
		ut_ad(b->in_flush_list);
		ut_ad(b->in_LRU_list);
		ut_ad(b->in_page_hash);
		ut_ad(!b->in_zip_hash);

		switch (buf_page_get_state(b)) {
		case BUF_BLOCK_ZIP_DIRTY:
			HASH_INSERT(buf_page_t, hash, page_hash,
				    buf_page_address_fold(b->space,
							  b->offset), b);
			break;
		case BUF_BLOCK_FILE_PAGE:
			/* uncompressed page */
			break;
		case BUF_BLOCK_ZIP_FREE:
		case BUF_BLOCK_ZIP_PAGE:
		case BUF_BLOCK_NOT_USED:
		case BUF_BLOCK_READY_FOR_USE:
		case BUF_BLOCK_MEMORY:
		case BUF_BLOCK_REMOVE_HASH:
			/* No other state may appear on the flush list. */
			ut_error;
			break;
		}
	}

	buf_flush_list_mutex_exit(buf_pool);
	buf_pool_mutex_exit(buf_pool);
}
1811
 
 
1812
 
/********************************************************************
1813
 
Determine if a block is a sentinel for a buffer pool watch.
1814
 
@return TRUE if a sentinel for a buffer pool watch, FALSE if not */
1815
 
UNIV_INTERN
1816
 
ibool
1817
 
buf_pool_watch_is_sentinel(
1818
 
/*=======================*/
1819
 
        buf_pool_t*             buf_pool,       /*!< buffer pool instance */
1820
 
        const buf_page_t*       bpage)          /*!< in: block */
1821
 
{
1822
 
        ut_ad(buf_page_in_file(bpage));
1823
 
 
1824
 
        if (bpage < &buf_pool->watch[0]
1825
 
            || bpage >= &buf_pool->watch[BUF_POOL_WATCH_SIZE]) {
1826
 
 
1827
 
                ut_ad(buf_page_get_state(bpage) != BUF_BLOCK_ZIP_PAGE
1828
 
                      || bpage->zip.data != NULL);
1829
 
 
1830
 
                return(FALSE);
1831
 
        }
1832
 
 
1833
 
        ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_PAGE);
1834
 
        ut_ad(!bpage->in_zip_hash);
1835
 
        ut_ad(bpage->in_page_hash);
1836
 
        ut_ad(bpage->zip.data == NULL);
1837
 
        ut_ad(bpage->buf_fix_count > 0);
1838
 
        return(TRUE);
1839
 
}
1840
 
 
1841
 
/****************************************************************//**
Add watch for the given page to be read in. Caller must have the buffer pool
mutex reserved.
@return NULL if watch set, block if the page is in the buffer pool */
UNIV_INTERN
buf_page_t*
buf_pool_watch_set(
/*===============*/
	ulint	space,	/*!< in: space id */
	ulint	offset,	/*!< in: page number */
	ulint	fold)	/*!< in: buf_page_address_fold(space, offset) */
{
	buf_page_t*	bpage;
	ulint		i;
	buf_pool_t*	buf_pool = buf_pool_get(space, offset);

	ut_ad(buf_pool_mutex_own(buf_pool));

	bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);

	if (UNIV_LIKELY_NULL(bpage)) {
		if (!buf_pool_watch_is_sentinel(buf_pool, bpage)) {
			/* The page was loaded meanwhile. */
			return(bpage);
		}
		/* Add to an existing watch. */
		bpage->buf_fix_count++;
		return(NULL);
	}

	/* No hash entry yet: claim a free sentinel slot from
	buf_pool->watch[] and install it in the page hash. */
	for (i = 0; i < BUF_POOL_WATCH_SIZE; i++) {
		bpage = &buf_pool->watch[i];

		/* Sentinel slots never carry page data or
		modification state, used or not. */
		ut_ad(bpage->access_time == 0);
		ut_ad(bpage->newest_modification == 0);
		ut_ad(bpage->oldest_modification == 0);
		ut_ad(bpage->zip.data == NULL);
		ut_ad(!bpage->in_zip_hash);

		switch (bpage->state) {
		case BUF_BLOCK_POOL_WATCH:
			/* This slot is free: take it. */
			ut_ad(!bpage->in_page_hash);
			ut_ad(bpage->buf_fix_count == 0);

			/* bpage is pointing to buf_pool->watch[],
			which is protected by buf_pool->mutex.
			Normally, buf_page_t objects are protected by
			buf_block_t::mutex or buf_pool->zip_mutex or both. */

			bpage->state = BUF_BLOCK_ZIP_PAGE;
			bpage->space = space;
			bpage->offset = offset;
			bpage->buf_fix_count = 1;

			ut_d(bpage->in_page_hash = TRUE);
			HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
				    fold, bpage);
			return(NULL);
		case BUF_BLOCK_ZIP_PAGE:
			/* This slot is already watching another page;
			keep scanning. */
			ut_ad(bpage->in_page_hash);
			ut_ad(bpage->buf_fix_count > 0);
			break;
		default:
			ut_error;
		}
	}

	/* Allocation failed.  Either the maximum number of purge
	threads should never exceed BUF_POOL_WATCH_SIZE, or this code
	should be modified to return a special non-NULL value and the
	caller should purge the record directly. */
	ut_error;

	/* Fix compiler warning */
	return(NULL);
}
1917
 
 
1918
 
/********************************************************************//**
1919
 
Rebuild buf_pool->page_hash. */
1920
 
static
1921
 
void
1922
 
buf_pool_page_hash_rebuild(void)
1923
 
/*============================*/
1924
 
{
1925
 
        ulint   i;
1926
 
 
1927
 
        for (i = 0; i < srv_buf_pool_instances; i++) {
1928
 
                buf_pool_page_hash_rebuild_instance(buf_pool_from_array(i));
1929
 
        }
1930
 
}
1931
 
 
1932
 
/********************************************************************//**
1933
 
Increase the buffer pool size of one buffer pool instance. */
1934
 
static
1935
 
void
1936
 
buf_pool_increase_instance(
1937
 
/*=======================*/
1938
 
        buf_pool_t*     buf_pool,       /*!< in: buffer pool instane */
1939
 
        ulint           change_size)    /*!< in: new size of the pool */
1940
 
{
1941
 
        buf_chunk_t*    chunks;
1942
 
        buf_chunk_t*    chunk;
1943
 
 
1944
 
        buf_pool_mutex_enter(buf_pool);
1945
 
        chunks = static_cast<buf_chunk_t *>(mem_alloc((buf_pool->n_chunks + 1) * sizeof *chunks));
1946
 
 
1947
 
        memcpy(chunks, buf_pool->chunks, buf_pool->n_chunks * sizeof *chunks);
1948
 
 
1949
 
        chunk = &chunks[buf_pool->n_chunks];
1950
 
 
1951
 
        if (!buf_chunk_init(buf_pool, chunk, change_size)) {
1952
 
                mem_free(chunks);
1953
 
        } else {
1954
 
                buf_pool->old_pool_size = buf_pool->curr_pool_size;
1955
 
                buf_pool->curr_size += chunk->size;
1956
 
                buf_pool->curr_pool_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
1957
 
                mem_free(buf_pool->chunks);
1958
 
                buf_pool->chunks = chunks;
1959
 
                buf_pool->n_chunks++;
1960
 
        }
1961
 
 
1962
 
        buf_pool_mutex_exit(buf_pool);
1963
 
}
1964
 
 
1965
 
/********************************************************************//**
1966
 
Increase the buffer pool size. */
1967
 
static
1968
 
void
1969
 
buf_pool_increase(
1970
 
/*==============*/
1971
 
        ulint   change_size)
1972
 
{
1973
 
        ulint   i;
1974
 
 
1975
 
        for (i = 0; i < srv_buf_pool_instances; i++) {
1976
 
                buf_pool_increase_instance(
1977
 
                        buf_pool_from_array(i),
1978
 
                        change_size / srv_buf_pool_instances);
1979
 
        }
1980
 
 
1981
 
        buf_pool_set_sizes();
1982
 
}
1983
 
 
1984
 
/********************************************************************//**
Resizes the buffer pool. */
UNIV_INTERN
void
buf_pool_resize(void)
/*=================*/
{
	ulint	change_size;
	/* Require at least one megabyte of change per instance before
	doing any work. */
	ulint	min_change_size = 1048576 * srv_buf_pool_instances;

	buf_pool_mutex_enter_all();

	if (srv_buf_pool_old_size == srv_buf_pool_size) {
		/* Requested size equals the current size: nothing
		to do. */
		buf_pool_mutex_exit_all();

		return;

	} else if (srv_buf_pool_curr_size + min_change_size
		   > srv_buf_pool_size) {
		/* Shrinking: convert the byte difference to pages. */
		change_size = (srv_buf_pool_curr_size - srv_buf_pool_size)
			    / UNIV_PAGE_SIZE;

		/* Release all pool mutexes before the long-running
		shrink, which takes them itself. */
		buf_pool_mutex_exit_all();

		/* Disable adaptive hash indexes and empty the index
		in order to free up memory in the buffer pool chunks. */
		buf_pool_shrink(change_size);

	} else if (srv_buf_pool_curr_size + min_change_size
		   < srv_buf_pool_size) {

		/* Enlarge the buffer pool by at least one megabyte */

		change_size = srv_buf_pool_size - srv_buf_pool_curr_size;

		buf_pool_mutex_exit_all();

		buf_pool_increase(change_size);
	} else {
		/* The requested change is smaller than the minimum
		granularity: cancel it by restoring the old size. */
		srv_buf_pool_size = srv_buf_pool_old_size;

		buf_pool_mutex_exit_all();

		return;
	}

	/* The page hash tables were sized for the old pool size;
	rebuild them for the new one. */
	buf_pool_page_hash_rebuild();
}
2034
 
 
2035
 
/****************************************************************//**
Remove the sentinel block for the watch before replacing it with a real block.
buf_page_watch_clear() or buf_page_watch_occurred() will notice that
the block has been replaced with the real block.  The sentinel slot is
returned to the free (BUF_BLOCK_POOL_WATCH) state.
(The former "@return reference count" note was stale: this function
returns void.) */
static
void
buf_pool_watch_remove(
/*==================*/
	buf_pool_t*	buf_pool,	/*!< buffer pool instance */
	ulint		fold,		/*!< in: buf_page_address_fold(
					space, offset) */
	buf_page_t*	watch)		/*!< in/out: sentinel for watch */
{
	ut_ad(buf_pool_mutex_own(buf_pool));

	/* Unhash the sentinel and mark its watch[] slot reusable. */
	HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, watch);
	ut_d(watch->in_page_hash = FALSE);
	watch->buf_fix_count = 0;
	watch->state = BUF_BLOCK_POOL_WATCH;
}
2056
 
 
2057
 
/****************************************************************//**
Stop watching if the page has been read in.
buf_pool_watch_set(space,offset) must have returned NULL before. */
UNIV_INTERN
void
buf_pool_watch_unset(
/*=================*/
	ulint	space,	/*!< in: space id */
	ulint	offset)	/*!< in: page number */
{
	buf_page_t*	bpage;
	buf_pool_t*	buf_pool = buf_pool_get(space, offset);
	ulint		fold = buf_page_address_fold(space, offset);

	buf_pool_mutex_enter(buf_pool);
	bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
	/* The page must exist because buf_pool_watch_set()
	increments buf_fix_count. */
	ut_a(bpage);

	if (UNIV_UNLIKELY(!buf_pool_watch_is_sentinel(buf_pool, bpage))) {
		/* The watched page was read in meanwhile and replaced
		the sentinel: release our fix on the real block under
		its own block mutex. */
		mutex_t* mutex = buf_page_get_mutex(bpage);

		mutex_enter(mutex);
		ut_a(bpage->buf_fix_count > 0);
		bpage->buf_fix_count--;
		mutex_exit(mutex);
	} else {
		/* Still a sentinel: it is protected by the pool mutex
		we already hold.  Remove it when the last watcher
		lets go. */
		ut_a(bpage->buf_fix_count > 0);

		if (UNIV_LIKELY(!--bpage->buf_fix_count)) {
			buf_pool_watch_remove(buf_pool, fold, bpage);
		}
	}

	buf_pool_mutex_exit(buf_pool);
}
2094
 
 
2095
 
/****************************************************************//**
2096
 
Check if the page has been read in.
2097
 
This may only be called after buf_pool_watch_set(space,offset)
2098
 
has returned NULL and before invoking buf_pool_watch_unset(space,offset).
2099
 
@return FALSE if the given page was not read in, TRUE if it was */
2100
 
UNIV_INTERN
2101
 
ibool
2102
 
buf_pool_watch_occurred(
2103
 
/*====================*/
2104
 
        ulint   space,  /*!< in: space id */
2105
 
        ulint   offset) /*!< in: page number */
2106
 
{
2107
 
        ibool           ret;
2108
 
        buf_page_t*     bpage;
2109
 
        buf_pool_t*     buf_pool = buf_pool_get(space, offset);
2110
 
        ulint           fold    = buf_page_address_fold(space, offset);
2111
 
 
2112
 
        buf_pool_mutex_enter(buf_pool);
2113
 
 
2114
 
        bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
2115
 
        /* The page must exist because buf_pool_watch_set()
2116
 
        increments buf_fix_count. */
2117
 
        ut_a(bpage);
2118
 
        ret = !buf_pool_watch_is_sentinel(buf_pool, bpage);
2119
 
        buf_pool_mutex_exit(buf_pool);
2120
 
 
2121
 
        return(ret);
2122
 
}
2123
 
 
2124
 
/********************************************************************//**
2125
 
Moves a page to the start of the buffer pool LRU list. This high-level
2126
 
function can be used to prevent an important page from slipping out of
2127
 
the buffer pool. */
2128
 
UNIV_INTERN
2129
 
void
2130
 
buf_page_make_young(
2131
 
/*================*/
2132
 
        buf_page_t*     bpage)  /*!< in: buffer block of a file page */
2133
 
{
2134
 
        buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
2135
 
 
2136
 
        buf_pool_mutex_enter(buf_pool);
2137
 
 
2138
 
        ut_a(buf_page_in_file(bpage));
2139
 
 
2140
 
        buf_LRU_make_block_young(bpage);
2141
 
 
2142
 
        buf_pool_mutex_exit(buf_pool);
2143
 
}
2144
 
 
2145
 
/********************************************************************//**
Sets the time of the first access of a page and moves a page to the
start of the buffer pool LRU list if it is too old.  This high-level
function can be used to prevent an important page from slipping
out of the buffer pool. */
static
void
buf_page_set_accessed_make_young(
/*=============================*/
	buf_page_t*	bpage,		/*!< in/out: buffer block of a
					file page */
	unsigned	access_time)	/*!< in: bpage->access_time
					read under mutex protection,
					or 0 if unknown */
{
	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);

	/* The pool mutex must NOT be held on entry; each branch below
	acquires it only for the short critical section it needs. */
	ut_ad(!buf_pool_mutex_own(buf_pool));
	ut_a(buf_page_in_file(bpage));

	if (buf_page_peek_if_too_old(bpage)) {
		/* The page has aged out of the young end: move it back
		to the head of the LRU list. */
		buf_pool_mutex_enter(buf_pool);
		buf_LRU_make_block_young(bpage);
		buf_pool_mutex_exit(buf_pool);
	} else if (!access_time) {
		/* First recorded access: stamp the access time. */
		ulint	time_ms = ut_time_ms();
		buf_pool_mutex_enter(buf_pool);
		buf_page_set_accessed(bpage, time_ms);
		buf_pool_mutex_exit(buf_pool);
	}
}
2176
 
 
2177
 
/********************************************************************//**
2178
 
Resets the check_index_page_at_flush field of a page if found in the buffer
2179
 
pool. */
2180
 
UNIV_INTERN
2181
 
void
2182
 
buf_reset_check_index_page_at_flush(
2183
 
/*================================*/
2184
 
        ulint   space,  /*!< in: space id */
2185
 
        ulint   offset) /*!< in: page number */
2186
 
{
2187
 
        buf_block_t*    block;
2188
 
        buf_pool_t*     buf_pool = buf_pool_get(space, offset);
2189
 
 
2190
 
        buf_pool_mutex_enter(buf_pool);
2191
 
 
2192
 
        block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset);
2193
 
 
2194
 
        if (block && buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE) {
2195
 
                ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page));
2196
 
                block->check_index_page_at_flush = FALSE;
2197
 
        }
2198
 
 
2199
 
        buf_pool_mutex_exit(buf_pool);
2200
 
}
2201
 
 
2202
 
/********************************************************************//**
2203
 
Returns the current state of is_hashed of a page. FALSE if the page is
2204
 
not in the pool. NOTE that this operation does not fix the page in the
2205
 
pool if it is found there.
2206
 
@return TRUE if page hash index is built in search system */
2207
 
UNIV_INTERN
2208
 
ibool
2209
 
buf_page_peek_if_search_hashed(
2210
 
/*===========================*/
2211
 
        ulint   space,  /*!< in: space id */
2212
 
        ulint   offset) /*!< in: page number */
2213
 
{
2214
 
        buf_block_t*    block;
2215
 
        ibool           is_hashed;
2216
 
        buf_pool_t*     buf_pool = buf_pool_get(space, offset);
2217
 
 
2218
 
        buf_pool_mutex_enter(buf_pool);
2219
 
 
2220
 
        block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset);
2221
 
 
2222
 
        if (!block || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
2223
 
                is_hashed = FALSE;
2224
 
        } else {
2225
 
                ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page));
2226
 
                is_hashed = block->is_hashed;
2227
 
        }
2228
 
 
2229
 
        buf_pool_mutex_exit(buf_pool);
2230
 
 
2231
 
        return(is_hashed);
2232
 
}
2233
 
 
2234
 
#ifdef UNIV_DEBUG_FILE_ACCESSES
2235
 
/********************************************************************//**
2236
 
Sets file_page_was_freed TRUE if the page is found in the buffer pool.
2237
 
This function should be called when we free a file page and want the
2238
 
debug version to check that it is not accessed any more unless
2239
 
reallocated.
2240
 
@return control block if found in page hash table, otherwise NULL */
2241
 
UNIV_INTERN
2242
 
buf_page_t*
2243
 
buf_page_set_file_page_was_freed(
2244
 
/*=============================*/
2245
 
        ulint   space,  /*!< in: space id */
2246
 
        ulint   offset) /*!< in: page number */
2247
 
{
2248
 
        buf_page_t*     bpage;
2249
 
        buf_pool_t*     buf_pool = buf_pool_get(space, offset);
2250
 
 
2251
 
        buf_pool_mutex_enter(buf_pool);
2252
 
 
2253
 
        bpage = buf_page_hash_get(buf_pool, space, offset);
2254
 
 
2255
 
        if (bpage) {
2256
 
                ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
2257
 
                bpage->file_page_was_freed = TRUE;
2258
 
        }
2259
 
 
2260
 
        buf_pool_mutex_exit(buf_pool);
2261
 
 
2262
 
        return(bpage);
2263
 
}
2264
 
 
2265
 
/********************************************************************//**
2266
 
Sets file_page_was_freed FALSE if the page is found in the buffer pool.
2267
 
This function should be called when we free a file page and want the
2268
 
debug version to check that it is not accessed any more unless
2269
 
reallocated.
2270
 
@return control block if found in page hash table, otherwise NULL */
2271
 
UNIV_INTERN
2272
 
buf_page_t*
2273
 
buf_page_reset_file_page_was_freed(
2274
 
/*===============================*/
2275
 
        ulint   space,  /*!< in: space id */
2276
 
        ulint   offset) /*!< in: page number */
2277
 
{
2278
 
        buf_page_t*     bpage;
2279
 
        buf_pool_t*     buf_pool = buf_pool_get(space, offset);
2280
 
 
2281
 
        buf_pool_mutex_enter(buf_pool);
2282
 
 
2283
 
        bpage = buf_page_hash_get(buf_pool, space, offset);
2284
 
 
2285
 
        if (bpage) {
2286
 
                ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
2287
 
                bpage->file_page_was_freed = FALSE;
2288
 
        }
2289
 
 
2290
 
        buf_pool_mutex_exit(buf_pool);
2291
 
 
2292
 
        return(bpage);
2293
 
}
2294
 
#endif /* UNIV_DEBUG_FILE_ACCESSES */
2295
 
 
2296
 
/********************************************************************//**
2297
 
Get read access to a compressed page (usually of type
2298
 
FIL_PAGE_TYPE_ZBLOB or FIL_PAGE_TYPE_ZBLOB2).
2299
 
The page must be released with buf_page_release_zip().
2300
 
NOTE: the page is not protected by any latch.  Mutual exclusion has to
2301
 
be implemented at a higher level.  In other words, all possible
2302
 
accesses to a given page through this function must be protected by
2303
 
the same set of mutexes or latches.
2304
 
@return pointer to the block */
2305
 
UNIV_INTERN
2306
 
buf_page_t*
2307
 
buf_page_get_zip(
2308
 
/*=============*/
2309
 
        ulint           space,  /*!< in: space id */
2310
 
        ulint           zip_size,/*!< in: compressed page size */
2311
 
        ulint           offset) /*!< in: page number */
2312
 
{
2313
 
        buf_page_t*     bpage;
2314
 
        mutex_t*        block_mutex;
2315
 
        ibool           must_read;
2316
 
        unsigned        access_time;
2317
 
        buf_pool_t*     buf_pool = buf_pool_get(space, offset);
2318
 
 
2319
 
#ifndef UNIV_LOG_DEBUG
2320
 
        ut_ad(!ibuf_inside());
2321
 
#endif
2322
 
        buf_pool->stat.n_page_gets++;
2323
 
 
2324
 
        for (;;) {
2325
 
                buf_pool_mutex_enter(buf_pool);
2326
 
lookup:
2327
 
                bpage = buf_page_hash_get(buf_pool, space, offset);
2328
 
                if (bpage) {
2329
 
                        ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
2330
 
                        break;
2331
 
                }
2332
 
 
2333
 
                /* Page not in buf_pool: needs to be read from file */
2334
 
 
2335
 
                buf_pool_mutex_exit(buf_pool);
2336
 
 
2337
 
                buf_read_page(space, zip_size, offset);
2338
 
 
2339
 
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
2340
 
                ut_a(++buf_dbg_counter % 37 || buf_validate());
2341
 
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
2342
 
        }
2343
 
 
2344
 
        if (UNIV_UNLIKELY(!bpage->zip.data)) {
2345
 
                /* There is no compressed page. */
2346
 
err_exit:
2347
 
                buf_pool_mutex_exit(buf_pool);
2348
 
                return(NULL);
2349
 
        }
2350
 
 
2351
 
        ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
2352
 
 
2353
 
        switch (buf_page_get_state(bpage)) {
2354
 
        case BUF_BLOCK_NOT_USED:
2355
 
        case BUF_BLOCK_READY_FOR_USE:
2356
 
        case BUF_BLOCK_MEMORY:
2357
 
        case BUF_BLOCK_REMOVE_HASH:
2358
 
        case BUF_BLOCK_ZIP_FREE:
2359
 
                break;
2360
 
        case BUF_BLOCK_ZIP_PAGE:
2361
 
        case BUF_BLOCK_ZIP_DIRTY:
2362
 
                block_mutex = &buf_pool->zip_mutex;
2363
 
                mutex_enter(block_mutex);
2364
 
                bpage->buf_fix_count++;
2365
 
                goto got_block;
2366
 
        case BUF_BLOCK_FILE_PAGE:
2367
 
                block_mutex = &((buf_block_t*) bpage)->mutex;
2368
 
                mutex_enter(block_mutex);
2369
 
 
2370
 
                /* Discard the uncompressed page frame if possible. */
2371
 
                if (buf_LRU_free_block(bpage, FALSE, NULL)
2372
 
                    == BUF_LRU_FREED) {
2373
 
 
2374
 
                        mutex_exit(block_mutex);
2375
 
                        goto lookup;
2376
 
                }
2377
 
 
2378
 
                buf_block_buf_fix_inc((buf_block_t*) bpage,
2379
 
                                      __FILE__, __LINE__);
2380
 
                goto got_block;
2381
 
        }
2382
 
 
2383
 
        ut_error;
2384
 
        goto err_exit;
2385
 
 
2386
 
got_block:
2387
 
        must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ;
2388
 
        access_time = buf_page_is_accessed(bpage);
2389
 
 
2390
 
        buf_pool_mutex_exit(buf_pool);
2391
 
 
2392
 
        mutex_exit(block_mutex);
2393
 
 
2394
 
        buf_page_set_accessed_make_young(bpage, access_time);
2395
 
 
2396
 
#ifdef UNIV_DEBUG_FILE_ACCESSES
2397
 
        ut_a(!bpage->file_page_was_freed);
2398
 
#endif
2399
 
 
2400
 
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
2401
 
        ut_a(++buf_dbg_counter % 5771 || buf_validate());
2402
 
        ut_a(bpage->buf_fix_count > 0);
2403
 
        ut_a(buf_page_in_file(bpage));
2404
 
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
2405
 
 
2406
 
        if (must_read) {
2407
 
                /* Let us wait until the read operation
2408
 
                completes */
2409
 
 
2410
 
                for (;;) {
2411
 
                        enum buf_io_fix io_fix;
2412
 
 
2413
 
                        mutex_enter(block_mutex);
2414
 
                        io_fix = buf_page_get_io_fix(bpage);
2415
 
                        mutex_exit(block_mutex);
2416
 
 
2417
 
                        if (io_fix == BUF_IO_READ) {
2418
 
 
2419
 
                                os_thread_sleep(WAIT_FOR_READ);
2420
 
                        } else {
2421
 
                                break;
2422
 
                        }
2423
 
                }
2424
 
        }
2425
 
 
2426
 
#ifdef UNIV_IBUF_COUNT_DEBUG
2427
 
        ut_a(ibuf_count_get(buf_page_get_space(bpage),
2428
 
                            buf_page_get_page_no(bpage)) == 0);
2429
 
#endif
2430
 
        return(bpage);
2431
 
}
2432
 
 
2433
 
/********************************************************************//**
2434
 
Initialize some fields of a control block. */
2435
 
UNIV_INLINE
2436
 
void
2437
 
buf_block_init_low(
2438
 
/*===============*/
2439
 
        buf_block_t*    block)  /*!< in: block to init */
2440
 
{
2441
 
        block->check_index_page_at_flush = FALSE;
2442
 
        block->index            = NULL;
2443
 
 
2444
 
        block->n_hash_helps     = 0;
2445
 
        block->is_hashed        = FALSE;
2446
 
        block->n_fields         = 1;
2447
 
        block->n_bytes          = 0;
2448
 
        block->left_side        = TRUE;
2449
 
}
2450
 
#endif /* !UNIV_HOTBACKUP */
2451
 
 
2452
 
/********************************************************************//**
2453
 
Decompress a block.
2454
 
@return TRUE if successful */
2455
 
UNIV_INTERN
2456
 
ibool
2457
 
buf_zip_decompress(
2458
 
/*===============*/
2459
 
        buf_block_t*    block,  /*!< in/out: block */
2460
 
        ibool           check)  /*!< in: TRUE=verify the page checksum */
2461
 
{
2462
 
        const byte*     frame           = block->page.zip.data;
2463
 
        ulint           stamp_checksum  = mach_read_from_4(
2464
 
                frame + FIL_PAGE_SPACE_OR_CHKSUM);
2465
 
 
2466
 
        ut_ad(buf_block_get_zip_size(block));
2467
 
        ut_a(buf_block_get_space(block) != 0);
2468
 
 
2469
 
        if (UNIV_LIKELY(check && stamp_checksum != BUF_NO_CHECKSUM_MAGIC)) {
2470
 
                ulint   calc_checksum   = page_zip_calc_checksum(
2471
 
                        frame, page_zip_get_size(&block->page.zip));
2472
 
 
2473
 
                if (UNIV_UNLIKELY(stamp_checksum != calc_checksum)) {
2474
 
                        ut_print_timestamp(stderr);
2475
 
                        fprintf(stderr,
2476
 
                                "  InnoDB: compressed page checksum mismatch"
2477
 
                                " (space %u page %u): %lu != %lu\n",
2478
 
                                block->page.space, block->page.offset,
2479
 
                                stamp_checksum, calc_checksum);
2480
 
                        return(FALSE);
2481
 
                }
2482
 
        }
2483
 
 
2484
 
        switch (fil_page_get_type(frame)) {
2485
 
        case FIL_PAGE_INDEX:
2486
 
                if (page_zip_decompress(&block->page.zip,
2487
 
                                        block->frame, TRUE)) {
2488
 
                        return(TRUE);
2489
 
                }
2490
 
 
2491
 
                fprintf(stderr,
2492
 
                        "InnoDB: unable to decompress space %lu page %lu\n",
2493
 
                        (ulong) block->page.space,
2494
 
                        (ulong) block->page.offset);
2495
 
                return(FALSE);
2496
 
 
2497
 
        case FIL_PAGE_TYPE_ALLOCATED:
2498
 
        case FIL_PAGE_INODE:
2499
 
        case FIL_PAGE_IBUF_BITMAP:
2500
 
        case FIL_PAGE_TYPE_FSP_HDR:
2501
 
        case FIL_PAGE_TYPE_XDES:
2502
 
        case FIL_PAGE_TYPE_ZBLOB:
2503
 
        case FIL_PAGE_TYPE_ZBLOB2:
2504
 
                /* Copy to uncompressed storage. */
2505
 
                memcpy(block->frame, frame,
2506
 
                       buf_block_get_zip_size(block));
2507
 
                return(TRUE);
2508
 
        }
2509
 
 
2510
 
        ut_print_timestamp(stderr);
2511
 
        fprintf(stderr,
2512
 
                "  InnoDB: unknown compressed page"
2513
 
                " type %lu\n",
2514
 
                fil_page_get_type(frame));
2515
 
        return(FALSE);
2516
 
}
2517
 
 
2518
 
#ifndef UNIV_HOTBACKUP
2519
 
/*******************************************************************//**
2520
 
Gets the block to whose frame the pointer is pointing to if found
2521
 
in this buffer pool instance.
2522
 
@return pointer to block */
2523
 
static
2524
 
buf_block_t*
2525
 
buf_block_align_instance(
2526
 
/*=====================*/
2527
 
        buf_pool_t*     buf_pool,       /*!< in: buffer in which the block
2528
 
                                        resides */
2529
 
        const byte*     ptr)            /*!< in: pointer to a frame */
2530
 
{
2531
 
        buf_chunk_t*    chunk;
2532
 
        ulint           i;
2533
 
 
2534
 
        /* TODO: protect buf_pool->chunks with a mutex (it will
2535
 
        currently remain constant after buf_pool_init()) */
2536
 
        for (chunk = buf_pool->chunks, i = buf_pool->n_chunks; i--; chunk++) {
2537
 
                lint    offs = ptr - chunk->blocks->frame;
2538
 
 
2539
 
                if (UNIV_UNLIKELY(offs < 0)) {
2540
 
 
2541
 
                        continue;
2542
 
                }
2543
 
 
2544
 
                offs >>= UNIV_PAGE_SIZE_SHIFT;
2545
 
 
2546
 
                if (UNIV_LIKELY((ulint) offs < chunk->size)) {
2547
 
                        buf_block_t*    block = &chunk->blocks[offs];
2548
 
 
2549
 
                        /* The function buf_chunk_init() invokes
2550
 
                        buf_block_init() so that block[n].frame ==
2551
 
                        block->frame + n * UNIV_PAGE_SIZE.  Check it. */
2552
 
                        ut_ad(block->frame == page_align(ptr));
2553
 
#ifdef UNIV_DEBUG
2554
 
                        /* A thread that updates these fields must
2555
 
                        hold buf_pool->mutex and block->mutex.  Acquire
2556
 
                        only the latter. */
2557
 
                        mutex_enter(&block->mutex);
2558
 
 
2559
 
                        switch (buf_block_get_state(block)) {
2560
 
                        case BUF_BLOCK_ZIP_FREE:
2561
 
                        case BUF_BLOCK_ZIP_PAGE:
2562
 
                        case BUF_BLOCK_ZIP_DIRTY:
2563
 
                                /* These types should only be used in
2564
 
                                the compressed buffer pool, whose
2565
 
                                memory is allocated from
2566
 
                                buf_pool->chunks, in UNIV_PAGE_SIZE
2567
 
                                blocks flagged as BUF_BLOCK_MEMORY. */
2568
 
                                ut_error;
2569
 
                                break;
2570
 
                        case BUF_BLOCK_NOT_USED:
2571
 
                        case BUF_BLOCK_READY_FOR_USE:
2572
 
                        case BUF_BLOCK_MEMORY:
2573
 
                                /* Some data structures contain
2574
 
                                "guess" pointers to file pages.  The
2575
 
                                file pages may have been freed and
2576
 
                                reused.  Do not complain. */
2577
 
                                break;
2578
 
                        case BUF_BLOCK_REMOVE_HASH:
2579
 
                                /* buf_LRU_block_remove_hashed_page()
2580
 
                                will overwrite the FIL_PAGE_OFFSET and
2581
 
                                FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID with
2582
 
                                0xff and set the state to
2583
 
                                BUF_BLOCK_REMOVE_HASH. */
2584
 
                                ut_ad(page_get_space_id(page_align(ptr))
2585
 
                                      == 0xffffffff);
2586
 
                                ut_ad(page_get_page_no(page_align(ptr))
2587
 
                                      == 0xffffffff);
2588
 
                                break;
2589
 
                        case BUF_BLOCK_FILE_PAGE:
2590
 
                                ut_ad(block->page.space
2591
 
                                      == page_get_space_id(page_align(ptr)));
2592
 
                                ut_ad(block->page.offset
2593
 
                                      == page_get_page_no(page_align(ptr)));
2594
 
                                break;
2595
 
                        }
2596
 
 
2597
 
                        mutex_exit(&block->mutex);
2598
 
#endif /* UNIV_DEBUG */
2599
 
 
2600
 
                        return(block);
2601
 
                }
2602
 
        }
2603
 
 
2604
 
        return(NULL);
2605
 
}
2606
 
 
2607
 
/*******************************************************************//**
2608
 
Gets the block to whose frame the pointer is pointing to.
2609
 
@return pointer to block, never NULL */
2610
 
UNIV_INTERN
2611
 
buf_block_t*
2612
 
buf_block_align(
2613
 
/*============*/
2614
 
        const byte*     ptr)    /*!< in: pointer to a frame */
2615
 
{
2616
 
        ulint           i;
2617
 
 
2618
 
        for (i = 0; i < srv_buf_pool_instances; i++) {
2619
 
                buf_block_t*    block;
2620
 
 
2621
 
                block = buf_block_align_instance(
2622
 
                        buf_pool_from_array(i), ptr);
2623
 
                if (block) {
2624
 
                        return(block);
2625
 
                }
2626
 
        }
2627
 
 
2628
 
        /* The block should always be found. */
2629
 
        ut_error;
2630
 
        return(NULL);
2631
 
}
2632
 
 
2633
 
/********************************************************************//**
2634
 
Find out if a pointer belongs to a buf_block_t. It can be a pointer to
2635
 
the buf_block_t itself or a member of it. This functions checks one of
2636
 
the buffer pool instances.
2637
 
@return TRUE if ptr belongs to a buf_block_t struct */
2638
 
static
2639
 
ibool
2640
 
buf_pointer_is_block_field_instance(
2641
 
/*================================*/
2642
 
        buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
2643
 
        const void*     ptr)            /*!< in: pointer not dereferenced */
2644
 
{
2645
 
        const buf_chunk_t*              chunk   = buf_pool->chunks;
2646
 
        const buf_chunk_t* const        echunk  = chunk + buf_pool->n_chunks;
2647
 
 
2648
 
        /* TODO: protect buf_pool->chunks with a mutex (it will
2649
 
        currently remain constant after buf_pool_init()) */
2650
 
        while (chunk < echunk) {
2651
 
                if (ptr >= (void *)chunk->blocks
2652
 
                    && ptr < (void *)(chunk->blocks + chunk->size)) {
2653
 
 
2654
 
                        return(TRUE);
2655
 
                }
2656
 
 
2657
 
                chunk++;
2658
 
        }
2659
 
 
2660
 
        return(FALSE);
2661
 
}
2662
 
 
2663
 
/********************************************************************//**
2664
 
Find out if a pointer belongs to a buf_block_t. It can be a pointer to
2665
 
the buf_block_t itself or a member of it
2666
 
@return TRUE if ptr belongs to a buf_block_t struct */
2667
 
UNIV_INTERN
2668
 
ibool
2669
 
buf_pointer_is_block_field(
2670
 
/*=======================*/
2671
 
        const void*     ptr)    /*!< in: pointer not dereferenced */
2672
 
{
2673
 
        ulint   i;
2674
 
 
2675
 
        for (i = 0; i < srv_buf_pool_instances; i++) {
2676
 
                ibool   found;
2677
 
 
2678
 
                found = buf_pointer_is_block_field_instance(
2679
 
                        buf_pool_from_array(i), ptr);
2680
 
                if (found) {
2681
 
                        return(TRUE);
2682
 
                }
2683
 
        }
2684
 
 
2685
 
        return(FALSE);
2686
 
}
2687
 
 
2688
 
/********************************************************************//**
2689
 
Find out if a buffer block was created by buf_chunk_init().
2690
 
@return TRUE if "block" has been added to buf_pool->free by buf_chunk_init() */
2691
 
static
2692
 
ibool
2693
 
buf_block_is_uncompressed(
2694
 
/*======================*/
2695
 
        buf_pool_t*             buf_pool,       /*!< in: buffer pool instance */
2696
 
        const buf_block_t*      block)          /*!< in: pointer to block,
2697
 
                                                not dereferenced */
2698
 
{
2699
 
        ut_ad(buf_pool_mutex_own(buf_pool));
2700
 
 
2701
 
        if (UNIV_UNLIKELY((((ulint) block) % sizeof *block) != 0)) {
2702
 
                /* The pointer should be aligned. */
2703
 
                return(FALSE);
2704
 
        }
2705
 
 
2706
 
        return(buf_pointer_is_block_field_instance(buf_pool, (void *)block));
2707
 
}
2708
 
 
2709
 
/********************************************************************//**
2710
 
This is the general function used to get access to a database page.
2711
 
@return pointer to the block or NULL */
2712
 
UNIV_INTERN
2713
 
buf_block_t*
2714
 
buf_page_get_gen(
2715
 
/*=============*/
2716
 
        ulint           space,  /*!< in: space id */
2717
 
        ulint           zip_size,/*!< in: compressed page size in bytes
2718
 
                                or 0 for uncompressed pages */
2719
 
        ulint           offset, /*!< in: page number */
2720
 
        ulint           rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
2721
 
        buf_block_t*    guess,  /*!< in: guessed block or NULL */
2722
 
        ulint           mode,   /*!< in: BUF_GET, BUF_GET_IF_IN_POOL,
2723
 
                                BUF_GET_NO_LATCH, or
2724
 
                                BUF_GET_IF_IN_POOL_OR_WATCH */
2725
 
        const char*     file,   /*!< in: file name */
2726
 
        ulint           line,   /*!< in: line where called */
2727
 
        mtr_t*          mtr)    /*!< in: mini-transaction */
2728
 
{
2729
 
        buf_block_t*    block;
2730
 
        ulint           fold;
2731
 
        unsigned        access_time;
2732
 
        ulint           fix_type;
2733
 
        ibool           must_read;
2734
 
        ulint           retries = 0;
2735
 
        buf_pool_t*     buf_pool = buf_pool_get(space, offset);
2736
 
 
2737
 
        ut_ad(mtr);
2738
 
        ut_ad(mtr->state == MTR_ACTIVE);
2739
 
        ut_ad((rw_latch == RW_S_LATCH)
2740
 
              || (rw_latch == RW_X_LATCH)
2741
 
              || (rw_latch == RW_NO_LATCH));
2742
 
        ut_ad((mode != BUF_GET_NO_LATCH) || (rw_latch == RW_NO_LATCH));
2743
 
        ut_ad(mode == BUF_GET
2744
 
              || mode == BUF_GET_IF_IN_POOL
2745
 
              || mode == BUF_GET_NO_LATCH
2746
 
              || mode == BUF_GET_IF_IN_POOL_OR_WATCH);
2747
 
        ut_ad(zip_size == fil_space_get_zip_size(space));
2748
 
        ut_ad(ut_is_2pow(zip_size));
2749
 
#ifndef UNIV_LOG_DEBUG
2750
 
        ut_ad(!ibuf_inside() || ibuf_page(space, zip_size, offset, NULL));
2751
 
#endif
2752
 
        buf_pool->stat.n_page_gets++;
2753
 
        fold = buf_page_address_fold(space, offset);
2754
 
loop:
2755
 
        block = guess;
2756
 
        buf_pool_mutex_enter(buf_pool);
2757
 
 
2758
 
        if (block) {
2759
 
                /* If the guess is a compressed page descriptor that
2760
 
                has been allocated by buf_buddy_alloc(), it may have
2761
 
                been invalidated by buf_buddy_relocate().  In that
2762
 
                case, block could point to something that happens to
2763
 
                contain the expected bits in block->page.  Similarly,
2764
 
                the guess may be pointing to a buffer pool chunk that
2765
 
                has been released when resizing the buffer pool. */
2766
 
 
2767
 
                if (!buf_block_is_uncompressed(buf_pool, block)
2768
 
                    || offset != block->page.offset
2769
 
                    || space != block->page.space
2770
 
                    || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
2771
 
 
2772
 
                        block = guess = NULL;
2773
 
                } else {
2774
 
                        ut_ad(!block->page.in_zip_hash);
2775
 
                        ut_ad(block->page.in_page_hash);
2776
 
                }
2777
 
        }
2778
 
 
2779
 
        if (block == NULL) {
2780
 
                block = (buf_block_t*) buf_page_hash_get_low(
2781
 
                        buf_pool, space, offset, fold);
2782
 
        }
2783
 
 
2784
 
loop2:
2785
 
        if (block && buf_pool_watch_is_sentinel(buf_pool, &block->page)) {
2786
 
                block = NULL;
2787
 
        }
2788
 
 
2789
 
        if (block == NULL) {
2790
 
                /* Page not in buf_pool: needs to be read from file */
2791
 
 
2792
 
                if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
2793
 
                        block = (buf_block_t*) buf_pool_watch_set(
2794
 
                                space, offset, fold);
2795
 
 
2796
 
                        if (UNIV_LIKELY_NULL(block)) {
2797
 
 
2798
 
                                goto got_block;
2799
 
                        }
2800
 
                }
2801
 
 
2802
 
                buf_pool_mutex_exit(buf_pool);
2803
 
 
2804
 
                if (mode == BUF_GET_IF_IN_POOL
2805
 
                    || mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
2806
 
 
2807
 
                        return(NULL);
2808
 
                }
2809
 
 
2810
 
                if (buf_read_page(space, zip_size, offset)) {
2811
 
                        retries = 0;
2812
 
                } else if (retries < BUF_PAGE_READ_MAX_RETRIES) {
2813
 
                        ++retries;
2814
 
                } else {
2815
 
                        fprintf(stderr, "InnoDB: Error: Unable"
2816
 
                                " to read tablespace %lu page no"
2817
 
                                " %lu into the buffer pool after"
2818
 
                                " %lu attempts\n"
2819
 
                                "InnoDB: The most probable cause"
2820
 
                                " of this error may be that the"
2821
 
                                " table has been corrupted.\n"
2822
 
                                "InnoDB: You can try to fix this"
2823
 
                                " problem by using"
2824
 
                                " innodb_force_recovery.\n"
2825
 
                                "InnoDB: Please see reference manual"
2826
 
                                " for more details.\n"
2827
 
                                "InnoDB: Aborting...\n",
2828
 
                                space, offset,
2829
 
                                BUF_PAGE_READ_MAX_RETRIES);
2830
 
 
2831
 
                        ut_error;
2832
 
                }
2833
 
 
2834
 
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
2835
 
                ut_a(++buf_dbg_counter % 37 || buf_validate());
2836
 
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
2837
 
                goto loop;
2838
 
        }
2839
 
 
2840
 
got_block:
2841
 
        ut_ad(page_zip_get_size(&block->page.zip) == zip_size);
2842
 
 
2843
 
        must_read = buf_block_get_io_fix(block) == BUF_IO_READ;
2844
 
 
2845
 
        if (must_read && mode == BUF_GET_IF_IN_POOL) {
2846
 
 
2847
 
                /* The page is being read to buffer pool,
2848
 
                but we cannot wait around for the read to
2849
 
                complete. */
2850
 
                buf_pool_mutex_exit(buf_pool);
2851
 
 
2852
 
                return(NULL);
2853
 
        }
2854
 
 
2855
 
        switch (buf_block_get_state(block)) {
2856
 
                buf_page_t*     bpage;
2857
 
                ibool           success;
2858
 
 
2859
 
        case BUF_BLOCK_FILE_PAGE:
2860
 
                break;
2861
 
 
2862
 
        case BUF_BLOCK_ZIP_PAGE:
2863
 
        case BUF_BLOCK_ZIP_DIRTY:
2864
 
                bpage = &block->page;
2865
 
                /* Protect bpage->buf_fix_count. */
2866
 
                mutex_enter(&buf_pool->zip_mutex);
2867
 
 
2868
 
                if (bpage->buf_fix_count
2869
 
                    || buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
2870
 
                        /* This condition often occurs when the buffer
2871
 
                        is not buffer-fixed, but I/O-fixed by
2872
 
                        buf_page_init_for_read(). */
2873
 
                        mutex_exit(&buf_pool->zip_mutex);
2874
 
wait_until_unfixed:
2875
 
                        /* The block is buffer-fixed or I/O-fixed.
2876
 
                        Try again later. */
2877
 
                        buf_pool_mutex_exit(buf_pool);
2878
 
                        os_thread_sleep(WAIT_FOR_READ);
2879
 
  
2880
 
                        goto loop;
2881
 
                }
2882
 
 
2883
 
                /* Allocate an uncompressed page. */
2884
 
                buf_pool_mutex_exit(buf_pool);
2885
 
                mutex_exit(&buf_pool->zip_mutex);
2886
 
 
2887
 
                block = buf_LRU_get_free_block(buf_pool, 0);
2888
 
                ut_a(block);
2889
 
 
2890
 
                buf_pool_mutex_enter(buf_pool);
2891
 
                mutex_enter(&block->mutex);
2892
 
 
2893
 
                {
2894
 
                        buf_page_t*     hash_bpage;
2895
 
 
2896
 
                        hash_bpage = buf_page_hash_get_low(
2897
 
                                buf_pool, space, offset, fold);
2898
 
 
2899
 
                        if (UNIV_UNLIKELY(bpage != hash_bpage)) {
2900
 
                                /* The buf_pool->page_hash was modified
2901
 
                                while buf_pool->mutex was released.
2902
 
                                Free the block that was allocated. */
2903
 
 
2904
 
                                buf_LRU_block_free_non_file_page(block);
2905
 
                                mutex_exit(&block->mutex);
2906
 
 
2907
 
                                block = (buf_block_t*) hash_bpage;
2908
 
                                goto loop2;
2909
 
                        }
2910
 
                }
2911
 
 
2912
 
                if (UNIV_UNLIKELY
2913
 
                    (bpage->buf_fix_count
2914
 
                     || buf_page_get_io_fix(bpage) != BUF_IO_NONE)) {
2915
 
 
2916
 
                        /* The block was buffer-fixed or I/O-fixed
2917
 
                        while buf_pool->mutex was not held by this thread.
2918
 
                        Free the block that was allocated and try again.
2919
 
                        This should be extremely unlikely. */
2920
 
 
2921
 
                        buf_LRU_block_free_non_file_page(block);
2922
 
                        mutex_exit(&block->mutex);
2923
 
 
2924
 
                        goto wait_until_unfixed;
2925
 
                }
2926
 
 
2927
 
                /* Move the compressed page from bpage to block,
2928
 
                and uncompress it. */
2929
 
 
2930
 
                mutex_enter(&buf_pool->zip_mutex);
2931
 
 
2932
 
                buf_relocate(bpage, &block->page);
2933
 
                buf_block_init_low(block);
2934
 
                block->lock_hash_val = lock_rec_hash(space, offset);
2935
 
 
2936
 
                UNIV_MEM_DESC(&block->page.zip.data,
2937
 
                              page_zip_get_size(&block->page.zip), block);
2938
 
 
2939
 
                if (buf_page_get_state(&block->page)
2940
 
                    == BUF_BLOCK_ZIP_PAGE) {
2941
 
                        UT_LIST_REMOVE(list, buf_pool->zip_clean,
2942
 
                                       &block->page);
2943
 
                        ut_ad(!block->page.in_flush_list);
2944
 
                } else {
2945
 
                        /* Relocate buf_pool->flush_list. */
2946
 
                        buf_flush_relocate_on_flush_list(bpage,
2947
 
                                                         &block->page);
2948
 
                }
2949
 
 
2950
 
                /* Buffer-fix, I/O-fix, and X-latch the block
2951
 
                for the duration of the decompression.
2952
 
                Also add the block to the unzip_LRU list. */
2953
 
                block->page.state = BUF_BLOCK_FILE_PAGE;
2954
 
 
2955
 
                /* Insert at the front of unzip_LRU list */
2956
 
                buf_unzip_LRU_add_block(block, FALSE);
2957
 
 
2958
 
                block->page.buf_fix_count = 1;
2959
 
                buf_block_set_io_fix(block, BUF_IO_READ);
2960
 
                rw_lock_x_lock_func(&block->lock, 0, file, line);
2961
 
 
2962
 
                UNIV_MEM_INVALID(bpage, sizeof *bpage);
2963
 
 
2964
 
                mutex_exit(&block->mutex);
2965
 
                mutex_exit(&buf_pool->zip_mutex);
2966
 
                buf_pool->n_pend_unzip++;
2967
 
 
2968
 
                buf_buddy_free(buf_pool, bpage, sizeof *bpage);
2969
 
 
2970
 
                buf_pool_mutex_exit(buf_pool);
2971
 
 
2972
 
                /* Decompress the page and apply buffered operations
2973
 
                while not holding buf_pool->mutex or block->mutex. */
2974
 
                success = buf_zip_decompress(block, srv_use_checksums);
2975
 
                ut_a(success);
2976
 
 
2977
 
                if (UNIV_LIKELY(!recv_no_ibuf_operations)) {
2978
 
                        ibuf_merge_or_delete_for_page(block, space, offset,
2979
 
                                                      zip_size, TRUE);
2980
 
                }
2981
 
 
2982
 
                /* Unfix and unlatch the block. */
2983
 
                buf_pool_mutex_enter(buf_pool);
2984
 
                mutex_enter(&block->mutex);
2985
 
                block->page.buf_fix_count--;
2986
 
                buf_block_set_io_fix(block, BUF_IO_NONE);
2987
 
                mutex_exit(&block->mutex);
2988
 
                buf_pool->n_pend_unzip--;
2989
 
                rw_lock_x_unlock(&block->lock);
2990
 
 
2991
 
                break;
2992
 
 
2993
 
        case BUF_BLOCK_ZIP_FREE:
2994
 
        case BUF_BLOCK_NOT_USED:
2995
 
        case BUF_BLOCK_READY_FOR_USE:
2996
 
        case BUF_BLOCK_MEMORY:
2997
 
        case BUF_BLOCK_REMOVE_HASH:
2998
 
                ut_error;
2999
 
                break;
3000
 
        }
3001
 
 
3002
 
        ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
3003
 
 
3004
 
        mutex_enter(&block->mutex);
3005
 
#if UNIV_WORD_SIZE == 4
3006
 
        /* On 32-bit systems, there is no padding in buf_page_t.  On
3007
 
        other systems, Valgrind could complain about uninitialized pad
3008
 
        bytes. */
3009
 
        UNIV_MEM_ASSERT_RW(&block->page, sizeof block->page);
3010
 
#endif
3011
 
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
3012
 
        if ((mode == BUF_GET_IF_IN_POOL || mode == BUF_GET_IF_IN_POOL_OR_WATCH)
3013
 
            && ibuf_debug) {
3014
 
                /* Try to evict the block from the buffer pool, to use the
3015
 
                insert buffer (change buffer) as much as possible. */
3016
 
 
3017
 
                if (buf_LRU_free_block(&block->page, TRUE, NULL)
3018
 
                    == BUF_LRU_FREED) {
3019
 
                        mutex_exit(&block->mutex);
3020
 
                        if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
3021
 
                                /* Set the watch, as it would have
3022
 
                                been set if the page were not in the
3023
 
                                buffer pool in the first place. */
3024
 
                                block = (buf_block_t*) buf_pool_watch_set(
3025
 
                                        space, offset, fold);
3026
 
 
3027
 
                                if (UNIV_LIKELY_NULL(block)) {
3028
 
 
3029
 
                                        /* The page entered the buffer
3030
 
                                        pool for some reason. Try to
3031
 
                                        evict it again. */
3032
 
                                        goto got_block;
3033
 
                                }
3034
 
                        }
3035
 
                        buf_pool_mutex_exit(buf_pool);
3036
 
                        fprintf(stderr,
3037
 
                                "innodb_change_buffering_debug evict %u %u\n",
3038
 
                                (unsigned) space, (unsigned) offset);
3039
 
                        return(NULL);
3040
 
                } else if (buf_flush_page_try(buf_pool, block)) {
3041
 
                        fprintf(stderr,
3042
 
                                "innodb_change_buffering_debug flush %u %u\n",
3043
 
                                (unsigned) space, (unsigned) offset);
3044
 
                        guess = block;
3045
 
                        goto loop;
3046
 
                }
3047
 
 
3048
 
                /* Failed to evict the page; change it directly */
3049
 
        }
3050
 
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
3051
 
 
3052
 
        buf_block_buf_fix_inc(block, file, line);
3053
 
 
3054
 
        mutex_exit(&block->mutex);
3055
 
 
3056
 
        /* Check if this is the first access to the page */
3057
 
 
3058
 
        access_time = buf_page_is_accessed(&block->page);
3059
 
 
3060
 
        buf_pool_mutex_exit(buf_pool);
3061
 
 
3062
 
        buf_page_set_accessed_make_young(&block->page, access_time);
3063
 
 
3064
 
#ifdef UNIV_DEBUG_FILE_ACCESSES
3065
 
        ut_a(!block->page.file_page_was_freed);
3066
 
#endif
3067
 
 
3068
 
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
3069
 
        ut_a(++buf_dbg_counter % 5771 || buf_validate());
3070
 
        ut_a(block->page.buf_fix_count > 0);
3071
 
        ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
3072
 
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
3073
 
 
3074
 
        switch (rw_latch) {
3075
 
        case RW_NO_LATCH:
3076
 
                if (must_read) {
3077
 
                        /* Let us wait until the read operation
3078
 
                        completes */
3079
 
 
3080
 
                        for (;;) {
3081
 
                                enum buf_io_fix io_fix;
3082
 
 
3083
 
                                mutex_enter(&block->mutex);
3084
 
                                io_fix = buf_block_get_io_fix(block);
3085
 
                                mutex_exit(&block->mutex);
3086
 
 
3087
 
                                if (io_fix == BUF_IO_READ) {
3088
 
 
3089
 
                                        os_thread_sleep(WAIT_FOR_READ);
3090
 
                                } else {
3091
 
                                        break;
3092
 
                                }
3093
 
                        }
3094
 
                }
3095
 
 
3096
 
                fix_type = MTR_MEMO_BUF_FIX;
3097
 
                break;
3098
 
 
3099
 
        case RW_S_LATCH:
3100
 
                rw_lock_s_lock_func(&(block->lock), 0, file, line);
3101
 
 
3102
 
                fix_type = MTR_MEMO_PAGE_S_FIX;
3103
 
                break;
3104
 
 
3105
 
        default:
3106
 
                ut_ad(rw_latch == RW_X_LATCH);
3107
 
                rw_lock_x_lock_func(&(block->lock), 0, file, line);
3108
 
 
3109
 
                fix_type = MTR_MEMO_PAGE_X_FIX;
3110
 
                break;
3111
 
        }
3112
 
 
3113
 
        mtr_memo_push(mtr, block, fix_type);
3114
 
 
3115
 
        if (!access_time) {
3116
 
                /* In the case of a first access, try to apply linear
3117
 
                read-ahead */
3118
 
 
3119
 
                buf_read_ahead_linear(space, zip_size, offset);
3120
 
        }
3121
 
 
3122
 
#ifdef UNIV_IBUF_COUNT_DEBUG
3123
 
        ut_a(ibuf_count_get(buf_block_get_space(block),
3124
 
                            buf_block_get_page_no(block)) == 0);
3125
 
#endif
3126
 
        return(block);
3127
 
}
3128
 
 
3129
 
/********************************************************************//**
This is the general function used to get optimistic access to a database
page.  The caller passes a guessed block pointer and the modify_clock
value it observed earlier; the get succeeds only if the block still holds
a file page and its modify_clock is unchanged (i.e. the page has not been
modified or evicted since the guess was made).  Latches are taken with
nowait calls only; on any failure the buffer fix is released and FALSE
is returned.
@return TRUE if success */
UNIV_INTERN
ibool
buf_page_optimistic_get(
/*====================*/
	ulint		rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */
	buf_block_t*	block,	/*!< in: guessed buffer block */
	ib_uint64_t	modify_clock,/*!< in: modify clock value if mode is
				..._GUESS_ON_CLOCK */
	const char*	file,	/*!< in: file name */
	ulint		line,	/*!< in: line where called */
	mtr_t*		mtr)	/*!< in: mini-transaction */
{
	buf_pool_t*	buf_pool;
	unsigned	access_time;
	ibool		success;
	ulint		fix_type;

	ut_ad(block);
	ut_ad(mtr);
	ut_ad(mtr->state == MTR_ACTIVE);
	ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));

	mutex_enter(&block->mutex);

	/* The guess may be stale: the block may have been evicted and
	reused for something else.  Only a resident file page qualifies. */
	if (UNIV_UNLIKELY(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE)) {

		mutex_exit(&block->mutex);

		return(FALSE);
	}

	/* Buffer-fix the block before attempting the latch, so it cannot
	be evicted or relocated while we are working on it below. */
	buf_block_buf_fix_inc(block, file, line);

	mutex_exit(&block->mutex);

	/* Check if this is the first access to the page.
	We do a dirty read on purpose, to avoid mutex contention.
	This field is only used for heuristic purposes; it does not
	affect correctness. */

	access_time = buf_page_is_accessed(&block->page);
	buf_page_set_accessed_make_young(&block->page, access_time);

	ut_ad(!ibuf_inside()
	      || ibuf_page(buf_block_get_space(block),
			   buf_block_get_zip_size(block),
			   buf_block_get_page_no(block), NULL));

	/* Nowait latch attempt: an optimistic get must never block. */
	if (rw_latch == RW_S_LATCH) {
		success = rw_lock_s_lock_nowait(&(block->lock),
						file, line);
		fix_type = MTR_MEMO_PAGE_S_FIX;
	} else {
		success = rw_lock_x_lock_func_nowait(&(block->lock),
						     file, line);
		fix_type = MTR_MEMO_PAGE_X_FIX;
	}

	if (UNIV_UNLIKELY(!success)) {
		/* Could not latch: undo the buffer fix and give up. */
		mutex_enter(&block->mutex);
		buf_block_buf_fix_dec(block);
		mutex_exit(&block->mutex);

		return(FALSE);
	}

	/* Now that we hold the latch, verify the guess: if the page was
	modified since the caller sampled modify_clock, the optimistic
	access fails and the caller must do a normal (pessimistic) get. */
	if (UNIV_UNLIKELY(modify_clock != block->modify_clock)) {
		buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);

		if (rw_latch == RW_S_LATCH) {
			rw_lock_s_unlock(&(block->lock));
		} else {
			rw_lock_x_unlock(&(block->lock));
		}

		mutex_enter(&block->mutex);
		buf_block_buf_fix_dec(block);
		mutex_exit(&block->mutex);

		return(FALSE);
	}

	/* Register the fix/latch in the mini-transaction so it is
	released at mtr_commit(). */
	mtr_memo_push(mtr, block, fix_type);

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
	ut_a(++buf_dbg_counter % 5771 || buf_validate());
	ut_a(block->page.buf_fix_count > 0);
	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

#ifdef UNIV_DEBUG_FILE_ACCESSES
	ut_a(block->page.file_page_was_freed == FALSE);
#endif
	if (UNIV_UNLIKELY(!access_time)) {
		/* In the case of a first access, try to apply linear
		read-ahead */

		buf_read_ahead_linear(buf_block_get_space(block),
				      buf_block_get_zip_size(block),
				      buf_block_get_page_no(block));
	}

#ifdef UNIV_IBUF_COUNT_DEBUG
	ut_a(ibuf_count_get(buf_block_get_space(block),
			    buf_block_get_page_no(block)) == 0);
#endif
	buf_pool = buf_pool_from_block(block);
	buf_pool->stat.n_page_gets++;

	return(TRUE);
}
3244
 
 
3245
 
/********************************************************************//**
This is used to get access to a known database page, when no waiting can be
done. For example, if a search in an adaptive hash index leads us to this
frame.  The block pointer is trusted (it came from the hash index), so
unlike buf_page_optimistic_get() there is no modify_clock check; the only
stale-pointer case handled is a block currently being freed
(BUF_BLOCK_REMOVE_HASH).
@return TRUE if success */
UNIV_INTERN
ibool
buf_page_get_known_nowait(
/*======================*/
	ulint		rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */
	buf_block_t*	block,	/*!< in: the known page */
	ulint		mode,	/*!< in: BUF_MAKE_YOUNG or BUF_KEEP_OLD */
	const char*	file,	/*!< in: file name */
	ulint		line,	/*!< in: line where called */
	mtr_t*		mtr)	/*!< in: mini-transaction */
{
	buf_pool_t*	buf_pool;
	ibool		success;
	ulint		fix_type;

	ut_ad(mtr);
	ut_ad(mtr->state == MTR_ACTIVE);
	ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));

	mutex_enter(&block->mutex);

	if (buf_block_get_state(block) == BUF_BLOCK_REMOVE_HASH) {
		/* Another thread is just freeing the block from the LRU list
		of the buffer pool: do not try to access this page; this
		attempt to access the page can only come through the hash
		index because when the buffer block state is ..._REMOVE_HASH,
		we have already removed it from the page address hash table
		of the buffer pool. */

		mutex_exit(&block->mutex);

		return(FALSE);
	}

	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);

	/* Buffer-fix under block->mutex so the block cannot be evicted
	while we work on it below without the buffer pool mutex. */
	buf_block_buf_fix_inc(block, file, line);

	mutex_exit(&block->mutex);

	buf_pool = buf_pool_from_block(block);

	/* LRU maintenance: either move the block towards the young end
	(BUF_MAKE_YOUNG), or just stamp the first access time. */
	if (mode == BUF_MAKE_YOUNG && buf_page_peek_if_too_old(&block->page)) {
		buf_pool_mutex_enter(buf_pool);
		buf_LRU_make_block_young(&block->page);
		buf_pool_mutex_exit(buf_pool);
	} else if (!buf_page_is_accessed(&block->page)) {
		/* Above, we do a dirty read on purpose, to avoid
		mutex contention.  The field buf_page_t::access_time
		is only used for heuristic purposes.  Writes to the
		field must be protected by mutex, however. */
		ulint	time_ms = ut_time_ms();

		buf_pool_mutex_enter(buf_pool);
		buf_page_set_accessed(&block->page, time_ms);
		buf_pool_mutex_exit(buf_pool);
	}

	ut_ad(!ibuf_inside() || (mode == BUF_KEEP_OLD));

	/* Nowait latch attempt; this function must never block. */
	if (rw_latch == RW_S_LATCH) {
		success = rw_lock_s_lock_nowait(&(block->lock),
						file, line);
		fix_type = MTR_MEMO_PAGE_S_FIX;
	} else {
		success = rw_lock_x_lock_func_nowait(&(block->lock),
						     file, line);
		fix_type = MTR_MEMO_PAGE_X_FIX;
	}

	if (!success) {
		/* Could not latch: undo the buffer fix and report failure. */
		mutex_enter(&block->mutex);
		buf_block_buf_fix_dec(block);
		mutex_exit(&block->mutex);

		return(FALSE);
	}

	mtr_memo_push(mtr, block, fix_type);

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
	ut_a(++buf_dbg_counter % 5771 || buf_validate());
	ut_a(block->page.buf_fix_count > 0);
	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#ifdef UNIV_DEBUG_FILE_ACCESSES
	ut_a(block->page.file_page_was_freed == FALSE);
#endif

#ifdef UNIV_IBUF_COUNT_DEBUG
	ut_a((mode == BUF_KEEP_OLD)
	     || (ibuf_count_get(buf_block_get_space(block),
				buf_block_get_page_no(block)) == 0));
#endif
	buf_pool->stat.n_page_gets++;

	return(TRUE);
}
3348
 
 
3349
 
/*******************************************************************//**
Given a tablespace id and page number tries to get that page. If the
page is not in the buffer pool it is not loaded and NULL is returned.
Suitable for using when holding the kernel mutex.  The page is latched
S if possible, otherwise X (the X fallback covers the case where the
calling thread already holds the X-latch itself); both attempts are
nowait, so the function never blocks.
@return pointer to a page or NULL */
UNIV_INTERN
const buf_block_t*
buf_page_try_get_func(
/*==================*/
	ulint		space_id,/*!< in: tablespace id */
	ulint		page_no,/*!< in: page number */
	const char*	file,	/*!< in: file name */
	ulint		line,	/*!< in: line where called */
	mtr_t*		mtr)	/*!< in: mini-transaction */
{
	buf_block_t*	block;
	ibool		success;
	ulint		fix_type;
	buf_pool_t*	buf_pool = buf_pool_get(space_id, page_no);

	ut_ad(mtr);
	ut_ad(mtr->state == MTR_ACTIVE);

	buf_pool_mutex_enter(buf_pool);
	block = buf_block_hash_get(buf_pool, space_id, page_no);

	/* Not resident as an uncompressed file page: give up without I/O. */
	if (!block || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
		buf_pool_mutex_exit(buf_pool);
		return(NULL);
	}

	ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page));

	/* Acquire block->mutex before releasing the pool mutex, so the
	block cannot be freed in between (latching-order handoff). */
	mutex_enter(&block->mutex);
	buf_pool_mutex_exit(buf_pool);

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
	ut_a(buf_block_get_space(block) == space_id);
	ut_a(buf_block_get_page_no(block) == page_no);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

	buf_block_buf_fix_inc(block, file, line);
	mutex_exit(&block->mutex);

	/* Try the shared latch first. */
	fix_type = MTR_MEMO_PAGE_S_FIX;
	success = rw_lock_s_lock_nowait(&block->lock, file, line);

	if (!success) {
		/* Let us try to get an X-latch. If the current thread
		is holding an X-latch on the page, we cannot get an
		S-latch. */

		fix_type = MTR_MEMO_PAGE_X_FIX;
		success = rw_lock_x_lock_func_nowait(&block->lock,
						     file, line);
	}

	if (!success) {
		/* Neither latch available: undo the buffer fix. */
		mutex_enter(&block->mutex);
		buf_block_buf_fix_dec(block);
		mutex_exit(&block->mutex);

		return(NULL);
	}

	mtr_memo_push(mtr, block, fix_type);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
	ut_a(++buf_dbg_counter % 5771 || buf_validate());
	ut_a(block->page.buf_fix_count > 0);
	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#ifdef UNIV_DEBUG_FILE_ACCESSES
	ut_a(block->page.file_page_was_freed == FALSE);
#endif /* UNIV_DEBUG_FILE_ACCESSES */
	buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);

	buf_pool->stat.n_page_gets++;

#ifdef UNIV_IBUF_COUNT_DEBUG
	ut_a(ibuf_count_get(buf_block_get_space(block),
			    buf_block_get_page_no(block)) == 0);
#endif

	return(block);
}
3435
 
 
3436
 
/********************************************************************//**
3437
 
Initialize some fields of a control block. */
3438
 
UNIV_INLINE
3439
 
void
3440
 
buf_page_init_low(
3441
 
/*==============*/
3442
 
        buf_page_t*     bpage)  /*!< in: block to init */
3443
 
{
3444
 
        bpage->flush_type = BUF_FLUSH_LRU;
3445
 
        bpage->io_fix = BUF_IO_NONE;
3446
 
        bpage->buf_fix_count = 0;
3447
 
        bpage->freed_page_clock = 0;
3448
 
        bpage->access_time = 0;
3449
 
        bpage->newest_modification = 0;
3450
 
        bpage->oldest_modification = 0;
3451
 
        HASH_INVALIDATE(bpage, hash);
3452
 
#ifdef UNIV_DEBUG_FILE_ACCESSES
3453
 
        bpage->file_page_was_freed = FALSE;
3454
 
#endif /* UNIV_DEBUG_FILE_ACCESSES */
3455
 
}
3456
 
 
3457
 
/********************************************************************//**
Inits a page to the buffer buf_pool: sets the block's state to
BUF_BLOCK_FILE_PAGE, resets its control fields and inserts it into the
page hash table.  Both buf_pool->mutex and block->mutex must be held by
the caller.  If a watch sentinel for (space, offset) exists in the hash,
its reference count is transferred to the new block and the sentinel is
removed; a genuine duplicate hash entry is a fatal error. */
static
void
buf_page_init(
/*==========*/
	ulint		space,	/*!< in: space id */
	ulint		offset,	/*!< in: offset of the page within space
				in units of a page */
	ulint		fold,	/*!< in: buf_page_address_fold(space,offset) */
	buf_block_t*	block)	/*!< in: block to init */
{
	buf_page_t*	hash_page;
	buf_pool_t*	buf_pool = buf_pool_get(space, offset);

	ut_ad(buf_pool_mutex_own(buf_pool));
	ut_ad(mutex_own(&(block->mutex)));
	ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);

	/* Set the state of the block */
	buf_block_set_file_page(block, space, offset);

#ifdef UNIV_DEBUG_VALGRIND
	if (!space) {
		/* Silence valid Valgrind warnings about uninitialized
		data being written to data files.  There are some unused
		bytes on some pages that InnoDB does not initialize. */
		UNIV_MEM_VALID(block->frame, UNIV_PAGE_SIZE);
	}
#endif /* UNIV_DEBUG_VALGRIND */

	buf_block_init_low(block);

	block->lock_hash_val = lock_rec_hash(space, offset);

	buf_page_init_low(&block->page);

	/* Insert into the hash table of file pages */

	hash_page = buf_page_hash_get_low(buf_pool, space, offset, fold);

	if (UNIV_LIKELY(!hash_page)) {
		/* Common case: no existing entry, nothing to resolve. */
	} else if (buf_pool_watch_is_sentinel(buf_pool, hash_page)) {
		/* Preserve the reference count. */
		ulint	buf_fix_count = hash_page->buf_fix_count;

		ut_a(buf_fix_count > 0);
		block->page.buf_fix_count += buf_fix_count;
		buf_pool_watch_remove(buf_pool, fold, hash_page);
	} else {
		/* A real page with the same (space, offset) is already in
		the hash table: this must never happen.  Dump diagnostics
		and crash. */
		fprintf(stderr,
			"InnoDB: Error: page %lu %lu already found"
			" in the hash table: %p, %p\n",
			(ulong) space,
			(ulong) offset,
			(const void*) hash_page, (const void*) block);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
		mutex_exit(&block->mutex);
		buf_pool_mutex_exit(buf_pool);
		buf_print();
		buf_LRU_print();
		buf_validate();
		buf_LRU_validate();
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
		ut_error;
	}

	ut_ad(!block->page.in_zip_hash);
	ut_ad(!block->page.in_page_hash);
	ut_d(block->page.in_page_hash = TRUE);
	HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
		    fold, &block->page);
}
3530
 
 
3531
 
/********************************************************************//**
Function which inits a page for read to the buffer buf_pool. If the page is
(1) already in buf_pool, or
(2) if we specify to read only ibuf pages and the page is not an ibuf page, or
(3) if the space is deleted or being deleted,
then this function does nothing.
Sets the io_fix flag to BUF_IO_READ and sets a non-recursive exclusive lock
on the buffer frame. The io-handler must take care that the flag is cleared
and the lock released later.
Two descriptor shapes are possible: a full buf_block_t (uncompressed or
unzip-requested read) or a bare buf_page_t holding only the compressed
frame (compressed-only read, block == NULL branch).
@return pointer to the block or NULL */
UNIV_INTERN
buf_page_t*
buf_page_init_for_read(
/*===================*/
	ulint*		err,	/*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED */
	ulint		mode,	/*!< in: BUF_READ_IBUF_PAGES_ONLY, ... */
	ulint		space,	/*!< in: space id */
	ulint		zip_size,/*!< in: compressed page size, or 0 */
	ibool		unzip,	/*!< in: TRUE=request uncompressed page */
	ib_int64_t	tablespace_version,
				/*!< in: prevents reading from a wrong
				version of the tablespace in case we have done
				DISCARD + IMPORT */
	ulint		offset)	/*!< in: page number */
{
	buf_block_t*	block;
	buf_page_t*	bpage	= NULL;
	buf_page_t*	watch_page;
	mtr_t		mtr;
	ulint		fold;
	ibool		lru	= FALSE;
	void*		data;
	buf_pool_t*	buf_pool = buf_pool_get(space, offset);

	ut_ad(buf_pool);

	*err = DB_SUCCESS;

	if (mode == BUF_READ_IBUF_PAGES_ONLY) {
		/* It is a read-ahead within an ibuf routine */

		ut_ad(!ibuf_bitmap_page(zip_size, offset));
		ut_ad(ibuf_inside());

		/* The mtr latches the ibuf bitmap page during the
		ibuf_page() check; it is committed at func_exit (or on
		the early return just below). */
		mtr_start(&mtr);

		if (!recv_no_ibuf_operations
		    && !ibuf_page(space, zip_size, offset, &mtr)) {

			mtr_commit(&mtr);

			return(NULL);
		}
	} else {
		ut_ad(mode == BUF_READ_ANY_PAGE);
	}

	/* For a compressed-only read (no unzip requested, not in crash
	recovery) we keep only the compressed frame and need no full
	block; otherwise pre-allocate a free block outside the mutex. */
	if (zip_size && UNIV_LIKELY(!unzip)
	    && UNIV_LIKELY(!recv_recovery_is_on())) {
		block = NULL;
	} else {
		block = buf_LRU_get_free_block(buf_pool, 0);
		ut_ad(block);
		ut_ad(buf_pool_from_block(block) == buf_pool);
	}

	fold = buf_page_address_fold(space, offset);

	buf_pool_mutex_enter(buf_pool);

	watch_page = buf_page_hash_get_low(buf_pool, space, offset, fold);
	if (watch_page && !buf_pool_watch_is_sentinel(buf_pool, watch_page)) {
		/* The page is already in the buffer pool. */
		watch_page = NULL;
err_exit:
		/* Return the pre-allocated block (if any) to the free
		list; nothing was inserted into the hash or LRU yet. */
		if (block) {
			mutex_enter(&block->mutex);
			buf_LRU_block_free_non_file_page(block);
			mutex_exit(&block->mutex);
		}

		bpage = NULL;
		goto func_exit;
	}

	if (fil_tablespace_deleted_or_being_deleted_in_mem(
		    space, tablespace_version)) {
		/* The page belongs to a space which has been
		deleted or is being deleted. */
		*err = DB_TABLESPACE_DELETED;

		goto err_exit;
	}

	if (block) {
		bpage = &block->page;
		mutex_enter(&block->mutex);

		ut_ad(buf_pool_from_bpage(bpage) == buf_pool);

		buf_page_init(space, offset, fold, block);

		/* The block must be put to the LRU list, to the old blocks */
		buf_LRU_add_block(bpage, TRUE/* to old blocks */);

		/* We set a pass-type x-lock on the frame because then
		the same thread which called for the read operation
		(and is running now at this point of code) can wait
		for the read to complete by waiting for the x-lock on
		the frame; if the x-lock were recursive, the same
		thread would illegally get the x-lock before the page
		read is completed.  The x-lock is cleared by the
		io-handler thread. */

		rw_lock_x_lock_gen(&block->lock, BUF_IO_READ);
		buf_page_set_io_fix(bpage, BUF_IO_READ);

		if (UNIV_UNLIKELY(zip_size)) {
			page_zip_set_size(&block->page.zip, zip_size);

			/* buf_pool->mutex may be released and
			reacquired by buf_buddy_alloc().  Thus, we
			must release block->mutex in order not to
			break the latching order in the reacquisition
			of buf_pool->mutex.  We also must defer this
			operation until after the block descriptor has
			been added to buf_pool->LRU and
			buf_pool->page_hash. */
			mutex_exit(&block->mutex);
			data = buf_buddy_alloc(buf_pool, zip_size, &lru);
			mutex_enter(&block->mutex);
			block->page.zip.data = static_cast<unsigned char *>(data);

			/* To maintain the invariant
			block->in_unzip_LRU_list
			== buf_page_belongs_to_unzip_LRU(&block->page)
			we have to add this block to unzip_LRU
			after block->page.zip.data is set. */
			ut_ad(buf_page_belongs_to_unzip_LRU(&block->page));
			buf_unzip_LRU_add_block(block, TRUE);
		}

		mutex_exit(&block->mutex);
	} else {
		/* Defer buf_buddy_alloc() until after the block has
		been found not to exist.  The buf_buddy_alloc() and
		buf_buddy_free() calls may be expensive because of
		buf_buddy_relocate(). */

		/* The compressed page must be allocated before the
		control block (bpage), in order to avoid the
		invocation of buf_buddy_relocate_block() on
		uninitialized data. */
		data = buf_buddy_alloc(buf_pool, zip_size, &lru);
		bpage = static_cast<buf_page_struct *>(buf_buddy_alloc(buf_pool, sizeof *bpage, &lru));

		/* Initialize the buf_pool pointer. */
		bpage->buf_pool_index = buf_pool_index(buf_pool);

		/* If buf_buddy_alloc() allocated storage from the LRU list,
		it released and reacquired buf_pool->mutex.  Thus, we must
		check the page_hash again, as it may have been modified. */
		if (UNIV_UNLIKELY(lru)) {

			watch_page = buf_page_hash_get_low(
				buf_pool, space, offset, fold);

			if (watch_page
			    && !buf_pool_watch_is_sentinel(buf_pool,
							   watch_page)) {

				/* The block was added by some other thread. */
				watch_page = NULL;
				buf_buddy_free(buf_pool, bpage, sizeof *bpage);
				buf_buddy_free(buf_pool, data, zip_size);

				bpage = NULL;
				goto func_exit;
			}
		}

		page_zip_des_init(&bpage->zip);
		page_zip_set_size(&bpage->zip, zip_size);
		bpage->zip.data = static_cast<unsigned char *>(data);

		mutex_enter(&buf_pool->zip_mutex);
		UNIV_MEM_DESC(bpage->zip.data,
			      page_zip_get_size(&bpage->zip), bpage);

		buf_page_init_low(bpage);

		bpage->state	= BUF_BLOCK_ZIP_PAGE;
		bpage->space	= space;
		bpage->offset	= offset;


#ifdef UNIV_DEBUG
		bpage->in_page_hash = FALSE;
		bpage->in_zip_hash = FALSE;
		bpage->in_flush_list = FALSE;
		bpage->in_free_list = FALSE;
		bpage->in_LRU_list = FALSE;
#endif /* UNIV_DEBUG */

		ut_d(bpage->in_page_hash = TRUE);

		if (UNIV_LIKELY_NULL(watch_page)) {
			/* Preserve the reference count. */
			ulint	buf_fix_count = watch_page->buf_fix_count;
			ut_a(buf_fix_count > 0);
			bpage->buf_fix_count += buf_fix_count;
			ut_ad(buf_pool_watch_is_sentinel(buf_pool, watch_page));
			buf_pool_watch_remove(buf_pool, fold, watch_page);
		}

		HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold,
			    bpage);

		/* The block must be put to the LRU list, to the old blocks */
		buf_LRU_add_block(bpage, TRUE/* to old blocks */);
		buf_LRU_insert_zip_clean(bpage);

		buf_page_set_io_fix(bpage, BUF_IO_READ);

		mutex_exit(&buf_pool->zip_mutex);
	}

	buf_pool->n_pend_reads++;
func_exit:
	buf_pool_mutex_exit(buf_pool);

	if (mode == BUF_READ_IBUF_PAGES_ONLY) {

		mtr_commit(&mtr);
	}

	ut_ad(!bpage || buf_page_in_file(bpage));
	return(bpage);
}
3770
 
 
3771
 
/********************************************************************//**
Initializes a page to the buffer buf_pool. The page is usually not read
from a file even if it cannot be found in the buffer buf_pool. This is one
of the functions which perform to a block a state transition NOT_USED =>
FILE_PAGE (the other is buf_page_get_gen).
@return pointer to the block, page bufferfixed */
UNIV_INTERN
buf_block_t*
buf_page_create(
/*============*/
	ulint	space,	/*!< in: space id */
	ulint	offset,	/*!< in: offset of the page within space in units of
			a page */
	ulint	zip_size,/*!< in: compressed page size, or 0 */
	mtr_t*	mtr)	/*!< in: mini-transaction handle */
{
	buf_frame_t*	frame;
	buf_block_t*	block;
	ulint		fold;
	buf_block_t*	free_block	= NULL;
	ulint		time_ms		= ut_time_ms();
	buf_pool_t*	buf_pool	= buf_pool_get(space, offset);

	ut_ad(mtr);
	ut_ad(mtr->state == MTR_ACTIVE);
	/* zip_size != 0 is only meaningful for non-system tablespaces here */
	ut_ad(space || !zip_size);

	/* Pre-allocate a free block before taking buf_pool->mutex; it is
	released below if the page turns out to exist in the pool already. */
	free_block = buf_LRU_get_free_block(buf_pool, 0);

	fold = buf_page_address_fold(space, offset);

	buf_pool_mutex_enter(buf_pool);

	block = (buf_block_t*) buf_page_hash_get_low(
		buf_pool, space, offset, fold);

	if (block
	    && buf_page_in_file(&block->page)
	    && !buf_pool_watch_is_sentinel(buf_pool, &block->page)) {
#ifdef UNIV_IBUF_COUNT_DEBUG
		ut_a(ibuf_count_get(space, offset) == 0);
#endif
#ifdef UNIV_DEBUG_FILE_ACCESSES
		block->page.file_page_was_freed = FALSE;
#endif /* UNIV_DEBUG_FILE_ACCESSES */

		/* Page can be found in buf_pool: return it bufferfixed
		via the normal lookup path and give back the spare block. */
		buf_pool_mutex_exit(buf_pool);

		buf_block_free(free_block);

		return(buf_page_get_with_no_latch(space, zip_size,
						  offset, mtr));
	}

	/* If we get here, the page was not in buf_pool: init it there */

#ifdef UNIV_DEBUG
	if (buf_debug_prints) {
		fprintf(stderr, "Creating space %lu page %lu to buffer\n",
			(ulong) space, (ulong) offset);
	}
#endif /* UNIV_DEBUG */

	block = free_block;

	mutex_enter(&block->mutex);

	/* Inserts the block into buf_pool->page_hash under the fold
	computed above. */
	buf_page_init(space, offset, fold, block);

	/* The block must be put to the LRU list */
	buf_LRU_add_block(&block->page, FALSE);

	buf_block_buf_fix_inc(block, __FILE__, __LINE__);
	buf_pool->stat.n_pages_created++;

	if (zip_size) {
		void*	data;
		ibool	lru;

		/* Prevent race conditions during buf_buddy_alloc(),
		which may release and reacquire buf_pool->mutex,
		by IO-fixing and X-latching the block. */

		buf_page_set_io_fix(&block->page, BUF_IO_READ);
		rw_lock_x_lock(&block->lock);

		page_zip_set_size(&block->page.zip, zip_size);
		mutex_exit(&block->mutex);
		/* buf_pool->mutex may be released and reacquired by
		buf_buddy_alloc().  Thus, we must release block->mutex
		in order not to break the latching order in
		the reacquisition of buf_pool->mutex.  We also must
		defer this operation until after the block descriptor
		has been added to buf_pool->LRU and buf_pool->page_hash. */
		data = buf_buddy_alloc(buf_pool, zip_size, &lru);
		mutex_enter(&block->mutex);
		block->page.zip.data = static_cast<unsigned char *>(data);

		/* To maintain the invariant
		block->in_unzip_LRU_list
		== buf_page_belongs_to_unzip_LRU(&block->page)
		we have to add this block to unzip_LRU after
		block->page.zip.data is set. */
		ut_ad(buf_page_belongs_to_unzip_LRU(&block->page));
		buf_unzip_LRU_add_block(block, FALSE);

		buf_page_set_io_fix(&block->page, BUF_IO_NONE);
		rw_lock_x_unlock(&block->lock);
	}

	buf_page_set_accessed(&block->page, time_ms);

	buf_pool_mutex_exit(buf_pool);

	mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);

	mutex_exit(&block->mutex);

	/* Delete possible entries for the page from the insert buffer:
	such can exist if the page belonged to an index which was dropped */

	ibuf_merge_or_delete_for_page(NULL, space, offset, zip_size, TRUE);

	/* Flush pages from the end of the LRU list if necessary */
	buf_flush_free_margin(buf_pool);

	frame = block->frame;

	/* Set the prev/next page pointers to all-ones (undefined) and
	stamp the freshly allocated page type. */
	memset(frame + FIL_PAGE_PREV, 0xff, 4);
	memset(frame + FIL_PAGE_NEXT, 0xff, 4);
	mach_write_to_2(frame + FIL_PAGE_TYPE, FIL_PAGE_TYPE_ALLOCATED);

	/* Reset to zero the file flush lsn field in the page; if the first
	page of an ibdata file is 'created' in this function into the buffer
	pool then we lose the original contents of the file flush lsn stamp.
	Then InnoDB could in a crash recovery print a big, false, corruption
	warning if the stamp contains an lsn bigger than the ib_logfile lsn. */

	memset(frame + FIL_PAGE_FILE_FLUSH_LSN, 0, 8);

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
	ut_a(++buf_dbg_counter % 357 || buf_validate());
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#ifdef UNIV_IBUF_COUNT_DEBUG
	ut_a(ibuf_count_get(buf_block_get_space(block),
			    buf_block_get_page_no(block)) == 0);
#endif
	return(block);
}
3921
 
 
3922
 
/********************************************************************//**
Completes an asynchronous read or write request of a file page to or from
the buffer pool. */
UNIV_INTERN
void
buf_page_io_complete(
/*=================*/
	buf_page_t*	bpage)	/*!< in: pointer to the block in question */
{
	enum buf_io_fix	io_type;
	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);
	const ibool	uncompressed = (buf_page_get_state(bpage)
					== BUF_BLOCK_FILE_PAGE);

	ut_a(buf_page_in_file(bpage));

	/* We do not need protect io_fix here by mutex to read
	it because this is the only function where we can change the value
	from BUF_IO_READ or BUF_IO_WRITE to some other value, and our code
	ensures that this is the only thread that handles the i/o for this
	block. */

	io_type = buf_page_get_io_fix(bpage);
	ut_ad(io_type == BUF_IO_READ || io_type == BUF_IO_WRITE);

	if (io_type == BUF_IO_READ) {
		ulint	read_page_no;
		ulint	read_space_id;
		byte*	frame;

		if (buf_page_get_zip_size(bpage)) {
			/* Compressed page: validate/decompress from the
			compressed frame.  n_pend_unzip brackets the
			decompression so it is decremented on both the
			success and the corruption path. */
			frame = bpage->zip.data;
			buf_pool->n_pend_unzip++;
			if (uncompressed
			    && !buf_zip_decompress((buf_block_t*) bpage,
						   FALSE)) {

				buf_pool->n_pend_unzip--;
				goto corrupt;
			}
			buf_pool->n_pend_unzip--;
		} else {
			ut_a(uncompressed);
			frame = ((buf_block_t*) bpage)->frame;
		}

		/* If this page is not uninitialized and not in the
		doublewrite buffer, then the page number and space id
		should be the same as in block. */
		read_page_no = mach_read_from_4(frame + FIL_PAGE_OFFSET);
		read_space_id = mach_read_from_4(
			frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);

		if (bpage->space == TRX_SYS_SPACE
		    && trx_doublewrite_page_inside(bpage->offset)) {

			ut_print_timestamp(stderr);
			fprintf(stderr,
				"  InnoDB: Error: reading page %lu\n"
				"InnoDB: which is in the"
				" doublewrite buffer!\n",
				(ulong) bpage->offset);
		} else if (!read_space_id && !read_page_no) {
			/* This is likely an uninitialized page. */
		} else if ((bpage->space
			    && bpage->space != read_space_id)
			   || bpage->offset != read_page_no) {
			/* We did not compare space_id to read_space_id
			if bpage->space == 0, because the field on the
			page may contain garbage in MySQL < 4.1.1,
			which only supported bpage->space == 0. */

			ut_print_timestamp(stderr);
			fprintf(stderr,
				"  InnoDB: Error: space id and page n:o"
				" stored in the page\n"
				"InnoDB: read in are %lu:%lu,"
				" should be %lu:%lu!\n",
				(ulong) read_space_id, (ulong) read_page_no,
				(ulong) bpage->space,
				(ulong) bpage->offset);
		}

		/* From version 3.23.38 up we store the page checksum
		to the 4 first bytes of the page end lsn field */

		if (buf_page_is_corrupted(frame,
					  buf_page_get_zip_size(bpage))) {
corrupt:
			/* The corruption banner is deliberately printed
			twice so that it brackets the page hex dump
			emitted by buf_page_print() in the error log. */
			fprintf(stderr,
				"InnoDB: Database page corruption on disk"
				" or a failed\n"
				"InnoDB: file read of page %lu.\n"
				"InnoDB: You may have to recover"
				" from a backup.\n",
				(ulong) bpage->offset);
			buf_page_print(frame, buf_page_get_zip_size(bpage));
			fprintf(stderr,
				"InnoDB: Database page corruption on disk"
				" or a failed\n"
				"InnoDB: file read of page %lu.\n"
				"InnoDB: You may have to recover"
				" from a backup.\n",
				(ulong) bpage->offset);
			fputs("InnoDB: It is also possible that"
			      " your operating\n"
			      "InnoDB: system has corrupted its"
			      " own file cache\n"
			      "InnoDB: and rebooting your computer"
			      " removes the\n"
			      "InnoDB: error.\n"
			      "InnoDB: If the corrupt page is an index page\n"
			      "InnoDB: you can also try to"
			      " fix the corruption\n"
			      "InnoDB: by dumping, dropping,"
			      " and reimporting\n"
			      "InnoDB: the corrupt table."
			      " You can use CHECK\n"
			      "InnoDB: TABLE to scan your"
			      " table for corruption.\n"
			      "InnoDB: See also "
			      REFMAN "forcing-recovery.html\n"
			      "InnoDB: about forcing recovery.\n", stderr);

			/* Unless the user has asked to ignore corruption,
			terminate the server rather than serve bad data. */
			if (srv_force_recovery < SRV_FORCE_IGNORE_CORRUPT) {
				fputs("InnoDB: Ending processing because of"
				      " a corrupt database page.\n",
				      stderr);
				exit(1);
			}
		}

		if (recv_recovery_is_on()) {
			/* Pages must be uncompressed for crash recovery. */
			ut_a(uncompressed);
			recv_recover_page(TRUE, (buf_block_t*) bpage);
		}

		if (uncompressed && !recv_no_ibuf_operations) {
			ibuf_merge_or_delete_for_page(
				(buf_block_t*) bpage, bpage->space,
				bpage->offset, buf_page_get_zip_size(bpage),
				TRUE);
		}
	}

	buf_pool_mutex_enter(buf_pool);
	mutex_enter(buf_page_get_mutex(bpage));

#ifdef UNIV_IBUF_COUNT_DEBUG
	if (io_type == BUF_IO_WRITE || uncompressed) {
		/* For BUF_IO_READ of compressed-only blocks, the
		buffered operations will be merged by buf_page_get_gen()
		after the block has been uncompressed. */
		ut_a(ibuf_count_get(bpage->space, bpage->offset) == 0);
	}
#endif
	/* Because this thread which does the unlocking is not the same that
	did the locking, we use a pass value != 0 in unlock, which simply
	removes the newest lock debug record, without checking the thread
	id. */

	buf_page_set_io_fix(bpage, BUF_IO_NONE);

	switch (io_type) {
	case BUF_IO_READ:
		/* NOTE that the call to ibuf may have moved the ownership of
		the x-latch to this OS thread: do not let this confuse you in
		debugging! */

		ut_ad(buf_pool->n_pend_reads > 0);
		buf_pool->n_pend_reads--;
		buf_pool->stat.n_pages_read++;

		if (uncompressed) {
			rw_lock_x_unlock_gen(&((buf_block_t*) bpage)->lock,
					     BUF_IO_READ);
		}

		break;

	case BUF_IO_WRITE:
		/* Write means a flush operation: call the completion
		routine in the flush system */

		buf_flush_write_complete(bpage);

		if (uncompressed) {
			rw_lock_s_unlock_gen(&((buf_block_t*) bpage)->lock,
					     BUF_IO_WRITE);
		}

		buf_pool->stat.n_pages_written++;

		break;

	default:
		ut_error;
	}

#ifdef UNIV_DEBUG
	if (buf_debug_prints) {
		fprintf(stderr, "Has %s page space %lu page no %lu\n",
			io_type == BUF_IO_READ ? "read" : "written",
			(ulong) buf_page_get_space(bpage),
			(ulong) buf_page_get_page_no(bpage));
	}
#endif /* UNIV_DEBUG */

	mutex_exit(buf_page_get_mutex(bpage));
	buf_pool_mutex_exit(buf_pool);
}
4134
 
 
4135
 
/*********************************************************************//**
4136
 
Asserts that all file pages in the buffer are in a replaceable state.
4137
 
@return TRUE */
4138
 
static
4139
 
ibool
4140
 
buf_all_freed_instance(
4141
 
/*===================*/
4142
 
        buf_pool_t*     buf_pool)       /*!< in: buffer pool instancce */
4143
 
{
4144
 
        ulint           i;
4145
 
        buf_chunk_t*    chunk;
4146
 
 
4147
 
        ut_ad(buf_pool);
4148
 
 
4149
 
        buf_pool_mutex_enter(buf_pool);
4150
 
 
4151
 
        chunk = buf_pool->chunks;
4152
 
 
4153
 
        for (i = buf_pool->n_chunks; i--; chunk++) {
4154
 
 
4155
 
                const buf_block_t* block = buf_chunk_not_freed(chunk);
4156
 
 
4157
 
                if (UNIV_LIKELY_NULL(block)) {
4158
 
                        fprintf(stderr,
4159
 
                                "Page %lu %lu still fixed or dirty\n",
4160
 
                                (ulong) block->page.space,
4161
 
                                (ulong) block->page.offset);
4162
 
                        ut_error;
4163
 
                }
4164
 
        }
4165
 
 
4166
 
        buf_pool_mutex_exit(buf_pool);
4167
 
 
4168
 
        return(TRUE);
4169
 
}
4170
 
 
4171
 
/*********************************************************************//**
4172
 
Invalidates file pages in one buffer pool instance */
4173
 
static
4174
 
void
4175
 
buf_pool_invalidate_instance(
4176
 
/*=========================*/
4177
 
        buf_pool_t*     buf_pool)       /*!< in: buffer pool instance */
4178
 
{
4179
 
        ibool           freed;
4180
 
        int     i;
4181
 
 
4182
 
        buf_pool_mutex_enter(buf_pool);
4183
 
 
4184
 
        for (i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) {
4185
 
 
4186
 
                /* As this function is called during startup and
4187
 
                during redo application phase during recovery, InnoDB
4188
 
                is single threaded (apart from IO helper threads) at
4189
 
                this stage. No new write batch can be in intialization
4190
 
                stage at this point. */
4191
 
                ut_ad(buf_pool->init_flush[i] == FALSE);
4192
 
 
4193
 
                /* However, it is possible that a write batch that has
4194
 
                been posted earlier is still not complete. For buffer
4195
 
                pool invalidation to proceed we must ensure there is NO
4196
 
                write activity happening. */
4197
 
                if (buf_pool->n_flush[i] > 0) {
4198
 
                        buf_pool_mutex_exit(buf_pool);
4199
 
                        buf_flush_wait_batch_end(buf_pool, static_cast<buf_flush>(i));
4200
 
                        buf_pool_mutex_enter(buf_pool);
4201
 
                }
4202
 
        }
4203
 
 
4204
 
        buf_pool_mutex_exit(buf_pool);
4205
 
 
4206
 
        ut_ad(buf_all_freed_instance(buf_pool));
4207
 
 
4208
 
        freed = TRUE;
4209
 
 
4210
 
        while (freed) {
4211
 
                freed = buf_LRU_search_and_free_block(buf_pool, 100);
4212
 
        }
4213
 
 
4214
 
        buf_pool_mutex_enter(buf_pool);
4215
 
 
4216
 
        ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0);
4217
 
        ut_ad(UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0);
4218
 
 
4219
 
        buf_pool->freed_page_clock = 0;
4220
 
        buf_pool->LRU_old = NULL;
4221
 
        buf_pool->LRU_old_len = 0;
4222
 
        buf_pool->LRU_flush_ended = 0;
4223
 
 
4224
 
        memset(&buf_pool->stat, 0x00, sizeof(buf_pool->stat));
4225
 
        buf_refresh_io_stats(buf_pool);
4226
 
 
4227
 
        buf_pool_mutex_exit(buf_pool);
4228
 
}
4229
 
 
4230
 
/*********************************************************************//**
4231
 
Invalidates the file pages in the buffer pool when an archive recovery is
4232
 
completed. All the file pages buffered must be in a replaceable state when
4233
 
this function is called: not latched and not modified. */
4234
 
UNIV_INTERN
4235
 
void
4236
 
buf_pool_invalidate(void)
4237
 
/*=====================*/
4238
 
{
4239
 
        ulint   i;
4240
 
 
4241
 
        for (i = 0; i < srv_buf_pool_instances; i++) {
4242
 
                buf_pool_invalidate_instance(buf_pool_from_array(i));
4243
 
        }
4244
 
}
4245
 
 
4246
 
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
4247
 
/*********************************************************************//**
Validates data in one buffer pool instance.  Tallies blocks by state,
I/O-fix and flush type while traversing the chunks, the zip_clean list
and the flush_list, then cross-checks the tallies against the pool's
list lengths and flush counters.
@return TRUE */
static
ibool
buf_pool_validate_instance(
/*=======================*/
	buf_pool_t*	buf_pool)	/*!< in: buffer pool instance */
{
	buf_page_t*	b;
	buf_chunk_t*	chunk;
	ulint		i;
	ulint		n_single_flush	= 0;
	ulint		n_lru_flush	= 0;
	ulint		n_list_flush	= 0;
	ulint		n_lru		= 0;
	ulint		n_flush		= 0;
	ulint		n_free		= 0;
	ulint		n_zip		= 0;

	ut_ad(buf_pool);

	buf_pool_mutex_enter(buf_pool);

	chunk = buf_pool->chunks;

	/* Check the uncompressed blocks. */

	for (i = buf_pool->n_chunks; i--; chunk++) {

		ulint		j;
		buf_block_t*	block = chunk->blocks;

		for (j = chunk->size; j--; block++) {

			mutex_enter(&block->mutex);

			switch (buf_block_get_state(block)) {
			case BUF_BLOCK_ZIP_FREE:
			case BUF_BLOCK_ZIP_PAGE:
			case BUF_BLOCK_ZIP_DIRTY:
				/* These should only occur on
				zip_clean, zip_free[], or flush_list. */
				ut_error;
				break;

			case BUF_BLOCK_FILE_PAGE:
				/* The page_hash entry must point back to
				this very block. */
				ut_a(buf_page_hash_get(buf_pool,
						       buf_block_get_space(
							       block),
						       buf_block_get_page_no(
							       block))
				     == &block->page);

#ifdef UNIV_IBUF_COUNT_DEBUG
				ut_a(buf_page_get_io_fix(&block->page)
				     == BUF_IO_READ
				     || !ibuf_count_get(buf_block_get_space(
								block),
							buf_block_get_page_no(
								block)));
#endif
				switch (buf_page_get_io_fix(&block->page)) {
				case BUF_IO_NONE:
					break;

				case BUF_IO_WRITE:
					switch (buf_page_get_flush_type(
							&block->page)) {
					case BUF_FLUSH_LRU:
						n_lru_flush++;
						ut_a(rw_lock_is_locked(
							     &block->lock,
							     RW_LOCK_SHARED));
						break;
					case BUF_FLUSH_LIST:
						n_list_flush++;
						break;
					case BUF_FLUSH_SINGLE_PAGE:
						n_single_flush++;
						break;
					default:
						ut_error;
					}

					break;

				case BUF_IO_READ:

					ut_a(rw_lock_is_locked(&block->lock,
							       RW_LOCK_EX));
					break;
				}

				n_lru++;
				break;

			case BUF_BLOCK_NOT_USED:
				n_free++;
				break;

			case BUF_BLOCK_READY_FOR_USE:
			case BUF_BLOCK_MEMORY:
			case BUF_BLOCK_REMOVE_HASH:
				/* do nothing */
				break;
			}

			mutex_exit(&block->mutex);
		}
	}

	mutex_enter(&buf_pool->zip_mutex);

	/* Check clean compressed-only blocks. */

	for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
	     b = UT_LIST_GET_NEXT(list, b)) {
		ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
		switch (buf_page_get_io_fix(b)) {
		case BUF_IO_NONE:
			/* All clean blocks should be I/O-unfixed. */
			break;
		case BUF_IO_READ:
			/* In buf_LRU_free_block(), we temporarily set
			b->io_fix = BUF_IO_READ for a newly allocated
			control block in order to prevent
			buf_page_get_gen() from decompressing the block. */
			break;
		default:
			ut_error;
			break;
		}

		/* It is OK to read oldest_modification here because
		we have acquired buf_pool->zip_mutex above which acts
		as the 'block->mutex' for these bpages. */
		ut_a(!b->oldest_modification);
		ut_a(buf_page_hash_get(buf_pool, b->space, b->offset) == b);

		n_lru++;
		n_zip++;
	}

	/* Check dirty blocks. */

	buf_flush_list_mutex_enter(buf_pool);
	for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
	     b = UT_LIST_GET_NEXT(list, b)) {
		ut_ad(b->in_flush_list);
		/* Every flush_list entry must carry a modification LSN. */
		ut_a(b->oldest_modification);
		n_flush++;

		switch (buf_page_get_state(b)) {
		case BUF_BLOCK_ZIP_DIRTY:
			n_lru++;
			n_zip++;
			switch (buf_page_get_io_fix(b)) {
			case BUF_IO_NONE:
			case BUF_IO_READ:
				break;
			case BUF_IO_WRITE:
				switch (buf_page_get_flush_type(b)) {
				case BUF_FLUSH_LRU:
					n_lru_flush++;
					break;
				case BUF_FLUSH_LIST:
					n_list_flush++;
					break;
				case BUF_FLUSH_SINGLE_PAGE:
					n_single_flush++;
					break;
				default:
					ut_error;
				}
				break;
			}
			break;
		case BUF_BLOCK_FILE_PAGE:
			/* uncompressed page */
			break;
		case BUF_BLOCK_ZIP_FREE:
		case BUF_BLOCK_ZIP_PAGE:
		case BUF_BLOCK_NOT_USED:
		case BUF_BLOCK_READY_FOR_USE:
		case BUF_BLOCK_MEMORY:
		case BUF_BLOCK_REMOVE_HASH:
			ut_error;
			break;
		}
		ut_a(buf_page_hash_get(buf_pool, b->space, b->offset) == b);
	}

	ut_a(UT_LIST_GET_LEN(buf_pool->flush_list) == n_flush);

	buf_flush_list_mutex_exit(buf_pool);

	mutex_exit(&buf_pool->zip_mutex);

	/* Cross-check the tallies against the pool-wide bookkeeping. */
	if (n_lru + n_free > buf_pool->curr_size + n_zip) {
		fprintf(stderr, "n LRU %lu, n free %lu, pool %lu zip %lu\n",
			(ulong) n_lru, (ulong) n_free,
			(ulong) buf_pool->curr_size, (ulong) n_zip);
		ut_error;
	}

	ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == n_lru);
	if (UT_LIST_GET_LEN(buf_pool->free) != n_free) {
		fprintf(stderr, "Free list len %lu, free blocks %lu\n",
			(ulong) UT_LIST_GET_LEN(buf_pool->free),
			(ulong) n_free);
		ut_error;
	}

	ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_single_flush);
	ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
	ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush);

	buf_pool_mutex_exit(buf_pool);

	ut_a(buf_LRU_validate());
	ut_a(buf_flush_validate(buf_pool));

	return(TRUE);
}
4472
 
 
4473
 
/*********************************************************************//**
4474
 
Validates the buffer buf_pool data structure.
4475
 
@return TRUE */
4476
 
UNIV_INTERN
4477
 
ibool
4478
 
buf_validate(void)
4479
 
/*==============*/
4480
 
{
4481
 
        ulint   i;
4482
 
 
4483
 
        for (i = 0; i < srv_buf_pool_instances; i++) {
4484
 
                buf_pool_t*     buf_pool;
4485
 
 
4486
 
                buf_pool = buf_pool_from_array(i);
4487
 
 
4488
 
                buf_pool_validate_instance(buf_pool);
4489
 
        }
4490
 
        return(TRUE);
4491
 
}
4492
 
 
4493
 
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
4494
 
 
4495
 
#if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
4496
 
/*********************************************************************//**
Prints info of the buffer buf_pool data structure for one instance.
Writes summary statistics to stderr and then, for every index that has
pages in this buffer pool instance, an approximate block count.
Must not be called while holding buf_pool->mutex. */
static
void
buf_print_instance(
/*===============*/
	buf_pool_t*	buf_pool)	/*!< in: buffer pool instance */
{
	index_id_t*	index_ids;	/* distinct index ids seen so far */
	ulint*		counts;		/* counts[k] = pages of index_ids[k] */
	ulint		size;
	ulint		i;
	ulint		j;
	index_id_t	id;
	ulint		n_found;
	buf_chunk_t*	chunk;
	dict_index_t*	index;

	ut_ad(buf_pool);

	size = buf_pool->curr_size;

	/* At most one distinct index id per buffered page, so curr_size
	entries suffice.  NOTE: allocation results are not checked here;
	mem_alloc() is assumed to abort on failure — TODO confirm. */
	index_ids = mem_alloc(size * sizeof *index_ids);
	counts = mem_alloc(sizeof(ulint) * size);

	/* Lock order: buf_pool mutex first, then flush list mutex. */
	buf_pool_mutex_enter(buf_pool);
	buf_flush_list_mutex_enter(buf_pool);

	fprintf(stderr,
		"buf_pool size %lu\n"
		"database pages %lu\n"
		"free pages %lu\n"
		"modified database pages %lu\n"
		"n pending decompressions %lu\n"
		"n pending reads %lu\n"
		"n pending flush LRU %lu list %lu single page %lu\n"
		"pages made young %lu, not young %lu\n"
		"pages read %lu, created %lu, written %lu\n",
		(ulong) size,
		(ulong) UT_LIST_GET_LEN(buf_pool->LRU),
		(ulong) UT_LIST_GET_LEN(buf_pool->free),
		(ulong) UT_LIST_GET_LEN(buf_pool->flush_list),
		(ulong) buf_pool->n_pend_unzip,
		(ulong) buf_pool->n_pend_reads,
		(ulong) buf_pool->n_flush[BUF_FLUSH_LRU],
		(ulong) buf_pool->n_flush[BUF_FLUSH_LIST],
		(ulong) buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE],
		(ulong) buf_pool->stat.n_pages_made_young,
		(ulong) buf_pool->stat.n_pages_not_made_young,
		(ulong) buf_pool->stat.n_pages_read,
		(ulong) buf_pool->stat.n_pages_created,
		(ulong) buf_pool->stat.n_pages_written);

	buf_flush_list_mutex_exit(buf_pool);

	/* Count the number of blocks belonging to each index in the buffer */

	n_found = 0;

	chunk = buf_pool->chunks;

	for (i = buf_pool->n_chunks; i--; chunk++) {
		buf_block_t*	block		= chunk->blocks;
		ulint		n_blocks	= chunk->size;

		for (; n_blocks--; block++) {
			const buf_frame_t* frame = block->frame;

			if (fil_page_get_type(frame) == FIL_PAGE_INDEX) {

				id = btr_page_get_index_id(frame);

				/* Look for the id in the index_ids array */
				j = 0;

				while (j < n_found) {

					if (index_ids[j] == id) {
						counts[j]++;

						break;
					}
					j++;
				}

				/* Linear scan found no match: append a new
				entry for this index id. */
				if (j == n_found) {
					n_found++;
					index_ids[j] = id;
					counts[j] = 1;
				}
			}
		}
	}

	buf_pool_mutex_exit(buf_pool);

	/* Report the per-index counts gathered above; the dictionary
	cache lookup happens without buf_pool->mutex held. */
	for (i = 0; i < n_found; i++) {
		index = dict_index_get_if_in_cache(index_ids[i]);

		fprintf(stderr,
			"Block count for index %llu in buffer is about %lu",
			(ullint) index_ids[i],
			(ulong) counts[i]);

		if (index) {
			putc(' ', stderr);
			dict_index_name_print(stderr, NULL, index);
		}

		putc('\n', stderr);
	}

	mem_free(index_ids);
	mem_free(counts);

	ut_a(buf_pool_validate_instance(buf_pool));
}
4613
 
 
4614
 
/*********************************************************************//**
4615
 
Prints info of the buffer buf_pool data structure. */
4616
 
UNIV_INTERN
4617
 
void
4618
 
buf_print(void)
4619
 
/*===========*/
4620
 
{
4621
 
        ulint   i;
4622
 
 
4623
 
        for (i = 0; i < srv_buf_pool_instances; i++) {
4624
 
                buf_pool_t*     buf_pool;
4625
 
 
4626
 
                buf_pool = buf_pool_from_array(i);
4627
 
                buf_print_instance(buf_pool);
4628
 
        }
4629
 
}
4630
 
#endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */
4631
 
 
4632
 
#ifdef UNIV_DEBUG
4633
 
/*********************************************************************//**
Returns the number of latched pages in the buffer pool.
A page counts as latched when its buf_fix_count is nonzero or it has a
pending I/O fix.  Scans (1) all uncompressed blocks in the chunks,
(2) the clean compressed-only list, and (3) the flush list, under the
appropriate mutexes.
@return number of latched pages */
UNIV_INTERN
ulint
buf_get_latched_pages_number_instance(
/*==================================*/
	buf_pool_t*	buf_pool)	/*!< in: buffer pool instance */
{
	buf_page_t*	b;
	ulint		i;
	buf_chunk_t*	chunk;
	ulint		fixed_pages_number = 0;

	buf_pool_mutex_enter(buf_pool);

	chunk = buf_pool->chunks;

	/* Pass 1: every uncompressed block in every chunk. */
	for (i = buf_pool->n_chunks; i--; chunk++) {
		buf_block_t*	block;
		ulint		j;

		block = chunk->blocks;

		for (j = chunk->size; j--; block++) {
			if (buf_block_get_state(block)
			    != BUF_BLOCK_FILE_PAGE) {

				continue;
			}

			/* The block mutex protects buf_fix_count and
			the io_fix state. */
			mutex_enter(&block->mutex);

			if (block->page.buf_fix_count != 0
			    || buf_page_get_io_fix(&block->page)
			    != BUF_IO_NONE) {
				fixed_pages_number++;
			}

			mutex_exit(&block->mutex);
		}
	}

	mutex_enter(&buf_pool->zip_mutex);

	/* Traverse the lists of clean and dirty compressed-only blocks. */

	/* Pass 2: clean compressed-only pages. */
	for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
	     b = UT_LIST_GET_NEXT(list, b)) {
		ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
		/* A clean page cannot have a write in progress. */
		ut_a(buf_page_get_io_fix(b) != BUF_IO_WRITE);

		if (b->buf_fix_count != 0
		    || buf_page_get_io_fix(b) != BUF_IO_NONE) {
			fixed_pages_number++;
		}
	}

	/* Pass 3: dirty pages on the flush list; only compressed-only
	dirty pages are counted here (uncompressed ones were already
	counted in pass 1). */
	buf_flush_list_mutex_enter(buf_pool);
	for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
	     b = UT_LIST_GET_NEXT(list, b)) {
		ut_ad(b->in_flush_list);

		switch (buf_page_get_state(b)) {
		case BUF_BLOCK_ZIP_DIRTY:
			if (b->buf_fix_count != 0
			    || buf_page_get_io_fix(b) != BUF_IO_NONE) {
				fixed_pages_number++;
			}
			break;
		case BUF_BLOCK_FILE_PAGE:
			/* uncompressed page */
			break;
		case BUF_BLOCK_ZIP_FREE:
		case BUF_BLOCK_ZIP_PAGE:
		case BUF_BLOCK_NOT_USED:
		case BUF_BLOCK_READY_FOR_USE:
		case BUF_BLOCK_MEMORY:
		case BUF_BLOCK_REMOVE_HASH:
			/* These states must never appear on the
			flush list. */
			ut_error;
			break;
		}
	}

	/* Release in reverse acquisition order. */
	buf_flush_list_mutex_exit(buf_pool);
	mutex_exit(&buf_pool->zip_mutex);
	buf_pool_mutex_exit(buf_pool);

	return(fixed_pages_number);
}
4723
 
 
4724
 
/*********************************************************************//**
4725
 
Returns the number of latched pages in all the buffer pools.
4726
 
@return number of latched pages */
4727
 
UNIV_INTERN
4728
 
ulint
4729
 
buf_get_latched_pages_number(void)
4730
 
/*==============================*/
4731
 
{
4732
 
        ulint   i;
4733
 
        ulint   total_latched_pages = 0;
4734
 
 
4735
 
        for (i = 0; i < srv_buf_pool_instances; i++) {
4736
 
                buf_pool_t*     buf_pool;
4737
 
 
4738
 
                buf_pool = buf_pool_from_array(i);
4739
 
 
4740
 
                total_latched_pages += buf_get_latched_pages_number_instance(
4741
 
                        buf_pool);
4742
 
        }
4743
 
 
4744
 
        return(total_latched_pages);
4745
 
}
4746
 
 
4747
 
#endif /* UNIV_DEBUG */
4748
 
 
4749
 
/*********************************************************************//**
4750
 
Returns the number of pending buf pool ios.
4751
 
@return number of pending I/O operations */
4752
 
UNIV_INTERN
4753
 
ulint
4754
 
buf_get_n_pending_ios(void)
4755
 
/*=======================*/
4756
 
{
4757
 
        ulint   i;
4758
 
        ulint   pend_ios = 0;
4759
 
 
4760
 
        for (i = 0; i < srv_buf_pool_instances; i++) {
4761
 
                buf_pool_t*     buf_pool;
4762
 
 
4763
 
                buf_pool = buf_pool_from_array(i);
4764
 
 
4765
 
                pend_ios +=
4766
 
                        buf_pool->n_pend_reads
4767
 
                        + buf_pool->n_flush[BUF_FLUSH_LRU]
4768
 
                        + buf_pool->n_flush[BUF_FLUSH_LIST]
4769
 
                        + buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE];
4770
 
        }
4771
 
 
4772
 
        return(pend_ios);
4773
 
}
4774
 
 
4775
 
/*********************************************************************//**
4776
 
Returns the ratio in percents of modified pages in the buffer pool /
4777
 
database pages in the buffer pool.
4778
 
@return modified page percentage ratio */
4779
 
UNIV_INTERN
4780
 
ulint
4781
 
buf_get_modified_ratio_pct(void)
4782
 
/*============================*/
4783
 
{
4784
 
        ulint           ratio;
4785
 
        ulint           lru_len = 0;
4786
 
        ulint           free_len = 0;
4787
 
        ulint           flush_list_len = 0;
4788
 
 
4789
 
        buf_get_total_list_len(&lru_len, &free_len, &flush_list_len);
4790
 
 
4791
 
        ratio = (100 * flush_list_len) / (1 + lru_len + free_len);
4792
 
  
4793
 
        /* 1 + is there to avoid division by zero */
4794
 
 
4795
 
        return(ratio);
4796
 
}
4797
 
 
4798
 
/*********************************************************************//**
Prints info of the buffer i/o.
Writes pool sizes, pending I/O counts, and per-second rates (computed
since the previous printout) for one buffer pool instance, then resets
the per-second baseline via buf_refresh_io_stats(). */
static
void
buf_print_io_instance(
/*==================*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	FILE*		file)		/*!< in/out: buffer where to print */
{
	time_t	current_time;
	double	time_elapsed;
	ulint	n_gets_diff;	/* page gets since last printout */

	ut_ad(buf_pool);

	/* Lock order: buf_pool mutex first, then flush list mutex. */
	buf_pool_mutex_enter(buf_pool);
	buf_flush_list_mutex_enter(buf_pool);

	fprintf(file,
		"Buffer pool size   %lu\n"
		"Free buffers       %lu\n"
		"Database pages     %lu\n"
		"Old database pages %lu\n"
		"Modified db pages  %lu\n"
		"Pending reads %lu\n"
		"Pending writes: LRU %lu, flush list %lu, single page %lu\n",
		(ulong) buf_pool->curr_size,
		(ulong) UT_LIST_GET_LEN(buf_pool->free),
		(ulong) UT_LIST_GET_LEN(buf_pool->LRU),
		(ulong) buf_pool->LRU_old_len,
		(ulong) UT_LIST_GET_LEN(buf_pool->flush_list),
		(ulong) buf_pool->n_pend_reads,
		(ulong) buf_pool->n_flush[BUF_FLUSH_LRU]
		+ buf_pool->init_flush[BUF_FLUSH_LRU],
		(ulong) buf_pool->n_flush[BUF_FLUSH_LIST]
		+ buf_pool->init_flush[BUF_FLUSH_LIST],
		(ulong) buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]);

	buf_flush_list_mutex_exit(buf_pool);

	/* The 0.001 floor prevents division by zero in the rate
	computations when printouts happen within the same second. */
	current_time = time(NULL);
	time_elapsed = 0.001 + difftime(current_time,
					buf_pool->last_printout_time);

	/* Rates are deltas against old_stat, the snapshot taken at the
	previous printout. */
	fprintf(file,
		"Pages made young %lu, not young %lu\n"
		"%.2f youngs/s, %.2f non-youngs/s\n"
		"Pages read %lu, created %lu, written %lu\n"
		"%.2f reads/s, %.2f creates/s, %.2f writes/s\n",
		(ulong) buf_pool->stat.n_pages_made_young,
		(ulong) buf_pool->stat.n_pages_not_made_young,
		(buf_pool->stat.n_pages_made_young
		 - buf_pool->old_stat.n_pages_made_young)
		/ time_elapsed,
		(buf_pool->stat.n_pages_not_made_young
		 - buf_pool->old_stat.n_pages_not_made_young)
		/ time_elapsed,
		(ulong) buf_pool->stat.n_pages_read,
		(ulong) buf_pool->stat.n_pages_created,
		(ulong) buf_pool->stat.n_pages_written,
		(buf_pool->stat.n_pages_read
		 - buf_pool->old_stat.n_pages_read)
		/ time_elapsed,
		(buf_pool->stat.n_pages_created
		 - buf_pool->old_stat.n_pages_created)
		/ time_elapsed,
		(buf_pool->stat.n_pages_written
		 - buf_pool->old_stat.n_pages_written)
		/ time_elapsed);

	n_gets_diff = buf_pool->stat.n_page_gets
		    - buf_pool->old_stat.n_page_gets;

	/* Only print rates-per-1000-gets when there were page gets;
	otherwise the divisions below would divide by zero. */
	if (n_gets_diff) {
		fprintf(file,
			"Buffer pool hit rate %lu / 1000,"
			" young-making rate %lu / 1000 not %lu / 1000\n",
			(ulong)
			(1000 - ((1000 * (buf_pool->stat.n_pages_read
					  - buf_pool->old_stat.n_pages_read))
				 / (buf_pool->stat.n_page_gets
				    - buf_pool->old_stat.n_page_gets))),
			(ulong)
			(1000 * (buf_pool->stat.n_pages_made_young
				 - buf_pool->old_stat.n_pages_made_young)
			 / n_gets_diff),
			(ulong)
			(1000 * (buf_pool->stat.n_pages_not_made_young
				 - buf_pool->old_stat.n_pages_not_made_young)
			 / n_gets_diff));
	} else {
		fputs("No buffer pool page gets since the last printout\n",
		      file);
	}

	/* Statistics about read ahead algorithm */
	fprintf(file, "Pages read ahead %.2f/s,"
		" evicted without access %.2f/s\n",
		(buf_pool->stat.n_ra_pages_read
		- buf_pool->old_stat.n_ra_pages_read)
		/ time_elapsed,
		(buf_pool->stat.n_ra_pages_evicted
		- buf_pool->old_stat.n_ra_pages_evicted)
		/ time_elapsed);

	/* Print some values to help us with visualizing what is
	happening with LRU eviction. */
	fprintf(file,
		"LRU len: %lu, unzip_LRU len: %lu\n"
		"I/O sum[%lu]:cur[%lu], unzip sum[%lu]:cur[%lu]\n",
		static_cast<ulint>(UT_LIST_GET_LEN(buf_pool->LRU)),
		static_cast<ulint>(UT_LIST_GET_LEN(buf_pool->unzip_LRU)),
		buf_LRU_stat_sum.io, buf_LRU_stat_cur.io,
		buf_LRU_stat_sum.unzip, buf_LRU_stat_cur.unzip);

	/* Snapshot current counters so the next printout's rates are
	relative to this moment. */
	buf_refresh_io_stats(buf_pool);
	buf_pool_mutex_exit(buf_pool);
}
4916
 
 
4917
 
/*********************************************************************//**
4918
 
Prints info of the buffer i/o. */
4919
 
UNIV_INTERN
4920
 
void
4921
 
buf_print_io(
4922
 
/*=========*/
4923
 
        FILE*   file)   /*!< in/out: buffer where to print */
4924
 
{
4925
 
        ulint   i;
4926
 
 
4927
 
        for (i = 0; i < srv_buf_pool_instances; i++) {
4928
 
                buf_pool_t*     buf_pool;
4929
 
 
4930
 
                buf_pool = buf_pool_from_array(i);
4931
 
                buf_print_io_instance(buf_pool, file);
4932
 
        }
4933
 
}
4934
 
 
4935
 
/**********************************************************************//**
4936
 
Refreshes the statistics used to print per-second averages. */
4937
 
UNIV_INTERN
4938
 
void
4939
 
buf_refresh_io_stats(
4940
 
/*=================*/
4941
 
        buf_pool_t*     buf_pool)       /*!< in: buffer pool instance */
4942
 
{
4943
 
        buf_pool->last_printout_time = ut_time();
4944
 
        buf_pool->old_stat = buf_pool->stat;
4945
 
}
4946
 
 
4947
 
/**********************************************************************//**
4948
 
Refreshes the statistics used to print per-second averages. */
4949
 
UNIV_INTERN
4950
 
void
4951
 
buf_refresh_io_stats_all(void)
4952
 
/*==========================*/
4953
 
{
4954
 
        ulint           i;
4955
 
 
4956
 
        for (i = 0; i < srv_buf_pool_instances; i++) {
4957
 
                buf_pool_t*     buf_pool;
4958
 
 
4959
 
                buf_pool = buf_pool_from_array(i);
4960
 
 
4961
 
                buf_refresh_io_stats(buf_pool);
4962
 
        }
4963
 
}
4964
 
 
4965
 
/**********************************************************************//**
4966
 
Check if all pages in all buffer pools are in a replacable state.
4967
 
@return FALSE if not */
4968
 
UNIV_INTERN
4969
 
ibool
4970
 
buf_all_freed(void)
4971
 
/*===============*/
4972
 
{
4973
 
        ulint   i;
4974
 
 
4975
 
        for (i = 0; i < srv_buf_pool_instances; i++) {
4976
 
                buf_pool_t*     buf_pool;
4977
 
 
4978
 
                buf_pool = buf_pool_from_array(i);
4979
 
 
4980
 
                if (!buf_all_freed_instance(buf_pool)) {
4981
 
                        return(FALSE);
4982
 
                }
4983
 
        }
4984
 
 
4985
 
        return(TRUE);
4986
 
}
4987
 
  
4988
 
/*********************************************************************//**
4989
 
Checks that there currently are no pending i/o-operations for the buffer
4990
 
pool.
4991
 
@return TRUE if there is no pending i/o */
4992
 
UNIV_INTERN
4993
 
ibool
4994
 
buf_pool_check_no_pending_io(void)
4995
 
/*==============================*/
4996
 
{
4997
 
        ulint           i;
4998
 
        ibool           ret = TRUE;
4999
 
 
5000
 
        buf_pool_mutex_enter_all();
5001
 
 
5002
 
        for (i = 0; i < srv_buf_pool_instances && ret; i++) {
5003
 
                const buf_pool_t*       buf_pool;
5004
 
 
5005
 
                buf_pool = buf_pool_from_array(i);
5006
 
 
5007
 
                if (buf_pool->n_pend_reads
5008
 
                    + buf_pool->n_flush[BUF_FLUSH_LRU]
5009
 
                    + buf_pool->n_flush[BUF_FLUSH_LIST]
5010
 
                    + buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]) {
5011
 
 
5012
 
                        ret = FALSE;
5013
 
                }
5014
 
        }
5015
 
 
5016
 
        buf_pool_mutex_exit_all();
5017
 
 
5018
 
        return(ret);
5019
 
}
5020
 
 
5021
 
#if 0
5022
 
Code currently not used
5023
 
/*********************************************************************//**
Gets the current length of the free list of buffer blocks.
NOTE(review): this function is compiled out (#if 0) and references a
`buf_pool` variable that is not declared in this scope; it predates the
multiple-buffer-pool split and would need an instance parameter to be
revived.
@return length of the free list */
UNIV_INTERN
ulint
buf_get_free_list_len(void)
/*=======================*/
{
	ulint	len;

	buf_pool_mutex_enter(buf_pool);

	len = UT_LIST_GET_LEN(buf_pool->free);

	buf_pool_mutex_exit(buf_pool);

	return(len);
}
5041
 
#endif
5042
 
 
5043
 
#else /* !UNIV_HOTBACKUP */
5044
 
/********************************************************************//**
Inits a page to the buffer buf_pool, for use in ibbackup --restore.
Sets up the minimal page state needed for restore: no hash insertion,
no LRU linkage, and no locking — assumes single-threaded use during
restore (TODO confirm). */
UNIV_INTERN
void
buf_page_init_for_backup_restore(
/*=============================*/
	ulint		space,	/*!< in: space id */
	ulint		offset,	/*!< in: offset of the page within space
				in units of a page */
	ulint		zip_size,/*!< in: compressed page size in bytes
				or 0 for uncompressed pages */
	buf_block_t*	block)	/*!< in: block to init */
{
	block->page.state	= BUF_BLOCK_FILE_PAGE;
	block->page.space	= space;
	block->page.offset	= offset;

	page_zip_des_init(&block->page.zip);

	/* We assume that block->page.data has been allocated
	with zip_size == UNIV_PAGE_SIZE. */
	ut_ad(zip_size <= UNIV_PAGE_SIZE);
	ut_ad(ut_is_2pow(zip_size));
	page_zip_set_size(&block->page.zip, zip_size);
	if (zip_size) {
		/* The compressed copy lives directly after the
		uncompressed frame in the same allocation. */
		block->page.zip.data = block->frame + UNIV_PAGE_SIZE;
	}
}
5072
 
#endif /* !UNIV_HOTBACKUP */