~drizzle-trunk/drizzle/development

« back to all changes in this revision

Viewing changes to plugin/innobase/buf/buf0buf.cc

  • Committer: Stewart Smith
  • Date: 2008-10-15 04:21:24 UTC
  • mto: This revision was merged to the branch mainline in revision 516.
  • Revision ID: stewart@flamingspork.com-20081015042124-kdmb74bcbky1k1nz
remove my_pthread_[gs]etspecific

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
/*****************************************************************************
2
 
 
3
 
Copyright (C) 1995, 2010, Innobase Oy. All Rights Reserved.
4
 
Copyright (C) 2008, Google Inc.
5
 
 
6
 
Portions of this file contain modifications contributed and copyrighted by
7
 
Google, Inc. Those modifications are gratefully acknowledged and are described
8
 
briefly in the InnoDB documentation. The contributions by Google are
9
 
incorporated with their permission, and subject to the conditions contained in
10
 
the file COPYING.Google.
11
 
 
12
 
This program is free software; you can redistribute it and/or modify it under
13
 
the terms of the GNU General Public License as published by the Free Software
14
 
Foundation; version 2 of the License.
15
 
 
16
 
This program is distributed in the hope that it will be useful, but WITHOUT
17
 
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18
 
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
19
 
 
20
 
You should have received a copy of the GNU General Public License along with
21
 
this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
22
 
St, Fifth Floor, Boston, MA 02110-1301 USA
23
 
 
24
 
*****************************************************************************/
25
 
 
26
 
/**************************************************//**
27
 
@file buf/buf0buf.c
28
 
The database buffer buf_pool
29
 
 
30
 
Created 11/5/1995 Heikki Tuuri
31
 
*******************************************************/
32
 
 
33
 
#include "buf0buf.h"
34
 
 
35
 
#ifdef UNIV_NONINL
36
 
#include "buf0buf.ic"
37
 
#endif
38
 
 
39
 
#include "mem0mem.h"
40
 
#include "btr0btr.h"
41
 
#include "fil0fil.h"
42
 
#ifndef UNIV_HOTBACKUP
43
 
#include "buf0buddy.h"
44
 
#include "lock0lock.h"
45
 
#include "btr0sea.h"
46
 
#include "ibuf0ibuf.h"
47
 
#include "trx0undo.h"
48
 
#include "log0log.h"
49
 
#endif /* !UNIV_HOTBACKUP */
50
 
#include "srv0srv.h"
51
 
#include "dict0dict.h"
52
 
#include "log0recv.h"
53
 
#include "page0zip.h"
54
 
 
55
 
#include <drizzled/errmsg_print.h>
56
 
 
57
 
/*
58
 
                IMPLEMENTATION OF THE BUFFER POOL
59
 
                =================================
60
 
 
61
 
Performance improvement:
62
 
------------------------
63
 
Thread scheduling in NT may be so slow that the OS wait mechanism should
64
 
not be used even in waiting for disk reads to complete.
65
 
Rather, we should put waiting query threads to the queue of
66
 
waiting jobs, and let the OS thread do something useful while the i/o
67
 
is processed. In this way we could remove most OS thread switches in
68
 
an i/o-intensive benchmark like TPC-C.
69
 
 
70
 
A possibility is to put a user space thread library between the database
71
 
and NT. User space thread libraries might be very fast.
72
 
 
73
 
SQL Server 7.0 can be configured to use 'fibers' which are lightweight
74
 
threads in NT. These should be studied.
75
 
 
76
 
                Buffer frames and blocks
77
 
                ------------------------
78
 
Following the terminology of Gray and Reuter, we call the memory
79
 
blocks where file pages are loaded buffer frames. For each buffer
80
 
frame there is a control block, or shortly, a block, in the buffer
81
 
control array. The control info which does not need to be stored
82
 
in the file along with the file page, resides in the control block.
83
 
 
84
 
                Buffer pool struct
85
 
                ------------------
86
 
The buffer buf_pool contains a single mutex which protects all the
87
 
control data structures of the buf_pool. The content of a buffer frame is
88
 
protected by a separate read-write lock in its control block, though.
89
 
These locks can be locked and unlocked without owning the buf_pool->mutex.
90
 
The OS events in the buf_pool struct can be waited for without owning the
91
 
buf_pool->mutex.
92
 
 
93
 
The buf_pool->mutex is a hot-spot in main memory, causing a lot of
94
 
memory bus traffic on multiprocessor systems when processors
95
 
alternately access the mutex. On our Pentium, the mutex is accessed
96
 
maybe every 10 microseconds. We gave up the solution to have mutexes
97
 
for each control block, for instance, because it seemed to be
98
 
complicated.
99
 
 
100
 
A solution to reduce mutex contention of the buf_pool->mutex is to
101
 
create a separate mutex for the page hash table. On Pentium,
102
 
accessing the hash table takes 2 microseconds, about half
103
 
of the total buf_pool->mutex hold time.
104
 
 
105
 
                Control blocks
106
 
                --------------
107
 
 
108
 
The control block contains, for instance, the bufferfix count
109
 
which is incremented when a thread wants a file page to be fixed
110
 
in a buffer frame. The bufferfix operation does not lock the
111
 
contents of the frame, however. For this purpose, the control
112
 
block contains a read-write lock.
113
 
 
114
 
The buffer frames have to be aligned so that the start memory
115
 
address of a frame is divisible by the universal page size, which
116
 
is a power of two.
117
 
 
118
 
We intend to make the buffer buf_pool size on-line reconfigurable,
119
 
that is, the buf_pool size can be changed without closing the database.
120
 
Then the database administrator may adjust it to be bigger
121
 
at night, for example. The control block array must
122
 
contain enough control blocks for the maximum buffer buf_pool size
123
 
which is used in the particular database.
124
 
If the buf_pool size is cut, we exploit the virtual memory mechanism of
125
 
the OS, and just refrain from using frames at high addresses. Then the OS
126
 
can swap them to disk.
127
 
 
128
 
The control blocks containing file pages are put to a hash table
129
 
according to the file address of the page.
130
 
We could speed up the access to an individual page by using
131
 
"pointer swizzling": we could replace the page references on
132
 
non-leaf index pages by direct pointers to the page, if it exists
133
 
in the buf_pool. We could make a separate hash table where we could
134
 
chain all the page references in non-leaf pages residing in the buf_pool,
135
 
using the page reference as the hash key,
136
 
and at the time of reading of a page update the pointers accordingly.
137
 
Drawbacks of this solution are added complexity and,
138
 
possibly, extra space required on non-leaf pages for memory pointers.
139
 
A simpler solution is just to speed up the hash table mechanism
140
 
in the database, using tables whose size is a power of 2.
141
 
 
142
 
                Lists of blocks
143
 
                ---------------
144
 
 
145
 
There are several lists of control blocks.
146
 
 
147
 
The free list (buf_pool->free) contains blocks which are currently not
148
 
used.
149
 
 
150
 
The common LRU list contains all the blocks holding a file page
151
 
except those for which the bufferfix count is non-zero.
152
 
The pages are in the LRU list roughly in the order of the last
153
 
access to the page, so that the oldest pages are at the end of the
154
 
list. We also keep a pointer to near the end of the LRU list,
155
 
which we can use when we want to artificially age a page in the
156
 
buf_pool. This is used if we know that some page is not needed
157
 
again for some time: we insert the block right after the pointer,
158
 
causing it to be replaced sooner than would normally be the case.
159
 
Currently this aging mechanism is used for read-ahead mechanism
160
 
of pages, and it can also be used when there is a scan of a full
161
 
table which cannot fit in the memory. Putting the pages near the
162
 
end of the LRU list, we make sure that most of the buf_pool stays
163
 
in the main memory, undisturbed.
164
 
 
165
 
The unzip_LRU list contains a subset of the common LRU list.  The
166
 
blocks on the unzip_LRU list hold a compressed file page and the
167
 
corresponding uncompressed page frame.  A block is in unzip_LRU if and
168
 
only if the predicate buf_page_belongs_to_unzip_LRU(&block->page)
169
 
holds.  The blocks in unzip_LRU will be in same order as they are in
170
 
the common LRU list.  That is, each manipulation of the common LRU
171
 
list will result in the same manipulation of the unzip_LRU list.
172
 
 
173
 
The chain of modified blocks (buf_pool->flush_list) contains the blocks
174
 
holding file pages that have been modified in the memory
175
 
but not written to disk yet. The block with the oldest modification
176
 
which has not yet been written to disk is at the end of the chain.
177
 
The access to this list is protected by buf_pool->flush_list_mutex.
178
 
 
179
 
The chain of unmodified compressed blocks (buf_pool->zip_clean)
180
 
contains the control blocks (buf_page_t) of those compressed pages
181
 
that are not in buf_pool->flush_list and for which no uncompressed
182
 
page has been allocated in the buffer pool.  The control blocks for
183
 
uncompressed pages are accessible via buf_block_t objects that are
184
 
reachable via buf_pool->chunks[].
185
 
 
186
 
The chains of free memory blocks (buf_pool->zip_free[]) are used by
187
 
the buddy allocator (buf0buddy.c) to keep track of currently unused
188
 
memory blocks of size sizeof(buf_page_t)..UNIV_PAGE_SIZE / 2.  These
189
 
blocks are inside the UNIV_PAGE_SIZE-sized memory blocks of type
190
 
BUF_BLOCK_MEMORY that the buddy allocator requests from the buffer
191
 
pool.  The buddy allocator is solely used for allocating control
192
 
blocks for compressed pages (buf_page_t) and compressed page frames.
193
 
 
194
 
                Loading a file page
195
 
                -------------------
196
 
 
197
 
First, a victim block for replacement has to be found in the
198
 
buf_pool. It is taken from the free list or searched for from the
199
 
end of the LRU-list. An exclusive lock is reserved for the frame,
200
 
the io_fix field is set in the block fixing the block in buf_pool,
201
 
and the io-operation for loading the page is queued. The io-handler thread
202
 
releases the X-lock on the frame and resets the io_fix field
203
 
when the io operation completes.
204
 
 
205
 
A thread may request the above operation using the function
206
 
buf_page_get(). It may then continue to request a lock on the frame.
207
 
The lock is granted when the io-handler releases the x-lock.
208
 
 
209
 
                Read-ahead
210
 
                ----------
211
 
 
212
 
The read-ahead mechanism is intended to be intelligent and
213
 
isolated from the semantically higher levels of the database
214
 
index management. From the higher level we only need the
215
 
information if a file page has a natural successor or
216
 
predecessor page. On the leaf level of a B-tree index,
217
 
these are the next and previous pages in the natural
218
 
order of the pages.
219
 
 
220
 
Let us first explain the read-ahead mechanism when the leafs
221
 
of a B-tree are scanned in an ascending or descending order.
222
 
When a read page is the first time referenced in the buf_pool,
223
 
the buffer manager checks if it is at the border of a so-called
224
 
linear read-ahead area. The tablespace is divided into these
225
 
areas of size 64 blocks, for example. So if the page is at the
226
 
border of such an area, the read-ahead mechanism checks if
227
 
all the other blocks in the area have been accessed in an
228
 
ascending or descending order. If this is the case, the system
229
 
looks at the natural successor or predecessor of the page,
230
 
checks if that is at the border of another area, and in this case
231
 
issues read-requests for all the pages in that area. Maybe
232
 
we could relax the condition that all the pages in the area
233
 
have to be accessed: if data is deleted from a table, there may
234
 
appear holes of unused pages in the area.
235
 
 
236
 
A different read-ahead mechanism is used when there appears
237
 
to be a random access pattern to a file.
238
 
If a new page is referenced in the buf_pool, and several pages
239
 
of its random access area (for instance, 32 consecutive pages
240
 
in a tablespace) have recently been referenced, we may predict
241
 
that the whole area may be needed in the near future, and issue
242
 
the read requests for the whole area.
243
 
*/
244
 
 
245
 
#ifndef UNIV_HOTBACKUP
246
 
/** Value in microseconds */
247
 
static const int WAIT_FOR_READ  = 5000;
248
 
/** Number of attempts made to read in a page in the buffer pool */
249
 
static const ulint BUF_PAGE_READ_MAX_RETRIES = 100;
250
 
 
251
 
/** The buffer pools of the database */
252
 
UNIV_INTERN buf_pool_t* buf_pool_ptr;
253
 
 
254
 
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
255
 
static ulint    buf_dbg_counter = 0; /*!< This is used to insert validation
256
 
                                        operations in execution in the
257
 
                                        debug version */
258
 
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
259
 
#ifdef UNIV_DEBUG
260
 
/** If this is set TRUE, the program prints info whenever
261
 
read-ahead or flush occurs */
262
 
UNIV_INTERN ibool               buf_debug_prints = FALSE;
263
 
#endif /* UNIV_DEBUG */
264
 
 
265
 
#ifdef UNIV_PFS_RWLOCK
266
 
/* Keys to register buffer block related rwlocks and mutexes with
267
 
performance schema */
268
 
UNIV_INTERN mysql_pfs_key_t     buf_block_lock_key;
269
 
# ifdef UNIV_SYNC_DEBUG
270
 
UNIV_INTERN mysql_pfs_key_t     buf_block_debug_latch_key;
271
 
# endif /* UNIV_SYNC_DEBUG */
272
 
#endif /* UNIV_PFS_RWLOCK */
273
 
 
274
 
#ifdef UNIV_PFS_MUTEX
275
 
UNIV_INTERN mysql_pfs_key_t     buffer_block_mutex_key;
276
 
UNIV_INTERN mysql_pfs_key_t     buf_pool_mutex_key;
277
 
UNIV_INTERN mysql_pfs_key_t     buf_pool_zip_mutex_key;
278
 
UNIV_INTERN mysql_pfs_key_t     flush_list_mutex_key;
279
 
#endif /* UNIV_PFS_MUTEX */
280
 
 
281
 
#if defined UNIV_PFS_MUTEX || defined UNIV_PFS_RWLOCK
282
 
# ifndef PFS_SKIP_BUFFER_MUTEX_RWLOCK
283
 
 
284
 
/* Buffer block mutexes and rwlocks can be registered
285
 
in one group rather than individually. If PFS_GROUP_BUFFER_SYNC
286
 
is defined, register buffer block mutex and rwlock
287
 
in one group after their initialization. */
288
 
#  define PFS_GROUP_BUFFER_SYNC
289
 
 
290
 
/* This define caps the number of mutexes/rwlocks can
291
 
be registered with performance schema. Developers can
292
 
modify this define if necessary. Please note, this would
293
 
be effective only if PFS_GROUP_BUFFER_SYNC is defined. */
294
 
#  define PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER    ULINT_MAX
295
 
 
296
 
# endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
297
 
#endif /* UNIV_PFS_MUTEX || UNIV_PFS_RWLOCK */
298
 
 
299
 
/** A chunk of buffers.  The buffer pool is allocated in chunks: each
chunk owns one contiguous memory area holding the page frames, plus an
array of buf_block_t control blocks that describe those frames. */
struct buf_chunk_struct{
	ulint		mem_size;	/*!< allocated size of the chunk,
					in bytes */
	ulint		size;		/*!< number of elements in frames[]
					and blocks[] */
	void*		mem;		/*!< pointer to the memory area which
					was allocated for the frames */
	buf_block_t*	blocks;		/*!< array of buffer control blocks,
					one per frame in this chunk */
};
307
 
#endif /* !UNIV_HOTBACKUP */
308
 
 
309
 
/********************************************************************//**
310
 
Gets the smallest oldest_modification lsn for any page in the pool. Returns
311
 
zero if all modified pages have been flushed to disk.
312
 
@return oldest modification in pool, zero if none */
313
 
UNIV_INTERN
314
 
ib_uint64_t
315
 
buf_pool_get_oldest_modification(void)
316
 
/*==================================*/
317
 
{
318
 
        ulint           i;
319
 
        buf_page_t*     bpage;
320
 
        ib_uint64_t     lsn = 0;
321
 
        ib_uint64_t     oldest_lsn = 0;
322
 
 
323
 
        /* When we traverse all the flush lists we don't want another
324
 
        thread to add a dirty page to any flush list. */
325
 
        log_flush_order_mutex_enter();
326
 
 
327
 
        for (i = 0; i < srv_buf_pool_instances; i++) {
328
 
                buf_pool_t*     buf_pool;
329
 
 
330
 
                buf_pool = buf_pool_from_array(i);
331
 
 
332
 
                buf_flush_list_mutex_enter(buf_pool);
333
 
 
334
 
                bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
335
 
 
336
 
                if (bpage != NULL) {
337
 
                        ut_ad(bpage->in_flush_list);
338
 
                        lsn = bpage->oldest_modification;
339
 
                }
340
 
 
341
 
                buf_flush_list_mutex_exit(buf_pool);
342
 
 
343
 
                if (!oldest_lsn || oldest_lsn > lsn) {
344
 
                        oldest_lsn = lsn;
345
 
                }
346
 
        }
347
 
 
348
 
        log_flush_order_mutex_exit();
349
 
 
350
 
        /* The returned answer may be out of date: the flush_list can
351
 
        change after the mutex has been released. */
352
 
 
353
 
        return(oldest_lsn);
354
 
}
355
 
 
356
 
/********************************************************************//**
357
 
Get total buffer pool statistics. */
358
 
UNIV_INTERN
359
 
void
360
 
buf_get_total_list_len(
361
 
/*===================*/
362
 
        ulint*          LRU_len,        /*!< out: length of all LRU lists */
363
 
        ulint*          free_len,       /*!< out: length of all free lists */
364
 
        ulint*          flush_list_len) /*!< out: length of all flush lists */
365
 
{
366
 
        ulint           i;
367
 
 
368
 
        *LRU_len = 0;
369
 
        *free_len = 0;
370
 
        *flush_list_len = 0;
371
 
 
372
 
        for (i = 0; i < srv_buf_pool_instances; i++) {
373
 
                buf_pool_t*     buf_pool;
374
 
 
375
 
                buf_pool = buf_pool_from_array(i);
376
 
                *LRU_len += UT_LIST_GET_LEN(buf_pool->LRU);
377
 
                *free_len += UT_LIST_GET_LEN(buf_pool->free);
378
 
                *flush_list_len += UT_LIST_GET_LEN(buf_pool->flush_list);
379
 
        }
380
 
}
381
 
 
382
 
/********************************************************************//**
383
 
Get total buffer pool statistics. */
384
 
UNIV_INTERN
385
 
void
386
 
buf_get_total_stat(
387
 
/*===============*/
388
 
        buf_pool_stat_t*        tot_stat)       /*!< out: buffer pool stats */
389
 
{
390
 
        ulint                   i;
391
 
 
392
 
        memset(tot_stat, 0, sizeof(*tot_stat));
393
 
 
394
 
        for (i = 0; i < srv_buf_pool_instances; i++) {
395
 
                buf_pool_stat_t*buf_stat;
396
 
                buf_pool_t*     buf_pool;
397
 
 
398
 
                buf_pool = buf_pool_from_array(i);
399
 
 
400
 
                buf_stat = &buf_pool->stat;
401
 
                tot_stat->n_page_gets += buf_stat->n_page_gets;
402
 
                tot_stat->n_pages_read += buf_stat->n_pages_read;
403
 
                tot_stat->n_pages_written += buf_stat->n_pages_written;
404
 
                tot_stat->n_pages_created += buf_stat->n_pages_created;
405
 
                tot_stat->n_ra_pages_read += buf_stat->n_ra_pages_read;
406
 
                tot_stat->n_ra_pages_evicted += buf_stat->n_ra_pages_evicted;
407
 
                tot_stat->n_pages_made_young += buf_stat->n_pages_made_young;
408
 
 
409
 
                tot_stat->n_pages_not_made_young +=
410
 
                        buf_stat->n_pages_not_made_young;
411
 
        }
412
 
}
413
 
 
414
 
/********************************************************************//**
415
 
Allocates a buffer block.
416
 
@return own: the allocated block, in state BUF_BLOCK_MEMORY */
417
 
UNIV_INTERN
418
 
buf_block_t*
419
 
buf_block_alloc(
420
 
/*============*/
421
 
        buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
422
 
        ulint           zip_size)       /*!< in: compressed page size in bytes,
423
 
                                        or 0 if uncompressed tablespace */
424
 
{
425
 
        buf_block_t*    block;
426
 
        ulint           index;
427
 
        static ulint    buf_pool_index;
428
 
 
429
 
        if (buf_pool == NULL) {
430
 
                /* We are allocating memory from any buffer pool, ensure
431
 
                we spread the grace on all buffer pool instances. */
432
 
                index = buf_pool_index++ % srv_buf_pool_instances;
433
 
                buf_pool = buf_pool_from_array(index);
434
 
        }
435
 
 
436
 
        block = buf_LRU_get_free_block(buf_pool, zip_size);
437
 
 
438
 
        buf_block_set_state(block, BUF_BLOCK_MEMORY);
439
 
 
440
 
        return(block);
441
 
}
442
 
 
443
 
/********************************************************************//**
444
 
Calculates a page checksum which is stored to the page when it is written
445
 
to a file. Note that we must be careful to calculate the same value on
446
 
32-bit and 64-bit architectures.
447
 
@return checksum */
448
 
UNIV_INTERN
449
 
ulint
450
 
buf_calc_page_new_checksum(
451
 
/*=======================*/
452
 
        const byte*     page)   /*!< in: buffer page */
453
 
{
454
 
        ulint checksum;
455
 
 
456
 
        /* Since the field FIL_PAGE_FILE_FLUSH_LSN, and in versions <= 4.1.x
457
 
        ..._ARCH_LOG_NO, are written outside the buffer pool to the first
458
 
        pages of data files, we have to skip them in the page checksum
459
 
        calculation.
460
 
        We must also skip the field FIL_PAGE_SPACE_OR_CHKSUM where the
461
 
        checksum is stored, and also the last 8 bytes of page because
462
 
        there we store the old formula checksum. */
463
 
 
464
 
        checksum = ut_fold_binary(page + FIL_PAGE_OFFSET,
465
 
                                  FIL_PAGE_FILE_FLUSH_LSN - FIL_PAGE_OFFSET)
466
 
                + ut_fold_binary(page + FIL_PAGE_DATA,
467
 
                                 UNIV_PAGE_SIZE - FIL_PAGE_DATA
468
 
                                 - FIL_PAGE_END_LSN_OLD_CHKSUM);
469
 
        checksum = checksum & 0xFFFFFFFFUL;
470
 
 
471
 
        return(checksum);
472
 
}
473
 
 
474
 
/********************************************************************//**
475
 
In versions < 4.0.14 and < 4.1.1 there was a bug that the checksum only
476
 
looked at the first few bytes of the page. This calculates that old
477
 
checksum.
478
 
NOTE: we must first store the new formula checksum to
479
 
FIL_PAGE_SPACE_OR_CHKSUM before calculating and storing this old checksum
480
 
because this takes that field as an input!
481
 
@return checksum */
482
 
UNIV_INTERN
483
 
ulint
484
 
buf_calc_page_old_checksum(
485
 
/*=======================*/
486
 
        const byte*     page)   /*!< in: buffer page */
487
 
{
488
 
        ulint checksum;
489
 
 
490
 
        checksum = ut_fold_binary(page, FIL_PAGE_FILE_FLUSH_LSN);
491
 
 
492
 
        checksum = checksum & 0xFFFFFFFFUL;
493
 
 
494
 
        return(checksum);
495
 
}
496
 
 
497
 
/********************************************************************//**
Checks if a page is corrupt. Performs, in order: a quick comparison of
the LSN copies stored at the start and end of the page, an advisory
future-LSN warning (logged only, never fails the page), and finally the
old- and new-formula checksum validations when checksums are enabled.
@return TRUE if corrupted */
UNIV_INTERN
ibool
buf_page_is_corrupted(
/*==================*/
	const byte*	read_buf,	/*!< in: a database page */
	ulint		zip_size)	/*!< in: size of compressed page;
					0 for uncompressed pages */
{
	ulint		checksum_field;
	ulint		old_checksum_field;

	/* Cheapest check first: an uncompressed page stores the low 4
	bytes of its LSN both in the header and in the trailer; a torn
	write makes them disagree.  Compressed pages have no trailer
	copy, so the check is skipped for them. */
	if (UNIV_LIKELY(!zip_size)
	    && memcmp(read_buf + FIL_PAGE_LSN + 4,
		      read_buf + UNIV_PAGE_SIZE
		      - FIL_PAGE_END_LSN_OLD_CHKSUM + 4, 4)) {

		/* Stored log sequence numbers at the start and the end
		of page do not match */

		return(TRUE);
	}

#ifndef UNIV_HOTBACKUP
	if (recv_lsn_checks_on) {
		ib_uint64_t	current_lsn;

		/* A page LSN ahead of the current system LSN suggests
		the tablespace was copied without its log files.  This
		only logs a warning; it does NOT mark the page corrupt. */
		if (log_peek_lsn(&current_lsn)
		    && UNIV_UNLIKELY
		    (current_lsn
		     < mach_read_from_8(read_buf + FIL_PAGE_LSN))) {
			ut_print_timestamp(stderr);

			drizzled::errmsg_printf(drizzled::error::INFO,
						"InnoDB: Error: page %lu log sequence number %"PRIu64". "
						"InnoDB: is in the future! Current system log sequence number %"PRIu64". "
						"Your database may be corrupt or you may have copied the InnoDB tablespace but not the InnoDB log files. See "
						" " REFMAN "forcing-recovery.html for more information. ",
						(ulong) mach_read_from_4(read_buf
									 + FIL_PAGE_OFFSET),
						mach_read_from_8(read_buf + FIL_PAGE_LSN),
						current_lsn);
		}
	}
#endif

	/* If we use checksums validation, make additional check before
	returning TRUE to ensure that the checksum is not equal to
	BUF_NO_CHECKSUM_MAGIC which might be stored by InnoDB with checksums
	disabled. Otherwise, skip checksum calculation and return FALSE */

	if (UNIV_LIKELY(srv_use_checksums)) {
		checksum_field = mach_read_from_4(read_buf
						  + FIL_PAGE_SPACE_OR_CHKSUM);

		/* Compressed pages carry only the zip checksum; verify
		it and return immediately. */
		if (UNIV_UNLIKELY(zip_size)) {
			return(checksum_field != BUF_NO_CHECKSUM_MAGIC
			       && checksum_field
			       != page_zip_calc_checksum(read_buf, zip_size));
		}

		old_checksum_field = mach_read_from_4(
			read_buf + UNIV_PAGE_SIZE
			- FIL_PAGE_END_LSN_OLD_CHKSUM);

		/* There are 2 valid formulas for old_checksum_field:

		1. Very old versions of InnoDB only stored 8 byte lsn to the
		start and the end of the page.

		2. Newer InnoDB versions store the old formula checksum
		there. */

		if (old_checksum_field != mach_read_from_4(read_buf
							   + FIL_PAGE_LSN)
		    && old_checksum_field != BUF_NO_CHECKSUM_MAGIC
		    && old_checksum_field
		    != buf_calc_page_old_checksum(read_buf)) {

			return(TRUE);
		}

		/* InnoDB versions < 4.0.14 and < 4.1.1 stored the space id
		(always equal to 0), to FIL_PAGE_SPACE_OR_CHKSUM */

		if (checksum_field != 0
		    && checksum_field != BUF_NO_CHECKSUM_MAGIC
		    && checksum_field
		    != buf_calc_page_new_checksum(read_buf)) {

			return(TRUE);
		}
	}

	return(FALSE);
}
595
 
 
596
 
/********************************************************************//**
597
 
Prints a page to stderr. */
598
 
UNIV_INTERN
599
 
void
600
 
buf_page_print(
601
 
/*===========*/
602
 
        const byte*     read_buf,       /*!< in: a database page */
603
 
        ulint           zip_size)       /*!< in: compressed page size, or
604
 
                                0 for uncompressed pages */
605
 
{
606
 
#ifndef UNIV_HOTBACKUP
607
 
        dict_index_t*   index;
608
 
#endif /* !UNIV_HOTBACKUP */
609
 
        ulint           checksum;
610
 
        ulint           old_checksum;
611
 
        ulint           size    = zip_size;
612
 
 
613
 
        if (!size) {
614
 
                size = UNIV_PAGE_SIZE;
615
 
        }
616
 
 
617
 
        ut_print_timestamp(stderr);
618
 
        fprintf(stderr, "  InnoDB: Page dump in ascii and hex (%lu bytes):\n",
619
 
                (ulong) size);
620
 
        ut_print_buf(stderr, read_buf, size);
621
 
        fputs("\nInnoDB: End of page dump\n", stderr);
622
 
 
623
 
        if (zip_size) {
624
 
                /* Print compressed page. */
625
 
 
626
 
                switch (fil_page_get_type(read_buf)) {
627
 
                case FIL_PAGE_TYPE_ZBLOB:
628
 
                case FIL_PAGE_TYPE_ZBLOB2:
629
 
                        checksum = srv_use_checksums
630
 
                                ? page_zip_calc_checksum(read_buf, zip_size)
631
 
                                : BUF_NO_CHECKSUM_MAGIC;
632
 
                        ut_print_timestamp(stderr);
633
 
                        fprintf(stderr,
634
 
                                "  InnoDB: Compressed BLOB page"
635
 
                                " checksum %lu, stored %lu\n"
636
 
                                "InnoDB: Page lsn %lu %lu\n"
637
 
                                "InnoDB: Page number (if stored"
638
 
                                " to page already) %lu,\n"
639
 
                                "InnoDB: space id (if stored"
640
 
                                " to page already) %lu\n",
641
 
                                (ulong) checksum,
642
 
                                (ulong) mach_read_from_4(
643
 
                                        read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
644
 
                                (ulong) mach_read_from_4(
645
 
                                        read_buf + FIL_PAGE_LSN),
646
 
                                (ulong) mach_read_from_4(
647
 
                                        read_buf + (FIL_PAGE_LSN + 4)),
648
 
                                (ulong) mach_read_from_4(
649
 
                                        read_buf + FIL_PAGE_OFFSET),
650
 
                                (ulong) mach_read_from_4(
651
 
                                        read_buf
652
 
                                        + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
653
 
                        return;
654
 
                default:
655
 
                        ut_print_timestamp(stderr);
656
 
                        fprintf(stderr,
657
 
                                "  InnoDB: unknown page type %lu,"
658
 
                                " assuming FIL_PAGE_INDEX\n",
659
 
                                fil_page_get_type(read_buf));
660
 
                        /* fall through */
661
 
                case FIL_PAGE_INDEX:
662
 
                        checksum = srv_use_checksums
663
 
                                ? page_zip_calc_checksum(read_buf, zip_size)
664
 
                                : BUF_NO_CHECKSUM_MAGIC;
665
 
 
666
 
                        ut_print_timestamp(stderr);
667
 
                        fprintf(stderr,
668
 
                                "  InnoDB: Compressed page checksum %lu,"
669
 
                                " stored %lu\n"
670
 
                                "InnoDB: Page lsn %lu %lu\n"
671
 
                                "InnoDB: Page number (if stored"
672
 
                                " to page already) %lu,\n"
673
 
                                "InnoDB: space id (if stored"
674
 
                                " to page already) %lu\n",
675
 
                                (ulong) checksum,
676
 
                                (ulong) mach_read_from_4(
677
 
                                        read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
678
 
                                (ulong) mach_read_from_4(
679
 
                                        read_buf + FIL_PAGE_LSN),
680
 
                                (ulong) mach_read_from_4(
681
 
                                        read_buf + (FIL_PAGE_LSN + 4)),
682
 
                                (ulong) mach_read_from_4(
683
 
                                        read_buf + FIL_PAGE_OFFSET),
684
 
                                (ulong) mach_read_from_4(
685
 
                                        read_buf
686
 
                                        + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
687
 
                        return;
688
 
                case FIL_PAGE_TYPE_XDES:
689
 
                        /* This is an uncompressed page. */
690
 
                        break;
691
 
                }
692
 
        }
693
 
 
694
 
        checksum = srv_use_checksums
695
 
                ? buf_calc_page_new_checksum(read_buf) : BUF_NO_CHECKSUM_MAGIC;
696
 
        old_checksum = srv_use_checksums
697
 
                ? buf_calc_page_old_checksum(read_buf) : BUF_NO_CHECKSUM_MAGIC;
698
 
 
699
 
        ut_print_timestamp(stderr);
700
 
        fprintf(stderr,
701
 
                "  InnoDB: Page checksum %lu, prior-to-4.0.14-form"
702
 
                " checksum %lu\n"
703
 
                "InnoDB: stored checksum %lu, prior-to-4.0.14-form"
704
 
                " stored checksum %lu\n"
705
 
                "InnoDB: Page lsn %lu %lu, low 4 bytes of lsn"
706
 
                " at page end %lu\n"
707
 
                "InnoDB: Page number (if stored to page already) %lu,\n"
708
 
                "InnoDB: space id (if created with >= MySQL-4.1.1"
709
 
                " and stored already) %lu\n",
710
 
                (ulong) checksum, (ulong) old_checksum,
711
 
                (ulong) mach_read_from_4(read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
712
 
                (ulong) mach_read_from_4(read_buf + UNIV_PAGE_SIZE
713
 
                                         - FIL_PAGE_END_LSN_OLD_CHKSUM),
714
 
                (ulong) mach_read_from_4(read_buf + FIL_PAGE_LSN),
715
 
                (ulong) mach_read_from_4(read_buf + FIL_PAGE_LSN + 4),
716
 
                (ulong) mach_read_from_4(read_buf + UNIV_PAGE_SIZE
717
 
                                         - FIL_PAGE_END_LSN_OLD_CHKSUM + 4),
718
 
                (ulong) mach_read_from_4(read_buf + FIL_PAGE_OFFSET),
719
 
                (ulong) mach_read_from_4(read_buf
720
 
                                         + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
721
 
 
722
 
#ifndef UNIV_HOTBACKUP
723
 
        if (mach_read_from_2(read_buf + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE)
724
 
            == TRX_UNDO_INSERT) {
725
 
                fprintf(stderr,
726
 
                        "InnoDB: Page may be an insert undo log page\n");
727
 
        } else if (mach_read_from_2(read_buf + TRX_UNDO_PAGE_HDR
728
 
                                    + TRX_UNDO_PAGE_TYPE)
729
 
                   == TRX_UNDO_UPDATE) {
730
 
                fprintf(stderr,
731
 
                        "InnoDB: Page may be an update undo log page\n");
732
 
        }
733
 
#endif /* !UNIV_HOTBACKUP */
734
 
 
735
 
        switch (fil_page_get_type(read_buf)) {
736
 
                index_id_t      index_id;
737
 
        case FIL_PAGE_INDEX:
738
 
                index_id = btr_page_get_index_id(read_buf);
739
 
                fprintf(stderr,
740
 
                        "InnoDB: Page may be an index page where"
741
 
                        " index id is %llu\n",
742
 
                        (ullint) index_id);
743
 
#ifndef UNIV_HOTBACKUP
744
 
                index = dict_index_find_on_id_low(index_id);
745
 
                if (index) {
746
 
                        fputs("InnoDB: (", stderr);
747
 
                        dict_index_name_print(stderr, NULL, index);
748
 
                        fputs(")\n", stderr);
749
 
                }
750
 
#endif /* !UNIV_HOTBACKUP */
751
 
                break;
752
 
        case FIL_PAGE_INODE:
753
 
                fputs("InnoDB: Page may be an 'inode' page\n", stderr);
754
 
                break;
755
 
        case FIL_PAGE_IBUF_FREE_LIST:
756
 
                fputs("InnoDB: Page may be an insert buffer free list page\n",
757
 
                      stderr);
758
 
                break;
759
 
        case FIL_PAGE_TYPE_ALLOCATED:
760
 
                fputs("InnoDB: Page may be a freshly allocated page\n",
761
 
                      stderr);
762
 
                break;
763
 
        case FIL_PAGE_IBUF_BITMAP:
764
 
                fputs("InnoDB: Page may be an insert buffer bitmap page\n",
765
 
                      stderr);
766
 
                break;
767
 
        case FIL_PAGE_TYPE_SYS:
768
 
                fputs("InnoDB: Page may be a system page\n",
769
 
                      stderr);
770
 
                break;
771
 
        case FIL_PAGE_TYPE_TRX_SYS:
772
 
                fputs("InnoDB: Page may be a transaction system page\n",
773
 
                      stderr);
774
 
                break;
775
 
        case FIL_PAGE_TYPE_FSP_HDR:
776
 
                fputs("InnoDB: Page may be a file space header page\n",
777
 
                      stderr);
778
 
                break;
779
 
        case FIL_PAGE_TYPE_XDES:
780
 
                fputs("InnoDB: Page may be an extent descriptor page\n",
781
 
                      stderr);
782
 
                break;
783
 
        case FIL_PAGE_TYPE_BLOB:
784
 
                fputs("InnoDB: Page may be a BLOB page\n",
785
 
                      stderr);
786
 
                break;
787
 
        case FIL_PAGE_TYPE_ZBLOB:
788
 
        case FIL_PAGE_TYPE_ZBLOB2:
789
 
                fputs("InnoDB: Page may be a compressed BLOB page\n",
790
 
                      stderr);
791
 
                break;
792
 
        }
793
 
}
794
 
 
795
 
#ifndef UNIV_HOTBACKUP
796
 
 
797
 
# ifdef PFS_GROUP_BUFFER_SYNC
798
 
/********************************************************************//**
799
 
This function registers mutexes and rwlocks in buffer blocks with
800
 
performance schema. If PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER is
801
 
defined to be a value less than chunk->size, then only mutexes
802
 
and rwlocks in the first PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER
803
 
blocks are registered. */
804
 
static
805
 
void
806
 
pfs_register_buffer_block(
807
 
/*======================*/
808
 
        buf_chunk_t*    chunk)          /*!< in/out: chunk of buffers */
809
 
{
810
 
        ulint           i;
811
 
        ulint           num_to_register;
812
 
        buf_block_t*    block;
813
 
 
814
 
        block = chunk->blocks;
815
 
 
816
 
        num_to_register = ut_min(chunk->size,
817
 
                                 PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER);
818
 
 
819
 
        for (i = 0; i < num_to_register; i++) {
820
 
                mutex_t*        mutex;
821
 
                rw_lock_t*      rwlock;
822
 
 
823
 
#  ifdef UNIV_PFS_MUTEX
824
 
                mutex = &block->mutex;
825
 
                ut_a(!mutex->pfs_psi);
826
 
                mutex->pfs_psi = (PSI_server)
827
 
                        ? PSI_server->init_mutex(buffer_block_mutex_key, mutex)
828
 
                        : NULL;
829
 
#  endif /* UNIV_PFS_MUTEX */
830
 
 
831
 
#  ifdef UNIV_PFS_RWLOCK
832
 
                rwlock = &block->lock;
833
 
                ut_a(!rwlock->pfs_psi);
834
 
                rwlock->pfs_psi = (PSI_server)
835
 
                        ? PSI_server->init_rwlock(buf_block_lock_key, rwlock)
836
 
                        : NULL;
837
 
#  endif /* UNIV_PFS_RWLOCK */
838
 
                block++;
839
 
        }
840
 
}
841
 
# endif /* PFS_GROUP_BUFFER_SYNC */
842
 
 
843
 
/********************************************************************//**
Initializes a buffer control block when the buf_pool is created.
Sets the block into the BUF_BLOCK_NOT_USED state, clears all bookkeeping
flags, and creates the per-block mutex and rwlock. */
static
void
buf_block_init(
/*===========*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	buf_block_t*	block,		/*!< in: pointer to control block */
	byte*		frame)		/*!< in: pointer to buffer frame */
{
	/* Associate the frame memory with the descriptor for
	Valgrind/UNIV_MEM bookkeeping. */
	UNIV_MEM_DESC(frame, UNIV_PAGE_SIZE, block);

	block->frame = frame;

	/* Remember which buffer pool instance this block belongs to,
	so buf_pool_from_bpage() can find it later. */
	block->page.buf_pool_index = buf_pool_index(buf_pool);
	block->page.state = BUF_BLOCK_NOT_USED;
	block->page.buf_fix_count = 0;
	block->page.io_fix = BUF_IO_NONE;

	block->modify_clock = 0;

#ifdef UNIV_DEBUG_FILE_ACCESSES
	block->page.file_page_was_freed = FALSE;
#endif /* UNIV_DEBUG_FILE_ACCESSES */

	block->check_index_page_at_flush = FALSE;
	block->index = NULL;

	/* Not yet in the adaptive hash index. */
	block->is_hashed = FALSE;

#ifdef UNIV_DEBUG
	/* Debug-only membership flags for the various lists/hashes. */
	block->page.in_page_hash = FALSE;
	block->page.in_zip_hash = FALSE;
	block->page.in_flush_list = FALSE;
	block->page.in_free_list = FALSE;
	block->page.in_LRU_list = FALSE;
	block->in_unzip_LRU_list = FALSE;
#endif /* UNIV_DEBUG */
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
	block->n_pointers = 0;
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
	page_zip_des_init(&block->page.zip);

#if defined PFS_SKIP_BUFFER_MUTEX_RWLOCK || defined PFS_GROUP_BUFFER_SYNC
	/* If PFS_SKIP_BUFFER_MUTEX_RWLOCK is defined, skip registration
	of buffer block mutex/rwlock with performance schema. If
	PFS_GROUP_BUFFER_SYNC is defined, skip the registration
	since buffer block mutex/rwlock will be registered later in
	pfs_register_buffer_block() */

	mutex_create(PFS_NOT_INSTRUMENTED, &block->mutex, SYNC_BUF_BLOCK);
	rw_lock_create(PFS_NOT_INSTRUMENTED, &block->lock, SYNC_LEVEL_VARYING);
#else /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */
	mutex_create(buffer_block_mutex_key, &block->mutex, SYNC_BUF_BLOCK);
	rw_lock_create(buf_block_lock_key, &block->lock, SYNC_LEVEL_VARYING);
#endif /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */

	ut_ad(rw_lock_validate(&(block->lock)));

#ifdef UNIV_SYNC_DEBUG
	rw_lock_create(buf_block_debug_latch_key,
		       &block->debug_latch, SYNC_NO_ORDER_CHECK);
#endif /* UNIV_SYNC_DEBUG */
}
907
 
 
908
 
/********************************************************************//**
Allocates a chunk of buffer frames.
The chunk's memory holds the block descriptor array at the start,
followed by the page-aligned buffer frames; each descriptor is
initialized and its page placed on buf_pool->free.
@return chunk, or NULL on failure */
static
buf_chunk_t*
buf_chunk_init(
/*===========*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	buf_chunk_t*	chunk,		/*!< out: chunk of buffers */
	ulint		mem_size)	/*!< in: requested size in bytes */
{
	buf_block_t*	block;
	byte*		frame;
	ulint		i;

	/* Round down to a multiple of page size,
	although it already should be. */
	mem_size = ut_2pow_round(mem_size, UNIV_PAGE_SIZE);
	/* Reserve space for the block descriptors. */
	mem_size += ut_2pow_round((mem_size / UNIV_PAGE_SIZE) * (sizeof *block)
				  + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE);

	/* os_mem_alloc_large() may adjust chunk->mem_size (e.g. to a
	multiple of the large page size). */
	chunk->mem_size = mem_size;
	chunk->mem = os_mem_alloc_large(&chunk->mem_size);

	if (UNIV_UNLIKELY(chunk->mem == NULL)) {

		return(NULL);
	}

	/* Allocate the block descriptors from
	the start of the memory block. */
	chunk->blocks = static_cast<buf_block_struct *>(chunk->mem);

	/* Align a pointer to the first frame.  Note that when
	os_large_page_size is smaller than UNIV_PAGE_SIZE,
	we may allocate one fewer block than requested.  When
	it is bigger, we may allocate more blocks than requested. */

	frame = static_cast<unsigned char *>(ut_align(chunk->mem, UNIV_PAGE_SIZE));
	chunk->size = chunk->mem_size / UNIV_PAGE_SIZE
		- (frame != chunk->mem);

	/* Subtract the space needed for block descriptors. */
	{
		ulint	size = chunk->size;

		/* Advance the first frame past the descriptor array,
		giving up one frame per page still overlapped by it. */
		while (frame < (byte*) (chunk->blocks + size)) {
			frame += UNIV_PAGE_SIZE;
			size--;
		}

		chunk->size = size;
	}

	/* Init block structs and assign frames for them. Then we
	assign the frames to the first blocks (we already mapped the
	memory above). */

	block = chunk->blocks;

	for (i = chunk->size; i--; ) {

		buf_block_init(buf_pool, block, frame);

#ifdef HAVE_VALGRIND
		/* Wipe contents of frame to eliminate a Purify warning */
		memset(block->frame, '\0', UNIV_PAGE_SIZE);
#endif
		/* Add the block to the free list */
		UT_LIST_ADD_LAST(list, buf_pool->free, (&block->page));

		ut_d(block->page.in_free_list = TRUE);
		ut_ad(buf_pool_from_block(block) == buf_pool);

		block++;
		frame += UNIV_PAGE_SIZE;
	}

#ifdef PFS_GROUP_BUFFER_SYNC
	/* Register the block mutexes/rwlocks with performance schema
	in one pass, now that all blocks of the chunk exist. */
	pfs_register_buffer_block(chunk);
#endif
	return(chunk);
}
992
 
 
993
 
#ifdef UNIV_DEBUG
994
 
/*********************************************************************//**
995
 
Finds a block in the given buffer chunk that points to a
996
 
given compressed page.
997
 
@return buffer block pointing to the compressed page, or NULL */
998
 
static
999
 
buf_block_t*
1000
 
buf_chunk_contains_zip(
1001
 
/*===================*/
1002
 
        buf_chunk_t*    chunk,  /*!< in: chunk being checked */
1003
 
        const void*     data)   /*!< in: pointer to compressed page */
1004
 
{
1005
 
        buf_block_t*    block;
1006
 
        ulint           i;
1007
 
 
1008
 
        block = chunk->blocks;
1009
 
 
1010
 
        for (i = chunk->size; i--; block++) {
1011
 
                if (block->page.zip.data == data) {
1012
 
 
1013
 
                        return(block);
1014
 
                }
1015
 
        }
1016
 
 
1017
 
        return(NULL);
1018
 
}
1019
 
 
1020
 
/*********************************************************************//**
1021
 
Finds a block in the buffer pool that points to a
1022
 
given compressed page.
1023
 
@return buffer block pointing to the compressed page, or NULL */
1024
 
UNIV_INTERN
1025
 
buf_block_t*
1026
 
buf_pool_contains_zip(
1027
 
/*==================*/
1028
 
        buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
1029
 
        const void*     data)           /*!< in: pointer to compressed page */
1030
 
{
1031
 
        ulint           n;
1032
 
        buf_chunk_t*    chunk = buf_pool->chunks;
1033
 
 
1034
 
        ut_ad(buf_pool);
1035
 
        ut_ad(buf_pool_mutex_own(buf_pool));
1036
 
        for (n = buf_pool->n_chunks; n--; chunk++) {
1037
 
 
1038
 
                buf_block_t* block = buf_chunk_contains_zip(chunk, data);
1039
 
 
1040
 
                if (block) {
1041
 
                        return(block);
1042
 
                }
1043
 
        }
1044
 
 
1045
 
        return(NULL);
1046
 
}
1047
 
#endif /* UNIV_DEBUG */
1048
 
 
1049
 
/*********************************************************************//**
Checks that all file pages in the buffer chunk are in a replaceable state.
@return address of a non-free block, or NULL if all freed */
static
const buf_block_t*
buf_chunk_not_freed(
/*================*/
	buf_chunk_t*	chunk)	/*!< in: chunk being checked */
{
	buf_block_t*	block;
	ulint		i;

	block = chunk->blocks;

	for (i = chunk->size; i--; block++) {
		ibool	ready;

		switch (buf_block_get_state(block)) {
		case BUF_BLOCK_ZIP_FREE:
		case BUF_BLOCK_ZIP_PAGE:
		case BUF_BLOCK_ZIP_DIRTY:
			/* The uncompressed buffer pool should never
			contain compressed block descriptors. */
			ut_error;
			break;
		case BUF_BLOCK_NOT_USED:
		case BUF_BLOCK_READY_FOR_USE:
		case BUF_BLOCK_MEMORY:
		case BUF_BLOCK_REMOVE_HASH:
			/* Skip blocks that are not being used for
			file pages. */
			break;
		case BUF_BLOCK_FILE_PAGE:
			/* The replaceability check must be made while
			holding the block mutex. */
			mutex_enter(&block->mutex);
			ready = buf_flush_ready_for_replace(&block->page);
			mutex_exit(&block->mutex);

			if (!ready) {

				/* Found a file page that cannot yet be
				replaced; report it to the caller. */
				return(block);
			}

			break;
		}
	}

	return(NULL);
}
1097
 
 
1098
 
/*********************************************************************//**
1099
 
Checks that all blocks in the buffer chunk are in BUF_BLOCK_NOT_USED state.
1100
 
@return TRUE if all freed */
1101
 
static
1102
 
ibool
1103
 
buf_chunk_all_free(
1104
 
/*===============*/
1105
 
        const buf_chunk_t*      chunk)  /*!< in: chunk being checked */
1106
 
{
1107
 
        const buf_block_t*      block;
1108
 
        ulint                   i;
1109
 
 
1110
 
        block = chunk->blocks;
1111
 
 
1112
 
        for (i = chunk->size; i--; block++) {
1113
 
 
1114
 
                if (buf_block_get_state(block) != BUF_BLOCK_NOT_USED) {
1115
 
 
1116
 
                        return(FALSE);
1117
 
                }
1118
 
        }
1119
 
 
1120
 
        return(TRUE);
1121
 
}
1122
 
 
1123
 
/********************************************************************//**
Frees a chunk of buffer frames.
All blocks must be in BUF_BLOCK_NOT_USED state and on the free list;
each block is taken off the free list and its latches destroyed before
the chunk memory is returned to the OS. */
static
void
buf_chunk_free(
/*===========*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	buf_chunk_t*	chunk)		/*!< out: chunk of buffers */
{
	buf_block_t*		block;
	const buf_block_t*	block_end;

	ut_ad(buf_pool_mutex_own(buf_pool));

	block_end = chunk->blocks + chunk->size;

	for (block = chunk->blocks; block < block_end; block++) {
		/* Only completely unused blocks may be freed. */
		ut_a(buf_block_get_state(block) == BUF_BLOCK_NOT_USED);
		ut_a(!block->page.zip.data);

		ut_ad(!block->page.in_LRU_list);
		ut_ad(!block->in_unzip_LRU_list);
		ut_ad(!block->page.in_flush_list);
		/* Remove the block from the free list. */
		ut_ad(block->page.in_free_list);
		UT_LIST_REMOVE(list, buf_pool->free, (&block->page));

		/* Free the latches. */
		mutex_free(&block->mutex);
		rw_lock_free(&block->lock);
#ifdef UNIV_SYNC_DEBUG
		rw_lock_free(&block->debug_latch);
#endif /* UNIV_SYNC_DEBUG */
		/* Drop the UNIV_MEM association made in buf_block_init(). */
		UNIV_MEM_UNDESC(block);
	}

	/* Release the descriptor array and all frames in one call;
	they live in the same large allocation. */
	os_mem_free_large(chunk->mem, chunk->mem_size);
}
1161
 
 
1162
 
/********************************************************************//**
1163
 
Set buffer pool size variables after resizing it */
1164
 
static
1165
 
void
1166
 
buf_pool_set_sizes(void)
1167
 
/*====================*/
1168
 
{
1169
 
        ulint   i;
1170
 
        ulint   curr_size = 0;
1171
 
 
1172
 
        buf_pool_mutex_enter_all();
1173
 
 
1174
 
        for (i = 0; i < srv_buf_pool_instances; i++) {
1175
 
                buf_pool_t*     buf_pool;
1176
 
 
1177
 
                buf_pool = buf_pool_from_array(i);
1178
 
                curr_size += buf_pool->curr_pool_size;
1179
 
        }
1180
 
 
1181
 
        srv_buf_pool_curr_size = curr_size;
1182
 
        srv_buf_pool_old_size = srv_buf_pool_size;
1183
 
 
1184
 
        buf_pool_mutex_exit_all();
1185
 
}
1186
 
 
1187
 
/********************************************************************//**
1188
 
Initialize a buffer pool instance.
1189
 
@return DB_SUCCESS if all goes well. */
1190
 
static
1191
 
ulint
1192
 
buf_pool_init_instance(
1193
 
/*===================*/
1194
 
        buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
1195
 
        ulint           buf_pool_size,  /*!< in: size in bytes */
1196
 
        ulint           instance_no)    /*!< in: id of the instance */
1197
 
{
1198
 
        ulint           i;
1199
 
        buf_chunk_t*    chunk;
1200
 
 
1201
 
        /* 1. Initialize general fields
1202
 
        ------------------------------- */
1203
 
        mutex_create(buf_pool_mutex_key,
1204
 
                     &buf_pool->mutex, SYNC_BUF_POOL);
1205
 
        mutex_create(buf_pool_zip_mutex_key,
1206
 
                     &buf_pool->zip_mutex, SYNC_BUF_BLOCK);
1207
 
 
1208
 
        buf_pool_mutex_enter(buf_pool);
1209
 
 
1210
 
        if (buf_pool_size > 0) {
1211
 
                buf_pool->n_chunks = 1;
1212
 
                void *chunk_ptr= mem_zalloc((sizeof *chunk));
1213
 
                buf_pool->chunks = chunk = static_cast<buf_chunk_t *>(chunk_ptr);
1214
 
 
1215
 
                UT_LIST_INIT(buf_pool->free);
1216
 
 
1217
 
                if (!buf_chunk_init(buf_pool, chunk, buf_pool_size)) {
1218
 
                        mem_free(chunk);
1219
 
                        mem_free(buf_pool);
1220
 
 
1221
 
                        buf_pool_mutex_exit(buf_pool);
1222
 
 
1223
 
                        return(DB_ERROR);
1224
 
                }
1225
 
 
1226
 
                buf_pool->instance_no = instance_no;
1227
 
                buf_pool->old_pool_size = buf_pool_size;
1228
 
                buf_pool->curr_size = chunk->size;
1229
 
                buf_pool->curr_pool_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
1230
 
 
1231
 
                buf_pool->page_hash = hash_create(2 * buf_pool->curr_size);
1232
 
                buf_pool->zip_hash = hash_create(2 * buf_pool->curr_size);
1233
 
                
1234
 
                buf_pool->last_printout_time = ut_time();
1235
 
        }
1236
 
        /* 2. Initialize flushing fields
1237
 
        -------------------------------- */
1238
 
 
1239
 
        mutex_create(flush_list_mutex_key, &buf_pool->flush_list_mutex,
1240
 
                     SYNC_BUF_FLUSH_LIST);
1241
 
 
1242
 
        for (i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) {
1243
 
                buf_pool->no_flush[i] = os_event_create(NULL);
1244
 
        }
1245
 
 
1246
 
        /* 3. Initialize LRU fields
1247
 
        --------------------------- */
1248
 
 
1249
 
        /* All fields are initialized by mem_zalloc(). */
1250
 
 
1251
 
        buf_pool_mutex_exit(buf_pool);
1252
 
 
1253
 
        return(DB_SUCCESS);
1254
 
}
1255
 
 
1256
 
/********************************************************************//**
1257
 
free one buffer pool instance */
1258
 
static
1259
 
void
1260
 
buf_pool_free_instance(
1261
 
/*===================*/
1262
 
        buf_pool_t*     buf_pool)       /* in,own: buffer pool instance
1263
 
                                        to free */
1264
 
{
1265
 
        buf_chunk_t*    chunk;
1266
 
        buf_chunk_t*    chunks;
1267
 
 
1268
 
        chunks = buf_pool->chunks;
1269
 
        chunk = chunks + buf_pool->n_chunks;
1270
 
 
1271
 
        while (--chunk >= chunks) {
1272
 
                /* Bypass the checks of buf_chunk_free(), since they
1273
 
                would fail at shutdown. */
1274
 
                os_mem_free_large(chunk->mem, chunk->mem_size);
1275
 
        }
1276
 
 
1277
 
        mem_free(buf_pool->chunks);
1278
 
        hash_table_free(buf_pool->page_hash);
1279
 
        hash_table_free(buf_pool->zip_hash);
1280
 
}
1281
 
 
1282
 
/********************************************************************//**
Creates the buffer pool.
Allocates the array of buffer pool instances and initializes each one
with an equal share of the total size.
@return DB_SUCCESS if success, DB_ERROR if not enough memory or error */
UNIV_INTERN
ulint
buf_pool_init(
/*==========*/
	ulint	total_size,	/*!< in: size of the total pool in bytes */
	ulint	n_instances)	/*!< in: number of instances */
{
	ulint		i;
	const ulint	size	= total_size / n_instances;

	ut_ad(n_instances > 0);
	ut_ad(n_instances <= MAX_BUFFER_POOLS);
	ut_ad(n_instances == srv_buf_pool_instances);

	/* NOTE(review): an earlier comment here claimed an extra
	instance is created for flush-list bookkeeping, but exactly
	n_instances elements are allocated below — confirm against the
	flush code before relying on any "extra" instance. */
	void *buf_pool_void_ptr= mem_zalloc(n_instances * sizeof *buf_pool_ptr);
	buf_pool_ptr = static_cast<buf_pool_struct *>(buf_pool_void_ptr);

	for (i = 0; i < n_instances; i++) {
		buf_pool_t*	ptr	= &buf_pool_ptr[i];

		if (buf_pool_init_instance(ptr, size, i) != DB_SUCCESS) {

			/* Free all the instances created so far. */
			buf_pool_free(i);

			return(DB_ERROR);
		}
	}

	/* Publish the aggregate size and set the default LRU "old"
	ratio (3/8 of the list). */
	buf_pool_set_sizes();
	buf_LRU_old_ratio_update(100 * 3/ 8, FALSE);

	btr_search_sys_create(buf_pool_get_curr_size() / sizeof(void*) / 64);

	return(DB_SUCCESS);
}
1324
 
 
1325
 
/********************************************************************//**
1326
 
Frees the buffer pool at shutdown.  This must not be invoked before
1327
 
freeing all mutexes. */
1328
 
UNIV_INTERN
1329
 
void
1330
 
buf_pool_free(
1331
 
/*==========*/
1332
 
        ulint   n_instances)    /*!< in: numbere of instances to free */
1333
 
{
1334
 
        ulint   i;
1335
 
 
1336
 
        for (i = 0; i < n_instances; i++) {
1337
 
                buf_pool_free_instance(buf_pool_from_array(i));
1338
 
        }
1339
 
 
1340
 
        mem_free(buf_pool_ptr);
1341
 
        buf_pool_ptr = NULL;
1342
 
}
1343
 
 
1344
 
/********************************************************************//**
Drops adaptive hash index for a buffer pool instance.
Called with btr_search_latch held in X mode; the latch is temporarily
released around each block's hash-drop (see the latching-order comment
below), which is why the caller must rescan when
*released_search_latch is set. */
static
void
buf_pool_drop_hash_index_instance(
/*==============================*/
	buf_pool_t*	buf_pool,		/*!< in: buffer pool instance */
	ibool*		released_search_latch)	/*!< out: flag for signalling
						whether the search latch was
						released */
{
	buf_chunk_t*	chunks	= buf_pool->chunks;
	buf_chunk_t*	chunk	= chunks + buf_pool->n_chunks;

	/* Walk the chunks in reverse, and every block in each chunk. */
	while (--chunk >= chunks) {
		ulint		i;
		buf_block_t*	block	= chunk->blocks;

		for (i = chunk->size; i--; block++) {
			/* block->is_hashed cannot be modified
			when we have an x-latch on btr_search_latch;
			see the comment in buf0buf.h */

			if (!block->is_hashed) {
				continue;
			}

			/* To follow the latching order, we
			have to release btr_search_latch
			before acquiring block->latch. */
			rw_lock_x_unlock(&btr_search_latch);
			/* When we release the search latch,
			we must rescan all blocks, because
			some may become hashed again. */
			*released_search_latch = TRUE;

			rw_lock_x_lock(&block->lock);

			/* This should be guaranteed by the
			callers, which will be holding
			btr_search_enabled_mutex. */
			ut_ad(!btr_search_enabled);

			/* Because we did not buffer-fix the
			block by calling buf_block_get_gen(),
			it is possible that the block has been
			allocated for some other use after
			btr_search_latch was released above.
			We do not care which file page the
			block is mapped to.  All we want to do
			is to drop any hash entries referring
			to the page. */

			/* It is possible that
			block->page.state != BUF_FILE_PAGE.
			Even that does not matter, because
			btr_search_drop_page_hash_index() will
			check block->is_hashed before doing
			anything.  block->is_hashed can only
			be set on uncompressed file pages. */

			btr_search_drop_page_hash_index(block);

			rw_lock_x_unlock(&block->lock);

			/* Reacquire the search latch before
			continuing the scan, restoring the
			caller's latching state. */
			rw_lock_x_lock(&btr_search_latch);

			ut_ad(!btr_search_enabled);
		}
	}
}
1415
 
 
1416
 
/********************************************************************//**
1417
 
Drops the adaptive hash index.  To prevent a livelock, this function
1418
 
is only to be called while holding btr_search_latch and while
1419
 
btr_search_enabled == FALSE. */
1420
 
UNIV_INTERN
1421
 
void
1422
 
buf_pool_drop_hash_index(void)
1423
 
/*==========================*/
1424
 
{
1425
 
        ibool           released_search_latch;
1426
 
 
1427
 
#ifdef UNIV_SYNC_DEBUG
1428
 
        ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX));
1429
 
#endif /* UNIV_SYNC_DEBUG */
1430
 
        ut_ad(!btr_search_enabled);
1431
 
 
1432
 
        do {
1433
 
                ulint   i;
1434
 
 
1435
 
                released_search_latch = FALSE;
1436
 
 
1437
 
                for (i = 0; i < srv_buf_pool_instances; i++) {
1438
 
                        buf_pool_t*     buf_pool;
1439
 
 
1440
 
                        buf_pool = buf_pool_from_array(i);
1441
 
 
1442
 
                        buf_pool_drop_hash_index_instance(
1443
 
                                buf_pool, &released_search_latch);
1444
 
                }
1445
 
 
1446
 
        } while (released_search_latch);
1447
 
}
1448
 
 
1449
 
/********************************************************************//**
Relocate a buffer control block.  Relocates the block on the LRU list
and in buf_pool->page_hash.  Does not relocate bpage->list.
The caller must take care of relocating bpage->list.
Caller must hold the buffer pool mutex and the block mutex of bpage. */
UNIV_INTERN
void
buf_relocate(
/*=========*/
	buf_page_t*	bpage,	/*!< in/out: control block being relocated;
				buf_page_get_state(bpage) must be
				BUF_BLOCK_ZIP_DIRTY or BUF_BLOCK_ZIP_PAGE */
	buf_page_t*	dpage)	/*!< in/out: destination control block */
{
	buf_page_t*	b;
	ulint		fold;
	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);

	ut_ad(buf_pool_mutex_own(buf_pool));
	ut_ad(mutex_own(buf_page_get_mutex(bpage)));
	/* The page must not be latched or fixed by anyone else. */
	ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
	ut_a(bpage->buf_fix_count == 0);
	ut_ad(bpage->in_LRU_list);
	ut_ad(!bpage->in_zip_hash);
	ut_ad(bpage->in_page_hash);
	ut_ad(bpage == buf_page_hash_get(buf_pool,
					 bpage->space, bpage->offset));
	ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
#ifdef UNIV_DEBUG
	switch (buf_page_get_state(bpage)) {
	case BUF_BLOCK_ZIP_FREE:
	case BUF_BLOCK_NOT_USED:
	case BUF_BLOCK_READY_FOR_USE:
	case BUF_BLOCK_FILE_PAGE:
	case BUF_BLOCK_MEMORY:
	case BUF_BLOCK_REMOVE_HASH:
		ut_error;
	case BUF_BLOCK_ZIP_DIRTY:
	case BUF_BLOCK_ZIP_PAGE:
		break;
	}
#endif /* UNIV_DEBUG */

	/* Copy the whole descriptor; dpage inherits bpage's list
	pointers, which are fixed up below for LRU and page_hash
	(bpage->list is the caller's responsibility). */
	memcpy(dpage, bpage, sizeof *dpage);

	ut_d(bpage->in_LRU_list = FALSE);
	ut_d(bpage->in_page_hash = FALSE);

	/* relocate buf_pool->LRU */
	b = UT_LIST_GET_PREV(LRU, bpage);
	UT_LIST_REMOVE(LRU, buf_pool->LRU, bpage);

	/* Re-insert dpage at the exact position bpage occupied. */
	if (b) {
		UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU, b, dpage);
	} else {
		UT_LIST_ADD_FIRST(LRU, buf_pool->LRU, dpage);
	}

	if (UNIV_UNLIKELY(buf_pool->LRU_old == bpage)) {
		buf_pool->LRU_old = dpage;
#ifdef UNIV_LRU_DEBUG
		/* buf_pool->LRU_old must be the first item in the LRU list
		whose "old" flag is set. */
		ut_a(buf_pool->LRU_old->old);
		ut_a(!UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)
		     || !UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)->old);
		ut_a(!UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)
		     || UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)->old);
	} else {
		/* Check that the "old" flag is consistent in
		the block and its neighbours. */
		/* NOTE(review): this else branch (and the
		buf_page_set_old() call) exists only under
		UNIV_LRU_DEBUG; without it the if above has no else. */
		buf_page_set_old(dpage, buf_page_is_old(dpage));
#endif /* UNIV_LRU_DEBUG */
	}

	ut_d(UT_LIST_VALIDATE(LRU, buf_page_t, buf_pool->LRU,
			      ut_ad(ut_list_node_313->in_LRU_list)));

	/* relocate buf_pool->page_hash */
	fold = buf_page_address_fold(bpage->space, bpage->offset);

	HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, bpage);
	HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold, dpage);
}
1532
 
 
1533
 
/********************************************************************//**
1534
 
Shrinks a buffer pool instance. */
1535
 
static
1536
 
void
1537
 
buf_pool_shrink_instance(
1538
 
/*=====================*/
1539
 
        buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
1540
 
        ulint           chunk_size)     /*!< in: number of pages to remove */
1541
 
{
1542
 
        buf_chunk_t*    chunks;
1543
 
        buf_chunk_t*    chunk;
1544
 
        ulint           max_size;
1545
 
        ulint           max_free_size;
1546
 
        buf_chunk_t*    max_chunk;
1547
 
        buf_chunk_t*    max_free_chunk;
1548
 
 
1549
 
        ut_ad(!buf_pool_mutex_own(buf_pool));
1550
 
 
1551
 
try_again:
1552
 
        btr_search_disable(); /* Empty the adaptive hash index again */
1553
 
        buf_pool_mutex_enter(buf_pool);
1554
 
 
1555
 
shrink_again:
1556
 
        if (buf_pool->n_chunks <= 1) {
1557
 
 
1558
 
                /* Cannot shrink if there is only one chunk */
1559
 
                goto func_done;
1560
 
        }
1561
 
 
1562
 
        /* Search for the largest free chunk
1563
 
        not larger than the size difference */
1564
 
        chunks = buf_pool->chunks;
1565
 
        chunk = chunks + buf_pool->n_chunks;
1566
 
        max_size = max_free_size = 0;
1567
 
        max_chunk = max_free_chunk = NULL;
1568
 
 
1569
 
        while (--chunk >= chunks) {
1570
 
                if (chunk->size <= chunk_size
1571
 
                    && chunk->size > max_free_size) {
1572
 
                        if (chunk->size > max_size) {
1573
 
                                max_size = chunk->size;
1574
 
                                max_chunk = chunk;
1575
 
                        }
1576
 
 
1577
 
                        if (buf_chunk_all_free(chunk)) {
1578
 
                                max_free_size = chunk->size;
1579
 
                                max_free_chunk = chunk;
1580
 
                        }
1581
 
                }
1582
 
        }
1583
 
 
1584
 
        if (!max_free_size) {
1585
 
 
1586
 
                ulint           dirty   = 0;
1587
 
                ulint           nonfree = 0;
1588
 
                buf_block_t*    block;
1589
 
                buf_block_t*    bend;
1590
 
 
1591
 
                /* Cannot shrink: try again later
1592
 
                (do not assign srv_buf_pool_old_size) */
1593
 
                if (!max_chunk) {
1594
 
 
1595
 
                        goto func_exit;
1596
 
                }
1597
 
 
1598
 
                block = max_chunk->blocks;
1599
 
                bend = block + max_chunk->size;
1600
 
 
1601
 
                /* Move the blocks of chunk to the end of the
1602
 
                LRU list and try to flush them. */
1603
 
                for (; block < bend; block++) {
1604
 
                        switch (buf_block_get_state(block)) {
1605
 
                        case BUF_BLOCK_NOT_USED:
1606
 
                                continue;
1607
 
                        case BUF_BLOCK_FILE_PAGE:
1608
 
                                break;
1609
 
                        default:
1610
 
                                nonfree++;
1611
 
                                continue;
1612
 
                        }
1613
 
 
1614
 
                        mutex_enter(&block->mutex);
1615
 
                        /* The following calls will temporarily
1616
 
                        release block->mutex and buf_pool->mutex.
1617
 
                        Therefore, we have to always retry,
1618
 
                        even if !dirty && !nonfree. */
1619
 
 
1620
 
                        if (!buf_flush_ready_for_replace(&block->page)) {
1621
 
 
1622
 
                                buf_LRU_make_block_old(&block->page);
1623
 
                                dirty++;
1624
 
                        } else if (buf_LRU_free_block(&block->page, TRUE, NULL)
1625
 
                                   != BUF_LRU_FREED) {
1626
 
                                nonfree++;
1627
 
                        }
1628
 
 
1629
 
                        mutex_exit(&block->mutex);
1630
 
                }
1631
 
 
1632
 
                buf_pool_mutex_exit(buf_pool);
1633
 
 
1634
 
                /* Request for a flush of the chunk if it helps.
1635
 
                Do not flush if there are non-free blocks, since
1636
 
                flushing will not make the chunk freeable. */
1637
 
                if (nonfree) {
1638
 
                        /* Avoid busy-waiting. */
1639
 
                        os_thread_sleep(100000);
1640
 
                } else if (dirty
1641
 
                           && buf_flush_LRU(buf_pool, dirty)
1642
 
                              == ULINT_UNDEFINED) {
1643
 
 
1644
 
                        buf_flush_wait_batch_end(buf_pool, BUF_FLUSH_LRU);
1645
 
                }
1646
 
 
1647
 
                goto try_again;
1648
 
        }
1649
 
 
1650
 
        max_size = max_free_size;
1651
 
        max_chunk = max_free_chunk;
1652
 
 
1653
 
        buf_pool->old_pool_size = buf_pool->curr_pool_size;
1654
 
 
1655
 
        /* Rewrite buf_pool->chunks.  Copy everything but max_chunk. */
1656
 
        chunks = static_cast<buf_chunk_t *>(mem_alloc((buf_pool->n_chunks - 1) * sizeof *chunks));
1657
 
        memcpy(chunks, buf_pool->chunks,
1658
 
               (max_chunk - buf_pool->chunks) * sizeof *chunks);
1659
 
        memcpy(chunks + (max_chunk - buf_pool->chunks),
1660
 
               max_chunk + 1,
1661
 
               buf_pool->chunks + buf_pool->n_chunks
1662
 
               - (max_chunk + 1));
1663
 
        ut_a(buf_pool->curr_size > max_chunk->size);
1664
 
        buf_pool->curr_size -= max_chunk->size;
1665
 
        buf_pool->curr_pool_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
1666
 
        chunk_size -= max_chunk->size;
1667
 
        buf_chunk_free(buf_pool, max_chunk);
1668
 
        mem_free(buf_pool->chunks);
1669
 
        buf_pool->chunks = chunks;
1670
 
        buf_pool->n_chunks--;
1671
 
 
1672
 
        /* Allow a slack of one megabyte. */
1673
 
        if (chunk_size > 1048576 / UNIV_PAGE_SIZE) {
1674
 
 
1675
 
                goto shrink_again;
1676
 
        }
1677
 
        goto func_exit;
1678
 
 
1679
 
func_done:
1680
 
        buf_pool->old_pool_size = buf_pool->curr_pool_size;
1681
 
func_exit:
1682
 
        buf_pool_mutex_exit(buf_pool);
1683
 
        btr_search_enable();
1684
 
}
1685
 
 
1686
 
/********************************************************************//**
1687
 
Shrinks the buffer pool. */
1688
 
static
1689
 
void
1690
 
buf_pool_shrink(
1691
 
/*============*/
1692
 
        ulint   chunk_size)     /*!< in: number of pages to remove */
1693
 
{
1694
 
        ulint   i;
1695
 
 
1696
 
        for (i = 0; i < srv_buf_pool_instances; i++) {
1697
 
                buf_pool_t*     buf_pool;
1698
 
                ulint           instance_chunk_size;
1699
 
 
1700
 
                instance_chunk_size = chunk_size / srv_buf_pool_instances;
1701
 
                buf_pool = buf_pool_from_array(i);
1702
 
                buf_pool_shrink_instance(buf_pool, instance_chunk_size);
1703
 
        }
1704
 
 
1705
 
        buf_pool_set_sizes();
1706
 
}
1707
 
 
1708
 
/********************************************************************//**
Rebuild buf_pool->page_hash for a buffer pool instance.  Recreates both
hash tables sized for the current pool size and re-inserts every page:
uncompressed file pages from the chunks, and compressed-only pages from
buf_pool->zip_clean and buf_pool->flush_list. */
static
void
buf_pool_page_hash_rebuild_instance(
/*================================*/
	buf_pool_t*	buf_pool)		/*!< in: buffer pool instance */
{
	ulint		i;
	buf_page_t*	b;
	buf_chunk_t*	chunk;
	ulint		n_chunks;
	hash_table_t*	zip_hash;
	hash_table_t*	page_hash;

	buf_pool_mutex_enter(buf_pool);

	/* Free, create, and populate the hash table. */
	hash_table_free(buf_pool->page_hash);
	buf_pool->page_hash = page_hash = hash_create(2 * buf_pool->curr_size);
	zip_hash = hash_create(2 * buf_pool->curr_size);

	/* Move all zip_hash entries wholesale into the new table. */
	HASH_MIGRATE(buf_pool->zip_hash, zip_hash, buf_page_t, hash,
		     BUF_POOL_ZIP_FOLD_BPAGE);

	hash_table_free(buf_pool->zip_hash);
	buf_pool->zip_hash = zip_hash;

	/* Insert the uncompressed file pages to buf_pool->page_hash. */

	chunk = buf_pool->chunks;
	n_chunks = buf_pool->n_chunks;

	for (i = 0; i < n_chunks; i++, chunk++) {
		ulint		j;
		buf_block_t*	block = chunk->blocks;

		for (j = 0; j < chunk->size; j++, block++) {
			if (buf_block_get_state(block)
			    == BUF_BLOCK_FILE_PAGE) {
				ut_ad(!block->page.in_zip_hash);
				ut_ad(block->page.in_page_hash);

				HASH_INSERT(buf_page_t, hash, page_hash,
					    buf_page_address_fold(
						    block->page.space,
						    block->page.offset),
					    &block->page);
			}
		}
	}

	/* Insert the compressed-only pages to buf_pool->page_hash.
	All such blocks are either in buf_pool->zip_clean or
	in buf_pool->flush_list. */

	for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
	     b = UT_LIST_GET_NEXT(list, b)) {
		ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
		ut_ad(!b->in_flush_list);
		ut_ad(b->in_LRU_list);
		ut_ad(b->in_page_hash);
		ut_ad(!b->in_zip_hash);

		HASH_INSERT(buf_page_t, hash, page_hash,
			    buf_page_address_fold(b->space, b->offset), b);
	}

	/* The flush list has its own mutex in addition to the pool
	mutex already held. */
	buf_flush_list_mutex_enter(buf_pool);
	for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
	     b = UT_LIST_GET_NEXT(list, b)) {
		ut_ad(b->in_flush_list);
		ut_ad(b->in_LRU_list);
		ut_ad(b->in_page_hash);
		ut_ad(!b->in_zip_hash);

		switch (buf_page_get_state(b)) {
		case BUF_BLOCK_ZIP_DIRTY:
			HASH_INSERT(buf_page_t, hash, page_hash,
				    buf_page_address_fold(b->space,
							  b->offset), b);
			break;
		case BUF_BLOCK_FILE_PAGE:
			/* uncompressed page: already inserted in the
			chunk scan above */
			break;
		case BUF_BLOCK_ZIP_FREE:
		case BUF_BLOCK_ZIP_PAGE:
		case BUF_BLOCK_NOT_USED:
		case BUF_BLOCK_READY_FOR_USE:
		case BUF_BLOCK_MEMORY:
		case BUF_BLOCK_REMOVE_HASH:
			/* No other state may appear in the flush list. */
			ut_error;
			break;
		}
	}

	buf_flush_list_mutex_exit(buf_pool);
	buf_pool_mutex_exit(buf_pool);
}
1807
 
 
1808
 
/********************************************************************
1809
 
Determine if a block is a sentinel for a buffer pool watch.
1810
 
@return TRUE if a sentinel for a buffer pool watch, FALSE if not */
1811
 
UNIV_INTERN
1812
 
ibool
1813
 
buf_pool_watch_is_sentinel(
1814
 
/*=======================*/
1815
 
        buf_pool_t*             buf_pool,       /*!< buffer pool instance */
1816
 
        const buf_page_t*       bpage)          /*!< in: block */
1817
 
{
1818
 
        ut_ad(buf_page_in_file(bpage));
1819
 
 
1820
 
        if (bpage < &buf_pool->watch[0]
1821
 
            || bpage >= &buf_pool->watch[BUF_POOL_WATCH_SIZE]) {
1822
 
 
1823
 
                ut_ad(buf_page_get_state(bpage) != BUF_BLOCK_ZIP_PAGE
1824
 
                      || bpage->zip.data != NULL);
1825
 
 
1826
 
                return(FALSE);
1827
 
        }
1828
 
 
1829
 
        ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_PAGE);
1830
 
        ut_ad(!bpage->in_zip_hash);
1831
 
        ut_ad(bpage->in_page_hash);
1832
 
        ut_ad(bpage->zip.data == NULL);
1833
 
        ut_ad(bpage->buf_fix_count > 0);
1834
 
        return(TRUE);
1835
 
}
1836
 
 
1837
 
/****************************************************************//**
Add watch for the given page to be read in. Caller must have the buffer pool
mutex reserved.
@return NULL if watch set, block if the page is in the buffer pool */
UNIV_INTERN
buf_page_t*
buf_pool_watch_set(
/*===============*/
	ulint	space,	/*!< in: space id */
	ulint	offset,	/*!< in: page number */
	ulint	fold)	/*!< in: buf_page_address_fold(space, offset) */
{
	buf_page_t*	bpage;
	ulint		i;
	buf_pool_t*	buf_pool = buf_pool_get(space, offset);

	ut_ad(buf_pool_mutex_own(buf_pool));

	bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);

	if (UNIV_LIKELY_NULL(bpage)) {
		if (!buf_pool_watch_is_sentinel(buf_pool, bpage)) {
			/* The page was loaded meanwhile. */
			return(bpage);
		}
		/* Add to an existing watch. */
		bpage->buf_fix_count++;
		return(NULL);
	}

	/* Search for a free slot in the watch array and claim it. */
	for (i = 0; i < BUF_POOL_WATCH_SIZE; i++) {
		bpage = &buf_pool->watch[i];

		ut_ad(bpage->access_time == 0);
		ut_ad(bpage->newest_modification == 0);
		ut_ad(bpage->oldest_modification == 0);
		ut_ad(bpage->zip.data == NULL);
		ut_ad(!bpage->in_zip_hash);

		switch (bpage->state) {
		case BUF_BLOCK_POOL_WATCH:
			/* Free slot: turn it into an active sentinel
			for (space, offset). */
			ut_ad(!bpage->in_page_hash);
			ut_ad(bpage->buf_fix_count == 0);

			/* bpage is pointing to buf_pool->watch[],
			which is protected by buf_pool->mutex.
			Normally, buf_page_t objects are protected by
			buf_block_t::mutex or buf_pool->zip_mutex or both. */

			bpage->state = BUF_BLOCK_ZIP_PAGE;
			bpage->space = space;
			bpage->offset = offset;
			bpage->buf_fix_count = 1;

			ut_d(bpage->in_page_hash = TRUE);
			HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
				    fold, bpage);
			return(NULL);
		case BUF_BLOCK_ZIP_PAGE:
			/* Slot already in use by a watch on another
			page: keep looking. */
			ut_ad(bpage->in_page_hash);
			ut_ad(bpage->buf_fix_count > 0);
			break;
		default:
			ut_error;
		}
	}

	/* Allocation failed.  Either the maximum number of purge
	threads should never exceed BUF_POOL_WATCH_SIZE, or this code
	should be modified to return a special non-NULL value and the
	caller should purge the record directly. */
	ut_error;

	/* Fix compiler warning */
	return(NULL);
}
1913
 
 
1914
 
/********************************************************************//**
1915
 
Rebuild buf_pool->page_hash. */
1916
 
static
1917
 
void
1918
 
buf_pool_page_hash_rebuild(void)
1919
 
/*============================*/
1920
 
{
1921
 
        ulint   i;
1922
 
 
1923
 
        for (i = 0; i < srv_buf_pool_instances; i++) {
1924
 
                buf_pool_page_hash_rebuild_instance(buf_pool_from_array(i));
1925
 
        }
1926
 
}
1927
 
 
1928
 
/********************************************************************//**
1929
 
Increase the buffer pool size of one buffer pool instance. */
1930
 
static
1931
 
void
1932
 
buf_pool_increase_instance(
1933
 
/*=======================*/
1934
 
        buf_pool_t*     buf_pool,       /*!< in: buffer pool instane */
1935
 
        ulint           change_size)    /*!< in: new size of the pool */
1936
 
{
1937
 
        buf_chunk_t*    chunks;
1938
 
        buf_chunk_t*    chunk;
1939
 
 
1940
 
        buf_pool_mutex_enter(buf_pool);
1941
 
        chunks = static_cast<buf_chunk_t *>(mem_alloc((buf_pool->n_chunks + 1) * sizeof *chunks));
1942
 
 
1943
 
        memcpy(chunks, buf_pool->chunks, buf_pool->n_chunks * sizeof *chunks);
1944
 
 
1945
 
        chunk = &chunks[buf_pool->n_chunks];
1946
 
 
1947
 
        if (!buf_chunk_init(buf_pool, chunk, change_size)) {
1948
 
                mem_free(chunks);
1949
 
        } else {
1950
 
                buf_pool->old_pool_size = buf_pool->curr_pool_size;
1951
 
                buf_pool->curr_size += chunk->size;
1952
 
                buf_pool->curr_pool_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
1953
 
                mem_free(buf_pool->chunks);
1954
 
                buf_pool->chunks = chunks;
1955
 
                buf_pool->n_chunks++;
1956
 
        }
1957
 
 
1958
 
        buf_pool_mutex_exit(buf_pool);
1959
 
}
1960
 
 
1961
 
/********************************************************************//**
1962
 
Increase the buffer pool size. */
1963
 
static
1964
 
void
1965
 
buf_pool_increase(
1966
 
/*==============*/
1967
 
        ulint   change_size)
1968
 
{
1969
 
        ulint   i;
1970
 
 
1971
 
        for (i = 0; i < srv_buf_pool_instances; i++) {
1972
 
                buf_pool_increase_instance(
1973
 
                        buf_pool_from_array(i),
1974
 
                        change_size / srv_buf_pool_instances);
1975
 
        }
1976
 
 
1977
 
        buf_pool_set_sizes();
1978
 
}
1979
 
 
1980
 
/********************************************************************//**
Resizes the buffer pool.  Compares the requested size
(srv_buf_pool_size) with the current size and either shrinks or
enlarges the pool, then rebuilds the page hash tables.  Changes
smaller than one megabyte per instance are cancelled. */
UNIV_INTERN
void
buf_pool_resize(void)
/*=================*/
{
	ulint	change_size;
	ulint	min_change_size = 1048576 * srv_buf_pool_instances;

	buf_pool_mutex_enter_all();

	if (srv_buf_pool_old_size == srv_buf_pool_size) {

		/* Nothing to do: no size change was requested. */
		buf_pool_mutex_exit_all();

		return;

	} else if (srv_buf_pool_curr_size + min_change_size
		   > srv_buf_pool_size) {

		/* Shrink: compute the number of pages to remove. */
		change_size = (srv_buf_pool_curr_size - srv_buf_pool_size)
			    / UNIV_PAGE_SIZE;

		buf_pool_mutex_exit_all();

		/* Disable adaptive hash indexes and empty the index
		in order to free up memory in the buffer pool chunks. */
		buf_pool_shrink(change_size);

	} else if (srv_buf_pool_curr_size + min_change_size
		   < srv_buf_pool_size) {

		/* Enlarge the buffer pool by at least one megabyte */

		change_size = srv_buf_pool_size - srv_buf_pool_curr_size;

		buf_pool_mutex_exit_all();

		buf_pool_increase(change_size);
	} else {
		/* The requested change is below the minimum: cancel
		it by restoring the previously recorded size. */
		srv_buf_pool_size = srv_buf_pool_old_size;

		buf_pool_mutex_exit_all();

		return;
	}

	/* The chunk layout changed: rebuild every instance's
	page hash to match. */
	buf_pool_page_hash_rebuild();
}
2030
 
 
2031
 
/****************************************************************//**
Remove the sentinel block for the watch before replacing it with a real block.
buf_page_watch_clear() or buf_page_watch_occurred() will notice that
the block has been replaced with the real block. */
static
void
buf_pool_watch_remove(
/*==================*/
	buf_pool_t*	buf_pool,	/*!< buffer pool instance */
	ulint		fold,		/*!< in: buf_page_address_fold(
					space, offset) */
	buf_page_t*	watch)		/*!< in/out: sentinel for watch */
{
	ut_ad(buf_pool_mutex_own(buf_pool));

	/* Unhash the sentinel and return the watch[] slot to its
	free state so buf_pool_watch_set() can reuse it. */
	HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, watch);
	ut_d(watch->in_page_hash = FALSE);
	watch->buf_fix_count = 0;
	watch->state = BUF_BLOCK_POOL_WATCH;
}
2052
 
 
2053
 
/****************************************************************//**
Stop watching if the page has been read in.
buf_pool_watch_set(space,offset) must have returned NULL before. */
UNIV_INTERN
void
buf_pool_watch_unset(
/*=================*/
	ulint	space,	/*!< in: space id */
	ulint	offset)	/*!< in: page number */
{
	buf_page_t*	bpage;
	buf_pool_t*	buf_pool = buf_pool_get(space, offset);
	ulint		fold = buf_page_address_fold(space, offset);

	buf_pool_mutex_enter(buf_pool);
	bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
	/* The page must exist because buf_pool_watch_set()
	increments buf_fix_count. */
	ut_a(bpage);

	if (UNIV_UNLIKELY(!buf_pool_watch_is_sentinel(buf_pool, bpage))) {
		/* The page was read in meanwhile: release the buffer
		fix taken by buf_pool_watch_set() under the block's
		own mutex. */
		mutex_t* mutex = buf_page_get_mutex(bpage);

		mutex_enter(mutex);
		ut_a(bpage->buf_fix_count > 0);
		bpage->buf_fix_count--;
		mutex_exit(mutex);
	} else {
		/* Still a sentinel: drop one watch reference, and
		free the watch slot when the last watcher is gone. */
		ut_a(bpage->buf_fix_count > 0);

		if (UNIV_LIKELY(!--bpage->buf_fix_count)) {
			buf_pool_watch_remove(buf_pool, fold, bpage);
		}
	}

	buf_pool_mutex_exit(buf_pool);
}
2090
 
 
2091
 
/****************************************************************//**
2092
 
Check if the page has been read in.
2093
 
This may only be called after buf_pool_watch_set(space,offset)
2094
 
has returned NULL and before invoking buf_pool_watch_unset(space,offset).
2095
 
@return FALSE if the given page was not read in, TRUE if it was */
2096
 
UNIV_INTERN
2097
 
ibool
2098
 
buf_pool_watch_occurred(
2099
 
/*====================*/
2100
 
        ulint   space,  /*!< in: space id */
2101
 
        ulint   offset) /*!< in: page number */
2102
 
{
2103
 
        ibool           ret;
2104
 
        buf_page_t*     bpage;
2105
 
        buf_pool_t*     buf_pool = buf_pool_get(space, offset);
2106
 
        ulint           fold    = buf_page_address_fold(space, offset);
2107
 
 
2108
 
        buf_pool_mutex_enter(buf_pool);
2109
 
 
2110
 
        bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
2111
 
        /* The page must exist because buf_pool_watch_set()
2112
 
        increments buf_fix_count. */
2113
 
        ut_a(bpage);
2114
 
        ret = !buf_pool_watch_is_sentinel(buf_pool, bpage);
2115
 
        buf_pool_mutex_exit(buf_pool);
2116
 
 
2117
 
        return(ret);
2118
 
}
2119
 
 
2120
 
/********************************************************************//**
2121
 
Moves a page to the start of the buffer pool LRU list. This high-level
2122
 
function can be used to prevent an important page from slipping out of
2123
 
the buffer pool. */
2124
 
UNIV_INTERN
2125
 
void
2126
 
buf_page_make_young(
2127
 
/*================*/
2128
 
        buf_page_t*     bpage)  /*!< in: buffer block of a file page */
2129
 
{
2130
 
        buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
2131
 
 
2132
 
        buf_pool_mutex_enter(buf_pool);
2133
 
 
2134
 
        ut_a(buf_page_in_file(bpage));
2135
 
 
2136
 
        buf_LRU_make_block_young(bpage);
2137
 
 
2138
 
        buf_pool_mutex_exit(buf_pool);
2139
 
}
2140
 
 
2141
 
/********************************************************************//**
Sets the time of the first access of a page and moves a page to the
start of the buffer pool LRU list if it is too old.  This high-level
function can be used to prevent an important page from slipping
out of the buffer pool. */
static
void
buf_page_set_accessed_make_young(
/*=============================*/
	buf_page_t*	bpage,		/*!< in/out: buffer block of a
					file page */
	unsigned	access_time)	/*!< in: bpage->access_time
					read under mutex protection,
					or 0 if unknown */
{
	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);

	/* The pool mutex is taken only for the short update below,
	so it must not already be held. */
	ut_ad(!buf_pool_mutex_own(buf_pool));
	ut_a(buf_page_in_file(bpage));

	if (buf_page_peek_if_too_old(bpage)) {
		/* Too old: move the block to the head of the LRU. */
		buf_pool_mutex_enter(buf_pool);
		buf_LRU_make_block_young(bpage);
		buf_pool_mutex_exit(buf_pool);
	} else if (!access_time) {
		/* First recorded access: stamp the access time. */
		ulint	time_ms	= ut_time_ms();
		buf_pool_mutex_enter(buf_pool);
		buf_page_set_accessed(bpage, time_ms);
		buf_pool_mutex_exit(buf_pool);
	}
}
2172
 
 
2173
 
/********************************************************************//**
2174
 
Resets the check_index_page_at_flush field of a page if found in the buffer
2175
 
pool. */
2176
 
UNIV_INTERN
2177
 
void
2178
 
buf_reset_check_index_page_at_flush(
2179
 
/*================================*/
2180
 
        ulint   space,  /*!< in: space id */
2181
 
        ulint   offset) /*!< in: page number */
2182
 
{
2183
 
        buf_block_t*    block;
2184
 
        buf_pool_t*     buf_pool = buf_pool_get(space, offset);
2185
 
 
2186
 
        buf_pool_mutex_enter(buf_pool);
2187
 
 
2188
 
        block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset);
2189
 
 
2190
 
        if (block && buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE) {
2191
 
                ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page));
2192
 
                block->check_index_page_at_flush = FALSE;
2193
 
        }
2194
 
 
2195
 
        buf_pool_mutex_exit(buf_pool);
2196
 
}
2197
 
 
2198
 
/********************************************************************//**
2199
 
Returns the current state of is_hashed of a page. FALSE if the page is
2200
 
not in the pool. NOTE that this operation does not fix the page in the
2201
 
pool if it is found there.
2202
 
@return TRUE if page hash index is built in search system */
2203
 
UNIV_INTERN
2204
 
ibool
2205
 
buf_page_peek_if_search_hashed(
2206
 
/*===========================*/
2207
 
        ulint   space,  /*!< in: space id */
2208
 
        ulint   offset) /*!< in: page number */
2209
 
{
2210
 
        buf_block_t*    block;
2211
 
        ibool           is_hashed;
2212
 
        buf_pool_t*     buf_pool = buf_pool_get(space, offset);
2213
 
 
2214
 
        buf_pool_mutex_enter(buf_pool);
2215
 
 
2216
 
        block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset);
2217
 
 
2218
 
        if (!block || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
2219
 
                is_hashed = FALSE;
2220
 
        } else {
2221
 
                ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page));
2222
 
                is_hashed = block->is_hashed;
2223
 
        }
2224
 
 
2225
 
        buf_pool_mutex_exit(buf_pool);
2226
 
 
2227
 
        return(is_hashed);
2228
 
}
2229
 
 
2230
 
#ifdef UNIV_DEBUG_FILE_ACCESSES
2231
 
/********************************************************************//**
2232
 
Sets file_page_was_freed TRUE if the page is found in the buffer pool.
2233
 
This function should be called when we free a file page and want the
2234
 
debug version to check that it is not accessed any more unless
2235
 
reallocated.
2236
 
@return control block if found in page hash table, otherwise NULL */
2237
 
UNIV_INTERN
2238
 
buf_page_t*
2239
 
buf_page_set_file_page_was_freed(
2240
 
/*=============================*/
2241
 
        ulint   space,  /*!< in: space id */
2242
 
        ulint   offset) /*!< in: page number */
2243
 
{
2244
 
        buf_page_t*     bpage;
2245
 
        buf_pool_t*     buf_pool = buf_pool_get(space, offset);
2246
 
 
2247
 
        buf_pool_mutex_enter(buf_pool);
2248
 
 
2249
 
        bpage = buf_page_hash_get(buf_pool, space, offset);
2250
 
 
2251
 
        if (bpage) {
2252
 
                ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
2253
 
                bpage->file_page_was_freed = TRUE;
2254
 
        }
2255
 
 
2256
 
        buf_pool_mutex_exit(buf_pool);
2257
 
 
2258
 
        return(bpage);
2259
 
}
2260
 
 
2261
 
/********************************************************************//**
2262
 
Sets file_page_was_freed FALSE if the page is found in the buffer pool.
2263
 
This function should be called when we free a file page and want the
2264
 
debug version to check that it is not accessed any more unless
2265
 
reallocated.
2266
 
@return control block if found in page hash table, otherwise NULL */
2267
 
UNIV_INTERN
2268
 
buf_page_t*
2269
 
buf_page_reset_file_page_was_freed(
2270
 
/*===============================*/
2271
 
        ulint   space,  /*!< in: space id */
2272
 
        ulint   offset) /*!< in: page number */
2273
 
{
2274
 
        buf_page_t*     bpage;
2275
 
        buf_pool_t*     buf_pool = buf_pool_get(space, offset);
2276
 
 
2277
 
        buf_pool_mutex_enter(buf_pool);
2278
 
 
2279
 
        bpage = buf_page_hash_get(buf_pool, space, offset);
2280
 
 
2281
 
        if (bpage) {
2282
 
                ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
2283
 
                bpage->file_page_was_freed = FALSE;
2284
 
        }
2285
 
 
2286
 
        buf_pool_mutex_exit(buf_pool);
2287
 
 
2288
 
        return(bpage);
2289
 
}
2290
 
#endif /* UNIV_DEBUG_FILE_ACCESSES */
2291
 
 
2292
 
/********************************************************************//**
Get read access to a compressed page (usually of type
FIL_PAGE_TYPE_ZBLOB or FIL_PAGE_TYPE_ZBLOB2).
The page must be released with buf_page_release_zip().
NOTE: the page is not protected by any latch.  Mutual exclusion has to
be implemented at a higher level.  In other words, all possible
accesses to a given page through this function must be protected by
the same set of mutexes or latches.
@return pointer to the block */
UNIV_INTERN
buf_page_t*
buf_page_get_zip(
/*=============*/
	ulint		space,	/*!< in: space id */
	ulint		zip_size,/*!< in: compressed page size */
	ulint		offset)	/*!< in: page number */
{
	buf_page_t*	bpage;
	mutex_t*	block_mutex;
	ibool		must_read;
	unsigned	access_time;
	buf_pool_t*	buf_pool = buf_pool_get(space, offset);

#ifndef UNIV_LOG_DEBUG
	ut_ad(!ibuf_inside());
#endif
	buf_pool->stat.n_page_gets++;

	/* Loop until the page appears in the page hash; every miss
	triggers a synchronous read from file. */
	for (;;) {
		buf_pool_mutex_enter(buf_pool);
lookup:
		bpage = buf_page_hash_get(buf_pool, space, offset);
		if (bpage) {
			ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
			break;
		}

		/* Page not in buf_pool: needs to be read from file */

		buf_pool_mutex_exit(buf_pool);

		buf_read_page(space, zip_size, offset);

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
		ut_a(++buf_dbg_counter % 37 || buf_validate());
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
	}

	/* buf_pool->mutex is held from here until err_exit or
	got_block releases it. */
	if (UNIV_UNLIKELY(!bpage->zip.data)) {
		/* There is no compressed page. */
err_exit:
		buf_pool_mutex_exit(buf_pool);
		return(NULL);
	}

	ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));

	switch (buf_page_get_state(bpage)) {
	case BUF_BLOCK_NOT_USED:
	case BUF_BLOCK_READY_FOR_USE:
	case BUF_BLOCK_MEMORY:
	case BUF_BLOCK_REMOVE_HASH:
	case BUF_BLOCK_ZIP_FREE:
		/* Impossible states for a page found in the page hash;
		fall through to ut_error below. */
		break;
	case BUF_BLOCK_ZIP_PAGE:
	case BUF_BLOCK_ZIP_DIRTY:
		/* Compressed-only page: buffer-fix it under the shared
		zip_mutex. */
		block_mutex = &buf_pool->zip_mutex;
		mutex_enter(block_mutex);
		bpage->buf_fix_count++;
		goto got_block;
	case BUF_BLOCK_FILE_PAGE:
		block_mutex = &((buf_block_t*) bpage)->mutex;
		mutex_enter(block_mutex);

		/* Discard the uncompressed page frame if possible. */
		if (buf_LRU_free_block(bpage, FALSE, NULL)
		    == BUF_LRU_FREED) {

			/* The descriptor was freed; redo the hash
			lookup while still holding buf_pool->mutex. */
			mutex_exit(block_mutex);
			goto lookup;
		}

		buf_block_buf_fix_inc((buf_block_t*) bpage,
				      __FILE__, __LINE__);
		goto got_block;
	}

	ut_error;
	goto err_exit;

got_block:
	/* Sample these while the block mutex is still held. */
	must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ;
	access_time = buf_page_is_accessed(bpage);

	buf_pool_mutex_exit(buf_pool);

	mutex_exit(block_mutex);

	buf_page_set_accessed_make_young(bpage, access_time);

#ifdef UNIV_DEBUG_FILE_ACCESSES
	ut_a(!bpage->file_page_was_freed);
#endif

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
	ut_a(++buf_dbg_counter % 5771 || buf_validate());
	ut_a(bpage->buf_fix_count > 0);
	ut_a(buf_page_in_file(bpage));
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

	if (must_read) {
		/* Let us wait until the read operation
		completes */

		for (;;) {
			enum buf_io_fix	io_fix;

			/* io_fix is protected by the block mutex. */
			mutex_enter(block_mutex);
			io_fix = buf_page_get_io_fix(bpage);
			mutex_exit(block_mutex);

			if (io_fix == BUF_IO_READ) {

				os_thread_sleep(WAIT_FOR_READ);
			} else {
				break;
			}
		}
	}

#ifdef UNIV_IBUF_COUNT_DEBUG
	ut_a(ibuf_count_get(buf_page_get_space(bpage),
			    buf_page_get_page_no(bpage)) == 0);
#endif
	return(bpage);
}
2428
 
 
2429
 
/********************************************************************//**
2430
 
Initialize some fields of a control block. */
2431
 
UNIV_INLINE
2432
 
void
2433
 
buf_block_init_low(
2434
 
/*===============*/
2435
 
        buf_block_t*    block)  /*!< in: block to init */
2436
 
{
2437
 
        block->check_index_page_at_flush = FALSE;
2438
 
        block->index            = NULL;
2439
 
 
2440
 
        block->n_hash_helps     = 0;
2441
 
        block->is_hashed        = FALSE;
2442
 
        block->n_fields         = 1;
2443
 
        block->n_bytes          = 0;
2444
 
        block->left_side        = TRUE;
2445
 
}
2446
 
#endif /* !UNIV_HOTBACKUP */
2447
 
 
2448
 
/********************************************************************//**
Decompress a block.
@return TRUE if successful */
UNIV_INTERN
ibool
buf_zip_decompress(
/*===============*/
	buf_block_t*	block,	/*!< in/out: block */
	ibool		check)	/*!< in: TRUE=verify the page checksum */
{
	const byte*	frame		= block->page.zip.data;
	/* Checksum stamped on the compressed page at write time. */
	ulint		stamp_checksum	= mach_read_from_4(
		frame + FIL_PAGE_SPACE_OR_CHKSUM);

	ut_ad(buf_block_get_zip_size(block));
	ut_a(buf_block_get_space(block) != 0);

	/* BUF_NO_CHECKSUM_MAGIC marks pages written without a
	checksum; skip verification for those even when check=TRUE.
	NOTE(review): the UNIV_LIKELY hint wraps the whole condition
	including the caller-supplied "check" flag; presumably it was
	meant for the comparison only -- behavior is unaffected. */
	if (UNIV_LIKELY(check && stamp_checksum != BUF_NO_CHECKSUM_MAGIC)) {
		ulint	calc_checksum	= page_zip_calc_checksum(
			frame, page_zip_get_size(&block->page.zip));

		if (UNIV_UNLIKELY(stamp_checksum != calc_checksum)) {
			ut_print_timestamp(stderr);
			fprintf(stderr,
				"  InnoDB: compressed page checksum mismatch"
				" (space %u page %u): %lu != %lu\n",
				block->page.space, block->page.offset,
				stamp_checksum, calc_checksum);
			return(FALSE);
		}
	}

	switch (fil_page_get_type(frame)) {
	case FIL_PAGE_INDEX:
		/* Index pages are the only truly compressed pages;
		inflate the zip data into the uncompressed frame. */
		if (page_zip_decompress(&block->page.zip,
					block->frame, TRUE)) {
			return(TRUE);
		}

		fprintf(stderr,
			"InnoDB: unable to decompress space %lu page %lu\n",
			(ulong) block->page.space,
			(ulong) block->page.offset);
		return(FALSE);

	case FIL_PAGE_TYPE_ALLOCATED:
	case FIL_PAGE_INODE:
	case FIL_PAGE_IBUF_BITMAP:
	case FIL_PAGE_TYPE_FSP_HDR:
	case FIL_PAGE_TYPE_XDES:
	case FIL_PAGE_TYPE_ZBLOB:
	case FIL_PAGE_TYPE_ZBLOB2:
		/* These page types are stored as-is within the
		zip_size frame.  Copy to uncompressed storage. */
		memcpy(block->frame, frame,
		       buf_block_get_zip_size(block));
		return(TRUE);
	}

	/* Unrecognized page type: report and fail the decompression. */
	ut_print_timestamp(stderr);
	fprintf(stderr,
		"  InnoDB: unknown compressed page"
		" type %lu\n",
		fil_page_get_type(frame));
	return(FALSE);
}
2513
 
 
2514
 
#ifndef UNIV_HOTBACKUP
2515
 
/*******************************************************************//**
Gets the block to whose frame the pointer is pointing to if found
in this buffer pool instance.
@return pointer to block, or NULL if ptr is not within this instance */
static
buf_block_t*
buf_block_align_instance(
/*=====================*/
	buf_pool_t*	buf_pool,	/*!< in: buffer in which the block
					resides */
	const byte*	ptr)		/*!< in: pointer to a frame */
{
	buf_chunk_t*	chunk;
	ulint		i;

	/* TODO: protect buf_pool->chunks with a mutex (it will
	currently remain constant after buf_pool_init()) */
	for (chunk = buf_pool->chunks, i = buf_pool->n_chunks; i--; chunk++) {
		/* Signed byte offset of ptr from the chunk's first
		frame; negative means ptr lies before this chunk. */
		lint	offs = ptr - chunk->blocks->frame;

		if (UNIV_UNLIKELY(offs < 0)) {

			continue;
		}

		/* Convert the byte offset into a block index. */
		offs >>= UNIV_PAGE_SIZE_SHIFT;

		if (UNIV_LIKELY((ulint) offs < chunk->size)) {
			buf_block_t*	block = &chunk->blocks[offs];

			/* The function buf_chunk_init() invokes
			buf_block_init() so that block[n].frame ==
			block->frame + n * UNIV_PAGE_SIZE.  Check it. */
			ut_ad(block->frame == page_align(ptr));
#ifdef UNIV_DEBUG
			/* A thread that updates these fields must
			hold buf_pool->mutex and block->mutex.  Acquire
			only the latter. */
			mutex_enter(&block->mutex);

			switch (buf_block_get_state(block)) {
			case BUF_BLOCK_ZIP_FREE:
			case BUF_BLOCK_ZIP_PAGE:
			case BUF_BLOCK_ZIP_DIRTY:
				/* These types should only be used in
				the compressed buffer pool, whose
				memory is allocated from
				buf_pool->chunks, in UNIV_PAGE_SIZE
				blocks flagged as BUF_BLOCK_MEMORY. */
				ut_error;
				break;
			case BUF_BLOCK_NOT_USED:
			case BUF_BLOCK_READY_FOR_USE:
			case BUF_BLOCK_MEMORY:
				/* Some data structures contain
				"guess" pointers to file pages.  The
				file pages may have been freed and
				reused.  Do not complain. */
				break;
			case BUF_BLOCK_REMOVE_HASH:
				/* buf_LRU_block_remove_hashed_page()
				will overwrite the FIL_PAGE_OFFSET and
				FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID with
				0xff and set the state to
				BUF_BLOCK_REMOVE_HASH. */
				ut_ad(page_get_space_id(page_align(ptr))
				      == 0xffffffff);
				ut_ad(page_get_page_no(page_align(ptr))
				      == 0xffffffff);
				break;
			case BUF_BLOCK_FILE_PAGE:
				/* The frame contents must agree with
				the control block's identity. */
				ut_ad(block->page.space
				      == page_get_space_id(page_align(ptr)));
				ut_ad(block->page.offset
				      == page_get_page_no(page_align(ptr)));
				break;
			}

			mutex_exit(&block->mutex);
#endif /* UNIV_DEBUG */

			return(block);
		}
	}

	/* ptr does not point into any chunk of this instance. */
	return(NULL);
}
2602
 
 
2603
 
/*******************************************************************//**
2604
 
Gets the block to whose frame the pointer is pointing to.
2605
 
@return pointer to block, never NULL */
2606
 
UNIV_INTERN
2607
 
buf_block_t*
2608
 
buf_block_align(
2609
 
/*============*/
2610
 
        const byte*     ptr)    /*!< in: pointer to a frame */
2611
 
{
2612
 
        ulint           i;
2613
 
 
2614
 
        for (i = 0; i < srv_buf_pool_instances; i++) {
2615
 
                buf_block_t*    block;
2616
 
 
2617
 
                block = buf_block_align_instance(
2618
 
                        buf_pool_from_array(i), ptr);
2619
 
                if (block) {
2620
 
                        return(block);
2621
 
                }
2622
 
        }
2623
 
 
2624
 
        /* The block should always be found. */
2625
 
        ut_error;
2626
 
        return(NULL);
2627
 
}
2628
 
 
2629
 
/********************************************************************//**
2630
 
Find out if a pointer belongs to a buf_block_t. It can be a pointer to
2631
 
the buf_block_t itself or a member of it. This functions checks one of
2632
 
the buffer pool instances.
2633
 
@return TRUE if ptr belongs to a buf_block_t struct */
2634
 
static
2635
 
ibool
2636
 
buf_pointer_is_block_field_instance(
2637
 
/*================================*/
2638
 
        buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
2639
 
        const void*     ptr)            /*!< in: pointer not dereferenced */
2640
 
{
2641
 
        const buf_chunk_t*              chunk   = buf_pool->chunks;
2642
 
        const buf_chunk_t* const        echunk  = chunk + buf_pool->n_chunks;
2643
 
 
2644
 
        /* TODO: protect buf_pool->chunks with a mutex (it will
2645
 
        currently remain constant after buf_pool_init()) */
2646
 
        while (chunk < echunk) {
2647
 
                if (ptr >= (void *)chunk->blocks
2648
 
                    && ptr < (void *)(chunk->blocks + chunk->size)) {
2649
 
 
2650
 
                        return(TRUE);
2651
 
                }
2652
 
 
2653
 
                chunk++;
2654
 
        }
2655
 
 
2656
 
        return(FALSE);
2657
 
}
2658
 
 
2659
 
/********************************************************************//**
2660
 
Find out if a pointer belongs to a buf_block_t. It can be a pointer to
2661
 
the buf_block_t itself or a member of it
2662
 
@return TRUE if ptr belongs to a buf_block_t struct */
2663
 
UNIV_INTERN
2664
 
ibool
2665
 
buf_pointer_is_block_field(
2666
 
/*=======================*/
2667
 
        const void*     ptr)    /*!< in: pointer not dereferenced */
2668
 
{
2669
 
        ulint   i;
2670
 
 
2671
 
        for (i = 0; i < srv_buf_pool_instances; i++) {
2672
 
                ibool   found;
2673
 
 
2674
 
                found = buf_pointer_is_block_field_instance(
2675
 
                        buf_pool_from_array(i), ptr);
2676
 
                if (found) {
2677
 
                        return(TRUE);
2678
 
                }
2679
 
        }
2680
 
 
2681
 
        return(FALSE);
2682
 
}
2683
 
 
2684
 
/********************************************************************//**
2685
 
Find out if a buffer block was created by buf_chunk_init().
2686
 
@return TRUE if "block" has been added to buf_pool->free by buf_chunk_init() */
2687
 
static
2688
 
ibool
2689
 
buf_block_is_uncompressed(
2690
 
/*======================*/
2691
 
        buf_pool_t*             buf_pool,       /*!< in: buffer pool instance */
2692
 
        const buf_block_t*      block)          /*!< in: pointer to block,
2693
 
                                                not dereferenced */
2694
 
{
2695
 
        ut_ad(buf_pool_mutex_own(buf_pool));
2696
 
 
2697
 
        if (UNIV_UNLIKELY((((ulint) block) % sizeof *block) != 0)) {
2698
 
                /* The pointer should be aligned. */
2699
 
                return(FALSE);
2700
 
        }
2701
 
 
2702
 
        return(buf_pointer_is_block_field_instance(buf_pool, (void *)block));
2703
 
}
2704
 
 
2705
 
/********************************************************************//**
2706
 
This is the general function used to get access to a database page.
2707
 
@return pointer to the block or NULL */
2708
 
UNIV_INTERN
2709
 
buf_block_t*
2710
 
buf_page_get_gen(
2711
 
/*=============*/
2712
 
        ulint           space,  /*!< in: space id */
2713
 
        ulint           zip_size,/*!< in: compressed page size in bytes
2714
 
                                or 0 for uncompressed pages */
2715
 
        ulint           offset, /*!< in: page number */
2716
 
        ulint           rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
2717
 
        buf_block_t*    guess,  /*!< in: guessed block or NULL */
2718
 
        ulint           mode,   /*!< in: BUF_GET, BUF_GET_IF_IN_POOL,
2719
 
                                BUF_GET_NO_LATCH, or
2720
 
                                BUF_GET_IF_IN_POOL_OR_WATCH */
2721
 
        const char*     file,   /*!< in: file name */
2722
 
        ulint           line,   /*!< in: line where called */
2723
 
        mtr_t*          mtr)    /*!< in: mini-transaction */
2724
 
{
2725
 
        buf_block_t*    block;
2726
 
        ulint           fold;
2727
 
        unsigned        access_time;
2728
 
        ulint           fix_type;
2729
 
        ibool           must_read;
2730
 
        ulint           retries = 0;
2731
 
        buf_pool_t*     buf_pool = buf_pool_get(space, offset);
2732
 
 
2733
 
        ut_ad(mtr);
2734
 
        ut_ad(mtr->state == MTR_ACTIVE);
2735
 
        ut_ad((rw_latch == RW_S_LATCH)
2736
 
              || (rw_latch == RW_X_LATCH)
2737
 
              || (rw_latch == RW_NO_LATCH));
2738
 
        ut_ad((mode != BUF_GET_NO_LATCH) || (rw_latch == RW_NO_LATCH));
2739
 
        ut_ad(mode == BUF_GET
2740
 
              || mode == BUF_GET_IF_IN_POOL
2741
 
              || mode == BUF_GET_NO_LATCH
2742
 
              || mode == BUF_GET_IF_IN_POOL_OR_WATCH);
2743
 
        ut_ad(zip_size == fil_space_get_zip_size(space));
2744
 
        ut_ad(ut_is_2pow(zip_size));
2745
 
#ifndef UNIV_LOG_DEBUG
2746
 
        ut_ad(!ibuf_inside() || ibuf_page(space, zip_size, offset, NULL));
2747
 
#endif
2748
 
        buf_pool->stat.n_page_gets++;
2749
 
        fold = buf_page_address_fold(space, offset);
2750
 
loop:
2751
 
        block = guess;
2752
 
        buf_pool_mutex_enter(buf_pool);
2753
 
 
2754
 
        if (block) {
2755
 
                /* If the guess is a compressed page descriptor that
2756
 
                has been allocated by buf_buddy_alloc(), it may have
2757
 
                been invalidated by buf_buddy_relocate().  In that
2758
 
                case, block could point to something that happens to
2759
 
                contain the expected bits in block->page.  Similarly,
2760
 
                the guess may be pointing to a buffer pool chunk that
2761
 
                has been released when resizing the buffer pool. */
2762
 
 
2763
 
                if (!buf_block_is_uncompressed(buf_pool, block)
2764
 
                    || offset != block->page.offset
2765
 
                    || space != block->page.space
2766
 
                    || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
2767
 
 
2768
 
                        block = guess = NULL;
2769
 
                } else {
2770
 
                        ut_ad(!block->page.in_zip_hash);
2771
 
                        ut_ad(block->page.in_page_hash);
2772
 
                }
2773
 
        }
2774
 
 
2775
 
        if (block == NULL) {
2776
 
                block = (buf_block_t*) buf_page_hash_get_low(
2777
 
                        buf_pool, space, offset, fold);
2778
 
        }
2779
 
 
2780
 
loop2:
2781
 
        if (block && buf_pool_watch_is_sentinel(buf_pool, &block->page)) {
2782
 
                block = NULL;
2783
 
        }
2784
 
 
2785
 
        if (block == NULL) {
2786
 
                /* Page not in buf_pool: needs to be read from file */
2787
 
 
2788
 
                if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
2789
 
                        block = (buf_block_t*) buf_pool_watch_set(
2790
 
                                space, offset, fold);
2791
 
 
2792
 
                        if (UNIV_LIKELY_NULL(block)) {
2793
 
 
2794
 
                                goto got_block;
2795
 
                        }
2796
 
                }
2797
 
 
2798
 
                buf_pool_mutex_exit(buf_pool);
2799
 
 
2800
 
                if (mode == BUF_GET_IF_IN_POOL
2801
 
                    || mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
2802
 
 
2803
 
                        return(NULL);
2804
 
                }
2805
 
 
2806
 
                if (buf_read_page(space, zip_size, offset)) {
2807
 
                        retries = 0;
2808
 
                } else if (retries < BUF_PAGE_READ_MAX_RETRIES) {
2809
 
                        ++retries;
2810
 
                } else {
2811
 
                        fprintf(stderr, "InnoDB: Error: Unable"
2812
 
                                " to read tablespace %lu page no"
2813
 
                                " %lu into the buffer pool after"
2814
 
                                " %lu attempts\n"
2815
 
                                "InnoDB: The most probable cause"
2816
 
                                " of this error may be that the"
2817
 
                                " table has been corrupted.\n"
2818
 
                                "InnoDB: You can try to fix this"
2819
 
                                " problem by using"
2820
 
                                " innodb_force_recovery.\n"
2821
 
                                "InnoDB: Please see reference manual"
2822
 
                                " for more details.\n"
2823
 
                                "InnoDB: Aborting...\n",
2824
 
                                space, offset,
2825
 
                                BUF_PAGE_READ_MAX_RETRIES);
2826
 
 
2827
 
                        ut_error;
2828
 
                }
2829
 
 
2830
 
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
2831
 
                ut_a(++buf_dbg_counter % 37 || buf_validate());
2832
 
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
2833
 
                goto loop;
2834
 
        }
2835
 
 
2836
 
got_block:
2837
 
        ut_ad(page_zip_get_size(&block->page.zip) == zip_size);
2838
 
 
2839
 
        must_read = buf_block_get_io_fix(block) == BUF_IO_READ;
2840
 
 
2841
 
        if (must_read && mode == BUF_GET_IF_IN_POOL) {
2842
 
 
2843
 
                /* The page is being read to buffer pool,
2844
 
                but we cannot wait around for the read to
2845
 
                complete. */
2846
 
                buf_pool_mutex_exit(buf_pool);
2847
 
 
2848
 
                return(NULL);
2849
 
        }
2850
 
 
2851
 
        switch (buf_block_get_state(block)) {
2852
 
                buf_page_t*     bpage;
2853
 
                ibool           success;
2854
 
 
2855
 
        case BUF_BLOCK_FILE_PAGE:
2856
 
                break;
2857
 
 
2858
 
        case BUF_BLOCK_ZIP_PAGE:
2859
 
        case BUF_BLOCK_ZIP_DIRTY:
2860
 
                bpage = &block->page;
2861
 
                /* Protect bpage->buf_fix_count. */
2862
 
                mutex_enter(&buf_pool->zip_mutex);
2863
 
 
2864
 
                if (bpage->buf_fix_count
2865
 
                    || buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
2866
 
                        /* This condition often occurs when the buffer
2867
 
                        is not buffer-fixed, but I/O-fixed by
2868
 
                        buf_page_init_for_read(). */
2869
 
                        mutex_exit(&buf_pool->zip_mutex);
2870
 
wait_until_unfixed:
2871
 
                        /* The block is buffer-fixed or I/O-fixed.
2872
 
                        Try again later. */
2873
 
                        buf_pool_mutex_exit(buf_pool);
2874
 
                        os_thread_sleep(WAIT_FOR_READ);
2875
 
  
2876
 
                        goto loop;
2877
 
                }
2878
 
 
2879
 
                /* Allocate an uncompressed page. */
2880
 
                buf_pool_mutex_exit(buf_pool);
2881
 
                mutex_exit(&buf_pool->zip_mutex);
2882
 
 
2883
 
                block = buf_LRU_get_free_block(buf_pool, 0);
2884
 
                ut_a(block);
2885
 
 
2886
 
                buf_pool_mutex_enter(buf_pool);
2887
 
                mutex_enter(&block->mutex);
2888
 
 
2889
 
                {
2890
 
                        buf_page_t*     hash_bpage;
2891
 
 
2892
 
                        hash_bpage = buf_page_hash_get_low(
2893
 
                                buf_pool, space, offset, fold);
2894
 
 
2895
 
                        if (UNIV_UNLIKELY(bpage != hash_bpage)) {
2896
 
                                /* The buf_pool->page_hash was modified
2897
 
                                while buf_pool->mutex was released.
2898
 
                                Free the block that was allocated. */
2899
 
 
2900
 
                                buf_LRU_block_free_non_file_page(block);
2901
 
                                mutex_exit(&block->mutex);
2902
 
 
2903
 
                                block = (buf_block_t*) hash_bpage;
2904
 
                                goto loop2;
2905
 
                        }
2906
 
                }
2907
 
 
2908
 
                if (UNIV_UNLIKELY
2909
 
                    (bpage->buf_fix_count
2910
 
                     || buf_page_get_io_fix(bpage) != BUF_IO_NONE)) {
2911
 
 
2912
 
                        /* The block was buffer-fixed or I/O-fixed
2913
 
                        while buf_pool->mutex was not held by this thread.
2914
 
                        Free the block that was allocated and try again.
2915
 
                        This should be extremely unlikely. */
2916
 
 
2917
 
                        buf_LRU_block_free_non_file_page(block);
2918
 
                        mutex_exit(&block->mutex);
2919
 
 
2920
 
                        goto wait_until_unfixed;
2921
 
                }
2922
 
 
2923
 
                /* Move the compressed page from bpage to block,
2924
 
                and uncompress it. */
2925
 
 
2926
 
                mutex_enter(&buf_pool->zip_mutex);
2927
 
 
2928
 
                buf_relocate(bpage, &block->page);
2929
 
                buf_block_init_low(block);
2930
 
                block->lock_hash_val = lock_rec_hash(space, offset);
2931
 
 
2932
 
                UNIV_MEM_DESC(&block->page.zip.data,
2933
 
                              page_zip_get_size(&block->page.zip), block);
2934
 
 
2935
 
                if (buf_page_get_state(&block->page)
2936
 
                    == BUF_BLOCK_ZIP_PAGE) {
2937
 
                        UT_LIST_REMOVE(list, buf_pool->zip_clean,
2938
 
                                       &block->page);
2939
 
                        ut_ad(!block->page.in_flush_list);
2940
 
                } else {
2941
 
                        /* Relocate buf_pool->flush_list. */
2942
 
                        buf_flush_relocate_on_flush_list(bpage,
2943
 
                                                         &block->page);
2944
 
                }
2945
 
 
2946
 
                /* Buffer-fix, I/O-fix, and X-latch the block
2947
 
                for the duration of the decompression.
2948
 
                Also add the block to the unzip_LRU list. */
2949
 
                block->page.state = BUF_BLOCK_FILE_PAGE;
2950
 
 
2951
 
                /* Insert at the front of unzip_LRU list */
2952
 
                buf_unzip_LRU_add_block(block, FALSE);
2953
 
 
2954
 
                block->page.buf_fix_count = 1;
2955
 
                buf_block_set_io_fix(block, BUF_IO_READ);
2956
 
                rw_lock_x_lock_func(&block->lock, 0, file, line);
2957
 
 
2958
 
                UNIV_MEM_INVALID(bpage, sizeof *bpage);
2959
 
 
2960
 
                mutex_exit(&block->mutex);
2961
 
                mutex_exit(&buf_pool->zip_mutex);
2962
 
                buf_pool->n_pend_unzip++;
2963
 
 
2964
 
                buf_buddy_free(buf_pool, bpage, sizeof *bpage);
2965
 
 
2966
 
                buf_pool_mutex_exit(buf_pool);
2967
 
 
2968
 
                /* Decompress the page and apply buffered operations
2969
 
                while not holding buf_pool->mutex or block->mutex. */
2970
 
                success = buf_zip_decompress(block, srv_use_checksums);
2971
 
                ut_a(success);
2972
 
 
2973
 
                if (UNIV_LIKELY(!recv_no_ibuf_operations)) {
2974
 
                        ibuf_merge_or_delete_for_page(block, space, offset,
2975
 
                                                      zip_size, TRUE);
2976
 
                }
2977
 
 
2978
 
                /* Unfix and unlatch the block. */
2979
 
                buf_pool_mutex_enter(buf_pool);
2980
 
                mutex_enter(&block->mutex);
2981
 
                block->page.buf_fix_count--;
2982
 
                buf_block_set_io_fix(block, BUF_IO_NONE);
2983
 
                mutex_exit(&block->mutex);
2984
 
                buf_pool->n_pend_unzip--;
2985
 
                rw_lock_x_unlock(&block->lock);
2986
 
 
2987
 
                break;
2988
 
 
2989
 
        case BUF_BLOCK_ZIP_FREE:
2990
 
        case BUF_BLOCK_NOT_USED:
2991
 
        case BUF_BLOCK_READY_FOR_USE:
2992
 
        case BUF_BLOCK_MEMORY:
2993
 
        case BUF_BLOCK_REMOVE_HASH:
2994
 
                ut_error;
2995
 
                break;
2996
 
        }
2997
 
 
2998
 
        ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
2999
 
 
3000
 
        mutex_enter(&block->mutex);
3001
 
#if UNIV_WORD_SIZE == 4
3002
 
        /* On 32-bit systems, there is no padding in buf_page_t.  On
3003
 
        other systems, Valgrind could complain about uninitialized pad
3004
 
        bytes. */
3005
 
        UNIV_MEM_ASSERT_RW(&block->page, sizeof block->page);
3006
 
#endif
3007
 
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
3008
 
        if ((mode == BUF_GET_IF_IN_POOL || mode == BUF_GET_IF_IN_POOL_OR_WATCH)
3009
 
            && ibuf_debug) {
3010
 
                /* Try to evict the block from the buffer pool, to use the
3011
 
                insert buffer (change buffer) as much as possible. */
3012
 
 
3013
 
                if (buf_LRU_free_block(&block->page, TRUE, NULL)
3014
 
                    == BUF_LRU_FREED) {
3015
 
                        mutex_exit(&block->mutex);
3016
 
                        if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
3017
 
                                /* Set the watch, as it would have
3018
 
                                been set if the page were not in the
3019
 
                                buffer pool in the first place. */
3020
 
                                block = (buf_block_t*) buf_pool_watch_set(
3021
 
                                        space, offset, fold);
3022
 
 
3023
 
                                if (UNIV_LIKELY_NULL(block)) {
3024
 
 
3025
 
                                        /* The page entered the buffer
3026
 
                                        pool for some reason. Try to
3027
 
                                        evict it again. */
3028
 
                                        goto got_block;
3029
 
                                }
3030
 
                        }
3031
 
                        buf_pool_mutex_exit(buf_pool);
3032
 
                        fprintf(stderr,
3033
 
                                "innodb_change_buffering_debug evict %u %u\n",
3034
 
                                (unsigned) space, (unsigned) offset);
3035
 
                        return(NULL);
3036
 
                } else if (buf_flush_page_try(buf_pool, block)) {
3037
 
                        fprintf(stderr,
3038
 
                                "innodb_change_buffering_debug flush %u %u\n",
3039
 
                                (unsigned) space, (unsigned) offset);
3040
 
                        guess = block;
3041
 
                        goto loop;
3042
 
                }
3043
 
 
3044
 
                /* Failed to evict the page; change it directly */
3045
 
        }
3046
 
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
3047
 
 
3048
 
        buf_block_buf_fix_inc(block, file, line);
3049
 
 
3050
 
        mutex_exit(&block->mutex);
3051
 
 
3052
 
        /* Check if this is the first access to the page */
3053
 
 
3054
 
        access_time = buf_page_is_accessed(&block->page);
3055
 
 
3056
 
        buf_pool_mutex_exit(buf_pool);
3057
 
 
3058
 
        buf_page_set_accessed_make_young(&block->page, access_time);
3059
 
 
3060
 
#ifdef UNIV_DEBUG_FILE_ACCESSES
3061
 
        ut_a(!block->page.file_page_was_freed);
3062
 
#endif
3063
 
 
3064
 
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
3065
 
        ut_a(++buf_dbg_counter % 5771 || buf_validate());
3066
 
        ut_a(block->page.buf_fix_count > 0);
3067
 
        ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
3068
 
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
3069
 
 
3070
 
        switch (rw_latch) {
3071
 
        case RW_NO_LATCH:
3072
 
                if (must_read) {
3073
 
                        /* Let us wait until the read operation
3074
 
                        completes */
3075
 
 
3076
 
                        for (;;) {
3077
 
                                enum buf_io_fix io_fix;
3078
 
 
3079
 
                                mutex_enter(&block->mutex);
3080
 
                                io_fix = buf_block_get_io_fix(block);
3081
 
                                mutex_exit(&block->mutex);
3082
 
 
3083
 
                                if (io_fix == BUF_IO_READ) {
3084
 
 
3085
 
                                        os_thread_sleep(WAIT_FOR_READ);
3086
 
                                } else {
3087
 
                                        break;
3088
 
                                }
3089
 
                        }
3090
 
                }
3091
 
 
3092
 
                fix_type = MTR_MEMO_BUF_FIX;
3093
 
                break;
3094
 
 
3095
 
        case RW_S_LATCH:
3096
 
                rw_lock_s_lock_func(&(block->lock), 0, file, line);
3097
 
 
3098
 
                fix_type = MTR_MEMO_PAGE_S_FIX;
3099
 
                break;
3100
 
 
3101
 
        default:
3102
 
                ut_ad(rw_latch == RW_X_LATCH);
3103
 
                rw_lock_x_lock_func(&(block->lock), 0, file, line);
3104
 
 
3105
 
                fix_type = MTR_MEMO_PAGE_X_FIX;
3106
 
                break;
3107
 
        }
3108
 
 
3109
 
        mtr_memo_push(mtr, block, fix_type);
3110
 
 
3111
 
        if (!access_time) {
3112
 
                /* In the case of a first access, try to apply linear
3113
 
                read-ahead */
3114
 
 
3115
 
                buf_read_ahead_linear(space, zip_size, offset);
3116
 
        }
3117
 
 
3118
 
#ifdef UNIV_IBUF_COUNT_DEBUG
3119
 
        ut_a(ibuf_count_get(buf_block_get_space(block),
3120
 
                            buf_block_get_page_no(block)) == 0);
3121
 
#endif
3122
 
        return(block);
3123
 
}
3124
 
 
3125
 
/********************************************************************//**
This is the general function used to get optimistic access to a database
page.
@return TRUE if success */
UNIV_INTERN
ibool
buf_page_optimistic_get(
/*====================*/
	ulint		rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */
	buf_block_t*	block,	/*!< in: guessed buffer block */
	ib_uint64_t	modify_clock,/*!< in: modify clock value if mode is
				..._GUESS_ON_CLOCK */
	const char*	file,	/*!< in: file name */
	ulint		line,	/*!< in: line where called */
	mtr_t*		mtr)	/*!< in: mini-transaction */
{
	buf_pool_t*	buf_pool;
	unsigned	access_time;
	ibool		success;
	ulint		fix_type;

	ut_ad(block);
	ut_ad(mtr);
	ut_ad(mtr->state == MTR_ACTIVE);
	ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));

	/* The guessed block may have been evicted and reused since the
	caller obtained the pointer; its state is checked under the
	block mutex before anything else is done with it. */
	mutex_enter(&block->mutex);

	if (UNIV_UNLIKELY(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE)) {

		mutex_exit(&block->mutex);

		return(FALSE);
	}

	/* Buffer-fix the block while still holding the block mutex so
	that it cannot be evicted before the latch is acquired below. */
	buf_block_buf_fix_inc(block, file, line);

	mutex_exit(&block->mutex);

	/* Check if this is the first access to the page.
	We do a dirty read on purpose, to avoid mutex contention.
	This field is only used for heuristic purposes; it does not
	affect correctness. */

	access_time = buf_page_is_accessed(&block->page);
	buf_page_set_accessed_make_young(&block->page, access_time);

	ut_ad(!ibuf_inside()
	      || ibuf_page(buf_block_get_space(block),
			   buf_block_get_zip_size(block),
			   buf_block_get_page_no(block), NULL));

	/* Try to acquire the requested page latch without waiting;
	optimistic access must never block. */
	if (rw_latch == RW_S_LATCH) {
		success = rw_lock_s_lock_nowait(&(block->lock),
						file, line);
		fix_type = MTR_MEMO_PAGE_S_FIX;
	} else {
		success = rw_lock_x_lock_func_nowait(&(block->lock),
						     file, line);
		fix_type = MTR_MEMO_PAGE_X_FIX;
	}

	if (UNIV_UNLIKELY(!success)) {
		/* Latch not available: undo the buffer-fix and fail. */
		mutex_enter(&block->mutex);
		buf_block_buf_fix_dec(block);
		mutex_exit(&block->mutex);

		return(FALSE);
	}

	if (UNIV_UNLIKELY(modify_clock != block->modify_clock)) {
		/* The page was modified since the caller recorded
		modify_clock, so the optimistic guess is stale: release
		the latch and the fix and report failure. */
		buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);

		if (rw_latch == RW_S_LATCH) {
			rw_lock_s_unlock(&(block->lock));
		} else {
			rw_lock_x_unlock(&(block->lock));
		}

		mutex_enter(&block->mutex);
		buf_block_buf_fix_dec(block);
		mutex_exit(&block->mutex);

		return(FALSE);
	}

	/* Register the fix in the mini-transaction memo so that it is
	released at mtr_commit(). */
	mtr_memo_push(mtr, block, fix_type);

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
	ut_a(++buf_dbg_counter % 5771 || buf_validate());
	ut_a(block->page.buf_fix_count > 0);
	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

#ifdef UNIV_DEBUG_FILE_ACCESSES
	ut_a(block->page.file_page_was_freed == FALSE);
#endif
	if (UNIV_UNLIKELY(!access_time)) {
		/* In the case of a first access, try to apply linear
		read-ahead */

		buf_read_ahead_linear(buf_block_get_space(block),
				      buf_block_get_zip_size(block),
				      buf_block_get_page_no(block));
	}

#ifdef UNIV_IBUF_COUNT_DEBUG
	ut_a(ibuf_count_get(buf_block_get_space(block),
			    buf_block_get_page_no(block)) == 0);
#endif
	/* NOTE(review): the statistics counter is updated without the
	buffer pool mutex here — presumably an accepted approximate
	counter; confirm against the other n_page_gets updates. */
	buf_pool = buf_pool_from_block(block);
	buf_pool->stat.n_page_gets++;

	return(TRUE);
}
3240
 
 
3241
 
/********************************************************************//**
This is used to get access to a known database page, when no waiting can be
done. For example, if a search in an adaptive hash index leads us to this
frame.
@return TRUE if success */
UNIV_INTERN
ibool
buf_page_get_known_nowait(
/*======================*/
	ulint		rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */
	buf_block_t*	block,	/*!< in: the known page */
	ulint		mode,	/*!< in: BUF_MAKE_YOUNG or BUF_KEEP_OLD */
	const char*	file,	/*!< in: file name */
	ulint		line,	/*!< in: line where called */
	mtr_t*		mtr)	/*!< in: mini-transaction */
{
	buf_pool_t*	buf_pool;
	ibool		success;
	ulint		fix_type;

	ut_ad(mtr);
	ut_ad(mtr->state == MTR_ACTIVE);
	ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));

	mutex_enter(&block->mutex);

	if (buf_block_get_state(block) == BUF_BLOCK_REMOVE_HASH) {
		/* Another thread is just freeing the block from the LRU list
		of the buffer pool: do not try to access this page; this
		attempt to access the page can only come through the hash
		index because when the buffer block state is ..._REMOVE_HASH,
		we have already removed it from the page address hash table
		of the buffer pool. */

		mutex_exit(&block->mutex);

		return(FALSE);
	}

	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);

	/* Buffer-fix under the block mutex so the block cannot be
	evicted while we attempt the latch below. */
	buf_block_buf_fix_inc(block, file, line);

	mutex_exit(&block->mutex);

	buf_pool = buf_pool_from_block(block);

	/* Update the LRU heuristics: either make the block young, or
	merely stamp its first access time. */
	if (mode == BUF_MAKE_YOUNG && buf_page_peek_if_too_old(&block->page)) {
		buf_pool_mutex_enter(buf_pool);
		buf_LRU_make_block_young(&block->page);
		buf_pool_mutex_exit(buf_pool);
	} else if (!buf_page_is_accessed(&block->page)) {
		/* Above, we do a dirty read on purpose, to avoid
		mutex contention.  The field buf_page_t::access_time
		is only used for heuristic purposes.  Writes to the
		field must be protected by mutex, however. */
		ulint	time_ms = ut_time_ms();

		buf_pool_mutex_enter(buf_pool);
		buf_page_set_accessed(&block->page, time_ms);
		buf_pool_mutex_exit(buf_pool);
	}

	ut_ad(!ibuf_inside() || (mode == BUF_KEEP_OLD));

	/* Attempt the requested latch without waiting; this function
	must never block. */
	if (rw_latch == RW_S_LATCH) {
		success = rw_lock_s_lock_nowait(&(block->lock),
						file, line);
		fix_type = MTR_MEMO_PAGE_S_FIX;
	} else {
		success = rw_lock_x_lock_func_nowait(&(block->lock),
						     file, line);
		fix_type = MTR_MEMO_PAGE_X_FIX;
	}

	if (!success) {
		/* Latch not available: undo the buffer-fix and fail. */
		mutex_enter(&block->mutex);
		buf_block_buf_fix_dec(block);
		mutex_exit(&block->mutex);

		return(FALSE);
	}

	/* Record the fix in the mini-transaction memo; it is released
	at mtr_commit(). */
	mtr_memo_push(mtr, block, fix_type);

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
	ut_a(++buf_dbg_counter % 5771 || buf_validate());
	ut_a(block->page.buf_fix_count > 0);
	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#ifdef UNIV_DEBUG_FILE_ACCESSES
	ut_a(block->page.file_page_was_freed == FALSE);
#endif

#ifdef UNIV_IBUF_COUNT_DEBUG
	ut_a((mode == BUF_KEEP_OLD)
	     || (ibuf_count_get(buf_block_get_space(block),
				buf_block_get_page_no(block)) == 0));
#endif
	buf_pool->stat.n_page_gets++;

	return(TRUE);
}
3344
 
 
3345
 
/*******************************************************************//**
Given a tablespace id and page number tries to get that page. If the
page is not in the buffer pool it is not loaded and NULL is returned.
Suitable for using when holding the kernel mutex.
@return pointer to a page or NULL */
UNIV_INTERN
const buf_block_t*
buf_page_try_get_func(
/*==================*/
	ulint		space_id,/*!< in: tablespace id */
	ulint		page_no,/*!< in: page number */
	const char*	file,	/*!< in: file name */
	ulint		line,	/*!< in: line where called */
	mtr_t*		mtr)	/*!< in: mini-transaction */
{
	buf_block_t*	block;
	ibool		success;
	ulint		fix_type;
	buf_pool_t*	buf_pool = buf_pool_get(space_id, page_no);

	ut_ad(mtr);
	ut_ad(mtr->state == MTR_ACTIVE);

	/* Look the page up in the page hash under the buffer pool
	mutex; do not trigger any disk read if it is absent. */
	buf_pool_mutex_enter(buf_pool);
	block = buf_block_hash_get(buf_pool, space_id, page_no);

	if (!block || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
		buf_pool_mutex_exit(buf_pool);
		return(NULL);
	}

	ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page));

	/* Acquire the block mutex before dropping the pool mutex so
	the block cannot change state in between. */
	mutex_enter(&block->mutex);
	buf_pool_mutex_exit(buf_pool);

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
	ut_a(buf_block_get_space(block) == space_id);
	ut_a(buf_block_get_page_no(block) == page_no);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

	buf_block_buf_fix_inc(block, file, line);
	mutex_exit(&block->mutex);

	/* Prefer an S-latch; fall back to an X-latch below if the
	S-latch cannot be granted immediately. */
	fix_type = MTR_MEMO_PAGE_S_FIX;
	success = rw_lock_s_lock_nowait(&block->lock, file, line);

	if (!success) {
		/* Let us try to get an X-latch. If the current thread
		is holding an X-latch on the page, we cannot get an
		S-latch. */

		fix_type = MTR_MEMO_PAGE_X_FIX;
		success = rw_lock_x_lock_func_nowait(&block->lock,
						     file, line);
	}

	if (!success) {
		/* Neither latch was available: undo the buffer-fix. */
		mutex_enter(&block->mutex);
		buf_block_buf_fix_dec(block);
		mutex_exit(&block->mutex);

		return(NULL);
	}

	mtr_memo_push(mtr, block, fix_type);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
	ut_a(++buf_dbg_counter % 5771 || buf_validate());
	ut_a(block->page.buf_fix_count > 0);
	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#ifdef UNIV_DEBUG_FILE_ACCESSES
	ut_a(block->page.file_page_was_freed == FALSE);
#endif /* UNIV_DEBUG_FILE_ACCESSES */
	buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);

	buf_pool->stat.n_page_gets++;

#ifdef UNIV_IBUF_COUNT_DEBUG
	ut_a(ibuf_count_get(buf_block_get_space(block),
			    buf_block_get_page_no(block)) == 0);
#endif

	return(block);
}
3431
 
 
3432
 
/********************************************************************//**
3433
 
Initialize some fields of a control block. */
3434
 
UNIV_INLINE
3435
 
void
3436
 
buf_page_init_low(
3437
 
/*==============*/
3438
 
        buf_page_t*     bpage)  /*!< in: block to init */
3439
 
{
3440
 
        bpage->flush_type = BUF_FLUSH_LRU;
3441
 
        bpage->io_fix = BUF_IO_NONE;
3442
 
        bpage->buf_fix_count = 0;
3443
 
        bpage->freed_page_clock = 0;
3444
 
        bpage->access_time = 0;
3445
 
        bpage->newest_modification = 0;
3446
 
        bpage->oldest_modification = 0;
3447
 
        HASH_INVALIDATE(bpage, hash);
3448
 
#ifdef UNIV_DEBUG_FILE_ACCESSES
3449
 
        bpage->file_page_was_freed = FALSE;
3450
 
#endif /* UNIV_DEBUG_FILE_ACCESSES */
3451
 
}
3452
 
 
3453
 
/********************************************************************//**
Inits a page to the buffer buf_pool. */
static
void
buf_page_init(
/*==========*/
	ulint		space,	/*!< in: space id */
	ulint		offset,	/*!< in: offset of the page within space
				in units of a page */
	ulint		fold,	/*!< in: buf_page_address_fold(space,offset) */
	buf_block_t*	block)	/*!< in: block to init */
{
	buf_page_t*	hash_page;
	buf_pool_t*	buf_pool = buf_pool_get(space, offset);

	/* Caller must hold both the buffer pool mutex and the block
	mutex, and the block must not already be a file page. */
	ut_ad(buf_pool_mutex_own(buf_pool));
	ut_ad(mutex_own(&(block->mutex)));
	ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);

	/* Set the state of the block */
	buf_block_set_file_page(block, space, offset);

#ifdef UNIV_DEBUG_VALGRIND
	if (!space) {
		/* Silence valid Valgrind warnings about uninitialized
		data being written to data files.  There are some unused
		bytes on some pages that InnoDB does not initialize. */
		UNIV_MEM_VALID(block->frame, UNIV_PAGE_SIZE);
	}
#endif /* UNIV_DEBUG_VALGRIND */

	buf_block_init_low(block);

	block->lock_hash_val = lock_rec_hash(space, offset);

	buf_page_init_low(&block->page);

	/* Insert into the hash table of file pages */

	hash_page = buf_page_hash_get_low(buf_pool, space, offset, fold);

	if (UNIV_LIKELY(!hash_page)) {
		/* Common case: no entry exists for this page address;
		fall through to the HASH_INSERT below. */
	} else if (buf_pool_watch_is_sentinel(buf_pool, hash_page)) {
		/* A watch sentinel occupies the slot: transfer its
		reference count to the real block and remove it. */
		/* Preserve the reference count. */
		ulint	buf_fix_count = hash_page->buf_fix_count;

		ut_a(buf_fix_count > 0);
		block->page.buf_fix_count += buf_fix_count;
		buf_pool_watch_remove(buf_pool, fold, hash_page);
	} else {
		/* A real block for this page already exists in the
		hash table: this is a fatal inconsistency. */
		fprintf(stderr,
			"InnoDB: Error: page %lu %lu already found"
			" in the hash table: %p, %p\n",
			(ulong) space,
			(ulong) offset,
			(const void*) hash_page, (const void*) block);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
		mutex_exit(&block->mutex);
		buf_pool_mutex_exit(buf_pool);
		buf_print();
		buf_LRU_print();
		buf_validate();
		buf_LRU_validate();
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
		ut_error;
	}

	ut_ad(!block->page.in_zip_hash);
	ut_ad(!block->page.in_page_hash);
	ut_d(block->page.in_page_hash = TRUE);
	HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
		    fold, &block->page);
}
3526
 
 
3527
 
/********************************************************************//**
Function which inits a page for read to the buffer buf_pool. If the page is
(1) already in buf_pool, or
(2) if we specify to read only ibuf pages and the page is not an ibuf page, or
(3) if the space is deleted or being deleted,
then this function does nothing.
Sets the io_fix flag to BUF_IO_READ and sets a non-recursive exclusive lock
on the buffer frame. The io-handler must take care that the flag is cleared
and the lock released later.
@return pointer to the block or NULL */
UNIV_INTERN
buf_page_t*
buf_page_init_for_read(
/*===================*/
	ulint*		err,	/*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED */
	ulint		mode,	/*!< in: BUF_READ_IBUF_PAGES_ONLY, ... */
	ulint		space,	/*!< in: space id */
	ulint		zip_size,/*!< in: compressed page size, or 0 */
	ibool		unzip,	/*!< in: TRUE=request uncompressed page */
	ib_int64_t	tablespace_version,
				/*!< in: prevents reading from a wrong
				version of the tablespace in case we have done
				DISCARD + IMPORT */
	ulint		offset)	/*!< in: page number */
{
	buf_block_t*	block;
	buf_page_t*	bpage	= NULL;
	buf_page_t*	watch_page;
	mtr_t		mtr;
	ulint		fold;
	ibool		lru	= FALSE;
	void*		data;
	buf_pool_t*	buf_pool = buf_pool_get(space, offset);

	ut_ad(buf_pool);

	*err = DB_SUCCESS;

	if (mode == BUF_READ_IBUF_PAGES_ONLY) {
		/* It is a read-ahead within an ibuf routine */

		ut_ad(!ibuf_bitmap_page(zip_size, offset));
		ut_ad(ibuf_inside());

		/* The mtr started here is committed at func_exit (or
		in the early-return just below). */
		mtr_start(&mtr);

		if (!recv_no_ibuf_operations
		    && !ibuf_page(space, zip_size, offset, &mtr)) {

			mtr_commit(&mtr);

			return(NULL);
		}
	} else {
		ut_ad(mode == BUF_READ_ANY_PAGE);
	}

	/* A compressed-only read (compressed page requested, no
	uncompressed copy, not in crash recovery) does not need a full
	buffer block; only a compressed-page descriptor is allocated in
	the else-branch further below. */
	if (zip_size && UNIV_LIKELY(!unzip)
	    && UNIV_LIKELY(!recv_recovery_is_on())) {
		block = NULL;
	} else {
		block = buf_LRU_get_free_block(buf_pool, 0);
		ut_ad(block);
		ut_ad(buf_pool_from_block(block) == buf_pool);
	}

	fold = buf_page_address_fold(space, offset);

	buf_pool_mutex_enter(buf_pool);

	watch_page = buf_page_hash_get_low(buf_pool, space, offset, fold);
	if (watch_page && !buf_pool_watch_is_sentinel(buf_pool, watch_page)) {
		/* The page is already in the buffer pool. */
		watch_page = NULL;
err_exit:
		/* Shared error path: release the pre-allocated block,
		if any, and return NULL via func_exit. */
		if (block) {
			mutex_enter(&block->mutex);
			buf_LRU_block_free_non_file_page(block);
			mutex_exit(&block->mutex);
		}

		bpage = NULL;
		goto func_exit;
	}

	if (fil_tablespace_deleted_or_being_deleted_in_mem(
		    space, tablespace_version)) {
		/* The page belongs to a space which has been
		deleted or is being deleted. */
		*err = DB_TABLESPACE_DELETED;

		goto err_exit;
	}

	if (block) {
		/* Uncompressed (or to-be-uncompressed) page: initialize
		the full block and link it into the LRU list. */
		bpage = &block->page;
		mutex_enter(&block->mutex);

		ut_ad(buf_pool_from_bpage(bpage) == buf_pool);

		buf_page_init(space, offset, fold, block);

		/* The block must be put to the LRU list, to the old blocks */
		buf_LRU_add_block(bpage, TRUE/* to old blocks */);

		/* We set a pass-type x-lock on the frame because then
		the same thread which called for the read operation
		(and is running now at this point of code) can wait
		for the read to complete by waiting for the x-lock on
		the frame; if the x-lock were recursive, the same
		thread would illegally get the x-lock before the page
		read is completed.  The x-lock is cleared by the
		io-handler thread. */

		rw_lock_x_lock_gen(&block->lock, BUF_IO_READ);
		buf_page_set_io_fix(bpage, BUF_IO_READ);

		if (UNIV_UNLIKELY(zip_size)) {
			page_zip_set_size(&block->page.zip, zip_size);

			/* buf_pool->mutex may be released and
			reacquired by buf_buddy_alloc().  Thus, we
			must release block->mutex in order not to
			break the latching order in the reacquisition
			of buf_pool->mutex.  We also must defer this
			operation until after the block descriptor has
			been added to buf_pool->LRU and
			buf_pool->page_hash. */
			mutex_exit(&block->mutex);
			data = buf_buddy_alloc(buf_pool, zip_size, &lru);
			mutex_enter(&block->mutex);
			block->page.zip.data = static_cast<unsigned char *>(data);

			/* To maintain the invariant
			block->in_unzip_LRU_list
			== buf_page_belongs_to_unzip_LRU(&block->page)
			we have to add this block to unzip_LRU
			after block->page.zip.data is set. */
			ut_ad(buf_page_belongs_to_unzip_LRU(&block->page));
			buf_unzip_LRU_add_block(block, TRUE);
		}

		mutex_exit(&block->mutex);
	} else {
		/* Compressed-only page: allocate a bare buf_page_t
		descriptor plus compressed storage from the buddy
		allocator instead of using a full block. */

		/* Defer buf_buddy_alloc() until after the block has
		been found not to exist.  The buf_buddy_alloc() and
		buf_buddy_free() calls may be expensive because of
		buf_buddy_relocate(). */

		/* The compressed page must be allocated before the
		control block (bpage), in order to avoid the
		invocation of buf_buddy_relocate_block() on
		uninitialized data. */
		data = buf_buddy_alloc(buf_pool, zip_size, &lru);
		bpage = static_cast<buf_page_struct *>(buf_buddy_alloc(buf_pool, sizeof *bpage, &lru));

		/* Initialize the buf_pool pointer. */
		bpage->buf_pool_index = buf_pool_index(buf_pool);

		/* If buf_buddy_alloc() allocated storage from the LRU list,
		it released and reacquired buf_pool->mutex.  Thus, we must
		check the page_hash again, as it may have been modified. */
		if (UNIV_UNLIKELY(lru)) {

			watch_page = buf_page_hash_get_low(
				buf_pool, space, offset, fold);

			if (watch_page
			    && !buf_pool_watch_is_sentinel(buf_pool,
							   watch_page)) {

				/* The block was added by some other thread. */
				watch_page = NULL;
				buf_buddy_free(buf_pool, bpage, sizeof *bpage);
				buf_buddy_free(buf_pool, data, zip_size);

				bpage = NULL;
				goto func_exit;
			}
		}

		page_zip_des_init(&bpage->zip);
		page_zip_set_size(&bpage->zip, zip_size);
		bpage->zip.data = static_cast<unsigned char *>(data);

		mutex_enter(&buf_pool->zip_mutex);
		UNIV_MEM_DESC(bpage->zip.data,
			      page_zip_get_size(&bpage->zip), bpage);

		buf_page_init_low(bpage);

		bpage->state	= BUF_BLOCK_ZIP_PAGE;
		bpage->space	= space;
		bpage->offset	= offset;


#ifdef UNIV_DEBUG
		bpage->in_page_hash = FALSE;
		bpage->in_zip_hash = FALSE;
		bpage->in_flush_list = FALSE;
		bpage->in_free_list = FALSE;
		bpage->in_LRU_list = FALSE;
#endif /* UNIV_DEBUG */

		ut_d(bpage->in_page_hash = TRUE);

		if (UNIV_LIKELY_NULL(watch_page)) {
			/* A buffer-pool watch sentinel occupies the
			page-hash slot: inherit its reference count
			before removing it. */
			/* Preserve the reference count. */
			ulint	buf_fix_count = watch_page->buf_fix_count;
			ut_a(buf_fix_count > 0);
			bpage->buf_fix_count += buf_fix_count;
			ut_ad(buf_pool_watch_is_sentinel(buf_pool, watch_page));
			buf_pool_watch_remove(buf_pool, fold, watch_page);
		}

		HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold,
			    bpage);

		/* The block must be put to the LRU list, to the old blocks */
		buf_LRU_add_block(bpage, TRUE/* to old blocks */);
		buf_LRU_insert_zip_clean(bpage);

		buf_page_set_io_fix(bpage, BUF_IO_READ);

		mutex_exit(&buf_pool->zip_mutex);
	}

	/* The pending-read counter is decremented by the I/O handler
	when the read completes (not visible in this function). */
	buf_pool->n_pend_reads++;
func_exit:
	buf_pool_mutex_exit(buf_pool);

	if (mode == BUF_READ_IBUF_PAGES_ONLY) {

		mtr_commit(&mtr);
	}

	ut_ad(!bpage || buf_page_in_file(bpage));
	return(bpage);
}
3766
 
 
3767
 
/********************************************************************//**
Initializes a page to the buffer buf_pool. The page is usually not read
from a file even if it cannot be found in the buffer buf_pool. This is one
of the functions which perform to a block a state transition NOT_USED =>
FILE_PAGE (the other is buf_page_get_gen).
@return pointer to the block, page bufferfixed */
UNIV_INTERN
buf_block_t*
buf_page_create(
/*============*/
	ulint	space,	/*!< in: space id */
	ulint	offset,	/*!< in: offset of the page within space in units of
			a page */
	ulint	zip_size,/*!< in: compressed page size, or 0 */
	mtr_t*	mtr)	/*!< in: mini-transaction handle */
{
	buf_frame_t*	frame;
	buf_block_t*	block;
	ulint		fold;
	buf_block_t*	free_block	= NULL;
	/* Creation time; stamped on the page as its first access below. */
	ulint		time_ms		= ut_time_ms();
	/* The instance is selected by (space, offset), so all bookkeeping
	for this page happens in a single buffer pool instance. */
	buf_pool_t*	buf_pool	= buf_pool_get(space, offset);

	ut_ad(mtr);
	ut_ad(mtr->state == MTR_ACTIVE);
	/* Only the system tablespace (space 0) is assumed uncompressed;
	zip_size != 0 requires space != 0 here. */
	ut_ad(space || !zip_size);

	/* Reserve a free block BEFORE taking buf_pool->mutex; the
	allocation may itself need to flush/evict and take the mutex. */
	free_block = buf_LRU_get_free_block(buf_pool, 0);

	fold = buf_page_address_fold(space, offset);

	buf_pool_mutex_enter(buf_pool);

	/* Check whether the page already exists in the pool (using the
	precomputed hash fold to avoid recomputing it). */
	block = (buf_block_t*) buf_page_hash_get_low(
		buf_pool, space, offset, fold);

	if (block
	    && buf_page_in_file(&block->page)
	    && !buf_pool_watch_is_sentinel(buf_pool, &block->page)) {
#ifdef UNIV_IBUF_COUNT_DEBUG
		ut_a(ibuf_count_get(space, offset) == 0);
#endif
#ifdef UNIV_DEBUG_FILE_ACCESSES
		block->page.file_page_was_freed = FALSE;
#endif /* UNIV_DEBUG_FILE_ACCESSES */

		/* Page can be found in buf_pool: the reserved free block
		is not needed after all; return the existing page. */
		buf_pool_mutex_exit(buf_pool);

		buf_block_free(free_block);

		return(buf_page_get_with_no_latch(space, zip_size,
						  offset, mtr));
	}

	/* If we get here, the page was not in buf_pool: init it there */

#ifdef UNIV_DEBUG
	if (buf_debug_prints) {
		fprintf(stderr, "Creating space %lu page %lu to buffer\n",
			(ulong) space, (ulong) offset);
	}
#endif /* UNIV_DEBUG */

	block = free_block;

	mutex_enter(&block->mutex);

	/* State transition NOT_USED => FILE_PAGE; also inserts the block
	into buf_pool->page_hash. */
	buf_page_init(space, offset, fold, block);

	/* The block must be put to the LRU list */
	buf_LRU_add_block(&block->page, FALSE);

	/* Buffer-fix so the caller holds the page; released via the
	mtr memo entry pushed below. */
	buf_block_buf_fix_inc(block, __FILE__, __LINE__);
	buf_pool->stat.n_pages_created++;

	if (zip_size) {
		void*	data;
		ibool	lru;

		/* Prevent race conditions during buf_buddy_alloc(),
		which may release and reacquire buf_pool->mutex,
		by IO-fixing and X-latching the block. */

		buf_page_set_io_fix(&block->page, BUF_IO_READ);
		rw_lock_x_lock(&block->lock);

		page_zip_set_size(&block->page.zip, zip_size);
		mutex_exit(&block->mutex);
		/* buf_pool->mutex may be released and reacquired by
		buf_buddy_alloc().  Thus, we must release block->mutex
		in order not to break the latching order in
		the reacquisition of buf_pool->mutex.  We also must
		defer this operation until after the block descriptor
		has been added to buf_pool->LRU and buf_pool->page_hash. */
		data = buf_buddy_alloc(buf_pool, zip_size, &lru);
		mutex_enter(&block->mutex);
		block->page.zip.data = static_cast<unsigned char *>(data);

		/* To maintain the invariant
		block->in_unzip_LRU_list
		== buf_page_belongs_to_unzip_LRU(&block->page)
		we have to add this block to unzip_LRU after
		block->page.zip.data is set. */
		ut_ad(buf_page_belongs_to_unzip_LRU(&block->page));
		buf_unzip_LRU_add_block(block, FALSE);

		/* Undo the temporary IO-fix/X-latch taken above. */
		buf_page_set_io_fix(&block->page, BUF_IO_NONE);
		rw_lock_x_unlock(&block->lock);
	}

	buf_page_set_accessed(&block->page, time_ms);

	buf_pool_mutex_exit(buf_pool);

	/* Record the buffer-fix in the mtr so it is released when the
	mini-transaction commits. */
	mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);

	mutex_exit(&block->mutex);

	/* Delete possible entries for the page from the insert buffer:
	such can exist if the page belonged to an index which was dropped */

	ibuf_merge_or_delete_for_page(NULL, space, offset, zip_size, TRUE);

	/* Flush pages from the end of the LRU list if necessary */
	buf_flush_free_margin(buf_pool);

	frame = block->frame;

	/* Mark the page freshly allocated: no prev/next page links yet. */
	memset(frame + FIL_PAGE_PREV, 0xff, 4);
	memset(frame + FIL_PAGE_NEXT, 0xff, 4);
	mach_write_to_2(frame + FIL_PAGE_TYPE, FIL_PAGE_TYPE_ALLOCATED);

	/* Reset to zero the file flush lsn field in the page; if the first
	page of an ibdata file is 'created' in this function into the buffer
	pool then we lose the original contents of the file flush lsn stamp.
	Then InnoDB could in a crash recovery print a big, false, corruption
	warning if the stamp contains an lsn bigger than the ib_logfile lsn. */

	memset(frame + FIL_PAGE_FILE_FLUSH_LSN, 0, 8);

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
	/* Run the (expensive) full validation only on every 357th call. */
	ut_a(++buf_dbg_counter % 357 || buf_validate());
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#ifdef UNIV_IBUF_COUNT_DEBUG
	ut_a(ibuf_count_get(buf_block_get_space(block),
			    buf_block_get_page_no(block)) == 0);
#endif
	return(block);
}
3917
 
 
3918
 
/********************************************************************//**
Completes an asynchronous read or write request of a file page to or from
the buffer pool.  For reads, this also verifies the stored page number,
space id and checksum, optionally decompresses the page, applies redo
during recovery, and merges buffered insert-buffer operations. */
UNIV_INTERN
void
buf_page_io_complete(
/*=================*/
	buf_page_t*	bpage)	/*!< in: pointer to the block in question */
{
	enum buf_io_fix	io_type;
	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);
	/* TRUE when the block has an uncompressed frame we can latch. */
	const ibool	uncompressed = (buf_page_get_state(bpage)
					== BUF_BLOCK_FILE_PAGE);

	ut_a(buf_page_in_file(bpage));

	/* We do not need protect io_fix here by mutex to read
	it because this is the only function where we can change the value
	from BUF_IO_READ or BUF_IO_WRITE to some other value, and our code
	ensures that this is the only thread that handles the i/o for this
	block. */

	io_type = buf_page_get_io_fix(bpage);
	ut_ad(io_type == BUF_IO_READ || io_type == BUF_IO_WRITE);

	if (io_type == BUF_IO_READ) {
		ulint	read_page_no;
		ulint	read_space_id;
		byte*	frame;

		if (buf_page_get_zip_size(bpage)) {
			/* Compressed page: validate against the compressed
			frame; decompress into the uncompressed frame when
			one exists. */
			frame = bpage->zip.data;
			buf_pool->n_pend_unzip++;
			if (uncompressed
			    && !buf_zip_decompress((buf_block_t*) bpage,
						   FALSE)) {

				buf_pool->n_pend_unzip--;
				goto corrupt;
			}
			buf_pool->n_pend_unzip--;
		} else {
			ut_a(uncompressed);
			frame = ((buf_block_t*) bpage)->frame;
		}

		/* If this page is not uninitialized and not in the
		doublewrite buffer, then the page number and space id
		should be the same as in block. */
		read_page_no = mach_read_from_4(frame + FIL_PAGE_OFFSET);
		read_space_id = mach_read_from_4(
			frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);

		if (bpage->space == TRX_SYS_SPACE
		    && trx_doublewrite_page_inside(bpage->offset)) {

			/* Doublewrite buffer pages should never be read
			through the normal path; warn but continue. */
			ut_print_timestamp(stderr);
			fprintf(stderr,
				"  InnoDB: Error: reading page %lu\n"
				"InnoDB: which is in the"
				" doublewrite buffer!\n",
				(ulong) bpage->offset);
		} else if (!read_space_id && !read_page_no) {
			/* This is likely an uninitialized page. */
		} else if ((bpage->space
			    && bpage->space != read_space_id)
			   || bpage->offset != read_page_no) {
			/* We did not compare space_id to read_space_id
			if bpage->space == 0, because the field on the
			page may contain garbage in MySQL < 4.1.1,
			which only supported bpage->space == 0. */

			ut_print_timestamp(stderr);
			fprintf(stderr,
				"  InnoDB: Error: space id and page n:o"
				" stored in the page\n"
				"InnoDB: read in are %lu:%lu,"
				" should be %lu:%lu!\n",
				(ulong) read_space_id, (ulong) read_page_no,
				(ulong) bpage->space,
				(ulong) bpage->offset);
		}

		/* From version 3.23.38 up we store the page checksum
		to the 4 first bytes of the page end lsn field */

		if (buf_page_is_corrupted(frame,
					  buf_page_get_zip_size(bpage))) {
corrupt:
			/* The corruption message is printed twice, before
			and after buf_page_print(), so it brackets the
			(possibly long) page dump in the error log. */
			fprintf(stderr,
				"InnoDB: Database page corruption on disk"
				" or a failed\n"
				"InnoDB: file read of page %lu.\n"
				"InnoDB: You may have to recover"
				" from a backup.\n",
				(ulong) bpage->offset);
			buf_page_print(frame, buf_page_get_zip_size(bpage));
			fprintf(stderr,
				"InnoDB: Database page corruption on disk"
				" or a failed\n"
				"InnoDB: file read of page %lu.\n"
				"InnoDB: You may have to recover"
				" from a backup.\n",
				(ulong) bpage->offset);
			fputs("InnoDB: It is also possible that"
			      " your operating\n"
			      "InnoDB: system has corrupted its"
			      " own file cache\n"
			      "InnoDB: and rebooting your computer"
			      " removes the\n"
			      "InnoDB: error.\n"
			      "InnoDB: If the corrupt page is an index page\n"
			      "InnoDB: you can also try to"
			      " fix the corruption\n"
			      "InnoDB: by dumping, dropping,"
			      " and reimporting\n"
			      "InnoDB: the corrupt table."
			      " You can use CHECK\n"
			      "InnoDB: TABLE to scan your"
			      " table for corruption.\n"
			      "InnoDB: See also "
			      REFMAN "forcing-recovery.html\n"
			      "InnoDB: about forcing recovery.\n", stderr);

			/* Unless the user has forced recovery past corrupt
			pages, a corrupt read is fatal for the server. */
			if (srv_force_recovery < SRV_FORCE_IGNORE_CORRUPT) {
				fputs("InnoDB: Ending processing because of"
				      " a corrupt database page.\n",
				      stderr);
				exit(1);
			}
		}

		if (recv_recovery_is_on()) {
			/* Pages must be uncompressed for crash recovery. */
			ut_a(uncompressed);
			recv_recover_page(TRUE, (buf_block_t*) bpage);
		}

		if (uncompressed && !recv_no_ibuf_operations) {
			ibuf_merge_or_delete_for_page(
				(buf_block_t*) bpage, bpage->space,
				bpage->offset, buf_page_get_zip_size(bpage),
				TRUE);
		}
	}

	buf_pool_mutex_enter(buf_pool);
	mutex_enter(buf_page_get_mutex(bpage));

#ifdef UNIV_IBUF_COUNT_DEBUG
	if (io_type == BUF_IO_WRITE || uncompressed) {
		/* For BUF_IO_READ of compressed-only blocks, the
		buffered operations will be merged by buf_page_get_gen()
		after the block has been uncompressed. */
		ut_a(ibuf_count_get(bpage->space, bpage->offset) == 0);
	}
#endif
	/* Because this thread which does the unlocking is not the same that
	did the locking, we use a pass value != 0 in unlock, which simply
	removes the newest lock debug record, without checking the thread
	id. */

	buf_page_set_io_fix(bpage, BUF_IO_NONE);

	switch (io_type) {
	case BUF_IO_READ:
		/* NOTE that the call to ibuf may have moved the ownership of
		the x-latch to this OS thread: do not let this confuse you in
		debugging! */

		ut_ad(buf_pool->n_pend_reads > 0);
		buf_pool->n_pend_reads--;
		buf_pool->stat.n_pages_read++;

		if (uncompressed) {
			/* Release the x-latch taken when the read was
			posted; pass != 0 (see the comment above). */
			rw_lock_x_unlock_gen(&((buf_block_t*) bpage)->lock,
					     BUF_IO_READ);
		}

		break;

	case BUF_IO_WRITE:
		/* Write means a flush operation: call the completion
		routine in the flush system */

		buf_flush_write_complete(bpage);

		if (uncompressed) {
			/* Flushing holds only an s-latch on the frame. */
			rw_lock_s_unlock_gen(&((buf_block_t*) bpage)->lock,
					     BUF_IO_WRITE);
		}

		buf_pool->stat.n_pages_written++;

		break;

	default:
		ut_error;
	}

#ifdef UNIV_DEBUG
	if (buf_debug_prints) {
		fprintf(stderr, "Has %s page space %lu page no %lu\n",
			io_type == BUF_IO_READ ? "read" : "written",
			(ulong) buf_page_get_space(bpage),
			(ulong) buf_page_get_page_no(bpage));
	}
#endif /* UNIV_DEBUG */

	mutex_exit(buf_page_get_mutex(bpage));
	buf_pool_mutex_exit(buf_pool);
}
4130
 
 
4131
 
/*********************************************************************//**
4132
 
Asserts that all file pages in the buffer are in a replaceable state.
4133
 
@return TRUE */
4134
 
static
4135
 
ibool
4136
 
buf_all_freed_instance(
4137
 
/*===================*/
4138
 
        buf_pool_t*     buf_pool)       /*!< in: buffer pool instancce */
4139
 
{
4140
 
        ulint           i;
4141
 
        buf_chunk_t*    chunk;
4142
 
 
4143
 
        ut_ad(buf_pool);
4144
 
 
4145
 
        buf_pool_mutex_enter(buf_pool);
4146
 
 
4147
 
        chunk = buf_pool->chunks;
4148
 
 
4149
 
        for (i = buf_pool->n_chunks; i--; chunk++) {
4150
 
 
4151
 
                const buf_block_t* block = buf_chunk_not_freed(chunk);
4152
 
 
4153
 
                if (UNIV_LIKELY_NULL(block)) {
4154
 
                        fprintf(stderr,
4155
 
                                "Page %lu %lu still fixed or dirty\n",
4156
 
                                (ulong) block->page.space,
4157
 
                                (ulong) block->page.offset);
4158
 
                        ut_error;
4159
 
                }
4160
 
        }
4161
 
 
4162
 
        buf_pool_mutex_exit(buf_pool);
4163
 
 
4164
 
        return(TRUE);
4165
 
}
4166
 
 
4167
 
/*********************************************************************//**
Invalidates file pages in one buffer pool instance: waits for any pending
write batches, evicts every page from the LRU, and resets the instance
statistics.  Must only be called while InnoDB is effectively single
threaded (startup / redo application). */
static
void
buf_pool_invalidate_instance(
/*=========================*/
	buf_pool_t*	buf_pool)	/*!< in: buffer pool instance */
{
	ibool		freed;
	int	i;

	buf_pool_mutex_enter(buf_pool);

	for (i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) {

		/* As this function is called during startup and
		during redo application phase during recovery, InnoDB
		is single threaded (apart from IO helper threads) at
		this stage. No new write batch can be in intialization
		stage at this point. */
		ut_ad(buf_pool->init_flush[i] == FALSE);

		/* However, it is possible that a write batch that has
		been posted earlier is still not complete. For buffer
		pool invalidation to proceed we must ensure there is NO
		write activity happening. */
		if (buf_pool->n_flush[i] > 0) {
			/* buf_flush_wait_batch_end() must not be called
			while holding buf_pool->mutex; release it around
			the wait and re-take it afterwards. */
			buf_pool_mutex_exit(buf_pool);
			buf_flush_wait_batch_end(buf_pool, static_cast<buf_flush>(i));
			buf_pool_mutex_enter(buf_pool);
		}
	}

	buf_pool_mutex_exit(buf_pool);

	/* All pages must now be unlatched and clean. */
	ut_ad(buf_all_freed_instance(buf_pool));

	freed = TRUE;

	/* Evict blocks until a full scan frees nothing more. */
	while (freed) {
		freed = buf_LRU_search_and_free_block(buf_pool, 100);
	}

	buf_pool_mutex_enter(buf_pool);

	ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0);
	ut_ad(UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0);

	/* Reset LRU bookkeeping to the empty-pool state. */
	buf_pool->freed_page_clock = 0;
	buf_pool->LRU_old = NULL;
	buf_pool->LRU_old_len = 0;
	buf_pool->LRU_flush_ended = 0;

	memset(&buf_pool->stat, 0x00, sizeof(buf_pool->stat));
	buf_refresh_io_stats(buf_pool);

	buf_pool_mutex_exit(buf_pool);
}
4225
 
 
4226
 
/*********************************************************************//**
4227
 
Invalidates the file pages in the buffer pool when an archive recovery is
4228
 
completed. All the file pages buffered must be in a replaceable state when
4229
 
this function is called: not latched and not modified. */
4230
 
UNIV_INTERN
4231
 
void
4232
 
buf_pool_invalidate(void)
4233
 
/*=====================*/
4234
 
{
4235
 
        ulint   i;
4236
 
 
4237
 
        for (i = 0; i < srv_buf_pool_instances; i++) {
4238
 
                buf_pool_invalidate_instance(buf_pool_from_array(i));
4239
 
        }
4240
 
}
4241
 
 
4242
 
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
4243
 
/*********************************************************************//**
Validates data in one buffer pool instance: walks all chunks, the clean
compressed-page list and the flush list, counting pages per category, and
cross-checks the counters against the instance's list lengths and flush
counters at the end.  Any inconsistency aborts via ut_error/ut_a.
@return TRUE */
static
ibool
buf_pool_validate_instance(
/*=======================*/
	buf_pool_t*	buf_pool)	/*!< in: buffer pool instance */
{
	buf_page_t*	b;
	buf_chunk_t*	chunk;
	ulint		i;
	/* Counters accumulated during the walks; each is compared with
	the corresponding buf_pool field before returning. */
	ulint		n_single_flush	= 0;
	ulint		n_lru_flush	= 0;
	ulint		n_list_flush	= 0;
	ulint		n_lru		= 0;
	ulint		n_flush		= 0;
	ulint		n_free		= 0;
	ulint		n_zip		= 0;

	ut_ad(buf_pool);

	buf_pool_mutex_enter(buf_pool);

	chunk = buf_pool->chunks;

	/* Check the uncompressed blocks. */

	for (i = buf_pool->n_chunks; i--; chunk++) {

		ulint		j;
		buf_block_t*	block = chunk->blocks;

		for (j = chunk->size; j--; block++) {

			mutex_enter(&block->mutex);

			switch (buf_block_get_state(block)) {
			case BUF_BLOCK_ZIP_FREE:
			case BUF_BLOCK_ZIP_PAGE:
			case BUF_BLOCK_ZIP_DIRTY:
				/* These should only occur on
				zip_clean, zip_free[], or flush_list. */
				ut_error;
				break;

			case BUF_BLOCK_FILE_PAGE:
				/* The page hash must map (space, page_no)
				back to this very block. */
				ut_a(buf_page_hash_get(buf_pool,
						       buf_block_get_space(
							       block),
						       buf_block_get_page_no(
							       block))
				     == &block->page);

#ifdef UNIV_IBUF_COUNT_DEBUG
				ut_a(buf_page_get_io_fix(&block->page)
				     == BUF_IO_READ
				     || !ibuf_count_get(buf_block_get_space(
								block),
							buf_block_get_page_no(
								block)));
#endif
				switch (buf_page_get_io_fix(&block->page)) {
				case BUF_IO_NONE:
					break;

				case BUF_IO_WRITE:
					switch (buf_page_get_flush_type(
							&block->page)) {
					case BUF_FLUSH_LRU:
						n_lru_flush++;
						/* LRU flushing holds an
						s-latch on the frame. */
						ut_a(rw_lock_is_locked(
							     &block->lock,
							     RW_LOCK_SHARED));
						break;
					case BUF_FLUSH_LIST:
						n_list_flush++;
						break;
					case BUF_FLUSH_SINGLE_PAGE:
						n_single_flush++;
						break;
					default:
						ut_error;
					}

					break;

				case BUF_IO_READ:

					/* A pending read holds an x-latch
					on the frame. */
					ut_a(rw_lock_is_locked(&block->lock,
							       RW_LOCK_EX));
					break;
				}

				n_lru++;
				break;

			case BUF_BLOCK_NOT_USED:
				n_free++;
				break;

			case BUF_BLOCK_READY_FOR_USE:
			case BUF_BLOCK_MEMORY:
			case BUF_BLOCK_REMOVE_HASH:
				/* do nothing */
				break;
			}

			mutex_exit(&block->mutex);
		}
	}

	mutex_enter(&buf_pool->zip_mutex);

	/* Check clean compressed-only blocks. */

	for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
	     b = UT_LIST_GET_NEXT(list, b)) {
		ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
		switch (buf_page_get_io_fix(b)) {
		case BUF_IO_NONE:
			/* All clean blocks should be I/O-unfixed. */
			break;
		case BUF_IO_READ:
			/* In buf_LRU_free_block(), we temporarily set
			b->io_fix = BUF_IO_READ for a newly allocated
			control block in order to prevent
			buf_page_get_gen() from decompressing the block. */
			break;
		default:
			ut_error;
			break;
		}

		/* It is OK to read oldest_modification here because
		we have acquired buf_pool->zip_mutex above which acts
		as the 'block->mutex' for these bpages. */
		ut_a(!b->oldest_modification);
		ut_a(buf_page_hash_get(buf_pool, b->space, b->offset) == b);

		n_lru++;
		n_zip++;
	}

	/* Check dirty blocks. */

	buf_flush_list_mutex_enter(buf_pool);
	for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
	     b = UT_LIST_GET_NEXT(list, b)) {
		ut_ad(b->in_flush_list);
		/* Every flush-list entry must carry a modification LSN. */
		ut_a(b->oldest_modification);
		n_flush++;

		switch (buf_page_get_state(b)) {
		case BUF_BLOCK_ZIP_DIRTY:
			/* Dirty compressed-only blocks live on the flush
			list rather than zip_clean; count them here. */
			n_lru++;
			n_zip++;
			switch (buf_page_get_io_fix(b)) {
			case BUF_IO_NONE:
			case BUF_IO_READ:
				break;
			case BUF_IO_WRITE:
				switch (buf_page_get_flush_type(b)) {
				case BUF_FLUSH_LRU:
					n_lru_flush++;
					break;
				case BUF_FLUSH_LIST:
					n_list_flush++;
					break;
				case BUF_FLUSH_SINGLE_PAGE:
					n_single_flush++;
					break;
				default:
					ut_error;
				}
				break;
			}
			break;
		case BUF_BLOCK_FILE_PAGE:
			/* uncompressed page */
			break;
		case BUF_BLOCK_ZIP_FREE:
		case BUF_BLOCK_ZIP_PAGE:
		case BUF_BLOCK_NOT_USED:
		case BUF_BLOCK_READY_FOR_USE:
		case BUF_BLOCK_MEMORY:
		case BUF_BLOCK_REMOVE_HASH:
			ut_error;
			break;
		}
		ut_a(buf_page_hash_get(buf_pool, b->space, b->offset) == b);
	}

	ut_a(UT_LIST_GET_LEN(buf_pool->flush_list) == n_flush);

	buf_flush_list_mutex_exit(buf_pool);

	mutex_exit(&buf_pool->zip_mutex);

	/* n_zip compressed-only pages are counted in n_lru but occupy
	no uncompressed frame, hence the "+ n_zip" allowance. */
	if (n_lru + n_free > buf_pool->curr_size + n_zip) {
		fprintf(stderr, "n LRU %lu, n free %lu, pool %lu zip %lu\n",
			(ulong) n_lru, (ulong) n_free,
			(ulong) buf_pool->curr_size, (ulong) n_zip);
		ut_error;
	}

	ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == n_lru);
	if (UT_LIST_GET_LEN(buf_pool->free) != n_free) {
		fprintf(stderr, "Free list len %lu, free blocks %lu\n",
			(ulong) UT_LIST_GET_LEN(buf_pool->free),
			(ulong) n_free);
		ut_error;
	}

	ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_single_flush);
	ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
	ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush);

	buf_pool_mutex_exit(buf_pool);

	ut_a(buf_LRU_validate());
	ut_a(buf_flush_validate(buf_pool));

	return(TRUE);
}
4468
 
 
4469
 
/*********************************************************************//**
4470
 
Validates the buffer buf_pool data structure.
4471
 
@return TRUE */
4472
 
UNIV_INTERN
4473
 
ibool
4474
 
buf_validate(void)
4475
 
/*==============*/
4476
 
{
4477
 
        ulint   i;
4478
 
 
4479
 
        for (i = 0; i < srv_buf_pool_instances; i++) {
4480
 
                buf_pool_t*     buf_pool;
4481
 
 
4482
 
                buf_pool = buf_pool_from_array(i);
4483
 
 
4484
 
                buf_pool_validate_instance(buf_pool);
4485
 
        }
4486
 
        return(TRUE);
4487
 
}
4488
 
 
4489
 
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
4490
 
 
4491
 
#if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
4492
 
/*********************************************************************//**
Prints info of the buffer buf_pool data structure for one instance.
Writes summary counters to stderr, then counts how many buffered pages
belong to each index and prints one line per index found.  Finally
validates the instance. */
static
void
buf_print_instance(
/*===============*/
	buf_pool_t*	buf_pool)	/*!< in: buffer pool instance */
{
	index_id_t*	index_ids;	/* distinct index ids seen so far */
	ulint*		counts;		/* page count per entry of index_ids */
	ulint		size;
	ulint		i;
	ulint		j;
	index_id_t	id;
	ulint		n_found;	/* number of entries used in index_ids */
	buf_chunk_t*	chunk;
	dict_index_t*	index;

	ut_ad(buf_pool);

	size = buf_pool->curr_size;

	/* One slot per buffered page is enough: there cannot be more
	distinct index ids than pages in the pool.
	NOTE(review): the mem_alloc results are used unchecked —
	presumably mem_alloc aborts on OOM; confirm. */
	index_ids = mem_alloc(size * sizeof *index_ids);
	counts = mem_alloc(sizeof(ulint) * size);

	buf_pool_mutex_enter(buf_pool);
	buf_flush_list_mutex_enter(buf_pool);

	fprintf(stderr,
		"buf_pool size %lu\n"
		"database pages %lu\n"
		"free pages %lu\n"
		"modified database pages %lu\n"
		"n pending decompressions %lu\n"
		"n pending reads %lu\n"
		"n pending flush LRU %lu list %lu single page %lu\n"
		"pages made young %lu, not young %lu\n"
		"pages read %lu, created %lu, written %lu\n",
		(ulong) size,
		(ulong) UT_LIST_GET_LEN(buf_pool->LRU),
		(ulong) UT_LIST_GET_LEN(buf_pool->free),
		(ulong) UT_LIST_GET_LEN(buf_pool->flush_list),
		(ulong) buf_pool->n_pend_unzip,
		(ulong) buf_pool->n_pend_reads,
		(ulong) buf_pool->n_flush[BUF_FLUSH_LRU],
		(ulong) buf_pool->n_flush[BUF_FLUSH_LIST],
		(ulong) buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE],
		(ulong) buf_pool->stat.n_pages_made_young,
		(ulong) buf_pool->stat.n_pages_not_made_young,
		(ulong) buf_pool->stat.n_pages_read,
		(ulong) buf_pool->stat.n_pages_created,
		(ulong) buf_pool->stat.n_pages_written);

	buf_flush_list_mutex_exit(buf_pool);

	/* Count the number of blocks belonging to each index in the buffer */

	n_found = 0;

	chunk = buf_pool->chunks;

	/* Scan every block frame of every chunk.
	NOTE(review): the frame is read regardless of block state —
	presumably frames of unused blocks never carry the
	FIL_PAGE_INDEX type byte; confirm. */
	for (i = buf_pool->n_chunks; i--; chunk++) {
		buf_block_t*	block		= chunk->blocks;
		ulint		n_blocks	= chunk->size;

		for (; n_blocks--; block++) {
			const buf_frame_t* frame = block->frame;

			if (fil_page_get_type(frame) == FIL_PAGE_INDEX) {

				id = btr_page_get_index_id(frame);

				/* Look for the id in the index_ids array
				(linear search over the ids seen so far) */
				j = 0;

				while (j < n_found) {

					if (index_ids[j] == id) {
						counts[j]++;

						break;
					}
					j++;
				}

				/* Not seen before: append a new entry. */
				if (j == n_found) {
					n_found++;
					index_ids[j] = id;
					counts[j] = 1;
				}
			}
		}
	}

	buf_pool_mutex_exit(buf_pool);

	/* Print the per-index counts, resolving index names where the
	index is still in the dictionary cache. */
	for (i = 0; i < n_found; i++) {
		index = dict_index_get_if_in_cache(index_ids[i]);

		fprintf(stderr,
			"Block count for index %llu in buffer is about %lu",
			(ullint) index_ids[i],
			(ulong) counts[i]);

		if (index) {
			putc(' ', stderr);
			dict_index_name_print(stderr, NULL, index);
		}

		putc('\n', stderr);
	}

	mem_free(index_ids);
	mem_free(counts);

	ut_a(buf_pool_validate_instance(buf_pool));
}
4609
 
 
4610
 
/*********************************************************************//**
4611
 
Prints info of the buffer buf_pool data structure. */
4612
 
UNIV_INTERN
4613
 
void
4614
 
buf_print(void)
4615
 
/*===========*/
4616
 
{
4617
 
        ulint   i;
4618
 
 
4619
 
        for (i = 0; i < srv_buf_pool_instances; i++) {
4620
 
                buf_pool_t*     buf_pool;
4621
 
 
4622
 
                buf_pool = buf_pool_from_array(i);
4623
 
                buf_print_instance(buf_pool);
4624
 
        }
4625
 
}
4626
 
#endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */
4627
 
 
4628
 
#ifdef UNIV_DEBUG
4629
 
/*********************************************************************//**
Returns the number of latched pages in the buffer pool.
A page counts as latched when its buf_fix_count is nonzero or it has a
pending I/O fix.  Three structures are scanned under the instance
mutex: the uncompressed block frames of every chunk, the clean
compressed-only page list (zip_clean), and the flush list (for dirty
compressed-only pages).
@return number of latched pages */
UNIV_INTERN
ulint
buf_get_latched_pages_number_instance(
/*==================================*/
	buf_pool_t*	buf_pool)	/*!< in: buffer pool instance */
{
	buf_page_t*	b;
	ulint		i;
	buf_chunk_t*	chunk;
	ulint		fixed_pages_number = 0;

	buf_pool_mutex_enter(buf_pool);

	chunk = buf_pool->chunks;

	/* Pass 1: uncompressed file pages, chunk by chunk. */
	for (i = buf_pool->n_chunks; i--; chunk++) {
		buf_block_t*	block;
		ulint		j;

		block = chunk->blocks;

		for (j = chunk->size; j--; block++) {
			/* Only blocks that hold a file page can be
			latched; skip free/ready/memory blocks. */
			if (buf_block_get_state(block)
			    != BUF_BLOCK_FILE_PAGE) {

				continue;
			}

			/* The block mutex protects buf_fix_count and
			the io_fix state. */
			mutex_enter(&block->mutex);

			if (block->page.buf_fix_count != 0
			    || buf_page_get_io_fix(&block->page)
			    != BUF_IO_NONE) {
				fixed_pages_number++;
			}

			mutex_exit(&block->mutex);
		}
	}

	mutex_enter(&buf_pool->zip_mutex);

	/* Traverse the lists of clean and dirty compressed-only blocks. */

	for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
	     b = UT_LIST_GET_NEXT(list, b)) {
		/* Clean compressed-only pages are never being written. */
		ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
		ut_a(buf_page_get_io_fix(b) != BUF_IO_WRITE);

		if (b->buf_fix_count != 0
		    || buf_page_get_io_fix(b) != BUF_IO_NONE) {
			fixed_pages_number++;
		}
	}

	/* Pass 3: the flush list holds the dirty pages; only the
	compressed-only (ZIP_DIRTY) ones are counted here, since the
	uncompressed ones were already handled in pass 1. */
	buf_flush_list_mutex_enter(buf_pool);
	for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
	     b = UT_LIST_GET_NEXT(list, b)) {
		ut_ad(b->in_flush_list);

		switch (buf_page_get_state(b)) {
		case BUF_BLOCK_ZIP_DIRTY:
			if (b->buf_fix_count != 0
			    || buf_page_get_io_fix(b) != BUF_IO_NONE) {
				fixed_pages_number++;
			}
			break;
		case BUF_BLOCK_FILE_PAGE:
			/* uncompressed page */
			break;
		case BUF_BLOCK_ZIP_FREE:
		case BUF_BLOCK_ZIP_PAGE:
		case BUF_BLOCK_NOT_USED:
		case BUF_BLOCK_READY_FOR_USE:
		case BUF_BLOCK_MEMORY:
		case BUF_BLOCK_REMOVE_HASH:
			/* None of these states may appear on the
			flush list. */
			ut_error;
			break;
		}
	}

	buf_flush_list_mutex_exit(buf_pool);
	mutex_exit(&buf_pool->zip_mutex);
	buf_pool_mutex_exit(buf_pool);

	return(fixed_pages_number);
}
4719
 
 
4720
 
/*********************************************************************//**
4721
 
Returns the number of latched pages in all the buffer pools.
4722
 
@return number of latched pages */
4723
 
UNIV_INTERN
4724
 
ulint
4725
 
buf_get_latched_pages_number(void)
4726
 
/*==============================*/
4727
 
{
4728
 
        ulint   i;
4729
 
        ulint   total_latched_pages = 0;
4730
 
 
4731
 
        for (i = 0; i < srv_buf_pool_instances; i++) {
4732
 
                buf_pool_t*     buf_pool;
4733
 
 
4734
 
                buf_pool = buf_pool_from_array(i);
4735
 
 
4736
 
                total_latched_pages += buf_get_latched_pages_number_instance(
4737
 
                        buf_pool);
4738
 
        }
4739
 
 
4740
 
        return(total_latched_pages);
4741
 
}
4742
 
 
4743
 
#endif /* UNIV_DEBUG */
4744
 
 
4745
 
/*********************************************************************//**
4746
 
Returns the number of pending buf pool ios.
4747
 
@return number of pending I/O operations */
4748
 
UNIV_INTERN
4749
 
ulint
4750
 
buf_get_n_pending_ios(void)
4751
 
/*=======================*/
4752
 
{
4753
 
        ulint   i;
4754
 
        ulint   pend_ios = 0;
4755
 
 
4756
 
        for (i = 0; i < srv_buf_pool_instances; i++) {
4757
 
                buf_pool_t*     buf_pool;
4758
 
 
4759
 
                buf_pool = buf_pool_from_array(i);
4760
 
 
4761
 
                pend_ios +=
4762
 
                        buf_pool->n_pend_reads
4763
 
                        + buf_pool->n_flush[BUF_FLUSH_LRU]
4764
 
                        + buf_pool->n_flush[BUF_FLUSH_LIST]
4765
 
                        + buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE];
4766
 
        }
4767
 
 
4768
 
        return(pend_ios);
4769
 
}
4770
 
 
4771
 
/*********************************************************************//**
4772
 
Returns the ratio in percents of modified pages in the buffer pool /
4773
 
database pages in the buffer pool.
4774
 
@return modified page percentage ratio */
4775
 
UNIV_INTERN
4776
 
ulint
4777
 
buf_get_modified_ratio_pct(void)
4778
 
/*============================*/
4779
 
{
4780
 
        ulint           ratio;
4781
 
        ulint           lru_len = 0;
4782
 
        ulint           free_len = 0;
4783
 
        ulint           flush_list_len = 0;
4784
 
 
4785
 
        buf_get_total_list_len(&lru_len, &free_len, &flush_list_len);
4786
 
 
4787
 
        ratio = (100 * flush_list_len) / (1 + lru_len + free_len);
4788
 
  
4789
 
        /* 1 + is there to avoid division by zero */
4790
 
 
4791
 
        return(ratio);
4792
 
}
4793
 
 
4794
 
/*********************************************************************//**
Prints info of the buffer i/o.
Emits the current pool sizes and pending I/O counters, then per-second
rates computed over the interval since the last printout, and finally
refreshes the snapshot used as the baseline for the next call. */
static
void
buf_print_io_instance(
/*==================*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	FILE*		file)		/*!< in/out: buffer where to print */
{
	time_t	current_time;
	double	time_elapsed;	/* seconds since last printout, > 0 */
	ulint	n_gets_diff;	/* page gets since last printout */

	ut_ad(buf_pool);

	buf_pool_mutex_enter(buf_pool);
	buf_flush_list_mutex_enter(buf_pool);

	/* NOTE(review): the (ulong) casts on the LRU/LIST flush lines
	bind to the first operand only, so the init_flush addend keeps
	its own type — presumably it is already ulint-compatible with
	the %lu specifier; confirm. */
	fprintf(file,
		"Buffer pool size   %lu\n"
		"Free buffers       %lu\n"
		"Database pages     %lu\n"
		"Old database pages %lu\n"
		"Modified db pages  %lu\n"
		"Pending reads %lu\n"
		"Pending writes: LRU %lu, flush list %lu, single page %lu\n",
		(ulong) buf_pool->curr_size,
		(ulong) UT_LIST_GET_LEN(buf_pool->free),
		(ulong) UT_LIST_GET_LEN(buf_pool->LRU),
		(ulong) buf_pool->LRU_old_len,
		(ulong) UT_LIST_GET_LEN(buf_pool->flush_list),
		(ulong) buf_pool->n_pend_reads,
		(ulong) buf_pool->n_flush[BUF_FLUSH_LRU]
		+ buf_pool->init_flush[BUF_FLUSH_LRU],
		(ulong) buf_pool->n_flush[BUF_FLUSH_LIST]
		+ buf_pool->init_flush[BUF_FLUSH_LIST],
		(ulong) buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]);

	buf_flush_list_mutex_exit(buf_pool);

	/* The 0.001 floor keeps time_elapsed strictly positive so the
	rate divisions below cannot divide by zero. */
	current_time = time(NULL);
	time_elapsed = 0.001 + difftime(current_time,
					buf_pool->last_printout_time);

	/* Per-second rates are deltas against old_stat, the snapshot
	taken at the previous printout. */
	fprintf(file,
		"Pages made young %lu, not young %lu\n"
		"%.2f youngs/s, %.2f non-youngs/s\n"
		"Pages read %lu, created %lu, written %lu\n"
		"%.2f reads/s, %.2f creates/s, %.2f writes/s\n",
		(ulong) buf_pool->stat.n_pages_made_young,
		(ulong) buf_pool->stat.n_pages_not_made_young,
		(buf_pool->stat.n_pages_made_young
		 - buf_pool->old_stat.n_pages_made_young)
		/ time_elapsed,
		(buf_pool->stat.n_pages_not_made_young
		 - buf_pool->old_stat.n_pages_not_made_young)
		/ time_elapsed,
		(ulong) buf_pool->stat.n_pages_read,
		(ulong) buf_pool->stat.n_pages_created,
		(ulong) buf_pool->stat.n_pages_written,
		(buf_pool->stat.n_pages_read
		 - buf_pool->old_stat.n_pages_read)
		/ time_elapsed,
		(buf_pool->stat.n_pages_created
		 - buf_pool->old_stat.n_pages_created)
		/ time_elapsed,
		(buf_pool->stat.n_pages_written
		 - buf_pool->old_stat.n_pages_written)
		/ time_elapsed);

	n_gets_diff = buf_pool->stat.n_page_gets
		    - buf_pool->old_stat.n_page_gets;

	/* The hit-rate and young-making ratios divide by the number of
	page gets in the interval, so skip them when there were none. */
	if (n_gets_diff) {
		fprintf(file,
			"Buffer pool hit rate %lu / 1000,"
			" young-making rate %lu / 1000 not %lu / 1000\n",
			(ulong)
			(1000 - ((1000 * (buf_pool->stat.n_pages_read
					  - buf_pool->old_stat.n_pages_read))
				 / (buf_pool->stat.n_page_gets
				    - buf_pool->old_stat.n_page_gets))),
			(ulong)
			(1000 * (buf_pool->stat.n_pages_made_young
				 - buf_pool->old_stat.n_pages_made_young)
			 / n_gets_diff),
			(ulong)
			(1000 * (buf_pool->stat.n_pages_not_made_young
				 - buf_pool->old_stat.n_pages_not_made_young)
			 / n_gets_diff));
	} else {
		fputs("No buffer pool page gets since the last printout\n",
		      file);
	}

	/* Statistics about read ahead algorithm */
	fprintf(file, "Pages read ahead %.2f/s,"
		" evicted without access %.2f/s\n",
		(buf_pool->stat.n_ra_pages_read
		- buf_pool->old_stat.n_ra_pages_read)
		/ time_elapsed,
		(buf_pool->stat.n_ra_pages_evicted
		- buf_pool->old_stat.n_ra_pages_evicted)
		/ time_elapsed);

	/* Print some values to help us with visualizing what is
	happening with LRU eviction. */
	fprintf(file,
		"LRU len: %lu, unzip_LRU len: %lu\n"
		"I/O sum[%lu]:cur[%lu], unzip sum[%lu]:cur[%lu]\n",
		static_cast<ulint>(UT_LIST_GET_LEN(buf_pool->LRU)),
		static_cast<ulint>(UT_LIST_GET_LEN(buf_pool->unzip_LRU)),
		buf_LRU_stat_sum.io, buf_LRU_stat_cur.io,
		buf_LRU_stat_sum.unzip, buf_LRU_stat_cur.unzip);

	/* Reset the baseline so the next printout computes rates over
	the new interval. */
	buf_refresh_io_stats(buf_pool);
	buf_pool_mutex_exit(buf_pool);
}
4912
 
 
4913
 
/*********************************************************************//**
4914
 
Prints info of the buffer i/o. */
4915
 
UNIV_INTERN
4916
 
void
4917
 
buf_print_io(
4918
 
/*=========*/
4919
 
        FILE*   file)   /*!< in/out: buffer where to print */
4920
 
{
4921
 
        ulint   i;
4922
 
 
4923
 
        for (i = 0; i < srv_buf_pool_instances; i++) {
4924
 
                buf_pool_t*     buf_pool;
4925
 
 
4926
 
                buf_pool = buf_pool_from_array(i);
4927
 
                buf_print_io_instance(buf_pool, file);
4928
 
        }
4929
 
}
4930
 
 
4931
 
/**********************************************************************//**
4932
 
Refreshes the statistics used to print per-second averages. */
4933
 
UNIV_INTERN
4934
 
void
4935
 
buf_refresh_io_stats(
4936
 
/*=================*/
4937
 
        buf_pool_t*     buf_pool)       /*!< in: buffer pool instance */
4938
 
{
4939
 
        buf_pool->last_printout_time = ut_time();
4940
 
        buf_pool->old_stat = buf_pool->stat;
4941
 
}
4942
 
 
4943
 
/**********************************************************************//**
4944
 
Refreshes the statistics used to print per-second averages. */
4945
 
UNIV_INTERN
4946
 
void
4947
 
buf_refresh_io_stats_all(void)
4948
 
/*==========================*/
4949
 
{
4950
 
        ulint           i;
4951
 
 
4952
 
        for (i = 0; i < srv_buf_pool_instances; i++) {
4953
 
                buf_pool_t*     buf_pool;
4954
 
 
4955
 
                buf_pool = buf_pool_from_array(i);
4956
 
 
4957
 
                buf_refresh_io_stats(buf_pool);
4958
 
        }
4959
 
}
4960
 
 
4961
 
/**********************************************************************//**
4962
 
Check if all pages in all buffer pools are in a replacable state.
4963
 
@return FALSE if not */
4964
 
UNIV_INTERN
4965
 
ibool
4966
 
buf_all_freed(void)
4967
 
/*===============*/
4968
 
{
4969
 
        ulint   i;
4970
 
 
4971
 
        for (i = 0; i < srv_buf_pool_instances; i++) {
4972
 
                buf_pool_t*     buf_pool;
4973
 
 
4974
 
                buf_pool = buf_pool_from_array(i);
4975
 
 
4976
 
                if (!buf_all_freed_instance(buf_pool)) {
4977
 
                        return(FALSE);
4978
 
                }
4979
 
        }
4980
 
 
4981
 
        return(TRUE);
4982
 
}
4983
 
  
4984
 
/*********************************************************************//**
4985
 
Checks that there currently are no pending i/o-operations for the buffer
4986
 
pool.
4987
 
@return TRUE if there is no pending i/o */
4988
 
UNIV_INTERN
4989
 
ibool
4990
 
buf_pool_check_no_pending_io(void)
4991
 
/*==============================*/
4992
 
{
4993
 
        ulint           i;
4994
 
        ibool           ret = TRUE;
4995
 
 
4996
 
        buf_pool_mutex_enter_all();
4997
 
 
4998
 
        for (i = 0; i < srv_buf_pool_instances && ret; i++) {
4999
 
                const buf_pool_t*       buf_pool;
5000
 
 
5001
 
                buf_pool = buf_pool_from_array(i);
5002
 
 
5003
 
                if (buf_pool->n_pend_reads
5004
 
                    + buf_pool->n_flush[BUF_FLUSH_LRU]
5005
 
                    + buf_pool->n_flush[BUF_FLUSH_LIST]
5006
 
                    + buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]) {
5007
 
 
5008
 
                        ret = FALSE;
5009
 
                }
5010
 
        }
5011
 
 
5012
 
        buf_pool_mutex_exit_all();
5013
 
 
5014
 
        return(ret);
5015
 
}
5016
 
 
5017
 
#if 0
Code currently not used
/*********************************************************************//**
Gets the current length of the free list of buffer blocks.
@return length of the free list */
UNIV_INTERN
ulint
buf_get_free_list_len(void)
/*=======================*/
{
	ulint	len;

	/* NOTE(review): `buf_pool` is not declared anywhere in this
	disabled function (it takes void); since the split into
	multiple buffer pool instances it would need an explicit
	instance argument before it could be re-enabled. */
	buf_pool_mutex_enter(buf_pool);

	len = UT_LIST_GET_LEN(buf_pool->free);

	buf_pool_mutex_exit(buf_pool);

	return(len);
}
#endif
5038
 
 
5039
 
#else /* !UNIV_HOTBACKUP */
5040
 
/********************************************************************//**
Inits a page to the buffer buf_pool, for use in ibbackup --restore.
Marks the block as holding a file page at (space, offset) and sets up
its compressed-page descriptor. */
UNIV_INTERN
void
buf_page_init_for_backup_restore(
/*=============================*/
	ulint		space,	/*!< in: space id */
	ulint		offset,	/*!< in: offset of the page within space
				in units of a page */
	ulint		zip_size,/*!< in: compressed page size in bytes
				or 0 for uncompressed pages */
	buf_block_t*	block)	/*!< in: block to init */
{
	block->page.state	= BUF_BLOCK_FILE_PAGE;
	block->page.space	= space;
	block->page.offset	= offset;

	page_zip_des_init(&block->page.zip);

	/* We assume that block->page.data has been allocated
	with zip_size == UNIV_PAGE_SIZE. */
	ut_ad(zip_size <= UNIV_PAGE_SIZE);
	ut_ad(ut_is_2pow(zip_size));
	page_zip_set_size(&block->page.zip, zip_size);
	if (zip_size) {
		/* The compressed copy lives immediately after the
		uncompressed frame in the same allocation. */
		block->page.zip.data = block->frame + UNIV_PAGE_SIZE;
	}
}
5068
 
#endif /* !UNIV_HOTBACKUP */