~drizzle-trunk/drizzle/development

« back to all changes in this revision

Viewing changes to plugin/innobase/buf/buf0buf.c

  • Committer: Brian Aker
  • Date: 2009-06-03 19:30:45 UTC
  • mfrom: (1046.1.6 merge)
  • Revision ID: brian@gaz-20090603193045-4xgeczyfixh07beg
Merge for Brian

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
/*****************************************************************************
2
 
 
3
 
Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
4
 
Copyright (c) 2008, Google Inc.
5
 
 
6
 
Portions of this file contain modifications contributed and copyrighted by
7
 
Google, Inc. Those modifications are gratefully acknowledged and are described
8
 
briefly in the InnoDB documentation. The contributions by Google are
9
 
incorporated with their permission, and subject to the conditions contained in
10
 
the file COPYING.Google.
11
 
 
12
 
This program is free software; you can redistribute it and/or modify it under
13
 
the terms of the GNU General Public License as published by the Free Software
14
 
Foundation; version 2 of the License.
15
 
 
16
 
This program is distributed in the hope that it will be useful, but WITHOUT
17
 
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18
 
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
19
 
 
20
 
You should have received a copy of the GNU General Public License along with
21
 
this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
22
 
St, Fifth Floor, Boston, MA 02110-1301 USA
23
 
 
24
 
*****************************************************************************/
25
 
 
26
 
/**************************************************//**
27
 
@file buf/buf0buf.c
28
 
The database buffer buf_pool
29
 
 
30
 
Created 11/5/1995 Heikki Tuuri
31
 
*******************************************************/
32
 
 
33
 
#include "buf0buf.h"
34
 
 
35
 
#ifdef UNIV_NONINL
36
 
#include "buf0buf.ic"
37
 
#endif
38
 
 
39
 
#include "mem0mem.h"
40
 
#include "btr0btr.h"
41
 
#include "fil0fil.h"
42
 
#ifndef UNIV_HOTBACKUP
43
 
#include "buf0buddy.h"
44
 
#include "lock0lock.h"
45
 
#include "btr0sea.h"
46
 
#include "ibuf0ibuf.h"
47
 
#include "trx0undo.h"
48
 
#include "log0log.h"
49
 
#endif /* !UNIV_HOTBACKUP */
50
 
#include "srv0srv.h"
51
 
#include "dict0dict.h"
52
 
#include "log0recv.h"
53
 
#include "page0zip.h"
54
 
 
55
 
/*
56
 
                IMPLEMENTATION OF THE BUFFER POOL
57
 
                =================================
58
 
 
59
 
Performance improvement:
60
 
------------------------
61
 
Thread scheduling in NT may be so slow that the OS wait mechanism should
62
 
not be used even in waiting for disk reads to complete.
63
 
Rather, we should put waiting query threads to the queue of
64
 
waiting jobs, and let the OS thread do something useful while the i/o
65
 
is processed. In this way we could remove most OS thread switches in
66
 
an i/o-intensive benchmark like TPC-C.
67
 
 
68
 
A possibility is to put a user space thread library between the database
69
 
and NT. User space thread libraries might be very fast.
70
 
 
71
 
SQL Server 7.0 can be configured to use 'fibers' which are lightweight
72
 
threads in NT. These should be studied.
73
 
 
74
 
                Buffer frames and blocks
75
 
                ------------------------
76
 
Following the terminology of Gray and Reuter, we call the memory
77
 
blocks where file pages are loaded buffer frames. For each buffer
78
 
frame there is a control block, or shortly, a block, in the buffer
79
 
control array. The control info which does not need to be stored
80
 
in the file along with the file page, resides in the control block.
81
 
 
82
 
                Buffer pool struct
83
 
                ------------------
84
 
The buffer buf_pool contains a single mutex which protects all the
85
 
control data structures of the buf_pool. The content of a buffer frame is
86
 
protected by a separate read-write lock in its control block, though.
87
 
These locks can be locked and unlocked without owning the buf_pool->mutex.
88
 
The OS events in the buf_pool struct can be waited for without owning the
89
 
buf_pool->mutex.
90
 
 
91
 
The buf_pool->mutex is a hot-spot in main memory, causing a lot of
92
 
memory bus traffic on multiprocessor systems when processors
93
 
alternately access the mutex. On our Pentium, the mutex is accessed
94
 
maybe every 10 microseconds. We gave up the solution to have mutexes
95
 
for each control block, for instance, because it seemed to be
96
 
complicated.
97
 
 
98
 
A solution to reduce mutex contention of the buf_pool->mutex is to
99
 
create a separate mutex for the page hash table. On Pentium,
100
 
accessing the hash table takes 2 microseconds, about half
101
 
of the total buf_pool->mutex hold time.
102
 
 
103
 
                Control blocks
104
 
                --------------
105
 
 
106
 
The control block contains, for instance, the bufferfix count
107
 
which is incremented when a thread wants a file page to be fixed
108
 
in a buffer frame. The bufferfix operation does not lock the
109
 
contents of the frame, however. For this purpose, the control
110
 
block contains a read-write lock.
111
 
 
112
 
The buffer frames have to be aligned so that the start memory
113
 
address of a frame is divisible by the universal page size, which
114
 
is a power of two.
115
 
 
116
 
We intend to make the buffer buf_pool size on-line reconfigurable,
117
 
that is, the buf_pool size can be changed without closing the database.
118
 
Then the database administrator may adjust it to be bigger
119
 
at night, for example. The control block array must
120
 
contain enough control blocks for the maximum buffer buf_pool size
121
 
which is used in the particular database.
122
 
If the buf_pool size is cut, we exploit the virtual memory mechanism of
123
 
the OS, and just refrain from using frames at high addresses. Then the OS
124
 
can swap them to disk.
125
 
 
126
 
The control blocks containing file pages are put to a hash table
127
 
according to the file address of the page.
128
 
We could speed up the access to an individual page by using
129
 
"pointer swizzling": we could replace the page references on
130
 
non-leaf index pages by direct pointers to the page, if it exists
131
 
in the buf_pool. We could make a separate hash table where we could
132
 
chain all the page references in non-leaf pages residing in the buf_pool,
133
 
using the page reference as the hash key,
134
 
and at the time of reading of a page update the pointers accordingly.
135
 
Drawbacks of this solution are added complexity and,
136
 
possibly, extra space required on non-leaf pages for memory pointers.
137
 
A simpler solution is just to speed up the hash table mechanism
138
 
in the database, using tables whose size is a power of 2.
139
 
 
140
 
                Lists of blocks
141
 
                ---------------
142
 
 
143
 
There are several lists of control blocks.
144
 
 
145
 
The free list (buf_pool->free) contains blocks which are currently not
146
 
used.
147
 
 
148
 
The common LRU list contains all the blocks holding a file page
149
 
except those for which the bufferfix count is non-zero.
150
 
The pages are in the LRU list roughly in the order of the last
151
 
access to the page, so that the oldest pages are at the end of the
152
 
list. We also keep a pointer to near the end of the LRU list,
153
 
which we can use when we want to artificially age a page in the
154
 
buf_pool. This is used if we know that some page is not needed
155
 
again for some time: we insert the block right after the pointer,
156
 
causing it to be replaced sooner than would normally be the case.
157
 
Currently this aging mechanism is used for the read-ahead mechanism
158
 
of pages, and it can also be used when there is a scan of a full
159
 
table which cannot fit in the memory. Putting the pages near the
160
 
end of the LRU list, we make sure that most of the buf_pool stays
161
 
in the main memory, undisturbed.
162
 
 
163
 
The unzip_LRU list contains a subset of the common LRU list.  The
164
 
blocks on the unzip_LRU list hold a compressed file page and the
165
 
corresponding uncompressed page frame.  A block is in unzip_LRU if and
166
 
only if the predicate buf_page_belongs_to_unzip_LRU(&block->page)
167
 
holds.  The blocks in unzip_LRU will be in the same order as they are in
168
 
the common LRU list.  That is, each manipulation of the common LRU
169
 
list will result in the same manipulation of the unzip_LRU list.
170
 
 
171
 
The chain of modified blocks (buf_pool->flush_list) contains the blocks
172
 
holding file pages that have been modified in the memory
173
 
but not written to disk yet. The block with the oldest modification
174
 
which has not yet been written to disk is at the end of the chain.
175
 
The access to this list is protected by flush_list_mutex.
176
 
 
177
 
The chain of unmodified compressed blocks (buf_pool->zip_clean)
178
 
contains the control blocks (buf_page_t) of those compressed pages
179
 
that are not in buf_pool->flush_list and for which no uncompressed
180
 
page has been allocated in the buffer pool.  The control blocks for
181
 
uncompressed pages are accessible via buf_block_t objects that are
182
 
reachable via buf_pool->chunks[].
183
 
 
184
 
The chains of free memory blocks (buf_pool->zip_free[]) are used by
185
 
the buddy allocator (buf0buddy.c) to keep track of currently unused
186
 
memory blocks of size sizeof(buf_page_t)..UNIV_PAGE_SIZE / 2.  These
187
 
blocks are inside the UNIV_PAGE_SIZE-sized memory blocks of type
188
 
BUF_BLOCK_MEMORY that the buddy allocator requests from the buffer
189
 
pool.  The buddy allocator is solely used for allocating control
190
 
blocks for compressed pages (buf_page_t) and compressed page frames.
191
 
 
192
 
                Loading a file page
193
 
                -------------------
194
 
 
195
 
First, a victim block for replacement has to be found in the
196
 
buf_pool. It is taken from the free list or searched for from the
197
 
end of the LRU-list. An exclusive lock is reserved for the frame,
198
 
the io_fix field is set in the block fixing the block in buf_pool,
199
 
and the io-operation for loading the page is queued. The io-handler thread
200
 
releases the X-lock on the frame and resets the io_fix field
201
 
when the io operation completes.
202
 
 
203
 
A thread may request the above operation using the function
204
 
buf_page_get(). It may then continue to request a lock on the frame.
205
 
The lock is granted when the io-handler releases the x-lock.
206
 
 
207
 
                Read-ahead
208
 
                ----------
209
 
 
210
 
The read-ahead mechanism is intended to be intelligent and
211
 
isolated from the semantically higher levels of the database
212
 
index management. From the higher level we only need the
213
 
information if a file page has a natural successor or
214
 
predecessor page. On the leaf level of a B-tree index,
215
 
these are the next and previous pages in the natural
216
 
order of the pages.
217
 
 
218
 
Let us first explain the read-ahead mechanism when the leafs
219
 
of a B-tree are scanned in an ascending or descending order.
220
 
When a read page is referenced for the first time in the buf_pool,
221
 
the buffer manager checks if it is at the border of a so-called
222
 
linear read-ahead area. The tablespace is divided into these
223
 
areas of size 64 blocks, for example. So if the page is at the
224
 
border of such an area, the read-ahead mechanism checks if
225
 
all the other blocks in the area have been accessed in an
226
 
ascending or descending order. If this is the case, the system
227
 
looks at the natural successor or predecessor of the page,
228
 
checks if that is at the border of another area, and in this case
229
 
issues read-requests for all the pages in that area. Maybe
230
 
we could relax the condition that all the pages in the area
231
 
have to be accessed: if data is deleted from a table, there may
232
 
appear holes of unused pages in the area.
233
 
 
234
 
A different read-ahead mechanism is used when there appears
235
 
to be a random access pattern to a file.
236
 
If a new page is referenced in the buf_pool, and several pages
237
 
of its random access area (for instance, 32 consecutive pages
238
 
in a tablespace) have recently been referenced, we may predict
239
 
that the whole area may be needed in the near future, and issue
240
 
the read requests for the whole area.
241
 
*/
242
 
 
243
 
#ifndef UNIV_HOTBACKUP
244
 
/** Value in microseconds */
245
 
static const int WAIT_FOR_READ  = 5000;
246
 
/** Number of attemtps made to read in a page in the buffer pool */
247
 
static const ulint BUF_PAGE_READ_MAX_RETRIES = 100;
248
 
 
249
 
/** The buffer buf_pool of the database */
250
 
UNIV_INTERN buf_pool_t* buf_pool_ptr[MAX_BUFFER_POOLS];
251
 
 
252
 
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
253
 
static ulint    buf_dbg_counter = 0; /*!< This is used to insert validation
254
 
                                        operations in execution in the
255
 
                                        debug version */
256
 
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
257
 
#ifdef UNIV_DEBUG
258
 
/** If this is set TRUE, the program prints info whenever
259
 
read-ahead or flush occurs */
260
 
UNIV_INTERN ibool               buf_debug_prints = FALSE;
261
 
#endif /* UNIV_DEBUG */
262
 
 
263
 
#ifdef UNIV_PFS_RWLOCK
264
 
/* Keys to register buffer block related rwlocks and mutexes with
265
 
performance schema */
266
 
UNIV_INTERN mysql_pfs_key_t     buf_block_lock_key;
267
 
# ifdef UNIV_SYNC_DEBUG
268
 
UNIV_INTERN mysql_pfs_key_t     buf_block_debug_latch_key;
269
 
# endif /* UNIV_SYNC_DEBUG */
270
 
#endif /* UNIV_PFS_RWLOCK */
271
 
 
272
 
#ifdef UNIV_PFS_MUTEX
273
 
UNIV_INTERN mysql_pfs_key_t     buffer_block_mutex_key;
274
 
UNIV_INTERN mysql_pfs_key_t     buf_pool_mutex_key;
275
 
UNIV_INTERN mysql_pfs_key_t     buf_pool_zip_mutex_key;
276
 
UNIV_INTERN mysql_pfs_key_t     flush_list_mutex_key;
277
 
#endif /* UNIV_PFS_MUTEX */
278
 
 
279
 
#if defined UNIV_PFS_MUTEX || defined UNIV_PFS_RWLOCK
280
 
# ifndef PFS_SKIP_BUFFER_MUTEX_RWLOCK
281
 
 
282
 
/* Buffer block mutexes and rwlocks can be registered
283
 
in one group rather than individually. If PFS_GROUP_BUFFER_SYNC
284
 
is defined, register buffer block mutex and rwlock
285
 
in one group after their initialization. */
286
 
#  define PFS_GROUP_BUFFER_SYNC
287
 
 
288
 
/* This define caps the number of mutexes/rwlocks can
289
 
be registered with performance schema. Developers can
290
 
modify this define if necessary. Please note, this would
291
 
be effective only if PFS_GROUP_BUFFER_SYNC is defined. */
292
 
#  define PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER    ULINT_MAX
293
 
 
294
 
# endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
295
 
#endif /* UNIV_PFS_MUTEX || UNIV_PFS_RWLOCK */
296
 
 
297
 
/** A chunk of buffers.  The buffer pool is allocated in chunks. */
298
 
struct buf_chunk_struct{
299
 
        ulint           mem_size;       /*!< allocated size of the chunk */
300
 
        ulint           size;           /*!< size of frames[] and blocks[] */
301
 
        void*           mem;            /*!< pointer to the memory area which
302
 
                                        was allocated for the frames */
303
 
        buf_block_t*    blocks;         /*!< array of buffer control blocks */
304
 
};
305
 
#endif /* !UNIV_HOTBACKUP */
306
 
 
307
 
/********************************************************************//**
308
 
Gets the smallest oldest_modification lsn for any page in the pool. Returns
309
 
zero if all modified pages have been flushed to disk.
310
 
@return oldest modification in pool, zero if none */
311
 
UNIV_INTERN
312
 
ib_uint64_t
313
 
buf_pool_get_oldest_modification(void)
314
 
/*==================================*/
315
 
{
316
 
        ulint           i;
317
 
        buf_page_t*     bpage;
318
 
        ib_uint64_t     lsn = 0;
319
 
        ib_uint64_t     oldest_lsn = 0;
320
 
 
321
 
        /* When we traverse all the flush lists we don't want another
322
 
        thread to add a dirty page to any flush list. */
323
 
        log_flush_order_mutex_enter();
324
 
 
325
 
        for (i = 0; i < srv_buf_pool_instances; i++) {
326
 
                buf_pool_t*     buf_pool;
327
 
 
328
 
                buf_pool = buf_pool_from_array(i);
329
 
 
330
 
                buf_flush_list_mutex_enter(buf_pool);
331
 
 
332
 
                bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
333
 
 
334
 
                if (bpage != NULL) {
335
 
                        ut_ad(bpage->in_flush_list);
336
 
                        lsn = bpage->oldest_modification;
337
 
                }
338
 
 
339
 
                buf_flush_list_mutex_exit(buf_pool);
340
 
 
341
 
                if (!oldest_lsn || oldest_lsn > lsn) {
342
 
                        oldest_lsn = lsn;
343
 
                }
344
 
        }
345
 
 
346
 
        log_flush_order_mutex_exit();
347
 
 
348
 
        /* The returned answer may be out of date: the flush_list can
349
 
        change after the mutex has been released. */
350
 
 
351
 
        return(oldest_lsn);
352
 
}
353
 
 
354
 
/********************************************************************//**
355
 
Get total buffer pool statistics. */
356
 
UNIV_INTERN
357
 
void
358
 
buf_get_total_list_len(
359
 
/*===================*/
360
 
        ulint*          LRU_len,        /*!< out: length of all LRU lists */
361
 
        ulint*          free_len,       /*!< out: length of all free lists */
362
 
        ulint*          flush_list_len) /*!< out: length of all flush lists */
363
 
{
364
 
        ulint           i;
365
 
 
366
 
        *LRU_len = 0;
367
 
        *free_len = 0;
368
 
        *flush_list_len = 0;
369
 
 
370
 
        for (i = 0; i < srv_buf_pool_instances; i++) {
371
 
                buf_pool_t*     buf_pool;
372
 
 
373
 
                buf_pool = buf_pool_from_array(i);
374
 
                *LRU_len += UT_LIST_GET_LEN(buf_pool->LRU);
375
 
                *free_len += UT_LIST_GET_LEN(buf_pool->free);
376
 
                *flush_list_len += UT_LIST_GET_LEN(buf_pool->flush_list);
377
 
        }
378
 
}
379
 
 
380
 
/********************************************************************//**
381
 
Get total buffer pool statistics. */
382
 
UNIV_INTERN
383
 
void
384
 
buf_get_total_stat(
385
 
/*===============*/
386
 
        buf_pool_stat_t*        tot_stat)       /*!< out: buffer pool stats */
387
 
{
388
 
        ulint                   i;
389
 
 
390
 
        memset(tot_stat, 0, sizeof(*tot_stat));
391
 
 
392
 
        for (i = 0; i < srv_buf_pool_instances; i++) {
393
 
                buf_pool_stat_t*buf_stat;
394
 
                buf_pool_t*     buf_pool;
395
 
 
396
 
                buf_pool = buf_pool_from_array(i);
397
 
 
398
 
                buf_stat = &buf_pool->stat;
399
 
                tot_stat->n_page_gets += buf_stat->n_page_gets;
400
 
                tot_stat->n_pages_read += buf_stat->n_pages_read;
401
 
                tot_stat->n_pages_written += buf_stat->n_pages_written;
402
 
                tot_stat->n_pages_created += buf_stat->n_pages_created;
403
 
                tot_stat->n_ra_pages_read += buf_stat->n_ra_pages_read;
404
 
                tot_stat->n_ra_pages_evicted += buf_stat->n_ra_pages_evicted;
405
 
                tot_stat->n_pages_made_young += buf_stat->n_pages_made_young;
406
 
 
407
 
                tot_stat->n_pages_not_made_young +=
408
 
                        buf_stat->n_pages_not_made_young;
409
 
        }
410
 
}
411
 
 
412
 
/********************************************************************//**
413
 
Allocates a buffer block.
414
 
@return own: the allocated block, in state BUF_BLOCK_MEMORY */
415
 
UNIV_INTERN
416
 
buf_block_t*
417
 
buf_block_alloc(
418
 
/*============*/
419
 
        buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
420
 
        ulint           zip_size)       /*!< in: compressed page size in bytes,
421
 
                                        or 0 if uncompressed tablespace */
422
 
{
423
 
        buf_block_t*    block;
424
 
        ulint           index;
425
 
        static ulint    buf_pool_index;
426
 
 
427
 
        if (buf_pool == NULL) {
428
 
                /* We are allocating memory from any buffer pool, ensure
429
 
                we spread the grace on all buffer pool instances. */
430
 
                index = buf_pool_index++ % srv_buf_pool_instances;
431
 
                buf_pool = buf_pool_from_array(index);
432
 
        }
433
 
 
434
 
        block = buf_LRU_get_free_block(buf_pool, zip_size);
435
 
 
436
 
        buf_block_set_state(block, BUF_BLOCK_MEMORY);
437
 
 
438
 
        return(block);
439
 
}
440
 
 
441
 
/********************************************************************//**
442
 
Calculates a page checksum which is stored to the page when it is written
443
 
to a file. Note that we must be careful to calculate the same value on
444
 
32-bit and 64-bit architectures.
445
 
@return checksum */
446
 
UNIV_INTERN
447
 
ulint
448
 
buf_calc_page_new_checksum(
449
 
/*=======================*/
450
 
        const byte*     page)   /*!< in: buffer page */
451
 
{
452
 
        ulint checksum;
453
 
 
454
 
        /* Since the field FIL_PAGE_FILE_FLUSH_LSN, and in versions <= 4.1.x
455
 
        ..._ARCH_LOG_NO, are written outside the buffer pool to the first
456
 
        pages of data files, we have to skip them in the page checksum
457
 
        calculation.
458
 
        We must also skip the field FIL_PAGE_SPACE_OR_CHKSUM where the
459
 
        checksum is stored, and also the last 8 bytes of page because
460
 
        there we store the old formula checksum. */
461
 
 
462
 
        checksum = ut_fold_binary(page + FIL_PAGE_OFFSET,
463
 
                                  FIL_PAGE_FILE_FLUSH_LSN - FIL_PAGE_OFFSET)
464
 
                + ut_fold_binary(page + FIL_PAGE_DATA,
465
 
                                 UNIV_PAGE_SIZE - FIL_PAGE_DATA
466
 
                                 - FIL_PAGE_END_LSN_OLD_CHKSUM);
467
 
        checksum = checksum & 0xFFFFFFFFUL;
468
 
 
469
 
        return(checksum);
470
 
}
471
 
 
472
 
/********************************************************************//**
473
 
In versions < 4.0.14 and < 4.1.1 there was a bug that the checksum only
474
 
looked at the first few bytes of the page. This calculates that old
475
 
checksum.
476
 
NOTE: we must first store the new formula checksum to
477
 
FIL_PAGE_SPACE_OR_CHKSUM before calculating and storing this old checksum
478
 
because this takes that field as an input!
479
 
@return checksum */
480
 
UNIV_INTERN
481
 
ulint
482
 
buf_calc_page_old_checksum(
483
 
/*=======================*/
484
 
        const byte*     page)   /*!< in: buffer page */
485
 
{
486
 
        ulint checksum;
487
 
 
488
 
        checksum = ut_fold_binary(page, FIL_PAGE_FILE_FLUSH_LSN);
489
 
 
490
 
        checksum = checksum & 0xFFFFFFFFUL;
491
 
 
492
 
        return(checksum);
493
 
}
494
 
 
495
 
/********************************************************************//**
496
 
Checks if a page is corrupt.
497
 
@return TRUE if corrupted */
498
 
UNIV_INTERN
499
 
ibool
500
 
buf_page_is_corrupted(
501
 
/*==================*/
502
 
        const byte*     read_buf,       /*!< in: a database page */
503
 
        ulint           zip_size)       /*!< in: size of compressed page;
504
 
                                        0 for uncompressed pages */
505
 
{
506
 
        ulint           checksum_field;
507
 
        ulint           old_checksum_field;
508
 
 
509
 
        if (UNIV_LIKELY(!zip_size)
510
 
            && memcmp(read_buf + FIL_PAGE_LSN + 4,
511
 
                      read_buf + UNIV_PAGE_SIZE
512
 
                      - FIL_PAGE_END_LSN_OLD_CHKSUM + 4, 4)) {
513
 
 
514
 
                /* Stored log sequence numbers at the start and the end
515
 
                of page do not match */
516
 
 
517
 
                return(TRUE);
518
 
        }
519
 
 
520
 
#ifndef UNIV_HOTBACKUP
521
 
        if (recv_lsn_checks_on) {
522
 
                ib_uint64_t     current_lsn;
523
 
 
524
 
                if (log_peek_lsn(&current_lsn)
525
 
                    && UNIV_UNLIKELY
526
 
                    (current_lsn
527
 
                     < mach_read_from_8(read_buf + FIL_PAGE_LSN))) {
528
 
                        ut_print_timestamp(stderr);
529
 
 
530
 
                        fprintf(stderr,
531
 
                                "  InnoDB: Error: page %lu log sequence number"
532
 
                                " %"PRIu64"\n"
533
 
                                "InnoDB: is in the future! Current system "
534
 
                                "log sequence number %"PRIu64".\n"
535
 
                                "InnoDB: Your database may be corrupt or "
536
 
                                "you may have copied the InnoDB\n"
537
 
                                "InnoDB: tablespace but not the InnoDB "
538
 
                                "log files. See\n"
539
 
                                "InnoDB: " REFMAN "forcing-recovery.html\n"
540
 
                                "InnoDB: for more information.\n",
541
 
                                (ulong) mach_read_from_4(read_buf
542
 
                                                         + FIL_PAGE_OFFSET),
543
 
                                mach_read_from_8(read_buf + FIL_PAGE_LSN),
544
 
                                current_lsn);
545
 
                }
546
 
        }
547
 
#endif
548
 
 
549
 
        /* If we use checksums validation, make additional check before
550
 
        returning TRUE to ensure that the checksum is not equal to
551
 
        BUF_NO_CHECKSUM_MAGIC which might be stored by InnoDB with checksums
552
 
        disabled. Otherwise, skip checksum calculation and return FALSE */
553
 
 
554
 
        if (UNIV_LIKELY(srv_use_checksums)) {
555
 
                checksum_field = mach_read_from_4(read_buf
556
 
                                                  + FIL_PAGE_SPACE_OR_CHKSUM);
557
 
 
558
 
                if (UNIV_UNLIKELY(zip_size)) {
559
 
                        return(checksum_field != BUF_NO_CHECKSUM_MAGIC
560
 
                               && checksum_field
561
 
                               != page_zip_calc_checksum(read_buf, zip_size));
562
 
                }
563
 
 
564
 
                old_checksum_field = mach_read_from_4(
565
 
                        read_buf + UNIV_PAGE_SIZE
566
 
                        - FIL_PAGE_END_LSN_OLD_CHKSUM);
567
 
 
568
 
                /* There are 2 valid formulas for old_checksum_field:
569
 
 
570
 
                1. Very old versions of InnoDB only stored 8 byte lsn to the
571
 
                start and the end of the page.
572
 
 
573
 
                2. Newer InnoDB versions store the old formula checksum
574
 
                there. */
575
 
 
576
 
                if (old_checksum_field != mach_read_from_4(read_buf
577
 
                                                           + FIL_PAGE_LSN)
578
 
                    && old_checksum_field != BUF_NO_CHECKSUM_MAGIC
579
 
                    && old_checksum_field
580
 
                    != buf_calc_page_old_checksum(read_buf)) {
581
 
 
582
 
                        return(TRUE);
583
 
                }
584
 
 
585
 
                /* InnoDB versions < 4.0.14 and < 4.1.1 stored the space id
586
 
                (always equal to 0), to FIL_PAGE_SPACE_OR_CHKSUM */
587
 
 
588
 
                if (checksum_field != 0
589
 
                    && checksum_field != BUF_NO_CHECKSUM_MAGIC
590
 
                    && checksum_field
591
 
                    != buf_calc_page_new_checksum(read_buf)) {
592
 
 
593
 
                        return(TRUE);
594
 
                }
595
 
        }
596
 
 
597
 
        return(FALSE);
598
 
}
599
 
 
600
 
/********************************************************************//**
601
 
Prints a page to stderr. */
602
 
UNIV_INTERN
603
 
void
604
 
buf_page_print(
605
 
/*===========*/
606
 
        const byte*     read_buf,       /*!< in: a database page */
607
 
        ulint           zip_size)       /*!< in: compressed page size, or
608
 
                                0 for uncompressed pages */
609
 
{
610
 
#ifndef UNIV_HOTBACKUP
611
 
        dict_index_t*   index;
612
 
#endif /* !UNIV_HOTBACKUP */
613
 
        ulint           checksum;
614
 
        ulint           old_checksum;
615
 
        ulint           size    = zip_size;
616
 
 
617
 
        if (!size) {
618
 
                size = UNIV_PAGE_SIZE;
619
 
        }
620
 
 
621
 
        ut_print_timestamp(stderr);
622
 
        fprintf(stderr, "  InnoDB: Page dump in ascii and hex (%lu bytes):\n",
623
 
                (ulong) size);
624
 
        ut_print_buf(stderr, read_buf, size);
625
 
        fputs("\nInnoDB: End of page dump\n", stderr);
626
 
 
627
 
        if (zip_size) {
628
 
                /* Print compressed page. */
629
 
 
630
 
                switch (fil_page_get_type(read_buf)) {
631
 
                case FIL_PAGE_TYPE_ZBLOB:
632
 
                case FIL_PAGE_TYPE_ZBLOB2:
633
 
                        checksum = srv_use_checksums
634
 
                                ? page_zip_calc_checksum(read_buf, zip_size)
635
 
                                : BUF_NO_CHECKSUM_MAGIC;
636
 
                        ut_print_timestamp(stderr);
637
 
                        fprintf(stderr,
638
 
                                "  InnoDB: Compressed BLOB page"
639
 
                                " checksum %lu, stored %lu\n"
640
 
                                "InnoDB: Page lsn %lu %lu\n"
641
 
                                "InnoDB: Page number (if stored"
642
 
                                " to page already) %lu,\n"
643
 
                                "InnoDB: space id (if stored"
644
 
                                " to page already) %lu\n",
645
 
                                (ulong) checksum,
646
 
                                (ulong) mach_read_from_4(
647
 
                                        read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
648
 
                                (ulong) mach_read_from_4(
649
 
                                        read_buf + FIL_PAGE_LSN),
650
 
                                (ulong) mach_read_from_4(
651
 
                                        read_buf + (FIL_PAGE_LSN + 4)),
652
 
                                (ulong) mach_read_from_4(
653
 
                                        read_buf + FIL_PAGE_OFFSET),
654
 
                                (ulong) mach_read_from_4(
655
 
                                        read_buf
656
 
                                        + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
657
 
                        return;
658
 
                default:
659
 
                        ut_print_timestamp(stderr);
660
 
                        fprintf(stderr,
661
 
                                "  InnoDB: unknown page type %lu,"
662
 
                                " assuming FIL_PAGE_INDEX\n",
663
 
                                fil_page_get_type(read_buf));
664
 
                        /* fall through */
665
 
                case FIL_PAGE_INDEX:
666
 
                        checksum = srv_use_checksums
667
 
                                ? page_zip_calc_checksum(read_buf, zip_size)
668
 
                                : BUF_NO_CHECKSUM_MAGIC;
669
 
 
670
 
                        ut_print_timestamp(stderr);
671
 
                        fprintf(stderr,
672
 
                                "  InnoDB: Compressed page checksum %lu,"
673
 
                                " stored %lu\n"
674
 
                                "InnoDB: Page lsn %lu %lu\n"
675
 
                                "InnoDB: Page number (if stored"
676
 
                                " to page already) %lu,\n"
677
 
                                "InnoDB: space id (if stored"
678
 
                                " to page already) %lu\n",
679
 
                                (ulong) checksum,
680
 
                                (ulong) mach_read_from_4(
681
 
                                        read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
682
 
                                (ulong) mach_read_from_4(
683
 
                                        read_buf + FIL_PAGE_LSN),
684
 
                                (ulong) mach_read_from_4(
685
 
                                        read_buf + (FIL_PAGE_LSN + 4)),
686
 
                                (ulong) mach_read_from_4(
687
 
                                        read_buf + FIL_PAGE_OFFSET),
688
 
                                (ulong) mach_read_from_4(
689
 
                                        read_buf
690
 
                                        + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
691
 
                        return;
692
 
                case FIL_PAGE_TYPE_XDES:
693
 
                        /* This is an uncompressed page. */
694
 
                        break;
695
 
                }
696
 
        }
697
 
 
698
 
        checksum = srv_use_checksums
699
 
                ? buf_calc_page_new_checksum(read_buf) : BUF_NO_CHECKSUM_MAGIC;
700
 
        old_checksum = srv_use_checksums
701
 
                ? buf_calc_page_old_checksum(read_buf) : BUF_NO_CHECKSUM_MAGIC;
702
 
 
703
 
        ut_print_timestamp(stderr);
704
 
        fprintf(stderr,
705
 
                "  InnoDB: Page checksum %lu, prior-to-4.0.14-form"
706
 
                " checksum %lu\n"
707
 
                "InnoDB: stored checksum %lu, prior-to-4.0.14-form"
708
 
                " stored checksum %lu\n"
709
 
                "InnoDB: Page lsn %lu %lu, low 4 bytes of lsn"
710
 
                " at page end %lu\n"
711
 
                "InnoDB: Page number (if stored to page already) %lu,\n"
712
 
                "InnoDB: space id (if created with >= MySQL-4.1.1"
713
 
                " and stored already) %lu\n",
714
 
                (ulong) checksum, (ulong) old_checksum,
715
 
                (ulong) mach_read_from_4(read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
716
 
                (ulong) mach_read_from_4(read_buf + UNIV_PAGE_SIZE
717
 
                                         - FIL_PAGE_END_LSN_OLD_CHKSUM),
718
 
                (ulong) mach_read_from_4(read_buf + FIL_PAGE_LSN),
719
 
                (ulong) mach_read_from_4(read_buf + FIL_PAGE_LSN + 4),
720
 
                (ulong) mach_read_from_4(read_buf + UNIV_PAGE_SIZE
721
 
                                         - FIL_PAGE_END_LSN_OLD_CHKSUM + 4),
722
 
                (ulong) mach_read_from_4(read_buf + FIL_PAGE_OFFSET),
723
 
                (ulong) mach_read_from_4(read_buf
724
 
                                         + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
725
 
 
726
 
#ifndef UNIV_HOTBACKUP
727
 
        if (mach_read_from_2(read_buf + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE)
728
 
            == TRX_UNDO_INSERT) {
729
 
                fprintf(stderr,
730
 
                        "InnoDB: Page may be an insert undo log page\n");
731
 
        } else if (mach_read_from_2(read_buf + TRX_UNDO_PAGE_HDR
732
 
                                    + TRX_UNDO_PAGE_TYPE)
733
 
                   == TRX_UNDO_UPDATE) {
734
 
                fprintf(stderr,
735
 
                        "InnoDB: Page may be an update undo log page\n");
736
 
        }
737
 
#endif /* !UNIV_HOTBACKUP */
738
 
 
739
 
        switch (fil_page_get_type(read_buf)) {
740
 
                index_id_t      index_id;
741
 
        case FIL_PAGE_INDEX:
742
 
                index_id = btr_page_get_index_id(read_buf);
743
 
                fprintf(stderr,
744
 
                        "InnoDB: Page may be an index page where"
745
 
                        " index id is %llu\n",
746
 
                        (ullint) index_id);
747
 
#ifndef UNIV_HOTBACKUP
748
 
                index = dict_index_find_on_id_low(index_id);
749
 
                if (index) {
750
 
                        fputs("InnoDB: (", stderr);
751
 
                        dict_index_name_print(stderr, NULL, index);
752
 
                        fputs(")\n", stderr);
753
 
                }
754
 
#endif /* !UNIV_HOTBACKUP */
755
 
                break;
756
 
        case FIL_PAGE_INODE:
757
 
                fputs("InnoDB: Page may be an 'inode' page\n", stderr);
758
 
                break;
759
 
        case FIL_PAGE_IBUF_FREE_LIST:
760
 
                fputs("InnoDB: Page may be an insert buffer free list page\n",
761
 
                      stderr);
762
 
                break;
763
 
        case FIL_PAGE_TYPE_ALLOCATED:
764
 
                fputs("InnoDB: Page may be a freshly allocated page\n",
765
 
                      stderr);
766
 
                break;
767
 
        case FIL_PAGE_IBUF_BITMAP:
768
 
                fputs("InnoDB: Page may be an insert buffer bitmap page\n",
769
 
                      stderr);
770
 
                break;
771
 
        case FIL_PAGE_TYPE_SYS:
772
 
                fputs("InnoDB: Page may be a system page\n",
773
 
                      stderr);
774
 
                break;
775
 
        case FIL_PAGE_TYPE_TRX_SYS:
776
 
                fputs("InnoDB: Page may be a transaction system page\n",
777
 
                      stderr);
778
 
                break;
779
 
        case FIL_PAGE_TYPE_FSP_HDR:
780
 
                fputs("InnoDB: Page may be a file space header page\n",
781
 
                      stderr);
782
 
                break;
783
 
        case FIL_PAGE_TYPE_XDES:
784
 
                fputs("InnoDB: Page may be an extent descriptor page\n",
785
 
                      stderr);
786
 
                break;
787
 
        case FIL_PAGE_TYPE_BLOB:
788
 
                fputs("InnoDB: Page may be a BLOB page\n",
789
 
                      stderr);
790
 
                break;
791
 
        case FIL_PAGE_TYPE_ZBLOB:
792
 
        case FIL_PAGE_TYPE_ZBLOB2:
793
 
                fputs("InnoDB: Page may be a compressed BLOB page\n",
794
 
                      stderr);
795
 
                break;
796
 
        }
797
 
}
798
 
 
799
 
#ifndef UNIV_HOTBACKUP
800
 
 
801
 
# ifdef PFS_GROUP_BUFFER_SYNC
802
 
/********************************************************************//**
803
 
This function registers mutexes and rwlocks in buffer blocks with
804
 
performance schema. If PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER is
805
 
defined to be a value less than chunk->size, then only mutexes
806
 
and rwlocks in the first PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER
807
 
blocks are registered. */
808
 
static
809
 
void
810
 
pfs_register_buffer_block(
811
 
/*======================*/
812
 
        buf_chunk_t*    chunk)          /*!< in/out: chunk of buffers */
813
 
{
814
 
        ulint           i;
815
 
        ulint           num_to_register;
816
 
        buf_block_t*    block;
817
 
 
818
 
        block = chunk->blocks;
819
 
 
820
 
        num_to_register = ut_min(chunk->size,
821
 
                                 PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER);
822
 
 
823
 
        for (i = 0; i < num_to_register; i++) {
824
 
                mutex_t*        mutex;
825
 
                rw_lock_t*      rwlock;
826
 
 
827
 
#  ifdef UNIV_PFS_MUTEX
828
 
                mutex = &block->mutex;
829
 
                ut_a(!mutex->pfs_psi);
830
 
                mutex->pfs_psi = (PSI_server)
831
 
                        ? PSI_server->init_mutex(buffer_block_mutex_key, mutex)
832
 
                        : NULL;
833
 
#  endif /* UNIV_PFS_MUTEX */
834
 
 
835
 
#  ifdef UNIV_PFS_RWLOCK
836
 
                rwlock = &block->lock;
837
 
                ut_a(!rwlock->pfs_psi);
838
 
                rwlock->pfs_psi = (PSI_server)
839
 
                        ? PSI_server->init_rwlock(buf_block_lock_key, rwlock)
840
 
                        : NULL;
841
 
#  endif /* UNIV_PFS_RWLOCK */
842
 
                block++;
843
 
        }
844
 
}
845
 
# endif /* PFS_GROUP_BUFFER_SYNC */
846
 
 
847
 
/********************************************************************//**
848
 
Initializes a buffer control block when the buf_pool is created. */
849
 
static
850
 
void
851
 
buf_block_init(
852
 
/*===========*/
853
 
        buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
854
 
        buf_block_t*    block,          /*!< in: pointer to control block */
855
 
        byte*           frame)          /*!< in: pointer to buffer frame */
856
 
{
857
 
        UNIV_MEM_DESC(frame, UNIV_PAGE_SIZE, block);
858
 
 
859
 
        block->frame = frame;
860
 
 
861
 
        block->page.buf_pool = buf_pool;
862
 
        block->page.state = BUF_BLOCK_NOT_USED;
863
 
        block->page.buf_fix_count = 0;
864
 
        block->page.io_fix = BUF_IO_NONE;
865
 
 
866
 
        block->modify_clock = 0;
867
 
 
868
 
#ifdef UNIV_DEBUG_FILE_ACCESSES
869
 
        block->page.file_page_was_freed = FALSE;
870
 
#endif /* UNIV_DEBUG_FILE_ACCESSES */
871
 
 
872
 
        block->check_index_page_at_flush = FALSE;
873
 
        block->index = NULL;
874
 
 
875
 
        block->is_hashed = FALSE;
876
 
 
877
 
#ifdef UNIV_DEBUG
878
 
        block->page.in_page_hash = FALSE;
879
 
        block->page.in_zip_hash = FALSE;
880
 
        block->page.in_flush_list = FALSE;
881
 
        block->page.in_free_list = FALSE;
882
 
        block->page.in_LRU_list = FALSE;
883
 
        block->in_unzip_LRU_list = FALSE;
884
 
#endif /* UNIV_DEBUG */
885
 
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
886
 
        block->n_pointers = 0;
887
 
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
888
 
        page_zip_des_init(&block->page.zip);
889
 
 
890
 
#if defined PFS_SKIP_BUFFER_MUTEX_RWLOCK || defined PFS_GROUP_BUFFER_SYNC
891
 
        /* If PFS_SKIP_BUFFER_MUTEX_RWLOCK is defined, skip registration
892
 
        of buffer block mutex/rwlock with performance schema. If
893
 
        PFS_GROUP_BUFFER_SYNC is defined, skip the registration
894
 
        since buffer block mutex/rwlock will be registered later in
895
 
        pfs_register_buffer_block() */
896
 
 
897
 
        mutex_create(PFS_NOT_INSTRUMENTED, &block->mutex, SYNC_BUF_BLOCK);
898
 
        rw_lock_create(PFS_NOT_INSTRUMENTED, &block->lock, SYNC_LEVEL_VARYING);
899
 
#else /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */
900
 
        mutex_create(buffer_block_mutex_key, &block->mutex, SYNC_BUF_BLOCK);
901
 
        rw_lock_create(buf_block_lock_key, &block->lock, SYNC_LEVEL_VARYING);
902
 
#endif /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */
903
 
 
904
 
        ut_ad(rw_lock_validate(&(block->lock)));
905
 
 
906
 
#ifdef UNIV_SYNC_DEBUG
907
 
        rw_lock_create(buf_block_debug_latch_key,
908
 
                       &block->debug_latch, SYNC_NO_ORDER_CHECK);
909
 
#endif /* UNIV_SYNC_DEBUG */
910
 
}
911
 
 
912
 
/********************************************************************//**
913
 
Allocates a chunk of buffer frames.
914
 
@return chunk, or NULL on failure */
915
 
static
916
 
buf_chunk_t*
917
 
buf_chunk_init(
918
 
/*===========*/
919
 
        buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
920
 
        buf_chunk_t*    chunk,          /*!< out: chunk of buffers */
921
 
        ulint           mem_size)       /*!< in: requested size in bytes */
922
 
{
923
 
        buf_block_t*    block;
924
 
        byte*           frame;
925
 
        ulint           i;
926
 
 
927
 
        /* Round down to a multiple of page size,
928
 
        although it already should be. */
929
 
        mem_size = ut_2pow_round(mem_size, UNIV_PAGE_SIZE);
930
 
        /* Reserve space for the block descriptors. */
931
 
        mem_size += ut_2pow_round((mem_size / UNIV_PAGE_SIZE) * (sizeof *block)
932
 
                                  + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE);
933
 
 
934
 
        chunk->mem_size = mem_size;
935
 
        chunk->mem = os_mem_alloc_large(&chunk->mem_size);
936
 
 
937
 
        if (UNIV_UNLIKELY(chunk->mem == NULL)) {
938
 
 
939
 
                return(NULL);
940
 
        }
941
 
 
942
 
        /* Allocate the block descriptors from
943
 
        the start of the memory block. */
944
 
        chunk->blocks = chunk->mem;
945
 
 
946
 
        /* Align a pointer to the first frame.  Note that when
947
 
        os_large_page_size is smaller than UNIV_PAGE_SIZE,
948
 
        we may allocate one fewer block than requested.  When
949
 
        it is bigger, we may allocate more blocks than requested. */
950
 
 
951
 
        frame = ut_align(chunk->mem, UNIV_PAGE_SIZE);
952
 
        chunk->size = chunk->mem_size / UNIV_PAGE_SIZE
953
 
                - (frame != chunk->mem);
954
 
 
955
 
        /* Subtract the space needed for block descriptors. */
956
 
        {
957
 
                ulint   size = chunk->size;
958
 
 
959
 
                while (frame < (byte*) (chunk->blocks + size)) {
960
 
                        frame += UNIV_PAGE_SIZE;
961
 
                        size--;
962
 
                }
963
 
 
964
 
                chunk->size = size;
965
 
        }
966
 
 
967
 
        /* Init block structs and assign frames for them. Then we
968
 
        assign the frames to the first blocks (we already mapped the
969
 
        memory above). */
970
 
 
971
 
        block = chunk->blocks;
972
 
 
973
 
        for (i = chunk->size; i--; ) {
974
 
 
975
 
                buf_block_init(buf_pool, block, frame);
976
 
 
977
 
#ifdef HAVE_VALGRIND
978
 
                /* Wipe contents of frame to eliminate a Purify warning */
979
 
                memset(block->frame, '\0', UNIV_PAGE_SIZE);
980
 
#endif
981
 
                /* Add the block to the free list */
982
 
                UT_LIST_ADD_LAST(list, buf_pool->free, (&block->page));
983
 
 
984
 
                ut_d(block->page.in_free_list = TRUE);
985
 
                ut_ad(buf_pool_from_block(block) == buf_pool);
986
 
 
987
 
                block++;
988
 
                frame += UNIV_PAGE_SIZE;
989
 
        }
990
 
 
991
 
#ifdef PFS_GROUP_BUFFER_SYNC
992
 
        pfs_register_buffer_block(chunk);
993
 
#endif
994
 
        return(chunk);
995
 
}
996
 
 
997
 
#ifdef UNIV_DEBUG
998
 
/*********************************************************************//**
999
 
Finds a block in the given buffer chunk that points to a
1000
 
given compressed page.
1001
 
@return buffer block pointing to the compressed page, or NULL */
1002
 
static
1003
 
buf_block_t*
1004
 
buf_chunk_contains_zip(
1005
 
/*===================*/
1006
 
        buf_chunk_t*    chunk,  /*!< in: chunk being checked */
1007
 
        const void*     data)   /*!< in: pointer to compressed page */
1008
 
{
1009
 
        buf_block_t*    block;
1010
 
        ulint           i;
1011
 
 
1012
 
        block = chunk->blocks;
1013
 
 
1014
 
        for (i = chunk->size; i--; block++) {
1015
 
                if (block->page.zip.data == data) {
1016
 
 
1017
 
                        return(block);
1018
 
                }
1019
 
        }
1020
 
 
1021
 
        return(NULL);
1022
 
}
1023
 
 
1024
 
/*********************************************************************//**
1025
 
Finds a block in the buffer pool that points to a
1026
 
given compressed page.
1027
 
@return buffer block pointing to the compressed page, or NULL */
1028
 
UNIV_INTERN
1029
 
buf_block_t*
1030
 
buf_pool_contains_zip(
1031
 
/*==================*/
1032
 
        buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
1033
 
        const void*     data)           /*!< in: pointer to compressed page */
1034
 
{
1035
 
        ulint           n;
1036
 
        buf_chunk_t*    chunk = buf_pool->chunks;
1037
 
 
1038
 
        ut_ad(buf_pool);
1039
 
        ut_ad(buf_pool_mutex_own(buf_pool));
1040
 
        for (n = buf_pool->n_chunks; n--; chunk++) {
1041
 
 
1042
 
                buf_block_t* block = buf_chunk_contains_zip(chunk, data);
1043
 
 
1044
 
                if (block) {
1045
 
                        return(block);
1046
 
                }
1047
 
        }
1048
 
 
1049
 
        return(NULL);
1050
 
}
1051
 
#endif /* UNIV_DEBUG */
1052
 
 
1053
 
/*********************************************************************//**
1054
 
Checks that all file pages in the buffer chunk are in a replaceable state.
1055
 
@return address of a non-free block, or NULL if all freed */
1056
 
static
1057
 
const buf_block_t*
1058
 
buf_chunk_not_freed(
1059
 
/*================*/
1060
 
        buf_chunk_t*    chunk)  /*!< in: chunk being checked */
1061
 
{
1062
 
        buf_block_t*    block;
1063
 
        ulint           i;
1064
 
 
1065
 
        block = chunk->blocks;
1066
 
 
1067
 
        for (i = chunk->size; i--; block++) {
1068
 
                ibool   ready;
1069
 
 
1070
 
                switch (buf_block_get_state(block)) {
1071
 
                case BUF_BLOCK_ZIP_FREE:
1072
 
                case BUF_BLOCK_ZIP_PAGE:
1073
 
                case BUF_BLOCK_ZIP_DIRTY:
1074
 
                        /* The uncompressed buffer pool should never
1075
 
                        contain compressed block descriptors. */
1076
 
                        ut_error;
1077
 
                        break;
1078
 
                case BUF_BLOCK_NOT_USED:
1079
 
                case BUF_BLOCK_READY_FOR_USE:
1080
 
                case BUF_BLOCK_MEMORY:
1081
 
                case BUF_BLOCK_REMOVE_HASH:
1082
 
                        /* Skip blocks that are not being used for
1083
 
                        file pages. */
1084
 
                        break;
1085
 
                case BUF_BLOCK_FILE_PAGE:
1086
 
                        mutex_enter(&block->mutex);
1087
 
                        ready = buf_flush_ready_for_replace(&block->page);
1088
 
                        mutex_exit(&block->mutex);
1089
 
 
1090
 
                        if (!ready) {
1091
 
 
1092
 
                                return(block);
1093
 
                        }
1094
 
 
1095
 
                        break;
1096
 
                }
1097
 
        }
1098
 
 
1099
 
        return(NULL);
1100
 
}
1101
 
 
1102
 
/*********************************************************************//**
1103
 
Checks that all blocks in the buffer chunk are in BUF_BLOCK_NOT_USED state.
1104
 
@return TRUE if all freed */
1105
 
static
1106
 
ibool
1107
 
buf_chunk_all_free(
1108
 
/*===============*/
1109
 
        const buf_chunk_t*      chunk)  /*!< in: chunk being checked */
1110
 
{
1111
 
        const buf_block_t*      block;
1112
 
        ulint                   i;
1113
 
 
1114
 
        block = chunk->blocks;
1115
 
 
1116
 
        for (i = chunk->size; i--; block++) {
1117
 
 
1118
 
                if (buf_block_get_state(block) != BUF_BLOCK_NOT_USED) {
1119
 
 
1120
 
                        return(FALSE);
1121
 
                }
1122
 
        }
1123
 
 
1124
 
        return(TRUE);
1125
 
}
1126
 
 
1127
 
/********************************************************************//**
1128
 
Frees a chunk of buffer frames. */
1129
 
static
1130
 
void
1131
 
buf_chunk_free(
1132
 
/*===========*/
1133
 
        buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
1134
 
        buf_chunk_t*    chunk)          /*!< out: chunk of buffers */
1135
 
{
1136
 
        buf_block_t*            block;
1137
 
        const buf_block_t*      block_end;
1138
 
 
1139
 
        ut_ad(buf_pool_mutex_own(buf_pool));
1140
 
 
1141
 
        block_end = chunk->blocks + chunk->size;
1142
 
 
1143
 
        for (block = chunk->blocks; block < block_end; block++) {
1144
 
                ut_a(buf_block_get_state(block) == BUF_BLOCK_NOT_USED);
1145
 
                ut_a(!block->page.zip.data);
1146
 
 
1147
 
                ut_ad(!block->page.in_LRU_list);
1148
 
                ut_ad(!block->in_unzip_LRU_list);
1149
 
                ut_ad(!block->page.in_flush_list);
1150
 
                /* Remove the block from the free list. */
1151
 
                ut_ad(block->page.in_free_list);
1152
 
                UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
1153
 
 
1154
 
                /* Free the latches. */
1155
 
                mutex_free(&block->mutex);
1156
 
                rw_lock_free(&block->lock);
1157
 
#ifdef UNIV_SYNC_DEBUG
1158
 
                rw_lock_free(&block->debug_latch);
1159
 
#endif /* UNIV_SYNC_DEBUG */
1160
 
                UNIV_MEM_UNDESC(block);
1161
 
        }
1162
 
 
1163
 
        os_mem_free_large(chunk->mem, chunk->mem_size);
1164
 
}
1165
 
 
1166
 
/********************************************************************//**
1167
 
Set buffer pool size variables after resizing it */
1168
 
static
1169
 
void
1170
 
buf_pool_set_sizes(void)
1171
 
/*====================*/
1172
 
{
1173
 
        ulint   i;
1174
 
        ulint   curr_size = 0;
1175
 
 
1176
 
        buf_pool_mutex_enter_all();
1177
 
 
1178
 
        for (i = 0; i < srv_buf_pool_instances; i++) {
1179
 
                buf_pool_t*     buf_pool;
1180
 
 
1181
 
                buf_pool = buf_pool_from_array(i);
1182
 
                curr_size += buf_pool->curr_pool_size;
1183
 
        }
1184
 
 
1185
 
        srv_buf_pool_curr_size = curr_size;
1186
 
        srv_buf_pool_old_size = srv_buf_pool_size;
1187
 
 
1188
 
        buf_pool_mutex_exit_all();
1189
 
}
1190
 
 
1191
 
/********************************************************************//**
1192
 
Initialize a buffer pool instance.
1193
 
@return DB_SUCCESS if all goes well. */
1194
 
static
1195
 
ulint
1196
 
buf_pool_init_instance(
1197
 
/*===================*/
1198
 
        buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
1199
 
        ulint           buf_pool_size,  /*!< in: size in bytes */
1200
 
        ulint           instance_no)    /*!< in: id of the instance */
1201
 
{
1202
 
        ulint           i;
1203
 
        buf_chunk_t*    chunk;
1204
 
 
1205
 
        /* 1. Initialize general fields
1206
 
        ------------------------------- */
1207
 
        mutex_create(buf_pool_mutex_key,
1208
 
                     &buf_pool->mutex, SYNC_BUF_POOL);
1209
 
        mutex_create(buf_pool_zip_mutex_key,
1210
 
                     &buf_pool->zip_mutex, SYNC_BUF_BLOCK);
1211
 
 
1212
 
        buf_pool_mutex_enter(buf_pool);
1213
 
 
1214
 
        if (buf_pool_size > 0) {
1215
 
                buf_pool->n_chunks = 1;
1216
 
                buf_pool->chunks = chunk = mem_zalloc(sizeof *chunk);
1217
 
 
1218
 
                UT_LIST_INIT(buf_pool->free);
1219
 
 
1220
 
                if (!buf_chunk_init(buf_pool, chunk, buf_pool_size)) {
1221
 
                        mem_free(chunk);
1222
 
                        mem_free(buf_pool);
1223
 
 
1224
 
                        buf_pool_mutex_exit(buf_pool);
1225
 
 
1226
 
                        return(DB_ERROR);
1227
 
                }
1228
 
 
1229
 
                buf_pool->instance_no = instance_no;
1230
 
                buf_pool->old_pool_size = buf_pool_size;
1231
 
                buf_pool->curr_size = chunk->size;
1232
 
                buf_pool->curr_pool_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
1233
 
 
1234
 
                buf_pool->page_hash = hash_create(2 * buf_pool->curr_size);
1235
 
                buf_pool->zip_hash = hash_create(2 * buf_pool->curr_size);
1236
 
                
1237
 
                buf_pool->last_printout_time = ut_time();
1238
 
        }
1239
 
        /* 2. Initialize flushing fields
1240
 
        -------------------------------- */
1241
 
 
1242
 
        mutex_create(flush_list_mutex_key, &buf_pool->flush_list_mutex,
1243
 
                     SYNC_BUF_FLUSH_LIST);
1244
 
 
1245
 
        for (i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) {
1246
 
                buf_pool->no_flush[i] = os_event_create(NULL);
1247
 
        }
1248
 
 
1249
 
        /* 3. Initialize LRU fields
1250
 
        --------------------------- */
1251
 
 
1252
 
        /* All fields are initialized by mem_zalloc(). */
1253
 
 
1254
 
        buf_pool_mutex_exit(buf_pool);
1255
 
 
1256
 
        return(DB_SUCCESS);
1257
 
}
1258
 
 
1259
 
/********************************************************************//**
1260
 
free one buffer pool instance */
1261
 
static
1262
 
void
1263
 
buf_pool_free_instance(
1264
 
/*===================*/
1265
 
        buf_pool_t*     buf_pool)       /* in,own: buffer pool instance
1266
 
                                        to free */
1267
 
{
1268
 
        buf_chunk_t*    chunk;
1269
 
        buf_chunk_t*    chunks;
1270
 
 
1271
 
        chunks = buf_pool->chunks;
1272
 
        chunk = chunks + buf_pool->n_chunks;
1273
 
 
1274
 
        while (--chunk >= chunks) {
1275
 
                /* Bypass the checks of buf_chunk_free(), since they
1276
 
                would fail at shutdown. */
1277
 
                os_mem_free_large(chunk->mem, chunk->mem_size);
1278
 
        }
1279
 
 
1280
 
        mem_free(buf_pool->chunks);
1281
 
        hash_table_free(buf_pool->page_hash);
1282
 
        hash_table_free(buf_pool->zip_hash);
1283
 
        mem_free(buf_pool);
1284
 
        buf_pool = NULL;
1285
 
}
1286
 
 
1287
 
/********************************************************************//**
1288
 
Creates the buffer pool.
1289
 
@return DB_SUCCESS if success, DB_ERROR if not enough memory or error */
1290
 
UNIV_INTERN
1291
 
ulint
1292
 
buf_pool_init(
1293
 
/*==========*/
1294
 
        ulint   total_size,     /*!< in: size of the total pool in bytes */
1295
 
        ulint   n_instances)    /*!< in: number of instances */
1296
 
{
1297
 
        ulint   i;
1298
 
 
1299
 
        /* We create an extra buffer pool instance, this instance is used
1300
 
        for flushing the flush lists, to keep track of n_flush for all
1301
 
        the buffer pools and also used as a waiting object during flushing. */
1302
 
        for (i = 0; i < n_instances; i++) {
1303
 
                buf_pool_t*     ptr;
1304
 
                ulint           size;
1305
 
 
1306
 
                ptr = mem_zalloc(sizeof(*ptr));
1307
 
 
1308
 
                size = total_size / n_instances;
1309
 
 
1310
 
                buf_pool_ptr[i] = ptr;
1311
 
 
1312
 
                if (buf_pool_init_instance(ptr, size, i) != DB_SUCCESS) {
1313
 
 
1314
 
                        mem_free(buf_pool_ptr[i]);
1315
 
 
1316
 
                        /* Free all the instances created so far. */
1317
 
                        buf_pool_free(i);
1318
 
 
1319
 
                        return(DB_ERROR);
1320
 
                }
1321
 
        }
1322
 
 
1323
 
        buf_pool_set_sizes();
1324
 
        buf_LRU_old_ratio_update(100 * 3/ 8, FALSE);
1325
 
 
1326
 
        btr_search_sys_create(buf_pool_get_curr_size() / sizeof(void*) / 64);
1327
 
 
1328
 
        return(DB_SUCCESS);
1329
 
}
1330
 
 
1331
 
/********************************************************************//**
1332
 
Frees the buffer pool at shutdown.  This must not be invoked before
1333
 
freeing all mutexes. */
1334
 
UNIV_INTERN
1335
 
void
1336
 
buf_pool_free(
1337
 
/*==========*/
1338
 
        ulint   n_instances)    /*!< in: numbere of instances to free */
1339
 
{
1340
 
        ulint   i;
1341
 
 
1342
 
        for (i = 0; i < n_instances; i++) {
1343
 
                buf_pool_free_instance(buf_pool_from_array(i));
1344
 
                buf_pool_ptr[i] = NULL;
1345
 
        }
1346
 
}
1347
 
 
1348
 
/********************************************************************//**
1349
 
Drops adaptive hash index for a buffer pool instance. */
1350
 
static
1351
 
void
1352
 
buf_pool_drop_hash_index_instance(
1353
 
/*==============================*/
1354
 
        buf_pool_t*     buf_pool,               /*!< in: buffer pool instance */
1355
 
        ibool*          released_search_latch)  /*!< out: flag for signalling
1356
 
                                                whether the search latch was
1357
 
                                                released */
1358
 
{
1359
 
        buf_chunk_t*    chunks  = buf_pool->chunks;
1360
 
        buf_chunk_t*    chunk   = chunks + buf_pool->n_chunks;
1361
 
 
1362
 
        while (--chunk >= chunks) {
1363
 
                ulint           i;
1364
 
                buf_block_t*    block   = chunk->blocks;
1365
 
 
1366
 
                for (i = chunk->size; i--; block++) {
1367
 
                        /* block->is_hashed cannot be modified
1368
 
                        when we have an x-latch on btr_search_latch;
1369
 
                        see the comment in buf0buf.h */
1370
 
                        
1371
 
                        if (!block->is_hashed) {
1372
 
                                continue;
1373
 
                        }
1374
 
                        
1375
 
                        /* To follow the latching order, we
1376
 
                        have to release btr_search_latch
1377
 
                        before acquiring block->latch. */
1378
 
                        rw_lock_x_unlock(&btr_search_latch);
1379
 
                        /* When we release the search latch,
1380
 
                        we must rescan all blocks, because
1381
 
                        some may become hashed again. */
1382
 
                        *released_search_latch = TRUE;
1383
 
                        
1384
 
                        rw_lock_x_lock(&block->lock);
1385
 
                        
1386
 
                        /* This should be guaranteed by the
1387
 
                        callers, which will be holding
1388
 
                        btr_search_enabled_mutex. */
1389
 
                        ut_ad(!btr_search_enabled);
1390
 
                        
1391
 
                        /* Because we did not buffer-fix the
1392
 
                        block by calling buf_block_get_gen(),
1393
 
                        it is possible that the block has been
1394
 
                        allocated for some other use after
1395
 
                        btr_search_latch was released above.
1396
 
                        We do not care which file page the
1397
 
                        block is mapped to.  All we want to do
1398
 
                        is to drop any hash entries referring
1399
 
                        to the page. */
1400
 
                        
1401
 
                        /* It is possible that
1402
 
                        block->page.state != BUF_FILE_PAGE.
1403
 
                        Even that does not matter, because
1404
 
                        btr_search_drop_page_hash_index() will
1405
 
                        check block->is_hashed before doing
1406
 
                        anything.  block->is_hashed can only
1407
 
                        be set on uncompressed file pages. */
1408
 
                        
1409
 
                        btr_search_drop_page_hash_index(block);
1410
 
                        
1411
 
                        rw_lock_x_unlock(&block->lock);
1412
 
                        
1413
 
                        rw_lock_x_lock(&btr_search_latch);
1414
 
                        
1415
 
                        ut_ad(!btr_search_enabled);
1416
 
                }
1417
 
        }
1418
 
}
1419
 
 
1420
 
/********************************************************************//**
1421
 
Drops the adaptive hash index.  To prevent a livelock, this function
1422
 
is only to be called while holding btr_search_latch and while
1423
 
btr_search_enabled == FALSE. */
1424
 
UNIV_INTERN
1425
 
void
1426
 
buf_pool_drop_hash_index(void)
1427
 
/*==========================*/
1428
 
{
1429
 
        ibool           released_search_latch;
1430
 
 
1431
 
#ifdef UNIV_SYNC_DEBUG
1432
 
        ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX));
1433
 
#endif /* UNIV_SYNC_DEBUG */
1434
 
        ut_ad(!btr_search_enabled);
1435
 
 
1436
 
        do {
1437
 
                ulint   i;
1438
 
 
1439
 
                released_search_latch = FALSE;
1440
 
 
1441
 
                for (i = 0; i < srv_buf_pool_instances; i++) {
1442
 
                        buf_pool_t*     buf_pool;
1443
 
 
1444
 
                        buf_pool = buf_pool_from_array(i);
1445
 
 
1446
 
                        buf_pool_drop_hash_index_instance(
1447
 
                                buf_pool, &released_search_latch);
1448
 
                }
1449
 
 
1450
 
        } while (released_search_latch);
1451
 
}
1452
 
 
1453
 
/********************************************************************//**
1454
 
Relocate a buffer control block.  Relocates the block on the LRU list
1455
 
and in buf_pool->page_hash.  Does not relocate bpage->list.
1456
 
The caller must take care of relocating bpage->list. */
1457
 
UNIV_INTERN
1458
 
void
1459
 
buf_relocate(
1460
 
/*=========*/
1461
 
        buf_page_t*     bpage,  /*!< in/out: control block being relocated;
1462
 
                                buf_page_get_state(bpage) must be
1463
 
                                BUF_BLOCK_ZIP_DIRTY or BUF_BLOCK_ZIP_PAGE */
1464
 
        buf_page_t*     dpage)  /*!< in/out: destination control block */
1465
 
{
1466
 
        buf_page_t*     b;
1467
 
        ulint           fold;
1468
 
        buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
1469
 
 
1470
 
        ut_ad(buf_pool_mutex_own(buf_pool));
1471
 
        ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1472
 
        ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
1473
 
        ut_a(bpage->buf_fix_count == 0);
1474
 
        ut_ad(bpage->in_LRU_list);
1475
 
        ut_ad(!bpage->in_zip_hash);
1476
 
        ut_ad(bpage->in_page_hash);
1477
 
        ut_ad(bpage == buf_page_hash_get(buf_pool,
1478
 
                                         bpage->space, bpage->offset));
1479
 
        ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
1480
 
#ifdef UNIV_DEBUG
1481
 
        switch (buf_page_get_state(bpage)) {
1482
 
        case BUF_BLOCK_ZIP_FREE:
1483
 
        case BUF_BLOCK_NOT_USED:
1484
 
        case BUF_BLOCK_READY_FOR_USE:
1485
 
        case BUF_BLOCK_FILE_PAGE:
1486
 
        case BUF_BLOCK_MEMORY:
1487
 
        case BUF_BLOCK_REMOVE_HASH:
1488
 
                ut_error;
1489
 
        case BUF_BLOCK_ZIP_DIRTY:
1490
 
        case BUF_BLOCK_ZIP_PAGE:
1491
 
                break;
1492
 
        }
1493
 
#endif /* UNIV_DEBUG */
1494
 
 
1495
 
        memcpy(dpage, bpage, sizeof *dpage);
1496
 
 
1497
 
        ut_d(bpage->in_LRU_list = FALSE);
1498
 
        ut_d(bpage->in_page_hash = FALSE);
1499
 
 
1500
 
        /* relocate buf_pool->LRU */
1501
 
        b = UT_LIST_GET_PREV(LRU, bpage);
1502
 
        UT_LIST_REMOVE(LRU, buf_pool->LRU, bpage);
1503
 
 
1504
 
        if (b) {
1505
 
                UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU, b, dpage);
1506
 
        } else {
1507
 
                UT_LIST_ADD_FIRST(LRU, buf_pool->LRU, dpage);
1508
 
        }
1509
 
 
1510
 
        if (UNIV_UNLIKELY(buf_pool->LRU_old == bpage)) {
1511
 
                buf_pool->LRU_old = dpage;
1512
 
#ifdef UNIV_LRU_DEBUG
1513
 
                /* buf_pool->LRU_old must be the first item in the LRU list
1514
 
                whose "old" flag is set. */
1515
 
                ut_a(buf_pool->LRU_old->old);
1516
 
                ut_a(!UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)
1517
 
                     || !UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)->old);
1518
 
                ut_a(!UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)
1519
 
                     || UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)->old);
1520
 
        } else {
1521
 
                /* Check that the "old" flag is consistent in
1522
 
                the block and its neighbours. */
1523
 
                buf_page_set_old(dpage, buf_page_is_old(dpage));
1524
 
#endif /* UNIV_LRU_DEBUG */
1525
 
        }
1526
 
 
1527
 
        ut_d(UT_LIST_VALIDATE(LRU, buf_page_t, buf_pool->LRU,
1528
 
                              ut_ad(ut_list_node_313->in_LRU_list)));
1529
 
 
1530
 
        /* relocate buf_pool->page_hash */
1531
 
        fold = buf_page_address_fold(bpage->space, bpage->offset);
1532
 
 
1533
 
        HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, bpage);
1534
 
        HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold, dpage);
1535
 
}
1536
 
 
1537
 
/********************************************************************//**
1538
 
Shrinks a buffer pool instance. */
1539
 
static
1540
 
void
1541
 
buf_pool_shrink_instance(
1542
 
/*=====================*/
1543
 
        buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
1544
 
        ulint           chunk_size)     /*!< in: number of pages to remove */
1545
 
{
1546
 
        buf_chunk_t*    chunks;
1547
 
        buf_chunk_t*    chunk;
1548
 
        ulint           max_size;
1549
 
        ulint           max_free_size;
1550
 
        buf_chunk_t*    max_chunk;
1551
 
        buf_chunk_t*    max_free_chunk;
1552
 
 
1553
 
        ut_ad(!buf_pool_mutex_own(buf_pool));
1554
 
 
1555
 
try_again:
1556
 
        btr_search_disable(); /* Empty the adaptive hash index again */
1557
 
        buf_pool_mutex_enter(buf_pool);
1558
 
 
1559
 
shrink_again:
1560
 
        if (buf_pool->n_chunks <= 1) {
1561
 
 
1562
 
                /* Cannot shrink if there is only one chunk */
1563
 
                goto func_done;
1564
 
        }
1565
 
 
1566
 
        /* Search for the largest free chunk
1567
 
        not larger than the size difference */
1568
 
        chunks = buf_pool->chunks;
1569
 
        chunk = chunks + buf_pool->n_chunks;
1570
 
        max_size = max_free_size = 0;
1571
 
        max_chunk = max_free_chunk = NULL;
1572
 
 
1573
 
        while (--chunk >= chunks) {
1574
 
                if (chunk->size <= chunk_size
1575
 
                    && chunk->size > max_free_size) {
1576
 
                        if (chunk->size > max_size) {
1577
 
                                max_size = chunk->size;
1578
 
                                max_chunk = chunk;
1579
 
                        }
1580
 
 
1581
 
                        if (buf_chunk_all_free(chunk)) {
1582
 
                                max_free_size = chunk->size;
1583
 
                                max_free_chunk = chunk;
1584
 
                        }
1585
 
                }
1586
 
        }
1587
 
 
1588
 
        if (!max_free_size) {
1589
 
 
1590
 
                ulint           dirty   = 0;
1591
 
                ulint           nonfree = 0;
1592
 
                buf_block_t*    block;
1593
 
                buf_block_t*    bend;
1594
 
 
1595
 
                /* Cannot shrink: try again later
1596
 
                (do not assign srv_buf_pool_old_size) */
1597
 
                if (!max_chunk) {
1598
 
 
1599
 
                        goto func_exit;
1600
 
                }
1601
 
 
1602
 
                block = max_chunk->blocks;
1603
 
                bend = block + max_chunk->size;
1604
 
 
1605
 
                /* Move the blocks of chunk to the end of the
1606
 
                LRU list and try to flush them. */
1607
 
                for (; block < bend; block++) {
1608
 
                        switch (buf_block_get_state(block)) {
1609
 
                        case BUF_BLOCK_NOT_USED:
1610
 
                                continue;
1611
 
                        case BUF_BLOCK_FILE_PAGE:
1612
 
                                break;
1613
 
                        default:
1614
 
                                nonfree++;
1615
 
                                continue;
1616
 
                        }
1617
 
 
1618
 
                        mutex_enter(&block->mutex);
1619
 
                        /* The following calls will temporarily
1620
 
                        release block->mutex and buf_pool->mutex.
1621
 
                        Therefore, we have to always retry,
1622
 
                        even if !dirty && !nonfree. */
1623
 
 
1624
 
                        if (!buf_flush_ready_for_replace(&block->page)) {
1625
 
 
1626
 
                                buf_LRU_make_block_old(&block->page);
1627
 
                                dirty++;
1628
 
                        } else if (buf_LRU_free_block(&block->page, TRUE, NULL)
1629
 
                                   != BUF_LRU_FREED) {
1630
 
                                nonfree++;
1631
 
                        }
1632
 
 
1633
 
                        mutex_exit(&block->mutex);
1634
 
                }
1635
 
 
1636
 
                buf_pool_mutex_exit(buf_pool);
1637
 
 
1638
 
                /* Request for a flush of the chunk if it helps.
1639
 
                Do not flush if there are non-free blocks, since
1640
 
                flushing will not make the chunk freeable. */
1641
 
                if (nonfree) {
1642
 
                        /* Avoid busy-waiting. */
1643
 
                        os_thread_sleep(100000);
1644
 
                } else if (dirty
1645
 
                           && buf_flush_LRU(buf_pool, dirty)
1646
 
                              == ULINT_UNDEFINED) {
1647
 
 
1648
 
                        buf_flush_wait_batch_end(buf_pool, BUF_FLUSH_LRU);
1649
 
                }
1650
 
 
1651
 
                goto try_again;
1652
 
        }
1653
 
 
1654
 
        max_size = max_free_size;
1655
 
        max_chunk = max_free_chunk;
1656
 
 
1657
 
        buf_pool->old_pool_size = buf_pool->curr_pool_size;
1658
 
 
1659
 
        /* Rewrite buf_pool->chunks.  Copy everything but max_chunk. */
1660
 
        chunks = mem_alloc((buf_pool->n_chunks - 1) * sizeof *chunks);
1661
 
        memcpy(chunks, buf_pool->chunks,
1662
 
               (max_chunk - buf_pool->chunks) * sizeof *chunks);
1663
 
        memcpy(chunks + (max_chunk - buf_pool->chunks),
1664
 
               max_chunk + 1,
1665
 
               buf_pool->chunks + buf_pool->n_chunks
1666
 
               - (max_chunk + 1));
1667
 
        ut_a(buf_pool->curr_size > max_chunk->size);
1668
 
        buf_pool->curr_size -= max_chunk->size;
1669
 
        buf_pool->curr_pool_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
1670
 
        chunk_size -= max_chunk->size;
1671
 
        buf_chunk_free(buf_pool, max_chunk);
1672
 
        mem_free(buf_pool->chunks);
1673
 
        buf_pool->chunks = chunks;
1674
 
        buf_pool->n_chunks--;
1675
 
 
1676
 
        /* Allow a slack of one megabyte. */
1677
 
        if (chunk_size > 1048576 / UNIV_PAGE_SIZE) {
1678
 
 
1679
 
                goto shrink_again;
1680
 
        }
1681
 
        goto func_exit;
1682
 
 
1683
 
func_done:
1684
 
        buf_pool->old_pool_size = buf_pool->curr_pool_size;
1685
 
func_exit:
1686
 
        buf_pool_mutex_exit(buf_pool);
1687
 
        btr_search_enable();
1688
 
}
1689
 
 
1690
 
/********************************************************************//**
1691
 
Shrinks the buffer pool. */
1692
 
static
1693
 
void
1694
 
buf_pool_shrink(
1695
 
/*============*/
1696
 
        ulint   chunk_size)     /*!< in: number of pages to remove */
1697
 
{
1698
 
        ulint   i;
1699
 
 
1700
 
        for (i = 0; i < srv_buf_pool_instances; i++) {
1701
 
                buf_pool_t*     buf_pool;
1702
 
                ulint           instance_chunk_size;
1703
 
 
1704
 
                instance_chunk_size = chunk_size / srv_buf_pool_instances;
1705
 
                buf_pool = buf_pool_from_array(i);
1706
 
                buf_pool_shrink_instance(buf_pool, instance_chunk_size);
1707
 
        }
1708
 
 
1709
 
        buf_pool_set_sizes();
1710
 
}
1711
 
 
1712
 
/********************************************************************//**
1713
 
Rebuild buf_pool->page_hash for a buffer pool instance. */
1714
 
static
1715
 
void
1716
 
buf_pool_page_hash_rebuild_instance(
1717
 
/*================================*/
1718
 
        buf_pool_t*     buf_pool)               /*!< in: buffer pool instance */
1719
 
{
1720
 
        ulint           i;
1721
 
        buf_page_t*     b;
1722
 
        buf_chunk_t*    chunk;
1723
 
        ulint           n_chunks;
1724
 
        hash_table_t*   zip_hash;
1725
 
        hash_table_t*   page_hash;
1726
 
 
1727
 
        buf_pool_mutex_enter(buf_pool);
1728
 
 
1729
 
        /* Free, create, and populate the hash table. */
1730
 
        hash_table_free(buf_pool->page_hash);
1731
 
        buf_pool->page_hash = page_hash = hash_create(2 * buf_pool->curr_size);
1732
 
        zip_hash = hash_create(2 * buf_pool->curr_size);
1733
 
 
1734
 
        HASH_MIGRATE(buf_pool->zip_hash, zip_hash, buf_page_t, hash,
1735
 
                     BUF_POOL_ZIP_FOLD_BPAGE);
1736
 
 
1737
 
        hash_table_free(buf_pool->zip_hash);
1738
 
        buf_pool->zip_hash = zip_hash;
1739
 
 
1740
 
        /* Insert the uncompressed file pages to buf_pool->page_hash. */
1741
 
 
1742
 
        chunk = buf_pool->chunks;
1743
 
        n_chunks = buf_pool->n_chunks;
1744
 
 
1745
 
        for (i = 0; i < n_chunks; i++, chunk++) {
1746
 
                ulint           j;
1747
 
                buf_block_t*    block = chunk->blocks;
1748
 
 
1749
 
                for (j = 0; j < chunk->size; j++, block++) {
1750
 
                        if (buf_block_get_state(block)
1751
 
                            == BUF_BLOCK_FILE_PAGE) {
1752
 
                                ut_ad(!block->page.in_zip_hash);
1753
 
                                ut_ad(block->page.in_page_hash);
1754
 
 
1755
 
                                HASH_INSERT(buf_page_t, hash, page_hash,
1756
 
                                            buf_page_address_fold(
1757
 
                                                    block->page.space,
1758
 
                                                    block->page.offset),
1759
 
                                            &block->page);
1760
 
                        }
1761
 
                }
1762
 
        }
1763
 
 
1764
 
        /* Insert the compressed-only pages to buf_pool->page_hash.
1765
 
        All such blocks are either in buf_pool->zip_clean or
1766
 
        in buf_pool->flush_list. */
1767
 
 
1768
 
        for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
1769
 
             b = UT_LIST_GET_NEXT(list, b)) {
1770
 
                ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
1771
 
                ut_ad(!b->in_flush_list);
1772
 
                ut_ad(b->in_LRU_list);
1773
 
                ut_ad(b->in_page_hash);
1774
 
                ut_ad(!b->in_zip_hash);
1775
 
 
1776
 
                HASH_INSERT(buf_page_t, hash, page_hash,
1777
 
                            buf_page_address_fold(b->space, b->offset), b);
1778
 
        }
1779
 
 
1780
 
        buf_flush_list_mutex_enter(buf_pool);
1781
 
        for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
1782
 
             b = UT_LIST_GET_NEXT(list, b)) {
1783
 
                ut_ad(b->in_flush_list);
1784
 
                ut_ad(b->in_LRU_list);
1785
 
                ut_ad(b->in_page_hash);
1786
 
                ut_ad(!b->in_zip_hash);
1787
 
 
1788
 
                switch (buf_page_get_state(b)) {
1789
 
                case BUF_BLOCK_ZIP_DIRTY:
1790
 
                        HASH_INSERT(buf_page_t, hash, page_hash,
1791
 
                                    buf_page_address_fold(b->space,
1792
 
                                                          b->offset), b);
1793
 
                        break;
1794
 
                case BUF_BLOCK_FILE_PAGE:
1795
 
                        /* uncompressed page */
1796
 
                        break;
1797
 
                case BUF_BLOCK_ZIP_FREE:
1798
 
                case BUF_BLOCK_ZIP_PAGE:
1799
 
                case BUF_BLOCK_NOT_USED:
1800
 
                case BUF_BLOCK_READY_FOR_USE:
1801
 
                case BUF_BLOCK_MEMORY:
1802
 
                case BUF_BLOCK_REMOVE_HASH:
1803
 
                        ut_error;
1804
 
                        break;
1805
 
                }
1806
 
        }
1807
 
 
1808
 
        buf_flush_list_mutex_exit(buf_pool);
1809
 
        buf_pool_mutex_exit(buf_pool);
1810
 
}
1811
 
 
1812
 
/********************************************************************
1813
 
Determine if a block is a sentinel for a buffer pool watch.
1814
 
@return TRUE if a sentinel for a buffer pool watch, FALSE if not */
1815
 
UNIV_INTERN
1816
 
ibool
1817
 
buf_pool_watch_is_sentinel(
1818
 
/*=======================*/
1819
 
        buf_pool_t*             buf_pool,       /*!< buffer pool instance */
1820
 
        const buf_page_t*       bpage)          /*!< in: block */
1821
 
{
1822
 
        ut_ad(buf_page_in_file(bpage));
1823
 
 
1824
 
        if (bpage < &buf_pool->watch[0]
1825
 
            || bpage >= &buf_pool->watch[BUF_POOL_WATCH_SIZE]) {
1826
 
 
1827
 
                ut_ad(buf_page_get_state(bpage) != BUF_BLOCK_ZIP_PAGE
1828
 
                      || bpage->zip.data != NULL);
1829
 
 
1830
 
                return(FALSE);
1831
 
        }
1832
 
 
1833
 
        ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_PAGE);
1834
 
        ut_ad(!bpage->in_zip_hash);
1835
 
        ut_ad(bpage->in_page_hash);
1836
 
        ut_ad(bpage->zip.data == NULL);
1837
 
        ut_ad(bpage->buf_fix_count > 0);
1838
 
        return(TRUE);
1839
 
}
1840
 
 
1841
 
/****************************************************************//**
1842
 
Add watch for the given page to be read in. Caller must have the buffer pool
1843
 
mutex reserved.
1844
 
@return NULL if watch set, block if the page is in the buffer pool */
1845
 
UNIV_INTERN
1846
 
buf_page_t*
1847
 
buf_pool_watch_set(
1848
 
/*===============*/
1849
 
        ulint   space,  /*!< in: space id */
1850
 
        ulint   offset, /*!< in: page number */
1851
 
        ulint   fold)   /*!< in: buf_page_address_fold(space, offset) */
1852
 
{
1853
 
        buf_page_t*     bpage;
1854
 
        ulint           i;
1855
 
        buf_pool_t*     buf_pool = buf_pool_get(space, offset);
1856
 
 
1857
 
        ut_ad(buf_pool_mutex_own(buf_pool));
1858
 
 
1859
 
        bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
1860
 
 
1861
 
        if (UNIV_LIKELY_NULL(bpage)) {
1862
 
                if (!buf_pool_watch_is_sentinel(buf_pool, bpage)) {
1863
 
                        /* The page was loaded meanwhile. */
1864
 
                        return(bpage);
1865
 
                }
1866
 
                /* Add to an existing watch. */
1867
 
                bpage->buf_fix_count++;
1868
 
                return(NULL);
1869
 
        }
1870
 
 
1871
 
        for (i = 0; i < BUF_POOL_WATCH_SIZE; i++) {
1872
 
                bpage = &buf_pool->watch[i];
1873
 
 
1874
 
                ut_ad(bpage->access_time == 0);
1875
 
                ut_ad(bpage->newest_modification == 0);
1876
 
                ut_ad(bpage->oldest_modification == 0);
1877
 
                ut_ad(bpage->zip.data == NULL);
1878
 
                ut_ad(!bpage->in_zip_hash);
1879
 
 
1880
 
                switch (bpage->state) {
1881
 
                case BUF_BLOCK_POOL_WATCH:
1882
 
                        ut_ad(!bpage->in_page_hash);
1883
 
                        ut_ad(bpage->buf_fix_count == 0);
1884
 
 
1885
 
                        /* bpage is pointing to buf_pool_watch[],
1886
 
                        which is protected by buf_pool_mutex.
1887
 
                        Normally, buf_page_t objects are protected by
1888
 
                        buf_block_t::mutex or buf_pool->zip_mutex or both. */
1889
 
 
1890
 
                        bpage->state = BUF_BLOCK_ZIP_PAGE;
1891
 
                        bpage->space = space;
1892
 
                        bpage->offset = offset;
1893
 
                        bpage->buf_fix_count = 1;
1894
 
 
1895
 
                        ut_d(bpage->in_page_hash = TRUE);
1896
 
                        HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
1897
 
                                    fold, bpage);
1898
 
                        return(NULL);
1899
 
                case BUF_BLOCK_ZIP_PAGE:
1900
 
                        ut_ad(bpage->in_page_hash);
1901
 
                        ut_ad(bpage->buf_fix_count > 0);
1902
 
                        break;
1903
 
                default:
1904
 
                        ut_error;
1905
 
                }
1906
 
        }
1907
 
 
1908
 
        /* Allocation failed.  Either the maximum number of purge
1909
 
        threads should never exceed BUF_POOL_WATCH_SIZE, or this code
1910
 
        should be modified to return a special non-NULL value and the
1911
 
        caller should purge the record directly. */
1912
 
        ut_error;
1913
 
 
1914
 
        /* Fix compiler warning */
1915
 
        return(NULL);
1916
 
}
1917
 
 
1918
 
/********************************************************************//**
1919
 
Rebuild buf_pool->page_hash. */
1920
 
static
1921
 
void
1922
 
buf_pool_page_hash_rebuild(void)
1923
 
/*============================*/
1924
 
{
1925
 
        ulint   i;
1926
 
 
1927
 
        for (i = 0; i < srv_buf_pool_instances; i++) {
1928
 
                buf_pool_page_hash_rebuild_instance(buf_pool_from_array(i));
1929
 
        }
1930
 
}
1931
 
 
1932
 
/********************************************************************//**
1933
 
Increase the buffer pool size of one buffer pool instance. */
1934
 
static
1935
 
void
1936
 
buf_pool_increase_instance(
1937
 
/*=======================*/
1938
 
        buf_pool_t*     buf_pool,       /*!< in: buffer pool instane */
1939
 
        ulint           change_size)    /*!< in: new size of the pool */
1940
 
{
1941
 
        buf_chunk_t*    chunks;
1942
 
        buf_chunk_t*    chunk;
1943
 
 
1944
 
        buf_pool_mutex_enter(buf_pool);
1945
 
        chunks = mem_alloc((buf_pool->n_chunks + 1) * sizeof *chunks);
1946
 
 
1947
 
        memcpy(chunks, buf_pool->chunks, buf_pool->n_chunks * sizeof *chunks);
1948
 
 
1949
 
        chunk = &chunks[buf_pool->n_chunks];
1950
 
 
1951
 
        if (!buf_chunk_init(buf_pool, chunk, change_size)) {
1952
 
                mem_free(chunks);
1953
 
        } else {
1954
 
                buf_pool->old_pool_size = buf_pool->curr_pool_size;
1955
 
                buf_pool->curr_size += chunk->size;
1956
 
                buf_pool->curr_pool_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
1957
 
                mem_free(buf_pool->chunks);
1958
 
                buf_pool->chunks = chunks;
1959
 
                buf_pool->n_chunks++;
1960
 
        }
1961
 
 
1962
 
        buf_pool_mutex_exit(buf_pool);
1963
 
}
1964
 
 
1965
 
/********************************************************************//**
1966
 
Increase the buffer pool size. */
1967
 
static
1968
 
void
1969
 
buf_pool_increase(
1970
 
/*==============*/
1971
 
        ulint   change_size)
1972
 
{
1973
 
        ulint   i;
1974
 
 
1975
 
        for (i = 0; i < srv_buf_pool_instances; i++) {
1976
 
                buf_pool_increase_instance(
1977
 
                        buf_pool_from_array(i),
1978
 
                        change_size / srv_buf_pool_instances);
1979
 
        }
1980
 
 
1981
 
        buf_pool_set_sizes();
1982
 
}
1983
 
 
1984
 
/********************************************************************//**
1985
 
Resizes the buffer pool. */
1986
 
UNIV_INTERN
1987
 
void
1988
 
buf_pool_resize(void)
1989
 
/*=================*/
1990
 
{
1991
 
        ulint   change_size;
1992
 
        ulint   min_change_size = 1048576 * srv_buf_pool_instances;
1993
 
 
1994
 
        buf_pool_mutex_enter_all();
1995
 
  
1996
 
        if (srv_buf_pool_old_size == srv_buf_pool_size) {
1997
 
  
1998
 
                buf_pool_mutex_exit_all();
1999
 
 
2000
 
                return;
2001
 
 
2002
 
        } else if (srv_buf_pool_curr_size + min_change_size
2003
 
                   > srv_buf_pool_size) {
2004
 
  
2005
 
                change_size = (srv_buf_pool_curr_size - srv_buf_pool_size)
2006
 
                            / UNIV_PAGE_SIZE;
2007
 
 
2008
 
                buf_pool_mutex_exit_all();
2009
 
  
2010
 
                /* Disable adaptive hash indexes and empty the index
2011
 
                in order to free up memory in the buffer pool chunks. */
2012
 
                buf_pool_shrink(change_size);
2013
 
 
2014
 
        } else if (srv_buf_pool_curr_size + min_change_size
2015
 
                   < srv_buf_pool_size) {
2016
 
 
2017
 
                /* Enlarge the buffer pool by at least one megabyte */
2018
 
  
2019
 
                change_size = srv_buf_pool_size - srv_buf_pool_curr_size;
2020
 
 
2021
 
                buf_pool_mutex_exit_all();
2022
 
 
2023
 
                buf_pool_increase(change_size);
2024
 
        } else {
2025
 
                srv_buf_pool_size = srv_buf_pool_old_size;
2026
 
 
2027
 
                buf_pool_mutex_exit_all();
2028
 
 
2029
 
                return;
2030
 
        }
2031
 
  
2032
 
        buf_pool_page_hash_rebuild();
2033
 
}
2034
 
 
2035
 
/****************************************************************//**
2036
 
Remove the sentinel block for the watch before replacing it with a real block.
2037
 
buf_page_watch_clear() or buf_page_watch_occurred() will notice that
2038
 
the block has been replaced with the real block.
2039
 
@return reference count, to be added to the replacement block */
2040
 
static
2041
 
void
2042
 
buf_pool_watch_remove(
2043
 
/*==================*/
2044
 
        buf_pool_t*     buf_pool,       /*!< buffer pool instance */
2045
 
        ulint           fold,           /*!< in: buf_page_address_fold(
2046
 
                                        space, offset) */
2047
 
        buf_page_t*     watch)          /*!< in/out: sentinel for watch */
2048
 
{
2049
 
        ut_ad(buf_pool_mutex_own(buf_pool));
2050
 
 
2051
 
        HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, watch);
2052
 
        ut_d(watch->in_page_hash = FALSE);
2053
 
        watch->buf_fix_count = 0;
2054
 
        watch->state = BUF_BLOCK_POOL_WATCH;
2055
 
}
2056
 
 
2057
 
/****************************************************************//**
2058
 
Stop watching if the page has been read in.
2059
 
buf_pool_watch_set(space,offset) must have returned NULL before. */
2060
 
UNIV_INTERN
2061
 
void
2062
 
buf_pool_watch_unset(
2063
 
/*=================*/
2064
 
        ulint   space,  /*!< in: space id */
2065
 
        ulint   offset) /*!< in: page number */
2066
 
{
2067
 
        buf_page_t*     bpage;
2068
 
        buf_pool_t*     buf_pool = buf_pool_get(space, offset);
2069
 
        ulint           fold = buf_page_address_fold(space, offset);
2070
 
 
2071
 
        buf_pool_mutex_enter(buf_pool);
2072
 
        bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
2073
 
        /* The page must exist because buf_pool_watch_set()
2074
 
        increments buf_fix_count. */
2075
 
        ut_a(bpage);
2076
 
 
2077
 
        if (UNIV_UNLIKELY(!buf_pool_watch_is_sentinel(buf_pool, bpage))) {
2078
 
                mutex_t* mutex = buf_page_get_mutex(bpage);
2079
 
 
2080
 
                mutex_enter(mutex);
2081
 
                ut_a(bpage->buf_fix_count > 0);
2082
 
                bpage->buf_fix_count--;
2083
 
                mutex_exit(mutex);
2084
 
        } else {
2085
 
                ut_a(bpage->buf_fix_count > 0);
2086
 
 
2087
 
                if (UNIV_LIKELY(!--bpage->buf_fix_count)) {
2088
 
                        buf_pool_watch_remove(buf_pool, fold, bpage);
2089
 
                }
2090
 
        }
2091
 
 
2092
 
        buf_pool_mutex_exit(buf_pool);
2093
 
}
2094
 
 
2095
 
/****************************************************************//**
2096
 
Check if the page has been read in.
2097
 
This may only be called after buf_pool_watch_set(space,offset)
2098
 
has returned NULL and before invoking buf_pool_watch_unset(space,offset).
2099
 
@return FALSE if the given page was not read in, TRUE if it was */
2100
 
UNIV_INTERN
2101
 
ibool
2102
 
buf_pool_watch_occurred(
2103
 
/*====================*/
2104
 
        ulint   space,  /*!< in: space id */
2105
 
        ulint   offset) /*!< in: page number */
2106
 
{
2107
 
        ibool           ret;
2108
 
        buf_page_t*     bpage;
2109
 
        buf_pool_t*     buf_pool = buf_pool_get(space, offset);
2110
 
        ulint           fold    = buf_page_address_fold(space, offset);
2111
 
 
2112
 
        buf_pool_mutex_enter(buf_pool);
2113
 
 
2114
 
        bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
2115
 
        /* The page must exist because buf_pool_watch_set()
2116
 
        increments buf_fix_count. */
2117
 
        ut_a(bpage);
2118
 
        ret = !buf_pool_watch_is_sentinel(buf_pool, bpage);
2119
 
        buf_pool_mutex_exit(buf_pool);
2120
 
 
2121
 
        return(ret);
2122
 
}
2123
 
 
2124
 
/********************************************************************//**
2125
 
Moves a page to the start of the buffer pool LRU list. This high-level
2126
 
function can be used to prevent an important page from slipping out of
2127
 
the buffer pool. */
2128
 
UNIV_INTERN
2129
 
void
2130
 
buf_page_make_young(
2131
 
/*================*/
2132
 
        buf_page_t*     bpage)  /*!< in: buffer block of a file page */
2133
 
{
2134
 
        buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
2135
 
 
2136
 
        buf_pool_mutex_enter(buf_pool);
2137
 
 
2138
 
        ut_a(buf_page_in_file(bpage));
2139
 
 
2140
 
        buf_LRU_make_block_young(bpage);
2141
 
 
2142
 
        buf_pool_mutex_exit(buf_pool);
2143
 
}
2144
 
 
2145
 
/********************************************************************//**
2146
 
Sets the time of the first access of a page and moves a page to the
2147
 
start of the buffer pool LRU list if it is too old.  This high-level
2148
 
function can be used to prevent an important page from slipping
2149
 
out of the buffer pool. */
2150
 
static
2151
 
void
2152
 
buf_page_set_accessed_make_young(
2153
 
/*=============================*/
2154
 
        buf_page_t*     bpage,          /*!< in/out: buffer block of a
2155
 
                                        file page */
2156
 
        unsigned        access_time)    /*!< in: bpage->access_time
2157
 
                                        read under mutex protection,
2158
 
                                        or 0 if unknown */
2159
 
{
2160
 
        buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
2161
 
 
2162
 
        ut_ad(!buf_pool_mutex_own(buf_pool));
2163
 
        ut_a(buf_page_in_file(bpage));
2164
 
 
2165
 
        if (buf_page_peek_if_too_old(bpage)) {
2166
 
                buf_pool_mutex_enter(buf_pool);
2167
 
                buf_LRU_make_block_young(bpage);
2168
 
                buf_pool_mutex_exit(buf_pool);
2169
 
        } else if (!access_time) {
2170
 
                ulint   time_ms = ut_time_ms();
2171
 
                buf_pool_mutex_enter(buf_pool);
2172
 
                buf_page_set_accessed(bpage, time_ms);
2173
 
                buf_pool_mutex_exit(buf_pool);
2174
 
        }
2175
 
}
2176
 
 
2177
 
/********************************************************************//**
2178
 
Resets the check_index_page_at_flush field of a page if found in the buffer
2179
 
pool. */
2180
 
UNIV_INTERN
2181
 
void
2182
 
buf_reset_check_index_page_at_flush(
2183
 
/*================================*/
2184
 
        ulint   space,  /*!< in: space id */
2185
 
        ulint   offset) /*!< in: page number */
2186
 
{
2187
 
        buf_block_t*    block;
2188
 
        buf_pool_t*     buf_pool = buf_pool_get(space, offset);
2189
 
 
2190
 
        buf_pool_mutex_enter(buf_pool);
2191
 
 
2192
 
        block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset);
2193
 
 
2194
 
        if (block && buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE) {
2195
 
                ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page));
2196
 
                block->check_index_page_at_flush = FALSE;
2197
 
        }
2198
 
 
2199
 
        buf_pool_mutex_exit(buf_pool);
2200
 
}
2201
 
 
2202
 
/********************************************************************//**
2203
 
Returns the current state of is_hashed of a page. FALSE if the page is
2204
 
not in the pool. NOTE that this operation does not fix the page in the
2205
 
pool if it is found there.
2206
 
@return TRUE if page hash index is built in search system */
2207
 
UNIV_INTERN
2208
 
ibool
2209
 
buf_page_peek_if_search_hashed(
2210
 
/*===========================*/
2211
 
        ulint   space,  /*!< in: space id */
2212
 
        ulint   offset) /*!< in: page number */
2213
 
{
2214
 
        buf_block_t*    block;
2215
 
        ibool           is_hashed;
2216
 
        buf_pool_t*     buf_pool = buf_pool_get(space, offset);
2217
 
 
2218
 
        buf_pool_mutex_enter(buf_pool);
2219
 
 
2220
 
        block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset);
2221
 
 
2222
 
        if (!block || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
2223
 
                is_hashed = FALSE;
2224
 
        } else {
2225
 
                ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page));
2226
 
                is_hashed = block->is_hashed;
2227
 
        }
2228
 
 
2229
 
        buf_pool_mutex_exit(buf_pool);
2230
 
 
2231
 
        return(is_hashed);
2232
 
}
2233
 
 
2234
 
#ifdef UNIV_DEBUG_FILE_ACCESSES
2235
 
/********************************************************************//**
2236
 
Sets file_page_was_freed TRUE if the page is found in the buffer pool.
2237
 
This function should be called when we free a file page and want the
2238
 
debug version to check that it is not accessed any more unless
2239
 
reallocated.
2240
 
@return control block if found in page hash table, otherwise NULL */
2241
 
UNIV_INTERN
2242
 
buf_page_t*
2243
 
buf_page_set_file_page_was_freed(
2244
 
/*=============================*/
2245
 
        ulint   space,  /*!< in: space id */
2246
 
        ulint   offset) /*!< in: page number */
2247
 
{
2248
 
        buf_page_t*     bpage;
2249
 
        buf_pool_t*     buf_pool = buf_pool_get(space, offset);
2250
 
 
2251
 
        buf_pool_mutex_enter(buf_pool);
2252
 
 
2253
 
        bpage = buf_page_hash_get(buf_pool, space, offset);
2254
 
 
2255
 
        if (bpage) {
2256
 
                ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
2257
 
                bpage->file_page_was_freed = TRUE;
2258
 
        }
2259
 
 
2260
 
        buf_pool_mutex_exit(buf_pool);
2261
 
 
2262
 
        return(bpage);
2263
 
}
2264
 
 
2265
 
/********************************************************************//**
2266
 
Sets file_page_was_freed FALSE if the page is found in the buffer pool.
2267
 
This function should be called when we free a file page and want the
2268
 
debug version to check that it is not accessed any more unless
2269
 
reallocated.
2270
 
@return control block if found in page hash table, otherwise NULL */
2271
 
UNIV_INTERN
2272
 
buf_page_t*
2273
 
buf_page_reset_file_page_was_freed(
2274
 
/*===============================*/
2275
 
        ulint   space,  /*!< in: space id */
2276
 
        ulint   offset) /*!< in: page number */
2277
 
{
2278
 
        buf_page_t*     bpage;
2279
 
        buf_pool_t*     buf_pool = buf_pool_get(space, offset);
2280
 
 
2281
 
        buf_pool_mutex_enter(buf_pool);
2282
 
 
2283
 
        bpage = buf_page_hash_get(buf_pool, space, offset);
2284
 
 
2285
 
        if (bpage) {
2286
 
                ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
2287
 
                bpage->file_page_was_freed = FALSE;
2288
 
        }
2289
 
 
2290
 
        buf_pool_mutex_exit(buf_pool);
2291
 
 
2292
 
        return(bpage);
2293
 
}
2294
 
#endif /* UNIV_DEBUG_FILE_ACCESSES */
2295
 
 
2296
 
/********************************************************************//**
2297
 
Get read access to a compressed page (usually of type
2298
 
FIL_PAGE_TYPE_ZBLOB or FIL_PAGE_TYPE_ZBLOB2).
2299
 
The page must be released with buf_page_release_zip().
2300
 
NOTE: the page is not protected by any latch.  Mutual exclusion has to
2301
 
be implemented at a higher level.  In other words, all possible
2302
 
accesses to a given page through this function must be protected by
2303
 
the same set of mutexes or latches.
2304
 
@return pointer to the block */
2305
 
UNIV_INTERN
2306
 
buf_page_t*
2307
 
buf_page_get_zip(
2308
 
/*=============*/
2309
 
        ulint           space,  /*!< in: space id */
2310
 
        ulint           zip_size,/*!< in: compressed page size */
2311
 
        ulint           offset) /*!< in: page number */
2312
 
{
2313
 
        buf_page_t*     bpage;
2314
 
        mutex_t*        block_mutex;
2315
 
        ibool           must_read;
2316
 
        unsigned        access_time;
2317
 
        buf_pool_t*     buf_pool = buf_pool_get(space, offset);
2318
 
 
2319
 
#ifndef UNIV_LOG_DEBUG
2320
 
        ut_ad(!ibuf_inside());
2321
 
#endif
2322
 
        buf_pool->stat.n_page_gets++;
2323
 
 
2324
 
        for (;;) {
2325
 
                buf_pool_mutex_enter(buf_pool);
2326
 
lookup:
2327
 
                bpage = buf_page_hash_get(buf_pool, space, offset);
2328
 
                if (bpage) {
2329
 
                        ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
2330
 
                        break;
2331
 
                }
2332
 
 
2333
 
                /* Page not in buf_pool: needs to be read from file */
2334
 
 
2335
 
                buf_pool_mutex_exit(buf_pool);
2336
 
 
2337
 
                buf_read_page(space, zip_size, offset);
2338
 
 
2339
 
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
2340
 
                ut_a(++buf_dbg_counter % 37 || buf_validate());
2341
 
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
2342
 
        }
2343
 
 
2344
 
        if (UNIV_UNLIKELY(!bpage->zip.data)) {
2345
 
                /* There is no compressed page. */
2346
 
err_exit:
2347
 
                buf_pool_mutex_exit(buf_pool);
2348
 
                return(NULL);
2349
 
        }
2350
 
 
2351
 
        ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
2352
 
 
2353
 
        switch (buf_page_get_state(bpage)) {
2354
 
        case BUF_BLOCK_NOT_USED:
2355
 
        case BUF_BLOCK_READY_FOR_USE:
2356
 
        case BUF_BLOCK_MEMORY:
2357
 
        case BUF_BLOCK_REMOVE_HASH:
2358
 
        case BUF_BLOCK_ZIP_FREE:
2359
 
                break;
2360
 
        case BUF_BLOCK_ZIP_PAGE:
2361
 
        case BUF_BLOCK_ZIP_DIRTY:
2362
 
                block_mutex = &buf_pool->zip_mutex;
2363
 
                mutex_enter(block_mutex);
2364
 
                bpage->buf_fix_count++;
2365
 
                goto got_block;
2366
 
        case BUF_BLOCK_FILE_PAGE:
2367
 
                block_mutex = &((buf_block_t*) bpage)->mutex;
2368
 
                mutex_enter(block_mutex);
2369
 
 
2370
 
                /* Discard the uncompressed page frame if possible. */
2371
 
                if (buf_LRU_free_block(bpage, FALSE, NULL)
2372
 
                    == BUF_LRU_FREED) {
2373
 
 
2374
 
                        mutex_exit(block_mutex);
2375
 
                        goto lookup;
2376
 
                }
2377
 
 
2378
 
                buf_block_buf_fix_inc((buf_block_t*) bpage,
2379
 
                                      __FILE__, __LINE__);
2380
 
                goto got_block;
2381
 
        }
2382
 
 
2383
 
        ut_error;
2384
 
        goto err_exit;
2385
 
 
2386
 
got_block:
2387
 
        must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ;
2388
 
        access_time = buf_page_is_accessed(bpage);
2389
 
 
2390
 
        buf_pool_mutex_exit(buf_pool);
2391
 
 
2392
 
        mutex_exit(block_mutex);
2393
 
 
2394
 
        buf_page_set_accessed_make_young(bpage, access_time);
2395
 
 
2396
 
#ifdef UNIV_DEBUG_FILE_ACCESSES
2397
 
        ut_a(!bpage->file_page_was_freed);
2398
 
#endif
2399
 
 
2400
 
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
2401
 
        ut_a(++buf_dbg_counter % 5771 || buf_validate());
2402
 
        ut_a(bpage->buf_fix_count > 0);
2403
 
        ut_a(buf_page_in_file(bpage));
2404
 
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
2405
 
 
2406
 
        if (must_read) {
2407
 
                /* Let us wait until the read operation
2408
 
                completes */
2409
 
 
2410
 
                for (;;) {
2411
 
                        enum buf_io_fix io_fix;
2412
 
 
2413
 
                        mutex_enter(block_mutex);
2414
 
                        io_fix = buf_page_get_io_fix(bpage);
2415
 
                        mutex_exit(block_mutex);
2416
 
 
2417
 
                        if (io_fix == BUF_IO_READ) {
2418
 
 
2419
 
                                os_thread_sleep(WAIT_FOR_READ);
2420
 
                        } else {
2421
 
                                break;
2422
 
                        }
2423
 
                }
2424
 
        }
2425
 
 
2426
 
#ifdef UNIV_IBUF_COUNT_DEBUG
2427
 
        ut_a(ibuf_count_get(buf_page_get_space(bpage),
2428
 
                            buf_page_get_page_no(bpage)) == 0);
2429
 
#endif
2430
 
        return(bpage);
2431
 
}
2432
 
 
2433
 
/********************************************************************//**
2434
 
Initialize some fields of a control block. */
2435
 
UNIV_INLINE
2436
 
void
2437
 
buf_block_init_low(
2438
 
/*===============*/
2439
 
        buf_block_t*    block)  /*!< in: block to init */
2440
 
{
2441
 
        block->check_index_page_at_flush = FALSE;
2442
 
        block->index            = NULL;
2443
 
 
2444
 
        block->n_hash_helps     = 0;
2445
 
        block->is_hashed        = FALSE;
2446
 
        block->n_fields         = 1;
2447
 
        block->n_bytes          = 0;
2448
 
        block->left_side        = TRUE;
2449
 
}
2450
 
#endif /* !UNIV_HOTBACKUP */
2451
 
 
2452
 
/********************************************************************//**
2453
 
Decompress a block.
2454
 
@return TRUE if successful */
2455
 
UNIV_INTERN
2456
 
ibool
2457
 
buf_zip_decompress(
2458
 
/*===============*/
2459
 
        buf_block_t*    block,  /*!< in/out: block */
2460
 
        ibool           check)  /*!< in: TRUE=verify the page checksum */
2461
 
{
2462
 
        const byte*     frame           = block->page.zip.data;
2463
 
        ulint           stamp_checksum  = mach_read_from_4(
2464
 
                frame + FIL_PAGE_SPACE_OR_CHKSUM);
2465
 
 
2466
 
        ut_ad(buf_block_get_zip_size(block));
2467
 
        ut_a(buf_block_get_space(block) != 0);
2468
 
 
2469
 
        if (UNIV_LIKELY(check && stamp_checksum != BUF_NO_CHECKSUM_MAGIC)) {
2470
 
                ulint   calc_checksum   = page_zip_calc_checksum(
2471
 
                        frame, page_zip_get_size(&block->page.zip));
2472
 
 
2473
 
                if (UNIV_UNLIKELY(stamp_checksum != calc_checksum)) {
2474
 
                        ut_print_timestamp(stderr);
2475
 
                        fprintf(stderr,
2476
 
                                "  InnoDB: compressed page checksum mismatch"
2477
 
                                " (space %u page %u): %lu != %lu\n",
2478
 
                                block->page.space, block->page.offset,
2479
 
                                stamp_checksum, calc_checksum);
2480
 
                        return(FALSE);
2481
 
                }
2482
 
        }
2483
 
 
2484
 
        switch (fil_page_get_type(frame)) {
2485
 
        case FIL_PAGE_INDEX:
2486
 
                if (page_zip_decompress(&block->page.zip,
2487
 
                                        block->frame, TRUE)) {
2488
 
                        return(TRUE);
2489
 
                }
2490
 
 
2491
 
                fprintf(stderr,
2492
 
                        "InnoDB: unable to decompress space %lu page %lu\n",
2493
 
                        (ulong) block->page.space,
2494
 
                        (ulong) block->page.offset);
2495
 
                return(FALSE);
2496
 
 
2497
 
        case FIL_PAGE_TYPE_ALLOCATED:
2498
 
        case FIL_PAGE_INODE:
2499
 
        case FIL_PAGE_IBUF_BITMAP:
2500
 
        case FIL_PAGE_TYPE_FSP_HDR:
2501
 
        case FIL_PAGE_TYPE_XDES:
2502
 
        case FIL_PAGE_TYPE_ZBLOB:
2503
 
        case FIL_PAGE_TYPE_ZBLOB2:
2504
 
                /* Copy to uncompressed storage. */
2505
 
                memcpy(block->frame, frame,
2506
 
                       buf_block_get_zip_size(block));
2507
 
                return(TRUE);
2508
 
        }
2509
 
 
2510
 
        ut_print_timestamp(stderr);
2511
 
        fprintf(stderr,
2512
 
                "  InnoDB: unknown compressed page"
2513
 
                " type %lu\n",
2514
 
                fil_page_get_type(frame));
2515
 
        return(FALSE);
2516
 
}
2517
 
 
2518
 
#ifndef UNIV_HOTBACKUP
2519
 
/*******************************************************************//**
2520
 
Gets the block to whose frame the pointer is pointing to if found
2521
 
in this buffer pool instance.
2522
 
@return pointer to block */
2523
 
static
2524
 
buf_block_t*
2525
 
buf_block_align_instance(
2526
 
/*=====================*/
2527
 
        buf_pool_t*     buf_pool,       /*!< in: buffer in which the block
2528
 
                                        resides */
2529
 
        const byte*     ptr)            /*!< in: pointer to a frame */
2530
 
{
2531
 
        buf_chunk_t*    chunk;
2532
 
        ulint           i;
2533
 
 
2534
 
        /* TODO: protect buf_pool->chunks with a mutex (it will
2535
 
        currently remain constant after buf_pool_init()) */
2536
 
        for (chunk = buf_pool->chunks, i = buf_pool->n_chunks; i--; chunk++) {
2537
 
                lint    offs = ptr - chunk->blocks->frame;
2538
 
 
2539
 
                if (UNIV_UNLIKELY(offs < 0)) {
2540
 
 
2541
 
                        continue;
2542
 
                }
2543
 
 
2544
 
                offs >>= UNIV_PAGE_SIZE_SHIFT;
2545
 
 
2546
 
                if (UNIV_LIKELY((ulint) offs < chunk->size)) {
2547
 
                        buf_block_t*    block = &chunk->blocks[offs];
2548
 
 
2549
 
                        /* The function buf_chunk_init() invokes
2550
 
                        buf_block_init() so that block[n].frame ==
2551
 
                        block->frame + n * UNIV_PAGE_SIZE.  Check it. */
2552
 
                        ut_ad(block->frame == page_align(ptr));
2553
 
#ifdef UNIV_DEBUG
2554
 
                        /* A thread that updates these fields must
2555
 
                        hold buf_pool->mutex and block->mutex.  Acquire
2556
 
                        only the latter. */
2557
 
                        mutex_enter(&block->mutex);
2558
 
 
2559
 
                        switch (buf_block_get_state(block)) {
2560
 
                        case BUF_BLOCK_ZIP_FREE:
2561
 
                        case BUF_BLOCK_ZIP_PAGE:
2562
 
                        case BUF_BLOCK_ZIP_DIRTY:
2563
 
                                /* These types should only be used in
2564
 
                                the compressed buffer pool, whose
2565
 
                                memory is allocated from
2566
 
                                buf_pool->chunks, in UNIV_PAGE_SIZE
2567
 
                                blocks flagged as BUF_BLOCK_MEMORY. */
2568
 
                                ut_error;
2569
 
                                break;
2570
 
                        case BUF_BLOCK_NOT_USED:
2571
 
                        case BUF_BLOCK_READY_FOR_USE:
2572
 
                        case BUF_BLOCK_MEMORY:
2573
 
                                /* Some data structures contain
2574
 
                                "guess" pointers to file pages.  The
2575
 
                                file pages may have been freed and
2576
 
                                reused.  Do not complain. */
2577
 
                                break;
2578
 
                        case BUF_BLOCK_REMOVE_HASH:
2579
 
                                /* buf_LRU_block_remove_hashed_page()
2580
 
                                will overwrite the FIL_PAGE_OFFSET and
2581
 
                                FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID with
2582
 
                                0xff and set the state to
2583
 
                                BUF_BLOCK_REMOVE_HASH. */
2584
 
                                ut_ad(page_get_space_id(page_align(ptr))
2585
 
                                      == 0xffffffff);
2586
 
                                ut_ad(page_get_page_no(page_align(ptr))
2587
 
                                      == 0xffffffff);
2588
 
                                break;
2589
 
                        case BUF_BLOCK_FILE_PAGE:
2590
 
                                ut_ad(block->page.space
2591
 
                                      == page_get_space_id(page_align(ptr)));
2592
 
                                ut_ad(block->page.offset
2593
 
                                      == page_get_page_no(page_align(ptr)));
2594
 
                                break;
2595
 
                        }
2596
 
 
2597
 
                        mutex_exit(&block->mutex);
2598
 
#endif /* UNIV_DEBUG */
2599
 
 
2600
 
                        return(block);
2601
 
                }
2602
 
        }
2603
 
 
2604
 
        return(NULL);
2605
 
}
2606
 
 
2607
 
/*******************************************************************//**
2608
 
Gets the block to whose frame the pointer is pointing to.
2609
 
@return pointer to block, never NULL */
2610
 
UNIV_INTERN
2611
 
buf_block_t*
2612
 
buf_block_align(
2613
 
/*============*/
2614
 
        const byte*     ptr)    /*!< in: pointer to a frame */
2615
 
{
2616
 
        ulint           i;
2617
 
 
2618
 
        for (i = 0; i < srv_buf_pool_instances; i++) {
2619
 
                buf_block_t*    block;
2620
 
 
2621
 
                block = buf_block_align_instance(
2622
 
                        buf_pool_from_array(i), ptr);
2623
 
                if (block) {
2624
 
                        return(block);
2625
 
                }
2626
 
        }
2627
 
 
2628
 
        /* The block should always be found. */
2629
 
        ut_error;
2630
 
        return(NULL);
2631
 
}
2632
 
 
2633
 
/********************************************************************//**
2634
 
Find out if a pointer belongs to a buf_block_t. It can be a pointer to
2635
 
the buf_block_t itself or a member of it. This functions checks one of
2636
 
the buffer pool instances.
2637
 
@return TRUE if ptr belongs to a buf_block_t struct */
2638
 
static
2639
 
ibool
2640
 
buf_pointer_is_block_field_instance(
2641
 
/*================================*/
2642
 
        buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
2643
 
        const void*     ptr)            /*!< in: pointer not dereferenced */
2644
 
{
2645
 
        const buf_chunk_t*              chunk   = buf_pool->chunks;
2646
 
        const buf_chunk_t* const        echunk  = chunk + buf_pool->n_chunks;
2647
 
 
2648
 
        /* TODO: protect buf_pool->chunks with a mutex (it will
2649
 
        currently remain constant after buf_pool_init()) */
2650
 
        while (chunk < echunk) {
2651
 
                if (ptr >= (void *)chunk->blocks
2652
 
                    && ptr < (void *)(chunk->blocks + chunk->size)) {
2653
 
 
2654
 
                        return(TRUE);
2655
 
                }
2656
 
 
2657
 
                chunk++;
2658
 
        }
2659
 
 
2660
 
        return(FALSE);
2661
 
}
2662
 
 
2663
 
/********************************************************************//**
2664
 
Find out if a pointer belongs to a buf_block_t. It can be a pointer to
2665
 
the buf_block_t itself or a member of it
2666
 
@return TRUE if ptr belongs to a buf_block_t struct */
2667
 
UNIV_INTERN
2668
 
ibool
2669
 
buf_pointer_is_block_field(
2670
 
/*=======================*/
2671
 
        const void*     ptr)    /*!< in: pointer not dereferenced */
2672
 
{
2673
 
        ulint   i;
2674
 
 
2675
 
        for (i = 0; i < srv_buf_pool_instances; i++) {
2676
 
                ibool   found;
2677
 
 
2678
 
                found = buf_pointer_is_block_field_instance(
2679
 
                        buf_pool_from_array(i), ptr);
2680
 
                if (found) {
2681
 
                        return(TRUE);
2682
 
                }
2683
 
        }
2684
 
 
2685
 
        return(FALSE);
2686
 
}
2687
 
 
2688
 
/********************************************************************//**
2689
 
Find out if a buffer block was created by buf_chunk_init().
2690
 
@return TRUE if "block" has been added to buf_pool->free by buf_chunk_init() */
2691
 
static
2692
 
ibool
2693
 
buf_block_is_uncompressed(
2694
 
/*======================*/
2695
 
        buf_pool_t*             buf_pool,       /*!< in: buffer pool instance */
2696
 
        const buf_block_t*      block)          /*!< in: pointer to block,
2697
 
                                                not dereferenced */
2698
 
{
2699
 
        ut_ad(buf_pool_mutex_own(buf_pool));
2700
 
 
2701
 
        if (UNIV_UNLIKELY((((ulint) block) % sizeof *block) != 0)) {
2702
 
                /* The pointer should be aligned. */
2703
 
                return(FALSE);
2704
 
        }
2705
 
 
2706
 
        return(buf_pointer_is_block_field_instance(buf_pool, (void *)block));
2707
 
}
2708
 
 
2709
 
/********************************************************************//**
2710
 
This is the general function used to get access to a database page.
2711
 
@return pointer to the block or NULL */
2712
 
UNIV_INTERN
2713
 
buf_block_t*
2714
 
buf_page_get_gen(
2715
 
/*=============*/
2716
 
        ulint           space,  /*!< in: space id */
2717
 
        ulint           zip_size,/*!< in: compressed page size in bytes
2718
 
                                or 0 for uncompressed pages */
2719
 
        ulint           offset, /*!< in: page number */
2720
 
        ulint           rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
2721
 
        buf_block_t*    guess,  /*!< in: guessed block or NULL */
2722
 
        ulint           mode,   /*!< in: BUF_GET, BUF_GET_IF_IN_POOL,
2723
 
                                BUF_GET_NO_LATCH, or
2724
 
                                BUF_GET_IF_IN_POOL_OR_WATCH */
2725
 
        const char*     file,   /*!< in: file name */
2726
 
        ulint           line,   /*!< in: line where called */
2727
 
        mtr_t*          mtr)    /*!< in: mini-transaction */
2728
 
{
2729
 
        buf_block_t*    block;
2730
 
        ulint           fold;
2731
 
        unsigned        access_time;
2732
 
        ulint           fix_type;
2733
 
        ibool           must_read;
2734
 
        ulint           retries = 0;
2735
 
        buf_pool_t*     buf_pool = buf_pool_get(space, offset);
2736
 
 
2737
 
        ut_ad(mtr);
2738
 
        ut_ad(mtr->state == MTR_ACTIVE);
2739
 
        ut_ad((rw_latch == RW_S_LATCH)
2740
 
              || (rw_latch == RW_X_LATCH)
2741
 
              || (rw_latch == RW_NO_LATCH));
2742
 
        ut_ad((mode != BUF_GET_NO_LATCH) || (rw_latch == RW_NO_LATCH));
2743
 
        ut_ad(mode == BUF_GET
2744
 
              || mode == BUF_GET_IF_IN_POOL
2745
 
              || mode == BUF_GET_NO_LATCH
2746
 
              || mode == BUF_GET_IF_IN_POOL_OR_WATCH);
2747
 
        ut_ad(zip_size == fil_space_get_zip_size(space));
2748
 
        ut_ad(ut_is_2pow(zip_size));
2749
 
#ifndef UNIV_LOG_DEBUG
2750
 
        ut_ad(!ibuf_inside() || ibuf_page(space, zip_size, offset, NULL));
2751
 
#endif
2752
 
        buf_pool->stat.n_page_gets++;
2753
 
        fold = buf_page_address_fold(space, offset);
2754
 
loop:
2755
 
        block = guess;
2756
 
        buf_pool_mutex_enter(buf_pool);
2757
 
 
2758
 
        if (block) {
2759
 
                /* If the guess is a compressed page descriptor that
2760
 
                has been allocated by buf_buddy_alloc(), it may have
2761
 
                been invalidated by buf_buddy_relocate().  In that
2762
 
                case, block could point to something that happens to
2763
 
                contain the expected bits in block->page.  Similarly,
2764
 
                the guess may be pointing to a buffer pool chunk that
2765
 
                has been released when resizing the buffer pool. */
2766
 
 
2767
 
                if (!buf_block_is_uncompressed(buf_pool, block)
2768
 
                    || offset != block->page.offset
2769
 
                    || space != block->page.space
2770
 
                    || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
2771
 
 
2772
 
                        block = guess = NULL;
2773
 
                } else {
2774
 
                        ut_ad(!block->page.in_zip_hash);
2775
 
                        ut_ad(block->page.in_page_hash);
2776
 
                }
2777
 
        }
2778
 
 
2779
 
        if (block == NULL) {
2780
 
                block = (buf_block_t*) buf_page_hash_get_low(
2781
 
                        buf_pool, space, offset, fold);
2782
 
        }
2783
 
 
2784
 
loop2:
2785
 
        if (block && buf_pool_watch_is_sentinel(buf_pool, &block->page)) {
2786
 
                block = NULL;
2787
 
        }
2788
 
 
2789
 
        if (block == NULL) {
2790
 
                /* Page not in buf_pool: needs to be read from file */
2791
 
 
2792
 
                if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
2793
 
                        block = (buf_block_t*) buf_pool_watch_set(
2794
 
                                space, offset, fold);
2795
 
 
2796
 
                        if (UNIV_LIKELY_NULL(block)) {
2797
 
 
2798
 
                                goto got_block;
2799
 
                        }
2800
 
                }
2801
 
 
2802
 
                buf_pool_mutex_exit(buf_pool);
2803
 
 
2804
 
                if (mode == BUF_GET_IF_IN_POOL
2805
 
                    || mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
2806
 
 
2807
 
                        return(NULL);
2808
 
                }
2809
 
 
2810
 
                if (buf_read_page(space, zip_size, offset)) {
2811
 
                        retries = 0;
2812
 
                } else if (retries < BUF_PAGE_READ_MAX_RETRIES) {
2813
 
                        ++retries;
2814
 
                } else {
2815
 
                        fprintf(stderr, "InnoDB: Error: Unable"
2816
 
                                " to read tablespace %lu page no"
2817
 
                                " %lu into the buffer pool after"
2818
 
                                " %lu attempts\n"
2819
 
                                "InnoDB: The most probable cause"
2820
 
                                " of this error may be that the"
2821
 
                                " table has been corrupted.\n"
2822
 
                                "InnoDB: You can try to fix this"
2823
 
                                " problem by using"
2824
 
                                " innodb_force_recovery.\n"
2825
 
                                "InnoDB: Please see reference manual"
2826
 
                                " for more details.\n"
2827
 
                                "InnoDB: Aborting...\n",
2828
 
                                space, offset,
2829
 
                                BUF_PAGE_READ_MAX_RETRIES);
2830
 
 
2831
 
                        ut_error;
2832
 
                }
2833
 
 
2834
 
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
2835
 
                ut_a(++buf_dbg_counter % 37 || buf_validate());
2836
 
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
2837
 
                goto loop;
2838
 
        }
2839
 
 
2840
 
got_block:
2841
 
        ut_ad(page_zip_get_size(&block->page.zip) == zip_size);
2842
 
 
2843
 
        must_read = buf_block_get_io_fix(block) == BUF_IO_READ;
2844
 
 
2845
 
        if (must_read && mode == BUF_GET_IF_IN_POOL) {
2846
 
 
2847
 
                /* The page is being read to buffer pool,
2848
 
                but we cannot wait around for the read to
2849
 
                complete. */
2850
 
                buf_pool_mutex_exit(buf_pool);
2851
 
 
2852
 
                return(NULL);
2853
 
        }
2854
 
 
2855
 
        switch (buf_block_get_state(block)) {
2856
 
                buf_page_t*     bpage;
2857
 
                ibool           success;
2858
 
 
2859
 
        case BUF_BLOCK_FILE_PAGE:
2860
 
                break;
2861
 
 
2862
 
        case BUF_BLOCK_ZIP_PAGE:
2863
 
        case BUF_BLOCK_ZIP_DIRTY:
2864
 
                bpage = &block->page;
2865
 
                /* Protect bpage->buf_fix_count. */
2866
 
                mutex_enter(&buf_pool->zip_mutex);
2867
 
 
2868
 
                if (bpage->buf_fix_count
2869
 
                    || buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
2870
 
                        /* This condition often occurs when the buffer
2871
 
                        is not buffer-fixed, but I/O-fixed by
2872
 
                        buf_page_init_for_read(). */
2873
 
                        mutex_exit(&buf_pool->zip_mutex);
2874
 
wait_until_unfixed:
2875
 
                        /* The block is buffer-fixed or I/O-fixed.
2876
 
                        Try again later. */
2877
 
                        buf_pool_mutex_exit(buf_pool);
2878
 
                        os_thread_sleep(WAIT_FOR_READ);
2879
 
  
2880
 
                        goto loop;
2881
 
                }
2882
 
 
2883
 
                /* Allocate an uncompressed page. */
2884
 
                buf_pool_mutex_exit(buf_pool);
2885
 
                mutex_exit(&buf_pool->zip_mutex);
2886
 
 
2887
 
                block = buf_LRU_get_free_block(buf_pool, 0);
2888
 
                ut_a(block);
2889
 
 
2890
 
                buf_pool_mutex_enter(buf_pool);
2891
 
                mutex_enter(&block->mutex);
2892
 
 
2893
 
                {
2894
 
                        buf_page_t*     hash_bpage;
2895
 
 
2896
 
                        hash_bpage = buf_page_hash_get_low(
2897
 
                                buf_pool, space, offset, fold);
2898
 
 
2899
 
                        if (UNIV_UNLIKELY(bpage != hash_bpage)) {
2900
 
                                /* The buf_pool->page_hash was modified
2901
 
                                while buf_pool->mutex was released.
2902
 
                                Free the block that was allocated. */
2903
 
 
2904
 
                                buf_LRU_block_free_non_file_page(block);
2905
 
                                mutex_exit(&block->mutex);
2906
 
 
2907
 
                                block = (buf_block_t*) hash_bpage;
2908
 
                                goto loop2;
2909
 
                        }
2910
 
                }
2911
 
 
2912
 
                if (UNIV_UNLIKELY
2913
 
                    (bpage->buf_fix_count
2914
 
                     || buf_page_get_io_fix(bpage) != BUF_IO_NONE)) {
2915
 
 
2916
 
                        /* The block was buffer-fixed or I/O-fixed
2917
 
                        while buf_pool->mutex was not held by this thread.
2918
 
                        Free the block that was allocated and try again.
2919
 
                        This should be extremely unlikely. */
2920
 
 
2921
 
                        buf_LRU_block_free_non_file_page(block);
2922
 
                        mutex_exit(&block->mutex);
2923
 
 
2924
 
                        goto wait_until_unfixed;
2925
 
                }
2926
 
 
2927
 
                /* Move the compressed page from bpage to block,
2928
 
                and uncompress it. */
2929
 
 
2930
 
                mutex_enter(&buf_pool->zip_mutex);
2931
 
 
2932
 
                buf_relocate(bpage, &block->page);
2933
 
                buf_block_init_low(block);
2934
 
                block->lock_hash_val = lock_rec_hash(space, offset);
2935
 
 
2936
 
                UNIV_MEM_DESC(&block->page.zip.data,
2937
 
                              page_zip_get_size(&block->page.zip), block);
2938
 
 
2939
 
                if (buf_page_get_state(&block->page)
2940
 
                    == BUF_BLOCK_ZIP_PAGE) {
2941
 
                        UT_LIST_REMOVE(list, buf_pool->zip_clean,
2942
 
                                       &block->page);
2943
 
                        ut_ad(!block->page.in_flush_list);
2944
 
                } else {
2945
 
                        /* Relocate buf_pool->flush_list. */
2946
 
                        buf_flush_relocate_on_flush_list(bpage,
2947
 
                                                         &block->page);
2948
 
                }
2949
 
 
2950
 
                /* Buffer-fix, I/O-fix, and X-latch the block
2951
 
                for the duration of the decompression.
2952
 
                Also add the block to the unzip_LRU list. */
2953
 
                block->page.state = BUF_BLOCK_FILE_PAGE;
2954
 
 
2955
 
                /* Insert at the front of unzip_LRU list */
2956
 
                buf_unzip_LRU_add_block(block, FALSE);
2957
 
 
2958
 
                block->page.buf_fix_count = 1;
2959
 
                buf_block_set_io_fix(block, BUF_IO_READ);
2960
 
                rw_lock_x_lock_func(&block->lock, 0, file, line);
2961
 
 
2962
 
                UNIV_MEM_INVALID(bpage, sizeof *bpage);
2963
 
 
2964
 
                mutex_exit(&block->mutex);
2965
 
                mutex_exit(&buf_pool->zip_mutex);
2966
 
                buf_pool->n_pend_unzip++;
2967
 
 
2968
 
                buf_buddy_free(buf_pool, bpage, sizeof *bpage);
2969
 
 
2970
 
                buf_pool_mutex_exit(buf_pool);
2971
 
 
2972
 
                /* Decompress the page and apply buffered operations
2973
 
                while not holding buf_pool->mutex or block->mutex. */
2974
 
                success = buf_zip_decompress(block, srv_use_checksums);
2975
 
                ut_a(success);
2976
 
 
2977
 
                if (UNIV_LIKELY(!recv_no_ibuf_operations)) {
2978
 
                        ibuf_merge_or_delete_for_page(block, space, offset,
2979
 
                                                      zip_size, TRUE);
2980
 
                }
2981
 
 
2982
 
                /* Unfix and unlatch the block. */
2983
 
                buf_pool_mutex_enter(buf_pool);
2984
 
                mutex_enter(&block->mutex);
2985
 
                block->page.buf_fix_count--;
2986
 
                buf_block_set_io_fix(block, BUF_IO_NONE);
2987
 
                mutex_exit(&block->mutex);
2988
 
                buf_pool->n_pend_unzip--;
2989
 
                rw_lock_x_unlock(&block->lock);
2990
 
 
2991
 
                break;
2992
 
 
2993
 
        case BUF_BLOCK_ZIP_FREE:
2994
 
        case BUF_BLOCK_NOT_USED:
2995
 
        case BUF_BLOCK_READY_FOR_USE:
2996
 
        case BUF_BLOCK_MEMORY:
2997
 
        case BUF_BLOCK_REMOVE_HASH:
2998
 
                ut_error;
2999
 
                break;
3000
 
        }
3001
 
 
3002
 
        ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
3003
 
 
3004
 
        mutex_enter(&block->mutex);
3005
 
#if UNIV_WORD_SIZE == 4
3006
 
        /* On 32-bit systems, there is no padding in buf_page_t.  On
3007
 
        other systems, Valgrind could complain about uninitialized pad
3008
 
        bytes. */
3009
 
        UNIV_MEM_ASSERT_RW(&block->page, sizeof block->page);
3010
 
#endif
3011
 
 
3012
 
        buf_block_buf_fix_inc(block, file, line);
3013
 
 
3014
 
        mutex_exit(&block->mutex);
3015
 
 
3016
 
        /* Check if this is the first access to the page */
3017
 
 
3018
 
        access_time = buf_page_is_accessed(&block->page);
3019
 
 
3020
 
        buf_pool_mutex_exit(buf_pool);
3021
 
 
3022
 
        buf_page_set_accessed_make_young(&block->page, access_time);
3023
 
 
3024
 
#ifdef UNIV_DEBUG_FILE_ACCESSES
3025
 
        ut_a(!block->page.file_page_was_freed);
3026
 
#endif
3027
 
 
3028
 
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
3029
 
        ut_a(++buf_dbg_counter % 5771 || buf_validate());
3030
 
        ut_a(block->page.buf_fix_count > 0);
3031
 
        ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
3032
 
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
3033
 
 
3034
 
        switch (rw_latch) {
3035
 
        case RW_NO_LATCH:
3036
 
                if (must_read) {
3037
 
                        /* Let us wait until the read operation
3038
 
                        completes */
3039
 
 
3040
 
                        for (;;) {
3041
 
                                enum buf_io_fix io_fix;
3042
 
 
3043
 
                                mutex_enter(&block->mutex);
3044
 
                                io_fix = buf_block_get_io_fix(block);
3045
 
                                mutex_exit(&block->mutex);
3046
 
 
3047
 
                                if (io_fix == BUF_IO_READ) {
3048
 
 
3049
 
                                        os_thread_sleep(WAIT_FOR_READ);
3050
 
                                } else {
3051
 
                                        break;
3052
 
                                }
3053
 
                        }
3054
 
                }
3055
 
 
3056
 
                fix_type = MTR_MEMO_BUF_FIX;
3057
 
                break;
3058
 
 
3059
 
        case RW_S_LATCH:
3060
 
                rw_lock_s_lock_func(&(block->lock), 0, file, line);
3061
 
 
3062
 
                fix_type = MTR_MEMO_PAGE_S_FIX;
3063
 
                break;
3064
 
 
3065
 
        default:
3066
 
                ut_ad(rw_latch == RW_X_LATCH);
3067
 
                rw_lock_x_lock_func(&(block->lock), 0, file, line);
3068
 
 
3069
 
                fix_type = MTR_MEMO_PAGE_X_FIX;
3070
 
                break;
3071
 
        }
3072
 
 
3073
 
        mtr_memo_push(mtr, block, fix_type);
3074
 
 
3075
 
        if (!access_time) {
3076
 
                /* In the case of a first access, try to apply linear
3077
 
                read-ahead */
3078
 
 
3079
 
                buf_read_ahead_linear(space, zip_size, offset);
3080
 
        }
3081
 
 
3082
 
#ifdef UNIV_IBUF_COUNT_DEBUG
3083
 
        ut_a(ibuf_count_get(buf_block_get_space(block),
3084
 
                            buf_block_get_page_no(block)) == 0);
3085
 
#endif
3086
 
        return(block);
3087
 
}
3088
 
 
3089
 
/********************************************************************//**
3090
 
This is the general function used to get optimistic access to a database
3091
 
page.
3092
 
@return TRUE if success */
3093
 
UNIV_INTERN
3094
 
ibool
3095
 
buf_page_optimistic_get(
3096
 
/*====================*/
3097
 
        ulint           rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */
3098
 
        buf_block_t*    block,  /*!< in: guessed buffer block */
3099
 
        ib_uint64_t     modify_clock,/*!< in: modify clock value if mode is
3100
 
                                ..._GUESS_ON_CLOCK */
3101
 
        const char*     file,   /*!< in: file name */
3102
 
        ulint           line,   /*!< in: line where called */
3103
 
        mtr_t*          mtr)    /*!< in: mini-transaction */
3104
 
{
3105
 
        buf_pool_t*     buf_pool;
3106
 
        unsigned        access_time;
3107
 
        ibool           success;
3108
 
        ulint           fix_type;
3109
 
 
3110
 
        ut_ad(block);
3111
 
        ut_ad(mtr);
3112
 
        ut_ad(mtr->state == MTR_ACTIVE);
3113
 
        ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
3114
 
 
3115
 
        mutex_enter(&block->mutex);
3116
 
 
3117
 
        if (UNIV_UNLIKELY(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE)) {
3118
 
 
3119
 
                mutex_exit(&block->mutex);
3120
 
 
3121
 
                return(FALSE);
3122
 
        }
3123
 
 
3124
 
        buf_block_buf_fix_inc(block, file, line);
3125
 
 
3126
 
        mutex_exit(&block->mutex);
3127
 
 
3128
 
        /* Check if this is the first access to the page.
3129
 
        We do a dirty read on purpose, to avoid mutex contention.
3130
 
        This field is only used for heuristic purposes; it does not
3131
 
        affect correctness. */
3132
 
 
3133
 
        access_time = buf_page_is_accessed(&block->page);
3134
 
        buf_page_set_accessed_make_young(&block->page, access_time);
3135
 
 
3136
 
        ut_ad(!ibuf_inside()
3137
 
              || ibuf_page(buf_block_get_space(block),
3138
 
                           buf_block_get_zip_size(block),
3139
 
                           buf_block_get_page_no(block), NULL));
3140
 
 
3141
 
        if (rw_latch == RW_S_LATCH) {
3142
 
                success = rw_lock_s_lock_nowait(&(block->lock),
3143
 
                                                file, line);
3144
 
                fix_type = MTR_MEMO_PAGE_S_FIX;
3145
 
        } else {
3146
 
                success = rw_lock_x_lock_func_nowait(&(block->lock),
3147
 
                                                     file, line);
3148
 
                fix_type = MTR_MEMO_PAGE_X_FIX;
3149
 
        }
3150
 
 
3151
 
        if (UNIV_UNLIKELY(!success)) {
3152
 
                mutex_enter(&block->mutex);
3153
 
                buf_block_buf_fix_dec(block);
3154
 
                mutex_exit(&block->mutex);
3155
 
 
3156
 
                return(FALSE);
3157
 
        }
3158
 
 
3159
 
        if (UNIV_UNLIKELY(modify_clock != block->modify_clock)) {
3160
 
                buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
3161
 
 
3162
 
                if (rw_latch == RW_S_LATCH) {
3163
 
                        rw_lock_s_unlock(&(block->lock));
3164
 
                } else {
3165
 
                        rw_lock_x_unlock(&(block->lock));
3166
 
                }
3167
 
 
3168
 
                mutex_enter(&block->mutex);
3169
 
                buf_block_buf_fix_dec(block);
3170
 
                mutex_exit(&block->mutex);
3171
 
 
3172
 
                return(FALSE);
3173
 
        }
3174
 
 
3175
 
        mtr_memo_push(mtr, block, fix_type);
3176
 
 
3177
 
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
3178
 
        ut_a(++buf_dbg_counter % 5771 || buf_validate());
3179
 
        ut_a(block->page.buf_fix_count > 0);
3180
 
        ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
3181
 
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
3182
 
 
3183
 
#ifdef UNIV_DEBUG_FILE_ACCESSES
3184
 
        ut_a(block->page.file_page_was_freed == FALSE);
3185
 
#endif
3186
 
        if (UNIV_UNLIKELY(!access_time)) {
3187
 
                /* In the case of a first access, try to apply linear
3188
 
                read-ahead */
3189
 
 
3190
 
                buf_read_ahead_linear(buf_block_get_space(block),
3191
 
                                      buf_block_get_zip_size(block),
3192
 
                                      buf_block_get_page_no(block));
3193
 
        }
3194
 
 
3195
 
#ifdef UNIV_IBUF_COUNT_DEBUG
3196
 
        ut_a(ibuf_count_get(buf_block_get_space(block),
3197
 
                            buf_block_get_page_no(block)) == 0);
3198
 
#endif
3199
 
        buf_pool = buf_pool_from_block(block);
3200
 
        buf_pool->stat.n_page_gets++;
3201
 
 
3202
 
        return(TRUE);
3203
 
}
3204
 
 
3205
 
/********************************************************************//**
3206
 
This is used to get access to a known database page, when no waiting can be
3207
 
done. For example, if a search in an adaptive hash index leads us to this
3208
 
frame.
3209
 
@return TRUE if success */
3210
 
UNIV_INTERN
3211
 
ibool
3212
 
buf_page_get_known_nowait(
3213
 
/*======================*/
3214
 
        ulint           rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */
3215
 
        buf_block_t*    block,  /*!< in: the known page */
3216
 
        ulint           mode,   /*!< in: BUF_MAKE_YOUNG or BUF_KEEP_OLD */
3217
 
        const char*     file,   /*!< in: file name */
3218
 
        ulint           line,   /*!< in: line where called */
3219
 
        mtr_t*          mtr)    /*!< in: mini-transaction */
3220
 
{
3221
 
        buf_pool_t*     buf_pool;
3222
 
        ibool           success;
3223
 
        ulint           fix_type;
3224
 
 
3225
 
        ut_ad(mtr);
3226
 
        ut_ad(mtr->state == MTR_ACTIVE);
3227
 
        ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
3228
 
 
3229
 
        mutex_enter(&block->mutex);
3230
 
 
3231
 
        if (buf_block_get_state(block) == BUF_BLOCK_REMOVE_HASH) {
3232
 
                /* Another thread is just freeing the block from the LRU list
3233
 
                of the buffer pool: do not try to access this page; this
3234
 
                attempt to access the page can only come through the hash
3235
 
                index because when the buffer block state is ..._REMOVE_HASH,
3236
 
                we have already removed it from the page address hash table
3237
 
                of the buffer pool. */
3238
 
 
3239
 
                mutex_exit(&block->mutex);
3240
 
 
3241
 
                return(FALSE);
3242
 
        }
3243
 
 
3244
 
        ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
3245
 
 
3246
 
        buf_block_buf_fix_inc(block, file, line);
3247
 
 
3248
 
        mutex_exit(&block->mutex);
3249
 
 
3250
 
        buf_pool = buf_pool_from_block(block);
3251
 
 
3252
 
        if (mode == BUF_MAKE_YOUNG && buf_page_peek_if_too_old(&block->page)) {
3253
 
                buf_pool_mutex_enter(buf_pool);
3254
 
                buf_LRU_make_block_young(&block->page);
3255
 
                buf_pool_mutex_exit(buf_pool);
3256
 
        } else if (!buf_page_is_accessed(&block->page)) {
3257
 
                /* Above, we do a dirty read on purpose, to avoid
3258
 
                mutex contention.  The field buf_page_t::access_time
3259
 
                is only used for heuristic purposes.  Writes to the
3260
 
                field must be protected by mutex, however. */
3261
 
                ulint   time_ms = ut_time_ms();
3262
 
 
3263
 
                buf_pool_mutex_enter(buf_pool);
3264
 
                buf_page_set_accessed(&block->page, time_ms);
3265
 
                buf_pool_mutex_exit(buf_pool);
3266
 
        }
3267
 
 
3268
 
        ut_ad(!ibuf_inside() || (mode == BUF_KEEP_OLD));
3269
 
 
3270
 
        if (rw_latch == RW_S_LATCH) {
3271
 
                success = rw_lock_s_lock_nowait(&(block->lock),
3272
 
                                                file, line);
3273
 
                fix_type = MTR_MEMO_PAGE_S_FIX;
3274
 
        } else {
3275
 
                success = rw_lock_x_lock_func_nowait(&(block->lock),
3276
 
                                                     file, line);
3277
 
                fix_type = MTR_MEMO_PAGE_X_FIX;
3278
 
        }
3279
 
 
3280
 
        if (!success) {
3281
 
                mutex_enter(&block->mutex);
3282
 
                buf_block_buf_fix_dec(block);
3283
 
                mutex_exit(&block->mutex);
3284
 
 
3285
 
                return(FALSE);
3286
 
        }
3287
 
 
3288
 
        mtr_memo_push(mtr, block, fix_type);
3289
 
 
3290
 
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
3291
 
        ut_a(++buf_dbg_counter % 5771 || buf_validate());
3292
 
        ut_a(block->page.buf_fix_count > 0);
3293
 
        ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
3294
 
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
3295
 
#ifdef UNIV_DEBUG_FILE_ACCESSES
3296
 
        ut_a(block->page.file_page_was_freed == FALSE);
3297
 
#endif
3298
 
 
3299
 
#ifdef UNIV_IBUF_COUNT_DEBUG
3300
 
        ut_a((mode == BUF_KEEP_OLD)
3301
 
             || (ibuf_count_get(buf_block_get_space(block),
3302
 
                                buf_block_get_page_no(block)) == 0));
3303
 
#endif
3304
 
        buf_pool->stat.n_page_gets++;
3305
 
 
3306
 
        return(TRUE);
3307
 
}
3308
 
 
3309
 
/*******************************************************************//**
3310
 
Given a tablespace id and page number tries to get that page. If the
3311
 
page is not in the buffer pool it is not loaded and NULL is returned.
3312
 
Suitable for using when holding the kernel mutex.
3313
 
@return pointer to a page or NULL */
3314
 
UNIV_INTERN
3315
 
const buf_block_t*
3316
 
buf_page_try_get_func(
3317
 
/*==================*/
3318
 
        ulint           space_id,/*!< in: tablespace id */
3319
 
        ulint           page_no,/*!< in: page number */
3320
 
        const char*     file,   /*!< in: file name */
3321
 
        ulint           line,   /*!< in: line where called */
3322
 
        mtr_t*          mtr)    /*!< in: mini-transaction */
3323
 
{
3324
 
        buf_block_t*    block;
3325
 
        ibool           success;
3326
 
        ulint           fix_type;
3327
 
        buf_pool_t*     buf_pool = buf_pool_get(space_id, page_no);
3328
 
 
3329
 
        ut_ad(mtr);
3330
 
        ut_ad(mtr->state == MTR_ACTIVE);
3331
 
 
3332
 
        buf_pool_mutex_enter(buf_pool);
3333
 
        block = buf_block_hash_get(buf_pool, space_id, page_no);
3334
 
 
3335
 
        if (!block || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
3336
 
                buf_pool_mutex_exit(buf_pool);
3337
 
                return(NULL);
3338
 
        }
3339
 
 
3340
 
        ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page));
3341
 
 
3342
 
        mutex_enter(&block->mutex);
3343
 
        buf_pool_mutex_exit(buf_pool);
3344
 
 
3345
 
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
3346
 
        ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
3347
 
        ut_a(buf_block_get_space(block) == space_id);
3348
 
        ut_a(buf_block_get_page_no(block) == page_no);
3349
 
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
3350
 
 
3351
 
        buf_block_buf_fix_inc(block, file, line);
3352
 
        mutex_exit(&block->mutex);
3353
 
 
3354
 
        fix_type = MTR_MEMO_PAGE_S_FIX;
3355
 
        success = rw_lock_s_lock_nowait(&block->lock, file, line);
3356
 
 
3357
 
        if (!success) {
3358
 
                /* Let us try to get an X-latch. If the current thread
3359
 
                is holding an X-latch on the page, we cannot get an
3360
 
                S-latch. */
3361
 
 
3362
 
                fix_type = MTR_MEMO_PAGE_X_FIX;
3363
 
                success = rw_lock_x_lock_func_nowait(&block->lock,
3364
 
                                                     file, line);
3365
 
        }
3366
 
 
3367
 
        if (!success) {
3368
 
                mutex_enter(&block->mutex);
3369
 
                buf_block_buf_fix_dec(block);
3370
 
                mutex_exit(&block->mutex);
3371
 
 
3372
 
                return(NULL);
3373
 
        }
3374
 
 
3375
 
        mtr_memo_push(mtr, block, fix_type);
3376
 
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
3377
 
        ut_a(++buf_dbg_counter % 5771 || buf_validate());
3378
 
        ut_a(block->page.buf_fix_count > 0);
3379
 
        ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
3380
 
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
3381
 
#ifdef UNIV_DEBUG_FILE_ACCESSES
3382
 
        ut_a(block->page.file_page_was_freed == FALSE);
3383
 
#endif /* UNIV_DEBUG_FILE_ACCESSES */
3384
 
        buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
3385
 
 
3386
 
        buf_pool->stat.n_page_gets++;
3387
 
 
3388
 
#ifdef UNIV_IBUF_COUNT_DEBUG
3389
 
        ut_a(ibuf_count_get(buf_block_get_space(block),
3390
 
                            buf_block_get_page_no(block)) == 0);
3391
 
#endif
3392
 
 
3393
 
        return(block);
3394
 
}
3395
 
 
3396
 
/********************************************************************//**
3397
 
Initialize some fields of a control block. */
3398
 
UNIV_INLINE
3399
 
void
3400
 
buf_page_init_low(
3401
 
/*==============*/
3402
 
        buf_page_t*     bpage)  /*!< in: block to init */
3403
 
{
3404
 
        bpage->flush_type = BUF_FLUSH_LRU;
3405
 
        bpage->io_fix = BUF_IO_NONE;
3406
 
        bpage->buf_fix_count = 0;
3407
 
        bpage->freed_page_clock = 0;
3408
 
        bpage->access_time = 0;
3409
 
        bpage->newest_modification = 0;
3410
 
        bpage->oldest_modification = 0;
3411
 
        HASH_INVALIDATE(bpage, hash);
3412
 
#ifdef UNIV_DEBUG_FILE_ACCESSES
3413
 
        bpage->file_page_was_freed = FALSE;
3414
 
#endif /* UNIV_DEBUG_FILE_ACCESSES */
3415
 
}
3416
 
 
3417
 
/********************************************************************//**
3418
 
Inits a page to the buffer buf_pool. */
3419
 
static
3420
 
void
3421
 
buf_page_init(
3422
 
/*==========*/
3423
 
        ulint           space,  /*!< in: space id */
3424
 
        ulint           offset, /*!< in: offset of the page within space
3425
 
                                in units of a page */
3426
 
        ulint           fold,   /*!< in: buf_page_address_fold(space,offset) */
3427
 
        buf_block_t*    block)  /*!< in: block to init */
3428
 
{
3429
 
        buf_page_t*     hash_page;
3430
 
        buf_pool_t*     buf_pool = buf_pool_get(space, offset);
3431
 
 
3432
 
        ut_ad(buf_pool_mutex_own(buf_pool));
3433
 
        ut_ad(mutex_own(&(block->mutex)));
3434
 
        ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
3435
 
 
3436
 
        /* Set the state of the block */
3437
 
        buf_block_set_file_page(block, space, offset);
3438
 
 
3439
 
#ifdef UNIV_DEBUG_VALGRIND
3440
 
        if (!space) {
3441
 
                /* Silence valid Valgrind warnings about uninitialized
3442
 
                data being written to data files.  There are some unused
3443
 
                bytes on some pages that InnoDB does not initialize. */
3444
 
                UNIV_MEM_VALID(block->frame, UNIV_PAGE_SIZE);
3445
 
        }
3446
 
#endif /* UNIV_DEBUG_VALGRIND */
3447
 
 
3448
 
        buf_block_init_low(block);
3449
 
 
3450
 
        block->lock_hash_val = lock_rec_hash(space, offset);
3451
 
 
3452
 
        buf_page_init_low(&block->page);
3453
 
 
3454
 
        /* Insert into the hash table of file pages */
3455
 
 
3456
 
        hash_page = buf_page_hash_get_low(buf_pool, space, offset, fold);
3457
 
 
3458
 
        if (UNIV_LIKELY(!hash_page)) {
3459
 
        } else if (buf_pool_watch_is_sentinel(buf_pool, hash_page)) {
3460
 
                /* Preserve the reference count. */
3461
 
                ulint   buf_fix_count = hash_page->buf_fix_count;
3462
 
 
3463
 
                ut_a(buf_fix_count > 0);
3464
 
                block->page.buf_fix_count += buf_fix_count;
3465
 
                buf_pool_watch_remove(buf_pool, fold, hash_page);
3466
 
        } else {
3467
 
                fprintf(stderr,
3468
 
                        "InnoDB: Error: page %lu %lu already found"
3469
 
                        " in the hash table: %p, %p\n",
3470
 
                        (ulong) space,
3471
 
                        (ulong) offset,
3472
 
                        (const void*) hash_page, (const void*) block);
3473
 
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
3474
 
                mutex_exit(&block->mutex);
3475
 
                buf_pool_mutex_exit(buf_pool);
3476
 
                buf_print();
3477
 
                buf_LRU_print();
3478
 
                buf_validate();
3479
 
                buf_LRU_validate();
3480
 
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
3481
 
                ut_error;
3482
 
        }
3483
 
 
3484
 
        ut_ad(!block->page.in_zip_hash);
3485
 
        ut_ad(!block->page.in_page_hash);
3486
 
        ut_d(block->page.in_page_hash = TRUE);
3487
 
        HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
3488
 
                    fold, &block->page);
3489
 
}
3490
 
 
3491
 
/********************************************************************//**
3492
 
Function which inits a page for read to the buffer buf_pool. If the page is
3493
 
(1) already in buf_pool, or
3494
 
(2) if we specify to read only ibuf pages and the page is not an ibuf page, or
3495
 
(3) if the space is deleted or being deleted,
3496
 
then this function does nothing.
3497
 
Sets the io_fix flag to BUF_IO_READ and sets a non-recursive exclusive lock
3498
 
on the buffer frame. The io-handler must take care that the flag is cleared
3499
 
and the lock released later.
3500
 
@return pointer to the block or NULL */
3501
 
UNIV_INTERN
3502
 
buf_page_t*
3503
 
buf_page_init_for_read(
3504
 
/*===================*/
3505
 
        ulint*          err,    /*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED */
3506
 
        ulint           mode,   /*!< in: BUF_READ_IBUF_PAGES_ONLY, ... */
3507
 
        ulint           space,  /*!< in: space id */
3508
 
        ulint           zip_size,/*!< in: compressed page size, or 0 */
3509
 
        ibool           unzip,  /*!< in: TRUE=request uncompressed page */
3510
 
        ib_int64_t      tablespace_version,
3511
 
                                /*!< in: prevents reading from a wrong
3512
 
                                version of the tablespace in case we have done
3513
 
                                DISCARD + IMPORT */
3514
 
        ulint           offset) /*!< in: page number */
3515
 
{
3516
 
        buf_block_t*    block;
3517
 
        buf_page_t*     bpage   = NULL;
3518
 
        buf_page_t*     watch_page;
3519
 
        mtr_t           mtr;
3520
 
        ulint           fold;
3521
 
        ibool           lru     = FALSE;
3522
 
        void*           data;
3523
 
        buf_pool_t*     buf_pool = buf_pool_get(space, offset);
3524
 
 
3525
 
        ut_ad(buf_pool);
3526
 
 
3527
 
        *err = DB_SUCCESS;
3528
 
 
3529
 
        if (mode == BUF_READ_IBUF_PAGES_ONLY) {
3530
 
                /* It is a read-ahead within an ibuf routine */
3531
 
 
3532
 
                ut_ad(!ibuf_bitmap_page(zip_size, offset));
3533
 
                ut_ad(ibuf_inside());
3534
 
 
3535
 
                mtr_start(&mtr);
3536
 
 
3537
 
                if (!recv_no_ibuf_operations
3538
 
                    && !ibuf_page(space, zip_size, offset, &mtr)) {
3539
 
 
3540
 
                        mtr_commit(&mtr);
3541
 
 
3542
 
                        return(NULL);
3543
 
                }
3544
 
        } else {
3545
 
                ut_ad(mode == BUF_READ_ANY_PAGE);
3546
 
        }
3547
 
 
3548
 
        if (zip_size && UNIV_LIKELY(!unzip)
3549
 
            && UNIV_LIKELY(!recv_recovery_is_on())) {
3550
 
                block = NULL;
3551
 
        } else {
3552
 
                block = buf_LRU_get_free_block(buf_pool, 0);
3553
 
                ut_ad(block);
3554
 
                ut_ad(buf_pool_from_block(block) == buf_pool);
3555
 
        }
3556
 
 
3557
 
        fold = buf_page_address_fold(space, offset);
3558
 
 
3559
 
        buf_pool_mutex_enter(buf_pool);
3560
 
 
3561
 
        watch_page = buf_page_hash_get_low(buf_pool, space, offset, fold);
3562
 
        if (watch_page && !buf_pool_watch_is_sentinel(buf_pool, watch_page)) {
3563
 
                /* The page is already in the buffer pool. */
3564
 
                watch_page = NULL;
3565
 
err_exit:
3566
 
                if (block) {
3567
 
                        mutex_enter(&block->mutex);
3568
 
                        buf_LRU_block_free_non_file_page(block);
3569
 
                        mutex_exit(&block->mutex);
3570
 
                }
3571
 
 
3572
 
                bpage = NULL;
3573
 
                goto func_exit;
3574
 
        }
3575
 
 
3576
 
        if (fil_tablespace_deleted_or_being_deleted_in_mem(
3577
 
                    space, tablespace_version)) {
3578
 
                /* The page belongs to a space which has been
3579
 
                deleted or is being deleted. */
3580
 
                *err = DB_TABLESPACE_DELETED;
3581
 
 
3582
 
                goto err_exit;
3583
 
        }
3584
 
 
3585
 
        if (block) {
3586
 
                bpage = &block->page;
3587
 
                mutex_enter(&block->mutex);
3588
 
 
3589
 
                ut_ad(buf_pool_from_bpage(bpage) == buf_pool);
3590
 
 
3591
 
                buf_page_init(space, offset, fold, block);
3592
 
 
3593
 
                /* The block must be put to the LRU list, to the old blocks */
3594
 
                buf_LRU_add_block(bpage, TRUE/* to old blocks */);
3595
 
 
3596
 
                /* We set a pass-type x-lock on the frame because then
3597
 
                the same thread which called for the read operation
3598
 
                (and is running now at this point of code) can wait
3599
 
                for the read to complete by waiting for the x-lock on
3600
 
                the frame; if the x-lock were recursive, the same
3601
 
                thread would illegally get the x-lock before the page
3602
 
                read is completed.  The x-lock is cleared by the
3603
 
                io-handler thread. */
3604
 
 
3605
 
                rw_lock_x_lock_gen(&block->lock, BUF_IO_READ);
3606
 
                buf_page_set_io_fix(bpage, BUF_IO_READ);
3607
 
 
3608
 
                if (UNIV_UNLIKELY(zip_size)) {
3609
 
                        page_zip_set_size(&block->page.zip, zip_size);
3610
 
 
3611
 
                        /* buf_pool->mutex may be released and
3612
 
                        reacquired by buf_buddy_alloc().  Thus, we
3613
 
                        must release block->mutex in order not to
3614
 
                        break the latching order in the reacquisition
3615
 
                        of buf_pool->mutex.  We also must defer this
3616
 
                        operation until after the block descriptor has
3617
 
                        been added to buf_pool->LRU and
3618
 
                        buf_pool->page_hash. */
3619
 
                        mutex_exit(&block->mutex);
3620
 
                        data = buf_buddy_alloc(buf_pool, zip_size, &lru);
3621
 
                        mutex_enter(&block->mutex);
3622
 
                        block->page.zip.data = data;
3623
 
 
3624
 
                        /* To maintain the invariant
3625
 
                        block->in_unzip_LRU_list
3626
 
                        == buf_page_belongs_to_unzip_LRU(&block->page)
3627
 
                        we have to add this block to unzip_LRU
3628
 
                        after block->page.zip.data is set. */
3629
 
                        ut_ad(buf_page_belongs_to_unzip_LRU(&block->page));
3630
 
                        buf_unzip_LRU_add_block(block, TRUE);
3631
 
                }
3632
 
 
3633
 
                mutex_exit(&block->mutex);
3634
 
        } else {
3635
 
                /* Defer buf_buddy_alloc() until after the block has
3636
 
                been found not to exist.  The buf_buddy_alloc() and
3637
 
                buf_buddy_free() calls may be expensive because of
3638
 
                buf_buddy_relocate(). */
3639
 
 
3640
 
                /* The compressed page must be allocated before the
3641
 
                control block (bpage), in order to avoid the
3642
 
                invocation of buf_buddy_relocate_block() on
3643
 
                uninitialized data. */
3644
 
                data = buf_buddy_alloc(buf_pool, zip_size, &lru);
3645
 
                bpage = buf_buddy_alloc(buf_pool, sizeof *bpage, &lru);
3646
 
 
3647
 
                /* Initialize the buf_pool pointer. */
3648
 
                bpage->buf_pool = buf_pool;
3649
 
 
3650
 
                /* If buf_buddy_alloc() allocated storage from the LRU list,
3651
 
                it released and reacquired buf_pool->mutex.  Thus, we must
3652
 
                check the page_hash again, as it may have been modified. */
3653
 
                if (UNIV_UNLIKELY(lru)) {
3654
 
 
3655
 
                        watch_page = buf_page_hash_get_low(
3656
 
                                buf_pool, space, offset, fold);
3657
 
 
3658
 
                        if (watch_page
3659
 
                            && !buf_pool_watch_is_sentinel(buf_pool,
3660
 
                                                           watch_page)) {
3661
 
 
3662
 
                                /* The block was added by some other thread. */
3663
 
                                watch_page = NULL;
3664
 
                                buf_buddy_free(buf_pool, bpage, sizeof *bpage);
3665
 
                                buf_buddy_free(buf_pool, data, zip_size);
3666
 
 
3667
 
                                bpage = NULL;
3668
 
                                goto func_exit;
3669
 
                        }
3670
 
                }
3671
 
 
3672
 
                page_zip_des_init(&bpage->zip);
3673
 
                page_zip_set_size(&bpage->zip, zip_size);
3674
 
                bpage->zip.data = data;
3675
 
 
3676
 
                mutex_enter(&buf_pool->zip_mutex);
3677
 
                UNIV_MEM_DESC(bpage->zip.data,
3678
 
                              page_zip_get_size(&bpage->zip), bpage);
3679
 
 
3680
 
                buf_page_init_low(bpage);
3681
 
 
3682
 
                bpage->state    = BUF_BLOCK_ZIP_PAGE;
3683
 
                bpage->space    = space;
3684
 
                bpage->offset   = offset;
3685
 
 
3686
 
 
3687
 
#ifdef UNIV_DEBUG
3688
 
                bpage->in_page_hash = FALSE;
3689
 
                bpage->in_zip_hash = FALSE;
3690
 
                bpage->in_flush_list = FALSE;
3691
 
                bpage->in_free_list = FALSE;
3692
 
                bpage->in_LRU_list = FALSE;
3693
 
#endif /* UNIV_DEBUG */
3694
 
 
3695
 
                ut_d(bpage->in_page_hash = TRUE);
3696
 
 
3697
 
                if (UNIV_LIKELY_NULL(watch_page)) {
3698
 
                        /* Preserve the reference count. */
3699
 
                        ulint   buf_fix_count = watch_page->buf_fix_count;
3700
 
                        ut_a(buf_fix_count > 0);
3701
 
                        bpage->buf_fix_count += buf_fix_count;
3702
 
                        ut_ad(buf_pool_watch_is_sentinel(buf_pool, watch_page));
3703
 
                        buf_pool_watch_remove(buf_pool, fold, watch_page);
3704
 
                }
3705
 
 
3706
 
                HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold,
3707
 
                            bpage);
3708
 
 
3709
 
                /* The block must be put to the LRU list, to the old blocks */
3710
 
                buf_LRU_add_block(bpage, TRUE/* to old blocks */);
3711
 
                buf_LRU_insert_zip_clean(bpage);
3712
 
 
3713
 
                buf_page_set_io_fix(bpage, BUF_IO_READ);
3714
 
 
3715
 
                mutex_exit(&buf_pool->zip_mutex);
3716
 
        }
3717
 
 
3718
 
        buf_pool->n_pend_reads++;
3719
 
func_exit:
3720
 
        buf_pool_mutex_exit(buf_pool);
3721
 
 
3722
 
        if (mode == BUF_READ_IBUF_PAGES_ONLY) {
3723
 
 
3724
 
                mtr_commit(&mtr);
3725
 
        }
3726
 
 
3727
 
        ut_ad(!bpage || buf_page_in_file(bpage));
3728
 
        return(bpage);
3729
 
}
3730
 
 
3731
 
/********************************************************************//**
3732
 
Initializes a page to the buffer buf_pool. The page is usually not read
3733
 
from a file even if it cannot be found in the buffer buf_pool. This is one
3734
 
of the functions which perform to a block a state transition NOT_USED =>
3735
 
FILE_PAGE (the other is buf_page_get_gen).
3736
 
@return pointer to the block, page bufferfixed */
3737
 
UNIV_INTERN
3738
 
buf_block_t*
3739
 
buf_page_create(
3740
 
/*============*/
3741
 
        ulint   space,  /*!< in: space id */
3742
 
        ulint   offset, /*!< in: offset of the page within space in units of
3743
 
                        a page */
3744
 
        ulint   zip_size,/*!< in: compressed page size, or 0 */
3745
 
        mtr_t*  mtr)    /*!< in: mini-transaction handle */
3746
 
{
3747
 
        buf_frame_t*    frame;
3748
 
        buf_block_t*    block;
3749
 
        ulint           fold;
3750
 
        buf_block_t*    free_block      = NULL;
3751
 
        ulint           time_ms         = ut_time_ms();
3752
 
        buf_pool_t*     buf_pool        = buf_pool_get(space, offset);
3753
 
 
3754
 
        ut_ad(mtr);
3755
 
        ut_ad(mtr->state == MTR_ACTIVE);
3756
 
        ut_ad(space || !zip_size);
3757
 
 
3758
 
        free_block = buf_LRU_get_free_block(buf_pool, 0);
3759
 
 
3760
 
        fold = buf_page_address_fold(space, offset);
3761
 
 
3762
 
        buf_pool_mutex_enter(buf_pool);
3763
 
 
3764
 
        block = (buf_block_t*) buf_page_hash_get_low(
3765
 
                buf_pool, space, offset, fold);
3766
 
 
3767
 
        if (block
3768
 
            && buf_page_in_file(&block->page)
3769
 
            && !buf_pool_watch_is_sentinel(buf_pool, &block->page)) {
3770
 
#ifdef UNIV_IBUF_COUNT_DEBUG
3771
 
                ut_a(ibuf_count_get(space, offset) == 0);
3772
 
#endif
3773
 
#ifdef UNIV_DEBUG_FILE_ACCESSES
3774
 
                block->page.file_page_was_freed = FALSE;
3775
 
#endif /* UNIV_DEBUG_FILE_ACCESSES */
3776
 
 
3777
 
                /* Page can be found in buf_pool */
3778
 
                buf_pool_mutex_exit(buf_pool);
3779
 
 
3780
 
                buf_block_free(free_block);
3781
 
 
3782
 
                return(buf_page_get_with_no_latch(space, zip_size,
3783
 
                                                  offset, mtr));
3784
 
        }
3785
 
 
3786
 
        /* If we get here, the page was not in buf_pool: init it there */
3787
 
 
3788
 
#ifdef UNIV_DEBUG
3789
 
        if (buf_debug_prints) {
3790
 
                fprintf(stderr, "Creating space %lu page %lu to buffer\n",
3791
 
                        (ulong) space, (ulong) offset);
3792
 
        }
3793
 
#endif /* UNIV_DEBUG */
3794
 
 
3795
 
        block = free_block;
3796
 
 
3797
 
        mutex_enter(&block->mutex);
3798
 
 
3799
 
        buf_page_init(space, offset, fold, block);
3800
 
 
3801
 
        /* The block must be put to the LRU list */
3802
 
        buf_LRU_add_block(&block->page, FALSE);
3803
 
 
3804
 
        buf_block_buf_fix_inc(block, __FILE__, __LINE__);
3805
 
        buf_pool->stat.n_pages_created++;
3806
 
 
3807
 
        if (zip_size) {
3808
 
                void*   data;
3809
 
                ibool   lru;
3810
 
 
3811
 
                /* Prevent race conditions during buf_buddy_alloc(),
3812
 
                which may release and reacquire buf_pool->mutex,
3813
 
                by IO-fixing and X-latching the block. */
3814
 
 
3815
 
                buf_page_set_io_fix(&block->page, BUF_IO_READ);
3816
 
                rw_lock_x_lock(&block->lock);
3817
 
 
3818
 
                page_zip_set_size(&block->page.zip, zip_size);
3819
 
                mutex_exit(&block->mutex);
3820
 
                /* buf_pool->mutex may be released and reacquired by
3821
 
                buf_buddy_alloc().  Thus, we must release block->mutex
3822
 
                in order not to break the latching order in
3823
 
                the reacquisition of buf_pool->mutex.  We also must
3824
 
                defer this operation until after the block descriptor
3825
 
                has been added to buf_pool->LRU and buf_pool->page_hash. */
3826
 
                data = buf_buddy_alloc(buf_pool, zip_size, &lru);
3827
 
                mutex_enter(&block->mutex);
3828
 
                block->page.zip.data = data;
3829
 
 
3830
 
                /* To maintain the invariant
3831
 
                block->in_unzip_LRU_list
3832
 
                == buf_page_belongs_to_unzip_LRU(&block->page)
3833
 
                we have to add this block to unzip_LRU after
3834
 
                block->page.zip.data is set. */
3835
 
                ut_ad(buf_page_belongs_to_unzip_LRU(&block->page));
3836
 
                buf_unzip_LRU_add_block(block, FALSE);
3837
 
 
3838
 
                buf_page_set_io_fix(&block->page, BUF_IO_NONE);
3839
 
                rw_lock_x_unlock(&block->lock);
3840
 
        }
3841
 
 
3842
 
        buf_page_set_accessed(&block->page, time_ms);
3843
 
 
3844
 
        buf_pool_mutex_exit(buf_pool);
3845
 
 
3846
 
        mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);
3847
 
 
3848
 
        mutex_exit(&block->mutex);
3849
 
 
3850
 
        /* Delete possible entries for the page from the insert buffer:
3851
 
        such can exist if the page belonged to an index which was dropped */
3852
 
 
3853
 
        ibuf_merge_or_delete_for_page(NULL, space, offset, zip_size, TRUE);
3854
 
 
3855
 
        /* Flush pages from the end of the LRU list if necessary */
3856
 
        buf_flush_free_margin(buf_pool);
3857
 
 
3858
 
        frame = block->frame;
3859
 
 
3860
 
        memset(frame + FIL_PAGE_PREV, 0xff, 4);
3861
 
        memset(frame + FIL_PAGE_NEXT, 0xff, 4);
3862
 
        mach_write_to_2(frame + FIL_PAGE_TYPE, FIL_PAGE_TYPE_ALLOCATED);
3863
 
 
3864
 
        /* Reset to zero the file flush lsn field in the page; if the first
3865
 
        page of an ibdata file is 'created' in this function into the buffer
3866
 
        pool then we lose the original contents of the file flush lsn stamp.
3867
 
        Then InnoDB could in a crash recovery print a big, false, corruption
3868
 
        warning if the stamp contains an lsn bigger than the ib_logfile lsn. */
3869
 
 
3870
 
        memset(frame + FIL_PAGE_FILE_FLUSH_LSN, 0, 8);
3871
 
 
3872
 
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
3873
 
        ut_a(++buf_dbg_counter % 357 || buf_validate());
3874
 
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
3875
 
#ifdef UNIV_IBUF_COUNT_DEBUG
3876
 
        ut_a(ibuf_count_get(buf_block_get_space(block),
3877
 
                            buf_block_get_page_no(block)) == 0);
3878
 
#endif
3879
 
        return(block);
3880
 
}
3881
 
 
3882
 
/********************************************************************//**
3883
 
Completes an asynchronous read or write request of a file page to or from
3884
 
the buffer pool. */
3885
 
UNIV_INTERN
3886
 
void
3887
 
buf_page_io_complete(
3888
 
/*=================*/
3889
 
        buf_page_t*     bpage)  /*!< in: pointer to the block in question */
3890
 
{
3891
 
        enum buf_io_fix io_type;
3892
 
        buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
3893
 
        const ibool     uncompressed = (buf_page_get_state(bpage)
3894
 
                                        == BUF_BLOCK_FILE_PAGE);
3895
 
 
3896
 
        ut_a(buf_page_in_file(bpage));
3897
 
 
3898
 
        /* We do not need protect io_fix here by mutex to read
3899
 
        it because this is the only function where we can change the value
3900
 
        from BUF_IO_READ or BUF_IO_WRITE to some other value, and our code
3901
 
        ensures that this is the only thread that handles the i/o for this
3902
 
        block. */
3903
 
 
3904
 
        io_type = buf_page_get_io_fix(bpage);
3905
 
        ut_ad(io_type == BUF_IO_READ || io_type == BUF_IO_WRITE);
3906
 
 
3907
 
        if (io_type == BUF_IO_READ) {
3908
 
                ulint   read_page_no;
3909
 
                ulint   read_space_id;
3910
 
                byte*   frame;
3911
 
 
3912
 
                if (buf_page_get_zip_size(bpage)) {
3913
 
                        frame = bpage->zip.data;
3914
 
                        buf_pool->n_pend_unzip++;
3915
 
                        if (uncompressed
3916
 
                            && !buf_zip_decompress((buf_block_t*) bpage,
3917
 
                                                   FALSE)) {
3918
 
 
3919
 
                                buf_pool->n_pend_unzip--;
3920
 
                                goto corrupt;
3921
 
                        }
3922
 
                        buf_pool->n_pend_unzip--;
3923
 
                } else {
3924
 
                        ut_a(uncompressed);
3925
 
                        frame = ((buf_block_t*) bpage)->frame;
3926
 
                }
3927
 
 
3928
 
                /* If this page is not uninitialized and not in the
3929
 
                doublewrite buffer, then the page number and space id
3930
 
                should be the same as in block. */
3931
 
                read_page_no = mach_read_from_4(frame + FIL_PAGE_OFFSET);
3932
 
                read_space_id = mach_read_from_4(
3933
 
                        frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
3934
 
 
3935
 
                if (bpage->space == TRX_SYS_SPACE
3936
 
                    && trx_doublewrite_page_inside(bpage->offset)) {
3937
 
 
3938
 
                        ut_print_timestamp(stderr);
3939
 
                        fprintf(stderr,
3940
 
                                "  InnoDB: Error: reading page %lu\n"
3941
 
                                "InnoDB: which is in the"
3942
 
                                " doublewrite buffer!\n",
3943
 
                                (ulong) bpage->offset);
3944
 
                } else if (!read_space_id && !read_page_no) {
3945
 
                        /* This is likely an uninitialized page. */
3946
 
                } else if ((bpage->space
3947
 
                            && bpage->space != read_space_id)
3948
 
                           || bpage->offset != read_page_no) {
3949
 
                        /* We did not compare space_id to read_space_id
3950
 
                        if bpage->space == 0, because the field on the
3951
 
                        page may contain garbage in MySQL < 4.1.1,
3952
 
                        which only supported bpage->space == 0. */
3953
 
 
3954
 
                        ut_print_timestamp(stderr);
3955
 
                        fprintf(stderr,
3956
 
                                "  InnoDB: Error: space id and page n:o"
3957
 
                                " stored in the page\n"
3958
 
                                "InnoDB: read in are %lu:%lu,"
3959
 
                                " should be %lu:%lu!\n",
3960
 
                                (ulong) read_space_id, (ulong) read_page_no,
3961
 
                                (ulong) bpage->space,
3962
 
                                (ulong) bpage->offset);
3963
 
                }
3964
 
 
3965
 
                /* From version 3.23.38 up we store the page checksum
3966
 
                to the 4 first bytes of the page end lsn field */
3967
 
 
3968
 
                if (buf_page_is_corrupted(frame,
3969
 
                                          buf_page_get_zip_size(bpage))) {
3970
 
corrupt:
3971
 
                        fprintf(stderr,
3972
 
                                "InnoDB: Database page corruption on disk"
3973
 
                                " or a failed\n"
3974
 
                                "InnoDB: file read of page %lu.\n"
3975
 
                                "InnoDB: You may have to recover"
3976
 
                                " from a backup.\n",
3977
 
                                (ulong) bpage->offset);
3978
 
                        buf_page_print(frame, buf_page_get_zip_size(bpage));
3979
 
                        fprintf(stderr,
3980
 
                                "InnoDB: Database page corruption on disk"
3981
 
                                " or a failed\n"
3982
 
                                "InnoDB: file read of page %lu.\n"
3983
 
                                "InnoDB: You may have to recover"
3984
 
                                " from a backup.\n",
3985
 
                                (ulong) bpage->offset);
3986
 
                        fputs("InnoDB: It is also possible that"
3987
 
                              " your operating\n"
3988
 
                              "InnoDB: system has corrupted its"
3989
 
                              " own file cache\n"
3990
 
                              "InnoDB: and rebooting your computer"
3991
 
                              " removes the\n"
3992
 
                              "InnoDB: error.\n"
3993
 
                              "InnoDB: If the corrupt page is an index page\n"
3994
 
                              "InnoDB: you can also try to"
3995
 
                              " fix the corruption\n"
3996
 
                              "InnoDB: by dumping, dropping,"
3997
 
                              " and reimporting\n"
3998
 
                              "InnoDB: the corrupt table."
3999
 
                              " You can use CHECK\n"
4000
 
                              "InnoDB: TABLE to scan your"
4001
 
                              " table for corruption.\n"
4002
 
                              "InnoDB: See also "
4003
 
                              REFMAN "forcing-recovery.html\n"
4004
 
                              "InnoDB: about forcing recovery.\n", stderr);
4005
 
 
4006
 
                        if (srv_force_recovery < SRV_FORCE_IGNORE_CORRUPT) {
4007
 
                                fputs("InnoDB: Ending processing because of"
4008
 
                                      " a corrupt database page.\n",
4009
 
                                      stderr);
4010
 
                                exit(1);
4011
 
                        }
4012
 
                }
4013
 
 
4014
 
                if (recv_recovery_is_on()) {
4015
 
                        /* Pages must be uncompressed for crash recovery. */
4016
 
                        ut_a(uncompressed);
4017
 
                        recv_recover_page(TRUE, (buf_block_t*) bpage);
4018
 
                }
4019
 
 
4020
 
                if (uncompressed && !recv_no_ibuf_operations) {
4021
 
                        ibuf_merge_or_delete_for_page(
4022
 
                                (buf_block_t*) bpage, bpage->space,
4023
 
                                bpage->offset, buf_page_get_zip_size(bpage),
4024
 
                                TRUE);
4025
 
                }
4026
 
        }
4027
 
 
4028
 
        buf_pool_mutex_enter(buf_pool);
4029
 
        mutex_enter(buf_page_get_mutex(bpage));
4030
 
 
4031
 
#ifdef UNIV_IBUF_COUNT_DEBUG
4032
 
        if (io_type == BUF_IO_WRITE || uncompressed) {
4033
 
                /* For BUF_IO_READ of compressed-only blocks, the
4034
 
                buffered operations will be merged by buf_page_get_gen()
4035
 
                after the block has been uncompressed. */
4036
 
                ut_a(ibuf_count_get(bpage->space, bpage->offset) == 0);
4037
 
        }
4038
 
#endif
4039
 
        /* Because this thread which does the unlocking is not the same that
4040
 
        did the locking, we use a pass value != 0 in unlock, which simply
4041
 
        removes the newest lock debug record, without checking the thread
4042
 
        id. */
4043
 
 
4044
 
        buf_page_set_io_fix(bpage, BUF_IO_NONE);
4045
 
 
4046
 
        switch (io_type) {
4047
 
        case BUF_IO_READ:
4048
 
                /* NOTE that the call to ibuf may have moved the ownership of
4049
 
                the x-latch to this OS thread: do not let this confuse you in
4050
 
                debugging! */
4051
 
 
4052
 
                ut_ad(buf_pool->n_pend_reads > 0);
4053
 
                buf_pool->n_pend_reads--;
4054
 
                buf_pool->stat.n_pages_read++;
4055
 
 
4056
 
                if (uncompressed) {
4057
 
                        rw_lock_x_unlock_gen(&((buf_block_t*) bpage)->lock,
4058
 
                                             BUF_IO_READ);
4059
 
                }
4060
 
 
4061
 
                break;
4062
 
 
4063
 
        case BUF_IO_WRITE:
4064
 
                /* Write means a flush operation: call the completion
4065
 
                routine in the flush system */
4066
 
 
4067
 
                buf_flush_write_complete(bpage);
4068
 
 
4069
 
                if (uncompressed) {
4070
 
                        rw_lock_s_unlock_gen(&((buf_block_t*) bpage)->lock,
4071
 
                                             BUF_IO_WRITE);
4072
 
                }
4073
 
 
4074
 
                buf_pool->stat.n_pages_written++;
4075
 
 
4076
 
                break;
4077
 
 
4078
 
        default:
4079
 
                ut_error;
4080
 
        }
4081
 
 
4082
 
#ifdef UNIV_DEBUG
4083
 
        if (buf_debug_prints) {
4084
 
                fprintf(stderr, "Has %s page space %lu page no %lu\n",
4085
 
                        io_type == BUF_IO_READ ? "read" : "written",
4086
 
                        (ulong) buf_page_get_space(bpage),
4087
 
                        (ulong) buf_page_get_page_no(bpage));
4088
 
        }
4089
 
#endif /* UNIV_DEBUG */
4090
 
 
4091
 
        mutex_exit(buf_page_get_mutex(bpage));
4092
 
        buf_pool_mutex_exit(buf_pool);
4093
 
}
4094
 
 
4095
 
/*********************************************************************//**
4096
 
Asserts that all file pages in the buffer are in a replaceable state.
4097
 
@return TRUE */
4098
 
static
4099
 
ibool
4100
 
buf_all_freed_instance(
4101
 
/*===================*/
4102
 
        buf_pool_t*     buf_pool)       /*!< in: buffer pool instancce */
4103
 
{
4104
 
        ulint           i;
4105
 
        buf_chunk_t*    chunk;
4106
 
 
4107
 
        ut_ad(buf_pool);
4108
 
 
4109
 
        buf_pool_mutex_enter(buf_pool);
4110
 
 
4111
 
        chunk = buf_pool->chunks;
4112
 
 
4113
 
        for (i = buf_pool->n_chunks; i--; chunk++) {
4114
 
 
4115
 
                const buf_block_t* block = buf_chunk_not_freed(chunk);
4116
 
 
4117
 
                if (UNIV_LIKELY_NULL(block)) {
4118
 
                        fprintf(stderr,
4119
 
                                "Page %lu %lu still fixed or dirty\n",
4120
 
                                (ulong) block->page.space,
4121
 
                                (ulong) block->page.offset);
4122
 
                        ut_error;
4123
 
                }
4124
 
        }
4125
 
 
4126
 
        buf_pool_mutex_exit(buf_pool);
4127
 
 
4128
 
        return(TRUE);
4129
 
}
4130
 
 
4131
 
/*********************************************************************//**
4132
 
Invalidates file pages in one buffer pool instance */
4133
 
static
4134
 
void
4135
 
buf_pool_invalidate_instance(
4136
 
/*=========================*/
4137
 
        buf_pool_t*     buf_pool)       /*!< in: buffer pool instance */
4138
 
{
4139
 
        ibool           freed;
4140
 
        enum buf_flush  i;
4141
 
 
4142
 
        buf_pool_mutex_enter(buf_pool);
4143
 
 
4144
 
        for (i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) {
4145
 
 
4146
 
                /* As this function is called during startup and
4147
 
                during redo application phase during recovery, InnoDB
4148
 
                is single threaded (apart from IO helper threads) at
4149
 
                this stage. No new write batch can be in intialization
4150
 
                stage at this point. */
4151
 
                ut_ad(buf_pool->init_flush[i] == FALSE);
4152
 
 
4153
 
                /* However, it is possible that a write batch that has
4154
 
                been posted earlier is still not complete. For buffer
4155
 
                pool invalidation to proceed we must ensure there is NO
4156
 
                write activity happening. */
4157
 
                if (buf_pool->n_flush[i] > 0) {
4158
 
                        buf_pool_mutex_exit(buf_pool);
4159
 
                        buf_flush_wait_batch_end(buf_pool, i);
4160
 
                        buf_pool_mutex_enter(buf_pool);
4161
 
                }
4162
 
        }
4163
 
 
4164
 
        buf_pool_mutex_exit(buf_pool);
4165
 
 
4166
 
        ut_ad(buf_all_freed_instance(buf_pool));
4167
 
 
4168
 
        freed = TRUE;
4169
 
 
4170
 
        while (freed) {
4171
 
                freed = buf_LRU_search_and_free_block(buf_pool, 100);
4172
 
        }
4173
 
 
4174
 
        buf_pool_mutex_enter(buf_pool);
4175
 
 
4176
 
        ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0);
4177
 
        ut_ad(UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0);
4178
 
 
4179
 
        buf_pool->freed_page_clock = 0;
4180
 
        buf_pool->LRU_old = NULL;
4181
 
        buf_pool->LRU_old_len = 0;
4182
 
        buf_pool->LRU_flush_ended = 0;
4183
 
 
4184
 
        memset(&buf_pool->stat, 0x00, sizeof(buf_pool->stat));
4185
 
        buf_refresh_io_stats(buf_pool);
4186
 
 
4187
 
        buf_pool_mutex_exit(buf_pool);
4188
 
}
4189
 
 
4190
 
/*********************************************************************//**
4191
 
Invalidates the file pages in the buffer pool when an archive recovery is
4192
 
completed. All the file pages buffered must be in a replaceable state when
4193
 
this function is called: not latched and not modified. */
4194
 
UNIV_INTERN
4195
 
void
4196
 
buf_pool_invalidate(void)
4197
 
/*=====================*/
4198
 
{
4199
 
        ulint   i;
4200
 
 
4201
 
        for (i = 0; i < srv_buf_pool_instances; i++) {
4202
 
                buf_pool_invalidate_instance(buf_pool_from_array(i));
4203
 
        }
4204
 
}
4205
 
 
4206
 
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
4207
 
/*********************************************************************//**
4208
 
Validates data in one buffer pool instance
4209
 
@return TRUE */
4210
 
static
4211
 
ibool
4212
 
buf_pool_validate_instance(
4213
 
/*=======================*/
4214
 
        buf_pool_t*     buf_pool)       /*!< in: buffer pool instance */
4215
 
{
4216
 
        buf_page_t*     b;
4217
 
        buf_chunk_t*    chunk;
4218
 
        ulint           i;
4219
 
        ulint           n_single_flush  = 0;
4220
 
        ulint           n_lru_flush     = 0;
4221
 
        ulint           n_list_flush    = 0;
4222
 
        ulint           n_lru           = 0;
4223
 
        ulint           n_flush         = 0;
4224
 
        ulint           n_free          = 0;
4225
 
        ulint           n_zip           = 0;
4226
 
 
4227
 
        ut_ad(buf_pool);
4228
 
 
4229
 
        buf_pool_mutex_enter(buf_pool);
4230
 
 
4231
 
        chunk = buf_pool->chunks;
4232
 
 
4233
 
        /* Check the uncompressed blocks. */
4234
 
 
4235
 
        for (i = buf_pool->n_chunks; i--; chunk++) {
4236
 
 
4237
 
                ulint           j;
4238
 
                buf_block_t*    block = chunk->blocks;
4239
 
 
4240
 
                for (j = chunk->size; j--; block++) {
4241
 
 
4242
 
                        mutex_enter(&block->mutex);
4243
 
 
4244
 
                        switch (buf_block_get_state(block)) {
4245
 
                        case BUF_BLOCK_ZIP_FREE:
4246
 
                        case BUF_BLOCK_ZIP_PAGE:
4247
 
                        case BUF_BLOCK_ZIP_DIRTY:
4248
 
                                /* These should only occur on
4249
 
                                zip_clean, zip_free[], or flush_list. */
4250
 
                                ut_error;
4251
 
                                break;
4252
 
 
4253
 
                        case BUF_BLOCK_FILE_PAGE:
4254
 
                                ut_a(buf_page_hash_get(buf_pool,
4255
 
                                                       buf_block_get_space(
4256
 
                                                               block),
4257
 
                                                       buf_block_get_page_no(
4258
 
                                                               block))
4259
 
                                     == &block->page);
4260
 
 
4261
 
#ifdef UNIV_IBUF_COUNT_DEBUG
4262
 
                                ut_a(buf_page_get_io_fix(&block->page)
4263
 
                                     == BUF_IO_READ
4264
 
                                     || !ibuf_count_get(buf_block_get_space(
4265
 
                                                                block),
4266
 
                                                        buf_block_get_page_no(
4267
 
                                                                block)));
4268
 
#endif
4269
 
                                switch (buf_page_get_io_fix(&block->page)) {
4270
 
                                case BUF_IO_NONE:
4271
 
                                        break;
4272
 
 
4273
 
                                case BUF_IO_WRITE:
4274
 
                                        switch (buf_page_get_flush_type(
4275
 
                                                        &block->page)) {
4276
 
                                        case BUF_FLUSH_LRU:
4277
 
                                                n_lru_flush++;
4278
 
                                                ut_a(rw_lock_is_locked(
4279
 
                                                             &block->lock,
4280
 
                                                             RW_LOCK_SHARED));
4281
 
                                                break;
4282
 
                                        case BUF_FLUSH_LIST:
4283
 
                                                n_list_flush++;
4284
 
                                                break;
4285
 
                                        case BUF_FLUSH_SINGLE_PAGE:
4286
 
                                                n_single_flush++;
4287
 
                                                break;
4288
 
                                        default:
4289
 
                                                ut_error;
4290
 
                                        }
4291
 
 
4292
 
                                        break;
4293
 
 
4294
 
                                case BUF_IO_READ:
4295
 
 
4296
 
                                        ut_a(rw_lock_is_locked(&block->lock,
4297
 
                                                               RW_LOCK_EX));
4298
 
                                        break;
4299
 
                                }
4300
 
 
4301
 
                                n_lru++;
4302
 
                                break;
4303
 
 
4304
 
                        case BUF_BLOCK_NOT_USED:
4305
 
                                n_free++;
4306
 
                                break;
4307
 
 
4308
 
                        case BUF_BLOCK_READY_FOR_USE:
4309
 
                        case BUF_BLOCK_MEMORY:
4310
 
                        case BUF_BLOCK_REMOVE_HASH:
4311
 
                                /* do nothing */
4312
 
                                break;
4313
 
                        }
4314
 
 
4315
 
                        mutex_exit(&block->mutex);
4316
 
                }
4317
 
        }
4318
 
 
4319
 
        mutex_enter(&buf_pool->zip_mutex);
4320
 
 
4321
 
        /* Check clean compressed-only blocks. */
4322
 
 
4323
 
        for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
4324
 
             b = UT_LIST_GET_NEXT(list, b)) {
4325
 
                ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
4326
 
                switch (buf_page_get_io_fix(b)) {
4327
 
                case BUF_IO_NONE:
4328
 
                        /* All clean blocks should be I/O-unfixed. */
4329
 
                        break;
4330
 
                case BUF_IO_READ:
4331
 
                        /* In buf_LRU_free_block(), we temporarily set
4332
 
                        b->io_fix = BUF_IO_READ for a newly allocated
4333
 
                        control block in order to prevent
4334
 
                        buf_page_get_gen() from decompressing the block. */
4335
 
                        break;
4336
 
                default:
4337
 
                        ut_error;
4338
 
                        break;
4339
 
                }
4340
 
 
4341
 
                /* It is OK to read oldest_modification here because
4342
 
                we have acquired buf_pool->zip_mutex above which acts
4343
 
                as the 'block->mutex' for these bpages. */
4344
 
                ut_a(!b->oldest_modification);
4345
 
                ut_a(buf_page_hash_get(buf_pool, b->space, b->offset) == b);
4346
 
 
4347
 
                n_lru++;
4348
 
                n_zip++;
4349
 
        }
4350
 
 
4351
 
        /* Check dirty blocks. */
4352
 
 
4353
 
        buf_flush_list_mutex_enter(buf_pool);
4354
 
        for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
4355
 
             b = UT_LIST_GET_NEXT(list, b)) {
4356
 
                ut_ad(b->in_flush_list);
4357
 
                ut_a(b->oldest_modification);
4358
 
                n_flush++;
4359
 
 
4360
 
                switch (buf_page_get_state(b)) {
4361
 
                case BUF_BLOCK_ZIP_DIRTY:
4362
 
                        n_lru++;
4363
 
                        n_zip++;
4364
 
                        switch (buf_page_get_io_fix(b)) {
4365
 
                        case BUF_IO_NONE:
4366
 
                        case BUF_IO_READ:
4367
 
                                break;
4368
 
                        case BUF_IO_WRITE:
4369
 
                                switch (buf_page_get_flush_type(b)) {
4370
 
                                case BUF_FLUSH_LRU:
4371
 
                                        n_lru_flush++;
4372
 
                                        break;
4373
 
                                case BUF_FLUSH_LIST:
4374
 
                                        n_list_flush++;
4375
 
                                        break;
4376
 
                                case BUF_FLUSH_SINGLE_PAGE:
4377
 
                                        n_single_flush++;
4378
 
                                        break;
4379
 
                                default:
4380
 
                                        ut_error;
4381
 
                                }
4382
 
                                break;
4383
 
                        }
4384
 
                        break;
4385
 
                case BUF_BLOCK_FILE_PAGE:
4386
 
                        /* uncompressed page */
4387
 
                        break;
4388
 
                case BUF_BLOCK_ZIP_FREE:
4389
 
                case BUF_BLOCK_ZIP_PAGE:
4390
 
                case BUF_BLOCK_NOT_USED:
4391
 
                case BUF_BLOCK_READY_FOR_USE:
4392
 
                case BUF_BLOCK_MEMORY:
4393
 
                case BUF_BLOCK_REMOVE_HASH:
4394
 
                        ut_error;
4395
 
                        break;
4396
 
                }
4397
 
                ut_a(buf_page_hash_get(buf_pool, b->space, b->offset) == b);
4398
 
        }
4399
 
 
4400
 
        ut_a(UT_LIST_GET_LEN(buf_pool->flush_list) == n_flush);
4401
 
 
4402
 
        buf_flush_list_mutex_exit(buf_pool);
4403
 
 
4404
 
        mutex_exit(&buf_pool->zip_mutex);
4405
 
 
4406
 
        if (n_lru + n_free > buf_pool->curr_size + n_zip) {
4407
 
                fprintf(stderr, "n LRU %lu, n free %lu, pool %lu zip %lu\n",
4408
 
                        (ulong) n_lru, (ulong) n_free,
4409
 
                        (ulong) buf_pool->curr_size, (ulong) n_zip);
4410
 
                ut_error;
4411
 
        }
4412
 
 
4413
 
        ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == n_lru);
4414
 
        if (UT_LIST_GET_LEN(buf_pool->free) != n_free) {
4415
 
                fprintf(stderr, "Free list len %lu, free blocks %lu\n",
4416
 
                        (ulong) UT_LIST_GET_LEN(buf_pool->free),
4417
 
                        (ulong) n_free);
4418
 
                ut_error;
4419
 
        }
4420
 
 
4421
 
        ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_single_flush);
4422
 
        ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
4423
 
        ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush);
4424
 
 
4425
 
        buf_pool_mutex_exit(buf_pool);
4426
 
 
4427
 
        ut_a(buf_LRU_validate());
4428
 
        ut_a(buf_flush_validate(buf_pool));
4429
 
 
4430
 
        return(TRUE);
4431
 
}
4432
 
 
4433
 
/*********************************************************************//**
4434
 
Validates the buffer buf_pool data structure.
4435
 
@return TRUE */
4436
 
UNIV_INTERN
4437
 
ibool
4438
 
buf_validate(void)
4439
 
/*==============*/
4440
 
{
4441
 
        ulint   i;
4442
 
 
4443
 
        for (i = 0; i < srv_buf_pool_instances; i++) {
4444
 
                buf_pool_t*     buf_pool;
4445
 
 
4446
 
                buf_pool = buf_pool_from_array(i);
4447
 
 
4448
 
                buf_pool_validate_instance(buf_pool);
4449
 
        }
4450
 
        return(TRUE);
4451
 
}
4452
 
 
4453
 
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
4454
 
 
4455
 
#if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
4456
 
/*********************************************************************//**
4457
 
Prints info of the buffer buf_pool data structure for one instance. */
4458
 
static
4459
 
void
4460
 
buf_print_instance(
4461
 
/*===============*/
4462
 
        buf_pool_t*     buf_pool)
4463
 
{
4464
 
        index_id_t*     index_ids;
4465
 
        ulint*          counts;
4466
 
        ulint           size;
4467
 
        ulint           i;
4468
 
        ulint           j;
4469
 
        index_id_t      id;
4470
 
        ulint           n_found;
4471
 
        buf_chunk_t*    chunk;
4472
 
        dict_index_t*   index;
4473
 
 
4474
 
        ut_ad(buf_pool);
4475
 
 
4476
 
        size = buf_pool->curr_size;
4477
 
 
4478
 
        index_ids = mem_alloc(size * sizeof *index_ids);
4479
 
        counts = mem_alloc(sizeof(ulint) * size);
4480
 
 
4481
 
        buf_pool_mutex_enter(buf_pool);
4482
 
        buf_flush_list_mutex_enter(buf_pool);
4483
 
 
4484
 
        fprintf(stderr,
4485
 
                "buf_pool size %lu\n"
4486
 
                "database pages %lu\n"
4487
 
                "free pages %lu\n"
4488
 
                "modified database pages %lu\n"
4489
 
                "n pending decompressions %lu\n"
4490
 
                "n pending reads %lu\n"
4491
 
                "n pending flush LRU %lu list %lu single page %lu\n"
4492
 
                "pages made young %lu, not young %lu\n"
4493
 
                "pages read %lu, created %lu, written %lu\n",
4494
 
                (ulong) size,
4495
 
                (ulong) UT_LIST_GET_LEN(buf_pool->LRU),
4496
 
                (ulong) UT_LIST_GET_LEN(buf_pool->free),
4497
 
                (ulong) UT_LIST_GET_LEN(buf_pool->flush_list),
4498
 
                (ulong) buf_pool->n_pend_unzip,
4499
 
                (ulong) buf_pool->n_pend_reads,
4500
 
                (ulong) buf_pool->n_flush[BUF_FLUSH_LRU],
4501
 
                (ulong) buf_pool->n_flush[BUF_FLUSH_LIST],
4502
 
                (ulong) buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE],
4503
 
                (ulong) buf_pool->stat.n_pages_made_young,
4504
 
                (ulong) buf_pool->stat.n_pages_not_made_young,
4505
 
                (ulong) buf_pool->stat.n_pages_read,
4506
 
                (ulong) buf_pool->stat.n_pages_created,
4507
 
                (ulong) buf_pool->stat.n_pages_written);
4508
 
 
4509
 
        buf_flush_list_mutex_exit(buf_pool);
4510
 
 
4511
 
        /* Count the number of blocks belonging to each index in the buffer */
4512
 
 
4513
 
        n_found = 0;
4514
 
 
4515
 
        chunk = buf_pool->chunks;
4516
 
 
4517
 
        for (i = buf_pool->n_chunks; i--; chunk++) {
4518
 
                buf_block_t*    block           = chunk->blocks;
4519
 
                ulint           n_blocks        = chunk->size;
4520
 
 
4521
 
                for (; n_blocks--; block++) {
4522
 
                        const buf_frame_t* frame = block->frame;
4523
 
 
4524
 
                        if (fil_page_get_type(frame) == FIL_PAGE_INDEX) {
4525
 
 
4526
 
                                id = btr_page_get_index_id(frame);
4527
 
 
4528
 
                                /* Look for the id in the index_ids array */
4529
 
                                j = 0;
4530
 
 
4531
 
                                while (j < n_found) {
4532
 
 
4533
 
                                        if (index_ids[j] == id) {
4534
 
                                                counts[j]++;
4535
 
 
4536
 
                                                break;
4537
 
                                        }
4538
 
                                        j++;
4539
 
                                }
4540
 
 
4541
 
                                if (j == n_found) {
4542
 
                                        n_found++;
4543
 
                                        index_ids[j] = id;
4544
 
                                        counts[j] = 1;
4545
 
                                }
4546
 
                        }
4547
 
                }
4548
 
        }
4549
 
 
4550
 
        buf_pool_mutex_exit(buf_pool);
4551
 
 
4552
 
        for (i = 0; i < n_found; i++) {
4553
 
                index = dict_index_get_if_in_cache(index_ids[i]);
4554
 
 
4555
 
                fprintf(stderr,
4556
 
                        "Block count for index %llu in buffer is about %lu",
4557
 
                        (ullint) index_ids[i],
4558
 
                        (ulong) counts[i]);
4559
 
 
4560
 
                if (index) {
4561
 
                        putc(' ', stderr);
4562
 
                        dict_index_name_print(stderr, NULL, index);
4563
 
                }
4564
 
 
4565
 
                putc('\n', stderr);
4566
 
        }
4567
 
 
4568
 
        mem_free(index_ids);
4569
 
        mem_free(counts);
4570
 
 
4571
 
        ut_a(buf_pool_validate_instance(buf_pool));
4572
 
}
4573
 
 
4574
 
/*********************************************************************//**
4575
 
Prints info of the buffer buf_pool data structure. */
4576
 
UNIV_INTERN
4577
 
void
4578
 
buf_print(void)
4579
 
/*===========*/
4580
 
{
4581
 
        ulint   i;
4582
 
 
4583
 
        for (i = 0; i < srv_buf_pool_instances; i++) {
4584
 
                buf_pool_t*     buf_pool;
4585
 
 
4586
 
                buf_pool = buf_pool_from_array(i);
4587
 
                buf_print_instance(buf_pool);
4588
 
        }
4589
 
}
4590
 
#endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */
4591
 
 
4592
 
#ifdef UNIV_DEBUG
4593
 
/*********************************************************************//**
4594
 
Returns the number of latched pages in the buffer pool.
4595
 
@return number of latched pages */
4596
 
UNIV_INTERN
4597
 
ulint
4598
 
buf_get_latched_pages_number_instance(
4599
 
/*==================================*/
4600
 
        buf_pool_t*     buf_pool)       /*!< in: buffer pool instance */
4601
 
{
4602
 
        buf_page_t*     b;
4603
 
        ulint           i;
4604
 
        buf_chunk_t*    chunk;
4605
 
        ulint           fixed_pages_number = 0;
4606
 
 
4607
 
        buf_pool_mutex_enter(buf_pool);
4608
 
 
4609
 
        chunk = buf_pool->chunks;
4610
 
 
4611
 
        for (i = buf_pool->n_chunks; i--; chunk++) {
4612
 
                buf_block_t*    block;
4613
 
                ulint           j;
4614
 
 
4615
 
                block = chunk->blocks;
4616
 
 
4617
 
                for (j = chunk->size; j--; block++) {
4618
 
                        if (buf_block_get_state(block)
4619
 
                            != BUF_BLOCK_FILE_PAGE) {
4620
 
 
4621
 
                                continue;
4622
 
                        }
4623
 
 
4624
 
                        mutex_enter(&block->mutex);
4625
 
 
4626
 
                        if (block->page.buf_fix_count != 0
4627
 
                            || buf_page_get_io_fix(&block->page)
4628
 
                            != BUF_IO_NONE) {
4629
 
                                fixed_pages_number++;
4630
 
                        }
4631
 
 
4632
 
                        mutex_exit(&block->mutex);
4633
 
                }
4634
 
        }
4635
 
 
4636
 
        mutex_enter(&buf_pool->zip_mutex);
4637
 
 
4638
 
        /* Traverse the lists of clean and dirty compressed-only blocks. */
4639
 
 
4640
 
        for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
4641
 
             b = UT_LIST_GET_NEXT(list, b)) {
4642
 
                ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
4643
 
                ut_a(buf_page_get_io_fix(b) != BUF_IO_WRITE);
4644
 
 
4645
 
                if (b->buf_fix_count != 0
4646
 
                    || buf_page_get_io_fix(b) != BUF_IO_NONE) {
4647
 
                        fixed_pages_number++;
4648
 
                }
4649
 
        }
4650
 
 
4651
 
        buf_flush_list_mutex_enter(buf_pool);
4652
 
        for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
4653
 
             b = UT_LIST_GET_NEXT(list, b)) {
4654
 
                ut_ad(b->in_flush_list);
4655
 
 
4656
 
                switch (buf_page_get_state(b)) {
4657
 
                case BUF_BLOCK_ZIP_DIRTY:
4658
 
                        if (b->buf_fix_count != 0
4659
 
                            || buf_page_get_io_fix(b) != BUF_IO_NONE) {
4660
 
                                fixed_pages_number++;
4661
 
                        }
4662
 
                        break;
4663
 
                case BUF_BLOCK_FILE_PAGE:
4664
 
                        /* uncompressed page */
4665
 
                        break;
4666
 
                case BUF_BLOCK_ZIP_FREE:
4667
 
                case BUF_BLOCK_ZIP_PAGE:
4668
 
                case BUF_BLOCK_NOT_USED:
4669
 
                case BUF_BLOCK_READY_FOR_USE:
4670
 
                case BUF_BLOCK_MEMORY:
4671
 
                case BUF_BLOCK_REMOVE_HASH:
4672
 
                        ut_error;
4673
 
                        break;
4674
 
                }
4675
 
        }
4676
 
 
4677
 
        buf_flush_list_mutex_exit(buf_pool);
4678
 
        mutex_exit(&buf_pool->zip_mutex);
4679
 
        buf_pool_mutex_exit(buf_pool);
4680
 
 
4681
 
        return(fixed_pages_number);
4682
 
}
4683
 
 
4684
 
/*********************************************************************//**
4685
 
Returns the number of latched pages in all the buffer pools.
4686
 
@return number of latched pages */
4687
 
UNIV_INTERN
4688
 
ulint
4689
 
buf_get_latched_pages_number(void)
4690
 
/*==============================*/
4691
 
{
4692
 
        ulint   i;
4693
 
        ulint   total_latched_pages = 0;
4694
 
 
4695
 
        for (i = 0; i < srv_buf_pool_instances; i++) {
4696
 
                buf_pool_t*     buf_pool;
4697
 
 
4698
 
                buf_pool = buf_pool_from_array(i);
4699
 
 
4700
 
                total_latched_pages += buf_get_latched_pages_number_instance(
4701
 
                        buf_pool);
4702
 
        }
4703
 
 
4704
 
        return(total_latched_pages);
4705
 
}
4706
 
 
4707
 
#endif /* UNIV_DEBUG */
4708
 
 
4709
 
/*********************************************************************//**
4710
 
Returns the number of pending buf pool ios.
4711
 
@return number of pending I/O operations */
4712
 
UNIV_INTERN
4713
 
ulint
4714
 
buf_get_n_pending_ios(void)
4715
 
/*=======================*/
4716
 
{
4717
 
        ulint   i;
4718
 
        ulint   pend_ios = 0;
4719
 
 
4720
 
        for (i = 0; i < srv_buf_pool_instances; i++) {
4721
 
                buf_pool_t*     buf_pool;
4722
 
 
4723
 
                buf_pool = buf_pool_from_array(i);
4724
 
 
4725
 
                pend_ios +=
4726
 
                        buf_pool->n_pend_reads
4727
 
                        + buf_pool->n_flush[BUF_FLUSH_LRU]
4728
 
                        + buf_pool->n_flush[BUF_FLUSH_LIST]
4729
 
                        + buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE];
4730
 
        }
4731
 
 
4732
 
        return(pend_ios);
4733
 
}
4734
 
 
4735
 
/*********************************************************************//**
4736
 
Returns the ratio in percents of modified pages in the buffer pool /
4737
 
database pages in the buffer pool.
4738
 
@return modified page percentage ratio */
4739
 
UNIV_INTERN
4740
 
ulint
4741
 
buf_get_modified_ratio_pct(void)
4742
 
/*============================*/
4743
 
{
4744
 
        ulint           ratio;
4745
 
        ulint           lru_len = 0;
4746
 
        ulint           free_len = 0;
4747
 
        ulint           flush_list_len = 0;
4748
 
 
4749
 
        buf_get_total_list_len(&lru_len, &free_len, &flush_list_len);
4750
 
 
4751
 
        ratio = (100 * flush_list_len) / (1 + lru_len + free_len);
4752
 
  
4753
 
        /* 1 + is there to avoid division by zero */
4754
 
 
4755
 
        return(ratio);
4756
 
}
4757
 
 
4758
 
/*********************************************************************//**
4759
 
Prints info of the buffer i/o. */
4760
 
static
4761
 
void
4762
 
buf_print_io_instance(
4763
 
/*==================*/
4764
 
        buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
4765
 
        FILE*           file)           /*!< in/out: buffer where to print */
4766
 
{
4767
 
        time_t  current_time;
4768
 
        double  time_elapsed;
4769
 
        ulint   n_gets_diff;
4770
 
 
4771
 
        ut_ad(buf_pool);
4772
 
 
4773
 
        buf_pool_mutex_enter(buf_pool);
4774
 
        buf_flush_list_mutex_enter(buf_pool);
4775
 
 
4776
 
        fprintf(file,
4777
 
                "Buffer pool size   %lu\n"
4778
 
                "Free buffers       %lu\n"
4779
 
                "Database pages     %lu\n"
4780
 
                "Old database pages %lu\n"
4781
 
                "Modified db pages  %lu\n"
4782
 
                "Pending reads %lu\n"
4783
 
                "Pending writes: LRU %lu, flush list %lu, single page %lu\n",
4784
 
                (ulong) buf_pool->curr_size,
4785
 
                (ulong) UT_LIST_GET_LEN(buf_pool->free),
4786
 
                (ulong) UT_LIST_GET_LEN(buf_pool->LRU),
4787
 
                (ulong) buf_pool->LRU_old_len,
4788
 
                (ulong) UT_LIST_GET_LEN(buf_pool->flush_list),
4789
 
                (ulong) buf_pool->n_pend_reads,
4790
 
                (ulong) buf_pool->n_flush[BUF_FLUSH_LRU]
4791
 
                + buf_pool->init_flush[BUF_FLUSH_LRU],
4792
 
                (ulong) buf_pool->n_flush[BUF_FLUSH_LIST]
4793
 
                + buf_pool->init_flush[BUF_FLUSH_LIST],
4794
 
                (ulong) buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]);
4795
 
 
4796
 
        buf_flush_list_mutex_exit(buf_pool);
4797
 
 
4798
 
        current_time = time(NULL);
4799
 
        time_elapsed = 0.001 + difftime(current_time,
4800
 
                                        buf_pool->last_printout_time);
4801
 
 
4802
 
        fprintf(file,
4803
 
                "Pages made young %lu, not young %lu\n"
4804
 
                "%.2f youngs/s, %.2f non-youngs/s\n"
4805
 
                "Pages read %lu, created %lu, written %lu\n"
4806
 
                "%.2f reads/s, %.2f creates/s, %.2f writes/s\n",
4807
 
                (ulong) buf_pool->stat.n_pages_made_young,
4808
 
                (ulong) buf_pool->stat.n_pages_not_made_young,
4809
 
                (buf_pool->stat.n_pages_made_young
4810
 
                 - buf_pool->old_stat.n_pages_made_young)
4811
 
                / time_elapsed,
4812
 
                (buf_pool->stat.n_pages_not_made_young
4813
 
                 - buf_pool->old_stat.n_pages_not_made_young)
4814
 
                / time_elapsed,
4815
 
                (ulong) buf_pool->stat.n_pages_read,
4816
 
                (ulong) buf_pool->stat.n_pages_created,
4817
 
                (ulong) buf_pool->stat.n_pages_written,
4818
 
                (buf_pool->stat.n_pages_read
4819
 
                 - buf_pool->old_stat.n_pages_read)
4820
 
                / time_elapsed,
4821
 
                (buf_pool->stat.n_pages_created
4822
 
                 - buf_pool->old_stat.n_pages_created)
4823
 
                / time_elapsed,
4824
 
                (buf_pool->stat.n_pages_written
4825
 
                 - buf_pool->old_stat.n_pages_written)
4826
 
                / time_elapsed);
4827
 
 
4828
 
        n_gets_diff = buf_pool->stat.n_page_gets
4829
 
                    - buf_pool->old_stat.n_page_gets;
4830
 
 
4831
 
        if (n_gets_diff) {
4832
 
                fprintf(file,
4833
 
                        "Buffer pool hit rate %lu / 1000,"
4834
 
                        " young-making rate %lu / 1000 not %lu / 1000\n",
4835
 
                        (ulong)
4836
 
                        (1000 - ((1000 * (buf_pool->stat.n_pages_read
4837
 
                                          - buf_pool->old_stat.n_pages_read))
4838
 
                                 / (buf_pool->stat.n_page_gets
4839
 
                                    - buf_pool->old_stat.n_page_gets))),
4840
 
                        (ulong)
4841
 
                        (1000 * (buf_pool->stat.n_pages_made_young
4842
 
                                 - buf_pool->old_stat.n_pages_made_young)
4843
 
                         / n_gets_diff),
4844
 
                        (ulong)
4845
 
                        (1000 * (buf_pool->stat.n_pages_not_made_young
4846
 
                                 - buf_pool->old_stat.n_pages_not_made_young)
4847
 
                         / n_gets_diff));
4848
 
        } else {
4849
 
                fputs("No buffer pool page gets since the last printout\n",
4850
 
                      file);
4851
 
        }
4852
 
 
4853
 
        /* Statistics about read ahead algorithm */
4854
 
        fprintf(file, "Pages read ahead %.2f/s,"
4855
 
                " evicted without access %.2f/s\n",
4856
 
                (buf_pool->stat.n_ra_pages_read
4857
 
                - buf_pool->old_stat.n_ra_pages_read)
4858
 
                / time_elapsed,
4859
 
                (buf_pool->stat.n_ra_pages_evicted
4860
 
                - buf_pool->old_stat.n_ra_pages_evicted)
4861
 
                / time_elapsed);
4862
 
 
4863
 
        /* Print some values to help us with visualizing what is
4864
 
        happening with LRU eviction. */
4865
 
        fprintf(file,
4866
 
                "LRU len: %lu, unzip_LRU len: %lu\n"
4867
 
                "I/O sum[%lu]:cur[%lu], unzip sum[%lu]:cur[%lu]\n",
4868
 
                UT_LIST_GET_LEN(buf_pool->LRU),
4869
 
                UT_LIST_GET_LEN(buf_pool->unzip_LRU),
4870
 
                buf_LRU_stat_sum.io, buf_LRU_stat_cur.io,
4871
 
                buf_LRU_stat_sum.unzip, buf_LRU_stat_cur.unzip);
4872
 
 
4873
 
        buf_refresh_io_stats(buf_pool);
4874
 
        buf_pool_mutex_exit(buf_pool);
4875
 
}
4876
 
 
4877
 
/*********************************************************************//**
4878
 
Prints info of the buffer i/o. */
4879
 
UNIV_INTERN
4880
 
void
4881
 
buf_print_io(
4882
 
/*=========*/
4883
 
        FILE*   file)   /*!< in/out: buffer where to print */
4884
 
{
4885
 
        ulint   i;
4886
 
 
4887
 
        for (i = 0; i < srv_buf_pool_instances; i++) {
4888
 
                buf_pool_t*     buf_pool;
4889
 
 
4890
 
                buf_pool = buf_pool_from_array(i);
4891
 
                buf_print_io_instance(buf_pool, file);
4892
 
        }
4893
 
}
4894
 
 
4895
 
/**********************************************************************//**
4896
 
Refreshes the statistics used to print per-second averages. */
4897
 
UNIV_INTERN
4898
 
void
4899
 
buf_refresh_io_stats(
4900
 
/*=================*/
4901
 
        buf_pool_t*     buf_pool)       /*!< in: buffer pool instance */
4902
 
{
4903
 
        buf_pool->last_printout_time = ut_time();
4904
 
        buf_pool->old_stat = buf_pool->stat;
4905
 
}
4906
 
 
4907
 
/**********************************************************************//**
4908
 
Refreshes the statistics used to print per-second averages. */
4909
 
UNIV_INTERN
4910
 
void
4911
 
buf_refresh_io_stats_all(void)
4912
 
/*==========================*/
4913
 
{
4914
 
        ulint           i;
4915
 
 
4916
 
        for (i = 0; i < srv_buf_pool_instances; i++) {
4917
 
                buf_pool_t*     buf_pool;
4918
 
 
4919
 
                buf_pool = buf_pool_from_array(i);
4920
 
 
4921
 
                buf_refresh_io_stats(buf_pool);
4922
 
        }
4923
 
}
4924
 
 
4925
 
/**********************************************************************//**
4926
 
Check if all pages in all buffer pools are in a replacable state.
4927
 
@return FALSE if not */
4928
 
UNIV_INTERN
4929
 
ibool
4930
 
buf_all_freed(void)
4931
 
/*===============*/
4932
 
{
4933
 
        ulint   i;
4934
 
 
4935
 
        for (i = 0; i < srv_buf_pool_instances; i++) {
4936
 
                buf_pool_t*     buf_pool;
4937
 
 
4938
 
                buf_pool = buf_pool_from_array(i);
4939
 
 
4940
 
                if (!buf_all_freed_instance(buf_pool)) {
4941
 
                        return(FALSE);
4942
 
                }
4943
 
        }
4944
 
 
4945
 
        return(TRUE);
4946
 
}
4947
 
  
4948
 
/*********************************************************************//**
4949
 
Checks that there currently are no pending i/o-operations for the buffer
4950
 
pool.
4951
 
@return TRUE if there is no pending i/o */
4952
 
UNIV_INTERN
4953
 
ibool
4954
 
buf_pool_check_no_pending_io(void)
4955
 
/*==============================*/
4956
 
{
4957
 
        ulint           i;
4958
 
        ibool           ret = TRUE;
4959
 
 
4960
 
        buf_pool_mutex_enter_all();
4961
 
 
4962
 
        for (i = 0; i < srv_buf_pool_instances && ret; i++) {
4963
 
                const buf_pool_t*       buf_pool;
4964
 
 
4965
 
                buf_pool = buf_pool_from_array(i);
4966
 
 
4967
 
                if (buf_pool->n_pend_reads
4968
 
                    + buf_pool->n_flush[BUF_FLUSH_LRU]
4969
 
                    + buf_pool->n_flush[BUF_FLUSH_LIST]
4970
 
                    + buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]) {
4971
 
 
4972
 
                        ret = FALSE;
4973
 
                }
4974
 
        }
4975
 
 
4976
 
        buf_pool_mutex_exit_all();
4977
 
 
4978
 
        return(ret);
4979
 
}
4980
 
 
4981
 
#if 0
4982
 
Code currently not used
4983
 
/*********************************************************************//**
4984
 
Gets the current length of the free list of buffer blocks.
4985
 
@return length of the free list */
4986
 
UNIV_INTERN
4987
 
ulint
4988
 
buf_get_free_list_len(void)
4989
 
/*=======================*/
4990
 
{
4991
 
        ulint   len;
4992
 
 
4993
 
        buf_pool_mutex_enter(buf_pool);
4994
 
 
4995
 
        len = UT_LIST_GET_LEN(buf_pool->free);
4996
 
 
4997
 
        buf_pool_mutex_exit(buf_pool);
4998
 
 
4999
 
        return(len);
5000
 
}
5001
 
#endif
5002
 
 
5003
 
#else /* !UNIV_HOTBACKUP */
5004
 
/********************************************************************//**
5005
 
Inits a page to the buffer buf_pool, for use in ibbackup --restore. */
5006
 
UNIV_INTERN
5007
 
void
5008
 
buf_page_init_for_backup_restore(
5009
 
/*=============================*/
5010
 
        ulint           space,  /*!< in: space id */
5011
 
        ulint           offset, /*!< in: offset of the page within space
5012
 
                                in units of a page */
5013
 
        ulint           zip_size,/*!< in: compressed page size in bytes
5014
 
                                or 0 for uncompressed pages */
5015
 
        buf_block_t*    block)  /*!< in: block to init */
5016
 
{
5017
 
        block->page.state       = BUF_BLOCK_FILE_PAGE;
5018
 
        block->page.space       = space;
5019
 
        block->page.offset      = offset;
5020
 
 
5021
 
        page_zip_des_init(&block->page.zip);
5022
 
 
5023
 
        /* We assume that block->page.data has been allocated
5024
 
        with zip_size == UNIV_PAGE_SIZE. */
5025
 
        ut_ad(zip_size <= UNIV_PAGE_SIZE);
5026
 
        ut_ad(ut_is_2pow(zip_size));
5027
 
        page_zip_set_size(&block->page.zip, zip_size);
5028
 
        if (zip_size) {
5029
 
                block->page.zip.data = block->frame + UNIV_PAGE_SIZE;
5030
 
        }
5031
 
}
5032
 
#endif /* !UNIV_HOTBACKUP */