~drizzle-trunk/drizzle/development

« back to all changes in this revision

Viewing changes to storage/innobase/buf/buf0buf.c

Merged Nathan's branch lp:~nlws/drizzle/fix-string-c-ptr-overrun (fix for a string C-pointer overrun).

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
/*   Innobase relational database engine; Copyright (C) 2001 Innobase Oy
2
 
 
3
 
     This program is free software; you can redistribute it and/or modify
4
 
     it under the terms of the GNU General Public License 2
5
 
     as published by the Free Software Foundation in June 1991.
6
 
 
7
 
     This program is distributed in the hope that it will be useful,
8
 
     but WITHOUT ANY WARRANTY; without even the implied warranty of
9
 
     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
 
     GNU General Public License for more details.
11
 
 
12
 
     You should have received a copy of the GNU General Public License 2
13
 
     along with this program (in file COPYING); if not, write to the Free
14
 
     Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
15
 
/******************************************************
16
 
The database buffer buf_pool
17
 
 
18
 
(c) 1995 Innobase Oy
19
 
 
20
 
Created 11/5/1995 Heikki Tuuri
21
 
*******************************************************/
22
 
 
23
 
#include "buf0buf.h"
24
 
 
25
 
#ifdef UNIV_NONINL
26
 
#include "buf0buf.ic"
27
 
#endif
28
 
 
29
 
#include "buf0buddy.h"
30
 
#include "mem0mem.h"
31
 
#include "btr0btr.h"
32
 
#include "fil0fil.h"
33
 
#include "lock0lock.h"
34
 
#include "btr0sea.h"
35
 
#include "ibuf0ibuf.h"
36
 
#include "dict0dict.h"
37
 
#include "log0recv.h"
38
 
#include "log0log.h"
39
 
#include "trx0undo.h"
40
 
#include "srv0srv.h"
41
 
#include "page0zip.h"
42
 
 
43
 
/*
44
 
                IMPLEMENTATION OF THE BUFFER POOL
45
 
                =================================
46
 
 
47
 
Performance improvement:
48
 
------------------------
49
 
Thread scheduling in NT may be so slow that the OS wait mechanism should
50
 
not be used even in waiting for disk reads to complete.
51
 
Rather, we should put waiting query threads to the queue of
52
 
waiting jobs, and let the OS thread do something useful while the i/o
53
 
is processed. In this way we could remove most OS thread switches in
54
 
an i/o-intensive benchmark like TPC-C.
55
 
 
56
 
A possibility is to put a user space thread library between the database
57
 
and NT. User space thread libraries might be very fast.
58
 
 
59
 
SQL Server 7.0 can be configured to use 'fibers' which are lightweight
60
 
threads in NT. These should be studied.
61
 
 
62
 
                Buffer frames and blocks
63
 
                ------------------------
64
 
Following the terminology of Gray and Reuter, we call the memory
65
 
blocks where file pages are loaded buffer frames. For each buffer
66
 
frame there is a control block, or shortly, a block, in the buffer
67
 
control array. The control info which does not need to be stored
68
 
in the file along with the file page, resides in the control block.
69
 
 
70
 
                Buffer pool struct
71
 
                ------------------
72
 
The buffer buf_pool contains a single mutex which protects all the
73
 
control data structures of the buf_pool. The content of a buffer frame is
74
 
protected by a separate read-write lock in its control block, though.
75
 
These locks can be locked and unlocked without owning the buf_pool mutex.
76
 
The OS events in the buf_pool struct can be waited for without owning the
77
 
buf_pool mutex.
78
 
 
79
 
The buf_pool mutex is a hot-spot in main memory, causing a lot of
80
 
memory bus traffic on multiprocessor systems when processors
81
 
alternately access the mutex. On our Pentium, the mutex is accessed
82
 
maybe every 10 microseconds. We gave up the solution to have mutexes
83
 
for each control block, for instance, because it seemed to be
84
 
complicated.
85
 
 
86
 
A solution to reduce mutex contention of the buf_pool mutex is to
87
 
create a separate mutex for the page hash table. On Pentium,
88
 
accessing the hash table takes 2 microseconds, about half
89
 
of the total buf_pool mutex hold time.
90
 
 
91
 
                Control blocks
92
 
                --------------
93
 
 
94
 
The control block contains, for instance, the bufferfix count
95
 
which is incremented when a thread wants a file page to be fixed
96
 
in a buffer frame. The bufferfix operation does not lock the
97
 
contents of the frame, however. For this purpose, the control
98
 
block contains a read-write lock.
99
 
 
100
 
The buffer frames have to be aligned so that the start memory
101
 
address of a frame is divisible by the universal page size, which
102
 
is a power of two.
103
 
 
104
 
We intend to make the buffer buf_pool size on-line reconfigurable,
105
 
that is, the buf_pool size can be changed without closing the database.
106
 
Then the database administarator may adjust it to be bigger
107
 
at night, for example. The control block array must
108
 
contain enough control blocks for the maximum buffer buf_pool size
109
 
which is used in the particular database.
110
 
If the buf_pool size is cut, we exploit the virtual memory mechanism of
111
 
the OS, and just refrain from using frames at high addresses. Then the OS
112
 
can swap them to disk.
113
 
 
114
 
The control blocks containing file pages are put to a hash table
115
 
according to the file address of the page.
116
 
We could speed up the access to an individual page by using
117
 
"pointer swizzling": we could replace the page references on
118
 
non-leaf index pages by direct pointers to the page, if it exists
119
 
in the buf_pool. We could make a separate hash table where we could
120
 
chain all the page references in non-leaf pages residing in the buf_pool,
121
 
using the page reference as the hash key,
122
 
and at the time of reading of a page update the pointers accordingly.
123
 
Drawbacks of this solution are added complexity and,
124
 
possibly, extra space required on non-leaf pages for memory pointers.
125
 
A simpler solution is just to speed up the hash table mechanism
126
 
in the database, using tables whose size is a power of 2.
127
 
 
128
 
                Lists of blocks
129
 
                ---------------
130
 
 
131
 
There are several lists of control blocks.
132
 
 
133
 
The free list (buf_pool->free) contains blocks which are currently not
134
 
used.
135
 
 
136
 
The common LRU list contains all the blocks holding a file page
137
 
except those for which the bufferfix count is non-zero.
138
 
The pages are in the LRU list roughly in the order of the last
139
 
access to the page, so that the oldest pages are at the end of the
140
 
list. We also keep a pointer to near the end of the LRU list,
141
 
which we can use when we want to artificially age a page in the
142
 
buf_pool. This is used if we know that some page is not needed
143
 
again for some time: we insert the block right after the pointer,
144
 
causing it to be replaced sooner than would noramlly be the case.
145
 
Currently this aging mechanism is used for read-ahead mechanism
146
 
of pages, and it can also be used when there is a scan of a full
147
 
table which cannot fit in the memory. Putting the pages near the
148
 
of the LRU list, we make sure that most of the buf_pool stays in the
149
 
main memory, undisturbed.
150
 
 
151
 
The unzip_LRU list contains a subset of the common LRU list.  The
152
 
blocks on the unzip_LRU list hold a compressed file page and the
153
 
corresponding uncompressed page frame.  A block is in unzip_LRU if and
154
 
only if the predicate buf_page_belongs_to_unzip_LRU(&block->page)
155
 
holds.  The blocks in unzip_LRU will be in same order as they are in
156
 
the common LRU list.  That is, each manipulation of the common LRU
157
 
list will result in the same manipulation of the unzip_LRU list.
158
 
 
159
 
The chain of modified blocks (buf_pool->flush_list) contains the blocks
160
 
holding file pages that have been modified in the memory
161
 
but not written to disk yet. The block with the oldest modification
162
 
which has not yet been written to disk is at the end of the chain.
163
 
 
164
 
The chain of unmodified compressed blocks (buf_pool->zip_clean)
165
 
contains the control blocks (buf_page_t) of those compressed pages
166
 
that are not in buf_pool->flush_list and for which no uncompressed
167
 
page has been allocated in the buffer pool.  The control blocks for
168
 
uncompressed pages are accessible via buf_block_t objects that are
169
 
reachable via buf_pool->chunks[].
170
 
 
171
 
The chains of free memory blocks (buf_pool->zip_free[]) are used by
172
 
the buddy allocator (buf0buddy.c) to keep track of currently unused
173
 
memory blocks of size sizeof(buf_page_t)..UNIV_PAGE_SIZE / 2.  These
174
 
blocks are inside the UNIV_PAGE_SIZE-sized memory blocks of type
175
 
BUF_BLOCK_MEMORY that the buddy allocator requests from the buffer
176
 
pool.  The buddy allocator is solely used for allocating control
177
 
blocks for compressed pages (buf_page_t) and compressed page frames.
178
 
 
179
 
                Loading a file page
180
 
                -------------------
181
 
 
182
 
First, a victim block for replacement has to be found in the
183
 
buf_pool. It is taken from the free list or searched for from the
184
 
end of the LRU-list. An exclusive lock is reserved for the frame,
185
 
the io_fix field is set in the block fixing the block in buf_pool,
186
 
and the io-operation for loading the page is queued. The io-handler thread
187
 
releases the X-lock on the frame and resets the io_fix field
188
 
when the io operation completes.
189
 
 
190
 
A thread may request the above operation using the function
191
 
buf_page_get(). It may then continue to request a lock on the frame.
192
 
The lock is granted when the io-handler releases the x-lock.
193
 
 
194
 
                Read-ahead
195
 
                ----------
196
 
 
197
 
The read-ahead mechanism is intended to be intelligent and
198
 
isolated from the semantically higher levels of the database
199
 
index management. From the higher level we only need the
200
 
information if a file page has a natural successor or
201
 
predecessor page. On the leaf level of a B-tree index,
202
 
these are the next and previous pages in the natural
203
 
order of the pages.
204
 
 
205
 
Let us first explain the read-ahead mechanism when the leafs
206
 
of a B-tree are scanned in an ascending or descending order.
207
 
When a read page is the first time referenced in the buf_pool,
208
 
the buffer manager checks if it is at the border of a so-called
209
 
linear read-ahead area. The tablespace is divided into these
210
 
areas of size 64 blocks, for example. So if the page is at the
211
 
border of such an area, the read-ahead mechanism checks if
212
 
all the other blocks in the area have been accessed in an
213
 
ascending or descending order. If this is the case, the system
214
 
looks at the natural successor or predecessor of the page,
215
 
checks if that is at the border of another area, and in this case
216
 
issues read-requests for all the pages in that area. Maybe
217
 
we could relax the condition that all the pages in the area
218
 
have to be accessed: if data is deleted from a table, there may
219
 
appear holes of unused pages in the area.
220
 
 
221
 
A different read-ahead mechanism is used when there appears
222
 
to be a random access pattern to a file.
223
 
If a new page is referenced in the buf_pool, and several pages
224
 
of its random access area (for instance, 32 consecutive pages
225
 
in a tablespace) have recently been referenced, we may predict
226
 
that the whole area may be needed in the near future, and issue
227
 
the read requests for the whole area.
228
 
*/
229
 
 
230
 
/* Value in microseconds */
231
 
static const int WAIT_FOR_READ  = 5000;
232
 
 
233
 
/* The buffer buf_pool of the database */
234
 
UNIV_INTERN buf_pool_t* buf_pool = NULL;
235
 
 
236
 
/* mutex protecting the buffer pool struct and control blocks, except the
237
 
read-write lock in them */
238
 
UNIV_INTERN mutex_t             buf_pool_mutex;
239
 
/* mutex protecting the control blocks of compressed-only pages
240
 
(of type buf_page_t, not buf_block_t) */
241
 
UNIV_INTERN mutex_t             buf_pool_zip_mutex;
242
 
 
243
 
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
244
 
static ulint    buf_dbg_counter = 0; /* This is used to insert validation
245
 
                                        operations in excution in the
246
 
                                        debug version */
247
 
/** Flag to forbid the release of the buffer pool mutex.
248
 
Protected by buf_pool->mutex. */
249
 
UNIV_INTERN ulint               buf_pool_mutex_exit_forbidden = 0;
250
 
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
251
 
#ifdef UNIV_DEBUG
252
 
/* If this is set TRUE, the program prints info whenever
253
 
read-ahead or flush occurs */
254
 
UNIV_INTERN ibool               buf_debug_prints = FALSE;
255
 
#endif /* UNIV_DEBUG */
256
 
 
257
 
/* A chunk of buffers.  The buffer pool is allocated in chunks. */
258
 
struct buf_chunk_struct{
259
 
        ulint           mem_size;       /* allocated size of the chunk */
260
 
        ulint           size;           /* size of frames[] and blocks[] */
261
 
        void*           mem;            /* pointer to the memory area which
262
 
                                        was allocated for the frames */
263
 
        buf_block_t*    blocks;         /* array of buffer control blocks */
264
 
};
265
 
 
266
 
/************************************************************************
267
 
Calculates a page checksum which is stored to the page when it is written
268
 
to a file. Note that we must be careful to calculate the same value on
269
 
32-bit and 64-bit architectures. */
270
 
UNIV_INTERN
271
 
ulint
272
 
buf_calc_page_new_checksum(
273
 
/*=======================*/
274
 
                                /* out: checksum */
275
 
        const byte*     page)   /* in: buffer page */
276
 
{
277
 
        ulint checksum;
278
 
 
279
 
        /* Since the field FIL_PAGE_FILE_FLUSH_LSN, and in versions <= 4.1.x
280
 
        ..._ARCH_LOG_NO, are written outside the buffer pool to the first
281
 
        pages of data files, we have to skip them in the page checksum
282
 
        calculation.
283
 
        We must also skip the field FIL_PAGE_SPACE_OR_CHKSUM where the
284
 
        checksum is stored, and also the last 8 bytes of page because
285
 
        there we store the old formula checksum. */
286
 
 
287
 
        checksum = ut_fold_binary(page + FIL_PAGE_OFFSET,
288
 
                                  FIL_PAGE_FILE_FLUSH_LSN - FIL_PAGE_OFFSET)
289
 
                + ut_fold_binary(page + FIL_PAGE_DATA,
290
 
                                 UNIV_PAGE_SIZE - FIL_PAGE_DATA
291
 
                                 - FIL_PAGE_END_LSN_OLD_CHKSUM);
292
 
        checksum = checksum & 0xFFFFFFFFUL;
293
 
 
294
 
        return(checksum);
295
 
}
296
 
 
297
 
/************************************************************************
298
 
In versions < 4.0.14 and < 4.1.1 there was a bug that the checksum only
299
 
looked at the first few bytes of the page. This calculates that old
300
 
checksum.
301
 
NOTE: we must first store the new formula checksum to
302
 
FIL_PAGE_SPACE_OR_CHKSUM before calculating and storing this old checksum
303
 
because this takes that field as an input! */
304
 
UNIV_INTERN
305
 
ulint
306
 
buf_calc_page_old_checksum(
307
 
/*=======================*/
308
 
                                /* out: checksum */
309
 
        const byte*     page)   /* in: buffer page */
310
 
{
311
 
        ulint checksum;
312
 
 
313
 
        checksum = ut_fold_binary(page, FIL_PAGE_FILE_FLUSH_LSN);
314
 
 
315
 
        checksum = checksum & 0xFFFFFFFFUL;
316
 
 
317
 
        return(checksum);
318
 
}
319
 
 
320
 
/************************************************************************
321
 
Checks if a page is corrupt. */
322
 
UNIV_INTERN
323
 
ibool
324
 
buf_page_is_corrupted(
325
 
/*==================*/
326
 
                                        /* out: TRUE if corrupted */
327
 
        const byte*     read_buf,       /* in: a database page */
328
 
        ulint           zip_size)       /* in: size of compressed page;
329
 
                                        0 for uncompressed pages */
330
 
{
331
 
        ulint           checksum_field;
332
 
        ulint           old_checksum_field;
333
 
#ifndef UNIV_HOTBACKUP
334
 
        ib_uint64_t     current_lsn;
335
 
#endif
336
 
        if (UNIV_LIKELY(!zip_size)
337
 
            && memcmp(read_buf + FIL_PAGE_LSN + 4,
338
 
                      read_buf + UNIV_PAGE_SIZE
339
 
                      - FIL_PAGE_END_LSN_OLD_CHKSUM + 4, 4)) {
340
 
 
341
 
                /* Stored log sequence numbers at the start and the end
342
 
                of page do not match */
343
 
 
344
 
                return(TRUE);
345
 
        }
346
 
 
347
 
#ifndef UNIV_HOTBACKUP
348
 
        if (recv_lsn_checks_on && log_peek_lsn(&current_lsn)) {
349
 
                if (current_lsn < mach_read_ull(read_buf + FIL_PAGE_LSN)) {
350
 
                        ut_print_timestamp(stderr);
351
 
 
352
 
                        fprintf(stderr,
353
 
                                "  InnoDB: Error: page %lu log sequence number"
354
 
                                " %"PRIu64"\n"
355
 
                                "InnoDB: is in the future! Current system "
356
 
                                "log sequence number %"PRIu64".\n"
357
 
                                "InnoDB: Your database may be corrupt or "
358
 
                                "you may have copied the InnoDB\n"
359
 
                                "InnoDB: tablespace but not the InnoDB "
360
 
                                "log files. See\n"
361
 
                                "InnoDB: http://dev.mysql.com/doc/refman/"
362
 
                                "5.1/en/forcing-recovery.html\n"
363
 
                                "InnoDB: for more information.\n",
364
 
                                (ulong) mach_read_from_4(read_buf
365
 
                                                         + FIL_PAGE_OFFSET),
366
 
                                mach_read_ull(read_buf + FIL_PAGE_LSN),
367
 
                                current_lsn);
368
 
                }
369
 
        }
370
 
#endif
371
 
 
372
 
        /* If we use checksums validation, make additional check before
373
 
        returning TRUE to ensure that the checksum is not equal to
374
 
        BUF_NO_CHECKSUM_MAGIC which might be stored by InnoDB with checksums
375
 
        disabled. Otherwise, skip checksum calculation and return FALSE */
376
 
 
377
 
        if (UNIV_LIKELY(srv_use_checksums)) {
378
 
                checksum_field = mach_read_from_4(read_buf
379
 
                                                  + FIL_PAGE_SPACE_OR_CHKSUM);
380
 
 
381
 
                if (UNIV_UNLIKELY(zip_size)) {
382
 
                        return(checksum_field != BUF_NO_CHECKSUM_MAGIC
383
 
                               && checksum_field
384
 
                               != page_zip_calc_checksum(read_buf, zip_size));
385
 
                }
386
 
 
387
 
                old_checksum_field = mach_read_from_4(
388
 
                        read_buf + UNIV_PAGE_SIZE
389
 
                        - FIL_PAGE_END_LSN_OLD_CHKSUM);
390
 
 
391
 
                /* There are 2 valid formulas for old_checksum_field:
392
 
 
393
 
                1. Very old versions of InnoDB only stored 8 byte lsn to the
394
 
                start and the end of the page.
395
 
 
396
 
                2. Newer InnoDB versions store the old formula checksum
397
 
                there. */
398
 
 
399
 
                if (old_checksum_field != mach_read_from_4(read_buf
400
 
                                                           + FIL_PAGE_LSN)
401
 
                    && old_checksum_field != BUF_NO_CHECKSUM_MAGIC
402
 
                    && old_checksum_field
403
 
                    != buf_calc_page_old_checksum(read_buf)) {
404
 
 
405
 
                        return(TRUE);
406
 
                }
407
 
 
408
 
                /* InnoDB versions < 4.0.14 and < 4.1.1 stored the space id
409
 
                (always equal to 0), to FIL_PAGE_SPACE_SPACE_OR_CHKSUM */
410
 
 
411
 
                if (checksum_field != 0
412
 
                    && checksum_field != BUF_NO_CHECKSUM_MAGIC
413
 
                    && checksum_field
414
 
                    != buf_calc_page_new_checksum(read_buf)) {
415
 
 
416
 
                        return(TRUE);
417
 
                }
418
 
        }
419
 
 
420
 
        return(FALSE);
421
 
}
422
 
 
423
 
/************************************************************************
424
 
Prints a page to stderr. */
425
 
UNIV_INTERN
426
 
void
427
 
buf_page_print(
428
 
/*===========*/
429
 
        const byte*     read_buf,       /* in: a database page */
430
 
        ulint           zip_size)       /* in: compressed page size, or
431
 
                                0 for uncompressed pages */
432
 
{
433
 
        dict_index_t*   index;
434
 
        ulint           checksum;
435
 
        ulint           old_checksum;
436
 
        ulint           size    = zip_size;
437
 
 
438
 
        if (!size) {
439
 
                size = UNIV_PAGE_SIZE;
440
 
        }
441
 
 
442
 
        ut_print_timestamp(stderr);
443
 
        fprintf(stderr, "  InnoDB: Page dump in ascii and hex (%lu bytes):\n",
444
 
                (ulong) size);
445
 
        ut_print_buf(stderr, read_buf, size);
446
 
        fputs("InnoDB: End of page dump\n", stderr);
447
 
 
448
 
        if (zip_size) {
449
 
                /* Print compressed page. */
450
 
 
451
 
                switch (fil_page_get_type(read_buf)) {
452
 
                case FIL_PAGE_TYPE_ZBLOB:
453
 
                case FIL_PAGE_TYPE_ZBLOB2:
454
 
                        checksum = srv_use_checksums
455
 
                                ? page_zip_calc_checksum(read_buf, zip_size)
456
 
                                : BUF_NO_CHECKSUM_MAGIC;
457
 
                        ut_print_timestamp(stderr);
458
 
                        fprintf(stderr,
459
 
                                "  InnoDB: Compressed BLOB page"
460
 
                                " checksum %lu, stored %lu\n"
461
 
                                "InnoDB: Page lsn %lu %lu\n"
462
 
                                "InnoDB: Page number (if stored"
463
 
                                " to page already) %lu,\n"
464
 
                                "InnoDB: space id (if stored"
465
 
                                " to page already) %lu\n",
466
 
                                (ulong) checksum,
467
 
                                (ulong) mach_read_from_4(
468
 
                                        read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
469
 
                                (ulong) mach_read_from_4(
470
 
                                        read_buf + FIL_PAGE_LSN),
471
 
                                (ulong) mach_read_from_4(
472
 
                                        read_buf + (FIL_PAGE_LSN + 4)),
473
 
                                (ulong) mach_read_from_4(
474
 
                                        read_buf + FIL_PAGE_OFFSET),
475
 
                                (ulong) mach_read_from_4(
476
 
                                        read_buf
477
 
                                        + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
478
 
                        return;
479
 
                default:
480
 
                        ut_print_timestamp(stderr);
481
 
                        fprintf(stderr,
482
 
                                "  InnoDB: unknown page type %lu,"
483
 
                                " assuming FIL_PAGE_INDEX\n",
484
 
                                fil_page_get_type(read_buf));
485
 
                        /* fall through */
486
 
                case FIL_PAGE_INDEX:
487
 
                        checksum = srv_use_checksums
488
 
                                ? page_zip_calc_checksum(read_buf, zip_size)
489
 
                                : BUF_NO_CHECKSUM_MAGIC;
490
 
 
491
 
                        ut_print_timestamp(stderr);
492
 
                        fprintf(stderr,
493
 
                                "  InnoDB: Compressed page checksum %lu,"
494
 
                                " stored %lu\n"
495
 
                                "InnoDB: Page lsn %lu %lu\n"
496
 
                                "InnoDB: Page number (if stored"
497
 
                                " to page already) %lu,\n"
498
 
                                "InnoDB: space id (if stored"
499
 
                                " to page already) %lu\n",
500
 
                                (ulong) checksum,
501
 
                                (ulong) mach_read_from_4(
502
 
                                        read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
503
 
                                (ulong) mach_read_from_4(
504
 
                                        read_buf + FIL_PAGE_LSN),
505
 
                                (ulong) mach_read_from_4(
506
 
                                        read_buf + (FIL_PAGE_LSN + 4)),
507
 
                                (ulong) mach_read_from_4(
508
 
                                        read_buf + FIL_PAGE_OFFSET),
509
 
                                (ulong) mach_read_from_4(
510
 
                                        read_buf
511
 
                                        + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
512
 
                        return;
513
 
                case FIL_PAGE_TYPE_XDES:
514
 
                        /* This is an uncompressed page. */
515
 
                        break;
516
 
                }
517
 
        }
518
 
 
519
 
        checksum = srv_use_checksums
520
 
                ? buf_calc_page_new_checksum(read_buf) : BUF_NO_CHECKSUM_MAGIC;
521
 
        old_checksum = srv_use_checksums
522
 
                ? buf_calc_page_old_checksum(read_buf) : BUF_NO_CHECKSUM_MAGIC;
523
 
 
524
 
        ut_print_timestamp(stderr);
525
 
        fprintf(stderr,
526
 
                "  InnoDB: Page checksum %lu, prior-to-4.0.14-form"
527
 
                " checksum %lu\n"
528
 
                "InnoDB: stored checksum %lu, prior-to-4.0.14-form"
529
 
                " stored checksum %lu\n"
530
 
                "InnoDB: Page lsn %lu %lu, low 4 bytes of lsn"
531
 
                " at page end %lu\n"
532
 
                "InnoDB: Page number (if stored to page already) %lu,\n"
533
 
                "InnoDB: space id (if created with >= MySQL-4.1.1"
534
 
                " and stored already) %lu\n",
535
 
                (ulong) checksum, (ulong) old_checksum,
536
 
                (ulong) mach_read_from_4(read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
537
 
                (ulong) mach_read_from_4(read_buf + UNIV_PAGE_SIZE
538
 
                                         - FIL_PAGE_END_LSN_OLD_CHKSUM),
539
 
                (ulong) mach_read_from_4(read_buf + FIL_PAGE_LSN),
540
 
                (ulong) mach_read_from_4(read_buf + FIL_PAGE_LSN + 4),
541
 
                (ulong) mach_read_from_4(read_buf + UNIV_PAGE_SIZE
542
 
                                         - FIL_PAGE_END_LSN_OLD_CHKSUM + 4),
543
 
                (ulong) mach_read_from_4(read_buf + FIL_PAGE_OFFSET),
544
 
                (ulong) mach_read_from_4(read_buf
545
 
                                         + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
546
 
 
547
 
        if (mach_read_from_2(read_buf + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE)
548
 
            == TRX_UNDO_INSERT) {
549
 
                fprintf(stderr,
550
 
                        "InnoDB: Page may be an insert undo log page\n");
551
 
        } else if (mach_read_from_2(read_buf + TRX_UNDO_PAGE_HDR
552
 
                                    + TRX_UNDO_PAGE_TYPE)
553
 
                   == TRX_UNDO_UPDATE) {
554
 
                fprintf(stderr,
555
 
                        "InnoDB: Page may be an update undo log page\n");
556
 
        }
557
 
 
558
 
        switch (fil_page_get_type(read_buf)) {
559
 
        case FIL_PAGE_INDEX:
560
 
                fprintf(stderr,
561
 
                        "InnoDB: Page may be an index page where"
562
 
                        " index id is %lu %lu\n",
563
 
                        (ulong) ut_dulint_get_high(
564
 
                                btr_page_get_index_id(read_buf)),
565
 
                        (ulong) ut_dulint_get_low(
566
 
                                btr_page_get_index_id(read_buf)));
567
 
 
568
 
#ifdef UNIV_HOTBACKUP
569
 
                /* If the code is in ibbackup, dict_sys may be uninitialized,
570
 
                i.e., NULL */
571
 
 
572
 
                if (dict_sys == NULL) {
573
 
                        break;
574
 
                }
575
 
#endif /* UNIV_HOTBACKUP */
576
 
 
577
 
                index = dict_index_find_on_id_low(
578
 
                        btr_page_get_index_id(read_buf));
579
 
                if (index) {
580
 
                        fputs("InnoDB: (", stderr);
581
 
                        dict_index_name_print(stderr, NULL, index);
582
 
                        fputs(")\n", stderr);
583
 
                }
584
 
                break;
585
 
        case FIL_PAGE_INODE:
586
 
                fputs("InnoDB: Page may be an 'inode' page\n", stderr);
587
 
                break;
588
 
        case FIL_PAGE_IBUF_FREE_LIST:
589
 
                fputs("InnoDB: Page may be an insert buffer free list page\n",
590
 
                      stderr);
591
 
                break;
592
 
        case FIL_PAGE_TYPE_ALLOCATED:
593
 
                fputs("InnoDB: Page may be a freshly allocated page\n",
594
 
                      stderr);
595
 
                break;
596
 
        case FIL_PAGE_IBUF_BITMAP:
597
 
                fputs("InnoDB: Page may be an insert buffer bitmap page\n",
598
 
                      stderr);
599
 
                break;
600
 
        case FIL_PAGE_TYPE_SYS:
601
 
                fputs("InnoDB: Page may be a system page\n",
602
 
                      stderr);
603
 
                break;
604
 
        case FIL_PAGE_TYPE_TRX_SYS:
605
 
                fputs("InnoDB: Page may be a transaction system page\n",
606
 
                      stderr);
607
 
                break;
608
 
        case FIL_PAGE_TYPE_FSP_HDR:
609
 
                fputs("InnoDB: Page may be a file space header page\n",
610
 
                      stderr);
611
 
                break;
612
 
        case FIL_PAGE_TYPE_XDES:
613
 
                fputs("InnoDB: Page may be an extent descriptor page\n",
614
 
                      stderr);
615
 
                break;
616
 
        case FIL_PAGE_TYPE_BLOB:
617
 
                fputs("InnoDB: Page may be a BLOB page\n",
618
 
                      stderr);
619
 
                break;
620
 
        case FIL_PAGE_TYPE_ZBLOB:
621
 
        case FIL_PAGE_TYPE_ZBLOB2:
622
 
                fputs("InnoDB: Page may be a compressed BLOB page\n",
623
 
                      stderr);
624
 
                break;
625
 
        }
626
 
}
627
 
 
628
 
/************************************************************************
629
 
Initializes a buffer control block when the buf_pool is created. */
630
 
static
631
 
void
632
 
buf_block_init(
633
 
/*===========*/
634
 
        buf_block_t*    block,  /* in: pointer to control block */
635
 
        byte*           frame)  /* in: pointer to buffer frame */
636
 
{
637
 
        UNIV_MEM_DESC(frame, UNIV_PAGE_SIZE, block);
638
 
 
639
 
        block->frame = frame;
640
 
 
641
 
        block->page.state = BUF_BLOCK_NOT_USED;
642
 
        block->page.buf_fix_count = 0;
643
 
        block->page.io_fix = BUF_IO_NONE;
644
 
 
645
 
        block->modify_clock = 0;
646
 
 
647
 
#ifdef UNIV_DEBUG_FILE_ACCESSES
648
 
        block->page.file_page_was_freed = FALSE;
649
 
#endif /* UNIV_DEBUG_FILE_ACCESSES */
650
 
 
651
 
        block->check_index_page_at_flush = FALSE;
652
 
        block->index = NULL;
653
 
 
654
 
#ifdef UNIV_DEBUG
655
 
        block->page.in_page_hash = FALSE;
656
 
        block->page.in_zip_hash = FALSE;
657
 
        block->page.in_flush_list = FALSE;
658
 
        block->page.in_free_list = FALSE;
659
 
        block->page.in_LRU_list = FALSE;
660
 
        block->in_unzip_LRU_list = FALSE;
661
 
        block->n_pointers = 0;
662
 
#endif /* UNIV_DEBUG */
663
 
        page_zip_des_init(&block->page.zip);
664
 
 
665
 
        mutex_create(&block->mutex, SYNC_BUF_BLOCK);
666
 
 
667
 
        rw_lock_create(&block->lock, SYNC_LEVEL_VARYING);
668
 
        ut_ad(rw_lock_validate(&(block->lock)));
669
 
 
670
 
#ifdef UNIV_SYNC_DEBUG
671
 
        rw_lock_create(&block->debug_latch, SYNC_NO_ORDER_CHECK);
672
 
#endif /* UNIV_SYNC_DEBUG */
673
 
}
674
 
 
675
 
/************************************************************************
676
 
Allocates a chunk of buffer frames. */
677
 
static
678
 
buf_chunk_t*
679
 
buf_chunk_init(
680
 
/*===========*/
681
 
                                        /* out: chunk, or NULL on failure */
682
 
        buf_chunk_t*    chunk,          /* out: chunk of buffers */
683
 
        ulint           mem_size)       /* in: requested size in bytes */
684
 
{
685
 
        buf_block_t*    block;
686
 
        byte*           frame;
687
 
        ulint           i;
688
 
 
689
 
        /* Round down to a multiple of page size,
690
 
        although it already should be. */
691
 
        mem_size = ut_2pow_round(mem_size, UNIV_PAGE_SIZE);
692
 
        /* Reserve space for the block descriptors. */
693
 
        mem_size += ut_2pow_round((mem_size / UNIV_PAGE_SIZE) * (sizeof *block)
694
 
                                  + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE);
695
 
 
696
 
        chunk->mem_size = mem_size;
697
 
        chunk->mem = os_mem_alloc_large(&chunk->mem_size);
698
 
 
699
 
        if (UNIV_UNLIKELY(chunk->mem == NULL)) {
700
 
 
701
 
                return(NULL);
702
 
        }
703
 
 
704
 
        /* Allocate the block descriptors from
705
 
        the start of the memory block. */
706
 
        chunk->blocks = chunk->mem;
707
 
 
708
 
        /* Align a pointer to the first frame.  Note that when
709
 
        os_large_page_size is smaller than UNIV_PAGE_SIZE,
710
 
        we may allocate one fewer block than requested.  When
711
 
        it is bigger, we may allocate more blocks than requested. */
712
 
 
713
 
        frame = ut_align(chunk->mem, UNIV_PAGE_SIZE);
714
 
        chunk->size = chunk->mem_size / UNIV_PAGE_SIZE
715
 
                - (frame != chunk->mem);
716
 
 
717
 
        /* Subtract the space needed for block descriptors. */
718
 
        {
719
 
                ulint   size = chunk->size;
720
 
 
721
 
                while (frame < (byte*) (chunk->blocks + size)) {
722
 
                        frame += UNIV_PAGE_SIZE;
723
 
                        size--;
724
 
                }
725
 
 
726
 
                chunk->size = size;
727
 
        }
728
 
 
729
 
        /* Init block structs and assign frames for them. Then we
730
 
        assign the frames to the first blocks (we already mapped the
731
 
        memory above). */
732
 
 
733
 
        block = chunk->blocks;
734
 
 
735
 
        for (i = chunk->size; i--; ) {
736
 
 
737
 
                buf_block_init(block, frame);
738
 
 
739
 
#ifdef HAVE_purify
740
 
                /* Wipe contents of frame to eliminate a Purify warning */
741
 
                memset(block->frame, '\0', UNIV_PAGE_SIZE);
742
 
#endif
743
 
                /* Add the block to the free list */
744
 
                UT_LIST_ADD_LAST(list, buf_pool->free, (&block->page));
745
 
                ut_d(block->page.in_free_list = TRUE);
746
 
 
747
 
                block++;
748
 
                frame += UNIV_PAGE_SIZE;
749
 
        }
750
 
 
751
 
        return(chunk);
752
 
}
753
 
 
754
 
#ifdef UNIV_DEBUG
755
 
/*************************************************************************
756
 
Finds a block in the given buffer chunk that points to a
757
 
given compressed page. */
758
 
static
759
 
buf_block_t*
760
 
buf_chunk_contains_zip(
761
 
/*===================*/
762
 
                                /* out: buffer block pointing to
763
 
                                the compressed page, or NULL */
764
 
        buf_chunk_t*    chunk,  /* in: chunk being checked */
765
 
        const void*     data)   /* in: pointer to compressed page */
766
 
{
767
 
        buf_block_t*    block;
768
 
        ulint           i;
769
 
 
770
 
        ut_ad(buf_pool);
771
 
        ut_ad(buf_pool_mutex_own());
772
 
 
773
 
        block = chunk->blocks;
774
 
 
775
 
        for (i = chunk->size; i--; block++) {
776
 
                if (block->page.zip.data == data) {
777
 
 
778
 
                        return(block);
779
 
                }
780
 
        }
781
 
 
782
 
        return(NULL);
783
 
}
784
 
 
785
 
/*************************************************************************
786
 
Finds a block in the buffer pool that points to a
787
 
given compressed page. */
788
 
UNIV_INTERN
789
 
buf_block_t*
790
 
buf_pool_contains_zip(
791
 
/*==================*/
792
 
                                /* out: buffer block pointing to
793
 
                                the compressed page, or NULL */
794
 
        const void*     data)   /* in: pointer to compressed page */
795
 
{
796
 
        ulint           n;
797
 
        buf_chunk_t*    chunk = buf_pool->chunks;
798
 
 
799
 
        for (n = buf_pool->n_chunks; n--; chunk++) {
800
 
                buf_block_t* block = buf_chunk_contains_zip(chunk, data);
801
 
 
802
 
                if (block) {
803
 
                        return(block);
804
 
                }
805
 
        }
806
 
 
807
 
        return(NULL);
808
 
}
809
 
#endif /* UNIV_DEBUG */
810
 
 
811
 
/*************************************************************************
812
 
Checks that all file pages in the buffer chunk are in a replaceable state. */
813
 
static
814
 
const buf_block_t*
815
 
buf_chunk_not_freed(
816
 
/*================*/
817
 
                                /* out: address of a non-free block,
818
 
                                or NULL if all freed */
819
 
        buf_chunk_t*    chunk)  /* in: chunk being checked */
820
 
{
821
 
        buf_block_t*    block;
822
 
        ulint           i;
823
 
 
824
 
        ut_ad(buf_pool);
825
 
        ut_ad(buf_pool_mutex_own());
826
 
 
827
 
        block = chunk->blocks;
828
 
 
829
 
        for (i = chunk->size; i--; block++) {
830
 
                mutex_enter(&block->mutex);
831
 
 
832
 
                if (buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE
833
 
                    && !buf_flush_ready_for_replace(&block->page)) {
834
 
 
835
 
                        mutex_exit(&block->mutex);
836
 
                        return(block);
837
 
                }
838
 
 
839
 
                mutex_exit(&block->mutex);
840
 
        }
841
 
 
842
 
        return(NULL);
843
 
}
844
 
 
845
 
/*************************************************************************
846
 
Checks that all blocks in the buffer chunk are in BUF_BLOCK_NOT_USED state. */
847
 
static
848
 
ibool
849
 
buf_chunk_all_free(
850
 
/*===============*/
851
 
                                        /* out: TRUE if all freed */
852
 
        const buf_chunk_t*      chunk)  /* in: chunk being checked */
853
 
{
854
 
        const buf_block_t*      block;
855
 
        ulint                   i;
856
 
 
857
 
        ut_ad(buf_pool);
858
 
        ut_ad(buf_pool_mutex_own());
859
 
 
860
 
        block = chunk->blocks;
861
 
 
862
 
        for (i = chunk->size; i--; block++) {
863
 
 
864
 
                if (buf_block_get_state(block) != BUF_BLOCK_NOT_USED) {
865
 
 
866
 
                        return(FALSE);
867
 
                }
868
 
        }
869
 
 
870
 
        return(TRUE);
871
 
}
872
 
 
873
 
/************************************************************************
874
 
Frees a chunk of buffer frames. */
875
 
static
876
 
void
877
 
buf_chunk_free(
878
 
/*===========*/
879
 
        buf_chunk_t*    chunk)          /* out: chunk of buffers */
880
 
{
881
 
        buf_block_t*            block;
882
 
        const buf_block_t*      block_end;
883
 
 
884
 
        ut_ad(buf_pool_mutex_own());
885
 
 
886
 
        block_end = chunk->blocks + chunk->size;
887
 
 
888
 
        for (block = chunk->blocks; block < block_end; block++) {
889
 
                ut_a(buf_block_get_state(block) == BUF_BLOCK_NOT_USED);
890
 
                ut_a(!block->page.zip.data);
891
 
 
892
 
                ut_ad(!block->page.in_LRU_list);
893
 
                ut_ad(!block->in_unzip_LRU_list);
894
 
                ut_ad(!block->page.in_flush_list);
895
 
                /* Remove the block from the free list. */
896
 
                ut_ad(block->page.in_free_list);
897
 
                UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
898
 
 
899
 
                /* Free the latches. */
900
 
                mutex_free(&block->mutex);
901
 
                rw_lock_free(&block->lock);
902
 
#ifdef UNIV_SYNC_DEBUG
903
 
                rw_lock_free(&block->debug_latch);
904
 
#endif /* UNIV_SYNC_DEBUG */
905
 
                UNIV_MEM_UNDESC(block);
906
 
        }
907
 
 
908
 
        os_mem_free_large(chunk->mem, chunk->mem_size);
909
 
}
910
 
 
911
 
/************************************************************************
912
 
Creates the buffer pool. */
913
 
UNIV_INTERN
914
 
buf_pool_t*
915
 
buf_pool_init(void)
916
 
/*===============*/
917
 
                                /* out, own: buf_pool object, NULL if not
918
 
                                enough memory or error */
919
 
{
920
 
        buf_chunk_t*    chunk;
921
 
        ulint           i;
922
 
 
923
 
        buf_pool = mem_zalloc(sizeof(buf_pool_t));
924
 
 
925
 
        /* 1. Initialize general fields
926
 
        ------------------------------- */
927
 
        mutex_create(&buf_pool_mutex, SYNC_BUF_POOL);
928
 
        mutex_create(&buf_pool_zip_mutex, SYNC_BUF_BLOCK);
929
 
 
930
 
        buf_pool_mutex_enter();
931
 
 
932
 
        buf_pool->n_chunks = 1;
933
 
        buf_pool->chunks = chunk = mem_alloc(sizeof *chunk);
934
 
 
935
 
        UT_LIST_INIT(buf_pool->free);
936
 
 
937
 
        if (!buf_chunk_init(chunk, srv_buf_pool_size)) {
938
 
                mem_free(chunk);
939
 
                mem_free(buf_pool);
940
 
                buf_pool = NULL;
941
 
                return(NULL);
942
 
        }
943
 
 
944
 
        srv_buf_pool_old_size = srv_buf_pool_size;
945
 
        buf_pool->curr_size = chunk->size;
946
 
        srv_buf_pool_curr_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
947
 
 
948
 
        buf_pool->page_hash = hash_create(2 * buf_pool->curr_size);
949
 
        buf_pool->zip_hash = hash_create(2 * buf_pool->curr_size);
950
 
 
951
 
        buf_pool->last_printout_time = time(NULL);
952
 
 
953
 
        /* 2. Initialize flushing fields
954
 
        -------------------------------- */
955
 
 
956
 
        for (i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) {
957
 
                buf_pool->no_flush[i] = os_event_create(NULL);
958
 
        }
959
 
 
960
 
        buf_pool->ulint_clock = 1;
961
 
 
962
 
        /* 3. Initialize LRU fields
963
 
        --------------------------- */
964
 
        /* All fields are initialized by mem_zalloc(). */
965
 
 
966
 
        buf_pool_mutex_exit();
967
 
 
968
 
        btr_search_sys_create(buf_pool->curr_size
969
 
                              * UNIV_PAGE_SIZE / sizeof(void*) / 64);
970
 
 
971
 
        /* 4. Initialize the buddy allocator fields */
972
 
        /* All fields are initialized by mem_zalloc(). */
973
 
 
974
 
        return(buf_pool);
975
 
}
976
 
 
977
 
/************************************************************************
978
 
Frees the buffer pool at shutdown.  This must not be invoked before
979
 
freeing all mutexes. */
980
 
UNIV_INTERN
981
 
void
982
 
buf_pool_free(void)
983
 
/*===============*/
984
 
{
985
 
        buf_chunk_t*    chunk;
986
 
        buf_chunk_t*    chunks;
987
 
 
988
 
        chunks = buf_pool->chunks;
989
 
        chunk = chunks + buf_pool->n_chunks;
990
 
 
991
 
        while (--chunk >= chunks) {
992
 
                /* Bypass the checks of buf_chunk_free(), since they
993
 
                would fail at shutdown. */
994
 
                os_mem_free_large(chunk->mem, chunk->mem_size);
995
 
        }
996
 
 
997
 
        buf_pool->n_chunks = 0;
998
 
}
999
 
 
1000
 
/************************************************************************
1001
 
Relocate a buffer control block.  Relocates the block on the LRU list
1002
 
and in buf_pool->page_hash.  Does not relocate bpage->list.
1003
 
The caller must take care of relocating bpage->list. */
1004
 
UNIV_INTERN
1005
 
void
1006
 
buf_relocate(
1007
 
/*=========*/
1008
 
        buf_page_t*     bpage,  /* in/out: control block being relocated;
1009
 
                                buf_page_get_state(bpage) must be
1010
 
                                BUF_BLOCK_ZIP_DIRTY or BUF_BLOCK_ZIP_PAGE */
1011
 
        buf_page_t*     dpage)  /* in/out: destination control block */
1012
 
{
1013
 
        buf_page_t*     b;
1014
 
        ulint           fold;
1015
 
 
1016
 
        ut_ad(buf_pool_mutex_own());
1017
 
        ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1018
 
        ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
1019
 
        ut_a(bpage->buf_fix_count == 0);
1020
 
        ut_ad(bpage->in_LRU_list);
1021
 
        ut_ad(!bpage->in_zip_hash);
1022
 
        ut_ad(bpage->in_page_hash);
1023
 
        ut_ad(bpage == buf_page_hash_get(bpage->space, bpage->offset));
1024
 
#ifdef UNIV_DEBUG
1025
 
        switch (buf_page_get_state(bpage)) {
1026
 
        case BUF_BLOCK_ZIP_FREE:
1027
 
        case BUF_BLOCK_NOT_USED:
1028
 
        case BUF_BLOCK_READY_FOR_USE:
1029
 
        case BUF_BLOCK_FILE_PAGE:
1030
 
        case BUF_BLOCK_MEMORY:
1031
 
        case BUF_BLOCK_REMOVE_HASH:
1032
 
                ut_error;
1033
 
        case BUF_BLOCK_ZIP_DIRTY:
1034
 
        case BUF_BLOCK_ZIP_PAGE:
1035
 
                break;
1036
 
        }
1037
 
#endif /* UNIV_DEBUG */
1038
 
 
1039
 
        memcpy(dpage, bpage, sizeof *dpage);
1040
 
 
1041
 
        ut_d(bpage->in_LRU_list = FALSE);
1042
 
        ut_d(bpage->in_page_hash = FALSE);
1043
 
 
1044
 
        /* relocate buf_pool->LRU */
1045
 
        b = UT_LIST_GET_PREV(LRU, bpage);
1046
 
        UT_LIST_REMOVE(LRU, buf_pool->LRU, bpage);
1047
 
 
1048
 
        if (b) {
1049
 
                UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU, b, dpage);
1050
 
        } else {
1051
 
                UT_LIST_ADD_FIRST(LRU, buf_pool->LRU, dpage);
1052
 
        }
1053
 
 
1054
 
        if (UNIV_UNLIKELY(buf_pool->LRU_old == bpage)) {
1055
 
                buf_pool->LRU_old = dpage;
1056
 
        }
1057
 
 
1058
 
        ut_d(UT_LIST_VALIDATE(LRU, buf_page_t, buf_pool->LRU));
1059
 
 
1060
 
        /* relocate buf_pool->page_hash */
1061
 
        fold = buf_page_address_fold(bpage->space, bpage->offset);
1062
 
 
1063
 
        HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, bpage);
1064
 
        HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold, dpage);
1065
 
 
1066
 
        UNIV_MEM_INVALID(bpage, sizeof *bpage);
1067
 
}
1068
 
 
1069
 
/************************************************************************
1070
 
Shrinks the buffer pool. */
1071
 
static
1072
 
void
1073
 
buf_pool_shrink(
1074
 
/*============*/
1075
 
                                /* out: TRUE if shrunk */
1076
 
        ulint   chunk_size)     /* in: number of pages to remove */
1077
 
{
1078
 
        buf_chunk_t*    chunks;
1079
 
        buf_chunk_t*    chunk;
1080
 
        ulint           max_size;
1081
 
        ulint           max_free_size;
1082
 
        buf_chunk_t*    max_chunk;
1083
 
        buf_chunk_t*    max_free_chunk;
1084
 
 
1085
 
        ut_ad(!buf_pool_mutex_own());
1086
 
 
1087
 
try_again:
1088
 
        btr_search_disable(); /* Empty the adaptive hash index again */
1089
 
        buf_pool_mutex_enter();
1090
 
 
1091
 
shrink_again:
1092
 
        if (buf_pool->n_chunks <= 1) {
1093
 
 
1094
 
                /* Cannot shrink if there is only one chunk */
1095
 
                goto func_done;
1096
 
        }
1097
 
 
1098
 
        /* Search for the largest free chunk
1099
 
        not larger than the size difference */
1100
 
        chunks = buf_pool->chunks;
1101
 
        chunk = chunks + buf_pool->n_chunks;
1102
 
        max_size = max_free_size = 0;
1103
 
        max_chunk = max_free_chunk = NULL;
1104
 
 
1105
 
        while (--chunk >= chunks) {
1106
 
                if (chunk->size <= chunk_size
1107
 
                    && chunk->size > max_free_size) {
1108
 
                        if (chunk->size > max_size) {
1109
 
                                max_size = chunk->size;
1110
 
                                max_chunk = chunk;
1111
 
                        }
1112
 
 
1113
 
                        if (buf_chunk_all_free(chunk)) {
1114
 
                                max_free_size = chunk->size;
1115
 
                                max_free_chunk = chunk;
1116
 
                        }
1117
 
                }
1118
 
        }
1119
 
 
1120
 
        if (!max_free_size) {
1121
 
 
1122
 
                ulint           dirty   = 0;
1123
 
                ulint           nonfree = 0;
1124
 
                buf_block_t*    block;
1125
 
                buf_block_t*    bend;
1126
 
 
1127
 
                /* Cannot shrink: try again later
1128
 
                (do not assign srv_buf_pool_old_size) */
1129
 
                if (!max_chunk) {
1130
 
 
1131
 
                        goto func_exit;
1132
 
                }
1133
 
 
1134
 
                block = max_chunk->blocks;
1135
 
                bend = block + max_chunk->size;
1136
 
 
1137
 
                /* Move the blocks of chunk to the end of the
1138
 
                LRU list and try to flush them. */
1139
 
                for (; block < bend; block++) {
1140
 
                        switch (buf_block_get_state(block)) {
1141
 
                        case BUF_BLOCK_NOT_USED:
1142
 
                                continue;
1143
 
                        case BUF_BLOCK_FILE_PAGE:
1144
 
                                break;
1145
 
                        default:
1146
 
                                nonfree++;
1147
 
                                continue;
1148
 
                        }
1149
 
 
1150
 
                        mutex_enter(&block->mutex);
1151
 
                        /* The following calls will temporarily
1152
 
                        release block->mutex and buf_pool_mutex.
1153
 
                        Therefore, we have to always retry,
1154
 
                        even if !dirty && !nonfree. */
1155
 
 
1156
 
                        if (!buf_flush_ready_for_replace(&block->page)) {
1157
 
 
1158
 
                                buf_LRU_make_block_old(&block->page);
1159
 
                                dirty++;
1160
 
                        } else if (buf_LRU_free_block(&block->page, TRUE, NULL)
1161
 
                                   != BUF_LRU_FREED) {
1162
 
                                nonfree++;
1163
 
                        }
1164
 
 
1165
 
                        mutex_exit(&block->mutex);
1166
 
                }
1167
 
 
1168
 
                buf_pool_mutex_exit();
1169
 
 
1170
 
                /* Request for a flush of the chunk if it helps.
1171
 
                Do not flush if there are non-free blocks, since
1172
 
                flushing will not make the chunk freeable. */
1173
 
                if (nonfree) {
1174
 
                        /* Avoid busy-waiting. */
1175
 
                        os_thread_sleep(100000);
1176
 
                } else if (dirty
1177
 
                           && buf_flush_batch(BUF_FLUSH_LRU, dirty, 0)
1178
 
                           == ULINT_UNDEFINED) {
1179
 
 
1180
 
                        buf_flush_wait_batch_end(BUF_FLUSH_LRU);
1181
 
                }
1182
 
 
1183
 
                goto try_again;
1184
 
        }
1185
 
 
1186
 
        max_size = max_free_size;
1187
 
        max_chunk = max_free_chunk;
1188
 
 
1189
 
        srv_buf_pool_old_size = srv_buf_pool_size;
1190
 
 
1191
 
        /* Rewrite buf_pool->chunks.  Copy everything but max_chunk. */
1192
 
        chunks = mem_alloc((buf_pool->n_chunks - 1) * sizeof *chunks);
1193
 
        memcpy(chunks, buf_pool->chunks,
1194
 
               (max_chunk - buf_pool->chunks) * sizeof *chunks);
1195
 
        memcpy(chunks + (max_chunk - buf_pool->chunks),
1196
 
               max_chunk + 1,
1197
 
               buf_pool->chunks + buf_pool->n_chunks
1198
 
               - (max_chunk + 1));
1199
 
        ut_a(buf_pool->curr_size > max_chunk->size);
1200
 
        buf_pool->curr_size -= max_chunk->size;
1201
 
        srv_buf_pool_curr_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
1202
 
        chunk_size -= max_chunk->size;
1203
 
        buf_chunk_free(max_chunk);
1204
 
        mem_free(buf_pool->chunks);
1205
 
        buf_pool->chunks = chunks;
1206
 
        buf_pool->n_chunks--;
1207
 
 
1208
 
        /* Allow a slack of one megabyte. */
1209
 
        if (chunk_size > 1048576 / UNIV_PAGE_SIZE) {
1210
 
 
1211
 
                goto shrink_again;
1212
 
        }
1213
 
 
1214
 
func_done:
1215
 
        srv_buf_pool_old_size = srv_buf_pool_size;
1216
 
func_exit:
1217
 
        buf_pool_mutex_exit();
1218
 
        btr_search_enable();
1219
 
}
1220
 
 
1221
 
/************************************************************************
1222
 
Rebuild buf_pool->page_hash. */
1223
 
static
1224
 
void
1225
 
buf_pool_page_hash_rebuild(void)
1226
 
/*============================*/
1227
 
{
1228
 
        ulint           i;
1229
 
        ulint           n_chunks;
1230
 
        buf_chunk_t*    chunk;
1231
 
        hash_table_t*   page_hash;
1232
 
        hash_table_t*   zip_hash;
1233
 
        buf_page_t*     b;
1234
 
 
1235
 
        buf_pool_mutex_enter();
1236
 
 
1237
 
        /* Free, create, and populate the hash table. */
1238
 
        hash_table_free(buf_pool->page_hash);
1239
 
        buf_pool->page_hash = page_hash = hash_create(2 * buf_pool->curr_size);
1240
 
        zip_hash = hash_create(2 * buf_pool->curr_size);
1241
 
 
1242
 
        HASH_MIGRATE(buf_pool->zip_hash, zip_hash, buf_page_t, hash,
1243
 
                     BUF_POOL_ZIP_FOLD_BPAGE);
1244
 
 
1245
 
        hash_table_free(buf_pool->zip_hash);
1246
 
        buf_pool->zip_hash = zip_hash;
1247
 
 
1248
 
        /* Insert the uncompressed file pages to buf_pool->page_hash. */
1249
 
 
1250
 
        chunk = buf_pool->chunks;
1251
 
        n_chunks = buf_pool->n_chunks;
1252
 
 
1253
 
        for (i = 0; i < n_chunks; i++, chunk++) {
1254
 
                ulint           j;
1255
 
                buf_block_t*    block = chunk->blocks;
1256
 
 
1257
 
                for (j = 0; j < chunk->size; j++, block++) {
1258
 
                        if (buf_block_get_state(block)
1259
 
                            == BUF_BLOCK_FILE_PAGE) {
1260
 
                                ut_ad(!block->page.in_zip_hash);
1261
 
                                ut_ad(block->page.in_page_hash);
1262
 
 
1263
 
                                HASH_INSERT(buf_page_t, hash, page_hash,
1264
 
                                            buf_page_address_fold(
1265
 
                                                    block->page.space,
1266
 
                                                    block->page.offset),
1267
 
                                            &block->page);
1268
 
                        }
1269
 
                }
1270
 
        }
1271
 
 
1272
 
        /* Insert the compressed-only pages to buf_pool->page_hash.
1273
 
        All such blocks are either in buf_pool->zip_clean or
1274
 
        in buf_pool->flush_list. */
1275
 
 
1276
 
        for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
1277
 
             b = UT_LIST_GET_NEXT(list, b)) {
1278
 
                ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
1279
 
                ut_ad(!b->in_flush_list);
1280
 
                ut_ad(b->in_LRU_list);
1281
 
                ut_ad(b->in_page_hash);
1282
 
                ut_ad(!b->in_zip_hash);
1283
 
 
1284
 
                HASH_INSERT(buf_page_t, hash, page_hash,
1285
 
                            buf_page_address_fold(b->space, b->offset), b);
1286
 
        }
1287
 
 
1288
 
        for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
1289
 
             b = UT_LIST_GET_NEXT(list, b)) {
1290
 
                ut_ad(b->in_flush_list);
1291
 
                ut_ad(b->in_LRU_list);
1292
 
                ut_ad(b->in_page_hash);
1293
 
                ut_ad(!b->in_zip_hash);
1294
 
 
1295
 
                switch (buf_page_get_state(b)) {
1296
 
                case BUF_BLOCK_ZIP_DIRTY:
1297
 
                        HASH_INSERT(buf_page_t, hash, page_hash,
1298
 
                                    buf_page_address_fold(b->space,
1299
 
                                                          b->offset), b);
1300
 
                        break;
1301
 
                case BUF_BLOCK_FILE_PAGE:
1302
 
                        /* uncompressed page */
1303
 
                        break;
1304
 
                case BUF_BLOCK_ZIP_FREE:
1305
 
                case BUF_BLOCK_ZIP_PAGE:
1306
 
                case BUF_BLOCK_NOT_USED:
1307
 
                case BUF_BLOCK_READY_FOR_USE:
1308
 
                case BUF_BLOCK_MEMORY:
1309
 
                case BUF_BLOCK_REMOVE_HASH:
1310
 
                        ut_error;
1311
 
                        break;
1312
 
                }
1313
 
        }
1314
 
 
1315
 
        buf_pool_mutex_exit();
1316
 
}
1317
 
 
1318
 
/************************************************************************
1319
 
Resizes the buffer pool. */
1320
 
UNIV_INTERN
1321
 
void
1322
 
buf_pool_resize(void)
1323
 
/*=================*/
1324
 
{
1325
 
        buf_pool_mutex_enter();
1326
 
 
1327
 
        if (srv_buf_pool_old_size == srv_buf_pool_size) {
1328
 
 
1329
 
                buf_pool_mutex_exit();
1330
 
                return;
1331
 
        }
1332
 
 
1333
 
        if (srv_buf_pool_curr_size + 1048576 > srv_buf_pool_size) {
1334
 
 
1335
 
                buf_pool_mutex_exit();
1336
 
 
1337
 
                /* Disable adaptive hash indexes and empty the index
1338
 
                in order to free up memory in the buffer pool chunks. */
1339
 
                buf_pool_shrink((srv_buf_pool_curr_size - srv_buf_pool_size)
1340
 
                                / UNIV_PAGE_SIZE);
1341
 
        } else if (srv_buf_pool_curr_size + 1048576 < srv_buf_pool_size) {
1342
 
 
1343
 
                /* Enlarge the buffer pool by at least one megabyte */
1344
 
 
1345
 
                ulint           mem_size
1346
 
                        = srv_buf_pool_size - srv_buf_pool_curr_size;
1347
 
                buf_chunk_t*    chunks;
1348
 
                buf_chunk_t*    chunk;
1349
 
 
1350
 
                chunks = mem_alloc((buf_pool->n_chunks + 1) * sizeof *chunks);
1351
 
 
1352
 
                memcpy(chunks, buf_pool->chunks, buf_pool->n_chunks
1353
 
                       * sizeof *chunks);
1354
 
 
1355
 
                chunk = &chunks[buf_pool->n_chunks];
1356
 
 
1357
 
                if (!buf_chunk_init(chunk, mem_size)) {
1358
 
                        mem_free(chunks);
1359
 
                } else {
1360
 
                        buf_pool->curr_size += chunk->size;
1361
 
                        srv_buf_pool_curr_size = buf_pool->curr_size
1362
 
                                * UNIV_PAGE_SIZE;
1363
 
                        mem_free(buf_pool->chunks);
1364
 
                        buf_pool->chunks = chunks;
1365
 
                        buf_pool->n_chunks++;
1366
 
                }
1367
 
 
1368
 
                srv_buf_pool_old_size = srv_buf_pool_size;
1369
 
                buf_pool_mutex_exit();
1370
 
        }
1371
 
 
1372
 
        buf_pool_page_hash_rebuild();
1373
 
}
1374
 
 
1375
 
/************************************************************************
1376
 
Moves to the block to the start of the LRU list if there is a danger
1377
 
that the block would drift out of the buffer pool. */
1378
 
UNIV_INLINE
1379
 
void
1380
 
buf_block_make_young(
1381
 
/*=================*/
1382
 
        buf_page_t*     bpage)  /* in: block to make younger */
1383
 
{
1384
 
        ut_ad(!buf_pool_mutex_own());
1385
 
 
1386
 
        /* Note that we read freed_page_clock's without holding any mutex:
1387
 
        this is allowed since the result is used only in heuristics */
1388
 
 
1389
 
        if (buf_page_peek_if_too_old(bpage)) {
1390
 
 
1391
 
                buf_pool_mutex_enter();
1392
 
                /* There has been freeing activity in the LRU list:
1393
 
                best to move to the head of the LRU list */
1394
 
 
1395
 
                buf_LRU_make_block_young(bpage);
1396
 
                buf_pool_mutex_exit();
1397
 
        }
1398
 
}
1399
 
 
1400
 
/************************************************************************
1401
 
Moves a page to the start of the buffer pool LRU list. This high-level
1402
 
function can be used to prevent an important page from from slipping out of
1403
 
the buffer pool. */
1404
 
UNIV_INTERN
1405
 
void
1406
 
buf_page_make_young(
1407
 
/*================*/
1408
 
        buf_page_t*     bpage)  /* in: buffer block of a file page */
1409
 
{
1410
 
        buf_pool_mutex_enter();
1411
 
 
1412
 
        ut_a(buf_page_in_file(bpage));
1413
 
 
1414
 
        buf_LRU_make_block_young(bpage);
1415
 
 
1416
 
        buf_pool_mutex_exit();
1417
 
}
1418
 
 
1419
 
/************************************************************************
1420
 
Resets the check_index_page_at_flush field of a page if found in the buffer
1421
 
pool. */
1422
 
UNIV_INTERN
1423
 
void
1424
 
buf_reset_check_index_page_at_flush(
1425
 
/*================================*/
1426
 
        ulint   space,  /* in: space id */
1427
 
        ulint   offset) /* in: page number */
1428
 
{
1429
 
        buf_block_t*    block;
1430
 
 
1431
 
        buf_pool_mutex_enter();
1432
 
 
1433
 
        block = (buf_block_t*) buf_page_hash_get(space, offset);
1434
 
 
1435
 
        if (block && buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE) {
1436
 
                block->check_index_page_at_flush = FALSE;
1437
 
        }
1438
 
 
1439
 
        buf_pool_mutex_exit();
1440
 
}
1441
 
 
1442
 
/************************************************************************
1443
 
Returns the current state of is_hashed of a page. FALSE if the page is
1444
 
not in the pool. NOTE that this operation does not fix the page in the
1445
 
pool if it is found there. */
1446
 
UNIV_INTERN
1447
 
ibool
1448
 
buf_page_peek_if_search_hashed(
1449
 
/*===========================*/
1450
 
                        /* out: TRUE if page hash index is built in search
1451
 
                        system */
1452
 
        ulint   space,  /* in: space id */
1453
 
        ulint   offset) /* in: page number */
1454
 
{
1455
 
        buf_block_t*    block;
1456
 
        ibool           is_hashed;
1457
 
 
1458
 
        buf_pool_mutex_enter();
1459
 
 
1460
 
        block = (buf_block_t*) buf_page_hash_get(space, offset);
1461
 
 
1462
 
        if (!block || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
1463
 
                is_hashed = FALSE;
1464
 
        } else {
1465
 
                is_hashed = block->is_hashed;
1466
 
        }
1467
 
 
1468
 
        buf_pool_mutex_exit();
1469
 
 
1470
 
        return(is_hashed);
1471
 
}
1472
 
 
1473
 
#ifdef UNIV_DEBUG_FILE_ACCESSES
1474
 
/************************************************************************
1475
 
Sets file_page_was_freed TRUE if the page is found in the buffer pool.
1476
 
This function should be called when we free a file page and want the
1477
 
debug version to check that it is not accessed any more unless
1478
 
reallocated. */
1479
 
UNIV_INTERN
1480
 
buf_page_t*
1481
 
buf_page_set_file_page_was_freed(
1482
 
/*=============================*/
1483
 
                        /* out: control block if found in page hash table,
1484
 
                        otherwise NULL */
1485
 
        ulint   space,  /* in: space id */
1486
 
        ulint   offset) /* in: page number */
1487
 
{
1488
 
        buf_page_t*     bpage;
1489
 
 
1490
 
        buf_pool_mutex_enter();
1491
 
 
1492
 
        bpage = buf_page_hash_get(space, offset);
1493
 
 
1494
 
        if (bpage) {
1495
 
                bpage->file_page_was_freed = TRUE;
1496
 
        }
1497
 
 
1498
 
        buf_pool_mutex_exit();
1499
 
 
1500
 
        return(bpage);
1501
 
}
1502
 
 
1503
 
/************************************************************************
1504
 
Sets file_page_was_freed FALSE if the page is found in the buffer pool.
1505
 
This function should be called when we free a file page and want the
1506
 
debug version to check that it is not accessed any more unless
1507
 
reallocated. */
1508
 
UNIV_INTERN
1509
 
buf_page_t*
1510
 
buf_page_reset_file_page_was_freed(
1511
 
/*===============================*/
1512
 
                        /* out: control block if found in page hash table,
1513
 
                        otherwise NULL */
1514
 
        ulint   space,  /* in: space id */
1515
 
        ulint   offset) /* in: page number */
1516
 
{
1517
 
        buf_page_t*     bpage;
1518
 
 
1519
 
        buf_pool_mutex_enter();
1520
 
 
1521
 
        bpage = buf_page_hash_get(space, offset);
1522
 
 
1523
 
        if (bpage) {
1524
 
                bpage->file_page_was_freed = FALSE;
1525
 
        }
1526
 
 
1527
 
        buf_pool_mutex_exit();
1528
 
 
1529
 
        return(bpage);
1530
 
}
1531
 
#endif /* UNIV_DEBUG_FILE_ACCESSES */
1532
 
 
1533
 
/************************************************************************
Get read access to a compressed page (usually of type
FIL_PAGE_TYPE_ZBLOB or FIL_PAGE_TYPE_ZBLOB2).
The page must be released with buf_page_release_zip().
NOTE: the page is not protected by any latch.  Mutual exclusion has to
be implemented at a higher level.  In other words, all possible
accesses to a given page through this function must be protected by
the same set of mutexes or latches. */
UNIV_INTERN
buf_page_t*
buf_page_get_zip(
/*=============*/
				/* out: pointer to the block */
	ulint		space,	/* in: space id */
	ulint		zip_size,/* in: compressed page size */
	ulint		offset)	/* in: page number */
{
	buf_page_t*	bpage;
	mutex_t*	block_mutex;
	ibool		must_read;

#ifndef UNIV_LOG_DEBUG
	ut_ad(!ibuf_inside());
#endif
	buf_pool->n_page_gets++;

	/* Loop until the page is found in the pool: if the hash lookup
	misses, trigger a read from file and retry. */
	for (;;) {
		buf_pool_mutex_enter();
lookup:
		bpage = buf_page_hash_get(space, offset);
		if (bpage) {
			break;
		}

		/* Page not in buf_pool: needs to be read from file */

		buf_pool_mutex_exit();

		buf_read_page(space, zip_size, offset);

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
		ut_a(++buf_dbg_counter % 37 || buf_validate());
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
	}

	if (UNIV_UNLIKELY(!bpage->zip.data)) {
		/* There is no compressed page. */
		buf_pool_mutex_exit();
		return(NULL);
	}

	/* The page mutex depends on the page state (zip page vs.
	uncompressed file page), so fetch it through the accessor. */
	block_mutex = buf_page_get_mutex(bpage);
	mutex_enter(block_mutex);

	switch (buf_page_get_state(bpage)) {
	case BUF_BLOCK_NOT_USED:
	case BUF_BLOCK_READY_FOR_USE:
	case BUF_BLOCK_MEMORY:
	case BUF_BLOCK_REMOVE_HASH:
	case BUF_BLOCK_ZIP_FREE:
		/* These states must never be reached via the page
		hash table. */
		ut_error;
		break;
	case BUF_BLOCK_ZIP_PAGE:
	case BUF_BLOCK_ZIP_DIRTY:
		/* Buffer-fix the compressed-only page directly. */
		bpage->buf_fix_count++;
		break;
	case BUF_BLOCK_FILE_PAGE:
		/* Discard the uncompressed page frame if possible. */
		if (buf_LRU_free_block(bpage, FALSE, NULL)
		    == BUF_LRU_FREED) {

			/* The descriptor was freed together with the
			uncompressed frame; look the page up again. */
			mutex_exit(block_mutex);
			goto lookup;
		}

		buf_block_buf_fix_inc((buf_block_t*) bpage,
				      __FILE__, __LINE__);
		break;
	}

	/* Remember whether a read is still in flight; we may have to
	wait for it below, after releasing the pool mutex. */
	must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ;

	buf_pool_mutex_exit();

	buf_page_set_accessed(bpage, TRUE);

	mutex_exit(block_mutex);

	buf_block_make_young(bpage);

#ifdef UNIV_DEBUG_FILE_ACCESSES
	ut_a(!bpage->file_page_was_freed);
#endif

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
	ut_a(++buf_dbg_counter % 5771 || buf_validate());
	ut_a(bpage->buf_fix_count > 0);
	ut_a(buf_page_in_file(bpage));
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

	if (must_read) {
		/* Let us wait until the read operation
		completes */

		for (;;) {
			enum buf_io_fix	io_fix;

			/* Poll the io_fix state under the page mutex;
			sleep between polls instead of busy-waiting. */
			mutex_enter(block_mutex);
			io_fix = buf_page_get_io_fix(bpage);
			mutex_exit(block_mutex);

			if (io_fix == BUF_IO_READ) {

				os_thread_sleep(WAIT_FOR_READ);
			} else {
				break;
			}
		}
	}

#ifdef UNIV_IBUF_COUNT_DEBUG
	ut_a(ibuf_count_get(buf_page_get_space(bpage),
			    buf_page_get_page_no(bpage)) == 0);
#endif
	return(bpage);
}
1659
 
 
1660
 
/************************************************************************
1661
 
Initialize some fields of a control block. */
1662
 
UNIV_INLINE
1663
 
void
1664
 
buf_block_init_low(
1665
 
/*===============*/
1666
 
        buf_block_t*    block)  /* in: block to init */
1667
 
{
1668
 
        block->check_index_page_at_flush = FALSE;
1669
 
        block->index            = NULL;
1670
 
 
1671
 
        block->n_hash_helps     = 0;
1672
 
        block->is_hashed        = FALSE;
1673
 
        block->n_fields         = 1;
1674
 
        block->n_bytes          = 0;
1675
 
        block->left_side        = TRUE;
1676
 
}
1677
 
 
1678
 
/************************************************************************
Decompress a block. */
static
ibool
buf_zip_decompress(
/*===============*/
				/* out: TRUE if successful */
	buf_block_t*	block,	/* in/out: block */
	ibool		check)	/* in: TRUE=verify the page checksum */
{
	/* The compressed image lives in the page descriptor's zip
	buffer; the uncompressed result goes to block->frame. */
	const byte* frame = block->page.zip.data;

	ut_ad(buf_block_get_zip_size(block));
	ut_a(buf_block_get_space(block) != 0);

	if (UNIV_LIKELY(check)) {
		/* Compare the checksum stamped on the page against a
		freshly computed one before decompressing. */
		ulint	stamp_checksum	= mach_read_from_4(
			frame + FIL_PAGE_SPACE_OR_CHKSUM);
		ulint	calc_checksum	= page_zip_calc_checksum(
			frame, page_zip_get_size(&block->page.zip));

		if (UNIV_UNLIKELY(stamp_checksum != calc_checksum)) {
			ut_print_timestamp(stderr);
			fprintf(stderr,
				"  InnoDB: compressed page checksum mismatch"
				" (space %u page %u): %lu != %lu\n",
				block->page.space, block->page.offset,
				stamp_checksum, calc_checksum);
			return(FALSE);
		}
	}

	switch (fil_page_get_type(frame)) {
	case FIL_PAGE_INDEX:
		/* Only B-tree index pages are actually compressed;
		they need a real decompression pass. */
		if (page_zip_decompress(&block->page.zip,
					block->frame)) {
			return(TRUE);
		}

		fprintf(stderr,
			"InnoDB: unable to decompress space %lu page %lu\n",
			(ulong) block->page.space,
			(ulong) block->page.offset);
		return(FALSE);

	case FIL_PAGE_TYPE_ALLOCATED:
	case FIL_PAGE_INODE:
	case FIL_PAGE_IBUF_BITMAP:
	case FIL_PAGE_TYPE_FSP_HDR:
	case FIL_PAGE_TYPE_XDES:
	case FIL_PAGE_TYPE_ZBLOB:
	case FIL_PAGE_TYPE_ZBLOB2:
		/* Copy to uncompressed storage. */
		/* These page types are stored uncompressed inside the
		zip-size frame, so a plain memcpy suffices. */
		memcpy(block->frame, frame,
		       buf_block_get_zip_size(block));
		return(TRUE);
	}

	/* Unknown page type: report and fail. */
	ut_print_timestamp(stderr);
	fprintf(stderr,
		"  InnoDB: unknown compressed page"
		" type %lu\n",
		fil_page_get_type(frame));
	return(FALSE);
}
1743
 
 
1744
 
/************************************************************************
1745
 
Find out if a buffer block was created by buf_chunk_init(). */
1746
 
static
1747
 
ibool
1748
 
buf_block_is_uncompressed(
1749
 
/*======================*/
1750
 
                                        /* out: TRUE if "block" has
1751
 
                                        been added to buf_pool->free
1752
 
                                        by buf_chunk_init() */
1753
 
        const buf_block_t*      block)  /* in: pointer to block,
1754
 
                                        not dereferenced */
1755
 
{
1756
 
        const buf_chunk_t*              chunk   = buf_pool->chunks;
1757
 
        const buf_chunk_t* const        echunk  = chunk + buf_pool->n_chunks;
1758
 
 
1759
 
        ut_ad(buf_pool_mutex_own());
1760
 
 
1761
 
        if (UNIV_UNLIKELY((((ulint) block) % sizeof *block) != 0)) {
1762
 
                /* The pointer should be aligned. */
1763
 
                return(FALSE);
1764
 
        }
1765
 
 
1766
 
        while (chunk < echunk) {
1767
 
                if (block >= chunk->blocks
1768
 
                    && block < chunk->blocks + chunk->size) {
1769
 
 
1770
 
                        return(TRUE);
1771
 
                }
1772
 
 
1773
 
                chunk++;
1774
 
        }
1775
 
 
1776
 
        return(FALSE);
1777
 
}
1778
 
 
1779
 
/************************************************************************
This is the general function used to get access to a database page. */
UNIV_INTERN
buf_block_t*
buf_page_get_gen(
/*=============*/
				/* out: pointer to the block or NULL */
	ulint		space,	/* in: space id */
	ulint		zip_size,/* in: compressed page size in bytes
				or 0 for uncompressed pages */
	ulint		offset,	/* in: page number */
	ulint		rw_latch,/* in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
	buf_block_t*	guess,	/* in: guessed block or NULL */
	ulint		mode,	/* in: BUF_GET, BUF_GET_IF_IN_POOL,
				BUF_GET_NO_LATCH, BUF_GET_NOWAIT */
	const char*	file,	/* in: file name */
	ulint		line,	/* in: line where called */
	mtr_t*		mtr)	/* in: mini-transaction */
{
	buf_block_t*	block;
	ibool		accessed;
	ulint		fix_type;
	ibool		must_read;

	ut_ad(mtr);
	ut_ad((rw_latch == RW_S_LATCH)
	      || (rw_latch == RW_X_LATCH)
	      || (rw_latch == RW_NO_LATCH));
	ut_ad((mode != BUF_GET_NO_LATCH) || (rw_latch == RW_NO_LATCH));
	ut_ad((mode == BUF_GET) || (mode == BUF_GET_IF_IN_POOL)
	      || (mode == BUF_GET_NO_LATCH) || (mode == BUF_GET_NOWAIT));
	ut_ad(zip_size == fil_space_get_zip_size(space));
#ifndef UNIV_LOG_DEBUG
	ut_ad(!ibuf_inside() || ibuf_page(space, zip_size, offset));
#endif
	buf_pool->n_page_gets++;
loop:
	block = guess;
	buf_pool_mutex_enter();

	if (block) {
		/* If the guess is a compressed page descriptor that
		has been allocated by buf_buddy_alloc(), it may have
		been invalidated by buf_buddy_relocate().  In that
		case, block could point to something that happens to
		contain the expected bits in block->page.  Similarly,
		the guess may be pointing to a buffer pool chunk that
		has been released when resizing the buffer pool. */

		if (!buf_block_is_uncompressed(block)
		    || offset != block->page.offset
		    || space != block->page.space
		    || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {

			block = guess = NULL;
		} else {
			ut_ad(!block->page.in_zip_hash);
			ut_ad(block->page.in_page_hash);
		}
	}

	if (block == NULL) {
		/* The guess was rejected or absent: fall back to the
		page hash table. */
		block = (buf_block_t*) buf_page_hash_get(space, offset);
	}

loop2:
	if (block == NULL) {
		/* Page not in buf_pool: needs to be read from file */

		buf_pool_mutex_exit();

		if (mode == BUF_GET_IF_IN_POOL) {

			return(NULL);
		}

		buf_read_page(space, zip_size, offset);

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
		ut_a(++buf_dbg_counter % 37 || buf_validate());
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
		goto loop;
	}

	ut_ad(page_zip_get_size(&block->page.zip) == zip_size);

	must_read = buf_block_get_io_fix(block) == BUF_IO_READ;

	if (must_read && mode == BUF_GET_IF_IN_POOL) {
		/* The page is only being read to buffer */
		buf_pool_mutex_exit();

		return(NULL);
	}

	switch (buf_block_get_state(block)) {
		buf_page_t*	bpage;
		ibool		success;

	case BUF_BLOCK_FILE_PAGE:
		/* Already resident uncompressed: nothing to do. */
		break;

	case BUF_BLOCK_ZIP_PAGE:
	case BUF_BLOCK_ZIP_DIRTY:
		/* The page exists only in compressed form; it must be
		relocated into a full block and decompressed. */
		bpage = &block->page;

		if (bpage->buf_fix_count
		    || buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
			/* This condition often occurs when the buffer
			is not buffer-fixed, but I/O-fixed by
			buf_page_init_for_read(). */
wait_until_unfixed:
			/* The block is buffer-fixed or I/O-fixed.
			Try again later. */
			buf_pool_mutex_exit();
			os_thread_sleep(WAIT_FOR_READ);

			goto loop;
		}

		/* Allocate an uncompressed page. */
		buf_pool_mutex_exit();

		block = buf_LRU_get_free_block(0);
		ut_a(block);

		buf_pool_mutex_enter();
		mutex_enter(&block->mutex);

		{
			buf_page_t*	hash_bpage
				= buf_page_hash_get(space, offset);

			if (UNIV_UNLIKELY(bpage != hash_bpage)) {
				/* The buf_pool->page_hash was modified
				while buf_pool_mutex was released.
				Free the block that was allocated. */

				buf_LRU_block_free_non_file_page(block);
				mutex_exit(&block->mutex);

				block = (buf_block_t*) hash_bpage;
				goto loop2;
			}
		}

		if (UNIV_UNLIKELY
		    (bpage->buf_fix_count
		     || buf_page_get_io_fix(bpage) != BUF_IO_NONE)) {

			/* The block was buffer-fixed or I/O-fixed
			while buf_pool_mutex was not held by this thread.
			Free the block that was allocated and try again.
			This should be extremely unlikely. */

			buf_LRU_block_free_non_file_page(block);
			mutex_exit(&block->mutex);

			goto wait_until_unfixed;
		}

		/* Move the compressed page from bpage to block,
		and uncompress it. */

		mutex_enter(&buf_pool_zip_mutex);

		buf_relocate(bpage, &block->page);
		buf_block_init_low(block);
		block->lock_hash_val = lock_rec_hash(space, offset);

		UNIV_MEM_DESC(&block->page.zip.data,
			      page_zip_get_size(&block->page.zip), block);

		if (buf_page_get_state(&block->page)
		    == BUF_BLOCK_ZIP_PAGE) {
			/* A clean compressed page: just unlink it from
			the zip_clean list. */
			UT_LIST_REMOVE(list, buf_pool->zip_clean,
				       &block->page);
			ut_ad(!block->page.in_flush_list);
		} else {
			/* Relocate buf_pool->flush_list. */
			buf_page_t*	b;

			/* Re-insert the page at the same position in
			the flush list, keeping its order. */
			b = UT_LIST_GET_PREV(list, &block->page);
			ut_ad(block->page.in_flush_list);
			UT_LIST_REMOVE(list, buf_pool->flush_list,
				       &block->page);

			if (b) {
				UT_LIST_INSERT_AFTER(
					list, buf_pool->flush_list, b,
					&block->page);
			} else {
				UT_LIST_ADD_FIRST(
					list, buf_pool->flush_list,
					&block->page);
			}
		}

		/* Buffer-fix, I/O-fix, and X-latch the block
		for the duration of the decompression.
		Also add the block to the unzip_LRU list. */
		block->page.state = BUF_BLOCK_FILE_PAGE;

		/* Insert at the front of unzip_LRU list */
		buf_unzip_LRU_add_block(block, FALSE);

		block->page.buf_fix_count = 1;
		buf_block_set_io_fix(block, BUF_IO_READ);
		buf_pool->n_pend_unzip++;
		rw_lock_x_lock(&block->lock);
		mutex_exit(&block->mutex);
		mutex_exit(&buf_pool_zip_mutex);

		/* The old compressed-only descriptor is no longer
		needed; return it to the buddy allocator. */
		buf_buddy_free(bpage, sizeof *bpage);

		buf_pool_mutex_exit();

		/* Decompress the page and apply buffered operations
		while not holding buf_pool_mutex or block->mutex. */
		success = buf_zip_decompress(block, srv_use_checksums);

		if (UNIV_LIKELY(success)) {
			ibuf_merge_or_delete_for_page(block, space, offset,
						      zip_size, TRUE);
		}

		/* Unfix and unlatch the block. */
		buf_pool_mutex_enter();
		mutex_enter(&block->mutex);
		buf_pool->n_pend_unzip--;
		block->page.buf_fix_count--;
		buf_block_set_io_fix(block, BUF_IO_NONE);
		mutex_exit(&block->mutex);
		rw_lock_x_unlock(&block->lock);

		if (UNIV_UNLIKELY(!success)) {

			buf_pool_mutex_exit();
			return(NULL);
		}

		break;

	case BUF_BLOCK_ZIP_FREE:
	case BUF_BLOCK_NOT_USED:
	case BUF_BLOCK_READY_FOR_USE:
	case BUF_BLOCK_MEMORY:
	case BUF_BLOCK_REMOVE_HASH:
		/* These states must never be reached here. */
		ut_error;
		break;
	}

	ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);

	mutex_enter(&block->mutex);
	UNIV_MEM_ASSERT_RW(&block->page, sizeof block->page);

	buf_block_buf_fix_inc(block, file, line);
	buf_pool_mutex_exit();

	/* Check if this is the first access to the page */

	accessed = buf_page_is_accessed(&block->page);

	buf_page_set_accessed(&block->page, TRUE);

	mutex_exit(&block->mutex);

	buf_block_make_young(&block->page);

#ifdef UNIV_DEBUG_FILE_ACCESSES
	ut_a(!block->page.file_page_was_freed);
#endif

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
	ut_a(++buf_dbg_counter % 5771 || buf_validate());
	ut_a(block->page.buf_fix_count > 0);
	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

	if (mode == BUF_GET_NOWAIT) {
		ibool	success;

		/* Try to take the requested latch without waiting;
		on failure, undo the buffer-fix and give up. */
		if (rw_latch == RW_S_LATCH) {
			success = rw_lock_s_lock_func_nowait(&(block->lock),
							     file, line);
			fix_type = MTR_MEMO_PAGE_S_FIX;
		} else {
			ut_ad(rw_latch == RW_X_LATCH);
			success = rw_lock_x_lock_func_nowait(&(block->lock),
							     file, line);
			fix_type = MTR_MEMO_PAGE_X_FIX;
		}

		if (!success) {
			mutex_enter(&block->mutex);
			buf_block_buf_fix_dec(block);
			mutex_exit(&block->mutex);

			return(NULL);
		}
	} else if (rw_latch == RW_NO_LATCH) {

		if (must_read) {
			/* Let us wait until the read operation
			completes */

			for (;;) {
				enum buf_io_fix	io_fix;

				mutex_enter(&block->mutex);
				io_fix = buf_block_get_io_fix(block);
				mutex_exit(&block->mutex);

				if (io_fix == BUF_IO_READ) {

					os_thread_sleep(WAIT_FOR_READ);
				} else {
					break;
				}
			}
		}

		fix_type = MTR_MEMO_BUF_FIX;
	} else if (rw_latch == RW_S_LATCH) {

		rw_lock_s_lock_func(&(block->lock), 0, file, line);

		fix_type = MTR_MEMO_PAGE_S_FIX;
	} else {
		rw_lock_x_lock_func(&(block->lock), 0, file, line);

		fix_type = MTR_MEMO_PAGE_X_FIX;
	}

	/* Register the fix/latch in the mini-transaction so it is
	released at mtr commit. */
	mtr_memo_push(mtr, block, fix_type);

	if (!accessed) {
		/* In the case of a first access, try to apply linear
		read-ahead */

		buf_read_ahead_linear(space, zip_size, offset);
	}

#ifdef UNIV_IBUF_COUNT_DEBUG
	ut_a(ibuf_count_get(buf_block_get_space(block),
			    buf_block_get_page_no(block)) == 0);
#endif
	return(block);
}
2129
 
 
2130
 
/************************************************************************
This is the general function used to get optimistic access to a database
page. */
UNIV_INTERN
ibool
buf_page_optimistic_get_func(
/*=========================*/
				/* out: TRUE if success */
	ulint		rw_latch,/* in: RW_S_LATCH, RW_X_LATCH */
	buf_block_t*	block,	/* in: guessed buffer block */
	ib_uint64_t	modify_clock,/* in: modify clock value if mode is
				..._GUESS_ON_CLOCK */
	const char*	file,	/* in: file name */
	ulint		line,	/* in: line where called */
	mtr_t*		mtr)	/* in: mini-transaction */
{
	ibool		accessed;
	ibool		success;
	ulint		fix_type;

	ut_ad(mtr && block);
	ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));

	mutex_enter(&block->mutex);

	if (UNIV_UNLIKELY(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE)) {

		/* The guessed block is no longer a resident file
		page; the optimistic access fails. */
		mutex_exit(&block->mutex);

		return(FALSE);
	}

	/* Buffer-fix first so the block cannot go away while we try
	to latch it below. */
	buf_block_buf_fix_inc(block, file, line);
	accessed = buf_page_is_accessed(&block->page);
	buf_page_set_accessed(&block->page, TRUE);

	mutex_exit(&block->mutex);

	buf_block_make_young(&block->page);

	/* Check if this is the first access to the page */

	ut_ad(!ibuf_inside()
	      || ibuf_page(buf_block_get_space(block),
			   buf_block_get_zip_size(block),
			   buf_block_get_page_no(block)));

	if (rw_latch == RW_S_LATCH) {
		success = rw_lock_s_lock_func_nowait(&(block->lock),
						     file, line);
		fix_type = MTR_MEMO_PAGE_S_FIX;
	} else {
		success = rw_lock_x_lock_func_nowait(&(block->lock),
						     file, line);
		fix_type = MTR_MEMO_PAGE_X_FIX;
	}

	if (UNIV_UNLIKELY(!success)) {
		/* Could not latch without waiting: undo the
		buffer-fix and report failure. */
		mutex_enter(&block->mutex);
		buf_block_buf_fix_dec(block);
		mutex_exit(&block->mutex);

		return(FALSE);
	}

	if (UNIV_UNLIKELY(modify_clock != block->modify_clock)) {
		/* The page was modified since the caller recorded the
		modify clock: the guess is stale; back out fully. */
#ifdef UNIV_SYNC_DEBUG
		buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
#endif /* UNIV_SYNC_DEBUG */
		if (rw_latch == RW_S_LATCH) {
			rw_lock_s_unlock(&(block->lock));
		} else {
			rw_lock_x_unlock(&(block->lock));
		}

		mutex_enter(&block->mutex);
		buf_block_buf_fix_dec(block);
		mutex_exit(&block->mutex);

		return(FALSE);
	}

	/* Register the latch in the mini-transaction so it is
	released at mtr commit. */
	mtr_memo_push(mtr, block, fix_type);

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
	ut_a(++buf_dbg_counter % 5771 || buf_validate());
	ut_a(block->page.buf_fix_count > 0);
	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

#ifdef UNIV_DEBUG_FILE_ACCESSES
	ut_a(block->page.file_page_was_freed == FALSE);
#endif
	if (UNIV_UNLIKELY(!accessed)) {
		/* In the case of a first access, try to apply linear
		read-ahead */

		buf_read_ahead_linear(buf_block_get_space(block),
				      buf_block_get_zip_size(block),
				      buf_block_get_page_no(block));
	}

#ifdef UNIV_IBUF_COUNT_DEBUG
	ut_a(ibuf_count_get(buf_block_get_space(block),
			    buf_block_get_page_no(block)) == 0);
#endif
	buf_pool->n_page_gets++;

	return(TRUE);
}
2240
 
 
2241
 
/************************************************************************
This is used to get access to a known database page, when no waiting can be
done. For example, if a search in an adaptive hash index leads us to this
frame. */
UNIV_INTERN
ibool
buf_page_get_known_nowait(
/*======================*/
				/* out: TRUE if success */
	ulint		rw_latch,/* in: RW_S_LATCH, RW_X_LATCH */
	buf_block_t*	block,	/* in: the known page */
	ulint		mode,	/* in: BUF_MAKE_YOUNG or BUF_KEEP_OLD */
	const char*	file,	/* in: file name */
	ulint		line,	/* in: line where called */
	mtr_t*		mtr)	/* in: mini-transaction */
{
	ibool		success;
	ulint		fix_type;

	ut_ad(mtr);
	ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));

	mutex_enter(&block->mutex);

	if (buf_block_get_state(block) == BUF_BLOCK_REMOVE_HASH) {
		/* Another thread is just freeing the block from the LRU list
		of the buffer pool: do not try to access this page; this
		attempt to access the page can only come through the hash
		index because when the buffer block state is ..._REMOVE_HASH,
		we have already removed it from the page address hash table
		of the buffer pool. */

		mutex_exit(&block->mutex);

		return(FALSE);
	}

	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);

	/* Buffer-fix the block while still holding block->mutex, so that
	it cannot be evicted between the state check above and the latch
	attempt below. */
	buf_block_buf_fix_inc(block, file, line);

	mutex_exit(&block->mutex);

	if (mode == BUF_MAKE_YOUNG) {
		buf_block_make_young(&block->page);
	}

	/* Inside the insert buffer the caller must keep pages old, or we
	could deadlock with the master thread; see the BUF_KEEP_OLD mode. */
	ut_ad(!ibuf_inside() || (mode == BUF_KEEP_OLD));

	if (rw_latch == RW_S_LATCH) {
		success = rw_lock_s_lock_func_nowait(&(block->lock),
						     file, line);
		fix_type = MTR_MEMO_PAGE_S_FIX;
	} else {
		success = rw_lock_x_lock_func_nowait(&(block->lock),
						     file, line);
		fix_type = MTR_MEMO_PAGE_X_FIX;
	}

	if (!success) {
		/* Could not get the latch without waiting: undo the
		buffer-fix and report failure. */
		mutex_enter(&block->mutex);
		buf_block_buf_fix_dec(block);
		mutex_exit(&block->mutex);

		return(FALSE);
	}

	/* Register the latch in the mini-transaction so that it is
	released at mtr_commit(). */
	mtr_memo_push(mtr, block, fix_type);

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
	ut_a(++buf_dbg_counter % 5771 || buf_validate());
	ut_a(block->page.buf_fix_count > 0);
	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#ifdef UNIV_DEBUG_FILE_ACCESSES
	ut_a(block->page.file_page_was_freed == FALSE);
#endif

#ifdef UNIV_IBUF_COUNT_DEBUG
	ut_a((mode == BUF_KEEP_OLD)
	     || (ibuf_count_get(buf_block_get_space(block),
				buf_block_get_page_no(block)) == 0));
#endif
	buf_pool->n_page_gets++;

	return(TRUE);
}
2328
 
 
2329
 
/***********************************************************************
Given a tablespace id and page number tries to get that page. If the
page is not in the buffer pool it is not loaded and NULL is returned.
Suitable for using when holding the kernel mutex. */
UNIV_INTERN
const buf_block_t*
buf_page_try_get_func(
/*==================*/
				/* out: pointer to a page or NULL */
	ulint		space_id,/* in: tablespace id */
	ulint		page_no,/* in: page number */
	const char*	file,	/* in: file name */
	ulint		line,	/* in: line where called */
	mtr_t*		mtr)	/* in: mini-transaction */
{
	buf_block_t*	block;
	ibool		success;
	ulint		fix_type;

	buf_pool_mutex_enter();
	block = buf_block_hash_get(space_id, page_no);

	if (!block) {
		buf_pool_mutex_exit();
		return(NULL);
	}

	/* Acquire block->mutex before releasing the buffer pool mutex,
	so that the block cannot be freed in between. */
	mutex_enter(&block->mutex);
	buf_pool_mutex_exit();

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
	ut_a(buf_block_get_space(block) == space_id);
	ut_a(buf_block_get_page_no(block) == page_no);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

	buf_block_buf_fix_inc(block, file, line);
	mutex_exit(&block->mutex);

	fix_type = MTR_MEMO_PAGE_S_FIX;
	success = rw_lock_s_lock_func_nowait(&block->lock, file, line);

	if (!success) {
		/* Let us try to get an X-latch. If the current thread
		is holding an X-latch on the page, we cannot get an
		S-latch. */

		fix_type = MTR_MEMO_PAGE_X_FIX;
		success = rw_lock_x_lock_func_nowait(&block->lock,
						     file, line);
	}

	if (!success) {
		/* Neither latch could be acquired without waiting:
		undo the buffer-fix and give up. */
		mutex_enter(&block->mutex);
		buf_block_buf_fix_dec(block);
		mutex_exit(&block->mutex);

		return(NULL);
	}

	mtr_memo_push(mtr, block, fix_type);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
	ut_a(++buf_dbg_counter % 5771 || buf_validate());
	ut_a(block->page.buf_fix_count > 0);
	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#ifdef UNIV_DEBUG_FILE_ACCESSES
	ut_a(block->page.file_page_was_freed == FALSE);
#endif /* UNIV_DEBUG_FILE_ACCESSES */
#ifdef UNIV_SYNC_DEBUG
	buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
#endif /* UNIV_SYNC_DEBUG */
	buf_pool->n_page_gets++;

	return(block);
}
2405
 
 
2406
 
/************************************************************************
2407
 
Initialize some fields of a control block. */
2408
 
UNIV_INLINE
2409
 
void
2410
 
buf_page_init_low(
2411
 
/*==============*/
2412
 
        buf_page_t*     bpage)  /* in: block to init */
2413
 
{
2414
 
        bpage->flush_type = BUF_FLUSH_LRU;
2415
 
        bpage->accessed = FALSE;
2416
 
        bpage->io_fix = BUF_IO_NONE;
2417
 
        bpage->buf_fix_count = 0;
2418
 
        bpage->freed_page_clock = 0;
2419
 
        bpage->newest_modification = 0;
2420
 
        bpage->oldest_modification = 0;
2421
 
        HASH_INVALIDATE(bpage, hash);
2422
 
#ifdef UNIV_DEBUG_FILE_ACCESSES
2423
 
        bpage->file_page_was_freed = FALSE;
2424
 
#endif /* UNIV_DEBUG_FILE_ACCESSES */
2425
 
}
2426
 
 
2427
 
#ifdef UNIV_HOTBACKUP
/************************************************************************
Inits a page to the buffer buf_pool, for use in ibbackup --restore. */
UNIV_INTERN
void
buf_page_init_for_backup_restore(
/*=============================*/
	ulint		space,	/* in: space id */
	ulint		offset,	/* in: offset of the page within space
				in units of a page */
	ulint		zip_size,/* in: compressed page size in bytes
				or 0 for uncompressed pages */
	buf_block_t*	block)	/* in: block to init */
{
	buf_block_init_low(block);

	/* No record locks exist during restore, so no lock hash value
	is computed here (contrast with buf_page_init()). */
	block->lock_hash_val	= 0;

	buf_page_init_low(&block->page);
	block->page.state	= BUF_BLOCK_FILE_PAGE;
	block->page.space	= space;
	block->page.offset	= offset;

	page_zip_des_init(&block->page.zip);

	/* We assume that block->page.data has been allocated
	with zip_size == UNIV_PAGE_SIZE. */
	ut_ad(zip_size <= UNIV_PAGE_SIZE);
	ut_ad(ut_is_2pow(zip_size));
	page_zip_set_size(&block->page.zip, zip_size);
}
#endif /* UNIV_HOTBACKUP */
2459
 
 
2460
 
/************************************************************************
Inits a page to the buffer buf_pool. */
static
void
buf_page_init(
/*==========*/
	ulint		space,	/* in: space id */
	ulint		offset,	/* in: offset of the page within space
				in units of a page */
	buf_block_t*	block)	/* in: block to init */
{
	buf_page_t*	hash_page;

	/* The caller must hold both the buffer pool mutex and the
	block mutex: we are about to change the block state and insert
	it into the page hash. */
	ut_ad(buf_pool_mutex_own());
	ut_ad(mutex_own(&(block->mutex)));
	ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);

	/* Set the state of the block */
	buf_block_set_file_page(block, space, offset);

#ifdef UNIV_DEBUG_VALGRIND
	if (!space) {
		/* Silence valid Valgrind warnings about uninitialized
		data being written to data files.  There are some unused
		bytes on some pages that InnoDB does not initialize. */
		UNIV_MEM_VALID(block->frame, UNIV_PAGE_SIZE);
	}
#endif /* UNIV_DEBUG_VALGRIND */

	buf_block_init_low(block);

	block->lock_hash_val	= lock_rec_hash(space, offset);

	/* Insert into the hash table of file pages */

	hash_page = buf_page_hash_get(space, offset);

	if (UNIV_LIKELY_NULL(hash_page)) {
		/* A page with this (space, offset) is already hashed:
		this indicates buffer pool corruption.  Dump diagnostics
		and crash. */
		fprintf(stderr,
			"InnoDB: Error: page %lu %lu already found"
			" in the hash table: %p, %p\n",
			(ulong) space,
			(ulong) offset,
			(const void*) hash_page, (const void*) block);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
		mutex_exit(&block->mutex);
		buf_pool_mutex_exit();
		buf_print();
		buf_LRU_print();
		buf_validate();
		buf_LRU_validate();
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
		ut_error;
	}

	buf_page_init_low(&block->page);

	ut_ad(!block->page.in_zip_hash);
	ut_ad(!block->page.in_page_hash);
	ut_d(block->page.in_page_hash = TRUE);
	HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
		    buf_page_address_fold(space, offset), &block->page);
}
2523
 
 
2524
 
/************************************************************************
Function which inits a page for read to the buffer buf_pool. If the page is
(1) already in buf_pool, or
(2) if we specify to read only ibuf pages and the page is not an ibuf page, or
(3) if the space is deleted or being deleted,
then this function does nothing.
Sets the io_fix flag to BUF_IO_READ and sets a non-recursive exclusive lock
on the buffer frame. The io-handler must take care that the flag is cleared
and the lock released later. */
UNIV_INTERN
buf_page_t*
buf_page_init_for_read(
/*===================*/
				/* out: pointer to the block or NULL */
	ulint*		err,	/* out: DB_SUCCESS or DB_TABLESPACE_DELETED */
	ulint		mode,	/* in: BUF_READ_IBUF_PAGES_ONLY, ... */
	ulint		space,	/* in: space id */
	ulint		zip_size,/* in: compressed page size, or 0 */
	ibool		unzip,	/* in: TRUE=request uncompressed page */
	ib_int64_t	tablespace_version,/* in: prevents reading from a wrong
				version of the tablespace in case we have done
				DISCARD + IMPORT */
	ulint		offset)	/* in: page number */
{
	buf_block_t*	block;
	buf_page_t*	bpage;
	mtr_t		mtr;
	ibool		lru	= FALSE;
	void*		data;

	ut_ad(buf_pool);

	*err = DB_SUCCESS;

	if (mode == BUF_READ_IBUF_PAGES_ONLY) {
		/* It is a read-ahead within an ibuf routine */

		ut_ad(!ibuf_bitmap_page(zip_size, offset));
		ut_ad(ibuf_inside());

		mtr_start(&mtr);

		if (!ibuf_page_low(space, zip_size, offset, &mtr)) {

			mtr_commit(&mtr);

			return(NULL);
		}
	} else {
		ut_ad(mode == BUF_READ_ANY_PAGE);
	}

	/* For a compressed-only read (no uncompressed frame requested and
	not in crash recovery) we do not need a full block; a compressed
	page descriptor allocated below suffices. */
	if (zip_size && UNIV_LIKELY(!unzip)
	    && UNIV_LIKELY(!recv_recovery_is_on())) {
		block = NULL;
	} else {
		block = buf_LRU_get_free_block(0);
		ut_ad(block);
	}

	buf_pool_mutex_enter();

	if (buf_page_hash_get(space, offset)) {
		/* The page is already in the buffer pool. */
err_exit:
		if (block) {
			mutex_enter(&block->mutex);
			buf_LRU_block_free_non_file_page(block);
			mutex_exit(&block->mutex);
		}

err_exit2:
		buf_pool_mutex_exit();

		if (mode == BUF_READ_IBUF_PAGES_ONLY) {

			mtr_commit(&mtr);
		}

		return(NULL);
	}

	if (fil_tablespace_deleted_or_being_deleted_in_mem(
		    space, tablespace_version)) {
		/* The page belongs to a space which has been
		deleted or is being deleted. */
		*err = DB_TABLESPACE_DELETED;

		goto err_exit;
	}

	if (block) {
		bpage = &block->page;
		mutex_enter(&block->mutex);
		buf_page_init(space, offset, block);

		/* The block must be put to the LRU list, to the old blocks */
		buf_LRU_add_block(bpage, TRUE/* to old blocks */);

		/* We set a pass-type x-lock on the frame because then
		the same thread which called for the read operation
		(and is running now at this point of code) can wait
		for the read to complete by waiting for the x-lock on
		the frame; if the x-lock were recursive, the same
		thread would illegally get the x-lock before the page
		read is completed.  The x-lock is cleared by the
		io-handler thread. */

		rw_lock_x_lock_gen(&block->lock, BUF_IO_READ);
		buf_page_set_io_fix(bpage, BUF_IO_READ);

		if (UNIV_UNLIKELY(zip_size)) {
			page_zip_set_size(&block->page.zip, zip_size);

			/* buf_pool_mutex may be released and
			reacquired by buf_buddy_alloc().  Thus, we
			must release block->mutex in order not to
			break the latching order in the reacquisition
			of buf_pool_mutex.  We also must defer this
			operation until after the block descriptor has
			been added to buf_pool->LRU and
			buf_pool->page_hash. */
			mutex_exit(&block->mutex);
			data = buf_buddy_alloc(zip_size, &lru);
			mutex_enter(&block->mutex);
			block->page.zip.data = data;

			/* To maintain the invariant
			block->in_unzip_LRU_list
			== buf_page_belongs_to_unzip_LRU(&block->page)
			we have to add this block to unzip_LRU
			after block->page.zip.data is set. */
			ut_ad(buf_page_belongs_to_unzip_LRU(&block->page));
			buf_unzip_LRU_add_block(block, TRUE);
		}

		mutex_exit(&block->mutex);
	} else {
		/* Defer buf_buddy_alloc() until after the block has
		been found not to exist.  The buf_buddy_alloc() and
		buf_buddy_free() calls may be expensive because of
		buf_buddy_relocate(). */

		/* The compressed page must be allocated before the
		control block (bpage), in order to avoid the
		invocation of buf_buddy_relocate_block() on
		uninitialized data. */
		data = buf_buddy_alloc(zip_size, &lru);
		bpage = buf_buddy_alloc(sizeof *bpage, &lru);

		/* If buf_buddy_alloc() allocated storage from the LRU list,
		it released and reacquired buf_pool_mutex.  Thus, we must
		check the page_hash again, as it may have been modified. */
		if (UNIV_UNLIKELY(lru)
		    && UNIV_LIKELY_NULL(buf_page_hash_get(space, offset))) {

			/* The block was added by some other thread. */
			buf_buddy_free(bpage, sizeof *bpage);
			buf_buddy_free(data, zip_size);

			goto err_exit2;
		}

		page_zip_des_init(&bpage->zip);
		page_zip_set_size(&bpage->zip, zip_size);
		bpage->zip.data = data;

		mutex_enter(&buf_pool_zip_mutex);
		UNIV_MEM_DESC(bpage->zip.data,
			      page_zip_get_size(&bpage->zip), bpage);
		buf_page_init_low(bpage);
		bpage->state	= BUF_BLOCK_ZIP_PAGE;
		bpage->space	= space;
		bpage->offset	= offset;

#ifdef UNIV_DEBUG
		bpage->in_page_hash = FALSE;
		bpage->in_zip_hash = FALSE;
		bpage->in_flush_list = FALSE;
		bpage->in_free_list = FALSE;
		bpage->in_LRU_list = FALSE;
#endif /* UNIV_DEBUG */

		ut_d(bpage->in_page_hash = TRUE);
		HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
			    buf_page_address_fold(space, offset), bpage);

		/* The block must be put to the LRU list, to the old blocks */
		buf_LRU_add_block(bpage, TRUE/* to old blocks */);
		buf_LRU_insert_zip_clean(bpage);

		buf_page_set_io_fix(bpage, BUF_IO_READ);

		mutex_exit(&buf_pool_zip_mutex);
	}

	buf_pool->n_pend_reads++;
	buf_pool_mutex_exit();

	if (mode == BUF_READ_IBUF_PAGES_ONLY) {

		mtr_commit(&mtr);
	}

	ut_ad(buf_page_in_file(bpage));
	return(bpage);
}
2730
 
 
2731
 
/************************************************************************
Initializes a page to the buffer buf_pool. The page is usually not read
from a file even if it cannot be found in the buffer buf_pool. This is one
of the functions which perform to a block a state transition NOT_USED =>
FILE_PAGE (the other is buf_page_get_gen). */
UNIV_INTERN
buf_block_t*
buf_page_create(
/*============*/
			/* out: pointer to the block, page bufferfixed */
	ulint	space,	/* in: space id */
	ulint	offset,	/* in: offset of the page within space in units of
			a page */
	ulint	zip_size,/* in: compressed page size, or 0 */
	mtr_t*	mtr)	/* in: mini-transaction handle */
{
	buf_frame_t*	frame;
	buf_block_t*	block;
	buf_block_t*	free_block	= NULL;

	ut_ad(mtr);
	ut_ad(space || !zip_size);

	/* Get a free block before acquiring the buffer pool mutex;
	buf_LRU_get_free_block() may itself need the mutex. */
	free_block = buf_LRU_get_free_block(0);

	buf_pool_mutex_enter();

	block = (buf_block_t*) buf_page_hash_get(space, offset);

	if (block && buf_page_in_file(&block->page)) {
#ifdef UNIV_IBUF_COUNT_DEBUG
		ut_a(ibuf_count_get(space, offset) == 0);
#endif
#ifdef UNIV_DEBUG_FILE_ACCESSES
		block->page.file_page_was_freed = FALSE;
#endif /* UNIV_DEBUG_FILE_ACCESSES */

		/* Page can be found in buf_pool */
		buf_pool_mutex_exit();

		/* Return the unused pre-allocated block to the pool. */
		buf_block_free(free_block);

		return(buf_page_get_with_no_latch(space, zip_size,
						  offset, mtr));
	}

	/* If we get here, the page was not in buf_pool: init it there */

#ifdef UNIV_DEBUG
	if (buf_debug_prints) {
		fprintf(stderr, "Creating space %lu page %lu to buffer\n",
			(ulong) space, (ulong) offset);
	}
#endif /* UNIV_DEBUG */

	block = free_block;

	mutex_enter(&block->mutex);

	buf_page_init(space, offset, block);

	/* The block must be put to the LRU list */
	buf_LRU_add_block(&block->page, FALSE);

	buf_block_buf_fix_inc(block, __FILE__, __LINE__);
	buf_pool->n_pages_created++;

	if (zip_size) {
		void*	data;
		ibool	lru;

		/* Prevent race conditions during buf_buddy_alloc(),
		which may release and reacquire buf_pool_mutex,
		by IO-fixing and X-latching the block. */

		buf_page_set_io_fix(&block->page, BUF_IO_READ);
		rw_lock_x_lock(&block->lock);

		page_zip_set_size(&block->page.zip, zip_size);
		mutex_exit(&block->mutex);
		/* buf_pool_mutex may be released and reacquired by
		buf_buddy_alloc().  Thus, we must release block->mutex
		in order not to break the latching order in
		the reacquisition of buf_pool_mutex.  We also must
		defer this operation until after the block descriptor
		has been added to buf_pool->LRU and buf_pool->page_hash. */
		data = buf_buddy_alloc(zip_size, &lru);
		mutex_enter(&block->mutex);
		block->page.zip.data = data;

		/* To maintain the invariant
		block->in_unzip_LRU_list
		== buf_page_belongs_to_unzip_LRU(&block->page)
		we have to add this block to unzip_LRU after
		block->page.zip.data is set. */
		ut_ad(buf_page_belongs_to_unzip_LRU(&block->page));
		buf_unzip_LRU_add_block(block, FALSE);

		buf_page_set_io_fix(&block->page, BUF_IO_NONE);
		rw_lock_x_unlock(&block->lock);
	}

	buf_pool_mutex_exit();

	mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);

	buf_page_set_accessed(&block->page, TRUE);

	mutex_exit(&block->mutex);

	/* Delete possible entries for the page from the insert buffer:
	such can exist if the page belonged to an index which was dropped */

	ibuf_merge_or_delete_for_page(NULL, space, offset, zip_size, TRUE);

	/* Flush pages from the end of the LRU list if necessary */
	buf_flush_free_margin();

	frame = block->frame;

	/* Stamp the newly created frame: no prev/next page links, and
	mark the page type as freshly allocated. */
	memset(frame + FIL_PAGE_PREV, 0xff, 4);
	memset(frame + FIL_PAGE_NEXT, 0xff, 4);
	mach_write_to_2(frame + FIL_PAGE_TYPE, FIL_PAGE_TYPE_ALLOCATED);

	/* Reset to zero the file flush lsn field in the page; if the first
	page of an ibdata file is 'created' in this function into the buffer
	pool then we lose the original contents of the file flush lsn stamp.
	Then InnoDB could in a crash recovery print a big, false, corruption
	warning if the stamp contains an lsn bigger than the ib_logfile lsn. */

	memset(frame + FIL_PAGE_FILE_FLUSH_LSN, 0, 8);

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
	ut_a(++buf_dbg_counter % 357 || buf_validate());
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#ifdef UNIV_IBUF_COUNT_DEBUG
	ut_a(ibuf_count_get(buf_block_get_space(block),
			    buf_block_get_page_no(block)) == 0);
#endif
	return(block);
}
2872
 
 
2873
 
/************************************************************************
2874
 
Completes an asynchronous read or write request of a file page to or from
2875
 
the buffer pool. */
2876
 
UNIV_INTERN
2877
 
void
2878
 
buf_page_io_complete(
2879
 
/*=================*/
2880
 
        buf_page_t*     bpage)  /* in: pointer to the block in question */
2881
 
{
2882
 
        enum buf_io_fix io_type;
2883
 
        const ibool     uncompressed = (buf_page_get_state(bpage)
2884
 
                                        == BUF_BLOCK_FILE_PAGE);
2885
 
 
2886
 
        ut_a(buf_page_in_file(bpage));
2887
 
 
2888
 
        /* We do not need protect io_fix here by mutex to read
2889
 
        it because this is the only function where we can change the value
2890
 
        from BUF_IO_READ or BUF_IO_WRITE to some other value, and our code
2891
 
        ensures that this is the only thread that handles the i/o for this
2892
 
        block. */
2893
 
 
2894
 
        io_type = buf_page_get_io_fix(bpage);
2895
 
        ut_ad(io_type == BUF_IO_READ || io_type == BUF_IO_WRITE);
2896
 
 
2897
 
        if (io_type == BUF_IO_READ) {
2898
 
                ulint   read_page_no;
2899
 
                ulint   read_space_id;
2900
 
                byte*   frame;
2901
 
 
2902
 
                if (buf_page_get_zip_size(bpage)) {
2903
 
                        frame = bpage->zip.data;
2904
 
                        buf_pool->n_pend_unzip++;
2905
 
                        if (uncompressed
2906
 
                            && !buf_zip_decompress((buf_block_t*) bpage,
2907
 
                                                   FALSE)) {
2908
 
 
2909
 
                                buf_pool->n_pend_unzip--;
2910
 
                                goto corrupt;
2911
 
                        }
2912
 
                        buf_pool->n_pend_unzip--;
2913
 
                } else {
2914
 
                        ut_a(uncompressed);
2915
 
                        frame = ((buf_block_t*) bpage)->frame;
2916
 
                }
2917
 
 
2918
 
                /* If this page is not uninitialized and not in the
2919
 
                doublewrite buffer, then the page number and space id
2920
 
                should be the same as in block. */
2921
 
                read_page_no = mach_read_from_4(frame + FIL_PAGE_OFFSET);
2922
 
                read_space_id = mach_read_from_4(
2923
 
                        frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
2924
 
 
2925
 
                if (bpage->space == TRX_SYS_SPACE
2926
 
                    && trx_doublewrite_page_inside(bpage->offset)) {
2927
 
 
2928
 
                        ut_print_timestamp(stderr);
2929
 
                        fprintf(stderr,
2930
 
                                "  InnoDB: Error: reading page %lu\n"
2931
 
                                "InnoDB: which is in the"
2932
 
                                " doublewrite buffer!\n",
2933
 
                                (ulong) bpage->offset);
2934
 
                } else if (!read_space_id && !read_page_no) {
2935
 
                        /* This is likely an uninitialized page. */
2936
 
                } else if ((bpage->space
2937
 
                            && bpage->space != read_space_id)
2938
 
                           || bpage->offset != read_page_no) {
2939
 
                        /* We did not compare space_id to read_space_id
2940
 
                        if bpage->space == 0, because the field on the
2941
 
                        page may contain garbage in MySQL < 4.1.1,
2942
 
                        which only supported bpage->space == 0. */
2943
 
 
2944
 
                        ut_print_timestamp(stderr);
2945
 
                        fprintf(stderr,
2946
 
                                "  InnoDB: Error: space id and page n:o"
2947
 
                                " stored in the page\n"
2948
 
                                "InnoDB: read in are %lu:%lu,"
2949
 
                                " should be %lu:%lu!\n",
2950
 
                                (ulong) read_space_id, (ulong) read_page_no,
2951
 
                                (ulong) bpage->space,
2952
 
                                (ulong) bpage->offset);
2953
 
                }
2954
 
 
2955
 
                /* From version 3.23.38 up we store the page checksum
2956
 
                to the 4 first bytes of the page end lsn field */
2957
 
 
2958
 
                if (buf_page_is_corrupted(frame,
2959
 
                                          buf_page_get_zip_size(bpage))) {
2960
 
corrupt:
2961
 
                        fprintf(stderr,
2962
 
                                "InnoDB: Database page corruption on disk"
2963
 
                                " or a failed\n"
2964
 
                                "InnoDB: file read of page %lu.\n"
2965
 
                                "InnoDB: You may have to recover"
2966
 
                                " from a backup.\n",
2967
 
                                (ulong) bpage->offset);
2968
 
                        buf_page_print(frame, buf_page_get_zip_size(bpage));
2969
 
                        fprintf(stderr,
2970
 
                                "InnoDB: Database page corruption on disk"
2971
 
                                " or a failed\n"
2972
 
                                "InnoDB: file read of page %lu.\n"
2973
 
                                "InnoDB: You may have to recover"
2974
 
                                " from a backup.\n",
2975
 
                                (ulong) bpage->offset);
2976
 
                        fputs("InnoDB: It is also possible that"
2977
 
                              " your operating\n"
2978
 
                              "InnoDB: system has corrupted its"
2979
 
                              " own file cache\n"
2980
 
                              "InnoDB: and rebooting your computer"
2981
 
                              " removes the\n"
2982
 
                              "InnoDB: error.\n"
2983
 
                              "InnoDB: If the corrupt page is an index page\n"
2984
 
                              "InnoDB: you can also try to"
2985
 
                              " fix the corruption\n"
2986
 
                              "InnoDB: by dumping, dropping,"
2987
 
                              " and reimporting\n"
2988
 
                              "InnoDB: the corrupt table."
2989
 
                              " You can use CHECK\n"
2990
 
                              "InnoDB: TABLE to scan your"
2991
 
                              " table for corruption.\n"
2992
 
                              "InnoDB: See also"
2993
 
                              " http://dev.mysql.com/doc/refman/5.1/en/"
2994
 
                              "forcing-recovery.html\n"
2995
 
                              "InnoDB: about forcing recovery.\n", stderr);
2996
 
 
2997
 
                        if (srv_force_recovery < SRV_FORCE_IGNORE_CORRUPT) {
2998
 
                                fputs("InnoDB: Ending processing because of"
2999
 
                                      " a corrupt database page.\n",
3000
 
                                      stderr);
3001
 
                                exit(1);
3002
 
                        }
3003
 
                }
3004
 
 
3005
 
                if (recv_recovery_is_on()) {
3006
 
                        /* Pages must be uncompressed for crash recovery. */
3007
 
                        ut_a(uncompressed);
3008
 
                        recv_recover_page(FALSE, TRUE, (buf_block_t*) bpage);
3009
 
                }
3010
 
 
3011
 
                if (uncompressed && !recv_no_ibuf_operations) {
3012
 
                        ibuf_merge_or_delete_for_page(
3013
 
                                (buf_block_t*) bpage, bpage->space,
3014
 
                                bpage->offset, buf_page_get_zip_size(bpage),
3015
 
                                TRUE);
3016
 
                }
3017
 
        }
3018
 
 
3019
 
        buf_pool_mutex_enter();
3020
 
        mutex_enter(buf_page_get_mutex(bpage));
3021
 
 
3022
 
#ifdef UNIV_IBUF_COUNT_DEBUG
3023
 
        if (io_type == BUF_IO_WRITE || uncompressed) {
3024
 
                /* For BUF_IO_READ of compressed-only blocks, the
3025
 
                buffered operations will be merged by buf_page_get_gen()
3026
 
                after the block has been uncompressed. */
3027
 
                ut_a(ibuf_count_get(bpage->space, bpage->offset) == 0);
3028
 
        }
3029
 
#endif
3030
 
        /* Because this thread which does the unlocking is not the same that
3031
 
        did the locking, we use a pass value != 0 in unlock, which simply
3032
 
        removes the newest lock debug record, without checking the thread
3033
 
        id. */
3034
 
 
3035
 
        buf_page_set_io_fix(bpage, BUF_IO_NONE);
3036
 
 
3037
 
        switch (io_type) {
3038
 
        case BUF_IO_READ:
3039
 
                /* NOTE that the call to ibuf may have moved the ownership of
3040
 
                the x-latch to this OS thread: do not let this confuse you in
3041
 
                debugging! */
3042
 
 
3043
 
                ut_ad(buf_pool->n_pend_reads > 0);
3044
 
                buf_pool->n_pend_reads--;
3045
 
                buf_pool->n_pages_read++;
3046
 
 
3047
 
                if (uncompressed) {
3048
 
                        rw_lock_x_unlock_gen(&((buf_block_t*) bpage)->lock,
3049
 
                                             BUF_IO_READ);
3050
 
                }
3051
 
 
3052
 
                break;
3053
 
 
3054
 
        case BUF_IO_WRITE:
3055
 
                /* Write means a flush operation: call the completion
3056
 
                routine in the flush system */
3057
 
 
3058
 
                buf_flush_write_complete(bpage);
3059
 
 
3060
 
                if (uncompressed) {
3061
 
                        rw_lock_s_unlock_gen(&((buf_block_t*) bpage)->lock,
3062
 
                                             BUF_IO_WRITE);
3063
 
                }
3064
 
 
3065
 
                buf_pool->n_pages_written++;
3066
 
 
3067
 
                break;
3068
 
 
3069
 
        default:
3070
 
                ut_error;
3071
 
        }
3072
 
 
3073
 
        mutex_exit(buf_page_get_mutex(bpage));
3074
 
        buf_pool_mutex_exit();
3075
 
 
3076
 
#ifdef UNIV_DEBUG
3077
 
        if (buf_debug_prints) {
3078
 
                fprintf(stderr, "Has %s page space %lu page no %lu\n",
3079
 
                        io_type == BUF_IO_READ ? "read" : "written",
3080
 
                        (ulong) buf_page_get_space(bpage),
3081
 
                        (ulong) buf_page_get_page_no(bpage));
3082
 
        }
3083
 
#endif /* UNIV_DEBUG */
3084
 
}
3085
 
 
3086
 
/*************************************************************************
3087
 
Invalidates the file pages in the buffer pool when an archive recovery is
3088
 
completed. All the file pages buffered must be in a replaceable state when
3089
 
this function is called: not latched and not modified. */
3090
 
UNIV_INTERN
3091
 
void
3092
 
buf_pool_invalidate(void)
3093
 
/*=====================*/
3094
 
{
3095
 
        ibool   freed;
3096
 
 
3097
 
        ut_ad(buf_all_freed());
3098
 
 
3099
 
        freed = TRUE;
3100
 
 
3101
 
        while (freed) {
3102
 
                freed = buf_LRU_search_and_free_block(100);
3103
 
        }
3104
 
 
3105
 
        buf_pool_mutex_enter();
3106
 
 
3107
 
        ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0);
3108
 
        ut_ad(UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0);
3109
 
 
3110
 
        buf_pool_mutex_exit();
3111
 
}
3112
 
 
3113
 
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/*************************************************************************
Validates the buffer buf_pool data structure.  Walks all chunks and both
compressed-page lists, tallies blocks per state, and asserts that the
tallies agree with the buf_pool list lengths and flush counters.
Always returns TRUE (an inconsistency fires ut_error/ut_a instead). */
UNIV_INTERN
ibool
buf_validate(void)
/*==============*/
{
	buf_page_t*	b;
	buf_chunk_t*	chunk;
	ulint		i;
	ulint		n_single_flush	= 0;
	ulint		n_lru_flush	= 0;
	ulint		n_list_flush	= 0;
	ulint		n_lru		= 0;
	ulint		n_flush		= 0;
	ulint		n_free		= 0;
	ulint		n_zip		= 0;

	ut_ad(buf_pool);

	buf_pool_mutex_enter();

	chunk = buf_pool->chunks;

	/* Check the uncompressed blocks. */

	for (i = buf_pool->n_chunks; i--; chunk++) {

		ulint		j;
		buf_block_t*	block = chunk->blocks;

		for (j = chunk->size; j--; block++) {

			mutex_enter(&block->mutex);

			switch (buf_block_get_state(block)) {
			case BUF_BLOCK_ZIP_FREE:
			case BUF_BLOCK_ZIP_PAGE:
			case BUF_BLOCK_ZIP_DIRTY:
				/* These should only occur on
				zip_clean, zip_free[], or flush_list. */
				ut_error;
				break;

			case BUF_BLOCK_FILE_PAGE:
				/* A file page must be reachable through
				the page hash under its (space, page_no). */
				ut_a(buf_page_hash_get(buf_block_get_space(
							       block),
						       buf_block_get_page_no(
							       block))
				     == &block->page);

#ifdef UNIV_IBUF_COUNT_DEBUG
				ut_a(buf_page_get_io_fix(&block->page)
				     == BUF_IO_READ
				     || !ibuf_count_get(buf_block_get_space(
								block),
							buf_block_get_page_no(
								block)));
#endif
				switch (buf_page_get_io_fix(&block->page)) {
				case BUF_IO_NONE:
					break;

				case BUF_IO_WRITE:
					/* A write-fixed page must hold the
					latch matching its flush type. */
					switch (buf_page_get_flush_type(
							&block->page)) {
					case BUF_FLUSH_LRU:
						n_lru_flush++;
						ut_a(rw_lock_is_locked(
							     &block->lock,
							     RW_LOCK_SHARED));
						break;
					case BUF_FLUSH_LIST:
						n_list_flush++;
						break;
					case BUF_FLUSH_SINGLE_PAGE:
						n_single_flush++;
						break;
					default:
						ut_error;
					}

					break;

				case BUF_IO_READ:

					ut_a(rw_lock_is_locked(&block->lock,
							       RW_LOCK_EX));
					break;
				}

				n_lru++;

				/* Nonzero oldest_modification marks the
				page dirty, i.e. on the flush list. */
				if (block->page.oldest_modification > 0) {
					n_flush++;
				}

				break;

			case BUF_BLOCK_NOT_USED:
				n_free++;
				break;

			case BUF_BLOCK_READY_FOR_USE:
			case BUF_BLOCK_MEMORY:
			case BUF_BLOCK_REMOVE_HASH:
				/* do nothing */
				break;
			}

			mutex_exit(&block->mutex);
		}
	}

	mutex_enter(&buf_pool_zip_mutex);

	/* Check clean compressed-only blocks. */

	for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
	     b = UT_LIST_GET_NEXT(list, b)) {
		ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
		switch (buf_page_get_io_fix(b)) {
		case BUF_IO_NONE:
			/* All clean blocks should be I/O-unfixed. */
			break;
		case BUF_IO_READ:
			/* In buf_LRU_free_block(), we temporarily set
			b->io_fix = BUF_IO_READ for a newly allocated
			control block in order to prevent
			buf_page_get_gen() from decompressing the block. */
			break;
		default:
			ut_error;
			break;
		}
		ut_a(!b->oldest_modification);
		ut_a(buf_page_hash_get(b->space, b->offset) == b);

		n_lru++;
		n_zip++;
	}

	/* Check dirty compressed-only blocks. */

	for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
	     b = UT_LIST_GET_NEXT(list, b)) {
		ut_ad(b->in_flush_list);

		switch (buf_page_get_state(b)) {
		case BUF_BLOCK_ZIP_DIRTY:
			ut_a(b->oldest_modification);
			n_lru++;
			n_flush++;
			n_zip++;
			switch (buf_page_get_io_fix(b)) {
			case BUF_IO_NONE:
			case BUF_IO_READ:
				break;

			case BUF_IO_WRITE:
				switch (buf_page_get_flush_type(b)) {
				case BUF_FLUSH_LRU:
					n_lru_flush++;
					break;
				case BUF_FLUSH_LIST:
					n_list_flush++;
					break;
				case BUF_FLUSH_SINGLE_PAGE:
					n_single_flush++;
					break;
				default:
					ut_error;
				}
				break;
			}
			break;
		case BUF_BLOCK_FILE_PAGE:
			/* uncompressed page */
			break;
		case BUF_BLOCK_ZIP_FREE:
		case BUF_BLOCK_ZIP_PAGE:
		case BUF_BLOCK_NOT_USED:
		case BUF_BLOCK_READY_FOR_USE:
		case BUF_BLOCK_MEMORY:
		case BUF_BLOCK_REMOVE_HASH:
			/* These states must never appear on the
			flush list. */
			ut_error;
			break;
		}
		ut_a(buf_page_hash_get(b->space, b->offset) == b);
	}

	mutex_exit(&buf_pool_zip_mutex);

	/* n_zip blocks have no uncompressed frame, so they are not
	counted against curr_size. */
	if (n_lru + n_free > buf_pool->curr_size + n_zip) {
		fprintf(stderr, "n LRU %lu, n free %lu, pool %lu zip %lu\n",
			(ulong) n_lru, (ulong) n_free,
			(ulong) buf_pool->curr_size, (ulong) n_zip);
		ut_error;
	}

	ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == n_lru);
	if (UT_LIST_GET_LEN(buf_pool->free) != n_free) {
		fprintf(stderr, "Free list len %lu, free blocks %lu\n",
			(ulong) UT_LIST_GET_LEN(buf_pool->free),
			(ulong) n_free);
		ut_error;
	}
	ut_a(UT_LIST_GET_LEN(buf_pool->flush_list) == n_flush);

	/* The per-flush-type counters must match what we observed. */
	ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_single_flush);
	ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
	ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush);

	buf_pool_mutex_exit();

	ut_a(buf_LRU_validate());
	ut_a(buf_flush_validate());

	return(TRUE);
}
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
3335
 
 
3336
 
#if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/*************************************************************************
Prints info of the buffer buf_pool data structure to stderr: pool sizes,
pending I/O counters, and an approximate per-index count of buffered
index pages.  Debug builds only; ends by running buf_validate(). */
UNIV_INTERN
void
buf_print(void)
/*===========*/
{
	dulint*		index_ids;
	ulint*		counts;
	ulint		size;
	ulint		i;
	ulint		j;
	dulint		id;
	ulint		n_found;
	buf_chunk_t*	chunk;
	dict_index_t*	index;

	ut_ad(buf_pool);

	size = buf_pool->curr_size;

	/* One slot per pool page suffices: each page contributes at
	most one distinct index id. */
	index_ids = mem_alloc(sizeof(dulint) * size);
	counts = mem_alloc(sizeof(ulint) * size);

	buf_pool_mutex_enter();

	fprintf(stderr,
		"buf_pool size %lu\n"
		"database pages %lu\n"
		"free pages %lu\n"
		"modified database pages %lu\n"
		"n pending decompressions %lu\n"
		"n pending reads %lu\n"
		"n pending flush LRU %lu list %lu single page %lu\n"
		"pages read %lu, created %lu, written %lu\n",
		(ulong) size,
		(ulong) UT_LIST_GET_LEN(buf_pool->LRU),
		(ulong) UT_LIST_GET_LEN(buf_pool->free),
		(ulong) UT_LIST_GET_LEN(buf_pool->flush_list),
		(ulong) buf_pool->n_pend_unzip,
		(ulong) buf_pool->n_pend_reads,
		(ulong) buf_pool->n_flush[BUF_FLUSH_LRU],
		(ulong) buf_pool->n_flush[BUF_FLUSH_LIST],
		(ulong) buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE],
		(ulong) buf_pool->n_pages_read,
		/* Cast added: %lu requires unsigned long, and ulint is
		not necessarily the same type as ulong everywhere. */
		(ulong) buf_pool->n_pages_created,
		(ulong) buf_pool->n_pages_written);

	/* Count the number of blocks belonging to each index in the buffer */

	n_found = 0;

	chunk = buf_pool->chunks;

	for (i = buf_pool->n_chunks; i--; chunk++) {
		buf_block_t*	block		= chunk->blocks;
		ulint		n_blocks	= chunk->size;

		for (; n_blocks--; block++) {
			const buf_frame_t* frame = block->frame;

			if (fil_page_get_type(frame) == FIL_PAGE_INDEX) {

				id = btr_page_get_index_id(frame);

				/* Look for the id in the index_ids array;
				linear scan, acceptable for a debug dump. */
				j = 0;

				while (j < n_found) {

					if (ut_dulint_cmp(index_ids[j],
							  id) == 0) {
						counts[j]++;

						break;
					}
					j++;
				}

				if (j == n_found) {
					n_found++;
					index_ids[j] = id;
					counts[j] = 1;
				}
			}
		}
	}

	buf_pool_mutex_exit();

	/* Print the per-index tallies outside the buf_pool mutex; the
	counts are a snapshot and may be slightly stale. */
	for (i = 0; i < n_found; i++) {
		index = dict_index_get_if_in_cache(index_ids[i]);

		fprintf(stderr,
			"Block count for index %lu in buffer is about %lu",
			(ulong) ut_dulint_get_low(index_ids[i]),
			(ulong) counts[i]);

		if (index) {
			putc(' ', stderr);
			dict_index_name_print(stderr, NULL, index);
		}

		putc('\n', stderr);
	}

	mem_free(index_ids);
	mem_free(counts);

	ut_a(buf_validate());
}
#endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */
3448
 
 
3449
 
/*************************************************************************
3450
 
Returns the number of latched pages in the buffer pool. */
3451
 
UNIV_INTERN
3452
 
ulint
3453
 
buf_get_latched_pages_number(void)
3454
 
/*==============================*/
3455
 
{
3456
 
        buf_chunk_t*    chunk;
3457
 
        buf_page_t*     b;
3458
 
        ulint           i;
3459
 
        ulint           fixed_pages_number = 0;
3460
 
 
3461
 
        buf_pool_mutex_enter();
3462
 
 
3463
 
        chunk = buf_pool->chunks;
3464
 
 
3465
 
        for (i = buf_pool->n_chunks; i--; chunk++) {
3466
 
                buf_block_t*    block;
3467
 
                ulint           j;
3468
 
 
3469
 
                block = chunk->blocks;
3470
 
 
3471
 
                for (j = chunk->size; j--; block++) {
3472
 
                        if (buf_block_get_state(block)
3473
 
                            != BUF_BLOCK_FILE_PAGE) {
3474
 
 
3475
 
                                continue;
3476
 
                        }
3477
 
 
3478
 
                        mutex_enter(&block->mutex);
3479
 
 
3480
 
                        if (block->page.buf_fix_count != 0
3481
 
                            || buf_page_get_io_fix(&block->page)
3482
 
                            != BUF_IO_NONE) {
3483
 
                                fixed_pages_number++;
3484
 
                        }
3485
 
 
3486
 
                        mutex_exit(&block->mutex);
3487
 
                }
3488
 
        }
3489
 
 
3490
 
        mutex_enter(&buf_pool_zip_mutex);
3491
 
 
3492
 
        /* Traverse the lists of clean and dirty compressed-only blocks. */
3493
 
 
3494
 
        for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
3495
 
             b = UT_LIST_GET_NEXT(list, b)) {
3496
 
                ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
3497
 
                ut_a(buf_page_get_io_fix(b) != BUF_IO_WRITE);
3498
 
 
3499
 
                if (b->buf_fix_count != 0
3500
 
                    || buf_page_get_io_fix(b) != BUF_IO_NONE) {
3501
 
                        fixed_pages_number++;
3502
 
                }
3503
 
        }
3504
 
 
3505
 
        for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
3506
 
             b = UT_LIST_GET_NEXT(list, b)) {
3507
 
                ut_ad(b->in_flush_list);
3508
 
 
3509
 
                switch (buf_page_get_state(b)) {
3510
 
                case BUF_BLOCK_ZIP_DIRTY:
3511
 
                        if (b->buf_fix_count != 0
3512
 
                            || buf_page_get_io_fix(b) != BUF_IO_NONE) {
3513
 
                                fixed_pages_number++;
3514
 
                        }
3515
 
                        break;
3516
 
                case BUF_BLOCK_FILE_PAGE:
3517
 
                        /* uncompressed page */
3518
 
                        break;
3519
 
                case BUF_BLOCK_ZIP_FREE:
3520
 
                case BUF_BLOCK_ZIP_PAGE:
3521
 
                case BUF_BLOCK_NOT_USED:
3522
 
                case BUF_BLOCK_READY_FOR_USE:
3523
 
                case BUF_BLOCK_MEMORY:
3524
 
                case BUF_BLOCK_REMOVE_HASH:
3525
 
                        ut_error;
3526
 
                        break;
3527
 
                }
3528
 
        }
3529
 
 
3530
 
        mutex_exit(&buf_pool_zip_mutex);
3531
 
        buf_pool_mutex_exit();
3532
 
 
3533
 
        return(fixed_pages_number);
3534
 
}
3535
 
 
3536
 
/*************************************************************************
3537
 
Returns the number of pending buf pool ios. */
3538
 
UNIV_INTERN
3539
 
ulint
3540
 
buf_get_n_pending_ios(void)
3541
 
/*=======================*/
3542
 
{
3543
 
        return(buf_pool->n_pend_reads
3544
 
               + buf_pool->n_flush[BUF_FLUSH_LRU]
3545
 
               + buf_pool->n_flush[BUF_FLUSH_LIST]
3546
 
               + buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]);
3547
 
}
3548
 
 
3549
 
/*************************************************************************
3550
 
Returns the ratio in percents of modified pages in the buffer pool /
3551
 
database pages in the buffer pool. */
3552
 
UNIV_INTERN
3553
 
ulint
3554
 
buf_get_modified_ratio_pct(void)
3555
 
/*============================*/
3556
 
{
3557
 
        ulint   ratio;
3558
 
 
3559
 
        buf_pool_mutex_enter();
3560
 
 
3561
 
        ratio = (100 * UT_LIST_GET_LEN(buf_pool->flush_list))
3562
 
                / (1 + UT_LIST_GET_LEN(buf_pool->LRU)
3563
 
                   + UT_LIST_GET_LEN(buf_pool->free));
3564
 
 
3565
 
        /* 1 + is there to avoid division by zero */
3566
 
 
3567
 
        buf_pool_mutex_exit();
3568
 
 
3569
 
        return(ratio);
3570
 
}
3571
 
 
3572
 
/*************************************************************************
Prints info of the buffer i/o: pool sizes, pending I/O, cumulative page
counters with per-second rates since the previous printout, the buffer
pool hit rate, and LRU eviction statistics.  Updates the *_old counter
snapshots as a side effect. */
UNIV_INTERN
void
buf_print_io(
/*=========*/
	FILE*	file)	/* in/out: buffer where to print */
{
	time_t	current_time;
	double	time_elapsed;
	ulint	size;

	ut_ad(buf_pool);
	size = buf_pool->curr_size;

	buf_pool_mutex_enter();

	fprintf(file,
		"Buffer pool size   %lu\n"
		"Free buffers       %lu\n"
		"Database pages     %lu\n"
		"Modified db pages  %lu\n"
		"Pending reads %lu\n"
		"Pending writes: LRU %lu, flush list %lu, single page %lu\n",
		(ulong) size,
		(ulong) UT_LIST_GET_LEN(buf_pool->free),
		(ulong) UT_LIST_GET_LEN(buf_pool->LRU),
		(ulong) UT_LIST_GET_LEN(buf_pool->flush_list),
		(ulong) buf_pool->n_pend_reads,
		(ulong) buf_pool->n_flush[BUF_FLUSH_LRU]
		+ buf_pool->init_flush[BUF_FLUSH_LRU],
		(ulong) buf_pool->n_flush[BUF_FLUSH_LIST]
		+ buf_pool->init_flush[BUF_FLUSH_LIST],
		(ulong) buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]);

	current_time = time(NULL);
	/* The 0.001 floor prevents division by zero when two
	printouts happen within the same second. */
	time_elapsed = 0.001 + difftime(current_time,
					buf_pool->last_printout_time);
	buf_pool->last_printout_time = current_time;

	/* Rates are deltas against the snapshots taken at the end of
	the previous printout (the *_old fields). */
	fprintf(file,
		"Pages read %lu, created %lu, written %lu\n"
		"%.2f reads/s, %.2f creates/s, %.2f writes/s\n",
		(ulong) buf_pool->n_pages_read,
		(ulong) buf_pool->n_pages_created,
		(ulong) buf_pool->n_pages_written,
		(buf_pool->n_pages_read - buf_pool->n_pages_read_old)
		/ time_elapsed,
		(buf_pool->n_pages_created - buf_pool->n_pages_created_old)
		/ time_elapsed,
		(buf_pool->n_pages_written - buf_pool->n_pages_written_old)
		/ time_elapsed);

	/* Hit rate per 1000 page gets = 1000 - reads-per-1000-gets,
	over the interval since the last printout. */
	if (buf_pool->n_page_gets > buf_pool->n_page_gets_old) {
		fprintf(file, "Buffer pool hit rate %lu / 1000\n",
			(ulong)
			(1000 - ((1000 * (buf_pool->n_pages_read
					  - buf_pool->n_pages_read_old))
				 / (buf_pool->n_page_gets
				    - buf_pool->n_page_gets_old))));
	} else {
		fputs("No buffer pool page gets since the last printout\n",
		      file);
	}

	/* Refresh the snapshots for the next printout. */
	buf_pool->n_page_gets_old = buf_pool->n_page_gets;
	buf_pool->n_pages_read_old = buf_pool->n_pages_read;
	buf_pool->n_pages_created_old = buf_pool->n_pages_created;
	buf_pool->n_pages_written_old = buf_pool->n_pages_written;

	/* Print some values to help us with visualizing what is
	happening with LRU eviction. */
	fprintf(file,
		"LRU len: %lu, unzip_LRU len: %lu\n"
		"I/O sum[%lu]:cur[%lu], unzip sum[%lu]:cur[%lu]\n",
		UT_LIST_GET_LEN(buf_pool->LRU),
		UT_LIST_GET_LEN(buf_pool->unzip_LRU),
		buf_LRU_stat_sum.io, buf_LRU_stat_cur.io,
		buf_LRU_stat_sum.unzip, buf_LRU_stat_cur.unzip);

	buf_pool_mutex_exit();
}
3654
 
 
3655
 
/**************************************************************************
Refreshes the statistics used to print per-second averages: takes a new
snapshot of the running page counters so that the next buf_print_io()
reports rates relative to this moment. */
UNIV_INTERN
void
buf_refresh_io_stats(void)
/*======================*/
{
	buf_pool->last_printout_time = time(NULL);
	buf_pool->n_page_gets_old = buf_pool->n_page_gets;
	buf_pool->n_pages_read_old = buf_pool->n_pages_read;
	buf_pool->n_pages_created_old = buf_pool->n_pages_created;
	buf_pool->n_pages_written_old = buf_pool->n_pages_written;
}
3668
 
 
3669
 
/*************************************************************************
3670
 
Checks that all file pages in the buffer are in a replaceable state. */
3671
 
UNIV_INTERN
3672
 
ibool
3673
 
buf_all_freed(void)
3674
 
/*===============*/
3675
 
{
3676
 
        buf_chunk_t*    chunk;
3677
 
        ulint           i;
3678
 
 
3679
 
        ut_ad(buf_pool);
3680
 
 
3681
 
        buf_pool_mutex_enter();
3682
 
 
3683
 
        chunk = buf_pool->chunks;
3684
 
 
3685
 
        for (i = buf_pool->n_chunks; i--; chunk++) {
3686
 
 
3687
 
                const buf_block_t* block = buf_chunk_not_freed(chunk);
3688
 
 
3689
 
                if (UNIV_LIKELY_NULL(block)) {
3690
 
                        fprintf(stderr,
3691
 
                                "Page %lu %lu still fixed or dirty\n",
3692
 
                                (ulong) block->page.space,
3693
 
                                (ulong) block->page.offset);
3694
 
                        ut_error;
3695
 
                }
3696
 
        }
3697
 
 
3698
 
        buf_pool_mutex_exit();
3699
 
 
3700
 
        return(TRUE);
3701
 
}
3702
 
 
3703
 
/*************************************************************************
3704
 
Checks that there currently are no pending i/o-operations for the buffer
3705
 
pool. */
3706
 
UNIV_INTERN
3707
 
ibool
3708
 
buf_pool_check_no_pending_io(void)
3709
 
/*==============================*/
3710
 
                                /* out: TRUE if there is no pending i/o */
3711
 
{
3712
 
        ibool   ret;
3713
 
 
3714
 
        buf_pool_mutex_enter();
3715
 
 
3716
 
        if (buf_pool->n_pend_reads + buf_pool->n_flush[BUF_FLUSH_LRU]
3717
 
            + buf_pool->n_flush[BUF_FLUSH_LIST]
3718
 
            + buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]) {
3719
 
                ret = FALSE;
3720
 
        } else {
3721
 
                ret = TRUE;
3722
 
        }
3723
 
 
3724
 
        buf_pool_mutex_exit();
3725
 
 
3726
 
        return(ret);
3727
 
}
3728
 
 
3729
 
/*************************************************************************
3730
 
Gets the current length of the free list of buffer blocks. */
3731
 
UNIV_INTERN
3732
 
ulint
3733
 
buf_get_free_list_len(void)
3734
 
/*=======================*/
3735
 
{
3736
 
        ulint   len;
3737
 
 
3738
 
        buf_pool_mutex_enter();
3739
 
 
3740
 
        len = UT_LIST_GET_LEN(buf_pool->free);
3741
 
 
3742
 
        buf_pool_mutex_exit();
3743
 
 
3744
 
        return(len);
3745
 
}