~drizzle-trunk/drizzle/development

« back to all changes in this revision

Viewing changes to storage/innobase/buf/buf0buf.c

  • Committer: Brian Aker
  • Date: 2008-11-04 15:39:09 UTC
  • mfrom: (575.1.2 devel)
  • Revision ID: brian@tangent.org-20081104153909-c72hn65udxs1ccal
Merge of Monty's work

Show diffs side-by-side

added added

removed removed

Lines of Context:
26
26
#include "buf0buf.ic"
27
27
#endif
28
28
 
 
29
#include "buf0buddy.h"
29
30
#include "mem0mem.h"
30
31
#include "btr0btr.h"
31
32
#include "fil0fil.h"
37
38
#include "log0log.h"
38
39
#include "trx0undo.h"
39
40
#include "srv0srv.h"
 
41
#include "page0zip.h"
40
42
 
41
43
/*
42
44
                IMPLEMENTATION OF THE BUFFER POOL
126
128
                Lists of blocks
127
129
                ---------------
128
130
 
129
 
There are several lists of control blocks. The free list contains
130
 
blocks which are currently not used.
131
 
 
132
 
The LRU-list contains all the blocks holding a file page
 
131
There are several lists of control blocks.
 
132
 
 
133
The free list (buf_pool->free) contains blocks which are currently not
 
134
used.
 
135
 
 
136
The common LRU list contains all the blocks holding a file page
133
137
except those for which the bufferfix count is non-zero.
134
138
The pages are in the LRU list roughly in the order of the last
135
139
access to the page, so that the oldest pages are at the end of the
144
148
of the LRU list, we make sure that most of the buf_pool stays in the
145
149
main memory, undisturbed.
146
150
 
147
 
The chain of modified blocks contains the blocks
 
151
The unzip_LRU list contains a subset of the common LRU list.  The
 
152
blocks on the unzip_LRU list hold a compressed file page and the
 
153
corresponding uncompressed page frame.  A block is in unzip_LRU if and
 
154
only if the predicate buf_page_belongs_to_unzip_LRU(&block->page)
 
155
holds.  The blocks in unzip_LRU will be in the same order as they are in
 
156
the common LRU list.  That is, each manipulation of the common LRU
 
157
list will result in the same manipulation of the unzip_LRU list.
 
158
 
 
159
The chain of modified blocks (buf_pool->flush_list) contains the blocks
148
160
holding file pages that have been modified in the memory
149
161
but not written to disk yet. The block with the oldest modification
150
162
which has not yet been written to disk is at the end of the chain.
151
163
 
 
164
The chain of unmodified compressed blocks (buf_pool->zip_clean)
 
165
contains the control blocks (buf_page_t) of those compressed pages
 
166
that are not in buf_pool->flush_list and for which no uncompressed
 
167
page has been allocated in the buffer pool.  The control blocks for
 
168
uncompressed pages are accessible via buf_block_t objects that are
 
169
reachable via buf_pool->chunks[].
 
170
 
 
171
The chains of free memory blocks (buf_pool->zip_free[]) are used by
 
172
the buddy allocator (buf0buddy.c) to keep track of currently unused
 
173
memory blocks of size sizeof(buf_page_t)..UNIV_PAGE_SIZE / 2.  These
 
174
blocks are inside the UNIV_PAGE_SIZE-sized memory blocks of type
 
175
BUF_BLOCK_MEMORY that the buddy allocator requests from the buffer
 
176
pool.  The buddy allocator is solely used for allocating control
 
177
blocks for compressed pages (buf_page_t) and compressed page frames.
 
178
 
152
179
                Loading a file page
153
180
                -------------------
154
181
 
198
225
in a tablespace) have recently been referenced, we may predict
199
226
that the whole area may be needed in the near future, and issue
200
227
the read requests for the whole area.
201
 
 
202
 
                AWE implementation
203
 
                ------------------
204
 
 
205
 
By a 'block' we mean the buffer header of type buf_block_t. By a 'page'
206
 
we mean the physical 16 kB memory area allocated from RAM for that block.
207
 
By a 'frame' we mean a 16 kB area in the virtual address space of the
208
 
process, in the frame_mem of buf_pool.
209
 
 
210
 
We can map pages to the frames of the buffer pool.
211
 
 
212
 
1) A buffer block allocated to use as a non-data page, e.g., to the lock
213
 
table, is always mapped to a frame.
214
 
2) A bufferfixed or io-fixed data page is always mapped to a frame.
215
 
3) When we need to map a block to frame, we look from the list
216
 
awe_LRU_free_mapped and try to unmap its last block, but note that
217
 
bufferfixed or io-fixed pages cannot be unmapped.
218
 
4) For every frame in the buffer pool there is always a block whose page is
219
 
mapped to it. When we create the buffer pool, we map the first elements
220
 
in the free list to the frames.
221
 
5) When we have AWE enabled, we disable adaptive hash indexes.
222
228
*/
223
229
 
224
230
/* Value in microseconds */
225
 
static const int WAIT_FOR_READ  = 20000;
226
 
 
227
 
buf_pool_t*     buf_pool = NULL; /* The buffer buf_pool of the database */
228
 
 
229
 
#ifdef UNIV_DEBUG
230
 
ulint           buf_dbg_counter = 0; /* This is used to insert validation
 
231
static const int WAIT_FOR_READ  = 5000;
 
232
 
 
233
/* The buffer pool of the database */
 
234
UNIV_INTERN buf_pool_t* buf_pool = NULL;
 
235
 
 
236
/* mutex protecting the buffer pool struct and control blocks, except the
 
237
read-write lock in them */
 
238
UNIV_INTERN mutex_t             buf_pool_mutex;
 
239
/* mutex protecting the control blocks of compressed-only pages
 
240
(of type buf_page_t, not buf_block_t) */
 
241
UNIV_INTERN mutex_t             buf_pool_zip_mutex;
 
242
 
 
243
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 
244
static ulint    buf_dbg_counter = 0; /* This is used to insert validation
231
245
                                        operations in execution in the
232
246
                                        debug version */
233
 
ibool           buf_debug_prints = FALSE; /* If this is set TRUE,
234
 
                                        the program prints info whenever
235
 
                                        read-ahead or flush occurs */
 
247
/** Flag to forbid the release of the buffer pool mutex.
 
248
Protected by buf_pool_mutex. */
 
249
UNIV_INTERN ulint               buf_pool_mutex_exit_forbidden = 0;
 
250
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
 
251
#ifdef UNIV_DEBUG
 
252
/* If this is set TRUE, the program prints info whenever
 
253
read-ahead or flush occurs */
 
254
UNIV_INTERN ibool               buf_debug_prints = FALSE;
236
255
#endif /* UNIV_DEBUG */
 
256
 
 
257
/* A chunk of buffers.  The buffer pool is allocated in chunks. */
 
258
struct buf_chunk_struct{
 
259
        ulint           mem_size;       /* allocated size of the chunk */
 
260
        ulint           size;           /* size of frames[] and blocks[] */
 
261
        void*           mem;            /* pointer to the memory area which
 
262
                                        was allocated for the frames */
 
263
        buf_block_t*    blocks;         /* array of buffer control blocks */
 
264
};
 
265
 
237
266
/************************************************************************
238
267
Calculates a page checksum which is stored to the page when it is written
239
268
to a file. Note that we must be careful to calculate the same value on
240
269
32-bit and 64-bit architectures. */
241
 
 
 
270
UNIV_INTERN
242
271
ulint
243
272
buf_calc_page_new_checksum(
244
273
/*=======================*/
245
 
                        /* out: checksum */
246
 
        byte*    page)  /* in: buffer page */
 
274
                                /* out: checksum */
 
275
        const byte*     page)   /* in: buffer page */
247
276
{
248
277
        ulint checksum;
249
278
 
272
301
NOTE: we must first store the new formula checksum to
273
302
FIL_PAGE_SPACE_OR_CHKSUM before calculating and storing this old checksum
274
303
because this takes that field as an input! */
275
 
 
 
304
UNIV_INTERN
276
305
ulint
277
306
buf_calc_page_old_checksum(
278
307
/*=======================*/
279
 
                        /* out: checksum */
280
 
        byte*    page)  /* in: buffer page */
 
308
                                /* out: checksum */
 
309
        const byte*     page)   /* in: buffer page */
281
310
{
282
311
        ulint checksum;
283
312
 
290
319
 
291
320
/************************************************************************
292
321
Checks if a page is corrupt. */
293
 
 
 
322
UNIV_INTERN
294
323
ibool
295
324
buf_page_is_corrupted(
296
325
/*==================*/
297
 
                                /* out: TRUE if corrupted */
298
 
        byte*   read_buf)       /* in: a database page */
 
326
                                        /* out: TRUE if corrupted */
 
327
        const byte*     read_buf,       /* in: a database page */
 
328
        ulint           zip_size)       /* in: size of compressed page;
 
329
                                        0 for uncompressed pages */
299
330
{
300
 
        ulint   checksum;
301
 
        ulint   old_checksum;
302
 
        ulint   checksum_field;
303
 
        ulint   old_checksum_field;
 
331
        ulint           checksum_field;
 
332
        ulint           old_checksum_field;
304
333
#ifndef UNIV_HOTBACKUP
305
 
        dulint  current_lsn;
 
334
        ib_uint64_t     current_lsn;
306
335
#endif
307
 
        if (mach_read_from_4(read_buf + FIL_PAGE_LSN + 4)
308
 
            != mach_read_from_4(read_buf + UNIV_PAGE_SIZE
309
 
                                - FIL_PAGE_END_LSN_OLD_CHKSUM + 4)) {
 
336
        if (UNIV_LIKELY(!zip_size)
 
337
            && memcmp(read_buf + FIL_PAGE_LSN + 4,
 
338
                      read_buf + UNIV_PAGE_SIZE
 
339
                      - FIL_PAGE_END_LSN_OLD_CHKSUM + 4, 4)) {
310
340
 
311
341
                /* Stored log sequence numbers at the start and the end
312
342
                of page do not match */
316
346
 
317
347
#ifndef UNIV_HOTBACKUP
318
348
        if (recv_lsn_checks_on && log_peek_lsn(&current_lsn)) {
319
 
                if (ut_dulint_cmp(current_lsn,
320
 
                                  mach_read_from_8(read_buf + FIL_PAGE_LSN))
321
 
                    < 0) {
 
349
                if (current_lsn < mach_read_ull(read_buf + FIL_PAGE_LSN)) {
322
350
                        ut_print_timestamp(stderr);
323
351
 
324
352
                        fprintf(stderr,
325
353
                                "  InnoDB: Error: page %lu log sequence number"
326
 
                                " %lu %lu\n"
 
354
                                " %"PRIu64"\n"
327
355
                                "InnoDB: is in the future! Current system "
328
 
                                "log sequence number %lu %lu.\n"
 
356
                                "log sequence number %"PRIu64".\n"
329
357
                                "InnoDB: Your database may be corrupt or "
330
358
                                "you may have copied the InnoDB\n"
331
359
                                "InnoDB: tablespace but not the InnoDB "
335
363
                                "InnoDB: for more information.\n",
336
364
                                (ulong) mach_read_from_4(read_buf
337
365
                                                         + FIL_PAGE_OFFSET),
338
 
                                (ulong) ut_dulint_get_high
339
 
                                (mach_read_from_8(read_buf + FIL_PAGE_LSN)),
340
 
                                (ulong) ut_dulint_get_low
341
 
                                (mach_read_from_8(read_buf + FIL_PAGE_LSN)),
342
 
                                (ulong) ut_dulint_get_high(current_lsn),
343
 
                                (ulong) ut_dulint_get_low(current_lsn));
 
366
                                mach_read_ull(read_buf + FIL_PAGE_LSN),
 
367
                                current_lsn);
344
368
                }
345
369
        }
346
370
#endif
350
374
        BUF_NO_CHECKSUM_MAGIC which might be stored by InnoDB with checksums
351
375
        disabled. Otherwise, skip checksum calculation and return FALSE */
352
376
 
353
 
        if (srv_use_checksums) {
354
 
                old_checksum = buf_calc_page_old_checksum(read_buf);
 
377
        if (UNIV_LIKELY(srv_use_checksums)) {
 
378
                checksum_field = mach_read_from_4(read_buf
 
379
                                                  + FIL_PAGE_SPACE_OR_CHKSUM);
 
380
 
 
381
                if (UNIV_UNLIKELY(zip_size)) {
 
382
                        return(checksum_field != BUF_NO_CHECKSUM_MAGIC
 
383
                               && checksum_field
 
384
                               != page_zip_calc_checksum(read_buf, zip_size));
 
385
                }
355
386
 
356
387
                old_checksum_field = mach_read_from_4(
357
388
                        read_buf + UNIV_PAGE_SIZE
367
398
 
368
399
                if (old_checksum_field != mach_read_from_4(read_buf
369
400
                                                           + FIL_PAGE_LSN)
370
 
                    && old_checksum_field != old_checksum
371
 
                    && old_checksum_field != BUF_NO_CHECKSUM_MAGIC) {
 
401
                    && old_checksum_field != BUF_NO_CHECKSUM_MAGIC
 
402
                    && old_checksum_field
 
403
                    != buf_calc_page_old_checksum(read_buf)) {
372
404
 
373
405
                        return(TRUE);
374
406
                }
375
407
 
376
 
                checksum = buf_calc_page_new_checksum(read_buf);
377
 
                checksum_field = mach_read_from_4(read_buf
378
 
                                                  + FIL_PAGE_SPACE_OR_CHKSUM);
379
 
 
380
408
                /* InnoDB versions < 4.0.14 and < 4.1.1 stored the space id
381
409
                (always equal to 0), to FIL_PAGE_SPACE_OR_CHKSUM */
382
410
 
383
 
                if (checksum_field != 0 && checksum_field != checksum
384
 
                    && checksum_field != BUF_NO_CHECKSUM_MAGIC) {
 
411
                if (checksum_field != 0
 
412
                    && checksum_field != BUF_NO_CHECKSUM_MAGIC
 
413
                    && checksum_field
 
414
                    != buf_calc_page_new_checksum(read_buf)) {
385
415
 
386
416
                        return(TRUE);
387
417
                }
392
422
 
393
423
/************************************************************************
394
424
Prints a page to stderr. */
395
 
 
 
425
UNIV_INTERN
396
426
void
397
427
buf_page_print(
398
428
/*===========*/
399
 
        byte*   read_buf)       /* in: a database page */
 
429
        const byte*     read_buf,       /* in: a database page */
 
430
        ulint           zip_size)       /* in: compressed page size, or
 
431
                                0 for uncompressed pages */
400
432
{
401
433
        dict_index_t*   index;
402
434
        ulint           checksum;
403
435
        ulint           old_checksum;
 
436
        ulint           size    = zip_size;
 
437
 
 
438
        if (!size) {
 
439
                size = UNIV_PAGE_SIZE;
 
440
        }
404
441
 
405
442
        ut_print_timestamp(stderr);
406
443
        fprintf(stderr, "  InnoDB: Page dump in ascii and hex (%lu bytes):\n",
407
 
                (ulint)UNIV_PAGE_SIZE);
408
 
        ut_print_buf(stderr, read_buf, UNIV_PAGE_SIZE);
 
444
                (ulong) size);
 
445
        ut_print_buf(stderr, read_buf, size);
409
446
        fputs("InnoDB: End of page dump\n", stderr);
410
447
 
 
448
        if (zip_size) {
 
449
                /* Print compressed page. */
 
450
 
 
451
                switch (fil_page_get_type(read_buf)) {
 
452
                case FIL_PAGE_TYPE_ZBLOB:
 
453
                case FIL_PAGE_TYPE_ZBLOB2:
 
454
                        checksum = srv_use_checksums
 
455
                                ? page_zip_calc_checksum(read_buf, zip_size)
 
456
                                : BUF_NO_CHECKSUM_MAGIC;
 
457
                        ut_print_timestamp(stderr);
 
458
                        fprintf(stderr,
 
459
                                "  InnoDB: Compressed BLOB page"
 
460
                                " checksum %lu, stored %lu\n"
 
461
                                "InnoDB: Page lsn %lu %lu\n"
 
462
                                "InnoDB: Page number (if stored"
 
463
                                " to page already) %lu,\n"
 
464
                                "InnoDB: space id (if stored"
 
465
                                " to page already) %lu\n",
 
466
                                (ulong) checksum,
 
467
                                (ulong) mach_read_from_4(
 
468
                                        read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
 
469
                                (ulong) mach_read_from_4(
 
470
                                        read_buf + FIL_PAGE_LSN),
 
471
                                (ulong) mach_read_from_4(
 
472
                                        read_buf + (FIL_PAGE_LSN + 4)),
 
473
                                (ulong) mach_read_from_4(
 
474
                                        read_buf + FIL_PAGE_OFFSET),
 
475
                                (ulong) mach_read_from_4(
 
476
                                        read_buf
 
477
                                        + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
 
478
                        return;
 
479
                default:
 
480
                        ut_print_timestamp(stderr);
 
481
                        fprintf(stderr,
 
482
                                "  InnoDB: unknown page type %lu,"
 
483
                                " assuming FIL_PAGE_INDEX\n",
 
484
                                fil_page_get_type(read_buf));
 
485
                        /* fall through */
 
486
                case FIL_PAGE_INDEX:
 
487
                        checksum = srv_use_checksums
 
488
                                ? page_zip_calc_checksum(read_buf, zip_size)
 
489
                                : BUF_NO_CHECKSUM_MAGIC;
 
490
 
 
491
                        ut_print_timestamp(stderr);
 
492
                        fprintf(stderr,
 
493
                                "  InnoDB: Compressed page checksum %lu,"
 
494
                                " stored %lu\n"
 
495
                                "InnoDB: Page lsn %lu %lu\n"
 
496
                                "InnoDB: Page number (if stored"
 
497
                                " to page already) %lu,\n"
 
498
                                "InnoDB: space id (if stored"
 
499
                                " to page already) %lu\n",
 
500
                                (ulong) checksum,
 
501
                                (ulong) mach_read_from_4(
 
502
                                        read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
 
503
                                (ulong) mach_read_from_4(
 
504
                                        read_buf + FIL_PAGE_LSN),
 
505
                                (ulong) mach_read_from_4(
 
506
                                        read_buf + (FIL_PAGE_LSN + 4)),
 
507
                                (ulong) mach_read_from_4(
 
508
                                        read_buf + FIL_PAGE_OFFSET),
 
509
                                (ulong) mach_read_from_4(
 
510
                                        read_buf
 
511
                                        + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
 
512
                        return;
 
513
                case FIL_PAGE_TYPE_XDES:
 
514
                        /* This is an uncompressed page. */
 
515
                        break;
 
516
                }
 
517
        }
 
518
 
411
519
        checksum = srv_use_checksums
412
520
                ? buf_calc_page_new_checksum(read_buf) : BUF_NO_CHECKSUM_MAGIC;
413
521
        old_checksum = srv_use_checksums
452
560
                fprintf(stderr,
453
561
                        "InnoDB: Page may be an index page where"
454
562
                        " index id is %lu %lu\n",
455
 
                        (ulong) ut_dulint_get_high
456
 
                        (btr_page_get_index_id(read_buf)),
457
 
                        (ulong) ut_dulint_get_low
458
 
                        (btr_page_get_index_id(read_buf)));
 
563
                        (ulong) ut_dulint_get_high(
 
564
                                btr_page_get_index_id(read_buf)),
 
565
                        (ulong) ut_dulint_get_low(
 
566
                                btr_page_get_index_id(read_buf)));
459
567
 
 
568
#ifdef UNIV_HOTBACKUP
460
569
                /* If the code is in ibbackup, dict_sys may be uninitialized,
461
570
                i.e., NULL */
462
571
 
463
 
                if (dict_sys != NULL) {
 
572
                if (dict_sys == NULL) {
 
573
                        break;
 
574
                }
 
575
#endif /* UNIV_HOTBACKUP */
464
576
 
465
 
                        index = dict_index_find_on_id_low(
466
 
                                btr_page_get_index_id(read_buf));
467
 
                        if (index) {
468
 
                                fputs("InnoDB: (", stderr);
469
 
                                dict_index_name_print(stderr, NULL, index);
470
 
                                fputs(")\n", stderr);
471
 
                        }
 
577
                index = dict_index_find_on_id_low(
 
578
                        btr_page_get_index_id(read_buf));
 
579
                if (index) {
 
580
                        fputs("InnoDB: (", stderr);
 
581
                        dict_index_name_print(stderr, NULL, index);
 
582
                        fputs(")\n", stderr);
472
583
                }
473
584
                break;
474
585
        case FIL_PAGE_INODE:
506
617
                fputs("InnoDB: Page may be a BLOB page\n",
507
618
                      stderr);
508
619
                break;
 
620
        case FIL_PAGE_TYPE_ZBLOB:
 
621
        case FIL_PAGE_TYPE_ZBLOB2:
 
622
                fputs("InnoDB: Page may be a compressed BLOB page\n",
 
623
                      stderr);
 
624
                break;
509
625
        }
510
626
}
511
627
 
516
632
buf_block_init(
517
633
/*===========*/
518
634
        buf_block_t*    block,  /* in: pointer to control block */
519
 
        byte*           frame)  /* in: pointer to buffer frame, or NULL if in
520
 
                                the case of AWE there is no frame */
 
635
        byte*           frame)  /* in: pointer to buffer frame */
521
636
{
522
 
        block->magic_n = 0;
523
 
 
524
 
        block->state = BUF_BLOCK_NOT_USED;
 
637
        UNIV_MEM_DESC(frame, UNIV_PAGE_SIZE, block);
525
638
 
526
639
        block->frame = frame;
527
640
 
528
 
        block->awe_info = NULL;
529
 
 
530
 
        block->buf_fix_count = 0;
531
 
        block->io_fix = 0;
532
 
 
533
 
        block->modify_clock = ut_dulint_zero;
534
 
 
535
 
        block->file_page_was_freed = FALSE;
 
641
        block->page.state = BUF_BLOCK_NOT_USED;
 
642
        block->page.buf_fix_count = 0;
 
643
        block->page.io_fix = BUF_IO_NONE;
 
644
 
 
645
        block->modify_clock = 0;
 
646
 
 
647
#ifdef UNIV_DEBUG_FILE_ACCESSES
 
648
        block->page.file_page_was_freed = FALSE;
 
649
#endif /* UNIV_DEBUG_FILE_ACCESSES */
536
650
 
537
651
        block->check_index_page_at_flush = FALSE;
538
652
        block->index = NULL;
539
653
 
540
 
        block->in_free_list = FALSE;
541
 
        block->in_LRU_list = FALSE;
542
 
 
 
654
#ifdef UNIV_DEBUG
 
655
        block->page.in_page_hash = FALSE;
 
656
        block->page.in_zip_hash = FALSE;
 
657
        block->page.in_flush_list = FALSE;
 
658
        block->page.in_free_list = FALSE;
 
659
        block->page.in_LRU_list = FALSE;
 
660
        block->in_unzip_LRU_list = FALSE;
543
661
        block->n_pointers = 0;
 
662
#endif /* UNIV_DEBUG */
 
663
        page_zip_des_init(&block->page.zip);
544
664
 
545
665
        mutex_create(&block->mutex, SYNC_BUF_BLOCK);
546
666
 
553
673
}
554
674
 
555
675
/************************************************************************
 
676
Allocates a chunk of buffer frames. */
 
677
static
 
678
buf_chunk_t*
 
679
buf_chunk_init(
 
680
/*===========*/
 
681
                                        /* out: chunk, or NULL on failure */
 
682
        buf_chunk_t*    chunk,          /* out: chunk of buffers */
 
683
        ulint           mem_size)       /* in: requested size in bytes */
 
684
{
 
685
        buf_block_t*    block;
 
686
        byte*           frame;
 
687
        ulint           i;
 
688
 
 
689
        /* Round down to a multiple of page size,
 
690
        although it already should be. */
 
691
        mem_size = ut_2pow_round(mem_size, UNIV_PAGE_SIZE);
 
692
        /* Reserve space for the block descriptors. */
 
693
        mem_size += ut_2pow_round((mem_size / UNIV_PAGE_SIZE) * (sizeof *block)
 
694
                                  + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE);
 
695
 
 
696
        chunk->mem_size = mem_size;
 
697
        chunk->mem = os_mem_alloc_large(&chunk->mem_size);
 
698
 
 
699
        if (UNIV_UNLIKELY(chunk->mem == NULL)) {
 
700
 
 
701
                return(NULL);
 
702
        }
 
703
 
 
704
        /* Allocate the block descriptors from
 
705
        the start of the memory block. */
 
706
        chunk->blocks = chunk->mem;
 
707
 
 
708
        /* Align a pointer to the first frame.  Note that when
 
709
        os_large_page_size is smaller than UNIV_PAGE_SIZE,
 
710
        we may allocate one fewer block than requested.  When
 
711
        it is bigger, we may allocate more blocks than requested. */
 
712
 
 
713
        frame = ut_align(chunk->mem, UNIV_PAGE_SIZE);
 
714
        chunk->size = chunk->mem_size / UNIV_PAGE_SIZE
 
715
                - (frame != chunk->mem);
 
716
 
 
717
        /* Subtract the space needed for block descriptors. */
 
718
        {
 
719
                ulint   size = chunk->size;
 
720
 
 
721
                while (frame < (byte*) (chunk->blocks + size)) {
 
722
                        frame += UNIV_PAGE_SIZE;
 
723
                        size--;
 
724
                }
 
725
 
 
726
                chunk->size = size;
 
727
        }
 
728
 
 
729
        /* Init block structs and assign frames for them. Then we
 
730
        assign the frames to the first blocks (we already mapped the
 
731
        memory above). */
 
732
 
 
733
        block = chunk->blocks;
 
734
 
 
735
        for (i = chunk->size; i--; ) {
 
736
 
 
737
                buf_block_init(block, frame);
 
738
 
 
739
#ifdef HAVE_purify
 
740
                /* Wipe contents of frame to eliminate a Purify warning */
 
741
                memset(block->frame, '\0', UNIV_PAGE_SIZE);
 
742
#endif
 
743
                /* Add the block to the free list */
 
744
                UT_LIST_ADD_LAST(list, buf_pool->free, (&block->page));
 
745
                ut_d(block->page.in_free_list = TRUE);
 
746
 
 
747
                block++;
 
748
                frame += UNIV_PAGE_SIZE;
 
749
        }
 
750
 
 
751
        return(chunk);
 
752
}
 
753
 
 
754
#ifdef UNIV_DEBUG
 
755
/*************************************************************************
 
756
Finds a block in the given buffer chunk that points to a
 
757
given compressed page. */
 
758
static
 
759
buf_block_t*
 
760
buf_chunk_contains_zip(
 
761
/*===================*/
 
762
                                /* out: buffer block pointing to
 
763
                                the compressed page, or NULL */
 
764
        buf_chunk_t*    chunk,  /* in: chunk being checked */
 
765
        const void*     data)   /* in: pointer to compressed page */
 
766
{
 
767
        buf_block_t*    block;
 
768
        ulint           i;
 
769
 
 
770
        ut_ad(buf_pool);
 
771
        ut_ad(buf_pool_mutex_own());
 
772
 
 
773
        block = chunk->blocks;
 
774
 
 
775
        for (i = chunk->size; i--; block++) {
 
776
                if (block->page.zip.data == data) {
 
777
 
 
778
                        return(block);
 
779
                }
 
780
        }
 
781
 
 
782
        return(NULL);
 
783
}
 
784
 
 
785
/*************************************************************************
 
786
Finds a block in the buffer pool that points to a
 
787
given compressed page. */
 
788
UNIV_INTERN
 
789
buf_block_t*
 
790
buf_pool_contains_zip(
 
791
/*==================*/
 
792
                                /* out: buffer block pointing to
 
793
                                the compressed page, or NULL */
 
794
        const void*     data)   /* in: pointer to compressed page */
 
795
{
 
796
        ulint           n;
 
797
        buf_chunk_t*    chunk = buf_pool->chunks;
 
798
 
 
799
        for (n = buf_pool->n_chunks; n--; chunk++) {
 
800
                buf_block_t* block = buf_chunk_contains_zip(chunk, data);
 
801
 
 
802
                if (block) {
 
803
                        return(block);
 
804
                }
 
805
        }
 
806
 
 
807
        return(NULL);
 
808
}
 
809
#endif /* UNIV_DEBUG */
 
810
 
 
811
/*************************************************************************
 
812
Checks that all file pages in the buffer chunk are in a replaceable state. */
 
813
static
 
814
const buf_block_t*
 
815
buf_chunk_not_freed(
 
816
/*================*/
 
817
                                /* out: address of a non-free block,
 
818
                                or NULL if all freed */
 
819
        buf_chunk_t*    chunk)  /* in: chunk being checked */
 
820
{
 
821
        buf_block_t*    block;
 
822
        ulint           i;
 
823
 
 
824
        ut_ad(buf_pool);
 
825
        ut_ad(buf_pool_mutex_own());
 
826
 
 
827
        block = chunk->blocks;
 
828
 
 
829
        for (i = chunk->size; i--; block++) {
 
830
                mutex_enter(&block->mutex);
 
831
 
 
832
                if (buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE
 
833
                    && !buf_flush_ready_for_replace(&block->page)) {
 
834
 
 
835
                        mutex_exit(&block->mutex);
 
836
                        return(block);
 
837
                }
 
838
 
 
839
                mutex_exit(&block->mutex);
 
840
        }
 
841
 
 
842
        return(NULL);
 
843
}
 
844
 
 
845
/*************************************************************************
 
846
Checks that all blocks in the buffer chunk are in BUF_BLOCK_NOT_USED state. */
 
847
static
 
848
ibool
 
849
buf_chunk_all_free(
 
850
/*===============*/
 
851
                                        /* out: TRUE if all freed */
 
852
        const buf_chunk_t*      chunk)  /* in: chunk being checked */
 
853
{
 
854
        const buf_block_t*      block;
 
855
        ulint                   i;
 
856
 
 
857
        ut_ad(buf_pool);
 
858
        ut_ad(buf_pool_mutex_own());
 
859
 
 
860
        block = chunk->blocks;
 
861
 
 
862
        for (i = chunk->size; i--; block++) {
 
863
 
 
864
                if (buf_block_get_state(block) != BUF_BLOCK_NOT_USED) {
 
865
 
 
866
                        return(FALSE);
 
867
                }
 
868
        }
 
869
 
 
870
        return(TRUE);
 
871
}
 
872
 
 
873
/************************************************************************
 
874
Frees a chunk of buffer frames. */
 
875
static
 
876
void
 
877
buf_chunk_free(
 
878
/*===========*/
 
879
        buf_chunk_t*    chunk)          /* in: chunk of buffers */
 
880
{
 
881
        buf_block_t*            block;
 
882
        const buf_block_t*      block_end;
 
883
 
 
884
        ut_ad(buf_pool_mutex_own());
 
885
 
 
886
        block_end = chunk->blocks + chunk->size;
 
887
 
 
888
        for (block = chunk->blocks; block < block_end; block++) {
 
889
                ut_a(buf_block_get_state(block) == BUF_BLOCK_NOT_USED);
 
890
                ut_a(!block->page.zip.data);
 
891
 
 
892
                ut_ad(!block->page.in_LRU_list);
 
893
                ut_ad(!block->in_unzip_LRU_list);
 
894
                ut_ad(!block->page.in_flush_list);
 
895
                /* Remove the block from the free list. */
 
896
                ut_ad(block->page.in_free_list);
 
897
                UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
 
898
 
 
899
                /* Free the latches. */
 
900
                mutex_free(&block->mutex);
 
901
                rw_lock_free(&block->lock);
 
902
#ifdef UNIV_SYNC_DEBUG
 
903
                rw_lock_free(&block->debug_latch);
 
904
#endif /* UNIV_SYNC_DEBUG */
 
905
                UNIV_MEM_UNDESC(block);
 
906
        }
 
907
 
 
908
        os_mem_free_large(chunk->mem, chunk->mem_size);
 
909
}
 
910
 
 
911
/************************************************************************
556
912
Creates the buffer pool. */
557
 
 
 
913
UNIV_INTERN
558
914
buf_pool_t*
559
 
buf_pool_init(
560
 
/*==========*/
 
915
buf_pool_init(void)
 
916
/*===============*/
561
917
                                /* out, own: buf_pool object, NULL if not
562
918
                                enough memory or error */
563
 
        ulint   max_size,       /* in: maximum size of the buf_pool in
564
 
                                blocks */
565
 
        ulint   curr_size,      /* in: current size to use, must be <=
566
 
                                max_size, currently must be equal to
567
 
                                max_size */
568
 
        ulint   n_frames)       /* in: number of frames; if AWE is used,
569
 
                                this is the size of the address space window
570
 
                                where physical memory pages are mapped; if
571
 
                                AWE is not used then this must be the same
572
 
                                as max_size */
573
919
{
574
 
        byte*           frame;
 
920
        buf_chunk_t*    chunk;
575
921
        ulint           i;
576
 
        buf_block_t*    block;
577
 
 
578
 
        ut_a(max_size == curr_size);
579
 
        ut_a(srv_use_awe || n_frames == max_size);
580
 
 
581
 
        if (n_frames > curr_size) {
582
 
                fprintf(stderr,
583
 
                        "InnoDB: AWE: Error: you must specify in my.cnf"
584
 
                        " .._awe_mem_mb larger\n"
585
 
                        "InnoDB: than .._buffer_pool_size. Now the former"
586
 
                        " is %lu pages,\n"
587
 
                        "InnoDB: the latter %lu pages.\n",
588
 
                        (ulong) curr_size, (ulong) n_frames);
589
 
 
590
 
                return(NULL);
591
 
        }
592
 
 
593
 
        buf_pool = mem_alloc(sizeof(buf_pool_t));
 
922
 
 
923
        buf_pool = mem_zalloc(sizeof(buf_pool_t));
594
924
 
595
925
        /* 1. Initialize general fields
596
 
        ---------------------------- */
597
 
        mutex_create(&buf_pool->mutex, SYNC_BUF_POOL);
598
 
 
599
 
        mutex_enter(&(buf_pool->mutex));
600
 
 
601
 
        if (srv_use_awe) {
602
 
                /*----------------------------------------*/
603
 
                /* Allocate the virtual address space window, i.e., the
604
 
                buffer pool frames */
605
 
 
606
 
                buf_pool->frame_mem = os_awe_allocate_virtual_mem_window(
607
 
                        UNIV_PAGE_SIZE * (n_frames + 1));
608
 
 
609
 
                /* Allocate the physical memory for AWE and the AWE info array
610
 
                for buf_pool */
611
 
 
612
 
                if ((curr_size % ((1024 * 1024) / UNIV_PAGE_SIZE)) != 0) {
613
 
 
614
 
                        fprintf(stderr,
615
 
                                "InnoDB: AWE: Error: physical memory must be"
616
 
                                " allocated in full megabytes.\n"
617
 
                                "InnoDB: Trying to allocate %lu"
618
 
                                " database pages.\n",
619
 
                                (ulong) curr_size);
620
 
 
621
 
                        return(NULL);
622
 
                }
623
 
 
624
 
                if (!os_awe_allocate_physical_mem(&(buf_pool->awe_info),
625
 
                                                  curr_size
626
 
                                                  / ((1024 * 1024)
627
 
                                                     / UNIV_PAGE_SIZE))) {
628
 
 
629
 
                        return(NULL);
630
 
                }
631
 
                /*----------------------------------------*/
632
 
        } else {
633
 
                buf_pool->frame_mem = os_mem_alloc_large(
634
 
                        UNIV_PAGE_SIZE * (n_frames + 1), TRUE, FALSE);
635
 
        }
636
 
 
637
 
        if (buf_pool->frame_mem == NULL) {
638
 
 
639
 
                return(NULL);
640
 
        }
641
 
 
642
 
        buf_pool->blocks = ut_malloc(sizeof(buf_block_t) * max_size);
643
 
 
644
 
        if (buf_pool->blocks == NULL) {
645
 
 
646
 
                return(NULL);
647
 
        }
648
 
 
649
 
        buf_pool->max_size = max_size;
650
 
        buf_pool->curr_size = curr_size;
651
 
 
652
 
        buf_pool->n_frames = n_frames;
653
 
 
654
 
        /* Align pointer to the first frame */
655
 
 
656
 
        frame = ut_align(buf_pool->frame_mem, UNIV_PAGE_SIZE);
657
 
 
658
 
        buf_pool->frame_zero = frame;
659
 
        buf_pool->high_end = frame + UNIV_PAGE_SIZE * n_frames;
660
 
 
661
 
        if (srv_use_awe) {
662
 
                /*----------------------------------------*/
663
 
                /* Map an initial part of the allocated physical memory to
664
 
                the window */
665
 
 
666
 
                os_awe_map_physical_mem_to_window(buf_pool->frame_zero,
667
 
                                                  n_frames
668
 
                                                  * (UNIV_PAGE_SIZE
669
 
                                                     / OS_AWE_X86_PAGE_SIZE),
670
 
                                                  buf_pool->awe_info);
671
 
                /*----------------------------------------*/
672
 
        }
673
 
 
674
 
        buf_pool->blocks_of_frames = ut_malloc(sizeof(void*) * n_frames);
675
 
 
676
 
        if (buf_pool->blocks_of_frames == NULL) {
677
 
 
678
 
                return(NULL);
679
 
        }
680
 
 
681
 
        /* Init block structs and assign frames for them; in the case of
682
 
        AWE there are fewer frames than blocks. Then we assign the frames
683
 
        to the first blocks (we already mapped the memory above). We also
684
 
        init the awe_info for every block. */
685
 
 
686
 
        for (i = 0; i < max_size; i++) {
687
 
 
688
 
                block = buf_pool_get_nth_block(buf_pool, i);
689
 
 
690
 
                if (i < n_frames) {
691
 
                        frame = buf_pool->frame_zero + i * UNIV_PAGE_SIZE;
692
 
                        *(buf_pool->blocks_of_frames + i) = block;
693
 
                } else {
694
 
                        frame = NULL;
695
 
                }
696
 
 
697
 
                buf_block_init(block, frame);
698
 
 
699
 
                if (srv_use_awe) {
700
 
                        /*----------------------------------------*/
701
 
                        block->awe_info = buf_pool->awe_info
702
 
                                + i * (UNIV_PAGE_SIZE / OS_AWE_X86_PAGE_SIZE);
703
 
                        /*----------------------------------------*/
704
 
                }
705
 
        }
706
 
 
707
 
        buf_pool->page_hash = hash_create(2 * max_size);
708
 
 
709
 
        buf_pool->n_pend_reads = 0;
 
926
        ------------------------------- */
 
927
        mutex_create(&buf_pool_mutex, SYNC_BUF_POOL);
 
928
        mutex_create(&buf_pool_zip_mutex, SYNC_BUF_BLOCK);
 
929
 
 
930
        buf_pool_mutex_enter();
 
931
 
 
932
        buf_pool->n_chunks = 1;
 
933
        buf_pool->chunks = chunk = mem_alloc(sizeof *chunk);
 
934
 
 
935
        UT_LIST_INIT(buf_pool->free);
 
936
 
 
937
        if (!buf_chunk_init(chunk, srv_buf_pool_size)) {
 
938
                mem_free(chunk);
 
939
                mem_free(buf_pool);
 
940
                buf_pool = NULL;
 
941
                return(NULL);
 
942
        }
 
943
 
 
944
        srv_buf_pool_old_size = srv_buf_pool_size;
 
945
        buf_pool->curr_size = chunk->size;
 
946
        srv_buf_pool_curr_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
 
947
 
 
948
        buf_pool->page_hash = hash_create(2 * buf_pool->curr_size);
 
949
        buf_pool->zip_hash = hash_create(2 * buf_pool->curr_size);
710
950
 
711
951
        buf_pool->last_printout_time = time(NULL);
712
952
 
713
 
        buf_pool->n_pages_read = 0;
714
 
        buf_pool->n_pages_written = 0;
715
 
        buf_pool->n_pages_created = 0;
716
 
        buf_pool->n_pages_awe_remapped = 0;
717
 
 
718
 
        buf_pool->n_page_gets = 0;
719
 
        buf_pool->n_page_gets_old = 0;
720
 
        buf_pool->n_pages_read_old = 0;
721
 
        buf_pool->n_pages_written_old = 0;
722
 
        buf_pool->n_pages_created_old = 0;
723
 
        buf_pool->n_pages_awe_remapped_old = 0;
724
 
 
725
953
        /* 2. Initialize flushing fields
726
 
        ---------------------------- */
727
 
        UT_LIST_INIT(buf_pool->flush_list);
 
954
        -------------------------------- */
728
955
 
729
 
        for (i = BUF_FLUSH_LRU; i <= BUF_FLUSH_LIST; i++) {
730
 
                buf_pool->n_flush[i] = 0;
731
 
                buf_pool->init_flush[i] = FALSE;
 
956
        for (i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) {
732
957
                buf_pool->no_flush[i] = os_event_create(NULL);
733
958
        }
734
959
 
735
 
        buf_pool->LRU_flush_ended = 0;
736
 
 
737
960
        buf_pool->ulint_clock = 1;
738
 
        buf_pool->freed_page_clock = 0;
739
961
 
740
962
        /* 3. Initialize LRU fields
741
 
        ---------------------------- */
742
 
        UT_LIST_INIT(buf_pool->LRU);
743
 
 
744
 
        buf_pool->LRU_old = NULL;
745
 
 
746
 
        UT_LIST_INIT(buf_pool->awe_LRU_free_mapped);
747
 
 
748
 
        /* Add control blocks to the free list */
749
 
        UT_LIST_INIT(buf_pool->free);
750
 
 
751
 
        for (i = 0; i < curr_size; i++) {
752
 
 
753
 
                block = buf_pool_get_nth_block(buf_pool, i);
754
 
 
755
 
                if (block->frame) {
756
 
                        /* Wipe contents of frame to eliminate a Purify
757
 
                        warning */
758
 
 
759
 
#ifdef HAVE_purify
760
 
                        memset(block->frame, '\0', UNIV_PAGE_SIZE);
761
 
#endif
762
 
                        if (srv_use_awe) {
763
 
                                /* Add to the list of blocks mapped to
764
 
                                frames */
765
 
 
766
 
                                UT_LIST_ADD_LAST(awe_LRU_free_mapped,
767
 
                                                 buf_pool->awe_LRU_free_mapped,
768
 
                                                 block);
769
 
                        }
770
 
                }
771
 
 
772
 
                UT_LIST_ADD_LAST(free, buf_pool->free, block);
773
 
                block->in_free_list = TRUE;
774
 
        }
775
 
 
776
 
        mutex_exit(&(buf_pool->mutex));
777
 
 
778
 
        if (srv_use_adaptive_hash_indexes) {
779
 
                btr_search_sys_create(curr_size * UNIV_PAGE_SIZE
780
 
                                      / sizeof(void*) / 64);
781
 
        } else {
782
 
                /* Create only a small dummy system */
783
 
                btr_search_sys_create(1000);
784
 
        }
 
963
        --------------------------- */
 
964
        /* All fields are initialized by mem_zalloc(). */
 
965
 
 
966
        buf_pool_mutex_exit();
 
967
 
 
968
        btr_search_sys_create(buf_pool->curr_size
 
969
                              * UNIV_PAGE_SIZE / sizeof(void*) / 64);
 
970
 
 
971
        /* 4. Initialize the buddy allocator fields */
 
972
        /* All fields are initialized by mem_zalloc(). */
785
973
 
786
974
        return(buf_pool);
787
975
}
788
976
 
789
977
/************************************************************************
790
 
Maps the page of block to a frame, if not mapped yet. Unmaps some page
791
 
from the end of the awe_LRU_free_mapped. */
792
 
 
793
 
void
794
 
buf_awe_map_page_to_frame(
795
 
/*======================*/
796
 
        buf_block_t*    block,          /* in: block whose page should be
797
 
                                        mapped to a frame */
798
 
        ibool           add_to_mapped_list) /* in: TRUE if we in the case
799
 
                                        we need to map the page should also
800
 
                                        add the block to the
801
 
                                        awe_LRU_free_mapped list */
802
 
{
803
 
        buf_block_t*    bck;
804
 
 
805
 
        ut_ad(mutex_own(&(buf_pool->mutex)));
806
 
        ut_ad(block);
807
 
 
808
 
        if (block->frame) {
809
 
 
 
978
Frees the buffer pool at shutdown.  This must not be invoked before
 
979
freeing all mutexes. */
 
980
UNIV_INTERN
 
981
void
 
982
buf_pool_free(void)
 
983
/*===============*/
 
984
{
 
985
        buf_chunk_t*    chunk;
 
986
        buf_chunk_t*    chunks;
 
987
 
 
988
        chunks = buf_pool->chunks;
 
989
        chunk = chunks + buf_pool->n_chunks;
 
990
 
 
991
        while (--chunk >= chunks) {
 
992
                /* Bypass the checks of buf_chunk_free(), since they
 
993
                would fail at shutdown. */
 
994
                os_mem_free_large(chunk->mem, chunk->mem_size);
 
995
        }
 
996
 
 
997
        buf_pool->n_chunks = 0;
 
998
}
 
999
 
 
1000
/************************************************************************
 
1001
Relocate a buffer control block.  Relocates the block on the LRU list
 
1002
and in buf_pool->page_hash.  Does not relocate bpage->list.
 
1003
The caller must take care of relocating bpage->list. */
 
1004
UNIV_INTERN
 
1005
void
 
1006
buf_relocate(
 
1007
/*=========*/
 
1008
        buf_page_t*     bpage,  /* in/out: control block being relocated;
 
1009
                                buf_page_get_state(bpage) must be
 
1010
                                BUF_BLOCK_ZIP_DIRTY or BUF_BLOCK_ZIP_PAGE */
 
1011
        buf_page_t*     dpage)  /* in/out: destination control block */
 
1012
{
 
1013
        buf_page_t*     b;
 
1014
        ulint           fold;
 
1015
 
 
1016
        ut_ad(buf_pool_mutex_own());
 
1017
        ut_ad(mutex_own(buf_page_get_mutex(bpage)));
 
1018
        ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
 
1019
        ut_a(bpage->buf_fix_count == 0);
 
1020
        ut_ad(bpage->in_LRU_list);
 
1021
        ut_ad(!bpage->in_zip_hash);
 
1022
        ut_ad(bpage->in_page_hash);
 
1023
        ut_ad(bpage == buf_page_hash_get(bpage->space, bpage->offset));
 
1024
#ifdef UNIV_DEBUG
 
1025
        switch (buf_page_get_state(bpage)) {
 
1026
        case BUF_BLOCK_ZIP_FREE:
 
1027
        case BUF_BLOCK_NOT_USED:
 
1028
        case BUF_BLOCK_READY_FOR_USE:
 
1029
        case BUF_BLOCK_FILE_PAGE:
 
1030
        case BUF_BLOCK_MEMORY:
 
1031
        case BUF_BLOCK_REMOVE_HASH:
 
1032
                ut_error;
 
1033
        case BUF_BLOCK_ZIP_DIRTY:
 
1034
        case BUF_BLOCK_ZIP_PAGE:
 
1035
                break;
 
1036
        }
 
1037
#endif /* UNIV_DEBUG */
 
1038
 
 
1039
        memcpy(dpage, bpage, sizeof *dpage);
 
1040
 
 
1041
        ut_d(bpage->in_LRU_list = FALSE);
 
1042
        ut_d(bpage->in_page_hash = FALSE);
 
1043
 
 
1044
        /* relocate buf_pool->LRU */
 
1045
        b = UT_LIST_GET_PREV(LRU, bpage);
 
1046
        UT_LIST_REMOVE(LRU, buf_pool->LRU, bpage);
 
1047
 
 
1048
        if (b) {
 
1049
                UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU, b, dpage);
 
1050
        } else {
 
1051
                UT_LIST_ADD_FIRST(LRU, buf_pool->LRU, dpage);
 
1052
        }
 
1053
 
 
1054
        if (UNIV_UNLIKELY(buf_pool->LRU_old == bpage)) {
 
1055
                buf_pool->LRU_old = dpage;
 
1056
        }
 
1057
 
 
1058
        ut_d(UT_LIST_VALIDATE(LRU, buf_page_t, buf_pool->LRU));
 
1059
 
 
1060
        /* relocate buf_pool->page_hash */
 
1061
        fold = buf_page_address_fold(bpage->space, bpage->offset);
 
1062
 
 
1063
        HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, bpage);
 
1064
        HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold, dpage);
 
1065
 
 
1066
        UNIV_MEM_INVALID(bpage, sizeof *bpage);
 
1067
}
 
1068
 
 
1069
/************************************************************************
 
1070
Shrinks the buffer pool. */
 
1071
static
 
1072
void
 
1073
buf_pool_shrink(
 
1074
/*============*/
 
1075
                                /* out: TRUE if shrunk */
 
1076
        ulint   chunk_size)     /* in: number of pages to remove */
 
1077
{
 
1078
        buf_chunk_t*    chunks;
 
1079
        buf_chunk_t*    chunk;
 
1080
        ulint           max_size;
 
1081
        ulint           max_free_size;
 
1082
        buf_chunk_t*    max_chunk;
 
1083
        buf_chunk_t*    max_free_chunk;
 
1084
 
 
1085
        ut_ad(!buf_pool_mutex_own());
 
1086
 
 
1087
try_again:
 
1088
        btr_search_disable(); /* Empty the adaptive hash index again */
 
1089
        buf_pool_mutex_enter();
 
1090
 
 
1091
shrink_again:
 
1092
        if (buf_pool->n_chunks <= 1) {
 
1093
 
 
1094
                /* Cannot shrink if there is only one chunk */
 
1095
                goto func_done;
 
1096
        }
 
1097
 
 
1098
        /* Search for the largest free chunk
 
1099
        not larger than the size difference */
 
1100
        chunks = buf_pool->chunks;
 
1101
        chunk = chunks + buf_pool->n_chunks;
 
1102
        max_size = max_free_size = 0;
 
1103
        max_chunk = max_free_chunk = NULL;
 
1104
 
 
1105
        while (--chunk >= chunks) {
 
1106
                if (chunk->size <= chunk_size
 
1107
                    && chunk->size > max_free_size) {
 
1108
                        if (chunk->size > max_size) {
 
1109
                                max_size = chunk->size;
 
1110
                                max_chunk = chunk;
 
1111
                        }
 
1112
 
 
1113
                        if (buf_chunk_all_free(chunk)) {
 
1114
                                max_free_size = chunk->size;
 
1115
                                max_free_chunk = chunk;
 
1116
                        }
 
1117
                }
 
1118
        }
 
1119
 
 
1120
        if (!max_free_size) {
 
1121
 
 
1122
                ulint           dirty   = 0;
 
1123
                ulint           nonfree = 0;
 
1124
                buf_block_t*    block;
 
1125
                buf_block_t*    bend;
 
1126
 
 
1127
                /* Cannot shrink: try again later
 
1128
                (do not assign srv_buf_pool_old_size) */
 
1129
                if (!max_chunk) {
 
1130
 
 
1131
                        goto func_exit;
 
1132
                }
 
1133
 
 
1134
                block = max_chunk->blocks;
 
1135
                bend = block + max_chunk->size;
 
1136
 
 
1137
                /* Move the blocks of chunk to the end of the
 
1138
                LRU list and try to flush them. */
 
1139
                for (; block < bend; block++) {
 
1140
                        switch (buf_block_get_state(block)) {
 
1141
                        case BUF_BLOCK_NOT_USED:
 
1142
                                continue;
 
1143
                        case BUF_BLOCK_FILE_PAGE:
 
1144
                                break;
 
1145
                        default:
 
1146
                                nonfree++;
 
1147
                                continue;
 
1148
                        }
 
1149
 
 
1150
                        mutex_enter(&block->mutex);
 
1151
                        /* The following calls will temporarily
 
1152
                        release block->mutex and buf_pool_mutex.
 
1153
                        Therefore, we have to always retry,
 
1154
                        even if !dirty && !nonfree. */
 
1155
 
 
1156
                        if (!buf_flush_ready_for_replace(&block->page)) {
 
1157
 
 
1158
                                buf_LRU_make_block_old(&block->page);
 
1159
                                dirty++;
 
1160
                        } else if (buf_LRU_free_block(&block->page, TRUE, NULL)
 
1161
                                   != BUF_LRU_FREED) {
 
1162
                                nonfree++;
 
1163
                        }
 
1164
 
 
1165
                        mutex_exit(&block->mutex);
 
1166
                }
 
1167
 
 
1168
                buf_pool_mutex_exit();
 
1169
 
 
1170
                /* Request for a flush of the chunk if it helps.
 
1171
                Do not flush if there are non-free blocks, since
 
1172
                flushing will not make the chunk freeable. */
 
1173
                if (nonfree) {
 
1174
                        /* Avoid busy-waiting. */
 
1175
                        os_thread_sleep(100000);
 
1176
                } else if (dirty
 
1177
                           && buf_flush_batch(BUF_FLUSH_LRU, dirty, 0)
 
1178
                           == ULINT_UNDEFINED) {
 
1179
 
 
1180
                        buf_flush_wait_batch_end(BUF_FLUSH_LRU);
 
1181
                }
 
1182
 
 
1183
                goto try_again;
 
1184
        }
 
1185
 
 
1186
        max_size = max_free_size;
 
1187
        max_chunk = max_free_chunk;
 
1188
 
 
1189
        srv_buf_pool_old_size = srv_buf_pool_size;
 
1190
 
 
1191
        /* Rewrite buf_pool->chunks.  Copy everything but max_chunk. */
 
1192
        chunks = mem_alloc((buf_pool->n_chunks - 1) * sizeof *chunks);
 
1193
        memcpy(chunks, buf_pool->chunks,
 
1194
               (max_chunk - buf_pool->chunks) * sizeof *chunks);
 
1195
        memcpy(chunks + (max_chunk - buf_pool->chunks),
 
1196
               max_chunk + 1,
 
1197
               buf_pool->chunks + buf_pool->n_chunks
 
1198
               - (max_chunk + 1));
 
1199
        ut_a(buf_pool->curr_size > max_chunk->size);
 
1200
        buf_pool->curr_size -= max_chunk->size;
 
1201
        srv_buf_pool_curr_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
 
1202
        chunk_size -= max_chunk->size;
 
1203
        buf_chunk_free(max_chunk);
 
1204
        mem_free(buf_pool->chunks);
 
1205
        buf_pool->chunks = chunks;
 
1206
        buf_pool->n_chunks--;
 
1207
 
 
1208
        /* Allow a slack of one megabyte. */
 
1209
        if (chunk_size > 1048576 / UNIV_PAGE_SIZE) {
 
1210
 
 
1211
                goto shrink_again;
 
1212
        }
 
1213
 
 
1214
func_done:
 
1215
        srv_buf_pool_old_size = srv_buf_pool_size;
 
1216
func_exit:
 
1217
        buf_pool_mutex_exit();
 
1218
        btr_search_enable();
 
1219
}
 
1220
 
 
1221
/************************************************************************
 
1222
Rebuild buf_pool->page_hash. */
 
1223
static
 
1224
void
 
1225
buf_pool_page_hash_rebuild(void)
 
1226
/*============================*/
 
1227
{
 
1228
        ulint           i;
 
1229
        ulint           n_chunks;
 
1230
        buf_chunk_t*    chunk;
 
1231
        hash_table_t*   page_hash;
 
1232
        hash_table_t*   zip_hash;
 
1233
        buf_page_t*     b;
 
1234
 
 
1235
        buf_pool_mutex_enter();
 
1236
 
 
1237
        /* Free, create, and populate the hash table. */
 
1238
        hash_table_free(buf_pool->page_hash);
 
1239
        buf_pool->page_hash = page_hash = hash_create(2 * buf_pool->curr_size);
 
1240
        zip_hash = hash_create(2 * buf_pool->curr_size);
 
1241
 
 
1242
        HASH_MIGRATE(buf_pool->zip_hash, zip_hash, buf_page_t, hash,
 
1243
                     BUF_POOL_ZIP_FOLD_BPAGE);
 
1244
 
 
1245
        hash_table_free(buf_pool->zip_hash);
 
1246
        buf_pool->zip_hash = zip_hash;
 
1247
 
 
1248
        /* Insert the uncompressed file pages to buf_pool->page_hash. */
 
1249
 
 
1250
        chunk = buf_pool->chunks;
 
1251
        n_chunks = buf_pool->n_chunks;
 
1252
 
 
1253
        for (i = 0; i < n_chunks; i++, chunk++) {
 
1254
                ulint           j;
 
1255
                buf_block_t*    block = chunk->blocks;
 
1256
 
 
1257
                for (j = 0; j < chunk->size; j++, block++) {
 
1258
                        if (buf_block_get_state(block)
 
1259
                            == BUF_BLOCK_FILE_PAGE) {
 
1260
                                ut_ad(!block->page.in_zip_hash);
 
1261
                                ut_ad(block->page.in_page_hash);
 
1262
 
 
1263
                                HASH_INSERT(buf_page_t, hash, page_hash,
 
1264
                                            buf_page_address_fold(
 
1265
                                                    block->page.space,
 
1266
                                                    block->page.offset),
 
1267
                                            &block->page);
 
1268
                        }
 
1269
                }
 
1270
        }
 
1271
 
 
1272
        /* Insert the compressed-only pages to buf_pool->page_hash.
 
1273
        All such blocks are either in buf_pool->zip_clean or
 
1274
        in buf_pool->flush_list. */
 
1275
 
 
1276
        for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
 
1277
             b = UT_LIST_GET_NEXT(list, b)) {
 
1278
                ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
 
1279
                ut_ad(!b->in_flush_list);
 
1280
                ut_ad(b->in_LRU_list);
 
1281
                ut_ad(b->in_page_hash);
 
1282
                ut_ad(!b->in_zip_hash);
 
1283
 
 
1284
                HASH_INSERT(buf_page_t, hash, page_hash,
 
1285
                            buf_page_address_fold(b->space, b->offset), b);
 
1286
        }
 
1287
 
 
1288
        for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
 
1289
             b = UT_LIST_GET_NEXT(list, b)) {
 
1290
                ut_ad(b->in_flush_list);
 
1291
                ut_ad(b->in_LRU_list);
 
1292
                ut_ad(b->in_page_hash);
 
1293
                ut_ad(!b->in_zip_hash);
 
1294
 
 
1295
                switch (buf_page_get_state(b)) {
 
1296
                case BUF_BLOCK_ZIP_DIRTY:
 
1297
                        HASH_INSERT(buf_page_t, hash, page_hash,
 
1298
                                    buf_page_address_fold(b->space,
 
1299
                                                          b->offset), b);
 
1300
                        break;
 
1301
                case BUF_BLOCK_FILE_PAGE:
 
1302
                        /* uncompressed page */
 
1303
                        break;
 
1304
                case BUF_BLOCK_ZIP_FREE:
 
1305
                case BUF_BLOCK_ZIP_PAGE:
 
1306
                case BUF_BLOCK_NOT_USED:
 
1307
                case BUF_BLOCK_READY_FOR_USE:
 
1308
                case BUF_BLOCK_MEMORY:
 
1309
                case BUF_BLOCK_REMOVE_HASH:
 
1310
                        ut_error;
 
1311
                        break;
 
1312
                }
 
1313
        }
 
1314
 
 
1315
        buf_pool_mutex_exit();
 
1316
}
 
1317
 
 
1318
/************************************************************************
 
1319
Resizes the buffer pool. */
 
1320
UNIV_INTERN
 
1321
void
 
1322
buf_pool_resize(void)
 
1323
/*=================*/
 
1324
{
 
1325
        buf_pool_mutex_enter();
 
1326
 
 
1327
        if (srv_buf_pool_old_size == srv_buf_pool_size) {
 
1328
 
 
1329
                buf_pool_mutex_exit();
810
1330
                return;
811
1331
        }
812
1332
 
813
 
        /* Scan awe_LRU_free_mapped from the end and try to find a block
814
 
        which is not bufferfixed or io-fixed */
815
 
 
816
 
        bck = UT_LIST_GET_LAST(buf_pool->awe_LRU_free_mapped);
817
 
 
818
 
        while (bck) {
819
 
                ibool skip;
820
 
 
821
 
                mutex_enter(&bck->mutex);
822
 
 
823
 
                skip = (bck->state == BUF_BLOCK_FILE_PAGE
824
 
                        && (bck->buf_fix_count != 0 || bck->io_fix != 0));
825
 
 
826
 
                if (skip) {
827
 
                        mutex_exit(&bck->mutex);
828
 
 
829
 
                        /* We have to skip this */
830
 
                        bck = UT_LIST_GET_PREV(awe_LRU_free_mapped, bck);
 
1333
        if (srv_buf_pool_curr_size + 1048576 > srv_buf_pool_size) {
 
1334
 
 
1335
                buf_pool_mutex_exit();
 
1336
 
 
1337
                /* Disable adaptive hash indexes and empty the index
 
1338
                in order to free up memory in the buffer pool chunks. */
 
1339
                buf_pool_shrink((srv_buf_pool_curr_size - srv_buf_pool_size)
 
1340
                                / UNIV_PAGE_SIZE);
 
1341
        } else if (srv_buf_pool_curr_size + 1048576 < srv_buf_pool_size) {
 
1342
 
 
1343
                /* Enlarge the buffer pool by at least one megabyte */
 
1344
 
 
1345
                ulint           mem_size
 
1346
                        = srv_buf_pool_size - srv_buf_pool_curr_size;
 
1347
                buf_chunk_t*    chunks;
 
1348
                buf_chunk_t*    chunk;
 
1349
 
 
1350
                chunks = mem_alloc((buf_pool->n_chunks + 1) * sizeof *chunks);
 
1351
 
 
1352
                memcpy(chunks, buf_pool->chunks, buf_pool->n_chunks
 
1353
                       * sizeof *chunks);
 
1354
 
 
1355
                chunk = &chunks[buf_pool->n_chunks];
 
1356
 
 
1357
                if (!buf_chunk_init(chunk, mem_size)) {
 
1358
                        mem_free(chunks);
831
1359
                } else {
832
 
                        /* We can map block to the frame of bck */
833
 
 
834
 
                        os_awe_map_physical_mem_to_window(
835
 
                                bck->frame,
836
 
                                UNIV_PAGE_SIZE / OS_AWE_X86_PAGE_SIZE,
837
 
                                block->awe_info);
838
 
 
839
 
                        block->frame = bck->frame;
840
 
 
841
 
                        *(buf_pool->blocks_of_frames
842
 
                          + (((ulint)(block->frame
843
 
                                      - buf_pool->frame_zero))
844
 
                             >> UNIV_PAGE_SIZE_SHIFT))
845
 
                                = block;
846
 
 
847
 
                        bck->frame = NULL;
848
 
                        UT_LIST_REMOVE(awe_LRU_free_mapped,
849
 
                                       buf_pool->awe_LRU_free_mapped,
850
 
                                       bck);
851
 
 
852
 
                        if (add_to_mapped_list) {
853
 
                                UT_LIST_ADD_FIRST(
854
 
                                        awe_LRU_free_mapped,
855
 
                                        buf_pool->awe_LRU_free_mapped,
856
 
                                        block);
857
 
                        }
858
 
 
859
 
                        buf_pool->n_pages_awe_remapped++;
860
 
 
861
 
                        mutex_exit(&bck->mutex);
862
 
 
863
 
                        return;
 
1360
                        buf_pool->curr_size += chunk->size;
 
1361
                        srv_buf_pool_curr_size = buf_pool->curr_size
 
1362
                                * UNIV_PAGE_SIZE;
 
1363
                        mem_free(buf_pool->chunks);
 
1364
                        buf_pool->chunks = chunks;
 
1365
                        buf_pool->n_chunks++;
864
1366
                }
 
1367
 
 
1368
                srv_buf_pool_old_size = srv_buf_pool_size;
 
1369
                buf_pool_mutex_exit();
865
1370
        }
866
1371
 
867
 
        fprintf(stderr,
868
 
                "InnoDB: AWE: Fatal error: cannot find a page to unmap\n"
869
 
                "InnoDB: awe_LRU_free_mapped list length %lu\n",
870
 
                (ulong) UT_LIST_GET_LEN(buf_pool->awe_LRU_free_mapped));
871
 
 
872
 
        ut_a(0);
873
 
}
874
 
 
875
 
/************************************************************************
876
 
Allocates a buffer block. */
877
 
UNIV_INLINE
878
 
buf_block_t*
879
 
buf_block_alloc(void)
880
 
/*=================*/
881
 
                                /* out, own: the allocated block; also if AWE
882
 
                                is used it is guaranteed that the page is
883
 
                                mapped to a frame */
884
 
{
885
 
        buf_block_t*    block;
886
 
 
887
 
        block = buf_LRU_get_free_block();
888
 
 
889
 
        return(block);
 
1372
        buf_pool_page_hash_rebuild();
890
1373
}
891
1374
 
892
1375
/************************************************************************
896
1379
void
897
1380
buf_block_make_young(
898
1381
/*=================*/
899
 
        buf_block_t*    block)  /* in: block to make younger */
 
1382
        buf_page_t*     bpage)  /* in: block to make younger */
900
1383
{
901
 
        ut_ad(!mutex_own(&(buf_pool->mutex)));
 
1384
        ut_ad(!buf_pool_mutex_own());
902
1385
 
903
1386
        /* Note that we read freed_page_clock's without holding any mutex:
904
1387
        this is allowed since the result is used only in heuristics */
905
1388
 
906
 
        if (buf_block_peek_if_too_old(block)) {
 
1389
        if (buf_page_peek_if_too_old(bpage)) {
907
1390
 
908
 
                mutex_enter(&buf_pool->mutex);
 
1391
                buf_pool_mutex_enter();
909
1392
                /* There has been freeing activity in the LRU list:
910
1393
                best to move to the head of the LRU list */
911
1394
 
912
 
                buf_LRU_make_block_young(block);
913
 
                mutex_exit(&buf_pool->mutex);
 
1395
                buf_LRU_make_block_young(bpage);
 
1396
                buf_pool_mutex_exit();
914
1397
        }
915
1398
}
916
1399
 
918
1401
Moves a page to the start of the buffer pool LRU list. This high-level
919
1402
function can be used to prevent an important page from slipping out of
920
1403
the buffer pool. */
921
 
 
 
1404
UNIV_INTERN
922
1405
void
923
1406
buf_page_make_young(
924
1407
/*================*/
925
 
        buf_frame_t*    frame)  /* in: buffer frame of a file page */
926
 
{
927
 
        buf_block_t*    block;
928
 
 
929
 
        mutex_enter(&(buf_pool->mutex));
930
 
 
931
 
        block = buf_block_align(frame);
932
 
 
933
 
        ut_a(block->state == BUF_BLOCK_FILE_PAGE);
934
 
 
935
 
        buf_LRU_make_block_young(block);
936
 
 
937
 
        mutex_exit(&(buf_pool->mutex));
938
 
}
939
 
 
940
 
/************************************************************************
941
 
Frees a buffer block which does not contain a file page. */
942
 
UNIV_INLINE
943
 
void
944
 
buf_block_free(
945
 
/*===========*/
946
 
        buf_block_t*    block)  /* in, own: block to be freed */
947
 
{
948
 
        mutex_enter(&(buf_pool->mutex));
949
 
 
950
 
        mutex_enter(&block->mutex);
951
 
 
952
 
        ut_a(block->state != BUF_BLOCK_FILE_PAGE);
953
 
 
954
 
        buf_LRU_block_free_non_file_page(block);
955
 
 
956
 
        mutex_exit(&block->mutex);
957
 
 
958
 
        mutex_exit(&(buf_pool->mutex));
959
 
}
960
 
 
961
 
/*************************************************************************
962
 
Allocates a buffer frame. */
963
 
 
964
 
buf_frame_t*
965
 
buf_frame_alloc(void)
966
 
/*=================*/
967
 
                                /* out: buffer frame */
968
 
{
969
 
        return(buf_block_alloc()->frame);
970
 
}
971
 
 
972
 
/*************************************************************************
973
 
Frees a buffer frame which does not contain a file page. */
974
 
 
975
 
void
976
 
buf_frame_free(
977
 
/*===========*/
978
 
        buf_frame_t*    frame)  /* in: buffer frame */
979
 
{
980
 
        buf_block_free(buf_block_align(frame));
981
 
}
982
 
 
983
 
/************************************************************************
984
 
Returns the buffer control block if the page can be found in the buffer
985
 
pool. NOTE that it is possible that the page is not yet read
986
 
from disk, though. This is a very low-level function: use with care! */
987
 
 
988
 
buf_block_t*
989
 
buf_page_peek_block(
990
 
/*================*/
991
 
                        /* out: control block if found from page hash table,
992
 
                        otherwise NULL; NOTE that the page is not necessarily
993
 
                        yet read from disk! */
994
 
        ulint   space,  /* in: space id */
995
 
        ulint   offset) /* in: page number */
996
 
{
997
 
        buf_block_t*    block;
998
 
 
999
 
        mutex_enter_fast(&(buf_pool->mutex));
1000
 
 
1001
 
        block = buf_page_hash_get(space, offset);
1002
 
 
1003
 
        mutex_exit(&(buf_pool->mutex));
1004
 
 
1005
 
        return(block);
 
1408
        buf_page_t*     bpage)  /* in: buffer block of a file page */
 
1409
{
 
1410
        buf_pool_mutex_enter();
 
1411
 
 
1412
        ut_a(buf_page_in_file(bpage));
 
1413
 
 
1414
        buf_LRU_make_block_young(bpage);
 
1415
 
 
1416
        buf_pool_mutex_exit();
1006
1417
}
1007
1418
 
1008
1419
/************************************************************************
1009
1420
Resets the check_index_page_at_flush field of a page if found in the buffer
1010
1421
pool. */
1011
 
 
 
1422
UNIV_INTERN
1012
1423
void
1013
1424
buf_reset_check_index_page_at_flush(
1014
1425
/*================================*/
1017
1428
{
1018
1429
        buf_block_t*    block;
1019
1430
 
1020
 
        mutex_enter_fast(&(buf_pool->mutex));
1021
 
 
1022
 
        block = buf_page_hash_get(space, offset);
1023
 
 
1024
 
        if (block) {
 
1431
        buf_pool_mutex_enter();
 
1432
 
 
1433
        block = (buf_block_t*) buf_page_hash_get(space, offset);
 
1434
 
 
1435
        if (block && buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE) {
1025
1436
                block->check_index_page_at_flush = FALSE;
1026
1437
        }
1027
1438
 
1028
 
        mutex_exit(&(buf_pool->mutex));
 
1439
        buf_pool_mutex_exit();
1029
1440
}
1030
1441
 
1031
1442
/************************************************************************
1032
1443
Returns the current state of is_hashed of a page. FALSE if the page is
1033
1444
not in the pool. NOTE that this operation does not fix the page in the
1034
1445
pool if it is found there. */
1035
 
 
 
1446
UNIV_INTERN
1036
1447
ibool
1037
1448
buf_page_peek_if_search_hashed(
1038
1449
/*===========================*/
1044
1455
        buf_block_t*    block;
1045
1456
        ibool           is_hashed;
1046
1457
 
1047
 
        mutex_enter_fast(&(buf_pool->mutex));
1048
 
 
1049
 
        block = buf_page_hash_get(space, offset);
1050
 
 
1051
 
        if (!block) {
 
1458
        buf_pool_mutex_enter();
 
1459
 
 
1460
        block = (buf_block_t*) buf_page_hash_get(space, offset);
 
1461
 
 
1462
        if (!block || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
1052
1463
                is_hashed = FALSE;
1053
1464
        } else {
1054
1465
                is_hashed = block->is_hashed;
1055
1466
        }
1056
1467
 
1057
 
        mutex_exit(&(buf_pool->mutex));
 
1468
        buf_pool_mutex_exit();
1058
1469
 
1059
1470
        return(is_hashed);
1060
1471
}
1061
1472
 
1062
 
/************************************************************************
1063
 
Returns TRUE if the page can be found in the buffer pool hash table. NOTE
1064
 
that it is possible that the page is not yet read from disk, though. */
1065
 
 
1066
 
ibool
1067
 
buf_page_peek(
1068
 
/*==========*/
1069
 
                        /* out: TRUE if found from page hash table,
1070
 
                        NOTE that the page is not necessarily yet read
1071
 
                        from disk! */
1072
 
        ulint   space,  /* in: space id */
1073
 
        ulint   offset) /* in: page number */
1074
 
{
1075
 
        if (buf_page_peek_block(space, offset)) {
1076
 
 
1077
 
                return(TRUE);
1078
 
        }
1079
 
 
1080
 
        return(FALSE);
1081
 
}
1082
 
 
 
1473
#ifdef UNIV_DEBUG_FILE_ACCESSES
1083
1474
/************************************************************************
1084
1475
Sets file_page_was_freed TRUE if the page is found in the buffer pool.
1085
1476
This function should be called when we free a file page and want the
1086
1477
debug version to check that it is not accessed any more unless
1087
1478
reallocated. */
1088
 
 
1089
 
buf_block_t*
 
1479
UNIV_INTERN
 
1480
buf_page_t*
1090
1481
buf_page_set_file_page_was_freed(
1091
1482
/*=============================*/
1092
 
                        /* out: control block if found from page hash table,
 
1483
                        /* out: control block if found in page hash table,
1093
1484
                        otherwise NULL */
1094
1485
        ulint   space,  /* in: space id */
1095
1486
        ulint   offset) /* in: page number */
1096
1487
{
1097
 
        buf_block_t*    block;
1098
 
 
1099
 
        mutex_enter_fast(&(buf_pool->mutex));
1100
 
 
1101
 
        block = buf_page_hash_get(space, offset);
1102
 
 
1103
 
        if (block) {
1104
 
                block->file_page_was_freed = TRUE;
 
1488
        buf_page_t*     bpage;
 
1489
 
 
1490
        buf_pool_mutex_enter();
 
1491
 
 
1492
        bpage = buf_page_hash_get(space, offset);
 
1493
 
 
1494
        if (bpage) {
 
1495
                bpage->file_page_was_freed = TRUE;
1105
1496
        }
1106
1497
 
1107
 
        mutex_exit(&(buf_pool->mutex));
 
1498
        buf_pool_mutex_exit();
1108
1499
 
1109
 
        return(block);
 
1500
        return(bpage);
1110
1501
}
1111
1502
 
1112
1503
/************************************************************************
1114
1505
This function should be called when we free a file page and want the
1115
1506
debug version to check that it is not accessed any more unless
1116
1507
reallocated. */
1117
 
 
1118
 
buf_block_t*
 
1508
UNIV_INTERN
 
1509
buf_page_t*
1119
1510
buf_page_reset_file_page_was_freed(
1120
1511
/*===============================*/
1121
 
                        /* out: control block if found from page hash table,
 
1512
                        /* out: control block if found in page hash table,
1122
1513
                        otherwise NULL */
1123
1514
        ulint   space,  /* in: space id */
1124
1515
        ulint   offset) /* in: page number */
1125
1516
{
1126
 
        buf_block_t*    block;
1127
 
 
1128
 
        mutex_enter_fast(&(buf_pool->mutex));
1129
 
 
1130
 
        block = buf_page_hash_get(space, offset);
1131
 
 
1132
 
        if (block) {
1133
 
                block->file_page_was_freed = FALSE;
1134
 
        }
1135
 
 
1136
 
        mutex_exit(&(buf_pool->mutex));
1137
 
 
1138
 
        return(block);
 
1517
        buf_page_t*     bpage;
 
1518
 
 
1519
        buf_pool_mutex_enter();
 
1520
 
 
1521
        bpage = buf_page_hash_get(space, offset);
 
1522
 
 
1523
        if (bpage) {
 
1524
                bpage->file_page_was_freed = FALSE;
 
1525
        }
 
1526
 
 
1527
        buf_pool_mutex_exit();
 
1528
 
 
1529
        return(bpage);
 
1530
}
 
1531
#endif /* UNIV_DEBUG_FILE_ACCESSES */
 
1532
 
 
1533
/************************************************************************
 
1534
Get read access to a compressed page (usually of type
 
1535
FIL_PAGE_TYPE_ZBLOB or FIL_PAGE_TYPE_ZBLOB2).
 
1536
The page must be released with buf_page_release_zip().
 
1537
NOTE: the page is not protected by any latch.  Mutual exclusion has to
 
1538
be implemented at a higher level.  In other words, all possible
 
1539
accesses to a given page through this function must be protected by
 
1540
the same set of mutexes or latches. */
 
1541
UNIV_INTERN
 
1542
buf_page_t*
 
1543
buf_page_get_zip(
 
1544
/*=============*/
 
1545
                                /* out: pointer to the block */
 
1546
        ulint           space,  /* in: space id */
 
1547
        ulint           zip_size,/* in: compressed page size */
 
1548
        ulint           offset) /* in: page number */
 
1549
{
 
1550
        buf_page_t*     bpage;
 
1551
        mutex_t*        block_mutex;
 
1552
        ibool           must_read;
 
1553
 
 
1554
#ifndef UNIV_LOG_DEBUG
 
1555
        ut_ad(!ibuf_inside());
 
1556
#endif
 
1557
        buf_pool->n_page_gets++;
 
1558
 
 
1559
        for (;;) {
 
1560
                buf_pool_mutex_enter();
 
1561
lookup:
 
1562
                bpage = buf_page_hash_get(space, offset);
 
1563
                if (bpage) {
 
1564
                        break;
 
1565
                }
 
1566
 
 
1567
                /* Page not in buf_pool: needs to be read from file */
 
1568
 
 
1569
                buf_pool_mutex_exit();
 
1570
 
 
1571
                buf_read_page(space, zip_size, offset);
 
1572
 
 
1573
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 
1574
                ut_a(++buf_dbg_counter % 37 || buf_validate());
 
1575
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
 
1576
        }
 
1577
 
 
1578
        if (UNIV_UNLIKELY(!bpage->zip.data)) {
 
1579
                /* There is no compressed page. */
 
1580
                buf_pool_mutex_exit();
 
1581
                return(NULL);
 
1582
        }
 
1583
 
 
1584
        block_mutex = buf_page_get_mutex(bpage);
 
1585
        mutex_enter(block_mutex);
 
1586
 
 
1587
        switch (buf_page_get_state(bpage)) {
 
1588
        case BUF_BLOCK_NOT_USED:
 
1589
        case BUF_BLOCK_READY_FOR_USE:
 
1590
        case BUF_BLOCK_MEMORY:
 
1591
        case BUF_BLOCK_REMOVE_HASH:
 
1592
        case BUF_BLOCK_ZIP_FREE:
 
1593
                ut_error;
 
1594
                break;
 
1595
        case BUF_BLOCK_ZIP_PAGE:
 
1596
        case BUF_BLOCK_ZIP_DIRTY:
 
1597
                bpage->buf_fix_count++;
 
1598
                break;
 
1599
        case BUF_BLOCK_FILE_PAGE:
 
1600
                /* Discard the uncompressed page frame if possible. */
 
1601
                if (buf_LRU_free_block(bpage, FALSE, NULL)
 
1602
                    == BUF_LRU_FREED) {
 
1603
 
 
1604
                        mutex_exit(block_mutex);
 
1605
                        goto lookup;
 
1606
                }
 
1607
 
 
1608
                buf_block_buf_fix_inc((buf_block_t*) bpage,
 
1609
                                      __FILE__, __LINE__);
 
1610
                break;
 
1611
        }
 
1612
 
 
1613
        must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ;
 
1614
 
 
1615
        buf_pool_mutex_exit();
 
1616
 
 
1617
        buf_page_set_accessed(bpage, TRUE);
 
1618
 
 
1619
        mutex_exit(block_mutex);
 
1620
 
 
1621
        buf_block_make_young(bpage);
 
1622
 
 
1623
#ifdef UNIV_DEBUG_FILE_ACCESSES
 
1624
        ut_a(!bpage->file_page_was_freed);
 
1625
#endif
 
1626
 
 
1627
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 
1628
        ut_a(++buf_dbg_counter % 5771 || buf_validate());
 
1629
        ut_a(bpage->buf_fix_count > 0);
 
1630
        ut_a(buf_page_in_file(bpage));
 
1631
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
 
1632
 
 
1633
        if (must_read) {
 
1634
                /* Let us wait until the read operation
 
1635
                completes */
 
1636
 
 
1637
                for (;;) {
 
1638
                        enum buf_io_fix io_fix;
 
1639
 
 
1640
                        mutex_enter(block_mutex);
 
1641
                        io_fix = buf_page_get_io_fix(bpage);
 
1642
                        mutex_exit(block_mutex);
 
1643
 
 
1644
                        if (io_fix == BUF_IO_READ) {
 
1645
 
 
1646
                                os_thread_sleep(WAIT_FOR_READ);
 
1647
                        } else {
 
1648
                                break;
 
1649
                        }
 
1650
                }
 
1651
        }
 
1652
 
 
1653
#ifdef UNIV_IBUF_COUNT_DEBUG
 
1654
        ut_a(ibuf_count_get(buf_page_get_space(bpage),
 
1655
                            buf_page_get_page_no(bpage)) == 0);
 
1656
#endif
 
1657
        return(bpage);
 
1658
}
 
1659
 
 
1660
/************************************************************************
 
1661
Initialize some fields of a control block. */
 
1662
UNIV_INLINE
 
1663
void
 
1664
buf_block_init_low(
 
1665
/*===============*/
 
1666
        buf_block_t*    block)  /* in: block to init */
 
1667
{
 
1668
        block->check_index_page_at_flush = FALSE;
 
1669
        block->index            = NULL;
 
1670
 
 
1671
        block->n_hash_helps     = 0;
 
1672
        block->is_hashed        = FALSE;
 
1673
        block->n_fields         = 1;
 
1674
        block->n_bytes          = 0;
 
1675
        block->left_side        = TRUE;
 
1676
}
 
1677
 
 
1678
/************************************************************************
 
1679
Decompress a block. */
 
1680
static
 
1681
ibool
 
1682
buf_zip_decompress(
 
1683
/*===============*/
 
1684
                                /* out: TRUE if successful */
 
1685
        buf_block_t*    block,  /* in/out: block */
 
1686
        ibool           check)  /* in: TRUE=verify the page checksum */
 
1687
{
 
1688
        const byte* frame = block->page.zip.data;
 
1689
 
 
1690
        ut_ad(buf_block_get_zip_size(block));
 
1691
        ut_a(buf_block_get_space(block) != 0);
 
1692
 
 
1693
        if (UNIV_LIKELY(check)) {
 
1694
                ulint   stamp_checksum  = mach_read_from_4(
 
1695
                        frame + FIL_PAGE_SPACE_OR_CHKSUM);
 
1696
                ulint   calc_checksum   = page_zip_calc_checksum(
 
1697
                        frame, page_zip_get_size(&block->page.zip));
 
1698
 
 
1699
                if (UNIV_UNLIKELY(stamp_checksum != calc_checksum)) {
 
1700
                        ut_print_timestamp(stderr);
 
1701
                        fprintf(stderr,
 
1702
                                "  InnoDB: compressed page checksum mismatch"
 
1703
                                " (space %u page %u): %lu != %lu\n",
 
1704
                                block->page.space, block->page.offset,
 
1705
                                stamp_checksum, calc_checksum);
 
1706
                        return(FALSE);
 
1707
                }
 
1708
        }
 
1709
 
 
1710
        switch (fil_page_get_type(frame)) {
 
1711
        case FIL_PAGE_INDEX:
 
1712
                if (page_zip_decompress(&block->page.zip,
 
1713
                                        block->frame)) {
 
1714
                        return(TRUE);
 
1715
                }
 
1716
 
 
1717
                fprintf(stderr,
 
1718
                        "InnoDB: unable to decompress space %lu page %lu\n",
 
1719
                        (ulong) block->page.space,
 
1720
                        (ulong) block->page.offset);
 
1721
                return(FALSE);
 
1722
 
 
1723
        case FIL_PAGE_TYPE_ALLOCATED:
 
1724
        case FIL_PAGE_INODE:
 
1725
        case FIL_PAGE_IBUF_BITMAP:
 
1726
        case FIL_PAGE_TYPE_FSP_HDR:
 
1727
        case FIL_PAGE_TYPE_XDES:
 
1728
        case FIL_PAGE_TYPE_ZBLOB:
 
1729
        case FIL_PAGE_TYPE_ZBLOB2:
 
1730
                /* Copy to uncompressed storage. */
 
1731
                memcpy(block->frame, frame,
 
1732
                       buf_block_get_zip_size(block));
 
1733
                return(TRUE);
 
1734
        }
 
1735
 
 
1736
        ut_print_timestamp(stderr);
 
1737
        fprintf(stderr,
 
1738
                "  InnoDB: unknown compressed page"
 
1739
                " type %lu\n",
 
1740
                fil_page_get_type(frame));
 
1741
        return(FALSE);
 
1742
}
 
1743
 
 
1744
/************************************************************************
 
1745
Find out if a buffer block was created by buf_chunk_init(). */
 
1746
static
 
1747
ibool
 
1748
buf_block_is_uncompressed(
 
1749
/*======================*/
 
1750
                                        /* out: TRUE if "block" has
 
1751
                                        been added to buf_pool->free
 
1752
                                        by buf_chunk_init() */
 
1753
        const buf_block_t*      block)  /* in: pointer to block,
 
1754
                                        not dereferenced */
 
1755
{
 
1756
        const buf_chunk_t*              chunk   = buf_pool->chunks;
 
1757
        const buf_chunk_t* const        echunk  = chunk + buf_pool->n_chunks;
 
1758
 
 
1759
        ut_ad(buf_pool_mutex_own());
 
1760
 
 
1761
        if (UNIV_UNLIKELY((((ulint) block) % sizeof *block) != 0)) {
 
1762
                /* The pointer should be aligned. */
 
1763
                return(FALSE);
 
1764
        }
 
1765
 
 
1766
        while (chunk < echunk) {
 
1767
                if (block >= chunk->blocks
 
1768
                    && block < chunk->blocks + chunk->size) {
 
1769
 
 
1770
                        return(TRUE);
 
1771
                }
 
1772
 
 
1773
                chunk++;
 
1774
        }
 
1775
 
 
1776
        return(FALSE);
1139
1777
}
1140
1778
 
1141
1779
/************************************************************************
1142
1780
This is the general function used to get access to a database page. */
1143
 
 
1144
 
buf_frame_t*
 
1781
UNIV_INTERN
 
1782
buf_block_t*
1145
1783
buf_page_get_gen(
1146
1784
/*=============*/
1147
 
                                /* out: pointer to the frame or NULL */
 
1785
                                /* out: pointer to the block or NULL */
1148
1786
        ulint           space,  /* in: space id */
 
1787
        ulint           zip_size,/* in: compressed page size in bytes
 
1788
                                or 0 for uncompressed pages */
1149
1789
        ulint           offset, /* in: page number */
1150
1790
        ulint           rw_latch,/* in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
1151
 
        buf_frame_t*    guess,  /* in: guessed frame or NULL */
 
1791
        buf_block_t*    guess,  /* in: guessed block or NULL */
1152
1792
        ulint           mode,   /* in: BUF_GET, BUF_GET_IF_IN_POOL,
1153
1793
                                BUF_GET_NO_LATCH, BUF_GET_NOWAIT */
1154
1794
        const char*     file,   /* in: file name */
1158
1798
        buf_block_t*    block;
1159
1799
        ibool           accessed;
1160
1800
        ulint           fix_type;
1161
 
        ibool           success;
1162
1801
        ibool           must_read;
1163
1802
 
1164
1803
        ut_ad(mtr);
1168
1807
        ut_ad((mode != BUF_GET_NO_LATCH) || (rw_latch == RW_NO_LATCH));
1169
1808
        ut_ad((mode == BUF_GET) || (mode == BUF_GET_IF_IN_POOL)
1170
1809
              || (mode == BUF_GET_NO_LATCH) || (mode == BUF_GET_NOWAIT));
 
1810
        ut_ad(zip_size == fil_space_get_zip_size(space));
1171
1811
#ifndef UNIV_LOG_DEBUG
1172
 
        ut_ad(!ibuf_inside() || ibuf_page(space, offset));
 
1812
        ut_ad(!ibuf_inside() || ibuf_page(space, zip_size, offset));
1173
1813
#endif
1174
1814
        buf_pool->n_page_gets++;
1175
1815
loop:
1176
 
        block = NULL;
1177
 
        mutex_enter_fast(&(buf_pool->mutex));
1178
 
 
1179
 
        if (guess) {
1180
 
                block = buf_block_align(guess);
1181
 
 
1182
 
                if ((offset != block->offset) || (space != block->space)
1183
 
                    || (block->state != BUF_BLOCK_FILE_PAGE)) {
1184
 
 
1185
 
                        block = NULL;
 
1816
        block = guess;
 
1817
        buf_pool_mutex_enter();
 
1818
 
 
1819
        if (block) {
 
1820
                /* If the guess is a compressed page descriptor that
 
1821
                has been allocated by buf_buddy_alloc(), it may have
 
1822
                been invalidated by buf_buddy_relocate().  In that
 
1823
                case, block could point to something that happens to
 
1824
                contain the expected bits in block->page.  Similarly,
 
1825
                the guess may be pointing to a buffer pool chunk that
 
1826
                has been released when resizing the buffer pool. */
 
1827
 
 
1828
                if (!buf_block_is_uncompressed(block)
 
1829
                    || offset != block->page.offset
 
1830
                    || space != block->page.space
 
1831
                    || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
 
1832
 
 
1833
                        block = guess = NULL;
 
1834
                } else {
 
1835
                        ut_ad(!block->page.in_zip_hash);
 
1836
                        ut_ad(block->page.in_page_hash);
1186
1837
                }
1187
1838
        }
1188
1839
 
1189
1840
        if (block == NULL) {
1190
 
                block = buf_page_hash_get(space, offset);
 
1841
                block = (buf_block_t*) buf_page_hash_get(space, offset);
1191
1842
        }
1192
1843
 
 
1844
loop2:
1193
1845
        if (block == NULL) {
1194
1846
                /* Page not in buf_pool: needs to be read from file */
1195
1847
 
1196
 
                mutex_exit(&(buf_pool->mutex));
 
1848
                buf_pool_mutex_exit();
1197
1849
 
1198
1850
                if (mode == BUF_GET_IF_IN_POOL) {
1199
1851
 
1200
1852
                        return(NULL);
1201
1853
                }
1202
1854
 
1203
 
                buf_read_page(space, offset);
1204
 
 
1205
 
#ifdef UNIV_DEBUG
1206
 
                buf_dbg_counter++;
1207
 
 
1208
 
                if (buf_dbg_counter % 37 == 0) {
1209
 
                        ut_ad(buf_validate());
1210
 
                }
1211
 
#endif
 
1855
                buf_read_page(space, zip_size, offset);
 
1856
 
 
1857
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 
1858
                ut_a(++buf_dbg_counter % 37 || buf_validate());
 
1859
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
1212
1860
                goto loop;
1213
1861
        }
1214
1862
 
1215
 
        mutex_enter(&block->mutex);
1216
 
 
1217
 
        ut_a(block->state == BUF_BLOCK_FILE_PAGE);
1218
 
 
1219
 
        must_read = FALSE;
1220
 
 
1221
 
        if (block->io_fix == BUF_IO_READ) {
1222
 
 
1223
 
                must_read = TRUE;
1224
 
 
1225
 
                if (mode == BUF_GET_IF_IN_POOL) {
1226
 
                        /* The page is only being read to buffer */
1227
 
                        mutex_exit(&buf_pool->mutex);
 
1863
        ut_ad(page_zip_get_size(&block->page.zip) == zip_size);
 
1864
 
 
1865
        must_read = buf_block_get_io_fix(block) == BUF_IO_READ;
 
1866
 
 
1867
        if (must_read && mode == BUF_GET_IF_IN_POOL) {
 
1868
                /* The page is only being read to buffer */
 
1869
                buf_pool_mutex_exit();
 
1870
 
 
1871
                return(NULL);
 
1872
        }
 
1873
 
 
1874
        switch (buf_block_get_state(block)) {
 
1875
                buf_page_t*     bpage;
 
1876
                ibool           success;
 
1877
 
 
1878
        case BUF_BLOCK_FILE_PAGE:
 
1879
                break;
 
1880
 
 
1881
        case BUF_BLOCK_ZIP_PAGE:
 
1882
        case BUF_BLOCK_ZIP_DIRTY:
 
1883
                bpage = &block->page;
 
1884
 
 
1885
                if (bpage->buf_fix_count
 
1886
                    || buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
 
1887
                        /* This condition often occurs when the buffer
 
1888
                        is not buffer-fixed, but I/O-fixed by
 
1889
                        buf_page_init_for_read(). */
 
1890
wait_until_unfixed:
 
1891
                        /* The block is buffer-fixed or I/O-fixed.
 
1892
                        Try again later. */
 
1893
                        buf_pool_mutex_exit();
 
1894
                        os_thread_sleep(WAIT_FOR_READ);
 
1895
 
 
1896
                        goto loop;
 
1897
                }
 
1898
 
 
1899
                /* Allocate an uncompressed page. */
 
1900
                buf_pool_mutex_exit();
 
1901
 
 
1902
                block = buf_LRU_get_free_block(0);
 
1903
                ut_a(block);
 
1904
 
 
1905
                buf_pool_mutex_enter();
 
1906
                mutex_enter(&block->mutex);
 
1907
 
 
1908
                {
 
1909
                        buf_page_t*     hash_bpage
 
1910
                                = buf_page_hash_get(space, offset);
 
1911
 
 
1912
                        if (UNIV_UNLIKELY(bpage != hash_bpage)) {
 
1913
                                /* The buf_pool->page_hash was modified
 
1914
                                while buf_pool_mutex was released.
 
1915
                                Free the block that was allocated. */
 
1916
 
 
1917
                                buf_LRU_block_free_non_file_page(block);
 
1918
                                mutex_exit(&block->mutex);
 
1919
 
 
1920
                                block = (buf_block_t*) hash_bpage;
 
1921
                                goto loop2;
 
1922
                        }
 
1923
                }
 
1924
 
 
1925
                if (UNIV_UNLIKELY
 
1926
                    (bpage->buf_fix_count
 
1927
                     || buf_page_get_io_fix(bpage) != BUF_IO_NONE)) {
 
1928
 
 
1929
                        /* The block was buffer-fixed or I/O-fixed
 
1930
                        while buf_pool_mutex was not held by this thread.
 
1931
                        Free the block that was allocated and try again.
 
1932
                        This should be extremely unlikely. */
 
1933
 
 
1934
                        buf_LRU_block_free_non_file_page(block);
1228
1935
                        mutex_exit(&block->mutex);
1229
1936
 
 
1937
                        goto wait_until_unfixed;
 
1938
                }
 
1939
 
 
1940
                /* Move the compressed page from bpage to block,
 
1941
                and uncompress it. */
 
1942
 
 
1943
                mutex_enter(&buf_pool_zip_mutex);
 
1944
 
 
1945
                buf_relocate(bpage, &block->page);
 
1946
                buf_block_init_low(block);
 
1947
                block->lock_hash_val = lock_rec_hash(space, offset);
 
1948
 
 
1949
                UNIV_MEM_DESC(&block->page.zip.data,
 
1950
                              page_zip_get_size(&block->page.zip), block);
 
1951
 
 
1952
                if (buf_page_get_state(&block->page)
 
1953
                    == BUF_BLOCK_ZIP_PAGE) {
 
1954
                        UT_LIST_REMOVE(list, buf_pool->zip_clean,
 
1955
                                       &block->page);
 
1956
                        ut_ad(!block->page.in_flush_list);
 
1957
                } else {
 
1958
                        /* Relocate buf_pool->flush_list. */
 
1959
                        buf_page_t*     b;
 
1960
 
 
1961
                        b = UT_LIST_GET_PREV(list, &block->page);
 
1962
                        ut_ad(block->page.in_flush_list);
 
1963
                        UT_LIST_REMOVE(list, buf_pool->flush_list,
 
1964
                                       &block->page);
 
1965
 
 
1966
                        if (b) {
 
1967
                                UT_LIST_INSERT_AFTER(
 
1968
                                        list, buf_pool->flush_list, b,
 
1969
                                        &block->page);
 
1970
                        } else {
 
1971
                                UT_LIST_ADD_FIRST(
 
1972
                                        list, buf_pool->flush_list,
 
1973
                                        &block->page);
 
1974
                        }
 
1975
                }
 
1976
 
 
1977
                /* Buffer-fix, I/O-fix, and X-latch the block
 
1978
                for the duration of the decompression.
 
1979
                Also add the block to the unzip_LRU list. */
 
1980
                block->page.state = BUF_BLOCK_FILE_PAGE;
 
1981
 
 
1982
                /* Insert at the front of unzip_LRU list */
 
1983
                buf_unzip_LRU_add_block(block, FALSE);
 
1984
 
 
1985
                block->page.buf_fix_count = 1;
 
1986
                buf_block_set_io_fix(block, BUF_IO_READ);
 
1987
                buf_pool->n_pend_unzip++;
 
1988
                rw_lock_x_lock(&block->lock);
 
1989
                mutex_exit(&block->mutex);
 
1990
                mutex_exit(&buf_pool_zip_mutex);
 
1991
 
 
1992
                buf_buddy_free(bpage, sizeof *bpage);
 
1993
 
 
1994
                buf_pool_mutex_exit();
 
1995
 
 
1996
                /* Decompress the page and apply buffered operations
 
1997
                while not holding buf_pool_mutex or block->mutex. */
 
1998
                success = buf_zip_decompress(block, srv_use_checksums);
 
1999
 
 
2000
                if (UNIV_LIKELY(success)) {
 
2001
                        ibuf_merge_or_delete_for_page(block, space, offset,
 
2002
                                                      zip_size, TRUE);
 
2003
                }
 
2004
 
 
2005
                /* Unfix and unlatch the block. */
 
2006
                buf_pool_mutex_enter();
 
2007
                mutex_enter(&block->mutex);
 
2008
                buf_pool->n_pend_unzip--;
 
2009
                block->page.buf_fix_count--;
 
2010
                buf_block_set_io_fix(block, BUF_IO_NONE);
 
2011
                mutex_exit(&block->mutex);
 
2012
                rw_lock_x_unlock(&block->lock);
 
2013
 
 
2014
                if (UNIV_UNLIKELY(!success)) {
 
2015
 
 
2016
                        buf_pool_mutex_exit();
1230
2017
                        return(NULL);
1231
2018
                }
1232
 
        }
1233
 
 
1234
 
        /* If AWE is enabled and the page is not mapped to a frame, then
1235
 
        map it */
1236
 
 
1237
 
        if (block->frame == NULL) {
1238
 
                ut_a(srv_use_awe);
1239
 
 
1240
 
                /* We set second parameter TRUE because the block is in the
1241
 
                LRU list and we must put it to awe_LRU_free_mapped list once
1242
 
                mapped to a frame */
1243
 
 
1244
 
                buf_awe_map_page_to_frame(block, TRUE);
1245
 
        }
1246
 
 
1247
 
#ifdef UNIV_SYNC_DEBUG
1248
 
        buf_block_buf_fix_inc_debug(block, file, line);
1249
 
#else
1250
 
        buf_block_buf_fix_inc(block);
1251
 
#endif
1252
 
        mutex_exit(&buf_pool->mutex);
 
2019
 
 
2020
                break;
 
2021
 
 
2022
        case BUF_BLOCK_ZIP_FREE:
 
2023
        case BUF_BLOCK_NOT_USED:
 
2024
        case BUF_BLOCK_READY_FOR_USE:
 
2025
        case BUF_BLOCK_MEMORY:
 
2026
        case BUF_BLOCK_REMOVE_HASH:
 
2027
                ut_error;
 
2028
                break;
 
2029
        }
 
2030
 
 
2031
        ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
 
2032
 
 
2033
        mutex_enter(&block->mutex);
 
2034
        UNIV_MEM_ASSERT_RW(&block->page, sizeof block->page);
 
2035
 
 
2036
        buf_block_buf_fix_inc(block, file, line);
 
2037
        buf_pool_mutex_exit();
1253
2038
 
1254
2039
        /* Check if this is the first access to the page */
1255
2040
 
1256
 
        accessed = block->accessed;
 
2041
        accessed = buf_page_is_accessed(&block->page);
1257
2042
 
1258
 
        block->accessed = TRUE;
 
2043
        buf_page_set_accessed(&block->page, TRUE);
1259
2044
 
1260
2045
        mutex_exit(&block->mutex);
1261
2046
 
1262
 
        buf_block_make_young(block);
 
2047
        buf_block_make_young(&block->page);
1263
2048
 
1264
2049
#ifdef UNIV_DEBUG_FILE_ACCESSES
1265
 
        ut_a(block->file_page_was_freed == FALSE);
1266
 
#endif
1267
 
 
1268
 
#ifdef UNIV_DEBUG
1269
 
        buf_dbg_counter++;
1270
 
 
1271
 
        if (buf_dbg_counter % 5771 == 0) {
1272
 
                ut_ad(buf_validate());
1273
 
        }
1274
 
#endif
1275
 
        ut_ad(block->buf_fix_count > 0);
1276
 
        ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
 
2050
        ut_a(!block->page.file_page_was_freed);
 
2051
#endif
 
2052
 
 
2053
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 
2054
        ut_a(++buf_dbg_counter % 5771 || buf_validate());
 
2055
        ut_a(block->page.buf_fix_count > 0);
 
2056
        ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
 
2057
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
1277
2058
 
1278
2059
        if (mode == BUF_GET_NOWAIT) {
 
2060
                ibool   success;
 
2061
 
1279
2062
                if (rw_latch == RW_S_LATCH) {
1280
2063
                        success = rw_lock_s_lock_func_nowait(&(block->lock),
1281
2064
                                                             file, line);
1289
2072
 
1290
2073
                if (!success) {
1291
2074
                        mutex_enter(&block->mutex);
1292
 
 
1293
 
                        block->buf_fix_count--;
1294
 
 
 
2075
                        buf_block_buf_fix_dec(block);
1295
2076
                        mutex_exit(&block->mutex);
1296
 
#ifdef UNIV_SYNC_DEBUG
1297
 
                        rw_lock_s_unlock(&(block->debug_latch));
1298
 
#endif
1299
2077
 
1300
2078
                        return(NULL);
1301
2079
                }
1306
2084
                        completes */
1307
2085
 
1308
2086
                        for (;;) {
 
2087
                                enum buf_io_fix io_fix;
 
2088
 
1309
2089
                                mutex_enter(&block->mutex);
1310
 
 
1311
 
                                if (block->io_fix == BUF_IO_READ) {
1312
 
 
1313
 
                                        mutex_exit(&block->mutex);
 
2090
                                io_fix = buf_block_get_io_fix(block);
 
2091
                                mutex_exit(&block->mutex);
 
2092
 
 
2093
                                if (io_fix == BUF_IO_READ) {
1314
2094
 
1315
2095
                                        os_thread_sleep(WAIT_FOR_READ);
1316
2096
                                } else {
1317
 
 
1318
 
                                        mutex_exit(&block->mutex);
1319
 
 
1320
2097
                                        break;
1321
2098
                                }
1322
2099
                        }
1340
2117
                /* In the case of a first access, try to apply linear
1341
2118
                read-ahead */
1342
2119
 
1343
 
                buf_read_ahead_linear(space, offset);
 
2120
                buf_read_ahead_linear(space, zip_size, offset);
1344
2121
        }
1345
2122
 
1346
 
#ifdef UNIV_IBUF_DEBUG
1347
 
        ut_a(ibuf_count_get(block->space, block->offset) == 0);
 
2123
#ifdef UNIV_IBUF_COUNT_DEBUG
 
2124
        ut_a(ibuf_count_get(buf_block_get_space(block),
 
2125
                            buf_block_get_page_no(block)) == 0);
1348
2126
#endif
1349
 
        return(block->frame);
 
2127
        return(block);
1350
2128
}
1351
2129
 
1352
2130
/************************************************************************
1353
2131
This is the general function used to get optimistic access to a database
1354
2132
page. */
1355
 
 
 
2133
UNIV_INTERN
1356
2134
ibool
1357
2135
buf_page_optimistic_get_func(
1358
2136
/*=========================*/
1359
2137
                                /* out: TRUE if success */
1360
2138
        ulint           rw_latch,/* in: RW_S_LATCH, RW_X_LATCH */
1361
2139
        buf_block_t*    block,  /* in: guessed buffer block */
1362
 
        buf_frame_t*    guess,  /* in: guessed frame; note that AWE may move
1363
 
                                frames */
1364
 
        dulint          modify_clock,/* in: modify clock value if mode is
 
2140
        ib_uint64_t     modify_clock,/* in: modify clock value if mode is
1365
2141
                                ..._GUESS_ON_CLOCK */
1366
2142
        const char*     file,   /* in: file name */
1367
2143
        ulint           line,   /* in: line where called */
1374
2150
        ut_ad(mtr && block);
1375
2151
        ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
1376
2152
 
1377
 
        /* If AWE is used, block may have a different frame now, e.g., NULL */
1378
 
 
1379
2153
        mutex_enter(&block->mutex);
1380
2154
 
1381
 
        if (UNIV_UNLIKELY(block->state != BUF_BLOCK_FILE_PAGE)
1382
 
            || UNIV_UNLIKELY(block->frame != guess)) {
 
2155
        if (UNIV_UNLIKELY(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE)) {
1383
2156
 
1384
2157
                mutex_exit(&block->mutex);
1385
2158
 
1386
2159
                return(FALSE);
1387
2160
        }
1388
2161
 
1389
 
#ifdef UNIV_SYNC_DEBUG
1390
 
        buf_block_buf_fix_inc_debug(block, file, line);
1391
 
#else
1392
 
        buf_block_buf_fix_inc(block);
1393
 
#endif
1394
 
        accessed = block->accessed;
1395
 
        block->accessed = TRUE;
 
2162
        buf_block_buf_fix_inc(block, file, line);
 
2163
        accessed = buf_page_is_accessed(&block->page);
 
2164
        buf_page_set_accessed(&block->page, TRUE);
1396
2165
 
1397
2166
        mutex_exit(&block->mutex);
1398
2167
 
1399
 
        buf_block_make_young(block);
 
2168
        buf_block_make_young(&block->page);
1400
2169
 
1401
2170
        /* Check if this is the first access to the page */
1402
2171
 
1403
 
        ut_ad(!ibuf_inside() || ibuf_page(block->space, block->offset));
 
2172
        ut_ad(!ibuf_inside()
 
2173
              || ibuf_page(buf_block_get_space(block),
 
2174
                           buf_block_get_zip_size(block),
 
2175
                           buf_block_get_page_no(block)));
1404
2176
 
1405
2177
        if (rw_latch == RW_S_LATCH) {
1406
2178
                success = rw_lock_s_lock_func_nowait(&(block->lock),
1414
2186
 
1415
2187
        if (UNIV_UNLIKELY(!success)) {
1416
2188
                mutex_enter(&block->mutex);
1417
 
 
1418
 
                block->buf_fix_count--;
1419
 
 
 
2189
                buf_block_buf_fix_dec(block);
1420
2190
                mutex_exit(&block->mutex);
1421
2191
 
1422
 
#ifdef UNIV_SYNC_DEBUG
1423
 
                rw_lock_s_unlock(&(block->debug_latch));
1424
 
#endif
1425
2192
                return(FALSE);
1426
2193
        }
1427
2194
 
1428
 
        if (UNIV_UNLIKELY(!UT_DULINT_EQ(modify_clock, block->modify_clock))) {
 
2195
        if (UNIV_UNLIKELY(modify_clock != block->modify_clock)) {
1429
2196
#ifdef UNIV_SYNC_DEBUG
1430
 
                buf_page_dbg_add_level(block->frame, SYNC_NO_ORDER_CHECK);
 
2197
                buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
1431
2198
#endif /* UNIV_SYNC_DEBUG */
1432
2199
                if (rw_latch == RW_S_LATCH) {
1433
2200
                        rw_lock_s_unlock(&(block->lock));
1436
2203
                }
1437
2204
 
1438
2205
                mutex_enter(&block->mutex);
1439
 
 
1440
 
                block->buf_fix_count--;
1441
 
 
 
2206
                buf_block_buf_fix_dec(block);
1442
2207
                mutex_exit(&block->mutex);
1443
2208
 
1444
 
#ifdef UNIV_SYNC_DEBUG
1445
 
                rw_lock_s_unlock(&(block->debug_latch));
1446
 
#endif
1447
2209
                return(FALSE);
1448
2210
        }
1449
2211
 
1450
2212
        mtr_memo_push(mtr, block, fix_type);
1451
2213
 
1452
 
#ifdef UNIV_DEBUG
1453
 
        buf_dbg_counter++;
1454
 
 
1455
 
        if (buf_dbg_counter % 5771 == 0) {
1456
 
                ut_ad(buf_validate());
1457
 
        }
1458
 
#endif
1459
 
        ut_ad(block->buf_fix_count > 0);
1460
 
        ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
 
2214
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 
2215
        ut_a(++buf_dbg_counter % 5771 || buf_validate());
 
2216
        ut_a(block->page.buf_fix_count > 0);
 
2217
        ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
 
2218
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
1461
2219
 
1462
2220
#ifdef UNIV_DEBUG_FILE_ACCESSES
1463
 
        ut_a(block->file_page_was_freed == FALSE);
 
2221
        ut_a(block->page.file_page_was_freed == FALSE);
1464
2222
#endif
1465
2223
        if (UNIV_UNLIKELY(!accessed)) {
1466
2224
                /* In the case of a first access, try to apply linear
1467
2225
                read-ahead */
1468
2226
 
1469
 
                buf_read_ahead_linear(buf_frame_get_space_id(guess),
1470
 
                                      buf_frame_get_page_no(guess));
 
2227
                buf_read_ahead_linear(buf_block_get_space(block),
 
2228
                                      buf_block_get_zip_size(block),
 
2229
                                      buf_block_get_page_no(block));
1471
2230
        }
1472
2231
 
1473
 
#ifdef UNIV_IBUF_DEBUG
1474
 
        ut_a(ibuf_count_get(block->space, block->offset) == 0);
 
2232
#ifdef UNIV_IBUF_COUNT_DEBUG
 
2233
        ut_a(ibuf_count_get(buf_block_get_space(block),
 
2234
                            buf_block_get_page_no(block)) == 0);
1475
2235
#endif
1476
2236
        buf_pool->n_page_gets++;
1477
2237
 
1482
2242
This is used to get access to a known database page, when no waiting can be
1483
2243
done. For example, if a search in an adaptive hash index leads us to this
1484
2244
frame. */
1485
 
 
 
2245
UNIV_INTERN
1486
2246
ibool
1487
2247
buf_page_get_known_nowait(
1488
2248
/*======================*/
1489
2249
                                /* out: TRUE if success */
1490
2250
        ulint           rw_latch,/* in: RW_S_LATCH, RW_X_LATCH */
1491
 
        buf_frame_t*    guess,  /* in: the known page frame */
 
2251
        buf_block_t*    block,  /* in: the known page */
1492
2252
        ulint           mode,   /* in: BUF_MAKE_YOUNG or BUF_KEEP_OLD */
1493
2253
        const char*     file,   /* in: file name */
1494
2254
        ulint           line,   /* in: line where called */
1495
2255
        mtr_t*          mtr)    /* in: mini-transaction */
1496
2256
{
1497
 
        buf_block_t*    block;
1498
2257
        ibool           success;
1499
2258
        ulint           fix_type;
1500
2259
 
1501
2260
        ut_ad(mtr);
1502
2261
        ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
1503
2262
 
1504
 
        block = buf_block_align(guess);
1505
 
 
1506
2263
        mutex_enter(&block->mutex);
1507
2264
 
1508
 
        if (block->state == BUF_BLOCK_REMOVE_HASH) {
 
2265
        if (buf_block_get_state(block) == BUF_BLOCK_REMOVE_HASH) {
1509
2266
                /* Another thread is just freeing the block from the LRU list
1510
2267
                of the buffer pool: do not try to access this page; this
1511
2268
                attempt to access the page can only come through the hash
1518
2275
                return(FALSE);
1519
2276
        }
1520
2277
 
1521
 
        ut_a(block->state == BUF_BLOCK_FILE_PAGE);
1522
 
 
1523
 
#ifdef UNIV_SYNC_DEBUG
1524
 
        buf_block_buf_fix_inc_debug(block, file, line);
1525
 
#else
1526
 
        buf_block_buf_fix_inc(block);
1527
 
#endif
 
2278
        ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
 
2279
 
 
2280
        buf_block_buf_fix_inc(block, file, line);
 
2281
 
1528
2282
        mutex_exit(&block->mutex);
1529
2283
 
1530
2284
        if (mode == BUF_MAKE_YOUNG) {
1531
 
                buf_block_make_young(block);
 
2285
                buf_block_make_young(&block->page);
1532
2286
        }
1533
2287
 
1534
2288
        ut_ad(!ibuf_inside() || (mode == BUF_KEEP_OLD));
1545
2299
 
1546
2300
        if (!success) {
1547
2301
                mutex_enter(&block->mutex);
1548
 
 
1549
 
                block->buf_fix_count--;
1550
 
 
 
2302
                buf_block_buf_fix_dec(block);
1551
2303
                mutex_exit(&block->mutex);
1552
2304
 
1553
 
#ifdef UNIV_SYNC_DEBUG
1554
 
                rw_lock_s_unlock(&(block->debug_latch));
1555
 
#endif
1556
 
 
1557
2305
                return(FALSE);
1558
2306
        }
1559
2307
 
1560
2308
        mtr_memo_push(mtr, block, fix_type);
1561
2309
 
1562
 
#ifdef UNIV_DEBUG
1563
 
        buf_dbg_counter++;
1564
 
 
1565
 
        if (buf_dbg_counter % 5771 == 0) {
1566
 
                ut_ad(buf_validate());
1567
 
        }
1568
 
#endif
1569
 
        ut_ad(block->buf_fix_count > 0);
1570
 
        ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
 
2310
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 
2311
        ut_a(++buf_dbg_counter % 5771 || buf_validate());
 
2312
        ut_a(block->page.buf_fix_count > 0);
 
2313
        ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
 
2314
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
1571
2315
#ifdef UNIV_DEBUG_FILE_ACCESSES
1572
 
        ut_a(block->file_page_was_freed == FALSE);
 
2316
        ut_a(block->page.file_page_was_freed == FALSE);
1573
2317
#endif
1574
2318
 
1575
 
#ifdef UNIV_IBUF_DEBUG
 
2319
#ifdef UNIV_IBUF_COUNT_DEBUG
1576
2320
        ut_a((mode == BUF_KEEP_OLD)
1577
 
             || (ibuf_count_get(block->space, block->offset) == 0));
 
2321
             || (ibuf_count_get(buf_block_get_space(block),
 
2322
                                buf_block_get_page_no(block)) == 0));
1578
2323
#endif
1579
2324
        buf_pool->n_page_gets++;
1580
2325
 
1581
2326
        return(TRUE);
1582
2327
}
1583
2328
 
 
2329
/***********************************************************************
 
2330
Given a tablespace id and page number, tries to get that page. If the
 
2331
page is not in the buffer pool it is not loaded and NULL is returned.
 
2332
Suitable for using when holding the kernel mutex.
The page latch is only requested through the *_nowait lock functions:
an S-latch is tried first and, failing that, an X-latch. If neither
is granted, the buffer-fix taken here is undone and NULL is returned.
On success the block is registered in the mtr memo with the matching
fix type and buf_pool->n_page_gets is incremented. */
 
2333
UNIV_INTERN
 
2334
const buf_block_t*
 
2335
buf_page_try_get_func(
 
2336
/*==================*/
 
2337
                                /* out: pointer to the buffer-fixed and
                                latched block, or NULL if the page was
                                not found or could not be latched */
 
2338
        ulint           space_id,/* in: tablespace id */
 
2339
        ulint           page_no,/* in: page number */
 
2340
        const char*     file,   /* in: file name (forwarded to the
                                buf-fix and latch calls) */
 
2341
        ulint           line,   /* in: line where called */
 
2342
        mtr_t*          mtr)    /* in: mini-transaction; the block is
                                pushed to its memo on success */
 
2343
{
 
2344
        buf_block_t*    block;
 
2345
        ibool           success;
 
2346
        ulint           fix_type;       /* MTR_MEMO_PAGE_S_FIX or
                                        MTR_MEMO_PAGE_X_FIX, depending on
                                        which latch was obtained */
 
2347
 
 
2348
        /* Look the block up by (space_id, page_no) while holding the
        buffer pool mutex. */
        buf_pool_mutex_enter();
 
2349
        block = buf_block_hash_get(space_id, page_no);
 
2350
 
 
2351
        if (!block) {
                /* Not found: nothing has been fixed or latched yet. */
 
2352
                buf_pool_mutex_exit();
 
2353
                return(NULL);
 
2354
        }
 
2355
 
 
2356
        /* block->mutex is acquired before the buffer pool mutex is
        released, so the two critical sections overlap.
        NOTE(review): presumably this keeps the block from being
        evicted between the lookup and the buffer-fix -- confirm. */
        mutex_enter(&block->mutex);
 
2357
        buf_pool_mutex_exit();
 
2358
 
 
2359
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 
2360
        /* Debug builds only: the block found must be a file page
        carrying exactly the requested space id and page number. */
        ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
 
2361
        ut_a(buf_block_get_space(block) == space_id);
 
2362
        ut_a(buf_block_get_page_no(block) == page_no);
 
2363
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
 
2364
 
 
2365
        /* Buffer-fix the block while block->mutex is still held. */
        buf_block_buf_fix_inc(block, file, line);
 
2366
        mutex_exit(&block->mutex);
 
2367
 
 
2368
        /* First attempt: S-latch through the no-wait lock function. */
        fix_type = MTR_MEMO_PAGE_S_FIX;
 
2369
        success = rw_lock_s_lock_func_nowait(&block->lock, file, line);
 
2370
 
 
2371
        if (!success) {
 
2372
                /* Let us try to get an X-latch. If the current thread
 
2373
                is holding an X-latch on the page, we cannot get an
 
2374
                S-latch. */
 
2375
 
 
2376
                fix_type = MTR_MEMO_PAGE_X_FIX;
 
2377
                success = rw_lock_x_lock_func_nowait(&block->lock,
 
2378
                                                     file, line);
 
2379
        }
 
2380
 
 
2381
        if (!success) {
                /* Neither latch was granted: undo the buffer-fix taken
                above, again under block->mutex, and give up. */
 
2382
                mutex_enter(&block->mutex);
 
2383
                buf_block_buf_fix_dec(block);
 
2384
                mutex_exit(&block->mutex);
 
2385
 
 
2386
                return(NULL);
 
2387
        }
 
2388
 
 
2389
        /* Record the block in the mini-transaction memo; fix_type
        tells which of the two latches was obtained. */
        mtr_memo_push(mtr, block, fix_type);
 
2390
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 
2391
        /* Debug builds only: buf_validate() is evaluated only when the
        pre-incremented counter is a multiple of 5771. */
        ut_a(++buf_dbg_counter % 5771 || buf_validate());
 
2392
        ut_a(block->page.buf_fix_count > 0);
 
2393
        ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
 
2394
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
 
2395
#ifdef UNIV_DEBUG_FILE_ACCESSES
 
2396
        ut_a(block->page.file_page_was_freed == FALSE);
 
2397
#endif /* UNIV_DEBUG_FILE_ACCESSES */
 
2398
#ifdef UNIV_SYNC_DEBUG
 
2399
        buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
 
2400
#endif /* UNIV_SYNC_DEBUG */
 
2401
        /* NOTE(review): no mutex is held by this function at this
        point; presumably n_page_gets is a statistics counter for
        which lost updates are tolerated -- confirm. */
        buf_pool->n_page_gets++;
 
2402
 
 
2403
        return(block);
 
2404
}
 
2405
 
 
2406
/************************************************************************
 
2407
Initialize some fields of a control block.
Only the bookkeeping fields listed below are reset; the page state,
space id and page offset are not touched here (the callers visible in
this file assign them separately after calling this function). */
 
2408
UNIV_INLINE
 
2409
void
 
2410
buf_page_init_low(
 
2411
/*==============*/
 
2412
        buf_page_t*     bpage)  /* in: block to init */
 
2413
{
 
2414
        bpage->flush_type = BUF_FLUSH_LRU;
 
2415
        /* The page has not been accessed, has no pending I/O and is
        not buffer-fixed by anyone. */
        bpage->accessed = FALSE;
 
2416
        bpage->io_fix = BUF_IO_NONE;
 
2417
        bpage->buf_fix_count = 0;
 
2418
        bpage->freed_page_clock = 0;
 
2419
        /* Both modification markers start out as zero. */
        bpage->newest_modification = 0;
 
2420
        bpage->oldest_modification = 0;
 
2421
        /* NOTE(review): presumably marks the hash chain field as
        invalid so that use before insertion into a hash table can be
        detected -- confirm against the HASH_INVALIDATE definition. */
        HASH_INVALIDATE(bpage, hash);
 
2422
#ifdef UNIV_DEBUG_FILE_ACCESSES
 
2423
        /* This field is only reset in UNIV_DEBUG_FILE_ACCESSES builds. */
        bpage->file_page_was_freed = FALSE;
 
2424
#endif /* UNIV_DEBUG_FILE_ACCESSES */
 
2425
}
 
2426
 
 
2427
#ifdef UNIV_HOTBACKUP
1584
2428
/************************************************************************
1585
2429
Inits a page to the buffer buf_pool, for use in ibbackup --restore. */
1586
 
 
 
2430
UNIV_INTERN
1587
2431
void
1588
2432
buf_page_init_for_backup_restore(
1589
2433
/*=============================*/
1590
2434
        ulint           space,  /* in: space id */
1591
2435
        ulint           offset, /* in: offset of the page within space
1592
2436
                                in units of a page */
 
2437
        ulint           zip_size,/* in: compressed page size in bytes
 
2438
                                or 0 for uncompressed pages */
1593
2439
        buf_block_t*    block)  /* in: block to init */
1594
2440
{
1595
 
        /* Set the state of the block */
1596
 
        block->magic_n          = BUF_BLOCK_MAGIC_N;
1597
 
 
1598
 
        block->state            = BUF_BLOCK_FILE_PAGE;
1599
 
        block->space            = space;
1600
 
        block->offset           = offset;
 
2441
        buf_block_init_low(block);
1601
2442
 
1602
2443
        block->lock_hash_val    = 0;
1603
2444
 
1604
 
        block->freed_page_clock = 0;
1605
 
 
1606
 
        block->newest_modification = ut_dulint_zero;
1607
 
        block->oldest_modification = ut_dulint_zero;
1608
 
 
1609
 
        block->accessed         = FALSE;
1610
 
        block->buf_fix_count    = 0;
1611
 
        block->io_fix           = 0;
1612
 
 
1613
 
        block->n_hash_helps     = 0;
1614
 
        block->is_hashed        = FALSE;
1615
 
        block->n_fields         = 1;
1616
 
        block->n_bytes          = 0;
1617
 
        block->left_side        = TRUE;
1618
 
 
1619
 
        block->file_page_was_freed = FALSE;
 
2445
        buf_page_init_low(&block->page);
 
2446
        block->page.state       = BUF_BLOCK_FILE_PAGE;
 
2447
        block->page.space       = space;
 
2448
        block->page.offset      = offset;
 
2449
 
 
2450
        page_zip_des_init(&block->page.zip);
 
2451
 
 
2452
        /* We assume that block->page.data has been allocated
 
2453
        with zip_size == UNIV_PAGE_SIZE. */
 
2454
        ut_ad(zip_size <= UNIV_PAGE_SIZE);
 
2455
        ut_ad(ut_is_2pow(zip_size));
 
2456
        page_zip_set_size(&block->page.zip, zip_size);
1620
2457
}
 
2458
#endif /* UNIV_HOTBACKUP */
1621
2459
 
1622
2460
/************************************************************************
1623
2461
Inits a page to the buffer buf_pool. */
1630
2468
                                in units of a page */
1631
2469
        buf_block_t*    block)  /* in: block to init */
1632
2470
{
 
2471
        buf_page_t*     hash_page;
1633
2472
 
1634
 
        ut_ad(mutex_own(&(buf_pool->mutex)));
 
2473
        ut_ad(buf_pool_mutex_own());
1635
2474
        ut_ad(mutex_own(&(block->mutex)));
1636
 
        ut_a(block->state != BUF_BLOCK_FILE_PAGE);
 
2475
        ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
1637
2476
 
1638
2477
        /* Set the state of the block */
1639
 
        block->magic_n          = BUF_BLOCK_MAGIC_N;
1640
 
 
1641
 
        block->state            = BUF_BLOCK_FILE_PAGE;
1642
 
        block->space            = space;
1643
 
        block->offset           = offset;
1644
 
 
1645
 
        block->check_index_page_at_flush = FALSE;
1646
 
        block->index            = NULL;
1647
 
 
1648
 
        block->lock_hash_val    = lock_rec_hash(space, offset);
 
2478
        buf_block_set_file_page(block, space, offset);
1649
2479
 
1650
2480
#ifdef UNIV_DEBUG_VALGRIND
1651
2481
        if (!space) {
1656
2486
        }
1657
2487
#endif /* UNIV_DEBUG_VALGRIND */
1658
2488
 
 
2489
        buf_block_init_low(block);
 
2490
 
 
2491
        block->lock_hash_val    = lock_rec_hash(space, offset);
 
2492
 
1659
2493
        /* Insert into the hash table of file pages */
1660
2494
 
1661
 
        if (buf_page_hash_get(space, offset)) {
 
2495
        hash_page = buf_page_hash_get(space, offset);
 
2496
 
 
2497
        if (UNIV_LIKELY_NULL(hash_page)) {
1662
2498
                fprintf(stderr,
1663
2499
                        "InnoDB: Error: page %lu %lu already found"
1664
 
                        " in the hash table\n",
 
2500
                        " in the hash table: %p, %p\n",
1665
2501
                        (ulong) space,
1666
 
                        (ulong) offset);
1667
 
#ifdef UNIV_DEBUG
 
2502
                        (ulong) offset,
 
2503
                        (const void*) hash_page, (const void*) block);
 
2504
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 
2505
                mutex_exit(&block->mutex);
 
2506
                buf_pool_mutex_exit();
1668
2507
                buf_print();
1669
2508
                buf_LRU_print();
1670
2509
                buf_validate();
1671
2510
                buf_LRU_validate();
1672
 
#endif /* UNIV_DEBUG */
1673
 
                ut_a(0);
 
2511
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
 
2512
                ut_error;
1674
2513
        }
1675
2514
 
1676
 
        HASH_INSERT(buf_block_t, hash, buf_pool->page_hash,
1677
 
                    buf_page_address_fold(space, offset), block);
1678
 
 
1679
 
        block->freed_page_clock = 0;
1680
 
 
1681
 
        block->newest_modification = ut_dulint_zero;
1682
 
        block->oldest_modification = ut_dulint_zero;
1683
 
 
1684
 
        block->accessed         = FALSE;
1685
 
        block->buf_fix_count    = 0;
1686
 
        block->io_fix           = 0;
1687
 
 
1688
 
        block->n_hash_helps     = 0;
1689
 
        block->is_hashed        = FALSE;
1690
 
        block->n_fields         = 1;
1691
 
        block->n_bytes          = 0;
1692
 
        block->left_side        = TRUE;
1693
 
 
1694
 
        block->file_page_was_freed = FALSE;
 
2515
        buf_page_init_low(&block->page);
 
2516
 
 
2517
        ut_ad(!block->page.in_zip_hash);
 
2518
        ut_ad(!block->page.in_page_hash);
 
2519
        ut_d(block->page.in_page_hash = TRUE);
 
2520
        HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
 
2521
                    buf_page_address_fold(space, offset), &block->page);
1695
2522
}
1696
2523
 
1697
2524
/************************************************************************
1702
2529
then this function does nothing.
1703
2530
Sets the io_fix flag to BUF_IO_READ and sets a non-recursive exclusive lock
1704
2531
on the buffer frame. The io-handler must take care that the flag is cleared
1705
 
and the lock released later. This is one of the functions which perform the
1706
 
state transition NOT_USED => FILE_PAGE to a block (the other is
1707
 
buf_page_create). */
1708
 
 
1709
 
buf_block_t*
 
2532
and the lock released later. */
 
2533
UNIV_INTERN
 
2534
buf_page_t*
1710
2535
buf_page_init_for_read(
1711
2536
/*===================*/
1712
2537
                                /* out: pointer to the block or NULL */
1713
2538
        ulint*          err,    /* out: DB_SUCCESS or DB_TABLESPACE_DELETED */
1714
2539
        ulint           mode,   /* in: BUF_READ_IBUF_PAGES_ONLY, ... */
1715
2540
        ulint           space,  /* in: space id */
1716
 
        ib_longlong     tablespace_version,/* in: prevents reading from a wrong
 
2541
        ulint           zip_size,/* in: compressed page size, or 0 */
 
2542
        ibool           unzip,  /* in: TRUE=request uncompressed page */
 
2543
        ib_int64_t      tablespace_version,/* in: prevents reading from a wrong
1717
2544
                                version of the tablespace in case we have done
1718
2545
                                DISCARD + IMPORT */
1719
2546
        ulint           offset) /* in: page number */
1720
2547
{
1721
2548
        buf_block_t*    block;
 
2549
        buf_page_t*     bpage;
1722
2550
        mtr_t           mtr;
 
2551
        ibool           lru     = FALSE;
 
2552
        void*           data;
1723
2553
 
1724
2554
        ut_ad(buf_pool);
1725
2555
 
1728
2558
        if (mode == BUF_READ_IBUF_PAGES_ONLY) {
1729
2559
                /* It is a read-ahead within an ibuf routine */
1730
2560
 
1731
 
                ut_ad(!ibuf_bitmap_page(offset));
 
2561
                ut_ad(!ibuf_bitmap_page(zip_size, offset));
1732
2562
                ut_ad(ibuf_inside());
1733
2563
 
1734
2564
                mtr_start(&mtr);
1735
2565
 
1736
 
                if (!ibuf_page_low(space, offset, &mtr)) {
 
2566
                if (!ibuf_page_low(space, zip_size, offset, &mtr)) {
1737
2567
 
1738
2568
                        mtr_commit(&mtr);
1739
2569
 
1743
2573
                ut_ad(mode == BUF_READ_ANY_PAGE);
1744
2574
        }
1745
2575
 
1746
 
        block = buf_block_alloc();
1747
 
 
1748
 
        ut_a(block);
1749
 
 
1750
 
        mutex_enter(&(buf_pool->mutex));
1751
 
        mutex_enter(&block->mutex);
 
2576
        if (zip_size && UNIV_LIKELY(!unzip)
 
2577
            && UNIV_LIKELY(!recv_recovery_is_on())) {
 
2578
                block = NULL;
 
2579
        } else {
 
2580
                block = buf_LRU_get_free_block(0);
 
2581
                ut_ad(block);
 
2582
        }
 
2583
 
 
2584
        buf_pool_mutex_enter();
 
2585
 
 
2586
        if (buf_page_hash_get(space, offset)) {
 
2587
                /* The page is already in the buffer pool. */
 
2588
err_exit:
 
2589
                if (block) {
 
2590
                        mutex_enter(&block->mutex);
 
2591
                        buf_LRU_block_free_non_file_page(block);
 
2592
                        mutex_exit(&block->mutex);
 
2593
                }
 
2594
 
 
2595
err_exit2:
 
2596
                buf_pool_mutex_exit();
 
2597
 
 
2598
                if (mode == BUF_READ_IBUF_PAGES_ONLY) {
 
2599
 
 
2600
                        mtr_commit(&mtr);
 
2601
                }
 
2602
 
 
2603
                return(NULL);
 
2604
        }
1752
2605
 
1753
2606
        if (fil_tablespace_deleted_or_being_deleted_in_mem(
1754
2607
                    space, tablespace_version)) {
 
2608
                /* The page belongs to a space which has been
 
2609
                deleted or is being deleted. */
1755
2610
                *err = DB_TABLESPACE_DELETED;
 
2611
 
 
2612
                goto err_exit;
1756
2613
        }
1757
2614
 
1758
 
        if (*err == DB_TABLESPACE_DELETED
1759
 
            || NULL != buf_page_hash_get(space, offset)) {
1760
 
 
1761
 
                /* The page belongs to a space which has been
1762
 
                deleted or is being deleted, or the page is
1763
 
                already in buf_pool, return */
 
2615
        if (block) {
 
2616
                bpage = &block->page;
 
2617
                mutex_enter(&block->mutex);
 
2618
                buf_page_init(space, offset, block);
 
2619
 
 
2620
                /* The block must be put to the LRU list, to the old blocks */
 
2621
                buf_LRU_add_block(bpage, TRUE/* to old blocks */);
 
2622
 
 
2623
                /* We set a pass-type x-lock on the frame because then
 
2624
                the same thread which called for the read operation
 
2625
                (and is running now at this point of code) can wait
 
2626
                for the read to complete by waiting for the x-lock on
 
2627
                the frame; if the x-lock were recursive, the same
 
2628
                thread would illegally get the x-lock before the page
 
2629
                read is completed.  The x-lock is cleared by the
 
2630
                io-handler thread. */
 
2631
 
 
2632
                rw_lock_x_lock_gen(&block->lock, BUF_IO_READ);
 
2633
                buf_page_set_io_fix(bpage, BUF_IO_READ);
 
2634
 
 
2635
                if (UNIV_UNLIKELY(zip_size)) {
 
2636
                        page_zip_set_size(&block->page.zip, zip_size);
 
2637
 
 
2638
                        /* buf_pool_mutex may be released and
 
2639
                        reacquired by buf_buddy_alloc().  Thus, we
 
2640
                        must release block->mutex in order not to
 
2641
                        break the latching order in the reacquisition
 
2642
                        of buf_pool_mutex.  We also must defer this
 
2643
                        operation until after the block descriptor has
 
2644
                        been added to buf_pool->LRU and
 
2645
                        buf_pool->page_hash. */
 
2646
                        mutex_exit(&block->mutex);
 
2647
                        data = buf_buddy_alloc(zip_size, &lru);
 
2648
                        mutex_enter(&block->mutex);
 
2649
                        block->page.zip.data = data;
 
2650
 
 
2651
                        /* To maintain the invariant
 
2652
                        block->in_unzip_LRU_list
 
2653
                        == buf_page_belongs_to_unzip_LRU(&block->page)
 
2654
                        we have to add this block to unzip_LRU
 
2655
                        after block->page.zip.data is set. */
 
2656
                        ut_ad(buf_page_belongs_to_unzip_LRU(&block->page));
 
2657
                        buf_unzip_LRU_add_block(block, TRUE);
 
2658
                }
1764
2659
 
1765
2660
                mutex_exit(&block->mutex);
1766
 
                mutex_exit(&(buf_pool->mutex));
1767
 
 
1768
 
                buf_block_free(block);
1769
 
 
1770
 
                if (mode == BUF_READ_IBUF_PAGES_ONLY) {
1771
 
 
1772
 
                        mtr_commit(&mtr);
 
2661
        } else {
 
2662
                /* Defer buf_buddy_alloc() until after the block has
 
2663
                been found not to exist.  The buf_buddy_alloc() and
 
2664
                buf_buddy_free() calls may be expensive because of
 
2665
                buf_buddy_relocate(). */
 
2666
 
 
2667
                /* The compressed page must be allocated before the
 
2668
                control block (bpage), in order to avoid the
 
2669
                invocation of buf_buddy_relocate_block() on
 
2670
                uninitialized data. */
 
2671
                data = buf_buddy_alloc(zip_size, &lru);
 
2672
                bpage = buf_buddy_alloc(sizeof *bpage, &lru);
 
2673
 
 
2674
                /* If buf_buddy_alloc() allocated storage from the LRU list,
 
2675
                it released and reacquired buf_pool_mutex.  Thus, we must
 
2676
                check the page_hash again, as it may have been modified. */
 
2677
                if (UNIV_UNLIKELY(lru)
 
2678
                    && UNIV_LIKELY_NULL(buf_page_hash_get(space, offset))) {
 
2679
 
 
2680
                        /* The block was added by some other thread. */
 
2681
                        buf_buddy_free(bpage, sizeof *bpage);
 
2682
                        buf_buddy_free(data, zip_size);
 
2683
                        goto err_exit2;
1773
2684
                }
1774
2685
 
1775
 
                return(NULL);
 
2686
                page_zip_des_init(&bpage->zip);
 
2687
                page_zip_set_size(&bpage->zip, zip_size);
 
2688
                bpage->zip.data = data;
 
2689
 
 
2690
                mutex_enter(&buf_pool_zip_mutex);
 
2691
                UNIV_MEM_DESC(bpage->zip.data,
 
2692
                              page_zip_get_size(&bpage->zip), bpage);
 
2693
                buf_page_init_low(bpage);
 
2694
                bpage->state    = BUF_BLOCK_ZIP_PAGE;
 
2695
                bpage->space    = space;
 
2696
                bpage->offset   = offset;
 
2697
 
 
2698
#ifdef UNIV_DEBUG
 
2699
                bpage->in_page_hash = FALSE;
 
2700
                bpage->in_zip_hash = FALSE;
 
2701
                bpage->in_flush_list = FALSE;
 
2702
                bpage->in_free_list = FALSE;
 
2703
                bpage->in_LRU_list = FALSE;
 
2704
#endif /* UNIV_DEBUG */
 
2705
 
 
2706
                ut_d(bpage->in_page_hash = TRUE);
 
2707
                HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
 
2708
                            buf_page_address_fold(space, offset), bpage);
 
2709
 
 
2710
                /* The block must be put to the LRU list, to the old blocks */
 
2711
                buf_LRU_add_block(bpage, TRUE/* to old blocks */);
 
2712
                buf_LRU_insert_zip_clean(bpage);
 
2713
 
 
2714
                buf_page_set_io_fix(bpage, BUF_IO_READ);
 
2715
 
 
2716
                mutex_exit(&buf_pool_zip_mutex);
1776
2717
        }
1777
2718
 
1778
 
        ut_ad(block);
1779
 
 
1780
 
        buf_page_init(space, offset, block);
1781
 
 
1782
 
        /* The block must be put to the LRU list, to the old blocks */
1783
 
 
1784
 
        buf_LRU_add_block(block, TRUE);         /* TRUE == to old blocks */
1785
 
 
1786
 
        block->io_fix = BUF_IO_READ;
1787
 
 
1788
2719
        buf_pool->n_pend_reads++;
1789
 
 
1790
 
        /* We set a pass-type x-lock on the frame because then the same
1791
 
        thread which called for the read operation (and is running now at
1792
 
        this point of code) can wait for the read to complete by waiting
1793
 
        for the x-lock on the frame; if the x-lock were recursive, the
1794
 
        same thread would illegally get the x-lock before the page read
1795
 
        is completed. The x-lock is cleared by the io-handler thread. */
1796
 
 
1797
 
        rw_lock_x_lock_gen(&(block->lock), BUF_IO_READ);
1798
 
 
1799
 
        mutex_exit(&block->mutex);
1800
 
        mutex_exit(&(buf_pool->mutex));
 
2720
        buf_pool_mutex_exit();
1801
2721
 
1802
2722
        if (mode == BUF_READ_IBUF_PAGES_ONLY) {
1803
2723
 
1804
2724
                mtr_commit(&mtr);
1805
2725
        }
1806
2726
 
1807
 
        return(block);
 
2727
        ut_ad(buf_page_in_file(bpage));
 
2728
        return(bpage);
1808
2729
}
1809
2730
 
1810
2731
/************************************************************************
1811
2732
Initializes a page to the buffer buf_pool. The page is usually not read
1812
2733
from a file even if it cannot be found in the buffer buf_pool. This is one
1813
2734
of the functions which perform on a block the state transition NOT_USED =>
1814
 
FILE_PAGE (the other is buf_page_init_for_read above). */
1815
 
 
1816
 
buf_frame_t*
 
2735
FILE_PAGE (the other is buf_page_get_gen). */
 
2736
UNIV_INTERN
 
2737
buf_block_t*
1817
2738
buf_page_create(
1818
2739
/*============*/
1819
 
                        /* out: pointer to the frame, page bufferfixed */
 
2740
                        /* out: pointer to the block, page bufferfixed */
1820
2741
        ulint   space,  /* in: space id */
1821
2742
        ulint   offset, /* in: offset of the page within space in units of
1822
2743
                        a page */
 
2744
        ulint   zip_size,/* in: compressed page size, or 0 */
1823
2745
        mtr_t*  mtr)    /* in: mini-transaction handle */
1824
2746
{
1825
2747
        buf_frame_t*    frame;
1827
2749
        buf_block_t*    free_block      = NULL;
1828
2750
 
1829
2751
        ut_ad(mtr);
1830
 
 
1831
 
        free_block = buf_LRU_get_free_block();
1832
 
 
1833
 
        mutex_enter(&(buf_pool->mutex));
1834
 
 
1835
 
        block = buf_page_hash_get(space, offset);
1836
 
 
1837
 
        if (block != NULL) {
1838
 
#ifdef UNIV_IBUF_DEBUG
1839
 
                ut_a(ibuf_count_get(block->space, block->offset) == 0);
 
2752
        ut_ad(space || !zip_size);
 
2753
 
 
2754
        free_block = buf_LRU_get_free_block(0);
 
2755
 
 
2756
        buf_pool_mutex_enter();
 
2757
 
 
2758
        block = (buf_block_t*) buf_page_hash_get(space, offset);
 
2759
 
 
2760
        if (block && buf_page_in_file(&block->page)) {
 
2761
#ifdef UNIV_IBUF_COUNT_DEBUG
 
2762
                ut_a(ibuf_count_get(space, offset) == 0);
1840
2763
#endif
1841
 
                block->file_page_was_freed = FALSE;
 
2764
#ifdef UNIV_DEBUG_FILE_ACCESSES
 
2765
                block->page.file_page_was_freed = FALSE;
 
2766
#endif /* UNIV_DEBUG_FILE_ACCESSES */
1842
2767
 
1843
2768
                /* Page can be found in buf_pool */
1844
 
                mutex_exit(&(buf_pool->mutex));
 
2769
                buf_pool_mutex_exit();
1845
2770
 
1846
2771
                buf_block_free(free_block);
1847
2772
 
1848
 
                frame = buf_page_get_with_no_latch(space, offset, mtr);
1849
 
 
1850
 
                return(frame);
 
2773
                return(buf_page_get_with_no_latch(space, zip_size,
 
2774
                                                  offset, mtr));
1851
2775
        }
1852
2776
 
1853
2777
        /* If we get here, the page was not in buf_pool: init it there */
1866
2790
        buf_page_init(space, offset, block);
1867
2791
 
1868
2792
        /* The block must be put to the LRU list */
1869
 
        buf_LRU_add_block(block, FALSE);
 
2793
        buf_LRU_add_block(&block->page, FALSE);
1870
2794
 
1871
 
#ifdef UNIV_SYNC_DEBUG
1872
 
        buf_block_buf_fix_inc_debug(block, __FILE__, __LINE__);
1873
 
#else
1874
 
        buf_block_buf_fix_inc(block);
1875
 
#endif
 
2795
        buf_block_buf_fix_inc(block, __FILE__, __LINE__);
1876
2796
        buf_pool->n_pages_created++;
1877
2797
 
1878
 
        mutex_exit(&(buf_pool->mutex));
 
2798
        if (zip_size) {
 
2799
                void*   data;
 
2800
                ibool   lru;
 
2801
 
 
2802
                /* Prevent race conditions during buf_buddy_alloc(),
 
2803
                which may release and reacquire buf_pool_mutex,
 
2804
                by IO-fixing and X-latching the block. */
 
2805
 
 
2806
                buf_page_set_io_fix(&block->page, BUF_IO_READ);
 
2807
                rw_lock_x_lock(&block->lock);
 
2808
 
 
2809
                page_zip_set_size(&block->page.zip, zip_size);
 
2810
                mutex_exit(&block->mutex);
 
2811
                /* buf_pool_mutex may be released and reacquired by
 
2812
                buf_buddy_alloc().  Thus, we must release block->mutex
 
2813
                in order not to break the latching order in
 
2814
                the reacquisition of buf_pool_mutex.  We also must
 
2815
                defer this operation until after the block descriptor
 
2816
                has been added to buf_pool->LRU and buf_pool->page_hash. */
 
2817
                data = buf_buddy_alloc(zip_size, &lru);
 
2818
                mutex_enter(&block->mutex);
 
2819
                block->page.zip.data = data;
 
2820
 
 
2821
                /* To maintain the invariant
 
2822
                block->in_unzip_LRU_list
 
2823
                == buf_page_belongs_to_unzip_LRU(&block->page)
 
2824
                we have to add this block to unzip_LRU after
 
2825
                block->page.zip.data is set. */
 
2826
                ut_ad(buf_page_belongs_to_unzip_LRU(&block->page));
 
2827
                buf_unzip_LRU_add_block(block, FALSE);
 
2828
 
 
2829
                buf_page_set_io_fix(&block->page, BUF_IO_NONE);
 
2830
                rw_lock_x_unlock(&block->lock);
 
2831
        }
 
2832
 
 
2833
        buf_pool_mutex_exit();
1879
2834
 
1880
2835
        mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);
1881
2836
 
1882
 
        block->accessed = TRUE;
 
2837
        buf_page_set_accessed(&block->page, TRUE);
1883
2838
 
1884
2839
        mutex_exit(&block->mutex);
1885
2840
 
1886
2841
        /* Delete possible entries for the page from the insert buffer:
1887
2842
        such can exist if the page belonged to an index which was dropped */
1888
2843
 
1889
 
        ibuf_merge_or_delete_for_page(NULL, space, offset, TRUE);
 
2844
        ibuf_merge_or_delete_for_page(NULL, space, offset, zip_size, TRUE);
1890
2845
 
1891
2846
        /* Flush pages from the end of the LRU list if necessary */
1892
2847
        buf_flush_free_margin();
1905
2860
 
1906
2861
        memset(frame + FIL_PAGE_FILE_FLUSH_LSN, 0, 8);
1907
2862
 
1908
 
#ifdef UNIV_DEBUG
1909
 
        buf_dbg_counter++;
1910
 
 
1911
 
        if (buf_dbg_counter % 357 == 0) {
1912
 
                ut_ad(buf_validate());
1913
 
        }
1914
 
#endif
1915
 
#ifdef UNIV_IBUF_DEBUG
1916
 
        ut_a(ibuf_count_get(block->space, block->offset) == 0);
1917
 
#endif
1918
 
        return(frame);
 
2863
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 
2864
        ut_a(++buf_dbg_counter % 357 || buf_validate());
 
2865
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
 
2866
#ifdef UNIV_IBUF_COUNT_DEBUG
 
2867
        ut_a(ibuf_count_get(buf_block_get_space(block),
 
2868
                            buf_block_get_page_no(block)) == 0);
 
2869
#endif
 
2870
        return(block);
1919
2871
}
1920
2872
 
1921
2873
/************************************************************************
1922
2874
Completes an asynchronous read or write request of a file page to or from
1923
2875
the buffer pool. */
1924
 
 
 
2876
UNIV_INTERN
1925
2877
void
1926
2878
buf_page_io_complete(
1927
2879
/*=================*/
1928
 
        buf_block_t*    block)  /* in: pointer to the block in question */
 
2880
        buf_page_t*     bpage)  /* in: pointer to the block in question */
1929
2881
{
1930
 
        ulint           io_type;
1931
 
 
1932
 
        ut_ad(block);
1933
 
 
1934
 
        ut_a(block->state == BUF_BLOCK_FILE_PAGE);
1935
 
 
1936
 
        /* We do not need protect block->io_fix here by block->mutex to read
 
2882
        enum buf_io_fix io_type;
 
2883
        const ibool     uncompressed = (buf_page_get_state(bpage)
 
2884
                                        == BUF_BLOCK_FILE_PAGE);
 
2885
 
 
2886
        ut_a(buf_page_in_file(bpage));
 
2887
 
 
2888
        /* We do not need to protect io_fix here by a mutex to read
1937
2889
        it because this is the only function where we can change the value
1938
2890
        from BUF_IO_READ or BUF_IO_WRITE to some other value, and our code
1939
2891
        ensures that this is the only thread that handles the i/o for this
1940
2892
        block. */
1941
2893
 
1942
 
        io_type = block->io_fix;
 
2894
        io_type = buf_page_get_io_fix(bpage);
 
2895
        ut_ad(io_type == BUF_IO_READ || io_type == BUF_IO_WRITE);
1943
2896
 
1944
2897
        if (io_type == BUF_IO_READ) {
 
2898
                ulint   read_page_no;
 
2899
                ulint   read_space_id;
 
2900
                byte*   frame;
 
2901
 
 
2902
                if (buf_page_get_zip_size(bpage)) {
 
2903
                        frame = bpage->zip.data;
 
2904
                        buf_pool->n_pend_unzip++;
 
2905
                        if (uncompressed
 
2906
                            && !buf_zip_decompress((buf_block_t*) bpage,
 
2907
                                                   FALSE)) {
 
2908
 
 
2909
                                buf_pool->n_pend_unzip--;
 
2910
                                goto corrupt;
 
2911
                        }
 
2912
                        buf_pool->n_pend_unzip--;
 
2913
                } else {
 
2914
                        ut_a(uncompressed);
 
2915
                        frame = ((buf_block_t*) bpage)->frame;
 
2916
                }
 
2917
 
1945
2918
                /* If this page is not uninitialized and not in the
1946
2919
                doublewrite buffer, then the page number and space id
1947
2920
                should be the same as in block. */
1948
 
                ulint   read_page_no = mach_read_from_4(
1949
 
                        block->frame + FIL_PAGE_OFFSET);
1950
 
                ulint   read_space_id = mach_read_from_4(
1951
 
                        block->frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
 
2921
                read_page_no = mach_read_from_4(frame + FIL_PAGE_OFFSET);
 
2922
                read_space_id = mach_read_from_4(
 
2923
                        frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
1952
2924
 
1953
 
                if (!block->space
1954
 
                    && trx_doublewrite_page_inside(block->offset)) {
 
2925
                if (bpage->space == TRX_SYS_SPACE
 
2926
                    && trx_doublewrite_page_inside(bpage->offset)) {
1955
2927
 
1956
2928
                        ut_print_timestamp(stderr);
1957
2929
                        fprintf(stderr,
1958
2930
                                "  InnoDB: Error: reading page %lu\n"
1959
2931
                                "InnoDB: which is in the"
1960
2932
                                " doublewrite buffer!\n",
1961
 
                                (ulong) block->offset);
 
2933
                                (ulong) bpage->offset);
1962
2934
                } else if (!read_space_id && !read_page_no) {
1963
2935
                        /* This is likely an uninitialized page. */
1964
 
                } else if ((block->space && block->space != read_space_id)
1965
 
                           || block->offset != read_page_no) {
 
2936
                } else if ((bpage->space
 
2937
                            && bpage->space != read_space_id)
 
2938
                           || bpage->offset != read_page_no) {
1966
2939
                        /* We did not compare space_id to read_space_id
1967
 
                        if block->space == 0, because the field on the
 
2940
                        if bpage->space == 0, because the field on the
1968
2941
                        page may contain garbage in MySQL < 4.1.1,
1969
 
                        which only supported block->space == 0. */
 
2942
                        which only supported bpage->space == 0. */
1970
2943
 
1971
2944
                        ut_print_timestamp(stderr);
1972
2945
                        fprintf(stderr,
1975
2948
                                "InnoDB: read in are %lu:%lu,"
1976
2949
                                " should be %lu:%lu!\n",
1977
2950
                                (ulong) read_space_id, (ulong) read_page_no,
1978
 
                                (ulong) block->space, (ulong) block->offset);
 
2951
                                (ulong) bpage->space,
 
2952
                                (ulong) bpage->offset);
1979
2953
                }
 
2954
 
1980
2955
                /* From version 3.23.38 up we store the page checksum
1981
2956
                to the 4 first bytes of the page end lsn field */
1982
2957
 
1983
 
                if (buf_page_is_corrupted(block->frame)) {
1984
 
                        fprintf(stderr,
1985
 
                                "InnoDB: Database page corruption on disk"
1986
 
                                " or a failed\n"
1987
 
                                "InnoDB: file read of page %lu.\n",
1988
 
                                (ulong) block->offset);
1989
 
 
1990
 
                        fputs("InnoDB: You may have to recover"
1991
 
                              " from a backup.\n", stderr);
1992
 
 
1993
 
                        buf_page_print(block->frame);
1994
 
 
1995
 
                        fprintf(stderr,
1996
 
                                "InnoDB: Database page corruption on disk"
1997
 
                                " or a failed\n"
1998
 
                                "InnoDB: file read of page %lu.\n",
1999
 
                                (ulong) block->offset);
2000
 
                        fputs("InnoDB: You may have to recover"
2001
 
                              " from a backup.\n", stderr);
 
2958
                if (buf_page_is_corrupted(frame,
 
2959
                                          buf_page_get_zip_size(bpage))) {
 
2960
corrupt:
 
2961
                        fprintf(stderr,
 
2962
                                "InnoDB: Database page corruption on disk"
 
2963
                                " or a failed\n"
 
2964
                                "InnoDB: file read of page %lu.\n"
 
2965
                                "InnoDB: You may have to recover"
 
2966
                                " from a backup.\n",
 
2967
                                (ulong) bpage->offset);
 
2968
                        buf_page_print(frame, buf_page_get_zip_size(bpage));
 
2969
                        fprintf(stderr,
 
2970
                                "InnoDB: Database page corruption on disk"
 
2971
                                " or a failed\n"
 
2972
                                "InnoDB: file read of page %lu.\n"
 
2973
                                "InnoDB: You may have to recover"
 
2974
                                " from a backup.\n",
 
2975
                                (ulong) bpage->offset);
2002
2976
                        fputs("InnoDB: It is also possible that"
2003
2977
                              " your operating\n"
2004
2978
                              "InnoDB: system has corrupted its"
2029
3003
                }
2030
3004
 
2031
3005
                if (recv_recovery_is_on()) {
2032
 
                        recv_recover_page(FALSE, TRUE, block->frame,
2033
 
                                          block->space, block->offset);
 
3006
                        /* Pages must be uncompressed for crash recovery. */
 
3007
                        ut_a(uncompressed);
 
3008
                        recv_recover_page(FALSE, TRUE, (buf_block_t*) bpage);
2034
3009
                }
2035
3010
 
2036
 
                if (!recv_no_ibuf_operations) {
 
3011
                if (uncompressed && !recv_no_ibuf_operations) {
2037
3012
                        ibuf_merge_or_delete_for_page(
2038
 
                                block->frame, block->space, block->offset,
 
3013
                                (buf_block_t*) bpage, bpage->space,
 
3014
                                bpage->offset, buf_page_get_zip_size(bpage),
2039
3015
                                TRUE);
2040
3016
                }
2041
3017
        }
2042
3018
 
2043
 
        mutex_enter(&(buf_pool->mutex));
2044
 
        mutex_enter(&block->mutex);
 
3019
        buf_pool_mutex_enter();
 
3020
        mutex_enter(buf_page_get_mutex(bpage));
2045
3021
 
2046
 
#ifdef UNIV_IBUF_DEBUG
2047
 
        ut_a(ibuf_count_get(block->space, block->offset) == 0);
 
3022
#ifdef UNIV_IBUF_COUNT_DEBUG
 
3023
        if (io_type == BUF_IO_WRITE || uncompressed) {
 
3024
                /* For BUF_IO_READ of compressed-only blocks, the
 
3025
                buffered operations will be merged by buf_page_get_gen()
 
3026
                after the block has been uncompressed. */
 
3027
                ut_a(ibuf_count_get(bpage->space, bpage->offset) == 0);
 
3028
        }
2048
3029
#endif
2049
3030
        /* Because the thread which does the unlocking is not the same one that
2050
3031
        did the locking, we use a pass value != 0 in unlock, which simply
2051
3032
        removes the newest lock debug record, without checking the thread
2052
3033
        id. */
2053
3034
 
2054
 
        block->io_fix = 0;
 
3035
        buf_page_set_io_fix(bpage, BUF_IO_NONE);
2055
3036
 
2056
 
        if (io_type == BUF_IO_READ) {
 
3037
        switch (io_type) {
 
3038
        case BUF_IO_READ:
2057
3039
                /* NOTE that the call to ibuf may have moved the ownership of
2058
3040
                the x-latch to this OS thread: do not let this confuse you in
2059
3041
                debugging! */
2062
3044
                buf_pool->n_pend_reads--;
2063
3045
                buf_pool->n_pages_read++;
2064
3046
 
2065
 
                rw_lock_x_unlock_gen(&(block->lock), BUF_IO_READ);
2066
 
 
2067
 
#ifdef UNIV_DEBUG
2068
 
                if (buf_debug_prints) {
2069
 
                        fputs("Has read ", stderr);
 
3047
                if (uncompressed) {
 
3048
                        rw_lock_x_unlock_gen(&((buf_block_t*) bpage)->lock,
 
3049
                                             BUF_IO_READ);
2070
3050
                }
2071
 
#endif /* UNIV_DEBUG */
2072
 
        } else {
2073
 
                ut_ad(io_type == BUF_IO_WRITE);
2074
 
 
 
3051
 
 
3052
                break;
 
3053
 
 
3054
        case BUF_IO_WRITE:
2075
3055
                /* Write means a flush operation: call the completion
2076
3056
                routine in the flush system */
2077
3057
 
2078
 
                buf_flush_write_complete(block);
 
3058
                buf_flush_write_complete(bpage);
2079
3059
 
2080
 
                rw_lock_s_unlock_gen(&(block->lock), BUF_IO_WRITE);
 
3060
                if (uncompressed) {
 
3061
                        rw_lock_s_unlock_gen(&((buf_block_t*) bpage)->lock,
 
3062
                                             BUF_IO_WRITE);
 
3063
                }
2081
3064
 
2082
3065
                buf_pool->n_pages_written++;
2083
3066
 
2084
 
#ifdef UNIV_DEBUG
2085
 
                if (buf_debug_prints) {
2086
 
                        fputs("Has written ", stderr);
2087
 
                }
2088
 
#endif /* UNIV_DEBUG */
 
3067
                break;
 
3068
 
 
3069
        default:
 
3070
                ut_error;
2089
3071
        }
2090
3072
 
2091
 
        mutex_exit(&block->mutex);
2092
 
        mutex_exit(&(buf_pool->mutex));
 
3073
        mutex_exit(buf_page_get_mutex(bpage));
 
3074
        buf_pool_mutex_exit();
2093
3075
 
2094
3076
#ifdef UNIV_DEBUG
2095
3077
        if (buf_debug_prints) {
2096
 
                fprintf(stderr, "page space %lu page no %lu\n",
2097
 
                        (ulong) block->space, (ulong) block->offset);
 
3078
                fprintf(stderr, "Has %s page space %lu page no %lu\n",
 
3079
                        io_type == BUF_IO_READ ? "read" : "written",
 
3080
                        (ulong) buf_page_get_space(bpage),
 
3081
                        (ulong) buf_page_get_page_no(bpage));
2098
3082
        }
2099
3083
#endif /* UNIV_DEBUG */
2100
3084
}
2103
3087
Invalidates the file pages in the buffer pool when an archive recovery is
2104
3088
completed. All the file pages buffered must be in a replaceable state when
2105
3089
this function is called: not latched and not modified. */
2106
 
 
 
3090
UNIV_INTERN
2107
3091
void
2108
3092
buf_pool_invalidate(void)
2109
3093
/*=====================*/
2118
3102
                freed = buf_LRU_search_and_free_block(100);
2119
3103
        }
2120
3104
 
2121
 
        mutex_enter(&(buf_pool->mutex));
 
3105
        buf_pool_mutex_enter();
2122
3106
 
2123
3107
        ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0);
 
3108
        ut_ad(UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0);
2124
3109
 
2125
 
        mutex_exit(&(buf_pool->mutex));
 
3110
        buf_pool_mutex_exit();
2126
3111
}
2127
3112
 
2128
 
#ifdef UNIV_DEBUG
 
3113
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
2129
3114
/*************************************************************************
2130
3115
Validates the buffer buf_pool data structure. */
2131
 
 
 
3116
UNIV_INTERN
2132
3117
ibool
2133
3118
buf_validate(void)
2134
3119
/*==============*/
2135
3120
{
2136
 
        buf_block_t*    block;
 
3121
        buf_page_t*     b;
 
3122
        buf_chunk_t*    chunk;
2137
3123
        ulint           i;
2138
3124
        ulint           n_single_flush  = 0;
2139
3125
        ulint           n_lru_flush     = 0;
2141
3127
        ulint           n_lru           = 0;
2142
3128
        ulint           n_flush         = 0;
2143
3129
        ulint           n_free          = 0;
2144
 
        ulint           n_page          = 0;
 
3130
        ulint           n_zip           = 0;
2145
3131
 
2146
3132
        ut_ad(buf_pool);
2147
3133
 
2148
 
        mutex_enter(&(buf_pool->mutex));
2149
 
 
2150
 
        for (i = 0; i < buf_pool->curr_size; i++) {
2151
 
 
2152
 
                block = buf_pool_get_nth_block(buf_pool, i);
2153
 
 
2154
 
                mutex_enter(&block->mutex);
2155
 
 
2156
 
                if (block->state == BUF_BLOCK_FILE_PAGE) {
2157
 
 
2158
 
                        ut_a(buf_page_hash_get(block->space,
2159
 
                                               block->offset) == block);
2160
 
                        n_page++;
2161
 
 
2162
 
#ifdef UNIV_IBUF_DEBUG
2163
 
                        ut_a((block->io_fix == BUF_IO_READ)
2164
 
                             || ibuf_count_get(block->space, block->offset)
2165
 
                             == 0);
 
3134
        buf_pool_mutex_enter();
 
3135
 
 
3136
        chunk = buf_pool->chunks;
 
3137
 
 
3138
        /* Check the uncompressed blocks. */
 
3139
 
 
3140
        for (i = buf_pool->n_chunks; i--; chunk++) {
 
3141
 
 
3142
                ulint           j;
 
3143
                buf_block_t*    block = chunk->blocks;
 
3144
 
 
3145
                for (j = chunk->size; j--; block++) {
 
3146
 
 
3147
                        mutex_enter(&block->mutex);
 
3148
 
 
3149
                        switch (buf_block_get_state(block)) {
 
3150
                        case BUF_BLOCK_ZIP_FREE:
 
3151
                        case BUF_BLOCK_ZIP_PAGE:
 
3152
                        case BUF_BLOCK_ZIP_DIRTY:
 
3153
                                /* These should only occur on
 
3154
                                zip_clean, zip_free[], or flush_list. */
 
3155
                                ut_error;
 
3156
                                break;
 
3157
 
 
3158
                        case BUF_BLOCK_FILE_PAGE:
 
3159
                                ut_a(buf_page_hash_get(buf_block_get_space(
 
3160
                                                               block),
 
3161
                                                       buf_block_get_page_no(
 
3162
                                                               block))
 
3163
                                     == &block->page);
 
3164
 
 
3165
#ifdef UNIV_IBUF_COUNT_DEBUG
 
3166
                                ut_a(buf_page_get_io_fix(&block->page)
 
3167
                                     == BUF_IO_READ
 
3168
                                     || !ibuf_count_get(buf_block_get_space(
 
3169
                                                                block),
 
3170
                                                        buf_block_get_page_no(
 
3171
                                                                block)));
2166
3172
#endif
2167
 
                        if (block->io_fix == BUF_IO_WRITE) {
2168
 
 
2169
 
                                if (block->flush_type == BUF_FLUSH_LRU) {
 
3173
                                switch (buf_page_get_io_fix(&block->page)) {
 
3174
                                case BUF_IO_NONE:
 
3175
                                        break;
 
3176
 
 
3177
                                case BUF_IO_WRITE:
 
3178
                                        switch (buf_page_get_flush_type(
 
3179
                                                        &block->page)) {
 
3180
                                        case BUF_FLUSH_LRU:
 
3181
                                                n_lru_flush++;
 
3182
                                                ut_a(rw_lock_is_locked(
 
3183
                                                             &block->lock,
 
3184
                                                             RW_LOCK_SHARED));
 
3185
                                                break;
 
3186
                                        case BUF_FLUSH_LIST:
 
3187
                                                n_list_flush++;
 
3188
                                                break;
 
3189
                                        case BUF_FLUSH_SINGLE_PAGE:
 
3190
                                                n_single_flush++;
 
3191
                                                break;
 
3192
                                        default:
 
3193
                                                ut_error;
 
3194
                                        }
 
3195
 
 
3196
                                        break;
 
3197
 
 
3198
                                case BUF_IO_READ:
 
3199
 
 
3200
                                        ut_a(rw_lock_is_locked(&block->lock,
 
3201
                                                               RW_LOCK_EX));
 
3202
                                        break;
 
3203
                                }
 
3204
 
 
3205
                                n_lru++;
 
3206
 
 
3207
                                if (block->page.oldest_modification > 0) {
 
3208
                                        n_flush++;
 
3209
                                }
 
3210
 
 
3211
                                break;
 
3212
 
 
3213
                        case BUF_BLOCK_NOT_USED:
 
3214
                                n_free++;
 
3215
                                break;
 
3216
 
 
3217
                        case BUF_BLOCK_READY_FOR_USE:
 
3218
                        case BUF_BLOCK_MEMORY:
 
3219
                        case BUF_BLOCK_REMOVE_HASH:
 
3220
                                /* do nothing */
 
3221
                                break;
 
3222
                        }
 
3223
 
 
3224
                        mutex_exit(&block->mutex);
 
3225
                }
 
3226
        }
 
3227
 
 
3228
        mutex_enter(&buf_pool_zip_mutex);
 
3229
 
 
3230
        /* Check clean compressed-only blocks. */
 
3231
 
 
3232
        for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
 
3233
             b = UT_LIST_GET_NEXT(list, b)) {
 
3234
                ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
 
3235
                switch (buf_page_get_io_fix(b)) {
 
3236
                case BUF_IO_NONE:
 
3237
                        /* All clean blocks should be I/O-unfixed. */
 
3238
                        break;
 
3239
                case BUF_IO_READ:
 
3240
                        /* In buf_LRU_free_block(), we temporarily set
 
3241
                        b->io_fix = BUF_IO_READ for a newly allocated
 
3242
                        control block in order to prevent
 
3243
                        buf_page_get_gen() from decompressing the block. */
 
3244
                        break;
 
3245
                default:
 
3246
                        ut_error;
 
3247
                        break;
 
3248
                }
 
3249
                ut_a(!b->oldest_modification);
 
3250
                ut_a(buf_page_hash_get(b->space, b->offset) == b);
 
3251
 
 
3252
                n_lru++;
 
3253
                n_zip++;
 
3254
        }
 
3255
 
 
3256
        /* Check dirty compressed-only blocks. */
 
3257
 
 
3258
        for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
 
3259
             b = UT_LIST_GET_NEXT(list, b)) {
 
3260
                ut_ad(b->in_flush_list);
 
3261
 
 
3262
                switch (buf_page_get_state(b)) {
 
3263
                case BUF_BLOCK_ZIP_DIRTY:
 
3264
                        ut_a(b->oldest_modification);
 
3265
                        n_lru++;
 
3266
                        n_flush++;
 
3267
                        n_zip++;
 
3268
                        switch (buf_page_get_io_fix(b)) {
 
3269
                        case BUF_IO_NONE:
 
3270
                        case BUF_IO_READ:
 
3271
                                break;
 
3272
 
 
3273
                        case BUF_IO_WRITE:
 
3274
                                switch (buf_page_get_flush_type(b)) {
 
3275
                                case BUF_FLUSH_LRU:
2170
3276
                                        n_lru_flush++;
2171
 
                                        ut_a(rw_lock_is_locked(
2172
 
                                                     &block->lock,
2173
 
                                                     RW_LOCK_SHARED));
2174
 
                                } else if (block->flush_type
2175
 
                                           == BUF_FLUSH_LIST) {
 
3277
                                        break;
 
3278
                                case BUF_FLUSH_LIST:
2176
3279
                                        n_list_flush++;
2177
 
                                } else if (block->flush_type
2178
 
                                           == BUF_FLUSH_SINGLE_PAGE) {
 
3280
                                        break;
 
3281
                                case BUF_FLUSH_SINGLE_PAGE:
2179
3282
                                        n_single_flush++;
2180
 
                                } else {
 
3283
                                        break;
 
3284
                                default:
2181
3285
                                        ut_error;
2182
3286
                                }
2183
 
 
2184
 
                        } else if (block->io_fix == BUF_IO_READ) {
2185
 
 
2186
 
                                ut_a(rw_lock_is_locked(&(block->lock),
2187
 
                                                       RW_LOCK_EX));
2188
 
                        }
2189
 
 
2190
 
                        n_lru++;
2191
 
 
2192
 
                        if (ut_dulint_cmp(block->oldest_modification,
2193
 
                                          ut_dulint_zero) > 0) {
2194
 
                                n_flush++;
2195
 
                        }
2196
 
 
2197
 
                } else if (block->state == BUF_BLOCK_NOT_USED) {
2198
 
                        n_free++;
 
3287
                                break;
 
3288
                        }
 
3289
                        break;
 
3290
                case BUF_BLOCK_FILE_PAGE:
 
3291
                        /* uncompressed page */
 
3292
                        break;
 
3293
                case BUF_BLOCK_ZIP_FREE:
 
3294
                case BUF_BLOCK_ZIP_PAGE:
 
3295
                case BUF_BLOCK_NOT_USED:
 
3296
                case BUF_BLOCK_READY_FOR_USE:
 
3297
                case BUF_BLOCK_MEMORY:
 
3298
                case BUF_BLOCK_REMOVE_HASH:
 
3299
                        ut_error;
 
3300
                        break;
2199
3301
                }
2200
 
 
2201
 
                mutex_exit(&block->mutex);
 
3302
                ut_a(buf_page_hash_get(b->space, b->offset) == b);
2202
3303
        }
2203
3304
 
2204
 
        if (n_lru + n_free > buf_pool->curr_size) {
2205
 
                fprintf(stderr, "n LRU %lu, n free %lu\n",
2206
 
                        (ulong) n_lru, (ulong) n_free);
 
3305
        mutex_exit(&buf_pool_zip_mutex);
 
3306
 
 
3307
        if (n_lru + n_free > buf_pool->curr_size + n_zip) {
 
3308
                fprintf(stderr, "n LRU %lu, n free %lu, pool %lu zip %lu\n",
 
3309
                        (ulong) n_lru, (ulong) n_free,
 
3310
                        (ulong) buf_pool->curr_size, (ulong) n_zip);
2207
3311
                ut_error;
2208
3312
        }
2209
3313
 
2220
3324
        ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
2221
3325
        ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush);
2222
3326
 
2223
 
        mutex_exit(&(buf_pool->mutex));
 
3327
        buf_pool_mutex_exit();
2224
3328
 
2225
3329
        ut_a(buf_LRU_validate());
2226
3330
        ut_a(buf_flush_validate());
2227
3331
 
2228
3332
        return(TRUE);
2229
3333
}
 
3334
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
2230
3335
 
 
3336
#if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
2231
3337
/*************************************************************************
2232
3338
Prints info of the buffer buf_pool data structure. */
2233
 
 
 
3339
UNIV_INTERN
2234
3340
void
2235
3341
buf_print(void)
2236
3342
/*===========*/
2242
3348
        ulint           j;
2243
3349
        dulint          id;
2244
3350
        ulint           n_found;
2245
 
        buf_frame_t*    frame;
 
3351
        buf_chunk_t*    chunk;
2246
3352
        dict_index_t*   index;
2247
3353
 
2248
3354
        ut_ad(buf_pool);
2252
3358
        index_ids = mem_alloc(sizeof(dulint) * size);
2253
3359
        counts = mem_alloc(sizeof(ulint) * size);
2254
3360
 
2255
 
        mutex_enter(&(buf_pool->mutex));
 
3361
        buf_pool_mutex_enter();
2256
3362
 
2257
3363
        fprintf(stderr,
2258
3364
                "buf_pool size %lu\n"
2259
3365
                "database pages %lu\n"
2260
3366
                "free pages %lu\n"
2261
3367
                "modified database pages %lu\n"
 
3368
                "n pending decompressions %lu\n"
2262
3369
                "n pending reads %lu\n"
2263
3370
                "n pending flush LRU %lu list %lu single page %lu\n"
2264
3371
                "pages read %lu, created %lu, written %lu\n",
2266
3373
                (ulong) UT_LIST_GET_LEN(buf_pool->LRU),
2267
3374
                (ulong) UT_LIST_GET_LEN(buf_pool->free),
2268
3375
                (ulong) UT_LIST_GET_LEN(buf_pool->flush_list),
 
3376
                (ulong) buf_pool->n_pend_unzip,
2269
3377
                (ulong) buf_pool->n_pend_reads,
2270
3378
                (ulong) buf_pool->n_flush[BUF_FLUSH_LRU],
2271
3379
                (ulong) buf_pool->n_flush[BUF_FLUSH_LIST],
2277
3385
 
2278
3386
        n_found = 0;
2279
3387
 
2280
 
        for (i = 0; i < size; i++) {
2281
 
                frame = buf_pool_get_nth_block(buf_pool, i)->frame;
2282
 
 
2283
 
                if (fil_page_get_type(frame) == FIL_PAGE_INDEX) {
2284
 
 
2285
 
                        id = btr_page_get_index_id(frame);
2286
 
 
2287
 
                        /* Look for the id in the index_ids array */
2288
 
                        j = 0;
2289
 
 
2290
 
                        while (j < n_found) {
2291
 
 
2292
 
                                if (ut_dulint_cmp(index_ids[j], id) == 0) {
2293
 
                                        (counts[j])++;
2294
 
 
2295
 
                                        break;
2296
 
                                }
2297
 
                                j++;
2298
 
                        }
2299
 
 
2300
 
                        if (j == n_found) {
2301
 
                                n_found++;
2302
 
                                index_ids[j] = id;
2303
 
                                counts[j] = 1;
 
3388
        chunk = buf_pool->chunks;
 
3389
 
 
3390
        for (i = buf_pool->n_chunks; i--; chunk++) {
 
3391
                buf_block_t*    block           = chunk->blocks;
 
3392
                ulint           n_blocks        = chunk->size;
 
3393
 
 
3394
                for (; n_blocks--; block++) {
 
3395
                        const buf_frame_t* frame = block->frame;
 
3396
 
 
3397
                        if (fil_page_get_type(frame) == FIL_PAGE_INDEX) {
 
3398
 
 
3399
                                id = btr_page_get_index_id(frame);
 
3400
 
 
3401
                                /* Look for the id in the index_ids array */
 
3402
                                j = 0;
 
3403
 
 
3404
                                while (j < n_found) {
 
3405
 
 
3406
                                        if (ut_dulint_cmp(index_ids[j],
 
3407
                                                          id) == 0) {
 
3408
                                                counts[j]++;
 
3409
 
 
3410
                                                break;
 
3411
                                        }
 
3412
                                        j++;
 
3413
                                }
 
3414
 
 
3415
                                if (j == n_found) {
 
3416
                                        n_found++;
 
3417
                                        index_ids[j] = id;
 
3418
                                        counts[j] = 1;
 
3419
                                }
2304
3420
                        }
2305
3421
                }
2306
3422
        }
2307
3423
 
2308
 
        mutex_exit(&(buf_pool->mutex));
 
3424
        buf_pool_mutex_exit();
2309
3425
 
2310
3426
        for (i = 0; i < n_found; i++) {
2311
3427
                index = dict_index_get_if_in_cache(index_ids[i]);
2328
3444
 
2329
3445
        ut_a(buf_validate());
2330
3446
}
2331
 
#endif /* UNIV_DEBUG */
 
3447
#endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */
2332
3448
 
2333
3449
/*************************************************************************
2334
3450
Returns the number of latched pages in the buffer pool. */
2335
 
 
 
3451
UNIV_INTERN
2336
3452
ulint
2337
3453
buf_get_latched_pages_number(void)
 
3454
/*==============================*/
2338
3455
{
2339
 
        buf_block_t*    block;
 
3456
        buf_chunk_t*    chunk;
 
3457
        buf_page_t*     b;
2340
3458
        ulint           i;
2341
3459
        ulint           fixed_pages_number = 0;
2342
3460
 
2343
 
        mutex_enter(&(buf_pool->mutex));
2344
 
 
2345
 
        for (i = 0; i < buf_pool->curr_size; i++) {
2346
 
 
2347
 
                block = buf_pool_get_nth_block(buf_pool, i);
2348
 
 
2349
 
                if (block->magic_n == BUF_BLOCK_MAGIC_N) {
 
3461
        buf_pool_mutex_enter();
 
3462
 
 
3463
        chunk = buf_pool->chunks;
 
3464
 
 
3465
        for (i = buf_pool->n_chunks; i--; chunk++) {
 
3466
                buf_block_t*    block;
 
3467
                ulint           j;
 
3468
 
 
3469
                block = chunk->blocks;
 
3470
 
 
3471
                for (j = chunk->size; j--; block++) {
 
3472
                        if (buf_block_get_state(block)
 
3473
                            != BUF_BLOCK_FILE_PAGE) {
 
3474
 
 
3475
                                continue;
 
3476
                        }
 
3477
 
2350
3478
                        mutex_enter(&block->mutex);
2351
3479
 
2352
 
                        if (block->buf_fix_count != 0 || block->io_fix != 0) {
 
3480
                        if (block->page.buf_fix_count != 0
 
3481
                            || buf_page_get_io_fix(&block->page)
 
3482
                            != BUF_IO_NONE) {
2353
3483
                                fixed_pages_number++;
2354
3484
                        }
2355
3485
 
2357
3487
                }
2358
3488
        }
2359
3489
 
2360
 
        mutex_exit(&(buf_pool->mutex));
 
3490
        mutex_enter(&buf_pool_zip_mutex);
 
3491
 
 
3492
        /* Traverse the lists of clean and dirty compressed-only blocks. */
 
3493
 
 
3494
        for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
 
3495
             b = UT_LIST_GET_NEXT(list, b)) {
 
3496
                ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
 
3497
                ut_a(buf_page_get_io_fix(b) != BUF_IO_WRITE);
 
3498
 
 
3499
                if (b->buf_fix_count != 0
 
3500
                    || buf_page_get_io_fix(b) != BUF_IO_NONE) {
 
3501
                        fixed_pages_number++;
 
3502
                }
 
3503
        }
 
3504
 
 
3505
        for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
 
3506
             b = UT_LIST_GET_NEXT(list, b)) {
 
3507
                ut_ad(b->in_flush_list);
 
3508
 
 
3509
                switch (buf_page_get_state(b)) {
 
3510
                case BUF_BLOCK_ZIP_DIRTY:
 
3511
                        if (b->buf_fix_count != 0
 
3512
                            || buf_page_get_io_fix(b) != BUF_IO_NONE) {
 
3513
                                fixed_pages_number++;
 
3514
                        }
 
3515
                        break;
 
3516
                case BUF_BLOCK_FILE_PAGE:
 
3517
                        /* uncompressed page */
 
3518
                        break;
 
3519
                case BUF_BLOCK_ZIP_FREE:
 
3520
                case BUF_BLOCK_ZIP_PAGE:
 
3521
                case BUF_BLOCK_NOT_USED:
 
3522
                case BUF_BLOCK_READY_FOR_USE:
 
3523
                case BUF_BLOCK_MEMORY:
 
3524
                case BUF_BLOCK_REMOVE_HASH:
 
3525
                        ut_error;
 
3526
                        break;
 
3527
                }
 
3528
        }
 
3529
 
 
3530
        mutex_exit(&buf_pool_zip_mutex);
 
3531
        buf_pool_mutex_exit();
2361
3532
 
2362
3533
        return(fixed_pages_number);
2363
3534
}
2364
3535
 
2365
3536
/*************************************************************************
2366
3537
Returns the number of pending buf pool ios. */
2367
 
 
 
3538
UNIV_INTERN
2368
3539
ulint
2369
3540
buf_get_n_pending_ios(void)
2370
3541
/*=======================*/
2378
3549
/*************************************************************************
2379
3550
Returns the ratio in percents of modified pages in the buffer pool /
2380
3551
database pages in the buffer pool. */
2381
 
 
 
3552
UNIV_INTERN
2382
3553
ulint
2383
3554
buf_get_modified_ratio_pct(void)
2384
3555
/*============================*/
2385
3556
{
2386
3557
        ulint   ratio;
2387
3558
 
2388
 
        mutex_enter(&(buf_pool->mutex));
 
3559
        buf_pool_mutex_enter();
2389
3560
 
2390
3561
        ratio = (100 * UT_LIST_GET_LEN(buf_pool->flush_list))
2391
3562
                / (1 + UT_LIST_GET_LEN(buf_pool->LRU)
2393
3564
 
2394
3565
        /* 1 + is there to avoid division by zero */
2395
3566
 
2396
 
        mutex_exit(&(buf_pool->mutex));
 
3567
        buf_pool_mutex_exit();
2397
3568
 
2398
3569
        return(ratio);
2399
3570
}
2400
3571
 
2401
3572
/*************************************************************************
2402
3573
Prints info of the buffer i/o. */
2403
 
 
 
3574
UNIV_INTERN
2404
3575
void
2405
3576
buf_print_io(
2406
3577
/*=========*/
2413
3584
        ut_ad(buf_pool);
2414
3585
        size = buf_pool->curr_size;
2415
3586
 
2416
 
        mutex_enter(&(buf_pool->mutex));
2417
 
 
2418
 
        if (srv_use_awe) {
2419
 
                fprintf(stderr,
2420
 
                        "AWE: Buffer pool memory frames %lu\n",
2421
 
                        (ulong) buf_pool->n_frames);
2422
 
 
2423
 
                fprintf(stderr,
2424
 
                        "AWE: Database pages and free buffers"
2425
 
                        " mapped in frames %lu\n",
2426
 
                        (ulong)
2427
 
                        UT_LIST_GET_LEN(buf_pool->awe_LRU_free_mapped));
2428
 
        }
 
3587
        buf_pool_mutex_enter();
 
3588
 
2429
3589
        fprintf(file,
2430
3590
                "Buffer pool size   %lu\n"
2431
3591
                "Free buffers       %lu\n"
2462
3622
                (buf_pool->n_pages_written - buf_pool->n_pages_written_old)
2463
3623
                / time_elapsed);
2464
3624
 
2465
 
        if (srv_use_awe) {
2466
 
                fprintf(file, "AWE: %.2f page remaps/s\n",
2467
 
                        (buf_pool->n_pages_awe_remapped
2468
 
                         - buf_pool->n_pages_awe_remapped_old)
2469
 
                        / time_elapsed);
2470
 
        }
2471
 
 
2472
3625
        if (buf_pool->n_page_gets > buf_pool->n_page_gets_old) {
2473
3626
                fprintf(file, "Buffer pool hit rate %lu / 1000\n",
2474
3627
                        (ulong)
2485
3638
        buf_pool->n_pages_read_old = buf_pool->n_pages_read;
2486
3639
        buf_pool->n_pages_created_old = buf_pool->n_pages_created;
2487
3640
        buf_pool->n_pages_written_old = buf_pool->n_pages_written;
2488
 
        buf_pool->n_pages_awe_remapped_old = buf_pool->n_pages_awe_remapped;
2489
 
 
2490
 
        mutex_exit(&(buf_pool->mutex));
 
3641
 
 
3642
        /* Print some values to help us with visualizing what is
 
3643
        happening with LRU eviction. */
 
3644
        fprintf(file,
 
3645
                "LRU len: %lu, unzip_LRU len: %lu\n"
 
3646
                "I/O sum[%lu]:cur[%lu], unzip sum[%lu]:cur[%lu]\n",
 
3647
                UT_LIST_GET_LEN(buf_pool->LRU),
 
3648
                UT_LIST_GET_LEN(buf_pool->unzip_LRU),
 
3649
                buf_LRU_stat_sum.io, buf_LRU_stat_cur.io,
 
3650
                buf_LRU_stat_sum.unzip, buf_LRU_stat_cur.unzip);
 
3651
 
 
3652
        buf_pool_mutex_exit();
2491
3653
}
2492
3654
 
2493
3655
/**************************************************************************
2494
3656
Refreshes the statistics used to print per-second averages. */
2495
 
 
 
3657
UNIV_INTERN
2496
3658
void
2497
3659
buf_refresh_io_stats(void)
2498
3660
/*======================*/
2502
3664
        buf_pool->n_pages_read_old = buf_pool->n_pages_read;
2503
3665
        buf_pool->n_pages_created_old = buf_pool->n_pages_created;
2504
3666
        buf_pool->n_pages_written_old = buf_pool->n_pages_written;
2505
 
        buf_pool->n_pages_awe_remapped_old = buf_pool->n_pages_awe_remapped;
2506
3667
}
2507
3668
 
2508
3669
/*************************************************************************
2509
3670
Checks that all file pages in the buffer are in a replaceable state. */
2510
 
 
 
3671
UNIV_INTERN
2511
3672
ibool
2512
3673
buf_all_freed(void)
2513
3674
/*===============*/
2514
3675
{
2515
 
        buf_block_t*    block;
 
3676
        buf_chunk_t*    chunk;
2516
3677
        ulint           i;
2517
3678
 
2518
3679
        ut_ad(buf_pool);
2519
3680
 
2520
 
        mutex_enter(&(buf_pool->mutex));
2521
 
 
2522
 
        for (i = 0; i < buf_pool->curr_size; i++) {
2523
 
 
2524
 
                block = buf_pool_get_nth_block(buf_pool, i);
2525
 
 
2526
 
                mutex_enter(&block->mutex);
2527
 
 
2528
 
                if (block->state == BUF_BLOCK_FILE_PAGE) {
2529
 
 
2530
 
                        if (!buf_flush_ready_for_replace(block)) {
2531
 
 
2532
 
                                fprintf(stderr,
2533
 
                                        "Page %lu %lu still fixed or dirty\n",
2534
 
                                        (ulong) block->space,
2535
 
                                        (ulong) block->offset);
2536
 
                                ut_error;
2537
 
                        }
 
3681
        buf_pool_mutex_enter();
 
3682
 
 
3683
        chunk = buf_pool->chunks;
 
3684
 
 
3685
        for (i = buf_pool->n_chunks; i--; chunk++) {
 
3686
 
 
3687
                const buf_block_t* block = buf_chunk_not_freed(chunk);
 
3688
 
 
3689
                if (UNIV_LIKELY_NULL(block)) {
 
3690
                        fprintf(stderr,
 
3691
                                "Page %lu %lu still fixed or dirty\n",
 
3692
                                (ulong) block->page.space,
 
3693
                                (ulong) block->page.offset);
 
3694
                        ut_error;
2538
3695
                }
2539
 
 
2540
 
                mutex_exit(&block->mutex);
2541
3696
        }
2542
3697
 
2543
 
        mutex_exit(&(buf_pool->mutex));
 
3698
        buf_pool_mutex_exit();
2544
3699
 
2545
3700
        return(TRUE);
2546
3701
}
2548
3703
/*************************************************************************
2549
3704
Checks that there currently are no pending i/o-operations for the buffer
2550
3705
pool. */
2551
 
 
 
3706
UNIV_INTERN
2552
3707
ibool
2553
3708
buf_pool_check_no_pending_io(void)
2554
3709
/*==============================*/
2556
3711
{
2557
3712
        ibool   ret;
2558
3713
 
2559
 
        mutex_enter(&(buf_pool->mutex));
 
3714
        buf_pool_mutex_enter();
2560
3715
 
2561
3716
        if (buf_pool->n_pend_reads + buf_pool->n_flush[BUF_FLUSH_LRU]
2562
3717
            + buf_pool->n_flush[BUF_FLUSH_LIST]
2566
3721
                ret = TRUE;
2567
3722
        }
2568
3723
 
2569
 
        mutex_exit(&(buf_pool->mutex));
 
3724
        buf_pool_mutex_exit();
2570
3725
 
2571
3726
        return(ret);
2572
3727
}
2573
3728
 
2574
3729
/*************************************************************************
2575
3730
Gets the current length of the free list of buffer blocks. */
2576
 
 
 
3731
UNIV_INTERN
2577
3732
ulint
2578
3733
buf_get_free_list_len(void)
2579
3734
/*=======================*/
2580
3735
{
2581
3736
        ulint   len;
2582
3737
 
2583
 
        mutex_enter(&(buf_pool->mutex));
 
3738
        buf_pool_mutex_enter();
2584
3739
 
2585
3740
        len = UT_LIST_GET_LEN(buf_pool->free);
2586
3741
 
2587
 
        mutex_exit(&(buf_pool->mutex));
 
3742
        buf_pool_mutex_exit();
2588
3743
 
2589
3744
        return(len);
2590
3745
}