~drizzle-trunk/drizzle/development

« back to all changes in this revision

Viewing changes to storage/innobase/buf/buf0buf.c

  • Committer: Monty Taylor
  • Date: 2008-09-15 17:24:04 UTC
  • Revision ID: monty@inaugust.com-20080915172404-ygh6hiyu0q7qpa9x
Removed strndup calls.

Show diffs side-by-side

added added

removed removed

Lines of Context:
26
26
#include "buf0buf.ic"
27
27
#endif
28
28
 
29
 
#include "buf0buddy.h"
30
29
#include "mem0mem.h"
31
30
#include "btr0btr.h"
32
31
#include "fil0fil.h"
38
37
#include "log0log.h"
39
38
#include "trx0undo.h"
40
39
#include "srv0srv.h"
41
 
#include "page0zip.h"
42
40
 
43
41
/*
44
42
                IMPLEMENTATION OF THE BUFFER POOL
128
126
                Lists of blocks
129
127
                ---------------
130
128
 
131
 
There are several lists of control blocks.
132
 
 
133
 
The free list (buf_pool->free) contains blocks which are currently not
134
 
used.
135
 
 
136
 
The common LRU list contains all the blocks holding a file page
 
129
There are several lists of control blocks. The free list contains
 
130
blocks which are currently not used.
 
131
 
 
132
The LRU-list contains all the blocks holding a file page
137
133
except those for which the bufferfix count is non-zero.
138
134
The pages are in the LRU list roughly in the order of the last
139
135
access to the page, so that the oldest pages are at the end of the
148
144
of the LRU list, we make sure that most of the buf_pool stays in the
149
145
main memory, undisturbed.
150
146
 
151
 
The unzip_LRU list contains a subset of the common LRU list.  The
152
 
blocks on the unzip_LRU list hold a compressed file page and the
153
 
corresponding uncompressed page frame.  A block is in unzip_LRU if and
154
 
only if the predicate buf_page_belongs_to_unzip_LRU(&block->page)
155
 
holds.  The blocks in unzip_LRU will be in the same order as they are in
156
 
the common LRU list.  That is, each manipulation of the common LRU
157
 
list will result in the same manipulation of the unzip_LRU list.
158
 
 
159
 
The chain of modified blocks (buf_pool->flush_list) contains the blocks
 
147
The chain of modified blocks contains the blocks
160
148
holding file pages that have been modified in the memory
161
149
but not written to disk yet. The block with the oldest modification
162
150
which has not yet been written to disk is at the end of the chain.
163
151
 
164
 
The chain of unmodified compressed blocks (buf_pool->zip_clean)
165
 
contains the control blocks (buf_page_t) of those compressed pages
166
 
that are not in buf_pool->flush_list and for which no uncompressed
167
 
page has been allocated in the buffer pool.  The control blocks for
168
 
uncompressed pages are accessible via buf_block_t objects that are
169
 
reachable via buf_pool->chunks[].
170
 
 
171
 
The chains of free memory blocks (buf_pool->zip_free[]) are used by
172
 
the buddy allocator (buf0buddy.c) to keep track of currently unused
173
 
memory blocks of size sizeof(buf_page_t)..UNIV_PAGE_SIZE / 2.  These
174
 
blocks are inside the UNIV_PAGE_SIZE-sized memory blocks of type
175
 
BUF_BLOCK_MEMORY that the buddy allocator requests from the buffer
176
 
pool.  The buddy allocator is solely used for allocating control
177
 
blocks for compressed pages (buf_page_t) and compressed page frames.
178
 
 
179
152
                Loading a file page
180
153
                -------------------
181
154
 
225
198
in a tablespace) have recently been referenced, we may predict
226
199
that the whole area may be needed in the near future, and issue
227
200
the read requests for the whole area.
 
201
 
 
202
                AWE implementation
 
203
                ------------------
 
204
 
 
205
By a 'block' we mean the buffer header of type buf_block_t. By a 'page'
 
206
we mean the physical 16 kB memory area allocated from RAM for that block.
 
207
By a 'frame' we mean a 16 kB area in the virtual address space of the
 
208
process, in the frame_mem of buf_pool.
 
209
 
 
210
We can map pages to the frames of the buffer pool.
 
211
 
 
212
1) A buffer block allocated to use as a non-data page, e.g., to the lock
 
213
table, is always mapped to a frame.
 
214
2) A bufferfixed or io-fixed data page is always mapped to a frame.
 
215
3) When we need to map a block to frame, we look from the list
 
216
awe_LRU_free_mapped and try to unmap its last block, but note that
 
217
bufferfixed or io-fixed pages cannot be unmapped.
 
218
4) For every frame in the buffer pool there is always a block whose page is
 
219
mapped to it. When we create the buffer pool, we map the first elements
 
220
in the free list to the frames.
 
221
5) When we have AWE enabled, we disable adaptive hash indexes.
228
222
*/
229
223
 
230
224
/* Value in microseconds */
231
 
static const int WAIT_FOR_READ  = 5000;
232
 
 
233
 
/* The buffer buf_pool of the database */
234
 
UNIV_INTERN buf_pool_t* buf_pool = NULL;
235
 
 
236
 
/* mutex protecting the buffer pool struct and control blocks, except the
237
 
read-write lock in them */
238
 
UNIV_INTERN mutex_t             buf_pool_mutex;
239
 
/* mutex protecting the control blocks of compressed-only pages
240
 
(of type buf_page_t, not buf_block_t) */
241
 
UNIV_INTERN mutex_t             buf_pool_zip_mutex;
242
 
 
243
 
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
244
 
static ulint    buf_dbg_counter = 0; /* This is used to insert validation
 
225
static const int WAIT_FOR_READ  = 20000;
 
226
 
 
227
buf_pool_t*     buf_pool = NULL; /* The buffer buf_pool of the database */
 
228
 
 
229
#ifdef UNIV_DEBUG
 
230
ulint           buf_dbg_counter = 0; /* This is used to insert validation
245
231
                                        operations in execution in the
246
232
                                        debug version */
247
 
/** Flag to forbid the release of the buffer pool mutex.
248
 
Protected by buf_pool_mutex. */
249
 
UNIV_INTERN ulint               buf_pool_mutex_exit_forbidden = 0;
250
 
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
251
 
#ifdef UNIV_DEBUG
252
 
/* If this is set TRUE, the program prints info whenever
253
 
read-ahead or flush occurs */
254
 
UNIV_INTERN ibool               buf_debug_prints = FALSE;
 
233
ibool           buf_debug_prints = FALSE; /* If this is set TRUE,
 
234
                                        the program prints info whenever
 
235
                                        read-ahead or flush occurs */
255
236
#endif /* UNIV_DEBUG */
256
 
 
257
 
/* A chunk of buffers.  The buffer pool is allocated in chunks. */
258
 
struct buf_chunk_struct{
259
 
        ulint           mem_size;       /* allocated size of the chunk */
260
 
        ulint           size;           /* size of frames[] and blocks[] */
261
 
        void*           mem;            /* pointer to the memory area which
262
 
                                        was allocated for the frames */
263
 
        buf_block_t*    blocks;         /* array of buffer control blocks */
264
 
};
265
 
 
266
237
/************************************************************************
267
238
Calculates a page checksum which is stored to the page when it is written
268
239
to a file. Note that we must be careful to calculate the same value on
269
240
32-bit and 64-bit architectures. */
270
 
UNIV_INTERN
 
241
 
271
242
ulint
272
243
buf_calc_page_new_checksum(
273
244
/*=======================*/
274
 
                                /* out: checksum */
275
 
        const byte*     page)   /* in: buffer page */
 
245
                        /* out: checksum */
 
246
        byte*    page)  /* in: buffer page */
276
247
{
277
248
        ulint checksum;
278
249
 
301
272
NOTE: we must first store the new formula checksum to
302
273
FIL_PAGE_SPACE_OR_CHKSUM before calculating and storing this old checksum
303
274
because this takes that field as an input! */
304
 
UNIV_INTERN
 
275
 
305
276
ulint
306
277
buf_calc_page_old_checksum(
307
278
/*=======================*/
308
 
                                /* out: checksum */
309
 
        const byte*     page)   /* in: buffer page */
 
279
                        /* out: checksum */
 
280
        byte*    page)  /* in: buffer page */
310
281
{
311
282
        ulint checksum;
312
283
 
319
290
 
320
291
/************************************************************************
321
292
Checks if a page is corrupt. */
322
 
UNIV_INTERN
 
293
 
323
294
ibool
324
295
buf_page_is_corrupted(
325
296
/*==================*/
326
 
                                        /* out: TRUE if corrupted */
327
 
        const byte*     read_buf,       /* in: a database page */
328
 
        ulint           zip_size)       /* in: size of compressed page;
329
 
                                        0 for uncompressed pages */
 
297
                                /* out: TRUE if corrupted */
 
298
        byte*   read_buf)       /* in: a database page */
330
299
{
331
 
        ulint           checksum_field;
332
 
        ulint           old_checksum_field;
 
300
        ulint   checksum;
 
301
        ulint   old_checksum;
 
302
        ulint   checksum_field;
 
303
        ulint   old_checksum_field;
333
304
#ifndef UNIV_HOTBACKUP
334
 
        ib_uint64_t     current_lsn;
 
305
        dulint  current_lsn;
335
306
#endif
336
 
        if (UNIV_LIKELY(!zip_size)
337
 
            && memcmp(read_buf + FIL_PAGE_LSN + 4,
338
 
                      read_buf + UNIV_PAGE_SIZE
339
 
                      - FIL_PAGE_END_LSN_OLD_CHKSUM + 4, 4)) {
 
307
        if (mach_read_from_4(read_buf + FIL_PAGE_LSN + 4)
 
308
            != mach_read_from_4(read_buf + UNIV_PAGE_SIZE
 
309
                                - FIL_PAGE_END_LSN_OLD_CHKSUM + 4)) {
340
310
 
341
311
                /* Stored log sequence numbers at the start and the end
342
312
                of page do not match */
346
316
 
347
317
#ifndef UNIV_HOTBACKUP
348
318
        if (recv_lsn_checks_on && log_peek_lsn(&current_lsn)) {
349
 
                if (current_lsn < mach_read_ull(read_buf + FIL_PAGE_LSN)) {
 
319
                if (ut_dulint_cmp(current_lsn,
 
320
                                  mach_read_from_8(read_buf + FIL_PAGE_LSN))
 
321
                    < 0) {
350
322
                        ut_print_timestamp(stderr);
351
323
 
352
324
                        fprintf(stderr,
353
325
                                "  InnoDB: Error: page %lu log sequence number"
354
 
                                " %"PRIu64"\n"
 
326
                                " %lu %lu\n"
355
327
                                "InnoDB: is in the future! Current system "
356
 
                                "log sequence number %"PRIu64".\n"
 
328
                                "log sequence number %lu %lu.\n"
357
329
                                "InnoDB: Your database may be corrupt or "
358
330
                                "you may have copied the InnoDB\n"
359
331
                                "InnoDB: tablespace but not the InnoDB "
363
335
                                "InnoDB: for more information.\n",
364
336
                                (ulong) mach_read_from_4(read_buf
365
337
                                                         + FIL_PAGE_OFFSET),
366
 
                                mach_read_ull(read_buf + FIL_PAGE_LSN),
367
 
                                current_lsn);
 
338
                                (ulong) ut_dulint_get_high
 
339
                                (mach_read_from_8(read_buf + FIL_PAGE_LSN)),
 
340
                                (ulong) ut_dulint_get_low
 
341
                                (mach_read_from_8(read_buf + FIL_PAGE_LSN)),
 
342
                                (ulong) ut_dulint_get_high(current_lsn),
 
343
                                (ulong) ut_dulint_get_low(current_lsn));
368
344
                }
369
345
        }
370
346
#endif
374
350
        BUF_NO_CHECKSUM_MAGIC which might be stored by InnoDB with checksums
375
351
        disabled. Otherwise, skip checksum calculation and return FALSE */
376
352
 
377
 
        if (UNIV_LIKELY(srv_use_checksums)) {
378
 
                checksum_field = mach_read_from_4(read_buf
379
 
                                                  + FIL_PAGE_SPACE_OR_CHKSUM);
380
 
 
381
 
                if (UNIV_UNLIKELY(zip_size)) {
382
 
                        return(checksum_field != BUF_NO_CHECKSUM_MAGIC
383
 
                               && checksum_field
384
 
                               != page_zip_calc_checksum(read_buf, zip_size));
385
 
                }
 
353
        if (srv_use_checksums) {
 
354
                old_checksum = buf_calc_page_old_checksum(read_buf);
386
355
 
387
356
                old_checksum_field = mach_read_from_4(
388
357
                        read_buf + UNIV_PAGE_SIZE
398
367
 
399
368
                if (old_checksum_field != mach_read_from_4(read_buf
400
369
                                                           + FIL_PAGE_LSN)
401
 
                    && old_checksum_field != BUF_NO_CHECKSUM_MAGIC
402
 
                    && old_checksum_field
403
 
                    != buf_calc_page_old_checksum(read_buf)) {
 
370
                    && old_checksum_field != old_checksum
 
371
                    && old_checksum_field != BUF_NO_CHECKSUM_MAGIC) {
404
372
 
405
373
                        return(TRUE);
406
374
                }
407
375
 
 
376
                checksum = buf_calc_page_new_checksum(read_buf);
 
377
                checksum_field = mach_read_from_4(read_buf
 
378
                                                  + FIL_PAGE_SPACE_OR_CHKSUM);
 
379
 
408
380
                /* InnoDB versions < 4.0.14 and < 4.1.1 stored the space id
409
381
                (always equal to 0), to FIL_PAGE_SPACE_OR_CHKSUM */
410
382
 
411
 
                if (checksum_field != 0
412
 
                    && checksum_field != BUF_NO_CHECKSUM_MAGIC
413
 
                    && checksum_field
414
 
                    != buf_calc_page_new_checksum(read_buf)) {
 
383
                if (checksum_field != 0 && checksum_field != checksum
 
384
                    && checksum_field != BUF_NO_CHECKSUM_MAGIC) {
415
385
 
416
386
                        return(TRUE);
417
387
                }
422
392
 
423
393
/************************************************************************
424
394
Prints a page to stderr. */
425
 
UNIV_INTERN
 
395
 
426
396
void
427
397
buf_page_print(
428
398
/*===========*/
429
 
        const byte*     read_buf,       /* in: a database page */
430
 
        ulint           zip_size)       /* in: compressed page size, or
431
 
                                0 for uncompressed pages */
 
399
        byte*   read_buf)       /* in: a database page */
432
400
{
433
401
        dict_index_t*   index;
434
402
        ulint           checksum;
435
403
        ulint           old_checksum;
436
 
        ulint           size    = zip_size;
437
 
 
438
 
        if (!size) {
439
 
                size = UNIV_PAGE_SIZE;
440
 
        }
441
404
 
442
405
        ut_print_timestamp(stderr);
443
406
        fprintf(stderr, "  InnoDB: Page dump in ascii and hex (%lu bytes):\n",
444
 
                (ulong) size);
445
 
        ut_print_buf(stderr, read_buf, size);
 
407
                (ulint)UNIV_PAGE_SIZE);
 
408
        ut_print_buf(stderr, read_buf, UNIV_PAGE_SIZE);
446
409
        fputs("InnoDB: End of page dump\n", stderr);
447
410
 
448
 
        if (zip_size) {
449
 
                /* Print compressed page. */
450
 
 
451
 
                switch (fil_page_get_type(read_buf)) {
452
 
                case FIL_PAGE_TYPE_ZBLOB:
453
 
                case FIL_PAGE_TYPE_ZBLOB2:
454
 
                        checksum = srv_use_checksums
455
 
                                ? page_zip_calc_checksum(read_buf, zip_size)
456
 
                                : BUF_NO_CHECKSUM_MAGIC;
457
 
                        ut_print_timestamp(stderr);
458
 
                        fprintf(stderr,
459
 
                                "  InnoDB: Compressed BLOB page"
460
 
                                " checksum %lu, stored %lu\n"
461
 
                                "InnoDB: Page lsn %lu %lu\n"
462
 
                                "InnoDB: Page number (if stored"
463
 
                                " to page already) %lu,\n"
464
 
                                "InnoDB: space id (if stored"
465
 
                                " to page already) %lu\n",
466
 
                                (ulong) checksum,
467
 
                                (ulong) mach_read_from_4(
468
 
                                        read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
469
 
                                (ulong) mach_read_from_4(
470
 
                                        read_buf + FIL_PAGE_LSN),
471
 
                                (ulong) mach_read_from_4(
472
 
                                        read_buf + (FIL_PAGE_LSN + 4)),
473
 
                                (ulong) mach_read_from_4(
474
 
                                        read_buf + FIL_PAGE_OFFSET),
475
 
                                (ulong) mach_read_from_4(
476
 
                                        read_buf
477
 
                                        + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
478
 
                        return;
479
 
                default:
480
 
                        ut_print_timestamp(stderr);
481
 
                        fprintf(stderr,
482
 
                                "  InnoDB: unknown page type %lu,"
483
 
                                " assuming FIL_PAGE_INDEX\n",
484
 
                                fil_page_get_type(read_buf));
485
 
                        /* fall through */
486
 
                case FIL_PAGE_INDEX:
487
 
                        checksum = srv_use_checksums
488
 
                                ? page_zip_calc_checksum(read_buf, zip_size)
489
 
                                : BUF_NO_CHECKSUM_MAGIC;
490
 
 
491
 
                        ut_print_timestamp(stderr);
492
 
                        fprintf(stderr,
493
 
                                "  InnoDB: Compressed page checksum %lu,"
494
 
                                " stored %lu\n"
495
 
                                "InnoDB: Page lsn %lu %lu\n"
496
 
                                "InnoDB: Page number (if stored"
497
 
                                " to page already) %lu,\n"
498
 
                                "InnoDB: space id (if stored"
499
 
                                " to page already) %lu\n",
500
 
                                (ulong) checksum,
501
 
                                (ulong) mach_read_from_4(
502
 
                                        read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
503
 
                                (ulong) mach_read_from_4(
504
 
                                        read_buf + FIL_PAGE_LSN),
505
 
                                (ulong) mach_read_from_4(
506
 
                                        read_buf + (FIL_PAGE_LSN + 4)),
507
 
                                (ulong) mach_read_from_4(
508
 
                                        read_buf + FIL_PAGE_OFFSET),
509
 
                                (ulong) mach_read_from_4(
510
 
                                        read_buf
511
 
                                        + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
512
 
                        return;
513
 
                case FIL_PAGE_TYPE_XDES:
514
 
                        /* This is an uncompressed page. */
515
 
                        break;
516
 
                }
517
 
        }
518
 
 
519
411
        checksum = srv_use_checksums
520
412
                ? buf_calc_page_new_checksum(read_buf) : BUF_NO_CHECKSUM_MAGIC;
521
413
        old_checksum = srv_use_checksums
560
452
                fprintf(stderr,
561
453
                        "InnoDB: Page may be an index page where"
562
454
                        " index id is %lu %lu\n",
563
 
                        (ulong) ut_dulint_get_high(
564
 
                                btr_page_get_index_id(read_buf)),
565
 
                        (ulong) ut_dulint_get_low(
566
 
                                btr_page_get_index_id(read_buf)));
 
455
                        (ulong) ut_dulint_get_high
 
456
                        (btr_page_get_index_id(read_buf)),
 
457
                        (ulong) ut_dulint_get_low
 
458
                        (btr_page_get_index_id(read_buf)));
567
459
 
568
 
#ifdef UNIV_HOTBACKUP
569
460
                /* If the code is in ibbackup, dict_sys may be uninitialized,
570
461
                i.e., NULL */
571
462
 
572
 
                if (dict_sys == NULL) {
573
 
                        break;
574
 
                }
575
 
#endif /* UNIV_HOTBACKUP */
 
463
                if (dict_sys != NULL) {
576
464
 
577
 
                index = dict_index_find_on_id_low(
578
 
                        btr_page_get_index_id(read_buf));
579
 
                if (index) {
580
 
                        fputs("InnoDB: (", stderr);
581
 
                        dict_index_name_print(stderr, NULL, index);
582
 
                        fputs(")\n", stderr);
 
465
                        index = dict_index_find_on_id_low(
 
466
                                btr_page_get_index_id(read_buf));
 
467
                        if (index) {
 
468
                                fputs("InnoDB: (", stderr);
 
469
                                dict_index_name_print(stderr, NULL, index);
 
470
                                fputs(")\n", stderr);
 
471
                        }
583
472
                }
584
473
                break;
585
474
        case FIL_PAGE_INODE:
617
506
                fputs("InnoDB: Page may be a BLOB page\n",
618
507
                      stderr);
619
508
                break;
620
 
        case FIL_PAGE_TYPE_ZBLOB:
621
 
        case FIL_PAGE_TYPE_ZBLOB2:
622
 
                fputs("InnoDB: Page may be a compressed BLOB page\n",
623
 
                      stderr);
624
 
                break;
625
509
        }
626
510
}
627
511
 
632
516
buf_block_init(
633
517
/*===========*/
634
518
        buf_block_t*    block,  /* in: pointer to control block */
635
 
        byte*           frame)  /* in: pointer to buffer frame */
 
519
        byte*           frame)  /* in: pointer to buffer frame, or NULL if in
 
520
                                the case of AWE there is no frame */
636
521
{
637
 
        UNIV_MEM_DESC(frame, UNIV_PAGE_SIZE, block);
 
522
        block->magic_n = 0;
 
523
 
 
524
        block->state = BUF_BLOCK_NOT_USED;
638
525
 
639
526
        block->frame = frame;
640
527
 
641
 
        block->page.state = BUF_BLOCK_NOT_USED;
642
 
        block->page.buf_fix_count = 0;
643
 
        block->page.io_fix = BUF_IO_NONE;
644
 
 
645
 
        block->modify_clock = 0;
646
 
 
647
 
#ifdef UNIV_DEBUG_FILE_ACCESSES
648
 
        block->page.file_page_was_freed = FALSE;
649
 
#endif /* UNIV_DEBUG_FILE_ACCESSES */
 
528
        block->awe_info = NULL;
 
529
 
 
530
        block->buf_fix_count = 0;
 
531
        block->io_fix = 0;
 
532
 
 
533
        block->modify_clock = ut_dulint_zero;
 
534
 
 
535
        block->file_page_was_freed = FALSE;
650
536
 
651
537
        block->check_index_page_at_flush = FALSE;
652
538
        block->index = NULL;
653
539
 
654
 
#ifdef UNIV_DEBUG
655
 
        block->page.in_page_hash = FALSE;
656
 
        block->page.in_zip_hash = FALSE;
657
 
        block->page.in_flush_list = FALSE;
658
 
        block->page.in_free_list = FALSE;
659
 
        block->page.in_LRU_list = FALSE;
660
 
        block->in_unzip_LRU_list = FALSE;
 
540
        block->in_free_list = FALSE;
 
541
        block->in_LRU_list = FALSE;
 
542
 
661
543
        block->n_pointers = 0;
662
 
#endif /* UNIV_DEBUG */
663
 
        page_zip_des_init(&block->page.zip);
664
544
 
665
545
        mutex_create(&block->mutex, SYNC_BUF_BLOCK);
666
546
 
673
553
}
674
554
 
675
555
/************************************************************************
676
 
Allocates a chunk of buffer frames. */
677
 
static
678
 
buf_chunk_t*
679
 
buf_chunk_init(
680
 
/*===========*/
681
 
                                        /* out: chunk, or NULL on failure */
682
 
        buf_chunk_t*    chunk,          /* out: chunk of buffers */
683
 
        ulint           mem_size)       /* in: requested size in bytes */
684
 
{
685
 
        buf_block_t*    block;
686
 
        byte*           frame;
687
 
        ulint           i;
688
 
 
689
 
        /* Round down to a multiple of page size,
690
 
        although it already should be. */
691
 
        mem_size = ut_2pow_round(mem_size, UNIV_PAGE_SIZE);
692
 
        /* Reserve space for the block descriptors. */
693
 
        mem_size += ut_2pow_round((mem_size / UNIV_PAGE_SIZE) * (sizeof *block)
694
 
                                  + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE);
695
 
 
696
 
        chunk->mem_size = mem_size;
697
 
        chunk->mem = os_mem_alloc_large(&chunk->mem_size);
698
 
 
699
 
        if (UNIV_UNLIKELY(chunk->mem == NULL)) {
700
 
 
701
 
                return(NULL);
702
 
        }
703
 
 
704
 
        /* Allocate the block descriptors from
705
 
        the start of the memory block. */
706
 
        chunk->blocks = chunk->mem;
707
 
 
708
 
        /* Align a pointer to the first frame.  Note that when
709
 
        os_large_page_size is smaller than UNIV_PAGE_SIZE,
710
 
        we may allocate one fewer block than requested.  When
711
 
        it is bigger, we may allocate more blocks than requested. */
712
 
 
713
 
        frame = ut_align(chunk->mem, UNIV_PAGE_SIZE);
714
 
        chunk->size = chunk->mem_size / UNIV_PAGE_SIZE
715
 
                - (frame != chunk->mem);
716
 
 
717
 
        /* Subtract the space needed for block descriptors. */
718
 
        {
719
 
                ulint   size = chunk->size;
720
 
 
721
 
                while (frame < (byte*) (chunk->blocks + size)) {
722
 
                        frame += UNIV_PAGE_SIZE;
723
 
                        size--;
724
 
                }
725
 
 
726
 
                chunk->size = size;
727
 
        }
728
 
 
729
 
        /* Init block structs and assign frames for them. Then we
730
 
        assign the frames to the first blocks (we already mapped the
731
 
        memory above). */
732
 
 
733
 
        block = chunk->blocks;
734
 
 
735
 
        for (i = chunk->size; i--; ) {
736
 
 
737
 
                buf_block_init(block, frame);
738
 
 
739
 
#ifdef HAVE_purify
740
 
                /* Wipe contents of frame to eliminate a Purify warning */
741
 
                memset(block->frame, '\0', UNIV_PAGE_SIZE);
742
 
#endif
743
 
                /* Add the block to the free list */
744
 
                UT_LIST_ADD_LAST(list, buf_pool->free, (&block->page));
745
 
                ut_d(block->page.in_free_list = TRUE);
746
 
 
747
 
                block++;
748
 
                frame += UNIV_PAGE_SIZE;
749
 
        }
750
 
 
751
 
        return(chunk);
752
 
}
753
 
 
754
 
#ifdef UNIV_DEBUG
755
 
/*************************************************************************
756
 
Finds a block in the given buffer chunk that points to a
757
 
given compressed page. */
758
 
static
759
 
buf_block_t*
760
 
buf_chunk_contains_zip(
761
 
/*===================*/
762
 
                                /* out: buffer block pointing to
763
 
                                the compressed page, or NULL */
764
 
        buf_chunk_t*    chunk,  /* in: chunk being checked */
765
 
        const void*     data)   /* in: pointer to compressed page */
766
 
{
767
 
        buf_block_t*    block;
768
 
        ulint           i;
769
 
 
770
 
        ut_ad(buf_pool);
771
 
        ut_ad(buf_pool_mutex_own());
772
 
 
773
 
        block = chunk->blocks;
774
 
 
775
 
        for (i = chunk->size; i--; block++) {
776
 
                if (block->page.zip.data == data) {
777
 
 
778
 
                        return(block);
779
 
                }
780
 
        }
781
 
 
782
 
        return(NULL);
783
 
}
784
 
 
785
 
/*************************************************************************
786
 
Finds a block in the buffer pool that points to a
787
 
given compressed page. */
788
 
UNIV_INTERN
789
 
buf_block_t*
790
 
buf_pool_contains_zip(
791
 
/*==================*/
792
 
                                /* out: buffer block pointing to
793
 
                                the compressed page, or NULL */
794
 
        const void*     data)   /* in: pointer to compressed page */
795
 
{
796
 
        ulint           n;
797
 
        buf_chunk_t*    chunk = buf_pool->chunks;
798
 
 
799
 
        for (n = buf_pool->n_chunks; n--; chunk++) {
800
 
                buf_block_t* block = buf_chunk_contains_zip(chunk, data);
801
 
 
802
 
                if (block) {
803
 
                        return(block);
804
 
                }
805
 
        }
806
 
 
807
 
        return(NULL);
808
 
}
809
 
#endif /* UNIV_DEBUG */
810
 
 
811
 
/*************************************************************************
812
 
Checks that all file pages in the buffer chunk are in a replaceable state. */
813
 
static
814
 
const buf_block_t*
815
 
buf_chunk_not_freed(
816
 
/*================*/
817
 
                                /* out: address of a non-free block,
818
 
                                or NULL if all freed */
819
 
        buf_chunk_t*    chunk)  /* in: chunk being checked */
820
 
{
821
 
        buf_block_t*    block;
822
 
        ulint           i;
823
 
 
824
 
        ut_ad(buf_pool);
825
 
        ut_ad(buf_pool_mutex_own());
826
 
 
827
 
        block = chunk->blocks;
828
 
 
829
 
        for (i = chunk->size; i--; block++) {
830
 
                mutex_enter(&block->mutex);
831
 
 
832
 
                if (buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE
833
 
                    && !buf_flush_ready_for_replace(&block->page)) {
834
 
 
835
 
                        mutex_exit(&block->mutex);
836
 
                        return(block);
837
 
                }
838
 
 
839
 
                mutex_exit(&block->mutex);
840
 
        }
841
 
 
842
 
        return(NULL);
843
 
}
844
 
 
845
 
/*************************************************************************
846
 
Checks that all blocks in the buffer chunk are in BUF_BLOCK_NOT_USED state. */
847
 
static
848
 
ibool
849
 
buf_chunk_all_free(
850
 
/*===============*/
851
 
                                        /* out: TRUE if all freed */
852
 
        const buf_chunk_t*      chunk)  /* in: chunk being checked */
853
 
{
854
 
        const buf_block_t*      block;
855
 
        ulint                   i;
856
 
 
857
 
        ut_ad(buf_pool);
858
 
        ut_ad(buf_pool_mutex_own());
859
 
 
860
 
        block = chunk->blocks;
861
 
 
862
 
        for (i = chunk->size; i--; block++) {
863
 
 
864
 
                if (buf_block_get_state(block) != BUF_BLOCK_NOT_USED) {
865
 
 
866
 
                        return(FALSE);
867
 
                }
868
 
        }
869
 
 
870
 
        return(TRUE);
871
 
}
872
 
 
873
 
/************************************************************************
874
 
Frees a chunk of buffer frames. */
875
 
static
876
 
void
877
 
buf_chunk_free(
878
 
/*===========*/
879
 
        buf_chunk_t*    chunk)          /* in: chunk of buffers to free */
880
 
{
881
 
        buf_block_t*            block;
882
 
        const buf_block_t*      block_end;
883
 
 
884
 
        ut_ad(buf_pool_mutex_own());
885
 
 
886
 
        block_end = chunk->blocks + chunk->size;
887
 
 
888
 
        for (block = chunk->blocks; block < block_end; block++) {
889
 
                ut_a(buf_block_get_state(block) == BUF_BLOCK_NOT_USED);
890
 
                ut_a(!block->page.zip.data);
891
 
 
892
 
                ut_ad(!block->page.in_LRU_list);
893
 
                ut_ad(!block->in_unzip_LRU_list);
894
 
                ut_ad(!block->page.in_flush_list);
895
 
                /* Remove the block from the free list. */
896
 
                ut_ad(block->page.in_free_list);
897
 
                UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
898
 
 
899
 
                /* Free the latches. */
900
 
                mutex_free(&block->mutex);
901
 
                rw_lock_free(&block->lock);
902
 
#ifdef UNIV_SYNC_DEBUG
903
 
                rw_lock_free(&block->debug_latch);
904
 
#endif /* UNIV_SYNC_DEBUG */
905
 
                UNIV_MEM_UNDESC(block);
906
 
        }
907
 
 
908
 
        os_mem_free_large(chunk->mem, chunk->mem_size);
909
 
}
910
 
 
911
 
/************************************************************************
912
556
Creates the buffer pool. */
913
 
UNIV_INTERN
 
557
 
914
558
buf_pool_t*
915
 
buf_pool_init(void)
916
 
/*===============*/
 
559
buf_pool_init(
 
560
/*==========*/
917
561
                                /* out, own: buf_pool object, NULL if not
918
562
                                enough memory or error */
 
563
        ulint   max_size,       /* in: maximum size of the buf_pool in
 
564
                                blocks */
 
565
        ulint   curr_size,      /* in: current size to use, must be <=
 
566
                                max_size, currently must be equal to
 
567
                                max_size */
 
568
        ulint   n_frames)       /* in: number of frames; if AWE is used,
 
569
                                this is the size of the address space window
 
570
                                where physical memory pages are mapped; if
 
571
                                AWE is not used then this must be the same
 
572
                                as max_size */
919
573
{
920
 
        buf_chunk_t*    chunk;
 
574
        byte*           frame;
921
575
        ulint           i;
922
 
 
923
 
        buf_pool = mem_zalloc(sizeof(buf_pool_t));
 
576
        buf_block_t*    block;
 
577
 
 
578
        ut_a(max_size == curr_size);
 
579
        ut_a(srv_use_awe || n_frames == max_size);
 
580
 
 
581
        if (n_frames > curr_size) {
 
582
                fprintf(stderr,
 
583
                        "InnoDB: AWE: Error: you must specify in my.cnf"
 
584
                        " .._awe_mem_mb larger\n"
 
585
                        "InnoDB: than .._buffer_pool_size. Now the former"
 
586
                        " is %lu pages,\n"
 
587
                        "InnoDB: the latter %lu pages.\n",
 
588
                        (ulong) curr_size, (ulong) n_frames);
 
589
 
 
590
                return(NULL);
 
591
        }
 
592
 
 
593
        buf_pool = mem_alloc(sizeof(buf_pool_t));
924
594
 
925
595
        /* 1. Initialize general fields
926
 
        ------------------------------- */
927
 
        mutex_create(&buf_pool_mutex, SYNC_BUF_POOL);
928
 
        mutex_create(&buf_pool_zip_mutex, SYNC_BUF_BLOCK);
929
 
 
930
 
        buf_pool_mutex_enter();
931
 
 
932
 
        buf_pool->n_chunks = 1;
933
 
        buf_pool->chunks = chunk = mem_alloc(sizeof *chunk);
934
 
 
935
 
        UT_LIST_INIT(buf_pool->free);
936
 
 
937
 
        if (!buf_chunk_init(chunk, srv_buf_pool_size)) {
938
 
                mem_free(chunk);
939
 
                mem_free(buf_pool);
940
 
                buf_pool = NULL;
941
 
                return(NULL);
942
 
        }
943
 
 
944
 
        srv_buf_pool_old_size = srv_buf_pool_size;
945
 
        buf_pool->curr_size = chunk->size;
946
 
        srv_buf_pool_curr_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
947
 
 
948
 
        buf_pool->page_hash = hash_create(2 * buf_pool->curr_size);
949
 
        buf_pool->zip_hash = hash_create(2 * buf_pool->curr_size);
 
596
        ---------------------------- */
 
597
        mutex_create(&buf_pool->mutex, SYNC_BUF_POOL);
 
598
 
 
599
        mutex_enter(&(buf_pool->mutex));
 
600
 
 
601
        if (srv_use_awe) {
 
602
                /*----------------------------------------*/
 
603
                /* Allocate the virtual address space window, i.e., the
 
604
                buffer pool frames */
 
605
 
 
606
                buf_pool->frame_mem = os_awe_allocate_virtual_mem_window(
 
607
                        UNIV_PAGE_SIZE * (n_frames + 1));
 
608
 
 
609
                /* Allocate the physical memory for AWE and the AWE info array
 
610
                for buf_pool */
 
611
 
 
612
                if ((curr_size % ((1024 * 1024) / UNIV_PAGE_SIZE)) != 0) {
 
613
 
 
614
                        fprintf(stderr,
 
615
                                "InnoDB: AWE: Error: physical memory must be"
 
616
                                " allocated in full megabytes.\n"
 
617
                                "InnoDB: Trying to allocate %lu"
 
618
                                " database pages.\n",
 
619
                                (ulong) curr_size);
 
620
 
 
621
                        return(NULL);
 
622
                }
 
623
 
 
624
                if (!os_awe_allocate_physical_mem(&(buf_pool->awe_info),
 
625
                                                  curr_size
 
626
                                                  / ((1024 * 1024)
 
627
                                                     / UNIV_PAGE_SIZE))) {
 
628
 
 
629
                        return(NULL);
 
630
                }
 
631
                /*----------------------------------------*/
 
632
        } else {
 
633
                buf_pool->frame_mem = os_mem_alloc_large(
 
634
                        UNIV_PAGE_SIZE * (n_frames + 1), TRUE, FALSE);
 
635
        }
 
636
 
 
637
        if (buf_pool->frame_mem == NULL) {
 
638
 
 
639
                return(NULL);
 
640
        }
 
641
 
 
642
        buf_pool->blocks = ut_malloc(sizeof(buf_block_t) * max_size);
 
643
 
 
644
        if (buf_pool->blocks == NULL) {
 
645
 
 
646
                return(NULL);
 
647
        }
 
648
 
 
649
        buf_pool->max_size = max_size;
 
650
        buf_pool->curr_size = curr_size;
 
651
 
 
652
        buf_pool->n_frames = n_frames;
 
653
 
 
654
        /* Align pointer to the first frame */
 
655
 
 
656
        frame = ut_align(buf_pool->frame_mem, UNIV_PAGE_SIZE);
 
657
 
 
658
        buf_pool->frame_zero = frame;
 
659
        buf_pool->high_end = frame + UNIV_PAGE_SIZE * n_frames;
 
660
 
 
661
        if (srv_use_awe) {
 
662
                /*----------------------------------------*/
 
663
                /* Map an initial part of the allocated physical memory to
 
664
                the window */
 
665
 
 
666
                os_awe_map_physical_mem_to_window(buf_pool->frame_zero,
 
667
                                                  n_frames
 
668
                                                  * (UNIV_PAGE_SIZE
 
669
                                                     / OS_AWE_X86_PAGE_SIZE),
 
670
                                                  buf_pool->awe_info);
 
671
                /*----------------------------------------*/
 
672
        }
 
673
 
 
674
        buf_pool->blocks_of_frames = ut_malloc(sizeof(void*) * n_frames);
 
675
 
 
676
        if (buf_pool->blocks_of_frames == NULL) {
 
677
 
 
678
                return(NULL);
 
679
        }
 
680
 
 
681
        /* Init block structs and assign frames for them; in the case of
 
682
        AWE there are less frames than blocks. Then we assign the frames
 
683
        to the first blocks (we already mapped the memory above). We also
 
684
        init the awe_info for every block. */
 
685
 
 
686
        for (i = 0; i < max_size; i++) {
 
687
 
 
688
                block = buf_pool_get_nth_block(buf_pool, i);
 
689
 
 
690
                if (i < n_frames) {
 
691
                        frame = buf_pool->frame_zero + i * UNIV_PAGE_SIZE;
 
692
                        *(buf_pool->blocks_of_frames + i) = block;
 
693
                } else {
 
694
                        frame = NULL;
 
695
                }
 
696
 
 
697
                buf_block_init(block, frame);
 
698
 
 
699
                if (srv_use_awe) {
 
700
                        /*----------------------------------------*/
 
701
                        block->awe_info = buf_pool->awe_info
 
702
                                + i * (UNIV_PAGE_SIZE / OS_AWE_X86_PAGE_SIZE);
 
703
                        /*----------------------------------------*/
 
704
                }
 
705
        }
 
706
 
 
707
        buf_pool->page_hash = hash_create(2 * max_size);
 
708
 
 
709
        buf_pool->n_pend_reads = 0;
950
710
 
951
711
        buf_pool->last_printout_time = time(NULL);
952
712
 
 
713
        buf_pool->n_pages_read = 0;
 
714
        buf_pool->n_pages_written = 0;
 
715
        buf_pool->n_pages_created = 0;
 
716
        buf_pool->n_pages_awe_remapped = 0;
 
717
 
 
718
        buf_pool->n_page_gets = 0;
 
719
        buf_pool->n_page_gets_old = 0;
 
720
        buf_pool->n_pages_read_old = 0;
 
721
        buf_pool->n_pages_written_old = 0;
 
722
        buf_pool->n_pages_created_old = 0;
 
723
        buf_pool->n_pages_awe_remapped_old = 0;
 
724
 
953
725
        /* 2. Initialize flushing fields
954
 
        -------------------------------- */
 
726
        ---------------------------- */
 
727
        UT_LIST_INIT(buf_pool->flush_list);
955
728
 
956
 
        for (i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) {
 
729
        for (i = BUF_FLUSH_LRU; i <= BUF_FLUSH_LIST; i++) {
 
730
                buf_pool->n_flush[i] = 0;
 
731
                buf_pool->init_flush[i] = FALSE;
957
732
                buf_pool->no_flush[i] = os_event_create(NULL);
958
733
        }
959
734
 
 
735
        buf_pool->LRU_flush_ended = 0;
 
736
 
960
737
        buf_pool->ulint_clock = 1;
 
738
        buf_pool->freed_page_clock = 0;
961
739
 
962
740
        /* 3. Initialize LRU fields
963
 
        --------------------------- */
964
 
        /* All fields are initialized by mem_zalloc(). */
965
 
 
966
 
        buf_pool_mutex_exit();
967
 
 
968
 
        btr_search_sys_create(buf_pool->curr_size
969
 
                              * UNIV_PAGE_SIZE / sizeof(void*) / 64);
970
 
 
971
 
        /* 4. Initialize the buddy allocator fields */
972
 
        /* All fields are initialized by mem_zalloc(). */
 
741
        ---------------------------- */
 
742
        UT_LIST_INIT(buf_pool->LRU);
 
743
 
 
744
        buf_pool->LRU_old = NULL;
 
745
 
 
746
        UT_LIST_INIT(buf_pool->awe_LRU_free_mapped);
 
747
 
 
748
        /* Add control blocks to the free list */
 
749
        UT_LIST_INIT(buf_pool->free);
 
750
 
 
751
        for (i = 0; i < curr_size; i++) {
 
752
 
 
753
                block = buf_pool_get_nth_block(buf_pool, i);
 
754
 
 
755
                if (block->frame) {
 
756
                        /* Wipe contents of frame to eliminate a Purify
 
757
                        warning */
 
758
 
 
759
#ifdef HAVE_purify
 
760
                        memset(block->frame, '\0', UNIV_PAGE_SIZE);
 
761
#endif
 
762
                        if (srv_use_awe) {
 
763
                                /* Add to the list of blocks mapped to
 
764
                                frames */
 
765
 
 
766
                                UT_LIST_ADD_LAST(awe_LRU_free_mapped,
 
767
                                                 buf_pool->awe_LRU_free_mapped,
 
768
                                                 block);
 
769
                        }
 
770
                }
 
771
 
 
772
                UT_LIST_ADD_LAST(free, buf_pool->free, block);
 
773
                block->in_free_list = TRUE;
 
774
        }
 
775
 
 
776
        mutex_exit(&(buf_pool->mutex));
 
777
 
 
778
        if (srv_use_adaptive_hash_indexes) {
 
779
                btr_search_sys_create(curr_size * UNIV_PAGE_SIZE
 
780
                                      / sizeof(void*) / 64);
 
781
        } else {
 
782
                /* Create only a small dummy system */
 
783
                btr_search_sys_create(1000);
 
784
        }
973
785
 
974
786
        return(buf_pool);
975
787
}
976
788
 
977
789
/************************************************************************
978
 
Frees the buffer pool at shutdown.  This must not be invoked before
979
 
freeing all mutexes. */
980
 
UNIV_INTERN
981
 
void
982
 
buf_pool_free(void)
983
 
/*===============*/
984
 
{
985
 
        buf_chunk_t*    chunk;
986
 
        buf_chunk_t*    chunks;
987
 
 
988
 
        chunks = buf_pool->chunks;
989
 
        chunk = chunks + buf_pool->n_chunks;
990
 
 
991
 
        while (--chunk >= chunks) {
992
 
                /* Bypass the checks of buf_chunk_free(), since they
993
 
                would fail at shutdown. */
994
 
                os_mem_free_large(chunk->mem, chunk->mem_size);
995
 
        }
996
 
 
997
 
        buf_pool->n_chunks = 0;
998
 
}
999
 
 
1000
 
/************************************************************************
1001
 
Relocate a buffer control block.  Relocates the block on the LRU list
1002
 
and in buf_pool->page_hash.  Does not relocate bpage->list.
1003
 
The caller must take care of relocating bpage->list. */
1004
 
UNIV_INTERN
1005
 
void
1006
 
buf_relocate(
1007
 
/*=========*/
1008
 
        buf_page_t*     bpage,  /* in/out: control block being relocated;
1009
 
                                buf_page_get_state(bpage) must be
1010
 
                                BUF_BLOCK_ZIP_DIRTY or BUF_BLOCK_ZIP_PAGE */
1011
 
        buf_page_t*     dpage)  /* in/out: destination control block */
1012
 
{
1013
 
        buf_page_t*     b;
1014
 
        ulint           fold;
1015
 
 
1016
 
        ut_ad(buf_pool_mutex_own());
1017
 
        ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1018
 
        ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
1019
 
        ut_a(bpage->buf_fix_count == 0);
1020
 
        ut_ad(bpage->in_LRU_list);
1021
 
        ut_ad(!bpage->in_zip_hash);
1022
 
        ut_ad(bpage->in_page_hash);
1023
 
        ut_ad(bpage == buf_page_hash_get(bpage->space, bpage->offset));
1024
 
#ifdef UNIV_DEBUG
1025
 
        switch (buf_page_get_state(bpage)) {
1026
 
        case BUF_BLOCK_ZIP_FREE:
1027
 
        case BUF_BLOCK_NOT_USED:
1028
 
        case BUF_BLOCK_READY_FOR_USE:
1029
 
        case BUF_BLOCK_FILE_PAGE:
1030
 
        case BUF_BLOCK_MEMORY:
1031
 
        case BUF_BLOCK_REMOVE_HASH:
1032
 
                ut_error;
1033
 
        case BUF_BLOCK_ZIP_DIRTY:
1034
 
        case BUF_BLOCK_ZIP_PAGE:
1035
 
                break;
1036
 
        }
1037
 
#endif /* UNIV_DEBUG */
1038
 
 
1039
 
        memcpy(dpage, bpage, sizeof *dpage);
1040
 
 
1041
 
        ut_d(bpage->in_LRU_list = FALSE);
1042
 
        ut_d(bpage->in_page_hash = FALSE);
1043
 
 
1044
 
        /* relocate buf_pool->LRU */
1045
 
        b = UT_LIST_GET_PREV(LRU, bpage);
1046
 
        UT_LIST_REMOVE(LRU, buf_pool->LRU, bpage);
1047
 
 
1048
 
        if (b) {
1049
 
                UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU, b, dpage);
1050
 
        } else {
1051
 
                UT_LIST_ADD_FIRST(LRU, buf_pool->LRU, dpage);
1052
 
        }
1053
 
 
1054
 
        if (UNIV_UNLIKELY(buf_pool->LRU_old == bpage)) {
1055
 
                buf_pool->LRU_old = dpage;
1056
 
        }
1057
 
 
1058
 
        ut_d(UT_LIST_VALIDATE(LRU, buf_page_t, buf_pool->LRU));
1059
 
 
1060
 
        /* relocate buf_pool->page_hash */
1061
 
        fold = buf_page_address_fold(bpage->space, bpage->offset);
1062
 
 
1063
 
        HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, bpage);
1064
 
        HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold, dpage);
1065
 
 
1066
 
        UNIV_MEM_INVALID(bpage, sizeof *bpage);
1067
 
}
1068
 
 
1069
 
/************************************************************************
1070
 
Shrinks the buffer pool. */
1071
 
static
1072
 
void
1073
 
buf_pool_shrink(
1074
 
/*============*/
1075
 
                                /* out: TRUE if shrunk */
1076
 
        ulint   chunk_size)     /* in: number of pages to remove */
1077
 
{
1078
 
        buf_chunk_t*    chunks;
1079
 
        buf_chunk_t*    chunk;
1080
 
        ulint           max_size;
1081
 
        ulint           max_free_size;
1082
 
        buf_chunk_t*    max_chunk;
1083
 
        buf_chunk_t*    max_free_chunk;
1084
 
 
1085
 
        ut_ad(!buf_pool_mutex_own());
1086
 
 
1087
 
try_again:
1088
 
        btr_search_disable(); /* Empty the adaptive hash index again */
1089
 
        buf_pool_mutex_enter();
1090
 
 
1091
 
shrink_again:
1092
 
        if (buf_pool->n_chunks <= 1) {
1093
 
 
1094
 
                /* Cannot shrink if there is only one chunk */
1095
 
                goto func_done;
1096
 
        }
1097
 
 
1098
 
        /* Search for the largest free chunk
1099
 
        not larger than the size difference */
1100
 
        chunks = buf_pool->chunks;
1101
 
        chunk = chunks + buf_pool->n_chunks;
1102
 
        max_size = max_free_size = 0;
1103
 
        max_chunk = max_free_chunk = NULL;
1104
 
 
1105
 
        while (--chunk >= chunks) {
1106
 
                if (chunk->size <= chunk_size
1107
 
                    && chunk->size > max_free_size) {
1108
 
                        if (chunk->size > max_size) {
1109
 
                                max_size = chunk->size;
1110
 
                                max_chunk = chunk;
1111
 
                        }
1112
 
 
1113
 
                        if (buf_chunk_all_free(chunk)) {
1114
 
                                max_free_size = chunk->size;
1115
 
                                max_free_chunk = chunk;
1116
 
                        }
1117
 
                }
1118
 
        }
1119
 
 
1120
 
        if (!max_free_size) {
1121
 
 
1122
 
                ulint           dirty   = 0;
1123
 
                ulint           nonfree = 0;
1124
 
                buf_block_t*    block;
1125
 
                buf_block_t*    bend;
1126
 
 
1127
 
                /* Cannot shrink: try again later
1128
 
                (do not assign srv_buf_pool_old_size) */
1129
 
                if (!max_chunk) {
1130
 
 
1131
 
                        goto func_exit;
1132
 
                }
1133
 
 
1134
 
                block = max_chunk->blocks;
1135
 
                bend = block + max_chunk->size;
1136
 
 
1137
 
                /* Move the blocks of chunk to the end of the
1138
 
                LRU list and try to flush them. */
1139
 
                for (; block < bend; block++) {
1140
 
                        switch (buf_block_get_state(block)) {
1141
 
                        case BUF_BLOCK_NOT_USED:
1142
 
                                continue;
1143
 
                        case BUF_BLOCK_FILE_PAGE:
1144
 
                                break;
1145
 
                        default:
1146
 
                                nonfree++;
1147
 
                                continue;
1148
 
                        }
1149
 
 
1150
 
                        mutex_enter(&block->mutex);
1151
 
                        /* The following calls will temporarily
1152
 
                        release block->mutex and buf_pool_mutex.
1153
 
                        Therefore, we have to always retry,
1154
 
                        even if !dirty && !nonfree. */
1155
 
 
1156
 
                        if (!buf_flush_ready_for_replace(&block->page)) {
1157
 
 
1158
 
                                buf_LRU_make_block_old(&block->page);
1159
 
                                dirty++;
1160
 
                        } else if (buf_LRU_free_block(&block->page, TRUE, NULL)
1161
 
                                   != BUF_LRU_FREED) {
1162
 
                                nonfree++;
1163
 
                        }
1164
 
 
1165
 
                        mutex_exit(&block->mutex);
1166
 
                }
1167
 
 
1168
 
                buf_pool_mutex_exit();
1169
 
 
1170
 
                /* Request for a flush of the chunk if it helps.
1171
 
                Do not flush if there are non-free blocks, since
1172
 
                flushing will not make the chunk freeable. */
1173
 
                if (nonfree) {
1174
 
                        /* Avoid busy-waiting. */
1175
 
                        os_thread_sleep(100000);
1176
 
                } else if (dirty
1177
 
                           && buf_flush_batch(BUF_FLUSH_LRU, dirty, 0)
1178
 
                           == ULINT_UNDEFINED) {
1179
 
 
1180
 
                        buf_flush_wait_batch_end(BUF_FLUSH_LRU);
1181
 
                }
1182
 
 
1183
 
                goto try_again;
1184
 
        }
1185
 
 
1186
 
        max_size = max_free_size;
1187
 
        max_chunk = max_free_chunk;
1188
 
 
1189
 
        srv_buf_pool_old_size = srv_buf_pool_size;
1190
 
 
1191
 
        /* Rewrite buf_pool->chunks.  Copy everything but max_chunk. */
1192
 
        chunks = mem_alloc((buf_pool->n_chunks - 1) * sizeof *chunks);
1193
 
        memcpy(chunks, buf_pool->chunks,
1194
 
               (max_chunk - buf_pool->chunks) * sizeof *chunks);
1195
 
        memcpy(chunks + (max_chunk - buf_pool->chunks),
1196
 
               max_chunk + 1,
1197
 
               buf_pool->chunks + buf_pool->n_chunks
1198
 
               - (max_chunk + 1));
1199
 
        ut_a(buf_pool->curr_size > max_chunk->size);
1200
 
        buf_pool->curr_size -= max_chunk->size;
1201
 
        srv_buf_pool_curr_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
1202
 
        chunk_size -= max_chunk->size;
1203
 
        buf_chunk_free(max_chunk);
1204
 
        mem_free(buf_pool->chunks);
1205
 
        buf_pool->chunks = chunks;
1206
 
        buf_pool->n_chunks--;
1207
 
 
1208
 
        /* Allow a slack of one megabyte. */
1209
 
        if (chunk_size > 1048576 / UNIV_PAGE_SIZE) {
1210
 
 
1211
 
                goto shrink_again;
1212
 
        }
1213
 
 
1214
 
func_done:
1215
 
        srv_buf_pool_old_size = srv_buf_pool_size;
1216
 
func_exit:
1217
 
        buf_pool_mutex_exit();
1218
 
        btr_search_enable();
1219
 
}
1220
 
 
1221
 
/************************************************************************
1222
 
Rebuild buf_pool->page_hash. */
1223
 
static
1224
 
void
1225
 
buf_pool_page_hash_rebuild(void)
1226
 
/*============================*/
1227
 
{
1228
 
        ulint           i;
1229
 
        ulint           n_chunks;
1230
 
        buf_chunk_t*    chunk;
1231
 
        hash_table_t*   page_hash;
1232
 
        hash_table_t*   zip_hash;
1233
 
        buf_page_t*     b;
1234
 
 
1235
 
        buf_pool_mutex_enter();
1236
 
 
1237
 
        /* Free, create, and populate the hash table. */
1238
 
        hash_table_free(buf_pool->page_hash);
1239
 
        buf_pool->page_hash = page_hash = hash_create(2 * buf_pool->curr_size);
1240
 
        zip_hash = hash_create(2 * buf_pool->curr_size);
1241
 
 
1242
 
        HASH_MIGRATE(buf_pool->zip_hash, zip_hash, buf_page_t, hash,
1243
 
                     BUF_POOL_ZIP_FOLD_BPAGE);
1244
 
 
1245
 
        hash_table_free(buf_pool->zip_hash);
1246
 
        buf_pool->zip_hash = zip_hash;
1247
 
 
1248
 
        /* Insert the uncompressed file pages to buf_pool->page_hash. */
1249
 
 
1250
 
        chunk = buf_pool->chunks;
1251
 
        n_chunks = buf_pool->n_chunks;
1252
 
 
1253
 
        for (i = 0; i < n_chunks; i++, chunk++) {
1254
 
                ulint           j;
1255
 
                buf_block_t*    block = chunk->blocks;
1256
 
 
1257
 
                for (j = 0; j < chunk->size; j++, block++) {
1258
 
                        if (buf_block_get_state(block)
1259
 
                            == BUF_BLOCK_FILE_PAGE) {
1260
 
                                ut_ad(!block->page.in_zip_hash);
1261
 
                                ut_ad(block->page.in_page_hash);
1262
 
 
1263
 
                                HASH_INSERT(buf_page_t, hash, page_hash,
1264
 
                                            buf_page_address_fold(
1265
 
                                                    block->page.space,
1266
 
                                                    block->page.offset),
1267
 
                                            &block->page);
1268
 
                        }
1269
 
                }
1270
 
        }
1271
 
 
1272
 
        /* Insert the compressed-only pages to buf_pool->page_hash.
1273
 
        All such blocks are either in buf_pool->zip_clean or
1274
 
        in buf_pool->flush_list. */
1275
 
 
1276
 
        for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
1277
 
             b = UT_LIST_GET_NEXT(list, b)) {
1278
 
                ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
1279
 
                ut_ad(!b->in_flush_list);
1280
 
                ut_ad(b->in_LRU_list);
1281
 
                ut_ad(b->in_page_hash);
1282
 
                ut_ad(!b->in_zip_hash);
1283
 
 
1284
 
                HASH_INSERT(buf_page_t, hash, page_hash,
1285
 
                            buf_page_address_fold(b->space, b->offset), b);
1286
 
        }
1287
 
 
1288
 
        for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
1289
 
             b = UT_LIST_GET_NEXT(list, b)) {
1290
 
                ut_ad(b->in_flush_list);
1291
 
                ut_ad(b->in_LRU_list);
1292
 
                ut_ad(b->in_page_hash);
1293
 
                ut_ad(!b->in_zip_hash);
1294
 
 
1295
 
                switch (buf_page_get_state(b)) {
1296
 
                case BUF_BLOCK_ZIP_DIRTY:
1297
 
                        HASH_INSERT(buf_page_t, hash, page_hash,
1298
 
                                    buf_page_address_fold(b->space,
1299
 
                                                          b->offset), b);
1300
 
                        break;
1301
 
                case BUF_BLOCK_FILE_PAGE:
1302
 
                        /* uncompressed page */
1303
 
                        break;
1304
 
                case BUF_BLOCK_ZIP_FREE:
1305
 
                case BUF_BLOCK_ZIP_PAGE:
1306
 
                case BUF_BLOCK_NOT_USED:
1307
 
                case BUF_BLOCK_READY_FOR_USE:
1308
 
                case BUF_BLOCK_MEMORY:
1309
 
                case BUF_BLOCK_REMOVE_HASH:
1310
 
                        ut_error;
1311
 
                        break;
1312
 
                }
1313
 
        }
1314
 
 
1315
 
        buf_pool_mutex_exit();
1316
 
}
1317
 
 
1318
 
/************************************************************************
1319
 
Resizes the buffer pool. */
1320
 
UNIV_INTERN
1321
 
void
1322
 
buf_pool_resize(void)
1323
 
/*=================*/
1324
 
{
1325
 
        buf_pool_mutex_enter();
1326
 
 
1327
 
        if (srv_buf_pool_old_size == srv_buf_pool_size) {
1328
 
 
1329
 
                buf_pool_mutex_exit();
 
790
Maps the page of block to a frame, if not mapped yet. Unmaps some page
 
791
from the end of the awe_LRU_free_mapped. */
 
792
 
 
793
void
 
794
buf_awe_map_page_to_frame(
 
795
/*======================*/
 
796
        buf_block_t*    block,          /* in: block whose page should be
 
797
                                        mapped to a frame */
 
798
        ibool           add_to_mapped_list) /* in: TRUE if we in the case
 
799
                                        we need to map the page should also
 
800
                                        add the block to the
 
801
                                        awe_LRU_free_mapped list */
 
802
{
 
803
        buf_block_t*    bck;
 
804
 
 
805
        ut_ad(mutex_own(&(buf_pool->mutex)));
 
806
        ut_ad(block);
 
807
 
 
808
        if (block->frame) {
 
809
 
1330
810
                return;
1331
811
        }
1332
812
 
1333
 
        if (srv_buf_pool_curr_size + 1048576 > srv_buf_pool_size) {
1334
 
 
1335
 
                buf_pool_mutex_exit();
1336
 
 
1337
 
                /* Disable adaptive hash indexes and empty the index
1338
 
                in order to free up memory in the buffer pool chunks. */
1339
 
                buf_pool_shrink((srv_buf_pool_curr_size - srv_buf_pool_size)
1340
 
                                / UNIV_PAGE_SIZE);
1341
 
        } else if (srv_buf_pool_curr_size + 1048576 < srv_buf_pool_size) {
1342
 
 
1343
 
                /* Enlarge the buffer pool by at least one megabyte */
1344
 
 
1345
 
                ulint           mem_size
1346
 
                        = srv_buf_pool_size - srv_buf_pool_curr_size;
1347
 
                buf_chunk_t*    chunks;
1348
 
                buf_chunk_t*    chunk;
1349
 
 
1350
 
                chunks = mem_alloc((buf_pool->n_chunks + 1) * sizeof *chunks);
1351
 
 
1352
 
                memcpy(chunks, buf_pool->chunks, buf_pool->n_chunks
1353
 
                       * sizeof *chunks);
1354
 
 
1355
 
                chunk = &chunks[buf_pool->n_chunks];
1356
 
 
1357
 
                if (!buf_chunk_init(chunk, mem_size)) {
1358
 
                        mem_free(chunks);
 
813
        /* Scan awe_LRU_free_mapped from the end and try to find a block
 
814
        which is not bufferfixed or io-fixed */
 
815
 
 
816
        bck = UT_LIST_GET_LAST(buf_pool->awe_LRU_free_mapped);
 
817
 
 
818
        while (bck) {
 
819
                ibool skip;
 
820
 
 
821
                mutex_enter(&bck->mutex);
 
822
 
 
823
                skip = (bck->state == BUF_BLOCK_FILE_PAGE
 
824
                        && (bck->buf_fix_count != 0 || bck->io_fix != 0));
 
825
 
 
826
                if (skip) {
 
827
                        mutex_exit(&bck->mutex);
 
828
 
 
829
                        /* We have to skip this */
 
830
                        bck = UT_LIST_GET_PREV(awe_LRU_free_mapped, bck);
1359
831
                } else {
1360
 
                        buf_pool->curr_size += chunk->size;
1361
 
                        srv_buf_pool_curr_size = buf_pool->curr_size
1362
 
                                * UNIV_PAGE_SIZE;
1363
 
                        mem_free(buf_pool->chunks);
1364
 
                        buf_pool->chunks = chunks;
1365
 
                        buf_pool->n_chunks++;
 
832
                        /* We can map block to the frame of bck */
 
833
 
 
834
                        os_awe_map_physical_mem_to_window(
 
835
                                bck->frame,
 
836
                                UNIV_PAGE_SIZE / OS_AWE_X86_PAGE_SIZE,
 
837
                                block->awe_info);
 
838
 
 
839
                        block->frame = bck->frame;
 
840
 
 
841
                        *(buf_pool->blocks_of_frames
 
842
                          + (((ulint)(block->frame
 
843
                                      - buf_pool->frame_zero))
 
844
                             >> UNIV_PAGE_SIZE_SHIFT))
 
845
                                = block;
 
846
 
 
847
                        bck->frame = NULL;
 
848
                        UT_LIST_REMOVE(awe_LRU_free_mapped,
 
849
                                       buf_pool->awe_LRU_free_mapped,
 
850
                                       bck);
 
851
 
 
852
                        if (add_to_mapped_list) {
 
853
                                UT_LIST_ADD_FIRST(
 
854
                                        awe_LRU_free_mapped,
 
855
                                        buf_pool->awe_LRU_free_mapped,
 
856
                                        block);
 
857
                        }
 
858
 
 
859
                        buf_pool->n_pages_awe_remapped++;
 
860
 
 
861
                        mutex_exit(&bck->mutex);
 
862
 
 
863
                        return;
1366
864
                }
1367
 
 
1368
 
                srv_buf_pool_old_size = srv_buf_pool_size;
1369
 
                buf_pool_mutex_exit();
1370
865
        }
1371
866
 
1372
 
        buf_pool_page_hash_rebuild();
 
867
        fprintf(stderr,
 
868
                "InnoDB: AWE: Fatal error: cannot find a page to unmap\n"
 
869
                "InnoDB: awe_LRU_free_mapped list length %lu\n",
 
870
                (ulong) UT_LIST_GET_LEN(buf_pool->awe_LRU_free_mapped));
 
871
 
 
872
        ut_a(0);
 
873
}
 
874
 
 
875
/************************************************************************
 
876
Allocates a buffer block. */
 
877
UNIV_INLINE
 
878
buf_block_t*
 
879
buf_block_alloc(void)
 
880
/*=================*/
 
881
                                /* out, own: the allocated block; also if AWE
 
882
                                is used it is guaranteed that the page is
 
883
                                mapped to a frame */
 
884
{
 
885
        buf_block_t*    block;
 
886
 
 
887
        block = buf_LRU_get_free_block();
 
888
 
 
889
        return(block);
1373
890
}
1374
891
 
1375
892
/************************************************************************
1379
896
void
1380
897
buf_block_make_young(
1381
898
/*=================*/
1382
 
        buf_page_t*     bpage)  /* in: block to make younger */
 
899
        buf_block_t*    block)  /* in: block to make younger */
1383
900
{
1384
 
        ut_ad(!buf_pool_mutex_own());
 
901
        ut_ad(!mutex_own(&(buf_pool->mutex)));
1385
902
 
1386
903
        /* Note that we read freed_page_clock's without holding any mutex:
1387
904
        this is allowed since the result is used only in heuristics */
1388
905
 
1389
 
        if (buf_page_peek_if_too_old(bpage)) {
 
906
        if (buf_block_peek_if_too_old(block)) {
1390
907
 
1391
 
                buf_pool_mutex_enter();
 
908
                mutex_enter(&buf_pool->mutex);
1392
909
                /* There has been freeing activity in the LRU list:
1393
910
                best to move to the head of the LRU list */
1394
911
 
1395
 
                buf_LRU_make_block_young(bpage);
1396
 
                buf_pool_mutex_exit();
 
912
                buf_LRU_make_block_young(block);
 
913
                mutex_exit(&buf_pool->mutex);
1397
914
        }
1398
915
}
1399
916
 
1401
918
Moves a page to the start of the buffer pool LRU list. This high-level
1402
919
function can be used to prevent an important page from slipping out of
1403
920
the buffer pool. */
1404
 
UNIV_INTERN
 
921
 
1405
922
void
1406
923
buf_page_make_young(
1407
924
/*================*/
1408
 
        buf_page_t*     bpage)  /* in: buffer block of a file page */
1409
 
{
1410
 
        buf_pool_mutex_enter();
1411
 
 
1412
 
        ut_a(buf_page_in_file(bpage));
1413
 
 
1414
 
        buf_LRU_make_block_young(bpage);
1415
 
 
1416
 
        buf_pool_mutex_exit();
 
925
        buf_frame_t*    frame)  /* in: buffer frame of a file page */
 
926
{
 
927
        buf_block_t*    block;
 
928
 
 
929
        mutex_enter(&(buf_pool->mutex));
 
930
 
 
931
        block = buf_block_align(frame);
 
932
 
 
933
        ut_a(block->state == BUF_BLOCK_FILE_PAGE);
 
934
 
 
935
        buf_LRU_make_block_young(block);
 
936
 
 
937
        mutex_exit(&(buf_pool->mutex));
 
938
}
 
939
 
 
940
/************************************************************************
 
941
Frees a buffer block which does not contain a file page. */
 
942
UNIV_INLINE
 
943
void
 
944
buf_block_free(
 
945
/*===========*/
 
946
        buf_block_t*    block)  /* in, own: block to be freed */
 
947
{
 
948
        mutex_enter(&(buf_pool->mutex));
 
949
 
 
950
        mutex_enter(&block->mutex);
 
951
 
 
952
        ut_a(block->state != BUF_BLOCK_FILE_PAGE);
 
953
 
 
954
        buf_LRU_block_free_non_file_page(block);
 
955
 
 
956
        mutex_exit(&block->mutex);
 
957
 
 
958
        mutex_exit(&(buf_pool->mutex));
 
959
}
 
960
 
 
961
/*************************************************************************
 
962
Allocates a buffer frame. */
 
963
 
 
964
buf_frame_t*
 
965
buf_frame_alloc(void)
 
966
/*=================*/
 
967
                                /* out: buffer frame */
 
968
{
 
969
        return(buf_block_alloc()->frame);
 
970
}
 
971
 
 
972
/*************************************************************************
 
973
Frees a buffer frame which does not contain a file page. */
 
974
 
 
975
void
 
976
buf_frame_free(
 
977
/*===========*/
 
978
        buf_frame_t*    frame)  /* in: buffer frame */
 
979
{
 
980
        buf_block_free(buf_block_align(frame));
 
981
}
 
982
 
 
983
/************************************************************************
 
984
Returns the buffer control block if the page can be found in the buffer
 
985
pool. NOTE that it is possible that the page is not yet read
 
986
from disk, though. This is a very low-level function: use with care! */
 
987
 
 
988
buf_block_t*
 
989
buf_page_peek_block(
 
990
/*================*/
 
991
                        /* out: control block if found from page hash table,
 
992
                        otherwise NULL; NOTE that the page is not necessarily
 
993
                        yet read from disk! */
 
994
        ulint   space,  /* in: space id */
 
995
        ulint   offset) /* in: page number */
 
996
{
 
997
        buf_block_t*    block;
 
998
 
 
999
        mutex_enter_fast(&(buf_pool->mutex));
 
1000
 
 
1001
        block = buf_page_hash_get(space, offset);
 
1002
 
 
1003
        mutex_exit(&(buf_pool->mutex));
 
1004
 
 
1005
        return(block);
1417
1006
}
1418
1007
 
1419
1008
/************************************************************************
1420
1009
Resets the check_index_page_at_flush field of a page if found in the buffer
1421
1010
pool. */
1422
 
UNIV_INTERN
 
1011
 
1423
1012
void
1424
1013
buf_reset_check_index_page_at_flush(
1425
1014
/*================================*/
1428
1017
{
1429
1018
        buf_block_t*    block;
1430
1019
 
1431
 
        buf_pool_mutex_enter();
1432
 
 
1433
 
        block = (buf_block_t*) buf_page_hash_get(space, offset);
1434
 
 
1435
 
        if (block && buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE) {
 
1020
        mutex_enter_fast(&(buf_pool->mutex));
 
1021
 
 
1022
        block = buf_page_hash_get(space, offset);
 
1023
 
 
1024
        if (block) {
1436
1025
                block->check_index_page_at_flush = FALSE;
1437
1026
        }
1438
1027
 
1439
 
        buf_pool_mutex_exit();
 
1028
        mutex_exit(&(buf_pool->mutex));
1440
1029
}
1441
1030
 
1442
1031
/************************************************************************
1443
1032
Returns the current state of is_hashed of a page. FALSE if the page is
1444
1033
not in the pool. NOTE that this operation does not fix the page in the
1445
1034
pool if it is found there. */
1446
 
UNIV_INTERN
 
1035
 
1447
1036
ibool
1448
1037
buf_page_peek_if_search_hashed(
1449
1038
/*===========================*/
1455
1044
        buf_block_t*    block;
1456
1045
        ibool           is_hashed;
1457
1046
 
1458
 
        buf_pool_mutex_enter();
1459
 
 
1460
 
        block = (buf_block_t*) buf_page_hash_get(space, offset);
1461
 
 
1462
 
        if (!block || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
 
1047
        mutex_enter_fast(&(buf_pool->mutex));
 
1048
 
 
1049
        block = buf_page_hash_get(space, offset);
 
1050
 
 
1051
        if (!block) {
1463
1052
                is_hashed = FALSE;
1464
1053
        } else {
1465
1054
                is_hashed = block->is_hashed;
1466
1055
        }
1467
1056
 
1468
 
        buf_pool_mutex_exit();
 
1057
        mutex_exit(&(buf_pool->mutex));
1469
1058
 
1470
1059
        return(is_hashed);
1471
1060
}
1472
1061
 
1473
 
#ifdef UNIV_DEBUG_FILE_ACCESSES
 
1062
/************************************************************************
 
1063
Returns TRUE if the page can be found in the buffer pool hash table. NOTE
 
1064
that it is possible that the page is not yet read from disk, though. */
 
1065
 
 
1066
ibool
 
1067
buf_page_peek(
 
1068
/*==========*/
 
1069
                        /* out: TRUE if found from page hash table,
 
1070
                        NOTE that the page is not necessarily yet read
 
1071
                        from disk! */
 
1072
        ulint   space,  /* in: space id */
 
1073
        ulint   offset) /* in: page number */
 
1074
{
 
1075
        if (buf_page_peek_block(space, offset)) {
 
1076
 
 
1077
                return(TRUE);
 
1078
        }
 
1079
 
 
1080
        return(FALSE);
 
1081
}
 
1082
 
1474
1083
/************************************************************************
1475
1084
Sets file_page_was_freed TRUE if the page is found in the buffer pool.
1476
1085
This function should be called when we free a file page and want the
1477
1086
debug version to check that it is not accessed any more unless
1478
1087
reallocated. */
1479
 
UNIV_INTERN
1480
 
buf_page_t*
 
1088
 
 
1089
buf_block_t*
1481
1090
buf_page_set_file_page_was_freed(
1482
1091
/*=============================*/
1483
 
                        /* out: control block if found in page hash table,
 
1092
                        /* out: control block if found from page hash table,
1484
1093
                        otherwise NULL */
1485
1094
        ulint   space,  /* in: space id */
1486
1095
        ulint   offset) /* in: page number */
1487
1096
{
1488
 
        buf_page_t*     bpage;
1489
 
 
1490
 
        buf_pool_mutex_enter();
1491
 
 
1492
 
        bpage = buf_page_hash_get(space, offset);
1493
 
 
1494
 
        if (bpage) {
1495
 
                bpage->file_page_was_freed = TRUE;
 
1097
        buf_block_t*    block;
 
1098
 
 
1099
        mutex_enter_fast(&(buf_pool->mutex));
 
1100
 
 
1101
        block = buf_page_hash_get(space, offset);
 
1102
 
 
1103
        if (block) {
 
1104
                block->file_page_was_freed = TRUE;
1496
1105
        }
1497
1106
 
1498
 
        buf_pool_mutex_exit();
 
1107
        mutex_exit(&(buf_pool->mutex));
1499
1108
 
1500
 
        return(bpage);
 
1109
        return(block);
1501
1110
}
1502
1111
 
1503
1112
/************************************************************************
1505
1114
This function should be called when we free a file page and want the
1506
1115
debug version to check that it is not accessed any more unless
1507
1116
reallocated. */
1508
 
UNIV_INTERN
1509
 
buf_page_t*
 
1117
 
 
1118
buf_block_t*
1510
1119
buf_page_reset_file_page_was_freed(
1511
1120
/*===============================*/
1512
 
                        /* out: control block if found in page hash table,
 
1121
                        /* out: control block if found from page hash table,
1513
1122
                        otherwise NULL */
1514
1123
        ulint   space,  /* in: space id */
1515
1124
        ulint   offset) /* in: page number */
1516
1125
{
1517
 
        buf_page_t*     bpage;
1518
 
 
1519
 
        buf_pool_mutex_enter();
1520
 
 
1521
 
        bpage = buf_page_hash_get(space, offset);
1522
 
 
1523
 
        if (bpage) {
1524
 
                bpage->file_page_was_freed = FALSE;
1525
 
        }
1526
 
 
1527
 
        buf_pool_mutex_exit();
1528
 
 
1529
 
        return(bpage);
1530
 
}
1531
 
#endif /* UNIV_DEBUG_FILE_ACCESSES */
1532
 
 
1533
 
/************************************************************************
1534
 
Get read access to a compressed page (usually of type
1535
 
FIL_PAGE_TYPE_ZBLOB or FIL_PAGE_TYPE_ZBLOB2).
1536
 
The page must be released with buf_page_release_zip().
1537
 
NOTE: the page is not protected by any latch.  Mutual exclusion has to
1538
 
be implemented at a higher level.  In other words, all possible
1539
 
accesses to a given page through this function must be protected by
1540
 
the same set of mutexes or latches. */
1541
 
UNIV_INTERN
1542
 
buf_page_t*
1543
 
buf_page_get_zip(
1544
 
/*=============*/
1545
 
                                /* out: pointer to the block */
1546
 
        ulint           space,  /* in: space id */
1547
 
        ulint           zip_size,/* in: compressed page size */
1548
 
        ulint           offset) /* in: page number */
1549
 
{
1550
 
        buf_page_t*     bpage;
1551
 
        mutex_t*        block_mutex;
1552
 
        ibool           must_read;
1553
 
 
1554
 
#ifndef UNIV_LOG_DEBUG
1555
 
        ut_ad(!ibuf_inside());
1556
 
#endif
1557
 
        buf_pool->n_page_gets++;
1558
 
 
1559
 
        for (;;) {
1560
 
                buf_pool_mutex_enter();
1561
 
lookup:
1562
 
                bpage = buf_page_hash_get(space, offset);
1563
 
                if (bpage) {
1564
 
                        break;
1565
 
                }
1566
 
 
1567
 
                /* Page not in buf_pool: needs to be read from file */
1568
 
 
1569
 
                buf_pool_mutex_exit();
1570
 
 
1571
 
                buf_read_page(space, zip_size, offset);
1572
 
 
1573
 
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
1574
 
                ut_a(++buf_dbg_counter % 37 || buf_validate());
1575
 
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
1576
 
        }
1577
 
 
1578
 
        if (UNIV_UNLIKELY(!bpage->zip.data)) {
1579
 
                /* There is no compressed page. */
1580
 
                buf_pool_mutex_exit();
1581
 
                return(NULL);
1582
 
        }
1583
 
 
1584
 
        block_mutex = buf_page_get_mutex(bpage);
1585
 
        mutex_enter(block_mutex);
1586
 
 
1587
 
        switch (buf_page_get_state(bpage)) {
1588
 
        case BUF_BLOCK_NOT_USED:
1589
 
        case BUF_BLOCK_READY_FOR_USE:
1590
 
        case BUF_BLOCK_MEMORY:
1591
 
        case BUF_BLOCK_REMOVE_HASH:
1592
 
        case BUF_BLOCK_ZIP_FREE:
1593
 
                ut_error;
1594
 
                break;
1595
 
        case BUF_BLOCK_ZIP_PAGE:
1596
 
        case BUF_BLOCK_ZIP_DIRTY:
1597
 
                bpage->buf_fix_count++;
1598
 
                break;
1599
 
        case BUF_BLOCK_FILE_PAGE:
1600
 
                /* Discard the uncompressed page frame if possible. */
1601
 
                if (buf_LRU_free_block(bpage, FALSE, NULL)
1602
 
                    == BUF_LRU_FREED) {
1603
 
 
1604
 
                        mutex_exit(block_mutex);
1605
 
                        goto lookup;
1606
 
                }
1607
 
 
1608
 
                buf_block_buf_fix_inc((buf_block_t*) bpage,
1609
 
                                      __FILE__, __LINE__);
1610
 
                break;
1611
 
        }
1612
 
 
1613
 
        must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ;
1614
 
 
1615
 
        buf_pool_mutex_exit();
1616
 
 
1617
 
        buf_page_set_accessed(bpage, TRUE);
1618
 
 
1619
 
        mutex_exit(block_mutex);
1620
 
 
1621
 
        buf_block_make_young(bpage);
1622
 
 
1623
 
#ifdef UNIV_DEBUG_FILE_ACCESSES
1624
 
        ut_a(!bpage->file_page_was_freed);
1625
 
#endif
1626
 
 
1627
 
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
1628
 
        ut_a(++buf_dbg_counter % 5771 || buf_validate());
1629
 
        ut_a(bpage->buf_fix_count > 0);
1630
 
        ut_a(buf_page_in_file(bpage));
1631
 
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
1632
 
 
1633
 
        if (must_read) {
1634
 
                /* Let us wait until the read operation
1635
 
                completes */
1636
 
 
1637
 
                for (;;) {
1638
 
                        enum buf_io_fix io_fix;
1639
 
 
1640
 
                        mutex_enter(block_mutex);
1641
 
                        io_fix = buf_page_get_io_fix(bpage);
1642
 
                        mutex_exit(block_mutex);
1643
 
 
1644
 
                        if (io_fix == BUF_IO_READ) {
1645
 
 
1646
 
                                os_thread_sleep(WAIT_FOR_READ);
1647
 
                        } else {
1648
 
                                break;
1649
 
                        }
1650
 
                }
1651
 
        }
1652
 
 
1653
 
#ifdef UNIV_IBUF_COUNT_DEBUG
1654
 
        ut_a(ibuf_count_get(buf_page_get_space(bpage),
1655
 
                            buf_page_get_page_no(bpage)) == 0);
1656
 
#endif
1657
 
        return(bpage);
1658
 
}
1659
 
 
1660
 
/************************************************************************
1661
 
Initialize some fields of a control block. */
1662
 
UNIV_INLINE
1663
 
void
1664
 
buf_block_init_low(
1665
 
/*===============*/
1666
 
        buf_block_t*    block)  /* in: block to init */
1667
 
{
1668
 
        block->check_index_page_at_flush = FALSE;
1669
 
        block->index            = NULL;
1670
 
 
1671
 
        block->n_hash_helps     = 0;
1672
 
        block->is_hashed        = FALSE;
1673
 
        block->n_fields         = 1;
1674
 
        block->n_bytes          = 0;
1675
 
        block->left_side        = TRUE;
1676
 
}
1677
 
 
1678
 
/************************************************************************
1679
 
Decompress a block. */
1680
 
static
1681
 
ibool
1682
 
buf_zip_decompress(
1683
 
/*===============*/
1684
 
                                /* out: TRUE if successful */
1685
 
        buf_block_t*    block,  /* in/out: block */
1686
 
        ibool           check)  /* in: TRUE=verify the page checksum */
1687
 
{
1688
 
        const byte* frame = block->page.zip.data;
1689
 
 
1690
 
        ut_ad(buf_block_get_zip_size(block));
1691
 
        ut_a(buf_block_get_space(block) != 0);
1692
 
 
1693
 
        if (UNIV_LIKELY(check)) {
1694
 
                ulint   stamp_checksum  = mach_read_from_4(
1695
 
                        frame + FIL_PAGE_SPACE_OR_CHKSUM);
1696
 
                ulint   calc_checksum   = page_zip_calc_checksum(
1697
 
                        frame, page_zip_get_size(&block->page.zip));
1698
 
 
1699
 
                if (UNIV_UNLIKELY(stamp_checksum != calc_checksum)) {
1700
 
                        ut_print_timestamp(stderr);
1701
 
                        fprintf(stderr,
1702
 
                                "  InnoDB: compressed page checksum mismatch"
1703
 
                                " (space %u page %u): %lu != %lu\n",
1704
 
                                block->page.space, block->page.offset,
1705
 
                                stamp_checksum, calc_checksum);
1706
 
                        return(FALSE);
1707
 
                }
1708
 
        }
1709
 
 
1710
 
        switch (fil_page_get_type(frame)) {
1711
 
        case FIL_PAGE_INDEX:
1712
 
                if (page_zip_decompress(&block->page.zip,
1713
 
                                        block->frame)) {
1714
 
                        return(TRUE);
1715
 
                }
1716
 
 
1717
 
                fprintf(stderr,
1718
 
                        "InnoDB: unable to decompress space %lu page %lu\n",
1719
 
                        (ulong) block->page.space,
1720
 
                        (ulong) block->page.offset);
1721
 
                return(FALSE);
1722
 
 
1723
 
        case FIL_PAGE_TYPE_ALLOCATED:
1724
 
        case FIL_PAGE_INODE:
1725
 
        case FIL_PAGE_IBUF_BITMAP:
1726
 
        case FIL_PAGE_TYPE_FSP_HDR:
1727
 
        case FIL_PAGE_TYPE_XDES:
1728
 
        case FIL_PAGE_TYPE_ZBLOB:
1729
 
        case FIL_PAGE_TYPE_ZBLOB2:
1730
 
                /* Copy to uncompressed storage. */
1731
 
                memcpy(block->frame, frame,
1732
 
                       buf_block_get_zip_size(block));
1733
 
                return(TRUE);
1734
 
        }
1735
 
 
1736
 
        ut_print_timestamp(stderr);
1737
 
        fprintf(stderr,
1738
 
                "  InnoDB: unknown compressed page"
1739
 
                " type %lu\n",
1740
 
                fil_page_get_type(frame));
1741
 
        return(FALSE);
1742
 
}
1743
 
 
1744
 
/************************************************************************
1745
 
Find out if a buffer block was created by buf_chunk_init(). */
1746
 
static
1747
 
ibool
1748
 
buf_block_is_uncompressed(
1749
 
/*======================*/
1750
 
                                        /* out: TRUE if "block" has
1751
 
                                        been added to buf_pool->free
1752
 
                                        by buf_chunk_init() */
1753
 
        const buf_block_t*      block)  /* in: pointer to block,
1754
 
                                        not dereferenced */
1755
 
{
1756
 
        const buf_chunk_t*              chunk   = buf_pool->chunks;
1757
 
        const buf_chunk_t* const        echunk  = chunk + buf_pool->n_chunks;
1758
 
 
1759
 
        ut_ad(buf_pool_mutex_own());
1760
 
 
1761
 
        if (UNIV_UNLIKELY((((ulint) block) % sizeof *block) != 0)) {
1762
 
                /* The pointer should be aligned. */
1763
 
                return(FALSE);
1764
 
        }
1765
 
 
1766
 
        while (chunk < echunk) {
1767
 
                if (block >= chunk->blocks
1768
 
                    && block < chunk->blocks + chunk->size) {
1769
 
 
1770
 
                        return(TRUE);
1771
 
                }
1772
 
 
1773
 
                chunk++;
1774
 
        }
1775
 
 
1776
 
        return(FALSE);
 
1126
        buf_block_t*    block;
 
1127
 
 
1128
        mutex_enter_fast(&(buf_pool->mutex));
 
1129
 
 
1130
        block = buf_page_hash_get(space, offset);
 
1131
 
 
1132
        if (block) {
 
1133
                block->file_page_was_freed = FALSE;
 
1134
        }
 
1135
 
 
1136
        mutex_exit(&(buf_pool->mutex));
 
1137
 
 
1138
        return(block);
1777
1139
}
1778
1140
 
1779
1141
/************************************************************************
1780
1142
This is the general function used to get access to a database page. */
1781
 
UNIV_INTERN
1782
 
buf_block_t*
 
1143
 
 
1144
buf_frame_t*
1783
1145
buf_page_get_gen(
1784
1146
/*=============*/
1785
 
                                /* out: pointer to the block or NULL */
 
1147
                                /* out: pointer to the frame or NULL */
1786
1148
        ulint           space,  /* in: space id */
1787
 
        ulint           zip_size,/* in: compressed page size in bytes
1788
 
                                or 0 for uncompressed pages */
1789
1149
        ulint           offset, /* in: page number */
1790
1150
        ulint           rw_latch,/* in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
1791
 
        buf_block_t*    guess,  /* in: guessed block or NULL */
 
1151
        buf_frame_t*    guess,  /* in: guessed frame or NULL */
1792
1152
        ulint           mode,   /* in: BUF_GET, BUF_GET_IF_IN_POOL,
1793
1153
                                BUF_GET_NO_LATCH, BUF_GET_NOWAIT */
1794
1154
        const char*     file,   /* in: file name */
1798
1158
        buf_block_t*    block;
1799
1159
        ibool           accessed;
1800
1160
        ulint           fix_type;
 
1161
        ibool           success;
1801
1162
        ibool           must_read;
1802
1163
 
1803
1164
        ut_ad(mtr);
1807
1168
        ut_ad((mode != BUF_GET_NO_LATCH) || (rw_latch == RW_NO_LATCH));
1808
1169
        ut_ad((mode == BUF_GET) || (mode == BUF_GET_IF_IN_POOL)
1809
1170
              || (mode == BUF_GET_NO_LATCH) || (mode == BUF_GET_NOWAIT));
1810
 
        ut_ad(zip_size == fil_space_get_zip_size(space));
1811
1171
#ifndef UNIV_LOG_DEBUG
1812
 
        ut_ad(!ibuf_inside() || ibuf_page(space, zip_size, offset));
 
1172
        ut_ad(!ibuf_inside() || ibuf_page(space, offset));
1813
1173
#endif
1814
1174
        buf_pool->n_page_gets++;
1815
1175
loop:
1816
 
        block = guess;
1817
 
        buf_pool_mutex_enter();
1818
 
 
1819
 
        if (block) {
1820
 
                /* If the guess is a compressed page descriptor that
1821
 
                has been allocated by buf_buddy_alloc(), it may have
1822
 
                been invalidated by buf_buddy_relocate().  In that
1823
 
                case, block could point to something that happens to
1824
 
                contain the expected bits in block->page.  Similarly,
1825
 
                the guess may be pointing to a buffer pool chunk that
1826
 
                has been released when resizing the buffer pool. */
1827
 
 
1828
 
                if (!buf_block_is_uncompressed(block)
1829
 
                    || offset != block->page.offset
1830
 
                    || space != block->page.space
1831
 
                    || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
1832
 
 
1833
 
                        block = guess = NULL;
1834
 
                } else {
1835
 
                        ut_ad(!block->page.in_zip_hash);
1836
 
                        ut_ad(block->page.in_page_hash);
 
1176
        block = NULL;
 
1177
        mutex_enter_fast(&(buf_pool->mutex));
 
1178
 
 
1179
        if (guess) {
 
1180
                block = buf_block_align(guess);
 
1181
 
 
1182
                if ((offset != block->offset) || (space != block->space)
 
1183
                    || (block->state != BUF_BLOCK_FILE_PAGE)) {
 
1184
 
 
1185
                        block = NULL;
1837
1186
                }
1838
1187
        }
1839
1188
 
1840
1189
        if (block == NULL) {
1841
 
                block = (buf_block_t*) buf_page_hash_get(space, offset);
 
1190
                block = buf_page_hash_get(space, offset);
1842
1191
        }
1843
1192
 
1844
 
loop2:
1845
1193
        if (block == NULL) {
1846
1194
                /* Page not in buf_pool: needs to be read from file */
1847
1195
 
1848
 
                buf_pool_mutex_exit();
 
1196
                mutex_exit(&(buf_pool->mutex));
1849
1197
 
1850
1198
                if (mode == BUF_GET_IF_IN_POOL) {
1851
1199
 
1852
1200
                        return(NULL);
1853
1201
                }
1854
1202
 
1855
 
                buf_read_page(space, zip_size, offset);
1856
 
 
1857
 
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
1858
 
                ut_a(++buf_dbg_counter % 37 || buf_validate());
1859
 
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
 
1203
                buf_read_page(space, offset);
 
1204
 
 
1205
#ifdef UNIV_DEBUG
 
1206
                buf_dbg_counter++;
 
1207
 
 
1208
                if (buf_dbg_counter % 37 == 0) {
 
1209
                        ut_ad(buf_validate());
 
1210
                }
 
1211
#endif
1860
1212
                goto loop;
1861
1213
        }
1862
1214
 
1863
 
        ut_ad(page_zip_get_size(&block->page.zip) == zip_size);
1864
 
 
1865
 
        must_read = buf_block_get_io_fix(block) == BUF_IO_READ;
1866
 
 
1867
 
        if (must_read && mode == BUF_GET_IF_IN_POOL) {
1868
 
                /* The page is only being read to buffer */
1869
 
                buf_pool_mutex_exit();
1870
 
 
1871
 
                return(NULL);
1872
 
        }
1873
 
 
1874
 
        switch (buf_block_get_state(block)) {
1875
 
                buf_page_t*     bpage;
1876
 
                ibool           success;
1877
 
 
1878
 
        case BUF_BLOCK_FILE_PAGE:
1879
 
                break;
1880
 
 
1881
 
        case BUF_BLOCK_ZIP_PAGE:
1882
 
        case BUF_BLOCK_ZIP_DIRTY:
1883
 
                bpage = &block->page;
1884
 
 
1885
 
                if (bpage->buf_fix_count
1886
 
                    || buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
1887
 
                        /* This condition often occurs when the buffer
1888
 
                        is not buffer-fixed, but I/O-fixed by
1889
 
                        buf_page_init_for_read(). */
1890
 
wait_until_unfixed:
1891
 
                        /* The block is buffer-fixed or I/O-fixed.
1892
 
                        Try again later. */
1893
 
                        buf_pool_mutex_exit();
1894
 
                        os_thread_sleep(WAIT_FOR_READ);
1895
 
 
1896
 
                        goto loop;
1897
 
                }
1898
 
 
1899
 
                /* Allocate an uncompressed page. */
1900
 
                buf_pool_mutex_exit();
1901
 
 
1902
 
                block = buf_LRU_get_free_block(0);
1903
 
                ut_a(block);
1904
 
 
1905
 
                buf_pool_mutex_enter();
1906
 
                mutex_enter(&block->mutex);
1907
 
 
1908
 
                {
1909
 
                        buf_page_t*     hash_bpage
1910
 
                                = buf_page_hash_get(space, offset);
1911
 
 
1912
 
                        if (UNIV_UNLIKELY(bpage != hash_bpage)) {
1913
 
                                /* The buf_pool->page_hash was modified
1914
 
                                while buf_pool_mutex was released.
1915
 
                                Free the block that was allocated. */
1916
 
 
1917
 
                                buf_LRU_block_free_non_file_page(block);
1918
 
                                mutex_exit(&block->mutex);
1919
 
 
1920
 
                                block = (buf_block_t*) hash_bpage;
1921
 
                                goto loop2;
1922
 
                        }
1923
 
                }
1924
 
 
1925
 
                if (UNIV_UNLIKELY
1926
 
                    (bpage->buf_fix_count
1927
 
                     || buf_page_get_io_fix(bpage) != BUF_IO_NONE)) {
1928
 
 
1929
 
                        /* The block was buffer-fixed or I/O-fixed
1930
 
                        while buf_pool_mutex was not held by this thread.
1931
 
                        Free the block that was allocated and try again.
1932
 
                        This should be extremely unlikely. */
1933
 
 
1934
 
                        buf_LRU_block_free_non_file_page(block);
 
1215
        mutex_enter(&block->mutex);
 
1216
 
 
1217
        ut_a(block->state == BUF_BLOCK_FILE_PAGE);
 
1218
 
 
1219
        must_read = FALSE;
 
1220
 
 
1221
        if (block->io_fix == BUF_IO_READ) {
 
1222
 
 
1223
                must_read = TRUE;
 
1224
 
 
1225
                if (mode == BUF_GET_IF_IN_POOL) {
 
1226
                        /* The page is only being read to buffer */
 
1227
                        mutex_exit(&buf_pool->mutex);
1935
1228
                        mutex_exit(&block->mutex);
1936
1229
 
1937
 
                        goto wait_until_unfixed;
1938
 
                }
1939
 
 
1940
 
                /* Move the compressed page from bpage to block,
1941
 
                and uncompress it. */
1942
 
 
1943
 
                mutex_enter(&buf_pool_zip_mutex);
1944
 
 
1945
 
                buf_relocate(bpage, &block->page);
1946
 
                buf_block_init_low(block);
1947
 
                block->lock_hash_val = lock_rec_hash(space, offset);
1948
 
 
1949
 
                UNIV_MEM_DESC(&block->page.zip.data,
1950
 
                              page_zip_get_size(&block->page.zip), block);
1951
 
 
1952
 
                if (buf_page_get_state(&block->page)
1953
 
                    == BUF_BLOCK_ZIP_PAGE) {
1954
 
                        UT_LIST_REMOVE(list, buf_pool->zip_clean,
1955
 
                                       &block->page);
1956
 
                        ut_ad(!block->page.in_flush_list);
1957
 
                } else {
1958
 
                        /* Relocate buf_pool->flush_list. */
1959
 
                        buf_page_t*     b;
1960
 
 
1961
 
                        b = UT_LIST_GET_PREV(list, &block->page);
1962
 
                        ut_ad(block->page.in_flush_list);
1963
 
                        UT_LIST_REMOVE(list, buf_pool->flush_list,
1964
 
                                       &block->page);
1965
 
 
1966
 
                        if (b) {
1967
 
                                UT_LIST_INSERT_AFTER(
1968
 
                                        list, buf_pool->flush_list, b,
1969
 
                                        &block->page);
1970
 
                        } else {
1971
 
                                UT_LIST_ADD_FIRST(
1972
 
                                        list, buf_pool->flush_list,
1973
 
                                        &block->page);
1974
 
                        }
1975
 
                }
1976
 
 
1977
 
                /* Buffer-fix, I/O-fix, and X-latch the block
1978
 
                for the duration of the decompression.
1979
 
                Also add the block to the unzip_LRU list. */
1980
 
                block->page.state = BUF_BLOCK_FILE_PAGE;
1981
 
 
1982
 
                /* Insert at the front of unzip_LRU list */
1983
 
                buf_unzip_LRU_add_block(block, FALSE);
1984
 
 
1985
 
                block->page.buf_fix_count = 1;
1986
 
                buf_block_set_io_fix(block, BUF_IO_READ);
1987
 
                buf_pool->n_pend_unzip++;
1988
 
                rw_lock_x_lock(&block->lock);
1989
 
                mutex_exit(&block->mutex);
1990
 
                mutex_exit(&buf_pool_zip_mutex);
1991
 
 
1992
 
                buf_buddy_free(bpage, sizeof *bpage);
1993
 
 
1994
 
                buf_pool_mutex_exit();
1995
 
 
1996
 
                /* Decompress the page and apply buffered operations
1997
 
                while not holding buf_pool_mutex or block->mutex. */
1998
 
                success = buf_zip_decompress(block, srv_use_checksums);
1999
 
 
2000
 
                if (UNIV_LIKELY(success)) {
2001
 
                        ibuf_merge_or_delete_for_page(block, space, offset,
2002
 
                                                      zip_size, TRUE);
2003
 
                }
2004
 
 
2005
 
                /* Unfix and unlatch the block. */
2006
 
                buf_pool_mutex_enter();
2007
 
                mutex_enter(&block->mutex);
2008
 
                buf_pool->n_pend_unzip--;
2009
 
                block->page.buf_fix_count--;
2010
 
                buf_block_set_io_fix(block, BUF_IO_NONE);
2011
 
                mutex_exit(&block->mutex);
2012
 
                rw_lock_x_unlock(&block->lock);
2013
 
 
2014
 
                if (UNIV_UNLIKELY(!success)) {
2015
 
 
2016
 
                        buf_pool_mutex_exit();
2017
1230
                        return(NULL);
2018
1231
                }
2019
 
 
2020
 
                break;
2021
 
 
2022
 
        case BUF_BLOCK_ZIP_FREE:
2023
 
        case BUF_BLOCK_NOT_USED:
2024
 
        case BUF_BLOCK_READY_FOR_USE:
2025
 
        case BUF_BLOCK_MEMORY:
2026
 
        case BUF_BLOCK_REMOVE_HASH:
2027
 
                ut_error;
2028
 
                break;
2029
 
        }
2030
 
 
2031
 
        ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
2032
 
 
2033
 
        mutex_enter(&block->mutex);
2034
 
        UNIV_MEM_ASSERT_RW(&block->page, sizeof block->page);
2035
 
 
2036
 
        buf_block_buf_fix_inc(block, file, line);
2037
 
        buf_pool_mutex_exit();
 
1232
        }
 
1233
 
 
1234
        /* If AWE is enabled and the page is not mapped to a frame, then
 
1235
        map it */
 
1236
 
 
1237
        if (block->frame == NULL) {
 
1238
                ut_a(srv_use_awe);
 
1239
 
 
1240
                /* We set second parameter TRUE because the block is in the
 
1241
                LRU list and we must put it to awe_LRU_free_mapped list once
 
1242
                mapped to a frame */
 
1243
 
 
1244
                buf_awe_map_page_to_frame(block, TRUE);
 
1245
        }
 
1246
 
 
1247
#ifdef UNIV_SYNC_DEBUG
 
1248
        buf_block_buf_fix_inc_debug(block, file, line);
 
1249
#else
 
1250
        buf_block_buf_fix_inc(block);
 
1251
#endif
 
1252
        mutex_exit(&buf_pool->mutex);
2038
1253
 
2039
1254
        /* Check if this is the first access to the page */
2040
1255
 
2041
 
        accessed = buf_page_is_accessed(&block->page);
 
1256
        accessed = block->accessed;
2042
1257
 
2043
 
        buf_page_set_accessed(&block->page, TRUE);
 
1258
        block->accessed = TRUE;
2044
1259
 
2045
1260
        mutex_exit(&block->mutex);
2046
1261
 
2047
 
        buf_block_make_young(&block->page);
 
1262
        buf_block_make_young(block);
2048
1263
 
2049
1264
#ifdef UNIV_DEBUG_FILE_ACCESSES
2050
 
        ut_a(!block->page.file_page_was_freed);
2051
 
#endif
2052
 
 
2053
 
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
2054
 
        ut_a(++buf_dbg_counter % 5771 || buf_validate());
2055
 
        ut_a(block->page.buf_fix_count > 0);
2056
 
        ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
2057
 
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
 
1265
        ut_a(block->file_page_was_freed == FALSE);
 
1266
#endif
 
1267
 
 
1268
#ifdef UNIV_DEBUG
 
1269
        buf_dbg_counter++;
 
1270
 
 
1271
        if (buf_dbg_counter % 5771 == 0) {
 
1272
                ut_ad(buf_validate());
 
1273
        }
 
1274
#endif
 
1275
        ut_ad(block->buf_fix_count > 0);
 
1276
        ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
2058
1277
 
2059
1278
        if (mode == BUF_GET_NOWAIT) {
2060
 
                ibool   success;
2061
 
 
2062
1279
                if (rw_latch == RW_S_LATCH) {
2063
1280
                        success = rw_lock_s_lock_func_nowait(&(block->lock),
2064
1281
                                                             file, line);
2072
1289
 
2073
1290
                if (!success) {
2074
1291
                        mutex_enter(&block->mutex);
2075
 
                        buf_block_buf_fix_dec(block);
 
1292
 
 
1293
                        block->buf_fix_count--;
 
1294
 
2076
1295
                        mutex_exit(&block->mutex);
 
1296
#ifdef UNIV_SYNC_DEBUG
 
1297
                        rw_lock_s_unlock(&(block->debug_latch));
 
1298
#endif
2077
1299
 
2078
1300
                        return(NULL);
2079
1301
                }
2084
1306
                        completes */
2085
1307
 
2086
1308
                        for (;;) {
2087
 
                                enum buf_io_fix io_fix;
2088
 
 
2089
1309
                                mutex_enter(&block->mutex);
2090
 
                                io_fix = buf_block_get_io_fix(block);
2091
 
                                mutex_exit(&block->mutex);
2092
 
 
2093
 
                                if (io_fix == BUF_IO_READ) {
 
1310
 
 
1311
                                if (block->io_fix == BUF_IO_READ) {
 
1312
 
 
1313
                                        mutex_exit(&block->mutex);
2094
1314
 
2095
1315
                                        os_thread_sleep(WAIT_FOR_READ);
2096
1316
                                } else {
 
1317
 
 
1318
                                        mutex_exit(&block->mutex);
 
1319
 
2097
1320
                                        break;
2098
1321
                                }
2099
1322
                        }
2117
1340
                /* In the case of a first access, try to apply linear
2118
1341
                read-ahead */
2119
1342
 
2120
 
                buf_read_ahead_linear(space, zip_size, offset);
 
1343
                buf_read_ahead_linear(space, offset);
2121
1344
        }
2122
1345
 
2123
 
#ifdef UNIV_IBUF_COUNT_DEBUG
2124
 
        ut_a(ibuf_count_get(buf_block_get_space(block),
2125
 
                            buf_block_get_page_no(block)) == 0);
 
1346
#ifdef UNIV_IBUF_DEBUG
 
1347
        ut_a(ibuf_count_get(block->space, block->offset) == 0);
2126
1348
#endif
2127
 
        return(block);
 
1349
        return(block->frame);
2128
1350
}
2129
1351
 
2130
1352
/************************************************************************
2131
1353
This is the general function used to get optimistic access to a database
2132
1354
page. */
2133
 
UNIV_INTERN
 
1355
 
2134
1356
ibool
2135
1357
buf_page_optimistic_get_func(
2136
1358
/*=========================*/
2137
1359
                                /* out: TRUE if success */
2138
1360
        ulint           rw_latch,/* in: RW_S_LATCH, RW_X_LATCH */
2139
1361
        buf_block_t*    block,  /* in: guessed buffer block */
2140
 
        ib_uint64_t     modify_clock,/* in: modify clock value if mode is
 
1362
        buf_frame_t*    guess,  /* in: guessed frame; note that AWE may move
 
1363
                                frames */
 
1364
        dulint          modify_clock,/* in: modify clock value if mode is
2141
1365
                                ..._GUESS_ON_CLOCK */
2142
1366
        const char*     file,   /* in: file name */
2143
1367
        ulint           line,   /* in: line where called */
2150
1374
        ut_ad(mtr && block);
2151
1375
        ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
2152
1376
 
 
1377
        /* If AWE is used, block may have a different frame now, e.g., NULL */
 
1378
 
2153
1379
        mutex_enter(&block->mutex);
2154
1380
 
2155
 
        if (UNIV_UNLIKELY(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE)) {
 
1381
        if (UNIV_UNLIKELY(block->state != BUF_BLOCK_FILE_PAGE)
 
1382
            || UNIV_UNLIKELY(block->frame != guess)) {
2156
1383
 
2157
1384
                mutex_exit(&block->mutex);
2158
1385
 
2159
1386
                return(FALSE);
2160
1387
        }
2161
1388
 
2162
 
        buf_block_buf_fix_inc(block, file, line);
2163
 
        accessed = buf_page_is_accessed(&block->page);
2164
 
        buf_page_set_accessed(&block->page, TRUE);
 
1389
#ifdef UNIV_SYNC_DEBUG
 
1390
        buf_block_buf_fix_inc_debug(block, file, line);
 
1391
#else
 
1392
        buf_block_buf_fix_inc(block);
 
1393
#endif
 
1394
        accessed = block->accessed;
 
1395
        block->accessed = TRUE;
2165
1396
 
2166
1397
        mutex_exit(&block->mutex);
2167
1398
 
2168
 
        buf_block_make_young(&block->page);
 
1399
        buf_block_make_young(block);
2169
1400
 
2170
1401
        /* Check if this is the first access to the page */
2171
1402
 
2172
 
        ut_ad(!ibuf_inside()
2173
 
              || ibuf_page(buf_block_get_space(block),
2174
 
                           buf_block_get_zip_size(block),
2175
 
                           buf_block_get_page_no(block)));
 
1403
        ut_ad(!ibuf_inside() || ibuf_page(block->space, block->offset));
2176
1404
 
2177
1405
        if (rw_latch == RW_S_LATCH) {
2178
1406
                success = rw_lock_s_lock_func_nowait(&(block->lock),
2186
1414
 
2187
1415
        if (UNIV_UNLIKELY(!success)) {
2188
1416
                mutex_enter(&block->mutex);
2189
 
                buf_block_buf_fix_dec(block);
 
1417
 
 
1418
                block->buf_fix_count--;
 
1419
 
2190
1420
                mutex_exit(&block->mutex);
2191
1421
 
 
1422
#ifdef UNIV_SYNC_DEBUG
 
1423
                rw_lock_s_unlock(&(block->debug_latch));
 
1424
#endif
2192
1425
                return(FALSE);
2193
1426
        }
2194
1427
 
2195
 
        if (UNIV_UNLIKELY(modify_clock != block->modify_clock)) {
 
1428
        if (UNIV_UNLIKELY(!UT_DULINT_EQ(modify_clock, block->modify_clock))) {
2196
1429
#ifdef UNIV_SYNC_DEBUG
2197
 
                buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
 
1430
                buf_page_dbg_add_level(block->frame, SYNC_NO_ORDER_CHECK);
2198
1431
#endif /* UNIV_SYNC_DEBUG */
2199
1432
                if (rw_latch == RW_S_LATCH) {
2200
1433
                        rw_lock_s_unlock(&(block->lock));
2203
1436
                }
2204
1437
 
2205
1438
                mutex_enter(&block->mutex);
2206
 
                buf_block_buf_fix_dec(block);
 
1439
 
 
1440
                block->buf_fix_count--;
 
1441
 
2207
1442
                mutex_exit(&block->mutex);
2208
1443
 
 
1444
#ifdef UNIV_SYNC_DEBUG
 
1445
                rw_lock_s_unlock(&(block->debug_latch));
 
1446
#endif
2209
1447
                return(FALSE);
2210
1448
        }
2211
1449
 
2212
1450
        mtr_memo_push(mtr, block, fix_type);
2213
1451
 
2214
 
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
2215
 
        ut_a(++buf_dbg_counter % 5771 || buf_validate());
2216
 
        ut_a(block->page.buf_fix_count > 0);
2217
 
        ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
2218
 
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
 
1452
#ifdef UNIV_DEBUG
 
1453
        buf_dbg_counter++;
 
1454
 
 
1455
        if (buf_dbg_counter % 5771 == 0) {
 
1456
                ut_ad(buf_validate());
 
1457
        }
 
1458
#endif
 
1459
        ut_ad(block->buf_fix_count > 0);
 
1460
        ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
2219
1461
 
2220
1462
#ifdef UNIV_DEBUG_FILE_ACCESSES
2221
 
        ut_a(block->page.file_page_was_freed == FALSE);
 
1463
        ut_a(block->file_page_was_freed == FALSE);
2222
1464
#endif
2223
1465
        if (UNIV_UNLIKELY(!accessed)) {
2224
1466
                /* In the case of a first access, try to apply linear
2225
1467
                read-ahead */
2226
1468
 
2227
 
                buf_read_ahead_linear(buf_block_get_space(block),
2228
 
                                      buf_block_get_zip_size(block),
2229
 
                                      buf_block_get_page_no(block));
 
1469
                buf_read_ahead_linear(buf_frame_get_space_id(guess),
 
1470
                                      buf_frame_get_page_no(guess));
2230
1471
        }
2231
1472
 
2232
 
#ifdef UNIV_IBUF_COUNT_DEBUG
2233
 
        ut_a(ibuf_count_get(buf_block_get_space(block),
2234
 
                            buf_block_get_page_no(block)) == 0);
 
1473
#ifdef UNIV_IBUF_DEBUG
 
1474
        ut_a(ibuf_count_get(block->space, block->offset) == 0);
2235
1475
#endif
2236
1476
        buf_pool->n_page_gets++;
2237
1477
 
2242
1482
This is used to get access to a known database page, when no waiting can be
2243
1483
done. For example, if a search in an adaptive hash index leads us to this
2244
1484
frame. */
2245
 
UNIV_INTERN
 
1485
 
2246
1486
ibool
2247
1487
buf_page_get_known_nowait(
2248
1488
/*======================*/
2249
1489
                                /* out: TRUE if success */
2250
1490
        ulint           rw_latch,/* in: RW_S_LATCH, RW_X_LATCH */
2251
 
        buf_block_t*    block,  /* in: the known page */
 
1491
        buf_frame_t*    guess,  /* in: the known page frame */
2252
1492
        ulint           mode,   /* in: BUF_MAKE_YOUNG or BUF_KEEP_OLD */
2253
1493
        const char*     file,   /* in: file name */
2254
1494
        ulint           line,   /* in: line where called */
2255
1495
        mtr_t*          mtr)    /* in: mini-transaction */
2256
1496
{
 
1497
        buf_block_t*    block;
2257
1498
        ibool           success;
2258
1499
        ulint           fix_type;
2259
1500
 
2260
1501
        ut_ad(mtr);
2261
1502
        ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
2262
1503
 
 
1504
        block = buf_block_align(guess);
 
1505
 
2263
1506
        mutex_enter(&block->mutex);
2264
1507
 
2265
 
        if (buf_block_get_state(block) == BUF_BLOCK_REMOVE_HASH) {
 
1508
        if (block->state == BUF_BLOCK_REMOVE_HASH) {
2266
1509
                /* Another thread is just freeing the block from the LRU list
2267
1510
                of the buffer pool: do not try to access this page; this
2268
1511
                attempt to access the page can only come through the hash
2275
1518
                return(FALSE);
2276
1519
        }
2277
1520
 
2278
 
        ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
2279
 
 
2280
 
        buf_block_buf_fix_inc(block, file, line);
2281
 
 
 
1521
        ut_a(block->state == BUF_BLOCK_FILE_PAGE);
 
1522
 
 
1523
#ifdef UNIV_SYNC_DEBUG
 
1524
        buf_block_buf_fix_inc_debug(block, file, line);
 
1525
#else
 
1526
        buf_block_buf_fix_inc(block);
 
1527
#endif
2282
1528
        mutex_exit(&block->mutex);
2283
1529
 
2284
1530
        if (mode == BUF_MAKE_YOUNG) {
2285
 
                buf_block_make_young(&block->page);
 
1531
                buf_block_make_young(block);
2286
1532
        }
2287
1533
 
2288
1534
        ut_ad(!ibuf_inside() || (mode == BUF_KEEP_OLD));
2299
1545
 
2300
1546
        if (!success) {
2301
1547
                mutex_enter(&block->mutex);
2302
 
                buf_block_buf_fix_dec(block);
 
1548
 
 
1549
                block->buf_fix_count--;
 
1550
 
2303
1551
                mutex_exit(&block->mutex);
2304
1552
 
 
1553
#ifdef UNIV_SYNC_DEBUG
 
1554
                rw_lock_s_unlock(&(block->debug_latch));
 
1555
#endif
 
1556
 
2305
1557
                return(FALSE);
2306
1558
        }
2307
1559
 
2308
1560
        mtr_memo_push(mtr, block, fix_type);
2309
1561
 
2310
 
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
2311
 
        ut_a(++buf_dbg_counter % 5771 || buf_validate());
2312
 
        ut_a(block->page.buf_fix_count > 0);
2313
 
        ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
2314
 
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
 
1562
#ifdef UNIV_DEBUG
 
1563
        buf_dbg_counter++;
 
1564
 
 
1565
        if (buf_dbg_counter % 5771 == 0) {
 
1566
                ut_ad(buf_validate());
 
1567
        }
 
1568
#endif
 
1569
        ut_ad(block->buf_fix_count > 0);
 
1570
        ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
2315
1571
#ifdef UNIV_DEBUG_FILE_ACCESSES
2316
 
        ut_a(block->page.file_page_was_freed == FALSE);
 
1572
        ut_a(block->file_page_was_freed == FALSE);
2317
1573
#endif
2318
1574
 
2319
 
#ifdef UNIV_IBUF_COUNT_DEBUG
 
1575
#ifdef UNIV_IBUF_DEBUG
2320
1576
        ut_a((mode == BUF_KEEP_OLD)
2321
 
             || (ibuf_count_get(buf_block_get_space(block),
2322
 
                                buf_block_get_page_no(block)) == 0));
 
1577
             || (ibuf_count_get(block->space, block->offset) == 0));
2323
1578
#endif
2324
1579
        buf_pool->n_page_gets++;
2325
1580
 
2326
1581
        return(TRUE);
2327
1582
}
2328
1583
 
2329
 
/***********************************************************************
2330
 
Given a tablespace id and page number tries to get that page. If the
2331
 
page is not in the buffer pool it is not loaded and NULL is returned.
2332
 
Suitable for using when holding the kernel mutex. */
2333
 
UNIV_INTERN
2334
 
const buf_block_t*
2335
 
buf_page_try_get_func(
2336
 
/*==================*/
2337
 
                                /* out: pointer to a page or NULL */
2338
 
        ulint           space_id,/* in: tablespace id */
2339
 
        ulint           page_no,/* in: page number */
2340
 
        const char*     file,   /* in: file name */
2341
 
        ulint           line,   /* in: line where called */
2342
 
        mtr_t*          mtr)    /* in: mini-transaction */
2343
 
{
2344
 
        buf_block_t*    block;
2345
 
        ibool           success;
2346
 
        ulint           fix_type;
2347
 
 
2348
 
        buf_pool_mutex_enter();
2349
 
        block = buf_block_hash_get(space_id, page_no);
2350
 
 
2351
 
        if (!block) {
2352
 
                buf_pool_mutex_exit();
2353
 
                return(NULL);
2354
 
        }
2355
 
 
2356
 
        mutex_enter(&block->mutex);
2357
 
        buf_pool_mutex_exit();
2358
 
 
2359
 
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
2360
 
        ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
2361
 
        ut_a(buf_block_get_space(block) == space_id);
2362
 
        ut_a(buf_block_get_page_no(block) == page_no);
2363
 
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
2364
 
 
2365
 
        buf_block_buf_fix_inc(block, file, line);
2366
 
        mutex_exit(&block->mutex);
2367
 
 
2368
 
        fix_type = MTR_MEMO_PAGE_S_FIX;
2369
 
        success = rw_lock_s_lock_func_nowait(&block->lock, file, line);
2370
 
 
2371
 
        if (!success) {
2372
 
                /* Let us try to get an X-latch. If the current thread
2373
 
                is holding an X-latch on the page, we cannot get an
2374
 
                S-latch. */
2375
 
 
2376
 
                fix_type = MTR_MEMO_PAGE_X_FIX;
2377
 
                success = rw_lock_x_lock_func_nowait(&block->lock,
2378
 
                                                     file, line);
2379
 
        }
2380
 
 
2381
 
        if (!success) {
2382
 
                mutex_enter(&block->mutex);
2383
 
                buf_block_buf_fix_dec(block);
2384
 
                mutex_exit(&block->mutex);
2385
 
 
2386
 
                return(NULL);
2387
 
        }
2388
 
 
2389
 
        mtr_memo_push(mtr, block, fix_type);
2390
 
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
2391
 
        ut_a(++buf_dbg_counter % 5771 || buf_validate());
2392
 
        ut_a(block->page.buf_fix_count > 0);
2393
 
        ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
2394
 
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
2395
 
#ifdef UNIV_DEBUG_FILE_ACCESSES
2396
 
        ut_a(block->page.file_page_was_freed == FALSE);
2397
 
#endif /* UNIV_DEBUG_FILE_ACCESSES */
2398
 
#ifdef UNIV_SYNC_DEBUG
2399
 
        buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
2400
 
#endif /* UNIV_SYNC_DEBUG */
2401
 
        buf_pool->n_page_gets++;
2402
 
 
2403
 
        return(block);
2404
 
}
2405
 
 
2406
 
/************************************************************************
2407
 
Initialize some fields of a control block. */
2408
 
UNIV_INLINE
2409
 
void
2410
 
buf_page_init_low(
2411
 
/*==============*/
2412
 
        buf_page_t*     bpage)  /* in: block to init */
2413
 
{
2414
 
        bpage->flush_type = BUF_FLUSH_LRU;
2415
 
        bpage->accessed = FALSE;
2416
 
        bpage->io_fix = BUF_IO_NONE;
2417
 
        bpage->buf_fix_count = 0;
2418
 
        bpage->freed_page_clock = 0;
2419
 
        bpage->newest_modification = 0;
2420
 
        bpage->oldest_modification = 0;
2421
 
        HASH_INVALIDATE(bpage, hash);
2422
 
#ifdef UNIV_DEBUG_FILE_ACCESSES
2423
 
        bpage->file_page_was_freed = FALSE;
2424
 
#endif /* UNIV_DEBUG_FILE_ACCESSES */
2425
 
}
2426
 
 
2427
 
#ifdef UNIV_HOTBACKUP
2428
1584
/************************************************************************
2429
1585
Inits a page to the buffer buf_pool, for use in ibbackup --restore. */
2430
 
UNIV_INTERN
 
1586
 
2431
1587
void
2432
1588
buf_page_init_for_backup_restore(
2433
1589
/*=============================*/
2434
1590
        ulint           space,  /* in: space id */
2435
1591
        ulint           offset, /* in: offset of the page within space
2436
1592
                                in units of a page */
2437
 
        ulint           zip_size,/* in: compressed page size in bytes
2438
 
                                or 0 for uncompressed pages */
2439
1593
        buf_block_t*    block)  /* in: block to init */
2440
1594
{
2441
 
        buf_block_init_low(block);
 
1595
        /* Set the state of the block */
 
1596
        block->magic_n          = BUF_BLOCK_MAGIC_N;
 
1597
 
 
1598
        block->state            = BUF_BLOCK_FILE_PAGE;
 
1599
        block->space            = space;
 
1600
        block->offset           = offset;
2442
1601
 
2443
1602
        block->lock_hash_val    = 0;
2444
1603
 
2445
 
        buf_page_init_low(&block->page);
2446
 
        block->page.state       = BUF_BLOCK_FILE_PAGE;
2447
 
        block->page.space       = space;
2448
 
        block->page.offset      = offset;
2449
 
 
2450
 
        page_zip_des_init(&block->page.zip);
2451
 
 
2452
 
        /* We assume that block->page.data has been allocated
2453
 
        with zip_size == UNIV_PAGE_SIZE. */
2454
 
        ut_ad(zip_size <= UNIV_PAGE_SIZE);
2455
 
        ut_ad(ut_is_2pow(zip_size));
2456
 
        page_zip_set_size(&block->page.zip, zip_size);
 
1604
        block->freed_page_clock = 0;
 
1605
 
 
1606
        block->newest_modification = ut_dulint_zero;
 
1607
        block->oldest_modification = ut_dulint_zero;
 
1608
 
 
1609
        block->accessed         = FALSE;
 
1610
        block->buf_fix_count    = 0;
 
1611
        block->io_fix           = 0;
 
1612
 
 
1613
        block->n_hash_helps     = 0;
 
1614
        block->is_hashed        = FALSE;
 
1615
        block->n_fields         = 1;
 
1616
        block->n_bytes          = 0;
 
1617
        block->left_side        = TRUE;
 
1618
 
 
1619
        block->file_page_was_freed = FALSE;
2457
1620
}
2458
 
#endif /* UNIV_HOTBACKUP */
2459
1621
 
2460
1622
/************************************************************************
2461
1623
Inits a page to the buffer buf_pool. */
2468
1630
                                in units of a page */
2469
1631
        buf_block_t*    block)  /* in: block to init */
2470
1632
{
2471
 
        buf_page_t*     hash_page;
2472
1633
 
2473
 
        ut_ad(buf_pool_mutex_own());
 
1634
        ut_ad(mutex_own(&(buf_pool->mutex)));
2474
1635
        ut_ad(mutex_own(&(block->mutex)));
2475
 
        ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
 
1636
        ut_a(block->state != BUF_BLOCK_FILE_PAGE);
2476
1637
 
2477
1638
        /* Set the state of the block */
2478
 
        buf_block_set_file_page(block, space, offset);
 
1639
        block->magic_n          = BUF_BLOCK_MAGIC_N;
 
1640
 
 
1641
        block->state            = BUF_BLOCK_FILE_PAGE;
 
1642
        block->space            = space;
 
1643
        block->offset           = offset;
 
1644
 
 
1645
        block->check_index_page_at_flush = FALSE;
 
1646
        block->index            = NULL;
 
1647
 
 
1648
        block->lock_hash_val    = lock_rec_hash(space, offset);
2479
1649
 
2480
1650
#ifdef UNIV_DEBUG_VALGRIND
2481
1651
        if (!space) {
2486
1656
        }
2487
1657
#endif /* UNIV_DEBUG_VALGRIND */
2488
1658
 
2489
 
        buf_block_init_low(block);
2490
 
 
2491
 
        block->lock_hash_val    = lock_rec_hash(space, offset);
2492
 
 
2493
1659
        /* Insert into the hash table of file pages */
2494
1660
 
2495
 
        hash_page = buf_page_hash_get(space, offset);
2496
 
 
2497
 
        if (UNIV_LIKELY_NULL(hash_page)) {
 
1661
        if (buf_page_hash_get(space, offset)) {
2498
1662
                fprintf(stderr,
2499
1663
                        "InnoDB: Error: page %lu %lu already found"
2500
 
                        " in the hash table: %p, %p\n",
 
1664
                        " in the hash table\n",
2501
1665
                        (ulong) space,
2502
 
                        (ulong) offset,
2503
 
                        (const void*) hash_page, (const void*) block);
2504
 
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
2505
 
                mutex_exit(&block->mutex);
2506
 
                buf_pool_mutex_exit();
 
1666
                        (ulong) offset);
 
1667
#ifdef UNIV_DEBUG
2507
1668
                buf_print();
2508
1669
                buf_LRU_print();
2509
1670
                buf_validate();
2510
1671
                buf_LRU_validate();
2511
 
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
2512
 
                ut_error;
 
1672
#endif /* UNIV_DEBUG */
 
1673
                ut_a(0);
2513
1674
        }
2514
1675
 
2515
 
        buf_page_init_low(&block->page);
2516
 
 
2517
 
        ut_ad(!block->page.in_zip_hash);
2518
 
        ut_ad(!block->page.in_page_hash);
2519
 
        ut_d(block->page.in_page_hash = TRUE);
2520
 
        HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
2521
 
                    buf_page_address_fold(space, offset), &block->page);
 
1676
        HASH_INSERT(buf_block_t, hash, buf_pool->page_hash,
 
1677
                    buf_page_address_fold(space, offset), block);
 
1678
 
 
1679
        block->freed_page_clock = 0;
 
1680
 
 
1681
        block->newest_modification = ut_dulint_zero;
 
1682
        block->oldest_modification = ut_dulint_zero;
 
1683
 
 
1684
        block->accessed         = FALSE;
 
1685
        block->buf_fix_count    = 0;
 
1686
        block->io_fix           = 0;
 
1687
 
 
1688
        block->n_hash_helps     = 0;
 
1689
        block->is_hashed        = FALSE;
 
1690
        block->n_fields         = 1;
 
1691
        block->n_bytes          = 0;
 
1692
        block->left_side        = TRUE;
 
1693
 
 
1694
        block->file_page_was_freed = FALSE;
2522
1695
}
2523
1696
 
2524
1697
/************************************************************************
2529
1702
then this function does nothing.
2530
1703
Sets the io_fix flag to BUF_IO_READ and sets a non-recursive exclusive lock
2531
1704
on the buffer frame. The io-handler must take care that the flag is cleared
2532
 
and the lock released later. */
2533
 
UNIV_INTERN
2534
 
buf_page_t*
 
1705
and the lock released later. This is one of the functions which perform the
 
1706
state transition NOT_USED => FILE_PAGE to a block (the other is
 
1707
buf_page_create). */
 
1708
 
 
1709
buf_block_t*
2535
1710
buf_page_init_for_read(
2536
1711
/*===================*/
2537
1712
                                /* out: pointer to the block or NULL */
2538
1713
        ulint*          err,    /* out: DB_SUCCESS or DB_TABLESPACE_DELETED */
2539
1714
        ulint           mode,   /* in: BUF_READ_IBUF_PAGES_ONLY, ... */
2540
1715
        ulint           space,  /* in: space id */
2541
 
        ulint           zip_size,/* in: compressed page size, or 0 */
2542
 
        ibool           unzip,  /* in: TRUE=request uncompressed page */
2543
 
        ib_int64_t      tablespace_version,/* in: prevents reading from a wrong
 
1716
        ib_longlong     tablespace_version,/* in: prevents reading from a wrong
2544
1717
                                version of the tablespace in case we have done
2545
1718
                                DISCARD + IMPORT */
2546
1719
        ulint           offset) /* in: page number */
2547
1720
{
2548
1721
        buf_block_t*    block;
2549
 
        buf_page_t*     bpage;
2550
1722
        mtr_t           mtr;
2551
 
        ibool           lru     = FALSE;
2552
 
        void*           data;
2553
1723
 
2554
1724
        ut_ad(buf_pool);
2555
1725
 
2558
1728
        if (mode == BUF_READ_IBUF_PAGES_ONLY) {
2559
1729
                /* It is a read-ahead within an ibuf routine */
2560
1730
 
2561
 
                ut_ad(!ibuf_bitmap_page(zip_size, offset));
 
1731
                ut_ad(!ibuf_bitmap_page(offset));
2562
1732
                ut_ad(ibuf_inside());
2563
1733
 
2564
1734
                mtr_start(&mtr);
2565
1735
 
2566
 
                if (!ibuf_page_low(space, zip_size, offset, &mtr)) {
 
1736
                if (!ibuf_page_low(space, offset, &mtr)) {
2567
1737
 
2568
1738
                        mtr_commit(&mtr);
2569
1739
 
2573
1743
                ut_ad(mode == BUF_READ_ANY_PAGE);
2574
1744
        }
2575
1745
 
2576
 
        if (zip_size && UNIV_LIKELY(!unzip)
2577
 
            && UNIV_LIKELY(!recv_recovery_is_on())) {
2578
 
                block = NULL;
2579
 
        } else {
2580
 
                block = buf_LRU_get_free_block(0);
2581
 
                ut_ad(block);
2582
 
        }
2583
 
 
2584
 
        buf_pool_mutex_enter();
2585
 
 
2586
 
        if (buf_page_hash_get(space, offset)) {
2587
 
                /* The page is already in the buffer pool. */
2588
 
err_exit:
2589
 
                if (block) {
2590
 
                        mutex_enter(&block->mutex);
2591
 
                        buf_LRU_block_free_non_file_page(block);
2592
 
                        mutex_exit(&block->mutex);
2593
 
                }
2594
 
 
2595
 
err_exit2:
2596
 
                buf_pool_mutex_exit();
2597
 
 
2598
 
                if (mode == BUF_READ_IBUF_PAGES_ONLY) {
2599
 
 
2600
 
                        mtr_commit(&mtr);
2601
 
                }
2602
 
 
2603
 
                return(NULL);
2604
 
        }
 
1746
        block = buf_block_alloc();
 
1747
 
 
1748
        ut_a(block);
 
1749
 
 
1750
        mutex_enter(&(buf_pool->mutex));
 
1751
        mutex_enter(&block->mutex);
2605
1752
 
2606
1753
        if (fil_tablespace_deleted_or_being_deleted_in_mem(
2607
1754
                    space, tablespace_version)) {
 
1755
                *err = DB_TABLESPACE_DELETED;
 
1756
        }
 
1757
 
 
1758
        if (*err == DB_TABLESPACE_DELETED
 
1759
            || NULL != buf_page_hash_get(space, offset)) {
 
1760
 
2608
1761
                /* The page belongs to a space which has been
2609
 
                deleted or is being deleted. */
2610
 
                *err = DB_TABLESPACE_DELETED;
2611
 
 
2612
 
                goto err_exit;
2613
 
        }
2614
 
 
2615
 
        if (block) {
2616
 
                bpage = &block->page;
2617
 
                mutex_enter(&block->mutex);
2618
 
                buf_page_init(space, offset, block);
2619
 
 
2620
 
                /* The block must be put to the LRU list, to the old blocks */
2621
 
                buf_LRU_add_block(bpage, TRUE/* to old blocks */);
2622
 
 
2623
 
                /* We set a pass-type x-lock on the frame because then
2624
 
                the same thread which called for the read operation
2625
 
                (and is running now at this point of code) can wait
2626
 
                for the read to complete by waiting for the x-lock on
2627
 
                the frame; if the x-lock were recursive, the same
2628
 
                thread would illegally get the x-lock before the page
2629
 
                read is completed.  The x-lock is cleared by the
2630
 
                io-handler thread. */
2631
 
 
2632
 
                rw_lock_x_lock_gen(&block->lock, BUF_IO_READ);
2633
 
                buf_page_set_io_fix(bpage, BUF_IO_READ);
2634
 
 
2635
 
                if (UNIV_UNLIKELY(zip_size)) {
2636
 
                        page_zip_set_size(&block->page.zip, zip_size);
2637
 
 
2638
 
                        /* buf_pool_mutex may be released and
2639
 
                        reacquired by buf_buddy_alloc().  Thus, we
2640
 
                        must release block->mutex in order not to
2641
 
                        break the latching order in the reacquisition
2642
 
                        of buf_pool_mutex.  We also must defer this
2643
 
                        operation until after the block descriptor has
2644
 
                        been added to buf_pool->LRU and
2645
 
                        buf_pool->page_hash. */
2646
 
                        mutex_exit(&block->mutex);
2647
 
                        data = buf_buddy_alloc(zip_size, &lru);
2648
 
                        mutex_enter(&block->mutex);
2649
 
                        block->page.zip.data = data;
2650
 
 
2651
 
                        /* To maintain the invariant
2652
 
                        block->in_unzip_LRU_list
2653
 
                        == buf_page_belongs_to_unzip_LRU(&block->page)
2654
 
                        we have to add this block to unzip_LRU
2655
 
                        after block->page.zip.data is set. */
2656
 
                        ut_ad(buf_page_belongs_to_unzip_LRU(&block->page));
2657
 
                        buf_unzip_LRU_add_block(block, TRUE);
2658
 
                }
 
1762
                deleted or is being deleted, or the page is
 
1763
                already in buf_pool, return */
2659
1764
 
2660
1765
                mutex_exit(&block->mutex);
2661
 
        } else {
2662
 
                /* Defer buf_buddy_alloc() until after the block has
2663
 
                been found not to exist.  The buf_buddy_alloc() and
2664
 
                buf_buddy_free() calls may be expensive because of
2665
 
                buf_buddy_relocate(). */
2666
 
 
2667
 
                /* The compressed page must be allocated before the
2668
 
                control block (bpage), in order to avoid the
2669
 
                invocation of buf_buddy_relocate_block() on
2670
 
                uninitialized data. */
2671
 
                data = buf_buddy_alloc(zip_size, &lru);
2672
 
                bpage = buf_buddy_alloc(sizeof *bpage, &lru);
2673
 
 
2674
 
                /* If buf_buddy_alloc() allocated storage from the LRU list,
2675
 
                it released and reacquired buf_pool_mutex.  Thus, we must
2676
 
                check the page_hash again, as it may have been modified. */
2677
 
                if (UNIV_UNLIKELY(lru)
2678
 
                    && UNIV_LIKELY_NULL(buf_page_hash_get(space, offset))) {
2679
 
 
2680
 
                        /* The block was added by some other thread. */
2681
 
                        buf_buddy_free(bpage, sizeof *bpage);
2682
 
                        buf_buddy_free(data, zip_size);
2683
 
                        goto err_exit2;
 
1766
                mutex_exit(&(buf_pool->mutex));
 
1767
 
 
1768
                buf_block_free(block);
 
1769
 
 
1770
                if (mode == BUF_READ_IBUF_PAGES_ONLY) {
 
1771
 
 
1772
                        mtr_commit(&mtr);
2684
1773
                }
2685
1774
 
2686
 
                page_zip_des_init(&bpage->zip);
2687
 
                page_zip_set_size(&bpage->zip, zip_size);
2688
 
                bpage->zip.data = data;
2689
 
 
2690
 
                mutex_enter(&buf_pool_zip_mutex);
2691
 
                UNIV_MEM_DESC(bpage->zip.data,
2692
 
                              page_zip_get_size(&bpage->zip), bpage);
2693
 
                buf_page_init_low(bpage);
2694
 
                bpage->state    = BUF_BLOCK_ZIP_PAGE;
2695
 
                bpage->space    = space;
2696
 
                bpage->offset   = offset;
2697
 
 
2698
 
#ifdef UNIV_DEBUG
2699
 
                bpage->in_page_hash = FALSE;
2700
 
                bpage->in_zip_hash = FALSE;
2701
 
                bpage->in_flush_list = FALSE;
2702
 
                bpage->in_free_list = FALSE;
2703
 
                bpage->in_LRU_list = FALSE;
2704
 
#endif /* UNIV_DEBUG */
2705
 
 
2706
 
                ut_d(bpage->in_page_hash = TRUE);
2707
 
                HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
2708
 
                            buf_page_address_fold(space, offset), bpage);
2709
 
 
2710
 
                /* The block must be put to the LRU list, to the old blocks */
2711
 
                buf_LRU_add_block(bpage, TRUE/* to old blocks */);
2712
 
                buf_LRU_insert_zip_clean(bpage);
2713
 
 
2714
 
                buf_page_set_io_fix(bpage, BUF_IO_READ);
2715
 
 
2716
 
                mutex_exit(&buf_pool_zip_mutex);
 
1775
                return(NULL);
2717
1776
        }
2718
1777
 
 
1778
        ut_ad(block);
 
1779
 
 
1780
        buf_page_init(space, offset, block);
 
1781
 
 
1782
        /* The block must be put to the LRU list, to the old blocks */
 
1783
 
 
1784
        buf_LRU_add_block(block, TRUE);         /* TRUE == to old blocks */
 
1785
 
 
1786
        block->io_fix = BUF_IO_READ;
 
1787
 
2719
1788
        buf_pool->n_pend_reads++;
2720
 
        buf_pool_mutex_exit();
 
1789
 
 
1790
        /* We set a pass-type x-lock on the frame because then the same
 
1791
        thread which called for the read operation (and is running now at
 
1792
        this point of code) can wait for the read to complete by waiting
 
1793
        for the x-lock on the frame; if the x-lock were recursive, the
 
1794
        same thread would illegally get the x-lock before the page read
 
1795
        is completed. The x-lock is cleared by the io-handler thread. */
 
1796
 
 
1797
        rw_lock_x_lock_gen(&(block->lock), BUF_IO_READ);
 
1798
 
 
1799
        mutex_exit(&block->mutex);
 
1800
        mutex_exit(&(buf_pool->mutex));
2721
1801
 
2722
1802
        if (mode == BUF_READ_IBUF_PAGES_ONLY) {
2723
1803
 
2724
1804
                mtr_commit(&mtr);
2725
1805
        }
2726
1806
 
2727
 
        ut_ad(buf_page_in_file(bpage));
2728
 
        return(bpage);
 
1807
        return(block);
2729
1808
}
2730
1809
 
2731
1810
/************************************************************************
2732
1811
Initializes a page to the buffer buf_pool. The page is usually not read
2733
1812
from a file even if it cannot be found in the buffer buf_pool. This is one
2734
1813
of the functions which perform on a block the state transition NOT_USED =>
2735
 
FILE_PAGE (the other is buf_page_get_gen). */
2736
 
UNIV_INTERN
2737
 
buf_block_t*
 
1814
FILE_PAGE (the other is buf_page_init_for_read above). */
 
1815
 
 
1816
buf_frame_t*
2738
1817
buf_page_create(
2739
1818
/*============*/
2740
 
                        /* out: pointer to the block, page bufferfixed */
 
1819
                        /* out: pointer to the frame, page bufferfixed */
2741
1820
        ulint   space,  /* in: space id */
2742
1821
        ulint   offset, /* in: offset of the page within space in units of
2743
1822
                        a page */
2744
 
        ulint   zip_size,/* in: compressed page size, or 0 */
2745
1823
        mtr_t*  mtr)    /* in: mini-transaction handle */
2746
1824
{
2747
1825
        buf_frame_t*    frame;
2749
1827
        buf_block_t*    free_block      = NULL;
2750
1828
 
2751
1829
        ut_ad(mtr);
2752
 
        ut_ad(space || !zip_size);
2753
 
 
2754
 
        free_block = buf_LRU_get_free_block(0);
2755
 
 
2756
 
        buf_pool_mutex_enter();
2757
 
 
2758
 
        block = (buf_block_t*) buf_page_hash_get(space, offset);
2759
 
 
2760
 
        if (block && buf_page_in_file(&block->page)) {
2761
 
#ifdef UNIV_IBUF_COUNT_DEBUG
2762
 
                ut_a(ibuf_count_get(space, offset) == 0);
 
1830
 
 
1831
        free_block = buf_LRU_get_free_block();
 
1832
 
 
1833
        mutex_enter(&(buf_pool->mutex));
 
1834
 
 
1835
        block = buf_page_hash_get(space, offset);
 
1836
 
 
1837
        if (block != NULL) {
 
1838
#ifdef UNIV_IBUF_DEBUG
 
1839
                ut_a(ibuf_count_get(block->space, block->offset) == 0);
2763
1840
#endif
2764
 
#ifdef UNIV_DEBUG_FILE_ACCESSES
2765
 
                block->page.file_page_was_freed = FALSE;
2766
 
#endif /* UNIV_DEBUG_FILE_ACCESSES */
 
1841
                block->file_page_was_freed = FALSE;
2767
1842
 
2768
1843
                /* Page can be found in buf_pool */
2769
 
                buf_pool_mutex_exit();
 
1844
                mutex_exit(&(buf_pool->mutex));
2770
1845
 
2771
1846
                buf_block_free(free_block);
2772
1847
 
2773
 
                return(buf_page_get_with_no_latch(space, zip_size,
2774
 
                                                  offset, mtr));
 
1848
                frame = buf_page_get_with_no_latch(space, offset, mtr);
 
1849
 
 
1850
                return(frame);
2775
1851
        }
2776
1852
 
2777
1853
        /* If we get here, the page was not in buf_pool: init it there */
2790
1866
        buf_page_init(space, offset, block);
2791
1867
 
2792
1868
        /* The block must be put to the LRU list */
2793
 
        buf_LRU_add_block(&block->page, FALSE);
 
1869
        buf_LRU_add_block(block, FALSE);
2794
1870
 
2795
 
        buf_block_buf_fix_inc(block, __FILE__, __LINE__);
 
1871
#ifdef UNIV_SYNC_DEBUG
 
1872
        buf_block_buf_fix_inc_debug(block, __FILE__, __LINE__);
 
1873
#else
 
1874
        buf_block_buf_fix_inc(block);
 
1875
#endif
2796
1876
        buf_pool->n_pages_created++;
2797
1877
 
2798
 
        if (zip_size) {
2799
 
                void*   data;
2800
 
                ibool   lru;
2801
 
 
2802
 
                /* Prevent race conditions during buf_buddy_alloc(),
2803
 
                which may release and reacquire buf_pool_mutex,
2804
 
                by IO-fixing and X-latching the block. */
2805
 
 
2806
 
                buf_page_set_io_fix(&block->page, BUF_IO_READ);
2807
 
                rw_lock_x_lock(&block->lock);
2808
 
 
2809
 
                page_zip_set_size(&block->page.zip, zip_size);
2810
 
                mutex_exit(&block->mutex);
2811
 
                /* buf_pool_mutex may be released and reacquired by
2812
 
                buf_buddy_alloc().  Thus, we must release block->mutex
2813
 
                in order not to break the latching order in
2814
 
                the reacquisition of buf_pool_mutex.  We also must
2815
 
                defer this operation until after the block descriptor
2816
 
                has been added to buf_pool->LRU and buf_pool->page_hash. */
2817
 
                data = buf_buddy_alloc(zip_size, &lru);
2818
 
                mutex_enter(&block->mutex);
2819
 
                block->page.zip.data = data;
2820
 
 
2821
 
                /* To maintain the invariant
2822
 
                block->in_unzip_LRU_list
2823
 
                == buf_page_belongs_to_unzip_LRU(&block->page)
2824
 
                we have to add this block to unzip_LRU after
2825
 
                block->page.zip.data is set. */
2826
 
                ut_ad(buf_page_belongs_to_unzip_LRU(&block->page));
2827
 
                buf_unzip_LRU_add_block(block, FALSE);
2828
 
 
2829
 
                buf_page_set_io_fix(&block->page, BUF_IO_NONE);
2830
 
                rw_lock_x_unlock(&block->lock);
2831
 
        }
2832
 
 
2833
 
        buf_pool_mutex_exit();
 
1878
        mutex_exit(&(buf_pool->mutex));
2834
1879
 
2835
1880
        mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);
2836
1881
 
2837
 
        buf_page_set_accessed(&block->page, TRUE);
 
1882
        block->accessed = TRUE;
2838
1883
 
2839
1884
        mutex_exit(&block->mutex);
2840
1885
 
2841
1886
        /* Delete possible entries for the page from the insert buffer:
2842
1887
        such can exist if the page belonged to an index which was dropped */
2843
1888
 
2844
 
        ibuf_merge_or_delete_for_page(NULL, space, offset, zip_size, TRUE);
 
1889
        ibuf_merge_or_delete_for_page(NULL, space, offset, TRUE);
2845
1890
 
2846
1891
        /* Flush pages from the end of the LRU list if necessary */
2847
1892
        buf_flush_free_margin();
2860
1905
 
2861
1906
        memset(frame + FIL_PAGE_FILE_FLUSH_LSN, 0, 8);
2862
1907
 
2863
 
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
2864
 
        ut_a(++buf_dbg_counter % 357 || buf_validate());
2865
 
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
2866
 
#ifdef UNIV_IBUF_COUNT_DEBUG
2867
 
        ut_a(ibuf_count_get(buf_block_get_space(block),
2868
 
                            buf_block_get_page_no(block)) == 0);
2869
 
#endif
2870
 
        return(block);
 
1908
#ifdef UNIV_DEBUG
 
1909
        buf_dbg_counter++;
 
1910
 
 
1911
        if (buf_dbg_counter % 357 == 0) {
 
1912
                ut_ad(buf_validate());
 
1913
        }
 
1914
#endif
 
1915
#ifdef UNIV_IBUF_DEBUG
 
1916
        ut_a(ibuf_count_get(block->space, block->offset) == 0);
 
1917
#endif
 
1918
        return(frame);
2871
1919
}
2872
1920
 
2873
1921
/************************************************************************
2874
1922
Completes an asynchronous read or write request of a file page to or from
2875
1923
the buffer pool. */
2876
 
UNIV_INTERN
 
1924
 
2877
1925
void
2878
1926
buf_page_io_complete(
2879
1927
/*=================*/
2880
 
        buf_page_t*     bpage)  /* in: pointer to the block in question */
 
1928
        buf_block_t*    block)  /* in: pointer to the block in question */
2881
1929
{
2882
 
        enum buf_io_fix io_type;
2883
 
        const ibool     uncompressed = (buf_page_get_state(bpage)
2884
 
                                        == BUF_BLOCK_FILE_PAGE);
2885
 
 
2886
 
        ut_a(buf_page_in_file(bpage));
2887
 
 
2888
 
        /* We do not need to protect io_fix here by mutex to read
 
1930
        ulint           io_type;
 
1931
 
 
1932
        ut_ad(block);
 
1933
 
 
1934
        ut_a(block->state == BUF_BLOCK_FILE_PAGE);
 
1935
 
 
1936
        /* We do not need to protect block->io_fix here by block->mutex to read
2889
1937
        it because this is the only function where we can change the value
2890
1938
        from BUF_IO_READ or BUF_IO_WRITE to some other value, and our code
2891
1939
        ensures that this is the only thread that handles the i/o for this
2892
1940
        block. */
2893
1941
 
2894
 
        io_type = buf_page_get_io_fix(bpage);
2895
 
        ut_ad(io_type == BUF_IO_READ || io_type == BUF_IO_WRITE);
 
1942
        io_type = block->io_fix;
2896
1943
 
2897
1944
        if (io_type == BUF_IO_READ) {
2898
 
                ulint   read_page_no;
2899
 
                ulint   read_space_id;
2900
 
                byte*   frame;
2901
 
 
2902
 
                if (buf_page_get_zip_size(bpage)) {
2903
 
                        frame = bpage->zip.data;
2904
 
                        buf_pool->n_pend_unzip++;
2905
 
                        if (uncompressed
2906
 
                            && !buf_zip_decompress((buf_block_t*) bpage,
2907
 
                                                   FALSE)) {
2908
 
 
2909
 
                                buf_pool->n_pend_unzip--;
2910
 
                                goto corrupt;
2911
 
                        }
2912
 
                        buf_pool->n_pend_unzip--;
2913
 
                } else {
2914
 
                        ut_a(uncompressed);
2915
 
                        frame = ((buf_block_t*) bpage)->frame;
2916
 
                }
2917
 
 
2918
1945
                /* If this page is not uninitialized and not in the
2919
1946
                doublewrite buffer, then the page number and space id
2920
1947
                should be the same as in block. */
2921
 
                read_page_no = mach_read_from_4(frame + FIL_PAGE_OFFSET);
2922
 
                read_space_id = mach_read_from_4(
2923
 
                        frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
 
1948
                ulint   read_page_no = mach_read_from_4(
 
1949
                        block->frame + FIL_PAGE_OFFSET);
 
1950
                ulint   read_space_id = mach_read_from_4(
 
1951
                        block->frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
2924
1952
 
2925
 
                if (bpage->space == TRX_SYS_SPACE
2926
 
                    && trx_doublewrite_page_inside(bpage->offset)) {
 
1953
                if (!block->space
 
1954
                    && trx_doublewrite_page_inside(block->offset)) {
2927
1955
 
2928
1956
                        ut_print_timestamp(stderr);
2929
1957
                        fprintf(stderr,
2930
1958
                                "  InnoDB: Error: reading page %lu\n"
2931
1959
                                "InnoDB: which is in the"
2932
1960
                                " doublewrite buffer!\n",
2933
 
                                (ulong) bpage->offset);
 
1961
                                (ulong) block->offset);
2934
1962
                } else if (!read_space_id && !read_page_no) {
2935
1963
                        /* This is likely an uninitialized page. */
2936
 
                } else if ((bpage->space
2937
 
                            && bpage->space != read_space_id)
2938
 
                           || bpage->offset != read_page_no) {
 
1964
                } else if ((block->space && block->space != read_space_id)
 
1965
                           || block->offset != read_page_no) {
2939
1966
                        /* We did not compare space_id to read_space_id
2940
 
                        if bpage->space == 0, because the field on the
 
1967
                        if block->space == 0, because the field on the
2941
1968
                        page may contain garbage in MySQL < 4.1.1,
2942
 
                        which only supported bpage->space == 0. */
 
1969
                        which only supported block->space == 0. */
2943
1970
 
2944
1971
                        ut_print_timestamp(stderr);
2945
1972
                        fprintf(stderr,
2948
1975
                                "InnoDB: read in are %lu:%lu,"
2949
1976
                                " should be %lu:%lu!\n",
2950
1977
                                (ulong) read_space_id, (ulong) read_page_no,
2951
 
                                (ulong) bpage->space,
2952
 
                                (ulong) bpage->offset);
 
1978
                                (ulong) block->space, (ulong) block->offset);
2953
1979
                }
2954
 
 
2955
1980
                /* From version 3.23.38 up we store the page checksum
2956
1981
                to the 4 first bytes of the page end lsn field */
2957
1982
 
2958
 
                if (buf_page_is_corrupted(frame,
2959
 
                                          buf_page_get_zip_size(bpage))) {
2960
 
corrupt:
2961
 
                        fprintf(stderr,
2962
 
                                "InnoDB: Database page corruption on disk"
2963
 
                                " or a failed\n"
2964
 
                                "InnoDB: file read of page %lu.\n"
2965
 
                                "InnoDB: You may have to recover"
2966
 
                                " from a backup.\n",
2967
 
                                (ulong) bpage->offset);
2968
 
                        buf_page_print(frame, buf_page_get_zip_size(bpage));
2969
 
                        fprintf(stderr,
2970
 
                                "InnoDB: Database page corruption on disk"
2971
 
                                " or a failed\n"
2972
 
                                "InnoDB: file read of page %lu.\n"
2973
 
                                "InnoDB: You may have to recover"
2974
 
                                " from a backup.\n",
2975
 
                                (ulong) bpage->offset);
 
1983
                if (buf_page_is_corrupted(block->frame)) {
 
1984
                        fprintf(stderr,
 
1985
                                "InnoDB: Database page corruption on disk"
 
1986
                                " or a failed\n"
 
1987
                                "InnoDB: file read of page %lu.\n",
 
1988
                                (ulong) block->offset);
 
1989
 
 
1990
                        fputs("InnoDB: You may have to recover"
 
1991
                              " from a backup.\n", stderr);
 
1992
 
 
1993
                        buf_page_print(block->frame);
 
1994
 
 
1995
                        fprintf(stderr,
 
1996
                                "InnoDB: Database page corruption on disk"
 
1997
                                " or a failed\n"
 
1998
                                "InnoDB: file read of page %lu.\n",
 
1999
                                (ulong) block->offset);
 
2000
                        fputs("InnoDB: You may have to recover"
 
2001
                              " from a backup.\n", stderr);
2976
2002
                        fputs("InnoDB: It is also possible that"
2977
2003
                              " your operating\n"
2978
2004
                              "InnoDB: system has corrupted its"
3003
2029
                }
3004
2030
 
3005
2031
                if (recv_recovery_is_on()) {
3006
 
                        /* Pages must be uncompressed for crash recovery. */
3007
 
                        ut_a(uncompressed);
3008
 
                        recv_recover_page(FALSE, TRUE, (buf_block_t*) bpage);
 
2032
                        recv_recover_page(FALSE, TRUE, block->frame,
 
2033
                                          block->space, block->offset);
3009
2034
                }
3010
2035
 
3011
 
                if (uncompressed && !recv_no_ibuf_operations) {
 
2036
                if (!recv_no_ibuf_operations) {
3012
2037
                        ibuf_merge_or_delete_for_page(
3013
 
                                (buf_block_t*) bpage, bpage->space,
3014
 
                                bpage->offset, buf_page_get_zip_size(bpage),
 
2038
                                block->frame, block->space, block->offset,
3015
2039
                                TRUE);
3016
2040
                }
3017
2041
        }
3018
2042
 
3019
 
        buf_pool_mutex_enter();
3020
 
        mutex_enter(buf_page_get_mutex(bpage));
 
2043
        mutex_enter(&(buf_pool->mutex));
 
2044
        mutex_enter(&block->mutex);
3021
2045
 
3022
 
#ifdef UNIV_IBUF_COUNT_DEBUG
3023
 
        if (io_type == BUF_IO_WRITE || uncompressed) {
3024
 
                /* For BUF_IO_READ of compressed-only blocks, the
3025
 
                buffered operations will be merged by buf_page_get_gen()
3026
 
                after the block has been uncompressed. */
3027
 
                ut_a(ibuf_count_get(bpage->space, bpage->offset) == 0);
3028
 
        }
 
2046
#ifdef UNIV_IBUF_DEBUG
 
2047
        ut_a(ibuf_count_get(block->space, block->offset) == 0);
3029
2048
#endif
3030
2049
        /* Because this thread which does the unlocking is not the same one that
3031
2050
        did the locking, we use a pass value != 0 in unlock, which simply
3032
2051
        removes the newest lock debug record, without checking the thread
3033
2052
        id. */
3034
2053
 
3035
 
        buf_page_set_io_fix(bpage, BUF_IO_NONE);
 
2054
        block->io_fix = 0;
3036
2055
 
3037
 
        switch (io_type) {
3038
 
        case BUF_IO_READ:
 
2056
        if (io_type == BUF_IO_READ) {
3039
2057
                /* NOTE that the call to ibuf may have moved the ownership of
3040
2058
                the x-latch to this OS thread: do not let this confuse you in
3041
2059
                debugging! */
3044
2062
                buf_pool->n_pend_reads--;
3045
2063
                buf_pool->n_pages_read++;
3046
2064
 
3047
 
                if (uncompressed) {
3048
 
                        rw_lock_x_unlock_gen(&((buf_block_t*) bpage)->lock,
3049
 
                                             BUF_IO_READ);
 
2065
                rw_lock_x_unlock_gen(&(block->lock), BUF_IO_READ);
 
2066
 
 
2067
#ifdef UNIV_DEBUG
 
2068
                if (buf_debug_prints) {
 
2069
                        fputs("Has read ", stderr);
3050
2070
                }
3051
 
 
3052
 
                break;
3053
 
 
3054
 
        case BUF_IO_WRITE:
 
2071
#endif /* UNIV_DEBUG */
 
2072
        } else {
 
2073
                ut_ad(io_type == BUF_IO_WRITE);
 
2074
 
3055
2075
                /* Write means a flush operation: call the completion
3056
2076
                routine in the flush system */
3057
2077
 
3058
 
                buf_flush_write_complete(bpage);
 
2078
                buf_flush_write_complete(block);
3059
2079
 
3060
 
                if (uncompressed) {
3061
 
                        rw_lock_s_unlock_gen(&((buf_block_t*) bpage)->lock,
3062
 
                                             BUF_IO_WRITE);
3063
 
                }
 
2080
                rw_lock_s_unlock_gen(&(block->lock), BUF_IO_WRITE);
3064
2081
 
3065
2082
                buf_pool->n_pages_written++;
3066
2083
 
3067
 
                break;
3068
 
 
3069
 
        default:
3070
 
                ut_error;
 
2084
#ifdef UNIV_DEBUG
 
2085
                if (buf_debug_prints) {
 
2086
                        fputs("Has written ", stderr);
 
2087
                }
 
2088
#endif /* UNIV_DEBUG */
3071
2089
        }
3072
2090
 
3073
 
        mutex_exit(buf_page_get_mutex(bpage));
3074
 
        buf_pool_mutex_exit();
 
2091
        mutex_exit(&block->mutex);
 
2092
        mutex_exit(&(buf_pool->mutex));
3075
2093
 
3076
2094
#ifdef UNIV_DEBUG
3077
2095
        if (buf_debug_prints) {
3078
 
                fprintf(stderr, "Has %s page space %lu page no %lu\n",
3079
 
                        io_type == BUF_IO_READ ? "read" : "written",
3080
 
                        (ulong) buf_page_get_space(bpage),
3081
 
                        (ulong) buf_page_get_page_no(bpage));
 
2096
                fprintf(stderr, "page space %lu page no %lu\n",
 
2097
                        (ulong) block->space, (ulong) block->offset);
3082
2098
        }
3083
2099
#endif /* UNIV_DEBUG */
3084
2100
}
3087
2103
Invalidates the file pages in the buffer pool when an archive recovery is
3088
2104
completed. All the file pages buffered must be in a replaceable state when
3089
2105
this function is called: not latched and not modified. */
3090
 
UNIV_INTERN
 
2106
 
3091
2107
void
3092
2108
buf_pool_invalidate(void)
3093
2109
/*=====================*/
3102
2118
                freed = buf_LRU_search_and_free_block(100);
3103
2119
        }
3104
2120
 
3105
 
        buf_pool_mutex_enter();
 
2121
        mutex_enter(&(buf_pool->mutex));
3106
2122
 
3107
2123
        ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0);
3108
 
        ut_ad(UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0);
3109
2124
 
3110
 
        buf_pool_mutex_exit();
 
2125
        mutex_exit(&(buf_pool->mutex));
3111
2126
}
3112
2127
 
3113
 
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 
2128
#ifdef UNIV_DEBUG
3114
2129
/*************************************************************************
3115
2130
Validates the buffer buf_pool data structure. */
3116
 
UNIV_INTERN
 
2131
 
3117
2132
ibool
3118
2133
buf_validate(void)
3119
2134
/*==============*/
3120
2135
{
3121
 
        buf_page_t*     b;
3122
 
        buf_chunk_t*    chunk;
 
2136
        buf_block_t*    block;
3123
2137
        ulint           i;
3124
2138
        ulint           n_single_flush  = 0;
3125
2139
        ulint           n_lru_flush     = 0;
3127
2141
        ulint           n_lru           = 0;
3128
2142
        ulint           n_flush         = 0;
3129
2143
        ulint           n_free          = 0;
3130
 
        ulint           n_zip           = 0;
 
2144
        ulint           n_page          = 0;
3131
2145
 
3132
2146
        ut_ad(buf_pool);
3133
2147
 
3134
 
        buf_pool_mutex_enter();
3135
 
 
3136
 
        chunk = buf_pool->chunks;
3137
 
 
3138
 
        /* Check the uncompressed blocks. */
3139
 
 
3140
 
        for (i = buf_pool->n_chunks; i--; chunk++) {
3141
 
 
3142
 
                ulint           j;
3143
 
                buf_block_t*    block = chunk->blocks;
3144
 
 
3145
 
                for (j = chunk->size; j--; block++) {
3146
 
 
3147
 
                        mutex_enter(&block->mutex);
3148
 
 
3149
 
                        switch (buf_block_get_state(block)) {
3150
 
                        case BUF_BLOCK_ZIP_FREE:
3151
 
                        case BUF_BLOCK_ZIP_PAGE:
3152
 
                        case BUF_BLOCK_ZIP_DIRTY:
3153
 
                                /* These should only occur on
3154
 
                                zip_clean, zip_free[], or flush_list. */
3155
 
                                ut_error;
3156
 
                                break;
3157
 
 
3158
 
                        case BUF_BLOCK_FILE_PAGE:
3159
 
                                ut_a(buf_page_hash_get(buf_block_get_space(
3160
 
                                                               block),
3161
 
                                                       buf_block_get_page_no(
3162
 
                                                               block))
3163
 
                                     == &block->page);
3164
 
 
3165
 
#ifdef UNIV_IBUF_COUNT_DEBUG
3166
 
                                ut_a(buf_page_get_io_fix(&block->page)
3167
 
                                     == BUF_IO_READ
3168
 
                                     || !ibuf_count_get(buf_block_get_space(
3169
 
                                                                block),
3170
 
                                                        buf_block_get_page_no(
3171
 
                                                                block)));
 
2148
        mutex_enter(&(buf_pool->mutex));
 
2149
 
 
2150
        for (i = 0; i < buf_pool->curr_size; i++) {
 
2151
 
 
2152
                block = buf_pool_get_nth_block(buf_pool, i);
 
2153
 
 
2154
                mutex_enter(&block->mutex);
 
2155
 
 
2156
                if (block->state == BUF_BLOCK_FILE_PAGE) {
 
2157
 
 
2158
                        ut_a(buf_page_hash_get(block->space,
 
2159
                                               block->offset) == block);
 
2160
                        n_page++;
 
2161
 
 
2162
#ifdef UNIV_IBUF_DEBUG
 
2163
                        ut_a((block->io_fix == BUF_IO_READ)
 
2164
                             || ibuf_count_get(block->space, block->offset)
 
2165
                             == 0);
3172
2166
#endif
3173
 
                                switch (buf_page_get_io_fix(&block->page)) {
3174
 
                                case BUF_IO_NONE:
3175
 
                                        break;
3176
 
 
3177
 
                                case BUF_IO_WRITE:
3178
 
                                        switch (buf_page_get_flush_type(
3179
 
                                                        &block->page)) {
3180
 
                                        case BUF_FLUSH_LRU:
3181
 
                                                n_lru_flush++;
3182
 
                                                ut_a(rw_lock_is_locked(
3183
 
                                                             &block->lock,
3184
 
                                                             RW_LOCK_SHARED));
3185
 
                                                break;
3186
 
                                        case BUF_FLUSH_LIST:
3187
 
                                                n_list_flush++;
3188
 
                                                break;
3189
 
                                        case BUF_FLUSH_SINGLE_PAGE:
3190
 
                                                n_single_flush++;
3191
 
                                                break;
3192
 
                                        default:
3193
 
                                                ut_error;
3194
 
                                        }
3195
 
 
3196
 
                                        break;
3197
 
 
3198
 
                                case BUF_IO_READ:
3199
 
 
3200
 
                                        ut_a(rw_lock_is_locked(&block->lock,
3201
 
                                                               RW_LOCK_EX));
3202
 
                                        break;
3203
 
                                }
3204
 
 
3205
 
                                n_lru++;
3206
 
 
3207
 
                                if (block->page.oldest_modification > 0) {
3208
 
                                        n_flush++;
3209
 
                                }
3210
 
 
3211
 
                                break;
3212
 
 
3213
 
                        case BUF_BLOCK_NOT_USED:
3214
 
                                n_free++;
3215
 
                                break;
3216
 
 
3217
 
                        case BUF_BLOCK_READY_FOR_USE:
3218
 
                        case BUF_BLOCK_MEMORY:
3219
 
                        case BUF_BLOCK_REMOVE_HASH:
3220
 
                                /* do nothing */
3221
 
                                break;
3222
 
                        }
3223
 
 
3224
 
                        mutex_exit(&block->mutex);
3225
 
                }
3226
 
        }
3227
 
 
3228
 
        mutex_enter(&buf_pool_zip_mutex);
3229
 
 
3230
 
        /* Check clean compressed-only blocks. */
3231
 
 
3232
 
        for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
3233
 
             b = UT_LIST_GET_NEXT(list, b)) {
3234
 
                ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
3235
 
                switch (buf_page_get_io_fix(b)) {
3236
 
                case BUF_IO_NONE:
3237
 
                        /* All clean blocks should be I/O-unfixed. */
3238
 
                        break;
3239
 
                case BUF_IO_READ:
3240
 
                        /* In buf_LRU_free_block(), we temporarily set
3241
 
                        b->io_fix = BUF_IO_READ for a newly allocated
3242
 
                        control block in order to prevent
3243
 
                        buf_page_get_gen() from decompressing the block. */
3244
 
                        break;
3245
 
                default:
3246
 
                        ut_error;
3247
 
                        break;
3248
 
                }
3249
 
                ut_a(!b->oldest_modification);
3250
 
                ut_a(buf_page_hash_get(b->space, b->offset) == b);
3251
 
 
3252
 
                n_lru++;
3253
 
                n_zip++;
3254
 
        }
3255
 
 
3256
 
        /* Check dirty compressed-only blocks. */
3257
 
 
3258
 
        for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
3259
 
             b = UT_LIST_GET_NEXT(list, b)) {
3260
 
                ut_ad(b->in_flush_list);
3261
 
 
3262
 
                switch (buf_page_get_state(b)) {
3263
 
                case BUF_BLOCK_ZIP_DIRTY:
3264
 
                        ut_a(b->oldest_modification);
3265
 
                        n_lru++;
3266
 
                        n_flush++;
3267
 
                        n_zip++;
3268
 
                        switch (buf_page_get_io_fix(b)) {
3269
 
                        case BUF_IO_NONE:
3270
 
                        case BUF_IO_READ:
3271
 
                                break;
3272
 
 
3273
 
                        case BUF_IO_WRITE:
3274
 
                                switch (buf_page_get_flush_type(b)) {
3275
 
                                case BUF_FLUSH_LRU:
 
2167
                        if (block->io_fix == BUF_IO_WRITE) {
 
2168
 
 
2169
                                if (block->flush_type == BUF_FLUSH_LRU) {
3276
2170
                                        n_lru_flush++;
3277
 
                                        break;
3278
 
                                case BUF_FLUSH_LIST:
 
2171
                                        ut_a(rw_lock_is_locked(
 
2172
                                                     &block->lock,
 
2173
                                                     RW_LOCK_SHARED));
 
2174
                                } else if (block->flush_type
 
2175
                                           == BUF_FLUSH_LIST) {
3279
2176
                                        n_list_flush++;
3280
 
                                        break;
3281
 
                                case BUF_FLUSH_SINGLE_PAGE:
 
2177
                                } else if (block->flush_type
 
2178
                                           == BUF_FLUSH_SINGLE_PAGE) {
3282
2179
                                        n_single_flush++;
3283
 
                                        break;
3284
 
                                default:
 
2180
                                } else {
3285
2181
                                        ut_error;
3286
2182
                                }
3287
 
                                break;
3288
 
                        }
3289
 
                        break;
3290
 
                case BUF_BLOCK_FILE_PAGE:
3291
 
                        /* uncompressed page */
3292
 
                        break;
3293
 
                case BUF_BLOCK_ZIP_FREE:
3294
 
                case BUF_BLOCK_ZIP_PAGE:
3295
 
                case BUF_BLOCK_NOT_USED:
3296
 
                case BUF_BLOCK_READY_FOR_USE:
3297
 
                case BUF_BLOCK_MEMORY:
3298
 
                case BUF_BLOCK_REMOVE_HASH:
3299
 
                        ut_error;
3300
 
                        break;
 
2183
 
 
2184
                        } else if (block->io_fix == BUF_IO_READ) {
 
2185
 
 
2186
                                ut_a(rw_lock_is_locked(&(block->lock),
 
2187
                                                       RW_LOCK_EX));
 
2188
                        }
 
2189
 
 
2190
                        n_lru++;
 
2191
 
 
2192
                        if (ut_dulint_cmp(block->oldest_modification,
 
2193
                                          ut_dulint_zero) > 0) {
 
2194
                                n_flush++;
 
2195
                        }
 
2196
 
 
2197
                } else if (block->state == BUF_BLOCK_NOT_USED) {
 
2198
                        n_free++;
3301
2199
                }
3302
 
                ut_a(buf_page_hash_get(b->space, b->offset) == b);
 
2200
 
 
2201
                mutex_exit(&block->mutex);
3303
2202
        }
3304
2203
 
3305
 
        mutex_exit(&buf_pool_zip_mutex);
3306
 
 
3307
 
        if (n_lru + n_free > buf_pool->curr_size + n_zip) {
3308
 
                fprintf(stderr, "n LRU %lu, n free %lu, pool %lu zip %lu\n",
3309
 
                        (ulong) n_lru, (ulong) n_free,
3310
 
                        (ulong) buf_pool->curr_size, (ulong) n_zip);
 
2204
        if (n_lru + n_free > buf_pool->curr_size) {
 
2205
                fprintf(stderr, "n LRU %lu, n free %lu\n",
 
2206
                        (ulong) n_lru, (ulong) n_free);
3311
2207
                ut_error;
3312
2208
        }
3313
2209
 
3324
2220
        ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
3325
2221
        ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush);
3326
2222
 
3327
 
        buf_pool_mutex_exit();
 
2223
        mutex_exit(&(buf_pool->mutex));
3328
2224
 
3329
2225
        ut_a(buf_LRU_validate());
3330
2226
        ut_a(buf_flush_validate());
3331
2227
 
3332
2228
        return(TRUE);
3333
2229
}
3334
 
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
3335
2230
 
3336
 
#if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
3337
2231
/*************************************************************************
3338
2232
Prints info of the buffer buf_pool data structure. */
3339
 
UNIV_INTERN
 
2233
 
3340
2234
void
3341
2235
buf_print(void)
3342
2236
/*===========*/
3348
2242
        ulint           j;
3349
2243
        dulint          id;
3350
2244
        ulint           n_found;
3351
 
        buf_chunk_t*    chunk;
 
2245
        buf_frame_t*    frame;
3352
2246
        dict_index_t*   index;
3353
2247
 
3354
2248
        ut_ad(buf_pool);
3358
2252
        index_ids = mem_alloc(sizeof(dulint) * size);
3359
2253
        counts = mem_alloc(sizeof(ulint) * size);
3360
2254
 
3361
 
        buf_pool_mutex_enter();
 
2255
        mutex_enter(&(buf_pool->mutex));
3362
2256
 
3363
2257
        fprintf(stderr,
3364
2258
                "buf_pool size %lu\n"
3365
2259
                "database pages %lu\n"
3366
2260
                "free pages %lu\n"
3367
2261
                "modified database pages %lu\n"
3368
 
                "n pending decompressions %lu\n"
3369
2262
                "n pending reads %lu\n"
3370
2263
                "n pending flush LRU %lu list %lu single page %lu\n"
3371
2264
                "pages read %lu, created %lu, written %lu\n",
3373
2266
                (ulong) UT_LIST_GET_LEN(buf_pool->LRU),
3374
2267
                (ulong) UT_LIST_GET_LEN(buf_pool->free),
3375
2268
                (ulong) UT_LIST_GET_LEN(buf_pool->flush_list),
3376
 
                (ulong) buf_pool->n_pend_unzip,
3377
2269
                (ulong) buf_pool->n_pend_reads,
3378
2270
                (ulong) buf_pool->n_flush[BUF_FLUSH_LRU],
3379
2271
                (ulong) buf_pool->n_flush[BUF_FLUSH_LIST],
3385
2277
 
3386
2278
        n_found = 0;
3387
2279
 
3388
 
        chunk = buf_pool->chunks;
3389
 
 
3390
 
        for (i = buf_pool->n_chunks; i--; chunk++) {
3391
 
                buf_block_t*    block           = chunk->blocks;
3392
 
                ulint           n_blocks        = chunk->size;
3393
 
 
3394
 
                for (; n_blocks--; block++) {
3395
 
                        const buf_frame_t* frame = block->frame;
3396
 
 
3397
 
                        if (fil_page_get_type(frame) == FIL_PAGE_INDEX) {
3398
 
 
3399
 
                                id = btr_page_get_index_id(frame);
3400
 
 
3401
 
                                /* Look for the id in the index_ids array */
3402
 
                                j = 0;
3403
 
 
3404
 
                                while (j < n_found) {
3405
 
 
3406
 
                                        if (ut_dulint_cmp(index_ids[j],
3407
 
                                                          id) == 0) {
3408
 
                                                counts[j]++;
3409
 
 
3410
 
                                                break;
3411
 
                                        }
3412
 
                                        j++;
3413
 
                                }
3414
 
 
3415
 
                                if (j == n_found) {
3416
 
                                        n_found++;
3417
 
                                        index_ids[j] = id;
3418
 
                                        counts[j] = 1;
3419
 
                                }
 
2280
        for (i = 0; i < size; i++) {
 
2281
                frame = buf_pool_get_nth_block(buf_pool, i)->frame;
 
2282
 
 
2283
                if (fil_page_get_type(frame) == FIL_PAGE_INDEX) {
 
2284
 
 
2285
                        id = btr_page_get_index_id(frame);
 
2286
 
 
2287
                        /* Look for the id in the index_ids array */
 
2288
                        j = 0;
 
2289
 
 
2290
                        while (j < n_found) {
 
2291
 
 
2292
                                if (ut_dulint_cmp(index_ids[j], id) == 0) {
 
2293
                                        (counts[j])++;
 
2294
 
 
2295
                                        break;
 
2296
                                }
 
2297
                                j++;
 
2298
                        }
 
2299
 
 
2300
                        if (j == n_found) {
 
2301
                                n_found++;
 
2302
                                index_ids[j] = id;
 
2303
                                counts[j] = 1;
3420
2304
                        }
3421
2305
                }
3422
2306
        }
3423
2307
 
3424
 
        buf_pool_mutex_exit();
 
2308
        mutex_exit(&(buf_pool->mutex));
3425
2309
 
3426
2310
        for (i = 0; i < n_found; i++) {
3427
2311
                index = dict_index_get_if_in_cache(index_ids[i]);
3444
2328
 
3445
2329
        ut_a(buf_validate());
3446
2330
}
3447
 
#endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */
 
2331
#endif /* UNIV_DEBUG */
3448
2332
 
3449
2333
/*************************************************************************
3450
2334
Returns the number of latched pages in the buffer pool. */
3451
 
UNIV_INTERN
 
2335
 
3452
2336
ulint
3453
2337
buf_get_latched_pages_number(void)
3454
 
/*==============================*/
3455
2338
{
3456
 
        buf_chunk_t*    chunk;
3457
 
        buf_page_t*     b;
 
2339
        buf_block_t*    block;
3458
2340
        ulint           i;
3459
2341
        ulint           fixed_pages_number = 0;
3460
2342
 
3461
 
        buf_pool_mutex_enter();
3462
 
 
3463
 
        chunk = buf_pool->chunks;
3464
 
 
3465
 
        for (i = buf_pool->n_chunks; i--; chunk++) {
3466
 
                buf_block_t*    block;
3467
 
                ulint           j;
3468
 
 
3469
 
                block = chunk->blocks;
3470
 
 
3471
 
                for (j = chunk->size; j--; block++) {
3472
 
                        if (buf_block_get_state(block)
3473
 
                            != BUF_BLOCK_FILE_PAGE) {
3474
 
 
3475
 
                                continue;
3476
 
                        }
3477
 
 
 
2343
        mutex_enter(&(buf_pool->mutex));
 
2344
 
 
2345
        for (i = 0; i < buf_pool->curr_size; i++) {
 
2346
 
 
2347
                block = buf_pool_get_nth_block(buf_pool, i);
 
2348
 
 
2349
                if (block->magic_n == BUF_BLOCK_MAGIC_N) {
3478
2350
                        mutex_enter(&block->mutex);
3479
2351
 
3480
 
                        if (block->page.buf_fix_count != 0
3481
 
                            || buf_page_get_io_fix(&block->page)
3482
 
                            != BUF_IO_NONE) {
 
2352
                        if (block->buf_fix_count != 0 || block->io_fix != 0) {
3483
2353
                                fixed_pages_number++;
3484
2354
                        }
3485
2355
 
3487
2357
                }
3488
2358
        }
3489
2359
 
3490
 
        mutex_enter(&buf_pool_zip_mutex);
3491
 
 
3492
 
        /* Traverse the lists of clean and dirty compressed-only blocks. */
3493
 
 
3494
 
        for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
3495
 
             b = UT_LIST_GET_NEXT(list, b)) {
3496
 
                ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
3497
 
                ut_a(buf_page_get_io_fix(b) != BUF_IO_WRITE);
3498
 
 
3499
 
                if (b->buf_fix_count != 0
3500
 
                    || buf_page_get_io_fix(b) != BUF_IO_NONE) {
3501
 
                        fixed_pages_number++;
3502
 
                }
3503
 
        }
3504
 
 
3505
 
        for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
3506
 
             b = UT_LIST_GET_NEXT(list, b)) {
3507
 
                ut_ad(b->in_flush_list);
3508
 
 
3509
 
                switch (buf_page_get_state(b)) {
3510
 
                case BUF_BLOCK_ZIP_DIRTY:
3511
 
                        if (b->buf_fix_count != 0
3512
 
                            || buf_page_get_io_fix(b) != BUF_IO_NONE) {
3513
 
                                fixed_pages_number++;
3514
 
                        }
3515
 
                        break;
3516
 
                case BUF_BLOCK_FILE_PAGE:
3517
 
                        /* uncompressed page */
3518
 
                        break;
3519
 
                case BUF_BLOCK_ZIP_FREE:
3520
 
                case BUF_BLOCK_ZIP_PAGE:
3521
 
                case BUF_BLOCK_NOT_USED:
3522
 
                case BUF_BLOCK_READY_FOR_USE:
3523
 
                case BUF_BLOCK_MEMORY:
3524
 
                case BUF_BLOCK_REMOVE_HASH:
3525
 
                        ut_error;
3526
 
                        break;
3527
 
                }
3528
 
        }
3529
 
 
3530
 
        mutex_exit(&buf_pool_zip_mutex);
3531
 
        buf_pool_mutex_exit();
 
2360
        mutex_exit(&(buf_pool->mutex));
3532
2361
 
3533
2362
        return(fixed_pages_number);
3534
2363
}
3535
2364
 
3536
2365
/*************************************************************************
3537
2366
Returns the number of pending buf pool ios. */
3538
 
UNIV_INTERN
 
2367
 
3539
2368
ulint
3540
2369
buf_get_n_pending_ios(void)
3541
2370
/*=======================*/
3549
2378
/*************************************************************************
3550
2379
Returns the ratio in percents of modified pages in the buffer pool /
3551
2380
database pages in the buffer pool. */
3552
 
UNIV_INTERN
 
2381
 
3553
2382
ulint
3554
2383
buf_get_modified_ratio_pct(void)
3555
2384
/*============================*/
3556
2385
{
3557
2386
        ulint   ratio;
3558
2387
 
3559
 
        buf_pool_mutex_enter();
 
2388
        mutex_enter(&(buf_pool->mutex));
3560
2389
 
3561
2390
        ratio = (100 * UT_LIST_GET_LEN(buf_pool->flush_list))
3562
2391
                / (1 + UT_LIST_GET_LEN(buf_pool->LRU)
3564
2393
 
3565
2394
        /* 1 + is there to avoid division by zero */
3566
2395
 
3567
 
        buf_pool_mutex_exit();
 
2396
        mutex_exit(&(buf_pool->mutex));
3568
2397
 
3569
2398
        return(ratio);
3570
2399
}
3571
2400
 
3572
2401
/*************************************************************************
3573
2402
Prints info of the buffer i/o. */
3574
 
UNIV_INTERN
 
2403
 
3575
2404
void
3576
2405
buf_print_io(
3577
2406
/*=========*/
3584
2413
        ut_ad(buf_pool);
3585
2414
        size = buf_pool->curr_size;
3586
2415
 
3587
 
        buf_pool_mutex_enter();
3588
 
 
 
2416
        mutex_enter(&(buf_pool->mutex));
 
2417
 
 
2418
        if (srv_use_awe) {
 
2419
                fprintf(stderr,
 
2420
                        "AWE: Buffer pool memory frames %lu\n",
 
2421
                        (ulong) buf_pool->n_frames);
 
2422
 
 
2423
                fprintf(stderr,
 
2424
                        "AWE: Database pages and free buffers"
 
2425
                        " mapped in frames %lu\n",
 
2426
                        (ulong)
 
2427
                        UT_LIST_GET_LEN(buf_pool->awe_LRU_free_mapped));
 
2428
        }
3589
2429
        fprintf(file,
3590
2430
                "Buffer pool size   %lu\n"
3591
2431
                "Free buffers       %lu\n"
3622
2462
                (buf_pool->n_pages_written - buf_pool->n_pages_written_old)
3623
2463
                / time_elapsed);
3624
2464
 
 
2465
        if (srv_use_awe) {
 
2466
                fprintf(file, "AWE: %.2f page remaps/s\n",
 
2467
                        (buf_pool->n_pages_awe_remapped
 
2468
                         - buf_pool->n_pages_awe_remapped_old)
 
2469
                        / time_elapsed);
 
2470
        }
 
2471
 
3625
2472
        if (buf_pool->n_page_gets > buf_pool->n_page_gets_old) {
3626
2473
                fprintf(file, "Buffer pool hit rate %lu / 1000\n",
3627
2474
                        (ulong)
3638
2485
        buf_pool->n_pages_read_old = buf_pool->n_pages_read;
3639
2486
        buf_pool->n_pages_created_old = buf_pool->n_pages_created;
3640
2487
        buf_pool->n_pages_written_old = buf_pool->n_pages_written;
3641
 
 
3642
 
        /* Print some values to help us with visualizing what is
3643
 
        happening with LRU eviction. */
3644
 
        fprintf(file,
3645
 
                "LRU len: %lu, unzip_LRU len: %lu\n"
3646
 
                "I/O sum[%lu]:cur[%lu], unzip sum[%lu]:cur[%lu]\n",
3647
 
                UT_LIST_GET_LEN(buf_pool->LRU),
3648
 
                UT_LIST_GET_LEN(buf_pool->unzip_LRU),
3649
 
                buf_LRU_stat_sum.io, buf_LRU_stat_cur.io,
3650
 
                buf_LRU_stat_sum.unzip, buf_LRU_stat_cur.unzip);
3651
 
 
3652
 
        buf_pool_mutex_exit();
 
2488
        buf_pool->n_pages_awe_remapped_old = buf_pool->n_pages_awe_remapped;
 
2489
 
 
2490
        mutex_exit(&(buf_pool->mutex));
3653
2491
}
3654
2492
 
3655
2493
/**************************************************************************
3656
2494
Refreshes the statistics used to print per-second averages. */
3657
 
UNIV_INTERN
 
2495
 
3658
2496
void
3659
2497
buf_refresh_io_stats(void)
3660
2498
/*======================*/
3664
2502
        buf_pool->n_pages_read_old = buf_pool->n_pages_read;
3665
2503
        buf_pool->n_pages_created_old = buf_pool->n_pages_created;
3666
2504
        buf_pool->n_pages_written_old = buf_pool->n_pages_written;
 
2505
        buf_pool->n_pages_awe_remapped_old = buf_pool->n_pages_awe_remapped;
3667
2506
}
3668
2507
 
3669
2508
/*************************************************************************
3670
2509
Checks that all file pages in the buffer are in a replaceable state. */
3671
 
UNIV_INTERN
 
2510
 
3672
2511
ibool
3673
2512
buf_all_freed(void)
3674
2513
/*===============*/
3675
2514
{
3676
 
        buf_chunk_t*    chunk;
 
2515
        buf_block_t*    block;
3677
2516
        ulint           i;
3678
2517
 
3679
2518
        ut_ad(buf_pool);
3680
2519
 
3681
 
        buf_pool_mutex_enter();
3682
 
 
3683
 
        chunk = buf_pool->chunks;
3684
 
 
3685
 
        for (i = buf_pool->n_chunks; i--; chunk++) {
3686
 
 
3687
 
                const buf_block_t* block = buf_chunk_not_freed(chunk);
3688
 
 
3689
 
                if (UNIV_LIKELY_NULL(block)) {
3690
 
                        fprintf(stderr,
3691
 
                                "Page %lu %lu still fixed or dirty\n",
3692
 
                                (ulong) block->page.space,
3693
 
                                (ulong) block->page.offset);
3694
 
                        ut_error;
 
2520
        mutex_enter(&(buf_pool->mutex));
 
2521
 
 
2522
        for (i = 0; i < buf_pool->curr_size; i++) {
 
2523
 
 
2524
                block = buf_pool_get_nth_block(buf_pool, i);
 
2525
 
 
2526
                mutex_enter(&block->mutex);
 
2527
 
 
2528
                if (block->state == BUF_BLOCK_FILE_PAGE) {
 
2529
 
 
2530
                        if (!buf_flush_ready_for_replace(block)) {
 
2531
 
 
2532
                                fprintf(stderr,
 
2533
                                        "Page %lu %lu still fixed or dirty\n",
 
2534
                                        (ulong) block->space,
 
2535
                                        (ulong) block->offset);
 
2536
                                ut_error;
 
2537
                        }
3695
2538
                }
 
2539
 
 
2540
                mutex_exit(&block->mutex);
3696
2541
        }
3697
2542
 
3698
 
        buf_pool_mutex_exit();
 
2543
        mutex_exit(&(buf_pool->mutex));
3699
2544
 
3700
2545
        return(TRUE);
3701
2546
}
3703
2548
/*************************************************************************
3704
2549
Checks that there currently are no pending i/o-operations for the buffer
3705
2550
pool. */
3706
 
UNIV_INTERN
 
2551
 
3707
2552
ibool
3708
2553
buf_pool_check_no_pending_io(void)
3709
2554
/*==============================*/
3711
2556
{
3712
2557
        ibool   ret;
3713
2558
 
3714
 
        buf_pool_mutex_enter();
 
2559
        mutex_enter(&(buf_pool->mutex));
3715
2560
 
3716
2561
        if (buf_pool->n_pend_reads + buf_pool->n_flush[BUF_FLUSH_LRU]
3717
2562
            + buf_pool->n_flush[BUF_FLUSH_LIST]
3721
2566
                ret = TRUE;
3722
2567
        }
3723
2568
 
3724
 
        buf_pool_mutex_exit();
 
2569
        mutex_exit(&(buf_pool->mutex));
3725
2570
 
3726
2571
        return(ret);
3727
2572
}
3728
2573
 
3729
2574
/*************************************************************************
3730
2575
Gets the current length of the free list of buffer blocks. */
3731
 
UNIV_INTERN
 
2576
 
3732
2577
ulint
3733
2578
buf_get_free_list_len(void)
3734
2579
/*=======================*/
3735
2580
{
3736
2581
        ulint   len;
3737
2582
 
3738
 
        buf_pool_mutex_enter();
 
2583
        mutex_enter(&(buf_pool->mutex));
3739
2584
 
3740
2585
        len = UT_LIST_GET_LEN(buf_pool->free);
3741
2586
 
3742
 
        buf_pool_mutex_exit();
 
2587
        mutex_exit(&(buf_pool->mutex));
3743
2588
 
3744
2589
        return(len);
3745
2590
}