1
/* Innobase relational database engine; Copyright (C) 2001 Innobase Oy
3
This program is free software; you can redistribute it and/or modify
4
it under the terms of the GNU General Public License 2
5
as published by the Free Software Foundation in June 1991.
7
This program is distributed in the hope that it will be useful,
8
but WITHOUT ANY WARRANTY; without even the implied warranty of
9
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
GNU General Public License for more details.
12
You should have received a copy of the GNU General Public License 2
13
along with this program (in file COPYING); if not, write to the Free
14
Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
15
/******************************************************
16
The database buffer pool high-level routines
20
Created 11/5/1995 Heikki Tuuri
21
*******************************************************/
28
#include "mtr0types.h"
29
#include "buf0types.h"
31
#include "hash0hash.h"
34
#include "page0types.h"
36
/* Modes for buf_page_get_gen */
37
#define BUF_GET 10 /* get always */
38
#define BUF_GET_IF_IN_POOL 11 /* get if in pool */
39
#define BUF_GET_NOWAIT 12 /* get if can set the latch without waiting */
41
#define BUF_GET_NO_LATCH 14 /* get and bufferfix, but set no latch;
we have separated this case, because
it is error-prone programming not to
set a latch, and it should be used
with care */
46
/* Modes for buf_page_get_known_nowait */
47
#define BUF_MAKE_YOUNG 51
48
#define BUF_KEEP_OLD 52
49
/* Magic value to use instead of checksums when they are disabled */
50
#define BUF_NO_CHECKSUM_MAGIC 0xDEADBEEFUL
52
extern buf_pool_t* buf_pool; /* The buffer pool of the database */
54
extern ibool buf_debug_prints;/* If this is set TRUE, the program
55
prints info whenever read or flush
57
#endif /* UNIV_DEBUG */
58
extern ulint srv_buf_pool_write_requests; /* variable to count write requests */
61
/* States of a control block (@see buf_page_struct).
62
The enumeration values must be 0..7. */
64
BUF_BLOCK_ZIP_FREE = 0, /* contains a free compressed page */
65
BUF_BLOCK_ZIP_PAGE, /* contains a clean compressed page */
66
BUF_BLOCK_ZIP_DIRTY, /* contains a compressed page that is
67
in the buf_pool->flush_list */
69
/* The constants for compressed-only pages must precede
70
BUF_BLOCK_NOT_USED; @see buf_block_state_valid() */
72
BUF_BLOCK_NOT_USED, /* is in the free list */
73
BUF_BLOCK_READY_FOR_USE, /* when buf_LRU_get_free_block returns
74
a block, it is in this state */
75
BUF_BLOCK_FILE_PAGE, /* contains a buffered file page */
76
BUF_BLOCK_MEMORY, /* contains some main memory object */
77
BUF_BLOCK_REMOVE_HASH /* hash index should be removed
78
before putting to the free list */
81
/************************************************************************
82
Creates the buffer pool. */
87
/* out, own: buf_pool object, NULL if not
88
enough memory or error */
89
/************************************************************************
90
Frees the buffer pool at shutdown. This must not be invoked before
91
freeing all mutexes. */
97
/************************************************************************
98
Relocate a buffer control block. Relocates the block on the LRU list
99
and in buf_pool->page_hash. Does not relocate bpage->list.
100
The caller must take care of relocating bpage->list. */
105
buf_page_t* bpage, /* in/out: control block being relocated;
106
buf_page_get_state(bpage) must be
107
BUF_BLOCK_ZIP_DIRTY or BUF_BLOCK_ZIP_PAGE */
108
buf_page_t* dpage) /* in/out: destination control block */
109
__attribute__((nonnull));
110
/************************************************************************
111
Resizes the buffer pool. */
114
buf_pool_resize(void);
115
/*=================*/
116
/*************************************************************************
117
Gets the current size of the buffer pool in bytes. */
120
buf_pool_get_curr_size(void);
121
/*========================*/
122
/* out: size in bytes */
123
/************************************************************************
124
Gets the smallest oldest_modification lsn for any page in the pool. Returns
125
zero if all modified pages have been flushed to disk. */
128
buf_pool_get_oldest_modification(void);
129
/*==================================*/
130
/* out: oldest modification in pool,
132
/************************************************************************
133
Allocates a buffer block. */
138
/* out, own: the allocated block,
139
in state BUF_BLOCK_MEMORY */
140
ulint zip_size); /* in: compressed page size in bytes,
141
or 0 if uncompressed tablespace */
142
/************************************************************************
143
Frees a buffer block which does not contain a file page. */
148
buf_block_t* block); /* in, own: block to be freed */
149
/*************************************************************************
150
Copies contents of a buffer frame to a given buffer. */
156
byte* buf, /* in: buffer to copy to */
157
const buf_frame_t* frame); /* in: buffer frame */
158
/******************************************************************
NOTE! The following macros should be used instead of buf_page_get_gen,
to improve debugging. Only values RW_S_LATCH and RW_X_LATCH are allowed
as LA!

buf_page_get forwards SP, ZS, OF, LA and MTR to buf_page_get_gen, passing
NULL as the guessed block, BUF_GET as the mode, and the call site's
__FILE__ and __LINE__ as the file and line arguments. */
#define buf_page_get(SP, ZS, OF, LA, MTR)	buf_page_get_gen(\
				SP, ZS, OF, LA, NULL,\
				BUF_GET, __FILE__, __LINE__, MTR)
165
/******************************************************************
Use these macros to bufferfix a page with no latching. Remember not to
read the contents of the page unless you know it is safe. Do not modify
the contents of the page! We have separated this case, because it is
error-prone programming not to set a latch, and it should be used
with care.

buf_page_get_with_no_latch forwards SP, ZS, OF and MTR to buf_page_get_gen,
passing RW_NO_LATCH as the latch mode, NULL as the guessed block,
BUF_GET_NO_LATCH as the mode, and the call site's __FILE__ and __LINE__. */
#define buf_page_get_with_no_latch(SP, ZS, OF, MTR)	buf_page_get_gen(\
				SP, ZS, OF, RW_NO_LATCH, NULL,\
				BUF_GET_NO_LATCH, __FILE__, __LINE__, MTR)
174
/******************************************************************
NOTE! The following macros should be used instead of buf_page_get_gen, to
improve debugging. Only values RW_S_LATCH and RW_X_LATCH are allowed as LA!

buf_page_get_nowait forwards SP, ZS, OF, LA and MTR to buf_page_get_gen,
passing NULL as the guessed block, BUF_GET_NOWAIT as the mode, and the
call site's __FILE__ and __LINE__ as the file and line arguments. */
#define buf_page_get_nowait(SP, ZS, OF, LA, MTR)	buf_page_get_gen(\
				SP, ZS, OF, LA, NULL,\
				BUF_GET_NOWAIT, __FILE__, __LINE__, MTR)
180
/******************************************************************
NOTE! The following macros should be used instead of
buf_page_optimistic_get_func, to improve debugging. Only values RW_S_LATCH and
RW_X_LATCH are allowed as LA!

buf_page_optimistic_get forwards LA, BL, MC and MTR to
buf_page_optimistic_get_func and inserts the call site's __FILE__ and
__LINE__ as the file and line arguments. */
#define buf_page_optimistic_get(LA, BL, MC, MTR)	\
	buf_page_optimistic_get_func(LA, BL, MC, __FILE__, __LINE__, MTR)
186
/************************************************************************
187
This is the general function used to get optimistic access to a database
191
buf_page_optimistic_get_func(
192
/*=========================*/
193
/* out: TRUE if success */
194
ulint rw_latch,/* in: RW_S_LATCH, RW_X_LATCH */
195
buf_block_t* block, /* in: guessed block */
196
ib_uint64_t modify_clock,/* in: modify clock value if mode is
197
..._GUESS_ON_CLOCK */
198
const char* file, /* in: file name */
199
ulint line, /* in: line where called */
200
mtr_t* mtr); /* in: mini-transaction */
201
/************************************************************************
202
This is used to get access to a known database page, when no waiting can be
206
buf_page_get_known_nowait(
207
/*======================*/
208
/* out: TRUE if success */
209
ulint rw_latch,/* in: RW_S_LATCH, RW_X_LATCH */
210
buf_block_t* block, /* in: the known page */
211
ulint mode, /* in: BUF_MAKE_YOUNG or BUF_KEEP_OLD */
212
const char* file, /* in: file name */
213
ulint line, /* in: line where called */
214
mtr_t* mtr); /* in: mini-transaction */
216
/***********************************************************************
217
Given a tablespace id and page number tries to get that page. If the
218
page is not in the buffer pool it is not loaded and NULL is returned.
219
Suitable for using when holding the kernel mutex. */
222
buf_page_try_get_func(
223
/*==================*/
224
ulint space_id,/* in: tablespace id */
225
ulint page_no,/* in: page number */
226
const char* file, /* in: file name */
227
ulint line, /* in: line where called */
228
mtr_t* mtr); /* in: mini-transaction */
230
/* Convenience wrapper for buf_page_try_get_func() that passes the call
site's __FILE__ and __LINE__ as the file and line arguments. The expansion
deliberately carries no trailing semicolon, so that the macro behaves like
an ordinary function call and can be used inside a larger expression
(for example as the right-hand side of an assignment in an if condition).
Callers terminate the statement themselves. */
#define buf_page_try_get(space_id, page_no, mtr)	\
	buf_page_try_get_func(space_id, page_no, __FILE__, __LINE__, mtr)
233
/************************************************************************
234
Get read access to a compressed page (usually of type
235
FIL_PAGE_TYPE_ZBLOB or FIL_PAGE_TYPE_ZBLOB2).
236
The page must be released with buf_page_release_zip().
237
NOTE: the page is not protected by any latch. Mutual exclusion has to
238
be implemented at a higher level. In other words, all possible
239
accesses to a given page through this function must be protected by
240
the same set of mutexes or latches. */
245
/* out: pointer to the block,
246
or NULL if not compressed */
247
ulint space, /* in: space id */
248
ulint zip_size,/* in: compressed page size */
249
ulint offset);/* in: page number */
250
/************************************************************************
251
This is the general function used to get access to a database page. */
256
/* out: pointer to the block or NULL */
257
ulint space, /* in: space id */
258
ulint zip_size,/* in: compressed page size in bytes
259
or 0 for uncompressed pages */
260
ulint offset, /* in: page number */
261
ulint rw_latch,/* in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
262
buf_block_t* guess, /* in: guessed block or NULL */
263
ulint mode, /* in: BUF_GET, BUF_GET_IF_IN_POOL,
265
const char* file, /* in: file name */
266
ulint line, /* in: line where called */
267
mtr_t* mtr); /* in: mini-transaction */
268
/************************************************************************
269
Initializes a page to the buffer buf_pool. The page is usually not read
270
from a file even if it cannot be found in the buffer buf_pool. This is one
271
of the functions which perform on a block the state transition NOT_USED =>
272
FILE_PAGE (the other is buf_page_get_gen). */
277
/* out: pointer to the block, page bufferfixed */
278
ulint space, /* in: space id */
279
ulint offset, /* in: offset of the page within space in units of
281
ulint zip_size,/* in: compressed page size, or 0 */
282
mtr_t* mtr); /* in: mini-transaction handle */
283
#ifdef UNIV_HOTBACKUP
284
/************************************************************************
285
Inits a page to the buffer buf_pool, for use in ibbackup --restore. */
288
buf_page_init_for_backup_restore(
289
/*=============================*/
290
ulint space, /* in: space id */
291
ulint offset, /* in: offset of the page within space
292
in units of a page */
293
ulint zip_size,/* in: compressed page size in bytes
294
or 0 for uncompressed pages */
295
buf_block_t* block); /* in: block to init */
296
#endif /* UNIV_HOTBACKUP */
297
/************************************************************************
298
Releases a compressed-only page acquired with buf_page_get_zip(). */
301
buf_page_release_zip(
302
/*=================*/
303
buf_page_t* bpage); /* in: buffer block */
304
/************************************************************************
305
Decrements the bufferfix count of a buffer control block and releases
306
a latch, if specified. */
311
buf_block_t* block, /* in: buffer block */
312
ulint rw_latch, /* in: RW_S_LATCH, RW_X_LATCH,
314
mtr_t* mtr); /* in: mtr */
315
/************************************************************************
316
Moves a page to the start of the buffer pool LRU list. This high-level
317
function can be used to prevent an important page from slipping out of
323
buf_page_t* bpage); /* in: buffer block of a file page */
324
/************************************************************************
325
Returns TRUE if the page can be found in the buffer pool hash table. NOTE
326
that it is possible that the page is not yet read from disk, though. */
331
/* out: TRUE if found from page hash table,
332
NOTE that the page is not necessarily yet read
334
ulint space, /* in: space id */
335
ulint offset);/* in: page number */
336
/************************************************************************
337
Resets the check_index_page_at_flush field of a page if found in the buffer
341
buf_reset_check_index_page_at_flush(
342
/*================================*/
343
ulint space, /* in: space id */
344
ulint offset);/* in: page number */
345
#ifdef UNIV_DEBUG_FILE_ACCESSES
346
/************************************************************************
347
Sets file_page_was_freed TRUE if the page is found in the buffer pool.
348
This function should be called when we free a file page and want the
349
debug version to check that it is not accessed any more unless
353
buf_page_set_file_page_was_freed(
354
/*=============================*/
355
/* out: control block if found in page hash table,
357
ulint space, /* in: space id */
358
ulint offset);/* in: page number */
359
/************************************************************************
360
Sets file_page_was_freed FALSE if the page is found in the buffer pool.
361
This function should be called when we free a file page and want the
362
debug version to check that it is not accessed any more unless
366
buf_page_reset_file_page_was_freed(
367
/*===============================*/
368
/* out: control block if found in page hash table,
370
ulint space, /* in: space id */
371
ulint offset); /* in: page number */
372
#endif /* UNIV_DEBUG_FILE_ACCESSES */
373
/************************************************************************
374
Reads the freed_page_clock of a buffer block. */
377
buf_page_get_freed_page_clock(
378
/*==========================*/
379
/* out: freed_page_clock */
380
const buf_page_t* bpage) /* in: block */
381
__attribute__((pure));
382
/************************************************************************
383
Reads the freed_page_clock of a buffer block. */
386
buf_block_get_freed_page_clock(
387
/*===========================*/
388
/* out: freed_page_clock */
389
const buf_block_t* block) /* in: block */
390
__attribute__((pure));
392
/************************************************************************
393
Recommends a move of a block to the start of the LRU list if there is danger
394
of dropping from the buffer pool. NOTE: does not reserve the buffer pool
398
buf_page_peek_if_too_old(
399
/*=====================*/
400
/* out: TRUE if should be made
402
const buf_page_t* bpage); /* in: block to make younger */
403
/************************************************************************
404
Returns the current state of is_hashed of a page. FALSE if the page is
405
not in the pool. NOTE that this operation does not fix the page in the
406
pool if it is found there. */
409
buf_page_peek_if_search_hashed(
410
/*===========================*/
411
/* out: TRUE if page hash index is built in search
413
ulint space, /* in: space id */
414
ulint offset);/* in: page number */
415
/************************************************************************
416
Gets the youngest modification log sequence number for a frame.
417
Returns zero if not file page or no modification occurred yet. */
420
buf_page_get_newest_modification(
421
/*=============================*/
422
/* out: newest modification to page */
423
const buf_page_t* bpage); /* in: block containing the
425
/************************************************************************
426
Increments the modify clock of a frame by 1. The caller must (1) own the
427
buf_pool mutex and block bufferfix count has to be zero, (2) or own an x-lock
431
buf_block_modify_clock_inc(
432
/*=======================*/
433
buf_block_t* block); /* in: block */
434
/************************************************************************
435
Returns the value of the modify clock. The caller must have an s-lock
436
or x-lock on the block. */
439
buf_block_get_modify_clock(
440
/*=======================*/
442
buf_block_t* block); /* in: block */
443
/************************************************************************
444
Calculates a page checksum which is stored to the page when it is written
445
to a file. Note that we must be careful to calculate the same value
446
on 32-bit and 64-bit architectures. */
449
buf_calc_page_new_checksum(
450
/*=======================*/
452
const byte* page); /* in: buffer page */
453
/************************************************************************
454
In versions < 4.0.14 and < 4.1.1 there was a bug that the checksum only
455
looked at the first few bytes of the page. This calculates that old
457
NOTE: we must first store the new formula checksum to
458
FIL_PAGE_SPACE_OR_CHKSUM before calculating and storing this old checksum
459
because this takes that field as an input! */
462
buf_calc_page_old_checksum(
463
/*=======================*/
465
const byte* page); /* in: buffer page */
466
/************************************************************************
467
Checks if a page is corrupt. */
470
buf_page_is_corrupted(
471
/*==================*/
472
/* out: TRUE if corrupted */
473
const byte* read_buf, /* in: a database page */
474
ulint zip_size); /* in: size of compressed page;
475
0 for uncompressed pages */
476
/**************************************************************************
477
Gets the space id, page offset, and byte offset within page of a
478
pointer pointing to a buffer frame containing a file page. */
481
buf_ptr_get_fsp_addr(
482
/*=================*/
483
const void* ptr, /* in: pointer to a buffer frame */
484
ulint* space, /* out: space id */
485
fil_addr_t* addr); /* out: page offset and byte offset */
486
/**************************************************************************
487
Gets the hash value of a block. This can be used in searches in the
491
buf_block_get_lock_hash_val(
492
/*========================*/
493
/* out: lock hash value */
494
const buf_block_t* block) /* in: block */
495
__attribute__((pure));
497
/*************************************************************************
498
Finds a block in the buffer pool that points to a
499
given compressed page. */
502
buf_pool_contains_zip(
503
/*==================*/
504
/* out: buffer block pointing to
505
the compressed page, or NULL */
506
const void* data); /* in: pointer to compressed page */
507
#endif /* UNIV_DEBUG */
508
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
509
/*************************************************************************
510
Validates the buffer pool data structure. */
515
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
516
#if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
517
/*************************************************************************
518
Prints info of the buffer pool data structure. */
523
#endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */
524
/************************************************************************
525
Prints a page to stderr. */
530
const byte* read_buf, /* in: a database page */
531
ulint zip_size); /* in: compressed page size, or
532
0 for uncompressed pages */
533
/*************************************************************************
534
Returns the number of latched pages in the buffer pool. */
537
buf_get_latched_pages_number(void);
538
/*==============================*/
539
/*************************************************************************
540
Returns the number of pending buf pool ios. */
543
buf_get_n_pending_ios(void);
544
/*=======================*/
545
/*************************************************************************
546
Prints info of the buffer i/o. */
551
FILE* file); /* in: file where to print */
552
/*************************************************************************
553
Returns the ratio in percents of modified pages in the buffer pool /
554
database pages in the buffer pool. */
557
buf_get_modified_ratio_pct(void);
558
/*============================*/
559
/**************************************************************************
560
Refreshes the statistics used to print per-second averages. */
563
buf_refresh_io_stats(void);
564
/*======================*/
565
/*************************************************************************
566
Checks that all file pages in the buffer are in a replaceable state. */
571
/*************************************************************************
572
Checks that there currently are no pending i/o-operations for the buffer
576
buf_pool_check_no_pending_io(void);
577
/*==============================*/
578
/* out: TRUE if there is no pending i/o */
579
/*************************************************************************
580
Invalidates the file pages in the buffer pool when an archive recovery is
581
completed. All the file pages buffered must be in a replaceable state when
582
this function is called: not latched and not modified. */
585
buf_pool_invalidate(void);
586
/*=====================*/
588
/*========================================================================
589
--------------------------- LOWER LEVEL ROUTINES -------------------------
590
=========================================================================*/
592
#ifdef UNIV_SYNC_DEBUG
593
/*************************************************************************
594
Adds latch level info for the rw-lock protecting the buffer frame. This
595
should be called in the debug version after a successful latching of a
596
page if we know the latching order level of the acquired latch. */
599
buf_block_dbg_add_level(
600
/*====================*/
601
buf_block_t* block, /* in: buffer page
602
where we have acquired latch */
603
ulint level); /* in: latching order level */
604
#endif /* UNIV_SYNC_DEBUG */
605
/*************************************************************************
606
Gets the state of a block. */
612
const buf_page_t* bpage); /* in: pointer to the control block */
613
/*************************************************************************
614
Gets the state of a block. */
620
const buf_block_t* block) /* in: pointer to the control block */
621
__attribute__((pure));
622
/*************************************************************************
623
Sets the state of a block. */
628
buf_page_t* bpage, /* in/out: pointer to control block */
629
enum buf_page_state state); /* in: state */
630
/*************************************************************************
631
Sets the state of a block. */
636
buf_block_t* block, /* in/out: pointer to control block */
637
enum buf_page_state state); /* in: state */
638
/*************************************************************************
639
Determines if a block is mapped to a tablespace. */
644
/* out: TRUE if mapped */
645
const buf_page_t* bpage) /* in: pointer to control block */
646
__attribute__((pure));
647
/*************************************************************************
648
Determines if a block should be on unzip_LRU list. */
651
buf_page_belongs_to_unzip_LRU(
652
/*==========================*/
653
/* out: TRUE if block belongs
655
const buf_page_t* bpage) /* in: pointer to control block */
656
__attribute__((pure));
657
/*************************************************************************
658
Determine the approximate LRU list position of a block. */
661
buf_page_get_LRU_position(
662
/*======================*/
663
/* out: LRU list position */
664
const buf_page_t* bpage) /* in: control block */
665
__attribute__((pure));
667
/*************************************************************************
668
Gets the mutex of a block. */
673
/* out: pointer to mutex
675
const buf_page_t* bpage) /* in: pointer to control block */
676
__attribute__((pure));
678
/*************************************************************************
679
Get the flush type of a page. */
682
buf_page_get_flush_type(
683
/*====================*/
684
/* out: flush type */
685
const buf_page_t* bpage) /* in: buffer page */
686
__attribute__((pure));
687
/*************************************************************************
688
Set the flush type of a page. */
691
buf_page_set_flush_type(
692
/*====================*/
693
buf_page_t* bpage, /* in: buffer page */
694
enum buf_flush flush_type); /* in: flush type */
695
/*************************************************************************
696
Map a block to a file page. */
699
buf_block_set_file_page(
700
/*====================*/
701
buf_block_t* block, /* in/out: pointer to control block */
702
ulint space, /* in: tablespace id */
703
ulint page_no);/* in: page number */
704
/*************************************************************************
705
Gets the io_fix state of a block. */
710
/* out: io_fix state */
711
const buf_page_t* bpage) /* in: pointer to the control block */
712
__attribute__((pure));
713
/*************************************************************************
714
Gets the io_fix state of a block. */
717
buf_block_get_io_fix(
719
/* out: io_fix state */
720
const buf_block_t* block) /* in: pointer to the control block */
721
__attribute__((pure));
722
/*************************************************************************
723
Sets the io_fix state of a block. */
728
buf_page_t* bpage, /* in/out: control block */
729
enum buf_io_fix io_fix);/* in: io_fix state */
730
/*************************************************************************
731
Sets the io_fix state of a block. */
734
buf_block_set_io_fix(
735
/*=================*/
736
buf_block_t* block, /* in/out: control block */
737
enum buf_io_fix io_fix);/* in: io_fix state */
739
/************************************************************************
740
Determine if a buffer block can be relocated in memory. The block
741
can be dirty, but it must not be I/O-fixed or bufferfixed. */
744
buf_page_can_relocate(
745
/*==================*/
746
const buf_page_t* bpage) /* control block being relocated */
747
__attribute__((pure));
749
/*************************************************************************
750
Determine if a block has been flagged old. */
755
/* out: TRUE if old */
756
const buf_page_t* bpage) /* in: control block */
757
__attribute__((pure));
758
/*************************************************************************
764
buf_page_t* bpage, /* in/out: control block */
765
ibool old); /* in: old */
766
/*************************************************************************
767
Determine if a block has been accessed in the buffer pool. */
770
buf_page_is_accessed(
771
/*=================*/
772
/* out: TRUE if accessed */
773
const buf_page_t* bpage) /* in: control block */
774
__attribute__((pure));
775
/*************************************************************************
776
Flag a block accessed. */
779
buf_page_set_accessed(
780
/*==================*/
781
buf_page_t* bpage, /* in/out: control block */
782
ibool accessed); /* in: accessed */
783
/*************************************************************************
784
Gets the buf_block_t handle of a buffered file block if an uncompressed
785
page frame exists, or NULL. */
790
/* out: control block, or NULL */
791
buf_page_t* bpage) /* in: control block, or NULL */
792
__attribute__((pure));
794
/*************************************************************************
795
Gets a pointer to the memory frame of a block. */
800
/* out: pointer to the frame */
801
const buf_block_t* block) /* in: pointer to the control block */
802
__attribute__((pure));
803
#else /* UNIV_DEBUG */
804
/* Non-debug variant: direct access to the frame member of the block. */
# define buf_block_get_frame(block) ((block)->frame)
805
#endif /* UNIV_DEBUG */
806
/*************************************************************************
807
Gets the space id of a block. */
813
const buf_page_t* bpage) /* in: pointer to the control block */
814
__attribute__((pure));
815
/*************************************************************************
816
Gets the space id of a block. */
822
const buf_block_t* block) /* in: pointer to the control block */
823
__attribute__((pure));
824
/*************************************************************************
825
Gets the page number of a block. */
828
buf_page_get_page_no(
829
/*=================*/
830
/* out: page number */
831
const buf_page_t* bpage) /* in: pointer to the control block */
832
__attribute__((pure));
833
/*************************************************************************
834
Gets the page number of a block. */
837
buf_block_get_page_no(
838
/*==================*/
839
/* out: page number */
840
const buf_block_t* block) /* in: pointer to the control block */
841
__attribute__((pure));
842
/*************************************************************************
843
Gets the compressed page size of a block. */
846
buf_page_get_zip_size(
847
/*==================*/
848
/* out: compressed page size, or 0 */
849
const buf_page_t* bpage) /* in: pointer to the control block */
850
__attribute__((pure));
851
/*************************************************************************
852
Gets the compressed page size of a block. */
855
buf_block_get_zip_size(
856
/*===================*/
857
/* out: compressed page size, or 0 */
858
const buf_block_t* block) /* in: pointer to the control block */
859
__attribute__((pure));
860
/*************************************************************************
Gets the compressed page descriptor corresponding to an uncompressed page.
Evaluates to the address of (block)->page.zip when (block)->page.zip.data
is non-NULL, and to NULL otherwise. Note that the block argument is
evaluated twice when the data pointer is non-NULL, so it must not have
side effects. */
#define buf_block_get_page_zip(block) \
	(UNIV_LIKELY_NULL((block)->page.zip.data) ? &(block)->page.zip : NULL)
865
#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
866
/***********************************************************************
867
Gets the block to whose frame the pointer is pointing. */
872
/* out: pointer to block */
873
const byte* ptr); /* in: pointer to a frame */
874
/*************************************************************************
875
Gets the compressed page descriptor corresponding to an uncompressed page
878
const page_zip_des_t*
879
buf_frame_get_page_zip(
880
/*===================*/
881
/* out: compressed page descriptor, or NULL */
882
const byte* ptr); /* in: pointer to the page */
883
#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
884
/************************************************************************
885
This function is used to get info if there is an io operation
886
going on on a buffer page. */
891
/* out: TRUE if io going on */
892
buf_page_t* bpage); /* in: pool block, must be bufferfixed */
893
/************************************************************************
894
Function which inits a page for read to the buffer buf_pool. If the page is
895
(1) already in buf_pool, or
896
(2) if we specify to read only ibuf pages and the page is not an ibuf page, or
897
(3) if the space is deleted or being deleted,
898
then this function does nothing.
899
Sets the io_fix flag to BUF_IO_READ and sets a non-recursive exclusive lock
900
on the buffer frame. The io-handler must take care that the flag is cleared
901
and the lock released later. */
904
buf_page_init_for_read(
905
/*===================*/
906
/* out: pointer to the block or NULL */
907
ulint* err, /* out: DB_SUCCESS or DB_TABLESPACE_DELETED */
908
ulint mode, /* in: BUF_READ_IBUF_PAGES_ONLY, ... */
909
ulint space, /* in: space id */
910
ulint zip_size,/* in: compressed page size, or 0 */
911
ibool unzip, /* in: TRUE=request uncompressed page */
912
ib_int64_t tablespace_version,/* in: prevents reading from a wrong
913
version of the tablespace in case we have done
915
ulint offset);/* in: page number */
916
/************************************************************************
917
Completes an asynchronous read or write request of a file page to or from
921
buf_page_io_complete(
922
/*=================*/
923
buf_page_t* bpage); /* in: pointer to the block in question */
924
/************************************************************************
925
Calculates a folded value of a file page address to use in the page hash
929
buf_page_address_fold(
930
/*==================*/
931
/* out: the folded value */
932
ulint space, /* in: space id */
933
ulint offset) /* in: offset of the page within space */
934
__attribute__((const));
935
/**********************************************************************
936
Returns the control block of a file page, NULL if not found. */
941
/* out: block, NULL if not found */
942
ulint space, /* in: space id */
943
ulint offset);/* in: offset of the page within space */
944
/**********************************************************************
945
Returns the control block of a file page, NULL if not found
946
or an uncompressed page frame does not exist. */
951
/* out: block, NULL if not found */
952
ulint space, /* in: space id */
953
ulint offset);/* in: offset of the page within space */
954
/***********************************************************************
Increments the pool clock by one and returns its new value. Remember that
in the 32 bit version the clock wraps around at 4 billion!
NOTE(review): the line giving this declaration's return type (and any
linkage macro) is absent from this copy of the file; restore it from the
original header before compiling. */
buf_pool_clock_tic(void);
/*====================*/
			/* out: new clock value */
/*************************************************************************
Gets the current length of the free list of buffer blocks.
NOTE(review): the line giving this declaration's return type (and any
linkage macro) is absent from this copy of the file; restore it from the
original header before compiling. */
buf_get_free_list_len(void);
/*=======================*/
/* The common buffer control block structure
972
for compressed and uncompressed frames */
974
struct buf_page_struct{
975
/* None of the following bit-fields must be modified without
976
holding buf_page_get_mutex() [block->mutex or buf_pool_zip_mutex],
977
since they can be stored in the same machine word. Some of them are
978
additionally protected by buf_pool_mutex. */
980
unsigned space:32; /* tablespace id */
981
unsigned offset:32; /* page number */
983
unsigned state:3; /* state of the control block
984
(@see enum buf_page_state); also
985
protected by buf_pool_mutex.
986
State transitions from
987
BUF_BLOCK_READY_FOR_USE to
988
BUF_BLOCK_MEMORY need not be
989
protected by buf_page_get_mutex(). */
990
unsigned flush_type:2; /* if this block is currently being
991
flushed to disk, this tells the
992
flush_type (@see enum buf_flush) */
993
unsigned accessed:1; /* TRUE if the page has been accessed
994
while in the buffer pool: read-ahead
995
may read in pages which have not been
996
accessed yet; a thread is allowed to
997
read this for heuristic purposes
998
without holding any mutex or latch */
999
unsigned io_fix:2; /* type of pending I/O operation
1000
(@see enum buf_io_fix); also
1001
protected by buf_pool_mutex */
1002
unsigned buf_fix_count:24;/* count of how manyfold this block
1003
is currently bufferfixed */
1005
page_zip_des_t zip; /* compressed page; zip.data
1006
(but not the data it points to) is
1007
also protected by buf_pool_mutex */
1008
buf_page_t* hash; /* node used in chaining to
1009
buf_pool->page_hash or
1010
buf_pool->zip_hash */
1012
ibool in_page_hash; /* TRUE if in buf_pool->page_hash */
1013
ibool in_zip_hash; /* TRUE if in buf_pool->zip_hash */
1014
#endif /* UNIV_DEBUG */
1016
/* 2. Page flushing fields; protected by buf_pool_mutex */
1018
UT_LIST_NODE_T(buf_page_t) list;
1019
/* based on state, this is a list
1020
node in one of the following lists
1023
BUF_BLOCK_NOT_USED: free
1024
BUF_BLOCK_FILE_PAGE: flush_list
1025
BUF_BLOCK_ZIP_DIRTY: flush_list
1026
BUF_BLOCK_ZIP_PAGE: zip_clean
1027
BUF_BLOCK_ZIP_FREE: zip_free[] */
1029
ibool in_flush_list; /* TRUE if in buf_pool->flush_list;
1030
when buf_pool_mutex is free, the
1031
following should hold: in_flush_list
1032
== (state == BUF_BLOCK_FILE_PAGE
1033
|| state == BUF_BLOCK_ZIP_DIRTY) */
1034
ibool in_free_list; /* TRUE if in buf_pool->free; when
1035
buf_pool_mutex is free, the following
1036
should hold: in_free_list
1037
== (state == BUF_BLOCK_NOT_USED) */
1038
#endif /* UNIV_DEBUG */
1039
ib_uint64_t newest_modification;
1040
/* log sequence number of the youngest
1041
modification to this block, zero if
1043
ib_uint64_t oldest_modification;
1044
/* log sequence number of the START of
1045
the log entry written of the oldest
1046
modification to this block which has
1047
not yet been flushed on disk; zero if
1048
all modifications are on disk */
1050
/* 3. LRU replacement algorithm fields; protected by buf_pool_mutex */
1052
UT_LIST_NODE_T(buf_page_t) LRU;
1053
/* node of the LRU list */
1055
ibool in_LRU_list; /* TRUE if the page is in the LRU list;
1056
used in debugging */
1057
#endif /* UNIV_DEBUG */
1058
unsigned old:1; /* TRUE if the block is in the old
1059
blocks in the LRU list */
1060
unsigned LRU_position:31;/* value which monotonically decreases
1061
(or may stay constant if old==TRUE)
1062
toward the end of the LRU list, if
1063
buf_pool->ulint_clock has not wrapped
1064
around: NOTE that this value can only
1065
be used in heuristic algorithms,
1066
because of the possibility of a
1068
unsigned freed_page_clock:32;/* the value of
1069
buf_pool->freed_page_clock when this
1070
block was the last time put to the
1071
head of the LRU list; a thread is
1072
allowed to read this for heuristic
1073
purposes without holding any mutex or
1075
#ifdef UNIV_DEBUG_FILE_ACCESSES
1076
ibool file_page_was_freed;
1077
/* this is set to TRUE when fsp
1078
frees a page in buffer pool */
1079
#endif /* UNIV_DEBUG_FILE_ACCESSES */
1082
/* The buffer control block structure */
1084
struct buf_block_struct{
1086
/* 1. General fields */
1088
buf_page_t page; /* page information; this must
1089
be the first field, so that
1090
buf_pool->page_hash can point
1091
to buf_page_t or buf_block_t */
1092
UT_LIST_NODE_T(buf_block_t) unzip_LRU;
1093
/* node of the decompressed LRU list;
1094
a block is in the unzip_LRU list
1095
if page.state == BUF_BLOCK_FILE_PAGE
1096
and page.zip.data != NULL */
1098
ibool in_unzip_LRU_list;/* TRUE if the page is in the
1099
decompressed LRU list;
1100
used in debugging */
1101
#endif /* UNIV_DEBUG */
1102
byte* frame; /* pointer to buffer frame which
1103
is of size UNIV_PAGE_SIZE, and
1104
aligned to an address divisible by
1106
mutex_t mutex; /* mutex protecting this block:
1107
state (also protected by the buffer
1108
pool mutex), io_fix, buf_fix_count,
1109
and accessed; we introduce this new
1110
mutex in InnoDB-5.1 to relieve
1111
contention on the buffer pool mutex */
1112
rw_lock_t lock; /* read-write lock of the buffer
1114
unsigned lock_hash_val:32;/* hashed value of the page address
1115
in the record lock hash table */
1116
unsigned check_index_page_at_flush:1;
1117
/* TRUE if we know that this is
1118
an index page, and want the database
1119
to check its consistency before flush;
1120
note that there may be pages in the
1121
buffer pool which are index pages,
1122
but this flag is not set because
1123
we do not keep track of all pages */
1125
/* 2. Optimistic search field */
1127
ib_uint64_t modify_clock; /* this clock is incremented every
1128
time a pointer to a record on the
1129
page may become obsolete; this is
1130
used in the optimistic cursor
1131
positioning: if the modify clock has
1132
not changed, we know that the pointer
1133
is still valid; this field may be
1134
changed if the thread (1) owns the
1135
pool mutex and the page is not
1136
bufferfixed, or (2) the thread has an
1137
x-latch on the block */
1139
/* 3. Hash search fields: NOTE that the first 4 fields are NOT
1140
protected by any semaphore! */
1142
ulint n_hash_helps; /* counter which controls building
1143
of a new hash index for the page */
1144
ulint n_fields; /* recommended prefix length for hash
1145
search: number of full fields */
1146
ulint n_bytes; /* recommended prefix: number of bytes
1147
in an incomplete field */
1148
ibool left_side; /* TRUE or FALSE, depending on
1149
whether the leftmost record of several
1150
records with the same prefix should be
1151
indexed in the hash index */
1153
/* These 6 fields may only be modified when we have
1154
an x-latch on btr_search_latch AND
1155
a) we are holding an s-latch or x-latch on block->lock or
1156
b) we know that block->buf_fix_count == 0.
1158
An exception to this is when we init or create a page
1159
in the buffer pool in buf0buf.c. */
1162
ulint n_pointers; /* used in debugging: the number of
1163
pointers in the adaptive hash index
1164
pointing to this frame */
1165
#endif /* UNIV_DEBUG */
1166
unsigned is_hashed:1; /* TRUE if hash index has already been
1167
built on this page; note that it does
1168
not guarantee that the index is
1169
complete, though: there may have been
1170
hash collisions, record deletions,
1172
unsigned curr_n_fields:10;/* prefix length for hash indexing:
1173
number of full fields */
1174
unsigned curr_n_bytes:15;/* number of bytes in hash indexing */
1175
unsigned curr_left_side:1;/* TRUE or FALSE in hash indexing */
1176
dict_index_t* index; /* Index for which the adaptive
1177
hash index has been created. */
1178
/* 4. Debug fields */
1179
#ifdef UNIV_SYNC_DEBUG
1180
rw_lock_t debug_latch; /* in the debug version, each thread
1181
which bufferfixes the block acquires
1182
an s-latch here; so we can use the
1183
debug utilities in sync0rw */
1187
/* Check if a buf_block_t object is in a valid state.
Evaluates to nonzero when the value of buf_block_get_state(block) lies in
the closed range [BUF_BLOCK_NOT_USED, BUF_BLOCK_REMOVE_HASH].  The argument
is evaluated twice, so it must not have side effects. */
#define buf_block_state_valid(block)				\
	(buf_block_get_state(block) >= BUF_BLOCK_NOT_USED	\
	 && (buf_block_get_state(block) <= BUF_BLOCK_REMOVE_HASH))

/**************************************************************************
Compute the hash fold value for blocks in buf_pool->zip_hash.
BUF_POOL_ZIP_FOLD_PTR divides a frame address, converted to ulint, by
UNIV_PAGE_SIZE; BUF_POOL_ZIP_FOLD applies that to (b)->frame; and
BUF_POOL_ZIP_FOLD_BPAGE first casts its argument to buf_block_t*. */
#define BUF_POOL_ZIP_FOLD_PTR(ptr) ((ulint) (ptr) / UNIV_PAGE_SIZE)
#define BUF_POOL_ZIP_FOLD(b) BUF_POOL_ZIP_FOLD_PTR((b)->frame)
#define BUF_POOL_ZIP_FOLD_BPAGE(b) BUF_POOL_ZIP_FOLD((buf_block_t*) (b))

/* The buffer pool structure. NOTE! The definition appears here only for
1199
other modules of this directory (buf) to see it. Do not use from outside! */
1201
struct buf_pool_struct{
1203
/* 1. General fields */
1205
ulint n_chunks; /* number of buffer pool chunks */
1206
buf_chunk_t* chunks; /* buffer pool chunks */
1207
ulint curr_size; /* current pool size in pages */
1208
hash_table_t* page_hash; /* hash table of buf_page_t or
1209
buf_block_t file pages,
1210
buf_page_in_file() == TRUE,
1211
indexed by (space_id, offset) */
1212
hash_table_t* zip_hash; /* hash table of buf_block_t blocks
1213
whose frames are allocated to the
1215
indexed by block->frame */
1216
ulint n_pend_reads; /* number of pending read operations */
1217
ulint n_pend_unzip; /* number of pending decompressions */
1219
time_t last_printout_time; /* when buf_print was last time
1221
ulint n_pages_read; /* number read operations */
1222
ulint n_pages_written;/* number write operations */
1223
ulint n_pages_created;/* number of pages created in the pool
1225
ulint n_page_gets; /* number of page gets performed;
1226
also successful searches through
1227
the adaptive hash index are
1228
counted as page gets; this field
1229
is NOT protected by the buffer
1231
ulint n_page_gets_old;/* n_page_gets when buf_print was
1232
last time called: used to calculate
1234
ulint n_pages_read_old;/* n_pages_read when buf_print was
1236
ulint n_pages_written_old;/* number write operations */
1237
ulint n_pages_created_old;/* number of pages created in
1238
the pool with no read */
1239
/* 2. Page flushing algorithm fields */
1241
UT_LIST_BASE_NODE_T(buf_page_t) flush_list;
1242
/* base node of the modified block
1244
ibool init_flush[BUF_FLUSH_N_TYPES];
1245
/* this is TRUE when a flush of the
1246
given type is being initialized */
1247
ulint n_flush[BUF_FLUSH_N_TYPES];
1248
/* this is the number of pending
1249
writes in the given flush type */
1250
os_event_t no_flush[BUF_FLUSH_N_TYPES];
1251
/* this is in the set state when there
1252
is no flush batch of the given type
1254
ulint ulint_clock; /* a sequence number used to count
1255
time. NOTE! This counter wraps
1256
around at 4 billion (if ulint ==
1258
ulint freed_page_clock;/* a sequence number used to count the
1259
number of buffer blocks removed from
1260
the end of the LRU list; NOTE that
1261
this counter may wrap around at 4
1262
billion! A thread is allowed to
1263
read this for heuristic purposes
1264
without holding any mutex or latch */
1265
ulint LRU_flush_ended;/* when an LRU flush ends for a page,
1266
this is incremented by one; this is
1267
set to zero when a buffer block is
1270
/* 3. LRU replacement algorithm fields */
1272
UT_LIST_BASE_NODE_T(buf_page_t) free;
1273
/* base node of the free block list */
1274
UT_LIST_BASE_NODE_T(buf_page_t) LRU;
1275
/* base node of the LRU list */
1276
buf_page_t* LRU_old; /* pointer to the about 3/8 oldest
1277
blocks in the LRU list; NULL if LRU
1278
length less than BUF_LRU_OLD_MIN_LEN */
1279
ulint LRU_old_len; /* length of the LRU list from
1280
the block to which LRU_old points
1281
onward, including that block;
1282
see buf0lru.c for the restrictions
1283
on this value; not defined if
1286
UT_LIST_BASE_NODE_T(buf_block_t) unzip_LRU;
1287
/* base node of the unzip_LRU list */
1289
/* 4. Fields for the buddy allocator of compressed pages */
1290
UT_LIST_BASE_NODE_T(buf_page_t) zip_clean;
1291
/* unmodified compressed pages */
1292
UT_LIST_BASE_NODE_T(buf_page_t) zip_free[BUF_BUDDY_SIZES];
1293
/* buddy free lists */
1294
#if BUF_BUDDY_HIGH != UNIV_PAGE_SIZE
1295
# error "BUF_BUDDY_HIGH != UNIV_PAGE_SIZE"
1297
#if BUF_BUDDY_LOW > PAGE_ZIP_MIN_SIZE
1298
# error "BUF_BUDDY_LOW > PAGE_ZIP_MIN_SIZE"
1302
/* mutex protecting the buffer pool struct and control blocks, except the
1303
read-write lock in them */
1304
extern mutex_t buf_pool_mutex;
1305
/* mutex protecting the control blocks of compressed-only pages
1306
(of type buf_page_t, not buf_block_t) */
1307
extern mutex_t buf_pool_zip_mutex;
1309
/* Accessors for buf_pool_mutex.  Use these instead of accessing
buf_pool_mutex directly. */

/* Test if buf_pool_mutex is owned. */
#define buf_pool_mutex_own() mutex_own(&buf_pool_mutex)
/* Acquire the buffer pool mutex.  The ut_ad() check asserts that
buf_pool_zip_mutex is not owned at the time of the call.  Wrapped in
do { } while (0) so the macro behaves as a single statement. */
#define buf_pool_mutex_enter() do {		\
	ut_ad(!mutex_own(&buf_pool_zip_mutex));	\
	mutex_enter(&buf_pool_mutex);		\
} while (0)

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/** Flag to forbid the release of the buffer pool mutex.
Protected by buf_pool_mutex. */
extern ulint		buf_pool_mutex_exit_forbidden;
/* Forbid the release of the buffer pool mutex.
Increments buf_pool_mutex_exit_forbidden; the caller must own
buf_pool_mutex. */
# define buf_pool_mutex_exit_forbid() do {	\
	ut_ad(buf_pool_mutex_own());		\
	buf_pool_mutex_exit_forbidden++;	\
} while (0)
/* Allow the release of the buffer pool mutex.
Decrements buf_pool_mutex_exit_forbidden, which must be nonzero; the
caller must own buf_pool_mutex. */
# define buf_pool_mutex_exit_allow() do {	\
	ut_ad(buf_pool_mutex_own());		\
	ut_a(buf_pool_mutex_exit_forbidden);	\
	buf_pool_mutex_exit_forbidden--;	\
} while (0)
/* Release the buffer pool mutex.
Asserts that buf_pool_mutex_exit_forbidden is zero before releasing. */
# define buf_pool_mutex_exit() do {		\
	ut_a(!buf_pool_mutex_exit_forbidden);	\
	mutex_exit(&buf_pool_mutex);		\
} while (0)
#else
/* Forbid the release of the buffer pool mutex. */
# define buf_pool_mutex_exit_forbid() ((void) 0)
/* Allow the release of the buffer pool mutex. */
# define buf_pool_mutex_exit_allow() ((void) 0)
/* Release the buffer pool mutex. */
# define buf_pool_mutex_exit() mutex_exit(&buf_pool_mutex)
#endif

/************************************************************************
Let us list the consistency conditions for different control block states.

NOT_USED:	is in free list, not in LRU list, not in flush list, nor
		page hash table
READY_FOR_USE:	is not in free list, LRU list, or flush list, nor page
		hash table
MEMORY:		is not in free list, LRU list, or flush list, nor page
		hash table
FILE_PAGE:	space and offset are defined, is in page hash table
		if io_fix == BUF_IO_WRITE,
			pool: no_flush[flush_type] is in reset state,
			pool: n_flush[flush_type] > 0

		(1) if buf_fix_count == 0, then
			is in LRU list, not in free list
			is in flush list,
				if and only if oldest_modification > 0
			is x-locked,
				if and only if io_fix == BUF_IO_READ
			is s-locked,
				if and only if io_fix == BUF_IO_WRITE

		(2) if buf_fix_count > 0, then
			is not in LRU list, not in free list
			is in flush list,
				if and only if oldest_modification > 0
			if io_fix == BUF_IO_READ,
				is x-locked
			if io_fix == BUF_IO_WRITE,
				is s-locked

State transitions:

NOT_USED => READY_FOR_USE
READY_FOR_USE => MEMORY
READY_FOR_USE => FILE_PAGE
MEMORY => NOT_USED
FILE_PAGE => NOT_USED	NOTE: This transition is allowed if and only if
				(1) buf_fix_count == 0,
				(2) oldest_modification == 0, and
				(3) io_fix == 0.
*/

#include "buf0buf.ic"