The buffer buf_pool contains a single mutex which protects all the
control data structures of the buf_pool. The content of a buffer frame is
protected by a separate read-write lock in its control block, though.
These locks can be locked and unlocked without owning the buf_pool->mutex.
The OS events in the buf_pool struct can be waited for without owning the
buf_pool->mutex.

The buf_pool->mutex is a hot-spot in main memory, causing a lot of
memory bus traffic on multiprocessor systems when processors
alternately access the mutex. On our Pentium, the mutex is accessed
maybe every 10 microseconds. We gave up the solution to have mutexes
for each control block, for instance, because it seemed to be
complicated.

A solution to reduce mutex contention of the buf_pool->mutex is to
create a separate mutex for the page hash table. On Pentium,
accessing the hash table takes 2 microseconds, about half
of the total buf_pool->mutex hold time.
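
As a minimal sketch of this locking protocol (buf_pool_mutex_enter(),
buf_page_hash_get_low() and the block mutex are the real primitives used
below; the helper function itself is hypothetical and only illustrative):

/* Illustrative sketch: look a page up under buf_pool->mutex, then pin it
under the block mutex; the frame contents are protected by block->lock,
which is taken without holding buf_pool->mutex. */
static buf_block_t*
example_fix_block(buf_pool_t* buf_pool, ulint space, ulint offset, ulint fold)
{
	buf_block_t*	block;

	buf_pool_mutex_enter(buf_pool);

	block = (buf_block_t*) buf_page_hash_get_low(
		buf_pool, space, offset, fold);

	if (block != NULL) {
		mutex_enter(&block->mutex);	/* block mutex, not buf_pool->mutex */
		block->page.buf_fix_count++;	/* pin the page */
		mutex_exit(&block->mutex);
	}

	buf_pool_mutex_exit(buf_pool);

	return(block);
}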
which we can use when we want to artificially age a page in the
buf_pool. This is used if we know that some page is not needed
again for some time: we insert the block right after the pointer,
causing it to be replaced sooner than would normally be the case.
Currently this aging mechanism is used for the read-ahead mechanism
of pages, and it can also be used when there is a scan of a full
table which cannot fit in the memory. Putting the pages near the
end of the LRU list, we make sure that most of the buf_pool stays
in the main memory, undisturbed.
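
A minimal sketch of how a read-ahead page could be aged this way
(buf_LRU_add_block() is the real API, also used later in this file; the
wrapper below is only illustrative):

/* Illustrative sketch: read-ahead pages enter the LRU list as "old"
blocks, so a read-ahead burst or a large scan is evicted before the
frequently accessed pages. */
static void
example_add_readahead_page(buf_pool_t* buf_pool, buf_page_t* bpage)
{
	ut_ad(buf_pool_mutex_own(buf_pool));

	buf_LRU_add_block(bpage, TRUE);	/* TRUE: add to the old blocks */
}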
The unzip_LRU list contains a subset of the common LRU list. The
blocks on the unzip_LRU list hold a compressed file page and the
corresponding uncompressed page frame.
#ifndef UNIV_HOTBACKUP
/** Value in microseconds */
static const int WAIT_FOR_READ = 5000;
/** Number of attempts made to read in a page in the buffer pool */
static const ulint BUF_PAGE_READ_MAX_RETRIES = 100;

/** The buffer pools of the database */
UNIV_INTERN buf_pool_t* buf_pool_ptr;
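
The per-instance access pattern used throughout the rest of this file can
be summarized with a minimal sketch (buf_pool_from_array(),
srv_buf_pool_instances and the buf_pool mutex calls are the real names;
the loop body and the helper function are illustrative only):

/* Illustrative sketch: visit every buffer pool instance in turn. */
static void
example_for_each_buf_pool(void)
{
	ulint	i;

	for (i = 0; i < srv_buf_pool_instances; i++) {
		buf_pool_t*	buf_pool = buf_pool_from_array(i);

		buf_pool_mutex_enter(buf_pool);
		/* ... inspect or modify this instance ... */
		buf_pool_mutex_exit(buf_pool);
	}
}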
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
static ulint buf_dbg_counter = 0; /*!< This is used to insert validation
operations in execution in the
debug version */
/** Flag to forbid the release of the buffer pool mutex.
Protected by buf_pool_mutex. */
UNIV_INTERN ulint buf_pool_mutex_exit_forbidden = 0;
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#ifdef UNIV_DEBUG
/** If this is set TRUE, the program prints info whenever
read-ahead or flush occurs */
UNIV_INTERN ibool buf_debug_prints = FALSE;
#endif /* UNIV_DEBUG */
#ifdef UNIV_PFS_RWLOCK
/* Keys to register buffer block related rwlocks and mutexes with
performance schema */
UNIV_INTERN mysql_pfs_key_t buf_block_lock_key;
# ifdef UNIV_SYNC_DEBUG
UNIV_INTERN mysql_pfs_key_t buf_block_debug_latch_key;
# endif /* UNIV_SYNC_DEBUG */
#endif /* UNIV_PFS_RWLOCK */

#ifdef UNIV_PFS_MUTEX
UNIV_INTERN mysql_pfs_key_t buffer_block_mutex_key;
UNIV_INTERN mysql_pfs_key_t buf_pool_mutex_key;
UNIV_INTERN mysql_pfs_key_t buf_pool_zip_mutex_key;
UNIV_INTERN mysql_pfs_key_t flush_list_mutex_key;
#endif /* UNIV_PFS_MUTEX */

#if defined UNIV_PFS_MUTEX || defined UNIV_PFS_RWLOCK
# ifndef PFS_SKIP_BUFFER_MUTEX_RWLOCK
/* Buffer block mutexes and rwlocks can be registered
in one group rather than individually. If PFS_GROUP_BUFFER_SYNC
is defined, register buffer block mutex and rwlock
in one group after their initialization. */
# define PFS_GROUP_BUFFER_SYNC

/* This define caps the number of mutexes/rwlocks that can
be registered with performance schema. Developers can
modify this define if necessary. Please note, this would
be effective only if PFS_GROUP_BUFFER_SYNC is defined. */
# define PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER ULINT_MAX

# endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
#endif /* UNIV_PFS_MUTEX || UNIV_PFS_RWLOCK */
/** A chunk of buffers. The buffer pool is allocated in chunks. */
struct buf_chunk_struct{
ulint mem_size; /*!< allocated size of the chunk */
#endif /* !UNIV_HOTBACKUP */
/********************************************************************//**
Gets the smallest oldest_modification lsn for any page in the pool. Returns
zero if all modified pages have been flushed to disk.
@return oldest modification in pool, zero if none */
buf_pool_get_oldest_modification(void)
/*==================================*/
ib_uint64_t oldest_lsn = 0;

/* When we traverse all the flush lists we don't want another
thread to add a dirty page to any flush list. */
log_flush_order_mutex_enter();

for (i = 0; i < srv_buf_pool_instances; i++) {
buf_pool_t* buf_pool;

buf_pool = buf_pool_from_array(i);

buf_flush_list_mutex_enter(buf_pool);

bpage = UT_LIST_GET_LAST(buf_pool->flush_list);

ut_ad(bpage->in_flush_list);
lsn = bpage->oldest_modification;

buf_flush_list_mutex_exit(buf_pool);

if (!oldest_lsn || oldest_lsn > lsn) {

log_flush_order_mutex_exit();

/* The returned answer may be out of date: the flush_list can
change after the mutex has been released. */
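
A hedged sketch of a typical caller (buf_pool_get_oldest_modification() is
the function above; the wrapper and the way its result is used are
illustrative assumptions, not code from this file):

/* Illustrative sketch: the oldest dirty-page LSN bounds how far a
checkpoint may advance; zero means every modified page is on disk. */
static ib_uint64_t
example_checkpoint_limit(ib_uint64_t checkpoint_target_lsn)
{
	ib_uint64_t	oldest = buf_pool_get_oldest_modification();

	if (oldest == 0 || oldest > checkpoint_target_lsn) {
		/* No dirty page is older than the target. */
		return(checkpoint_target_lsn);
	}

	return(oldest);
}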
/********************************************************************//**
Get total buffer pool statistics. */
buf_get_total_list_len(
/*===================*/
ulint* LRU_len, /*!< out: length of all LRU lists */
ulint* free_len, /*!< out: length of all free lists */
ulint* flush_list_len) /*!< out: length of all flush lists */

for (i = 0; i < srv_buf_pool_instances; i++) {
buf_pool_t* buf_pool;

buf_pool = buf_pool_from_array(i);
*LRU_len += UT_LIST_GET_LEN(buf_pool->LRU);
*free_len += UT_LIST_GET_LEN(buf_pool->free);
*flush_list_len += UT_LIST_GET_LEN(buf_pool->flush_list);
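
A minimal usage sketch (buf_get_total_list_len() is the function above;
initializing the counters before the call is an assumption made here,
since the fragment shown only accumulates into them):

/* Illustrative sketch: report the aggregate list lengths. */
static void
example_report_list_lens(FILE* file)
{
	ulint	lru_len = 0;
	ulint	free_len = 0;
	ulint	flush_list_len = 0;

	buf_get_total_list_len(&lru_len, &free_len, &flush_list_len);

	fprintf(file, "LRU %lu, free %lu, flush list %lu\n",
		(ulong) lru_len, (ulong) free_len, (ulong) flush_list_len);
}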
/********************************************************************//**
Get total buffer pool statistics. */
buf_pool_stat_t* tot_stat) /*!< out: buffer pool stats */

memset(tot_stat, 0, sizeof(*tot_stat));

for (i = 0; i < srv_buf_pool_instances; i++) {
buf_pool_stat_t* buf_stat;
buf_pool_t* buf_pool;

buf_pool = buf_pool_from_array(i);

buf_stat = &buf_pool->stat;
tot_stat->n_page_gets += buf_stat->n_page_gets;
tot_stat->n_pages_read += buf_stat->n_pages_read;
tot_stat->n_pages_written += buf_stat->n_pages_written;
tot_stat->n_pages_created += buf_stat->n_pages_created;
tot_stat->n_ra_pages_read += buf_stat->n_ra_pages_read;
tot_stat->n_ra_pages_evicted += buf_stat->n_ra_pages_evicted;
tot_stat->n_pages_made_young += buf_stat->n_pages_made_young;

tot_stat->n_pages_not_made_young +=
buf_stat->n_pages_not_made_young;
/********************************************************************//**
415
Allocates a buffer block.
416
@return own: the allocated block, in state BUF_BLOCK_MEMORY */
421
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
422
ulint zip_size) /*!< in: compressed page size in bytes,
423
or 0 if uncompressed tablespace */
427
static ulint buf_pool_index;
429
if (buf_pool == NULL) {
430
/* We are allocating memory from any buffer pool, ensure
431
we spread the grace on all buffer pool instances. */
432
index = buf_pool_index++ % srv_buf_pool_instances;
433
buf_pool = buf_pool_from_array(index);
436
block = buf_LRU_get_free_block(buf_pool, zip_size);
438
buf_block_set_state(block, BUF_BLOCK_MEMORY);
443
/********************************************************************//**
Calculates a page checksum which is stored to the page when it is written
to a file. Note that we must be careful to calculate the same value on
32-bit and 64-bit architectures.
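
The portability concern above can be illustrated with a hedged sketch; the
fold below is NOT the actual InnoDB checksum formula, only an example of
keeping the result within 32 bits so both word sizes compute the same value:

/* Illustrative sketch only: fold page bytes into a ulint and truncate
to 32 bits so that 32-bit and 64-bit builds store identical checksums. */
static ulint
example_page_checksum(const byte* read_buf, ulint len)
{
	ulint	fold = 0;
	ulint	i;

	for (i = 0; i < len; i++) {
		fold = (fold << 5) + fold + (ulint) read_buf[i];
	}

	return(fold & 0xFFFFFFFFUL);	/* mask to 32 bits for portability */
}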
ib_uint64_t current_lsn;

if (log_peek_lsn(&current_lsn)
&& UNIV_UNLIKELY(current_lsn
< mach_read_from_8(read_buf + FIL_PAGE_LSN))) {
ut_print_timestamp(stderr);

drizzled::errmsg_printf(drizzled::error::INFO,
"InnoDB: Error: page %lu log sequence number %"PRIu64". "
"InnoDB: is in the future! Current system log sequence number %"PRIu64". "
"Your database may be corrupt or you may have copied the InnoDB tablespace but not the InnoDB log files. See "
" " REFMAN "forcing-recovery.html for more information. ",
(ulong) mach_read_from_4(read_buf
mach_read_from_8(read_buf + FIL_PAGE_LSN),
#ifndef UNIV_HOTBACKUP

# ifdef PFS_GROUP_BUFFER_SYNC
/********************************************************************//**
This function registers mutexes and rwlocks in buffer blocks with
performance schema. If PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER is
defined to be a value less than chunk->size, then only mutexes
and rwlocks in the first PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER
blocks are registered. */
pfs_register_buffer_block(
/*======================*/
buf_chunk_t* chunk) /*!< in/out: chunk of buffers */

ulint num_to_register;

block = chunk->blocks;

num_to_register = ut_min(chunk->size,
PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER);

for (i = 0; i < num_to_register; i++) {
# ifdef UNIV_PFS_MUTEX
mutex = &block->mutex;
ut_a(!mutex->pfs_psi);
mutex->pfs_psi = (PSI_server)
? PSI_server->init_mutex(buffer_block_mutex_key, mutex)
# endif /* UNIV_PFS_MUTEX */

# ifdef UNIV_PFS_RWLOCK
rwlock = &block->lock;
ut_a(!rwlock->pfs_psi);
rwlock->pfs_psi = (PSI_server)
? PSI_server->init_rwlock(buf_block_lock_key, rwlock)
# endif /* UNIV_PFS_RWLOCK */

# endif /* PFS_GROUP_BUFFER_SYNC */
/********************************************************************//**
Initializes a buffer control block when the buf_pool is created. */
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
buf_block_t* block, /*!< in: pointer to control block */
byte* frame) /*!< in: pointer to buffer frame */

UNIV_MEM_DESC(frame, UNIV_PAGE_SIZE, block);

block->frame = frame;

block->page.buf_pool_index = buf_pool_index(buf_pool);
block->page.state = BUF_BLOCK_NOT_USED;
block->page.buf_fix_count = 0;
block->page.io_fix = BUF_IO_NONE;

#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
page_zip_des_init(&block->page.zip);

#if defined PFS_SKIP_BUFFER_MUTEX_RWLOCK || defined PFS_GROUP_BUFFER_SYNC
/* If PFS_SKIP_BUFFER_MUTEX_RWLOCK is defined, skip registration
of buffer block mutex/rwlock with performance schema. If
PFS_GROUP_BUFFER_SYNC is defined, skip the registration
since buffer block mutex/rwlock will be registered later in
pfs_register_buffer_block() */

mutex_create(PFS_NOT_INSTRUMENTED, &block->mutex, SYNC_BUF_BLOCK);
rw_lock_create(PFS_NOT_INSTRUMENTED, &block->lock, SYNC_LEVEL_VARYING);
#else /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */
mutex_create(buffer_block_mutex_key, &block->mutex, SYNC_BUF_BLOCK);
rw_lock_create(buf_block_lock_key, &block->lock, SYNC_LEVEL_VARYING);
#endif /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */

ut_ad(rw_lock_validate(&(block->lock)));

#ifdef UNIV_SYNC_DEBUG
rw_lock_create(buf_block_debug_latch_key,
&block->debug_latch, SYNC_NO_ORDER_CHECK);
#endif /* UNIV_SYNC_DEBUG */
/********************************************************************//**
Set buffer pool size variables after resizing it */
buf_pool_set_sizes(void)
/*====================*/
ulint curr_size = 0;

buf_pool_mutex_enter_all();

for (i = 0; i < srv_buf_pool_instances; i++) {
buf_pool_t* buf_pool;

buf_pool = buf_pool_from_array(i);
curr_size += buf_pool->curr_pool_size;

srv_buf_pool_curr_size = curr_size;
srv_buf_pool_old_size = srv_buf_pool_size;

buf_pool_mutex_exit_all();

/********************************************************************//**
Initialize a buffer pool instance.
@return DB_SUCCESS if all goes well. */
buf_pool_init_instance(
/*===================*/
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
ulint buf_pool_size, /*!< in: size in bytes */
ulint instance_no) /*!< in: id of the instance */
buf_chunk_t* chunk;

/* 1. Initialize general fields
------------------------------- */
mutex_create(buf_pool_mutex_key,
&buf_pool->mutex, SYNC_BUF_POOL);
mutex_create(buf_pool_zip_mutex_key,
&buf_pool->zip_mutex, SYNC_BUF_BLOCK);

buf_pool_mutex_enter(buf_pool);

if (buf_pool_size > 0) {
buf_pool->n_chunks = 1;
void *chunk_ptr= mem_zalloc((sizeof *chunk));
buf_pool->chunks = chunk = static_cast<buf_chunk_t *>(chunk_ptr);

UT_LIST_INIT(buf_pool->free);

if (!buf_chunk_init(buf_pool, chunk, buf_pool_size)) {

buf_pool_mutex_exit(buf_pool);

buf_pool->instance_no = instance_no;
buf_pool->old_pool_size = buf_pool_size;
buf_pool->curr_size = chunk->size;
buf_pool->curr_pool_size = buf_pool->curr_size * UNIV_PAGE_SIZE;

buf_pool->page_hash = hash_create(2 * buf_pool->curr_size);
buf_pool->zip_hash = hash_create(2 * buf_pool->curr_size);

buf_pool->last_printout_time = ut_time();

/* 2. Initialize flushing fields
-------------------------------- */

mutex_create(flush_list_mutex_key, &buf_pool->flush_list_mutex,
SYNC_BUF_FLUSH_LIST);

for (i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) {
buf_pool->no_flush[i] = os_event_create(NULL);

/* 3. Initialize LRU fields
--------------------------- */

/* All fields are initialized by mem_zalloc(). */

buf_pool_mutex_exit(buf_pool);
/********************************************************************//**
Free one buffer pool instance. */
buf_pool_free_instance(
/*===================*/
buf_pool_t* buf_pool) /* in,own: buffer pool instance

buf_chunk_t* chunk;
buf_chunk_t* chunks;

mem_free(buf_pool->chunks);
hash_table_free(buf_pool->page_hash);
hash_table_free(buf_pool->zip_hash);
/********************************************************************//**
Creates the buffer pool.
@return DB_SUCCESS if success, DB_ERROR if not enough memory or error */
ulint total_size, /*!< in: size of the total pool in bytes */
ulint n_instances) /*!< in: number of instances */

const ulint size = total_size / n_instances;

ut_ad(n_instances > 0);
ut_ad(n_instances <= MAX_BUFFER_POOLS);
ut_ad(n_instances == srv_buf_pool_instances);

/* We create an extra buffer pool instance; this instance is used
for flushing the flush lists, to keep track of n_flush for all
the buffer pools and also used as a waiting object during flushing. */
void *buf_pool_void_ptr= mem_zalloc(n_instances * sizeof *buf_pool_ptr);
buf_pool_ptr = static_cast<buf_pool_struct *>(buf_pool_void_ptr);

for (i = 0; i < n_instances; i++) {
buf_pool_t* ptr = &buf_pool_ptr[i];

if (buf_pool_init_instance(ptr, size, i) != DB_SUCCESS) {

/* Free all the instances created so far. */

buf_pool_set_sizes();
buf_LRU_old_ratio_update(100 * 3/ 8, FALSE);

btr_search_sys_create(buf_pool_get_curr_size() / sizeof(void*) / 64);
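
A hedged sketch of the call made at server startup (buf_pool_init(),
srv_buf_pool_size and srv_buf_pool_instances are the real names; the
wrapper function and its error handling are illustrative):

/* Illustrative sketch: split the configured buffer pool size across the
configured number of instances, and fail startup if memory runs out. */
static ulint
example_startup_buf_pool(void)
{
	if (buf_pool_init(srv_buf_pool_size, srv_buf_pool_instances)
	    != DB_SUCCESS) {

		return(DB_ERROR);	/* not enough memory for the buffer pool */
	}

	return(DB_SUCCESS);
}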
/********************************************************************//**
Frees the buffer pool at shutdown. This must not be invoked before
freeing all mutexes. */
ulint n_instances) /*!< in: number of instances to free */

for (i = 0; i < n_instances; i++) {
buf_pool_free_instance(buf_pool_from_array(i));

mem_free(buf_pool_ptr);
buf_pool_ptr = NULL;
/********************************************************************//**
1345
Drops adaptive hash index for a buffer pool instance. */
1348
buf_pool_drop_hash_index_instance(
1349
/*==============================*/
1350
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
1351
ibool* released_search_latch) /*!< out: flag for signalling
1352
whether the search latch was
1355
buf_chunk_t* chunks = buf_pool->chunks;
1356
buf_chunk_t* chunk = chunks + buf_pool->n_chunks;
1358
while (--chunk >= chunks) {
1360
buf_block_t* block = chunk->blocks;
1362
for (i = chunk->size; i--; block++) {
1363
/* block->is_hashed cannot be modified
1364
when we have an x-latch on btr_search_latch;
1365
see the comment in buf0buf.h */
1367
if (!block->is_hashed) {
1371
/* To follow the latching order, we
1372
have to release btr_search_latch
1373
before acquiring block->latch. */
1374
rw_lock_x_unlock(&btr_search_latch);
1375
/* When we release the search latch,
1376
we must rescan all blocks, because
1377
some may become hashed again. */
1378
*released_search_latch = TRUE;
1380
rw_lock_x_lock(&block->lock);
1382
/* This should be guaranteed by the
1383
callers, which will be holding
1384
btr_search_enabled_mutex. */
1385
ut_ad(!btr_search_enabled);
1387
/* Because we did not buffer-fix the
1388
block by calling buf_block_get_gen(),
1389
it is possible that the block has been
1390
allocated for some other use after
1391
btr_search_latch was released above.
1392
We do not care which file page the
1393
block is mapped to. All we want to do
1394
is to drop any hash entries referring
1397
/* It is possible that
1398
block->page.state != BUF_FILE_PAGE.
1399
Even that does not matter, because
1400
btr_search_drop_page_hash_index() will
1401
check block->is_hashed before doing
1402
anything. block->is_hashed can only
1403
be set on uncompressed file pages. */
1405
btr_search_drop_page_hash_index(block);
1407
rw_lock_x_unlock(&block->lock);
1409
rw_lock_x_lock(&btr_search_latch);
1411
ut_ad(!btr_search_enabled);
/********************************************************************//**
Drops the adaptive hash index. To prevent a livelock, this function
is only to be called while holding btr_search_latch and while

ut_ad(!btr_search_enabled);

released_search_latch = FALSE;
for (i = 0; i < srv_buf_pool_instances; i++) {
buf_pool_t* buf_pool;

buf_pool = buf_pool_from_array(i);

buf_pool_drop_hash_index_instance(
buf_pool, &released_search_latch);

} while (released_search_latch);
goto shrink_again;

buf_pool->old_pool_size = buf_pool->curr_pool_size;

buf_pool_mutex_exit(buf_pool);

btr_search_enable();
/********************************************************************//**
Shrinks the buffer pool. */
ulint chunk_size) /*!< in: number of pages to remove */

for (i = 0; i < srv_buf_pool_instances; i++) {
buf_pool_t* buf_pool;
ulint instance_chunk_size;

instance_chunk_size = chunk_size / srv_buf_pool_instances;
buf_pool = buf_pool_from_array(i);
buf_pool_shrink_instance(buf_pool, instance_chunk_size);

buf_pool_set_sizes();
/********************************************************************//**
Rebuild buf_pool->page_hash for a buffer pool instance. */
buf_pool_page_hash_rebuild_instance(
/*================================*/
buf_pool_t* buf_pool) /*!< in: buffer pool instance */

ulint n_chunks;
hash_table_t* zip_hash;
hash_table_t* page_hash;

buf_pool_mutex_enter(buf_pool);

/* Free, create, and populate the hash table. */
hash_table_free(buf_pool->page_hash);

buf_flush_list_mutex_exit(buf_pool);
buf_pool_mutex_exit(buf_pool);
/********************************************************************
Determine if a block is a sentinel for a buffer pool watch.
@return TRUE if a sentinel for a buffer pool watch, FALSE if not */
buf_pool_watch_is_sentinel(
/*=======================*/
buf_pool_t* buf_pool, /*!< buffer pool instance */
const buf_page_t* bpage) /*!< in: block */

ut_ad(buf_page_in_file(bpage));

if (bpage < &buf_pool->watch[0]
|| bpage >= &buf_pool->watch[BUF_POOL_WATCH_SIZE]) {

ut_ad(buf_page_get_state(bpage) != BUF_BLOCK_ZIP_PAGE
|| bpage->zip.data != NULL);

ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_PAGE);
ut_ad(!bpage->in_zip_hash);
ut_ad(bpage->in_page_hash);
ut_ad(bpage->zip.data == NULL);
ut_ad(bpage->buf_fix_count > 0);
/****************************************************************//**
Add watch for the given page to be read in. Caller must have the buffer pool
@return NULL if watch set, block if the page is in the buffer pool */
ulint space, /*!< in: space id */
ulint offset, /*!< in: page number */
ulint fold) /*!< in: buf_page_address_fold(space, offset) */

buf_pool_t* buf_pool = buf_pool_get(space, offset);

ut_ad(buf_pool_mutex_own(buf_pool));

bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);

if (UNIV_LIKELY_NULL(bpage)) {
if (!buf_pool_watch_is_sentinel(buf_pool, bpage)) {
/* The page was loaded meanwhile. */

/* Add to an existing watch. */
bpage->buf_fix_count++;

for (i = 0; i < BUF_POOL_WATCH_SIZE; i++) {
bpage = &buf_pool->watch[i];

ut_ad(bpage->access_time == 0);
ut_ad(bpage->newest_modification == 0);
ut_ad(bpage->oldest_modification == 0);
ut_ad(bpage->zip.data == NULL);
ut_ad(!bpage->in_zip_hash);

switch (bpage->state) {
case BUF_BLOCK_POOL_WATCH:
ut_ad(!bpage->in_page_hash);
ut_ad(bpage->buf_fix_count == 0);

/* bpage is pointing to buf_pool->watch[],
which is protected by buf_pool->mutex.
Normally, buf_page_t objects are protected by
buf_block_t::mutex or buf_pool->zip_mutex or both. */

bpage->state = BUF_BLOCK_ZIP_PAGE;
bpage->space = space;
bpage->offset = offset;
bpage->buf_fix_count = 1;

ut_d(bpage->in_page_hash = TRUE);
HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,

case BUF_BLOCK_ZIP_PAGE:
ut_ad(bpage->in_page_hash);
ut_ad(bpage->buf_fix_count > 0);

/* Allocation failed. Either the maximum number of purge
threads should never exceed BUF_POOL_WATCH_SIZE, or this code
should be modified to return a special non-NULL value and the
caller should purge the record directly. */

/* Fix compiler warning */
/********************************************************************//**
1915
Rebuild buf_pool->page_hash. */
1918
buf_pool_page_hash_rebuild(void)
1919
/*============================*/
1923
for (i = 0; i < srv_buf_pool_instances; i++) {
1924
buf_pool_page_hash_rebuild_instance(buf_pool_from_array(i));
1928
/********************************************************************//**
1929
Increase the buffer pool size of one buffer pool instance. */
1932
buf_pool_increase_instance(
1933
/*=======================*/
1934
buf_pool_t* buf_pool, /*!< in: buffer pool instane */
1935
ulint change_size) /*!< in: new size of the pool */
1937
buf_chunk_t* chunks;
1940
buf_pool_mutex_enter(buf_pool);
1941
chunks = static_cast<buf_chunk_t *>(mem_alloc((buf_pool->n_chunks + 1) * sizeof *chunks));
1943
memcpy(chunks, buf_pool->chunks, buf_pool->n_chunks * sizeof *chunks);
1945
chunk = &chunks[buf_pool->n_chunks];
1947
if (!buf_chunk_init(buf_pool, chunk, change_size)) {
1950
buf_pool->old_pool_size = buf_pool->curr_pool_size;
1951
buf_pool->curr_size += chunk->size;
1952
buf_pool->curr_pool_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
1953
mem_free(buf_pool->chunks);
1954
buf_pool->chunks = chunks;
1955
buf_pool->n_chunks++;
1958
buf_pool_mutex_exit(buf_pool);
/********************************************************************//**
Increase the buffer pool size. */

for (i = 0; i < srv_buf_pool_instances; i++) {
buf_pool_increase_instance(
buf_pool_from_array(i),
change_size / srv_buf_pool_instances);

buf_pool_set_sizes();
/********************************************************************//**
buf_pool_resize(void)
/*=================*/
ulint min_change_size = 1048576 * srv_buf_pool_instances;

buf_pool_mutex_enter_all();

if (srv_buf_pool_old_size == srv_buf_pool_size) {

buf_pool_mutex_exit_all();

} else if (srv_buf_pool_curr_size + min_change_size
> srv_buf_pool_size) {

change_size = (srv_buf_pool_curr_size - srv_buf_pool_size)

buf_pool_mutex_exit_all();

/* Disable adaptive hash indexes and empty the index
in order to free up memory in the buffer pool chunks. */
buf_pool_shrink(change_size);

} else if (srv_buf_pool_curr_size + min_change_size
< srv_buf_pool_size) {

/* Enlarge the buffer pool by at least one megabyte */

change_size = srv_buf_pool_size - srv_buf_pool_curr_size;

buf_pool_mutex_exit_all();

buf_pool_increase(change_size);

srv_buf_pool_size = srv_buf_pool_old_size;

buf_pool_mutex_exit_all();

buf_pool_page_hash_rebuild();
/****************************************************************//**
Remove the sentinel block for the watch before replacing it with a real block.
buf_page_watch_clear() or buf_page_watch_occurred() will notice that
the block has been replaced with the real block.
@return reference count, to be added to the replacement block */
buf_pool_watch_remove(
/*==================*/
buf_pool_t* buf_pool, /*!< buffer pool instance */
ulint fold, /*!< in: buf_page_address_fold(
buf_page_t* watch) /*!< in/out: sentinel for watch */

ut_ad(buf_pool_mutex_own(buf_pool));

HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, watch);
ut_d(watch->in_page_hash = FALSE);
watch->buf_fix_count = 0;
watch->state = BUF_BLOCK_POOL_WATCH;
/****************************************************************//**
Stop watching if the page has been read in.
buf_pool_watch_set(space,offset) must have returned NULL before. */
buf_pool_watch_unset(
/*=================*/
ulint space, /*!< in: space id */
ulint offset) /*!< in: page number */

buf_pool_t* buf_pool = buf_pool_get(space, offset);
ulint fold = buf_page_address_fold(space, offset);

buf_pool_mutex_enter(buf_pool);
bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
/* The page must exist because buf_pool_watch_set()
increments buf_fix_count. */

if (UNIV_UNLIKELY(!buf_pool_watch_is_sentinel(buf_pool, bpage))) {
mutex_t* mutex = buf_page_get_mutex(bpage);

ut_a(bpage->buf_fix_count > 0);
bpage->buf_fix_count--;

ut_a(bpage->buf_fix_count > 0);

if (UNIV_LIKELY(!--bpage->buf_fix_count)) {
buf_pool_watch_remove(buf_pool, fold, bpage);

buf_pool_mutex_exit(buf_pool);
/****************************************************************//**
2092
Check if the page has been read in.
2093
This may only be called after buf_pool_watch_set(space,offset)
2094
has returned NULL and before invoking buf_pool_watch_unset(space,offset).
2095
@return FALSE if the given page was not read in, TRUE if it was */
2098
buf_pool_watch_occurred(
2099
/*====================*/
2100
ulint space, /*!< in: space id */
2101
ulint offset) /*!< in: page number */
2105
buf_pool_t* buf_pool = buf_pool_get(space, offset);
2106
ulint fold = buf_page_address_fold(space, offset);
2108
buf_pool_mutex_enter(buf_pool);
2110
bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
2111
/* The page must exist because buf_pool_watch_set()
2112
increments buf_fix_count. */
2114
ret = !buf_pool_watch_is_sentinel(buf_pool, bpage);
2115
buf_pool_mutex_exit(buf_pool);
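
A hedged sketch of the watch protocol from a caller's point of view
(buf_pool_watch_set(), buf_pool_watch_occurred() and buf_pool_watch_unset()
are the functions defined in this file; the surrounding control flow is
only illustrative):

/* Illustrative sketch: set a watch on a page that is not yet in the
pool, do some work, then check whether the page was read in meanwhile. */
static void
example_watch_usage(ulint space, ulint offset, ulint fold)
{
	buf_pool_t*	buf_pool = buf_pool_get(space, offset);
	buf_block_t*	block;

	buf_pool_mutex_enter(buf_pool);
	block = (buf_block_t*) buf_pool_watch_set(space, offset, fold);
	buf_pool_mutex_exit(buf_pool);

	if (block != NULL) {
		/* The page is already in the buffer pool; no watch was set. */
		return;
	}

	/* ... work that must know whether the page gets read in ... */

	if (buf_pool_watch_occurred(space, offset)) {
		/* The page was read into the buffer pool meanwhile. */
	}

	buf_pool_watch_unset(space, offset);
}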
/********************************************************************//**
/********************************************************************//**
Find out if a pointer belongs to a buf_block_t. It can be a pointer to
the buf_block_t itself or a member of it.
@return TRUE if ptr belongs to a buf_block_t struct */
buf_pointer_is_block_field(
/*=======================*/
const void* ptr) /*!< in: pointer not dereferenced */

for (i = 0; i < srv_buf_pool_instances; i++) {

found = buf_pointer_is_block_field_instance(
buf_pool_from_array(i), ptr);
/********************************************************************//**
1995
2685
Find out if a buffer block was created by buf_chunk_init().
1996
2686
@return TRUE if "block" has been added to buf_pool->free by buf_chunk_init() */
1999
2689
buf_block_is_uncompressed(
2000
2690
/*======================*/
2001
const buf_block_t* block) /*!< in: pointer to block,
2691
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
2692
const buf_block_t* block) /*!< in: pointer to block,
2004
ut_ad(buf_pool_mutex_own());
2695
ut_ad(buf_pool_mutex_own(buf_pool));
2006
2697
if (UNIV_UNLIKELY((((ulint) block) % sizeof *block) != 0)) {
2007
2698
/* The pointer should be aligned. */
2011
return(buf_pointer_is_block_field((void *)block));
2702
return(buf_pointer_is_block_field_instance(buf_pool, (void *)block));
2014
2705
/********************************************************************//**
ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
buf_block_t* guess, /*!< in: guessed block or NULL */
ulint mode, /*!< in: BUF_GET, BUF_GET_IF_IN_POOL,
BUF_GET_NO_LATCH, or
BUF_GET_IF_IN_POOL_OR_WATCH */
const char* file, /*!< in: file name */
ulint line, /*!< in: line where called */
mtr_t* mtr) /*!< in: mini-transaction */

buf_block_t* block;
unsigned access_time;
ulint fix_type;
ibool must_read;
buf_pool_t* buf_pool = buf_pool_get(space, offset);

ut_ad(mtr->state == MTR_ACTIVE);
ut_ad((rw_latch == RW_S_LATCH)
|| (rw_latch == RW_X_LATCH)
|| (rw_latch == RW_NO_LATCH));
ut_ad((mode != BUF_GET_NO_LATCH) || (rw_latch == RW_NO_LATCH));
ut_ad(mode == BUF_GET
|| mode == BUF_GET_IF_IN_POOL
|| mode == BUF_GET_NO_LATCH
|| mode == BUF_GET_IF_IN_POOL_OR_WATCH);
ut_ad(zip_size == fil_space_get_zip_size(space));
ut_ad(ut_is_2pow(zip_size));
#ifndef UNIV_LOG_DEBUG
ut_ad(!ibuf_inside() || ibuf_page(space, zip_size, offset, NULL));

buf_pool->stat.n_page_gets++;
fold = buf_page_address_fold(space, offset);
buf_pool_mutex_enter(buf_pool);

/* If the guess is a compressed page descriptor that

if (block == NULL) {
block = (buf_block_t*) buf_page_hash_get_low(
buf_pool, space, offset, fold);

if (block && buf_pool_watch_is_sentinel(buf_pool, &block->page)) {

if (block == NULL) {
/* Page not in buf_pool: needs to be read from file */

if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
block = (buf_block_t*) buf_pool_watch_set(
space, offset, fold);

if (UNIV_LIKELY_NULL(block)) {

buf_pool_mutex_exit(buf_pool);

if (mode == BUF_GET_IF_IN_POOL
|| mode == BUF_GET_IF_IN_POOL_OR_WATCH) {

if (buf_read_page(space, zip_size, offset)) {

} else if (retries < BUF_PAGE_READ_MAX_RETRIES) {

fprintf(stderr, "InnoDB: Error: Unable"
" to read tablespace %lu page no"
" %lu into the buffer pool after"
"InnoDB: The most probable cause"
" of this error may be that the"
" table has been corrupted.\n"
"InnoDB: You can try to fix this"
" innodb_force_recovery.\n"
"InnoDB: Please see reference manual"
" for more details.\n"
"InnoDB: Aborting...\n",
BUF_PAGE_READ_MAX_RETRIES);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(++buf_dbg_counter % 37 || buf_validate());

case BUF_BLOCK_ZIP_DIRTY:
bpage = &block->page;
/* Protect bpage->buf_fix_count. */
mutex_enter(&buf_pool->zip_mutex);

if (bpage->buf_fix_count
|| buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
/* This condition often occurs when the buffer
is not buffer-fixed, but I/O-fixed by
buf_page_init_for_read(). */
mutex_exit(&buf_pool->zip_mutex);
wait_until_unfixed:
/* The block is buffer-fixed or I/O-fixed.
Try again later. */
buf_pool_mutex_exit(buf_pool);
os_thread_sleep(WAIT_FOR_READ);

/* Allocate an uncompressed page. */
buf_pool_mutex_exit(buf_pool);
mutex_exit(&buf_pool->zip_mutex);

block = buf_LRU_get_free_block(buf_pool, 0);

buf_pool_mutex_enter(buf_pool);
mutex_enter(&block->mutex);

buf_page_t* hash_bpage;

hash_bpage = buf_page_hash_get_low(
buf_pool, space, offset, fold);

if (UNIV_UNLIKELY(bpage != hash_bpage)) {
/* The buf_pool->page_hash was modified
while buf_pool->mutex was released.
Free the block that was allocated. */

buf_LRU_block_free_non_file_page(block);

block->page.buf_fix_count = 1;
buf_block_set_io_fix(block, BUF_IO_READ);
rw_lock_x_lock_func(&block->lock, 0, file, line);

UNIV_MEM_INVALID(bpage, sizeof *bpage);

mutex_exit(&block->mutex);
mutex_exit(&buf_pool->zip_mutex);
buf_pool->n_pend_unzip++;

buf_buddy_free(buf_pool, bpage, sizeof *bpage);

buf_pool_mutex_exit(buf_pool);
/* Decompress the page and apply buffered operations
while not holding buf_pool->mutex or block->mutex. */
success = buf_zip_decompress(block, srv_use_checksums);

if (UNIV_LIKELY(success)) {
if (UNIV_LIKELY(!recv_no_ibuf_operations)) {
ibuf_merge_or_delete_for_page(block, space, offset,
zip_size, TRUE);

/* Unfix and unlatch the block. */
buf_pool_mutex_enter(buf_pool);
mutex_enter(&block->mutex);
block->page.buf_fix_count--;
buf_block_set_io_fix(block, BUF_IO_NONE);

ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);

mutex_enter(&block->mutex);
#if UNIV_WORD_SIZE == 4
/* On 32-bit systems, there is no padding in buf_page_t. On
other systems, Valgrind could complain about uninitialized pad
UNIV_MEM_ASSERT_RW(&block->page, sizeof block->page);

#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
if ((mode == BUF_GET_IF_IN_POOL || mode == BUF_GET_IF_IN_POOL_OR_WATCH)
/* Try to evict the block from the buffer pool, to use the
insert buffer (change buffer) as much as possible. */

if (buf_LRU_free_block(&block->page, TRUE, NULL)
mutex_exit(&block->mutex);
if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
/* Set the watch, as it would have
been set if the page were not in the
buffer pool in the first place. */
block = (buf_block_t*) buf_pool_watch_set(
space, offset, fold);

if (UNIV_LIKELY_NULL(block)) {

/* The page entered the buffer
pool for some reason. Try to

buf_pool_mutex_exit(buf_pool);
"innodb_change_buffering_debug evict %u %u\n",
(unsigned) space, (unsigned) offset);

} else if (buf_flush_page_try(buf_pool, block)) {
"innodb_change_buffering_debug flush %u %u\n",
(unsigned) space, (unsigned) offset);

/* Failed to evict the page; change it directly */

#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */

buf_block_buf_fix_inc(block, file, line);
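
A hedged sketch of a typical call into buf_page_get_gen() (the function
whose body is shown above); the wrapper function and the latch level
passed to buf_block_dbg_add_level() are illustrative assumptions:

/* Illustrative sketch: fetch a page S-latched inside a mini-transaction. */
static buf_block_t*
example_fetch_page(ulint space, ulint zip_size, ulint offset, mtr_t* mtr)
{
	buf_block_t*	block;

	block = buf_page_get_gen(space, zip_size, offset, RW_S_LATCH,
				 NULL,		/* no guessed block */
				 BUF_GET,	/* the page must exist */
				 __FILE__, __LINE__, mtr);

	buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);

	return(block);
}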
buf_block_init_low(block);

block->lock_hash_val = lock_rec_hash(space, offset);

buf_page_init_low(&block->page);

/* Insert into the hash table of file pages */

hash_page = buf_page_hash_get_low(buf_pool, space, offset, fold);

if (UNIV_LIKELY(!hash_page)) {
} else if (buf_pool_watch_is_sentinel(buf_pool, hash_page)) {
/* Preserve the reference count. */
ulint buf_fix_count = hash_page->buf_fix_count;

ut_a(buf_fix_count > 0);
block->page.buf_fix_count += buf_fix_count;
buf_pool_watch_remove(buf_pool, fold, hash_page);

fprintf(stderr,
"InnoDB: Error: page %lu %lu already found"
" in the hash table: %p, %p\n",
if (UNIV_UNLIKELY(zip_size)) {
page_zip_set_size(&block->page.zip, zip_size);

/* buf_pool->mutex may be released and
reacquired by buf_buddy_alloc(). Thus, we
must release block->mutex in order not to
break the latching order in the reacquisition
of buf_pool->mutex. We also must defer this
operation until after the block descriptor has
been added to buf_pool->LRU and
buf_pool->page_hash. */
mutex_exit(&block->mutex);
data = buf_buddy_alloc(buf_pool, zip_size, &lru);
mutex_enter(&block->mutex);
block->page.zip.data = static_cast<unsigned char *>(data);

/* To maintain the invariant
block->in_unzip_LRU_list

control block (bpage), in order to avoid the
invocation of buf_buddy_relocate_block() on
uninitialized data. */
data = buf_buddy_alloc(buf_pool, zip_size, &lru);
bpage = static_cast<buf_page_struct *>(buf_buddy_alloc(buf_pool, sizeof *bpage, &lru));

/* Initialize the buf_pool pointer. */
bpage->buf_pool_index = buf_pool_index(buf_pool);

/* If buf_buddy_alloc() allocated storage from the LRU list,
it released and reacquired buf_pool->mutex. Thus, we must
check the page_hash again, as it may have been modified. */
if (UNIV_UNLIKELY(lru)) {

watch_page = buf_page_hash_get_low(
buf_pool, space, offset, fold);

&& !buf_pool_watch_is_sentinel(buf_pool,

/* The block was added by some other thread. */

buf_buddy_free(buf_pool, bpage, sizeof *bpage);
buf_buddy_free(buf_pool, data, zip_size);

page_zip_des_init(&bpage->zip);
page_zip_set_size(&bpage->zip, zip_size);
bpage->zip.data = static_cast<unsigned char *>(data);

mutex_enter(&buf_pool->zip_mutex);
UNIV_MEM_DESC(bpage->zip.data,
page_zip_get_size(&bpage->zip), bpage);

buf_page_init_low(bpage);

bpage->state = BUF_BLOCK_ZIP_PAGE;
bpage->space = space;
bpage->offset = offset;

#ifdef UNIV_DEBUG
bpage->in_page_hash = FALSE;
bpage->in_zip_hash = FALSE;
#endif /* UNIV_DEBUG */

ut_d(bpage->in_page_hash = TRUE);

if (UNIV_LIKELY_NULL(watch_page)) {
/* Preserve the reference count. */
ulint buf_fix_count = watch_page->buf_fix_count;
ut_a(buf_fix_count > 0);
bpage->buf_fix_count += buf_fix_count;
ut_ad(buf_pool_watch_is_sentinel(buf_pool, watch_page));
buf_pool_watch_remove(buf_pool, fold, watch_page);

HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold,

/* The block must be put to the LRU list, to the old blocks */
buf_LRU_add_block(bpage, TRUE/* to old blocks */);
buf_frame_t* frame;
buf_block_t* block;
buf_block_t* free_block = NULL;
ulint time_ms = ut_time_ms();
buf_pool_t* buf_pool = buf_pool_get(space, offset);

ut_ad(mtr->state == MTR_ACTIVE);
ut_ad(space || !zip_size);

free_block = buf_LRU_get_free_block(buf_pool, 0);

fold = buf_page_address_fold(space, offset);

buf_pool_mutex_enter(buf_pool);

block = (buf_block_t*) buf_page_hash_get_low(
buf_pool, space, offset, fold);

&& buf_page_in_file(&block->page)
&& !buf_pool_watch_is_sentinel(buf_pool, &block->page)) {
#ifdef UNIV_IBUF_COUNT_DEBUG
ut_a(ibuf_count_get(space, offset) == 0);

page_zip_set_size(&block->page.zip, zip_size);
mutex_exit(&block->mutex);
/* buf_pool->mutex may be released and reacquired by
buf_buddy_alloc(). Thus, we must release block->mutex
in order not to break the latching order in
the reacquisition of buf_pool->mutex. We also must
defer this operation until after the block descriptor
has been added to buf_pool->LRU and buf_pool->page_hash. */
data = buf_buddy_alloc(buf_pool, zip_size, &lru);
mutex_enter(&block->mutex);
block->page.zip.data = static_cast<unsigned char *>(data);

/* To maintain the invariant
block->in_unzip_LRU_list

#endif /* UNIV_DEBUG */
mutex_exit(buf_page_get_mutex(bpage));
buf_pool_mutex_exit(buf_pool);

/*********************************************************************//**
Asserts that all file pages in the buffer are in a replaceable state.
buf_all_freed_instance(
/*===================*/
buf_pool_t* buf_pool) /*!< in: buffer pool instance */

buf_pool_mutex_enter(buf_pool);

chunk = buf_pool->chunks;

for (i = buf_pool->n_chunks; i--; chunk++) {

const buf_block_t* block = buf_chunk_not_freed(chunk);

if (UNIV_LIKELY_NULL(block)) {
"Page %lu %lu still fixed or dirty\n",
(ulong) block->page.space,
(ulong) block->page.offset);

buf_pool_mutex_exit(buf_pool);

/*********************************************************************//**
Invalidates file pages in one buffer pool instance */
buf_pool_invalidate_instance(
/*=========================*/
buf_pool_t* buf_pool) /*!< in: buffer pool instance */

buf_pool_mutex_enter(buf_pool);

for (i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) {

buf_pool->LRU_flush_ended = 0;

memset(&buf_pool->stat, 0x00, sizeof(buf_pool->stat));
buf_refresh_io_stats(buf_pool);

buf_pool_mutex_exit(buf_pool);
/*********************************************************************//**
Invalidates the file pages in the buffer pool when an archive recovery is
completed. All the file pages buffered must be in a replaceable state when
this function is called: not latched and not modified. */
buf_pool_invalidate(void)
/*=====================*/

for (i = 0; i < srv_buf_pool_instances; i++) {
buf_pool_invalidate_instance(buf_pool_from_array(i));
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/*********************************************************************//**
Validates data in one buffer pool instance
@return TRUE */
buf_pool_validate_instance(
/*=======================*/
buf_pool_t* buf_pool) /*!< in: buffer pool instance */

buf_chunk_t* chunk;
/* It is OK to read oldest_modification here because
we have acquired buf_pool->zip_mutex above which acts
as the 'block->mutex' for these bpages. */
ut_a(!b->oldest_modification);
ut_a(buf_page_hash_get(buf_pool, b->space, b->offset) == b);

/* Check dirty blocks. */

buf_flush_list_mutex_enter(buf_pool);
for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
b = UT_LIST_GET_NEXT(list, b)) {
ut_ad(b->in_flush_list);
ut_a(b->oldest_modification);

switch (buf_page_get_state(b)) {
case BUF_BLOCK_ZIP_DIRTY:

switch (buf_page_get_io_fix(b)) {
case BUF_IO_NONE:
case BUF_IO_READ:
case BUF_IO_WRITE:
switch (buf_page_get_flush_type(b)) {
case BUF_FLUSH_LRU:

(ulong) n_free);

ut_a(UT_LIST_GET_LEN(buf_pool->flush_list) == n_flush);

ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_single_flush);
ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush);

buf_pool_mutex_exit(buf_pool);

ut_a(buf_LRU_validate());
ut_a(buf_flush_validate(buf_pool));
/*********************************************************************//**
Validates the buffer buf_pool data structure.

for (i = 0; i < srv_buf_pool_instances; i++) {
buf_pool_t* buf_pool;

buf_pool = buf_pool_from_array(i);

buf_pool_validate_instance(buf_pool);

#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/*********************************************************************//**
Prints info of the buffer buf_pool data structure for one instance. */
buf_pool_t* buf_pool)

index_id_t* index_ids;
buf_chunk_t* chunk;
dict_index_t* index;

"LRU len: %lu, unzip_LRU len: %lu\n"
"I/O sum[%lu]:cur[%lu], unzip sum[%lu]:cur[%lu]\n",
static_cast<ulint>(UT_LIST_GET_LEN(buf_pool->LRU)),
static_cast<ulint>(UT_LIST_GET_LEN(buf_pool->unzip_LRU)),
buf_LRU_stat_sum.io, buf_LRU_stat_cur.io,
buf_LRU_stat_sum.unzip, buf_LRU_stat_cur.unzip);

buf_refresh_io_stats(buf_pool);
buf_pool_mutex_exit(buf_pool);
/*********************************************************************//**
Prints info of the buffer i/o. */
FILE* file) /*!< in/out: buffer where to print */

for (i = 0; i < srv_buf_pool_instances; i++) {
buf_pool_t* buf_pool;

buf_pool = buf_pool_from_array(i);
buf_print_io_instance(buf_pool, file);
/**********************************************************************//**
Refreshes the statistics used to print per-second averages. */
buf_refresh_io_stats(
/*=================*/
buf_pool_t* buf_pool) /*!< in: buffer pool instance */

buf_pool->last_printout_time = ut_time();
buf_pool->old_stat = buf_pool->stat;
/**********************************************************************//**
Refreshes the statistics used to print per-second averages. */
buf_refresh_io_stats_all(void)
/*==========================*/

for (i = 0; i < srv_buf_pool_instances; i++) {
buf_pool_t* buf_pool;

buf_pool = buf_pool_from_array(i);

buf_refresh_io_stats(buf_pool);
/**********************************************************************//**
4962
Check if all pages in all buffer pools are in a replacable state.
4963
@return FALSE if not */
3949
4966
buf_all_freed(void)
3950
4967
/*===============*/
3957
buf_pool_mutex_enter();
3959
chunk = buf_pool->chunks;
3961
for (i = buf_pool->n_chunks; i--; chunk++) {
3963
const buf_block_t* block = buf_chunk_not_freed(chunk);
3965
if (UNIV_LIKELY_NULL(block)) {
3967
"Page %lu %lu still fixed or dirty\n",
3968
(ulong) block->page.space,
3969
(ulong) block->page.offset);
4971
for (i = 0; i < srv_buf_pool_instances; i++) {
4972
buf_pool_t* buf_pool;
4974
buf_pool = buf_pool_from_array(i);
4976
if (!buf_all_freed_instance(buf_pool)) {
3974
buf_pool_mutex_exit();
/*********************************************************************//**
Checks that there currently are no pending i/o-operations for the buffer
buf_pool_check_no_pending_io(void)
/*==============================*/

buf_pool_mutex_enter_all();

for (i = 0; i < srv_buf_pool_instances && ret; i++) {
const buf_pool_t* buf_pool;

buf_pool = buf_pool_from_array(i);

if (buf_pool->n_pend_reads
+ buf_pool->n_flush[BUF_FLUSH_LRU]
+ buf_pool->n_flush[BUF_FLUSH_LIST]
+ buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]) {

buf_pool_mutex_exit_all();
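
A hedged sketch of how shutdown code might poll this check
(buf_pool_check_no_pending_io(), os_thread_sleep() and buf_pool_free()
are real names from this code base; the loop and the wrapper function
are illustrative):

/* Illustrative sketch: wait until no buffer pool I/O is pending before
freeing the pool at shutdown. */
static void
example_shutdown_buf_pool(void)
{
	while (!buf_pool_check_no_pending_io()) {
		os_thread_sleep(100000);	/* wait 100 ms, then re-check */
	}

	buf_pool_free(srv_buf_pool_instances);
}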
Code currently not used
/*********************************************************************//**
Gets the current length of the free list of buffer blocks.
@return length of the free list */