The buffer buf_pool contains a single mutex which protects all the
control data structures of the buf_pool. The content of a buffer frame is
protected by a separate read-write lock in its control block, though.
These locks can be locked and unlocked without owning the buf_pool->mutex.
The OS events in the buf_pool struct can be waited for without owning the
buf_pool->mutex.

The buf_pool->mutex is a hot-spot in main memory, causing a lot of
memory bus traffic on multiprocessor systems when processors
alternately access the mutex. On our Pentium, the mutex is accessed
maybe every 10 microseconds. We gave up the solution to have mutexes
for each control block, for instance, because it seemed to be
complicated.

A solution to reduce mutex contention of the buf_pool->mutex is to
create a separate mutex for the page hash table. On Pentium,
accessing the hash table takes 2 microseconds, about half
of the total buf_pool->mutex hold time.
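
(A purely illustrative sketch of that idea, not current code; the name
page_hash_mutex below is hypothetical. A hash lookup would then only
need

	mutex_enter(&page_hash_mutex);
	bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
	mutex_exit(&page_hash_mutex);

so concurrent lookups would contend on the hypothetical page_hash_mutex
rather than on buf_pool->mutex itself.)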
which we can use when we want to artificially age a page in the
buf_pool. This is used if we know that some page is not needed
again for some time: we insert the block right after the pointer,
causing it to be replaced sooner than would normally be the case.
Currently this aging mechanism is used for the read-ahead mechanism
of pages, and it can also be used when there is a scan of a full
table which cannot fit in the memory. Putting the pages near the end
of the LRU list, we make sure that most of the buf_pool stays in the
main memory, undisturbed.
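
(A minimal sketch of how this is applied, based on the calls that appear
later in this file: a page brought in speculatively, e.g. by read-ahead,
is linked to the old end of the LRU list with

	buf_LRU_add_block(bpage, TRUE);		(TRUE: add to the old blocks)

so that it is evicted relatively soon if it is never actually requested,
whereas passing FALSE would place the block at the head of the list.)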

The unzip_LRU list contains a subset of the common LRU list. The
blocks on the unzip_LRU list hold a compressed file page and the
corresponding uncompressed page frame.
#ifndef UNIV_HOTBACKUP
/** Value in microseconds */
static const int WAIT_FOR_READ = 5000;
/** Number of attempts made to read in a page in the buffer pool */
static const ulint BUF_PAGE_READ_MAX_RETRIES = 100;
251
/** The buffer pools of the database */
252
UNIV_INTERN buf_pool_t* buf_pool_ptr;
246
/** The buffer buf_pool of the database */
247
UNIV_INTERN buf_pool_t* buf_pool = NULL;
249
/** mutex protecting the buffer pool struct and control blocks, except the
250
read-write lock in them */
251
UNIV_INTERN mutex_t buf_pool_mutex;
252
/** mutex protecting the control blocks of compressed-only pages
253
(of type buf_page_t, not buf_block_t) */
254
UNIV_INTERN mutex_t buf_pool_zip_mutex;
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
static ulint buf_dbg_counter = 0; /*!< This is used to insert validation
operations in execution in the
260
/** Flag to forbid the release of the buffer pool mutex.
261
Protected by buf_pool_mutex. */
262
UNIV_INTERN ulint buf_pool_mutex_exit_forbidden = 0;
258
263
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
259
264
#ifdef UNIV_DEBUG
260
265
/** If this is set TRUE, the program prints info whenever
262
267
UNIV_INTERN ibool buf_debug_prints = FALSE;
263
268
#endif /* UNIV_DEBUG */
265
#ifdef UNIV_PFS_RWLOCK
266
/* Keys to register buffer block related rwlocks and mutexes with
267
performance schema */
268
UNIV_INTERN mysql_pfs_key_t buf_block_lock_key;
269
# ifdef UNIV_SYNC_DEBUG
270
UNIV_INTERN mysql_pfs_key_t buf_block_debug_latch_key;
271
# endif /* UNIV_SYNC_DEBUG */
272
#endif /* UNIV_PFS_RWLOCK */
274
#ifdef UNIV_PFS_MUTEX
275
UNIV_INTERN mysql_pfs_key_t buffer_block_mutex_key;
276
UNIV_INTERN mysql_pfs_key_t buf_pool_mutex_key;
277
UNIV_INTERN mysql_pfs_key_t buf_pool_zip_mutex_key;
278
UNIV_INTERN mysql_pfs_key_t flush_list_mutex_key;
279
#endif /* UNIV_PFS_MUTEX */
281
#if defined UNIV_PFS_MUTEX || defined UNIV_PFS_RWLOCK
282
# ifndef PFS_SKIP_BUFFER_MUTEX_RWLOCK
284
/* Buffer block mutexes and rwlocks can be registered
285
in one group rather than individually. If PFS_GROUP_BUFFER_SYNC
286
is defined, register buffer block mutex and rwlock
287
in one group after their initialization. */
288
# define PFS_GROUP_BUFFER_SYNC
290
/* This define caps the number of mutexes/rwlocks can
291
be registered with performance schema. Developers can
292
modify this define if necessary. Please note, this would
293
be effective only if PFS_GROUP_BUFFER_SYNC is defined. */
294
# define PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER ULINT_MAX
296
# endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
297
#endif /* UNIV_PFS_MUTEX || UNIV_PFS_RWLOCK */
299
270
/** A chunk of buffers. The buffer pool is allocated in chunks. */
300
271
struct buf_chunk_struct{
301
272
ulint mem_size; /*!< allocated size of the chunk */
307
278
#endif /* !UNIV_HOTBACKUP */
309
280
/********************************************************************//**
310
Gets the smallest oldest_modification lsn for any page in the pool. Returns
311
zero if all modified pages have been flushed to disk.
312
@return oldest modification in pool, zero if none */
315
buf_pool_get_oldest_modification(void)
316
/*==================================*/
321
ib_uint64_t oldest_lsn = 0;
323
/* When we traverse all the flush lists we don't want another
324
thread to add a dirty page to any flush list. */
325
log_flush_order_mutex_enter();
327
for (i = 0; i < srv_buf_pool_instances; i++) {
328
buf_pool_t* buf_pool;
330
buf_pool = buf_pool_from_array(i);
332
buf_flush_list_mutex_enter(buf_pool);
334
bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
337
ut_ad(bpage->in_flush_list);
338
lsn = bpage->oldest_modification;
341
buf_flush_list_mutex_exit(buf_pool);
343
if (!oldest_lsn || oldest_lsn > lsn) {
348
log_flush_order_mutex_exit();
350
/* The returned answer may be out of date: the flush_list can
351
change after the mutex has been released. */
356
/********************************************************************//**
357
Get total buffer pool statistics. */
360
buf_get_total_list_len(
361
/*===================*/
362
ulint* LRU_len, /*!< out: length of all LRU lists */
363
ulint* free_len, /*!< out: length of all free lists */
364
ulint* flush_list_len) /*!< out: length of all flush lists */
372
for (i = 0; i < srv_buf_pool_instances; i++) {
373
buf_pool_t* buf_pool;
375
buf_pool = buf_pool_from_array(i);
376
*LRU_len += UT_LIST_GET_LEN(buf_pool->LRU);
377
*free_len += UT_LIST_GET_LEN(buf_pool->free);
378
*flush_list_len += UT_LIST_GET_LEN(buf_pool->flush_list);
382
/********************************************************************//**
383
Get total buffer pool statistics. */
388
buf_pool_stat_t* tot_stat) /*!< out: buffer pool stats */
392
memset(tot_stat, 0, sizeof(*tot_stat));
394
for (i = 0; i < srv_buf_pool_instances; i++) {
395
buf_pool_stat_t*buf_stat;
396
buf_pool_t* buf_pool;
398
buf_pool = buf_pool_from_array(i);
400
buf_stat = &buf_pool->stat;
401
tot_stat->n_page_gets += buf_stat->n_page_gets;
402
tot_stat->n_pages_read += buf_stat->n_pages_read;
403
tot_stat->n_pages_written += buf_stat->n_pages_written;
404
tot_stat->n_pages_created += buf_stat->n_pages_created;
405
tot_stat->n_ra_pages_read += buf_stat->n_ra_pages_read;
406
tot_stat->n_ra_pages_evicted += buf_stat->n_ra_pages_evicted;
407
tot_stat->n_pages_made_young += buf_stat->n_pages_made_young;
409
tot_stat->n_pages_not_made_young +=
410
buf_stat->n_pages_not_made_young;
414
/********************************************************************//**
415
Allocates a buffer block.
416
@return own: the allocated block, in state BUF_BLOCK_MEMORY */
421
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
422
ulint zip_size) /*!< in: compressed page size in bytes,
423
or 0 if uncompressed tablespace */
427
static ulint buf_pool_index;
429
if (buf_pool == NULL) {
430
/* We are allocating memory from any buffer pool, ensure
431
we spread the grace on all buffer pool instances. */
432
index = buf_pool_index++ % srv_buf_pool_instances;
433
buf_pool = buf_pool_from_array(index);
436
block = buf_LRU_get_free_block(buf_pool, zip_size);
438
buf_block_set_state(block, BUF_BLOCK_MEMORY);
/********************************************************************//**
Calculates a page checksum which is stored to the page when it is written
to a file. Note that we must be careful to calculate the same value on
32-bit and 64-bit architectures.
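For instance (an illustrative sketch only, not the actual checksum
algorithm used here): folding the page bytes with explicitly masked
32-bit arithmetic keeps the result identical whether ulint is 4 or 8
bytes wide:

	checksum = 0;
	for (i = 0; i < UNIV_PAGE_SIZE; i++) {
		checksum = ((checksum << 1) ^ read_buf[i]) & 0xFFFFFFFFUL;
	}

Without the explicit mask, the intermediate value could grow past 32
bits on a 64-bit ulint and the stored checksum would then differ
between architectures.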
524
361
ib_uint64_t current_lsn;
526
363
if (log_peek_lsn(&current_lsn)
529
< mach_read_from_8(read_buf + FIL_PAGE_LSN))) {
364
&& current_lsn < mach_read_ull(read_buf + FIL_PAGE_LSN)) {
530
365
ut_print_timestamp(stderr);
532
drizzled::errmsg_printf(drizzled::error::INFO,
533
"InnoDB: Error: page %lu log sequence number %"PRIu64". "
534
"InnoDB: is in the future! Current system log sequence number %"PRIu64". "
535
"Your database may be corrupt or you may have copied the InnoDB tablespace but not the InnoDB log files. See "
536
" " REFMAN "forcing-recovery.html for more information. ",
537
(ulong) mach_read_from_4(read_buf
539
mach_read_from_8(read_buf + FIL_PAGE_LSN),
368
" InnoDB: Error: page %lu log sequence number"
370
"InnoDB: is in the future! Current system "
371
"log sequence number %"PRIu64".\n"
372
"InnoDB: Your database may be corrupt or "
373
"you may have copied the InnoDB\n"
374
"InnoDB: tablespace but not the InnoDB "
376
"InnoDB: " REFMAN "forcing-recovery.html\n"
377
"InnoDB: for more information.\n",
378
(ulong) mach_read_from_4(read_buf
380
mach_read_ull(read_buf + FIL_PAGE_LSN),
795
638
#ifndef UNIV_HOTBACKUP
797
# ifdef PFS_GROUP_BUFFER_SYNC
798
/********************************************************************//**
799
This function registers mutexes and rwlocks in buffer blocks with
800
performance schema. If PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER is
801
defined to be a value less than chunk->size, then only mutexes
802
and rwlocks in the first PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER
803
blocks are registered. */
806
pfs_register_buffer_block(
807
/*======================*/
808
buf_chunk_t* chunk) /*!< in/out: chunk of buffers */
811
ulint num_to_register;
814
block = chunk->blocks;
816
num_to_register = ut_min(chunk->size,
817
PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER);
819
for (i = 0; i < num_to_register; i++) {
823
# ifdef UNIV_PFS_MUTEX
824
mutex = &block->mutex;
825
ut_a(!mutex->pfs_psi);
826
mutex->pfs_psi = (PSI_server)
827
? PSI_server->init_mutex(buffer_block_mutex_key, mutex)
829
# endif /* UNIV_PFS_MUTEX */
831
# ifdef UNIV_PFS_RWLOCK
832
rwlock = &block->lock;
833
ut_a(!rwlock->pfs_psi);
834
rwlock->pfs_psi = (PSI_server)
835
? PSI_server->init_rwlock(buf_block_lock_key, rwlock)
837
# endif /* UNIV_PFS_RWLOCK */
841
# endif /* PFS_GROUP_BUFFER_SYNC */
843
639
/********************************************************************//**
844
640
Initializes a buffer control block when the buf_pool is created. */
849
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
850
buf_block_t* block, /*!< in: pointer to control block */
851
byte* frame) /*!< in: pointer to buffer frame */
645
buf_block_t* block, /*!< in: pointer to control block */
646
byte* frame) /*!< in: pointer to buffer frame */
853
648
UNIV_MEM_DESC(frame, UNIV_PAGE_SIZE, block);
855
650
block->frame = frame;
857
block->page.buf_pool_index = buf_pool_index(buf_pool);
858
652
block->page.state = BUF_BLOCK_NOT_USED;
859
653
block->page.buf_fix_count = 0;
860
654
block->page.io_fix = BUF_IO_NONE;
883
675
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
884
676
page_zip_des_init(&block->page.zip);
886
#if defined PFS_SKIP_BUFFER_MUTEX_RWLOCK || defined PFS_GROUP_BUFFER_SYNC
887
/* If PFS_SKIP_BUFFER_MUTEX_RWLOCK is defined, skip registration
888
of buffer block mutex/rwlock with performance schema. If
889
PFS_GROUP_BUFFER_SYNC is defined, skip the registration
890
since buffer block mutex/rwlock will be registered later in
891
pfs_register_buffer_block() */
893
mutex_create(PFS_NOT_INSTRUMENTED, &block->mutex, SYNC_BUF_BLOCK);
894
rw_lock_create(PFS_NOT_INSTRUMENTED, &block->lock, SYNC_LEVEL_VARYING);
895
#else /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */
896
mutex_create(buffer_block_mutex_key, &block->mutex, SYNC_BUF_BLOCK);
897
rw_lock_create(buf_block_lock_key, &block->lock, SYNC_LEVEL_VARYING);
898
#endif /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */
678
mutex_create(&block->mutex, SYNC_BUF_BLOCK);
680
rw_lock_create(&block->lock, SYNC_LEVEL_VARYING);
900
681
ut_ad(rw_lock_validate(&(block->lock)));
902
683
#ifdef UNIV_SYNC_DEBUG
903
rw_lock_create(buf_block_debug_latch_key,
904
&block->debug_latch, SYNC_NO_ORDER_CHECK);
684
rw_lock_create(&block->debug_latch, SYNC_NO_ORDER_CHECK);
905
685
#endif /* UNIV_SYNC_DEBUG */
1162
940
/********************************************************************//**
1163
Set buffer pool size variables after resizing it */
1166
buf_pool_set_sizes(void)
1167
/*====================*/
1170
ulint curr_size = 0;
1172
buf_pool_mutex_enter_all();
1174
for (i = 0; i < srv_buf_pool_instances; i++) {
1175
buf_pool_t* buf_pool;
1177
buf_pool = buf_pool_from_array(i);
1178
curr_size += buf_pool->curr_pool_size;
1181
srv_buf_pool_curr_size = curr_size;
1182
srv_buf_pool_old_size = srv_buf_pool_size;
1184
buf_pool_mutex_exit_all();
1187
/********************************************************************//**
1188
Initialize a buffer pool instance.
1189
@return DB_SUCCESS if all goes well. */
1192
buf_pool_init_instance(
1193
/*===================*/
1194
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
1195
ulint buf_pool_size, /*!< in: size in bytes */
1196
ulint instance_no) /*!< in: id of the instance */
941
Creates the buffer pool.
942
@return own: buf_pool object, NULL if not enough memory or error */
951
buf_pool = mem_zalloc(sizeof(buf_pool_t));
1201
953
/* 1. Initialize general fields
1202
954
------------------------------- */
1203
mutex_create(buf_pool_mutex_key,
1204
&buf_pool->mutex, SYNC_BUF_POOL);
1205
mutex_create(buf_pool_zip_mutex_key,
1206
&buf_pool->zip_mutex, SYNC_BUF_BLOCK);
1208
buf_pool_mutex_enter(buf_pool);
1210
if (buf_pool_size > 0) {
1211
buf_pool->n_chunks = 1;
1212
void *chunk_ptr= mem_zalloc((sizeof *chunk));
1213
buf_pool->chunks = chunk = static_cast<buf_chunk_t *>(chunk_ptr);
1215
UT_LIST_INIT(buf_pool->free);
1217
if (!buf_chunk_init(buf_pool, chunk, buf_pool_size)) {
1221
buf_pool_mutex_exit(buf_pool);
1226
buf_pool->instance_no = instance_no;
1227
buf_pool->old_pool_size = buf_pool_size;
1228
buf_pool->curr_size = chunk->size;
1229
buf_pool->curr_pool_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
1231
buf_pool->page_hash = hash_create(2 * buf_pool->curr_size);
1232
buf_pool->zip_hash = hash_create(2 * buf_pool->curr_size);
1234
buf_pool->last_printout_time = ut_time();
955
mutex_create(&buf_pool_mutex, SYNC_BUF_POOL);
956
mutex_create(&buf_pool_zip_mutex, SYNC_BUF_BLOCK);
958
buf_pool_mutex_enter();
960
buf_pool->n_chunks = 1;
961
buf_pool->chunks = chunk = mem_alloc(sizeof *chunk);
963
UT_LIST_INIT(buf_pool->free);
965
if (!buf_chunk_init(chunk, srv_buf_pool_size)) {
972
srv_buf_pool_old_size = srv_buf_pool_size;
973
buf_pool->curr_size = chunk->size;
974
srv_buf_pool_curr_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
976
buf_pool->page_hash = hash_create(2 * buf_pool->curr_size);
977
buf_pool->zip_hash = hash_create(2 * buf_pool->curr_size);
979
buf_pool->last_printout_time = time(NULL);
1236
981
/* 2. Initialize flushing fields
1237
982
-------------------------------- */
1239
mutex_create(flush_list_mutex_key, &buf_pool->flush_list_mutex,
1240
SYNC_BUF_FLUSH_LIST);
1242
984
for (i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) {
1243
985
buf_pool->no_flush[i] = os_event_create(NULL);
1246
988
/* 3. Initialize LRU fields
1247
989
--------------------------- */
1249
/* All fields are initialized by mem_zalloc(). */
1251
buf_pool_mutex_exit(buf_pool);
990
/* All fields are initialized by mem_zalloc(). */
992
buf_pool_mutex_exit();
994
btr_search_sys_create(buf_pool->curr_size
995
* UNIV_PAGE_SIZE / sizeof(void*) / 64);
997
/* 4. Initialize the buddy allocator fields */
998
/* All fields are initialized by mem_zalloc(). */
1256
1003
/********************************************************************//**
1257
free one buffer pool instance */
1004
Frees the buffer pool at shutdown. This must not be invoked before
1005
freeing all mutexes. */
1260
buf_pool_free_instance(
1261
/*===================*/
1262
buf_pool_t* buf_pool) /* in,own: buffer pool instance
1265
1011
buf_chunk_t* chunk;
1266
1012
buf_chunk_t* chunks;
1277
1023
mem_free(buf_pool->chunks);
1278
1024
hash_table_free(buf_pool->page_hash);
1279
1025
hash_table_free(buf_pool->zip_hash);
1282
/********************************************************************//**
1283
Creates the buffer pool.
1284
@return DB_SUCCESS if success, DB_ERROR if not enough memory or error */
1289
ulint total_size, /*!< in: size of the total pool in bytes */
1290
ulint n_instances) /*!< in: number of instances */
1293
const ulint size = total_size / n_instances;
1295
ut_ad(n_instances > 0);
1296
ut_ad(n_instances <= MAX_BUFFER_POOLS);
1297
ut_ad(n_instances == srv_buf_pool_instances);
1299
/* We create an extra buffer pool instance, this instance is used
1300
for flushing the flush lists, to keep track of n_flush for all
1301
the buffer pools and also used as a waiting object during flushing. */
1302
void *buf_pool_void_ptr= mem_zalloc(n_instances * sizeof *buf_pool_ptr);
1303
buf_pool_ptr = static_cast<buf_pool_struct *>(buf_pool_void_ptr);
1305
for (i = 0; i < n_instances; i++) {
1306
buf_pool_t* ptr = &buf_pool_ptr[i];
1308
if (buf_pool_init_instance(ptr, size, i) != DB_SUCCESS) {
1310
/* Free all the instances created so far. */
1317
buf_pool_set_sizes();
1318
buf_LRU_old_ratio_update(100 * 3/ 8, FALSE);
1320
btr_search_sys_create(buf_pool_get_curr_size() / sizeof(void*) / 64);
1325
/********************************************************************//**
1326
Frees the buffer pool at shutdown. This must not be invoked before
1327
freeing all mutexes. */
1332
ulint n_instances) /*!< in: number of instances to free */
1336
for (i = 0; i < n_instances; i++) {
1337
buf_pool_free_instance(buf_pool_from_array(i));
1340
mem_free(buf_pool_ptr);
1341
buf_pool_ptr = NULL;
1344
/********************************************************************//**
1345
Drops adaptive hash index for a buffer pool instance. */
1348
buf_pool_drop_hash_index_instance(
1349
/*==============================*/
1350
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
1351
ibool* released_search_latch) /*!< out: flag for signalling
1352
whether the search latch was
1355
buf_chunk_t* chunks = buf_pool->chunks;
1356
buf_chunk_t* chunk = chunks + buf_pool->n_chunks;
1358
while (--chunk >= chunks) {
1360
buf_block_t* block = chunk->blocks;
1362
for (i = chunk->size; i--; block++) {
1363
/* block->is_hashed cannot be modified
1364
when we have an x-latch on btr_search_latch;
1365
see the comment in buf0buf.h */
1367
if (!block->is_hashed) {
1371
/* To follow the latching order, we
1372
have to release btr_search_latch
1373
before acquiring block->latch. */
1374
rw_lock_x_unlock(&btr_search_latch);
1375
/* When we release the search latch,
1376
we must rescan all blocks, because
1377
some may become hashed again. */
1378
*released_search_latch = TRUE;
1380
rw_lock_x_lock(&block->lock);
1382
/* This should be guaranteed by the
1383
callers, which will be holding
1384
btr_search_enabled_mutex. */
1385
ut_ad(!btr_search_enabled);
1387
/* Because we did not buffer-fix the
1388
block by calling buf_block_get_gen(),
1389
it is possible that the block has been
1390
allocated for some other use after
1391
btr_search_latch was released above.
1392
We do not care which file page the
1393
block is mapped to. All we want to do
1394
is to drop any hash entries referring
1397
/* It is possible that
1398
block->page.state != BUF_FILE_PAGE.
1399
Even that does not matter, because
1400
btr_search_drop_page_hash_index() will
1401
check block->is_hashed before doing
1402
anything. block->is_hashed can only
1403
be set on uncompressed file pages. */
1405
btr_search_drop_page_hash_index(block);
1407
rw_lock_x_unlock(&block->lock);
1409
rw_lock_x_lock(&btr_search_latch);
1411
ut_ad(!btr_search_enabled);
1416
1030
/********************************************************************//**
1417
1031
Drops the adaptive hash index. To prevent a livelock, this function
1418
1032
is only to be called while holding btr_search_latch and while
1430
1044
ut_ad(!btr_search_enabled);
1047
buf_chunk_t* chunks = buf_pool->chunks;
1048
buf_chunk_t* chunk = chunks + buf_pool->n_chunks;
1435
1050
released_search_latch = FALSE;
1437
for (i = 0; i < srv_buf_pool_instances; i++) {
1438
buf_pool_t* buf_pool;
1440
buf_pool = buf_pool_from_array(i);
1442
buf_pool_drop_hash_index_instance(
1443
buf_pool, &released_search_latch);
1052
while (--chunk >= chunks) {
1053
buf_block_t* block = chunk->blocks;
1054
ulint i = chunk->size;
1056
for (; i--; block++) {
1057
/* block->is_hashed cannot be modified
1058
when we have an x-latch on btr_search_latch;
1059
see the comment in buf0buf.h */
1061
if (!block->is_hashed) {
1065
/* To follow the latching order, we
1066
have to release btr_search_latch
1067
before acquiring block->latch. */
1068
rw_lock_x_unlock(&btr_search_latch);
1069
/* When we release the search latch,
1070
we must rescan all blocks, because
1071
some may become hashed again. */
1072
released_search_latch = TRUE;
1074
rw_lock_x_lock(&block->lock);
1076
/* This should be guaranteed by the
1077
callers, which will be holding
1078
btr_search_enabled_mutex. */
1079
ut_ad(!btr_search_enabled);
1081
/* Because we did not buffer-fix the
1082
block by calling buf_block_get_gen(),
1083
it is possible that the block has been
1084
allocated for some other use after
1085
btr_search_latch was released above.
1086
We do not care which file page the
1087
block is mapped to. All we want to do
1088
is to drop any hash entries referring
1091
/* It is possible that
1092
block->page.state != BUF_FILE_PAGE.
1093
Even that does not matter, because
1094
btr_search_drop_page_hash_index() will
1095
check block->is_hashed before doing
1096
anything. block->is_hashed can only
1097
be set on uncompressed file pages. */
1099
btr_search_drop_page_hash_index(block);
1101
rw_lock_x_unlock(&block->lock);
1103
rw_lock_x_lock(&btr_search_latch);
1105
ut_ad(!btr_search_enabled);
1446
1108
} while (released_search_latch);
1675
1335
goto shrink_again;
1680
buf_pool->old_pool_size = buf_pool->curr_pool_size;
1339
srv_buf_pool_old_size = srv_buf_pool_size;
1682
buf_pool_mutex_exit(buf_pool);
1341
buf_pool_mutex_exit();
1683
1342
btr_search_enable();
1686
1345
/********************************************************************//**
1687
Shrinks the buffer pool. */
1692
ulint chunk_size) /*!< in: number of pages to remove */
1696
for (i = 0; i < srv_buf_pool_instances; i++) {
1697
buf_pool_t* buf_pool;
1698
ulint instance_chunk_size;
1700
instance_chunk_size = chunk_size / srv_buf_pool_instances;
1701
buf_pool = buf_pool_from_array(i);
1702
buf_pool_shrink_instance(buf_pool, instance_chunk_size);
1705
buf_pool_set_sizes();
1708
/********************************************************************//**
1709
Rebuild buf_pool->page_hash for a buffer pool instance. */
1712
buf_pool_page_hash_rebuild_instance(
1713
/*================================*/
1714
buf_pool_t* buf_pool) /*!< in: buffer pool instance */
1346
Rebuild buf_pool->page_hash. */
1349
buf_pool_page_hash_rebuild(void)
1350
/*============================*/
1719
1353
ulint n_chunks;
1355
hash_table_t* page_hash;
1720
1356
hash_table_t* zip_hash;
1721
hash_table_t* page_hash;
1723
buf_pool_mutex_enter(buf_pool);
1359
buf_pool_mutex_enter();
1725
1361
/* Free, create, and populate the hash table. */
1726
1362
hash_table_free(buf_pool->page_hash);
1804
buf_flush_list_mutex_exit(buf_pool);
1805
buf_pool_mutex_exit(buf_pool);
1808
/********************************************************************
1809
Determine if a block is a sentinel for a buffer pool watch.
1810
@return TRUE if a sentinel for a buffer pool watch, FALSE if not */
1813
buf_pool_watch_is_sentinel(
1814
/*=======================*/
1815
buf_pool_t* buf_pool, /*!< buffer pool instance */
1816
const buf_page_t* bpage) /*!< in: block */
1818
ut_ad(buf_page_in_file(bpage));
1820
if (bpage < &buf_pool->watch[0]
1821
|| bpage >= &buf_pool->watch[BUF_POOL_WATCH_SIZE]) {
1823
ut_ad(buf_page_get_state(bpage) != BUF_BLOCK_ZIP_PAGE
1824
|| bpage->zip.data != NULL);
1829
ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_PAGE);
1830
ut_ad(!bpage->in_zip_hash);
1831
ut_ad(bpage->in_page_hash);
1832
ut_ad(bpage->zip.data == NULL);
1833
ut_ad(bpage->buf_fix_count > 0);
1837
/****************************************************************//**
1838
Add watch for the given page to be read in. Caller must have the buffer pool
1840
@return NULL if watch set, block if the page is in the buffer pool */
1845
ulint space, /*!< in: space id */
1846
ulint offset, /*!< in: page number */
1847
ulint fold) /*!< in: buf_page_address_fold(space, offset) */
1851
buf_pool_t* buf_pool = buf_pool_get(space, offset);
1853
ut_ad(buf_pool_mutex_own(buf_pool));
1855
bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
1857
if (UNIV_LIKELY_NULL(bpage)) {
1858
if (!buf_pool_watch_is_sentinel(buf_pool, bpage)) {
1859
/* The page was loaded meanwhile. */
1862
/* Add to an existing watch. */
1863
bpage->buf_fix_count++;
1867
for (i = 0; i < BUF_POOL_WATCH_SIZE; i++) {
1868
bpage = &buf_pool->watch[i];
1870
ut_ad(bpage->access_time == 0);
1871
ut_ad(bpage->newest_modification == 0);
1872
ut_ad(bpage->oldest_modification == 0);
1873
ut_ad(bpage->zip.data == NULL);
1874
ut_ad(!bpage->in_zip_hash);
1876
switch (bpage->state) {
1877
case BUF_BLOCK_POOL_WATCH:
1878
ut_ad(!bpage->in_page_hash);
1879
ut_ad(bpage->buf_fix_count == 0);
1881
/* bpage is pointing to buf_pool->watch[],
1882
which is protected by buf_pool->mutex.
1883
Normally, buf_page_t objects are protected by
1884
buf_block_t::mutex or buf_pool->zip_mutex or both. */
1886
bpage->state = BUF_BLOCK_ZIP_PAGE;
1887
bpage->space = space;
1888
bpage->offset = offset;
1889
bpage->buf_fix_count = 1;
1891
ut_d(bpage->in_page_hash = TRUE);
1892
HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
1895
case BUF_BLOCK_ZIP_PAGE:
1896
ut_ad(bpage->in_page_hash);
1897
ut_ad(bpage->buf_fix_count > 0);
1904
/* Allocation failed. Either the maximum number of purge
1905
threads should never exceed BUF_POOL_WATCH_SIZE, or this code
1906
should be modified to return a special non-NULL value and the
1907
caller should purge the record directly. */
1910
/* Fix compiler warning */
1914
/********************************************************************//**
1915
Rebuild buf_pool->page_hash. */
1918
buf_pool_page_hash_rebuild(void)
1919
/*============================*/
1923
for (i = 0; i < srv_buf_pool_instances; i++) {
1924
buf_pool_page_hash_rebuild_instance(buf_pool_from_array(i));
1928
/********************************************************************//**
1929
Increase the buffer pool size of one buffer pool instance. */
1932
buf_pool_increase_instance(
1933
/*=======================*/
1934
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
1935
ulint change_size) /*!< in: new size of the pool */
1937
buf_chunk_t* chunks;
1940
buf_pool_mutex_enter(buf_pool);
1941
chunks = static_cast<buf_chunk_t *>(mem_alloc((buf_pool->n_chunks + 1) * sizeof *chunks));
1943
memcpy(chunks, buf_pool->chunks, buf_pool->n_chunks * sizeof *chunks);
1945
chunk = &chunks[buf_pool->n_chunks];
1947
if (!buf_chunk_init(buf_pool, chunk, change_size)) {
1950
buf_pool->old_pool_size = buf_pool->curr_pool_size;
1951
buf_pool->curr_size += chunk->size;
1952
buf_pool->curr_pool_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
1953
mem_free(buf_pool->chunks);
1954
buf_pool->chunks = chunks;
1955
buf_pool->n_chunks++;
1958
buf_pool_mutex_exit(buf_pool);
1961
/********************************************************************//**
1962
Increase the buffer pool size. */
1971
for (i = 0; i < srv_buf_pool_instances; i++) {
1972
buf_pool_increase_instance(
1973
buf_pool_from_array(i),
1974
change_size / srv_buf_pool_instances);
1977
buf_pool_set_sizes();
1439
buf_pool_mutex_exit();
1980
1442
/********************************************************************//**
1984
1446
buf_pool_resize(void)
1985
1447
/*=================*/
1988
ulint min_change_size = 1048576 * srv_buf_pool_instances;
1990
buf_pool_mutex_enter_all();
1992
if (srv_buf_pool_old_size == srv_buf_pool_size) {
1994
buf_pool_mutex_exit_all();
1998
} else if (srv_buf_pool_curr_size + min_change_size
1999
> srv_buf_pool_size) {
2001
change_size = (srv_buf_pool_curr_size - srv_buf_pool_size)
2004
buf_pool_mutex_exit_all();
2006
/* Disable adaptive hash indexes and empty the index
2007
in order to free up memory in the buffer pool chunks. */
2008
buf_pool_shrink(change_size);
2010
} else if (srv_buf_pool_curr_size + min_change_size
2011
< srv_buf_pool_size) {
2013
/* Enlarge the buffer pool by at least one megabyte */
2015
change_size = srv_buf_pool_size - srv_buf_pool_curr_size;
2017
buf_pool_mutex_exit_all();
2019
buf_pool_increase(change_size);
2021
srv_buf_pool_size = srv_buf_pool_old_size;
2023
buf_pool_mutex_exit_all();
1449
buf_pool_mutex_enter();
1451
if (srv_buf_pool_old_size == srv_buf_pool_size) {
1453
buf_pool_mutex_exit();
2028
buf_pool_page_hash_rebuild();
2031
/****************************************************************//**
2032
Remove the sentinel block for the watch before replacing it with a real block.
2033
buf_page_watch_clear() or buf_page_watch_occurred() will notice that
2034
the block has been replaced with the real block.
2035
@return reference count, to be added to the replacement block */
2038
buf_pool_watch_remove(
2039
/*==================*/
2040
buf_pool_t* buf_pool, /*!< buffer pool instance */
2041
ulint fold, /*!< in: buf_page_address_fold(
2043
buf_page_t* watch) /*!< in/out: sentinel for watch */
2045
ut_ad(buf_pool_mutex_own(buf_pool));
2047
HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, watch);
2048
ut_d(watch->in_page_hash = FALSE);
2049
watch->buf_fix_count = 0;
2050
watch->state = BUF_BLOCK_POOL_WATCH;
2053
/****************************************************************//**
2054
Stop watching if the page has been read in.
2055
buf_pool_watch_set(space,offset) must have returned NULL before. */
2058
buf_pool_watch_unset(
2059
/*=================*/
2060
ulint space, /*!< in: space id */
2061
ulint offset) /*!< in: page number */
2064
buf_pool_t* buf_pool = buf_pool_get(space, offset);
2065
ulint fold = buf_page_address_fold(space, offset);
2067
buf_pool_mutex_enter(buf_pool);
2068
bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
2069
/* The page must exist because buf_pool_watch_set()
2070
increments buf_fix_count. */
2073
if (UNIV_UNLIKELY(!buf_pool_watch_is_sentinel(buf_pool, bpage))) {
2074
mutex_t* mutex = buf_page_get_mutex(bpage);
2077
ut_a(bpage->buf_fix_count > 0);
2078
bpage->buf_fix_count--;
2081
ut_a(bpage->buf_fix_count > 0);
2083
if (UNIV_LIKELY(!--bpage->buf_fix_count)) {
2084
buf_pool_watch_remove(buf_pool, fold, bpage);
1457
if (srv_buf_pool_curr_size + 1048576 > srv_buf_pool_size) {
1459
buf_pool_mutex_exit();
1461
/* Disable adaptive hash indexes and empty the index
1462
in order to free up memory in the buffer pool chunks. */
1463
buf_pool_shrink((srv_buf_pool_curr_size - srv_buf_pool_size)
1465
} else if (srv_buf_pool_curr_size + 1048576 < srv_buf_pool_size) {
1467
/* Enlarge the buffer pool by at least one megabyte */
1470
= srv_buf_pool_size - srv_buf_pool_curr_size;
1471
buf_chunk_t* chunks;
1474
chunks = mem_alloc((buf_pool->n_chunks + 1) * sizeof *chunks);
1476
memcpy(chunks, buf_pool->chunks, buf_pool->n_chunks
1479
chunk = &chunks[buf_pool->n_chunks];
1481
if (!buf_chunk_init(chunk, mem_size)) {
1484
buf_pool->curr_size += chunk->size;
1485
srv_buf_pool_curr_size = buf_pool->curr_size
1487
mem_free(buf_pool->chunks);
1488
buf_pool->chunks = chunks;
1489
buf_pool->n_chunks++;
1492
srv_buf_pool_old_size = srv_buf_pool_size;
1493
buf_pool_mutex_exit();
2088
buf_pool_mutex_exit(buf_pool);
2091
/****************************************************************//**
2092
Check if the page has been read in.
2093
This may only be called after buf_pool_watch_set(space,offset)
2094
has returned NULL and before invoking buf_pool_watch_unset(space,offset).
2095
@return FALSE if the given page was not read in, TRUE if it was */
2098
buf_pool_watch_occurred(
2099
/*====================*/
2100
ulint space, /*!< in: space id */
2101
ulint offset) /*!< in: page number */
2105
buf_pool_t* buf_pool = buf_pool_get(space, offset);
2106
ulint fold = buf_page_address_fold(space, offset);
2108
buf_pool_mutex_enter(buf_pool);
2110
bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
2111
/* The page must exist because buf_pool_watch_set()
2112
increments buf_fix_count. */
2114
ret = !buf_pool_watch_is_sentinel(buf_pool, bpage);
2115
buf_pool_mutex_exit(buf_pool);
1496
buf_pool_page_hash_rebuild();
2120
1499
/********************************************************************//**
2659
1994
/********************************************************************//**
2660
Find out if a pointer belongs to a buf_block_t. It can be a pointer to
2661
the buf_block_t itself or a member of it
2662
@return TRUE if ptr belongs to a buf_block_t struct */
2665
buf_pointer_is_block_field(
2666
/*=======================*/
2667
const void* ptr) /*!< in: pointer not dereferenced */
2671
for (i = 0; i < srv_buf_pool_instances; i++) {
2674
found = buf_pointer_is_block_field_instance(
2675
buf_pool_from_array(i), ptr);
2684
/********************************************************************//**
2685
1995
Find out if a buffer block was created by buf_chunk_init().
2686
1996
@return TRUE if "block" has been added to buf_pool->free by buf_chunk_init() */
2689
1999
buf_block_is_uncompressed(
2690
2000
/*======================*/
2691
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
2692
const buf_block_t* block) /*!< in: pointer to block,
2001
const buf_block_t* block) /*!< in: pointer to block,
2695
ut_ad(buf_pool_mutex_own(buf_pool));
2004
ut_ad(buf_pool_mutex_own());
2697
2006
if (UNIV_UNLIKELY((((ulint) block) % sizeof *block) != 0)) {
2698
2007
/* The pointer should be aligned. */
2702
return(buf_pointer_is_block_field_instance(buf_pool, (void *)block));
2011
return(buf_pointer_is_block_field((void *)block));
2705
2014
/********************************************************************//**
2716
2025
ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
2717
2026
buf_block_t* guess, /*!< in: guessed block or NULL */
2718
2027
ulint mode, /*!< in: BUF_GET, BUF_GET_IF_IN_POOL,
2719
BUF_GET_NO_LATCH, or
2720
BUF_GET_IF_IN_POOL_OR_WATCH */
2721
2029
const char* file, /*!< in: file name */
2722
2030
ulint line, /*!< in: line where called */
2723
2031
mtr_t* mtr) /*!< in: mini-transaction */
2725
2033
buf_block_t* block;
2727
2034
unsigned access_time;
2728
2035
ulint fix_type;
2729
2036
ibool must_read;
2731
buf_pool_t* buf_pool = buf_pool_get(space, offset);
2734
ut_ad(mtr->state == MTR_ACTIVE);
2735
2039
ut_ad((rw_latch == RW_S_LATCH)
2736
2040
|| (rw_latch == RW_X_LATCH)
2737
2041
|| (rw_latch == RW_NO_LATCH));
2738
2042
ut_ad((mode != BUF_GET_NO_LATCH) || (rw_latch == RW_NO_LATCH));
2739
ut_ad(mode == BUF_GET
2740
|| mode == BUF_GET_IF_IN_POOL
2741
|| mode == BUF_GET_NO_LATCH
2742
|| mode == BUF_GET_IF_IN_POOL_OR_WATCH);
2043
ut_ad((mode == BUF_GET) || (mode == BUF_GET_IF_IN_POOL)
2044
|| (mode == BUF_GET_NO_LATCH));
2743
2045
ut_ad(zip_size == fil_space_get_zip_size(space));
2744
2046
ut_ad(ut_is_2pow(zip_size));
2745
2047
#ifndef UNIV_LOG_DEBUG
2746
2048
ut_ad(!ibuf_inside() || ibuf_page(space, zip_size, offset, NULL));
2748
2050
buf_pool->stat.n_page_gets++;
2749
fold = buf_page_address_fold(space, offset);
2752
buf_pool_mutex_enter(buf_pool);
2053
buf_pool_mutex_enter();
2755
2056
/* If the guess is a compressed page descriptor that
2775
2076
if (block == NULL) {
2776
block = (buf_block_t*) buf_page_hash_get_low(
2777
buf_pool, space, offset, fold);
2077
block = (buf_block_t*) buf_page_hash_get(space, offset);
2781
if (block && buf_pool_watch_is_sentinel(buf_pool, &block->page)) {
2785
2081
if (block == NULL) {
2786
2082
/* Page not in buf_pool: needs to be read from file */
2788
if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
2789
block = (buf_block_t*) buf_pool_watch_set(
2790
space, offset, fold);
2792
if (UNIV_LIKELY_NULL(block)) {
2798
buf_pool_mutex_exit(buf_pool);
2800
if (mode == BUF_GET_IF_IN_POOL
2801
|| mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
2084
buf_pool_mutex_exit();
2086
if (mode == BUF_GET_IF_IN_POOL) {
2806
if (buf_read_page(space, zip_size, offset)) {
2808
} else if (retries < BUF_PAGE_READ_MAX_RETRIES) {
2811
fprintf(stderr, "InnoDB: Error: Unable"
2812
" to read tablespace %lu page no"
2813
" %lu into the buffer pool after"
2815
"InnoDB: The most probable cause"
2816
" of this error may be that the"
2817
" table has been corrupted.\n"
2818
"InnoDB: You can try to fix this"
2820
" innodb_force_recovery.\n"
2821
"InnoDB: Please see reference manual"
2822
" for more details.\n"
2823
"InnoDB: Aborting...\n",
2825
BUF_PAGE_READ_MAX_RETRIES);
2091
buf_read_page(space, zip_size, offset);
2830
2093
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
2831
2094
ut_a(++buf_dbg_counter % 37 || buf_validate());
2859
2118
case BUF_BLOCK_ZIP_DIRTY:
2860
2119
bpage = &block->page;
2861
2120
/* Protect bpage->buf_fix_count. */
2862
mutex_enter(&buf_pool->zip_mutex);
2121
mutex_enter(&buf_pool_zip_mutex);
2864
2123
if (bpage->buf_fix_count
2865
2124
|| buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
2866
2125
/* This condition often occurs when the buffer
2867
2126
is not buffer-fixed, but I/O-fixed by
2868
2127
buf_page_init_for_read(). */
2869
mutex_exit(&buf_pool->zip_mutex);
2128
mutex_exit(&buf_pool_zip_mutex);
2870
2129
wait_until_unfixed:
2871
2130
/* The block is buffer-fixed or I/O-fixed.
2872
2131
Try again later. */
2873
buf_pool_mutex_exit(buf_pool);
2132
buf_pool_mutex_exit();
2874
2133
os_thread_sleep(WAIT_FOR_READ);
2879
2138
/* Allocate an uncompressed page. */
2880
buf_pool_mutex_exit(buf_pool);
2881
mutex_exit(&buf_pool->zip_mutex);
2139
buf_pool_mutex_exit();
2140
mutex_exit(&buf_pool_zip_mutex);
2883
block = buf_LRU_get_free_block(buf_pool, 0);
2142
block = buf_LRU_get_free_block(0);
2886
buf_pool_mutex_enter(buf_pool);
2145
buf_pool_mutex_enter();
2887
2146
mutex_enter(&block->mutex);
2890
buf_page_t* hash_bpage;
2892
hash_bpage = buf_page_hash_get_low(
2893
buf_pool, space, offset, fold);
2149
buf_page_t* hash_bpage
2150
= buf_page_hash_get(space, offset);
2895
2152
if (UNIV_UNLIKELY(bpage != hash_bpage)) {
2896
2153
/* The buf_pool->page_hash was modified
2897
while buf_pool->mutex was released.
2154
while buf_pool_mutex was released.
2898
2155
Free the block that was allocated. */
2900
2157
buf_LRU_block_free_non_file_page(block);
2954
2225
block->page.buf_fix_count = 1;
2955
2226
buf_block_set_io_fix(block, BUF_IO_READ);
2956
rw_lock_x_lock_func(&block->lock, 0, file, line);
2958
UNIV_MEM_INVALID(bpage, sizeof *bpage);
2227
rw_lock_x_lock(&block->lock);
2960
2228
mutex_exit(&block->mutex);
2961
mutex_exit(&buf_pool->zip_mutex);
2229
mutex_exit(&buf_pool_zip_mutex);
2962
2230
buf_pool->n_pend_unzip++;
2964
buf_buddy_free(buf_pool, bpage, sizeof *bpage);
2232
buf_buddy_free(bpage, sizeof *bpage);
2966
buf_pool_mutex_exit(buf_pool);
2234
buf_pool_mutex_exit();
2968
2236
/* Decompress the page and apply buffered operations
2969
while not holding buf_pool->mutex or block->mutex. */
2237
while not holding buf_pool_mutex or block->mutex. */
2970
2238
success = buf_zip_decompress(block, srv_use_checksums);
2973
if (UNIV_LIKELY(!recv_no_ibuf_operations)) {
2240
if (UNIV_LIKELY(success)) {
2974
2241
ibuf_merge_or_delete_for_page(block, space, offset,
2975
2242
zip_size, TRUE);
2978
2245
/* Unfix and unlatch the block. */
2979
buf_pool_mutex_enter(buf_pool);
2246
buf_pool_mutex_enter();
2980
2247
mutex_enter(&block->mutex);
2981
2248
block->page.buf_fix_count--;
2982
2249
buf_block_set_io_fix(block, BUF_IO_NONE);
2998
2271
ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
3000
2273
mutex_enter(&block->mutex);
3001
#if UNIV_WORD_SIZE == 4
3002
/* On 32-bit systems, there is no padding in buf_page_t. On
3003
other systems, Valgrind could complain about uninitialized pad
3005
2274
UNIV_MEM_ASSERT_RW(&block->page, sizeof block->page);
3007
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
3008
if ((mode == BUF_GET_IF_IN_POOL || mode == BUF_GET_IF_IN_POOL_OR_WATCH)
3010
/* Try to evict the block from the buffer pool, to use the
3011
insert buffer (change buffer) as much as possible. */
3013
if (buf_LRU_free_block(&block->page, TRUE, NULL)
3015
mutex_exit(&block->mutex);
3016
if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
3017
/* Set the watch, as it would have
3018
been set if the page were not in the
3019
buffer pool in the first place. */
3020
block = (buf_block_t*) buf_pool_watch_set(
3021
space, offset, fold);
3023
if (UNIV_LIKELY_NULL(block)) {
3025
/* The page entered the buffer
3026
pool for some reason. Try to
3031
buf_pool_mutex_exit(buf_pool);
3033
"innodb_change_buffering_debug evict %u %u\n",
3034
(unsigned) space, (unsigned) offset);
3036
} else if (buf_flush_page_try(buf_pool, block)) {
3038
"innodb_change_buffering_debug flush %u %u\n",
3039
(unsigned) space, (unsigned) offset);
3044
/* Failed to evict the page; change it directly */
3046
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
3048
2276
buf_block_buf_fix_inc(block, file, line);
3484
2696
buf_block_init_low(block);
3486
block->lock_hash_val = lock_rec_hash(space, offset);
3488
buf_page_init_low(&block->page);
2698
block->lock_hash_val = lock_rec_hash(space, offset);
3490
2700
/* Insert into the hash table of file pages */
3492
hash_page = buf_page_hash_get_low(buf_pool, space, offset, fold);
3494
if (UNIV_LIKELY(!hash_page)) {
3495
} else if (buf_pool_watch_is_sentinel(buf_pool, hash_page)) {
3496
/* Preserve the reference count. */
3497
ulint buf_fix_count = hash_page->buf_fix_count;
3499
ut_a(buf_fix_count > 0);
3500
block->page.buf_fix_count += buf_fix_count;
3501
buf_pool_watch_remove(buf_pool, fold, hash_page);
2702
hash_page = buf_page_hash_get(space, offset);
2704
if (UNIV_LIKELY_NULL(hash_page)) {
3503
2705
fprintf(stderr,
3504
2706
"InnoDB: Error: page %lu %lu already found"
3505
2707
" in the hash table: %p, %p\n",
3644
2836
if (UNIV_UNLIKELY(zip_size)) {
3645
2837
page_zip_set_size(&block->page.zip, zip_size);
3647
/* buf_pool->mutex may be released and
2839
/* buf_pool_mutex may be released and
3648
2840
reacquired by buf_buddy_alloc(). Thus, we
3649
2841
must release block->mutex in order not to
3650
2842
break the latching order in the reacquisition
3651
of buf_pool->mutex. We also must defer this
2843
of buf_pool_mutex. We also must defer this
3652
2844
operation until after the block descriptor has
3653
2845
been added to buf_pool->LRU and
3654
2846
buf_pool->page_hash. */
3655
2847
mutex_exit(&block->mutex);
3656
data = buf_buddy_alloc(buf_pool, zip_size, &lru);
2848
data = buf_buddy_alloc(zip_size, &lru);
3657
2849
mutex_enter(&block->mutex);
3658
block->page.zip.data = static_cast<unsigned char *>(data);
2850
block->page.zip.data = data;
3660
2852
/* To maintain the invariant
3661
2853
block->in_unzip_LRU_list
3677
2869
control block (bpage), in order to avoid the
3678
2870
invocation of buf_buddy_relocate_block() on
3679
2871
uninitialized data. */
3680
data = buf_buddy_alloc(buf_pool, zip_size, &lru);
3681
bpage = static_cast<buf_page_struct *>(buf_buddy_alloc(buf_pool, sizeof *bpage, &lru));
3683
/* Initialize the buf_pool pointer. */
3684
bpage->buf_pool_index = buf_pool_index(buf_pool);
2872
data = buf_buddy_alloc(zip_size, &lru);
2873
bpage = buf_buddy_alloc(sizeof *bpage, &lru);
3686
2875
/* If buf_buddy_alloc() allocated storage from the LRU list,
3687
it released and reacquired buf_pool->mutex. Thus, we must
2876
it released and reacquired buf_pool_mutex. Thus, we must
3688
2877
check the page_hash again, as it may have been modified. */
3689
if (UNIV_UNLIKELY(lru)) {
3691
watch_page = buf_page_hash_get_low(
3692
buf_pool, space, offset, fold);
3695
&& !buf_pool_watch_is_sentinel(buf_pool,
3698
/* The block was added by some other thread. */
3700
buf_buddy_free(buf_pool, bpage, sizeof *bpage);
3701
buf_buddy_free(buf_pool, data, zip_size);
2878
if (UNIV_UNLIKELY(lru)
2879
&& UNIV_LIKELY_NULL(buf_page_hash_get(space, offset))) {
2881
/* The block was added by some other thread. */
2882
buf_buddy_free(bpage, sizeof *bpage);
2883
buf_buddy_free(data, zip_size);
3708
2889
page_zip_des_init(&bpage->zip);
3709
2890
page_zip_set_size(&bpage->zip, zip_size);
3710
bpage->zip.data = static_cast<unsigned char *>(data);
2891
bpage->zip.data = data;
3712
mutex_enter(&buf_pool->zip_mutex);
2893
mutex_enter(&buf_pool_zip_mutex);
3713
2894
UNIV_MEM_DESC(bpage->zip.data,
3714
2895
page_zip_get_size(&bpage->zip), bpage);
3716
2896
buf_page_init_low(bpage);
3718
2897
bpage->state = BUF_BLOCK_ZIP_PAGE;
3719
2898
bpage->space = space;
3720
2899
bpage->offset = offset;
3723
2901
#ifdef UNIV_DEBUG
3724
2902
bpage->in_page_hash = FALSE;
3725
2903
bpage->in_zip_hash = FALSE;
3729
2907
#endif /* UNIV_DEBUG */
3731
2909
ut_d(bpage->in_page_hash = TRUE);
3733
if (UNIV_LIKELY_NULL(watch_page)) {
3734
/* Preserve the reference count. */
3735
ulint buf_fix_count = watch_page->buf_fix_count;
3736
ut_a(buf_fix_count > 0);
3737
bpage->buf_fix_count += buf_fix_count;
3738
ut_ad(buf_pool_watch_is_sentinel(buf_pool, watch_page));
3739
buf_pool_watch_remove(buf_pool, fold, watch_page);
3742
HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold,
2910
HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
2911
buf_page_address_fold(space, offset), bpage);
3745
2913
/* The block must be put to the LRU list, to the old blocks */
3746
2914
buf_LRU_add_block(bpage, TRUE/* to old blocks */);
3783
2951
buf_frame_t* frame;
3784
2952
buf_block_t* block;
3786
2953
buf_block_t* free_block = NULL;
3787
2954
ulint time_ms = ut_time_ms();
3788
buf_pool_t* buf_pool = buf_pool_get(space, offset);
3791
ut_ad(mtr->state == MTR_ACTIVE);
3792
2957
ut_ad(space || !zip_size);
3794
free_block = buf_LRU_get_free_block(buf_pool, 0);
3796
fold = buf_page_address_fold(space, offset);
3798
buf_pool_mutex_enter(buf_pool);
3800
block = (buf_block_t*) buf_page_hash_get_low(
3801
buf_pool, space, offset, fold);
3804
&& buf_page_in_file(&block->page)
3805
&& !buf_pool_watch_is_sentinel(buf_pool, &block->page)) {
2959
free_block = buf_LRU_get_free_block(0);
2961
buf_pool_mutex_enter();
2963
block = (buf_block_t*) buf_page_hash_get(space, offset);
2965
if (block && buf_page_in_file(&block->page)) {
3806
2966
#ifdef UNIV_IBUF_COUNT_DEBUG
3807
2967
ut_a(ibuf_count_get(space, offset) == 0);
3854
3014
page_zip_set_size(&block->page.zip, zip_size);
3855
3015
mutex_exit(&block->mutex);
3856
/* buf_pool->mutex may be released and reacquired by
3016
/* buf_pool_mutex may be released and reacquired by
3857
3017
buf_buddy_alloc(). Thus, we must release block->mutex
3858
3018
in order not to break the latching order in
3859
the reacquisition of buf_pool->mutex. We also must
3019
the reacquisition of buf_pool_mutex. We also must
3860
3020
defer this operation until after the block descriptor
3861
3021
has been added to buf_pool->LRU and buf_pool->page_hash. */
3862
data = buf_buddy_alloc(buf_pool, zip_size, &lru);
3022
data = buf_buddy_alloc(zip_size, &lru);
3863
3023
mutex_enter(&block->mutex);
3864
block->page.zip.data = static_cast<unsigned char *>(data);
3024
block->page.zip.data = data;
3866
3026
/* To maintain the invariant
3867
3027
block->in_unzip_LRU_list
4125
3284
#endif /* UNIV_DEBUG */
4127
3286
mutex_exit(buf_page_get_mutex(bpage));
4128
buf_pool_mutex_exit(buf_pool);
4131
/*********************************************************************//**
4132
Asserts that all file pages in the buffer are in a replaceable state.
4136
buf_all_freed_instance(
4137
/*===================*/
4138
buf_pool_t* buf_pool) /*!< in: buffer pool instance */
4145
buf_pool_mutex_enter(buf_pool);
4147
chunk = buf_pool->chunks;
4149
for (i = buf_pool->n_chunks; i--; chunk++) {
4151
const buf_block_t* block = buf_chunk_not_freed(chunk);
4153
if (UNIV_LIKELY_NULL(block)) {
4155
"Page %lu %lu still fixed or dirty\n",
4156
(ulong) block->page.space,
4157
(ulong) block->page.offset);
4162
buf_pool_mutex_exit(buf_pool);
4167
/*********************************************************************//**
4168
Invalidates file pages in one buffer pool instance */
3287
buf_pool_mutex_exit();
3290
/*********************************************************************//**
3291
Invalidates the file pages in the buffer pool when an archive recovery is
3292
completed. All the file pages buffered must be in a replaceable state when
3293
this function is called: not latched and not modified. */
4171
buf_pool_invalidate_instance(
4172
/*=========================*/
4173
buf_pool_t* buf_pool) /*!< in: buffer pool instance */
3296
buf_pool_invalidate(void)
3297
/*=====================*/
4178
buf_pool_mutex_enter(buf_pool);
3302
buf_pool_mutex_enter();
4180
3304
for (i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) {
4218
3342
buf_pool->LRU_flush_ended = 0;
4220
3344
memset(&buf_pool->stat, 0x00, sizeof(buf_pool->stat));
4221
buf_refresh_io_stats(buf_pool);
4223
buf_pool_mutex_exit(buf_pool);
4226
/*********************************************************************//**
4227
Invalidates the file pages in the buffer pool when an archive recovery is
4228
completed. All the file pages buffered must be in a replaceable state when
4229
this function is called: not latched and not modified. */
4232
buf_pool_invalidate(void)
4233
/*=====================*/
4237
for (i = 0; i < srv_buf_pool_instances; i++) {
4238
buf_pool_invalidate_instance(buf_pool_from_array(i));
3345
buf_refresh_io_stats();
3347
buf_pool_mutex_exit();
4242
3350
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
4243
3351
/*********************************************************************//**
4244
Validates data in one buffer pool instance
3352
Validates the buffer buf_pool data structure.
4245
3353
@return TRUE */
4248
buf_pool_validate_instance(
4249
/*=======================*/
4250
buf_pool_t* buf_pool) /*!< in: buffer pool instance */
4253
3360
buf_chunk_t* chunk;
4377
/* It is OK to read oldest_modification here because
4378
we have acquired buf_pool->zip_mutex above which acts
4379
as the 'block->mutex' for these bpages. */
4380
3487
ut_a(!b->oldest_modification);
4381
ut_a(buf_page_hash_get(buf_pool, b->space, b->offset) == b);
3488
ut_a(buf_page_hash_get(b->space, b->offset) == b);
4387
/* Check dirty blocks. */
3494
/* Check dirty compressed-only blocks. */
4389
buf_flush_list_mutex_enter(buf_pool);
4390
3496
for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
4391
3497
b = UT_LIST_GET_NEXT(list, b)) {
4392
3498
ut_ad(b->in_flush_list);
4393
ut_a(b->oldest_modification);
4396
3500
switch (buf_page_get_state(b)) {
4397
3501
case BUF_BLOCK_ZIP_DIRTY:
3502
ut_a(b->oldest_modification);
4400
3506
switch (buf_page_get_io_fix(b)) {
4401
3507
case BUF_IO_NONE:
4402
3508
case BUF_IO_READ:
4404
3511
case BUF_IO_WRITE:
4405
3512
switch (buf_page_get_flush_type(b)) {
4406
3513
case BUF_FLUSH_LRU:
4453
3556
(ulong) n_free);
3559
ut_a(UT_LIST_GET_LEN(buf_pool->flush_list) == n_flush);
4457
3561
ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_single_flush);
4458
3562
ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
4459
3563
ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush);
4461
buf_pool_mutex_exit(buf_pool);
3565
buf_pool_mutex_exit();
4463
3567
ut_a(buf_LRU_validate());
4464
ut_a(buf_flush_validate(buf_pool));
4469
/*********************************************************************//**
4470
Validates the buffer buf_pool data structure.
4479
for (i = 0; i < srv_buf_pool_instances; i++) {
4480
buf_pool_t* buf_pool;
4482
buf_pool = buf_pool_from_array(i);
4484
buf_pool_validate_instance(buf_pool);
3568
ut_a(buf_flush_validate());
4489
3572
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
4491
3574
#if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
4492
3575
/*********************************************************************//**
4493
Prints info of the buffer buf_pool data structure for one instance. */
3576
Prints info of the buffer buf_pool data structure. */
4498
buf_pool_t* buf_pool)
4500
index_id_t* index_ids;
4507
3589
buf_chunk_t* chunk;
4508
3590
dict_index_t* index;
4902
3922
"LRU len: %lu, unzip_LRU len: %lu\n"
4903
3923
"I/O sum[%lu]:cur[%lu], unzip sum[%lu]:cur[%lu]\n",
4904
static_cast<ulint>(UT_LIST_GET_LEN(buf_pool->LRU)),
4905
static_cast<ulint>(UT_LIST_GET_LEN(buf_pool->unzip_LRU)),
3924
UT_LIST_GET_LEN(buf_pool->LRU),
3925
UT_LIST_GET_LEN(buf_pool->unzip_LRU),
4906
3926
buf_LRU_stat_sum.io, buf_LRU_stat_cur.io,
4907
3927
buf_LRU_stat_sum.unzip, buf_LRU_stat_cur.unzip);
4909
buf_refresh_io_stats(buf_pool);
4910
buf_pool_mutex_exit(buf_pool);
4913
/*********************************************************************//**
4914
Prints info of the buffer i/o. */
4919
FILE* file) /*!< in/out: buffer where to print */
4923
for (i = 0; i < srv_buf_pool_instances; i++) {
4924
buf_pool_t* buf_pool;
4926
buf_pool = buf_pool_from_array(i);
4927
buf_print_io_instance(buf_pool, file);
3929
buf_refresh_io_stats();
3930
buf_pool_mutex_exit();
4931
3933
/**********************************************************************//**
4932
3934
Refreshes the statistics used to print per-second averages. */
4935
buf_refresh_io_stats(
4936
/*=================*/
4937
buf_pool_t* buf_pool) /*!< in: buffer pool instance */
3937
buf_refresh_io_stats(void)
3938
/*======================*/
4939
buf_pool->last_printout_time = ut_time();
3940
buf_pool->last_printout_time = time(NULL);
4940
3941
buf_pool->old_stat = buf_pool->stat;
4943
/**********************************************************************//**
4944
Refreshes the statistics used to print per-second averages. */
4947
buf_refresh_io_stats_all(void)
4948
/*==========================*/
4952
for (i = 0; i < srv_buf_pool_instances; i++) {
4953
buf_pool_t* buf_pool;
4955
buf_pool = buf_pool_from_array(i);
4957
buf_refresh_io_stats(buf_pool);
4961
/**********************************************************************//**
4962
Check if all pages in all buffer pools are in a replaceable state.
4963
@return FALSE if not */
3944
/*********************************************************************//**
3945
Asserts that all file pages in the buffer are in a replaceable state.
4966
3949
buf_all_freed(void)
4967
3950
/*===============*/
4971
for (i = 0; i < srv_buf_pool_instances; i++) {
4972
buf_pool_t* buf_pool;
4974
buf_pool = buf_pool_from_array(i);
4976
if (!buf_all_freed_instance(buf_pool)) {
3957
buf_pool_mutex_enter();
3959
chunk = buf_pool->chunks;
3961
for (i = buf_pool->n_chunks; i--; chunk++) {
3963
const buf_block_t* block = buf_chunk_not_freed(chunk);
3965
if (UNIV_LIKELY_NULL(block)) {
3967
"Page %lu %lu still fixed or dirty\n",
3968
(ulong) block->page.space,
3969
(ulong) block->page.offset);
3974
buf_pool_mutex_exit();
4984
3979
/*********************************************************************//**
4985
3980
Checks that there currently are no pending i/o-operations for the buffer
4990
3985
buf_pool_check_no_pending_io(void)
4991
3986
/*==============================*/
4996
buf_pool_mutex_enter_all();
4998
for (i = 0; i < srv_buf_pool_instances && ret; i++) {
4999
const buf_pool_t* buf_pool;
5001
buf_pool = buf_pool_from_array(i);
5003
if (buf_pool->n_pend_reads
5004
+ buf_pool->n_flush[BUF_FLUSH_LRU]
5005
+ buf_pool->n_flush[BUF_FLUSH_LIST]
5006
+ buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]) {
3990
buf_pool_mutex_enter();
3992
if (buf_pool->n_pend_reads + buf_pool->n_flush[BUF_FLUSH_LRU]
3993
+ buf_pool->n_flush[BUF_FLUSH_LIST]
3994
+ buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]) {
5012
buf_pool_mutex_exit_all();
4000
buf_pool_mutex_exit();
5018
Code currently not used
5019
4005
/*********************************************************************//**
5020
4006
Gets the current length of the free list of buffer blocks.
5021
4007
@return length of the free list */