1
/*****************************************************************************
3
Copyright (C) 1997, 2009, Innobase Oy. All Rights Reserved.
5
This program is free software; you can redistribute it and/or modify it under
6
the terms of the GNU General Public License as published by the Free Software
7
Foundation; version 2 of the License.
9
This program is distributed in the hope that it will be useful, but WITHOUT
10
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13
You should have received a copy of the GNU General Public License along with
14
this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
15
St, Fifth Floor, Boston, MA 02110-1301 USA
17
*****************************************************************************/
19
/********************************************************************//**
21
The lowest-level memory management
23
Created 5/12/1997 Heikki Tuuri
24
*************************************************************************/
28
#include "mem0pool.ic"
32
#include "sync0sync.h"
37
#include "srv0start.h"
39
/* We would like to use also the buffer frames to allocate memory. This
40
would be desirable, because then the memory consumption of the database
41
would be fixed, and we might even lock the buffer pool to the main memory.
42
The problem here is that the buffer management routines can themselves call
43
memory allocation, while the buffer pool mutex is reserved.
45
The main components of the memory consumption are:
48
2. parsed and optimized SQL statements,
49
3. data dictionary cache,
51
5. locks for each transaction,
52
6. hash table for the adaptive index,
53
7. state and buffers for each SQL query currently being executed,
54
8. session for each user, and
55
9. stack for each OS thread.
57
Items 1 and 2 are managed by an LRU algorithm. Items 5 and 6 can potentially
58
consume very much memory. Items 7 and 8 should consume quite little memory,
59
and the OS should take care of item 9, which should also consume little memory.
61
A solution to the memory management:
63
1. the buffer pool size is set separately;
64
2. log buffer size is set separately;
65
3. the common pool size for all the other entries, except 8, is set separately.
67
Problems: we may waste memory if the common pool is set too big. Another
68
problem is the locks, which may take very much space in big transactions.
69
Then the common pool size should be set very big. We can allow locks to take
70
space from the buffer pool, but the SQL optimizer is then unaware of the
71
usable size of the buffer pool. We could also combine the objects in the
72
common pool and the buffers in the buffer pool into a single LRU list and
73
manage it uniformly, but this approach does not take into account the parsing
74
and other costs unique to SQL statements.
76
The locks for a transaction can be seen as a part of the state of the
77
transaction. Hence, they should be stored in the common pool. We still
78
have the problem of a very big update transaction, for example, which
79
will set very many x-locks on rows, and the locks will consume a lot
80
of memory, say, half of the buffer pool size.
82
Another problem is what to do if we are not able to malloc a requested
83
block of memory from the common pool. Then we can request memory from
84
the operating system. If it does not help, a system error results.
86
Because 5 and 6 may potentially consume very much memory, we let them grow
87
into the buffer pool. We may let the locks of a transaction take frames
88
from the buffer pool, when the corresponding memory heap block has grown to
89
the size of a buffer frame. Similarly for the hash node cells of the locks,
90
and for the adaptive index. Thus, for each individual transaction, its locks
91
can occupy at most about the size of the buffer frame of memory in the common
92
pool, and after that its locks will grow into the buffer pool. */
94
/** Mask used to extract the free bit from area->size_and_free */
95
#define MEM_AREA_FREE 1
97
/** The smallest memory area total size */
98
#define MEM_AREA_MIN_SIZE (2 * MEM_AREA_EXTRA_SIZE)
101
/** Data structure for a memory pool. The space is allocated using the buddy
102
algorithm, where free list i contains areas of size 2 to the power i. */
103
struct mem_pool_struct{
104
byte* buf; /*!< memory pool */
105
ulint size; /*!< memory common pool size */
106
ulint reserved; /*!< amount of currently allocated
108
mutex_t mutex; /*!< mutex protecting this struct */
109
UT_LIST_BASE_NODE_T(mem_area_t)
110
free_list[64]; /*!< lists of free memory areas: an
111
area is put to the list whose number
112
is the 2-logarithm of the area size */
115
/** The common memory pool */
116
UNIV_INTERN mem_pool_t* mem_comm_pool = NULL;
118
#ifdef UNIV_PFS_MUTEX
119
/* Key to register mutex in mem_pool_struct with performance schema */
120
UNIV_INTERN mysql_pfs_key_t mem_pool_mutex_key;
121
#endif /* UNIV_PFS_MUTEX */
123
/* We use this counter to check that the mem pool mutex does not leak;
124
this is to track a strange assertion failure reported at
125
mysql@lists.mysql.com */
127
UNIV_INTERN ulint mem_n_threads_inside = 0;
129
/********************************************************************//**
130
Reserves the mem pool mutex if we are not in server shutdown. Use
131
this function only in memory free functions, since only memory
132
free functions are used during server shutdown. */
135
mem_pool_mutex_enter(
136
/*=================*/
137
mem_pool_t* pool) /*!< in: memory pool */
139
if (srv_shutdown_state < SRV_SHUTDOWN_EXIT_THREADS) {
140
mutex_enter(&(pool->mutex));
144
/********************************************************************//**
145
Releases the mem pool mutex if we are not in server shutdown. Like
146
its corresponding mem_pool_mutex_enter() function, use it only
147
in memory free functions */
152
mem_pool_t* pool) /*!< in: memory pool */
154
if (srv_shutdown_state < SRV_SHUTDOWN_EXIT_THREADS) {
155
mutex_exit(&(pool->mutex));
159
/********************************************************************//**
160
Returns memory area size.
166
mem_area_t* area) /*!< in: area */
168
return(area->size_and_free & ~MEM_AREA_FREE);
171
/********************************************************************//**
172
Sets memory area size. */
177
mem_area_t* area, /*!< in: area */
178
ulint size) /*!< in: size */
180
area->size_and_free = (area->size_and_free & MEM_AREA_FREE)
184
/********************************************************************//**
185
Returns memory area free bit.
186
@return TRUE if free */
191
mem_area_t* area) /*!< in: area */
193
#if TRUE != MEM_AREA_FREE
194
# error "TRUE != MEM_AREA_FREE"
196
return(area->size_and_free & MEM_AREA_FREE);
199
/********************************************************************//**
200
Sets memory area free bit. */
205
mem_area_t* area, /*!< in: area */
206
ibool free) /*!< in: free bit value */
208
#if TRUE != MEM_AREA_FREE
209
# error "TRUE != MEM_AREA_FREE"
211
area->size_and_free = (area->size_and_free & ~MEM_AREA_FREE)
215
/********************************************************************//**
216
Creates a memory pool.
217
@return memory pool */
222
ulint size) /*!< in: pool size in bytes */
229
pool = static_cast<mem_pool_t *>(ut_malloc(sizeof(mem_pool_t)));
231
/* We do not set the memory to zero (FALSE) in the pool,
232
but only when allocated at a higher level in mem0mem.c.
233
This is to avoid masking useful Purify warnings. */
235
pool->buf = static_cast<unsigned char *>(ut_malloc_low(size, FALSE, TRUE));
238
mutex_create(mem_pool_mutex_key, &pool->mutex, SYNC_MEM_POOL);
240
/* Initialize the free lists */
242
for (i = 0; i < 64; i++) {
244
UT_LIST_INIT(pool->free_list[i]);
249
while (size - used >= MEM_AREA_MIN_SIZE) {
251
i = ut_2_log(size - used);
253
if (ut_2_exp(i) > size - used) {
255
/* ut_2_log rounds upward */
260
area = (mem_area_t*)(pool->buf + used);
262
mem_area_set_size(area, ut_2_exp(i));
263
mem_area_set_free(area, TRUE);
264
UNIV_MEM_FREE(MEM_AREA_EXTRA_SIZE + (byte*) area,
265
ut_2_exp(i) - MEM_AREA_EXTRA_SIZE);
267
UT_LIST_ADD_FIRST(free_list, pool->free_list[i], area);
269
used = used + ut_2_exp(i);
279
/********************************************************************//**
280
Frees a memory pool. */
285
mem_pool_t* pool) /*!< in, own: memory pool */
291
/********************************************************************//**
292
Fills the specified free list.
293
@return TRUE if we were able to insert a block into the free list */
296
mem_pool_fill_free_list(
297
/*====================*/
298
ulint i, /*!< in: free list index */
299
mem_pool_t* pool) /*!< in: memory pool */
305
ut_ad(mutex_own(&(pool->mutex)));
307
if (UNIV_UNLIKELY(i >= 63)) {
308
/* We come here when we have run out of space in the
314
area = UT_LIST_GET_FIRST(pool->free_list[i + 1]);
317
if (UT_LIST_GET_LEN(pool->free_list[i + 1]) > 0) {
318
ut_print_timestamp(stderr);
321
" InnoDB: Error: mem pool free list %lu"
323
"InnoDB: though the list is empty!\n",
326
UT_LIST_GET_LEN(pool->free_list[i + 1]));
329
ret = mem_pool_fill_free_list(i + 1, pool);
336
area = UT_LIST_GET_FIRST(pool->free_list[i + 1]);
339
if (UNIV_UNLIKELY(UT_LIST_GET_LEN(pool->free_list[i + 1]) == 0)) {
340
mem_analyze_corruption(area);
345
UT_LIST_REMOVE(free_list, pool->free_list[i + 1], area);
347
area2 = (mem_area_t*)(((byte*)area) + ut_2_exp(i));
348
UNIV_MEM_ALLOC(area2, MEM_AREA_EXTRA_SIZE);
350
mem_area_set_size(area2, ut_2_exp(i));
351
mem_area_set_free(area2, TRUE);
353
UT_LIST_ADD_FIRST(free_list, pool->free_list[i], area2);
355
mem_area_set_size(area, ut_2_exp(i));
357
UT_LIST_ADD_FIRST(free_list, pool->free_list[i], area);
362
/********************************************************************//**
363
Allocates memory from a pool. NOTE: This low-level function should only be
365
@return own: allocated memory buffer */
370
ulint* psize, /*!< in: requested size in bytes; for optimum
371
space usage, the size should be a power of 2
372
minus MEM_AREA_EXTRA_SIZE;
373
out: allocated size in bytes (greater than
374
or equal to the requested size) */
375
mem_pool_t* pool) /*!< in: memory pool */
382
/* If we are using the OS allocator, just make a simple call to malloc() */
384
if (UNIV_LIKELY(srv_use_sys_malloc)) {
385
return(malloc(*psize));
389
n = ut_2_log(ut_max(size + MEM_AREA_EXTRA_SIZE, MEM_AREA_MIN_SIZE));
391
mutex_enter(&(pool->mutex));
392
mem_n_threads_inside++;
394
ut_a(mem_n_threads_inside == 1);
396
area = UT_LIST_GET_FIRST(pool->free_list[n]);
399
ret = mem_pool_fill_free_list(n, pool);
402
/* Out of memory in memory pool: we try to allocate
403
from the operating system with the regular malloc: */
405
mem_n_threads_inside--;
406
mutex_exit(&(pool->mutex));
408
return(ut_malloc(size));
411
area = UT_LIST_GET_FIRST(pool->free_list[n]);
414
if (!mem_area_get_free(area)) {
416
"InnoDB: Error: Removing element from mem pool"
417
" free list %lu though the\n"
418
"InnoDB: element is not marked free!\n",
421
mem_analyze_corruption(area);
423
/* Try to analyze a strange assertion failure reported at
424
mysql@lists.mysql.com where the free bit IS 1 in the
427
if (mem_area_get_free(area)) {
429
"InnoDB: Probably a race condition"
430
" because now the area is marked free!\n");
436
if (UT_LIST_GET_LEN(pool->free_list[n]) == 0) {
438
"InnoDB: Error: Removing element from mem pool"
440
"InnoDB: though the list length is 0!\n",
442
mem_analyze_corruption(area);
447
ut_ad(mem_area_get_size(area) == ut_2_exp(n));
449
mem_area_set_free(area, FALSE);
451
UT_LIST_REMOVE(free_list, pool->free_list[n], area);
453
pool->reserved += mem_area_get_size(area);
455
mem_n_threads_inside--;
456
mutex_exit(&(pool->mutex));
458
ut_ad(mem_pool_validate(pool));
460
*psize = ut_2_exp(n) - MEM_AREA_EXTRA_SIZE;
461
UNIV_MEM_ALLOC(MEM_AREA_EXTRA_SIZE + (byte*)area, *psize);
463
return((void*)(MEM_AREA_EXTRA_SIZE + ((byte*)area)));
466
/********************************************************************//**
467
Gets the buddy of an area, if it exists in pool.
468
@return the buddy, NULL if no buddy in pool */
473
mem_area_t* area, /*!< in: memory area */
474
ulint size, /*!< in: memory area size */
475
mem_pool_t* pool) /*!< in: memory pool */
481
if (((((byte*)area) - pool->buf) % (2 * size)) == 0) {
483
/* The buddy is in a higher address */
485
buddy = (mem_area_t*)(((byte*)area) + size);
487
if ((((byte*)buddy) - pool->buf) + size > pool->size) {
489
/* The buddy is not wholly contained in the pool: there is no buddy */
495
/* The buddy is in a lower address; NOTE that area cannot
496
be at the pool lower end, because then we would end up in
497
the upper branch in this if-clause: the remainder would be 0 */
500
buddy = (mem_area_t*)(((byte*)area) - size);
506
/********************************************************************//**
507
Frees memory to a pool. */
512
void* ptr, /*!< in, own: pointer to allocated memory
514
mem_pool_t* pool) /*!< in: memory pool */
522
if (UNIV_LIKELY(srv_use_sys_malloc)) {
528
/* It may be that the area was really allocated from the OS with
529
regular malloc: check if ptr points within our memory pool */
531
if ((byte*)ptr < pool->buf || (byte*)ptr >= pool->buf + pool->size) {
537
area = (mem_area_t*) (((byte*)ptr) - MEM_AREA_EXTRA_SIZE);
539
if (mem_area_get_free(area)) {
541
"InnoDB: Error: Freeing element to mem pool"
542
" free list though the\n"
543
"InnoDB: element is marked free!\n");
545
mem_analyze_corruption(area);
549
size = mem_area_get_size(area);
550
UNIV_MEM_FREE(ptr, size - MEM_AREA_EXTRA_SIZE);
554
"InnoDB: Error: Mem area size is 0. Possibly a"
555
" memory overrun of the\n"
556
"InnoDB: previous allocated area!\n");
558
mem_analyze_corruption(area);
562
#ifdef UNIV_LIGHT_MEM_DEBUG
563
if (((byte*)area) + size < pool->buf + pool->size) {
567
next_size = mem_area_get_size(
568
(mem_area_t*)(((byte*)area) + size));
569
if (UNIV_UNLIKELY(!next_size || !ut_is_2pow(next_size))) {
571
"InnoDB: Error: Memory area size %lu,"
572
" next area size %lu not a power of 2!\n"
573
"InnoDB: Possibly a memory overrun of"
574
" the buffer being freed here.\n",
575
(ulong) size, (ulong) next_size);
576
mem_analyze_corruption(area);
582
buddy = mem_area_get_buddy(area, size, pool);
586
mem_pool_mutex_enter(pool);
587
mem_n_threads_inside++;
589
ut_a(mem_n_threads_inside == 1);
591
if (buddy && mem_area_get_free(buddy)
592
&& (size == mem_area_get_size(buddy))) {
594
/* The buddy is in a free list */
596
if ((byte*)buddy < (byte*)area) {
597
new_ptr = ((byte*)buddy) + MEM_AREA_EXTRA_SIZE;
599
mem_area_set_size(buddy, 2 * size);
600
mem_area_set_free(buddy, FALSE);
604
mem_area_set_size(area, 2 * size);
607
/* Remove the buddy from its free list and merge it to area */
609
UT_LIST_REMOVE(free_list, pool->free_list[n], buddy);
611
pool->reserved += ut_2_exp(n);
613
mem_n_threads_inside--;
614
mem_pool_mutex_exit(pool);
616
mem_area_free(new_ptr, pool);
620
UT_LIST_ADD_FIRST(free_list, pool->free_list[n], area);
622
mem_area_set_free(area, TRUE);
624
ut_ad(pool->reserved >= size);
626
pool->reserved -= size;
629
mem_n_threads_inside--;
630
mem_pool_mutex_exit(pool);
632
ut_ad(mem_pool_validate(pool));
635
/********************************************************************//**
636
Validates a memory pool.
637
@return TRUE if ok */
642
mem_pool_t* pool) /*!< in: memory pool */
649
mem_pool_mutex_enter(pool);
653
for (i = 0; i < 64; i++) {
655
UT_LIST_VALIDATE(free_list, mem_area_t, pool->free_list[i],
658
area = UT_LIST_GET_FIRST(pool->free_list[i]);
660
while (area != NULL) {
661
ut_a(mem_area_get_free(area));
662
ut_a(mem_area_get_size(area) == ut_2_exp(i));
664
buddy = mem_area_get_buddy(area, ut_2_exp(i), pool);
666
ut_a(!buddy || !mem_area_get_free(buddy)
667
|| (ut_2_exp(i) != mem_area_get_size(buddy)));
669
area = UT_LIST_GET_NEXT(free_list, area);
675
ut_a(free + pool->reserved == pool->size);
677
mem_pool_mutex_exit(pool);
682
/********************************************************************//**
683
Prints info of a memory pool. */
688
FILE* outfile,/*!< in: output file to write to */
689
mem_pool_t* pool) /*!< in: memory pool */
693
mem_pool_validate(pool);
695
fprintf(outfile, "INFO OF A MEMORY POOL\n");
697
mutex_enter(&(pool->mutex));
699
for (i = 0; i < 64; i++) {
700
if (UT_LIST_GET_LEN(pool->free_list[i]) > 0) {
703
"Free list length %lu for"
704
" blocks of size %lu\n",
705
(ulong) UT_LIST_GET_LEN(pool->free_list[i]),
706
(ulong) ut_2_exp(i));
710
fprintf(outfile, "Pool size %lu, reserved %lu.\n", (ulong) pool->size,
711
(ulong) pool->reserved);
712
mutex_exit(&(pool->mutex));
715
/********************************************************************//**
716
Returns the amount of reserved memory.
717
@return reserved memory in bytes */
720
mem_pool_get_reserved(
721
/*==================*/
722
mem_pool_t* pool) /*!< in: memory pool */
726
mutex_enter(&(pool->mutex));
728
reserved = pool->reserved;
730
mutex_exit(&(pool->mutex));