1
/*****************************************************************************
3
Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
5
This program is free software; you can redistribute it and/or modify it under
6
the terms of the GNU General Public License as published by the Free Software
7
Foundation; version 2 of the License.
9
This program is distributed in the hope that it will be useful, but WITHOUT
10
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13
You should have received a copy of the GNU General Public License along with
14
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15
Place, Suite 330, Boston, MA 02111-1307 USA
17
*****************************************************************************/
19
/************************************************************************
20
The lowest-level memory management
22
Created 5/12/1997 Heikki Tuuri
23
*************************************************************************/
27
#include "mem0pool.ic"
31
#include "sync0sync.h"
37
/* We would like to use also the buffer frames to allocate memory. This
38
would be desirable, because then the memory consumption of the database
39
would be fixed, and we might even lock the buffer pool to the main memory.
40
The problem here is that the buffer management routines can themselves call
41
memory allocation, while the buffer pool mutex is reserved.
43
The main components of the memory consumption are:
46
2. parsed and optimized SQL statements,
47
3. data dictionary cache,
49
5. locks for each transaction,
50
6. hash table for the adaptive index,
51
7. state and buffers for each SQL query currently being executed,
52
8. session for each user, and
53
9. stack for each OS thread.
55
Items 1 and 2 are managed by an LRU algorithm. Items 5 and 6 can potentially
56
consume very much memory. Items 7 and 8 should consume quite little memory,
57
and the OS should take care of item 9, which too should consume little memory.
59
A solution to the memory management:
61
1. the buffer pool size is set separately;
62
2. log buffer size is set separately;
63
3. the common pool size for all the other entries, except 8, is set separately.
65
Problems: we may waste memory if the common pool is set too big. Another
66
problem is the locks, which may take very much space in big transactions.
67
Then the shared pool size should be set very big. We can allow locks to take
68
space from the buffer pool, but the SQL optimizer is then unaware of the
69
usable size of the buffer pool. We could also combine the objects in the
70
common pool and the buffers in the buffer pool into a single LRU list and
71
manage it uniformly, but this approach does not take into account the parsing
72
and other costs unique to SQL statements.
74
The locks for a transaction can be seen as a part of the state of the
75
transaction. Hence, they should be stored in the common pool. We still
76
have the problem of a very big update transaction, for example, which
77
will set very many x-locks on rows, and the locks will consume a lot
78
of memory, say, half of the buffer pool size.
80
Another problem is what to do if we are not able to malloc a requested
81
block of memory from the common pool. Then we can request memory from
82
the operating system. If it does not help, a system error results.
84
Because 5 and 6 may potentially consume very much memory, we let them grow
85
into the buffer pool. We may let the locks of a transaction take frames
86
from the buffer pool, when the corresponding memory heap block has grown to
87
the size of a buffer frame. Similarly for the hash node cells of the locks,
88
and for the adaptive index. Thus, for each individual transaction, its locks
89
can occupy at most about the size of the buffer frame of memory in the common
90
pool, and after that its locks will grow into the buffer pool. */
92
/* Mask used to extract the free bit from area->size */
93
#define MEM_AREA_FREE 1
95
/* The smallest memory area total size */
96
#define MEM_AREA_MIN_SIZE (2 * MEM_AREA_EXTRA_SIZE)
99
/* Data structure for a memory pool. The space is allocated using the buddy
100
algorithm, where free list i contains areas of size 2 to power i. */
101
struct mem_pool_struct{
102
byte* buf; /* memory pool */
103
ulint size; /* memory common pool size */
104
ulint reserved; /* amount of currently allocated
106
mutex_t mutex; /* mutex protecting this struct */
107
UT_LIST_BASE_NODE_T(mem_area_t)
108
free_list[64]; /* lists of free memory areas: an
109
area is put to the list whose number
110
is the 2-logarithm of the area size */
113
/* The common memory pool */
114
UNIV_INTERN mem_pool_t* mem_comm_pool = NULL;
116
/* We use this counter to check that the mem pool mutex does not leak;
117
this is to track a strange assertion failure reported at
118
mysql@lists.mysql.com */
120
UNIV_INTERN ulint mem_n_threads_inside = 0;
122
/************************************************************************
123
Reserves the mem pool mutex. */
126
mem_pool_mutex_enter(void)
127
/*======================*/
129
mutex_enter(&(mem_comm_pool->mutex));
132
/************************************************************************
133
Releases the mem pool mutex. */
136
mem_pool_mutex_exit(void)
137
/*=====================*/
139
mutex_exit(&(mem_comm_pool->mutex));
142
/************************************************************************
143
Returns memory area size. */
149
mem_area_t* area) /* in: area */
151
return(area->size_and_free & ~MEM_AREA_FREE);
154
/************************************************************************
155
Sets memory area size. */
160
mem_area_t* area, /* in: area */
161
ulint size) /* in: size */
163
area->size_and_free = (area->size_and_free & MEM_AREA_FREE)
167
/************************************************************************
168
Returns memory area free bit. */
173
/* out: TRUE if free */
174
mem_area_t* area) /* in: area */
176
#if TRUE != MEM_AREA_FREE
177
# error "TRUE != MEM_AREA_FREE"
179
return(area->size_and_free & MEM_AREA_FREE);
182
/************************************************************************
183
Sets memory area free bit. */
188
mem_area_t* area, /* in: area */
189
ibool free) /* in: free bit value */
191
#if TRUE != MEM_AREA_FREE
192
# error "TRUE != MEM_AREA_FREE"
194
area->size_and_free = (area->size_and_free & ~MEM_AREA_FREE)
198
/************************************************************************
199
Creates a memory pool. */
204
/* out: memory pool */
205
ulint size) /* in: pool size in bytes */
212
pool = ut_malloc(sizeof(mem_pool_t));
214
/* We do not set the memory to zero (FALSE) in the pool,
215
but only when allocated at a higher level in mem0mem.c.
216
This is to avoid masking useful Purify warnings. */
218
pool->buf = ut_malloc_low(size, FALSE, TRUE);
221
mutex_create(&pool->mutex, SYNC_MEM_POOL);
223
/* Initialize the free lists */
225
for (i = 0; i < 64; i++) {
227
UT_LIST_INIT(pool->free_list[i]);
232
while (size - used >= MEM_AREA_MIN_SIZE) {
234
i = ut_2_log(size - used);
236
if (ut_2_exp(i) > size - used) {
238
/* ut_2_log rounds upward */
243
area = (mem_area_t*)(pool->buf + used);
245
mem_area_set_size(area, ut_2_exp(i));
246
mem_area_set_free(area, TRUE);
247
UNIV_MEM_FREE(MEM_AREA_EXTRA_SIZE + (byte*) area,
248
ut_2_exp(i) - MEM_AREA_EXTRA_SIZE);
250
UT_LIST_ADD_FIRST(free_list, pool->free_list[i], area);
252
used = used + ut_2_exp(i);
262
/************************************************************************
263
Fills the specified free list. */
266
mem_pool_fill_free_list(
267
/*====================*/
268
/* out: TRUE if we were able to insert a
269
block to the free list */
270
ulint i, /* in: free list index */
271
mem_pool_t* pool) /* in: memory pool */
277
ut_ad(mutex_own(&(pool->mutex)));
279
if (UNIV_UNLIKELY(i >= 63)) {
280
/* We come here when we have run out of space in the
286
area = UT_LIST_GET_FIRST(pool->free_list[i + 1]);
289
if (UT_LIST_GET_LEN(pool->free_list[i + 1]) > 0) {
290
ut_print_timestamp(stderr);
293
" InnoDB: Error: mem pool free list %lu"
295
"InnoDB: though the list is empty!\n",
298
UT_LIST_GET_LEN(pool->free_list[i + 1]));
301
ret = mem_pool_fill_free_list(i + 1, pool);
308
area = UT_LIST_GET_FIRST(pool->free_list[i + 1]);
311
if (UNIV_UNLIKELY(UT_LIST_GET_LEN(pool->free_list[i + 1]) == 0)) {
312
mem_analyze_corruption(area);
317
UT_LIST_REMOVE(free_list, pool->free_list[i + 1], area);
319
area2 = (mem_area_t*)(((byte*)area) + ut_2_exp(i));
320
UNIV_MEM_ALLOC(area2, MEM_AREA_EXTRA_SIZE);
322
mem_area_set_size(area2, ut_2_exp(i));
323
mem_area_set_free(area2, TRUE);
325
UT_LIST_ADD_FIRST(free_list, pool->free_list[i], area2);
327
mem_area_set_size(area, ut_2_exp(i));
329
UT_LIST_ADD_FIRST(free_list, pool->free_list[i], area);
334
/************************************************************************
335
Allocates memory from a pool. NOTE: This low-level function should only be
336
used in mem0mem.*! */
341
/* out, own: allocated memory buffer */
342
ulint* psize, /* in: requested size in bytes; for optimum
343
space usage, the size should be a power of 2
344
minus MEM_AREA_EXTRA_SIZE;
345
out: allocated size in bytes (greater than
346
or equal to the requested size) */
347
mem_pool_t* pool) /* in: memory pool */
354
/* If we are using os allocator just make a simple call
356
if (UNIV_LIKELY(srv_use_sys_malloc)) {
357
return(malloc(*psize));
361
n = ut_2_log(ut_max(size + MEM_AREA_EXTRA_SIZE, MEM_AREA_MIN_SIZE));
363
mutex_enter(&(pool->mutex));
364
mem_n_threads_inside++;
366
ut_a(mem_n_threads_inside == 1);
368
area = UT_LIST_GET_FIRST(pool->free_list[n]);
371
ret = mem_pool_fill_free_list(n, pool);
374
/* Out of memory in memory pool: we try to allocate
375
from the operating system with the regular malloc: */
377
mem_n_threads_inside--;
378
mutex_exit(&(pool->mutex));
380
return(ut_malloc(size));
383
area = UT_LIST_GET_FIRST(pool->free_list[n]);
386
if (!mem_area_get_free(area)) {
388
"InnoDB: Error: Removing element from mem pool"
389
" free list %lu though the\n"
390
"InnoDB: element is not marked free!\n",
393
mem_analyze_corruption(area);
395
/* Try to analyze a strange assertion failure reported at
396
mysql@lists.mysql.com where the free bit IS 1 in the
399
if (mem_area_get_free(area)) {
401
"InnoDB: Probably a race condition"
402
" because now the area is marked free!\n");
408
if (UT_LIST_GET_LEN(pool->free_list[n]) == 0) {
410
"InnoDB: Error: Removing element from mem pool"
412
"InnoDB: though the list length is 0!\n",
414
mem_analyze_corruption(area);
419
ut_ad(mem_area_get_size(area) == ut_2_exp(n));
421
mem_area_set_free(area, FALSE);
423
UT_LIST_REMOVE(free_list, pool->free_list[n], area);
425
pool->reserved += mem_area_get_size(area);
427
mem_n_threads_inside--;
428
mutex_exit(&(pool->mutex));
430
ut_ad(mem_pool_validate(pool));
432
*psize = ut_2_exp(n) - MEM_AREA_EXTRA_SIZE;
433
UNIV_MEM_ALLOC(MEM_AREA_EXTRA_SIZE + (byte*)area, *psize);
435
return((void*)(MEM_AREA_EXTRA_SIZE + ((byte*)area)));
438
/************************************************************************
439
Gets the buddy of an area, if it exists in pool. */
444
/* out: the buddy, NULL if no buddy in pool */
445
mem_area_t* area, /* in: memory area */
446
ulint size, /* in: memory area size */
447
mem_pool_t* pool) /* in: memory pool */
453
if (((((byte*)area) - pool->buf) % (2 * size)) == 0) {
455
/* The buddy is in a higher address */
457
buddy = (mem_area_t*)(((byte*)area) + size);
459
if ((((byte*)buddy) - pool->buf) + size > pool->size) {
461
/* The buddy is not wholly contained in the pool:
467
/* The buddy is in a lower address; NOTE that area cannot
468
be at the pool lower end, because then we would end up to
469
the upper branch in this if-clause: the remainder would be
472
buddy = (mem_area_t*)(((byte*)area) - size);
478
/************************************************************************
479
Frees memory to a pool. */
484
void* ptr, /* in, own: pointer to allocated memory
486
mem_pool_t* pool) /* in: memory pool */
494
if (UNIV_LIKELY(srv_use_sys_malloc)) {
500
/* It may be that the area was really allocated from the OS with
501
regular malloc: check if ptr points within our memory pool */
503
if ((byte*)ptr < pool->buf || (byte*)ptr >= pool->buf + pool->size) {
509
area = (mem_area_t*) (((byte*)ptr) - MEM_AREA_EXTRA_SIZE);
511
if (mem_area_get_free(area)) {
513
"InnoDB: Error: Freeing element to mem pool"
514
" free list though the\n"
515
"InnoDB: element is marked free!\n");
517
mem_analyze_corruption(area);
521
size = mem_area_get_size(area);
522
UNIV_MEM_FREE(ptr, size - MEM_AREA_EXTRA_SIZE);
526
"InnoDB: Error: Mem area size is 0. Possibly a"
527
" memory overrun of the\n"
528
"InnoDB: previous allocated area!\n");
530
mem_analyze_corruption(area);
534
#ifdef UNIV_LIGHT_MEM_DEBUG
535
if (((byte*)area) + size < pool->buf + pool->size) {
539
next_size = mem_area_get_size(
540
(mem_area_t*)(((byte*)area) + size));
541
if (UNIV_UNLIKELY(!next_size || !ut_is_2pow(next_size))) {
543
"InnoDB: Error: Memory area size %lu,"
544
" next area size %lu not a power of 2!\n"
545
"InnoDB: Possibly a memory overrun of"
546
" the buffer being freed here.\n",
547
(ulong) size, (ulong) next_size);
548
mem_analyze_corruption(area);
554
buddy = mem_area_get_buddy(area, size, pool);
558
mutex_enter(&(pool->mutex));
559
mem_n_threads_inside++;
561
ut_a(mem_n_threads_inside == 1);
563
if (buddy && mem_area_get_free(buddy)
564
&& (size == mem_area_get_size(buddy))) {
566
/* The buddy is in a free list */
568
if ((byte*)buddy < (byte*)area) {
569
new_ptr = ((byte*)buddy) + MEM_AREA_EXTRA_SIZE;
571
mem_area_set_size(buddy, 2 * size);
572
mem_area_set_free(buddy, FALSE);
576
mem_area_set_size(area, 2 * size);
579
/* Remove the buddy from its free list and merge it to area */
581
UT_LIST_REMOVE(free_list, pool->free_list[n], buddy);
583
pool->reserved += ut_2_exp(n);
585
mem_n_threads_inside--;
586
mutex_exit(&(pool->mutex));
588
mem_area_free(new_ptr, pool);
592
UT_LIST_ADD_FIRST(free_list, pool->free_list[n], area);
594
mem_area_set_free(area, TRUE);
596
ut_ad(pool->reserved >= size);
598
pool->reserved -= size;
601
mem_n_threads_inside--;
602
mutex_exit(&(pool->mutex));
604
ut_ad(mem_pool_validate(pool));
607
/************************************************************************
608
Validates a memory pool. */
613
/* out: TRUE if ok */
614
mem_pool_t* pool) /* in: memory pool */
621
mutex_enter(&(pool->mutex));
625
for (i = 0; i < 64; i++) {
627
UT_LIST_VALIDATE(free_list, mem_area_t, pool->free_list[i]);
629
area = UT_LIST_GET_FIRST(pool->free_list[i]);
631
while (area != NULL) {
632
ut_a(mem_area_get_free(area));
633
ut_a(mem_area_get_size(area) == ut_2_exp(i));
635
buddy = mem_area_get_buddy(area, ut_2_exp(i), pool);
637
ut_a(!buddy || !mem_area_get_free(buddy)
638
|| (ut_2_exp(i) != mem_area_get_size(buddy)));
640
area = UT_LIST_GET_NEXT(free_list, area);
646
ut_a(free + pool->reserved == pool->size);
648
mutex_exit(&(pool->mutex));
653
/************************************************************************
654
Prints info of a memory pool. */
659
FILE* outfile,/* in: output file to write to */
660
mem_pool_t* pool) /* in: memory pool */
664
mem_pool_validate(pool);
666
fprintf(outfile, "INFO OF A MEMORY POOL\n");
668
mutex_enter(&(pool->mutex));
670
for (i = 0; i < 64; i++) {
671
if (UT_LIST_GET_LEN(pool->free_list[i]) > 0) {
674
"Free list length %lu for"
675
" blocks of size %lu\n",
676
(ulong) UT_LIST_GET_LEN(pool->free_list[i]),
677
(ulong) ut_2_exp(i));
681
fprintf(outfile, "Pool size %lu, reserved %lu.\n", (ulong) pool->size,
682
(ulong) pool->reserved);
683
mutex_exit(&(pool->mutex));
686
/************************************************************************
687
Returns the amount of reserved memory. */
690
mem_pool_get_reserved(
691
/*==================*/
692
/* out: reserved memory in bytes */
693
mem_pool_t* pool) /* in: memory pool */
697
mutex_enter(&(pool->mutex));
699
reserved = pool->reserved;
701
mutex_exit(&(pool->mutex));