1
/************************************************************************
2
The lowest-level memory management
6
Created 5/12/1997 Heikki Tuuri
7
*************************************************************************/
11
#include "mem0pool.ic"
14
#include "sync0sync.h"
20
/* We would like to use also the buffer frames to allocate memory. This
21
would be desirable, because then the memory consumption of the database
22
would be fixed, and we might even lock the buffer pool to the main memory.
23
The problem here is that the buffer management routines can themselves call
24
memory allocation, while the buffer pool mutex is reserved.
26
The main components of the memory consumption are:
29
2. parsed and optimized SQL statements,
30
3. data dictionary cache,
32
5. locks for each transaction,
33
6. hash table for the adaptive index,
34
7. state and buffers for each SQL query currently being executed,
35
8. session for each user, and
36
9. stack for each OS thread.
38
Items 1 and 2 are managed by an LRU algorithm. Items 5 and 6 can potentially
39
consume very much memory. Items 7 and 8 should consume quite little memory,
40
and the OS should take care of item 9, which too should consume little memory.
42
A solution to the memory management:
44
1. the buffer pool size is set separately;
45
2. log buffer size is set separately;
46
3. the common pool size for all the other entries, except 8, is set separately.
48
Problems: we may waste memory if the common pool is set too big. Another
49
problem is the locks, which may take very much space in big transactions.
50
Then the shared pool size should be set very big. We can allow locks to take
51
space from the buffer pool, but the SQL optimizer is then unaware of the
52
usable size of the buffer pool. We could also combine the objects in the
53
common pool and the buffers in the buffer pool into a single LRU list and
54
manage it uniformly, but this approach does not take into account the parsing
55
and other costs unique to SQL statements.
57
The locks for a transaction can be seen as a part of the state of the
58
transaction. Hence, they should be stored in the common pool. We still
59
have the problem of a very big update transaction, for example, which
60
will set very many x-locks on rows, and the locks will consume a lot
61
of memory, say, half of the buffer pool size.
63
Another problem is what to do if we are not able to malloc a requested
64
block of memory from the common pool. Then we can request memory from
65
the operating system. If it does not help, a system error results.
67
Because 5 and 6 may potentially consume very much memory, we let them grow
68
into the buffer pool. We may let the locks of a transaction take frames
69
from the buffer pool, when the corresponding memory heap block has grown to
70
the size of a buffer frame. Similarly for the hash node cells of the locks,
71
and for the adaptive index. Thus, for each individual transaction, its locks
72
can occupy at most about the size of the buffer frame of memory in the common
73
pool, and after that its locks will grow into the buffer pool. */
75
/* Mask used to extract the free bit from area->size */
76
#define MEM_AREA_FREE 1
78
/* The smallest memory area total size */
79
#define MEM_AREA_MIN_SIZE (2 * MEM_AREA_EXTRA_SIZE)
82
/* Data structure for a memory pool. The space is allocated using the buddy
83
algorithm, where free list i contains areas of size 2 to power i. */
84
struct mem_pool_struct{
85
byte* buf; /* memory pool */
86
ulint size; /* memory common pool size */
87
ulint reserved; /* amount of currently allocated
89
mutex_t mutex; /* mutex protecting this struct */
90
UT_LIST_BASE_NODE_T(mem_area_t)
91
free_list[64]; /* lists of free memory areas: an
92
area is put to the list whose number
93
is the 2-logarithm of the area size */
96
/* The common memory pool */
97
UNIV_INTERN mem_pool_t* mem_comm_pool = NULL;
99
/* We use this counter to check that the mem pool mutex does not leak;
100
this is to track a strange assertion failure reported at
101
mysql@lists.mysql.com */
103
UNIV_INTERN ulint mem_n_threads_inside = 0;
105
/************************************************************************
106
Reserves the mem pool mutex. */
109
mem_pool_mutex_enter(void)
110
/*======================*/
112
mutex_enter(&(mem_comm_pool->mutex));
115
/************************************************************************
116
Releases the mem pool mutex. */
119
mem_pool_mutex_exit(void)
120
/*=====================*/
122
mutex_exit(&(mem_comm_pool->mutex));
125
/************************************************************************
126
Returns memory area size. */
132
mem_area_t* area) /* in: area */
134
return(area->size_and_free & ~MEM_AREA_FREE);
137
/************************************************************************
138
Sets memory area size. */
143
mem_area_t* area, /* in: area */
144
ulint size) /* in: size */
146
area->size_and_free = (area->size_and_free & MEM_AREA_FREE)
150
/************************************************************************
151
Returns memory area free bit. */
156
/* out: TRUE if free */
157
mem_area_t* area) /* in: area */
159
#if TRUE != MEM_AREA_FREE
160
# error "TRUE != MEM_AREA_FREE"
162
return(area->size_and_free & MEM_AREA_FREE);
165
/************************************************************************
166
Sets memory area free bit. */
171
mem_area_t* area, /* in: area */
172
ibool free) /* in: free bit value */
174
#if TRUE != MEM_AREA_FREE
175
# error "TRUE != MEM_AREA_FREE"
177
area->size_and_free = (area->size_and_free & ~MEM_AREA_FREE)
181
/************************************************************************
182
Creates a memory pool. */
187
/* out: memory pool */
188
ulint size) /* in: pool size in bytes */
197
pool = ut_malloc(sizeof(mem_pool_t));
199
/* We do not set the memory to zero (FALSE) in the pool,
200
but only when allocated at a higher level in mem0mem.c.
201
This is to avoid masking useful Purify warnings. */
203
pool->buf = ut_malloc_low(size, FALSE, TRUE);
206
mutex_create(&pool->mutex, SYNC_MEM_POOL);
208
/* Initialize the free lists */
210
for (i = 0; i < 64; i++) {
212
UT_LIST_INIT(pool->free_list[i]);
217
while (size - used >= MEM_AREA_MIN_SIZE) {
219
i = ut_2_log(size - used);
221
if (ut_2_exp(i) > size - used) {
223
/* ut_2_log rounds upward */
228
area = (mem_area_t*)(pool->buf + used);
230
mem_area_set_size(area, ut_2_exp(i));
231
mem_area_set_free(area, TRUE);
232
UNIV_MEM_FREE(MEM_AREA_EXTRA_SIZE + (byte*) area,
233
ut_2_exp(i) - MEM_AREA_EXTRA_SIZE);
235
UT_LIST_ADD_FIRST(free_list, pool->free_list[i], area);
237
used = used + ut_2_exp(i);
247
/************************************************************************
248
Fills the specified free list. */
251
mem_pool_fill_free_list(
252
/*====================*/
253
/* out: TRUE if we were able to insert a
254
block to the free list */
255
ulint i, /* in: free list index */
256
mem_pool_t* pool) /* in: memory pool */
262
ut_ad(mutex_own(&(pool->mutex)));
264
if (UNIV_UNLIKELY(i >= 63)) {
265
/* We come here when we have run out of space in the
271
area = UT_LIST_GET_FIRST(pool->free_list[i + 1]);
274
if (UT_LIST_GET_LEN(pool->free_list[i + 1]) > 0) {
275
ut_print_timestamp(stderr);
278
" InnoDB: Error: mem pool free list %lu"
280
"InnoDB: though the list is empty!\n",
283
UT_LIST_GET_LEN(pool->free_list[i + 1]));
286
ret = mem_pool_fill_free_list(i + 1, pool);
293
area = UT_LIST_GET_FIRST(pool->free_list[i + 1]);
296
if (UNIV_UNLIKELY(UT_LIST_GET_LEN(pool->free_list[i + 1]) == 0)) {
297
mem_analyze_corruption(area);
302
UT_LIST_REMOVE(free_list, pool->free_list[i + 1], area);
304
area2 = (mem_area_t*)(((byte*)area) + ut_2_exp(i));
305
UNIV_MEM_ALLOC(area2, MEM_AREA_EXTRA_SIZE);
307
mem_area_set_size(area2, ut_2_exp(i));
308
mem_area_set_free(area2, TRUE);
310
UT_LIST_ADD_FIRST(free_list, pool->free_list[i], area2);
312
mem_area_set_size(area, ut_2_exp(i));
314
UT_LIST_ADD_FIRST(free_list, pool->free_list[i], area);
319
/************************************************************************
320
Allocates memory from a pool. NOTE: This low-level function should only be
321
used in mem0mem.*! */
326
/* out, own: allocated memory buffer */
327
ulint* psize, /* in: requested size in bytes; for optimum
328
space usage, the size should be a power of 2
329
minus MEM_AREA_EXTRA_SIZE;
330
out: allocated size in bytes (greater than
331
or equal to the requested size) */
332
mem_pool_t* pool) /* in: memory pool */
340
n = ut_2_log(ut_max(size + MEM_AREA_EXTRA_SIZE, MEM_AREA_MIN_SIZE));
342
mutex_enter(&(pool->mutex));
343
mem_n_threads_inside++;
345
ut_a(mem_n_threads_inside == 1);
347
area = UT_LIST_GET_FIRST(pool->free_list[n]);
350
ret = mem_pool_fill_free_list(n, pool);
353
/* Out of memory in memory pool: we try to allocate
354
from the operating system with the regular malloc: */
356
mem_n_threads_inside--;
357
mutex_exit(&(pool->mutex));
359
return(ut_malloc(size));
362
area = UT_LIST_GET_FIRST(pool->free_list[n]);
365
if (!mem_area_get_free(area)) {
367
"InnoDB: Error: Removing element from mem pool"
368
" free list %lu though the\n"
369
"InnoDB: element is not marked free!\n",
372
mem_analyze_corruption(area);
374
/* Try to analyze a strange assertion failure reported at
375
mysql@lists.mysql.com where the free bit IS 1 in the
378
if (mem_area_get_free(area)) {
380
"InnoDB: Probably a race condition"
381
" because now the area is marked free!\n");
387
if (UT_LIST_GET_LEN(pool->free_list[n]) == 0) {
389
"InnoDB: Error: Removing element from mem pool"
391
"InnoDB: though the list length is 0!\n",
393
mem_analyze_corruption(area);
398
ut_ad(mem_area_get_size(area) == ut_2_exp(n));
400
mem_area_set_free(area, FALSE);
402
UT_LIST_REMOVE(free_list, pool->free_list[n], area);
404
pool->reserved += mem_area_get_size(area);
406
mem_n_threads_inside--;
407
mutex_exit(&(pool->mutex));
409
ut_ad(mem_pool_validate(pool));
411
*psize = ut_2_exp(n) - MEM_AREA_EXTRA_SIZE;
412
UNIV_MEM_ALLOC(MEM_AREA_EXTRA_SIZE + (byte*)area, *psize);
414
return((void*)(MEM_AREA_EXTRA_SIZE + ((byte*)area)));
417
/************************************************************************
418
Gets the buddy of an area, if it exists in pool. */
423
/* out: the buddy, NULL if no buddy in pool */
424
mem_area_t* area, /* in: memory area */
425
ulint size, /* in: memory area size */
426
mem_pool_t* pool) /* in: memory pool */
432
if (((((byte*)area) - pool->buf) % (2 * size)) == 0) {
434
/* The buddy is in a higher address */
436
buddy = (mem_area_t*)(((byte*)area) + size);
438
if ((((byte*)buddy) - pool->buf) + size > pool->size) {
440
/* The buddy is not wholly contained in the pool:
446
/* The buddy is in a lower address; NOTE that area cannot
447
be at the pool lower end, because then we would end up to
448
the upper branch in this if-clause: the remainder would be
451
buddy = (mem_area_t*)(((byte*)area) - size);
457
/************************************************************************
458
Frees memory to a pool. */
463
void* ptr, /* in, own: pointer to allocated memory
465
mem_pool_t* pool) /* in: memory pool */
473
/* It may be that the area was really allocated from the OS with
474
regular malloc: check if ptr points within our memory pool */
476
if ((byte*)ptr < pool->buf || (byte*)ptr >= pool->buf + pool->size) {
482
area = (mem_area_t*) (((byte*)ptr) - MEM_AREA_EXTRA_SIZE);
484
if (mem_area_get_free(area)) {
486
"InnoDB: Error: Freeing element to mem pool"
487
" free list though the\n"
488
"InnoDB: element is marked free!\n");
490
mem_analyze_corruption(area);
494
size = mem_area_get_size(area);
495
UNIV_MEM_FREE(ptr, size - MEM_AREA_EXTRA_SIZE);
499
"InnoDB: Error: Mem area size is 0. Possibly a"
500
" memory overrun of the\n"
501
"InnoDB: previous allocated area!\n");
503
mem_analyze_corruption(area);
507
#ifdef UNIV_LIGHT_MEM_DEBUG
508
if (((byte*)area) + size < pool->buf + pool->size) {
512
next_size = mem_area_get_size(
513
(mem_area_t*)(((byte*)area) + size));
514
if (UNIV_UNLIKELY(!next_size || !ut_is_2pow(next_size))) {
516
"InnoDB: Error: Memory area size %lu,"
517
" next area size %lu not a power of 2!\n"
518
"InnoDB: Possibly a memory overrun of"
519
" the buffer being freed here.\n",
520
(ulong) size, (ulong) next_size);
521
mem_analyze_corruption(area);
527
buddy = mem_area_get_buddy(area, size, pool);
531
mutex_enter(&(pool->mutex));
532
mem_n_threads_inside++;
534
ut_a(mem_n_threads_inside == 1);
536
if (buddy && mem_area_get_free(buddy)
537
&& (size == mem_area_get_size(buddy))) {
539
/* The buddy is in a free list */
541
if ((byte*)buddy < (byte*)area) {
542
new_ptr = ((byte*)buddy) + MEM_AREA_EXTRA_SIZE;
544
mem_area_set_size(buddy, 2 * size);
545
mem_area_set_free(buddy, FALSE);
549
mem_area_set_size(area, 2 * size);
552
/* Remove the buddy from its free list and merge it to area */
554
UT_LIST_REMOVE(free_list, pool->free_list[n], buddy);
556
pool->reserved += ut_2_exp(n);
558
mem_n_threads_inside--;
559
mutex_exit(&(pool->mutex));
561
mem_area_free(new_ptr, pool);
565
UT_LIST_ADD_FIRST(free_list, pool->free_list[n], area);
567
mem_area_set_free(area, TRUE);
569
ut_ad(pool->reserved >= size);
571
pool->reserved -= size;
574
mem_n_threads_inside--;
575
mutex_exit(&(pool->mutex));
577
ut_ad(mem_pool_validate(pool));
580
/************************************************************************
581
Validates a memory pool. */
586
/* out: TRUE if ok */
587
mem_pool_t* pool) /* in: memory pool */
594
mutex_enter(&(pool->mutex));
598
for (i = 0; i < 64; i++) {
600
UT_LIST_VALIDATE(free_list, mem_area_t, pool->free_list[i]);
602
area = UT_LIST_GET_FIRST(pool->free_list[i]);
604
while (area != NULL) {
605
ut_a(mem_area_get_free(area));
606
ut_a(mem_area_get_size(area) == ut_2_exp(i));
608
buddy = mem_area_get_buddy(area, ut_2_exp(i), pool);
610
ut_a(!buddy || !mem_area_get_free(buddy)
611
|| (ut_2_exp(i) != mem_area_get_size(buddy)));
613
area = UT_LIST_GET_NEXT(free_list, area);
619
ut_a(free + pool->reserved == pool->size);
621
mutex_exit(&(pool->mutex));
626
/************************************************************************
627
Prints info of a memory pool. */
632
FILE* outfile,/* in: output file to write to */
633
mem_pool_t* pool) /* in: memory pool */
637
mem_pool_validate(pool);
639
fprintf(outfile, "INFO OF A MEMORY POOL\n");
641
mutex_enter(&(pool->mutex));
643
for (i = 0; i < 64; i++) {
644
if (UT_LIST_GET_LEN(pool->free_list[i]) > 0) {
647
"Free list length %lu for"
648
" blocks of size %lu\n",
649
(ulong) UT_LIST_GET_LEN(pool->free_list[i]),
650
(ulong) ut_2_exp(i));
654
fprintf(outfile, "Pool size %lu, reserved %lu.\n", (ulong) pool->size,
655
(ulong) pool->reserved);
656
mutex_exit(&(pool->mutex));
659
/************************************************************************
660
Returns the amount of reserved memory. */
663
mem_pool_get_reserved(
664
/*==================*/
665
/* out: reserved memory in bytes */
666
mem_pool_t* pool) /* in: memory pool */
670
mutex_enter(&(pool->mutex));
672
reserved = pool->reserved;
674
mutex_exit(&(pool->mutex));