~drizzle-trunk/drizzle/development

1 by brian
clean slate
1
/************************************************************************
2
The lowest-level memory management
3
4
(c) 1997 Innobase Oy
5
6
Created 5/12/1997 Heikki Tuuri
7
*************************************************************************/
8
9
#include "mem0pool.h"
10
#ifdef UNIV_NONINL
11
#include "mem0pool.ic"
12
#endif
13
14
#include "sync0sync.h"
15
#include "ut0mem.h"
16
#include "ut0lst.h"
17
#include "ut0byte.h"
18
#include "mem0mem.h"
19
20
/* We would like to use also the buffer frames to allocate memory. This
21
would be desirable, because then the memory consumption of the database
22
would be fixed, and we might even lock the buffer pool to the main memory.
23
The problem here is that the buffer management routines can themselves call
24
memory allocation, while the buffer pool mutex is reserved.
25
26
The main components of the memory consumption are:
27
28
1. buffer pool,
29
2. parsed and optimized SQL statements,
30
3. data dictionary cache,
31
4. log buffer,
32
5. locks for each transaction,
33
6. hash table for the adaptive index,
34
7. state and buffers for each SQL query currently being executed,
35
8. session for each user, and
36
9. stack for each OS thread.
37
38
Items 1 and 2 are managed by an LRU algorithm. Items 5 and 6 can potentially
39
consume very much memory. Items 7 and 8 should consume quite little memory,
40
and the OS should take care of item 9, which too should consume little memory.
41
42
A solution to the memory management:
43
44
1. the buffer pool size is set separately;
45
2. log buffer size is set separately;
46
3. the common pool size for all the other entries, except 8, is set separately.
47
48
Problems: we may waste memory if the common pool is set too big. Another
49
problem is the locks, which may take very much space in big transactions.
50
Then the shared pool size should be set very big. We can allow locks to take
51
space from the buffer pool, but the SQL optimizer is then unaware of the
52
usable size of the buffer pool. We could also combine the objects in the
53
common pool and the buffers in the buffer pool into a single LRU list and
54
manage it uniformly, but this approach does not take into account the parsing
55
and other costs unique to SQL statements.
56
57
The locks for a transaction can be seen as a part of the state of the
58
transaction. Hence, they should be stored in the common pool. We still
59
have the problem of a very big update transaction, for example, which
60
will set very many x-locks on rows, and the locks will consume a lot
61
of memory, say, half of the buffer pool size.
62
63
Another problem is what to do if we are not able to malloc a requested
64
block of memory from the common pool. Then we can request memory from
65
the operating system. If it does not help, a system error results.
66
67
Because 5 and 6 may potentially consume very much memory, we let them grow
68
into the buffer pool. We may let the locks of a transaction take frames
69
from the buffer pool, when the corresponding memory heap block has grown to
70
the size of a buffer frame. Similarly for the hash node cells of the locks,
71
and for the adaptive index. Thus, for each individual transaction, its locks
72
can occupy at most about the size of the buffer frame of memory in the common
73
pool, and after that its locks will grow into the buffer pool. */
74
75
/* Mask used to extract the free bit from area->size */
76
#define MEM_AREA_FREE	1
77
78
/* The smallest memory area total size */
79
#define MEM_AREA_MIN_SIZE	(2 * MEM_AREA_EXTRA_SIZE)
80
81
82
/* Data structure for a memory pool. The space is allocated using the buddy
83
algorithm, where free list i contains areas of size 2 to power i. */
84
struct mem_pool_struct{
85
	byte*		buf;		/* memory pool */
86
	ulint		size;		/* memory common pool size */
87
	ulint		reserved;	/* amount of currently allocated
88
					memory */
89
	mutex_t		mutex;		/* mutex protecting this struct */
90
	UT_LIST_BASE_NODE_T(mem_area_t)
91
			free_list[64];	/* lists of free memory areas: an
92
					area is put to the list whose number
93
					is the 2-logarithm of the area size */
94
};
95
96
/* The common memory pool */
97
mem_pool_t*	mem_comm_pool	= NULL;
98
99
/* We use this counter to check that the mem pool mutex does not leak;
100
this is to track a strange assertion failure reported at
101
mysql@lists.mysql.com */
102
103
ulint		mem_n_threads_inside		= 0;
104
105
/************************************************************************
106
Reserves the mem pool mutex. */
107
108
void
109
mem_pool_mutex_enter(void)
110
/*======================*/
111
{
112
	mutex_enter(&(mem_comm_pool->mutex));
113
}
114
115
/************************************************************************
116
Releases the mem pool mutex. */
117
118
void
119
mem_pool_mutex_exit(void)
120
/*=====================*/
121
{
122
	mutex_exit(&(mem_comm_pool->mutex));
123
}
124
125
/************************************************************************
126
Returns memory area size. */
127
UNIV_INLINE
128
ulint
129
mem_area_get_size(
130
/*==============*/
131
				/* out: size */
132
	mem_area_t*	area)	/* in: area */
133
{
134
	return(area->size_and_free & ~MEM_AREA_FREE);
135
}
136
137
/************************************************************************
138
Sets memory area size. */
139
UNIV_INLINE
140
void
141
mem_area_set_size(
142
/*==============*/
143
	mem_area_t*	area,	/* in: area */
144
	ulint		size)	/* in: size */
145
{
146
	area->size_and_free = (area->size_and_free & MEM_AREA_FREE)
147
		| size;
148
}
149
150
/************************************************************************
151
Returns memory area free bit. */
152
UNIV_INLINE
153
ibool
154
mem_area_get_free(
155
/*==============*/
156
				/* out: TRUE if free */
157
	mem_area_t*	area)	/* in: area */
158
{
159
#if TRUE != MEM_AREA_FREE
160
# error "TRUE != MEM_AREA_FREE"
161
#endif
162
	return(area->size_and_free & MEM_AREA_FREE);
163
}
164
165
/************************************************************************
166
Sets memory area free bit. */
167
UNIV_INLINE
168
void
169
mem_area_set_free(
170
/*==============*/
171
	mem_area_t*	area,	/* in: area */
172
	ibool		free)	/* in: free bit value */
173
{
174
#if TRUE != MEM_AREA_FREE
175
# error "TRUE != MEM_AREA_FREE"
176
#endif
177
	area->size_and_free = (area->size_and_free & ~MEM_AREA_FREE)
178
		| free;
179
}
180
181
/************************************************************************
182
Creates a memory pool. */
183
184
mem_pool_t*
185
mem_pool_create(
186
/*============*/
187
			/* out: memory pool */
188
	ulint	size)	/* in: pool size in bytes */
189
{
190
	mem_pool_t*	pool;
191
	mem_area_t*	area;
192
	ulint		i;
193
	ulint		used;
194
195
	ut_a(size > 10000);
196
197
	pool = ut_malloc(sizeof(mem_pool_t));
198
199
	/* We do not set the memory to zero (FALSE) in the pool,
200
	but only when allocated at a higher level in mem0mem.c.
201
	This is to avoid masking useful Purify warnings. */
202
203
	pool->buf = ut_malloc_low(size, FALSE, TRUE);
204
	pool->size = size;
205
206
	mutex_create(&pool->mutex, SYNC_MEM_POOL);
207
208
	/* Initialize the free lists */
209
210
	for (i = 0; i < 64; i++) {
211
212
		UT_LIST_INIT(pool->free_list[i]);
213
	}
214
215
	used = 0;
216
217
	while (size - used >= MEM_AREA_MIN_SIZE) {
218
219
		i = ut_2_log(size - used);
220
221
		if (ut_2_exp(i) > size - used) {
222
223
			/* ut_2_log rounds upward */
224
225
			i--;
226
		}
227
228
		area = (mem_area_t*)(pool->buf + used);
229
230
		mem_area_set_size(area, ut_2_exp(i));
231
		mem_area_set_free(area, TRUE);
232
		UNIV_MEM_FREE(MEM_AREA_EXTRA_SIZE + (byte*) area,
233
			      ut_2_exp(i) - MEM_AREA_EXTRA_SIZE);
234
235
		UT_LIST_ADD_FIRST(free_list, pool->free_list[i], area);
236
237
		used = used + ut_2_exp(i);
238
	}
239
240
	ut_ad(size >= used);
241
242
	pool->reserved = 0;
243
244
	return(pool);
245
}
246
247
/************************************************************************
248
Fills the specified free list. */
249
static
250
ibool
251
mem_pool_fill_free_list(
252
/*====================*/
253
				/* out: TRUE if we were able to insert a
254
				block to the free list */
255
	ulint		i,	/* in: free list index */
256
	mem_pool_t*	pool)	/* in: memory pool */
257
{
258
	mem_area_t*	area;
259
	mem_area_t*	area2;
260
	ibool		ret;
261
262
	ut_ad(mutex_own(&(pool->mutex)));
263
264
	if (i >= 63) {
265
		/* We come here when we have run out of space in the
266
		memory pool: */
267
268
		return(FALSE);
269
	}
270
271
	area = UT_LIST_GET_FIRST(pool->free_list[i + 1]);
272
273
	if (area == NULL) {
274
		if (UT_LIST_GET_LEN(pool->free_list[i + 1]) > 0) {
275
			ut_print_timestamp(stderr);
276
277
			fprintf(stderr,
278
				"  InnoDB: Error: mem pool free list %lu"
279
				" length is %lu\n"
280
				"InnoDB: though the list is empty!\n",
281
				(ulong) i + 1,
282
				(ulong)
283
				UT_LIST_GET_LEN(pool->free_list[i + 1]));
284
		}
285
286
		ret = mem_pool_fill_free_list(i + 1, pool);
287
288
		if (ret == FALSE) {
289
290
			return(FALSE);
291
		}
292
293
		area = UT_LIST_GET_FIRST(pool->free_list[i + 1]);
294
	}
295
296
	if (UT_LIST_GET_LEN(pool->free_list[i + 1]) == 0) {
297
		mem_analyze_corruption(area);
298
299
		ut_error;
300
	}
301
302
	UT_LIST_REMOVE(free_list, pool->free_list[i + 1], area);
303
304
	area2 = (mem_area_t*)(((byte*)area) + ut_2_exp(i));
305
	UNIV_MEM_ALLOC(area2, MEM_AREA_EXTRA_SIZE);
306
307
	mem_area_set_size(area2, ut_2_exp(i));
308
	mem_area_set_free(area2, TRUE);
309
310
	UT_LIST_ADD_FIRST(free_list, pool->free_list[i], area2);
311
312
	mem_area_set_size(area, ut_2_exp(i));
313
314
	UT_LIST_ADD_FIRST(free_list, pool->free_list[i], area);
315
316
	return(TRUE);
317
}
318
319
/************************************************************************
320
Allocates memory from a pool. NOTE: This low-level function should only be
321
used in mem0mem.*! */
322
323
void*
324
mem_area_alloc(
325
/*===========*/
326
				/* out, own: allocated memory buffer */
327
	ulint		size,	/* in: allocated size in bytes; for optimum
328
				space usage, the size should be a power of 2
329
				minus MEM_AREA_EXTRA_SIZE */
330
	mem_pool_t*	pool)	/* in: memory pool */
331
{
332
	mem_area_t*	area;
333
	ulint		n;
334
	ibool		ret;
335
336
	n = ut_2_log(ut_max(size + MEM_AREA_EXTRA_SIZE, MEM_AREA_MIN_SIZE));
337
338
	mutex_enter(&(pool->mutex));
339
	mem_n_threads_inside++;
340
341
	ut_a(mem_n_threads_inside == 1);
342
343
	area = UT_LIST_GET_FIRST(pool->free_list[n]);
344
345
	if (area == NULL) {
346
		ret = mem_pool_fill_free_list(n, pool);
347
348
		if (ret == FALSE) {
349
			/* Out of memory in memory pool: we try to allocate
350
			from the operating system with the regular malloc: */
351
352
			mem_n_threads_inside--;
353
			mutex_exit(&(pool->mutex));
354
355
			return(ut_malloc(size));
356
		}
357
358
		area = UT_LIST_GET_FIRST(pool->free_list[n]);
359
	}
360
361
	if (!mem_area_get_free(area)) {
362
		fprintf(stderr,
363
			"InnoDB: Error: Removing element from mem pool"
364
			" free list %lu though the\n"
365
			"InnoDB: element is not marked free!\n",
366
			(ulong) n);
367
368
		mem_analyze_corruption(area);
369
370
		/* Try to analyze a strange assertion failure reported at
371
		mysql@lists.mysql.com where the free bit IS 1 in the
372
		hex dump above */
373
374
		if (mem_area_get_free(area)) {
375
			fprintf(stderr,
376
				"InnoDB: Probably a race condition"
377
				" because now the area is marked free!\n");
378
		}
379
380
		ut_error;
381
	}
382
383
	if (UT_LIST_GET_LEN(pool->free_list[n]) == 0) {
384
		fprintf(stderr,
385
			"InnoDB: Error: Removing element from mem pool"
386
			" free list %lu\n"
387
			"InnoDB: though the list length is 0!\n",
388
			(ulong) n);
389
		mem_analyze_corruption(area);
390
391
		ut_error;
392
	}
393
394
	ut_ad(mem_area_get_size(area) == ut_2_exp(n));
395
396
	mem_area_set_free(area, FALSE);
397
398
	UT_LIST_REMOVE(free_list, pool->free_list[n], area);
399
400
	pool->reserved += mem_area_get_size(area);
401
402
	mem_n_threads_inside--;
403
	mutex_exit(&(pool->mutex));
404
405
	ut_ad(mem_pool_validate(pool));
406
	UNIV_MEM_ALLOC(MEM_AREA_EXTRA_SIZE + (byte*)area,
407
		       ut_2_exp(n) - MEM_AREA_EXTRA_SIZE);
408
409
	return((void*)(MEM_AREA_EXTRA_SIZE + ((byte*)area)));
410
}
411
412
/************************************************************************
413
Gets the buddy of an area, if it exists in pool. */
414
UNIV_INLINE
415
mem_area_t*
416
mem_area_get_buddy(
417
/*===============*/
418
				/* out: the buddy, NULL if no buddy in pool */
419
	mem_area_t*	area,	/* in: memory area */
420
	ulint		size,	/* in: memory area size */
421
	mem_pool_t*	pool)	/* in: memory pool */
422
{
423
	mem_area_t*	buddy;
424
425
	ut_ad(size != 0);
426
427
	if (((((byte*)area) - pool->buf) % (2 * size)) == 0) {
428
429
		/* The buddy is in a higher address */
430
431
		buddy = (mem_area_t*)(((byte*)area) + size);
432
433
		if ((((byte*)buddy) - pool->buf) + size > pool->size) {
434
435
			/* The buddy is not wholly contained in the pool:
436
			there is no buddy */
437
438
			buddy = NULL;
439
		}
440
	} else {
441
		/* The buddy is in a lower address; NOTE that area cannot
442
		be at the pool lower end, because then we would end up to
443
		the upper branch in this if-clause: the remainder would be
444
		0 */
445
446
		buddy = (mem_area_t*)(((byte*)area) - size);
447
	}
448
449
	return(buddy);
450
}
451
452
/************************************************************************
453
Frees memory to a pool. */
454
455
void
456
mem_area_free(
457
/*==========*/
458
	void*		ptr,	/* in, own: pointer to allocated memory
459
				buffer */
460
	mem_pool_t*	pool)	/* in: memory pool */
461
{
462
	mem_area_t*	area;
463
	mem_area_t*	buddy;
464
	void*		new_ptr;
465
	ulint		size;
466
	ulint		n;
467
468
	/* It may be that the area was really allocated from the OS with
469
	regular malloc: check if ptr points within our memory pool */
470
471
	if ((byte*)ptr < pool->buf || (byte*)ptr >= pool->buf + pool->size) {
472
		ut_free(ptr);
473
474
		return;
475
	}
476
477
	area = (mem_area_t*) (((byte*)ptr) - MEM_AREA_EXTRA_SIZE);
478
479
	if (mem_area_get_free(area)) {
480
		fprintf(stderr,
481
			"InnoDB: Error: Freeing element to mem pool"
482
			" free list though the\n"
483
			"InnoDB: element is marked free!\n");
484
485
		mem_analyze_corruption(area);
486
		ut_error;
487
	}
488
489
	size = mem_area_get_size(area);
490
	UNIV_MEM_FREE(ptr, size - MEM_AREA_EXTRA_SIZE);
491
492
	if (size == 0) {
493
		fprintf(stderr,
494
			"InnoDB: Error: Mem area size is 0. Possibly a"
495
			" memory overrun of the\n"
496
			"InnoDB: previous allocated area!\n");
497
498
		mem_analyze_corruption(area);
499
		ut_error;
500
	}
501
502
#ifdef UNIV_LIGHT_MEM_DEBUG
503
	if (((byte*)area) + size < pool->buf + pool->size) {
504
505
		ulint	next_size;
506
507
		next_size = mem_area_get_size(
508
			(mem_area_t*)(((byte*)area) + size));
509
		if (ut_2_power_up(next_size) != next_size) {
510
			fprintf(stderr,
511
				"InnoDB: Error: Memory area size %lu,"
512
				" next area size %lu not a power of 2!\n"
513
				"InnoDB: Possibly a memory overrun of"
514
				" the buffer being freed here.\n",
515
				(ulong) size, (ulong) next_size);
516
			mem_analyze_corruption(area);
517
518
			ut_error;
519
		}
520
	}
521
#endif
522
	buddy = mem_area_get_buddy(area, size, pool);
523
524
	n = ut_2_log(size);
525
526
	mutex_enter(&(pool->mutex));
527
	mem_n_threads_inside++;
528
529
	ut_a(mem_n_threads_inside == 1);
530
531
	if (buddy && mem_area_get_free(buddy)
532
	    && (size == mem_area_get_size(buddy))) {
533
534
		/* The buddy is in a free list */
535
536
		if ((byte*)buddy < (byte*)area) {
537
			new_ptr = ((byte*)buddy) + MEM_AREA_EXTRA_SIZE;
538
539
			mem_area_set_size(buddy, 2 * size);
540
			mem_area_set_free(buddy, FALSE);
541
		} else {
542
			new_ptr = ptr;
543
544
			mem_area_set_size(area, 2 * size);
545
		}
546
547
		/* Remove the buddy from its free list and merge it to area */
548
549
		UT_LIST_REMOVE(free_list, pool->free_list[n], buddy);
550
551
		pool->reserved += ut_2_exp(n);
552
553
		mem_n_threads_inside--;
554
		mutex_exit(&(pool->mutex));
555
556
		mem_area_free(new_ptr, pool);
557
558
		return;
559
	} else {
560
		UT_LIST_ADD_FIRST(free_list, pool->free_list[n], area);
561
562
		mem_area_set_free(area, TRUE);
563
564
		ut_ad(pool->reserved >= size);
565
566
		pool->reserved -= size;
567
	}
568
569
	mem_n_threads_inside--;
570
	mutex_exit(&(pool->mutex));
571
572
	ut_ad(mem_pool_validate(pool));
573
}
574
575
/************************************************************************
576
Validates a memory pool. */
577
578
ibool
579
mem_pool_validate(
580
/*==============*/
581
				/* out: TRUE if ok */
582
	mem_pool_t*	pool)	/* in: memory pool */
583
{
584
	mem_area_t*	area;
585
	mem_area_t*	buddy;
586
	ulint		free;
587
	ulint		i;
588
589
	mutex_enter(&(pool->mutex));
590
591
	free = 0;
592
593
	for (i = 0; i < 64; i++) {
594
595
		UT_LIST_VALIDATE(free_list, mem_area_t, pool->free_list[i]);
596
597
		area = UT_LIST_GET_FIRST(pool->free_list[i]);
598
599
		while (area != NULL) {
600
			ut_a(mem_area_get_free(area));
601
			ut_a(mem_area_get_size(area) == ut_2_exp(i));
602
603
			buddy = mem_area_get_buddy(area, ut_2_exp(i), pool);
604
605
			ut_a(!buddy || !mem_area_get_free(buddy)
606
			     || (ut_2_exp(i) != mem_area_get_size(buddy)));
607
608
			area = UT_LIST_GET_NEXT(free_list, area);
609
610
			free += ut_2_exp(i);
611
		}
612
	}
613
614
	ut_a(free + pool->reserved == pool->size);
615
616
	mutex_exit(&(pool->mutex));
617
618
	return(TRUE);
619
}
620
621
/************************************************************************
622
Prints info of a memory pool. */
623
624
void
625
mem_pool_print_info(
626
/*================*/
627
	FILE*		outfile,/* in: output file to write to */
628
	mem_pool_t*	pool)	/* in: memory pool */
629
{
630
	ulint		i;
631
632
	mem_pool_validate(pool);
633
634
	fprintf(outfile, "INFO OF A MEMORY POOL\n");
635
636
	mutex_enter(&(pool->mutex));
637
638
	for (i = 0; i < 64; i++) {
639
		if (UT_LIST_GET_LEN(pool->free_list[i]) > 0) {
640
641
			fprintf(outfile,
642
				"Free list length %lu for"
643
				" blocks of size %lu\n",
644
				(ulong) UT_LIST_GET_LEN(pool->free_list[i]),
645
				(ulong) ut_2_exp(i));
646
		}
647
	}
648
649
	fprintf(outfile, "Pool size %lu, reserved %lu.\n", (ulong) pool->size,
650
		(ulong) pool->reserved);
651
	mutex_exit(&(pool->mutex));
652
}
653
654
/************************************************************************
655
Returns the amount of reserved memory. */
656
657
ulint
658
mem_pool_get_reserved(
659
/*==================*/
660
				/* out: reserved memory in bytes */
661
	mem_pool_t*	pool)	/* in: memory pool */
662
{
663
	ulint	reserved;
664
665
	mutex_enter(&(pool->mutex));
666
667
	reserved = pool->reserved;
668
669
	mutex_exit(&(pool->mutex));
670
671
	return(reserved);
672
}