1
/******************************************************
2
Binary buddy allocator for compressed pages
6
Created December 2006 by Marko Makela
7
*******************************************************/
10
#include "buf0buddy.h"
12
# include "buf0buddy.ic"
20
/* Statistic counters */
23
/** Number of frames allocated from the buffer pool to the buddy system.
24
Protected by buf_pool_mutex. */
25
static ulint buf_buddy_n_frames;
26
#endif /* UNIV_DEBUG */
27
/** Statistics of the buddy system, indexed by block size.
28
Protected by buf_pool_mutex. */
29
UNIV_INTERN buf_buddy_stat_t buf_buddy_stat[BUF_BUDDY_SIZES + 1];
31
/**************************************************************************
32
Get the offset of the buddy of a compressed page frame. */
37
/* out: the buddy relative of page */
38
byte* page, /* in: compressed page */
39
ulint size) /* in: page size in bytes */
41
ut_ad(ut_is_2pow(size));
42
ut_ad(size >= BUF_BUDDY_LOW);
43
ut_ad(size < BUF_BUDDY_HIGH);
44
ut_ad(!ut_align_offset(page, size));
46
if (((ulint) page) & size) {
53
/**************************************************************************
54
Add a block to the head of the appropriate buddy free list. */
57
buf_buddy_add_to_free(
58
/*==================*/
59
buf_page_t* bpage, /* in,own: block to be freed */
60
ulint i) /* in: index of buf_pool->zip_free[] */
62
#ifdef UNIV_DEBUG_VALGRIND
63
buf_page_t* b = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
65
if (b) UNIV_MEM_VALID(b, BUF_BUDDY_LOW << i);
66
#endif /* UNIV_DEBUG_VALGRIND */
68
ut_ad(buf_pool->zip_free[i].start != bpage);
69
UT_LIST_ADD_FIRST(list, buf_pool->zip_free[i], bpage);
71
#ifdef UNIV_DEBUG_VALGRIND
72
if (b) UNIV_MEM_FREE(b, BUF_BUDDY_LOW << i);
73
UNIV_MEM_ASSERT_AND_FREE(bpage, BUF_BUDDY_LOW << i);
74
#endif /* UNIV_DEBUG_VALGRIND */
77
/**************************************************************************
78
Remove a block from the appropriate buddy free list. */
81
buf_buddy_remove_from_free(
82
/*=======================*/
83
buf_page_t* bpage, /* in: block to be removed */
84
ulint i) /* in: index of buf_pool->zip_free[] */
86
#ifdef UNIV_DEBUG_VALGRIND
87
buf_page_t* prev = UT_LIST_GET_PREV(list, bpage);
88
buf_page_t* next = UT_LIST_GET_NEXT(list, bpage);
90
if (prev) UNIV_MEM_VALID(prev, BUF_BUDDY_LOW << i);
91
if (next) UNIV_MEM_VALID(next, BUF_BUDDY_LOW << i);
93
ut_ad(!prev || buf_page_get_state(prev) == BUF_BLOCK_ZIP_FREE);
94
ut_ad(!next || buf_page_get_state(next) == BUF_BLOCK_ZIP_FREE);
95
#endif /* UNIV_DEBUG_VALGRIND */
97
ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
98
UT_LIST_REMOVE(list, buf_pool->zip_free[i], bpage);
100
#ifdef UNIV_DEBUG_VALGRIND
101
if (prev) UNIV_MEM_FREE(prev, BUF_BUDDY_LOW << i);
102
if (next) UNIV_MEM_FREE(next, BUF_BUDDY_LOW << i);
103
#endif /* UNIV_DEBUG_VALGRIND */
106
/**************************************************************************
107
Try to allocate a block from buf_pool->zip_free[]. */
112
/* out: allocated block, or NULL
113
if buf_pool->zip_free[] was empty */
114
ulint i) /* in: index of buf_pool->zip_free[] */
118
ut_ad(buf_pool_mutex_own());
119
ut_a(i < BUF_BUDDY_SIZES);
121
#if defined UNIV_DEBUG && !defined UNIV_DEBUG_VALGRIND
122
/* Valgrind would complain about accessing free memory. */
123
UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i]);
124
#endif /* UNIV_DEBUG && !UNIV_DEBUG_VALGRIND */
125
bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
128
UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
129
ut_a(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
131
buf_buddy_remove_from_free(bpage, i);
132
} else if (i + 1 < BUF_BUDDY_SIZES) {
133
/* Attempt to split. */
134
bpage = buf_buddy_alloc_zip(i + 1);
137
buf_page_t* buddy = (buf_page_t*)
138
(((char*) bpage) + (BUF_BUDDY_LOW << i));
140
ut_ad(!buf_pool_contains_zip(buddy));
141
ut_d(memset(buddy, i, BUF_BUDDY_LOW << i));
142
buddy->state = BUF_BLOCK_ZIP_FREE;
143
buf_buddy_add_to_free(buddy, i);
149
memset(bpage, ~i, BUF_BUDDY_LOW << i);
151
#endif /* UNIV_DEBUG */
153
UNIV_MEM_ALLOC(bpage, BUF_BUDDY_SIZES << i);
158
/**************************************************************************
159
Deallocate a buffer frame of UNIV_PAGE_SIZE. */
162
buf_buddy_block_free(
163
/*=================*/
164
void* buf) /* in: buffer frame to deallocate */
166
const ulint fold = BUF_POOL_ZIP_FOLD_PTR(buf);
170
ut_ad(buf_pool_mutex_own());
171
ut_ad(!mutex_own(&buf_pool_zip_mutex));
172
ut_a(!ut_align_offset(buf, UNIV_PAGE_SIZE));
174
HASH_SEARCH(hash, buf_pool->zip_hash, fold, buf_page_t*, bpage,
175
((buf_block_t*) bpage)->frame == buf);
177
ut_a(buf_page_get_state(bpage) == BUF_BLOCK_MEMORY);
178
ut_ad(!bpage->in_page_hash);
179
ut_ad(bpage->in_zip_hash);
180
ut_d(bpage->in_zip_hash = FALSE);
181
HASH_DELETE(buf_page_t, hash, buf_pool->zip_hash, fold, bpage);
183
ut_d(memset(buf, 0, UNIV_PAGE_SIZE));
184
UNIV_MEM_INVALID(buf, UNIV_PAGE_SIZE);
186
block = (buf_block_t*) bpage;
187
mutex_enter(&block->mutex);
188
buf_LRU_block_free_non_file_page(block);
189
mutex_exit(&block->mutex);
191
ut_ad(buf_buddy_n_frames > 0);
192
ut_d(buf_buddy_n_frames--);
195
/**************************************************************************
196
Allocate a buffer block to the buddy allocator. */
199
buf_buddy_block_register(
200
/*=====================*/
201
buf_block_t* block) /* in: buffer frame to allocate */
203
const ulint fold = BUF_POOL_ZIP_FOLD(block);
204
ut_ad(buf_pool_mutex_own());
205
ut_ad(!mutex_own(&buf_pool_zip_mutex));
207
buf_block_set_state(block, BUF_BLOCK_MEMORY);
210
ut_a(!ut_align_offset(block->frame, UNIV_PAGE_SIZE));
212
ut_ad(!block->page.in_page_hash);
213
ut_ad(!block->page.in_zip_hash);
214
ut_d(block->page.in_zip_hash = TRUE);
215
HASH_INSERT(buf_page_t, hash, buf_pool->zip_hash, fold, &block->page);
217
ut_d(buf_buddy_n_frames++);
220
/**************************************************************************
221
Allocate a block from a bigger object. */
224
buf_buddy_alloc_from(
225
/*=================*/
226
/* out: allocated block */
227
void* buf, /* in: a block that is free to use */
228
ulint i, /* in: index of buf_pool->zip_free[] */
229
ulint j) /* in: size of buf as an index
230
of buf_pool->zip_free[] */
232
ulint offs = BUF_BUDDY_LOW << j;
233
ut_ad(j <= BUF_BUDDY_SIZES);
235
ut_ad(!ut_align_offset(buf, offs));
237
/* Add the unused parts of the block to the free lists. */
244
bpage = (buf_page_t*) ((byte*) buf + offs);
245
ut_d(memset(bpage, j, BUF_BUDDY_LOW << j));
246
bpage->state = BUF_BLOCK_ZIP_FREE;
247
#if defined UNIV_DEBUG && !defined UNIV_DEBUG_VALGRIND
248
/* Valgrind would complain about accessing free memory. */
249
UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[j]);
250
#endif /* UNIV_DEBUG && !UNIV_DEBUG_VALGRIND */
251
buf_buddy_add_to_free(bpage, j);
257
/**************************************************************************
258
Allocate a block. The thread calling this function must hold
259
buf_pool_mutex and must not hold buf_pool_zip_mutex or any block->mutex.
260
The buf_pool_mutex may only be released and reacquired if lru != NULL. */
265
/* out: allocated block,
266
possibly NULL if lru==NULL */
267
ulint i, /* in: index of buf_pool->zip_free[],
268
or BUF_BUDDY_SIZES */
269
ibool* lru) /* in: pointer to a variable that will be assigned
270
TRUE if storage was allocated from the LRU list
271
and buf_pool_mutex was temporarily released,
272
or NULL if the LRU list should not be used */
276
ut_ad(buf_pool_mutex_own());
277
ut_ad(!mutex_own(&buf_pool_zip_mutex));
279
if (i < BUF_BUDDY_SIZES) {
280
/* Try to allocate from the buddy system. */
281
block = buf_buddy_alloc_zip(i);
289
/* Try allocating from the buf_pool->free list. */
290
block = buf_LRU_get_free_only();
302
/* Try replacing an uncompressed page in the buffer pool. */
303
buf_pool_mutex_exit();
304
block = buf_LRU_get_free_block(0);
306
buf_pool_mutex_enter();
309
buf_buddy_block_register(block);
311
block = buf_buddy_alloc_from(block->frame, i, BUF_BUDDY_SIZES);
314
buf_buddy_stat[i].used++;
318
/**************************************************************************
319
Try to relocate the control block of a compressed page. */
322
buf_buddy_relocate_block(
323
/*=====================*/
324
/* out: TRUE if relocated */
325
buf_page_t* bpage, /* in: block to relocate */
326
buf_page_t* dpage) /* in: free block to relocate to */
330
ut_ad(buf_pool_mutex_own());
332
switch (buf_page_get_state(bpage)) {
333
case BUF_BLOCK_ZIP_FREE:
334
case BUF_BLOCK_NOT_USED:
335
case BUF_BLOCK_READY_FOR_USE:
336
case BUF_BLOCK_FILE_PAGE:
337
case BUF_BLOCK_MEMORY:
338
case BUF_BLOCK_REMOVE_HASH:
340
case BUF_BLOCK_ZIP_DIRTY:
341
/* Cannot relocate dirty pages. */
344
case BUF_BLOCK_ZIP_PAGE:
348
mutex_enter(&buf_pool_zip_mutex);
350
if (!buf_page_can_relocate(bpage)) {
351
mutex_exit(&buf_pool_zip_mutex);
355
buf_relocate(bpage, dpage);
356
ut_d(bpage->state = BUF_BLOCK_ZIP_FREE);
358
/* relocate buf_pool->zip_clean */
359
b = UT_LIST_GET_PREV(list, dpage);
360
UT_LIST_REMOVE(list, buf_pool->zip_clean, dpage);
363
UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean, b, dpage);
365
UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, dpage);
368
mutex_exit(&buf_pool_zip_mutex);
372
/**************************************************************************
373
Try to relocate a block. */
378
/* out: TRUE if relocated */
379
void* src, /* in: block to relocate */
380
void* dst, /* in: free block to relocate to */
381
ulint i) /* in: index of buf_pool->zip_free[] */
384
const ulint size = BUF_BUDDY_LOW << i;
385
ullint usec = ut_time_us(NULL);
387
ut_ad(buf_pool_mutex_own());
388
ut_ad(!mutex_own(&buf_pool_zip_mutex));
389
ut_ad(!ut_align_offset(src, size));
390
ut_ad(!ut_align_offset(dst, size));
391
UNIV_MEM_ASSERT_W(dst, size);
393
/* We assume that all memory from buf_buddy_alloc()
394
is used for either compressed pages or buf_page_t
395
objects covering compressed pages. */
397
/* We look inside the allocated objects returned by
398
buf_buddy_alloc() and assume that anything of
399
PAGE_ZIP_MIN_SIZE or larger is a compressed page that contains
400
a valid space_id and page_no in the page header. Should the
401
fields be invalid, we will be unable to relocate the block.
402
We also assume that anything that fits sizeof(buf_page_t)
403
actually is a properly initialized buf_page_t object. */
405
if (size >= PAGE_ZIP_MIN_SIZE) {
406
/* This is a compressed page. */
409
/* The src block may be split into smaller blocks,
410
some of which may be free. Thus, the
411
mach_read_from_4() calls below may attempt to read
412
from free memory. The memory is "owned" by the buddy
413
allocator (and it has been allocated from the buffer
414
pool), so there is nothing wrong about this. The
415
mach_read_from_4() calls here will only trigger bogus
416
Valgrind memcheck warnings in UNIV_DEBUG_VALGRIND builds. */
417
bpage = buf_page_hash_get(
418
mach_read_from_4((const byte*) src
419
+ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID),
420
mach_read_from_4((const byte*) src
423
if (!bpage || bpage->zip.data != src) {
424
/* The block has probably been freshly
425
allocated by buf_LRU_get_free_block() but not
426
added to buf_pool->page_hash yet. Obviously,
427
it cannot be relocated. */
432
if (page_zip_get_size(&bpage->zip) != size) {
433
/* The block is of different size. We would
434
have to relocate all blocks covered by src.
435
For the sake of simplicity, give up. */
436
ut_ad(page_zip_get_size(&bpage->zip) < size);
441
/* The block must have been allocated, but it may
442
contain uninitialized data. */
443
UNIV_MEM_ASSERT_W(src, size);
445
mutex = buf_page_get_mutex(bpage);
449
if (buf_page_can_relocate(bpage)) {
450
/* Relocate the compressed page. */
451
ut_a(bpage->zip.data == src);
452
memcpy(dst, src, size);
453
bpage->zip.data = dst;
456
UNIV_MEM_INVALID(src, size);
458
buf_buddy_stat_t* buddy_stat
459
= &buf_buddy_stat[i];
460
buddy_stat->relocated++;
461
buddy_stat->relocated_usec
462
+= ut_time_us(NULL) - usec;
468
} else if (i == buf_buddy_get_slot(sizeof(buf_page_t))) {
469
/* This must be a buf_page_t object. */
470
UNIV_MEM_ASSERT_RW(src, size);
471
if (buf_buddy_relocate_block(src, dst)) {
480
/**************************************************************************
481
Deallocate a block. */
486
void* buf, /* in: block to be freed, must not be
487
pointed to by the buffer pool */
488
ulint i) /* in: index of buf_pool->zip_free[] */
493
ut_ad(buf_pool_mutex_own());
494
ut_ad(!mutex_own(&buf_pool_zip_mutex));
495
ut_ad(i <= BUF_BUDDY_SIZES);
496
ut_ad(buf_buddy_stat[i].used > 0);
498
buf_buddy_stat[i].used--;
500
UNIV_MEM_ASSERT_AND_ALLOC(buf, BUF_BUDDY_LOW << i);
501
ut_d(((buf_page_t*) buf)->state = BUF_BLOCK_ZIP_FREE);
503
if (i == BUF_BUDDY_SIZES) {
504
buf_buddy_block_free(buf);
508
ut_ad(i < BUF_BUDDY_SIZES);
509
ut_ad(buf == ut_align_down(buf, BUF_BUDDY_LOW << i));
510
ut_ad(!buf_pool_contains_zip(buf));
512
/* Try to combine adjacent blocks. */
514
buddy = (buf_page_t*) buf_buddy_get(((byte*) buf), BUF_BUDDY_LOW << i);
516
#ifndef UNIV_DEBUG_VALGRIND
517
/* Valgrind would complain about accessing free memory. */
519
if (buddy->state != BUF_BLOCK_ZIP_FREE) {
524
/* The field buddy->state can only be trusted for free blocks.
525
If buddy->state == BUF_BLOCK_ZIP_FREE, the block is free if
526
it is in the free list. */
527
#endif /* !UNIV_DEBUG_VALGRIND */
529
for (bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]); bpage; ) {
530
UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
531
ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
533
if (bpage == buddy) {
535
/* The buddy is free: recombine */
536
buf_buddy_remove_from_free(bpage, i);
538
ut_ad(buf_page_get_state(buddy) == BUF_BLOCK_ZIP_FREE);
539
ut_ad(!buf_pool_contains_zip(buddy));
541
buf = ut_align_down(buf, BUF_BUDDY_LOW << i);
549
buf_page_t* next = UT_LIST_GET_NEXT(list, bpage);
550
UNIV_MEM_ASSERT_AND_FREE(bpage, BUF_BUDDY_LOW << i);
555
#ifndef UNIV_DEBUG_VALGRIND
557
/* Valgrind would complain about accessing free memory. */
558
ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i]));
559
#endif /* UNIV_DEBUG_VALGRIND */
561
/* The buddy is not free. Is there a free block of this size? */
562
bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
565
/* Remove the block from the free list, because a successful
566
buf_buddy_relocate() will overwrite bpage->list. */
568
UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
569
buf_buddy_remove_from_free(bpage, i);
571
/* Try to relocate the buddy of buf to the free block. */
572
if (buf_buddy_relocate(buddy, bpage, i)) {
574
ut_d(buddy->state = BUF_BLOCK_ZIP_FREE);
578
buf_buddy_add_to_free(bpage, i);
580
/* Try to relocate the buddy of the free block to buf. */
581
buddy = (buf_page_t*) buf_buddy_get(((byte*) bpage),
584
#if defined UNIV_DEBUG && !defined UNIV_DEBUG_VALGRIND
588
/* The buddy must not be (completely) free, because
589
we always recombine adjacent free blocks.
590
(Parts of the buddy can be free in
591
buf_pool->zip_free[j] with j < i.)*/
592
for (b = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
593
b; b = UT_LIST_GET_NEXT(list, b)) {
598
#endif /* UNIV_DEBUG && !UNIV_DEBUG_VALGRIND */
600
if (buf_buddy_relocate(buddy, buf, i)) {
603
UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
604
ut_d(buddy->state = BUF_BLOCK_ZIP_FREE);
609
/* Free the block to the buddy list. */
612
if (i < buf_buddy_get_slot(PAGE_ZIP_MIN_SIZE)) {
613
/* This area has most likely been allocated for at
614
least one compressed-only block descriptor. Check
615
that there are no live objects in the area. This is
616
not a complete check: it may yield false positives as
617
well as false negatives. Also, due to buddy blocks
618
being recombined, it is possible (although unlikely)
619
that this branch is never reached. */
623
# ifndef UNIV_DEBUG_VALGRIND
624
/* Valgrind would complain about accessing
625
uninitialized memory. Besides, Valgrind performs a
626
more exhaustive check, at every memory access. */
627
const buf_page_t* b = buf;
628
const buf_page_t* const b_end = (buf_page_t*)
629
((char*) b + (BUF_BUDDY_LOW << i));
631
for (; b < b_end; b++) {
632
/* Avoid false positives (and cause false
633
negatives) by checking for b->space < 1000. */
635
if ((b->state == BUF_BLOCK_ZIP_PAGE
636
|| b->state == BUF_BLOCK_ZIP_DIRTY)
637
&& b->space > 0 && b->space < 1000) {
639
"buddy dirty %p %u (%u,%u) %p,%lu\n",
641
b->state, b->space, b->offset,
645
# endif /* !UNIV_DEBUG_VALGRIND */
647
/* Scramble the block. This should make any pointers
648
invalid and trigger a segmentation violation. Because
649
the scrambling can be reversed, it may be possible to
650
track down the object pointing to the freed data by
651
dereferencing the unscrambled bpage->LRU or
652
bpage->list pointers. */
653
for (c = (char*) buf + (BUF_BUDDY_LOW << i);
654
c-- > (char*) buf; ) {
658
/* Fill large blocks with a constant pattern. */
659
memset(bpage, i, BUF_BUDDY_LOW << i);
661
#endif /* UNIV_DEBUG */
662
bpage->state = BUF_BLOCK_ZIP_FREE;
663
buf_buddy_add_to_free(bpage, i);