/*****************************************************************************

Copyright (c) 2006, 2010, Innobase Oy. All Rights Reserved.

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
St, Fifth Floor, Boston, MA 02110-1301 USA

*****************************************************************************/

/**************************************************//**
@file buf/buf0buddy.c
Binary buddy allocator for compressed pages

Created December 2006 by Marko Makela
*******************************************************/

#include "buf0buddy.h"
#ifdef UNIV_NONINL
# include "buf0buddy.ic"
#endif
/**********************************************************************//**
38
Get the offset of the buddy of a compressed page frame.
39
@return the buddy relative of page */
44
byte* page, /*!< in: compressed page */
45
ulint size) /*!< in: page size in bytes */
47
ut_ad(ut_is_2pow(size));
48
ut_ad(size >= BUF_BUDDY_LOW);
49
ut_ad(size < BUF_BUDDY_HIGH);
50
ut_ad(!ut_align_offset(page, size));
52
if (((ulint) page) & size) {
59
/**********************************************************************//**
60
Add a block to the head of the appropriate buddy free list. */
63
buf_buddy_add_to_free(
64
/*==================*/
65
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
66
buf_page_t* bpage, /*!< in,own: block to be freed */
67
ulint i) /*!< in: index of
68
buf_pool->zip_free[] */
70
#ifdef UNIV_DEBUG_VALGRIND
71
buf_page_t* b = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
73
if (b) UNIV_MEM_VALID(b, BUF_BUDDY_LOW << i);
74
#endif /* UNIV_DEBUG_VALGRIND */
76
ut_ad(buf_pool_mutex_own(buf_pool));
77
ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
78
ut_ad(buf_pool->zip_free[i].start != bpage);
79
UT_LIST_ADD_FIRST(list, buf_pool->zip_free[i], bpage);
81
#ifdef UNIV_DEBUG_VALGRIND
82
if (b) UNIV_MEM_FREE(b, BUF_BUDDY_LOW << i);
83
UNIV_MEM_ASSERT_AND_FREE(bpage, BUF_BUDDY_LOW << i);
84
#endif /* UNIV_DEBUG_VALGRIND */
87
/**********************************************************************//**
88
Remove a block from the appropriate buddy free list. */
91
buf_buddy_remove_from_free(
92
/*=======================*/
93
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
94
buf_page_t* bpage, /*!< in: block to be removed */
95
ulint i) /*!< in: index of
96
buf_pool->zip_free[] */
98
#ifdef UNIV_DEBUG_VALGRIND
99
buf_page_t* prev = UT_LIST_GET_PREV(list, bpage);
100
buf_page_t* next = UT_LIST_GET_NEXT(list, bpage);
102
if (prev) UNIV_MEM_VALID(prev, BUF_BUDDY_LOW << i);
103
if (next) UNIV_MEM_VALID(next, BUF_BUDDY_LOW << i);
105
ut_ad(!prev || buf_page_get_state(prev) == BUF_BLOCK_ZIP_FREE);
106
ut_ad(!next || buf_page_get_state(next) == BUF_BLOCK_ZIP_FREE);
107
#endif /* UNIV_DEBUG_VALGRIND */
109
ut_ad(buf_pool_mutex_own(buf_pool));
110
ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
111
UT_LIST_REMOVE(list, buf_pool->zip_free[i], bpage);
113
#ifdef UNIV_DEBUG_VALGRIND
114
if (prev) UNIV_MEM_FREE(prev, BUF_BUDDY_LOW << i);
115
if (next) UNIV_MEM_FREE(next, BUF_BUDDY_LOW << i);
116
#endif /* UNIV_DEBUG_VALGRIND */
119
/**********************************************************************//**
120
Try to allocate a block from buf_pool->zip_free[].
121
@return allocated block, or NULL if buf_pool->zip_free[] was empty */
126
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
127
ulint i) /*!< in: index of buf_pool->zip_free[] */
131
ut_ad(buf_pool_mutex_own(buf_pool));
132
ut_a(i < BUF_BUDDY_SIZES);
134
#ifndef UNIV_DEBUG_VALGRIND
135
/* Valgrind would complain about accessing free memory. */
136
ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
137
ut_ad(buf_page_get_state(ut_list_node_313)
138
== BUF_BLOCK_ZIP_FREE)));
139
#endif /* !UNIV_DEBUG_VALGRIND */
140
bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
143
UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
144
ut_a(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
146
buf_buddy_remove_from_free(buf_pool, bpage, i);
147
} else if (i + 1 < BUF_BUDDY_SIZES) {
148
/* Attempt to split. */
149
bpage = buf_buddy_alloc_zip(buf_pool, i + 1);
152
buf_page_t* buddy = (buf_page_t*)
153
(((char*) bpage) + (BUF_BUDDY_LOW << i));
155
ut_ad(!buf_pool_contains_zip(buf_pool, buddy));
156
ut_d(memset(buddy, i, BUF_BUDDY_LOW << i));
157
buddy->state = BUF_BLOCK_ZIP_FREE;
158
buf_buddy_add_to_free(buf_pool, buddy, i);
164
memset(bpage, ~i, BUF_BUDDY_LOW << i);
166
#endif /* UNIV_DEBUG */
168
UNIV_MEM_ALLOC(bpage, BUF_BUDDY_SIZES << i);
173
/**********************************************************************//**
174
Deallocate a buffer frame of UNIV_PAGE_SIZE. */
177
buf_buddy_block_free(
178
/*=================*/
179
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
180
void* buf) /*!< in: buffer frame to deallocate */
182
const ulint fold = BUF_POOL_ZIP_FOLD_PTR(buf);
186
ut_ad(buf_pool_mutex_own(buf_pool));
187
ut_ad(!mutex_own(&buf_pool->zip_mutex));
188
ut_a(!ut_align_offset(buf, UNIV_PAGE_SIZE));
190
HASH_SEARCH(hash, buf_pool->zip_hash, fold, buf_page_t*, bpage,
191
ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_MEMORY
192
&& bpage->in_zip_hash && !bpage->in_page_hash),
193
((buf_block_t*) bpage)->frame == buf);
195
ut_a(buf_page_get_state(bpage) == BUF_BLOCK_MEMORY);
196
ut_ad(!bpage->in_page_hash);
197
ut_ad(bpage->in_zip_hash);
198
ut_d(bpage->in_zip_hash = FALSE);
199
HASH_DELETE(buf_page_t, hash, buf_pool->zip_hash, fold, bpage);
201
ut_d(memset(buf, 0, UNIV_PAGE_SIZE));
202
UNIV_MEM_INVALID(buf, UNIV_PAGE_SIZE);
204
block = (buf_block_t*) bpage;
205
mutex_enter(&block->mutex);
206
buf_LRU_block_free_non_file_page(block);
207
mutex_exit(&block->mutex);
209
ut_ad(buf_pool->buddy_n_frames > 0);
210
ut_d(buf_pool->buddy_n_frames--);
213
/**********************************************************************//**
214
Allocate a buffer block to the buddy allocator. */
217
buf_buddy_block_register(
218
/*=====================*/
219
buf_block_t* block) /*!< in: buffer frame to allocate */
221
buf_pool_t* buf_pool = buf_pool_from_block(block);
222
const ulint fold = BUF_POOL_ZIP_FOLD(block);
223
ut_ad(buf_pool_mutex_own(buf_pool));
224
ut_ad(!mutex_own(&buf_pool->zip_mutex));
225
ut_ad(buf_block_get_state(block) == BUF_BLOCK_READY_FOR_USE);
227
buf_block_set_state(block, BUF_BLOCK_MEMORY);
230
ut_a(!ut_align_offset(block->frame, UNIV_PAGE_SIZE));
232
ut_ad(!block->page.in_page_hash);
233
ut_ad(!block->page.in_zip_hash);
234
ut_d(block->page.in_zip_hash = TRUE);
235
HASH_INSERT(buf_page_t, hash, buf_pool->zip_hash, fold, &block->page);
237
ut_d(buf_pool->buddy_n_frames++);
240
/**********************************************************************//**
241
Allocate a block from a bigger object.
242
@return allocated block */
245
buf_buddy_alloc_from(
246
/*=================*/
247
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
248
void* buf, /*!< in: a block that is free to use */
249
ulint i, /*!< in: index of
250
buf_pool->zip_free[] */
251
ulint j) /*!< in: size of buf as an index
252
of buf_pool->zip_free[] */
254
ulint offs = BUF_BUDDY_LOW << j;
255
ut_ad(j <= BUF_BUDDY_SIZES);
257
ut_ad(!ut_align_offset(buf, offs));
259
/* Add the unused parts of the block to the free lists. */
266
bpage = (buf_page_t*) ((byte*) buf + offs);
267
ut_d(memset(bpage, j, BUF_BUDDY_LOW << j));
268
bpage->state = BUF_BLOCK_ZIP_FREE;
269
#ifndef UNIV_DEBUG_VALGRIND
270
/* Valgrind would complain about accessing free memory. */
271
ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
272
ut_ad(buf_page_get_state(
274
== BUF_BLOCK_ZIP_FREE)));
275
#endif /* !UNIV_DEBUG_VALGRIND */
276
buf_buddy_add_to_free(buf_pool, bpage, j);
282
/**********************************************************************//**
283
Allocate a block. The thread calling this function must hold
284
buf_pool->mutex and must not hold buf_pool_zip_mutex or any block->mutex.
285
The buf_pool->mutex may only be released and reacquired if lru != NULL.
286
@return allocated block, possibly NULL if lru==NULL */
291
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
292
ulint i, /*!< in: index of buf_pool->zip_free[],
293
or BUF_BUDDY_SIZES */
294
ibool* lru) /*!< in: pointer to a variable that
295
will be assigned TRUE if storage was
296
allocated from the LRU list and
297
buf_pool->mutex was temporarily
298
released, or NULL if the LRU list
299
should not be used */
303
ut_ad(buf_pool_mutex_own(buf_pool));
304
ut_ad(!mutex_own(&buf_pool->zip_mutex));
306
if (i < BUF_BUDDY_SIZES) {
307
/* Try to allocate from the buddy system. */
308
block = buf_buddy_alloc_zip(buf_pool, i);
315
/* Try allocating from the buf_pool->free list. */
316
block = buf_LRU_get_free_only(buf_pool);
328
/* Try replacing an uncompressed page in the buffer pool. */
329
buf_pool_mutex_exit(buf_pool);
330
block = buf_LRU_get_free_block(buf_pool, 0);
332
buf_pool_mutex_enter(buf_pool);
335
buf_buddy_block_register(block);
337
block = buf_buddy_alloc_from(
338
buf_pool, block->frame, i, BUF_BUDDY_SIZES);
341
buf_pool->buddy_stat[i].used++;
345
/**********************************************************************//**
346
Try to relocate the control block of a compressed page.
347
@return TRUE if relocated */
350
buf_buddy_relocate_block(
351
/*=====================*/
352
buf_page_t* bpage, /*!< in: block to relocate */
353
buf_page_t* dpage) /*!< in: free block to relocate to */
356
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
358
ut_ad(buf_pool_mutex_own(buf_pool));
360
switch (buf_page_get_state(bpage)) {
361
case BUF_BLOCK_ZIP_FREE:
362
case BUF_BLOCK_NOT_USED:
363
case BUF_BLOCK_READY_FOR_USE:
364
case BUF_BLOCK_FILE_PAGE:
365
case BUF_BLOCK_MEMORY:
366
case BUF_BLOCK_REMOVE_HASH:
368
case BUF_BLOCK_ZIP_DIRTY:
369
/* Cannot relocate dirty pages. */
372
case BUF_BLOCK_ZIP_PAGE:
376
mutex_enter(&buf_pool->zip_mutex);
378
if (!buf_page_can_relocate(bpage)) {
379
mutex_exit(&buf_pool->zip_mutex);
383
buf_relocate(bpage, dpage);
384
ut_d(bpage->state = BUF_BLOCK_ZIP_FREE);
386
/* relocate buf_pool->zip_clean */
387
b = UT_LIST_GET_PREV(list, dpage);
388
UT_LIST_REMOVE(list, buf_pool->zip_clean, dpage);
391
UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean, b, dpage);
393
UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, dpage);
396
UNIV_MEM_INVALID(bpage, sizeof *bpage);
398
mutex_exit(&buf_pool->zip_mutex);
402
/**********************************************************************//**
403
Try to relocate a block.
404
@return TRUE if relocated */
409
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
410
void* src, /*!< in: block to relocate */
411
void* dst, /*!< in: free block to relocate to */
412
ulint i) /*!< in: index of
413
buf_pool->zip_free[] */
416
const ulint size = BUF_BUDDY_LOW << i;
417
ullint usec = ut_time_us(NULL);
419
ut_ad(buf_pool_mutex_own(buf_pool));
420
ut_ad(!mutex_own(&buf_pool->zip_mutex));
421
ut_ad(!ut_align_offset(src, size));
422
ut_ad(!ut_align_offset(dst, size));
423
UNIV_MEM_ASSERT_W(dst, size);
425
/* We assume that all memory from buf_buddy_alloc()
426
is used for either compressed pages or buf_page_t
427
objects covering compressed pages. */
429
/* We look inside the allocated objects returned by
430
buf_buddy_alloc() and assume that anything of
431
PAGE_ZIP_MIN_SIZE or larger is a compressed page that contains
432
a valid space_id and page_no in the page header. Should the
433
fields be invalid, we will be unable to relocate the block.
434
We also assume that anything that fits sizeof(buf_page_t)
435
actually is a properly initialized buf_page_t object. */
437
if (size >= PAGE_ZIP_MIN_SIZE) {
438
/* This is a compressed page. */
441
/* The src block may be split into smaller blocks,
442
some of which may be free. Thus, the
443
mach_read_from_4() calls below may attempt to read
444
from free memory. The memory is "owned" by the buddy
445
allocator (and it has been allocated from the buffer
446
pool), so there is nothing wrong about this. The
447
mach_read_from_4() calls here will only trigger bogus
448
Valgrind memcheck warnings in UNIV_DEBUG_VALGRIND builds. */
449
bpage = buf_page_hash_get(
451
mach_read_from_4((const byte*) src
452
+ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID),
453
mach_read_from_4((const byte*) src
456
if (!bpage || bpage->zip.data != src) {
457
/* The block has probably been freshly
458
allocated by buf_LRU_get_free_block() but not
459
added to buf_pool->page_hash yet. Obviously,
460
it cannot be relocated. */
465
ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
467
if (page_zip_get_size(&bpage->zip) != size) {
468
/* The block is of different size. We would
469
have to relocate all blocks covered by src.
470
For the sake of simplicity, give up. */
471
ut_ad(page_zip_get_size(&bpage->zip) < size);
476
/* The block must have been allocated, but it may
477
contain uninitialized data. */
478
UNIV_MEM_ASSERT_W(src, size);
480
mutex = buf_page_get_mutex(bpage);
484
if (buf_page_can_relocate(bpage)) {
485
/* Relocate the compressed page. */
486
ut_a(bpage->zip.data == src);
487
memcpy(dst, src, size);
488
bpage->zip.data = dst;
491
UNIV_MEM_INVALID(src, size);
493
buf_buddy_stat_t* buddy_stat
494
= &buf_pool->buddy_stat[i];
495
buddy_stat->relocated++;
496
buddy_stat->relocated_usec
497
+= ut_time_us(NULL) - usec;
503
} else if (i == buf_buddy_get_slot(sizeof(buf_page_t))) {
504
/* This must be a buf_page_t object. */
505
UNIV_MEM_ASSERT_RW(src, size);
506
if (buf_buddy_relocate_block(src, dst)) {
515
/**********************************************************************//**
516
Deallocate a block. */
521
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
522
void* buf, /*!< in: block to be freed, must not be
523
pointed to by the buffer pool */
524
ulint i) /*!< in: index of buf_pool->zip_free[],
525
or BUF_BUDDY_SIZES */
530
ut_ad(buf_pool_mutex_own(buf_pool));
531
ut_ad(!mutex_own(&buf_pool->zip_mutex));
532
ut_ad(i <= BUF_BUDDY_SIZES);
533
ut_ad(buf_pool->buddy_stat[i].used > 0);
535
buf_pool->buddy_stat[i].used--;
537
UNIV_MEM_ASSERT_AND_ALLOC(buf, BUF_BUDDY_LOW << i);
538
ut_d(((buf_page_t*) buf)->state = BUF_BLOCK_ZIP_FREE);
540
if (i == BUF_BUDDY_SIZES) {
541
buf_buddy_block_free(buf_pool, buf);
545
ut_ad(i < BUF_BUDDY_SIZES);
546
ut_ad(buf == ut_align_down(buf, BUF_BUDDY_LOW << i));
547
ut_ad(!buf_pool_contains_zip(buf_pool, buf));
549
/* Try to combine adjacent blocks. */
551
buddy = (buf_page_t*) buf_buddy_get(((byte*) buf), BUF_BUDDY_LOW << i);
553
#ifndef UNIV_DEBUG_VALGRIND
554
/* Valgrind would complain about accessing free memory. */
556
if (buddy->state != BUF_BLOCK_ZIP_FREE) {
561
/* The field buddy->state can only be trusted for free blocks.
562
If buddy->state == BUF_BLOCK_ZIP_FREE, the block is free if
563
it is in the free list. */
564
#endif /* !UNIV_DEBUG_VALGRIND */
566
for (bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]); bpage; ) {
567
UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
568
ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
570
if (bpage == buddy) {
572
/* The buddy is free: recombine */
573
buf_buddy_remove_from_free(buf_pool, bpage, i);
575
ut_ad(buf_page_get_state(buddy) == BUF_BLOCK_ZIP_FREE);
576
ut_ad(!buf_pool_contains_zip(buf_pool, buddy));
578
buf = ut_align_down(buf, BUF_BUDDY_LOW << i);
586
buf_page_t* next = UT_LIST_GET_NEXT(list, bpage);
587
UNIV_MEM_ASSERT_AND_FREE(bpage, BUF_BUDDY_LOW << i);
592
#ifndef UNIV_DEBUG_VALGRIND
594
/* Valgrind would complain about accessing free memory. */
595
ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
596
ut_ad(buf_page_get_state(ut_list_node_313)
597
== BUF_BLOCK_ZIP_FREE)));
598
#endif /* UNIV_DEBUG_VALGRIND */
600
/* The buddy is not free. Is there a free block of this size? */
601
bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
604
/* Remove the block from the free list, because a successful
605
buf_buddy_relocate() will overwrite bpage->list. */
607
UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
608
buf_buddy_remove_from_free(buf_pool, bpage, i);
610
/* Try to relocate the buddy of buf to the free block. */
611
if (buf_buddy_relocate(buf_pool, buddy, bpage, i)) {
613
ut_d(buddy->state = BUF_BLOCK_ZIP_FREE);
617
buf_buddy_add_to_free(buf_pool, bpage, i);
619
/* Try to relocate the buddy of the free block to buf. */
620
buddy = (buf_page_t*) buf_buddy_get(((byte*) bpage),
623
#ifndef UNIV_DEBUG_VALGRIND
624
/* Valgrind would complain about accessing free memory. */
626
/* The buddy must not be (completely) free, because we
627
always recombine adjacent free blocks.
629
(Parts of the buddy can be free in
630
buf_pool->zip_free[j] with j < i.) */
631
ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
632
ut_ad(buf_page_get_state(
634
== BUF_BLOCK_ZIP_FREE
635
&& ut_list_node_313 != buddy)));
636
#endif /* !UNIV_DEBUG_VALGRIND */
638
if (buf_buddy_relocate(buf_pool, buddy, buf, i)) {
641
UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
642
ut_d(buddy->state = BUF_BLOCK_ZIP_FREE);
647
/* Free the block to the buddy list. */
650
if (i < buf_buddy_get_slot(PAGE_ZIP_MIN_SIZE)) {
651
/* This area has most likely been allocated for at
652
least one compressed-only block descriptor. Check
653
that there are no live objects in the area. This is
654
not a complete check: it may yield false positives as
655
well as false negatives. Also, due to buddy blocks
656
being recombined, it is possible (although unlikely)
657
that this branch is never reached. */
661
# ifndef UNIV_DEBUG_VALGRIND
662
/* Valgrind would complain about accessing
663
uninitialized memory. Besides, Valgrind performs a
664
more exhaustive check, at every memory access. */
665
const buf_page_t* b = buf;
666
const buf_page_t* const b_end = (buf_page_t*)
667
((char*) b + (BUF_BUDDY_LOW << i));
669
for (; b < b_end; b++) {
670
/* Avoid false positives (and cause false
671
negatives) by checking for b->space < 1000. */
673
if ((b->state == BUF_BLOCK_ZIP_PAGE
674
|| b->state == BUF_BLOCK_ZIP_DIRTY)
675
&& b->space > 0 && b->space < 1000) {
677
"buddy dirty %p %u (%u,%u) %p,%lu\n",
679
b->state, b->space, b->offset,
683
# endif /* !UNIV_DEBUG_VALGRIND */
685
/* Scramble the block. This should make any pointers
686
invalid and trigger a segmentation violation. Because
687
the scrambling can be reversed, it may be possible to
688
track down the object pointing to the freed data by
689
dereferencing the unscrambled bpage->LRU or
690
bpage->list pointers. */
691
for (c = (char*) buf + (BUF_BUDDY_LOW << i);
692
c-- > (char*) buf; ) {
696
/* Fill large blocks with a constant pattern. */
697
memset(bpage, i, BUF_BUDDY_LOW << i);
699
#endif /* UNIV_DEBUG */
700
bpage->state = BUF_BLOCK_ZIP_FREE;
701
buf_buddy_add_to_free(buf_pool, bpage, i);