1
/*****************************************************************************
3
Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
5
This program is free software; you can redistribute it and/or modify it under
6
the terms of the GNU General Public License as published by the Free Software
7
Foundation; version 2 of the License.
9
This program is distributed in the hope that it will be useful, but WITHOUT
10
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13
You should have received a copy of the GNU General Public License along with
14
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15
Place, Suite 330, Boston, MA 02111-1307 USA
17
*****************************************************************************/
19
/**************************************************//**
20
@file ibuf/ibuf0ibuf.c
23
Created 7/19/1997 Heikki Tuuri
24
*******************************************************/
26
#include "ibuf0ibuf.h"
28
/** Number of bits describing a single page */
29
#define IBUF_BITS_PER_PAGE 4
30
#if IBUF_BITS_PER_PAGE % 2
31
# error "IBUF_BITS_PER_PAGE must be an even number!"
33
/** The start offset of the bitmap within an insert buffer bitmap page */
34
#define IBUF_BITMAP PAGE_DATA
37
#include "ibuf0ibuf.ic"
40
#ifndef UNIV_HOTBACKUP
52
#include "sync0sync.h"
53
#include "dict0boot.h"
55
#include "lock0lock.h"
59
/* STRUCTURE OF AN INSERT BUFFER RECORD
63
1. The first field is the page number.
64
2. The second field is an array which stores type info for each subsequent
65
field. We store the information which affects the ordering of records, and
66
also the physical storage size of an SQL NULL value. E.g., for CHAR(10) it
68
3. Next we have the fields of the actual index record.
72
Note that contrary to what we planned in the 1990's, there will only be one
73
insert buffer tree, and that is in the system tablespace of InnoDB.
75
1. The first field is the space id.
76
2. The second field is a one-byte marker (0) which differentiates records from
77
the < 4.1.x storage format.
78
3. The third field is the page number.
79
4. The fourth field contains the type info, where we have also added 2 bytes to
80
store the charset. In the compressed table format of 5.0.x we must add more
81
information here so that we can build a dummy 'index' struct which 5.0.x
82
can use in the binary search on the index page in the ibuf merge phase.
83
5. The rest of the fields contain the fields of the actual index record.
87
The first byte of the fourth field is an additional marker (0) if the record
88
is in the compact format. The presence of this marker can be detected by
89
looking at the length of the field modulo DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE.
91
The high-order bit of the character set field in the type info is the
92
"nullable" flag for the field. */
95
/* PREVENTING DEADLOCKS IN THE INSERT BUFFER SYSTEM
97
If an OS thread performs any operation that brings in disk pages from
98
non-system tablespaces into the buffer pool, or creates such a page there,
99
then the operation may have as a side effect an insert buffer index tree
100
compression. Thus, the tree latch of the insert buffer tree may be acquired
101
in the x-mode, and also the file space latch of the system tablespace may
102
be acquired in the x-mode.
104
Also, an insert to an index in a non-system tablespace can have the same
105
effect. How do we know this cannot lead to a deadlock of OS threads? There
106
is a problem with the i/o-handler threads: they break the latching order
107
because they own x-latches to pages which are on a lower level than the
108
insert buffer tree latch, its page latches, and the tablespace latch an
109
insert buffer operation can reserve.
111
The solution is the following: Let all the tree and page latches connected
112
with the insert buffer be later in the latching order than the fsp latch and
115
Insert buffer pages must be such that the insert buffer is never invoked
116
when these pages are accessed as this would result in a recursion violating
117
the latching order. We let a special i/o-handler thread take care of i/o to
118
the insert buffer pages and the ibuf bitmap pages, as well as the fsp bitmap
119
pages and the first inode page, which contains the inode of the ibuf tree: let
120
us call all these ibuf pages. To prevent deadlocks, we do not let a read-ahead
121
access both non-ibuf and ibuf pages.
123
Then an i/o-handler for the insert buffer never needs to access recursively the
124
insert buffer tree and thus obeys the latching order. On the other hand, other
125
i/o-handlers for other tablespaces may require access to the insert buffer,
126
but because all kinds of latches they need to access there are later in the
127
latching order, no violation of the latching order occurs in this case,
130
A problem is how to grow and contract an insert buffer tree. As it is later
131
in the latching order than the fsp management, we have to reserve the fsp
132
latch first, before adding or removing pages from the insert buffer tree.
133
We let the insert buffer tree have its own file space management: a free
134
list of pages linked to the tree root. To prevent recursive use of the
135
insert buffer when adding pages to the tree, we must first load these pages
136
to memory, obtaining a latch on them, and only after that add them to the
137
free list of the insert buffer tree. More difficult is the removal of pages
138
from the free list. If there is an excess of pages in the free list of the
139
ibuf tree, they might be needed if some thread reserves the fsp latch,
140
intending to allocate more file space. So we do the following: if a thread
141
reserves the fsp latch, we check the writer count field of the latch. If
142
this field has value 1, it means that the thread did not own the latch
143
before entering the fsp system, and the mtr of the thread contains no
144
modifications to the fsp pages. Now we are free to reserve the ibuf latch,
145
and check if there is an excess of pages in the free list. We can then, in a
146
separate mini-transaction, take them out of the free list and free them to
149
To avoid deadlocks in the ibuf system, we divide file pages into three levels:
152
(2) ibuf tree pages and the pages in the ibuf tree free list, and
153
(3) ibuf bitmap pages.
155
No OS thread is allowed to access higher level pages if it has latches to
156
lower level pages; even if the thread owns a B-tree latch it must not access
157
the B-tree non-leaf pages if it has latches on lower level pages. Read-ahead
158
is only allowed for level 1 and 2 pages. Dedicated i/o-handler threads handle
159
exclusively level 1 i/o. A dedicated i/o handler thread handles exclusively
160
level 2 i/o. However, if an OS thread does the i/o handling for itself, i.e.,
161
it uses synchronous aio, it can access any pages, as long as it obeys the
162
access order rules. */
164
/** Buffer pool size per the maximum insert buffer size */
165
#define IBUF_POOL_SIZE_PER_MAX_SIZE 2
167
/** Table name for the insert buffer. */
168
#define IBUF_TABLE_NAME "SYS_IBUF_TABLE"
170
/** Operations that can currently be buffered. */
171
UNIV_INTERN ibuf_use_t ibuf_use = IBUF_USE_INSERT;
173
/** The insert buffer control structure */
174
UNIV_INTERN ibuf_t* ibuf = NULL;
176
/** Counter for ibuf_should_try() */
177
UNIV_INTERN ulint ibuf_flush_count = 0;
179
#ifdef UNIV_IBUF_COUNT_DEBUG
180
/** Number of tablespaces in the ibuf_counts array */
181
#define IBUF_COUNT_N_SPACES 4
182
/** Number of pages within each tablespace in the ibuf_counts array */
183
#define IBUF_COUNT_N_PAGES 130000
185
/** Buffered entry counts for file pages, used in debugging */
186
static ulint ibuf_counts[IBUF_COUNT_N_SPACES][IBUF_COUNT_N_PAGES];
188
/******************************************************************//**
189
Checks that the indexes to ibuf_counts[][] are within limits. */
194
ulint space_id, /*!< in: space identifier */
195
ulint page_no) /*!< in: page number */
197
if (space_id < IBUF_COUNT_N_SPACES && page_no < IBUF_COUNT_N_PAGES) {
202
"InnoDB: UNIV_IBUF_COUNT_DEBUG limits space_id and page_no\n"
203
"InnoDB: and breaks crash recovery.\n"
204
"InnoDB: space_id=%lu, should be 0<=space_id<%lu\n"
205
"InnoDB: page_no=%lu, should be 0<=page_no<%lu\n",
206
(ulint) space_id, (ulint) IBUF_COUNT_N_SPACES,
207
(ulint) page_no, (ulint) IBUF_COUNT_N_PAGES);
212
/** @name Offsets to the per-page bits in the insert buffer bitmap */
214
#define IBUF_BITMAP_FREE 0 /*!< Bits indicating the
215
amount of free space */
216
#define IBUF_BITMAP_BUFFERED 2 /*!< TRUE if there are buffered
217
changes for the page */
218
#define IBUF_BITMAP_IBUF 3 /*!< TRUE if page is a part of
219
the ibuf tree, excluding the
220
root page, or is in the free
224
/** The mutex used to block pessimistic inserts to ibuf trees */
225
static mutex_t ibuf_pessimistic_insert_mutex;
227
/** The mutex protecting the insert buffer structs */
228
static mutex_t ibuf_mutex;
230
/** The mutex protecting the insert buffer bitmaps */
231
static mutex_t ibuf_bitmap_mutex;
233
/** The area in pages from which contract looks for page numbers for merge */
234
#define IBUF_MERGE_AREA 8
236
/** Inside the merge area, pages which have at most 1 per this number less
237
buffered entries compared to maximum volume that can be buffered for a single
238
page are merged along with the page whose buffer became full */
239
#define IBUF_MERGE_THRESHOLD 4
241
/** In ibuf_contract at most this number of pages is read to memory in one
242
batch, in order to merge the entries for them in the insert buffer */
243
#define IBUF_MAX_N_PAGES_MERGED IBUF_MERGE_AREA
245
/** If the combined size of the ibuf trees exceeds ibuf->max_size by this
246
many pages, we start to contract it in connection to inserts there, using
247
non-synchronous contract */
248
#define IBUF_CONTRACT_ON_INSERT_NON_SYNC 0
250
/** If the combined size of the ibuf trees exceeds ibuf->max_size by this
251
many pages, we start to contract it in connection to inserts there, using
252
synchronous contract */
253
#define IBUF_CONTRACT_ON_INSERT_SYNC 5
255
/** If the combined size of the ibuf trees exceeds ibuf->max_size by
256
this many pages, we start to contract it using synchronous contract, but do
258
#define IBUF_CONTRACT_DO_NOT_INSERT 10
260
/* TODO: how to cope with drop table if there are records in the insert
261
buffer for the indexes of the table? Is there actually any problem,
262
because ibuf merge is done to a page when it is read in, and it is
263
still physically like the index page even if the index would have been
264
dropped! So, there seems to be no problem. */
266
/******************************************************************//**
267
Sets the flag in the current OS thread local storage denoting that it is
268
inside an insert buffer routine. */
276
ptr = thr_local_get_in_ibuf_field();
278
ut_ad(*ptr == FALSE);
283
/******************************************************************//**
284
Sets the flag in the current OS thread local storage denoting that it is
285
exiting an insert buffer routine. */
293
ptr = thr_local_get_in_ibuf_field();
300
/******************************************************************//**
301
Returns TRUE if the current OS thread is performing an insert buffer
304
For instance, a read-ahead of non-ibuf pages is forbidden by threads
305
that are executing an insert buffer routine.
306
@return TRUE if inside an insert buffer routine */
312
return(*thr_local_get_in_ibuf_field());
315
/******************************************************************//**
316
Gets the ibuf header page and x-latches it.
317
@return insert buffer header page */
320
ibuf_header_page_get(
321
/*=================*/
322
mtr_t* mtr) /*!< in: mtr */
326
ut_ad(!ibuf_inside());
328
block = buf_page_get(
329
IBUF_SPACE_ID, 0, FSP_IBUF_HEADER_PAGE_NO, RW_X_LATCH, mtr);
330
buf_block_dbg_add_level(block, SYNC_IBUF_HEADER);
332
return(buf_block_get_frame(block));
335
/******************************************************************//**
336
Gets the root page and x-latches it.
337
@return insert buffer tree root page */
342
mtr_t* mtr) /*!< in: mtr */
346
ut_ad(ibuf_inside());
348
mtr_x_lock(dict_index_get_lock(ibuf->index), mtr);
350
block = buf_page_get(
351
IBUF_SPACE_ID, 0, FSP_IBUF_TREE_ROOT_PAGE_NO, RW_X_LATCH, mtr);
353
buf_block_dbg_add_level(block, SYNC_TREE_NODE);
355
return(buf_block_get_frame(block));
358
#ifdef UNIV_IBUF_COUNT_DEBUG
359
/******************************************************************//**
360
Gets the ibuf count for a given page.
361
@return number of entries in the insert buffer currently buffered for
367
ulint space, /*!< in: space id */
368
ulint page_no)/*!< in: page number */
370
ibuf_count_check(space, page_no);
372
return(ibuf_counts[space][page_no]);
375
/******************************************************************//**
376
Sets the ibuf count for a given page. */
381
ulint space, /*!< in: space id */
382
ulint page_no,/*!< in: page number */
383
ulint val) /*!< in: value to set */
385
ibuf_count_check(space, page_no);
386
ut_a(val < UNIV_PAGE_SIZE);
388
ibuf_counts[space][page_no] = val;
392
/******************************************************************//**
393
Updates the size information of the ibuf, assuming the segment size has not
399
const page_t* root, /*!< in: ibuf tree root */
400
mtr_t* mtr) /*!< in: mtr */
402
ut_ad(mutex_own(&ibuf_mutex));
404
ibuf->free_list_len = flst_get_len(root + PAGE_HEADER
405
+ PAGE_BTR_IBUF_FREE_LIST, mtr);
407
ibuf->height = 1 + btr_page_get_level(root, mtr);
409
/* the '1 +' is the ibuf header page */
410
ibuf->size = ibuf->seg_size - (1 + ibuf->free_list_len);
412
ibuf->empty = page_get_n_recs(root) == 0;
415
/******************************************************************//**
416
Creates the insert buffer data structure at a database startup and initializes
417
the data structures for the insert buffer. */
420
ibuf_init_at_db_start(void)
421
/*=======================*/
432
ibuf = mem_alloc(sizeof(ibuf_t));
434
memset(ibuf, 0, sizeof(*ibuf));
436
/* Note that also a pessimistic delete can sometimes make a B-tree
437
grow in size, as the references on the upper levels of the tree can
440
ibuf->max_size = buf_pool_get_curr_size() / UNIV_PAGE_SIZE
441
/ IBUF_POOL_SIZE_PER_MAX_SIZE;
443
mutex_create(&ibuf_pessimistic_insert_mutex,
444
SYNC_IBUF_PESS_INSERT_MUTEX);
446
mutex_create(&ibuf_mutex, SYNC_IBUF_MUTEX);
448
mutex_create(&ibuf_bitmap_mutex, SYNC_IBUF_BITMAP_MUTEX);
452
mutex_enter(&ibuf_mutex);
454
mtr_x_lock(fil_space_get_latch(IBUF_SPACE_ID, NULL), &mtr);
456
header_page = ibuf_header_page_get(&mtr);
458
fseg_n_reserved_pages(header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER,
464
ibuf->seg_size = n_used;
469
block = buf_page_get(
470
IBUF_SPACE_ID, 0, FSP_IBUF_TREE_ROOT_PAGE_NO,
472
buf_block_dbg_add_level(block, SYNC_TREE_NODE);
474
root = buf_block_get_frame(block);
477
ibuf_size_update(root, &mtr);
478
mutex_exit(&ibuf_mutex);
484
heap = mem_heap_create(450);
486
/* Use old-style record format for the insert buffer. */
487
table = dict_mem_table_create(IBUF_TABLE_NAME, IBUF_SPACE_ID, 1, 0);
489
dict_mem_table_add_col(table, heap, "DUMMY_COLUMN", DATA_BINARY, 0, 0);
491
table->id = ut_dulint_add(DICT_IBUF_ID_MIN, IBUF_SPACE_ID);
493
dict_table_add_to_cache(table, heap);
496
index = dict_mem_index_create(
497
IBUF_TABLE_NAME, "CLUST_IND",
498
IBUF_SPACE_ID, DICT_CLUSTERED | DICT_UNIVERSAL | DICT_IBUF, 1);
500
dict_mem_index_add_field(index, "DUMMY_COLUMN", 0);
502
index->id = ut_dulint_add(DICT_IBUF_ID_MIN, IBUF_SPACE_ID);
504
error = dict_index_add_to_cache(table, index,
505
FSP_IBUF_TREE_ROOT_PAGE_NO, FALSE);
506
ut_a(error == DB_SUCCESS);
508
ibuf->index = dict_table_get_first_index(table);
510
#endif /* !UNIV_HOTBACKUP */
511
/*********************************************************************//**
512
Initializes an ibuf bitmap page. */
515
ibuf_bitmap_page_init(
516
/*==================*/
517
buf_block_t* block, /*!< in: bitmap page */
518
mtr_t* mtr) /*!< in: mtr */
522
ulint zip_size = buf_block_get_zip_size(block);
524
ut_a(ut_is_2pow(zip_size));
526
page = buf_block_get_frame(block);
527
fil_page_set_type(page, FIL_PAGE_IBUF_BITMAP);
529
/* Write all zeros to the bitmap */
532
byte_offset = UT_BITS_IN_BYTES(UNIV_PAGE_SIZE
533
* IBUF_BITS_PER_PAGE);
535
byte_offset = UT_BITS_IN_BYTES(zip_size * IBUF_BITS_PER_PAGE);
538
memset(page + IBUF_BITMAP, 0, byte_offset);
540
/* The remaining area (up to the page trailer) is uninitialized. */
542
#ifndef UNIV_HOTBACKUP
543
mlog_write_initial_log_record(page, MLOG_IBUF_BITMAP_INIT, mtr);
544
#endif /* !UNIV_HOTBACKUP */
547
/*********************************************************************//**
548
Parses a redo log record of an ibuf bitmap page init.
549
@return end of log record or NULL */
552
ibuf_parse_bitmap_init(
553
/*===================*/
554
byte* ptr, /*!< in: buffer */
555
byte* end_ptr __attribute__((unused)), /*!< in: buffer end */
556
buf_block_t* block, /*!< in: block or NULL */
557
mtr_t* mtr) /*!< in: mtr or NULL */
559
ut_ad(ptr && end_ptr);
562
ibuf_bitmap_page_init(block, mtr);
567
#ifndef UNIV_HOTBACKUP
568
/********************************************************************//**
569
Gets the desired bits for a given page from a bitmap page.
570
@return value of bits */
573
ibuf_bitmap_page_get_bits(
574
/*======================*/
575
const page_t* page, /*!< in: bitmap page */
576
ulint page_no,/*!< in: page whose bits to get */
577
ulint zip_size,/*!< in: compressed page size in bytes;
578
0 for uncompressed pages */
579
ulint bit, /*!< in: IBUF_BITMAP_FREE,
580
IBUF_BITMAP_BUFFERED, ... */
581
mtr_t* mtr __attribute__((unused)))
582
/*!< in: mtr containing an
583
x-latch to the bitmap page */
590
ut_ad(bit < IBUF_BITS_PER_PAGE);
591
#if IBUF_BITS_PER_PAGE % 2
592
# error "IBUF_BITS_PER_PAGE % 2 != 0"
594
ut_ad(ut_is_2pow(zip_size));
595
ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX));
598
bit_offset = (page_no % UNIV_PAGE_SIZE) * IBUF_BITS_PER_PAGE
601
bit_offset = (page_no & (zip_size - 1)) * IBUF_BITS_PER_PAGE
605
byte_offset = bit_offset / 8;
606
bit_offset = bit_offset % 8;
608
ut_ad(byte_offset + IBUF_BITMAP < UNIV_PAGE_SIZE);
610
map_byte = mach_read_from_1(page + IBUF_BITMAP + byte_offset);
612
value = ut_bit_get_nth(map_byte, bit_offset);
614
if (bit == IBUF_BITMAP_FREE) {
615
ut_ad(bit_offset + 1 < 8);
617
value = value * 2 + ut_bit_get_nth(map_byte, bit_offset + 1);
623
/********************************************************************//**
624
Sets the desired bit for a given page in a bitmap page. */
627
ibuf_bitmap_page_set_bits(
628
/*======================*/
629
page_t* page, /*!< in: bitmap page */
630
ulint page_no,/*!< in: page whose bits to set */
631
ulint zip_size,/*!< in: compressed page size in bytes;
632
0 for uncompressed pages */
633
ulint bit, /*!< in: IBUF_BITMAP_FREE, IBUF_BITMAP_BUFFERED, ... */
634
ulint val, /*!< in: value to set */
635
mtr_t* mtr) /*!< in: mtr containing an x-latch to the bitmap page */
641
ut_ad(bit < IBUF_BITS_PER_PAGE);
642
#if IBUF_BITS_PER_PAGE % 2
643
# error "IBUF_BITS_PER_PAGE % 2 != 0"
645
ut_ad(ut_is_2pow(zip_size));
646
ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX));
647
#ifdef UNIV_IBUF_COUNT_DEBUG
648
ut_a((bit != IBUF_BITMAP_BUFFERED) || (val != FALSE)
649
|| (0 == ibuf_count_get(page_get_space_id(page),
653
bit_offset = (page_no % UNIV_PAGE_SIZE) * IBUF_BITS_PER_PAGE
656
bit_offset = (page_no & (zip_size - 1)) * IBUF_BITS_PER_PAGE
660
byte_offset = bit_offset / 8;
661
bit_offset = bit_offset % 8;
663
ut_ad(byte_offset + IBUF_BITMAP < UNIV_PAGE_SIZE);
665
map_byte = mach_read_from_1(page + IBUF_BITMAP + byte_offset);
667
if (bit == IBUF_BITMAP_FREE) {
668
ut_ad(bit_offset + 1 < 8);
671
map_byte = ut_bit_set_nth(map_byte, bit_offset, val / 2);
672
map_byte = ut_bit_set_nth(map_byte, bit_offset + 1, val % 2);
675
map_byte = ut_bit_set_nth(map_byte, bit_offset, val);
678
mlog_write_ulint(page + IBUF_BITMAP + byte_offset, map_byte,
682
/********************************************************************//**
683
Calculates the bitmap page number for a given page number.
684
@return the bitmap page number where the file page is mapped */
687
ibuf_bitmap_page_no_calc(
688
/*=====================*/
689
ulint zip_size, /*!< in: compressed page size in bytes;
690
0 for uncompressed pages */
691
ulint page_no) /*!< in: tablespace page number */
693
ut_ad(ut_is_2pow(zip_size));
696
return(FSP_IBUF_BITMAP_OFFSET
697
+ (page_no & ~(UNIV_PAGE_SIZE - 1)));
699
return(FSP_IBUF_BITMAP_OFFSET
700
+ (page_no & ~(zip_size - 1)));
704
/********************************************************************//**
705
Gets the ibuf bitmap page where the bits describing a given file page are
707
@return bitmap page where the file page is mapped, that is, the bitmap
708
page containing the descriptor bits for the file page; the bitmap page
712
ibuf_bitmap_get_map_page(
713
/*=====================*/
714
ulint space, /*!< in: space id of the file page */
715
ulint page_no,/*!< in: page number of the file page */
716
ulint zip_size,/*!< in: compressed page size in bytes;
717
0 for uncompressed pages */
718
mtr_t* mtr) /*!< in: mtr */
722
block = buf_page_get(space, zip_size,
723
ibuf_bitmap_page_no_calc(zip_size, page_no),
725
buf_block_dbg_add_level(block, SYNC_IBUF_BITMAP);
727
return(buf_block_get_frame(block));
730
/************************************************************************//**
731
Sets the free bits of the page in the ibuf bitmap. This is done in a separate
732
mini-transaction, hence this operation does not restrict further work to only
733
ibuf bitmap operations, which would result if the latch to the bitmap page
737
ibuf_set_free_bits_low(
738
/*===================*/
739
ulint zip_size,/*!< in: compressed page size in bytes;
740
0 for uncompressed pages */
741
const buf_block_t* block, /*!< in: index page; free bits are set if
742
the index is non-clustered and page
744
ulint val, /*!< in: value to set: < 4 */
745
mtr_t* mtr) /*!< in/out: mtr */
751
if (!page_is_leaf(buf_block_get_frame(block))) {
756
space = buf_block_get_space(block);
757
page_no = buf_block_get_page_no(block);
758
bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr);
759
#ifdef UNIV_IBUF_DEBUG
762
"Setting space %lu page %lu free bits to %lu should be %lu\n",
764
ibuf_index_page_calc_free(zip_size, block));
767
ut_a(val <= ibuf_index_page_calc_free(zip_size, block));
768
#endif /* UNIV_IBUF_DEBUG */
769
ibuf_bitmap_page_set_bits(bitmap_page, page_no, zip_size,
770
IBUF_BITMAP_FREE, val, mtr);
773
/************************************************************************//**
774
Sets the free bits of the page in the ibuf bitmap. This is done in a separate
775
mini-transaction, hence this operation does not restrict further work to only
776
ibuf bitmap operations, which would result if the latch to the bitmap page
780
ibuf_set_free_bits_func(
781
/*====================*/
782
buf_block_t* block, /*!< in: index page of a non-clustered index;
783
free bit is reset if page level is 0 */
784
#ifdef UNIV_IBUF_DEBUG
785
ulint max_val,/*!< in: ULINT_UNDEFINED or a maximum
786
value which the bits must have before
787
setting; this is for debugging */
788
#endif /* UNIV_IBUF_DEBUG */
789
ulint val) /*!< in: value to set: < 4 */
798
page = buf_block_get_frame(block);
800
if (!page_is_leaf(page)) {
807
space = buf_block_get_space(block);
808
page_no = buf_block_get_page_no(block);
809
zip_size = buf_block_get_zip_size(block);
810
bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, &mtr);
812
#ifdef UNIV_IBUF_DEBUG
813
if (max_val != ULINT_UNDEFINED) {
816
old_val = ibuf_bitmap_page_get_bits(
817
bitmap_page, page_no, zip_size,
818
IBUF_BITMAP_FREE, &mtr);
820
if (old_val != max_val) {
822
"Ibuf: page %lu old val %lu max val %lu\n",
823
page_get_page_no(page),
828
ut_a(old_val <= max_val);
831
fprintf(stderr, "Setting page no %lu free bits to %lu should be %lu\n",
832
page_get_page_no(page), val,
833
ibuf_index_page_calc_free(zip_size, block));
836
ut_a(val <= ibuf_index_page_calc_free(zip_size, block));
837
#endif /* UNIV_IBUF_DEBUG */
838
ibuf_bitmap_page_set_bits(bitmap_page, page_no, zip_size,
839
IBUF_BITMAP_FREE, val, &mtr);
843
/************************************************************************//**
844
Resets the free bits of the page in the ibuf bitmap. This is done in a
845
separate mini-transaction, hence this operation does not restrict
846
further work to only ibuf bitmap operations, which would result if the
847
latch to the bitmap page were kept. NOTE: The free bits in the insert
848
buffer bitmap must never exceed the free space on a page. It is safe
849
to decrement or reset the bits in the bitmap in a mini-transaction
850
that is committed before the mini-transaction that affects the free
854
ibuf_reset_free_bits(
855
/*=================*/
856
buf_block_t* block) /*!< in: index page; free bits are set to 0
857
if the index is a non-clustered
858
non-unique, and page level is 0 */
860
ibuf_set_free_bits(block, 0, ULINT_UNDEFINED);
863
/**********************************************************************//**
864
Updates the free bits for an uncompressed page to reflect the present
865
state. Does this in the mtr given, which means that the latching
866
order rules virtually prevent any further operations for this OS
867
thread until mtr is committed. NOTE: The free bits in the insert
868
buffer bitmap must never exceed the free space on a page. It is safe
869
to set the free bits in the same mini-transaction that updated the
873
ibuf_update_free_bits_low(
874
/*======================*/
875
const buf_block_t* block, /*!< in: index page */
876
ulint max_ins_size, /*!< in: value of
878
with reorganize before
880
performed to the page */
881
mtr_t* mtr) /*!< in/out: mtr */
886
ut_a(!buf_block_get_page_zip(block));
888
before = ibuf_index_page_calc_free_bits(0, max_ins_size);
890
after = ibuf_index_page_calc_free(0, block);
892
/* This approach cannot be used on compressed pages, since the
893
computed value of "before" often does not match the current
894
state of the bitmap. This is because the free space may
895
increase or decrease when a compressed page is reorganized. */
896
if (before != after) {
897
ibuf_set_free_bits_low(0, block, after, mtr);
901
/**********************************************************************//**
902
Updates the free bits for a compressed page to reflect the present
903
state. Does this in the mtr given, which means that the latching
904
order rules virtually prevent any further operations for this OS
905
thread until mtr is committed. NOTE: The free bits in the insert
906
buffer bitmap must never exceed the free space on a page. It is safe
907
to set the free bits in the same mini-transaction that updated the
911
ibuf_update_free_bits_zip(
912
/*======================*/
913
buf_block_t* block, /*!< in/out: index page */
914
mtr_t* mtr) /*!< in/out: mtr */
922
space = buf_block_get_space(block);
923
page_no = buf_block_get_page_no(block);
924
zip_size = buf_block_get_zip_size(block);
926
ut_a(page_is_leaf(buf_block_get_frame(block)));
929
bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr);
931
after = ibuf_index_page_calc_free_zip(zip_size, block);
934
/* We move the page to the front of the buffer pool LRU list:
935
the purpose of this is to prevent those pages to which we
936
cannot make inserts using the insert buffer from slipping
937
out of the buffer pool */
939
buf_page_make_young(&block->page);
942
ibuf_bitmap_page_set_bits(bitmap_page, page_no, zip_size,
943
IBUF_BITMAP_FREE, after, mtr);
946
/**********************************************************************//**
947
Updates the free bits for the two pages to reflect the present state.
948
Does this in the mtr given, which means that the latching order rules
949
virtually prevent any further operations until mtr is committed.
950
NOTE: The free bits in the insert buffer bitmap must never exceed the
951
free space on a page. It is safe to set the free bits in the same
952
mini-transaction that updated the pages. */
955
ibuf_update_free_bits_for_two_pages_low(
956
/*====================================*/
957
ulint zip_size,/*!< in: compressed page size in bytes;
958
0 for uncompressed pages */
959
buf_block_t* block1, /*!< in: index page */
960
buf_block_t* block2, /*!< in: index page */
961
mtr_t* mtr) /*!< in: mtr */
965
/* As we have to x-latch two random bitmap pages, we have to acquire
966
the bitmap mutex to prevent a deadlock with a similar operation
967
performed by another OS thread. */
969
mutex_enter(&ibuf_bitmap_mutex);
971
state = ibuf_index_page_calc_free(zip_size, block1);
973
ibuf_set_free_bits_low(zip_size, block1, state, mtr);
975
state = ibuf_index_page_calc_free(zip_size, block2);
977
ibuf_set_free_bits_low(zip_size, block2, state, mtr);
979
mutex_exit(&ibuf_bitmap_mutex);
982
/**********************************************************************//**
983
Returns TRUE if the page is one of the fixed address ibuf pages.
984
@return TRUE if a fixed address ibuf i/o page */
987
ibuf_fixed_addr_page(
988
/*=================*/
989
ulint space, /*!< in: space id */
990
ulint zip_size,/*!< in: compressed page size in bytes;
991
0 for uncompressed pages */
992
ulint page_no)/*!< in: page number */
994
return((space == IBUF_SPACE_ID && page_no == IBUF_TREE_ROOT_PAGE_NO)
995
|| ibuf_bitmap_page(zip_size, page_no));
998
/***********************************************************************//**
999
Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages.
1000
Must not be called when recv_no_ibuf_operations==TRUE.
1001
@return TRUE if level 2 or level 3 page */
1006
ulint space, /*!< in: space id */
1007
ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
1008
ulint page_no,/*!< in: page number */
1009
mtr_t* mtr) /*!< in: mtr which will contain an x-latch to the
1010
bitmap page if the page is not one of the fixed
1011
address ibuf pages, or NULL, in which case a new
1012
transaction is created. */
1016
page_t* bitmap_page;
1018
ut_ad(!recv_no_ibuf_operations);
1020
if (ibuf_fixed_addr_page(space, zip_size, page_no)) {
1023
} else if (space != IBUF_SPACE_ID) {
1028
ut_ad(fil_space_get_type(IBUF_SPACE_ID) == FIL_TABLESPACE);
1035
bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr);
1037
ret = ibuf_bitmap_page_get_bits(bitmap_page, page_no, zip_size,
1038
IBUF_BITMAP_IBUF, mtr);
1040
if (mtr == &local_mtr) {
1047
/********************************************************************//**
1048
Returns the page number field of an ibuf record.
1049
@return page number */
1052
ibuf_rec_get_page_no(
1053
/*=================*/
1054
const rec_t* rec) /*!< in: ibuf record */
1059
ut_ad(ibuf_inside());
1060
ut_ad(rec_get_n_fields_old(rec) > 2);
1062
field = rec_get_nth_field_old(rec, 1, &len);
1065
/* This is of the >= 4.1.x record format */
1066
ut_a(trx_sys_multiple_tablespace_format);
1068
field = rec_get_nth_field_old(rec, 2, &len);
1070
ut_a(trx_doublewrite_must_reset_space_ids);
1071
ut_a(!trx_sys_multiple_tablespace_format);
1073
field = rec_get_nth_field_old(rec, 0, &len);
1078
return(mach_read_from_4(field));
1081
/********************************************************************//**
1082
Returns the space id field of an ibuf record. For < 4.1.x format records
1089
const rec_t* rec) /*!< in: ibuf record */
1094
ut_ad(ibuf_inside());
1095
ut_ad(rec_get_n_fields_old(rec) > 2);
1097
field = rec_get_nth_field_old(rec, 1, &len);
1100
/* This is of the >= 4.1.x record format */
1102
ut_a(trx_sys_multiple_tablespace_format);
1103
field = rec_get_nth_field_old(rec, 0, &len);
1106
return(mach_read_from_4(field));
1109
ut_a(trx_doublewrite_must_reset_space_ids);
1110
ut_a(!trx_sys_multiple_tablespace_format);
1115
/********************************************************************//**
1116
Creates a dummy index for inserting a record to a non-clustered index.
1118
@return dummy index */
1121
ibuf_dummy_index_create(
1122
/*====================*/
1123
ulint n, /*!< in: number of fields */
1124
ibool comp) /*!< in: TRUE=use compact record format */
1126
dict_table_t* table;
1127
dict_index_t* index;
1129
table = dict_mem_table_create("IBUF_DUMMY",
1131
comp ? DICT_TF_COMPACT : 0);
1133
index = dict_mem_index_create("IBUF_DUMMY", "IBUF_DUMMY",
1134
DICT_HDR_SPACE, 0, n);
1136
index->table = table;
1138
/* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */
1139
index->cached = TRUE;
1143
/********************************************************************//**
1144
Adds a column to the dummy index. The column is first added to the dummy
table that the index points to, using the mtype, prtype and len read from
'type'; that table column is then added to the index with the length 'len'. */
1147
ibuf_dummy_index_add_col(
1148
/*=====================*/
1149
dict_index_t* index, /*!< in: dummy index */
1150
const dtype_t* type, /*!< in: the data type of the column */
1151
ulint len) /*!< in: length of the column */
1153
/* Read n_def before the column is added: the value is used below as the
position of the column handed to dict_index_add_col() (presumably the
position at which dict_mem_table_add_col() stores the new column;
confirm against that function) */
ulint i = index->table->n_def;
1154
dict_mem_table_add_col(index->table, NULL, NULL,
1155
dtype_get_mtype(type),
1156
dtype_get_prtype(type),
1157
dtype_get_len(type));
1158
dict_index_add_col(index, index->table,
1159
dict_table_get_nth_col(index->table, i), len);
1161
/********************************************************************//**
1162
Deallocates a dummy index for inserting a record to a non-clustered index.
Both the index and the table it points to are freed. */
1165
ibuf_dummy_index_free(
1166
/*==================*/
1167
dict_index_t* index) /*!< in, own: dummy index */
1169
/* Read the table pointer before the index is freed, so that the table
can still be freed afterwards */
dict_table_t* table = index->table;
1171
dict_mem_index_free(index);
1172
dict_mem_table_free(table);
1175
/*********************************************************************//**
1176
Builds the entry to insert into a non-clustered index when we have the
1177
corresponding record in an ibuf index.
1179
NOTE that as we copy pointers to fields in ibuf_rec, the caller must
1180
hold a latch to the ibuf_rec page as long as the entry is used!
1182
@return own: entry to insert to a non-clustered index */
1185
ibuf_build_entry_pre_4_1_x(
1186
/*=======================*/
1187
const rec_t* ibuf_rec, /*!< in: record in an insert buffer */
1188
mem_heap_t* heap, /*!< in: heap where built */
1189
dict_index_t** pindex) /*!< out, own: dummy index that
1190
describes the entry */
1198
ut_a(trx_doublewrite_must_reset_space_ids);
1199
ut_a(!trx_sys_multiple_tablespace_format);
1201
n_fields = rec_get_n_fields_old(ibuf_rec) - 2;
1202
tuple = dtuple_create(heap, n_fields);
1203
types = rec_get_nth_field_old(ibuf_rec, 1, &len);
1205
ut_a(len == n_fields * DATA_ORDER_NULL_TYPE_BUF_SIZE);
1207
for (i = 0; i < n_fields; i++) {
1211
field = dtuple_get_nth_field(tuple, i);
1213
data = rec_get_nth_field_old(ibuf_rec, i + 2, &len);
1215
dfield_set_data(field, data, len);
1217
dtype_read_for_order_and_null_size(
1218
dfield_get_type(field),
1219
types + i * DATA_ORDER_NULL_TYPE_BUF_SIZE);
1222
*pindex = ibuf_dummy_index_create(n_fields, FALSE);
1227
/*********************************************************************//**
1228
Builds the entry to insert into a non-clustered index when we have the
1229
corresponding record in an ibuf index.
1231
NOTE that as we copy pointers to fields in ibuf_rec, the caller must
1232
hold a latch to the ibuf_rec page as long as the entry is used!
1234
@return own: entry to insert to a non-clustered index */
1237
ibuf_build_entry_from_ibuf_rec(
1238
/*===========================*/
1239
const rec_t* ibuf_rec, /*!< in: record in an insert buffer */
1240
mem_heap_t* heap, /*!< in: heap where built */
1241
dict_index_t** pindex) /*!< out, own: dummy index that
1242
describes the entry */
1251
dict_index_t* index;
1253
data = rec_get_nth_field_old(ibuf_rec, 1, &len);
1256
/* This is a < 4.1.x format record */
1258
return(ibuf_build_entry_pre_4_1_x(ibuf_rec, heap, pindex));
1261
/* This is a >= 4.1.x format record */
1263
ut_a(trx_sys_multiple_tablespace_format);
1265
ut_a(rec_get_n_fields_old(ibuf_rec) > 4);
1267
n_fields = rec_get_n_fields_old(ibuf_rec) - 4;
1269
tuple = dtuple_create(heap, n_fields);
1271
types = rec_get_nth_field_old(ibuf_rec, 3, &len);
1273
ut_a(len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE <= 1);
1274
index = ibuf_dummy_index_create(
1275
n_fields, len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
1277
if (len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE) {
1278
/* compact record format */
1284
ut_a(len == n_fields * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
1286
for (i = 0; i < n_fields; i++) {
1287
field = dtuple_get_nth_field(tuple, i);
1289
data = rec_get_nth_field_old(ibuf_rec, i + 4, &len);
1291
dfield_set_data(field, data, len);
1293
dtype_new_read_for_order_and_null_size(
1294
dfield_get_type(field),
1295
types + i * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
1297
ibuf_dummy_index_add_col(index, dfield_get_type(field), len);
1300
/* Prevent an ut_ad() failure in page_zip_write_rec() by
1301
adding system columns to the dummy table pointed to by the
1302
dummy secondary index. The insert buffer is only used for
1303
secondary indexes, whose records never contain any system
1304
columns, such as DB_TRX_ID. */
1305
ut_d(dict_table_add_system_columns(index->table, index->table->heap));
1312
/********************************************************************//**
1313
Returns the space taken by a stored non-clustered index entry if converted to
1315
@return size of index record in bytes + an upper limit of the space
1316
taken in the page directory */
1319
ibuf_rec_get_volume(
1320
/*================*/
1321
const rec_t* ibuf_rec)/*!< in: ibuf record */
1324
ibool new_format = FALSE;
1325
ulint data_size = 0;
1333
ut_ad(ibuf_inside());
1334
ut_ad(rec_get_n_fields_old(ibuf_rec) > 2);
1336
data = rec_get_nth_field_old(ibuf_rec, 1, &len);
1339
/* < 4.1.x format record */
1341
ut_a(trx_doublewrite_must_reset_space_ids);
1342
ut_a(!trx_sys_multiple_tablespace_format);
1344
n_fields = rec_get_n_fields_old(ibuf_rec) - 2;
1346
types = rec_get_nth_field_old(ibuf_rec, 1, &len);
1348
ut_ad(len == n_fields * DATA_ORDER_NULL_TYPE_BUF_SIZE);
1351
/* >= 4.1.x format record */
1353
ut_a(trx_sys_multiple_tablespace_format);
1356
types = rec_get_nth_field_old(ibuf_rec, 3, &len);
1358
comp = len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE;
1362
/* compact record format */
1364
dict_index_t* dummy_index;
1365
mem_heap_t* heap = mem_heap_create(500);
1366
dtuple_t* entry = ibuf_build_entry_from_ibuf_rec(
1367
ibuf_rec, heap, &dummy_index);
1368
volume = rec_get_converted_size(dummy_index, entry, 0);
1369
ibuf_dummy_index_free(dummy_index);
1370
mem_heap_free(heap);
1371
return(volume + page_dir_calc_reserved_space(1));
1374
n_fields = rec_get_n_fields_old(ibuf_rec) - 4;
1379
for (i = 0; i < n_fields; i++) {
1381
data = rec_get_nth_field_old(ibuf_rec, i + 4, &len);
1383
dtype_new_read_for_order_and_null_size(
1385
* DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
1387
data = rec_get_nth_field_old(ibuf_rec, i + 2, &len);
1389
dtype_read_for_order_and_null_size(
1391
* DATA_ORDER_NULL_TYPE_BUF_SIZE);
1394
if (len == UNIV_SQL_NULL) {
1395
data_size += dtype_get_sql_null_size(&dtype, comp);
1401
return(data_size + rec_get_converted_extra_size(data_size, n_fields, 0)
1402
+ page_dir_calc_reserved_space(1));
1405
/*********************************************************************//**
1406
Builds the tuple to insert to an ibuf tree when we have an entry for a
1407
non-clustered index.
1409
NOTE that the original entry must be kept because we copy pointers to
1412
@return own: entry to insert into an ibuf index tree */
1417
dict_index_t* index, /*!< in: non-clustered index */
1418
const dtuple_t* entry, /*!< in: entry for a non-clustered index */
1419
ulint space, /*!< in: space id */
1420
ulint page_no,/*!< in: index page number where entry should
1422
mem_heap_t* heap) /*!< in: heap into which to build */
1426
const dfield_t* entry_field;
1432
/* Starting from 4.1.x, we have to build a tuple whose
1433
(1) first field is the space id,
1434
(2) the second field a single marker byte (0) to tell that this
1435
is a new format record,
1436
(3) the third contains the page number, and
1437
(4) the fourth contains the relevant type information of each data
1438
field; the length of this field % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE is
1439
(a) 0 for b-trees in the old format, and
1440
(b) 1 for b-trees in the compact format, the first byte of the field
1441
being the marker (0);
1442
(5) and the rest of the fields are copied from entry. All fields
1443
in the tuple are ordered like the type binary in our insert buffer
1446
n_fields = dtuple_get_n_fields(entry);
1448
tuple = dtuple_create(heap, n_fields + 4);
1450
/* Store the space id in tuple */
1452
field = dtuple_get_nth_field(tuple, 0);
1454
buf = mem_heap_alloc(heap, 4);
1456
mach_write_to_4(buf, space);
1458
dfield_set_data(field, buf, 4);
1460
/* Store the marker byte field in tuple */
1462
field = dtuple_get_nth_field(tuple, 1);
1464
buf = mem_heap_alloc(heap, 1);
1466
/* We set the marker byte zero */
1468
mach_write_to_1(buf, 0);
1470
dfield_set_data(field, buf, 1);
1472
/* Store the page number in tuple */
1474
field = dtuple_get_nth_field(tuple, 2);
1476
buf = mem_heap_alloc(heap, 4);
1478
mach_write_to_4(buf, page_no);
1480
dfield_set_data(field, buf, 4);
1482
/* Store the type info in buf2, and add the fields from entry to
1484
buf2 = mem_heap_alloc(heap, n_fields
1485
* DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
1486
+ dict_table_is_comp(index->table));
1487
if (dict_table_is_comp(index->table)) {
1488
*buf2++ = 0; /* write the compact format indicator */
1490
for (i = 0; i < n_fields; i++) {
1492
const dict_field_t* ifield;
1494
/* We add 4 below because we have the 4 extra fields at the
1495
start of an ibuf record */
1497
field = dtuple_get_nth_field(tuple, i + 4);
1498
entry_field = dtuple_get_nth_field(entry, i);
1499
dfield_copy(field, entry_field);
1501
ifield = dict_index_get_nth_field(index, i);
1502
/* Prefix index columns of fixed-length columns are of
1503
fixed length. However, in the function call below,
1504
dfield_get_type(entry_field) contains the fixed length
1505
of the column in the clustered index. Replace it with
1506
the fixed length of the secondary index column. */
1507
fixed_len = ifield->fixed_len;
1511
/* dict_index_add_col() should guarantee these */
1512
ut_ad(fixed_len <= (ulint)
1513
dfield_get_type(entry_field)->len);
1514
if (ifield->prefix_len) {
1515
ut_ad(ifield->prefix_len == fixed_len);
1517
ut_ad(fixed_len == (ulint)
1518
dfield_get_type(entry_field)->len);
1521
#endif /* UNIV_DEBUG */
1523
dtype_new_store_for_order_and_null_size(
1524
buf2 + i * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE,
1525
dfield_get_type(entry_field), fixed_len);
1528
/* Store the type info in buf2 to field 3 of tuple */
1530
field = dtuple_get_nth_field(tuple, 3);
1532
if (dict_table_is_comp(index->table)) {
1536
dfield_set_data(field, buf2, n_fields
1537
* DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
1538
+ dict_table_is_comp(index->table));
1539
/* Set all the types in the new tuple binary */
1541
dtuple_set_types_binary(tuple, n_fields + 4);
1546
/*********************************************************************//**
1547
Builds a search tuple used to search buffered inserts for an index page.
1548
This is for < 4.1.x format records
1549
@return own: search tuple */
1552
ibuf_search_tuple_build(
1553
/*====================*/
1554
ulint space, /*!< in: space id */
1555
ulint page_no,/*!< in: index page number */
1556
mem_heap_t* heap) /*!< in: heap into which to build */
1563
ut_a(trx_doublewrite_must_reset_space_ids);
1564
ut_a(!trx_sys_multiple_tablespace_format);
1566
tuple = dtuple_create(heap, 1);
1568
/* Store the page number in tuple */
1570
field = dtuple_get_nth_field(tuple, 0);
1572
buf = mem_heap_alloc(heap, 4);
1574
mach_write_to_4(buf, page_no);
1576
dfield_set_data(field, buf, 4);
1578
dtuple_set_types_binary(tuple, 1);
1583
/*********************************************************************//**
1584
Builds a search tuple used to search buffered inserts for an index page.
1585
This is for >= 4.1.x format records.
1586
@return own: search tuple */
1589
ibuf_new_search_tuple_build(
1590
/*========================*/
1591
ulint space, /*!< in: space id */
1592
ulint page_no,/*!< in: index page number */
1593
mem_heap_t* heap) /*!< in: heap into which to build */
1599
ut_a(trx_sys_multiple_tablespace_format);
1601
tuple = dtuple_create(heap, 3);
1603
/* Store the space id in tuple */
1605
field = dtuple_get_nth_field(tuple, 0);
1607
buf = mem_heap_alloc(heap, 4);
1609
mach_write_to_4(buf, space);
1611
dfield_set_data(field, buf, 4);
1613
/* Store the new format record marker byte */
1615
field = dtuple_get_nth_field(tuple, 1);
1617
buf = mem_heap_alloc(heap, 1);
1619
mach_write_to_1(buf, 0);
1621
dfield_set_data(field, buf, 1);
1623
/* Store the page number in tuple */
1625
field = dtuple_get_nth_field(tuple, 2);
1627
buf = mem_heap_alloc(heap, 4);
1629
mach_write_to_4(buf, page_no);
1631
dfield_set_data(field, buf, 4);
1633
dtuple_set_types_binary(tuple, 3);
1638
/*********************************************************************//**
1639
Checks if there are enough pages in the free list of the ibuf tree that we
1640
dare to start a pessimistic insert to the insert buffer.
1641
@return TRUE if enough free pages in list, that is, if ibuf->free_list_len
is at least ibuf->size / 2 + 3 * ibuf->height */
1644
ibuf_data_enough_free_for_insert(void)
1645
/*==================================*/
1647
/* The fields of ibuf are read below; the assertion documents that the
caller is expected to own ibuf_mutex */
ut_ad(mutex_own(&ibuf_mutex));
1649
/* We want a big margin of free pages, because a B-tree can sometimes
1650
grow in size also if records are deleted from it, as the node pointers
1651
can change, and we must make sure that we are able to delete the
1652
inserts buffered for pages that we read to the buffer pool, without
1653
any risk of running out of free space in the insert buffer. */
1655
return(ibuf->free_list_len >= (ibuf->size / 2) + 3 * ibuf->height);
1658
/*********************************************************************//**
1659
Checks if there are so many pages in the free list of the ibuf tree that
1660
some of them should be removed and freed to the file space management.
1661
@return TRUE if the free list is long enough for pages to be removed, that
is, if ibuf->free_list_len is at least 3 + ibuf->size / 2 + 3 * ibuf->height */
1664
ibuf_data_too_much_free(void)
1665
/*=========================*/
1667
/* The fields of ibuf are read below; the assertion documents that the
caller is expected to own ibuf_mutex */
ut_ad(mutex_own(&ibuf_mutex));
1669
return(ibuf->free_list_len >= 3 + (ibuf->size / 2) + 3 * ibuf->height);
1672
/*********************************************************************//**
1673
Allocates a new page from the ibuf file segment and adds it to the free
1675
@return DB_SUCCESS, or DB_STRONG_FAIL if no space left */
1678
ibuf_add_free_page(void)
1679
/*====================*/
1682
page_t* header_page;
1688
page_t* bitmap_page;
1692
/* Acquire the fsp latch before the ibuf header, obeying the latching
1694
mtr_x_lock(fil_space_get_latch(IBUF_SPACE_ID, &flags), &mtr);
1695
zip_size = dict_table_flags_to_zip_size(flags);
1697
header_page = ibuf_header_page_get(&mtr);
1699
/* Allocate a new page: NOTE that if the page has been a part of a
1700
non-clustered index which has subsequently been dropped, then the
1701
page may have buffered inserts in the insert buffer, and these
1702
should be deleted from there. These get deleted when the page
1703
allocation creates the page in buffer. Thus the call below may end
1704
up calling the insert buffer routines and, as we yet have no latches
1705
to insert buffer tree pages, these routines can run without a risk
1706
of a deadlock. This is the reason why we created a special ibuf
1707
header page apart from the ibuf tree. */
1709
page_no = fseg_alloc_free_page(
1710
header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER, 0, FSP_UP,
1713
if (page_no == FIL_NULL) {
1716
return(DB_STRONG_FAIL);
1722
block = buf_page_get(
1723
IBUF_SPACE_ID, 0, page_no, RW_X_LATCH, &mtr);
1725
buf_block_dbg_add_level(block, SYNC_TREE_NODE_NEW);
1728
page = buf_block_get_frame(block);
1733
mutex_enter(&ibuf_mutex);
1735
root = ibuf_tree_root_get(&mtr);
1737
/* Add the page to the free list and update the ibuf size data */
1739
flst_add_last(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
1740
page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, &mtr);
1742
mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_IBUF_FREE_LIST,
1746
ibuf->free_list_len++;
1748
/* Set the bit indicating that this page is now an ibuf tree page
1751
bitmap_page = ibuf_bitmap_get_map_page(
1752
IBUF_SPACE_ID, page_no, zip_size, &mtr);
1754
ibuf_bitmap_page_set_bits(
1755
bitmap_page, page_no, zip_size, IBUF_BITMAP_IBUF, TRUE, &mtr);
1759
mutex_exit(&ibuf_mutex);
1766
/*********************************************************************//**
1767
Removes a page from the free list and frees it to the fsp system. */
1770
ibuf_remove_free_page(void)
1771
/*=======================*/
1775
page_t* header_page;
1781
page_t* bitmap_page;
1785
/* Acquire the fsp latch before the ibuf header, obeying the latching
1787
mtr_x_lock(fil_space_get_latch(IBUF_SPACE_ID, &flags), &mtr);
1788
zip_size = dict_table_flags_to_zip_size(flags);
1790
header_page = ibuf_header_page_get(&mtr);
1792
/* Prevent pessimistic inserts to insert buffer trees for a while */
1793
mutex_enter(&ibuf_pessimistic_insert_mutex);
1797
mutex_enter(&ibuf_mutex);
1799
if (!ibuf_data_too_much_free()) {
1801
mutex_exit(&ibuf_mutex);
1805
mutex_exit(&ibuf_pessimistic_insert_mutex);
1814
root = ibuf_tree_root_get(&mtr2);
1816
page_no = flst_get_last(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
1819
/* NOTE that we must release the latch on the ibuf tree root
1820
because in fseg_free_page we access level 1 pages, and the root
1821
is a level 2 page. */
1824
mutex_exit(&ibuf_mutex);
1828
/* Since pessimistic inserts were prevented, we know that the
1829
page is still in the free list. NOTE that also deletes may take
1830
pages from the free list, but they take them from the start, and
1831
the free list was so long that they cannot have taken the last
1834
fseg_free_page(header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER,
1835
IBUF_SPACE_ID, page_no, &mtr);
1837
#ifdef UNIV_DEBUG_FILE_ACCESSES
1838
buf_page_reset_file_page_was_freed(IBUF_SPACE_ID, page_no);
1843
mutex_enter(&ibuf_mutex);
1845
root = ibuf_tree_root_get(&mtr);
1847
ut_ad(page_no == flst_get_last(root + PAGE_HEADER
1848
+ PAGE_BTR_IBUF_FREE_LIST, &mtr).page);
1853
block = buf_page_get(
1854
IBUF_SPACE_ID, 0, page_no, RW_X_LATCH, &mtr);
1856
buf_block_dbg_add_level(block, SYNC_TREE_NODE);
1859
page = buf_block_get_frame(block);
1862
/* Remove the page from the free list and update the ibuf size data */
1864
flst_remove(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
1865
page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, &mtr);
1868
ibuf->free_list_len--;
1870
mutex_exit(&ibuf_pessimistic_insert_mutex);
1872
/* Set the bit indicating that this page is no longer an ibuf tree page
1875
bitmap_page = ibuf_bitmap_get_map_page(
1876
IBUF_SPACE_ID, page_no, zip_size, &mtr);
1878
ibuf_bitmap_page_set_bits(
1879
bitmap_page, page_no, zip_size, IBUF_BITMAP_IBUF, FALSE, &mtr);
1881
#ifdef UNIV_DEBUG_FILE_ACCESSES
1882
buf_page_set_file_page_was_freed(IBUF_SPACE_ID, page_no);
1886
mutex_exit(&ibuf_mutex);
1891
/***********************************************************************//**
1892
Frees excess pages from the ibuf free list. This function is called when an OS
1893
thread calls fsp services to allocate a new file segment, or a new page to a
1894
file segment, and the thread did not own the fsp latch before this call. */
1897
ibuf_free_excess_pages(void)
1898
/*========================*/
1902
#ifdef UNIV_SYNC_DEBUG
1903
ut_ad(rw_lock_own(fil_space_get_latch(IBUF_SPACE_ID, NULL),
1905
#endif /* UNIV_SYNC_DEBUG */
1907
ut_ad(rw_lock_get_x_lock_count(
1908
fil_space_get_latch(IBUF_SPACE_ID, NULL)) == 1);
1910
ut_ad(!ibuf_inside());
1912
/* NOTE: We require that the thread did not own the latch before,
1913
because then we know that we can obey the correct latching order
1917
/* Not yet initialized; not sure if this is possible, but
1918
does no harm to check for it. */
1923
/* Free at most a few pages at a time, so that we do not delay the
1924
requested service too much */
1926
for (i = 0; i < 4; i++) {
1928
mutex_enter(&ibuf_mutex);
1930
if (!ibuf_data_too_much_free()) {
1932
mutex_exit(&ibuf_mutex);
1937
mutex_exit(&ibuf_mutex);
1939
ibuf_remove_free_page();
1943
/*********************************************************************//**
1944
Reads page numbers from a leaf in an ibuf tree.
1945
@return a lower limit for the combined volume of records which will be
1949
ibuf_get_merge_page_nos(
1950
/*====================*/
1951
ibool contract,/*!< in: TRUE if this function is called to
1952
contract the tree, FALSE if this is called
1953
when a single page becomes full and we look
1954
if it pays to read also nearby pages */
1955
rec_t* rec, /*!< in: record from which we read up and down
1956
in the chain of records */
1957
ulint* space_ids,/*!< in/out: space id's of the pages */
1958
ib_int64_t* space_versions,/*!< in/out: tablespace version
1959
timestamps; used to prevent reading in old
1960
pages after DISCARD + IMPORT tablespace */
1961
ulint* page_nos,/*!< in/out: buffer for at least
1962
IBUF_MAX_N_PAGES_MERGED many page numbers;
1963
the page numbers are in an ascending order */
1964
ulint* n_stored)/*!< out: number of page numbers stored to
1965
page_nos in this function */
1968
ulint prev_space_id;
1969
ulint first_page_no;
1970
ulint first_space_id;
1974
ulint volume_for_page;
1981
limit = ut_min(IBUF_MAX_N_PAGES_MERGED, buf_pool->curr_size / 4);
1983
if (page_rec_is_supremum(rec)) {
1985
rec = page_rec_get_prev(rec);
1988
if (page_rec_is_infimum(rec)) {
1990
rec = page_rec_get_next(rec);
1993
if (page_rec_is_supremum(rec)) {
1998
first_page_no = ibuf_rec_get_page_no(rec);
1999
first_space_id = ibuf_rec_get_space(rec);
2004
/* Go backwards from the first rec until we reach the border of the
2005
'merge area', or the page start or the limit of storable pages is
2008
while (!page_rec_is_infimum(rec) && UNIV_LIKELY(n_pages < limit)) {
2010
rec_page_no = ibuf_rec_get_page_no(rec);
2011
rec_space_id = ibuf_rec_get_space(rec);
2013
if (rec_space_id != first_space_id
2014
|| (rec_page_no / IBUF_MERGE_AREA)
2015
!= (first_page_no / IBUF_MERGE_AREA)) {
2020
if (rec_page_no != prev_page_no
2021
|| rec_space_id != prev_space_id) {
2025
prev_page_no = rec_page_no;
2026
prev_space_id = rec_space_id;
2028
rec = page_rec_get_prev(rec);
2031
rec = page_rec_get_next(rec);
2033
/* At the loop start there is no prev page; we mark this with a pair
2034
of space id, page no (0, 0) for which there can never be entries in
2035
the insert buffer */
2040
volume_for_page = 0;
2042
while (*n_stored < limit) {
2043
if (page_rec_is_supremum(rec)) {
2044
/* When no more records available, mark this with
2045
another 'impossible' pair of space id, page no */
2049
rec_page_no = ibuf_rec_get_page_no(rec);
2050
rec_space_id = ibuf_rec_get_space(rec);
2051
ut_ad(rec_page_no > IBUF_TREE_ROOT_PAGE_NO);
2054
#ifdef UNIV_IBUF_DEBUG
2055
ut_a(*n_stored < IBUF_MAX_N_PAGES_MERGED);
2057
if ((rec_space_id != prev_space_id
2058
|| rec_page_no != prev_page_no)
2059
&& (prev_space_id != 0 || prev_page_no != 0)) {
2061
if ((prev_page_no == first_page_no
2062
&& prev_space_id == first_space_id)
2065
> ((IBUF_MERGE_THRESHOLD - 1)
2066
* 4 * UNIV_PAGE_SIZE
2067
/ IBUF_PAGE_SIZE_PER_FREE_SPACE)
2068
/ IBUF_MERGE_THRESHOLD)) {
2070
space_ids[*n_stored] = prev_space_id;
2071
space_versions[*n_stored]
2072
= fil_space_get_version(prev_space_id);
2073
page_nos[*n_stored] = prev_page_no;
2077
sum_volumes += volume_for_page;
2080
if (rec_space_id != first_space_id
2081
|| rec_page_no / IBUF_MERGE_AREA
2082
!= first_page_no / IBUF_MERGE_AREA) {
2087
volume_for_page = 0;
2090
if (rec_page_no == 1 && rec_space_id == 0) {
2091
/* Supremum record */
2096
rec_volume = ibuf_rec_get_volume(rec);
2098
volume_for_page += rec_volume;
2100
prev_page_no = rec_page_no;
2101
prev_space_id = rec_space_id;
2103
rec = page_rec_get_next(rec);
2106
#ifdef UNIV_IBUF_DEBUG
2107
ut_a(*n_stored <= IBUF_MAX_N_PAGES_MERGED);
2110
fprintf(stderr, "Ibuf merge batch %lu pages %lu volume\n",
2111
*n_stored, sum_volumes);
2113
return(sum_volumes);
2116
/*********************************************************************//**
2117
Contracts insert buffer trees by reading pages to the buffer pool.
2118
@return a lower limit for the combined size in bytes of entries which
2119
will be merged from ibuf trees to the pages read, 0 if ibuf is
2125
ulint* n_pages,/*!< out: number of pages to which merged */
2126
ibool sync) /*!< in: TRUE if the caller wants to wait for the
2127
issued read with the highest tablespace address
2131
ulint page_nos[IBUF_MAX_N_PAGES_MERGED];
2132
ulint space_ids[IBUF_MAX_N_PAGES_MERGED];
2133
ib_int64_t space_versions[IBUF_MAX_N_PAGES_MERGED];
2139
ut_ad(!ibuf_inside());
2141
mutex_enter(&ibuf_mutex);
2145
mutex_exit(&ibuf_mutex);
2154
/* Open a cursor to a randomly chosen leaf of the tree, at a random
2155
position within the leaf */
2157
btr_pcur_open_at_rnd_pos(ibuf->index, BTR_SEARCH_LEAF, &pcur, &mtr);
2159
if (page_get_n_recs(btr_pcur_get_page(&pcur)) == 0) {
2160
/* When the ibuf tree is emptied completely, the last record
2161
is removed using an optimistic delete and ibuf_size_update
2162
is not called, causing ibuf->empty to remain FALSE. If we do
2163
not reset it to TRUE here then database shutdown will hang
2164
in the loop in ibuf_contract_for_n_pages. */
2171
btr_pcur_close(&pcur);
2176
mutex_exit(&ibuf_mutex);
2178
sum_sizes = ibuf_get_merge_page_nos(TRUE, btr_pcur_get_rec(&pcur),
2179
space_ids, space_versions,
2180
page_nos, &n_stored);
2181
#if 0 /* defined UNIV_IBUF_DEBUG */
2182
fprintf(stderr, "Ibuf contract sync %lu pages %lu volume %lu\n",
2183
sync, n_stored, sum_sizes);
2188
btr_pcur_close(&pcur);
2190
buf_read_ibuf_merge_pages(sync, space_ids, space_versions, page_nos,
2192
*n_pages = n_stored;
2194
return(sum_sizes + 1);
2197
/*********************************************************************//**
2198
Contracts insert buffer trees by reading pages to the buffer pool.
2199
@return a lower limit for the combined size in bytes of entries which
2200
will be merged from ibuf trees to the pages read, 0 if ibuf is
2206
ibool sync) /*!< in: TRUE if the caller wants to wait for the
2207
issued read with the highest tablespace address
2212
return(ibuf_contract_ext(&n_pages, sync));
2215
/*********************************************************************//**
2216
Contracts insert buffer trees by reading pages to the buffer pool.
2217
@return a lower limit for the combined size in bytes of entries which
2218
will be merged from ibuf trees to the pages read, 0 if ibuf is
2222
ibuf_contract_for_n_pages(
2223
/*======================*/
2224
ibool sync, /*!< in: TRUE if the caller wants to wait for the
2225
issued read with the highest tablespace address
2227
ulint n_pages)/*!< in: try to read at least this many pages to
2228
the buffer pool and merge the ibuf contents to
2231
ulint sum_bytes = 0;
2232
ulint sum_pages = 0;
2236
while (sum_pages < n_pages) {
2237
n_bytes = ibuf_contract_ext(&n_pag2, sync);
2243
sum_bytes += n_bytes;
2244
sum_pages += n_pag2;
2250
/*********************************************************************//**
2251
Contract insert buffer trees after insert if they are too big. */
2254
ibuf_contract_after_insert(
2255
/*=======================*/
2256
ulint entry_size) /*!< in: size of a record which was inserted
2257
into an ibuf tree */
2263
mutex_enter(&ibuf_mutex);
2265
if (ibuf->size < ibuf->max_size + IBUF_CONTRACT_ON_INSERT_NON_SYNC) {
2266
mutex_exit(&ibuf_mutex);
2273
if (ibuf->size >= ibuf->max_size + IBUF_CONTRACT_ON_INSERT_SYNC) {
2278
mutex_exit(&ibuf_mutex);
2280
/* Contract at least entry_size many bytes */
2284
while ((size > 0) && (sum_sizes < entry_size)) {
2286
size = ibuf_contract(sync);
2291
/*********************************************************************//**
2292
Gets an upper limit for the combined size of entries buffered in the insert
2293
buffer for a given page.
2294
@return upper limit for the volume of buffered inserts for the index
2295
page, in bytes; UNIV_PAGE_SIZE, if the entries for the index page span
2296
several pages in the insert buffer */
2299
ibuf_get_volume_buffered(
2300
/*=====================*/
2301
btr_pcur_t* pcur, /*!< in: pcur positioned at a place in an
2302
insert buffer tree where we would insert an
2303
entry for the index page whose number is
2304
page_no, latch mode has to be BTR_MODIFY_PREV
2305
or BTR_MODIFY_TREE */
2306
ulint space, /*!< in: space id */
2307
ulint page_no,/*!< in: page number of an index page */
2308
mtr_t* mtr) /*!< in: mtr */
2318
ut_a(trx_sys_multiple_tablespace_format);
2320
ut_ad((pcur->latch_mode == BTR_MODIFY_PREV)
2321
|| (pcur->latch_mode == BTR_MODIFY_TREE));
2323
/* Count the volume of records earlier in the alphabetical order than
2328
rec = btr_pcur_get_rec(pcur);
2329
page = page_align(rec);
2331
if (page_rec_is_supremum(rec)) {
2332
rec = page_rec_get_prev(rec);
2336
if (page_rec_is_infimum(rec)) {
2341
if (page_no != ibuf_rec_get_page_no(rec)
2342
|| space != ibuf_rec_get_space(rec)) {
2347
volume += ibuf_rec_get_volume(rec);
2349
rec = page_rec_get_prev(rec);
2352
/* Look at the previous page */
2354
prev_page_no = btr_page_get_prev(page, mtr);
2356
if (prev_page_no == FIL_NULL) {
2364
block = buf_page_get(
2365
IBUF_SPACE_ID, 0, prev_page_no, RW_X_LATCH, mtr);
2367
buf_block_dbg_add_level(block, SYNC_TREE_NODE);
2370
prev_page = buf_block_get_frame(block);
2373
#ifdef UNIV_BTR_DEBUG
2374
ut_a(btr_page_get_next(prev_page, mtr)
2375
== page_get_page_no(page));
2376
#endif /* UNIV_BTR_DEBUG */
2378
rec = page_get_supremum_rec(prev_page);
2379
rec = page_rec_get_prev(rec);
2382
if (page_rec_is_infimum(rec)) {
2384
/* We cannot go to yet a previous page, because we
2385
do not have the x-latch on it, and cannot acquire one
2386
because of the latching order: we have to give up */
2388
return(UNIV_PAGE_SIZE);
2391
if (page_no != ibuf_rec_get_page_no(rec)
2392
|| space != ibuf_rec_get_space(rec)) {
2397
volume += ibuf_rec_get_volume(rec);
2399
rec = page_rec_get_prev(rec);
2403
rec = btr_pcur_get_rec(pcur);
2405
if (!page_rec_is_supremum(rec)) {
2406
rec = page_rec_get_next(rec);
2410
if (page_rec_is_supremum(rec)) {
2415
if (page_no != ibuf_rec_get_page_no(rec)
2416
|| space != ibuf_rec_get_space(rec)) {
2421
volume += ibuf_rec_get_volume(rec);
2423
rec = page_rec_get_next(rec);
2426
/* Look at the next page */
2428
next_page_no = btr_page_get_next(page, mtr);
2430
if (next_page_no == FIL_NULL) {
2438
block = buf_page_get(
2439
IBUF_SPACE_ID, 0, next_page_no, RW_X_LATCH, mtr);
2441
buf_block_dbg_add_level(block, SYNC_TREE_NODE);
2444
next_page = buf_block_get_frame(block);
2447
#ifdef UNIV_BTR_DEBUG
2448
ut_a(btr_page_get_prev(next_page, mtr) == page_get_page_no(page));
2449
#endif /* UNIV_BTR_DEBUG */
2451
rec = page_get_infimum_rec(next_page);
2452
rec = page_rec_get_next(rec);
2455
if (page_rec_is_supremum(rec)) {
2459
return(UNIV_PAGE_SIZE);
2462
if (page_no != ibuf_rec_get_page_no(rec)
2463
|| space != ibuf_rec_get_space(rec)) {
2468
volume += ibuf_rec_get_volume(rec);
2470
rec = page_rec_get_next(rec);
2474
/*********************************************************************//**
2475
Reads the biggest tablespace id from the high end of the insert buffer
2476
tree and updates the counter in fil_system. */
2479
ibuf_update_max_tablespace_id(void)
2480
/*===============================*/
2489
ut_a(!dict_table_is_comp(ibuf->index->table));
2495
btr_pcur_open_at_index_side(
2496
FALSE, ibuf->index, BTR_SEARCH_LEAF, &pcur, TRUE, &mtr);
2498
btr_pcur_move_to_prev(&pcur, &mtr);
2500
if (btr_pcur_is_before_first_on_page(&pcur)) {
2501
/* The tree is empty */
2505
rec = btr_pcur_get_rec(&pcur);
2507
field = rec_get_nth_field_old(rec, 0, &len);
2511
max_space_id = mach_read_from_4(field);
2517
/* printf("Maximum space id in insert buffer %lu\n", max_space_id); */
2519
fil_set_max_space_id_if_bigger(max_space_id);
2522
/*********************************************************************//**
2523
Makes an index insert to the insert buffer, instead of directly to the disk
2524
page, if this is possible.
2525
@return DB_SUCCESS, DB_FAIL, DB_STRONG_FAIL */
2530
ulint mode, /*!< in: BTR_MODIFY_PREV or BTR_MODIFY_TREE */
2531
const dtuple_t* entry, /*!< in: index entry to insert */
2533
/*!< in: rec_get_converted_size(index, entry) */
2534
dict_index_t* index, /*!< in: index where to insert; must not be
2535
unique or clustered */
2536
ulint space, /*!< in: space id where to insert */
2537
ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
2538
ulint page_no,/*!< in: page number where to insert */
2539
que_thr_t* thr) /*!< in: query thread */
2541
big_rec_t* dummy_big_rec;
2544
dtuple_t* ibuf_entry;
2548
ibool old_bit_value;
2549
page_t* bitmap_page;
2553
ulint space_ids[IBUF_MAX_N_PAGES_MERGED];
2554
ib_int64_t space_versions[IBUF_MAX_N_PAGES_MERGED];
2555
ulint page_nos[IBUF_MAX_N_PAGES_MERGED];
2561
ut_a(!dict_index_is_clust(index));
2562
ut_ad(dtuple_check_typed(entry));
2563
ut_ad(ut_is_2pow(zip_size));
2565
ut_a(trx_sys_multiple_tablespace_format);
2569
mutex_enter(&ibuf_mutex);
2571
if (ibuf->size >= ibuf->max_size + IBUF_CONTRACT_DO_NOT_INSERT) {
2572
/* Insert buffer is now too big, contract it but do not try to insert */
2575
mutex_exit(&ibuf_mutex);
2577
#ifdef UNIV_IBUF_DEBUG
2578
fputs("Ibuf too big\n", stderr);
2580
/* Use synchronous contract (== TRUE) */
2581
ibuf_contract(TRUE);
2583
return(DB_STRONG_FAIL);
2586
mutex_exit(&ibuf_mutex);
2588
if (mode == BTR_MODIFY_TREE) {
2589
mutex_enter(&ibuf_pessimistic_insert_mutex);
2593
mutex_enter(&ibuf_mutex);
2595
while (!ibuf_data_enough_free_for_insert()) {
2597
mutex_exit(&ibuf_mutex);
2601
mutex_exit(&ibuf_pessimistic_insert_mutex);
2603
err = ibuf_add_free_page();
2605
if (err == DB_STRONG_FAIL) {
2610
mutex_enter(&ibuf_pessimistic_insert_mutex);
2614
mutex_enter(&ibuf_mutex);
2620
heap = mem_heap_create(512);
2622
/* Build the entry which contains the space id and the page number as
2623
the first fields and the type information for other fields, and which
2624
will be inserted to the insert buffer. */
2626
ibuf_entry = ibuf_entry_build(index, entry, space, page_no, heap);
2628
/* Open a cursor to the insert buffer tree to calculate if we can add
2629
the new entry to it without exceeding the free space limit for the page. */
2634
btr_pcur_open(ibuf->index, ibuf_entry, PAGE_CUR_LE, mode, &pcur, &mtr);
2636
/* Find out the volume of already buffered inserts for the same index page */
2638
buffered = ibuf_get_volume_buffered(&pcur, space, page_no, &mtr);
2640
#ifdef UNIV_IBUF_COUNT_DEBUG
2641
ut_a((buffered == 0) || ibuf_count_get(space, page_no));
2643
mtr_start(&bitmap_mtr);
2645
bitmap_page = ibuf_bitmap_get_map_page(space, page_no,
2646
zip_size, &bitmap_mtr);
2648
/* We check if the index page is suitable for buffered entries */
2650
if (buf_page_peek(space, page_no)
2651
|| lock_rec_expl_exist_on_page(space, page_no)) {
2652
err = DB_STRONG_FAIL;
2654
mtr_commit(&bitmap_mtr);
2659
bits = ibuf_bitmap_page_get_bits(bitmap_page, page_no, zip_size,
2660
IBUF_BITMAP_FREE, &bitmap_mtr);
2662
if (buffered + entry_size + page_dir_calc_reserved_space(1)
2663
> ibuf_index_page_calc_free_from_bits(zip_size, bits)) {
2664
mtr_commit(&bitmap_mtr);
2666
/* It may not fit */
2667
err = DB_STRONG_FAIL;
2671
ibuf_get_merge_page_nos(FALSE, btr_pcur_get_rec(&pcur),
2672
space_ids, space_versions,
2673
page_nos, &n_stored);
2677
/* Set the bitmap bit denoting that the insert buffer contains
2678
buffered entries for this index page, if the bit is not set yet */
2680
old_bit_value = ibuf_bitmap_page_get_bits(
2681
bitmap_page, page_no, zip_size,
2682
IBUF_BITMAP_BUFFERED, &bitmap_mtr);
2684
if (!old_bit_value) {
2685
ibuf_bitmap_page_set_bits(bitmap_page, page_no, zip_size,
2686
IBUF_BITMAP_BUFFERED, TRUE,
2690
mtr_commit(&bitmap_mtr);
2692
cursor = btr_pcur_get_btr_cur(&pcur);
2694
if (mode == BTR_MODIFY_PREV) {
2695
err = btr_cur_optimistic_insert(BTR_NO_LOCKING_FLAG, cursor,
2696
ibuf_entry, &ins_rec,
2697
&dummy_big_rec, 0, thr, &mtr);
2698
if (err == DB_SUCCESS) {
2699
/* Update the page max trx id field */
2700
page_update_max_trx_id(btr_cur_get_block(cursor), NULL,
2701
thr_get_trx(thr)->id, &mtr);
2704
ut_ad(mode == BTR_MODIFY_TREE);
2706
/* We acquire an x-latch to the root page before the insert,
2707
because a pessimistic insert releases the tree x-latch,
2708
which would cause the x-latching of the root after that to
2709
break the latching order. */
2711
root = ibuf_tree_root_get(&mtr);
2713
err = btr_cur_pessimistic_insert(BTR_NO_LOCKING_FLAG
2714
| BTR_NO_UNDO_LOG_FLAG,
2716
ibuf_entry, &ins_rec,
2717
&dummy_big_rec, 0, thr, &mtr);
2718
if (err == DB_SUCCESS) {
2719
/* Update the page max trx id field */
2720
page_update_max_trx_id(btr_cur_get_block(cursor), NULL,
2721
thr_get_trx(thr)->id, &mtr);
2724
ibuf_size_update(root, &mtr);
2728
#ifdef UNIV_IBUF_COUNT_DEBUG
2729
if (err == DB_SUCCESS) {
2731
"Incrementing ibuf count of space %lu page %lu\n"
2732
"from %lu by 1\n", space, page_no,
2733
ibuf_count_get(space, page_no));
2735
ibuf_count_set(space, page_no,
2736
ibuf_count_get(space, page_no) + 1);
2739
if (mode == BTR_MODIFY_TREE) {
2741
mutex_exit(&ibuf_mutex);
2742
mutex_exit(&ibuf_pessimistic_insert_mutex);
2746
btr_pcur_close(&pcur);
2749
mem_heap_free(heap);
2751
if (err == DB_SUCCESS) {
2752
mutex_enter(&ibuf_mutex);
2754
ibuf->empty = FALSE;
2757
mutex_exit(&ibuf_mutex);
2759
if (mode == BTR_MODIFY_TREE) {
2760
ibuf_contract_after_insert(entry_size);
2765
#ifdef UNIV_IBUF_DEBUG
2766
ut_a(n_stored <= IBUF_MAX_N_PAGES_MERGED);
2768
buf_read_ibuf_merge_pages(FALSE, space_ids, space_versions,
2769
page_nos, n_stored);
2775
/*********************************************************************//**
2776
Makes an index insert to the insert buffer, instead of directly to the disk
2777
page, if this is possible. Does not do insert if the index is clustered
2779
@return TRUE if success */
2784
const dtuple_t* entry, /*!< in: index entry to insert */
2785
dict_index_t* index, /*!< in: index where to insert */
2786
ulint space, /*!< in: space id where to insert */
2787
ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
2788
ulint page_no,/*!< in: page number where to insert */
2789
que_thr_t* thr) /*!< in: query thread */
2794
ut_a(trx_sys_multiple_tablespace_format);
2795
ut_ad(dtuple_check_typed(entry));
2796
ut_ad(ut_is_2pow(zip_size));
2798
ut_a(!dict_index_is_clust(index));
2800
switch (UNIV_EXPECT(ibuf_use, IBUF_USE_INSERT)) {
2803
case IBUF_USE_INSERT:
2805
case IBUF_USE_COUNT:
2809
ut_error; /* unknown value of ibuf_use */
2812
entry_size = rec_get_converted_size(index, entry, 0);
2815
>= (page_get_free_space_of_empty(dict_table_is_comp(index->table))
2820
err = ibuf_insert_low(BTR_MODIFY_PREV, entry, entry_size,
2821
index, space, zip_size, page_no, thr);
2822
if (err == DB_FAIL) {
2823
err = ibuf_insert_low(BTR_MODIFY_TREE, entry, entry_size,
2824
index, space, zip_size, page_no, thr);
2827
if (err == DB_SUCCESS) {
2828
#ifdef UNIV_IBUF_DEBUG
2829
/* fprintf(stderr, "Ibuf insert for page no %lu of index %s\n",
2830
page_no, index->name); */
2835
ut_a(err == DB_STRONG_FAIL);
2841
/********************************************************************//**
2842
During merge, inserts to an index page a secondary index entry extracted
2843
from the insert buffer. */
2846
ibuf_insert_to_index_page(
2847
/*======================*/
2848
dtuple_t* entry, /*!< in: buffered entry to insert */
2849
buf_block_t* block, /*!< in/out: index page where the buffered entry should be placed */
2851
dict_index_t* index, /*!< in: record descriptor */
2852
mtr_t* mtr) /*!< in: mtr */
2854
page_cur_t page_cur;
2856
page_t* page = buf_block_get_frame(block);
2858
page_t* bitmap_page;
2861
ut_ad(ibuf_inside());
2862
ut_ad(dtuple_check_typed(entry));
2864
if (UNIV_UNLIKELY(dict_table_is_comp(index->table)
2865
!= (ibool)!!page_is_comp(page))) {
2866
fputs("InnoDB: Trying to insert a record from"
2867
" the insert buffer to an index page\n"
2868
"InnoDB: but the 'compact' flag does not match!\n",
2873
rec = page_rec_get_next(page_get_infimum_rec(page));
2875
if (UNIV_UNLIKELY(rec_get_n_fields(rec, index)
2876
!= dtuple_get_n_fields(entry))) {
2877
fputs("InnoDB: Trying to insert a record from"
2878
" the insert buffer to an index page\n"
2879
"InnoDB: but the number of fields does not match!\n",
2882
buf_page_print(page, 0);
2884
dtuple_print(stderr, entry);
2886
fputs("InnoDB: The table where where"
2887
" this index record belongs\n"
2888
"InnoDB: is now probably corrupt."
2889
" Please run CHECK TABLE on\n"
2890
"InnoDB: your tables.\n"
2891
"InnoDB: Submit a detailed bug report to"
2892
" http://bugs.mysql.com!\n", stderr);
2897
low_match = page_cur_search(block, index, entry,
2898
PAGE_CUR_LE, &page_cur);
2900
if (low_match == dtuple_get_n_fields(entry)) {
2901
page_zip_des_t* page_zip;
2903
rec = page_cur_get_rec(&page_cur);
2904
page_zip = buf_block_get_page_zip(block);
2906
btr_cur_del_unmark_for_ibuf(rec, page_zip, mtr);
2908
rec = page_cur_tuple_insert(&page_cur, entry, index, 0, mtr);
2910
if (UNIV_LIKELY(rec != NULL)) {
2914
/* If the record did not fit, reorganize */
2916
btr_page_reorganize(block, index, mtr);
2917
page_cur_search(block, index, entry, PAGE_CUR_LE, &page_cur);
2919
/* This time the record must fit */
2921
(!page_cur_tuple_insert(&page_cur, entry, index,
2927
ut_print_timestamp(stderr);
2930
" InnoDB: Error: Insert buffer insert"
2931
" fails; page free %lu,"
2932
" dtuple size %lu\n",
2933
(ulong) page_get_max_insert_size(
2935
(ulong) rec_get_converted_size(
2937
fputs("InnoDB: Cannot insert index record ",
2939
dtuple_print(stderr, entry);
2940
fputs("\nInnoDB: The table where"
2941
" this index record belongs\n"
2942
"InnoDB: is now probably corrupt."
2943
" Please run CHECK TABLE on\n"
2944
"InnoDB: that table.\n", stderr);
2946
space = page_get_space_id(page);
2947
zip_size = buf_block_get_zip_size(block);
2948
page_no = page_get_page_no(page);
2950
bitmap_page = ibuf_bitmap_get_map_page(
2951
space, page_no, zip_size, mtr);
2952
old_bits = ibuf_bitmap_page_get_bits(
2953
bitmap_page, page_no, zip_size,
2954
IBUF_BITMAP_FREE, mtr);
2957
"InnoDB: space %lu, page %lu,"
2958
" zip_size %lu, bitmap bits %lu\n",
2959
(ulong) space, (ulong) page_no,
2960
(ulong) zip_size, (ulong) old_bits);
2962
fputs("InnoDB: Submit a detailed bug report"
2963
" to http://bugs.mysql.com\n", stderr);
2968
/*********************************************************************//**
2969
Deletes from ibuf the record on which pcur is positioned. If we have to
2970
resort to a pessimistic delete, this function commits mtr and closes the cursor.
2972
@return TRUE if mtr was committed and pcur closed in this operation */
2977
ulint space, /*!< in: space id */
2978
ulint page_no,/*!< in: index page number where the record should belong */
2980
btr_pcur_t* pcur, /*!< in: pcur positioned on the record to
2981
delete, having latch mode BTR_MODIFY_LEAF */
2982
const dtuple_t* search_tuple,
2983
/*!< in: search tuple for entries of page_no */
2984
mtr_t* mtr) /*!< in: mtr */
2990
ut_ad(ibuf_inside());
2991
ut_ad(page_rec_is_user_rec(btr_pcur_get_rec(pcur)));
2992
ut_ad(ibuf_rec_get_page_no(btr_pcur_get_rec(pcur)) == page_no);
2993
ut_ad(ibuf_rec_get_space(btr_pcur_get_rec(pcur)) == space);
2995
success = btr_cur_optimistic_delete(btr_pcur_get_btr_cur(pcur), mtr);
2998
#ifdef UNIV_IBUF_COUNT_DEBUG
3000
"Decrementing ibuf count of space %lu page %lu\n"
3001
"from %lu by 1\n", space, page_no,
3002
ibuf_count_get(space, page_no));
3003
ibuf_count_set(space, page_no,
3004
ibuf_count_get(space, page_no) - 1);
3009
ut_ad(page_rec_is_user_rec(btr_pcur_get_rec(pcur)));
3010
ut_ad(ibuf_rec_get_page_no(btr_pcur_get_rec(pcur)) == page_no);
3011
ut_ad(ibuf_rec_get_space(btr_pcur_get_rec(pcur)) == space);
3013
/* We have to resort to a pessimistic delete from ibuf */
3014
btr_pcur_store_position(pcur, mtr);
3016
btr_pcur_commit_specify_mtr(pcur, mtr);
3018
mutex_enter(&ibuf_mutex);
3022
success = btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, mtr);
3025
if (fil_space_get_flags(space) == ULINT_UNDEFINED) {
3026
/* The tablespace has been dropped. It is possible
3027
that another thread has deleted the insert buffer
3028
entry. Do not complain. */
3029
goto commit_and_exit;
3033
"InnoDB: ERROR: Submit the output to"
3034
" http://bugs.mysql.com\n"
3035
"InnoDB: ibuf cursor restoration fails!\n"
3036
"InnoDB: ibuf record inserted to page %lu\n",
3040
rec_print_old(stderr, btr_pcur_get_rec(pcur));
3041
rec_print_old(stderr, pcur->old_rec);
3042
dtuple_print(stderr, search_tuple);
3044
rec_print_old(stderr,
3045
page_rec_get_next(btr_pcur_get_rec(pcur)));
3048
btr_pcur_commit_specify_mtr(pcur, mtr);
3050
fputs("InnoDB: Validating insert buffer tree:\n", stderr);
3051
if (!btr_validate_index(ibuf->index, NULL)) {
3055
fprintf(stderr, "InnoDB: ibuf tree ok\n");
3061
root = ibuf_tree_root_get(mtr);
3063
btr_cur_pessimistic_delete(&err, TRUE, btr_pcur_get_btr_cur(pcur),
3065
ut_a(err == DB_SUCCESS);
3067
#ifdef UNIV_IBUF_COUNT_DEBUG
3068
ibuf_count_set(space, page_no, ibuf_count_get(space, page_no) - 1);
3070
ibuf_size_update(root, mtr);
3073
btr_pcur_commit_specify_mtr(pcur, mtr);
3076
btr_pcur_close(pcur);
3078
mutex_exit(&ibuf_mutex);
3083
/*********************************************************************//**
3084
When an index page is read from a disk to the buffer pool, this function
3085
inserts to the page the possible index entries buffered in the insert buffer.
3086
The entries are deleted from the insert buffer. If the page is not read, but
3087
created in the buffer pool, this function deletes its buffered entries from
3088
the insert buffer; there can exist entries for such a page if the page
3089
belonged to an index which subsequently was dropped. */
3092
ibuf_merge_or_delete_for_page(
3093
/*==========================*/
3094
buf_block_t* block, /*!< in: if page has been read from
3095
disk, pointer to the page x-latched, else NULL */
3097
ulint space, /*!< in: space id of the index page */
3098
ulint page_no,/*!< in: page number of the index page */
3099
ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
3101
ibool update_ibuf_bitmap)/*!< in: normally this is set
3102
to TRUE, but if we have deleted or are
3103
deleting the tablespace, then we
3104
naturally do not want to update a
3105
non-existent bitmap page */
3109
dtuple_t* search_tuple;
3111
#ifdef UNIV_IBUF_DEBUG
3114
page_zip_des_t* page_zip = NULL;
3115
ibool tablespace_being_deleted = FALSE;
3116
ibool corruption_noticed = FALSE;
3119
ut_ad(!block || buf_block_get_space(block) == space);
3120
ut_ad(!block || buf_block_get_page_no(block) == page_no);
3121
ut_ad(!block || buf_block_get_zip_size(block) == zip_size);
3123
if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE
3124
|| trx_sys_hdr_page(space, page_no)) {
3128
/* We cannot refer to zip_size in the following, because
3129
zip_size is passed as ULINT_UNDEFINED (it is unknown) when
3130
buf_read_ibuf_merge_pages() is merging (discarding) changes
3131
for a dropped tablespace. When block != NULL or
3132
update_ibuf_bitmap is specified, the zip_size must be known.
3133
That is why we will repeat the check below, with zip_size in
3134
place of 0. Passing zip_size as 0 assumes that the
3135
uncompressed page size always is a power-of-2 multiple of the
3136
compressed page size. */
3138
if (ibuf_fixed_addr_page(space, 0, page_no)
3139
|| fsp_descr_page(0, page_no)) {
3143
if (UNIV_LIKELY(update_ibuf_bitmap)) {
3144
ut_a(ut_is_2pow(zip_size));
3146
if (ibuf_fixed_addr_page(space, zip_size, page_no)
3147
|| fsp_descr_page(zip_size, page_no)) {
3151
/* If the following returns FALSE, we get the counter
3152
incremented, and must decrement it when we leave this
3153
function. When the counter is > 0, that prevents the tablespace
3154
from being dropped. */
3156
tablespace_being_deleted = fil_inc_pending_ibuf_merges(space);
3158
if (UNIV_UNLIKELY(tablespace_being_deleted)) {
3159
/* Do not try to read the bitmap page from space;
3160
just delete the ibuf records for the page */
3163
update_ibuf_bitmap = FALSE;
3165
page_t* bitmap_page;
3169
bitmap_page = ibuf_bitmap_get_map_page(
3170
space, page_no, zip_size, &mtr);
3172
if (!ibuf_bitmap_page_get_bits(bitmap_page, page_no,
3174
IBUF_BITMAP_BUFFERED,
3176
/* No inserts buffered for this page */
3179
if (!tablespace_being_deleted) {
3180
fil_decr_pending_ibuf_merges(space);
3188
&& (ibuf_fixed_addr_page(space, zip_size, page_no)
3189
|| fsp_descr_page(zip_size, page_no))) {
3196
heap = mem_heap_create(512);
3198
if (!trx_sys_multiple_tablespace_format) {
3199
ut_a(trx_doublewrite_must_reset_space_ids);
3200
search_tuple = ibuf_search_tuple_build(space, page_no, heap);
3202
search_tuple = ibuf_new_search_tuple_build(space, page_no,
3207
/* Move the ownership of the x-latch on the page to this OS
3208
thread, so that we can acquire a second x-latch on it. This
3209
is needed for the insert operations to the index page to pass
3210
the debug checks. */
3212
rw_lock_x_lock_move_ownership(&(block->lock));
3213
page_zip = buf_block_get_page_zip(block);
3215
if (UNIV_UNLIKELY(fil_page_get_type(block->frame)
3217
|| UNIV_UNLIKELY(!page_is_leaf(block->frame))) {
3219
page_t* bitmap_page;
3221
corruption_noticed = TRUE;
3223
ut_print_timestamp(stderr);
3227
fputs(" InnoDB: Dump of the ibuf bitmap page:\n",
3230
bitmap_page = ibuf_bitmap_get_map_page(space, page_no,
3232
buf_page_print(bitmap_page, 0);
3236
fputs("\nInnoDB: Dump of the page:\n", stderr);
3238
buf_page_print(block->frame, 0);
3241
"InnoDB: Error: corruption in the tablespace."
3242
" Bitmap shows insert\n"
3243
"InnoDB: buffer records to page n:o %lu"
3244
" though the page\n"
3245
"InnoDB: type is %lu, which is"
3246
" not an index leaf page!\n"
3247
"InnoDB: We try to resolve the problem"
3248
" by skipping the insert buffer\n"
3249
"InnoDB: merge for this page."
3250
" Please run CHECK TABLE on your tables\n"
3251
"InnoDB: to determine if they are corrupt"
3253
"InnoDB: Please submit a detailed bug report"
3254
" to http://bugs.mysql.com\n\n",
3257
fil_page_get_type(block->frame));
3262
#ifdef UNIV_IBUF_DEBUG
3271
success = buf_page_get_known_nowait(
3273
BUF_KEEP_OLD, __FILE__, __LINE__, &mtr);
3277
buf_block_dbg_add_level(block, SYNC_TREE_NODE);
3280
/* Position pcur in the insert buffer at the first entry for this index page */
3282
btr_pcur_open_on_user_rec(
3283
ibuf->index, search_tuple, PAGE_CUR_GE, BTR_MODIFY_LEAF,
3286
if (!btr_pcur_is_on_user_rec(&pcur)) {
3287
ut_ad(btr_pcur_is_after_last_in_tree(&pcur, &mtr));
3295
ut_ad(btr_pcur_is_on_user_rec(&pcur));
3297
rec = btr_pcur_get_rec(&pcur);
3299
/* Check if the entry is for this index page */
3300
if (ibuf_rec_get_page_no(rec) != page_no
3301
|| ibuf_rec_get_space(rec) != space) {
3304
page_header_reset_last_insert(
3305
block->frame, page_zip, &mtr);
3311
if (UNIV_UNLIKELY(corruption_noticed)) {
3312
fputs("InnoDB: Discarding record\n ", stderr);
3313
rec_print_old(stderr, rec);
3314
fputs("\nInnoDB: from the insert buffer!\n\n", stderr);
3316
/* Now we have at pcur a record which should be
3317
inserted to the index page; NOTE that the call below
3318
copies pointers to fields in rec, and we must
3319
keep the latch to the rec page until the
3320
insertion is finished! */
3322
trx_id_t max_trx_id;
3323
dict_index_t* dummy_index;
3325
max_trx_id = page_get_max_trx_id(page_align(rec));
3326
page_update_max_trx_id(block, page_zip, max_trx_id,
3329
entry = ibuf_build_entry_from_ibuf_rec(
3330
rec, heap, &dummy_index);
3331
#ifdef UNIV_IBUF_DEBUG
3332
volume += rec_get_converted_size(dummy_index, entry, 0)
3333
+ page_dir_calc_reserved_space(1);
3334
ut_a(volume <= 4 * UNIV_PAGE_SIZE
3335
/ IBUF_PAGE_SIZE_PER_FREE_SPACE);
3337
ibuf_insert_to_index_page(entry, block,
3339
ibuf_dummy_index_free(dummy_index);
3344
/* Delete the record from ibuf */
3345
if (ibuf_delete_rec(space, page_no, &pcur, search_tuple,
3347
/* Deletion was pessimistic and mtr was committed:
3348
we start from the beginning again */
3351
} else if (btr_pcur_is_after_last_on_page(&pcur)) {
3353
btr_pcur_close(&pcur);
3360
#ifdef UNIV_IBUF_COUNT_DEBUG
3361
if (ibuf_count_get(space, page_no) > 0) {
3362
/* btr_print_tree(ibuf_data->index->tree, 100); */
3366
if (UNIV_LIKELY(update_ibuf_bitmap)) {
3367
page_t* bitmap_page;
3369
bitmap_page = ibuf_bitmap_get_map_page(
3370
space, page_no, zip_size, &mtr);
3372
ibuf_bitmap_page_set_bits(
3373
bitmap_page, page_no, zip_size,
3374
IBUF_BITMAP_BUFFERED, FALSE, &mtr);
3377
ulint old_bits = ibuf_bitmap_page_get_bits(
3378
bitmap_page, page_no, zip_size,
3379
IBUF_BITMAP_FREE, &mtr);
3381
ulint new_bits = ibuf_index_page_calc_free(
3384
if (old_bits != new_bits) {
3385
ibuf_bitmap_page_set_bits(
3386
bitmap_page, page_no, zip_size,
3387
IBUF_BITMAP_FREE, new_bits, &mtr);
3393
btr_pcur_close(&pcur);
3394
mem_heap_free(heap);
3396
/* Protect our statistics keeping from race conditions */
3397
mutex_enter(&ibuf_mutex);
3400
ibuf->n_merged_recs += n_inserts;
3402
mutex_exit(&ibuf_mutex);
3404
if (update_ibuf_bitmap && !tablespace_being_deleted) {
3406
fil_decr_pending_ibuf_merges(space);
3411
#ifdef UNIV_IBUF_COUNT_DEBUG
3412
ut_a(ibuf_count_get(space, page_no) == 0);
3416
/*********************************************************************//**
3417
Deletes all entries in the insert buffer for a given space id. This is used
3418
in DISCARD TABLESPACE and IMPORT TABLESPACE.
3419
NOTE: this does not update the page free bitmaps in the space. The space will
3420
become CORRUPT when you call this function! */
3423
ibuf_delete_for_discarded_space(
3424
/*============================*/
3425
ulint space) /*!< in: space id */
3429
dtuple_t* search_tuple;
3436
heap = mem_heap_create(512);
3438
/* Use page number 0 to build the search tuple so that we get the
3439
cursor positioned at the first entry for this space id */
3441
search_tuple = ibuf_new_search_tuple_build(space, 0, heap);
3449
/* Position pcur in the insert buffer at the first entry for the space */
3451
btr_pcur_open_on_user_rec(
3452
ibuf->index, search_tuple, PAGE_CUR_GE, BTR_MODIFY_LEAF,
3455
if (!btr_pcur_is_on_user_rec(&pcur)) {
3456
ut_ad(btr_pcur_is_after_last_in_tree(&pcur, &mtr));
3462
ut_ad(btr_pcur_is_on_user_rec(&pcur));
3464
ibuf_rec = btr_pcur_get_rec(&pcur);
3466
/* Check if the entry is for this space */
3467
if (ibuf_rec_get_space(ibuf_rec) != space) {
3472
page_no = ibuf_rec_get_page_no(ibuf_rec);
3476
/* Delete the record from ibuf */
3477
closed = ibuf_delete_rec(space, page_no, &pcur, search_tuple,
3480
/* Deletion was pessimistic and mtr was committed:
3481
we start from the beginning again */
3488
if (btr_pcur_is_after_last_on_page(&pcur)) {
3490
btr_pcur_close(&pcur);
3500
btr_pcur_close(&pcur);
3502
/* Protect our statistics keeping from race conditions */
3503
mutex_enter(&ibuf_mutex);
3506
ibuf->n_merged_recs += n_inserts;
3508
mutex_exit(&ibuf_mutex);
3512
mem_heap_free(heap);
3515
/******************************************************************//**
3516
Looks if the insert buffer is empty.
3517
@return TRUE if empty */
3529
mutex_enter(&ibuf_mutex);
3533
root = ibuf_tree_root_get(&mtr);
3535
if (page_get_n_recs(root) == 0) {
3539
if (ibuf->empty == FALSE) {
3541
"InnoDB: Warning: insert buffer tree is empty"
3542
" but the data struct does not\n"
3543
"InnoDB: know it. This condition is legal"
3544
" if the master thread has not yet\n"
3545
"InnoDB: run to completion.\n");
3548
ut_a(ibuf->empty == FALSE);
3555
mutex_exit(&ibuf_mutex);
3562
/******************************************************************//**
3563
Prints info of ibuf. */
3568
FILE* file) /*!< in: file where to print */
3570
#ifdef UNIV_IBUF_COUNT_DEBUG
3575
mutex_enter(&ibuf_mutex);
3578
"Ibuf: size %lu, free list len %lu, seg size %lu,\n"
3579
"%lu inserts, %lu merged recs, %lu merges\n",
3581
(ulong) ibuf->free_list_len,
3582
(ulong) ibuf->seg_size,
3583
(ulong) ibuf->n_inserts,
3584
(ulong) ibuf->n_merged_recs,
3585
(ulong) ibuf->n_merges);
3586
#ifdef UNIV_IBUF_COUNT_DEBUG
3587
for (i = 0; i < IBUF_COUNT_N_SPACES; i++) {
3588
for (j = 0; j < IBUF_COUNT_N_PAGES; j++) {
3589
ulint count = ibuf_count_get(i, j);
3593
"Ibuf count for space/page %lu/%lu"
3595
(ulong) i, (ulong) j, (ulong) count);
3599
#endif /* UNIV_IBUF_COUNT_DEBUG */
3601
mutex_exit(&ibuf_mutex);
3603
#endif /* !UNIV_HOTBACKUP */