1
/******************************************************
6
Created 7/19/1997 Heikki Tuuri
7
*******************************************************/
12
#include "ibuf0ibuf.ic"
25
#include "sync0sync.h"
26
#include "dict0boot.h"
28
#include "lock0lock.h"
32
/* STRUCTURE OF AN INSERT BUFFER RECORD
36
1. The first field is the page number.
37
2. The second field is an array which stores type info for each subsequent
38
field. We store the information which affects the ordering of records, and
39
also the physical storage size of an SQL NULL value. E.g., for CHAR(10) it
41
3. Next we have the fields of the actual index record.
45
Note that contrary to what we planned in the 1990's, there will only be one
46
insert buffer tree, and that is in the system tablespace of InnoDB.
48
1. The first field is the space id.
49
2. The second field is a one-byte marker (0) which differentiates records from
50
the < 4.1.x storage format.
51
3. The third field is the page number.
52
4. The fourth field contains the type info, where we have also added 2 bytes to
53
store the charset. In the compressed table format of 5.0.x we must add more
54
information here so that we can build a dummy 'index' struct which 5.0.x
55
can use in the binary search on the index page in the ibuf merge phase.
56
5. The rest of the fields contain the fields of the actual index record.
60
The first byte of the fourth field is an additional marker (0) if the record
61
is in the compact format. The presence of this marker can be detected by
62
looking at the length of the field modulo DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE.
64
The high-order bit of the character set field in the type info is the
65
"nullable" flag for the field. */
68
/* PREVENTING DEADLOCKS IN THE INSERT BUFFER SYSTEM
70
If an OS thread performs any operation that brings in disk pages from
71
non-system tablespaces into the buffer pool, or creates such a page there,
72
then the operation may have as a side effect an insert buffer index tree
73
compression. Thus, the tree latch of the insert buffer tree may be acquired
74
in the x-mode, and also the file space latch of the system tablespace may
75
be acquired in the x-mode.
77
Also, an insert to an index in a non-system tablespace can have the same
78
effect. How do we know this cannot lead to a deadlock of OS threads? There
79
is a problem with the i/o-handler threads: they break the latching order
80
because they own x-latches to pages which are on a lower level than the
81
insert buffer tree latch, its page latches, and the tablespace latch an
82
insert buffer operation can reserve.
84
The solution is the following: Let all the tree and page latches connected
85
with the insert buffer be later in the latching order than the fsp latch and
88
Insert buffer pages must be such that the insert buffer is never invoked
89
when these pages are accessed as this would result in a recursion violating
90
the latching order. We let a special i/o-handler thread take care of i/o to
91
the insert buffer pages and the ibuf bitmap pages, as well as the fsp bitmap
92
pages and the first inode page, which contains the inode of the ibuf tree: let
93
us call all these ibuf pages. To prevent deadlocks, we do not let a read-ahead
94
access both non-ibuf and ibuf pages.
96
Then an i/o-handler for the insert buffer never needs to access recursively the
97
insert buffer tree and thus obeys the latching order. On the other hand, other
98
i/o-handlers for other tablespaces may require access to the insert buffer,
99
but because all kinds of latches they need to access there are later in the
100
latching order, no violation of the latching order occurs in this case,
103
A problem is how to grow and contract an insert buffer tree. As it is later
104
in the latching order than the fsp management, we have to reserve the fsp
105
latch first, before adding or removing pages from the insert buffer tree.
106
We let the insert buffer tree have its own file space management: a free
107
list of pages linked to the tree root. To prevent recursive use of the
108
insert buffer when adding pages to the tree, we must first load these pages
109
to memory, obtaining a latch on them, and only after that add them to the
110
free list of the insert buffer tree. More difficult is the removal of pages
111
from the free list. If there is an excess of pages in the free list of the
112
ibuf tree, they might be needed if some thread reserves the fsp latch,
113
intending to allocate more file space. So we do the following: if a thread
114
reserves the fsp latch, we check the writer count field of the latch. If
115
this field has value 1, it means that the thread did not own the latch
116
before entering the fsp system, and the mtr of the thread contains no
117
modifications to the fsp pages. Now we are free to reserve the ibuf latch,
118
and check if there is an excess of pages in the free list. We can then, in a
119
separate mini-transaction, take them out of the free list and free them to
122
To avoid deadlocks in the ibuf system, we divide file pages into three levels:
125
(2) ibuf tree pages and the pages in the ibuf tree free list, and
126
(3) ibuf bitmap pages.
128
No OS thread is allowed to access higher level pages if it has latches to
129
lower level pages; even if the thread owns a B-tree latch it must not access
130
the B-tree non-leaf pages if it has latches on lower level pages. Read-ahead
131
is only allowed for level 1 and 2 pages. Dedicated i/o-handler threads handle
132
exclusively level 1 i/o. A dedicated i/o handler thread handles exclusively
133
level 2 i/o. However, if an OS thread does the i/o handling for itself, i.e.,
134
it uses synchronous aio, it can access any pages, as long as it obeys the
135
access order rules. */
137
/* Buffer pool size per the maximum insert buffer size */
138
#define IBUF_POOL_SIZE_PER_MAX_SIZE 2
140
/* The insert buffer control structure */
143
static ulint ibuf_rnd = 986058871;
145
ulint ibuf_flush_count = 0;
147
#ifdef UNIV_IBUF_DEBUG
148
/* Dimensions for the ibuf_count array */
149
#define IBUF_COUNT_N_SPACES 500
150
#define IBUF_COUNT_N_PAGES 2000
152
/* Buffered entry counts for file pages, used in debugging */
153
static ulint ibuf_counts[IBUF_COUNT_N_SPACES][IBUF_COUNT_N_PAGES];
155
/**********************************************************************
156
Checks that the indexes to ibuf_counts[][] are within limits. */
161
ulint space_id, /* in: space identifier */
162
ulint page_no) /* in: page number */
164
if (space_id < IBUF_COUNT_N_SPACES && page_no < IBUF_COUNT_N_PAGES) {
169
"InnoDB: UNIV_IBUF_DEBUG limits space_id and page_no\n"
170
"InnoDB: and breaks crash recovery.\n"
171
"InnoDB: space_id=%lu, should be 0<=space_id<%lu\n"
172
"InnoDB: page_no=%lu, should be 0<=page_no<%lu\n",
173
(ulint) space_id, (ulint) IBUF_COUNT_N_SPACES,
174
(ulint) page_no, (ulint) IBUF_COUNT_N_PAGES);
179
/* The start address for an insert buffer bitmap page bitmap */
180
#define IBUF_BITMAP PAGE_DATA
182
/* Offsets in bits for the bits describing a single page in the bitmap */
183
#define IBUF_BITMAP_FREE 0
184
#define IBUF_BITMAP_BUFFERED 2
185
#define IBUF_BITMAP_IBUF 3 /* TRUE if page is a part of the ibuf
186
tree, excluding the root page, or is
187
in the free list of the ibuf */
189
/* Number of bits describing a single page */
190
#define IBUF_BITS_PER_PAGE 4
191
#if IBUF_BITS_PER_PAGE % 2
192
# error "IBUF_BITS_PER_PAGE must be an even number!"
195
/* The mutex used to block pessimistic inserts to ibuf trees */
196
static mutex_t ibuf_pessimistic_insert_mutex;
198
/* The mutex protecting the insert buffer structs */
199
static mutex_t ibuf_mutex;
201
/* The mutex protecting the insert buffer bitmaps */
202
static mutex_t ibuf_bitmap_mutex;
204
/* The area in pages from which contract looks for page numbers for merge */
205
#define IBUF_MERGE_AREA 8
207
/* Inside the merge area, pages which have at most 1 per this number less
208
buffered entries compared to maximum volume that can be buffered for a single
209
page are merged along with the page whose buffer became full */
210
#define IBUF_MERGE_THRESHOLD 4
212
/* In ibuf_contract at most this number of pages is read to memory in one
213
batch, in order to merge the entries for them in the insert buffer */
214
#define IBUF_MAX_N_PAGES_MERGED IBUF_MERGE_AREA
216
/* If the combined size of the ibuf trees exceeds ibuf->max_size by this
217
many pages, we start to contract it in connection to inserts there, using
218
non-synchronous contract */
219
#define IBUF_CONTRACT_ON_INSERT_NON_SYNC 0
221
/* Same as above, but use synchronous contract */
222
#define IBUF_CONTRACT_ON_INSERT_SYNC 5
224
/* Same as above, but no insert is done, only contract is called */
225
#define IBUF_CONTRACT_DO_NOT_INSERT 10
227
/* TODO: how to cope with drop table if there are records in the insert
228
buffer for the indexes of the table? Is there actually any problem,
229
because ibuf merge is done to a page when it is read in, and it is
230
still physically like the index page even if the index would have been
231
dropped! So, there seems to be no problem. */
233
/**********************************************************************
234
Validates the ibuf data structures when the caller owns ibuf_mutex. */
237
ibuf_validate_low(void);
238
/*===================*/
239
/* out: TRUE if ok */
241
/**********************************************************************
242
Sets the flag in the current OS thread local storage denoting that it is
243
inside an insert buffer routine. */
251
ptr = thr_local_get_in_ibuf_field();
253
ut_ad(*ptr == FALSE);
258
/**********************************************************************
259
Sets the flag in the current OS thread local storage denoting that it is
260
exiting an insert buffer routine. */
268
ptr = thr_local_get_in_ibuf_field();
275
/**********************************************************************
276
Returns TRUE if the current OS thread is performing an insert buffer
282
/* out: TRUE if inside an insert buffer routine: for instance,
283
a read-ahead of non-ibuf pages is then forbidden */
285
return(*thr_local_get_in_ibuf_field());
288
/**********************************************************************
289
Gets the ibuf header page and x-latches it. */
292
ibuf_header_page_get(
293
/*=================*/
294
/* out: insert buffer header page */
295
ulint space, /* in: space id */
296
mtr_t* mtr) /* in: mtr */
302
ut_ad(!ibuf_inside());
304
page = buf_page_get(space, FSP_IBUF_HEADER_PAGE_NO, RW_X_LATCH, mtr);
306
#ifdef UNIV_SYNC_DEBUG
307
buf_page_dbg_add_level(page, SYNC_IBUF_HEADER);
308
#endif /* UNIV_SYNC_DEBUG */
313
/**********************************************************************
314
Gets the root page and x-latches it. */
319
/* out: insert buffer tree root page */
320
ibuf_data_t* data, /* in: ibuf data */
321
ulint space, /* in: space id */
322
mtr_t* mtr) /* in: mtr */
327
ut_ad(ibuf_inside());
329
mtr_x_lock(dict_index_get_lock(data->index), mtr);
331
page = buf_page_get(space, FSP_IBUF_TREE_ROOT_PAGE_NO, RW_X_LATCH,
333
#ifdef UNIV_SYNC_DEBUG
334
buf_page_dbg_add_level(page, SYNC_TREE_NODE);
335
#endif /* UNIV_SYNC_DEBUG */
340
#ifdef UNIV_IBUF_DEBUG
341
/**********************************************************************
342
Gets the ibuf count for a given page. */
347
/* out: number of entries in the insert buffer
348
currently buffered for this page */
349
ulint space, /* in: space id */
350
ulint page_no)/* in: page number */
352
ibuf_count_check(space, page_no);
354
return(ibuf_counts[space][page_no]);
357
/**********************************************************************
358
Sets the ibuf count for a given page. */
363
ulint space, /* in: space id */
364
ulint page_no,/* in: page number */
365
ulint val) /* in: value to set */
367
ibuf_count_check(space, page_no);
368
ut_a(val < UNIV_PAGE_SIZE);
370
ibuf_counts[space][page_no] = val;
374
/**********************************************************************
375
Creates the insert buffer data structure at a database startup and initializes
376
the data structures for the insert buffer. */
379
ibuf_init_at_db_start(void)
380
/*=======================*/
382
ibuf = mem_alloc(sizeof(ibuf_t));
384
/* Note that also a pessimistic delete can sometimes make a B-tree
385
grow in size, as the references on the upper levels of the tree can
388
ibuf->max_size = buf_pool_get_curr_size() / UNIV_PAGE_SIZE
389
/ IBUF_POOL_SIZE_PER_MAX_SIZE;
391
UT_LIST_INIT(ibuf->data_list);
395
mutex_create(&ibuf_pessimistic_insert_mutex,
396
SYNC_IBUF_PESS_INSERT_MUTEX);
398
mutex_create(&ibuf_mutex, SYNC_IBUF_MUTEX);
400
mutex_create(&ibuf_bitmap_mutex, SYNC_IBUF_BITMAP_MUTEX);
402
fil_ibuf_init_at_db_start();
405
/**********************************************************************
406
Updates the size information in an ibuf data, assuming the segment size has
410
ibuf_data_sizes_update(
411
/*===================*/
412
ibuf_data_t* data, /* in: ibuf data struct */
413
page_t* root, /* in: ibuf tree root */
414
mtr_t* mtr) /* in: mtr */
418
ut_ad(mutex_own(&ibuf_mutex));
420
old_size = data->size;
422
data->free_list_len = flst_get_len(root + PAGE_HEADER
423
+ PAGE_BTR_IBUF_FREE_LIST, mtr);
425
data->height = 1 + btr_page_get_level(root, mtr);
427
data->size = data->seg_size - (1 + data->free_list_len);
428
/* the '1 +' is the ibuf header page */
429
ut_ad(data->size < data->seg_size);
431
if (page_get_n_recs(root) == 0) {
438
ut_ad(ibuf->size + data->size >= old_size);
440
ibuf->size = ibuf->size + data->size - old_size;
443
fprintf(stderr, "ibuf size %lu, space ibuf size %lu\n",
444
ibuf->size, data->size);
448
/**********************************************************************
449
Creates the insert buffer data struct for a single tablespace. Reads the
450
root page of the insert buffer tree in the tablespace. This function can
451
be called only after the dictionary system has been initialized, as this
452
creates also the insert buffer table and index into this tablespace. */
455
ibuf_data_init_for_space(
456
/*=====================*/
457
/* out, own: ibuf data struct, linked to the list
458
in ibuf control structure */
459
ulint space) /* in: space id */
473
data = mem_alloc(sizeof(ibuf_data_t));
479
mutex_enter(&ibuf_mutex);
481
mtr_x_lock(fil_space_get_latch(space), &mtr);
483
header_page = ibuf_header_page_get(space, &mtr);
485
fseg_n_reserved_pages(header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER,
491
data->seg_size = n_used;
493
root = buf_page_get(space, FSP_IBUF_TREE_ROOT_PAGE_NO, RW_X_LATCH,
495
#ifdef UNIV_SYNC_DEBUG
496
buf_page_dbg_add_level(root, SYNC_TREE_NODE);
497
#endif /* UNIV_SYNC_DEBUG */
502
data->n_merged_recs = 0;
504
ibuf_data_sizes_update(data, root, &mtr);
508
"InnoDB: index entries found in the insert buffer\n");
511
"InnoDB: insert buffer empty\n");
514
mutex_exit(&ibuf_mutex);
520
heap = mem_heap_create(450);
521
buf = mem_heap_alloc(heap, 50);
523
sprintf(buf, "SYS_IBUF_TABLE_%lu", (ulong) space);
524
/* use old-style record format for the insert buffer */
525
table = dict_mem_table_create(buf, space, 2, 0);
527
dict_mem_table_add_col(table, heap, "PAGE_NO", DATA_BINARY, 0, 0);
528
dict_mem_table_add_col(table, heap, "TYPES", DATA_BINARY, 0, 0);
530
table->id = ut_dulint_add(DICT_IBUF_ID_MIN, space);
532
dict_table_add_to_cache(table, heap);
535
index = dict_mem_index_create(
536
buf, "CLUST_IND", space,
537
DICT_CLUSTERED | DICT_UNIVERSAL | DICT_IBUF, 2);
539
dict_mem_index_add_field(index, "PAGE_NO", 0);
540
dict_mem_index_add_field(index, "TYPES", 0);
542
index->id = ut_dulint_add(DICT_IBUF_ID_MIN, space);
544
dict_index_add_to_cache(table, index, FSP_IBUF_TREE_ROOT_PAGE_NO);
546
data->index = dict_table_get_first_index(table);
548
mutex_enter(&ibuf_mutex);
550
UT_LIST_ADD_LAST(data_list, ibuf->data_list, data);
552
mutex_exit(&ibuf_mutex);
557
/*************************************************************************
558
Initializes an ibuf bitmap page. */
561
ibuf_bitmap_page_init(
562
/*==================*/
563
page_t* page, /* in: bitmap page */
564
mtr_t* mtr) /* in: mtr */
569
/* Write all zeros to the bitmap */
571
bit_offset = XDES_DESCRIBED_PER_PAGE * IBUF_BITS_PER_PAGE;
573
byte_offset = bit_offset / 8 + 1;
574
/* better: byte_offset = UT_BITS_IN_BYTES(bit_offset); */
576
fil_page_set_type(page, FIL_PAGE_IBUF_BITMAP);
578
memset(page + IBUF_BITMAP, 0, byte_offset);
580
/* The remaining area (up to the page trailer) is uninitialized. */
582
mlog_write_initial_log_record(page, MLOG_IBUF_BITMAP_INIT, mtr);
585
/*************************************************************************
586
Parses a redo log record of an ibuf bitmap page init. */
589
ibuf_parse_bitmap_init(
590
/*===================*/
591
/* out: end of log record or NULL */
592
byte* ptr, /* in: buffer */
593
byte* end_ptr __attribute__((unused)), /* in: buffer end */
594
page_t* page, /* in: page or NULL */
595
mtr_t* mtr) /* in: mtr or NULL */
597
ut_ad(ptr && end_ptr);
600
ibuf_bitmap_page_init(page, mtr);
606
/************************************************************************
607
Gets the desired bits for a given page from a bitmap page. */
610
ibuf_bitmap_page_get_bits(
611
/*======================*/
612
/* out: value of bits */
613
page_t* page, /* in: bitmap page */
614
ulint page_no,/* in: page whose bits to get */
615
ulint bit, /* in: IBUF_BITMAP_FREE, IBUF_BITMAP_BUFFERED, ... */
616
mtr_t* mtr __attribute__((unused))) /* in: mtr containing an
617
x-latch to the bitmap
625
ut_ad(bit < IBUF_BITS_PER_PAGE);
626
#if IBUF_BITS_PER_PAGE % 2
627
# error "IBUF_BITS_PER_PAGE % 2 != 0"
629
ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
630
MTR_MEMO_PAGE_X_FIX));
632
bit_offset = (page_no % XDES_DESCRIBED_PER_PAGE) * IBUF_BITS_PER_PAGE
635
byte_offset = bit_offset / 8;
636
bit_offset = bit_offset % 8;
638
ut_ad(byte_offset + IBUF_BITMAP < UNIV_PAGE_SIZE);
640
map_byte = mach_read_from_1(page + IBUF_BITMAP + byte_offset);
642
value = ut_bit_get_nth(map_byte, bit_offset);
644
if (bit == IBUF_BITMAP_FREE) {
645
ut_ad(bit_offset + 1 < 8);
647
value = value * 2 + ut_bit_get_nth(map_byte, bit_offset + 1);
653
/************************************************************************
654
Sets the desired bit for a given page in a bitmap page. */
657
ibuf_bitmap_page_set_bits(
658
/*======================*/
659
page_t* page, /* in: bitmap page */
660
ulint page_no,/* in: page whose bits to set */
661
ulint bit, /* in: IBUF_BITMAP_FREE, IBUF_BITMAP_BUFFERED, ... */
662
ulint val, /* in: value to set */
663
mtr_t* mtr) /* in: mtr containing an x-latch to the bitmap page */
669
ut_ad(bit < IBUF_BITS_PER_PAGE);
670
#if IBUF_BITS_PER_PAGE % 2
671
# error "IBUF_BITS_PER_PAGE % 2 != 0"
673
ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
674
MTR_MEMO_PAGE_X_FIX));
675
#ifdef UNIV_IBUF_DEBUG
676
ut_a((bit != IBUF_BITMAP_BUFFERED) || (val != FALSE)
677
|| (0 == ibuf_count_get(buf_frame_get_space_id(page),
680
bit_offset = (page_no % XDES_DESCRIBED_PER_PAGE) * IBUF_BITS_PER_PAGE
683
byte_offset = bit_offset / 8;
684
bit_offset = bit_offset % 8;
686
ut_ad(byte_offset + IBUF_BITMAP < UNIV_PAGE_SIZE);
688
map_byte = mach_read_from_1(page + IBUF_BITMAP + byte_offset);
690
if (bit == IBUF_BITMAP_FREE) {
691
ut_ad(bit_offset + 1 < 8);
694
map_byte = ut_bit_set_nth(map_byte, bit_offset, val / 2);
695
map_byte = ut_bit_set_nth(map_byte, bit_offset + 1, val % 2);
698
map_byte = ut_bit_set_nth(map_byte, bit_offset, val);
701
mlog_write_ulint(page + IBUF_BITMAP + byte_offset, map_byte,
705
/************************************************************************
706
Calculates the bitmap page number for a given page number. */
709
ibuf_bitmap_page_no_calc(
710
/*=====================*/
711
/* out: the bitmap page number where
712
the file page is mapped */
713
ulint page_no) /* in: tablespace page number */
715
return(FSP_IBUF_BITMAP_OFFSET
716
+ XDES_DESCRIBED_PER_PAGE
717
* (page_no / XDES_DESCRIBED_PER_PAGE));
720
/************************************************************************
721
Gets the ibuf bitmap page where the bits describing a given file page are
725
ibuf_bitmap_get_map_page(
726
/*=====================*/
727
/* out: bitmap page where the file page is mapped,
728
that is, the bitmap page containing the descriptor
729
bits for the file page; the bitmap page is
731
ulint space, /* in: space id of the file page */
732
ulint page_no,/* in: page number of the file page */
733
mtr_t* mtr) /* in: mtr */
737
page = buf_page_get(space, ibuf_bitmap_page_no_calc(page_no),
739
#ifdef UNIV_SYNC_DEBUG
740
buf_page_dbg_add_level(page, SYNC_IBUF_BITMAP);
741
#endif /* UNIV_SYNC_DEBUG */
746
/****************************************************************************
747
Sets the free bits of the page in the ibuf bitmap. This is done in a separate
748
mini-transaction, hence this operation does not restrict further work to only
749
ibuf bitmap operations, which would result if the latch to the bitmap page
753
ibuf_set_free_bits_low(
754
/*===================*/
755
ulint type, /* in: index type */
756
page_t* page, /* in: index page; free bit is set if the index is
757
non-clustered and page level is 0 */
758
ulint val, /* in: value to set: < 4 */
759
mtr_t* mtr) /* in: mtr */
763
if (type & DICT_CLUSTERED) {
768
if (btr_page_get_level_low(page) != 0) {
773
bitmap_page = ibuf_bitmap_get_map_page(
774
buf_frame_get_space_id(page),
775
buf_frame_get_page_no(page), mtr);
776
#ifdef UNIV_IBUF_DEBUG
779
"Setting page no %lu free bits to %lu should be %lu\n",
780
buf_frame_get_page_no(page), val,
781
ibuf_index_page_calc_free(page));
784
ut_a(val <= ibuf_index_page_calc_free(page));
785
#endif /* UNIV_IBUF_DEBUG */
786
ibuf_bitmap_page_set_bits(bitmap_page, buf_frame_get_page_no(page),
787
IBUF_BITMAP_FREE, val, mtr);
791
/****************************************************************************
792
Sets the free bits of the page in the ibuf bitmap. This is done in a separate
793
mini-transaction, hence this operation does not restrict further work to only
794
ibuf bitmap operations, which would result if the latch to the bitmap page
800
ulint type, /* in: index type */
801
page_t* page, /* in: index page; free bit is set if the index is
802
non-clustered and page level is 0 */
803
ulint val, /* in: value to set: < 4 */
804
ulint max_val)/* in: ULINT_UNDEFINED or a maximum value which
805
the bits must have before setting; this is for
811
if (type & DICT_CLUSTERED) {
816
if (btr_page_get_level_low(page) != 0) {
823
bitmap_page = ibuf_bitmap_get_map_page(
824
buf_frame_get_space_id(page), buf_frame_get_page_no(page),
827
if (max_val != ULINT_UNDEFINED) {
828
#ifdef UNIV_IBUF_DEBUG
831
old_val = ibuf_bitmap_page_get_bits(
832
bitmap_page, buf_frame_get_page_no(page),
833
IBUF_BITMAP_FREE, &mtr);
835
if (old_val != max_val) {
837
"Ibuf: page %lu old val %lu max val %lu\n",
838
buf_frame_get_page_no(page),
843
ut_a(old_val <= max_val);
846
#ifdef UNIV_IBUF_DEBUG
848
fprintf(stderr, "Setting page no %lu free bits to %lu should be %lu\n",
849
buf_frame_get_page_no(page), val,
850
ibuf_index_page_calc_free(page));
853
ut_a(val <= ibuf_index_page_calc_free(page));
855
ibuf_bitmap_page_set_bits(bitmap_page, buf_frame_get_page_no(page),
856
IBUF_BITMAP_FREE, val, &mtr);
860
/****************************************************************************
861
Resets the free bits of the page in the ibuf bitmap. This is done in a
862
separate mini-transaction, hence this operation does not restrict further
863
work to only ibuf bitmap operations, which would result if the latch to the
864
bitmap page were kept. */
867
ibuf_reset_free_bits_with_type(
868
/*===========================*/
869
ulint type, /* in: index type */
870
page_t* page) /* in: index page; free bits are set to 0 if the index
871
is non-clustered and non-unique and the page level is
874
ibuf_set_free_bits(type, page, 0, ULINT_UNDEFINED);
877
/****************************************************************************
878
Resets the free bits of the page in the ibuf bitmap. This is done in a
879
separate mini-transaction, hence this operation does not restrict further
880
work to solely ibuf bitmap operations, which would result if the latch to
881
the bitmap page were kept. */
884
ibuf_reset_free_bits(
885
/*=================*/
886
dict_index_t* index, /* in: index */
887
page_t* page) /* in: index page; free bits are set to 0 if
888
the index is non-clustered and non-unique and
889
the page level is 0 */
891
ibuf_set_free_bits(index->type, page, 0, ULINT_UNDEFINED);
894
/**************************************************************************
895
Updates the free bits for a page to reflect the present state. Does this
896
in the mtr given, which means that the latching order rules virtually prevent
897
any further operations for this OS thread until mtr is committed. */
900
ibuf_update_free_bits_low(
901
/*======================*/
902
dict_index_t* index, /* in: index */
903
page_t* page, /* in: index page */
904
ulint max_ins_size, /* in: value of maximum insert size
905
with reorganize before the latest
906
operation performed to the page */
907
mtr_t* mtr) /* in: mtr */
912
before = ibuf_index_page_calc_free_bits(max_ins_size);
914
after = ibuf_index_page_calc_free(page);
916
if (before != after) {
917
ibuf_set_free_bits_low(index->type, page, after, mtr);
921
/**************************************************************************
922
Updates the free bits for the two pages to reflect the present state. Does
923
this in the mtr given, which means that the latching order rules virtually
924
prevent any further operations until mtr is committed. */
927
ibuf_update_free_bits_for_two_pages_low(
928
/*====================================*/
929
dict_index_t* index, /* in: index */
930
page_t* page1, /* in: index page */
931
page_t* page2, /* in: index page */
932
mtr_t* mtr) /* in: mtr */
936
/* As we have to x-latch two random bitmap pages, we have to acquire
937
the bitmap mutex to prevent a deadlock with a similar operation
938
performed by another OS thread. */
940
mutex_enter(&ibuf_bitmap_mutex);
942
state = ibuf_index_page_calc_free(page1);
944
ibuf_set_free_bits_low(index->type, page1, state, mtr);
946
state = ibuf_index_page_calc_free(page2);
948
ibuf_set_free_bits_low(index->type, page2, state, mtr);
950
mutex_exit(&ibuf_bitmap_mutex);
953
/**************************************************************************
954
Returns TRUE if the page is one of the fixed address ibuf pages. */
957
ibuf_fixed_addr_page(
958
/*=================*/
959
/* out: TRUE if a fixed address ibuf i/o page */
960
ulint space, /* in: space id */
961
ulint page_no)/* in: page number */
963
return((space == 0 && page_no == IBUF_TREE_ROOT_PAGE_NO)
964
|| ibuf_bitmap_page(page_no));
967
/***************************************************************************
968
Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages. */
973
/* out: TRUE if level 2 or level 3 page */
974
ulint space, /* in: space id */
975
ulint page_no)/* in: page number */
981
if (recv_no_ibuf_operations) {
982
/* Recovery is running: no ibuf operations should be
988
if (ibuf_fixed_addr_page(space, page_no)) {
994
/* Currently we only have an ibuf tree in space 0 */
999
ut_ad(fil_space_get_type(space) == FIL_TABLESPACE);
1003
bitmap_page = ibuf_bitmap_get_map_page(space, page_no, &mtr);
1005
ret = ibuf_bitmap_page_get_bits(bitmap_page, page_no, IBUF_BITMAP_IBUF,
1012
/***************************************************************************
1013
Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages. */
1018
/* out: TRUE if level 2 or level 3 page */
1019
ulint space, /* in: space id */
1020
ulint page_no,/* in: page number */
1021
mtr_t* mtr) /* in: mtr which will contain an x-latch to the
1022
bitmap page if the page is not one of the fixed
1023
address ibuf pages */
1025
page_t* bitmap_page;
1028
if (ibuf_fixed_addr_page(space, page_no)) {
1033
bitmap_page = ibuf_bitmap_get_map_page(space, page_no, mtr);
1035
ret = ibuf_bitmap_page_get_bits(bitmap_page, page_no, IBUF_BITMAP_IBUF,
1040
/************************************************************************
1041
Returns the page number field of an ibuf record. */
1044
ibuf_rec_get_page_no(
1045
/*=================*/
1046
/* out: page number */
1047
rec_t* rec) /* in: ibuf record */
1052
ut_ad(ibuf_inside());
1053
ut_ad(rec_get_n_fields_old(rec) > 2);
1055
field = rec_get_nth_field_old(rec, 1, &len);
1058
/* This is of the >= 4.1.x record format */
1059
ut_a(trx_sys_multiple_tablespace_format);
1061
field = rec_get_nth_field_old(rec, 2, &len);
1063
ut_a(trx_doublewrite_must_reset_space_ids);
1064
ut_a(!trx_sys_multiple_tablespace_format);
1066
field = rec_get_nth_field_old(rec, 0, &len);
1071
return(mach_read_from_4(field));
1074
/************************************************************************
1075
Returns the space id field of an ibuf record. For < 4.1.x format records
1082
rec_t* rec) /* in: ibuf record */
1087
ut_ad(ibuf_inside());
1088
ut_ad(rec_get_n_fields_old(rec) > 2);
1090
field = rec_get_nth_field_old(rec, 1, &len);
1093
/* This is of the >= 4.1.x record format */
1095
ut_a(trx_sys_multiple_tablespace_format);
1096
field = rec_get_nth_field_old(rec, 0, &len);
1099
return(mach_read_from_4(field));
1102
ut_a(trx_doublewrite_must_reset_space_ids);
1103
ut_a(!trx_sys_multiple_tablespace_format);
1108
/************************************************************************
1109
Creates a dummy index for inserting a record to a non-clustered index.
1113
ibuf_dummy_index_create(
1114
/*====================*/
1115
/* out: dummy index */
1116
ulint n, /* in: number of fields */
1117
ibool comp) /* in: TRUE=use compact record format */
1119
dict_table_t* table;
1120
dict_index_t* index;
1122
table = dict_mem_table_create("IBUF_DUMMY",
1124
comp ? DICT_TF_COMPACT : 0);
1126
index = dict_mem_index_create("IBUF_DUMMY", "IBUF_DUMMY",
1127
DICT_HDR_SPACE, 0, n);
1129
index->table = table;
1131
/* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */
1132
index->cached = TRUE;
1136
/************************************************************************
1137
Adds a column to the dummy index. */
1140
ibuf_dummy_index_add_col(
1141
/*=====================*/
1142
dict_index_t* index, /* in: dummy index */
1143
dtype_t* type, /* in: the data type of the column */
1144
ulint len) /* in: length of the column */
1146
ulint i = index->table->n_def;
1147
dict_mem_table_add_col(index->table, NULL, NULL,
1148
dtype_get_mtype(type),
1149
dtype_get_prtype(type),
1150
dtype_get_len(type));
1151
dict_index_add_col(index, index->table, (dict_col_t*)
1152
dict_table_get_nth_col(index->table, i), len);
1154
/************************************************************************
1155
Deallocates a dummy index for inserting a record to a non-clustered index.
1159
ibuf_dummy_index_free(
1160
/*==================*/
1161
dict_index_t* index) /* in: dummy index */
1163
dict_table_t* table = index->table;
1165
dict_mem_index_free(index);
1166
dict_mem_table_free(table);
1169
/*************************************************************************
1170
Builds the entry to insert into a non-clustered index when we have the
1171
corresponding record in an ibuf index. */
1174
ibuf_build_entry_from_ibuf_rec(
1175
/*===========================*/
1176
/* out, own: entry to insert to
1177
a non-clustered index; NOTE that
1178
as we copy pointers to fields in
1179
ibuf_rec, the caller must hold a
1180
latch to the ibuf_rec page as long
1181
as the entry is used! */
1182
rec_t* ibuf_rec, /* in: record in an insert buffer */
1183
mem_heap_t* heap, /* in: heap where built */
1184
dict_index_t** pindex) /* out, own: dummy index that
1185
describes the entry */
1194
dict_index_t* index;
1196
data = rec_get_nth_field_old(ibuf_rec, 1, &len);
1199
/* This is a < 4.1.x format record */
1201
ut_a(trx_doublewrite_must_reset_space_ids);
1202
ut_a(!trx_sys_multiple_tablespace_format);
1204
n_fields = rec_get_n_fields_old(ibuf_rec) - 2;
1205
tuple = dtuple_create(heap, n_fields);
1206
types = rec_get_nth_field_old(ibuf_rec, 1, &len);
1208
ut_a(len == n_fields * DATA_ORDER_NULL_TYPE_BUF_SIZE);
1210
for (i = 0; i < n_fields; i++) {
1211
field = dtuple_get_nth_field(tuple, i);
1213
data = rec_get_nth_field_old(ibuf_rec, i + 2, &len);
1215
dfield_set_data(field, data, len);
1217
dtype_read_for_order_and_null_size(
1218
dfield_get_type(field),
1219
types + i * DATA_ORDER_NULL_TYPE_BUF_SIZE);
1222
*pindex = ibuf_dummy_index_create(n_fields, FALSE);
1226
/* This a >= 4.1.x format record */
1228
ut_a(trx_sys_multiple_tablespace_format);
1230
ut_a(rec_get_n_fields_old(ibuf_rec) > 4);
1232
n_fields = rec_get_n_fields_old(ibuf_rec) - 4;
1234
tuple = dtuple_create(heap, n_fields);
1236
types = rec_get_nth_field_old(ibuf_rec, 3, &len);
1238
ut_a(len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE <= 1);
1239
index = ibuf_dummy_index_create(
1240
n_fields, len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
1242
if (len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE) {
1243
/* compact record format */
1249
ut_a(len == n_fields * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
1251
for (i = 0; i < n_fields; i++) {
1252
field = dtuple_get_nth_field(tuple, i);
1254
data = rec_get_nth_field_old(ibuf_rec, i + 4, &len);
1256
dfield_set_data(field, data, len);
1258
dtype_new_read_for_order_and_null_size(
1259
dfield_get_type(field),
1260
types + i * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
1262
ibuf_dummy_index_add_col(index, dfield_get_type(field), len);
1269
/************************************************************************
1270
Returns the space taken by a stored non-clustered index entry if converted to
1274
ibuf_rec_get_volume(
1275
/*================*/
1276
/* out: size of index record in bytes + an upper
1277
limit of the space taken in the page directory */
1278
rec_t* ibuf_rec)/* in: ibuf record */
1281
ibool new_format = FALSE;
1282
ulint data_size = 0;
1289
ut_ad(ibuf_inside());
1290
ut_ad(rec_get_n_fields_old(ibuf_rec) > 2);
1292
/* Fetch the second field of the ibuf record; in the >= 4.1.x format it is
the one-byte marker, in the older format it is the type info array (see the
record format description at the top of this file) */
data = rec_get_nth_field_old(ibuf_rec, 1, &len);
1295
/* < 4.1.x format record */
1297
ut_a(trx_doublewrite_must_reset_space_ids);
1298
ut_a(!trx_sys_multiple_tablespace_format);
1300
n_fields = rec_get_n_fields_old(ibuf_rec) - 2;
1302
types = rec_get_nth_field_old(ibuf_rec, 1, &len);
1304
ut_ad(len == n_fields * DATA_ORDER_NULL_TYPE_BUF_SIZE);
1306
/* >= 4.1.x format record */
1308
ut_a(trx_sys_multiple_tablespace_format);
1311
types = rec_get_nth_field_old(ibuf_rec, 3, &len);
1313
ut_a(len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE <= 1);
1314
if (len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE) {
1315
/* compact record format */
1317
/* Build the entry and a dummy index in a temporary heap, let
rec_get_converted_size() compute the record size, then free both before
returning the size plus the page directory reservation for one record */
dict_index_t* dummy_index;
1318
mem_heap_t* heap = mem_heap_create(500);
1319
dtuple_t* entry = ibuf_build_entry_from_ibuf_rec(
1320
ibuf_rec, heap, &dummy_index);
1321
volume = rec_get_converted_size(dummy_index, entry);
1322
ibuf_dummy_index_free(dummy_index);
1323
mem_heap_free(heap);
1324
return(volume + page_dir_calc_reserved_space(1));
1327
n_fields = rec_get_n_fields_old(ibuf_rec) - 4;
1332
for (i = 0; i < n_fields; i++) {
1334
data = rec_get_nth_field_old(ibuf_rec, i + 4, &len);
1336
dtype_new_read_for_order_and_null_size(
1338
* DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
1340
data = rec_get_nth_field_old(ibuf_rec, i + 2, &len);
1342
dtype_read_for_order_and_null_size(
1344
* DATA_ORDER_NULL_TYPE_BUF_SIZE);
1347
/* An SQL NULL field is counted with the NULL storage size of its type */
if (len == UNIV_SQL_NULL) {
1348
data_size += dtype_get_sql_null_size(&dtype);
1354
/* Sum of the field data, the extra size computed from data_size and
n_fields, and the page directory reservation for one record */
return(data_size + rec_get_converted_extra_size(data_size, n_fields)
1355
+ page_dir_calc_reserved_space(1));
1358
/*************************************************************************
1359
Builds the tuple to insert to an ibuf tree when we have an entry for a
1360
non-clustered index. */
1365
/* out, own: entry to insert into an ibuf
1366
index tree; NOTE that the original entry
1367
must be kept because we copy pointers to its
1369
dict_index_t* index, /* in: non-clustered index */
1370
dtuple_t* entry, /* in: entry for a non-clustered index */
1371
ulint space, /* in: space id */
1372
ulint page_no,/* in: index page number where entry should
1374
mem_heap_t* heap) /* in: heap into which to build */
1378
dfield_t* entry_field;
1384
/* Starting from 4.1.x, we have to build a tuple whose
1385
(1) first field is the space id,
1386
(2) the second field a single marker byte (0) to tell that this
1387
is a new format record,
1388
(3) the third contains the page number, and
1389
(4) the fourth contains the relevant type information of each data
1390
field; the length of this field % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE is
1391
(a) 0 for b-trees in the old format, and
1392
(b) 1 for b-trees in the compact format, the first byte of the field
1393
being the marker (0);
1394
(5) and the rest of the fields are copied from entry. All fields
1395
in the tuple are ordered like the type binary in our insert buffer
1398
n_fields = dtuple_get_n_fields(entry);
1400
tuple = dtuple_create(heap, n_fields + 4);
1402
/* Store the space id in tuple */
1404
field = dtuple_get_nth_field(tuple, 0);
1406
buf = mem_heap_alloc(heap, 4);
1408
mach_write_to_4(buf, space);
1410
dfield_set_data(field, buf, 4);
1412
/* Store the marker byte field in tuple */
1414
field = dtuple_get_nth_field(tuple, 1);
1416
buf = mem_heap_alloc(heap, 1);
1418
/* We set the marker byte zero */
1420
mach_write_to_1(buf, 0);
1422
dfield_set_data(field, buf, 1);
1424
/* Store the page number in tuple */
1426
field = dtuple_get_nth_field(tuple, 2);
1428
buf = mem_heap_alloc(heap, 4);
1430
mach_write_to_4(buf, page_no);
1432
dfield_set_data(field, buf, 4);
1434
/* Store the type info in buf2, and add the fields from entry to
1436
buf2 = mem_heap_alloc(heap, n_fields
1437
* DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
1438
+ dict_table_is_comp(index->table));
1439
if (dict_table_is_comp(index->table)) {
1440
*buf2++ = 0; /* write the compact format indicator */
1442
for (i = 0; i < n_fields; i++) {
1444
const dict_field_t* ifield;
1446
/* We add 4 below because we have the 4 extra fields at the
1447
start of an ibuf record */
1449
field = dtuple_get_nth_field(tuple, i + 4);
1450
entry_field = dtuple_get_nth_field(entry, i);
1451
dfield_copy(field, entry_field);
1453
ifield = dict_index_get_nth_field(index, i);
1454
/* Prefix index columns of fixed-length columns are of
1455
fixed length. However, in the function call below,
1456
dfield_get_type(entry_field) contains the fixed length
1457
of the column in the clustered index. Replace it with
1458
the fixed length of the secondary index column. */
1459
fixed_len = ifield->fixed_len;
1463
/* dict_index_add_col() should guarantee these */
1464
ut_ad(fixed_len <= (ulint) entry_field->type.len);
1465
if (ifield->prefix_len) {
1466
ut_ad(ifield->prefix_len == fixed_len);
1469
== (ulint) entry_field->type.len);
1472
#endif /* UNIV_DEBUG */
1474
/* Write the type info of field i into its own slot of buf2, passing the
fixed length taken from the index field rather than from the entry type */
dtype_new_store_for_order_and_null_size(
1475
buf2 + i * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE,
1476
dfield_get_type(entry_field), fixed_len);
1479
/* Store the type info in buf2 to field 3 of tuple */
1481
field = dtuple_get_nth_field(tuple, 3);
1483
if (dict_table_is_comp(index->table)) {
1487
/* The length stored for field 3 includes the one extra marker byte when
the table is in the compact format: dict_table_is_comp() is added to it,
exactly as in the allocation of buf2 */
dfield_set_data(field, buf2, n_fields
1488
* DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
1489
+ dict_table_is_comp(index->table));
1490
/* Set all the types in the new tuple binary */
1492
dtuple_set_types_binary(tuple, n_fields + 4);
1497
/*************************************************************************
1498
Builds a search tuple used to search buffered inserts for an index page.
1499
This is for < 4.1.x format records. The tuple has a single field, the page
number written as 4 bytes, and its type is set to binary. */
1502
ibuf_search_tuple_build(
1503
/*====================*/
1504
/* out, own: search tuple */
1505
ulint space, /* in: space id */
1506
ulint page_no,/* in: index page number */
1507
mem_heap_t* heap) /* in: heap into which to build */
1514
/* This builder is only valid while the old, single tablespace format is
in use; both conditions are asserted */
ut_a(trx_doublewrite_must_reset_space_ids);
1515
ut_a(!trx_sys_multiple_tablespace_format);
1517
tuple = dtuple_create(heap, 1);
1519
/* Store the page number in tuple */
1521
field = dtuple_get_nth_field(tuple, 0);
1523
buf = mem_heap_alloc(heap, 4);
1525
mach_write_to_4(buf, page_no);
1527
dfield_set_data(field, buf, 4);
1529
dtuple_set_types_binary(tuple, 1);
1534
/*************************************************************************
1535
Builds a search tuple used to search buffered inserts for an index page.
1536
This is for >= 4.1.x format records. The tuple has three fields, matching
the first three fields of a new format ibuf record: the space id (4 bytes),
the marker byte 0 (1 byte) and the page number (4 bytes). All three are set
to binary type. */
1539
ibuf_new_search_tuple_build(
1540
/*========================*/
1541
/* out, own: search tuple */
1542
ulint space, /* in: space id */
1543
ulint page_no,/* in: index page number */
1544
mem_heap_t* heap) /* in: heap into which to build */
1550
/* Only valid when the multiple tablespace format is in use */
ut_a(trx_sys_multiple_tablespace_format);
1552
tuple = dtuple_create(heap, 3);
1554
/* Store the space id in tuple */
1556
field = dtuple_get_nth_field(tuple, 0);
1558
buf = mem_heap_alloc(heap, 4);
1560
mach_write_to_4(buf, space);
1562
dfield_set_data(field, buf, 4);
1564
/* Store the new format record marker byte */
1566
field = dtuple_get_nth_field(tuple, 1);
1568
buf = mem_heap_alloc(heap, 1);
1570
mach_write_to_1(buf, 0);
1572
dfield_set_data(field, buf, 1);
1574
/* Store the page number in tuple */
1576
field = dtuple_get_nth_field(tuple, 2);
1578
buf = mem_heap_alloc(heap, 4);
1580
mach_write_to_4(buf, page_no);
1582
dfield_set_data(field, buf, 4);
1584
dtuple_set_types_binary(tuple, 3);
1589
/*************************************************************************
1590
Checks if there are enough pages in the free list of the ibuf tree that we
1591
dare to start a pessimistic insert to the insert buffer. */
1594
ibuf_data_enough_free_for_insert(
1595
/*=============================*/
1596
/* out: TRUE if enough free pages in list */
1597
ibuf_data_t* data) /* in: ibuf data for the space */
1599
/* The caller is expected to own ibuf_mutex; this is asserted */
ut_ad(mutex_own(&ibuf_mutex));
1601
/* We want a big margin of free pages, because a B-tree can sometimes
1602
grow in size also if records are deleted from it, as the node pointers
1603
can change, and we must make sure that we are able to delete the
1604
inserts buffered for pages that we read to the buffer pool, without
1605
any risk of running out of free space in the insert buffer. */
1607
/* The margin: the free list length must be at least half of data->size
plus three times data->height */
if (data->free_list_len >= data->size / 2 + 3 * data->height) {
1615
/*************************************************************************
1616
Checks if there are enough pages in the free list of the ibuf tree that we
1617
should remove them and free to the file space management. */
1620
ibuf_data_too_much_free(
1621
/*====================*/
1622
/* out: TRUE if enough free pages in list */
1623
ibuf_data_t* data) /* in: ibuf data for the space */
1625
/* The caller is expected to own ibuf_mutex; this is asserted */
ut_ad(mutex_own(&ibuf_mutex));
1627
/* The limit is 3 pages above the one used by
ibuf_data_enough_free_for_insert(): half of data->size plus three times
data->height, plus 3 */
return(data->free_list_len >= 3 + data->size / 2 + 3 * data->height);
1630
/*************************************************************************
1631
Allocates a new page from the ibuf file segment and adds it to the free
1637
/* out: DB_SUCCESS, or DB_STRONG_FAIL
1639
ulint space, /* in: space id */
1640
ibuf_data_t* ibuf_data) /* in: ibuf data for the space */
1643
page_t* header_page;
1647
page_t* bitmap_page;
1653
/* Acquire the fsp latch before the ibuf header, obeying the latching
1655
mtr_x_lock(fil_space_get_latch(space), &mtr);
1657
header_page = ibuf_header_page_get(space, &mtr);
1659
/* Allocate a new page: NOTE that if the page has been a part of a
1660
non-clustered index which has subsequently been dropped, then the
1661
page may have buffered inserts in the insert buffer, and these
1662
should be deleted from there. These get deleted when the page
1663
allocation creates the page in buffer. Thus the call below may end
1664
up calling the insert buffer routines and, as we yet have no latches
1665
to insert buffer tree pages, these routines can run without a risk
1666
of a deadlock. This is the reason why we created a special ibuf
1667
header page apart from the ibuf tree. */
1669
page_no = fseg_alloc_free_page(header_page + IBUF_HEADER
1670
+ IBUF_TREE_SEG_HEADER, 0, FSP_UP,
1672
/* FIL_NULL from the allocation means that no page was obtained; this is
reported to the caller as DB_STRONG_FAIL */
if (page_no == FIL_NULL) {
1675
return(DB_STRONG_FAIL);
1678
/* Get the newly allocated page with an x-latch in the same mtr */
page = buf_page_get(space, page_no, RW_X_LATCH, &mtr);
1680
#ifdef UNIV_SYNC_DEBUG
1681
buf_page_dbg_add_level(page, SYNC_TREE_NODE_NEW);
1682
#endif /* UNIV_SYNC_DEBUG */
1686
/* ibuf_mutex is held from here up to the mutex_exit() call further down,
while the free list, the size counters and the bitmap bit are updated */
mutex_enter(&ibuf_mutex);
1688
root = ibuf_tree_root_get(ibuf_data, space, &mtr);
1690
/* Add the page to the free list and update the ibuf size data */
1692
flst_add_last(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
1693
page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, &mtr);
1695
/* Write FIL_PAGE_IBUF_FREE_LIST into the page type field of the page */
mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_IBUF_FREE_LIST,
1698
ibuf_data->seg_size++;
1699
ibuf_data->free_list_len++;
1701
/* Set the bit indicating that this page is now an ibuf tree page
1704
bitmap_page = ibuf_bitmap_get_map_page(space, page_no, &mtr);
1706
ibuf_bitmap_page_set_bits(bitmap_page, page_no, IBUF_BITMAP_IBUF,
1710
mutex_exit(&ibuf_mutex);
1717
/*************************************************************************
1718
Removes a page from the free list and frees it to the fsp system. The page
removed is the last one in the free list, and nothing is removed unless
ibuf_data_too_much_free() holds for the tree. */
1721
ibuf_remove_free_page(
1722
/*==================*/
1723
ulint space, /* in: space id */
1724
ibuf_data_t* ibuf_data) /* in: ibuf data for the space */
1728
page_t* header_page;
1732
page_t* bitmap_page;
1738
/* Acquire the fsp latch before the ibuf header, obeying the latching
1740
mtr_x_lock(fil_space_get_latch(space), &mtr);
1742
header_page = ibuf_header_page_get(space, &mtr);
1744
/* Prevent pessimistic inserts to insert buffer trees for a while */
1745
mutex_enter(&ibuf_pessimistic_insert_mutex);
1749
mutex_enter(&ibuf_mutex);
1751
/* Check under both mutexes whether the free list is really too long; if
it is not, both mutexes are released again */
if (!ibuf_data_too_much_free(ibuf_data)) {
1753
mutex_exit(&ibuf_mutex);
1757
mutex_exit(&ibuf_pessimistic_insert_mutex);
1766
/* The root page is fetched in a separate mini-transaction, mtr2, to read
the last page of the free list */
root = ibuf_tree_root_get(ibuf_data, space, &mtr2);
1768
page_no = flst_get_last(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
1772
/* NOTE that we must release the latch on the ibuf tree root
1773
because in fseg_free_page we access level 1 pages, and the root
1774
is a level 2 page. */
1777
mutex_exit(&ibuf_mutex);
1781
/* Since pessimistic inserts were prevented, we know that the
1782
page is still in the free list. NOTE that also deletes may take
1783
pages from the free list, but they take them from the start, and
1784
the free list was so long that they cannot have taken the last
1787
fseg_free_page(header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER,
1788
space, page_no, &mtr);
1789
#ifdef UNIV_DEBUG_FILE_ACCESSES
1790
buf_page_reset_file_page_was_freed(space, page_no);
1794
mutex_enter(&ibuf_mutex);
1796
root = ibuf_tree_root_get(ibuf_data, space, &mtr);
1798
ut_ad(page_no == flst_get_last(root + PAGE_HEADER
1799
+ PAGE_BTR_IBUF_FREE_LIST, &mtr)
1802
page = buf_page_get(space, page_no, RW_X_LATCH, &mtr);
1804
#ifdef UNIV_SYNC_DEBUG
1805
buf_page_dbg_add_level(page, SYNC_TREE_NODE);
1806
#endif /* UNIV_SYNC_DEBUG */
1808
/* Remove the page from the free list and update the ibuf size data */
1810
flst_remove(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
1811
page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, &mtr);
1813
/* One page fewer both in the segment and in the free list */
ibuf_data->seg_size--;
1814
ibuf_data->free_list_len--;
1816
/* The page is off the free list now, so pessimistic inserts are allowed
again */
mutex_exit(&ibuf_pessimistic_insert_mutex);
1818
/* Set the bit indicating that this page is no more an ibuf tree page
1821
bitmap_page = ibuf_bitmap_get_map_page(space, page_no, &mtr);
1823
ibuf_bitmap_page_set_bits(bitmap_page, page_no, IBUF_BITMAP_IBUF,
1825
#ifdef UNIV_DEBUG_FILE_ACCESSES
1826
buf_page_set_file_page_was_freed(space, page_no);
1830
mutex_exit(&ibuf_mutex);
1835
/***************************************************************************
1836
Frees excess pages from the ibuf free list. This function is called when an OS
1837
thread calls fsp services to allocate a new file segment, or a new page to a
1838
file segment, and the thread did not own the fsp latch before this call. At
most 4 pages are removed from the free list in one call. */
1841
ibuf_free_excess_pages(
1842
/*===================*/
1843
ulint space) /* in: space id */
1845
ibuf_data_t* ibuf_data;
1850
"InnoDB: Error: calling ibuf_free_excess_pages"
1851
" for space %lu\n", (ulong) space);
1855
/* Sanity checks: the tablespace latch is held in exclusive mode with an
x-lock count of exactly 1, and the thread is not inside the ibuf code */
#ifdef UNIV_SYNC_DEBUG
1856
ut_ad(rw_lock_own(fil_space_get_latch(space), RW_LOCK_EX));
1857
#endif /* UNIV_SYNC_DEBUG */
1858
ut_ad(rw_lock_get_x_lock_count(fil_space_get_latch(space)) == 1);
1859
ut_ad(!ibuf_inside());
1861
/* NOTE: We require that the thread did not own the latch before,
1862
because then we know that we can obey the correct latching order
1865
ibuf_data = fil_space_get_ibuf_data(space);
1867
if (ibuf_data == NULL) {
1868
/* Not yet initialized */
1870
#if 0 /* defined UNIV_DEBUG */
1872
"Ibuf for space %lu not yet initialized\n", space);
1878
/* Free at most a few pages at a time, so that we do not delay the
1879
requested service too much */
1881
for (i = 0; i < 4; i++) {
1883
mutex_enter(&ibuf_mutex);
1885
/* The check is made under ibuf_mutex, but the mutex is released again
before ibuf_remove_free_page() is called */
if (!ibuf_data_too_much_free(ibuf_data)) {
1887
mutex_exit(&ibuf_mutex);
1892
mutex_exit(&ibuf_mutex);
1894
ibuf_remove_free_page(space, ibuf_data);
1898
/*************************************************************************
1899
Reads page numbers from a leaf in an ibuf tree. Starting from rec, the
records on the same ibuf page are scanned first backwards and then forwards,
and (space id, page number) pairs belonging to the same merge area as rec are
stored into the output arrays together with the tablespace versions. */
1902
ibuf_get_merge_page_nos(
1903
/*====================*/
1904
/* out: a lower limit for the combined volume
1905
of records which will be merged */
1906
ibool contract,/* in: TRUE if this function is called to
1907
contract the tree, FALSE if this is called
1908
when a single page becomes full and we look
1909
if it pays to read also nearby pages */
1910
rec_t* rec, /* in: record from which we read up and down
1911
in the chain of records */
1912
ulint* space_ids,/* in/out: space id's of the pages */
1913
ib_longlong* space_versions,/* in/out: tablespace version
1914
timestamps; used to prevent reading in old
1915
pages after DISCARD + IMPORT tablespace */
1916
ulint* page_nos,/* in/out: buffer for at least
1917
IBUF_MAX_N_PAGES_MERGED many page numbers;
1918
the page numbers are in an ascending order */
1919
ulint* n_stored)/* out: number of page numbers stored to
1920
page_nos in this function */
1923
ulint prev_space_id;
1924
ulint first_page_no;
1925
ulint first_space_id;
1929
ulint volume_for_page;
1936
/* Never collect more pages than IBUF_MAX_N_PAGES_MERGED or a quarter of
buf_pool->curr_size, whichever is smaller */
limit = ut_min(IBUF_MAX_N_PAGES_MERGED, buf_pool->curr_size / 4);
1938
/* Step off the supremum and infimum pseudo-records so that rec points to
a user record, if the page has one */
if (page_rec_is_supremum(rec)) {
1940
rec = page_rec_get_prev(rec);
1943
if (page_rec_is_infimum(rec)) {
1945
rec = page_rec_get_next(rec);
1948
/* Still on the supremum after the steps above: the page has no user
records */
if (page_rec_is_supremum(rec)) {
1953
/* The page and space of the starting record define the merge area that
the scans below stay within */
first_page_no = ibuf_rec_get_page_no(rec);
1954
first_space_id = ibuf_rec_get_space(rec);
1959
/* Go backwards from the first rec until we reach the border of the
1960
'merge area', or the page start or the limit of storable pages is
1963
while (!page_rec_is_infimum(rec) && UNIV_LIKELY(n_pages < limit)) {
1965
rec_page_no = ibuf_rec_get_page_no(rec);
1966
rec_space_id = ibuf_rec_get_space(rec);
1968
if (rec_space_id != first_space_id
1969
|| rec_page_no / IBUF_MERGE_AREA
1970
!= first_page_no / IBUF_MERGE_AREA) {
1975
if (rec_page_no != prev_page_no
1976
|| rec_space_id != prev_space_id) {
1980
prev_page_no = rec_page_no;
1981
prev_space_id = rec_space_id;
1983
rec = page_rec_get_prev(rec);
1986
rec = page_rec_get_next(rec);
1988
/* At the loop start there is no prev page; we mark this with a pair
1989
of space id, page no (0, 0) for which there can never be entries in
1990
the insert buffer */
1995
volume_for_page = 0;
1997
/* Walk forward from there, adding up the volume buffered for each
(space id, page number) pair and storing pairs into the output arrays until
limit many have been stored */
while (*n_stored < limit) {
1998
if (page_rec_is_supremum(rec)) {
1999
/* When no more records available, mark this with
2000
another 'impossible' pair of space id, page no */
2004
rec_page_no = ibuf_rec_get_page_no(rec);
2005
rec_space_id = ibuf_rec_get_space(rec);
2006
ut_ad(rec_page_no > IBUF_TREE_ROOT_PAGE_NO);
2009
#ifdef UNIV_IBUF_DEBUG
2010
ut_a(*n_stored < IBUF_MAX_N_PAGES_MERGED);
2012
/* The record belongs to a different page than the previous one, and the
previous pair is not the (0, 0) start marker: the volume summed up for the
previous page is now complete */
if ((rec_space_id != prev_space_id
2013
|| rec_page_no != prev_page_no)
2014
&& (prev_space_id != 0 || prev_page_no != 0)) {
2016
if ((prev_page_no == first_page_no
2017
&& prev_space_id == first_space_id)
2020
> ((IBUF_MERGE_THRESHOLD - 1)
2021
* 4 * UNIV_PAGE_SIZE
2022
/ IBUF_PAGE_SIZE_PER_FREE_SPACE)
2023
/ IBUF_MERGE_THRESHOLD)) {
2025
/* Store the previous page as one to merge, together with the version of
its tablespace as returned by fil_space_get_version() */
space_ids[*n_stored] = prev_space_id;
2026
space_versions[*n_stored]
2027
= fil_space_get_version(prev_space_id);
2028
page_nos[*n_stored] = prev_page_no;
2032
sum_volumes += volume_for_page;
2035
if (rec_space_id != first_space_id
2036
|| rec_page_no / IBUF_MERGE_AREA
2037
!= first_page_no / IBUF_MERGE_AREA) {
2042
volume_for_page = 0;
2045
/* NOTE(review): the pair (page 1, space 0) is presumably the impossible
pair assigned above when the supremum was reached; confirm */
if (rec_page_no == 1 && rec_space_id == 0) {
2046
/* Supremum record */
2051
/* Add the volume of this record to the running total of its page */
rec_volume = ibuf_rec_get_volume(rec);
2053
volume_for_page += rec_volume;
2055
prev_page_no = rec_page_no;
2056
prev_space_id = rec_space_id;
2058
rec = page_rec_get_next(rec);
2061
#ifdef UNIV_IBUF_DEBUG
2062
ut_a(*n_stored <= IBUF_MAX_N_PAGES_MERGED);
2065
fprintf(stderr, "Ibuf merge batch %lu pages %lu volume\n",
2066
*n_stored, sum_volumes);
2068
return(sum_volumes);
2071
/*************************************************************************
2072
Contracts insert buffer trees by reading pages to the buffer pool. A cursor
is opened at a random position of the tree, the pages to merge are collected
with ibuf_get_merge_page_nos() and reads for them are issued with
buf_read_ibuf_merge_pages(). */
2077
/* out: a lower limit for the combined size in bytes
2078
of entries which will be merged from ibuf trees to the
2079
pages read, 0 if ibuf is empty */
2080
ulint* n_pages,/* out: number of pages to which merged */
2081
ibool sync) /* in: TRUE if the caller wants to wait for the
2082
issued read with the highest tablespace address
2089
ibool all_trees_empty;
2090
ulint page_nos[IBUF_MAX_N_PAGES_MERGED];
2091
ulint space_ids[IBUF_MAX_N_PAGES_MERGED];
2092
ib_longlong space_versions[IBUF_MAX_N_PAGES_MERGED];
2099
ut_ad(!ibuf_inside());
2101
mutex_enter(&ibuf_mutex);
2103
ut_ad(ibuf_validate_low());
2105
/* Choose an ibuf tree at random (though there really is only one tree
2106
in the current implementation) */
2107
/* Advance the pseudo-random state by a fixed increment and map it to a
position within the total ibuf size */
ibuf_rnd += 865558671;
2109
rnd_pos = ibuf_rnd % ibuf->size;
2111
all_trees_empty = TRUE;
2113
/* Walk the list of ibuf data structs: a tree is chosen when rnd_pos falls
inside its size, otherwise its size is subtracted and the next one is tried */
data = UT_LIST_GET_FIRST(ibuf->data_list);
2117
all_trees_empty = FALSE;
2119
if (rnd_pos < data->size) {
2124
rnd_pos -= data->size;
2127
data = UT_LIST_GET_NEXT(data_list, data);
2130
if (all_trees_empty) {
2131
mutex_exit(&ibuf_mutex);
2136
data = UT_LIST_GET_FIRST(ibuf->data_list);
2142
space = data->index->space;
2144
ut_a(space == 0); /* We currently only have an ibuf tree in
2150
/* Open a cursor to a randomly chosen leaf of the tree, at a random
2151
position within the leaf */
2153
btr_pcur_open_at_rnd_pos(data->index, BTR_SEARCH_LEAF, &pcur, &mtr);
2155
if (0 == page_get_n_recs(btr_pcur_get_page(&pcur))) {
2157
/* This tree is empty */
2164
btr_pcur_close(&pcur);
2166
mutex_exit(&ibuf_mutex);
2171
mutex_exit(&ibuf_mutex);
2173
/* Collect the pages to merge, starting from the record under the cursor;
the first argument TRUE tells that the call is made to contract the tree */
sum_sizes = ibuf_get_merge_page_nos(TRUE, btr_pcur_get_rec(&pcur),
2174
space_ids, space_versions,
2175
page_nos, &n_stored);
2176
#if 0 /* defined UNIV_IBUF_DEBUG */
2177
fprintf(stderr, "Ibuf contract sync %lu pages %lu volume %lu\n",
2178
sync, n_stored, sum_sizes);
2183
btr_pcur_close(&pcur);
2185
/* The cursor is closed before the reads for the collected pages are
issued */
buf_read_ibuf_merge_pages(sync, space_ids, space_versions, page_nos,
2187
*n_pages = n_stored;
2189
/* NOTE(review): 1 is added presumably so that the result is never 0 here,
as 0 is documented to mean that the ibuf is empty; confirm */
return(sum_sizes + 1);
2192
/*************************************************************************
2193
Contracts insert buffer trees by reading pages to the buffer pool. This is
a wrapper around ibuf_contract_ext(): the return value is passed on as such
and the page count reported through the first argument is not returned. */
2198
/* out: a lower limit for the combined size in bytes
2199
of entries which will be merged from ibuf trees to the
2200
pages read, 0 if ibuf is empty */
2201
ibool sync) /* in: TRUE if the caller wants to wait for the
2202
issued read with the highest tablespace address
2207
return(ibuf_contract_ext(&n_pages, sync));
2210
/*************************************************************************
2211
Contracts insert buffer trees by reading pages to the buffer pool. Calls
ibuf_contract_ext() in a loop while the number of pages summed up so far is
below n_pages, adding up the byte and page counts of each call. */
2214
ibuf_contract_for_n_pages(
2215
/*======================*/
2216
/* out: a lower limit for the combined size in bytes
2217
of entries which will be merged from ibuf trees to the
2218
pages read, 0 if ibuf is empty */
2219
ibool sync, /* in: TRUE if the caller wants to wait for the
2220
issued read with the highest tablespace address
2222
ulint n_pages)/* in: try to read at least this many pages to
2223
the buffer pool and merge the ibuf contents to
2226
ulint sum_bytes = 0;
2227
ulint sum_pages = 0;
2231
while (sum_pages < n_pages) {
2232
n_bytes = ibuf_contract_ext(&n_pag2, sync);
2238
sum_bytes += n_bytes;
2239
sum_pages += n_pag2;
2245
/*************************************************************************
2246
Contract insert buffer trees after insert if they are too big. The ibuf size
is compared against ibuf->max_size plus two thresholds, and ibuf_contract()
is then called repeatedly. */
2249
ibuf_contract_after_insert(
2250
/*=======================*/
2251
ulint entry_size) /* in: size of a record which was inserted
2252
into an ibuf tree */
2258
/* ibuf->size is compared against the thresholds while holding ibuf_mutex */
mutex_enter(&ibuf_mutex);
2260
/* Below the non-synchronous threshold the mutex is simply released */
if (ibuf->size < ibuf->max_size + IBUF_CONTRACT_ON_INSERT_NON_SYNC) {
2261
mutex_exit(&ibuf_mutex);
2268
/* NOTE(review): at or above the synchronous threshold the contraction is
presumably made synchronous; the body of this test is not shown, confirm */
if (ibuf->size >= ibuf->max_size + IBUF_CONTRACT_ON_INSERT_SYNC) {
2273
mutex_exit(&ibuf_mutex);
2275
/* Contract at least entry_size many bytes */
2279
/* ibuf_contract() is documented to return 0 when the ibuf is empty, which
also ends this loop */
while ((size > 0) && (sum_sizes < entry_size)) {
2281
size = ibuf_contract(sync);
2286
/*************************************************************************
2287
Gets an upper limit for the combined size of entries buffered in the insert
2288
buffer for a given page. Records for the page are counted in both directions
from the cursor position, on the cursor page and on at most one neighbor page
in each direction. */
2291
ibuf_get_volume_buffered(
2292
/*=====================*/
2293
/* out: upper limit for the volume of
2294
buffered inserts for the index page, in bytes;
2295
we may also return UNIV_PAGE_SIZE, if the
2296
entries for the index page span on several
2297
pages in the insert buffer */
2298
btr_pcur_t* pcur, /* in: pcur positioned at a place in an
2299
insert buffer tree where we would insert an
2300
entry for the index page whose number is
2301
page_no, latch mode has to be BTR_MODIFY_PREV
2302
or BTR_MODIFY_TREE */
2303
ulint space, /* in: space id */
2304
ulint page_no,/* in: index page number of an index page */
2305
mtr_t* mtr) /* in: mtr */
2315
ut_a(trx_sys_multiple_tablespace_format);
2317
ut_ad((pcur->latch_mode == BTR_MODIFY_PREV)
2318
|| (pcur->latch_mode == BTR_MODIFY_TREE));
2320
/* Count the volume of records earlier in the alphabetical order than
2325
rec = btr_pcur_get_rec(pcur);
2327
page = buf_frame_align(rec);
2329
if (page_rec_is_supremum(rec)) {
2330
rec = page_rec_get_prev(rec);
2334
if (page_rec_is_infimum(rec)) {
2339
if (page_no != ibuf_rec_get_page_no(rec)
2340
|| space != ibuf_rec_get_space(rec)) {
2345
volume += ibuf_rec_get_volume(rec);
2347
rec = page_rec_get_prev(rec);
2350
/* Look at the previous page */
2352
prev_page_no = btr_page_get_prev(page, mtr);
2354
if (prev_page_no == FIL_NULL) {
2359
/* The space id is hard-coded as 0: the insert buffer tree is in the system
tablespace (see the note at the top of this file) */
prev_page = buf_page_get(0, prev_page_no, RW_X_LATCH, mtr);
2360
#ifdef UNIV_BTR_DEBUG
2361
ut_a(btr_page_get_next(prev_page, mtr)
2362
== buf_frame_get_page_no(page));
2363
#endif /* UNIV_BTR_DEBUG */
2365
#ifdef UNIV_SYNC_DEBUG
2366
buf_page_dbg_add_level(prev_page, SYNC_TREE_NODE);
2367
#endif /* UNIV_SYNC_DEBUG */
2369
/* Start from the last user record of the previous page */
rec = page_get_supremum_rec(prev_page);
2370
rec = page_rec_get_prev(rec);
2373
if (page_rec_is_infimum(rec)) {
2375
/* We cannot go to yet a previous page, because we
2376
do not have the x-latch on it, and cannot acquire one
2377
because of the latching order: we have to give up */
2379
return(UNIV_PAGE_SIZE);
2382
if (page_no != ibuf_rec_get_page_no(rec)
2383
|| space != ibuf_rec_get_space(rec)) {
2388
volume += ibuf_rec_get_volume(rec);
2390
rec = page_rec_get_prev(rec);
2394
/* Backward direction done: go back to the cursor record and count the
records for the page in the forward direction */
rec = btr_pcur_get_rec(pcur);
2396
if (!page_rec_is_supremum(rec)) {
2397
rec = page_rec_get_next(rec);
2401
if (page_rec_is_supremum(rec)) {
2406
if (page_no != ibuf_rec_get_page_no(rec)
2407
|| space != ibuf_rec_get_space(rec)) {
2412
volume += ibuf_rec_get_volume(rec);
2414
rec = page_rec_get_next(rec);
2417
/* Look at the next page */
2419
next_page_no = btr_page_get_next(page, mtr);
2421
if (next_page_no == FIL_NULL) {
2426
next_page = buf_page_get(0, next_page_no, RW_X_LATCH, mtr);
2427
#ifdef UNIV_BTR_DEBUG
2428
ut_a(btr_page_get_prev(next_page, mtr)
2429
== buf_frame_get_page_no(page));
2430
#endif /* UNIV_BTR_DEBUG */
2432
#ifdef UNIV_SYNC_DEBUG
2433
buf_page_dbg_add_level(next_page, SYNC_TREE_NODE);
2434
#endif /* UNIV_SYNC_DEBUG */
2436
/* Start from the first user record of the next page */
rec = page_get_infimum_rec(next_page);
2437
rec = page_rec_get_next(rec);
2440
if (page_rec_is_supremum(rec)) {
2444
/* As in the backward direction, the end of the neighbor page was reached:
give up and return the UNIV_PAGE_SIZE estimate */
return(UNIV_PAGE_SIZE);
2447
if (page_no != ibuf_rec_get_page_no(rec)
2448
|| space != ibuf_rec_get_space(rec)) {
2453
volume += ibuf_rec_get_volume(rec);
2455
rec = page_rec_get_next(rec);
2459
/*************************************************************************
2460
Reads the biggest tablespace id from the high end of the insert buffer
2461
tree and updates the counter in fil_system. The id is taken from field 0 of
the last record in the tree and handed to fil_set_max_space_id_if_bigger(). */
2464
ibuf_update_max_tablespace_id(void)
2465
/*===============================*/
2471
ibuf_data_t* ibuf_data;
2472
dict_index_t* ibuf_index;
2476
/* The ibuf data of space 0 is used: the insert buffer tree is in the system
tablespace (see the note at the top of this file) */
ibuf_data = fil_space_get_ibuf_data(0);
2478
ibuf_index = ibuf_data->index;
2479
/* NOTE(review): presumably required because the record is read below with
the old-style accessor rec_get_nth_field_old(); confirm */
ut_a(!dict_table_is_comp(ibuf_index->table));
2485
/* Open the cursor at the high end of the index and step back from there */
btr_pcur_open_at_index_side(FALSE, ibuf_index, BTR_SEARCH_LEAF,
2487
btr_pcur_move_to_prev(&pcur, &mtr);
2489
if (btr_pcur_is_before_first_on_page(&pcur, &mtr)) {
2490
/* The tree is empty */
2494
rec = btr_pcur_get_rec(&pcur);
2496
/* Field 0 of a new format ibuf record is the space id, stored in 4 bytes */
field = rec_get_nth_field_old(rec, 0, &len);
2500
max_space_id = mach_read_from_4(field);
2506
/* printf("Maximum space id in insert buffer %lu\n", max_space_id); */
2508
fil_set_max_space_id_if_bigger(max_space_id);
2511
/*************************************************************************
2512
Makes an index insert to the insert buffer, instead of directly to the disk
2513
page, if this is possible. */
2518
/* out: DB_SUCCESS, DB_FAIL, DB_STRONG_FAIL */
2519
ulint mode, /* in: BTR_MODIFY_PREV or BTR_MODIFY_TREE */
2520
dtuple_t* entry, /* in: index entry to insert */
2521
dict_index_t* index, /* in: index where to insert; must not be
2522
unique or clustered */
2523
ulint space, /* in: space id where to insert */
2524
ulint page_no,/* in: page number where to insert */
2525
que_thr_t* thr) /* in: query thread */
2527
big_rec_t* dummy_big_rec;
2531
dtuple_t* ibuf_entry;
2535
ibool old_bit_value;
2536
page_t* bitmap_page;
2537
ibuf_data_t* ibuf_data;
2538
dict_index_t* ibuf_index;
2542
ulint space_ids[IBUF_MAX_N_PAGES_MERGED];
2543
ib_longlong space_versions[IBUF_MAX_N_PAGES_MERGED];
2544
ulint page_nos[IBUF_MAX_N_PAGES_MERGED];
2550
ut_a(!(index->type & DICT_CLUSTERED));
2551
ut_ad(dtuple_check_typed(entry));
2553
ut_a(trx_sys_multiple_tablespace_format);
2557
/* Currently the insert buffer of space 0 takes care of inserts to all
2560
ibuf_data = fil_space_get_ibuf_data(0);
2562
ibuf_index = ibuf_data->index;
2564
mutex_enter(&ibuf_mutex);
2566
if (ibuf->size >= ibuf->max_size + IBUF_CONTRACT_DO_NOT_INSERT) {
2567
/* Insert buffer is now too big, contract it but do not try
2570
mutex_exit(&ibuf_mutex);
2572
#ifdef UNIV_IBUF_DEBUG
2573
fputs("Ibuf too big\n", stderr);
2575
/* Use synchronous contract (== TRUE) */
2576
ibuf_contract(TRUE);
2578
return(DB_STRONG_FAIL);
2581
mutex_exit(&ibuf_mutex);
2583
if (mode == BTR_MODIFY_TREE) {
2584
mutex_enter(&ibuf_pessimistic_insert_mutex);
2588
mutex_enter(&ibuf_mutex);
2590
while (!ibuf_data_enough_free_for_insert(ibuf_data)) {
2592
mutex_exit(&ibuf_mutex);
2596
mutex_exit(&ibuf_pessimistic_insert_mutex);
2598
err = ibuf_add_free_page(0, ibuf_data);
2600
if (err == DB_STRONG_FAIL) {
2605
mutex_enter(&ibuf_pessimistic_insert_mutex);
2609
mutex_enter(&ibuf_mutex);
2615
entry_size = rec_get_converted_size(index, entry);
2617
heap = mem_heap_create(512);
2619
/* Build the entry which contains the space id and the page number as
2620
the first fields and the type information for other fields, and which
2621
will be inserted to the insert buffer. */
2623
ibuf_entry = ibuf_entry_build(index, entry, space, page_no, heap);
2625
/* Open a cursor to the insert buffer tree to calculate if we can add
2626
the new entry to it without exceeding the free space limit for the
2631
btr_pcur_open(ibuf_index, ibuf_entry, PAGE_CUR_LE, mode, &pcur, &mtr);
2633
/* Find out the volume of already buffered inserts for the same index
2635
buffered = ibuf_get_volume_buffered(&pcur, space, page_no, &mtr);
2637
#ifdef UNIV_IBUF_DEBUG
2638
ut_a((buffered == 0) || ibuf_count_get(space, page_no));
2640
mtr_start(&bitmap_mtr);
2642
bitmap_page = ibuf_bitmap_get_map_page(space, page_no, &bitmap_mtr);
2644
/* We check if the index page is suitable for buffered entries */
2646
if (buf_page_peek(space, page_no)
2647
|| lock_rec_expl_exist_on_page(space, page_no)) {
2648
err = DB_STRONG_FAIL;
2650
mtr_commit(&bitmap_mtr);
2655
bits = ibuf_bitmap_page_get_bits(bitmap_page, page_no,
2656
IBUF_BITMAP_FREE, &bitmap_mtr);
2658
if (buffered + entry_size + page_dir_calc_reserved_space(1)
2659
> ibuf_index_page_calc_free_from_bits(bits)) {
2660
mtr_commit(&bitmap_mtr);
2662
/* It may not fit */
2663
err = DB_STRONG_FAIL;
2667
ibuf_get_merge_page_nos(FALSE, btr_pcur_get_rec(&pcur),
2668
space_ids, space_versions,
2669
page_nos, &n_stored);
2673
/* Set the bitmap bit denoting that the insert buffer contains
2674
buffered entries for this index page, if the bit is not set yet */
2676
old_bit_value = ibuf_bitmap_page_get_bits(bitmap_page, page_no,
2677
IBUF_BITMAP_BUFFERED,
2679
if (!old_bit_value) {
2680
ibuf_bitmap_page_set_bits(bitmap_page, page_no,
2681
IBUF_BITMAP_BUFFERED, TRUE,
2685
mtr_commit(&bitmap_mtr);
2687
cursor = btr_pcur_get_btr_cur(&pcur);
2689
if (mode == BTR_MODIFY_PREV) {
2690
err = btr_cur_optimistic_insert(BTR_NO_LOCKING_FLAG, cursor,
2691
ibuf_entry, &ins_rec,
2692
&dummy_big_rec, thr,
2694
if (err == DB_SUCCESS) {
2695
/* Update the page max trx id field */
2696
page_update_max_trx_id(buf_frame_align(ins_rec),
2697
thr_get_trx(thr)->id);
2700
ut_ad(mode == BTR_MODIFY_TREE);
2702
/* We acquire an x-latch to the root page before the insert,
2703
because a pessimistic insert releases the tree x-latch,
2704
which would cause the x-latching of the root after that to
2705
break the latching order. */
2707
root = ibuf_tree_root_get(ibuf_data, 0, &mtr);
2709
err = btr_cur_pessimistic_insert(BTR_NO_LOCKING_FLAG
2710
| BTR_NO_UNDO_LOG_FLAG,
2712
ibuf_entry, &ins_rec,
2713
&dummy_big_rec, thr,
2715
if (err == DB_SUCCESS) {
2716
/* Update the page max trx id field */
2717
page_update_max_trx_id(buf_frame_align(ins_rec),
2718
thr_get_trx(thr)->id);
2721
ibuf_data_sizes_update(ibuf_data, root, &mtr);
2725
#ifdef UNIV_IBUF_DEBUG
2726
if (err == DB_SUCCESS) {
2728
"Incrementing ibuf count of space %lu page %lu\n"
2729
"from %lu by 1\n", space, page_no,
2730
ibuf_count_get(space, page_no));
2732
ibuf_count_set(space, page_no,
2733
ibuf_count_get(space, page_no) + 1);
2736
if (mode == BTR_MODIFY_TREE) {
2737
ut_ad(ibuf_validate_low());
2739
mutex_exit(&ibuf_mutex);
2740
mutex_exit(&ibuf_pessimistic_insert_mutex);
2744
btr_pcur_close(&pcur);
2747
mem_heap_free(heap);
2749
mutex_enter(&ibuf_mutex);
2751
if (err == DB_SUCCESS) {
2752
ibuf_data->empty = FALSE;
2753
ibuf_data->n_inserts++;
2756
mutex_exit(&ibuf_mutex);
2758
if ((mode == BTR_MODIFY_TREE) && (err == DB_SUCCESS)) {
2759
ibuf_contract_after_insert(entry_size);
2763
#ifdef UNIV_IBUF_DEBUG
2764
ut_a(n_stored <= IBUF_MAX_N_PAGES_MERGED);
2766
buf_read_ibuf_merge_pages(FALSE, space_ids, space_versions,
2767
page_nos, n_stored);
2773
/*************************************************************************
2774
Makes an index insert to the insert buffer, instead of directly to the disk
2775
page, if this is possible. Does not do insert if the index is clustered
2781
/* out: TRUE if success */
2782
dtuple_t* entry, /* in: index entry to insert */
2783
dict_index_t* index, /* in: index where to insert */
2784
ulint space, /* in: space id where to insert */
2785
ulint page_no,/* in: page number where to insert */
2786
que_thr_t* thr) /* in: query thread */
2790
/* Entry assertions: the multiple-tablespace format flag must be set, the
entry must pass dtuple_check_typed(), and the index must not carry the
DICT_CLUSTERED type bit. */
ut_a(trx_sys_multiple_tablespace_format);
2791
ut_ad(dtuple_check_typed(entry));
2793
ut_a(!(index->type & DICT_CLUSTERED));
2795
/* Size guard: the converted record size of the entry is compared with a
bound computed from page_get_free_space_of_empty(), called with
dict_table_is_comp(index->table). NOTE(review): the remainder of the
bound and the action taken are not shown in this fragment. */
if (rec_get_converted_size(index, entry)
2796
>= (page_get_free_space_of_empty(dict_table_is_comp(index->table))
2801
/* Try the insert with latch mode BTR_MODIFY_PREV first; only a DB_FAIL
result leads to a second attempt with BTR_MODIFY_TREE. */
err = ibuf_insert_low(BTR_MODIFY_PREV, entry, index, space, page_no,
2803
if (err == DB_FAIL) {
2804
err = ibuf_insert_low(BTR_MODIFY_TREE, entry, index, space,
2808
if (err == DB_SUCCESS) {
2809
#ifdef UNIV_IBUF_DEBUG
2810
/* fprintf(stderr, "Ibuf insert for page no %lu of index %s\n",
2811
page_no, index->name); */
2816
/* NOTE(review): presumably the failure branch. DB_FAIL has already been
retried above, so DB_STRONG_FAIL is the only error accepted here. */
ut_a(err == DB_STRONG_FAIL);
2822
/************************************************************************
2823
During merge, inserts to an index page a secondary index entry extracted
2824
from the insert buffer. */
2827
ibuf_insert_to_index_page(
2828
/*======================*/
2829
dtuple_t* entry, /* in: buffered entry to insert */
2830
page_t* page, /* in: index page where the buffered entry
2832
dict_index_t* index, /* in: record descriptor */
2833
mtr_t* mtr) /* in: mtr */
2835
page_cur_t page_cur;
2838
page_t* bitmap_page;
2841
/* Debug assertions: ibuf_inside() must hold and the entry must pass
dtuple_check_typed(). */
ut_ad(ibuf_inside());
2842
ut_ad(dtuple_check_typed(entry));
2844
/* The compact flag of the table of the index must agree with the compact
flag of the page; on a mismatch the diagnostic below is emitted. */
if (UNIV_UNLIKELY(dict_table_is_comp(index->table)
2845
!= (ibool)!!page_is_comp(page))) {
2846
fputs("InnoDB: Trying to insert a record from"
2847
" the insert buffer to an index page\n"
2848
"InnoDB: but the 'compact' flag does not match!\n",
2853
/* Fetch the record that follows the infimum and compare its field count
with the field count of the entry; a difference is reported and the page
and the entry are dumped. */
rec = page_rec_get_next(page_get_infimum_rec(page));
2855
if (UNIV_UNLIKELY(rec_get_n_fields(rec, index)
2856
!= dtuple_get_n_fields(entry))) {
2857
fputs("InnoDB: Trying to insert a record from"
2858
" the insert buffer to an index page\n"
2859
"InnoDB: but the number of fields does not match!\n",
2862
buf_page_print(page);
2864
dtuple_print(stderr, entry);
2866
fputs("InnoDB: The table where where"
2867
" this index record belongs\n"
2868
"InnoDB: is now probably corrupt."
2869
" Please run CHECK TABLE on\n"
2870
"InnoDB: your tables.\n"
2871
"InnoDB: Submit a detailed bug report to"
2872
" http://bugs.mysql.com!\n", stderr);
2877
/* Locate the entry on the page with PAGE_CUR_LE. If low_match equals the
number of fields of the entry, the record under the cursor is passed to
btr_cur_del_unmark_for_ibuf(). */
low_match = page_cur_search(page, index, entry,
2878
PAGE_CUR_LE, &page_cur);
2880
if (low_match == dtuple_get_n_fields(entry)) {
2881
rec = page_cur_get_rec(&page_cur);
2883
btr_cur_del_unmark_for_ibuf(rec, mtr);
2885
rec = page_cur_tuple_insert(&page_cur, entry, index, mtr);
2888
/* If the record did not fit, reorganize */
2890
btr_page_reorganize(page, index, mtr);
2892
page_cur_search(page, index, entry,
2893
PAGE_CUR_LE, &page_cur);
2895
/* This time the record must fit */
2896
if (UNIV_UNLIKELY(!page_cur_tuple_insert(
2897
&page_cur, entry, index,
2900
ut_print_timestamp(stderr);
2903
" InnoDB: Error: Insert buffer insert"
2904
" fails; page free %lu,"
2905
" dtuple size %lu\n",
2906
(ulong) page_get_max_insert_size(
2908
(ulong) rec_get_converted_size(
2910
fputs("InnoDB: Cannot insert index record ",
2912
dtuple_print(stderr, entry);
2913
fputs("\nInnoDB: The table where"
2914
" this index record belongs\n"
2915
"InnoDB: is now probably corrupt."
2916
" Please run CHECK TABLE on\n"
2917
"InnoDB: that table.\n", stderr);
2919
/* Extra diagnostics for the failed insert: look up the ibuf bitmap page
for this index page and read its IBUF_BITMAP_FREE bits; presumably this
is the value reported by the Bitmap bits message below. */
bitmap_page = ibuf_bitmap_get_map_page(
2920
buf_frame_get_space_id(page),
2921
buf_frame_get_page_no(page),
2923
old_bits = ibuf_bitmap_page_get_bits(
2925
buf_frame_get_page_no(page),
2926
IBUF_BITMAP_FREE, mtr);
2928
fprintf(stderr, "InnoDB: Bitmap bits %lu\n",
2931
fputs("InnoDB: Submit a detailed bug report"
2932
" to http://bugs.mysql.com\n", stderr);
2938
/*************************************************************************
2939
Deletes from ibuf the record on which pcur is positioned. If we have to
2940
resort to a pessimistic delete, this function commits mtr and closes
2946
/* out: TRUE if mtr was committed and pcur
2947
closed in this operation */
2948
ulint space, /* in: space id */
2949
ulint page_no,/* in: index page number where the record
2951
btr_pcur_t* pcur, /* in: pcur positioned on the record to
2952
delete, having latch mode BTR_MODIFY_LEAF */
2953
dtuple_t* search_tuple,
2954
/* in: search tuple for entries of page_no */
2955
mtr_t* mtr) /* in: mtr */
2958
ibuf_data_t* ibuf_data;
2962
/* Debug assertion: ibuf_inside() must hold. */
ut_ad(ibuf_inside());
2964
/* First attempt: an optimistic delete through the B-tree cursor of
pcur. */
success = btr_cur_optimistic_delete(btr_pcur_get_btr_cur(pcur), mtr);
2967
/* Debug-only bookkeeping (UNIV_IBUF_DEBUG): lower the ibuf count kept
for this space and page by one. */
#ifdef UNIV_IBUF_DEBUG
2969
"Decrementing ibuf count of space %lu page %lu\n"
2970
"from %lu by 1\n", space, page_no,
2971
ibuf_count_get(space, page_no));
2972
ibuf_count_set(space, page_no,
2973
ibuf_count_get(space, page_no) - 1);
2978
/* We have to resort to a pessimistic delete from ibuf. The cursor
position is stored and mtr is committed first. The steps that follow
take ibuf_mutex, restore the position with BTR_MODIFY_TREE, call
btr_cur_pessimistic_delete() (whose error code must be DB_SUCCESS),
refresh the ibuf data sizes from the root page, and finally commit mtr,
close pcur and release ibuf_mutex. NOTE(review): the diagnostics between
the restore and the delete presumably belong to the branch taken when
the restore fails; the condition itself is not shown in this fragment. */
2979
btr_pcur_store_position(pcur, mtr);
2981
btr_pcur_commit_specify_mtr(pcur, mtr);
2983
/* Currently the insert buffer of space 0 takes care of inserts to all
2986
ibuf_data = fil_space_get_ibuf_data(0);
2988
mutex_enter(&ibuf_mutex);
2992
success = btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, mtr);
2996
"InnoDB: ERROR: Submit the output to"
2997
" http://bugs.mysql.com\n"
2998
"InnoDB: ibuf cursor restoration fails!\n"
2999
"InnoDB: ibuf record inserted to page %lu\n",
3003
rec_print_old(stderr, btr_pcur_get_rec(pcur));
3004
rec_print_old(stderr, pcur->old_rec);
3005
dtuple_print(stderr, search_tuple);
3007
rec_print_old(stderr,
3008
page_rec_get_next(btr_pcur_get_rec(pcur)));
3011
btr_pcur_commit_specify_mtr(pcur, mtr);
3013
fputs("InnoDB: Validating insert buffer tree:\n", stderr);
3014
if (!btr_validate_index(ibuf_data->index, NULL)) {
3018
fprintf(stderr, "InnoDB: ibuf tree ok\n");
3021
btr_pcur_close(pcur);
3023
mutex_exit(&ibuf_mutex);
3028
root = ibuf_tree_root_get(ibuf_data, 0, mtr);
3030
btr_cur_pessimistic_delete(&err, TRUE, btr_pcur_get_btr_cur(pcur),
3032
ut_a(err == DB_SUCCESS);
3034
#ifdef UNIV_IBUF_DEBUG
3035
ibuf_count_set(space, page_no, ibuf_count_get(space, page_no) - 1);
3039
ibuf_data_sizes_update(ibuf_data, root, mtr);
3041
ut_ad(ibuf_validate_low());
3043
btr_pcur_commit_specify_mtr(pcur, mtr);
3045
btr_pcur_close(pcur);
3047
mutex_exit(&ibuf_mutex);
3052
/*************************************************************************
3053
When an index page is read from a disk to the buffer pool, this function
3054
inserts to the page the possible index entries buffered in the insert buffer.
3055
The entries are deleted from the insert buffer. If the page is not read, but
3056
created in the buffer pool, this function deletes its buffered entries from
3057
the insert buffer; there can exist entries for such a page if the page
3058
belonged to an index which subsequently was dropped. */
3061
ibuf_merge_or_delete_for_page(
3062
/*==========================*/
3063
page_t* page, /* in: if page has been read from disk, pointer to
3064
the page x-latched, else NULL */
3065
ulint space, /* in: space id of the index page */
3066
ulint page_no,/* in: page number of the index page */
3067
ibool update_ibuf_bitmap)/* in: normally this is set to TRUE, but if
3068
we have deleted or are deleting the tablespace, then we
3069
naturally do not want to update a non-existent bitmap
3075
dtuple_t* search_tuple;
3078
page_t* bitmap_page;
3079
ibuf_data_t* ibuf_data;
3081
#ifdef UNIV_IBUF_DEBUG
3084
ibool tablespace_being_deleted = FALSE;
3085
ibool corruption_noticed = FALSE;
3088
if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {
3093
if (ibuf_fixed_addr_page(space, page_no) || fsp_descr_page(page_no)
3094
|| trx_sys_hdr_page(space, page_no)) {
3098
if (update_ibuf_bitmap) {
3099
/* If the following returns FALSE, we get the counter
3100
incremented, and must decrement it when we leave this
3101
function. When the counter is > 0, that prevents tablespace
3102
from being dropped. */
3104
tablespace_being_deleted = fil_inc_pending_ibuf_merges(space);
3106
if (tablespace_being_deleted) {
3107
/* Do not try to read the bitmap page from space;
3108
just delete the ibuf records for the page */
3111
update_ibuf_bitmap = FALSE;
3115
/* While the bitmap may still be updated, consult it first: when the
IBUF_BITMAP_BUFFERED bit of this page is clear there is nothing to
merge, and the pending-merge counter is released again through
fil_decr_pending_ibuf_merges() unless the tablespace is being deleted. */
if (update_ibuf_bitmap) {
3117
bitmap_page = ibuf_bitmap_get_map_page(space, page_no, &mtr);
3119
if (!ibuf_bitmap_page_get_bits(bitmap_page, page_no,
3120
IBUF_BITMAP_BUFFERED, &mtr)) {
3121
/* No inserts buffered for this page */
3124
if (!tablespace_being_deleted) {
3125
fil_decr_pending_ibuf_merges(space);
3133
/* Currently the insert buffer of space 0 takes care of inserts to all
3136
ibuf_data = fil_space_get_ibuf_data(0);
3140
heap = mem_heap_create(512);
3142
if (!trx_sys_multiple_tablespace_format) {
3143
ut_a(trx_doublewrite_must_reset_space_ids);
3144
search_tuple = ibuf_search_tuple_build(space, page_no, heap);
3146
search_tuple = ibuf_new_search_tuple_build(space, page_no,
3151
/* Move the ownership of the x-latch on the page to this OS
3152
thread, so that we can acquire a second x-latch on it. This
3153
is needed for the insert operations to the index page to pass
3154
the debug checks. */
3156
block = buf_block_align(page);
3157
rw_lock_x_lock_move_ownership(&(block->lock));
3159
/* A page whose type is not FIL_PAGE_INDEX is flagged through
corruption_noticed; the ibuf bitmap page and the page itself are dumped
together with the error text below, which states that the merge for
this page is skipped. */
if (fil_page_get_type(page) != FIL_PAGE_INDEX) {
3161
corruption_noticed = TRUE;
3163
ut_print_timestamp(stderr);
3167
fputs(" InnoDB: Dump of the ibuf bitmap page:\n",
3170
bitmap_page = ibuf_bitmap_get_map_page(space, page_no,
3172
buf_page_print(bitmap_page);
3176
fputs("\nInnoDB: Dump of the page:\n", stderr);
3178
buf_page_print(page);
3181
"InnoDB: Error: corruption in the tablespace."
3182
" Bitmap shows insert\n"
3183
"InnoDB: buffer records to page n:o %lu"
3184
" though the page\n"
3185
"InnoDB: type is %lu, which is"
3186
" not an index page!\n"
3187
"InnoDB: We try to resolve the problem"
3188
" by skipping the insert buffer\n"
3189
"InnoDB: merge for this page."
3190
" Please run CHECK TABLE on your tables\n"
3191
"InnoDB: to determine if they are corrupt"
3193
"InnoDB: Please submit a detailed bug report"
3194
" to http://bugs.mysql.com\n\n",
3196
(ulong) fil_page_get_type(page));
3201
#ifdef UNIV_IBUF_DEBUG
3208
ibool success = buf_page_get_known_nowait(RW_X_LATCH, page,
3213
#ifdef UNIV_SYNC_DEBUG
3214
buf_page_dbg_add_level(page, SYNC_TREE_NODE);
3215
#endif /* UNIV_SYNC_DEBUG */
3218
/* Position pcur in the insert buffer at the first entry for this
3220
btr_pcur_open_on_user_rec(ibuf_data->index, search_tuple, PAGE_CUR_GE,
3221
BTR_MODIFY_LEAF, &pcur, &mtr);
3222
if (!btr_pcur_is_on_user_rec(&pcur, &mtr)) {
3223
ut_ad(btr_pcur_is_after_last_in_tree(&pcur, &mtr));
3229
ut_ad(btr_pcur_is_on_user_rec(&pcur, &mtr));
3231
ibuf_rec = btr_pcur_get_rec(&pcur);
3233
/* Check if the entry is for this index page */
3234
if (ibuf_rec_get_page_no(ibuf_rec) != page_no
3235
|| ibuf_rec_get_space(ibuf_rec) != space) {
3237
page_header_reset_last_insert(page, &mtr);
3242
/* Once corruption has been noticed the buffered record is only printed
and reported as discarded. */
if (corruption_noticed) {
3243
fputs("InnoDB: Discarding record\n ", stderr);
3244
rec_print_old(stderr, ibuf_rec);
3245
fputs("\n from the insert buffer!\n\n", stderr);
3247
/* Now we have at pcur a record which should be
3248
inserted to the index page; NOTE that the call below
3249
copies pointers to fields in ibuf_rec, and we must
3250
keep the latch to the ibuf_rec page until the
3251
insertion is finished! */
3252
dict_index_t* dummy_index;
3253
/* The max trx id read from the frame that holds ibuf_rec is applied to
the index page through page_update_max_trx_id(). */
dulint max_trx_id = page_get_max_trx_id(
3254
buf_frame_align(ibuf_rec));
3255
page_update_max_trx_id(page, max_trx_id);
3257
entry = ibuf_build_entry_from_ibuf_rec(
3258
ibuf_rec, heap, &dummy_index);
3259
/* Debug-only check (UNIV_IBUF_DEBUG): accumulate the size of each merged
entry plus its page directory reservation and assert that the total
stays within 4 * UNIV_PAGE_SIZE / IBUF_PAGE_SIZE_PER_FREE_SPACE. */
#ifdef UNIV_IBUF_DEBUG
3260
volume += rec_get_converted_size(dummy_index, entry)
3261
+ page_dir_calc_reserved_space(1);
3262
ut_a(volume <= 4 * UNIV_PAGE_SIZE
3263
/ IBUF_PAGE_SIZE_PER_FREE_SPACE);
3265
ibuf_insert_to_index_page(entry, page,
3267
ibuf_dummy_index_free(dummy_index);
3272
/* Delete the record from ibuf */
3273
if (ibuf_delete_rec(space, page_no, &pcur, search_tuple,
3275
/* Deletion was pessimistic and mtr was committed:
3276
we start from the beginning again */
3281
if (btr_pcur_is_after_last_on_page(&pcur, &mtr)) {
3283
btr_pcur_close(&pcur);
3290
#ifdef UNIV_IBUF_DEBUG
3291
if (ibuf_count_get(space, page_no) > 0) {
3292
/* btr_print_tree(ibuf_data->index->tree, 100);
3296
if (update_ibuf_bitmap) {
3297
bitmap_page = ibuf_bitmap_get_map_page(space, page_no, &mtr);
3298
ibuf_bitmap_page_set_bits(bitmap_page, page_no,
3299
IBUF_BITMAP_BUFFERED, FALSE, &mtr);
3301
ulint old_bits = ibuf_bitmap_page_get_bits(
3302
bitmap_page, page_no, IBUF_BITMAP_FREE, &mtr);
3303
ulint new_bits = ibuf_index_page_calc_free(page);
3304
#if 0 /* defined UNIV_IBUF_DEBUG */
3305
fprintf(stderr, "Old bits %lu new bits %lu"
3308
page_get_max_insert_size_after_reorganize(
3311
/* The bitmap is written again only when the bits computed from the page
by ibuf_index_page_calc_free() differ from the IBUF_BITMAP_FREE bits
read from the bitmap page. */
if (old_bits != new_bits) {
3312
ibuf_bitmap_page_set_bits(bitmap_page, page_no,
3318
#if 0 /* defined UNIV_IBUF_DEBUG */
3320
"Ibuf merge %lu records volume %lu to page no %lu\n",
3321
n_inserts, volume, page_no);
3324
btr_pcur_close(&pcur);
3325
mem_heap_free(heap);
3327
/* Protect our statistics keeping from race conditions */
3328
mutex_enter(&ibuf_mutex);
3330
ibuf_data->n_merges++;
3331
ibuf_data->n_merged_recs += n_inserts;
3333
mutex_exit(&ibuf_mutex);
3335
/* Give back the pending-merge counter obtained from
fil_inc_pending_ibuf_merges(); it is held only when the bitmap was to be
updated and the tablespace was not being deleted. */
if (update_ibuf_bitmap && !tablespace_being_deleted) {
3337
fil_decr_pending_ibuf_merges(space);
3341
#ifdef UNIV_IBUF_DEBUG
3342
ut_a(ibuf_count_get(space, page_no) == 0);
3346
/*************************************************************************
3347
Deletes all entries in the insert buffer for a given space id. This is used
3348
in DISCARD TABLESPACE and IMPORT TABLESPACE.
3349
NOTE: this does not update the page free bitmaps in the space. The space will
3350
become CORRUPT when you call this function! */
3353
ibuf_delete_for_discarded_space(
3354
/*============================*/
3355
ulint space) /* in: space id */
3359
dtuple_t* search_tuple;
3363
ibuf_data_t* ibuf_data;
3367
/* Currently the insert buffer of space 0 takes care of inserts to all
3370
ibuf_data = fil_space_get_ibuf_data(0);
3372
heap = mem_heap_create(512);
3374
/* Use page number 0 to build the search tuple so that we get the
3375
cursor positioned at the first entry for this space id */
3377
search_tuple = ibuf_new_search_tuple_build(space, 0, heap);
3385
/* Position pcur in the insert buffer at the first entry for the
3387
btr_pcur_open_on_user_rec(ibuf_data->index, search_tuple, PAGE_CUR_GE,
3388
BTR_MODIFY_LEAF, &pcur, &mtr);
3389
if (!btr_pcur_is_on_user_rec(&pcur, &mtr)) {
3390
ut_ad(btr_pcur_is_after_last_in_tree(&pcur, &mtr));
3396
ut_ad(btr_pcur_is_on_user_rec(&pcur, &mtr));
3398
ibuf_rec = btr_pcur_get_rec(&pcur);
3400
/* Check if the entry is for this space */
3401
if (ibuf_rec_get_space(ibuf_rec) != space) {
3406
/* The page number stored in the buffered record is needed as an
argument of ibuf_delete_rec() below. */
page_no = ibuf_rec_get_page_no(ibuf_rec);
3410
/* Delete the record from ibuf */
3411
closed = ibuf_delete_rec(space, page_no, &pcur, search_tuple,
3414
/* Deletion was pessimistic and mtr was committed:
3415
we start from the beginning again */
3422
if (btr_pcur_is_after_last_on_page(&pcur, &mtr)) {
3424
btr_pcur_close(&pcur);
3434
btr_pcur_close(&pcur);
3436
/* Protect our statistics keeping from race conditions: under ibuf_mutex
the merge counter is incremented once and n_inserts is added to the
merged record counter. */
3437
mutex_enter(&ibuf_mutex);
3439
ibuf_data->n_merges++;
3440
ibuf_data->n_merged_recs += n_inserts;
3442
mutex_exit(&ibuf_mutex);
3445
"InnoDB: Discarded %lu ibuf entries for space %lu\n",
3446
(ulong) n_inserts, (ulong) space);
3450
/* Free the heap that was handed to ibuf_new_search_tuple_build(). */
mem_heap_free(heap);
3454
/**********************************************************************
3455
Validates the ibuf data structures when the caller owns ibuf_mutex. */
3458
ibuf_validate_low(void)
3459
/*===================*/
3460
/* out: TRUE if ok */
3465
/* Debug assertion: the calling thread must own ibuf_mutex. */
ut_ad(mutex_own(&ibuf_mutex));
3469
/* Visit the elements of ibuf->data_list, adding the size of each one to
sum_sizes. */
data = UT_LIST_GET_FIRST(ibuf->data_list);
3472
sum_sizes += data->size;
3474
data = UT_LIST_GET_NEXT(data_list, data);
3477
/* The summed sizes must equal the global ibuf->size. */
ut_a(sum_sizes == ibuf->size);
3482
/**********************************************************************
3483
Checks whether the insert buffer is empty. */
3488
/* out: TRUE if empty */
3497
mutex_enter(&ibuf_mutex);
3499
/* Only the first element of ibuf->data_list is examined; an assertion
further down requires its space to be 0. */
data = UT_LIST_GET_FIRST(ibuf->data_list);
3503
root = ibuf_tree_root_get(data, 0, &mtr);
3505
/* The tree is considered empty when its root page holds no records. */
if (page_get_n_recs(root) == 0) {
3509
/* An empty tree whose data struct still has empty == FALSE only
produces the warning below, which itself describes the condition as
legal. */
if (data->empty == FALSE) {
3511
"InnoDB: Warning: insert buffer tree is empty"
3512
" but the data struct does not\n"
3513
"InnoDB: know it. This condition is legal"
3514
" if the master thread has not yet\n"
3515
"InnoDB: run to completion.\n");
3518
ut_a(data->empty == FALSE);
3525
ut_a(data->space == 0);
3527
mutex_exit(&ibuf_mutex);
3534
/**********************************************************************
3535
Prints info of ibuf. */
3540
FILE* file) /* in: file where to print */
3543
#ifdef UNIV_IBUF_DEBUG
3547
mutex_enter(&ibuf_mutex);
3549
data = UT_LIST_GET_FIRST(ibuf->data_list);
3553
"Ibuf: size %lu, free list len %lu, seg size %lu,\n"
3554
"%lu inserts, %lu merged recs, %lu merges\n",
3556
(ulong) data->free_list_len,
3557
(ulong) data->seg_size,
3558
(ulong) data->n_inserts,
3559
(ulong) data->n_merged_recs,
3560
(ulong) data->n_merges);
3561
#ifdef UNIV_IBUF_DEBUG
3562
for (i = 0; i < IBUF_COUNT_N_PAGES; i++) {
3563
if (ibuf_count_get(data->space, i) > 0) {
3566
"Ibuf count for page %lu is %lu\n",
3569
ibuf_count_get(data->space, i));
3573
data = UT_LIST_GET_NEXT(data_list, data);
3576
mutex_exit(&ibuf_mutex);