/******************************************************
(c) 1994-1996 Innobase Oy

Created 6/2/1994 Heikki Tuuri
*******************************************************/

#include "page0page.h"
#include "lock0lock.h"
#include "ibuf0ibuf.h"
Latching strategy of the InnoDB B-tree
--------------------------------------
A tree latch protects all non-leaf nodes of the tree. Each node of a tree
also has a latch of its own.

A B-tree operation normally first acquires an S-latch on the tree. It
searches down the tree and releases the tree latch when it has the
leaf node latch. To save CPU time we do not acquire any latch on
non-leaf nodes of the tree during a search; those pages are only
buffer-fixed (see the sketch below).

If an operation needs to restructure the tree, it acquires an X-latch on
the tree before searching to a leaf node. If it needs, for example, to
split a leaf,
(1) InnoDB decides the split point in the leaf,
(2) allocates a new page,
(3) inserts the appropriate node pointer to the first non-leaf level,
(4) releases the tree X-latch,
(5) and then moves records from the leaf to the new allocated page.
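The sketch below is added for exposition and is not part of the original
file: it restates the latch order of the plain search path in C, with a
hypothetical helper name and a hypothetical leaf_page_no parameter (the
real code finds the leaf from the node pointers). It is fenced in #if 0
so it is never compiled; the real descent is implemented in
btr_cur_search_to_nth_level().

#if 0
static void
btr_sketch_search_latching(
/*=======================*/
	dict_index_t*	index,		/* in: index tree */
	ulint		leaf_page_no,	/* in: leaf page reached by the
					descent (hypothetical) */
	mtr_t*		mtr)		/* in: mtr */
{
	ulint	space = dict_index_get_space(index);
	ulint	zip_size = dict_table_zip_size(index->table);
	ulint	savepoint = mtr_set_savepoint(mtr);

	/* 1. S-latch the whole tree */
	mtr_s_lock(dict_index_get_lock(index), mtr);

	/* 2. Descend the tree; non-leaf pages are only buffer-fixed
	with buf_page_get_with_no_latch(), never latched; the node
	pointer lookup is omitted from this sketch */

	/* 3. S-latch the leaf page the search ends on ... */
	buf_page_get(space, zip_size, leaf_page_no, RW_S_LATCH, mtr);

	/* 4. ... and only then release the tree latch */
	mtr_release_s_latch_at_savepoint(mtr, savepoint,
					 dict_index_get_lock(index));
}
#endif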
Leaf pages of a B-tree contain the index records stored in the
tree. On levels n > 0 we store 'node pointers' to pages on level
n - 1. For each page there is exactly one node pointer stored:
thus our tree is an ordinary B-tree, not a B-link tree.

A node pointer contains a prefix P of an index record. The prefix
is long enough so that it determines an index record uniquely.
The file page number of the child page is added as the last
field. In the child page we can store node pointers or index records
which are >= P in the alphabetical order, but < P1 if there is
a next node pointer on the level, and P1 is its prefix.

If a node pointer with a prefix P points to a non-leaf child,
then the leftmost record in the child must have the same
prefix P. If it points to a leaf node, the child is not required
to contain any record with a prefix equal to P. The leaf case
is decided this way to allow arbitrary deletions in a leaf node
without touching upper levels of the tree.
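The sketch below is added for exposition and is not part of the original
file: it models a node pointer as a plain struct. In the real record
format there is no such struct; the child page number is simply the last
field of the node pointer record. Fenced in #if 0 so it is never
compiled.

#if 0
typedef struct btr_sketch_node_ptr_struct {
	const byte*	prefix;		/* key prefix P, long enough to
					determine one index record
					uniquely */
	ulint		prefix_len;	/* length of P, in bytes */
	ulint		child_page_no;	/* last field: file page number
					of the child page */
} btr_sketch_node_ptr_t;

/* Invariant: every record r on the child page satisfies P <= r in the
alphabetical order, and r < P1, where P1 is the prefix of the next node
pointer on this level. For a non-leaf child the leftmost record equals
P; a leaf child need not contain any record equal to P. */
#endif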
We have predefined a special minimum record which we
define as the smallest record in any alphabetical order.
A minimum record is denoted by setting a bit in the record
header. A minimum record acts as the prefix of a node pointer
which points to a leftmost node on any level of the tree.

In the root node of a B-tree there are two file segment headers.
The leaf pages of a tree are allocated from one file segment, to
make them consecutive on disk if possible. From the other file segment
we allocate pages for the non-leaf levels of the tree.
/******************************************************************
Gets the root node of a tree and x-latches it. */

				/* out: root page, x-latched */
	dict_index_t*	index,	/* in: index tree */
	mtr_t*		mtr)	/* in: mtr */

	space = dict_index_get_space(index);
	zip_size = dict_table_zip_size(index->table);
	root_page_no = dict_index_get_page(index);

	block = btr_block_get(space, zip_size, root_page_no, RW_X_LATCH, mtr);
	ut_a((ibool)!!page_is_comp(buf_block_get_frame(block))
	     == dict_table_is_comp(index->table));

	return(block);

/******************************************************************
Gets the root node of a tree and x-latches it. */

				/* out: root page, x-latched */
	dict_index_t*	index,	/* in: index tree */
	mtr_t*		mtr)	/* in: mtr */

	return(buf_block_get_frame(btr_root_block_get(index, mtr)));
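/* Illustrative usage sketch, not part of the original file: a caller
latches the tree within its own mini-transaction before fetching the
root, as btr_get_size() does further below. Fenced in #if 0 so it is
never compiled. */
#if 0
	mtr_t	mtr;
	page_t*	root;

	mtr_start(&mtr);
	mtr_s_lock(dict_index_get_lock(index), &mtr);

	root = btr_root_get(index, &mtr);
	/* ... read fields of the root page ... */

	mtr_commit(&mtr);
#endif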
/*****************************************************************
Gets pointer to the previous user record in the tree. It is assumed that
the caller has appropriate latches on the page and its neighbor. */

btr_get_prev_user_rec(
/*==================*/
			/* out: previous user record, NULL if there is none */
	rec_t*	rec,	/* in: record on leaf level */
	mtr_t*	mtr)	/* in: mtr holding a latch on the page, and if
			needed, also to the previous page */

	if (!page_rec_is_infimum(rec)) {

		rec_t*	prev_rec = page_rec_get_prev(rec);

		if (!page_rec_is_infimum(prev_rec)) {

			return(prev_rec);
		}
	}

	page = page_align(rec);
	prev_page_no = btr_page_get_prev(page, mtr);

	if (prev_page_no != FIL_NULL) {

		buf_block_t*	prev_block;

		space = page_get_space_id(page);
		zip_size = fil_space_get_zip_size(space);

		prev_block = buf_page_get_with_no_latch(space, zip_size,
							prev_page_no, mtr);
		prev_page = buf_block_get_frame(prev_block);
		/* The caller must already have a latch to the brother */
		ut_ad(mtr_memo_contains(mtr, prev_block,
					MTR_MEMO_PAGE_S_FIX)
		      || mtr_memo_contains(mtr, prev_block,
					   MTR_MEMO_PAGE_X_FIX));
#ifdef UNIV_BTR_DEBUG
		ut_a(page_is_comp(prev_page) == page_is_comp(page));
		ut_a(btr_page_get_next(prev_page, mtr)
		     == page_get_page_no(page));
#endif /* UNIV_BTR_DEBUG */

		return(page_rec_get_prev(page_get_supremum_rec(prev_page)));
	}

	return(NULL);
/*****************************************************************
Gets pointer to the next user record in the tree. It is assumed that the
caller has appropriate latches on the page and its neighbor. */

btr_get_next_user_rec(
/*==================*/
			/* out: next user record, NULL if there is none */
	rec_t*	rec,	/* in: record on leaf level */
	mtr_t*	mtr)	/* in: mtr holding a latch on the page, and if
			needed, also to the next page */

	if (!page_rec_is_supremum(rec)) {

		rec_t*	next_rec = page_rec_get_next(rec);

		if (!page_rec_is_supremum(next_rec)) {

			return(next_rec);
		}
	}

	page = page_align(rec);
	next_page_no = btr_page_get_next(page, mtr);

	if (next_page_no != FIL_NULL) {

		buf_block_t*	next_block;

		space = page_get_space_id(page);
		zip_size = fil_space_get_zip_size(space);

		next_block = buf_page_get_with_no_latch(space, zip_size,
							next_page_no, mtr);
		next_page = buf_block_get_frame(next_block);
		/* The caller must already have a latch to the brother */
		ut_ad(mtr_memo_contains(mtr, next_block, MTR_MEMO_PAGE_S_FIX)
		      || mtr_memo_contains(mtr, next_block,
					   MTR_MEMO_PAGE_X_FIX));
#ifdef UNIV_BTR_DEBUG
		ut_a(page_is_comp(next_page) == page_is_comp(page));
		ut_a(btr_page_get_prev(next_page, mtr)
		     == page_get_page_no(page));
#endif /* UNIV_BTR_DEBUG */

		return(page_rec_get_next(page_get_infimum_rec(next_page)));
	}

	return(NULL);
/******************************************************************
Creates a new index page (not the root, and also not
used in page reorganization). */

	buf_block_t*	block,	/* in/out: page to be created */
	page_zip_des_t*	page_zip,/* in/out: compressed page, or NULL */
	dict_index_t*	index,	/* in: index */
	ulint		level,	/* in: the B-tree level of the page */
	mtr_t*		mtr)	/* in: mtr */

	page_t*	page = buf_block_get_frame(block);

	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));

	if (UNIV_LIKELY_NULL(page_zip)) {
		page_create_zip(block, index, level, mtr);
	} else {
		page_create(block, mtr, dict_table_is_comp(index->table));
		/* Set the level of the new index page */
		btr_page_set_level(page, NULL, level, mtr);
	}

	block->check_index_page_at_flush = TRUE;

	btr_page_set_index_id(page, page_zip, index->id, mtr);
/******************************************************************
Allocates a new file page to be used in an ibuf tree. Takes the page from
the free list of the tree, which must contain pages! */

btr_page_alloc_for_ibuf(
/*====================*/
				/* out: new allocated block, x-latched */
	dict_index_t*	index,	/* in: index tree */
	mtr_t*		mtr)	/* in: mtr */

	fil_addr_t	node_addr;
	buf_block_t*	new_block;

	root = btr_root_get(index, mtr);

	node_addr = flst_get_first(root + PAGE_HEADER
				   + PAGE_BTR_IBUF_FREE_LIST, mtr);
	ut_a(node_addr.page != FIL_NULL);

	new_block = buf_page_get(dict_index_get_space(index),
				 dict_table_zip_size(index->table),
				 node_addr.page, RW_X_LATCH, mtr);
	new_page = buf_block_get_frame(new_block);
#ifdef UNIV_SYNC_DEBUG
	buf_block_dbg_add_level(new_block, SYNC_TREE_NODE_NEW);
#endif /* UNIV_SYNC_DEBUG */

	flst_remove(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
		    new_page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE,
		    mtr);
	ut_ad(flst_validate(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
			    mtr));

	return(new_block);
/******************************************************************
Allocates a new file page to be used in an index tree. NOTE: we assume
that the caller has made the reservation for free extents! */

				/* out: new allocated block, x-latched;
				NULL if out of space */
	dict_index_t*	index,		/* in: index */
	ulint		hint_page_no,	/* in: hint of a good page */
	byte		file_direction,	/* in: direction where a possible
					page split is made */
	ulint		level,		/* in: level where the page is placed
					in the tree */
	mtr_t*		mtr)	/* in: mtr */

	fseg_header_t*	seg_header;
	buf_block_t*	new_block;

	if (dict_index_is_ibuf(index)) {

		return(btr_page_alloc_for_ibuf(index, mtr));
	}

	root = btr_root_get(index, mtr);

	if (level == 0) {
		seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
	} else {
		seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;
	}

	/* Parameter TRUE below states that the caller has made the
	reservation for free extents, and thus we know that a page can
	be allocated: */

	new_page_no = fseg_alloc_free_page_general(seg_header, hint_page_no,
						   file_direction, TRUE, mtr);
	if (new_page_no == FIL_NULL) {

		return(NULL);
	}

	new_block = buf_page_get(dict_index_get_space(index),
				 dict_table_zip_size(index->table),
				 new_page_no, RW_X_LATCH, mtr);
#ifdef UNIV_SYNC_DEBUG
	buf_block_dbg_add_level(new_block, SYNC_TREE_NODE_NEW);
#endif /* UNIV_SYNC_DEBUG */

	return(new_block);
/******************************************************************
Gets the number of pages in a B-tree. */

				/* out: number of pages */
	dict_index_t*	index,	/* in: index */
	ulint		flag)	/* in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */

	fseg_header_t*	seg_header;

	mtr_s_lock(dict_index_get_lock(index), &mtr);

	root = btr_root_get(index, &mtr);

	if (flag == BTR_N_LEAF_PAGES) {
		seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;

		fseg_n_reserved_pages(seg_header, &n, &mtr);

	} else if (flag == BTR_TOTAL_SIZE) {
		seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;

		n = fseg_n_reserved_pages(seg_header, &dummy, &mtr);

		seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;

		n += fseg_n_reserved_pages(seg_header, &dummy, &mtr);
	}
/******************************************************************
Frees a page used in an ibuf tree. Puts the page to the free list of the
ibuf tree. */

btr_page_free_for_ibuf(
/*===================*/
	dict_index_t*	index,	/* in: index tree */
	buf_block_t*	block,	/* in: block to be freed, x-latched */
	mtr_t*		mtr)	/* in: mtr */

	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));

	root = btr_root_get(index, mtr);

	flst_add_first(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
		       buf_block_get_frame(block)
		       + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, mtr);

	ut_ad(flst_validate(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
			    mtr));
/******************************************************************
Frees a file page used in an index tree. Can also be used for BLOB
external storage pages, because the page level 0 can be given as an
argument. */

	dict_index_t*	index,	/* in: index tree */
	buf_block_t*	block,	/* in: block to be freed, x-latched */
	ulint		level,	/* in: page level */
	mtr_t*		mtr)	/* in: mtr */

	fseg_header_t*	seg_header;

	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));

	/* The page gets invalid for optimistic searches: increment the frame
	modify clock */

	buf_block_modify_clock_inc(block);

	if (dict_index_is_ibuf(index)) {

		btr_page_free_for_ibuf(index, block, mtr);

		return;
	}

	root = btr_root_get(index, mtr);

	if (level == 0) {
		seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
	} else {
		seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;
	}

	fseg_free_page(seg_header,
		       buf_block_get_space(block),
		       buf_block_get_page_no(block), mtr);
/******************************************************************
Frees a file page used in an index tree. NOTE: cannot free field external
storage pages because the page must contain info on its level. */

	dict_index_t*	index,	/* in: index tree */
	buf_block_t*	block,	/* in: block to be freed, x-latched */
	mtr_t*		mtr)	/* in: mtr */

	level = btr_page_get_level(buf_block_get_frame(block), mtr);

	btr_page_free_low(index, block, level, mtr);
/******************************************************************
Sets the child node file address in a node pointer. */

btr_node_ptr_set_child_page_no(
/*===========================*/
	rec_t*		rec,	/* in: node pointer record */
	page_zip_des_t*	page_zip,/* in/out: compressed page whose uncompressed
				part will be updated, or NULL */
	const ulint*	offsets,/* in: array returned by rec_get_offsets() */
	ulint		page_no,/* in: child node address */
	mtr_t*		mtr)	/* in: mtr */

	ut_ad(rec_offs_validate(rec, NULL, offsets));
	ut_ad(!page_is_leaf(page_align(rec)));
	ut_ad(!rec_offs_comp(offsets) || rec_get_node_ptr_flag(rec));

	/* The child address is in the last field */
	field = rec_get_nth_field(rec, offsets,
				  rec_offs_n_fields(offsets) - 1, &len);

	ut_ad(len == REC_NODE_PTR_SIZE);

	if (UNIV_LIKELY_NULL(page_zip)) {
		page_zip_write_node_ptr(page_zip, rec,
					rec_offs_data_size(offsets),
					page_no, mtr);
	} else {
		mlog_write_ulint(field, page_no, MLOG_4BYTES, mtr);
	}
/****************************************************************
Returns the child page of a node pointer and x-latches it. */

btr_node_ptr_get_child(
/*===================*/
				/* out: child page, x-latched */
	const rec_t*	node_ptr,/* in: node pointer */
	dict_index_t*	index,	/* in: index */
	const ulint*	offsets,/* in: array returned by rec_get_offsets() */
	mtr_t*		mtr)	/* in: mtr */

	ut_ad(rec_offs_validate(node_ptr, index, offsets));

	space = page_get_space_id(page_align(node_ptr));
	page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets);

	return(btr_block_get(space, dict_table_zip_size(index->table),
			     page_no, RW_X_LATCH, mtr));
/****************************************************************
Returns the upper level node pointer to a page. It is assumed that mtr holds
an x-latch on the tree. */

btr_page_get_father_node_ptr(
/*=========================*/
				/* out: rec_get_offsets() of the
				node pointer record */
	ulint*		offsets,/* in: work area for the return value */
	mem_heap_t*	heap,	/* in: memory heap to use */
	btr_cur_t*	cursor,	/* in: cursor pointing to user record,
				out: cursor on node pointer record,
				its page x-latched */
	mtr_t*		mtr)	/* in: mtr */

	page_no = buf_block_get_page_no(btr_cur_get_block(cursor));
	index = btr_cur_get_index(cursor);

	ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
				MTR_MEMO_X_LOCK));

	ut_ad(dict_index_get_page(index) != page_no);

	level = btr_page_get_level(btr_cur_get_page(cursor), mtr);
	user_rec = btr_cur_get_rec(cursor);
	ut_a(page_rec_is_user_rec(user_rec));
	tuple = dict_index_build_node_ptr(index, user_rec, 0, heap, level);

	btr_cur_search_to_nth_level(index, level + 1, tuple, PAGE_CUR_LE,
				    BTR_CONT_MODIFY_TREE, cursor, 0, mtr);

	node_ptr = btr_cur_get_rec(cursor);
	ut_ad(!page_rec_is_comp(node_ptr)
	      || rec_get_status(node_ptr) == REC_STATUS_NODE_PTR);
	offsets = rec_get_offsets(node_ptr, index, offsets,
				  ULINT_UNDEFINED, &heap);

	if (UNIV_UNLIKELY(btr_node_ptr_get_child_page_no(node_ptr, offsets)
			  != page_no)) {
		fputs("InnoDB: Dump of the child page:\n", stderr);
		buf_page_print(page_align(user_rec), 0);
		fputs("InnoDB: Dump of the parent page:\n", stderr);
		buf_page_print(page_align(node_ptr), 0);

		fputs("InnoDB: Corruption of an index tree: table ", stderr);
		ut_print_name(stderr, NULL, TRUE, index->table_name);
		fputs(", index ", stderr);
		ut_print_name(stderr, NULL, FALSE, index->name);
		fprintf(stderr, ",\n"
			"InnoDB: father ptr page no %lu, child page no %lu\n",
			(ulong)
			btr_node_ptr_get_child_page_no(node_ptr, offsets),
			(ulong) page_no);
		print_rec = page_rec_get_next(
			page_get_infimum_rec(page_align(user_rec)));
		offsets = rec_get_offsets(print_rec, index,
					  offsets, ULINT_UNDEFINED, &heap);
		page_rec_print(print_rec, offsets);
		offsets = rec_get_offsets(node_ptr, index, offsets,
					  ULINT_UNDEFINED, &heap);
		page_rec_print(node_ptr, offsets);

		fputs("InnoDB: You should dump + drop + reimport the table"
		      " to fix the\n"
		      "InnoDB: corruption. If the crash happens at "
		      "the database startup, see\n"
		      "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
		      "forcing-recovery.html about\n"
		      "InnoDB: forcing recovery. "
		      "Then dump + drop + reimport.\n", stderr);
	}

	return(offsets);
/****************************************************************
Returns the upper level node pointer to a page. It is assumed that mtr holds
an x-latch on the tree. */

btr_page_get_father_block(
/*======================*/
				/* out: rec_get_offsets() of the
				node pointer record */
	ulint*		offsets,/* in: work area for the return value */
	mem_heap_t*	heap,	/* in: memory heap to use */
	dict_index_t*	index,	/* in: b-tree index */
	buf_block_t*	block,	/* in: child page in the index */
	mtr_t*		mtr,	/* in: mtr */
	btr_cur_t*	cursor)	/* out: cursor on node pointer record,
				its page x-latched */

	rec_t*	rec
		= page_rec_get_next(page_get_infimum_rec(buf_block_get_frame(
								 block)));
	btr_cur_position(index, rec, block, cursor);
	return(btr_page_get_father_node_ptr(offsets, heap, cursor, mtr));
/****************************************************************
Seeks to the upper level node pointer to a page.
It is assumed that mtr holds an x-latch on the tree. */

	dict_index_t*	index,	/* in: b-tree index */
	buf_block_t*	block,	/* in: child page in the index */
	mtr_t*		mtr,	/* in: mtr */
	btr_cur_t*	cursor)	/* out: cursor on node pointer record,
				its page x-latched */

	rec_t*	rec
		= page_rec_get_next(page_get_infimum_rec(buf_block_get_frame(
								 block)));
	btr_cur_position(index, rec, block, cursor);

	heap = mem_heap_create(100);
	btr_page_get_father_node_ptr(NULL, heap, cursor, mtr);
	mem_heap_free(heap);
/****************************************************************
Creates the root node for a new index tree. */

				/* out: page number of the created root,
				FIL_NULL if did not succeed */
	ulint		type,	/* in: type of the index */
	ulint		space,	/* in: space where created */
	ulint		zip_size,/* in: compressed page size in bytes
				or 0 for uncompressed pages */
	dulint		index_id,/* in: index id */
	dict_index_t*	index,	/* in: index */
	mtr_t*		mtr)	/* in: mini-transaction handle */

	page_zip_des_t*	page_zip;

	/* Create the two new segments (one, in the case of an ibuf tree) for
	the index tree; the segment headers are put on the allocated root page
	(for an ibuf tree, not in the root, but on a separate ibuf header
	page) */

	if (type & DICT_IBUF) {
		/* Allocate first the ibuf header page */
		buf_block_t*	ibuf_hdr_block = fseg_create(
			space, 0,
			IBUF_HEADER + IBUF_TREE_SEG_HEADER, mtr);

#ifdef UNIV_SYNC_DEBUG
		buf_block_dbg_add_level(ibuf_hdr_block, SYNC_TREE_NODE_NEW);
#endif /* UNIV_SYNC_DEBUG */
		ut_ad(buf_block_get_page_no(ibuf_hdr_block)
		      == IBUF_HEADER_PAGE_NO);
		/* Allocate then the next page to the segment: it will be the
		tree root page */

		page_no = fseg_alloc_free_page(buf_block_get_frame(
						       ibuf_hdr_block)
					       + IBUF_HEADER
					       + IBUF_TREE_SEG_HEADER,
					       IBUF_TREE_ROOT_PAGE_NO,
					       FSP_UP, mtr);
		ut_ad(page_no == IBUF_TREE_ROOT_PAGE_NO);

		block = buf_page_get(space, zip_size, page_no,
				     RW_X_LATCH, mtr);
	} else {
		block = fseg_create(space, 0,
				    PAGE_HEADER + PAGE_BTR_SEG_TOP, mtr);
	}

	page_no = buf_block_get_page_no(block);
	frame = buf_block_get_frame(block);

#ifdef UNIV_SYNC_DEBUG
	buf_block_dbg_add_level(block, SYNC_TREE_NODE_NEW);
#endif /* UNIV_SYNC_DEBUG */

	if (type & DICT_IBUF) {
		/* It is an insert buffer tree: initialize the free list */

		ut_ad(page_no == IBUF_TREE_ROOT_PAGE_NO);

		flst_init(frame + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, mtr);
	} else {
		/* It is a non-ibuf tree: create a file segment for leaf
		pages */
		fseg_create(space, page_no,
			    PAGE_HEADER + PAGE_BTR_SEG_LEAF, mtr);
		/* The fseg create acquires a second latch on the page,
		therefore we must declare it: */
#ifdef UNIV_SYNC_DEBUG
		buf_block_dbg_add_level(block, SYNC_TREE_NODE_NEW);
#endif /* UNIV_SYNC_DEBUG */
	}

	/* Create a new index page on the allocated segment page */
	page_zip = buf_block_get_page_zip(block);

	if (UNIV_LIKELY_NULL(page_zip)) {
		page = page_create_zip(block, index, 0, mtr);
	} else {
		page = page_create(block, mtr,
				   dict_table_is_comp(index->table));
		/* Set the level of the new index page */
		btr_page_set_level(page, NULL, 0, mtr);
	}

	block->check_index_page_at_flush = TRUE;

	/* Set the index id of the page */
	btr_page_set_index_id(page, page_zip, index_id, mtr);

	/* Set the next node and previous node fields */
	btr_page_set_next(page, page_zip, FIL_NULL, mtr);
	btr_page_set_prev(page, page_zip, FIL_NULL, mtr);

	/* We reset the free bits for the page to allow creation of several
	trees in the same mtr, otherwise the latch on a bitmap page would
	prevent it because of the latching order */

	if (!(type & DICT_CLUSTERED)) {
		ibuf_reset_free_bits(block);
	}

	/* In the following assertion we test that two records of maximum
	allowed size fit on the root page: this fact is needed to ensure
	correctness of split algorithms */

	ut_ad(page_get_max_insert_size(page, 2) > 2 * BTR_PAGE_MAX_REC_SIZE);

	return(page_no);
/****************************************************************
Frees a B-tree except the root page, which MUST be freed after this
by calling btr_free_root. */

btr_free_but_not_root(
/*==================*/
	ulint	space,		/* in: space where created */
	ulint	zip_size,	/* in: compressed page size in bytes
				or 0 for uncompressed pages */
	ulint	root_page_no)	/* in: root page number */

	root = btr_page_get(space, zip_size, root_page_no, RW_X_LATCH, &mtr);

	/* NOTE: page hash indexes are dropped when a page is freed inside
	fsp0fsp. */

	finished = fseg_free_step(root + PAGE_HEADER + PAGE_BTR_SEG_LEAF,
				  &mtr);

	root = btr_page_get(space, zip_size, root_page_no, RW_X_LATCH, &mtr);

	finished = fseg_free_step_not_header(
		root + PAGE_HEADER + PAGE_BTR_SEG_TOP, &mtr);
/****************************************************************
Frees the B-tree root page. The rest of the tree MUST already have been
freed. */

	ulint	space,		/* in: space where created */
	ulint	zip_size,	/* in: compressed page size in bytes
				or 0 for uncompressed pages */
	ulint	root_page_no,	/* in: root page number */
	mtr_t*	mtr)		/* in: a mini-transaction which has already
				been started */

	fseg_header_t*	header;

	block = btr_block_get(space, zip_size, root_page_no, RW_X_LATCH, mtr);

	btr_search_drop_page_hash_index(block);

	header = buf_block_get_frame(block) + PAGE_HEADER + PAGE_BTR_SEG_TOP;

	while (!fseg_free_step(header, mtr));
/*****************************************************************
Reorganizes an index page. */

btr_page_reorganize_low(
/*====================*/
	ibool		recovery,/* in: TRUE if called in recovery:
				locks should not be updated, i.e.,
				there cannot exist locks on the
				page, and a hash index should not be
				dropped: it cannot exist */
	buf_block_t*	block,	/* in: page to be reorganized */
	dict_index_t*	index,	/* in: record descriptor */
	mtr_t*		mtr)	/* in: mtr */

	page_t*		page		= buf_block_get_frame(block);
	page_zip_des_t*	page_zip	= buf_block_get_page_zip(block);
	buf_block_t*	temp_block;
	ibool		success		= FALSE;

	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
	ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
#ifdef UNIV_ZIP_DEBUG
	ut_a(!page_zip || page_zip_validate(page_zip, page));
#endif /* UNIV_ZIP_DEBUG */
	data_size1 = page_get_data_size(page);
	max_ins_size1 = page_get_max_insert_size_after_reorganize(page, 1);

	/* Write the log record */
	mlog_open_and_write_index(mtr, page, index, page_is_comp(page)
				  ? MLOG_COMP_PAGE_REORGANIZE
				  : MLOG_PAGE_REORGANIZE, 0);

	/* Turn logging off */
	log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);

	temp_block = buf_block_alloc(0);
	temp_page = temp_block->frame;

	/* Copy the old page to temporary space */
	buf_frame_copy(temp_page, page);

	if (UNIV_LIKELY(!recovery)) {
		btr_search_drop_page_hash_index(block);
	}

	/* Recreate the page: note that global data on page (possible
	segment headers, next page-field, etc.) is preserved intact */

	page_create(block, mtr, dict_table_is_comp(index->table));
	block->check_index_page_at_flush = TRUE;

	/* Copy the records from the temporary space to the recreated page;
	do not copy the lock bits yet */

	page_copy_rec_list_end_no_locks(block, temp_block,
					page_get_infimum_rec(temp_page),
					index, mtr);
	/* Copy max trx id to recreated page */
	page_set_max_trx_id(block, NULL, page_get_max_trx_id(temp_page));

	if (UNIV_LIKELY_NULL(page_zip)
	    && UNIV_UNLIKELY
	    (!page_zip_compress(page_zip, page, index, NULL))) {

		/* Restore the old page and exit. */
		buf_frame_copy(page, temp_page);

		goto func_exit;
	}

	if (UNIV_LIKELY(!recovery)) {
		/* Update the record lock bitmaps */
		lock_move_reorganize_page(block, temp_block);
	}

	data_size2 = page_get_data_size(page);
	max_ins_size2 = page_get_max_insert_size_after_reorganize(page, 1);

	if (UNIV_UNLIKELY(data_size1 != data_size2)
	    || UNIV_UNLIKELY(max_ins_size1 != max_ins_size2)) {
		buf_page_print(page, 0);
		buf_page_print(temp_page, 0);
		fprintf(stderr,
			"InnoDB: Error: page old data size %lu"
			" new data size %lu\n"
			"InnoDB: Error: page old max ins size %lu"
			" new max ins size %lu\n"
			"InnoDB: Submit a detailed bug report"
			" to http://bugs.mysql.com\n",
			(unsigned long) data_size1, (unsigned long) data_size2,
			(unsigned long) max_ins_size1,
			(unsigned long) max_ins_size2);
	} else {
		success = TRUE;
	}

func_exit:
#ifdef UNIV_ZIP_DEBUG
	ut_a(!page_zip || page_zip_validate(page_zip, page));
#endif /* UNIV_ZIP_DEBUG */
	buf_block_free(temp_block);

	/* Restore logging mode */
	mtr_set_log_mode(mtr, log_mode);

	return(success);
/*****************************************************************
Reorganizes an index page.
IMPORTANT: if btr_page_reorganize() is invoked on a compressed leaf
page of a non-clustered index, the caller must update the insert
buffer free bits in the same mini-transaction in such a way that the
modification will be redo-logged. */

btr_page_reorganize(
/*================*/
				/* out: TRUE on success, FALSE on failure */
	buf_block_t*	block,	/* in: page to be reorganized */
	dict_index_t*	index,	/* in: record descriptor */
	mtr_t*		mtr)	/* in: mtr */

	return(btr_page_reorganize_low(FALSE, block, index, mtr));
/***************************************************************
Parses a redo log record of reorganizing a page. */

btr_parse_page_reorganize(
/*======================*/
				/* out: end of log record or NULL */
	byte*		ptr,	/* in: buffer */
	byte*		end_ptr __attribute__((unused)),
				/* in: buffer end */
	dict_index_t*	index,	/* in: record descriptor */
	buf_block_t*	block,	/* in: page to be reorganized, or NULL */
	mtr_t*		mtr)	/* in: mtr or NULL */

	ut_ad(ptr && end_ptr);

	/* The record is empty, except for the record initial part */

	if (UNIV_LIKELY(block != NULL)) {
		btr_page_reorganize_low(TRUE, block, index, mtr);
	}

	return(ptr);
/*****************************************************************
Empties an index page. */

	buf_block_t*	block,	/* in: page to be emptied */
	page_zip_des_t*	page_zip,/* out: compressed page, or NULL */
	mtr_t*		mtr,	/* in: mtr */
	dict_index_t*	index)	/* in: index of the page */

	page_t*	page = buf_block_get_frame(block);

	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
#ifdef UNIV_ZIP_DEBUG
	ut_a(!page_zip || page_zip_validate(page_zip, page));
#endif /* UNIV_ZIP_DEBUG */

	btr_search_drop_page_hash_index(block);

	/* Recreate the page: note that global data on page (possible
	segment headers, next page-field, etc.) is preserved intact */

	if (UNIV_LIKELY_NULL(page_zip)) {
		page_create_zip(block, index,
				btr_page_get_level(page, mtr), mtr);
	} else {
		page_create(block, mtr, dict_table_is_comp(index->table));
	}

	block->check_index_page_at_flush = TRUE;
/*****************************************************************
Makes tree one level higher by splitting the root, and inserts
the tuple. It is assumed that mtr contains an x-latch on the tree.
NOTE that the operation of this function must always succeed,
we cannot reverse it: therefore enough free disk space must be
guaranteed to be available before this function is called. */

btr_root_raise_and_insert(
/*======================*/
				/* out: inserted record */
	btr_cur_t*	cursor,	/* in: cursor at which to insert: must be
				on the root page; when the function returns,
				the cursor is positioned on the predecessor
				of the inserted record */
	const dtuple_t*	tuple,	/* in: tuple to insert */
	ulint		n_ext,	/* in: number of externally stored columns */
	mtr_t*		mtr)	/* in: mtr */

	dict_index_t*	index;
	rec_t*		node_ptr_rec;
	page_cur_t*	page_cursor;
	page_zip_des_t*	root_page_zip;
	page_zip_des_t*	new_page_zip;
	buf_block_t*	root_block;
	buf_block_t*	new_block;

	root = btr_cur_get_page(cursor);
	root_block = btr_cur_get_block(cursor);
	root_page_zip = buf_block_get_page_zip(root_block);
#ifdef UNIV_ZIP_DEBUG
	ut_a(!root_page_zip || page_zip_validate(root_page_zip, root));
#endif /* UNIV_ZIP_DEBUG */
	index = btr_cur_get_index(cursor);

	ut_ad(dict_index_get_page(index) == page_get_page_no(root));
	ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
				MTR_MEMO_X_LOCK));
	ut_ad(mtr_memo_contains(mtr, root_block, MTR_MEMO_PAGE_X_FIX));

	btr_search_drop_page_hash_index(root_block);

	/* Allocate a new page to the tree. Root splitting is done by first
	moving the root records to the new page, emptying the root, putting
	a node pointer to the new page, and then splitting the new page. */

	level = btr_page_get_level(root, mtr);

	new_block = btr_page_alloc(index, 0, FSP_NO_DIR, level, mtr);
	new_page = buf_block_get_frame(new_block);
	new_page_zip = buf_block_get_page_zip(new_block);
	ut_a(!new_page_zip == !root_page_zip);
	ut_a(!new_page_zip
	     || page_zip_get_size(new_page_zip)
	     == page_zip_get_size(root_page_zip));

	btr_page_create(new_block, new_page_zip, index, level, mtr);

	/* Set the next node and previous node fields of new page */
	btr_page_set_next(new_page, new_page_zip, FIL_NULL, mtr);
	btr_page_set_prev(new_page, new_page_zip, FIL_NULL, mtr);

	/* Copy the records from root to the new page one by one. */

	if (UNIV_UNLIKELY
	    (!page_copy_rec_list_end(new_block, root_block,
				     page_get_infimum_rec(root),
				     index, mtr))) {

		/* Copy the page byte for byte. */
		page_zip_copy(new_page_zip, new_page,
			      root_page_zip, root, index, mtr);
	}

	/* If this is a pessimistic insert which is actually done to
	perform a pessimistic update then we have stored the lock
	information of the record to be inserted on the infimum of the
	root page: we cannot discard the lock structs on the root page */

	lock_update_root_raise(new_block, root_block);

	/* Create a memory heap where the node pointer is stored */
	heap = mem_heap_create(100);

	rec = page_rec_get_next(page_get_infimum_rec(new_page));
	new_page_no = buf_block_get_page_no(new_block);

	/* Build the node pointer (= node key and page address) for the
	new page */

	node_ptr = dict_index_build_node_ptr(index, rec, new_page_no, heap,
					     level);
	/* The node pointer must be marked as the predefined minimum record,
	as there is no lower alphabetical limit to records in the leftmost
	node of the level: */
	dtuple_set_info_bits(node_ptr,
			     dtuple_get_info_bits(node_ptr)
			     | REC_INFO_MIN_REC_FLAG);

	/* Rebuild the root page to get free space */
	if (UNIV_LIKELY_NULL(root_page_zip)) {
		page_create_zip(root_block, index, level + 1, mtr);
	} else {
		page_create(root_block, mtr, dict_table_is_comp(index->table));
		btr_page_set_level(root, NULL, level + 1, mtr);
	}

	/* Set the next node and previous node fields, although
	they should already have been set. The previous node field
	must be FIL_NULL if root_page_zip != NULL, because the
	REC_INFO_MIN_REC_FLAG (of the first user record) will be
	set if and only if btr_page_get_prev() == FIL_NULL. */
	btr_page_set_next(root, root_page_zip, FIL_NULL, mtr);
	btr_page_set_prev(root, root_page_zip, FIL_NULL, mtr);

	root_block->check_index_page_at_flush = TRUE;

	page_cursor = btr_cur_get_page_cur(cursor);

	/* Insert node pointer to the root */

	page_cur_set_before_first(root_block, page_cursor);

	node_ptr_rec = page_cur_tuple_insert(page_cursor, node_ptr,
					     index, 0, mtr);

	/* The root page should only contain the node pointer
	to new_page at this point. Thus, the data should fit. */
	ut_a(node_ptr_rec);

	/* Free the memory heap */
	mem_heap_free(heap);

	/* We play safe and reset the free bits for the new page */

#if 0
	fprintf(stderr, "Root raise new page no %lu\n", new_page_no);
#endif

	if (!dict_index_is_clust(index)) {
		ibuf_reset_free_bits(new_block);
	}

	/* Reposition the cursor to the child node */
	page_cur_search(new_block, index, tuple,
			PAGE_CUR_LE, page_cursor);

	/* Split the child and insert tuple */
	return(btr_page_split_and_insert(cursor, tuple, n_ext, mtr));
/*****************************************************************
Decides if the page should be split at the convergence point of inserts
converging to the left. */

btr_page_get_split_rec_to_left(
/*===========================*/
				/* out: TRUE if split recommended */
	btr_cur_t*	cursor,	/* in: cursor at which to insert */
	rec_t**		split_rec) /* out: if split recommended,
				the first record on upper half page,
				or NULL if tuple to be inserted should
				be first */

	rec_t*	insert_point;

	page = btr_cur_get_page(cursor);
	insert_point = btr_cur_get_rec(cursor);

	if (page_header_get_ptr(page, PAGE_LAST_INSERT)
	    == page_rec_get_next(insert_point)) {

		infimum = page_get_infimum_rec(page);

		/* If the convergence is in the middle of a page, include also
		the record immediately before the new insert to the upper
		page. Otherwise, we could repeatedly move from page to page
		lots of records smaller than the convergence point. */

		if (infimum != insert_point
		    && page_rec_get_next(infimum) != insert_point) {

			*split_rec = insert_point;
		} else {
			*split_rec = page_rec_get_next(insert_point);
		}

		return(TRUE);
	}

	return(FALSE);
/*****************************************************************
Decides if the page should be split at the convergence point of inserts
converging to the right. */

btr_page_get_split_rec_to_right(
/*============================*/
				/* out: TRUE if split recommended */
	btr_cur_t*	cursor,	/* in: cursor at which to insert */
	rec_t**		split_rec) /* out: if split recommended,
				the first record on upper half page,
				or NULL if tuple to be inserted should
				be first */

	rec_t*	insert_point;

	page = btr_cur_get_page(cursor);
	insert_point = btr_cur_get_rec(cursor);

	/* We use eager heuristics: if the new insert would be right after
	the previous insert on the same page, we assume that there is a
	pattern of sequential inserts here. */

	if (UNIV_LIKELY(page_header_get_ptr(page, PAGE_LAST_INSERT)
			== insert_point)) {

		next_rec = page_rec_get_next(insert_point);

		if (page_rec_is_supremum(next_rec)) {
split_at_new:
			/* Split at the new record to insert */
			*split_rec = NULL;
		} else {
			rec_t*	next_next_rec = page_rec_get_next(next_rec);
			if (page_rec_is_supremum(next_next_rec)) {

				goto split_at_new;
			}

			/* If there are >= 2 user records up from the insert
			point, split all but 1 off. We want to keep one because
			then sequential inserts can use the adaptive hash
			index, as they can do the necessary checks of the right
			search position just by looking at the records on this
			page. */

			*split_rec = next_next_rec;
		}

		return(TRUE);
	}

	return(FALSE);
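/* The sketch below is added for exposition and is not part of the
original file: it restates the sequential-insert heuristic above in
terms of record positions on the page. All names are hypothetical; the
real code works on record pointers, not indexes. Fenced in #if 0 so it
is never compiled. */
#if 0
static ibool
btr_sketch_split_right(
/*===================*/
				/* out: TRUE if a right split is
				recommended */
	ulint	last_ins_pos,	/* in: position the previous insert got */
	ulint	ins_pos,	/* in: position the new record would get */
	ulint	n_recs,		/* in: number of user records on page */
	ibool*	split_at_new,	/* out: TRUE if the new record should be
				the first on the upper half-page */
	ulint*	split_pos)	/* out: if !*split_at_new, position of
				the first existing record to move to
				the upper half-page */
{
	if (ins_pos != last_ins_pos + 1) {

		return(FALSE);	/* no sequential insert pattern */
	}

	if (ins_pos + 1 >= n_recs) {
		/* 0 or 1 user records above the insert point:
		split at the record to be inserted */
		*split_at_new = TRUE;
	} else {
		/* Keep one existing record above the insert point on
		the lower half, so that subsequent sequential inserts
		can still use the adaptive hash index */
		*split_at_new = FALSE;
		*split_pos = ins_pos + 1;
	}

	return(TRUE);
}
#endif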
/*****************************************************************
Calculates a split record such that the tuple will certainly fit on
its half-page when the split is performed. We assume in this function
only that the cursor page has at least one user record. */

btr_page_get_sure_split_rec(
/*========================*/
				/* out: split record, or NULL if tuple
				will be the first record on upper half-page */
	btr_cur_t*	cursor,	/* in: cursor at which insert should be made */
	const dtuple_t*	tuple,	/* in: tuple to insert */
	ulint		n_ext)	/* in: number of externally stored columns */

	page_zip_des_t*	page_zip;

	page = btr_cur_get_page(cursor);

	insert_size = rec_get_converted_size(cursor->index, tuple, n_ext);
	free_space = page_get_free_space_of_empty(page_is_comp(page));

	page_zip = btr_cur_get_page_zip(cursor);
	if (UNIV_LIKELY_NULL(page_zip)) {
		/* Estimate the free space of an empty compressed page. */
		ulint	free_space_zip = page_zip_empty_size(
			cursor->index->n_fields,
			page_zip_get_size(page_zip));

		if (UNIV_LIKELY(free_space > (ulint) free_space_zip)) {
			free_space = (ulint) free_space_zip;
		}
	}

	/* free_space is now the free space of a created new page */

	total_data = page_get_data_size(page) + insert_size;
	total_n_recs = page_get_n_recs(page) + 1;
	ut_ad(total_n_recs >= 2);
	total_space = total_data + page_dir_calc_reserved_space(total_n_recs);

	ins_rec = btr_cur_get_rec(cursor);
	rec = page_get_infimum_rec(page);

	/* We start to include records to the left half, and when the
	space reserved by them exceeds half of total_space, then if
	the included records fit on the left page, they will be put there
	if something was left over also for the right page,
	otherwise the last included record will be the first on the right
	half page */

	do {
		/* Decide the next record to include */
		if (rec == ins_rec) {
			rec = NULL;	/* NULL denotes that tuple is
					now included */
		} else if (rec == NULL) {
			rec = page_rec_get_next(ins_rec);
		} else {
			rec = page_rec_get_next(rec);
		}

		if (rec == NULL) {
			/* Include the insert record */
			incl_data += insert_size;
		} else {
			offsets = rec_get_offsets(rec, cursor->index,
						  offsets, ULINT_UNDEFINED,
						  &heap);
			incl_data += rec_offs_size(offsets);
		}

		n++;
	} while (incl_data + page_dir_calc_reserved_space(n)
		 < total_space / 2);

	if (incl_data + page_dir_calc_reserved_space(n) <= free_space) {
		/* The next record will be the first on
		the right half page if it is not the
		supremum record of page */

		if (rec == ins_rec) {
			rec = NULL;

			goto func_exit;
		} else if (rec == NULL) {
			next_rec = page_rec_get_next(ins_rec);
		} else {
			next_rec = page_rec_get_next(rec);
		}

		if (!page_rec_is_supremum(next_rec)) {

			rec = next_rec;
		}
	}

func_exit:
	if (UNIV_LIKELY_NULL(heap)) {
		mem_heap_free(heap);
	}
	return(rec);
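/* The sketch below is added for exposition and is not part of the
original file: it models the inclusion loop above on a plain array of
record sizes. Records are included into the left half until the space
they reserve reaches half of the total; the next record becomes the
first record of the upper half-page. Names are hypothetical; fenced in
#if 0 so it is never compiled. */
#if 0
static ulint
btr_sketch_sure_split_pos(
/*======================*/
					/* out: position of the first
					record on the right half-page */
	const ulint*	rec_sizes,	/* in: size of each record,
					including the one to insert,
					in page order */
	ulint		n_recs,		/* in: number of entries */
	ulint		total_space)	/* in: data size plus directory
					space reserved by all of them */
{
	ulint	incl_data = 0;
	ulint	i;

	for (i = 0; i < n_recs; i++) {
		incl_data += rec_sizes[i];

		if (incl_data + page_dir_calc_reserved_space(i + 1)
		    >= total_space / 2) {

			break;	/* records 0..i stay on the left half */
		}
	}

	return(i + 1);
}
#endif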
/*****************************************************************
Returns TRUE if the insert fits on the appropriate half-page with the
chosen split_rec. */

btr_page_insert_fits(
/*=================*/
				/* out: TRUE if fits */
	btr_cur_t*	cursor,	/* in: cursor at which insert
				should be made */
	const rec_t*	split_rec,/* in: suggestion for first record
				on upper half-page, or NULL if
				tuple to be inserted should be first */
	const ulint*	offsets,/* in: rec_get_offsets(
				split_rec, cursor->index) */
	const dtuple_t*	tuple,	/* in: tuple to insert */
	ulint		n_ext,	/* in: number of externally stored columns */
	mem_heap_t*	heap)	/* in: temporary memory heap */

	const rec_t*	end_rec;

	page = btr_cur_get_page(cursor);

	ut_ad(!split_rec == !offsets);
	ut_ad(!split_rec
	      || !page_is_comp(page) == !rec_offs_comp(offsets));
	ut_ad(!split_rec
	      || rec_offs_validate(split_rec, cursor->index, offsets));

	insert_size = rec_get_converted_size(cursor->index, tuple, n_ext);
	free_space = page_get_free_space_of_empty(page_is_comp(page));

	/* free_space is now the free space of a created new page */

	total_data = page_get_data_size(page) + insert_size;
	total_n_recs = page_get_n_recs(page) + 1;

	/* We determine which records (from rec to end_rec, not including
	end_rec) will end up on the other half page from tuple when it is
	inserted. */

	if (split_rec == NULL) {
		rec = page_rec_get_next(page_get_infimum_rec(page));
		end_rec = page_rec_get_next(btr_cur_get_rec(cursor));

	} else if (cmp_dtuple_rec(tuple, split_rec, offsets) >= 0) {

		rec = page_rec_get_next(page_get_infimum_rec(page));
		end_rec = split_rec;
	} else {
		rec = split_rec;
		end_rec = page_get_supremum_rec(page);
	}

	if (total_data + page_dir_calc_reserved_space(total_n_recs)
	    <= free_space) {

		/* Ok, there will be enough available space on the
		half page where the tuple is inserted */

		return(TRUE);
	}

	while (rec != end_rec) {
		/* In this loop we calculate the amount of reserved
		space after rec is removed from page. */

		offs = rec_get_offsets(rec, cursor->index, offs,
				       ULINT_UNDEFINED, &heap);

		total_data -= rec_offs_size(offs);
		total_n_recs--;

		if (total_data + page_dir_calc_reserved_space(total_n_recs)
		    <= free_space) {

			/* Ok, there will be enough available space on the
			half page where the tuple is inserted */

			return(TRUE);
		}

		rec = page_rec_get_next_const(rec);
	}

	return(FALSE);
/***********************************************************
Inserts a data tuple to a tree on a non-leaf level. It is assumed
that mtr holds an x-latch on the tree. */

btr_insert_on_non_leaf_level(
/*=========================*/
	dict_index_t*	index,	/* in: index */
	ulint		level,	/* in: level, must be > 0 */
	dtuple_t*	tuple,	/* in: the record to be inserted */
	mtr_t*		mtr)	/* in: mtr */

	big_rec_t*	dummy_big_rec;

	btr_cur_search_to_nth_level(index, level, tuple, PAGE_CUR_LE,
				    BTR_CONT_MODIFY_TREE,
				    &cursor, 0, mtr);

	err = btr_cur_pessimistic_insert(BTR_NO_LOCKING_FLAG
					 | BTR_NO_UNDO_LOG_FLAG,
					 &cursor, tuple, &rec,
					 &dummy_big_rec, 0, NULL, mtr);
	ut_a(err == DB_SUCCESS);
/******************************************************************
Attaches the halves of an index page on the appropriate level in an
index tree. */

btr_attach_half_pages(
/*==================*/
	dict_index_t*	index,		/* in: the index tree */
	buf_block_t*	block,		/* in/out: page to be split */
	rec_t*		split_rec,	/* in: first record on upper
					half page */
	buf_block_t*	new_block,	/* in/out: the new half page */
	ulint		direction,	/* in: FSP_UP or FSP_DOWN */
	mtr_t*		mtr)		/* in: mtr */

	page_t*		page		= buf_block_get_frame(block);
	ulint		lower_page_no;
	ulint		upper_page_no;
	page_zip_des_t*	lower_page_zip;
	page_zip_des_t*	upper_page_zip;
	dtuple_t*	node_ptr_upper;

	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
	ut_ad(mtr_memo_contains(mtr, new_block, MTR_MEMO_PAGE_X_FIX));

	/* Create a memory heap where the data tuple is stored */
	heap = mem_heap_create(1024);

	/* Based on split direction, decide upper and lower pages */
	if (direction == FSP_DOWN) {

		lower_page = buf_block_get_frame(new_block);
		lower_page_no = buf_block_get_page_no(new_block);
		lower_page_zip = buf_block_get_page_zip(new_block);
		upper_page = buf_block_get_frame(block);
		upper_page_no = buf_block_get_page_no(block);
		upper_page_zip = buf_block_get_page_zip(block);

		/* Look up the index for the node pointer to page */
		offsets = btr_page_get_father_block(NULL, heap, index,
						    block, mtr, &cursor);

		/* Replace the address of the old child node (= page) with the
		address of the new lower half */

		btr_node_ptr_set_child_page_no(
			btr_cur_get_rec(&cursor),
			btr_cur_get_page_zip(&cursor),
			offsets, lower_page_no, mtr);
		mem_heap_empty(heap);
	} else {
		lower_page = buf_block_get_frame(block);
		lower_page_no = buf_block_get_page_no(block);
		lower_page_zip = buf_block_get_page_zip(block);
		upper_page = buf_block_get_frame(new_block);
		upper_page_no = buf_block_get_page_no(new_block);
		upper_page_zip = buf_block_get_page_zip(new_block);
	}

	/* Get the level of the split pages */
	level = btr_page_get_level(buf_block_get_frame(block), mtr);

	/* Build the node pointer (= node key and page address) for the upper
	half */

	node_ptr_upper = dict_index_build_node_ptr(index, split_rec,
						   upper_page_no, heap, level);

	/* Insert it next to the pointer to the lower half. Note that this
	may generate recursion leading to a split on the higher level. */

	btr_insert_on_non_leaf_level(index, level + 1, node_ptr_upper, mtr);

	/* Free the memory heap */
	mem_heap_free(heap);

	/* Get the previous and next pages of page */

	prev_page_no = btr_page_get_prev(page, mtr);
	next_page_no = btr_page_get_next(page, mtr);
	space = buf_block_get_space(block);
	zip_size = buf_block_get_zip_size(block);

	/* Update page links of the level */

	if (prev_page_no != FIL_NULL) {
		buf_block_t*	prev_block = btr_block_get(space, zip_size,
							   prev_page_no,
							   RW_X_LATCH, mtr);
#ifdef UNIV_BTR_DEBUG
		ut_a(page_is_comp(prev_block->frame) == page_is_comp(page));
		ut_a(btr_page_get_next(prev_block->frame, mtr)
		     == buf_block_get_page_no(block));
#endif /* UNIV_BTR_DEBUG */

		btr_page_set_next(buf_block_get_frame(prev_block),
				  buf_block_get_page_zip(prev_block),
				  lower_page_no, mtr);
	}

	if (next_page_no != FIL_NULL) {
		buf_block_t*	next_block = btr_block_get(space, zip_size,
							   next_page_no,
							   RW_X_LATCH, mtr);
#ifdef UNIV_BTR_DEBUG
		ut_a(page_is_comp(next_block->frame) == page_is_comp(page));
		ut_a(btr_page_get_prev(next_block->frame, mtr)
		     == page_get_page_no(page));
#endif /* UNIV_BTR_DEBUG */

		btr_page_set_prev(buf_block_get_frame(next_block),
				  buf_block_get_page_zip(next_block),
				  upper_page_no, mtr);
	}

	btr_page_set_prev(lower_page, lower_page_zip, prev_page_no, mtr);
	btr_page_set_next(lower_page, lower_page_zip, upper_page_no, mtr);
	btr_page_set_level(lower_page, lower_page_zip, level, mtr);

	btr_page_set_prev(upper_page, upper_page_zip, lower_page_no, mtr);
	btr_page_set_next(upper_page, upper_page_zip, next_page_no, mtr);
	btr_page_set_level(upper_page, upper_page_zip, level, mtr);
/*****************************************************************
Splits an index page to halves and inserts the tuple. It is assumed
that mtr holds an x-latch to the index tree. NOTE: the tree x-latch
is released within this function! NOTE that the operation of this
function must always succeed, we cannot reverse it: therefore
enough free disk space must be guaranteed to be available before
this function is called. */

btr_page_split_and_insert(
/*======================*/
				/* out: inserted record; NOTE: the tree
				x-latch is released! NOTE: 2 free disk
				pages must be available! */
	btr_cur_t*	cursor,	/* in: cursor at which to insert; when the
				function returns, the cursor is positioned
				on the predecessor of the inserted record */
	const dtuple_t*	tuple,	/* in: tuple to insert */
	ulint		n_ext,	/* in: number of externally stored columns */
	mtr_t*		mtr)	/* in: mtr */

	page_zip_des_t*	page_zip;
	buf_block_t*	new_block;
	page_zip_des_t*	new_page_zip;
	buf_block_t*	left_block;
	buf_block_t*	right_block;
	buf_block_t*	insert_block;
	page_t*		insert_page;
	page_cur_t*	page_cursor;
	byte*		buf = 0; /* remove warning */
	ibool		insert_will_fit;
	ulint		n_iterations = 0;

	heap = mem_heap_create(1024);
	n_uniq = dict_index_get_n_unique_in_tree(cursor->index);
func_start:
	mem_heap_empty(heap);

	ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(cursor->index),
				MTR_MEMO_X_LOCK));
#ifdef UNIV_SYNC_DEBUG
	ut_ad(rw_lock_own(dict_index_get_lock(cursor->index), RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */

	block = btr_cur_get_block(cursor);
	page = buf_block_get_frame(block);
	page_zip = buf_block_get_page_zip(block);

	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
	ut_ad(page_get_n_recs(page) >= 1);

	page_no = buf_block_get_page_no(block);
	/* 1. Decide the split record; split_rec == NULL means that the
	tuple to be inserted should be the first record on the upper
	half-page */

	if (n_iterations > 0) {
		direction = FSP_UP;
		hint_page_no = page_no + 1;
		split_rec = btr_page_get_sure_split_rec(cursor, tuple, n_ext);

	} else if (btr_page_get_split_rec_to_right(cursor, &split_rec)) {
		direction = FSP_UP;
		hint_page_no = page_no + 1;

	} else if (btr_page_get_split_rec_to_left(cursor, &split_rec)) {
		direction = FSP_DOWN;
		hint_page_no = page_no - 1;
	} else {
		direction = FSP_UP;
		hint_page_no = page_no + 1;
		split_rec = page_get_middle_rec(page);
	}

	/* 2. Allocate a new page to the index */
	new_block = btr_page_alloc(cursor->index, hint_page_no, direction,
				   btr_page_get_level(page, mtr), mtr);
	new_page = buf_block_get_frame(new_block);
	new_page_zip = buf_block_get_page_zip(new_block);
	btr_page_create(new_block, new_page_zip, cursor->index,
			btr_page_get_level(page, mtr), mtr);

	/* 3. Calculate the first record on the upper half-page, and the
	first record (move_limit) on original page which ends up on the
	upper half */

	if (split_rec) {
		first_rec = move_limit = split_rec;

		offsets = rec_get_offsets(split_rec, cursor->index, offsets,
					  n_uniq, &heap);

		insert_left = cmp_dtuple_rec(tuple, split_rec, offsets) < 0;

		if (UNIV_UNLIKELY(!insert_left && new_page_zip
				  && n_iterations > 0)) {
			/* If a compressed page has already been split,
			avoid further splits by inserting the record
			to an empty page. */
			split_rec = NULL;
			goto insert_empty;
		}
	} else {
insert_empty:
		insert_left = FALSE;
		buf = mem_alloc(rec_get_converted_size(cursor->index,
						       tuple, n_ext));

		first_rec = rec_convert_dtuple_to_rec(buf, cursor->index,
						      tuple, n_ext);
		move_limit = page_rec_get_next(btr_cur_get_rec(cursor));
	}
	/* 4. Do first the modifications in the tree structure */

	btr_attach_half_pages(cursor->index, block,
			      first_rec, new_block, direction, mtr);

	/* If the split is made on the leaf level and the insert will fit
	on the appropriate half-page, we may release the tree x-latch.
	We can then move the records after releasing the tree latch,
	thus reducing the tree latch contention. */

	if (split_rec) {
		insert_will_fit = !new_page_zip
			&& btr_page_insert_fits(cursor, split_rec,
						offsets, tuple, n_ext, heap);
	} else {
		mem_free(buf);
		insert_will_fit = !new_page_zip
			&& btr_page_insert_fits(cursor, NULL,
						NULL, tuple, n_ext, heap);
	}

	if (insert_will_fit && page_is_leaf(page)) {

		mtr_memo_release(mtr, dict_index_get_lock(cursor->index),
				 MTR_MEMO_X_LOCK);
	}
	/* 5. Move then the records to the new page */
	if (direction == FSP_DOWN) {
		/* fputs("Split left\n", stderr); */

		if (UNIV_UNLIKELY
		    (!page_move_rec_list_start(new_block, block, move_limit,
					       cursor->index, mtr))) {
			/* For some reason, compressing new_page failed,
			even though it should contain fewer records than
			the original page. Copy the page byte for byte
			and then delete the records from both pages
			as appropriate. Deleting will always succeed. */

			page_zip_copy(new_page_zip, new_page,
				      page_zip, page, cursor->index, mtr);
			page_delete_rec_list_end(move_limit - page + new_page,
						 new_block, cursor->index,
						 ULINT_UNDEFINED,
						 ULINT_UNDEFINED, mtr);
			page_delete_rec_list_start(move_limit, block,
						   cursor->index, mtr);
		}

		left_block = new_block;
		right_block = block;

		lock_update_split_left(right_block, left_block);
	} else {
		/* fputs("Split right\n", stderr); */

		if (UNIV_UNLIKELY
		    (!page_move_rec_list_end(new_block, block, move_limit,
					     cursor->index, mtr))) {
			/* For some reason, compressing new_page failed,
			even though it should contain fewer records than
			the original page. Copy the page byte for byte
			and then delete the records from both pages
			as appropriate. Deleting will always succeed. */

			page_zip_copy(new_page_zip, new_page,
				      page_zip, page, cursor->index, mtr);
			page_delete_rec_list_start(move_limit - page
						   + new_page, new_block,
						   cursor->index, mtr);
			page_delete_rec_list_end(move_limit, block,
						 cursor->index,
						 ULINT_UNDEFINED,
						 ULINT_UNDEFINED, mtr);
		}

		left_block = block;
		right_block = new_block;

		lock_update_split_right(right_block, left_block);
	}

#ifdef UNIV_ZIP_DEBUG
	if (UNIV_LIKELY_NULL(page_zip)) {
		ut_a(page_zip_validate(page_zip, page));
		ut_a(page_zip_validate(new_page_zip, new_page));
	}
#endif /* UNIV_ZIP_DEBUG */
	/* At this point, split_rec, move_limit and first_rec may point
	to garbage on the old page. */

	/* 6. The split and the tree modification is now completed. Decide the
	page where the tuple should be inserted */

	if (insert_left) {
		insert_block = left_block;
	} else {
		insert_block = right_block;
	}

	insert_page = buf_block_get_frame(insert_block);

	/* 7. Reposition the cursor for insert and try insertion */
	page_cursor = btr_cur_get_page_cur(cursor);

	page_cur_search(insert_block, cursor->index, tuple,
			PAGE_CUR_LE, page_cursor);

	rec = page_cur_tuple_insert(page_cursor, tuple,
				    cursor->index, n_ext, mtr);

#ifdef UNIV_ZIP_DEBUG
	{
		page_zip_des_t*	insert_page_zip
			= buf_block_get_page_zip(insert_block);
		ut_a(!insert_page_zip
		     || page_zip_validate(insert_page_zip, insert_page));
	}
#endif /* UNIV_ZIP_DEBUG */

	if (UNIV_LIKELY(rec != NULL)) {

		goto func_exit;
	}

	/* 8. If insert did not fit, try page reorganization */

	if (UNIV_UNLIKELY
	    (!btr_page_reorganize(insert_block, cursor->index, mtr))) {

		goto insert_failed;
	}

	page_cur_search(insert_block, cursor->index, tuple,
			PAGE_CUR_LE, page_cursor);
	rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index,
				    n_ext, mtr);

	if (UNIV_UNLIKELY(rec == NULL)) {
		/* The insert did not fit on the page: loop back to the
		start of the function for a new split */
insert_failed:
		/* We play safe and reset the free bits for new_page */
		if (!dict_index_is_clust(cursor->index)) {
			ibuf_reset_free_bits(new_block);
		}

		/* fprintf(stderr, "Split second round %lu\n",
		page_get_page_no(page)); */

		n_iterations++;
		ut_ad(n_iterations < 2
		      || buf_block_get_page_zip(insert_block));
		ut_ad(!insert_will_fit);

		goto func_start;
	}
func_exit:
	/* Insert fit on the page: update the free bits for the
	left and right pages in the same mtr */

	if (!dict_index_is_clust(cursor->index) && page_is_leaf(page)) {
		ibuf_update_free_bits_for_two_pages_low(
			buf_block_get_zip_size(left_block),
			left_block, right_block, mtr);
	}

#if 0
	fprintf(stderr, "Split and insert done %lu %lu\n",
		buf_block_get_page_no(left_block),
		buf_block_get_page_no(right_block));
#endif

	ut_ad(page_validate(buf_block_get_frame(left_block), cursor->index));
	ut_ad(page_validate(buf_block_get_frame(right_block), cursor->index));

	mem_heap_free(heap);
	return(rec);
/*****************************************************************
Removes a page from the level list of pages. */

btr_level_list_remove(
/*==================*/
	ulint	space,	/* in: space where removed */
	ulint	zip_size,/* in: compressed page size in bytes
			or 0 for uncompressed pages */
	page_t*	page,	/* in: page to remove */
	mtr_t*	mtr)	/* in: mtr */

	ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX));
	ut_ad(space == page_get_space_id(page));

	/* Get the previous and next page numbers of page */

	prev_page_no = btr_page_get_prev(page, mtr);
	next_page_no = btr_page_get_next(page, mtr);

	/* Update page links of the level */

	if (prev_page_no != FIL_NULL) {
		buf_block_t*	prev_block
			= btr_block_get(space, zip_size, prev_page_no,
					RW_X_LATCH, mtr);
		page_t*		prev_page
			= buf_block_get_frame(prev_block);
#ifdef UNIV_BTR_DEBUG
		ut_a(page_is_comp(prev_page) == page_is_comp(page));
		ut_a(btr_page_get_next(prev_page, mtr)
		     == page_get_page_no(page));
#endif /* UNIV_BTR_DEBUG */

		btr_page_set_next(prev_page,
				  buf_block_get_page_zip(prev_block),
				  next_page_no, mtr);
	}

	if (next_page_no != FIL_NULL) {
		buf_block_t*	next_block
			= btr_block_get(space, zip_size, next_page_no,
					RW_X_LATCH, mtr);
		page_t*		next_page
			= buf_block_get_frame(next_block);
#ifdef UNIV_BTR_DEBUG
		ut_a(page_is_comp(next_page) == page_is_comp(page));
		ut_a(btr_page_get_prev(next_page, mtr)
		     == page_get_page_no(page));
#endif /* UNIV_BTR_DEBUG */

		btr_page_set_prev(next_page,
				  buf_block_get_page_zip(next_block),
				  prev_page_no, mtr);
	}
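/* The sketch below is added for exposition and is not part of the
original file: the same unlink written for an ordinary in-memory
doubly-linked list. On disk the "pointers" are page numbers, and
FIL_NULL plays the role of NULL. Names are hypothetical; fenced in
#if 0 so it is never compiled. */
#if 0
typedef struct btr_sketch_node_struct	btr_sketch_node_t;
struct btr_sketch_node_struct {
	btr_sketch_node_t*	prev;	/* on-page: btr_page_get_prev() */
	btr_sketch_node_t*	next;	/* on-page: btr_page_get_next() */
};

static void
btr_sketch_level_list_remove(
/*=========================*/
	btr_sketch_node_t*	node)	/* in: node (page) to unlink */
{
	if (node->prev != NULL) {	/* prev_page_no != FIL_NULL */
		node->prev->next = node->next;
	}

	if (node->next != NULL) {	/* next_page_no != FIL_NULL */
		node->next->prev = node->prev;
	}
}
#endif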
/********************************************************************
Writes the redo log record for setting an index record as the predefined
minimum record. */

btr_set_min_rec_mark_log(
/*=====================*/
	rec_t*	rec,	/* in: record */
	byte	type,	/* in: MLOG_COMP_REC_MIN_MARK or MLOG_REC_MIN_MARK */
	mtr_t*	mtr)	/* in: mtr */

	mlog_write_initial_log_record(rec, type, mtr);

	/* Write rec offset as a 2-byte ulint */
	mlog_catenate_ulint(mtr, page_offset(rec), MLOG_2BYTES);
2101
/********************************************************************
Parses the redo log record for setting an index record as the predefined
minimum record. */
byte*
btr_parse_set_min_rec_mark(
/*=======================*/
			/* out: end of log record or NULL */
	byte*	ptr,	/* in: buffer */
	byte*	end_ptr,/* in: buffer end */
	ulint	comp,	/* in: nonzero=compact page format */
	page_t*	page,	/* in: page or NULL */
	mtr_t*	mtr)	/* in: mtr or NULL */
{
	rec_t*	rec;

	if (end_ptr < ptr + 2) {

		return(NULL);
	}

	if (page) {
		ut_a(!page_is_comp(page) == !comp);

		rec = page + mach_read_from_2(ptr);

		btr_set_min_rec_mark(rec, mtr);
	}

	return(ptr + 2);
}
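
/* Editor's note: the log record body is just the 2-byte page offset of
rec, so the writer above and the parser must agree on the same big-endian
encoding (mach_write_to_2()/mach_read_from_2()).  A self-contained sketch
of that round trip in plain C (not the InnoDB mach_* routines): */
#if 0
#include <assert.h>

static void
write_u16(unsigned char* b, unsigned long n)	/* like mach_write_to_2() */
{
	b[0] = (unsigned char) (n >> 8);	/* most significant first */
	b[1] = (unsigned char) n;
}

static unsigned long
read_u16(const unsigned char* b)		/* like mach_read_from_2() */
{
	return(((unsigned long) b[0] << 8) | b[1]);
}

int
main(void)
{
	unsigned char	buf[2];

	write_u16(buf, 0x1234);			/* mlog_catenate_ulint() */
	assert(read_u16(buf) == 0x1234);	/* parse side */
	return(0);
}
#endif
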
/********************************************************************
Sets a record as the predefined minimum record. */
void
btr_set_min_rec_mark(
/*=================*/
	rec_t*	rec,	/* in: record */
	mtr_t*	mtr)	/* in: mtr */
{
	ulint	info_bits;

	if (UNIV_LIKELY(page_rec_is_comp(rec))) {
		info_bits = rec_get_info_bits(rec, TRUE);

		rec_set_info_bits_new(rec, info_bits | REC_INFO_MIN_REC_FLAG);

		btr_set_min_rec_mark_log(rec, MLOG_COMP_REC_MIN_MARK, mtr);
	} else {
		info_bits = rec_get_info_bits(rec, FALSE);

		rec_set_info_bits_old(rec, info_bits | REC_INFO_MIN_REC_FLAG);

		btr_set_min_rec_mark_log(rec, MLOG_REC_MIN_MARK, mtr);
	}
}
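
/* Editor's note: the minimum-record mark is a single bit in the record
header info bits, so setting it is a read-modify-write OR, as sketched
below on a plain byte.  The flag value 0x10 matches my reading of
REC_INFO_MIN_REC_FLAG in rem0rec.h; treat it as an assumption of this
sketch, not a definition. */
#if 0
#define TOY_MIN_REC_FLAG	0x10	/* assumed REC_INFO_MIN_REC_FLAG */

static unsigned char
toy_set_min_rec_mark(unsigned char info_bits)
{
	/* read-modify-write, as rec_get_info_bits() followed by
	rec_set_info_bits_new()/rec_set_info_bits_old() above */
	return((unsigned char) (info_bits | TOY_MIN_REC_FLAG));
}
#endif
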
/*****************************************************************
Deletes on the upper level the node pointer to a page. */
void
btr_node_ptr_delete(
/*================*/
	dict_index_t*	index,	/* in: index tree */
	buf_block_t*	block,	/* in: page whose node pointer is deleted */
	mtr_t*		mtr)	/* in: mtr */
{
	btr_cur_t	cursor;
	ibool		compressed;
	ulint		err;

	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));

	/* Delete node pointer on father page */
	btr_page_get_father(index, block, mtr, &cursor);

	compressed = btr_cur_pessimistic_delete(&err, TRUE, &cursor, FALSE,
						mtr);
	ut_a(err == DB_SUCCESS);

	if (!compressed) {
		btr_cur_compress_if_useful(&cursor, mtr);
	}
}
/*****************************************************************
If the page is the only one on its level, this function moves its records
to the father page, thus reducing the tree height. */
static
void
btr_lift_page_up(
/*=============*/
	dict_index_t*	index,	/* in: index tree */
	buf_block_t*	block,	/* in: page which is the only one on its
				level; must not be empty: use
				btr_discard_only_page_on_level if the last
				record from the page should be removed */
	mtr_t*		mtr)	/* in: mtr */
{
	buf_block_t*	father_block;
	page_t*		father_page;
	ulint		page_level;
	page_zip_des_t*	father_page_zip;
	page_t*		page		= buf_block_get_frame(block);
	ulint		root_page_no;
	buf_block_t*	blocks[BTR_MAX_LEVELS];
	ulint		n_blocks;	/* last used index in blocks[] */
	ulint		i;

	ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL);
	ut_ad(btr_page_get_next(page, mtr) == FIL_NULL);
	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));

	page_level = btr_page_get_level(page, mtr);
	root_page_no = dict_index_get_page(index);

	{
		btr_cur_t	cursor;
		mem_heap_t*	heap	= mem_heap_create(100);
		ulint*		offsets;
		buf_block_t*	b;

		offsets = btr_page_get_father_block(NULL, heap, index,
						    block, mtr, &cursor);
		father_block = btr_cur_get_block(&cursor);
		father_page_zip = buf_block_get_page_zip(father_block);
		father_page = buf_block_get_frame(father_block);

		n_blocks = 0;

		/* Store all ancestor pages so we can reset their
		levels later on.  We have to do all the searches on
		the tree now because later on, after we've replaced
		the first level, the tree is in an inconsistent state
		and can not be searched. */
		for (b = father_block;
		     buf_block_get_page_no(b) != root_page_no; ) {
			ut_a(n_blocks < BTR_MAX_LEVELS);

			offsets = btr_page_get_father_block(offsets, heap,
							    index, b,
							    mtr, &cursor);

			blocks[n_blocks++] = b = btr_cur_get_block(&cursor);
		}

		mem_heap_free(heap);
	}

	btr_search_drop_page_hash_index(block);

	/* Make the father empty */
	btr_page_empty(father_block, father_page_zip, mtr, index);

	/* Set the level before inserting records, because
	page_zip_compress() requires that the first user record
	on a non-leaf page has the min_rec_mark set. */
	btr_page_set_level(father_page, father_page_zip, page_level, mtr);

	/* Copy the records to the father page one by one. */
	if (UNIV_UNLIKELY
	    (!page_copy_rec_list_end(father_block, block,
				     page_get_infimum_rec(page),
				     index, mtr))) {
		const page_zip_des_t*	page_zip
			= buf_block_get_page_zip(block);
		ut_a(father_page_zip);
		ut_a(page_zip);

		/* Copy the page byte for byte. */
		page_zip_copy(father_page_zip, father_page,
			      page_zip, page, index, mtr);
	}

	lock_update_copy_and_discard(father_block, block);

	/* Go upward to root page, decrementing levels by one. */
	for (i = 0; i < n_blocks; i++, page_level++) {
		page_t*	page	= buf_block_get_frame(blocks[i]);

		ut_ad(btr_page_get_level(page, mtr) == page_level + 1);

		btr_page_set_level(page, buf_block_get_page_zip(blocks[i]),
				   page_level, mtr);
	}

	/* Free the file page */
	btr_page_free(index, block, mtr);

	/* We play safe and reset the free bits for the father */
	if (!dict_index_is_clust(index)) {
		ibuf_reset_free_bits(father_block);
	}

	ut_ad(page_validate(father_page, index));
	ut_ad(btr_check_node_ptr(index, father_block, mtr));
}
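
/* Editor's note: a toy sketch of the height reduction above.  The records
of the only child move into the emptied parent, every ancestor's level
field drops by one, and the child page is freed.  The ancestors are
collected into blocks[] up front because the tree cannot be searched
while it is being rewritten.  All toy_* names are hypothetical. */
#if 0
struct toy_page {
	ulint	level;
	/* ... records ... */
};

static void
toy_lift_page_up(struct toy_page* parent, struct toy_page* child,
		 struct toy_page** ancestors, ulint n_ancestors)
{
	ulint	i;

	toy_page_empty(parent);			/* btr_page_empty() */
	parent->level = child->level;		/* btr_page_set_level() */
	toy_copy_records(parent, child);	/* page_copy_rec_list_end() */

	/* every page above the old parent is now one level lower */
	for (i = 0; i < n_ancestors; i++) {
		ancestors[i]->level--;
	}

	toy_page_free(child);			/* btr_page_free() */
}
#endif
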
/*****************************************************************
Tries to merge the page first to the left immediate brother if such a
brother exists, and the node pointers to the current page and to the brother
reside on the same page. If the left brother does not satisfy these
conditions, looks at the right brother. If the page is the only one on that
level, lifts the records of the page to the father page, thus reducing the
tree height. It is assumed that mtr holds an x-latch on the tree and on the
page. If cursor is on the leaf level, mtr must also hold x-latches on the
brothers, if they exist. */
ibool
btr_compress(
/*=========*/
				/* out: TRUE on success */
	btr_cur_t*	cursor,	/* in: cursor on the page to merge or lift;
				the page must not be empty: in record delete
				use btr_discard_page if the page would become
				empty */
	mtr_t*		mtr)	/* in: mtr */
{
	dict_index_t*	index;
	ulint		space;
	ulint		zip_size;
	ulint		left_page_no;
	ulint		right_page_no;
	buf_block_t*	merge_block;
	page_t*		merge_page;
	page_zip_des_t*	merge_page_zip;
	ibool		is_left;
	buf_block_t*	block;
	page_t*		page;
	btr_cur_t	father_cursor;
	mem_heap_t*	heap;
	ulint*		offsets;
	ulint		data_size;
	ulint		n_recs;
	ulint		max_ins_size;
	ulint		max_ins_size_reorg;
	ulint		level;

	block = btr_cur_get_block(cursor);
	page = btr_cur_get_page(cursor);
	index = btr_cur_get_index(cursor);
	ut_a((ibool) !!page_is_comp(page) == dict_table_is_comp(index->table));

	ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
				MTR_MEMO_X_LOCK));
	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
	level = btr_page_get_level(page, mtr);
	space = dict_index_get_space(index);
	zip_size = dict_table_zip_size(index->table);

	left_page_no = btr_page_get_prev(page, mtr);
	right_page_no = btr_page_get_next(page, mtr);

#if 0
	fprintf(stderr, "Merge left page %lu right %lu \n",
		left_page_no, right_page_no);
#endif

	heap = mem_heap_create(100);
	offsets = btr_page_get_father_block(NULL, heap, index, block, mtr,
					    &father_cursor);

	/* Decide the page to which we try to merge and which will inherit
	the locks */

	is_left = left_page_no != FIL_NULL;

	if (is_left) {

		merge_block = btr_block_get(space, zip_size, left_page_no,
					    RW_X_LATCH, mtr);
		merge_page = buf_block_get_frame(merge_block);
#ifdef UNIV_BTR_DEBUG
		ut_a(btr_page_get_next(merge_page, mtr)
		     == buf_block_get_page_no(block));
#endif /* UNIV_BTR_DEBUG */
	} else if (right_page_no != FIL_NULL) {

		merge_block = btr_block_get(space, zip_size, right_page_no,
					    RW_X_LATCH, mtr);
		merge_page = buf_block_get_frame(merge_block);
#ifdef UNIV_BTR_DEBUG
		ut_a(btr_page_get_prev(merge_page, mtr)
		     == buf_block_get_page_no(block));
#endif /* UNIV_BTR_DEBUG */
	} else {
		/* The page is the only one on the level, lift the records
		to the father */
		btr_lift_page_up(index, block, mtr);
		mem_heap_free(heap);
		return(TRUE);
	}

	n_recs = page_get_n_recs(page);
	data_size = page_get_data_size(page);
#ifdef UNIV_BTR_DEBUG
	ut_a(page_is_comp(merge_page) == page_is_comp(page));
#endif /* UNIV_BTR_DEBUG */

	max_ins_size_reorg = page_get_max_insert_size_after_reorganize(
		merge_page, n_recs);
	if (data_size > max_ins_size_reorg) {

		/* No space for merge */
err_exit:
		/* We play it safe and reset the free bits. */
		if (zip_size
		    && page_is_leaf(merge_page)
		    && !dict_index_is_clust(index)) {
			ibuf_reset_free_bits(merge_block);
		}

		mem_heap_free(heap);
		return(FALSE);
	}

	ut_ad(page_validate(merge_page, index));

	max_ins_size = page_get_max_insert_size(merge_page, n_recs);

	if (UNIV_UNLIKELY(data_size > max_ins_size)) {

		/* We have to reorganize merge_page */

		if (UNIV_UNLIKELY(!btr_page_reorganize(merge_block,
						       index, mtr))) {

			goto err_exit;
		}

		max_ins_size = page_get_max_insert_size(merge_page, n_recs);

		ut_ad(page_validate(merge_page, index));
		ut_ad(max_ins_size == max_ins_size_reorg);

		if (UNIV_UNLIKELY(data_size > max_ins_size)) {

			/* Add fault tolerance, though this should
			never happen */

			goto err_exit;
		}
	}

	merge_page_zip = buf_block_get_page_zip(merge_block);
#ifdef UNIV_ZIP_DEBUG
	if (UNIV_LIKELY_NULL(merge_page_zip)) {
		const page_zip_des_t*	page_zip
			= buf_block_get_page_zip(block);
		ut_a(page_zip);
		ut_a(page_zip_validate(merge_page_zip, merge_page));
		ut_a(page_zip_validate(page_zip, page));
	}
#endif /* UNIV_ZIP_DEBUG */

	/* Move records to the merge page */
	if (is_left) {
		rec_t*	orig_pred = page_copy_rec_list_start(
			merge_block, block, page_get_supremum_rec(page),
			index, mtr);

		if (UNIV_UNLIKELY(!orig_pred)) {
			goto err_exit;
		}

		btr_search_drop_page_hash_index(block);

		/* Remove the page from the level list */
		btr_level_list_remove(space, zip_size, page, mtr);

		btr_node_ptr_delete(index, block, mtr);
		lock_update_merge_left(merge_block, orig_pred, block);
	} else {
		rec_t*		orig_succ;
#ifdef UNIV_BTR_DEBUG
		byte		fil_page_prev[4];
#endif /* UNIV_BTR_DEBUG */

		if (UNIV_LIKELY_NULL(merge_page_zip)) {
			/* The function page_zip_compress(), which will be
			invoked by page_copy_rec_list_end() below,
			requires that FIL_PAGE_PREV be FIL_NULL.
			Clear the field, but prepare to restore it. */
#ifdef UNIV_BTR_DEBUG
			memcpy(fil_page_prev, merge_page + FIL_PAGE_PREV, 4);
#endif /* UNIV_BTR_DEBUG */
#if FIL_NULL != 0xffffffff
# error "FIL_NULL != 0xffffffff"
#endif
			memset(merge_page + FIL_PAGE_PREV, 0xff, 4);
		}

		orig_succ = page_copy_rec_list_end(merge_block, block,
						   page_get_infimum_rec(page),
						   cursor->index, mtr);

		if (UNIV_UNLIKELY(!orig_succ)) {
			ut_a(merge_page_zip);
#ifdef UNIV_BTR_DEBUG
			/* FIL_PAGE_PREV was restored from merge_page_zip. */
			ut_a(!memcmp(fil_page_prev,
				     merge_page + FIL_PAGE_PREV, 4));
#endif /* UNIV_BTR_DEBUG */
			goto err_exit;
		}

		btr_search_drop_page_hash_index(block);

#ifdef UNIV_BTR_DEBUG
		if (UNIV_LIKELY_NULL(merge_page_zip)) {
			/* Restore FIL_PAGE_PREV in order to avoid an assertion
			failure in btr_level_list_remove(), which will set
			the field again to FIL_NULL.  Even though this makes
			merge_page and merge_page_zip inconsistent for a
			split second, it is harmless, because the pages
			are X-latched. */
			memcpy(merge_page + FIL_PAGE_PREV, fil_page_prev, 4);
		}
#endif /* UNIV_BTR_DEBUG */

		/* Remove the page from the level list */
		btr_level_list_remove(space, zip_size, page, mtr);

		/* Replace the address of the old child node (= page) with the
		address of the merge page to the right */

		btr_node_ptr_set_child_page_no(
			btr_cur_get_rec(&father_cursor),
			btr_cur_get_page_zip(&father_cursor),
			offsets, right_page_no, mtr);
		btr_node_ptr_delete(index, merge_block, mtr);

		lock_update_merge_right(merge_block, orig_succ, block);
	}

	mem_heap_free(heap);

	if (!dict_index_is_clust(index) && page_is_leaf(merge_page)) {
		/* Update the free bits of the B-tree page in the
		insert buffer bitmap.  This has to be done in a
		separate mini-transaction that is committed before the
		main mini-transaction.  We cannot update the insert
		buffer bitmap in this mini-transaction, because
		btr_compress() can be invoked recursively without
		committing the mini-transaction in between.  Since
		insert buffer bitmap pages have a lower rank than
		B-tree pages, we must not access other pages in the
		same mini-transaction after accessing an insert buffer
		bitmap page. */

		/* The free bits in the insert buffer bitmap must
		never exceed the free space on a page.  It is safe to
		decrement or reset the bits in the bitmap in a
		mini-transaction that is committed before the
		mini-transaction that affects the free space. */

		/* It is unsafe to increment the bits in a separately
		committed mini-transaction, because in crash recovery,
		the free bits could momentarily be set too high. */

		if (zip_size) {
			/* Because the free bits may be incremented
			and we cannot update the insert buffer bitmap
			in the same mini-transaction, the only safe
			thing we can do here is the pessimistic
			approach: reset the free bits. */
			ibuf_reset_free_bits(merge_block);
		} else {
			/* On uncompressed pages, the free bits will
			never increase here.  Thus, it is safe to
			write the bits accurately in a separate
			mini-transaction. */
			ibuf_update_free_bits_if_full(merge_block,
						      UNIV_PAGE_SIZE,
						      ULINT_UNDEFINED);
		}
	}

	ut_ad(page_validate(merge_page, index));

	/* Free the file page */
	btr_page_free(index, block, mtr);

	ut_ad(btr_check_node_ptr(index, merge_block, mtr));
	return(TRUE);
}
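
/* Editor's note: the merge decision above reduces to three questions,
sketched here with hypothetical helpers: (1) is there a left brother
(preferred), else a right brother, else lift the page up; (2) would the
surviving page hold this page's data even after defragmentation; (3) if
the data fits only after defragmentation, reorganize first, then move the
records.  Not InnoDB code. */
#if 0
static ibool
toy_compress(toy_page_t* page, toy_page_t* merge_page,
	     ulint data_size, ulint n_recs)
{
	if (data_size > toy_max_ins_size_after_reorg(merge_page, n_recs)) {
		return(FALSE);		/* no room even if defragmented */
	}

	if (data_size > toy_max_ins_size(merge_page, n_recs)) {
		if (!toy_reorganize(merge_page)) {	/* defragment */
			return(FALSE);
		}
	}

	toy_move_records(merge_page, page);	/* guaranteed to fit now */
	return(TRUE);
}
#endif
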
/*****************************************************************
Discards a page that is the only page on its level. */
static
void
btr_discard_only_page_on_level(
/*===========================*/
	dict_index_t*	index,	/* in: index tree */
	buf_block_t*	block,	/* in: page which is the only one on its
				level */
	mtr_t*		mtr)	/* in: mtr */
{
	btr_cur_t	father_cursor;
	buf_block_t*	father_block;
	page_t*		father_page;
	page_zip_des_t*	father_page_zip;
	page_t*		page	= buf_block_get_frame(block);
	ulint		page_level;

	ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL);
	ut_ad(btr_page_get_next(page, mtr) == FIL_NULL);
	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
	btr_search_drop_page_hash_index(block);

	btr_page_get_father(index, block, mtr, &father_cursor);
	father_block = btr_cur_get_block(&father_cursor);
	father_page_zip = buf_block_get_page_zip(father_block);
	father_page = buf_block_get_frame(father_block);

	page_level = btr_page_get_level(page, mtr);

	lock_update_discard(father_block, PAGE_HEAP_NO_SUPREMUM, block);

	btr_page_set_level(father_page, father_page_zip, page_level, mtr);

	/* Free the file page */
	btr_page_free(index, block, mtr);

	if (UNIV_LIKELY(buf_block_get_page_no(father_block)
			== dict_index_get_page(index))) {
		/* The father is the root page */

		btr_page_empty(father_block, father_page_zip, mtr, index);

		/* We play safe and reset the free bits for the father */
		if (!dict_index_is_clust(index)) {
			ibuf_reset_free_bits(father_block);
		}
	} else {
		ut_ad(page_get_n_recs(father_page) == 1);

		btr_discard_only_page_on_level(index, father_block, mtr);
	}
}
/*****************************************************************
Discards a page from a B-tree. This is used to remove the last record from
a B-tree page: the whole page must be removed at the same time. This cannot
be used for the root page, which is allowed to be empty. */
void
btr_discard_page(
/*=============*/
	btr_cur_t*	cursor,	/* in: cursor on the page to discard: not on
				the root page */
	mtr_t*		mtr)	/* in: mtr */
{
	dict_index_t*	index;
	ulint		space;
	ulint		zip_size;
	ulint		left_page_no;
	ulint		right_page_no;
	buf_block_t*	merge_block;
	page_t*		merge_page;
	buf_block_t*	block;
	page_t*		page;
	rec_t*		node_ptr;

	block = btr_cur_get_block(cursor);
	index = btr_cur_get_index(cursor);

	ut_ad(dict_index_get_page(index) != buf_block_get_page_no(block));
	ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
				MTR_MEMO_X_LOCK));
	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
	space = dict_index_get_space(index);
	zip_size = dict_table_zip_size(index->table);

	/* Decide the page which will inherit the locks */

	left_page_no = btr_page_get_prev(buf_block_get_frame(block), mtr);
	right_page_no = btr_page_get_next(buf_block_get_frame(block), mtr);

	if (left_page_no != FIL_NULL) {
		merge_block = btr_block_get(space, zip_size, left_page_no,
					    RW_X_LATCH, mtr);
		merge_page = buf_block_get_frame(merge_block);
#ifdef UNIV_BTR_DEBUG
		ut_a(btr_page_get_next(merge_page, mtr)
		     == buf_block_get_page_no(block));
#endif /* UNIV_BTR_DEBUG */
	} else if (right_page_no != FIL_NULL) {
		merge_block = btr_block_get(space, zip_size, right_page_no,
					    RW_X_LATCH, mtr);
		merge_page = buf_block_get_frame(merge_block);
#ifdef UNIV_BTR_DEBUG
		ut_a(btr_page_get_prev(merge_page, mtr)
		     == buf_block_get_page_no(block));
#endif /* UNIV_BTR_DEBUG */
	} else {
		btr_discard_only_page_on_level(index, block, mtr);

		return;
	}

	page = buf_block_get_frame(block);
	ut_a(page_is_comp(merge_page) == page_is_comp(page));
	btr_search_drop_page_hash_index(block);

	if (left_page_no == FIL_NULL && !page_is_leaf(page)) {

		/* We have to mark the leftmost node pointer on the right
		side page as the predefined minimum record */
		node_ptr = page_rec_get_next(page_get_infimum_rec(merge_page));

		ut_ad(page_rec_is_user_rec(node_ptr));

		/* This will make page_zip_validate() fail on merge_page
		until btr_level_list_remove() completes.  This is harmless,
		because everything will take place within a single
		mini-transaction and because writing to the redo log
		is an atomic operation (performed by mtr_commit()). */
		btr_set_min_rec_mark(node_ptr, mtr);
	}

	btr_node_ptr_delete(index, block, mtr);

	/* Remove the page from the level list */
	btr_level_list_remove(space, zip_size, page, mtr);

#ifdef UNIV_ZIP_DEBUG
	{
		page_zip_des_t*	merge_page_zip
			= buf_block_get_page_zip(merge_block);
		ut_a(!merge_page_zip
		     || page_zip_validate(merge_page_zip, merge_page));
	}
#endif /* UNIV_ZIP_DEBUG */

	if (left_page_no != FIL_NULL) {
		lock_update_discard(merge_block, PAGE_HEAP_NO_SUPREMUM,
				    block);
	} else {
		lock_update_discard(merge_block,
				    lock_get_min_heap_no(merge_block),
				    block);
	}

	/* Free the file page */
	btr_page_free(index, block, mtr);

	ut_ad(btr_check_node_ptr(index, merge_block, mtr));
}
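
/* Editor's note: when the discarded page was the leftmost one on a
non-leaf level, the first node pointer of the right sibling becomes the
leftmost record of that level, so it must carry the minimum-record mark
(see the node pointer discussion at the top of this file).  The validator
later enforces exactly this invariant; a sketch of that check, with
first_rec standing for the first user record of the leftmost page: */
#if 0
if (level > 0 && left_page_no == FIL_NULL) {
	ut_a(REC_INFO_MIN_REC_FLAG
	     & rec_get_info_bits(first_rec, page_is_comp(page)));
}
#endif
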
#ifdef UNIV_BTR_PRINT
/*****************************************************************
Prints size info of a B-tree. */
void
btr_print_size(
/*===========*/
	dict_index_t*	index)	/* in: index tree */
{
	page_t*		root;
	fseg_header_t*	seg;
	mtr_t		mtr;

	if (dict_index_is_ibuf(index)) {
		fputs("Sorry, cannot print info of an ibuf tree:"
		      " use ibuf functions\n", stderr);

		return;
	}

	mtr_start(&mtr);

	root = btr_root_get(index, &mtr);

	seg = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;

	fputs("INFO OF THE NON-LEAF PAGE SEGMENT\n", stderr);
	fseg_print(seg, &mtr);

	if (!(index->type & DICT_UNIVERSAL)) {

		seg = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;

		fputs("INFO OF THE LEAF PAGE SEGMENT\n", stderr);
		fseg_print(seg, &mtr);
	}

	mtr_commit(&mtr);
}
/****************************************************************
Prints recursively index tree pages. */
static
void
btr_print_recursive(
/*================*/
	dict_index_t*	index,	/* in: index tree */
	buf_block_t*	block,	/* in: index page */
	ulint		width,	/* in: print this many entries from start
				and end */
	mem_heap_t**	heap,	/* in/out: heap for rec_get_offsets() */
	ulint**		offsets,/* in/out: buffer for rec_get_offsets() */
	mtr_t*		mtr)	/* in: mtr */
{
	const page_t*	page	= buf_block_get_frame(block);
	page_cur_t	cursor;
	ulint		n_recs;
	ulint		i	= 0;
	mtr_t		mtr2;

	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
	fprintf(stderr, "NODE ON LEVEL %lu page number %lu\n",
		(ulong) btr_page_get_level(page, mtr),
		(ulong) buf_block_get_page_no(block));

	page_print(block, index, width, width);

	n_recs = page_get_n_recs(page);

	page_cur_set_before_first(block, &cursor);
	page_cur_move_to_next(&cursor);

	while (!page_cur_is_after_last(&cursor)) {

		if (page_is_leaf(page)) {

			/* If this is the leaf level, do nothing */

		} else if ((i <= width) || (i >= n_recs - width)) {

			const rec_t*	node_ptr;

			mtr_start(&mtr2);

			node_ptr = page_cur_get_rec(&cursor);

			*offsets = rec_get_offsets(node_ptr, index, *offsets,
						   ULINT_UNDEFINED, heap);
			btr_print_recursive(index,
					    btr_node_ptr_get_child(node_ptr,
								   index,
								   *offsets,
								   &mtr2),
					    width, heap, offsets, &mtr2);
			mtr_commit(&mtr2);
		}

		page_cur_move_to_next(&cursor);
		i++;
	}
}
/******************************************************************
Prints directories and other info of all nodes in the tree. */
void
btr_print_index(
/*============*/
	dict_index_t*	index,	/* in: index */
	ulint		width)	/* in: print this many entries from start
				and end */
{
	mtr_t		mtr;
	buf_block_t*	root;
	mem_heap_t*	heap	= NULL;
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
	ulint*		offsets	= offsets_;
	rec_offs_init(offsets_);

	fputs("--------------------------\n"
	      "INDEX TREE PRINT\n", stderr);

	mtr_start(&mtr);

	root = btr_root_block_get(index, &mtr);

	btr_print_recursive(index, root, width, &heap, &offsets, &mtr);
	if (UNIV_LIKELY_NULL(heap)) {
		mem_heap_free(heap);
	}

	mtr_commit(&mtr);

	btr_validate_index(index, NULL);
}
#endif /* UNIV_BTR_PRINT */
#ifdef UNIV_DEBUG
/****************************************************************
Checks that the node pointer to a page is appropriate. */
ibool
btr_check_node_ptr(
/*===============*/
				/* out: TRUE */
	dict_index_t*	index,	/* in: index tree */
	buf_block_t*	block,	/* in: index page */
	mtr_t*		mtr)	/* in: mtr */
{
	mem_heap_t*	heap;
	dtuple_t*	tuple;
	ulint*		offsets;
	btr_cur_t	cursor;
	page_t*		page = buf_block_get_frame(block);

	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
	if (dict_index_get_page(index) == buf_block_get_page_no(block)) {

		return(TRUE);
	}

	heap = mem_heap_create(256);
	offsets = btr_page_get_father_block(NULL, heap, index, block, mtr,
					    &cursor);

	if (page_is_leaf(page)) {

		goto func_exit;
	}

	tuple = dict_index_build_node_ptr(
		index, page_rec_get_next(page_get_infimum_rec(page)), 0, heap,
		btr_page_get_level(page, mtr));

	ut_a(!cmp_dtuple_rec(tuple, btr_cur_get_rec(&cursor), offsets));
func_exit:
	mem_heap_free(heap);

	return(TRUE);
}
#endif /* UNIV_DEBUG */
/****************************************************************
Display identification information for a record. */
static
void
btr_index_rec_validate_report(
/*==========================*/
	const page_t*		page,	/* in: index page */
	const rec_t*		rec,	/* in: index record */
	const dict_index_t*	index)	/* in: index */
{
	fputs("InnoDB: Record in ", stderr);
	dict_index_name_print(stderr, NULL, index);
	fprintf(stderr, ", page %lu, at offset %lu\n",
		page_get_page_no(page), (ulint) page_offset(rec));
}
/****************************************************************
Checks the size and number of fields in a record based on the definition of
the index. */
ibool
btr_index_rec_validate(
/*===================*/
					/* out: TRUE if ok */
	const rec_t*		rec,		/* in: index record */
	const dict_index_t*	index,		/* in: index */
	ibool			dump_on_error)	/* in: TRUE if the function
						should print hex dump of
						record and page on error */
{
	ulint		len;
	ulint		n;
	ulint		i;
	const page_t*	page;
	mem_heap_t*	heap	= NULL;
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
	ulint*		offsets	= offsets_;
	rec_offs_init(offsets_);

	page = page_align(rec);

	if (UNIV_UNLIKELY(index->type & DICT_UNIVERSAL)) {
		/* The insert buffer index tree can contain records from any
		other index: we cannot check the number of fields or
		their length */

		return(TRUE);
	}

	if (UNIV_UNLIKELY((ibool) !!page_is_comp(page)
			  != dict_table_is_comp(index->table))) {
		btr_index_rec_validate_report(page, rec, index);
		fprintf(stderr, "InnoDB: compact flag=%lu, should be %lu\n",
			(ulong) !!page_is_comp(page),
			(ulong) dict_table_is_comp(index->table));

		return(FALSE);
	}

	n = dict_index_get_n_fields(index);

	if (!page_is_comp(page)
	    && UNIV_UNLIKELY(rec_get_n_fields_old(rec) != n)) {
		btr_index_rec_validate_report(page, rec, index);
		fprintf(stderr, "InnoDB: has %lu fields, should have %lu\n",
			(ulong) rec_get_n_fields_old(rec), (ulong) n);

		if (dump_on_error) {
			buf_page_print(page, 0);

			fputs("InnoDB: corrupt record ", stderr);
			rec_print_old(stderr, rec);
			putc('\n', stderr);
		}
		return(FALSE);
	}

	offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);

	for (i = 0; i < n; i++) {
		ulint	fixed_size = dict_col_get_fixed_size(
			dict_index_get_nth_col(index, i));

		rec_get_nth_field_offs(offsets, i, &len);

		/* Note that if fixed_size != 0, it equals the
		length of a fixed-size column in the clustered index.
		A prefix index of the column is of fixed, but different
		length.  When fixed_size == 0, prefix_len is the maximum
		length of the prefix index column. */

		if ((dict_index_get_nth_field(index, i)->prefix_len == 0
		     && len != UNIV_SQL_NULL && fixed_size
		     && len != fixed_size)
		    || (dict_index_get_nth_field(index, i)->prefix_len > 0
			&& len != UNIV_SQL_NULL
			&& len
			> dict_index_get_nth_field(index, i)->prefix_len)) {

			btr_index_rec_validate_report(page, rec, index);
			fprintf(stderr,
				"InnoDB: field %lu len is %lu,"
				" should be %lu\n",
				(ulong) i, (ulong) len, (ulong) fixed_size);

			if (dump_on_error) {
				buf_page_print(page, 0);

				fputs("InnoDB: corrupt record ", stderr);
				rec_print_new(stderr, rec, offsets);
				putc('\n', stderr);
			}
			if (UNIV_LIKELY_NULL(heap)) {
				mem_heap_free(heap);
			}
			return(FALSE);
		}
	}

	if (UNIV_LIKELY_NULL(heap)) {
		mem_heap_free(heap);
	}
	return(TRUE);
}
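
/* Editor's note: the length rule in the loop above, restated as a
stand-alone predicate.  field_len_ok() is a hypothetical helper, not an
InnoDB function; UNIV_SQL_NULL marks a stored SQL NULL.  A full-column
field must have exactly the fixed size when the column is fixed-size, and
a prefix-index field may not exceed the prefix length. */
#if 0
static int
field_len_ok(ulint len, ulint fixed_size, ulint prefix_len)
{
	if (len == UNIV_SQL_NULL) {
		return(1);		/* NULLs carry no stored length */
	}

	if (prefix_len == 0) {
		/* full column: variable-size (fixed_size == 0) is
		acceptable here; a fixed-size column must match exactly */
		return(fixed_size == 0 || len == fixed_size);
	}

	return(len <= prefix_len);	/* column prefix index */
}
#endif
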
/****************************************************************
Checks the size and number of fields in records based on the definition of
the index. */
static
ibool
btr_index_page_validate(
/*====================*/
				/* out: TRUE if ok */
	buf_block_t*	block,	/* in: index page */
	dict_index_t*	index)	/* in: index */
{
	page_cur_t	cur;
	ibool		ret	= TRUE;

	page_cur_set_before_first(block, &cur);
	page_cur_move_to_next(&cur);

	for (;;) {
		if (page_cur_is_after_last(&cur)) {

			break;
		}

		if (!btr_index_rec_validate(cur.rec, index, TRUE)) {

			return(FALSE);
		}

		page_cur_move_to_next(&cur);
	}

	return(ret);
}
/****************************************************************
Report an error on one page of an index tree. */
static
void
btr_validate_report1(
/*=================*/
	dict_index_t*		index,	/* in: index */
	ulint			level,	/* in: B-tree level */
	const buf_block_t*	block)	/* in: index page */
{
	fprintf(stderr, "InnoDB: Error in page %lu of ",
		buf_block_get_page_no(block));
	dict_index_name_print(stderr, NULL, index);
	if (level) {
		fprintf(stderr, ", index tree level %lu", level);
	}
	putc('\n', stderr);
}
/****************************************************************
Report an error on two pages of an index tree. */
static
void
btr_validate_report2(
/*=================*/
	const dict_index_t*	index,	/* in: index */
	ulint			level,	/* in: B-tree level */
	const buf_block_t*	block1,	/* in: first index page */
	const buf_block_t*	block2)	/* in: second index page */
{
	fprintf(stderr, "InnoDB: Error in pages %lu and %lu of ",
		buf_block_get_page_no(block1),
		buf_block_get_page_no(block2));
	dict_index_name_print(stderr, NULL, index);
	if (level) {
		fprintf(stderr, ", index tree level %lu", level);
	}
	putc('\n', stderr);
}
/****************************************************************
Validates index tree level. */
static
ibool
btr_validate_level(
/*===============*/
				/* out: TRUE if ok */
	dict_index_t*	index,	/* in: index tree */
	trx_t*		trx,	/* in: transaction or NULL */
	ulint		level)	/* in: level number */
{
	ulint		space;
	ulint		zip_size;
	buf_block_t*	block;
	page_t*		page;
	buf_block_t*	right_block = 0; /* remove warning */
	page_t*		right_page = 0; /* remove warning */
	page_t*		father_page;
	btr_cur_t	node_cur;
	btr_cur_t	right_node_cur;
	rec_t*		rec;
	ulint		right_page_no;
	ulint		left_page_no;
	page_cur_t	cursor;
	dtuple_t*	node_ptr_tuple;
	ibool		ret	= TRUE;
	mtr_t		mtr;
	mem_heap_t*	heap	= mem_heap_create(256);
	ulint*		offsets	= NULL;
	ulint*		offsets2= NULL;
#ifdef UNIV_ZIP_DEBUG
	page_zip_des_t*	page_zip;
#endif /* UNIV_ZIP_DEBUG */

	mtr_start(&mtr);

	mtr_x_lock(dict_index_get_lock(index), &mtr);

	block = btr_root_block_get(index, &mtr);
	page = buf_block_get_frame(block);

	space = dict_index_get_space(index);
	zip_size = dict_table_zip_size(index->table);

	while (level != btr_page_get_level(page, &mtr)) {
		const rec_t*	node_ptr;

		ut_a(space == buf_block_get_space(block));
		ut_a(space == page_get_space_id(page));
#ifdef UNIV_ZIP_DEBUG
		page_zip = buf_block_get_page_zip(block);
		ut_a(!page_zip || page_zip_validate(page_zip, page));
#endif /* UNIV_ZIP_DEBUG */
		ut_a(!page_is_leaf(page));

		page_cur_set_before_first(block, &cursor);
		page_cur_move_to_next(&cursor);

		node_ptr = page_cur_get_rec(&cursor);
		offsets = rec_get_offsets(node_ptr, index, offsets,
					  ULINT_UNDEFINED, &heap);
		block = btr_node_ptr_get_child(node_ptr, index, offsets, &mtr);
		page = buf_block_get_frame(block);
	}

	/* Now we are on the desired level. Loop through the pages on that
	level. */
loop:
	if (trx_is_interrupted(trx)) {
		mtr_commit(&mtr);
		mem_heap_free(heap);
		return(ret);
	}
	mem_heap_empty(heap);
	offsets = offsets2 = NULL;
	mtr_x_lock(dict_index_get_lock(index), &mtr);

#ifdef UNIV_ZIP_DEBUG
	page_zip = buf_block_get_page_zip(block);
	ut_a(!page_zip || page_zip_validate(page_zip, page));
#endif /* UNIV_ZIP_DEBUG */

	/* Check ordering etc. of records */

	if (!page_validate(page, index)) {
		btr_validate_report1(index, level, block);

		ret = FALSE;
	} else if (level == 0) {
		/* We are on level 0. Check that the records have the right
		number of fields, and field lengths are right. */

		if (!btr_index_page_validate(block, index)) {

			ret = FALSE;
		}
	}

	ut_a(btr_page_get_level(page, &mtr) == level);

	right_page_no = btr_page_get_next(page, &mtr);
	left_page_no = btr_page_get_prev(page, &mtr);

	ut_a(page_get_n_recs(page) > 0 || (level == 0
					   && page_get_page_no(page)
					   == dict_index_get_page(index)));

	if (right_page_no != FIL_NULL) {
		const rec_t*	right_rec;
		right_block = btr_block_get(space, zip_size, right_page_no,
					    RW_X_LATCH, &mtr);
		right_page = buf_block_get_frame(right_block);
		if (UNIV_UNLIKELY(btr_page_get_prev(right_page, &mtr)
				  != page_get_page_no(page))) {
			btr_validate_report2(index, level, block, right_block);
			fputs("InnoDB: broken FIL_PAGE_NEXT"
			      " or FIL_PAGE_PREV links\n", stderr);
			buf_page_print(page, 0);
			buf_page_print(right_page, 0);

			ret = FALSE;
		}

		if (UNIV_UNLIKELY(page_is_comp(right_page)
				  != page_is_comp(page))) {
			btr_validate_report2(index, level, block, right_block);
			fputs("InnoDB: 'compact' flag mismatch\n", stderr);
			buf_page_print(page, 0);
			buf_page_print(right_page, 0);

			ret = FALSE;

			goto node_ptr_fails;
		}

		rec = page_rec_get_prev(page_get_supremum_rec(page));
		right_rec = page_rec_get_next(page_get_infimum_rec(
						      right_page));
		offsets = rec_get_offsets(rec, index,
					  offsets, ULINT_UNDEFINED, &heap);
		offsets2 = rec_get_offsets(right_rec, index,
					   offsets2, ULINT_UNDEFINED, &heap);
		if (UNIV_UNLIKELY(cmp_rec_rec(rec, right_rec,
					      offsets, offsets2,
					      index) >= 0)) {

			btr_validate_report2(index, level, block, right_block);

			fputs("InnoDB: records in wrong order"
			      " on adjacent pages\n", stderr);

			buf_page_print(page, 0);
			buf_page_print(right_page, 0);

			fputs("InnoDB: record ", stderr);
			rec = page_rec_get_prev(page_get_supremum_rec(page));
			rec_print(stderr, rec, index);
			putc('\n', stderr);
			fputs("InnoDB: record ", stderr);
			rec = page_rec_get_next(
				page_get_infimum_rec(right_page));
			rec_print(stderr, rec, index);
			putc('\n', stderr);

			ret = FALSE;
		}
	}

	if (level > 0 && left_page_no == FIL_NULL) {
		ut_a(REC_INFO_MIN_REC_FLAG & rec_get_info_bits(
			     page_rec_get_next(page_get_infimum_rec(page)),
			     page_is_comp(page)));
	}

	if (buf_block_get_page_no(block) != dict_index_get_page(index)) {

		/* Check father node pointers */

		rec_t*	node_ptr;

		offsets = btr_page_get_father_block(offsets, heap, index,
						    block, &mtr, &node_cur);
		father_page = btr_cur_get_page(&node_cur);
		node_ptr = btr_cur_get_rec(&node_cur);

		btr_cur_position(
			index, page_rec_get_prev(page_get_supremum_rec(page)),
			block, &node_cur);
		offsets = btr_page_get_father_node_ptr(offsets, heap,
						       &node_cur, &mtr);

		if (UNIV_UNLIKELY(node_ptr != btr_cur_get_rec(&node_cur))
		    || UNIV_UNLIKELY(btr_node_ptr_get_child_page_no(node_ptr,
								    offsets)
				     != buf_block_get_page_no(block))) {

			btr_validate_report1(index, level, block);

			fputs("InnoDB: node pointer to the page is wrong\n",
			      stderr);

			buf_page_print(father_page, 0);
			buf_page_print(page, 0);

			fputs("InnoDB: node ptr ", stderr);
			rec_print(stderr, node_ptr, index);

			rec = btr_cur_get_rec(&node_cur);
			fprintf(stderr, "\n"
				"InnoDB: node ptr child page n:o %lu\n",
				(ulong) btr_node_ptr_get_child_page_no(
					rec, offsets));

			fputs("InnoDB: record on page ", stderr);
			rec_print_new(stderr, rec, offsets);
			putc('\n', stderr);
			ret = FALSE;

			goto node_ptr_fails;
		}

		if (!page_is_leaf(page)) {
			node_ptr_tuple = dict_index_build_node_ptr(
				index,
				page_rec_get_next(page_get_infimum_rec(page)),
				0, heap, btr_page_get_level(page, &mtr));

			if (cmp_dtuple_rec(node_ptr_tuple, node_ptr,
					   offsets)) {
				const rec_t*	first_rec = page_rec_get_next(
					page_get_infimum_rec(page));

				btr_validate_report1(index, level, block);

				buf_page_print(father_page, 0);
				buf_page_print(page, 0);

				fputs("InnoDB: Error: node ptrs differ"
				      " on levels > 0\n"
				      "InnoDB: node ptr ", stderr);
				rec_print_new(stderr, node_ptr, offsets);
				fputs("InnoDB: first rec ", stderr);
				rec_print(stderr, first_rec, index);
				putc('\n', stderr);
				ret = FALSE;

				goto node_ptr_fails;
			}
		}

		if (left_page_no == FIL_NULL) {
			ut_a(node_ptr == page_rec_get_next(
				     page_get_infimum_rec(father_page)));
			ut_a(btr_page_get_prev(father_page, &mtr) == FIL_NULL);
		}

		if (right_page_no == FIL_NULL) {
			ut_a(node_ptr == page_rec_get_prev(
				     page_get_supremum_rec(father_page)));
			ut_a(btr_page_get_next(father_page, &mtr) == FIL_NULL);
		} else {
			const rec_t*	right_node_ptr
				= page_rec_get_next(node_ptr);

			offsets = btr_page_get_father_block(
				offsets, heap, index, right_block,
				&mtr, &right_node_cur);
			if (right_node_ptr
			    != page_get_supremum_rec(father_page)) {

				if (btr_cur_get_rec(&right_node_cur)
				    != right_node_ptr) {
					ret = FALSE;
					fputs("InnoDB: node pointer to"
					      " the right page is wrong\n",
					      stderr);

					btr_validate_report1(index, level,
							     block);

					buf_page_print(father_page, 0);
					buf_page_print(page, 0);
					buf_page_print(right_page, 0);
				}
			} else {
				page_t*	right_father_page
					= btr_cur_get_page(&right_node_cur);

				if (btr_cur_get_rec(&right_node_cur)
				    != page_rec_get_next(
					    page_get_infimum_rec(
						    right_father_page))) {
					ret = FALSE;
					fputs("InnoDB: node pointer 2 to"
					      " the right page is wrong\n",
					      stderr);

					btr_validate_report1(index, level,
							     block);

					buf_page_print(father_page, 0);
					buf_page_print(right_father_page, 0);
					buf_page_print(page, 0);
					buf_page_print(right_page, 0);
				}

				if (page_get_page_no(right_father_page)
				    != btr_page_get_next(father_page, &mtr)) {

					ret = FALSE;
					fputs("InnoDB: node pointer 3 to"
					      " the right page is wrong\n",
					      stderr);

					btr_validate_report1(index, level,
							     block);

					buf_page_print(father_page, 0);
					buf_page_print(right_father_page, 0);
					buf_page_print(page, 0);
					buf_page_print(right_page, 0);
				}
			}
		}
	}

node_ptr_fails:
	/* Commit the mini-transaction to release the latch on 'page'.
	Re-acquire the latch on right_page, which will become 'page'
	on the next loop.  The page has already been checked. */
	mtr_commit(&mtr);

	if (right_page_no != FIL_NULL) {
		mtr_start(&mtr);

		block = btr_block_get(space, zip_size, right_page_no,
				      RW_X_LATCH, &mtr);
		page = buf_block_get_frame(block);

		goto loop;
	}

	mem_heap_free(heap);
	return(ret);
}
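
/* Editor's note: the per-level walk above depends on the doubly-linked
page list being consistent, and latches at most two pages at a time by
committing and restarting the mtr for each step.  The core sibling check
reduces to the sketch below (hypothetical page struct and helpers): */
#if 0
#include <assert.h>

/* for each page and its right sibling on a level: */
static void
toy_check_siblings(const toy_page_t* page, const toy_page_t* right_page)
{
	/* back link must point at us (FIL_PAGE_PREV) */
	assert(right_page->prev_page_no == page->page_no);

	/* keys must not overlap across the page boundary */
	assert(toy_cmp(toy_last_rec(page),
		       toy_first_rec(right_page)) < 0);
}
#endif
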
/******************************************************************
Checks the consistency of an index tree. */
ibool
btr_validate_index(
/*===============*/
				/* out: TRUE if ok */
	dict_index_t*	index,	/* in: index */
	trx_t*		trx)	/* in: transaction or NULL */
{
	mtr_t	mtr;
	page_t*	root;
	ulint	i;
	ulint	n;

	mtr_start(&mtr);
	mtr_x_lock(dict_index_get_lock(index), &mtr);

	root = btr_root_get(index, &mtr);
	n = btr_page_get_level(root, &mtr);

	for (i = 0; i <= n && !trx_is_interrupted(trx); i++) {
		if (!btr_validate_level(index, trx, n - i)) {

			mtr_commit(&mtr);

			return(FALSE);
		}
	}

	mtr_commit(&mtr);

	return(TRUE);
}