1
by brian
clean slate |
1 |
/******************************************************
|
2 |
The B-tree
|
|
3 |
||
4 |
(c) 1994-1996 Innobase Oy
|
|
5 |
||
6 |
Created 6/2/1994 Heikki Tuuri
|
|
7 |
*******************************************************/
|
|
8 |
||
9 |
#include "btr0btr.h" |
|
10 |
||
11 |
#ifdef UNIV_NONINL
|
|
12 |
#include "btr0btr.ic" |
|
13 |
#endif
|
|
14 |
||
15 |
#include "fsp0fsp.h" |
|
16 |
#include "page0page.h" |
|
17 |
#include "btr0cur.h" |
|
18 |
#include "btr0sea.h" |
|
19 |
#include "btr0pcur.h" |
|
20 |
#include "rem0cmp.h" |
|
21 |
#include "lock0lock.h" |
|
22 |
#include "ibuf0ibuf.h" |
|
23 |
#include "trx0trx.h" |
|
24 |
||
25 |
/*
|
|
26 |
Latching strategy of the InnoDB B-tree
|
|
27 |
--------------------------------------
|
|
28 |
A tree latch protects all non-leaf nodes of the tree. Each node of a tree
|
|
29 |
also has a latch of its own.
|
|
30 |
||
31 |
A B-tree operation normally first acquires an S-latch on the tree. It
|
|
32 |
searches down the tree and releases the tree latch when it has the
|
|
33 |
leaf node latch. To save CPU time we do not acquire any latch on
|
|
34 |
non-leaf nodes of the tree during a search, those pages are only bufferfixed.
|
|
35 |
||
36 |
If an operation needs to restructure the tree, it acquires an X-latch on
|
|
37 |
the tree before searching to a leaf node. If it needs, for example, to
|
|
38 |
split a leaf,
|
|
39 |
(1) InnoDB decides the split point in the leaf,
|
|
40 |
(2) allocates a new page,
|
|
41 |
(3) inserts the appropriate node pointer to the first non-leaf level,
|
|
42 |
(4) releases the tree X-latch,
|
|
43 |
(5) and then moves records from the leaf to the new allocated page.
|
|
44 |
||
45 |
Node pointers
|
|
46 |
-------------
|
|
47 |
Leaf pages of a B-tree contain the index records stored in the
|
|
48 |
tree. On levels n > 0 we store 'node pointers' to pages on level
|
|
49 |
n - 1. For each page there is exactly one node pointer stored:
|
|
50 |
thus the our tree is an ordinary B-tree, not a B-link tree.
|
|
51 |
||
52 |
A node pointer contains a prefix P of an index record. The prefix
|
|
53 |
is long enough so that it determines an index record uniquely.
|
|
54 |
The file page number of the child page is added as the last
|
|
55 |
field. To the child page we can store node pointers or index records
|
|
56 |
which are >= P in the alphabetical order, but < P1 if there is
|
|
57 |
a next node pointer on the level, and P1 is its prefix.
|
|
58 |
||
59 |
If a node pointer with a prefix P points to a non-leaf child,
|
|
60 |
then the leftmost record in the child must have the same
|
|
61 |
prefix P. If it points to a leaf node, the child is not required
|
|
62 |
to contain any record with a prefix equal to P. The leaf case
|
|
63 |
is decided this way to allow arbitrary deletions in a leaf node
|
|
64 |
without touching upper levels of the tree.
|
|
65 |
||
66 |
We have predefined a special minimum record which we
|
|
67 |
define as the smallest record in any alphabetical order.
|
|
68 |
A minimum record is denoted by setting a bit in the record
|
|
69 |
header. A minimum record acts as the prefix of a node pointer
|
|
70 |
which points to a leftmost node on any level of the tree.
|
|
71 |
||
72 |
File page allocation
|
|
73 |
--------------------
|
|
74 |
In the root node of a B-tree there are two file segment headers.
|
|
75 |
The leaf pages of a tree are allocated from one file segment, to
|
|
76 |
make them consecutive on disk if possible. From the other file segment
|
|
77 |
we allocate pages for the non-leaf levels of the tree.
|
|
78 |
*/
|
|
79 |
||
80 |
/****************************************************************
|
|
81 |
Returns the upper level node pointer to a page. It is assumed that
|
|
82 |
mtr holds an x-latch on the tree. */
|
|
83 |
static
|
|
84 |
rec_t* |
|
85 |
btr_page_get_father_node_ptr( |
|
86 |
/*=========================*/
|
|
87 |
/* out: pointer to node pointer record */
|
|
88 |
dict_index_t* index, /* in: index tree */ |
|
89 |
page_t* page, /* in: page: must contain at least one |
|
90 |
user record */
|
|
91 |
mtr_t* mtr); /* in: mtr */ |
|
92 |
/*****************************************************************
|
|
93 |
Empties an index page. */
|
|
94 |
static
|
|
95 |
void
|
|
96 |
btr_page_empty( |
|
97 |
/*===========*/
|
|
98 |
page_t* page, /* in: page to be emptied */ |
|
99 |
mtr_t* mtr); /* in: mtr */ |
|
100 |
/*****************************************************************
|
|
101 |
Returns TRUE if the insert fits on the appropriate half-page
|
|
102 |
with the chosen split_rec. */
|
|
103 |
static
|
|
104 |
ibool
|
|
105 |
btr_page_insert_fits( |
|
106 |
/*=================*/
|
|
107 |
/* out: TRUE if fits */
|
|
108 |
btr_cur_t* cursor, /* in: cursor at which insert |
|
109 |
should be made */
|
|
110 |
rec_t* split_rec, /* in: suggestion for first record |
|
111 |
on upper half-page, or NULL if
|
|
112 |
tuple should be first */
|
|
113 |
const ulint* offsets, /* in: rec_get_offsets( |
|
114 |
split_rec, cursor->index) */
|
|
115 |
dtuple_t* tuple, /* in: tuple to insert */ |
|
116 |
mem_heap_t* heap); /* in: temporary memory heap */ |
|
117 |
||
118 |
/******************************************************************
|
|
119 |
Gets the root node of a tree and x-latches it. */
|
|
120 |
||
121 |
page_t* |
|
122 |
btr_root_get( |
|
123 |
/*=========*/
|
|
124 |
/* out: root page, x-latched */
|
|
125 |
dict_index_t* index, /* in: index tree */ |
|
126 |
mtr_t* mtr) /* in: mtr */ |
|
127 |
{
|
|
128 |
ulint space; |
|
129 |
ulint root_page_no; |
|
130 |
page_t* root; |
|
131 |
||
132 |
space = dict_index_get_space(index); |
|
133 |
root_page_no = dict_index_get_page(index); |
|
134 |
||
135 |
root = btr_page_get(space, root_page_no, RW_X_LATCH, mtr); |
|
136 |
ut_a((ibool)!!page_is_comp(root) == dict_table_is_comp(index->table)); |
|
137 |
||
138 |
return(root); |
|
139 |
}
|
|
140 |
||
141 |
/*****************************************************************
|
|
142 |
Gets pointer to the previous user record in the tree. It is assumed that
|
|
143 |
the caller has appropriate latches on the page and its neighbor. */
|
|
144 |
||
145 |
rec_t* |
|
146 |
btr_get_prev_user_rec( |
|
147 |
/*==================*/
|
|
148 |
/* out: previous user record, NULL if there is none */
|
|
149 |
rec_t* rec, /* in: record on leaf level */ |
|
150 |
mtr_t* mtr) /* in: mtr holding a latch on the page, and if |
|
151 |
needed, also to the previous page */
|
|
152 |
{
|
|
153 |
page_t* page; |
|
154 |
page_t* prev_page; |
|
155 |
ulint prev_page_no; |
|
156 |
ulint space; |
|
157 |
||
158 |
if (!page_rec_is_infimum(rec)) { |
|
159 |
||
160 |
rec_t* prev_rec = page_rec_get_prev(rec); |
|
161 |
||
162 |
if (!page_rec_is_infimum(prev_rec)) { |
|
163 |
||
164 |
return(prev_rec); |
|
165 |
}
|
|
166 |
}
|
|
167 |
||
168 |
page = buf_frame_align(rec); |
|
169 |
prev_page_no = btr_page_get_prev(page, mtr); |
|
170 |
space = buf_frame_get_space_id(page); |
|
171 |
||
172 |
if (prev_page_no != FIL_NULL) { |
|
173 |
||
174 |
prev_page = buf_page_get_with_no_latch(space, prev_page_no, |
|
175 |
mtr); |
|
176 |
/* The caller must already have a latch to the brother */
|
|
177 |
ut_ad((mtr_memo_contains(mtr, buf_block_align(prev_page), |
|
178 |
MTR_MEMO_PAGE_S_FIX)) |
|
179 |
|| (mtr_memo_contains(mtr, buf_block_align(prev_page), |
|
180 |
MTR_MEMO_PAGE_X_FIX))); |
|
181 |
ut_a(page_is_comp(prev_page) == page_is_comp(page)); |
|
182 |
#ifdef UNIV_BTR_DEBUG
|
|
183 |
ut_a(btr_page_get_next(prev_page, mtr) |
|
184 |
== buf_frame_get_page_no(page)); |
|
185 |
#endif /* UNIV_BTR_DEBUG */ |
|
186 |
||
187 |
return(page_rec_get_prev(page_get_supremum_rec(prev_page))); |
|
188 |
}
|
|
189 |
||
190 |
return(NULL); |
|
191 |
}
|
|
192 |
||
193 |
/*****************************************************************
|
|
194 |
Gets pointer to the next user record in the tree. It is assumed that the
|
|
195 |
caller has appropriate latches on the page and its neighbor. */
|
|
196 |
||
197 |
rec_t* |
|
198 |
btr_get_next_user_rec( |
|
199 |
/*==================*/
|
|
200 |
/* out: next user record, NULL if there is none */
|
|
201 |
rec_t* rec, /* in: record on leaf level */ |
|
202 |
mtr_t* mtr) /* in: mtr holding a latch on the page, and if |
|
203 |
needed, also to the next page */
|
|
204 |
{
|
|
205 |
page_t* page; |
|
206 |
page_t* next_page; |
|
207 |
ulint next_page_no; |
|
208 |
ulint space; |
|
209 |
||
210 |
if (!page_rec_is_supremum(rec)) { |
|
211 |
||
212 |
rec_t* next_rec = page_rec_get_next(rec); |
|
213 |
||
214 |
if (!page_rec_is_supremum(next_rec)) { |
|
215 |
||
216 |
return(next_rec); |
|
217 |
}
|
|
218 |
}
|
|
219 |
||
220 |
page = buf_frame_align(rec); |
|
221 |
next_page_no = btr_page_get_next(page, mtr); |
|
222 |
space = buf_frame_get_space_id(page); |
|
223 |
||
224 |
if (next_page_no != FIL_NULL) { |
|
225 |
||
226 |
next_page = buf_page_get_with_no_latch(space, next_page_no, |
|
227 |
mtr); |
|
228 |
/* The caller must already have a latch to the brother */
|
|
229 |
ut_ad((mtr_memo_contains(mtr, buf_block_align(next_page), |
|
230 |
MTR_MEMO_PAGE_S_FIX)) |
|
231 |
|| (mtr_memo_contains(mtr, buf_block_align(next_page), |
|
232 |
MTR_MEMO_PAGE_X_FIX))); |
|
233 |
#ifdef UNIV_BTR_DEBUG
|
|
234 |
ut_a(btr_page_get_prev(next_page, mtr) |
|
235 |
== buf_frame_get_page_no(page)); |
|
236 |
#endif /* UNIV_BTR_DEBUG */ |
|
237 |
||
238 |
ut_a(page_is_comp(next_page) == page_is_comp(page)); |
|
239 |
return(page_rec_get_next(page_get_infimum_rec(next_page))); |
|
240 |
}
|
|
241 |
||
242 |
return(NULL); |
|
243 |
}
|
|
244 |
||
245 |
/******************************************************************
|
|
246 |
Creates a new index page (not the root, and also not
|
|
247 |
used in page reorganization). */
|
|
248 |
static
|
|
249 |
void
|
|
250 |
btr_page_create( |
|
251 |
/*============*/
|
|
252 |
page_t* page, /* in: page to be created */ |
|
253 |
dict_index_t* index, /* in: index */ |
|
254 |
mtr_t* mtr) /* in: mtr */ |
|
255 |
{
|
|
256 |
ut_ad(mtr_memo_contains(mtr, buf_block_align(page), |
|
257 |
MTR_MEMO_PAGE_X_FIX)); |
|
258 |
page_create(page, mtr, dict_table_is_comp(index->table)); |
|
259 |
buf_block_align(page)->check_index_page_at_flush = TRUE; |
|
260 |
||
261 |
btr_page_set_index_id(page, index->id, mtr); |
|
262 |
}
|
|
263 |
||
264 |
/******************************************************************
|
|
265 |
Allocates a new file page to be used in an ibuf tree. Takes the page from
|
|
266 |
the free list of the tree, which must contain pages! */
|
|
267 |
static
|
|
268 |
page_t* |
|
269 |
btr_page_alloc_for_ibuf( |
|
270 |
/*====================*/
|
|
271 |
/* out: new allocated page, x-latched */
|
|
272 |
dict_index_t* index, /* in: index tree */ |
|
273 |
mtr_t* mtr) /* in: mtr */ |
|
274 |
{
|
|
275 |
fil_addr_t node_addr; |
|
276 |
page_t* root; |
|
277 |
page_t* new_page; |
|
278 |
||
279 |
root = btr_root_get(index, mtr); |
|
280 |
||
281 |
node_addr = flst_get_first(root + PAGE_HEADER |
|
282 |
+ PAGE_BTR_IBUF_FREE_LIST, mtr); |
|
283 |
ut_a(node_addr.page != FIL_NULL); |
|
284 |
||
285 |
new_page = buf_page_get(dict_index_get_space(index), node_addr.page, |
|
286 |
RW_X_LATCH, mtr); |
|
287 |
#ifdef UNIV_SYNC_DEBUG
|
|
288 |
buf_page_dbg_add_level(new_page, SYNC_TREE_NODE_NEW); |
|
289 |
#endif /* UNIV_SYNC_DEBUG */ |
|
290 |
||
291 |
flst_remove(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, |
|
292 |
new_page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, |
|
293 |
mtr); |
|
294 |
ut_ad(flst_validate(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, |
|
295 |
mtr)); |
|
296 |
||
297 |
return(new_page); |
|
298 |
}
|
|
299 |
||
300 |
/******************************************************************
|
|
301 |
Allocates a new file page to be used in an index tree. NOTE: we assume
|
|
302 |
that the caller has made the reservation for free extents! */
|
|
303 |
||
304 |
page_t* |
|
305 |
btr_page_alloc( |
|
306 |
/*===========*/
|
|
307 |
/* out: new allocated page, x-latched;
|
|
308 |
NULL if out of space */
|
|
309 |
dict_index_t* index, /* in: index */ |
|
310 |
ulint hint_page_no, /* in: hint of a good page */ |
|
311 |
byte file_direction, /* in: direction where a possible |
|
312 |
page split is made */
|
|
313 |
ulint level, /* in: level where the page is placed |
|
314 |
in the tree */
|
|
315 |
mtr_t* mtr) /* in: mtr */ |
|
316 |
{
|
|
317 |
fseg_header_t* seg_header; |
|
318 |
page_t* root; |
|
319 |
page_t* new_page; |
|
320 |
ulint new_page_no; |
|
321 |
||
322 |
if (index->type & DICT_IBUF) { |
|
323 |
||
324 |
return(btr_page_alloc_for_ibuf(index, mtr)); |
|
325 |
}
|
|
326 |
||
327 |
root = btr_root_get(index, mtr); |
|
328 |
||
329 |
if (level == 0) { |
|
330 |
seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF; |
|
331 |
} else { |
|
332 |
seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP; |
|
333 |
}
|
|
334 |
||
335 |
/* Parameter TRUE below states that the caller has made the
|
|
336 |
reservation for free extents, and thus we know that a page can
|
|
337 |
be allocated: */
|
|
338 |
||
339 |
new_page_no = fseg_alloc_free_page_general(seg_header, hint_page_no, |
|
340 |
file_direction, TRUE, mtr); |
|
341 |
if (new_page_no == FIL_NULL) { |
|
342 |
||
343 |
return(NULL); |
|
344 |
}
|
|
345 |
||
346 |
new_page = buf_page_get(dict_index_get_space(index), new_page_no, |
|
347 |
RW_X_LATCH, mtr); |
|
348 |
#ifdef UNIV_SYNC_DEBUG
|
|
349 |
buf_page_dbg_add_level(new_page, SYNC_TREE_NODE_NEW); |
|
350 |
#endif /* UNIV_SYNC_DEBUG */ |
|
351 |
||
352 |
return(new_page); |
|
353 |
}
|
|
354 |
||
355 |
/******************************************************************
|
|
356 |
Gets the number of pages in a B-tree. */
|
|
357 |
||
358 |
ulint
|
|
359 |
btr_get_size( |
|
360 |
/*=========*/
|
|
361 |
/* out: number of pages */
|
|
362 |
dict_index_t* index, /* in: index */ |
|
363 |
ulint flag) /* in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */ |
|
364 |
{
|
|
365 |
fseg_header_t* seg_header; |
|
366 |
page_t* root; |
|
367 |
ulint n; |
|
368 |
ulint dummy; |
|
369 |
mtr_t mtr; |
|
370 |
||
371 |
mtr_start(&mtr); |
|
372 |
||
373 |
mtr_s_lock(dict_index_get_lock(index), &mtr); |
|
374 |
||
375 |
root = btr_root_get(index, &mtr); |
|
376 |
||
377 |
if (flag == BTR_N_LEAF_PAGES) { |
|
378 |
seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF; |
|
379 |
||
380 |
fseg_n_reserved_pages(seg_header, &n, &mtr); |
|
381 |
||
382 |
} else if (flag == BTR_TOTAL_SIZE) { |
|
383 |
seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP; |
|
384 |
||
385 |
n = fseg_n_reserved_pages(seg_header, &dummy, &mtr); |
|
386 |
||
387 |
seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF; |
|
388 |
||
389 |
n += fseg_n_reserved_pages(seg_header, &dummy, &mtr); |
|
390 |
} else { |
|
391 |
ut_error; |
|
392 |
}
|
|
393 |
||
394 |
mtr_commit(&mtr); |
|
395 |
||
396 |
return(n); |
|
397 |
}
|
|
398 |
||
399 |
/******************************************************************
|
|
400 |
Frees a page used in an ibuf tree. Puts the page to the free list of the
|
|
401 |
ibuf tree. */
|
|
402 |
static
|
|
403 |
void
|
|
404 |
btr_page_free_for_ibuf( |
|
405 |
/*===================*/
|
|
406 |
dict_index_t* index, /* in: index tree */ |
|
407 |
page_t* page, /* in: page to be freed, x-latched */ |
|
408 |
mtr_t* mtr) /* in: mtr */ |
|
409 |
{
|
|
410 |
page_t* root; |
|
411 |
||
412 |
ut_ad(mtr_memo_contains(mtr, buf_block_align(page), |
|
413 |
MTR_MEMO_PAGE_X_FIX)); |
|
414 |
root = btr_root_get(index, mtr); |
|
415 |
||
416 |
flst_add_first(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, |
|
417 |
page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, mtr); |
|
418 |
||
419 |
ut_ad(flst_validate(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, |
|
420 |
mtr)); |
|
421 |
}
|
|
422 |
||
423 |
/******************************************************************
|
|
424 |
Frees a file page used in an index tree. Can be used also to (BLOB)
|
|
425 |
external storage pages, because the page level 0 can be given as an
|
|
426 |
argument. */
|
|
427 |
||
428 |
void
|
|
429 |
btr_page_free_low( |
|
430 |
/*==============*/
|
|
431 |
dict_index_t* index, /* in: index tree */ |
|
432 |
page_t* page, /* in: page to be freed, x-latched */ |
|
433 |
ulint level, /* in: page level */ |
|
434 |
mtr_t* mtr) /* in: mtr */ |
|
435 |
{
|
|
436 |
fseg_header_t* seg_header; |
|
437 |
page_t* root; |
|
438 |
ulint space; |
|
439 |
ulint page_no; |
|
440 |
||
441 |
ut_ad(mtr_memo_contains(mtr, buf_block_align(page), |
|
442 |
MTR_MEMO_PAGE_X_FIX)); |
|
443 |
/* The page gets invalid for optimistic searches: increment the frame
|
|
444 |
modify clock */
|
|
445 |
||
446 |
buf_frame_modify_clock_inc(page); |
|
447 |
||
448 |
if (index->type & DICT_IBUF) { |
|
449 |
||
450 |
btr_page_free_for_ibuf(index, page, mtr); |
|
451 |
||
452 |
return; |
|
453 |
}
|
|
454 |
||
455 |
root = btr_root_get(index, mtr); |
|
456 |
||
457 |
if (level == 0) { |
|
458 |
seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF; |
|
459 |
} else { |
|
460 |
seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP; |
|
461 |
}
|
|
462 |
||
463 |
space = buf_frame_get_space_id(page); |
|
464 |
page_no = buf_frame_get_page_no(page); |
|
465 |
||
466 |
fseg_free_page(seg_header, space, page_no, mtr); |
|
467 |
}
|
|
468 |
||
469 |
/******************************************************************
|
|
470 |
Frees a file page used in an index tree. NOTE: cannot free field external
|
|
471 |
storage pages because the page must contain info on its level. */
|
|
472 |
||
473 |
void
|
|
474 |
btr_page_free( |
|
475 |
/*==========*/
|
|
476 |
dict_index_t* index, /* in: index tree */ |
|
477 |
page_t* page, /* in: page to be freed, x-latched */ |
|
478 |
mtr_t* mtr) /* in: mtr */ |
|
479 |
{
|
|
480 |
ulint level; |
|
481 |
||
482 |
ut_ad(mtr_memo_contains(mtr, buf_block_align(page), |
|
483 |
MTR_MEMO_PAGE_X_FIX)); |
|
484 |
level = btr_page_get_level(page, mtr); |
|
485 |
||
486 |
btr_page_free_low(index, page, level, mtr); |
|
487 |
}
|
|
488 |
||
489 |
/******************************************************************
|
|
490 |
Sets the child node file address in a node pointer. */
|
|
491 |
UNIV_INLINE
|
|
492 |
void
|
|
493 |
btr_node_ptr_set_child_page_no( |
|
494 |
/*===========================*/
|
|
495 |
rec_t* rec, /* in: node pointer record */ |
|
496 |
const ulint* offsets,/* in: array returned by rec_get_offsets() */ |
|
497 |
ulint page_no,/* in: child node address */ |
|
498 |
mtr_t* mtr) /* in: mtr */ |
|
499 |
{
|
|
500 |
byte* field; |
|
501 |
ulint len; |
|
502 |
||
503 |
ut_ad(rec_offs_validate(rec, NULL, offsets)); |
|
504 |
ut_ad(0 < btr_page_get_level(buf_frame_align(rec), mtr)); |
|
505 |
ut_ad(!rec_offs_comp(offsets) || rec_get_node_ptr_flag(rec)); |
|
506 |
||
507 |
/* The child address is in the last field */
|
|
508 |
field = rec_get_nth_field(rec, offsets, |
|
509 |
rec_offs_n_fields(offsets) - 1, &len); |
|
510 |
||
511 |
ut_ad(len == 4); |
|
512 |
||
513 |
mlog_write_ulint(field, page_no, MLOG_4BYTES, mtr); |
|
514 |
}
|
|
515 |
||
516 |
/****************************************************************
|
|
517 |
Returns the child page of a node pointer and x-latches it. */
|
|
518 |
static
|
|
519 |
page_t* |
|
520 |
btr_node_ptr_get_child( |
|
521 |
/*===================*/
|
|
522 |
/* out: child page, x-latched */
|
|
523 |
rec_t* node_ptr,/* in: node pointer */ |
|
524 |
const ulint* offsets,/* in: array returned by rec_get_offsets() */ |
|
525 |
mtr_t* mtr) /* in: mtr */ |
|
526 |
{
|
|
527 |
ulint page_no; |
|
528 |
ulint space; |
|
529 |
page_t* page; |
|
530 |
||
531 |
ut_ad(rec_offs_validate(node_ptr, NULL, offsets)); |
|
532 |
space = buf_frame_get_space_id(node_ptr); |
|
533 |
page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets); |
|
534 |
||
535 |
page = btr_page_get(space, page_no, RW_X_LATCH, mtr); |
|
536 |
||
537 |
return(page); |
|
538 |
}
|
|
539 |
||
540 |
/****************************************************************
|
|
541 |
Returns the upper level node pointer to a page. It is assumed that mtr holds
|
|
542 |
an x-latch on the tree. */
|
|
543 |
static
|
|
544 |
rec_t* |
|
545 |
btr_page_get_father_for_rec( |
|
546 |
/*========================*/
|
|
547 |
/* out: pointer to node pointer record,
|
|
548 |
its page x-latched */
|
|
549 |
dict_index_t* index, /* in: index tree */ |
|
550 |
page_t* page, /* in: page: must contain at least one |
|
551 |
user record */
|
|
552 |
rec_t* user_rec,/* in: user_record on page */ |
|
553 |
mtr_t* mtr) /* in: mtr */ |
|
554 |
{
|
|
555 |
mem_heap_t* heap; |
|
556 |
dtuple_t* tuple; |
|
557 |
btr_cur_t cursor; |
|
558 |
rec_t* node_ptr; |
|
559 |
ulint offsets_[REC_OFFS_NORMAL_SIZE]; |
|
560 |
ulint* offsets = offsets_; |
|
561 |
*offsets_ = (sizeof offsets_) / sizeof *offsets_; |
|
562 |
||
563 |
ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index), |
|
564 |
MTR_MEMO_X_LOCK)); |
|
565 |
ut_a(page_rec_is_user_rec(user_rec)); |
|
566 |
||
567 |
ut_ad(dict_index_get_page(index) != buf_frame_get_page_no(page)); |
|
568 |
||
569 |
heap = mem_heap_create(100); |
|
570 |
||
571 |
tuple = dict_index_build_node_ptr(index, user_rec, 0, heap, |
|
572 |
btr_page_get_level(page, mtr)); |
|
573 |
||
574 |
btr_cur_search_to_nth_level(index, |
|
575 |
btr_page_get_level(page, mtr) + 1, |
|
576 |
tuple, PAGE_CUR_LE, |
|
577 |
BTR_CONT_MODIFY_TREE, &cursor, 0, mtr); |
|
578 |
||
579 |
node_ptr = btr_cur_get_rec(&cursor); |
|
580 |
offsets = rec_get_offsets(node_ptr, index, offsets, |
|
581 |
ULINT_UNDEFINED, &heap); |
|
582 |
||
583 |
if (UNIV_UNLIKELY(btr_node_ptr_get_child_page_no(node_ptr, offsets) |
|
584 |
!= buf_frame_get_page_no(page))) { |
|
585 |
rec_t* print_rec; |
|
586 |
fputs("InnoDB: Dump of the child page:\n", stderr); |
|
587 |
buf_page_print(buf_frame_align(page)); |
|
588 |
fputs("InnoDB: Dump of the parent page:\n", stderr); |
|
589 |
buf_page_print(buf_frame_align(node_ptr)); |
|
590 |
||
591 |
fputs("InnoDB: Corruption of an index tree: table ", stderr); |
|
592 |
ut_print_name(stderr, NULL, TRUE, index->table_name); |
|
593 |
fputs(", index ", stderr); |
|
594 |
ut_print_name(stderr, NULL, FALSE, index->name); |
|
595 |
fprintf(stderr, ",\n" |
|
596 |
"InnoDB: father ptr page no %lu, child page no %lu\n", |
|
597 |
(ulong) |
|
598 |
btr_node_ptr_get_child_page_no(node_ptr, offsets), |
|
599 |
(ulong) buf_frame_get_page_no(page)); |
|
600 |
print_rec = page_rec_get_next(page_get_infimum_rec(page)); |
|
601 |
offsets = rec_get_offsets(print_rec, index, |
|
602 |
offsets, ULINT_UNDEFINED, &heap); |
|
603 |
page_rec_print(print_rec, offsets); |
|
604 |
offsets = rec_get_offsets(node_ptr, index, offsets, |
|
605 |
ULINT_UNDEFINED, &heap); |
|
606 |
page_rec_print(node_ptr, offsets); |
|
607 |
||
608 |
fputs("InnoDB: You should dump + drop + reimport the table" |
|
609 |
" to fix the\n" |
|
610 |
"InnoDB: corruption. If the crash happens at "
|
|
611 |
"the database startup, see\n" |
|
612 |
"InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
|
|
613 |
"forcing-recovery.html about\n" |
|
614 |
"InnoDB: forcing recovery. "
|
|
615 |
"Then dump + drop + reimport.\n", stderr); |
|
616 |
}
|
|
617 |
||
618 |
ut_a(btr_node_ptr_get_child_page_no(node_ptr, offsets) |
|
619 |
== buf_frame_get_page_no(page)); |
|
620 |
mem_heap_free(heap); |
|
621 |
||
622 |
return(node_ptr); |
|
623 |
}
|
|
624 |
||
625 |
/****************************************************************
|
|
626 |
Returns the upper level node pointer to a page. It is assumed that
|
|
627 |
mtr holds an x-latch on the tree. */
|
|
628 |
static
|
|
629 |
rec_t* |
|
630 |
btr_page_get_father_node_ptr( |
|
631 |
/*=========================*/
|
|
632 |
/* out: pointer to node pointer record */
|
|
633 |
dict_index_t* index, /* in: index tree */ |
|
634 |
page_t* page, /* in: page: must contain at least one |
|
635 |
user record */
|
|
636 |
mtr_t* mtr) /* in: mtr */ |
|
637 |
{
|
|
638 |
return(btr_page_get_father_for_rec( |
|
639 |
index, page, |
|
640 |
page_rec_get_next(page_get_infimum_rec(page)), mtr)); |
|
641 |
}
|
|
642 |
||
643 |
/****************************************************************
|
|
644 |
Creates the root node for a new index tree. */
|
|
645 |
||
646 |
ulint
|
|
647 |
btr_create( |
|
648 |
/*=======*/
|
|
649 |
/* out: page number of the created root, FIL_NULL if
|
|
650 |
did not succeed */
|
|
651 |
ulint type, /* in: type of the index */ |
|
652 |
ulint space, /* in: space where created */ |
|
653 |
dulint index_id,/* in: index id */ |
|
654 |
ulint comp, /* in: nonzero=compact page format */ |
|
655 |
mtr_t* mtr) /* in: mini-transaction handle */ |
|
656 |
{
|
|
657 |
ulint page_no; |
|
658 |
buf_frame_t* ibuf_hdr_frame; |
|
659 |
buf_frame_t* frame; |
|
660 |
page_t* page; |
|
661 |
||
662 |
/* Create the two new segments (one, in the case of an ibuf tree) for
|
|
663 |
the index tree; the segment headers are put on the allocated root page
|
|
664 |
(for an ibuf tree, not in the root, but on a separate ibuf header
|
|
665 |
page) */
|
|
666 |
||
667 |
if (type & DICT_IBUF) { |
|
668 |
/* Allocate first the ibuf header page */
|
|
669 |
ibuf_hdr_frame = fseg_create( |
|
670 |
space, 0, IBUF_HEADER + IBUF_TREE_SEG_HEADER, mtr); |
|
671 |
||
672 |
#ifdef UNIV_SYNC_DEBUG
|
|
673 |
buf_page_dbg_add_level(ibuf_hdr_frame, SYNC_TREE_NODE_NEW); |
|
674 |
#endif /* UNIV_SYNC_DEBUG */ |
|
675 |
ut_ad(buf_frame_get_page_no(ibuf_hdr_frame) |
|
676 |
== IBUF_HEADER_PAGE_NO); |
|
677 |
/* Allocate then the next page to the segment: it will be the
|
|
678 |
tree root page */
|
|
679 |
||
680 |
page_no = fseg_alloc_free_page(ibuf_hdr_frame + IBUF_HEADER |
|
681 |
+ IBUF_TREE_SEG_HEADER, |
|
682 |
IBUF_TREE_ROOT_PAGE_NO, |
|
683 |
FSP_UP, mtr); |
|
684 |
ut_ad(page_no == IBUF_TREE_ROOT_PAGE_NO); |
|
685 |
||
686 |
frame = buf_page_get(space, page_no, RW_X_LATCH, mtr); |
|
687 |
} else { |
|
688 |
frame = fseg_create(space, 0, PAGE_HEADER + PAGE_BTR_SEG_TOP, |
|
689 |
mtr); |
|
690 |
}
|
|
691 |
||
692 |
if (frame == NULL) { |
|
693 |
||
694 |
return(FIL_NULL); |
|
695 |
}
|
|
696 |
||
697 |
page_no = buf_frame_get_page_no(frame); |
|
698 |
||
699 |
#ifdef UNIV_SYNC_DEBUG
|
|
700 |
buf_page_dbg_add_level(frame, SYNC_TREE_NODE_NEW); |
|
701 |
#endif /* UNIV_SYNC_DEBUG */ |
|
702 |
||
703 |
if (type & DICT_IBUF) { |
|
704 |
/* It is an insert buffer tree: initialize the free list */
|
|
705 |
||
706 |
ut_ad(page_no == IBUF_TREE_ROOT_PAGE_NO); |
|
707 |
||
708 |
flst_init(frame + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, mtr); |
|
709 |
} else { |
|
710 |
/* It is a non-ibuf tree: create a file segment for leaf
|
|
711 |
pages */
|
|
712 |
fseg_create(space, page_no, PAGE_HEADER + PAGE_BTR_SEG_LEAF, |
|
713 |
mtr); |
|
714 |
/* The fseg create acquires a second latch on the page,
|
|
715 |
therefore we must declare it: */
|
|
716 |
#ifdef UNIV_SYNC_DEBUG
|
|
717 |
buf_page_dbg_add_level(frame, SYNC_TREE_NODE_NEW); |
|
718 |
#endif /* UNIV_SYNC_DEBUG */ |
|
719 |
}
|
|
720 |
||
721 |
/* Create a new index page on the the allocated segment page */
|
|
722 |
page = page_create(frame, mtr, comp); |
|
723 |
buf_block_align(page)->check_index_page_at_flush = TRUE; |
|
724 |
||
725 |
/* Set the index id of the page */
|
|
726 |
btr_page_set_index_id(page, index_id, mtr); |
|
727 |
||
728 |
/* Set the level of the new index page */
|
|
729 |
btr_page_set_level(page, 0, mtr); |
|
730 |
||
731 |
/* Set the next node and previous node fields */
|
|
732 |
btr_page_set_next(page, FIL_NULL, mtr); |
|
733 |
btr_page_set_prev(page, FIL_NULL, mtr); |
|
734 |
||
735 |
/* We reset the free bits for the page to allow creation of several
|
|
736 |
trees in the same mtr, otherwise the latch on a bitmap page would
|
|
737 |
prevent it because of the latching order */
|
|
738 |
||
739 |
ibuf_reset_free_bits_with_type(type, page); |
|
740 |
||
741 |
/* In the following assertion we test that two records of maximum
|
|
742 |
allowed size fit on the root page: this fact is needed to ensure
|
|
743 |
correctness of split algorithms */
|
|
744 |
||
745 |
ut_ad(page_get_max_insert_size(page, 2) > 2 * BTR_PAGE_MAX_REC_SIZE); |
|
746 |
||
747 |
return(page_no); |
|
748 |
}
|
|
749 |
||
750 |
/****************************************************************
|
|
751 |
Frees a B-tree except the root page, which MUST be freed after this
|
|
752 |
by calling btr_free_root. */
|
|
753 |
||
754 |
void
|
|
755 |
btr_free_but_not_root( |
|
756 |
/*==================*/
|
|
757 |
ulint space, /* in: space where created */ |
|
758 |
ulint root_page_no) /* in: root page number */ |
|
759 |
{
|
|
760 |
ibool finished; |
|
761 |
page_t* root; |
|
762 |
mtr_t mtr; |
|
763 |
||
764 |
leaf_loop: |
|
765 |
mtr_start(&mtr); |
|
766 |
||
767 |
root = btr_page_get(space, root_page_no, RW_X_LATCH, &mtr); |
|
768 |
||
769 |
/* NOTE: page hash indexes are dropped when a page is freed inside
|
|
770 |
fsp0fsp. */
|
|
771 |
||
772 |
finished = fseg_free_step(root + PAGE_HEADER + PAGE_BTR_SEG_LEAF, |
|
773 |
&mtr); |
|
774 |
mtr_commit(&mtr); |
|
775 |
||
776 |
if (!finished) { |
|
777 |
||
778 |
goto leaf_loop; |
|
779 |
}
|
|
780 |
top_loop: |
|
781 |
mtr_start(&mtr); |
|
782 |
||
783 |
root = btr_page_get(space, root_page_no, RW_X_LATCH, &mtr); |
|
784 |
||
785 |
finished = fseg_free_step_not_header( |
|
786 |
root + PAGE_HEADER + PAGE_BTR_SEG_TOP, &mtr); |
|
787 |
mtr_commit(&mtr); |
|
788 |
||
789 |
if (!finished) { |
|
790 |
||
791 |
goto top_loop; |
|
792 |
}
|
|
793 |
}
|
|
794 |
||
795 |
/****************************************************************
|
|
796 |
Frees the B-tree root page. Other tree MUST already have been freed. */
|
|
797 |
||
798 |
void
|
|
799 |
btr_free_root( |
|
800 |
/*==========*/
|
|
801 |
ulint space, /* in: space where created */ |
|
802 |
ulint root_page_no, /* in: root page number */ |
|
803 |
mtr_t* mtr) /* in: a mini-transaction which has already |
|
804 |
been started */
|
|
805 |
{
|
|
806 |
ibool finished; |
|
807 |
page_t* root; |
|
808 |
||
809 |
root = btr_page_get(space, root_page_no, RW_X_LATCH, mtr); |
|
810 |
||
811 |
btr_search_drop_page_hash_index(root); |
|
812 |
top_loop: |
|
813 |
finished = fseg_free_step(root + PAGE_HEADER + PAGE_BTR_SEG_TOP, mtr); |
|
814 |
if (!finished) { |
|
815 |
||
816 |
goto top_loop; |
|
817 |
}
|
|
818 |
}
|
|
819 |
||
820 |
/*****************************************************************
|
|
821 |
Reorganizes an index page. */
|
|
822 |
static
|
|
823 |
void
|
|
824 |
btr_page_reorganize_low( |
|
825 |
/*====================*/
|
|
826 |
ibool recovery,/* in: TRUE if called in recovery: |
|
827 |
locks should not be updated, i.e.,
|
|
828 |
there cannot exist locks on the
|
|
829 |
page, and a hash index should not be
|
|
830 |
dropped: it cannot exist */
|
|
831 |
page_t* page, /* in: page to be reorganized */ |
|
832 |
dict_index_t* index, /* in: record descriptor */ |
|
833 |
mtr_t* mtr) /* in: mtr */ |
|
834 |
{
|
|
835 |
page_t* new_page; |
|
836 |
ulint log_mode; |
|
837 |
ulint data_size1; |
|
838 |
ulint data_size2; |
|
839 |
ulint max_ins_size1; |
|
840 |
ulint max_ins_size2; |
|
841 |
||
842 |
ut_ad(mtr_memo_contains(mtr, buf_block_align(page), |
|
843 |
MTR_MEMO_PAGE_X_FIX)); |
|
844 |
ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table)); |
|
845 |
data_size1 = page_get_data_size(page); |
|
846 |
max_ins_size1 = page_get_max_insert_size_after_reorganize(page, 1); |
|
847 |
||
848 |
/* Write the log record */
|
|
849 |
mlog_open_and_write_index(mtr, page, index, page_is_comp(page) |
|
850 |
? MLOG_COMP_PAGE_REORGANIZE |
|
851 |
: MLOG_PAGE_REORGANIZE, 0); |
|
852 |
||
853 |
/* Turn logging off */
|
|
854 |
log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE); |
|
855 |
||
856 |
new_page = buf_frame_alloc(); |
|
857 |
||
858 |
/* Copy the old page to temporary space */
|
|
859 |
buf_frame_copy(new_page, page); |
|
860 |
||
861 |
if (!recovery) { |
|
862 |
btr_search_drop_page_hash_index(page); |
|
863 |
}
|
|
864 |
||
865 |
/* Recreate the page: note that global data on page (possible
|
|
866 |
segment headers, next page-field, etc.) is preserved intact */
|
|
867 |
||
868 |
page_create(page, mtr, page_is_comp(page)); |
|
869 |
buf_block_align(page)->check_index_page_at_flush = TRUE; |
|
870 |
||
871 |
/* Copy the records from the temporary space to the recreated page;
|
|
872 |
do not copy the lock bits yet */
|
|
873 |
||
874 |
page_copy_rec_list_end_no_locks(page, new_page, |
|
875 |
page_get_infimum_rec(new_page), |
|
876 |
index, mtr); |
|
877 |
/* Copy max trx id to recreated page */
|
|
878 |
page_set_max_trx_id(page, page_get_max_trx_id(new_page)); |
|
879 |
||
880 |
if (!recovery) { |
|
881 |
/* Update the record lock bitmaps */
|
|
882 |
lock_move_reorganize_page(page, new_page); |
|
883 |
}
|
|
884 |
||
885 |
data_size2 = page_get_data_size(page); |
|
886 |
max_ins_size2 = page_get_max_insert_size_after_reorganize(page, 1); |
|
887 |
||
888 |
if (data_size1 != data_size2 || max_ins_size1 != max_ins_size2) { |
|
889 |
buf_page_print(page); |
|
890 |
buf_page_print(new_page); |
|
891 |
fprintf(stderr, |
|
892 |
"InnoDB: Error: page old data size %lu"
|
|
893 |
" new data size %lu\n" |
|
894 |
"InnoDB: Error: page old max ins size %lu"
|
|
895 |
" new max ins size %lu\n" |
|
896 |
"InnoDB: Submit a detailed bug report"
|
|
897 |
" to http://bugs.mysql.com\n", |
|
898 |
(unsigned long) data_size1, (unsigned long) data_size2, |
|
899 |
(unsigned long) max_ins_size1, |
|
900 |
(unsigned long) max_ins_size2); |
|
901 |
}
|
|
902 |
||
903 |
buf_frame_free(new_page); |
|
904 |
||
905 |
/* Restore logging mode */
|
|
906 |
mtr_set_log_mode(mtr, log_mode); |
|
907 |
}
|
|
908 |
||
909 |
/*****************************************************************
|
|
910 |
Reorganizes an index page. */
|
|
911 |
||
912 |
void
|
|
913 |
btr_page_reorganize( |
|
914 |
/*================*/
|
|
915 |
page_t* page, /* in: page to be reorganized */ |
|
916 |
dict_index_t* index, /* in: record descriptor */ |
|
917 |
mtr_t* mtr) /* in: mtr */ |
|
918 |
{
|
|
919 |
btr_page_reorganize_low(FALSE, page, index, mtr); |
|
920 |
}
|
|
921 |
||
922 |
/***************************************************************
|
|
923 |
Parses a redo log record of reorganizing a page. */
|
|
924 |
||
925 |
byte* |
|
926 |
btr_parse_page_reorganize( |
|
927 |
/*======================*/
|
|
928 |
/* out: end of log record or NULL */
|
|
929 |
byte* ptr, /* in: buffer */ |
|
930 |
byte* end_ptr __attribute__((unused)), |
|
931 |
/* in: buffer end */
|
|
932 |
dict_index_t* index, /* in: record descriptor */ |
|
933 |
page_t* page, /* in: page or NULL */ |
|
934 |
mtr_t* mtr) /* in: mtr or NULL */ |
|
935 |
{
|
|
936 |
ut_ad(ptr && end_ptr); |
|
937 |
||
938 |
/* The record is empty, except for the record initial part */
|
|
939 |
||
940 |
if (page) { |
|
941 |
btr_page_reorganize_low(TRUE, page, index, mtr); |
|
942 |
}
|
|
943 |
||
944 |
return(ptr); |
|
945 |
}
|
|
946 |
||
947 |
/*****************************************************************
|
|
948 |
Empties an index page. */
|
|
949 |
static
|
|
950 |
void
|
|
951 |
btr_page_empty( |
|
952 |
/*===========*/
|
|
953 |
page_t* page, /* in: page to be emptied */ |
|
954 |
mtr_t* mtr) /* in: mtr */ |
|
955 |
{
|
|
956 |
ut_ad(mtr_memo_contains(mtr, buf_block_align(page), |
|
957 |
MTR_MEMO_PAGE_X_FIX)); |
|
958 |
btr_search_drop_page_hash_index(page); |
|
959 |
||
960 |
/* Recreate the page: note that global data on page (possible
|
|
961 |
segment headers, next page-field, etc.) is preserved intact */
|
|
962 |
||
963 |
page_create(page, mtr, page_is_comp(page)); |
|
964 |
buf_block_align(page)->check_index_page_at_flush = TRUE; |
|
965 |
}
|
|
966 |
||
967 |
/*****************************************************************
|
|
968 |
Makes tree one level higher by splitting the root, and inserts
|
|
969 |
the tuple. It is assumed that mtr contains an x-latch on the tree.
|
|
970 |
NOTE that the operation of this function must always succeed,
|
|
971 |
we cannot reverse it: therefore enough free disk space must be
|
|
972 |
guaranteed to be available before this function is called. */
|
|
973 |
||
974 |
rec_t* |
|
975 |
btr_root_raise_and_insert( |
|
976 |
/*======================*/
|
|
977 |
/* out: inserted record */
|
|
978 |
btr_cur_t* cursor, /* in: cursor at which to insert: must be |
|
979 |
on the root page; when the function returns,
|
|
980 |
the cursor is positioned on the predecessor
|
|
981 |
of the inserted record */
|
|
982 |
dtuple_t* tuple, /* in: tuple to insert */ |
|
983 |
mtr_t* mtr) /* in: mtr */ |
|
984 |
{
|
|
985 |
dict_index_t* index; |
|
986 |
page_t* root; |
|
987 |
page_t* new_page; |
|
988 |
ulint new_page_no; |
|
989 |
rec_t* rec; |
|
990 |
mem_heap_t* heap; |
|
991 |
dtuple_t* node_ptr; |
|
992 |
ulint level; |
|
993 |
rec_t* node_ptr_rec; |
|
994 |
page_cur_t* page_cursor; |
|
995 |
||
996 |
root = btr_cur_get_page(cursor); |
|
997 |
index = btr_cur_get_index(cursor); |
|
998 |
||
999 |
ut_ad(dict_index_get_page(index) == buf_frame_get_page_no(root)); |
|
1000 |
ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index), |
|
1001 |
MTR_MEMO_X_LOCK)); |
|
1002 |
ut_ad(mtr_memo_contains(mtr, buf_block_align(root), |
|
1003 |
MTR_MEMO_PAGE_X_FIX)); |
|
1004 |
btr_search_drop_page_hash_index(root); |
|
1005 |
||
1006 |
/* Allocate a new page to the tree. Root splitting is done by first
|
|
1007 |
moving the root records to the new page, emptying the root, putting
|
|
1008 |
a node pointer to the new page, and then splitting the new page. */
|
|
1009 |
||
1010 |
new_page = btr_page_alloc(index, 0, FSP_NO_DIR, |
|
1011 |
btr_page_get_level(root, mtr), mtr); |
|
1012 |
||
1013 |
btr_page_create(new_page, index, mtr); |
|
1014 |
||
1015 |
level = btr_page_get_level(root, mtr); |
|
1016 |
||
1017 |
/* Set the levels of the new index page and root page */
|
|
1018 |
btr_page_set_level(new_page, level, mtr); |
|
1019 |
btr_page_set_level(root, level + 1, mtr); |
|
1020 |
||
1021 |
/* Set the next node and previous node fields of new page */
|
|
1022 |
btr_page_set_next(new_page, FIL_NULL, mtr); |
|
1023 |
btr_page_set_prev(new_page, FIL_NULL, mtr); |
|
1024 |
||
1025 |
/* Move the records from root to the new page */
|
|
1026 |
||
1027 |
page_move_rec_list_end(new_page, root, page_get_infimum_rec(root), |
|
1028 |
index, mtr); |
|
1029 |
/* If this is a pessimistic insert which is actually done to
|
|
1030 |
perform a pessimistic update then we have stored the lock
|
|
1031 |
information of the record to be inserted on the infimum of the
|
|
1032 |
root page: we cannot discard the lock structs on the root page */
|
|
1033 |
||
1034 |
lock_update_root_raise(new_page, root); |
|
1035 |
||
1036 |
/* Create a memory heap where the node pointer is stored */
|
|
1037 |
heap = mem_heap_create(100); |
|
1038 |
||
1039 |
rec = page_rec_get_next(page_get_infimum_rec(new_page)); |
|
1040 |
new_page_no = buf_frame_get_page_no(new_page); |
|
1041 |
||
1042 |
/* Build the node pointer (= node key and page address) for the
|
|
1043 |
child */
|
|
1044 |
||
1045 |
node_ptr = dict_index_build_node_ptr(index, rec, new_page_no, heap, |
|
1046 |
level); |
|
1047 |
/* Reorganize the root to get free space */
|
|
1048 |
btr_page_reorganize(root, index, mtr); |
|
1049 |
||
1050 |
page_cursor = btr_cur_get_page_cur(cursor); |
|
1051 |
||
1052 |
/* Insert node pointer to the root */
|
|
1053 |
||
1054 |
page_cur_set_before_first(root, page_cursor); |
|
1055 |
||
1056 |
node_ptr_rec = page_cur_tuple_insert(page_cursor, node_ptr, |
|
1057 |
index, mtr); |
|
1058 |
||
1059 |
ut_ad(node_ptr_rec); |
|
1060 |
||
1061 |
/* The node pointer must be marked as the predefined minimum record,
|
|
1062 |
as there is no lower alphabetical limit to records in the leftmost
|
|
1063 |
node of a level: */
|
|
1064 |
||
1065 |
btr_set_min_rec_mark(node_ptr_rec, page_is_comp(root), mtr); |
|
1066 |
||
1067 |
/* Free the memory heap */
|
|
1068 |
mem_heap_free(heap); |
|
1069 |
||
1070 |
/* We play safe and reset the free bits for the new page */
|
|
1071 |
||
1072 |
#if 0
|
|
1073 |
fprintf(stderr, "Root raise new page no %lu\n",
|
|
1074 |
buf_frame_get_page_no(new_page));
|
|
1075 |
#endif
|
|
1076 |
||
1077 |
ibuf_reset_free_bits(index, new_page); |
|
1078 |
/* Reposition the cursor to the child node */
|
|
1079 |
page_cur_search(new_page, index, tuple, |
|
1080 |
PAGE_CUR_LE, page_cursor); |
|
1081 |
||
1082 |
/* Split the child and insert tuple */
|
|
1083 |
return(btr_page_split_and_insert(cursor, tuple, mtr)); |
|
1084 |
}
|
|
1085 |
||
1086 |
/*****************************************************************
|
|
1087 |
Decides if the page should be split at the convergence point of inserts
|
|
1088 |
converging to the left. */
|
|
1089 |
||
1090 |
ibool
|
|
1091 |
btr_page_get_split_rec_to_left( |
|
1092 |
/*===========================*/
|
|
1093 |
/* out: TRUE if split recommended */
|
|
1094 |
btr_cur_t* cursor, /* in: cursor at which to insert */ |
|
1095 |
rec_t** split_rec) /* out: if split recommended, |
|
1096 |
the first record on upper half page,
|
|
1097 |
or NULL if tuple to be inserted should
|
|
1098 |
be first */
|
|
1099 |
{
|
|
1100 |
page_t* page; |
|
1101 |
rec_t* insert_point; |
|
1102 |
rec_t* infimum; |
|
1103 |
||
1104 |
page = btr_cur_get_page(cursor); |
|
1105 |
insert_point = btr_cur_get_rec(cursor); |
|
1106 |
||
1107 |
if (page_header_get_ptr(page, PAGE_LAST_INSERT) |
|
1108 |
== page_rec_get_next(insert_point)) { |
|
1109 |
||
1110 |
infimum = page_get_infimum_rec(page); |
|
1111 |
||
1112 |
/* If the convergence is in the middle of a page, include also
|
|
1113 |
the record immediately before the new insert to the upper
|
|
1114 |
page. Otherwise, we could repeatedly move from page to page
|
|
1115 |
lots of records smaller than the convergence point. */
|
|
1116 |
||
1117 |
if (infimum != insert_point |
|
1118 |
&& page_rec_get_next(infimum) != insert_point) { |
|
1119 |
||
1120 |
*split_rec = insert_point; |
|
1121 |
} else { |
|
1122 |
*split_rec = page_rec_get_next(insert_point); |
|
1123 |
}
|
|
1124 |
||
1125 |
return(TRUE); |
|
1126 |
}
|
|
1127 |
||
1128 |
return(FALSE); |
|
1129 |
}
|
|
1130 |
||
1131 |
/*****************************************************************
|
|
1132 |
Decides if the page should be split at the convergence point of inserts
|
|
1133 |
converging to the right. */
|
|
1134 |
||
1135 |
ibool
|
|
1136 |
btr_page_get_split_rec_to_right( |
|
1137 |
/*============================*/
|
|
1138 |
/* out: TRUE if split recommended */
|
|
1139 |
btr_cur_t* cursor, /* in: cursor at which to insert */ |
|
1140 |
rec_t** split_rec) /* out: if split recommended, |
|
1141 |
the first record on upper half page,
|
|
1142 |
or NULL if tuple to be inserted should
|
|
1143 |
be first */
|
|
1144 |
{
|
|
1145 |
page_t* page; |
|
1146 |
rec_t* insert_point; |
|
1147 |
||
1148 |
page = btr_cur_get_page(cursor); |
|
1149 |
insert_point = btr_cur_get_rec(cursor); |
|
1150 |
||
1151 |
/* We use eager heuristics: if the new insert would be right after
|
|
1152 |
the previous insert on the same page, we assume that there is a
|
|
1153 |
pattern of sequential inserts here. */
|
|
1154 |
||
1155 |
if (UNIV_LIKELY(page_header_get_ptr(page, PAGE_LAST_INSERT) |
|
1156 |
== insert_point)) { |
|
1157 |
||
1158 |
rec_t* next_rec; |
|
1159 |
||
1160 |
next_rec = page_rec_get_next(insert_point); |
|
1161 |
||
1162 |
if (page_rec_is_supremum(next_rec)) { |
|
1163 |
split_at_new: |
|
1164 |
/* Split at the new record to insert */
|
|
1165 |
*split_rec = NULL; |
|
1166 |
} else { |
|
1167 |
rec_t* next_next_rec = page_rec_get_next(next_rec); |
|
1168 |
if (page_rec_is_supremum(next_next_rec)) { |
|
1169 |
||
1170 |
goto split_at_new; |
|
1171 |
}
|
|
1172 |
||
1173 |
/* If there are >= 2 user records up from the insert
|
|
1174 |
point, split all but 1 off. We want to keep one because
|
|
1175 |
then sequential inserts can use the adaptive hash
|
|
1176 |
index, as they can do the necessary checks of the right
|
|
1177 |
search position just by looking at the records on this
|
|
1178 |
page. */
|
|
1179 |
||
1180 |
*split_rec = next_next_rec; |
|
1181 |
}
|
|
1182 |
||
1183 |
return(TRUE); |
|
1184 |
}
|
|
1185 |
||
1186 |
return(FALSE); |
|
1187 |
}
|
|
1188 |
||
1189 |
/*****************************************************************
|
|
1190 |
Calculates a split record such that the tuple will certainly fit on
|
|
1191 |
its half-page when the split is performed. We assume in this function
|
|
1192 |
only that the cursor page has at least one user record. */
|
|
1193 |
static
|
|
1194 |
rec_t* |
|
1195 |
btr_page_get_sure_split_rec( |
|
1196 |
/*========================*/
|
|
1197 |
/* out: split record, or NULL if
|
|
1198 |
tuple will be the first record on
|
|
1199 |
upper half-page */
|
|
1200 |
btr_cur_t* cursor, /* in: cursor at which insert |
|
1201 |
should be made */
|
|
1202 |
dtuple_t* tuple) /* in: tuple to insert */ |
|
1203 |
{
|
|
1204 |
page_t* page; |
|
1205 |
ulint insert_size; |
|
1206 |
ulint free_space; |
|
1207 |
ulint total_data; |
|
1208 |
ulint total_n_recs; |
|
1209 |
ulint total_space; |
|
1210 |
ulint incl_data; |
|
1211 |
rec_t* ins_rec; |
|
1212 |
rec_t* rec; |
|
1213 |
rec_t* next_rec; |
|
1214 |
ulint n; |
|
1215 |
mem_heap_t* heap; |
|
1216 |
ulint* offsets; |
|
1217 |
||
1218 |
page = btr_cur_get_page(cursor); |
|
1219 |
||
1220 |
insert_size = rec_get_converted_size(cursor->index, tuple); |
|
1221 |
free_space = page_get_free_space_of_empty(page_is_comp(page)); |
|
1222 |
||
1223 |
/* free_space is now the free space of a created new page */
|
|
1224 |
||
1225 |
total_data = page_get_data_size(page) + insert_size; |
|
1226 |
total_n_recs = page_get_n_recs(page) + 1; |
|
1227 |
ut_ad(total_n_recs >= 2); |
|
1228 |
total_space = total_data + page_dir_calc_reserved_space(total_n_recs); |
|
1229 |
||
1230 |
n = 0; |
|
1231 |
incl_data = 0; |
|
1232 |
ins_rec = btr_cur_get_rec(cursor); |
|
1233 |
rec = page_get_infimum_rec(page); |
|
1234 |
||
1235 |
heap = NULL; |
|
1236 |
offsets = NULL; |
|
1237 |
||
1238 |
/* We start to include records to the left half, and when the
|
|
1239 |
space reserved by them exceeds half of total_space, then if
|
|
1240 |
the included records fit on the left page, they will be put there
|
|
1241 |
if something was left over also for the right page,
|
|
1242 |
otherwise the last included record will be the first on the right
|
|
1243 |
half page */
|
|
1244 |
||
1245 |
for (;;) { |
|
1246 |
/* Decide the next record to include */
|
|
1247 |
if (rec == ins_rec) { |
|
1248 |
rec = NULL; /* NULL denotes that tuple is |
|
1249 |
now included */
|
|
1250 |
} else if (rec == NULL) { |
|
1251 |
rec = page_rec_get_next(ins_rec); |
|
1252 |
} else { |
|
1253 |
rec = page_rec_get_next(rec); |
|
1254 |
}
|
|
1255 |
||
1256 |
if (rec == NULL) { |
|
1257 |
/* Include tuple */
|
|
1258 |
incl_data += insert_size; |
|
1259 |
} else { |
|
1260 |
offsets = rec_get_offsets(rec, cursor->index, |
|
1261 |
offsets, ULINT_UNDEFINED, |
|
1262 |
&heap); |
|
1263 |
incl_data += rec_offs_size(offsets); |
|
1264 |
}
|
|
1265 |
||
1266 |
n++; |
|
1267 |
||
1268 |
if (incl_data + page_dir_calc_reserved_space(n) |
|
1269 |
>= total_space / 2) { |
|
1270 |
||
1271 |
if (incl_data + page_dir_calc_reserved_space(n) |
|
1272 |
<= free_space) { |
|
1273 |
/* The next record will be the first on
|
|
1274 |
the right half page if it is not the
|
|
1275 |
supremum record of page */
|
|
1276 |
||
1277 |
if (rec == ins_rec) { |
|
1278 |
rec = NULL; |
|
1279 |
||
1280 |
goto func_exit; |
|
1281 |
} else if (rec == NULL) { |
|
1282 |
next_rec = page_rec_get_next(ins_rec); |
|
1283 |
} else { |
|
1284 |
next_rec = page_rec_get_next(rec); |
|
1285 |
}
|
|
1286 |
ut_ad(next_rec); |
|
1287 |
if (!page_rec_is_supremum(next_rec)) { |
|
1288 |
rec = next_rec; |
|
1289 |
}
|
|
1290 |
}
|
|
1291 |
||
1292 |
func_exit: |
|
1293 |
if (UNIV_LIKELY_NULL(heap)) { |
|
1294 |
mem_heap_free(heap); |
|
1295 |
}
|
|
1296 |
return(rec); |
|
1297 |
}
|
|
1298 |
}
|
|
1299 |
}
|
|
1300 |
||
1301 |
/*****************************************************************
|
|
1302 |
Returns TRUE if the insert fits on the appropriate half-page with the
|
|
1303 |
chosen split_rec. */
|
|
1304 |
static
|
|
1305 |
ibool
|
|
1306 |
btr_page_insert_fits( |
|
1307 |
/*=================*/
|
|
1308 |
/* out: TRUE if fits */
|
|
1309 |
btr_cur_t* cursor, /* in: cursor at which insert |
|
1310 |
should be made */
|
|
1311 |
rec_t* split_rec, /* in: suggestion for first record |
|
1312 |
on upper half-page, or NULL if
|
|
1313 |
tuple to be inserted should be first */
|
|
1314 |
const ulint* offsets, /* in: rec_get_offsets( |
|
1315 |
split_rec, cursor->index) */
|
|
1316 |
dtuple_t* tuple, /* in: tuple to insert */ |
|
1317 |
mem_heap_t* heap) /* in: temporary memory heap */ |
|
1318 |
{
|
|
1319 |
page_t* page; |
|
1320 |
ulint insert_size; |
|
1321 |
ulint free_space; |
|
1322 |
ulint total_data; |
|
1323 |
ulint total_n_recs; |
|
1324 |
rec_t* rec; |
|
1325 |
rec_t* end_rec; |
|
1326 |
ulint* offs; |
|
1327 |
||
1328 |
page = btr_cur_get_page(cursor); |
|
1329 |
||
1330 |
ut_ad(!split_rec == !offsets); |
|
1331 |
ut_ad(!offsets |
|
1332 |
|| !page_is_comp(page) == !rec_offs_comp(offsets)); |
|
1333 |
ut_ad(!offsets |
|
1334 |
|| rec_offs_validate(split_rec, cursor->index, offsets)); |
|
1335 |
||
1336 |
insert_size = rec_get_converted_size(cursor->index, tuple); |
|
1337 |
free_space = page_get_free_space_of_empty(page_is_comp(page)); |
|
1338 |
||
1339 |
/* free_space is now the free space of a created new page */
|
|
1340 |
||
1341 |
total_data = page_get_data_size(page) + insert_size; |
|
1342 |
total_n_recs = page_get_n_recs(page) + 1; |
|
1343 |
||
1344 |
/* We determine which records (from rec to end_rec, not including
|
|
1345 |
end_rec) will end up on the other half page from tuple when it is
|
|
1346 |
inserted. */
|
|
1347 |
||
1348 |
if (split_rec == NULL) { |
|
1349 |
rec = page_rec_get_next(page_get_infimum_rec(page)); |
|
1350 |
end_rec = page_rec_get_next(btr_cur_get_rec(cursor)); |
|
1351 |
||
1352 |
} else if (cmp_dtuple_rec(tuple, split_rec, offsets) >= 0) { |
|
1353 |
||
1354 |
rec = page_rec_get_next(page_get_infimum_rec(page)); |
|
1355 |
end_rec = split_rec; |
|
1356 |
} else { |
|
1357 |
rec = split_rec; |
|
1358 |
end_rec = page_get_supremum_rec(page); |
|
1359 |
}
|
|
1360 |
||
1361 |
if (total_data + page_dir_calc_reserved_space(total_n_recs) |
|
1362 |
<= free_space) { |
|
1363 |
||
1364 |
/* Ok, there will be enough available space on the
|
|
1365 |
half page where the tuple is inserted */
|
|
1366 |
||
1367 |
return(TRUE); |
|
1368 |
}
|
|
1369 |
||
1370 |
offs = NULL; |
|
1371 |
||
1372 |
while (rec != end_rec) { |
|
1373 |
/* In this loop we calculate the amount of reserved
|
|
1374 |
space after rec is removed from page. */
|
|
1375 |
||
1376 |
offs = rec_get_offsets(rec, cursor->index, offs, |
|
1377 |
ULINT_UNDEFINED, &heap); |
|
1378 |
||
1379 |
total_data -= rec_offs_size(offs); |
|
1380 |
total_n_recs--; |
|
1381 |
||
1382 |
if (total_data + page_dir_calc_reserved_space(total_n_recs) |
|
1383 |
<= free_space) { |
|
1384 |
||
1385 |
/* Ok, there will be enough available space on the
|
|
1386 |
half page where the tuple is inserted */
|
|
1387 |
||
1388 |
return(TRUE); |
|
1389 |
}
|
|
1390 |
||
1391 |
rec = page_rec_get_next(rec); |
|
1392 |
}
|
|
1393 |
||
1394 |
return(FALSE); |
|
1395 |
}
|
|
1396 |
||
1397 |
/***********************************************************
|
|
1398 |
Inserts a data tuple to a tree on a non-leaf level. It is assumed
|
|
1399 |
that mtr holds an x-latch on the tree. */
|
|
1400 |
||
1401 |
void
|
|
1402 |
btr_insert_on_non_leaf_level( |
|
1403 |
/*=========================*/
|
|
1404 |
dict_index_t* index, /* in: index */ |
|
1405 |
ulint level, /* in: level, must be > 0 */ |
|
1406 |
dtuple_t* tuple, /* in: the record to be inserted */ |
|
1407 |
mtr_t* mtr) /* in: mtr */ |
|
1408 |
{
|
|
1409 |
big_rec_t* dummy_big_rec; |
|
1410 |
btr_cur_t cursor; |
|
1411 |
ulint err; |
|
1412 |
rec_t* rec; |
|
1413 |
||
1414 |
ut_ad(level > 0); |
|
1415 |
||
1416 |
btr_cur_search_to_nth_level(index, level, tuple, PAGE_CUR_LE, |
|
1417 |
BTR_CONT_MODIFY_TREE, |
|
1418 |
&cursor, 0, mtr); |
|
1419 |
||
1420 |
err = btr_cur_pessimistic_insert(BTR_NO_LOCKING_FLAG |
|
1421 |
| BTR_KEEP_SYS_FLAG |
|
1422 |
| BTR_NO_UNDO_LOG_FLAG, |
|
1423 |
&cursor, tuple, &rec, |
|
1424 |
&dummy_big_rec, NULL, mtr); |
|
1425 |
ut_a(err == DB_SUCCESS); |
|
1426 |
}
|
|
1427 |
||
1428 |
/******************************************************************
|
|
1429 |
Attaches the halves of an index page on the appropriate level in an
|
|
1430 |
index tree. */
|
|
1431 |
static
|
|
1432 |
void
|
|
1433 |
btr_attach_half_pages( |
|
1434 |
/*==================*/
|
|
1435 |
dict_index_t* index, /* in: the index tree */ |
|
1436 |
page_t* page, /* in: page to be split */ |
|
1437 |
rec_t* split_rec, /* in: first record on upper |
|
1438 |
half page */
|
|
1439 |
page_t* new_page, /* in: the new half page */ |
|
1440 |
ulint direction, /* in: FSP_UP or FSP_DOWN */ |
|
1441 |
mtr_t* mtr) /* in: mtr */ |
|
1442 |
{
|
|
1443 |
ulint space; |
|
1444 |
rec_t* node_ptr; |
|
1445 |
page_t* prev_page; |
|
1446 |
page_t* next_page; |
|
1447 |
ulint prev_page_no; |
|
1448 |
ulint next_page_no; |
|
1449 |
ulint level; |
|
1450 |
page_t* lower_page; |
|
1451 |
page_t* upper_page; |
|
1452 |
ulint lower_page_no; |
|
1453 |
ulint upper_page_no; |
|
1454 |
dtuple_t* node_ptr_upper; |
|
1455 |
mem_heap_t* heap; |
|
1456 |
||
1457 |
ut_ad(mtr_memo_contains(mtr, buf_block_align(page), |
|
1458 |
MTR_MEMO_PAGE_X_FIX)); |
|
1459 |
ut_ad(mtr_memo_contains(mtr, buf_block_align(new_page), |
|
1460 |
MTR_MEMO_PAGE_X_FIX)); |
|
1461 |
ut_a(page_is_comp(page) == page_is_comp(new_page)); |
|
1462 |
||
1463 |
/* Create a memory heap where the data tuple is stored */
|
|
1464 |
heap = mem_heap_create(1024); |
|
1465 |
||
1466 |
/* Based on split direction, decide upper and lower pages */
|
|
1467 |
if (direction == FSP_DOWN) { |
|
1468 |
||
1469 |
lower_page_no = buf_frame_get_page_no(new_page); |
|
1470 |
upper_page_no = buf_frame_get_page_no(page); |
|
1471 |
lower_page = new_page; |
|
1472 |
upper_page = page; |
|
1473 |
||
1474 |
/* Look up the index for the node pointer to page */
|
|
1475 |
node_ptr = btr_page_get_father_node_ptr(index, page, mtr); |
|
1476 |
||
1477 |
/* Replace the address of the old child node (= page) with the
|
|
1478 |
address of the new lower half */
|
|
1479 |
||
1480 |
btr_node_ptr_set_child_page_no(node_ptr, |
|
1481 |
rec_get_offsets( |
|
1482 |
node_ptr, index, |
|
1483 |
NULL, ULINT_UNDEFINED, |
|
1484 |
&heap), |
|
1485 |
lower_page_no, mtr); |
|
1486 |
mem_heap_empty(heap); |
|
1487 |
} else { |
|
1488 |
lower_page_no = buf_frame_get_page_no(page); |
|
1489 |
upper_page_no = buf_frame_get_page_no(new_page); |
|
1490 |
lower_page = page; |
|
1491 |
upper_page = new_page; |
|
1492 |
}
|
|
1493 |
||
1494 |
/* Get the level of the split pages */
|
|
1495 |
level = btr_page_get_level(page, mtr); |
|
1496 |
||
1497 |
/* Build the node pointer (= node key and page address) for the upper
|
|
1498 |
half */
|
|
1499 |
||
1500 |
node_ptr_upper = dict_index_build_node_ptr(index, split_rec, |
|
1501 |
upper_page_no, heap, level); |
|
1502 |
||
1503 |
/* Insert it next to the pointer to the lower half. Note that this
|
|
1504 |
may generate recursion leading to a split on the higher level. */
|
|
1505 |
||
1506 |
btr_insert_on_non_leaf_level(index, level + 1, node_ptr_upper, mtr); |
|
1507 |
||
1508 |
/* Free the memory heap */
|
|
1509 |
mem_heap_free(heap); |
|
1510 |
||
1511 |
/* Get the previous and next pages of page */
|
|
1512 |
||
1513 |
prev_page_no = btr_page_get_prev(page, mtr); |
|
1514 |
next_page_no = btr_page_get_next(page, mtr); |
|
1515 |
space = buf_frame_get_space_id(page); |
|
1516 |
||
1517 |
/* Update page links of the level */
|
|
1518 |
||
1519 |
if (prev_page_no != FIL_NULL) { |
|
1520 |
||
1521 |
prev_page = btr_page_get(space, prev_page_no, RW_X_LATCH, mtr); |
|
1522 |
ut_a(page_is_comp(prev_page) == page_is_comp(page)); |
|
1523 |
#ifdef UNIV_BTR_DEBUG
|
|
1524 |
ut_a(btr_page_get_next(prev_page, mtr) |
|
1525 |
== buf_frame_get_page_no(page)); |
|
1526 |
#endif /* UNIV_BTR_DEBUG */ |
|
1527 |
||
1528 |
btr_page_set_next(prev_page, lower_page_no, mtr); |
|
1529 |
}
|
|
1530 |
||
1531 |
if (next_page_no != FIL_NULL) { |
|
1532 |
||
1533 |
next_page = btr_page_get(space, next_page_no, RW_X_LATCH, mtr); |
|
1534 |
ut_a(page_is_comp(next_page) == page_is_comp(page)); |
|
1535 |
||
1536 |
btr_page_set_prev(next_page, upper_page_no, mtr); |
|
1537 |
}
|
|
1538 |
||
1539 |
btr_page_set_prev(lower_page, prev_page_no, mtr); |
|
1540 |
btr_page_set_next(lower_page, upper_page_no, mtr); |
|
1541 |
btr_page_set_level(lower_page, level, mtr); |
|
1542 |
||
1543 |
btr_page_set_prev(upper_page, lower_page_no, mtr); |
|
1544 |
btr_page_set_next(upper_page, next_page_no, mtr); |
|
1545 |
btr_page_set_level(upper_page, level, mtr); |
|
1546 |
}
|
|
1547 |
||
1548 |
/*****************************************************************
|
|
1549 |
Splits an index page to halves and inserts the tuple. It is assumed
|
|
1550 |
that mtr holds an x-latch to the index tree. NOTE: the tree x-latch
|
|
1551 |
is released within this function! NOTE that the operation of this
|
|
1552 |
function must always succeed, we cannot reverse it: therefore
|
|
1553 |
enough free disk space must be guaranteed to be available before
|
|
1554 |
this function is called. */
|
|
1555 |
||
1556 |
rec_t* |
|
1557 |
btr_page_split_and_insert( |
|
1558 |
/*======================*/
|
|
1559 |
/* out: inserted record; NOTE: the tree
|
|
1560 |
x-latch is released! NOTE: 2 free disk
|
|
1561 |
pages must be available! */
|
|
1562 |
btr_cur_t* cursor, /* in: cursor at which to insert; when the |
|
1563 |
function returns, the cursor is positioned
|
|
1564 |
on the predecessor of the inserted record */
|
|
1565 |
dtuple_t* tuple, /* in: tuple to insert */ |
|
1566 |
mtr_t* mtr) /* in: mtr */ |
|
1567 |
{
|
|
1568 |
page_t* page; |
|
1569 |
ulint page_no; |
|
1570 |
byte direction; |
|
1571 |
ulint hint_page_no; |
|
1572 |
page_t* new_page; |
|
1573 |
rec_t* split_rec; |
|
1574 |
page_t* left_page; |
|
1575 |
page_t* right_page; |
|
1576 |
page_t* insert_page; |
|
1577 |
page_cur_t* page_cursor; |
|
1578 |
rec_t* first_rec; |
|
1579 |
byte* buf = 0; /* remove warning */ |
|
1580 |
rec_t* move_limit; |
|
1581 |
ibool insert_will_fit; |
|
1582 |
ulint n_iterations = 0; |
|
1583 |
rec_t* rec; |
|
1584 |
mem_heap_t* heap; |
|
1585 |
ulint n_uniq; |
|
1586 |
ulint* offsets; |
|
1587 |
||
1588 |
heap = mem_heap_create(1024); |
|
1589 |
n_uniq = dict_index_get_n_unique_in_tree(cursor->index); |
|
1590 |
func_start: |
|
1591 |
mem_heap_empty(heap); |
|
1592 |
offsets = NULL; |
|
1593 |
||
1594 |
ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(cursor->index), |
|
1595 |
MTR_MEMO_X_LOCK)); |
|
1596 |
#ifdef UNIV_SYNC_DEBUG
|
|
1597 |
ut_ad(rw_lock_own(dict_index_get_lock(cursor->index), RW_LOCK_EX)); |
|
1598 |
#endif /* UNIV_SYNC_DEBUG */ |
|
1599 |
||
1600 |
page = btr_cur_get_page(cursor); |
|
1601 |
||
1602 |
ut_ad(mtr_memo_contains(mtr, buf_block_align(page), |
|
1603 |
MTR_MEMO_PAGE_X_FIX)); |
|
1604 |
ut_ad(page_get_n_recs(page) >= 2); |
|
1605 |
||
1606 |
page_no = buf_frame_get_page_no(page); |
|
1607 |
||
1608 |
/* 1. Decide the split record; split_rec == NULL means that the
|
|
1609 |
tuple to be inserted should be the first record on the upper
|
|
1610 |
half-page */
|
|
1611 |
||
1612 |
if (n_iterations > 0) { |
|
1613 |
direction = FSP_UP; |
|
1614 |
hint_page_no = page_no + 1; |
|
1615 |
split_rec = btr_page_get_sure_split_rec(cursor, tuple); |
|
1616 |
||
1617 |
} else if (btr_page_get_split_rec_to_right(cursor, &split_rec)) { |
|
1618 |
direction = FSP_UP; |
|
1619 |
hint_page_no = page_no + 1; |
|
1620 |
||
1621 |
} else if (btr_page_get_split_rec_to_left(cursor, &split_rec)) { |
|
1622 |
direction = FSP_DOWN; |
|
1623 |
hint_page_no = page_no - 1; |
|
1624 |
} else { |
|
1625 |
direction = FSP_UP; |
|
1626 |
hint_page_no = page_no + 1; |
|
1627 |
split_rec = page_get_middle_rec(page); |
|
1628 |
}
|
|
1629 |
||
1630 |
/* 2. Allocate a new page to the index */
|
|
1631 |
new_page = btr_page_alloc(cursor->index, hint_page_no, direction, |
|
1632 |
btr_page_get_level(page, mtr), mtr); |
|
1633 |
btr_page_create(new_page, cursor->index, mtr); |
|
1634 |
||
1635 |
/* 3. Calculate the first record on the upper half-page, and the
|
|
1636 |
first record (move_limit) on original page which ends up on the
|
|
1637 |
upper half */
|
|
1638 |
||
1639 |
if (split_rec != NULL) { |
|
1640 |
first_rec = split_rec; |
|
1641 |
move_limit = split_rec; |
|
1642 |
} else { |
|
1643 |
buf = mem_alloc(rec_get_converted_size(cursor->index, tuple)); |
|
1644 |
||
1645 |
first_rec = rec_convert_dtuple_to_rec(buf, |
|
1646 |
cursor->index, tuple); |
|
1647 |
move_limit = page_rec_get_next(btr_cur_get_rec(cursor)); |
|
1648 |
}
|
|
1649 |
||
1650 |
/* 4. Do first the modifications in the tree structure */
|
|
1651 |
||
1652 |
btr_attach_half_pages(cursor->index, page, first_rec, |
|
1653 |
new_page, direction, mtr); |
|
1654 |
||
1655 |
if (split_rec == NULL) { |
|
1656 |
mem_free(buf); |
|
1657 |
}
|
|
1658 |
||
1659 |
/* If the split is made on the leaf level and the insert will fit
|
|
1660 |
on the appropriate half-page, we may release the tree x-latch.
|
|
1661 |
We can then move the records after releasing the tree latch,
|
|
1662 |
thus reducing the tree latch contention. */
|
|
1663 |
||
1664 |
if (split_rec) { |
|
1665 |
offsets = rec_get_offsets(split_rec, cursor->index, offsets, |
|
1666 |
n_uniq, &heap); |
|
1667 |
||
1668 |
insert_will_fit = btr_page_insert_fits(cursor, |
|
1669 |
split_rec, offsets, |
|
1670 |
tuple, heap); |
|
1671 |
} else { |
|
1672 |
insert_will_fit = btr_page_insert_fits(cursor, |
|
1673 |
NULL, NULL, |
|
1674 |
tuple, heap); |
|
1675 |
}
|
|
1676 |
||
1677 |
if (insert_will_fit && (btr_page_get_level(page, mtr) == 0)) { |
|
1678 |
||
1679 |
mtr_memo_release(mtr, dict_index_get_lock(cursor->index), |
|
1680 |
MTR_MEMO_X_LOCK); |
|
1681 |
}
|
|
1682 |
||
1683 |
/* 5. Move then the records to the new page */
|
|
1684 |
if (direction == FSP_DOWN) { |
|
1685 |
/* fputs("Split left\n", stderr); */
|
|
1686 |
||
1687 |
page_move_rec_list_start(new_page, page, move_limit, |
|
1688 |
cursor->index, mtr); |
|
1689 |
left_page = new_page; |
|
1690 |
right_page = page; |
|
1691 |
||
1692 |
lock_update_split_left(right_page, left_page); |
|
1693 |
} else { |
|
1694 |
/* fputs("Split right\n", stderr); */
|
|
1695 |
||
1696 |
page_move_rec_list_end(new_page, page, move_limit, |
|
1697 |
cursor->index, mtr); |
|
1698 |
left_page = page; |
|
1699 |
right_page = new_page; |
|
1700 |
||
1701 |
lock_update_split_right(right_page, left_page); |
|
1702 |
}
|
|
1703 |
||
1704 |
/* 6. The split and the tree modification is now completed. Decide the
|
|
1705 |
page where the tuple should be inserted */
|
|
1706 |
||
1707 |
if (split_rec == NULL) { |
|
1708 |
insert_page = right_page; |
|
1709 |
||
1710 |
} else { |
|
1711 |
offsets = rec_get_offsets(first_rec, cursor->index, |
|
1712 |
offsets, n_uniq, &heap); |
|
1713 |
||
1714 |
if (cmp_dtuple_rec(tuple, first_rec, offsets) >= 0) { |
|
1715 |
||
1716 |
insert_page = right_page; |
|
1717 |
} else { |
|
1718 |
insert_page = left_page; |
|
1719 |
}
|
|
1720 |
}
|
|
1721 |
||
1722 |
/* 7. Reposition the cursor for insert and try insertion */
|
|
1723 |
page_cursor = btr_cur_get_page_cur(cursor); |
|
1724 |
||
1725 |
page_cur_search(insert_page, cursor->index, tuple, |
|
1726 |
PAGE_CUR_LE, page_cursor); |
|
1727 |
||
1728 |
rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index, mtr); |
|
1729 |
||
1730 |
if (rec != NULL) { |
|
1731 |
/* Insert fit on the page: update the free bits for the
|
|
1732 |
left and right pages in the same mtr */
|
|
1733 |
||
1734 |
ibuf_update_free_bits_for_two_pages_low(cursor->index, |
|
1735 |
left_page, |
|
1736 |
right_page, mtr); |
|
1737 |
/* fprintf(stderr, "Split and insert done %lu %lu\n",
|
|
1738 |
buf_frame_get_page_no(left_page),
|
|
1739 |
buf_frame_get_page_no(right_page)); */
|
|
1740 |
mem_heap_free(heap); |
|
1741 |
return(rec); |
|
1742 |
}
|
|
1743 |
||
1744 |
/* 8. If insert did not fit, try page reorganization */
|
|
1745 |
||
1746 |
btr_page_reorganize(insert_page, cursor->index, mtr); |
|
1747 |
||
1748 |
page_cur_search(insert_page, cursor->index, tuple, |
|
1749 |
PAGE_CUR_LE, page_cursor); |
|
1750 |
rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index, mtr); |
|
1751 |
||
1752 |
if (rec == NULL) { |
|
1753 |
/* The insert did not fit on the page: loop back to the
|
|
1754 |
start of the function for a new split */
|
|
1755 |
||
1756 |
/* We play safe and reset the free bits for new_page */
|
|
1757 |
ibuf_reset_free_bits(cursor->index, new_page); |
|
1758 |
||
1759 |
/* fprintf(stderr, "Split second round %lu\n",
|
|
1760 |
buf_frame_get_page_no(page)); */
|
|
1761 |
n_iterations++; |
|
1762 |
ut_ad(n_iterations < 2); |
|
1763 |
ut_ad(!insert_will_fit); |
|
1764 |
||
1765 |
goto func_start; |
|
1766 |
}
|
|
1767 |
||
1768 |
/* Insert fit on the page: update the free bits for the
|
|
1769 |
left and right pages in the same mtr */
|
|
1770 |
||
1771 |
ibuf_update_free_bits_for_two_pages_low(cursor->index, left_page, |
|
1772 |
right_page, mtr); |
|
1773 |
#if 0
|
|
1774 |
fprintf(stderr, "Split and insert done %lu %lu\n",
|
|
1775 |
buf_frame_get_page_no(left_page),
|
|
1776 |
buf_frame_get_page_no(right_page));
|
|
1777 |
#endif
|
|
1778 |
||
1779 |
ut_ad(page_validate(left_page, cursor->index)); |
|
1780 |
ut_ad(page_validate(right_page, cursor->index)); |
|
1781 |
||
1782 |
mem_heap_free(heap); |
|
1783 |
return(rec); |
|
1784 |
}
|
|
1785 |
||
1786 |
/*****************************************************************
|
|
1787 |
Removes a page from the level list of pages. */
|
|
1788 |
static
|
|
1789 |
void
|
|
1790 |
btr_level_list_remove( |
|
1791 |
/*==================*/
|
|
1792 |
page_t* page, /* in: page to remove */ |
|
1793 |
mtr_t* mtr) /* in: mtr */ |
|
1794 |
{
|
|
1795 |
ulint space; |
|
1796 |
ulint prev_page_no; |
|
1797 |
page_t* prev_page; |
|
1798 |
ulint next_page_no; |
|
1799 |
page_t* next_page; |
|
1800 |
||
1801 |
ut_ad(page && mtr); |
|
1802 |
ut_ad(mtr_memo_contains(mtr, buf_block_align(page), |
|
1803 |
MTR_MEMO_PAGE_X_FIX)); |
|
1804 |
/* Get the previous and next page numbers of page */
|
|
1805 |
||
1806 |
prev_page_no = btr_page_get_prev(page, mtr); |
|
1807 |
next_page_no = btr_page_get_next(page, mtr); |
|
1808 |
space = buf_frame_get_space_id(page); |
|
1809 |
||
1810 |
/* Update page links of the level */
|
|
1811 |
||
1812 |
if (prev_page_no != FIL_NULL) { |
|
1813 |
||
1814 |
prev_page = btr_page_get(space, prev_page_no, RW_X_LATCH, mtr); |
|
1815 |
ut_a(page_is_comp(prev_page) == page_is_comp(page)); |
|
1816 |
#ifdef UNIV_BTR_DEBUG
|
|
1817 |
ut_a(btr_page_get_next(prev_page, mtr) |
|
1818 |
== buf_frame_get_page_no(page)); |
|
1819 |
#endif /* UNIV_BTR_DEBUG */ |
|
1820 |
||
1821 |
btr_page_set_next(prev_page, next_page_no, mtr); |
|
1822 |
}
|
|
1823 |
||
1824 |
if (next_page_no != FIL_NULL) { |
|
1825 |
||
1826 |
next_page = btr_page_get(space, next_page_no, RW_X_LATCH, mtr); |
|
1827 |
ut_a(page_is_comp(next_page) == page_is_comp(page)); |
|
1828 |
#ifdef UNIV_BTR_DEBUG
|
|
1829 |
ut_a(btr_page_get_prev(next_page, mtr) |
|
1830 |
== buf_frame_get_page_no(page)); |
|
1831 |
#endif /* UNIV_BTR_DEBUG */ |
|
1832 |
||
1833 |
btr_page_set_prev(next_page, prev_page_no, mtr); |
|
1834 |
}
|
|
1835 |
}
|
|
1836 |
||
1837 |
/********************************************************************
|
|
1838 |
Writes the redo log record for setting an index record as the predefined
|
|
1839 |
minimum record. */
|
|
1840 |
UNIV_INLINE
|
|
1841 |
void
|
|
1842 |
btr_set_min_rec_mark_log( |
|
1843 |
/*=====================*/
|
|
1844 |
rec_t* rec, /* in: record */ |
|
1845 |
ulint comp, /* nonzero=compact record format */ |
|
1846 |
mtr_t* mtr) /* in: mtr */ |
|
1847 |
{
|
|
1848 |
mlog_write_initial_log_record( |
|
1849 |
rec, comp ? MLOG_COMP_REC_MIN_MARK : MLOG_REC_MIN_MARK, mtr); |
|
1850 |
||
1851 |
/* Write rec offset as a 2-byte ulint */
|
|
1852 |
mlog_catenate_ulint(mtr, page_offset(rec), MLOG_2BYTES); |
|
1853 |
}
|
|
1854 |
||
1855 |
/********************************************************************
|
|
1856 |
Parses the redo log record for setting an index record as the predefined
|
|
1857 |
minimum record. */
|
|
1858 |
||
1859 |
byte* |
|
1860 |
btr_parse_set_min_rec_mark( |
|
1861 |
/*=======================*/
|
|
1862 |
/* out: end of log record or NULL */
|
|
1863 |
byte* ptr, /* in: buffer */ |
|
1864 |
byte* end_ptr,/* in: buffer end */ |
|
1865 |
ulint comp, /* in: nonzero=compact page format */ |
|
1866 |
page_t* page, /* in: page or NULL */ |
|
1867 |
mtr_t* mtr) /* in: mtr or NULL */ |
|
1868 |
{
|
|
1869 |
rec_t* rec; |
|
1870 |
||
1871 |
if (end_ptr < ptr + 2) { |
|
1872 |
||
1873 |
return(NULL); |
|
1874 |
}
|
|
1875 |
||
1876 |
if (page) { |
|
1877 |
ut_a(!page_is_comp(page) == !comp); |
|
1878 |
||
1879 |
rec = page + mach_read_from_2(ptr); |
|
1880 |
||
1881 |
btr_set_min_rec_mark(rec, comp, mtr); |
|
1882 |
}
|
|
1883 |
||
1884 |
return(ptr + 2); |
|
1885 |
}
|
|
1886 |
||
1887 |
/********************************************************************
|
|
1888 |
Sets a record as the predefined minimum record. */
|
|
1889 |
||
1890 |
void
|
|
1891 |
btr_set_min_rec_mark( |
|
1892 |
/*=================*/
|
|
1893 |
rec_t* rec, /* in: record */ |
|
1894 |
ulint comp, /* in: nonzero=compact page format */ |
|
1895 |
mtr_t* mtr) /* in: mtr */ |
|
1896 |
{
|
|
1897 |
ulint info_bits; |
|
1898 |
||
1899 |
info_bits = rec_get_info_bits(rec, comp); |
|
1900 |
||
1901 |
rec_set_info_bits(rec, comp, info_bits | REC_INFO_MIN_REC_FLAG); |
|
1902 |
||
1903 |
btr_set_min_rec_mark_log(rec, comp, mtr); |
|
1904 |
}
|
|
1905 |
||
1906 |
/*****************************************************************
|
|
1907 |
Deletes on the upper level the node pointer to a page. */
|
|
1908 |
||
1909 |
void
|
|
1910 |
btr_node_ptr_delete( |
|
1911 |
/*================*/
|
|
1912 |
dict_index_t* index, /* in: index tree */ |
|
1913 |
page_t* page, /* in: page whose node pointer is deleted */ |
|
1914 |
mtr_t* mtr) /* in: mtr */ |
|
1915 |
{
|
|
1916 |
rec_t* node_ptr; |
|
1917 |
btr_cur_t cursor; |
|
1918 |
ibool compressed; |
|
1919 |
ulint err; |
|
1920 |
||
1921 |
ut_ad(mtr_memo_contains(mtr, buf_block_align(page), |
|
1922 |
MTR_MEMO_PAGE_X_FIX)); |
|
1923 |
/* Delete node pointer on father page */
|
|
1924 |
||
1925 |
node_ptr = btr_page_get_father_node_ptr(index, page, mtr); |
|
1926 |
||
1927 |
btr_cur_position(index, node_ptr, &cursor); |
|
1928 |
compressed = btr_cur_pessimistic_delete(&err, TRUE, &cursor, FALSE, |
|
1929 |
mtr); |
|
1930 |
ut_a(err == DB_SUCCESS); |
|
1931 |
||
1932 |
if (!compressed) { |
|
1933 |
btr_cur_compress_if_useful(&cursor, mtr); |
|
1934 |
}
|
|
1935 |
}
|
|
1936 |
||
1937 |
/*****************************************************************
|
|
1938 |
If page is the only on its level, this function moves its records to the
|
|
1939 |
father page, thus reducing the tree height. */
|
|
1940 |
static
|
|
1941 |
void
|
|
1942 |
btr_lift_page_up( |
|
1943 |
/*=============*/
|
|
1944 |
dict_index_t* index, /* in: index tree */ |
|
1945 |
page_t* page, /* in: page which is the only on its level; |
|
1946 |
must not be empty: use
|
|
1947 |
btr_discard_only_page_on_level if the last
|
|
1948 |
record from the page should be removed */
|
|
1949 |
mtr_t* mtr) /* in: mtr */ |
|
1950 |
{
|
|
1951 |
page_t* father_page; |
|
1952 |
page_t* iter_page; |
|
1953 |
page_t* pages[BTR_MAX_LEVELS]; |
|
1954 |
ulint page_level; |
|
1955 |
ulint root_page_no; |
|
1956 |
ulint ancestors; |
|
1957 |
ulint i; |
|
1958 |
||
1959 |
ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL); |
|
1960 |
ut_ad(btr_page_get_next(page, mtr) == FIL_NULL); |
|
1961 |
ut_ad(mtr_memo_contains(mtr, buf_block_align(page), |
|
1962 |
MTR_MEMO_PAGE_X_FIX)); |
|
1963 |
father_page = buf_frame_align( |
|
1964 |
btr_page_get_father_node_ptr(index, page, mtr)); |
|
1965 |
||
1966 |
page_level = btr_page_get_level(page, mtr); |
|
1967 |
root_page_no = dict_index_get_page(index); |
|
1968 |
||
1969 |
ancestors = 1; |
|
1970 |
pages[0] = father_page; |
|
1971 |
||
1972 |
/* Store all ancestor pages so we can reset their levels later on.
|
|
1973 |
We have to do all the searches on the tree now because later on,
|
|
1974 |
after we've replaced the first level, the tree is in an inconsistent
|
|
1975 |
state and can not be searched. */
|
|
1976 |
iter_page = father_page; |
|
1977 |
for (;;) { |
|
1978 |
if (buf_block_get_page_no(buf_block_align(iter_page)) |
|
1979 |
== root_page_no) { |
|
1980 |
||
1981 |
break; |
|
1982 |
}
|
|
1983 |
||
1984 |
ut_a(ancestors < BTR_MAX_LEVELS); |
|
1985 |
||
1986 |
iter_page = buf_frame_align( |
|
1987 |
btr_page_get_father_node_ptr(index, iter_page, mtr)); |
|
1988 |
||
1989 |
pages[ancestors++] = iter_page; |
|
1990 |
}
|
|
1991 |
||
1992 |
btr_search_drop_page_hash_index(page); |
|
1993 |
||
1994 |
/* Make the father empty */
|
|
1995 |
btr_page_empty(father_page, mtr); |
|
1996 |
||
1997 |
/* Move records to the father */
|
|
1998 |
page_copy_rec_list_end(father_page, page, page_get_infimum_rec(page), |
|
1999 |
index, mtr); |
|
2000 |
lock_update_copy_and_discard(father_page, page); |
|
2001 |
||
2002 |
/* Go upward to root page, decreasing levels by one. */
|
|
2003 |
for (i = 0; i < ancestors; i++) { |
|
2004 |
iter_page = pages[i]; |
|
2005 |
||
2006 |
ut_ad(btr_page_get_level(iter_page, mtr) == (page_level + 1)); |
|
2007 |
||
2008 |
btr_page_set_level(iter_page, page_level, mtr); |
|
2009 |
page_level++; |
|
2010 |
}
|
|
2011 |
||
2012 |
/* Free the file page */
|
|
2013 |
btr_page_free(index, page, mtr); |
|
2014 |
||
2015 |
/* We play safe and reset the free bits for the father */
|
|
2016 |
ibuf_reset_free_bits(index, father_page); |
|
2017 |
ut_ad(page_validate(father_page, index)); |
|
2018 |
ut_ad(btr_check_node_ptr(index, father_page, mtr)); |
|
2019 |
}
|
|
2020 |
||
2021 |
/*****************************************************************
|
|
2022 |
Tries to merge the page first to the left immediate brother if such a
|
|
2023 |
brother exists, and the node pointers to the current page and to the brother
|
|
2024 |
reside on the same page. If the left brother does not satisfy these
|
|
2025 |
conditions, looks at the right brother. If the page is the only one on that
|
|
2026 |
level lifts the records of the page to the father page, thus reducing the
|
|
2027 |
tree height. It is assumed that mtr holds an x-latch on the tree and on the
|
|
2028 |
page. If cursor is on the leaf level, mtr must also hold x-latches to the
|
|
2029 |
brothers, if they exist. NOTE: it is assumed that the caller has reserved
|
|
2030 |
enough free extents so that the compression will always succeed if done! */
|
|
2031 |
||
2032 |
void
|
|
2033 |
btr_compress( |
|
2034 |
/*=========*/
|
|
2035 |
btr_cur_t* cursor, /* in: cursor on the page to merge or lift; |
|
2036 |
the page must not be empty: in record delete
|
|
2037 |
use btr_discard_page if the page would become
|
|
2038 |
empty */
|
|
2039 |
mtr_t* mtr) /* in: mtr */ |
|
2040 |
{
|
|
2041 |
dict_index_t* index; |
|
2042 |
ulint space; |
|
2043 |
ulint left_page_no; |
|
2044 |
ulint right_page_no; |
|
2045 |
page_t* merge_page; |
|
2046 |
page_t* father_page; |
|
2047 |
ibool is_left; |
|
2048 |
page_t* page; |
|
2049 |
rec_t* orig_pred; |
|
2050 |
rec_t* orig_succ; |
|
2051 |
rec_t* node_ptr; |
|
2052 |
ulint data_size; |
|
2053 |
ulint n_recs; |
|
2054 |
ulint max_ins_size; |
|
2055 |
ulint max_ins_size_reorg; |
|
2056 |
ulint level; |
|
2057 |
ulint comp; |
|
2058 |
||
2059 |
page = btr_cur_get_page(cursor); |
|
2060 |
index = btr_cur_get_index(cursor); |
|
2061 |
comp = page_is_comp(page); |
|
2062 |
ut_a((ibool)!!comp == dict_table_is_comp(index->table)); |
|
2063 |
||
2064 |
ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index), |
|
2065 |
MTR_MEMO_X_LOCK)); |
|
2066 |
ut_ad(mtr_memo_contains(mtr, buf_block_align(page), |
|
2067 |
MTR_MEMO_PAGE_X_FIX)); |
|
2068 |
level = btr_page_get_level(page, mtr); |
|
2069 |
space = dict_index_get_space(index); |
|
2070 |
||
2071 |
left_page_no = btr_page_get_prev(page, mtr); |
|
2072 |
right_page_no = btr_page_get_next(page, mtr); |
|
2073 |
||
2074 |
#if 0
|
|
2075 |
fprintf(stderr, "Merge left page %lu right %lu \n",
|
|
2076 |
left_page_no, right_page_no);
|
|
2077 |
#endif
|
|
2078 |
||
2079 |
node_ptr = btr_page_get_father_node_ptr(index, page, mtr); |
|
2080 |
ut_ad(!comp || rec_get_status(node_ptr) == REC_STATUS_NODE_PTR); |
|
2081 |
father_page = buf_frame_align(node_ptr); |
|
2082 |
ut_a(comp == page_is_comp(father_page)); |
|
2083 |
||
2084 |
/* Decide the page to which we try to merge and which will inherit
|
|
2085 |
the locks */
|
|
2086 |
||
2087 |
is_left = left_page_no != FIL_NULL; |
|
2088 |
||
2089 |
if (is_left) { |
|
2090 |
||
2091 |
merge_page = btr_page_get(space, left_page_no, RW_X_LATCH, |
|
2092 |
mtr); |
|
2093 |
#ifdef UNIV_BTR_DEBUG
|
|
2094 |
ut_a(btr_page_get_next(merge_page, mtr) |
|
2095 |
== buf_frame_get_page_no(page)); |
|
2096 |
#endif /* UNIV_BTR_DEBUG */ |
|
2097 |
} else if (right_page_no != FIL_NULL) { |
|
2098 |
||
2099 |
merge_page = btr_page_get(space, right_page_no, RW_X_LATCH, |
|
2100 |
mtr); |
|
2101 |
#ifdef UNIV_BTR_DEBUG
|
|
2102 |
ut_a(btr_page_get_prev(merge_page, mtr) |
|
2103 |
== buf_frame_get_page_no(page)); |
|
2104 |
#endif /* UNIV_BTR_DEBUG */ |
|
2105 |
} else { |
|
2106 |
/* The page is the only one on the level, lift the records
|
|
2107 |
to the father */
|
|
2108 |
btr_lift_page_up(index, page, mtr); |
|
2109 |
||
2110 |
return; |
|
2111 |
}
|
|
2112 |
||
2113 |
n_recs = page_get_n_recs(page); |
|
2114 |
data_size = page_get_data_size(page); |
|
2115 |
ut_a(page_is_comp(merge_page) == comp); |
|
2116 |
||
2117 |
max_ins_size_reorg = page_get_max_insert_size_after_reorganize( |
|
2118 |
merge_page, n_recs); |
|
2119 |
if (data_size > max_ins_size_reorg) { |
|
2120 |
||
2121 |
/* No space for merge */
|
|
2122 |
||
2123 |
return; |
|
2124 |
}
|
|
2125 |
||
2126 |
ut_ad(page_validate(merge_page, index)); |
|
2127 |
||
2128 |
max_ins_size = page_get_max_insert_size(merge_page, n_recs); |
|
2129 |
||
2130 |
if (data_size > max_ins_size) { |
|
2131 |
||
2132 |
/* We have to reorganize merge_page */
|
|
2133 |
||
2134 |
btr_page_reorganize(merge_page, index, mtr); |
|
2135 |
||
2136 |
max_ins_size = page_get_max_insert_size(merge_page, n_recs); |
|
2137 |
||
2138 |
ut_ad(page_validate(merge_page, index)); |
|
2139 |
ut_ad(page_get_max_insert_size(merge_page, n_recs) |
|
2140 |
== max_ins_size_reorg); |
|
2141 |
}
|
|
2142 |
||
2143 |
if (data_size > max_ins_size) { |
|
2144 |
||
2145 |
/* Add fault tolerance, though this should never happen */
|
|
2146 |
||
2147 |
return; |
|
2148 |
}
|
|
2149 |
||
2150 |
btr_search_drop_page_hash_index(page); |
|
2151 |
||
2152 |
/* Remove the page from the level list */
|
|
2153 |
btr_level_list_remove(page, mtr); |
|
2154 |
||
2155 |
if (is_left) { |
|
2156 |
btr_node_ptr_delete(index, page, mtr); |
|
2157 |
} else { |
|
2158 |
mem_heap_t* heap = NULL; |
|
2159 |
ulint offsets_[REC_OFFS_NORMAL_SIZE]; |
|
2160 |
*offsets_ = (sizeof offsets_) / sizeof *offsets_; |
|
2161 |
/* Replace the address of the old child node (= page) with the
|
|
2162 |
address of the merge page to the right */
|
|
2163 |
||
2164 |
btr_node_ptr_set_child_page_no(node_ptr, |
|
2165 |
rec_get_offsets( |
|
2166 |
node_ptr, index, |
|
2167 |
offsets_, |
|
2168 |
ULINT_UNDEFINED, |
|
2169 |
&heap), |
|
2170 |
right_page_no, mtr); |
|
2171 |
if (UNIV_LIKELY_NULL(heap)) { |
|
2172 |
mem_heap_free(heap); |
|
2173 |
}
|
|
2174 |
btr_node_ptr_delete(index, merge_page, mtr); |
|
2175 |
}
|
|
2176 |
||
2177 |
/* Move records to the merge page */
|
|
2178 |
if (is_left) { |
|
2179 |
orig_pred = page_rec_get_prev( |
|
2180 |
page_get_supremum_rec(merge_page)); |
|
2181 |
page_copy_rec_list_start(merge_page, page, |
|
2182 |
page_get_supremum_rec(page), |
|
2183 |
index, mtr); |
|
2184 |
||
2185 |
lock_update_merge_left(merge_page, orig_pred, page); |
|
2186 |
} else { |
|
2187 |
orig_succ = page_rec_get_next( |
|
2188 |
page_get_infimum_rec(merge_page)); |
|
2189 |
page_copy_rec_list_end(merge_page, page, |
|
2190 |
page_get_infimum_rec(page), |
|
2191 |
index, mtr); |
|
2192 |
||
2193 |
lock_update_merge_right(orig_succ, page); |
|
2194 |
}
|
|
2195 |
||
2196 |
/* We have added new records to merge_page: update its free bits */
|
|
2197 |
ibuf_update_free_bits_if_full(index, merge_page, |
|
2198 |
UNIV_PAGE_SIZE, ULINT_UNDEFINED); |
|
2199 |
||
2200 |
ut_ad(page_validate(merge_page, index)); |
|
2201 |
||
2202 |
/* Free the file page */
|
|
2203 |
btr_page_free(index, page, mtr); |
|
2204 |
||
2205 |
ut_ad(btr_check_node_ptr(index, merge_page, mtr)); |
|
2206 |
}
|
|
2207 |
||
2208 |
/*****************************************************************
|
|
2209 |
Discards a page that is the only page on its level. */
|
|
2210 |
static
|
|
2211 |
void
|
|
2212 |
btr_discard_only_page_on_level( |
|
2213 |
/*===========================*/
|
|
2214 |
dict_index_t* index, /* in: index tree */ |
|
2215 |
page_t* page, /* in: page which is the only on its level */ |
|
2216 |
mtr_t* mtr) /* in: mtr */ |
|
2217 |
{
|
|
2218 |
rec_t* node_ptr; |
|
2219 |
page_t* father_page; |
|
2220 |
ulint page_level; |
|
2221 |
||
2222 |
ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL); |
|
2223 |
ut_ad(btr_page_get_next(page, mtr) == FIL_NULL); |
|
2224 |
ut_ad(mtr_memo_contains(mtr, buf_block_align(page), |
|
2225 |
MTR_MEMO_PAGE_X_FIX)); |
|
2226 |
btr_search_drop_page_hash_index(page); |
|
2227 |
||
2228 |
node_ptr = btr_page_get_father_node_ptr(index, page, mtr); |
|
2229 |
father_page = buf_frame_align(node_ptr); |
|
2230 |
||
2231 |
page_level = btr_page_get_level(page, mtr); |
|
2232 |
||
2233 |
lock_update_discard(page_get_supremum_rec(father_page), page); |
|
2234 |
||
2235 |
btr_page_set_level(father_page, page_level, mtr); |
|
2236 |
||
2237 |
/* Free the file page */
|
|
2238 |
btr_page_free(index, page, mtr); |
|
2239 |
||
2240 |
if (buf_frame_get_page_no(father_page) == dict_index_get_page(index)) { |
|
2241 |
/* The father is the root page */
|
|
2242 |
||
2243 |
btr_page_empty(father_page, mtr); |
|
2244 |
||
2245 |
/* We play safe and reset the free bits for the father */
|
|
2246 |
ibuf_reset_free_bits(index, father_page); |
|
2247 |
} else { |
|
2248 |
ut_ad(page_get_n_recs(father_page) == 1); |
|
2249 |
||
2250 |
btr_discard_only_page_on_level(index, father_page, mtr); |
|
2251 |
}
|
|
2252 |
}
|
|
2253 |
||
2254 |
/*****************************************************************
|
|
2255 |
Discards a page from a B-tree. This is used to remove the last record from
|
|
2256 |
a B-tree page: the whole page must be removed at the same time. This cannot
|
|
2257 |
be used for the root page, which is allowed to be empty. */
|
|
2258 |
||
2259 |
void
|
|
2260 |
btr_discard_page( |
|
2261 |
/*=============*/
|
|
2262 |
btr_cur_t* cursor, /* in: cursor on the page to discard: not on |
|
2263 |
the root page */
|
|
2264 |
mtr_t* mtr) /* in: mtr */ |
|
2265 |
{
|
|
2266 |
dict_index_t* index; |
|
2267 |
ulint space; |
|
2268 |
ulint left_page_no; |
|
2269 |
ulint right_page_no; |
|
2270 |
page_t* merge_page; |
|
2271 |
page_t* page; |
|
2272 |
rec_t* node_ptr; |
|
2273 |
||
2274 |
page = btr_cur_get_page(cursor); |
|
2275 |
index = btr_cur_get_index(cursor); |
|
2276 |
||
2277 |
ut_ad(dict_index_get_page(index) != buf_frame_get_page_no(page)); |
|
2278 |
ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index), |
|
2279 |
MTR_MEMO_X_LOCK)); |
|
2280 |
ut_ad(mtr_memo_contains(mtr, buf_block_align(page), |
|
2281 |
MTR_MEMO_PAGE_X_FIX)); |
|
2282 |
space = dict_index_get_space(index); |
|
2283 |
||
2284 |
/* Decide the page which will inherit the locks */
|
|
2285 |
||
2286 |
left_page_no = btr_page_get_prev(page, mtr); |
|
2287 |
right_page_no = btr_page_get_next(page, mtr); |
|
2288 |
||
2289 |
if (left_page_no != FIL_NULL) { |
|
2290 |
merge_page = btr_page_get(space, left_page_no, RW_X_LATCH, |
|
2291 |
mtr); |
|
2292 |
#ifdef UNIV_BTR_DEBUG
|
|
2293 |
ut_a(btr_page_get_next(merge_page, mtr) |
|
2294 |
== buf_frame_get_page_no(page)); |
|
2295 |
#endif /* UNIV_BTR_DEBUG */ |
|
2296 |
} else if (right_page_no != FIL_NULL) { |
|
2297 |
merge_page = btr_page_get(space, right_page_no, RW_X_LATCH, |
|
2298 |
mtr); |
|
2299 |
#ifdef UNIV_BTR_DEBUG
|
|
2300 |
ut_a(btr_page_get_prev(merge_page, mtr) |
|
2301 |
== buf_frame_get_page_no(page)); |
|
2302 |
#endif /* UNIV_BTR_DEBUG */ |
|
2303 |
} else { |
|
2304 |
btr_discard_only_page_on_level(index, page, mtr); |
|
2305 |
||
2306 |
return; |
|
2307 |
}
|
|
2308 |
||
2309 |
ut_a(page_is_comp(merge_page) == page_is_comp(page)); |
|
2310 |
btr_search_drop_page_hash_index(page); |
|
2311 |
||
2312 |
if (left_page_no == FIL_NULL && btr_page_get_level(page, mtr) > 0) { |
|
2313 |
||
2314 |
/* We have to mark the leftmost node pointer on the right
|
|
2315 |
side page as the predefined minimum record */
|
|
2316 |
||
2317 |
node_ptr = page_rec_get_next(page_get_infimum_rec(merge_page)); |
|
2318 |
||
2319 |
ut_ad(page_rec_is_user_rec(node_ptr)); |
|
2320 |
||
2321 |
btr_set_min_rec_mark(node_ptr, page_is_comp(merge_page), mtr); |
|
2322 |
}
|
|
2323 |
||
2324 |
btr_node_ptr_delete(index, page, mtr); |
|
2325 |
||
2326 |
/* Remove the page from the level list */
|
|
2327 |
btr_level_list_remove(page, mtr); |
|
2328 |
||
2329 |
if (left_page_no != FIL_NULL) { |
|
2330 |
lock_update_discard(page_get_supremum_rec(merge_page), page); |
|
2331 |
} else { |
|
2332 |
lock_update_discard(page_rec_get_next( |
|
2333 |
page_get_infimum_rec(merge_page)), |
|
2334 |
page); |
|
2335 |
}
|
|
2336 |
||
2337 |
/* Free the file page */
|
|
2338 |
btr_page_free(index, page, mtr); |
|
2339 |
||
2340 |
ut_ad(btr_check_node_ptr(index, merge_page, mtr)); |
|
2341 |
}
|
|
2342 |
||
2343 |
#ifdef UNIV_BTR_PRINT
|
|
2344 |
/*****************************************************************
|
|
2345 |
Prints size info of a B-tree. */
|
|
2346 |
||
2347 |
void
|
|
2348 |
btr_print_size( |
|
2349 |
/*===========*/
|
|
2350 |
dict_index_t* index) /* in: index tree */ |
|
2351 |
{
|
|
2352 |
page_t* root; |
|
2353 |
fseg_header_t* seg; |
|
2354 |
mtr_t mtr; |
|
2355 |
||
2356 |
if (index->type & DICT_IBUF) { |
|
2357 |
fputs("Sorry, cannot print info of an ibuf tree:" |
|
2358 |
" use ibuf functions\n", stderr); |
|
2359 |
||
2360 |
return; |
|
2361 |
}
|
|
2362 |
||
2363 |
mtr_start(&mtr); |
|
2364 |
||
2365 |
root = btr_root_get(index, &mtr); |
|
2366 |
||
2367 |
seg = root + PAGE_HEADER + PAGE_BTR_SEG_TOP; |
|
2368 |
||
2369 |
fputs("INFO OF THE NON-LEAF PAGE SEGMENT\n", stderr); |
|
2370 |
fseg_print(seg, &mtr); |
|
2371 |
||
2372 |
if (!(index->type & DICT_UNIVERSAL)) { |
|
2373 |
||
2374 |
seg = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF; |
|
2375 |
||
2376 |
fputs("INFO OF THE LEAF PAGE SEGMENT\n", stderr); |
|
2377 |
fseg_print(seg, &mtr); |
|
2378 |
}
|
|
2379 |
||
2380 |
mtr_commit(&mtr); |
|
2381 |
}
|
|
2382 |
||
2383 |
/****************************************************************
|
|
2384 |
Prints recursively index tree pages. */
|
|
2385 |
static
|
|
2386 |
void
|
|
2387 |
btr_print_recursive( |
|
2388 |
/*================*/
|
|
2389 |
dict_index_t* index, /* in: index tree */ |
|
2390 |
page_t* page, /* in: index page */ |
|
2391 |
ulint width, /* in: print this many entries from start |
|
2392 |
and end */
|
|
2393 |
mem_heap_t** heap, /* in/out: heap for rec_get_offsets() */ |
|
2394 |
ulint** offsets,/* in/out: buffer for rec_get_offsets() */ |
|
2395 |
mtr_t* mtr) /* in: mtr */ |
|
2396 |
{
|
|
2397 |
page_cur_t cursor; |
|
2398 |
ulint n_recs; |
|
2399 |
ulint i = 0; |
|
2400 |
mtr_t mtr2; |
|
2401 |
rec_t* node_ptr; |
|
2402 |
page_t* child; |
|
2403 |
||
2404 |
ut_ad(mtr_memo_contains(mtr, buf_block_align(page), |
|
2405 |
MTR_MEMO_PAGE_X_FIX)); |
|
2406 |
fprintf(stderr, "NODE ON LEVEL %lu page number %lu\n", |
|
2407 |
(ulong) btr_page_get_level(page, mtr), |
|
2408 |
(ulong) buf_frame_get_page_no(page)); |
|
2409 |
||
2410 |
page_print(page, index, width, width); |
|
2411 |
||
2412 |
n_recs = page_get_n_recs(page); |
|
2413 |
||
2414 |
page_cur_set_before_first(page, &cursor); |
|
2415 |
page_cur_move_to_next(&cursor); |
|
2416 |
||
2417 |
while (!page_cur_is_after_last(&cursor)) { |
|
2418 |
||
2419 |
if (0 == btr_page_get_level(page, mtr)) { |
|
2420 |
||
2421 |
/* If this is the leaf level, do nothing */
|
|
2422 |
||
2423 |
} else if ((i <= width) || (i >= n_recs - width)) { |
|
2424 |
||
2425 |
mtr_start(&mtr2); |
|
2426 |
||
2427 |
node_ptr = page_cur_get_rec(&cursor); |
|
2428 |
||
2429 |
*offsets = rec_get_offsets(node_ptr, index, *offsets, |
|
2430 |
ULINT_UNDEFINED, heap); |
|
2431 |
child = btr_node_ptr_get_child(node_ptr, |
|
2432 |
*offsets, &mtr2); |
|
2433 |
btr_print_recursive(index, child, width, |
|
2434 |
heap, offsets, &mtr2); |
|
2435 |
mtr_commit(&mtr2); |
|
2436 |
}
|
|
2437 |
||
2438 |
page_cur_move_to_next(&cursor); |
|
2439 |
i++; |
|
2440 |
}
|
|
2441 |
}
|
|
2442 |
||
2443 |
/******************************************************************
|
|
2444 |
Prints directories and other info of all nodes in the tree. */
|
|
2445 |
||
2446 |
void
|
|
2447 |
btr_print_index( |
|
2448 |
/*============*/
|
|
2449 |
dict_index_t* index, /* in: index */ |
|
2450 |
ulint width) /* in: print this many entries from start |
|
2451 |
and end */
|
|
2452 |
{
|
|
2453 |
mtr_t mtr; |
|
2454 |
page_t* root; |
|
2455 |
mem_heap_t* heap = NULL; |
|
2456 |
ulint offsets_[REC_OFFS_NORMAL_SIZE]; |
|
2457 |
ulint* offsets = offsets_; |
|
2458 |
*offsets_ = (sizeof offsets_) / sizeof *offsets_; |
|
2459 |
||
2460 |
fputs("--------------------------\n" |
|
2461 |
"INDEX TREE PRINT\n", stderr); |
|
2462 |
||
2463 |
mtr_start(&mtr); |
|
2464 |
||
2465 |
root = btr_root_get(index, &mtr); |
|
2466 |
||
2467 |
btr_print_recursive(index, root, width, &heap, &offsets, &mtr); |
|
2468 |
if (UNIV_LIKELY_NULL(heap)) { |
|
2469 |
mem_heap_free(heap); |
|
2470 |
}
|
|
2471 |
||
2472 |
mtr_commit(&mtr); |
|
2473 |
||
2474 |
btr_validate_index(index, NULL); |
|
2475 |
}
|
|
2476 |
#endif /* UNIV_BTR_PRINT */ |
|
2477 |
||
2478 |
#ifdef UNIV_DEBUG
|
|
2479 |
/****************************************************************
|
|
2480 |
Checks that the node pointer to a page is appropriate. */
|
|
2481 |
||
2482 |
ibool
|
|
2483 |
btr_check_node_ptr( |
|
2484 |
/*===============*/
|
|
2485 |
/* out: TRUE */
|
|
2486 |
dict_index_t* index, /* in: index tree */ |
|
2487 |
page_t* page, /* in: index page */ |
|
2488 |
mtr_t* mtr) /* in: mtr */ |
|
2489 |
{
|
|
2490 |
mem_heap_t* heap; |
|
2491 |
rec_t* node_ptr; |
|
2492 |
dtuple_t* node_ptr_tuple; |
|
2493 |
||
2494 |
ut_ad(mtr_memo_contains(mtr, buf_block_align(page), |
|
2495 |
MTR_MEMO_PAGE_X_FIX)); |
|
2496 |
if (dict_index_get_page(index) == buf_frame_get_page_no(page)) { |
|
2497 |
||
2498 |
return(TRUE); |
|
2499 |
}
|
|
2500 |
||
2501 |
node_ptr = btr_page_get_father_node_ptr(index, page, mtr); |
|
2502 |
||
2503 |
if (btr_page_get_level(page, mtr) == 0) { |
|
2504 |
||
2505 |
return(TRUE); |
|
2506 |
}
|
|
2507 |
||
2508 |
heap = mem_heap_create(256); |
|
2509 |
||
2510 |
node_ptr_tuple = dict_index_build_node_ptr( |
|
2511 |
index, page_rec_get_next(page_get_infimum_rec(page)), 0, heap, |
|
2512 |
btr_page_get_level(page, mtr)); |
|
2513 |
||
2514 |
ut_a(!cmp_dtuple_rec(node_ptr_tuple, node_ptr, |
|
2515 |
rec_get_offsets(node_ptr, index, |
|
2516 |
NULL, ULINT_UNDEFINED, &heap))); |
|
2517 |
||
2518 |
mem_heap_free(heap); |
|
2519 |
||
2520 |
return(TRUE); |
|
2521 |
}
|
|
2522 |
#endif /* UNIV_DEBUG */ |
|
2523 |
||
2524 |
/****************************************************************
|
|
2525 |
Display identification information for a record. */
|
|
2526 |
static
|
|
2527 |
void
|
|
2528 |
btr_index_rec_validate_report( |
|
2529 |
/*==========================*/
|
|
2530 |
page_t* page, /* in: index page */ |
|
2531 |
rec_t* rec, /* in: index record */ |
|
2532 |
dict_index_t* index) /* in: index */ |
|
2533 |
{
|
|
2534 |
fputs("InnoDB: Record in ", stderr); |
|
2535 |
dict_index_name_print(stderr, NULL, index); |
|
2536 |
fprintf(stderr, ", page %lu, at offset %lu\n", |
|
2537 |
buf_frame_get_page_no(page), (ulint)(rec - page)); |
|
2538 |
}
|
|
2539 |
||
2540 |
/****************************************************************
|
|
2541 |
Checks the size and number of fields in a record based on the definition of
|
|
2542 |
the index. */
|
|
2543 |
||
2544 |
ibool
|
|
2545 |
btr_index_rec_validate( |
|
2546 |
/*===================*/
|
|
2547 |
/* out: TRUE if ok */
|
|
2548 |
rec_t* rec, /* in: index record */ |
|
2549 |
dict_index_t* index, /* in: index */ |
|
2550 |
ibool dump_on_error) /* in: TRUE if the function |
|
2551 |
should print hex dump of record
|
|
2552 |
and page on error */
|
|
2553 |
{
|
|
2554 |
ulint len; |
|
2555 |
ulint n; |
|
2556 |
ulint i; |
|
2557 |
page_t* page; |
|
2558 |
mem_heap_t* heap = NULL; |
|
2559 |
ulint offsets_[REC_OFFS_NORMAL_SIZE]; |
|
2560 |
ulint* offsets = offsets_; |
|
2561 |
*offsets_ = (sizeof offsets_) / sizeof *offsets_; |
|
2562 |
||
2563 |
page = buf_frame_align(rec); |
|
2564 |
||
2565 |
if (UNIV_UNLIKELY(index->type & DICT_UNIVERSAL)) { |
|
2566 |
/* The insert buffer index tree can contain records from any
|
|
2567 |
other index: we cannot check the number of fields or
|
|
2568 |
their length */
|
|
2569 |
||
2570 |
return(TRUE); |
|
2571 |
}
|
|
2572 |
||
2573 |
if (UNIV_UNLIKELY((ibool)!!page_is_comp(page) |
|
2574 |
!= dict_table_is_comp(index->table))) { |
|
2575 |
btr_index_rec_validate_report(page, rec, index); |
|
2576 |
fprintf(stderr, "InnoDB: compact flag=%lu, should be %lu\n", |
|
2577 |
(ulong) !!page_is_comp(page), |
|
2578 |
(ulong) dict_table_is_comp(index->table)); |
|
2579 |
||
2580 |
return(FALSE); |
|
2581 |
}
|
|
2582 |
||
2583 |
n = dict_index_get_n_fields(index); |
|
2584 |
||
2585 |
if (!page_is_comp(page) |
|
2586 |
&& UNIV_UNLIKELY(rec_get_n_fields_old(rec) != n)) { |
|
2587 |
btr_index_rec_validate_report(page, rec, index); |
|
2588 |
fprintf(stderr, "InnoDB: has %lu fields, should have %lu\n", |
|
2589 |
(ulong) rec_get_n_fields_old(rec), (ulong) n); |
|
2590 |
||
2591 |
if (dump_on_error) { |
|
2592 |
buf_page_print(page); |
|
2593 |
||
2594 |
fputs("InnoDB: corrupt record ", stderr); |
|
2595 |
rec_print_old(stderr, rec); |
|
2596 |
putc('\n', stderr); |
|
2597 |
}
|
|
2598 |
return(FALSE); |
|
2599 |
}
|
|
2600 |
||
2601 |
offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap); |
|
2602 |
||
2603 |
for (i = 0; i < n; i++) { |
|
2604 |
ulint fixed_size = dict_col_get_fixed_size( |
|
2605 |
dict_index_get_nth_col(index, i)); |
|
2606 |
||
2607 |
rec_get_nth_field(rec, offsets, i, &len); |
|
2608 |
||
2609 |
/* Note that if fixed_size != 0, it equals the
|
|
2610 |
length of a fixed-size column in the clustered index.
|
|
2611 |
A prefix index of the column is of fixed, but different
|
|
2612 |
length. When fixed_size == 0, prefix_len is the maximum
|
|
2613 |
length of the prefix index column. */
|
|
2614 |
||
2615 |
if ((dict_index_get_nth_field(index, i)->prefix_len == 0 |
|
2616 |
&& len != UNIV_SQL_NULL && fixed_size |
|
2617 |
&& len != fixed_size) |
|
2618 |
|| (dict_index_get_nth_field(index, i)->prefix_len > 0 |
|
2619 |
&& len != UNIV_SQL_NULL |
|
2620 |
&& len |
|
2621 |
> dict_index_get_nth_field(index, i)->prefix_len)) { |
|
2622 |
||
2623 |
btr_index_rec_validate_report(page, rec, index); |
|
2624 |
fprintf(stderr, |
|
2625 |
"InnoDB: field %lu len is %lu,"
|
|
2626 |
" should be %lu\n", |
|
2627 |
(ulong) i, (ulong) len, (ulong) fixed_size); |
|
2628 |
||
2629 |
if (dump_on_error) { |
|
2630 |
buf_page_print(page); |
|
2631 |
||
2632 |
fputs("InnoDB: corrupt record ", stderr); |
|
2633 |
rec_print_new(stderr, rec, offsets); |
|
2634 |
putc('\n', stderr); |
|
2635 |
}
|
|
2636 |
if (UNIV_LIKELY_NULL(heap)) { |
|
2637 |
mem_heap_free(heap); |
|
2638 |
}
|
|
2639 |
return(FALSE); |
|
2640 |
}
|
|
2641 |
}
|
|
2642 |
||
2643 |
if (UNIV_LIKELY_NULL(heap)) { |
|
2644 |
mem_heap_free(heap); |
|
2645 |
}
|
|
2646 |
return(TRUE); |
|
2647 |
}
|
|
2648 |
||
2649 |
/****************************************************************
|
|
2650 |
Checks the size and number of fields in records based on the definition of
|
|
2651 |
the index. */
|
|
2652 |
static
|
|
2653 |
ibool
|
|
2654 |
btr_index_page_validate( |
|
2655 |
/*====================*/
|
|
2656 |
/* out: TRUE if ok */
|
|
2657 |
page_t* page, /* in: index page */ |
|
2658 |
dict_index_t* index) /* in: index */ |
|
2659 |
{
|
|
2660 |
page_cur_t cur; |
|
2661 |
ibool ret = TRUE; |
|
2662 |
||
2663 |
page_cur_set_before_first(page, &cur); |
|
2664 |
page_cur_move_to_next(&cur); |
|
2665 |
||
2666 |
for (;;) { |
|
2667 |
if (page_cur_is_after_last(&cur)) { |
|
2668 |
||
2669 |
break; |
|
2670 |
}
|
|
2671 |
||
2672 |
if (!btr_index_rec_validate(cur.rec, index, TRUE)) { |
|
2673 |
||
2674 |
return(FALSE); |
|
2675 |
}
|
|
2676 |
||
2677 |
page_cur_move_to_next(&cur); |
|
2678 |
}
|
|
2679 |
||
2680 |
return(ret); |
|
2681 |
}
|
|
2682 |
||
2683 |
/****************************************************************
|
|
2684 |
Report an error on one page of an index tree. */
|
|
2685 |
static
|
|
2686 |
void
|
|
2687 |
btr_validate_report1( |
|
2688 |
/*=================*/
|
|
2689 |
/* out: TRUE if ok */
|
|
2690 |
dict_index_t* index, /* in: index */ |
|
2691 |
ulint level, /* in: B-tree level */ |
|
2692 |
page_t* page) /* in: index page */ |
|
2693 |
{
|
|
2694 |
fprintf(stderr, "InnoDB: Error in page %lu of ", |
|
2695 |
buf_frame_get_page_no(page)); |
|
2696 |
dict_index_name_print(stderr, NULL, index); |
|
2697 |
if (level) { |
|
2698 |
fprintf(stderr, ", index tree level %lu", level); |
|
2699 |
}
|
|
2700 |
putc('\n', stderr); |
|
2701 |
}
|
|
2702 |
||
2703 |
/****************************************************************
|
|
2704 |
Report an error on two pages of an index tree. */
|
|
2705 |
static
|
|
2706 |
void
|
|
2707 |
btr_validate_report2( |
|
2708 |
/*=================*/
|
|
2709 |
/* out: TRUE if ok */
|
|
2710 |
dict_index_t* index, /* in: index */ |
|
2711 |
ulint level, /* in: B-tree level */ |
|
2712 |
page_t* page1, /* in: first index page */ |
|
2713 |
page_t* page2) /* in: second index page */ |
|
2714 |
{
|
|
2715 |
fprintf(stderr, "InnoDB: Error in pages %lu and %lu of ", |
|
2716 |
buf_frame_get_page_no(page1), |
|
2717 |
buf_frame_get_page_no(page2)); |
|
2718 |
dict_index_name_print(stderr, NULL, index); |
|
2719 |
if (level) { |
|
2720 |
fprintf(stderr, ", index tree level %lu", level); |
|
2721 |
}
|
|
2722 |
putc('\n', stderr); |
|
2723 |
}
|
|
2724 |
||
2725 |
/****************************************************************
|
|
2726 |
Validates index tree level. */
|
|
2727 |
static
|
|
2728 |
ibool
|
|
2729 |
btr_validate_level( |
|
2730 |
/*===============*/
|
|
2731 |
/* out: TRUE if ok */
|
|
2732 |
dict_index_t* index, /* in: index tree */ |
|
2733 |
trx_t* trx, /* in: transaction or NULL */ |
|
2734 |
ulint level) /* in: level number */ |
|
2735 |
{
|
|
2736 |
ulint space; |
|
2737 |
page_t* page; |
|
2738 |
page_t* right_page = 0; /* remove warning */ |
|
2739 |
page_t* father_page; |
|
2740 |
page_t* right_father_page; |
|
2741 |
rec_t* node_ptr; |
|
2742 |
rec_t* right_node_ptr; |
|
2743 |
rec_t* rec; |
|
2744 |
ulint right_page_no; |
|
2745 |
ulint left_page_no; |
|
2746 |
page_cur_t cursor; |
|
2747 |
dtuple_t* node_ptr_tuple; |
|
2748 |
ibool ret = TRUE; |
|
2749 |
mtr_t mtr; |
|
2750 |
mem_heap_t* heap = mem_heap_create(256); |
|
2751 |
ulint* offsets = NULL; |
|
2752 |
ulint* offsets2= NULL; |
|
2753 |
||
2754 |
mtr_start(&mtr); |
|
2755 |
||
2756 |
mtr_x_lock(dict_index_get_lock(index), &mtr); |
|
2757 |
||
2758 |
page = btr_root_get(index, &mtr); |
|
2759 |
||
2760 |
space = buf_frame_get_space_id(page); |
|
2761 |
||
2762 |
while (level != btr_page_get_level(page, &mtr)) { |
|
2763 |
||
2764 |
ut_a(btr_page_get_level(page, &mtr) > 0); |
|
2765 |
||
2766 |
page_cur_set_before_first(page, &cursor); |
|
2767 |
page_cur_move_to_next(&cursor); |
|
2768 |
||
2769 |
node_ptr = page_cur_get_rec(&cursor); |
|
2770 |
offsets = rec_get_offsets(node_ptr, index, offsets, |
|
2771 |
ULINT_UNDEFINED, &heap); |
|
2772 |
page = btr_node_ptr_get_child(node_ptr, offsets, &mtr); |
|
2773 |
}
|
|
2774 |
||
2775 |
/* Now we are on the desired level. Loop through the pages on that
|
|
2776 |
level. */
|
|
2777 |
loop: |
|
2778 |
if (trx_is_interrupted(trx)) { |
|
2779 |
mtr_commit(&mtr); |
|
2780 |
mem_heap_free(heap); |
|
2781 |
return(ret); |
|
2782 |
}
|
|
2783 |
mem_heap_empty(heap); |
|
2784 |
offsets = offsets2 = NULL; |
|
2785 |
mtr_x_lock(dict_index_get_lock(index), &mtr); |
|
2786 |
||
2787 |
/* Check ordering etc. of records */
|
|
2788 |
||
2789 |
if (!page_validate(page, index)) { |
|
2790 |
btr_validate_report1(index, level, page); |
|
2791 |
||
2792 |
ret = FALSE; |
|
2793 |
} else if (level == 0) { |
|
2794 |
/* We are on level 0. Check that the records have the right
|
|
2795 |
number of fields, and field lengths are right. */
|
|
2796 |
||
2797 |
if (!btr_index_page_validate(page, index)) { |
|
2798 |
||
2799 |
ret = FALSE; |
|
2800 |
}
|
|
2801 |
}
|
|
2802 |
||
2803 |
ut_a(btr_page_get_level(page, &mtr) == level); |
|
2804 |
||
2805 |
right_page_no = btr_page_get_next(page, &mtr); |
|
2806 |
left_page_no = btr_page_get_prev(page, &mtr); |
|
2807 |
||
2808 |
ut_a((page_get_n_recs(page) > 0) |
|
2809 |
|| ((level == 0) |
|
2810 |
&& (buf_frame_get_page_no(page) |
|
2811 |
== dict_index_get_page(index)))); |
|
2812 |
||
2813 |
if (right_page_no != FIL_NULL) { |
|
2814 |
rec_t* right_rec; |
|
2815 |
right_page = btr_page_get(space, right_page_no, RW_X_LATCH, |
|
2816 |
&mtr); |
|
2817 |
if (UNIV_UNLIKELY(btr_page_get_prev(right_page, &mtr) |
|
2818 |
!= buf_frame_get_page_no(page))) { |
|
2819 |
btr_validate_report2(index, level, page, right_page); |
|
2820 |
fputs("InnoDB: broken FIL_PAGE_NEXT" |
|
2821 |
" or FIL_PAGE_PREV links\n", stderr); |
|
2822 |
buf_page_print(page); |
|
2823 |
buf_page_print(right_page); |
|
2824 |
||
2825 |
ret = FALSE; |
|
2826 |
}
|
|
2827 |
||
2828 |
if (UNIV_UNLIKELY(page_is_comp(right_page) |
|
2829 |
!= page_is_comp(page))) { |
|
2830 |
btr_validate_report2(index, level, page, right_page); |
|
2831 |
fputs("InnoDB: 'compact' flag mismatch\n", stderr); |
|
2832 |
buf_page_print(page); |
|
2833 |
buf_page_print(right_page); |
|
2834 |
||
2835 |
ret = FALSE; |
|
2836 |
||
2837 |
goto node_ptr_fails; |
|
2838 |
}
|
|
2839 |
||
2840 |
rec = page_rec_get_prev(page_get_supremum_rec(page)); |
|
2841 |
right_rec = page_rec_get_next(page_get_infimum_rec( |
|
2842 |
right_page)); |
|
2843 |
offsets = rec_get_offsets(rec, index, |
|
2844 |
offsets, ULINT_UNDEFINED, &heap); |
|
2845 |
offsets2 = rec_get_offsets(right_rec, index, |
|
2846 |
offsets2, ULINT_UNDEFINED, &heap); |
|
2847 |
if (UNIV_UNLIKELY(cmp_rec_rec(rec, right_rec, |
|
2848 |
offsets, offsets2, |
|
2849 |
index) >= 0)) { |
|
2850 |
||
2851 |
btr_validate_report2(index, level, page, right_page); |
|
2852 |
||
2853 |
fputs("InnoDB: records in wrong order" |
|
2854 |
" on adjacent pages\n", stderr); |
|
2855 |
||
2856 |
buf_page_print(page); |
|
2857 |
buf_page_print(right_page); |
|
2858 |
||
2859 |
fputs("InnoDB: record ", stderr); |
|
2860 |
rec = page_rec_get_prev(page_get_supremum_rec(page)); |
|
2861 |
rec_print(stderr, rec, index); |
|
2862 |
putc('\n', stderr); |
|
2863 |
fputs("InnoDB: record ", stderr); |
|
2864 |
rec = page_rec_get_next( |
|
2865 |
page_get_infimum_rec(right_page)); |
|
2866 |
rec_print(stderr, rec, index); |
|
2867 |
putc('\n', stderr); |
|
2868 |
||
2869 |
ret = FALSE; |
|
2870 |
}
|
|
2871 |
}
|
|
2872 |
||
2873 |
if (level > 0 && left_page_no == FIL_NULL) { |
|
2874 |
ut_a(REC_INFO_MIN_REC_FLAG & rec_get_info_bits( |
|
2875 |
page_rec_get_next(page_get_infimum_rec(page)), |
|
2876 |
page_is_comp(page))); |
|
2877 |
}
|
|
2878 |
||
2879 |
if (buf_frame_get_page_no(page) != dict_index_get_page(index)) { |
|
2880 |
||
2881 |
/* Check father node pointers */
|
|
2882 |
||
2883 |
node_ptr = btr_page_get_father_node_ptr(index, page, &mtr); |
|
2884 |
father_page = buf_frame_align(node_ptr); |
|
2885 |
offsets = rec_get_offsets(node_ptr, index, |
|
2886 |
offsets, ULINT_UNDEFINED, &heap); |
|
2887 |
||
2888 |
if (btr_node_ptr_get_child_page_no(node_ptr, offsets) |
|
2889 |
!= buf_frame_get_page_no(page) |
|
2890 |
|| node_ptr != btr_page_get_father_for_rec( |
|
2891 |
index, page, |
|
2892 |
page_rec_get_prev(page_get_supremum_rec(page)), |
|
2893 |
&mtr)) { |
|
2894 |
btr_validate_report1(index, level, page); |
|
2895 |
||
2896 |
fputs("InnoDB: node pointer to the page is wrong\n", |
|
2897 |
stderr); |
|
2898 |
||
2899 |
buf_page_print(father_page); |
|
2900 |
buf_page_print(page); |
|
2901 |
||
2902 |
fputs("InnoDB: node ptr ", stderr); |
|
2903 |
rec_print_new(stderr, node_ptr, offsets); |
|
2904 |
||
2905 |
fprintf(stderr, "\n" |
|
2906 |
"InnoDB: node ptr child page n:o %lu\n", |
|
2907 |
(unsigned long) btr_node_ptr_get_child_page_no |
|
2908 |
(node_ptr, offsets)); |
|
2909 |
||
2910 |
fputs("InnoDB: record on page ", stderr); |
|
2911 |
rec = btr_page_get_father_for_rec( |
|
2912 |
index, page, |
|
2913 |
page_rec_get_prev(page_get_supremum_rec(page)), |
|
2914 |
&mtr); |
|
2915 |
rec_print(stderr, rec, index); |
|
2916 |
putc('\n', stderr); |
|
2917 |
ret = FALSE; |
|
2918 |
||
2919 |
goto node_ptr_fails; |
|
2920 |
}
|
|
2921 |
||
2922 |
if (btr_page_get_level(page, &mtr) > 0) { |
|
2923 |
offsets = rec_get_offsets(node_ptr, index, |
|
2924 |
offsets, ULINT_UNDEFINED, |
|
2925 |
&heap); |
|
2926 |
||
2927 |
node_ptr_tuple = dict_index_build_node_ptr( |
|
2928 |
index, |
|
2929 |
page_rec_get_next(page_get_infimum_rec(page)), |
|
2930 |
0, heap, btr_page_get_level(page, &mtr)); |
|
2931 |
||
2932 |
if (cmp_dtuple_rec(node_ptr_tuple, node_ptr, |
|
2933 |
offsets)) { |
|
2934 |
rec_t* first_rec = page_rec_get_next( |
|
2935 |
page_get_infimum_rec(page)); |
|
2936 |
||
2937 |
btr_validate_report1(index, level, page); |
|
2938 |
||
2939 |
buf_page_print(father_page); |
|
2940 |
buf_page_print(page); |
|
2941 |
||
2942 |
fputs("InnoDB: Error: node ptrs differ" |
|
2943 |
" on levels > 0\n" |
|
2944 |
"InnoDB: node ptr ", stderr); |
|
2945 |
rec_print_new(stderr, node_ptr, offsets); |
|
2946 |
fputs("InnoDB: first rec ", stderr); |
|
2947 |
rec_print(stderr, first_rec, index); |
|
2948 |
putc('\n', stderr); |
|
2949 |
ret = FALSE; |
|
2950 |
||
2951 |
goto node_ptr_fails; |
|
2952 |
}
|
|
2953 |
}
|
|
2954 |
||
2955 |
if (left_page_no == FIL_NULL) { |
|
2956 |
ut_a(node_ptr == page_rec_get_next( |
|
2957 |
page_get_infimum_rec(father_page))); |
|
2958 |
ut_a(btr_page_get_prev(father_page, &mtr) == FIL_NULL); |
|
2959 |
}
|
|
2960 |
||
2961 |
if (right_page_no == FIL_NULL) { |
|
2962 |
ut_a(node_ptr == page_rec_get_prev( |
|
2963 |
page_get_supremum_rec(father_page))); |
|
2964 |
ut_a(btr_page_get_next(father_page, &mtr) == FIL_NULL); |
|
2965 |
} else { |
|
2966 |
right_node_ptr = btr_page_get_father_node_ptr( |
|
2967 |
index, right_page, &mtr); |
|
2968 |
if (page_rec_get_next(node_ptr) |
|
2969 |
!= page_get_supremum_rec(father_page)) { |
|
2970 |
||
2971 |
if (right_node_ptr |
|
2972 |
!= page_rec_get_next(node_ptr)) { |
|
2973 |
ret = FALSE; |
|
2974 |
fputs("InnoDB: node pointer to" |
|
2975 |
" the right page is wrong\n", |
|
2976 |
stderr); |
|
2977 |
||
2978 |
btr_validate_report1(index, level, |
|
2979 |
page); |
|
2980 |
||
2981 |
buf_page_print(father_page); |
|
2982 |
buf_page_print(page); |
|
2983 |
buf_page_print(right_page); |
|
2984 |
}
|
|
2985 |
} else { |
|
2986 |
right_father_page = buf_frame_align( |
|
2987 |
right_node_ptr); |
|
2988 |
||
2989 |
if (right_node_ptr != page_rec_get_next( |
|
2990 |
page_get_infimum_rec( |
|
2991 |
right_father_page))) { |
|
2992 |
ret = FALSE; |
|
2993 |
fputs("InnoDB: node pointer 2 to" |
|
2994 |
" the right page is wrong\n", |
|
2995 |
stderr); |
|
2996 |
||
2997 |
btr_validate_report1(index, level, |
|
2998 |
page); |
|
2999 |
||
3000 |
buf_page_print(father_page); |
|
3001 |
buf_page_print(right_father_page); |
|
3002 |
buf_page_print(page); |
|
3003 |
buf_page_print(right_page); |
|
3004 |
}
|
|
3005 |
||
3006 |
if (buf_frame_get_page_no(right_father_page) |
|
3007 |
!= btr_page_get_next(father_page, &mtr)) { |
|
3008 |
||
3009 |
ret = FALSE; |
|
3010 |
fputs("InnoDB: node pointer 3 to" |
|
3011 |
" the right page is wrong\n", |
|
3012 |
stderr); |
|
3013 |
||
3014 |
btr_validate_report1(index, level, |
|
3015 |
page); |
|
3016 |
||
3017 |
buf_page_print(father_page); |
|
3018 |
buf_page_print(right_father_page); |
|
3019 |
buf_page_print(page); |
|
3020 |
buf_page_print(right_page); |
|
3021 |
}
|
|
3022 |
}
|
|
3023 |
}
|
|
3024 |
}
|
|
3025 |
||
3026 |
node_ptr_fails: |
|
3027 |
/* Commit the mini-transaction to release the latch on 'page'.
|
|
3028 |
Re-acquire the latch on right_page, which will become 'page'
|
|
3029 |
on the next loop. The page has already been checked. */
|
|
3030 |
mtr_commit(&mtr); |
|
3031 |
||
3032 |
if (right_page_no != FIL_NULL) { |
|
3033 |
mtr_start(&mtr); |
|
3034 |
||
3035 |
page = btr_page_get(space, right_page_no, RW_X_LATCH, &mtr); |
|
3036 |
||
3037 |
goto loop; |
|
3038 |
}
|
|
3039 |
||
3040 |
mem_heap_free(heap); |
|
3041 |
return(ret); |
|
3042 |
}
|
|
3043 |
||
3044 |
/******************************************************************
|
|
3045 |
Checks the consistency of an index tree. */
|
|
3046 |
||
3047 |
ibool
|
|
3048 |
btr_validate_index( |
|
3049 |
/*===============*/
|
|
3050 |
/* out: TRUE if ok */
|
|
3051 |
dict_index_t* index, /* in: index */ |
|
3052 |
trx_t* trx) /* in: transaction or NULL */ |
|
3053 |
{
|
|
3054 |
mtr_t mtr; |
|
3055 |
page_t* root; |
|
3056 |
ulint i; |
|
3057 |
ulint n; |
|
3058 |
||
3059 |
mtr_start(&mtr); |
|
3060 |
mtr_x_lock(dict_index_get_lock(index), &mtr); |
|
3061 |
||
3062 |
root = btr_root_get(index, &mtr); |
|
3063 |
n = btr_page_get_level(root, &mtr); |
|
3064 |
||
3065 |
for (i = 0; i <= n && !trx_is_interrupted(trx); i++) { |
|
3066 |
if (!btr_validate_level(index, trx, n - i)) { |
|
3067 |
||
3068 |
mtr_commit(&mtr); |
|
3069 |
||
3070 |
return(FALSE); |
|
3071 |
}
|
|
3072 |
}
|
|
3073 |
||
3074 |
mtr_commit(&mtr); |
|
3075 |
||
3076 |
return(TRUE); |
|
3077 |
}
|