1
by brian
clean slate |
1 |
/******************************************************
|
2 |
The B-tree
|
|
3 |
||
4 |
(c) 1994-1996 Innobase Oy
|
|
5 |
||
6 |
Created 6/2/1994 Heikki Tuuri
|
|
7 |
*******************************************************/
|
|
8 |
||
9 |
#ifndef btr0btr_h
|
|
10 |
#define btr0btr_h
|
|
11 |
||
12 |
#include "univ.i" |
|
13 |
||
14 |
#include "dict0dict.h" |
|
15 |
#include "data0data.h" |
|
16 |
#include "page0cur.h" |
|
17 |
#include "rem0rec.h" |
|
18 |
#include "mtr0mtr.h" |
|
19 |
#include "btr0types.h" |
|
20 |
||
21 |
/* Maximum record size which can be stored on a page, without using the
|
|
22 |
special big record storage structure */
|
|
23 |
||
24 |
#define BTR_PAGE_MAX_REC_SIZE (UNIV_PAGE_SIZE / 2 - 200)
|
|
25 |
||
26 |
/* Maximum depth of a B-tree in InnoDB. Note that this isn't a maximum as
|
|
27 |
such; none of the tree operations avoid producing trees bigger than this. It
|
|
28 |
is instead a "max depth that other code must work with", useful for e.g.
|
|
29 |
fixed-size arrays that must store some information about each level in a
|
|
30 |
tree. In other words: if a B-tree with bigger depth than this is
|
|
31 |
encountered, it is not acceptable for it to lead to mysterious memory
|
|
32 |
corruption, but it is acceptable for the program to die with a clear assert
|
|
33 |
failure. */
|
|
34 |
#define BTR_MAX_LEVELS 100
|
|
35 |
||
36 |
/* Latching modes for btr_cur_search_to_nth_level(). */
|
|
37 |
#define BTR_SEARCH_LEAF RW_S_LATCH
|
|
38 |
#define BTR_MODIFY_LEAF RW_X_LATCH
|
|
39 |
#define BTR_NO_LATCHES RW_NO_LATCH
|
|
40 |
#define BTR_MODIFY_TREE 33
|
|
41 |
#define BTR_CONT_MODIFY_TREE 34
|
|
42 |
#define BTR_SEARCH_PREV 35
|
|
43 |
#define BTR_MODIFY_PREV 36
|
|
44 |
||
45 |
/* If this is ORed to the latch mode, it means that the search tuple will be
|
|
46 |
inserted to the index, at the searched position */
|
|
47 |
#define BTR_INSERT 512
|
|
48 |
||
49 |
/* This flag ORed to latch mode says that we do the search in query
|
|
50 |
optimization */
|
|
51 |
#define BTR_ESTIMATE 1024
|
|
52 |
||
53 |
/* This flag ORed to latch mode says that we can ignore possible
|
|
54 |
UNIQUE definition on secondary indexes when we decide if we can use the
|
|
55 |
insert buffer to speed up inserts */
|
|
56 |
#define BTR_IGNORE_SEC_UNIQUE 2048
|
|
57 |
||
58 |
/******************************************************************
|
|
59 |
Gets the root node of a tree and x-latches it. */
|
|
60 |
||
61 |
page_t* |
|
62 |
btr_root_get( |
|
63 |
/*=========*/
|
|
64 |
/* out: root page, x-latched */
|
|
65 |
dict_index_t* index, /* in: index tree */ |
|
66 |
mtr_t* mtr); /* in: mtr */ |
|
67 |
/******************************************************************
|
|
68 |
Gets a buffer page and declares its latching order level. */
|
|
69 |
UNIV_INLINE
|
|
70 |
page_t* |
|
71 |
btr_page_get( |
|
72 |
/*=========*/
|
|
73 |
ulint space, /* in: space id */ |
|
74 |
ulint page_no, /* in: page number */ |
|
75 |
ulint mode, /* in: latch mode */ |
|
76 |
mtr_t* mtr); /* in: mtr */ |
|
77 |
/******************************************************************
|
|
78 |
Gets the index id field of a page. */
|
|
79 |
UNIV_INLINE
|
|
80 |
dulint
|
|
81 |
btr_page_get_index_id( |
|
82 |
/*==================*/
|
|
83 |
/* out: index id */
|
|
84 |
page_t* page); /* in: index page */ |
|
85 |
/************************************************************
|
|
86 |
Gets the node level field in an index page. */
|
|
87 |
UNIV_INLINE
|
|
88 |
ulint
|
|
89 |
btr_page_get_level_low( |
|
90 |
/*===================*/
|
|
91 |
/* out: level, leaf level == 0 */
|
|
92 |
page_t* page); /* in: index page */ |
|
93 |
/************************************************************
|
|
94 |
Gets the node level field in an index page. */
|
|
95 |
UNIV_INLINE
|
|
96 |
ulint
|
|
97 |
btr_page_get_level( |
|
98 |
/*===============*/
|
|
99 |
/* out: level, leaf level == 0 */
|
|
100 |
page_t* page, /* in: index page */ |
|
101 |
mtr_t* mtr); /* in: mini-transaction handle */ |
|
102 |
/************************************************************
|
|
103 |
Gets the next index page number. */
|
|
104 |
UNIV_INLINE
|
|
105 |
ulint
|
|
106 |
btr_page_get_next( |
|
107 |
/*==============*/
|
|
108 |
/* out: next page number */
|
|
109 |
page_t* page, /* in: index page */ |
|
110 |
mtr_t* mtr); /* in: mini-transaction handle */ |
|
111 |
/************************************************************
|
|
112 |
Gets the previous index page number. */
|
|
113 |
UNIV_INLINE
|
|
114 |
ulint
|
|
115 |
btr_page_get_prev( |
|
116 |
/*==============*/
|
|
117 |
/* out: prev page number */
|
|
118 |
page_t* page, /* in: index page */ |
|
119 |
mtr_t* mtr); /* in: mini-transaction handle */ |
|
120 |
/*****************************************************************
|
|
121 |
Gets pointer to the previous user record in the tree. It is assumed
|
|
122 |
that the caller has appropriate latches on the page and its neighbor. */
|
|
123 |
||
124 |
rec_t* |
|
125 |
btr_get_prev_user_rec( |
|
126 |
/*==================*/
|
|
127 |
/* out: previous user record, NULL if there is none */
|
|
128 |
rec_t* rec, /* in: record on leaf level */ |
|
129 |
mtr_t* mtr); /* in: mtr holding a latch on the page, and if |
|
130 |
needed, also to the previous page */
|
|
131 |
/*****************************************************************
|
|
132 |
Gets pointer to the next user record in the tree. It is assumed
|
|
133 |
that the caller has appropriate latches on the page and its neighbor. */
|
|
134 |
||
135 |
rec_t* |
|
136 |
btr_get_next_user_rec( |
|
137 |
/*==================*/
|
|
138 |
/* out: next user record, NULL if there is none */
|
|
139 |
rec_t* rec, /* in: record on leaf level */ |
|
140 |
mtr_t* mtr); /* in: mtr holding a latch on the page, and if |
|
141 |
needed, also to the next page */
|
|
142 |
/******************************************************************
|
|
143 |
Releases the latch on a leaf page and bufferunfixes it. */
|
|
144 |
UNIV_INLINE
|
|
145 |
void
|
|
146 |
btr_leaf_page_release( |
|
147 |
/*==================*/
|
|
148 |
page_t* page, /* in: page */ |
|
149 |
ulint latch_mode, /* in: BTR_SEARCH_LEAF or BTR_MODIFY_LEAF */ |
|
150 |
mtr_t* mtr); /* in: mtr */ |
|
151 |
/******************************************************************
|
|
152 |
Gets the child node file address in a node pointer. */
|
|
153 |
UNIV_INLINE
|
|
154 |
ulint
|
|
155 |
btr_node_ptr_get_child_page_no( |
|
156 |
/*===========================*/
|
|
157 |
/* out: child node address */
|
|
158 |
rec_t* rec, /* in: node pointer record */ |
|
159 |
const ulint* offsets);/* in: array returned by rec_get_offsets() */ |
|
160 |
/****************************************************************
|
|
161 |
Creates the root node for a new index tree. */
|
|
162 |
||
163 |
ulint
|
|
164 |
btr_create( |
|
165 |
/*=======*/
|
|
166 |
/* out: page number of the created root, FIL_NULL if
|
|
167 |
did not succeed */
|
|
168 |
ulint type, /* in: type of the index */ |
|
169 |
ulint space, /* in: space where created */ |
|
170 |
dulint index_id,/* in: index id */ |
|
171 |
ulint comp, /* in: nonzero=compact page format */ |
|
172 |
mtr_t* mtr); /* in: mini-transaction handle */ |
|
173 |
/****************************************************************
|
|
174 |
Frees a B-tree except the root page, which MUST be freed after this
|
|
175 |
by calling btr_free_root. */
|
|
176 |
||
177 |
void
|
|
178 |
btr_free_but_not_root( |
|
179 |
/*==================*/
|
|
180 |
ulint space, /* in: space where created */ |
|
181 |
ulint root_page_no); /* in: root page number */ |
|
182 |
/****************************************************************
|
|
183 |
Frees the B-tree root page. Other tree MUST already have been freed. */
|
|
184 |
||
185 |
void
|
|
186 |
btr_free_root( |
|
187 |
/*==========*/
|
|
188 |
ulint space, /* in: space where created */ |
|
189 |
ulint root_page_no, /* in: root page number */ |
|
190 |
mtr_t* mtr); /* in: a mini-transaction which has already |
|
191 |
been started */
|
|
192 |
/*****************************************************************
|
|
193 |
Makes tree one level higher by splitting the root, and inserts
|
|
194 |
the tuple. It is assumed that mtr contains an x-latch on the tree.
|
|
195 |
NOTE that the operation of this function must always succeed,
|
|
196 |
we cannot reverse it: therefore enough free disk space must be
|
|
197 |
guaranteed to be available before this function is called. */
|
|
198 |
||
199 |
rec_t* |
|
200 |
btr_root_raise_and_insert( |
|
201 |
/*======================*/
|
|
202 |
/* out: inserted record */
|
|
203 |
btr_cur_t* cursor, /* in: cursor at which to insert: must be |
|
204 |
on the root page; when the function returns,
|
|
205 |
the cursor is positioned on the predecessor
|
|
206 |
of the inserted record */
|
|
207 |
dtuple_t* tuple, /* in: tuple to insert */ |
|
208 |
mtr_t* mtr); /* in: mtr */ |
|
209 |
/*****************************************************************
|
|
210 |
Reorganizes an index page. */
|
|
211 |
||
212 |
void
|
|
213 |
btr_page_reorganize( |
|
214 |
/*================*/
|
|
215 |
page_t* page, /* in: page to be reorganized */ |
|
216 |
dict_index_t* index, /* in: record descriptor */ |
|
217 |
mtr_t* mtr); /* in: mtr */ |
|
218 |
/*****************************************************************
|
|
219 |
Decides if the page should be split at the convergence point of
|
|
220 |
inserts converging to left. */
|
|
221 |
||
222 |
ibool
|
|
223 |
btr_page_get_split_rec_to_left( |
|
224 |
/*===========================*/
|
|
225 |
/* out: TRUE if split recommended */
|
|
226 |
btr_cur_t* cursor, /* in: cursor at which to insert */ |
|
227 |
rec_t** split_rec);/* out: if split recommended, |
|
228 |
the first record on upper half page,
|
|
229 |
or NULL if tuple should be first */
|
|
230 |
/*****************************************************************
|
|
231 |
Decides if the page should be split at the convergence point of
|
|
232 |
inserts converging to right. */
|
|
233 |
||
234 |
ibool
|
|
235 |
btr_page_get_split_rec_to_right( |
|
236 |
/*============================*/
|
|
237 |
/* out: TRUE if split recommended */
|
|
238 |
btr_cur_t* cursor, /* in: cursor at which to insert */ |
|
239 |
rec_t** split_rec);/* out: if split recommended, |
|
240 |
the first record on upper half page,
|
|
241 |
or NULL if tuple should be first */
|
|
242 |
/*****************************************************************
|
|
243 |
Splits an index page to halves and inserts the tuple. It is assumed
|
|
244 |
that mtr holds an x-latch to the index tree. NOTE: the tree x-latch
|
|
245 |
is released within this function! NOTE that the operation of this
|
|
246 |
function must always succeed, we cannot reverse it: therefore
|
|
247 |
enough free disk space must be guaranteed to be available before
|
|
248 |
this function is called. */
|
|
249 |
||
250 |
rec_t* |
|
251 |
btr_page_split_and_insert( |
|
252 |
/*======================*/
|
|
253 |
/* out: inserted record; NOTE: the tree
|
|
254 |
x-latch is released! NOTE: 2 free disk
|
|
255 |
pages must be available! */
|
|
256 |
btr_cur_t* cursor, /* in: cursor at which to insert; when the |
|
257 |
function returns, the cursor is positioned
|
|
258 |
on the predecessor of the inserted record */
|
|
259 |
dtuple_t* tuple, /* in: tuple to insert */ |
|
260 |
mtr_t* mtr); /* in: mtr */ |
|
261 |
/***********************************************************
|
|
262 |
Inserts a data tuple to a tree on a non-leaf level. It is assumed
|
|
263 |
that mtr holds an x-latch on the tree. */
|
|
264 |
||
265 |
void
|
|
266 |
btr_insert_on_non_leaf_level( |
|
267 |
/*=========================*/
|
|
268 |
dict_index_t* index, /* in: index */ |
|
269 |
ulint level, /* in: level, must be > 0 */ |
|
270 |
dtuple_t* tuple, /* in: the record to be inserted */ |
|
271 |
mtr_t* mtr); /* in: mtr */ |
|
272 |
/********************************************************************
|
|
273 |
Sets a record as the predefined minimum record. */
|
|
274 |
||
275 |
void
|
|
276 |
btr_set_min_rec_mark( |
|
277 |
/*=================*/
|
|
278 |
rec_t* rec, /* in: record */ |
|
279 |
ulint comp, /* in: nonzero=compact page format */ |
|
280 |
mtr_t* mtr); /* in: mtr */ |
|
281 |
/*****************************************************************
|
|
282 |
Deletes on the upper level the node pointer to a page. */
|
|
283 |
||
284 |
void
|
|
285 |
btr_node_ptr_delete( |
|
286 |
/*================*/
|
|
287 |
dict_index_t* index, /* in: index tree */ |
|
288 |
page_t* page, /* in: page whose node pointer is deleted */ |
|
289 |
mtr_t* mtr); /* in: mtr */ |
|
290 |
#ifdef UNIV_DEBUG
|
|
291 |
/****************************************************************
|
|
292 |
Checks that the node pointer to a page is appropriate. */
|
|
293 |
||
294 |
ibool
|
|
295 |
btr_check_node_ptr( |
|
296 |
/*===============*/
|
|
297 |
/* out: TRUE */
|
|
298 |
dict_index_t* index, /* in: index tree */ |
|
299 |
page_t* page, /* in: index page */ |
|
300 |
mtr_t* mtr); /* in: mtr */ |
|
301 |
#endif /* UNIV_DEBUG */ |
|
302 |
/*****************************************************************
|
|
303 |
Tries to merge the page first to the left immediate brother if such a
|
|
304 |
brother exists, and the node pointers to the current page and to the
|
|
305 |
brother reside on the same page. If the left brother does not satisfy these
|
|
306 |
conditions, looks at the right brother. If the page is the only one on that
|
|
307 |
level lifts the records of the page to the father page, thus reducing the
|
|
308 |
tree height. It is assumed that mtr holds an x-latch on the tree and on the
|
|
309 |
page. If cursor is on the leaf level, mtr must also hold x-latches to
|
|
310 |
the brothers, if they exist. NOTE: it is assumed that the caller has reserved
|
|
311 |
enough free extents so that the compression will always succeed if done! */
|
|
312 |
void
|
|
313 |
btr_compress( |
|
314 |
/*=========*/
|
|
315 |
btr_cur_t* cursor, /* in: cursor on the page to merge or lift; |
|
316 |
the page must not be empty: in record delete
|
|
317 |
use btr_discard_page if the page would become
|
|
318 |
empty */
|
|
319 |
mtr_t* mtr); /* in: mtr */ |
|
320 |
/*****************************************************************
|
|
321 |
Discards a page from a B-tree. This is used to remove the last record from
|
|
322 |
a B-tree page: the whole page must be removed at the same time. This cannot
|
|
323 |
be used for the root page, which is allowed to be empty. */
|
|
324 |
||
325 |
void
|
|
326 |
btr_discard_page( |
|
327 |
/*=============*/
|
|
328 |
btr_cur_t* cursor, /* in: cursor on the page to discard: not on |
|
329 |
the root page */
|
|
330 |
mtr_t* mtr); /* in: mtr */ |
|
331 |
/********************************************************************
|
|
332 |
Parses the redo log record for setting an index record as the predefined
|
|
333 |
minimum record. */
|
|
334 |
||
335 |
byte* |
|
336 |
btr_parse_set_min_rec_mark( |
|
337 |
/*=======================*/
|
|
338 |
/* out: end of log record or NULL */
|
|
339 |
byte* ptr, /* in: buffer */ |
|
340 |
byte* end_ptr,/* in: buffer end */ |
|
341 |
ulint comp, /* in: nonzero=compact page format */ |
|
342 |
page_t* page, /* in: page or NULL */ |
|
343 |
mtr_t* mtr); /* in: mtr or NULL */ |
|
344 |
/***************************************************************
|
|
345 |
Parses a redo log record of reorganizing a page. */
|
|
346 |
||
347 |
byte* |
|
348 |
btr_parse_page_reorganize( |
|
349 |
/*======================*/
|
|
350 |
/* out: end of log record or NULL */
|
|
351 |
byte* ptr, /* in: buffer */ |
|
352 |
byte* end_ptr,/* in: buffer end */ |
|
353 |
dict_index_t* index, /* in: record descriptor */ |
|
354 |
page_t* page, /* in: page or NULL */ |
|
355 |
mtr_t* mtr); /* in: mtr or NULL */ |
|
356 |
/******************************************************************
|
|
357 |
Gets the number of pages in a B-tree. */
|
|
358 |
||
359 |
ulint
|
|
360 |
btr_get_size( |
|
361 |
/*=========*/
|
|
362 |
/* out: number of pages */
|
|
363 |
dict_index_t* index, /* in: index */ |
|
364 |
ulint flag); /* in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */ |
|
365 |
/******************************************************************
|
|
366 |
Allocates a new file page to be used in an index tree. NOTE: we assume
|
|
367 |
that the caller has made the reservation for free extents! */
|
|
368 |
||
369 |
page_t* |
|
370 |
btr_page_alloc( |
|
371 |
/*===========*/
|
|
372 |
/* out: new allocated page, x-latched;
|
|
373 |
NULL if out of space */
|
|
374 |
dict_index_t* index, /* in: index tree */ |
|
375 |
ulint hint_page_no, /* in: hint of a good page */ |
|
376 |
byte file_direction, /* in: direction where a possible |
|
377 |
page split is made */
|
|
378 |
ulint level, /* in: level where the page is placed |
|
379 |
in the tree */
|
|
380 |
mtr_t* mtr); /* in: mtr */ |
|
381 |
/******************************************************************
|
|
382 |
Frees a file page used in an index tree. NOTE: cannot free field external
|
|
383 |
storage pages because the page must contain info on its level. */
|
|
384 |
||
385 |
void
|
|
386 |
btr_page_free( |
|
387 |
/*==========*/
|
|
388 |
dict_index_t* index, /* in: index tree */ |
|
389 |
page_t* page, /* in: page to be freed, x-latched */ |
|
390 |
mtr_t* mtr); /* in: mtr */ |
|
391 |
/******************************************************************
|
|
392 |
Frees a file page used in an index tree. Can be used also to BLOB
|
|
393 |
external storage pages, because the page level 0 can be given as an
|
|
394 |
argument. */
|
|
395 |
||
396 |
void
|
|
397 |
btr_page_free_low( |
|
398 |
/*==============*/
|
|
399 |
dict_index_t* index, /* in: index tree */ |
|
400 |
page_t* page, /* in: page to be freed, x-latched */ |
|
401 |
ulint level, /* in: page level */ |
|
402 |
mtr_t* mtr); /* in: mtr */ |
|
403 |
#ifdef UNIV_BTR_PRINT
|
|
404 |
/*****************************************************************
|
|
405 |
Prints size info of a B-tree. */
|
|
406 |
||
407 |
void
|
|
408 |
btr_print_size( |
|
409 |
/*===========*/
|
|
410 |
dict_index_t* index); /* in: index tree */ |
|
411 |
/******************************************************************
|
|
412 |
Prints directories and other info of all nodes in the index. */
|
|
413 |
||
414 |
void
|
|
415 |
btr_print_index( |
|
416 |
/*============*/
|
|
417 |
dict_index_t* index, /* in: index */ |
|
418 |
ulint width); /* in: print this many entries from start |
|
419 |
and end */
|
|
420 |
#endif /* UNIV_BTR_PRINT */ |
|
421 |
/****************************************************************
|
|
422 |
Checks the size and number of fields in a record based on the definition of
|
|
423 |
the index. */
|
|
424 |
||
425 |
ibool
|
|
426 |
btr_index_rec_validate( |
|
427 |
/*===================*/
|
|
428 |
/* out: TRUE if ok */
|
|
429 |
rec_t* rec, /* in: index record */ |
|
430 |
dict_index_t* index, /* in: index */ |
|
431 |
ibool dump_on_error); /* in: TRUE if the function |
|
432 |
should print hex dump of record
|
|
433 |
and page on error */
|
|
434 |
/******************************************************************
|
|
435 |
Checks the consistency of an index tree. */
|
|
436 |
||
437 |
ibool
|
|
438 |
btr_validate_index( |
|
439 |
/*===============*/
|
|
440 |
/* out: TRUE if ok */
|
|
441 |
dict_index_t* index, /* in: index */ |
|
442 |
trx_t* trx); /* in: transaction or NULL */ |
|
443 |
||
444 |
#define BTR_N_LEAF_PAGES 1
|
|
445 |
#define BTR_TOTAL_SIZE 2
|
|
446 |
||
447 |
#ifndef UNIV_NONINL
|
|
448 |
#include "btr0btr.ic" |
|
449 |
#endif
|
|
450 |
||
451 |
#endif
|