1
/*****************************************************************************
3
Copyright (C) 1994, 2010, Innobase Oy. All Rights Reserved.
5
This program is free software; you can redistribute it and/or modify it under
6
the terms of the GNU General Public License as published by the Free Software
7
Foundation; version 2 of the License.
9
This program is distributed in the hope that it will be useful, but WITHOUT
10
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13
You should have received a copy of the GNU General Public License along with
14
this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
15
St, Fifth Floor, Boston, MA 02110-1301 USA
17
*****************************************************************************/
19
/**************************************************//**
20
@file include/btr0btr.h
23
Created 6/2/1994 Heikki Tuuri
24
*******************************************************/
31
#include "dict0dict.h"
32
#include "data0data.h"
35
#include "btr0types.h"
37
#ifndef UNIV_HOTBACKUP
38
/** Maximum record size which can be stored on a page, without using the
special big record storage structure. Evaluates to half of UNIV_PAGE_SIZE
minus 200; the operand is parenthesized so that the result does not depend
on how UNIV_PAGE_SIZE itself is spelled. */
#define BTR_PAGE_MAX_REC_SIZE	((UNIV_PAGE_SIZE) / 2 - 200)
42
/** @brief Maximum depth of a B-tree in InnoDB.

Note that this isn't a maximum as such; none of the tree operations
avoid producing trees bigger than this. It is instead a "max depth
that other code must work with", useful for e.g. fixed-size arrays
that must store some information about each level in a tree. In other
words: if a B-tree with bigger depth than this is encountered, it is
not acceptable for it to lead to mysterious memory corruption, but it
is acceptable for the program to die with a clear assert failure. */
#define BTR_MAX_LEVELS		100
53
/** Latching modes for btr_cur_search_to_nth_level(). */
55
/** Search a record on a leaf page and S-latch it. */
56
BTR_SEARCH_LEAF = RW_S_LATCH,
57
/** (Prepare to) modify a record on a leaf page and X-latch it. */
58
BTR_MODIFY_LEAF = RW_X_LATCH,
59
/** Obtain no latches. */
60
BTR_NO_LATCHES = RW_NO_LATCH,
61
/** Start modifying the entire B-tree. */
63
/** Continue modifying the entire B-tree. */
64
BTR_CONT_MODIFY_TREE = 34,
65
/** Search the previous record. */
67
/** Modify the previous record. */
71
/* BTR_INSERT, BTR_DELETE and BTR_DELETE_MARK are mutually exclusive. */
73
/** If this is ORed to btr_latch_mode, it means that the search tuple
will be inserted to the index, at the searched position.
When the record is not in the buffer pool, try to use the insert buffer. */
#define BTR_INSERT		512
78
/** This flag ORed to btr_latch_mode says that we do the search in query
optimization */
#define BTR_ESTIMATE		1024
82
/** This flag ORed to BTR_INSERT says that we can ignore possible
UNIQUE definition on secondary indexes when we decide if we can use
the insert buffer to speed up inserts */
#define BTR_IGNORE_SEC_UNIQUE	2048
87
/** Try to delete mark the record at the searched position using the
insert/delete buffer when the record is not in the buffer pool. */
#define BTR_DELETE_MARK		4096
91
/** Try to purge the record at the searched position using the insert/delete
buffer when the record is not in the buffer pool. */
#define BTR_DELETE		8192
95
/**************************************************************//**
96
Gets the root node of a tree and x-latches it.
97
@return root page, x-latched */
102
dict_index_t* index, /*!< in: index tree */
103
mtr_t* mtr); /*!< in: mtr */
104
/**************************************************************//**
105
Gets a buffer page and declares its latching order level. */
110
ulint space, /*!< in: space id */
111
ulint zip_size, /*!< in: compressed page size in bytes
112
or 0 for uncompressed pages */
113
ulint page_no, /*!< in: page number */
114
ulint mode, /*!< in: latch mode */
115
const char* file, /*!< in: file name */
116
ulint line, /*!< in: line where called */
117
mtr_t* mtr) /*!< in/out: mtr */
118
__attribute__((nonnull));
119
/** Gets a buffer page and declares its latching order level.
Forwards to btr_block_get_func(), supplying the caller's __FILE__ and
__LINE__ as the file and line arguments.
@param space	tablespace identifier
@param zip_size	compressed page size in bytes or 0 for uncompressed pages
@param page_no	page number
@param mode	latch mode
@param mtr	mini-transaction handle
@return the block descriptor */
# define btr_block_get(space,zip_size,page_no,mode,mtr)	\
	btr_block_get_func(space,zip_size,page_no,mode,__FILE__,__LINE__,mtr)
128
/** Gets a buffer page and declares its latching order level.
Expands to buf_block_get_frame() applied to the result of btr_block_get()
with the same arguments.
@param space	tablespace identifier
@param zip_size	compressed page size in bytes or 0 for uncompressed pages
@param page_no	page number
@param mode	latch mode
@param mtr	mini-transaction handle
@return the uncompressed page frame */
# define btr_page_get(space,zip_size,page_no,mode,mtr)	\
	buf_block_get_frame(btr_block_get(space,zip_size,page_no,mode,mtr))
137
#endif /* !UNIV_HOTBACKUP */
138
/**************************************************************//**
139
Gets the index id field of a page.
@return index id */
143
btr_page_get_index_id(
144
/*==================*/
145
const page_t* page); /*!< in: index page */
146
#ifndef UNIV_HOTBACKUP
147
/********************************************************//**
148
Gets the node level field in an index page.
149
@return level, leaf level == 0 */
152
btr_page_get_level_low(
153
/*===================*/
154
const page_t* page); /*!< in: index page */
155
/********************************************************//**
156
Gets the node level field in an index page.
157
@return level, leaf level == 0 */
162
const page_t* page, /*!< in: index page */
163
mtr_t* mtr); /*!< in: mini-transaction handle */
164
/********************************************************//**
165
Gets the next index page number.
166
@return next page number */
171
const page_t* page, /*!< in: index page */
172
mtr_t* mtr); /*!< in: mini-transaction handle */
173
/********************************************************//**
174
Gets the previous index page number.
175
@return prev page number */
180
const page_t* page, /*!< in: index page */
181
mtr_t* mtr); /*!< in: mini-transaction handle */
182
/*************************************************************//**
183
Gets pointer to the previous user record in the tree. It is assumed
184
that the caller has appropriate latches on the page and its neighbor.
185
@return previous user record, NULL if there is none */
188
btr_get_prev_user_rec(
189
/*==================*/
190
rec_t* rec, /*!< in: record on leaf level */
191
mtr_t* mtr); /*!< in: mtr holding a latch on the page, and if
192
needed, also to the previous page */
193
/*************************************************************//**
194
Gets pointer to the next user record in the tree. It is assumed
195
that the caller has appropriate latches on the page and its neighbor.
196
@return next user record, NULL if there is none */
199
btr_get_next_user_rec(
200
/*==================*/
201
rec_t* rec, /*!< in: record on leaf level */
202
mtr_t* mtr); /*!< in: mtr holding a latch on the page, and if
203
needed, also to the next page */
204
/**************************************************************//**
205
Releases the latch on a leaf page and bufferunfixes it. */
208
btr_leaf_page_release(
209
/*==================*/
210
buf_block_t* block, /*!< in: buffer block */
211
ulint latch_mode, /*!< in: BTR_SEARCH_LEAF or BTR_MODIFY_LEAF */
213
mtr_t* mtr); /*!< in: mtr */
214
/**************************************************************//**
215
Gets the child node file address in a node pointer.
216
NOTE: the offsets array must contain all offsets for the record since
217
we read the last field according to offsets and assume that it contains
218
the child page number. In other words offsets must have been retrieved
219
with rec_get_offsets(n_fields=ULINT_UNDEFINED).
220
@return child node address */
223
btr_node_ptr_get_child_page_no(
224
/*===========================*/
225
const rec_t* rec, /*!< in: node pointer record */
226
const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
227
/************************************************************//**
228
Creates the root node for a new index tree.
229
@return page number of the created root, FIL_NULL if did not succeed */
234
ulint type, /*!< in: type of the index */
235
ulint space, /*!< in: space where created */
236
ulint zip_size,/*!< in: compressed page size in bytes
237
or 0 for uncompressed pages */
238
index_id_t index_id,/*!< in: index id */
239
dict_index_t* index, /*!< in: index */
240
mtr_t* mtr); /*!< in: mini-transaction handle */
241
/************************************************************//**
242
Frees a B-tree except the root page, which MUST be freed after this
243
by calling btr_free_root. */
246
btr_free_but_not_root(
247
/*==================*/
248
ulint space, /*!< in: space where created */
249
ulint zip_size, /*!< in: compressed page size in bytes
250
or 0 for uncompressed pages */
251
ulint root_page_no); /*!< in: root page number */
252
/************************************************************//**
253
Frees the B-tree root page. Other tree MUST already have been freed. */
258
ulint space, /*!< in: space where created */
259
ulint zip_size, /*!< in: compressed page size in bytes
260
or 0 for uncompressed pages */
261
ulint root_page_no, /*!< in: root page number */
262
mtr_t* mtr); /*!< in: a mini-transaction which has already been started */
264
/*************************************************************//**
265
Makes tree one level higher by splitting the root, and inserts
266
the tuple. It is assumed that mtr contains an x-latch on the tree.
267
NOTE that the operation of this function must always succeed,
268
we cannot reverse it: therefore enough free disk space must be
269
guaranteed to be available before this function is called.
270
@return inserted record */
273
btr_root_raise_and_insert(
274
/*======================*/
275
btr_cur_t* cursor, /*!< in: cursor at which to insert: must be
276
on the root page; when the function returns,
277
the cursor is positioned on the predecessor
278
of the inserted record */
279
const dtuple_t* tuple, /*!< in: tuple to insert */
280
ulint n_ext, /*!< in: number of externally stored columns */
281
mtr_t* mtr); /*!< in: mtr */
282
/*************************************************************//**
283
Reorganizes an index page.
284
IMPORTANT: if btr_page_reorganize() is invoked on a compressed leaf
285
page of a non-clustered index, the caller must update the insert
286
buffer free bits in the same mini-transaction in such a way that the
287
modification will be redo-logged.
288
@return TRUE on success, FALSE on failure */
293
buf_block_t* block, /*!< in: page to be reorganized */
294
dict_index_t* index, /*!< in: record descriptor */
295
mtr_t* mtr); /*!< in: mtr */
296
/*************************************************************//**
297
Decides if the page should be split at the convergence point of
298
inserts converging to left.
299
@return TRUE if split recommended */
302
btr_page_get_split_rec_to_left(
303
/*===========================*/
304
btr_cur_t* cursor, /*!< in: cursor at which to insert */
305
rec_t** split_rec);/*!< out: if split recommended,
306
the first record on upper half page,
307
or NULL if tuple should be first */
308
/*************************************************************//**
309
Decides if the page should be split at the convergence point of
310
inserts converging to right.
311
@return TRUE if split recommended */
314
btr_page_get_split_rec_to_right(
315
/*============================*/
316
btr_cur_t* cursor, /*!< in: cursor at which to insert */
317
rec_t** split_rec);/*!< out: if split recommended,
318
the first record on upper half page,
319
or NULL if tuple should be first */
320
/*************************************************************//**
321
Splits an index page to halves and inserts the tuple. It is assumed
322
that mtr holds an x-latch to the index tree. NOTE: the tree x-latch is
323
released within this function! NOTE that the operation of this
324
function must always succeed, we cannot reverse it: therefore enough
325
free disk space (2 pages) must be guaranteed to be available before
326
this function is called.
328
@return inserted record */
331
btr_page_split_and_insert(
332
/*======================*/
333
btr_cur_t* cursor, /*!< in: cursor at which to insert; when the
334
function returns, the cursor is positioned
335
on the predecessor of the inserted record */
336
const dtuple_t* tuple, /*!< in: tuple to insert */
337
ulint n_ext, /*!< in: number of externally stored columns */
338
mtr_t* mtr); /*!< in: mtr */
339
/*******************************************************//**
340
Inserts a data tuple to a tree on a non-leaf level. It is assumed
341
that mtr holds an x-latch on the tree. */
344
btr_insert_on_non_leaf_level_func(
345
/*==============================*/
346
dict_index_t* index, /*!< in: index */
347
ulint level, /*!< in: level, must be > 0 */
348
dtuple_t* tuple, /*!< in: the record to be inserted */
349
const char* file, /*!< in: file name */
350
ulint line, /*!< in: line where called */
351
mtr_t* mtr); /*!< in: mtr */
352
/** Inserts a data tuple to a tree on a non-leaf level.
Forwards to btr_insert_on_non_leaf_level_func(), supplying the caller's
__FILE__ and __LINE__ as the file and line arguments.
@param i	index
@param l	level, must be > 0
@param t	the record to be inserted
@param m	mtr */
# define btr_insert_on_non_leaf_level(i,l,t,m)				\
	btr_insert_on_non_leaf_level_func(i,l,t,__FILE__,__LINE__,m)
354
#endif /* !UNIV_HOTBACKUP */
355
/****************************************************************//**
356
Sets a record as the predefined minimum record. */
359
btr_set_min_rec_mark(
360
/*=================*/
361
rec_t* rec, /*!< in/out: record */
362
mtr_t* mtr); /*!< in: mtr */
363
#ifndef UNIV_HOTBACKUP
364
/*************************************************************//**
365
Deletes on the upper level the node pointer to a page. */
370
dict_index_t* index, /*!< in: index tree */
371
buf_block_t* block, /*!< in: page whose node pointer is deleted */
372
mtr_t* mtr); /*!< in: mtr */
374
/************************************************************//**
375
Checks that the node pointer to a page is appropriate. */
381
dict_index_t* index, /*!< in: index tree */
382
buf_block_t* block, /*!< in: index page */
383
mtr_t* mtr); /*!< in: mtr */
384
#endif /* UNIV_DEBUG */
385
/*************************************************************//**
386
Tries to merge the page first to the left immediate brother if such a
387
brother exists, and the node pointers to the current page and to the
388
brother reside on the same page. If the left brother does not satisfy these
389
conditions, looks at the right brother. If the page is the only one on that
390
level lifts the records of the page to the father page, thus reducing the
391
tree height. It is assumed that mtr holds an x-latch on the tree and on the
392
page. If cursor is on the leaf level, mtr must also hold x-latches to
393
the brothers, if they exist.
394
@return TRUE on success */
399
btr_cur_t* cursor, /*!< in: cursor on the page to merge or lift;
400
the page must not be empty: in record delete
401
use btr_discard_page if the page would become empty */
403
mtr_t* mtr); /*!< in: mtr */
404
/*************************************************************//**
405
Discards a page from a B-tree. This is used to remove the last record from
406
a B-tree page: the whole page must be removed at the same time. This cannot
407
be used for the root page, which is allowed to be empty. */
412
btr_cur_t* cursor, /*!< in: cursor on the page to discard: not on the root page */
414
mtr_t* mtr); /*!< in: mtr */
415
#endif /* !UNIV_HOTBACKUP */
416
/****************************************************************//**
417
Parses the redo log record for setting an index record as the predefined
minimum record.
419
@return end of log record or NULL */
422
btr_parse_set_min_rec_mark(
423
/*=======================*/
424
byte* ptr, /*!< in: buffer */
425
byte* end_ptr,/*!< in: buffer end */
426
ulint comp, /*!< in: nonzero=compact page format */
427
page_t* page, /*!< in: page or NULL */
428
mtr_t* mtr); /*!< in: mtr or NULL */
429
/***********************************************************//**
430
Parses a redo log record of reorganizing a page.
431
@return end of log record or NULL */
434
btr_parse_page_reorganize(
435
/*======================*/
436
byte* ptr, /*!< in: buffer */
437
byte* end_ptr,/*!< in: buffer end */
438
dict_index_t* index, /*!< in: record descriptor */
439
buf_block_t* block, /*!< in: page to be reorganized, or NULL */
440
mtr_t* mtr); /*!< in: mtr or NULL */
441
#ifndef UNIV_HOTBACKUP
442
/**************************************************************//**
443
Gets the number of pages in a B-tree.
444
@return number of pages */
449
dict_index_t* index, /*!< in: index */
450
ulint flag); /*!< in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */
451
/**************************************************************//**
452
Allocates a new file page to be used in an index tree. NOTE: we assume
453
that the caller has made the reservation for free extents!
454
@return new allocated block, x-latched; NULL if out of space */
459
dict_index_t* index, /*!< in: index tree */
460
ulint hint_page_no, /*!< in: hint of a good page */
461
byte file_direction, /*!< in: direction where a possible
462
page split is made */
463
ulint level, /*!< in: level where the page is placed in the tree */
465
mtr_t* mtr); /*!< in: mtr */
466
/**************************************************************//**
467
Frees a file page used in an index tree. NOTE: cannot free field external
468
storage pages because the page must contain info on its level. */
473
dict_index_t* index, /*!< in: index tree */
474
buf_block_t* block, /*!< in: block to be freed, x-latched */
475
mtr_t* mtr); /*!< in: mtr */
476
/**************************************************************//**
477
Frees a file page used in an index tree. Can be used also to BLOB
478
external storage pages, because the page level 0 can be given as an
argument. */
484
dict_index_t* index, /*!< in: index tree */
485
buf_block_t* block, /*!< in: block to be freed, x-latched */
486
ulint level, /*!< in: page level */
487
mtr_t* mtr); /*!< in: mtr */
488
#ifdef UNIV_BTR_PRINT
489
/*************************************************************//**
490
Prints size info of a B-tree. */
495
dict_index_t* index); /*!< in: index tree */
496
/**************************************************************//**
497
Prints directories and other info of all nodes in the index. */
502
dict_index_t* index, /*!< in: index */
503
ulint width); /*!< in: print this many entries from start */
505
#endif /* UNIV_BTR_PRINT */
506
/************************************************************//**
507
Checks the size and number of fields in a record based on the definition of
the index.
509
@return TRUE if ok */
512
btr_index_rec_validate(
513
/*===================*/
514
const rec_t* rec, /*!< in: index record */
515
const dict_index_t* index, /*!< in: index */
516
ibool dump_on_error); /*!< in: TRUE if the function
517
should print hex dump of record on error */
519
/**************************************************************//**
520
Checks the consistency of an index tree.
521
@return TRUE if ok */
526
dict_index_t* index, /*!< in: index */
527
trx_t* trx); /*!< in: transaction or NULL */
529
/** Values accepted by the flag parameter that this header documents as
"BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE" (the B-tree page-count query). */
#define BTR_N_LEAF_PAGES	1
#define BTR_TOTAL_SIZE		2
531
#endif /* !UNIV_HOTBACKUP */
534
#include "btr0btr.ic"