31
14
#include "dict0dict.h"
32
15
#include "data0data.h"
33
16
#include "page0cur.h"
34
18
#include "mtr0mtr.h"
35
19
#include "btr0types.h"
37
#ifndef UNIV_HOTBACKUP
38
/** Maximum record size which can be stored on a page, without using the
21
/* Maximum record size which can be stored on a page, without using the
39
22
special big record storage structure */
40
24
#define BTR_PAGE_MAX_REC_SIZE (UNIV_PAGE_SIZE / 2 - 200)
42
/** @brief Maximum depth of a B-tree in InnoDB.
44
Note that this isn't a maximum as such; none of the tree operations
45
avoid producing trees bigger than this. It is instead a "max depth
46
that other code must work with", useful for e.g. fixed-size arrays
47
that must store some information about each level in a tree. In other
48
words: if a B-tree with bigger depth than this is encountered, it is
49
not acceptable for it to lead to mysterious memory corruption, but it
50
is acceptable for the program to die with a clear assert failure. */
26
/* Maximum depth of a B-tree in InnoDB. Note that this isn't a maximum as
27
such; none of the tree operations avoid producing trees bigger than this. It
28
is instead a "max depth that other code must work with", useful for e.g.
29
fixed-size arrays that must store some information about each level in a
30
tree. In other words: if a B-tree with bigger depth than this is
31
encountered, it is not acceptable for it to lead to mysterious memory
32
corruption, but it is acceptable for the program to die with a clear assert
51
34
#define BTR_MAX_LEVELS 100
53
/** Latching modes for btr_cur_search_to_nth_level(). */
55
/** Search a record on a leaf page and S-latch it. */
56
BTR_SEARCH_LEAF = RW_S_LATCH,
57
/** (Prepare to) modify a record on a leaf page and X-latch it. */
58
BTR_MODIFY_LEAF = RW_X_LATCH,
59
/** Obtain no latches. */
60
BTR_NO_LATCHES = RW_NO_LATCH,
61
/** Start modifying the entire B-tree. */
63
/** Continue modifying the entire B-tree. */
64
BTR_CONT_MODIFY_TREE = 34,
65
/** Search the previous record. */
67
/** Modify the previous record. */
36
/* Latching modes for btr_cur_search_to_nth_level(). */
37
#define BTR_SEARCH_LEAF RW_S_LATCH
38
#define BTR_MODIFY_LEAF RW_X_LATCH
39
#define BTR_NO_LATCHES RW_NO_LATCH
40
#define BTR_MODIFY_TREE 33
41
#define BTR_CONT_MODIFY_TREE 34
42
#define BTR_SEARCH_PREV 35
43
#define BTR_MODIFY_PREV 36
71
/** If this is ORed to btr_latch_mode, it means that the search tuple
72
will be inserted to the index, at the searched position */
45
/* If this is ORed to the latch mode, it means that the search tuple will be
46
inserted to the index, at the searched position */
73
47
#define BTR_INSERT 512
75
/** This flag ORed to btr_latch_mode says that we do the search in query
49
/* This flag ORed to latch mode says that we do the search in query
77
51
#define BTR_ESTIMATE 1024
79
/** This flag ORed to btr_latch_mode says that we can ignore possible
80
UNIQUE definition on secondary indexes when we decide if we can use
81
the insert buffer to speed up inserts */
53
/* This flag ORed to latch mode says that we can ignore possible
54
UNIQUE definition on secondary indexes when we decide if we can use the
55
insert buffer to speed up inserts */
82
56
#define BTR_IGNORE_SEC_UNIQUE 2048
84
/**************************************************************//**
85
Gets the root node of a tree and x-latches it.
86
@return root page, x-latched */
58
/******************************************************************
59
Gets the root node of a tree and x-latches it. */
91
dict_index_t* index, /*!< in: index tree */
92
mtr_t* mtr); /*!< in: mtr */
93
/**************************************************************//**
64
/* out: root page, x-latched */
65
dict_index_t* index, /* in: index tree */
66
mtr_t* mtr); /* in: mtr */
67
/******************************************************************
94
68
Gets a buffer page and declares its latching order level. */
99
ulint space, /*!< in: space id */
100
ulint zip_size, /*!< in: compressed page size in bytes
73
ulint space, /* in: space id */
74
ulint zip_size, /* in: compressed page size in bytes
101
75
or 0 for uncompressed pages */
102
ulint page_no, /*!< in: page number */
103
ulint mode, /*!< in: latch mode */
104
mtr_t* mtr); /*!< in: mtr */
105
/**************************************************************//**
76
ulint page_no, /* in: page number */
77
ulint mode, /* in: latch mode */
78
mtr_t* mtr); /* in: mtr */
79
/******************************************************************
106
80
Gets a buffer page and declares its latching order level. */
111
ulint space, /*!< in: space id */
112
ulint zip_size, /*!< in: compressed page size in bytes
85
ulint space, /* in: space id */
86
ulint zip_size, /* in: compressed page size in bytes
113
87
or 0 for uncompressed pages */
114
ulint page_no, /*!< in: page number */
115
ulint mode, /*!< in: latch mode */
116
mtr_t* mtr); /*!< in: mtr */
117
#endif /* !UNIV_HOTBACKUP */
118
/**************************************************************//**
119
Gets the index id field of a page.
88
ulint page_no, /* in: page number */
89
ulint mode, /* in: latch mode */
90
mtr_t* mtr); /* in: mtr */
91
/******************************************************************
92
Gets the index id field of a page. */
123
95
btr_page_get_index_id(
124
96
/*==================*/
125
const page_t* page); /*!< in: index page */
126
#ifndef UNIV_HOTBACKUP
127
/********************************************************//**
128
Gets the node level field in an index page.
129
@return level, leaf level == 0 */
98
const page_t* page); /* in: index page */
99
/************************************************************
100
Gets the node level field in an index page. */
132
103
btr_page_get_level_low(
133
104
/*===================*/
134
const page_t* page); /*!< in: index page */
135
/********************************************************//**
136
Gets the node level field in an index page.
137
@return level, leaf level == 0 */
105
/* out: level, leaf level == 0 */
106
const page_t* page); /* in: index page */
107
/************************************************************
108
Gets the node level field in an index page. */
140
111
btr_page_get_level(
141
112
/*===============*/
142
const page_t* page, /*!< in: index page */
143
mtr_t* mtr); /*!< in: mini-transaction handle */
144
/********************************************************//**
145
Gets the next index page number.
146
@return next page number */
113
/* out: level, leaf level == 0 */
114
const page_t* page, /* in: index page */
115
mtr_t* mtr); /* in: mini-transaction handle */
116
/************************************************************
117
Gets the next index page number. */
149
120
btr_page_get_next(
150
121
/*==============*/
151
const page_t* page, /*!< in: index page */
152
mtr_t* mtr); /*!< in: mini-transaction handle */
153
/********************************************************//**
154
Gets the previous index page number.
155
@return prev page number */
122
/* out: next page number */
123
const page_t* page, /* in: index page */
124
mtr_t* mtr); /* in: mini-transaction handle */
125
/************************************************************
126
Gets the previous index page number. */
158
129
btr_page_get_prev(
159
130
/*==============*/
160
const page_t* page, /*!< in: index page */
161
mtr_t* mtr); /*!< in: mini-transaction handle */
162
/*************************************************************//**
131
/* out: prev page number */
132
const page_t* page, /* in: index page */
133
mtr_t* mtr); /* in: mini-transaction handle */
134
/*****************************************************************
163
135
Gets pointer to the previous user record in the tree. It is assumed
164
that the caller has appropriate latches on the page and its neighbor.
165
@return previous user record, NULL if there is none */
136
that the caller has appropriate latches on the page and its neighbor. */
168
139
btr_get_prev_user_rec(
169
140
/*==================*/
170
rec_t* rec, /*!< in: record on leaf level */
171
mtr_t* mtr); /*!< in: mtr holding a latch on the page, and if
141
/* out: previous user record, NULL if there is none */
142
rec_t* rec, /* in: record on leaf level */
143
mtr_t* mtr); /* in: mtr holding a latch on the page, and if
172
144
needed, also to the previous page */
173
/*************************************************************//**
145
/*****************************************************************
174
146
Gets pointer to the next user record in the tree. It is assumed
175
that the caller has appropriate latches on the page and its neighbor.
176
@return next user record, NULL if there is none */
147
that the caller has appropriate latches on the page and its neighbor. */
179
150
btr_get_next_user_rec(
180
151
/*==================*/
181
rec_t* rec, /*!< in: record on leaf level */
182
mtr_t* mtr); /*!< in: mtr holding a latch on the page, and if
152
/* out: next user record, NULL if there is none */
153
rec_t* rec, /* in: record on leaf level */
154
mtr_t* mtr); /* in: mtr holding a latch on the page, and if
183
155
needed, also to the next page */
184
/**************************************************************//**
156
/******************************************************************
185
157
Releases the latch on a leaf page and bufferunfixes it. */
188
160
btr_leaf_page_release(
189
161
/*==================*/
190
buf_block_t* block, /*!< in: buffer block */
191
ulint latch_mode, /*!< in: BTR_SEARCH_LEAF or
162
buf_block_t* block, /* in: buffer block */
163
ulint latch_mode, /* in: BTR_SEARCH_LEAF or
192
164
BTR_MODIFY_LEAF */
193
mtr_t* mtr); /*!< in: mtr */
194
/**************************************************************//**
195
Gets the child node file address in a node pointer.
196
@return child node address */
165
mtr_t* mtr); /* in: mtr */
166
/******************************************************************
167
Gets the child node file address in a node pointer. */
199
170
btr_node_ptr_get_child_page_no(
200
171
/*===========================*/
201
const rec_t* rec, /*!< in: node pointer record */
202
const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
203
/************************************************************//**
204
Creates the root node for a new index tree.
205
@return page number of the created root, FIL_NULL if did not succeed */
172
/* out: child node address */
173
const rec_t* rec, /* in: node pointer record */
174
const ulint* offsets);/* in: array returned by rec_get_offsets() */
175
/****************************************************************
176
Creates the root node for a new index tree. */
210
ulint type, /*!< in: type of the index */
211
ulint space, /*!< in: space where created */
212
ulint zip_size,/*!< in: compressed page size in bytes
181
/* out: page number of the created root,
182
FIL_NULL if did not succeed */
183
ulint type, /* in: type of the index */
184
ulint space, /* in: space where created */
185
ulint zip_size,/* in: compressed page size in bytes
213
186
or 0 for uncompressed pages */
214
dulint index_id,/*!< in: index id */
215
dict_index_t* index, /*!< in: index */
216
mtr_t* mtr); /*!< in: mini-transaction handle */
217
/************************************************************//**
187
dulint index_id,/* in: index id */
188
dict_index_t* index, /* in: index */
189
mtr_t* mtr); /* in: mini-transaction handle */
190
/****************************************************************
218
191
Frees a B-tree except the root page, which MUST be freed after this
219
192
by calling btr_free_root. */
222
195
btr_free_but_not_root(
223
196
/*==================*/
224
ulint space, /*!< in: space where created */
225
ulint zip_size, /*!< in: compressed page size in bytes
197
ulint space, /* in: space where created */
198
ulint zip_size, /* in: compressed page size in bytes
226
199
or 0 for uncompressed pages */
227
ulint root_page_no); /*!< in: root page number */
228
/************************************************************//**
200
ulint root_page_no); /* in: root page number */
201
/****************************************************************
229
202
Frees the B-tree root page. Other tree MUST already have been freed. */
234
ulint space, /*!< in: space where created */
235
ulint zip_size, /*!< in: compressed page size in bytes
207
ulint space, /* in: space where created */
208
ulint zip_size, /* in: compressed page size in bytes
236
209
or 0 for uncompressed pages */
237
ulint root_page_no, /*!< in: root page number */
238
mtr_t* mtr); /*!< in: a mini-transaction which has already
210
ulint root_page_no, /* in: root page number */
211
mtr_t* mtr); /* in: a mini-transaction which has already
240
/*************************************************************//**
213
/*****************************************************************
241
214
Makes tree one level higher by splitting the root, and inserts
242
215
the tuple. It is assumed that mtr contains an x-latch on the tree.
243
216
NOTE that the operation of this function must always succeed,
244
217
we cannot reverse it: therefore enough free disk space must be
245
guaranteed to be available before this function is called.
246
@return inserted record */
218
guaranteed to be available before this function is called. */
249
221
btr_root_raise_and_insert(
250
222
/*======================*/
251
btr_cur_t* cursor, /*!< in: cursor at which to insert: must be
223
/* out: inserted record */
224
btr_cur_t* cursor, /* in: cursor at which to insert: must be
252
225
on the root page; when the function returns,
253
226
the cursor is positioned on the predecessor
254
227
of the inserted record */
255
const dtuple_t* tuple, /*!< in: tuple to insert */
256
ulint n_ext, /*!< in: number of externally stored columns */
257
mtr_t* mtr); /*!< in: mtr */
258
/*************************************************************//**
228
const dtuple_t* tuple, /* in: tuple to insert */
229
ulint n_ext, /* in: number of externally stored columns */
230
mtr_t* mtr); /* in: mtr */
231
/*****************************************************************
259
232
Reorganizes an index page.
260
233
IMPORTANT: if btr_page_reorganize() is invoked on a compressed leaf
261
234
page of a non-clustered index, the caller must update the insert
262
235
buffer free bits in the same mini-transaction in such a way that the
263
modification will be redo-logged.
264
@return TRUE on success, FALSE on failure */
236
modification will be redo-logged. */
267
239
btr_page_reorganize(
268
240
/*================*/
269
buf_block_t* block, /*!< in: page to be reorganized */
270
dict_index_t* index, /*!< in: record descriptor */
271
mtr_t* mtr); /*!< in: mtr */
272
/*************************************************************//**
241
/* out: TRUE on success, FALSE on failure */
242
buf_block_t* block, /* in: page to be reorganized */
243
dict_index_t* index, /* in: record descriptor */
244
mtr_t* mtr); /* in: mtr */
245
/*****************************************************************
273
246
Decides if the page should be split at the convergence point of
274
inserts converging to left.
275
@return TRUE if split recommended */
247
inserts converging to left. */
278
250
btr_page_get_split_rec_to_left(
279
251
/*===========================*/
280
btr_cur_t* cursor, /*!< in: cursor at which to insert */
281
rec_t** split_rec);/*!< out: if split recommended,
252
/* out: TRUE if split recommended */
253
btr_cur_t* cursor, /* in: cursor at which to insert */
254
rec_t** split_rec);/* out: if split recommended,
282
255
the first record on upper half page,
283
256
or NULL if tuple should be first */
284
/*************************************************************//**
257
/*****************************************************************
285
258
Decides if the page should be split at the convergence point of
286
inserts converging to right.
287
@return TRUE if split recommended */
259
inserts converging to right. */
290
262
btr_page_get_split_rec_to_right(
291
263
/*============================*/
292
btr_cur_t* cursor, /*!< in: cursor at which to insert */
293
rec_t** split_rec);/*!< out: if split recommended,
264
/* out: TRUE if split recommended */
265
btr_cur_t* cursor, /* in: cursor at which to insert */
266
rec_t** split_rec);/* out: if split recommended,
294
267
the first record on upper half page,
295
268
or NULL if tuple should be first */
296
/*************************************************************//**
269
/*****************************************************************
297
270
Splits an index page to halves and inserts the tuple. It is assumed
298
that mtr holds an x-latch to the index tree. NOTE: the tree x-latch is
299
released within this function! NOTE that the operation of this
300
function must always succeed, we cannot reverse it: therefore enough
301
free disk space (2 pages) must be guaranteed to be available before
302
this function is called.
304
@return inserted record */
271
that mtr holds an x-latch to the index tree. NOTE: the tree x-latch
272
is released within this function! NOTE that the operation of this
273
function must always succeed, we cannot reverse it: therefore
274
enough free disk space must be guaranteed to be available before
275
this function is called. */
307
278
btr_page_split_and_insert(
308
279
/*======================*/
309
btr_cur_t* cursor, /*!< in: cursor at which to insert; when the
280
/* out: inserted record; NOTE: the tree
281
x-latch is released! NOTE: 2 free disk
282
pages must be available! */
283
btr_cur_t* cursor, /* in: cursor at which to insert; when the
310
284
function returns, the cursor is positioned
311
285
on the predecessor of the inserted record */
312
const dtuple_t* tuple, /*!< in: tuple to insert */
313
ulint n_ext, /*!< in: number of externally stored columns */
314
mtr_t* mtr); /*!< in: mtr */
315
/*******************************************************//**
286
const dtuple_t* tuple, /* in: tuple to insert */
287
ulint n_ext, /* in: number of externally stored columns */
288
mtr_t* mtr); /* in: mtr */
289
/***********************************************************
316
290
Inserts a data tuple to a tree on a non-leaf level. It is assumed
317
291
that mtr holds an x-latch on the tree. */
320
294
btr_insert_on_non_leaf_level(
321
295
/*=========================*/
322
dict_index_t* index, /*!< in: index */
323
ulint level, /*!< in: level, must be > 0 */
324
dtuple_t* tuple, /*!< in: the record to be inserted */
325
mtr_t* mtr); /*!< in: mtr */
326
#endif /* !UNIV_HOTBACKUP */
327
/****************************************************************//**
296
dict_index_t* index, /* in: index */
297
ulint level, /* in: level, must be > 0 */
298
dtuple_t* tuple, /* in: the record to be inserted */
299
mtr_t* mtr); /* in: mtr */
300
/********************************************************************
328
301
Sets a record as the predefined minimum record. */
331
304
btr_set_min_rec_mark(
332
305
/*=================*/
333
rec_t* rec, /*!< in/out: record */
334
mtr_t* mtr); /*!< in: mtr */
335
#ifndef UNIV_HOTBACKUP
336
/*************************************************************//**
306
rec_t* rec, /* in/out: record */
307
mtr_t* mtr); /* in: mtr */
308
/*****************************************************************
337
309
Deletes on the upper level the node pointer to a page. */
340
312
btr_node_ptr_delete(
341
313
/*================*/
342
dict_index_t* index, /*!< in: index tree */
343
buf_block_t* block, /*!< in: page whose node pointer is deleted */
344
mtr_t* mtr); /*!< in: mtr */
314
dict_index_t* index, /* in: index tree */
315
buf_block_t* block, /* in: page whose node pointer is deleted */
316
mtr_t* mtr); /* in: mtr */
345
317
#ifdef UNIV_DEBUG
346
/************************************************************//**
347
Checks that the node pointer to a page is appropriate.
318
/****************************************************************
319
Checks that the node pointer to a page is appropriate. */
351
322
btr_check_node_ptr(
352
323
/*===============*/
353
dict_index_t* index, /*!< in: index tree */
354
buf_block_t* block, /*!< in: index page */
355
mtr_t* mtr); /*!< in: mtr */
325
dict_index_t* index, /* in: index tree */
326
buf_block_t* block, /* in: index page */
327
mtr_t* mtr); /* in: mtr */
356
328
#endif /* UNIV_DEBUG */
357
/*************************************************************//**
329
/*****************************************************************
358
330
Tries to merge the page first to the left immediate brother if such a
359
331
brother exists, and the node pointers to the current page and to the
360
332
brother reside on the same page. If the left brother does not satisfy these
382
354
btr_discard_page(
383
355
/*=============*/
384
btr_cur_t* cursor, /*!< in: cursor on the page to discard: not on
356
btr_cur_t* cursor, /* in: cursor on the page to discard: not on
386
mtr_t* mtr); /*!< in: mtr */
387
#endif /* !UNIV_HOTBACKUP */
388
/****************************************************************//**
358
mtr_t* mtr); /* in: mtr */
359
/********************************************************************
389
360
Parses the redo log record for setting an index record as the predefined
391
@return end of log record or NULL */
394
364
btr_parse_set_min_rec_mark(
395
365
/*=======================*/
396
byte* ptr, /*!< in: buffer */
397
byte* end_ptr,/*!< in: buffer end */
398
ulint comp, /*!< in: nonzero=compact page format */
399
page_t* page, /*!< in: page or NULL */
400
mtr_t* mtr); /*!< in: mtr or NULL */
401
/***********************************************************//**
402
Parses a redo log record of reorganizing a page.
403
@return end of log record or NULL */
366
/* out: end of log record or NULL */
367
byte* ptr, /* in: buffer */
368
byte* end_ptr,/* in: buffer end */
369
ulint comp, /* in: nonzero=compact page format */
370
page_t* page, /* in: page or NULL */
371
mtr_t* mtr); /* in: mtr or NULL */
372
/***************************************************************
373
Parses a redo log record of reorganizing a page. */
406
376
btr_parse_page_reorganize(
407
377
/*======================*/
408
byte* ptr, /*!< in: buffer */
409
byte* end_ptr,/*!< in: buffer end */
410
dict_index_t* index, /*!< in: record descriptor */
411
buf_block_t* block, /*!< in: page to be reorganized, or NULL */
412
mtr_t* mtr); /*!< in: mtr or NULL */
413
#ifndef UNIV_HOTBACKUP
414
/**************************************************************//**
415
Gets the number of pages in a B-tree.
416
@return number of pages */
378
/* out: end of log record or NULL */
379
byte* ptr, /* in: buffer */
380
byte* end_ptr,/* in: buffer end */
381
dict_index_t* index, /* in: record descriptor */
382
buf_block_t* block, /* in: page to be reorganized, or NULL */
383
mtr_t* mtr); /* in: mtr or NULL */
384
/******************************************************************
385
Gets the number of pages in a B-tree. */
421
dict_index_t* index, /*!< in: index */
422
ulint flag); /*!< in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */
423
/**************************************************************//**
390
/* out: number of pages */
391
dict_index_t* index, /* in: index */
392
ulint flag); /* in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */
393
/******************************************************************
424
394
Allocates a new file page to be used in an index tree. NOTE: we assume
425
that the caller has made the reservation for free extents!
426
@return new allocated block, x-latched; NULL if out of space */
395
that the caller has made the reservation for free extents! */
431
dict_index_t* index, /*!< in: index tree */
432
ulint hint_page_no, /*!< in: hint of a good page */
433
byte file_direction, /*!< in: direction where a possible
400
/* out: new allocated block, x-latched;
401
NULL if out of space */
402
dict_index_t* index, /* in: index tree */
403
ulint hint_page_no, /* in: hint of a good page */
404
byte file_direction, /* in: direction where a possible
434
405
page split is made */
435
ulint level, /*!< in: level where the page is placed
406
ulint level, /* in: level where the page is placed
437
mtr_t* mtr); /*!< in: mtr */
438
/**************************************************************//**
408
mtr_t* mtr); /* in: mtr */
409
/******************************************************************
439
410
Frees a file page used in an index tree. NOTE: cannot free field external
440
411
storage pages because the page must contain info on its level. */
445
dict_index_t* index, /*!< in: index tree */
446
buf_block_t* block, /*!< in: block to be freed, x-latched */
447
mtr_t* mtr); /*!< in: mtr */
448
/**************************************************************//**
416
dict_index_t* index, /* in: index tree */
417
buf_block_t* block, /* in: block to be freed, x-latched */
418
mtr_t* mtr); /* in: mtr */
419
/******************************************************************
449
420
Frees a file page used in an index tree. Can be used also to BLOB
450
421
external storage pages, because the page level 0 can be given as an