~drizzle-trunk/drizzle/development

641.2.2 by Monty Taylor
InnoDB Plugin 1.0.3
1
/*****************************************************************************
2
3
Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
4
5
This program is free software; you can redistribute it and/or modify it under
6
the terms of the GNU General Public License as published by the Free Software
7
Foundation; version 2 of the License.
8
9
This program is distributed in the hope that it will be useful, but WITHOUT
10
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
12
13
You should have received a copy of the GNU General Public License along with
14
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15
Place, Suite 330, Boston, MA 02111-1307 USA
16
17
*****************************************************************************/
18
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
19
/******************************************************
20
The B-tree
21
22
Created 6/2/1994 Heikki Tuuri
23
*******************************************************/
24
25
#ifndef btr0btr_h
26
#define btr0btr_h
27
28
#include "univ.i"
29
30
#include "dict0dict.h"
31
#include "data0data.h"
32
#include "page0cur.h"
33
#include "mtr0mtr.h"
34
#include "btr0types.h"
35
36
/* Maximum record size which can be stored on a page, without using the
37
special big record storage structure */
38
39
#define	BTR_PAGE_MAX_REC_SIZE	(UNIV_PAGE_SIZE / 2 - 200)
40
41
/* Maximum depth of a B-tree in InnoDB. Note that this isn't a maximum as
42
such; none of the tree operations avoid producing trees bigger than this. It
43
is instead a "max depth that other code must work with", useful for e.g.
44
fixed-size arrays that must store some information about each level in a
45
tree. In other words: if a B-tree with bigger depth than this is
46
encountered, it is not acceptable for it to lead to mysterious memory
47
corruption, but it is acceptable for the program to die with a clear assert
48
failure. */
49
#define BTR_MAX_LEVELS		100
50
51
/* Latching modes for btr_cur_search_to_nth_level(). */
52
#define BTR_SEARCH_LEAF		RW_S_LATCH
53
#define BTR_MODIFY_LEAF		RW_X_LATCH
54
#define BTR_NO_LATCHES		RW_NO_LATCH
55
#define	BTR_MODIFY_TREE		33
56
#define	BTR_CONT_MODIFY_TREE	34
57
#define	BTR_SEARCH_PREV		35
58
#define	BTR_MODIFY_PREV		36
59
60
/* If this is ORed to the latch mode, it means that the search tuple will be
61
inserted to the index, at the searched position */
62
#define BTR_INSERT		512
63
64
/* This flag ORed to latch mode says that we do the search in query
65
optimization */
66
#define BTR_ESTIMATE		1024
67
68
/* This flag ORed to latch mode says that we can ignore possible
69
UNIQUE definition on secondary indexes when we decide if we can use the
70
insert buffer to speed up inserts */
71
#define BTR_IGNORE_SEC_UNIQUE	2048
72
73
/******************************************************************
74
Gets the root node of a tree and x-latches it. */
75
UNIV_INTERN
76
page_t*
77
btr_root_get(
78
/*=========*/
79
				/* out: root page, x-latched */
80
	dict_index_t*	index,	/* in: index tree */
81
	mtr_t*		mtr);	/* in: mtr */
82
/******************************************************************
83
Gets a buffer page and declares its latching order level. */
84
UNIV_INLINE
85
buf_block_t*
86
btr_block_get(
87
/*==========*/
88
	ulint	space,		/* in: space id */
89
	ulint	zip_size,	/* in: compressed page size in bytes
90
				or 0 for uncompressed pages */
91
	ulint	page_no,	/* in: page number */
92
	ulint	mode,		/* in: latch mode */
93
	mtr_t*	mtr);		/* in: mtr */
94
/******************************************************************
95
Gets a buffer page and declares its latching order level. */
96
UNIV_INLINE
97
page_t*
98
btr_page_get(
99
/*=========*/
100
	ulint	space,		/* in: space id */
101
	ulint	zip_size,	/* in: compressed page size in bytes
102
				or 0 for uncompressed pages */
103
	ulint	page_no,	/* in: page number */
104
	ulint	mode,		/* in: latch mode */
105
	mtr_t*	mtr);		/* in: mtr */
106
/******************************************************************
107
Gets the index id field of a page. */
108
UNIV_INLINE
109
dulint
110
btr_page_get_index_id(
111
/*==================*/
112
				/* out: index id */
113
	const page_t*	page);	/* in: index page */
114
/************************************************************
115
Gets the node level field in an index page. */
116
UNIV_INLINE
117
ulint
118
btr_page_get_level_low(
119
/*===================*/
120
				/* out: level, leaf level == 0 */
121
	const page_t*	page);	/* in: index page */
122
/************************************************************
123
Gets the node level field in an index page. */
124
UNIV_INLINE
125
ulint
126
btr_page_get_level(
127
/*===============*/
128
				/* out: level, leaf level == 0 */
129
	const page_t*	page,	/* in: index page */
130
	mtr_t*		mtr);	/* in: mini-transaction handle */
131
/************************************************************
132
Gets the next index page number. */
133
UNIV_INLINE
134
ulint
135
btr_page_get_next(
136
/*==============*/
137
				/* out: next page number */
138
	const page_t*	page,	/* in: index page */
139
	mtr_t*		mtr);	/* in: mini-transaction handle */
140
/************************************************************
141
Gets the previous index page number. */
142
UNIV_INLINE
143
ulint
144
btr_page_get_prev(
145
/*==============*/
146
				/* out: prev page number */
147
	const page_t*	page,	/* in: index page */
148
	mtr_t*		mtr);	/* in: mini-transaction handle */
149
/*****************************************************************
150
Gets pointer to the previous user record in the tree. It is assumed
151
that the caller has appropriate latches on the page and its neighbor. */
152
UNIV_INTERN
153
rec_t*
154
btr_get_prev_user_rec(
155
/*==================*/
156
			/* out: previous user record, NULL if there is none */
157
	rec_t*	rec,	/* in: record on leaf level */
158
	mtr_t*	mtr);	/* in: mtr holding a latch on the page, and if
159
			needed, also to the previous page */
160
/*****************************************************************
161
Gets pointer to the next user record in the tree. It is assumed
162
that the caller has appropriate latches on the page and its neighbor. */
163
UNIV_INTERN
164
rec_t*
165
btr_get_next_user_rec(
166
/*==================*/
167
			/* out: next user record, NULL if there is none */
168
	rec_t*	rec,	/* in: record on leaf level */
169
	mtr_t*	mtr);	/* in: mtr holding a latch on the page, and if
170
			needed, also to the next page */
171
/******************************************************************
172
Releases the latch on a leaf page and bufferunfixes it. */
173
UNIV_INLINE
174
void
175
btr_leaf_page_release(
176
/*==================*/
177
	buf_block_t*	block,		/* in: buffer block */
178
	ulint		latch_mode,	/* in: BTR_SEARCH_LEAF or
179
					BTR_MODIFY_LEAF */
180
	mtr_t*		mtr);		/* in: mtr */
181
/******************************************************************
182
Gets the child node file address in a node pointer. */
183
UNIV_INLINE
184
ulint
185
btr_node_ptr_get_child_page_no(
186
/*===========================*/
187
				/* out: child node address */
188
	const rec_t*	rec,	/* in: node pointer record */
189
	const ulint*	offsets);/* in: array returned by rec_get_offsets() */
190
/****************************************************************
191
Creates the root node for a new index tree. */
192
UNIV_INTERN
193
ulint
194
btr_create(
195
/*=======*/
196
				/* out: page number of the created root,
197
				FIL_NULL if did not succeed */
198
	ulint		type,	/* in: type of the index */
199
	ulint		space,	/* in: space where created */
200
	ulint		zip_size,/* in: compressed page size in bytes
201
				or 0 for uncompressed pages */
202
	dulint		index_id,/* in: index id */
203
	dict_index_t*	index,	/* in: index */
204
	mtr_t*		mtr);	/* in: mini-transaction handle */
205
/****************************************************************
206
Frees a B-tree except the root page, which MUST be freed after this
207
by calling btr_free_root. */
208
UNIV_INTERN
209
void
210
btr_free_but_not_root(
211
/*==================*/
212
	ulint	space,		/* in: space where created */
213
	ulint	zip_size,	/* in: compressed page size in bytes
214
				or 0 for uncompressed pages */
215
	ulint	root_page_no);	/* in: root page number */
216
/****************************************************************
217
Frees the B-tree root page. Other tree MUST already have been freed. */
218
UNIV_INTERN
219
void
220
btr_free_root(
221
/*==========*/
222
	ulint	space,		/* in: space where created */
223
	ulint	zip_size,	/* in: compressed page size in bytes
224
				or 0 for uncompressed pages */
225
	ulint	root_page_no,	/* in: root page number */
226
	mtr_t*	mtr);		/* in: a mini-transaction which has already
227
				been started */
228
/*****************************************************************
229
Makes tree one level higher by splitting the root, and inserts
230
the tuple. It is assumed that mtr contains an x-latch on the tree.
231
NOTE that the operation of this function must always succeed,
232
we cannot reverse it: therefore enough free disk space must be
233
guaranteed to be available before this function is called. */
234
UNIV_INTERN
235
rec_t*
236
btr_root_raise_and_insert(
237
/*======================*/
238
				/* out: inserted record */
239
	btr_cur_t*	cursor,	/* in: cursor at which to insert: must be
240
				on the root page; when the function returns,
241
				the cursor is positioned on the predecessor
242
				of the inserted record */
243
	const dtuple_t*	tuple,	/* in: tuple to insert */
244
	ulint		n_ext,	/* in: number of externally stored columns */
245
	mtr_t*		mtr);	/* in: mtr */
246
/*****************************************************************
247
Reorganizes an index page.
248
IMPORTANT: if btr_page_reorganize() is invoked on a compressed leaf
249
page of a non-clustered index, the caller must update the insert
250
buffer free bits in the same mini-transaction in such a way that the
251
modification will be redo-logged. */
252
UNIV_INTERN
253
ibool
254
btr_page_reorganize(
255
/*================*/
256
				/* out: TRUE on success, FALSE on failure */
257
	buf_block_t*	block,	/* in: page to be reorganized */
258
	dict_index_t*	index,	/* in: record descriptor */
259
	mtr_t*		mtr);	/* in: mtr */
260
/*****************************************************************
261
Decides if the page should be split at the convergence point of
262
inserts converging to left. */
263
UNIV_INTERN
264
ibool
265
btr_page_get_split_rec_to_left(
266
/*===========================*/
267
				/* out: TRUE if split recommended */
268
	btr_cur_t*	cursor,	/* in: cursor at which to insert */
269
	rec_t**		split_rec);/* out: if split recommended,
270
				the first record on upper half page,
271
				or NULL if tuple should be first */
272
/*****************************************************************
273
Decides if the page should be split at the convergence point of
274
inserts converging to right. */
275
UNIV_INTERN
276
ibool
277
btr_page_get_split_rec_to_right(
278
/*============================*/
279
				/* out: TRUE if split recommended */
280
	btr_cur_t*	cursor,	/* in: cursor at which to insert */
281
	rec_t**		split_rec);/* out: if split recommended,
282
				the first record on upper half page,
283
				or NULL if tuple should be first */
284
/*****************************************************************
285
Splits an index page to halves and inserts the tuple. It is assumed
286
that mtr holds an x-latch to the index tree. NOTE: the tree x-latch
287
is released within this function! NOTE that the operation of this
288
function must always succeed, we cannot reverse it: therefore
289
enough free disk space must be guaranteed to be available before
290
this function is called. */
291
UNIV_INTERN
292
rec_t*
293
btr_page_split_and_insert(
294
/*======================*/
295
				/* out: inserted record; NOTE: the tree
296
				x-latch is released! NOTE: 2 free disk
297
				pages must be available! */
298
	btr_cur_t*	cursor,	/* in: cursor at which to insert; when the
299
				function returns, the cursor is positioned
300
				on the predecessor of the inserted record */
301
	const dtuple_t*	tuple,	/* in: tuple to insert */
302
	ulint		n_ext,	/* in: number of externally stored columns */
303
	mtr_t*		mtr);	/* in: mtr */
304
/***********************************************************
305
Inserts a data tuple to a tree on a non-leaf level. It is assumed
306
that mtr holds an x-latch on the tree. */
307
UNIV_INTERN
308
void
309
btr_insert_on_non_leaf_level(
310
/*=========================*/
311
	dict_index_t*	index,	/* in: index */
312
	ulint		level,	/* in: level, must be > 0 */
313
	dtuple_t*	tuple,	/* in: the record to be inserted */
314
	mtr_t*		mtr);	/* in: mtr */
315
/********************************************************************
316
Sets a record as the predefined minimum record. */
317
UNIV_INTERN
318
void
319
btr_set_min_rec_mark(
320
/*=================*/
321
	rec_t*	rec,	/* in/out: record */
322
	mtr_t*	mtr);	/* in: mtr */
323
/*****************************************************************
324
Deletes on the upper level the node pointer to a page. */
325
UNIV_INTERN
326
void
327
btr_node_ptr_delete(
328
/*================*/
329
	dict_index_t*	index,	/* in: index tree */
330
	buf_block_t*	block,	/* in: page whose node pointer is deleted */
331
	mtr_t*		mtr);	/* in: mtr */
332
#ifdef UNIV_DEBUG
333
/****************************************************************
334
Checks that the node pointer to a page is appropriate. */
335
UNIV_INTERN
336
ibool
337
btr_check_node_ptr(
338
/*===============*/
339
				/* out: TRUE */
340
	dict_index_t*	index,	/* in: index tree */
341
	buf_block_t*	block,	/* in: index page */
342
	mtr_t*		mtr);	/* in: mtr */
343
#endif /* UNIV_DEBUG */
344
/*****************************************************************
345
Tries to merge the page first to the left immediate brother if such a
346
brother exists, and the node pointers to the current page and to the
347
brother reside on the same page. If the left brother does not satisfy these
348
conditions, looks at the right brother. If the page is the only one on that
349
level lifts the records of the page to the father page, thus reducing the
350
tree height. It is assumed that mtr holds an x-latch on the tree and on the
351
page. If cursor is on the leaf level, mtr must also hold x-latches to
352
the brothers, if they exist. */
353
UNIV_INTERN
354
ibool
355
btr_compress(
356
/*=========*/
357
				/* out: TRUE on success */
358
	btr_cur_t*	cursor,	/* in: cursor on the page to merge or lift;
359
				the page must not be empty: in record delete
360
				use btr_discard_page if the page would become
361
				empty */
362
	mtr_t*		mtr);	/* in: mtr */
363
/*****************************************************************
364
Discards a page from a B-tree. This is used to remove the last record from
365
a B-tree page: the whole page must be removed at the same time. This cannot
366
be used for the root page, which is allowed to be empty. */
367
UNIV_INTERN
368
void
369
btr_discard_page(
370
/*=============*/
371
	btr_cur_t*	cursor,	/* in: cursor on the page to discard: not on
372
				the root page */
373
	mtr_t*		mtr);	/* in: mtr */
374
/********************************************************************
375
Parses the redo log record for setting an index record as the predefined
376
minimum record. */
377
UNIV_INTERN
378
byte*
379
btr_parse_set_min_rec_mark(
380
/*=======================*/
381
			/* out: end of log record or NULL */
382
	byte*	ptr,	/* in: buffer */
383
	byte*	end_ptr,/* in: buffer end */
384
	ulint	comp,	/* in: nonzero=compact page format */
385
	page_t*	page,	/* in: page or NULL */
386
	mtr_t*	mtr);	/* in: mtr or NULL */
387
/***************************************************************
388
Parses a redo log record of reorganizing a page. */
389
UNIV_INTERN
390
byte*
391
btr_parse_page_reorganize(
392
/*======================*/
393
				/* out: end of log record or NULL */
394
	byte*		ptr,	/* in: buffer */
395
	byte*		end_ptr,/* in: buffer end */
396
	dict_index_t*	index,	/* in: record descriptor */
397
	buf_block_t*	block,	/* in: page to be reorganized, or NULL */
398
	mtr_t*		mtr);	/* in: mtr or NULL */
399
/******************************************************************
400
Gets the number of pages in a B-tree. */
401
UNIV_INTERN
402
ulint
403
btr_get_size(
404
/*=========*/
405
				/* out: number of pages */
406
	dict_index_t*	index,	/* in: index */
407
	ulint		flag);	/* in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */
408
/******************************************************************
409
Allocates a new file page to be used in an index tree. NOTE: we assume
410
that the caller has made the reservation for free extents! */
411
UNIV_INTERN
412
buf_block_t*
413
btr_page_alloc(
414
/*===========*/
415
					/* out: new allocated block, x-latched;
416
					NULL if out of space */
417
	dict_index_t*	index,		/* in: index tree */
418
	ulint		hint_page_no,	/* in: hint of a good page */
419
	byte		file_direction,	/* in: direction where a possible
420
					page split is made */
421
	ulint		level,		/* in: level where the page is placed
422
					in the tree */
423
	mtr_t*		mtr);		/* in: mtr */
424
/******************************************************************
425
Frees a file page used in an index tree. NOTE: cannot free field external
426
storage pages because the page must contain info on its level. */
427
UNIV_INTERN
428
void
429
btr_page_free(
430
/*==========*/
431
	dict_index_t*	index,	/* in: index tree */
432
	buf_block_t*	block,	/* in: block to be freed, x-latched */
433
	mtr_t*		mtr);	/* in: mtr */
434
/******************************************************************
435
Frees a file page used in an index tree. Can be used also to BLOB
436
external storage pages, because the page level 0 can be given as an
437
argument. */
438
UNIV_INTERN
439
void
440
btr_page_free_low(
441
/*==============*/
442
	dict_index_t*	index,	/* in: index tree */
443
	buf_block_t*	block,	/* in: block to be freed, x-latched */
444
	ulint		level,	/* in: page level */
445
	mtr_t*		mtr);	/* in: mtr */
446
#ifdef UNIV_BTR_PRINT
447
/*****************************************************************
448
Prints size info of a B-tree. */
449
UNIV_INTERN
450
void
451
btr_print_size(
452
/*===========*/
453
	dict_index_t*	index);	/* in: index tree */
454
/******************************************************************
455
Prints directories and other info of all nodes in the index. */
456
UNIV_INTERN
457
void
458
btr_print_index(
459
/*============*/
460
	dict_index_t*	index,	/* in: index */
461
	ulint		width);	/* in: print this many entries from start
462
				and end */
463
#endif /* UNIV_BTR_PRINT */
464
/****************************************************************
465
Checks the size and number of fields in a record based on the definition of
466
the index. */
467
UNIV_INTERN
468
ibool
469
btr_index_rec_validate(
470
/*===================*/
471
						/* out: TRUE if ok */
472
	const rec_t*		rec,		/* in: index record */
473
	const dict_index_t*	index,		/* in: index */
474
	ibool			dump_on_error);	/* in: TRUE if the function
475
						should print hex dump of record
476
						and page on error */
477
/******************************************************************
478
Checks the consistency of an index tree. */
479
UNIV_INTERN
480
ibool
481
btr_validate_index(
482
/*===============*/
483
				/* out: TRUE if ok */
484
	dict_index_t*	index,	/* in: index */
485
	trx_t*		trx);	/* in: transaction or NULL */
486
487
#define BTR_N_LEAF_PAGES	1
488
#define BTR_TOTAL_SIZE		2
489
490
#ifndef UNIV_NONINL
491
#include "btr0btr.ic"
492
#endif
493
494
#endif