~drizzle-trunk/drizzle/development

1 by brian
clean slate
1
/******************************************************
2
The B-tree
3
4
(c) 1994-1996 Innobase Oy
5
6
Created 6/2/1994 Heikki Tuuri
7
*******************************************************/
8
9
#ifndef btr0btr_h
10
#define btr0btr_h
11
12
#include "univ.i"
13
14
#include "dict0dict.h"
15
#include "data0data.h"
16
#include "page0cur.h"
17
#include "rem0rec.h"
18
#include "mtr0mtr.h"
19
#include "btr0types.h"
20
21
/* Maximum record size which can be stored on a page, without using the
22
special big record storage structure */
23
24
#define	BTR_PAGE_MAX_REC_SIZE	(UNIV_PAGE_SIZE / 2 - 200)
25
26
/* Maximum depth of a B-tree in InnoDB. Note that this isn't a maximum as
27
such; none of the tree operations avoid producing trees bigger than this. It
28
is instead a "max depth that other code must work with", useful for e.g.
29
fixed-size arrays that must store some information about each level in a
30
tree. In other words: if a B-tree with bigger depth than this is
31
encountered, it is not acceptable for it to lead to mysterious memory
32
corruption, but it is acceptable for the program to die with a clear assert
33
failure. */
34
#define BTR_MAX_LEVELS		100
35
36
/* Latching modes for btr_cur_search_to_nth_level(). */
37
#define BTR_SEARCH_LEAF		RW_S_LATCH
38
#define BTR_MODIFY_LEAF		RW_X_LATCH
39
#define BTR_NO_LATCHES		RW_NO_LATCH
40
#define	BTR_MODIFY_TREE		33
41
#define	BTR_CONT_MODIFY_TREE	34
42
#define	BTR_SEARCH_PREV		35
43
#define	BTR_MODIFY_PREV		36
44
45
/* If this is ORed to the latch mode, it means that the search tuple will be
46
inserted to the index, at the searched position */
47
#define BTR_INSERT		512
48
49
/* This flag ORed to latch mode says that we do the search in query
50
optimization */
51
#define BTR_ESTIMATE		1024
52
53
/* This flag ORed to latch mode says that we can ignore possible
54
UNIQUE definition on secondary indexes when we decide if we can use the
55
insert buffer to speed up inserts */
56
#define BTR_IGNORE_SEC_UNIQUE	2048
57
58
/******************************************************************
59
Gets the root node of a tree and x-latches it. */
60
61
page_t*
62
btr_root_get(
63
/*=========*/
64
				/* out: root page, x-latched */
65
	dict_index_t*	index,	/* in: index tree */
66
	mtr_t*		mtr);	/* in: mtr */
67
/******************************************************************
68
Gets a buffer page and declares its latching order level. */
69
UNIV_INLINE
70
page_t*
71
btr_page_get(
72
/*=========*/
73
	ulint	space,		/* in: space id */
74
	ulint	page_no,	/* in: page number */
75
	ulint	mode,		/* in: latch mode */
76
	mtr_t*	mtr);		/* in: mtr */
77
/******************************************************************
78
Gets the index id field of a page. */
79
UNIV_INLINE
80
dulint
81
btr_page_get_index_id(
82
/*==================*/
83
				/* out: index id */
84
	page_t*		page);	/* in: index page */
85
/************************************************************
86
Gets the node level field in an index page. */
87
UNIV_INLINE
88
ulint
89
btr_page_get_level_low(
90
/*===================*/
91
			/* out: level, leaf level == 0 */
92
	page_t*	page);	/* in: index page */
93
/************************************************************
94
Gets the node level field in an index page. */
95
UNIV_INLINE
96
ulint
97
btr_page_get_level(
98
/*===============*/
99
			/* out: level, leaf level == 0 */
100
	page_t*	page,	/* in: index page */
101
	mtr_t*	mtr);	/* in: mini-transaction handle */
102
/************************************************************
103
Gets the next index page number. */
104
UNIV_INLINE
105
ulint
106
btr_page_get_next(
107
/*==============*/
108
			/* out: next page number */
109
	page_t*	page,	/* in: index page */
110
	mtr_t*	mtr);	/* in: mini-transaction handle */
111
/************************************************************
112
Gets the previous index page number. */
113
UNIV_INLINE
114
ulint
115
btr_page_get_prev(
116
/*==============*/
117
			/* out: prev page number */
118
	page_t*	page,	/* in: index page */
119
	mtr_t*	mtr);	/* in: mini-transaction handle */
120
/*****************************************************************
121
Gets pointer to the previous user record in the tree. It is assumed
122
that the caller has appropriate latches on the page and its neighbor. */
123
124
rec_t*
125
btr_get_prev_user_rec(
126
/*==================*/
127
			/* out: previous user record, NULL if there is none */
128
	rec_t*	rec,	/* in: record on leaf level */
129
	mtr_t*	mtr);	/* in: mtr holding a latch on the page, and if
130
			needed, also to the previous page */
131
/*****************************************************************
132
Gets pointer to the next user record in the tree. It is assumed
133
that the caller has appropriate latches on the page and its neighbor. */
134
135
rec_t*
136
btr_get_next_user_rec(
137
/*==================*/
138
			/* out: next user record, NULL if there is none */
139
	rec_t*	rec,	/* in: record on leaf level */
140
	mtr_t*	mtr);	/* in: mtr holding a latch on the page, and if
141
			needed, also to the next page */
142
/******************************************************************
143
Releases the latch on a leaf page and bufferunfixes it. */
144
UNIV_INLINE
145
void
146
btr_leaf_page_release(
147
/*==================*/
148
	page_t*	page,		/* in: page */
149
	ulint	latch_mode,	/* in: BTR_SEARCH_LEAF or BTR_MODIFY_LEAF */
150
	mtr_t*	mtr);		/* in: mtr */
151
/******************************************************************
152
Gets the child node file address in a node pointer. */
153
UNIV_INLINE
154
ulint
155
btr_node_ptr_get_child_page_no(
156
/*===========================*/
157
				/* out: child node address */
158
	rec_t*		rec,	/* in: node pointer record */
159
	const ulint*	offsets);/* in: array returned by rec_get_offsets() */
160
/****************************************************************
161
Creates the root node for a new index tree. */
162
163
ulint
164
btr_create(
165
/*=======*/
166
			/* out: page number of the created root, FIL_NULL if
167
			did not succeed */
168
	ulint	type,	/* in: type of the index */
169
	ulint	space,	/* in: space where created */
170
	dulint	index_id,/* in: index id */
171
	ulint	comp,	/* in: nonzero=compact page format */
172
	mtr_t*	mtr);	/* in: mini-transaction handle */
173
/****************************************************************
174
Frees a B-tree except the root page, which MUST be freed after this
175
by calling btr_free_root. */
176
177
void
178
btr_free_but_not_root(
179
/*==================*/
180
	ulint	space,		/* in: space where created */
181
	ulint	root_page_no);	/* in: root page number */
182
/****************************************************************
183
Frees the B-tree root page. Other tree MUST already have been freed. */
184
185
void
186
btr_free_root(
187
/*==========*/
188
	ulint	space,		/* in: space where created */
189
	ulint	root_page_no,	/* in: root page number */
190
	mtr_t*	mtr);		/* in: a mini-transaction which has already
191
				been started */
192
/*****************************************************************
193
Makes tree one level higher by splitting the root, and inserts
194
the tuple. It is assumed that mtr contains an x-latch on the tree.
195
NOTE that the operation of this function must always succeed,
196
we cannot reverse it: therefore enough free disk space must be
197
guaranteed to be available before this function is called. */
198
199
rec_t*
200
btr_root_raise_and_insert(
201
/*======================*/
202
				/* out: inserted record */
203
	btr_cur_t*	cursor,	/* in: cursor at which to insert: must be
204
				on the root page; when the function returns,
205
				the cursor is positioned on the predecessor
206
				of the inserted record */
207
	dtuple_t*	tuple,	/* in: tuple to insert */
208
	mtr_t*		mtr);	/* in: mtr */
209
/*****************************************************************
210
Reorganizes an index page. */
211
212
void
213
btr_page_reorganize(
214
/*================*/
215
	page_t*		page,	/* in: page to be reorganized */
216
	dict_index_t*	index,	/* in: record descriptor */
217
	mtr_t*		mtr);	/* in: mtr */
218
/*****************************************************************
219
Decides if the page should be split at the convergence point of
220
inserts converging to left. */
221
222
ibool
223
btr_page_get_split_rec_to_left(
224
/*===========================*/
225
				/* out: TRUE if split recommended */
226
	btr_cur_t*	cursor,	/* in: cursor at which to insert */
227
	rec_t**		split_rec);/* out: if split recommended,
228
				the first record on upper half page,
229
				or NULL if tuple should be first */
230
/*****************************************************************
231
Decides if the page should be split at the convergence point of
232
inserts converging to right. */
233
234
ibool
235
btr_page_get_split_rec_to_right(
236
/*============================*/
237
				/* out: TRUE if split recommended */
238
	btr_cur_t*	cursor,	/* in: cursor at which to insert */
239
	rec_t**		split_rec);/* out: if split recommended,
240
				the first record on upper half page,
241
				or NULL if tuple should be first */
242
/*****************************************************************
243
Splits an index page to halves and inserts the tuple. It is assumed
244
that mtr holds an x-latch to the index tree. NOTE: the tree x-latch
245
is released within this function! NOTE that the operation of this
246
function must always succeed, we cannot reverse it: therefore
247
enough free disk space must be guaranteed to be available before
248
this function is called. */
249
250
rec_t*
251
btr_page_split_and_insert(
252
/*======================*/
253
				/* out: inserted record; NOTE: the tree
254
				x-latch is released! NOTE: 2 free disk
255
				pages must be available! */
256
	btr_cur_t*	cursor,	/* in: cursor at which to insert; when the
257
				function returns, the cursor is positioned
258
				on the predecessor of the inserted record */
259
	dtuple_t*	tuple,	/* in: tuple to insert */
260
	mtr_t*		mtr);	/* in: mtr */
261
/***********************************************************
262
Inserts a data tuple to a tree on a non-leaf level. It is assumed
263
that mtr holds an x-latch on the tree. */
264
265
void
266
btr_insert_on_non_leaf_level(
267
/*=========================*/
268
	dict_index_t*	index,	/* in: index */
269
	ulint		level,	/* in: level, must be > 0 */
270
	dtuple_t*	tuple,	/* in: the record to be inserted */
271
	mtr_t*		mtr);	/* in: mtr */
272
/********************************************************************
273
Sets a record as the predefined minimum record. */
274
275
void
276
btr_set_min_rec_mark(
277
/*=================*/
278
	rec_t*	rec,	/* in: record */
279
	ulint	comp,	/* in: nonzero=compact page format */
280
	mtr_t*	mtr);	/* in: mtr */
281
/*****************************************************************
282
Deletes on the upper level the node pointer to a page. */
283
284
void
285
btr_node_ptr_delete(
286
/*================*/
287
	dict_index_t*	index,	/* in: index tree */
288
	page_t*		page,	/* in: page whose node pointer is deleted */
289
	mtr_t*		mtr);	/* in: mtr */
290
#ifdef UNIV_DEBUG
291
/****************************************************************
292
Checks that the node pointer to a page is appropriate. */
293
294
ibool
295
btr_check_node_ptr(
296
/*===============*/
297
				/* out: TRUE */
298
	dict_index_t*	index,	/* in: index tree */
299
	page_t*		page,	/* in: index page */
300
	mtr_t*		mtr);	/* in: mtr */
301
#endif /* UNIV_DEBUG */
302
/*****************************************************************
303
Tries to merge the page first to the left immediate brother if such a
304
brother exists, and the node pointers to the current page and to the
305
brother reside on the same page. If the left brother does not satisfy these
306
conditions, looks at the right brother. If the page is the only one on that
307
level lifts the records of the page to the father page, thus reducing the
308
tree height. It is assumed that mtr holds an x-latch on the tree and on the
309
page. If cursor is on the leaf level, mtr must also hold x-latches to
310
the brothers, if they exist. NOTE: it is assumed that the caller has reserved
311
enough free extents so that the compression will always succeed if done! */
312
void
313
btr_compress(
314
/*=========*/
315
	btr_cur_t*	cursor,	/* in: cursor on the page to merge or lift;
316
				the page must not be empty: in record delete
317
				use btr_discard_page if the page would become
318
				empty */
319
	mtr_t*		mtr);	/* in: mtr */
320
/*****************************************************************
321
Discards a page from a B-tree. This is used to remove the last record from
322
a B-tree page: the whole page must be removed at the same time. This cannot
323
be used for the root page, which is allowed to be empty. */
324
325
void
326
btr_discard_page(
327
/*=============*/
328
	btr_cur_t*	cursor,	/* in: cursor on the page to discard: not on
329
				the root page */
330
	mtr_t*		mtr);	/* in: mtr */
331
/********************************************************************
332
Parses the redo log record for setting an index record as the predefined
333
minimum record. */
334
335
byte*
336
btr_parse_set_min_rec_mark(
337
/*=======================*/
338
			/* out: end of log record or NULL */
339
	byte*	ptr,	/* in: buffer */
340
	byte*	end_ptr,/* in: buffer end */
341
	ulint	comp,	/* in: nonzero=compact page format */
342
	page_t*	page,	/* in: page or NULL */
343
	mtr_t*	mtr);	/* in: mtr or NULL */
344
/***************************************************************
345
Parses a redo log record of reorganizing a page. */
346
347
byte*
348
btr_parse_page_reorganize(
349
/*======================*/
350
				/* out: end of log record or NULL */
351
	byte*		ptr,	/* in: buffer */
352
	byte*		end_ptr,/* in: buffer end */
353
	dict_index_t*	index,	/* in: record descriptor */
354
	page_t*		page,	/* in: page or NULL */
355
	mtr_t*		mtr);	/* in: mtr or NULL */
356
/******************************************************************
357
Gets the number of pages in a B-tree. */
358
359
ulint
360
btr_get_size(
361
/*=========*/
362
				/* out: number of pages */
363
	dict_index_t*	index,	/* in: index */
364
	ulint		flag);	/* in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */
365
/******************************************************************
366
Allocates a new file page to be used in an index tree. NOTE: we assume
367
that the caller has made the reservation for free extents! */
368
369
page_t*
370
btr_page_alloc(
371
/*===========*/
372
					/* out: new allocated page, x-latched;
373
					NULL if out of space */
374
	dict_index_t*	index,		/* in: index tree */
375
	ulint		hint_page_no,	/* in: hint of a good page */
376
	byte		file_direction,	/* in: direction where a possible
377
					page split is made */
378
	ulint		level,		/* in: level where the page is placed
379
					in the tree */
380
	mtr_t*		mtr);		/* in: mtr */
381
/******************************************************************
382
Frees a file page used in an index tree. NOTE: cannot free field external
383
storage pages because the page must contain info on its level. */
384
385
void
386
btr_page_free(
387
/*==========*/
388
	dict_index_t*	index,	/* in: index tree */
389
	page_t*		page,	/* in: page to be freed, x-latched */
390
	mtr_t*		mtr);	/* in: mtr */
391
/******************************************************************
392
Frees a file page used in an index tree. Can be used also to BLOB
393
external storage pages, because the page level 0 can be given as an
394
argument. */
395
396
void
397
btr_page_free_low(
398
/*==============*/
399
	dict_index_t*	index,	/* in: index tree */
400
	page_t*		page,	/* in: page to be freed, x-latched */
401
	ulint		level,	/* in: page level */
402
	mtr_t*		mtr);	/* in: mtr */
403
#ifdef UNIV_BTR_PRINT
404
/*****************************************************************
405
Prints size info of a B-tree. */
406
407
void
408
btr_print_size(
409
/*===========*/
410
	dict_index_t*	index);	/* in: index tree */
411
/******************************************************************
412
Prints directories and other info of all nodes in the index. */
413
414
void
415
btr_print_index(
416
/*============*/
417
	dict_index_t*	index,	/* in: index */
418
	ulint		width);	/* in: print this many entries from start
419
				and end */
420
#endif /* UNIV_BTR_PRINT */
421
/****************************************************************
422
Checks the size and number of fields in a record based on the definition of
423
the index. */
424
425
ibool
426
btr_index_rec_validate(
427
/*===================*/
428
					/* out: TRUE if ok */
429
	rec_t*		rec,		/* in: index record */
430
	dict_index_t*	index,		/* in: index */
431
	ibool		dump_on_error);	/* in: TRUE if the function
432
					should print hex dump of record
433
					and page on error */
434
/******************************************************************
435
Checks the consistency of an index tree. */
436
437
ibool
438
btr_validate_index(
439
/*===============*/
440
				/* out: TRUE if ok */
441
	dict_index_t*	index,	/* in: index */
442
	trx_t*		trx);	/* in: transaction or NULL */
443
444
#define BTR_N_LEAF_PAGES	1
445
#define BTR_TOTAL_SIZE		2
446
447
#ifndef UNIV_NONINL
448
#include "btr0btr.ic"
449
#endif
450
451
#endif