~drizzle-trunk/drizzle/development

1 by brian
clean slate
1
/******************************************************
2
Index page routines
3
4
(c) 1994-1996 Innobase Oy
5
6
Created 2/2/1994 Heikki Tuuri
7
*******************************************************/
8
9
#define THIS_MODULE
10
#include "page0page.h"
11
#ifdef UNIV_NONINL
12
#include "page0page.ic"
13
#endif
14
#undef THIS_MODULE
15
16
#include "page0cur.h"
17
#include "lock0lock.h"
18
#include "fut0lst.h"
19
#include "btr0sea.h"
20
#include "buf0buf.h"
21
#include "srv0srv.h"
22
#include "btr0btr.h"
23
24
/*			THE INDEX PAGE
25
			==============
26
27
The index page consists of a page header which contains the page's
28
id and other information. On top of it are the the index records
29
in a heap linked into a one way linear list according to alphabetic order.
30
31
Just below page end is an array of pointers which we call page directory,
32
to about every sixth record in the list. The pointers are placed in
33
the directory in the alphabetical order of the records pointed to,
34
enabling us to make binary search using the array. Each slot n:o I
35
in the directory points to a record, where a 4-bit field contains a count
36
of those records which are in the linear list between pointer I and
37
the pointer I - 1 in the directory, including the record
38
pointed to by pointer I and not including the record pointed to by I - 1.
39
We say that the record pointed to by slot I, or that slot I, owns
40
these records. The count is always kept in the range 4 to 8, with
41
the exception that it is 1 for the first slot, and 1--8 for the second slot.
42
43
An essentially binary search can be performed in the list of index
44
records, like we could do if we had pointer to every record in the
45
page directory. The data structure is, however, more efficient when
46
we are doing inserts, because most inserts are just pushed on a heap.
47
Only every 8th insert requires block move in the directory pointer
48
table, which itself is quite small. A record is deleted from the page
49
by just taking it off the linear list and updating the number of owned
50
records-field of the record which owns it, and updating the page directory,
51
if necessary. A special case is the one when the record owns itself.
52
Because the overhead of inserts is so small, we may also increase the
53
page size from the projected default of 8 kB to 64 kB without too
54
much loss of efficiency in inserts. Bigger page becomes actual
55
when the disk transfer rate compared to seek and latency time rises.
56
On the present system, the page size is set so that the page transfer
57
time (3 ms) is 20 % of the disk random access time (15 ms).
58
59
When the page is split, merged, or becomes full but contains deleted
60
records, we have to reorganize the page.
61
62
Assuming a page size of 8 kB, a typical index page of a secondary
63
index contains 300 index entries, and the size of the page directory
64
is 50 x 4 bytes = 200 bytes. */
65
66
/*******************************************************************
67
Looks for the directory slot which owns the given record. */
68
69
ulint
70
page_dir_find_owner_slot(
71
/*=====================*/
72
			/* out: the directory slot number */
73
	rec_t*	rec)	/* in: the physical record */
74
{
75
	page_t*				page;
76
	register uint16			rec_offs_bytes;
77
	register page_dir_slot_t*	slot;
78
	register const page_dir_slot_t*	first_slot;
79
	register rec_t*			r = rec;
80
81
	ut_ad(page_rec_check(rec));
82
83
	page = buf_frame_align(rec);
84
	first_slot = page_dir_get_nth_slot(page, 0);
85
	slot = page_dir_get_nth_slot(page, page_dir_get_n_slots(page) - 1);
86
87
	if (page_is_comp(page)) {
88
		while (rec_get_n_owned(r, TRUE) == 0) {
89
			r = page + rec_get_next_offs(r, TRUE);
90
			ut_ad(r >= page + PAGE_NEW_SUPREMUM);
91
			ut_ad(r < page + (UNIV_PAGE_SIZE - PAGE_DIR));
92
		}
93
	} else {
94
		while (rec_get_n_owned(r, FALSE) == 0) {
95
			r = page + rec_get_next_offs(r, FALSE);
96
			ut_ad(r >= page + PAGE_OLD_SUPREMUM);
97
			ut_ad(r < page + (UNIV_PAGE_SIZE - PAGE_DIR));
98
		}
99
	}
100
101
	rec_offs_bytes = mach_encode_2(r - page);
102
103
	while (UNIV_LIKELY(*(uint16*) slot != rec_offs_bytes)) {
104
105
		if (UNIV_UNLIKELY(slot == first_slot)) {
106
			fprintf(stderr,
107
				"InnoDB: Probable data corruption on"
108
				" page %lu\n"
109
				"InnoDB: Original record ",
110
				(ulong) buf_frame_get_page_no(page));
111
112
			if (page_is_comp(page)) {
113
				fputs("(compact record)", stderr);
114
			} else {
115
				rec_print_old(stderr, rec);
116
			}
117
118
			fputs("\n"
119
			      "InnoDB: on that page.\n"
120
			      "InnoDB: Cannot find the dir slot for record ",
121
			      stderr);
122
			if (page_is_comp(page)) {
123
				fputs("(compact record)", stderr);
124
			} else {
125
				rec_print_old(stderr, page
126
					      + mach_decode_2(rec_offs_bytes));
127
			}
128
			fputs("\n"
129
			      "InnoDB: on that page!\n", stderr);
130
131
			buf_page_print(page);
132
133
			ut_error;
134
		}
135
136
		slot += PAGE_DIR_SLOT_SIZE;
137
	}
138
139
	return(((ulint) (first_slot - slot)) / PAGE_DIR_SLOT_SIZE);
140
}
141
142
/******************************************************************
143
Used to check the consistency of a directory slot. */
144
static
145
ibool
146
page_dir_slot_check(
147
/*================*/
148
					/* out: TRUE if succeed */
149
	page_dir_slot_t*	slot)	/* in: slot */
150
{
151
	page_t*	page;
152
	ulint	n_slots;
153
	ulint	n_owned;
154
155
	ut_a(slot);
156
157
	page = buf_frame_align(slot);
158
159
	n_slots = page_dir_get_n_slots(page);
160
161
	ut_a(slot <= page_dir_get_nth_slot(page, 0));
162
	ut_a(slot >= page_dir_get_nth_slot(page, n_slots - 1));
163
164
	ut_a(page_rec_check(page_dir_slot_get_rec(slot)));
165
166
	n_owned = rec_get_n_owned(page_dir_slot_get_rec(slot),
167
				  page_is_comp(page));
168
169
	if (slot == page_dir_get_nth_slot(page, 0)) {
170
		ut_a(n_owned == 1);
171
	} else if (slot == page_dir_get_nth_slot(page, n_slots - 1)) {
172
		ut_a(n_owned >= 1);
173
		ut_a(n_owned <= PAGE_DIR_SLOT_MAX_N_OWNED);
174
	} else {
175
		ut_a(n_owned >= PAGE_DIR_SLOT_MIN_N_OWNED);
176
		ut_a(n_owned <= PAGE_DIR_SLOT_MAX_N_OWNED);
177
	}
178
179
	return(TRUE);
180
}
181
182
/*****************************************************************
183
Sets the max trx id field value. */
184
185
void
186
page_set_max_trx_id(
187
/*================*/
188
	page_t*	page,	/* in: page */
189
	dulint	trx_id)	/* in: transaction id */
190
{
191
	buf_block_t*	block;
192
193
	ut_ad(page);
194
195
	block = buf_block_align(page);
196
197
	if (block->is_hashed) {
198
		rw_lock_x_lock(&btr_search_latch);
199
	}
200
201
	/* It is not necessary to write this change to the redo log, as
202
	during a database recovery we assume that the max trx id of every
203
	page is the maximum trx id assigned before the crash. */
204
205
	mach_write_to_8(page + PAGE_HEADER + PAGE_MAX_TRX_ID, trx_id);
206
207
	if (block->is_hashed) {
208
		rw_lock_x_unlock(&btr_search_latch);
209
	}
210
}
211
212
/*****************************************************************
213
Calculates free space if a page is emptied. */
214
215
ulint
216
page_get_free_space_of_empty_noninline(
217
/*===================================*/
218
			/* out: free space */
219
	ulint	comp)	/* in: nonzero=compact page format */
220
{
221
	return(page_get_free_space_of_empty(comp));
222
}
223
224
/****************************************************************
225
Allocates a block of memory from an index page. */
226
227
byte*
228
page_mem_alloc(
229
/*===========*/
230
				/* out: pointer to start of allocated
231
				buffer, or NULL if allocation fails */
232
	page_t*		page,	/* in: index page */
233
	ulint		need,	/* in: number of bytes needed */
234
	dict_index_t*	index,	/* in: record descriptor */
235
	ulint*		heap_no)/* out: this contains the heap number
236
				of the allocated record
237
				if allocation succeeds */
238
{
239
	rec_t*	rec;
240
	byte*	block;
241
	ulint	avl_space;
242
	ulint	garbage;
243
244
	ut_ad(page && heap_no);
245
246
	/* If there are records in the free list, look if the first is
247
	big enough */
248
249
	rec = page_header_get_ptr(page, PAGE_FREE);
250
251
	if (rec) {
252
		mem_heap_t*	heap		= NULL;
253
		ulint		offsets_[REC_OFFS_NORMAL_SIZE];
254
		ulint*		offsets		= offsets_;
255
		*offsets_ = (sizeof offsets_) / sizeof *offsets_;
256
257
		offsets = rec_get_offsets(rec, index, offsets,
258
					  ULINT_UNDEFINED, &heap);
259
260
		if (rec_offs_size(offsets) >= need) {
261
			page_header_set_ptr(page, PAGE_FREE,
262
					    page_rec_get_next(rec));
263
264
			garbage = page_header_get_field(page, PAGE_GARBAGE);
265
			ut_ad(garbage >= need);
266
267
			page_header_set_field(page, PAGE_GARBAGE,
268
					      garbage - need);
269
270
			*heap_no = rec_get_heap_no(rec, page_is_comp(page));
271
272
			block = rec_get_start(rec, offsets);
273
			if (UNIV_LIKELY_NULL(heap)) {
274
				mem_heap_free(heap);
275
			}
276
			return(block);
277
		}
278
279
		if (UNIV_LIKELY_NULL(heap)) {
280
			mem_heap_free(heap);
281
		}
282
	}
283
284
	/* Could not find space from the free list, try top of heap */
285
286
	avl_space = page_get_max_insert_size(page, 1);
287
288
	if (avl_space >= need) {
289
		block = page_header_get_ptr(page, PAGE_HEAP_TOP);
290
291
		page_header_set_ptr(page, PAGE_HEAP_TOP, block + need);
292
		*heap_no = page_dir_get_n_heap(page);
293
294
		page_dir_set_n_heap(page, 1 + *heap_no);
295
296
		return(block);
297
	}
298
299
	return(NULL);
300
}
301
302
/**************************************************************
303
Writes a log record of page creation. */
304
UNIV_INLINE
305
void
306
page_create_write_log(
307
/*==================*/
308
	buf_frame_t*	frame,	/* in: a buffer frame where the page is
309
				created */
310
	mtr_t*		mtr,	/* in: mini-transaction handle */
311
	ulint		comp)	/* in: nonzero=compact page format */
312
{
313
	mlog_write_initial_log_record(frame, comp
314
				      ? MLOG_COMP_PAGE_CREATE
315
				      : MLOG_PAGE_CREATE, mtr);
316
}
317
318
/***************************************************************
319
Parses a redo log record of creating a page. */
320
321
byte*
322
page_parse_create(
323
/*==============*/
324
			/* out: end of log record or NULL */
325
	byte*	ptr,	/* in: buffer */
326
	byte*	end_ptr __attribute__((unused)), /* in: buffer end */
327
	ulint	comp,	/* in: nonzero=compact page format */
328
	page_t*	page,	/* in: page or NULL */
329
	mtr_t*	mtr)	/* in: mtr or NULL */
330
{
331
	ut_ad(ptr && end_ptr);
332
333
	/* The record is empty, except for the record initial part */
334
335
	if (page) {
336
		page_create(page, mtr, comp);
337
	}
338
339
	return(ptr);
340
}
341
342
/**************************************************************
343
The index page creation function. */
344
345
page_t*
346
page_create(
347
/*========*/
348
				/* out: pointer to the page */
349
	buf_frame_t*	frame,	/* in: a buffer frame where the page is
350
				created */
351
	mtr_t*		mtr,	/* in: mini-transaction handle */
352
	ulint		comp)	/* in: nonzero=compact page format */
353
{
354
	page_dir_slot_t* slot;
355
	mem_heap_t*	heap;
356
	dtuple_t*	tuple;
357
	dfield_t*	field;
358
	byte*		heap_top;
359
	rec_t*		infimum_rec;
360
	rec_t*		supremum_rec;
361
	page_t*		page;
362
	dict_index_t*	index;
363
	ulint*		offsets;
364
365
	index = comp ? srv_sys->dummy_ind2 : srv_sys->dummy_ind1;
366
367
	ut_ad(frame && mtr);
368
#if PAGE_BTR_IBUF_FREE_LIST + FLST_BASE_NODE_SIZE > PAGE_DATA
369
# error "PAGE_BTR_IBUF_FREE_LIST + FLST_BASE_NODE_SIZE > PAGE_DATA"
370
#endif
371
#if PAGE_BTR_IBUF_FREE_LIST_NODE + FLST_NODE_SIZE > PAGE_DATA
372
# error "PAGE_BTR_IBUF_FREE_LIST_NODE + FLST_NODE_SIZE > PAGE_DATA"
373
#endif
374
375
	/* 1. INCREMENT MODIFY CLOCK */
376
	buf_frame_modify_clock_inc(frame);
377
378
	/* 2. WRITE LOG INFORMATION */
379
	page_create_write_log(frame, mtr, comp);
380
381
	page = frame;
382
383
	fil_page_set_type(page, FIL_PAGE_INDEX);
384
385
	heap = mem_heap_create(200);
386
387
	/* 3. CREATE THE INFIMUM AND SUPREMUM RECORDS */
388
389
	/* Create first a data tuple for infimum record */
390
	tuple = dtuple_create(heap, 1);
391
	dtuple_set_info_bits(tuple, REC_STATUS_INFIMUM);
392
	field = dtuple_get_nth_field(tuple, 0);
393
394
	dfield_set_data(field, "infimum", 8);
395
	dtype_set(dfield_get_type(field),
396
		  DATA_VARCHAR, DATA_ENGLISH | DATA_NOT_NULL, 8);
397
	/* Set the corresponding physical record to its place in the page
398
	record heap */
399
400
	heap_top = page + PAGE_DATA;
401
402
	infimum_rec = rec_convert_dtuple_to_rec(heap_top, index, tuple);
403
404
	ut_a(infimum_rec == page
405
	     + (comp ? PAGE_NEW_INFIMUM : PAGE_OLD_INFIMUM));
406
407
	rec_set_n_owned(infimum_rec, comp, 1);
408
	rec_set_heap_no(infimum_rec, comp, 0);
409
	offsets = rec_get_offsets(infimum_rec, index, NULL,
410
				  ULINT_UNDEFINED, &heap);
411
412
	heap_top = rec_get_end(infimum_rec, offsets);
413
414
	/* Create then a tuple for supremum */
415
416
	tuple = dtuple_create(heap, 1);
417
	dtuple_set_info_bits(tuple, REC_STATUS_SUPREMUM);
418
	field = dtuple_get_nth_field(tuple, 0);
419
420
	dfield_set_data(field, "supremum", comp ? 8 : 9);
421
	dtype_set(dfield_get_type(field),
422
		  DATA_VARCHAR, DATA_ENGLISH | DATA_NOT_NULL, comp ? 8 : 9);
423
424
	supremum_rec = rec_convert_dtuple_to_rec(heap_top, index, tuple);
425
426
	ut_a(supremum_rec == page
427
	     + (comp ? PAGE_NEW_SUPREMUM : PAGE_OLD_SUPREMUM));
428
429
	rec_set_n_owned(supremum_rec, comp, 1);
430
	rec_set_heap_no(supremum_rec, comp, 1);
431
432
	offsets = rec_get_offsets(supremum_rec, index, offsets,
433
				  ULINT_UNDEFINED, &heap);
434
	heap_top = rec_get_end(supremum_rec, offsets);
435
436
	ut_ad(heap_top == page
437
	      + (comp ? PAGE_NEW_SUPREMUM_END : PAGE_OLD_SUPREMUM_END));
438
439
	mem_heap_free(heap);
440
441
	/* 4. INITIALIZE THE PAGE */
442
443
	page_header_set_field(page, PAGE_N_DIR_SLOTS, 2);
444
	page_header_set_ptr(page, PAGE_HEAP_TOP, heap_top);
445
	page_header_set_field(page, PAGE_N_HEAP, comp ? 0x8002 : 2);
446
	page_header_set_ptr(page, PAGE_FREE, NULL);
447
	page_header_set_field(page, PAGE_GARBAGE, 0);
448
	page_header_set_ptr(page, PAGE_LAST_INSERT, NULL);
449
	page_header_set_field(page, PAGE_DIRECTION, PAGE_NO_DIRECTION);
450
	page_header_set_field(page, PAGE_N_DIRECTION, 0);
451
	page_header_set_field(page, PAGE_N_RECS, 0);
452
	page_set_max_trx_id(page, ut_dulint_zero);
453
	memset(heap_top, 0, UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START
454
	       - (heap_top - page));
455
456
	/* 5. SET POINTERS IN RECORDS AND DIR SLOTS */
457
458
	/* Set the slots to point to infimum and supremum. */
459
460
	slot = page_dir_get_nth_slot(page, 0);
461
	page_dir_slot_set_rec(slot, infimum_rec);
462
463
	slot = page_dir_get_nth_slot(page, 1);
464
	page_dir_slot_set_rec(slot, supremum_rec);
465
466
	/* Set the next pointers in infimum and supremum */
467
468
	rec_set_next_offs(infimum_rec, comp, (ulint)(supremum_rec - page));
469
	rec_set_next_offs(supremum_rec, comp, 0);
470
471
	return(page);
472
}
473
474
/*****************************************************************
475
Differs from page_copy_rec_list_end, because this function does not
476
touch the lock table and max trx id on page. */
477
478
void
479
page_copy_rec_list_end_no_locks(
480
/*============================*/
481
	page_t*		new_page,	/* in: index page to copy to */
482
	page_t*		page,		/* in: index page */
483
	rec_t*		rec,		/* in: record on page */
484
	dict_index_t*	index,		/* in: record descriptor */
485
	mtr_t*		mtr)		/* in: mtr */
486
{
487
	page_cur_t	cur1;
488
	page_cur_t	cur2;
489
	rec_t*		sup;
490
	mem_heap_t*	heap		= NULL;
491
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
492
	ulint*		offsets		= offsets_;
493
	*offsets_ = (sizeof offsets_) / sizeof *offsets_;
494
495
	page_cur_position(rec, &cur1);
496
497
	if (page_cur_is_before_first(&cur1)) {
498
499
		page_cur_move_to_next(&cur1);
500
	}
501
502
	ut_a((ibool)!!page_is_comp(new_page)
503
	     == dict_table_is_comp(index->table));
504
	ut_a(page_is_comp(new_page) == page_is_comp(page));
505
	ut_a(mach_read_from_2(new_page + UNIV_PAGE_SIZE - 10) == (ulint)
506
	     (page_is_comp(new_page)
507
	      ? PAGE_NEW_INFIMUM : PAGE_OLD_INFIMUM));
508
509
	page_cur_set_before_first(new_page, &cur2);
510
511
	/* Copy records from the original page to the new page */
512
513
	sup = page_get_supremum_rec(page);
514
515
	for (;;) {
516
		rec_t*	cur1_rec = page_cur_get_rec(&cur1);
517
		if (cur1_rec == sup) {
518
			break;
519
		}
520
		offsets = rec_get_offsets(cur1_rec, index, offsets,
521
					  ULINT_UNDEFINED, &heap);
522
		if (UNIV_UNLIKELY(!page_cur_rec_insert(&cur2, cur1_rec, index,
523
						       offsets, mtr))) {
524
			/* Track an assertion failure reported on the mailing
525
			list on June 18th, 2003 */
526
527
			buf_page_print(new_page);
528
			buf_page_print(page);
529
			ut_print_timestamp(stderr);
530
531
			fprintf(stderr,
532
				"InnoDB: rec offset %lu, cur1 offset %lu,"
533
				" cur2 offset %lu\n",
534
				(ulong)(rec - page),
535
				(ulong)(page_cur_get_rec(&cur1) - page),
536
				(ulong)(page_cur_get_rec(&cur2) - new_page));
537
538
			ut_error;
539
		}
540
541
		page_cur_move_to_next(&cur1);
542
		page_cur_move_to_next(&cur2);
543
	}
544
545
	if (UNIV_LIKELY_NULL(heap)) {
546
		mem_heap_free(heap);
547
	}
548
}
549
550
/*****************************************************************
551
Copies records from page to new_page, from a given record onward,
552
including that record. Infimum and supremum records are not copied.
553
The records are copied to the start of the record list on new_page. */
554
555
void
556
page_copy_rec_list_end(
557
/*===================*/
558
	page_t*		new_page,	/* in: index page to copy to */
559
	page_t*		page,		/* in: index page */
560
	rec_t*		rec,		/* in: record on page */
561
	dict_index_t*	index,		/* in: record descriptor */
562
	mtr_t*		mtr)		/* in: mtr */
563
{
564
	if (page_dir_get_n_heap(new_page) == 2) {
565
		page_copy_rec_list_end_to_created_page(new_page, page, rec,
566
						       index, mtr);
567
	} else {
568
		page_copy_rec_list_end_no_locks(new_page, page, rec,
569
						index, mtr);
570
	}
571
572
	/* Update the lock table, MAX_TRX_ID, and possible hash index */
573
574
	lock_move_rec_list_end(new_page, page, rec);
575
576
	page_update_max_trx_id(new_page, page_get_max_trx_id(page));
577
578
	btr_search_move_or_delete_hash_entries(new_page, page, index);
579
}
580
581
/*****************************************************************
582
Copies records from page to new_page, up to the given record,
583
NOT including that record. Infimum and supremum records are not copied.
584
The records are copied to the end of the record list on new_page. */
585
586
void
587
page_copy_rec_list_start(
588
/*=====================*/
589
	page_t*		new_page,	/* in: index page to copy to */
590
	page_t*		page,		/* in: index page */
591
	rec_t*		rec,		/* in: record on page */
592
	dict_index_t*	index,		/* in: record descriptor */
593
	mtr_t*		mtr)		/* in: mtr */
594
{
595
	page_cur_t	cur1;
596
	page_cur_t	cur2;
597
	rec_t*		old_end;
598
	mem_heap_t*	heap		= NULL;
599
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
600
	ulint*		offsets		= offsets_;
601
	*offsets_ = (sizeof offsets_) / sizeof *offsets_;
602
603
	page_cur_set_before_first(page, &cur1);
604
605
	if (rec == page_cur_get_rec(&cur1)) {
606
607
		return;
608
	}
609
610
	page_cur_move_to_next(&cur1);
611
612
	page_cur_set_after_last(new_page, &cur2);
613
	page_cur_move_to_prev(&cur2);
614
	old_end = page_cur_get_rec(&cur2);
615
616
	/* Copy records from the original page to the new page */
617
618
	while (page_cur_get_rec(&cur1) != rec) {
619
		rec_t*	ins_rec;
620
		rec_t*	cur1_rec = page_cur_get_rec(&cur1);
621
		offsets = rec_get_offsets(cur1_rec, index, offsets,
622
					  ULINT_UNDEFINED, &heap);
623
		ins_rec = page_cur_rec_insert(&cur2, cur1_rec, index,
624
					      offsets, mtr);
625
		ut_a(ins_rec);
626
627
		page_cur_move_to_next(&cur1);
628
		page_cur_move_to_next(&cur2);
629
	}
630
631
	/* Update the lock table, MAX_TRX_ID, and possible hash index */
632
633
	lock_move_rec_list_start(new_page, page, rec, old_end);
634
635
	page_update_max_trx_id(new_page, page_get_max_trx_id(page));
636
637
	btr_search_move_or_delete_hash_entries(new_page, page, index);
638
639
	if (UNIV_LIKELY_NULL(heap)) {
640
		mem_heap_free(heap);
641
	}
642
}
643
644
/**************************************************************
645
Writes a log record of a record list end or start deletion. */
646
UNIV_INLINE
647
void
648
page_delete_rec_list_write_log(
649
/*===========================*/
650
	rec_t*		rec,	/* in: record on page */
651
	dict_index_t*	index,	/* in: record descriptor */
652
	byte		type,	/* in: operation type:
653
				MLOG_LIST_END_DELETE, ... */
654
	mtr_t*		mtr)	/* in: mtr */
655
{
656
	byte*	log_ptr;
657
	ut_ad(type == MLOG_LIST_END_DELETE
658
	      || type == MLOG_LIST_START_DELETE
659
	      || type == MLOG_COMP_LIST_END_DELETE
660
	      || type == MLOG_COMP_LIST_START_DELETE);
661
662
	log_ptr = mlog_open_and_write_index(mtr, rec, index, type, 2);
663
	if (log_ptr) {
664
		/* Write the parameter as a 2-byte ulint */
665
		mach_write_to_2(log_ptr, page_offset(rec));
666
		mlog_close(mtr, log_ptr + 2);
667
	}
668
}
669
670
/**************************************************************
671
Parses a log record of a record list end or start deletion. */
672
673
byte*
674
page_parse_delete_rec_list(
675
/*=======================*/
676
				/* out: end of log record or NULL */
677
	byte		type,	/* in: MLOG_LIST_END_DELETE,
678
				MLOG_LIST_START_DELETE,
679
				MLOG_COMP_LIST_END_DELETE or
680
				MLOG_COMP_LIST_START_DELETE */
681
	byte*		ptr,	/* in: buffer */
682
	byte*		end_ptr,/* in: buffer end */
683
	dict_index_t*	index,	/* in: record descriptor */
684
	page_t*		page,	/* in: page or NULL */
685
	mtr_t*		mtr)	/* in: mtr or NULL */
686
{
687
	ulint	offset;
688
689
	ut_ad(type == MLOG_LIST_END_DELETE
690
	      || type == MLOG_LIST_START_DELETE
691
	      || type == MLOG_COMP_LIST_END_DELETE
692
	      || type == MLOG_COMP_LIST_START_DELETE);
693
694
	/* Read the record offset as a 2-byte ulint */
695
696
	if (end_ptr < ptr + 2) {
697
698
		return(NULL);
699
	}
700
701
	offset = mach_read_from_2(ptr);
702
	ptr += 2;
703
704
	if (!page) {
705
706
		return(ptr);
707
	}
708
709
	ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
710
711
	if (type == MLOG_LIST_END_DELETE
712
	    || type == MLOG_COMP_LIST_END_DELETE) {
713
		page_delete_rec_list_end(page, page + offset, index,
714
					 ULINT_UNDEFINED,
715
					 ULINT_UNDEFINED, mtr);
716
	} else {
717
		page_delete_rec_list_start(page, page + offset, index, mtr);
718
	}
719
720
	return(ptr);
721
}
722
723
/*****************************************************************
724
Deletes records from a page from a given record onward, including that record.
725
The infimum and supremum records are not deleted. */
726
727
void
728
page_delete_rec_list_end(
729
/*=====================*/
730
	page_t*		page,	/* in: index page */
731
	rec_t*		rec,	/* in: record on page */
732
	dict_index_t*	index,	/* in: record descriptor */
733
	ulint		n_recs,	/* in: number of records to delete,
734
				or ULINT_UNDEFINED if not known */
735
	ulint		size,	/* in: the sum of the sizes of the
736
				records in the end of the chain to
737
				delete, or ULINT_UNDEFINED if not known */
738
	mtr_t*		mtr)	/* in: mtr */
739
{
740
	page_dir_slot_t* slot;
741
	ulint	slot_index;
742
	rec_t*	last_rec;
743
	rec_t*	prev_rec;
744
	rec_t*	free;
745
	rec_t*	rec2;
746
	ulint	count;
747
	ulint	n_owned;
748
	rec_t*	sup;
749
	ulint	comp;
750
751
	/* Reset the last insert info in the page header and increment
752
	the modify clock for the frame */
753
754
	ut_ad(size == ULINT_UNDEFINED || size < UNIV_PAGE_SIZE);
755
	page_header_set_ptr(page, PAGE_LAST_INSERT, NULL);
756
757
	/* The page gets invalid for optimistic searches: increment the
758
	frame modify clock */
759
760
	buf_frame_modify_clock_inc(page);
761
762
	sup = page_get_supremum_rec(page);
763
764
	comp = page_is_comp(page);
765
	if (page_rec_is_infimum_low(rec - page)) {
766
		rec = page_rec_get_next(rec);
767
	}
768
769
	page_delete_rec_list_write_log(rec, index, comp
770
				       ? MLOG_COMP_LIST_END_DELETE
771
				       : MLOG_LIST_END_DELETE, mtr);
772
773
	if (rec == sup) {
774
775
		return;
776
	}
777
778
	prev_rec = page_rec_get_prev(rec);
779
780
	last_rec = page_rec_get_prev(sup);
781
782
	if ((size == ULINT_UNDEFINED) || (n_recs == ULINT_UNDEFINED)) {
783
		mem_heap_t*	heap		= NULL;
784
		ulint		offsets_[REC_OFFS_NORMAL_SIZE];
785
		ulint*		offsets		= offsets_;
786
		*offsets_ = (sizeof offsets_) / sizeof *offsets_;
787
		/* Calculate the sum of sizes and the number of records */
788
		size = 0;
789
		n_recs = 0;
790
		rec2 = rec;
791
792
		while (rec2 != sup) {
793
			ulint	s;
794
			offsets = rec_get_offsets(rec2, index, offsets,
795
						  ULINT_UNDEFINED, &heap);
796
			s = rec_offs_size(offsets);
797
			ut_ad(rec2 - page + s - rec_offs_extra_size(offsets)
798
			      < UNIV_PAGE_SIZE);
799
			ut_ad(size + s < UNIV_PAGE_SIZE);
800
			size += s;
801
			n_recs++;
802
803
			rec2 = page_rec_get_next(rec2);
804
		}
805
806
		if (UNIV_LIKELY_NULL(heap)) {
807
			mem_heap_free(heap);
808
		}
809
	}
810
811
	ut_ad(size < UNIV_PAGE_SIZE);
812
813
	/* Update the page directory; there is no need to balance the number
814
	of the records owned by the supremum record, as it is allowed to be
815
	less than PAGE_DIR_SLOT_MIN_N_OWNED */
816
817
	rec2 = rec;
818
	count = 0;
819
820
	while (rec_get_n_owned(rec2, comp) == 0) {
821
		count++;
822
823
		rec2 = page_rec_get_next(rec2);
824
	}
825
826
	ut_ad(rec_get_n_owned(rec2, comp) - count > 0);
827
828
	n_owned = rec_get_n_owned(rec2, comp) - count;
829
830
	slot_index = page_dir_find_owner_slot(rec2);
831
	slot = page_dir_get_nth_slot(page, slot_index);
832
833
	page_dir_slot_set_rec(slot, sup);
834
	page_dir_slot_set_n_owned(slot, n_owned);
835
836
	page_dir_set_n_slots(page, slot_index + 1);
837
838
	/* Remove the record chain segment from the record chain */
839
	page_rec_set_next(prev_rec, page_get_supremum_rec(page));
840
841
	/* Catenate the deleted chain segment to the page free list */
842
843
	free = page_header_get_ptr(page, PAGE_FREE);
844
845
	page_rec_set_next(last_rec, free);
846
	page_header_set_ptr(page, PAGE_FREE, rec);
847
848
	page_header_set_field(page, PAGE_GARBAGE, size
849
			      + page_header_get_field(page, PAGE_GARBAGE));
850
851
	page_header_set_field(page, PAGE_N_RECS,
852
			      (ulint)(page_get_n_recs(page) - n_recs));
853
}
854
855
/*****************************************************************
856
Deletes records from page, up to the given record, NOT including
857
that record. Infimum and supremum records are not deleted. */
858
859
void
860
page_delete_rec_list_start(
861
/*=======================*/
862
	page_t*		page,	/* in: index page */
863
	rec_t*		rec,	/* in: record on page */
864
	dict_index_t*	index,	/* in: record descriptor */
865
	mtr_t*		mtr)	/* in: mtr */
866
{
867
	page_cur_t	cur1;
868
	ulint		log_mode;
869
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
870
	ulint*		offsets		= offsets_;
871
	mem_heap_t*	heap		= NULL;
872
	byte		type;
873
	*offsets_ = (sizeof offsets_) / sizeof *offsets_;
874
875
	ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
876
877
	if (page_is_comp(page)) {
878
		type = MLOG_COMP_LIST_START_DELETE;
879
	} else {
880
		type = MLOG_LIST_START_DELETE;
881
	}
882
883
	page_delete_rec_list_write_log(rec, index, type, mtr);
884
885
	page_cur_set_before_first(page, &cur1);
886
887
	if (rec == page_cur_get_rec(&cur1)) {
888
889
		return;
890
	}
891
892
	page_cur_move_to_next(&cur1);
893
894
	/* Individual deletes are not logged */
895
896
	log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
897
898
	while (page_cur_get_rec(&cur1) != rec) {
899
		offsets = rec_get_offsets(page_cur_get_rec(&cur1), index,
900
					  offsets, ULINT_UNDEFINED, &heap);
901
		page_cur_delete_rec(&cur1, index, offsets, mtr);
902
	}
903
904
	if (UNIV_LIKELY_NULL(heap)) {
905
		mem_heap_free(heap);
906
	}
907
908
	/* Restore log mode */
909
910
	mtr_set_log_mode(mtr, log_mode);
911
}
912
913
/*****************************************************************
914
Moves record list end to another page. Moved records include
915
split_rec. */
916
917
void
918
page_move_rec_list_end(
919
/*===================*/
920
	page_t*		new_page,	/* in: index page where to move */
921
	page_t*		page,		/* in: index page */
922
	rec_t*		split_rec,	/* in: first record to move */
923
	dict_index_t*	index,		/* in: record descriptor */
924
	mtr_t*		mtr)		/* in: mtr */
925
{
926
	ulint	old_data_size;
927
	ulint	new_data_size;
928
	ulint	old_n_recs;
929
	ulint	new_n_recs;
930
931
	old_data_size = page_get_data_size(new_page);
932
	old_n_recs = page_get_n_recs(new_page);
933
934
	page_copy_rec_list_end(new_page, page, split_rec, index, mtr);
935
936
	new_data_size = page_get_data_size(new_page);
937
	new_n_recs = page_get_n_recs(new_page);
938
939
	ut_ad(new_data_size >= old_data_size);
940
941
	page_delete_rec_list_end(page, split_rec, index,
942
				 new_n_recs - old_n_recs,
943
				 new_data_size - old_data_size, mtr);
944
}
945
946
/*****************************************************************
947
Moves record list start to another page. Moved records do not include
948
split_rec. */
949
950
void
951
page_move_rec_list_start(
952
/*=====================*/
953
	page_t*		new_page,	/* in: index page where to move */
954
	page_t*		page,		/* in: index page */
955
	rec_t*		split_rec,	/* in: first record not to move */
956
	dict_index_t*	index,		/* in: record descriptor */
957
	mtr_t*		mtr)		/* in: mtr */
958
{
959
	page_copy_rec_list_start(new_page, page, split_rec, index, mtr);
960
961
	page_delete_rec_list_start(page, split_rec, index, mtr);
962
}
963
964
/***************************************************************************
965
This is a low-level operation which is used in a database index creation
966
to update the page number of a created B-tree to a data dictionary record. */
967
968
void
969
page_rec_write_index_page_no(
970
/*=========================*/
971
	rec_t*	rec,	/* in: record to update */
972
	ulint	i,	/* in: index of the field to update */
973
	ulint	page_no,/* in: value to write */
974
	mtr_t*	mtr)	/* in: mtr */
975
{
976
	byte*	data;
977
	ulint	len;
978
979
	data = rec_get_nth_field_old(rec, i, &len);
980
981
	ut_ad(len == 4);
982
983
	mlog_write_ulint(data, page_no, MLOG_4BYTES, mtr);
984
}
985
986
/******************************************************************
987
Used to delete n slots from the directory. This function updates
988
also n_owned fields in the records, so that the first slot after
989
the deleted ones inherits the records of the deleted slots. */
990
UNIV_INLINE
991
void
992
page_dir_delete_slots(
993
/*==================*/
994
	page_t*	page,	/* in: the index page */
995
	ulint	start,	/* in: first slot to be deleted */
996
	ulint	n)	/* in: number of slots to delete (currently
997
			only n == 1 allowed) */
998
{
999
	page_dir_slot_t*	slot;
1000
	ulint			i;
1001
	ulint			sum_owned = 0;
1002
	ulint			n_slots;
1003
	rec_t*			rec;
1004
1005
	ut_ad(n == 1);
1006
	ut_ad(start > 0);
1007
	ut_ad(start + n < page_dir_get_n_slots(page));
1008
1009
	n_slots = page_dir_get_n_slots(page);
1010
1011
	/* 1. Reset the n_owned fields of the slots to be
1012
	deleted */
1013
	for (i = start; i < start + n; i++) {
1014
		slot = page_dir_get_nth_slot(page, i);
1015
		sum_owned += page_dir_slot_get_n_owned(slot);
1016
		page_dir_slot_set_n_owned(slot, 0);
1017
	}
1018
1019
	/* 2. Update the n_owned value of the first non-deleted slot */
1020
1021
	slot = page_dir_get_nth_slot(page, start + n);
1022
	page_dir_slot_set_n_owned(slot,
1023
				  sum_owned + page_dir_slot_get_n_owned(slot));
1024
1025
	/* 3. Destroy start and other slots by copying slots */
1026
	for (i = start + n; i < n_slots; i++) {
1027
		slot = page_dir_get_nth_slot(page, i);
1028
		rec = page_dir_slot_get_rec(slot);
1029
1030
		slot = page_dir_get_nth_slot(page, i - n);
1031
		page_dir_slot_set_rec(slot, rec);
1032
	}
1033
1034
	/* 4. Update the page header */
1035
	page_header_set_field(page, PAGE_N_DIR_SLOTS, n_slots - n);
1036
}
1037
1038
/******************************************************************
1039
Used to add n slots to the directory. Does not set the record pointers
1040
in the added slots or update n_owned values: this is the responsibility
1041
of the caller. */
1042
UNIV_INLINE
1043
void
1044
page_dir_add_slots(
1045
/*===============*/
1046
	page_t*	page,	/* in: the index page */
1047
	ulint	start,	/* in: the slot above which the new slots are added */
1048
	ulint	n)	/* in: number of slots to add (currently only n == 1
1049
			allowed) */
1050
{
1051
	page_dir_slot_t*	slot;
1052
	ulint			n_slots;
1053
	ulint			i;
1054
	rec_t*			rec;
1055
1056
	ut_ad(n == 1);
1057
1058
	n_slots = page_dir_get_n_slots(page);
1059
1060
	ut_ad(start < n_slots - 1);
1061
1062
	/* Update the page header */
1063
	page_dir_set_n_slots(page, n_slots + n);
1064
1065
	/* Move slots up */
1066
1067
	for (i = n_slots - 1; i > start; i--) {
1068
1069
		slot = page_dir_get_nth_slot(page, i);
1070
		rec = page_dir_slot_get_rec(slot);
1071
1072
		slot = page_dir_get_nth_slot(page, i + n);
1073
		page_dir_slot_set_rec(slot, rec);
1074
	}
1075
}
1076
1077
/********************************************************************
1078
Splits a directory slot which owns too many records. */
1079
1080
void
1081
page_dir_split_slot(
1082
/*================*/
1083
	page_t*	page,		/* in: the index page in question */
1084
	ulint	slot_no)	/* in: the directory slot */
1085
{
1086
	rec_t*			rec;
1087
	page_dir_slot_t*	new_slot;
1088
	page_dir_slot_t*	prev_slot;
1089
	page_dir_slot_t*	slot;
1090
	ulint			i;
1091
	ulint			n_owned;
1092
1093
	ut_ad(page);
1094
	ut_ad(slot_no > 0);
1095
1096
	slot = page_dir_get_nth_slot(page, slot_no);
1097
1098
	n_owned = page_dir_slot_get_n_owned(slot);
1099
	ut_ad(n_owned == PAGE_DIR_SLOT_MAX_N_OWNED + 1);
1100
1101
	/* 1. We loop to find a record approximately in the middle of the
1102
	records owned by the slot. */
1103
1104
	prev_slot = page_dir_get_nth_slot(page, slot_no - 1);
1105
	rec = page_dir_slot_get_rec(prev_slot);
1106
1107
	for (i = 0; i < n_owned / 2; i++) {
1108
		rec = page_rec_get_next(rec);
1109
	}
1110
1111
	ut_ad(n_owned / 2 >= PAGE_DIR_SLOT_MIN_N_OWNED);
1112
1113
	/* 2. We add one directory slot immediately below the slot to be
1114
	split. */
1115
1116
	page_dir_add_slots(page, slot_no - 1, 1);
1117
1118
	/* The added slot is now number slot_no, and the old slot is
1119
	now number slot_no + 1 */
1120
1121
	new_slot = page_dir_get_nth_slot(page, slot_no);
1122
	slot = page_dir_get_nth_slot(page, slot_no + 1);
1123
1124
	/* 3. We store the appropriate values to the new slot. */
1125
1126
	page_dir_slot_set_rec(new_slot, rec);
1127
	page_dir_slot_set_n_owned(new_slot, n_owned / 2);
1128
1129
	/* 4. Finally, we update the number of records field of the
1130
	original slot */
1131
1132
	page_dir_slot_set_n_owned(slot, n_owned - (n_owned / 2));
1133
}
1134
1135
/*****************************************************************
1136
Tries to balance the given directory slot with too few records with the upper
1137
neighbor, so that there are at least the minimum number of records owned by
1138
the slot; this may result in the merging of two slots. */
1139
1140
void
1141
page_dir_balance_slot(
1142
/*==================*/
1143
	page_t*	page,		/* in: index page */
1144
	ulint	slot_no)	/* in: the directory slot */
1145
{
1146
	page_dir_slot_t*	slot;
1147
	page_dir_slot_t*	up_slot;
1148
	ulint			n_owned;
1149
	ulint			up_n_owned;
1150
	rec_t*			old_rec;
1151
	rec_t*			new_rec;
1152
1153
	ut_ad(page);
1154
	ut_ad(slot_no > 0);
1155
1156
	slot = page_dir_get_nth_slot(page, slot_no);
1157
1158
	/* The last directory slot cannot be balanced with the upper
1159
	neighbor, as there is none. */
1160
1161
	if (slot_no == page_dir_get_n_slots(page) - 1) {
1162
1163
		return;
1164
	}
1165
1166
	up_slot = page_dir_get_nth_slot(page, slot_no + 1);
1167
1168
	n_owned = page_dir_slot_get_n_owned(slot);
1169
	up_n_owned = page_dir_slot_get_n_owned(up_slot);
1170
1171
	ut_ad(n_owned == PAGE_DIR_SLOT_MIN_N_OWNED - 1);
1172
1173
	/* If the upper slot has the minimum value of n_owned, we will merge
1174
	the two slots, therefore we assert: */
1175
	ut_ad(2 * PAGE_DIR_SLOT_MIN_N_OWNED - 1 <= PAGE_DIR_SLOT_MAX_N_OWNED);
1176
1177
	if (up_n_owned > PAGE_DIR_SLOT_MIN_N_OWNED) {
1178
1179
		/* In this case we can just transfer one record owned
1180
		by the upper slot to the property of the lower slot */
1181
		old_rec = page_dir_slot_get_rec(slot);
1182
		new_rec = page_rec_get_next(old_rec);
1183
1184
		rec_set_n_owned(old_rec, page_is_comp(page), 0);
1185
		rec_set_n_owned(new_rec, page_is_comp(page), n_owned + 1);
1186
1187
		page_dir_slot_set_rec(slot, new_rec);
1188
1189
		page_dir_slot_set_n_owned(up_slot, up_n_owned -1);
1190
	} else {
1191
		/* In this case we may merge the two slots */
1192
		page_dir_delete_slots(page, slot_no, 1);
1193
	}
1194
}
1195
1196
/****************************************************************
1197
Returns the middle record of the record list. If there are an even number
1198
of records in the list, returns the first record of the upper half-list. */
1199
1200
rec_t*
1201
page_get_middle_rec(
1202
/*================*/
1203
			/* out: middle record */
1204
	page_t*	page)	/* in: page */
1205
{
1206
	page_dir_slot_t*	slot;
1207
	ulint			middle;
1208
	ulint			i;
1209
	ulint			n_owned;
1210
	ulint			count;
1211
	rec_t*			rec;
1212
1213
	/* This many records we must leave behind */
1214
	middle = (page_get_n_recs(page) + 2) / 2;
1215
1216
	count = 0;
1217
1218
	for (i = 0;; i++) {
1219
1220
		slot = page_dir_get_nth_slot(page, i);
1221
		n_owned = page_dir_slot_get_n_owned(slot);
1222
1223
		if (count + n_owned > middle) {
1224
			break;
1225
		} else {
1226
			count += n_owned;
1227
		}
1228
	}
1229
1230
	ut_ad(i > 0);
1231
	slot = page_dir_get_nth_slot(page, i - 1);
1232
	rec = page_dir_slot_get_rec(slot);
1233
	rec = page_rec_get_next(rec);
1234
1235
	/* There are now count records behind rec */
1236
1237
	for (i = 0; i < middle - count; i++) {
1238
		rec = page_rec_get_next(rec);
1239
	}
1240
1241
	return(rec);
1242
}
1243
1244
/*******************************************************************
1245
Returns the number of records before the given record in chain.
1246
The number includes infimum and supremum records. */
1247
1248
ulint
1249
page_rec_get_n_recs_before(
1250
/*=======================*/
1251
			/* out: number of records */
1252
	rec_t*	rec)	/* in: the physical record */
1253
{
1254
	page_dir_slot_t*	slot;
1255
	rec_t*			slot_rec;
1256
	page_t*			page;
1257
	ulint			i;
1258
	ulint			comp;
1259
	lint			n	= 0;
1260
1261
	ut_ad(page_rec_check(rec));
1262
1263
	page = buf_frame_align(rec);
1264
	comp = page_is_comp(page);
1265
1266
	while (rec_get_n_owned(rec, comp) == 0) {
1267
1268
		rec = page_rec_get_next(rec);
1269
		n--;
1270
	}
1271
1272
	for (i = 0; ; i++) {
1273
		slot = page_dir_get_nth_slot(page, i);
1274
		slot_rec = page_dir_slot_get_rec(slot);
1275
1276
		n += rec_get_n_owned(slot_rec, comp);
1277
1278
		if (rec == slot_rec) {
1279
1280
			break;
1281
		}
1282
	}
1283
1284
	n--;
1285
1286
	ut_ad(n >= 0);
1287
1288
	return((ulint) n);
1289
}
1290
1291
/****************************************************************
1292
Prints record contents including the data relevant only in
1293
the index page context. */
1294
1295
void
1296
page_rec_print(
1297
/*===========*/
1298
	rec_t*		rec,	/* in: physical record */
1299
	const ulint*	offsets)/* in: record descriptor */
1300
{
1301
	ulint	comp	= page_is_comp(buf_frame_align(rec));
1302
1303
	ut_a(!comp == !rec_offs_comp(offsets));
1304
	rec_print_new(stderr, rec, offsets);
1305
	fprintf(stderr,
1306
		"            n_owned: %lu; heap_no: %lu; next rec: %lu\n",
1307
		(ulong) rec_get_n_owned(rec, comp),
1308
		(ulong) rec_get_heap_no(rec, comp),
1309
		(ulong) rec_get_next_offs(rec, comp));
1310
1311
	page_rec_check(rec);
1312
	rec_validate(rec, offsets);
1313
}
1314
1315
/*******************************************************************
1316
This is used to print the contents of the directory for
1317
debugging purposes. */
1318
1319
void
1320
page_dir_print(
1321
/*===========*/
1322
	page_t*	page,	/* in: index page */
1323
	ulint	pr_n)	/* in: print n first and n last entries */
1324
{
1325
	ulint			n;
1326
	ulint			i;
1327
	page_dir_slot_t*	slot;
1328
1329
	n = page_dir_get_n_slots(page);
1330
1331
	fprintf(stderr, "--------------------------------\n"
1332
		"PAGE DIRECTORY\n"
1333
		"Page address %p\n"
1334
		"Directory stack top at offs: %lu; number of slots: %lu\n",
1335
		page, (ulong)(page_dir_get_nth_slot(page, n - 1) - page),
1336
		(ulong) n);
1337
	for (i = 0; i < n; i++) {
1338
		slot = page_dir_get_nth_slot(page, i);
1339
		if ((i == pr_n) && (i < n - pr_n)) {
1340
			fputs("    ...   \n", stderr);
1341
		}
1342
		if ((i < pr_n) || (i >= n - pr_n)) {
1343
			fprintf(stderr,
1344
				"Contents of slot: %lu: n_owned: %lu,"
1345
				" rec offs: %lu\n",
1346
				(ulong) i,
1347
				(ulong) page_dir_slot_get_n_owned(slot),
1348
				(ulong)(page_dir_slot_get_rec(slot) - page));
1349
		}
1350
	}
1351
	fprintf(stderr, "Total of %lu records\n"
1352
		"--------------------------------\n",
1353
		(ulong) (2 + page_get_n_recs(page)));
1354
}
1355
1356
/*******************************************************************
1357
This is used to print the contents of the page record list for
1358
debugging purposes. */
1359
1360
void
1361
page_print_list(
1362
/*============*/
1363
	page_t*		page,	/* in: index page */
1364
	dict_index_t*	index,	/* in: dictionary index of the page */
1365
	ulint		pr_n)	/* in: print n first and n last entries */
1366
{
1367
	page_cur_t	cur;
1368
	ulint		count;
1369
	ulint		n_recs;
1370
	mem_heap_t*	heap		= NULL;
1371
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
1372
	ulint*		offsets		= offsets_;
1373
	*offsets_ = (sizeof offsets_) / sizeof *offsets_;
1374
1375
	ut_a((ibool)!!page_is_comp(page) == dict_table_is_comp(index->table));
1376
1377
	fprintf(stderr,
1378
		"--------------------------------\n"
1379
		"PAGE RECORD LIST\n"
1380
		"Page address %p\n", page);
1381
1382
	n_recs = page_get_n_recs(page);
1383
1384
	page_cur_set_before_first(page, &cur);
1385
	count = 0;
1386
	for (;;) {
1387
		offsets = rec_get_offsets(cur.rec, index, offsets,
1388
					  ULINT_UNDEFINED, &heap);
1389
		page_rec_print(cur.rec, offsets);
1390
1391
		if (count == pr_n) {
1392
			break;
1393
		}
1394
		if (page_cur_is_after_last(&cur)) {
1395
			break;
1396
		}
1397
		page_cur_move_to_next(&cur);
1398
		count++;
1399
	}
1400
1401
	if (n_recs > 2 * pr_n) {
1402
		fputs(" ... \n", stderr);
1403
	}
1404
1405
	while (!page_cur_is_after_last(&cur)) {
1406
		page_cur_move_to_next(&cur);
1407
1408
		if (count + pr_n >= n_recs) {
1409
			offsets = rec_get_offsets(cur.rec, index, offsets,
1410
						  ULINT_UNDEFINED, &heap);
1411
			page_rec_print(cur.rec, offsets);
1412
		}
1413
		count++;
1414
	}
1415
1416
	fprintf(stderr,
1417
		"Total of %lu records \n"
1418
		"--------------------------------\n",
1419
		(ulong) (count + 1));
1420
1421
	if (UNIV_LIKELY_NULL(heap)) {
1422
		mem_heap_free(heap);
1423
	}
1424
}
1425
1426
/*******************************************************************
1427
Prints the info in a page header. */
1428
1429
void
1430
page_header_print(
1431
/*==============*/
1432
	page_t*	page)
1433
{
1434
	fprintf(stderr,
1435
		"--------------------------------\n"
1436
		"PAGE HEADER INFO\n"
1437
		"Page address %p, n records %lu (%s)\n"
1438
		"n dir slots %lu, heap top %lu\n"
1439
		"Page n heap %lu, free %lu, garbage %lu\n"
1440
		"Page last insert %lu, direction %lu, n direction %lu\n",
1441
		page, (ulong) page_header_get_field(page, PAGE_N_RECS),
1442
		page_is_comp(page) ? "compact format" : "original format",
1443
		(ulong) page_header_get_field(page, PAGE_N_DIR_SLOTS),
1444
		(ulong) page_header_get_field(page, PAGE_HEAP_TOP),
1445
		(ulong) page_dir_get_n_heap(page),
1446
		(ulong) page_header_get_field(page, PAGE_FREE),
1447
		(ulong) page_header_get_field(page, PAGE_GARBAGE),
1448
		(ulong) page_header_get_field(page, PAGE_LAST_INSERT),
1449
		(ulong) page_header_get_field(page, PAGE_DIRECTION),
1450
		(ulong) page_header_get_field(page, PAGE_N_DIRECTION));
1451
}
1452
1453
/*******************************************************************
1454
This is used to print the contents of the page for
1455
debugging purposes. */
1456
1457
void
1458
page_print(
1459
/*=======*/
1460
	page_t*		page,	/* in: index page */
1461
	dict_index_t*	index,	/* in: dictionary index of the page */
1462
	ulint		dn,	/* in: print dn first and last entries
1463
				in directory */
1464
	ulint		rn)	/* in: print rn first and last records
1465
				in directory */
1466
{
1467
	page_header_print(page);
1468
	page_dir_print(page, dn);
1469
	page_print_list(page, index, rn);
1470
}
1471
1472
/*******************************************************************
1473
The following is used to validate a record on a page. This function
1474
differs from rec_validate as it can also check the n_owned field and
1475
the heap_no field. */
1476
1477
ibool
1478
page_rec_validate(
1479
/*==============*/
1480
				/* out: TRUE if ok */
1481
	rec_t*		rec,	/* in: physical record */
1482
	const ulint*	offsets)/* in: array returned by rec_get_offsets() */
1483
{
1484
	ulint	n_owned;
1485
	ulint	heap_no;
1486
	page_t*	page;
1487
	ulint	comp;
1488
1489
	page = buf_frame_align(rec);
1490
	comp = page_is_comp(page);
1491
	ut_a(!comp == !rec_offs_comp(offsets));
1492
1493
	page_rec_check(rec);
1494
	rec_validate(rec, offsets);
1495
1496
	n_owned = rec_get_n_owned(rec, comp);
1497
	heap_no = rec_get_heap_no(rec, comp);
1498
1499
	if (!(n_owned <= PAGE_DIR_SLOT_MAX_N_OWNED)) {
1500
		fprintf(stderr,
1501
			"InnoDB: Dir slot of rec %lu, n owned too big %lu\n",
1502
			(ulong)(rec - page), (ulong) n_owned);
1503
		return(FALSE);
1504
	}
1505
1506
	if (!(heap_no < page_dir_get_n_heap(page))) {
1507
		fprintf(stderr,
1508
			"InnoDB: Heap no of rec %lu too big %lu %lu\n",
1509
			(ulong)(rec - page), (ulong) heap_no,
1510
			(ulong) page_dir_get_n_heap(page));
1511
		return(FALSE);
1512
	}
1513
1514
	return(TRUE);
1515
}
1516
1517
/*******************************************************************
1518
Checks that the first directory slot points to the infimum record and
1519
the last to the supremum. This function is intended to track if the
1520
bug fixed in 4.0.14 has caused corruption to users' databases. */
1521
1522
void
1523
page_check_dir(
1524
/*===========*/
1525
	page_t*	page)	/* in: index page */
1526
{
1527
	ulint	n_slots;
1528
1529
	n_slots = page_dir_get_n_slots(page);
1530
1531
	if (page_dir_slot_get_rec(page_dir_get_nth_slot(page, 0))
1532
	    != page_get_infimum_rec(page)) {
1533
1534
		fprintf(stderr,
1535
			"InnoDB: Page directory corruption:"
1536
			" infimum not pointed to\n");
1537
		buf_page_print(page);
1538
	}
1539
1540
	if (page_dir_slot_get_rec(page_dir_get_nth_slot(page, n_slots - 1))
1541
	    != page_get_supremum_rec(page)) {
1542
1543
		fprintf(stderr,
1544
			"InnoDB: Page directory corruption:"
1545
			" supremum not pointed to\n");
1546
		buf_page_print(page);
1547
	}
1548
}
1549
1550
/*******************************************************************
1551
This function checks the consistency of an index page when we do not
1552
know the index. This is also resilient so that this should never crash
1553
even if the page is total garbage. */
1554
1555
ibool
1556
page_simple_validate(
1557
/*=================*/
1558
			/* out: TRUE if ok */
1559
	page_t*	page)	/* in: index page */
1560
{
1561
	page_cur_t	cur;
1562
	page_dir_slot_t* slot;
1563
	ulint		slot_no;
1564
	ulint		n_slots;
1565
	rec_t*		rec;
1566
	byte*		rec_heap_top;
1567
	ulint		count;
1568
	ulint		own_count;
1569
	ibool		ret	= FALSE;
1570
	ulint		comp	= page_is_comp(page);
1571
1572
	/* Check first that the record heap and the directory do not
1573
	overlap. */
1574
1575
	n_slots = page_dir_get_n_slots(page);
1576
1577
	if (n_slots > UNIV_PAGE_SIZE / 4) {
1578
		fprintf(stderr,
1579
			"InnoDB: Nonsensical number %lu of page dir slots\n",
1580
			(ulong) n_slots);
1581
1582
		goto func_exit;
1583
	}
1584
1585
	rec_heap_top = page_header_get_ptr(page, PAGE_HEAP_TOP);
1586
1587
	if (rec_heap_top > page_dir_get_nth_slot(page, n_slots - 1)) {
1588
1589
		fprintf(stderr,
1590
			"InnoDB: Record heap and dir overlap on a page,"
1591
			" heap top %lu, dir %lu\n",
1592
			(ulong)
1593
			(page_header_get_ptr(page, PAGE_HEAP_TOP) - page),
1594
			(ulong)
1595
			(page_dir_get_nth_slot(page, n_slots - 1) - page));
1596
1597
		goto func_exit;
1598
	}
1599
1600
	/* Validate the record list in a loop checking also that it is
1601
	consistent with the page record directory. */
1602
1603
	count = 0;
1604
	own_count = 1;
1605
	slot_no = 0;
1606
	slot = page_dir_get_nth_slot(page, slot_no);
1607
1608
	page_cur_set_before_first(page, &cur);
1609
1610
	for (;;) {
1611
		rec = (&cur)->rec;
1612
1613
		if (rec > rec_heap_top) {
1614
			fprintf(stderr,
1615
				"InnoDB: Record %lu is above"
1616
				" rec heap top %lu\n",
1617
				(ulong)(rec - page),
1618
				(ulong)(rec_heap_top - page));
1619
1620
			goto func_exit;
1621
		}
1622
1623
		if (rec_get_n_owned(rec, comp) != 0) {
1624
			/* This is a record pointed to by a dir slot */
1625
			if (rec_get_n_owned(rec, comp) != own_count) {
1626
1627
				fprintf(stderr,
1628
					"InnoDB: Wrong owned count %lu, %lu,"
1629
					" rec %lu\n",
1630
					(ulong) rec_get_n_owned(rec, comp),
1631
					(ulong) own_count,
1632
					(ulong)(rec - page));
1633
1634
				goto func_exit;
1635
			}
1636
1637
			if (page_dir_slot_get_rec(slot) != rec) {
1638
				fprintf(stderr,
1639
					"InnoDB: Dir slot does not point"
1640
					" to right rec %lu\n",
1641
					(ulong)(rec - page));
1642
1643
				goto func_exit;
1644
			}
1645
1646
			own_count = 0;
1647
1648
			if (!page_cur_is_after_last(&cur)) {
1649
				slot_no++;
1650
				slot = page_dir_get_nth_slot(page, slot_no);
1651
			}
1652
		}
1653
1654
		if (page_cur_is_after_last(&cur)) {
1655
1656
			break;
1657
		}
1658
1659
		if (rec_get_next_offs(rec, comp) < FIL_PAGE_DATA
1660
		    || rec_get_next_offs(rec, comp) >= UNIV_PAGE_SIZE) {
1661
			fprintf(stderr,
1662
				"InnoDB: Next record offset"
1663
				" nonsensical %lu for rec %lu\n",
1664
				(ulong) rec_get_next_offs(rec, comp),
1665
				(ulong)(rec - page));
1666
1667
			goto func_exit;
1668
		}
1669
1670
		count++;
1671
1672
		if (count > UNIV_PAGE_SIZE) {
1673
			fprintf(stderr,
1674
				"InnoDB: Page record list appears"
1675
				" to be circular %lu\n",
1676
				(ulong) count);
1677
			goto func_exit;
1678
		}
1679
1680
		page_cur_move_to_next(&cur);
1681
		own_count++;
1682
	}
1683
1684
	if (rec_get_n_owned(rec, comp) == 0) {
1685
		fprintf(stderr, "InnoDB: n owned is zero in a supremum rec\n");
1686
1687
		goto func_exit;
1688
	}
1689
1690
	if (slot_no != n_slots - 1) {
1691
		fprintf(stderr, "InnoDB: n slots wrong %lu, %lu\n",
1692
			(ulong) slot_no, (ulong) (n_slots - 1));
1693
		goto func_exit;
1694
	}
1695
1696
	if (page_header_get_field(page, PAGE_N_RECS) + 2 != count + 1) {
1697
		fprintf(stderr, "InnoDB: n recs wrong %lu %lu\n",
1698
			(ulong) page_header_get_field(page, PAGE_N_RECS) + 2,
1699
			(ulong) (count + 1));
1700
1701
		goto func_exit;
1702
	}
1703
1704
	/* Check then the free list */
1705
	rec = page_header_get_ptr(page, PAGE_FREE);
1706
1707
	while (rec != NULL) {
1708
		if (rec < page + FIL_PAGE_DATA
1709
		    || rec >= page + UNIV_PAGE_SIZE) {
1710
			fprintf(stderr,
1711
				"InnoDB: Free list record has"
1712
				" a nonsensical offset %lu\n",
1713
				(ulong) (rec - page));
1714
1715
			goto func_exit;
1716
		}
1717
1718
		if (rec > rec_heap_top) {
1719
			fprintf(stderr,
1720
				"InnoDB: Free list record %lu"
1721
				" is above rec heap top %lu\n",
1722
				(ulong) (rec - page),
1723
				(ulong) (rec_heap_top - page));
1724
1725
			goto func_exit;
1726
		}
1727
1728
		count++;
1729
1730
		if (count > UNIV_PAGE_SIZE) {
1731
			fprintf(stderr,
1732
				"InnoDB: Page free list appears"
1733
				" to be circular %lu\n",
1734
				(ulong) count);
1735
			goto func_exit;
1736
		}
1737
1738
		rec = page_rec_get_next(rec);
1739
	}
1740
1741
	if (page_dir_get_n_heap(page) != count + 1) {
1742
1743
		fprintf(stderr, "InnoDB: N heap is wrong %lu, %lu\n",
1744
			(ulong) page_dir_get_n_heap(page),
1745
			(ulong) (count + 1));
1746
1747
		goto func_exit;
1748
	}
1749
1750
	ret = TRUE;
1751
1752
func_exit:
1753
	return(ret);
1754
}
1755
1756
/*******************************************************************
1757
This function checks the consistency of an index page. */
1758
1759
ibool
1760
page_validate(
1761
/*==========*/
1762
				/* out: TRUE if ok */
1763
	page_t*		page,	/* in: index page */
1764
	dict_index_t*	index)	/* in: data dictionary index containing
1765
				the page record type definition */
1766
{
1767
	page_dir_slot_t* slot;
1768
	mem_heap_t*	heap;
1769
	page_cur_t	cur;
1770
	byte*		buf;
1771
	ulint		count;
1772
	ulint		own_count;
1773
	ulint		slot_no;
1774
	ulint		data_size;
1775
	rec_t*		rec;
1776
	rec_t*		old_rec		= NULL;
1777
	ulint		offs;
1778
	ulint		n_slots;
1779
	ibool		ret		= FALSE;
1780
	ulint		i;
1781
	ulint		comp		= page_is_comp(page);
1782
	ulint*		offsets		= NULL;
1783
	ulint*		old_offsets	= NULL;
1784
1785
	if ((ibool)!!comp != dict_table_is_comp(index->table)) {
1786
		fputs("InnoDB: 'compact format' flag mismatch\n", stderr);
1787
		goto func_exit2;
1788
	}
1789
	if (!page_simple_validate(page)) {
1790
		goto func_exit2;
1791
	}
1792
1793
	heap = mem_heap_create(UNIV_PAGE_SIZE + 200);
1794
1795
	/* The following buffer is used to check that the
1796
	records in the page record heap do not overlap */
1797
1798
	buf = mem_heap_alloc(heap, UNIV_PAGE_SIZE);
1799
	memset(buf, 0, UNIV_PAGE_SIZE);
1800
1801
	/* Check first that the record heap and the directory do not
1802
	overlap. */
1803
1804
	n_slots = page_dir_get_n_slots(page);
1805
1806
	if (!(page_header_get_ptr(page, PAGE_HEAP_TOP)
1807
	      <= page_dir_get_nth_slot(page, n_slots - 1))) {
1808
1809
		fputs("InnoDB: Record heap and dir overlap on a page ",
1810
		      stderr);
1811
		dict_index_name_print(stderr, NULL, index);
1812
		fprintf(stderr, ", %p, %p\n",
1813
			page_header_get_ptr(page, PAGE_HEAP_TOP),
1814
			page_dir_get_nth_slot(page, n_slots - 1));
1815
1816
		goto func_exit;
1817
	}
1818
1819
	/* Validate the record list in a loop checking also that
1820
	it is consistent with the directory. */
1821
	count = 0;
1822
	data_size = 0;
1823
	own_count = 1;
1824
	slot_no = 0;
1825
	slot = page_dir_get_nth_slot(page, slot_no);
1826
1827
	page_cur_set_before_first(page, &cur);
1828
1829
	for (;;) {
1830
		rec = cur.rec;
1831
		offsets = rec_get_offsets(rec, index, offsets,
1832
					  ULINT_UNDEFINED, &heap);
1833
1834
		if (comp && page_rec_is_user_rec(rec)
1835
		    && rec_get_node_ptr_flag(rec)
1836
		    != (ibool)
1837
		    (btr_page_get_level_low(page) != 0)) {
1838
			fputs("InnoDB: node_ptr flag mismatch\n", stderr);
1839
			goto func_exit;
1840
		}
1841
1842
		if (!page_rec_validate(rec, offsets)) {
1843
			goto func_exit;
1844
		}
1845
1846
		/* Check that the records are in the ascending order */
1847
		if ((count >= 2) && (!page_cur_is_after_last(&cur))) {
1848
			if (!(1 == cmp_rec_rec(rec, old_rec,
1849
					       offsets, old_offsets, index))) {
1850
				fprintf(stderr,
1851
					"InnoDB: Records in wrong order"
1852
					" on page %lu ",
1853
					(ulong) buf_frame_get_page_no(page));
1854
				dict_index_name_print(stderr, NULL, index);
1855
				fputs("\nInnoDB: previous record ", stderr);
1856
				rec_print_new(stderr, old_rec, old_offsets);
1857
				fputs("\nInnoDB: record ", stderr);
1858
				rec_print_new(stderr, rec, offsets);
1859
				putc('\n', stderr);
1860
1861
				goto func_exit;
1862
			}
1863
		}
1864
1865
		if (page_rec_is_user_rec(rec)) {
1866
1867
			data_size += rec_offs_size(offsets);
1868
		}
1869
1870
		offs = rec_get_start(rec, offsets) - page;
1871
1872
		for (i = 0; i < rec_offs_size(offsets); i++) {
1873
			if (!buf[offs + i] == 0) {
1874
				/* No other record may overlap this */
1875
1876
				fputs("InnoDB: Record overlaps another\n",
1877
				      stderr);
1878
				goto func_exit;
1879
			}
1880
1881
			buf[offs + i] = 1;
1882
		}
1883
1884
		if (rec_get_n_owned(rec, comp) != 0) {
1885
			/* This is a record pointed to by a dir slot */
1886
			if (rec_get_n_owned(rec, comp) != own_count) {
1887
				fprintf(stderr,
1888
					"InnoDB: Wrong owned count %lu, %lu\n",
1889
					(ulong) rec_get_n_owned(rec, comp),
1890
					(ulong) own_count);
1891
				goto func_exit;
1892
			}
1893
1894
			if (page_dir_slot_get_rec(slot) != rec) {
1895
				fputs("InnoDB: Dir slot does not"
1896
				      " point to right rec\n",
1897
				      stderr);
1898
				goto func_exit;
1899
			}
1900
1901
			page_dir_slot_check(slot);
1902
1903
			own_count = 0;
1904
			if (!page_cur_is_after_last(&cur)) {
1905
				slot_no++;
1906
				slot = page_dir_get_nth_slot(page, slot_no);
1907
			}
1908
		}
1909
1910
		if (page_cur_is_after_last(&cur)) {
1911
			break;
1912
		}
1913
1914
		if (rec_get_next_offs(rec, comp) < FIL_PAGE_DATA
1915
		    || rec_get_next_offs(rec, comp) >= UNIV_PAGE_SIZE) {
1916
			fprintf(stderr,
1917
				"InnoDB: Next record offset wrong %lu\n",
1918
				(ulong) rec_get_next_offs(rec, comp));
1919
			goto func_exit;
1920
		}
1921
1922
		count++;
1923
		page_cur_move_to_next(&cur);
1924
		own_count++;
1925
		old_rec = rec;
1926
		/* set old_offsets to offsets; recycle offsets */
1927
		{
1928
			ulint* offs = old_offsets;
1929
			old_offsets = offsets;
1930
			offsets = offs;
1931
		}
1932
	}
1933
1934
	if (rec_get_n_owned(rec, comp) == 0) {
1935
		fputs("InnoDB: n owned is zero\n", stderr);
1936
		goto func_exit;
1937
	}
1938
1939
	if (slot_no != n_slots - 1) {
1940
		fprintf(stderr, "InnoDB: n slots wrong %lu %lu\n",
1941
			(ulong) slot_no, (ulong) (n_slots - 1));
1942
		goto func_exit;
1943
	}
1944
1945
	if (page_header_get_field(page, PAGE_N_RECS) + 2 != count + 1) {
1946
		fprintf(stderr, "InnoDB: n recs wrong %lu %lu\n",
1947
			(ulong) page_header_get_field(page, PAGE_N_RECS) + 2,
1948
			(ulong) (count + 1));
1949
		goto func_exit;
1950
	}
1951
1952
	if (data_size != page_get_data_size(page)) {
1953
		fprintf(stderr,
1954
			"InnoDB: Summed data size %lu, returned by func %lu\n",
1955
			(ulong) data_size, (ulong) page_get_data_size(page));
1956
		goto func_exit;
1957
	}
1958
1959
	/* Check then the free list */
1960
	rec = page_header_get_ptr(page, PAGE_FREE);
1961
1962
	while (rec != NULL) {
1963
		offsets = rec_get_offsets(rec, index, offsets,
1964
					  ULINT_UNDEFINED, &heap);
1965
		if (!page_rec_validate(rec, offsets)) {
1966
1967
			goto func_exit;
1968
		}
1969
1970
		count++;
1971
		offs = rec_get_start(rec, offsets) - page;
1972
1973
		for (i = 0; i < rec_offs_size(offsets); i++) {
1974
1975
			if (buf[offs + i] != 0) {
1976
				fputs("InnoDB: Record overlaps another"
1977
				      " in free list\n", stderr);
1978
				goto func_exit;
1979
			}
1980
1981
			buf[offs + i] = 1;
1982
		}
1983
1984
		rec = page_rec_get_next(rec);
1985
	}
1986
1987
	if (page_dir_get_n_heap(page) != count + 1) {
1988
		fprintf(stderr, "InnoDB: N heap is wrong %lu %lu\n",
1989
			(ulong) page_dir_get_n_heap(page),
1990
			(ulong) count + 1);
1991
		goto func_exit;
1992
	}
1993
1994
	ret = TRUE;
1995
1996
func_exit:
1997
	mem_heap_free(heap);
1998
1999
	if (ret == FALSE) {
2000
func_exit2:
2001
		fprintf(stderr, "InnoDB: Apparent corruption in page %lu in ",
2002
			(ulong) buf_frame_get_page_no(page));
2003
		dict_index_name_print(stderr, NULL, index);
2004
		putc('\n', stderr);
2005
		buf_page_print(page);
2006
	}
2007
2008
	return(ret);
2009
}
2010
2011
/*******************************************************************
2012
Looks in the page record list for a record with the given heap number. */
2013
2014
rec_t*
2015
page_find_rec_with_heap_no(
2016
/*=======================*/
2017
			/* out: record, NULL if not found */
2018
	page_t*	page,	/* in: index page */
2019
	ulint	heap_no)/* in: heap number */
2020
{
2021
	page_cur_t	cur;
2022
2023
	page_cur_set_before_first(page, &cur);
2024
2025
	for (;;) {
2026
		if (rec_get_heap_no(cur.rec, page_is_comp(page)) == heap_no) {
2027
2028
			return(cur.rec);
2029
		}
2030
2031
		if (page_cur_is_after_last(&cur)) {
2032
2033
			return(NULL);
2034
		}
2035
2036
		page_cur_move_to_next(&cur);
2037
	}
2038
}