1
/******************************************************
4
(c) 1994-1996 Innobase Oy
6
Created 2/2/1994 Heikki Tuuri
7
*******************************************************/
10
#include "page0page.h"
12
#include "page0page.ic"
18
#include "lock0lock.h"
28
/* The index page consists of a page header which contains the page's
29
id and other information. On top of it are the index records
30
in a heap linked into a one way linear list according to alphabetic order.
32
Just below page end is an array of pointers which we call page directory,
33
to about every sixth record in the list. The pointers are placed in
34
the directory in the alphabetical order of the records pointed to,
35
enabling us to do a binary search using the array. Each slot number I
36
in the directory points to a record, where a 4-bit field contains a count
37
of those records which are in the linear list between pointer I and
38
the pointer I - 1 in the directory, including the record
39
pointed to by pointer I and not including the record pointed to by I - 1.
40
We say that the record pointed to by slot I, or that slot I, owns
41
these records. The count is always kept in the range 4 to 8, with
42
the exception that it is 1 for the first slot, and 1--8 for the last slot.
44
An essentially binary search can be performed in the list of index
45
records, like we could do if we had a pointer to every record in the
46
page directory. The data structure is, however, more efficient when
47
we are doing inserts, because most inserts are just pushed on a heap.
48
Only every 8th insert requires a block move in the directory pointer
49
table, which itself is quite small. A record is deleted from the page
50
by just taking it off the linear list and updating the number of owned
51
records-field of the record which owns it, and updating the page directory,
52
if necessary. A special case is the one when the record owns itself.
53
Because the overhead of inserts is so small, we may also increase the
54
page size from the projected default of 8 kB to 64 kB without too
55
much loss of efficiency in inserts. A bigger page becomes relevant
56
when the disk transfer rate compared to seek and latency time rises.
57
On the present system, the page size is set so that the page transfer
58
time (3 ms) is 20 % of the disk random access time (15 ms).
60
When the page is split, merged, or becomes full but contains deleted
61
records, we have to reorganize the page.
63
Assuming a page size of 8 kB, a typical index page of a secondary
64
index contains 300 index entries, and the size of the page directory
65
is 50 x 4 bytes = 200 bytes. */
67
/*******************************************************************
68
Looks for the directory slot which owns the given record. */
71
page_dir_find_owner_slot(
72
/*=====================*/
73
/* out: the directory slot number */
74
const rec_t* rec) /* in: the physical record */
77
register uint16 rec_offs_bytes;
78
register const page_dir_slot_t* slot;
79
register const page_dir_slot_t* first_slot;
80
register const rec_t* r = rec;
82
ut_ad(page_rec_check(rec));
84
page = page_align(rec);
85
first_slot = page_dir_get_nth_slot(page, 0);
86
slot = page_dir_get_nth_slot(page, page_dir_get_n_slots(page) - 1);
88
if (page_is_comp(page)) {
89
while (rec_get_n_owned_new(r) == 0) {
90
r = rec_get_next_ptr_const(r, TRUE);
91
ut_ad(r >= page + PAGE_NEW_SUPREMUM);
92
ut_ad(r < page + (UNIV_PAGE_SIZE - PAGE_DIR));
95
while (rec_get_n_owned_old(r) == 0) {
96
r = rec_get_next_ptr_const(r, FALSE);
97
ut_ad(r >= page + PAGE_OLD_SUPREMUM);
98
ut_ad(r < page + (UNIV_PAGE_SIZE - PAGE_DIR));
102
rec_offs_bytes = mach_encode_2(r - page);
104
while (UNIV_LIKELY(*(uint16*) slot != rec_offs_bytes)) {
106
if (UNIV_UNLIKELY(slot == first_slot)) {
108
"InnoDB: Probable data corruption on"
110
"InnoDB: Original record ",
111
(ulong) page_get_page_no(page));
113
if (page_is_comp(page)) {
114
fputs("(compact record)", stderr);
116
rec_print_old(stderr, rec);
120
"InnoDB: on that page.\n"
121
"InnoDB: Cannot find the dir slot for record ",
123
if (page_is_comp(page)) {
124
fputs("(compact record)", stderr);
126
rec_print_old(stderr, page
127
+ mach_decode_2(rec_offs_bytes));
130
"InnoDB: on that page!\n", stderr);
132
buf_page_print(page, 0);
137
slot += PAGE_DIR_SLOT_SIZE;
140
return(((ulint) (first_slot - slot)) / PAGE_DIR_SLOT_SIZE);
143
/******************************************************************
144
Used to check the consistency of a directory slot. */
149
/* out: TRUE if succeed */
150
page_dir_slot_t* slot) /* in: slot */
158
page = page_align(slot);
160
n_slots = page_dir_get_n_slots(page);
162
ut_a(slot <= page_dir_get_nth_slot(page, 0));
163
ut_a(slot >= page_dir_get_nth_slot(page, n_slots - 1));
165
ut_a(page_rec_check(page_dir_slot_get_rec(slot)));
167
if (page_is_comp(page)) {
168
n_owned = rec_get_n_owned_new(page_dir_slot_get_rec(slot));
170
n_owned = rec_get_n_owned_old(page_dir_slot_get_rec(slot));
173
if (slot == page_dir_get_nth_slot(page, 0)) {
175
} else if (slot == page_dir_get_nth_slot(page, n_slots - 1)) {
177
ut_a(n_owned <= PAGE_DIR_SLOT_MAX_N_OWNED);
179
ut_a(n_owned >= PAGE_DIR_SLOT_MIN_N_OWNED);
180
ut_a(n_owned <= PAGE_DIR_SLOT_MAX_N_OWNED);
186
/*****************************************************************
187
Sets the max trx id field value. */
192
buf_block_t* block, /* in/out: page */
193
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
194
dulint trx_id) /* in: transaction id */
196
const ibool is_hashed = block->is_hashed;
197
page_t* page = buf_block_get_frame(block);
200
rw_lock_x_lock(&btr_search_latch);
203
/* It is not necessary to write this change to the redo log, as
204
during a database recovery we assume that the max trx id of every
205
page is the maximum trx id assigned before the crash. */
207
mach_write_to_8(page + (PAGE_HEADER + PAGE_MAX_TRX_ID), trx_id);
208
if (UNIV_LIKELY_NULL(page_zip)) {
209
page_zip_write_header(page_zip,
210
page + (PAGE_HEADER + PAGE_MAX_TRX_ID),
215
rw_lock_x_unlock(&btr_search_latch);
219
/****************************************************************
220
Allocates a block of memory from the heap of an index page. */
225
/* out: pointer to start of allocated
226
buffer, or NULL if allocation fails */
227
page_t* page, /* in/out: index page */
228
page_zip_des_t* page_zip,/* in/out: compressed page with enough
229
space available for inserting the record,
231
ulint need, /* in: total number of bytes needed */
232
ulint* heap_no)/* out: this contains the heap number
233
of the allocated record
234
if allocation succeeds */
239
ut_ad(page && heap_no);
241
avl_space = page_get_max_insert_size(page, 1);
243
if (avl_space >= need) {
244
block = page_header_get_ptr(page, PAGE_HEAP_TOP);
246
page_header_set_ptr(page, page_zip, PAGE_HEAP_TOP,
248
*heap_no = page_dir_get_n_heap(page);
250
page_dir_set_n_heap(page, page_zip, 1 + *heap_no);
258
/**************************************************************
259
Writes a log record of page creation. */
262
page_create_write_log(
263
/*==================*/
264
buf_frame_t* frame, /* in: a buffer frame where the page is
266
mtr_t* mtr, /* in: mini-transaction handle */
267
ibool comp) /* in: TRUE=compact page format */
269
mlog_write_initial_log_record(frame, comp
270
? MLOG_COMP_PAGE_CREATE
271
: MLOG_PAGE_CREATE, mtr);
274
/***************************************************************
275
Parses a redo log record of creating a page. */
280
/* out: end of log record or NULL */
281
byte* ptr, /* in: buffer */
282
byte* end_ptr __attribute__((unused)), /* in: buffer end */
283
ulint comp, /* in: nonzero=compact page format */
284
buf_block_t* block, /* in: block or NULL */
285
mtr_t* mtr) /* in: mtr or NULL */
287
ut_ad(ptr && end_ptr);
289
/* The record is empty, except for the record initial part */
292
page_create(block, mtr, comp);
298
/**************************************************************
299
The index page creation function. */
304
/* out: pointer to the page */
305
buf_block_t* block, /* in: a buffer block where the
307
ulint comp) /* in: nonzero=compact page format */
309
page_dir_slot_t* slot;
321
#if PAGE_BTR_IBUF_FREE_LIST + FLST_BASE_NODE_SIZE > PAGE_DATA
322
# error "PAGE_BTR_IBUF_FREE_LIST + FLST_BASE_NODE_SIZE > PAGE_DATA"
324
#if PAGE_BTR_IBUF_FREE_LIST_NODE + FLST_NODE_SIZE > PAGE_DATA
325
# error "PAGE_BTR_IBUF_FREE_LIST_NODE + FLST_NODE_SIZE > PAGE_DATA"
328
/* The infimum and supremum records use a dummy index. */
329
if (UNIV_LIKELY(comp)) {
330
index = srv_sys->dummy_ind2;
332
index = srv_sys->dummy_ind1;
335
/* 1. INCREMENT MODIFY CLOCK */
336
buf_block_modify_clock_inc(block);
338
page = buf_block_get_frame(block);
340
fil_page_set_type(page, FIL_PAGE_INDEX);
342
heap = mem_heap_create(200);
344
/* 3. CREATE THE INFIMUM AND SUPREMUM RECORDS */
346
/* Create first a data tuple for infimum record */
347
tuple = dtuple_create(heap, 1);
348
dtuple_set_info_bits(tuple, REC_STATUS_INFIMUM);
349
field = dtuple_get_nth_field(tuple, 0);
351
dfield_set_data(field, "infimum", 8);
352
dtype_set(dfield_get_type(field),
353
DATA_VARCHAR, DATA_ENGLISH | DATA_NOT_NULL, 8);
354
/* Set the corresponding physical record to its place in the page
357
heap_top = page + PAGE_DATA;
359
infimum_rec = rec_convert_dtuple_to_rec(heap_top, index, tuple, 0);
361
if (UNIV_LIKELY(comp)) {
362
ut_a(infimum_rec == page + PAGE_NEW_INFIMUM);
364
rec_set_n_owned_new(infimum_rec, NULL, 1);
365
rec_set_heap_no_new(infimum_rec, 0);
367
ut_a(infimum_rec == page + PAGE_OLD_INFIMUM);
369
rec_set_n_owned_old(infimum_rec, 1);
370
rec_set_heap_no_old(infimum_rec, 0);
373
offsets = rec_get_offsets(infimum_rec, index, NULL,
374
ULINT_UNDEFINED, &heap);
376
heap_top = rec_get_end(infimum_rec, offsets);
378
/* Create then a tuple for supremum */
380
tuple = dtuple_create(heap, 1);
381
dtuple_set_info_bits(tuple, REC_STATUS_SUPREMUM);
382
field = dtuple_get_nth_field(tuple, 0);
384
dfield_set_data(field, "supremum", comp ? 8 : 9);
385
dtype_set(dfield_get_type(field),
386
DATA_VARCHAR, DATA_ENGLISH | DATA_NOT_NULL, comp ? 8 : 9);
388
supremum_rec = rec_convert_dtuple_to_rec(heap_top, index, tuple, 0);
390
if (UNIV_LIKELY(comp)) {
391
ut_a(supremum_rec == page + PAGE_NEW_SUPREMUM);
393
rec_set_n_owned_new(supremum_rec, NULL, 1);
394
rec_set_heap_no_new(supremum_rec, 1);
396
ut_a(supremum_rec == page + PAGE_OLD_SUPREMUM);
398
rec_set_n_owned_old(supremum_rec, 1);
399
rec_set_heap_no_old(supremum_rec, 1);
402
offsets = rec_get_offsets(supremum_rec, index, offsets,
403
ULINT_UNDEFINED, &heap);
404
heap_top = rec_get_end(supremum_rec, offsets);
406
ut_ad(heap_top == page
407
+ (comp ? PAGE_NEW_SUPREMUM_END : PAGE_OLD_SUPREMUM_END));
411
/* 4. INITIALIZE THE PAGE */
413
page_header_set_field(page, NULL, PAGE_N_DIR_SLOTS, 2);
414
page_header_set_ptr(page, NULL, PAGE_HEAP_TOP, heap_top);
415
page_header_set_field(page, NULL, PAGE_N_HEAP, comp
416
? 0x8000 | PAGE_HEAP_NO_USER_LOW
417
: PAGE_HEAP_NO_USER_LOW);
418
page_header_set_ptr(page, NULL, PAGE_FREE, NULL);
419
page_header_set_field(page, NULL, PAGE_GARBAGE, 0);
420
page_header_set_ptr(page, NULL, PAGE_LAST_INSERT, NULL);
421
page_header_set_field(page, NULL, PAGE_DIRECTION, PAGE_NO_DIRECTION);
422
page_header_set_field(page, NULL, PAGE_N_DIRECTION, 0);
423
page_header_set_field(page, NULL, PAGE_N_RECS, 0);
424
page_set_max_trx_id(block, NULL, ut_dulint_zero);
425
memset(heap_top, 0, UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START
426
- page_offset(heap_top));
428
/* 5. SET POINTERS IN RECORDS AND DIR SLOTS */
430
/* Set the slots to point to infimum and supremum. */
432
slot = page_dir_get_nth_slot(page, 0);
433
page_dir_slot_set_rec(slot, infimum_rec);
435
slot = page_dir_get_nth_slot(page, 1);
436
page_dir_slot_set_rec(slot, supremum_rec);
438
/* Set the next pointers in infimum and supremum */
440
if (UNIV_LIKELY(comp)) {
441
rec_set_next_offs_new(infimum_rec, PAGE_NEW_SUPREMUM);
442
rec_set_next_offs_new(supremum_rec, 0);
444
rec_set_next_offs_old(infimum_rec, PAGE_OLD_SUPREMUM);
445
rec_set_next_offs_old(supremum_rec, 0);
451
/**************************************************************
452
Create an uncompressed B-tree index page. */
457
/* out: pointer to the page */
458
buf_block_t* block, /* in: a buffer block where the
460
mtr_t* mtr, /* in: mini-transaction handle */
461
ulint comp) /* in: nonzero=compact page format */
463
page_create_write_log(buf_block_get_frame(block), mtr, comp);
464
return(page_create_low(block, comp));
467
/**************************************************************
468
Create a compressed B-tree index page. */
473
/* out: pointer to the page */
474
buf_block_t* block, /* in/out: a buffer frame where the
476
dict_index_t* index, /* in: the index of the page */
477
ulint level, /* in: the B-tree level of the page */
478
mtr_t* mtr) /* in: mini-transaction handle */
481
page_zip_des_t* page_zip = buf_block_get_page_zip(block);
486
ut_ad(dict_table_is_comp(index->table));
488
page = page_create_low(block, TRUE);
489
mach_write_to_2(page + PAGE_HEADER + PAGE_LEVEL, level);
491
if (UNIV_UNLIKELY(!page_zip_compress(page_zip, page, index, mtr))) {
492
/* The compression of a newly created page
493
should always succeed. */
500
/*****************************************************************
501
Differs from page_copy_rec_list_end, because this function does not
502
touch the lock table and max trx id on page or compress the page. */
505
page_copy_rec_list_end_no_locks(
506
/*============================*/
507
buf_block_t* new_block, /* in: index page to copy to */
508
buf_block_t* block, /* in: index page of rec */
509
rec_t* rec, /* in: record on page */
510
dict_index_t* index, /* in: record descriptor */
511
mtr_t* mtr) /* in: mtr */
513
page_t* new_page = buf_block_get_frame(new_block);
516
mem_heap_t* heap = NULL;
517
ulint offsets_[REC_OFFS_NORMAL_SIZE];
518
ulint* offsets = offsets_;
519
rec_offs_init(offsets_);
521
page_cur_position(rec, block, &cur1);
523
if (page_cur_is_before_first(&cur1)) {
525
page_cur_move_to_next(&cur1);
528
ut_a((ibool)!!page_is_comp(new_page)
529
== dict_table_is_comp(index->table));
530
ut_a(page_is_comp(new_page) == page_rec_is_comp(rec));
531
ut_a(mach_read_from_2(new_page + UNIV_PAGE_SIZE - 10) == (ulint)
532
(page_is_comp(new_page) ? PAGE_NEW_INFIMUM : PAGE_OLD_INFIMUM));
534
cur2 = page_get_infimum_rec(buf_block_get_frame(new_block));
536
/* Copy records from the original page to the new page */
538
while (!page_cur_is_after_last(&cur1)) {
539
rec_t* cur1_rec = page_cur_get_rec(&cur1);
541
offsets = rec_get_offsets(cur1_rec, index, offsets,
542
ULINT_UNDEFINED, &heap);
543
ins_rec = page_cur_insert_rec_low(cur2, index,
544
cur1_rec, offsets, mtr);
545
if (UNIV_UNLIKELY(!ins_rec)) {
546
/* Track an assertion failure reported on the mailing
547
list on June 18th, 2003 */
549
buf_page_print(new_page, 0);
550
buf_page_print(page_align(rec), 0);
551
ut_print_timestamp(stderr);
554
"InnoDB: rec offset %lu, cur1 offset %lu,"
555
" cur2 offset %lu\n",
556
(ulong) page_offset(rec),
557
(ulong) page_offset(page_cur_get_rec(&cur1)),
558
(ulong) page_offset(cur2));
562
page_cur_move_to_next(&cur1);
566
if (UNIV_LIKELY_NULL(heap)) {
571
/*****************************************************************
572
Copies records from page to new_page, from a given record onward,
573
including that record. Infimum and supremum records are not copied.
574
The records are copied to the start of the record list on new_page. */
577
page_copy_rec_list_end(
578
/*===================*/
579
/* out: pointer to the original
580
successor of the infimum record
581
on new_page, or NULL on zip overflow
582
(new_block will be decompressed) */
583
buf_block_t* new_block, /* in/out: index page to copy to */
584
buf_block_t* block, /* in: index page containing rec */
585
rec_t* rec, /* in: record on page */
586
dict_index_t* index, /* in: record descriptor */
587
mtr_t* mtr) /* in: mtr */
589
page_t* new_page = buf_block_get_frame(new_block);
590
page_zip_des_t* new_page_zip = buf_block_get_page_zip(new_block);
591
page_t* page = page_align(rec);
592
rec_t* ret = page_rec_get_next(
593
page_get_infimum_rec(new_page));
594
ulint log_mode = 0; /* remove warning */
596
/* page_zip_validate() will fail here if btr_compress()
597
sets FIL_PAGE_PREV to FIL_NULL */
598
ut_ad(buf_block_get_frame(block) == page);
599
ut_ad(page_is_leaf(page) == page_is_leaf(new_page));
600
ut_ad(page_is_comp(page) == page_is_comp(new_page));
602
if (UNIV_LIKELY_NULL(new_page_zip)) {
603
log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
606
if (page_dir_get_n_heap(new_page) == PAGE_HEAP_NO_USER_LOW) {
607
page_copy_rec_list_end_to_created_page(new_page, rec,
610
page_copy_rec_list_end_no_locks(new_block, block, rec,
614
if (UNIV_LIKELY_NULL(new_page_zip)) {
615
mtr_set_log_mode(mtr, log_mode);
618
(!page_zip_compress(new_page_zip, new_page, index, mtr))) {
619
/* Before trying to reorganize the page,
620
store the number of preceding records on the page. */
622
= page_rec_get_n_recs_before(ret);
625
(!page_zip_reorganize(new_block, index, mtr))) {
628
(!page_zip_decompress(new_page_zip,
632
ut_ad(page_validate(new_page, index));
635
/* The page was reorganized:
637
ret = new_page + PAGE_NEW_INFIMUM;
640
ret = rec_get_next_ptr(ret, TRUE);
646
/* Update the lock table, MAX_TRX_ID, and possible hash index */
648
lock_move_rec_list_end(new_block, block, rec);
650
page_update_max_trx_id(new_block, new_page_zip,
651
page_get_max_trx_id(page));
653
btr_search_move_or_delete_hash_entries(new_block, block, index);
658
/*****************************************************************
659
Copies records from page to new_page, up to the given record,
660
NOT including that record. Infimum and supremum records are not copied.
661
The records are copied to the end of the record list on new_page. */
664
page_copy_rec_list_start(
665
/*=====================*/
666
/* out: pointer to the original
667
predecessor of the supremum record
668
on new_page, or NULL on zip overflow
669
(new_block will be decompressed) */
670
buf_block_t* new_block, /* in/out: index page to copy to */
671
buf_block_t* block, /* in: index page containing rec */
672
rec_t* rec, /* in: record on page */
673
dict_index_t* index, /* in: record descriptor */
674
mtr_t* mtr) /* in: mtr */
676
page_t* new_page = buf_block_get_frame(new_block);
677
page_zip_des_t* new_page_zip = buf_block_get_page_zip(new_block);
680
ulint log_mode = 0 /* remove warning */;
681
mem_heap_t* heap = NULL;
683
= page_rec_get_prev(page_get_supremum_rec(new_page));
684
ulint offsets_[REC_OFFS_NORMAL_SIZE];
685
ulint* offsets = offsets_;
686
rec_offs_init(offsets_);
688
if (page_rec_is_infimum(rec)) {
693
if (UNIV_LIKELY_NULL(new_page_zip)) {
694
log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
697
page_cur_set_before_first(block, &cur1);
698
page_cur_move_to_next(&cur1);
702
/* Copy records from the original page to the new page */
704
while (page_cur_get_rec(&cur1) != rec) {
705
rec_t* cur1_rec = page_cur_get_rec(&cur1);
706
offsets = rec_get_offsets(cur1_rec, index, offsets,
707
ULINT_UNDEFINED, &heap);
708
cur2 = page_cur_insert_rec_low(cur2, index,
709
cur1_rec, offsets, mtr);
712
page_cur_move_to_next(&cur1);
715
if (UNIV_LIKELY_NULL(heap)) {
719
if (UNIV_LIKELY_NULL(new_page_zip)) {
720
mtr_set_log_mode(mtr, log_mode);
723
(!page_zip_compress(new_page_zip, new_page, index, mtr))) {
724
/* Before trying to reorganize the page,
725
store the number of preceding records on the page. */
727
= page_rec_get_n_recs_before(ret);
730
(!page_zip_reorganize(new_block, index, mtr))) {
733
(!page_zip_decompress(new_page_zip,
737
ut_ad(page_validate(new_page, index));
740
/* The page was reorganized:
742
ret = new_page + PAGE_NEW_INFIMUM;
745
ret = rec_get_next_ptr(ret, TRUE);
751
/* Update MAX_TRX_ID, the lock table, and possible hash index */
753
page_update_max_trx_id(new_block, new_page_zip,
754
page_get_max_trx_id(page_align(rec)));
756
lock_move_rec_list_start(new_block, block, rec, ret);
758
btr_search_move_or_delete_hash_entries(new_block, block, index);
763
/**************************************************************
764
Writes a log record of a record list end or start deletion. */
767
page_delete_rec_list_write_log(
768
/*===========================*/
769
rec_t* rec, /* in: record on page */
770
dict_index_t* index, /* in: record descriptor */
771
byte type, /* in: operation type:
772
MLOG_LIST_END_DELETE, ... */
773
mtr_t* mtr) /* in: mtr */
776
ut_ad(type == MLOG_LIST_END_DELETE
777
|| type == MLOG_LIST_START_DELETE
778
|| type == MLOG_COMP_LIST_END_DELETE
779
|| type == MLOG_COMP_LIST_START_DELETE);
781
log_ptr = mlog_open_and_write_index(mtr, rec, index, type, 2);
783
/* Write the parameter as a 2-byte ulint */
784
mach_write_to_2(log_ptr, page_offset(rec));
785
mlog_close(mtr, log_ptr + 2);
789
/**************************************************************
790
Parses a log record of a record list end or start deletion. */
793
page_parse_delete_rec_list(
794
/*=======================*/
795
/* out: end of log record or NULL */
796
byte type, /* in: MLOG_LIST_END_DELETE,
797
MLOG_LIST_START_DELETE,
798
MLOG_COMP_LIST_END_DELETE or
799
MLOG_COMP_LIST_START_DELETE */
800
byte* ptr, /* in: buffer */
801
byte* end_ptr,/* in: buffer end */
802
buf_block_t* block, /* in/out: buffer block or NULL */
803
dict_index_t* index, /* in: record descriptor */
804
mtr_t* mtr) /* in: mtr or NULL */
809
ut_ad(type == MLOG_LIST_END_DELETE
810
|| type == MLOG_LIST_START_DELETE
811
|| type == MLOG_COMP_LIST_END_DELETE
812
|| type == MLOG_COMP_LIST_START_DELETE);
814
/* Read the record offset as a 2-byte ulint */
816
if (end_ptr < ptr + 2) {
821
offset = mach_read_from_2(ptr);
829
page = buf_block_get_frame(block);
831
ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
833
if (type == MLOG_LIST_END_DELETE
834
|| type == MLOG_COMP_LIST_END_DELETE) {
835
page_delete_rec_list_end(page + offset, block, index,
836
ULINT_UNDEFINED, ULINT_UNDEFINED,
839
page_delete_rec_list_start(page + offset, block, index, mtr);
845
/*****************************************************************
846
Deletes records from a page from a given record onward, including that record.
847
The infimum and supremum records are not deleted. */
850
page_delete_rec_list_end(
851
/*=====================*/
852
rec_t* rec, /* in: pointer to record on page */
853
buf_block_t* block, /* in: buffer block of the page */
854
dict_index_t* index, /* in: record descriptor */
855
ulint n_recs, /* in: number of records to delete,
856
or ULINT_UNDEFINED if not known */
857
ulint size, /* in: the sum of the sizes of the
858
records in the end of the chain to
859
delete, or ULINT_UNDEFINED if not known */
860
mtr_t* mtr) /* in: mtr */
862
page_dir_slot_t*slot;
867
page_zip_des_t* page_zip = buf_block_get_page_zip(block);
868
page_t* page = page_align(rec);
869
mem_heap_t* heap = NULL;
870
ulint offsets_[REC_OFFS_NORMAL_SIZE];
871
ulint* offsets = offsets_;
872
rec_offs_init(offsets_);
874
ut_ad(size == ULINT_UNDEFINED || size < UNIV_PAGE_SIZE);
875
ut_ad(!page_zip || page_rec_is_comp(rec));
876
#ifdef UNIV_ZIP_DEBUG
877
ut_a(!page_zip || page_zip_validate(page_zip, page));
878
#endif /* UNIV_ZIP_DEBUG */
880
if (page_rec_is_infimum(rec)) {
881
rec = page_rec_get_next(rec);
884
if (page_rec_is_supremum(rec)) {
889
/* Reset the last insert info in the page header and increment
890
the modify clock for the frame */
892
page_header_set_ptr(page, page_zip, PAGE_LAST_INSERT, NULL);
894
/* The page gets invalid for optimistic searches: increment the
895
frame modify clock */
897
buf_block_modify_clock_inc(block);
899
page_delete_rec_list_write_log(rec, index, page_is_comp(page)
900
? MLOG_COMP_LIST_END_DELETE
901
: MLOG_LIST_END_DELETE, mtr);
903
if (UNIV_LIKELY_NULL(page_zip)) {
906
ut_a(page_is_comp(page));
907
/* Individual deletes are not logged */
909
log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
913
page_cur_position(rec, block, &cur);
915
offsets = rec_get_offsets(rec, index, offsets,
916
ULINT_UNDEFINED, &heap);
917
rec = rec_get_next_ptr(rec, TRUE);
918
#ifdef UNIV_ZIP_DEBUG
919
ut_a(page_zip_validate(page_zip, page));
920
#endif /* UNIV_ZIP_DEBUG */
921
page_cur_delete_rec(&cur, index, offsets, mtr);
922
} while (page_offset(rec) != PAGE_NEW_SUPREMUM);
924
if (UNIV_LIKELY_NULL(heap)) {
928
/* Restore log mode */
930
mtr_set_log_mode(mtr, log_mode);
934
prev_rec = page_rec_get_prev(rec);
936
last_rec = page_rec_get_prev(page_get_supremum_rec(page));
938
if ((size == ULINT_UNDEFINED) || (n_recs == ULINT_UNDEFINED)) {
940
/* Calculate the sum of sizes and the number of records */
946
offsets = rec_get_offsets(rec2, index, offsets,
947
ULINT_UNDEFINED, &heap);
948
s = rec_offs_size(offsets);
949
ut_ad(rec2 - page + s - rec_offs_extra_size(offsets)
951
ut_ad(size + s < UNIV_PAGE_SIZE);
955
rec2 = page_rec_get_next(rec2);
956
} while (!page_rec_is_supremum(rec2));
958
if (UNIV_LIKELY_NULL(heap)) {
963
ut_ad(size < UNIV_PAGE_SIZE);
965
/* Update the page directory; there is no need to balance the number
966
of the records owned by the supremum record, as it is allowed to be
967
less than PAGE_DIR_SLOT_MIN_N_OWNED */
969
if (page_is_comp(page)) {
973
while (rec_get_n_owned_new(rec2) == 0) {
976
rec2 = rec_get_next_ptr(rec2, TRUE);
979
ut_ad(rec_get_n_owned_new(rec2) > count);
981
n_owned = rec_get_n_owned_new(rec2) - count;
982
slot_index = page_dir_find_owner_slot(rec2);
983
slot = page_dir_get_nth_slot(page, slot_index);
988
while (rec_get_n_owned_old(rec2) == 0) {
991
rec2 = rec_get_next_ptr(rec2, FALSE);
994
ut_ad(rec_get_n_owned_old(rec2) > count);
996
n_owned = rec_get_n_owned_old(rec2) - count;
997
slot_index = page_dir_find_owner_slot(rec2);
998
slot = page_dir_get_nth_slot(page, slot_index);
1001
page_dir_slot_set_rec(slot, page_get_supremum_rec(page));
1002
page_dir_slot_set_n_owned(slot, NULL, n_owned);
1004
page_dir_set_n_slots(page, NULL, slot_index + 1);
1006
/* Remove the record chain segment from the record chain */
1007
page_rec_set_next(prev_rec, page_get_supremum_rec(page));
1009
/* Catenate the deleted chain segment to the page free list */
1011
page_rec_set_next(last_rec, page_header_get_ptr(page, PAGE_FREE));
1012
page_header_set_ptr(page, NULL, PAGE_FREE, rec);
1014
page_header_set_field(page, NULL, PAGE_GARBAGE, size
1015
+ page_header_get_field(page, PAGE_GARBAGE));
1017
page_header_set_field(page, NULL, PAGE_N_RECS,
1018
(ulint)(page_get_n_recs(page) - n_recs));
1021
/*****************************************************************
1022
Deletes records from page, up to the given record, NOT including
1023
that record. Infimum and supremum records are not deleted. */
1026
page_delete_rec_list_start(
1027
/*=======================*/
1028
rec_t* rec, /* in: record on page */
1029
buf_block_t* block, /* in: buffer block of the page */
1030
dict_index_t* index, /* in: record descriptor */
1031
mtr_t* mtr) /* in: mtr */
1035
ulint offsets_[REC_OFFS_NORMAL_SIZE];
1036
ulint* offsets = offsets_;
1037
mem_heap_t* heap = NULL;
1040
rec_offs_init(offsets_);
1042
ut_ad((ibool) !!page_rec_is_comp(rec)
1043
== dict_table_is_comp(index->table));
1044
/* page_zip_validate() would detect a min_rec_mark mismatch
1045
in btr_page_split_and_insert()
1046
between btr_attach_half_pages() and insert_page = ...
1047
when btr_page_get_split_rec_to_left() holds (direction == FSP_DOWN). */
1049
if (page_rec_is_infimum(rec)) {
1054
if (page_rec_is_comp(rec)) {
1055
type = MLOG_COMP_LIST_START_DELETE;
1057
type = MLOG_LIST_START_DELETE;
1060
page_delete_rec_list_write_log(rec, index, type, mtr);
1062
page_cur_set_before_first(block, &cur1);
1063
page_cur_move_to_next(&cur1);
1065
/* Individual deletes are not logged */
1067
log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
1069
while (page_cur_get_rec(&cur1) != rec) {
1070
offsets = rec_get_offsets(page_cur_get_rec(&cur1), index,
1071
offsets, ULINT_UNDEFINED, &heap);
1072
page_cur_delete_rec(&cur1, index, offsets, mtr);
1075
if (UNIV_LIKELY_NULL(heap)) {
1076
mem_heap_free(heap);
1079
/* Restore log mode */
1081
mtr_set_log_mode(mtr, log_mode);
1084
/*****************************************************************
1085
Moves record list end to another page. Moved records include split_rec. */
1089
page_move_rec_list_end(
1090
/*===================*/
1091
/* out: TRUE on success; FALSE on
1093
(new_block will be decompressed) */
1094
buf_block_t* new_block, /* in/out: index page where to move */
1095
buf_block_t* block, /* in: index page from where to move */
1096
rec_t* split_rec, /* in: first record to move */
1097
dict_index_t* index, /* in: record descriptor */
1098
mtr_t* mtr) /* in: mtr */
1100
page_t* new_page = buf_block_get_frame(new_block);
1101
ulint old_data_size;
1102
ulint new_data_size;
1106
old_data_size = page_get_data_size(new_page);
1107
old_n_recs = page_get_n_recs(new_page);
1108
#ifdef UNIV_ZIP_DEBUG
1110
page_zip_des_t* new_page_zip
1111
= buf_block_get_page_zip(new_block);
1112
page_zip_des_t* page_zip
1113
= buf_block_get_page_zip(block);
1114
ut_a(!new_page_zip == !page_zip);
1116
|| page_zip_validate(new_page_zip, new_page));
1118
|| page_zip_validate(page_zip, page_align(split_rec)));
1120
#endif /* UNIV_ZIP_DEBUG */
1122
if (UNIV_UNLIKELY(!page_copy_rec_list_end(new_block, block,
1123
split_rec, index, mtr))) {
1127
new_data_size = page_get_data_size(new_page);
1128
new_n_recs = page_get_n_recs(new_page);
1130
ut_ad(new_data_size >= old_data_size);
1132
page_delete_rec_list_end(split_rec, block, index,
1133
new_n_recs - old_n_recs,
1134
new_data_size - old_data_size, mtr);
1139
/*****************************************************************
1140
Moves record list start to another page. Moved records do not include
1144
page_move_rec_list_start(
1145
/*=====================*/
1146
/* out: TRUE on success; FALSE on
1147
compression failure */
1148
buf_block_t* new_block, /* in/out: index page where to move */
1149
buf_block_t* block, /* in/out: page containing split_rec */
1150
rec_t* split_rec, /* in: first record not to move */
1151
dict_index_t* index, /* in: record descriptor */
1152
mtr_t* mtr) /* in: mtr */
1154
if (UNIV_UNLIKELY(!page_copy_rec_list_start(new_block, block,
1155
split_rec, index, mtr))) {
1159
page_delete_rec_list_start(split_rec, block, index, mtr);
1164
/***************************************************************************
1165
This is a low-level operation which is used in a database index creation
1166
to update the page number of a created B-tree to a data dictionary record. */
1169
page_rec_write_index_page_no(
1170
/*=========================*/
1171
rec_t* rec, /* in: record to update */
1172
ulint i, /* in: index of the field to update */
1173
ulint page_no,/* in: value to write */
1174
mtr_t* mtr) /* in: mtr */
1179
data = rec_get_nth_field_old(rec, i, &len);
1183
mlog_write_ulint(data, page_no, MLOG_4BYTES, mtr);
1186
/******************************************************************
1187
Used to delete one slot from the directory. This function also updates
1188
the n_owned fields in the records, so that the first slot after
1189
the deleted one inherits the records of the deleted slot. */
1192
page_dir_delete_slot(
1193
/*=================*/
1194
page_t* page, /* in/out: the index page */
1195
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
1196
ulint slot_no)/* in: slot to be deleted */
1198
page_dir_slot_t* slot;
1203
ut_ad(!page_zip || page_is_comp(page));
1205
ut_ad(slot_no + 1 < page_dir_get_n_slots(page));
1207
n_slots = page_dir_get_n_slots(page);
1209
/* 1. Reset the n_owned fields of the slots to be
1211
slot = page_dir_get_nth_slot(page, slot_no);
1212
n_owned = page_dir_slot_get_n_owned(slot);
1213
page_dir_slot_set_n_owned(slot, page_zip, 0);
1215
/* 2. Update the n_owned value of the first non-deleted slot */
1217
slot = page_dir_get_nth_slot(page, slot_no + 1);
1218
page_dir_slot_set_n_owned(slot, page_zip,
1219
n_owned + page_dir_slot_get_n_owned(slot));
1221
/* 3. Destroy the slot by copying slots */
1222
for (i = slot_no + 1; i < n_slots; i++) {
1223
rec_t* rec = (rec_t*)
1224
page_dir_slot_get_rec(page_dir_get_nth_slot(page, i));
1225
page_dir_slot_set_rec(page_dir_get_nth_slot(page, i - 1), rec);
1228
/* 4. Zero out the last slot, which will be removed */
1229
mach_write_to_2(page_dir_get_nth_slot(page, n_slots - 1), 0);
1231
/* 5. Update the page header */
1232
page_header_set_field(page, page_zip, PAGE_N_DIR_SLOTS, n_slots - 1);
1235
/******************************************************************
1236
Used to add one slot to the directory. Does not set the record pointer
1237
in the added slot or update n_owned values: this is the responsibility
1243
page_t* page, /* in/out: the index page */
1244
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
1245
ulint start) /* in: the slot above which the new slots
1248
page_dir_slot_t* slot;
1251
n_slots = page_dir_get_n_slots(page);
1253
ut_ad(start < n_slots - 1);
1255
/* Update the page header */
1256
page_dir_set_n_slots(page, page_zip, n_slots + 1);
1259
slot = page_dir_get_nth_slot(page, n_slots);
1260
memmove(slot, slot + PAGE_DIR_SLOT_SIZE,
1261
(n_slots - 1 - start) * PAGE_DIR_SLOT_SIZE);
1264
/********************************************************************
1265
Splits a directory slot which owns too many records. */
1268
page_dir_split_slot(
1269
/*================*/
1270
page_t* page, /* in/out: index page */
1271
page_zip_des_t* page_zip,/* in/out: compressed page whose
1272
uncompressed part will be written, or NULL */
1273
ulint slot_no)/* in: the directory slot */
1276
page_dir_slot_t* new_slot;
1277
page_dir_slot_t* prev_slot;
1278
page_dir_slot_t* slot;
1283
ut_ad(!page_zip || page_is_comp(page));
1286
slot = page_dir_get_nth_slot(page, slot_no);
1288
n_owned = page_dir_slot_get_n_owned(slot);
1289
ut_ad(n_owned == PAGE_DIR_SLOT_MAX_N_OWNED + 1);
1291
/* 1. We loop to find a record approximately in the middle of the
1292
records owned by the slot. */
1294
prev_slot = page_dir_get_nth_slot(page, slot_no - 1);
1295
rec = (rec_t*) page_dir_slot_get_rec(prev_slot);
1297
for (i = 0; i < n_owned / 2; i++) {
1298
rec = page_rec_get_next(rec);
1301
ut_ad(n_owned / 2 >= PAGE_DIR_SLOT_MIN_N_OWNED);
1303
/* 2. We add one directory slot immediately below the slot to be
1306
page_dir_add_slot(page, page_zip, slot_no - 1);
1308
/* The added slot is now number slot_no, and the old slot is
1309
now number slot_no + 1 */
1311
new_slot = page_dir_get_nth_slot(page, slot_no);
1312
slot = page_dir_get_nth_slot(page, slot_no + 1);
1314
/* 3. We store the appropriate values to the new slot. */
1316
page_dir_slot_set_rec(new_slot, rec);
1317
page_dir_slot_set_n_owned(new_slot, page_zip, n_owned / 2);
1319
/* 4. Finally, we update the number of records field of the
1322
page_dir_slot_set_n_owned(slot, page_zip, n_owned - (n_owned / 2));
1325
/*****************************************************************
1326
Tries to balance the given directory slot with too few records with the upper
1327
neighbor, so that there are at least the minimum number of records owned by
1328
the slot; this may result in the merging of two slots. */
1331
page_dir_balance_slot(
1332
/*==================*/
1333
page_t* page, /* in/out: index page */
1334
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
1335
ulint slot_no)/* in: the directory slot */
1337
page_dir_slot_t* slot;
1338
page_dir_slot_t* up_slot;
1345
ut_ad(!page_zip || page_is_comp(page));
1348
slot = page_dir_get_nth_slot(page, slot_no);
1350
/* The last directory slot cannot be balanced with the upper
1351
neighbor, as there is none. */
1353
if (UNIV_UNLIKELY(slot_no == page_dir_get_n_slots(page) - 1)) {
1358
up_slot = page_dir_get_nth_slot(page, slot_no + 1);
1360
n_owned = page_dir_slot_get_n_owned(slot);
1361
up_n_owned = page_dir_slot_get_n_owned(up_slot);
1363
ut_ad(n_owned == PAGE_DIR_SLOT_MIN_N_OWNED - 1);
1365
/* If the upper slot has the minimum value of n_owned, we will merge
1366
the two slots, therefore we assert: */
1367
ut_ad(2 * PAGE_DIR_SLOT_MIN_N_OWNED - 1 <= PAGE_DIR_SLOT_MAX_N_OWNED);
1369
if (up_n_owned > PAGE_DIR_SLOT_MIN_N_OWNED) {
1371
/* In this case we can just transfer one record owned
1372
by the upper slot to the property of the lower slot */
1373
old_rec = (rec_t*) page_dir_slot_get_rec(slot);
1375
if (page_is_comp(page)) {
1376
new_rec = rec_get_next_ptr(old_rec, TRUE);
1378
rec_set_n_owned_new(old_rec, page_zip, 0);
1379
rec_set_n_owned_new(new_rec, page_zip, n_owned + 1);
1381
new_rec = rec_get_next_ptr(old_rec, FALSE);
1383
rec_set_n_owned_old(old_rec, 0);
1384
rec_set_n_owned_old(new_rec, n_owned + 1);
1387
page_dir_slot_set_rec(slot, new_rec);
1389
page_dir_slot_set_n_owned(up_slot, page_zip, up_n_owned -1);
1391
/* In this case we may merge the two slots */
1392
page_dir_delete_slot(page, page_zip, slot_no);
1396
/****************************************************************
1397
Returns the middle record of the record list. If there are an even number
1398
of records in the list, returns the first record of the upper half-list. */
1401
page_get_middle_rec(
1402
/*================*/
1403
/* out: middle record */
1404
page_t* page) /* in: page */
1406
page_dir_slot_t* slot;
1413
/* This many records we must leave behind */
1414
middle = (page_get_n_recs(page) + PAGE_HEAP_NO_USER_LOW) / 2;
1420
slot = page_dir_get_nth_slot(page, i);
1421
n_owned = page_dir_slot_get_n_owned(slot);
1423
if (count + n_owned > middle) {
1431
slot = page_dir_get_nth_slot(page, i - 1);
1432
rec = (rec_t*) page_dir_slot_get_rec(slot);
1433
rec = page_rec_get_next(rec);
1435
/* There are now count records behind rec */
1437
for (i = 0; i < middle - count; i++) {
1438
rec = page_rec_get_next(rec);
1444
/*******************************************************************
1445
Returns the number of records before the given record in chain.
1446
The number includes infimum and supremum records. */
1449
page_rec_get_n_recs_before(
1450
/*=======================*/
1451
/* out: number of records */
1452
const rec_t* rec) /* in: the physical record */
1454
const page_dir_slot_t* slot;
1455
const rec_t* slot_rec;
1460
ut_ad(page_rec_check(rec));
1462
page = page_align(rec);
1463
if (page_is_comp(page)) {
1464
while (rec_get_n_owned_new(rec) == 0) {
1466
rec = rec_get_next_ptr_const(rec, TRUE);
1470
for (i = 0; ; i++) {
1471
slot = page_dir_get_nth_slot(page, i);
1472
slot_rec = page_dir_slot_get_rec(slot);
1474
n += rec_get_n_owned_new(slot_rec);
1476
if (rec == slot_rec) {
1482
while (rec_get_n_owned_old(rec) == 0) {
1484
rec = rec_get_next_ptr_const(rec, FALSE);
1488
for (i = 0; ; i++) {
1489
slot = page_dir_get_nth_slot(page, i);
1490
slot_rec = page_dir_slot_get_rec(slot);
1492
n += rec_get_n_owned_old(slot_rec);
1494
if (rec == slot_rec) {
1508
/****************************************************************
1509
Prints record contents including the data relevant only in
1510
the index page context. */
1515
const rec_t* rec, /* in: physical record */
1516
const ulint* offsets)/* in: record descriptor */
1518
ut_a(!page_rec_is_comp(rec) == !rec_offs_comp(offsets));
1519
rec_print_new(stderr, rec, offsets);
1520
if (page_rec_is_comp(rec)) {
1522
" n_owned: %lu; heap_no: %lu; next rec: %lu\n",
1523
(ulong) rec_get_n_owned_new(rec),
1524
(ulong) rec_get_heap_no_new(rec),
1525
(ulong) rec_get_next_offs(rec, TRUE));
1528
" n_owned: %lu; heap_no: %lu; next rec: %lu\n",
1529
(ulong) rec_get_n_owned_old(rec),
1530
(ulong) rec_get_heap_no_old(rec),
1531
/* old-style (non-compact) record: pass FALSE as the compact flag */
(ulong) rec_get_next_offs(rec, FALSE));
1534
page_rec_check(rec);
1535
rec_validate(rec, offsets);
1538
/*******************************************************************
1539
This is used to print the contents of the directory for
1540
debugging purposes. */
1545
page_t* page, /* in: index page */
1546
ulint pr_n) /* in: print n first and n last entries */
1550
page_dir_slot_t* slot;
1552
n = page_dir_get_n_slots(page);
1554
fprintf(stderr, "--------------------------------\n"
1557
"Directory stack top at offs: %lu; number of slots: %lu\n",
1558
page, (ulong) page_offset(page_dir_get_nth_slot(page, n - 1)),
1560
for (i = 0; i < n; i++) {
1561
slot = page_dir_get_nth_slot(page, i);
1562
if ((i == pr_n) && (i < n - pr_n)) {
1563
fputs(" ... \n", stderr);
1565
if ((i < pr_n) || (i >= n - pr_n)) {
1567
"Contents of slot: %lu: n_owned: %lu,"
1570
(ulong) page_dir_slot_get_n_owned(slot),
1572
page_offset(page_dir_slot_get_rec(slot)));
1575
fprintf(stderr, "Total of %lu records\n"
1576
"--------------------------------\n",
1577
(ulong) (PAGE_HEAP_NO_USER_LOW + page_get_n_recs(page)));
1580
/*******************************************************************
1581
This is used to print the contents of the page record list for
1582
debugging purposes. */
1587
buf_block_t* block, /* in: index page */
1588
dict_index_t* index, /* in: dictionary index of the page */
1589
ulint pr_n) /* in: print n first and n last entries */
1591
page_t* page = block->frame;
1595
mem_heap_t* heap = NULL;
1596
ulint offsets_[REC_OFFS_NORMAL_SIZE];
1597
ulint* offsets = offsets_;
1598
rec_offs_init(offsets_);
1600
ut_a((ibool)!!page_is_comp(page) == dict_table_is_comp(index->table));
1603
"--------------------------------\n"
1604
"PAGE RECORD LIST\n"
1605
"Page address %p\n", page);
1607
n_recs = page_get_n_recs(page);
1609
page_cur_set_before_first(block, &cur);
1612
offsets = rec_get_offsets(cur.rec, index, offsets,
1613
ULINT_UNDEFINED, &heap);
1614
page_rec_print(cur.rec, offsets);
1616
if (count == pr_n) {
1619
if (page_cur_is_after_last(&cur)) {
1622
page_cur_move_to_next(&cur);
1626
if (n_recs > 2 * pr_n) {
1627
fputs(" ... \n", stderr);
1630
while (!page_cur_is_after_last(&cur)) {
1631
page_cur_move_to_next(&cur);
1633
if (count + pr_n >= n_recs) {
1634
offsets = rec_get_offsets(cur.rec, index, offsets,
1635
ULINT_UNDEFINED, &heap);
1636
page_rec_print(cur.rec, offsets);
1642
"Total of %lu records \n"
1643
"--------------------------------\n",
1644
(ulong) (count + 1));
1646
if (UNIV_LIKELY_NULL(heap)) {
1647
mem_heap_free(heap);
1651
/*******************************************************************
1652
Prints the info in a page header. */
1660
"--------------------------------\n"
1661
"PAGE HEADER INFO\n"
1662
"Page address %p, n records %lu (%s)\n"
1663
"n dir slots %lu, heap top %lu\n"
1664
"Page n heap %lu, free %lu, garbage %lu\n"
1665
"Page last insert %lu, direction %lu, n direction %lu\n",
1666
page, (ulong) page_header_get_field(page, PAGE_N_RECS),
1667
page_is_comp(page) ? "compact format" : "original format",
1668
(ulong) page_header_get_field(page, PAGE_N_DIR_SLOTS),
1669
(ulong) page_header_get_field(page, PAGE_HEAP_TOP),
1670
(ulong) page_dir_get_n_heap(page),
1671
(ulong) page_header_get_field(page, PAGE_FREE),
1672
(ulong) page_header_get_field(page, PAGE_GARBAGE),
1673
(ulong) page_header_get_field(page, PAGE_LAST_INSERT),
1674
(ulong) page_header_get_field(page, PAGE_DIRECTION),
1675
(ulong) page_header_get_field(page, PAGE_N_DIRECTION));
1678
/*******************************************************************
1679
This is used to print the contents of the page for
1680
debugging purposes. */
1685
buf_block_t* block, /* in: index page */
1686
dict_index_t* index, /* in: dictionary index of the page */
1687
ulint dn, /* in: print dn first and last entries
1689
ulint rn) /* in: print rn first and last records
1692
page_t* page = block->frame;
1694
page_header_print(page);
1695
page_dir_print(page, dn);
1696
page_print_list(block, index, rn);
1699
/*******************************************************************
1700
The following is used to validate a record on a page. This function
1701
differs from rec_validate as it can also check the n_owned field and
1702
the heap_no field. */
1707
/* out: TRUE if ok */
1708
rec_t* rec, /* in: physical record */
1709
const ulint* offsets)/* in: array returned by rec_get_offsets() */
1715
page = page_align(rec);
1716
ut_a(!page_is_comp(page) == !rec_offs_comp(offsets));
1718
page_rec_check(rec);
1719
rec_validate(rec, offsets);
1721
if (page_rec_is_comp(rec)) {
1722
n_owned = rec_get_n_owned_new(rec);
1723
heap_no = rec_get_heap_no_new(rec);
1725
n_owned = rec_get_n_owned_old(rec);
1726
heap_no = rec_get_heap_no_old(rec);
1729
if (UNIV_UNLIKELY(!(n_owned <= PAGE_DIR_SLOT_MAX_N_OWNED))) {
1731
"InnoDB: Dir slot of rec %lu, n owned too big %lu\n",
1732
(ulong) page_offset(rec), (ulong) n_owned);
1736
if (UNIV_UNLIKELY(!(heap_no < page_dir_get_n_heap(page)))) {
1738
"InnoDB: Heap no of rec %lu too big %lu %lu\n",
1739
(ulong) page_offset(rec), (ulong) heap_no,
1740
(ulong) page_dir_get_n_heap(page));
1747
/*******************************************************************
1748
Checks that the first directory slot points to the infimum record and
1749
the last to the supremum. This function is intended to track if the
1750
bug fixed in 4.0.14 has caused corruption to users' databases. */
1755
const page_t* page) /* in: index page */
1759
ulint supremum_offs;
1761
n_slots = page_dir_get_n_slots(page);
1762
infimum_offs = mach_read_from_2(page_dir_get_nth_slot(page, 0));
1763
supremum_offs = mach_read_from_2(page_dir_get_nth_slot(page,
1766
if (UNIV_UNLIKELY(!page_rec_is_infimum_low(infimum_offs))) {
1769
"InnoDB: Page directory corruption:"
1770
" infimum not pointed to\n");
1771
buf_page_print(page, 0);
1774
if (UNIV_UNLIKELY(!page_rec_is_supremum_low(supremum_offs))) {
1777
"InnoDB: Page directory corruption:"
1778
" supremum not pointed to\n");
1779
buf_page_print(page, 0);
1783
/*******************************************************************
1784
This function checks the consistency of an index page when we do not
1785
know the index. This is also resilient so that this should never crash
1786
even if the page is total garbage. */
1789
page_simple_validate_old(
1790
/*=====================*/
1791
/* out: TRUE if ok */
1792
page_t* page) /* in: old-style index page */
1794
page_dir_slot_t* slot;
1803
ut_a(!page_is_comp(page));
1805
/* Check first that the record heap and the directory do not
1808
n_slots = page_dir_get_n_slots(page);
1810
if (UNIV_UNLIKELY(n_slots > UNIV_PAGE_SIZE / 4)) {
1812
"InnoDB: Nonsensical number %lu of page dir slots\n",
1818
rec_heap_top = page_header_get_ptr(page, PAGE_HEAP_TOP);
1820
if (UNIV_UNLIKELY(rec_heap_top
1821
> page_dir_get_nth_slot(page, n_slots - 1))) {
1824
"InnoDB: Record heap and dir overlap on a page,"
1825
" heap top %lu, dir %lu\n",
1826
(ulong) page_header_get_field(page, PAGE_HEAP_TOP),
1828
page_offset(page_dir_get_nth_slot(page, n_slots - 1)));
1833
/* Validate the record list in a loop checking also that it is
1834
consistent with the page record directory. */
1839
slot = page_dir_get_nth_slot(page, slot_no);
1841
rec = page_get_infimum_rec(page);
1844
if (UNIV_UNLIKELY(rec > rec_heap_top)) {
1846
"InnoDB: Record %lu is above"
1847
" rec heap top %lu\n",
1848
(ulong)(rec - page),
1849
(ulong)(rec_heap_top - page));
1854
if (UNIV_UNLIKELY(rec_get_n_owned_old(rec))) {
1855
/* This is a record pointed to by a dir slot */
1856
if (UNIV_UNLIKELY(rec_get_n_owned_old(rec)
1860
"InnoDB: Wrong owned count %lu, %lu,"
1862
(ulong) rec_get_n_owned_old(rec),
1864
(ulong)(rec - page));
1870
(page_dir_slot_get_rec(slot) != rec)) {
1872
"InnoDB: Dir slot does not point"
1873
" to right rec %lu\n",
1874
(ulong)(rec - page));
1881
if (!page_rec_is_supremum(rec)) {
1883
slot = page_dir_get_nth_slot(page, slot_no);
1887
if (page_rec_is_supremum(rec)) {
1893
(rec_get_next_offs(rec, FALSE) < FIL_PAGE_DATA
1894
|| rec_get_next_offs(rec, FALSE) >= UNIV_PAGE_SIZE)) {
1896
"InnoDB: Next record offset"
1897
" nonsensical %lu for rec %lu\n",
1898
(ulong) rec_get_next_offs(rec, FALSE),
1899
(ulong) (rec - page));
1906
if (UNIV_UNLIKELY(count > UNIV_PAGE_SIZE)) {
1908
"InnoDB: Page record list appears"
1909
" to be circular %lu\n",
1914
rec = page_rec_get_next(rec);
1918
if (UNIV_UNLIKELY(rec_get_n_owned_old(rec) == 0)) {
1919
fprintf(stderr, "InnoDB: n owned is zero in a supremum rec\n");
1924
if (UNIV_UNLIKELY(slot_no != n_slots - 1)) {
1925
fprintf(stderr, "InnoDB: n slots wrong %lu, %lu\n",
1926
(ulong) slot_no, (ulong) (n_slots - 1));
1930
if (UNIV_UNLIKELY(page_header_get_field(page, PAGE_N_RECS)
1931
+ PAGE_HEAP_NO_USER_LOW
1933
fprintf(stderr, "InnoDB: n recs wrong %lu %lu\n",
1934
(ulong) page_header_get_field(page, PAGE_N_RECS)
1935
+ PAGE_HEAP_NO_USER_LOW,
1936
(ulong) (count + 1));
1941
/* Check then the free list */
1942
rec = page_header_get_ptr(page, PAGE_FREE);
1944
while (rec != NULL) {
1945
if (UNIV_UNLIKELY(rec < page + FIL_PAGE_DATA
1946
|| rec >= page + UNIV_PAGE_SIZE)) {
1948
"InnoDB: Free list record has"
1949
" a nonsensical offset %lu\n",
1950
(ulong) (rec - page));
1955
if (UNIV_UNLIKELY(rec > rec_heap_top)) {
1957
"InnoDB: Free list record %lu"
1958
" is above rec heap top %lu\n",
1959
(ulong) (rec - page),
1960
(ulong) (rec_heap_top - page));
1967
if (UNIV_UNLIKELY(count > UNIV_PAGE_SIZE)) {
1969
"InnoDB: Page free list appears"
1970
" to be circular %lu\n",
1975
rec = page_rec_get_next(rec);
1978
if (UNIV_UNLIKELY(page_dir_get_n_heap(page) != count + 1)) {
1980
fprintf(stderr, "InnoDB: N heap is wrong %lu, %lu\n",
1981
(ulong) page_dir_get_n_heap(page),
1982
(ulong) (count + 1));
1993
/*******************************************************************
1994
This function checks the consistency of an index page when we do not
1995
know the index. This is also resilient so that this should never crash
1996
even if the page is total garbage. */
1999
page_simple_validate_new(
2000
/*=====================*/
2001
/* out: TRUE if ok */
2002
page_t* page) /* in: new-style index page */
2004
page_dir_slot_t* slot;
2013
ut_a(page_is_comp(page));
2015
/* Check first that the record heap and the directory do not
2018
n_slots = page_dir_get_n_slots(page);
2020
if (UNIV_UNLIKELY(n_slots > UNIV_PAGE_SIZE / 4)) {
2022
"InnoDB: Nonsensical number %lu"
2023
" of page dir slots\n", (ulong) n_slots);
2028
rec_heap_top = page_header_get_ptr(page, PAGE_HEAP_TOP);
2030
if (UNIV_UNLIKELY(rec_heap_top
2031
> page_dir_get_nth_slot(page, n_slots - 1))) {
2034
"InnoDB: Record heap and dir overlap on a page,"
2035
" heap top %lu, dir %lu\n",
2036
(ulong) page_header_get_field(page, PAGE_HEAP_TOP),
2038
page_offset(page_dir_get_nth_slot(page, n_slots - 1)));
2043
/* Validate the record list in a loop checking also that it is
2044
consistent with the page record directory. */
2049
slot = page_dir_get_nth_slot(page, slot_no);
2051
rec = page_get_infimum_rec(page);
2054
if (UNIV_UNLIKELY(rec > rec_heap_top)) {
2056
"InnoDB: Record %lu is above rec"
2058
(ulong) page_offset(rec),
2059
(ulong) page_offset(rec_heap_top));
2064
if (UNIV_UNLIKELY(rec_get_n_owned_new(rec))) {
2065
/* This is a record pointed to by a dir slot */
2066
if (UNIV_UNLIKELY(rec_get_n_owned_new(rec)
2070
"InnoDB: Wrong owned count %lu, %lu,"
2072
(ulong) rec_get_n_owned_new(rec),
2074
(ulong) page_offset(rec));
2080
(page_dir_slot_get_rec(slot) != rec)) {
2082
"InnoDB: Dir slot does not point"
2083
" to right rec %lu\n",
2084
(ulong) page_offset(rec));
2091
if (!page_rec_is_supremum(rec)) {
2093
slot = page_dir_get_nth_slot(page, slot_no);
2097
if (page_rec_is_supremum(rec)) {
2103
(rec_get_next_offs(rec, TRUE) < FIL_PAGE_DATA
2104
|| rec_get_next_offs(rec, TRUE) >= UNIV_PAGE_SIZE)) {
2106
"InnoDB: Next record offset nonsensical %lu"
2108
(ulong) rec_get_next_offs(rec, TRUE),
2109
(ulong) page_offset(rec));
2116
if (UNIV_UNLIKELY(count > UNIV_PAGE_SIZE)) {
2118
"InnoDB: Page record list appears"
2119
" to be circular %lu\n",
2124
rec = page_rec_get_next(rec);
2128
if (UNIV_UNLIKELY(rec_get_n_owned_new(rec) == 0)) {
2129
fprintf(stderr, "InnoDB: n owned is zero"
2130
" in a supremum rec\n");
2135
if (UNIV_UNLIKELY(slot_no != n_slots - 1)) {
2136
fprintf(stderr, "InnoDB: n slots wrong %lu, %lu\n",
2137
(ulong) slot_no, (ulong) (n_slots - 1));
2141
if (UNIV_UNLIKELY(page_header_get_field(page, PAGE_N_RECS)
2142
+ PAGE_HEAP_NO_USER_LOW
2144
fprintf(stderr, "InnoDB: n recs wrong %lu %lu\n",
2145
(ulong) page_header_get_field(page, PAGE_N_RECS)
2146
+ PAGE_HEAP_NO_USER_LOW,
2147
(ulong) (count + 1));
2152
/* Check then the free list */
2153
rec = page_header_get_ptr(page, PAGE_FREE);
2155
while (rec != NULL) {
2156
if (UNIV_UNLIKELY(rec < page + FIL_PAGE_DATA
2157
|| rec >= page + UNIV_PAGE_SIZE)) {
2159
"InnoDB: Free list record has"
2160
" a nonsensical offset %lu\n",
2161
(ulong) page_offset(rec));
2166
if (UNIV_UNLIKELY(rec > rec_heap_top)) {
2168
"InnoDB: Free list record %lu"
2169
" is above rec heap top %lu\n",
2170
(ulong) page_offset(rec),
2171
(ulong) page_offset(rec_heap_top));
2178
if (UNIV_UNLIKELY(count > UNIV_PAGE_SIZE)) {
2180
"InnoDB: Page free list appears"
2181
" to be circular %lu\n",
2186
rec = page_rec_get_next(rec);
2189
if (UNIV_UNLIKELY(page_dir_get_n_heap(page) != count + 1)) {
2191
fprintf(stderr, "InnoDB: N heap is wrong %lu, %lu\n",
2192
(ulong) page_dir_get_n_heap(page),
2193
(ulong) (count + 1));
2204
/*******************************************************************
2205
This function checks the consistency of an index page. */
2210
/* out: TRUE if ok */
2211
page_t* page, /* in: index page */
2212
dict_index_t* index) /* in: data dictionary index containing
2213
the page record type definition */
2215
page_dir_slot_t*slot;
2220
ulint rec_own_count;
2224
rec_t* old_rec = NULL;
2229
ulint* offsets = NULL;
2230
ulint* old_offsets = NULL;
2232
if (UNIV_UNLIKELY((ibool) !!page_is_comp(page)
2233
!= dict_table_is_comp(index->table))) {
2234
fputs("InnoDB: 'compact format' flag mismatch\n", stderr);
2237
if (page_is_comp(page)) {
2238
if (UNIV_UNLIKELY(!page_simple_validate_new(page))) {
2242
if (UNIV_UNLIKELY(!page_simple_validate_old(page))) {
2247
heap = mem_heap_create(UNIV_PAGE_SIZE + 200);
2249
/* The following buffer is used to check that the
2250
records in the page record heap do not overlap */
2252
buf = mem_heap_zalloc(heap, UNIV_PAGE_SIZE);
2254
/* Check first that the record heap and the directory do not
2257
n_slots = page_dir_get_n_slots(page);
2259
if (UNIV_UNLIKELY(!(page_header_get_ptr(page, PAGE_HEAP_TOP)
2260
<= page_dir_get_nth_slot(page, n_slots - 1)))) {
2262
fputs("InnoDB: Record heap and dir overlap on a page ",
2264
dict_index_name_print(stderr, NULL, index);
2265
fprintf(stderr, ", %p, %p\n",
2266
page_header_get_ptr(page, PAGE_HEAP_TOP),
2267
page_dir_get_nth_slot(page, n_slots - 1));
2272
/* Validate the record list in a loop checking also that
2273
it is consistent with the directory. */
2278
slot = page_dir_get_nth_slot(page, slot_no);
2280
rec = page_get_infimum_rec(page);
2283
offsets = rec_get_offsets(rec, index, offsets,
2284
ULINT_UNDEFINED, &heap);
2286
if (page_is_comp(page) && page_rec_is_user_rec(rec)
2287
&& UNIV_UNLIKELY(rec_get_node_ptr_flag(rec)
2288
== page_is_leaf(page))) {
2289
fputs("InnoDB: node_ptr flag mismatch\n", stderr);
2293
if (UNIV_UNLIKELY(!page_rec_validate(rec, offsets))) {
2297
/* Check that the records are in the ascending order */
2298
if (UNIV_LIKELY(count >= PAGE_HEAP_NO_USER_LOW)
2299
&& !page_rec_is_supremum(rec)) {
2301
(1 != cmp_rec_rec(rec, old_rec,
2302
offsets, old_offsets, index))) {
2304
"InnoDB: Records in wrong order"
2306
(ulong) page_get_page_no(page));
2307
dict_index_name_print(stderr, NULL, index);
2308
fputs("\nInnoDB: previous record ", stderr);
2309
rec_print_new(stderr, old_rec, old_offsets);
2310
fputs("\nInnoDB: record ", stderr);
2311
rec_print_new(stderr, rec, offsets);
2318
if (page_rec_is_user_rec(rec)) {
2320
data_size += rec_offs_size(offsets);
2323
offs = page_offset(rec_get_start(rec, offsets));
2325
for (i = rec_offs_size(offsets); i--; ) {
2326
if (UNIV_UNLIKELY(buf[offs + i])) {
2327
/* No other record may overlap this */
2329
fputs("InnoDB: Record overlaps another\n",
2337
if (page_is_comp(page)) {
2338
rec_own_count = rec_get_n_owned_new(rec);
2340
rec_own_count = rec_get_n_owned_old(rec);
2343
if (UNIV_UNLIKELY(rec_own_count)) {
2344
/* This is a record pointed to by a dir slot */
2345
if (UNIV_UNLIKELY(rec_own_count != own_count)) {
2347
"InnoDB: Wrong owned count %lu, %lu\n",
2348
(ulong) rec_own_count,
2353
if (page_dir_slot_get_rec(slot) != rec) {
2354
fputs("InnoDB: Dir slot does not"
2355
" point to right rec\n",
2360
page_dir_slot_check(slot);
2363
if (!page_rec_is_supremum(rec)) {
2365
slot = page_dir_get_nth_slot(page, slot_no);
2369
if (page_rec_is_supremum(rec)) {
2376
rec = page_rec_get_next(rec);
2378
/* set old_offsets to offsets; recycle offsets */
2380
ulint* offs = old_offsets;
2381
old_offsets = offsets;
2386
if (page_is_comp(page)) {
2387
if (UNIV_UNLIKELY(rec_get_n_owned_new(rec) == 0)) {
2391
} else if (UNIV_UNLIKELY(rec_get_n_owned_old(rec) == 0)) {
2393
fputs("InnoDB: n owned is zero\n", stderr);
2397
if (UNIV_UNLIKELY(slot_no != n_slots - 1)) {
2398
fprintf(stderr, "InnoDB: n slots wrong %lu %lu\n",
2399
(ulong) slot_no, (ulong) (n_slots - 1));
2403
if (UNIV_UNLIKELY(page_header_get_field(page, PAGE_N_RECS)
2404
+ PAGE_HEAP_NO_USER_LOW
2406
fprintf(stderr, "InnoDB: n recs wrong %lu %lu\n",
2407
(ulong) page_header_get_field(page, PAGE_N_RECS)
2408
+ PAGE_HEAP_NO_USER_LOW,
2409
(ulong) (count + 1));
2413
if (UNIV_UNLIKELY(data_size != page_get_data_size(page))) {
2415
"InnoDB: Summed data size %lu, returned by func %lu\n",
2416
(ulong) data_size, (ulong) page_get_data_size(page));
2420
/* Check then the free list */
2421
rec = page_header_get_ptr(page, PAGE_FREE);
2423
while (rec != NULL) {
2424
offsets = rec_get_offsets(rec, index, offsets,
2425
ULINT_UNDEFINED, &heap);
2426
if (UNIV_UNLIKELY(!page_rec_validate(rec, offsets))) {
2432
offs = page_offset(rec_get_start(rec, offsets));
2434
for (i = rec_offs_size(offsets); i--; ) {
2436
if (UNIV_UNLIKELY(buf[offs + i])) {
2437
fputs("InnoDB: Record overlaps another"
2438
" in free list\n", stderr);
2445
rec = page_rec_get_next(rec);
2448
if (UNIV_UNLIKELY(page_dir_get_n_heap(page) != count + 1)) {
2449
fprintf(stderr, "InnoDB: N heap is wrong %lu %lu\n",
2450
(ulong) page_dir_get_n_heap(page),
2458
mem_heap_free(heap);
2460
if (UNIV_UNLIKELY(ret == FALSE)) {
2462
fprintf(stderr, "InnoDB: Apparent corruption in page %lu in ",
2463
(ulong) page_get_page_no(page));
2464
dict_index_name_print(stderr, NULL, index);
2466
buf_page_print(page, 0);
2472
/*******************************************************************
2473
Looks in the page record list for a record with the given heap number. */
2476
page_find_rec_with_heap_no(
2477
/*=======================*/
2478
/* out: record, NULL if not found */
2479
const page_t* page, /* in: index page */
2480
ulint heap_no)/* in: heap number */
2484
if (page_is_comp(page)) {
2485
rec = page + PAGE_NEW_INFIMUM;
2488
ulint rec_heap_no = rec_get_heap_no_new(rec);
2490
if (rec_heap_no == heap_no) {
2493
} else if (rec_heap_no == PAGE_HEAP_NO_SUPREMUM) {
2498
rec = page + rec_get_next_offs(rec, TRUE);
2501
rec = page + PAGE_OLD_INFIMUM;
2504
ulint rec_heap_no = rec_get_heap_no_old(rec);
2506
if (rec_heap_no == heap_no) {
2509
} else if (rec_heap_no == PAGE_HEAP_NO_SUPREMUM) {
2514
rec = page + rec_get_next_offs(rec, FALSE);