/************************************************************************
The index tree adaptive search

Created 2/17/1996 Heikki Tuuri
*************************************************************************/
#include "page0page.h"

ulint btr_search_this_is_zero = 0; /* A dummy variable to fool the
compiler */

#ifdef UNIV_SEARCH_PERF_STAT
ulint btr_search_n_succ = 0;
ulint btr_search_n_hash_fail = 0;
#endif /* UNIV_SEARCH_PERF_STAT */

byte btr_sea_pad1[64]; /* padding to prevent other memory update
hotspots from residing on the same memory
cache line as btr_search_latch */
/* The latch protecting the adaptive search system: this latch protects the
(1) positions of records on those pages where a hash index has been built.
NOTE: It does not protect values of non-ordering fields within a record from
being updated in-place! We can use fact (1) to perform unique searches to
indexes. */

rw_lock_t* btr_search_latch_temp; /* We will allocate the latch from
dynamic memory to get it to the
same DRAM page as other hotspot
semaphores */

byte btr_sea_pad2[64]; /* padding to prevent other memory update
hotspots from residing on the same memory
cache line */

btr_search_sys_t* btr_search_sys;
/* If the number of searches on a page that would presumably have succeeded
using a hash index exceeds the number of records on the page divided by this
parameter, a hash index is built on the page, provided the global limit below
has also been reached */
#define BTR_SEARCH_PAGE_BUILD_LIMIT 16

/* The global limit for consecutive potentially successful hash searches,
before hash index building is started */
#define BTR_SEARCH_BUILD_LIMIT 100
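/* Added commentary (not in the original source): the two limits combine in
btr_search_update_block_hash_info() below roughly as

	build_recommended =
		(block->n_hash_helps
		 > page_get_n_recs(block->frame) / BTR_SEARCH_PAGE_BUILD_LIMIT)
		&& (info->n_hash_potential >= BTR_SEARCH_BUILD_LIMIT);

i.e. a per-page counter of presumably successful hash accesses must exceed
1/16 of the records on the page, and the index-wide streak of potentially
successful hash searches must have reached 100. */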
/************************************************************************
Builds a hash index on a page with the given parameters. If the page already
has a hash index with different parameters, the old hash index is removed.
If index is non-NULL, this function checks if n_fields and n_bytes are
sensible values, and does not build a hash index if not. */

btr_search_build_page_hash_index(
/*=============================*/
dict_index_t* index, /* in: index for which to build, or NULL if
not known */
page_t* page, /* in: index page, s- or x-latched */
ulint n_fields,/* in: hash this many full fields */
ulint n_bytes,/* in: hash this many bytes from the next
field */
ibool left_side);/* in: hash for searches from left side? */
/*********************************************************************
This function should be called before reserving any btr search mutex, if
the intended operation might add nodes to the search system hash table.
Because of the latching order, once we have reserved the btr search system
latch, we cannot allocate a free frame from the buffer pool. Checks that
there is a free buffer frame allocated for the hash table heap in the btr
search system. If not, allocates a free frame for the heap. This check makes
it probable that, when we have reserved the btr search system latch and we
need to add a new node to the hash table, it will succeed. However, the check
will not guarantee success. */
btr_search_check_free_space_in_heap(void)
/*=====================================*/

#ifdef UNIV_SYNC_DEBUG
ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */

table = btr_search_sys->hash_index;

/* Note that we peek the value of heap->free_block without reserving
the latch: this is ok, because we do not guarantee that there will
be enough free space in the hash table. */

if (heap->free_block == NULL) {
frame = buf_frame_alloc();

rw_lock_x_lock(&btr_search_latch);

if (heap->free_block == NULL) {
heap->free_block = frame;

buf_frame_free(frame);

rw_lock_x_unlock(&btr_search_latch);
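/* Added note on the pattern above (not in the original source): the free
frame is allocated with buf_frame_alloc() BEFORE x-latching btr_search_latch,
because the latching order forbids allocating a buffer frame while holding
the latch. heap->free_block is then re-checked under the latch; if another
thread has already supplied a free block meanwhile, the frame just allocated
is returned with buf_frame_free(). */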
126
/*********************************************************************
127
Creates and initializes the adaptive search system at a database start. */
130
btr_search_sys_create(
131
/*==================*/
132
ulint hash_size) /* in: hash index hash table size */
134
/* We allocate the search latch from dynamic memory:
135
see above at the global variable definition */
137
btr_search_latch_temp = mem_alloc(sizeof(rw_lock_t));
139
rw_lock_create(&btr_search_latch, SYNC_SEARCH_SYS);
141
btr_search_sys = mem_alloc(sizeof(btr_search_sys_t));
143
btr_search_sys->hash_index = ha_create(TRUE, hash_size, 0, 0);
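/* Usage sketch (an assumption, not part of this file): the caller at server
startup typically sizes the hash table from the buffer pool, e.g. something
on the order of

	btr_search_sys_create(buf_pool_get_curr_size() / sizeof(void*) / 64);

so the adaptive hash index grows with the amount of memory given to the
buffer pool. */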
/*********************************************************************
Creates and initializes a search info struct. */

btr_search_info_create(
/*===================*/
/* out, own: search info struct */
mem_heap_t* heap) /* in: heap where created */

info = mem_heap_alloc(heap, sizeof(btr_search_t));

#ifdef UNIV_DEBUG
info->magic_n = BTR_SEARCH_MAGIC_N;
#endif /* UNIV_DEBUG */

info->root_guess = NULL;

info->hash_analysis = 0;
info->n_hash_potential = 0;

info->last_hash_succ = FALSE;

#ifdef UNIV_SEARCH_PERF_STAT
info->n_hash_succ = 0;
info->n_hash_fail = 0;
info->n_patt_succ = 0;
info->n_searches = 0;
#endif /* UNIV_SEARCH_PERF_STAT */

/* Set some sensible values */

info->left_side = TRUE;
/*************************************************************************
Updates the search info of an index about hash successes. NOTE that info
is NOT protected by any semaphore, to save CPU time! Do not assume its fields
are consistent. */

btr_search_info_update_hash(
/*========================*/
btr_search_t* info, /* in/out: search info */
btr_cur_t* cursor) /* in: cursor which was just positioned */

#ifdef UNIV_SYNC_DEBUG
ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */

index = cursor->index;

if (index->type & DICT_IBUF) {
/* So many deletes are performed on an insert buffer tree
that we do not consider a hash index useful on it: */

n_unique = dict_index_get_n_unique_in_tree(index);

if (info->n_hash_potential == 0) {

/* Test if the search would have succeeded using the recommended
hash prefix */
if (info->n_fields >= n_unique && cursor->up_match >= n_unique) {
info->n_hash_potential++;

cmp = ut_pair_cmp(info->n_fields, info->n_bytes,
cursor->low_match, cursor->low_bytes);

if (info->left_side ? cmp <= 0 : cmp > 0) {

cmp = ut_pair_cmp(info->n_fields, info->n_bytes,
cursor->up_match, cursor->up_bytes);

if (info->left_side ? cmp <= 0 : cmp > 0) {
goto increment_potential;

/* We have to set a new recommendation; skip the hash analysis
for a while to avoid unnecessary CPU time usage when there is no
chance for success */

info->hash_analysis = 0;

cmp = ut_pair_cmp(cursor->up_match, cursor->up_bytes,
cursor->low_match, cursor->low_bytes);

info->n_hash_potential = 0;

/* For extra safety, we set some sensible values here */

info->left_side = TRUE;

} else if (cmp > 0) {
info->n_hash_potential = 1;

if (cursor->up_match >= n_unique) {
info->n_fields = n_unique;

} else if (cursor->low_match < cursor->up_match) {
info->n_fields = cursor->low_match + 1;

info->n_fields = cursor->low_match;
info->n_bytes = cursor->low_bytes + 1;

info->left_side = TRUE;

info->n_hash_potential = 1;

if (cursor->low_match >= n_unique) {
info->n_fields = n_unique;

} else if (cursor->low_match > cursor->up_match) {
info->n_fields = cursor->up_match + 1;

info->n_fields = cursor->up_match;
info->n_bytes = cursor->up_bytes + 1;

info->left_side = FALSE;
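/* Added worked example (not in the original source): suppose a search ends
with cursor->low_match == 2 and cursor->up_match == 3. Then cmp > 0 above,
and the recommendation becomes n_fields = low_match + 1 = 3, n_bytes = 0,
left_side = TRUE: records would be hashed on their first 3 fields, and the
hash entry for each distinct 3-field prefix should point to the leftmost
record carrying that prefix. */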
/*************************************************************************
Updates the block search info on hash successes. NOTE that info and
block->n_hash_helps, n_fields, n_bytes, side are NOT protected by any
semaphore, to save CPU time! Do not assume the fields are consistent. */

btr_search_update_block_hash_info(
/*==============================*/
/* out: TRUE if building a (new) hash index on
the block is recommended */
btr_search_t* info, /* in: search info */
buf_block_t* block, /* in: buffer block */
btr_cur_t* cursor __attribute__((unused)))

#ifdef UNIV_SYNC_DEBUG
ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
ut_ad(rw_lock_own(&((buf_block_t*) block)->lock, RW_LOCK_SHARED)
|| rw_lock_own(&((buf_block_t*) block)->lock, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */

info->last_hash_succ = FALSE;

ut_a(block->magic_n == BUF_BLOCK_MAGIC_N);
ut_ad(info->magic_n == BTR_SEARCH_MAGIC_N);

if ((block->n_hash_helps > 0)
&& (info->n_hash_potential > 0)
&& (block->n_fields == info->n_fields)
&& (block->n_bytes == info->n_bytes)
&& (block->left_side == info->left_side)) {

if ((block->is_hashed)
&& (block->curr_n_fields == info->n_fields)
&& (block->curr_n_bytes == info->n_bytes)
&& (block->curr_left_side == info->left_side)) {

/* The search would presumably have succeeded using
the hash index */

info->last_hash_succ = TRUE;

block->n_hash_helps++;

block->n_hash_helps = 1;
block->n_fields = info->n_fields;
block->n_bytes = info->n_bytes;
block->left_side = info->left_side;

#ifdef UNIV_DEBUG
if (cursor->index->table->does_not_fit_in_memory) {
block->n_hash_helps = 0;

#endif /* UNIV_DEBUG */

if ((block->n_hash_helps > page_get_n_recs(block->frame)
/ BTR_SEARCH_PAGE_BUILD_LIMIT)
&& (info->n_hash_potential >= BTR_SEARCH_BUILD_LIMIT)) {

if ((!block->is_hashed)
|| (block->n_hash_helps
> 2 * page_get_n_recs(block->frame))
|| (block->n_fields != block->curr_n_fields)
|| (block->n_bytes != block->curr_n_bytes)
|| (block->left_side != block->curr_left_side)) {

/* Build a new hash index on the page */
/*************************************************************************
Updates a hash node reference when it has been unsuccessfully used in a
search which could have succeeded with the used hash parameters. This can
happen because when building a hash index for a page, we do not check
what happens at page boundaries, and therefore there can be misleading
hash nodes. Also, collisions in the fold value can lead to misleading
references. This function lazily fixes these imperfections in the hash
index. */

btr_search_update_hash_ref(
/*=======================*/
btr_search_t* info, /* in: search info */
buf_block_t* block, /* in: buffer block where cursor positioned */
btr_cur_t* cursor) /* in: cursor */

ut_ad(cursor->flag == BTR_CUR_HASH_FAIL);
#ifdef UNIV_SYNC_DEBUG
ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX));
ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED)
|| rw_lock_own(&(block->lock), RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
ut_ad(buf_block_align(btr_cur_get_rec(cursor)) == block);
ut_a(!block->is_hashed || block->index == cursor->index);
&& (info->n_hash_potential > 0)
&& (block->curr_n_fields == info->n_fields)
&& (block->curr_n_bytes == info->n_bytes)
&& (block->curr_left_side == info->left_side)) {
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
*offsets_ = (sizeof offsets_) / sizeof *offsets_;

rec = btr_cur_get_rec(cursor);

if (!page_rec_is_user_rec(rec)) {

index_id = cursor->index->id;

rec_get_offsets(rec, cursor->index, offsets_,
ULINT_UNDEFINED, &heap),
block->curr_n_fields,
block->curr_n_bytes, index_id);
if (UNIV_LIKELY_NULL(heap)) {

#ifdef UNIV_SYNC_DEBUG
ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */

ha_insert_for_fold(btr_search_sys->hash_index, fold, rec);
/*************************************************************************
Updates the search info. */

btr_search_info_update_slow(
/*========================*/
btr_search_t* info, /* in/out: search info */
btr_cur_t* cursor) /* in: cursor which was just positioned */

#ifdef UNIV_SYNC_DEBUG
ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */

block = buf_block_align(btr_cur_get_rec(cursor));

/* NOTE that the following two function calls do NOT protect
info or block->n_fields etc. with any semaphore, to save CPU time!
We cannot assume the fields are consistent when we return from
those calls! */

btr_search_info_update_hash(info, cursor);

build_index = btr_search_update_block_hash_info(info, block, cursor);

if (build_index || (cursor->flag == BTR_CUR_HASH_FAIL)) {

btr_search_check_free_space_in_heap();

if (cursor->flag == BTR_CUR_HASH_FAIL) {
/* Update the hash node reference, if appropriate */

#ifdef UNIV_SEARCH_PERF_STAT
btr_search_n_hash_fail++;
#endif /* UNIV_SEARCH_PERF_STAT */

rw_lock_x_lock(&btr_search_latch);

btr_search_update_hash_ref(info, block, cursor);

rw_lock_x_unlock(&btr_search_latch);
/* Note that since we did not protect block->n_fields etc.
with any semaphore, the values can be inconsistent. We have
to check inside the function call that they make sense. We
also malloc an array and store the values there to make sure
the compiler does not let the function call parameters change
inside the called function. It might be that the compiler
would optimize the call just to pass pointers to block. */

params = mem_alloc(3 * sizeof(ulint));
params[0] = block->n_fields;
params[1] = block->n_bytes;
params[2] = block->left_side;

/* Make sure the compiler cannot deduce the values and do
optimizations */

params2 = params + btr_search_this_is_zero;

btr_search_build_page_hash_index(cursor->index,
/**********************************************************************
Checks if a guessed position for a tree cursor is right. Note that if
mode is PAGE_CUR_LE, which is used in inserts, and the function returns
TRUE, then cursor->up_match and cursor->low_match both have sensible values. */

btr_search_check_guess(
/*===================*/
/* out: TRUE if success */
btr_cur_t* cursor, /* in: guessed cursor position */
ibool can_only_compare_to_cursor_rec,
/* in: if we do not have a latch on the page
of cursor, but only a latch on
btr_search_latch, then ONLY the columns
of the record UNDER the cursor are
protected, not the next or previous record
in the chain: we cannot look at the next or
previous record to check our guess! */
dtuple_t* tuple, /* in: data tuple */
ulint mode, /* in: PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G,
or PAGE_CUR_GE */
mtr_t* mtr) /* in: mtr */
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets = offsets_;
ibool success = FALSE;
*offsets_ = (sizeof offsets_) / sizeof *offsets_;

n_unique = dict_index_get_n_unique_in_tree(cursor->index);

rec = btr_cur_get_rec(cursor);

ut_ad(page_rec_is_user_rec(rec));

offsets = rec_get_offsets(rec, cursor->index, offsets,

cmp = page_cmp_dtuple_rec_with_match(tuple, rec,
offsets, &match, &bytes);

if (mode == PAGE_CUR_GE) {

cursor->up_match = match;

if (match >= n_unique) {

} else if (mode == PAGE_CUR_LE) {

cursor->low_match = match;

} else if (mode == PAGE_CUR_G) {

} else if (mode == PAGE_CUR_L) {

if (can_only_compare_to_cursor_rec) {
/* Since we could not determine if our guess is right just by
looking at the record under the cursor, return FALSE */

if ((mode == PAGE_CUR_G) || (mode == PAGE_CUR_GE)) {
ut_ad(!page_rec_is_infimum(rec));

prev_rec = page_rec_get_prev(rec);

if (page_rec_is_infimum(prev_rec)) {
success = btr_page_get_prev(
buf_frame_align(prev_rec), mtr) == FIL_NULL;

offsets = rec_get_offsets(prev_rec, cursor->index, offsets,

cmp = page_cmp_dtuple_rec_with_match(tuple, prev_rec,
offsets, &match, &bytes);
if (mode == PAGE_CUR_GE) {

ut_ad(!page_rec_is_supremum(rec));

next_rec = page_rec_get_next(rec);

if (page_rec_is_supremum(next_rec)) {
if (btr_page_get_next(
buf_frame_align(next_rec), mtr)

cursor->up_match = 0;

offsets = rec_get_offsets(next_rec, cursor->index, offsets,

cmp = page_cmp_dtuple_rec_with_match(tuple, next_rec,
offsets, &match, &bytes);
if (mode == PAGE_CUR_LE) {
cursor->up_match = match;

if (UNIV_LIKELY_NULL(heap)) {
/**********************************************************************
Tries to guess the right search position based on the hash search info
of the index. Note that if mode is PAGE_CUR_LE, which is used in inserts,
and the function returns TRUE, then cursor->up_match and cursor->low_match
both have sensible values. */

btr_search_guess_on_hash(
/*=====================*/
/* out: TRUE if succeeded */
dict_index_t* index, /* in: index */
btr_search_t* info, /* in: index search info */
dtuple_t* tuple, /* in: logical record */
ulint mode, /* in: PAGE_CUR_L, ... */
ulint latch_mode, /* in: BTR_SEARCH_LEAF, ...;
NOTE that only if has_search_latch
is 0, we will have a latch set on
the cursor page, otherwise we assume
the caller uses his search latch
to protect the record! */
btr_cur_t* cursor, /* out: tree cursor */
ulint has_search_latch,/* in: latch mode the caller
currently has on btr_search_latch:
RW_S_LATCH, RW_X_LATCH, or 0 */
mtr_t* mtr) /* in: mtr */
ulint tuple_n_fields;
ibool can_only_compare_to_cursor_rec = TRUE;

ut_ad(index && info && tuple && cursor && mtr);
ut_ad((latch_mode == BTR_SEARCH_LEAF)
|| (latch_mode == BTR_MODIFY_LEAF));

/* Note that, for efficiency, the struct info may not be protected by
any latch here! */

if (UNIV_UNLIKELY(info->n_hash_potential == 0)) {

cursor->n_fields = info->n_fields;
cursor->n_bytes = info->n_bytes;

tuple_n_fields = dtuple_get_n_fields(tuple);

if (UNIV_UNLIKELY(tuple_n_fields < cursor->n_fields)) {

if (UNIV_UNLIKELY(tuple_n_fields == cursor->n_fields)
&& (cursor->n_bytes > 0)) {

index_id = index->id;

#ifdef UNIV_SEARCH_PERF_STAT

fold = dtuple_fold(tuple, cursor->n_fields, cursor->n_bytes, index_id);
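/* Added note (not in the original source): the fold value is a hash computed
from the first cursor->n_fields complete fields of the search tuple plus
cursor->n_bytes bytes of the next field, mixed with the index id. It is the
key used below to probe btr_search_sys->hash_index, so it must be computed
with exactly the parameters (block->curr_n_fields, block->curr_n_bytes) that
were in effect when the page's records were inserted into the hash index. */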
cursor->flag = BTR_CUR_HASH;

if (UNIV_LIKELY(!has_search_latch)) {
rw_lock_s_lock(&btr_search_latch);

ut_ad(btr_search_latch.writer != RW_LOCK_EX);
ut_ad(btr_search_latch.reader_count > 0);

rec = ha_search_and_get_data(btr_search_sys->hash_index, fold);

if (UNIV_UNLIKELY(!rec)) {

page = buf_frame_align(rec);

if (UNIV_LIKELY(!has_search_latch)) {

!buf_page_get_known_nowait(latch_mode, page,

rw_lock_s_unlock(&btr_search_latch);

can_only_compare_to_cursor_rec = FALSE;

#ifdef UNIV_SYNC_DEBUG
buf_page_dbg_add_level(page, SYNC_TREE_NODE_FROM_HASH);
#endif /* UNIV_SYNC_DEBUG */

block = buf_block_align(page);

if (UNIV_UNLIKELY(block->state == BUF_BLOCK_REMOVE_HASH)) {
if (UNIV_LIKELY(!has_search_latch)) {

btr_leaf_page_release(page, latch_mode, mtr);

ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
ut_ad(page_rec_is_user_rec(rec));

btr_cur_position(index, rec, cursor);

/* Check the validity of the guess within the page */

/* If we only have the latch on btr_search_latch, not on the
page, it only protects the columns of the record the cursor
is positioned on. We cannot look at the next or the previous
record to determine if our guess for the cursor position is
right. */

ut_dulint_cmp(index_id, btr_page_get_index_id(page)), 0)
|| !btr_search_check_guess(cursor,
can_only_compare_to_cursor_rec,

if (UNIV_LIKELY(!has_search_latch)) {
btr_leaf_page_release(page, latch_mode, mtr);

if (UNIV_LIKELY(info->n_hash_potential < BTR_SEARCH_BUILD_LIMIT + 5)) {

info->n_hash_potential++;

/* These lines of code can be used in a debug version to check
the correctness of the searched cursor position: */

info->last_hash_succ = FALSE;

/* Currently, does not work if the following fails: */
ut_ad(!has_search_latch);

btr_leaf_page_release(page, latch_mode, mtr);

btr_cur_search_to_nth_level(index, 0, tuple, mode, latch_mode,

if (mode == PAGE_CUR_GE
&& page_rec_is_supremum(btr_cur_get_rec(&cursor2))) {

/* If mode is PAGE_CUR_GE, then the binary search
in the index tree may actually take us to the supremum
of the previous page */

info->last_hash_succ = FALSE;

btr_pcur_open_on_user_rec(index, tuple, mode, latch_mode,

ut_ad(btr_pcur_get_rec(&pcur) == btr_cur_get_rec(cursor));

ut_ad(btr_cur_get_rec(&cursor2) == btr_cur_get_rec(cursor));

/* NOTE that it is theoretically possible that the above assertions
fail if the page of the cursor gets removed from the buffer pool
meanwhile! Thus it might not be a bug. */

info->last_hash_succ = TRUE;

#ifdef UNIV_SEARCH_PERF_STAT

if (UNIV_LIKELY(!has_search_latch)
&& buf_block_peek_if_too_old(block)) {

buf_page_make_young(page);

/* Increment the page get statistics though we did not really
fix the page: for user info only */

buf_pool->n_page_gets++;

/*-------------------------------------------*/

if (UNIV_LIKELY(!has_search_latch)) {
rw_lock_s_unlock(&btr_search_latch);

cursor->flag = BTR_CUR_HASH_FAIL;

#ifdef UNIV_SEARCH_PERF_STAT

if (info->n_hash_succ > 0) {

info->last_hash_succ = FALSE;
/************************************************************************
Drops a page hash index. */

btr_search_drop_page_hash_index(
/*============================*/
page_t* page) /* in: index page, s- or x-latched, or an index page
for which we know that block->buf_fix_count == 0 */

#ifdef UNIV_SYNC_DEBUG
ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */

rw_lock_s_lock(&btr_search_latch);

block = buf_block_align(page);

if (UNIV_LIKELY(!block->is_hashed)) {

rw_lock_s_unlock(&btr_search_latch);

table = btr_search_sys->hash_index;

#ifdef UNIV_SYNC_DEBUG
ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED)
|| rw_lock_own(&(block->lock), RW_LOCK_EX)
|| (block->buf_fix_count == 0));
#endif /* UNIV_SYNC_DEBUG */

n_fields = block->curr_n_fields;
n_bytes = block->curr_n_bytes;
index = block->index;

/* NOTE: The fields of block must not be accessed after
releasing btr_search_latch, as the index page might only
be s-latched! */

rw_lock_s_unlock(&btr_search_latch);

ut_a(n_fields + n_bytes > 0);
n_recs = page_get_n_recs(page);

/* Calculate and cache fold values into an array for fast deletion
from the hash index */

folds = mem_alloc(n_recs * sizeof(ulint));

rec = page_get_infimum_rec(page);
rec = page_rec_get_next(rec);

index_id = btr_page_get_index_id(page);

ut_a(0 == ut_dulint_cmp(index_id, index->id));

while (!page_rec_is_supremum(rec)) {
offsets = rec_get_offsets(rec, index, offsets,
n_fields + (n_bytes > 0), &heap);
ut_a(rec_offs_n_fields(offsets) == n_fields + (n_bytes > 0));
fold = rec_fold(rec, offsets, n_fields, n_bytes, index_id);

if (fold == prev_fold && prev_fold != 0) {

/* Remove all hash nodes pointing to this page from the

folds[n_cached] = fold;

rec = page_rec_get_next(rec);

if (UNIV_LIKELY_NULL(heap)) {

rw_lock_x_lock(&btr_search_latch);

if (UNIV_UNLIKELY(!block->is_hashed)) {
/* Someone else has meanwhile dropped the hash index */

ut_a(block->index == index);

if (UNIV_UNLIKELY(block->curr_n_fields != n_fields)
|| UNIV_UNLIKELY(block->curr_n_bytes != n_bytes)) {

/* Someone else has meanwhile built a new hash index on the
page, with different parameters */

rw_lock_x_unlock(&btr_search_latch);

for (i = 0; i < n_cached; i++) {

ha_remove_all_nodes_to_page(table, folds[i], page);

block->is_hashed = FALSE;
block->index = NULL;

if (UNIV_UNLIKELY(block->n_pointers)) {

ut_print_timestamp(stderr);

" InnoDB: Corruption of adaptive hash index."
"InnoDB: the hash index to a page of %s,"
" still %lu hash nodes remain.\n",
index->name, (ulong) block->n_pointers);
rw_lock_x_unlock(&btr_search_latch);

btr_search_validate();

rw_lock_x_unlock(&btr_search_latch);
/************************************************************************
Drops a page hash index when a page is freed from a fseg to the file system.
Drops possible hash index if the page happens to be in the buffer pool. */

btr_search_drop_page_hash_when_freed(
/*=================================*/
ulint space, /* in: space id */
ulint page_no) /* in: page number */

is_hashed = buf_page_peek_if_search_hashed(space, page_no);

/* We assume that if the caller has a latch on the page, then the
caller has already dropped the hash index for the page, and we never
get here. Therefore we can acquire the s-latch on the page without
having to fear a deadlock. */

page = buf_page_get_gen(space, page_no, RW_S_LATCH, NULL,
BUF_GET_IF_IN_POOL, __FILE__, __LINE__,

#ifdef UNIV_SYNC_DEBUG
buf_page_dbg_add_level(page, SYNC_TREE_NODE_FROM_HASH);
#endif /* UNIV_SYNC_DEBUG */

btr_search_drop_page_hash_index(page);
/************************************************************************
Builds a hash index on a page with the given parameters. If the page already
has a hash index with different parameters, the old hash index is removed.
If index is non-NULL, this function checks if n_fields and n_bytes are
sensible values, and does not build a hash index if not. */

btr_search_build_page_hash_index(
/*=============================*/
dict_index_t* index, /* in: index for which to build */
page_t* page, /* in: index page, s- or x-latched */
ulint n_fields,/* in: hash this many full fields */
ulint n_bytes,/* in: hash this many bytes from the next
field */
ibool left_side)/* in: hash for searches from left side? */

hash_table_t* table;
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets = offsets_;
*offsets_ = (sizeof offsets_) / sizeof *offsets_;
block = buf_block_align(page);
table = btr_search_sys->hash_index;

#ifdef UNIV_SYNC_DEBUG
ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED)
|| rw_lock_own(&(block->lock), RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */

rw_lock_s_lock(&btr_search_latch);

if (block->is_hashed && ((block->curr_n_fields != n_fields)
|| (block->curr_n_bytes != n_bytes)
|| (block->curr_left_side != left_side))) {

rw_lock_s_unlock(&btr_search_latch);

btr_search_drop_page_hash_index(page);

rw_lock_s_unlock(&btr_search_latch);

n_recs = page_get_n_recs(page);

/* Check that the values for hash index build are sensible */

if (n_fields + n_bytes == 0) {

if (dict_index_get_n_unique_in_tree(index) < n_fields
|| (dict_index_get_n_unique_in_tree(index) == n_fields

/* Calculate and cache fold values and corresponding records into
an array for fast insertion to the hash index */

folds = mem_alloc(n_recs * sizeof(ulint));
recs = mem_alloc(n_recs * sizeof(rec_t*));

index_id = btr_page_get_index_id(page);

rec = page_get_infimum_rec(page);
rec = page_rec_get_next(rec);

offsets = rec_get_offsets(rec, index, offsets,
n_fields + (n_bytes > 0), &heap);

if (!page_rec_is_supremum(rec)) {
ut_a(n_fields <= rec_offs_n_fields(offsets));

ut_a(n_fields < rec_offs_n_fields(offsets));

fold = rec_fold(rec, offsets, n_fields, n_bytes, index_id);

folds[n_cached] = fold;
recs[n_cached] = rec;

next_rec = page_rec_get_next(rec);

if (page_rec_is_supremum(next_rec)) {

folds[n_cached] = fold;
recs[n_cached] = rec;

offsets = rec_get_offsets(next_rec, index, offsets,
n_fields + (n_bytes > 0), &heap);
next_fold = rec_fold(next_rec, offsets, n_fields,

if (fold != next_fold) {
/* Insert an entry into the hash index */

folds[n_cached] = next_fold;
recs[n_cached] = next_rec;

folds[n_cached] = fold;
recs[n_cached] = rec;

btr_search_check_free_space_in_heap();

rw_lock_x_lock(&btr_search_latch);

if (block->is_hashed && ((block->curr_n_fields != n_fields)
|| (block->curr_n_bytes != n_bytes)
|| (block->curr_left_side != left_side))) {

block->is_hashed = TRUE;
block->n_hash_helps = 0;

block->curr_n_fields = n_fields;
block->curr_n_bytes = n_bytes;
block->curr_left_side = left_side;
block->index = index;

for (i = 0; i < n_cached; i++) {

ha_insert_for_fold(table, folds[i], recs[i]);

rw_lock_x_unlock(&btr_search_latch);

if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
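/* Added summary of the build protocol above (not in the original source):
the fold values and record pointers are collected into the folds[] and recs[]
arrays after btr_search_latch has been released, holding only the page latch,
so the expensive rec_fold() work happens outside the exclusive latch. Only
then is btr_search_latch x-latched, block->curr_* re-checked against the
requested parameters (another thread may have built a hash index with
different parameters meanwhile), and the cached entries inserted with
ha_insert_for_fold(). */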
/************************************************************************
Moves or deletes hash entries for moved records. If new_page is already hashed,
then the hash index for page, if any, is dropped. If new_page is not hashed,
and page is hashed, then a new hash index is built to new_page with the same
parameters as page (this often happens when a page is split). */
btr_search_move_or_delete_hash_entries(
/*===================================*/
page_t* new_page, /* in: records are copied
to this page */
page_t* page, /* in: index page from which
records were copied, and the
copied records will be deleted
from this page */
dict_index_t* index) /* in: record descriptor */

buf_block_t* new_block;

block = buf_block_align(page);
new_block = buf_block_align(new_page);
ut_a(page_is_comp(page) == page_is_comp(new_page));

#ifdef UNIV_SYNC_DEBUG
ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
ut_ad(rw_lock_own(&(new_block->lock), RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */

ut_a(!new_block->is_hashed || new_block->index == index);
ut_a(!block->is_hashed || block->index == index);

rw_lock_s_lock(&btr_search_latch);

if (new_block->is_hashed) {

rw_lock_s_unlock(&btr_search_latch);

btr_search_drop_page_hash_index(page);

if (block->is_hashed) {

n_fields = block->curr_n_fields;
n_bytes = block->curr_n_bytes;
left_side = block->curr_left_side;

new_block->n_fields = block->curr_n_fields;
new_block->n_bytes = block->curr_n_bytes;
new_block->left_side = left_side;

rw_lock_s_unlock(&btr_search_latch);

ut_a(n_fields + n_bytes > 0);

btr_search_build_page_hash_index(index, new_page, n_fields,
n_bytes, left_side);
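/* Added note (not in the original source): n_fields, n_bytes and left_side
were copied into local variables above while btr_search_latch was still held,
because block->curr_n_fields etc. may change once the latch is released; the
assertions under "#if 1" below only check that they did not change during the
rebuild. */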
#if 1 /* TODO: safe to remove? */
ut_a(n_fields == block->curr_n_fields);
ut_a(n_bytes == block->curr_n_bytes);
ut_a(left_side == block->curr_left_side);

rw_lock_s_unlock(&btr_search_latch);
/************************************************************************
Updates the page hash index when a single record is deleted from a page. */

btr_search_update_hash_on_delete(
/*=============================*/
btr_cur_t* cursor) /* in: cursor which was positioned on the
record to delete using btr_cur_search_...,
the record is not yet deleted */

hash_table_t* table;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
mem_heap_t* heap = NULL;
*offsets_ = (sizeof offsets_) / sizeof *offsets_;

rec = btr_cur_get_rec(cursor);

block = buf_block_align(rec);

#ifdef UNIV_SYNC_DEBUG
ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */

if (!block->is_hashed) {

ut_a(block->index == cursor->index);
ut_a(block->curr_n_fields + block->curr_n_bytes > 0);

table = btr_search_sys->hash_index;

index_id = cursor->index->id;
fold = rec_fold(rec, rec_get_offsets(rec, cursor->index, offsets_,
ULINT_UNDEFINED, &heap),
block->curr_n_fields, block->curr_n_bytes, index_id);
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);

rw_lock_x_lock(&btr_search_latch);

found = ha_search_and_delete_if_found(table, fold, rec);

rw_lock_x_unlock(&btr_search_latch);
/************************************************************************
Updates the page hash index when a single record is inserted on a page. */

btr_search_update_hash_node_on_insert(
/*==================================*/
btr_cur_t* cursor) /* in: cursor which was positioned to the
place to insert using btr_cur_search_...,
and the new record has been inserted next
to the cursor */

hash_table_t* table;

rec = btr_cur_get_rec(cursor);

block = buf_block_align(rec);

#ifdef UNIV_SYNC_DEBUG
ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */

if (!block->is_hashed) {

ut_a(block->index == cursor->index);

rw_lock_x_lock(&btr_search_latch);

if ((cursor->flag == BTR_CUR_HASH)
&& (cursor->n_fields == block->curr_n_fields)
&& (cursor->n_bytes == block->curr_n_bytes)
&& !block->curr_left_side) {

table = btr_search_sys->hash_index;

ha_search_and_update_if_found(table, cursor->fold, rec,
page_rec_get_next(rec));

rw_lock_x_unlock(&btr_search_latch);

rw_lock_x_unlock(&btr_search_latch);

btr_search_update_hash_on_insert(cursor);
/************************************************************************
Updates the page hash index when a single record is inserted on a page. */

btr_search_update_hash_on_insert(
/*=============================*/
btr_cur_t* cursor) /* in: cursor which was positioned to the
place to insert using btr_cur_search_...,
and the new record has been inserted next
to the cursor */

hash_table_t* table;
ulint next_fold = 0; /* remove warning (??? bug ???) */
ibool locked = FALSE;
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets = offsets_;
*offsets_ = (sizeof offsets_) / sizeof *offsets_;

table = btr_search_sys->hash_index;

btr_search_check_free_space_in_heap();

rec = btr_cur_get_rec(cursor);

block = buf_block_align(rec);

#ifdef UNIV_SYNC_DEBUG
ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */

if (!block->is_hashed) {

ut_a(block->index == cursor->index);

index_id = cursor->index->id;

n_fields = block->curr_n_fields;
n_bytes = block->curr_n_bytes;
left_side = block->curr_left_side;

ins_rec = page_rec_get_next(rec);
next_rec = page_rec_get_next(ins_rec);

offsets = rec_get_offsets(ins_rec, cursor->index, offsets,
ULINT_UNDEFINED, &heap);
ins_fold = rec_fold(ins_rec, offsets, n_fields, n_bytes, index_id);

if (!page_rec_is_supremum(next_rec)) {
offsets = rec_get_offsets(next_rec, cursor->index, offsets,
n_fields + (n_bytes > 0), &heap);
next_fold = rec_fold(next_rec, offsets, n_fields,

if (!page_rec_is_infimum(rec)) {
offsets = rec_get_offsets(rec, cursor->index, offsets,
n_fields + (n_bytes > 0), &heap);
fold = rec_fold(rec, offsets, n_fields, n_bytes, index_id);
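/* Added summary (not in the original source): at this point up to three fold
values are known: fold for the record just before the insert point, ins_fold
for the newly inserted record, and next_fold for the record after it (unless
that is the supremum). The branches below add hash entries only where the
fold value changes across a record boundary, so that with left_side each
distinct fold keeps pointing at the leftmost record of its group, and
otherwise at the rightmost. */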
rw_lock_x_lock(&btr_search_latch);

ha_insert_for_fold(table, ins_fold, ins_rec);

goto check_next_rec;

if (fold != ins_fold) {

rw_lock_x_lock(&btr_search_latch);

ha_insert_for_fold(table, fold, rec);

ha_insert_for_fold(table, ins_fold, ins_rec);

if (page_rec_is_supremum(next_rec)) {

rw_lock_x_lock(&btr_search_latch);

ha_insert_for_fold(table, ins_fold, ins_rec);

if (ins_fold != next_fold) {

rw_lock_x_lock(&btr_search_latch);

ha_insert_for_fold(table, ins_fold, ins_rec);

fputs("Hash insert for ", stderr);
dict_index_name_print(stderr, cursor->index);
fprintf(stderr, " fold %lu\n", ins_fold);

ha_insert_for_fold(table, next_fold, next_rec);

if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);

rw_lock_x_unlock(&btr_search_latch);
/************************************************************************
Validates the search system. */

btr_search_validate(void)
/*=====================*/
/* out: TRUE if ok */

ulint n_page_dumps = 0;
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets = offsets_;

/* How many cells to check before temporarily releasing
btr_search_latch. */
ulint chunk_size = 10000;

*offsets_ = (sizeof offsets_) / sizeof *offsets_;

rw_lock_x_lock(&btr_search_latch);

cell_count = hash_get_n_cells(btr_search_sys->hash_index);

for (i = 0; i < cell_count; i++) {
/* We release btr_search_latch every once in a while to
give other queries a chance to run. */
if ((i != 0) && ((i % chunk_size) == 0)) {
rw_lock_x_unlock(&btr_search_latch);

rw_lock_x_lock(&btr_search_latch);

node = hash_get_nth_cell(btr_search_sys->hash_index, i)->node;

while (node != NULL) {
block = buf_block_align(node->data);
page = buf_frame_align(node->data);
offsets = rec_get_offsets((rec_t*) node->data,
block->index, offsets,
block->curr_n_fields
+ (block->curr_n_bytes > 0),

if (!block->is_hashed || node->fold
!= rec_fold((rec_t*)(node->data),
block->curr_n_fields,
block->curr_n_bytes,
btr_page_get_index_id(page))) {

ut_print_timestamp(stderr);

" InnoDB: Error in an adaptive hash"
" index pointer to page %lu\n"
"InnoDB: ptr mem address %p"
" index id %lu %lu,"
" node fold %lu, rec fold %lu\n",
(ulong) buf_frame_get_page_no(page),
(ulong) ut_dulint_get_high(
btr_page_get_index_id(page)),
(ulong) ut_dulint_get_low(
btr_page_get_index_id(page)),
(ulong) rec_fold((rec_t*)(node->data),
block->curr_n_fields,
block->curr_n_bytes,
btr_page_get_index_id(

fputs("InnoDB: Record ", stderr);
rec_print_new(stderr, (rec_t*)node->data,
fprintf(stderr, "\nInnoDB: on that page."
" Page mem address %p, is hashed %lu,"
" n fields %lu, n bytes %lu\n"
"InnoDB: side %lu\n",
(void*) page, (ulong) block->is_hashed,
(ulong) block->curr_n_fields,
(ulong) block->curr_n_bytes,
(ulong) block->curr_left_side);

if (n_page_dumps < 20) {
buf_page_print(page);

for (i = 0; i < cell_count; i += chunk_size) {
ulint end_index = ut_min(i + chunk_size - 1, cell_count - 1);

/* We release btr_search_latch every once in a while to
give other queries a chance to run. */

rw_lock_x_unlock(&btr_search_latch);

rw_lock_x_lock(&btr_search_latch);

if (!ha_validate(btr_search_sys->hash_index, i, end_index)) {

rw_lock_x_unlock(&btr_search_latch);
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);