/************************************************************************
The index tree adaptive search

Created 2/17/1996 Heikki Tuuri
*************************************************************************/

#include "page0page.h"

/* Flag: has the search system been disabled? */
UNIV_INTERN ibool btr_search_disabled = FALSE;

/* A dummy variable to fool the compiler */
UNIV_INTERN ulint btr_search_this_is_zero = 0;
#ifdef UNIV_SEARCH_PERF_STAT
UNIV_INTERN ulint btr_search_n_succ = 0;
UNIV_INTERN ulint btr_search_n_hash_fail = 0;
#endif /* UNIV_SEARCH_PERF_STAT */

/* padding to prevent other memory update
hotspots from residing on the same memory
cache line as btr_search_latch */
UNIV_INTERN byte btr_sea_pad1[64];
/* The latch protecting the adaptive search system: this latch protects the
(1) positions of records on those pages where a hash index has been built.
NOTE: It does not protect values of non-ordering fields within a record from
being updated in-place! We can use fact (1) to perform unique searches to
indexes. */

/* We will allocate the latch from dynamic memory to get it to the
same DRAM page as other hotspot semaphores */
UNIV_INTERN rw_lock_t* btr_search_latch_temp;

/* padding to prevent other memory update hotspots from residing on
the same memory cache line */
UNIV_INTERN byte btr_sea_pad2[64];

UNIV_INTERN btr_search_sys_t* btr_search_sys;
/* If at least (number of records on the page) / BTR_SEARCH_PAGE_BUILD_LIMIT
accesses to a page would have succeeded using a hash index, a hash index is
built on the page, assuming the global limit below has also been reached */
#define BTR_SEARCH_PAGE_BUILD_LIMIT	16

/* The global limit for consecutive potentially successful hash searches,
before hash index building is started */
#define BTR_SEARCH_BUILD_LIMIT		100
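
/* Illustrative example (not from the original source): with the defaults
above, a page holding 320 user records only gets a hash index built on it
after more than 320 / BTR_SEARCH_PAGE_BUILD_LIMIT = 20 accesses to the page
would potentially have succeeded through the hash, and only once the index
as a whole has accumulated BTR_SEARCH_BUILD_LIMIT = 100 consecutive
potentially successful hash searches (see info->n_hash_potential and
block->n_hash_helps below). */
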
/************************************************************************
Builds a hash index on a page with the given parameters. If the page already
has a hash index with different parameters, the old hash index is removed.
If index is non-NULL, this function checks if n_fields and n_bytes are
sensible values, and does not build a hash index if not. */
btr_search_build_page_hash_index(
/*=============================*/
	dict_index_t*	index,	/* in: index for which to build, or NULL if
				not known */
	buf_block_t*	block,	/* in: index page, s- or x-latched */
	ulint		n_fields,/* in: hash this many full fields */
	ulint		n_bytes,/* in: hash this many bytes from the next
				field */
	ibool		left_side);/* in: hash for searches from left side? */
/*********************************************************************
This function should be called before reserving any btr search mutex, if
the intended operation might add nodes to the search system hash table.
Because of the latching order, once we have reserved the btr search system
latch, we cannot allocate a free frame from the buffer pool. Checks that
there is a free buffer frame allocated for the hash table heap in the btr
search system. If not, allocates a free frame for the heap. This check makes
it probable that, when we have reserved the btr search system latch and we
need to allocate a new node to the hash table, it will succeed. However, the
check will not guarantee success. */
btr_search_check_free_space_in_heap(void)
/*=====================================*/
#ifdef UNIV_SYNC_DEBUG
	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */

	table = btr_search_sys->hash_index;

	/* Note that we peek the value of heap->free_block without reserving
	the latch: this is ok, because we will not guarantee that there will
	be enough free space in the hash table. */

	if (heap->free_block == NULL) {
		buf_block_t*	block = buf_block_alloc(0);

		rw_lock_x_lock(&btr_search_latch);

		if (heap->free_block == NULL) {
			heap->free_block = block;
		} else {
			buf_block_free(block);
		}

		rw_lock_x_unlock(&btr_search_latch);
	}
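
/* Typical usage sketch (illustrative only, not part of the original file):
a caller that may add nodes to the hash table calls this function before
taking the search latch in exclusive mode, for example

	btr_search_check_free_space_in_heap();
	rw_lock_x_lock(&btr_search_latch);
	... ha_insert_for_fold(btr_search_sys->hash_index, ...) ...
	rw_lock_x_unlock(&btr_search_latch);

as btr_search_info_update_slow() does further below. */
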
/*********************************************************************
Creates and initializes the adaptive search system at a database start. */
btr_search_sys_create(
/*==================*/
	ulint	hash_size)	/* in: hash index hash table size */

	/* We allocate the search latch from dynamic memory:
	see above at the global variable definition */

	btr_search_latch_temp = mem_alloc(sizeof(rw_lock_t));

	rw_lock_create(&btr_search_latch, SYNC_SEARCH_SYS);

	btr_search_sys = mem_alloc(sizeof(btr_search_sys_t));

	btr_search_sys->hash_index = ha_create(hash_size, 0, 0);
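
/* Usage sketch (illustrative only; the actual call site is not part of this
excerpt): btr_search_sys_create() is invoked once at database start, with a
hash table size derived from the buffer pool size, e.g.

	btr_search_sys_create(buf_pool_get_curr_size() / sizeof(void*) / 64);

The divisor shown here is an assumed tuning constant, not a value defined in
this file. */
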
/************************************************************************
Disable the adaptive hash search system and empty the index. */
btr_search_disable(void)
/*====================*/

	btr_search_disabled = TRUE;
	rw_lock_x_lock(&btr_search_latch);

	ha_clear(btr_search_sys->hash_index);

	rw_lock_x_unlock(&btr_search_latch);

/************************************************************************
Enable the adaptive hash search system. */
btr_search_enable(void)
/*====================*/

	btr_search_disabled = FALSE;
/*********************************************************************
Creates and initializes a search info struct. */
btr_search_info_create(
/*===================*/
				/* out, own: search info struct */
	mem_heap_t*	heap)	/* in: heap where created */

	info = mem_heap_alloc(heap, sizeof(btr_search_t));

#ifdef UNIV_DEBUG
	info->magic_n = BTR_SEARCH_MAGIC_N;
#endif /* UNIV_DEBUG */

	info->root_guess = NULL;

	info->hash_analysis = 0;
	info->n_hash_potential = 0;

	info->last_hash_succ = FALSE;

#ifdef UNIV_SEARCH_PERF_STAT
	info->n_hash_succ = 0;
	info->n_hash_fail = 0;
	info->n_patt_succ = 0;
	info->n_searches = 0;
#endif /* UNIV_SEARCH_PERF_STAT */

	/* Set some sensible values */

	info->left_side = TRUE;

/*************************************************************************
Updates the search info of an index about hash successes. NOTE that info
is NOT protected by any semaphore, to save CPU time! Do not assume its fields
are consistent. */
btr_search_info_update_hash(
/*========================*/
	btr_search_t*	info,	/* in/out: search info */
	btr_cur_t*	cursor)	/* in: cursor which was just positioned */
#ifdef UNIV_SYNC_DEBUG
	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */

	index = cursor->index;

	if (dict_index_is_ibuf(index)) {
		/* So many deletes are performed on an insert buffer tree
		that we do not consider a hash index useful on it: */

		return;
	}

	n_unique = dict_index_get_n_unique_in_tree(index);

	if (info->n_hash_potential == 0) {

		goto set_new_recomm;
	}

	/* Test if the search would have succeeded using the recommended
	hash prefix */

	if (info->n_fields >= n_unique && cursor->up_match >= n_unique) {
increment_potential:
		info->n_hash_potential++;

		return;
	}

	cmp = ut_pair_cmp(info->n_fields, info->n_bytes,
			  cursor->low_match, cursor->low_bytes);

	if (info->left_side ? cmp <= 0 : cmp > 0) {

		goto set_new_recomm;
	}

	cmp = ut_pair_cmp(info->n_fields, info->n_bytes,
			  cursor->up_match, cursor->up_bytes);

	if (info->left_side ? cmp <= 0 : cmp > 0) {

		goto increment_potential;
	}

set_new_recomm:
	/* We have to set a new recommendation; skip the hash analysis
	for a while to avoid unnecessary CPU time usage when there is no
	chance for success */

	info->hash_analysis = 0;

	cmp = ut_pair_cmp(cursor->up_match, cursor->up_bytes,
			  cursor->low_match, cursor->low_bytes);
	if (cmp == 0) {
		info->n_hash_potential = 0;

		/* For extra safety, we set some sensible values here */

		info->left_side = TRUE;
	} else if (cmp > 0) {
		info->n_hash_potential = 1;

		if (cursor->up_match >= n_unique) {
			info->n_fields = n_unique;
		} else if (cursor->low_match < cursor->up_match) {
			info->n_fields = cursor->low_match + 1;
		} else {
			info->n_fields = cursor->low_match;
			info->n_bytes = cursor->low_bytes + 1;
		}

		info->left_side = TRUE;
	} else {
		info->n_hash_potential = 1;

		if (cursor->low_match >= n_unique) {
			info->n_fields = n_unique;
		} else if (cursor->low_match > cursor->up_match) {
			info->n_fields = cursor->up_match + 1;
		} else {
			info->n_fields = cursor->up_match;
			info->n_bytes = cursor->up_bytes + 1;
		}

		info->left_side = FALSE;
	}
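
	/* Worked example (illustrative, not from the original source):
	suppose n_unique = 4 in the tree and the last search positioned the
	cursor with cursor->low_match = 2 and cursor->up_match = 3. Then
	cmp = ut_pair_cmp(3, up_bytes, 2, low_bytes) > 0, so the branch above
	recommends hashing n_fields = low_match + 1 = 3 full fields with
	left_side = TRUE: a three-field prefix of the tuple no longer matches
	the record on the low side, which agrees on only two fields. */
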
/*************************************************************************
Updates the block search info on hash successes. NOTE that info and
block->n_hash_helps, n_fields, n_bytes, side are NOT protected by any
semaphore, to save CPU time! Do not assume the fields are consistent. */
btr_search_update_block_hash_info(
/*==============================*/
				/* out: TRUE if building a (new) hash index on
				the block is recommended */
	btr_search_t*	info,	/* in: search info */
	buf_block_t*	block,	/* in: buffer block */
	btr_cur_t*	cursor __attribute__((unused)))
#ifdef UNIV_SYNC_DEBUG
	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
	ut_ad(rw_lock_own(&block->lock, RW_LOCK_SHARED)
	      || rw_lock_own(&block->lock, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */

	info->last_hash_succ = FALSE;

	ut_a(buf_block_state_valid(block));
	ut_ad(info->magic_n == BTR_SEARCH_MAGIC_N);

	if ((block->n_hash_helps > 0)
	    && (info->n_hash_potential > 0)
	    && (block->n_fields == info->n_fields)
	    && (block->n_bytes == info->n_bytes)
	    && (block->left_side == info->left_side)) {

		if ((block->is_hashed)
		    && (block->curr_n_fields == info->n_fields)
		    && (block->curr_n_bytes == info->n_bytes)
		    && (block->curr_left_side == info->left_side)) {

			/* The search would presumably have succeeded using
			the hash index */

			info->last_hash_succ = TRUE;
		}

		block->n_hash_helps++;
	} else {
		block->n_hash_helps = 1;
		block->n_fields = info->n_fields;
		block->n_bytes = info->n_bytes;
		block->left_side = info->left_side;
	}

#ifdef UNIV_DEBUG
	if (cursor->index->table->does_not_fit_in_memory) {
		block->n_hash_helps = 0;
	}
#endif /* UNIV_DEBUG */

	if ((block->n_hash_helps > page_get_n_recs(block->frame)
	     / BTR_SEARCH_PAGE_BUILD_LIMIT)
	    && (info->n_hash_potential >= BTR_SEARCH_BUILD_LIMIT)) {

		if ((!block->is_hashed)
		    || (block->n_hash_helps
			> 2 * page_get_n_recs(block->frame))
		    || (block->n_fields != block->curr_n_fields)
		    || (block->n_bytes != block->curr_n_bytes)
		    || (block->left_side != block->curr_left_side)) {

			/* Build a new hash index on the page */

			return(TRUE);
		}
	}

	return(FALSE);

/*************************************************************************
Updates a hash node reference when it has been unsuccessfully used in a
search which could have succeeded with the used hash parameters. This can
happen because when building a hash index for a page, we do not check
what happens at page boundaries, and therefore there can be misleading
hash nodes. Also, collisions in the fold value can lead to misleading
references. This function lazily fixes these imperfections in the hash
index. */
btr_search_update_hash_ref(
/*=======================*/
	btr_search_t*	info,	/* in: search info */
	buf_block_t*	block,	/* in: buffer block where cursor positioned */
	btr_cur_t*	cursor)	/* in: cursor */
	ut_ad(cursor->flag == BTR_CUR_HASH_FAIL);
#ifdef UNIV_SYNC_DEBUG
	ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX));
	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED)
	      || rw_lock_own(&(block->lock), RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
	ut_ad(page_align(btr_cur_get_rec(cursor))
	      == buf_block_get_frame(block));

	if (!block->is_hashed) {

		return;
	}

	ut_a(block->index == cursor->index);
	ut_a(!dict_index_is_ibuf(cursor->index));

	if ((info->n_hash_potential > 0)
	    && (block->curr_n_fields == info->n_fields)
	    && (block->curr_n_bytes == info->n_bytes)
	    && (block->curr_left_side == info->left_side)) {
		mem_heap_t*	heap		= NULL;
		ulint		offsets_[REC_OFFS_NORMAL_SIZE];
		rec_offs_init(offsets_);

		rec = btr_cur_get_rec(cursor);

		if (!page_rec_is_user_rec(rec)) {

			return;
		}

		index_id = cursor->index->id;
		fold = rec_fold(rec,
				rec_get_offsets(rec, cursor->index, offsets_,
						ULINT_UNDEFINED, &heap),
				block->curr_n_fields,
				block->curr_n_bytes, index_id);
		if (UNIV_LIKELY_NULL(heap)) {
			mem_heap_free(heap);
		}
#ifdef UNIV_SYNC_DEBUG
		ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */

		ha_insert_for_fold(btr_search_sys->hash_index, fold,
				   block, rec);
	}

/*************************************************************************
Updates the search info. */
btr_search_info_update_slow(
/*========================*/
	btr_search_t*	info,	/* in/out: search info */
	btr_cur_t*	cursor)	/* in: cursor which was just positioned */
#ifdef UNIV_SYNC_DEBUG
	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */

	block = btr_cur_get_block(cursor);

	/* NOTE that the following two function calls do NOT protect
	info or block->n_fields etc. with any semaphore, to save CPU time!
	We cannot assume the fields are consistent when we return from
	those calls! */

	btr_search_info_update_hash(info, cursor);

	build_index = btr_search_update_block_hash_info(info, block, cursor);

	if (build_index || (cursor->flag == BTR_CUR_HASH_FAIL)) {

		btr_search_check_free_space_in_heap();
	}

	if (cursor->flag == BTR_CUR_HASH_FAIL) {
		/* Update the hash node reference, if appropriate */

#ifdef UNIV_SEARCH_PERF_STAT
		btr_search_n_hash_fail++;
#endif /* UNIV_SEARCH_PERF_STAT */

		rw_lock_x_lock(&btr_search_latch);

		btr_search_update_hash_ref(info, block, cursor);

		rw_lock_x_unlock(&btr_search_latch);
	}

	if (build_index) {
		/* Note that since we did not protect block->n_fields etc.
		with any semaphore, the values can be inconsistent. We have
		to check inside the function call that they make sense. We
		also malloc an array and store the values there to make sure
		the compiler does not let the function call parameters change
		inside the called function. It might be that the compiler
		would optimize the call just to pass pointers to block. */

		params = mem_alloc(3 * sizeof(ulint));
		params[0] = block->n_fields;
		params[1] = block->n_bytes;
		params[2] = block->left_side;

		/* Make sure the compiler cannot deduce the values and do
		optimizations */

		params2 = params + btr_search_this_is_zero;

		btr_search_build_page_hash_index(cursor->index,
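		/* The remainder of this call is not shown in this excerpt;
		an illustrative completion (assumed, following the params[]
		array built above) would pass the copied values and free the
		array afterwards:

			btr_search_build_page_hash_index(cursor->index, block,
							 params2[0],
							 params2[1],
							 params2[2]);
			mem_free(params);
		*/
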
/**********************************************************************
Checks if a guessed position for a tree cursor is right. Note that if
mode is PAGE_CUR_LE, which is used in inserts, and the function returns
TRUE, then cursor->up_match and cursor->low_match both have sensible values. */
btr_search_check_guess(
/*===================*/
				/* out: TRUE if success */
	btr_cur_t*	cursor,	/* in: guessed cursor position */
	ibool		can_only_compare_to_cursor_rec,
				/* in: if we do not have a latch on the page
				of cursor, but only a latch on
				btr_search_latch, then ONLY the columns
				of the record UNDER the cursor are
				protected, not the next or previous record
				in the chain: we cannot look at the next or
				previous record to check our guess! */
	const dtuple_t*	tuple,	/* in: data tuple */
	ulint		mode,	/* in: PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G,
				or PAGE_CUR_GE */
	mtr_t*		mtr)	/* in: mtr */
	mem_heap_t*	heap		= NULL;
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
	ulint*		offsets		= offsets_;
	ibool		success		= FALSE;
	rec_offs_init(offsets_);

	n_unique = dict_index_get_n_unique_in_tree(cursor->index);

	rec = btr_cur_get_rec(cursor);

	ut_ad(page_rec_is_user_rec(rec));

	offsets = rec_get_offsets(rec, cursor->index, offsets,
				  n_unique, &heap);
	cmp = page_cmp_dtuple_rec_with_match(tuple, rec,
					     offsets, &match, &bytes);

	if (mode == PAGE_CUR_GE) {
		cursor->up_match = match;

		if (match >= n_unique) {
	} else if (mode == PAGE_CUR_LE) {
		cursor->low_match = match;
	} else if (mode == PAGE_CUR_G) {
	} else if (mode == PAGE_CUR_L) {

	if (can_only_compare_to_cursor_rec) {
		/* Since we could not determine if our guess is right just by
		looking at the record under the cursor, return FALSE */

	if ((mode == PAGE_CUR_G) || (mode == PAGE_CUR_GE)) {
		ut_ad(!page_rec_is_infimum(rec));

		prev_rec = page_rec_get_prev(rec);

		if (page_rec_is_infimum(prev_rec)) {
			success = btr_page_get_prev(page_align(prev_rec), mtr)

		offsets = rec_get_offsets(prev_rec, cursor->index, offsets,
					  n_unique, &heap);
		cmp = page_cmp_dtuple_rec_with_match(tuple, prev_rec,
						     offsets, &match, &bytes);
		if (mode == PAGE_CUR_GE) {
	} else {
		ut_ad(!page_rec_is_supremum(rec));

		next_rec = page_rec_get_next(rec);

		if (page_rec_is_supremum(next_rec)) {
			if (btr_page_get_next(page_align(next_rec), mtr)

				cursor->up_match = 0;

		offsets = rec_get_offsets(next_rec, cursor->index, offsets,
					  n_unique, &heap);
		cmp = page_cmp_dtuple_rec_with_match(tuple, next_rec,
						     offsets, &match, &bytes);
		if (mode == PAGE_CUR_LE) {
			cursor->up_match = match;

	if (UNIV_LIKELY_NULL(heap)) {
		mem_heap_free(heap);
	}
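
	/* Worked example (illustrative): with mode == PAGE_CUR_LE the guess
	is accepted only when the tuple compares >= the record under the
	cursor and < the record that follows it. If the following record is
	the page supremum, the true successor lives on another page, so the
	guess can only be accepted when there is no next page at all; and
	when can_only_compare_to_cursor_rec is set, the neighbouring records
	may not be inspected at all, so the function simply returns FALSE. */
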
/**********************************************************************
Tries to guess the right search position based on the hash search info
of the index. Note that if mode is PAGE_CUR_LE, which is used in inserts,
and the function returns TRUE, then cursor->up_match and cursor->low_match
both have sensible values. */
btr_search_guess_on_hash(
/*=====================*/
					/* out: TRUE if succeeded */
	dict_index_t*	index,		/* in: index */
	btr_search_t*	info,		/* in: index search info */
	const dtuple_t*	tuple,		/* in: logical record */
	ulint		mode,		/* in: PAGE_CUR_L, ... */
	ulint		latch_mode,	/* in: BTR_SEARCH_LEAF, ...;
					NOTE that only if has_search_latch
					is 0, we will have a latch set on
					the cursor page, otherwise we assume
					the caller uses his search latch
					to protect the record! */
	btr_cur_t*	cursor,		/* out: tree cursor */
	ulint		has_search_latch,/* in: latch mode the caller
					currently has on btr_search_latch:
					RW_S_LATCH, RW_X_LATCH, or 0 */
	mtr_t*		mtr)		/* in: mtr */
	ut_ad(index && info && tuple && cursor && mtr);
	ut_ad((latch_mode == BTR_SEARCH_LEAF)
	      || (latch_mode == BTR_MODIFY_LEAF));

	/* Note that, for efficiency, the struct info may not be protected by
	any latch here! */

	if (UNIV_UNLIKELY(info->n_hash_potential == 0)) {

		return(FALSE);
	}

	cursor->n_fields = info->n_fields;
	cursor->n_bytes = info->n_bytes;

	if (UNIV_UNLIKELY(dtuple_get_n_fields(tuple)
			  < cursor->n_fields + (cursor->n_bytes > 0))) {

		return(FALSE);
	}

	index_id = index->id;

#ifdef UNIV_SEARCH_PERF_STAT
	info->n_hash_succ++;
#endif
	fold = dtuple_fold(tuple, cursor->n_fields, cursor->n_bytes, index_id);

	cursor->flag = BTR_CUR_HASH;

	if (UNIV_LIKELY(!has_search_latch)) {
		rw_lock_s_lock(&btr_search_latch);
	}

	ut_ad(btr_search_latch.writer != RW_LOCK_EX);
	ut_ad(btr_search_latch.reader_count > 0);

	rec = ha_search_and_get_data(btr_search_sys->hash_index, fold);

	if (UNIV_UNLIKELY(!rec)) {
		goto failure_unlock;
	}

	page = page_align(rec);
	{
		ulint	page_no		= page_get_page_no(page);
		ulint	space_id	= page_get_space_id(page);

		buf_pool_mutex_enter();
		block = (buf_block_t*) buf_page_hash_get(space_id, page_no);
		buf_pool_mutex_exit();
	}

	if (UNIV_UNLIKELY(!block)
	    || UNIV_UNLIKELY(buf_block_get_state(block)
			     != BUF_BLOCK_FILE_PAGE)) {

		/* The block is most probably being freed.
		The function buf_LRU_search_and_free_block()
		first removes the block from buf_pool->page_hash
		by calling buf_LRU_block_remove_hashed_page().
		After that, it invokes btr_search_drop_page_hash_index().
		Let us pretend that the block was also removed from
		the adaptive hash index. */

		goto failure_unlock;
	}
	if (UNIV_LIKELY(!has_search_latch)) {

		!buf_page_get_known_nowait(latch_mode, block,

		rw_lock_s_unlock(&btr_search_latch);

#ifdef UNIV_SYNC_DEBUG
	buf_block_dbg_add_level(block, SYNC_TREE_NODE_FROM_HASH);
#endif /* UNIV_SYNC_DEBUG */

	if (UNIV_UNLIKELY(buf_block_get_state(block)
			  == BUF_BLOCK_REMOVE_HASH)) {
		if (UNIV_LIKELY(!has_search_latch)) {
			btr_leaf_page_release(block, latch_mode, mtr);
		}

		goto failure;
	}

	ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
	ut_ad(page_rec_is_user_rec(rec));

	btr_cur_position(index, rec, block, cursor);

	/* Check the validity of the guess within the page */

	/* If we only have the latch on btr_search_latch, not on the
	page, it only protects the columns of the record the cursor
	is positioned on. We cannot look at the next or the previous
	record to determine if our guess for the cursor position is
	right. */
	    ut_dulint_cmp(index_id, btr_page_get_index_id(page)), 0)
	    || !btr_search_check_guess(cursor,

	if (UNIV_LIKELY(!has_search_latch)) {
		btr_leaf_page_release(block, latch_mode, mtr);
	}

	if (UNIV_LIKELY(info->n_hash_potential < BTR_SEARCH_BUILD_LIMIT + 5)) {

		info->n_hash_potential++;
	}

#ifdef notdefined
	/* These lines of code can be used in a debug version to check
	the correctness of the searched cursor position: */

	info->last_hash_succ = FALSE;

	/* Currently, does not work if the following fails: */
	ut_ad(!has_search_latch);

	btr_leaf_page_release(block, latch_mode, mtr);
	btr_cur_search_to_nth_level(index, 0, tuple, mode, latch_mode,

	if (mode == PAGE_CUR_GE
	    && page_rec_is_supremum(btr_cur_get_rec(&cursor2))) {

		/* If mode is PAGE_CUR_GE, then the binary search
		in the index tree may actually take us to the supremum
		of the previous page */

		info->last_hash_succ = FALSE;

		btr_pcur_open_on_user_rec(index, tuple, mode, latch_mode,

		ut_ad(btr_pcur_get_rec(&pcur) == btr_cur_get_rec(cursor));
	} else {
		ut_ad(btr_cur_get_rec(&cursor2) == btr_cur_get_rec(cursor));
	}

	/* NOTE that it is theoretically possible that the above assertions
	fail if the page of the cursor gets removed from the buffer pool
	meanwhile! Thus it might not be a bug. */
#endif
	info->last_hash_succ = TRUE;

#ifdef UNIV_SEARCH_PERF_STAT
	btr_search_n_succ++;
#endif /* UNIV_SEARCH_PERF_STAT */
	if (UNIV_LIKELY(!has_search_latch)
	    && buf_page_peek_if_too_old(&block->page)) {

		buf_page_make_young(&block->page);
	}

	/* Increment the page get statistics though we did not really
	fix the page: for user info only */

	buf_pool->n_page_gets++;

	return(TRUE);

	/*-------------------------------------------*/
failure_unlock:
	if (UNIV_LIKELY(!has_search_latch)) {
		rw_lock_s_unlock(&btr_search_latch);
	}
failure:
	cursor->flag = BTR_CUR_HASH_FAIL;

#ifdef UNIV_SEARCH_PERF_STAT
	if (info->n_hash_succ > 0) {
		info->n_hash_succ--;
	}
#endif /* UNIV_SEARCH_PERF_STAT */
	info->last_hash_succ = FALSE;

	return(FALSE);

/************************************************************************
Drops a page hash index. */
btr_search_drop_page_hash_index(
/*============================*/
	buf_block_t*	block)	/* in: block containing index page,
				s- or x-latched, or an index page
				for which we know that
				block->buf_fix_count == 0 */
#ifdef UNIV_SYNC_DEBUG
	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */

retry:
	rw_lock_s_lock(&btr_search_latch);

	if (UNIV_LIKELY(!block->is_hashed)) {

		rw_lock_s_unlock(&btr_search_latch);

		return;
	}

	table = btr_search_sys->hash_index;

#ifdef UNIV_SYNC_DEBUG
	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED)
	      || rw_lock_own(&(block->lock), RW_LOCK_EX)
	      || (block->page.buf_fix_count == 0));
#endif /* UNIV_SYNC_DEBUG */

	n_fields = block->curr_n_fields;
	n_bytes = block->curr_n_bytes;
	index = block->index;
	ut_a(!dict_index_is_ibuf(index));

	/* NOTE: The fields of block must not be accessed after
	releasing btr_search_latch, as the index page might only
	be s-latched! */

	rw_lock_s_unlock(&btr_search_latch);

	ut_a(n_fields + n_bytes > 0);

	n_recs = page_get_n_recs(page);

	/* Calculate and cache fold values into an array for fast deletion
	from the hash index */

	folds = mem_alloc(n_recs * sizeof(ulint));

	rec = page_get_infimum_rec(page);
	rec = page_rec_get_next(rec);

	index_id = btr_page_get_index_id(page);

	ut_a(0 == ut_dulint_cmp(index_id, index->id));
	while (!page_rec_is_supremum(rec)) {
		offsets = rec_get_offsets(rec, index, offsets,
					  n_fields + (n_bytes > 0), &heap);
		ut_a(rec_offs_n_fields(offsets) == n_fields + (n_bytes > 0));
		fold = rec_fold(rec, offsets, n_fields, n_bytes, index_id);

		if (fold == prev_fold && prev_fold != 0) {
			goto next_rec;
		}

		/* Remove all hash nodes pointing to this page from the
		hash chain */

		folds[n_cached] = fold;
		n_cached++;
next_rec:
		rec = page_rec_get_next(rec);
		prev_fold = fold;
	}

	if (UNIV_LIKELY_NULL(heap)) {
		mem_heap_free(heap);
	}

	rw_lock_x_lock(&btr_search_latch);

	if (UNIV_UNLIKELY(!block->is_hashed)) {
		/* Someone else has meanwhile dropped the hash index */
		goto cleanup;
	}

	ut_a(block->index == index);

	if (UNIV_UNLIKELY(block->curr_n_fields != n_fields)
	    || UNIV_UNLIKELY(block->curr_n_bytes != n_bytes)) {

		/* Someone else has meanwhile built a new hash index on the
		page, with different parameters */

		rw_lock_x_unlock(&btr_search_latch);
		mem_free(folds);
		goto retry;
	}

	for (i = 0; i < n_cached; i++) {
		ha_remove_all_nodes_to_page(table, folds[i], page);
	}

	block->is_hashed = FALSE;
	block->index = NULL;

cleanup:
#ifdef UNIV_DEBUG
	if (UNIV_UNLIKELY(block->n_pointers)) {
		ut_print_timestamp(stderr);
		fprintf(stderr,
			" InnoDB: Corruption of adaptive hash index."
			" After dropping\n"
			"InnoDB: the hash index to a page of %s,"
			" still %lu hash nodes remain.\n",
			index->name, (ulong) block->n_pointers);
		rw_lock_x_unlock(&btr_search_latch);

		btr_search_validate();
	} else {
		rw_lock_x_unlock(&btr_search_latch);
	}
#else /* UNIV_DEBUG */
	rw_lock_x_unlock(&btr_search_latch);
#endif /* UNIV_DEBUG */

/************************************************************************
Drops a page hash index when a page is freed from a fseg to the file system.
Drops possible hash index if the page happens to be in the buffer pool. */
btr_search_drop_page_hash_when_freed(
/*=================================*/
	ulint	space,		/* in: space id */
	ulint	zip_size,	/* in: compressed page size in bytes
				or 0 for uncompressed pages */
	ulint	page_no)	/* in: page number */

	if (!buf_page_peek_if_search_hashed(space, page_no)) {

		return;
	}

	/* We assume that if the caller has a latch on the page, then the
	caller has already dropped the hash index for the page, and we never
	get here. Therefore we can acquire the s-latch to the page without
	having to fear a deadlock. */

	block = buf_page_get_gen(space, zip_size, page_no, RW_S_LATCH, NULL,
				 BUF_GET_IF_IN_POOL, __FILE__, __LINE__,

#ifdef UNIV_SYNC_DEBUG
	buf_block_dbg_add_level(block, SYNC_TREE_NODE_FROM_HASH);
#endif /* UNIV_SYNC_DEBUG */

	btr_search_drop_page_hash_index(block);

/************************************************************************
Builds a hash index on a page with the given parameters. If the page already
has a hash index with different parameters, the old hash index is removed.
If index is non-NULL, this function checks if n_fields and n_bytes are
sensible values, and does not build a hash index if not. */
btr_search_build_page_hash_index(
/*=============================*/
	dict_index_t*	index,	/* in: index for which to build */
	buf_block_t*	block,	/* in: index page, s- or x-latched */
	ulint		n_fields,/* in: hash this many full fields */
	ulint		n_bytes,/* in: hash this many bytes from the next
				field */
	ibool		left_side)/* in: hash for searches from left side? */
	hash_table_t*	table;
	mem_heap_t*	heap		= NULL;
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
	ulint*		offsets		= offsets_;
	rec_offs_init(offsets_);

	ut_a(!dict_index_is_ibuf(index));

	table = btr_search_sys->hash_index;
	page = buf_block_get_frame(block);

#ifdef UNIV_SYNC_DEBUG
	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED)
	      || rw_lock_own(&(block->lock), RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */

	rw_lock_s_lock(&btr_search_latch);

	if (block->is_hashed && ((block->curr_n_fields != n_fields)
				 || (block->curr_n_bytes != n_bytes)
				 || (block->curr_left_side != left_side))) {

		rw_lock_s_unlock(&btr_search_latch);

		btr_search_drop_page_hash_index(block);
	} else {
		rw_lock_s_unlock(&btr_search_latch);
	}

	n_recs = page_get_n_recs(page);

	/* Check that the values for hash index build are sensible */

	if (n_fields + n_bytes == 0) {

		return;
	}
	if (dict_index_get_n_unique_in_tree(index) < n_fields
	    || (dict_index_get_n_unique_in_tree(index) == n_fields
		&& (n_bytes > 0))) {

		return;
	}

	/* Calculate and cache fold values and corresponding records into
	an array for fast insertion to the hash index */

	folds = mem_alloc(n_recs * sizeof(ulint));
	recs = mem_alloc(n_recs * sizeof(rec_t*));

	index_id = btr_page_get_index_id(page);

	rec = page_rec_get_next(page_get_infimum_rec(page));

	offsets = rec_get_offsets(rec, index, offsets,
				  n_fields + (n_bytes > 0), &heap);

	if (!page_rec_is_supremum(rec)) {
		ut_a(n_fields <= rec_offs_n_fields(offsets));

		ut_a(n_fields < rec_offs_n_fields(offsets));

	fold = rec_fold(rec, offsets, n_fields, n_bytes, index_id);

	folds[n_cached] = fold;
	recs[n_cached] = rec;

	next_rec = page_rec_get_next(rec);

	if (page_rec_is_supremum(next_rec)) {

		folds[n_cached] = fold;
		recs[n_cached] = rec;

	offsets = rec_get_offsets(next_rec, index, offsets,
				  n_fields + (n_bytes > 0), &heap);
	next_fold = rec_fold(next_rec, offsets, n_fields,
			     n_bytes, index_id);

	if (fold != next_fold) {
		/* Insert an entry into the hash index */

		folds[n_cached] = next_fold;
		recs[n_cached] = next_rec;

		folds[n_cached] = fold;
		recs[n_cached] = rec;
	btr_search_check_free_space_in_heap();

	rw_lock_x_lock(&btr_search_latch);

	if (block->is_hashed && ((block->curr_n_fields != n_fields)
				 || (block->curr_n_bytes != n_bytes)
				 || (block->curr_left_side != left_side))) {
		goto exit_func;
	}

	block->is_hashed = TRUE;
	block->n_hash_helps = 0;

	block->curr_n_fields = n_fields;
	block->curr_n_bytes = n_bytes;
	block->curr_left_side = left_side;
	block->index = index;

	for (i = 0; i < n_cached; i++) {

		ha_insert_for_fold(table, folds[i], block, recs[i]);
	}

exit_func:
	rw_lock_x_unlock(&btr_search_latch);

	if (UNIV_LIKELY_NULL(heap)) {
		mem_heap_free(heap);
	}

/************************************************************************
Moves or deletes hash entries for moved records. If new_page is already hashed,
then the hash index for page, if any, is dropped. If new_page is not hashed,
and page is hashed, then a new hash index is built to new_page with the same
parameters as page (this often happens when a page is split). */
btr_search_move_or_delete_hash_entries(
/*===================================*/
	buf_block_t*	new_block,	/* in: records are copied
					to this page */
	buf_block_t*	block,		/* in: index page from which
					records were copied, and the
					copied records will be deleted
					from this page */
	dict_index_t*	index)		/* in: record descriptor */
#ifdef UNIV_SYNC_DEBUG
	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
	ut_ad(rw_lock_own(&(new_block->lock), RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
	ut_a(!new_block->is_hashed || new_block->index == index);
	ut_a(!block->is_hashed || block->index == index);
	ut_a(!(new_block->is_hashed || block->is_hashed)
	     || !dict_index_is_ibuf(index));

	rw_lock_s_lock(&btr_search_latch);

	if (new_block->is_hashed) {

		rw_lock_s_unlock(&btr_search_latch);

		btr_search_drop_page_hash_index(block);

		return;
	}

	if (block->is_hashed) {

		n_fields = block->curr_n_fields;
		n_bytes = block->curr_n_bytes;
		left_side = block->curr_left_side;

		new_block->n_fields = block->curr_n_fields;
		new_block->n_bytes = block->curr_n_bytes;
		new_block->left_side = left_side;

		rw_lock_s_unlock(&btr_search_latch);

		ut_a(n_fields + n_bytes > 0);

		btr_search_build_page_hash_index(index, new_block, n_fields,
						 n_bytes, left_side);
		ut_ad(n_fields == block->curr_n_fields);
		ut_ad(n_bytes == block->curr_n_bytes);
		ut_ad(left_side == block->curr_left_side);

		return;
	}

	rw_lock_s_unlock(&btr_search_latch);
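
/* Illustrative scenario (restating the comment above in concrete terms):
during a page split roughly half of the records of a hashed page are copied
to a new page. The old page keeps its hash index, and this function builds a
hash index on the new page with the same (n_fields, n_bytes, left_side)
parameters, so later hash searches can land on either half. If the new page
already had its own hash index, the old page's hash index is simply dropped
instead. */
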
/************************************************************************
Updates the page hash index when a single record is deleted from a page. */
btr_search_update_hash_on_delete(
/*=============================*/
	btr_cur_t*	cursor)	/* in: cursor which was positioned on the
				record to delete using btr_cur_search_...,
				the record is not yet deleted */

	hash_table_t*	table;
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
	mem_heap_t*	heap		= NULL;
	rec_offs_init(offsets_);

	rec = btr_cur_get_rec(cursor);

	block = btr_cur_get_block(cursor);

#ifdef UNIV_SYNC_DEBUG
	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */

	if (!block->is_hashed) {

		return;
	}

	ut_a(block->index == cursor->index);
	ut_a(block->curr_n_fields + block->curr_n_bytes > 0);
	ut_a(!dict_index_is_ibuf(cursor->index));

	table = btr_search_sys->hash_index;

	index_id = cursor->index->id;
	fold = rec_fold(rec, rec_get_offsets(rec, cursor->index, offsets_,
					     ULINT_UNDEFINED, &heap),
			block->curr_n_fields, block->curr_n_bytes, index_id);
	if (UNIV_LIKELY_NULL(heap)) {
		mem_heap_free(heap);
	}

	rw_lock_x_lock(&btr_search_latch);

	found = ha_search_and_delete_if_found(table, fold, rec);

	rw_lock_x_unlock(&btr_search_latch);

/************************************************************************
Updates the page hash index when a single record is inserted on a page. */
btr_search_update_hash_node_on_insert(
/*==================================*/
	btr_cur_t*	cursor)	/* in: cursor which was positioned to the
				place to insert using btr_cur_search_...,
				and the new record has been inserted next
				to the cursor */

	hash_table_t*	table;

	rec = btr_cur_get_rec(cursor);

	block = btr_cur_get_block(cursor);

#ifdef UNIV_SYNC_DEBUG
	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */

	if (!block->is_hashed) {

		return;
	}

	ut_a(block->index == cursor->index);
	ut_a(!dict_index_is_ibuf(cursor->index));

	rw_lock_x_lock(&btr_search_latch);

	if ((cursor->flag == BTR_CUR_HASH)
	    && (cursor->n_fields == block->curr_n_fields)
	    && (cursor->n_bytes == block->curr_n_bytes)
	    && !block->curr_left_side) {

		table = btr_search_sys->hash_index;

		ha_search_and_update_if_found(table, cursor->fold, rec,
					      block, page_rec_get_next(rec));

		rw_lock_x_unlock(&btr_search_latch);
	} else {
		rw_lock_x_unlock(&btr_search_latch);

		btr_search_update_hash_on_insert(cursor);
	}

/************************************************************************
Updates the page hash index when a single record is inserted on a page. */
btr_search_update_hash_on_insert(
/*=============================*/
	btr_cur_t*	cursor)	/* in: cursor which was positioned to the
				place to insert using btr_cur_search_...,
				and the new record has been inserted next
				to the cursor */

	hash_table_t*	table;
	ulint		next_fold = 0; /* remove warning (??? bug ???) */
	ibool		locked		= FALSE;
	mem_heap_t*	heap		= NULL;
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
	ulint*		offsets		= offsets_;
	rec_offs_init(offsets_);

	table = btr_search_sys->hash_index;

	btr_search_check_free_space_in_heap();

	rec = btr_cur_get_rec(cursor);

	block = btr_cur_get_block(cursor);

#ifdef UNIV_SYNC_DEBUG
	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */

	if (!block->is_hashed) {

		return;
	}

	ut_a(block->index == cursor->index);
	ut_a(!dict_index_is_ibuf(cursor->index));

	index_id = cursor->index->id;

	n_fields = block->curr_n_fields;
	n_bytes = block->curr_n_bytes;
	left_side = block->curr_left_side;

	ins_rec = page_rec_get_next(rec);
	next_rec = page_rec_get_next(ins_rec);

	offsets = rec_get_offsets(ins_rec, cursor->index, offsets,
				  ULINT_UNDEFINED, &heap);
	ins_fold = rec_fold(ins_rec, offsets, n_fields, n_bytes, index_id);
	if (!page_rec_is_supremum(next_rec)) {
		offsets = rec_get_offsets(next_rec, cursor->index, offsets,
					  n_fields + (n_bytes > 0), &heap);
		next_fold = rec_fold(next_rec, offsets, n_fields,
				     n_bytes, index_id);
	}

	if (!page_rec_is_infimum(rec)) {
		offsets = rec_get_offsets(rec, cursor->index, offsets,
					  n_fields + (n_bytes > 0), &heap);
		fold = rec_fold(rec, offsets, n_fields, n_bytes, index_id);
	} else {
		rw_lock_x_lock(&btr_search_latch);

		ha_insert_for_fold(table, ins_fold, block, ins_rec);

		goto check_next_rec;
	}

	if (fold != ins_fold) {

		rw_lock_x_lock(&btr_search_latch);

		ha_insert_for_fold(table, fold, block, rec);
		ha_insert_for_fold(table, ins_fold, block, ins_rec);
	}

check_next_rec:
	if (page_rec_is_supremum(next_rec)) {

		rw_lock_x_lock(&btr_search_latch);

		ha_insert_for_fold(table, ins_fold, block, ins_rec);
	}

	if (ins_fold != next_fold) {

		rw_lock_x_lock(&btr_search_latch);

		ha_insert_for_fold(table, ins_fold, block, ins_rec);

		fputs("Hash insert for ", stderr);
		dict_index_name_print(stderr, cursor->index);
		fprintf(stderr, " fold %lu\n", ins_fold);

		ha_insert_for_fold(table, next_fold, block, next_rec);
	}

	if (UNIV_LIKELY_NULL(heap)) {
		mem_heap_free(heap);
	}

	if (locked) {
		rw_lock_x_unlock(&btr_search_latch);
	}

/************************************************************************
Validates the search system. */
btr_search_validate(void)
/*=====================*/
				/* out: TRUE if ok */

	ulint		n_page_dumps	= 0;
	mem_heap_t*	heap		= NULL;
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
	ulint*		offsets		= offsets_;

	/* How many cells to check before temporarily releasing
	btr_search_latch. */
	ulint		chunk_size = 10000;

	rec_offs_init(offsets_);
	rw_lock_x_lock(&btr_search_latch);
	buf_pool_mutex_enter();

	cell_count = hash_get_n_cells(btr_search_sys->hash_index);

	for (i = 0; i < cell_count; i++) {
		/* We release btr_search_latch every once in a while to
		give other queries a chance to run. */
		if ((i != 0) && ((i % chunk_size) == 0)) {
			buf_pool_mutex_exit();
			rw_lock_x_unlock(&btr_search_latch);

			rw_lock_x_lock(&btr_search_latch);
			buf_pool_mutex_enter();
		}

		node = hash_get_nth_cell(btr_search_sys->hash_index, i)->node;

		for (; node != NULL; node = node->next) {
			const buf_block_t*	block;

			page = page_align(node->data);
			{
				ulint	page_no	= page_get_page_no(page);
				ulint	space_id = page_get_space_id(page);

				block = buf_block_hash_get(space_id, page_no);
			}

			if (UNIV_UNLIKELY(!block)) {

				/* The block is most probably being freed.
				The function buf_LRU_search_and_free_block()
				first removes the block from
				buf_pool->page_hash by calling
				buf_LRU_block_remove_hashed_page().
				After that, it invokes
				btr_search_drop_page_hash_index().
				Let us pretend that the block was also removed
				from the adaptive hash index. */
			ut_a(!dict_index_is_ibuf(block->index));

			offsets = rec_get_offsets((const rec_t*) node->data,
						  block->index, offsets,
						  block->curr_n_fields
						  + (block->curr_n_bytes > 0),
						  &heap);

			if (!block->is_hashed || node->fold
			    != rec_fold((rec_t*)(node->data),
					offsets,
					block->curr_n_fields,
					block->curr_n_bytes,
					btr_page_get_index_id(page))) {

				ut_print_timestamp(stderr);
				fprintf(stderr,
					" InnoDB: Error in an adaptive hash"
					" index pointer to page %lu\n"
					"InnoDB: ptr mem address %p"
					" index id %lu %lu,"
					" node fold %lu, rec fold %lu\n",
					(ulong) page_get_page_no(page),
					node->data,
					(ulong) ut_dulint_get_high(
						btr_page_get_index_id(page)),
					(ulong) ut_dulint_get_low(
						btr_page_get_index_id(page)),
					(ulong) node->fold,
					(ulong) rec_fold((rec_t*)(node->data),
							 offsets,
							 block->curr_n_fields,
							 block->curr_n_bytes,
							 btr_page_get_index_id(
								 page)));

				fputs("InnoDB: Record ", stderr);
				rec_print_new(stderr, (rec_t*)node->data,
					      offsets);
				fprintf(stderr, "\nInnoDB: on that page."
					" Page mem address %p, is hashed %lu,"
					" n fields %lu, n bytes %lu\n"
					"InnoDB: side %lu\n",
					(void*) page, (ulong) block->is_hashed,
					(ulong) block->curr_n_fields,
					(ulong) block->curr_n_bytes,
					(ulong) block->curr_left_side);

				if (n_page_dumps < 20) {
					buf_page_print(page, 0);
	for (i = 0; i < cell_count; i += chunk_size) {
		ulint end_index = ut_min(i + chunk_size - 1, cell_count - 1);

		/* We release btr_search_latch every once in a while to
		give other queries a chance to run. */

		buf_pool_mutex_exit();
		rw_lock_x_unlock(&btr_search_latch);

		rw_lock_x_lock(&btr_search_latch);
		buf_pool_mutex_enter();

		if (!ha_validate(btr_search_sys->hash_index, i, end_index)) {
			ok = FALSE;
		}
	}

	buf_pool_mutex_exit();
	rw_lock_x_unlock(&btr_search_latch);

	if (UNIV_LIKELY_NULL(heap)) {
		mem_heap_free(heap);
	}